diff options
Diffstat (limited to 'BFS')
| -rwxr-xr-x | BFS/benchmark-scripts/ccmcc25-sim.sh | 27 | ||||
| -rwxr-xr-x | BFS/benchmark-scripts/ccmcc25.sh | 27 | ||||
| -rw-r--r-- | BFS/host/app.c | 84 | ||||
| -rw-r--r-- | BFS/host/mram-management.h | 22 |
4 files changed, 87 insertions, 73 deletions
diff --git a/BFS/benchmark-scripts/ccmcc25-sim.sh b/BFS/benchmark-scripts/ccmcc25-sim.sh new file mode 100755 index 0000000..bcbe284 --- /dev/null +++ b/BFS/benchmark-scripts/ccmcc25-sim.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +mkdir -p log/$(hostname) + +run_benchmark_nmc() { + local "$@" + set -e + make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} \ + aspectc=1 aspectc_timing=1 dfatool_timing=0 + bin/host_code -f ${data} 2>&1 +} + +export -f run_benchmark_nmc + +fn=log/$(hostname)/ccmcc25-sdk${sdk}-sim + +source ~/lib/local/upmem/upmem-2025.1.0-Linux-x86_64/upmem_env.sh simulator + +echo "prim-benchmarks BFS $(git describe --all --long) $(git rev-parse HEAD) $(date -R)" >> ${fn}.txt + +# BFS does not support repeated kernel invocations → repeat it here +parallel -j1 --eta --joblog ${fn}.joblog --resume --header : \ + run_benchmark_nmc nr_dpus={nr_dpus} nr_tasklets=16 data={data} \ + ::: i $(seq 0 4) \ + ::: data data/roadNet-CA.txt data/loc-gowalla_edges.txt \ + ::: nr_dpus 1 2 4 8 16 32 48 64 \ +>> ${fn}.txt diff --git a/BFS/benchmark-scripts/ccmcc25.sh b/BFS/benchmark-scripts/ccmcc25.sh index 436d2eb..0dcf4bb 100755 --- a/BFS/benchmark-scripts/ccmcc25.sh +++ b/BFS/benchmark-scripts/ccmcc25.sh @@ -1,9 +1,6 @@ #!/bin/bash mkdir -p log/$(hostname) -fn=log/$(hostname)/ccmcc25 - -source /opt/upmem/upmem-2025.1.0-Linux-x86_64/upmem_env.sh run_benchmark_nmc() { local "$@" @@ -16,12 +13,20 @@ run_benchmark_nmc() { export -f run_benchmark_nmc -echo "prim-benchmarks BFS $(git describe --all --long) $(git rev-parse HEAD) $(date -R)" >> ${fn}.txt +for sdk in 2023.2.0 2024.1.0 2024.2.0 2025.1.0; do + + fn=log/$(hostname)/ccmcc25-sdk${sdk} + + source /opt/upmem/upmem-${sdk}-Linux-x86_64/upmem_env.sh + + echo "prim-benchmarks BFS $(git describe --all --long) $(git rev-parse HEAD) $(date -R)" >> ${fn}.txt + + # BFS does not support repeated kernel invocations → repeat it here + parallel -j1 --eta --joblog ${fn}.joblog --resume --header : \ + run_benchmark_nmc nr_dpus={nr_dpus} nr_tasklets=16 numa_rank=any data={data} \ + ::: i $(seq 0 10) \ + ::: data data/roadNet-CA.txt data/loc-gowalla_edges.txt \ + ::: nr_dpus 64 128 256 512 768 1024 1536 2048 2304 \ + >> ${fn}.txt -# BFS does not support repeated kernel invocations → repeat it here -parallel -j1 --eta --joblog ${fn}.joblog --resume --header : \ - run_benchmark_nmc nr_dpus={nr_dpus} nr_tasklets=16 numa_rank=any data={data} \ - ::: i $(seq 0 10) \ - ::: data data/roadNet-CA.txt data/loc-gowalla_edges.txt \ - ::: nr_dpus 64 128 256 512 768 1024 1536 2048 2304 \ ->> ${fn}.txt +done diff --git a/BFS/host/app.c b/BFS/host/app.c index 763ff7a..4431193 100644 --- a/BFS/host/app.c +++ b/BFS/host/app.c @@ -186,19 +186,27 @@ int main(int argc, char **argv) PRINT_INFO(p.verbosity >= 2, " Copying data to DPU"); startTimer(&timer, 2, t0ini++); - copyToDPU(dpu, (uint8_t *) dpuNodePtrs_h, dpuNodePtrs_m, - (dpuNumNodes + 1) * sizeof(uint32_t)); - copyToDPU(dpu, (uint8_t *) dpuNeighborIdxs_h, - dpuNeighborIdxs_m, - dpuNumNeighbors * sizeof(uint32_t)); - copyToDPU(dpu, (uint8_t *) dpuNodeLevel_h, - dpuNodeLevel_m, - dpuNumNodes * sizeof(uint32_t)); - copyToDPU(dpu, (uint8_t *) visited, dpuVisited_m, - numNodes / 64 * sizeof(uint64_t)); - copyToDPU(dpu, (uint8_t *) nextFrontier, - dpuNextFrontier_m, - numNodes / 64 * sizeof(uint64_t)); + + DPU_ASSERT(dpu_copy_to(dpu, DPU_MRAM_HEAP_POINTER_NAME, + dpuNodePtrs_m, (uint8_t *) dpuNodePtrs_h, + ROUND_UP_TO_MULTIPLE_OF_8((dpuNumNodes + 1) * sizeof(uint32_t)))); + + DPU_ASSERT(dpu_copy_to(dpu, DPU_MRAM_HEAP_POINTER_NAME, + dpuNeighborIdxs_m, (uint8_t *) dpuNeighborIdxs_h, + ROUND_UP_TO_MULTIPLE_OF_8(dpuNumNeighbors * sizeof(uint32_t)))); + + DPU_ASSERT(dpu_copy_to(dpu, DPU_MRAM_HEAP_POINTER_NAME, + dpuNodeLevel_m, (uint8_t *) dpuNodeLevel_h, + ROUND_UP_TO_MULTIPLE_OF_8(dpuNumNodes * sizeof(uint32_t)))); + + DPU_ASSERT(dpu_copy_to(dpu, DPU_MRAM_HEAP_POINTER_NAME, + dpuVisited_m, (uint8_t *) visited, + ROUND_UP_TO_MULTIPLE_OF_8(numNodes / 64 * sizeof(uint64_t)))); + + DPU_ASSERT(dpu_copy_to(dpu, DPU_MRAM_HEAP_POINTER_NAME, + dpuNextFrontier_m, (uint8_t *) nextFrontier, + ROUND_UP_TO_MULTIPLE_OF_8(numNodes / 64 * sizeof(uint64_t)))); + // NOTE: No need to copy current frontier because it is written before being read stopTimer(&timer, 2); //loadTime += getElapsedTime(timer); @@ -208,8 +216,9 @@ int main(int argc, char **argv) PRINT_INFO(p.verbosity >= 2, " Copying parameters to DPU"); startTimer(&timer, 2, t1ini++); - copyToDPU(dpu, (uint8_t *) & dpuParams[dpuIdx], - dpuParams_m[dpuIdx], sizeof(struct DPUParams)); + DPU_ASSERT(dpu_copy_to(dpu, DPU_MRAM_HEAP_POINTER_NAME, + dpuParams_m[dpuIdx], (uint8_t *) & dpuParams[dpuIdx], + ROUND_UP_TO_MULTIPLE_OF_8(sizeof(struct DPUParams)))); stopTimer(&timer, 2); //loadTime += getElapsedTime(timer); @@ -248,19 +257,15 @@ int main(int argc, char **argv) uint32_t dpuNumNodes = dpuParams[dpuIdx].dpuNumNodes; if (dpuNumNodes > 0) { if (dpuIdx == 0) { - copyFromDPU(dpu, - dpuParams[dpuIdx]. - dpuNextFrontier_m, - (uint8_t *) currentFrontier, - numNodes / 64 * - sizeof(uint64_t)); + DPU_ASSERT(dpu_copy_from(dpu, DPU_MRAM_HEAP_POINTER_NAME, + dpuParams[dpuIdx].dpuNextFrontier_m, + (uint8_t *) currentFrontier, + ROUND_UP_TO_MULTIPLE_OF_8(numNodes / 64 * sizeof(uint64_t)))); } else { - copyFromDPU(dpu, - dpuParams[dpuIdx]. - dpuNextFrontier_m, - (uint8_t *) nextFrontier, - numNodes / 64 * - sizeof(uint64_t)); + DPU_ASSERT(dpu_copy_from(dpu, DPU_MRAM_HEAP_POINTER_NAME, + dpuParams[dpuIdx].dpuNextFrontier_m, + (uint8_t *) nextFrontier, + ROUND_UP_TO_MULTIPLE_OF_8(numNodes / 64 * sizeof(uint64_t)))); for (uint32_t i = 0; i < numNodes / 64; ++i) { currentFrontier[i] |= @@ -287,19 +292,15 @@ int main(int argc, char **argv) dpuParams[dpuIdx].dpuNumNodes; if (dpuNumNodes > 0) { // Copy current frontier to all DPUs (place in next frontier and DPU will update visited and copy to current frontier) - copyToDPU(dpu, - (uint8_t *) currentFrontier, - dpuParams[dpuIdx]. - dpuNextFrontier_m, - numNodes / 64 * - sizeof(uint64_t)); + DPU_ASSERT(dpu_copy_to(dpu, DPU_MRAM_HEAP_POINTER_NAME, + dpuParams[dpuIdx].dpuNextFrontier_m, + (uint8_t *) currentFrontier, + ROUND_UP_TO_MULTIPLE_OF_8(numNodes / 64 * sizeof(uint64_t)))); // Copy new level to DPU dpuParams[dpuIdx].level = level; - copyToDPU(dpu, - (uint8_t *) & - dpuParams[dpuIdx], - dpuParams_m[dpuIdx], - sizeof(struct DPUParams)); + DPU_ASSERT(dpu_copy_to(dpu, DPU_MRAM_HEAP_POINTER_NAME, + dpuParams_m[dpuIdx], (uint8_t *) &dpuParams[dpuIdx], + ROUND_UP_TO_MULTIPLE_OF_8(sizeof(struct DPUParams)))); ++dpuIdx; } } @@ -317,9 +318,10 @@ int main(int argc, char **argv) uint32_t dpuNumNodes = dpuParams[dpuIdx].dpuNumNodes; if (dpuNumNodes > 0) { uint32_t dpuStartNodeIdx = dpuIdx * numNodesPerDPU; - copyFromDPU(dpu, dpuParams[dpuIdx].dpuNodeLevel_m, - (uint8_t *) (nodeLevel + dpuStartNodeIdx), - dpuNumNodes * sizeof(float)); + DPU_ASSERT(dpu_copy_from(dpu, DPU_MRAM_HEAP_POINTER_NAME, + dpuParams[dpuIdx].dpuNodeLevel_m, + (uint8_t *) (nodeLevel + dpuStartNodeIdx), + ROUND_UP_TO_MULTIPLE_OF_8(dpuNumNodes * sizeof(float)))); } ++dpuIdx; } diff --git a/BFS/host/mram-management.h b/BFS/host/mram-management.h index 61616da..a953d6a 100644 --- a/BFS/host/mram-management.h +++ b/BFS/host/mram-management.h @@ -1,6 +1,4 @@ - -#ifndef _MRAM_MANAGEMENT_H_ -#define _MRAM_MANAGEMENT_H_ +#pragma once #include "common.h" #include "utils.h" @@ -29,21 +27,3 @@ static uint32_t mram_heap_alloc(struct mram_heap_allocator_t *allocator, } return ret; } - -static void copyToDPU(struct dpu_set_t dpu, uint8_t *hostPtr, uint32_t mramIdx, - uint32_t size) -{ - DPU_ASSERT(dpu_copy_to - (dpu, DPU_MRAM_HEAP_POINTER_NAME, mramIdx, hostPtr, - ROUND_UP_TO_MULTIPLE_OF_8(size))); -} - -static void copyFromDPU(struct dpu_set_t dpu, uint32_t mramIdx, - uint8_t *hostPtr, uint32_t size) -{ - DPU_ASSERT(dpu_copy_from - (dpu, DPU_MRAM_HEAP_POINTER_NAME, mramIdx, hostPtr, - ROUND_UP_TO_MULTIPLE_OF_8(size))); -} - -#endif |
