summary | refs | log | tree | commit | diff
path: root/BFS
diff options
context:
space:
mode:
Diffstat (limited to 'BFS')
-rwxr-xr-x BFS/benchmark-scripts/ccmcc25-sim.sh 27
-rwxr-xr-x BFS/benchmark-scripts/ccmcc25.sh 27
-rw-r--r-- BFS/host/app.c 84
-rw-r--r-- BFS/host/mram-management.h 22
4 files changed, 87 insertions, 73 deletions
diff --git a/BFS/benchmark-scripts/ccmcc25-sim.sh b/BFS/benchmark-scripts/ccmcc25-sim.sh
new file mode 100755
index 0000000..bcbe284
--- /dev/null
+++ b/BFS/benchmark-scripts/ccmcc25-sim.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+mkdir -p log/$(hostname)
+
+run_benchmark_nmc() {
+ local "$@"
+ set -e
+ make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} \
+ aspectc=1 aspectc_timing=1 dfatool_timing=0
+ bin/host_code -f ${data} 2>&1
+}
+
+export -f run_benchmark_nmc
+
+fn=log/$(hostname)/ccmcc25-sdk${sdk}-sim
+
+source ~/lib/local/upmem/upmem-2025.1.0-Linux-x86_64/upmem_env.sh simulator
+
+echo "prim-benchmarks BFS $(git describe --all --long) $(git rev-parse HEAD) $(date -R)" >> ${fn}.txt
+
+# BFS does not support repeated kernel invocations → repeat it here
+parallel -j1 --eta --joblog ${fn}.joblog --resume --header : \
+ run_benchmark_nmc nr_dpus={nr_dpus} nr_tasklets=16 data={data} \
+ ::: i $(seq 0 4) \
+ ::: data data/roadNet-CA.txt data/loc-gowalla_edges.txt \
+ ::: nr_dpus 1 2 4 8 16 32 48 64 \
+>> ${fn}.txt
diff --git a/BFS/benchmark-scripts/ccmcc25.sh b/BFS/benchmark-scripts/ccmcc25.sh
index 436d2eb..0dcf4bb 100755
--- a/BFS/benchmark-scripts/ccmcc25.sh
+++ b/BFS/benchmark-scripts/ccmcc25.sh
@@ -1,9 +1,6 @@
#!/bin/bash
mkdir -p log/$(hostname)
-fn=log/$(hostname)/ccmcc25
-
-source /opt/upmem/upmem-2025.1.0-Linux-x86_64/upmem_env.sh
run_benchmark_nmc() {
local "$@"
@@ -16,12 +13,20 @@ run_benchmark_nmc() {
export -f run_benchmark_nmc
-echo "prim-benchmarks BFS $(git describe --all --long) $(git rev-parse HEAD) $(date -R)" >> ${fn}.txt
+for sdk in 2023.2.0 2024.1.0 2024.2.0 2025.1.0; do
+
+ fn=log/$(hostname)/ccmcc25-sdk${sdk}
+
+ source /opt/upmem/upmem-${sdk}-Linux-x86_64/upmem_env.sh
+
+ echo "prim-benchmarks BFS $(git describe --all --long) $(git rev-parse HEAD) $(date -R)" >> ${fn}.txt
+
+ # BFS does not support repeated kernel invocations → repeat it here
+ parallel -j1 --eta --joblog ${fn}.joblog --resume --header : \
+ run_benchmark_nmc nr_dpus={nr_dpus} nr_tasklets=16 numa_rank=any data={data} \
+ ::: i $(seq 0 10) \
+ ::: data data/roadNet-CA.txt data/loc-gowalla_edges.txt \
+ ::: nr_dpus 64 128 256 512 768 1024 1536 2048 2304 \
+ >> ${fn}.txt
-# BFS does not support repeated kernel invocations → repeat it here
-parallel -j1 --eta --joblog ${fn}.joblog --resume --header : \
- run_benchmark_nmc nr_dpus={nr_dpus} nr_tasklets=16 numa_rank=any data={data} \
- ::: i $(seq 0 10) \
- ::: data data/roadNet-CA.txt data/loc-gowalla_edges.txt \
- ::: nr_dpus 64 128 256 512 768 1024 1536 2048 2304 \
->> ${fn}.txt
+done
diff --git a/BFS/host/app.c b/BFS/host/app.c
index 763ff7a..4431193 100644
--- a/BFS/host/app.c
+++ b/BFS/host/app.c
@@ -186,19 +186,27 @@ int main(int argc, char **argv)
PRINT_INFO(p.verbosity >= 2,
" Copying data to DPU");
startTimer(&timer, 2, t0ini++);
- copyToDPU(dpu, (uint8_t *) dpuNodePtrs_h, dpuNodePtrs_m,
- (dpuNumNodes + 1) * sizeof(uint32_t));
- copyToDPU(dpu, (uint8_t *) dpuNeighborIdxs_h,
- dpuNeighborIdxs_m,
- dpuNumNeighbors * sizeof(uint32_t));
- copyToDPU(dpu, (uint8_t *) dpuNodeLevel_h,
- dpuNodeLevel_m,
- dpuNumNodes * sizeof(uint32_t));
- copyToDPU(dpu, (uint8_t *) visited, dpuVisited_m,
- numNodes / 64 * sizeof(uint64_t));
- copyToDPU(dpu, (uint8_t *) nextFrontier,
- dpuNextFrontier_m,
- numNodes / 64 * sizeof(uint64_t));
+
+ DPU_ASSERT(dpu_copy_to(dpu, DPU_MRAM_HEAP_POINTER_NAME,
+ dpuNodePtrs_m, (uint8_t *) dpuNodePtrs_h,
+ ROUND_UP_TO_MULTIPLE_OF_8((dpuNumNodes + 1) * sizeof(uint32_t))));
+
+ DPU_ASSERT(dpu_copy_to(dpu, DPU_MRAM_HEAP_POINTER_NAME,
+ dpuNeighborIdxs_m, (uint8_t *) dpuNeighborIdxs_h,
+ ROUND_UP_TO_MULTIPLE_OF_8(dpuNumNeighbors * sizeof(uint32_t))));
+
+ DPU_ASSERT(dpu_copy_to(dpu, DPU_MRAM_HEAP_POINTER_NAME,
+ dpuNodeLevel_m, (uint8_t *) dpuNodeLevel_h,
+ ROUND_UP_TO_MULTIPLE_OF_8(dpuNumNodes * sizeof(uint32_t))));
+
+ DPU_ASSERT(dpu_copy_to(dpu, DPU_MRAM_HEAP_POINTER_NAME,
+ dpuVisited_m, (uint8_t *) visited,
+ ROUND_UP_TO_MULTIPLE_OF_8(numNodes / 64 * sizeof(uint64_t))));
+
+ DPU_ASSERT(dpu_copy_to(dpu, DPU_MRAM_HEAP_POINTER_NAME,
+ dpuNextFrontier_m, (uint8_t *) nextFrontier,
+ ROUND_UP_TO_MULTIPLE_OF_8(numNodes / 64 * sizeof(uint64_t))));
+
// NOTE: No need to copy current frontier because it is written before being read
stopTimer(&timer, 2);
//loadTime += getElapsedTime(timer);
@@ -208,8 +216,9 @@ int main(int argc, char **argv)
PRINT_INFO(p.verbosity >= 2,
" Copying parameters to DPU");
startTimer(&timer, 2, t1ini++);
- copyToDPU(dpu, (uint8_t *) & dpuParams[dpuIdx],
- dpuParams_m[dpuIdx], sizeof(struct DPUParams));
+ DPU_ASSERT(dpu_copy_to(dpu, DPU_MRAM_HEAP_POINTER_NAME,
+ dpuParams_m[dpuIdx], (uint8_t *) & dpuParams[dpuIdx],
+ ROUND_UP_TO_MULTIPLE_OF_8(sizeof(struct DPUParams))));
stopTimer(&timer, 2);
//loadTime += getElapsedTime(timer);
@@ -248,19 +257,15 @@ int main(int argc, char **argv)
uint32_t dpuNumNodes = dpuParams[dpuIdx].dpuNumNodes;
if (dpuNumNodes > 0) {
if (dpuIdx == 0) {
- copyFromDPU(dpu,
- dpuParams[dpuIdx].
- dpuNextFrontier_m,
- (uint8_t *) currentFrontier,
- numNodes / 64 *
- sizeof(uint64_t));
+ DPU_ASSERT(dpu_copy_from(dpu, DPU_MRAM_HEAP_POINTER_NAME,
+ dpuParams[dpuIdx].dpuNextFrontier_m,
+ (uint8_t *) currentFrontier,
+ ROUND_UP_TO_MULTIPLE_OF_8(numNodes / 64 * sizeof(uint64_t))));
} else {
- copyFromDPU(dpu,
- dpuParams[dpuIdx].
- dpuNextFrontier_m,
- (uint8_t *) nextFrontier,
- numNodes / 64 *
- sizeof(uint64_t));
+ DPU_ASSERT(dpu_copy_from(dpu, DPU_MRAM_HEAP_POINTER_NAME,
+ dpuParams[dpuIdx].dpuNextFrontier_m,
+ (uint8_t *) nextFrontier,
+ ROUND_UP_TO_MULTIPLE_OF_8(numNodes / 64 * sizeof(uint64_t))));
for (uint32_t i = 0; i < numNodes / 64;
++i) {
currentFrontier[i] |=
@@ -287,19 +292,15 @@ int main(int argc, char **argv)
dpuParams[dpuIdx].dpuNumNodes;
if (dpuNumNodes > 0) {
// Copy current frontier to all DPUs (place in next frontier and DPU will update visited and copy to current frontier)
- copyToDPU(dpu,
- (uint8_t *) currentFrontier,
- dpuParams[dpuIdx].
- dpuNextFrontier_m,
- numNodes / 64 *
- sizeof(uint64_t));
+ DPU_ASSERT(dpu_copy_to(dpu, DPU_MRAM_HEAP_POINTER_NAME,
+ dpuParams[dpuIdx].dpuNextFrontier_m,
+ (uint8_t *) currentFrontier,
+ ROUND_UP_TO_MULTIPLE_OF_8(numNodes / 64 * sizeof(uint64_t))));
// Copy new level to DPU
dpuParams[dpuIdx].level = level;
- copyToDPU(dpu,
- (uint8_t *) &
- dpuParams[dpuIdx],
- dpuParams_m[dpuIdx],
- sizeof(struct DPUParams));
+ DPU_ASSERT(dpu_copy_to(dpu, DPU_MRAM_HEAP_POINTER_NAME,
+ dpuParams_m[dpuIdx], (uint8_t *) &dpuParams[dpuIdx],
+ ROUND_UP_TO_MULTIPLE_OF_8(sizeof(struct DPUParams))));
++dpuIdx;
}
}
@@ -317,9 +318,10 @@ int main(int argc, char **argv)
uint32_t dpuNumNodes = dpuParams[dpuIdx].dpuNumNodes;
if (dpuNumNodes > 0) {
uint32_t dpuStartNodeIdx = dpuIdx * numNodesPerDPU;
- copyFromDPU(dpu, dpuParams[dpuIdx].dpuNodeLevel_m,
- (uint8_t *) (nodeLevel + dpuStartNodeIdx),
- dpuNumNodes * sizeof(float));
+ DPU_ASSERT(dpu_copy_from(dpu, DPU_MRAM_HEAP_POINTER_NAME,
+ dpuParams[dpuIdx].dpuNodeLevel_m,
+ (uint8_t *) (nodeLevel + dpuStartNodeIdx),
+ ROUND_UP_TO_MULTIPLE_OF_8(dpuNumNodes * sizeof(float))));
}
++dpuIdx;
}
diff --git a/BFS/host/mram-management.h b/BFS/host/mram-management.h
index 61616da..a953d6a 100644
--- a/BFS/host/mram-management.h
+++ b/BFS/host/mram-management.h
@@ -1,6 +1,4 @@
-
-#ifndef _MRAM_MANAGEMENT_H_
-#define _MRAM_MANAGEMENT_H_
+#pragma once
#include "common.h"
#include "utils.h"
@@ -29,21 +27,3 @@ static uint32_t mram_heap_alloc(struct mram_heap_allocator_t *allocator,
}
return ret;
}
-
-static void copyToDPU(struct dpu_set_t dpu, uint8_t *hostPtr, uint32_t mramIdx,
- uint32_t size)
-{
- DPU_ASSERT(dpu_copy_to
- (dpu, DPU_MRAM_HEAP_POINTER_NAME, mramIdx, hostPtr,
- ROUND_UP_TO_MULTIPLE_OF_8(size)));
-}
-
-static void copyFromDPU(struct dpu_set_t dpu, uint32_t mramIdx,
- uint8_t *hostPtr, uint32_t size)
-{
- DPU_ASSERT(dpu_copy_from
- (dpu, DPU_MRAM_HEAP_POINTER_NAME, mramIdx, hostPtr,
- ROUND_UP_TO_MULTIPLE_OF_8(size)));
-}
-
-#endif