summary refs log tree commit diff
path: root/BFS
diff options
context:
space:
mode:
Diffstat (limited to 'BFS')
-rw-r--r-- BFS/Makefile 33
-rwxr-xr-x BFS/benchmark-scripts/ccmcc25-sim.sh 27
-rwxr-xr-x BFS/benchmark-scripts/ccmcc25.sh 32
-rw-r--r-- BFS/dpu/task.c 2
-rw-r--r-- BFS/host/app.c 144
-rw-r--r-- BFS/host/mram-management.h 26
-rw-r--r-- BFS/include/common.h (renamed from BFS/support/common.h) 0
-rw-r--r-- BFS/include/dfatool_host.ah 30
-rw-r--r-- BFS/include/graph.h (renamed from BFS/support/graph.h) 0
-rw-r--r-- BFS/include/params.h (renamed from BFS/support/params.h) 0
-rw-r--r-- BFS/include/timer.h 8
-rw-r--r-- BFS/include/utils.h (renamed from BFS/support/utils.h) 0
-rwxr-xr-x BFS/run-paper-strong-full.sh 23
-rwxr-xr-x BFS/run-paper-strong-rank.sh 23
-rwxr-xr-x BFS/run-paper-weak.sh 24
-rwxr-xr-x BFS/run.sh 25
-rw-r--r-- BFS/support/timer.h 31
17 files changed, 202 insertions, 226 deletions
diff --git a/BFS/Makefile b/BFS/Makefile
index d43202f..a773b38 100644
--- a/BFS/Makefile
+++ b/BFS/Makefile
@@ -4,17 +4,34 @@ WITH_ALLOC_OVERHEAD ?= 0
WITH_LOAD_OVERHEAD ?= 0
WITH_FREE_OVERHEAD ?= 0
-COMMON_INCLUDES := support
HOST_SOURCES := $(wildcard host/*.c)
DPU_SOURCES := $(wildcard dpu/*.c)
-COMMON_FLAGS := -Wall -Wextra -g -I${COMMON_INCLUDES}
-HOST_FLAGS := ${COMMON_FLAGS} -std=c11 -O3 `dpu-pkg-config --cflags --libs dpu` -DNR_TASKLETS=${NR_TASKLETS} -DNR_DPUS=${NR_DPUS} -DWITH_ALLOC_OVERHEAD=${WITH_ALLOC_OVERHEAD} -DWITH_LOAD_OVERHEAD=${WITH_LOAD_OVERHEAD} -DWITH_FREE_OVERHEAD=${WITH_FREE_OVERHEAD}
+aspectc ?= 0
+aspectc_timing ?= 0
+dfatool_timing ?= 1
+
+HOST_CC := ${CC}
+
+COMMON_FLAGS := -Wall -Wextra -g -Iinclude
+HOST_FLAGS := ${COMMON_FLAGS} -O3 `dpu-pkg-config --cflags --libs dpu` -DNR_TASKLETS=${NR_TASKLETS} -DNR_DPUS=${NR_DPUS} -DWITH_ALLOC_OVERHEAD=${WITH_ALLOC_OVERHEAD} -DWITH_LOAD_OVERHEAD=${WITH_LOAD_OVERHEAD} -DWITH_FREE_OVERHEAD=${WITH_FREE_OVERHEAD} -DDFATOOL_TIMING=${dfatool_timing} -DASPECTC=${aspectc}
DPU_FLAGS := ${COMMON_FLAGS} -O2 -DNR_TASKLETS=${NR_TASKLETS}
+ifeq (${aspectc_timing}, 1)
+ ASPECTC_HOST_FLAGS += -ainclude/dfatool_host_dpu.ah -ainclude/dfatool_host.ah
+endif
+
+ASPECTC_HOST_FLAGS ?= -a0
+
+ifeq (${aspectc}, 1)
+ HOST_CC = ag++ -r repo.acp -v 0 ${ASPECTC_HOST_FLAGS} --c_compiler ${UPMEM_HOME}/bin/clang++ -p . --Xcompiler
+else
+ HOST_FLAGS += -std=c11
+endif
+
QUIET = @
-ifdef verbose
+ifeq (${verbose}, 1)
QUIET =
endif
@@ -23,11 +40,13 @@ all: bin/host_code bin/dpu_code
bin:
${QUIET}mkdir -p bin
-bin/dpu_code: ${DPU_SOURCES} ${COMMON_INCLUDES} bin
+# Prerequisites are the header files themselves, not the include/ directory:
+# a directory's mtime changes only when entries are added or removed (which
+# the host recipe below does on every run), never when a header is edited.
+# bin/ is order-only (after `|`): it must exist, but its timestamp, which
+# moves whenever a binary is created inside it, must not force a rebuild.
+bin/dpu_code: ${DPU_SOURCES} $(wildcard include/*.h) | bin
${QUIET}dpu-upmem-dpurte-clang ${DPU_FLAGS} -o $@ ${DPU_SOURCES}
-bin/host_code: ${HOST_SOURCES} ${COMMON_INCLUDES} bin
- ${QUIET}${CC} -o $@ ${HOST_SOURCES} ${HOST_FLAGS}
+# The shared aspect header is copied next to dfatool_host.ah for the duration
+# of the compile (dfatool_host.ah includes it by bare file name) and removed
+# afterwards; the ../include files are listed so that edits to them rebuild.
+bin/host_code: ${HOST_SOURCES} $(wildcard include/*.h include/*.ah ../include/*.h ../include/*.ah) | bin
+ ${QUIET}cp ../include/dfatool_host_dpu.ah include
+ ${QUIET}${HOST_CC} -o $@ ${HOST_SOURCES} ${HOST_FLAGS}
+ ${QUIET}rm -f include/dfatool_host_dpu.ah
clean:
${QUIET}rm -rf bin
diff --git a/BFS/benchmark-scripts/ccmcc25-sim.sh b/BFS/benchmark-scripts/ccmcc25-sim.sh
new file mode 100755
index 0000000..bcbe284
--- /dev/null
+++ b/BFS/benchmark-scripts/ccmcc25-sim.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+# Simulator benchmark driver: for every repetition, input graph and DPU count
+# it rebuilds BFS and runs it once, appending all output to
+# log/<hostname>/ccmcc25-sdk<sdk>-sim.txt.
+
+mkdir -p log/$(hostname)
+
+# Build and run one configuration.
+# Arguments are key=value pairs (nr_dpus, nr_tasklets, data); `local "$@"`
+# turns each of them into a local variable of that name.
+run_benchmark_nmc() {
+ local "$@"
+ set -e
+ make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} \
+ aspectc=1 aspectc_timing=1 dfatool_timing=0
+ bin/host_code -f ${data} 2>&1
+}
+
+# Exported so that the shells spawned by GNU parallel can call the function.
+export -f run_benchmark_nmc
+
+# SDK release used for the simulator runs. It must be set before ${fn} is
+# built, otherwise the log file name silently loses its version component.
+sdk=2025.1.0
+
+fn=log/$(hostname)/ccmcc25-sdk${sdk}-sim
+
+source ~/lib/local/upmem/upmem-${sdk}-Linux-x86_64/upmem_env.sh simulator
+
+echo "prim-benchmarks BFS $(git describe --all --long) $(git rev-parse HEAD) $(date -R)" >> ${fn}.txt
+
+# BFS does not support repeated kernel invocations → repeat it here
+# (the `i` input source is not passed to the function; it only multiplies the job list)
+parallel -j1 --eta --joblog ${fn}.joblog --resume --header : \
+ run_benchmark_nmc nr_dpus={nr_dpus} nr_tasklets=16 data={data} \
+ ::: i $(seq 0 4) \
+ ::: data data/roadNet-CA.txt data/loc-gowalla_edges.txt \
+ ::: nr_dpus 1 2 4 8 16 32 48 64 \
+>> ${fn}.txt
diff --git a/BFS/benchmark-scripts/ccmcc25.sh b/BFS/benchmark-scripts/ccmcc25.sh
new file mode 100755
index 0000000..0dcf4bb
--- /dev/null
+++ b/BFS/benchmark-scripts/ccmcc25.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+mkdir -p log/$(hostname)
+
+run_benchmark_nmc() {
+ local "$@"
+ set -e
+ sudo limit_ranks_to_numa_node ${numa_rank}
+ make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} \
+ aspectc=1 aspectc_timing=1 dfatool_timing=0
+ bin/host_code -f ${data} 2>&1
+}
+
+export -f run_benchmark_nmc
+
+for sdk in 2023.2.0 2024.1.0 2024.2.0 2025.1.0; do
+
+ fn=log/$(hostname)/ccmcc25-sdk${sdk}
+
+ source /opt/upmem/upmem-${sdk}-Linux-x86_64/upmem_env.sh
+
+ echo "prim-benchmarks BFS $(git describe --all --long) $(git rev-parse HEAD) $(date -R)" >> ${fn}.txt
+
+ # BFS does not support repeated kernel invocations → repeat it here
+ parallel -j1 --eta --joblog ${fn}.joblog --resume --header : \
+ run_benchmark_nmc nr_dpus={nr_dpus} nr_tasklets=16 numa_rank=any data={data} \
+ ::: i $(seq 0 10) \
+ ::: data data/roadNet-CA.txt data/loc-gowalla_edges.txt \
+ ::: nr_dpus 64 128 256 512 768 1024 1536 2048 2304 \
+ >> ${fn}.txt
+
+done
diff --git a/BFS/dpu/task.c b/BFS/dpu/task.c
index 44ec214..5275047 100644
--- a/BFS/dpu/task.c
+++ b/BFS/dpu/task.c
@@ -12,7 +12,7 @@
#include <perfcounter.h>
#include "dpu-utils.h"
-#include "../support/common.h"
+#include "common.h"
BARRIER_INIT(my_barrier, NR_TASKLETS);
diff --git a/BFS/host/app.c b/BFS/host/app.c
index 9ba7ffb..4431193 100644
--- a/BFS/host/app.c
+++ b/BFS/host/app.c
@@ -3,9 +3,24 @@
* BFS Host Application Source File
*
*/
+#if ASPECTC
+extern "C" {
+#endif
+
#include <dpu.h>
#include <dpu_log.h>
+#ifndef ENERGY
+#define ENERGY 0
+#endif
+#if ENERGY
+#include <dpu_probe.h>
+#endif
+
+#if ASPECTC
+}
+#endif
+
#include <assert.h>
#include <getopt.h>
#include <stdio.h>
@@ -14,18 +29,11 @@
#include <unistd.h>
#include "mram-management.h"
-#include "../support/common.h"
-#include "../support/graph.h"
-#include "../support/params.h"
-#include "../support/timer.h"
-#include "../support/utils.h"
-
-#ifndef ENERGY
-#define ENERGY 0
-#endif
-#if ENERGY
-#include <dpu_probe.h>
-#endif
+#include "common.h"
+#include "graph.h"
+#include "params.h"
+#include "timer.h"
+#include "utils.h"
#define DPU_BINARY "./bin/dpu_code"
@@ -44,10 +52,6 @@ int main(int argc, char **argv)
double tenergy = 0;
#endif
- printf
- ("WITH_ALLOC_OVERHEAD=%d WITH_LOAD_OVERHEAD=%d WITH_FREE_OVERHEAD=%d\n",
- WITH_ALLOC_OVERHEAD, WITH_LOAD_OVERHEAD, WITH_FREE_OVERHEAD);
-
// Allocate DPUs and load binary
struct dpu_set_t dpu_set, dpu;
uint32_t numDPUs, numRanks;
@@ -59,7 +63,7 @@ int main(int argc, char **argv)
#if WITH_ALLOC_OVERHEAD
stopTimer(&timer, 0);
#else
- timer.time[0] = 0;
+ zeroTimer(&timer, 0);
#endif
#if WITH_LOAD_OVERHEAD
@@ -69,7 +73,7 @@ int main(int argc, char **argv)
#if WITH_LOAD_OVERHEAD
stopTimer(&timer, 0);
#else
- timer.time[1] = 0;
+ zeroTimer(&timer, 1);
#endif
DPU_ASSERT(dpu_get_nr_dpus(dpu_set, &numDPUs));
@@ -86,10 +90,10 @@ int main(int argc, char **argv)
uint32_t numNodes = csrGraph.numNodes;
uint32_t *nodePtrs = csrGraph.nodePtrs;
uint32_t *neighborIdxs = csrGraph.neighborIdxs;
- uint32_t *nodeLevel = calloc(numNodes, sizeof(uint32_t)); // Node's BFS level (initially all 0 meaning not reachable)
- uint64_t *visited = calloc(numNodes / 64, sizeof(uint64_t)); // Bit vector with one bit per node
- uint64_t *currentFrontier = calloc(numNodes / 64, sizeof(uint64_t)); // Bit vector with one bit per node
- uint64_t *nextFrontier = calloc(numNodes / 64, sizeof(uint64_t)); // Bit vector with one bit per node
+ uint32_t *nodeLevel = (uint32_t*)calloc(numNodes, sizeof(uint32_t)); // Node's BFS level (initially all 0 meaning not reachable)
+ uint64_t *visited = (uint64_t*)calloc(numNodes / 64, sizeof(uint64_t)); // Bit vector with one bit per node
+ uint64_t *currentFrontier = (uint64_t*)calloc(numNodes / 64, sizeof(uint64_t)); // Bit vector with one bit per node
+ uint64_t *nextFrontier = (uint64_t*)calloc(numNodes / 64, sizeof(uint64_t)); // Bit vector with one bit per node
setBit(nextFrontier[0], 0); // Initialize frontier to first node
uint32_t level = 1;
@@ -182,19 +186,27 @@ int main(int argc, char **argv)
PRINT_INFO(p.verbosity >= 2,
" Copying data to DPU");
startTimer(&timer, 2, t0ini++);
- copyToDPU(dpu, (uint8_t *) dpuNodePtrs_h, dpuNodePtrs_m,
- (dpuNumNodes + 1) * sizeof(uint32_t));
- copyToDPU(dpu, (uint8_t *) dpuNeighborIdxs_h,
- dpuNeighborIdxs_m,
- dpuNumNeighbors * sizeof(uint32_t));
- copyToDPU(dpu, (uint8_t *) dpuNodeLevel_h,
- dpuNodeLevel_m,
- dpuNumNodes * sizeof(uint32_t));
- copyToDPU(dpu, (uint8_t *) visited, dpuVisited_m,
- numNodes / 64 * sizeof(uint64_t));
- copyToDPU(dpu, (uint8_t *) nextFrontier,
- dpuNextFrontier_m,
- numNodes / 64 * sizeof(uint64_t));
+
+ DPU_ASSERT(dpu_copy_to(dpu, DPU_MRAM_HEAP_POINTER_NAME,
+ dpuNodePtrs_m, (uint8_t *) dpuNodePtrs_h,
+ ROUND_UP_TO_MULTIPLE_OF_8((dpuNumNodes + 1) * sizeof(uint32_t))));
+
+ DPU_ASSERT(dpu_copy_to(dpu, DPU_MRAM_HEAP_POINTER_NAME,
+ dpuNeighborIdxs_m, (uint8_t *) dpuNeighborIdxs_h,
+ ROUND_UP_TO_MULTIPLE_OF_8(dpuNumNeighbors * sizeof(uint32_t))));
+
+ DPU_ASSERT(dpu_copy_to(dpu, DPU_MRAM_HEAP_POINTER_NAME,
+ dpuNodeLevel_m, (uint8_t *) dpuNodeLevel_h,
+ ROUND_UP_TO_MULTIPLE_OF_8(dpuNumNodes * sizeof(uint32_t))));
+
+ DPU_ASSERT(dpu_copy_to(dpu, DPU_MRAM_HEAP_POINTER_NAME,
+ dpuVisited_m, (uint8_t *) visited,
+ ROUND_UP_TO_MULTIPLE_OF_8(numNodes / 64 * sizeof(uint64_t))));
+
+ DPU_ASSERT(dpu_copy_to(dpu, DPU_MRAM_HEAP_POINTER_NAME,
+ dpuNextFrontier_m, (uint8_t *) nextFrontier,
+ ROUND_UP_TO_MULTIPLE_OF_8(numNodes / 64 * sizeof(uint64_t))));
+
// NOTE: No need to copy current frontier because it is written before being read
stopTimer(&timer, 2);
//loadTime += getElapsedTime(timer);
@@ -204,8 +216,9 @@ int main(int argc, char **argv)
PRINT_INFO(p.verbosity >= 2,
" Copying parameters to DPU");
startTimer(&timer, 2, t1ini++);
- copyToDPU(dpu, (uint8_t *) & dpuParams[dpuIdx],
- dpuParams_m[dpuIdx], sizeof(struct DPUParams));
+ DPU_ASSERT(dpu_copy_to(dpu, DPU_MRAM_HEAP_POINTER_NAME,
+ dpuParams_m[dpuIdx], (uint8_t *) & dpuParams[dpuIdx],
+ ROUND_UP_TO_MULTIPLE_OF_8(sizeof(struct DPUParams))));
stopTimer(&timer, 2);
//loadTime += getElapsedTime(timer);
@@ -244,19 +257,15 @@ int main(int argc, char **argv)
uint32_t dpuNumNodes = dpuParams[dpuIdx].dpuNumNodes;
if (dpuNumNodes > 0) {
if (dpuIdx == 0) {
- copyFromDPU(dpu,
- dpuParams[dpuIdx].
- dpuNextFrontier_m,
- (uint8_t *) currentFrontier,
- numNodes / 64 *
- sizeof(uint64_t));
+ DPU_ASSERT(dpu_copy_from(dpu, DPU_MRAM_HEAP_POINTER_NAME,
+ dpuParams[dpuIdx].dpuNextFrontier_m,
+ (uint8_t *) currentFrontier,
+ ROUND_UP_TO_MULTIPLE_OF_8(numNodes / 64 * sizeof(uint64_t))));
} else {
- copyFromDPU(dpu,
- dpuParams[dpuIdx].
- dpuNextFrontier_m,
- (uint8_t *) nextFrontier,
- numNodes / 64 *
- sizeof(uint64_t));
+ DPU_ASSERT(dpu_copy_from(dpu, DPU_MRAM_HEAP_POINTER_NAME,
+ dpuParams[dpuIdx].dpuNextFrontier_m,
+ (uint8_t *) nextFrontier,
+ ROUND_UP_TO_MULTIPLE_OF_8(numNodes / 64 * sizeof(uint64_t))));
for (uint32_t i = 0; i < numNodes / 64;
++i) {
currentFrontier[i] |=
@@ -283,19 +292,15 @@ int main(int argc, char **argv)
dpuParams[dpuIdx].dpuNumNodes;
if (dpuNumNodes > 0) {
// Copy current frontier to all DPUs (place in next frontier and DPU will update visited and copy to current frontier)
- copyToDPU(dpu,
- (uint8_t *) currentFrontier,
- dpuParams[dpuIdx].
- dpuNextFrontier_m,
- numNodes / 64 *
- sizeof(uint64_t));
+ DPU_ASSERT(dpu_copy_to(dpu, DPU_MRAM_HEAP_POINTER_NAME,
+ dpuParams[dpuIdx].dpuNextFrontier_m,
+ (uint8_t *) currentFrontier,
+ ROUND_UP_TO_MULTIPLE_OF_8(numNodes / 64 * sizeof(uint64_t))));
// Copy new level to DPU
dpuParams[dpuIdx].level = level;
- copyToDPU(dpu,
- (uint8_t *) &
- dpuParams[dpuIdx],
- dpuParams_m[dpuIdx],
- sizeof(struct DPUParams));
+ DPU_ASSERT(dpu_copy_to(dpu, DPU_MRAM_HEAP_POINTER_NAME,
+ dpuParams_m[dpuIdx], (uint8_t *) &dpuParams[dpuIdx],
+ ROUND_UP_TO_MULTIPLE_OF_8(sizeof(struct DPUParams))));
++dpuIdx;
}
}
@@ -313,9 +318,10 @@ int main(int argc, char **argv)
uint32_t dpuNumNodes = dpuParams[dpuIdx].dpuNumNodes;
if (dpuNumNodes > 0) {
uint32_t dpuStartNodeIdx = dpuIdx * numNodesPerDPU;
- copyFromDPU(dpu, dpuParams[dpuIdx].dpuNodeLevel_m,
- (uint8_t *) (nodeLevel + dpuStartNodeIdx),
- dpuNumNodes * sizeof(float));
+ DPU_ASSERT(dpu_copy_from(dpu, DPU_MRAM_HEAP_POINTER_NAME,
+ dpuParams[dpuIdx].dpuNodeLevel_m,
+ (uint8_t *) (nodeLevel + dpuStartNodeIdx),
+ ROUND_UP_TO_MULTIPLE_OF_8(dpuNumNodes * sizeof(float))));
}
++dpuIdx;
}
@@ -325,7 +331,7 @@ int main(int argc, char **argv)
// Calculating result on CPU
PRINT_INFO(p.verbosity >= 1, "Calculating result on CPU");
- uint32_t *nodeLevelReference = calloc(numNodes, sizeof(uint32_t)); // Node's BFS level (initially all 0 meaning not reachable)
+ uint32_t *nodeLevelReference = (uint32_t*) calloc(numNodes, sizeof(uint32_t)); // Node's BFS level (initially all 0 meaning not reachable)
memset(nextFrontier, 0, numNodes / 64 * sizeof(uint64_t));
setBit(nextFrontier[0], 0); // Initialize frontier to first node
nextFrontierEmpty = 0;
@@ -395,7 +401,7 @@ int main(int argc, char **argv)
#if WITH_FREE_OVERHEAD
stopTimer(&timer, 7);
#else
- timer.time[7] = 0;
+ zeroTimer(&timer, 7);
#endif
// Verify the result
@@ -412,9 +418,9 @@ int main(int argc, char **argv)
}
if (status) {
- printf
+ dfatool_printf
("[::] BFS-UMEM | n_dpus=%d n_ranks=%d n_tasklets=%d e_type=%s n_elements=%d "
- "| throughput_pim_MBps=%f throughput_MBps=%f", numDPUs,
+ "| throughput_pim_MBps=%f throughput_MBps=%f", numDPUs, numRanks,
NR_TASKLETS, "uint32_t", numNodes,
numNodes * sizeof(uint32_t) / (timer.time[2] +
timer.time[3]),
@@ -423,12 +429,12 @@ int main(int argc, char **argv)
timer.time[2] +
timer.time[3] +
timer.time[4]));
- printf(" throughput_pim_MOpps=%f throughput_MOpps=%f",
+ dfatool_printf(" throughput_pim_MOpps=%f throughput_MOpps=%f",
numNodes / (timer.time[2] + timer.time[3]),
numNodes / (timer.time[0] + timer.time[1] +
timer.time[2] + timer.time[3] +
timer.time[4]));
- printf
+ dfatool_printf
(" latency_alloc_us=%f latency_load_us=%f latency_write_us=%f latency_kernel_us=%f latency_sync_us=%f latency_read_us=%f latency_cpu_us=%f latency_free_us=%f\n",
timer.time[0], timer.time[1], timer.time[2], timer.time[3],
timer.time[4], timer.time[5], timer.time[6],
diff --git a/BFS/host/mram-management.h b/BFS/host/mram-management.h
index f2ee031..a953d6a 100644
--- a/BFS/host/mram-management.h
+++ b/BFS/host/mram-management.h
@@ -1,9 +1,7 @@
+#pragma once
-#ifndef _MRAM_MANAGEMENT_H_
-#define _MRAM_MANAGEMENT_H_
-
-#include "../support/common.h"
-#include "../support/utils.h"
+#include "common.h"
+#include "utils.h"
#define DPU_CAPACITY (64 << 20) // A DPU's capacity is 64 MiB
@@ -29,21 +27,3 @@ static uint32_t mram_heap_alloc(struct mram_heap_allocator_t *allocator,
}
return ret;
}
-
-static void copyToDPU(struct dpu_set_t dpu, uint8_t *hostPtr, uint32_t mramIdx,
- uint32_t size)
-{
- DPU_ASSERT(dpu_copy_to
- (dpu, DPU_MRAM_HEAP_POINTER_NAME, mramIdx, hostPtr,
- ROUND_UP_TO_MULTIPLE_OF_8(size)));
-}
-
-static void copyFromDPU(struct dpu_set_t dpu, uint32_t mramIdx,
- uint8_t *hostPtr, uint32_t size)
-{
- DPU_ASSERT(dpu_copy_from
- (dpu, DPU_MRAM_HEAP_POINTER_NAME, mramIdx, hostPtr,
- ROUND_UP_TO_MULTIPLE_OF_8(size)));
-}
-
-#endif
diff --git a/BFS/support/common.h b/BFS/include/common.h
index 5f2aa0d..5f2aa0d 100644
--- a/BFS/support/common.h
+++ b/BFS/include/common.h
diff --git a/BFS/include/dfatool_host.ah b/BFS/include/dfatool_host.ah
new file mode 100644
index 0000000..b2677e1
--- /dev/null
+++ b/BFS/include/dfatool_host.ah
@@ -0,0 +1,30 @@
+#pragma once
+
+#include <sys/time.h>
+#include "dfatool_host_dpu.ah"
+
+aspect DfatoolHostTiming : public DfatoolHostDPUTiming {
+
+ unsigned long input_size;
+ unsigned int element_size;
+
+ virtual int getKernel() { return 1; }
+
+ DfatoolHostTiming() {
+ element_size = sizeof(uint32_t);
+ }
+
+ advice call("% input_params(...)"): after() {
+ printf("[>>] BFS | n_dpus=%u\n", NR_DPUS);
+ }
+
+ advice call("% coo2csr(...)") : after() {
+ struct CSRGraph *g = tjp->result();
+ input_size = g->numNodes;
+ printf("[--] BFS | n_dpus=%u n_nodes=%lu\n", NR_DPUS, input_size);
+ }
+
+ advice execution("% main(...)") : after() {
+ printf("[<<] BFS | n_dpus=%u n_nodes=%lu\n", NR_DPUS, input_size);
+ }
+};
diff --git a/BFS/support/graph.h b/BFS/include/graph.h
index 2a19f67..2a19f67 100644
--- a/BFS/support/graph.h
+++ b/BFS/include/graph.h
diff --git a/BFS/support/params.h b/BFS/include/params.h
index f9169bc..f9169bc 100644
--- a/BFS/support/params.h
+++ b/BFS/include/params.h
diff --git a/BFS/include/timer.h b/BFS/include/timer.h
new file mode 100644
index 0000000..e85490f
--- /dev/null
+++ b/BFS/include/timer.h
@@ -0,0 +1,8 @@
+#pragma once
+
+#define N_TIMERS 8
+#define startTimer start
+#define stopTimer stop
+#define zeroTimer zero
+#include "../../include/timer_base.h"
+#undef N_TIMERS
diff --git a/BFS/support/utils.h b/BFS/include/utils.h
index ccd8fbd..ccd8fbd 100644
--- a/BFS/support/utils.h
+++ b/BFS/include/utils.h
diff --git a/BFS/run-paper-strong-full.sh b/BFS/run-paper-strong-full.sh
deleted file mode 100755
index 42806a2..0000000
--- a/BFS/run-paper-strong-full.sh
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/bin/bash
-
-set -e
-
-(
-
-echo "prim-benchmarks BFS strong-full (dfatool edition)"
-echo "Started at $(date)"
-echo "Revision $(git describe --always)"
-
-# >2048 are not part of upstream
-for nr_dpus in 2543 2304 256 512 1024 2048; do
- for nr_tasklets in 1 2 4 8 16; do
- echo
- if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} verbose=1; then
- # repetition is not part of upstream setup
- for i in `seq 1 50`; do
- timeout --foreground -k 1m 5m bin/host_code -f data/loc-gowalla_edges.txt || true
- done
- fi
- done
-done
-) | tee log-paper-strong-full.txt
diff --git a/BFS/run-paper-strong-rank.sh b/BFS/run-paper-strong-rank.sh
deleted file mode 100755
index e01d18a..0000000
--- a/BFS/run-paper-strong-rank.sh
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/bin/bash
-
-set -e
-
-(
-
-echo "prim-benchmarks BFS strong-rank (dfatool edition)"
-echo "Started at $(date)"
-echo "Revision $(git describe --always)"
-
-# >64 are not part of upstream
-for nr_dpus in 128 1 4 16 64; do
- for nr_tasklets in 1 2 4 8 16; do
- echo
- if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} verbose=1; then
- # repetition is not part of upstream setup
- for i in `seq 1 50`; do
- timeout --foreground -k 1m 5m bin/host_code -f data/loc-gowalla_edges.txt || true
- done
- fi
- done
-done
-) | tee log-paper-strong-rank.txt
diff --git a/BFS/run-paper-weak.sh b/BFS/run-paper-weak.sh
deleted file mode 100755
index 121758a..0000000
--- a/BFS/run-paper-weak.sh
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/bin/bash
-
-set -e
-
-(
-
-echo "prim-benchmarks BFS weak (dfatool edition)"
-echo "Started at $(date)"
-echo "Revision $(git describe --always)"
-
-# 256 and 512 are not part of upstream
-for nr_dpus in 256 512 1 4 16 64; do
- for nr_tasklets in 1 2 4 8 16; do
- echo
- if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} verbose=1; then
- # repetition is not part of upstream setup
- for i in `seq 1 50`; do
- # upstream code uses some kind of generated rMat graphs, but does not provide instructions for reproduction
- timeout --foreground -k 1m 3m bin/host_code -f data/loc-gowalla_edges.txt || true
- done
- fi
- done
-done |
-) tee log-paper-weak.txt
diff --git a/BFS/run.sh b/BFS/run.sh
deleted file mode 100755
index 8f5bfb8..0000000
--- a/BFS/run.sh
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/bin/bash
-
-set -e
-
-# -f: input file (i.e., input size)
-# bin/host_code -f data/loc-gowalla_edges.txt
-
-# input size depends on file -> strong scaling only
-
-echo "prim-benchmarks BFS (dfatool edition)"
-echo "Started at $(date)"
-echo "Revision $(git describe --always)"
-
-for nr_dpus in 1 2 4 8 16 32 64 128 256 512; do
- for nr_tasklets in 1 2 3 4 6 8 10 12 16 20 24; do
- for f in loc-gowalla_edges roadNet-CA; do
- echo
- if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets}; then
- for i in `seq 1 20`; do
- timeout --foreground -k 1m 30m bin/host_code -f data/${f}.txt || true
- done
- fi
- done
- done
-done
diff --git a/BFS/support/timer.h b/BFS/support/timer.h
deleted file mode 100644
index 63b5567..0000000
--- a/BFS/support/timer.h
+++ /dev/null
@@ -1,31 +0,0 @@
-
-#ifndef _TIMER_H_
-#define _TIMER_H_
-
-#include <stdio.h>
-#include <sys/time.h>
-
-typedef struct Timer {
- struct timeval startTime[8];
- struct timeval stopTime[8];
- double time[8];
-} Timer;
-
-static void startTimer(Timer *timer, int i, int rep)
-{
- if (rep == 0) {
- timer->time[i] = 0.0;
- }
- gettimeofday(&timer->startTime[i], NULL);
-}
-
-static void stopTimer(Timer *timer, int i)
-{
- gettimeofday(&timer->stopTime[i], NULL);
- timer->time[i] +=
- (timer->stopTime[i].tv_sec -
- timer->startTime[i].tv_sec) * 1000000.0 +
- (timer->stopTime[i].tv_usec - timer->startTime[i].tv_usec);
-}
-
-#endif