summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Friesel <daniel.friesel@uos.de>2023-05-25 10:24:36 +0200
committerDaniel Friesel <daniel.friesel@uos.de>2023-05-25 10:24:36 +0200
commit1aad3d349b0bdd2e7eae586ec6a7400e8ba9225e (patch)
treef82dafa7674896f5773e976a9056f79c416dc69e
parentb3235ccc79c0783e95c1c5524b8b421d2d048cf7 (diff)
port BFS CPU baseline to dfatool
-rw-r--r--BFS/baselines/cpu/Makefile28
-rw-r--r--BFS/baselines/cpu/app.c231
-rwxr-xr-xBFS/baselines/cpu/run.sh14
-rw-r--r--BFS/support/timer.h27
4 files changed, 188 insertions, 112 deletions
diff --git a/BFS/baselines/cpu/Makefile b/BFS/baselines/cpu/Makefile
index 895d38b..1b1d57c 100644
--- a/BFS/baselines/cpu/Makefile
+++ b/BFS/baselines/cpu/Makefile
@@ -1,7 +1,27 @@
-all:
- gcc -o bfs -fopenmp app.c
+.PHONY: all
+all: bfs
-clean:
- rm bfs
+bfs: app.c
+ gcc -O2 -o bfs -fopenmp app.c
+
+bfs_O0: app.c
+ gcc -o bfs_O0 -fopenmp app.c
+
+bfs_O2: app.c
+ gcc -O2 -o bfs_O2 -fopenmp app.c
+.PHONY: run
+run: bfs
+ ./bfs -f ../../data/loc-gowalla_edges.txt
+.PHONY: run_O0
+run_O0: bfs_O0
+ OMP_NUM_THREADS=4 ./bfs_O0 -f ../../data/loc-gowalla_edges.txt
+
+.PHONY: run_O2
+run_O2: bfs_O2
+ OMP_NUM_THREADS=4 ./bfs_O2 -f ../../data/loc-gowalla_edges.txt
+
+.PHONY: clean
+clean:
+ rm -f bfs bfs_O0 bfs_O2
diff --git a/BFS/baselines/cpu/app.c b/BFS/baselines/cpu/app.c
index f75a877..65cd420 100644
--- a/BFS/baselines/cpu/app.c
+++ b/BFS/baselines/cpu/app.c
@@ -23,123 +23,158 @@ int main(int argc, char** argv) {
PRINT_INFO(p.verbosity >= 1, "Reading graph %s", p.fileName);
struct COOGraph cooGraph = readCOOGraph(p.fileName);
PRINT_INFO(p.verbosity >= 1, " Graph has %d nodes and %d edges", cooGraph.numNodes, cooGraph.numEdges);
- struct CSRGraph csrGraph = coo2csr(cooGraph);
- uint32_t* nodeLevel = (uint32_t*) malloc(csrGraph.numNodes*sizeof(uint32_t));
- uint32_t* nodeLevelRef = (uint32_t*) malloc(csrGraph.numNodes*sizeof(uint32_t));
- for(uint32_t i = 0; i < csrGraph.numNodes; ++i) {
- nodeLevel[i] = UINT32_MAX; // Unreachable
- nodeLevelRef[i] = UINT32_MAX; // Unreachable
- }
- uint32_t srcNode = 0;
- // Initialize frontier double buffers
- uint32_t* buffer1 = (uint32_t*) malloc(csrGraph.numNodes*sizeof(uint32_t));
- uint32_t* buffer2 = (uint32_t*) malloc(csrGraph.numNodes*sizeof(uint32_t));
- uint32_t* prevFrontier = buffer1;
- uint32_t* currFrontier = buffer2;
- // Calculating result on CPU
- PRINT_INFO(p.verbosity >= 1, "Calculating result on CPU (OpenMP)");
- omp_set_num_threads(4);
Timer timer;
- startTimer(&timer);
- nodeLevel[srcNode] = 0;
- prevFrontier[0] = srcNode;
- uint32_t numPrevFrontier = 1;
- for(uint32_t level = 1; numPrevFrontier > 0; ++level) {
-
- uint32_t numCurrFrontier = 0;
-
- // Visit nodes in the previous frontier
- #pragma omp parallel for
- for(uint32_t i = 0; i < numPrevFrontier; ++i) {
- uint32_t node = prevFrontier[i];
- for(uint32_t edge = csrGraph.nodePtrs[node]; edge < csrGraph.nodePtrs[node + 1]; ++edge) {
- uint32_t neighbor = csrGraph.neighborIdxs[edge];
- uint32_t justVisited = 0;
- #pragma omp critical
- {
- if(nodeLevel[neighbor] == UINT32_MAX) { // Node not previously visited
- nodeLevel[neighbor] = level;
- justVisited = 1;
- }
- }
- if(justVisited) {
- uint32_t currFrontierIdx;
+ for(int rep = 0; rep < 100; rep++) {
+
+ struct CSRGraph csrGraph = coo2csr(cooGraph);
+ uint32_t* nodeLevel = (uint32_t*) malloc(csrGraph.numNodes*sizeof(uint32_t));
+ uint32_t* nodeLevelRef = (uint32_t*) malloc(csrGraph.numNodes*sizeof(uint32_t));
+ for(uint32_t i = 0; i < csrGraph.numNodes; ++i) {
+ nodeLevel[i] = UINT32_MAX; // Unreachable
+ nodeLevelRef[i] = UINT32_MAX; // Unreachable
+ }
+ uint32_t srcNode = 0;
+
+ // Initialize frontier double buffers
+ uint32_t* buffer1 = (uint32_t*) malloc(csrGraph.numNodes*sizeof(uint32_t));
+ uint32_t* buffer2 = (uint32_t*) malloc(csrGraph.numNodes*sizeof(uint32_t));
+ uint32_t* prevFrontier = buffer1;
+ uint32_t* currFrontier = buffer2;
+
+ // Calculating result on CPU
+ startTimer(&timer, 0, 0);
+ nodeLevel[srcNode] = 0;
+ prevFrontier[0] = srcNode;
+ uint32_t numPrevFrontier = 1;
+ for(uint32_t level = 1; numPrevFrontier > 0; ++level) {
+
+ uint32_t numCurrFrontier = 0;
+
+ // Visit nodes in the previous frontier
+ #pragma omp parallel for
+ for(uint32_t i = 0; i < numPrevFrontier; ++i) {
+ uint32_t node = prevFrontier[i];
+ for(uint32_t edge = csrGraph.nodePtrs[node]; edge < csrGraph.nodePtrs[node + 1]; ++edge) {
+ uint32_t neighbor = csrGraph.neighborIdxs[edge];
+ uint32_t justVisited = 0;
#pragma omp critical
{
- currFrontierIdx = numCurrFrontier++;
+ if(nodeLevel[neighbor] == UINT32_MAX) { // Node not previously visited
+ nodeLevel[neighbor] = level;
+ justVisited = 1;
+ }
+ }
+ if(justVisited) {
+ uint32_t currFrontierIdx;
+ #pragma omp critical
+ {
+ currFrontierIdx = numCurrFrontier++;
+ }
+ currFrontier[currFrontierIdx] = neighbor;
}
- currFrontier[currFrontierIdx] = neighbor;
}
}
+
+ // Swap buffers
+ uint32_t* tmp = prevFrontier;
+ prevFrontier = currFrontier;
+ currFrontier = tmp;
+ numPrevFrontier = numCurrFrontier;
+
}
-
- // Swap buffers
- uint32_t* tmp = prevFrontier;
- prevFrontier = currFrontier;
- currFrontier = tmp;
- numPrevFrontier = numCurrFrontier;
-
- }
- stopTimer(&timer);
- if(p.verbosity == 0) PRINT("%f", getElapsedTime(timer)*1e3);
- PRINT_INFO(p.verbosity >= 1, "Elapsed time: %f ms", getElapsedTime(timer)*1e3);
-
- // Calculating result on CPU sequentially
- PRINT_INFO(p.verbosity >= 1, "Calculating result on CPU (sequential)");
- startTimer(&timer);
- nodeLevelRef[srcNode] = 0;
- prevFrontier[0] = srcNode;
- numPrevFrontier = 1;
- for(uint32_t level = 1; numPrevFrontier > 0; ++level) {
-
- uint32_t numCurrFrontier = 0;
-
- // Visit nodes in the previous frontier
- for(uint32_t i = 0; i < numPrevFrontier; ++i) {
- uint32_t node = prevFrontier[i];
- for(uint32_t edge = csrGraph.nodePtrs[node]; edge < csrGraph.nodePtrs[node + 1]; ++edge) {
- uint32_t neighbor = csrGraph.neighborIdxs[edge];
- uint32_t justVisited = 0;
- if(nodeLevelRef[neighbor] == UINT32_MAX) { // Node not previously visited
- nodeLevelRef[neighbor] = level;
- justVisited = 1;
- }
- if(justVisited) {
- uint32_t currFrontierIdx;
- currFrontierIdx = numCurrFrontier++;
- currFrontier[currFrontierIdx] = neighbor;
+ stopTimer(&timer, 0);
+
+ freeCSRGraph(csrGraph);
+ free(buffer1);
+ free(buffer2);
+
+ csrGraph = coo2csr(cooGraph);
+ srcNode = 0;
+
+ // Initialize frontier double buffers
+ buffer1 = (uint32_t*) malloc(csrGraph.numNodes*sizeof(uint32_t));
+ buffer2 = (uint32_t*) malloc(csrGraph.numNodes*sizeof(uint32_t));
+ prevFrontier = buffer1;
+ currFrontier = buffer2;
+
+ // Calculating result on CPU sequentially
+ startTimer(&timer, 1, 0);
+ nodeLevelRef[srcNode] = 0;
+ prevFrontier[0] = srcNode;
+ numPrevFrontier = 1;
+ for(uint32_t level = 1; numPrevFrontier > 0; ++level) {
+
+ uint32_t numCurrFrontier = 0;
+
+ // Visit nodes in the previous frontier
+ for(uint32_t i = 0; i < numPrevFrontier; ++i) {
+ uint32_t node = prevFrontier[i];
+ for(uint32_t edge = csrGraph.nodePtrs[node]; edge < csrGraph.nodePtrs[node + 1]; ++edge) {
+ uint32_t neighbor = csrGraph.neighborIdxs[edge];
+ uint32_t justVisited = 0;
+ if(nodeLevelRef[neighbor] == UINT32_MAX) { // Node not previously visited
+ nodeLevelRef[neighbor] = level;
+ justVisited = 1;
+ }
+ if(justVisited) {
+ uint32_t currFrontierIdx;
+ currFrontierIdx = numCurrFrontier++;
+ currFrontier[currFrontierIdx] = neighbor;
+ }
}
}
+
+ // Swap buffers
+ uint32_t* tmp = prevFrontier;
+ prevFrontier = currFrontier;
+ currFrontier = tmp;
+ numPrevFrontier = numCurrFrontier;
+
+ }
+ stopTimer(&timer, 1);
+
+ unsigned int nr_threads = 0;
+#pragma omp parallel
+#pragma omp atomic
+ nr_threads++;
+
+ // Verifying result
+ int isOK = 1;
+ for(uint32_t nodeIdx = 0; nodeIdx < csrGraph.numNodes; ++nodeIdx) {
+ if(nodeLevel[nodeIdx] != nodeLevelRef[nodeIdx]) {
+ PRINT_ERROR("Mismatch at node %u (CPU sequential result = level %u, CPU parallel result = level %u)", nodeIdx, nodeLevelRef[nodeIdx], nodeLevel[nodeIdx]);
+ isOK = 0;
+ }
}
- // Swap buffers
- uint32_t* tmp = prevFrontier;
- prevFrontier = currFrontier;
- currFrontier = tmp;
- numPrevFrontier = numCurrFrontier;
-
- }
- stopTimer(&timer);
- if(p.verbosity == 0) PRINT("%f", getElapsedTime(timer)*1e3);
- PRINT_INFO(p.verbosity >= 1, "Elapsed time: %f ms", getElapsedTime(timer)*1e3);
-
- // Verifying result
- PRINT_INFO(p.verbosity >= 1, "Verifying the result");
- for(uint32_t nodeIdx = 0; nodeIdx < csrGraph.numNodes; ++nodeIdx) {
- if(nodeLevel[nodeIdx] != nodeLevelRef[nodeIdx]) {
- PRINT_ERROR("Mismatch at node %u (CPU sequential result = level %u, CPU parallel result = level %u)", nodeIdx, nodeLevelRef[nodeIdx], nodeLevel[nodeIdx]);
+ if (isOK) {
+ printf("[::] n_threads=%d e_type=%s n_elements=%d "
+ "| throughput_cpu_ref_MBps=%f throughput_cpu_omp_MBps=%f\n",
+ nr_threads, "uint32_t", csrGraph.numNodes,
+ csrGraph.numNodes * sizeof(uint32_t) / timer.time[1],
+ csrGraph.numNodes * sizeof(uint32_t) / timer.time[0]);
+ printf("[::] n_threads=%d e_type=%s n_elements=%d "
+ "| throughput_cpu_ref_MOpps=%f throughput_cpu_omp_MOpps=%f\n",
+ nr_threads, "uint32_t", csrGraph.numNodes,
+ csrGraph.numNodes / timer.time[1],
+ csrGraph.numNodes / timer.time[0]);
+ printf("[::] n_threads=%d e_type=%s n_elements=%d |",
+ nr_threads, "uint32_t", csrGraph.numNodes);
+ printAll(&timer, 1);
}
+
+ freeCSRGraph(csrGraph);
+ free(nodeLevel);
+ free(nodeLevelRef);
+ free(buffer1);
+ free(buffer2);
}
// Deallocate data structures
freeCOOGraph(cooGraph);
- freeCSRGraph(csrGraph);
- free(nodeLevel);
- free(buffer1);
- free(buffer2);
return 0;
diff --git a/BFS/baselines/cpu/run.sh b/BFS/baselines/cpu/run.sh
new file mode 100755
index 0000000..cbed050
--- /dev/null
+++ b/BFS/baselines/cpu/run.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+set -e
+
+echo "prim-benchmarks BFS CPU (dfatool edition)"
+echo "Started at $(date)"
+echo "Revision $(git describe --always)"
+
+# default threads: 4
+
+make
+for nr_threads in 1 2 4 6 8 12 16 20 24 32; do
+ OMP_NUM_THREADS=${nr_threads} timeout -k 1m 30m ./bfs -f ../../data/loc-gowalla_edges.txt || true
+done
diff --git a/BFS/support/timer.h b/BFS/support/timer.h
index f8cd5fc..23116e3 100644
--- a/BFS/support/timer.h
+++ b/BFS/support/timer.h
@@ -6,22 +6,29 @@
#include <sys/time.h>
typedef struct Timer {
- struct timeval startTime;
- struct timeval endTime;
+ struct timeval startTime[4];
+ struct timeval stopTime[4];
+ double time[4];
} Timer;
-static void startTimer(Timer* timer) {
- gettimeofday(&(timer->startTime), NULL);
+static void startTimer(Timer *timer, int i, int rep) {
+ if(rep == 0) {
+ timer->time[i] = 0.0;
+ }
+ gettimeofday(&timer->startTime[i], NULL);
}
-static void stopTimer(Timer* timer) {
- gettimeofday(&(timer->endTime), NULL);
+static void stopTimer(Timer *timer, int i) {
+ gettimeofday(&timer->stopTime[i], NULL);
+ timer->time[i] += (timer->stopTime[i].tv_sec - timer->startTime[i].tv_sec) * 1000000.0 +
+ (timer->stopTime[i].tv_usec - timer->startTime[i].tv_usec);
}
-static float getElapsedTime(Timer timer) {
- return ((float) ((timer.endTime.tv_sec - timer.startTime.tv_sec)
- + (timer.endTime.tv_usec - timer.startTime.tv_usec)/1.0e6));
+static void printAll(Timer *timer, int maxt) {
+ for (int i = 0; i <= maxt; i++) {
+ printf(" timer%d_us=%f", i, timer->time[i]);
+ }
+ printf("\n");
}
#endif
-