summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- BFS/baselines/cpu/Makefile 26
-rw-r--r-- BFS/baselines/cpu/app.c 41
2 files changed, 61 insertions, 6 deletions
diff --git a/BFS/baselines/cpu/Makefile b/BFS/baselines/cpu/Makefile
index 1f6ed3c..1efe457 100644
--- a/BFS/baselines/cpu/Makefile
+++ b/BFS/baselines/cpu/Makefile
@@ -1,8 +1,26 @@
-.PHONY: all
-all: bfs
+benchmark ?= 1
+debug ?= 0
+native ?= 1
+nop_sync ?= 0
+numa ?= 0
+
+LDFLAGS =
+CFLAGS =
+
+ifeq (${debug}, 1)
+ CFLAGS += -g
+endif
+
+ifeq (${native}, 1)
+ CFLAGS += -march=native
+endif
+
+ifeq (${numa}, 1)
+ LDFLAGS += -lnuma
+endif
bfs: app.c
- gcc -Wall -Wextra -pedantic -march=native -O2 -o bfs -fopenmp app.c
+ gcc -Wall -Wextra -pedantic -O3 ${CFLAGS} -DNUMA=${numa} -DNUMA_MEMCPY=${numa_memcpy} -DNOP_SYNC=${nop_sync} -DWITH_BENCHMARK=${benchmark} -o bfs -fopenmp app.c ${LDFLAGS}
bfs_O0: app.c
gcc -o bfs_O0 -fopenmp app.c
@@ -27,3 +45,5 @@ run_O2: bfs_O2
.PHONY: clean
clean:
rm -f bfs bfs_O0 bfs_O2
+
+.PHONY: all
diff --git a/BFS/baselines/cpu/app.c b/BFS/baselines/cpu/app.c
index caf4cbc..390b1f9 100644
--- a/BFS/baselines/cpu/app.c
+++ b/BFS/baselines/cpu/app.c
@@ -8,12 +8,30 @@
#include <omp.h>
+#if NUMA
+#include <numaif.h>
+#include <numa.h>
+
+void* mp_pages[1];
+int mp_status[1];
+int mp_nodes[1];
+struct bitmask* bitmask_in;
+int numa_node_in = -1;
+int numa_node_cpu = -1;
+#endif
+
#include "../../support/common.h"
#include "../../support/graph.h"
#include "../../support/params.h"
-#include "../../support/timer.h"
#include "../../support/utils.h"
+#if WITH_BENCHMARK
+#include "../../support/timer.h"
+#else
+#define startTimer(...)
+#define stopTimer(...)
+#endif
+
int main(int argc, char** argv) {
// Process parameters
@@ -24,8 +42,9 @@ int main(int argc, char** argv) {
struct COOGraph cooGraph = readCOOGraph(p.fileName);
PRINT_INFO(p.verbosity >= 1, " Graph has %d nodes and %d edges", cooGraph.numNodes, cooGraph.numEdges);
-
+#if WITH_BENCHMARK
Timer timer;
+#endif
for(int rep = 0; rep < 100; rep++) {
struct CSRGraph csrGraph = coo2csr(cooGraph);
@@ -43,6 +62,12 @@ int main(int argc, char** argv) {
uint32_t* prevFrontier = buffer1;
uint32_t* currFrontier = buffer2;
+#if NOP_SYNC
+ for(int rep = 0; rep < 200000; rep++) {
+ asm volatile("nop" ::);
+ }
+#endif
+
// Calculating result on CPU
startTimer(&timer, 0, 0);
nodeLevel[srcNode] = 0;
@@ -86,6 +111,12 @@ int main(int argc, char** argv) {
}
stopTimer(&timer, 0);
+#if NOP_SYNC
+ for(int rep = 0; rep < 200000; rep++) {
+ asm volatile("nop" ::);
+ }
+#endif
+
freeCSRGraph(csrGraph);
free(buffer1);
free(buffer2);
@@ -135,6 +166,7 @@ int main(int argc, char** argv) {
}
stopTimer(&timer, 1);
+#if WITH_BENCHMARK
unsigned int nr_threads = 0;
#pragma omp parallel
#pragma omp atomic
@@ -158,8 +190,11 @@ int main(int argc, char** argv) {
printf(" throughput_seq_MOpps=%f throughput_MOpps=%f",
csrGraph.numNodes / timer.time[1],
csrGraph.numNodes / timer.time[0]);
- printAll(&timer, 1);
+ printf(" latency_us=%f latency_seq_us=%f\n",
+ timer.time[0],
+ timer.time[1]);
}
+#endif // WITH_BENCHMARK
freeCSRGraph(csrGraph);
free(nodeLevel);