diff options
-rw-r--r-- | BFS/baselines/cpu/Makefile | 26 |
-rw-r--r-- | BFS/baselines/cpu/app.c | 41 |
2 files changed, 61 insertions, 6 deletions
diff --git a/BFS/baselines/cpu/Makefile b/BFS/baselines/cpu/Makefile index 1f6ed3c..1efe457 100644 --- a/BFS/baselines/cpu/Makefile +++ b/BFS/baselines/cpu/Makefile @@ -1,8 +1,26 @@ -.PHONY: all -all: bfs +benchmark ?= 1 +debug ?= 0 +native ?= 1 +nop_sync ?= 0 +numa ?= 0 + +LDFLAGS = +CFLAGS = + +ifeq (${debug}, 1) + CFLAGS += -g +endif + +ifeq (${native}, 1) + CFLAGS += -march=native +endif + +ifeq (${numa}, 1) + LDFLAGS += -lnuma +endif bfs: app.c - gcc -Wall -Wextra -pedantic -march=native -O2 -o bfs -fopenmp app.c + gcc -Wall -Wextra -pedantic -O3 ${CFLAGS} -DNUMA=${numa} -DNUMA_MEMCPY=${numa_memcpy} -DNOP_SYNC=${nop_sync} -DWITH_BENCHMARK=${benchmark} -o bfs -fopenmp app.c ${LDFLAGS} bfs_O0: app.c gcc -o bfs_O0 -fopenmp app.c @@ -27,3 +45,5 @@ run_O2: bfs_O2 .PHONY: clean clean: rm -f bfs bfs_O0 bfs_O2 + +.PHONY: all diff --git a/BFS/baselines/cpu/app.c b/BFS/baselines/cpu/app.c index caf4cbc..390b1f9 100644 --- a/BFS/baselines/cpu/app.c +++ b/BFS/baselines/cpu/app.c @@ -8,12 +8,30 @@ #include <omp.h> +#if NUMA +#include <numaif.h> +#include <numa.h> + +void* mp_pages[1]; +int mp_status[1]; +int mp_nodes[1]; +struct bitmask* bitmask_in; +int numa_node_in = -1; +int numa_node_cpu = -1; +#endif + #include "../../support/common.h" #include "../../support/graph.h" #include "../../support/params.h" -#include "../../support/timer.h" #include "../../support/utils.h" +#if WITH_BENCHMARK +#include "../../support/timer.h" +#else +#define startTimer(...) +#define stopTimer(...) 
+#endif + int main(int argc, char** argv) { // Process parameters @@ -24,8 +42,9 @@ int main(int argc, char** argv) { struct COOGraph cooGraph = readCOOGraph(p.fileName); PRINT_INFO(p.verbosity >= 1, " Graph has %d nodes and %d edges", cooGraph.numNodes, cooGraph.numEdges); - +#if WITH_BENCHMARK Timer timer; +#endif for(int rep = 0; rep < 100; rep++) { struct CSRGraph csrGraph = coo2csr(cooGraph); @@ -43,6 +62,12 @@ int main(int argc, char** argv) { uint32_t* prevFrontier = buffer1; uint32_t* currFrontier = buffer2; +#if NOP_SYNC + for(int rep = 0; rep < 200000; rep++) { + asm volatile("nop" ::); + } +#endif + // Calculating result on CPU startTimer(&timer, 0, 0); nodeLevel[srcNode] = 0; @@ -86,6 +111,12 @@ int main(int argc, char** argv) { } stopTimer(&timer, 0); +#if NOP_SYNC + for(int rep = 0; rep < 200000; rep++) { + asm volatile("nop" ::); + } +#endif + freeCSRGraph(csrGraph); free(buffer1); free(buffer2); @@ -135,6 +166,7 @@ int main(int argc, char** argv) { } stopTimer(&timer, 1); +#if WITH_BENCHMARK unsigned int nr_threads = 0; #pragma omp parallel #pragma omp atomic @@ -158,8 +190,11 @@ int main(int argc, char** argv) { printf(" throughput_seq_MOpps=%f throughput_MOpps=%f", csrGraph.numNodes / timer.time[1], csrGraph.numNodes / timer.time[0]); - printAll(&timer, 1); + printf(" latency_us=%f latency_seq_us=%f\n", + timer.time[0], + timer.time[1]); } +#endif // WITH_BENCHMARK freeCSRGraph(csrGraph); free(nodeLevel); |