diff options
Diffstat (limited to 'TRNS/baselines/cpu')
-rw-r--r-- | TRNS/baselines/cpu/Makefile | 28 | ||||
-rw-r--r-- | TRNS/baselines/cpu/main.cpp | 16 | ||||
-rwxr-xr-x | TRNS/baselines/cpu/run-perf.sh | 6 |
3 files changed, 39 insertions, 11 deletions
diff --git a/TRNS/baselines/cpu/Makefile b/TRNS/baselines/cpu/Makefile
index 236f7bb..2f28738 100644
--- a/TRNS/baselines/cpu/Makefile
+++ b/TRNS/baselines/cpu/Makefile
@@ -32,16 +32,30 @@
 # THE SOFTWARE.
 #
 
-NUMA ?= 0
-NUMA_MEMCPY ?= 0
-FLAGS =
+benchmark ?= 1
+debug ?= 0
+native ?= 1
+nop_sync ?= 0
+numa ?= 0
+numa_memcpy ?= 0
 
-ifeq (${NUMA}, 1)
-	FLAGS += -lnuma
+CFLAGS =
+LDFLAGS =
+
+ifeq (${debug}, 1)
+	CFLAGS += -g
+endif
+
+ifeq (${native}, 1)
+	CFLAGS += -march=native
+endif
+
+ifeq (${numa}, 1)
+	LDFLAGS += -lnuma
 endif
 
 CXX=g++
-CXX_FLAGS=-std=c++11 -Wall -Wextra -pedantic -DNUMA=${NUMA} -DNUMA_MEMCPY=${NUMA_MEMCPY}
+CXX_FLAGS=-std=c++11 -Wall -Wextra -pedantic -DNUMA=${numa} -DNUMA_MEMCPY=${numa_memcpy} -DNOP_SYNC=${nop_sync} -DWITH_BENCHMARK=${benchmark}
 
 LIB=-L/usr/lib/ -lm -pthread
 
@@ -52,7 +66,7 @@ EXE=trns
 all: trns
 
 trns: ${SRC}
-	$(CXX) -O2 $(CXX_FLAGS) $(SRC) $(LIB) -o $(EXE) $(FLAGS)
+	$(CXX) -O3 $(CXX_FLAGS) ${CFLAGS} $(SRC) $(LIB) -o $(EXE) ${LDFLAGS}
 
 trns_O0: ${SRC}
 	$(CXX) $(CXX_FLAGS) $(SRC) $(LIB) -o $(EXE)_O0
diff --git a/TRNS/baselines/cpu/main.cpp b/TRNS/baselines/cpu/main.cpp
index c8cccaf..b4cd149 100644
--- a/TRNS/baselines/cpu/main.cpp
+++ b/TRNS/baselines/cpu/main.cpp
@@ -36,9 +36,18 @@
 #include "support/setup.h"
 #include "kernel.h"
 #include "support/common.h"
-#include "support/timer.h"
 #include "support/verify.h"
 
+#if WITH_BENCHMARK
+#include "support/timer.h"
+#else
+#include <string>
+struct Timer {
+    inline void start(std::string name) {(void)name;}
+    inline void stop(std::string name) {(void)name;}
+};
+#endif
+
 #include <unistd.h>
 #include <thread>
 #include <string.h>
@@ -362,6 +371,7 @@ int main(int argc, char **argv) {
         timer.stop("free");
 #endif
 
+#if WITH_BENCHMARK
         if (rep >= p.n_warmup) {
 #if NUMA_MEMCPY
             printf("[::] TRNS-CPU-MEMCPY | n_threads=%d e_type=%s n_elements=%d"
@@ -396,10 +406,8 @@ int main(int argc, char **argv) {
                 timer.get("Step 1") + timer.get("Step 2") + timer.get("Step 3"));
 #endif // NUMA_MEMCPY
         }
+#endif // WITH_BENCHMARK
     }
-    //timer.print("Step 1", p.n_reps);
-    //timer.print("Step 2", p.n_reps);
-    //timer.print("Step 3", p.n_reps);
 
     // Verify answer
     //verify(h_local, h_in_backup, M_ * m, N_ * n, 1);
diff --git a/TRNS/baselines/cpu/run-perf.sh b/TRNS/baselines/cpu/run-perf.sh
new file mode 100755
index 0000000..f16a3b1
--- /dev/null
+++ b/TRNS/baselines/cpu/run-perf.sh
@@ -0,0 +1,6 @@
+#!/bin/zsh
+
+make -B numa=1
+
+perf stat record -o t1.perf -e ${(j:,:):-$(grep -v '^#' ../../../perf-events.txt | cut -d ' ' -f 1)} ./trns -w 0 -r 20 -p 2048 -o 2048 -m 16 -n 8 -t 1 -a 4 -c 4
+perf stat record -o t4.perf -e ${(j:,:):-$(grep -v '^#' ../../../perf-events.txt | cut -d ' ' -f 1)} ./trns -w 0 -r 20 -p 2048 -o 2048 -m 16 -n 8 -t 4 -a 4 -c 4