TRNS CPU baseline: lower-case build switches, optional -march=native, -O3

The Makefile switches are renamed NUMA -> numa and NUMA_MEMCPY -> numa_memcpy,
a new switch "native" (default 1) adds -march=native, the trns target is built
with -O3 instead of -O2, and the single FLAGS variable is replaced by CFLAGS
and LDFLAGS. run-perf.sh and the dimes-hetsim-*.sh drivers pass the renamed
switches; dimes-hetsim-hbm.sh also drops the "input_size 167772160" argument
from the first parallel invocation.

Review notes
  * Fix: the captured change left "FLAGS += -lnuma" untouched while the trns
    recipe was switched from $(FLAGS) to ${LDFLAGS}, so with numa=1 the
    library never reached the link line. The append now targets LDFLAGS.
    The diffstat below includes this extra -/+ pair (the captured page
    showed 19 for the Makefile and 19 insertions, 13 deletions in total).
  * The post-image hash on the Makefile "index" line (438b9fb) is the captured
    one and does not describe the file produced by the fixed hunk.
  * Line breaks, indentation and blank context lines were lost in capture.
    They are re-laid-out here to satisfy the @@ line counts; indentation is
    assumed to be a tab and some blank-line positions are a best guess.
    Verify against the tree, or apply with "git apply --ignore-whitespace".

-rw-r--r--  TRNS/baselines/cpu/Makefile    | 21
-rwxr-xr-x  TRNS/baselines/cpu/run-perf.sh |  2
-rwxr-xr-x  TRNS/dimes-hetsim-hbm.sh       |  7
-rwxr-xr-x  TRNS/dimes-hetsim-nmc.sh       |  4
4 files changed, 20 insertions, 14 deletions

diff --git a/TRNS/baselines/cpu/Makefile b/TRNS/baselines/cpu/Makefile
index 236f7bb..438b9fb 100644
--- a/TRNS/baselines/cpu/Makefile
+++ b/TRNS/baselines/cpu/Makefile
@@ -32,16 +32,23 @@
 # THE SOFTWARE.
 #
 
-NUMA ?= 0
-NUMA_MEMCPY ?= 0
-FLAGS =
+native ?= 1
+numa ?= 0
+numa_memcpy ?= 0
 
-ifeq (${NUMA}, 1)
+CFLAGS =
+LDFLAGS =
+
+ifeq (${native}, 1)
+	CFLAGS += -march=native
+endif
+
+ifeq (${numa}, 1)
-	FLAGS += -lnuma
+	LDFLAGS += -lnuma
 endif
 
 CXX=g++
-CXX_FLAGS=-std=c++11 -Wall -Wextra -pedantic -DNUMA=${NUMA} -DNUMA_MEMCPY=${NUMA_MEMCPY}
+CXX_FLAGS=-std=c++11 -Wall -Wextra -pedantic -DNUMA=${numa} -DNUMA_MEMCPY=${numa_memcpy}
 
 LIB=-L/usr/lib/ -lm -pthread
 
@@ -52,7 +59,7 @@ EXE=trns
 all: trns
 
 trns: ${SRC}
-	$(CXX) -O2 $(CXX_FLAGS) $(SRC) $(LIB) -o $(EXE) $(FLAGS)
+	$(CXX) -O3 $(CXX_FLAGS) ${CFLAGS} $(SRC) $(LIB) -o $(EXE) ${LDFLAGS}
 
 trns_O0: ${SRC}
 	$(CXX) $(CXX_FLAGS) $(SRC) $(LIB) -o $(EXE)_O0
diff --git a/TRNS/baselines/cpu/run-perf.sh b/TRNS/baselines/cpu/run-perf.sh
index 08bff53..f16a3b1 100755
--- a/TRNS/baselines/cpu/run-perf.sh
+++ b/TRNS/baselines/cpu/run-perf.sh
@@ -1,6 +1,6 @@
 #!/bin/zsh
 
-make -B NUMA=1
+make -B numa=1
 
 perf stat record -o t1.perf -e ${(j:,:):-$(grep -v '^#' ../../../perf-events.txt | cut -d ' ' -f 1)} ./trns -w 0 -r 20 -p 2048 -o 2048 -m 16 -n 8 -t 1 -a 4 -c 4
 perf stat record -o t4.perf -e ${(j:,:):-$(grep -v '^#' ../../../perf-events.txt | cut -d ' ' -f 1)} ./trns -w 0 -r 20 -p 2048 -o 2048 -m 16 -n 8 -t 4 -a 4 -c 4
diff --git a/TRNS/dimes-hetsim-hbm.sh b/TRNS/dimes-hetsim-hbm.sh
index e2efaee..cc5dc68 100755
--- a/TRNS/dimes-hetsim-hbm.sh
+++ b/TRNS/dimes-hetsim-hbm.sh
@@ -32,7 +32,7 @@ fn=log/$(hostname)/dimes-hetsim-hbm
 
 (
 
-make -B NUMA=1 NUMA_MEMCPY=1
+make -B numa=1 numa_memcpy=1
 
 echo "CPU single-node operation with setup cost, memcpy node == input node, cpu node == output node (1/3)" >&2
 
@@ -43,10 +43,9 @@ parallel -j1 --eta --joblog ${fn}.1.joblog --resume --header : \
 	::: ram_in $(seq 0 15) \
 	:::+ cpu_memcpy $(seq 0 7) $(seq 0 7) \
 	::: ram_local $(seq 0 15) \
-	:::+ cpu $(seq 0 7) $(seq 0 7) \
-	::: input_size 167772160
+	:::+ cpu $(seq 0 7) $(seq 0 7)
 
-make -B NUMA=1
+make -B numa=1
 
 echo "CPU single-node operation (2/3)" >&2
 
diff --git a/TRNS/dimes-hetsim-nmc.sh b/TRNS/dimes-hetsim-nmc.sh
index b5f6f13..80987e7 100755
--- a/TRNS/dimes-hetsim-nmc.sh
+++ b/TRNS/dimes-hetsim-nmc.sh
@@ -73,7 +73,7 @@ parallel -j1 --eta --joblog ${fn}.4.joblog --resume --header : \
 ) >> ${fn}.txt
 
 cd baselines/cpu
-make -B NUMA=1
+make -B numa=1
 
 (
 
@@ -97,7 +97,7 @@ parallel -j1 --eta --joblog ${fn}.2.joblog --resume --header : \
 
 ) >> ${fn}.txt
 
-make -B NUMA=1 NUMA_MEMCPY=1
+make -B numa=1 numa_memcpy=1
 
 (
 