summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- TRNS/baselines/cpu/Makefile 19
-rwxr-xr-x TRNS/baselines/cpu/run-perf.sh 2
-rwxr-xr-x TRNS/dimes-hetsim-hbm.sh 7
-rwxr-xr-x TRNS/dimes-hetsim-nmc.sh 4
4 files changed, 19 insertions, 13 deletions
diff --git a/TRNS/baselines/cpu/Makefile b/TRNS/baselines/cpu/Makefile
index 236f7bb..438b9fb 100644
--- a/TRNS/baselines/cpu/Makefile
+++ b/TRNS/baselines/cpu/Makefile
@@ -32,16 +32,23 @@
# THE SOFTWARE.
#
-NUMA ?= 0
-NUMA_MEMCPY ?= 0
-FLAGS =
+native ?= 1
+numa ?= 0
+numa_memcpy ?= 0
-ifeq (${NUMA}, 1)
+CFLAGS =
+LDFLAGS =
+
+ifeq (${native}, 1)
+ CFLAGS += -march=native
+endif
+
+ifeq (${numa}, 1)
FLAGS += -lnuma
endif
CXX=g++
-CXX_FLAGS=-std=c++11 -Wall -Wextra -pedantic -DNUMA=${NUMA} -DNUMA_MEMCPY=${NUMA_MEMCPY}
+CXX_FLAGS=-std=c++11 -Wall -Wextra -pedantic -DNUMA=${numa} -DNUMA_MEMCPY=${numa_memcpy}
LIB=-L/usr/lib/ -lm -pthread
@@ -52,7 +59,7 @@ EXE=trns
all: trns
trns: ${SRC}
- $(CXX) -O2 $(CXX_FLAGS) $(SRC) $(LIB) -o $(EXE) $(FLAGS)
+ $(CXX) -O3 $(CXX_FLAGS) ${CFLAGS} $(SRC) $(LIB) -o $(EXE) ${LDFLAGS} $(FLAGS)
trns_O0: ${SRC}
$(CXX) $(CXX_FLAGS) $(SRC) $(LIB) -o $(EXE)_O0
diff --git a/TRNS/baselines/cpu/run-perf.sh b/TRNS/baselines/cpu/run-perf.sh
index 08bff53..f16a3b1 100755
--- a/TRNS/baselines/cpu/run-perf.sh
+++ b/TRNS/baselines/cpu/run-perf.sh
@@ -1,6 +1,6 @@
#!/bin/zsh
-make -B NUMA=1
+make -B numa=1
perf stat record -o t1.perf -e ${(j:,:):-$(grep -v '^#' ../../../perf-events.txt | cut -d ' ' -f 1)} ./trns -w 0 -r 20 -p 2048 -o 2048 -m 16 -n 8 -t 1 -a 4 -c 4
perf stat record -o t4.perf -e ${(j:,:):-$(grep -v '^#' ../../../perf-events.txt | cut -d ' ' -f 1)} ./trns -w 0 -r 20 -p 2048 -o 2048 -m 16 -n 8 -t 4 -a 4 -c 4
diff --git a/TRNS/dimes-hetsim-hbm.sh b/TRNS/dimes-hetsim-hbm.sh
index e2efaee..cc5dc68 100755
--- a/TRNS/dimes-hetsim-hbm.sh
+++ b/TRNS/dimes-hetsim-hbm.sh
@@ -32,7 +32,7 @@ fn=log/$(hostname)/dimes-hetsim-hbm
(
-make -B NUMA=1 NUMA_MEMCPY=1
+make -B numa=1 numa_memcpy=1
echo "CPU single-node operation with setup cost, memcpy node == input node, cpu node == output node (1/3)" >&2
@@ -43,10 +43,9 @@ parallel -j1 --eta --joblog ${fn}.1.joblog --resume --header : \
::: ram_in $(seq 0 15) \
:::+ cpu_memcpy $(seq 0 7) $(seq 0 7) \
::: ram_local $(seq 0 15) \
- :::+ cpu $(seq 0 7) $(seq 0 7) \
- ::: input_size 167772160
+ :::+ cpu $(seq 0 7) $(seq 0 7)
-make -B NUMA=1
+make -B numa=1
echo "CPU single-node operation (2/3)" >&2
diff --git a/TRNS/dimes-hetsim-nmc.sh b/TRNS/dimes-hetsim-nmc.sh
index b5f6f13..80987e7 100755
--- a/TRNS/dimes-hetsim-nmc.sh
+++ b/TRNS/dimes-hetsim-nmc.sh
@@ -73,7 +73,7 @@ parallel -j1 --eta --joblog ${fn}.4.joblog --resume --header : \
) >> ${fn}.txt
cd baselines/cpu
-make -B NUMA=1
+make -B numa=1
(
@@ -97,7 +97,7 @@ parallel -j1 --eta --joblog ${fn}.2.joblog --resume --header : \
) >> ${fn}.txt
-make -B NUMA=1 NUMA_MEMCPY=1
+make -B numa=1 numa_memcpy=1
(