diff options
Diffstat (limited to 'VA/baselines/cpu')
-rw-r--r-- | VA/baselines/cpu/Makefile | 26 |
-rw-r--r-- | VA/baselines/cpu/app_baseline.c | 22 |
-rwxr-xr-x | VA/baselines/cpu/run-perf.sh | 2 |
-rwxr-xr-x | VA/baselines/cpu/run-ws.sh | 6 |
4 files changed, 49 insertions, 7 deletions
diff --git a/VA/baselines/cpu/Makefile b/VA/baselines/cpu/Makefile index 76a82e1..04aacb6 100644 --- a/VA/baselines/cpu/Makefile +++ b/VA/baselines/cpu/Makefile @@ -1,9 +1,23 @@ -NUMA ?= 0 -NUMA_MEMCPY ?= 0 -FLAGS = +benchmark ?= 1 +debug ?= 0 +native ?= 1 +nop_sync ?= 0 +numa ?= 0 +numa_memcpy ?= 0 + +CFLAGS = +LDFLAGS = + +ifeq (${debug}, 1) + CFLAGS += -g +endif + +ifeq (${native}, 1) + CFLAGS += -march=native +endif -ifeq (${NUMA}, 1) - FLAGS += -lnuma +ifeq (${numa}, 1) + LDFLAGS += -lnuma endif .PHONY: all @@ -12,7 +26,7 @@ all: va TYPE ?= int32_t va: app_baseline.c - gcc -Wall -Wextra -pedantic -march=native -O2 -o va -fopenmp -DNUMA=${NUMA} -DNUMA_MEMCPY=${NUMA_MEMCPY} -DT=${TYPE} app_baseline.c ${FLAGS} + gcc -Wall -Wextra -pedantic ${CFLAGS} -O3 -o va -fopenmp -DNUMA=${numa} -DNUMA_MEMCPY=${numa_memcpy} -DNOP_SYNC=${nop_sync} -DWITH_BENCHMARK=${benchmark} -DT=${TYPE} app_baseline.c ${LDFLAGS} va_O0: app_baseline.c gcc -o va_O0 -fopenmp app_baseline.c diff --git a/VA/baselines/cpu/app_baseline.c b/VA/baselines/cpu/app_baseline.c index 4c8610a..99865e9 100644 --- a/VA/baselines/cpu/app_baseline.c +++ b/VA/baselines/cpu/app_baseline.c @@ -13,7 +13,13 @@ #include <stdint.h> #include <omp.h> + +#if WITH_BENCHMARK #include "../../support/timer.h" +#else +#define start(...) +#define stop(...) 
+#endif #if NUMA #include <numaif.h> @@ -238,7 +244,15 @@ int main(int argc, char **argv) { numa_node_in_is_local = ((numa_node_cpu == numa_node_in) || (numa_node_cpu + 8 == numa_node_in)) * 1; #endif +#if WITH_BENCHMARK Timer timer; +#endif + +#if NOP_SYNC + for(int rep = 0; rep < 200000; rep++) { + asm volatile("nop" ::); + } +#endif for(int rep = 0; rep < p.n_warmup + p.n_reps; rep++) { @@ -298,6 +312,7 @@ int main(int argc, char **argv) { stop(&timer, 3); #endif +#if WITH_BENCHMARK unsigned int nr_threads = 0; #pragma omp parallel #pragma omp atomic @@ -333,8 +348,15 @@ int main(int argc, char **argv) { timer.time[0]); #endif // NUMA_MEMCPY } +#endif // WITH_BENCHMARK } +#if NOP_SYNC + for(int rep = 0; rep < 200000; rep++) { + asm volatile("nop" ::); + } +#endif + #if NUMA numa_free(A, input_size * sizeof(T)); numa_free(B, input_size * sizeof(T)); diff --git a/VA/baselines/cpu/run-perf.sh b/VA/baselines/cpu/run-perf.sh index 33cb7b5..8075256 100755 --- a/VA/baselines/cpu/run-perf.sh +++ b/VA/baselines/cpu/run-perf.sh @@ -1,6 +1,6 @@ #!/bin/zsh -make -B NUMA=1 +make -B numa=1 perf stat record -o t1.perf -e ${(j:,:):-$(grep -v '^#' ../../../perf-events.txt | cut -d ' ' -f 1)} ./va -a 4 -b 4 -c 4 -t 1 -e 20 -w 0 -i 167772160 perf stat record -o t4.perf -e ${(j:,:):-$(grep -v '^#' ../../../perf-events.txt | cut -d ' ' -f 1)} ./va -a 4 -b 4 -c 4 -t 4 -e 20 -w 0 -i 167772160 diff --git a/VA/baselines/cpu/run-ws.sh b/VA/baselines/cpu/run-ws.sh new file mode 100755 index 0000000..ccc4993 --- /dev/null +++ b/VA/baselines/cpu/run-ws.sh @@ -0,0 +1,6 @@ +#!/bin/zsh + +make -B benchmark=0 debug=1 native=0 nop_sync=1 numa=1 + +~/var/source/valgrind/vg-in-place --tool=ws --ws-file=t1.ws --ws-peak-detect=yes --ws-every=50000 --ws-track-locality=yes ./va -a 4 -b 4 -c 4 -t 1 -e 20 -w 0 -i 16777216 +~/var/source/valgrind/vg-in-place --tool=ws --ws-file=t4.ws --ws-peak-detect=yes --ws-every=50000 --ws-track-locality=yes ./va -a 4 -b 4 -c 4 -t 4 -e 20 -w 0 -i 16777216 |