summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Birte Kristina Friesel <birte.friesel@uos.de> 2025-01-13 15:32:57 +0100
committer Birte Kristina Friesel <birte.friesel@uos.de> 2025-01-13 15:32:57 +0100
commit dce09c3e7bedd426400d64abfb86e21a2c724de4 (patch)
tree 5d307f2a6422de4233537c2a3bdce5598bd3a45e
parent a30ffa52be405db50c660012c92e3e333412ed58 (diff)
VA: Add valgrind-ws benchmark script and baseline adjustments
-rw-r--r-- VA/baselines/cpu/Makefile 26
-rw-r--r-- VA/baselines/cpu/app_baseline.c 22
-rwxr-xr-x VA/baselines/cpu/run-perf.sh 2
-rwxr-xr-x VA/baselines/cpu/run-ws.sh 6
4 files changed, 49 insertions, 7 deletions
diff --git a/VA/baselines/cpu/Makefile b/VA/baselines/cpu/Makefile
index 76a82e1..04aacb6 100644
--- a/VA/baselines/cpu/Makefile
+++ b/VA/baselines/cpu/Makefile
@@ -1,9 +1,23 @@
-NUMA ?= 0
-NUMA_MEMCPY ?= 0
-FLAGS =
+benchmark ?= 1
+debug ?= 0
+native ?= 1
+nop_sync ?= 0
+numa ?= 0
+numa_memcpy ?= 0
+
+CFLAGS =
+LDFLAGS =
+
+ifeq (${debug}, 1)
+ CFLAGS += -g
+endif
+
+ifeq (${native}, 1)
+ CFLAGS += -march=native
+endif
-ifeq (${NUMA}, 1)
- FLAGS += -lnuma
+ifeq (${numa}, 1)
+ LDFLAGS += -lnuma
endif
.PHONY: all
@@ -12,7 +26,7 @@ all: va
TYPE ?= int32_t
va: app_baseline.c
- gcc -Wall -Wextra -pedantic -march=native -O2 -o va -fopenmp -DNUMA=${NUMA} -DNUMA_MEMCPY=${NUMA_MEMCPY} -DT=${TYPE} app_baseline.c ${FLAGS}
+ gcc -Wall -Wextra -pedantic ${CFLAGS} -O3 -o va -fopenmp -DNUMA=${numa} -DNUMA_MEMCPY=${numa_memcpy} -DNOP_SYNC=${nop_sync} -DWITH_BENCHMARK=${benchmark} -DT=${TYPE} app_baseline.c ${LDFLAGS}
va_O0: app_baseline.c
gcc -o va_O0 -fopenmp app_baseline.c
diff --git a/VA/baselines/cpu/app_baseline.c b/VA/baselines/cpu/app_baseline.c
index 4c8610a..99865e9 100644
--- a/VA/baselines/cpu/app_baseline.c
+++ b/VA/baselines/cpu/app_baseline.c
@@ -13,7 +13,13 @@
#include <stdint.h>
#include <omp.h>
+
+#if WITH_BENCHMARK
#include "../../support/timer.h"
+#else
+#define start(...)
+#define stop(...)
+#endif
#if NUMA
#include <numaif.h>
@@ -238,7 +244,15 @@ int main(int argc, char **argv) {
numa_node_in_is_local = ((numa_node_cpu == numa_node_in) || (numa_node_cpu + 8 == numa_node_in)) * 1;
#endif
+#if WITH_BENCHMARK
Timer timer;
+#endif
+
+#if NOP_SYNC
+ for(int rep = 0; rep < 200000; rep++) {
+ asm volatile("nop" ::);
+ }
+#endif
for(int rep = 0; rep < p.n_warmup + p.n_reps; rep++) {
@@ -298,6 +312,7 @@ int main(int argc, char **argv) {
stop(&timer, 3);
#endif
+#if WITH_BENCHMARK
unsigned int nr_threads = 0;
#pragma omp parallel
#pragma omp atomic
@@ -333,8 +348,15 @@ int main(int argc, char **argv) {
timer.time[0]);
#endif // NUMA_MEMCPY
}
+#endif // WITH_BENCHMARK
}
+#if NOP_SYNC
+ for(int rep = 0; rep < 200000; rep++) {
+ asm volatile("nop" ::);
+ }
+#endif
+
#if NUMA
numa_free(A, input_size * sizeof(T));
numa_free(B, input_size * sizeof(T));
diff --git a/VA/baselines/cpu/run-perf.sh b/VA/baselines/cpu/run-perf.sh
index 33cb7b5..8075256 100755
--- a/VA/baselines/cpu/run-perf.sh
+++ b/VA/baselines/cpu/run-perf.sh
@@ -1,6 +1,6 @@
#!/bin/zsh
-make -B NUMA=1
+make -B numa=1
perf stat record -o t1.perf -e ${(j:,:):-$(grep -v '^#' ../../../perf-events.txt | cut -d ' ' -f 1)} ./va -a 4 -b 4 -c 4 -t 1 -e 20 -w 0 -i 167772160
perf stat record -o t4.perf -e ${(j:,:):-$(grep -v '^#' ../../../perf-events.txt | cut -d ' ' -f 1)} ./va -a 4 -b 4 -c 4 -t 4 -e 20 -w 0 -i 167772160
diff --git a/VA/baselines/cpu/run-ws.sh b/VA/baselines/cpu/run-ws.sh
new file mode 100755
index 0000000..ccc4993
--- /dev/null
+++ b/VA/baselines/cpu/run-ws.sh
@@ -0,0 +1,6 @@
+#!/bin/zsh
+
+make -B benchmark=0 debug=1 native=0 nop_sync=1 numa=1
+
+~/var/source/valgrind/vg-in-place --tool=ws --ws-file=t1.ws --ws-peak-detect=yes --ws-every=50000 --ws-track-locality=yes ./va -a 4 -b 4 -c 4 -t 1 -e 20 -w 0 -i 16777216
+~/var/source/valgrind/vg-in-place --tool=ws --ws-file=t4.ws --ws-peak-detect=yes --ws-every=50000 --ws-track-locality=yes ./va -a 4 -b 4 -c 4 -t 4 -e 20 -w 0 -i 16777216