diff options
-rw-r--r-- | BS/baselines/cpu/Makefile | 19 | ||||
-rwxr-xr-x | BS/baselines/cpu/run-perf.sh | 2 | ||||
-rwxr-xr-x | BS/dimes-hetsim-hbm.sh | 2 | ||||
-rwxr-xr-x | BS/dimes-hetsim-nmc.sh | 8 |
4 files changed, 21 insertions, 10 deletions
diff --git a/BS/baselines/cpu/Makefile b/BS/baselines/cpu/Makefile index b67602f..735fe84 100644 --- a/BS/baselines/cpu/Makefile +++ b/BS/baselines/cpu/Makefile @@ -1,16 +1,23 @@ -NUMA ?= 0 -NUMA_MEMCPY ?= 0 -FLAGS = +native ?= 1 +numa ?= 0 +numa_memcpy ?= 0 -ifeq (${NUMA}, 1) - FLAGS += -lnuma +CFLAGS = +LDFLAGS = + +ifeq (${native}, 1) + CFLAGS += -march=native +endif + +ifeq (${numa}, 1) + LDFLAGS += -lnuma endif .PHONY: all all: bs_omp bs_omp: bs_omp.c - gcc -Wall -Wextra -pedantic -march=native -O2 -DNUMA=${NUMA} -DNUMA_MEMCPY=${NUMA_MEMCPY} bs_omp.c -o bs_omp -fopenmp ${FLAGS} + gcc -Wall -Wextra -pedantic -O3 ${CFLAGS} -DNUMA=${numa} -DNUMA_MEMCPY=${numa_memcpy} bs_omp.c -o bs_omp -fopenmp ${LDFLAGS} bs_omp_O0: bs_omp.c gcc bs_omp.c -o bs_omp_O0 -fopenmp diff --git a/BS/baselines/cpu/run-perf.sh b/BS/baselines/cpu/run-perf.sh index a35e0fc..5b671e0 100755 --- a/BS/baselines/cpu/run-perf.sh +++ b/BS/baselines/cpu/run-perf.sh @@ -1,6 +1,6 @@ #!/bin/zsh -make -B NUMA=1 +make -B numa=1 OMP_NUM_THREADS=1 perf stat record -o t1.perf -e ${(j:,:):-$(grep -v '^#' ../../../perf-events.txt | cut -d ' ' -f 1)} ./bs_omp $((2**29)) 16777216 4 4 OMP_NUM_THREADS=4 perf stat record -o t4.perf -e ${(j:,:):-$(grep -v '^#' ../../../perf-events.txt | cut -d ' ' -f 1)} ./bs_omp $((2**29)) 16777216 4 4 diff --git a/BS/dimes-hetsim-hbm.sh b/BS/dimes-hetsim-hbm.sh index 4e1500d..4a775ae 100755 --- a/BS/dimes-hetsim-hbm.sh +++ b/BS/dimes-hetsim-hbm.sh @@ -1,7 +1,7 @@ #!/bin/bash cd baselines/cpu -make -B NUMA=1 +make -B numa=1 mkdir -p log/$(hostname) fn=log/$(hostname)/dimes-hetsim-hbm diff --git a/BS/dimes-hetsim-nmc.sh b/BS/dimes-hetsim-nmc.sh index 195334b..fa697bf 100755 --- a/BS/dimes-hetsim-nmc.sh +++ b/BS/dimes-hetsim-nmc.sh @@ -3,6 +3,8 @@ mkdir -p log/$(hostname) baselines/cpu/log/$(hostname) fn=log/$(hostname)/dimes-hetsim-nmc +source /opt/upmem/upmem-2024.1.0-Linux-x86_64/upmem_env.sh + # upstream DPU version uses 2048576 * uint64 ≈ 16 MiB (DPU max: 64 MiB) # upstream DPU version uses 2 queries input_size_upstream=2048576 @@ -11,6 +13,8 @@ num_queries_upstream=2 input_size_dpu=$(perl -E 'say 2 ** 22') num_queries_dpu=1048576 +# Make sure that num_queries > input_size! + run_benchmark_nmc() { local "$@" set -e @@ -69,7 +73,7 @@ cd baselines/cpu ( -make -B NUMA=1 NUMA_MEMCPY=1 +make -B numa=1 numa_memcpy=1 echo "CPU single-node upstream-ref with memcpy, copy node == input node (1/6)" >&2 @@ -97,7 +101,7 @@ parallel -j1 --eta --joblog ${fn}.2.joblog --resume --header : \ :::+ cpu 0 1 \ ::: nr_threads 1 2 4 8 12 16 -make -B NUMA=1 +make -B numa=1 echo "CPU single-node upstream-ref (3/6)" >&2 |