diff options
Diffstat (limited to 'VA')
| -rw-r--r-- | VA/Makefile | 6 | ||||
| -rw-r--r-- | VA/baselines/cpu/Makefile | 2 | ||||
| -rw-r--r-- | VA/baselines/cpu/app_baseline.c | 10 | ||||
| -rwxr-xr-x | VA/benchmark-scripts/ccmcc25-sim.sh | 25 | ||||
| -rwxr-xr-x | VA/benchmark-scripts/ccmcc25.sh | 27 |
5 files changed, 52 insertions, 18 deletions
diff --git a/VA/Makefile b/VA/Makefile index e0d392f..a67c600 100644 --- a/VA/Makefile +++ b/VA/Makefile @@ -35,7 +35,7 @@ endif QUIET = @ -ifdef verbose +ifeq (${verbose}, 1) QUIET = endif @@ -45,12 +45,12 @@ bin: ${QUIET}mkdir -p bin # cp/rm are needed to work around AspectC++ not liking symlinks -bin/host_code: ${HOST_SOURCES} ${COMMON_INCLUDES} bin +bin/host_code: ${HOST_SOURCES} include bin ${QUIET}cp ../include/dfatool_host_dpu.ah include ${QUIET}${HOST_CC} -o $@ ${HOST_SOURCES} ${HOST_FLAGS} ${QUIET}rm -f include/dfatool_host_dpu.ah -bin/dpu_code: ${DPU_SOURCES} ${COMMON_INCLUDES} bin +bin/dpu_code: ${DPU_SOURCES} include bin ${QUIET}dpu-upmem-dpurte-clang ${DPU_FLAGS} -o $@ ${DPU_SOURCES} clean: diff --git a/VA/baselines/cpu/Makefile b/VA/baselines/cpu/Makefile index 04aacb6..279b0f3 100644 --- a/VA/baselines/cpu/Makefile +++ b/VA/baselines/cpu/Makefile @@ -5,7 +5,7 @@ nop_sync ?= 0 numa ?= 0 numa_memcpy ?= 0 -CFLAGS = +CFLAGS = -DDFATOOL_TIMING=1 LDFLAGS = ifeq (${debug}, 1) diff --git a/VA/baselines/cpu/app_baseline.c b/VA/baselines/cpu/app_baseline.c index 7975200..fe5125d 100644 --- a/VA/baselines/cpu/app_baseline.c +++ b/VA/baselines/cpu/app_baseline.c @@ -15,7 +15,7 @@ #include <omp.h> #if WITH_BENCHMARK -#include "../../support/timer.h" +#include "../../include/timer.h" #else #define start(...) #define stop(...) @@ -109,7 +109,7 @@ struct Params input_params(int argc, char **argv) p.n_warmup = 1; p.n_reps = 3; p.exp = 1; - p.n_threads = 5; + p.n_threads = 8; #if NUMA p.bitmask_in = NULL; p.bitmask_out = NULL; @@ -213,9 +213,11 @@ int main(int argc, char **argv) C = (T *) malloc(input_size * sizeof(T)); #endif + omp_set_num_threads(p.n_threads); + #pragma omp parallel for for (unsigned long i = 0; i < input_size; i++) { - A[i] = (T) (rand()); - B[i] = (T) (rand()); + A[i] = (T) i % (1<<31) + 5; + B[i] = (T) i % (1<<31) + 6; } #if NUMA diff --git a/VA/benchmark-scripts/ccmcc25-sim.sh b/VA/benchmark-scripts/ccmcc25-sim.sh new file mode 100755 index 0000000..386cf90 --- /dev/null +++ b/VA/benchmark-scripts/ccmcc25-sim.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +mkdir -p log/$(hostname) + +run_benchmark_nmc() { + local "$@" + set -e + make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=10 \ + aspectc=1 aspectc_timing=1 dfatool_timing=0 + bin/host_code -w 0 -e 5 -i ${input_size} +} + +export -f run_benchmark_nmc + +fn=log/$(hostname)/ccmcc25-sim + +source ~/lib/local/upmem/upmem-2025.1.0-Linux-x86_64/upmem_env.sh simulator + +echo "prim-benchmarks VA $(git describe --all --long) $(git rev-parse HEAD) $(date -R)" >> ${fn}.txt + +parallel -j1 --eta --joblog ${fn}.joblog --resume --header : \ + run_benchmark_nmc nr_dpus={nr_dpus} nr_tasklets=16 input_size={input_size} \ + ::: nr_dpus 1 2 4 8 16 32 48 64 \ + ::: input_size 327680 655360 1310720 2621440 \ +>> ${fn}.txt diff --git a/VA/benchmark-scripts/ccmcc25.sh b/VA/benchmark-scripts/ccmcc25.sh index ac0e066..f6d441d 100755 --- a/VA/benchmark-scripts/ccmcc25.sh +++ b/VA/benchmark-scripts/ccmcc25.sh @@ -1,24 +1,31 @@ #!/bin/bash mkdir -p log/$(hostname) -fn=log/$(hostname)/ccmcc25 - -source /opt/upmem/upmem-2025.1.0-Linux-x86_64/upmem_env.sh run_benchmark_nmc() { local "$@" set -e sudo limit_ranks_to_numa_node ${numa_rank} make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=10 \ - dfatool_timing=0 aspectc=1 aspectc_timing=1 + aspectc=1 aspectc_timing=1 dfatool_timing=0 bin/host_code -w 0 -e 50 -i ${input_size} } export -f run_benchmark_nmc -parallel -j1 --eta --joblog ${fn}.joblog --resume --header : \ - run_benchmark_nmc nr_dpus={nr_dpus} nr_tasklets=16 input_size={input_size} numa_rank={numa_rank} \ - ::: numa_rank any \ - ::: nr_dpus 64 128 256 512 768 1024 1536 2048 2304 \ - ::: input_size 83886080 167772160 335544320 671088640 \ ->> ${fn}.txt +for sdk in 2023.2.0 2024.1.0 2024.2.0 2025.1.0; do + + fn=log/$(hostname)/ccmcc25-sdk${sdk} + + source /opt/upmem/upmem-${sdk}-Linux-x86_64/upmem_env.sh + + echo "prim-benchmarks VA $(git describe --all --long) $(git rev-parse HEAD) $(date -R)" >> ${fn}.txt + + parallel -j1 --eta --joblog ${fn}.joblog --resume --header : \ + run_benchmark_nmc nr_dpus={nr_dpus} nr_tasklets=16 input_size={input_size} numa_rank={numa_rank} \ + ::: numa_rank any \ + ::: nr_dpus 64 128 256 512 768 1024 1536 2048 2304 \ + ::: input_size 83886080 167772160 335544320 671088640 \ + >> ${fn}.txt + +done |
