diff options
-rw-r--r-- | VA/baselines/cpu/Makefile | 2 | ||||
-rw-r--r-- | VA/baselines/cpu/app_baseline.c | 2 | ||||
-rwxr-xr-x | VA/dimes-hetsim-hbm.sh | 39 | ||||
-rwxr-xr-x | VA/dimes-hetsim-nmc.sh | 58 | ||||
-rwxr-xr-x | VA/dimes-hetsim.sh | 23 | ||||
-rw-r--r-- | VA/host/app.c | 4 |
6 files changed, 100 insertions, 28 deletions
diff --git a/VA/baselines/cpu/Makefile b/VA/baselines/cpu/Makefile index e7c60c0..117ef19 100644 --- a/VA/baselines/cpu/Makefile +++ b/VA/baselines/cpu/Makefile @@ -11,7 +11,7 @@ all: va TYPE ?= int32_t va: app_baseline.c - gcc -O2 -Wall -Wextra -pedantic -march=native -o va -fopenmp -DNUMA=${NUMA} -DT=${TYPE} app_baseline.c ${FLAGS} + gcc -Wall -Wextra -pedantic -march=native -O2 -o va -fopenmp -DNUMA=${NUMA} -DT=${TYPE} app_baseline.c ${FLAGS} va_O0: app_baseline.c gcc -o va_O0 -fopenmp app_baseline.c diff --git a/VA/baselines/cpu/app_baseline.c b/VA/baselines/cpu/app_baseline.c index 458cf41..5f9a4f6 100644 --- a/VA/baselines/cpu/app_baseline.c +++ b/VA/baselines/cpu/app_baseline.c @@ -216,7 +216,7 @@ int main(int argc, char **argv) { nr_threads++; if (rep >= p.n_warmup) { - printf("[::] VA CPU | n_threads=%d e_type=%s n_elements=%d" + printf("[::] VA-CPU | n_threads=%d e_type=%s n_elements=%d" #if NUMA " numa_node_in=%d numa_node_out=%d numa_node_cpu=%d numa_distance_in_cpu=%d numa_distance_cpu_out=%d" #endif diff --git a/VA/dimes-hetsim-hbm.sh b/VA/dimes-hetsim-hbm.sh new file mode 100755 index 0000000..b4809dc --- /dev/null +++ b/VA/dimes-hetsim-hbm.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +cd baselines/cpu +make -B NUMA=1 + +mkdir -p log/$(hostname) +fn=log/$(hostname)/$(date +%Y%m%d)-baseline + +# upstream uses 167772160 * int32 == 1.25 GiB for DPU version + +run_benchmark() { + eval "$@" + ./va -i ${input_size} -a ${ram} -b ${ram} -c ${cpu} -t ${nr_threads} -w 0 -e 40 + return $? +} + +export -f run_benchmark + +( + +parallel -j1 --eta --joblog ${fn}.1.joblog --resume --header : \ + run_benchmark nr_threads={nr_threads} ram={ram} cpu={cpu} \ + input_size={input_size} \ + ::: nr_threads 1 2 4 8 12 16 \ + ::: cpu $(seq 0 7) \ + ::: ram $(seq 0 15) \ + ::: input_size 167772160 + +parallel -j1 --eta --joblog ${fn}.2.joblog --resume --header : \ + run_benchmark nr_threads={nr_threads} ram={ram} cpu={cpu} \ + input_size={input_size} \ + ::: nr_threads 32 48 64 96 128 \ + ::: cpu -1 \ + ::: ram $(seq 0 15) \ + ::: input_size 167772160 + +) | tee ${fn}.txt + +xz -f -v -9 -M 800M ${fn}.txt diff --git a/VA/dimes-hetsim-nmc.sh b/VA/dimes-hetsim-nmc.sh new file mode 100755 index 0000000..455f369 --- /dev/null +++ b/VA/dimes-hetsim-nmc.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +mkdir -p log/$(hostname) baselines/cpu/log/$(hostname) +fn=log/$(hostname)/$(date +%Y%m%d) + +# upstream uses 167772160 * int32 == 1.25 GiB for DPU version + +run_benchmark_nmc() { + eval "$@" + if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=10 WITH_ALLOC_OVERHEAD=1 WITH_LOAD_OVERHEAD=1 WITH_FREE_OVERHEAD=1; then + bin/host_code -w 0 -e 100 -i ${input_size} -x 1 + fi + return $? +} + +export -f run_benchmark_nmc + +run_benchmark_baseline() { + eval "$@" + ./va -i ${input_size} -a ${ram} -b ${ram} -c ${cpu} -t $nr_threads -w 0 -e 40 + return $? +} + +export -f run_benchmark_baseline + +( + +parallel -j1 --eta --joblog ${fn}.joblog --resume --header : \ + run_benchmark_nmc nr_dpus={nr_dpus} nr_tasklets=16 input_size={input_size} \ + ::: nr_dpus 64 128 256 512 768 1024 1536 2048 2304 \ + ::: input_size 167772160 + +) | tee ${fn}.txt + +xz -f -v -9 -M 800M ${fn}.txt + +cd baselines/cpu +make -B NUMA=1 + +( + +parallel -j1 --eta --joblog ${fn}.1.joblog --resume --header : \ + run_benchmark_baseline nr_threads={nr_threads} input_size={input_size} ram={ram} cpu={cpu} \ + ::: ram 0 1 \ + ::: cpu 0 1 \ + ::: nr_threads 1 2 4 8 12 16 32 \ + ::: input_size 167772160 + +parallel -j1 --eta --joblog ${fn}.2.joblog --resume --header : \ + run_benchmark_baseline nr_threads={nr_threads} input_size={input_size} ram={ram} cpu={cpu} \ + ::: ram any \ + ::: cpu -1 \ + ::: nr_threads 48 64 \ + ::: input_size 167772160 + +) | tee ${fn}.txt + +xz -f -v -9 -M 800M ${fn}.txt diff --git a/VA/dimes-hetsim.sh b/VA/dimes-hetsim.sh deleted file mode 100755 index e4c8ee2..0000000 --- a/VA/dimes-hetsim.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/sh - -cd baselines/cpu -make -B NUMA=1 - -# upstream uses 16777216 * int32 == 64 MiB by default -# 2^29 elements * int32 == 2 GiB - -for nr_threads in 1 2 4 8 12 16; do - for cpu in 0 1 2 3 4 5 6 7; do - for ram in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15; do - ./va -a $ram -b $ram -c $cpu -t $nr_threads -w 0 -e 50 - ./va -i $(perl -E 'say 2 ** 29') -a $ram -b $ram -c $cpu -t $nr_threads -w 0 -e 10 - done - done -done - -for nr_threads in 32 48 64 96 128; do - for ram in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15; do - ./va -a $ram -b $ram -c -1 -t $nr_threads -w 0 -e 50 - ./va -i $(perl -E 'say 2 ** 29') -a $ram -b $ram -c -1 -t $nr_threads -w 0 -e 50 - done -done diff --git a/VA/host/app.c b/VA/host/app.c index b339bf3..d363514 100644 --- a/VA/host/app.c +++ b/VA/host/app.c @@ -255,7 +255,7 @@ int main(int argc, char **argv) { if (status) { printf("[" ANSI_COLOR_GREEN "OK" ANSI_COLOR_RESET "] Outputs are equal\n"); if (rep >= p.n_warmup) { - printf("[::] VA UPMEM | n_dpus=%d n_ranks=%d n_tasklets=%d e_type=%s block_size_B=%d n_elements=%d n_elements_per_dpu=%d", + printf("[::] VA-UPMEM | n_dpus=%d n_ranks=%d n_tasklets=%d e_type=%s block_size_B=%d n_elements=%d n_elements_per_dpu=%d", nr_of_dpus, nr_of_ranks, NR_TASKLETS, XSTR(T), BLOCK_SIZE, input_size, input_size / NR_DPUS); printf(" b_with_alloc_overhead=%d b_with_load_overhead=%d b_with_free_overhead=%d ", WITH_ALLOC_OVERHEAD, WITH_LOAD_OVERHEAD, WITH_FREE_OVERHEAD); @@ -288,8 +288,6 @@ int main(int argc, char **argv) { printf("[" ANSI_COLOR_RED "ERROR" ANSI_COLOR_RESET "] Outputs differ!\n"); } } - printf("throughput_*_MOpps == n_elements / (+ latency_*_us ...)\n"); - printf("throughput_*_MBps == 3 * sizeof(e_type) * throughput_*_MOpps \n"); #if ENERGY double energy; |