diff options
-rwxr-xr-x | BS/benchmark-scripts/milos-hbm-cxl.sh | 44 | ||||
-rwxr-xr-x | BS/run-fgbs24a.sh | 33 | ||||
-rwxr-xr-x | BS/run-paper-strong-full.sh | 26 | ||||
-rwxr-xr-x | BS/run-paper-strong-rank.sh | 25 | ||||
-rwxr-xr-x | BS/run-paper-weak.sh | 28 | ||||
-rwxr-xr-x | BS/run.sh | 28 |
6 files changed, 44 insertions, 140 deletions
diff --git a/BS/benchmark-scripts/milos-hbm-cxl.sh b/BS/benchmark-scripts/milos-hbm-cxl.sh new file mode 100755 index 0000000..79d02c7 --- /dev/null +++ b/BS/benchmark-scripts/milos-hbm-cxl.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +cd baselines/cpu +make -B numa=1 + +mkdir -p log/$(hostname) +fn=log/$(hostname)/milos-hbm-cxl + +# * uint64 == 128 MiB +num_queries_hbm=16777216 + +run_benchmark() { + local "$@" + OMP_NUM_THREADS=${nr_threads} ./bs_omp ${input_size} ${num_queries} $ram $cpu 2>&1 + return $? +} + +export -f run_benchmark + +( + +echo "single-node execution, HBM ref (1/2)" >&2 + +# 4 GiB +parallel -j1 --eta --joblog ${fn}.1.joblog --resume --header : \ + run_benchmark i={i} nr_threads={nr_threads} ram={ram} cpu={cpu} \ + input_size=$(perl -E 'say 2 ** 29') num_queries=${num_queries_hbm} \ + ::: i $(seq 1 5) \ + ::: nr_threads 1 2 4 8 12 16 \ + ::: cpu $(seq 0 7) \ + ::: ram $(seq 0 16) + +echo "multi-node execution, HBM ref (2/2)" >&2 + +# 8 GiB +parallel -j1 --eta --joblog ${fn}.2.joblog --resume --header : \ + run_benchmark i={i} nr_threads={nr_threads} ram={ram} cpu={cpu} \ + input_size=$(perl -E 'say 2 ** 30') num_queries=${num_queries_hbm} \ + ::: i $(seq 1 40) \ + ::: nr_threads 32 48 64 96 128 \ + ::: cpu -1 \ + ::: ram $(seq 0 16) + +) >> ${fn}.txt diff --git a/BS/run-fgbs24a.sh b/BS/run-fgbs24a.sh deleted file mode 100755 index 06f8766..0000000 --- a/BS/run-fgbs24a.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash - -set -e - -mkdir -p $(hostname) - -ts=$(date +%Y%m%d) - -# BL: use 2^(BL) B blocks for MRAM <-> WRAM transfers on PIM module -# T: data type -# -w: number of un-timed warmup iterations -# -e: number of timed iterations -# -i; ignored, always uses 262144 elements - -( - -echo "prim-benchmarks BS (dfatool fgbs24a edition)" -echo "Started at $(date)" -echo "Revision $(git describe --always)" - -for nr_dpus in 2304 2048 2543; do - for nr_tasklets in 16; do - echo - if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=10; then - timeout --foreground -k 1m 30m bin/bs_host -w 0 -e 100 -i 16777216 || true - fi - if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=10 WITH_ALLOC_OVERHEAD=1 WITH_LOAD_OVERHEAD=1 WITH_FREE_OVERHEAD=1; then - timeout --foreground -k 1m 30m bin/bs_host -w 0 -e 100 -i 16777216 || true - fi - done -done -echo "Completed at $(date)" -) | tee "$(hostname)/${ts}-fgbs24a.txt" diff --git a/BS/run-paper-strong-full.sh b/BS/run-paper-strong-full.sh deleted file mode 100755 index a6129aa..0000000 --- a/BS/run-paper-strong-full.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash - -set -e - -# BL: use 2^(BL) B blocks for MRAM <-> WRAM transfers on PIM module -# T: data type -# -w: number of un-timed warmup iterations -# -e: number of timed iterations -# -i; ignored, always uses 262144 elements - -( - -echo "prim-benchmarks BS strong-full (dfatool edition)" -echo "Started at $(date)" -echo "Revision $(git describe --always)" - -# >2048 are not part of uptsream -for nr_dpus in 2543 2304 256 512 1024 2048; do - for nr_tasklets in 1 2 4 8 16; do - echo - if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=10 verbose=1; then - timeout --foreground -k 1m 30m bin/bs_host -w 0 -e 100 -i 16777216 || true - fi - done -done -) | tee log-paper-strong-full.txt diff --git a/BS/run-paper-strong-rank.sh b/BS/run-paper-strong-rank.sh deleted file mode 100755 index c2d4f36..0000000 --- a/BS/run-paper-strong-rank.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash - -set -e - -# BL: use 2^(BL) B blocks for MRAM <-> WRAM transfers on PIM module -# T: data type -# -w: number of un-timed warmup iterations -# -e: number of timed iterations -# -i; ignored, always uses 262144 elements - -( - -echo "prim-benchmarks BS strong-rank (dfatool edition)" -echo "Started at $(date)" -echo "Revision $(git describe --always)" - -for nr_dpus in 1 4 16 64; do - for nr_tasklets in 1 2 4 8 16; do - echo - if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=10 verbose=1; then - timeout --foreground -k 1m 30m bin/bs_host -w 0 -e 100 -i 262144 || true - fi - done -done -) | tee log-paper-strong-rank.txt diff --git a/BS/run-paper-weak.sh b/BS/run-paper-weak.sh deleted file mode 100755 index a27c547..0000000 --- a/BS/run-paper-weak.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash - -set -e - -# BL: use 2^(BL) B blocks for MRAM <-> WRAM transfers on PIM module -# T: data type -# -w: number of un-timed warmup iterations -# -e: number of timed iterations -# -i; ignored, always uses 262144 elements -# ... so the weak rank script might be bogus - -( - -echo "prim-benchmarks BS weak (dfatool edition)" -echo "Started at $(date)" -echo "Revision $(git describe --always)" - -for nr_dpus in 1 4 16 64; do - for nr_tasklets in 1 2 4 8 16; do - echo - # original Makefile sets PROBLEM_SIZE=2, for some reason. - if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=10 verbose=1 PROBLEM_SIZE=2; then - i=$(( nr_dpus * 262144 )) - timeout --foreground -k 1m 30m bin/bs_host -w 0 -e 100 -i $i || true - fi - done -done -) | tee log-paper-weak.txt diff --git a/BS/run.sh b/BS/run.sh deleted file mode 100755 index 0c67c93..0000000 --- a/BS/run.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash - -set -e - -# BL: use 2^(BL) B blocks for MRAM <-> WRAM transfers on PIM module -# T: data type -# -w: number of un-timed warmup iterations -# -e: number of timed iterations -# -i; ignored, always uses 262144 elements - -( - -echo "prim-benchmarks BS (dfatool edition)" -echo "Started at $(date)" -echo "Revision $(git describe --always)" - -for i in 262144 16777216; do - for nr_dpus in 1 4 8 16 32 64 128 256 512 768 1024 1536 2048 2304 2542; do - for nr_tasklets in 8 12 16; do - echo - if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=10; then - timeout --foreground -k 1m 30m bin/bs_host -w 0 -e 100 -i $i || true - fi - done - done -done -echo "Completed at $(date)" -) | tee "log-$(hostname)-ndpus.txt" |