diff options
-rwxr-xr-x | SCAN-SSA/benchmark-scripts/ccmcc25-sim.sh | 25 | ||||
-rwxr-xr-x | SCAN-SSA/benchmark-scripts/ccmcc25.sh | 31 | ||||
-rwxr-xr-x | SCAN-SSA/run-omp.sh | 23 | ||||
-rwxr-xr-x | SCAN-SSA/run.sh | 27 |
4 files changed, 56 insertions, 50 deletions
diff --git a/SCAN-SSA/benchmark-scripts/ccmcc25-sim.sh b/SCAN-SSA/benchmark-scripts/ccmcc25-sim.sh
new file mode 100755
index 0000000..2715db7
--- /dev/null
+++ b/SCAN-SSA/benchmark-scripts/ccmcc25-sim.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+mkdir -p log/$(hostname)
+
+run_benchmark_nmc() {
+	local "$@"
+	set -e
+	make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=10 \
+		aspectc=1 aspectc_timing=1 dfatool_timing=0
+	bin/host_code -w 0 -e 5 -i ${input_size}
+}
+
+export -f run_benchmark_nmc
+
+fn=log/$(hostname)/ccmcc25-sdk2025.1.0-sim  # keep in sync with the SDK version sourced below
+
+source ~/lib/local/upmem/upmem-2025.1.0-Linux-x86_64/upmem_env.sh simulator
+
+echo "prim-benchmarks SCAN-SSA $(git describe --all --long) $(git rev-parse HEAD) $(date -R)" >> ${fn}.txt
+
+parallel -j1 --eta --joblog ${fn}.joblog --resume --header : \
+	run_benchmark_nmc nr_dpus={nr_dpus} nr_tasklets=16 input_size={input_size} \
+	::: nr_dpus 1 2 4 8 16 32 48 64 \
+	::: input_size $((2**22)) $((2**23)) $((2**24)) \
+>> ${fn}.txt
diff --git a/SCAN-SSA/benchmark-scripts/ccmcc25.sh b/SCAN-SSA/benchmark-scripts/ccmcc25.sh
new file mode 100755
index 0000000..c9655c8
--- /dev/null
+++ b/SCAN-SSA/benchmark-scripts/ccmcc25.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+mkdir -p log/$(hostname)
+
+run_benchmark_nmc() {
+	local "$@"
+	set -e
+	sudo limit_ranks_to_numa_node ${numa_rank}
+	make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=10 \
+		aspectc=1 aspectc_timing=1 dfatool_timing=0
+	bin/host_code -w 0 -e 50 -i ${input_size}
+}
+
+export -f run_benchmark_nmc
+
+for sdk in 2023.2.0 2024.1.0 2024.2.0 2025.1.0; do
+
+	fn=log/$(hostname)/ccmcc25-sdk${sdk}
+
+	source /opt/upmem/upmem-${sdk}-Linux-x86_64/upmem_env.sh
+
+	echo "prim-benchmarks SCAN-SSA $(git describe --all --long) $(git rev-parse HEAD) $(date -R)" >> ${fn}.txt
+
+	parallel -j1 --eta --joblog ${fn}.joblog --resume --header : \
+		run_benchmark_nmc nr_dpus={nr_dpus} nr_tasklets=16 input_size={input_size} numa_rank={numa_rank} \
+		::: numa_rank any \
+		::: nr_dpus 64 128 256 512 768 1024 1536 2048 2304 \
+		::: input_size $((2**27)) $((2**28)) $((2**29)) \
+	>> ${fn}.txt
+
+done
diff --git a/SCAN-SSA/run-omp.sh b/SCAN-SSA/run-omp.sh
deleted file mode 100755
index ccbb1bd..0000000
--- a/SCAN-SSA/run-omp.sh
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/bin/bash
-
-set -e
-
-# T: data type
-# -w: number of un-timed warmup iterations
-# -e: number of timed iterations
-# -i: input size (number of elements, not number of bytes!)
-
-echo "prim-benchmarks SCAN-SSA (dfatool edition)"
-echo "Started at $(date)"
-echo "Revision $(git describe --always)"
-
-for nr_threads in 1 2 4 6 8 12 16 20 24 32; do
-	for i in 2048 4096 8192 16384 65536 262144 1048576 3932160 15728640 31457280; do
-		for dt in UINT32 UINT64 INT32 INT64 FLOAT DOUBLE; do
-			echo
-			if make -B TYPE=${dt} bin/omp_code; then
-				OMP_NUM_THREADS=$nr_threads timeout -k 1m 30m bin/omp_code -w 0 -e 100 -i ${i} || true
-			fi
-		done
-	done
-done
diff --git a/SCAN-SSA/run.sh b/SCAN-SSA/run.sh
deleted file mode 100755
index 54d5f93..0000000
--- a/SCAN-SSA/run.sh
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/bin/bash
-
-set -e
-
-# BL: use 2^(BL) B blocks for MRAM <-> WRAM transfers on PIM module
-# T: data type
-# -w: number of un-timed warmup iterations
-# -e: number of timed iterations
-# -i: input size (number of elements, not number of bytes!)
-
-echo "prim-benchmarks SCAN-SSA (dfatool edition)"
-echo "Started at $(date)"
-echo "Revision $(git describe --always)"
-
-for nr_dpus in 1 2 4 8 16 32 64 128 256 512; do
-	for nr_tasklets in 1 2 3 4 6 8 10 12 16 20 24; do
-		for i in 2048 4096 8192 16384 65536 262144 1048576 3932160; do
-			for dt in UINT32 UINT64 INT32 INT64 FLOAT DOUBLE; do
-				echo
-				if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=10 TYPE=${dt} UNROLL=1 \
-					|| make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=10 TYPE=${dt} UNROLL=0; then
-					timeout -k 1m 30m bin/host_code -w 0 -e 100 -i ${i} || true
-				fi
-			done
-		done
-	done
-done