summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xSCAN-SSA/benchmark-scripts/ccmcc25-sim.sh25
-rwxr-xr-xSCAN-SSA/benchmark-scripts/ccmcc25.sh31
-rwxr-xr-xSCAN-SSA/run-omp.sh23
-rwxr-xr-xSCAN-SSA/run.sh27
4 files changed, 56 insertions, 50 deletions
diff --git a/SCAN-SSA/benchmark-scripts/ccmcc25-sim.sh b/SCAN-SSA/benchmark-scripts/ccmcc25-sim.sh
new file mode 100755
index 0000000..2715db7
--- /dev/null
+++ b/SCAN-SSA/benchmark-scripts/ccmcc25-sim.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# SCAN-SSA benchmark sweep on the UPMEM simulator; logs go to log/<hostname>/.
+mkdir -p "log/$(hostname)"
+# Build and run one configuration given as name=value arguments.
+run_benchmark_nmc() {
+  local "$@"  # nr_dpus=..., nr_tasklets=..., input_size=... become locals
+  set -e      # do not run the binary if the build fails
+  make -B NR_DPUS="${nr_dpus:?}" NR_TASKLETS="${nr_tasklets:?}" BL=10 \
+    aspectc=1 aspectc_timing=1 dfatool_timing=0
+  bin/host_code -w 0 -e 5 -i "${input_size:?}"
+}
+
+export -f run_benchmark_nmc
+
+# SDK release used for simulator runs; names the logs and selects the env script.
+sdk=2025.1.0
+fn="log/$(hostname)/ccmcc25-sdk${sdk}-sim"
+source ~/lib/local/upmem/"upmem-${sdk}-Linux-x86_64"/upmem_env.sh simulator
+
+echo "prim-benchmarks SCAN-SSA $(git describe --all --long) $(git rev-parse HEAD) $(date -R)" >> "${fn}.txt"
+parallel -j1 --eta --joblog "${fn}.joblog" --resume --header : \
+  run_benchmark_nmc nr_dpus={nr_dpus} nr_tasklets=16 input_size={input_size} \
+  ::: nr_dpus 1 2 4 8 16 32 48 64 \
+  ::: input_size $((2**22)) $((2**23)) $((2**24)) \
+  >> "${fn}.txt"
diff --git a/SCAN-SSA/benchmark-scripts/ccmcc25.sh b/SCAN-SSA/benchmark-scripts/ccmcc25.sh
new file mode 100755
index 0000000..c9655c8
--- /dev/null
+++ b/SCAN-SSA/benchmark-scripts/ccmcc25.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# SCAN-SSA benchmark sweep across several UPMEM SDK releases; logs go to log/<hostname>/.
+mkdir -p "log/$(hostname)"
+# Build and run one configuration given as name=value arguments.
+run_benchmark_nmc() {
+  local "$@"  # nr_dpus=..., nr_tasklets=..., input_size=..., numa_rank=... become locals
+  set -e      # abort on the first failing step
+  sudo limit_ranks_to_numa_node "${numa_rank:?}"
+  make -B NR_DPUS="${nr_dpus:?}" NR_TASKLETS="${nr_tasklets:?}" BL=10 \
+    aspectc=1 aspectc_timing=1 dfatool_timing=0
+  bin/host_code -w 0 -e 50 -i "${input_size:?}"
+}
+
+export -f run_benchmark_nmc
+
+for sdk in 2023.2.0 2024.1.0 2024.2.0 2025.1.0; do
+  fn="log/$(hostname)/ccmcc25-sdk${sdk}"
+  env_script="/opt/upmem/upmem-${sdk}-Linux-x86_64/upmem_env.sh"
+  # Skip SDKs that are not installed, so no run is logged under the wrong SDK.
+  [[ -r "${env_script}" ]] || { echo "skipping SDK ${sdk}: ${env_script} not readable" >&2; continue; }
+  source "${env_script}"
+
+  echo "prim-benchmarks SCAN-SSA $(git describe --all --long) $(git rev-parse HEAD) $(date -R)" >> "${fn}.txt"
+  parallel -j1 --eta --joblog "${fn}.joblog" --resume --header : \
+    run_benchmark_nmc nr_dpus={nr_dpus} nr_tasklets=16 input_size={input_size} numa_rank={numa_rank} \
+    ::: numa_rank any \
+    ::: nr_dpus 64 128 256 512 768 1024 1536 2048 2304 \
+    ::: input_size $((2**27)) $((2**28)) $((2**29)) \
+    >> "${fn}.txt"
+
+done
diff --git a/SCAN-SSA/run-omp.sh b/SCAN-SSA/run-omp.sh
deleted file mode 100755
index ccbb1bd..0000000
--- a/SCAN-SSA/run-omp.sh
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/bin/bash
-
-set -e
-
-# T: data type
-# -w: number of un-timed warmup iterations
-# -e: number of timed iterations
-# -i: input size (number of elements, not number of bytes!)
-
-echo "prim-benchmarks SCAN-SSA (dfatool edition)"
-echo "Started at $(date)"
-echo "Revision $(git describe --always)"
-
-for nr_threads in 1 2 4 6 8 12 16 20 24 32; do
- for i in 2048 4096 8192 16384 65536 262144 1048576 3932160 15728640 31457280; do
- for dt in UINT32 UINT64 INT32 INT64 FLOAT DOUBLE; do
- echo
- if make -B TYPE=${dt} bin/omp_code; then
- OMP_NUM_THREADS=$nr_threads timeout -k 1m 30m bin/omp_code -w 0 -e 100 -i ${i} || true
- fi
- done
- done
-done
diff --git a/SCAN-SSA/run.sh b/SCAN-SSA/run.sh
deleted file mode 100755
index 54d5f93..0000000
--- a/SCAN-SSA/run.sh
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/bin/bash
-
-set -e
-
-# BL: use 2^(BL) B blocks for MRAM <-> WRAM transfers on PIM module
-# T: data type
-# -w: number of un-timed warmup iterations
-# -e: number of timed iterations
-# -i: input size (number of elements, not number of bytes!)
-
-echo "prim-benchmarks SCAN-SSA (dfatool edition)"
-echo "Started at $(date)"
-echo "Revision $(git describe --always)"
-
-for nr_dpus in 1 2 4 8 16 32 64 128 256 512; do
- for nr_tasklets in 1 2 3 4 6 8 10 12 16 20 24; do
- for i in 2048 4096 8192 16384 65536 262144 1048576 3932160; do
- for dt in UINT32 UINT64 INT32 INT64 FLOAT DOUBLE; do
- echo
- if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=10 TYPE=${dt} UNROLL=1 \
- || make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=10 TYPE=${dt} UNROLL=0; then
- timeout -k 1m 30m bin/host_code -w 0 -e 100 -i ${i} || true
- fi
- done
- done
- done
-done