summaryrefslogtreecommitdiff
path: root/Microbenchmarks/STREAM/run-rank.sh
diff options
context:
space:
mode:
authorBirte Kristina Friesel <birte.friesel@uos.de>2024-02-22 08:08:08 +0100
committerBirte Kristina Friesel <birte.friesel@uos.de>2024-02-22 08:08:08 +0100
commit0f65437a68a26b906ab0da02d9f0ec4b177650fc (patch)
treec263aa41fa2b400762d205b250f82b1b44ce1a26 /Microbenchmarks/STREAM/run-rank.sh
parenta1746b5e3b78b35ea94a65979f9a0ba41dd1eed4 (diff)
STREAM: Use nano- rather than microsecond precision internally
Diffstat (limited to 'Microbenchmarks/STREAM/run-rank.sh')
-rwxr-xr-xMicrobenchmarks/STREAM/run-rank.sh38
1 files changed, 38 insertions, 0 deletions
diff --git a/Microbenchmarks/STREAM/run-rank.sh b/Microbenchmarks/STREAM/run-rank.sh
new file mode 100755
index 0000000..49253ea
--- /dev/null
+++ b/Microbenchmarks/STREAM/run-rank.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+# Build and run the STREAM microbenchmark for every parameter combination listed below; output goes to stdout and to log-<hostname>-rank-idle.txt.
+set -e
+
+# BL: use 2^(BL) B blocks for MRAM <-> WRAM transfers on PIM module
+# T: data type
+# -w: number of un-timed warmup iterations
+# -e: number of timed iterations
+# -i: input size (number of elements, not number of bytes!)
+# Each DPU uses three buffers, each of which holds $i * sizeof($dt) bytes.
+# With a total MRAM capacity of 64M, this gives us ~21M per buffer, or 16M when rounding down to the next power of two.
+# With a maximum data type width of 8B (uint64_t, double), this limits the number of elements per DPU to 2097152.
+
+(
+echo "prim-benchmarks STREAM microbenchmark (dfatool edition)"
+echo "Started at $(date)"
+echo "Revision $(git describe --always)"
+
+for i in 2097152 1048576 131072 16384 4096; do
+  for nr_dpus in 1 4 8 16 32 48 64; do
+    for nr_tasklets in 1 8 12 16; do
+      for dt in uint64_t uint8_t uint16_t uint32_t float double; do
+        for op in triad scale add copy copyw; do
+          for bl in 3 4 5 6 8 10; do
+            echo
+            # Try the UNROLL=1 build first and fall back to UNROLL=0 if it fails; the benchmark only runs when one of the two builds succeeded.
+            build=(make -B OP="${op}" NR_DPUS="${nr_dpus}" NR_TASKLETS="${nr_tasklets}" BL="${bl}" T="${dt}" WITH_ALLOC_OVERHEAD=1 WITH_LOAD_OVERHEAD=1 WITH_FREE_OVERHEAD=1)
+            if "${build[@]}" UNROLL=1 || "${build[@]}" UNROLL=0; then
+              timeout --foreground -k 1m 30m bin/host_code -w 0 -e 100 -i "${i}" -x 0 || true # best-effort: a failed or timed-out run must not end the sweep under set -e
+            fi
+          done
+        done
+      done
+    done
+  done
+done
+echo "Completed at $(date)"
+) | tee "log-$(hostname)-rank-idle.txt"