diff options
author | Birte Kristina Friesel <birte.friesel@uos.de> | 2024-02-22 08:08:08 +0100 |
---|---|---|
committer | Birte Kristina Friesel <birte.friesel@uos.de> | 2024-02-22 08:08:08 +0100 |
commit | 0f65437a68a26b906ab0da02d9f0ec4b177650fc (patch) | |
tree | c263aa41fa2b400762d205b250f82b1b44ce1a26 /Microbenchmarks/STREAM/run-rank.sh | |
parent | a1746b5e3b78b35ea94a65979f9a0ba41dd1eed4 (diff) |
STREAM: Use nano- rather than microsecond precision internally
Diffstat (limited to 'Microbenchmarks/STREAM/run-rank.sh')
-rwxr-xr-x | Microbenchmarks/STREAM/run-rank.sh | 38 |
1 files changed, 38 insertions, 0 deletions
#!/bin/bash
#
# Sweep the STREAM microbenchmark over input sizes, DPU counts, tasklet
# counts, data types, operations and block sizes. Each configuration is
# rebuilt with make and then executed via bin/host_code. Everything printed
# by the sweep is shown on stdout and also written to
# log-<hostname>-rank-idle.txt in the current directory.
#
# Usage: run from the directory that holds the STREAM Makefile:
#   ./run-rank.sh

set -e

# BL: use 2^(BL) B blocks for MRAM <-> WRAM transfers on PIM module
# T: data type
# -w: number of un-timed warmup iterations
# -e: number of timed iterations
# -i: input size (number of elements, not number of bytes!)
# -x: passed to host_code as 0 for every run; its meaning is not documented
#     in this script -- NOTE(review): confirm against host_code's usage text.
# Each DPU uses three buffers, each of which holds $i * sizeof($dt) bytes.
# With a total MRAM capacity of 64M, this gives us ~21M per buffer, or 16M when rounding down to the next power of two.
# With a maximum data type width of 8B (uint64_t, double), this limits the number of elements per DPU to 2097152.

(

echo "prim-benchmarks STREAM microbenchmark (dfatool edition)"
echo "Started at $(date)"
echo "Revision $(git describe --always)"

for i in 2097152 1048576 131072 16384 4096; do
	for nr_dpus in 1 4 8 16 32 48 64; do
		for nr_tasklets in 1 8 12 16; do
			for dt in uint64_t uint8_t uint16_t uint32_t float double; do
				# The STREAM kernel is called "triad"; the previous spelling
				# "tried" meant that kernel was never requested by name.
				for op in triad scale add copy copyw; do
					for bl in 3 4 5 6 8 10; do
						echo
						# Build arguments shared by both build attempts below.
						make_args=(
							-B
							"OP=${op}"
							"NR_DPUS=${nr_dpus}"
							"NR_TASKLETS=${nr_tasklets}"
							"BL=${bl}"
							"T=${dt}"
							WITH_ALLOC_OVERHEAD=1
							WITH_LOAD_OVERHEAD=1
							WITH_FREE_OVERHEAD=1
						)
						# Try the UNROLL=1 build first; if it fails, retry with
						# UNROLL=0. If both fail, this configuration is skipped.
						# (set -e does not apply to commands in an if condition.)
						if make "${make_args[@]}" UNROLL=1 \
							|| make "${make_args[@]}" UNROLL=0; then
							# Cap each run at 30 minutes, with SIGKILL one minute
							# later if it has not exited. "|| true" keeps a failed
							# or timed-out run from aborting the sweep under set -e.
							timeout --foreground -k 1m 30m bin/host_code -w 0 -e 100 -i "$i" -x 0 || true
						fi
					done
				done
			done
		done
	done
done
echo "Completed at $(date)"
) | tee "log-$(hostname)-rank-idle.txt"