summaryrefslogtreecommitdiff
path: root/Microbenchmarks/STREAM/run.sh
diff options
context:
space:
mode:
Diffstat (limited to 'Microbenchmarks/STREAM/run.sh')
-rwxr-xr-xMicrobenchmarks/STREAM/run.sh49
1 files changed, 18 insertions, 31 deletions
diff --git a/Microbenchmarks/STREAM/run.sh b/Microbenchmarks/STREAM/run.sh
index 3ed965e..24b08df 100755
--- a/Microbenchmarks/STREAM/run.sh
+++ b/Microbenchmarks/STREAM/run.sh
@@ -1,38 +1,25 @@
#!/bin/bash
-mkdir -p profile
set -e
-# MRAM
-for i in copy copyw add scale triad
-do
- for j in 1
- do
- for k in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
- do
- NR_DPUS=$j NR_TASKLETS=$k BL=10 MEM=MRAM OP=$i make all
- wait
- ./bin/host_code -w 0 -e 1 -i 2097152 >& profile/${i}_${j}_tl${k}_MRAM.txt
- wait
- make clean
- wait
- done
- done
-done
+# BL: use 2^(BL) B blocks for MRAM <-> WRAM transfers on PIM module
+# T: data type
+# -w: number of un-timed warmup iterations
+# -e: number of timed iterations
+# -i: input size (number of elements, not number of bytes!)
+# 2097152 elements (2M) is the maximum for 64-bit types (2M * 8 B = 16M per DPU)
-# WRAM
-for i in copyw add scale triad
-do
- for j in 1
- do
- for k in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
- do
- NR_DPUS=$j NR_TASKLETS=$k BL=10 MEM=WRAM OP=$i make all
- wait
- ./bin/host_code -w 0 -e 1 -i 2097152 >& profile/${i}_${j}_tl${k}_WRAM.txt
- wait
- make clean
- wait
- done
+for mem in MRAM WRAM; do
+ for nr_dpus in 1 2 4 8 16 32 64 128 256 512; do
+ for nr_tasklets in 1 2 3 4 6 8 10 12 16 20 24; do
+ for op in copy copyw add scale triad; do
+ for dt in uint8_t uint16_t uint32_t uint64_t float double; do
+ if make -B MEM=${mem} OP=${op} NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=10 T=${dt} UNROLL=1 \
+ || make -B MEM=${mem} OP=${op} NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=10 T=${dt} UNROLL=0; then
+ bin/host_code -w 0 -e 20 -i 2097152
+ fi
+ done
+ done
+ done
done
done