diff options
-rwxr-xr-x | TRNS/run-fgbs24a.sh | 29 | ||||
-rwxr-xr-x | TS/run-fgbs24a.sh | 31 | ||||
-rw-r--r-- | UNI/host/app.c | 2 | ||||
-rwxr-xr-x | UNI/run-fgbs24a.sh | 28 | ||||
-rwxr-xr-x | VA/run-fgbs24a.sh | 28 |
5 files changed, 117 insertions, 1 deletion
diff --git a/TRNS/run-fgbs24a.sh b/TRNS/run-fgbs24a.sh new file mode 100755 index 0000000..ffdadcf --- /dev/null +++ b/TRNS/run-fgbs24a.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +set -e + +mkdir -p $(hostname) + +# BL: use 2^(BL) B blocks for MRAM <-> WRAM transfers on PIM module +# T: data type +# -w: number of un-timed warmup iterations +# -e: number of timed iterations +# -i: ignored, always uses 262144 elements + +( + +echo "prim-benchmarks TRNS strong-full (dfatool fgbs24a edition)" +echo "Started at $(date)" +echo "Revision $(git describe --always)" + +for nr_dpus in 2543 2304 2048; do + for nr_tasklets in 16; do + echo + if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets}; then + # upstream uses -p 2048, but then the number of DPUs is always constant... + timeout --foreground -k 1m 180m bin/host_code -w 0 -e 100 -p $nr_dpus -o 12288 -x 1 || true + fi + done +done +echo "Completed at $(date)" +) | tee "$(hostname)/fgbs24a.txt" diff --git a/TS/run-fgbs24a.sh b/TS/run-fgbs24a.sh new file mode 100755 index 0000000..91b46f9 --- /dev/null +++ b/TS/run-fgbs24a.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +set -e + +mkdir -p $(hostname) + +# BL: use 2^(BL) B blocks for MRAM <-> WRAM transfers on PIM module +# T: data type +# -w: number of un-timed warmup iterations +# -e: number of timed iterations +# -i: ignored, always uses 262144 elements + +( + +echo "prim-benchmarks TS (dfatool fgbs24a edition)" +echo "Started at $(date)" +echo "Revision $(git describe --always)" + +for nr_dpus in 2543 2304 2048; do + # upstream code only works with up to 8 tasklets. funky. + for nr_tasklets in 8 12 16; do + echo + # upstream code did not respect $BL in the makefile and used 256B (BL=8) instead. + # This appears to be faster than BL=10. 
+ if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=8; then + timeout --foreground -k 1m 30m bin/ts_host -w 0 -e 100 -n 33554432 || true + fi + done +done +echo "Completed at $(date)" +) | tee "$(hostname)/fgbs24a.txt" diff --git a/UNI/host/app.c b/UNI/host/app.c index e624fa2..596d0cf 100644 --- a/UNI/host/app.c +++ b/UNI/host/app.c @@ -319,7 +319,7 @@ int main(int argc, char **argv) { input_size / timer.time[2], input_size / (timer.time[4]), input_size / (timer.time[0] + timer.time[1] + timer.time[3] + timer.time[4] + timer.time[5] + timer.time[6] + timer.time[7])); - printf(" throughput_upmem_wxsr_MBps=%f throughput_upmem_lwxsr_MBps=%f throughput_upmem_alwxsr_MBps=%f\n", + printf(" throughput_upmem_wxsr_MOpps=%f throughput_upmem_lwxsr_MOpps=%f throughput_upmem_alwxsr_MOpps=%f\n", input_size / (timer.time[3] + timer.time[4] + timer.time[5] + timer.time[6]), input_size / (timer.time[1] + timer.time[3] + timer.time[4] + timer.time[5] + timer.time[6]), input_size / (timer.time[0] + timer.time[1] + timer.time[3] + timer.time[4] + timer.time[5] + timer.time[6])); diff --git a/UNI/run-fgbs24a.sh b/UNI/run-fgbs24a.sh new file mode 100755 index 0000000..92f73bf --- /dev/null +++ b/UNI/run-fgbs24a.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +set -e + +mkdir -p $(hostname) + +# BL: use 2^(BL) B blocks for MRAM <-> WRAM transfers on PIM module +# T: data type +# -w: number of un-timed warmup iterations +# -e: number of timed iterations +# -i: ignored, always uses 262144 elements + +( + +echo "prim-benchmarks UNI (dfatool fgbs24a edition)" +echo "Started at $(date)" +echo "Revision $(git describe --always)" + +for nr_dpus in 2543 2304 2048; do + for nr_tasklets in 16; do + echo + if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=10; then + timeout --foreground -k 1m 30m bin/host_code -w 0 -e 100 -i 251658240 -x 1 || true + fi + done +done +echo "Completed at $(date)" +) | tee "$(hostname)/fgbs24a.txt" diff --git a/VA/run-fgbs24a.sh b/VA/run-fgbs24a.sh new file 
mode 100755 index 0000000..f8941fb --- /dev/null +++ b/VA/run-fgbs24a.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +set -e + +mkdir -p $(hostname) + +# BL: use 2^(BL) B blocks for MRAM <-> WRAM transfers on PIM module +# T: data type +# -w: number of un-timed warmup iterations +# -e: number of timed iterations +# -i: ignored, always uses 262144 elements + +( + +echo "prim-benchmarks VA (dfatool fgbs24a edition)" +echo "Started at $(date)" +echo "Revision $(git describe --always)" + +for nr_dpus in 2543 2304 2048; do + for nr_tasklets in 16; do + echo + if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=10; then + timeout --foreground -k 1m 30m bin/host_code -w 0 -e 100 -i 167772160 -x 1 || true + fi + done +done +echo "Completed at $(date)" +) | tee "$(hostname)/fgbs24a.txt" |