From f264f58636579436f58aa3b0c2cb392938000a59 Mon Sep 17 00:00:00 2001
From: Birte Kristina Friesel
Date: Wed, 28 Feb 2024 15:24:34 +0100
Subject: some fgbs24a scripts

---
Summary: adds a run-fgbs24a.sh driver to each of TRNS, TS, UNI and VA. Every
script rebuilds the benchmark with `make -B` for NR_DPUS in 2543, 2304, 2048
(and the listed NR_TASKLETS values), runs the host binary under `timeout`, and
tees all output to "$(hostname)/fgbs24a.txt". A failing or timed-out benchmark
run is tolerated (`|| true`); a failing build skips that configuration.
In UNI/host/app.c only the three throughput_upmem_* printf labels change their
suffix from MBps to MOpps; the printed expressions are untouched context lines.

NOTE(review): leading whitespace inside the hunks below was not preserved by
the export this text came from and has been reconstructed. Confirm against the
repository, or apply with `git apply --ignore-whitespace`.

 TRNS/run-fgbs24a.sh | 29 +++++++++++++++++++++++++++++
 TS/run-fgbs24a.sh   | 31 +++++++++++++++++++++++++++++++
 UNI/host/app.c      |  2 +-
 UNI/run-fgbs24a.sh  | 28 ++++++++++++++++++++++++++++
 VA/run-fgbs24a.sh   | 28 ++++++++++++++++++++++++++++
 5 files changed, 117 insertions(+), 1 deletion(-)
 create mode 100755 TRNS/run-fgbs24a.sh
 create mode 100755 TS/run-fgbs24a.sh
 create mode 100755 UNI/run-fgbs24a.sh
 create mode 100755 VA/run-fgbs24a.sh

diff --git a/TRNS/run-fgbs24a.sh b/TRNS/run-fgbs24a.sh
new file mode 100755
index 0000000..ffdadcf
--- /dev/null
+++ b/TRNS/run-fgbs24a.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+set -e
+
+mkdir -p $(hostname)
+
+# BL: use 2^(BL) B blocks for MRAM <-> WRAM transfers on PIM module
+# T: data type
+# -w: number of un-timed warmup iterations
+# -e: number of timed iterations
+# -i: ignored, always uses 262144 elements
+
+(
+
+echo "prim-benchmarks TRNS strong-full (dfatool fgbs24a edition)"
+echo "Started at $(date)"
+echo "Revision $(git describe --always)"
+
+for nr_dpus in 2543 2304 2048; do
+	for nr_tasklets in 16; do
+		echo
+		if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets}; then
+			# upstream uses -p 2048, but then the number of DPUs is always constant...
+			timeout --foreground -k 1m 180m bin/host_code -w 0 -e 100 -p $nr_dpus -o 12288 -x 1 || true
+		fi
+	done
+done
+echo "Completed at $(date)"
+) | tee "$(hostname)/fgbs24a.txt"
diff --git a/TS/run-fgbs24a.sh b/TS/run-fgbs24a.sh
new file mode 100755
index 0000000..91b46f9
--- /dev/null
+++ b/TS/run-fgbs24a.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+set -e
+
+mkdir -p $(hostname)
+
+# BL: use 2^(BL) B blocks for MRAM <-> WRAM transfers on PIM module
+# T: data type
+# -w: number of un-timed warmup iterations
+# -e: number of timed iterations
+# -i: ignored, always uses 262144 elements
+
+(
+
+echo "prim-benchmarks TS (dfatool fgbs24a edition)"
+echo "Started at $(date)"
+echo "Revision $(git describe --always)"
+
+for nr_dpus in 2543 2304 2048; do
+	# upstream code only works with up to 8 tasklets. funky.
+	for nr_tasklets in 8 12 16; do
+		echo
+		# upstream code did not respect $BL in the makefile and used 256B (BL=8) instead.
+		# This appears to be faster than BL=10.
+		if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=8; then
+			timeout --foreground -k 1m 30m bin/ts_host -w 0 -e 100 -n 33554432 || true
+		fi
+	done
+done
+echo "Completed at $(date)"
+) | tee "$(hostname)/fgbs24a.txt"
diff --git a/UNI/host/app.c b/UNI/host/app.c
index e624fa2..596d0cf 100644
--- a/UNI/host/app.c
+++ b/UNI/host/app.c
@@ -319,7 +319,7 @@ int main(int argc, char **argv) {
 				input_size / timer.time[2],
 				input_size / (timer.time[4]),
 				input_size / (timer.time[0] + timer.time[1] + timer.time[3] + timer.time[4] + timer.time[5] + timer.time[6] + timer.time[7]));
-			printf(" throughput_upmem_wxsr_MBps=%f throughput_upmem_lwxsr_MBps=%f throughput_upmem_alwxsr_MBps=%f\n",
+			printf(" throughput_upmem_wxsr_MOpps=%f throughput_upmem_lwxsr_MOpps=%f throughput_upmem_alwxsr_MOpps=%f\n",
 				input_size / (timer.time[3] + timer.time[4] + timer.time[5] + timer.time[6]),
 				input_size / (timer.time[1] + timer.time[3] + timer.time[4] + timer.time[5] + timer.time[6]),
 				input_size / (timer.time[0] + timer.time[1] + timer.time[3] + timer.time[4] + timer.time[5] + timer.time[6]));
diff --git a/UNI/run-fgbs24a.sh b/UNI/run-fgbs24a.sh
new file mode 100755
index 0000000..92f73bf
--- /dev/null
+++ b/UNI/run-fgbs24a.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+set -e
+
+mkdir -p $(hostname)
+
+# BL: use 2^(BL) B blocks for MRAM <-> WRAM transfers on PIM module
+# T: data type
+# -w: number of un-timed warmup iterations
+# -e: number of timed iterations
+# -i: ignored, always uses 262144 elements
+
+(
+
+echo "prim-benchmarks UNI (dfatool fgbs24a edition)"
+echo "Started at $(date)"
+echo "Revision $(git describe --always)"
+
+for nr_dpus in 2543 2304 2048; do
+	for nr_tasklets in 16; do
+		echo
+		if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=10; then
+			timeout --foreground -k 1m 30m bin/host_code -w 0 -e 100 -i 251658240 -x 1 || true
+		fi
+	done
+done
+echo "Completed at $(date)"
+) | tee "$(hostname)/fgbs24a.txt"
diff --git a/VA/run-fgbs24a.sh b/VA/run-fgbs24a.sh
new file mode 100755
index 0000000..f8941fb
--- /dev/null
+++ b/VA/run-fgbs24a.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+set -e
+
+mkdir -p $(hostname)
+
+# BL: use 2^(BL) B blocks for MRAM <-> WRAM transfers on PIM module
+# T: data type
+# -w: number of un-timed warmup iterations
+# -e: number of timed iterations
+# -i: ignored, always uses 262144 elements
+
+(
+
+echo "prim-benchmarks VA (dfatool fgbs24a edition)"
+echo "Started at $(date)"
+echo "Revision $(git describe --always)"
+
+for nr_dpus in 2543 2304 2048; do
+	for nr_tasklets in 16; do
+		echo
+		if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=10; then
+			timeout --foreground -k 1m 30m bin/host_code -w 0 -e 100 -i 167772160 -x 1 || true
+		fi
+	done
+done
+echo "Completed at $(date)"
+) | tee "$(hostname)/fgbs24a.txt"
-- 
cgit v1.2.3