summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xTRNS/run-fgbs24a.sh29
-rwxr-xr-xTS/run-fgbs24a.sh31
-rw-r--r--UNI/host/app.c2
-rwxr-xr-xUNI/run-fgbs24a.sh28
-rwxr-xr-xVA/run-fgbs24a.sh28
5 files changed, 117 insertions, 1 deletions
diff --git a/TRNS/run-fgbs24a.sh b/TRNS/run-fgbs24a.sh
new file mode 100755
index 0000000..ffdadcf
--- /dev/null
+++ b/TRNS/run-fgbs24a.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+set -e
+
+mkdir -p $(hostname)
+
+# BL: use 2^(BL) B blocks for MRAM <-> WRAM transfers on PIM module
+# T: data type
+# -w: number of un-timed warmup iterations
+# -e: number of timed iterations
+# -i: ignored, always uses 262144 elements
+
+(
+
+echo "prim-benchmarks TRNS strong-full (dfatool fgbs24a edition)"
+echo "Started at $(date)"
+echo "Revision $(git describe --always)"
+
+for nr_dpus in 2543 2304 2048; do
+ for nr_tasklets in 16; do
+ echo
+ if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets}; then
+ # upstream uses -p 2048, but then the number of DPUs is always constant...
+ timeout --foreground -k 1m 180m bin/host_code -w 0 -e 100 -p $nr_dpus -o 12288 -x 1 || true
+ fi
+ done
+done
+echo "Completed at $(date)"
+) | tee "$(hostname)/fgbs24a.txt"
diff --git a/TS/run-fgbs24a.sh b/TS/run-fgbs24a.sh
new file mode 100755
index 0000000..91b46f9
--- /dev/null
+++ b/TS/run-fgbs24a.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+set -e
+
+mkdir -p $(hostname)
+
+# BL: use 2^(BL) B blocks for MRAM <-> WRAM transfers on PIM module
+# T: data type
+# -w: number of un-timed warmup iterations
+# -e: number of timed iterations
+# -i: ignored, always uses 262144 elements
+
+(
+
+echo "prim-benchmarks TS (dfatool fgbs24a edition)"
+echo "Started at $(date)"
+echo "Revision $(git describe --always)"
+
+for nr_dpus in 2543 2304 2048; do
+ # upstream code only works with up to 8 tasklets (funky). NOTE(review): 12 and 16 are also run below -- confirm this fork lifts that limit.
+ for nr_tasklets in 8 12 16; do
+ echo
+ # upstream code did not respect $BL in the makefile and used 256B (BL=8) instead.
+ # This appears to be faster than BL=10.
+ if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=8; then
+ timeout --foreground -k 1m 30m bin/ts_host -w 0 -e 100 -n 33554432 || true
+ fi
+ done
+done
+echo "Completed at $(date)"
+) | tee "$(hostname)/fgbs24a.txt"
diff --git a/UNI/host/app.c b/UNI/host/app.c
index e624fa2..596d0cf 100644
--- a/UNI/host/app.c
+++ b/UNI/host/app.c
@@ -319,7 +319,7 @@ int main(int argc, char **argv) {
input_size / timer.time[2],
input_size / (timer.time[4]),
input_size / (timer.time[0] + timer.time[1] + timer.time[3] + timer.time[4] + timer.time[5] + timer.time[6] + timer.time[7]));
- printf(" throughput_upmem_wxsr_MBps=%f throughput_upmem_lwxsr_MBps=%f throughput_upmem_alwxsr_MBps=%f\n",
+ printf(" throughput_upmem_wxsr_MOpps=%f throughput_upmem_lwxsr_MOpps=%f throughput_upmem_alwxsr_MOpps=%f\n",
input_size / (timer.time[3] + timer.time[4] + timer.time[5] + timer.time[6]),
input_size / (timer.time[1] + timer.time[3] + timer.time[4] + timer.time[5] + timer.time[6]),
input_size / (timer.time[0] + timer.time[1] + timer.time[3] + timer.time[4] + timer.time[5] + timer.time[6]));
diff --git a/UNI/run-fgbs24a.sh b/UNI/run-fgbs24a.sh
new file mode 100755
index 0000000..92f73bf
--- /dev/null
+++ b/UNI/run-fgbs24a.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+set -e
+
+mkdir -p $(hostname)
+
+# BL: use 2^(BL) B blocks for MRAM <-> WRAM transfers on PIM module
+# T: data type
+# -w: number of un-timed warmup iterations
+# -e: number of timed iterations
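+# -i: ignored, always uses 262144 elements -- NOTE(review): this script passes -i 251658240 below; confirm whether this inherited remark still applies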
+
+(
+
+echo "prim-benchmarks UNI (dfatool fgbs24a edition)"
+echo "Started at $(date)"
+echo "Revision $(git describe --always)"
+
+for nr_dpus in 2543 2304 2048; do
+ for nr_tasklets in 16; do
+ echo
+ if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=10; then
+ timeout --foreground -k 1m 30m bin/host_code -w 0 -e 100 -i 251658240 -x 1 || true
+ fi
+ done
+done
+echo "Completed at $(date)"
+) | tee "$(hostname)/fgbs24a.txt"
diff --git a/VA/run-fgbs24a.sh b/VA/run-fgbs24a.sh
new file mode 100755
index 0000000..f8941fb
--- /dev/null
+++ b/VA/run-fgbs24a.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+set -e
+
+mkdir -p $(hostname)
+
+# BL: use 2^(BL) B blocks for MRAM <-> WRAM transfers on PIM module
+# T: data type
+# -w: number of un-timed warmup iterations
+# -e: number of timed iterations
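+# -i: ignored, always uses 262144 elements -- NOTE(review): this script passes -i 167772160 below; confirm whether this inherited remark still applies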
+
+(
+
+echo "prim-benchmarks VA (dfatool fgbs24a edition)"
+echo "Started at $(date)"
+echo "Revision $(git describe --always)"
+
+for nr_dpus in 2543 2304 2048; do
+ for nr_tasklets in 16; do
+ echo
+ if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=10; then
+ timeout --foreground -k 1m 30m bin/host_code -w 0 -e 100 -i 167772160 -x 1 || true
+ fi
+ done
+done
+echo "Completed at $(date)"
+) | tee "$(hostname)/fgbs24a.txt"