diff options
field | value | date |
---|---|---|
author | Daniel Friesel <daniel.friesel@uos.de> | 2023-06-01 08:03:59 +0200 |
committer | Daniel Friesel <daniel.friesel@uos.de> | 2023-06-01 08:03:59 +0200 |
commit | 7720bd5223c96c5f46efc9033ec023fc4038da46 (patch) | |
tree | dd3a199013dfc5ea583cf93f657dd9976c63ebe0 /SEL | |
parent | 0540fa9e26e2c8e7e11fcf3e5444e4981f811e1c (diff) | |
port SEL NMC to dfatool
Diffstat (limited to 'SEL')
-rw-r--r-- | SEL/baselines/cpu/Makefile | 6 | ||||
-rw-r--r-- | SEL/baselines/cpu/app_baseline.c | 10 | ||||
-rw-r--r-- | SEL/host/app.c | 51 | ||||
-rwxr-xr-x | SEL/run-paper-strong-full.sh | 25 | ||||
-rwxr-xr-x | SEL/run-paper-strong-rank.sh | 26 | ||||
-rwxr-xr-x | SEL/run-paper-weak.sh | 23 |
6 files changed, 114 insertions, 27 deletions
diff --git a/SEL/baselines/cpu/Makefile b/SEL/baselines/cpu/Makefile index 02d930c..81f6d17 100644 --- a/SEL/baselines/cpu/Makefile +++ b/SEL/baselines/cpu/Makefile @@ -16,13 +16,15 @@ sel_O2: app_baseline.c run: sel ./sel -i 1258291200 -t 4 +# upstream code does not include -e 20 and does 3 iterations instead + .PHONY: run_O0 run_O0: sel_O0 - ./sel_O0 -i 1258291200 -t 4 + ./sel_O0 -i 1258291200 -t 4 -e 20 .PHONY: run_O2 run_O2: sel_O2 - ./sel_O2 -i 1258291200 -t 4 + ./sel_O2 -i 1258291200 -t 4 -e 20 .PHONY: clean clean: diff --git a/SEL/baselines/cpu/app_baseline.c b/SEL/baselines/cpu/app_baseline.c index 04a569f..6ee1cae 100644 --- a/SEL/baselines/cpu/app_baseline.c +++ b/SEL/baselines/cpu/app_baseline.c @@ -147,16 +147,12 @@ int main(int argc, char **argv) { nr_threads++; if (rep >= p.n_warmup) { - printf("[::] n_threads=%d e_type=%s n_elements=%d " - "| throughput_cpu_MBps=%f\n", + printf("[::] SEL CPU | n_threads=%d e_type=%s n_elements=%d " + "| throughput_MBps=%f", nr_threads, XSTR(T), file_size, file_size * 2 * sizeof(T) / timer.time[0]); - printf("[::] n_threads=%d e_type=%s n_elements=%d " - "| throughput_cpu_MOpps=%f\n", - nr_threads, XSTR(T), file_size, + printf(" throughput_MOpps=%f", file_size / timer.time[0]); - printf("[::] n_threads=%d e_type=%s n_elements=%d |", - nr_threads, XSTR(T), file_size); printall(&timer, 0); } } diff --git a/SEL/host/app.c b/SEL/host/app.c index ef07cf9..2194c81 100644 --- a/SEL/host/app.c +++ b/SEL/host/app.c @@ -22,6 +22,9 @@ #define DPU_BINARY "./bin/dpu_code" #endif +#define XSTR(x) STR(x) +#define STR(x) #x + #if ENERGY #include <dpu_probe.h> #endif @@ -208,6 +211,35 @@ int main(int argc, char **argv) { // Free memory free(results_scan); + + // Check output + bool status = true; + if(accum != total_count) status = false; + for (i = 0; i < accum; i++) { + if(C[i] != bufferC[i]){ + status = false; +#if PRINT + printf("%d: %lu -- %lu\n", i, C[i], bufferC[i]); +#endif + } + } + if (status) { + printf("[" ANSI_COLOR_GREEN 
"OK" ANSI_COLOR_RESET "] Outputs are equal\n"); + if (rep >= p.n_warmup) { + printf("[::] SEL NMC | n_dpus=%d n_tasklets=%d e_type=%s block_size_B=%d n_elements=%d " + "| throughput_cpu_MBps=%f throughput_pim_MBps=%f throughput_MBps=%f", + nr_of_dpus, NR_TASKLETS, XSTR(T), BLOCK_SIZE, input_size, + input_size * sizeof(T) / timer.time[0], + input_size * sizeof(T) / timer.time[2], + input_size * sizeof(T) / (timer.time[1] + timer.time[2] + timer.time[3] + timer.time[4])); + printf(" throughput_cpu_MOpps=%f throughput_pim_MOpps=%f throughput_MOpps=%f", + input_size / timer.time[0], + input_size / timer.time[2], + input_size / (timer.time[1] + timer.time[2] + timer.time[3] + timer.time[4])); + printall(&timer, 4); + } else { + printf("[" ANSI_COLOR_RED "ERROR" ANSI_COLOR_RESET "] Outputs differ!\n"); + } } // Print timing results @@ -228,28 +260,11 @@ int main(int argc, char **argv) { printf("DPU Energy (J): %f\t", energy); #endif - // Check output - bool status = true; - if(accum != total_count) status = false; - for (i = 0; i < accum; i++) { - if(C[i] != bufferC[i]){ - status = false; -#if PRINT - printf("%d: %lu -- %lu\n", i, C[i], bufferC[i]); -#endif - } - } - if (status) { - printf("[" ANSI_COLOR_GREEN "OK" ANSI_COLOR_RESET "] Outputs are equal\n"); - } else { - printf("[" ANSI_COLOR_RED "ERROR" ANSI_COLOR_RESET "] Outputs differ!\n"); - } - // Deallocation free(A); free(C); free(C2); DPU_ASSERT(dpu_free(dpu_set)); - return status ? 
0 : -1; + return 0; } diff --git a/SEL/run-paper-strong-full.sh b/SEL/run-paper-strong-full.sh new file mode 100755 index 0000000..cc1a99d --- /dev/null +++ b/SEL/run-paper-strong-full.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +set -e + +# BL: use 2^(BL) B blocks for MRAM <-> WRAM transfers on PIM module +# T: data type +# -w: number of un-timed warmup iterations +# -e: number of timed iterations +# -i; ignored, always uses 262144 elements + +( + +echo "prim-benchmarks UNI strong-full (dfatool edition)" +echo "Started at $(date)" +echo "Revision $(git describe --always)" + +for nr_dpus in 256 512 1024 2048; do + for nr_tasklets in 1 2 4 8 16; do + echo + if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=10; then + timeout --foreground -k 1m 30m bin/host_code -w 0 -e 100 -i 251658240 -x 1 || true + fi + done +done +) | tee log-paper-strong-full.txt diff --git a/SEL/run-paper-strong-rank.sh b/SEL/run-paper-strong-rank.sh new file mode 100755 index 0000000..6cffd65 --- /dev/null +++ b/SEL/run-paper-strong-rank.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +set -e + +# BL: use 2^(BL) B blocks for MRAM <-> WRAM transfers on PIM module +# T: data type +# -w: number of un-timed warmup iterations +# -e: number of timed iterations +# -i; ignored, always uses 262144 elements + +( + +echo "prim-benchmarks UNI strong-rank (dfatool edition)" +echo "Started at $(date)" +echo "Revision $(git describe --always)" + +# 256 and 512 are not part of upstream config space +for nr_dpus in 512 256 1 4 16 64; do + for nr_tasklets in 1 2 4 8 16; do + echo + if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=10; then + timeout --foreground -k 1m 30m bin/host_code -w 0 -e 100 -i 3932160 -x 1 || true + fi + done +done +) | tee log-paper-strong-rank.txt diff --git a/SEL/run-paper-weak.sh b/SEL/run-paper-weak.sh new file mode 100755 index 0000000..5e83c5e --- /dev/null +++ b/SEL/run-paper-weak.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +set -e + +# BL: use 2^(BL) B blocks for MRAM <-> WRAM transfers on 
PIM module +# T: data type +# -w: number of un-timed warmup iterations +# -e: number of timed iterations +# -i; ignored, always uses 262144 elements + +echo "prim-benchmarks UNI weak (dfatool edition)" +echo "Started at $(date)" +echo "Revision $(git describe --always)" + +# 256 and 512 are not part of upstream config space +for nr_dpus in 512 256 1 4 16 64; do + for nr_tasklets in 1 2 4 8 16; do + echo + if make -B NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=10; then + timeout --foreground -k 1m 30m bin/host_code -w 0 -e 100 -i 3932160 -x 0 || true + fi + done +done | tee log-paper-weak.txt |