diff options
author | Birte Kristina Friesel <birte.friesel@uos.de> | 2024-02-29 15:44:37 +0100 |
---|---|---|
committer | Birte Kristina Friesel <birte.friesel@uos.de> | 2024-02-29 15:44:37 +0100 |
commit | f96944231fdaa50d0ff92e28783025ec63eaea91 (patch) | |
tree | 6107b342564d70c99d9c359b2c43ff88a95ee899 /Microbenchmarks/STREAM/run-rank.sh | |
parent | e414e0ec440afa48b221589108e03e4032bddfc0 (diff) |
STREAM: adjust run-rank BL range
Diffstat (limited to 'Microbenchmarks/STREAM/run-rank.sh')
-rwxr-xr-x | Microbenchmarks/STREAM/run-rank.sh | 6 |
1 file changed, 3 insertions, 3 deletions
```diff
diff --git a/Microbenchmarks/STREAM/run-rank.sh b/Microbenchmarks/STREAM/run-rank.sh
index 8a831c3..89bac24 100755
--- a/Microbenchmarks/STREAM/run-rank.sh
+++ b/Microbenchmarks/STREAM/run-rank.sh
@@ -10,15 +10,15 @@ echo "Revision $(git describe --always)"
 # Each DPU uses three buffers, each of which holds $i * sizeof($dt) bytes.
 # With a total MRAM capacity of 64M, this gives us ~21M per buffer, or 16M when rounding down to the next power of two.
 # With a maximum data type width of 8B (uint64_t, double), this limits the number of elements per DPU to 2097152.
-for i in 2097152 1048576 131072 16384 4096; do
-	for dt in uint64_t uint8_t uint16_t uint32_t float double; do
+for dt in uint64_t uint32_t ; do #uint8_t uint16_t float double; do
+	for i in 2097152 1048576 524288 131072 16384 4096; do
 		for nr_dpus in 1 4 8 16 32 48 64; do
 			for nr_tasklets in 1 8 12 16; do
 				for op in triad scale add copy copyw; do
 					# BL: use 2^(BL) B blocks for MRAM <-> WRAM transfers on PIM module
 					# Our largest data type holds 8B, so the minimum block size is 3.
 					# From a performance perspective, 8 to 10 is usually best for sequential operations.
-					for bl in 3 4 5 6 8 10; do
+					for bl in 3 8 10; do
 						echo
 						if make -B OP=${op} NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=${bl} T=${dt} UNROLL=1 WITH_ALLOC_OVERHEAD=0 WITH_LOAD_OVERHEAD=0 WITH_FREE_OVERHEAD=0 \
 							|| make -B OP=${op} NR_DPUS=${nr_dpus} NR_TASKLETS=${nr_tasklets} BL=${bl} T=${dt} UNROLL=0 WITH_ALLOC_OVERHEAD=0 WITH_LOAD_OVERHEAD=0 WITH_FREE_OVERHEAD=0; then
```