blob: 778efa8afcedc29c8531be10a744ddc4367ec058 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
|
#!/bin/bash
#
# Parameter sweep for the prim-benchmarks STREAM microbenchmark.
#
# For every combination of input size, DPU count, tasklet count, data type
# and operation, the benchmark is rebuilt with make and then executed via
# bin/host_code. Everything written to stdout is mirrored into
# log-<hostname>-ndpus.txt by tee; warnings from this script go to stderr
# so they do not end up in that log.
#
# Build-time parameters (passed to make):
#   BL: use 2^(BL) B blocks for MRAM <-> WRAM transfers on PIM module
#   T:  data type
# Run-time parameters (passed to bin/host_code):
#   -w: number of un-timed warmup iterations
#   -e: number of timed iterations
#   -i: input size (number of elements, not number of bytes!)
#       2097152 elements (2M) is the maximum for 64-bit types
#       (2M * 8 B = 16M per DPU)

# -u catches misspelled loop variables; pipefail makes a failing tee
# (e.g. unwritable log file) visible in the script's exit status.
set -euo pipefail

#######################################
# Force-rebuild the benchmark for the current loop configuration.
# Globals:   op, nr_dpus, nr_tasklets, dt (read)
# Arguments: $1 - value for UNROLL (1 or 0)
# Returns:   exit status of make
#######################################
build() {
  local unroll=$1
  make -B OP="${op}" NR_DPUS="${nr_dpus}" NR_TASKLETS="${nr_tasklets}" \
    BL=10 T="${dt}" UNROLL="${unroll}" \
    WITH_ALLOC_OVERHEAD=1 WITH_LOAD_OVERHEAD=1 WITH_FREE_OVERHEAD=1
}

(
  echo "prim-benchmarks STREAM microbenchmark (dfatool edition)"
  echo "Started at $(date)"
  echo "Revision $(git describe --always)"
  for i in 4096 16384 131072 1048576 2097152; do
    for nr_dpus in 1 4 8 16 32 64 128 256 512 768 1024 1536 2048 2304 2542; do
      for nr_tasklets in 8 12 16; do
        for dt in uint8_t uint16_t uint32_t uint64_t float double; do
          for op in copy copyw add scale triad; do
            echo
            # Try UNROLL=1 first; if that build fails, retry with UNROLL=0.
            if build 1 || build 0; then
              # A failing or timed-out run must not abort the sweep, so the
              # status is captured instead of letting set -e act on it.
              # timeout sends TERM after 30m and KILL one minute later.
              rv=0
              timeout --foreground -k 1m 30m bin/host_code -w 0 -e 40 -i "${i}" || rv=$?
              if (( rv != 0 )); then
                echo "warning: bin/host_code exited with status ${rv}" \
                  "(i=${i} nr_dpus=${nr_dpus} nr_tasklets=${nr_tasklets}" \
                  "dt=${dt} op=${op})" >&2
              fi
            else
              echo "warning: build failed, skipping" \
                "(nr_dpus=${nr_dpus} nr_tasklets=${nr_tasklets}" \
                "dt=${dt} op=${op})" >&2
            fi
          done
        done
      done
    done
  done
  echo "Completed at $(date)"
) | tee "log-$(hostname)-ndpus.txt"
|