diff options
-rwxr-xr-x | Microbenchmarks/CPU-DPU/dimes24-hetsim-alloc.sh | 47 | ||||
-rwxr-xr-x | Microbenchmarks/CPU-DPU/dimes24-hetsim-transfer.sh | 72 | ||||
-rwxr-xr-x | Microbenchmarks/CPU-DPU/dimes24-hetsim.sh | 17 |
3 files changed, 136 insertions, 0 deletions
diff --git a/Microbenchmarks/CPU-DPU/dimes24-hetsim-alloc.sh b/Microbenchmarks/CPU-DPU/dimes24-hetsim-alloc.sh
new file mode 100755
--- /dev/null
+++ b/Microbenchmarks/CPU-DPU/dimes24-hetsim-alloc.sh
@@ -0,0 +1,47 @@
+#!/bin/sh
+#
+# CPU-DPU allocation benchmark sweep: for every rank count (1..20), rank NUMA
+# node (0, 1) and make-size.sh step (0..16), rebuild the benchmark and run
+# bin/host_code 100 times each with -c 0 and -c 1.
+# Run from this directory; needs sudo for limit_ranks_to_numa_node.
+
+set -e
+
+# Lift the NUMA rank restriction again even if the sweep aborts (set -e exit,
+# Ctrl-C), so that an aborted run does not leave the restriction in place.
+restore_ranks() {
+  sudo limit_ranks_to_numa_node any
+}
+trap restore_ranks EXIT
+trap 'exit 1' HUP INT TERM
+
+echo "prim-benchmarks CPU-DPU alloc (dfatool edition)"
+echo "Started at $(date)"
+echo "Revision $(git describe --always)"
+
+for i in $(seq 1 20); do
+  for rank_node in 0 1; do
+    sudo limit_ranks_to_numa_node "$rank_node"
+    for j in $(seq 0 16); do
+      echo "$i/20 $j/16"
+      ./make-size.sh "$j"
+      n_nops=$((j * 256))
+      if make -B NR_RANKS="$i" NR_TASKLETS=1 BL=10 DPU_BINARY=\'\"bin/dpu_size\"\' NUMA=1; then
+        # The binary does not change until the next make, so compute the -I
+        # argument (size of .text in bytes, divided by 8) once per build.
+        n_insn=$(size -A bin/dpu_size | awk '($1 == ".text") {print $2/8}')
+        for l in $(seq 1 100); do
+          # A failing run is tolerated here: "|| true" keeps set -e from aborting.
+          bin/host_code -c 0 -w 1 -e 0 -x 1 -i 65536 -N "$n_nops" -I "$n_insn" || true
+          bin/host_code -c 1 -w 1 -e 0 -x 1 -i 65536 -N "$n_nops" -I "$n_insn" || true
+        done
+      fi
+    done
+  done
+done
+
+# Normal completion: disarm the trap and restore exactly once.
+trap - EXIT
+restore_ranks
+
+echo "Completed at $(date)"
diff --git a/Microbenchmarks/CPU-DPU/dimes24-hetsim-transfer.sh b/Microbenchmarks/CPU-DPU/dimes24-hetsim-transfer.sh
new file mode 100755
--- /dev/null
+++ b/Microbenchmarks/CPU-DPU/dimes24-hetsim-transfer.sh
@@ -0,0 +1,72 @@
+#!/bin/sh
+#
+# CPU-DPU transfer benchmark sweep: for every rank count (1..20), build the
+# benchmark with TRANSFER=BROADCAST, PUSH and SERIAL and run bin/host_code for
+# each combination of rank NUMA node and the -a/-b/-c arguments (0, 1).
+# Run from this directory; needs sudo for limit_ranks_to_numa_node.
+
+set -e
+
+# Lift the NUMA rank restriction again even if the sweep aborts (failing make
+# or host_code under set -e, Ctrl-C), so it is not left in place afterwards.
+restore_ranks() {
+  sudo limit_ranks_to_numa_node any
+}
+trap restore_ranks EXIT
+trap 'exit 1' HUP INT TERM
+
+# Print the size of the .text section of bin/dpu_code in bytes, divided by 8
+# (the value handed to host_code via -I).
+text_size_div8() {
+  size -A bin/dpu_code | awk '($1 == ".text") {print $2/8}'
+}
+
+echo "prim-benchmarks CPU-DPU transfer (dfatool edition)"
+echo "Started at $(date)"
+echo "Revision $(git describe --always)"
+
+./make-size.sh 0
+
+for i in $(seq 1 20); do
+  for k in BROADCAST; do
+    # BROADCAST sends the same data to all DPUs, so data size must not exceed the amount of MRAM available on a single DPU (i.e., 64 MB)
+    for l in 4194304 6291456; do
+      make -B NR_RANKS="$i" NR_TASKLETS=1 BL=10 TRANSFER="$k" NUMA=1
+      n_insn=$(text_size_div8)
+      for numa_rank in 0 1; do
+        sudo limit_ranks_to_numa_node "$numa_rank"
+        for numa_in in 0 1; do
+          for numa_out in 0 1; do
+            for numa_cpu in 0 1; do
+              bin/host_code -a "$numa_in" -b "$numa_out" -c "$numa_cpu" -w 0 -e 100 -x 1 -N 0 -I "$n_insn" -i "$l"
+            done
+          done
+        done
+      done
+    done
+  done
+
+  # utilize 32MiB / 50% of per-DPU MRAM capacity -- otherwise DRAM capacity per NUMA node is insufficient
+  for numa_rank in 0 1; do
+    sudo limit_ranks_to_numa_node "$numa_rank"
+    for numa_in in 0 1; do
+      for numa_out in 0 1; do
+        for numa_cpu in 0 1; do
+          make -B NR_RANKS="$i" NR_TASKLETS=1 BL=10 TRANSFER=PUSH NUMA=1
+          n_insn=$(text_size_div8)
+          bin/host_code -a "$numa_in" -b "$numa_out" -c "$numa_cpu" -w 0 -e 100 -x 0 -N 0 -I "$n_insn" -i 1
+          bin/host_code -a "$numa_in" -b "$numa_out" -c "$numa_cpu" -w 0 -e 100 -x 0 -N 0 -I "$n_insn" -i 4194304
+          make -B NR_RANKS="$i" NR_TASKLETS=1 BL=10 TRANSFER=SERIAL NUMA=1
+          n_insn=$(text_size_div8)
+          bin/host_code -a "$numa_in" -b "$numa_out" -c "$numa_cpu" -w 0 -e 100 -x 0 -N 0 -I "$n_insn" -i 1
+        done
+      done
+    done
+  done
+done
+
+# Normal completion: disarm the trap and restore exactly once.
+trap - EXIT
+restore_ranks
+
+echo "Completed at $(date)"
diff --git a/Microbenchmarks/CPU-DPU/dimes24-hetsim.sh b/Microbenchmarks/CPU-DPU/dimes24-hetsim.sh
new file mode 100755
--- /dev/null
+++ b/Microbenchmarks/CPU-DPU/dimes24-hetsim.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+#
+# Run the allocation and transfer sweeps, tee their output to
+# log/dimes24-<hostname>/<YYYYMMDD>-{alloc,transfer}.txt and compress the logs
+# with xz. There is no set -e and each pipeline's status is tee's, so a failing
+# sweep does not stop the remaining steps.
+
+log_dir="log/dimes24-$(hostname)"
+ts="$(date +%Y%m%d)"
+
+mkdir -p "$log_dir"
+
+./dimes24-hetsim-alloc.sh | tee "$log_dir/${ts}-alloc.txt"
+./dimes24-hetsim-transfer.sh | tee "$log_dir/${ts}-transfer.txt"
+
+xz -f -v -9 -M 800M "$log_dir/${ts}-alloc.txt"
+xz -f -v -9 -M 800M "$log_dir/${ts}-transfer.txt"