diff options
-rwxr-xr-x | Microbenchmarks/CPU-DPU/dimes24-hetsim-alloc.sh | 56 | ||||
-rwxr-xr-x | Microbenchmarks/CPU-DPU/dimes24-hetsim-transfer.sh | 80 | ||||
-rwxr-xr-x | Microbenchmarks/CPU-DPU/dimes24-hetsim.sh | 11 |
3 files changed, 72 insertions, 75 deletions
diff --git a/Microbenchmarks/CPU-DPU/dimes24-hetsim-alloc.sh b/Microbenchmarks/CPU-DPU/dimes24-hetsim-alloc.sh index a7b76d6..702194f 100755 --- a/Microbenchmarks/CPU-DPU/dimes24-hetsim-alloc.sh +++ b/Microbenchmarks/CPU-DPU/dimes24-hetsim-alloc.sh @@ -1,29 +1,41 @@ -#!/bin/sh +#!/bin/bash set -e -echo "prim-benchmarks CPU-DPU alloc (dfatool edition)" -echo "Started at $(date)" -echo "Revision $(git describe --always)" - -for i in $(seq 1 20); do - for rank_node in 0 1; do - sudo limit_ranks_to_numa_node $rank_node - for j in $(seq 0 16); do - echo $i/20 $j/16 - ./make-size.sh $j - n_nops=$((j * 256)) - if make -B NR_RANKS=$i NR_TASKLETS=1 BL=10 DPU_BINARY=\'\"bin/dpu_size\"\' NUMA=1; then - for l in $(seq 1 100); do - bin/host_code -c 0 -w 1 -e 0 -x 1 -i 65536 -N $n_nops -I $(size -A bin/dpu_size | awk '($1 == ".text") {print $2/8}') || true - bin/host_code -c 1 -w 1 -e 0 -x 1 -i 65536 -N $n_nops -I $(size -A bin/dpu_size | awk '($1 == ".text") {print $2/8}') || true - done - fi +mkdir -p log/$(hostname) +fn=log/$(hostname)/$(date +%Y%m%d).a + +run_benchmark_nmc() { + local "$@" + sudo limit_ranks_to_numa_node ${numa_rank} + ./make-size.sh ${size} + n_nops=$((size * 256)) + if make -B NR_RANKS=${nr_ranks} NR_TASKLETS=1 BL=10 DPU_BINARY=\'\"bin/dpu_size\"\' NUMA=1; then + for l in $(seq 1 20); do + bin/host_code -c ${numa_cpu} -w 1 -e 0 -x 1 -i 65536 -N $n_nops -I $(size -A bin/dpu_size | awk '($1 == ".text") {print $2/8}') done - done + fi + return $? +} + +export -f run_benchmark_nmc + +( -done +parallel -j1 --eta --joblog ${fn}.1.joblog --resume --header : \ + run_benchmark_nmc nr_ranks={nr_ranks} numa_rank={numa_rank} numa_cpu={numa_cpu} size={size} \ + ::: i $(seq 1 5) \ + ::: numa_rank 0 1 \ + ::: numa_cpu 0 1 \ + ::: nr_ranks $(seq 1 20) \ + ::: size $(seq 0 16) \ -sudo limit_ranks_to_numa_node any +parallel -j1 --eta --joblog ${fn}.2.joblog --resume --header : \ + run_benchmark_nmc nr_ranks={nr_ranks} numa_rank={numa_rank} numa_cpu={numa_cpu} size={size} \ + ::: i $(seq 1 5) \ + ::: numa_rank any \ + ::: numa_cpu 0 1 \ + ::: nr_ranks $(seq 21 40) \ + ::: size $(seq 0 16) \ -echo "Completed at $(date)" +) >> ${fn}.txt diff --git a/Microbenchmarks/CPU-DPU/dimes24-hetsim-transfer.sh b/Microbenchmarks/CPU-DPU/dimes24-hetsim-transfer.sh index 45714ec..3eb71c5 100755 --- a/Microbenchmarks/CPU-DPU/dimes24-hetsim-transfer.sh +++ b/Microbenchmarks/CPU-DPU/dimes24-hetsim-transfer.sh @@ -1,48 +1,44 @@ -#!/bin/sh +#!/bin/bash set -e -echo "prim-benchmarks CPU-DPU alloc (dfatool edition)" -echo "Started at $(date)" -echo "Revision $(git describe --always)" +mkdir -p log/$(hostname) +fn=log/$(hostname)/$(date +%Y%m%d).t ./make-size.sh 0 -for i in $(seq 1 20); do - for k in BROADCAST; do - # BROADCAST sends the same data to all DPUs, so data size must not exceed the amount of MRAM available on a single DPU (i.e., 64 MB) - for l in 4194304 6291456; do - make -B NR_RANKS=$i NR_TASKLETS=1 BL=10 TRANSFER=$k NUMA=1 - for numa_rank in 0 1; do - sudo limit_ranks_to_numa_node $numa_rank - for numa_in in 0 1; do - for numa_out in 0 1; do - for numa_cpu in 0 1; do - bin/host_code -a $numa_in -b $numa_out -c $numa_cpu -w 0 -e 100 -x 1 -N 0 -I $(size -A bin/dpu_code | awk '($1 == ".text") {print $2/8}') -i $l - done - done - done - done - done - done - - # utilize 32MiB / 50% of per-DPU MRAM capacity -- otherwise DRAM capacity per NUMA node is insufficient - for numa_rank in 0 1; do - sudo limit_ranks_to_numa_node $numa_rank - for numa_in in 0 1; do - for numa_out in 0 1; do - for numa_cpu in 0 1; do - make -B NR_RANKS=$i NR_TASKLETS=1 BL=10 TRANSFER=PUSH NUMA=1 - bin/host_code -a $numa_in -b $numa_out -c $numa_cpu -w 0 -e 100 -x 0 -N 0 -I $(size -A bin/dpu_code | awk '($1 == ".text") {print $2/8}') -i 1 - bin/host_code -a $numa_in -b $numa_out -c $numa_cpu -w 0 -e 100 -x 0 -N 0 -I $(size -A bin/dpu_code | awk '($1 == ".text") {print $2/8}') -i 4194304 - make -B NR_RANKS=$i NR_TASKLETS=1 BL=10 TRANSFER=SERIAL NUMA=1 - bin/host_code -a $numa_in -b $numa_out -c $numa_cpu -w 0 -e 100 -x 0 -N 0 -I $(size -A bin/dpu_code | awk '($1 == ".text") {print $2/8}') -i 1 - done - done - done - done -done - -sudo limit_ranks_to_numa_node any - -echo "Completed at $(date)" +run_benchmark_nmc() { + local "$@" + sudo limit_ranks_to_numa_node ${numa_rank} + make -B NR_RANKS=${nr_ranks} NR_TASKLETS=1 BL=10 TRANSFER=PUSH NUMA=1 + bin/host_code -a $numa_in -b $numa_out -c $numa_cpu -w 0 -e 20 -x 0 -N 0 -I $(size -A bin/dpu_code | awk '($1 == ".text") {print $2/8}') -i ${input_size} + return $? +} + +export -f run_benchmark_nmc + +# 16 MiB per DPU + +( + +parallel -j1 --eta --joblog ${fn}.1.joblog --resume --header : \ + run_benchmark_nmc nr_ranks={nr_ranks} numa_rank={numa_rank} numa_in={numa_in} numa_out={numa_out} numa_cpu={numa_cpu} input_size={input_size} \ + ::: i $(seq 1 5) \ + ::: numa_rank 0 1 \ + ::: numa_in 0 1 \ + ::: numa_out 0 1 \ + ::: numa_cpu 0 1 \ + ::: nr_ranks $(seq 1 20) \ + ::: input_size 1 2097152 + +parallel -j1 --eta --joblog ${fn}.2.joblog --resume --header : \ + run_benchmark_nmc nr_ranks={nr_ranks} numa_rank={numa_rank} numa_in={numa_in} numa_out={numa_out} numa_cpu={numa_cpu} input_size={input_size} \ + ::: i $(seq 1 5) \ + ::: numa_rank any \ + ::: numa_in 0 1 \ + ::: numa_out 0 1 \ + ::: numa_cpu 0 1 \ + ::: nr_ranks $(seq 21 40) \ + ::: input_size 1 2097152 + +) >> ${fn}.txt diff --git a/Microbenchmarks/CPU-DPU/dimes24-hetsim.sh b/Microbenchmarks/CPU-DPU/dimes24-hetsim.sh deleted file mode 100755 index 6dfa443..0000000 --- a/Microbenchmarks/CPU-DPU/dimes24-hetsim.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/sh - -mkdir -p "log/dimes24-$(hostname)" - -ts="$(date +%Y%m%d)" - -./dimes24-hetsim-alloc.sh | tee "log/dimes24-$(hostname)/${ts}-alloc.txt" -./dimes24-hetsim-transfer.sh | tee "log/dimes24-$(hostname)/${ts}-transfer.txt" - -xz -f -v -9 -M 800M "log/dimes24-$(hostname)/${ts}-alloc.txt" -xz -f -v -9 -M 800M "log/dimes24-$(hostname)/${ts}-transfer.txt" |