summary | refs | log | tree | commit | diff
path: root/Microbenchmarks
diff options
context:
space:
mode:
author: Birte Kristina Friesel <birte.friesel@uos.de> 2024-07-11 14:28:29 +0200
committer: Birte Kristina Friesel <birte.friesel@uos.de> 2024-07-11 14:28:29 +0200
commit: e6ae3c96e25efac53f27292416abdc84a2bfed60 (patch)
tree: 84bb7156e1f5e7324c7c14f24de3bc0afb055ea9 /Microbenchmarks
parent: b95a66b300b14852eea90ecc232b3c35e159b3a9 (diff)
alloc and transfer microbenchmarks: switch to GNU parallel
Diffstat (limited to 'Microbenchmarks')
-rwxr-xr-x  Microbenchmarks/CPU-DPU/dimes24-hetsim-alloc.sh  56
-rwxr-xr-x  Microbenchmarks/CPU-DPU/dimes24-hetsim-transfer.sh  80
-rwxr-xr-x  Microbenchmarks/CPU-DPU/dimes24-hetsim.sh  11
3 files changed, 72 insertions, 75 deletions
diff --git a/Microbenchmarks/CPU-DPU/dimes24-hetsim-alloc.sh b/Microbenchmarks/CPU-DPU/dimes24-hetsim-alloc.sh
index a7b76d6..702194f 100755
--- a/Microbenchmarks/CPU-DPU/dimes24-hetsim-alloc.sh
+++ b/Microbenchmarks/CPU-DPU/dimes24-hetsim-alloc.sh
@@ -1,29 +1,41 @@
-#!/bin/sh
+#!/bin/bash
set -e
-echo "prim-benchmarks CPU-DPU alloc (dfatool edition)"
-echo "Started at $(date)"
-echo "Revision $(git describe --always)"
-
-for i in $(seq 1 20); do
- for rank_node in 0 1; do
- sudo limit_ranks_to_numa_node $rank_node
- for j in $(seq 0 16); do
- echo $i/20 $j/16
- ./make-size.sh $j
- n_nops=$((j * 256))
- if make -B NR_RANKS=$i NR_TASKLETS=1 BL=10 DPU_BINARY=\'\"bin/dpu_size\"\' NUMA=1; then
- for l in $(seq 1 100); do
- bin/host_code -c 0 -w 1 -e 0 -x 1 -i 65536 -N $n_nops -I $(size -A bin/dpu_size | awk '($1 == ".text") {print $2/8}') || true
- bin/host_code -c 1 -w 1 -e 0 -x 1 -i 65536 -N $n_nops -I $(size -A bin/dpu_size | awk '($1 == ".text") {print $2/8}') || true
- done
- fi
+mkdir -p log/$(hostname)
+fn=log/$(hostname)/$(date +%Y%m%d).a
+
+run_benchmark_nmc() {
+ local "$@"
+ sudo limit_ranks_to_numa_node ${numa_rank}
+ ./make-size.sh ${size}
+ n_nops=$((size * 256))
+ if make -B NR_RANKS=${nr_ranks} NR_TASKLETS=1 BL=10 DPU_BINARY=\'\"bin/dpu_size\"\' NUMA=1; then
+ for l in $(seq 1 20); do
+ bin/host_code -c ${numa_cpu} -w 1 -e 0 -x 1 -i 65536 -N $n_nops -I $(size -A bin/dpu_size | awk '($1 == ".text") {print $2/8}')
done
- done
+ fi
+ return $?
+}
+
+export -f run_benchmark_nmc
+
+(
-done
+parallel -j1 --eta --joblog ${fn}.1.joblog --resume --header : \
+ run_benchmark_nmc nr_ranks={nr_ranks} numa_rank={numa_rank} numa_cpu={numa_cpu} size={size} \
+ ::: i $(seq 1 5) \
+ ::: numa_rank 0 1 \
+ ::: numa_cpu 0 1 \
+ ::: nr_ranks $(seq 1 20) \
+ ::: size $(seq 0 16) \
-sudo limit_ranks_to_numa_node any
+parallel -j1 --eta --joblog ${fn}.2.joblog --resume --header : \
+ run_benchmark_nmc nr_ranks={nr_ranks} numa_rank={numa_rank} numa_cpu={numa_cpu} size={size} \
+ ::: i $(seq 1 5) \
+ ::: numa_rank any \
+ ::: numa_cpu 0 1 \
+ ::: nr_ranks $(seq 21 40) \
+ ::: size $(seq 0 16) \
-echo "Completed at $(date)"
+) >> ${fn}.txt
diff --git a/Microbenchmarks/CPU-DPU/dimes24-hetsim-transfer.sh b/Microbenchmarks/CPU-DPU/dimes24-hetsim-transfer.sh
index 45714ec..3eb71c5 100755
--- a/Microbenchmarks/CPU-DPU/dimes24-hetsim-transfer.sh
+++ b/Microbenchmarks/CPU-DPU/dimes24-hetsim-transfer.sh
@@ -1,48 +1,44 @@
-#!/bin/sh
+#!/bin/bash
set -e
-echo "prim-benchmarks CPU-DPU alloc (dfatool edition)"
-echo "Started at $(date)"
-echo "Revision $(git describe --always)"
+mkdir -p log/$(hostname)
+fn=log/$(hostname)/$(date +%Y%m%d).t
./make-size.sh 0
-for i in $(seq 1 20); do
- for k in BROADCAST; do
- # BROADCAST sends the same data to all DPUs, so data size must not exceed the amount of MRAM available on a single DPU (i.e., 64 MB)
- for l in 4194304 6291456; do
- make -B NR_RANKS=$i NR_TASKLETS=1 BL=10 TRANSFER=$k NUMA=1
- for numa_rank in 0 1; do
- sudo limit_ranks_to_numa_node $numa_rank
- for numa_in in 0 1; do
- for numa_out in 0 1; do
- for numa_cpu in 0 1; do
- bin/host_code -a $numa_in -b $numa_out -c $numa_cpu -w 0 -e 100 -x 1 -N 0 -I $(size -A bin/dpu_code | awk '($1 == ".text") {print $2/8}') -i $l
- done
- done
- done
- done
- done
- done
-
- # utilize 32MiB / 50% of per-DPU MRAM capacity -- otherwise DRAM capacity per NUMA node is insufficient
- for numa_rank in 0 1; do
- sudo limit_ranks_to_numa_node $numa_rank
- for numa_in in 0 1; do
- for numa_out in 0 1; do
- for numa_cpu in 0 1; do
- make -B NR_RANKS=$i NR_TASKLETS=1 BL=10 TRANSFER=PUSH NUMA=1
- bin/host_code -a $numa_in -b $numa_out -c $numa_cpu -w 0 -e 100 -x 0 -N 0 -I $(size -A bin/dpu_code | awk '($1 == ".text") {print $2/8}') -i 1
- bin/host_code -a $numa_in -b $numa_out -c $numa_cpu -w 0 -e 100 -x 0 -N 0 -I $(size -A bin/dpu_code | awk '($1 == ".text") {print $2/8}') -i 4194304
- make -B NR_RANKS=$i NR_TASKLETS=1 BL=10 TRANSFER=SERIAL NUMA=1
- bin/host_code -a $numa_in -b $numa_out -c $numa_cpu -w 0 -e 100 -x 0 -N 0 -I $(size -A bin/dpu_code | awk '($1 == ".text") {print $2/8}') -i 1
- done
- done
- done
- done
-done
-
-sudo limit_ranks_to_numa_node any
-
-echo "Completed at $(date)"
+run_benchmark_nmc() {
+ local "$@"
+ sudo limit_ranks_to_numa_node ${numa_rank}
+ make -B NR_RANKS=${nr_ranks} NR_TASKLETS=1 BL=10 TRANSFER=PUSH NUMA=1
+ bin/host_code -a $numa_in -b $numa_out -c $numa_cpu -w 0 -e 20 -x 0 -N 0 -I $(size -A bin/dpu_code | awk '($1 == ".text") {print $2/8}') -i ${input_size}
+ return $?
+}
+
+export -f run_benchmark_nmc
+
+# 16 MiB per DPU
+
+(
+
+parallel -j1 --eta --joblog ${fn}.1.joblog --resume --header : \
+ run_benchmark_nmc nr_ranks={nr_ranks} numa_rank={numa_rank} numa_in={numa_in} numa_out={numa_out} numa_cpu={numa_cpu} input_size={input_size} \
+ ::: i $(seq 1 5) \
+ ::: numa_rank 0 1 \
+ ::: numa_in 0 1 \
+ ::: numa_out 0 1 \
+ ::: numa_cpu 0 1 \
+ ::: nr_ranks $(seq 1 20) \
+ ::: input_size 1 2097152
+
+parallel -j1 --eta --joblog ${fn}.2.joblog --resume --header : \
+ run_benchmark_nmc nr_ranks={nr_ranks} numa_rank={numa_rank} numa_in={numa_in} numa_out={numa_out} numa_cpu={numa_cpu} input_size={input_size} \
+ ::: i $(seq 1 5) \
+ ::: numa_rank any \
+ ::: numa_in 0 1 \
+ ::: numa_out 0 1 \
+ ::: numa_cpu 0 1 \
+ ::: nr_ranks $(seq 21 40) \
+ ::: input_size 1 2097152
+
+) >> ${fn}.txt
diff --git a/Microbenchmarks/CPU-DPU/dimes24-hetsim.sh b/Microbenchmarks/CPU-DPU/dimes24-hetsim.sh
deleted file mode 100755
index 6dfa443..0000000
--- a/Microbenchmarks/CPU-DPU/dimes24-hetsim.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/sh
-
-mkdir -p "log/dimes24-$(hostname)"
-
-ts="$(date +%Y%m%d)"
-
-./dimes24-hetsim-alloc.sh | tee "log/dimes24-$(hostname)/${ts}-alloc.txt"
-./dimes24-hetsim-transfer.sh | tee "log/dimes24-$(hostname)/${ts}-transfer.txt"
-
-xz -f -v -9 -M 800M "log/dimes24-$(hostname)/${ts}-alloc.txt"
-xz -f -v -9 -M 800M "log/dimes24-$(hostname)/${ts}-transfer.txt"