diff options
author | Birte Kristina Friesel <birte.friesel@uos.de> | 2024-02-26 11:23:04 +0100 |
---|---|---|
committer | Birte Kristina Friesel <birte.friesel@uos.de> | 2024-02-26 11:23:04 +0100 |
commit | b4362b2c7ec261e69da085b95ecbc5073b83fc5e (patch) | |
tree | 527f05b0edbaab72c872e7efa984b32c9cbe1c7a | |
parent | 453879e98456477c571be90deebe2b551b66f5c7 (diff) |
CPU-DPU microbenchmarks: explicitly log number of instructions
-rw-r--r-- | Microbenchmarks/CPU-DPU/host/app.c | 8 | ||||
-rwxr-xr-x | Microbenchmarks/CPU-DPU/run-alloc-rank-stress.sh | 4 | ||||
-rwxr-xr-x | Microbenchmarks/CPU-DPU/run-alloc-rank.sh | 2 | ||||
-rw-r--r-- | Microbenchmarks/CPU-DPU/support/params.h | 6 |
4 files changed, 12 insertions, 8 deletions
diff --git a/Microbenchmarks/CPU-DPU/host/app.c b/Microbenchmarks/CPU-DPU/host/app.c index bb86d09..ba63d89 100644 --- a/Microbenchmarks/CPU-DPU/host/app.c +++ b/Microbenchmarks/CPU-DPU/host/app.c @@ -103,9 +103,9 @@ int main(int argc, char **argv) { read_input(A, B, input_size); //printf("NR_TASKLETS\t%d\tBL\t%d\n", NR_TASKLETS, BL); - printf("[::] NMC reconfiguration | n_dpus=%d n_ranks=%d n_tasklets=%d n_nops=%d e_type=%s n_elements=%u e_mode=%s" + printf("[::] NMC reconfiguration | n_dpus=%d n_ranks=%d n_tasklets=%d n_nops=%d n_instr=%d e_type=%s n_elements=%u e_mode=%s" " | latency_dpu_alloc_ns=%lu latency_dpu_load_ns=%lu latency_dpu_get_ns=%lu\n", - nr_of_dpus, nr_of_ranks, NR_TASKLETS, p.n_nops, XSTR(T), transfer_size, transfer_mode, + nr_of_dpus, nr_of_ranks, NR_TASKLETS, p.n_nops, p.n_instr, XSTR(T), transfer_size, transfer_mode, timer.nanoseconds[4], timer.nanoseconds[5], timer.nanoseconds[6]); // Loop over main kernel @@ -170,9 +170,9 @@ int main(int argc, char **argv) { stop(&timer, 3); if (rep >= p.n_warmup) { - printf("[::] transfer UPMEM | n_dpus=%d n_ranks=%d n_tasklets=%d n_nops=%d e_type=%s n_elements=%u e_mode=%s" + printf("[::] transfer UPMEM | n_dpus=%d n_ranks=%d n_tasklets=%d n_nops=%d n_instr=%d e_type=%s n_elements=%u e_mode=%s" " | latency_dram_mram_ns=%lu latency_mram_dram_ns=%lu throughput_dram_mram_Bps=%f throughput_mram_dram_Bps=%f", - nr_of_dpus, nr_of_ranks, NR_TASKLETS, p.n_nops, XSTR(T), transfer_size, transfer_mode, + nr_of_dpus, nr_of_ranks, NR_TASKLETS, p.n_nops, p.n_instr, XSTR(T), transfer_size, transfer_mode, timer.nanoseconds[1], timer.nanoseconds[3], transfer_size * sizeof(T) * 1e9 / timer.nanoseconds[1], transfer_size * sizeof(T) * 1e9 / timer.nanoseconds[3]); diff --git a/Microbenchmarks/CPU-DPU/run-alloc-rank-stress.sh b/Microbenchmarks/CPU-DPU/run-alloc-rank-stress.sh index da136c0..e166a4e 100755 --- a/Microbenchmarks/CPU-DPU/run-alloc-rank-stress.sh +++ b/Microbenchmarks/CPU-DPU/run-alloc-rank-stress.sh @@ -5,6 +5,8 @@ mkdir -p "$(hostname)-alloc" NCORES=$(grep -c '^processor' /proc/cpuinfo) cleanexit() { pkill -f "stress -c ${NCORES}" + xz -f -v -9 -M 800M "$(hostname)-alloc/rank-stress-c${NCORES}.txt" + exit 0 } trap cleanexit TERM INT @@ -14,5 +16,3 @@ stress -c ${NCORES} & ./run-alloc-rank.sh | tee "$(hostname)-alloc/rank-stress-c${NCORES}.txt" cleanexit - -xz -f -v -9 -M 800M "$(hostname)-alloc/rank-stress-c${NCORES}.txt" diff --git a/Microbenchmarks/CPU-DPU/run-alloc-rank.sh b/Microbenchmarks/CPU-DPU/run-alloc-rank.sh index a6907fe..2e20e52 100755 --- a/Microbenchmarks/CPU-DPU/run-alloc-rank.sh +++ b/Microbenchmarks/CPU-DPU/run-alloc-rank.sh @@ -16,7 +16,7 @@ for i in 1 4 8 16 32 48 64; do n_nops=$((j * 256)) if make -B NR_DPUS=$i NR_TASKLETS=1 BL=10 DPU_BINARY=\'\"bin/dpu_size\"\'; then for l in $(seq 1 100); do - bin/host_code -w 1 -e 0 -x 1 -i 65536 -N $n_nops || true + bin/host_code -w 1 -e 0 -x 1 -i 65536 -N $n_nops -I $(size -A bin/dpu_size | awk '($1 == ".text") {print $2/8}') || true done fi done diff --git a/Microbenchmarks/CPU-DPU/support/params.h b/Microbenchmarks/CPU-DPU/support/params.h index 1ecf71d..3bb1535 100644 --- a/Microbenchmarks/CPU-DPU/support/params.h +++ b/Microbenchmarks/CPU-DPU/support/params.h @@ -7,6 +7,7 @@ typedef struct Params { unsigned int input_size; unsigned int n_threads; unsigned int n_nops; + unsigned int n_instr; int n_warmup; int n_reps; int exp; @@ -26,6 +27,7 @@ static void usage() { "\n -i <I> input size (default=8K elements)" "\n -n <N> number of threads per pool (default=8)" "\n -N <N> number of nops in dpu task (default=0)" + "\n -I <N> number of instructions in dpu binary (default=0)" "\n"); } @@ -34,12 +36,13 @@ struct Params input_params(int argc, char **argv) { p.input_size = 8 << 10; p.n_threads = 8; p.n_nops = 0; + p.n_instr = 0; p.n_warmup = 1; p.n_reps = 3; p.exp = 0; int opt; - while((opt = getopt(argc, argv, "hi:n:w:e:x:N:")) >= 0) { + while((opt = getopt(argc, argv, "hi:n:w:e:x:N:I:")) >= 0) { switch(opt) { case 'h': usage(); @@ -48,6 +51,7 @@ struct Params input_params(int argc, char **argv) { case 'i': p.input_size = atoi(optarg); break; case 'n': p.n_threads = atoi(optarg); break; case 'N': p.n_nops = atoi(optarg); break; + case 'I': p.n_instr = atoi(optarg); break; case 'w': p.n_warmup = atoi(optarg); break; case 'e': p.n_reps = atoi(optarg); break; case 'x': p.exp = atoi(optarg); break; |