diff options
author | Birte Kristina Friesel <birte.friesel@uos.de> | 2024-02-22 12:24:04 +0100 |
---|---|---|
committer | Birte Kristina Friesel <birte.friesel@uos.de> | 2024-02-22 12:24:04 +0100 |
commit | d7b9186a2a0285356b9aa38b84c8a7a151e48593 (patch) | |
tree | 689aab2d5fd90989b591eb16e665e9bbd3b99f86 /Microbenchmarks/CPU-DPU/host/app.c | |
parent | ba4f965bf9b7fb9f7cd441e82f37255e3948b736 (diff) |
CPU-DPU microbenchmark: switch to nanoseconds
Diffstat (limited to 'Microbenchmarks/CPU-DPU/host/app.c')
-rw-r--r-- | Microbenchmarks/CPU-DPU/host/app.c | 35 |
1 files changed, 11 insertions, 24 deletions
diff --git a/Microbenchmarks/CPU-DPU/host/app.c b/Microbenchmarks/CPU-DPU/host/app.c
index 07431ee..7c402bc 100644
--- a/Microbenchmarks/CPU-DPU/host/app.c
+++ b/Microbenchmarks/CPU-DPU/host/app.c
@@ -104,9 +104,9 @@ int main(int argc, char **argv) {
 
 //printf("NR_TASKLETS\t%d\tBL\t%d\n", NR_TASKLETS, BL);
 printf("[::] NMC reconfiguration | n_dpus=%d n_ranks=%d n_tasklets=%d n_nops=%d e_type=%s n_elements=%u e_mode=%s"
- " | latency_dpu_alloc_us=%f latency_dpu_load_us=%f latency_dpu_get_us=%f\n",
+ " | latency_dpu_alloc_ns=%lu latency_dpu_load_ns=%lu latency_dpu_get_ns=%lu\n",
 nr_of_dpus, nr_of_ranks, NR_TASKLETS, p.n_nops, XSTR(T), transfer_size, transfer_mode,
- timer.time[4], timer.time[5], timer.time[6]);
+ timer.nanoseconds[4], timer.nanoseconds[5], timer.nanoseconds[6]);
 
 // Loop over main kernel
 for(int rep = 0; rep < p.n_warmup + p.n_reps; rep++) {
@@ -171,32 +171,19 @@ int main(int argc, char **argv) {
 
 if (rep >= p.n_warmup) {
 printf("[::] transfer UPMEM | n_dpus=%d n_ranks=%d n_tasklets=%d n_nops=%d e_type=%s n_elements=%u e_mode=%s"
- " | throughput_dram_mram_MBps=%f throughput_mram_dram_MBps=%f",
+ " | latency_dram_mram_ns=%lu latency_mram_dram_ns=%lu throughput_dram_mram_Bps=%f throughput_mram_dram_Bps=%f",
 nr_of_dpus, nr_of_ranks, NR_TASKLETS, p.n_nops, XSTR(T), transfer_size, transfer_mode,
- transfer_size * sizeof(T) / timer.time[1],
- transfer_size * sizeof(T) / timer.time[3]);
- printf(" throughput_dram_mram_MOpps=%f throughput_mram_dram_MOpps=%f",
- transfer_size / timer.time[1],
- transfer_size / timer.time[3]);
- printf(" latency_dpu_launch_us=%f\n",
- timer.time[2]);
+ timer.nanoseconds[1], timer.nanoseconds[3],
+ transfer_size * sizeof(T) * 1e9 / timer.nanoseconds[1],
+ transfer_size * sizeof(T) * 1e9 / timer.nanoseconds[3]);
+ printf(" throughput_dram_mram_Opps=%f throughput_mram_dram_Opps=%f",
+ transfer_size * 1e9 / timer.nanoseconds[1],
+ transfer_size * 1e9 / timer.nanoseconds[3]);
+ printf(" latency_dpu_launch_ns=%lu\n",
+ timer.nanoseconds[2]);
 }
 }
 
- // Print timing results
- /*
- printf("CPU-DPU ");
- print(&timer, 1, p.n_reps);
- double time_load = timer.time[1] / (1000 * 1);
- printf("CPU-DPU Bandwidth (GB/s): %f\n", (input_size * 8)/(time_load*1e6));
- printf("DPU Kernel ");
- print(&timer, 2, p.n_reps);
- printf("\n");
- printf("DPU-CPU ");
- print(&timer, 3, p.n_reps);
- double time_retrieve = timer.time[3] / (1000 * 1);
- printf("DPU-CPU Bandwidth (GB/s): %f\n", (input_size * 8)/(time_retrieve*1e6));
- */
 // Check output
 bool status = true;
 #ifdef BROADCASTX |