diff options
Diffstat (limited to 'include/dfatool_host_dpu.ah')
-rw-r--r-- | include/dfatool_host_dpu.ah | 8 |
1 file changed, 5 insertions, 3 deletions
diff --git a/include/dfatool_host_dpu.ah b/include/dfatool_host_dpu.ah index 1056a94..560e410 100644 --- a/include/dfatool_host_dpu.ah +++ b/include/dfatool_host_dpu.ah @@ -5,6 +5,8 @@ aspect DfatoolHostDPUTiming { struct timeval starttime; struct timeval stoptime; + unsigned long input_size; + unsigned int element_size; uint32_t n_ranks = 0; uint32_t n_dpus = 0; @@ -73,16 +75,16 @@ aspect DfatoolHostDPUTiming { tjp->proceed(); gettimeofday(&stoptime, NULL); double latency_us = (stoptime.tv_sec - starttime.tv_sec) * 1000000.0 + (stoptime.tv_usec - starttime.tv_usec); - unsigned long input_size = p.input_size; - printf("[::] dpu_launch @ %s:%d | n_dpus=%u n_ranks=%u e_kernel=kernel%d n_elements=%lu | latency_us=%f throughput_Mrps=%f throughput_MiBps=%f\n", + printf("[::] dpu_launch @ %s:%d | n_dpus=%u n_ranks=%u e_kernel=kernel%d n_elements=%lu n_elements_per_dpu=%lu | latency_us=%f throughput_Mps=%f throughput_MiBps=%f\n", tjp->filename(), tjp->line(), n_dpus, n_ranks, kernel + 1, input_size, + input_size / n_dpus, latency_us, input_size / latency_us, - input_size * sizeof(T) / (latency_us * M_to_Mi) + input_size * element_size / (latency_us * M_to_Mi) ); } |