summary | refs | log | tree | commit | diff
path: root/include/dfatool_host_dpu.ah
diff options
context:
space:
mode:
Diffstat (limited to 'include/dfatool_host_dpu.ah')
-rw-r--r-- include/dfatool_host_dpu.ah 20
1 files changed, 19 insertions, 1 deletions
diff --git a/include/dfatool_host_dpu.ah b/include/dfatool_host_dpu.ah
index 0e03d31..010c18c 100644
--- a/include/dfatool_host_dpu.ah
+++ b/include/dfatool_host_dpu.ah
@@ -12,6 +12,8 @@ aspect DfatoolHostDPUTiming {
double const M_to_Mi = 1.048576; /* 2^20 / 1e6 */
+ virtual int getKernel() = 0; /* pure virtual hook: a derived aspect supplies the kernel number that the timing report prints in place of the former `kernel + 1` */
+
advice call("% dpu_get_nr_dpus(...)") : after() { /* after-advice: runs once each matched dpu_get_nr_dpus call has returned */
n_dpus = **(tjp->arg<1>()); /* double dereference of the call argument at index 1 and cache it in n_dpus; presumably an output pointer the callee has just filled in -- confirm against the SDK prototype */
}
@@ -79,7 +81,7 @@ aspect DfatoolHostDPUTiming {
tjp->filename(),
tjp->line(),
n_dpus, n_ranks,
- kernel + 1,
+ getKernel(),
input_size,
input_size / n_dpus,
latency_us,
@@ -104,6 +106,22 @@ aspect DfatoolHostDPUTiming {
);
}
+ advice call("% dpu_copy_from(...)") : around() { /* around-advice: time each matched dpu_copy_from call and print one latency/throughput line for it */
+ size_t payload_size = *(tjp->arg<4>()); /* call argument at index 4, reported below as payload_B */
+ gettimeofday(&starttime, NULL); /* starttime/stoptime are not declared in this advice; presumably aspect members -- confirm */
+ tjp->proceed(); /* run the intercepted call itself between the two timestamps */
+ gettimeofday(&stoptime, NULL);
+ double time_us = (stoptime.tv_sec - starttime.tv_sec) * 1000000.0 + (stoptime.tv_usec - starttime.tv_usec);
+ printf("[::] dpu_copy_from @ %s:%d | n_dpus=%u n_ranks=%u payload_B=%zu | latency_us=%f throughput_MiBps=%f\n",
+ tjp->filename(),
+ tjp->line(),
+ n_dpus, n_ranks,
+ payload_size, /* size_t, hence %zu; the previous %lu only matched where size_t is unsigned long */
+ time_us,
+ payload_size / (time_us * M_to_Mi) /* bytes per microsecond equals MB/s; M_to_Mi (2^20 / 1e6) rescales that to MiB/s */
+ );
+ }
+
advice call("% dpu_push_xfer(...)") : around() {
size_t payload_size = *(tjp->arg<4>());
gettimeofday(&starttime, NULL);