summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Birte Kristina Friesel <derf@finalrewind.org> 2025-05-13 13:45:22 +0200
committer Birte Kristina Friesel <derf@finalrewind.org> 2025-05-13 13:45:22 +0200
commit 05001600966baeeba4ef42ddcf1804975fe88625 (patch)
tree eff58e7f37a9bdec030e69e00ebd5b227d2cb1fc
parent 591e195f3eeb06237dedd9c5a66fcfe0b0a10889 (diff)
tracing: provide input_size and element_size in derived aspect
-rw-r--r-- VA/include/dfatool_host.ah | 14
-rw-r--r-- include/dfatool_host_dpu.ah | 8
2 files changed, 17 insertions, 5 deletions
diff --git a/VA/include/dfatool_host.ah b/VA/include/dfatool_host.ah
index 6818333..7b33cec 100644
--- a/VA/include/dfatool_host.ah
+++ b/VA/include/dfatool_host.ah
@@ -4,12 +4,22 @@
#include "dfatool_host_dpu.ah"
aspect DfatoolHostTiming : public DfatoolHostDPUTiming {
+
+ DfatoolHostTiming() {
+ element_size = sizeof(T);
+ }
+
advice call("% input_params(...)") : after() {
Params* p = tjp->result();
- printf("[==] VA | n_dpus=%u n_elements=%lu e_exp=%d \n", NR_DPUS, p->input_size, p->exp);
+ printf("[>>] VA | n_dpus=%u n_elements=%lu e_exp=%d\n", NR_DPUS, p->input_size, p->exp);
+ input_size = p->input_size;
}
advice call("% vector_addition_host(...)") : after() {
- printf("[--] VA | n_dpus=%u n_ranks=%u n_elements=%lu n_elements_per_dpu=%lu e_exp=%d \n", n_dpus, n_ranks, p.input_size, p.input_size / n_dpus, p.exp);
+ printf("[--] VA | n_dpus=%u n_ranks=%u n_elements=%lu e_exp=%d\n", n_dpus, n_ranks, p.input_size, p.exp);
+ }
+
+ advice execution("% main(...)") : after() {
+ printf("[<<] VA | n_dpus=%u n_elements=%lu e_exp=%d\n", NR_DPUS, p.input_size, p.exp);
}
};
diff --git a/include/dfatool_host_dpu.ah b/include/dfatool_host_dpu.ah
index 1056a94..560e410 100644
--- a/include/dfatool_host_dpu.ah
+++ b/include/dfatool_host_dpu.ah
@@ -5,6 +5,8 @@
aspect DfatoolHostDPUTiming {
struct timeval starttime;
struct timeval stoptime;
+ unsigned long input_size;
+ unsigned int element_size;
uint32_t n_ranks = 0;
uint32_t n_dpus = 0;
@@ -73,16 +75,16 @@ aspect DfatoolHostDPUTiming {
tjp->proceed();
gettimeofday(&stoptime, NULL);
double latency_us = (stoptime.tv_sec - starttime.tv_sec) * 1000000.0 + (stoptime.tv_usec - starttime.tv_usec);
- unsigned long input_size = p.input_size;
- printf("[::] dpu_launch @ %s:%d | n_dpus=%u n_ranks=%u e_kernel=kernel%d n_elements=%lu | latency_us=%f throughput_Mrps=%f throughput_MiBps=%f\n",
+ printf("[::] dpu_launch @ %s:%d | n_dpus=%u n_ranks=%u e_kernel=kernel%d n_elements=%lu n_elements_per_dpu=%lu | latency_us=%f throughput_Mps=%f throughput_MiBps=%f\n",
tjp->filename(),
tjp->line(),
n_dpus, n_ranks,
kernel + 1,
input_size,
+ input_size / n_dpus,
latency_us,
input_size / latency_us,
- input_size * sizeof(T) / (latency_us * M_to_Mi)
+ input_size * element_size / (latency_us * M_to_Mi)
);
}