summary | refs | log | tree | commit | diff
path: root/HST-S
diff options
context:
space:
mode:
author Birte Kristina Friesel <birte.friesel@uos.de> 2024-07-05 11:55:09 +0200
committer Birte Kristina Friesel <birte.friesel@uos.de> 2024-07-05 11:55:09 +0200
commit d9e1b49e1e70bbc70b24f24f54394766da22c0ee (patch)
tree 24b83f0b5aeb6200e9889e65190b0efed8dbc2d5 /HST-S
parent a31c52fac913281e26c3aaad2f943e22722c93e8 (diff)
HST-S: log NUMA node of rank
Diffstat (limited to 'HST-S')
-rw-r--r-- HST-S/host/app.c 27
1 files changed, 22 insertions, 5 deletions
diff --git a/HST-S/host/app.c b/HST-S/host/app.c
index 20ad07c..19091bb 100644
--- a/HST-S/host/app.c
+++ b/HST-S/host/app.c
@@ -30,10 +30,8 @@
#include <dpu_probe.h>
#endif
-#if WITH_DPUINFO
#include <dpu_management.h>
#include <dpu_target_macros.h>
-#endif
#if SDK_SINGLETHREADED
#define DPU_ALLOC_PROFILE "nrThreadsPerRank=0"
@@ -104,6 +102,8 @@ int main(int argc, char **argv) {
// Timer declaration
Timer timer;
+ int numa_node_rank = -2;
+
// Allocate DPUs and load binary
#if !WITH_ALLOC_OVERHEAD
DPU_ASSERT(dpu_alloc(NR_DPUS, DPU_ALLOC_PROFILE, &dpu_set));
@@ -191,6 +191,23 @@ int main(int argc, char **argv) {
assert(nr_of_dpus == NR_DPUS);
#endif
+ // int prev_rank_id = -1;
+ int rank_id = -1;
+ DPU_FOREACH (dpu_set, dpu) {
+ rank_id = dpu_get_rank_id(dpu_get_rank(dpu_from_set(dpu))) & DPU_TARGET_MASK;
+ if ((numa_node_rank != -2) && numa_node_rank != dpu_get_rank_numa_node(dpu_get_rank(dpu_from_set(dpu)))) {
+ numa_node_rank = -1;
+ } else {
+ numa_node_rank = dpu_get_rank_numa_node(dpu_get_rank(dpu_from_set(dpu)));
+ }
+ /*
+ if (rank_id != prev_rank_id) {
+ printf("/dev/dpu_rank%d @ NUMA node %d\n", rank_id, numa_node_rank);
+ prev_rank_id = rank_id;
+ }
+ */
+ }
+
// Compute output on CPU (performance comparison and verification purposes)
if(rep >= p.n_warmup) {
start(&timer, 2, 0);
@@ -295,10 +312,10 @@ int main(int argc, char **argv) {
#endif
if (rep >= p.n_warmup) {
- printf("[::] HST-S UPMEM | n_dpus=%d n_ranks=%d n_tasklets=%d e_type=%s n_elements=%d n_bins=%d",
+ printf("[::] HST-S-UPMEM | n_dpus=%d n_ranks=%d n_tasklets=%d e_type=%s n_elements=%d n_bins=%d",
nr_of_dpus, nr_of_ranks, NR_TASKLETS, XSTR(T), input_size, p.bins);
- printf(" b_with_alloc_overhead=%d b_with_load_overhead=%d b_with_free_overhead=%d ",
- WITH_ALLOC_OVERHEAD, WITH_LOAD_OVERHEAD, WITH_FREE_OVERHEAD);
+ printf(" b_with_alloc_overhead=%d b_with_load_overhead=%d b_with_free_overhead=%d numa_node_rank=%d ",
+ WITH_ALLOC_OVERHEAD, WITH_LOAD_OVERHEAD, WITH_FREE_OVERHEAD, numa_node_rank);
printf("| latency_alloc_us=%f latency_load_us=%f latency_cpu_us=%f latency_write_us=%f latency_kernel_us=%f latency_read_us=%f latency_free_us=%f",
timer.time[0],
timer.time[1],