summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Birte Kristina Friesel <derf@finalrewind.org> 2025-05-09 14:45:10 +0200
committer Birte Kristina Friesel <derf@finalrewind.org> 2025-05-09 14:45:10 +0200
commit 55928ee11b560d4c90ad7362d65794a83e11f659 (patch)
tree df3df5b0a2575191926b583db28b32ba1fc5fe85
parent 1f538b06ecf5dabb642b15c9bbaa890cbd5d2fa9 (diff)
TRNS: Guard dfatool output behind a compile-time flag
-rw-r--r-- TRNS/Makefile 4
-rw-r--r-- TRNS/host/app.c 14
-rwxr-xr-x TRNS/support/timer.h 26
3 files changed, 29 insertions, 15 deletions
diff --git a/TRNS/Makefile b/TRNS/Makefile
index 0a5f998..9d6a60b 100644
--- a/TRNS/Makefile
+++ b/TRNS/Makefile
@@ -6,8 +6,10 @@ COMMON_INCLUDES := support
HOST_SOURCES := $(wildcard host/*.c)
DPU_SOURCES := $(wildcard dpu/*.c)
+dfatool_timing ?= 1
+
COMMON_FLAGS := -Wall -Wextra -g -I${COMMON_INCLUDES}
-HOST_FLAGS := ${COMMON_FLAGS} -std=c11 -O3 `dpu-pkg-config --cflags --libs dpu` -DNR_TASKLETS=${NR_TASKLETS} -DNR_DPUS=${NR_DPUS} -DENERGY=${ENERGY}
+HOST_FLAGS := ${COMMON_FLAGS} -std=c11 -O3 `dpu-pkg-config --cflags --libs dpu` -DNR_TASKLETS=${NR_TASKLETS} -DNR_DPUS=${NR_DPUS} -DENERGY=${ENERGY} -DDFATOOL_TIMING=${dfatool_timing}
DPU_FLAGS := ${COMMON_FLAGS} -O2 -DNR_TASKLETS=${NR_TASKLETS}
QUIET = @
diff --git a/TRNS/host/app.c b/TRNS/host/app.c
index 452b894..4ed69ec 100644
--- a/TRNS/host/app.c
+++ b/TRNS/host/app.c
@@ -313,35 +313,35 @@ int main(int argc, char **argv) {
* timer 8: run DPU program (second kernel)
* timer 9: read transposed matrix
*/
- printf("[::] TRNS-UPMEM | n_dpus=%d n_ranks=%d n_tasklets=%d e_type=%s n_elements=%lu numa_node_rank=%d ",
+ dfatool_printf("[::] TRNS-UPMEM | n_dpus=%d n_ranks=%d n_tasklets=%d e_type=%s n_elements=%lu numa_node_rank=%d ",
NR_DPUS, nr_of_ranks, NR_TASKLETS, XSTR(T), input_size, numa_node_rank);
- printf("| latency_cpu_us=%f latency_realloc_us=%f latency_load_us=%f latency_write_us=%f latency_kernel_us=%f latency_read_us=%f",
+ dfatool_printf("| latency_cpu_us=%f latency_realloc_us=%f latency_load_us=%f latency_write_us=%f latency_kernel_us=%f latency_read_us=%f",
timer.time[0], // CPU
timer.time[1], // free + alloc
timer.time[2], // load
timer.time[3] + timer.time[4] + timer.time[5] + timer.time[7], // write
timer.time[6] + timer.time[8], // kernel
timer.time[9]); // read
- printf(" latency_write1_us=%f latency_write2_us=%f latency_write3_us=%f latency_write4_us=%f latency_kernel1_us=%f latency_kernel2_us=%f",
+ dfatool_printf(" latency_write1_us=%f latency_write2_us=%f latency_write3_us=%f latency_write4_us=%f latency_kernel1_us=%f latency_kernel2_us=%f",
timer.time[3],
timer.time[4],
timer.time[5],
timer.time[7],
timer.time[6],
timer.time[8]);
- printf(" throughput_cpu_MBps=%f throughput_upmem_kernel_MBps=%f throughput_upmem_total_MBps=%f",
+ dfatool_printf(" throughput_cpu_MBps=%f throughput_upmem_kernel_MBps=%f throughput_upmem_total_MBps=%f",
input_size * sizeof(T) / timer.time[0],
input_size * sizeof(T) / (timer.time[6] + timer.time[8]),
input_size * sizeof(T) / (timer.time[1] + timer.time[2] + timer.time[3] + timer.time[4] + timer.time[5] + timer.time[6] + timer.time[7] + timer.time[8] + timer.time[9]));
- printf(" throughput_upmem_wxr_MBps=%f throughput_upmem_lwxr_MBps=%f throughput_upmem_alwxr_MBps=%f",
+ dfatool_printf(" throughput_upmem_wxr_MBps=%f throughput_upmem_lwxr_MBps=%f throughput_upmem_alwxr_MBps=%f",
input_size * sizeof(T) / (timer.time[3] + timer.time[4] + timer.time[5] + timer.time[6] + timer.time[7] + timer.time[8] + timer.time[9]),
input_size * sizeof(T) / (timer.time[2] + timer.time[3] + timer.time[4] + timer.time[5] + timer.time[6] + timer.time[7] + timer.time[8] + timer.time[9]),
input_size * sizeof(T) / (timer.time[1] + timer.time[2] + timer.time[3] + timer.time[4] + timer.time[5] + timer.time[6] + timer.time[7] + timer.time[8] + timer.time[9]));
- printf(" throughput_cpu_MOpps=%f throughput_upmem_kernel_MOpps=%f throughput_upmem_total_MOpps=%f",
+ dfatool_printf(" throughput_cpu_MOpps=%f throughput_upmem_kernel_MOpps=%f throughput_upmem_total_MOpps=%f",
input_size / timer.time[0],
input_size / (timer.time[6] + timer.time[8]),
input_size / (timer.time[1] + timer.time[2] + timer.time[3] + timer.time[4] + timer.time[5] + timer.time[6] + timer.time[7] + timer.time[8] + timer.time[9]));
- printf(" throughput_upmem_wxr_MOpps=%f throughput_upmem_lwxr_MOpps=%f throughput_upmem_alwxr_MOpps=%f\n",
+ dfatool_printf(" throughput_upmem_wxr_MOpps=%f throughput_upmem_lwxr_MOpps=%f throughput_upmem_alwxr_MOpps=%f\n",
input_size / (timer.time[3] + timer.time[4] + timer.time[5] + timer.time[6] + timer.time[7] + timer.time[8] + timer.time[9]),
input_size / (timer.time[2] + timer.time[3] + timer.time[4] + timer.time[5] + timer.time[6] + timer.time[7] + timer.time[8] + timer.time[9]),
input_size / (timer.time[1] + timer.time[2] + timer.time[3] + timer.time[4] + timer.time[5] + timer.time[6] + timer.time[7] + timer.time[8] + timer.time[9]));
diff --git a/TRNS/support/timer.h b/TRNS/support/timer.h
index c087931..e04a202 100755
--- a/TRNS/support/timer.h
+++ b/TRNS/support/timer.h
@@ -35,6 +35,8 @@
#include <sys/time.h>
+#if DFATOOL_TIMING
+
typedef struct Timer {
struct timeval startTime[10];
@@ -43,6 +45,8 @@ typedef struct Timer {
} Timer;
+#define dfatool_printf(fmt, ...) do { printf(fmt, __VA_ARGS__); } while (0)
+
void start(Timer *timer, int i, int rep)
{
if (rep == 0) {
@@ -60,15 +64,23 @@ void stop(Timer *timer, int i)
(timer->stopTime[i].tv_usec - timer->startTime[i].tv_usec);
}
-void print(Timer *timer, int i, int REP)
+#else
+
+#define dfatool_printf(fmt, ...) do {} while (0)
+
+typedef int Timer;
+
+void start(Timer *timer, int i, int rep)
{
- printf("Time (ms): %f\t", timer->time[i] / (1000 * REP));
+ (void)timer;
+ (void)i;
+ (void)rep;
}
-void printall(Timer *timer, int maxt)
+void stop(Timer *timer, int i)
{
- for (int i = 0; i <= maxt; i++) {
- printf(" timer%d_us=%f", i, timer->time[i]);
- }
- printf("\n");
+ (void)timer;
+ (void)i;
}
+
+#endif