summary | refs | log | tree | commit | diff
path: root/BFS/host
diff options
context:
space:
mode:
Diffstat (limited to 'BFS/host')
-rw-r--r-- BFS/host/app.c 64
1 file changed, 38 insertions, 26 deletions
diff --git a/BFS/host/app.c b/BFS/host/app.c
index c6f8301..5fb7254 100644
--- a/BFS/host/app.c
+++ b/BFS/host/app.c
@@ -37,7 +37,6 @@ int main(int argc, char** argv) {
// Timer and profiling
Timer timer;
- float loadTime = 0.0f, dpuTime = 0.0f, hostTime = 0.0f, retrieveTime = 0.0f;
#if ENERGY
struct dpu_probe_t probe;
DPU_ASSERT(dpu_probe_init("energy_probe", &probe));
@@ -73,6 +72,10 @@ int main(int argc, char** argv) {
struct DPUParams dpuParams[numDPUs];
uint32_t dpuParams_m[numDPUs];
unsigned int dpuIdx = 0;
+ unsigned int t0ini = 0;
+ unsigned int t1ini = 0;
+ unsigned int t2ini = 0;
+ unsigned int t3ini = 0;
DPU_FOREACH (dpu_set, dpu) {
// Allocate parameters
@@ -127,29 +130,28 @@ int main(int argc, char** argv) {
// Send data to DPU
PRINT_INFO(p.verbosity >= 2, " Copying data to DPU");
- startTimer(&timer);
+ startTimer(&timer, 0, t0ini++);
copyToDPU(dpu, (uint8_t*)dpuNodePtrs_h, dpuNodePtrs_m, (dpuNumNodes + 1)*sizeof(uint32_t));
copyToDPU(dpu, (uint8_t*)dpuNeighborIdxs_h, dpuNeighborIdxs_m, dpuNumNeighbors*sizeof(uint32_t));
copyToDPU(dpu, (uint8_t*)dpuNodeLevel_h, dpuNodeLevel_m, dpuNumNodes*sizeof(uint32_t));
copyToDPU(dpu, (uint8_t*)visited, dpuVisited_m, numNodes/64*sizeof(uint64_t));
copyToDPU(dpu, (uint8_t*)nextFrontier, dpuNextFrontier_m, numNodes/64*sizeof(uint64_t));
// NOTE: No need to copy current frontier because it is written before being read
- stopTimer(&timer);
- loadTime += getElapsedTime(timer);
+ stopTimer(&timer, 0);
+ //loadTime += getElapsedTime(timer);
}
// Send parameters to DPU
PRINT_INFO(p.verbosity >= 2, " Copying parameters to DPU");
- startTimer(&timer);
+ startTimer(&timer, 1, t1ini++);
copyToDPU(dpu, (uint8_t*)&dpuParams[dpuIdx], dpuParams_m[dpuIdx], sizeof(struct DPUParams));
- stopTimer(&timer);
- loadTime += getElapsedTime(timer);
+ stopTimer(&timer, 1);
+ //loadTime += getElapsedTime(timer);
++dpuIdx;
}
- PRINT_INFO(p.verbosity >= 1, " CPU-DPU Time: %f ms", loadTime*1e3);
// Iterate until next frontier is empty
uint32_t nextFrontierEmpty = 0;
@@ -162,11 +164,10 @@ int main(int argc, char** argv) {
#endif
// Run all DPUs
PRINT_INFO(p.verbosity >= 1, " Booting DPUs");
- startTimer(&timer);
+ startTimer(&timer, 2, t2ini++);
DPU_ASSERT(dpu_launch(dpu_set, DPU_SYNCHRONOUS));
- stopTimer(&timer);
- dpuTime += getElapsedTime(timer);
- PRINT_INFO(p.verbosity >= 2, " Level DPU Time: %f ms", getElapsedTime(timer)*1e3);
+ stopTimer(&timer, 2);
+ //dpuTime += getElapsedTime(timer);
#if ENERGY
DPU_ASSERT(dpu_probe_stop(&probe));
double energy;
@@ -177,7 +178,7 @@ int main(int argc, char** argv) {
// Copy back next frontier from all DPUs and compute their union as the current frontier
- startTimer(&timer);
+ startTimer(&timer, 3, t3ini++);
dpuIdx = 0;
DPU_FOREACH (dpu_set, dpu) {
uint32_t dpuNumNodes = dpuParams[dpuIdx].dpuNumNodes;
@@ -217,20 +218,14 @@ int main(int argc, char** argv) {
}
}
}
- stopTimer(&timer);
- hostTime += getElapsedTime(timer);
- PRINT_INFO(p.verbosity >= 2, " Level Inter-DPU Time: %f ms", getElapsedTime(timer)*1e3);
+ stopTimer(&timer, 3);
+ //hostTime += getElapsedTime(timer);
}
- PRINT_INFO(p.verbosity >= 1, "DPU Kernel Time: %f ms", dpuTime*1e3);
- PRINT_INFO(p.verbosity >= 1, "Inter-DPU Time: %f ms", hostTime*1e3);
- #if ENERGY
- PRINT_INFO(p.verbosity >= 1, " DPU Energy: %f J", tenergy);
- #endif
// Copy back node levels
PRINT_INFO(p.verbosity >= 1, "Copying back the result");
- startTimer(&timer);
+ startTimer(&timer, 4, 0);
dpuIdx = 0;
DPU_FOREACH (dpu_set, dpu) {
uint32_t dpuNumNodes = dpuParams[dpuIdx].dpuNumNodes;
@@ -240,10 +235,9 @@ int main(int argc, char** argv) {
}
++dpuIdx;
}
- stopTimer(&timer);
- retrieveTime += getElapsedTime(timer);
- PRINT_INFO(p.verbosity >= 1, " DPU-CPU Time: %f ms", retrieveTime*1e3);
- if(p.verbosity == 0) PRINT("CPU-DPU Time(ms): %f DPU Kernel Time (ms): %f Inter-DPU Time (ms): %f DPU-CPU Time (ms): %f", loadTime*1e3, dpuTime*1e3, hostTime*1e3, retrieveTime*1e3);
+ stopTimer(&timer, 4);
+ //retrieveTime += getElapsedTime(timer);
+ //if(p.verbosity == 0) PRINT("CPU-DPU Time(ms): %f DPU Kernel Time (ms): %f Inter-DPU Time (ms): %f DPU-CPU Time (ms): %f", loadTime*1e3, dpuTime*1e3, hostTime*1e3, retrieveTime*1e3);
// Calculating result on CPU
PRINT_INFO(p.verbosity >= 1, "Calculating result on CPU");
@@ -294,12 +288,30 @@ int main(int argc, char** argv) {
// Verify the result
PRINT_INFO(p.verbosity >= 1, "Verifying the result");
+ int status = 1;
for(uint32_t nodeIdx = 0; nodeIdx < numNodes; ++nodeIdx) {
if(nodeLevel[nodeIdx] != nodeLevelReference[nodeIdx]) {
PRINT_ERROR("Mismatch at node %u (CPU result = level %u, DPU result = level %u)", nodeIdx, nodeLevelReference[nodeIdx], nodeLevel[nodeIdx]);
+ status = 0;
}
}
+ if (status) {
+ printf("[::] n_dpus=%d n_tasklets=%d e_type=%s n_elements=%d "
+ "| throughput_pim_MBps=%f throughput_MBps=%f\n",
+ numDPUs, NR_TASKLETS, "uint32_t", numNodes,
+ numNodes * sizeof(uint32_t) / (timer.time[2]),
+ numNodes * sizeof(uint32_t) / (timer.time[0] + timer.time[1] + timer.time[2] + timer.time[3] + timer.time[4]));
+ printf("[::] n_dpus=%d n_tasklets=%d e_type=%s n_elements=%d "
+ "| throughput_pim_MOpps=%f throughput_MOpps=%f\n",
+ numDPUs, NR_TASKLETS, "uint32_t", numNodes,
+ numNodes / (timer.time[2]),
+ numNodes / (timer.time[0] + timer.time[1] + timer.time[2] + timer.time[3] + timer.time[4]));
+ printf("[::] n_dpus=%d n_tasklets=%d e_type=%s n_elements=%d | ",
+ numDPUs, NR_TASKLETS, "uint32_t", numNodes);
+ printAll(&timer, 4);
+ }
+
// Display DPU Logs
if(p.verbosity >= 2) {
PRINT_INFO(p.verbosity >= 2, "Displaying DPU Logs:");