summaryrefslogtreecommitdiff
path: root/TS/dpu/task.c
diff options
context:
space:
mode:
Diffstat (limited to 'TS/dpu/task.c')
-rw-r--r--TS/dpu/task.c138
1 files changed, 78 insertions, 60 deletions
diff --git a/TS/dpu/task.c b/TS/dpu/task.c
index d704160..5a756aa 100644
--- a/TS/dpu/task.c
+++ b/TS/dpu/task.c
@@ -18,18 +18,18 @@ __host dpu_arguments_t DPU_INPUT_ARGUMENTS;
__host dpu_result_t DPU_RESULTS[NR_TASKLETS];
// Dot product
-static void dot_product(DTYPE *vectorA, DTYPE *vectorA_aux, DTYPE *vectorB, DTYPE * result) {
-
- for(uint32_t i = 0; i < BLOCK_SIZE / sizeof(DTYPE); i++)
- {
- for(uint32_t j = 0; j < DOTPIP; j++)
- {
- if((j + i) > BLOCK_SIZE / sizeof(DTYPE) - 1)
- {
- result[j] += vectorA_aux[(j + i) - BLOCK_SIZE / sizeof(DTYPE)] * vectorB[i];
- }
- else
- {
+static void dot_product(DTYPE *vectorA, DTYPE *vectorA_aux, DTYPE *vectorB,
+ DTYPE *result)
+{
+
+ for (uint32_t i = 0; i < BLOCK_SIZE / sizeof(DTYPE); i++) {
+ for (uint32_t j = 0; j < DOTPIP; j++) {
+ if ((j + i) > BLOCK_SIZE / sizeof(DTYPE) - 1) {
+ result[j] +=
+ vectorA_aux[(j + i) -
+ BLOCK_SIZE / sizeof(DTYPE)] *
+ vectorB[i];
+ } else {
result[j] += vectorA[j + i] * vectorB[i];
}
}
@@ -40,43 +40,46 @@ BARRIER_INIT(my_barrier, NR_TASKLETS);
extern int main_kernel1(void);
-int(*kernels[nr_kernels])(void) = {main_kernel1};
+int (*kernels[nr_kernels])(void) = { main_kernel1 };
-int main(void){
+int main(void)
+{
// Kernel
- return kernels[DPU_INPUT_ARGUMENTS.kernel]();
+ return kernels[DPU_INPUT_ARGUMENTS.kernel] ();
}
// main_kernel1
-int main_kernel1() {
+int main_kernel1()
+{
unsigned int tasklet_id = me();
#if PRINT
printf("tasklet_id = %u\n", tasklet_id);
#endif
- if(tasklet_id == 0){
- mem_reset(); // Reset the heap
+ if (tasklet_id == 0) {
+ mem_reset(); // Reset the heap
}
// Barrier
barrier_wait(&my_barrier);
// Input arguments
- uint32_t query_length = DPU_INPUT_ARGUMENTS.query_length;
- DTYPE query_mean = DPU_INPUT_ARGUMENTS.query_mean;
- DTYPE query_std = DPU_INPUT_ARGUMENTS.query_std;
+ uint32_t query_length = DPU_INPUT_ARGUMENTS.query_length;
+ DTYPE query_mean = DPU_INPUT_ARGUMENTS.query_mean;
+ DTYPE query_std = DPU_INPUT_ARGUMENTS.query_std;
uint32_t slice_per_dpu = DPU_INPUT_ARGUMENTS.slice_per_dpu;
// Boundaries for current tasklet
- uint32_t myStartElem = tasklet_id * (slice_per_dpu / (NR_TASKLETS));
- uint32_t myEndElem = myStartElem + (slice_per_dpu / (NR_TASKLETS)) - 1;
+ uint32_t myStartElem = tasklet_id * (slice_per_dpu / (NR_TASKLETS));
+ uint32_t myEndElem = myStartElem + (slice_per_dpu / (NR_TASKLETS)) - 1;
// Check time series limit
- if(myEndElem > slice_per_dpu - query_length) myEndElem = slice_per_dpu - query_length;
+ if (myEndElem > slice_per_dpu - query_length)
+ myEndElem = slice_per_dpu - query_length;
// Starting address of the current processing block in MRAM
uint32_t mem_offset = (uint32_t) DPU_MRAM_HEAP_POINTER;
// Starting address of the query subsequence
- uint32_t current_mram_block_addr_query = (uint32_t)(mem_offset);
+ uint32_t current_mram_block_addr_query = (uint32_t) (mem_offset);
mem_offset += query_length * sizeof(DTYPE);
// Starting address of the time series slice
@@ -86,18 +89,18 @@ int main_kernel1() {
// Starting address of the time series means
mem_offset += (slice_per_dpu + query_length) * sizeof(DTYPE);
- uint32_t current_mram_block_addr_TSMean = (uint32_t)(mem_offset);
+ uint32_t current_mram_block_addr_TSMean = (uint32_t) (mem_offset);
// Starting address of the time series standard deviations
mem_offset += (slice_per_dpu + query_length) * sizeof(DTYPE);
- uint32_t current_mram_block_addr_TSSigma = (uint32_t)(mem_offset);
+ uint32_t current_mram_block_addr_TSSigma = (uint32_t) (mem_offset);
// Initialize local caches to store the MRAM blocks
- DTYPE *cache_TS = (DTYPE *) mem_alloc(BLOCK_SIZE);
- DTYPE *cache_TS_aux = (DTYPE *) mem_alloc(BLOCK_SIZE);
- DTYPE *cache_query = (DTYPE *) mem_alloc(BLOCK_SIZE);
- DTYPE *cache_TSMean = (DTYPE *) mem_alloc(BLOCK_SIZE);
- DTYPE *cache_TSSigma = (DTYPE *) mem_alloc(BLOCK_SIZE);
+ DTYPE *cache_TS = (DTYPE *) mem_alloc(BLOCK_SIZE);
+ DTYPE *cache_TS_aux = (DTYPE *) mem_alloc(BLOCK_SIZE);
+ DTYPE *cache_query = (DTYPE *) mem_alloc(BLOCK_SIZE);
+ DTYPE *cache_TSMean = (DTYPE *) mem_alloc(BLOCK_SIZE);
+ DTYPE *cache_TSSigma = (DTYPE *) mem_alloc(BLOCK_SIZE);
DTYPE *cache_dotprods = (DTYPE *) mem_alloc(BLOCK_SIZE);
// Create result structure pointer
@@ -108,41 +111,56 @@ int main_kernel1() {
DTYPE min_distance = DTYPE_MAX;
uint32_t min_index = 0;
-
- for(uint32_t i = myStartElem; i < myEndElem; i+= (BLOCK_SIZE / sizeof(DTYPE)))
- {
- for(uint32_t d = 0; d < DOTPIP; d++)
+ for (uint32_t i = myStartElem; i < myEndElem;
+ i += (BLOCK_SIZE / sizeof(DTYPE))) {
+ for (uint32_t d = 0; d < DOTPIP; d++)
cache_dotprods[d] = 0;
- current_mram_block_addr_TS = (uint32_t) starting_offset_ts + (i - myStartElem) * sizeof(DTYPE);
- current_mram_block_addr_query = (uint32_t) DPU_MRAM_HEAP_POINTER;
-
- for(uint32_t j = 0; j < (query_length) / (BLOCK_SIZE / sizeof(DTYPE)); j++)
- {
- mram_read((__mram_ptr void const *) current_mram_block_addr_TS, cache_TS, BLOCK_SIZE);
- mram_read((__mram_ptr void const *) current_mram_block_addr_TS + BLOCK_SIZE, cache_TS_aux, BLOCK_SIZE);
- mram_read((__mram_ptr void const *) current_mram_block_addr_query, cache_query, BLOCK_SIZE);
-
- current_mram_block_addr_TS += BLOCK_SIZE;
+ current_mram_block_addr_TS =
+ (uint32_t) starting_offset_ts + (i -
+ myStartElem) *
+ sizeof(DTYPE);
+ current_mram_block_addr_query =
+ (uint32_t) DPU_MRAM_HEAP_POINTER;
+
+ for (uint32_t j = 0;
+ j < (query_length) / (BLOCK_SIZE / sizeof(DTYPE)); j++) {
+ mram_read((__mram_ptr void const *)
+ current_mram_block_addr_TS, cache_TS,
+ BLOCK_SIZE);
+ mram_read((__mram_ptr void const *)
+ current_mram_block_addr_TS + BLOCK_SIZE,
+ cache_TS_aux, BLOCK_SIZE);
+ mram_read((__mram_ptr void const *)
+ current_mram_block_addr_query, cache_query,
+ BLOCK_SIZE);
+
+ current_mram_block_addr_TS += BLOCK_SIZE;
current_mram_block_addr_query += BLOCK_SIZE;
- dot_product(cache_TS, cache_TS_aux, cache_query, cache_dotprods);
+ dot_product(cache_TS, cache_TS_aux, cache_query,
+ cache_dotprods);
}
-
- mram_read((__mram_ptr void const *) current_mram_block_addr_TSMean, cache_TSMean, BLOCK_SIZE);
- mram_read((__mram_ptr void const *) current_mram_block_addr_TSSigma, cache_TSSigma, BLOCK_SIZE);
- current_mram_block_addr_TSMean += BLOCK_SIZE;
+ mram_read((__mram_ptr void const *)
+ current_mram_block_addr_TSMean, cache_TSMean,
+ BLOCK_SIZE);
+ mram_read((__mram_ptr void const *)
+ current_mram_block_addr_TSSigma, cache_TSSigma,
+ BLOCK_SIZE);
+ current_mram_block_addr_TSMean += BLOCK_SIZE;
current_mram_block_addr_TSSigma += BLOCK_SIZE;
- for (uint32_t k = 0; k < (BLOCK_SIZE / sizeof(DTYPE)); k++)
- {
- distance = 2 * ((DTYPE) query_length - (cache_dotprods[k] - (DTYPE) query_length * cache_TSMean[k]
- * query_mean) / (cache_TSSigma[k] * query_std));
-
- if(distance < min_distance)
- {
- min_distance = distance;
- min_index = i + k;
+ for (uint32_t k = 0; k < (BLOCK_SIZE / sizeof(DTYPE)); k++) {
+ distance =
+ 2 * ((DTYPE) query_length -
+ (cache_dotprods[k] -
+ (DTYPE) query_length * cache_TSMean[k]
+ * query_mean) / (cache_TSSigma[k] *
+ query_std));
+
+ if (distance < min_distance) {
+ min_distance = distance;
+ min_index = i + k;
}
}
}