diff options
Diffstat (limited to 'SpMV/baselines/cpu/app.c')
-rw-r--r-- | SpMV/baselines/cpu/app.c | 95 |
1 files changed, 49 insertions, 46 deletions
diff --git a/SpMV/baselines/cpu/app.c b/SpMV/baselines/cpu/app.c index 8d360ee..e33761f 100644 --- a/SpMV/baselines/cpu/app.c +++ b/SpMV/baselines/cpu/app.c @@ -13,60 +13,63 @@ #include "../../support/timer.h" #include "../../support/utils.h" -int main(int argc, char** argv) { +int main(int argc, char **argv) +{ - // Process parameters - struct Params p = input_params(argc, argv); + // Process parameters + struct Params p = input_params(argc, argv); - // Initialize SpMV data structures - PRINT_INFO(p.verbosity >= 1, "Reading matrix %s", p.fileName); - struct COOMatrix cooMatrix = readCOOMatrix(p.fileName); - PRINT_INFO(p.verbosity >= 1, " %u rows, %u columns, %u nonzeros", cooMatrix.numRows, cooMatrix.numCols, cooMatrix.numNonzeros); - struct CSRMatrix csrMatrix = coo2csr(cooMatrix); - float* inVector = malloc(csrMatrix.numCols*sizeof(float)); - float* outVector = malloc(csrMatrix.numRows*sizeof(float)); - initVector(inVector, csrMatrix.numCols); + // Initialize SpMV data structures + PRINT_INFO(p.verbosity >= 1, "Reading matrix %s", p.fileName); + struct COOMatrix cooMatrix = readCOOMatrix(p.fileName); + PRINT_INFO(p.verbosity >= 1, " %u rows, %u columns, %u nonzeros", + cooMatrix.numRows, cooMatrix.numCols, cooMatrix.numNonzeros); + struct CSRMatrix csrMatrix = coo2csr(cooMatrix); + float *inVector = malloc(csrMatrix.numCols * sizeof(float)); + float *outVector = malloc(csrMatrix.numRows * sizeof(float)); + initVector(inVector, csrMatrix.numCols); - // Calculating result on CPU - PRINT_INFO(p.verbosity >= 1, "Calculating result on CPU"); - //omp_set_num_threads(4); - Timer timer; - startTimer(&timer); - #pragma omp parallel for - for(uint32_t rowIdx = 0; rowIdx < csrMatrix.numRows; ++rowIdx) { - float sum = 0.0f; - for(uint32_t i = csrMatrix.rowPtrs[rowIdx]; i < csrMatrix.rowPtrs[rowIdx + 1]; ++i) { - uint32_t colIdx = csrMatrix.nonzeros[i].col; - float value = csrMatrix.nonzeros[i].value; - sum += inVector[colIdx]*value; - } - outVector[rowIdx] = sum; - } - stopTimer(&timer); + // Calculating result on CPU + PRINT_INFO(p.verbosity >= 1, "Calculating result on CPU"); + //omp_set_num_threads(4); + Timer timer; + startTimer(&timer); +#pragma omp parallel for + for (uint32_t rowIdx = 0; rowIdx < csrMatrix.numRows; ++rowIdx) { + float sum = 0.0f; + for (uint32_t i = csrMatrix.rowPtrs[rowIdx]; + i < csrMatrix.rowPtrs[rowIdx + 1]; ++i) { + uint32_t colIdx = csrMatrix.nonzeros[i].col; + float value = csrMatrix.nonzeros[i].value; + sum += inVector[colIdx] * value; + } + outVector[rowIdx] = sum; + } + stopTimer(&timer); - - unsigned int nr_threads = 0; + unsigned int nr_threads = 0; #pragma omp parallel #pragma omp atomic - nr_threads++; - + nr_threads++; - // coomatrix / csrmatrix use uint32_t indexes and float values - printf("[::] SpMV CPU | n_threads=%u e_type=float n_elements=%u |" - " throughput_MBps=%f throughput_MOpps=%f timer0_us=%f\n", - nr_threads, csrMatrix.numNonzeros, - csrMatrix.numNonzeros * sizeof(float) / (getElapsedTime(timer)*1e6), - csrMatrix.numNonzeros / (getElapsedTime(timer)*1e6), - getElapsedTime(timer)*1e6); - //if(p.verbosity == 0) PRINT("%f", getElapsedTime(timer)*1e3); - PRINT_INFO(p.verbosity >= 1, " Elapsed time: %f ms", getElapsedTime(timer)*1e3); + // coomatrix / csrmatrix use uint32_t indexes and float values + printf("[::] SpMV CPU | n_threads=%u e_type=float n_elements=%u |" + " throughput_MBps=%f throughput_MOpps=%f timer0_us=%f\n", + nr_threads, csrMatrix.numNonzeros, + csrMatrix.numNonzeros * sizeof(float) / (getElapsedTime(timer) * + 1e6), + csrMatrix.numNonzeros / (getElapsedTime(timer) * 1e6), + getElapsedTime(timer) * 1e6); + //if(p.verbosity == 0) PRINT("%f", getElapsedTime(timer)*1e3); + PRINT_INFO(p.verbosity >= 1, " Elapsed time: %f ms", + getElapsedTime(timer) * 1e3); - // Deallocate data structures - freeCOOMatrix(cooMatrix); - freeCSRMatrix(csrMatrix); - free(inVector); - free(outVector); + // Deallocate data structures + freeCOOMatrix(cooMatrix); + freeCSRMatrix(csrMatrix); + free(inVector); + free(outVector); - return 0; + return 0; } |