summaryrefslogtreecommitdiff
path: root/SpMV/baselines/cpu/app.c
diff options
context:
space:
mode:
Diffstat (limited to 'SpMV/baselines/cpu/app.c')
-rw-r--r--SpMV/baselines/cpu/app.c95
1 files changed, 49 insertions, 46 deletions
diff --git a/SpMV/baselines/cpu/app.c b/SpMV/baselines/cpu/app.c
index 8d360ee..e33761f 100644
--- a/SpMV/baselines/cpu/app.c
+++ b/SpMV/baselines/cpu/app.c
@@ -13,60 +13,63 @@
#include "../../support/timer.h"
#include "../../support/utils.h"
-int main(int argc, char** argv) {
+int main(int argc, char **argv)
+{
- // Process parameters
- struct Params p = input_params(argc, argv);
+ // Process parameters
+ struct Params p = input_params(argc, argv);
- // Initialize SpMV data structures
- PRINT_INFO(p.verbosity >= 1, "Reading matrix %s", p.fileName);
- struct COOMatrix cooMatrix = readCOOMatrix(p.fileName);
- PRINT_INFO(p.verbosity >= 1, " %u rows, %u columns, %u nonzeros", cooMatrix.numRows, cooMatrix.numCols, cooMatrix.numNonzeros);
- struct CSRMatrix csrMatrix = coo2csr(cooMatrix);
- float* inVector = malloc(csrMatrix.numCols*sizeof(float));
- float* outVector = malloc(csrMatrix.numRows*sizeof(float));
- initVector(inVector, csrMatrix.numCols);
+ // Initialize SpMV data structures
+ PRINT_INFO(p.verbosity >= 1, "Reading matrix %s", p.fileName);
+ struct COOMatrix cooMatrix = readCOOMatrix(p.fileName);
+ PRINT_INFO(p.verbosity >= 1, " %u rows, %u columns, %u nonzeros",
+ cooMatrix.numRows, cooMatrix.numCols, cooMatrix.numNonzeros);
+ struct CSRMatrix csrMatrix = coo2csr(cooMatrix);
+ float *inVector = malloc(csrMatrix.numCols * sizeof(float));
+ float *outVector = malloc(csrMatrix.numRows * sizeof(float));
+ initVector(inVector, csrMatrix.numCols);
- // Calculating result on CPU
- PRINT_INFO(p.verbosity >= 1, "Calculating result on CPU");
- //omp_set_num_threads(4);
- Timer timer;
- startTimer(&timer);
- #pragma omp parallel for
- for(uint32_t rowIdx = 0; rowIdx < csrMatrix.numRows; ++rowIdx) {
- float sum = 0.0f;
- for(uint32_t i = csrMatrix.rowPtrs[rowIdx]; i < csrMatrix.rowPtrs[rowIdx + 1]; ++i) {
- uint32_t colIdx = csrMatrix.nonzeros[i].col;
- float value = csrMatrix.nonzeros[i].value;
- sum += inVector[colIdx]*value;
- }
- outVector[rowIdx] = sum;
- }
- stopTimer(&timer);
+ // Calculating result on CPU
+ PRINT_INFO(p.verbosity >= 1, "Calculating result on CPU");
+ //omp_set_num_threads(4);
+ Timer timer;
+ startTimer(&timer);
+#pragma omp parallel for
+ for (uint32_t rowIdx = 0; rowIdx < csrMatrix.numRows; ++rowIdx) {
+ float sum = 0.0f;
+ for (uint32_t i = csrMatrix.rowPtrs[rowIdx];
+ i < csrMatrix.rowPtrs[rowIdx + 1]; ++i) {
+ uint32_t colIdx = csrMatrix.nonzeros[i].col;
+ float value = csrMatrix.nonzeros[i].value;
+ sum += inVector[colIdx] * value;
+ }
+ outVector[rowIdx] = sum;
+ }
+ stopTimer(&timer);
-
- unsigned int nr_threads = 0;
+ unsigned int nr_threads = 0;
#pragma omp parallel
#pragma omp atomic
- nr_threads++;
-
+ nr_threads++;
- // coomatrix / csrmatrix use uint32_t indexes and float values
- printf("[::] SpMV CPU | n_threads=%u e_type=float n_elements=%u |"
- " throughput_MBps=%f throughput_MOpps=%f timer0_us=%f\n",
- nr_threads, csrMatrix.numNonzeros,
- csrMatrix.numNonzeros * sizeof(float) / (getElapsedTime(timer)*1e6),
- csrMatrix.numNonzeros / (getElapsedTime(timer)*1e6),
- getElapsedTime(timer)*1e6);
- //if(p.verbosity == 0) PRINT("%f", getElapsedTime(timer)*1e3);
- PRINT_INFO(p.verbosity >= 1, " Elapsed time: %f ms", getElapsedTime(timer)*1e3);
+ // coomatrix / csrmatrix use uint32_t indexes and float values
+ printf("[::] SpMV CPU | n_threads=%u e_type=float n_elements=%u |"
+ " throughput_MBps=%f throughput_MOpps=%f timer0_us=%f\n",
+ nr_threads, csrMatrix.numNonzeros,
+ csrMatrix.numNonzeros * sizeof(float) / (getElapsedTime(timer) *
+ 1e6),
+ csrMatrix.numNonzeros / (getElapsedTime(timer) * 1e6),
+ getElapsedTime(timer) * 1e6);
+ //if(p.verbosity == 0) PRINT("%f", getElapsedTime(timer)*1e3);
+ PRINT_INFO(p.verbosity >= 1, " Elapsed time: %f ms",
+ getElapsedTime(timer) * 1e3);
- // Deallocate data structures
- freeCOOMatrix(cooMatrix);
- freeCSRMatrix(csrMatrix);
- free(inVector);
- free(outVector);
+ // Deallocate data structures
+ freeCOOMatrix(cooMatrix);
+ freeCSRMatrix(csrMatrix);
+ free(inVector);
+ free(outVector);
- return 0;
+ return 0;
}