summaryrefslogtreecommitdiff
path: root/SpMV/baselines/cpu/app.c
blob: e33761f9bdd5cce50f83723b6c5bcf6980ffaaf0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <stdint.h>

#include <omp.h>

#include "../../support/matrix.h"
#include "../../support/params.h"
#include "../../support/timer.h"
#include "../../support/utils.h"

/*
 * CPU baseline for sparse matrix-vector multiplication (SpMV).
 *
 * Reads the matrix named by the parsed parameters in COO form, converts it
 * to CSR, multiplies it with an input vector filled by initVector() using an
 * OpenMP-parallel loop over rows, and prints one machine-readable result line
 * with thread count, element count, throughput and elapsed time.
 *
 * Returns 0 on success, EXIT_FAILURE if the work vectors cannot be allocated.
 */
int main(int argc, char **argv)
{

	// Process parameters
	struct Params p = input_params(argc, argv);

	// Initialize SpMV data structures
	PRINT_INFO(p.verbosity >= 1, "Reading matrix %s", p.fileName);
	struct COOMatrix cooMatrix = readCOOMatrix(p.fileName);
	PRINT_INFO(p.verbosity >= 1, "    %u rows, %u columns, %u nonzeros",
		   cooMatrix.numRows, cooMatrix.numCols, cooMatrix.numNonzeros);
	struct CSRMatrix csrMatrix = coo2csr(cooMatrix);

	// One input element per column, one output element per row.
	float *inVector = malloc(csrMatrix.numCols * sizeof *inVector);
	float *outVector = malloc(csrMatrix.numRows * sizeof *outVector);

	// malloc(0) may legitimately return NULL, so only treat NULL as a
	// failure when a non-empty buffer was requested.
	if ((csrMatrix.numCols > 0 && inVector == NULL) ||
	    (csrMatrix.numRows > 0 && outVector == NULL)) {
		fprintf(stderr, "Failed to allocate input/output vectors\n");
		free(inVector);
		free(outVector);
		freeCOOMatrix(cooMatrix);
		freeCSRMatrix(csrMatrix);
		return EXIT_FAILURE;
	}
	initVector(inVector, csrMatrix.numCols);

	// Calculating result on CPU
	PRINT_INFO(p.verbosity >= 1, "Calculating result on CPU");
	// The thread count is left to the OpenMP runtime (e.g. OMP_NUM_THREADS).
	Timer timer;
	startTimer(&timer);
	// Each row is independent: every iteration writes a distinct
	// outVector element and only reads shared data.
#pragma omp parallel for
	for (uint32_t rowIdx = 0; rowIdx < csrMatrix.numRows; ++rowIdx) {
		float sum = 0.0f;
		// rowPtrs[rowIdx] .. rowPtrs[rowIdx + 1] delimits this row's
		// entries in the nonzeros array.
		for (uint32_t i = csrMatrix.rowPtrs[rowIdx];
		     i < csrMatrix.rowPtrs[rowIdx + 1]; ++i) {
			uint32_t colIdx = csrMatrix.nonzeros[i].col;
			float value = csrMatrix.nonzeros[i].value;
			sum += inVector[colIdx] * value;
		}
		outVector[rowIdx] = sum;
	}
	stopTimer(&timer);

	// Count the threads of a parallel region by letting each one
	// atomically bump a shared counter.
	unsigned int nr_threads = 0;
#pragma omp parallel
#pragma omp atomic
	nr_threads++;

	// Read the elapsed time once; the scaling below (x1e3 -> ms,
	// x1e6 -> us) treats it as seconds.
	const double elapsed = getElapsedTime(timer);

	// coomatrix / csrmatrix use uint32_t indexes and float values
	printf("[::] SpMV CPU | n_threads=%u e_type=float n_elements=%u |"
	       " throughput_MBps=%f throughput_MOpps=%f timer0_us=%f\n",
	       nr_threads, csrMatrix.numNonzeros,
	       csrMatrix.numNonzeros * sizeof(float) / (elapsed * 1e6),
	       csrMatrix.numNonzeros / (elapsed * 1e6),
	       elapsed * 1e6);
	PRINT_INFO(p.verbosity >= 1, "    Elapsed time: %f ms",
		   elapsed * 1e3);

	// Deallocate data structures
	freeCOOMatrix(cooMatrix);
	freeCSRMatrix(csrMatrix);
	free(inVector);
	free(outVector);

	return 0;

}