summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Birte Kristina Friesel <birte.friesel@uos.de> 2024-07-10 13:02:22 +0200
committer: Birte Kristina Friesel <birte.friesel@uos.de> 2024-07-10 13:02:22 +0200
commit: 786494ca9c6d9d6704c02f0e91eed4004e436e78 (patch)
tree: ae738031a5a549520dbf4325a9d9d7a4df3c5105
parent: 3633e0409cb244a951a251047bd97a2e35986084 (diff)
GEMV: Support (variable) data types which do not align with pointer size
-rw-r--r-- GEMV/baselines/cpu/Makefile 3
-rw-r--r-- GEMV/baselines/cpu/gemv_openmp.c 53
-rw-r--r-- GEMV/baselines/cpu/gemv_utils.h 20
3 files changed, 47 insertions, 29 deletions
diff --git a/GEMV/baselines/cpu/Makefile b/GEMV/baselines/cpu/Makefile
index 5ddbb35..382fa96 100644
--- a/GEMV/baselines/cpu/Makefile
+++ b/GEMV/baselines/cpu/Makefile
@@ -1,5 +1,6 @@
NUMA ?= 0
FLAGS =
+TYPE ?= double
ifeq (${NUMA}, 1)
FLAGS += -lnuma
@@ -9,7 +10,7 @@ endif
all: gemv
gemv: gemv_openmp.c
- gcc -Wall -Wextra -pedantic -march=native -O2 -o gemv -fopenmp -DNUMA=${NUMA} gemv_openmp.c ${FLAGS}
+ gcc -Wall -Wextra -pedantic -march=native -O2 -o gemv -fopenmp -DNUMA=${NUMA} -DT=${TYPE} -DTYPE_${TYPE} gemv_openmp.c ${FLAGS}
gemv_O0: gemv_openmp.c
gcc -o gemv_O0 -fopenmp gemv_openmp.c
diff --git a/GEMV/baselines/cpu/gemv_openmp.c b/GEMV/baselines/cpu/gemv_openmp.c
index d42d9bb..870e06c 100644
--- a/GEMV/baselines/cpu/gemv_openmp.c
+++ b/GEMV/baselines/cpu/gemv_openmp.c
@@ -6,6 +6,10 @@
#include <numaif.h>
#include <numa.h>
+#ifndef T
+#define T double
+#endif
+
struct bitmask* bitmask_in;
struct bitmask* bitmask_out;
@@ -17,6 +21,9 @@ int numa_node_out = -1;
int numa_node_cpu = -1;
#endif
+#define XSTR(x) STR(x)
+#define STR(x) #x
+
#include "gemv_utils.h"
int main(int argc, char *argv[])
@@ -31,12 +38,14 @@ int main(int argc, char *argv[])
const size_t rows = 163840;
const size_t cols = 4096;
- double **A, *b, *x;
+ T **A, *b, *x;
#if NUMA
bitmask_in = numa_parse_nodestring(argv[1]);
bitmask_out = numa_parse_nodestring(argv[2]);
numa_node_cpu = atoi(argv[3]);
+#else
+ (void) argv;
#endif
#if NUMA
@@ -44,9 +53,9 @@ int main(int argc, char *argv[])
numa_set_membind(bitmask_out);
numa_free_nodemask(bitmask_out);
}
- b = (double*) numa_alloc(sizeof(double)*rows);
+ b = (T*) numa_alloc(sizeof(T)*rows);
#else
- b = (double*) malloc(sizeof(double)*rows);
+ b = (T*) malloc(sizeof(T)*rows);
#endif
#if NUMA
@@ -54,9 +63,9 @@ int main(int argc, char *argv[])
numa_set_membind(bitmask_in);
// no free yet, re-used in allocate_dense
}
- x = (double*) numa_alloc(sizeof(double)*cols);
+ x = (T*) numa_alloc(sizeof(T)*cols);
#else
- x = (double*) malloc(sizeof(double)*cols);
+ x = (T*) malloc(sizeof(T)*cols);
#endif
allocate_dense(rows, cols, &A);
@@ -108,12 +117,12 @@ int main(int argc, char *argv[])
{
#pragma omp for
for (size_t i = 0; i < cols; i++) {
- x[i] = (double) i+1 ;
+ x[i] = (T) i+1 ;
}
#pragma omp for
for (size_t i = 0; i < rows; i++) {
- b[i] = (double) 0.0;
+ b[i] = (T) 0;
}
}
@@ -130,11 +139,11 @@ int main(int argc, char *argv[])
" numa_node_in=%d numa_node_out=%d numa_node_cpu=%d numa_distance_in_cpu=%d numa_distance_cpu_out=%d"
#endif
" | throughput_MBps=%f",
- nr_threads, "double", rows * cols,
+ nr_threads, STR(T), rows * cols,
#if NUMA
numa_node_in, numa_node_out, numa_node_cpu, numa_distance(numa_node_in, numa_node_cpu), numa_distance(numa_node_cpu, numa_node_out),
#endif
- rows * cols * sizeof(double) / timer.time[0]);
+ rows * cols * sizeof(T) / timer.time[0]);
printf(" throughput_MOpps=%f",
rows * cols / timer.time[0]);
printall(&timer, 0);
@@ -147,13 +156,17 @@ int main(int argc, char *argv[])
print_vec(b, rows);
#endif
+#if TYPE_double || TYPE_float
printf("sum(x) = %f, sum(Ax) = %f\n", sum_vec(x,cols), sum_vec(b,rows));
+#else
+ printf("sum(x) = %d, sum(Ax) = %d\n", sum_vec(x,cols), sum_vec(b,rows));
+#endif
#if NUMA
- numa_free(b, sizeof(double)*rows);
- numa_free(x, sizeof(double)*cols);
- numa_free(*A, sizeof(double)*rows*cols);
- numa_free(A, sizeof(double)*rows);
+ numa_free(b, sizeof(T)*rows);
+ numa_free(x, sizeof(T)*cols);
+ numa_free(*A, sizeof(T)*rows*cols);
+ numa_free(A, sizeof(void*)*rows);
#else
free(b);
free(x);
@@ -164,7 +177,7 @@ int main(int argc, char *argv[])
return 0;
}
-void gemv(double** A, double* x, size_t rows, size_t cols, double** b) {
+void gemv(T** A, T* x, size_t rows, size_t cols, T** b) {
#pragma omp parallel for
for (size_t i = 0; i < rows; i ++ )
for (size_t j = 0; j < cols; j ++ ) {
@@ -172,17 +185,21 @@ void gemv(double** A, double* x, size_t rows, size_t cols, double** b) {
}
}
-void make_hilbert_mat(size_t rows, size_t cols, double*** A) {
+void make_hilbert_mat(size_t rows, size_t cols, T*** A) {
#pragma omp parallel for
for (size_t i = 0; i < rows; i++) {
for (size_t j = 0; j < cols; j++) {
- (*A)[i][j] = 1.0/( (double) i + (double) j + 1.0);
+#if TYPE_double || TYPE_float
+ (*A)[i][j] = 1.0/( (T) i + (T) j + 1.0);
+#else
+ (*A)[i][j] = (T)(((i+j)%10));
+#endif
}
}
}
-double sum_vec(double* vec, size_t rows) {
- double sum = 0.0;
+T sum_vec(T* vec, size_t rows) {
+ T sum = 0;
#pragma omp parallel for reduction(+:sum)
for (int i = 0; i < rows; i++) sum = sum + vec[i];
return sum;
diff --git a/GEMV/baselines/cpu/gemv_utils.h b/GEMV/baselines/cpu/gemv_utils.h
index c99763a..30e4701 100644
--- a/GEMV/baselines/cpu/gemv_utils.h
+++ b/GEMV/baselines/cpu/gemv_utils.h
@@ -1,15 +1,15 @@
-void allocate_dense(size_t rows,size_t cols, double*** dense) {
+void allocate_dense(size_t rows,size_t cols, T*** dense) {
#if NUMA
if (bitmask_in) {
numa_set_membind(bitmask_in);
numa_free_nodemask(bitmask_in);
}
- *dense = numa_alloc(sizeof(double)*rows);
- **dense = numa_alloc(sizeof(double)*rows*cols);
+ *dense = numa_alloc(sizeof(void*)*rows);
+ **dense = numa_alloc(sizeof(T)*rows*cols);
#else
- *dense = malloc(sizeof(double)*rows);
- **dense = malloc(sizeof(double)*rows*cols);
+ *dense = malloc(sizeof(void*)*rows);
+ **dense = malloc(sizeof(T)*rows*cols);
#endif
for (size_t i=0; i < rows; i++ ) {
@@ -18,7 +18,7 @@ void allocate_dense(size_t rows,size_t cols, double*** dense) {
}
-void print_mat(double** A, size_t rows, size_t cols) {
+void print_mat(T** A, size_t rows, size_t cols) {
for (size_t i = 0; i < rows; i++) {
for (size_t j = 0; j < cols; j++) {
printf("%f ", A[i][j]);
@@ -27,12 +27,12 @@ void print_mat(double** A, size_t rows, size_t cols) {
}
}
-void print_vec(double* b, size_t rows) {
+void print_vec(T* b, size_t rows) {
for (size_t i = 0; i < rows; i++) {
printf("%f\n", b[i]);
}
}
-void gemv(double** A, double* x, size_t rows, size_t cols, double** b);
-void make_hilbert_mat(size_t rows, size_t cols, double*** A);
-double sum_vec(double* vec, size_t rows);
+void gemv(T** A, T* x, size_t rows, size_t cols, T** b);
+void make_hilbert_mat(size_t rows, size_t cols, T*** A);
+T sum_vec(T* vec, size_t rows);