GEMV: Support (variable) data types which do not align with pointer size

author: Birte Kristina Friesel <birte.friesel@uos.de> 2024-07-10 13:02:22 +0200
committer: Birte Kristina Friesel <birte.friesel@uos.de> 2024-07-10 13:02:22 +0200
commit: 786494ca9c6d9d6704c02f0e91eed4004e436e78 (patch)
tree: ae738031a5a549520dbf4325a9d9d7a4df3c5105
parent: 3633e0409cb244a951a251047bd97a2e35986084 (diff)
3 files changed, 47 insertions, 29 deletions
diff --git a/GEMV/baselines/cpu/Makefile b/GEMV/baselines/cpu/Makefile
index 5ddbb35..382fa96 100644
--- a/GEMV/baselines/cpu/Makefile
+++ b/GEMV/baselines/cpu/Makefile
@@ -1,5 +1,6 @@
 NUMA ?= 0
 FLAGS =
+TYPE ?= double
 
 ifeq (${NUMA}, 1)
 	FLAGS += -lnuma
@@ -9,7 +10,7 @@ endif
 all: gemv
 
 gemv: gemv_openmp.c
-	gcc -Wall -Wextra -pedantic -march=native -O2 -o gemv -fopenmp -DNUMA=${NUMA} gemv_openmp.c ${FLAGS}
+	gcc -Wall -Wextra -pedantic -march=native -O2 -o gemv -fopenmp -DNUMA=${NUMA} -DT=${TYPE} -DTYPE_${TYPE} gemv_openmp.c ${FLAGS}
 
 gemv_O0: gemv_openmp.c
 	gcc -o gemv_O0 -fopenmp gemv_openmp.c
diff --git a/GEMV/baselines/cpu/gemv_openmp.c b/GEMV/baselines/cpu/gemv_openmp.c
index d42d9bb..870e06c 100644
--- a/GEMV/baselines/cpu/gemv_openmp.c
+++ b/GEMV/baselines/cpu/gemv_openmp.c
@@ -6,6 +6,10 @@
 #include <numaif.h>
 #include <numa.h>
 
+#ifndef T
+#define T double
+#endif
+
 struct bitmask* bitmask_in;
 struct bitmask* bitmask_out;
 
@@ -17,6 +21,9 @@ int numa_node_out = -1;
 int numa_node_cpu = -1;
 #endif
 
+#define XSTR(x) STR(x)
+#define STR(x) #x
+
 #include "gemv_utils.h"
 
 int main(int argc, char *argv[])
@@ -31,12 +38,14 @@ int main(int argc, char *argv[])
     const size_t rows = 163840;
     const size_t cols = 4096;
 
-    double **A, *b, *x;
+    T **A, *b, *x;
 
 #if NUMA
     bitmask_in    = numa_parse_nodestring(argv[1]);
     bitmask_out   = numa_parse_nodestring(argv[2]);
     numa_node_cpu = atoi(argv[3]);
+#else
+    (void) argv;
 #endif
 
 #if NUMA
@@ -44,9 +53,9 @@ int main(int argc, char *argv[])
         numa_set_membind(bitmask_out);
         numa_free_nodemask(bitmask_out);
     }
-    b = (double*) numa_alloc(sizeof(double)*rows);
+    b = (T*) numa_alloc(sizeof(T)*rows);
 #else
-    b = (double*) malloc(sizeof(double)*rows);
+    b = (T*) malloc(sizeof(T)*rows);
 #endif
 
 #if NUMA
@@ -54,9 +63,9 @@ int main(int argc, char *argv[])
         numa_set_membind(bitmask_in);
         // no free yet, re-used in allocate_dense
     }
-    x = (double*) numa_alloc(sizeof(double)*cols);
+    x = (T*) numa_alloc(sizeof(T)*cols);
 #else
-    x = (double*) malloc(sizeof(double)*cols);
+    x = (T*) malloc(sizeof(T)*cols);
 #endif
 
     allocate_dense(rows, cols, &A);
@@ -108,12 +117,12 @@ int main(int argc, char *argv[])
         {
 #pragma omp for
         for (size_t i = 0; i < cols; i++) {
-          x[i] = (double) i+1 ;
+          x[i] = (T) i+1 ;
         }
 
 #pragma omp for
         for (size_t i = 0; i < rows; i++) {
-          b[i] = (double) 0.0;
+          b[i] = (T) 0;
         }
         }
 
@@ -130,11 +139,11 @@ int main(int argc, char *argv[])
             " numa_node_in=%d numa_node_out=%d numa_node_cpu=%d numa_distance_in_cpu=%d numa_distance_cpu_out=%d"
 #endif
             " | throughput_MBps=%f",
-            nr_threads, "double", rows * cols,
+            nr_threads, STR(T), rows * cols,
 #if NUMA
             numa_node_in, numa_node_out, numa_node_cpu, numa_distance(numa_node_in, numa_node_cpu), numa_distance(numa_node_cpu, numa_node_out),
 #endif
-            rows * cols * sizeof(double) / timer.time[0]);
+            rows * cols * sizeof(T) / timer.time[0]);
         printf(" throughput_MOpps=%f",
             rows * cols / timer.time[0]);
         printall(&timer, 0);
@@ -147,13 +156,17 @@ int main(int argc, char *argv[])
   print_vec(b, rows);
 #endif
 
+#if TYPE_double || TYPE_float
   printf("sum(x) = %f, sum(Ax) = %f\n", sum_vec(x,cols), sum_vec(b,rows));
+#else
+  printf("sum(x) = %d, sum(Ax) = %d\n", sum_vec(x,cols), sum_vec(b,rows));
+#endif
 
 #if NUMA
-  numa_free(b, sizeof(double)*rows);
-  numa_free(x, sizeof(double)*cols);
-  numa_free(*A, sizeof(double)*rows*cols);
-  numa_free(A, sizeof(double)*rows);
+  numa_free(b, sizeof(T)*rows);
+  numa_free(x, sizeof(T)*cols);
+  numa_free(*A, sizeof(T)*rows*cols);
+  numa_free(A, sizeof(void*)*rows);
 #else
   free(b);
   free(x);
@@ -164,7 +177,7 @@ int main(int argc, char *argv[])
   return 0;
 }
 
-void gemv(double** A, double* x, size_t rows, size_t cols, double** b) {
+void gemv(T** A, T* x, size_t rows, size_t cols, T** b) {
 #pragma omp parallel for
   for (size_t i = 0; i < rows; i ++ )
   for (size_t j = 0; j < cols; j ++ ) {
@@ -172,17 +185,21 @@ void gemv(double** A, double* x, size_t rows, size_t cols, double** b) {
   }
 }
 
-void make_hilbert_mat(size_t rows, size_t cols, double*** A) {
+void make_hilbert_mat(size_t rows, size_t cols, T*** A) {
 #pragma omp parallel for
   for (size_t i = 0; i < rows; i++) {
     for (size_t j = 0; j < cols; j++) {
-      (*A)[i][j] = 1.0/( (double) i + (double) j + 1.0);
+#if TYPE_double || TYPE_float
+      (*A)[i][j] = 1.0/( (T) i + (T) j + 1.0);
+#else
+      (*A)[i][j] = (T)(((i+j)%10));
+#endif
     }
   }
 }
 
-double sum_vec(double* vec, size_t rows) {
-  double sum = 0.0;
+T sum_vec(T* vec, size_t rows) {
+  T sum = 0;
 #pragma omp parallel for reduction(+:sum)
   for (int i = 0; i < rows; i++) sum = sum + vec[i];
   return sum;
diff --git a/GEMV/baselines/cpu/gemv_utils.h b/GEMV/baselines/cpu/gemv_utils.h
index c99763a..30e4701 100644
--- a/GEMV/baselines/cpu/gemv_utils.h
+++ b/GEMV/baselines/cpu/gemv_utils.h
@@ -1,15 +1,15 @@
-void allocate_dense(size_t rows,size_t  cols, double*** dense) {
+void allocate_dense(size_t rows,size_t  cols, T*** dense) {
 
 #if NUMA
     if (bitmask_in) {
         numa_set_membind(bitmask_in);
         numa_free_nodemask(bitmask_in);
     }
-  *dense = numa_alloc(sizeof(double)*rows);
-  **dense = numa_alloc(sizeof(double)*rows*cols);
+  *dense = numa_alloc(sizeof(void*)*rows);
+  **dense = numa_alloc(sizeof(T)*rows*cols);
 #else
-  *dense = malloc(sizeof(double)*rows);
-  **dense = malloc(sizeof(double)*rows*cols);
+  *dense = malloc(sizeof(void*)*rows);
+  **dense = malloc(sizeof(T)*rows*cols);
 #endif
 
   for (size_t i=0; i < rows; i++ ) {
@@ -18,7 +18,7 @@ void allocate_dense(size_t rows,size_t  cols, double*** dense) {
 
 }
 
-void print_mat(double** A, size_t rows, size_t cols) {
+void print_mat(T** A, size_t rows, size_t cols) {
   for (size_t i = 0; i < rows; i++) {
     for (size_t j = 0; j < cols; j++) {
       printf("%f ", A[i][j]);
@@ -27,12 +27,12 @@ void print_mat(double** A, size_t rows, size_t cols) {
   }
 }
 
-void print_vec(double* b, size_t rows) {
+void print_vec(T* b, size_t rows) {
   for (size_t i = 0; i < rows; i++) {
     printf("%f\n", b[i]);
   }
 }
 
-void gemv(double** A, double* x, size_t rows, size_t cols, double** b);
-void make_hilbert_mat(size_t rows, size_t cols, double*** A);
-double sum_vec(double* vec, size_t rows);
+void gemv(T** A, T* x, size_t rows, size_t cols, T** b);
+void make_hilbert_mat(size_t rows, size_t cols, T*** A);
+T sum_vec(T* vec, size_t rows);
author	Birte Kristina Friesel <birte.friesel@uos.de>	2024-07-10 13:02:22 +0200
committer	Birte Kristina Friesel <birte.friesel@uos.de>	2024-07-10 13:02:22 +0200
commit	786494ca9c6d9d6704c02f0e91eed4004e436e78 (patch)
tree	ae738031a5a549520dbf4325a9d9d7a4df3c5105
parent	3633e0409cb244a951a251047bd97a2e35986084 (diff)