GEMV: indent -linux

author: Birte Kristina Friesel <birte.friesel@uos.de> 2025-01-16 08:18:46 +0100
committer: Birte Kristina Friesel <birte.friesel@uos.de> 2025-01-16 08:18:46 +0100
commit: d83ae32594f5407c71a67f1b913fe9893f30015e (patch)
tree: d7b5a37778811ce6847a55967c467506905ae639 /GEMV/baselines
parent: 8e71c657efe8c10648849a7dc31c1689bbc7882d (diff)
1 files changed, 209 insertions, 201 deletions
diff --git a/GEMV/baselines/cpu/gemv_openmp.c b/GEMV/baselines/cpu/gemv_openmp.c
index 21e24cb..99bba55 100644
--- a/GEMV/baselines/cpu/gemv_openmp.c
+++ b/GEMV/baselines/cpu/gemv_openmp.c
@@ -10,10 +10,10 @@
 #include <numaif.h>
 #include <numa.h>
 
-struct bitmask* bitmask_in;
-struct bitmask* bitmask_out;
+struct bitmask *bitmask_in;
+struct bitmask *bitmask_out;
 
-void* mp_pages[1];
+void *mp_pages[1];
 int mp_status[1];
 int mp_nodes[1];
 int numa_node_in = -1;
@@ -22,7 +22,7 @@ int numa_node_cpu = -1;
 #endif
 
 #if NUMA_MEMCPY
-struct bitmask* bitmask_cpu;
+struct bitmask *bitmask_cpu;
 int numa_node_cpu_memcpy = -1;
 int numa_node_local = -1;
 int numa_node_in_is_local = 0;
@@ -35,284 +35,292 @@ int numa_node_in_is_local = 0;
 
 int main(int argc, char *argv[])
 {
-    (void) argc;
+	(void)argc;
 /*  // upstream config:
     const size_t rows = 20480;
     const size_t cols = 8192;
 */
 
-    // DPU config: 163840 -n 4096
-    const size_t rows = 163840;
-    const size_t cols = 4096;
+	// DPU config: 163840 -n 4096
+	const size_t rows = 163840;
+	const size_t cols = 4096;
 
-    T **A, *b, *x;
+	T **A, *b, *x;
 
-    T **A_local, *x_local;
+	T **A_local, *x_local;
 
 #if NUMA
-    bitmask_in    = numa_parse_nodestring(argv[1]);
-    bitmask_out   = numa_parse_nodestring(argv[2]);
-    numa_node_cpu = atoi(argv[3]);
+	bitmask_in = numa_parse_nodestring(argv[1]);
+	bitmask_out = numa_parse_nodestring(argv[2]);
+	numa_node_cpu = atoi(argv[3]);
 #if NUMA_MEMCPY
-    bitmask_cpu   = numa_parse_nodestring(argv[4]);
-    numa_node_cpu_memcpy = atoi(argv[5]);
-#endif // NUMA_MEMCPY
+	bitmask_cpu = numa_parse_nodestring(argv[4]);
+	numa_node_cpu_memcpy = atoi(argv[5]);
+#endif				// NUMA_MEMCPY
 #else
-    (void) argv;
-#endif // NUMA
+	(void)argv;
+#endif				// NUMA
 
 #if NUMA
-    if (bitmask_out) {
-        numa_set_membind(bitmask_out);
-        numa_free_nodemask(bitmask_out);
-    }
-    b = (T*) numa_alloc(sizeof(T)*rows);
+	if (bitmask_out) {
+		numa_set_membind(bitmask_out);
+		numa_free_nodemask(bitmask_out);
+	}
+	b = (T *) numa_alloc(sizeof(T) * rows);
 #else
-    b = (T*) malloc(sizeof(T)*rows);
+	b = (T *) malloc(sizeof(T) * rows);
 #endif
 
 #if NUMA
-    if (bitmask_in) {
-        numa_set_membind(bitmask_in);
-        // no free yet, re-used in allocate_dense
-    }
-    x = (T*) numa_alloc(sizeof(T)*cols);
+	if (bitmask_in) {
+		numa_set_membind(bitmask_in);
+		// no free yet, re-used in allocate_dense
+	}
+	x = (T *) numa_alloc(sizeof(T) * cols);
 #else
-    x = (T*) malloc(sizeof(T)*cols);
+	x = (T *) malloc(sizeof(T) * cols);
 #endif
 
-    allocate_dense(rows, cols, &A);
+	allocate_dense(rows, cols, &A);
 
 #if NUMA
-    if (bitmask_in) {
-        numa_free_nodemask(bitmask_in);
-    }
+	if (bitmask_in) {
+		numa_free_nodemask(bitmask_in);
+	}
 #endif
 
-    make_hilbert_mat(rows,cols, &A);
+	make_hilbert_mat(rows, cols, &A);
 
 #if NUMA
 #if NUMA_MEMCPY
-    if (bitmask_cpu) {
-        numa_set_membind(bitmask_cpu);
-        numa_free_nodemask(bitmask_cpu);
-    }
+	if (bitmask_cpu) {
+		numa_set_membind(bitmask_cpu);
+		numa_free_nodemask(bitmask_cpu);
+	}
 #else
-    struct bitmask *bitmask_all = numa_allocate_nodemask();
-    numa_bitmask_setall(bitmask_all);
-    numa_set_membind(bitmask_all);
-    numa_free_nodemask(bitmask_all);
-#endif // NUMA_MEMCPY
-#endif // NUMA
+	struct bitmask *bitmask_all = numa_allocate_nodemask();
+	numa_bitmask_setall(bitmask_all);
+	numa_set_membind(bitmask_all);
+	numa_free_nodemask(bitmask_all);
+#endif				// NUMA_MEMCPY
+#endif				// NUMA
 
-    A_local = A;
-    x_local = x;
+	A_local = A;
+	x_local = x;
 
 #if NUMA
-    mp_pages[0] = A;
-    if (move_pages(0, 1, mp_pages, NULL, mp_status, 0) == -1) {
-        perror("move_pages(A)");
-    }
-    else if (mp_status[0] < 0) {
-        printf("move_pages(A) error: %d", mp_status[0]);
-    }
-    else {
-        numa_node_in = mp_status[0];
-    }
-
-    mp_pages[0] = b;
-    if (move_pages(0, 1, mp_pages, NULL, mp_status, 0) == -1) {
-        perror("move_pages(b)");
-    }
-    else if (mp_status[0] < 0) {
-        printf("move_pages(b) error: %d", mp_status[0]);
-    }
-    else {
-        numa_node_out = mp_status[0];
-    }
-
-    if (numa_node_cpu != -1) {
-        if (numa_run_on_node(numa_node_cpu) == -1) {
-            perror("numa_run_on_node");
-            numa_node_cpu = -1;
-        }
-    }
+	mp_pages[0] = A;
+	if (move_pages(0, 1, mp_pages, NULL, mp_status, 0) == -1) {
+		perror("move_pages(A)");
+	} else if (mp_status[0] < 0) {
+		printf("move_pages(A) error: %d", mp_status[0]);
+	} else {
+		numa_node_in = mp_status[0];
+	}
+
+	mp_pages[0] = b;
+	if (move_pages(0, 1, mp_pages, NULL, mp_status, 0) == -1) {
+		perror("move_pages(b)");
+	} else if (mp_status[0] < 0) {
+		printf("move_pages(b) error: %d", mp_status[0]);
+	} else {
+		numa_node_out = mp_status[0];
+	}
+
+	if (numa_node_cpu != -1) {
+		if (numa_run_on_node(numa_node_cpu) == -1) {
+			perror("numa_run_on_node");
+			numa_node_cpu = -1;
+		}
+	}
 #endif
 
 #if NUMA_MEMCPY
-    numa_node_in_is_local = ((numa_node_cpu == numa_node_in) || (numa_node_cpu + 8 == numa_node_in)) * 1;
+	numa_node_in_is_local = ((numa_node_cpu == numa_node_in)
+				 || (numa_node_cpu + 8 == numa_node_in)) * 1;
 #endif
 
-    Timer timer;
-    for (int i = 0; i < 20; i++) {
+	Timer timer;
+	for (int i = 0; i < 20; i++) {
 
 #pragma omp parallel
-        {
+		{
 #pragma omp for
-        for (size_t i = 0; i < cols; i++) {
-          x[i] = (T) i+1 ;
-        }
+			for (size_t i = 0; i < cols; i++) {
+				x[i] = (T) i + 1;
+			}
 
 #pragma omp for
-        for (size_t i = 0; i < rows; i++) {
-          b[i] = (T) 0;
-        }
-        }
+			for (size_t i = 0; i < rows; i++) {
+				b[i] = (T) 0;
+			}
+		}
 
 #if NUMA_MEMCPY
-        start(&timer, 1, 0);
-        if (!numa_node_in_is_local) {
-            x_local = (T*) numa_alloc(sizeof(T)*cols);
-            allocate_dense(rows, cols, &A_local);
-        }
-        stop(&timer, 1);
-
-        if (x_local == NULL) {
-            return 1;
-        }
-        if (A_local == NULL) {
-            return 1;
-        }
-
-        if (!numa_node_in_is_local) {
-            if (numa_node_cpu_memcpy != -1) {
-                if (numa_run_on_node(numa_node_cpu_memcpy) == -1) {
-                    perror("numa_run_on_node");
-                    numa_node_cpu_memcpy = -1;
-                }
-            }
-        }
-
-        start(&timer, 2, 0);
-        if (!numa_node_in_is_local) {
-            //for (size_t i=0; i < rows; i++ ) {
-             //   memcpy(A_local[i], A[i], cols * sizeof(T));
-            //}
-            memcpy(*A_local, *A, rows * cols * sizeof(T));
-            memcpy(x_local, x, cols * sizeof(T));
-        } else {
-            A_local = A;
-            x_local = x;
-        }
-        stop(&timer, 2);
-
-        if (numa_node_cpu != -1) {
-            if (numa_run_on_node(numa_node_cpu) == -1) {
-                perror("numa_run_on_node");
-                numa_node_cpu = -1;
-            }
-        }
-
-        mp_pages[0] = A_local;
-        if (move_pages(0, 1, mp_pages, NULL, mp_status, 0) == -1) {
-            perror("move_pages(A_local)");
-        }
-        else if (mp_status[0] < 0) {
-            printf("move_pages error: %d", mp_status[0]);
-        }
-        else {
-            numa_node_local = mp_status[0];
-        }
+		start(&timer, 1, 0);
+		if (!numa_node_in_is_local) {
+			x_local = (T *) numa_alloc(sizeof(T) * cols);
+			allocate_dense(rows, cols, &A_local);
+		}
+		stop(&timer, 1);
+
+		if (x_local == NULL) {
+			return 1;
+		}
+		if (A_local == NULL) {
+			return 1;
+		}
+
+		if (!numa_node_in_is_local) {
+			if (numa_node_cpu_memcpy != -1) {
+				if (numa_run_on_node(numa_node_cpu_memcpy) ==
+				    -1) {
+					perror("numa_run_on_node");
+					numa_node_cpu_memcpy = -1;
+				}
+			}
+		}
+
+		start(&timer, 2, 0);
+		if (!numa_node_in_is_local) {
+			//for (size_t i=0; i < rows; i++ ) {
+			//   memcpy(A_local[i], A[i], cols * sizeof(T));
+			//}
+			memcpy(*A_local, *A, rows * cols * sizeof(T));
+			memcpy(x_local, x, cols * sizeof(T));
+		} else {
+			A_local = A;
+			x_local = x;
+		}
+		stop(&timer, 2);
+
+		if (numa_node_cpu != -1) {
+			if (numa_run_on_node(numa_node_cpu) == -1) {
+				perror("numa_run_on_node");
+				numa_node_cpu = -1;
+			}
+		}
+
+		mp_pages[0] = A_local;
+		if (move_pages(0, 1, mp_pages, NULL, mp_status, 0) == -1) {
+			perror("move_pages(A_local)");
+		} else if (mp_status[0] < 0) {
+			printf("move_pages error: %d", mp_status[0]);
+		} else {
+			numa_node_local = mp_status[0];
+		}
 #endif
 
-        unsigned int nr_threads = 0;
+		unsigned int nr_threads = 0;
 #pragma omp parallel
 #pragma omp atomic
-        nr_threads++;
+		nr_threads++;
 
-        start(&timer, 0, 0);
-        gemv(A_local, x_local, rows, cols, &b);
-        stop(&timer, 0);
+		start(&timer, 0, 0);
+		gemv(A_local, x_local, rows, cols, &b);
+		stop(&timer, 0);
 
 #if NUMA_MEMCPY
-        start(&timer, 3, 0);
-        if (!numa_node_in_is_local) {
-            numa_free(x_local, sizeof(T) * cols);
-            numa_free(*A_local, sizeof(T) * rows * cols);
-            numa_free(A_local, sizeof(void*) * rows);
-        }
-        stop(&timer, 3);
+		start(&timer, 3, 0);
+		if (!numa_node_in_is_local) {
+			numa_free(x_local, sizeof(T) * cols);
+			numa_free(*A_local, sizeof(T) * rows * cols);
+			numa_free(A_local, sizeof(void *) * rows);
+		}
+		stop(&timer, 3);
 #endif
 
 #if NUMA_MEMCPY
-        printf("[::] GEMV-CPU-MEMCPY | n_threads=%d e_type=%s n_elements=%ld"
-            " numa_node_in=%d numa_node_out=%d numa_node_cpu=%d numa_node_local=%d numa_node_cpu_memcpy=%d numa_distance_in_cpu=%d numa_distance_cpu_out=%d"
-            " | throughput_MBps=%f throughput_MOpps=%f",
-            nr_threads, XSTR(T), rows * cols,
-            numa_node_in, numa_node_out, numa_node_cpu, numa_node_local, numa_node_cpu_memcpy, numa_distance(numa_node_in, numa_node_cpu), numa_distance(numa_node_cpu, numa_node_out),
-            rows * cols * sizeof(T) / timer.time[0],
-            rows * cols / timer.time[0]);
-        printf(" latency_kernel_us=%f latency_alloc_us=%f latency_memcpy_us=%f latency_free_us=%f latency_total_us=%f\n",
-            timer.time[0], timer.time[1], timer.time[2], timer.time[3],
-            timer.time[0] + timer.time[1] + timer.time[2] + timer.time[3]);
+		printf
+		    ("[::] GEMV-CPU-MEMCPY | n_threads=%d e_type=%s n_elements=%ld"
+		     " numa_node_in=%d numa_node_out=%d numa_node_cpu=%d numa_node_local=%d numa_node_cpu_memcpy=%d numa_distance_in_cpu=%d numa_distance_cpu_out=%d"
+		     " | throughput_MBps=%f throughput_MOpps=%f", nr_threads,
+		     XSTR(T), rows * cols, numa_node_in, numa_node_out,
+		     numa_node_cpu, numa_node_local, numa_node_cpu_memcpy,
+		     numa_distance(numa_node_in, numa_node_cpu),
+		     numa_distance(numa_node_cpu, numa_node_out),
+		     rows * cols * sizeof(T) / timer.time[0],
+		     rows * cols / timer.time[0]);
+		printf
+		    (" latency_kernel_us=%f latency_alloc_us=%f latency_memcpy_us=%f latency_free_us=%f latency_total_us=%f\n",
+		     timer.time[0], timer.time[1], timer.time[2], timer.time[3],
+		     timer.time[0] + timer.time[1] + timer.time[2] +
+		     timer.time[3]);
 #else
-        printf("[::] GEMV-CPU | n_threads=%d e_type=%s n_elements=%ld"
+		printf("[::] GEMV-CPU | n_threads=%d e_type=%s n_elements=%ld"
 #if NUMA
-            " numa_node_in=%d numa_node_out=%d numa_node_cpu=%d numa_distance_in_cpu=%d numa_distance_cpu_out=%d"
+		       " numa_node_in=%d numa_node_out=%d numa_node_cpu=%d numa_distance_in_cpu=%d numa_distance_cpu_out=%d"
 #endif
-            " | throughput_MBps=%f",
-            nr_threads, XSTR(T), rows * cols,
+		       " | throughput_MBps=%f",
+		       nr_threads, XSTR(T), rows * cols,
 #if NUMA
-            numa_node_in, numa_node_out, numa_node_cpu, numa_distance(numa_node_in, numa_node_cpu), numa_distance(numa_node_cpu, numa_node_out),
+		       numa_node_in, numa_node_out, numa_node_cpu,
+		       numa_distance(numa_node_in, numa_node_cpu),
+		       numa_distance(numa_node_cpu, numa_node_out),
 #endif
-            rows * cols * sizeof(T) / timer.time[0]);
-        printf(" throughput_MOpps=%f latency_us=%f\n",
-            rows * cols / timer.time[0], timer.time[0]);
+		       rows * cols * sizeof(T) / timer.time[0]);
+		printf(" throughput_MOpps=%f latency_us=%f\n",
+		       rows * cols / timer.time[0], timer.time[0]);
 #endif
-    }
-
+	}
 
 #if 0
-  print_vec(x, rows);
-  print_mat(A, rows, cols);
-  print_vec(b, rows);
+	print_vec(x, rows);
+	print_mat(A, rows, cols);
+	print_vec(b, rows);
 #endif
 
 #if TYPE_double || TYPE_float
-  printf("sum(x) = %f, sum(Ax) = %f\n", sum_vec(x,cols), sum_vec(b,rows));
+	printf("sum(x) = %f, sum(Ax) = %f\n", sum_vec(x, cols),
+	       sum_vec(b, rows));
 #else
-  printf("sum(x) = %d, sum(Ax) = %d\n", sum_vec(x,cols), sum_vec(b,rows));
+	printf("sum(x) = %d, sum(Ax) = %d\n", sum_vec(x, cols),
+	       sum_vec(b, rows));
 #endif
 
 #if NUMA
-  numa_free(b, sizeof(T)*rows);
-  numa_free(x, sizeof(T)*cols);
-  numa_free(*A, sizeof(T)*rows*cols);
-  numa_free(A, sizeof(void*)*rows);
+	numa_free(b, sizeof(T) * rows);
+	numa_free(x, sizeof(T) * cols);
+	numa_free(*A, sizeof(T) * rows * cols);
+	numa_free(A, sizeof(void *) * rows);
 #else
-  free(b);
-  free(x);
-  free(*A);
-  free(A);
+	free(b);
+	free(x);
+	free(*A);
+	free(A);
 #endif
 
-  return 0;
+	return 0;
 }
 
-void gemv(T** A, T* x, size_t rows, size_t cols, T** b) {
+void gemv(T **A, T *x, size_t rows, size_t cols, T **b)
+{
 #pragma omp parallel for
-  for (size_t i = 0; i < rows; i ++ )
-  for (size_t j = 0; j < cols; j ++ ) {
-    (*b)[i] = (*b)[i] + A[i][j]*x[j];
-  }
+	for (size_t i = 0; i < rows; i++)
+		for (size_t j = 0; j < cols; j++) {
+			(*b)[i] = (*b)[i] + A[i][j] * x[j];
+		}
 }
 
-void make_hilbert_mat(size_t rows, size_t cols, T*** A) {
+void make_hilbert_mat(size_t rows, size_t cols, T ***A)
+{
 #pragma omp parallel for
-  for (size_t i = 0; i < rows; i++) {
-    for (size_t j = 0; j < cols; j++) {
+	for (size_t i = 0; i < rows; i++) {
+		for (size_t j = 0; j < cols; j++) {
 #if TYPE_double || TYPE_float
-      (*A)[i][j] = 1.0/( (T) i + (T) j + 1.0);
+			(*A)[i][j] = 1.0 / ((T) i + (T) j + 1.0);
 #else
-      (*A)[i][j] = (T)(((i+j)%10));
+			(*A)[i][j] = (T) (((i + j) % 10));
 #endif
-    }
-  }
+		}
+	}
 }
 
-T sum_vec(T* vec, size_t rows) {
-  T sum = 0;
+T sum_vec(T *vec, size_t rows)
+{
+	T sum = 0;
 #pragma omp parallel for reduction(+:sum)
-  for (int i = 0; i < rows; i++) sum = sum + vec[i];
-  return sum;
+	for (int i = 0; i < rows; i++)
+		sum = sum + vec[i];
+	return sum;
 }
author	Birte Kristina Friesel <birte.friesel@uos.de>	2025-01-16 08:18:46 +0100
committer	Birte Kristina Friesel <birte.friesel@uos.de>	2025-01-16 08:18:46 +0100
commit	d83ae32594f5407c71a67f1b913fe9893f30015e (patch)
tree	d7b5a37778811ce6847a55967c467506905ae639 /GEMV/baselines
parent	8e71c657efe8c10648849a7dc31c1689bbc7882d (diff)