summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- GEMV/baselines/cpu/Makefile 9
-rw-r--r-- GEMV/baselines/cpu/gemv_openmp.c 88
-rw-r--r-- GEMV/baselines/cpu/gemv_utils.h 9
-rwxr-xr-x GEMV/dimes-hetsim.sh 30
4 files changed, 133 insertions, 3 deletions
diff --git a/GEMV/baselines/cpu/Makefile b/GEMV/baselines/cpu/Makefile
index 6f369a7..4f91f97 100644
--- a/GEMV/baselines/cpu/Makefile
+++ b/GEMV/baselines/cpu/Makefile
@@ -1,8 +1,15 @@
+NUMA ?= 0
+FLAGS =
+
+ifeq (${NUMA}, 1)
+ FLAGS += -lnuma
+endif
+
.PHONY: all
all: gemv
gemv: gemv_openmp.c
- gcc -O2 -o gemv -fopenmp gemv_openmp.c
+ gcc -O2 -Wall -Wextra -pedantic -o gemv -fopenmp -DNUMA=${NUMA} gemv_openmp.c ${FLAGS}
gemv_O0: gemv_openmp.c
gcc -o gemv_O0 -fopenmp gemv_openmp.c
diff --git a/GEMV/baselines/cpu/gemv_openmp.c b/GEMV/baselines/cpu/gemv_openmp.c
index df70be3..3af84e5 100644
--- a/GEMV/baselines/cpu/gemv_openmp.c
+++ b/GEMV/baselines/cpu/gemv_openmp.c
@@ -1,22 +1,100 @@
#include <stdlib.h>
#include <stdio.h>
#include "../../support/timer.h"
+
+#if NUMA
+#include <numaif.h>
+#include <numa.h>
+
+struct bitmask* bitmask_in;
+struct bitmask* bitmask_out;
+
+void* mp_pages[1];
+int mp_status[1];
+int mp_nodes[1];
+int numa_node_in = -1;
+int numa_node_out = -1;
+int numa_node_cpu = -1;
+#endif
+
#include "gemv_utils.h"
int main(int argc, char *argv[])
{
+ (void) argc;
const size_t rows = 20480;
const size_t cols = 8192;
double **A, *b, *x;
+#if NUMA
+ bitmask_in = numa_parse_nodestring(argv[1]);
+ bitmask_out = numa_parse_nodestring(argv[2]);
+ numa_node_cpu = atoi(argv[3]);
+#endif
+
+#if NUMA
+ if (bitmask_out) {
+ numa_set_membind(bitmask_out);
+ numa_free_nodemask(bitmask_out);
+ }
+ b = (double*) numa_alloc(sizeof(double)*rows);
+#else
b = (double*) malloc(sizeof(double)*rows);
+#endif
+
+#if NUMA
+ if (bitmask_in) {
+ numa_set_membind(bitmask_in);
+ // no free yet, re-used in allocate_dense
+ }
+ x = (double*) numa_alloc(sizeof(double)*cols);
+#else
x = (double*) malloc(sizeof(double)*cols);
+#endif
allocate_dense(rows, cols, &A);
make_hilbert_mat(rows,cols, &A);
+#if NUMA
+ struct bitmask *bitmask_all = numa_allocate_nodemask();
+ numa_bitmask_setall(bitmask_all);
+ numa_set_membind(bitmask_all);
+ numa_free_nodemask(bitmask_all);
+#endif
+
+#if NUMA
+ mp_pages[0] = A;
+ if (move_pages(0, 1, mp_pages, NULL, mp_status, 0) == -1) {
+ perror("move_pages(A)");
+ }
+ else if (mp_status[0] < 0) {
+ printf("move_pages(A) error: %d", mp_status[0]);
+ }
+ else {
+ numa_node_in = mp_status[0];
+ }
+
+ mp_pages[0] = b;
+ if (move_pages(0, 1, mp_pages, NULL, mp_status, 0) == -1) {
+ perror("move_pages(b)");
+ }
+ else if (mp_status[0] < 0) {
+ printf("move_pages(b) error: %d", mp_status[0]);
+ }
+ else {
+ numa_node_out = mp_status[0];
+ }
+
+ if (numa_node_cpu != -1) {
+ if (numa_run_on_node(numa_node_cpu) == -1) {
+ perror("numa_run_on_node");
+ numa_node_cpu = -1;
+ }
+ }
+#endif
+
Timer timer;
for (int i = 0; i < 100; i++) {
@@ -41,9 +119,15 @@ int main(int argc, char *argv[])
start(&timer, 0, 0);
gemv(A, x, rows, cols, &b);
stop(&timer, 0);
- printf("[::] GEMV CPU | n_threads=%d e_type=%s n_elements=%d "
- "| throughput_MBps=%f",
+ printf("[::] GEMV CPU | n_threads=%d e_type=%s n_elements=%ld"
+#if NUMA
+ " numa_node_in=%d numa_node_out=%d numa_node_cpu=%d numa_distance_in_cpu=%d numa_distance_cpu_out=%d"
+#endif
+ " | throughput_MBps=%f",
nr_threads, "double", rows * cols,
+#if NUMA
+ numa_node_in, numa_node_out, numa_node_cpu, numa_distance(numa_node_in, numa_node_cpu), numa_distance(numa_node_cpu, numa_node_out),
+#endif
rows * cols * sizeof(double) / timer.time[0]);
printf(" throughput_MOpps=%f",
rows * cols / timer.time[0]);
diff --git a/GEMV/baselines/cpu/gemv_utils.h b/GEMV/baselines/cpu/gemv_utils.h
index 605f148..c99763a 100644
--- a/GEMV/baselines/cpu/gemv_utils.h
+++ b/GEMV/baselines/cpu/gemv_utils.h
@@ -1,7 +1,16 @@
void allocate_dense(size_t rows,size_t cols, double*** dense) {
+#if NUMA
+ if (bitmask_in) {
+ numa_set_membind(bitmask_in);
+ numa_free_nodemask(bitmask_in);
+ }
+ *dense = numa_alloc(sizeof(double)*rows);
+ **dense = numa_alloc(sizeof(double)*rows*cols);
+#else
*dense = malloc(sizeof(double)*rows);
**dense = malloc(sizeof(double)*rows*cols);
+#endif
for (size_t i=0; i < rows; i++ ) {
(*dense)[i] = (*dense)[0] + i*cols;
diff --git a/GEMV/dimes-hetsim.sh b/GEMV/dimes-hetsim.sh
new file mode 100755
index 0000000..d7163d5
--- /dev/null
+++ b/GEMV/dimes-hetsim.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+cd baselines/cpu
+make -B NUMA=1
+
+mkdir -p log/$(hostname)
+fn=log/$(hostname)/$(date +%Y%m%d)-baseline.txt
+
+# gemv hardcodes 167772160 double elements → 1.25 GiB of data
+
+(
+
+for i in `seq 1 20`; do
+ for nr_threads in 1 2 4 8 12 16; do
+ for cpu in 0 1 2 3 4 5 6 7; do
+ for ram in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15; do
+ OMP_NUM_THREADS=$nr_threads ./gemv $ram $ram $cpu
+ done
+ done
+ done
+ for nr_threads in 32 48 64 96 128; do
+ for ram in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15; do
+ OMP_NUM_THREADS=$nr_threads ./gemv $ram $ram -1
+ done
+ done
+done
+
+) | tee $fn
+
+xz -f -v -9 -M 800M $fn