diff options
Diffstat (limited to 'GEMV/baselines')
-rw-r--r-- | GEMV/baselines/cpu/Makefile | 9 | ||||
-rw-r--r-- | GEMV/baselines/cpu/gemv_openmp.c | 88 | ||||
-rw-r--r-- | GEMV/baselines/cpu/gemv_utils.h | 9 |
3 files changed, 103 insertions, 3 deletions
diff --git a/GEMV/baselines/cpu/Makefile b/GEMV/baselines/cpu/Makefile index 6f369a7..4f91f97 100644 --- a/GEMV/baselines/cpu/Makefile +++ b/GEMV/baselines/cpu/Makefile @@ -1,8 +1,15 @@ +NUMA ?= 0 +FLAGS = + +ifeq (${NUMA}, 1) + FLAGS += -lnuma +endif + .PHONY: all all: gemv gemv: gemv_openmp.c - gcc -O2 -o gemv -fopenmp gemv_openmp.c + gcc -O2 -Wall -Wextra -pedantic -o gemv -fopenmp -DNUMA=${NUMA} gemv_openmp.c ${FLAGS} gemv_O0: gemv_openmp.c gcc -o gemv_O0 -fopenmp gemv_openmp.c diff --git a/GEMV/baselines/cpu/gemv_openmp.c b/GEMV/baselines/cpu/gemv_openmp.c index df70be3..3af84e5 100644 --- a/GEMV/baselines/cpu/gemv_openmp.c +++ b/GEMV/baselines/cpu/gemv_openmp.c @@ -1,22 +1,100 @@ #include <stdlib.h> #include <stdio.h> #include "../../support/timer.h" + +#if NUMA +#include <numaif.h> +#include <numa.h> + +struct bitmask* bitmask_in; +struct bitmask* bitmask_out; + +void* mp_pages[1]; +int mp_status[1]; +int mp_nodes[1]; +int numa_node_in = -1; +int numa_node_out = -1; +int numa_node_cpu = -1; +#endif + #include "gemv_utils.h" int main(int argc, char *argv[]) { + (void) argc; const size_t rows = 20480; const size_t cols = 8192; double **A, *b, *x; +#if NUMA + bitmask_in = numa_parse_nodestring(argv[1]); + bitmask_out = numa_parse_nodestring(argv[2]); + numa_node_cpu = atoi(argv[3]); +#endif + +#if NUMA + if (bitmask_out) { + numa_set_membind(bitmask_out); + numa_free_nodemask(bitmask_out); + } + b = (double*) numa_alloc(sizeof(double)*rows); +#else b = (double*) malloc(sizeof(double)*rows); +#endif + +#if NUMA + if (bitmask_in) { + numa_set_membind(bitmask_in); + // no free yet, re-used in allocate_dense + } + x = (double*) numa_alloc(sizeof(double)*cols); +#else x = (double*) malloc(sizeof(double)*cols); +#endif allocate_dense(rows, cols, &A); make_hilbert_mat(rows,cols, &A); +#if NUMA + struct bitmask *bitmask_all = numa_allocate_nodemask(); + numa_bitmask_setall(bitmask_all); + numa_set_membind(bitmask_all); + numa_free_nodemask(bitmask_all); +#endif + +#if NUMA + mp_pages[0] = A; + if (move_pages(0, 1, mp_pages, NULL, mp_status, 0) == -1) { + perror("move_pages(A)"); + } + else if (mp_status[0] < 0) { + printf("move_pages(A) error: %d", mp_status[0]); + } + else { + numa_node_in = mp_status[0]; + } + + mp_pages[0] = b; + if (move_pages(0, 1, mp_pages, NULL, mp_status, 0) == -1) { + perror("move_pages(b)"); + } + else if (mp_status[0] < 0) { + printf("move_pages(b) error: %d", mp_status[0]); + } + else { + numa_node_out = mp_status[0]; + } + + if (numa_node_cpu != -1) { + if (numa_run_on_node(numa_node_cpu) == -1) { + perror("numa_run_on_node"); + numa_node_cpu = -1; + } + } +#endif + Timer timer; for (int i = 0; i < 100; i++) { @@ -41,9 +119,15 @@ int main(int argc, char *argv[]) start(&timer, 0, 0); gemv(A, x, rows, cols, &b); stop(&timer, 0); - printf("[::] GEMV CPU | n_threads=%d e_type=%s n_elements=%d " - "| throughput_MBps=%f", + printf("[::] GEMV CPU | n_threads=%d e_type=%s n_elements=%ld" +#if NUMA + " numa_node_in=%d numa_node_out=%d numa_node_cpu=%d numa_distance_in_cpu=%d numa_distance_cpu_out=%d" +#endif + " | throughput_MBps=%f", nr_threads, "double", rows * cols, +#if NUMA + numa_node_in, numa_node_out, numa_node_cpu, numa_distance(numa_node_in, numa_node_cpu), numa_distance(numa_node_cpu, numa_node_out), +#endif rows * cols * sizeof(double) / timer.time[0]); printf(" throughput_MOpps=%f", rows * cols / timer.time[0]); diff --git a/GEMV/baselines/cpu/gemv_utils.h b/GEMV/baselines/cpu/gemv_utils.h index 605f148..c99763a 100644 --- a/GEMV/baselines/cpu/gemv_utils.h +++ b/GEMV/baselines/cpu/gemv_utils.h @@ -1,7 +1,16 @@ void allocate_dense(size_t rows,size_t cols, double*** dense) { +#if NUMA + if (bitmask_in) { + numa_set_membind(bitmask_in); + numa_free_nodemask(bitmask_in); + } + *dense = numa_alloc(sizeof(double)*rows); + **dense = numa_alloc(sizeof(double)*rows*cols); +#else *dense = malloc(sizeof(double)*rows); **dense = malloc(sizeof(double)*rows*cols); +#endif for (size_t i=0; i < rows; i++ ) { (*dense)[i] = (*dense)[0] + i*cols; |