diff options
-rw-r--r-- | MLP/baselines/cpu/Makefile | 23 | ||||
-rw-r--r-- | MLP/baselines/cpu/mlp_openmp.c | 34 | ||||
-rwxr-xr-x | MLP/baselines/cpu/run.sh | 13 |
3 files changed, 61 insertions, 9 deletions
diff --git a/MLP/baselines/cpu/Makefile b/MLP/baselines/cpu/Makefile
index 581897e..e2e6780 100644
--- a/MLP/baselines/cpu/Makefile
+++ b/MLP/baselines/cpu/Makefile
@@ -1,4 +1,21 @@
-all:
-	gcc mlp_openmp.c -o mlp_openmp -fopenmp -std=c99
-run:
+all: mlp_openmp
+
+mlp_openmp: mlp_openmp.c
+	gcc -O2 mlp_openmp.c -o mlp_openmp -fopenmp -std=c99
+
+mlp_openmp_O0: mlp_openmp.c
+	gcc mlp_openmp.c -o mlp_openmp_O0 -fopenmp -std=c99
+
+mlp_openmp_O2: mlp_openmp.c
+	gcc -O2 mlp_openmp.c -o mlp_openmp_O2 -fopenmp -std=c99
+
+run: mlp_openmp
 	./mlp_openmp
+
+run_O0: mlp_openmp_O0
+	./mlp_openmp_O0
+
+run_O2: mlp_openmp_O2
+	./mlp_openmp_O2
+
+.PHONY: all run run_O0 run_O2
diff --git a/MLP/baselines/cpu/mlp_openmp.c b/MLP/baselines/cpu/mlp_openmp.c
index ef478c1..8f95e7c 100644
--- a/MLP/baselines/cpu/mlp_openmp.c
+++ b/MLP/baselines/cpu/mlp_openmp.c
@@ -14,6 +14,9 @@
 #include "../../support/timer.h"
 #include "../../support/common.h"

+#define XSTR(x) STR(x)
+#define STR(x) #x
+
 T** A;
 T* B;
 T* C;
@@ -136,12 +139,31 @@ void usage() {
     B = malloc(m_size*sizeof(unsigned int));
     C = malloc(m_size*sizeof(unsigned int));

-    // Create an input file with arbitrary data.
-    init_data(A, B, m_size, n_size);
-
-    start(&timer, 0, 1);
-    mlp_host(C, A, B, n_size, m_size);
-    stop(&timer, 0);
+    for (int i = 0; i < 100; i++) {
+        // Create an input file with arbitrary data.
+        init_data(A, B, m_size, n_size);
+
+        start(&timer, 0, 0);
+        mlp_host(C, A, B, n_size, m_size);
+        stop(&timer, 0);
+
+        unsigned int nr_threads = 0;
+#pragma omp parallel
+#pragma omp atomic
+        nr_threads++;
+
+        printf("[::] n_threads=%d e_type=%s n_elements=%lu "
+            "| throughput_cpu_omp_MBps=%f\n",
+            nr_threads, XSTR(T), n_size * m_size,
+            n_size * m_size * sizeof(T) / timer.time[0]);
+        printf("[::] n_threads=%d e_type=%s n_elements=%lu "
+            "| throughput_cpu_omp_MOpps=%f\n",
+            nr_threads, XSTR(T), n_size * m_size,
+            n_size * m_size / timer.time[0]);
+        printf("[::] n_threads=%d e_type=%s n_elements=%lu |",
+            nr_threads, XSTR(T), n_size * m_size);
+        printall(&timer, 0);
+    }

     uint32_t sum = mlp_host_sum(n_size, m_size);

diff --git a/MLP/baselines/cpu/run.sh b/MLP/baselines/cpu/run.sh
new file mode 100755
index 0000000..d0dcb25
--- /dev/null
+++ b/MLP/baselines/cpu/run.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+set -e
+
+echo "prim-benchmarks MLP CPU (dfatool edition)"
+echo "Started at $(date)"
+echo "Revision $(git describe --always)"
+
+make
+
+for nr_threads in 1 2 4 6 8 12 16 20 24 32; do
+	OMP_NUM_THREADS=${nr_threads} timeout --foreground -k 1m 30m ./mlp_openmp || true
+done