diff options
Diffstat (limited to 'VA/baselines/cpu')
-rw-r--r-- VA/baselines/cpu/Makefile       | 30
-rw-r--r-- VA/baselines/cpu/app_baseline.c | 59
-rwxr-xr-x VA/baselines/cpu/run.sh         | 22
3 files changed, 90 insertions, 21 deletions
diff --git a/VA/baselines/cpu/Makefile b/VA/baselines/cpu/Makefile
index f320d87..3171e19 100644
--- a/VA/baselines/cpu/Makefile
+++ b/VA/baselines/cpu/Makefile
@@ -1,5 +1,29 @@
-all:
-	gcc -o va -fopenmp app_baseline.c
+.PHONY: all
+all: va
 
+TYPE ?= int32_t
+
+va: app_baseline.c
+	gcc -O2 -o va -fopenmp -DT=${TYPE} app_baseline.c
+
+va_O0: app_baseline.c
+	gcc -o va_O0 -fopenmp app_baseline.c
+
+va_O2: app_baseline.c
+	gcc -O2 -o va_O2 -fopenmp app_baseline.c
+
+.PHONY: run
+run: va
+	./va -t 4
+
+.PHONY: run_O0
+run_O0: va_O0
+	./va_O0 -t 4
+
+.PHONY: run_O2
+run_O2: va_O2
+	./va_O2 -t 4
+
+.PHONY: clean
 clean:
-	rm va
+	rm -f va va_O0 va_O2
diff --git a/VA/baselines/cpu/app_baseline.c b/VA/baselines/cpu/app_baseline.c
index ecd8efa..b4396d2 100644
--- a/VA/baselines/cpu/app_baseline.c
+++ b/VA/baselines/cpu/app_baseline.c
@@ -15,10 +15,17 @@
 #include <omp.h>
 #include "../../support/timer.h"
 
-static int32_t *A;
-static int32_t *B;
-static int32_t *C;
-static int32_t *C2;
+#define XSTR(x) STR(x)
+#define STR(x) #x
+
+#ifndef T
+#define T int32_t
+#endif
+
+static T *A;
+static T *B;
+static T *C;
+static T *C2;
 
 /**
 * @brief creates a "test file" by filling a buffer of 64MB with pseudo-random values
@@ -27,14 +34,13 @@ static int32_t *C2;
 */
 void *create_test_file(unsigned int nr_elements) {
     srand(0);
-    printf("nr_elements\t%u\t", nr_elements);
-    A = (uint32_t*) malloc(nr_elements * sizeof(uint32_t));
-    B = (uint32_t*) malloc(nr_elements * sizeof(uint32_t));
-    C = (uint32_t*) malloc(nr_elements * sizeof(uint32_t));
+    A = (T*) malloc(nr_elements * sizeof(T));
+    B = (T*) malloc(nr_elements * sizeof(T));
+    C = (T*) malloc(nr_elements * sizeof(T));
 
     for (int i = 0; i < nr_elements; i++) {
-        A[i] = (int) (rand());
-        B[i] = (int) (rand());
+        A[i] = (T) (rand());
+        B[i] = (T) (rand());
     }
 
 }
@@ -115,14 +121,31 @@ int main(int argc, char **argv) {
     create_test_file(file_size);
 
     Timer timer;
-    start(&timer, 0, 0);
-
-    vector_addition_host(file_size, p.n_threads);
-
-    stop(&timer, 0);
-    printf("Kernel ");
-    print(&timer, 0, 1);
-    printf("\n");
+
+    for(int rep = 0; rep < p.n_warmup + p.n_reps; rep++) {
+        start(&timer, 0, 0);
+        vector_addition_host(file_size, p.n_threads);
+        stop(&timer, 0);
+
+        unsigned int nr_threads = 0;
+#pragma omp parallel
+#pragma omp atomic
+        nr_threads++;
+
+        if (rep >= p.n_warmup) {
+            printf("[::] n_threads=%d e_type=%s n_elements=%d "
+                "| throughput_cpu_MBps=%f\n",
+                nr_threads, XSTR(T), file_size,
+                file_size * 3 * sizeof(T) / timer.time[0]);
+            printf("[::] n_threads=%d e_type=%s n_elements=%d "
+                "| throughput_cpu_MOpps=%f\n",
+                nr_threads, XSTR(T), file_size,
+                file_size / timer.time[0]);
+            printf("[::] n_threads=%d e_type=%s n_elements=%d |",
+                nr_threads, XSTR(T), file_size);
+            printall(&timer, 0);
+        }
+    }
 
     free(A);
     free(B);
diff --git a/VA/baselines/cpu/run.sh b/VA/baselines/cpu/run.sh
new file mode 100755
index 0000000..dd4acad
--- /dev/null
+++ b/VA/baselines/cpu/run.sh
@@ -0,0 +1,22 @@
+#!/bin/sh
+
+set -e
+
+echo "prim-benchmarks VA CPU (dfatool edition)"
+echo "Started at $(date)"
+echo "Revision $(git describe --always)"
+
+# default input size: 16777216
+# default threads: 4
+# default type: int32_t
+
+for nr_threads in 1 2 4 6 8 12 16 20 24 32; do
+	for i in 16777216 8388608 4194304 33554432 67108864; do
+		for dt in int8_t int16_t int32_t int64_t float double; do
+			if make -B TYPE=${dt}; then
+				# -w 1 to make sure that target array (C) is allocated
+				timeout -k 1m 30m ./va -i ${i} -w 1 -e 100 -t ${nr_threads} || true
+			fi
+		done
+	done
+done