summary | refs | log | tree | commit | diff
path: root/VA/baselines/cpu
diff options
context:
space:
mode:
Diffstat (limited to 'VA/baselines/cpu')
-rw-r--r-- VA/baselines/cpu/Makefile 30
-rw-r--r-- VA/baselines/cpu/app_baseline.c 59
-rwxr-xr-x VA/baselines/cpu/run.sh 22
3 files changed, 90 insertions, 21 deletions
diff --git a/VA/baselines/cpu/Makefile b/VA/baselines/cpu/Makefile
index f320d87..3171e19 100644
--- a/VA/baselines/cpu/Makefile
+++ b/VA/baselines/cpu/Makefile
@@ -1,5 +1,29 @@
-all:
- gcc -o va -fopenmp app_baseline.c
+.PHONY: all
+all: va
+TYPE ?= int32_t
+
+va: app_baseline.c
+ gcc -O2 -o va -fopenmp -DT=${TYPE} app_baseline.c
+
+va_O0: app_baseline.c
+ gcc -o va_O0 -fopenmp app_baseline.c
+
+va_O2: app_baseline.c
+ gcc -O2 -o va_O2 -fopenmp app_baseline.c
+
+.PHONY: run
+run: va
+ ./va -t 4
+
+.PHONY: run_O0
+run_O0: va_O0
+ ./va_O0 -t 4
+
+.PHONY: run_O2
+run_O2: va_O2
+ ./va_O2 -t 4
+
+.PHONY: clean
clean:
- rm va
+ rm -f va va_O0 va_O2
diff --git a/VA/baselines/cpu/app_baseline.c b/VA/baselines/cpu/app_baseline.c
index ecd8efa..b4396d2 100644
--- a/VA/baselines/cpu/app_baseline.c
+++ b/VA/baselines/cpu/app_baseline.c
@@ -15,10 +15,17 @@
#include <omp.h>
#include "../../support/timer.h"
-static int32_t *A;
-static int32_t *B;
-static int32_t *C;
-static int32_t *C2;
+#define XSTR(x) STR(x)
+#define STR(x) #x
+
+#ifndef T
+#define T int32_t
+#endif
+
+static T *A;
+static T *B;
+static T *C;
+static T *C2;
/**
* @brief creates a "test file" by filling a buffer of 64MB with pseudo-random values
@@ -27,14 +34,13 @@ static int32_t *C2;
*/
void *create_test_file(unsigned int nr_elements) {
srand(0);
- printf("nr_elements\t%u\t", nr_elements);
- A = (uint32_t*) malloc(nr_elements * sizeof(uint32_t));
- B = (uint32_t*) malloc(nr_elements * sizeof(uint32_t));
- C = (uint32_t*) malloc(nr_elements * sizeof(uint32_t));
+ A = (T*) malloc(nr_elements * sizeof(T));
+ B = (T*) malloc(nr_elements * sizeof(T));
+ C = (T*) malloc(nr_elements * sizeof(T));
for (int i = 0; i < nr_elements; i++) {
- A[i] = (int) (rand());
- B[i] = (int) (rand());
+ A[i] = (T) (rand());
+ B[i] = (T) (rand());
}
}
@@ -115,14 +121,31 @@ int main(int argc, char **argv) {
create_test_file(file_size);
Timer timer;
- start(&timer, 0, 0);
-
- vector_addition_host(file_size, p.n_threads);
-
- stop(&timer, 0);
- printf("Kernel ");
- print(&timer, 0, 1);
- printf("\n");
+
+ for(int rep = 0; rep < p.n_warmup + p.n_reps; rep++) {
+ start(&timer, 0, 0);
+ vector_addition_host(file_size, p.n_threads);
+ stop(&timer, 0);
+
+ unsigned int nr_threads = 0;
+#pragma omp parallel
+#pragma omp atomic
+ nr_threads++;
+
+ if (rep >= p.n_warmup) {
+ printf("[::] n_threads=%d e_type=%s n_elements=%d "
+ "| throughput_cpu_MBps=%f\n",
+ nr_threads, XSTR(T), file_size,
+ file_size * 3 * sizeof(T) / timer.time[0]);
+ printf("[::] n_threads=%d e_type=%s n_elements=%d "
+ "| throughput_cpu_MOpps=%f\n",
+ nr_threads, XSTR(T), file_size,
+ file_size / timer.time[0]);
+ printf("[::] n_threads=%d e_type=%s n_elements=%d |",
+ nr_threads, XSTR(T), file_size);
+ printall(&timer, 0);
+ }
+ }
free(A);
free(B);
diff --git a/VA/baselines/cpu/run.sh b/VA/baselines/cpu/run.sh
new file mode 100755
index 0000000..dd4acad
--- /dev/null
+++ b/VA/baselines/cpu/run.sh
@@ -0,0 +1,22 @@
+#!/bin/sh
+
+set -e
+
+echo "prim-benchmarks VA CPU (dfatool edition)"
+echo "Started at $(date)"
+echo "Revision $(git describe --always)"
+
+# default input size: 16777216
+# default threads: 4
+# default type: int32_t
+
+for nr_threads in 1 2 4 6 8 12 16 20 24 32; do
+ for i in 16777216 8388608 4194304 33554432 67108864; do
+ for dt in int8_t int16_t int32_t int64_t float double; do
+ if make -B TYPE=${dt}; then
+ # -w 1 to make sure that target array (C) is allocated
+ timeout -k 1m 30m ./va -i ${i} -w 1 -e 100 -t ${nr_threads} || true
+ fi
+ done
+ done
+done