summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xbenchmark-scripts/milos-read.sh14
-rwxr-xr-xbenchmark-scripts/milos-roofline.sh (renamed from milos-roofline.sh)0
-rwxr-xr-xbenchmark-scripts/milos-write.sh14
-rw-r--r--mbw.c30
4 files changed, 52 insertions, 6 deletions
diff --git a/benchmark-scripts/milos-read.sh b/benchmark-scripts/milos-read.sh
new file mode 100755
index 0000000..b8c49c9
--- /dev/null
+++ b/benchmark-scripts/milos-read.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+mkdir -p log/${HOST}
+fn=log/${HOST}/read
+
+make -B numa=1 pthread=1
+
+parallel -j1 --eta --joblog ${fn}.joblog --resume --header : \
+ ./mbw -a {ram_in} -b {ram_out} -c {cpu} -n 10 -N {nr_threads} -t4 4096 \
+ ::: ram_in $(seq 0 15) \
+ :::+ ram_out $(seq 0 15) \
+ ::: cpu $(seq 0 7) \
+ ::: nr_threads $(seq 1 16) \
+>> ${fn}.txt
diff --git a/milos-roofline.sh b/benchmark-scripts/milos-roofline.sh
index 092d147..092d147 100755
--- a/milos-roofline.sh
+++ b/benchmark-scripts/milos-roofline.sh
diff --git a/benchmark-scripts/milos-write.sh b/benchmark-scripts/milos-write.sh
new file mode 100755
index 0000000..c24a3de
--- /dev/null
+++ b/benchmark-scripts/milos-write.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+mkdir -p log/${HOST}
+fn=log/${HOST}/write
+
+make -B numa=1 pthread=1
+
+parallel -j1 --eta --joblog ${fn}.joblog --resume --header : \
+ ./mbw -a {ram_in} -b {ram_out} -c {cpu} -n 10 -N {nr_threads} -t5 4096 \
+ ::: ram_out $(seq 0 15) \
+ :::+ ram_in $(seq 0 15) \
+ ::: cpu $(seq 0 7) \
+ ::: nr_threads $(seq 1 16) \
+>> ${fn}.txt
diff --git a/mbw.c b/mbw.c
index 991c37a..5539608 100644
--- a/mbw.c
+++ b/mbw.c
@@ -428,7 +428,7 @@ void *thread_worker(void *arg)
for(t=plain_start; t<plain_stop; t++) {
tmp ^= arr_a[t];
}
- arr_b[plain_stop-1] = tmp;
+ arr_a[plain_stop-1] = tmp;
} else if(test_type==TEST_WRITE_PLAIN) {
long tmp = 0;
for(t=plain_start; t<plain_stop; t++) {
@@ -525,7 +525,7 @@ double worker()
tmp ^= arr_a[t];
}
clock_gettime(CLOCK_MONOTONIC, &endtime);
- arr_b[arr_size-1] = tmp;
+ arr_a[arr_size-1] = tmp;
} else if(test_type==TEST_WRITE_PLAIN) {
long tmp = 0;
clock_gettime(CLOCK_MONOTONIC, &starttime);
@@ -639,11 +639,20 @@ int main(int argc, char **argv)
}
}
- /* default is to run all tests if no specific tests were requested */
+#ifndef HAVE_AVX512
+ if (tests[TEST_AVX512]) {
+ printf("Error: AVX512 memcpy requested, but this mbw build has been compiled without AVX512 support\n");
+ exit(1);
+ }
+#endif
+
+ /* default is to run most tests if no specific tests were requested */
if( (tests[0]+tests[1]+tests[2]+tests[3]+tests[4]+tests[5]) == 0) {
tests[0]=1;
tests[1]=1;
tests[2]=1;
+ tests[4]=1;
+ tests[5]=1;
}
if( nr_loops==0 && ((tests[0]+tests[1]+tests[2]+tests[3]+tests[4]+tests[5]) != 1) ) {
@@ -675,7 +684,6 @@ int main(int argc, char **argv)
if(!quiet) {
printf("Long uses %d bytes. ", long_size);
- printf("Allocating 2*%lld elements = %lld bytes of memory.\n", arr_size, 2*arr_size*long_size);
if(tests[2]) {
printf("Using %lld bytes as blocks for memcpy block copy test.\n", block_size);
}
@@ -689,7 +697,12 @@ int main(int argc, char **argv)
numa_free_nodemask(bitmask_a);
}
#endif
- arr_a=make_array();
+ if (tests[TEST_MEMCPY]+tests[TEST_PLAIN]+tests[TEST_MCBLOCK]+tests[TEST_AVX512]+tests[TEST_READ_PLAIN]) {
+ if (!quiet) {
+ printf("Allocating %lld elements = %lld MiB of input memory.\n", arr_size, arr_size*long_size / 1024 / 1024);
+ }
+ arr_a=make_array();
+ }
#ifdef NUMA
if (bitmask_b) {
@@ -697,7 +710,12 @@ int main(int argc, char **argv)
numa_free_nodemask(bitmask_b);
}
#endif
- arr_b=make_array();
+ if (tests[TEST_MEMCPY]+tests[TEST_PLAIN]+tests[TEST_MCBLOCK]+tests[TEST_AVX512]+tests[TEST_WRITE_PLAIN]) {
+ if (!quiet) {
+ printf("Allocating %lld elements = %lld MiB of output memory.\n", arr_size, arr_size*long_size / 1024 / 1024);
+ }
+ arr_b=make_array();
+ }
#ifdef NUMA
numa_set_membind(bitmask_all);