diff options
-rwxr-xr-x | benchmark-scripts/milos-read.sh | 14 | ||||
-rwxr-xr-x | benchmark-scripts/milos-roofline.sh (renamed from milos-roofline.sh) | 0 | ||||
-rwxr-xr-x | benchmark-scripts/milos-write.sh | 14 | ||||
-rw-r--r-- | mbw.c | 30 |
4 files changed, 52 insertions, 6 deletions
diff --git a/benchmark-scripts/milos-read.sh b/benchmark-scripts/milos-read.sh
new file mode 100755
index 0000000..b8c49c9
--- /dev/null
+++ b/benchmark-scripts/milos-read.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+mkdir -p log/${HOST}
+fn=log/${HOST}/read
+
+make -B numa=1 pthread=1
+
+parallel -j1 --eta --joblog ${fn}.joblog --resume --header : \
+    ./mbw -a {ram_in} -b {ram_out} -c {cpu} -n 10 -N {nr_threads} -t4 4096 \
+    ::: ram_in $(seq 0 15) \
+    :::+ ram_out $(seq 0 15) \
+    ::: cpu $(seq 0 7) \
+    ::: nr_threads $(seq 1 16) \
+>> ${fn}.txt
diff --git a/milos-roofline.sh b/benchmark-scripts/milos-roofline.sh
index 092d147..092d147 100755
--- a/milos-roofline.sh
+++ b/benchmark-scripts/milos-roofline.sh
diff --git a/benchmark-scripts/milos-write.sh b/benchmark-scripts/milos-write.sh
new file mode 100755
index 0000000..c24a3de
--- /dev/null
+++ b/benchmark-scripts/milos-write.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+mkdir -p log/${HOST}
+fn=log/${HOST}/write
+
+make -B numa=1 pthread=1
+
+parallel -j1 --eta --joblog ${fn}.joblog --resume --header : \
+    ./mbw -a {ram_in} -b {ram_out} -c {cpu} -n 10 -N {nr_threads} -t5 4096 \
+    ::: ram_out $(seq 0 15) \
+    :::+ ram_in $(seq 0 15) \
+    ::: cpu $(seq 0 7) \
+    ::: nr_threads $(seq 1 16) \
+>> ${fn}.txt
diff --git a/mbw.c b/mbw.c
--- a/mbw.c
+++ b/mbw.c
@@ -428,7 +428,7 @@ void *thread_worker(void *arg)
         for(t=plain_start; t<plain_stop; t++) {
             tmp ^= arr_a[t];
         }
-        arr_b[plain_stop-1] = tmp;
+        arr_a[plain_stop-1] = tmp;
     } else if(test_type==TEST_WRITE_PLAIN) {
         long tmp = 0;
         for(t=plain_start; t<plain_stop; t++) {
@@ -525,7 +525,7 @@ double worker()
             tmp ^= arr_a[t];
         }
         clock_gettime(CLOCK_MONOTONIC, &endtime);
-        arr_b[arr_size-1] = tmp;
+        arr_a[arr_size-1] = tmp;
     } else if(test_type==TEST_WRITE_PLAIN) {
         long tmp = 0;
         clock_gettime(CLOCK_MONOTONIC, &starttime);
@@ -639,11 +639,20 @@ int main(int argc, char **argv)
         }
     }
 
-    /* default is to run all tests if no specific tests were requested */
+#ifndef HAVE_AVX512
+    if (tests[TEST_AVX512]) {
+        printf("Error: AVX512 memcpy requested, but this mbw build has been compiled without AVX512 support\n");
+        exit(1);
+    }
+#endif
+
+    /* default is to run most tests if no specific tests were requested */
     if( (tests[0]+tests[1]+tests[2]+tests[3]+tests[4]+tests[5]) == 0) {
         tests[0]=1;
         tests[1]=1;
         tests[2]=1;
+        tests[4]=1;
+        tests[5]=1;
     }
 
     if( nr_loops==0 && ((tests[0]+tests[1]+tests[2]+tests[3]+tests[4]+tests[5]) != 1) ) {
@@ -675,7 +684,6 @@ int main(int argc, char **argv)
 
     if(!quiet) {
         printf("Long uses %d bytes. ", long_size);
-        printf("Allocating 2*%lld elements = %lld bytes of memory.\n", arr_size, 2*arr_size*long_size);
         if(tests[2]) {
             printf("Using %lld bytes as blocks for memcpy block copy test.\n", block_size);
         }
@@ -689,7 +697,12 @@ int main(int argc, char **argv)
         numa_free_nodemask(bitmask_a);
     }
 #endif
-    arr_a=make_array();
+    if (tests[TEST_MEMCPY]+tests[TEST_PLAIN]+tests[TEST_MCBLOCK]+tests[TEST_AVX512]+tests[TEST_READ_PLAIN]) {
+        if (!quiet) {
+            printf("Allocating %lld elements = %lld MiB of input memory.\n", arr_size, arr_size*long_size / 1024 / 1024);
+        }
+        arr_a=make_array();
+    }
 
 #ifdef NUMA
     if (bitmask_b) {
@@ -697,7 +710,12 @@ int main(int argc, char **argv)
         numa_free_nodemask(bitmask_b);
     }
 #endif
-    arr_b=make_array();
+    if (tests[TEST_MEMCPY]+tests[TEST_PLAIN]+tests[TEST_MCBLOCK]+tests[TEST_AVX512]+tests[TEST_WRITE_PLAIN]) {
+        if (!quiet) {
+            printf("Allocating %lld elements = %lld MiB of output memory.\n", arr_size, arr_size*long_size / 1024 / 1024);
+        }
+        arr_b=make_array();
+    }
 
 #ifdef NUMA
     numa_set_membind(bitmask_all);