diff options
-rw-r--r-- | TS/baselines/cpu/Makefile | 26 | ||||
-rwxr-xr-x | TS/baselines/cpu/run.sh | 16 | ||||
-rw-r--r-- | TS/baselines/cpu/streamp_openmp.cpp | 7 |
3 files changed, 45 insertions, 4 deletions
diff --git a/TS/baselines/cpu/Makefile b/TS/baselines/cpu/Makefile
index 80729b0..69121ed 100644
--- a/TS/baselines/cpu/Makefile
+++ b/TS/baselines/cpu/Makefile
@@ -1,4 +1,22 @@
-all:
-	g++ streamp_openmp.cpp tools.cpp -o streamp_openmp -std=c++11 -fopenmp
-run:
-	./streamp_openmp SampleInput/randomlist5M.txt 256
+
+all: streamp_openmp
+
+streamp_openmp: streamp_openmp.cpp tools.cpp
+	g++ -O2 streamp_openmp.cpp tools.cpp -o streamp_openmp -std=c++11 -fopenmp
+
+streamp_openmp_O0: streamp_openmp.cpp tools.cpp
+	g++ streamp_openmp.cpp tools.cpp -o streamp_openmp_O0 -std=c++11 -fopenmp
+
+streamp_openmp_O2: streamp_openmp.cpp tools.cpp
+	g++ -O2 streamp_openmp.cpp tools.cpp -o streamp_openmp_O2 -std=c++11 -fopenmp
+
+run: streamp_openmp
+	./streamp_openmp inputs/randomlist33M.txt 256
+
+run_O0: streamp_openmp_O0
+	./streamp_openmp_O0 inputs/randomlist33M.txt 256
+
+run_O2: streamp_openmp_O2
+	./streamp_openmp_O2 inputs/randomlist33M.txt 256
+
+.PHONY: all run run_O0 run_O2 clean
diff --git a/TS/baselines/cpu/run.sh b/TS/baselines/cpu/run.sh
new file mode 100755
index 0000000..7d8fe37
--- /dev/null
+++ b/TS/baselines/cpu/run.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+set -e
+
+echo "prim-benchmarks TS CPU (dfatool edition)"
+echo "Started at $(date)"
+echo "Revision $(git describe --always)"
+
+# input size depends on file -> strong scaling only
+
+make
+for i in $(seq 1 10); do
+	for nr_threads in 1 2 4 6 8 12 16 20 24 32; do
+		OMP_NUM_THREADS=${nr_threads} timeout --foreground -k 1m 30m ./streamp_openmp inputs/randomlist33M.txt 256 || true
+	done
+done
diff --git a/TS/baselines/cpu/streamp_openmp.cpp b/TS/baselines/cpu/streamp_openmp.cpp
index 1925e52..94f110f 100644
--- a/TS/baselines/cpu/streamp_openmp.cpp
+++ b/TS/baselines/cpu/streamp_openmp.cpp
@@ -39,6 +39,9 @@ The second column of the output file is the matrix profile index.
#include <chrono>
#include <omp.h>
+#define XSTR(x) STR(x)
+#define STR(x) #x
+
#include "mprofile.h"
bool interrupt = false;
@@ -335,6 +338,7 @@ int main(int argc, char* argv[])
tend = std::chrono::high_resolution_clock::now();
time_elapsed = tend - tstart;
std::cout << "[OK] Preprocess Time: " << std::setprecision(std::numeric_limits<double>::digits10 + 2) << time_elapsed.count() << " seconds." << std::endl;
+ printf("[::] n_threads=%d e_type=%s n_elements=%d | throughput_preproc_MBps=%f throughput_preproc_MOpps=%f\n", numThreads, XSTR(DTYPE), timeSeriesLength, timeSeriesLength * sizeof(DTYPE) / (time_elapsed.count() * 1e6), timeSeriesLength / (time_elapsed.count() * 1e6));
//Initialize Matrix Profile and Matrix Profile Index
std::cout << "[>>] Initializing Profile..." << std::endl;
@@ -351,6 +355,7 @@ int main(int argc, char* argv[])
tend = std::chrono::high_resolution_clock::now();
time_elapsed = tend - tstart;
std::cout << "[OK] Initialize Profile Time: " << std::setprecision(std::numeric_limits<DTYPE>::digits10 + 2) << time_elapsed.count() << " seconds." << std::endl;
+ printf("[::] n_threads=%d e_type=%s n_elements=%d | throughput_init_MBps=%f throughput_init_MOpps=%f\n", numThreads, XSTR(DTYPE), timeSeriesLength, timeSeriesLength * sizeof(DTYPE) / (time_elapsed.count() * 1e6), timeSeriesLength / (time_elapsed.count() * 1e6));
// Random shuffle the diagonals
idx.clear();
@@ -369,6 +374,7 @@ int main(int argc, char* argv[])
tend = std::chrono::high_resolution_clock::now();
time_elapsed = tend - tstart;
std::cout << "[OK] STREAMP Time: " << std::setprecision(std::numeric_limits<DTYPE>::digits10 + 2) << time_elapsed.count() << " seconds." << std::endl;
+ printf("[::] n_threads=%d e_type=%s n_elements=%d | throughput_streamp_MBps=%f throughput_streamp_MOpps=%f\n", numThreads, XSTR(DTYPE), timeSeriesLength, timeSeriesLength * sizeof(DTYPE) / (time_elapsed.count() * 1e6), timeSeriesLength / (time_elapsed.count() * 1e6));
// Save profile to file
//std::cout << "[>>] Saving Profile..." << std::endl;
@@ -383,6 +389,7 @@ int main(int argc, char* argv[])
// Calculate total time
time_elapsed = tend - tprogstart;
std::cout << "[OK] Total Time: " << std::setprecision(std::numeric_limits<DTYPE>::digits10 + 2) << time_elapsed.count() << " seconds." << std::endl;
+ printf("[::] n_threads=%d e_type=%s n_elements=%d | throughput_total_MBps=%f throughput_total_MOpps=%f\n", numThreads, XSTR(DTYPE), timeSeriesLength, timeSeriesLength * sizeof(DTYPE) / (time_elapsed.count() * 1e6), timeSeriesLength / (time_elapsed.count() * 1e6));
std::cout << std::endl;
delete profile;
|