diff options
-rw-r--r-- | src/experiment.cpp | 35 | ||||
-rw-r--r-- | src/experiment.h | 23 | ||||
-rw-r--r-- | src/output.cpp | 6 | ||||
-rw-r--r-- | src/run.cpp | 33 |
4 files changed, 77 insertions, 20 deletions
diff --git a/src/experiment.cpp b/src/experiment.cpp index 4bf4eff..9b6af7c 100644 --- a/src/experiment.cpp +++ b/src/experiment.cpp @@ -53,7 +53,7 @@ Experiment::Experiment() : seconds (DEFAULT_SECONDS), iterations (DEFAULT_ITERATIONS), experiments (DEFAULT_EXPERIMENTS), - prefetch (DEFAULT_PREFETCH), + prefetch_hint (NONE), output_mode (TABLE), access_pattern (RANDOM), stride (1), @@ -80,7 +80,7 @@ Experiment::~Experiment() { // -i or --iters iterations // -e or --experiments experiments // -g or --loop cycles to execute for each iteration (latency hiding) -// -f or --prefetch prefetch data +// -f or --prefetch use of prefetching // -a or --access memory access pattern // random random access pattern // forward <stride> exclusive OR and mask @@ -214,7 +214,25 @@ int Experiment::parse_args(int argc, char* argv[]) { } } else if (strcasecmp(argv[i], "-f") == 0 || strcasecmp(argv[i], "--prefetch") == 0) { - this->prefetch = true; + i++; + if (i == argc) { + error = 1; + break; + } + if (strcasecmp(argv[i], "none") == 0) { + this->prefetch_hint = Experiment::NONE; + } else if (strcasecmp(argv[i], "nta") == 0) { + this->prefetch_hint = Experiment::NTA; + } else if (strcasecmp(argv[i], "t0") == 0) { + this->prefetch_hint = Experiment::T0; + } else if (strcasecmp(argv[i], "t1") == 0) { + this->prefetch_hint = Experiment::T1; + } else if (strcasecmp(argv[i], "t2") == 0) { + this->prefetch_hint = Experiment::T2; + } else { + error = 1; + break; + } } else if (strcasecmp(argv[i], "-a") == 0 || strcasecmp(argv[i], "--access") == 0) { i++; @@ -346,7 +364,7 @@ int Experiment::parse_args(int argc, char* argv[]) { printf(" [-n|--numa] <placement> # numa placement\n"); printf(" [-s|--seconds] <number> # run each experiment for <number> seconds\n"); printf(" [-g|--loop] <number> # cycles to execute for each iteration (latency hiding)\n"); - printf(" [-f|--prefetch] # prefetch data\n"); + printf(" [-f|--prefetch] <hint> # use of prefetching\n"); printf(" [-x|--strict] # fail rather than adjust options to sensible values\n"); printf("\n"); printf("<pattern> is selected from the following:\n"); @@ -363,6 +381,13 @@ int Experiment::parse_args(int argc, char* argv[]) { printf(" both # header and results in csv format\n"); printf(" table # human-readable table of values\n"); printf("\n"); + printf("<hint> is selected from the following:\n"); + printf(" none # do not use prefetching\n"); + printf(" nta # use the NTA hint (non-temporal, only used once)\n"); + printf(" t0 # use the T0 hint (prefetch into all caches)\n"); + printf(" t1 # use the T1 hint (prefetch into all caches except L1)\n"); + printf(" t2 # use the T2 hint (prefetch into all caches except L1 & L2)\n"); + printf("\n"); printf("<placement> is selected from the following:\n"); printf(" local # all chains are allocated locally\n"); printf(" xor <mask> # exclusive OR and mask\n"); @@ -656,7 +681,7 @@ void Experiment::print() { printf("num_threads = %d\n", num_threads); printf("bytes_per_test = %d\n", bytes_per_test); printf("loop length = %d\n", loop_length); - printf("prefetch = %s\n", prefetch?"yes":"no"); + printf("prefetch hint = %s\n", prefetch_hint_string(prefetch_hint)); printf("iterations = %d\n", iterations); printf("experiments = %d\n", experiments); printf("access_pattern = %d\n", access_pattern); diff --git a/src/experiment.h b/src/experiment.h index 5ede451..539e96a 100644 --- a/src/experiment.h +++ b/src/experiment.h @@ -54,12 +54,14 @@ public: int64 num_threads; // number of threads in the experiment int64 bytes_per_test; // test working set size (bytes) int64 loop_length; // length of the inner loop (cycles) - bool prefetch; // use of prefetching float seconds; // number of seconds per experiment int64 iterations; // number of iterations per experiment int64 experiments; // number of experiments per test + enum { NONE, T0, T1, T2, NTA } + prefetch_hint; // use of prefetching + enum { CSV, BOTH, HEADER, TABLE } output_mode; // results output mode @@ -101,9 +103,6 @@ public: const static int32 DEFAULT_ITERATIONS = 0; const static int32 DEFAULT_EXPERIMENTS = 1; - const static int32 DEFAULT_OUTPUT_MODE = 1; - const static bool DEFAULT_PREFETCH = false; - void alloc_local(); void alloc_xor(); void alloc_add(); @@ -114,4 +113,20 @@ public: private: }; + +inline const char* prefetch_hint_string(int32 prefetch_hint) { + switch (prefetch_hint) { + case Experiment::NONE: + return "none"; + case Experiment::T0: + return "t0"; + case Experiment::T1: + return "t1"; + case Experiment::T2: + return "t2"; + case Experiment::NTA: + return "nta"; + } +} + #endif diff --git a/src/output.cpp b/src/output.cpp index 4efb415..ad4aa98 100644 --- a/src/output.cpp +++ b/src/output.cpp @@ -50,7 +50,7 @@ void Output::header(Experiment &e, int64 ops, double secs, double ck_res) { printf("number of threads,"); printf("iterations,"); printf("loop length,"); - printf("prefetch,"); + printf("prefetch hint,"); printf("experiments,"); printf("access pattern,"); printf("stride,"); @@ -80,7 +80,7 @@ void Output::csv(Experiment &e, int64 ops, double secs, double ck_res) { printf("%ld,", e.num_threads); printf("%ld,", e.iterations); printf("%ld,", e.loop_length); - printf("%s,", e.prefetch?"yes":"no"); + printf("%s,", prefetch_hint_string(e.prefetch_hint)); printf("%ld,", e.experiments); printf("%s,", e.access()); printf("%ld,", e.stride); @@ -123,7 +123,7 @@ void Output::table(Experiment &e, int64 ops, double secs, double ck_res) { printf("number of threads = %ld\n", e.num_threads); printf("iterations = %ld\n", e.iterations); printf("loop length = %ld\n", e.loop_length); - printf("prefetch = %s\n", e.prefetch?"yes":"no"); + printf("prefetch hint = %s\n", prefetch_hint_string(e.prefetch_hint)); printf("experiments = %ld\n", e.experiments); printf("access pattern = %s\n", e.access()); printf("stride = %ld\n", e.stride); diff --git a/src/run.cpp b/src/run.cpp index bc9a533..b990a4c 100644 --- a/src/run.cpp +++ b/src/run.cpp @@ -40,13 +40,13 @@ static double min(double v1, double v2); typedef void (*benchmark)(const Chain**); typedef benchmark (*generator)(int64 chains_per_thread, int64 bytes_per_line, int64 bytes_per_chain, - int64 stride, int64 loop_length, bool prefetch); + int64 stride, int64 loop_length, int32 prefetch_hint); static benchmark chase_pointers(int64 chains_per_thread, int64 bytes_per_line, int64 bytes_per_chain, - int64 stride, int64 loop_length, bool prefetch); + int64 stride, int64 loop_length, int32 prefetch_hint); static benchmark follow_streams(int64 chains_per_thread, int64 bytes_per_line, int64 bytes_per_chain, - int64 stride, int64 loop_length, bool prefetch); + int64 stride, int64 loop_length, int32 prefetch_hint); Lock Run::global_mutex; int64 Run::_ops_per_chain = 0; @@ -121,7 +121,7 @@ int Run::run() { benchmark bench = gen(this->exp->chains_per_thread, this->exp->bytes_per_line, this->exp->bytes_per_chain, this->exp->stride, this->exp->loop_length, - this->exp->prefetch); + this->exp->prefetch_hint); volatile static double istart = 0; volatile static double istop = 0; @@ -171,7 +171,7 @@ int Run::run() { benchmark bench = gen(this->exp->chains_per_thread, this->exp->bytes_per_line, this->exp->bytes_per_chain, this->exp->stride, this->exp->loop_length, - this->exp->prefetch); + this->exp->prefetch_hint); for (int e = 0; e < this->exp->experiments; e++) { // barrier @@ -374,7 +374,7 @@ static benchmark chase_pointers(int64 chains_per_thread, // memory loading per t int64 bytes_per_chain, // ignored int64 stride, // ignored int64 loop_length, // length of the inner loop - bool prefetch // prefetch + int32 prefetch_hint // use of prefetching ) { // Create Compiler. AsmJit::Compiler c; @@ -417,8 +417,25 @@ static benchmark chase_pointers(int64 chains_per_thread, // memory loading per t c.mov(positions[i], ptr(positions[i], offsetof(Chain, next))); // Prefetch next - if (prefetch) + switch (prefetch_hint) + { + case Experiment::T0: c.prefetch(ptr(positions[i]), AsmJit::PREFETCH_T0); + break; + case Experiment::T1: + c.prefetch(ptr(positions[i]), AsmJit::PREFETCH_T1); + break; + case Experiment::T2: + c.prefetch(ptr(positions[i]), AsmJit::PREFETCH_T2); + break; + case Experiment::NTA: + c.prefetch(ptr(positions[i]), AsmJit::PREFETCH_NTA); + break; + case Experiment::NONE: + default: + break; + + } } // Wait @@ -485,7 +502,7 @@ static benchmark follow_streams(int64 chains_per_thread, // memory loading per t int64 bytes_per_chain, // ignored int64 stride, // ignored int64 loop_length, // ignored - bool prefetch // ignored + int32 prefetch_hint // ignored ) { return 0; /* |