summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTim Besard <tim.besard@gmail.com>2011-11-19 16:58:56 +0100
committerTim Besard <tim.besard@gmail.com>2011-11-19 16:58:56 +0100
commit393401b50f728ff5dc7a5d29c9c976eaff6a2a26 (patch)
treed8c3e71a84d88ed81030a44cf6a527a6589e2042
parent365dfd66a5e1fccf344d852b659e229c6c1e31c3 (diff)
Support for different prefetching hints.
-rw-r--r--src/experiment.cpp35
-rw-r--r--src/experiment.h23
-rw-r--r--src/output.cpp6
-rw-r--r--src/run.cpp33
4 files changed, 77 insertions, 20 deletions
diff --git a/src/experiment.cpp b/src/experiment.cpp
index 4bf4eff..9b6af7c 100644
--- a/src/experiment.cpp
+++ b/src/experiment.cpp
@@ -53,7 +53,7 @@ Experiment::Experiment() :
seconds (DEFAULT_SECONDS),
iterations (DEFAULT_ITERATIONS),
experiments (DEFAULT_EXPERIMENTS),
- prefetch (DEFAULT_PREFETCH),
+ prefetch_hint (NONE),
output_mode (TABLE),
access_pattern (RANDOM),
stride (1),
@@ -80,7 +80,7 @@ Experiment::~Experiment() {
// -i or --iters iterations
// -e or --experiments experiments
// -g or --loop cycles to execute for each iteration (latency hiding)
-// -f or --prefetch prefetch data
+// -f or --prefetch use of prefetching
// -a or --access memory access pattern
// random random access pattern
// forward <stride> exclusive OR and mask
@@ -214,7 +214,25 @@ int Experiment::parse_args(int argc, char* argv[]) {
}
} else if (strcasecmp(argv[i], "-f") == 0
|| strcasecmp(argv[i], "--prefetch") == 0) {
- this->prefetch = true;
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ if (strcasecmp(argv[i], "none") == 0) {
+ this->prefetch_hint = Experiment::NONE;
+ } else if (strcasecmp(argv[i], "nta") == 0) {
+ this->prefetch_hint = Experiment::NTA;
+ } else if (strcasecmp(argv[i], "t0") == 0) {
+ this->prefetch_hint = Experiment::T0;
+ } else if (strcasecmp(argv[i], "t1") == 0) {
+ this->prefetch_hint = Experiment::T1;
+ } else if (strcasecmp(argv[i], "t2") == 0) {
+ this->prefetch_hint = Experiment::T2;
+ } else {
+ error = 1;
+ break;
+ }
} else if (strcasecmp(argv[i], "-a") == 0
|| strcasecmp(argv[i], "--access") == 0) {
i++;
@@ -346,7 +364,7 @@ int Experiment::parse_args(int argc, char* argv[]) {
printf(" [-n|--numa] <placement> # numa placement\n");
printf(" [-s|--seconds] <number> # run each experiment for <number> seconds\n");
printf(" [-g|--loop] <number> # cycles to execute for each iteration (latency hiding)\n");
- printf(" [-f|--prefetch] # prefetch data\n");
+ printf(" [-f|--prefetch] <hint> # use of prefetching\n");
printf(" [-x|--strict] # fail rather than adjust options to sensible values\n");
printf("\n");
printf("<pattern> is selected from the following:\n");
@@ -363,6 +381,13 @@ int Experiment::parse_args(int argc, char* argv[]) {
printf(" both # header and results in csv format\n");
printf(" table # human-readable table of values\n");
printf("\n");
+ printf("<hint> is selected from the following:\n");
+ printf(" none # do not use prefetching\n");
+ printf(" nta # use the NTA hint (non-temporal, only used once)\n");
+ printf(" t0 # use the T0 hint (prefetch into all caches)\n");
+ printf(" t1 # use the T1 hint (prefetch into all caches except L1)\n");
+ printf(" t2 # use the T2 hint (prefetch into all caches except L1 & L2)\n");
+ printf("\n");
printf("<placement> is selected from the following:\n");
printf(" local # all chains are allocated locally\n");
printf(" xor <mask> # exclusive OR and mask\n");
@@ -656,7 +681,7 @@ void Experiment::print() {
printf("num_threads = %d\n", num_threads);
printf("bytes_per_test = %d\n", bytes_per_test);
printf("loop length = %d\n", loop_length);
- printf("prefetch = %s\n", prefetch?"yes":"no");
+ printf("prefetch hint = %s\n", prefetch_hint_string(prefetch_hint));
printf("iterations = %d\n", iterations);
printf("experiments = %d\n", experiments);
printf("access_pattern = %d\n", access_pattern);
diff --git a/src/experiment.h b/src/experiment.h
index 5ede451..539e96a 100644
--- a/src/experiment.h
+++ b/src/experiment.h
@@ -54,12 +54,14 @@ public:
int64 num_threads; // number of threads in the experiment
int64 bytes_per_test; // test working set size (bytes)
int64 loop_length; // length of the inner loop (cycles)
- bool prefetch; // use of prefetching
float seconds; // number of seconds per experiment
int64 iterations; // number of iterations per experiment
int64 experiments; // number of experiments per test
+ enum { NONE, T0, T1, T2, NTA }
+ prefetch_hint; // use of prefetching
+
enum { CSV, BOTH, HEADER, TABLE }
output_mode; // results output mode
@@ -101,9 +103,6 @@ public:
const static int32 DEFAULT_ITERATIONS = 0;
const static int32 DEFAULT_EXPERIMENTS = 1;
- const static int32 DEFAULT_OUTPUT_MODE = 1;
- const static bool DEFAULT_PREFETCH = false;
-
void alloc_local();
void alloc_xor();
void alloc_add();
@@ -114,4 +113,20 @@ public:
private:
};
+
+// Translate a prefetch hint value into the lowercase name printed in the
+// experiment reports ("none", "t0", "t1", "t2" or "nta").
+//
+// prefetch_hint  one of the Experiment::NONE / T0 / T1 / T2 / NTA enumerators,
+//                passed as a plain integer.
+//
+// Returns a pointer to a string literal; the caller must not free it.
+// A value that matches none of the enumerators yields "unknown" so that
+// control never flows off the end of this non-void function, which would be
+// undefined behaviour.
+inline const char* prefetch_hint_string(int32 prefetch_hint) {
+    switch (prefetch_hint) {
+    case Experiment::NONE:
+        return "none";
+    case Experiment::T0:
+        return "t0";
+    case Experiment::T1:
+        return "t1";
+    case Experiment::T2:
+        return "t2";
+    case Experiment::NTA:
+        return "nta";
+    }
+    // The parameter is an int32, not the enum, so out-of-range values are
+    // possible; give them a defined, printable result.
+    return "unknown";
+}
+
#endif
diff --git a/src/output.cpp b/src/output.cpp
index 4efb415..ad4aa98 100644
--- a/src/output.cpp
+++ b/src/output.cpp
@@ -50,7 +50,7 @@ void Output::header(Experiment &e, int64 ops, double secs, double ck_res) {
printf("number of threads,");
printf("iterations,");
printf("loop length,");
- printf("prefetch,");
+ printf("prefetch hint,");
printf("experiments,");
printf("access pattern,");
printf("stride,");
@@ -80,7 +80,7 @@ void Output::csv(Experiment &e, int64 ops, double secs, double ck_res) {
printf("%ld,", e.num_threads);
printf("%ld,", e.iterations);
printf("%ld,", e.loop_length);
- printf("%s,", e.prefetch?"yes":"no");
+ printf("%s,", prefetch_hint_string(e.prefetch_hint));
printf("%ld,", e.experiments);
printf("%s,", e.access());
printf("%ld,", e.stride);
@@ -123,7 +123,7 @@ void Output::table(Experiment &e, int64 ops, double secs, double ck_res) {
printf("number of threads = %ld\n", e.num_threads);
printf("iterations = %ld\n", e.iterations);
printf("loop length = %ld\n", e.loop_length);
- printf("prefetch = %s\n", e.prefetch?"yes":"no");
+ printf("prefetch hint = %s\n", prefetch_hint_string(e.prefetch_hint));
printf("experiments = %ld\n", e.experiments);
printf("access pattern = %s\n", e.access());
printf("stride = %ld\n", e.stride);
diff --git a/src/run.cpp b/src/run.cpp
index bc9a533..b990a4c 100644
--- a/src/run.cpp
+++ b/src/run.cpp
@@ -40,13 +40,13 @@ static double min(double v1, double v2);
typedef void (*benchmark)(const Chain**);
typedef benchmark (*generator)(int64 chains_per_thread,
int64 bytes_per_line, int64 bytes_per_chain,
- int64 stride, int64 loop_length, bool prefetch);
+ int64 stride, int64 loop_length, int32 prefetch_hint);
static benchmark chase_pointers(int64 chains_per_thread,
int64 bytes_per_line, int64 bytes_per_chain,
- int64 stride, int64 loop_length, bool prefetch);
+ int64 stride, int64 loop_length, int32 prefetch_hint);
static benchmark follow_streams(int64 chains_per_thread,
int64 bytes_per_line, int64 bytes_per_chain,
- int64 stride, int64 loop_length, bool prefetch);
+ int64 stride, int64 loop_length, int32 prefetch_hint);
Lock Run::global_mutex;
int64 Run::_ops_per_chain = 0;
@@ -121,7 +121,7 @@ int Run::run() {
benchmark bench = gen(this->exp->chains_per_thread,
this->exp->bytes_per_line, this->exp->bytes_per_chain,
this->exp->stride, this->exp->loop_length,
- this->exp->prefetch);
+ this->exp->prefetch_hint);
volatile static double istart = 0;
volatile static double istop = 0;
@@ -171,7 +171,7 @@ int Run::run() {
benchmark bench = gen(this->exp->chains_per_thread,
this->exp->bytes_per_line, this->exp->bytes_per_chain,
this->exp->stride, this->exp->loop_length,
- this->exp->prefetch);
+ this->exp->prefetch_hint);
for (int e = 0; e < this->exp->experiments; e++) {
// barrier
@@ -374,7 +374,7 @@ static benchmark chase_pointers(int64 chains_per_thread, // memory loading per t
int64 bytes_per_chain, // ignored
int64 stride, // ignored
int64 loop_length, // length of the inner loop
- bool prefetch // prefetch
+ int32 prefetch_hint // use of prefetching
) {
// Create Compiler.
AsmJit::Compiler c;
@@ -417,8 +417,25 @@ static benchmark chase_pointers(int64 chains_per_thread, // memory loading per t
c.mov(positions[i], ptr(positions[i], offsetof(Chain, next)));
// Prefetch next
- if (prefetch)
+ switch (prefetch_hint)
+ {
+ case Experiment::T0:
c.prefetch(ptr(positions[i]), AsmJit::PREFETCH_T0);
+ break;
+ case Experiment::T1:
+ c.prefetch(ptr(positions[i]), AsmJit::PREFETCH_T1);
+ break;
+ case Experiment::T2:
+ c.prefetch(ptr(positions[i]), AsmJit::PREFETCH_T2);
+ break;
+ case Experiment::NTA:
+ c.prefetch(ptr(positions[i]), AsmJit::PREFETCH_NTA);
+ break;
+ case Experiment::NONE:
+ default:
+ break;
+
+ }
}
// Wait
@@ -485,7 +502,7 @@ static benchmark follow_streams(int64 chains_per_thread, // memory loading per t
int64 bytes_per_chain, // ignored
int64 stride, // ignored
int64 loop_length, // ignored
- bool prefetch // ignored
+ int32 prefetch_hint // ignored
) {
return 0;
/*