summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Tim Besard <tim.besard@gmail.com> 2011-11-02 09:13:38 +0100
committer Tim Besard <tim.besard@gmail.com> 2011-11-02 09:13:38 +0100
commit c108197c20aa7b93849e383a3aaaf7b2bba30405 (patch)
tree f05aa2ff019892dfc936ad2242d43373191f16cd /src
parent eb6995fb5a0f4382cb4a01d301423e74ea8babe6 (diff)
Formatting the source.
Diffstat (limited to 'src')
-rw-r--r-- src/Chain.cpp 23
-rw-r--r-- src/Chain.h 11
-rw-r--r-- src/Experiment.cpp 1068
-rw-r--r-- src/Experiment.h 37
-rw-r--r-- src/Lock.cpp 31
-rw-r--r-- src/Lock.h 13
-rw-r--r-- src/Main.c 114
-rw-r--r-- src/Main.cpp 107
-rw-r--r-- src/Output.cpp 74
-rw-r--r-- src/Output.h 9
-rw-r--r-- src/Run.cpp 2259
-rw-r--r-- src/Run.h 44
-rw-r--r-- src/SpinBarrier.cpp 25
-rw-r--r-- src/SpinBarrier.h 11
-rw-r--r-- src/Thread.cpp 64
-rw-r--r-- src/Thread.h 43
-rw-r--r-- src/Timer.cpp 192
-rw-r--r-- src/Timer.h 11
-rw-r--r-- src/Types.cpp 1
-rw-r--r-- src/Types.h 3
20 files changed, 2085 insertions, 2055 deletions
diff --git a/src/Chain.cpp b/src/Chain.cpp
index 1eda774..ceb1b31 100644
--- a/src/Chain.cpp
+++ b/src/Chain.cpp
@@ -7,29 +7,24 @@
* *
* Contributors: *
* Douglas M. Pase - initial API and implementation *
- *******************************************************************************/
-
+ *******************************************************************************/
#include <stdio.h>
#include "Chain.h"
-Chain::Chain()
-: next(END_OF_CHAIN)
-{
+Chain::Chain() :
+ next(END_OF_CHAIN) {
}
-Chain::Chain(Chain *end)
-: next(end)
-{
+Chain::Chain(Chain *end) :
+ next(end) {
}
-Chain::~Chain()
-{
+Chain::~Chain() {
}
-Chain* Chain::END()
-{
- static Chain chain((Chain*) 0xDEADBEEF);
- return &chain;
+Chain* Chain::END() {
+ static Chain chain((Chain*) 0xDEADBEEF);
+ return &chain;
}
diff --git a/src/Chain.h b/src/Chain.h
index 5a55865..8413a43 100644
--- a/src/Chain.h
+++ b/src/Chain.h
@@ -9,18 +9,17 @@
* Douglas M. Pase - initial API and implementation *
*******************************************************************************/
-
#if !defined(Chain_h)
#define Chain_h
class Chain {
public:
- Chain();
- Chain(Chain* end);
- ~Chain();
- Chain* next;
+ Chain();
+ Chain(Chain* end);
+ ~Chain();
+ Chain* next;
- static Chain* END();
+ static Chain* END();
private:
};
diff --git a/src/Experiment.cpp b/src/Experiment.cpp
index 27e1a25..e58be0a 100644
--- a/src/Experiment.cpp
+++ b/src/Experiment.cpp
@@ -35,7 +35,7 @@ Experiment::Experiment() :
pages_per_chain (DEFAULT_PAGES_PER_CHAIN),
chains_per_thread(DEFAULT_CHAINS_PER_THREAD),
bytes_per_thread (DEFAULT_BYTES_PER_THREAD),
- num_threads (DEFAULT_THREADS),
+ num_threads (DEFAULT_THREADS),
bytes_per_test (DEFAULT_BYTES_PER_TEST),
busy_cycles (DEFAULT_BUSY_CYCLES),
seconds (DEFAULT_SECONDS),
@@ -55,233 +55,334 @@ Experiment::Experiment() :
{
}
-Experiment::~Experiment()
-{
+Experiment::~Experiment() {
}
- // interface:
- //
- // -l or --line bytes per cache line (line size)
- // -p or --page bytes per page (page size)
- // -c or --chain bytes per chain (used to compute pages per chain)
- // -r or --references chains per thread (memory loading)
- // -t or --threads number of threads (concurrency and contention)
- // -i or --iters iterations
- // -e or --experiments experiments
- // -b or --busy amount of cycles processor should remain busy
- // -f or --prefetch prefetch data
- // -a or --access memory access pattern
- // random random access pattern
- // forward <stride> exclusive OR and mask
- // reverse <stride> addition and offset
- // -o or --output output mode
- // hdr header only
- // csv csv only
- // both header + csv
- // table human-readable table of values
- // -n or --numa numa placement
- // local local allocation of all chains
- // xor <mask> exclusive OR and mask
- // add <offset> addition and offset
- // map <map> explicit mapping of threads and chains to domains
-
-int
-Experiment::parse_args(int argc, char* argv[])
-{
- int error = 0;
- for (int i=1; i < argc; i++) {
- if (strcasecmp(argv[i], "-x") == 0 || strcasecmp(argv[i], "--strict") == 0) {
- this->strict = 1;
- } else if (strcasecmp(argv[i], "-s") == 0 || strcasecmp(argv[i], "--seconds") == 0) {
- i++;
- if (i == argc) { error = 1; break; }
- this->seconds = Experiment::parse_real(argv[i]);
- this->iterations = 0;
- if (this->seconds == 0) { error = 1; break; }
- } else if (strcasecmp(argv[i], "-l") == 0 || strcasecmp(argv[i], "--line") == 0) {
- i++;
- if (i == argc) { error = 1; break; }
- this->bytes_per_line = Experiment::parse_number(argv[i]);
- if (this->bytes_per_line == 0) { error = 1; break; }
- } else if (strcasecmp(argv[i], "-p") == 0 || strcasecmp(argv[i], "--page") == 0) {
- i++;
- if (i == argc) { error = 1; break; }
- this->bytes_per_page = Experiment::parse_number(argv[i]);
- if (this->bytes_per_page == 0) { error = 1; break; }
- } else if (strcasecmp(argv[i], "-c") == 0 || strcasecmp(argv[i], "--chain") == 0) {
- i++;
- if (i == argc) { error = 1; break; }
- this->bytes_per_chain = Experiment::parse_number(argv[i]);
- if (this->bytes_per_chain == 0) { error = 1; break; }
- } else if (strcasecmp(argv[i], "-r") == 0 || strcasecmp(argv[i], "--references") == 0) {
- i++;
- if (i == argc) { error = 1; break; }
- this->chains_per_thread = Experiment::parse_number(argv[i]);
- if (this->chains_per_thread == 0) { error = 1; break; }
- } else if (strcasecmp(argv[i], "-t") == 0 || strcasecmp(argv[i], "--threads") == 0) {
- i++;
- if (i == argc) { error = 1; break; }
- this->num_threads = Experiment::parse_number(argv[i]);
- if (this->num_threads == 0) { error = 1; break; }
- } else if (strcasecmp(argv[i], "-i") == 0 || strcasecmp(argv[i], "--iterations") == 0) {
- i++;
- if (i == argc) { error = 1; break; }
- this->iterations = Experiment::parse_number(argv[i]);
- this->seconds = 0;
- if (this->iterations == 0) { error = 1; break; }
- } else if (strcasecmp(argv[i], "-e") == 0 || strcasecmp(argv[i], "--experiments") == 0) {
- i++;
- if (i == argc) { error = 1; break; }
- this->experiments = Experiment::parse_number(argv[i]);
- if (this->experiments == 0) { error = 1; break; }
- } else if (strcasecmp(argv[i], "-b") == 0 || strcasecmp(argv[i], "--busy") == 0) {
- i++;
- if (i == argc) { error = 1; break; }
- this->busy_cycles = Experiment::parse_number(argv[i]);
- if (this->experiments == 0) { error = 1; break; }
- } else if (strcasecmp(argv[i], "-f") == 0 || strcasecmp(argv[i], "--prefetch") == 0) {
- this->prefetch = true;
- } else if (strcasecmp(argv[i], "-a") == 0 || strcasecmp(argv[i], "--access") == 0) {
- i++;
- if (i == argc) { error = 1; break; }
- if (strcasecmp(argv[i], "random") == 0) {
- this->access_pattern = RANDOM;
- } else if (strcasecmp(argv[i], "forward") == 0) {
- this->access_pattern = STRIDED;
- i++;
- if (i == argc) { error = 1; break; }
- this->stride = Experiment::parse_number(argv[i]);
- if (this->stride == 0) { error = 1; break; }
- } else if (strcasecmp(argv[i], "reverse") == 0) {
- this->access_pattern = STRIDED;
- i++;
- if (i == argc) { error = 1; break; }
- this->stride = - Experiment::parse_number(argv[i]);
- if (this->stride == 0) { error = 1; break; }
- } else if (strcasecmp(argv[i], "stream") == 0) {
- this->access_pattern = STREAM;
- i++;
- if (i == argc) { error = 1; break; }
- this->stride = Experiment::parse_number(argv[i]);
- if (this->stride == 0) { error = 1; break; }
- } else {
- error = 1;
- break;
- }
- } else if (strcasecmp(argv[i], "-o") == 0 || strcasecmp(argv[i], "--output") == 0) {
- i++;
- if (i == argc) { error = 1; break; }
- if (strcasecmp(argv[i], "table") == 0) {
- this->output_mode = TABLE;
- } else if (strcasecmp(argv[i], "csv") == 0) {
- this->output_mode = CSV;
- } else if (strcasecmp(argv[i], "both") == 0) {
- this->output_mode = BOTH;
- } else if (strcasecmp(argv[i], "hdr") == 0) {
- this->output_mode = HEADER;
- } else if (strcasecmp(argv[i], "header") == 0) {
- this->output_mode = HEADER;
- } else {
- error = 1;
- break;
- }
- } else if (strcasecmp(argv[i], "-n") == 0 || strcasecmp(argv[i], "--numa") == 0) {
- i++;
- if (i == argc) { error = 1; break; }
- if (strcasecmp(argv[i], "local") == 0) {
- this->numa_placement = LOCAL;
- } else if (strcasecmp(argv[i], "xor") == 0) {
- this->numa_placement = XOR;
- i++;
- if (i == argc) { error = 1; break; }
- this->offset_or_mask = Experiment::parse_number(argv[i]);
- } else if (strcasecmp(argv[i], "add") == 0) {
- this->numa_placement = ADD;
- i++;
- if (i == argc) { error = 1; break; }
- this->offset_or_mask = Experiment::parse_number(argv[i]);
- } else if (strcasecmp(argv[i], "map") == 0) {
- this->numa_placement = MAP;
- i++;
- if (i == argc) { error = 1; break; }
- this->placement_map = argv[i];
- } else {
- error = 1;
- break;
- }
- } else {
- error = 1;
- break;
+// interface:
+//
+// -l or --line bytes per cache line (line size)
+// -p or --page bytes per page (page size)
+// -c or --chain bytes per chain (used to compute pages per chain)
+// -r or --references chains per thread (memory loading)
+// -t or --threads number of threads (concurrency and contention)
+// -i or --iters iterations
+// -e or --experiments experiments
+// -b or --busy amount of cycles processor should remain busy
+// -f or --prefetch prefetch data
+// -a or --access memory access pattern
+// random random access pattern
+// forward <stride> exclusive OR and mask
+// reverse <stride> addition and offset
+// -o or --output output mode
+// hdr header only
+// csv csv only
+// both header + csv
+// table human-readable table of values
+// -n or --numa numa placement
+// local local allocation of all chains
+// xor <mask> exclusive OR and mask
+// add <offset> addition and offset
+// map <map> explicit mapping of threads and chains to domains
+
+int Experiment::parse_args(int argc, char* argv[]) {
+ int error = 0;
+ for (int i = 1; i < argc; i++) {
+ if (strcasecmp(argv[i], "-x") == 0
+ || strcasecmp(argv[i], "--strict") == 0) {
+ this->strict = 1;
+ } else if (strcasecmp(argv[i], "-s") == 0
+ || strcasecmp(argv[i], "--seconds") == 0) {
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ this->seconds = Experiment::parse_real(argv[i]);
+ this->iterations = 0;
+ if (this->seconds == 0) {
+ error = 1;
+ break;
+ }
+ } else if (strcasecmp(argv[i], "-l") == 0
+ || strcasecmp(argv[i], "--line") == 0) {
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ this->bytes_per_line = Experiment::parse_number(argv[i]);
+ if (this->bytes_per_line == 0) {
+ error = 1;
+ break;
+ }
+ } else if (strcasecmp(argv[i], "-p") == 0
+ || strcasecmp(argv[i], "--page") == 0) {
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ this->bytes_per_page = Experiment::parse_number(argv[i]);
+ if (this->bytes_per_page == 0) {
+ error = 1;
+ break;
+ }
+ } else if (strcasecmp(argv[i], "-c") == 0
+ || strcasecmp(argv[i], "--chain") == 0) {
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ this->bytes_per_chain = Experiment::parse_number(argv[i]);
+ if (this->bytes_per_chain == 0) {
+ error = 1;
+ break;
+ }
+ } else if (strcasecmp(argv[i], "-r") == 0
+ || strcasecmp(argv[i], "--references") == 0) {
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ this->chains_per_thread = Experiment::parse_number(argv[i]);
+ if (this->chains_per_thread == 0) {
+ error = 1;
+ break;
+ }
+ } else if (strcasecmp(argv[i], "-t") == 0
+ || strcasecmp(argv[i], "--threads") == 0) {
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ this->num_threads = Experiment::parse_number(argv[i]);
+ if (this->num_threads == 0) {
+ error = 1;
+ break;
+ }
+ } else if (strcasecmp(argv[i], "-i") == 0
+ || strcasecmp(argv[i], "--iterations") == 0) {
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ this->iterations = Experiment::parse_number(argv[i]);
+ this->seconds = 0;
+ if (this->iterations == 0) {
+ error = 1;
+ break;
+ }
+ } else if (strcasecmp(argv[i], "-e") == 0
+ || strcasecmp(argv[i], "--experiments") == 0) {
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ this->experiments = Experiment::parse_number(argv[i]);
+ if (this->experiments == 0) {
+ error = 1;
+ break;
+ }
+ } else if (strcasecmp(argv[i], "-b") == 0
+ || strcasecmp(argv[i], "--busy") == 0) {
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ this->busy_cycles = Experiment::parse_number(argv[i]);
+ if (this->experiments == 0) {
+ error = 1;
+ break;
+ }
+ } else if (strcasecmp(argv[i], "-f") == 0
+ || strcasecmp(argv[i], "--prefetch") == 0) {
+ this->prefetch = true;
+ } else if (strcasecmp(argv[i], "-a") == 0
+ || strcasecmp(argv[i], "--access") == 0) {
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ if (strcasecmp(argv[i], "random") == 0) {
+ this->access_pattern = RANDOM;
+ } else if (strcasecmp(argv[i], "forward") == 0) {
+ this->access_pattern = STRIDED;
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ this->stride = Experiment::parse_number(argv[i]);
+ if (this->stride == 0) {
+ error = 1;
+ break;
+ }
+ } else if (strcasecmp(argv[i], "reverse") == 0) {
+ this->access_pattern = STRIDED;
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ this->stride = -Experiment::parse_number(argv[i]);
+ if (this->stride == 0) {
+ error = 1;
+ break;
+ }
+ } else if (strcasecmp(argv[i], "stream") == 0) {
+ this->access_pattern = STREAM;
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ this->stride = Experiment::parse_number(argv[i]);
+ if (this->stride == 0) {
+ error = 1;
+ break;
+ }
+ } else {
+ error = 1;
+ break;
+ }
+ } else if (strcasecmp(argv[i], "-o") == 0
+ || strcasecmp(argv[i], "--output") == 0) {
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ if (strcasecmp(argv[i], "table") == 0) {
+ this->output_mode = TABLE;
+ } else if (strcasecmp(argv[i], "csv") == 0) {
+ this->output_mode = CSV;
+ } else if (strcasecmp(argv[i], "both") == 0) {
+ this->output_mode = BOTH;
+ } else if (strcasecmp(argv[i], "hdr") == 0) {
+ this->output_mode = HEADER;
+ } else if (strcasecmp(argv[i], "header") == 0) {
+ this->output_mode = HEADER;
+ } else {
+ error = 1;
+ break;
+ }
+ } else if (strcasecmp(argv[i], "-n") == 0
+ || strcasecmp(argv[i], "--numa") == 0) {
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ if (strcasecmp(argv[i], "local") == 0) {
+ this->numa_placement = LOCAL;
+ } else if (strcasecmp(argv[i], "xor") == 0) {
+ this->numa_placement = XOR;
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ this->offset_or_mask = Experiment::parse_number(argv[i]);
+ } else if (strcasecmp(argv[i], "add") == 0) {
+ this->numa_placement = ADD;
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ this->offset_or_mask = Experiment::parse_number(argv[i]);
+ } else if (strcasecmp(argv[i], "map") == 0) {
+ this->numa_placement = MAP;
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ this->placement_map = argv[i];
+ } else {
+ error = 1;
+ break;
+ }
+ } else {
+ error = 1;
+ break;
+ }
+ }
+
+
+ // if we've hit an error, print a message and quit
+ if (error) {
+ printf("usage: %s <options>\n", argv[0]);
+ printf("where <options> are selected from the following:\n");
+ printf(" [-h|--help] # this message\n");
+ printf(" [-l|--line] <number> # bytes per cache line (cache line size)\n");
+ printf(" [-p|--page] <number> # bytes per page (page size)\n");
+ printf(" [-c|--chain] <number> # bytes per chain (used to compute pages per chain)\n");
+ printf(" [-r|--references] <number> # chains per thread (memory loading)\n");
+ printf(" [-t|--threads] <number> # number of threads (concurrency and contention)\n");
+ printf(" [-i|--iterations] <number> # iterations per experiment\n");
+ printf(" [-e|--experiments] <number> # experiments\n");
+ printf(" [-a|--access] <pattern> # memory access pattern\n");
+ printf(" [-o|--output] <format> # output format\n");
+ printf(" [-n|--numa] <placement> # numa placement\n");
+ printf(" [-s|--seconds] <number> # run each experiment for <number> seconds\n");
+ printf(" [-b|--busy] <number> # how much processing cycles each loop should count\n");
+ printf(" [-f|--prefetch] # prefetch data\n");
+ printf(" [-x|--strict] # fail rather than adjust options to sensible values\n");
+ printf("\n");
+ printf("<pattern> is selected from the following:\n");
+ printf(" random # all chains are accessed randomly\n");
+ printf(" forward <stride> # chains are in forward order with constant stride\n");
+ printf(" reverse <stride> # chains are in reverse order with constant stride\n");
+ printf(" stream <stride> # references are calculated rather than read from memory\n");
+ printf("\n");
+ printf("Note: <stride> is always a small positive integer.\n");
+ printf("\n");
+ printf("<format> is selected from the following:\n");
+ printf(" hdr # csv header only\n");
+ printf(" csv # results in csv format only\n");
+ printf(" both # header and results in csv format\n");
+ printf(" table # human-readable table of values\n");
+ printf("\n");
+ printf("<placement> is selected from the following:\n");
+ printf(" local # all chains are allocated locally\n");
+ printf(" xor <mask> # exclusive OR and mask\n");
+ printf(" add <offset> # addition and offset\n");
+ printf(" map <map> # explicit mapping of threads and chains to domains\n");
+ printf("\n");
+ printf("<map> has the form \"t1:c11,c12,...,c1m;t2:c21,...,c2m;...;tn:cn1,...,cnm\"\n");
+ printf("where t[i] is the NUMA domain where the ith thread is run,\n");
+ printf("and c[i][j] is the NUMA domain where the jth chain in the ith thread is allocated.\n");
+ printf("(The values t[i] and c[i][j] must all be zero or small positive integers.)\n");
+ printf("\n");
+ printf("Note: for maps, each thread must have the same number of chains,\n");
+ printf("maps override the -t or --threads specification,\n");
+ printf("NUMA domains are whole numbers in the range of 0..N, and\n");
+ printf("thread or chain domains that exceed the maximum NUMA domain\n");
+ printf("are wrapped around using a MOD function.\n");
+ printf("\n");
+ printf("To determine the number of NUMA domains currently available\n");
+ printf("on your system, use a command such as \"numastat\".\n");
+ printf("\n");
+ printf("Final note: strict is not yet fully implemented, and\n");
+ printf("maps do not gracefully handle ill-formed map specifications.\n");
+
+ return 1;
}
- }
-
-
- // if we've hit an error, print a message and quit
- if (error) {
- printf("usage: %s <options>\n", argv[0]);
- printf("where <options> are selected from the following:\n");
- printf(" [-h|--help] # this message\n");
- printf(" [-l|--line] <number> # bytes per cache line (cache line size)\n");
- printf(" [-p|--page] <number> # bytes per page (page size)\n");
- printf(" [-c|--chain] <number> # bytes per chain (used to compute pages per chain)\n");
- printf(" [-r|--references] <number> # chains per thread (memory loading)\n");
- printf(" [-t|--threads] <number> # number of threads (concurrency and contention)\n");
- printf(" [-i|--iterations] <number> # iterations per experiment\n");
- printf(" [-e|--experiments] <number> # experiments\n");
- printf(" [-a|--access] <pattern> # memory access pattern\n");
- printf(" [-o|--output] <format> # output format\n");
- printf(" [-n|--numa] <placement> # numa placement\n");
- printf(" [-s|--seconds] <number> # run each experiment for <number> seconds\n");
- printf(" [-b|--busy] <number> # how much processing cycles each loop should count\n");
- printf(" [-f|--prefetch] # prefetch data\n");
- printf(" [-x|--strict] # fail rather than adjust options to sensible values\n");
- printf("\n");
- printf("<pattern> is selected from the following:\n");
- printf(" random # all chains are accessed randomly\n");
- printf(" forward <stride> # chains are in forward order with constant stride\n");
- printf(" reverse <stride> # chains are in reverse order with constant stride\n");
- printf(" stream <stride> # references are calculated rather than read from memory\n");
- printf("\n");
- printf("Note: <stride> is always a small positive integer.\n");
- printf("\n");
- printf("<format> is selected from the following:\n");
- printf(" hdr # csv header only\n");
- printf(" csv # results in csv format only\n");
- printf(" both # header and results in csv format\n");
- printf(" table # human-readable table of values\n");
- printf("\n");
- printf("<placement> is selected from the following:\n");
- printf(" local # all chains are allocated locally\n");
- printf(" xor <mask> # exclusive OR and mask\n");
- printf(" add <offset> # addition and offset\n");
- printf(" map <map> # explicit mapping of threads and chains to domains\n");
- printf("\n");
- printf("<map> has the form \"t1:c11,c12,...,c1m;t2:c21,...,c2m;...;tn:cn1,...,cnm\"\n");
- printf("where t[i] is the NUMA domain where the ith thread is run,\n");
- printf("and c[i][j] is the NUMA domain where the jth chain in the ith thread is allocated.\n");
- printf("(The values t[i] and c[i][j] must all be zero or small positive integers.)\n");
- printf("\n");
- printf("Note: for maps, each thread must have the same number of chains,\n");
- printf("maps override the -t or --threads specification,\n");
- printf("NUMA domains are whole numbers in the range of 0..N, and\n");
- printf("thread or chain domains that exceed the maximum NUMA domain\n");
- printf("are wrapped around using a MOD function.\n");
- printf("\n");
- printf("To determine the number of NUMA domains currently available\n");
- printf("on your system, use a command such as \"numastat\".\n");
- printf("\n");
- printf("Final note: strict is not yet fully implemented, and\n");
- printf("maps do not gracefully handle ill-formed map specifications.\n");
-
- return 1;
- }
-
-
- // STRICT -- fail if specifications are inconsistent
-
- // compute lines per page and lines per chain
- // based on input and defaults.
- // we round up page and chain sizes when needed.
+
+
+ // STRICT -- fail if specifications are inconsistent
+
+ // compute lines per page and lines per chain
+ // based on input and defaults.
+ // we round up page and chain sizes when needed.
this->lines_per_page = (this->bytes_per_page+this->bytes_per_line-1) / this->bytes_per_line;
this->bytes_per_page = this->bytes_per_line * this->lines_per_page;
this->pages_per_chain = (this->bytes_per_chain+this->bytes_per_page-1) / this->bytes_per_page;
@@ -294,314 +395,307 @@ Experiment::parse_args(int argc, char* argv[])
this->links_per_chain = this->lines_per_chain * this->links_per_line;
- // allocate the chain roots for all threads
- // and compute the chain locations
- // (the chains themselves are initialized by the threads)
- switch (this->numa_placement) {
- case LOCAL :
- case XOR :
- case ADD :
- this->thread_domain = new int32 [ this->num_threads ];
- this->chain_domain = new int32*[ this->num_threads ];
- this->random_state = new char* [ this->num_threads ];
-
- for (int i=0; i < this->num_threads; i++) {
- this->chain_domain[i] = new int32 [ this->chains_per_thread ];
-
- const int state_size = 256;
- this->random_state[i] = new char[state_size];
- initstate((unsigned int) i, (char *) this->random_state[i], (size_t) state_size);
+ // allocate the chain roots for all threads
+ // and compute the chain locations
+ // (the chains themselves are initialized by the threads)
+ switch (this->numa_placement) {
+ case LOCAL:
+ case XOR:
+ case ADD:
+ this->thread_domain = new int32[this->num_threads];
+ this->chain_domain = new int32*[this->num_threads];
+ this->random_state = new char*[this->num_threads];
+
+ for (int i = 0; i < this->num_threads; i++) {
+ this->chain_domain[i] = new int32[this->chains_per_thread];
+
+ const int state_size = 256;
+ this->random_state[i] = new char[state_size];
+ initstate((unsigned int) i, (char *) this->random_state[i],
+ (size_t) state_size);
+ }
+ break;
}
- break;
- }
-
#if defined(NUMA)
- this->numa_max_domain = numa_max_node();
- this->num_numa_domains = this->numa_max_domain + 1;
+ this->numa_max_domain = numa_max_node();
+ this->num_numa_domains = this->numa_max_domain + 1;
#endif
+ switch (this->numa_placement) {
+ case LOCAL:
+ default:
+ this->alloc_local();
+ break;
+ case XOR:
+ this->alloc_xor();
+ break;
+ case ADD:
+ this->alloc_add();
+ break;
+ case MAP:
+ this->alloc_map();
+ break;
+ }
- switch (this->numa_placement) {
- case LOCAL :
- default:
- this->alloc_local();
- break;
- case XOR :
- this->alloc_xor();
- break;
- case ADD :
- this->alloc_add();
- break;
- case MAP :
- this->alloc_map();
- break;
- }
-
- return 0;
+ return 0;
}
-
-int64
-Experiment::parse_number( const char* s )
-{
- int64 result = 0;
-
- int len = strlen( s );
- for (int i=0; i < len; i++) {
- if ( '0' <= s[i] && s[i] <= '9' ) {
- result = result * 10 + s[i] - '0';
- } else if (s[i] == 'k' || s[i] == 'K') {
- result = result << 10;
- break;
- } else if (s[i] == 'm' || s[i] == 'M') {
- result = result << 20;
- break;
- } else if (s[i] == 'g' || s[i] == 'G') {
- result = result << 30;
- break;
- } else if (s[i] == 't' || s[i] == 'T') {
- result = result << 40;
- break;
- } else {
- break;
+int64 Experiment::parse_number(const char* s) {
+ int64 result = 0;
+
+ int len = strlen(s);
+ for (int i = 0; i < len; i++) {
+ if ('0' <= s[i] && s[i] <= '9') {
+ result = result * 10 + s[i] - '0';
+ } else if (s[i] == 'k' || s[i] == 'K') {
+ result = result << 10;
+ break;
+ } else if (s[i] == 'm' || s[i] == 'M') {
+ result = result << 20;
+ break;
+ } else if (s[i] == 'g' || s[i] == 'G') {
+ result = result << 30;
+ break;
+ } else if (s[i] == 't' || s[i] == 'T') {
+ result = result << 40;
+ break;
+ } else {
+ break;
+ }
}
- }
- return result;
+ return result;
}
-
-float
-Experiment::parse_real( const char* s )
-{
- float result = 0;
- bool decimal = false;
- float power = 1;
-
- int len = strlen( s );
- for (int i=0; i < len; i++) {
- if ( '0' <= s[i] && s[i] <= '9' ) {
- if (! decimal) {
- result = result * 10 + s[i] - '0';
- } else {
- power = power / 10;
- result = result + (s[i] - '0') * power;
- }
- } else if ( '.' == s[i] ) {
- decimal = true;
- } else {
- break;
+float Experiment::parse_real(const char* s) {
+ float result = 0;
+ bool decimal = false;
+ float power = 1;
+
+ int len = strlen(s);
+ for (int i = 0; i < len; i++) {
+ if ('0' <= s[i] && s[i] <= '9') {
+ if (!decimal) {
+ result = result * 10 + s[i] - '0';
+ } else {
+ power = power / 10;
+ result = result + (s[i] - '0') * power;
+ }
+ } else if ('.' == s[i]) {
+ decimal = true;
+ } else {
+ break;
+ }
}
- }
- return result;
+ return result;
}
-void
-Experiment::alloc_local()
-{
- for (int i=0; i < this->num_threads; i++) {
- this->thread_domain[i] = i % this->num_numa_domains;
- for (int j=0; j < this->chains_per_thread; j++) {
- this->chain_domain[i][j] = this->thread_domain[i];
+void Experiment::alloc_local() {
+ for (int i = 0; i < this->num_threads; i++) {
+ this->thread_domain[i] = i % this->num_numa_domains;
+ for (int j = 0; j < this->chains_per_thread; j++) {
+ this->chain_domain[i][j] = this->thread_domain[i];
+ }
}
- }
}
-void
-Experiment::alloc_xor()
-{
- for (int i=0; i < this->num_threads; i++) {
- this->thread_domain[i] = i % this->num_numa_domains;
- for (int j=0; j < this->chains_per_thread; j++) {
- this->chain_domain[i][j] = (this->thread_domain[i] ^ this->offset_or_mask) % this->num_numa_domains;
+void Experiment::alloc_xor() {
+ for (int i = 0; i < this->num_threads; i++) {
+ this->thread_domain[i] = i % this->num_numa_domains;
+ for (int j = 0; j < this->chains_per_thread; j++) {
+ this->chain_domain[i][j] = (this->thread_domain[i]
+ ^ this->offset_or_mask) % this->num_numa_domains;
+ }
}
- }
}
-void
-Experiment::alloc_add()
-{
- for (int i=0; i < this->num_threads; i++) {
- this->thread_domain[i] = i % this->num_numa_domains;
- for (int j=0; j < this->chains_per_thread; j++) {
- this->chain_domain[i][j] = (this->thread_domain[i] + this->offset_or_mask) % this->num_numa_domains;
+void Experiment::alloc_add() {
+ for (int i = 0; i < this->num_threads; i++) {
+ this->thread_domain[i] = i % this->num_numa_domains;
+ for (int j = 0; j < this->chains_per_thread; j++) {
+ this->chain_domain[i][j] = (this->thread_domain[i]
+ + this->offset_or_mask) % this->num_numa_domains;
+ }
}
- }
}
- // DOES NOT HANDLE ILL-FORMED SPECIFICATIONS
-void
-Experiment::alloc_map()
-{
- // STRICT -- fail if specifications are inconsistent
-
- // maps look like "t1:c11,c12,...,c1m;t2:c21,...,c2m;...;tn:cn1,...,cnm"
- // where t[i] is the thread domain of the ith thread,
- // and c[i][j] is the chain domain of the jth chain in the ith thread
-
- // count the thread descriptors by counting ";" up to EOS
- int threads = 1;
- char *p = this->placement_map;
- while (*p != '\0') {
- if (*p == ';') threads += 1;
- p++;
- }
- int thread_domain[ threads ];
-
- // count the chain descriptors by counting "," up to ";" or EOS
- int chains = 1;
- p = this->placement_map;
- while (*p != '\0') {
- if (*p == ';') break;
- if (*p == ',') chains += 1;
- p++;
- }
- int chain_domain [ threads ][ chains ];
-
- int t=0, c=0;
- p = this->placement_map;
- while (*p != '\0') {
- // everything up to ":" is the thread domain
- int i = 0;
- char buf[64];
+// DOES NOT HANDLE ILL-FORMED SPECIFICATIONS
+void Experiment::alloc_map() {
+ // STRICT -- fail if specifications are inconsistent
+
+ // maps look like "t1:c11,c12,...,c1m;t2:c21,...,c2m;...;tn:cn1,...,cnm"
+ // where t[i] is the thread domain of the ith thread,
+ // and c[i][j] is the chain domain of the jth chain in the ith thread
+
+ // count the thread descriptors by counting ";" up to EOS
+ int threads = 1;
+ char *p = this->placement_map;
while (*p != '\0') {
- if (*p == ':') { p++; break; }
- buf[i] = *p;
- i++;
- p++;
+ if (*p == ';')
+ threads += 1;
+ p++;
}
- buf[i] = '\0';
- thread_domain[t] = Experiment::parse_number(buf);
+ int thread_domain[threads];
- // search for one or several ','
- c = 0;
- while (*p != '\0' && *p != ';') {
- if (chains <= c || threads <= t) {
- // error in the thread/chain specification
- fprintf(stderr, "Malformed map.\n");
- exit(1);
- }
- int i = 0;
- while (*p != '\0' && *p != ';') {
- if (*p == ',') { p++; break; }
- buf[i] = *p;
- i++;
+ // count the chain descriptors by counting "," up to ";" or EOS
+ int chains = 1;
+ p = this->placement_map;
+ while (*p != '\0') {
+ if (*p == ';')
+ break;
+ if (*p == ',')
+ chains += 1;
p++;
- }
- buf[i] = '\0';
- chain_domain[t][c] = Experiment::parse_number(buf);
- c++;
}
+ int chain_domain[threads][chains];
- if (*p == '\0') break;
- if (*p == ';') p++;
- t++;
- }
-
+ int t = 0, c = 0;
+ p = this->placement_map;
+ while (*p != '\0') {
+ // everything up to ":" is the thread domain
+ int i = 0;
+ char buf[64];
+ while (*p != '\0') {
+ if (*p == ':') {
+ p++;
+ break;
+ }
+ buf[i] = *p;
+ i++;
+ p++;
+ }
+ buf[i] = '\0';
+ thread_domain[t] = Experiment::parse_number(buf);
+
+ // search for one or several ','
+ c = 0;
+ while (*p != '\0' && *p != ';') {
+ if (chains <= c || threads <= t) {
+ // error in the thread/chain specification
+ fprintf(stderr, "Malformed map.\n");
+ exit(1);
+ }
+ int i = 0;
+ while (*p != '\0' && *p != ';') {
+ if (*p == ',') {
+ p++;
+ break;
+ }
+ buf[i] = *p;
+ i++;
+ p++;
+ }
+ buf[i] = '\0';
+ chain_domain[t][c] = Experiment::parse_number(buf);
+ c++;
+ }
+
+ if (*p == '\0')
+ break;
+ if (*p == ';')
+ p++;
+ t++;
+ }
- this->num_threads = threads;
- this->chains_per_thread = chains;
+ this->num_threads = threads;
+ this->chains_per_thread = chains;
- this->thread_domain = new int32 [ this->num_threads ];
- this->chain_domain = new int32*[ this->num_threads ];
- this->random_state = new char* [ this->num_threads ];
+ this->thread_domain = new int32[this->num_threads];
+ this->chain_domain = new int32*[this->num_threads];
+ this->random_state = new char*[this->num_threads];
- for (int i=0; i < this->num_threads; i++) {
- this->thread_domain[i] = thread_domain[i] % this->num_numa_domains;
+ for (int i = 0; i < this->num_threads; i++) {
+ this->thread_domain[i] = thread_domain[i] % this->num_numa_domains;
- const int state_size = 256;
- this->random_state[i] = new char[state_size];
- initstate((unsigned int) i, (char *) this->random_state[i], (size_t) state_size);
+ const int state_size = 256;
+ this->random_state[i] = new char[state_size];
+ initstate((unsigned int) i, (char *) this->random_state[i],
+ (size_t) state_size);
- this->chain_domain[i] = new int32 [ this->chains_per_thread ];
- for (int j=0; j < this->chains_per_thread; j++) {
- this->chain_domain[i][j] = chain_domain[i][j] % this->num_numa_domains;
+ this->chain_domain[i] = new int32[this->chains_per_thread];
+ for (int j = 0; j < this->chains_per_thread; j++) {
+ this->chain_domain[i][j] = chain_domain[i][j]
+ % this->num_numa_domains;
+ }
}
- }
- this->bytes_per_thread = this->bytes_per_chain * this->chains_per_thread;
- this->bytes_per_test = this->bytes_per_thread * this->num_threads;
+ this->bytes_per_thread = this->bytes_per_chain * this->chains_per_thread;
+ this->bytes_per_test = this->bytes_per_thread * this->num_threads;
}
#include "Chain.h"
-void
-Experiment::print()
-{
- printf("strict = %d\n", strict);
- printf("pointer_size = %d\n", pointer_size);
- printf("sizeof(Chain) = %d\n", sizeof(Chain));
- printf("sizeof(Chain *) = %d\n", sizeof(Chain *));
- printf("bytes_per_line = %d\n", bytes_per_line);
- printf("links_per_line = %d\n", links_per_line);
- printf("bytes_per_page = %d\n", bytes_per_page);
- printf("lines_per_page = %d\n", lines_per_page);
- printf("links_per_page = %d\n", links_per_page);
- printf("bytes_per_chain = %d\n", bytes_per_chain);
- printf("lines_per_chain = %d\n", lines_per_chain);
- printf("links_per_chain = %d\n", links_per_chain);
- printf("pages_per_chain = %d\n", pages_per_chain);
- printf("chains_per_thread = %d\n", chains_per_thread);
- printf("bytes_per_thread = %d\n", bytes_per_thread);
- printf("num_threads = %d\n", num_threads);
- printf("bytes_per_test = %d\n", bytes_per_test);
- printf("busy cycles = %d\n", busy_cycles);
- printf("prefetch = %d\n", prefetch);
- printf("iterations = %d\n", iterations);
- printf("experiments = %d\n", experiments);
- printf("access_pattern = %d\n", access_pattern);
- printf("stride = %d\n", stride);
- printf("output_mode = %d\n", output_mode);
- printf("numa_placement = %d\n", numa_placement);
- printf("offset_or_mask = %d\n", offset_or_mask);
- printf("numa_max_domain = %d\n", numa_max_domain);
- printf("num_numa_domains = %d\n", num_numa_domains);
-
- for (int i=0; i < this->num_threads; i++) {
- printf("%d: ", this->thread_domain[i]);
- for (int j=0; j < this->chains_per_thread; j++) {
- printf("%d,", this->chain_domain[i][j]);
+// Dump every experiment parameter, followed by the thread -> NUMA domain
+// map (one line per thread: "<thread domain>: <chain domain>,..."), to
+// stdout and flush. Debug aid only.
+//
+// Each argument is cast to the exact type its conversion specifier
+// expects: the size fields are 64-bit and sizeof yields size_t, so
+// passing them straight to "%d" is undefined behaviour.
+void Experiment::print() {
+ printf("strict = %d\n", (int) strict);
+ printf("pointer_size = %lld\n", (long long) pointer_size);
+ printf("sizeof(Chain) = %lu\n", (unsigned long) sizeof(Chain));
+ printf("sizeof(Chain *) = %lu\n", (unsigned long) sizeof(Chain *));
+ printf("bytes_per_line = %lld\n", (long long) bytes_per_line);
+ printf("links_per_line = %lld\n", (long long) links_per_line);
+ printf("bytes_per_page = %lld\n", (long long) bytes_per_page);
+ printf("lines_per_page = %lld\n", (long long) lines_per_page);
+ printf("links_per_page = %lld\n", (long long) links_per_page);
+ printf("bytes_per_chain = %lld\n", (long long) bytes_per_chain);
+ printf("lines_per_chain = %lld\n", (long long) lines_per_chain);
+ printf("links_per_chain = %lld\n", (long long) links_per_chain);
+ printf("pages_per_chain = %lld\n", (long long) pages_per_chain);
+ printf("chains_per_thread = %lld\n", (long long) chains_per_thread);
+ printf("bytes_per_thread = %lld\n", (long long) bytes_per_thread);
+ printf("num_threads = %lld\n", (long long) num_threads);
+ printf("bytes_per_test = %lld\n", (long long) bytes_per_test);
+ printf("busy cycles = %lld\n", (long long) busy_cycles);
+ printf("prefetch = %d\n", (int) prefetch);
+ printf("iterations = %lld\n", (long long) iterations);
+ printf("experiments = %lld\n", (long long) experiments);
+ printf("access_pattern = %d\n", (int) access_pattern);
+ printf("stride = %lld\n", (long long) stride);
+ printf("output_mode = %d\n", (int) output_mode);
+ printf("numa_placement = %d\n", (int) numa_placement);
+ printf("offset_or_mask = %lld\n", (long long) offset_or_mask);
+ printf("numa_max_domain = %lld\n", (long long) numa_max_domain);
+ printf("num_numa_domains = %lld\n", (long long) num_numa_domains);
+
+ // one output line per thread: its own domain, then its chains' domains
+ for (int i = 0; i < this->num_threads; i++) {
+ printf("%d: ", (int) this->thread_domain[i]);
+ for (int j = 0; j < this->chains_per_thread; j++) {
+ printf("%d,", (int) this->chain_domain[i][j]);
+ }
+ printf("\n");
}
- printf("\n");
- }
- fflush(stdout);
+ fflush(stdout);
}
-const char*
-Experiment::access()
-{
- const char* result = NULL;
-
- if (this->access_pattern == RANDOM) {
- result = "random";
- } else if (this->access_pattern == STRIDED && 0 < this->stride) {
- result = "forward";
- } else if (this->access_pattern == STRIDED && this->stride < 0) {
- result = "reverse";
- } else if (this->access_pattern == STREAM) {
- result = "stream";
- }
-
- return result;
+const char* Experiment::access() {
+ const char* result = NULL;
+
+ if (this->access_pattern == RANDOM) {
+ result = "random";
+ } else if (this->access_pattern == STRIDED && 0 < this->stride) {
+ result = "forward";
+ } else if (this->access_pattern == STRIDED && this->stride < 0) {
+ result = "reverse";
+ } else if (this->access_pattern == STREAM) {
+ result = "stream";
+ }
+
+ return result;
}
-const char*
-Experiment::placement()
-{
- const char* result = NULL;
-
- if (this->numa_placement == LOCAL) {
- result = "local";
- } else if (this->numa_placement == XOR) {
- result = "xor";
- } else if (this->numa_placement == ADD) {
- result = "add";
- } else if (this->numa_placement == MAP) {
- result = "map";
- }
-
- return result;
+const char* Experiment::placement() {
+ const char* result = NULL;
+
+ if (this->numa_placement == LOCAL) {
+ result = "local";
+ } else if (this->numa_placement == XOR) {
+ result = "xor";
+ } else if (this->numa_placement == ADD) {
+ result = "add";
+ } else if (this->numa_placement == MAP) {
+ result = "map";
+ }
+
+ return result;
}
diff --git a/src/Experiment.h b/src/Experiment.h
index 38756f0..0089c2f 100644
--- a/src/Experiment.h
+++ b/src/Experiment.h
@@ -9,7 +9,6 @@
* Douglas M. Pase - initial API and implementation *
*******************************************************************************/
-
#if !defined(Experiment_h)
#define Experiment_h
@@ -18,17 +17,17 @@
class Experiment {
public:
- Experiment();
- ~Experiment();
+ Experiment();
+ ~Experiment();
- int parse_args(int argc, char* argv[]);
- int64 parse_number( const char* s );
- float parse_real( const char* s );
+ int parse_args(int argc, char* argv[]);
+ int64 parse_number(const char* s);
+ float parse_real(const char* s);
- const char* placement();
- const char* access();
+ const char* placement();
+ const char* access();
- // fundamental parameters
+ // fundamental parameters
int64 pointer_size; // number of bytes in a pointer
int64 bytes_per_line; // working set cache line size (bytes)
int64 links_per_line; // working set cache line size (links)
@@ -46,23 +45,23 @@ public:
int64 busy_cycles; // processing cycles
bool prefetch; // use of prefetching
- float seconds; // number of seconds per experiment
+ float seconds; // number of seconds per experiment
int64 iterations; // number of iterations per experiment
int64 experiments; // number of experiments per test
enum { CSV, BOTH, HEADER, TABLE }
- output_mode; // results output mode
+ output_mode; // results output mode
enum { RANDOM, STRIDED, STREAM }
- access_pattern; // memory access pattern
+ access_pattern; // memory access pattern
int64 stride;
enum { LOCAL, XOR, ADD, MAP }
- numa_placement; // memory allocation mode
+ numa_placement; // memory allocation mode
int64 offset_or_mask;
char* placement_map;
- // maps threads and chains to numa domains
+ // maps threads and chains to numa domains
int32* thread_domain; // thread_domain[thread]
int32** chain_domain; // chain_domain[thread][chain]
int32 numa_max_domain; // highest numa domain id
@@ -70,7 +69,7 @@ public:
char** random_state; // random state for each thread
- int strict; // strictly adhere to user input, or fail
+ int strict; // strictly adhere to user input, or fail
const static int32 DEFAULT_POINTER_SIZE = sizeof(Chain);
const static int32 DEFAULT_BYTES_PER_LINE = 64;
@@ -95,11 +94,11 @@ public:
const static bool DEFAULT_PREFETCH = false;
void alloc_local();
- void alloc_xor();
- void alloc_add();
- void alloc_map();
+ void alloc_xor();
+ void alloc_add();
+ void alloc_map();
- void print();
+ void print();
private:
};
diff --git a/src/Lock.cpp b/src/Lock.cpp
index 104dc81..517843d 100644
--- a/src/Lock.cpp
+++ b/src/Lock.cpp
@@ -9,37 +9,28 @@
* Douglas M. Pase - initial API and implementation *
*******************************************************************************/
-
#include <stdio.h>
#include <pthread.h>
#include "Lock.h"
-Lock::Lock()
-{
- pthread_mutex_init( &(this->mutex), NULL );
+Lock::Lock() {
+ pthread_mutex_init(&(this->mutex), NULL);
}
-Lock::~Lock()
-{
- pthread_mutex_destroy( &(this->mutex) );
+Lock::~Lock() {
+ pthread_mutex_destroy(&(this->mutex));
}
-void
-Lock::lock()
-{
- pthread_mutex_lock( &(this->mutex) );
+void Lock::lock() {
+ pthread_mutex_lock(&(this->mutex));
}
-int
-Lock::test()
-{
- pthread_mutex_trylock( &(this->mutex) );
+// Try to take the mutex without blocking. Returns the status reported by
+// pthread_mutex_trylock(): 0 when the lock was acquired, otherwise an
+// error number (EBUSY when the mutex is already held).
+int Lock::test() {
+ return pthread_mutex_trylock(&(this->mutex));
}
-void
-Lock::unlock()
-{
- pthread_mutex_unlock( &(this->mutex) );
-}
+void Lock::unlock() {
+ pthread_mutex_unlock(&(this->mutex));
+}
diff --git a/src/Lock.h b/src/Lock.h
index 14bf1dc..04d5e15 100644
--- a/src/Lock.h
+++ b/src/Lock.h
@@ -9,7 +9,6 @@
* Douglas M. Pase - initial API and implementation *
*******************************************************************************/
-
#if !defined(Lock_h)
#define Lock_h
@@ -17,14 +16,14 @@
class Lock {
public:
- Lock();
- ~Lock();
- void lock();
- int test();
- void unlock();
+ Lock();
+ ~Lock();
+ void lock();
+ int test();
+ void unlock();
private:
- pthread_mutex_t mutex;
+ pthread_mutex_t mutex;
};
#endif
diff --git a/src/Main.c b/src/Main.c
index 5d5f243..3a2ec10 100644
--- a/src/Main.c
+++ b/src/Main.c
@@ -9,7 +9,6 @@
* Douglas M. Pase - initial API and implementation *
*******************************************************************************/
-
#include <stdio.h>
#include "Main.h"
@@ -21,70 +20,75 @@
#include "Experiment.h"
#include "SpinBarrier.h"
- // This program allocates and accesses
- // a number of blocks of memory, one or more
- // for each thread that executes. Blocks
- // are divided into sub-blocks called
- // pages, and pages are divided into
- // sub-blocks called cache lines.
- //
- // All pages are collected into a list.
- // Pages are selected for the list in
- // a particular order. Each cache line
- // within the page is similarly gathered
- // into a list in a particular order.
- // In both cases the order may be random
- // or linear.
- //
- // A root pointer points to the first
- // cache line. A pointer in the cache
- // line points to the next cache line,
- // which contains a pointer to the cache
- // line after that, and so on. This
- // forms a pointer chain that touches all
- // cache lines within the first page,
- // then all cache lines within the second
- // page, and so on until all pages are
- // covered. The last pointer contains
- // NULL, terminating the chain.
- //
- // Depending on compile-time options,
- // pointers may be 32-bit or 64-bit
- // pointers.
+// This program allocates and accesses
+// a number of blocks of memory, one or more
+// for each thread that executes. Blocks
+// are divided into sub-blocks called
+// pages, and pages are divided into
+// sub-blocks called cache lines.
+//
+// All pages are collected into a list.
+// Pages are selected for the list in
+// a particular order. Each cache line
+// within the page is similarly gathered
+// into a list in a particular order.
+// In both cases the order may be random
+// or linear.
+//
+// A root pointer points to the first
+// cache line. A pointer in the cache
+// line points to the next cache line,
+// which contains a pointer to the cache
+// line after that, and so on. This
+// forms a pointer chain that touches all
+// cache lines within the first page,
+// then all cache lines within the second
+// page, and so on until all pages are
+// covered. The last pointer contains
+// NULL, terminating the chain.
+//
+// Depending on compile-time options,
+// pointers may be 32-bit or 64-bit
+// pointers.
int verbose = 0;
-int
-main( int argc, char* argv[] )
-{
- Timer::calibrate(10000);
- double clk_res = Timer::resolution();
+int main(int argc, char* argv[]) {
+ Timer
+ ::calibrate(10000);
+ double clk_res = Timer
+ ::resolution();
- Experiment e;
- if (e.parse_args(argc, argv)) {
- return 0;
- }
+ Experiment e;
+ if (e.parse_args(argc, argv)) {
+ return 0;
+ }
#if defined(UNDEFINED)
- e.print();
- if (argv != NULL) return 0;
+ e.print();
+ if (argv != NULL) return 0;
#endif
- SpinBarrier sb( e.num_threads );
- Run r[ e.num_threads ];
- for (int i=0; i < e.num_threads; i++) {
- r[i].set( e, &sb );
- r[i].start();
- }
+ SpinBarrier
+ sb(e.num_threads);
+ Run r[e.num_threads];
+ for (int i = 0; i < e.num_threads; i++) {
+ r[i].set(e, &sb);
+ r[i].start();
+ }
- for (int i=0; i < e.num_threads; i++) {
- r[i].wait();
- }
+ for (int i = 0; i < e.num_threads; i++) {
+ r[i].wait();
+ }
- int64 ops = Run::ops_per_chain();
- double secs = Run::seconds();
+ int64
+ ops = Run
+ ::ops_per_chain();
+ double secs = Run
+ ::seconds();
- Output::print(e, ops, secs, clk_res);
+ Output
+ ::print(e, ops, secs, clk_res);
- return 0;
+ return 0;
}
diff --git a/src/Main.cpp b/src/Main.cpp
index ebd276a..a4d68e4 100644
--- a/src/Main.cpp
+++ b/src/Main.cpp
@@ -9,7 +9,6 @@
* Douglas M. Pase - initial API and implementation *
*******************************************************************************/
-
#include <stdio.h>
#include "Main.h"
@@ -20,70 +19,68 @@
#include "Output.h"
#include "Experiment.h"
- // This program allocates and accesses
- // a number of blocks of memory, one or more
- // for each thread that executes. Blocks
- // are divided into sub-blocks called
- // pages, and pages are divided into
- // sub-blocks called cache lines.
- //
- // All pages are collected into a list.
- // Pages are selected for the list in
- // a particular order. Each cache line
- // within the page is similarly gathered
- // into a list in a particular order.
- // In both cases the order may be random
- // or linear.
- //
- // A root pointer points to the first
- // cache line. A pointer in the cache
- // line points to the next cache line,
- // which contains a pointer to the cache
- // line after that, and so on. This
- // forms a pointer chain that touches all
- // cache lines within the first page,
- // then all cache lines within the second
- // page, and so on until all pages are
- // covered. The last pointer contains
- // NULL, terminating the chain.
- //
- // Depending on compile-time options,
- // pointers may be 32-bit or 64-bit
- // pointers.
+// This program allocates and accesses
+// a number of blocks of memory, one or more
+// for each thread that executes. Blocks
+// are divided into sub-blocks called
+// pages, and pages are divided into
+// sub-blocks called cache lines.
+//
+// All pages are collected into a list.
+// Pages are selected for the list in
+// a particular order. Each cache line
+// within the page is similarly gathered
+// into a list in a particular order.
+// In both cases the order may be random
+// or linear.
+//
+// A root pointer points to the first
+// cache line. A pointer in the cache
+// line points to the next cache line,
+// which contains a pointer to the cache
+// line after that, and so on. This
+// forms a pointer chain that touches all
+// cache lines within the first page,
+// then all cache lines within the second
+// page, and so on until all pages are
+// covered. The last pointer contains
+// NULL, terminating the chain.
+//
+// Depending on compile-time options,
+// pointers may be 32-bit or 64-bit
+// pointers.
int verbose = 0;
-int
-main( int argc, char* argv[] )
-{
- Timer::calibrate(10000);
- double clk_res = Timer::resolution();
+int main(int argc, char* argv[]) {
+ Timer::calibrate(10000);
+ double clk_res = Timer::resolution();
- Experiment e;
- if (e.parse_args(argc, argv)) {
- return 0;
- }
+ Experiment e;
+ if (e.parse_args(argc, argv)) {
+ return 0;
+ }
#if defined(UNDEFINED)
- e.print();
- if (argv != NULL) return 0;
+ e.print();
+ if (argv != NULL) return 0;
#endif
- SpinBarrier sb( e.num_threads );
- Run r[ e.num_threads ];
- for (int i=0; i < e.num_threads; i++) {
- r[i].set( e, &sb );
- r[i].start();
- }
+ SpinBarrier sb(e.num_threads);
+ Run r[e.num_threads];
+ for (int i = 0; i < e.num_threads; i++) {
+ r[i].set(e, &sb);
+ r[i].start();
+ }
- for (int i=0; i < e.num_threads; i++) {
- r[i].wait();
- }
+ for (int i = 0; i < e.num_threads; i++) {
+ r[i].wait();
+ }
- int64 ops = Run::ops_per_chain();
- double secs = Run::seconds();
+ int64 ops = Run::ops_per_chain();
+ double secs = Run::seconds();
- Output::print(e, ops, secs, clk_res);
+ Output::print(e, ops, secs, clk_res);
- return 0;
+ return 0;
}
diff --git a/src/Output.cpp b/src/Output.cpp
index 9f9c09a..84eb0df 100644
--- a/src/Output.cpp
+++ b/src/Output.cpp
@@ -9,7 +9,6 @@
* Douglas M. Pase - initial API and implementation *
*******************************************************************************/
-
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -19,25 +18,20 @@
#include "Types.h"
#include "Experiment.h"
-
-void
-Output::print( Experiment &e, int64 ops, double secs, double ck_res )
-{
- if (e.output_mode == Experiment::CSV) {
- Output::csv(e, ops, secs, ck_res);
- } else if (e.output_mode == Experiment::BOTH) {
- Output::header(e, ops, secs, ck_res);
- Output::csv(e, ops, secs, ck_res);
- } else if (e.output_mode == Experiment::HEADER) {
- Output::header(e, ops, secs, ck_res);
- } else {
- Output::table(e, ops, secs, ck_res);
- }
+void Output::print(Experiment &e, int64 ops, double secs, double ck_res) {
+ if (e.output_mode == Experiment::CSV) {
+ Output::csv(e, ops, secs, ck_res);
+ } else if (e.output_mode == Experiment::BOTH) {
+ Output::header(e, ops, secs, ck_res);
+ Output::csv(e, ops, secs, ck_res);
+ } else if (e.output_mode == Experiment::HEADER) {
+ Output::header(e, ops, secs, ck_res);
+ } else {
+ Output::table(e, ops, secs, ck_res);
+ }
}
-void
-Output::header( Experiment &e, int64 ops, double secs, double ck_res )
-{
+void Output::header(Experiment &e, int64 ops, double secs, double ck_res) {
printf("pointer size (bytes),");
printf("cache line size (bytes),");
printf("page size (bytes),");
@@ -65,9 +59,7 @@ Output::header( Experiment &e, int64 ops, double secs, double ck_res )
fflush(stdout);
}
-void
-Output::csv( Experiment &e, int64 ops, double secs, double ck_res )
-{
+void Output::csv(Experiment &e, int64 ops, double secs, double ck_res) {
printf("%ld,", e.pointer_size);
printf("%ld,", e.bytes_per_line);
printf("%ld,", e.bytes_per_page);
@@ -86,16 +78,16 @@ Output::csv( Experiment &e, int64 ops, double secs, double ck_res )
printf("\"");
printf("%d:", e.thread_domain[0]);
printf("%d", e.chain_domain[0][0]);
- for (int j=1; j < e.chains_per_thread; j++) {
- printf(",%d", e.chain_domain[0][j]);
- }
- for (int i=1; i < e.num_threads; i++) {
- printf(";%d:", e.thread_domain[i]);
- printf("%d", e.chain_domain[i][0]);
- for (int j=1; j < e.chains_per_thread; j++) {
- printf(",%d", e.chain_domain[i][j]);
+ for (int j = 1; j < e.chains_per_thread; j++) {
+ printf(",%d", e.chain_domain[0][j]);
+ }
+ for (int i = 1; i < e.num_threads; i++) {
+ printf(";%d:", e.thread_domain[i]);
+ printf("%d", e.chain_domain[i][0]);
+ for (int j = 1; j < e.chains_per_thread; j++) {
+ printf(",%d", e.chain_domain[i][j]);
+ }
}
- }
printf("\",");
printf("%ld,", ops);
printf("%ld,", ops * e.chains_per_thread * e.num_threads);
@@ -108,9 +100,7 @@ Output::csv( Experiment &e, int64 ops, double secs, double ck_res )
fflush(stdout);
}
-void
-Output::table( Experiment &e, int64 ops, double secs, double ck_res )
-{
+void Output::table(Experiment &e, int64 ops, double secs, double ck_res) {
printf("pointer size = %ld (bytes)\n", e.pointer_size);
printf("cache line size = %ld (bytes)\n", e.bytes_per_line);
printf("page size = %ld (bytes)\n", e.bytes_per_page);
@@ -130,16 +120,16 @@ Output::table( Experiment &e, int64 ops, double secs, double ck_res )
printf("\"");
printf("%d:", e.thread_domain[0]);
printf("%d", e.chain_domain[0][0]);
- for (int j=1; j < e.chains_per_thread; j++) {
- printf(",%d", e.chain_domain[0][j]);
- }
- for (int i=1; i < e.num_threads; i++) {
- printf(";%d:", e.thread_domain[i]);
- printf("%d", e.chain_domain[i][0]);
- for (int j=1; j < e.chains_per_thread; j++) {
- printf(",%d", e.chain_domain[i][j]);
+ for (int j = 1; j < e.chains_per_thread; j++) {
+ printf(",%d", e.chain_domain[0][j]);
+ }
+ for (int i = 1; i < e.num_threads; i++) {
+ printf(";%d:", e.thread_domain[i]);
+ printf("%d", e.chain_domain[i][0]);
+ for (int j = 1; j < e.chains_per_thread; j++) {
+ printf(",%d", e.chain_domain[i][j]);
+ }
}
- }
printf("\"\n");
printf("operations per chain = %ld\n", ops);
printf("total operations = %ld\n", ops * e.chains_per_thread * e.num_threads);
diff --git a/src/Output.h b/src/Output.h
index 9ee2c80..65d3926 100644
--- a/src/Output.h
+++ b/src/Output.h
@@ -9,7 +9,6 @@
* Douglas M. Pase - initial API and implementation *
*******************************************************************************/
-
#if !defined(Output_h)
#define Output_h
@@ -18,10 +17,10 @@
class Output {
public:
- static void print ( Experiment &e, int64 ops, double secs, double ck_res );
- static void header( Experiment &e, int64 ops, double secs, double ck_res );
- static void csv ( Experiment &e, int64 ops, double secs, double ck_res );
- static void table ( Experiment &e, int64 ops, double secs, double ck_res );
+ static void print(Experiment &e, int64 ops, double secs, double ck_res);
+ static void header(Experiment &e, int64 ops, double secs, double ck_res);
+ static void csv(Experiment &e, int64 ops, double secs, double ck_res);
+ static void table(Experiment &e, int64 ops, double secs, double ck_res);
private:
};
diff --git a/src/Run.cpp b/src/Run.cpp
index c774b99..24435b4 100644
--- a/src/Run.cpp
+++ b/src/Run.cpp
@@ -9,7 +9,6 @@
* Douglas M. Pase - initial API and implementation *
*******************************************************************************/
-
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
@@ -24,1253 +23,1255 @@
#include "Timer.h"
#include "SpinBarrier.h"
+static double max(double v1, double v2);
+static double min(double v1, double v2);
+static void chase_pointers(int64 chains_per_thread, int64 iterations,
+ Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride,
+ int64 busy_cycles, bool prefetch);
+static void follow_streams(int64 chains_per_thread, int64 iterations,
+ Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride,
+ int64 busy_cycles, bool prefetch);
+static void (*run_benchmark)(int64 chains_per_thread, int64 iterations,
+ Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride,
+ int64 busy_cycles, bool prefetch) = chase_pointers;
-static double max( double v1, double v2 );
-static double min( double v1, double v2 );
-static void chase_pointers(int64 chains_per_thread, int64 iterations, Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride, int64 busy_cycles, bool prefetch);
-static void follow_streams(int64 chains_per_thread, int64 iterations, Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride, int64 busy_cycles, bool prefetch);
-static void (*run_benchmark)(int64 chains_per_thread, int64 iterations, Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride, int64 busy_cycles, bool prefetch) = chase_pointers;
-
-Lock Run::global_mutex;
-int64 Run::_ops_per_chain = 0;
-double Run::_seconds = 1E9;
+Lock Run::global_mutex;
+int64 Run::_ops_per_chain = 0;
+double Run::_seconds = 1E9;
#define prefetch(x) __builtin_prefetch(x)
-Run::Run()
-: exp(NULL), bp(NULL)
-{
+Run::Run() :
+ exp(NULL), bp(NULL) {
}
-Run::~Run()
-{
+Run::~Run() {
}
-void
-Run::set( Experiment &e, SpinBarrier* sbp )
-{
- this->exp = &e;
- this->bp = sbp;
+void Run::set(Experiment &e, SpinBarrier* sbp) {
+ this->exp = &e;
+ this->bp = sbp;
}
-int
-Run::run()
-{
- // first allocate all memory for the chains,
- // making sure it is allocated within the
- // intended numa domains
- Chain** chain_memory = new Chain* [ this->exp->chains_per_thread ];
- Chain** root = new Chain* [ this->exp->chains_per_thread ];
+int Run::run() {
+ // first allocate all memory for the chains,
+ // making sure it is allocated within the
+ // intended numa domains
+ Chain** chain_memory = new Chain*[this->exp->chains_per_thread];
+ Chain** root = new Chain*[this->exp->chains_per_thread];
#if defined(NUMA)
- // establish the node id where this thread
- // will run. threads are mapped to nodes
- // by the set-up code for Experiment.
- int run_node_id = this->exp->thread_domain[this->thread_id()];
- numa_run_on_node(run_node_id);
+ // establish the node id where this thread
+ // will run. threads are mapped to nodes
+ // by the set-up code for Experiment.
+ int run_node_id = this->exp->thread_domain[this->thread_id()];
+ numa_run_on_node(run_node_id);
- // establish the node id where this thread's
- // memory will be allocated.
- for (int i=0; i < this->exp->chains_per_thread; i++) {
- int alloc_node_id = this->exp->chain_domain[this->thread_id()][i];
- nodemask_t alloc_mask;
- nodemask_zero(&alloc_mask);
- nodemask_set(&alloc_mask, alloc_node_id);
- numa_set_membind(&alloc_mask);
+ // establish the node id where this thread's
+ // memory will be allocated.
+ for (int i=0; i < this->exp->chains_per_thread; i++) {
+ int alloc_node_id = this->exp->chain_domain[this->thread_id()][i];
+ nodemask_t alloc_mask;
+ nodemask_zero(&alloc_mask);
+ nodemask_set(&alloc_mask, alloc_node_id);
+ numa_set_membind(&alloc_mask);
- chain_memory[i] = new Chain[ this->exp->links_per_chain ];
- }
+ chain_memory[i] = new Chain[ this->exp->links_per_chain ];
+ }
#else
- for (int i=0; i < this->exp->chains_per_thread; i++) {
- chain_memory[i] = new Chain[ this->exp->links_per_chain ];
- }
+ for (int i = 0; i < this->exp->chains_per_thread; i++) {
+ chain_memory[i] = new Chain[this->exp->links_per_chain];
+ }
#endif
- // initialize the chains and
- // select the function that
- // will execute the tests
- for (int i=0; i < this->exp->chains_per_thread; i++) {
- if (this->exp->access_pattern == Experiment::RANDOM) {
- root[i] = random_mem_init( chain_memory[i] );
- run_benchmark = chase_pointers;
- } else if (this->exp->access_pattern == Experiment::STRIDED) {
- if (0 < this->exp->stride) {
- root[i] = forward_mem_init( chain_memory[i] );
- } else {
- root[i] = reverse_mem_init( chain_memory[i] );
- }
- run_benchmark = chase_pointers;
- } else if (this->exp->access_pattern == Experiment::STREAM) {
- root[i] = stream_mem_init( chain_memory[i] );
- run_benchmark = follow_streams;
+ // initialize the chains and
+ // select the function that
+ // will execute the tests
+ for (int i = 0; i < this->exp->chains_per_thread; i++) {
+ if (this->exp->access_pattern == Experiment::RANDOM) {
+ root[i] = random_mem_init(chain_memory[i]);
+ run_benchmark = chase_pointers;
+ } else if (this->exp->access_pattern == Experiment::STRIDED) {
+ if (0 < this->exp->stride) {
+ root[i] = forward_mem_init(chain_memory[i]);
+ } else {
+ root[i] = reverse_mem_init(chain_memory[i]);
+ }
+ run_benchmark = chase_pointers;
+ } else if (this->exp->access_pattern == Experiment::STREAM) {
+ root[i] = stream_mem_init(chain_memory[i]);
+ run_benchmark = follow_streams;
+ }
}
- }
- if (this->exp->iterations <= 0) {
- volatile static double istart = 0;
- volatile static double istop = 0;
- volatile static double elapsed = 0;
- volatile static int64 iters = 1;
- volatile double bound = max(0.2, 10 * Timer::resolution());
- for (iters=1; elapsed <= bound; iters=iters<<1) {
- this->bp->barrier();
+ if (this->exp->iterations <= 0) {
+ volatile static double istart = 0;
+ volatile static double istop = 0;
+ volatile static double elapsed = 0;
+ volatile static int64 iters = 1;
+ volatile double bound = max(0.2, 10 * Timer::resolution());
+ for (iters = 1; elapsed <= bound; iters = iters << 1) {
+ this->bp->barrier();
- // start timer
- if (this->thread_id() == 0) {
- istart = Timer::seconds();
- }
- this->bp->barrier();
+ // start timer
+ if (this->thread_id() == 0) {
+ istart = Timer::seconds();
+ }
+ this->bp->barrier();
- // chase pointers
- run_benchmark(this->exp->chains_per_thread, iters, root, this->exp->bytes_per_line, this->exp->bytes_per_chain, this->exp->stride, this->exp->busy_cycles, this->exp->prefetch);
+ // chase pointers
+ run_benchmark(this->exp->chains_per_thread, iters, root,
+ this->exp->bytes_per_line, this->exp->bytes_per_chain,
+ this->exp->stride, this->exp->busy_cycles,
+ this->exp->prefetch);
- // barrier
- this->bp->barrier();
+ // barrier
+ this->bp->barrier();
- // stop timer
- if (this->thread_id() == 0) {
- istop = Timer::seconds();
- elapsed = istop - istart;
- }
- this->bp->barrier();
- }
+ // stop timer
+ if (this->thread_id() == 0) {
+ istop = Timer::seconds();
+ elapsed = istop - istart;
+ }
+ this->bp->barrier();
+ }
- // calculate the number of iterations
- if (this->thread_id() == 0) {
- if (0 < this->exp->seconds) {
- this->exp->iterations = max(1, 0.9999 + 0.5 * this->exp->seconds * iters / elapsed);
- } else {
- this->exp->iterations = max(1, 0.9999 + iters / elapsed);
- }
+ // calculate the number of iterations
+ if (this->thread_id() == 0) {
+ if (0 < this->exp->seconds) {
+ this->exp->iterations = max(1,
+ 0.9999 + 0.5 * this->exp->seconds * iters / elapsed);
+ } else {
+ this->exp->iterations = max(1, 0.9999 + iters / elapsed);
+ }
+ }
+ this->bp->barrier();
}
- this->bp->barrier();
- }
#if defined(UNDEFINED)
#endif
- // barrier
- for (int e=0; e < this->exp->experiments; e++) {
- this->bp->barrier();
+ // barrier
+ for (int e = 0; e < this->exp->experiments; e++) {
+ this->bp->barrier();
- // start timer
- double start = 0;
- if (this->thread_id() == 0) start = Timer::seconds();
- this->bp->barrier();
+ // start timer
+ double start = 0;
+ if (this->thread_id() == 0)
+ start = Timer::seconds();
+ this->bp->barrier();
- // chase pointers
- run_benchmark(this->exp->chains_per_thread, this->exp->iterations, root, this->exp->bytes_per_line, this->exp->bytes_per_chain, this->exp->stride, this->exp->busy_cycles, this->exp->prefetch);
+ // chase pointers
+ run_benchmark(this->exp->chains_per_thread, this->exp->iterations, root,
+ this->exp->bytes_per_line, this->exp->bytes_per_chain,
+ this->exp->stride, this->exp->busy_cycles, this->exp->prefetch);
- // barrier
- this->bp->barrier();
+ // barrier
+ this->bp->barrier();
- // stop timer
- double stop = 0;
- if (this->thread_id() == 0) stop = Timer::seconds();
- this->bp->barrier();
+ // stop timer
+ double stop = 0;
+ if (this->thread_id() == 0)
+ stop = Timer::seconds();
+ this->bp->barrier();
- if (0 <= e) {
- if (this->thread_id() == 0) {
- double delta = stop - start;
- if (0 < delta) {
- Run::_seconds = min( Run::_seconds, delta );
+ if (0 <= e) {
+ if (this->thread_id() == 0) {
+ double delta = stop - start;
+ if (0 < delta) {
+ Run::_seconds = min(Run::_seconds, delta);
+ }
+ }
}
- }
}
- }
- this->bp->barrier();
+ this->bp->barrier();
- for (int i=0; i < this->exp->chains_per_thread; i++) {
- if (chain_memory[i] != NULL) delete [] chain_memory[i];
- }
- if (chain_memory != NULL) delete [] chain_memory;
+ for (int i = 0; i < this->exp->chains_per_thread; i++) {
+ if (chain_memory[i] != NULL
+ ) delete[] chain_memory[i];
+ }
+ if (chain_memory != NULL
+ ) delete[] chain_memory;
- return 0;
+ return 0;
}
int dummy = 0;
-void
-Run::mem_check( Chain *m )
-{
- if (m == NULL) dummy += 1;
+void Run::mem_check(Chain *m) {
+ if (m == NULL
+ ) dummy += 1;
}
-static double
-max( double v1, double v2 )
-{
- if (v1 < v2) return v2;
- return v1;
+static double max(double v1, double v2) {
+ if (v1 < v2)
+ return v2;
+ return v1;
}
-static double
-min( double v1, double v2 )
-{
- if (v2 < v1) return v2;
- return v1;
+static double min(double v1, double v2) {
+ if (v2 < v1)
+ return v2;
+ return v1;
}
- // exclude 2 and mersienne primes, i.e.,
- // primes of the form 2**n - 1, e.g.,
- // 3, 7, 31, 127
-static const int prime_table[] = { 5, 11, 13, 17, 19, 23, 37, 41, 43, 47,
- 53, 61, 71, 73, 79, 83, 89, 97, 101, 103, 109, 113, 131, 137, 139, 149,
- 151, 157, 163, };
+// exclude 2 and Mersenne primes, i.e.,
+// primes of the form 2**n - 1, e.g.,
+// 3, 7, 31, 127
+static const int prime_table[] = { 5, 11, 13, 17, 19, 23, 37, 41, 43, 47, 53,
+ 61, 71, 73, 79, 83, 89, 97, 101, 103, 109, 113, 131, 137, 139, 149, 151,
+ 157, 163, };
static const int prime_table_size = sizeof prime_table / sizeof prime_table[0];
Chain*
-Run::random_mem_init( Chain *mem )
-{
- // initialize pointers --
- // choose a page at random, then use
- // one pointer from each cache line
- // within the page. all pages and
- // cache lines are chosen at random.
- Chain* root = END_OF_CHAIN;
- Chain* prev = END_OF_CHAIN;
- int link_within_line = 0;
- int64 local_ops_per_chain = 0;
+Run::random_mem_init(Chain *mem) {
+ // initialize pointers --
+ // choose a page at random, then use
+ // one pointer from each cache line
+ // within the page. all pages and
+ // cache lines are chosen at random.
+ Chain* root = END_OF_CHAIN;
+ Chain* prev = END_OF_CHAIN;
+ int link_within_line = 0;
+ int64 local_ops_per_chain = 0;
- // we must set a lock because random()
- // is not thread safe
- Run::global_mutex.lock();
- setstate(this->exp->random_state[this->thread_id()]);
- int page_factor = prime_table[ random() % prime_table_size ];
- int page_offset = random() % this->exp->pages_per_chain;
- Run::global_mutex.unlock();
-
- // loop through the pages
- for (int i=0; i < this->exp->pages_per_chain; i++) {
- int page = (page_factor * i + page_offset) % this->exp->pages_per_chain;
+ // we must set a lock because random()
+ // is not thread safe
Run::global_mutex.lock();
setstate(this->exp->random_state[this->thread_id()]);
- int line_factor = prime_table[ random() % prime_table_size ];
- int line_offset = random() % this->exp->lines_per_page;
+ int page_factor = prime_table[random() % prime_table_size];
+ int page_offset = random() % this->exp->pages_per_chain;
Run::global_mutex.unlock();
- // loop through the lines within a page
- for (int j=0; j < this->exp->lines_per_page; j++) {
- int line_within_page = (line_factor * j + line_offset) % this->exp->lines_per_page;
- int link = page * this->exp->links_per_page + line_within_page * this->exp->links_per_line + link_within_line;
+ // loop through the pages
+ for (int i = 0; i < this->exp->pages_per_chain; i++) {
+ int page = (page_factor * i + page_offset) % this->exp->pages_per_chain;
+ Run::global_mutex.lock();
+ setstate(this->exp->random_state[this->thread_id()]);
+ int line_factor = prime_table[random() % prime_table_size];
+ int line_offset = random() % this->exp->lines_per_page;
+ Run::global_mutex.unlock();
+
+ // loop through the lines within a page
+ for (int j = 0; j < this->exp->lines_per_page; j++) {
+ int line_within_page = (line_factor * j + line_offset)
+ % this->exp->lines_per_page;
+ int link = page * this->exp->links_per_page
+ + line_within_page * this->exp->links_per_line
+ + link_within_line;
- if (root == END_OF_CHAIN) {
+ if (root == END_OF_CHAIN) {
// printf("root = %d(%d)[0x%x].\n", page, line_within_page, mem+link);
- prev = root = mem + link;
- local_ops_per_chain += 1;
- } else {
+ prev = root = mem + link;
+ local_ops_per_chain += 1;
+ } else {
// printf("0x%x = %d(%d)[0x%x].\n", prev, page, line_within_page, mem+link);
- prev->next = mem + link;
- prev = prev->next;
- local_ops_per_chain += 1;
- }
+ prev->next = mem + link;
+ prev = prev->next;
+ local_ops_per_chain += 1;
+ }
+ }
}
- }
- Run::global_mutex.lock();
- Run::_ops_per_chain = local_ops_per_chain;
- Run::global_mutex.unlock();
+ Run::global_mutex.lock();
+ Run::_ops_per_chain = local_ops_per_chain;
+ Run::global_mutex.unlock();
- return root;
+ return root;
}
Chain*
-Run::forward_mem_init( Chain *mem )
-{
- Chain* root = END_OF_CHAIN;
- Chain* prev = END_OF_CHAIN;
- int link_within_line = 0;
- int64 local_ops_per_chain = 0;
+Run::forward_mem_init(Chain *mem) {
+ Chain* root = END_OF_CHAIN;
+ Chain* prev = END_OF_CHAIN;
+ int link_within_line = 0;
+ int64 local_ops_per_chain = 0;
- for (int i=0; i < this->exp->lines_per_chain; i += this->exp->stride) {
- int link = i * this->exp->links_per_line + link_within_line;
- if (root == NULL) {
+ for (int i = 0; i < this->exp->lines_per_chain; i += this->exp->stride) {
+ int link = i * this->exp->links_per_line + link_within_line;
+ if (root == NULL) {
// printf("root = %d(%d)[0x%x].\n", page, line_within_page, mem+link);
- prev = root = mem + link;
- local_ops_per_chain += 1;
- } else {
+ prev = root = mem + link;
+ local_ops_per_chain += 1;
+ } else {
// printf("0x%x = %d(%d)[0x%x].\n", prev, page, line_within_page, mem+link);
- prev->next = mem + link;
- prev = prev->next;
- local_ops_per_chain += 1;
+ prev->next = mem + link;
+ prev = prev->next;
+ local_ops_per_chain += 1;
+ }
}
- }
- Run::global_mutex.lock();
- Run::_ops_per_chain = local_ops_per_chain;
- Run::global_mutex.unlock();
+ Run::global_mutex.lock();
+ Run::_ops_per_chain = local_ops_per_chain;
+ Run::global_mutex.unlock();
- return root;
+ return root;
}
Chain*
-Run::reverse_mem_init( Chain *mem )
-{
- Chain* root = END_OF_CHAIN;
- Chain* prev = END_OF_CHAIN;
- int link_within_line = 0;
- int64 local_ops_per_chain = 0;
+Run::reverse_mem_init(Chain *mem) {
+ Chain* root = END_OF_CHAIN;
+ Chain* prev = END_OF_CHAIN;
+ int link_within_line = 0;
+ int64 local_ops_per_chain = 0;
- int stride = -this->exp->stride;
- int last;
- for (int i=0; i < this->exp->lines_per_chain; i += stride) {
- last = i;
- }
+ int stride = -this->exp->stride;
+ int last;
+ for (int i = 0; i < this->exp->lines_per_chain; i += stride) {
+ last = i;
+ }
- for (int i=last; 0 <= i; i -= stride) {
- int link = i * this->exp->links_per_line + link_within_line;
- if (root == END_OF_CHAIN) {
+ for (int i = last; 0 <= i; i -= stride) {
+ int link = i * this->exp->links_per_line + link_within_line;
+ if (root == END_OF_CHAIN) {
// printf("root = %d(%d)[0x%x].\n", page, line_within_page, mem+link);
- prev = root = mem + link;
- local_ops_per_chain += 1;
- } else {
+ prev = root = mem + link;
+ local_ops_per_chain += 1;
+ } else {
// printf("0x%x = %d(%d)[0x%x].\n", prev, page, line_within_page, mem+link);
- prev->next = mem + link;
- prev = prev->next;
- local_ops_per_chain += 1;
+ prev->next = mem + link;
+ prev = prev->next;
+ local_ops_per_chain += 1;
+ }
}
- }
- Run::global_mutex.lock();
- Run::_ops_per_chain = local_ops_per_chain;
- Run::global_mutex.unlock();
+ Run::global_mutex.lock();
+ Run::_ops_per_chain = local_ops_per_chain;
+ Run::global_mutex.unlock();
- return root;
+ return root;
}
static int64 dumb_ck = 0;
-void
-mem_chk( Chain *m )
-{
- if (m == END_OF_CHAIN) dumb_ck += 1;
+void mem_chk(Chain *m) {
+ if (m == END_OF_CHAIN)
+ dumb_ck += 1;
}
-static void
-chase_pointers(
- int64 chains_per_thread, // memory loading per thread
- int64 iterations, // number of iterations per experiment
- Chain** root, // root(s) of the chain(s) to follow
- int64 bytes_per_line, // ignored
- int64 bytes_per_chain, // ignored
- int64 stride, // ignored
- int64 busy_cycles, // processing cycles
- bool prefetch // prefetch?
-)
-{
- // chase pointers
- switch (chains_per_thread) {
- default:
- case 1:
- for (int64 i=0; i < iterations; i++) {
- Chain* a = root[0];
- while (a != END_OF_CHAIN) {
- a = a->next;
- if (prefetch)
- prefetch(a->next);
- for (int64 j=0; j < busy_cycles; j++)
- asm("nop");
- }
- mem_chk( a );
- }
- break;
- case 2:
- for (int64 i=0; i < iterations; i++) {
- Chain* a = root[0];
- Chain* b = root[1];
- while (a != END_OF_CHAIN) {
- a = a->next;
- b = b->next;
- if (prefetch)
+static void chase_pointers(int64 chains_per_thread, // memory loading per thread
+ int64 iterations, // number of iterations per experiment
+ Chain** root, // root(s) of the chain(s) to follow
+ int64 bytes_per_line, // ignored
+ int64 bytes_per_chain, // ignored
+ int64 stride, // ignored
+ int64 busy_cycles, // processing cycles
+ bool prefetch // prefetch?
+ ) {
+ // chase pointers
+ switch (chains_per_thread) {
+ default:
+ case 1:
+ for (int64 i = 0; i < iterations; i++) {
+ Chain* a = root[0];
+ while (a != END_OF_CHAIN) {
+ a = a->next;
+ if (prefetch)
prefetch(a->next);
- for (int64 j=0; j < busy_cycles; j++)
- asm("nop");
- }
- mem_chk( a );
- mem_chk( b );
- }
- break;
- case 3:
- for (int64 i=0; i < iterations; i++) {
- Chain* a = root[0];
- Chain* b = root[1];
- Chain* c = root[2];
- while (a != END_OF_CHAIN) {
- a = a->next;
- b = b->next;
- c = c->next;
- if (prefetch)
+ for (int64 j = 0; j < busy_cycles; j++)
+ asm("nop");
+ }
+ mem_chk(a);
+ }
+ break;
+ case 2:
+ for (int64 i = 0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ while (a != END_OF_CHAIN) {
+ a = a->next;
+ b = b->next;
+ if (prefetch)
prefetch(a->next);
- for (int64 j=0; j < busy_cycles; j++)
- asm("nop");
- }
- mem_chk( a );
- mem_chk( b );
- mem_chk( c );
- }
- break;
- case 4:
- for (int64 i=0; i < iterations; i++) {
- Chain* a = root[0];
- Chain* b = root[1];
- Chain* c = root[2];
- Chain* d = root[3];
- while (a != END_OF_CHAIN) {
- a = a->next;
- b = b->next;
- c = c->next;
- d = d->next;
- if (prefetch)
+ for (int64 j = 0; j < busy_cycles; j++)
+ asm("nop");
+ }
+ mem_chk(a);
+ mem_chk(b);
+ }
+ break;
+ case 3:
+ for (int64 i = 0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ while (a != END_OF_CHAIN) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ if (prefetch)
prefetch(a->next);
- for (int64 j=0; j < busy_cycles; j++)
- asm("nop");
- }
- mem_chk( a );
- mem_chk( b );
- mem_chk( c );
- mem_chk( d );
- }
- break;
- case 5:
- for (int64 i=0; i < iterations; i++) {
- Chain* a = root[0];
- Chain* b = root[1];
- Chain* c = root[2];
- Chain* d = root[3];
- Chain* e = root[4];
- while (a != END_OF_CHAIN) {
- a = a->next;
- b = b->next;
- c = c->next;
- d = d->next;
- e = e->next;
- if (prefetch)
+ for (int64 j = 0; j < busy_cycles; j++)
+ asm("nop");
+ }
+ mem_chk(a);
+ mem_chk(b);
+ mem_chk(c);
+ }
+ break;
+ case 4:
+ for (int64 i = 0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ while (a != END_OF_CHAIN) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ if (prefetch)
prefetch(a->next);
- for (int64 j=0; j < busy_cycles; j++)
- asm("nop");
- }
- mem_chk( a );
- mem_chk( b );
- mem_chk( c );
- mem_chk( d );
- mem_chk( e );
- }
- break;
- case 6:
- for (int64 i=0; i < iterations; i++) {
- Chain* a = root[0];
- Chain* b = root[1];
- Chain* c = root[2];
- Chain* d = root[3];
- Chain* e = root[4];
- Chain* f = root[5];
- while (a != END_OF_CHAIN) {
- a = a->next;
- b = b->next;
- c = c->next;
- d = d->next;
- e = e->next;
- f = f->next;
- if (prefetch)
+ for (int64 j = 0; j < busy_cycles; j++)
+ asm("nop");
+ }
+ mem_chk(a);
+ mem_chk(b);
+ mem_chk(c);
+ mem_chk(d);
+ }
+ break;
+ case 5:
+ for (int64 i = 0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ while (a != END_OF_CHAIN) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ if (prefetch)
prefetch(a->next);
- for (int64 j=0; j < busy_cycles; j++)
- asm("nop");
- }
- mem_chk( a );
- mem_chk( b );
- mem_chk( c );
- mem_chk( d );
- mem_chk( e );
- mem_chk( f );
- }
- break;
- case 7:
- for (int64 i=0; i < iterations; i++) {
- Chain* a = root[0];
- Chain* b = root[1];
- Chain* c = root[2];
- Chain* d = root[3];
- Chain* e = root[4];
- Chain* f = root[5];
- Chain* g = root[6];
- while (a != END_OF_CHAIN) {
- a = a->next;
- b = b->next;
- c = c->next;
- d = d->next;
- e = e->next;
- f = f->next;
- g = g->next;
- if (prefetch)
+ for (int64 j = 0; j < busy_cycles; j++)
+ asm("nop");
+ }
+ mem_chk(a);
+ mem_chk(b);
+ mem_chk(c);
+ mem_chk(d);
+ mem_chk(e);
+ }
+ break;
+ case 6:
+ for (int64 i = 0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ Chain* f = root[5];
+ while (a != END_OF_CHAIN) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ f = f->next;
+ if (prefetch)
prefetch(a->next);
- for (int64 j=0; j < busy_cycles; j++)
- asm("nop");
- }
- mem_chk( a );
- mem_chk( b );
- mem_chk( c );
- mem_chk( d );
- mem_chk( e );
- mem_chk( f );
- mem_chk( g );
- }
- break;
- case 8:
- for (int64 i=0; i < iterations; i++) {
- Chain* a = root[0];
- Chain* b = root[1];
- Chain* c = root[2];
- Chain* d = root[3];
- Chain* e = root[4];
- Chain* f = root[5];
- Chain* g = root[6];
- Chain* h = root[7];
- while (a != END_OF_CHAIN) {
- a = a->next;
- b = b->next;
- c = c->next;
- d = d->next;
- e = e->next;
- f = f->next;
- g = g->next;
- h = h->next;
- if (prefetch)
+ for (int64 j = 0; j < busy_cycles; j++)
+ asm("nop");
+ }
+ mem_chk(a);
+ mem_chk(b);
+ mem_chk(c);
+ mem_chk(d);
+ mem_chk(e);
+ mem_chk(f);
+ }
+ break;
+ case 7:
+ for (int64 i = 0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ Chain* f = root[5];
+ Chain* g = root[6];
+ while (a != END_OF_CHAIN) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ f = f->next;
+ g = g->next;
+ if (prefetch)
prefetch(a->next);
- for (int64 j=0; j < busy_cycles; j++)
- asm("nop");
- }
- mem_chk( a );
- mem_chk( b );
- mem_chk( c );
- mem_chk( d );
- mem_chk( e );
- mem_chk( f );
- mem_chk( g );
- mem_chk( h );
- }
- break;
- case 9:
- for (int64 i=0; i < iterations; i++) {
- Chain* a = root[0];
- Chain* b = root[1];
- Chain* c = root[2];
- Chain* d = root[3];
- Chain* e = root[4];
- Chain* f = root[5];
- Chain* g = root[6];
- Chain* h = root[7];
- Chain* j = root[8];
- while (a != END_OF_CHAIN) {
- a = a->next;
- b = b->next;
- c = c->next;
- d = d->next;
- e = e->next;
- f = f->next;
- g = g->next;
- h = h->next;
- j = j->next;
- if (prefetch)
+ for (int64 j = 0; j < busy_cycles; j++)
+ asm("nop");
+ }
+ mem_chk(a);
+ mem_chk(b);
+ mem_chk(c);
+ mem_chk(d);
+ mem_chk(e);
+ mem_chk(f);
+ mem_chk(g);
+ }
+ break;
+ case 8:
+ for (int64 i = 0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ Chain* f = root[5];
+ Chain* g = root[6];
+ Chain* h = root[7];
+ while (a != END_OF_CHAIN) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ f = f->next;
+ g = g->next;
+ h = h->next;
+ if (prefetch)
prefetch(a->next);
- for (int64 j=0; j < busy_cycles; j++)
- asm("nop");
- }
- mem_chk( a );
- mem_chk( b );
- mem_chk( c );
- mem_chk( d );
- mem_chk( e );
- mem_chk( f );
- mem_chk( g );
- mem_chk( h );
- mem_chk( j );
- }
- break;
- case 10:
- for (int64 i=0; i < iterations; i++) {
- Chain* a = root[0];
- Chain* b = root[1];
- Chain* c = root[2];
- Chain* d = root[3];
- Chain* e = root[4];
- Chain* f = root[5];
- Chain* g = root[6];
- Chain* h = root[7];
- Chain* j = root[8];
- Chain* k = root[9];
- while (a != END_OF_CHAIN) {
- a = a->next;
- b = b->next;
- c = c->next;
- d = d->next;
- e = e->next;
- f = f->next;
- g = g->next;
- h = h->next;
- j = j->next;
- k = k->next;
- if (prefetch)
+ for (int64 j = 0; j < busy_cycles; j++)
+ asm("nop");
+ }
+ mem_chk(a);
+ mem_chk(b);
+ mem_chk(c);
+ mem_chk(d);
+ mem_chk(e);
+ mem_chk(f);
+ mem_chk(g);
+ mem_chk(h);
+ }
+ break;
+ case 9:
+ for (int64 i = 0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ Chain* f = root[5];
+ Chain* g = root[6];
+ Chain* h = root[7];
+ Chain* j = root[8];
+ while (a != END_OF_CHAIN) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ f = f->next;
+ g = g->next;
+ h = h->next;
+ j = j->next;
+ if (prefetch)
prefetch(a->next);
- for (int64 j=0; j < busy_cycles; j++)
- asm("nop");
- }
- mem_chk( a );
- mem_chk( b );
- mem_chk( c );
- mem_chk( d );
- mem_chk( e );
- mem_chk( f );
- mem_chk( g );
- mem_chk( h );
- mem_chk( j );
- mem_chk( k );
- }
- break;
- case 11:
- for (int64 i=0; i < iterations; i++) {
- Chain* a = root[0];
- Chain* b = root[1];
- Chain* c = root[2];
- Chain* d = root[3];
- Chain* e = root[4];
- Chain* f = root[5];
- Chain* g = root[6];
- Chain* h = root[7];
- Chain* j = root[8];
- Chain* k = root[9];
- Chain* l = root[10];
- while (a != END_OF_CHAIN) {
- a = a->next;
- b = b->next;
- c = c->next;
- d = d->next;
- e = e->next;
- f = f->next;
- g = g->next;
- h = h->next;
- j = j->next;
- k = k->next;
- l = l->next;
- if (prefetch)
+ for (int64 j = 0; j < busy_cycles; j++)
+ asm("nop");
+ }
+ mem_chk(a);
+ mem_chk(b);
+ mem_chk(c);
+ mem_chk(d);
+ mem_chk(e);
+ mem_chk(f);
+ mem_chk(g);
+ mem_chk(h);
+ mem_chk(j);
+ }
+ break;
+ case 10:
+ for (int64 i = 0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ Chain* f = root[5];
+ Chain* g = root[6];
+ Chain* h = root[7];
+ Chain* j = root[8];
+ Chain* k = root[9];
+ while (a != END_OF_CHAIN) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ f = f->next;
+ g = g->next;
+ h = h->next;
+ j = j->next;
+ k = k->next;
+ if (prefetch)
prefetch(a->next);
- for (int64 j=0; j < busy_cycles; j++)
- asm("nop");
- }
- mem_chk( a );
- mem_chk( b );
- mem_chk( c );
- mem_chk( d );
- mem_chk( e );
- mem_chk( f );
- mem_chk( g );
- mem_chk( h );
- mem_chk( j );
- mem_chk( k );
- mem_chk( l );
- }
- break;
- case 12:
- for (int64 i=0; i < iterations; i++) {
- Chain* a = root[0];
- Chain* b = root[1];
- Chain* c = root[2];
- Chain* d = root[3];
- Chain* e = root[4];
- Chain* f = root[5];
- Chain* g = root[6];
- Chain* h = root[7];
- Chain* j = root[8];
- Chain* k = root[9];
- Chain* l = root[10];
- Chain* m = root[11];
- while (a != END_OF_CHAIN) {
- a = a->next;
- b = b->next;
- c = c->next;
- d = d->next;
- e = e->next;
- f = f->next;
- g = g->next;
- h = h->next;
- j = j->next;
- k = k->next;
- l = l->next;
- m = m->next;
- if (prefetch)
+ for (int64 j = 0; j < busy_cycles; j++)
+ asm("nop");
+ }
+ mem_chk(a);
+ mem_chk(b);
+ mem_chk(c);
+ mem_chk(d);
+ mem_chk(e);
+ mem_chk(f);
+ mem_chk(g);
+ mem_chk(h);
+ mem_chk(j);
+ mem_chk(k);
+ }
+ break;
+ case 11:
+ for (int64 i = 0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ Chain* f = root[5];
+ Chain* g = root[6];
+ Chain* h = root[7];
+ Chain* j = root[8];
+ Chain* k = root[9];
+ Chain* l = root[10];
+ while (a != END_OF_CHAIN) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ f = f->next;
+ g = g->next;
+ h = h->next;
+ j = j->next;
+ k = k->next;
+ l = l->next;
+ if (prefetch)
prefetch(a->next);
- for (int64 j=0; j < busy_cycles; j++)
- asm("nop");
- }
- mem_chk( a );
- mem_chk( b );
- mem_chk( c );
- mem_chk( d );
- mem_chk( e );
- mem_chk( f );
- mem_chk( g );
- mem_chk( h );
- mem_chk( j );
- mem_chk( k );
- mem_chk( l );
- mem_chk( m );
- }
- break;
- case 13:
- for (int64 i=0; i < iterations; i++) {
- Chain* a = root[0];
- Chain* b = root[1];
- Chain* c = root[2];
- Chain* d = root[3];
- Chain* e = root[4];
- Chain* f = root[5];
- Chain* g = root[6];
- Chain* h = root[7];
- Chain* j = root[8];
- Chain* k = root[9];
- Chain* l = root[10];
- Chain* m = root[11];
- Chain* n = root[12];
- while (a != END_OF_CHAIN) {
- a = a->next;
- b = b->next;
- c = c->next;
- d = d->next;
- e = e->next;
- f = f->next;
- g = g->next;
- h = h->next;
- j = j->next;
- k = k->next;
- l = l->next;
- m = m->next;
- n = n->next;
- if (prefetch)
+ for (int64 j = 0; j < busy_cycles; j++)
+ asm("nop");
+ }
+ mem_chk(a);
+ mem_chk(b);
+ mem_chk(c);
+ mem_chk(d);
+ mem_chk(e);
+ mem_chk(f);
+ mem_chk(g);
+ mem_chk(h);
+ mem_chk(j);
+ mem_chk(k);
+ mem_chk(l);
+ }
+ break;
+ case 12:
+ for (int64 i = 0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ Chain* f = root[5];
+ Chain* g = root[6];
+ Chain* h = root[7];
+ Chain* j = root[8];
+ Chain* k = root[9];
+ Chain* l = root[10];
+ Chain* m = root[11];
+ while (a != END_OF_CHAIN) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ f = f->next;
+ g = g->next;
+ h = h->next;
+ j = j->next;
+ k = k->next;
+ l = l->next;
+ m = m->next;
+ if (prefetch)
prefetch(a->next);
- for (int64 j=0; j < busy_cycles; j++)
- asm("nop");
- }
- mem_chk( a );
- mem_chk( b );
- mem_chk( c );
- mem_chk( d );
- mem_chk( e );
- mem_chk( f );
- mem_chk( g );
- mem_chk( h );
- mem_chk( j );
- mem_chk( k );
- mem_chk( l );
- mem_chk( m );
- mem_chk( n );
- }
- break;
- case 14:
- for (int64 i=0; i < iterations; i++) {
- Chain* a = root[0];
- Chain* b = root[1];
- Chain* c = root[2];
- Chain* d = root[3];
- Chain* e = root[4];
- Chain* f = root[5];
- Chain* g = root[6];
- Chain* h = root[7];
- Chain* j = root[8];
- Chain* k = root[9];
- Chain* l = root[10];
- Chain* m = root[11];
- Chain* n = root[12];
- Chain* o = root[13];
- while (a != END_OF_CHAIN) {
- a = a->next;
- b = b->next;
- c = c->next;
- d = d->next;
- e = e->next;
- f = f->next;
- g = g->next;
- h = h->next;
- j = j->next;
- k = k->next;
- l = l->next;
- m = m->next;
- n = n->next;
- o = o->next;
- if (prefetch)
+ for (int64 j = 0; j < busy_cycles; j++)
+ asm("nop");
+ }
+ mem_chk(a);
+ mem_chk(b);
+ mem_chk(c);
+ mem_chk(d);
+ mem_chk(e);
+ mem_chk(f);
+ mem_chk(g);
+ mem_chk(h);
+ mem_chk(j);
+ mem_chk(k);
+ mem_chk(l);
+ mem_chk(m);
+ }
+ break;
+ case 13:
+ for (int64 i = 0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ Chain* f = root[5];
+ Chain* g = root[6];
+ Chain* h = root[7];
+ Chain* j = root[8];
+ Chain* k = root[9];
+ Chain* l = root[10];
+ Chain* m = root[11];
+ Chain* n = root[12];
+ while (a != END_OF_CHAIN) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ f = f->next;
+ g = g->next;
+ h = h->next;
+ j = j->next;
+ k = k->next;
+ l = l->next;
+ m = m->next;
+ n = n->next;
+ if (prefetch)
prefetch(a->next);
- for (int64 j=0; j < busy_cycles; j++)
- asm("nop");
- }
- mem_chk( a );
- mem_chk( b );
- mem_chk( c );
- mem_chk( d );
- mem_chk( e );
- mem_chk( f );
- mem_chk( g );
- mem_chk( h );
- mem_chk( j );
- mem_chk( k );
- mem_chk( l );
- mem_chk( m );
- mem_chk( n );
- mem_chk( o );
- }
- break;
- case 15:
- for (int64 i=0; i < iterations; i++) {
- Chain* a = root[0];
- Chain* b = root[1];
- Chain* c = root[2];
- Chain* d = root[3];
- Chain* e = root[4];
- Chain* f = root[5];
- Chain* g = root[6];
- Chain* h = root[7];
- Chain* j = root[8];
- Chain* k = root[9];
- Chain* l = root[10];
- Chain* m = root[11];
- Chain* n = root[12];
- Chain* o = root[13];
- Chain* p = root[14];
- while (a != END_OF_CHAIN) {
- a = a->next;
- b = b->next;
- c = c->next;
- d = d->next;
- e = e->next;
- f = f->next;
- g = g->next;
- h = h->next;
- j = j->next;
- k = k->next;
- l = l->next;
- m = m->next;
- n = n->next;
- o = o->next;
- p = p->next;
- if (prefetch)
+ for (int64 j = 0; j < busy_cycles; j++)
+ asm("nop");
+ }
+ mem_chk(a);
+ mem_chk(b);
+ mem_chk(c);
+ mem_chk(d);
+ mem_chk(e);
+ mem_chk(f);
+ mem_chk(g);
+ mem_chk(h);
+ mem_chk(j);
+ mem_chk(k);
+ mem_chk(l);
+ mem_chk(m);
+ mem_chk(n);
+ }
+ break;
+ case 14:
+ for (int64 i = 0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ Chain* f = root[5];
+ Chain* g = root[6];
+ Chain* h = root[7];
+ Chain* j = root[8];
+ Chain* k = root[9];
+ Chain* l = root[10];
+ Chain* m = root[11];
+ Chain* n = root[12];
+ Chain* o = root[13];
+ while (a != END_OF_CHAIN) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ f = f->next;
+ g = g->next;
+ h = h->next;
+ j = j->next;
+ k = k->next;
+ l = l->next;
+ m = m->next;
+ n = n->next;
+ o = o->next;
+ if (prefetch)
prefetch(a->next);
- for (int64 j=0; j < busy_cycles; j++)
- asm("nop");
- }
- mem_chk( a );
- mem_chk( b );
- mem_chk( c );
- mem_chk( d );
- mem_chk( e );
- mem_chk( f );
- mem_chk( g );
- mem_chk( h );
- mem_chk( j );
- mem_chk( k );
- mem_chk( l );
- mem_chk( m );
- mem_chk( n );
- mem_chk( o );
- mem_chk( p );
- }
- break;
- case 16:
- for (int64 i=0; i < iterations; i++) {
- Chain* a = root[0];
- Chain* b = root[1];
- Chain* c = root[2];
- Chain* d = root[3];
- Chain* e = root[4];
- Chain* f = root[5];
- Chain* g = root[6];
- Chain* h = root[7];
- Chain* j = root[8];
- Chain* k = root[9];
- Chain* l = root[10];
- Chain* m = root[11];
- Chain* n = root[12];
- Chain* o = root[13];
- Chain* p = root[14];
- Chain* q = root[15];
- while (a != END_OF_CHAIN) {
- a = a->next;
- b = b->next;
- c = c->next;
- d = d->next;
- e = e->next;
- f = f->next;
- g = g->next;
- h = h->next;
- j = j->next;
- k = k->next;
- l = l->next;
- m = m->next;
- n = n->next;
- o = o->next;
- p = p->next;
- q = q->next;
- if (prefetch)
+ for (int64 j = 0; j < busy_cycles; j++)
+ asm("nop");
+ }
+ mem_chk(a);
+ mem_chk(b);
+ mem_chk(c);
+ mem_chk(d);
+ mem_chk(e);
+ mem_chk(f);
+ mem_chk(g);
+ mem_chk(h);
+ mem_chk(j);
+ mem_chk(k);
+ mem_chk(l);
+ mem_chk(m);
+ mem_chk(n);
+ mem_chk(o);
+ }
+ break;
+ case 15:
+ for (int64 i = 0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ Chain* f = root[5];
+ Chain* g = root[6];
+ Chain* h = root[7];
+ Chain* j = root[8];
+ Chain* k = root[9];
+ Chain* l = root[10];
+ Chain* m = root[11];
+ Chain* n = root[12];
+ Chain* o = root[13];
+ Chain* p = root[14];
+ while (a != END_OF_CHAIN) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ f = f->next;
+ g = g->next;
+ h = h->next;
+ j = j->next;
+ k = k->next;
+ l = l->next;
+ m = m->next;
+ n = n->next;
+ o = o->next;
+ p = p->next;
+ if (prefetch)
+ prefetch(a->next);
+ for (int64 j = 0; j < busy_cycles; j++)
+ asm("nop");
+ }
+ mem_chk(a);
+ mem_chk(b);
+ mem_chk(c);
+ mem_chk(d);
+ mem_chk(e);
+ mem_chk(f);
+ mem_chk(g);
+ mem_chk(h);
+ mem_chk(j);
+ mem_chk(k);
+ mem_chk(l);
+ mem_chk(m);
+ mem_chk(n);
+ mem_chk(o);
+ mem_chk(p);
+ }
+ break;
+ case 16:
+ for (int64 i = 0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ Chain* f = root[5];
+ Chain* g = root[6];
+ Chain* h = root[7];
+ Chain* j = root[8];
+ Chain* k = root[9];
+ Chain* l = root[10];
+ Chain* m = root[11];
+ Chain* n = root[12];
+ Chain* o = root[13];
+ Chain* p = root[14];
+ Chain* q = root[15];
+ while (a != END_OF_CHAIN) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ f = f->next;
+ g = g->next;
+ h = h->next;
+ j = j->next;
+ k = k->next;
+ l = l->next;
+ m = m->next;
+ n = n->next;
+ o = o->next;
+ p = p->next;
+ q = q->next;
+ if (prefetch)
prefetch(a->next);
- for (int64 j=0; j < busy_cycles; j++)
- asm("nop");
- }
- mem_chk( a );
- mem_chk( b );
- mem_chk( c );
- mem_chk( d );
- mem_chk( e );
- mem_chk( f );
- mem_chk( g );
- mem_chk( h );
- mem_chk( j );
- mem_chk( k );
- mem_chk( l );
- mem_chk( m );
- mem_chk( n );
- mem_chk( o );
- mem_chk( p );
- mem_chk( q );
+ for (int64 j = 0; j < busy_cycles; j++)
+ asm("nop");
+ }
+ mem_chk(a);
+ mem_chk(b);
+ mem_chk(c);
+ mem_chk(d);
+ mem_chk(e);
+ mem_chk(f);
+ mem_chk(g);
+ mem_chk(h);
+ mem_chk(j);
+ mem_chk(k);
+ mem_chk(l);
+ mem_chk(m);
+ mem_chk(n);
+ mem_chk(o);
+ mem_chk(p);
+ mem_chk(q);
+ }
}
- }
}
- // NOT WRITTEN YET -- DMP
- // JUST A PLACE HOLDER!
+// NOT WRITTEN YET -- DMP
+// JUST A PLACE HOLDER!
Chain*
-Run::stream_mem_init( Chain *mem )
-{
+Run::stream_mem_init(Chain *mem) {
// fprintf(stderr, "made it into stream_mem_init.\n");
// fprintf(stderr, "chains_per_thread = %ld\n", this->exp->chains_per_thread);
// fprintf(stderr, "iterations = %ld\n", this->exp->iterations);
// fprintf(stderr, "bytes_per_chain = %ld\n", this->exp->bytes_per_chain);
// fprintf(stderr, "stride = %ld\n", this->exp->stride);
- int64 local_ops_per_chain = 0;
- double* tmp = (double *) mem;
- int64 refs_per_line = this->exp->bytes_per_line / sizeof(double);
- int64 refs_per_chain = this->exp->bytes_per_chain / sizeof(double);
+ int64 local_ops_per_chain = 0;
+ double* tmp = (double *) mem;
+ int64 refs_per_line = this->exp->bytes_per_line / sizeof(double);
+ int64 refs_per_chain = this->exp->bytes_per_chain / sizeof(double);
// fprintf(stderr, "refs_per_chain = %ld\n", refs_per_chain);
- for (int64 i=0; i < refs_per_chain; i += this->exp->stride*refs_per_line) {
- tmp[i] = 0;
- local_ops_per_chain += 1;
- }
+ for (int64 i = 0; i < refs_per_chain;
+ i += this->exp->stride * refs_per_line) {
+ tmp[i] = 0;
+ local_ops_per_chain += 1;
+ }
- Run::global_mutex.lock();
- Run::_ops_per_chain = local_ops_per_chain;
- Run::global_mutex.unlock();
+ Run::global_mutex.lock();
+ Run::_ops_per_chain = local_ops_per_chain;
+ Run::global_mutex.unlock();
// fprintf(stderr, "made it out of stream_mem_init.\n");
- return mem;
+ return mem;
}
static int64 summ_ck = 0;
-void
-sum_chk( double t )
-{
- if (t != 0) summ_ck += 1;
+void sum_chk(double t) {
+ if (t != 0)
+ summ_ck += 1;
}
- // NOT WRITTEN YET -- DMP
- // JUST A PLACE HOLDER!
-static void
-follow_streams(
- int64 chains_per_thread, // memory loading per thread
- int64 iterations, // number of iterations per experiment
- Chain** root, // root(s) of the chain(s) to follow
- int64 bytes_per_line, // ignored
- int64 bytes_per_chain, // ignored
- int64 stride, // ignored
- int64 busy_cycles, // ignored
- bool prefetch // ignored
-)
-{
- int64 refs_per_line = bytes_per_line / sizeof(double);
- int64 refs_per_chain = bytes_per_chain / sizeof(double);
+// NOT WRITTEN YET -- DMP
+// JUST A PLACE HOLDER!
+static void follow_streams(int64 chains_per_thread, // memory loading per thread
+ int64 iterations, // number of iterations per experiment
+ Chain** root, // root(s) of the chain(s) to follow
+ int64 bytes_per_line, // ignored
+ int64 bytes_per_chain, // ignored
+ int64 stride, // ignored
+ int64 busy_cycles, // ignored
+ bool prefetch // ignored
+ ) {
+ int64 refs_per_line = bytes_per_line / sizeof(double);
+ int64 refs_per_chain = bytes_per_chain / sizeof(double);
- // chase pointers
- switch (chains_per_thread) {
- default:
- case 1:
- for (int64 i=0; i < iterations; i++) {
- double t = 0;
- double* a0 = (double *) root[0];
- for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
- t += a0[j];
- }
- sum_chk( t );
- }
- break;
- case 2:
- for (int64 i=0; i < iterations; i++) {
- double t = 0;
- double* a0 = (double *) root[0];
- double* a1 = (double *) root[1];
- for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
- t += a0[j] + a1[j];
- }
- sum_chk( t );
- }
- break;
- case 3:
- for (int64 i=0; i < iterations; i++) {
- double t = 0;
- double* a0 = (double *) root[0];
- double* a1 = (double *) root[1];
- double* a2 = (double *) root[2];
- for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
- t += a0[j] + a1[j] + a2[j];
- }
- sum_chk( t );
- }
- break;
- case 4:
- for (int64 i=0; i < iterations; i++) {
- double t = 0;
- double* a0 = (double *) root[0];
- double* a1 = (double *) root[1];
- double* a2 = (double *) root[2];
- double* a3 = (double *) root[3];
- for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
- t += a0[j] + a1[j] + a2[j] + a3[j];
- }
- sum_chk( t );
- }
- break;
- case 5:
- for (int64 i=0; i < iterations; i++) {
- double t = 0;
- double* a0 = (double *) root[0];
- double* a1 = (double *) root[1];
- double* a2 = (double *) root[2];
- double* a3 = (double *) root[3];
- double* a4 = (double *) root[4];
- for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
- t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j];
- }
- sum_chk( t );
- }
- break;
- case 6:
- for (int64 i=0; i < iterations; i++) {
- double t = 0;
- double* a0 = (double *) root[0];
- double* a1 = (double *) root[1];
- double* a2 = (double *) root[2];
- double* a3 = (double *) root[3];
- double* a4 = (double *) root[4];
- double* a5 = (double *) root[5];
- for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
- t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j];
- }
- sum_chk( t );
- }
- break;
- case 7:
- for (int64 i=0; i < iterations; i++) {
- double t = 0;
- double* a0 = (double *) root[0];
- double* a1 = (double *) root[1];
- double* a2 = (double *) root[2];
- double* a3 = (double *) root[3];
- double* a4 = (double *) root[4];
- double* a5 = (double *) root[5];
- double* a6 = (double *) root[6];
- for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
- t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j];
- }
- sum_chk( t );
- }
- break;
- case 8:
- for (int64 i=0; i < iterations; i++) {
- double t = 0;
- double* a0 = (double *) root[0];
- double* a1 = (double *) root[1];
- double* a2 = (double *) root[2];
- double* a3 = (double *) root[3];
- double* a4 = (double *) root[4];
- double* a5 = (double *) root[5];
- double* a6 = (double *) root[6];
- double* a7 = (double *) root[7];
- for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
- t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j];
- }
- sum_chk( t );
- }
- break;
- case 9:
- for (int64 i=0; i < iterations; i++) {
- double t = 0;
- double* a0 = (double *) root[0];
- double* a1 = (double *) root[1];
- double* a2 = (double *) root[2];
- double* a3 = (double *) root[3];
- double* a4 = (double *) root[4];
- double* a5 = (double *) root[5];
- double* a6 = (double *) root[6];
- double* a7 = (double *) root[7];
- double* a8 = (double *) root[8];
- for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
- t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j] +
- a8[j];
- }
- sum_chk( t );
- }
- break;
- case 10:
- for (int64 i=0; i < iterations; i++) {
- double t = 0;
- double* a0 = (double *) root[0];
- double* a1 = (double *) root[1];
- double* a2 = (double *) root[2];
- double* a3 = (double *) root[3];
- double* a4 = (double *) root[4];
- double* a5 = (double *) root[5];
- double* a6 = (double *) root[6];
- double* a7 = (double *) root[7];
- double* a8 = (double *) root[8];
- double* a9 = (double *) root[9];
- for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
- t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j] +
- a8[j] + a9[j];
- }
- sum_chk( t );
- }
- break;
- case 11:
- for (int64 i=0; i < iterations; i++) {
- double t = 0;
- double* a0 = (double *) root[ 0];
- double* a1 = (double *) root[ 1];
- double* a2 = (double *) root[ 2];
- double* a3 = (double *) root[ 3];
- double* a4 = (double *) root[ 4];
- double* a5 = (double *) root[ 5];
- double* a6 = (double *) root[ 6];
- double* a7 = (double *) root[ 7];
- double* a8 = (double *) root[ 8];
- double* a9 = (double *) root[ 9];
- double* a10 = (double *) root[10];
- for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
- t += a0[j] + a1[j] + a2 [j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j] +
- a8[j] + a9[j] + a10[j];
- }
- sum_chk( t );
- }
- break;
- case 12:
- for (int64 i=0; i < iterations; i++) {
- double t = 0;
- double* a0 = (double *) root[ 0];
- double* a1 = (double *) root[ 1];
- double* a2 = (double *) root[ 2];
- double* a3 = (double *) root[ 3];
- double* a4 = (double *) root[ 4];
- double* a5 = (double *) root[ 5];
- double* a6 = (double *) root[ 6];
- double* a7 = (double *) root[ 7];
- double* a8 = (double *) root[ 8];
- double* a9 = (double *) root[ 9];
- double* a10 = (double *) root[10];
- double* a11 = (double *) root[11];
- for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
- t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4[j] + a5[j] + a6[j] + a7[j] +
- a8[j] + a9[j] + a10[j] + a11[j];
- }
- sum_chk( t );
- }
- break;
- case 13:
- for (int64 i=0; i < iterations; i++) {
- double t = 0;
- double* a0 = (double *) root[ 0];
- double* a1 = (double *) root[ 1];
- double* a2 = (double *) root[ 2];
- double* a3 = (double *) root[ 3];
- double* a4 = (double *) root[ 4];
- double* a5 = (double *) root[ 5];
- double* a6 = (double *) root[ 6];
- double* a7 = (double *) root[ 7];
- double* a8 = (double *) root[ 8];
- double* a9 = (double *) root[ 9];
- double* a10 = (double *) root[10];
- double* a11 = (double *) root[11];
- double* a12 = (double *) root[12];
- for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
- t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5[j] + a6[j] + a7[j] +
- a8[j] + a9[j] + a10[j] + a11[j] + a12[j];
- }
- sum_chk( t );
- }
- break;
- case 14:
- for (int64 i=0; i < iterations; i++) {
- double t = 0;
- double* a0 = (double *) root[ 0];
- double* a1 = (double *) root[ 1];
- double* a2 = (double *) root[ 2];
- double* a3 = (double *) root[ 3];
- double* a4 = (double *) root[ 4];
- double* a5 = (double *) root[ 5];
- double* a6 = (double *) root[ 6];
- double* a7 = (double *) root[ 7];
- double* a8 = (double *) root[ 8];
- double* a9 = (double *) root[ 9];
- double* a10 = (double *) root[10];
- double* a11 = (double *) root[11];
- double* a12 = (double *) root[12];
- double* a13 = (double *) root[13];
- for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
- t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5 [j] + a6[j] + a7[j] +
- a8[j] + a9[j] + a10[j] + a11[j] + a12[j] + a13[j];
- }
- sum_chk( t );
- }
- break;
- case 15:
- for (int64 i=0; i < iterations; i++) {
- double t = 0;
- double* a0 = (double *) root[ 0];
- double* a1 = (double *) root[ 1];
- double* a2 = (double *) root[ 2];
- double* a3 = (double *) root[ 3];
- double* a4 = (double *) root[ 4];
- double* a5 = (double *) root[ 5];
- double* a6 = (double *) root[ 6];
- double* a7 = (double *) root[ 7];
- double* a8 = (double *) root[ 8];
- double* a9 = (double *) root[ 9];
- double* a10 = (double *) root[10];
- double* a11 = (double *) root[11];
- double* a12 = (double *) root[12];
- double* a13 = (double *) root[13];
- double* a14 = (double *) root[14];
- for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
- t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5 [j] + a6 [j] + a7[j] +
- a8[j] + a9[j] + a10[j] + a11[j] + a12[j] + a13[j] + a14[j];
- }
- sum_chk( t );
- }
- break;
- case 16:
- for (int64 i=0; i < iterations; i++) {
- double t = 0;
- double* a0 = (double *) root[ 0];
- double* a1 = (double *) root[ 1];
- double* a2 = (double *) root[ 2];
- double* a3 = (double *) root[ 3];
- double* a4 = (double *) root[ 4];
- double* a5 = (double *) root[ 5];
- double* a6 = (double *) root[ 6];
- double* a7 = (double *) root[ 7];
- double* a8 = (double *) root[ 8];
- double* a9 = (double *) root[ 9];
- double* a10 = (double *) root[10];
- double* a11 = (double *) root[11];
- double* a12 = (double *) root[12];
- double* a13 = (double *) root[13];
- double* a14 = (double *) root[14];
- double* a15 = (double *) root[15];
- for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
- t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5 [j] + a6 [j] + a7 [j] +
- a8[j] + a9[j] + a10[j] + a11[j] + a12[j] + a13[j] + a14[j] + a15[j];
- }
- sum_chk( t );
+ // chase pointers
+ switch (chains_per_thread) {
+ default:
+ case 1:
+ for (int64 i = 0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+ t += a0[j];
+ }
+ sum_chk(t);
+ }
+ break;
+ case 2:
+ for (int64 i = 0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+ t += a0[j] + a1[j];
+ }
+ sum_chk(t);
+ }
+ break;
+ case 3:
+ for (int64 i = 0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+ t += a0[j] + a1[j] + a2[j];
+ }
+ sum_chk(t);
+ }
+ break;
+ case 4:
+ for (int64 i = 0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j];
+ }
+ sum_chk(t);
+ }
+ break;
+ case 5:
+ for (int64 i = 0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j];
+ }
+ sum_chk(t);
+ }
+ break;
+ case 6:
+ for (int64 i = 0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ double* a5 = (double *) root[5];
+ for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j];
+ }
+ sum_chk(t);
+ }
+ break;
+ case 7:
+ for (int64 i = 0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ double* a5 = (double *) root[5];
+ double* a6 = (double *) root[6];
+ for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j];
+ }
+ sum_chk(t);
+ }
+ break;
+ case 8:
+ for (int64 i = 0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ double* a5 = (double *) root[5];
+ double* a6 = (double *) root[6];
+ double* a7 = (double *) root[7];
+ for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]
+ + a7[j];
+ }
+ sum_chk(t);
+ }
+ break;
+ case 9:
+ for (int64 i = 0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ double* a5 = (double *) root[5];
+ double* a6 = (double *) root[6];
+ double* a7 = (double *) root[7];
+ double* a8 = (double *) root[8];
+ for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]
+ + a7[j] + a8[j];
+ }
+ sum_chk(t);
+ }
+ break;
+ case 10:
+ for (int64 i = 0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ double* a5 = (double *) root[5];
+ double* a6 = (double *) root[6];
+ double* a7 = (double *) root[7];
+ double* a8 = (double *) root[8];
+ double* a9 = (double *) root[9];
+ for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]
+ + a7[j] + a8[j] + a9[j];
+ }
+ sum_chk(t);
+ }
+ break;
+ case 11:
+ for (int64 i = 0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ double* a5 = (double *) root[5];
+ double* a6 = (double *) root[6];
+ double* a7 = (double *) root[7];
+ double* a8 = (double *) root[8];
+ double* a9 = (double *) root[9];
+ double* a10 = (double *) root[10];
+ for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]
+ + a7[j] + a8[j] + a9[j] + a10[j];
+ }
+ sum_chk(t);
+ }
+ break;
+ case 12:
+ for (int64 i = 0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ double* a5 = (double *) root[5];
+ double* a6 = (double *) root[6];
+ double* a7 = (double *) root[7];
+ double* a8 = (double *) root[8];
+ double* a9 = (double *) root[9];
+ double* a10 = (double *) root[10];
+ double* a11 = (double *) root[11];
+ for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]
+ + a7[j] + a8[j] + a9[j] + a10[j] + a11[j];
+ }
+ sum_chk(t);
+ }
+ break;
+ case 13:
+ for (int64 i = 0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ double* a5 = (double *) root[5];
+ double* a6 = (double *) root[6];
+ double* a7 = (double *) root[7];
+ double* a8 = (double *) root[8];
+ double* a9 = (double *) root[9];
+ double* a10 = (double *) root[10];
+ double* a11 = (double *) root[11];
+ double* a12 = (double *) root[12];
+ for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]
+ + a7[j] + a8[j] + a9[j] + a10[j] + a11[j] + a12[j];
+ }
+ sum_chk(t);
+ }
+ break;
+ case 14:
+ for (int64 i = 0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ double* a5 = (double *) root[5];
+ double* a6 = (double *) root[6];
+ double* a7 = (double *) root[7];
+ double* a8 = (double *) root[8];
+ double* a9 = (double *) root[9];
+ double* a10 = (double *) root[10];
+ double* a11 = (double *) root[11];
+ double* a12 = (double *) root[12];
+ double* a13 = (double *) root[13];
+ for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]
+ + a7[j] + a8[j] + a9[j] + a10[j] + a11[j] + a12[j]
+ + a13[j];
+ }
+ sum_chk(t);
+ }
+ break;
+ case 15:
+ for (int64 i = 0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ double* a5 = (double *) root[5];
+ double* a6 = (double *) root[6];
+ double* a7 = (double *) root[7];
+ double* a8 = (double *) root[8];
+ double* a9 = (double *) root[9];
+ double* a10 = (double *) root[10];
+ double* a11 = (double *) root[11];
+ double* a12 = (double *) root[12];
+ double* a13 = (double *) root[13];
+ double* a14 = (double *) root[14];
+ for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]
+ + a7[j] + a8[j] + a9[j] + a10[j] + a11[j] + a12[j]
+ + a13[j] + a14[j];
+ }
+ sum_chk(t);
+ }
+ break;
+ case 16:
+ for (int64 i = 0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ double* a5 = (double *) root[5];
+ double* a6 = (double *) root[6];
+ double* a7 = (double *) root[7];
+ double* a8 = (double *) root[8];
+ double* a9 = (double *) root[9];
+ double* a10 = (double *) root[10];
+ double* a11 = (double *) root[11];
+ double* a12 = (double *) root[12];
+ double* a13 = (double *) root[13];
+ double* a14 = (double *) root[14];
+ double* a15 = (double *) root[15];
+ for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]
+ + a7[j] + a8[j] + a9[j] + a10[j] + a11[j] + a12[j]
+ + a13[j] + a14[j] + a15[j];
+ }
+ sum_chk(t);
+ }
+ break;
}
- break;
- }
}
diff --git a/src/Run.h b/src/Run.h
index 810c2e8..8b51397 100644
--- a/src/Run.h
+++ b/src/Run.h
@@ -9,7 +9,6 @@
* Douglas M. Pase - initial API and implementation *
*******************************************************************************/
-
#if !defined(Run_h)
#define Run_h
@@ -23,28 +22,31 @@
class Run: public Thread {
public:
- Run();
- ~Run();
- int run();
- void set( Experiment &e, SpinBarrier* sbp );
-
- static int64 ops_per_chain() { return _ops_per_chain; }
- static double seconds() { return _seconds; }
+ Run();
+ ~Run();
+ int run();
+ void set(Experiment &e, SpinBarrier* sbp);
+
+ static int64 ops_per_chain() {
+ return _ops_per_chain;
+ }
+ static double seconds() {
+ return _seconds;
+ }
private:
- Experiment* exp; // experiment data
- SpinBarrier* bp; // spin barrier used by all threads
-
- void mem_check( Chain *m );
- Chain* random_mem_init( Chain *m );
- Chain* forward_mem_init( Chain *m );
- Chain* reverse_mem_init( Chain *m );
- Chain* stream_mem_init( Chain *m );
-
- static Lock global_mutex; // global lock
- static int64 _ops_per_chain; // total number of operations per chain
- static double _seconds; // total number of seconds
+ Experiment* exp; // experiment data
+ SpinBarrier* bp; // spin barrier used by all threads
+
+ void mem_check(Chain *m);
+ Chain* random_mem_init(Chain *m);
+ Chain* forward_mem_init(Chain *m);
+ Chain* reverse_mem_init(Chain *m);
+ Chain* stream_mem_init(Chain *m);
+
+ static Lock global_mutex; // global lock
+ static int64 _ops_per_chain; // total number of operations per chain
+ static double _seconds; // total number of seconds
};
-
#endif
diff --git a/src/SpinBarrier.cpp b/src/SpinBarrier.cpp
index d3d2d7b..d89e7c3 100644
--- a/src/SpinBarrier.cpp
+++ b/src/SpinBarrier.cpp
@@ -9,7 +9,6 @@
* Douglas M. Pase - initial API and implementation *
*******************************************************************************/
-
/******************************************************************************
* *
* SpinBarrier *
@@ -27,22 +26,18 @@
#include "SpinBarrier.h"
- // create a new barrier
-SpinBarrier::SpinBarrier(int participants)
-: limit( participants )
-{
- pthread_barrier_init( &barrier_obj, NULL, this->limit );
+// create a new barrier
+SpinBarrier::SpinBarrier(int participants) :
+ limit(participants) {
+ pthread_barrier_init(&barrier_obj, NULL, this->limit);
}
- // destroy an old barrier
-SpinBarrier::~SpinBarrier()
-{
+// destroy an old barrier
+SpinBarrier::~SpinBarrier() {
}
- // enter the barrier and wait. everyone leaves
- // when the last participant enters the barrier.
-void
-SpinBarrier::barrier()
-{
- pthread_barrier_wait( &this->barrier_obj );
+// enter the barrier and wait. everyone leaves
+// when the last participant enters the barrier.
+void SpinBarrier::barrier() {
+ pthread_barrier_wait(&this->barrier_obj);
}
diff --git a/src/SpinBarrier.h b/src/SpinBarrier.h
index f0b76d3..4ab3242 100644
--- a/src/SpinBarrier.h
+++ b/src/SpinBarrier.h
@@ -9,7 +9,6 @@
* Douglas M. Pase - initial API and implementation *
*******************************************************************************/
-
/******************************************************************************
* *
* SpinBarrier *
@@ -31,14 +30,14 @@
class SpinBarrier {
public:
- SpinBarrier(int participants);
- ~SpinBarrier();
+ SpinBarrier(int participants);
+ ~SpinBarrier();
- void barrier();
+ void barrier();
private:
- int limit; // number of barrier participants
- pthread_barrier_t barrier_obj;
+ int limit; // number of barrier participants
+ pthread_barrier_t barrier_obj;
};
#endif
diff --git a/src/Thread.cpp b/src/Thread.cpp
index 8908cfe..0dfb91c 100644
--- a/src/Thread.cpp
+++ b/src/Thread.cpp
@@ -9,7 +9,6 @@
* Douglas M. Pase - initial API and implementation *
*******************************************************************************/
-
#include <stdio.h>
#include <pthread.h>
#include <unistd.h>
@@ -19,68 +18,51 @@
#include "Lock.h"
Lock Thread::_global_lock;
-int Thread::count = 0;
+int Thread::count = 0;
-Thread::Thread()
-{
- Thread::global_lock();
+Thread::Thread() {
+ Thread::global_lock();
this->id = Thread::count;
Thread::count += 1;
- Thread::global_unlock();
+ Thread::global_unlock();
}
-Thread::~Thread()
-{
+Thread::~Thread() {
}
-int
-Thread::start()
-{
- return pthread_create(&this->thread, NULL, Thread::start_routine, this);
+int Thread::start() {
+ return pthread_create(&this->thread, NULL, Thread::start_routine, this);
}
void*
-Thread::start_routine(void* p)
-{
- ((Thread*)p)->run();
+Thread::start_routine(void* p) {
+ ((Thread*) p)->run();
- return NULL;
+ return NULL;
}
-void
-Thread::exit()
-{
- pthread_exit(NULL);
+void Thread::exit() {
+ pthread_exit(NULL);
}
-int
-Thread::wait()
-{
- pthread_join(this->thread, NULL);
+int Thread::wait() {
+ pthread_join(this->thread, NULL);
- return 0;
+ return 0;
}
-void
-Thread::lock()
-{
- this->object_lock.lock();
+void Thread::lock() {
+ this->object_lock.lock();
}
-void
-Thread::unlock()
-{
- this->object_lock.unlock();
+void Thread::unlock() {
+ this->object_lock.unlock();
}
-void
-Thread::global_lock()
-{
- Thread::_global_lock.lock();
+void Thread::global_lock() {
+ Thread::_global_lock.lock();
}
-void
-Thread::global_unlock()
-{
- Thread::_global_lock.unlock();
+void Thread::global_unlock() {
+ Thread::_global_lock.unlock();
}
diff --git a/src/Thread.h b/src/Thread.h
index 3948f56..55ebf1c 100644
--- a/src/Thread.h
+++ b/src/Thread.h
@@ -9,7 +9,6 @@
* Douglas M. Pase - initial API and implementation *
*******************************************************************************/
-
#if !defined(Thread_h)
#define Thread_h
@@ -19,35 +18,39 @@
class Thread {
public:
- Thread();
- ~Thread();
+ Thread();
+ ~Thread();
- virtual int run() = 0;
+ virtual int run() = 0;
- int start();
- int wait();
- int thread_count() { return Thread::count; }
- int thread_id() { return id; }
+ int start();
+ int wait();
+ int thread_count() {
+ return Thread::count;
+ }
+ int thread_id() {
+ return id;
+ }
- static void exit();
+ static void exit();
protected:
- void lock();
- void unlock();
- static void global_lock();
- static void global_unlock();
+ void lock();
+ void unlock();
+ static void global_lock();
+ static void global_unlock();
private:
- static void* start_routine(void *);
- static Lock _global_lock;
+ static void* start_routine(void *);
+ static Lock _global_lock;
- Lock object_lock;
+ Lock object_lock;
- pthread_t thread;
+ pthread_t thread;
- static int count;
- int id;
- int lock_obj;
+ static int count;
+ int id;
+ int lock_obj;
};
#endif
diff --git a/src/Timer.cpp b/src/Timer.cpp
index b326048..8331b9a 100644
--- a/src/Timer.cpp
+++ b/src/Timer.cpp
@@ -9,7 +9,6 @@
* Douglas M. Pase - initial API and implementation *
*******************************************************************************/
-
#include <stdio.h>
#include <sys/time.h>
@@ -17,15 +16,15 @@
#include "Types.h"
-static int64 read_rtc();
-static void calibrate_rtc(int n);
+static int64 read_rtc();
+static void calibrate_rtc(int n);
static double wall_seconds();
-static int wall_ticks = -1;
-static int rtc_ticks = -1;
+static int wall_ticks = -1;
+static int rtc_ticks = -1;
static double wall_elapsed = -1;
-static int64 rtc_elapsed = -1;
-static double time_factor = -1;
+static int64 rtc_elapsed = -1;
+static double time_factor = -1;
#if !defined(RTC) && !defined(GTOD)
#define RTC
@@ -33,109 +32,97 @@ static double time_factor = -1;
#if defined(RTC)
-double
-Timer::seconds()
-{
- return (double) read_rtc() * time_factor;
+double Timer::seconds() {
+ return (double) read_rtc() * time_factor;
}
-int64
-Timer::ticks()
-{
- // See pg. 406 of the AMD x86-64 Architecture
- // Programmer's Manual, Volume 2, System Programming
- unsigned int eax=0, edx=0;
-
- __asm__ __volatile__(
- "rdtsc ;"
- "movl %%eax,%0;"
- "movl %%edx,%1;"
- ""
- : "=r"(eax), "=r"(edx)
- :
- : "%eax", "%edx"
- );
-
- return ((int64) edx << 32) | (int64) eax;
+int64 Timer::ticks() {
+ // See pg. 406 of the AMD x86-64 Architecture
+ // Programmer's Manual, Volume 2, System Programming
+ unsigned int eax = 0, edx = 0;
+
+ __asm__ __volatile__(
+ "rdtsc ;"
+ "movl %%eax,%0;"
+ "movl %%edx,%1;"
+ ""
+ : "=r"(eax), "=r"(edx)
+ :
+ : "%eax", "%edx"
+ );
+
+ return ((int64) edx << 32) | (int64) eax;
}
-static int64
-read_rtc()
-{
- // See pg. 406 of the AMD x86-64 Architecture
- // Programmer's Manual, Volume 2, System Programming
- unsigned int eax=0, edx=0;
-
- __asm__ __volatile__(
- "rdtsc ;"
- "movl %%eax,%0;"
- "movl %%edx,%1;"
- ""
- : "=r"(eax), "=r"(edx)
- :
- : "%eax", "%edx"
- );
-
- return ((int64) edx << 32) | (int64) eax;
+static int64 read_rtc() {
+ // See pg. 406 of the AMD x86-64 Architecture
+ // Programmer's Manual, Volume 2, System Programming
+ unsigned int eax = 0, edx = 0;
+
+ __asm__ __volatile__(
+ "rdtsc ;"
+ "movl %%eax,%0;"
+ "movl %%edx,%1;"
+ ""
+ : "=r"(eax), "=r"(edx)
+ :
+ : "%eax", "%edx"
+ );
+
+ return ((int64) edx << 32) | (int64) eax;
}
-void
-Timer::calibrate()
-{
- Timer::calibrate(1000);
+void Timer::calibrate() {
+ Timer::calibrate(1000);
}
-void
-Timer::calibrate(int n)
-{
- wall_ticks = n;
-
- double wall_start,wall_finish,t;
- t = wall_seconds();
- while (t == (wall_start=wall_seconds())) {
- ;
- }
- int64 rtc_start = read_rtc();
- for (int i=0; i < wall_ticks; i++) {
+void Timer::calibrate(int n) {
+ wall_ticks = n;
+
+ double wall_start, wall_finish, t;
t = wall_seconds();
- while (t == (wall_finish=wall_seconds())) {
- ;
+ while (t == (wall_start = wall_seconds())) {
+ ;
}
- }
- int64 rtc_finish = read_rtc();
+ int64 rtc_start = read_rtc();
+ for (int i = 0; i < wall_ticks; i++) {
+ t = wall_seconds();
+ while (t == (wall_finish = wall_seconds())) {
+ ;
+ }
+ }
+ int64 rtc_finish = read_rtc();
- wall_elapsed = wall_finish - wall_start;
- rtc_elapsed = rtc_finish - rtc_start;
- time_factor = wall_elapsed / (double) rtc_elapsed;
+ wall_elapsed = wall_finish - wall_start;
+ rtc_elapsed = rtc_finish - rtc_start;
+ time_factor = wall_elapsed / (double) rtc_elapsed;
}
-static double
-wall_seconds()
-{
- struct timeval t;
- gettimeofday(&t, NULL);
+static double wall_seconds() {
+ struct timeval t;
+ gettimeofday(&t, NULL);
- return (double) t.tv_sec + (double) t.tv_usec * 1E-6;
+ return (double) t.tv_sec + (double) t.tv_usec * 1E-6;
}
#else
double
Timer::seconds()
-{
- struct timeval t;
- gettimeofday(&t, NULL);
+{
+ struct timeval t;
+ gettimeofday(&t, NULL);
- return (double) t.tv_sec + (double) t.tv_usec * 1E-6;
+ return (double) t.tv_sec + (double) t.tv_usec * 1E-6;
}
int64
Timer::ticks()
-{
- struct timeval t;
- gettimeofday(&t, NULL);
+{
+ struct timeval t;
+ gettimeofday(&t, NULL);
- return 1000000 * (int64) t.tv_sec + (int64) t.tv_usec;
+ return 1000000 * (int64) t.tv_sec + (int64) t.tv_usec;
}
void
@@ -150,26 +137,23 @@ Timer::calibrate(int n)
#endif
-static double
-min( double v1, double v2 )
-{
- if (v2 < v1) return v2;
- return v1;
+static double min(double v1, double v2) {
+ if (v2 < v1)
+ return v2;
+ return v1;
}
-double
-Timer::resolution()
-{
- double a,b,c=1E9;
- for (int i=0; i < 10; i++) {
- a = Timer::seconds();
- while (a == (b=Timer::seconds()))
- ;
- a = Timer::seconds();
- while (a == (b=Timer::seconds()))
- ;
- c = min(b - a, c);
- }
-
- return c;
+double Timer::resolution() {
+ double a, b, c = 1E9;
+ for (int i = 0; i < 10; i++) {
+ a = Timer::seconds();
+ while (a == (b = Timer::seconds()))
+ ;
+ a = Timer::seconds();
+ while (a == (b = Timer::seconds()))
+ ;
+ c = min(b - a, c);
+ }
+
+ return c;
}
diff --git a/src/Timer.h b/src/Timer.h
index ba2c503..abc52af 100644
--- a/src/Timer.h
+++ b/src/Timer.h
@@ -9,7 +9,6 @@
* Douglas M. Pase - initial API and implementation *
*******************************************************************************/
-
#if !defined(Timer_h)
#define Timer_h
@@ -17,11 +16,11 @@
class Timer {
public:
- static double seconds();
- static double resolution();
- static int64 ticks();
- static void calibrate();
- static void calibrate(int n);
+ static double seconds();
+ static double resolution();
+ static int64 ticks();
+ static void calibrate();
+ static void calibrate(int n);
private:
};
diff --git a/src/Types.cpp b/src/Types.cpp
index da5ecd0..409f727 100644
--- a/src/Types.cpp
+++ b/src/Types.cpp
@@ -9,5 +9,4 @@
* Douglas M. Pase - initial API and implementation *
*******************************************************************************/
-
#include "Types.h"
diff --git a/src/Types.h b/src/Types.h
index 9e2eeb0..9bb6038 100644
--- a/src/Types.h
+++ b/src/Types.h
@@ -9,7 +9,6 @@
* Douglas M. Pase - initial API and implementation *
*******************************************************************************/
-
#if !defined(Types_h)
#define Types_h
@@ -24,6 +23,6 @@ typedef unsigned short uint16;
typedef unsigned char uint8;
typedef double float64;
-typedef float float32;
+typedef float float32;
#endif