diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Chain.cpp | 24 | ||||
-rw-r--r-- | src/Chain.h | 24 | ||||
-rw-r--r-- | src/Experiment.cpp | 592 | ||||
-rw-r--r-- | src/Experiment.h | 103 | ||||
-rw-r--r-- | src/Lock.cpp | 45 | ||||
-rw-r--r-- | src/Lock.h | 30 | ||||
-rw-r--r-- | src/Main.c | 90 | ||||
-rw-r--r-- | src/Main.cpp | 89 | ||||
-rw-r--r-- | src/Main.h | 18 | ||||
-rw-r--r-- | src/Output.cpp | 153 | ||||
-rw-r--r-- | src/Output.h | 28 | ||||
-rw-r--r-- | src/Run.cpp | 1206 | ||||
-rw-r--r-- | src/Run.h | 50 | ||||
-rw-r--r-- | src/SpinBarrier.cpp | 48 | ||||
-rw-r--r-- | src/SpinBarrier.h | 44 | ||||
-rw-r--r-- | src/Thread.cpp | 86 | ||||
-rw-r--r-- | src/Thread.h | 53 | ||||
-rw-r--r-- | src/Timer.cpp | 175 | ||||
-rw-r--r-- | src/Timer.h | 28 | ||||
-rw-r--r-- | src/Types.cpp | 13 | ||||
-rw-r--r-- | src/Types.h | 29 |
21 files changed, 2928 insertions, 0 deletions
diff --git a/src/Chain.cpp b/src/Chain.cpp new file mode 100644 index 0000000..ddbc104 --- /dev/null +++ b/src/Chain.cpp @@ -0,0 +1,24 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. * + * All rights reserved. This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M. Pase - initial API and implementation * + *******************************************************************************/ + + +#include <stdio.h> + +#include "Chain.h" + +Chain::Chain() +: next(NULL) +{ +} + +Chain::~Chain() +{ +} diff --git a/src/Chain.h b/src/Chain.h new file mode 100644 index 0000000..8bdb584 --- /dev/null +++ b/src/Chain.h @@ -0,0 +1,24 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. * + * All rights reserved. This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M. Pase - initial API and implementation * + *******************************************************************************/ + + +#if !defined(Chain_h) +#define Chain_h + +class Chain { +public: + Chain(); + ~Chain(); + Chain* next; +private: +}; + +#endif diff --git a/src/Experiment.cpp b/src/Experiment.cpp new file mode 100644 index 0000000..75b1cab --- /dev/null +++ b/src/Experiment.cpp @@ -0,0 +1,592 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. * + * All rights reserved. 
This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M. Pase - initial API and implementation * + *******************************************************************************/ + + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#if defined(NUMA) +#include <numa.h> +#endif + +#include "Experiment.h" + +Experiment::Experiment() : + strict (0), + pointer_size (DEFAULT_POINTER_SIZE), + bytes_per_line (DEFAULT_BYTES_PER_LINE), + links_per_line (DEFAULT_LINKS_PER_LINE), + bytes_per_page (DEFAULT_BYTES_PER_PAGE), + lines_per_page (DEFAULT_LINES_PER_PAGE), + links_per_page (DEFAULT_LINKS_PER_PAGE), + bytes_per_chain (DEFAULT_BYTES_PER_CHAIN), + lines_per_chain (DEFAULT_LINES_PER_CHAIN), + links_per_chain (DEFAULT_LINKS_PER_CHAIN), + pages_per_chain (DEFAULT_PAGES_PER_CHAIN), + chains_per_thread(DEFAULT_CHAINS_PER_THREAD), + bytes_per_thread (DEFAULT_BYTES_PER_THREAD), + num_threads (DEFAULT_THREADS), + bytes_per_test (DEFAULT_BYTES_PER_TEST), + seconds (DEFAULT_SECONDS), + iterations (DEFAULT_ITERATIONS), + experiments (DEFAULT_EXPERIMENTS), + output_mode (TABLE), + access_pattern (RANDOM), + stride (1), + numa_placement (LOCAL), + offset_or_mask (0), + placement_map (NULL), + thread_domain (NULL), + chain_domain (NULL), + numa_max_domain (0), + num_numa_domains (1) +{ +} + +Experiment::~Experiment() +{ +} + + // interface: + // + // -l or --line bytes per cache line (line size) + // -p or --page bytes per page (page size) + // -c or --chain bytes per chain (used to compute pages per chain) + // -r or --references chains per thread (memory loading) + // -t or --threads number of threads (concurrency and contention) + // -i or --iters iterations + // -e or --experiments experiments + // -a or --access memory access 
pattern + // random random access pattern + // forward <stride> exclusive OR and mask + // reverse <stride> addition and offset + // -o or --output output mode + // hdr header only + // csv csv only + // both header + csv + // table human-readable table of values + // -n or --numa numa placement + // local local allocation of all chains + // xor <mask> exclusive OR and mask + // add <offset> addition and offset + // map <map> explicit mapping of threads and chains to domains + +int +Experiment::parse_args(int argc, char* argv[]) +{ + int error = 0; + for (int i=1; i < argc; i++) { + if (strcasecmp(argv[i], "-x") == 0 || strcasecmp(argv[i], "--strict") == 0) { + this->strict = 1; + } else if (strcasecmp(argv[i], "-s") == 0 || strcasecmp(argv[i], "--seconds") == 0) { + i++; + if (i == argc) { error = 1; break; } + this->seconds = Experiment::parse_real(argv[i]); + this->iterations = 0; + if (this->seconds == 0) { error = 1; break; } + } else if (strcasecmp(argv[i], "-l") == 0 || strcasecmp(argv[i], "--line") == 0) { + i++; + if (i == argc) { error = 1; break; } + this->bytes_per_line = Experiment::parse_number(argv[i]); + if (this->bytes_per_line == 0) { error = 1; break; } + } else if (strcasecmp(argv[i], "-p") == 0 || strcasecmp(argv[i], "--page") == 0) { + i++; + if (i == argc) { error = 1; break; } + this->bytes_per_page = Experiment::parse_number(argv[i]); + if (this->bytes_per_page == 0) { error = 1; break; } + } else if (strcasecmp(argv[i], "-c") == 0 || strcasecmp(argv[i], "--chain") == 0) { + i++; + if (i == argc) { error = 1; break; } + this->bytes_per_chain = Experiment::parse_number(argv[i]); + if (this->bytes_per_chain == 0) { error = 1; break; } + } else if (strcasecmp(argv[i], "-r") == 0 || strcasecmp(argv[i], "--references") == 0) { + i++; + if (i == argc) { error = 1; break; } + this->chains_per_thread = Experiment::parse_number(argv[i]); + if (this->chains_per_thread == 0) { error = 1; break; } + } else if (strcasecmp(argv[i], "-t") == 0 || 
strcasecmp(argv[i], "--threads") == 0) { + i++; + if (i == argc) { error = 1; break; } + this->num_threads = Experiment::parse_number(argv[i]); + if (this->num_threads == 0) { error = 1; break; } + } else if (strcasecmp(argv[i], "-i") == 0 || strcasecmp(argv[i], "--iterations") == 0) { + i++; + if (i == argc) { error = 1; break; } + this->iterations = Experiment::parse_number(argv[i]); + this->seconds = 0; + if (this->iterations == 0) { error = 1; break; } + } else if (strcasecmp(argv[i], "-e") == 0 || strcasecmp(argv[i], "--experiments") == 0) { + i++; + if (i == argc) { error = 1; break; } + this->experiments = Experiment::parse_number(argv[i]); + if (this->experiments == 0) { error = 1; break; } + } else if (strcasecmp(argv[i], "-a") == 0 || strcasecmp(argv[i], "--access") == 0) { + i++; + if (i == argc) { error = 1; break; } + if (strcasecmp(argv[i], "random") == 0) { + this->access_pattern = RANDOM; + } else if (strcasecmp(argv[i], "forward") == 0) { + this->access_pattern = STRIDED; + i++; + if (i == argc) { error = 1; break; } + this->stride = Experiment::parse_number(argv[i]); + if (this->stride == 0) { error = 1; break; } + } else if (strcasecmp(argv[i], "reverse") == 0) { + this->access_pattern = STRIDED; + i++; + if (i == argc) { error = 1; break; } + this->stride = - Experiment::parse_number(argv[i]); + if (this->stride == 0) { error = 1; break; } + } else if (strcasecmp(argv[i], "stream") == 0) { + this->access_pattern = STREAM; + i++; + if (i == argc) { error = 1; break; } + this->stride = Experiment::parse_number(argv[i]); + if (this->stride == 0) { error = 1; break; } + } else { + error = 1; + break; + } + } else if (strcasecmp(argv[i], "-o") == 0 || strcasecmp(argv[i], "--output") == 0) { + i++; + if (i == argc) { error = 1; break; } + if (strcasecmp(argv[i], "table") == 0) { + this->output_mode = TABLE; + } else if (strcasecmp(argv[i], "csv") == 0) { + this->output_mode = CSV; + } else if (strcasecmp(argv[i], "both") == 0) { + this->output_mode = 
BOTH; + } else if (strcasecmp(argv[i], "hdr") == 0) { + this->output_mode = HEADER; + } else if (strcasecmp(argv[i], "header") == 0) { + this->output_mode = HEADER; + } else { + error = 1; + break; + } + } else if (strcasecmp(argv[i], "-n") == 0 || strcasecmp(argv[i], "--numa") == 0) { + i++; + if (i == argc) { error = 1; break; } + if (strcasecmp(argv[i], "local") == 0) { + this->numa_placement = LOCAL; + } else if (strcasecmp(argv[i], "xor") == 0) { + this->numa_placement = XOR; + i++; + if (i == argc) { error = 1; break; } + this->offset_or_mask = Experiment::parse_number(argv[i]); + } else if (strcasecmp(argv[i], "add") == 0) { + this->numa_placement = ADD; + i++; + if (i == argc) { error = 1; break; } + this->offset_or_mask = Experiment::parse_number(argv[i]); + } else if (strcasecmp(argv[i], "map") == 0) { + this->numa_placement = MAP; + i++; + if (i == argc) { error = 1; break; } + this->placement_map = argv[i]; + } else { + error = 1; + break; + } + } else { + error = 1; + break; + } + } + + + // if we've hit an error, print a message and quit + if (error) { + printf("usage: %s <options>\n", argv[0]); + printf("where <options> are selected from the following:\n"); + printf(" [-h|--help] # this message\n"); + printf(" [-l|--line] <number> # bytes per cache line (cache line size)\n"); + printf(" [-p|--page] <number> # bytes per page (page size)\n"); + printf(" [-c|--chain] <number> # bytes per chain (used to compute pages per chain)\n"); + printf(" [-r|--references] <number> # chains per thread (memory loading)\n"); + printf(" [-t|--threads] <number> # number of threads (concurrency and contention)\n"); + printf(" [-i|--iterations] <number> # iterations per experiment\n"); + printf(" [-e|--experiments] <number> # experiments\n"); + printf(" [-a|--access] <pattern> # memory access pattern\n"); + printf(" [-o|--output] <format> # output format\n"); + printf(" [-n|--numa] <placement> # numa placement\n"); + printf(" [-s|--seconds] <number> # run each experiment 
for <number> seconds\n"); + printf(" [-x|--strict] # fail rather than adjust options to sensible values\n"); + printf("\n"); + printf("<pattern> is selected from the following:\n"); + printf(" random # all chains are accessed randomly\n"); + printf(" forward <stride> # chains are in forward order with constant stride\n"); + printf(" reverse <stride> # chains are in reverse order with constant stride\n"); + printf(" stream <stride> # references are calculated rather than read from memory\n"); + printf("\n"); + printf("Note: <stride> is always a small positive integer.\n"); + printf("\n"); + printf("<format> is selected from the following:\n"); + printf(" hdr # csv header only\n"); + printf(" csv # results in csv format only\n"); + printf(" both # header and results in csv format\n"); + printf(" table # human-readable table of values\n"); + printf("\n"); + printf("<placement> is selected from the following:\n"); + printf(" local # all chains are allocated locally\n"); + printf(" xor <mask> # exclusive OR and mask\n"); + printf(" add <offset> # addition and offset\n"); + printf(" map <map> # explicit mapping of threads and chains to domains\n"); + printf("\n"); + printf("<map> has the form \"t1:c11,c12,...,c1m;t2:c21,...,c2m;...;tn:cn1,...,cnm\"\n"); + printf("where t[i] is the NUMA domain where the ith thread is run,\n"); + printf("and c[i][j] is the NUMA domain where the jth chain in the ith thread is allocated.\n"); + printf("(The values t[i] and c[i][j] must all be zero or small positive integers.)\n"); + printf("\n"); + printf("Note: for maps, each thread must have the same number of chains,\n"); + printf("maps override the -t or --threads specification,\n"); + printf("NUMA domains are whole numbers in the range of 0..N, and\n"); + printf("thread or chain domains that exceed the maximum NUMA domain\n"); + printf("are wrapped around using a MOD function.\n"); + printf("\n"); + printf("To determine the number of NUMA domains currently available\n"); + printf("on 
your system, use a command such as \"numastat\".\n"); + printf("\n"); + printf("Final note: strict is not yet fully implemented, and\n"); + printf("maps do not gracefully handle ill-formed map specifications.\n"); + + return 1; + } + + + // STRICT -- fail if specifications are inconsistent + + // compute lines per page and lines per chain + // based on input and defaults. + // we round up page and chain sizes when needed. + this->lines_per_page = (this->bytes_per_page+this->bytes_per_line-1) / this->bytes_per_line; + this->bytes_per_page = this->bytes_per_line * this->lines_per_page; + this->pages_per_chain = (this->bytes_per_chain+this->bytes_per_page-1) / this->bytes_per_page; + this->bytes_per_chain = this->bytes_per_page * this->pages_per_chain; + this->bytes_per_thread = this->bytes_per_chain * this->chains_per_thread; + this->bytes_per_test = this->bytes_per_thread * this->num_threads; + this->links_per_line = this->bytes_per_line / pointer_size; + this->links_per_page = this->lines_per_page * this->links_per_line; + this->lines_per_chain = this->lines_per_page * this->pages_per_chain; + this->links_per_chain = this->lines_per_chain * this->links_per_line; + + + // allocate the chain roots for all threads + // and compute the chain locations + // (the chains themselves are initialized by the threads) + switch (this->numa_placement) { + case LOCAL : + case XOR : + case ADD : + this->thread_domain = new int32 [ this->num_threads ]; + this->chain_domain = new int32*[ this->num_threads ]; + this->random_state = new char* [ this->num_threads ]; + + for (int i=0; i < this->num_threads; i++) { + this->chain_domain[i] = new int32 [ this->chains_per_thread ]; + + const int state_size = 256; + this->random_state[i] = new char[state_size]; + initstate((unsigned int) i, (char *) this->random_state[i], (size_t) state_size); + } + break; + } + + +#if defined(NUMA) + this->numa_max_domain = numa_max_node(); + this->num_numa_domains = this->numa_max_domain + 1; +#endif + + + 
switch (this->numa_placement) { + case LOCAL : + default: + this->alloc_local(); + break; + case XOR : + this->alloc_xor(); + break; + case ADD : + this->alloc_add(); + break; + case MAP : + this->alloc_map(); + break; + } + + return 0; +} + + +int64 +Experiment::parse_number( const char* s ) +{ + int64 result = 0; + + int len = strlen( s ); + for (int i=0; i < len; i++) { + if ( '0' <= s[i] && s[i] <= '9' ) { + result = result * 10 + s[i] - '0'; + } else if (s[i] == 'k' || s[i] == 'K') { + result = result << 10; + break; + } else if (s[i] == 'm' || s[i] == 'M') { + result = result << 20; + break; + } else if (s[i] == 'g' || s[i] == 'G') { + result = result << 30; + break; + } else if (s[i] == 't' || s[i] == 'T') { + result = result << 40; + break; + } else { + break; + } + } + + return result; +} + + +float +Experiment::parse_real( const char* s ) +{ + float result = 0; + bool decimal = false; + float power = 1; + + int len = strlen( s ); + for (int i=0; i < len; i++) { + if ( '0' <= s[i] && s[i] <= '9' ) { + if (! decimal) { + result = result * 10 + s[i] - '0'; + } else { + power = power / 10; + result = result + (s[i] - '0') * power; + } + } else if ( '.' 
== s[i] ) { + decimal = true; + } else { + break; + } + } + + return result; +} + +void +Experiment::alloc_local() +{ + for (int i=0; i < this->num_threads; i++) { + this->thread_domain[i] = i % this->num_numa_domains; + for (int j=0; j < this->chains_per_thread; j++) { + this->chain_domain[i][j] = this->thread_domain[i]; + } + } +} + +void +Experiment::alloc_xor() +{ + for (int i=0; i < this->num_threads; i++) { + this->thread_domain[i] = i % this->num_numa_domains; + for (int j=0; j < this->chains_per_thread; j++) { + this->chain_domain[i][j] = (this->thread_domain[i] ^ this->offset_or_mask) % this->num_numa_domains; + } + } +} + +void +Experiment::alloc_add() +{ + for (int i=0; i < this->num_threads; i++) { + this->thread_domain[i] = i % this->num_numa_domains; + for (int j=0; j < this->chains_per_thread; j++) { + this->chain_domain[i][j] = (this->thread_domain[i] + this->offset_or_mask) % this->num_numa_domains; + } + } +} + + // DOES NOT HANDLE ILL-FORMED SPECIFICATIONS +void +Experiment::alloc_map() +{ + // STRICT -- fail if specifications are inconsistent + + // maps look like "t1:c11,c12,...,c1m;t2:c21,...,c2m;...;tn:cn1,...,cnm" + // where t[i] is the thread domain of the ith thread, + // and c[i][j] is the chain domain of the jth chain in the ith thread + + // count the thread descriptors by counting ";" up to EOS + int threads = 1; + char *p = this->placement_map; + while (*p != '\0') { + if (*p == ';') threads += 1; + p++; + } + int thread_domain[ threads ]; + + // count the chain descriptors by counting "," up to ";" or EOS + int chains = 1; + p = this->placement_map; + while (*p != '\0') { + if (*p == ';') break; + if (*p == ',') chains += 1; + p++; + } + int chain_domain [ threads ][ chains ]; + + int t=0, c=0; + p = this->placement_map; + while (*p != '\0') { + // everything up to ":" is the thread domain + int i = 0; + char buf[64]; + while (*p != '\0') { + if (*p == ':') { p++; break; } + buf[i] = *p; + i++; + p++; + } + buf[i] = '\0'; + 
thread_domain[t] = Experiment::parse_number(buf); + + // search for one or several ',' + c = 0; + while (*p != '\0' && *p != ';') { + if (chains <= c || threads <= t) { + // error in the thread/chain specification + fprintf(stderr, "Malformed map.\n"); + exit(1); + } + int i = 0; + while (*p != '\0' && *p != ';') { + if (*p == ',') { p++; break; } + buf[i] = *p; + i++; + p++; + } + buf[i] = '\0'; + chain_domain[t][c] = Experiment::parse_number(buf); + c++; + } + + if (*p == '\0') break; + if (*p == ';') p++; + t++; + } + + + this->num_threads = threads; + this->chains_per_thread = chains; + + this->thread_domain = new int32 [ this->num_threads ]; + this->chain_domain = new int32*[ this->num_threads ]; + this->random_state = new char* [ this->num_threads ]; + + for (int i=0; i < this->num_threads; i++) { + this->thread_domain[i] = thread_domain[i] % this->num_numa_domains; + + const int state_size = 256; + this->random_state[i] = new char[state_size]; + initstate((unsigned int) i, (char *) this->random_state[i], (size_t) state_size); + + this->chain_domain[i] = new int32 [ this->chains_per_thread ]; + for (int j=0; j < this->chains_per_thread; j++) { + this->chain_domain[i][j] = chain_domain[i][j] % this->num_numa_domains; + } + } + + this->bytes_per_thread = this->bytes_per_chain * this->chains_per_thread; + this->bytes_per_test = this->bytes_per_thread * this->num_threads; +} + +#include "Chain.h" + +void +Experiment::print() +{ + printf("strict = %d\n", strict); + printf("pointer_size = %d\n", pointer_size); + printf("sizeof(Chain) = %d\n", sizeof(Chain)); + printf("sizeof(Chain *) = %d\n", sizeof(Chain *)); + printf("bytes_per_line = %d\n", bytes_per_line); + printf("links_per_line = %d\n", links_per_line); + printf("bytes_per_page = %d\n", bytes_per_page); + printf("lines_per_page = %d\n", lines_per_page); + printf("links_per_page = %d\n", links_per_page); + printf("bytes_per_chain = %d\n", bytes_per_chain); + printf("lines_per_chain = %d\n", lines_per_chain); 
+ printf("links_per_chain = %d\n", links_per_chain); + printf("pages_per_chain = %d\n", pages_per_chain); + printf("chains_per_thread = %d\n", chains_per_thread); + printf("bytes_per_thread = %d\n", bytes_per_thread); + printf("num_threads = %d\n", num_threads); + printf("bytes_per_test = %d\n", bytes_per_test); + printf("iterations = %d\n", iterations); + printf("experiments = %d\n", experiments); + printf("access_pattern = %d\n", access_pattern); + printf("stride = %d\n", stride); + printf("output_mode = %d\n", output_mode); + printf("numa_placement = %d\n", numa_placement); + printf("offset_or_mask = %d\n", offset_or_mask); + printf("numa_max_domain = %d\n", numa_max_domain); + printf("num_numa_domains = %d\n", num_numa_domains); + + for (int i=0; i < this->num_threads; i++) { + printf("%d: ", this->thread_domain[i]); + for (int j=0; j < this->chains_per_thread; j++) { + printf("%d,", this->chain_domain[i][j]); + } + printf("\n"); + } + + fflush(stdout); +} + +const char* +Experiment::access() +{ + const char* result = NULL; + + if (this->access_pattern == RANDOM) { + result = "random"; + } else if (this->access_pattern == STRIDED && 0 < this->stride) { + result = "forward"; + } else if (this->access_pattern == STRIDED && this->stride < 0) { + result = "reverse"; + } else if (this->access_pattern == STREAM) { + result = "stream"; + } + + return result; +} + +const char* +Experiment::placement() +{ + const char* result = NULL; + + if (this->numa_placement == LOCAL) { + result = "local"; + } else if (this->numa_placement == XOR) { + result = "xor"; + } else if (this->numa_placement == ADD) { + result = "add"; + } else if (this->numa_placement == MAP) { + result = "map"; + } + + return result; +} diff --git a/src/Experiment.h b/src/Experiment.h new file mode 100644 index 0000000..2c749d3 --- /dev/null +++ b/src/Experiment.h @@ -0,0 +1,103 @@ +/******************************************************************************* + * Copyright (c) 2006 International 
Business Machines Corporation. * + * All rights reserved. This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M. Pase - initial API and implementation * + *******************************************************************************/ + + +#if !defined(Experiment_h) +#define Experiment_h + +#include "Chain.h" +#include "Types.h" + +/* Holds every parameter of one benchmark run: working-set geometry, thread count, timing, output mode, access pattern and NUMA placement, plus the maps from threads and chains to NUMA domains. All data members are public and are read directly by main, Run and Output. */ +class Experiment { +public: + Experiment(); + ~Experiment(); + + int parse_args(int argc, char* argv[]); + int64 parse_number( const char* s ); + float parse_real( const char* s ); + + /* printable names of numa_placement and access_pattern */ + const char* placement(); + const char* access(); + + // fundamental parameters + int64 pointer_size; // number of bytes in a pointer + int64 bytes_per_line; // working set cache line size (bytes) + int64 links_per_line; // working set cache line size (links) + int64 bytes_per_page; // working set page size (in bytes) + int64 lines_per_page; // working set page size (in lines) + int64 links_per_page; // working set page size (in links) + int64 bytes_per_chain; // working set chain size (bytes) + int64 lines_per_chain; // working set chain size (lines) + int64 links_per_chain; // working set chain size (links) + int64 pages_per_chain; // working set chain size (pages) + int64 bytes_per_thread; // thread working set size (bytes) + int64 chains_per_thread; // memory loading per thread + int64 num_threads; // number of threads in the experiment + int64 bytes_per_test; // test working set size (bytes) + + float seconds; // number of seconds per experiment + int64 iterations; // number of iterations per experiment + int64 experiments; // number of experiments per test + + enum { CSV, BOTH, HEADER, TABLE } + output_mode; // results output mode + + enum { RANDOM, STRIDED, STREAM } + access_pattern; // memory access pattern + int64 stride; /* for STRIDED: positive is reported as "forward", negative as "reverse" */ + +
enum { LOCAL, XOR, ADD, MAP } + numa_placement; // memory allocation mode + int64 offset_or_mask; + char* placement_map; + + // maps threads and chains to numa domains + int32* thread_domain; // thread_domain[thread] + int32** chain_domain; // chain_domain[thread][chain] + int32 numa_max_domain; // highest numa domain id + int32 num_numa_domains; // number of numa domains + + char** random_state; // random state for each thread + + int strict; // strictly adhere to user input, or fail + + /* compile-time defaults; the derived sizes are computed from the base line, page and chain values */ + const static int32 DEFAULT_POINTER_SIZE = sizeof(Chain); + const static int32 DEFAULT_BYTES_PER_LINE = 64; + const static int32 DEFAULT_LINKS_PER_LINE = DEFAULT_BYTES_PER_LINE / DEFAULT_POINTER_SIZE; + const static int32 DEFAULT_BYTES_PER_PAGE = 4096; + const static int32 DEFAULT_LINES_PER_PAGE = DEFAULT_BYTES_PER_PAGE / DEFAULT_BYTES_PER_LINE; + const static int32 DEFAULT_LINKS_PER_PAGE = DEFAULT_LINES_PER_PAGE * DEFAULT_LINKS_PER_LINE; + const static int32 DEFAULT_PAGES_PER_CHAIN = 4096; + const static int32 DEFAULT_BYTES_PER_CHAIN = DEFAULT_BYTES_PER_PAGE * DEFAULT_PAGES_PER_CHAIN; + const static int32 DEFAULT_LINES_PER_CHAIN = DEFAULT_LINES_PER_PAGE * DEFAULT_PAGES_PER_CHAIN; + const static int32 DEFAULT_LINKS_PER_CHAIN = DEFAULT_LINES_PER_CHAIN * DEFAULT_BYTES_PER_LINE / DEFAULT_POINTER_SIZE; + const static int32 DEFAULT_CHAINS_PER_THREAD = 1; + const static int32 DEFAULT_BYTES_PER_THREAD = DEFAULT_BYTES_PER_CHAIN * DEFAULT_CHAINS_PER_THREAD; + const static int32 DEFAULT_THREADS = 1; + const static int32 DEFAULT_BYTES_PER_TEST = DEFAULT_BYTES_PER_THREAD * DEFAULT_THREADS; + const static int32 DEFAULT_SECONDS = 1; + const static int32 DEFAULT_ITERATIONS = 0; + const static int32 DEFAULT_EXPERIMENTS = 1; + + const static int32 DEFAULT_OUTPUT_MODE = 1; + + /* one allocator per numa_placement mode (NOTE(review): bodies are not visible here - presumably they fill thread_domain/chain_domain; confirm in Experiment.cpp) */ + void alloc_local(); + void alloc_xor(); + void alloc_add(); + void alloc_map(); + + void print(); + +private: +}; + +#endif diff --git a/src/Lock.cpp b/src/Lock.cpp new file mode 100644 index 0000000..104dc81 --- /dev/null +++
b/src/Lock.cpp @@ -0,0 +1,45 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. * + * All rights reserved. This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M. Pase - initial API and implementation * + *******************************************************************************/ + + +#include <stdio.h> +#include <pthread.h> + +#include "Lock.h" + +Lock::Lock() +{ + pthread_mutex_init( &(this->mutex), NULL ); +} + +Lock::~Lock() +{ + pthread_mutex_destroy( &(this->mutex) ); +} + +void +Lock::lock() +{ + pthread_mutex_lock( &(this->mutex) ); +} + +int +Lock::test() +{ + pthread_mutex_trylock( &(this->mutex) ); +} + +void +Lock::unlock() +{ + pthread_mutex_unlock( &(this->mutex) ); +} + diff --git a/src/Lock.h b/src/Lock.h new file mode 100644 index 0000000..14bf1dc --- /dev/null +++ b/src/Lock.h @@ -0,0 +1,30 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. * + * All rights reserved. This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M. 
Pase - initial API and implementation * + *******************************************************************************/ + + +#if !defined(Lock_h) +#define Lock_h + +#include <pthread.h> + +/* Mutual-exclusion lock that wraps a single pthread_mutex_t. lock() and unlock() acquire and release it; test() makes a non-blocking attempt with pthread_mutex_trylock(). */ +class Lock { +public: + Lock(); + ~Lock(); + void lock(); + int test(); + void unlock(); + +private: + pthread_mutex_t mutex; /* the wrapped mutex, initialised in the constructor and destroyed in the destructor */ +}; + +#endif diff --git a/src/Main.c b/src/Main.c new file mode 100644 index 0000000..5d5f243 --- /dev/null +++ b/src/Main.c @@ -0,0 +1,90 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. * + * All rights reserved. This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M. Pase - initial API and implementation * + *******************************************************************************/ + + +#include <stdio.h> + +#include "Main.h" + +#include "Run.h" +#include "Timer.h" +#include "Types.h" +#include "Output.h" +#include "Experiment.h" +#include "SpinBarrier.h" + + // This program allocates and accesses + // a number of blocks of memory, one or more + // for each thread that executes. Blocks + // are divided into sub-blocks called + // pages, and pages are divided into + // sub-blocks called cache lines. + // + // All pages are collected into a list. + // Pages are selected for the list in + // a particular order. Each cache line + // within the page is similarly gathered + // into a list in a particular order. + // In both cases the order may be random + // or linear. + // + // A root pointer points to the first + // cache line. A pointer in the cache + // line points to the next cache line, + // which contains a pointer to the cache + // line after that, and so on.
This + // forms a pointer chain that touches all + // cache lines within the first page, + // then all cache lines within the second + // page, and so on until all pages are + // covered. The last pointer contains + // NULL, terminating the chain. + // + // Depending on compile-time options, + // pointers may be 32-bit or 64-bit + // pointers. + +int verbose = 0; + +int +main( int argc, char* argv[] ) +{ + Timer::calibrate(10000); + double clk_res = Timer::resolution(); + + Experiment e; + if (e.parse_args(argc, argv)) { + return 0; + } + +#if defined(UNDEFINED) + e.print(); + if (argv != NULL) return 0; +#endif + + SpinBarrier sb( e.num_threads ); + Run r[ e.num_threads ]; + for (int i=0; i < e.num_threads; i++) { + r[i].set( e, &sb ); + r[i].start(); + } + + for (int i=0; i < e.num_threads; i++) { + r[i].wait(); + } + + int64 ops = Run::ops_per_chain(); + double secs = Run::seconds(); + + Output::print(e, ops, secs, clk_res); + + return 0; +} diff --git a/src/Main.cpp b/src/Main.cpp new file mode 100644 index 0000000..ebd276a --- /dev/null +++ b/src/Main.cpp @@ -0,0 +1,89 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. * + * All rights reserved. This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M. Pase - initial API and implementation * + *******************************************************************************/ + + +#include <stdio.h> + +#include "Main.h" + +#include "Run.h" +#include "Timer.h" +#include "Types.h" +#include "Output.h" +#include "Experiment.h" + + // This program allocates and accesses + // a number of blocks of memory, one or more + // for each thread that executes. 
Blocks + // are divided into sub-blocks called + // pages, and pages are divided into + // sub-blocks called cache lines. + // + // All pages are collected into a list. + // Pages are selected for the list in + // a particular order. Each cache line + // within the page is similarly gathered + // into a list in a particular order. + // In both cases the order may be random + // or linear. + // + // A root pointer points to the first + // cache line. A pointer in the cache + // line points to the next cache line, + // which contains a pointer to the cache + // line after that, and so on. This + // forms a pointer chain that touches all + // cache lines within the first page, + // then all cache lines within the second + // page, and so on until all pages are + // covered. The last pointer contains + // NULL, terminating the chain. + // + // Depending on compile-time options, + // pointers may be 32-bit or 64-bit + // pointers. + +int verbose = 0; + +int +main( int argc, char* argv[] ) +{ + Timer::calibrate(10000); + double clk_res = Timer::resolution(); + + Experiment e; + if (e.parse_args(argc, argv)) { + return 0; + } + +#if defined(UNDEFINED) + e.print(); + if (argv != NULL) return 0; +#endif + + SpinBarrier sb( e.num_threads ); + Run r[ e.num_threads ]; + for (int i=0; i < e.num_threads; i++) { + r[i].set( e, &sb ); + r[i].start(); + } + + for (int i=0; i < e.num_threads; i++) { + r[i].wait(); + } + + int64 ops = Run::ops_per_chain(); + double secs = Run::seconds(); + + Output::print(e, ops, secs, clk_res); + + return 0; +} diff --git a/src/Main.h b/src/Main.h new file mode 100644 index 0000000..1492291 --- /dev/null +++ b/src/Main.h @@ -0,0 +1,18 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. * + * All rights reserved. 
This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M. Pase - initial API and implementation * + *******************************************************************************/ + + +#if !defined(Main_h) +#define Main_h + +extern int verbose; + +#endif diff --git a/src/Output.cpp b/src/Output.cpp new file mode 100644 index 0000000..9f9c09a --- /dev/null +++ b/src/Output.cpp @@ -0,0 +1,153 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. * + * All rights reserved. This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M. 
Pase - initial API and implementation * + *******************************************************************************/ + + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "Output.h" + +#include "Types.h" +#include "Experiment.h" + + +/* Dispatches on e.output_mode: CSV prints one data row, BOTH prints the column header followed by the data row, HEADER prints only the column header, and any other mode prints the human-readable table. */ +void +Output::print( Experiment &e, int64 ops, double secs, double ck_res ) +{ + if (e.output_mode == Experiment::CSV) { + Output::csv(e, ops, secs, ck_res); + } else if (e.output_mode == Experiment::BOTH) { + Output::header(e, ops, secs, ck_res); + Output::csv(e, ops, secs, ck_res); + } else if (e.output_mode == Experiment::HEADER) { + Output::header(e, ops, secs, ck_res); + } else { + Output::table(e, ops, secs, ck_res); + } +} + +/* Prints the CSV column titles, in the same order as the fields written by csv(). The parameters are unused; they exist so all printers share one signature. */ +void +Output::header( Experiment &e, int64 ops, double secs, double ck_res ) +{ + printf("pointer size (bytes),"); + printf("cache line size (bytes),"); + printf("page size (bytes),"); + printf("chain size (bytes),"); + printf("thread size (bytes),"); + printf("test size (bytes),"); + printf("chains per thread,"); + printf("number of threads,"); + printf("iterations,"); + printf("experiments,"); + printf("access pattern,"); + printf("stride,"); + printf("numa placement,"); + printf("offset or mask,"); + printf("numa domains,"); + printf("domain map,"); + printf("operations per chain,"); + printf("total operations,"); + printf("elapsed time (seconds),"); + printf("elapsed time (timer ticks),"); + /* a stray ck_res argument used to be passed here although the format has no conversion */ + printf("clock resolution (ns),"); + printf("memory latency (ns),"); + printf("memory bandwidth (MB/s)\n"); + + fflush(stdout); +} + +/* Prints one CSV row with the experiment parameters and results. ops is the number of operations per chain, secs the elapsed time in seconds and ck_res the timer resolution in seconds. Integer fields are cast to long long and printed with %lld so the format matches the argument whatever the widths of int64 and int32 are. Reads thread_domain[0] and chain_domain[0][0], so it expects at least one thread and one chain. */ +void +Output::csv( Experiment &e, int64 ops, double secs, double ck_res ) +{ + printf("%lld,", (long long) e.pointer_size); + printf("%lld,", (long long) e.bytes_per_line); + printf("%lld,", (long long) e.bytes_per_page); + printf("%lld,", (long long) e.bytes_per_chain); + printf("%lld,", (long long) e.bytes_per_thread); + printf("%lld,", (long long) e.bytes_per_test); + printf("%lld,", (long long) e.chains_per_thread); + printf("%lld,", (long long) e.num_threads); + printf("%lld,", (long long) e.iterations); + printf("%lld,", (long long) e.experiments); +
printf("%s,", e.access()); + printf("%lld,", (long long) e.stride); + printf("%s,", e.placement()); + printf("%lld,", (long long) e.offset_or_mask); + printf("%lld,", (long long) e.num_numa_domains); + /* domain map, quoted: thread_domain:chain,chain;thread_domain:chain,... */ + printf("\""); + printf("%d:", e.thread_domain[0]); + printf("%d", e.chain_domain[0][0]); + for (int j=1; j < e.chains_per_thread; j++) { + printf(",%d", e.chain_domain[0][j]); + } + for (int i=1; i < e.num_threads; i++) { + printf(";%d:", e.thread_domain[i]); + printf("%d", e.chain_domain[i][0]); + for (int j=1; j < e.chains_per_thread; j++) { + printf(",%d", e.chain_domain[i][j]); + } + } + printf("\","); + printf("%lld,", (long long) ops); + printf("%lld,", (long long) (ops * e.chains_per_thread * e.num_threads)); + printf("%.3f,", secs); + printf("%.0f,", secs/ck_res); + printf("%.2f,", ck_res * 1E9); + printf("%.2f,", (secs / (ops * e.iterations)) * 1E9); + printf("%.3f\n", ((ops * e.iterations * e.chains_per_thread * e.num_threads * e.bytes_per_line) / secs) * 1E-6); + + fflush(stdout); +} + +/* Prints the same fields as csv() as labelled "name = value" lines, one per line. Integer fields are cast to long long and printed with %lld for the same reason as in csv(). */ +void +Output::table( Experiment &e, int64 ops, double secs, double ck_res ) +{ + printf("pointer size = %lld (bytes)\n", (long long) e.pointer_size); + printf("cache line size = %lld (bytes)\n", (long long) e.bytes_per_line); + printf("page size = %lld (bytes)\n", (long long) e.bytes_per_page); + printf("chain size = %lld (bytes)\n", (long long) e.bytes_per_chain); + printf("thread size = %lld (bytes)\n", (long long) e.bytes_per_thread); + printf("test size = %lld (bytes)\n", (long long) e.bytes_per_test); + printf("chains per thread = %lld\n", (long long) e.chains_per_thread); + printf("number of threads = %lld\n", (long long) e.num_threads); + printf("iterations = %lld\n", (long long) e.iterations); + printf("experiments = %lld\n", (long long) e.experiments); + printf("access pattern = %s\n", e.access()); + printf("stride = %lld\n", (long long) e.stride); + printf("numa placement = %s\n", e.placement()); + printf("offset or mask = %lld\n", (long long) e.offset_or_mask); + printf("numa domains = %lld\n", (long long) e.num_numa_domains); + printf("domain map = "); + printf("\""); + printf("%d:", e.thread_domain[0]); + printf("%d", e.chain_domain[0][0]); + for (int j=1; j < e.chains_per_thread; j++) { +
printf(",%d", e.chain_domain[0][j]); + } + for (int i=1; i < e.num_threads; i++) { + printf(";%d:", e.thread_domain[i]); + printf("%d", e.chain_domain[i][0]); + for (int j=1; j < e.chains_per_thread; j++) { + printf(",%d", e.chain_domain[i][j]); + } + } + printf("\"\n"); + printf("operations per chain = %lld\n", (long long) ops); + printf("total operations = %lld\n", (long long) (ops * e.chains_per_thread * e.num_threads)); + printf("elapsed time = %.3f (seconds)\n", secs); + printf("elapsed time = %.0f (timer ticks)\n", secs/ck_res); + printf("clock resolution = %.2f (ns)\n", ck_res * 1E9); + printf("memory latency = %.2f (ns)\n", (secs / (ops * e.iterations)) * 1E9); + printf("memory bandwidth = %.3f (MB/s)\n", ((ops * e.iterations * e.chains_per_thread * e.num_threads * e.bytes_per_line) / secs) * 1E-6); + + fflush(stdout); +} diff --git a/src/Output.h b/src/Output.h new file mode 100644 index 0000000..9ee2c80 --- /dev/null +++ b/src/Output.h @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. * + * All rights reserved. This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M.
Pase - initial API and implementation * + *******************************************************************************/ + + +#if !defined(Output_h) +#define Output_h + +#include "Types.h" +#include "Experiment.h" + +/* Static-only helper that writes benchmark results to stdout. All four functions take the experiment, the operations per chain, the elapsed seconds and the clock resolution. */ +class Output { +public: + static void print ( Experiment &e, int64 ops, double secs, double ck_res ); + static void header( Experiment &e, int64 ops, double secs, double ck_res ); + static void csv ( Experiment &e, int64 ops, double secs, double ck_res ); + static void table ( Experiment &e, int64 ops, double secs, double ck_res ); +private: +}; + +#endif diff --git a/src/Run.cpp b/src/Run.cpp new file mode 100644 index 0000000..4fb8057 --- /dev/null +++ b/src/Run.cpp @@ -0,0 +1,1206 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. * + * All rights reserved. This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M.
Pase - initial API and implementation * + *******************************************************************************/ + + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#if defined(NUMA) +#include <numa.h> +#endif + +#include "Run.h" + +#include "Chain.h" +#include "Timer.h" +#include "SpinBarrier.h" + + +static double max( double v1, double v2 ); +static double min( double v1, double v2 ); +static void chase_pointers(int64 chains_per_thread, int64 iterations, Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride); +static void follow_streams(int64 chains_per_thread, int64 iterations, Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride); +static void (*run_benchmark)(int64 chains_per_thread, int64 iterations, Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride) = chase_pointers; + +Lock Run::global_mutex; +int64 Run::_ops_per_chain = 0; +double Run::_seconds = 1E9; + +Run::Run() +: exp(NULL), bp(NULL) +{ +} + +Run::~Run() +{ +} + +/* Stores the experiment and the barrier this run will use. Only the addresses are kept, so both objects must outlive the run. */ +void +Run::set( Experiment &e, SpinBarrier* sbp ) +{ + this->exp = &e; + this->bp = sbp; +} + +/* Body of one benchmark thread. Allocates and links this thread's chains, derives the iteration count from a timed warm-up when exp->iterations <= 0, then runs exp->experiments timed passes and records the smallest elapsed time in Run::_seconds. Only thread 0 reads the timer, and each phase is separated by the shared SpinBarrier. Frees the chain storage and the root array before returning 0. */ +int +Run::run() +{ + // first allocate all memory for the chains, + // making sure it is allocated within the + // intended numa domains + Chain** chain_memory = new Chain* [ this->exp->chains_per_thread ]; + Chain** root = new Chain* [ this->exp->chains_per_thread ]; + +#if defined(NUMA) + // establish the node id where this thread + // will run. threads are mapped to nodes + // by the set-up code for Experiment. + int run_node_id = this->exp->thread_domain[this->thread_id()]; + numa_run_on_node(run_node_id); + + // establish the node id where this thread's + // memory will be allocated.
+ for (int i=0; i < this->exp->chains_per_thread; i++) { + int alloc_node_id = this->exp->chain_domain[this->thread_id()][i]; + nodemask_t alloc_mask; + nodemask_zero(&alloc_mask); + nodemask_set(&alloc_mask, alloc_node_id); + numa_set_membind(&alloc_mask); + + chain_memory[i] = new Chain[ this->exp->links_per_chain ]; + } +#else + for (int i=0; i < this->exp->chains_per_thread; i++) { + chain_memory[i] = new Chain[ this->exp->links_per_chain ]; + } +#endif + + // initialize the chains and + // select the function that + // will execute the tests + for (int i=0; i < this->exp->chains_per_thread; i++) { + if (this->exp->access_pattern == Experiment::RANDOM) { + root[i] = random_mem_init( chain_memory[i] ); + run_benchmark = chase_pointers; + } else if (this->exp->access_pattern == Experiment::STRIDED) { + if (0 < this->exp->stride) { + root[i] = forward_mem_init( chain_memory[i] ); + } else { + root[i] = reverse_mem_init( chain_memory[i] ); + } + run_benchmark = chase_pointers; + } else if (this->exp->access_pattern == Experiment::STREAM) { + root[i] = stream_mem_init( chain_memory[i] ); + run_benchmark = follow_streams; + } + } + + if (this->exp->iterations <= 0) { + volatile static double istart = 0; + volatile static double istop = 0; + volatile static double elapsed = 0; + volatile static int64 iters = 1; + volatile double bound = max(0.2, 10 * Timer::resolution()); + for (iters=1; elapsed <= bound; iters=iters<<1) { + this->bp->barrier(); + + // start timer + if (this->thread_id() == 0) { + istart = Timer::seconds(); + } + this->bp->barrier(); + + // chase pointers + run_benchmark(this->exp->chains_per_thread, iters, root, this->exp->bytes_per_line, this->exp->bytes_per_chain, this->exp->stride); + + // barrier + this->bp->barrier(); + + // stop timer + if (this->thread_id() == 0) { + istop = Timer::seconds(); + elapsed = istop - istart; + } + this->bp->barrier(); + } + + // calculate the number of iterations + if (this->thread_id() == 0) { + if (0 <
this->exp->seconds) { + this->exp->iterations = max(1, 0.9999 + 0.5 * this->exp->seconds * iters / elapsed); + } else { + this->exp->iterations = max(1, 0.9999 + iters / elapsed); + } + } + this->bp->barrier(); + } +#if defined(UNDEFINED) +#endif + + // barrier + for (int e=0; e < this->exp->experiments; e++) { + this->bp->barrier(); + + // start timer + double start = 0; + if (this->thread_id() == 0) start = Timer::seconds(); + this->bp->barrier(); + + // chase pointers + run_benchmark(this->exp->chains_per_thread, this->exp->iterations, root, this->exp->bytes_per_line, this->exp->bytes_per_chain, this->exp->stride); + + // barrier + this->bp->barrier(); + + // stop timer + double stop = 0; + if (this->thread_id() == 0) stop = Timer::seconds(); + this->bp->barrier(); + + if (0 <= e) { + if (this->thread_id() == 0) { + double delta = stop - start; + if (0 < delta) { + Run::_seconds = min( Run::_seconds, delta ); + } + } + } + } + + this->bp->barrier(); + + for (int i=0; i < this->exp->chains_per_thread; i++) { + if (chain_memory[i] != NULL) delete [] chain_memory[i]; + } + if (chain_memory != NULL) delete [] chain_memory; + /* the array of chain roots was allocated with new[] above and used to be leaked */ + delete [] root; + + return 0; +} + +int dummy = 0; +void +Run::mem_check( Chain *m ) +{ + if (m == NULL) dummy += 1; +} + +/* Returns the larger of the two values. */ +static double +max( double v1, double v2 ) +{ + if (v1 < v2) return v2; + return v1; +} + +/* Returns the smaller of the two values. */ +static double +min( double v1, double v2 ) +{ + if (v2 < v1) return v2; + return v1; +} + + // exclude 2 and Mersenne primes, i.e., + // primes of the form 2**n - 1, e.g., + // 3, 7, 31, 127 +static const int prime_table[] = { 5, 11, 13, 17, 19, 23, 37, 41, 43, 47, + 53, 61, 71, 73, 79, 83, 89, 97, 101, 103, 109, 113, 131, 137, 139, 149, + 151, 157, 163, }; +static const int prime_table_size = sizeof prime_table / sizeof prime_table[0]; + +Chain* +Run::random_mem_init( Chain *mem ) +{ + // initialize pointers -- + // choose a page at random, then use + // one pointer from each cache line + // within the page.
all pages and + // cache lines are chosen at random. + Chain* root = NULL; + Chain* prev = NULL; + int link_within_line = 0; + int64 local_ops_per_chain = 0; + + // we must set a lock because random() + // is not thread safe + Run::global_mutex.lock(); + setstate(this->exp->random_state[this->thread_id()]); + int page_factor = prime_table[ random() % prime_table_size ]; + int page_offset = random() % this->exp->pages_per_chain; + Run::global_mutex.unlock(); + + // loop through the pages + for (int i=0; i < this->exp->pages_per_chain; i++) { + int page = (page_factor * i + page_offset) % this->exp->pages_per_chain; + Run::global_mutex.lock(); + setstate(this->exp->random_state[this->thread_id()]); + int line_factor = prime_table[ random() % prime_table_size ]; + int line_offset = random() % this->exp->lines_per_page; + Run::global_mutex.unlock(); + + // loop through the lines within a page + for (int j=0; j < this->exp->lines_per_page; j++) { + int line_within_page = (line_factor * j + line_offset) % this->exp->lines_per_page; + int link = page * this->exp->links_per_page + line_within_page * this->exp->links_per_line + link_within_line; + + if (root == NULL) { +// printf("root = %d(%d)[0x%x].\n", page, line_within_page, mem+link); + prev = root = mem + link; + local_ops_per_chain += 1; + } else { +// printf("0x%x = %d(%d)[0x%x].\n", prev, page, line_within_page, mem+link); + prev->next = mem + link; + prev = prev->next; + local_ops_per_chain += 1; + } + } + } + + Run::global_mutex.lock(); + Run::_ops_per_chain = local_ops_per_chain; + Run::global_mutex.unlock(); + + return root; +} + +Chain* +Run::forward_mem_init( Chain *mem ) +{ + Chain* root = NULL; + Chain* prev = NULL; + int link_within_line = 0; + int64 local_ops_per_chain = 0; + + for (int i=0; i < this->exp->lines_per_chain; i += this->exp->stride) { + int link = i * this->exp->links_per_line + link_within_line; + if (root == NULL) { +// printf("root = %d(%d)[0x%x].\n", page, line_within_page, mem+link); +
prev = root = mem + link; + local_ops_per_chain += 1; + } else { +// printf("0x%x = %d(%d)[0x%x].\n", prev, page, line_within_page, mem+link); + prev->next = mem + link; + prev = prev->next; + local_ops_per_chain += 1; + } + } + + Run::global_mutex.lock(); + Run::_ops_per_chain = local_ops_per_chain; + Run::global_mutex.unlock(); + + return root; +} + +/* Links the cache lines of mem into a chain that starts at the highest line reachable from line 0 in steps of |exp->stride| and walks back down to line 0. Returns the first link of the chain, or NULL when exp->lines_per_chain is not positive, and publishes the number of links in Run::_ops_per_chain. */ +Chain* +Run::reverse_mem_init( Chain *mem ) +{ + Chain* root = NULL; + Chain* prev = NULL; + int link_within_line = 0; + int64 local_ops_per_chain = 0; + + int stride = -this->exp->stride; + /* a zero stride would make the scan below loop forever; fall back to walking every line */ + if (stride < 1) stride = 1; + /* -1 keeps the linking loop from running when there are no lines; this used to be read uninitialised in that case */ + int last = -1; + for (int i=0; i < this->exp->lines_per_chain; i += stride) { + last = i; + } + + for (int i=last; 0 <= i; i -= stride) { + int link = i * this->exp->links_per_line + link_within_line; + if (root == NULL) { +// printf("root = %d(%d)[0x%x].\n", page, line_within_page, mem+link); + prev = root = mem + link; + local_ops_per_chain += 1; + } else { +// printf("0x%x = %d(%d)[0x%x].\n", prev, page, line_within_page, mem+link); + prev->next = mem + link; + prev = prev->next; + local_ops_per_chain += 1; + } + } + + Run::global_mutex.lock(); + Run::_ops_per_chain = local_ops_per_chain; + Run::global_mutex.unlock(); + + return root; +} + +static int64 dumb_ck = 0; +void +mem_chk( Chain *m ) +{ + if (m == NULL) dumb_ck += 1; +} + +static void +chase_pointers( + int64 chains_per_thread, // memory loading per thread + int64 iterations, // number of iterations per experiment + Chain** root, // root(s) of the chain(s) to follow + int64 bytes_per_line, // ignored + int64 bytes_per_chain, // ignored + int64 stride // ignored +) +{ + // chase pointers + switch (chains_per_thread) { + default: + case 1: + for (int64 i=0; i < iterations; i++) { + Chain* a = root[0]; + while (a != NULL) { + a = a->next; + } + mem_chk( a ); + } + break; + case 2: + for (int64 i=0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + while (a != NULL) { + a = a->next; + b = b->next; + } + mem_chk( a ); + mem_chk( b ); + } + break; + case 3: +
for (int64 i=0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + while (a != NULL) { + a = a->next; + b = b->next; + c = c->next; + } + mem_chk( a ); + mem_chk( b ); + mem_chk( c ); + } + break; + case 4: + for (int64 i=0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + while (a != NULL) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + } + mem_chk( a ); + mem_chk( b ); + mem_chk( c ); + mem_chk( d ); + } + break; + case 5: + for (int64 i=0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + while (a != NULL) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + } + mem_chk( a ); + mem_chk( b ); + mem_chk( c ); + mem_chk( d ); + mem_chk( e ); + } + break; + case 6: + for (int64 i=0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + Chain* f = root[5]; + while (a != NULL) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + f = f->next; + } + mem_chk( a ); + mem_chk( b ); + mem_chk( c ); + mem_chk( d ); + mem_chk( e ); + mem_chk( f ); + } + break; + case 7: + for (int64 i=0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + Chain* f = root[5]; + Chain* g = root[6]; + while (a != NULL) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + f = f->next; + g = g->next; + } + mem_chk( a ); + mem_chk( b ); + mem_chk( c ); + mem_chk( d ); + mem_chk( e ); + mem_chk( f ); + mem_chk( g ); + } + break; + case 8: + for (int64 i=0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + Chain* f = root[5]; + Chain* g = root[6]; + Chain* h = root[7]; + while 
(a != NULL) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + f = f->next; + g = g->next; + h = h->next; + } + mem_chk( a ); + mem_chk( b ); + mem_chk( c ); + mem_chk( d ); + mem_chk( e ); + mem_chk( f ); + mem_chk( g ); + mem_chk( h ); + } + break; + case 9: + for (int64 i=0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + Chain* f = root[5]; + Chain* g = root[6]; + Chain* h = root[7]; + Chain* j = root[8]; + while (a != NULL) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + f = f->next; + g = g->next; + h = h->next; + j = j->next; + } + mem_chk( a ); + mem_chk( b ); + mem_chk( c ); + mem_chk( d ); + mem_chk( e ); + mem_chk( f ); + mem_chk( g ); + mem_chk( h ); + mem_chk( j ); + } + break; + case 10: + for (int64 i=0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + Chain* f = root[5]; + Chain* g = root[6]; + Chain* h = root[7]; + Chain* j = root[8]; + Chain* k = root[9]; + while (a != NULL) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + f = f->next; + g = g->next; + h = h->next; + j = j->next; + k = k->next; + } + mem_chk( a ); + mem_chk( b ); + mem_chk( c ); + mem_chk( d ); + mem_chk( e ); + mem_chk( f ); + mem_chk( g ); + mem_chk( h ); + mem_chk( j ); + mem_chk( k ); + } + break; + case 11: + for (int64 i=0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + Chain* f = root[5]; + Chain* g = root[6]; + Chain* h = root[7]; + Chain* j = root[8]; + Chain* k = root[9]; + Chain* l = root[10]; + while (a != NULL) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + f = f->next; + g = g->next; + h = h->next; + j = j->next; + k = k->next; + l = l->next; + } + mem_chk( a ); + mem_chk( b ); + mem_chk( c 
); + mem_chk( d ); + mem_chk( e ); + mem_chk( f ); + mem_chk( g ); + mem_chk( h ); + mem_chk( j ); + mem_chk( k ); + mem_chk( l ); + } + break; + case 12: + for (int64 i=0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + Chain* f = root[5]; + Chain* g = root[6]; + Chain* h = root[7]; + Chain* j = root[8]; + Chain* k = root[9]; + Chain* l = root[10]; + Chain* m = root[11]; + while (a != NULL) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + f = f->next; + g = g->next; + h = h->next; + j = j->next; + k = k->next; + l = l->next; + m = m->next; + } + mem_chk( a ); + mem_chk( b ); + mem_chk( c ); + mem_chk( d ); + mem_chk( e ); + mem_chk( f ); + mem_chk( g ); + mem_chk( h ); + mem_chk( j ); + mem_chk( k ); + mem_chk( l ); + mem_chk( m ); + } + break; + case 13: + for (int64 i=0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + Chain* f = root[5]; + Chain* g = root[6]; + Chain* h = root[7]; + Chain* j = root[8]; + Chain* k = root[9]; + Chain* l = root[10]; + Chain* m = root[11]; + Chain* n = root[12]; + while (a != NULL) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + f = f->next; + g = g->next; + h = h->next; + j = j->next; + k = k->next; + l = l->next; + m = m->next; + n = n->next; + } + mem_chk( a ); + mem_chk( b ); + mem_chk( c ); + mem_chk( d ); + mem_chk( e ); + mem_chk( f ); + mem_chk( g ); + mem_chk( h ); + mem_chk( j ); + mem_chk( k ); + mem_chk( l ); + mem_chk( m ); + mem_chk( n ); + } + break; + case 14: + for (int64 i=0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + Chain* f = root[5]; + Chain* g = root[6]; + Chain* h = root[7]; + Chain* j = root[8]; + Chain* k = root[9]; + Chain* l = root[10]; + Chain* m = root[11]; + Chain* n = root[12]; + 
Chain* o = root[13]; + while (a != NULL) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + f = f->next; + g = g->next; + h = h->next; + j = j->next; + k = k->next; + l = l->next; + m = m->next; + n = n->next; + o = o->next; + } + mem_chk( a ); + mem_chk( b ); + mem_chk( c ); + mem_chk( d ); + mem_chk( e ); + mem_chk( f ); + mem_chk( g ); + mem_chk( h ); + mem_chk( j ); + mem_chk( k ); + mem_chk( l ); + mem_chk( m ); + mem_chk( n ); + mem_chk( o ); + } + break; + case 15: + for (int64 i=0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + Chain* f = root[5]; + Chain* g = root[6]; + Chain* h = root[7]; + Chain* j = root[8]; + Chain* k = root[9]; + Chain* l = root[10]; + Chain* m = root[11]; + Chain* n = root[12]; + Chain* o = root[13]; + Chain* p = root[14]; + while (a != NULL) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + f = f->next; + g = g->next; + h = h->next; + j = j->next; + k = k->next; + l = l->next; + m = m->next; + n = n->next; + o = o->next; + p = p->next; + } + mem_chk( a ); + mem_chk( b ); + mem_chk( c ); + mem_chk( d ); + mem_chk( e ); + mem_chk( f ); + mem_chk( g ); + mem_chk( h ); + mem_chk( j ); + mem_chk( k ); + mem_chk( l ); + mem_chk( m ); + mem_chk( n ); + mem_chk( o ); + mem_chk( p ); + } + break; + case 16: + for (int64 i=0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + Chain* f = root[5]; + Chain* g = root[6]; + Chain* h = root[7]; + Chain* j = root[8]; + Chain* k = root[9]; + Chain* l = root[10]; + Chain* m = root[11]; + Chain* n = root[12]; + Chain* o = root[13]; + Chain* p = root[14]; + Chain* q = root[15]; + while (a != NULL) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + f = f->next; + g = g->next; + h = h->next; + j = j->next; + k = k->next; + l = l->next; + m = m->next; + 
n = n->next; + o = o->next; + p = p->next; + q = q->next; + } + mem_chk( a ); + mem_chk( b ); + mem_chk( c ); + mem_chk( d ); + mem_chk( e ); + mem_chk( f ); + mem_chk( g ); + mem_chk( h ); + mem_chk( j ); + mem_chk( k ); + mem_chk( l ); + mem_chk( m ); + mem_chk( n ); + mem_chk( o ); + mem_chk( p ); + mem_chk( q ); + } + } +} + + // NOT WRITTEN YET -- DMP + // JUST A PLACE HOLDER! +Chain* +Run::stream_mem_init( Chain *mem ) +{ +// fprintf(stderr, "made it into stream_mem_init.\n"); +// fprintf(stderr, "chains_per_thread = %ld\n", this->exp->chains_per_thread); +// fprintf(stderr, "iterations = %ld\n", this->exp->iterations); +// fprintf(stderr, "bytes_per_chain = %ld\n", this->exp->bytes_per_chain); +// fprintf(stderr, "stride = %ld\n", this->exp->stride); + int64 local_ops_per_chain = 0; + double* tmp = (double *) mem; + int64 refs_per_line = this->exp->bytes_per_line / sizeof(double); + int64 refs_per_chain = this->exp->bytes_per_chain / sizeof(double); +// fprintf(stderr, "refs_per_chain = %ld\n", refs_per_chain); + + for (int64 i=0; i < refs_per_chain; i += this->exp->stride*refs_per_line) { + tmp[i] = 0; + local_ops_per_chain += 1; + } + + Run::global_mutex.lock(); + Run::_ops_per_chain = local_ops_per_chain; + Run::global_mutex.unlock(); + +// fprintf(stderr, "made it out of stream_mem_init.\n"); + return mem; +} + +static int64 summ_ck = 0; +void +sum_chk( double t ) +{ + if (t != 0) summ_ck += 1; +} + + // NOT WRITTEN YET -- DMP + // JUST A PLACE HOLDER! 
+static void +follow_streams( + int64 chains_per_thread, // memory loading per thread + int64 iterations, // number of iterations per experiment + Chain** root, // root(s) of the chain(s) to follow + int64 bytes_per_line, // ignored + int64 bytes_per_chain, // ignored + int64 stride // ignored +) +{ + int64 refs_per_line = bytes_per_line / sizeof(double); + int64 refs_per_chain = bytes_per_chain / sizeof(double); + + // chase pointers + switch (chains_per_thread) { + default: + case 1: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j]; + } + sum_chk( t ); + } + break; + case 2: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j]; + } + sum_chk( t ); + } + break; + case 3: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2[j]; + } + sum_chk( t ); + } + break; + case 4: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j]; + } + sum_chk( t ); + } + break; + case 5: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + double* a4 = (double *) root[4]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j]; + } + sum_chk( t ); + } + break; + case 6: + for 
(int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + double* a4 = (double *) root[4]; + double* a5 = (double *) root[5]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j]; + } + sum_chk( t ); + } + break; + case 7: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + double* a4 = (double *) root[4]; + double* a5 = (double *) root[5]; + double* a6 = (double *) root[6]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]; + } + sum_chk( t ); + } + break; + case 8: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + double* a4 = (double *) root[4]; + double* a5 = (double *) root[5]; + double* a6 = (double *) root[6]; + double* a7 = (double *) root[7]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j]; + } + sum_chk( t ); + } + break; + case 9: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + double* a4 = (double *) root[4]; + double* a5 = (double *) root[5]; + double* a6 = (double *) root[6]; + double* a7 = (double *) root[7]; + double* a8 = (double *) root[8]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j] + + a8[j]; + } + sum_chk( t ); + } + break; + case 10: + for (int64 i=0; i < 
iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + double* a4 = (double *) root[4]; + double* a5 = (double *) root[5]; + double* a6 = (double *) root[6]; + double* a7 = (double *) root[7]; + double* a8 = (double *) root[8]; + double* a9 = (double *) root[9]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j] + + a8[j] + a9[j]; + } + sum_chk( t ); + } + break; + case 11: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[ 0]; + double* a1 = (double *) root[ 1]; + double* a2 = (double *) root[ 2]; + double* a3 = (double *) root[ 3]; + double* a4 = (double *) root[ 4]; + double* a5 = (double *) root[ 5]; + double* a6 = (double *) root[ 6]; + double* a7 = (double *) root[ 7]; + double* a8 = (double *) root[ 8]; + double* a9 = (double *) root[ 9]; + double* a10 = (double *) root[10]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2 [j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j] + + a8[j] + a9[j] + a10[j]; + } + sum_chk( t ); + } + break; + case 12: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[ 0]; + double* a1 = (double *) root[ 1]; + double* a2 = (double *) root[ 2]; + double* a3 = (double *) root[ 3]; + double* a4 = (double *) root[ 4]; + double* a5 = (double *) root[ 5]; + double* a6 = (double *) root[ 6]; + double* a7 = (double *) root[ 7]; + double* a8 = (double *) root[ 8]; + double* a9 = (double *) root[ 9]; + double* a10 = (double *) root[10]; + double* a11 = (double *) root[11]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4[j] + a5[j] + a6[j] + a7[j] + + a8[j] + a9[j] + a10[j] + a11[j]; + } + sum_chk( t ); + } + break; + case 13: + for (int64 i=0; i < iterations; i++) { + 
double t = 0; + double* a0 = (double *) root[ 0]; + double* a1 = (double *) root[ 1]; + double* a2 = (double *) root[ 2]; + double* a3 = (double *) root[ 3]; + double* a4 = (double *) root[ 4]; + double* a5 = (double *) root[ 5]; + double* a6 = (double *) root[ 6]; + double* a7 = (double *) root[ 7]; + double* a8 = (double *) root[ 8]; + double* a9 = (double *) root[ 9]; + double* a10 = (double *) root[10]; + double* a11 = (double *) root[11]; + double* a12 = (double *) root[12]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5[j] + a6[j] + a7[j] + + a8[j] + a9[j] + a10[j] + a11[j] + a12[j]; + } + sum_chk( t ); + } + break; + case 14: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[ 0]; + double* a1 = (double *) root[ 1]; + double* a2 = (double *) root[ 2]; + double* a3 = (double *) root[ 3]; + double* a4 = (double *) root[ 4]; + double* a5 = (double *) root[ 5]; + double* a6 = (double *) root[ 6]; + double* a7 = (double *) root[ 7]; + double* a8 = (double *) root[ 8]; + double* a9 = (double *) root[ 9]; + double* a10 = (double *) root[10]; + double* a11 = (double *) root[11]; + double* a12 = (double *) root[12]; + double* a13 = (double *) root[13]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5 [j] + a6[j] + a7[j] + + a8[j] + a9[j] + a10[j] + a11[j] + a12[j] + a13[j]; + } + sum_chk( t ); + } + break; + case 15: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[ 0]; + double* a1 = (double *) root[ 1]; + double* a2 = (double *) root[ 2]; + double* a3 = (double *) root[ 3]; + double* a4 = (double *) root[ 4]; + double* a5 = (double *) root[ 5]; + double* a6 = (double *) root[ 6]; + double* a7 = (double *) root[ 7]; + double* a8 = (double *) root[ 8]; + double* a9 = (double *) root[ 9]; + double* a10 = (double *) root[10]; + double* a11 = (double *) 
root[11]; + double* a12 = (double *) root[12]; + double* a13 = (double *) root[13]; + double* a14 = (double *) root[14]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5 [j] + a6 [j] + a7[j] + + a8[j] + a9[j] + a10[j] + a11[j] + a12[j] + a13[j] + a14[j]; + } + sum_chk( t ); + } + break; + case 16: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[ 0]; + double* a1 = (double *) root[ 1]; + double* a2 = (double *) root[ 2]; + double* a3 = (double *) root[ 3]; + double* a4 = (double *) root[ 4]; + double* a5 = (double *) root[ 5]; + double* a6 = (double *) root[ 6]; + double* a7 = (double *) root[ 7]; + double* a8 = (double *) root[ 8]; + double* a9 = (double *) root[ 9]; + double* a10 = (double *) root[10]; + double* a11 = (double *) root[11]; + double* a12 = (double *) root[12]; + double* a13 = (double *) root[13]; + double* a14 = (double *) root[14]; + double* a15 = (double *) root[15]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5 [j] + a6 [j] + a7 [j] + + a8[j] + a9[j] + a10[j] + a11[j] + a12[j] + a13[j] + a14[j] + a15[j]; + } + sum_chk( t ); + } + break; + } +} diff --git a/src/Run.h b/src/Run.h new file mode 100644 index 0000000..810c2e8 --- /dev/null +++ b/src/Run.h @@ -0,0 +1,50 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. * + * All rights reserved. This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M. 
Pase - initial API and implementation * + *******************************************************************************/ + + +#if !defined(Run_h) +#define Run_h + +#include "Thread.h" + +#include "Lock.h" +#include "Chain.h" +#include "Types.h" +#include "Experiment.h" +#include "SpinBarrier.h" + +class Run: public Thread { +public: + Run(); + ~Run(); + int run(); + void set( Experiment &e, SpinBarrier* sbp ); + + static int64 ops_per_chain() { return _ops_per_chain; } + static double seconds() { return _seconds; } + +private: + Experiment* exp; // experiment data + SpinBarrier* bp; // spin barrier used by all threads + + void mem_check( Chain *m ); + Chain* random_mem_init( Chain *m ); + Chain* forward_mem_init( Chain *m ); + Chain* reverse_mem_init( Chain *m ); + Chain* stream_mem_init( Chain *m ); + + static Lock global_mutex; // global lock + static int64 _ops_per_chain; // total number of operations per chain + static double _seconds; // total number of seconds +}; + + +#endif diff --git a/src/SpinBarrier.cpp b/src/SpinBarrier.cpp new file mode 100644 index 0000000..d3d2d7b --- /dev/null +++ b/src/SpinBarrier.cpp @@ -0,0 +1,48 @@ +/*******************************************************************************
+ * Copyright (c) 2006 International Business Machines Corporation. *
+ * All rights reserved. This program and the accompanying materials *
+ * are made available under the terms of the Common Public License v1.0 *
+ * which accompanies this distribution, and is available at *
+ * http://www.opensource.org/licenses/cpl1.0.php *
+ * *
+ * Contributors: *
+ * Douglas M. Pase - initial API and implementation *
+ *******************************************************************************/
+
+
+/******************************************************************************
+ * *
+ * SpinBarrier *
+ * *
+ * Author: Douglas M. Pase *
+ * *
+ * Date: September 21, 2000 *
+ * Translated to C++, June 19, 2005 *
+ * *
+ * void barrier() *
+ * *
+ ******************************************************************************/
+#include <stdio.h>
+#include <pthread.h>
+
+#include "SpinBarrier.h"
+
+    // create a new barrier that releases its waiters once
+    // `participants` threads have entered it.
+    //
+    // pthread_barrier_init() reports failure through its return value
+    // (POSIX lists EINVAL for a zero count, plus EAGAIN / ENOMEM), so the
+    // result is checked and reported instead of being silently dropped;
+    // a barrier that failed to initialize must not be waited on.
+SpinBarrier::SpinBarrier(int participants)
+: limit( participants )
+{
+    int rc = pthread_barrier_init( &this->barrier_obj, NULL, this->limit );
+    if (rc != 0) {
+        fprintf( stderr, "SpinBarrier: pthread_barrier_init failed (error %d)\n", rc );
+    }
+}
+
+    // destroy an old barrier.
+    // hands barrier_obj back to the pthread library; with an empty
+    // destructor body the barrier's resources would never be released.
+    // NOTE(review): POSIX leaves destroying a barrier that still has
+    // threads blocked on it undefined -- callers are presumed to join
+    // their threads before the SpinBarrier goes away; confirm at call sites.
+SpinBarrier::~SpinBarrier()
+{
+    pthread_barrier_destroy( &this->barrier_obj );
+}
+
+    // block the calling thread at the barrier. nobody is released
+    // until the last participant has arrived; at that point every
+    // waiter leaves together. the value returned by
+    // pthread_barrier_wait() is intentionally not inspected.
+void SpinBarrier::barrier()
+{
+    pthread_barrier_t* const target = &barrier_obj;
+    pthread_barrier_wait( target );
+}
diff --git a/src/SpinBarrier.h b/src/SpinBarrier.h new file mode 100644 index 0000000..f0b76d3 --- /dev/null +++ b/src/SpinBarrier.h @@ -0,0 +1,44 @@ +/*******************************************************************************
+ * Copyright (c) 2006 International Business Machines Corporation. *
+ * All rights reserved. This program and the accompanying materials *
+ * are made available under the terms of the Common Public License v1.0 *
+ * which accompanies this distribution, and is available at *
+ * http://www.opensource.org/licenses/cpl1.0.php *
+ * *
+ * Contributors: *
+ * Douglas M. Pase - initial API and implementation *
+ *******************************************************************************/
+
+
+/******************************************************************************
+ * *
+ * SpinBarrier *
+ * *
+ * Author: Douglas M. Pase *
+ * *
+ * Date: September 21, 2000 *
+ * Translated to C++, June 19, 2005 *
+ * Rewritten August 13, 2005 *
+ * *
+ * void barrier() *
+ * *
+ ******************************************************************************/
+
+#if !defined( SpinBarrier_h )
+#define SpinBarrier_h
+
+#include <pthread.h>
+
+// Rendezvous point for a fixed number of threads, implemented on top of
+// a POSIX pthread_barrier_t.
+//
+//   SpinBarrier(participants)  set up the barrier for that many threads
+//   barrier()                  wait until all participants have arrived
+//
+// NOTE(review): the class is implicitly copyable, but POSIX leaves the
+// use of a copied pthread_barrier_t undefined -- presumably instances are
+// only ever shared by pointer or reference; confirm before copying one.
+class SpinBarrier {
+    // members of a class are private unless stated otherwise
+    int limit;                      // number of barrier participants
+    pthread_barrier_t barrier_obj;  // the underlying pthread barrier
+
+public:
+    SpinBarrier(int participants);
+    ~SpinBarrier();
+
+    void barrier();
+};
+
+#endif
diff --git a/src/Thread.cpp b/src/Thread.cpp new file mode 100644 index 0000000..8908cfe --- /dev/null +++ b/src/Thread.cpp @@ -0,0 +1,86 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. * + * All rights reserved. This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M. Pase - initial API and implementation * + *******************************************************************************/ + + +#include <stdio.h> +#include <pthread.h> +#include <unistd.h> + +#include "Thread.h" + +#include "Lock.h" + +Lock Thread::_global_lock; +int Thread::count = 0; + +Thread::Thread() +{ + Thread::global_lock(); + this->id = Thread::count; + Thread::count += 1; + Thread::global_unlock(); +} + +Thread::~Thread() +{ +} + +int +Thread::start() +{ + return pthread_create(&this->thread, NULL, Thread::start_routine, this); +} + +void* +Thread::start_routine(void* p) +{ + ((Thread*)p)->run(); + + return NULL; +} + +void +Thread::exit() +{ + pthread_exit(NULL); +} + +int +Thread::wait() +{ + pthread_join(this->thread, NULL); + + return 0; +} + +void +Thread::lock() +{ + this->object_lock.lock(); +} + +void +Thread::unlock() +{ + this->object_lock.unlock(); +} + +void +Thread::global_lock() +{ + Thread::_global_lock.lock(); +} + +void +Thread::global_unlock() +{ + Thread::_global_lock.unlock(); +} diff --git a/src/Thread.h b/src/Thread.h new file mode 100644 index 0000000..3948f56 --- /dev/null +++ b/src/Thread.h @@ -0,0 +1,53 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. * + * All rights reserved. 
This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M. Pase - initial API and implementation * + *******************************************************************************/ + + +#if !defined(Thread_h) +#define Thread_h + +#include <pthread.h> + +#include "Lock.h" + +// Abstract base class for a pthread-backed thread: a derived class supplies run(). +class Thread { +public: + Thread(); + // virtual: Thread is a polymorphic base (run() is pure virtual), so deleting + // a derived object through a Thread* must reach the derived destructor + virtual ~Thread(); + + virtual int run() = 0; + + int start(); + int wait(); + int thread_count() { return Thread::count; } + int thread_id() { return id; } + + static void exit(); + +protected: + void lock(); + void unlock(); + static void global_lock(); + static void global_unlock(); + +private: + static void* start_routine(void *); + static Lock _global_lock; + + Lock object_lock; + + pthread_t thread; + + static int count; + int id; + int lock_obj; +}; + +#endif diff --git a/src/Timer.cpp b/src/Timer.cpp new file mode 100644 index 0000000..b326048 --- /dev/null +++ b/src/Timer.cpp @@ -0,0 +1,175 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. * + * All rights reserved. This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M.
Pase - initial API and implementation * + *******************************************************************************/ + + +#include <stdio.h> +#include <sys/time.h> + +#include "Timer.h" + +#include "Types.h" + +static int64 read_rtc(); +static void calibrate_rtc(int n); +static double wall_seconds(); + +static int wall_ticks = -1; +static int rtc_ticks = -1; +static double wall_elapsed = -1; +static int64 rtc_elapsed = -1; +static double time_factor = -1; + +#if !defined(RTC) && !defined(GTOD) +#define RTC +#endif + +#if defined(RTC) + +double +Timer::seconds() +{ + return (double) read_rtc() * time_factor; +} + +int64 +Timer::ticks() +{ + // See pg. 406 of the AMD x86-64 Architecture + // Programmer's Manual, Volume 2, System Programming + unsigned int eax=0, edx=0; + + __asm__ __volatile__( + "rdtsc ;" + "movl %%eax,%0;" + "movl %%edx,%1;" + "" + : "=r"(eax), "=r"(edx) + : + : "%eax", "%edx" + ); + + return ((int64) edx << 32) | (int64) eax; +} + +static int64 +read_rtc() +{ + // See pg. 
406 of the AMD x86-64 Architecture + // Programmer's Manual, Volume 2, System Programming + unsigned int eax=0, edx=0; + + __asm__ __volatile__( + "rdtsc ;" + "movl %%eax,%0;" + "movl %%edx,%1;" + "" + : "=r"(eax), "=r"(edx) + : + : "%eax", "%edx" + ); + + return ((int64) edx << 32) | (int64) eax; +} + +void +Timer::calibrate() +{ + Timer::calibrate(1000); +} + +void +Timer::calibrate(int n) +{ + wall_ticks = n; + + double wall_start,wall_finish,t; + t = wall_seconds(); + while (t == (wall_start=wall_seconds())) { + ; + } + int64 rtc_start = read_rtc(); + for (int i=0; i < wall_ticks; i++) { + t = wall_seconds(); + while (t == (wall_finish=wall_seconds())) { + ; + } + } + int64 rtc_finish = read_rtc(); + + wall_elapsed = wall_finish - wall_start; + rtc_elapsed = rtc_finish - rtc_start; + time_factor = wall_elapsed / (double) rtc_elapsed; +} + +static double +wall_seconds() +{ + struct timeval t; + gettimeofday(&t, NULL); + + return (double) t.tv_sec + (double) t.tv_usec * 1E-6; +} + +#else + +double +Timer::seconds() +{ + struct timeval t; + gettimeofday(&t, NULL); + + return (double) t.tv_sec + (double) t.tv_usec * 1E-6; +} + +int64 +Timer::ticks() +{ + struct timeval t; + gettimeofday(&t, NULL); + + return 1000000 * (int64) t.tv_sec + (int64) t.tv_usec; +} + +void +Timer::calibrate() +{ +} + +void +Timer::calibrate(int n) +{ +} + +#endif + +static double +min( double v1, double v2 ) +{ + if (v2 < v1) return v2; + return v1; +} + +double +Timer::resolution() +{ + double a,b,c=1E9; + for (int i=0; i < 10; i++) { + a = Timer::seconds(); + while (a == (b=Timer::seconds())) + ; + a = Timer::seconds(); + while (a == (b=Timer::seconds())) + ; + c = min(b - a, c); + } + + return c; +} diff --git a/src/Timer.h b/src/Timer.h new file mode 100644 index 0000000..ba2c503 --- /dev/null +++ b/src/Timer.h @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. 
* + * All rights reserved. This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M. Pase - initial API and implementation * + *******************************************************************************/ + + +#if !defined(Timer_h) +#define Timer_h + +#include "Types.h" + +class Timer { +public: + static double seconds(); + static double resolution(); + static int64 ticks(); + static void calibrate(); + static void calibrate(int n); +private: +}; + +#endif diff --git a/src/Types.cpp b/src/Types.cpp new file mode 100644 index 0000000..da5ecd0 --- /dev/null +++ b/src/Types.cpp @@ -0,0 +1,13 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. * + * All rights reserved. This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M. Pase - initial API and implementation * + *******************************************************************************/ + + +#include "Types.h" diff --git a/src/Types.h b/src/Types.h new file mode 100644 index 0000000..9e2eeb0 --- /dev/null +++ b/src/Types.h @@ -0,0 +1,29 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. * + * All rights reserved. 
This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M. Pase - initial API and implementation * + *******************************************************************************/ + + +#if !defined(Types_h) +#define Types_h + +typedef long long int64; +typedef int int32; +typedef short int16; +typedef char int8; + +typedef unsigned long long uint64; +typedef unsigned int uint32; +typedef unsigned short uint16; +typedef unsigned char uint8; + +typedef double float64; +typedef float float32; + +#endif |