summaryrefslogtreecommitdiff
path: root/src/experiment.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/experiment.cpp')
-rw-r--r--src/experiment.cpp711
1 files changed, 711 insertions, 0 deletions
diff --git a/src/experiment.cpp b/src/experiment.cpp
new file mode 100644
index 0000000..7e5d318
--- /dev/null
+++ b/src/experiment.cpp
@@ -0,0 +1,711 @@
+/*******************************************************************************
+ * Copyright (c) 2006 International Business Machines Corporation. *
+ * All rights reserved. This program and the accompanying materials *
+ * are made available under the terms of the Common Public License v1.0 *
+ * which accompanies this distribution, and is available at *
+ * http://www.opensource.org/licenses/cpl1.0.php *
+ * *
+ * Contributors: *
+ * Douglas M. Pase - initial API and implementation *
+ *******************************************************************************/
+
+//
+// Configuration
+//
+
+// Implementation header
+#include "experiment.h"
+
+// System includes
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#if defined(NUMA)
+#include <numa.h>
+#endif
+
+// Local includes
+#include "chain.h"
+
+
+//
+// Implementation
+//
+
+Experiment::Experiment() :
+ strict (0),
+ pointer_size (DEFAULT_POINTER_SIZE),
+ bytes_per_line (DEFAULT_BYTES_PER_LINE),
+ links_per_line (DEFAULT_LINKS_PER_LINE),
+ bytes_per_page (DEFAULT_BYTES_PER_PAGE),
+ lines_per_page (DEFAULT_LINES_PER_PAGE),
+ links_per_page (DEFAULT_LINKS_PER_PAGE),
+ bytes_per_chain (DEFAULT_BYTES_PER_CHAIN),
+ lines_per_chain (DEFAULT_LINES_PER_CHAIN),
+ links_per_chain (DEFAULT_LINKS_PER_CHAIN),
+ pages_per_chain (DEFAULT_PAGES_PER_CHAIN),
+ chains_per_thread(DEFAULT_CHAINS_PER_THREAD),
+ bytes_per_thread (DEFAULT_BYTES_PER_THREAD),
+ num_threads (DEFAULT_THREADS),
+ bytes_per_test (DEFAULT_BYTES_PER_TEST),
+ busy_cycles (DEFAULT_BUSY_CYCLES),
+ seconds (DEFAULT_SECONDS),
+ iterations (DEFAULT_ITERATIONS),
+ experiments (DEFAULT_EXPERIMENTS),
+ prefetch (DEFAULT_PREFETCH),
+ output_mode (TABLE),
+ access_pattern (RANDOM),
+ stride (1),
+ numa_placement (LOCAL),
+ offset_or_mask (0),
+ placement_map (NULL),
+ thread_domain (NULL),
+ chain_domain (NULL),
+ numa_max_domain (0),
+ num_numa_domains (1)
+{
+}
+
+Experiment::~Experiment() {
+}
+
+// interface:
+//
+// -l or --line bytes per cache line (line size)
+// -p or --page bytes per page (page size)
+// -c or --chain bytes per chain (used to compute pages per chain)
+// -r or --references chains per thread (memory loading)
+// -t or --threads number of threads (concurrency and contention)
+// -i or --iters iterations
+// -e or --experiments experiments
+// -b or --busy amount of cycles processor should remain busy
+// -f or --prefetch prefetch data
+// -a or --access memory access pattern
+// random random access pattern
+// forward <stride> exclusive OR and mask
+// reverse <stride> addition and offset
+// -o or --output output mode
+// hdr header only
+// csv csv only
+// both header + csv
+// table human-readable table of values
+// -n or --numa numa placement
+// local local allocation of all chains
+// xor <mask> exclusive OR and mask
+// add <offset> addition and offset
+// map <map> explicit mapping of threads and chains to domains
+
+int Experiment::parse_args(int argc, char* argv[]) {
+ int error = 0;
+ for (int i = 1; i < argc; i++) {
+ if (strcasecmp(argv[i], "-x") == 0
+ || strcasecmp(argv[i], "--strict") == 0) {
+ this->strict = 1;
+ } else if (strcasecmp(argv[i], "-s") == 0
+ || strcasecmp(argv[i], "--seconds") == 0) {
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ this->seconds = Experiment::parse_real(argv[i]);
+ this->iterations = 0;
+ if (this->seconds == 0) {
+ error = 1;
+ break;
+ }
+ } else if (strcasecmp(argv[i], "-l") == 0
+ || strcasecmp(argv[i], "--line") == 0) {
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ this->bytes_per_line = Experiment::parse_number(argv[i]);
+ if (this->bytes_per_line == 0) {
+ error = 1;
+ break;
+ }
+ } else if (strcasecmp(argv[i], "-p") == 0
+ || strcasecmp(argv[i], "--page") == 0) {
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ this->bytes_per_page = Experiment::parse_number(argv[i]);
+ if (this->bytes_per_page == 0) {
+ error = 1;
+ break;
+ }
+ } else if (strcasecmp(argv[i], "-c") == 0
+ || strcasecmp(argv[i], "--chain") == 0) {
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ this->bytes_per_chain = Experiment::parse_number(argv[i]);
+ if (this->bytes_per_chain == 0) {
+ error = 1;
+ break;
+ }
+ } else if (strcasecmp(argv[i], "-r") == 0
+ || strcasecmp(argv[i], "--references") == 0) {
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ this->chains_per_thread = Experiment::parse_number(argv[i]);
+ if (this->chains_per_thread == 0) {
+ error = 1;
+ break;
+ }
+ } else if (strcasecmp(argv[i], "-t") == 0
+ || strcasecmp(argv[i], "--threads") == 0) {
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ this->num_threads = Experiment::parse_number(argv[i]);
+ if (this->num_threads == 0) {
+ error = 1;
+ break;
+ }
+ } else if (strcasecmp(argv[i], "-i") == 0
+ || strcasecmp(argv[i], "--iterations") == 0) {
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ this->iterations = Experiment::parse_number(argv[i]);
+ this->seconds = 0;
+ if (this->iterations == 0) {
+ error = 1;
+ break;
+ }
+ } else if (strcasecmp(argv[i], "-e") == 0
+ || strcasecmp(argv[i], "--experiments") == 0) {
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ this->experiments = Experiment::parse_number(argv[i]);
+ if (this->experiments == 0) {
+ error = 1;
+ break;
+ }
+ } else if (strcasecmp(argv[i], "-b") == 0
+ || strcasecmp(argv[i], "--busy") == 0) {
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ this->busy_cycles = Experiment::parse_number(argv[i]);
+ if (this->experiments == 0) {
+ error = 1;
+ break;
+ }
+ } else if (strcasecmp(argv[i], "-f") == 0
+ || strcasecmp(argv[i], "--prefetch") == 0) {
+ this->prefetch = true;
+ } else if (strcasecmp(argv[i], "-a") == 0
+ || strcasecmp(argv[i], "--access") == 0) {
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ if (strcasecmp(argv[i], "random") == 0) {
+ this->access_pattern = RANDOM;
+ } else if (strcasecmp(argv[i], "forward") == 0) {
+ this->access_pattern = STRIDED;
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ this->stride = Experiment::parse_number(argv[i]);
+ if (this->stride == 0) {
+ error = 1;
+ break;
+ }
+ } else if (strcasecmp(argv[i], "reverse") == 0) {
+ this->access_pattern = STRIDED;
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ this->stride = -Experiment::parse_number(argv[i]);
+ if (this->stride == 0) {
+ error = 1;
+ break;
+ }
+ } else if (strcasecmp(argv[i], "stream") == 0) {
+ this->access_pattern = STREAM;
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ this->stride = Experiment::parse_number(argv[i]);
+ if (this->stride == 0) {
+ error = 1;
+ break;
+ }
+ } else {
+ error = 1;
+ break;
+ }
+ } else if (strcasecmp(argv[i], "-o") == 0
+ || strcasecmp(argv[i], "--output") == 0) {
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ if (strcasecmp(argv[i], "table") == 0) {
+ this->output_mode = TABLE;
+ } else if (strcasecmp(argv[i], "csv") == 0) {
+ this->output_mode = CSV;
+ } else if (strcasecmp(argv[i], "both") == 0) {
+ this->output_mode = BOTH;
+ } else if (strcasecmp(argv[i], "hdr") == 0) {
+ this->output_mode = HEADER;
+ } else if (strcasecmp(argv[i], "header") == 0) {
+ this->output_mode = HEADER;
+ } else {
+ error = 1;
+ break;
+ }
+ } else if (strcasecmp(argv[i], "-n") == 0
+ || strcasecmp(argv[i], "--numa") == 0) {
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ if (strcasecmp(argv[i], "local") == 0) {
+ this->numa_placement = LOCAL;
+ } else if (strcasecmp(argv[i], "xor") == 0) {
+ this->numa_placement = XOR;
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ this->offset_or_mask = Experiment::parse_number(argv[i]);
+ } else if (strcasecmp(argv[i], "add") == 0) {
+ this->numa_placement = ADD;
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ this->offset_or_mask = Experiment::parse_number(argv[i]);
+ } else if (strcasecmp(argv[i], "map") == 0) {
+ this->numa_placement = MAP;
+ i++;
+ if (i == argc) {
+ error = 1;
+ break;
+ }
+ this->placement_map = argv[i];
+ } else {
+ error = 1;
+ break;
+ }
+ } else {
+ error = 1;
+ break;
+ }
+ }
+
+
+ // if we've hit an error, print a message and quit
+ if (error) {
+ printf("usage: %s <options>\n", argv[0]);
+ printf("where <options> are selected from the following:\n");
+ printf(" [-h|--help] # this message\n");
+ printf(" [-l|--line] <number> # bytes per cache line (cache line size)\n");
+ printf(" [-p|--page] <number> # bytes per page (page size)\n");
+ printf(" [-c|--chain] <number> # bytes per chain (used to compute pages per chain)\n");
+ printf(" [-r|--references] <number> # chains per thread (memory loading)\n");
+ printf(" [-t|--threads] <number> # number of threads (concurrency and contention)\n");
+ printf(" [-i|--iterations] <number> # iterations per experiment\n");
+ printf(" [-e|--experiments] <number> # experiments\n");
+ printf(" [-a|--access] <pattern> # memory access pattern\n");
+ printf(" [-o|--output] <format> # output format\n");
+ printf(" [-n|--numa] <placement> # numa placement\n");
+ printf(" [-s|--seconds] <number> # run each experiment for <number> seconds\n");
+ printf(" [-b|--busy] <number> # how much processing cycles each loop should count\n");
+ printf(" [-f|--prefetch] # prefetch data\n");
+ printf(" [-x|--strict] # fail rather than adjust options to sensible values\n");
+ printf("\n");
+ printf("<pattern> is selected from the following:\n");
+ printf(" random # all chains are accessed randomly\n");
+ printf(" forward <stride> # chains are in forward order with constant stride\n");
+ printf(" reverse <stride> # chains are in reverse order with constant stride\n");
+ printf(" stream <stride> # references are calculated rather than read from memory\n");
+ printf("\n");
+ printf("Note: <stride> is always a small positive integer.\n");
+ printf("\n");
+ printf("<format> is selected from the following:\n");
+ printf(" hdr # csv header only\n");
+ printf(" csv # results in csv format only\n");
+ printf(" both # header and results in csv format\n");
+ printf(" table # human-readable table of values\n");
+ printf("\n");
+ printf("<placement> is selected from the following:\n");
+ printf(" local # all chains are allocated locally\n");
+ printf(" xor <mask> # exclusive OR and mask\n");
+ printf(" add <offset> # addition and offset\n");
+ printf(" map <map> # explicit mapping of threads and chains to domains\n");
+ printf("\n");
+ printf("<map> has the form \"t1:c11,c12,...,c1m;t2:c21,...,c2m;...;tn:cn1,...,cnm\"\n");
+ printf("where t[i] is the NUMA domain where the ith thread is run,\n");
+ printf("and c[i][j] is the NUMA domain where the jth chain in the ith thread is allocated.\n");
+ printf("(The values t[i] and c[i][j] must all be zero or small positive integers.)\n");
+ printf("\n");
+ printf("Note: for maps, each thread must have the same number of chains,\n");
+ printf("maps override the -t or --threads specification,\n");
+ printf("NUMA domains are whole numbers in the range of 0..N, and\n");
+ printf("thread or chain domains that exceed the maximum NUMA domain\n");
+ printf("are wrapped around using a MOD function.\n");
+ printf("\n");
+ printf("To determine the number of NUMA domains currently available\n");
+ printf("on your system, use a command such as \"numastat\".\n");
+ printf("\n");
+ printf("Final note: strict is not yet fully implemented, and\n");
+ printf("maps do not gracefully handle ill-formed map specifications.\n");
+
+ return 1;
+ }
+
+
+ // STRICT -- fail if specifications are inconsistent
+
+ // compute lines per page and lines per chain
+ // based on input and defaults.
+ // we round up page and chain sizes when needed.
+ this->lines_per_page = (this->bytes_per_page+this->bytes_per_line-1) / this->bytes_per_line;
+ this->bytes_per_page = this->bytes_per_line * this->lines_per_page;
+ this->pages_per_chain = (this->bytes_per_chain+this->bytes_per_page-1) / this->bytes_per_page;
+ this->bytes_per_chain = this->bytes_per_page * this->pages_per_chain;
+ this->bytes_per_thread = this->bytes_per_chain * this->chains_per_thread;
+ this->bytes_per_test = this->bytes_per_thread * this->num_threads;
+ this->links_per_line = this->bytes_per_line / pointer_size;
+ this->links_per_page = this->lines_per_page * this->links_per_line;
+ this->lines_per_chain = this->lines_per_page * this->pages_per_chain;
+ this->links_per_chain = this->lines_per_chain * this->links_per_line;
+
+
+ // allocate the chain roots for all threads
+ // and compute the chain locations
+ // (the chains themselves are initialized by the threads)
+ switch (this->numa_placement) {
+ case LOCAL:
+ case XOR:
+ case ADD:
+ this->thread_domain = new int32[this->num_threads];
+ this->chain_domain = new int32*[this->num_threads];
+ this->random_state = new char*[this->num_threads];
+
+ for (int i = 0; i < this->num_threads; i++) {
+ this->chain_domain[i] = new int32[this->chains_per_thread];
+
+ const int state_size = 256;
+ this->random_state[i] = new char[state_size];
+ initstate((unsigned int) i, (char *) this->random_state[i],
+ (size_t) state_size);
+ }
+ break;
+ }
+
+#if defined(NUMA)
+ this->numa_max_domain = numa_max_node();
+ this->num_numa_domains = this->numa_max_domain + 1;
+#endif
+
+ switch (this->numa_placement) {
+ case LOCAL:
+ default:
+ this->alloc_local();
+ break;
+ case XOR:
+ this->alloc_xor();
+ break;
+ case ADD:
+ this->alloc_add();
+ break;
+ case MAP:
+ this->alloc_map();
+ break;
+ }
+
+ return 0;
+}
+
+int64 Experiment::parse_number(const char* s) {
+ int64 result = 0;
+
+ int len = strlen(s);
+ for (int i = 0; i < len; i++) {
+ if ('0' <= s[i] && s[i] <= '9') {
+ result = result * 10 + s[i] - '0';
+ } else if (s[i] == 'k' || s[i] == 'K') {
+ result = result << 10;
+ break;
+ } else if (s[i] == 'm' || s[i] == 'M') {
+ result = result << 20;
+ break;
+ } else if (s[i] == 'g' || s[i] == 'G') {
+ result = result << 30;
+ break;
+ } else if (s[i] == 't' || s[i] == 'T') {
+ result = result << 40;
+ break;
+ } else {
+ break;
+ }
+ }
+
+ return result;
+}
+
+float Experiment::parse_real(const char* s) {
+ float result = 0;
+ bool decimal = false;
+ float power = 1;
+
+ int len = strlen(s);
+ for (int i = 0; i < len; i++) {
+ if ('0' <= s[i] && s[i] <= '9') {
+ if (!decimal) {
+ result = result * 10 + s[i] - '0';
+ } else {
+ power = power / 10;
+ result = result + (s[i] - '0') * power;
+ }
+ } else if ('.' == s[i]) {
+ decimal = true;
+ } else {
+ break;
+ }
+ }
+
+ return result;
+}
+
+void Experiment::alloc_local() {
+ for (int i = 0; i < this->num_threads; i++) {
+ this->thread_domain[i] = i % this->num_numa_domains;
+ for (int j = 0; j < this->chains_per_thread; j++) {
+ this->chain_domain[i][j] = this->thread_domain[i];
+ }
+ }
+}
+
+void Experiment::alloc_xor() {
+ for (int i = 0; i < this->num_threads; i++) {
+ this->thread_domain[i] = i % this->num_numa_domains;
+ for (int j = 0; j < this->chains_per_thread; j++) {
+ this->chain_domain[i][j] = (this->thread_domain[i]
+ ^ this->offset_or_mask) % this->num_numa_domains;
+ }
+ }
+}
+
+void Experiment::alloc_add() {
+ for (int i = 0; i < this->num_threads; i++) {
+ this->thread_domain[i] = i % this->num_numa_domains;
+ for (int j = 0; j < this->chains_per_thread; j++) {
+ this->chain_domain[i][j] = (this->thread_domain[i]
+ + this->offset_or_mask) % this->num_numa_domains;
+ }
+ }
+}
+
+// DOES NOT HANDLE ILL-FORMED SPECIFICATIONS
+void Experiment::alloc_map() {
+ // STRICT -- fail if specifications are inconsistent
+
+ // maps look like "t1:c11,c12,...,c1m;t2:c21,...,c2m;...;tn:cn1,...,cnm"
+ // where t[i] is the thread domain of the ith thread,
+ // and c[i][j] is the chain domain of the jth chain in the ith thread
+
+ // count the thread descriptors by counting ";" up to EOS
+ int threads = 1;
+ char *p = this->placement_map;
+ while (*p != '\0') {
+ if (*p == ';')
+ threads += 1;
+ p++;
+ }
+ int thread_domain[threads];
+
+ // count the chain descriptors by counting "," up to ";" or EOS
+ int chains = 1;
+ p = this->placement_map;
+ while (*p != '\0') {
+ if (*p == ';')
+ break;
+ if (*p == ',')
+ chains += 1;
+ p++;
+ }
+ int chain_domain[threads][chains];
+
+ int t = 0, c = 0;
+ p = this->placement_map;
+ while (*p != '\0') {
+ // everything up to ":" is the thread domain
+ int i = 0;
+ char buf[64];
+ while (*p != '\0') {
+ if (*p == ':') {
+ p++;
+ break;
+ }
+ buf[i] = *p;
+ i++;
+ p++;
+ }
+ buf[i] = '\0';
+ thread_domain[t] = Experiment::parse_number(buf);
+
+ // search for one or several ','
+ c = 0;
+ while (*p != '\0' && *p != ';') {
+ if (chains <= c || threads <= t) {
+ // error in the thread/chain specification
+ fprintf(stderr, "Malformed map.\n");
+ exit(1);
+ }
+ int i = 0;
+ while (*p != '\0' && *p != ';') {
+ if (*p == ',') {
+ p++;
+ break;
+ }
+ buf[i] = *p;
+ i++;
+ p++;
+ }
+ buf[i] = '\0';
+ chain_domain[t][c] = Experiment::parse_number(buf);
+ c++;
+ }
+
+ if (*p == '\0')
+ break;
+ if (*p == ';')
+ p++;
+ t++;
+ }
+
+ this->num_threads = threads;
+ this->chains_per_thread = chains;
+
+ this->thread_domain = new int32[this->num_threads];
+ this->chain_domain = new int32*[this->num_threads];
+ this->random_state = new char*[this->num_threads];
+
+ for (int i = 0; i < this->num_threads; i++) {
+ this->thread_domain[i] = thread_domain[i] % this->num_numa_domains;
+
+ const int state_size = 256;
+ this->random_state[i] = new char[state_size];
+ initstate((unsigned int) i, (char *) this->random_state[i],
+ (size_t) state_size);
+
+ this->chain_domain[i] = new int32[this->chains_per_thread];
+ for (int j = 0; j < this->chains_per_thread; j++) {
+ this->chain_domain[i][j] = chain_domain[i][j]
+ % this->num_numa_domains;
+ }
+ }
+
+ this->bytes_per_thread = this->bytes_per_chain * this->chains_per_thread;
+ this->bytes_per_test = this->bytes_per_thread * this->num_threads;
+}
+
+void Experiment::print() {
+ printf("strict = %d\n", strict);
+ printf("pointer_size = %d\n", pointer_size);
+ printf("sizeof(Chain) = %d\n", sizeof(Chain));
+ printf("sizeof(Chain *) = %d\n", sizeof(Chain *));
+ printf("bytes_per_line = %d\n", bytes_per_line);
+ printf("links_per_line = %d\n", links_per_line);
+ printf("bytes_per_page = %d\n", bytes_per_page);
+ printf("lines_per_page = %d\n", lines_per_page);
+ printf("links_per_page = %d\n", links_per_page);
+ printf("bytes_per_chain = %d\n", bytes_per_chain);
+ printf("lines_per_chain = %d\n", lines_per_chain);
+ printf("links_per_chain = %d\n", links_per_chain);
+ printf("pages_per_chain = %d\n", pages_per_chain);
+ printf("chains_per_thread = %d\n", chains_per_thread);
+ printf("bytes_per_thread = %d\n", bytes_per_thread);
+ printf("num_threads = %d\n", num_threads);
+ printf("bytes_per_test = %d\n", bytes_per_test);
+ printf("busy cycles = %d\n", busy_cycles);
+ printf("prefetch = %d\n", prefetch);
+ printf("iterations = %d\n", iterations);
+ printf("experiments = %d\n", experiments);
+ printf("access_pattern = %d\n", access_pattern);
+ printf("stride = %d\n", stride);
+ printf("output_mode = %d\n", output_mode);
+ printf("numa_placement = %d\n", numa_placement);
+ printf("offset_or_mask = %d\n", offset_or_mask);
+ printf("numa_max_domain = %d\n", numa_max_domain);
+ printf("num_numa_domains = %d\n", num_numa_domains);
+
+ for (int i = 0; i < this->num_threads; i++) {
+ printf("%d: ", this->thread_domain[i]);
+ for (int j = 0; j < this->chains_per_thread; j++) {
+ printf("%d,", this->chain_domain[i][j]);
+ }
+ printf("\n");
+ }
+
+ fflush(stdout);
+}
+
+const char* Experiment::access() {
+ const char* result = NULL;
+
+ if (this->access_pattern == RANDOM) {
+ result = "random";
+ } else if (this->access_pattern == STRIDED && 0 < this->stride) {
+ result = "forward";
+ } else if (this->access_pattern == STRIDED && this->stride < 0) {
+ result = "reverse";
+ } else if (this->access_pattern == STREAM) {
+ result = "stream";
+ }
+
+ return result;
+}
+
+const char* Experiment::placement() {
+ const char* result = NULL;
+
+ if (this->numa_placement == LOCAL) {
+ result = "local";
+ } else if (this->numa_placement == XOR) {
+ result = "xor";
+ } else if (this->numa_placement == ADD) {
+ result = "add";
+ } else if (this->numa_placement == MAP) {
+ result = "map";
+ }
+
+ return result;
+}