From 95e36280e5f825269babd2c322586b9f632f6148 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Mon, 7 Nov 2011 09:37:55 +0100 Subject: Lowercase is nicer. --- CMakeLists.txt | 20 +- src/Chain.h | 29 -- src/Experiment.cpp | 710 ---------------------------------------------- src/Experiment.h | 117 -------- src/Lock.cpp | 46 --- src/Lock.h | 41 --- src/Main.cpp | 95 ------- src/Output.cpp | 151 ---------- src/Output.h | 38 --- src/Run.cpp | 792 ---------------------------------------------------- src/Run.h | 62 ---- src/SpinBarrier.cpp | 54 ---- src/SpinBarrier.h | 50 ---- src/Thread.cpp | 77 ----- src/Thread.h | 68 ----- src/Timer.cpp | 168 ----------- src/Timer.h | 38 --- src/Types.h | 38 --- src/chain.h | 29 ++ src/experiment.cpp | 711 ++++++++++++++++++++++++++++++++++++++++++++++ src/experiment.h | 117 ++++++++ src/lock.cpp | 46 +++ src/lock.h | 40 +++ src/main.cpp | 95 +++++++ src/output.cpp | 151 ++++++++++ src/output.h | 38 +++ src/run.cpp | 792 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/run.h | 62 ++++ src/spinbarrier.cpp | 54 ++++ src/spinbarrier.h | 50 ++++ src/thread.cpp | 77 +++++ src/thread.h | 68 +++++ src/timer.cpp | 168 +++++++++++ src/timer.h | 38 +++ src/types.h | 38 +++ 35 files changed, 2584 insertions(+), 2584 deletions(-) delete mode 100644 src/Chain.h delete mode 100644 src/Experiment.cpp delete mode 100644 src/Experiment.h delete mode 100644 src/Lock.cpp delete mode 100644 src/Lock.h delete mode 100644 src/Main.cpp delete mode 100644 src/Output.cpp delete mode 100644 src/Output.h delete mode 100644 src/Run.cpp delete mode 100644 src/Run.h delete mode 100644 src/SpinBarrier.cpp delete mode 100644 src/SpinBarrier.h delete mode 100644 src/Thread.cpp delete mode 100644 src/Thread.h delete mode 100644 src/Timer.cpp delete mode 100644 src/Timer.h delete mode 100644 src/Types.h create mode 100644 src/chain.h create mode 100644 src/experiment.cpp create mode 100644 src/experiment.h create mode 100644 src/lock.cpp create mode 100644 
src/lock.h create mode 100644 src/main.cpp create mode 100644 src/output.cpp create mode 100644 src/output.h create mode 100644 src/run.cpp create mode 100644 src/run.h create mode 100644 src/spinbarrier.cpp create mode 100644 src/spinbarrier.h create mode 100644 src/thread.cpp create mode 100644 src/thread.h create mode 100644 src/timer.cpp create mode 100644 src/timer.h create mode 100644 src/types.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 6c88c0e..0bc1120 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,23 +21,23 @@ add_subdirectory(lib/AsmJit) # Code compilation # -add_library(Experiment src/Experiment.h src/Experiment.cpp) +add_library(experiment src/experiment.h src/experiment.cpp) -add_library(Thread src/Thread.h src/Thread.cpp) +add_library(thread src/thread.h src/thread.cpp) -add_library(Lock src/Lock.h src/Lock.cpp) +add_library(lock src/lock.h src/lock.cpp) -add_library(Output src/Output.h src/Output.cpp) +add_library(output src/output.h src/output.cpp) -add_library(Run src/Run.h src/Run.cpp) -target_link_libraries(Run Lock Thread) +add_library(run src/run.h src/run.cpp) +target_link_libraries(run lock thread) -add_library(SpinBarrier src/SpinBarrier.h src/SpinBarrier.cpp) +add_library(spinbarrier src/spinbarrier.h src/spinbarrier.cpp) -add_library(Timer src/Timer.h src/Timer.cpp) +add_library(timer src/timer.h src/timer.cpp) -add_executable (chase src/Main.cpp) -target_link_libraries(chase Run Timer Output Experiment SpinBarrier) +add_executable (chase src/main.cpp) +target_link_libraries(chase run timer output experiment spinbarrier) target_link_libraries(chase ${CMAKE_THREAD_LIBS_INIT}) if (USE_LIBNUMA) if(LIBNUMA) diff --git a/src/Chain.h b/src/Chain.h deleted file mode 100644 index f3ec83f..0000000 --- a/src/Chain.h +++ /dev/null @@ -1,29 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. 
This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. Pase - initial API and implementation * - *******************************************************************************/ - -// -// Configuration -// - -// Include guard -#if !defined(Chain_h) -#define Chain_h - - -// -// Struct definition -// - -struct Chain { - Chain* next; -}; - -#endif diff --git a/src/Experiment.cpp b/src/Experiment.cpp deleted file mode 100644 index 701647c..0000000 --- a/src/Experiment.cpp +++ /dev/null @@ -1,710 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. 
Pase - initial API and implementation * - *******************************************************************************/ - -// -// Configuration -// - -// Implementation header -#include "Experiment.h" - -// System includes -#include -#include -#include -#include -#if defined(NUMA) -#include -#endif - - -// -// Implementation -// - -Experiment::Experiment() : - strict (0), - pointer_size (DEFAULT_POINTER_SIZE), - bytes_per_line (DEFAULT_BYTES_PER_LINE), - links_per_line (DEFAULT_LINKS_PER_LINE), - bytes_per_page (DEFAULT_BYTES_PER_PAGE), - lines_per_page (DEFAULT_LINES_PER_PAGE), - links_per_page (DEFAULT_LINKS_PER_PAGE), - bytes_per_chain (DEFAULT_BYTES_PER_CHAIN), - lines_per_chain (DEFAULT_LINES_PER_CHAIN), - links_per_chain (DEFAULT_LINKS_PER_CHAIN), - pages_per_chain (DEFAULT_PAGES_PER_CHAIN), - chains_per_thread(DEFAULT_CHAINS_PER_THREAD), - bytes_per_thread (DEFAULT_BYTES_PER_THREAD), - num_threads (DEFAULT_THREADS), - bytes_per_test (DEFAULT_BYTES_PER_TEST), - busy_cycles (DEFAULT_BUSY_CYCLES), - seconds (DEFAULT_SECONDS), - iterations (DEFAULT_ITERATIONS), - experiments (DEFAULT_EXPERIMENTS), - prefetch (DEFAULT_PREFETCH), - output_mode (TABLE), - access_pattern (RANDOM), - stride (1), - numa_placement (LOCAL), - offset_or_mask (0), - placement_map (NULL), - thread_domain (NULL), - chain_domain (NULL), - numa_max_domain (0), - num_numa_domains (1) -{ -} - -Experiment::~Experiment() { -} - -// interface: -// -// -l or --line bytes per cache line (line size) -// -p or --page bytes per page (page size) -// -c or --chain bytes per chain (used to compute pages per chain) -// -r or --references chains per thread (memory loading) -// -t or --threads number of threads (concurrency and contention) -// -i or --iters iterations -// -e or --experiments experiments -// -b or --busy amount of cycles processor should remain busy -// -f or --prefetch prefetch data -// -a or --access memory access pattern -// random random access pattern -// forward exclusive OR and mask 
-// reverse addition and offset -// -o or --output output mode -// hdr header only -// csv csv only -// both header + csv -// table human-readable table of values -// -n or --numa numa placement -// local local allocation of all chains -// xor exclusive OR and mask -// add addition and offset -// map explicit mapping of threads and chains to domains - -int Experiment::parse_args(int argc, char* argv[]) { - int error = 0; - for (int i = 1; i < argc; i++) { - if (strcasecmp(argv[i], "-x") == 0 - || strcasecmp(argv[i], "--strict") == 0) { - this->strict = 1; - } else if (strcasecmp(argv[i], "-s") == 0 - || strcasecmp(argv[i], "--seconds") == 0) { - i++; - if (i == argc) { - error = 1; - break; - } - this->seconds = Experiment::parse_real(argv[i]); - this->iterations = 0; - if (this->seconds == 0) { - error = 1; - break; - } - } else if (strcasecmp(argv[i], "-l") == 0 - || strcasecmp(argv[i], "--line") == 0) { - i++; - if (i == argc) { - error = 1; - break; - } - this->bytes_per_line = Experiment::parse_number(argv[i]); - if (this->bytes_per_line == 0) { - error = 1; - break; - } - } else if (strcasecmp(argv[i], "-p") == 0 - || strcasecmp(argv[i], "--page") == 0) { - i++; - if (i == argc) { - error = 1; - break; - } - this->bytes_per_page = Experiment::parse_number(argv[i]); - if (this->bytes_per_page == 0) { - error = 1; - break; - } - } else if (strcasecmp(argv[i], "-c") == 0 - || strcasecmp(argv[i], "--chain") == 0) { - i++; - if (i == argc) { - error = 1; - break; - } - this->bytes_per_chain = Experiment::parse_number(argv[i]); - if (this->bytes_per_chain == 0) { - error = 1; - break; - } - } else if (strcasecmp(argv[i], "-r") == 0 - || strcasecmp(argv[i], "--references") == 0) { - i++; - if (i == argc) { - error = 1; - break; - } - this->chains_per_thread = Experiment::parse_number(argv[i]); - if (this->chains_per_thread == 0) { - error = 1; - break; - } - } else if (strcasecmp(argv[i], "-t") == 0 - || strcasecmp(argv[i], "--threads") == 0) { - i++; - if (i == 
argc) { - error = 1; - break; - } - this->num_threads = Experiment::parse_number(argv[i]); - if (this->num_threads == 0) { - error = 1; - break; - } - } else if (strcasecmp(argv[i], "-i") == 0 - || strcasecmp(argv[i], "--iterations") == 0) { - i++; - if (i == argc) { - error = 1; - break; - } - this->iterations = Experiment::parse_number(argv[i]); - this->seconds = 0; - if (this->iterations == 0) { - error = 1; - break; - } - } else if (strcasecmp(argv[i], "-e") == 0 - || strcasecmp(argv[i], "--experiments") == 0) { - i++; - if (i == argc) { - error = 1; - break; - } - this->experiments = Experiment::parse_number(argv[i]); - if (this->experiments == 0) { - error = 1; - break; - } - } else if (strcasecmp(argv[i], "-b") == 0 - || strcasecmp(argv[i], "--busy") == 0) { - i++; - if (i == argc) { - error = 1; - break; - } - this->busy_cycles = Experiment::parse_number(argv[i]); - if (this->experiments == 0) { - error = 1; - break; - } - } else if (strcasecmp(argv[i], "-f") == 0 - || strcasecmp(argv[i], "--prefetch") == 0) { - this->prefetch = true; - } else if (strcasecmp(argv[i], "-a") == 0 - || strcasecmp(argv[i], "--access") == 0) { - i++; - if (i == argc) { - error = 1; - break; - } - if (strcasecmp(argv[i], "random") == 0) { - this->access_pattern = RANDOM; - } else if (strcasecmp(argv[i], "forward") == 0) { - this->access_pattern = STRIDED; - i++; - if (i == argc) { - error = 1; - break; - } - this->stride = Experiment::parse_number(argv[i]); - if (this->stride == 0) { - error = 1; - break; - } - } else if (strcasecmp(argv[i], "reverse") == 0) { - this->access_pattern = STRIDED; - i++; - if (i == argc) { - error = 1; - break; - } - this->stride = -Experiment::parse_number(argv[i]); - if (this->stride == 0) { - error = 1; - break; - } - } else if (strcasecmp(argv[i], "stream") == 0) { - this->access_pattern = STREAM; - i++; - if (i == argc) { - error = 1; - break; - } - this->stride = Experiment::parse_number(argv[i]); - if (this->stride == 0) { - error = 1; - 
break; - } - } else { - error = 1; - break; - } - } else if (strcasecmp(argv[i], "-o") == 0 - || strcasecmp(argv[i], "--output") == 0) { - i++; - if (i == argc) { - error = 1; - break; - } - if (strcasecmp(argv[i], "table") == 0) { - this->output_mode = TABLE; - } else if (strcasecmp(argv[i], "csv") == 0) { - this->output_mode = CSV; - } else if (strcasecmp(argv[i], "both") == 0) { - this->output_mode = BOTH; - } else if (strcasecmp(argv[i], "hdr") == 0) { - this->output_mode = HEADER; - } else if (strcasecmp(argv[i], "header") == 0) { - this->output_mode = HEADER; - } else { - error = 1; - break; - } - } else if (strcasecmp(argv[i], "-n") == 0 - || strcasecmp(argv[i], "--numa") == 0) { - i++; - if (i == argc) { - error = 1; - break; - } - if (strcasecmp(argv[i], "local") == 0) { - this->numa_placement = LOCAL; - } else if (strcasecmp(argv[i], "xor") == 0) { - this->numa_placement = XOR; - i++; - if (i == argc) { - error = 1; - break; - } - this->offset_or_mask = Experiment::parse_number(argv[i]); - } else if (strcasecmp(argv[i], "add") == 0) { - this->numa_placement = ADD; - i++; - if (i == argc) { - error = 1; - break; - } - this->offset_or_mask = Experiment::parse_number(argv[i]); - } else if (strcasecmp(argv[i], "map") == 0) { - this->numa_placement = MAP; - i++; - if (i == argc) { - error = 1; - break; - } - this->placement_map = argv[i]; - } else { - error = 1; - break; - } - } else { - error = 1; - break; - } - } - - - // if we've hit an error, print a message and quit - if (error) { - printf("usage: %s \n", argv[0]); - printf("where are selected from the following:\n"); - printf(" [-h|--help] # this message\n"); - printf(" [-l|--line] # bytes per cache line (cache line size)\n"); - printf(" [-p|--page] # bytes per page (page size)\n"); - printf(" [-c|--chain] # bytes per chain (used to compute pages per chain)\n"); - printf(" [-r|--references] # chains per thread (memory loading)\n"); - printf(" [-t|--threads] # number of threads (concurrency and 
contention)\n"); - printf(" [-i|--iterations] # iterations per experiment\n"); - printf(" [-e|--experiments] # experiments\n"); - printf(" [-a|--access] # memory access pattern\n"); - printf(" [-o|--output] # output format\n"); - printf(" [-n|--numa] # numa placement\n"); - printf(" [-s|--seconds] # run each experiment for seconds\n"); - printf(" [-b|--busy] # how much processing cycles each loop should count\n"); - printf(" [-f|--prefetch] # prefetch data\n"); - printf(" [-x|--strict] # fail rather than adjust options to sensible values\n"); - printf("\n"); - printf(" is selected from the following:\n"); - printf(" random # all chains are accessed randomly\n"); - printf(" forward # chains are in forward order with constant stride\n"); - printf(" reverse # chains are in reverse order with constant stride\n"); - printf(" stream # references are calculated rather than read from memory\n"); - printf("\n"); - printf("Note: is always a small positive integer.\n"); - printf("\n"); - printf(" is selected from the following:\n"); - printf(" hdr # csv header only\n"); - printf(" csv # results in csv format only\n"); - printf(" both # header and results in csv format\n"); - printf(" table # human-readable table of values\n"); - printf("\n"); - printf(" is selected from the following:\n"); - printf(" local # all chains are allocated locally\n"); - printf(" xor # exclusive OR and mask\n"); - printf(" add # addition and offset\n"); - printf(" map # explicit mapping of threads and chains to domains\n"); - printf("\n"); - printf(" has the form \"t1:c11,c12,...,c1m;t2:c21,...,c2m;...;tn:cn1,...,cnm\"\n"); - printf("where t[i] is the NUMA domain where the ith thread is run,\n"); - printf("and c[i][j] is the NUMA domain where the jth chain in the ith thread is allocated.\n"); - printf("(The values t[i] and c[i][j] must all be zero or small positive integers.)\n"); - printf("\n"); - printf("Note: for maps, each thread must have the same number of chains,\n"); - printf("maps override 
the -t or --threads specification,\n"); - printf("NUMA domains are whole numbers in the range of 0..N, and\n"); - printf("thread or chain domains that exceed the maximum NUMA domain\n"); - printf("are wrapped around using a MOD function.\n"); - printf("\n"); - printf("To determine the number of NUMA domains currently available\n"); - printf("on your system, use a command such as \"numastat\".\n"); - printf("\n"); - printf("Final note: strict is not yet fully implemented, and\n"); - printf("maps do not gracefully handle ill-formed map specifications.\n"); - - return 1; - } - - - // STRICT -- fail if specifications are inconsistent - - // compute lines per page and lines per chain - // based on input and defaults. - // we round up page and chain sizes when needed. - this->lines_per_page = (this->bytes_per_page+this->bytes_per_line-1) / this->bytes_per_line; - this->bytes_per_page = this->bytes_per_line * this->lines_per_page; - this->pages_per_chain = (this->bytes_per_chain+this->bytes_per_page-1) / this->bytes_per_page; - this->bytes_per_chain = this->bytes_per_page * this->pages_per_chain; - this->bytes_per_thread = this->bytes_per_chain * this->chains_per_thread; - this->bytes_per_test = this->bytes_per_thread * this->num_threads; - this->links_per_line = this->bytes_per_line / pointer_size; - this->links_per_page = this->lines_per_page * this->links_per_line; - this->lines_per_chain = this->lines_per_page * this->pages_per_chain; - this->links_per_chain = this->lines_per_chain * this->links_per_line; - - - // allocate the chain roots for all threads - // and compute the chain locations - // (the chains themselves are initialized by the threads) - switch (this->numa_placement) { - case LOCAL: - case XOR: - case ADD: - this->thread_domain = new int32[this->num_threads]; - this->chain_domain = new int32*[this->num_threads]; - this->random_state = new char*[this->num_threads]; - - for (int i = 0; i < this->num_threads; i++) { - this->chain_domain[i] = new 
int32[this->chains_per_thread]; - - const int state_size = 256; - this->random_state[i] = new char[state_size]; - initstate((unsigned int) i, (char *) this->random_state[i], - (size_t) state_size); - } - break; - } - -#if defined(NUMA) - this->numa_max_domain = numa_max_node(); - this->num_numa_domains = this->numa_max_domain + 1; -#endif - - switch (this->numa_placement) { - case LOCAL: - default: - this->alloc_local(); - break; - case XOR: - this->alloc_xor(); - break; - case ADD: - this->alloc_add(); - break; - case MAP: - this->alloc_map(); - break; - } - - return 0; -} - -int64 Experiment::parse_number(const char* s) { - int64 result = 0; - - int len = strlen(s); - for (int i = 0; i < len; i++) { - if ('0' <= s[i] && s[i] <= '9') { - result = result * 10 + s[i] - '0'; - } else if (s[i] == 'k' || s[i] == 'K') { - result = result << 10; - break; - } else if (s[i] == 'm' || s[i] == 'M') { - result = result << 20; - break; - } else if (s[i] == 'g' || s[i] == 'G') { - result = result << 30; - break; - } else if (s[i] == 't' || s[i] == 'T') { - result = result << 40; - break; - } else { - break; - } - } - - return result; -} - -float Experiment::parse_real(const char* s) { - float result = 0; - bool decimal = false; - float power = 1; - - int len = strlen(s); - for (int i = 0; i < len; i++) { - if ('0' <= s[i] && s[i] <= '9') { - if (!decimal) { - result = result * 10 + s[i] - '0'; - } else { - power = power / 10; - result = result + (s[i] - '0') * power; - } - } else if ('.' 
== s[i]) { - decimal = true; - } else { - break; - } - } - - return result; -} - -void Experiment::alloc_local() { - for (int i = 0; i < this->num_threads; i++) { - this->thread_domain[i] = i % this->num_numa_domains; - for (int j = 0; j < this->chains_per_thread; j++) { - this->chain_domain[i][j] = this->thread_domain[i]; - } - } -} - -void Experiment::alloc_xor() { - for (int i = 0; i < this->num_threads; i++) { - this->thread_domain[i] = i % this->num_numa_domains; - for (int j = 0; j < this->chains_per_thread; j++) { - this->chain_domain[i][j] = (this->thread_domain[i] - ^ this->offset_or_mask) % this->num_numa_domains; - } - } -} - -void Experiment::alloc_add() { - for (int i = 0; i < this->num_threads; i++) { - this->thread_domain[i] = i % this->num_numa_domains; - for (int j = 0; j < this->chains_per_thread; j++) { - this->chain_domain[i][j] = (this->thread_domain[i] - + this->offset_or_mask) % this->num_numa_domains; - } - } -} - -// DOES NOT HANDLE ILL-FORMED SPECIFICATIONS -void Experiment::alloc_map() { - // STRICT -- fail if specifications are inconsistent - - // maps look like "t1:c11,c12,...,c1m;t2:c21,...,c2m;...;tn:cn1,...,cnm" - // where t[i] is the thread domain of the ith thread, - // and c[i][j] is the chain domain of the jth chain in the ith thread - - // count the thread descriptors by counting ";" up to EOS - int threads = 1; - char *p = this->placement_map; - while (*p != '\0') { - if (*p == ';') - threads += 1; - p++; - } - int thread_domain[threads]; - - // count the chain descriptors by counting "," up to ";" or EOS - int chains = 1; - p = this->placement_map; - while (*p != '\0') { - if (*p == ';') - break; - if (*p == ',') - chains += 1; - p++; - } - int chain_domain[threads][chains]; - - int t = 0, c = 0; - p = this->placement_map; - while (*p != '\0') { - // everything up to ":" is the thread domain - int i = 0; - char buf[64]; - while (*p != '\0') { - if (*p == ':') { - p++; - break; - } - buf[i] = *p; - i++; - p++; - } - buf[i] = 
'\0'; - thread_domain[t] = Experiment::parse_number(buf); - - // search for one or several ',' - c = 0; - while (*p != '\0' && *p != ';') { - if (chains <= c || threads <= t) { - // error in the thread/chain specification - fprintf(stderr, "Malformed map.\n"); - exit(1); - } - int i = 0; - while (*p != '\0' && *p != ';') { - if (*p == ',') { - p++; - break; - } - buf[i] = *p; - i++; - p++; - } - buf[i] = '\0'; - chain_domain[t][c] = Experiment::parse_number(buf); - c++; - } - - if (*p == '\0') - break; - if (*p == ';') - p++; - t++; - } - - this->num_threads = threads; - this->chains_per_thread = chains; - - this->thread_domain = new int32[this->num_threads]; - this->chain_domain = new int32*[this->num_threads]; - this->random_state = new char*[this->num_threads]; - - for (int i = 0; i < this->num_threads; i++) { - this->thread_domain[i] = thread_domain[i] % this->num_numa_domains; - - const int state_size = 256; - this->random_state[i] = new char[state_size]; - initstate((unsigned int) i, (char *) this->random_state[i], - (size_t) state_size); - - this->chain_domain[i] = new int32[this->chains_per_thread]; - for (int j = 0; j < this->chains_per_thread; j++) { - this->chain_domain[i][j] = chain_domain[i][j] - % this->num_numa_domains; - } - } - - this->bytes_per_thread = this->bytes_per_chain * this->chains_per_thread; - this->bytes_per_test = this->bytes_per_thread * this->num_threads; -} - -#include "Chain.h" - -void Experiment::print() { - printf("strict = %d\n", strict); - printf("pointer_size = %d\n", pointer_size); - printf("sizeof(Chain) = %d\n", sizeof(Chain)); - printf("sizeof(Chain *) = %d\n", sizeof(Chain *)); - printf("bytes_per_line = %d\n", bytes_per_line); - printf("links_per_line = %d\n", links_per_line); - printf("bytes_per_page = %d\n", bytes_per_page); - printf("lines_per_page = %d\n", lines_per_page); - printf("links_per_page = %d\n", links_per_page); - printf("bytes_per_chain = %d\n", bytes_per_chain); - printf("lines_per_chain = %d\n", 
lines_per_chain); - printf("links_per_chain = %d\n", links_per_chain); - printf("pages_per_chain = %d\n", pages_per_chain); - printf("chains_per_thread = %d\n", chains_per_thread); - printf("bytes_per_thread = %d\n", bytes_per_thread); - printf("num_threads = %d\n", num_threads); - printf("bytes_per_test = %d\n", bytes_per_test); - printf("busy cycles = %d\n", busy_cycles); - printf("prefetch = %d\n", prefetch); - printf("iterations = %d\n", iterations); - printf("experiments = %d\n", experiments); - printf("access_pattern = %d\n", access_pattern); - printf("stride = %d\n", stride); - printf("output_mode = %d\n", output_mode); - printf("numa_placement = %d\n", numa_placement); - printf("offset_or_mask = %d\n", offset_or_mask); - printf("numa_max_domain = %d\n", numa_max_domain); - printf("num_numa_domains = %d\n", num_numa_domains); - - for (int i = 0; i < this->num_threads; i++) { - printf("%d: ", this->thread_domain[i]); - for (int j = 0; j < this->chains_per_thread; j++) { - printf("%d,", this->chain_domain[i][j]); - } - printf("\n"); - } - - fflush(stdout); -} - -const char* Experiment::access() { - const char* result = NULL; - - if (this->access_pattern == RANDOM) { - result = "random"; - } else if (this->access_pattern == STRIDED && 0 < this->stride) { - result = "forward"; - } else if (this->access_pattern == STRIDED && this->stride < 0) { - result = "reverse"; - } else if (this->access_pattern == STREAM) { - result = "stream"; - } - - return result; -} - -const char* Experiment::placement() { - const char* result = NULL; - - if (this->numa_placement == LOCAL) { - result = "local"; - } else if (this->numa_placement == XOR) { - result = "xor"; - } else if (this->numa_placement == ADD) { - result = "add"; - } else if (this->numa_placement == MAP) { - result = "map"; - } - - return result; -} diff --git a/src/Experiment.h b/src/Experiment.h deleted file mode 100644 index b6a2bf5..0000000 --- a/src/Experiment.h +++ /dev/null @@ -1,117 +0,0 @@ 
-/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. Pase - initial API and implementation * - *******************************************************************************/ - -// -// Configuration -// - -// Include guard -#if !defined(Experiment_h) -#define Experiment_h - -// Local includes -#include "Chain.h" -#include "Types.h" - - -// -// Class definition -// - -class Experiment { -public: - Experiment(); - ~Experiment(); - - int parse_args(int argc, char* argv[]); - int64 parse_number(const char* s); - float parse_real(const char* s); - - const char* placement(); - const char* access(); - - // fundamental parameters - int64 pointer_size; // number of bytes in a pointer - int64 bytes_per_line; // working set cache line size (bytes) - int64 links_per_line; // working set cache line size (links) - int64 bytes_per_page; // working set page size (in bytes) - int64 lines_per_page; // working set page size (in lines) - int64 links_per_page; // working set page size (in links) - int64 bytes_per_chain; // working set chain size (bytes) - int64 lines_per_chain; // working set chain size (lines) - int64 links_per_chain; // working set chain size (links) - int64 pages_per_chain; // working set chain size (pages) - int64 bytes_per_thread; // thread working set size (bytes) - int64 chains_per_thread;// memory loading per thread - int64 num_threads; // number of threads in the experiment - int64 bytes_per_test; // test working set size (bytes) - int64 busy_cycles; // processing cycles - bool prefetch; // use of prefetching - - float seconds; // number of seconds per experiment - 
int64 iterations; // number of iterations per experiment - int64 experiments; // number of experiments per test - - enum { CSV, BOTH, HEADER, TABLE } - output_mode; // results output mode - - enum { RANDOM, STRIDED, STREAM } - access_pattern; // memory access pattern - int64 stride; - - enum { LOCAL, XOR, ADD, MAP } - numa_placement; // memory allocation mode - int64 offset_or_mask; - char* placement_map; - - // maps threads and chains to numa domains - int32* thread_domain; // thread_domain[thread] - int32** chain_domain; // chain_domain[thread][chain] - int32 numa_max_domain; // highest numa domain id - int32 num_numa_domains; // number of numa domains - - char** random_state; // random state for each thread - - int strict; // strictly adhere to user input, or fail - - const static int32 DEFAULT_POINTER_SIZE = sizeof(Chain); - const static int32 DEFAULT_BYTES_PER_LINE = 64; - const static int32 DEFAULT_LINKS_PER_LINE = DEFAULT_BYTES_PER_LINE / DEFAULT_POINTER_SIZE; - const static int32 DEFAULT_BYTES_PER_PAGE = 4096; - const static int32 DEFAULT_LINES_PER_PAGE = DEFAULT_BYTES_PER_PAGE / DEFAULT_BYTES_PER_LINE; - const static int32 DEFAULT_LINKS_PER_PAGE = DEFAULT_LINES_PER_PAGE * DEFAULT_LINKS_PER_LINE; - const static int32 DEFAULT_PAGES_PER_CHAIN = 4096; - const static int32 DEFAULT_BYTES_PER_CHAIN = DEFAULT_BYTES_PER_PAGE * DEFAULT_PAGES_PER_CHAIN; - const static int32 DEFAULT_LINES_PER_CHAIN = DEFAULT_LINES_PER_PAGE * DEFAULT_PAGES_PER_CHAIN; - const static int32 DEFAULT_LINKS_PER_CHAIN = DEFAULT_LINES_PER_CHAIN * DEFAULT_BYTES_PER_LINE / DEFAULT_POINTER_SIZE; - const static int32 DEFAULT_CHAINS_PER_THREAD = 1; - const static int32 DEFAULT_BYTES_PER_THREAD = DEFAULT_BYTES_PER_CHAIN * DEFAULT_CHAINS_PER_THREAD; - const static int32 DEFAULT_THREADS = 1; - const static int32 DEFAULT_BYTES_PER_TEST = DEFAULT_BYTES_PER_THREAD * DEFAULT_THREADS; - const static int32 DEFAULT_BUSY_CYCLES = 0; - const static int32 DEFAULT_SECONDS = 1; - const static int32 
DEFAULT_ITERATIONS = 0; - const static int32 DEFAULT_EXPERIMENTS = 1; - - const static int32 DEFAULT_OUTPUT_MODE = 1; - const static bool DEFAULT_PREFETCH = false; - - void alloc_local(); - void alloc_xor(); - void alloc_add(); - void alloc_map(); - - void print(); - -private: -}; - -#endif diff --git a/src/Lock.cpp b/src/Lock.cpp deleted file mode 100644 index c4262e2..0000000 --- a/src/Lock.cpp +++ /dev/null @@ -1,46 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. Pase - initial API and implementation * - *******************************************************************************/ - -// -// Configuration -// - -// Implementation header -#include "Lock.h" - -// System includes -#include - - -// -// Implementation -// - -Lock::Lock() { - pthread_mutex_init(&(this->mutex), NULL); -} - -Lock::~Lock() { - pthread_mutex_destroy(&(this->mutex)); -} - -void Lock::lock() { - pthread_mutex_lock(&(this->mutex)); -} - -int Lock::test() { - pthread_mutex_trylock(&(this->mutex)); -} - -void Lock::unlock() { - pthread_mutex_unlock(&(this->mutex)); -} - diff --git a/src/Lock.h b/src/Lock.h deleted file mode 100644 index 46bd04e..0000000 --- a/src/Lock.h +++ /dev/null @@ -1,41 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. 
This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. Pase - initial API and implementation * - *******************************************************************************/ - -// -// Configuration -// - - -// Include guard -#if !defined(Lock_h) -#define Lock_h - -// System includes -#include - - -// -// Class definition -// - -class Lock { -public: - Lock(); - ~Lock(); - void lock(); - int test(); - void unlock(); - -private: - pthread_mutex_t mutex; -}; - -#endif diff --git a/src/Main.cpp b/src/Main.cpp deleted file mode 100644 index 17ea869..0000000 --- a/src/Main.cpp +++ /dev/null @@ -1,95 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. Pase - initial API and implementation * - *******************************************************************************/ - -// -// Configuration -// - -// System includes -#include - -// Local includes -#include "Run.h" -#include "Timer.h" -#include "Types.h" -#include "Output.h" -#include "Experiment.h" - -// This program allocates and accesses -// a number of blocks of memory, one or more -// for each thread that executes. Blocks -// are divided into sub-blocks called -// pages, and pages are divided into -// sub-blocks called cache lines. -// -// All pages are collected into a list. -// Pages are selected for the list in -// a particular order. 
Each cache line -// within the page is similarly gathered -// into a list in a particular order. -// In both cases the order may be random -// or linear. -// -// A root pointer points to the first -// cache line. A pointer in the cache -// line points to the next cache line, -// which contains a pointer to the cache -// line after that, and so on. This -// forms a pointer chain that touches all -// cache lines within the first page, -// then all cache lines within the second -// page, and so on until all pages are -// covered. The last pointer contains -// NULL, terminating the chain. -// -// Depending on compile-time options, -// pointers may be 32-bit or 64-bit -// pointers. - - -// -// Implementation -// - -int verbose = 0; - -int main(int argc, char* argv[]) { - Timer::calibrate(10000); - double clk_res = Timer::resolution(); - - Experiment e; - if (e.parse_args(argc, argv)) { - return 0; - } - -#if defined(UNDEFINED) - e.print(); - if (argv != NULL) return 0; -#endif - - SpinBarrier sb(e.num_threads); - Run r[e.num_threads]; - for (int i = 0; i < e.num_threads; i++) { - r[i].set(e, &sb); - r[i].start(); - } - - for (int i = 0; i < e.num_threads; i++) { - r[i].wait(); - } - - int64 ops = Run::ops_per_chain(); - double secs = Run::seconds(); - - Output::print(e, ops, secs, clk_res); - - return 0; -} diff --git a/src/Output.cpp b/src/Output.cpp deleted file mode 100644 index bfffe0d..0000000 --- a/src/Output.cpp +++ /dev/null @@ -1,151 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. 
Pase - initial API and implementation * - *******************************************************************************/ - -// -// Configuration -// - -// Implementation header -#include "Output.h" - -// System includes -#include -#include -#include - - -// -// Implementation -// - -void Output::print(Experiment &e, int64 ops, double secs, double ck_res) { - if (e.output_mode == Experiment::CSV) { - Output::csv(e, ops, secs, ck_res); - } else if (e.output_mode == Experiment::BOTH) { - Output::header(e, ops, secs, ck_res); - Output::csv(e, ops, secs, ck_res); - } else if (e.output_mode == Experiment::HEADER) { - Output::header(e, ops, secs, ck_res); - } else { - Output::table(e, ops, secs, ck_res); - } -} - -void Output::header(Experiment &e, int64 ops, double secs, double ck_res) { - printf("pointer size (bytes),"); - printf("cache line size (bytes),"); - printf("page size (bytes),"); - printf("chain size (bytes),"); - printf("thread size (bytes),"); - printf("test size (bytes),"); - printf("chains per thread,"); - printf("number of threads,"); - printf("iterations,"); - printf("experiments,"); - printf("access pattern,"); - printf("stride,"); - printf("numa placement,"); - printf("offset or mask,"); - printf("numa domains,"); - printf("domain map,"); - printf("operations per chain,"); - printf("total operations,"); - printf("elapsed time (seconds),"); - printf("elapsed time (timer ticks),"); - printf("clock resolution (ns),", ck_res * 1E9); - printf("memory latency (ns),"); - printf("memory bandwidth (MB/s)\n"); - - fflush(stdout); -} - -void Output::csv(Experiment &e, int64 ops, double secs, double ck_res) { - printf("%ld,", e.pointer_size); - printf("%ld,", e.bytes_per_line); - printf("%ld,", e.bytes_per_page); - printf("%ld,", e.bytes_per_chain); - printf("%ld,", e.bytes_per_thread); - printf("%ld,", e.bytes_per_test); - printf("%lld,", e.chains_per_thread); - printf("%ld,", e.num_threads); - printf("%ld,", e.iterations); - printf("%ld,", e.experiments); - 
printf("%s,", e.access()); - printf("%ld,", e.stride); - printf("%s,", e.placement()); - printf("%ld,", e.offset_or_mask); - printf("%ld,", e.num_numa_domains); - printf("\""); - printf("%d:", e.thread_domain[0]); - printf("%d", e.chain_domain[0][0]); - for (int j = 1; j < e.chains_per_thread; j++) { - printf(",%d", e.chain_domain[0][j]); - } - for (int i = 1; i < e.num_threads; i++) { - printf(";%d:", e.thread_domain[i]); - printf("%d", e.chain_domain[i][0]); - for (int j = 1; j < e.chains_per_thread; j++) { - printf(",%d", e.chain_domain[i][j]); - } - } - printf("\","); - printf("%ld,", ops); - printf("%ld,", ops * e.chains_per_thread * e.num_threads); - printf("%.3f,", secs); - printf("%.0f,", secs/ck_res); - printf("%.2f,", ck_res * 1E9); - printf("%.2f,", (secs / (ops * e.iterations)) * 1E9); - printf("%.3f\n", ((ops * e.iterations * e.chains_per_thread * e.num_threads * e.bytes_per_line) / secs) * 1E-6); - - fflush(stdout); -} - -void Output::table(Experiment &e, int64 ops, double secs, double ck_res) { - printf("pointer size = %ld (bytes)\n", e.pointer_size); - printf("cache line size = %ld (bytes)\n", e.bytes_per_line); - printf("page size = %ld (bytes)\n", e.bytes_per_page); - printf("chain size = %ld (bytes)\n", e.bytes_per_chain); - printf("thread size = %ld (bytes)\n", e.bytes_per_thread); - printf("test size = %ld (bytes)\n", e.bytes_per_test); - printf("chains per thread = %ld\n", e.chains_per_thread); - printf("number of threads = %ld\n", e.num_threads); - printf("iterations = %ld\n", e.iterations); - printf("experiments = %ld\n", e.experiments); - printf("access pattern = %s\n", e.access()); - printf("stride = %ld\n", e.stride); - printf("numa placement = %s\n", e.placement()); - printf("offset or mask = %ld\n", e.offset_or_mask); - printf("numa domains = %ld\n", e.num_numa_domains); - printf("domain map = "); - printf("\""); - printf("%d:", e.thread_domain[0]); - printf("%d", e.chain_domain[0][0]); - for (int j = 1; j < e.chains_per_thread; j++) { 
- printf(",%d", e.chain_domain[0][j]); - } - for (int i = 1; i < e.num_threads; i++) { - printf(";%d:", e.thread_domain[i]); - printf("%d", e.chain_domain[i][0]); - for (int j = 1; j < e.chains_per_thread; j++) { - printf(",%d", e.chain_domain[i][j]); - } - } - printf("\"\n"); - printf("operations per chain = %ld\n", ops); - printf("total operations = %ld\n", ops * e.chains_per_thread * e.num_threads); - printf("elapsed time = %.3f (seconds)\n", secs); - printf("elapsed time = %.0f (timer ticks)\n", secs/ck_res); - printf("clock resolution = %.2f (ns)\n", ck_res * 1E9); - printf("memory latency = %.2f (ns)\n", (secs / (ops * e.iterations)) * 1E9); - printf("memory bandwidth = %.3f (MB/s)\n", ((ops * e.iterations * e.chains_per_thread * e.num_threads * e.bytes_per_line) / secs) * 1E-6); - - fflush(stdout); -} diff --git a/src/Output.h b/src/Output.h deleted file mode 100644 index db7d912..0000000 --- a/src/Output.h +++ /dev/null @@ -1,38 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. 
Pase - initial API and implementation * - *******************************************************************************/ - -// -// Configuration -// - -// Include guard -#if !defined(Output_h) -#define Output_h - -// Local includes -#include "Types.h" -#include "Experiment.h" - - -// -// Class definition -// - -class Output { -public: - static void print(Experiment &e, int64 ops, double secs, double ck_res); - static void header(Experiment &e, int64 ops, double secs, double ck_res); - static void csv(Experiment &e, int64 ops, double secs, double ck_res); - static void table(Experiment &e, int64 ops, double secs, double ck_res); -private: -}; - -#endif diff --git a/src/Run.cpp b/src/Run.cpp deleted file mode 100644 index 0de55c8..0000000 --- a/src/Run.cpp +++ /dev/null @@ -1,792 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. 
Pase - initial API and implementation * - *******************************************************************************/ - -// -// Configuration -// - -// Implementation header -#include "Run.h" - -// System includes -#include -#include -#include -#include -#include -#if defined(NUMA) -#include -#endif - -// Local includes -#include -#include "Timer.h" - - -// -// Implementation -// - -static double max(double v1, double v2); -static double min(double v1, double v2); -typedef void (*benchmark)(const Chain**); -typedef benchmark (*generator)(int64 chains_per_thread, - int64 bytes_per_line, int64 bytes_per_chain, - int64 stride, int64 busy_cycles, bool prefetch); -static benchmark chase_pointers(int64 chains_per_thread, - int64 bytes_per_line, int64 bytes_per_chain, - int64 stride, int64 busy_cycles, bool prefetch); -static benchmark follow_streams(int64 chains_per_thread, - int64 bytes_per_line, int64 bytes_per_chain, - int64 stride, int64 busy_cycles, bool prefetch); - -Lock Run::global_mutex; -int64 Run::_ops_per_chain = 0; -double Run::_seconds = 1E9; - -Run::Run() : - exp(NULL), bp(NULL) { -} - -Run::~Run() { -} - -void Run::set(Experiment &e, SpinBarrier* sbp) { - this->exp = &e; - this->bp = sbp; -} - -int Run::run() { - // first allocate all memory for the chains, - // making sure it is allocated within the - // intended numa domains - Chain** chain_memory = new Chain*[this->exp->chains_per_thread]; - Chain** root = new Chain*[this->exp->chains_per_thread]; - -#if defined(NUMA) - // establish the node id where this thread - // will run. threads are mapped to nodes - // by the set-up code for Experiment. - int run_node_id = this->exp->thread_domain[this->thread_id()]; - numa_run_on_node(run_node_id); - - // establish the node id where this thread's - // memory will be allocated. 
- for (int i=0; i < this->exp->chains_per_thread; i++) { - int alloc_node_id = this->exp->chain_domain[this->thread_id()][i]; - nodemask_t alloc_mask; - nodemask_zero(&alloc_mask); - nodemask_set(&alloc_mask, alloc_node_id); - numa_set_membind(&alloc_mask); - - chain_memory[i] = new Chain[ this->exp->links_per_chain ]; - } -#else - for (int i = 0; i < this->exp->chains_per_thread; i++) { - chain_memory[i] = new Chain[this->exp->links_per_chain]; - } -#endif - - // initialize the chains and - // compile the function that - // will execute the tests - generator gen; - for (int i = 0; i < this->exp->chains_per_thread; i++) { - if (this->exp->access_pattern == Experiment::RANDOM) { - root[i] = random_mem_init(chain_memory[i]); - gen = chase_pointers; - } else if (this->exp->access_pattern == Experiment::STRIDED) { - if (0 < this->exp->stride) { - root[i] = forward_mem_init(chain_memory[i]); - } else { - root[i] = reverse_mem_init(chain_memory[i]); - } - gen = chase_pointers; - } else if (this->exp->access_pattern == Experiment::STREAM) { - root[i] = stream_mem_init(chain_memory[i]); - gen = follow_streams; - } - } - - if (this->exp->iterations <= 0) { - // compile benchmark - benchmark bench = gen(this->exp->chains_per_thread, - this->exp->bytes_per_line, this->exp->bytes_per_chain, - this->exp->stride, this->exp->busy_cycles, - this->exp->prefetch); - - volatile static double istart = 0; - volatile static double istop = 0; - volatile static double elapsed = 0; - volatile static int64 iters = 1; - volatile double bound = max(0.2, 10 * Timer::resolution()); - for (iters = 1; elapsed <= bound; iters = iters << 1) { - // barrier - this->bp->barrier(); - - // start timer - if (this->thread_id() == 0) { - istart = Timer::seconds(); - } - this->bp->barrier(); - - // chase pointers - for (int i = 0; i < iters; i++) - bench((const Chain**) root); - - // barrier - this->bp->barrier(); - - // stop timer - if (this->thread_id() == 0) { - istop = Timer::seconds(); - elapsed = 
istop - istart; - } - this->bp->barrier(); - } - - // calculate the number of iterations - if (this->thread_id() == 0) { - if (0 < this->exp->seconds) { - this->exp->iterations = max(1, - 0.9999 + 0.5 * this->exp->seconds * iters / elapsed); - } else { - this->exp->iterations = max(1, 0.9999 + iters / elapsed); - } - } - this->bp->barrier(); - } -#if defined(UNDEFINED) -#endif - - // compile benchmark - benchmark bench = gen(this->exp->chains_per_thread, - this->exp->bytes_per_line, this->exp->bytes_per_chain, - this->exp->stride, this->exp->busy_cycles, - this->exp->prefetch); - - for (int e = 0; e < this->exp->experiments; e++) { - // barrier - this->bp->barrier(); - - // start timer - double start = 0; - if (this->thread_id() == 0) - start = Timer::seconds(); - this->bp->barrier(); - - // chase pointers - for (int i = 0; i < this->exp->iterations; i++) - bench((const Chain**) root); - - // barrier - this->bp->barrier(); - - // stop timer - double stop = 0; - if (this->thread_id() == 0) - stop = Timer::seconds(); - this->bp->barrier(); - - if (0 <= e) { - if (this->thread_id() == 0) { - double delta = stop - start; - if (0 < delta) { - Run::_seconds = min(Run::_seconds, delta); - } - } - } - } - - this->bp->barrier(); - - for (int i = 0; i < this->exp->chains_per_thread; i++) { - if (chain_memory[i] != NULL - ) delete[] chain_memory[i]; - } - if (chain_memory != NULL - ) delete[] chain_memory; - - return 0; -} - -int dummy = 0; -void Run::mem_check(Chain *m) { - if (m == NULL - ) dummy += 1; -} - -static double max(double v1, double v2) { - if (v1 < v2) - return v2; - return v1; -} - -static double min(double v1, double v2) { - if (v2 < v1) - return v2; - return v1; -} - -// exclude 2 and Mersenne primes, i.e., -// primes of the form 2**n - 1, e.g., -// 3, 7, 31, 127 -static const int prime_table[] = { 5, 11, 13, 17, 19, 23, 37, 41, 43, 47, 53, - 61, 71, 73, 79, 83, 89, 97, 101, 103, 109, 113, 131, 137, 139, 149, 151, - 157, 163, }; -static const int 
prime_table_size = sizeof prime_table / sizeof prime_table[0]; - -Chain* -Run::random_mem_init(Chain *mem) { - // initialize pointers -- - // choose a page at random, then use - // one pointer from each cache line - // within the page. all pages and - // cache lines are chosen at random. - Chain* root = 0; - Chain* prev = 0; - int link_within_line = 0; - int64 local_ops_per_chain = 0; - - // we must set a lock because random() - // is not thread safe - Run::global_mutex.lock(); - setstate(this->exp->random_state[this->thread_id()]); - int page_factor = prime_table[random() % prime_table_size]; - int page_offset = random() % this->exp->pages_per_chain; - Run::global_mutex.unlock(); - - // loop through the pages - for (int i = 0; i < this->exp->pages_per_chain; i++) { - int page = (page_factor * i + page_offset) % this->exp->pages_per_chain; - Run::global_mutex.lock(); - setstate(this->exp->random_state[this->thread_id()]); - int line_factor = prime_table[random() % prime_table_size]; - int line_offset = random() % this->exp->lines_per_page; - Run::global_mutex.unlock(); - - // loop through the lines within a page - for (int j = 0; j < this->exp->lines_per_page; j++) { - int line_within_page = (line_factor * j + line_offset) - % this->exp->lines_per_page; - int link = page * this->exp->links_per_page - + line_within_page * this->exp->links_per_line - + link_within_line; - - if (root == 0) { -// printf("root = %d(%d)[0x%x].\n", page, line_within_page, mem+link); - prev = root = mem + link; - local_ops_per_chain += 1; - } else { -// printf("0x%x = %d(%d)[0x%x].\n", prev, page, line_within_page, mem+link); - prev->next = mem + link; - prev = prev->next; - local_ops_per_chain += 1; - } - } - } - - prev->next = root; - - Run::global_mutex.lock(); - Run::_ops_per_chain = local_ops_per_chain; - Run::global_mutex.unlock(); - - return root; -} - -Chain* -Run::forward_mem_init(Chain *mem) { - Chain* root = 0; - Chain* prev = 0; - int link_within_line = 0; - int64 
local_ops_per_chain = 0; - - for (int i = 0; i < this->exp->lines_per_chain; i += this->exp->stride) { - int link = i * this->exp->links_per_line + link_within_line; - if (root == NULL) { -// printf("root = %d(%d)[0x%x].\n", page, line_within_page, mem+link); - prev = root = mem + link; - local_ops_per_chain += 1; - } else { -// printf("0x%x = %d(%d)[0x%x].\n", prev, page, line_within_page, mem+link); - prev->next = mem + link; - prev = prev->next; - local_ops_per_chain += 1; - } - } - - prev->next = root; - - Run::global_mutex.lock(); - Run::_ops_per_chain = local_ops_per_chain; - Run::global_mutex.unlock(); - - return root; -} - -Chain* -Run::reverse_mem_init(Chain *mem) { - Chain* root = 0; - Chain* prev = 0; - int link_within_line = 0; - int64 local_ops_per_chain = 0; - - int stride = -this->exp->stride; - int last; - for (int i = 0; i < this->exp->lines_per_chain; i += stride) { - last = i; - } - - for (int i = last; 0 <= i; i -= stride) { - int link = i * this->exp->links_per_line + link_within_line; - if (root == 0) { -// printf("root = %d(%d)[0x%x].\n", page, line_within_page, mem+link); - prev = root = mem + link; - local_ops_per_chain += 1; - } else { -// printf("0x%x = %d(%d)[0x%x].\n", prev, page, line_within_page, mem+link); - prev->next = mem + link; - prev = prev->next; - local_ops_per_chain += 1; - } - } - - prev->next = root; - - Run::global_mutex.lock(); - Run::_ops_per_chain = local_ops_per_chain; - Run::global_mutex.unlock(); - - return root; -} - -static benchmark chase_pointers(int64 chains_per_thread, // memory loading per thread - int64 bytes_per_line, // ignored - int64 bytes_per_chain, // ignored - int64 stride, // ignored - int64 busy_cycles, // processing cycles - bool prefetch // prefetch? - ) { - // Create Compiler. - AsmJit::Compiler c; - - // Tell compiler the function prototype we want. It allocates variables representing - // function arguments that can be accessed through Compiler or Function instance. 
- c.newFunction(AsmJit::CALL_CONV_DEFAULT, AsmJit::FunctionBuilder1()); - - // Try to generate function without prolog/epilog code: - c.getFunction()->setHint(AsmJit::FUNCTION_HINT_NAKED, true); - - // Create labels. - AsmJit::Label L_Loop = c.newLabel(); - - // Function arguments. - AsmJit::GPVar chain(c.argGP(0)); - - // Save the head - std::vector heads(chains_per_thread); - for (int i = 0; i < chains_per_thread; i++) { - AsmJit::GPVar head = c.newGP(); - c.mov(head, ptr(chain)); - heads[i] = head; - } - - // Current position - std::vector positions(chains_per_thread); - for (int i = 0; i < chains_per_thread; i++) { - AsmJit::GPVar position = c.newGP(); - c.mov(position, heads[0]); - positions[i] = position; - } - - // Loop. - c.bind(L_Loop); - - // Process all links - for (int i = 0; i < chains_per_thread; i++) { - // Chase pointer - c.mov(positions[i], ptr(positions[i], offsetof(Chain, next))); - - // Prefetch next - if (prefetch) - c.prefetch(ptr(positions[i]), AsmJit::PREFETCH_T0); - } - - // Wait - for (int i = 0; i < busy_cycles; i++) - c.nop(); - - // Test if end reached - c.cmp(heads[0], positions[0]); - c.jne(L_Loop); - - // Finish. - c.endFunction(); - - // Make JIT function. - benchmark fn = AsmJit::function_cast(c.make()); - - // Ensure that everything is ok. - if (!fn) { - printf("Error making jit function (%u).\n", c.getError()); - return 0; - } - - return fn; -} - -// NOT WRITTEN YET -- DMP -// JUST A PLACE HOLDER! 
-Chain* Run::stream_mem_init(Chain *mem) { -// fprintf(stderr, "made it into stream_mem_init.\n"); -// fprintf(stderr, "chains_per_thread = %ld\n", this->exp->chains_per_thread); -// fprintf(stderr, "iterations = %ld\n", this->exp->iterations); -// fprintf(stderr, "bytes_per_chain = %ld\n", this->exp->bytes_per_chain); -// fprintf(stderr, "stride = %ld\n", this->exp->stride); - int64 local_ops_per_chain = 0; - double* tmp = (double *) mem; - int64 refs_per_line = this->exp->bytes_per_line / sizeof(double); - int64 refs_per_chain = this->exp->bytes_per_chain / sizeof(double); -// fprintf(stderr, "refs_per_chain = %ld\n", refs_per_chain); - - for (int64 i = 0; i < refs_per_chain; - i += this->exp->stride * refs_per_line) { - tmp[i] = 0; - local_ops_per_chain += 1; - } - - Run::global_mutex.lock(); - Run::_ops_per_chain = local_ops_per_chain; - Run::global_mutex.unlock(); - -// fprintf(stderr, "made it out of stream_mem_init.\n"); - return mem; -} - -static int64 summ_ck = 0; -void sum_chk(double t) { - if (t != 0) - summ_ck += 1; -} - -// NOT WRITTEN YET -- DMP -// JUST A PLACE HOLDER! 
-static benchmark follow_streams(int64 chains_per_thread, // memory loading per thread - int64 bytes_per_line, // ignored - int64 bytes_per_chain, // ignored - int64 stride, // ignored - int64 busy_cycles, // ignored - bool prefetch // ignored - ) { - return 0; - /* - int64 refs_per_line = bytes_per_line / sizeof(double); - int64 refs_per_chain = bytes_per_chain / sizeof(double); - - // chase pointers - switch (chains_per_thread) { - default: - case 1: - for (int64 i = 0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) { - t += a0[j]; - } - sum_chk(t); - } - break; - case 2: - for (int64 i = 0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) { - t += a0[j] + a1[j]; - } - sum_chk(t); - } - break; - case 3: - for (int64 i = 0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) { - t += a0[j] + a1[j] + a2[j]; - } - sum_chk(t); - } - break; - case 4: - for (int64 i = 0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j]; - } - sum_chk(t); - } - break; - case 5: - for (int64 i = 0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - double* a4 = (double *) root[4]; - for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j]; - } - sum_chk(t); - } - break; - case 6: - for (int64 
i = 0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - double* a4 = (double *) root[4]; - double* a5 = (double *) root[5]; - for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j]; - } - sum_chk(t); - } - break; - case 7: - for (int64 i = 0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - double* a4 = (double *) root[4]; - double* a5 = (double *) root[5]; - double* a6 = (double *) root[6]; - for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]; - } - sum_chk(t); - } - break; - case 8: - for (int64 i = 0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - double* a4 = (double *) root[4]; - double* a5 = (double *) root[5]; - double* a6 = (double *) root[6]; - double* a7 = (double *) root[7]; - for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] - + a7[j]; - } - sum_chk(t); - } - break; - case 9: - for (int64 i = 0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - double* a4 = (double *) root[4]; - double* a5 = (double *) root[5]; - double* a6 = (double *) root[6]; - double* a7 = (double *) root[7]; - double* a8 = (double *) root[8]; - for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] - + a7[j] + a8[j]; - } - sum_chk(t); - } - break; - case 10: - for 
(int64 i = 0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - double* a4 = (double *) root[4]; - double* a5 = (double *) root[5]; - double* a6 = (double *) root[6]; - double* a7 = (double *) root[7]; - double* a8 = (double *) root[8]; - double* a9 = (double *) root[9]; - for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] - + a7[j] + a8[j] + a9[j]; - } - sum_chk(t); - } - break; - case 11: - for (int64 i = 0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - double* a4 = (double *) root[4]; - double* a5 = (double *) root[5]; - double* a6 = (double *) root[6]; - double* a7 = (double *) root[7]; - double* a8 = (double *) root[8]; - double* a9 = (double *) root[9]; - double* a10 = (double *) root[10]; - for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] - + a7[j] + a8[j] + a9[j] + a10[j]; - } - sum_chk(t); - } - break; - case 12: - for (int64 i = 0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - double* a4 = (double *) root[4]; - double* a5 = (double *) root[5]; - double* a6 = (double *) root[6]; - double* a7 = (double *) root[7]; - double* a8 = (double *) root[8]; - double* a9 = (double *) root[9]; - double* a10 = (double *) root[10]; - double* a11 = (double *) root[11]; - for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] - + a7[j] + a8[j] + a9[j] + a10[j] + a11[j]; - } - sum_chk(t); - } - break; - case 13: - for (int64 i = 0; i < iterations; 
i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - double* a4 = (double *) root[4]; - double* a5 = (double *) root[5]; - double* a6 = (double *) root[6]; - double* a7 = (double *) root[7]; - double* a8 = (double *) root[8]; - double* a9 = (double *) root[9]; - double* a10 = (double *) root[10]; - double* a11 = (double *) root[11]; - double* a12 = (double *) root[12]; - for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] - + a7[j] + a8[j] + a9[j] + a10[j] + a11[j] + a12[j]; - } - sum_chk(t); - } - break; - case 14: - for (int64 i = 0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - double* a4 = (double *) root[4]; - double* a5 = (double *) root[5]; - double* a6 = (double *) root[6]; - double* a7 = (double *) root[7]; - double* a8 = (double *) root[8]; - double* a9 = (double *) root[9]; - double* a10 = (double *) root[10]; - double* a11 = (double *) root[11]; - double* a12 = (double *) root[12]; - double* a13 = (double *) root[13]; - for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] - + a7[j] + a8[j] + a9[j] + a10[j] + a11[j] + a12[j] - + a13[j]; - } - sum_chk(t); - } - break; - case 15: - for (int64 i = 0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - double* a4 = (double *) root[4]; - double* a5 = (double *) root[5]; - double* a6 = (double *) root[6]; - double* a7 = (double *) root[7]; - double* a8 = (double *) root[8]; - double* a9 = (double *) root[9]; - double* a10 = (double *) root[10]; - double* a11 = (double *) root[11]; - 
double* a12 = (double *) root[12]; - double* a13 = (double *) root[13]; - double* a14 = (double *) root[14]; - for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] - + a7[j] + a8[j] + a9[j] + a10[j] + a11[j] + a12[j] - + a13[j] + a14[j]; - } - sum_chk(t); - } - break; - case 16: - for (int64 i = 0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - double* a4 = (double *) root[4]; - double* a5 = (double *) root[5]; - double* a6 = (double *) root[6]; - double* a7 = (double *) root[7]; - double* a8 = (double *) root[8]; - double* a9 = (double *) root[9]; - double* a10 = (double *) root[10]; - double* a11 = (double *) root[11]; - double* a12 = (double *) root[12]; - double* a13 = (double *) root[13]; - double* a14 = (double *) root[14]; - double* a15 = (double *) root[15]; - for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] - + a7[j] + a8[j] + a9[j] + a10[j] + a11[j] + a12[j] - + a13[j] + a14[j] + a15[j]; - } - sum_chk(t); - } - break; - } - */ -} diff --git a/src/Run.h b/src/Run.h deleted file mode 100644 index 7f82076..0000000 --- a/src/Run.h +++ /dev/null @@ -1,62 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. 
Pase - initial API and implementation * - *******************************************************************************/ - -// -// Configuration -// - -// Include guard -#if !defined(Run_h) -#define Run_h - -// Local includes -#include "Thread.h" -#include "Lock.h" -#include "Chain.h" -#include "Types.h" -#include "Experiment.h" -#include "SpinBarrier.h" - - -// -// Class definition -// - -class Run: public Thread { -public: - Run(); - ~Run(); - int run(); - void set(Experiment &e, SpinBarrier* sbp); - - static int64 ops_per_chain() { - return _ops_per_chain; - } - static double seconds() { - return _seconds; - } - -private: - Experiment* exp; // experiment data - SpinBarrier* bp; // spin barrier used by all threads - - void mem_check(Chain *m); - Chain* random_mem_init(Chain *m); - Chain* forward_mem_init(Chain *m); - Chain* reverse_mem_init(Chain *m); - Chain* stream_mem_init(Chain *m); - - static Lock global_mutex; // global lock - static int64 _ops_per_chain; // total number of operations per chain - static double _seconds; // total number of seconds -}; - -#endif diff --git a/src/SpinBarrier.cpp b/src/SpinBarrier.cpp deleted file mode 100644 index 5ff5ce1..0000000 --- a/src/SpinBarrier.cpp +++ /dev/null @@ -1,54 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. Pase - initial API and implementation * - *******************************************************************************/ - -/****************************************************************************** - * * - * SpinBarrier * - * * - * Author: Douglas M. 
Pase * - * * - * Date: September 21, 2000 * - * Translated to C++, June 19, 2005 * - * * - * void barrier() * - * * - ******************************************************************************/ - -// -// Configuration -// - -// Implementation header -#include "SpinBarrier.h" - -// System includes -#include - - -// -// Implementation -// - -// create a new barrier -SpinBarrier::SpinBarrier(int participants) : - limit(participants) { - pthread_barrier_init(&barrier_obj, NULL, this->limit); -} - -// destroy an old barrier -SpinBarrier::~SpinBarrier() { -} - -// enter the barrier and wait. everyone leaves -// when the last participant enters the barrier. -void SpinBarrier::barrier() { - pthread_barrier_wait(&this->barrier_obj); -} diff --git a/src/SpinBarrier.h b/src/SpinBarrier.h deleted file mode 100644 index a329f7c..0000000 --- a/src/SpinBarrier.h +++ /dev/null @@ -1,50 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. Pase - initial API and implementation * - *******************************************************************************/ - -/****************************************************************************** - * * - * SpinBarrier * - * * - * Author: Douglas M. 
Pase * - * * - * Date: September 21, 2000 * - * Translated to C++, June 19, 2005 * - * Rewritten August 13,2005 * - * * - * void barrier() * - * * - ******************************************************************************/ - -// Include guard -#if !defined( SpinBarrier_h ) -#define SpinBarrier_h - -// System includes -#include - - -// -// Class definition -// - -class SpinBarrier { -public: - SpinBarrier(int participants); - ~SpinBarrier(); - - void barrier(); - -private: - int limit; // number of barrier participants - pthread_barrier_t barrier_obj; -}; - -#endif diff --git a/src/Thread.cpp b/src/Thread.cpp deleted file mode 100644 index c7ea37d..0000000 --- a/src/Thread.cpp +++ /dev/null @@ -1,77 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. 
Pase - initial API and implementation * - *******************************************************************************/ - -// -// Configuration -// - -// Implementation header -#include "Thread.h" - -// System includes -#include -#include -#include - -Lock Thread::_global_lock; -int Thread::count = 0; - - -// -// Implementation -// - -Thread::Thread() { - Thread::global_lock(); - this->id = Thread::count; - Thread::count += 1; - Thread::global_unlock(); -} - -Thread::~Thread() { -} - -int Thread::start() { - return pthread_create(&this->thread, NULL, Thread::start_routine, this); -} - -void* -Thread::start_routine(void* p) { - ((Thread*) p)->run(); - - return NULL; -} - -void Thread::exit() { - pthread_exit(NULL); -} - -int Thread::wait() { - pthread_join(this->thread, NULL); - - return 0; -} - -void Thread::lock() { - this->object_lock.lock(); -} - -void Thread::unlock() { - this->object_lock.unlock(); -} - -void Thread::global_lock() { - Thread::_global_lock.lock(); -} - -void Thread::global_unlock() { - Thread::_global_lock.unlock(); -} diff --git a/src/Thread.h b/src/Thread.h deleted file mode 100644 index 8ca2b76..0000000 --- a/src/Thread.h +++ /dev/null @@ -1,68 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. 
Pase - initial API and implementation * - *******************************************************************************/ - -// -// Configuration -// - -// Include guard -#if !defined(Thread_h) -#define Thread_h - -// System includes -#include - -// Local includes -#include "Lock.h" - - -// -// Class definition -// - -class Thread { -public: - Thread(); - ~Thread(); - - virtual int run() = 0; - - int start(); - int wait(); - int thread_count() { - return Thread::count; - } - int thread_id() { - return id; - } - - static void exit(); - -protected: - void lock(); - void unlock(); - static void global_lock(); - static void global_unlock(); - -private: - static void* start_routine(void *); - static Lock _global_lock; - - Lock object_lock; - - pthread_t thread; - - static int count; - int id; - int lock_obj; -}; - -#endif diff --git a/src/Timer.cpp b/src/Timer.cpp deleted file mode 100644 index 24ec8e9..0000000 --- a/src/Timer.cpp +++ /dev/null @@ -1,168 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. 
Pase - initial API and implementation * - *******************************************************************************/ - -// -// Configuration -// - -// Implementation header -#include "Timer.h" - -// System includes -#include -#include - -static int64 read_rtc(); -static void calibrate_rtc(int n); -static double wall_seconds(); - -static int wall_ticks = -1; -static int rtc_ticks = -1; -static double wall_elapsed = -1; -static int64 rtc_elapsed = -1; -static double time_factor = -1; - -#if !defined(RTC) && !defined(GTOD) -#define RTC -#endif - - -// -// Implementation -// - -#if defined(RTC) - -double Timer::seconds() { - return (double) read_rtc() * time_factor; -} - -int64 Timer::ticks() { - // See pg. 406 of the AMD x86-64 Architecture - // Programmer's Manual, Volume 2, System Programming - unsigned int eax = 0, edx = 0; - - __asm__ __volatile__( - "rdtsc ;" - "movl %%eax,%0;" - "movl %%edx,%1;" - "" - : "=r"(eax), "=r"(edx) - : - : "%eax", "%edx" - ); - - return ((int64) edx << 32) | (int64) eax; -} - -static int64 read_rtc() { - // See pg. 
406 of the AMD x86-64 Architecture - // Programmer's Manual, Volume 2, System Programming - unsigned int eax = 0, edx = 0; - - __asm__ __volatile__( - "rdtsc ;" - "movl %%eax,%0;" - "movl %%edx,%1;" - "" - : "=r"(eax), "=r"(edx) - : - : "%eax", "%edx" - ); - - return ((int64) edx << 32) | (int64) eax; -} - -void Timer::calibrate() { - Timer::calibrate(1000); -} - -void Timer::calibrate(int n) { - wall_ticks = n; - - double wall_start, wall_finish, t; - t = wall_seconds(); - while (t == (wall_start = wall_seconds())) { - ; - } - int64 rtc_start = read_rtc(); - for (int i = 0; i < wall_ticks; i++) { - t = wall_seconds(); - while (t == (wall_finish = wall_seconds())) { - ; - } - } - int64 rtc_finish = read_rtc(); - - wall_elapsed = wall_finish - wall_start; - rtc_elapsed = rtc_finish - rtc_start; - time_factor = wall_elapsed / (double) rtc_elapsed; -} - -static double wall_seconds() { - struct timeval t; - gettimeofday(&t, NULL); - - return (double) t.tv_sec + (double) t.tv_usec * 1E-6; -} - -#else - -double -Timer::seconds() -{ - struct timeval t; - gettimeofday(&t, NULL); - - return (double) t.tv_sec + (double) t.tv_usec * 1E-6; -} - -int64 -Timer::ticks() -{ - struct timeval t; - gettimeofday(&t, NULL); - - return 1000000 * (int64) t.tv_sec + (int64) t.tv_usec; -} - -void -Timer::calibrate() -{ -} - -void -Timer::calibrate(int n) -{ -} - -#endif - -static double min(double v1, double v2) { - if (v2 < v1) - return v2; - return v1; -} - -double Timer::resolution() { - double a, b, c = 1E9; - for (int i = 0; i < 10; i++) { - a = Timer::seconds(); - while (a == (b = Timer::seconds())) - ; - a = Timer::seconds(); - while (a == (b = Timer::seconds())) - ; - c = min(b - a, c); - } - - return c; -} diff --git a/src/Timer.h b/src/Timer.h deleted file mode 100644 index 7689f36..0000000 --- a/src/Timer.h +++ /dev/null @@ -1,38 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines 
Corporation. * - * All rights reserved. This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. Pase - initial API and implementation * - *******************************************************************************/ - -// -// Configuration -// - -// Include guard -#if !defined(Timer_h) -#define Timer_h - -// Local includes -#include "Types.h" - - -// -// Class definition -// - -class Timer { -public: - static double seconds(); - static double resolution(); - static int64 ticks(); - static void calibrate(); - static void calibrate(int n); -private: -}; - -#endif diff --git a/src/Types.h b/src/Types.h deleted file mode 100644 index 73bd501..0000000 --- a/src/Types.h +++ /dev/null @@ -1,38 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. 
Pase - initial API and implementation * - *******************************************************************************/ - -// -// Configuration -// - -// Include guard -#if !defined(Types_h) -#define Types_h - - -// -// Type definitions -// - -typedef long long int64; -typedef int int32; -typedef short int16; -typedef char int8; - -typedef unsigned long long uint64; -typedef unsigned int uint32; -typedef unsigned short uint16; -typedef unsigned char uint8; - -typedef double float64; -typedef float float32; - -#endif diff --git a/src/chain.h b/src/chain.h new file mode 100644 index 0000000..b929c4f --- /dev/null +++ b/src/chain.h @@ -0,0 +1,29 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. * + * All rights reserved. This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M. Pase - initial API and implementation * + *******************************************************************************/ + +// +// Configuration +// + +// Include guard +#if !defined(CHAIN_H) +#define CHAIN_H + + +// +// Struct definition +// + +struct Chain { + Chain* next; +}; + +#endif diff --git a/src/experiment.cpp b/src/experiment.cpp new file mode 100644 index 0000000..7e5d318 --- /dev/null +++ b/src/experiment.cpp @@ -0,0 +1,711 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. * + * All rights reserved. 
This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M. Pase - initial API and implementation * + *******************************************************************************/ + +// +// Configuration +// + +// Implementation header +#include "experiment.h" + +// System includes +#include +#include +#include +#include +#if defined(NUMA) +#include +#endif + +// Local includes +#include "chain.h" + + +// +// Implementation +// + +Experiment::Experiment() : + strict (0), + pointer_size (DEFAULT_POINTER_SIZE), + bytes_per_line (DEFAULT_BYTES_PER_LINE), + links_per_line (DEFAULT_LINKS_PER_LINE), + bytes_per_page (DEFAULT_BYTES_PER_PAGE), + lines_per_page (DEFAULT_LINES_PER_PAGE), + links_per_page (DEFAULT_LINKS_PER_PAGE), + bytes_per_chain (DEFAULT_BYTES_PER_CHAIN), + lines_per_chain (DEFAULT_LINES_PER_CHAIN), + links_per_chain (DEFAULT_LINKS_PER_CHAIN), + pages_per_chain (DEFAULT_PAGES_PER_CHAIN), + chains_per_thread(DEFAULT_CHAINS_PER_THREAD), + bytes_per_thread (DEFAULT_BYTES_PER_THREAD), + num_threads (DEFAULT_THREADS), + bytes_per_test (DEFAULT_BYTES_PER_TEST), + busy_cycles (DEFAULT_BUSY_CYCLES), + seconds (DEFAULT_SECONDS), + iterations (DEFAULT_ITERATIONS), + experiments (DEFAULT_EXPERIMENTS), + prefetch (DEFAULT_PREFETCH), + output_mode (TABLE), + access_pattern (RANDOM), + stride (1), + numa_placement (LOCAL), + offset_or_mask (0), + placement_map (NULL), + thread_domain (NULL), + chain_domain (NULL), + numa_max_domain (0), + num_numa_domains (1) +{ +} + +Experiment::~Experiment() { +} + +// interface: +// +// -l or --line bytes per cache line (line size) +// -p or --page bytes per page (page size) +// -c or --chain bytes per chain (used to compute pages per chain) +// -r or --references chains per thread (memory loading) +// -t or --threads 
number of threads (concurrency and contention) +// -i or --iters iterations +// -e or --experiments experiments +// -b or --busy amount of cycles processor should remain busy +// -f or --prefetch prefetch data +// -a or --access memory access pattern +// random random access pattern +// forward exclusive OR and mask +// reverse addition and offset +// -o or --output output mode +// hdr header only +// csv csv only +// both header + csv +// table human-readable table of values +// -n or --numa numa placement +// local local allocation of all chains +// xor exclusive OR and mask +// add addition and offset +// map explicit mapping of threads and chains to domains + +int Experiment::parse_args(int argc, char* argv[]) { + int error = 0; + for (int i = 1; i < argc; i++) { + if (strcasecmp(argv[i], "-x") == 0 + || strcasecmp(argv[i], "--strict") == 0) { + this->strict = 1; + } else if (strcasecmp(argv[i], "-s") == 0 + || strcasecmp(argv[i], "--seconds") == 0) { + i++; + if (i == argc) { + error = 1; + break; + } + this->seconds = Experiment::parse_real(argv[i]); + this->iterations = 0; + if (this->seconds == 0) { + error = 1; + break; + } + } else if (strcasecmp(argv[i], "-l") == 0 + || strcasecmp(argv[i], "--line") == 0) { + i++; + if (i == argc) { + error = 1; + break; + } + this->bytes_per_line = Experiment::parse_number(argv[i]); + if (this->bytes_per_line == 0) { + error = 1; + break; + } + } else if (strcasecmp(argv[i], "-p") == 0 + || strcasecmp(argv[i], "--page") == 0) { + i++; + if (i == argc) { + error = 1; + break; + } + this->bytes_per_page = Experiment::parse_number(argv[i]); + if (this->bytes_per_page == 0) { + error = 1; + break; + } + } else if (strcasecmp(argv[i], "-c") == 0 + || strcasecmp(argv[i], "--chain") == 0) { + i++; + if (i == argc) { + error = 1; + break; + } + this->bytes_per_chain = Experiment::parse_number(argv[i]); + if (this->bytes_per_chain == 0) { + error = 1; + break; + } + } else if (strcasecmp(argv[i], "-r") == 0 + || 
strcasecmp(argv[i], "--references") == 0) { + i++; + if (i == argc) { + error = 1; + break; + } + this->chains_per_thread = Experiment::parse_number(argv[i]); + if (this->chains_per_thread == 0) { + error = 1; + break; + } + } else if (strcasecmp(argv[i], "-t") == 0 + || strcasecmp(argv[i], "--threads") == 0) { + i++; + if (i == argc) { + error = 1; + break; + } + this->num_threads = Experiment::parse_number(argv[i]); + if (this->num_threads == 0) { + error = 1; + break; + } + } else if (strcasecmp(argv[i], "-i") == 0 + || strcasecmp(argv[i], "--iterations") == 0) { + i++; + if (i == argc) { + error = 1; + break; + } + this->iterations = Experiment::parse_number(argv[i]); + this->seconds = 0; + if (this->iterations == 0) { + error = 1; + break; + } + } else if (strcasecmp(argv[i], "-e") == 0 + || strcasecmp(argv[i], "--experiments") == 0) { + i++; + if (i == argc) { + error = 1; + break; + } + this->experiments = Experiment::parse_number(argv[i]); + if (this->experiments == 0) { + error = 1; + break; + } + } else if (strcasecmp(argv[i], "-b") == 0 + || strcasecmp(argv[i], "--busy") == 0) { + i++; + if (i == argc) { + error = 1; + break; + } + this->busy_cycles = Experiment::parse_number(argv[i]); + if (this->experiments == 0) { + error = 1; + break; + } + } else if (strcasecmp(argv[i], "-f") == 0 + || strcasecmp(argv[i], "--prefetch") == 0) { + this->prefetch = true; + } else if (strcasecmp(argv[i], "-a") == 0 + || strcasecmp(argv[i], "--access") == 0) { + i++; + if (i == argc) { + error = 1; + break; + } + if (strcasecmp(argv[i], "random") == 0) { + this->access_pattern = RANDOM; + } else if (strcasecmp(argv[i], "forward") == 0) { + this->access_pattern = STRIDED; + i++; + if (i == argc) { + error = 1; + break; + } + this->stride = Experiment::parse_number(argv[i]); + if (this->stride == 0) { + error = 1; + break; + } + } else if (strcasecmp(argv[i], "reverse") == 0) { + this->access_pattern = STRIDED; + i++; + if (i == argc) { + error = 1; + break; + } + 
this->stride = -Experiment::parse_number(argv[i]); + if (this->stride == 0) { + error = 1; + break; + } + } else if (strcasecmp(argv[i], "stream") == 0) { + this->access_pattern = STREAM; + i++; + if (i == argc) { + error = 1; + break; + } + this->stride = Experiment::parse_number(argv[i]); + if (this->stride == 0) { + error = 1; + break; + } + } else { + error = 1; + break; + } + } else if (strcasecmp(argv[i], "-o") == 0 + || strcasecmp(argv[i], "--output") == 0) { + i++; + if (i == argc) { + error = 1; + break; + } + if (strcasecmp(argv[i], "table") == 0) { + this->output_mode = TABLE; + } else if (strcasecmp(argv[i], "csv") == 0) { + this->output_mode = CSV; + } else if (strcasecmp(argv[i], "both") == 0) { + this->output_mode = BOTH; + } else if (strcasecmp(argv[i], "hdr") == 0) { + this->output_mode = HEADER; + } else if (strcasecmp(argv[i], "header") == 0) { + this->output_mode = HEADER; + } else { + error = 1; + break; + } + } else if (strcasecmp(argv[i], "-n") == 0 + || strcasecmp(argv[i], "--numa") == 0) { + i++; + if (i == argc) { + error = 1; + break; + } + if (strcasecmp(argv[i], "local") == 0) { + this->numa_placement = LOCAL; + } else if (strcasecmp(argv[i], "xor") == 0) { + this->numa_placement = XOR; + i++; + if (i == argc) { + error = 1; + break; + } + this->offset_or_mask = Experiment::parse_number(argv[i]); + } else if (strcasecmp(argv[i], "add") == 0) { + this->numa_placement = ADD; + i++; + if (i == argc) { + error = 1; + break; + } + this->offset_or_mask = Experiment::parse_number(argv[i]); + } else if (strcasecmp(argv[i], "map") == 0) { + this->numa_placement = MAP; + i++; + if (i == argc) { + error = 1; + break; + } + this->placement_map = argv[i]; + } else { + error = 1; + break; + } + } else { + error = 1; + break; + } + } + + + // if we've hit an error, print a message and quit + if (error) { + printf("usage: %s \n", argv[0]); + printf("where are selected from the following:\n"); + printf(" [-h|--help] # this message\n"); + printf(" 
[-l|--line] # bytes per cache line (cache line size)\n"); + printf(" [-p|--page] # bytes per page (page size)\n"); + printf(" [-c|--chain] # bytes per chain (used to compute pages per chain)\n"); + printf(" [-r|--references] # chains per thread (memory loading)\n"); + printf(" [-t|--threads] # number of threads (concurrency and contention)\n"); + printf(" [-i|--iterations] # iterations per experiment\n"); + printf(" [-e|--experiments] # experiments\n"); + printf(" [-a|--access] # memory access pattern\n"); + printf(" [-o|--output] # output format\n"); + printf(" [-n|--numa] # numa placement\n"); + printf(" [-s|--seconds] # run each experiment for seconds\n"); + printf(" [-b|--busy] # how much processing cycles each loop should count\n"); + printf(" [-f|--prefetch] # prefetch data\n"); + printf(" [-x|--strict] # fail rather than adjust options to sensible values\n"); + printf("\n"); + printf(" is selected from the following:\n"); + printf(" random # all chains are accessed randomly\n"); + printf(" forward # chains are in forward order with constant stride\n"); + printf(" reverse # chains are in reverse order with constant stride\n"); + printf(" stream # references are calculated rather than read from memory\n"); + printf("\n"); + printf("Note: is always a small positive integer.\n"); + printf("\n"); + printf(" is selected from the following:\n"); + printf(" hdr # csv header only\n"); + printf(" csv # results in csv format only\n"); + printf(" both # header and results in csv format\n"); + printf(" table # human-readable table of values\n"); + printf("\n"); + printf(" is selected from the following:\n"); + printf(" local # all chains are allocated locally\n"); + printf(" xor # exclusive OR and mask\n"); + printf(" add # addition and offset\n"); + printf(" map # explicit mapping of threads and chains to domains\n"); + printf("\n"); + printf(" has the form \"t1:c11,c12,...,c1m;t2:c21,...,c2m;...;tn:cn1,...,cnm\"\n"); + printf("where t[i] is the NUMA domain where the 
ith thread is run,\n"); + printf("and c[i][j] is the NUMA domain where the jth chain in the ith thread is allocated.\n"); + printf("(The values t[i] and c[i][j] must all be zero or small positive integers.)\n"); + printf("\n"); + printf("Note: for maps, each thread must have the same number of chains,\n"); + printf("maps override the -t or --threads specification,\n"); + printf("NUMA domains are whole numbers in the range of 0..N, and\n"); + printf("thread or chain domains that exceed the maximum NUMA domain\n"); + printf("are wrapped around using a MOD function.\n"); + printf("\n"); + printf("To determine the number of NUMA domains currently available\n"); + printf("on your system, use a command such as \"numastat\".\n"); + printf("\n"); + printf("Final note: strict is not yet fully implemented, and\n"); + printf("maps do not gracefully handle ill-formed map specifications.\n"); + + return 1; + } + + + // STRICT -- fail if specifications are inconsistent + + // compute lines per page and lines per chain + // based on input and defaults. + // we round up page and chain sizes when needed. 
+ this->lines_per_page = (this->bytes_per_page+this->bytes_per_line-1) / this->bytes_per_line; + this->bytes_per_page = this->bytes_per_line * this->lines_per_page; + this->pages_per_chain = (this->bytes_per_chain+this->bytes_per_page-1) / this->bytes_per_page; + this->bytes_per_chain = this->bytes_per_page * this->pages_per_chain; + this->bytes_per_thread = this->bytes_per_chain * this->chains_per_thread; + this->bytes_per_test = this->bytes_per_thread * this->num_threads; + this->links_per_line = this->bytes_per_line / pointer_size; + this->links_per_page = this->lines_per_page * this->links_per_line; + this->lines_per_chain = this->lines_per_page * this->pages_per_chain; + this->links_per_chain = this->lines_per_chain * this->links_per_line; + + + // allocate the chain roots for all threads + // and compute the chain locations + // (the chains themselves are initialized by the threads) + switch (this->numa_placement) { + case LOCAL: + case XOR: + case ADD: + this->thread_domain = new int32[this->num_threads]; + this->chain_domain = new int32*[this->num_threads]; + this->random_state = new char*[this->num_threads]; + + for (int i = 0; i < this->num_threads; i++) { + this->chain_domain[i] = new int32[this->chains_per_thread]; + + const int state_size = 256; + this->random_state[i] = new char[state_size]; + initstate((unsigned int) i, (char *) this->random_state[i], + (size_t) state_size); + } + break; + } + +#if defined(NUMA) + this->numa_max_domain = numa_max_node(); + this->num_numa_domains = this->numa_max_domain + 1; +#endif + + switch (this->numa_placement) { + case LOCAL: + default: + this->alloc_local(); + break; + case XOR: + this->alloc_xor(); + break; + case ADD: + this->alloc_add(); + break; + case MAP: + this->alloc_map(); + break; + } + + return 0; +} + +int64 Experiment::parse_number(const char* s) { + int64 result = 0; + + int len = strlen(s); + for (int i = 0; i < len; i++) { + if ('0' <= s[i] && s[i] <= '9') { + result = result * 10 + s[i] - '0'; 
+ } else if (s[i] == 'k' || s[i] == 'K') { + result = result << 10; + break; + } else if (s[i] == 'm' || s[i] == 'M') { + result = result << 20; + break; + } else if (s[i] == 'g' || s[i] == 'G') { + result = result << 30; + break; + } else if (s[i] == 't' || s[i] == 'T') { + result = result << 40; + break; + } else { + break; + } + } + + return result; +} + +float Experiment::parse_real(const char* s) { + float result = 0; + bool decimal = false; + float power = 1; + + int len = strlen(s); + for (int i = 0; i < len; i++) { + if ('0' <= s[i] && s[i] <= '9') { + if (!decimal) { + result = result * 10 + s[i] - '0'; + } else { + power = power / 10; + result = result + (s[i] - '0') * power; + } + } else if ('.' == s[i]) { + decimal = true; + } else { + break; + } + } + + return result; +} + +void Experiment::alloc_local() { + for (int i = 0; i < this->num_threads; i++) { + this->thread_domain[i] = i % this->num_numa_domains; + for (int j = 0; j < this->chains_per_thread; j++) { + this->chain_domain[i][j] = this->thread_domain[i]; + } + } +} + +void Experiment::alloc_xor() { + for (int i = 0; i < this->num_threads; i++) { + this->thread_domain[i] = i % this->num_numa_domains; + for (int j = 0; j < this->chains_per_thread; j++) { + this->chain_domain[i][j] = (this->thread_domain[i] + ^ this->offset_or_mask) % this->num_numa_domains; + } + } +} + +void Experiment::alloc_add() { + for (int i = 0; i < this->num_threads; i++) { + this->thread_domain[i] = i % this->num_numa_domains; + for (int j = 0; j < this->chains_per_thread; j++) { + this->chain_domain[i][j] = (this->thread_domain[i] + + this->offset_or_mask) % this->num_numa_domains; + } + } +} + +// DOES NOT HANDLE ILL-FORMED SPECIFICATIONS +void Experiment::alloc_map() { + // STRICT -- fail if specifications are inconsistent + + // maps look like "t1:c11,c12,...,c1m;t2:c21,...,c2m;...;tn:cn1,...,cnm" + // where t[i] is the thread domain of the ith thread, + // and c[i][j] is the chain domain of the jth chain in the ith 
thread + + // count the thread descriptors by counting ";" up to EOS + int threads = 1; + char *p = this->placement_map; + while (*p != '\0') { + if (*p == ';') + threads += 1; + p++; + } + int thread_domain[threads]; + + // count the chain descriptors by counting "," up to ";" or EOS + int chains = 1; + p = this->placement_map; + while (*p != '\0') { + if (*p == ';') + break; + if (*p == ',') + chains += 1; + p++; + } + int chain_domain[threads][chains]; + + int t = 0, c = 0; + p = this->placement_map; + while (*p != '\0') { + // everything up to ":" is the thread domain + int i = 0; + char buf[64]; + while (*p != '\0') { + if (*p == ':') { + p++; + break; + } + buf[i] = *p; + i++; + p++; + } + buf[i] = '\0'; + thread_domain[t] = Experiment::parse_number(buf); + + // search for one or several ',' + c = 0; + while (*p != '\0' && *p != ';') { + if (chains <= c || threads <= t) { + // error in the thread/chain specification + fprintf(stderr, "Malformed map.\n"); + exit(1); + } + int i = 0; + while (*p != '\0' && *p != ';') { + if (*p == ',') { + p++; + break; + } + buf[i] = *p; + i++; + p++; + } + buf[i] = '\0'; + chain_domain[t][c] = Experiment::parse_number(buf); + c++; + } + + if (*p == '\0') + break; + if (*p == ';') + p++; + t++; + } + + this->num_threads = threads; + this->chains_per_thread = chains; + + this->thread_domain = new int32[this->num_threads]; + this->chain_domain = new int32*[this->num_threads]; + this->random_state = new char*[this->num_threads]; + + for (int i = 0; i < this->num_threads; i++) { + this->thread_domain[i] = thread_domain[i] % this->num_numa_domains; + + const int state_size = 256; + this->random_state[i] = new char[state_size]; + initstate((unsigned int) i, (char *) this->random_state[i], + (size_t) state_size); + + this->chain_domain[i] = new int32[this->chains_per_thread]; + for (int j = 0; j < this->chains_per_thread; j++) { + this->chain_domain[i][j] = chain_domain[i][j] + % this->num_numa_domains; + } + } + + 
this->bytes_per_thread = this->bytes_per_chain * this->chains_per_thread; + this->bytes_per_test = this->bytes_per_thread * this->num_threads; +} + +void Experiment::print() { + printf("strict = %d\n", strict); + printf("pointer_size = %d\n", pointer_size); + printf("sizeof(Chain) = %d\n", sizeof(Chain)); + printf("sizeof(Chain *) = %d\n", sizeof(Chain *)); + printf("bytes_per_line = %d\n", bytes_per_line); + printf("links_per_line = %d\n", links_per_line); + printf("bytes_per_page = %d\n", bytes_per_page); + printf("lines_per_page = %d\n", lines_per_page); + printf("links_per_page = %d\n", links_per_page); + printf("bytes_per_chain = %d\n", bytes_per_chain); + printf("lines_per_chain = %d\n", lines_per_chain); + printf("links_per_chain = %d\n", links_per_chain); + printf("pages_per_chain = %d\n", pages_per_chain); + printf("chains_per_thread = %d\n", chains_per_thread); + printf("bytes_per_thread = %d\n", bytes_per_thread); + printf("num_threads = %d\n", num_threads); + printf("bytes_per_test = %d\n", bytes_per_test); + printf("busy cycles = %d\n", busy_cycles); + printf("prefetch = %d\n", prefetch); + printf("iterations = %d\n", iterations); + printf("experiments = %d\n", experiments); + printf("access_pattern = %d\n", access_pattern); + printf("stride = %d\n", stride); + printf("output_mode = %d\n", output_mode); + printf("numa_placement = %d\n", numa_placement); + printf("offset_or_mask = %d\n", offset_or_mask); + printf("numa_max_domain = %d\n", numa_max_domain); + printf("num_numa_domains = %d\n", num_numa_domains); + + for (int i = 0; i < this->num_threads; i++) { + printf("%d: ", this->thread_domain[i]); + for (int j = 0; j < this->chains_per_thread; j++) { + printf("%d,", this->chain_domain[i][j]); + } + printf("\n"); + } + + fflush(stdout); +} + +const char* Experiment::access() { + const char* result = NULL; + + if (this->access_pattern == RANDOM) { + result = "random"; + } else if (this->access_pattern == STRIDED && 0 < this->stride) { + result = 
"forward"; + } else if (this->access_pattern == STRIDED && this->stride < 0) { + result = "reverse"; + } else if (this->access_pattern == STREAM) { + result = "stream"; + } + + return result; +} + +const char* Experiment::placement() { + const char* result = NULL; + + if (this->numa_placement == LOCAL) { + result = "local"; + } else if (this->numa_placement == XOR) { + result = "xor"; + } else if (this->numa_placement == ADD) { + result = "add"; + } else if (this->numa_placement == MAP) { + result = "map"; + } + + return result; +} diff --git a/src/experiment.h b/src/experiment.h new file mode 100644 index 0000000..87ffbbb --- /dev/null +++ b/src/experiment.h @@ -0,0 +1,117 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. * + * All rights reserved. This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M. 
Pase - initial API and implementation *
+ *******************************************************************************/
+
+//
+// Configuration
+//
+
+// Include guard
+#if !defined(EXPERIMENT_H)
+#define EXPERIMENT_H
+
+// Local includes
+#include "chain.h"
+#include "types.h"
+
+
+//
+// Class definition
+//
+
+// Holds the configuration of one benchmark test: working-set geometry
+// (pointer, line, page, chain, thread and test sizes), timing parameters,
+// output mode, memory access pattern and NUMA placement, plus the
+// thread/chain to NUMA-domain maps.  All data members are public.
+class Experiment {
+public:
+    Experiment();
+    ~Experiment();
+
+    // command-line handling; main() treats a non-zero parse_args() result
+    // as "stop now"
+    int parse_args(int argc, char* argv[]);
+    int64 parse_number(const char* s);
+    float parse_real(const char* s);
+
+    // short printable names for numa_placement and access_pattern
+    const char* placement();
+    const char* access();
+
+    // fundamental parameters
+    int64 pointer_size; // number of bytes in a pointer
+    int64 bytes_per_line; // working set cache line size (bytes)
+    int64 links_per_line; // working set cache line size (links)
+    int64 bytes_per_page; // working set page size (in bytes)
+    int64 lines_per_page; // working set page size (in lines)
+    int64 links_per_page; // working set page size (in links)
+    int64 bytes_per_chain; // working set chain size (bytes)
+    int64 lines_per_chain; // working set chain size (lines)
+    int64 links_per_chain; // working set chain size (links)
+    int64 pages_per_chain; // working set chain size (pages)
+    int64 bytes_per_thread; // thread working set size (bytes)
+    int64 chains_per_thread;// memory loading per thread
+    int64 num_threads; // number of threads in the experiment
+    int64 bytes_per_test; // test working set size (bytes)
+    int64 busy_cycles; // processing cycles
+    bool prefetch; // use of prefetching
+
+    float seconds; // number of seconds per experiment
+    int64 iterations; // number of iterations per experiment
+    int64 experiments; // number of experiments per test
+
+    enum { CSV, BOTH, HEADER, TABLE }
+    output_mode; // results output mode
+
+    enum { RANDOM, STRIDED, STREAM }
+    access_pattern; // memory access pattern
+    // for STRIDED, the sign of stride selects forward (> 0) or reverse (< 0)
+    int64 stride;
+
+    enum { LOCAL, XOR, ADD, MAP }
+    numa_placement; // memory allocation mode
+    // NOTE(review): presumably the operand for the XOR / ADD placement
+    // modes -- confirm against the alloc_xor() / alloc_add() implementations
+    int64 offset_or_mask;
+    // NOTE(review): presumably the user-supplied map for MAP placement --
+    // confirm against alloc_map()
+    char* placement_map;
+
+    // maps threads and chains to numa domains
+    int32* thread_domain; // thread_domain[thread]
+    int32** chain_domain; // chain_domain[thread][chain]
+    int32 numa_max_domain; // highest numa domain id
+    int32 num_numa_domains; // number of numa domains
+
+    char** random_state; // random state for each thread
+
+    int strict; // strictly adhere to user input, or fail
+
+    // compile-time defaults; the derived values are computed from the
+    // fundamental ones so they stay mutually consistent
+    const static int32 DEFAULT_POINTER_SIZE = sizeof(Chain);
+    const static int32 DEFAULT_BYTES_PER_LINE = 64;
+    const static int32 DEFAULT_LINKS_PER_LINE = DEFAULT_BYTES_PER_LINE / DEFAULT_POINTER_SIZE;
+    const static int32 DEFAULT_BYTES_PER_PAGE = 4096;
+    const static int32 DEFAULT_LINES_PER_PAGE = DEFAULT_BYTES_PER_PAGE / DEFAULT_BYTES_PER_LINE;
+    const static int32 DEFAULT_LINKS_PER_PAGE = DEFAULT_LINES_PER_PAGE * DEFAULT_LINKS_PER_LINE;
+    const static int32 DEFAULT_PAGES_PER_CHAIN = 4096;
+    const static int32 DEFAULT_BYTES_PER_CHAIN = DEFAULT_BYTES_PER_PAGE * DEFAULT_PAGES_PER_CHAIN;
+    const static int32 DEFAULT_LINES_PER_CHAIN = DEFAULT_LINES_PER_PAGE * DEFAULT_PAGES_PER_CHAIN;
+    const static int32 DEFAULT_LINKS_PER_CHAIN = DEFAULT_LINES_PER_CHAIN * DEFAULT_BYTES_PER_LINE / DEFAULT_POINTER_SIZE;
+    const static int32 DEFAULT_CHAINS_PER_THREAD = 1;
+    const static int32 DEFAULT_BYTES_PER_THREAD = DEFAULT_BYTES_PER_CHAIN * DEFAULT_CHAINS_PER_THREAD;
+    const static int32 DEFAULT_THREADS = 1;
+    const static int32 DEFAULT_BYTES_PER_TEST = DEFAULT_BYTES_PER_THREAD * DEFAULT_THREADS;
+    const static int32 DEFAULT_BUSY_CYCLES = 0;
+    const static int32 DEFAULT_SECONDS = 1;
+    const static int32 DEFAULT_ITERATIONS = 0;
+    const static int32 DEFAULT_EXPERIMENTS = 1;
+
+    // 1 is the value of BOTH in the output_mode enum above
+    const static int32 DEFAULT_OUTPUT_MODE = 1;
+    const static bool DEFAULT_PREFETCH = false;
+
+    // one allocator per numa_placement mode (LOCAL, XOR, ADD, MAP)
+    void alloc_local();
+    void alloc_xor();
+    void alloc_add();
+    void alloc_map();
+
+    // dumps the configuration to stdout
+    void print();
+
+private:
+};
+
+#endif
diff --git a/src/lock.cpp b/src/lock.cpp
new file mode 100644
index 0000000..5c37bea
--- /dev/null
+++ b/src/lock.cpp
@@ -0,0 +1,46 @@
+/*******************************************************************************
+ * Copyright (c) 2006 International Business Machines Corporation. *
+ * All rights reserved. This program and the accompanying materials *
+ * are made available under the terms of the Common Public License v1.0 *
+ * which accompanies this distribution, and is available at *
+ * http://www.opensource.org/licenses/cpl1.0.php *
+ * *
+ * Contributors: *
+ * Douglas M. Pase - initial API and implementation *
+ *******************************************************************************/
+
+//
+// Configuration
+//
+
+// Implementation header
+#include "lock.h"
+
+// System includes
+// NOTE(review): the header name was lost when this patch was extracted;
+// <pthread.h> is restored because every call below is a pthread_mutex_* call.
+#include <pthread.h>
+
+
+//
+// Implementation
+//
+
+// Initialises the mutex with default attributes.
+Lock::Lock() {
+    pthread_mutex_init(&(this->mutex), NULL);
+}
+
+// Destroys the mutex.
+Lock::~Lock() {
+    pthread_mutex_destroy(&(this->mutex));
+}
+
+// Blocks until the mutex has been acquired.
+void Lock::lock() {
+    pthread_mutex_lock(&(this->mutex));
+}
+
+// Tries to acquire the mutex without blocking and returns the result of
+// pthread_mutex_trylock(): zero when the mutex was acquired, an error number
+// otherwise.  The result used to be dropped, so a function declared to
+// return int fell off its end without returning a value.
+int Lock::test() {
+    return pthread_mutex_trylock(&(this->mutex));
+}
+
+// Releases the mutex.
+void Lock::unlock() {
+    pthread_mutex_unlock(&(this->mutex));
+}
+
diff --git a/src/lock.h b/src/lock.h
new file mode 100644
index 0000000..69a0c8e
--- /dev/null
+++ b/src/lock.h
@@ -0,0 +1,40 @@
+/*******************************************************************************
+ * Copyright (c) 2006 International Business Machines Corporation. *
+ * All rights reserved. This program and the accompanying materials *
+ * are made available under the terms of the Common Public License v1.0 *
+ * which accompanies this distribution, and is available at *
+ * http://www.opensource.org/licenses/cpl1.0.php *
+ * *
+ * Contributors: *
+ * Douglas M.
Pase - initial API and implementation *
+ *******************************************************************************/
+
+//
+// Configuration
+//
+
+// Include guard
+#if !defined(LOCK_H)
+#define LOCK_H
+
+// System includes
+// NOTE(review): the header name was lost when this patch was extracted;
+// <pthread.h> is restored because the class stores a pthread_mutex_t.
+#include <pthread.h>
+
+
+//
+// Class definition
+//
+
+// Thin wrapper around a single pthread mutex.
+class Lock {
+public:
+    Lock();
+    ~Lock();
+    void lock();    // blocking acquire
+    int test();     // non-blocking acquire attempt
+    void unlock();  // release
+
+private:
+    pthread_mutex_t mutex;
+};
+
+#endif
diff --git a/src/main.cpp b/src/main.cpp
new file mode 100644
index 0000000..7e297d8
--- /dev/null
+++ b/src/main.cpp
@@ -0,0 +1,95 @@
+/*******************************************************************************
+ * Copyright (c) 2006 International Business Machines Corporation. *
+ * All rights reserved. This program and the accompanying materials *
+ * are made available under the terms of the Common Public License v1.0 *
+ * which accompanies this distribution, and is available at *
+ * http://www.opensource.org/licenses/cpl1.0.php *
+ * *
+ * Contributors: *
+ * Douglas M. Pase - initial API and implementation *
+ *******************************************************************************/
+
+//
+// Configuration
+//
+
+// System includes
+// NOTE(review): the header name after #include was lost when this patch was
+// extracted; restore it from the repository before building.
+#include
+
+// Local includes
+#include "run.h"
+#include "timer.h"
+#include "types.h"
+#include "output.h"
+#include "experiment.h"
+
+// This program allocates and accesses
+// a number of blocks of memory, one or more
+// for each thread that executes. Blocks
+// are divided into sub-blocks called
+// pages, and pages are divided into
+// sub-blocks called cache lines.
+//
+// All pages are collected into a list.
+// Pages are selected for the list in
+// a particular order. Each cache line
+// within the page is similarly gathered
+// into a list in a particular order.
+// In both cases the order may be random
+// or linear.
+//
+// A root pointer points to the first
+// cache line.
A pointer in the cache
+// line points to the next cache line,
+// which contains a pointer to the cache
+// line after that, and so on. This
+// forms a pointer chain that touches all
+// cache lines within the first page,
+// then all cache lines within the second
+// page, and so on until all pages are
+// covered. The last pointer points back
+// to the root, closing the chain into a
+// ring (the chain initialisers in run.cpp
+// finish with "prev->next = root").
+//
+// Depending on compile-time options,
+// pointers may be 32-bit or 64-bit
+// pointers.
+
+
+//
+// Implementation
+//
+
+int verbose = 0;
+
+// Program entry point: calibrates the timer, parses the command line into an
+// Experiment, starts one Run per thread, waits for all of them to finish and
+// prints the collected results.  Always returns 0, including when
+// parse_args() reports a non-zero result.
+int main(int argc, char* argv[]) {
+    Timer::calibrate(10000);
+    double clk_res = Timer::resolution();
+
+    Experiment e;
+    if (e.parse_args(argc, argv)) {
+        return 0;
+    }
+
+#if defined(UNDEFINED)
+    e.print();
+    if (argv != NULL) return 0;
+#endif
+
+    SpinBarrier sb(e.num_threads);
+
+    // The Run objects live on the heap: "Run r[e.num_threads]" is a
+    // variable-length array of class type, which is a compiler extension
+    // rather than standard C++.
+    Run* r = new Run[e.num_threads];
+    for (int64 i = 0; i < e.num_threads; i++) {
+        r[i].set(e, &sb);
+        r[i].start();
+    }
+
+    for (int64 i = 0; i < e.num_threads; i++) {
+        r[i].wait();
+    }
+
+    int64 ops = Run::ops_per_chain();
+    double secs = Run::seconds();
+
+    Output::print(e, ops, secs, clk_res);
+
+    delete[] r;
+
+    return 0;
+}
diff --git a/src/output.cpp b/src/output.cpp
new file mode 100644
index 0000000..0fcc74d
--- /dev/null
+++ b/src/output.cpp
@@ -0,0 +1,151 @@
+/*******************************************************************************
+ * Copyright (c) 2006 International Business Machines Corporation. *
+ * All rights reserved. This program and the accompanying materials *
+ * are made available under the terms of the Common Public License v1.0 *
+ * which accompanies this distribution, and is available at *
+ * http://www.opensource.org/licenses/cpl1.0.php *
+ * *
+ * Contributors: *
+ * Douglas M.
Pase - initial API and implementation *
+ *******************************************************************************/
+
+//
+// Configuration
+//
+
+// Implementation header
+#include "output.h"
+
+// System includes
+// NOTE(review): the header names after #include were lost when this patch
+// was extracted; restore them from the repository before building (printf
+// and fflush below need <stdio.h>).
+#include
+#include
+#include
+
+
+//
+// Implementation
+//
+
+// Prints the thread/chain domain map without the surrounding quotes, in the
+// form "<thread domain>:<chain domain>,...;<thread domain>:...".  Threads are
+// separated by ';' and the chains of one thread by ','.
+static void print_domain_map(Experiment &e) {
+    for (int64 i = 0; i < e.num_threads; i++) {
+        if (0 < i) printf(";");
+        printf("%d:", e.thread_domain[i]);
+        for (int64 j = 0; j < e.chains_per_thread; j++) {
+            if (0 < j) printf(",");
+            printf("%d", e.chain_domain[i][j]);
+        }
+    }
+}
+
+// Dispatches on e.output_mode: CSV prints the data row, BOTH prints the
+// header row and then the data row, HEADER prints only the header row, and
+// any other mode prints the human-readable table.
+void Output::print(Experiment &e, int64 ops, double secs, double ck_res) {
+    if (e.output_mode == Experiment::CSV) {
+        Output::csv(e, ops, secs, ck_res);
+    } else if (e.output_mode == Experiment::BOTH) {
+        Output::header(e, ops, secs, ck_res);
+        Output::csv(e, ops, secs, ck_res);
+    } else if (e.output_mode == Experiment::HEADER) {
+        Output::header(e, ops, secs, ck_res);
+    } else {
+        Output::table(e, ops, secs, ck_res);
+    }
+}
+
+// Prints the CSV column titles, in the same order as the values written by
+// csv().  The parameters are not used; the signature matches csv() and
+// table().
+void Output::header(Experiment &e, int64 ops, double secs, double ck_res) {
+    printf("pointer size (bytes),");
+    printf("cache line size (bytes),");
+    printf("page size (bytes),");
+    printf("chain size (bytes),");
+    printf("thread size (bytes),");
+    printf("test size (bytes),");
+    printf("chains per thread,");
+    printf("number of threads,");
+    printf("iterations,");
+    printf("experiments,");
+    printf("access pattern,");
+    printf("stride,");
+    printf("numa placement,");
+    printf("offset or mask,");
+    printf("numa domains,");
+    printf("domain map,");
+    printf("operations per chain,");
+    printf("total operations,");
+    printf("elapsed time (seconds),");
+    printf("elapsed time (timer ticks),");
+    // this title used to be passed a stray "ck_res * 1E9" argument that no
+    // conversion in the format string consumed
+    printf("clock resolution (ns),");
+    printf("memory latency (ns),");
+    printf("memory bandwidth (MB/s)\n");
+
+    fflush(stdout);
+}
+
+// Prints one CSV data row for the experiment.
+//
+//   ops     operations per chain
+//   secs    elapsed time in seconds
+//   ck_res  clock resolution in seconds
+//
+// int64 values are cast to long long and printed with %lld; the int32
+// num_numa_domains is printed with %d.
+void Output::csv(Experiment &e, int64 ops, double secs, double ck_res) {
+    printf("%lld,", (long long) e.pointer_size);
+    printf("%lld,", (long long) e.bytes_per_line);
+    printf("%lld,", (long long) e.bytes_per_page);
+    printf("%lld,", (long long) e.bytes_per_chain);
+    printf("%lld,", (long long) e.bytes_per_thread);
+    printf("%lld,", (long long) e.bytes_per_test);
+    printf("%lld,", (long long) e.chains_per_thread);
+    printf("%lld,", (long long) e.num_threads);
+    printf("%lld,", (long long) e.iterations);
+    printf("%lld,", (long long) e.experiments);
+    printf("%s,", e.access());
+    printf("%lld,", (long long) e.stride);
+    printf("%s,", e.placement());
+    printf("%lld,", (long long) e.offset_or_mask);
+    printf("%d,", e.num_numa_domains);
+    printf("\"");
+    print_domain_map(e);
+    printf("\",");
+    printf("%lld,", (long long) ops);
+    printf("%lld,", (long long) (ops * e.chains_per_thread * e.num_threads));
+    printf("%.3f,", secs);
+    printf("%.0f,", secs/ck_res);
+    printf("%.2f,", ck_res * 1E9);
+    // latency: seconds per single operation, reported in nanoseconds
+    printf("%.2f,", (secs / (ops * e.iterations)) * 1E9);
+    // bandwidth: one cache line per operation over all chains and threads
+    printf("%.3f\n", ((ops * e.iterations * e.chains_per_thread * e.num_threads * e.bytes_per_line) / secs) * 1E-6);
+
+    fflush(stdout);
+}
+
+// Prints the same values as csv(), one labelled "name = value" line each.
+void Output::table(Experiment &e, int64 ops, double secs, double ck_res) {
+    printf("pointer size = %lld (bytes)\n", (long long) e.pointer_size);
+    printf("cache line size = %lld (bytes)\n", (long long) e.bytes_per_line);
+    printf("page size = %lld (bytes)\n", (long long) e.bytes_per_page);
+    printf("chain size = %lld (bytes)\n", (long long) e.bytes_per_chain);
+    printf("thread size = %lld (bytes)\n", (long long) e.bytes_per_thread);
+    printf("test size = %lld (bytes)\n", (long long) e.bytes_per_test);
+    printf("chains per thread = %lld\n", (long long) e.chains_per_thread);
+    printf("number of threads = %lld\n", (long long) e.num_threads);
+    printf("iterations = %lld\n", (long long) e.iterations);
+    printf("experiments = %lld\n", (long long) e.experiments);
+    printf("access pattern = %s\n", e.access());
+    printf("stride = %lld\n", (long long) e.stride);
+    printf("numa placement = %s\n", e.placement());
+    printf("offset or mask = %lld\n", (long long) e.offset_or_mask);
+    printf("numa domains = %d\n", e.num_numa_domains);
+    printf("domain map = ");
+    printf("\"");
+    print_domain_map(e);
+    printf("\"\n");
+    printf("operations per chain = %lld\n", (long long) ops);
+    printf("total operations = %lld\n", (long long) (ops * e.chains_per_thread * e.num_threads));
+    printf("elapsed time = %.3f (seconds)\n", secs);
+    printf("elapsed time = %.0f (timer ticks)\n", secs/ck_res);
+    printf("clock resolution = %.2f (ns)\n", ck_res * 1E9);
+    printf("memory latency = %.2f (ns)\n", (secs / (ops * e.iterations)) * 1E9);
+    printf("memory bandwidth = %.3f (MB/s)\n", ((ops * e.iterations * e.chains_per_thread * e.num_threads * e.bytes_per_line) / secs) * 1E-6);
+
+    fflush(stdout);
+}
diff --git a/src/output.h b/src/output.h
new file mode 100644
index 0000000..2229b33
--- /dev/null
+++ b/src/output.h
@@ -0,0 +1,38 @@
+/*******************************************************************************
+ * Copyright (c) 2006 International Business Machines Corporation. *
+ * All rights reserved. This program and the accompanying materials *
+ * are made available under the terms of the Common Public License v1.0 *
+ * which accompanies this distribution, and is available at *
+ * http://www.opensource.org/licenses/cpl1.0.php *
+ * *
+ * Contributors: *
+ * Douglas M.
Pase - initial API and implementation *
+ *******************************************************************************/
+
+//
+// Configuration
+//
+
+// Include guard
+#if !defined(OUTPUT_H)
+#define OUTPUT_H
+
+// Local includes
+#include "types.h"
+#include "experiment.h"
+
+
+//
+// Class definition
+//
+
+// Static-only helper that reports the results of an experiment.  All four
+// functions take the same arguments: the experiment, the operation count
+// (ops), the elapsed time (secs) and the clock resolution (ck_res).
+class Output {
+public:
+    // dispatches to the functions below
+    static void print(Experiment &e, int64 ops, double secs, double ck_res);
+    static void header(Experiment &e, int64 ops, double secs, double ck_res);
+    static void csv(Experiment &e, int64 ops, double secs, double ck_res);
+    static void table(Experiment &e, int64 ops, double secs, double ck_res);
+private:
+};
+
+#endif
diff --git a/src/run.cpp b/src/run.cpp
new file mode 100644
index 0000000..54ef7c1
--- /dev/null
+++ b/src/run.cpp
@@ -0,0 +1,792 @@
+/*******************************************************************************
+ * Copyright (c) 2006 International Business Machines Corporation. *
+ * All rights reserved. This program and the accompanying materials *
+ * are made available under the terms of the Common Public License v1.0 *
+ * which accompanies this distribution, and is available at *
+ * http://www.opensource.org/licenses/cpl1.0.php *
+ * *
+ * Contributors: *
+ * Douglas M.
Pase - initial API and implementation *
+ *******************************************************************************/
+
+//
+// Configuration
+//
+
+// Implementation header
+#include "run.h"
+
+// System includes
+// NOTE(review): the header names after #include were lost when this patch
+// was extracted (here, under NUMA, and for the first local include below);
+// restore them from the repository before building.
+#include
+#include
+#include
+#include
+#include
+#if defined(NUMA)
+#include
+#endif
+
+// Local includes
+#include
+#include "timer.h"
+
+
+//
+// Implementation
+//
+
+static double max(double v1, double v2);
+static double min(double v1, double v2);
+
+// a compiled benchmark walks the chains whose roots it is given
+typedef void (*benchmark)(const Chain**);
+
+// a generator builds a benchmark for the given configuration
+typedef benchmark (*generator)(int64 chains_per_thread,
+        int64 bytes_per_line, int64 bytes_per_chain,
+        int64 stride, int64 busy_cycles, bool prefetch);
+static benchmark chase_pointers(int64 chains_per_thread,
+        int64 bytes_per_line, int64 bytes_per_chain,
+        int64 stride, int64 busy_cycles, bool prefetch);
+static benchmark follow_streams(int64 chains_per_thread,
+        int64 bytes_per_line, int64 bytes_per_chain,
+        int64 stride, int64 busy_cycles, bool prefetch);
+
+// state shared by all Run instances
+Lock Run::global_mutex;
+int64 Run::_ops_per_chain = 0;
+double Run::_seconds = 1E9;
+
+Run::Run() :
+    exp(NULL), bp(NULL) {
+}
+
+Run::~Run() {
+}
+
+// Stores the experiment and the barrier used by run().  Only the addresses
+// are kept, so both objects must outlive this Run.
+void Run::set(Experiment &e, SpinBarrier* sbp) {
+    this->exp = &e;
+    this->bp = sbp;
+}
+
+// Thread body.  Allocates and initialises this thread's chains, optionally
+// calibrates the iteration count, then times exp->experiments repetitions
+// and keeps the smallest positive elapsed time in Run::_seconds.
+//
+// Returns 0 on success, or 1 when no benchmark generator could be selected
+// (no chains, or an access pattern that is not handled).
+int Run::run() {
+    // first allocate all memory for the chains,
+    // making sure it is allocated within the
+    // intended numa domains
+    Chain** chain_memory = new Chain*[this->exp->chains_per_thread];
+    Chain** root = new Chain*[this->exp->chains_per_thread];
+
+#if defined(NUMA)
+    // establish the node id where this thread
+    // will run. threads are mapped to nodes
+    // by the set-up code for Experiment.
+    int run_node_id = this->exp->thread_domain[this->thread_id()];
+    numa_run_on_node(run_node_id);
+
+    // establish the node id where this thread's
+    // memory will be allocated.
+    for (int64 i = 0; i < this->exp->chains_per_thread; i++) {
+        int alloc_node_id = this->exp->chain_domain[this->thread_id()][i];
+        nodemask_t alloc_mask;
+        nodemask_zero(&alloc_mask);
+        nodemask_set(&alloc_mask, alloc_node_id);
+        numa_set_membind(&alloc_mask);
+
+        chain_memory[i] = new Chain[ this->exp->links_per_chain ];
+    }
+#else
+    for (int64 i = 0; i < this->exp->chains_per_thread; i++) {
+        chain_memory[i] = new Chain[this->exp->links_per_chain];
+    }
+#endif
+
+    // initialize the chains and
+    // compile the function that
+    // will execute the tests
+    generator gen = NULL;
+    for (int64 i = 0; i < this->exp->chains_per_thread; i++) {
+        if (this->exp->access_pattern == Experiment::RANDOM) {
+            root[i] = random_mem_init(chain_memory[i]);
+            gen = chase_pointers;
+        } else if (this->exp->access_pattern == Experiment::STRIDED) {
+            if (0 < this->exp->stride) {
+                root[i] = forward_mem_init(chain_memory[i]);
+            } else {
+                root[i] = reverse_mem_init(chain_memory[i]);
+            }
+            gen = chase_pointers;
+        } else if (this->exp->access_pattern == Experiment::STREAM) {
+            root[i] = stream_mem_init(chain_memory[i]);
+            gen = follow_streams;
+        }
+    }
+
+    // gen stays NULL when there are no chains or the access pattern is not
+    // handled above.  The decision depends only on the shared experiment,
+    // so every thread takes this exit together and none is left waiting at
+    // a barrier.
+    if (gen == NULL) {
+        for (int64 i = 0; i < this->exp->chains_per_thread; i++) {
+            delete[] chain_memory[i];
+        }
+        delete[] chain_memory;
+        delete[] root;
+        return 1;
+    }
+
+    if (this->exp->iterations <= 0) {
+        // compile benchmark
+        // NOTE(review): follow_streams() currently returns 0, so the STREAM
+        // pattern ends up calling a null function pointer below.
+        benchmark bench = gen(this->exp->chains_per_thread,
+                this->exp->bytes_per_line, this->exp->bytes_per_chain,
+                this->exp->stride, this->exp->busy_cycles,
+                this->exp->prefetch);
+
+        // these statics are shared by every thread running this function;
+        // thread 0 writes the timestamps between barriers
+        volatile static double istart = 0;
+        volatile static double istop = 0;
+        volatile static double elapsed = 0;
+        volatile static int64 iters = 1;
+        volatile double bound = max(0.2, 10 * Timer::resolution());
+        // double the trial iteration count until one timed trial takes
+        // longer than the bound
+        for (iters = 1; elapsed <= bound; iters = iters << 1) {
+            // barrier
+            this->bp->barrier();
+
+            // start timer
+            if (this->thread_id() == 0) {
+                istart = Timer::seconds();
+            }
+            this->bp->barrier();
+
+            // chase pointers
+            for (int64 i = 0; i < iters; i++)
+                bench((const Chain**) root);
+
+            // barrier
+            this->bp->barrier();
+
+            // stop timer
+            if (this->thread_id() == 0) {
+                istop = Timer::seconds();
+                elapsed = istop - istart;
+            }
+            this->bp->barrier();
+        }
+
+        // calculate the number of iterations
+        if (this->thread_id() == 0) {
+            if (0 < this->exp->seconds) {
+                this->exp->iterations = max(1,
+                        0.9999 + 0.5 * this->exp->seconds * iters / elapsed);
+            } else {
+                this->exp->iterations = max(1, 0.9999 + iters / elapsed);
+            }
+        }
+        this->bp->barrier();
+    }
+#if defined(UNDEFINED)
+#endif
+
+    // compile benchmark
+    benchmark bench = gen(this->exp->chains_per_thread,
+            this->exp->bytes_per_line, this->exp->bytes_per_chain,
+            this->exp->stride, this->exp->busy_cycles,
+            this->exp->prefetch);
+
+    for (int64 e = 0; e < this->exp->experiments; e++) {
+        // barrier
+        this->bp->barrier();
+
+        // start timer
+        double start = 0;
+        if (this->thread_id() == 0)
+            start = Timer::seconds();
+        this->bp->barrier();
+
+        // chase pointers
+        for (int64 i = 0; i < this->exp->iterations; i++)
+            bench((const Chain**) root);
+
+        // barrier
+        this->bp->barrier();
+
+        // stop timer
+        double stop = 0;
+        if (this->thread_id() == 0)
+            stop = Timer::seconds();
+        this->bp->barrier();
+
+        // thread 0 keeps the fastest positive time seen so far
+        if (0 <= e) {
+            if (this->thread_id() == 0) {
+                double delta = stop - start;
+                if (0 < delta) {
+                    Run::_seconds = min(Run::_seconds, delta);
+                }
+            }
+        }
+    }
+
+    this->bp->barrier();
+
+    // release the chains and both pointer tables (root used to be leaked);
+    // delete[] on a null pointer is a no-op, so no NULL checks are needed
+    for (int64 i = 0; i < this->exp->chains_per_thread; i++) {
+        delete[] chain_memory[i];
+    }
+    delete[] chain_memory;
+    delete[] root;
+
+    return 0;
+}
+
+int dummy = 0;
+
+// Bumps the global dummy counter when m is NULL.
+void Run::mem_check(Chain *m) {
+    if (m == NULL
+    ) dummy += 1;
+}
+
+// Returns the larger of v1 and v2.
+static double max(double v1, double v2) {
+    if (v1 < v2)
+        return v2;
+    return v1;
+}
+
+// Returns the smaller of v1 and v2.
+static double min(double v1, double v2) {
+    if (v2 < v1)
+        return v2;
+    return v1;
+}
+
+// exclude 2 and Mersenne primes, i.e.,
+// primes of the form 2**n - 1, e.g.,
+// 3, 7, 31, 127
+static const int prime_table[] = { 5, 11, 13, 17, 19, 23, 37, 41, 43, 47, 53,
+        61, 71, 73, 79, 83, 89, 97, 101, 103, 109, 113, 131, 137, 139, 149, 151,
+        157, 163, };
+static const int
prime_table_size = sizeof prime_table / sizeof prime_table[0];
+
+// Links the cache lines of mem into one ring that visits every page and,
+// within each page, every line, using one link per line.  Page order and
+// line order are each generated as (factor * index + offset) modulo the
+// count, with the factor drawn from prime_table and the offset drawn from
+// random().  Stores the number of links in the ring in Run::_ops_per_chain
+// and returns the first link.
+Chain*
+Run::random_mem_init(Chain *mem) {
+    // initialize pointers --
+    // choose a page at random, then use
+    // one pointer from each cache line
+    // within the page. all pages and
+    // cache lines are chosen at random.
+    Chain* root = 0;
+    Chain* prev = 0;
+    int link_within_line = 0;
+    int64 local_ops_per_chain = 0;
+
+    // we must set a lock because random()
+    // is not thread safe
+    Run::global_mutex.lock();
+    setstate(this->exp->random_state[this->thread_id()]);
+    int page_factor = prime_table[random() % prime_table_size];
+    int page_offset = random() % this->exp->pages_per_chain;
+    Run::global_mutex.unlock();
+
+    // loop through the pages
+    for (int i = 0; i < this->exp->pages_per_chain; i++) {
+        int page = (page_factor * i + page_offset) % this->exp->pages_per_chain;
+        Run::global_mutex.lock();
+        setstate(this->exp->random_state[this->thread_id()]);
+        int line_factor = prime_table[random() % prime_table_size];
+        int line_offset = random() % this->exp->lines_per_page;
+        Run::global_mutex.unlock();
+
+        // loop through the lines within a page
+        for (int j = 0; j < this->exp->lines_per_page; j++) {
+            int line_within_page = (line_factor * j + line_offset)
+                    % this->exp->lines_per_page;
+            // 64-bit index: the product can exceed the range of int for
+            // very large chains
+            int64 link = page * this->exp->links_per_page
+                    + line_within_page * this->exp->links_per_line
+                    + link_within_line;
+
+            if (root == 0) {
+//                printf("root = %d(%d)[0x%x].\n", page, line_within_page, mem+link);
+                prev = root = mem + link;
+                local_ops_per_chain += 1;
+            } else {
+//                printf("0x%x = %d(%d)[0x%x].\n", prev, page, line_within_page, mem+link);
+                prev->next = mem + link;
+                prev = prev->next;
+                local_ops_per_chain += 1;
+            }
+        }
+    }
+
+    // close the ring
+    prev->next = root;
+
+    Run::global_mutex.lock();
+    Run::_ops_per_chain = local_ops_per_chain;
+    Run::global_mutex.unlock();
+
+    return root;
+}
+
+// Links every stride-th cache line of mem, in ascending order, into a ring.
+// Stores the number of links in Run::_ops_per_chain and returns the first
+// link.  Run::run() calls this only when the stride is positive.
+Chain*
+Run::forward_mem_init(Chain *mem) {
+    Chain* root = 0;
+    Chain* prev = 0;
+    int link_within_line = 0;
+    int64 local_ops_per_chain = 0;
+
+    for (int64 i = 0; i < this->exp->lines_per_chain; i += this->exp->stride) {
+        int64 link = i * this->exp->links_per_line + link_within_line;
+        if (root == NULL) {
+//            printf("root = %d(%d)[0x%x].\n", page, line_within_page, mem+link);
+            prev = root = mem + link;
+            local_ops_per_chain += 1;
+        } else {
+//            printf("0x%x = %d(%d)[0x%x].\n", prev, page, line_within_page, mem+link);
+            prev->next = mem + link;
+            prev = prev->next;
+            local_ops_per_chain += 1;
+        }
+    }
+
+    // close the ring
+    prev->next = root;
+
+    Run::global_mutex.lock();
+    Run::_ops_per_chain = local_ops_per_chain;
+    Run::global_mutex.unlock();
+
+    return root;
+}
+
+// Links every |stride|-th cache line of mem, in descending order, into a
+// ring, starting from the highest line that a forward walk with the same
+// step would reach.  Stores the number of links in Run::_ops_per_chain and
+// returns the first link.
+Chain*
+Run::reverse_mem_init(Chain *mem) {
+    Chain* root = 0;
+    Chain* prev = 0;
+    int link_within_line = 0;
+    int64 local_ops_per_chain = 0;
+
+    int64 stride = -this->exp->stride;
+
+    // find the last line a forward walk would touch; initialised so that it
+    // is never read uninitialised when the loop body does not run
+    int64 last = 0;
+    for (int64 i = 0; i < this->exp->lines_per_chain; i += stride) {
+        last = i;
+    }
+
+    for (int64 i = last; 0 <= i; i -= stride) {
+        int64 link = i * this->exp->links_per_line + link_within_line;
+        if (root == 0) {
+//            printf("root = %d(%d)[0x%x].\n", page, line_within_page, mem+link);
+            prev = root = mem + link;
+            local_ops_per_chain += 1;
+        } else {
+//            printf("0x%x = %d(%d)[0x%x].\n", prev, page, line_within_page, mem+link);
+            prev->next = mem + link;
+            prev = prev->next;
+            local_ops_per_chain += 1;
+        }
+    }
+
+    // close the ring
+    prev->next = root;
+
+    Run::global_mutex.lock();
+    Run::_ops_per_chain = local_ops_per_chain;
+    Run::global_mutex.unlock();
+
+    return root;
+}
+
+// JIT-compiles a function that takes the array of chain roots and walks all
+// chains_per_thread chains in lock step, one link of every chain per loop
+// iteration, until chain 0 is back at its root.  Returns 0 when the
+// function could not be generated.
+//
+// Each chain is now loaded from its own slot of the root array and starts at
+// its own root; previously every head was read from slot 0 and every
+// position started at heads[0], so all "chains" walked chain 0.
+//
+// NOTE(review): the template arguments on FunctionBuilder1, std::vector and
+// function_cast were lost when this patch was extracted and are restored
+// here from the benchmark typedef and the element types in use -- confirm
+// against the repository.
+static benchmark chase_pointers(int64 chains_per_thread, // memory loading per thread
+        int64 bytes_per_line, // ignored
+        int64 bytes_per_chain, // ignored
+        int64 stride, // ignored
+        int64 busy_cycles, // processing cycles
+        bool prefetch // prefetch?
+        ) {
+    // Create Compiler.
+    AsmJit::Compiler c;
+
+    // Tell compiler the function prototype we want. It allocates variables representing
+    // function arguments that can be accessed through Compiler or Function instance.
+    c.newFunction(AsmJit::CALL_CONV_DEFAULT, AsmJit::FunctionBuilder1<AsmJit::Void, const Chain**>());
+
+    // Try to generate function without prolog/epilog code:
+    c.getFunction()->setHint(AsmJit::FUNCTION_HINT_NAKED, true);
+
+    // Create labels.
+    AsmJit::Label L_Loop = c.newLabel();
+
+    // Function arguments.
+    AsmJit::GPVar chain(c.argGP(0));
+
+    // Save the head of every chain: heads[i] = chain[i]
+    std::vector<AsmJit::GPVar> heads(chains_per_thread);
+    for (int i = 0; i < chains_per_thread; i++) {
+        AsmJit::GPVar head = c.newGP();
+        c.mov(head, ptr(chain, i * sizeof(Chain*)));
+        heads[i] = head;
+    }
+
+    // Current position, one per chain, starting at that chain's head
+    std::vector<AsmJit::GPVar> positions(chains_per_thread);
+    for (int i = 0; i < chains_per_thread; i++) {
+        AsmJit::GPVar position = c.newGP();
+        c.mov(position, heads[i]);
+        positions[i] = position;
+    }
+
+    // Loop.
+    c.bind(L_Loop);
+
+    // Process all links
+    for (int i = 0; i < chains_per_thread; i++) {
+        // Chase pointer
+        c.mov(positions[i], ptr(positions[i], offsetof(Chain, next)));
+
+        // Prefetch next
+        if (prefetch)
+            c.prefetch(ptr(positions[i]), AsmJit::PREFETCH_T0);
+    }
+
+    // Wait
+    for (int i = 0; i < busy_cycles; i++)
+        c.nop();
+
+    // Test if end reached
+    c.cmp(heads[0], positions[0]);
+    c.jne(L_Loop);
+
+    // Finish.
+    c.endFunction();
+
+    // Make JIT function.
+    benchmark fn = AsmJit::function_cast<benchmark>(c.make());
+
+    // Ensure that everything is ok.
+    if (!fn) {
+        printf("Error making jit function (%u).\n", c.getError());
+        return 0;
+    }
+
+    return fn;
+}
+
+// NOT WRITTEN YET -- DMP
+// JUST A PLACE HOLDER!
+// Treats mem as an array of doubles and zeroes one element every
+// (stride * doubles-per-line) elements across the chain.  The number of
+// elements written is stored in Run::_ops_per_chain under the global mutex.
+// Returns mem unchanged.
+Chain* Run::stream_mem_init(Chain *mem) {
+//    fprintf(stderr, "made it into stream_mem_init.\n");
+//    fprintf(stderr, "chains_per_thread = %ld\n", this->exp->chains_per_thread);
+//    fprintf(stderr, "iterations = %ld\n", this->exp->iterations);
+//    fprintf(stderr, "bytes_per_chain = %ld\n", this->exp->bytes_per_chain);
+//    fprintf(stderr, "stride = %ld\n", this->exp->stride);
+    double* values = (double *) mem;
+    int64 refs_per_line = this->exp->bytes_per_line / sizeof(double);
+    int64 refs_per_chain = this->exp->bytes_per_chain / sizeof(double);
+//    fprintf(stderr, "refs_per_chain = %ld\n", refs_per_chain);
+
+    // distance, in doubles, between two consecutive touched elements
+    const int64 step = this->exp->stride * refs_per_line;
+
+    int64 touched = 0;
+    int64 index = 0;
+    while (index < refs_per_chain) {
+        values[index] = 0;
+        touched += 1;
+        index += step;
+    }
+
+    Run::global_mutex.lock();
+    Run::_ops_per_chain = touched;
+    Run::global_mutex.unlock();
+
+//    fprintf(stderr, "made it out of stream_mem_init.\n");
+    return mem;
+}
+
+// counts how many times sum_chk() has been handed a non-zero value
+static int64 summ_ck = 0;
+
+// Increments summ_ck when t is not zero.
+void sum_chk(double t) {
+    if (t == 0)
+        return;
+    summ_ck += 1;
+}
+
+// NOT WRITTEN YET -- DMP
+// JUST A PLACE HOLDER!
+// Placeholder for the streaming benchmark: it always returns 0 and reads none
+// of its arguments (the "memory loading per thread" note on chains_per_thread
+// describes the disabled code below, not the live path).
+// NOTE(review): `benchmark` is declared outside this view -- presumably a
+// function-pointer type, in which case 0 means "no routine"; confirm that
+// callers check for it before invoking the result.
+//
+// The comment block after the return keeps the intended implementation for
+// reference: for 1..16 concurrent streams it sums doubles read from
+// root[0..15] at index steps of stride * refs_per_line. It refers to
+// `iterations`, `root` and `sum_chk`, none of which are parameters of this
+// function, so it cannot be re-enabled by just removing the comment markers.
+static benchmark follow_streams(int64 chains_per_thread, // memory loading per thread
+        int64 bytes_per_line, // ignored
+        int64 bytes_per_chain, // ignored
+        int64 stride, // ignored
+        int64 busy_cycles, // ignored
+        bool prefetch // ignored
+        ) {
+    return 0;
+    /*
+     int64 refs_per_line = bytes_per_line / sizeof(double);
+     int64 refs_per_chain = bytes_per_chain / sizeof(double);
+
+     // chase pointers
+     switch (chains_per_thread) {
+     default:
+     case 1:
+         for (int64 i = 0; i < iterations; i++) {
+             double t = 0;
+             double* a0 = (double *) root[0];
+             for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+                 t += a0[j];
+             }
+             sum_chk(t);
+         }
+         break;
+     case 2:
+         for (int64 i = 0; i < iterations; i++) {
+             double t = 0;
+             double* a0 = (double *) root[0];
+             double* a1 = (double *) root[1];
+             for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+                 t += a0[j] + a1[j];
+             }
+             sum_chk(t);
+         }
+         break;
+     case 3:
+         for (int64 i = 0; i < iterations; i++) {
+             double t = 0;
+             double* a0 = (double *) root[0];
+             double* a1 = (double *) root[1];
+             double* a2 = (double *) root[2];
+             for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+                 t += a0[j] + a1[j] + a2[j];
+             }
+             sum_chk(t);
+         }
+         break;
+     case 4:
+         for (int64 i = 0; i < iterations; i++) {
+             double t = 0;
+             double* a0 = (double *) root[0];
+             double* a1 = (double *) root[1];
+             double* a2 = (double *) root[2];
+             double* a3 = (double *) root[3];
+             for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+                 t += a0[j] + a1[j] + a2[j] + a3[j];
+             }
+             sum_chk(t);
+         }
+         break;
+     case 5:
+         for (int64 i = 0; i < iterations; i++) {
+             double t = 0;
+             double* a0 = (double *) root[0];
+             double* a1 = (double *) root[1];
+             double* a2 = (double *) root[2];
+             double* a3 = (double *) root[3];
+             double* a4 = (double *) root[4];
+             for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+                 t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j];
+             }
+             sum_chk(t);
+         }
+         break;
+     case 6:
+         for (int64 i = 0; i < iterations; i++) {
+             double t = 0;
+             double* a0 = (double *) root[0];
+             double* a1 = (double *) root[1];
+             double* a2 = (double *) root[2];
+             double* a3 = (double *) root[3];
+             double* a4 = (double *) root[4];
+             double* a5 = (double *) root[5];
+             for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+                 t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j];
+             }
+             sum_chk(t);
+         }
+         break;
+     case 7:
+         for (int64 i = 0; i < iterations; i++) {
+             double t = 0;
+             double* a0 = (double *) root[0];
+             double* a1 = (double *) root[1];
+             double* a2 = (double *) root[2];
+             double* a3 = (double *) root[3];
+             double* a4 = (double *) root[4];
+             double* a5 = (double *) root[5];
+             double* a6 = (double *) root[6];
+             for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+                 t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j];
+             }
+             sum_chk(t);
+         }
+         break;
+     case 8:
+         for (int64 i = 0; i < iterations; i++) {
+             double t = 0;
+             double* a0 = (double *) root[0];
+             double* a1 = (double *) root[1];
+             double* a2 = (double *) root[2];
+             double* a3 = (double *) root[3];
+             double* a4 = (double *) root[4];
+             double* a5 = (double *) root[5];
+             double* a6 = (double *) root[6];
+             double* a7 = (double *) root[7];
+             for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+                 t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]
+                 + a7[j];
+             }
+             sum_chk(t);
+         }
+         break;
+     case 9:
+         for (int64 i = 0; i < iterations; i++) {
+             double t = 0;
+             double* a0 = (double *) root[0];
+             double* a1 = (double *) root[1];
+             double* a2 = (double *) root[2];
+             double* a3 = (double *) root[3];
+             double* a4 = (double *) root[4];
+             double* a5 = (double *) root[5];
+             double* a6 = (double *) root[6];
+             double* a7 = (double *) root[7];
+             double* a8 = (double *) root[8];
+             for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+                 t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]
+                 + a7[j] + a8[j];
+             }
+             sum_chk(t);
+         }
+         break;
+     case 10:
+         for (int64 i = 0; i < iterations; i++) {
+             double t = 0;
+             double* a0 = (double *) root[0];
+             double* a1 = (double *) root[1];
+             double* a2 = (double *) root[2];
+             double* a3 = (double *) root[3];
+             double* a4 = (double *) root[4];
+             double* a5 = (double *) root[5];
+             double* a6 = (double *) root[6];
+             double* a7 = (double *) root[7];
+             double* a8 = (double *) root[8];
+             double* a9 = (double *) root[9];
+             for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+                 t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]
+                 + a7[j] + a8[j] + a9[j];
+             }
+             sum_chk(t);
+         }
+         break;
+     case 11:
+         for (int64 i = 0; i < iterations; i++) {
+             double t = 0;
+             double* a0 = (double *) root[0];
+             double* a1 = (double *) root[1];
+             double* a2 = (double *) root[2];
+             double* a3 = (double *) root[3];
+             double* a4 = (double *) root[4];
+             double* a5 = (double *) root[5];
+             double* a6 = (double *) root[6];
+             double* a7 = (double *) root[7];
+             double* a8 = (double *) root[8];
+             double* a9 = (double *) root[9];
+             double* a10 = (double *) root[10];
+             for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+                 t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]
+                 + a7[j] + a8[j] + a9[j] + a10[j];
+             }
+             sum_chk(t);
+         }
+         break;
+     case 12:
+         for (int64 i = 0; i < iterations; i++) {
+             double t = 0;
+             double* a0 = (double *) root[0];
+             double* a1 = (double *) root[1];
+             double* a2 = (double *) root[2];
+             double* a3 = (double *) root[3];
+             double* a4 = (double *) root[4];
+             double* a5 = (double *) root[5];
+             double* a6 = (double *) root[6];
+             double* a7 = (double *) root[7];
+             double* a8 = (double *) root[8];
+             double* a9 = (double *) root[9];
+             double* a10 = (double *) root[10];
+             double* a11 = (double *) root[11];
+             for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+                 t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]
+                 + a7[j] + a8[j] + a9[j] + a10[j] + a11[j];
+             }
+             sum_chk(t);
+         }
+         break;
+     case 13:
+         for (int64 i = 0; i < iterations; i++) {
+             double t = 0;
+             double* a0 = (double *) root[0];
+             double* a1 = (double *) root[1];
+             double* a2 = (double *) root[2];
+             double* a3 = (double *) root[3];
+             double* a4 = (double *) root[4];
+             double* a5 = (double *) root[5];
+             double* a6 = (double *) root[6];
+             double* a7 = (double *) root[7];
+             double* a8 = (double *) root[8];
+             double* a9 = (double *) root[9];
+             double* a10 = (double *) root[10];
+             double* a11 = (double *) root[11];
+             double* a12 = (double *) root[12];
+             for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+                 t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]
+                 + a7[j] + a8[j] + a9[j] + a10[j] + a11[j] + a12[j];
+             }
+             sum_chk(t);
+         }
+         break;
+     case 14:
+         for (int64 i = 0; i < iterations; i++) {
+             double t = 0;
+             double* a0 = (double *) root[0];
+             double* a1 = (double *) root[1];
+             double* a2 = (double *) root[2];
+             double* a3 = (double *) root[3];
+             double* a4 = (double *) root[4];
+             double* a5 = (double *) root[5];
+             double* a6 = (double *) root[6];
+             double* a7 = (double *) root[7];
+             double* a8 = (double *) root[8];
+             double* a9 = (double *) root[9];
+             double* a10 = (double *) root[10];
+             double* a11 = (double *) root[11];
+             double* a12 = (double *) root[12];
+             double* a13 = (double *) root[13];
+             for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+                 t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]
+                 + a7[j] + a8[j] + a9[j] + a10[j] + a11[j] + a12[j]
+                 + a13[j];
+             }
+             sum_chk(t);
+         }
+         break;
+     case 15:
+         for (int64 i = 0; i < iterations; i++) {
+             double t = 0;
+             double* a0 = (double *) root[0];
+             double* a1 = (double *) root[1];
+             double* a2 = (double *) root[2];
+             double* a3 = (double *) root[3];
+             double* a4 = (double *) root[4];
+             double* a5 = (double *) root[5];
+             double* a6 = (double *) root[6];
+             double* a7 = (double *) root[7];
+             double* a8 = (double *) root[8];
+             double* a9 = (double *) root[9];
+             double* a10 = (double *) root[10];
+             double* a11 = (double *) root[11];
+             double* a12 = (double *) root[12];
+             double* a13 = (double *) root[13];
+             double* a14 = (double *) root[14];
+             for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+                 t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]
+                 + a7[j] + a8[j] + a9[j] + a10[j] + a11[j] + a12[j]
+                 + a13[j] + a14[j];
+             }
+             sum_chk(t);
+         }
+         break;
+     case 16:
+         for (int64 i = 0; i < iterations; i++) {
+             double t = 0;
+             double* a0 = (double *) root[0];
+             double* a1 = (double *) root[1];
+             double* a2 = (double *) root[2];
+             double* a3 = (double *) root[3];
+             double* a4 = (double *) root[4];
+             double* a5 = (double *) root[5];
+             double* a6 = (double *) root[6];
+             double* a7 = (double *) root[7];
+             double* a8 = (double *) root[8];
+             double* a9 = (double *) root[9];
+             double* a10 = (double *) root[10];
+             double* a11 = (double *) root[11];
+             double* a12 = (double *) root[12];
+             double* a13 = (double *) root[13];
+             double* a14 = (double *) root[14];
+             double* a15 = (double *) root[15];
+             for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+                 t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]
+                 + a7[j] + a8[j] + a9[j] + a10[j] + a11[j] + a12[j]
+                 + a13[j] + a14[j] + a15[j];
+             }
+             sum_chk(t);
+         }
+         break;
+     }
+     */
+}
diff --git a/src/run.h b/src/run.h
new file mode 100644
index 0000000..dbc698f
--- /dev/null
+++ b/src/run.h
@@ -0,0 +1,62 @@
+/*******************************************************************************
+ * Copyright (c) 2006 International Business Machines Corporation.             *
+ * All rights reserved. This program and the accompanying materials            *
+ * are made available under the terms of the Common Public License v1.0        *
+ * which accompanies this distribution, and is available at                    *
+ * http://www.opensource.org/licenses/cpl1.0.php                               *
+ *                                                                             *
+ * Contributors:                                                               *
+ *    Douglas M.
Pase - initial API and implementation                         *
+ *******************************************************************************/
+
+//
+// Configuration
+//
+
+// Include guard
+#if !defined(RUN_H)
+#define RUN_H
+
+// Local includes
+#include "thread.h"
+#include "lock.h"
+#include "chain.h"
+#include "types.h"
+#include "experiment.h"
+#include "spinbarrier.h"
+
+
+//
+// Class definition
+//
+
+// One benchmark worker. Run derives from Thread, so its run() is the body
+// that Thread::start_routine() calls on the newly created pthread.
+// The member functions are defined in run.cpp, which is outside this view;
+// notes on them below are therefore hedged.
+class Run: public Thread {
+public:
+    Run();
+    ~Run();
+    // Thread entry point (implements Thread's pure virtual run()).
+    int run();
+    // Supplies the experiment description and the shared barrier.
+    // NOTE(review): presumably just stores both in exp / bp -- confirm in
+    // run.cpp, and confirm that both objects outlive this Run.
+    void set(Experiment &e, SpinBarrier* sbp);
+
+    // Class-wide results shared by every Run instance (static storage).
+    static int64 ops_per_chain() {
+        return _ops_per_chain;
+    }
+    static double seconds() {
+        return _seconds;
+    }
+
+private:
+    Experiment* exp; // experiment data
+    SpinBarrier* bp; // spin barrier used by all threads
+
+    // Chain helpers; going by the names they check a chain and build it in
+    // random / forward / reverse / stream order -- TODO confirm in run.cpp.
+    void mem_check(Chain *m);
+    Chain* random_mem_init(Chain *m);
+    Chain* forward_mem_init(Chain *m);
+    Chain* reverse_mem_init(Chain *m);
+    Chain* stream_mem_init(Chain *m);
+
+    static Lock global_mutex; // global lock
+    static int64 _ops_per_chain; // total number of operations per chain
+    static double _seconds; // total number of seconds
+};
+
+#endif
diff --git a/src/spinbarrier.cpp b/src/spinbarrier.cpp
new file mode 100644
index 0000000..e7d3b70
--- /dev/null
+++ b/src/spinbarrier.cpp
@@ -0,0 +1,54 @@
+/*******************************************************************************
+ * Copyright (c) 2006 International Business Machines Corporation.             *
+ * All rights reserved. This program and the accompanying materials            *
+ * are made available under the terms of the Common Public License v1.0        *
+ * which accompanies this distribution, and is available at                    *
+ * http://www.opensource.org/licenses/cpl1.0.php                               *
+ *                                                                             *
+ * Contributors:                                                               *
+ *    Douglas M. Pase - initial API and implementation                         *
+ *******************************************************************************/
+
+/******************************************************************************
+ *                                                                            *
+ * SpinBarrier                                                                *
+ *                                                                            *
+ * Author:  Douglas M.
Pase                                                     *
+ *                                                                            *
+ * Date:    September 21, 2000                                                *
+ *          Translated to C++, June 19, 2005                                  *
+ *                                                                            *
+ * void barrier()                                                             *
+ *                                                                            *
+ ******************************************************************************/
+
+//
+// Configuration
+//
+
+// Implementation header
+#include "spinbarrier.h"
+
+// System includes
+// NOTE(review): the header name on this line was missing in the copy under
+// review. <cstdio> is a reconstruction that supplies NULL; the pthread
+// declarations are expected to come from spinbarrier.h. Confirm against the
+// repository.
+#include <cstdio>
+
+
+//
+// Implementation
+//
+
+// create a new barrier for `participants` threads.
+// The status returned by pthread_barrier_init() is not checked; a count that
+// POSIX rejects (zero) leaves barrier_obj unusable.
+SpinBarrier::SpinBarrier(int participants) :
+    limit(participants) {
+    pthread_barrier_init(&barrier_obj, NULL, this->limit);
+}
+
+// destroy an old barrier, releasing whatever pthread_barrier_init() acquired
+// (the destructor used to be empty, so the barrier was never destroyed).
+// No thread may still be blocked in barrier() when this runs.
+SpinBarrier::~SpinBarrier() {
+    pthread_barrier_destroy(&this->barrier_obj);
+}
+
+// enter the barrier and wait. everyone leaves
+// when the last participant enters the barrier.
+void SpinBarrier::barrier() {
+    pthread_barrier_wait(&this->barrier_obj);
+}
diff --git a/src/spinbarrier.h b/src/spinbarrier.h
new file mode 100644
index 0000000..b0faaed
--- /dev/null
+++ b/src/spinbarrier.h
@@ -0,0 +1,50 @@
+/*******************************************************************************
+ * Copyright (c) 2006 International Business Machines Corporation.             *
+ * All rights reserved. This program and the accompanying materials            *
+ * are made available under the terms of the Common Public License v1.0        *
+ * which accompanies this distribution, and is available at                    *
+ * http://www.opensource.org/licenses/cpl1.0.php                               *
+ *                                                                             *
+ * Contributors:                                                               *
+ *    Douglas M. Pase - initial API and implementation                         *
+ *******************************************************************************/
+
+/******************************************************************************
+ *                                                                            *
+ * SpinBarrier                                                                *
+ *                                                                            *
+ * Author:  Douglas M.
Pase                                                     *
+ *                                                                            *
+ * Date:    September 21, 2000                                                *
+ *          Translated to C++, June 19, 2005                                  *
+ *          Rewritten August 13,2005                                          *
+ *                                                                            *
+ * void barrier()                                                             *
+ *                                                                            *
+ ******************************************************************************/
+
+// Include guard
+#if !defined(SPINBARRIER_H)
+#define SPINBARRIER_H
+
+// System includes
+// The header name was missing in the copy under review; <pthread.h> is what
+// declares pthread_barrier_t, which the class below stores by value.
+#include <pthread.h>
+
+
+//
+// Class definition
+//
+
+// Rendezvous point for a fixed number of threads, built on a POSIX barrier.
+class SpinBarrier {
+public:
+    // participants: how many threads must call barrier() before any of them
+    // is released.
+    SpinBarrier(int participants);
+    ~SpinBarrier();
+
+    // Blocks the caller until `participants` threads have arrived.
+    void barrier();
+
+private:
+    // Not copyable: POSIX leaves the use of a copied pthread_barrier_t
+    // undefined. Declared but intentionally never defined, so an accidental
+    // copy fails at compile or link time.
+    SpinBarrier(const SpinBarrier&);
+    SpinBarrier& operator=(const SpinBarrier&);
+
+    int limit; // number of barrier participants
+    pthread_barrier_t barrier_obj;
+};
+
+#endif
diff --git a/src/thread.cpp b/src/thread.cpp
new file mode 100644
index 0000000..54902cc
--- /dev/null
+++ b/src/thread.cpp
@@ -0,0 +1,77 @@
+/*******************************************************************************
+ * Copyright (c) 2006 International Business Machines Corporation.             *
+ * All rights reserved. This program and the accompanying materials            *
+ * are made available under the terms of the Common Public License v1.0        *
+ * which accompanies this distribution, and is available at                    *
+ * http://www.opensource.org/licenses/cpl1.0.php                               *
+ *                                                                             *
+ * Contributors:                                                               *
+ *    Douglas M.
Pase - initial API and implementation                         *
+ *******************************************************************************/
+
+//
+// Configuration
+//
+
+// Implementation header
+#include "thread.h"
+
+// System includes
+// NOTE(review): the three header names here were missing in the copy under
+// review. <pthread.h> (pthread_create/join/exit) and <cstdio> (NULL) are
+// reconstructed from what this file uses; <unistd.h> is only a guess for the
+// third line, since nothing visible here needs it. Confirm against the
+// repository.
+#include <cstdio>
+#include <pthread.h>
+#include <unistd.h>
+
+Lock Thread::_global_lock;
+int Thread::count = 0;
+
+
+//
+// Implementation
+//
+
+// Hands out consecutive ids: each new Thread takes the current value of the
+// shared counter and increments it, all under the class-wide lock.
+Thread::Thread() {
+    Thread::global_lock();
+    this->id = Thread::count;
+    Thread::count += 1;
+    Thread::global_unlock();
+}
+
+Thread::~Thread() {
+}
+
+// Creates the pthread; returns pthread_create()'s status (0 on success).
+int Thread::start() {
+    return pthread_create(&this->thread, NULL, Thread::start_routine, this);
+}
+
+// pthread entry point: p is the Thread passed to pthread_create() by start().
+// The value returned by run() is discarded.
+void*
+Thread::start_routine(void* p) {
+    ((Thread*) p)->run();
+
+    return NULL;
+}
+
+// Terminates the calling thread.
+void Thread::exit() {
+    pthread_exit(NULL);
+}
+
+// Joins the thread created by start().
+// Returns pthread_join()'s status -- 0 on success, an error number otherwise --
+// matching start(), which forwards pthread_create()'s status. The previous
+// version returned 0 even when the join failed.
+int Thread::wait() {
+    return pthread_join(this->thread, NULL);
+}
+
+// Per-object lock.
+void Thread::lock() {
+    this->object_lock.lock();
+}
+
+void Thread::unlock() {
+    this->object_lock.unlock();
+}
+
+// Lock shared by every Thread object.
+void Thread::global_lock() {
+    Thread::_global_lock.lock();
+}
+
+void Thread::global_unlock() {
+    Thread::_global_lock.unlock();
+}
diff --git a/src/thread.h b/src/thread.h
new file mode 100644
index 0000000..146bf92
--- /dev/null
+++ b/src/thread.h
@@ -0,0 +1,68 @@
+/*******************************************************************************
+ * Copyright (c) 2006 International Business Machines Corporation.             *
+ * All rights reserved. This program and the accompanying materials            *
+ * are made available under the terms of the Common Public License v1.0        *
+ * which accompanies this distribution, and is available at                    *
+ * http://www.opensource.org/licenses/cpl1.0.php                               *
+ *                                                                             *
+ * Contributors:                                                               *
+ *    Douglas M.
Pase - initial API and implementation                         *
+ *******************************************************************************/
+
+//
+// Configuration
+//
+
+// Include guard
+#if !defined(THREAD_H)
+#define THREAD_H
+
+// System includes
+// The header name was missing in the copy under review; <pthread.h> is what
+// declares pthread_t, which the class below stores by value.
+#include <pthread.h>
+
+// Local includes
+#include "lock.h"
+
+
+//
+// Class definition
+//
+
+// Abstract base for a pthread-backed worker: subclasses implement run(),
+// callers use start() to launch it and wait() to join it.
+class Thread {
+public:
+    Thread();
+    // Virtual because this is a polymorphic base (run() is pure virtual):
+    // deleting a subclass object through a Thread* must run the subclass
+    // destructor. The out-of-class definition in thread.cpp is unchanged.
+    virtual ~Thread();
+
+    // Body executed on the new thread.
+    virtual int run() = 0;
+
+    int start();
+    int wait();
+    // Number of Thread objects constructed so far (the counter is only ever
+    // incremented). Read here without taking the global lock.
+    int thread_count() {
+        return Thread::count;
+    }
+    // Index assigned at construction, starting from 0.
+    int thread_id() {
+        return id;
+    }
+
+    // Ends the calling thread.
+    static void exit();
+
+protected:
+    void lock();
+    void unlock();
+    static void global_lock();
+    static void global_unlock();
+
+private:
+    static void* start_routine(void *);
+    static Lock _global_lock;
+
+    Lock object_lock;
+
+    pthread_t thread;
+
+    static int count;
+    int id;
+    int lock_obj; // not referenced by thread.cpp
+};
+
+#endif
diff --git a/src/timer.cpp b/src/timer.cpp
new file mode 100644
index 0000000..22015ff
--- /dev/null
+++ b/src/timer.cpp
@@ -0,0 +1,168 @@
+/*******************************************************************************
+ * Copyright (c) 2006 International Business Machines Corporation.             *
+ * All rights reserved. This program and the accompanying materials            *
+ * are made available under the terms of the Common Public License v1.0        *
+ * which accompanies this distribution, and is available at                    *
+ * http://www.opensource.org/licenses/cpl1.0.php                               *
+ *                                                                             *
+ * Contributors:                                                               *
+ *    Douglas M.
Pase - initial API and implementation                         *
+ *******************************************************************************/
+
+//
+// Configuration
+//
+
+// Implementation header
+#include "timer.h"
+
+// System includes
+// NOTE(review): the two header names here were missing in the copy under
+// review; they are reconstructed from what the code uses (NULL, and
+// gettimeofday / struct timeval). Confirm against the repository.
+#include <cstdio>
+#include <sys/time.h>
+
+static int64 read_rtc();
+static void calibrate_rtc(int n); // declared only: never defined or called in this file
+static double wall_seconds();
+
+// Calibration state. Everything starts at -1 and is overwritten by
+// Timer::calibrate(int) in RTC builds.
+static int wall_ticks = -1; // wall-clock ticks the calibration waits for
+static int rtc_ticks = -1; // never read or written in this file
+static double wall_elapsed = -1; // calibration interval in gettimeofday() seconds
+static int64 rtc_elapsed = -1; // same interval in TSC counts
+static double time_factor = -1; // seconds per TSC count
+
+// Use the TSC-based implementation unless the build defines RTC or GTOD.
+#if !defined(RTC) && !defined(GTOD)
+#define RTC
+#endif
+
+
+//
+// Implementation
+//
+
+#if defined(RTC)
+
+// Current TSC reading scaled to seconds. time_factor is -1 until calibrate()
+// has run, so call calibrate() first.
+double Timer::seconds() {
+    return (double) read_rtc() * time_factor;
+}
+
+// Raw TSC reading. This used to repeat the rdtsc sequence of read_rtc()
+// verbatim; it now forwards to it so there is a single copy.
+int64 Timer::ticks() {
+    return read_rtc();
+}
+
+static int64 read_rtc() {
+    // See pg. 406 of the AMD x86-64 Architecture
+    // Programmer's Manual, Volume 2, System Programming
+    unsigned int eax = 0, edx = 0;
+
+    // rdtsc returns the counter in EDX:EAX; both halves are copied into the
+    // output operands, and EAX/EDX are listed as clobbered.
+    __asm__ __volatile__(
+            "rdtsc ;"
+            "movl %%eax,%0;"
+            "movl %%edx,%1;"
+            ""
+            : "=r"(eax), "=r"(edx)
+            :
+            : "%eax", "%edx"
+    );
+
+    return ((int64) edx << 32) | (int64) eax;
+}
+
+void Timer::calibrate() {
+    Timer::calibrate(1000);
+}
+
+// Measures how many TSC counts elapse while gettimeofday() advances n times,
+// and stores the resulting seconds-per-count in time_factor.
+void Timer::calibrate(int n) {
+    // At least one tick has to elapse: with n <= 0 the loop below never ran,
+    // wall_finish was read uninitialised, and time_factor came out as garbage.
+    wall_ticks = (n > 0) ? n : 1;
+
+    double wall_start, wall_finish, t;
+    // Spin until the wall clock changes so the measurement starts on a tick
+    // edge.
+    t = wall_seconds();
+    while (t == (wall_start = wall_seconds())) {
+        ;
+    }
+    wall_finish = wall_start;
+    int64 rtc_start = read_rtc();
+    for (int i = 0; i < wall_ticks; i++) {
+        t = wall_seconds();
+        while (t == (wall_finish = wall_seconds())) {
+            ;
+        }
+    }
+    int64 rtc_finish = read_rtc();
+
+    wall_elapsed = wall_finish - wall_start;
+    rtc_elapsed = rtc_finish - rtc_start;
+    time_factor = wall_elapsed / (double) rtc_elapsed;
+}
+
+static double wall_seconds() {
+    struct timeval t;
+    gettimeofday(&t, NULL);
+
+    return (double) t.tv_sec + (double) t.tv_usec * 1E-6;
+}
+
+#else
+
+// GTOD build: wall-clock time from gettimeofday(), in seconds.
+double
+Timer::seconds()
+{
+    struct timeval t;
+    gettimeofday(&t, NULL);
+
+    return (double) t.tv_sec + (double) t.tv_usec * 1E-6;
+}
+
+// GTOD build: the same reading expressed in microseconds.
+int64
+Timer::ticks()
+{
+    struct timeval t;
+    gettimeofday(&t, NULL);
+
+    return 1000000 * (int64) t.tv_sec + (int64) t.tv_usec;
+}
+
+// Nothing to calibrate when reading gettimeofday() directly.
+void
+Timer::calibrate()
+{
+}
+
+void
+Timer::calibrate(int n)
+{
+}
+
+#endif
+
+static double min(double v1, double v2) {
+    if (v2 < v1)
+        return v2;
+    return v1;
+}
+
+// Smallest step of Timer::seconds() seen over ten trials. Each trial spins to
+// a change of the clock, samples it, then spins to the next change and takes
+// the difference.
+double Timer::resolution() {
+    double a, b, c = 1E9;
+    for (int i = 0; i < 10; i++) {
+        a = Timer::seconds();
+        while (a == (b = Timer::seconds()))
+            ;
+        a = Timer::seconds();
+        while (a == (b = Timer::seconds()))
+            ;
+        c = min(b - a, c);
+    }
+
+    return c;
+}
diff --git a/src/timer.h b/src/timer.h
new file mode 100644
index 0000000..d10c926
--- /dev/null
+++ b/src/timer.h
@@ -0,0 +1,38 @@
+/*******************************************************************************
+ * Copyright (c) 2006 International Business Machines
Corporation.             *
+ * All rights reserved. This program and the accompanying materials            *
+ * are made available under the terms of the Common Public License v1.0        *
+ * which accompanies this distribution, and is available at                    *
+ * http://www.opensource.org/licenses/cpl1.0.php                               *
+ *                                                                             *
+ * Contributors:                                                               *
+ *    Douglas M. Pase - initial API and implementation                         *
+ *******************************************************************************/
+
+//
+// Configuration
+//
+
+// Include guard
+#if !defined(TIMER_H)
+#define TIMER_H
+
+// Local includes
+#include "types.h"
+
+
+//
+// Class definition
+//
+
+// Static-only timing facade; timer.cpp provides either a TSC-based build (RTC,
+// the default) or a gettimeofday()-based build (GTOD).
+class Timer {
+public:
+    // Current time in seconds. In the RTC build this is the TSC reading times
+    // the factor computed by calibrate(), so calibrate() must be called first.
+    static double seconds();
+    // Smallest observed step of seconds(), taken over ten trials.
+    static double resolution();
+    // Raw counter: TSC counts (RTC build) or microseconds (GTOD build).
+    static int64 ticks();
+    // Same as calibrate(1000).
+    static void calibrate();
+    // RTC build: derives seconds-per-count over n wall-clock ticks.
+    // GTOD build: does nothing.
+    static void calibrate(int n);
+private:
+};
+
+#endif
diff --git a/src/types.h b/src/types.h
new file mode 100644
index 0000000..a92e481
--- /dev/null
+++ b/src/types.h
@@ -0,0 +1,38 @@
+/*******************************************************************************
+ * Copyright (c) 2006 International Business Machines Corporation.             *
+ * All rights reserved. This program and the accompanying materials            *
+ * are made available under the terms of the Common Public License v1.0        *
+ * which accompanies this distribution, and is available at                    *
+ * http://www.opensource.org/licenses/cpl1.0.php                               *
+ *                                                                             *
+ * Contributors:                                                               *
+ *    Douglas M. Pase - initial API and implementation                         *
+ *******************************************************************************/
+
+//
+// Configuration
+//
+
+// Include guard
+#if !defined(TYPES_H)
+#define TYPES_H
+
+
+//
+// Type definitions
+//
+
+// Fixed-width aliases built from the fundamental types. The widths in the
+// names are what the usual ILP32 / LP64 targets provide; nothing in this
+// header enforces them.
+typedef long long int64;
+typedef int int32;
+typedef short int16;
+// NOTE(review): whether plain char is signed is implementation-defined, so
+// int8 can be an unsigned type on some targets; `signed char` would pin it,
+// but that changes the type's identity for existing callers.
+typedef char int8;
+
+typedef unsigned long long uint64;
+typedef unsigned int uint32;
+typedef unsigned short uint16;
+typedef unsigned char uint8;
+
+typedef double float64;
+typedef float float32;
+
+#endif
-- cgit v1.2.3