From 122d27347290ff066635bc3005a332d9574bedb2 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Thu, 27 Oct 2011 16:51:48 +0200 Subject: Cleaning up the code a bit. --- CMakeLists.txt | 49 +++ Chain.C | 24 - Chain.h | 24 - Experiment.C | 592 ------------------------- Experiment.h | 103 ----- Lock.C | 45 -- Lock.h | 30 -- Main.C | 89 ---- Main.h | 18 - Makefile | 63 --- Output.C | 153 ------- Output.h | 28 -- Run.C | 1206 --------------------------------------------------- Run.h | 50 --- SpinBarrier.C | 48 -- SpinBarrier.h | 44 -- Thread.C | 86 ---- Thread.h | 53 --- Timer.C | 175 -------- Timer.h | 28 -- Types.C | 13 - Types.h | 29 -- src/Chain.cpp | 24 + src/Chain.h | 24 + src/Experiment.cpp | 592 +++++++++++++++++++++++++ src/Experiment.h | 103 +++++ src/Lock.cpp | 45 ++ src/Lock.h | 30 ++ src/Main.c | 90 ++++ src/Main.cpp | 89 ++++ src/Main.h | 18 + src/Output.cpp | 153 +++++++ src/Output.h | 28 ++ src/Run.cpp | 1206 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/Run.h | 50 +++ src/SpinBarrier.cpp | 48 ++ src/SpinBarrier.h | 44 ++ src/Thread.cpp | 86 ++++ src/Thread.h | 53 +++ src/Timer.cpp | 175 ++++++++ src/Timer.h | 28 ++ src/Types.cpp | 13 + src/Types.h | 29 ++ 43 files changed, 2977 insertions(+), 2901 deletions(-) create mode 100644 CMakeLists.txt delete mode 100644 Chain.C delete mode 100644 Chain.h delete mode 100644 Experiment.C delete mode 100644 Experiment.h delete mode 100644 Lock.C delete mode 100644 Lock.h delete mode 100644 Main.C delete mode 100644 Main.h delete mode 100644 Makefile delete mode 100644 Output.C delete mode 100644 Output.h delete mode 100644 Run.C delete mode 100644 Run.h delete mode 100644 SpinBarrier.C delete mode 100644 SpinBarrier.h delete mode 100644 Thread.C delete mode 100644 Thread.h delete mode 100644 Timer.C delete mode 100644 Timer.h delete mode 100644 Types.C delete mode 100644 Types.h create mode 100644 src/Chain.cpp create mode 100644 src/Chain.h create mode 100644 src/Experiment.cpp create mode 100644 
src/Experiment.h create mode 100644 src/Lock.cpp create mode 100644 src/Lock.h create mode 100644 src/Main.c create mode 100644 src/Main.cpp create mode 100644 src/Main.h create mode 100644 src/Output.cpp create mode 100644 src/Output.h create mode 100644 src/Run.cpp create mode 100644 src/Run.h create mode 100644 src/SpinBarrier.cpp create mode 100644 src/SpinBarrier.h create mode 100644 src/Thread.cpp create mode 100644 src/Thread.h create mode 100644 src/Timer.cpp create mode 100644 src/Timer.h create mode 100644 src/Types.cpp create mode 100644 src/Types.h diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..3da4838 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,49 @@ +# +# Project configuration +# + +cmake_minimum_required(VERSION 2.6) +project(pChase) + +set (pChase_VERSION_MAJOR 0) +set (pChase_VERSION_MINOR 4) + +find_package(Threads) + +find_library(LIBNUMA numa) +option(USE_LIBNUMA "Build against NUMA libraries" ON) + + +# +# Code compilation +# + +add_library(Chain src/Chain.h src/Chain.cpp) + +add_library(Experiment src/Experiment.h src/Experiment.cpp) + +add_library(Thread src/Thread.h src/Thread.cpp) + +add_library(Lock src/Lock.h src/Lock.cpp) + +add_library(Output src/Output.h src/Output.cpp) + +add_library(Run src/Run.h src/Run.cpp) +target_link_libraries(Run Lock Chain Thread) + +add_library(SpinBarrier src/SpinBarrier.h src/SpinBarrier.cpp) + +add_library(Timer src/Timer.h src/Timer.cpp) + +add_library(Types src/Types.h src/Types.cpp) + +add_executable (pChase src/Main.h src/Main.cpp) +target_link_libraries(pChase Run Timer Output Experiment SpinBarrier) +target_link_libraries(pChase ${CMAKE_THREAD_LIBS_INIT}) +if (USE_LIBNUMA) + if(LIBNUMA) + target_link_libraries(pChase ${LIBNUMA}) + else () + message(STATUS "WARNING: libnuma not found, not compiling against it") + endif () +endif () diff --git a/Chain.C b/Chain.C deleted file mode 100644 index ddbc104..0000000 --- a/Chain.C +++ /dev/null @@ -1,24 +0,0 @@ 
-/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. Pase - initial API and implementation * - *******************************************************************************/ - - -#include - -#include "Chain.h" - -Chain::Chain() -: next(NULL) -{ -} - -Chain::~Chain() -{ -} diff --git a/Chain.h b/Chain.h deleted file mode 100644 index 8bdb584..0000000 --- a/Chain.h +++ /dev/null @@ -1,24 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. Pase - initial API and implementation * - *******************************************************************************/ - - -#if !defined(Chain_h) -#define Chain_h - -class Chain { -public: - Chain(); - ~Chain(); - Chain* next; -private: -}; - -#endif diff --git a/Experiment.C b/Experiment.C deleted file mode 100644 index 75b1cab..0000000 --- a/Experiment.C +++ /dev/null @@ -1,592 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. 
This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. Pase - initial API and implementation * - *******************************************************************************/ - - -#include -#include -#include -#include - -#if defined(NUMA) -#include -#endif - -#include "Experiment.h" - -Experiment::Experiment() : - strict (0), - pointer_size (DEFAULT_POINTER_SIZE), - bytes_per_line (DEFAULT_BYTES_PER_LINE), - links_per_line (DEFAULT_LINKS_PER_LINE), - bytes_per_page (DEFAULT_BYTES_PER_PAGE), - lines_per_page (DEFAULT_LINES_PER_PAGE), - links_per_page (DEFAULT_LINKS_PER_PAGE), - bytes_per_chain (DEFAULT_BYTES_PER_CHAIN), - lines_per_chain (DEFAULT_LINES_PER_CHAIN), - links_per_chain (DEFAULT_LINKS_PER_CHAIN), - pages_per_chain (DEFAULT_PAGES_PER_CHAIN), - chains_per_thread(DEFAULT_CHAINS_PER_THREAD), - bytes_per_thread (DEFAULT_BYTES_PER_THREAD), - num_threads (DEFAULT_THREADS), - bytes_per_test (DEFAULT_BYTES_PER_TEST), - seconds (DEFAULT_SECONDS), - iterations (DEFAULT_ITERATIONS), - experiments (DEFAULT_EXPERIMENTS), - output_mode (TABLE), - access_pattern (RANDOM), - stride (1), - numa_placement (LOCAL), - offset_or_mask (0), - placement_map (NULL), - thread_domain (NULL), - chain_domain (NULL), - numa_max_domain (0), - num_numa_domains (1) -{ -} - -Experiment::~Experiment() -{ -} - - // interface: - // - // -l or --line bytes per cache line (line size) - // -p or --page bytes per page (page size) - // -c or --chain bytes per chain (used to compute pages per chain) - // -r or --references chains per thread (memory loading) - // -t or --threads number of threads (concurrency and contention) - // -i or --iters iterations - // -e or --experiments experiments - // -a or --access memory access pattern - // random random access pattern - // 
forward exclusive OR and mask - // reverse addition and offset - // -o or --output output mode - // hdr header only - // csv csv only - // both header + csv - // table human-readable table of values - // -n or --numa numa placement - // local local allocation of all chains - // xor exclusive OR and mask - // add addition and offset - // map explicit mapping of threads and chains to domains - -int -Experiment::parse_args(int argc, char* argv[]) -{ - int error = 0; - for (int i=1; i < argc; i++) { - if (strcasecmp(argv[i], "-x") == 0 || strcasecmp(argv[i], "--strict") == 0) { - this->strict = 1; - } else if (strcasecmp(argv[i], "-s") == 0 || strcasecmp(argv[i], "--seconds") == 0) { - i++; - if (i == argc) { error = 1; break; } - this->seconds = Experiment::parse_real(argv[i]); - this->iterations = 0; - if (this->seconds == 0) { error = 1; break; } - } else if (strcasecmp(argv[i], "-l") == 0 || strcasecmp(argv[i], "--line") == 0) { - i++; - if (i == argc) { error = 1; break; } - this->bytes_per_line = Experiment::parse_number(argv[i]); - if (this->bytes_per_line == 0) { error = 1; break; } - } else if (strcasecmp(argv[i], "-p") == 0 || strcasecmp(argv[i], "--page") == 0) { - i++; - if (i == argc) { error = 1; break; } - this->bytes_per_page = Experiment::parse_number(argv[i]); - if (this->bytes_per_page == 0) { error = 1; break; } - } else if (strcasecmp(argv[i], "-c") == 0 || strcasecmp(argv[i], "--chain") == 0) { - i++; - if (i == argc) { error = 1; break; } - this->bytes_per_chain = Experiment::parse_number(argv[i]); - if (this->bytes_per_chain == 0) { error = 1; break; } - } else if (strcasecmp(argv[i], "-r") == 0 || strcasecmp(argv[i], "--references") == 0) { - i++; - if (i == argc) { error = 1; break; } - this->chains_per_thread = Experiment::parse_number(argv[i]); - if (this->chains_per_thread == 0) { error = 1; break; } - } else if (strcasecmp(argv[i], "-t") == 0 || strcasecmp(argv[i], "--threads") == 0) { - i++; - if (i == argc) { error = 1; break; } - 
this->num_threads = Experiment::parse_number(argv[i]); - if (this->num_threads == 0) { error = 1; break; } - } else if (strcasecmp(argv[i], "-i") == 0 || strcasecmp(argv[i], "--iterations") == 0) { - i++; - if (i == argc) { error = 1; break; } - this->iterations = Experiment::parse_number(argv[i]); - this->seconds = 0; - if (this->iterations == 0) { error = 1; break; } - } else if (strcasecmp(argv[i], "-e") == 0 || strcasecmp(argv[i], "--experiments") == 0) { - i++; - if (i == argc) { error = 1; break; } - this->experiments = Experiment::parse_number(argv[i]); - if (this->experiments == 0) { error = 1; break; } - } else if (strcasecmp(argv[i], "-a") == 0 || strcasecmp(argv[i], "--access") == 0) { - i++; - if (i == argc) { error = 1; break; } - if (strcasecmp(argv[i], "random") == 0) { - this->access_pattern = RANDOM; - } else if (strcasecmp(argv[i], "forward") == 0) { - this->access_pattern = STRIDED; - i++; - if (i == argc) { error = 1; break; } - this->stride = Experiment::parse_number(argv[i]); - if (this->stride == 0) { error = 1; break; } - } else if (strcasecmp(argv[i], "reverse") == 0) { - this->access_pattern = STRIDED; - i++; - if (i == argc) { error = 1; break; } - this->stride = - Experiment::parse_number(argv[i]); - if (this->stride == 0) { error = 1; break; } - } else if (strcasecmp(argv[i], "stream") == 0) { - this->access_pattern = STREAM; - i++; - if (i == argc) { error = 1; break; } - this->stride = Experiment::parse_number(argv[i]); - if (this->stride == 0) { error = 1; break; } - } else { - error = 1; - break; - } - } else if (strcasecmp(argv[i], "-o") == 0 || strcasecmp(argv[i], "--output") == 0) { - i++; - if (i == argc) { error = 1; break; } - if (strcasecmp(argv[i], "table") == 0) { - this->output_mode = TABLE; - } else if (strcasecmp(argv[i], "csv") == 0) { - this->output_mode = CSV; - } else if (strcasecmp(argv[i], "both") == 0) { - this->output_mode = BOTH; - } else if (strcasecmp(argv[i], "hdr") == 0) { - this->output_mode = HEADER; - } 
else if (strcasecmp(argv[i], "header") == 0) { - this->output_mode = HEADER; - } else { - error = 1; - break; - } - } else if (strcasecmp(argv[i], "-n") == 0 || strcasecmp(argv[i], "--numa") == 0) { - i++; - if (i == argc) { error = 1; break; } - if (strcasecmp(argv[i], "local") == 0) { - this->numa_placement = LOCAL; - } else if (strcasecmp(argv[i], "xor") == 0) { - this->numa_placement = XOR; - i++; - if (i == argc) { error = 1; break; } - this->offset_or_mask = Experiment::parse_number(argv[i]); - } else if (strcasecmp(argv[i], "add") == 0) { - this->numa_placement = ADD; - i++; - if (i == argc) { error = 1; break; } - this->offset_or_mask = Experiment::parse_number(argv[i]); - } else if (strcasecmp(argv[i], "map") == 0) { - this->numa_placement = MAP; - i++; - if (i == argc) { error = 1; break; } - this->placement_map = argv[i]; - } else { - error = 1; - break; - } - } else { - error = 1; - break; - } - } - - - // if we've hit an error, print a message and quit - if (error) { - printf("usage: %s \n", argv[0]); - printf("where are selected from the following:\n"); - printf(" [-h|--help] # this message\n"); - printf(" [-l|--line] # bytes per cache line (cache line size)\n"); - printf(" [-p|--page] # bytes per page (page size)\n"); - printf(" [-c|--chain] # bytes per chain (used to compute pages per chain)\n"); - printf(" [-r|--references] # chains per thread (memory loading)\n"); - printf(" [-t|--threads] # number of threads (concurrency and contention)\n"); - printf(" [-i|--iterations] # iterations per experiment\n"); - printf(" [-e|--experiments] # experiments\n"); - printf(" [-a|--access] # memory access pattern\n"); - printf(" [-o|--output] # output format\n"); - printf(" [-n|--numa] # numa placement\n"); - printf(" [-s|--seconds] # run each experiment for seconds\n"); - printf(" [-x|--strict] # fail rather than adjust options to sensible values\n"); - printf("\n"); - printf(" is selected from the following:\n"); - printf(" random # all chains are accessed 
randomly\n"); - printf(" forward # chains are in forward order with constant stride\n"); - printf(" reverse # chains are in reverse order with constant stride\n"); - printf(" stream # references are calculated rather than read from memory\n"); - printf("\n"); - printf("Note: is always a small positive integer.\n"); - printf("\n"); - printf(" is selected from the following:\n"); - printf(" hdr # csv header only\n"); - printf(" csv # results in csv format only\n"); - printf(" both # header and results in csv format\n"); - printf(" table # human-readable table of values\n"); - printf("\n"); - printf(" is selected from the following:\n"); - printf(" local # all chains are allocated locally\n"); - printf(" xor # exclusive OR and mask\n"); - printf(" add # addition and offset\n"); - printf(" map # explicit mapping of threads and chains to domains\n"); - printf("\n"); - printf(" has the form \"t1:c11,c12,...,c1m;t2:c21,...,c2m;...;tn:cn1,...,cnm\"\n"); - printf("where t[i] is the NUMA domain where the ith thread is run,\n"); - printf("and c[i][j] is the NUMA domain where the jth chain in the ith thread is allocated.\n"); - printf("(The values t[i] and c[i][j] must all be zero or small positive integers.)\n"); - printf("\n"); - printf("Note: for maps, each thread must have the same number of chains,\n"); - printf("maps override the -t or --threads specification,\n"); - printf("NUMA domains are whole numbers in the range of 0..N, and\n"); - printf("thread or chain domains that exceed the maximum NUMA domain\n"); - printf("are wrapped around using a MOD function.\n"); - printf("\n"); - printf("To determine the number of NUMA domains currently available\n"); - printf("on your system, use a command such as \"numastat\".\n"); - printf("\n"); - printf("Final note: strict is not yet fully implemented, and\n"); - printf("maps do not gracefully handle ill-formed map specifications.\n"); - - return 1; - } - - - // STRICT -- fail if specifications are inconsistent - - // compute 
lines per page and lines per chain - // based on input and defaults. - // we round up page and chain sizes when needed. - this->lines_per_page = (this->bytes_per_page+this->bytes_per_line-1) / this->bytes_per_line; - this->bytes_per_page = this->bytes_per_line * this->lines_per_page; - this->pages_per_chain = (this->bytes_per_chain+this->bytes_per_page-1) / this->bytes_per_page; - this->bytes_per_chain = this->bytes_per_page * this->pages_per_chain; - this->bytes_per_thread = this->bytes_per_chain * this->chains_per_thread; - this->bytes_per_test = this->bytes_per_thread * this->num_threads; - this->links_per_line = this->bytes_per_line / pointer_size; - this->links_per_page = this->lines_per_page * this->links_per_line; - this->lines_per_chain = this->lines_per_page * this->pages_per_chain; - this->links_per_chain = this->lines_per_chain * this->links_per_line; - - - // allocate the chain roots for all threads - // and compute the chain locations - // (the chains themselves are initialized by the threads) - switch (this->numa_placement) { - case LOCAL : - case XOR : - case ADD : - this->thread_domain = new int32 [ this->num_threads ]; - this->chain_domain = new int32*[ this->num_threads ]; - this->random_state = new char* [ this->num_threads ]; - - for (int i=0; i < this->num_threads; i++) { - this->chain_domain[i] = new int32 [ this->chains_per_thread ]; - - const int state_size = 256; - this->random_state[i] = new char[state_size]; - initstate((unsigned int) i, (char *) this->random_state[i], (size_t) state_size); - } - break; - } - - -#if defined(NUMA) - this->numa_max_domain = numa_max_node(); - this->num_numa_domains = this->numa_max_domain + 1; -#endif - - - switch (this->numa_placement) { - case LOCAL : - default: - this->alloc_local(); - break; - case XOR : - this->alloc_xor(); - break; - case ADD : - this->alloc_add(); - break; - case MAP : - this->alloc_map(); - break; - } - - return 0; -} - - -int64 -Experiment::parse_number( const char* s ) -{ - int64 
result = 0; - - int len = strlen( s ); - for (int i=0; i < len; i++) { - if ( '0' <= s[i] && s[i] <= '9' ) { - result = result * 10 + s[i] - '0'; - } else if (s[i] == 'k' || s[i] == 'K') { - result = result << 10; - break; - } else if (s[i] == 'm' || s[i] == 'M') { - result = result << 20; - break; - } else if (s[i] == 'g' || s[i] == 'G') { - result = result << 30; - break; - } else if (s[i] == 't' || s[i] == 'T') { - result = result << 40; - break; - } else { - break; - } - } - - return result; -} - - -float -Experiment::parse_real( const char* s ) -{ - float result = 0; - bool decimal = false; - float power = 1; - - int len = strlen( s ); - for (int i=0; i < len; i++) { - if ( '0' <= s[i] && s[i] <= '9' ) { - if (! decimal) { - result = result * 10 + s[i] - '0'; - } else { - power = power / 10; - result = result + (s[i] - '0') * power; - } - } else if ( '.' == s[i] ) { - decimal = true; - } else { - break; - } - } - - return result; -} - -void -Experiment::alloc_local() -{ - for (int i=0; i < this->num_threads; i++) { - this->thread_domain[i] = i % this->num_numa_domains; - for (int j=0; j < this->chains_per_thread; j++) { - this->chain_domain[i][j] = this->thread_domain[i]; - } - } -} - -void -Experiment::alloc_xor() -{ - for (int i=0; i < this->num_threads; i++) { - this->thread_domain[i] = i % this->num_numa_domains; - for (int j=0; j < this->chains_per_thread; j++) { - this->chain_domain[i][j] = (this->thread_domain[i] ^ this->offset_or_mask) % this->num_numa_domains; - } - } -} - -void -Experiment::alloc_add() -{ - for (int i=0; i < this->num_threads; i++) { - this->thread_domain[i] = i % this->num_numa_domains; - for (int j=0; j < this->chains_per_thread; j++) { - this->chain_domain[i][j] = (this->thread_domain[i] + this->offset_or_mask) % this->num_numa_domains; - } - } -} - - // DOES NOT HANDLE ILL-FORMED SPECIFICATIONS -void -Experiment::alloc_map() -{ - // STRICT -- fail if specifications are inconsistent - - // maps look like 
"t1:c11,c12,...,c1m;t2:c21,...,c2m;...;tn:cn1,...,cnm" - // where t[i] is the thread domain of the ith thread, - // and c[i][j] is the chain domain of the jth chain in the ith thread - - // count the thread descriptors by counting ";" up to EOS - int threads = 1; - char *p = this->placement_map; - while (*p != '\0') { - if (*p == ';') threads += 1; - p++; - } - int thread_domain[ threads ]; - - // count the chain descriptors by counting "," up to ";" or EOS - int chains = 1; - p = this->placement_map; - while (*p != '\0') { - if (*p == ';') break; - if (*p == ',') chains += 1; - p++; - } - int chain_domain [ threads ][ chains ]; - - int t=0, c=0; - p = this->placement_map; - while (*p != '\0') { - // everything up to ":" is the thread domain - int i = 0; - char buf[64]; - while (*p != '\0') { - if (*p == ':') { p++; break; } - buf[i] = *p; - i++; - p++; - } - buf[i] = '\0'; - thread_domain[t] = Experiment::parse_number(buf); - - // search for one or several ',' - c = 0; - while (*p != '\0' && *p != ';') { - if (chains <= c || threads <= t) { - // error in the thread/chain specification - fprintf(stderr, "Malformed map.\n"); - exit(1); - } - int i = 0; - while (*p != '\0' && *p != ';') { - if (*p == ',') { p++; break; } - buf[i] = *p; - i++; - p++; - } - buf[i] = '\0'; - chain_domain[t][c] = Experiment::parse_number(buf); - c++; - } - - if (*p == '\0') break; - if (*p == ';') p++; - t++; - } - - - this->num_threads = threads; - this->chains_per_thread = chains; - - this->thread_domain = new int32 [ this->num_threads ]; - this->chain_domain = new int32*[ this->num_threads ]; - this->random_state = new char* [ this->num_threads ]; - - for (int i=0; i < this->num_threads; i++) { - this->thread_domain[i] = thread_domain[i] % this->num_numa_domains; - - const int state_size = 256; - this->random_state[i] = new char[state_size]; - initstate((unsigned int) i, (char *) this->random_state[i], (size_t) state_size); - - this->chain_domain[i] = new int32 [ 
this->chains_per_thread ]; - for (int j=0; j < this->chains_per_thread; j++) { - this->chain_domain[i][j] = chain_domain[i][j] % this->num_numa_domains; - } - } - - this->bytes_per_thread = this->bytes_per_chain * this->chains_per_thread; - this->bytes_per_test = this->bytes_per_thread * this->num_threads; -} - -#include "Chain.h" - -void -Experiment::print() -{ - printf("strict = %d\n", strict); - printf("pointer_size = %d\n", pointer_size); - printf("sizeof(Chain) = %d\n", sizeof(Chain)); - printf("sizeof(Chain *) = %d\n", sizeof(Chain *)); - printf("bytes_per_line = %d\n", bytes_per_line); - printf("links_per_line = %d\n", links_per_line); - printf("bytes_per_page = %d\n", bytes_per_page); - printf("lines_per_page = %d\n", lines_per_page); - printf("links_per_page = %d\n", links_per_page); - printf("bytes_per_chain = %d\n", bytes_per_chain); - printf("lines_per_chain = %d\n", lines_per_chain); - printf("links_per_chain = %d\n", links_per_chain); - printf("pages_per_chain = %d\n", pages_per_chain); - printf("chains_per_thread = %d\n", chains_per_thread); - printf("bytes_per_thread = %d\n", bytes_per_thread); - printf("num_threads = %d\n", num_threads); - printf("bytes_per_test = %d\n", bytes_per_test); - printf("iterations = %d\n", iterations); - printf("experiments = %d\n", experiments); - printf("access_pattern = %d\n", access_pattern); - printf("stride = %d\n", stride); - printf("output_mode = %d\n", output_mode); - printf("numa_placement = %d\n", numa_placement); - printf("offset_or_mask = %d\n", offset_or_mask); - printf("numa_max_domain = %d\n", numa_max_domain); - printf("num_numa_domains = %d\n", num_numa_domains); - - for (int i=0; i < this->num_threads; i++) { - printf("%d: ", this->thread_domain[i]); - for (int j=0; j < this->chains_per_thread; j++) { - printf("%d,", this->chain_domain[i][j]); - } - printf("\n"); - } - - fflush(stdout); -} - -const char* -Experiment::access() -{ - const char* result = NULL; - - if (this->access_pattern == RANDOM) { - 
result = "random"; - } else if (this->access_pattern == STRIDED && 0 < this->stride) { - result = "forward"; - } else if (this->access_pattern == STRIDED && this->stride < 0) { - result = "reverse"; - } else if (this->access_pattern == STREAM) { - result = "stream"; - } - - return result; -} - -const char* -Experiment::placement() -{ - const char* result = NULL; - - if (this->numa_placement == LOCAL) { - result = "local"; - } else if (this->numa_placement == XOR) { - result = "xor"; - } else if (this->numa_placement == ADD) { - result = "add"; - } else if (this->numa_placement == MAP) { - result = "map"; - } - - return result; -} diff --git a/Experiment.h b/Experiment.h deleted file mode 100644 index 2c749d3..0000000 --- a/Experiment.h +++ /dev/null @@ -1,103 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. 
Pase - initial API and implementation * - *******************************************************************************/ - - -#if !defined(Experiment_h) -#define Experiment_h - -#include "Chain.h" -#include "Types.h" - -class Experiment { -public: - Experiment(); - ~Experiment(); - - int parse_args(int argc, char* argv[]); - int64 parse_number( const char* s ); - float parse_real( const char* s ); - - const char* placement(); - const char* access(); - - // fundamental parameters - int64 pointer_size; // number of bytes in a pointer - int64 bytes_per_line; // working set cache line size (bytes) - int64 links_per_line; // working set cache line size (links) - int64 bytes_per_page; // working set page size (in bytes) - int64 lines_per_page; // working set page size (in lines) - int64 links_per_page; // working set page size (in links) - int64 bytes_per_chain; // working set chain size (bytes) - int64 lines_per_chain; // working set chain size (lines) - int64 links_per_chain; // working set chain size (links) - int64 pages_per_chain; // working set chain size (pages) - int64 bytes_per_thread; // thread working set size (bytes) - int64 chains_per_thread; // memory loading per thread - int64 num_threads; // number of threads in the experiment - int64 bytes_per_test; // test working set size (bytes) - - float seconds; // number of seconds per experiment - int64 iterations; // number of iterations per experiment - int64 experiments; // number of experiments per test - - enum { CSV, BOTH, HEADER, TABLE } - output_mode; // results output mode - - enum { RANDOM, STRIDED, STREAM } - access_pattern; // memory access pattern - int64 stride; - - enum { LOCAL, XOR, ADD, MAP } - numa_placement; // memory allocation mode - int64 offset_or_mask; - char* placement_map; - - // maps threads and chains to numa domains - int32* thread_domain; // thread_domain[thread] - int32** chain_domain; // chain_domain[thread][chain] - int32 numa_max_domain; // highest numa domain id - int32 
num_numa_domains; // number of numa domains - - char** random_state; // random state for each thread - - int strict; // strictly adhere to user input, or fail - - const static int32 DEFAULT_POINTER_SIZE = sizeof(Chain); - const static int32 DEFAULT_BYTES_PER_LINE = 64; - const static int32 DEFAULT_LINKS_PER_LINE = DEFAULT_BYTES_PER_LINE / DEFAULT_POINTER_SIZE; - const static int32 DEFAULT_BYTES_PER_PAGE = 4096; - const static int32 DEFAULT_LINES_PER_PAGE = DEFAULT_BYTES_PER_PAGE / DEFAULT_BYTES_PER_LINE; - const static int32 DEFAULT_LINKS_PER_PAGE = DEFAULT_LINES_PER_PAGE * DEFAULT_LINKS_PER_LINE; - const static int32 DEFAULT_PAGES_PER_CHAIN = 4096; - const static int32 DEFAULT_BYTES_PER_CHAIN = DEFAULT_BYTES_PER_PAGE * DEFAULT_PAGES_PER_CHAIN; - const static int32 DEFAULT_LINES_PER_CHAIN = DEFAULT_LINES_PER_PAGE * DEFAULT_PAGES_PER_CHAIN; - const static int32 DEFAULT_LINKS_PER_CHAIN = DEFAULT_LINES_PER_CHAIN * DEFAULT_BYTES_PER_LINE / DEFAULT_POINTER_SIZE; - const static int32 DEFAULT_CHAINS_PER_THREAD = 1; - const static int32 DEFAULT_BYTES_PER_THREAD = DEFAULT_BYTES_PER_CHAIN * DEFAULT_CHAINS_PER_THREAD; - const static int32 DEFAULT_THREADS = 1; - const static int32 DEFAULT_BYTES_PER_TEST = DEFAULT_BYTES_PER_THREAD * DEFAULT_THREADS; - const static int32 DEFAULT_SECONDS = 1; - const static int32 DEFAULT_ITERATIONS = 0; - const static int32 DEFAULT_EXPERIMENTS = 1; - - const static int32 DEFAULT_OUTPUT_MODE = 1; - - void alloc_local(); - void alloc_xor(); - void alloc_add(); - void alloc_map(); - - void print(); - -private: -}; - -#endif diff --git a/Lock.C b/Lock.C deleted file mode 100644 index 104dc81..0000000 --- a/Lock.C +++ /dev/null @@ -1,45 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. 
This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. Pase - initial API and implementation * - *******************************************************************************/ - - -#include -#include - -#include "Lock.h" - -Lock::Lock() -{ - pthread_mutex_init( &(this->mutex), NULL ); -} - -Lock::~Lock() -{ - pthread_mutex_destroy( &(this->mutex) ); -} - -void -Lock::lock() -{ - pthread_mutex_lock( &(this->mutex) ); -} - -int -Lock::test() -{ - pthread_mutex_trylock( &(this->mutex) ); -} - -void -Lock::unlock() -{ - pthread_mutex_unlock( &(this->mutex) ); -} - diff --git a/Lock.h b/Lock.h deleted file mode 100644 index 14bf1dc..0000000 --- a/Lock.h +++ /dev/null @@ -1,30 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. Pase - initial API and implementation * - *******************************************************************************/ - - -#if !defined(Lock_h) -#define Lock_h - -#include - -class Lock { -public: - Lock(); - ~Lock(); - void lock(); - int test(); - void unlock(); - -private: - pthread_mutex_t mutex; -}; - -#endif diff --git a/Main.C b/Main.C deleted file mode 100644 index ebd276a..0000000 --- a/Main.C +++ /dev/null @@ -1,89 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. 
This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. Pase - initial API and implementation * - *******************************************************************************/ - - -#include - -#include "Main.h" - -#include "Run.h" -#include "Timer.h" -#include "Types.h" -#include "Output.h" -#include "Experiment.h" - - // This program allocates and accesses - // a number of blocks of memory, one or more - // for each thread that executes. Blocks - // are divided into sub-blocks called - // pages, and pages are divided into - // sub-blocks called cache lines. - // - // All pages are collected into a list. - // Pages are selected for the list in - // a particular order. Each cache line - // within the page is similarly gathered - // into a list in a particular order. - // In both cases the order may be random - // or linear. - // - // A root pointer points to the first - // cache line. A pointer in the cache - // line points to the next cache line, - // which contains a pointer to the cache - // line after that, and so on. This - // forms a pointer chain that touches all - // cache lines within the first page, - // then all cache lines within the second - // page, and so on until all pages are - // covered. The last pointer contains - // NULL, terminating the chain. - // - // Depending on compile-time options, - // pointers may be 32-bit or 64-bit - // pointers. 
- -int verbose = 0; - -int -main( int argc, char* argv[] ) -{ - Timer::calibrate(10000); - double clk_res = Timer::resolution(); - - Experiment e; - if (e.parse_args(argc, argv)) { - return 0; - } - -#if defined(UNDEFINED) - e.print(); - if (argv != NULL) return 0; -#endif - - SpinBarrier sb( e.num_threads ); - Run r[ e.num_threads ]; - for (int i=0; i < e.num_threads; i++) { - r[i].set( e, &sb ); - r[i].start(); - } - - for (int i=0; i < e.num_threads; i++) { - r[i].wait(); - } - - int64 ops = Run::ops_per_chain(); - double secs = Run::seconds(); - - Output::print(e, ops, secs, clk_res); - - return 0; -} diff --git a/Main.h b/Main.h deleted file mode 100644 index 1492291..0000000 --- a/Main.h +++ /dev/null @@ -1,18 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. 
Pase - initial API and implementation * - *******************************************************************************/ - - -#if !defined(Main_h) -#define Main_h - -extern int verbose; - -#endif diff --git a/Makefile b/Makefile deleted file mode 100644 index 9ae95b9..0000000 --- a/Makefile +++ /dev/null @@ -1,63 +0,0 @@ - -# -# BIT = { 32 | 64 } -# MODE = { NUMA | SMP } -# -ifndef BIT -BIT = 64 -endif -ifndef MODE -MODE = NUMA -endif -ifeq ($(MODE), NUMA) -LIB = -lpthread -lnuma -else -LIB = -lpthread -endif - -SRC = Main.C Chain.C Experiment.C Lock.C Output.C Run.C SpinBarrier.C Timer.C Thread.C Types.C -HDR = $(SRC:.C=.h) -OBJ = $(SRC:.C=.o) -EXE = pChase$(BIT)_$(MODE) -HYPDIR = /web/hypercomputing.org/www/doc/Guest/pChase -PCHDIR = /web/pchase.org/www/doc/Guest/pChase -TARFILE = tgz/pChase-`date +"%Y-%m-%d"`.tgz - -RM = /bin/rm -MV = /bin/mv -CI = /usr/bin/ci -CO = /usr/bin/co -CP = /bin/cp -TAR = /bin/tar - -CXXFLAGS= -O3 -m$(BIT) -D$(MODE) - -.C.o: - $(CXX) -c $(CXXFLAGS) $< - -$(EXE): $(OBJ) - $(CXX) -o $(EXE) $(CXXFLAGS) $(OBJ) $(LIB) - -$(OBJ): $(HDR) - -rmexe: - $(RM) -rf $(EXE) - -rmobj: - $(RM) -rf $(OBJ) - -ci: - $(CI) -f $(SRC) $(HDR) Makefile - -co: - $(CO) -l $(SRC) $(HDR) Makefile - -tar: - $(TAR) -cvzf $(TARFILE) $(SRC) $(HDR) Makefile License.htm License.txt pChase.sh run-pChase.sh - -cptar: - $(TAR) -cvzf $(TARFILE) $(SRC) $(HDR) Makefile License.htm License.txt pChase.sh run-pChase.sh - $(CP) $(TARFILE) $(HYPDIR)/tgz - $(CP) $(SRC) $(HDR) Makefile License.htm License.txt pChase.sh run-pChase.sh $(HYPDIR) - $(CP) $(TARFILE) $(PCHDIR)/tgz - $(CP) $(SRC) $(HDR) Makefile License.htm License.txt pChase.sh run-pChase.sh $(PCHDIR) diff --git a/Output.C b/Output.C deleted file mode 100644 index 9f9c09a..0000000 --- a/Output.C +++ /dev/null @@ -1,153 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. 
This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. Pase - initial API and implementation * - *******************************************************************************/ - - -#include -#include -#include - -#include "Output.h" - -#include "Types.h" -#include "Experiment.h" - - -void -Output::print( Experiment &e, int64 ops, double secs, double ck_res ) -{ - if (e.output_mode == Experiment::CSV) { - Output::csv(e, ops, secs, ck_res); - } else if (e.output_mode == Experiment::BOTH) { - Output::header(e, ops, secs, ck_res); - Output::csv(e, ops, secs, ck_res); - } else if (e.output_mode == Experiment::HEADER) { - Output::header(e, ops, secs, ck_res); - } else { - Output::table(e, ops, secs, ck_res); - } -} - -void -Output::header( Experiment &e, int64 ops, double secs, double ck_res ) -{ - printf("pointer size (bytes),"); - printf("cache line size (bytes),"); - printf("page size (bytes),"); - printf("chain size (bytes),"); - printf("thread size (bytes),"); - printf("test size (bytes),"); - printf("chains per thread,"); - printf("number of threads,"); - printf("iterations,"); - printf("experiments,"); - printf("access pattern,"); - printf("stride,"); - printf("numa placement,"); - printf("offset or mask,"); - printf("numa domains,"); - printf("domain map,"); - printf("operations per chain,"); - printf("total operations,"); - printf("elapsed time (seconds),"); - printf("elapsed time (timer ticks),"); - printf("clock resolution (ns),", ck_res * 1E9); - printf("memory latency (ns),"); - printf("memory bandwidth (MB/s)\n"); - - fflush(stdout); -} - -void -Output::csv( Experiment &e, int64 ops, double secs, double ck_res ) -{ - printf("%ld,", e.pointer_size); - printf("%ld,", e.bytes_per_line); - printf("%ld,", e.bytes_per_page); - printf("%ld,", 
e.bytes_per_chain); - printf("%ld,", e.bytes_per_thread); - printf("%ld,", e.bytes_per_test); - printf("%lld,", e.chains_per_thread); - printf("%ld,", e.num_threads); - printf("%ld,", e.iterations); - printf("%ld,", e.experiments); - printf("%s,", e.access()); - printf("%ld,", e.stride); - printf("%s,", e.placement()); - printf("%ld,", e.offset_or_mask); - printf("%ld,", e.num_numa_domains); - printf("\""); - printf("%d:", e.thread_domain[0]); - printf("%d", e.chain_domain[0][0]); - for (int j=1; j < e.chains_per_thread; j++) { - printf(",%d", e.chain_domain[0][j]); - } - for (int i=1; i < e.num_threads; i++) { - printf(";%d:", e.thread_domain[i]); - printf("%d", e.chain_domain[i][0]); - for (int j=1; j < e.chains_per_thread; j++) { - printf(",%d", e.chain_domain[i][j]); - } - } - printf("\","); - printf("%ld,", ops); - printf("%ld,", ops * e.chains_per_thread * e.num_threads); - printf("%.3f,", secs); - printf("%.0f,", secs/ck_res); - printf("%.2f,", ck_res * 1E9); - printf("%.2f,", (secs / (ops * e.iterations)) * 1E9); - printf("%.3f\n", ((ops * e.iterations * e.chains_per_thread * e.num_threads * e.bytes_per_line) / secs) * 1E-6); - - fflush(stdout); -} - -void -Output::table( Experiment &e, int64 ops, double secs, double ck_res ) -{ - printf("pointer size = %ld (bytes)\n", e.pointer_size); - printf("cache line size = %ld (bytes)\n", e.bytes_per_line); - printf("page size = %ld (bytes)\n", e.bytes_per_page); - printf("chain size = %ld (bytes)\n", e.bytes_per_chain); - printf("thread size = %ld (bytes)\n", e.bytes_per_thread); - printf("test size = %ld (bytes)\n", e.bytes_per_test); - printf("chains per thread = %ld\n", e.chains_per_thread); - printf("number of threads = %ld\n", e.num_threads); - printf("iterations = %ld\n", e.iterations); - printf("experiments = %ld\n", e.experiments); - printf("access pattern = %s\n", e.access()); - printf("stride = %ld\n", e.stride); - printf("numa placement = %s\n", e.placement()); - printf("offset or mask = %ld\n", 
e.offset_or_mask); - printf("numa domains = %ld\n", e.num_numa_domains); - printf("domain map = "); - printf("\""); - printf("%d:", e.thread_domain[0]); - printf("%d", e.chain_domain[0][0]); - for (int j=1; j < e.chains_per_thread; j++) { - printf(",%d", e.chain_domain[0][j]); - } - for (int i=1; i < e.num_threads; i++) { - printf(";%d:", e.thread_domain[i]); - printf("%d", e.chain_domain[i][0]); - for (int j=1; j < e.chains_per_thread; j++) { - printf(",%d", e.chain_domain[i][j]); - } - } - printf("\"\n"); - printf("operations per chain = %ld\n", ops); - printf("total operations = %ld\n", ops * e.chains_per_thread * e.num_threads); - printf("elapsed time = %.3f (seconds)\n", secs); - printf("elapsed time = %.0f (timer ticks)\n", secs/ck_res); - printf("clock resolution = %.2f (ns)\n", ck_res * 1E9); - printf("memory latency = %.2f (ns)\n", (secs / (ops * e.iterations)) * 1E9); - printf("memory bandwidth = %.3f (MB/s)\n", ((ops * e.iterations * e.chains_per_thread * e.num_threads * e.bytes_per_line) / secs) * 1E-6); - - fflush(stdout); -} diff --git a/Output.h b/Output.h deleted file mode 100644 index 9ee2c80..0000000 --- a/Output.h +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. 
Pase - initial API and implementation * - *******************************************************************************/ - - -#if !defined(Output_h) -#define Output_h - -#include "Types.h" -#include "Experiment.h" - -class Output { -public: - static void print ( Experiment &e, int64 ops, double secs, double ck_res ); - static void header( Experiment &e, int64 ops, double secs, double ck_res ); - static void csv ( Experiment &e, int64 ops, double secs, double ck_res ); - static void table ( Experiment &e, int64 ops, double secs, double ck_res ); -private: -}; - -#endif diff --git a/Run.C b/Run.C deleted file mode 100644 index 4fb8057..0000000 --- a/Run.C +++ /dev/null @@ -1,1206 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. 
Pase - initial API and implementation * - *******************************************************************************/ - - -#include -#include -#include - -#if defined(NUMA) -#include -#endif - -#include "Run.h" - -#include "Chain.h" -#include "Timer.h" -#include "SpinBarrier.h" - - -static double max( double v1, double v2 ); -static double min( double v1, double v2 ); -static void chase_pointers(int64 chains_per_thread, int64 iterations, Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride); -static void follow_streams(int64 chains_per_thread, int64 iterations, Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride); -static void (*run_benchmark)(int64 chains_per_thread, int64 iterations, Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride) = chase_pointers; - -Lock Run::global_mutex; -int64 Run::_ops_per_chain = 0; -double Run::_seconds = 1E9; - -Run::Run() -: exp(NULL), bp(NULL) -{ -} - -Run::~Run() -{ -} - -void -Run::set( Experiment &e, SpinBarrier* sbp ) -{ - this->exp = &e; - this->bp = sbp; -} - -int -Run::run() -{ - // first allocate all memory for the chains, - // making sure it is allocated within the - // intended numa domains - Chain** chain_memory = new Chain* [ this->exp->chains_per_thread ]; - Chain** root = new Chain* [ this->exp->chains_per_thread ]; - -#if defined(NUMA) - // establish the node id where this thread - // will run. threads are mapped to nodes - // by the set-up code for Experiment. - int run_node_id = this->exp->thread_domain[this->thread_id()]; - numa_run_on_node(run_node_id); - - // establish the node id where this thread's - // memory will be allocated. 
- for (int i=0; i < this->exp->chains_per_thread; i++) { - int alloc_node_id = this->exp->chain_domain[this->thread_id()][i]; - nodemask_t alloc_mask; - nodemask_zero(&alloc_mask); - nodemask_set(&alloc_mask, alloc_node_id); - numa_set_membind(&alloc_mask); - - chain_memory[i] = new Chain[ this->exp->links_per_chain ]; - } -#else - for (int i=0; i < this->exp->chains_per_thread; i++) { - chain_memory[i] = new Chain[ this->exp->links_per_chain ]; - } -#endif - - // initialize the chains and - // select the function that - // will execute the tests - for (int i=0; i < this->exp->chains_per_thread; i++) { - if (this->exp->access_pattern == Experiment::RANDOM) { - root[i] = random_mem_init( chain_memory[i] ); - run_benchmark = chase_pointers; - } else if (this->exp->access_pattern == Experiment::STRIDED) { - if (0 < this->exp->stride) { - root[i] = forward_mem_init( chain_memory[i] ); - } else { - root[i] = reverse_mem_init( chain_memory[i] ); - } - run_benchmark = chase_pointers; - } else if (this->exp->access_pattern == Experiment::STREAM) { - root[i] = stream_mem_init( chain_memory[i] ); - run_benchmark = follow_streams; - } - } - - if (this->exp->iterations <= 0) { - volatile static double istart = 0; - volatile static double istop = 0; - volatile static double elapsed = 0; - volatile static int64 iters = 1; - volatile double bound = max(0.2, 10 * Timer::resolution()); - for (iters=1; elapsed <= bound; iters=iters<<1) { - this->bp->barrier(); - - // start timer - if (this->thread_id() == 0) { - istart = Timer::seconds(); - } - this->bp->barrier(); - - // chase pointers - run_benchmark(this->exp->chains_per_thread, iters, root, this->exp->bytes_per_line, this->exp->bytes_per_chain, this->exp->stride); - - // barrier - this->bp->barrier(); - - // stop timer - if (this->thread_id() == 0) { - istop = Timer::seconds(); - elapsed = istop - istart; - } - this->bp->barrier(); - } - - // calculate the number of iterations - if (this->thread_id() == 0) { - if (0 < 
this->exp->seconds) { - this->exp->iterations = max(1, 0.9999 + 0.5 * this->exp->seconds * iters / elapsed); - } else { - this->exp->iterations = max(1, 0.9999 + iters / elapsed); - } - } - this->bp->barrier(); - } -#if defined(UNDEFINED) -#endif - - // barrier - for (int e=0; e < this->exp->experiments; e++) { - this->bp->barrier(); - - // start timer - double start = 0; - if (this->thread_id() == 0) start = Timer::seconds(); - this->bp->barrier(); - - // chase pointers - run_benchmark(this->exp->chains_per_thread, this->exp->iterations, root, this->exp->bytes_per_line, this->exp->bytes_per_chain, this->exp->stride); - - // barrier - this->bp->barrier(); - - // stop timer - double stop = 0; - if (this->thread_id() == 0) stop = Timer::seconds(); - this->bp->barrier(); - - if (0 <= e) { - if (this->thread_id() == 0) { - double delta = stop - start; - if (0 < delta) { - Run::_seconds = min( Run::_seconds, delta ); - } - } - } - } - - this->bp->barrier(); - - for (int i=0; i < this->exp->chains_per_thread; i++) { - if (chain_memory[i] != NULL) delete [] chain_memory[i]; - } - if (chain_memory != NULL) delete [] chain_memory; - - return 0; -} - -int dummy = 0; -void -Run::mem_check( Chain *m ) -{ - if (m == NULL) dummy += 1; -} - -static double -max( double v1, double v2 ) -{ - if (v1 < v2) return v2; - return v1; -} - -static double -min( double v1, double v2 ) -{ - if (v2 < v1) return v2; - return v1; -} - - // exclude 2 and mersienne primes, i.e., - // primes of the form 2**n - 1, e.g., - // 3, 7, 31, 127 -static const int prime_table[] = { 5, 11, 13, 17, 19, 23, 37, 41, 43, 47, - 53, 61, 71, 73, 79, 83, 89, 97, 101, 103, 109, 113, 131, 137, 139, 149, - 151, 157, 163, }; -static const int prime_table_size = sizeof prime_table / sizeof prime_table[0]; - -Chain* -Run::random_mem_init( Chain *mem ) -{ - // initialize pointers -- - // choose a page at random, then use - // one pointer from each cache line - // within the page. 
all pages and - // cache lines are chosen at random. - Chain* root = NULL; - Chain* prev = NULL; - int link_within_line = 0; - int64 local_ops_per_chain = 0; - - // we must set a lock because random() - // is not thread safe - Run::global_mutex.lock(); - setstate(this->exp->random_state[this->thread_id()]); - int page_factor = prime_table[ random() % prime_table_size ]; - int page_offset = random() % this->exp->pages_per_chain; - Run::global_mutex.unlock(); - - // loop through the pages - for (int i=0; i < this->exp->pages_per_chain; i++) { - int page = (page_factor * i + page_offset) % this->exp->pages_per_chain; - Run::global_mutex.lock(); - setstate(this->exp->random_state[this->thread_id()]); - int line_factor = prime_table[ random() % prime_table_size ]; - int line_offset = random() % this->exp->lines_per_page; - Run::global_mutex.unlock(); - - // loop through the lines within a page - for (int j=0; j < this->exp->lines_per_page; j++) { - int line_within_page = (line_factor * j + line_offset) % this->exp->lines_per_page; - int link = page * this->exp->links_per_page + line_within_page * this->exp->links_per_line + link_within_line; - - if (root == NULL) { -// printf("root = %d(%d)[0x%x].\n", page, line_within_page, mem+link); - prev = root = mem + link; - local_ops_per_chain += 1; - } else { -// printf("0x%x = %d(%d)[0x%x].\n", prev, page, line_within_page, mem+link); - prev->next = mem + link; - prev = prev->next; - local_ops_per_chain += 1; - } - } - } - - Run::global_mutex.lock(); - Run::_ops_per_chain = local_ops_per_chain; - Run::global_mutex.unlock(); - - return root; -} - -Chain* -Run::forward_mem_init( Chain *mem ) -{ - Chain* root = NULL; - Chain* prev = NULL; - int link_within_line = 0; - int64 local_ops_per_chain = 0; - - for (int i=0; i < this->exp->lines_per_chain; i += this->exp->stride) { - int link = i * this->exp->links_per_line + link_within_line; - if (root == NULL) { -// printf("root = %d(%d)[0x%x].\n", page, line_within_page, mem+link); - 
prev = root = mem + link; - local_ops_per_chain += 1; - } else { -// printf("0x%x = %d(%d)[0x%x].\n", prev, page, line_within_page, mem+link); - prev->next = mem + link; - prev = prev->next; - local_ops_per_chain += 1; - } - } - - Run::global_mutex.lock(); - Run::_ops_per_chain = local_ops_per_chain; - Run::global_mutex.unlock(); - - return root; -} - -Chain* -Run::reverse_mem_init( Chain *mem ) -{ - Chain* root = NULL; - Chain* prev = NULL; - int link_within_line = 0; - int64 local_ops_per_chain = 0; - - int stride = -this->exp->stride; - int last; - for (int i=0; i < this->exp->lines_per_chain; i += stride) { - last = i; - } - - for (int i=last; 0 <= i; i -= stride) { - int link = i * this->exp->links_per_line + link_within_line; - if (root == NULL) { -// printf("root = %d(%d)[0x%x].\n", page, line_within_page, mem+link); - prev = root = mem + link; - local_ops_per_chain += 1; - } else { -// printf("0x%x = %d(%d)[0x%x].\n", prev, page, line_within_page, mem+link); - prev->next = mem + link; - prev = prev->next; - local_ops_per_chain += 1; - } - } - - Run::global_mutex.lock(); - Run::_ops_per_chain = local_ops_per_chain; - Run::global_mutex.unlock(); - - return root; -} - -static int64 dumb_ck = 0; -void -mem_chk( Chain *m ) -{ - if (m == NULL) dumb_ck += 1; -} - -static void -chase_pointers( - int64 chains_per_thread, // memory loading per thread - int64 iterations, // number of iterations per experiment - Chain** root, // root(s) of the chain(s) to follow - int64 bytes_per_line, // ignored - int64 bytes_per_chain, // ignored - int64 stride // ignored -) -{ - // chase pointers - switch (chains_per_thread) { - default: - case 1: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - while (a != NULL) { - a = a->next; - } - mem_chk( a ); - } - break; - case 2: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - while (a != NULL) { - a = a->next; - b = b->next; - } - mem_chk( a ); - mem_chk( b ); - } - break; - case 3: - 
for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - while (a != NULL) { - a = a->next; - b = b->next; - c = c->next; - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - } - break; - case 4: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - while (a != NULL) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - } - break; - case 5: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - while (a != NULL) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - } - break; - case 6: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - while (a != NULL) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - mem_chk( f ); - } - break; - case 7: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - while (a != NULL) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - mem_chk( f ); - mem_chk( g ); - } - break; - case 8: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - while 
(a != NULL) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - mem_chk( f ); - mem_chk( g ); - mem_chk( h ); - } - break; - case 9: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - Chain* j = root[8]; - while (a != NULL) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - j = j->next; - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - mem_chk( f ); - mem_chk( g ); - mem_chk( h ); - mem_chk( j ); - } - break; - case 10: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - Chain* j = root[8]; - Chain* k = root[9]; - while (a != NULL) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - j = j->next; - k = k->next; - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - mem_chk( f ); - mem_chk( g ); - mem_chk( h ); - mem_chk( j ); - mem_chk( k ); - } - break; - case 11: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - Chain* j = root[8]; - Chain* k = root[9]; - Chain* l = root[10]; - while (a != NULL) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - j = j->next; - k = k->next; - l = l->next; - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c 
); - mem_chk( d ); - mem_chk( e ); - mem_chk( f ); - mem_chk( g ); - mem_chk( h ); - mem_chk( j ); - mem_chk( k ); - mem_chk( l ); - } - break; - case 12: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - Chain* j = root[8]; - Chain* k = root[9]; - Chain* l = root[10]; - Chain* m = root[11]; - while (a != NULL) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - j = j->next; - k = k->next; - l = l->next; - m = m->next; - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - mem_chk( f ); - mem_chk( g ); - mem_chk( h ); - mem_chk( j ); - mem_chk( k ); - mem_chk( l ); - mem_chk( m ); - } - break; - case 13: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - Chain* j = root[8]; - Chain* k = root[9]; - Chain* l = root[10]; - Chain* m = root[11]; - Chain* n = root[12]; - while (a != NULL) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - j = j->next; - k = k->next; - l = l->next; - m = m->next; - n = n->next; - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - mem_chk( f ); - mem_chk( g ); - mem_chk( h ); - mem_chk( j ); - mem_chk( k ); - mem_chk( l ); - mem_chk( m ); - mem_chk( n ); - } - break; - case 14: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - Chain* j = root[8]; - Chain* k = root[9]; - Chain* l = root[10]; - Chain* m = root[11]; - Chain* n = root[12]; - 
Chain* o = root[13]; - while (a != NULL) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - j = j->next; - k = k->next; - l = l->next; - m = m->next; - n = n->next; - o = o->next; - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - mem_chk( f ); - mem_chk( g ); - mem_chk( h ); - mem_chk( j ); - mem_chk( k ); - mem_chk( l ); - mem_chk( m ); - mem_chk( n ); - mem_chk( o ); - } - break; - case 15: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - Chain* j = root[8]; - Chain* k = root[9]; - Chain* l = root[10]; - Chain* m = root[11]; - Chain* n = root[12]; - Chain* o = root[13]; - Chain* p = root[14]; - while (a != NULL) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - j = j->next; - k = k->next; - l = l->next; - m = m->next; - n = n->next; - o = o->next; - p = p->next; - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - mem_chk( f ); - mem_chk( g ); - mem_chk( h ); - mem_chk( j ); - mem_chk( k ); - mem_chk( l ); - mem_chk( m ); - mem_chk( n ); - mem_chk( o ); - mem_chk( p ); - } - break; - case 16: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - Chain* j = root[8]; - Chain* k = root[9]; - Chain* l = root[10]; - Chain* m = root[11]; - Chain* n = root[12]; - Chain* o = root[13]; - Chain* p = root[14]; - Chain* q = root[15]; - while (a != NULL) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - j = j->next; - k = k->next; - l = l->next; - m = m->next; - 
n = n->next; - o = o->next; - p = p->next; - q = q->next; - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - mem_chk( f ); - mem_chk( g ); - mem_chk( h ); - mem_chk( j ); - mem_chk( k ); - mem_chk( l ); - mem_chk( m ); - mem_chk( n ); - mem_chk( o ); - mem_chk( p ); - mem_chk( q ); - } - } -} - - // NOT WRITTEN YET -- DMP - // JUST A PLACE HOLDER! -Chain* -Run::stream_mem_init( Chain *mem ) -{ -// fprintf(stderr, "made it into stream_mem_init.\n"); -// fprintf(stderr, "chains_per_thread = %ld\n", this->exp->chains_per_thread); -// fprintf(stderr, "iterations = %ld\n", this->exp->iterations); -// fprintf(stderr, "bytes_per_chain = %ld\n", this->exp->bytes_per_chain); -// fprintf(stderr, "stride = %ld\n", this->exp->stride); - int64 local_ops_per_chain = 0; - double* tmp = (double *) mem; - int64 refs_per_line = this->exp->bytes_per_line / sizeof(double); - int64 refs_per_chain = this->exp->bytes_per_chain / sizeof(double); -// fprintf(stderr, "refs_per_chain = %ld\n", refs_per_chain); - - for (int64 i=0; i < refs_per_chain; i += this->exp->stride*refs_per_line) { - tmp[i] = 0; - local_ops_per_chain += 1; - } - - Run::global_mutex.lock(); - Run::_ops_per_chain = local_ops_per_chain; - Run::global_mutex.unlock(); - -// fprintf(stderr, "made it out of stream_mem_init.\n"); - return mem; -} - -static int64 summ_ck = 0; -void -sum_chk( double t ) -{ - if (t != 0) summ_ck += 1; -} - - // NOT WRITTEN YET -- DMP - // JUST A PLACE HOLDER! 
-static void -follow_streams( - int64 chains_per_thread, // memory loading per thread - int64 iterations, // number of iterations per experiment - Chain** root, // root(s) of the chain(s) to follow - int64 bytes_per_line, // ignored - int64 bytes_per_chain, // ignored - int64 stride // ignored -) -{ - int64 refs_per_line = bytes_per_line / sizeof(double); - int64 refs_per_chain = bytes_per_chain / sizeof(double); - - // chase pointers - switch (chains_per_thread) { - default: - case 1: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j]; - } - sum_chk( t ); - } - break; - case 2: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j]; - } - sum_chk( t ); - } - break; - case 3: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2[j]; - } - sum_chk( t ); - } - break; - case 4: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j]; - } - sum_chk( t ); - } - break; - case 5: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - double* a4 = (double *) root[4]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j]; - } - sum_chk( t ); - } - break; - case 6: - for 
(int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - double* a4 = (double *) root[4]; - double* a5 = (double *) root[5]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j]; - } - sum_chk( t ); - } - break; - case 7: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - double* a4 = (double *) root[4]; - double* a5 = (double *) root[5]; - double* a6 = (double *) root[6]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]; - } - sum_chk( t ); - } - break; - case 8: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - double* a4 = (double *) root[4]; - double* a5 = (double *) root[5]; - double* a6 = (double *) root[6]; - double* a7 = (double *) root[7]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j]; - } - sum_chk( t ); - } - break; - case 9: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - double* a4 = (double *) root[4]; - double* a5 = (double *) root[5]; - double* a6 = (double *) root[6]; - double* a7 = (double *) root[7]; - double* a8 = (double *) root[8]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j] + - a8[j]; - } - sum_chk( t ); - } - break; - case 10: - for (int64 i=0; i < 
iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - double* a4 = (double *) root[4]; - double* a5 = (double *) root[5]; - double* a6 = (double *) root[6]; - double* a7 = (double *) root[7]; - double* a8 = (double *) root[8]; - double* a9 = (double *) root[9]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j] + - a8[j] + a9[j]; - } - sum_chk( t ); - } - break; - case 11: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[ 0]; - double* a1 = (double *) root[ 1]; - double* a2 = (double *) root[ 2]; - double* a3 = (double *) root[ 3]; - double* a4 = (double *) root[ 4]; - double* a5 = (double *) root[ 5]; - double* a6 = (double *) root[ 6]; - double* a7 = (double *) root[ 7]; - double* a8 = (double *) root[ 8]; - double* a9 = (double *) root[ 9]; - double* a10 = (double *) root[10]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2 [j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j] + - a8[j] + a9[j] + a10[j]; - } - sum_chk( t ); - } - break; - case 12: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[ 0]; - double* a1 = (double *) root[ 1]; - double* a2 = (double *) root[ 2]; - double* a3 = (double *) root[ 3]; - double* a4 = (double *) root[ 4]; - double* a5 = (double *) root[ 5]; - double* a6 = (double *) root[ 6]; - double* a7 = (double *) root[ 7]; - double* a8 = (double *) root[ 8]; - double* a9 = (double *) root[ 9]; - double* a10 = (double *) root[10]; - double* a11 = (double *) root[11]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4[j] + a5[j] + a6[j] + a7[j] + - a8[j] + a9[j] + a10[j] + a11[j]; - } - sum_chk( t ); - } - break; - case 13: - for (int64 i=0; i < iterations; i++) { - 
double t = 0; - double* a0 = (double *) root[ 0]; - double* a1 = (double *) root[ 1]; - double* a2 = (double *) root[ 2]; - double* a3 = (double *) root[ 3]; - double* a4 = (double *) root[ 4]; - double* a5 = (double *) root[ 5]; - double* a6 = (double *) root[ 6]; - double* a7 = (double *) root[ 7]; - double* a8 = (double *) root[ 8]; - double* a9 = (double *) root[ 9]; - double* a10 = (double *) root[10]; - double* a11 = (double *) root[11]; - double* a12 = (double *) root[12]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5[j] + a6[j] + a7[j] + - a8[j] + a9[j] + a10[j] + a11[j] + a12[j]; - } - sum_chk( t ); - } - break; - case 14: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[ 0]; - double* a1 = (double *) root[ 1]; - double* a2 = (double *) root[ 2]; - double* a3 = (double *) root[ 3]; - double* a4 = (double *) root[ 4]; - double* a5 = (double *) root[ 5]; - double* a6 = (double *) root[ 6]; - double* a7 = (double *) root[ 7]; - double* a8 = (double *) root[ 8]; - double* a9 = (double *) root[ 9]; - double* a10 = (double *) root[10]; - double* a11 = (double *) root[11]; - double* a12 = (double *) root[12]; - double* a13 = (double *) root[13]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5 [j] + a6[j] + a7[j] + - a8[j] + a9[j] + a10[j] + a11[j] + a12[j] + a13[j]; - } - sum_chk( t ); - } - break; - case 15: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[ 0]; - double* a1 = (double *) root[ 1]; - double* a2 = (double *) root[ 2]; - double* a3 = (double *) root[ 3]; - double* a4 = (double *) root[ 4]; - double* a5 = (double *) root[ 5]; - double* a6 = (double *) root[ 6]; - double* a7 = (double *) root[ 7]; - double* a8 = (double *) root[ 8]; - double* a9 = (double *) root[ 9]; - double* a10 = (double *) root[10]; - double* a11 = (double *) 
root[11]; - double* a12 = (double *) root[12]; - double* a13 = (double *) root[13]; - double* a14 = (double *) root[14]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5 [j] + a6 [j] + a7[j] + - a8[j] + a9[j] + a10[j] + a11[j] + a12[j] + a13[j] + a14[j]; - } - sum_chk( t ); - } - break; - case 16: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[ 0]; - double* a1 = (double *) root[ 1]; - double* a2 = (double *) root[ 2]; - double* a3 = (double *) root[ 3]; - double* a4 = (double *) root[ 4]; - double* a5 = (double *) root[ 5]; - double* a6 = (double *) root[ 6]; - double* a7 = (double *) root[ 7]; - double* a8 = (double *) root[ 8]; - double* a9 = (double *) root[ 9]; - double* a10 = (double *) root[10]; - double* a11 = (double *) root[11]; - double* a12 = (double *) root[12]; - double* a13 = (double *) root[13]; - double* a14 = (double *) root[14]; - double* a15 = (double *) root[15]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5 [j] + a6 [j] + a7 [j] + - a8[j] + a9[j] + a10[j] + a11[j] + a12[j] + a13[j] + a14[j] + a15[j]; - } - sum_chk( t ); - } - break; - } -} diff --git a/Run.h b/Run.h deleted file mode 100644 index 810c2e8..0000000 --- a/Run.h +++ /dev/null @@ -1,50 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. 
Pase - initial API and implementation * - *******************************************************************************/ - - -#if !defined(Run_h) -#define Run_h - -#include "Thread.h" - -#include "Lock.h" -#include "Chain.h" -#include "Types.h" -#include "Experiment.h" -#include "SpinBarrier.h" - -class Run: public Thread { -public: - Run(); - ~Run(); - int run(); - void set( Experiment &e, SpinBarrier* sbp ); - - static int64 ops_per_chain() { return _ops_per_chain; } - static double seconds() { return _seconds; } - -private: - Experiment* exp; // experiment data - SpinBarrier* bp; // spin barrier used by all threads - - void mem_check( Chain *m ); - Chain* random_mem_init( Chain *m ); - Chain* forward_mem_init( Chain *m ); - Chain* reverse_mem_init( Chain *m ); - Chain* stream_mem_init( Chain *m ); - - static Lock global_mutex; // global lock - static int64 _ops_per_chain; // total number of operations per chain - static double _seconds; // total number of seconds -}; - - -#endif diff --git a/SpinBarrier.C b/SpinBarrier.C deleted file mode 100644 index d3d2d7b..0000000 --- a/SpinBarrier.C +++ /dev/null @@ -1,48 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. Pase - initial API and implementation * - *******************************************************************************/ - - -/****************************************************************************** - * * - * SpinBarrier * - * * - * Author: Douglas M. 
Pase * - * * - * Date: September 21, 2000 * - * Translated to C++, June 19, 2005 * - * * - * void barrier() * - * * - ******************************************************************************/ -#include -#include - -#include "SpinBarrier.h" - - // create a new barrier -SpinBarrier::SpinBarrier(int participants) -: limit( participants ) -{ - pthread_barrier_init( &barrier_obj, NULL, this->limit ); -} - - // destroy an old barrier -SpinBarrier::~SpinBarrier() -{ -} - - // enter the barrier and wait. everyone leaves - // when the last participant enters the barrier. -void -SpinBarrier::barrier() -{ - pthread_barrier_wait( &this->barrier_obj ); -} diff --git a/SpinBarrier.h b/SpinBarrier.h deleted file mode 100644 index f0b76d3..0000000 --- a/SpinBarrier.h +++ /dev/null @@ -1,44 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. Pase - initial API and implementation * - *******************************************************************************/ - - -/****************************************************************************** - * * - * SpinBarrier * - * * - * Author: Douglas M. 
Pase * - * * - * Date: September 21, 2000 * - * Translated to C++, June 19, 2005 * - * Rewritten August 13,2005 * - * * - * void barrier() * - * * - ******************************************************************************/ - -#if !defined( SpinBarrier_h ) -#define SpinBarrier_h - -#include - -class SpinBarrier { -public: - SpinBarrier(int participants); - ~SpinBarrier(); - - void barrier(); - -private: - int limit; // number of barrier participants - pthread_barrier_t barrier_obj; -}; - -#endif diff --git a/Thread.C b/Thread.C deleted file mode 100644 index 8908cfe..0000000 --- a/Thread.C +++ /dev/null @@ -1,86 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. 
Pase - initial API and implementation * - *******************************************************************************/ - - -#include -#include -#include - -#include "Thread.h" - -#include "Lock.h" - -Lock Thread::_global_lock; -int Thread::count = 0; - -Thread::Thread() -{ - Thread::global_lock(); - this->id = Thread::count; - Thread::count += 1; - Thread::global_unlock(); -} - -Thread::~Thread() -{ -} - -int -Thread::start() -{ - return pthread_create(&this->thread, NULL, Thread::start_routine, this); -} - -void* -Thread::start_routine(void* p) -{ - ((Thread*)p)->run(); - - return NULL; -} - -void -Thread::exit() -{ - pthread_exit(NULL); -} - -int -Thread::wait() -{ - pthread_join(this->thread, NULL); - - return 0; -} - -void -Thread::lock() -{ - this->object_lock.lock(); -} - -void -Thread::unlock() -{ - this->object_lock.unlock(); -} - -void -Thread::global_lock() -{ - Thread::_global_lock.lock(); -} - -void -Thread::global_unlock() -{ - Thread::_global_lock.unlock(); -} diff --git a/Thread.h b/Thread.h deleted file mode 100644 index 3948f56..0000000 --- a/Thread.h +++ /dev/null @@ -1,53 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. 
Pase - initial API and implementation * - *******************************************************************************/ - - -#if !defined(Thread_h) -#define Thread_h - -#include - -#include "Lock.h" - -class Thread { -public: - Thread(); - ~Thread(); - - virtual int run() = 0; - - int start(); - int wait(); - int thread_count() { return Thread::count; } - int thread_id() { return id; } - - static void exit(); - -protected: - void lock(); - void unlock(); - static void global_lock(); - static void global_unlock(); - -private: - static void* start_routine(void *); - static Lock _global_lock; - - Lock object_lock; - - pthread_t thread; - - static int count; - int id; - int lock_obj; -}; - -#endif diff --git a/Timer.C b/Timer.C deleted file mode 100644 index b326048..0000000 --- a/Timer.C +++ /dev/null @@ -1,175 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. Pase - initial API and implementation * - *******************************************************************************/ - - -#include -#include - -#include "Timer.h" - -#include "Types.h" - -static int64 read_rtc(); -static void calibrate_rtc(int n); -static double wall_seconds(); - -static int wall_ticks = -1; -static int rtc_ticks = -1; -static double wall_elapsed = -1; -static int64 rtc_elapsed = -1; -static double time_factor = -1; - -#if !defined(RTC) && !defined(GTOD) -#define RTC -#endif - -#if defined(RTC) - -double -Timer::seconds() -{ - return (double) read_rtc() * time_factor; -} - -int64 -Timer::ticks() -{ - // See pg. 
406 of the AMD x86-64 Architecture - // Programmer's Manual, Volume 2, System Programming - unsigned int eax=0, edx=0; - - __asm__ __volatile__( - "rdtsc ;" - "movl %%eax,%0;" - "movl %%edx,%1;" - "" - : "=r"(eax), "=r"(edx) - : - : "%eax", "%edx" - ); - - return ((int64) edx << 32) | (int64) eax; -} - -static int64 -read_rtc() -{ - // See pg. 406 of the AMD x86-64 Architecture - // Programmer's Manual, Volume 2, System Programming - unsigned int eax=0, edx=0; - - __asm__ __volatile__( - "rdtsc ;" - "movl %%eax,%0;" - "movl %%edx,%1;" - "" - : "=r"(eax), "=r"(edx) - : - : "%eax", "%edx" - ); - - return ((int64) edx << 32) | (int64) eax; -} - -void -Timer::calibrate() -{ - Timer::calibrate(1000); -} - -void -Timer::calibrate(int n) -{ - wall_ticks = n; - - double wall_start,wall_finish,t; - t = wall_seconds(); - while (t == (wall_start=wall_seconds())) { - ; - } - int64 rtc_start = read_rtc(); - for (int i=0; i < wall_ticks; i++) { - t = wall_seconds(); - while (t == (wall_finish=wall_seconds())) { - ; - } - } - int64 rtc_finish = read_rtc(); - - wall_elapsed = wall_finish - wall_start; - rtc_elapsed = rtc_finish - rtc_start; - time_factor = wall_elapsed / (double) rtc_elapsed; -} - -static double -wall_seconds() -{ - struct timeval t; - gettimeofday(&t, NULL); - - return (double) t.tv_sec + (double) t.tv_usec * 1E-6; -} - -#else - -double -Timer::seconds() -{ - struct timeval t; - gettimeofday(&t, NULL); - - return (double) t.tv_sec + (double) t.tv_usec * 1E-6; -} - -int64 -Timer::ticks() -{ - struct timeval t; - gettimeofday(&t, NULL); - - return 1000000 * (int64) t.tv_sec + (int64) t.tv_usec; -} - -void -Timer::calibrate() -{ -} - -void -Timer::calibrate(int n) -{ -} - -#endif - -static double -min( double v1, double v2 ) -{ - if (v2 < v1) return v2; - return v1; -} - -double -Timer::resolution() -{ - double a,b,c=1E9; - for (int i=0; i < 10; i++) { - a = Timer::seconds(); - while (a == (b=Timer::seconds())) - ; - a = Timer::seconds(); - while (a == 
(b=Timer::seconds())) - ; - c = min(b - a, c); - } - - return c; -} diff --git a/Timer.h b/Timer.h deleted file mode 100644 index ba2c503..0000000 --- a/Timer.h +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. Pase - initial API and implementation * - *******************************************************************************/ - - -#if !defined(Timer_h) -#define Timer_h - -#include "Types.h" - -class Timer { -public: - static double seconds(); - static double resolution(); - static int64 ticks(); - static void calibrate(); - static void calibrate(int n); -private: -}; - -#endif diff --git a/Types.C b/Types.C deleted file mode 100644 index da5ecd0..0000000 --- a/Types.C +++ /dev/null @@ -1,13 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. 
Pase - initial API and implementation * - *******************************************************************************/ - - -#include "Types.h" diff --git a/Types.h b/Types.h deleted file mode 100644 index 9e2eeb0..0000000 --- a/Types.h +++ /dev/null @@ -1,29 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. Pase - initial API and implementation * - *******************************************************************************/ - - -#if !defined(Types_h) -#define Types_h - -typedef long long int64; -typedef int int32; -typedef short int16; -typedef char int8; - -typedef unsigned long long uint64; -typedef unsigned int uint32; -typedef unsigned short uint16; -typedef unsigned char uint8; - -typedef double float64; -typedef float float32; - -#endif diff --git a/src/Chain.cpp b/src/Chain.cpp new file mode 100644 index 0000000..ddbc104 --- /dev/null +++ b/src/Chain.cpp @@ -0,0 +1,24 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. * + * All rights reserved. This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M. 
Pase - initial API and implementation * + *******************************************************************************/ + + +#include + +#include "Chain.h" + +Chain::Chain() +: next(NULL) +{ +} + +Chain::~Chain() +{ +} diff --git a/src/Chain.h b/src/Chain.h new file mode 100644 index 0000000..8bdb584 --- /dev/null +++ b/src/Chain.h @@ -0,0 +1,24 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. * + * All rights reserved. This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M. Pase - initial API and implementation * + *******************************************************************************/ + + +#if !defined(Chain_h) +#define Chain_h + +class Chain { +public: + Chain(); + ~Chain(); + Chain* next; +private: +}; + +#endif diff --git a/src/Experiment.cpp b/src/Experiment.cpp new file mode 100644 index 0000000..75b1cab --- /dev/null +++ b/src/Experiment.cpp @@ -0,0 +1,592 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. * + * All rights reserved. This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M. 
Pase - initial API and implementation * + *******************************************************************************/ + + +#include +#include +#include +#include + +#if defined(NUMA) +#include +#endif + +#include "Experiment.h" + +Experiment::Experiment() : + strict (0), + pointer_size (DEFAULT_POINTER_SIZE), + bytes_per_line (DEFAULT_BYTES_PER_LINE), + links_per_line (DEFAULT_LINKS_PER_LINE), + bytes_per_page (DEFAULT_BYTES_PER_PAGE), + lines_per_page (DEFAULT_LINES_PER_PAGE), + links_per_page (DEFAULT_LINKS_PER_PAGE), + bytes_per_chain (DEFAULT_BYTES_PER_CHAIN), + lines_per_chain (DEFAULT_LINES_PER_CHAIN), + links_per_chain (DEFAULT_LINKS_PER_CHAIN), + pages_per_chain (DEFAULT_PAGES_PER_CHAIN), + chains_per_thread(DEFAULT_CHAINS_PER_THREAD), + bytes_per_thread (DEFAULT_BYTES_PER_THREAD), + num_threads (DEFAULT_THREADS), + bytes_per_test (DEFAULT_BYTES_PER_TEST), + seconds (DEFAULT_SECONDS), + iterations (DEFAULT_ITERATIONS), + experiments (DEFAULT_EXPERIMENTS), + output_mode (TABLE), + access_pattern (RANDOM), + stride (1), + numa_placement (LOCAL), + offset_or_mask (0), + placement_map (NULL), + thread_domain (NULL), + chain_domain (NULL), + numa_max_domain (0), + num_numa_domains (1) +{ +} + +Experiment::~Experiment() +{ +} + + // interface: + // + // -l or --line bytes per cache line (line size) + // -p or --page bytes per page (page size) + // -c or --chain bytes per chain (used to compute pages per chain) + // -r or --references chains per thread (memory loading) + // -t or --threads number of threads (concurrency and contention) + // -i or --iters iterations + // -e or --experiments experiments + // -a or --access memory access pattern + // random random access pattern + // forward exclusive OR and mask + // reverse addition and offset + // -o or --output output mode + // hdr header only + // csv csv only + // both header + csv + // table human-readable table of values + // -n or --numa numa placement + // local local allocation of all chains + // xor 
exclusive OR and mask + // add addition and offset + // map explicit mapping of threads and chains to domains + +int +Experiment::parse_args(int argc, char* argv[]) +{ + int error = 0; + for (int i=1; i < argc; i++) { + if (strcasecmp(argv[i], "-x") == 0 || strcasecmp(argv[i], "--strict") == 0) { + this->strict = 1; + } else if (strcasecmp(argv[i], "-s") == 0 || strcasecmp(argv[i], "--seconds") == 0) { + i++; + if (i == argc) { error = 1; break; } + this->seconds = Experiment::parse_real(argv[i]); + this->iterations = 0; + if (this->seconds == 0) { error = 1; break; } + } else if (strcasecmp(argv[i], "-l") == 0 || strcasecmp(argv[i], "--line") == 0) { + i++; + if (i == argc) { error = 1; break; } + this->bytes_per_line = Experiment::parse_number(argv[i]); + if (this->bytes_per_line == 0) { error = 1; break; } + } else if (strcasecmp(argv[i], "-p") == 0 || strcasecmp(argv[i], "--page") == 0) { + i++; + if (i == argc) { error = 1; break; } + this->bytes_per_page = Experiment::parse_number(argv[i]); + if (this->bytes_per_page == 0) { error = 1; break; } + } else if (strcasecmp(argv[i], "-c") == 0 || strcasecmp(argv[i], "--chain") == 0) { + i++; + if (i == argc) { error = 1; break; } + this->bytes_per_chain = Experiment::parse_number(argv[i]); + if (this->bytes_per_chain == 0) { error = 1; break; } + } else if (strcasecmp(argv[i], "-r") == 0 || strcasecmp(argv[i], "--references") == 0) { + i++; + if (i == argc) { error = 1; break; } + this->chains_per_thread = Experiment::parse_number(argv[i]); + if (this->chains_per_thread == 0) { error = 1; break; } + } else if (strcasecmp(argv[i], "-t") == 0 || strcasecmp(argv[i], "--threads") == 0) { + i++; + if (i == argc) { error = 1; break; } + this->num_threads = Experiment::parse_number(argv[i]); + if (this->num_threads == 0) { error = 1; break; } + } else if (strcasecmp(argv[i], "-i") == 0 || strcasecmp(argv[i], "--iterations") == 0) { + i++; + if (i == argc) { error = 1; break; } + this->iterations = 
Experiment::parse_number(argv[i]); + this->seconds = 0; + if (this->iterations == 0) { error = 1; break; } + } else if (strcasecmp(argv[i], "-e") == 0 || strcasecmp(argv[i], "--experiments") == 0) { + i++; + if (i == argc) { error = 1; break; } + this->experiments = Experiment::parse_number(argv[i]); + if (this->experiments == 0) { error = 1; break; } + } else if (strcasecmp(argv[i], "-a") == 0 || strcasecmp(argv[i], "--access") == 0) { + i++; + if (i == argc) { error = 1; break; } + if (strcasecmp(argv[i], "random") == 0) { + this->access_pattern = RANDOM; + } else if (strcasecmp(argv[i], "forward") == 0) { + this->access_pattern = STRIDED; + i++; + if (i == argc) { error = 1; break; } + this->stride = Experiment::parse_number(argv[i]); + if (this->stride == 0) { error = 1; break; } + } else if (strcasecmp(argv[i], "reverse") == 0) { + this->access_pattern = STRIDED; + i++; + if (i == argc) { error = 1; break; } + this->stride = - Experiment::parse_number(argv[i]); + if (this->stride == 0) { error = 1; break; } + } else if (strcasecmp(argv[i], "stream") == 0) { + this->access_pattern = STREAM; + i++; + if (i == argc) { error = 1; break; } + this->stride = Experiment::parse_number(argv[i]); + if (this->stride == 0) { error = 1; break; } + } else { + error = 1; + break; + } + } else if (strcasecmp(argv[i], "-o") == 0 || strcasecmp(argv[i], "--output") == 0) { + i++; + if (i == argc) { error = 1; break; } + if (strcasecmp(argv[i], "table") == 0) { + this->output_mode = TABLE; + } else if (strcasecmp(argv[i], "csv") == 0) { + this->output_mode = CSV; + } else if (strcasecmp(argv[i], "both") == 0) { + this->output_mode = BOTH; + } else if (strcasecmp(argv[i], "hdr") == 0) { + this->output_mode = HEADER; + } else if (strcasecmp(argv[i], "header") == 0) { + this->output_mode = HEADER; + } else { + error = 1; + break; + } + } else if (strcasecmp(argv[i], "-n") == 0 || strcasecmp(argv[i], "--numa") == 0) { + i++; + if (i == argc) { error = 1; break; } + if 
(strcasecmp(argv[i], "local") == 0) { + this->numa_placement = LOCAL; + } else if (strcasecmp(argv[i], "xor") == 0) { + this->numa_placement = XOR; + i++; + if (i == argc) { error = 1; break; } + this->offset_or_mask = Experiment::parse_number(argv[i]); + } else if (strcasecmp(argv[i], "add") == 0) { + this->numa_placement = ADD; + i++; + if (i == argc) { error = 1; break; } + this->offset_or_mask = Experiment::parse_number(argv[i]); + } else if (strcasecmp(argv[i], "map") == 0) { + this->numa_placement = MAP; + i++; + if (i == argc) { error = 1; break; } + this->placement_map = argv[i]; + } else { + error = 1; + break; + } + } else { + error = 1; + break; + } + } + + + // if we've hit an error, print a message and quit + if (error) { + printf("usage: %s \n", argv[0]); + printf("where are selected from the following:\n"); + printf(" [-h|--help] # this message\n"); + printf(" [-l|--line] # bytes per cache line (cache line size)\n"); + printf(" [-p|--page] # bytes per page (page size)\n"); + printf(" [-c|--chain] # bytes per chain (used to compute pages per chain)\n"); + printf(" [-r|--references] # chains per thread (memory loading)\n"); + printf(" [-t|--threads] # number of threads (concurrency and contention)\n"); + printf(" [-i|--iterations] # iterations per experiment\n"); + printf(" [-e|--experiments] # experiments\n"); + printf(" [-a|--access] # memory access pattern\n"); + printf(" [-o|--output] # output format\n"); + printf(" [-n|--numa] # numa placement\n"); + printf(" [-s|--seconds] # run each experiment for seconds\n"); + printf(" [-x|--strict] # fail rather than adjust options to sensible values\n"); + printf("\n"); + printf(" is selected from the following:\n"); + printf(" random # all chains are accessed randomly\n"); + printf(" forward # chains are in forward order with constant stride\n"); + printf(" reverse # chains are in reverse order with constant stride\n"); + printf(" stream # references are calculated rather than read from memory\n"); + 
printf("\n"); + printf("Note: is always a small positive integer.\n"); + printf("\n"); + printf(" is selected from the following:\n"); + printf(" hdr # csv header only\n"); + printf(" csv # results in csv format only\n"); + printf(" both # header and results in csv format\n"); + printf(" table # human-readable table of values\n"); + printf("\n"); + printf(" is selected from the following:\n"); + printf(" local # all chains are allocated locally\n"); + printf(" xor # exclusive OR and mask\n"); + printf(" add # addition and offset\n"); + printf(" map # explicit mapping of threads and chains to domains\n"); + printf("\n"); + printf(" has the form \"t1:c11,c12,...,c1m;t2:c21,...,c2m;...;tn:cn1,...,cnm\"\n"); + printf("where t[i] is the NUMA domain where the ith thread is run,\n"); + printf("and c[i][j] is the NUMA domain where the jth chain in the ith thread is allocated.\n"); + printf("(The values t[i] and c[i][j] must all be zero or small positive integers.)\n"); + printf("\n"); + printf("Note: for maps, each thread must have the same number of chains,\n"); + printf("maps override the -t or --threads specification,\n"); + printf("NUMA domains are whole numbers in the range of 0..N, and\n"); + printf("thread or chain domains that exceed the maximum NUMA domain\n"); + printf("are wrapped around using a MOD function.\n"); + printf("\n"); + printf("To determine the number of NUMA domains currently available\n"); + printf("on your system, use a command such as \"numastat\".\n"); + printf("\n"); + printf("Final note: strict is not yet fully implemented, and\n"); + printf("maps do not gracefully handle ill-formed map specifications.\n"); + + return 1; + } + + + // STRICT -- fail if specifications are inconsistent + + // compute lines per page and lines per chain + // based on input and defaults. + // we round up page and chain sizes when needed. 
+ this->lines_per_page = (this->bytes_per_page+this->bytes_per_line-1) / this->bytes_per_line; + this->bytes_per_page = this->bytes_per_line * this->lines_per_page; + this->pages_per_chain = (this->bytes_per_chain+this->bytes_per_page-1) / this->bytes_per_page; + this->bytes_per_chain = this->bytes_per_page * this->pages_per_chain; + this->bytes_per_thread = this->bytes_per_chain * this->chains_per_thread; + this->bytes_per_test = this->bytes_per_thread * this->num_threads; + this->links_per_line = this->bytes_per_line / pointer_size; + this->links_per_page = this->lines_per_page * this->links_per_line; + this->lines_per_chain = this->lines_per_page * this->pages_per_chain; + this->links_per_chain = this->lines_per_chain * this->links_per_line; + + + // allocate the chain roots for all threads + // and compute the chain locations + // (the chains themselves are initialized by the threads) + switch (this->numa_placement) { + case LOCAL : + case XOR : + case ADD : + this->thread_domain = new int32 [ this->num_threads ]; + this->chain_domain = new int32*[ this->num_threads ]; + this->random_state = new char* [ this->num_threads ]; + + for (int i=0; i < this->num_threads; i++) { + this->chain_domain[i] = new int32 [ this->chains_per_thread ]; + + const int state_size = 256; + this->random_state[i] = new char[state_size]; + initstate((unsigned int) i, (char *) this->random_state[i], (size_t) state_size); + } + break; + } + + +#if defined(NUMA) + this->numa_max_domain = numa_max_node(); + this->num_numa_domains = this->numa_max_domain + 1; +#endif + + + switch (this->numa_placement) { + case LOCAL : + default: + this->alloc_local(); + break; + case XOR : + this->alloc_xor(); + break; + case ADD : + this->alloc_add(); + break; + case MAP : + this->alloc_map(); + break; + } + + return 0; +} + + +int64 +Experiment::parse_number( const char* s ) +{ + int64 result = 0; + + int len = strlen( s ); + for (int i=0; i < len; i++) { + if ( '0' <= s[i] && s[i] <= '9' ) { + result = 
result * 10 + s[i] - '0'; + } else if (s[i] == 'k' || s[i] == 'K') { + result = result << 10; + break; + } else if (s[i] == 'm' || s[i] == 'M') { + result = result << 20; + break; + } else if (s[i] == 'g' || s[i] == 'G') { + result = result << 30; + break; + } else if (s[i] == 't' || s[i] == 'T') { + result = result << 40; + break; + } else { + break; + } + } + + return result; +} + + +float +Experiment::parse_real( const char* s ) +{ + float result = 0; + bool decimal = false; + float power = 1; + + int len = strlen( s ); + for (int i=0; i < len; i++) { + if ( '0' <= s[i] && s[i] <= '9' ) { + if (! decimal) { + result = result * 10 + s[i] - '0'; + } else { + power = power / 10; + result = result + (s[i] - '0') * power; + } + } else if ( '.' == s[i] ) { + decimal = true; + } else { + break; + } + } + + return result; +} + +void +Experiment::alloc_local() +{ + for (int i=0; i < this->num_threads; i++) { + this->thread_domain[i] = i % this->num_numa_domains; + for (int j=0; j < this->chains_per_thread; j++) { + this->chain_domain[i][j] = this->thread_domain[i]; + } + } +} + +void +Experiment::alloc_xor() +{ + for (int i=0; i < this->num_threads; i++) { + this->thread_domain[i] = i % this->num_numa_domains; + for (int j=0; j < this->chains_per_thread; j++) { + this->chain_domain[i][j] = (this->thread_domain[i] ^ this->offset_or_mask) % this->num_numa_domains; + } + } +} + +void +Experiment::alloc_add() +{ + for (int i=0; i < this->num_threads; i++) { + this->thread_domain[i] = i % this->num_numa_domains; + for (int j=0; j < this->chains_per_thread; j++) { + this->chain_domain[i][j] = (this->thread_domain[i] + this->offset_or_mask) % this->num_numa_domains; + } + } +} + + // DOES NOT HANDLE ILL-FORMED SPECIFICATIONS +void +Experiment::alloc_map() +{ + // STRICT -- fail if specifications are inconsistent + + // maps look like "t1:c11,c12,...,c1m;t2:c21,...,c2m;...;tn:cn1,...,cnm" + // where t[i] is the thread domain of the ith thread, + // and c[i][j] is the chain 
domain of the jth chain in the ith thread + + // count the thread descriptors by counting ";" up to EOS + int threads = 1; + char *p = this->placement_map; + while (*p != '\0') { + if (*p == ';') threads += 1; + p++; + } + int thread_domain[ threads ]; + + // count the chain descriptors by counting "," up to ";" or EOS + int chains = 1; + p = this->placement_map; + while (*p != '\0') { + if (*p == ';') break; + if (*p == ',') chains += 1; + p++; + } + int chain_domain [ threads ][ chains ]; + + int t=0, c=0; + p = this->placement_map; + while (*p != '\0') { + // everything up to ":" is the thread domain + int i = 0; + char buf[64]; + while (*p != '\0') { + if (*p == ':') { p++; break; } + buf[i] = *p; + i++; + p++; + } + buf[i] = '\0'; + thread_domain[t] = Experiment::parse_number(buf); + + // search for one or several ',' + c = 0; + while (*p != '\0' && *p != ';') { + if (chains <= c || threads <= t) { + // error in the thread/chain specification + fprintf(stderr, "Malformed map.\n"); + exit(1); + } + int i = 0; + while (*p != '\0' && *p != ';') { + if (*p == ',') { p++; break; } + buf[i] = *p; + i++; + p++; + } + buf[i] = '\0'; + chain_domain[t][c] = Experiment::parse_number(buf); + c++; + } + + if (*p == '\0') break; + if (*p == ';') p++; + t++; + } + + + this->num_threads = threads; + this->chains_per_thread = chains; + + this->thread_domain = new int32 [ this->num_threads ]; + this->chain_domain = new int32*[ this->num_threads ]; + this->random_state = new char* [ this->num_threads ]; + + for (int i=0; i < this->num_threads; i++) { + this->thread_domain[i] = thread_domain[i] % this->num_numa_domains; + + const int state_size = 256; + this->random_state[i] = new char[state_size]; + initstate((unsigned int) i, (char *) this->random_state[i], (size_t) state_size); + + this->chain_domain[i] = new int32 [ this->chains_per_thread ]; + for (int j=0; j < this->chains_per_thread; j++) { + this->chain_domain[i][j] = chain_domain[i][j] % this->num_numa_domains; + } + } + 
+ this->bytes_per_thread = this->bytes_per_chain * this->chains_per_thread; + this->bytes_per_test = this->bytes_per_thread * this->num_threads; +} + +#include "Chain.h" + +void +Experiment::print() +{ + printf("strict = %d\n", strict); + printf("pointer_size = %d\n", pointer_size); + printf("sizeof(Chain) = %d\n", sizeof(Chain)); + printf("sizeof(Chain *) = %d\n", sizeof(Chain *)); + printf("bytes_per_line = %d\n", bytes_per_line); + printf("links_per_line = %d\n", links_per_line); + printf("bytes_per_page = %d\n", bytes_per_page); + printf("lines_per_page = %d\n", lines_per_page); + printf("links_per_page = %d\n", links_per_page); + printf("bytes_per_chain = %d\n", bytes_per_chain); + printf("lines_per_chain = %d\n", lines_per_chain); + printf("links_per_chain = %d\n", links_per_chain); + printf("pages_per_chain = %d\n", pages_per_chain); + printf("chains_per_thread = %d\n", chains_per_thread); + printf("bytes_per_thread = %d\n", bytes_per_thread); + printf("num_threads = %d\n", num_threads); + printf("bytes_per_test = %d\n", bytes_per_test); + printf("iterations = %d\n", iterations); + printf("experiments = %d\n", experiments); + printf("access_pattern = %d\n", access_pattern); + printf("stride = %d\n", stride); + printf("output_mode = %d\n", output_mode); + printf("numa_placement = %d\n", numa_placement); + printf("offset_or_mask = %d\n", offset_or_mask); + printf("numa_max_domain = %d\n", numa_max_domain); + printf("num_numa_domains = %d\n", num_numa_domains); + + for (int i=0; i < this->num_threads; i++) { + printf("%d: ", this->thread_domain[i]); + for (int j=0; j < this->chains_per_thread; j++) { + printf("%d,", this->chain_domain[i][j]); + } + printf("\n"); + } + + fflush(stdout); +} + +const char* +Experiment::access() +{ + const char* result = NULL; + + if (this->access_pattern == RANDOM) { + result = "random"; + } else if (this->access_pattern == STRIDED && 0 < this->stride) { + result = "forward"; + } else if (this->access_pattern == STRIDED && 
this->stride < 0) { + result = "reverse"; + } else if (this->access_pattern == STREAM) { + result = "stream"; + } + + return result; +} + +const char* +Experiment::placement() +{ + const char* result = NULL; + + if (this->numa_placement == LOCAL) { + result = "local"; + } else if (this->numa_placement == XOR) { + result = "xor"; + } else if (this->numa_placement == ADD) { + result = "add"; + } else if (this->numa_placement == MAP) { + result = "map"; + } + + return result; +} diff --git a/src/Experiment.h b/src/Experiment.h new file mode 100644 index 0000000..2c749d3 --- /dev/null +++ b/src/Experiment.h @@ -0,0 +1,103 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. * + * All rights reserved. This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M. 
Pase - initial API and implementation *
+ *******************************************************************************/
+
+
+#if !defined(Experiment_h)
+#define Experiment_h
+
+#include "Chain.h"
+#include "Types.h"
+
+// Holds every parameter of one benchmark run: the working-set geometry,
+// the timing controls, the output mode, the access pattern and the
+// mapping of threads and chains onto NUMA domains.
+// All data members are public and are read directly by other classes.
+class Experiment {
+public:
+    Experiment();
+    ~Experiment();
+
+    // Parses the command line into the members below; the visible tail of
+    // the implementation returns 0 after the placement tables are built.
+    int parse_args(int argc, char* argv[]);
+    // Parses a non-negative decimal integer with an optional k/m/g/t
+    // (case-insensitive) suffix, which shifts the value left by 10/20/30/40 bits.
+    // Parsing stops at the first character that is neither a digit nor a suffix.
+    int64 parse_number( const char* s );
+    // Parses a non-negative decimal number with an optional fractional part;
+    // parsing stops at the first character that is neither a digit nor '.'.
+    float parse_real( const char* s );
+
+    // Name of the current numa_placement ("local", "xor", "add", "map"),
+    // or NULL when the value matches none of them.
+    const char* placement();
+    // Name of the current access pattern ("random", "forward", "reverse",
+    // "stream"), or NULL when no case matches (e.g. STRIDED with stride 0).
+    const char* access();
+
+    // fundamental parameters
+    int64 pointer_size; // number of bytes in a pointer
+    int64 bytes_per_line; // working set cache line size (bytes)
+    int64 links_per_line; // working set cache line size (links)
+    int64 bytes_per_page; // working set page size (in bytes)
+    int64 lines_per_page; // working set page size (in lines)
+    int64 links_per_page; // working set page size (in links)
+    int64 bytes_per_chain; // working set chain size (bytes)
+    int64 lines_per_chain; // working set chain size (lines)
+    int64 links_per_chain; // working set chain size (links)
+    int64 pages_per_chain; // working set chain size (pages)
+    int64 bytes_per_thread; // thread working set size (bytes)
+    int64 chains_per_thread; // memory loading per thread
+    int64 num_threads; // number of threads in the experiment
+    int64 bytes_per_test; // test working set size (bytes)
+
+    float seconds; // number of seconds per experiment
+    int64 iterations; // number of iterations per experiment
+    int64 experiments; // number of experiments per test
+
+    enum { CSV, BOTH, HEADER, TABLE }
+    output_mode; // results output mode
+
+    enum { RANDOM, STRIDED, STREAM }
+    access_pattern; // memory access pattern
+    int64 stride; // for STRIDED: > 0 is reported as "forward", < 0 as "reverse"
+
+    enum { LOCAL, XOR, ADD, MAP }
+    numa_placement; // memory allocation mode
+    int64 offset_or_mask; // XOR: mask applied to the thread domain; ADD: offset added to it
+    char* placement_map; // MAP: text of the form "t1:c11,...,c1m;t2:c21,...,c2m;..."
+
+    // maps threads and chains to numa domains
+    int32* thread_domain; // thread_domain[thread]
+    int32** chain_domain; // chain_domain[thread][chain]
+    int32 numa_max_domain; // highest numa domain id
+    int32 num_numa_domains; // number of numa domains
+
+    char** random_state; // random state for each thread
+
+    int strict; // strictly adhere to user input, or fail
+
+    // Compile-time defaults; derived values are computed from the base ones.
+    const static int32 DEFAULT_POINTER_SIZE = sizeof(Chain);
+    const static int32 DEFAULT_BYTES_PER_LINE = 64;
+    const static int32 DEFAULT_LINKS_PER_LINE = DEFAULT_BYTES_PER_LINE / DEFAULT_POINTER_SIZE;
+    const static int32 DEFAULT_BYTES_PER_PAGE = 4096;
+    const static int32 DEFAULT_LINES_PER_PAGE = DEFAULT_BYTES_PER_PAGE / DEFAULT_BYTES_PER_LINE;
+    const static int32 DEFAULT_LINKS_PER_PAGE = DEFAULT_LINES_PER_PAGE * DEFAULT_LINKS_PER_LINE;
+    const static int32 DEFAULT_PAGES_PER_CHAIN = 4096;
+    const static int32 DEFAULT_BYTES_PER_CHAIN = DEFAULT_BYTES_PER_PAGE * DEFAULT_PAGES_PER_CHAIN;
+    const static int32 DEFAULT_LINES_PER_CHAIN = DEFAULT_LINES_PER_PAGE * DEFAULT_PAGES_PER_CHAIN;
+    const static int32 DEFAULT_LINKS_PER_CHAIN = DEFAULT_LINES_PER_CHAIN * DEFAULT_BYTES_PER_LINE / DEFAULT_POINTER_SIZE;
+    const static int32 DEFAULT_CHAINS_PER_THREAD = 1;
+    const static int32 DEFAULT_BYTES_PER_THREAD = DEFAULT_BYTES_PER_CHAIN * DEFAULT_CHAINS_PER_THREAD;
+    const static int32 DEFAULT_THREADS = 1;
+    const static int32 DEFAULT_BYTES_PER_TEST = DEFAULT_BYTES_PER_THREAD * DEFAULT_THREADS;
+    const static int32 DEFAULT_SECONDS = 1;
+    const static int32 DEFAULT_ITERATIONS = 0;
+    const static int32 DEFAULT_EXPERIMENTS = 1;
+
+    const static int32 DEFAULT_OUTPUT_MODE = 1; // 1 is BOTH in the output_mode enum
+
+    // Fill thread_domain[] and chain_domain[][] for each placement mode.
+    void alloc_local(); // chains live in their thread's own domain
+    void alloc_xor(); // chain domain = (thread domain ^ offset_or_mask) % num_numa_domains
+    void alloc_add(); // chain domain = (thread domain + offset_or_mask) % num_numa_domains
+    void alloc_map(); // domains parsed from placement_map; also resets num_threads and chains_per_thread
+
+    // Dumps the parameters and the domain tables to stdout.
+    void print();
+
+private:
+};
+
+#endif
diff --git a/src/Lock.cpp b/src/Lock.cpp
new file mode 100644
index 0000000..104dc81
--- /dev/null
+++ b/src/Lock.cpp
@@ -0,0 +1,45 @@
+/*******************************************************************************
+ * Copyright (c) 2006 International Business Machines Corporation. *
+ * All rights reserved. 
This program and the accompanying materials *
+ * are made available under the terms of the Common Public License v1.0 *
+ * which accompanies this distribution, and is available at *
+ * http://www.opensource.org/licenses/cpl1.0.php *
+ * *
+ * Contributors: *
+ * Douglas M. Pase - initial API and implementation *
+ *******************************************************************************/
+
+
+#include
+#include
+
+#include "Lock.h"
+
+// Creates the underlying mutex with default attributes.
+Lock::Lock()
+{
+    pthread_mutex_init( &(this->mutex), NULL );
+}
+
+// Destroys the underlying mutex.
+Lock::~Lock()
+{
+    pthread_mutex_destroy( &(this->mutex) );
+}
+
+// Blocks until the mutex has been acquired.
+void
+Lock::lock()
+{
+    pthread_mutex_lock( &(this->mutex) );
+}
+
+// Tries to acquire the mutex without blocking.
+// Returns the pthread_mutex_trylock() result: 0 when the mutex was
+// acquired, otherwise an error number (EBUSY when it is already held).
+int
+Lock::test()
+{
+    // The result must be returned: flowing off the end of a function
+    // declared to return int is undefined behaviour.
+    return pthread_mutex_trylock( &(this->mutex) );
+}
+
+// Releases the mutex.
+void
+Lock::unlock()
+{
+    pthread_mutex_unlock( &(this->mutex) );
+}
+
diff --git a/src/Lock.h b/src/Lock.h
new file mode 100644
index 0000000..14bf1dc
--- /dev/null
+++ b/src/Lock.h
@@ -0,0 +1,30 @@
+/*******************************************************************************
+ * Copyright (c) 2006 International Business Machines Corporation. *
+ * All rights reserved. This program and the accompanying materials *
+ * are made available under the terms of the Common Public License v1.0 *
+ * which accompanies this distribution, and is available at *
+ * http://www.opensource.org/licenses/cpl1.0.php *
+ * *
+ * Contributors: *
+ * Douglas M. Pase - initial API and implementation *
+ *******************************************************************************/
+
+
+#if !defined(Lock_h)
+#define Lock_h
+
+#include
+
+class Lock { // thin wrapper around one pthread mutex
+public:
+    Lock();
+    ~Lock();
+    void lock(); // blocking acquire
+    int test(); // non-blocking acquire; 0 on success, error number otherwise
+    void unlock();
+
+private:
+    pthread_mutex_t mutex;
+};
+
+#endif
diff --git a/src/Main.c b/src/Main.c
new file mode 100644
index 0000000..5d5f243
--- /dev/null
+++ b/src/Main.c
@@ -0,0 +1,90 @@
+/*******************************************************************************
+ * Copyright (c) 2006 International Business Machines Corporation. *
+ * All rights reserved. 
This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M. Pase - initial API and implementation * + *******************************************************************************/ + + +#include + +#include "Main.h" + +#include "Run.h" +#include "Timer.h" +#include "Types.h" +#include "Output.h" +#include "Experiment.h" +#include "SpinBarrier.h" + + // This program allocates and accesses + // a number of blocks of memory, one or more + // for each thread that executes. Blocks + // are divided into sub-blocks called + // pages, and pages are divided into + // sub-blocks called cache lines. + // + // All pages are collected into a list. + // Pages are selected for the list in + // a particular order. Each cache line + // within the page is similarly gathered + // into a list in a particular order. + // In both cases the order may be random + // or linear. + // + // A root pointer points to the first + // cache line. A pointer in the cache + // line points to the next cache line, + // which contains a pointer to the cache + // line after that, and so on. This + // forms a pointer chain that touches all + // cache lines within the first page, + // then all cache lines within the second + // page, and so on until all pages are + // covered. The last pointer contains + // NULL, terminating the chain. + // + // Depending on compile-time options, + // pointers may be 32-bit or 64-bit + // pointers. 
+
+int verbose = 0;
+
+// Program entry point.
+// Calibrates the timer, parses the command line into an Experiment,
+// starts one Run per thread, waits for all of them, then prints the
+// collected result through Output::print().
+// Returns 0 in every case, including when parse_args() reports non-zero.
+// NOTE(review): a non-zero parse_args() result presumably means a usage
+// error; confirm whether that path should exit with a failure status.
+int
+main( int argc, char* argv[] )
+{
+    Timer::calibrate(10000);
+    double clk_res = Timer::resolution();
+
+    Experiment e;
+    if (e.parse_args(argc, argv)) {
+        return 0;
+    }
+
+#if defined(UNDEFINED)
+    e.print();
+    if (argv != NULL) return 0;
+#endif
+
+    SpinBarrier sb( e.num_threads );
+    // The thread count is only known at run time. A runtime-sized array of
+    // class type is a compiler extension, so allocate it on the heap.
+    Run* r = new Run[ e.num_threads ];
+    for (int i=0; i < e.num_threads; i++) {
+        r[i].set( e, &sb );
+        r[i].start();
+    }
+
+    for (int i=0; i < e.num_threads; i++) {
+        r[i].wait();
+    }
+
+    int64 ops = Run::ops_per_chain();
+    double secs = Run::seconds();
+
+    Output::print(e, ops, secs, clk_res);
+
+    delete [] r;
+
+    return 0;
+}
diff --git a/src/Main.cpp b/src/Main.cpp
new file mode 100644
index 0000000..ebd276a
--- /dev/null
+++ b/src/Main.cpp
@@ -0,0 +1,89 @@
+/*******************************************************************************
+ * Copyright (c) 2006 International Business Machines Corporation. *
+ * All rights reserved. This program and the accompanying materials *
+ * are made available under the terms of the Common Public License v1.0 *
+ * which accompanies this distribution, and is available at *
+ * http://www.opensource.org/licenses/cpl1.0.php *
+ * *
+ * Contributors: *
+ * Douglas M. Pase - initial API and implementation *
+ *******************************************************************************/
+
+
+#include
+
+#include "Main.h"
+
+#include "Run.h"
+#include "Timer.h"
+#include "Types.h"
+#include "Output.h"
+#include "Experiment.h"
+
+    // This program allocates and accesses
+    // a number of blocks of memory, one or more
+    // for each thread that executes. Blocks
+    // are divided into sub-blocks called
+    // pages, and pages are divided into
+    // sub-blocks called cache lines.
+    //
+    // All pages are collected into a list.
+    // Pages are selected for the list in
+    // a particular order. Each cache line
+    // within the page is similarly gathered
+    // into a list in a particular order.
+    // In both cases the order may be random
+    // or linear.
+    //
+    // A root pointer points to the first
+    // cache line. A pointer in the cache
+    // line points to the next cache line,
+    // which contains a pointer to the cache
+    // line after that, and so on. This
+    // forms a pointer chain that touches all
+    // cache lines within the first page,
+    // then all cache lines within the second
+    // page, and so on until all pages are
+    // covered. The last pointer contains
+    // NULL, terminating the chain.
+    //
+    // Depending on compile-time options,
+    // pointers may be 32-bit or 64-bit
+    // pointers.
+
+int verbose = 0;
+
+// Program entry point.
+// Calibrates the timer, parses the command line into an Experiment,
+// starts one Run per thread, waits for all of them, then prints the
+// collected result through Output::print().
+// Returns 0 in every case, including when parse_args() reports non-zero.
+// NOTE(review): a non-zero parse_args() result presumably means a usage
+// error; confirm whether that path should exit with a failure status.
+int
+main( int argc, char* argv[] )
+{
+    Timer::calibrate(10000);
+    double clk_res = Timer::resolution();
+
+    Experiment e;
+    if (e.parse_args(argc, argv)) {
+        return 0;
+    }
+
+#if defined(UNDEFINED)
+    e.print();
+    if (argv != NULL) return 0;
+#endif
+
+    SpinBarrier sb( e.num_threads );
+    // The thread count is only known at run time. A runtime-sized array of
+    // class type is a compiler extension, so allocate it on the heap.
+    Run* r = new Run[ e.num_threads ];
+    for (int i=0; i < e.num_threads; i++) {
+        r[i].set( e, &sb );
+        r[i].start();
+    }
+
+    for (int i=0; i < e.num_threads; i++) {
+        r[i].wait();
+    }
+
+    int64 ops = Run::ops_per_chain();
+    double secs = Run::seconds();
+
+    Output::print(e, ops, secs, clk_res);
+
+    delete [] r;
+
+    return 0;
+}
diff --git a/src/Main.h b/src/Main.h
new file mode 100644
index 0000000..1492291
--- /dev/null
+++ b/src/Main.h
@@ -0,0 +1,18 @@
+/*******************************************************************************
+ * Copyright (c) 2006 International Business Machines Corporation. *
+ * All rights reserved. This program and the accompanying materials *
+ * are made available under the terms of the Common Public License v1.0 *
+ * which accompanies this distribution, and is available at *
+ * http://www.opensource.org/licenses/cpl1.0.php *
+ * *
+ * Contributors: *
+ * Douglas M. 
Pase - initial API and implementation *
+ *******************************************************************************/
+
+
+#if !defined(Main_h)
+#define Main_h
+
+extern int verbose;
+
+#endif
diff --git a/src/Output.cpp b/src/Output.cpp
new file mode 100644
index 0000000..9f9c09a
--- /dev/null
+++ b/src/Output.cpp
@@ -0,0 +1,153 @@
+/*******************************************************************************
+ * Copyright (c) 2006 International Business Machines Corporation. *
+ * All rights reserved. This program and the accompanying materials *
+ * are made available under the terms of the Common Public License v1.0 *
+ * which accompanies this distribution, and is available at *
+ * http://www.opensource.org/licenses/cpl1.0.php *
+ * *
+ * Contributors: *
+ * Douglas M. Pase - initial API and implementation *
+ *******************************************************************************/
+
+
+#include
+#include
+#include
+
+#include "Output.h"
+
+#include "Types.h"
+#include "Experiment.h"
+
+
+// Writes the thread/chain domain map as "t0:c00,c01;t1:c10,c11"
+// (without the surrounding quotes). Shared by csv() and table().
+// Prints nothing for an experiment with no threads, and only "t:" for a
+// thread with no chains, instead of indexing element 0 unconditionally.
+static void
+print_domain_map( Experiment &e )
+{
+    for (int64 i=0; i < e.num_threads; i++) {
+        printf("%s%d:", (i == 0) ? "" : ";", (int) e.thread_domain[i]);
+        for (int64 j=0; j < e.chains_per_thread; j++) {
+            printf("%s%d", (j == 0) ? "" : ",", (int) e.chain_domain[i][j]);
+        }
+    }
+}
+
+// Dispatches to the writer(s) selected by e.output_mode:
+// CSV -> csv, BOTH -> header then csv, HEADER -> header, anything else -> table.
+void
+Output::print( Experiment &e, int64 ops, double secs, double ck_res )
+{
+    if (e.output_mode == Experiment::CSV) {
+        Output::csv(e, ops, secs, ck_res);
+    } else if (e.output_mode == Experiment::BOTH) {
+        Output::header(e, ops, secs, ck_res);
+        Output::csv(e, ops, secs, ck_res);
+    } else if (e.output_mode == Experiment::HEADER) {
+        Output::header(e, ops, secs, ck_res);
+    } else {
+        Output::table(e, ops, secs, ck_res);
+    }
+}
+
+// Writes the CSV column titles, in the same order as the values emitted
+// by csv(). The parameters are not used; they are kept so that all
+// writers share one signature.
+void
+Output::header( Experiment &e, int64 ops, double secs, double ck_res )
+{
+    printf("pointer size (bytes),");
+    printf("cache line size (bytes),");
+    printf("page size (bytes),");
+    printf("chain size (bytes),");
+    printf("thread size (bytes),");
+    printf("test size (bytes),");
+    printf("chains per thread,");
+    printf("number of threads,");
+    printf("iterations,");
+    printf("experiments,");
+    printf("access pattern,");
+    printf("stride,");
+    printf("numa placement,");
+    printf("offset or mask,");
+    printf("numa domains,");
+    printf("domain map,");
+    printf("operations per chain,");
+    printf("total operations,");
+    printf("elapsed time (seconds),");
+    printf("elapsed time (timer ticks),");
+    // This title has no conversion specifier, so it takes no argument.
+    printf("clock resolution (ns),");
+    printf("memory latency (ns),");
+    printf("memory bandwidth (MB/s)\n");
+
+    fflush(stdout);
+}
+
+// Writes one CSV record for the experiment.
+//   ops    - operations per chain
+//   secs   - elapsed time in seconds
+//   ck_res - clock resolution in seconds
+// Integer arguments are cast to the exact type their conversion
+// specifier expects, so the output does not depend on how int64 and
+// int32 are typedef'd.
+void
+Output::csv( Experiment &e, int64 ops, double secs, double ck_res )
+{
+    printf("%lld,", (long long) e.pointer_size);
+    printf("%lld,", (long long) e.bytes_per_line);
+    printf("%lld,", (long long) e.bytes_per_page);
+    printf("%lld,", (long long) e.bytes_per_chain);
+    printf("%lld,", (long long) e.bytes_per_thread);
+    printf("%lld,", (long long) e.bytes_per_test);
+    printf("%lld,", (long long) e.chains_per_thread);
+    printf("%lld,", (long long) e.num_threads);
+    printf("%lld,", (long long) e.iterations);
+    printf("%lld,", (long long) e.experiments);
+    printf("%s,", e.access());
+    printf("%lld,", (long long) e.stride);
+    printf("%s,", e.placement());
+    printf("%lld,", (long long) e.offset_or_mask);
+    printf("%d,", (int) e.num_numa_domains);
+    printf("\"");
+    print_domain_map(e);
+    printf("\",");
+    printf("%lld,", (long long) ops);
+    printf("%lld,", (long long) (ops * e.chains_per_thread * e.num_threads));
+    printf("%.3f,", secs);
+    printf("%.0f,", secs/ck_res);
+    printf("%.2f,", ck_res * 1E9);
+    printf("%.2f,", (secs / (ops * e.iterations)) * 1E9);
+    printf("%.3f\n", ((ops * e.iterations * e.chains_per_thread * e.num_threads * e.bytes_per_line) / secs) * 1E-6);
+
+    fflush(stdout);
+}
+
+// Writes the same values as csv(), one labelled "name = value" line each.
+//   ops    - operations per chain
+//   secs   - elapsed time in seconds
+//   ck_res - clock resolution in seconds
+void
+Output::table( Experiment &e, int64 ops, double secs, double ck_res )
+{
+    printf("pointer size = %lld (bytes)\n", (long long) e.pointer_size);
+    printf("cache line size = %lld (bytes)\n", (long long) e.bytes_per_line);
+    printf("page size = %lld (bytes)\n", (long long) e.bytes_per_page);
+    printf("chain size = %lld (bytes)\n", (long long) e.bytes_per_chain);
+    printf("thread size = %lld (bytes)\n", (long long) e.bytes_per_thread);
+    printf("test size = %lld (bytes)\n", (long long) e.bytes_per_test);
+    printf("chains per thread = %lld\n", (long long) e.chains_per_thread);
+    printf("number of threads = %lld\n", (long long) e.num_threads);
+    printf("iterations = %lld\n", (long long) e.iterations);
+    printf("experiments = %lld\n", (long long) e.experiments);
+    printf("access pattern = %s\n", e.access());
+    printf("stride = %lld\n", (long long) e.stride);
+    printf("numa placement = %s\n", e.placement());
+    printf("offset or mask = %lld\n", (long long) e.offset_or_mask);
+    printf("numa domains = %d\n", (int) e.num_numa_domains);
+    printf("domain map = ");
+    printf("\"");
+    print_domain_map(e);
+    printf("\"\n");
+    printf("operations per chain = %lld\n", (long long) ops);
+    printf("total operations = %lld\n", (long long) (ops * e.chains_per_thread * e.num_threads));
+    printf("elapsed time = %.3f (seconds)\n", secs);
+    printf("elapsed time = %.0f (timer ticks)\n", secs/ck_res);
+    printf("clock resolution = %.2f (ns)\n", ck_res * 1E9);
+    printf("memory latency = %.2f (ns)\n", (secs / (ops * e.iterations)) * 1E9);
+    printf("memory bandwidth = %.3f (MB/s)\n", ((ops * e.iterations * e.chains_per_thread * e.num_threads * e.bytes_per_line) / secs) * 1E-6);
+
+    fflush(stdout);
+}
diff --git a/src/Output.h b/src/Output.h
new file mode 100644
index 0000000..9ee2c80
--- /dev/null
+++ b/src/Output.h
@@ -0,0 +1,28 @@
+/*******************************************************************************
+ * Copyright (c) 2006 International Business Machines Corporation. *
+ * All rights reserved. 
This program and the accompanying materials *
+ * are made available under the terms of the Common Public License v1.0 *
+ * which accompanies this distribution, and is available at *
+ * http://www.opensource.org/licenses/cpl1.0.php *
+ * *
+ * Contributors: *
+ * Douglas M. Pase - initial API and implementation *
+ *******************************************************************************/
+
+
+#if !defined(Output_h)
+#define Output_h
+
+#include "Types.h"
+#include "Experiment.h"
+
+// Result writers; every member is static, so the class is never instantiated
+// by the code shown here. All four take the experiment, the operations per
+// chain (ops), the elapsed seconds (secs) and the clock resolution (ck_res).
+class Output {
+public:
+    // Picks the writer(s) according to e.output_mode.
+    static void print ( Experiment &e, int64 ops, double secs, double ck_res );
+    // Writes the CSV column titles.
+    static void header( Experiment &e, int64 ops, double secs, double ck_res );
+    // Writes one CSV record.
+    static void csv ( Experiment &e, int64 ops, double secs, double ck_res );
+    // Writes the results as labelled "name = value" lines.
+    static void table ( Experiment &e, int64 ops, double secs, double ck_res );
+private:
+};
+
+#endif
diff --git a/src/Run.cpp b/src/Run.cpp
new file mode 100644
index 0000000..4fb8057
--- /dev/null
+++ b/src/Run.cpp
@@ -0,0 +1,1206 @@
+/*******************************************************************************
+ * Copyright (c) 2006 International Business Machines Corporation. *
+ * All rights reserved. This program and the accompanying materials *
+ * are made available under the terms of the Common Public License v1.0 *
+ * which accompanies this distribution, and is available at *
+ * http://www.opensource.org/licenses/cpl1.0.php *
+ * *
+ * Contributors: *
+ * Douglas M. 
Pase - initial API and implementation * + *******************************************************************************/ + + +#include +#include +#include + +#if defined(NUMA) +#include +#endif + +#include "Run.h" + +#include "Chain.h" +#include "Timer.h" +#include "SpinBarrier.h" + + +static double max( double v1, double v2 ); +static double min( double v1, double v2 ); +static void chase_pointers(int64 chains_per_thread, int64 iterations, Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride); +static void follow_streams(int64 chains_per_thread, int64 iterations, Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride); +static void (*run_benchmark)(int64 chains_per_thread, int64 iterations, Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride) = chase_pointers; + +Lock Run::global_mutex; +int64 Run::_ops_per_chain = 0; +double Run::_seconds = 1E9; + +Run::Run() +: exp(NULL), bp(NULL) +{ +} + +Run::~Run() +{ +} + +void +Run::set( Experiment &e, SpinBarrier* sbp ) +{ + this->exp = &e; + this->bp = sbp; +} + +int +Run::run() +{ + // first allocate all memory for the chains, + // making sure it is allocated within the + // intended numa domains + Chain** chain_memory = new Chain* [ this->exp->chains_per_thread ]; + Chain** root = new Chain* [ this->exp->chains_per_thread ]; + +#if defined(NUMA) + // establish the node id where this thread + // will run. threads are mapped to nodes + // by the set-up code for Experiment. + int run_node_id = this->exp->thread_domain[this->thread_id()]; + numa_run_on_node(run_node_id); + + // establish the node id where this thread's + // memory will be allocated. 
+ for (int i=0; i < this->exp->chains_per_thread; i++) { + int alloc_node_id = this->exp->chain_domain[this->thread_id()][i]; + nodemask_t alloc_mask; + nodemask_zero(&alloc_mask); + nodemask_set(&alloc_mask, alloc_node_id); + numa_set_membind(&alloc_mask); + + chain_memory[i] = new Chain[ this->exp->links_per_chain ]; + } +#else + for (int i=0; i < this->exp->chains_per_thread; i++) { + chain_memory[i] = new Chain[ this->exp->links_per_chain ]; + } +#endif + + // initialize the chains and + // select the function that + // will execute the tests + for (int i=0; i < this->exp->chains_per_thread; i++) { + if (this->exp->access_pattern == Experiment::RANDOM) { + root[i] = random_mem_init( chain_memory[i] ); + run_benchmark = chase_pointers; + } else if (this->exp->access_pattern == Experiment::STRIDED) { + if (0 < this->exp->stride) { + root[i] = forward_mem_init( chain_memory[i] ); + } else { + root[i] = reverse_mem_init( chain_memory[i] ); + } + run_benchmark = chase_pointers; + } else if (this->exp->access_pattern == Experiment::STREAM) { + root[i] = stream_mem_init( chain_memory[i] ); + run_benchmark = follow_streams; + } + } + + if (this->exp->iterations <= 0) { + volatile static double istart = 0; + volatile static double istop = 0; + volatile static double elapsed = 0; + volatile static int64 iters = 1; + volatile double bound = max(0.2, 10 * Timer::resolution()); + for (iters=1; elapsed <= bound; iters=iters<<1) { + this->bp->barrier(); + + // start timer + if (this->thread_id() == 0) { + istart = Timer::seconds(); + } + this->bp->barrier(); + + // chase pointers + run_benchmark(this->exp->chains_per_thread, iters, root, this->exp->bytes_per_line, this->exp->bytes_per_chain, this->exp->stride); + + // barrier + this->bp->barrier(); + + // stop timer + if (this->thread_id() == 0) { + istop = Timer::seconds(); + elapsed = istop - istart; + } + this->bp->barrier(); + } + + // calculate the number of iterations + if (this->thread_id() == 0) { + if (0 < 
this->exp->seconds) { + this->exp->iterations = max(1, 0.9999 + 0.5 * this->exp->seconds * iters / elapsed); + } else { + this->exp->iterations = max(1, 0.9999 + iters / elapsed); + } + } + this->bp->barrier(); + } +#if defined(UNDEFINED) +#endif + + // barrier + for (int e=0; e < this->exp->experiments; e++) { + this->bp->barrier(); + + // start timer + double start = 0; + if (this->thread_id() == 0) start = Timer::seconds(); + this->bp->barrier(); + + // chase pointers + run_benchmark(this->exp->chains_per_thread, this->exp->iterations, root, this->exp->bytes_per_line, this->exp->bytes_per_chain, this->exp->stride); + + // barrier + this->bp->barrier(); + + // stop timer + double stop = 0; + if (this->thread_id() == 0) stop = Timer::seconds(); + this->bp->barrier(); + + if (0 <= e) { + if (this->thread_id() == 0) { + double delta = stop - start; + if (0 < delta) { + Run::_seconds = min( Run::_seconds, delta ); + } + } + } + } + + this->bp->barrier(); + + for (int i=0; i < this->exp->chains_per_thread; i++) { + if (chain_memory[i] != NULL) delete [] chain_memory[i]; + } + if (chain_memory != NULL) delete [] chain_memory; + + return 0; +} + +int dummy = 0; +void +Run::mem_check( Chain *m ) +{ + if (m == NULL) dummy += 1; +} + +static double +max( double v1, double v2 ) +{ + if (v1 < v2) return v2; + return v1; +} + +static double +min( double v1, double v2 ) +{ + if (v2 < v1) return v2; + return v1; +} + + // exclude 2 and mersienne primes, i.e., + // primes of the form 2**n - 1, e.g., + // 3, 7, 31, 127 +static const int prime_table[] = { 5, 11, 13, 17, 19, 23, 37, 41, 43, 47, + 53, 61, 71, 73, 79, 83, 89, 97, 101, 103, 109, 113, 131, 137, 139, 149, + 151, 157, 163, }; +static const int prime_table_size = sizeof prime_table / sizeof prime_table[0]; + +Chain* +Run::random_mem_init( Chain *mem ) +{ + // initialize pointers -- + // choose a page at random, then use + // one pointer from each cache line + // within the page. 
all pages and + // cache lines are chosen at random. + Chain* root = NULL; + Chain* prev = NULL; + int link_within_line = 0; + int64 local_ops_per_chain = 0; + + // we must set a lock because random() + // is not thread safe + Run::global_mutex.lock(); + setstate(this->exp->random_state[this->thread_id()]); + int page_factor = prime_table[ random() % prime_table_size ]; + int page_offset = random() % this->exp->pages_per_chain; + Run::global_mutex.unlock(); + + // loop through the pages + for (int i=0; i < this->exp->pages_per_chain; i++) { + int page = (page_factor * i + page_offset) % this->exp->pages_per_chain; + Run::global_mutex.lock(); + setstate(this->exp->random_state[this->thread_id()]); + int line_factor = prime_table[ random() % prime_table_size ]; + int line_offset = random() % this->exp->lines_per_page; + Run::global_mutex.unlock(); + + // loop through the lines within a page + for (int j=0; j < this->exp->lines_per_page; j++) { + int line_within_page = (line_factor * j + line_offset) % this->exp->lines_per_page; + int link = page * this->exp->links_per_page + line_within_page * this->exp->links_per_line + link_within_line; + + if (root == NULL) { +// printf("root = %d(%d)[0x%x].\n", page, line_within_page, mem+link); + prev = root = mem + link; + local_ops_per_chain += 1; + } else { +// printf("0x%x = %d(%d)[0x%x].\n", prev, page, line_within_page, mem+link); + prev->next = mem + link; + prev = prev->next; + local_ops_per_chain += 1; + } + } + } + + Run::global_mutex.lock(); + Run::_ops_per_chain = local_ops_per_chain; + Run::global_mutex.unlock(); + + return root; +} + +Chain* +Run::forward_mem_init( Chain *mem ) +{ + Chain* root = NULL; + Chain* prev = NULL; + int link_within_line = 0; + int64 local_ops_per_chain = 0; + + for (int i=0; i < this->exp->lines_per_chain; i += this->exp->stride) { + int link = i * this->exp->links_per_line + link_within_line; + if (root == NULL) { +// printf("root = %d(%d)[0x%x].\n", page, line_within_page, mem+link); + 
prev = root = mem + link; + local_ops_per_chain += 1; + } else { +// printf("0x%x = %d(%d)[0x%x].\n", prev, page, line_within_page, mem+link); + prev->next = mem + link; + prev = prev->next; + local_ops_per_chain += 1; + } + } + + Run::global_mutex.lock(); + Run::_ops_per_chain = local_ops_per_chain; + Run::global_mutex.unlock(); + + return root; +} + +Chain* +Run::reverse_mem_init( Chain *mem ) +{ + Chain* root = NULL; + Chain* prev = NULL; + int link_within_line = 0; + int64 local_ops_per_chain = 0; + + int stride = -this->exp->stride; + int last; + for (int i=0; i < this->exp->lines_per_chain; i += stride) { + last = i; + } + + for (int i=last; 0 <= i; i -= stride) { + int link = i * this->exp->links_per_line + link_within_line; + if (root == NULL) { +// printf("root = %d(%d)[0x%x].\n", page, line_within_page, mem+link); + prev = root = mem + link; + local_ops_per_chain += 1; + } else { +// printf("0x%x = %d(%d)[0x%x].\n", prev, page, line_within_page, mem+link); + prev->next = mem + link; + prev = prev->next; + local_ops_per_chain += 1; + } + } + + Run::global_mutex.lock(); + Run::_ops_per_chain = local_ops_per_chain; + Run::global_mutex.unlock(); + + return root; +} + +static int64 dumb_ck = 0; +void +mem_chk( Chain *m ) +{ + if (m == NULL) dumb_ck += 1; +} + +static void +chase_pointers( + int64 chains_per_thread, // memory loading per thread + int64 iterations, // number of iterations per experiment + Chain** root, // root(s) of the chain(s) to follow + int64 bytes_per_line, // ignored + int64 bytes_per_chain, // ignored + int64 stride // ignored +) +{ + // chase pointers + switch (chains_per_thread) { + default: + case 1: + for (int64 i=0; i < iterations; i++) { + Chain* a = root[0]; + while (a != NULL) { + a = a->next; + } + mem_chk( a ); + } + break; + case 2: + for (int64 i=0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + while (a != NULL) { + a = a->next; + b = b->next; + } + mem_chk( a ); + mem_chk( b ); + } + break; + case 3: + 
for (int64 i=0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + while (a != NULL) { + a = a->next; + b = b->next; + c = c->next; + } + mem_chk( a ); + mem_chk( b ); + mem_chk( c ); + } + break; + case 4: + for (int64 i=0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + while (a != NULL) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + } + mem_chk( a ); + mem_chk( b ); + mem_chk( c ); + mem_chk( d ); + } + break; + case 5: + for (int64 i=0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + while (a != NULL) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + } + mem_chk( a ); + mem_chk( b ); + mem_chk( c ); + mem_chk( d ); + mem_chk( e ); + } + break; + case 6: + for (int64 i=0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + Chain* f = root[5]; + while (a != NULL) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + f = f->next; + } + mem_chk( a ); + mem_chk( b ); + mem_chk( c ); + mem_chk( d ); + mem_chk( e ); + mem_chk( f ); + } + break; + case 7: + for (int64 i=0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + Chain* f = root[5]; + Chain* g = root[6]; + while (a != NULL) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + f = f->next; + g = g->next; + } + mem_chk( a ); + mem_chk( b ); + mem_chk( c ); + mem_chk( d ); + mem_chk( e ); + mem_chk( f ); + mem_chk( g ); + } + break; + case 8: + for (int64 i=0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + Chain* f = root[5]; + Chain* g = root[6]; + Chain* h = root[7]; + while 
(a != NULL) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + f = f->next; + g = g->next; + h = h->next; + } + mem_chk( a ); + mem_chk( b ); + mem_chk( c ); + mem_chk( d ); + mem_chk( e ); + mem_chk( f ); + mem_chk( g ); + mem_chk( h ); + } + break; + case 9: + for (int64 i=0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + Chain* f = root[5]; + Chain* g = root[6]; + Chain* h = root[7]; + Chain* j = root[8]; + while (a != NULL) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + f = f->next; + g = g->next; + h = h->next; + j = j->next; + } + mem_chk( a ); + mem_chk( b ); + mem_chk( c ); + mem_chk( d ); + mem_chk( e ); + mem_chk( f ); + mem_chk( g ); + mem_chk( h ); + mem_chk( j ); + } + break; + case 10: + for (int64 i=0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + Chain* f = root[5]; + Chain* g = root[6]; + Chain* h = root[7]; + Chain* j = root[8]; + Chain* k = root[9]; + while (a != NULL) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + f = f->next; + g = g->next; + h = h->next; + j = j->next; + k = k->next; + } + mem_chk( a ); + mem_chk( b ); + mem_chk( c ); + mem_chk( d ); + mem_chk( e ); + mem_chk( f ); + mem_chk( g ); + mem_chk( h ); + mem_chk( j ); + mem_chk( k ); + } + break; + case 11: + for (int64 i=0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + Chain* f = root[5]; + Chain* g = root[6]; + Chain* h = root[7]; + Chain* j = root[8]; + Chain* k = root[9]; + Chain* l = root[10]; + while (a != NULL) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + f = f->next; + g = g->next; + h = h->next; + j = j->next; + k = k->next; + l = l->next; + } + mem_chk( a ); + mem_chk( b ); + mem_chk( c 
); + mem_chk( d ); + mem_chk( e ); + mem_chk( f ); + mem_chk( g ); + mem_chk( h ); + mem_chk( j ); + mem_chk( k ); + mem_chk( l ); + } + break; + case 12: + for (int64 i=0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + Chain* f = root[5]; + Chain* g = root[6]; + Chain* h = root[7]; + Chain* j = root[8]; + Chain* k = root[9]; + Chain* l = root[10]; + Chain* m = root[11]; + while (a != NULL) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + f = f->next; + g = g->next; + h = h->next; + j = j->next; + k = k->next; + l = l->next; + m = m->next; + } + mem_chk( a ); + mem_chk( b ); + mem_chk( c ); + mem_chk( d ); + mem_chk( e ); + mem_chk( f ); + mem_chk( g ); + mem_chk( h ); + mem_chk( j ); + mem_chk( k ); + mem_chk( l ); + mem_chk( m ); + } + break; + case 13: + for (int64 i=0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + Chain* f = root[5]; + Chain* g = root[6]; + Chain* h = root[7]; + Chain* j = root[8]; + Chain* k = root[9]; + Chain* l = root[10]; + Chain* m = root[11]; + Chain* n = root[12]; + while (a != NULL) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + f = f->next; + g = g->next; + h = h->next; + j = j->next; + k = k->next; + l = l->next; + m = m->next; + n = n->next; + } + mem_chk( a ); + mem_chk( b ); + mem_chk( c ); + mem_chk( d ); + mem_chk( e ); + mem_chk( f ); + mem_chk( g ); + mem_chk( h ); + mem_chk( j ); + mem_chk( k ); + mem_chk( l ); + mem_chk( m ); + mem_chk( n ); + } + break; + case 14: + for (int64 i=0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + Chain* f = root[5]; + Chain* g = root[6]; + Chain* h = root[7]; + Chain* j = root[8]; + Chain* k = root[9]; + Chain* l = root[10]; + Chain* m = root[11]; + Chain* n = root[12]; + 
Chain* o = root[13]; + while (a != NULL) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + f = f->next; + g = g->next; + h = h->next; + j = j->next; + k = k->next; + l = l->next; + m = m->next; + n = n->next; + o = o->next; + } + mem_chk( a ); + mem_chk( b ); + mem_chk( c ); + mem_chk( d ); + mem_chk( e ); + mem_chk( f ); + mem_chk( g ); + mem_chk( h ); + mem_chk( j ); + mem_chk( k ); + mem_chk( l ); + mem_chk( m ); + mem_chk( n ); + mem_chk( o ); + } + break; + case 15: + for (int64 i=0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + Chain* f = root[5]; + Chain* g = root[6]; + Chain* h = root[7]; + Chain* j = root[8]; + Chain* k = root[9]; + Chain* l = root[10]; + Chain* m = root[11]; + Chain* n = root[12]; + Chain* o = root[13]; + Chain* p = root[14]; + while (a != NULL) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + f = f->next; + g = g->next; + h = h->next; + j = j->next; + k = k->next; + l = l->next; + m = m->next; + n = n->next; + o = o->next; + p = p->next; + } + mem_chk( a ); + mem_chk( b ); + mem_chk( c ); + mem_chk( d ); + mem_chk( e ); + mem_chk( f ); + mem_chk( g ); + mem_chk( h ); + mem_chk( j ); + mem_chk( k ); + mem_chk( l ); + mem_chk( m ); + mem_chk( n ); + mem_chk( o ); + mem_chk( p ); + } + break; + case 16: + for (int64 i=0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + Chain* f = root[5]; + Chain* g = root[6]; + Chain* h = root[7]; + Chain* j = root[8]; + Chain* k = root[9]; + Chain* l = root[10]; + Chain* m = root[11]; + Chain* n = root[12]; + Chain* o = root[13]; + Chain* p = root[14]; + Chain* q = root[15]; + while (a != NULL) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + f = f->next; + g = g->next; + h = h->next; + j = j->next; + k = k->next; + l = l->next; + m = m->next; + 
n = n->next; + o = o->next; + p = p->next; + q = q->next; + } + mem_chk( a ); + mem_chk( b ); + mem_chk( c ); + mem_chk( d ); + mem_chk( e ); + mem_chk( f ); + mem_chk( g ); + mem_chk( h ); + mem_chk( j ); + mem_chk( k ); + mem_chk( l ); + mem_chk( m ); + mem_chk( n ); + mem_chk( o ); + mem_chk( p ); + mem_chk( q ); + } + } +} + + // NOT WRITTEN YET -- DMP + // JUST A PLACE HOLDER! +Chain* +Run::stream_mem_init( Chain *mem ) +{ +// fprintf(stderr, "made it into stream_mem_init.\n"); +// fprintf(stderr, "chains_per_thread = %ld\n", this->exp->chains_per_thread); +// fprintf(stderr, "iterations = %ld\n", this->exp->iterations); +// fprintf(stderr, "bytes_per_chain = %ld\n", this->exp->bytes_per_chain); +// fprintf(stderr, "stride = %ld\n", this->exp->stride); + int64 local_ops_per_chain = 0; + double* tmp = (double *) mem; + int64 refs_per_line = this->exp->bytes_per_line / sizeof(double); + int64 refs_per_chain = this->exp->bytes_per_chain / sizeof(double); +// fprintf(stderr, "refs_per_chain = %ld\n", refs_per_chain); + + for (int64 i=0; i < refs_per_chain; i += this->exp->stride*refs_per_line) { + tmp[i] = 0; + local_ops_per_chain += 1; + } + + Run::global_mutex.lock(); + Run::_ops_per_chain = local_ops_per_chain; + Run::global_mutex.unlock(); + +// fprintf(stderr, "made it out of stream_mem_init.\n"); + return mem; +} + +static int64 summ_ck = 0; +void +sum_chk( double t ) +{ + if (t != 0) summ_ck += 1; +} + + // NOT WRITTEN YET -- DMP + // JUST A PLACE HOLDER! 
+static void +follow_streams( + int64 chains_per_thread, // memory loading per thread + int64 iterations, // number of iterations per experiment + Chain** root, // root(s) of the chain(s) to follow + int64 bytes_per_line, // ignored + int64 bytes_per_chain, // ignored + int64 stride // ignored +) +{ + int64 refs_per_line = bytes_per_line / sizeof(double); + int64 refs_per_chain = bytes_per_chain / sizeof(double); + + // chase pointers + switch (chains_per_thread) { + default: + case 1: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j]; + } + sum_chk( t ); + } + break; + case 2: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j]; + } + sum_chk( t ); + } + break; + case 3: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2[j]; + } + sum_chk( t ); + } + break; + case 4: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j]; + } + sum_chk( t ); + } + break; + case 5: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + double* a4 = (double *) root[4]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j]; + } + sum_chk( t ); + } + break; + case 6: + for 
(int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + double* a4 = (double *) root[4]; + double* a5 = (double *) root[5]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j]; + } + sum_chk( t ); + } + break; + case 7: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + double* a4 = (double *) root[4]; + double* a5 = (double *) root[5]; + double* a6 = (double *) root[6]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]; + } + sum_chk( t ); + } + break; + case 8: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + double* a4 = (double *) root[4]; + double* a5 = (double *) root[5]; + double* a6 = (double *) root[6]; + double* a7 = (double *) root[7]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j]; + } + sum_chk( t ); + } + break; + case 9: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + double* a4 = (double *) root[4]; + double* a5 = (double *) root[5]; + double* a6 = (double *) root[6]; + double* a7 = (double *) root[7]; + double* a8 = (double *) root[8]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j] + + a8[j]; + } + sum_chk( t ); + } + break; + case 10: + for (int64 i=0; i < 
iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + double* a4 = (double *) root[4]; + double* a5 = (double *) root[5]; + double* a6 = (double *) root[6]; + double* a7 = (double *) root[7]; + double* a8 = (double *) root[8]; + double* a9 = (double *) root[9]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j] + + a8[j] + a9[j]; + } + sum_chk( t ); + } + break; + case 11: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[ 0]; + double* a1 = (double *) root[ 1]; + double* a2 = (double *) root[ 2]; + double* a3 = (double *) root[ 3]; + double* a4 = (double *) root[ 4]; + double* a5 = (double *) root[ 5]; + double* a6 = (double *) root[ 6]; + double* a7 = (double *) root[ 7]; + double* a8 = (double *) root[ 8]; + double* a9 = (double *) root[ 9]; + double* a10 = (double *) root[10]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2 [j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j] + + a8[j] + a9[j] + a10[j]; + } + sum_chk( t ); + } + break; + case 12: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[ 0]; + double* a1 = (double *) root[ 1]; + double* a2 = (double *) root[ 2]; + double* a3 = (double *) root[ 3]; + double* a4 = (double *) root[ 4]; + double* a5 = (double *) root[ 5]; + double* a6 = (double *) root[ 6]; + double* a7 = (double *) root[ 7]; + double* a8 = (double *) root[ 8]; + double* a9 = (double *) root[ 9]; + double* a10 = (double *) root[10]; + double* a11 = (double *) root[11]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4[j] + a5[j] + a6[j] + a7[j] + + a8[j] + a9[j] + a10[j] + a11[j]; + } + sum_chk( t ); + } + break; + case 13: + for (int64 i=0; i < iterations; i++) { + 
double t = 0; + double* a0 = (double *) root[ 0]; + double* a1 = (double *) root[ 1]; + double* a2 = (double *) root[ 2]; + double* a3 = (double *) root[ 3]; + double* a4 = (double *) root[ 4]; + double* a5 = (double *) root[ 5]; + double* a6 = (double *) root[ 6]; + double* a7 = (double *) root[ 7]; + double* a8 = (double *) root[ 8]; + double* a9 = (double *) root[ 9]; + double* a10 = (double *) root[10]; + double* a11 = (double *) root[11]; + double* a12 = (double *) root[12]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5[j] + a6[j] + a7[j] + + a8[j] + a9[j] + a10[j] + a11[j] + a12[j]; + } + sum_chk( t ); + } + break; + case 14: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[ 0]; + double* a1 = (double *) root[ 1]; + double* a2 = (double *) root[ 2]; + double* a3 = (double *) root[ 3]; + double* a4 = (double *) root[ 4]; + double* a5 = (double *) root[ 5]; + double* a6 = (double *) root[ 6]; + double* a7 = (double *) root[ 7]; + double* a8 = (double *) root[ 8]; + double* a9 = (double *) root[ 9]; + double* a10 = (double *) root[10]; + double* a11 = (double *) root[11]; + double* a12 = (double *) root[12]; + double* a13 = (double *) root[13]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5 [j] + a6[j] + a7[j] + + a8[j] + a9[j] + a10[j] + a11[j] + a12[j] + a13[j]; + } + sum_chk( t ); + } + break; + case 15: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[ 0]; + double* a1 = (double *) root[ 1]; + double* a2 = (double *) root[ 2]; + double* a3 = (double *) root[ 3]; + double* a4 = (double *) root[ 4]; + double* a5 = (double *) root[ 5]; + double* a6 = (double *) root[ 6]; + double* a7 = (double *) root[ 7]; + double* a8 = (double *) root[ 8]; + double* a9 = (double *) root[ 9]; + double* a10 = (double *) root[10]; + double* a11 = (double *) 
root[11]; + double* a12 = (double *) root[12]; + double* a13 = (double *) root[13]; + double* a14 = (double *) root[14]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5 [j] + a6 [j] + a7[j] + + a8[j] + a9[j] + a10[j] + a11[j] + a12[j] + a13[j] + a14[j]; + } + sum_chk( t ); + } + break; + case 16: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[ 0]; + double* a1 = (double *) root[ 1]; + double* a2 = (double *) root[ 2]; + double* a3 = (double *) root[ 3]; + double* a4 = (double *) root[ 4]; + double* a5 = (double *) root[ 5]; + double* a6 = (double *) root[ 6]; + double* a7 = (double *) root[ 7]; + double* a8 = (double *) root[ 8]; + double* a9 = (double *) root[ 9]; + double* a10 = (double *) root[10]; + double* a11 = (double *) root[11]; + double* a12 = (double *) root[12]; + double* a13 = (double *) root[13]; + double* a14 = (double *) root[14]; + double* a15 = (double *) root[15]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5 [j] + a6 [j] + a7 [j] + + a8[j] + a9[j] + a10[j] + a11[j] + a12[j] + a13[j] + a14[j] + a15[j]; + } + sum_chk( t ); + } + break; + } +} diff --git a/src/Run.h b/src/Run.h new file mode 100644 index 0000000..810c2e8 --- /dev/null +++ b/src/Run.h @@ -0,0 +1,50 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. * + * All rights reserved. This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M. 
Pase - initial API and implementation * + *******************************************************************************/ + + +#if !defined(Run_h) +#define Run_h + +#include "Thread.h" + +#include "Lock.h" +#include "Chain.h" +#include "Types.h" +#include "Experiment.h" +#include "SpinBarrier.h" + +class Run: public Thread { +public: + Run(); + ~Run(); + int run(); + void set( Experiment &e, SpinBarrier* sbp ); + + static int64 ops_per_chain() { return _ops_per_chain; } + static double seconds() { return _seconds; } + +private: + Experiment* exp; // experiment data + SpinBarrier* bp; // spin barrier used by all threads + + void mem_check( Chain *m ); + Chain* random_mem_init( Chain *m ); + Chain* forward_mem_init( Chain *m ); + Chain* reverse_mem_init( Chain *m ); + Chain* stream_mem_init( Chain *m ); + + static Lock global_mutex; // global lock + static int64 _ops_per_chain; // total number of operations per chain + static double _seconds; // total number of seconds +}; + + +#endif diff --git a/src/SpinBarrier.cpp b/src/SpinBarrier.cpp new file mode 100644 index 0000000..d3d2d7b --- /dev/null +++ b/src/SpinBarrier.cpp @@ -0,0 +1,48 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. * + * All rights reserved. This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M. Pase - initial API and implementation * + *******************************************************************************/ + + +/****************************************************************************** + * * + * SpinBarrier * + * * + * Author: Douglas M. 
Pase * + * * + * Date: September 21, 2000 * + * Translated to C++, June 19, 2005 * + * * + * void barrier() * + * * + ******************************************************************************/ +#include +#include + +#include "SpinBarrier.h" + + // create a new barrier +SpinBarrier::SpinBarrier(int participants) +: limit( participants ) +{ + pthread_barrier_init( &barrier_obj, NULL, this->limit ); +} + + // destroy an old barrier +SpinBarrier::~SpinBarrier() +{ +} + + // enter the barrier and wait. everyone leaves + // when the last participant enters the barrier. +void +SpinBarrier::barrier() +{ + pthread_barrier_wait( &this->barrier_obj ); +} diff --git a/src/SpinBarrier.h b/src/SpinBarrier.h new file mode 100644 index 0000000..f0b76d3 --- /dev/null +++ b/src/SpinBarrier.h @@ -0,0 +1,44 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. * + * All rights reserved. This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M. Pase - initial API and implementation * + *******************************************************************************/ + + +/****************************************************************************** + * * + * SpinBarrier * + * * + * Author: Douglas M. 
Pase * + * * + * Date: September 21, 2000 * + * Translated to C++, June 19, 2005 * + * Rewritten August 13,2005 * + * * + * void barrier() * + * * + ******************************************************************************/ + +#if !defined( SpinBarrier_h ) +#define SpinBarrier_h + +#include + +class SpinBarrier { +public: + SpinBarrier(int participants); + ~SpinBarrier(); + + void barrier(); + +private: + int limit; // number of barrier participants + pthread_barrier_t barrier_obj; +}; + +#endif diff --git a/src/Thread.cpp b/src/Thread.cpp new file mode 100644 index 0000000..8908cfe --- /dev/null +++ b/src/Thread.cpp @@ -0,0 +1,86 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. * + * All rights reserved. This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M. 
Pase - initial API and implementation * + *******************************************************************************/ + + +#include +#include +#include + +#include "Thread.h" + +#include "Lock.h" + +Lock Thread::_global_lock; +int Thread::count = 0; + +Thread::Thread() +{ + Thread::global_lock(); + this->id = Thread::count; + Thread::count += 1; + Thread::global_unlock(); +} + +Thread::~Thread() +{ +} + +int +Thread::start() +{ + return pthread_create(&this->thread, NULL, Thread::start_routine, this); +} + +void* +Thread::start_routine(void* p) +{ + ((Thread*)p)->run(); + + return NULL; +} + +void +Thread::exit() +{ + pthread_exit(NULL); +} + +int +Thread::wait() +{ + pthread_join(this->thread, NULL); + + return 0; +} + +void +Thread::lock() +{ + this->object_lock.lock(); +} + +void +Thread::unlock() +{ + this->object_lock.unlock(); +} + +void +Thread::global_lock() +{ + Thread::_global_lock.lock(); +} + +void +Thread::global_unlock() +{ + Thread::_global_lock.unlock(); +} diff --git a/src/Thread.h b/src/Thread.h new file mode 100644 index 0000000..3948f56 --- /dev/null +++ b/src/Thread.h @@ -0,0 +1,53 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. * + * All rights reserved. This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M. 
Pase - initial API and implementation * + *******************************************************************************/ + + +#if !defined(Thread_h) +#define Thread_h + +#include + +#include "Lock.h" + +class Thread { +public: + Thread(); + ~Thread(); + + virtual int run() = 0; + + int start(); + int wait(); + int thread_count() { return Thread::count; } + int thread_id() { return id; } + + static void exit(); + +protected: + void lock(); + void unlock(); + static void global_lock(); + static void global_unlock(); + +private: + static void* start_routine(void *); + static Lock _global_lock; + + Lock object_lock; + + pthread_t thread; + + static int count; + int id; + int lock_obj; +}; + +#endif diff --git a/src/Timer.cpp b/src/Timer.cpp new file mode 100644 index 0000000..b326048 --- /dev/null +++ b/src/Timer.cpp @@ -0,0 +1,175 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. * + * All rights reserved. This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M. Pase - initial API and implementation * + *******************************************************************************/ + + +#include +#include + +#include "Timer.h" + +#include "Types.h" + +static int64 read_rtc(); +static void calibrate_rtc(int n); +static double wall_seconds(); + +static int wall_ticks = -1; +static int rtc_ticks = -1; +static double wall_elapsed = -1; +static int64 rtc_elapsed = -1; +static double time_factor = -1; + +#if !defined(RTC) && !defined(GTOD) +#define RTC +#endif + +#if defined(RTC) + +double +Timer::seconds() +{ + return (double) read_rtc() * time_factor; +} + +int64 +Timer::ticks() +{ + // See pg. 
406 of the AMD x86-64 Architecture + // Programmer's Manual, Volume 2, System Programming + unsigned int eax=0, edx=0; + + __asm__ __volatile__( + "rdtsc ;" + "movl %%eax,%0;" + "movl %%edx,%1;" + "" + : "=r"(eax), "=r"(edx) + : + : "%eax", "%edx" + ); + + return ((int64) edx << 32) | (int64) eax; +} + +static int64 +read_rtc() +{ + // See pg. 406 of the AMD x86-64 Architecture + // Programmer's Manual, Volume 2, System Programming + unsigned int eax=0, edx=0; + + __asm__ __volatile__( + "rdtsc ;" + "movl %%eax,%0;" + "movl %%edx,%1;" + "" + : "=r"(eax), "=r"(edx) + : + : "%eax", "%edx" + ); + + return ((int64) edx << 32) | (int64) eax; +} + +void +Timer::calibrate() +{ + Timer::calibrate(1000); +} + +void +Timer::calibrate(int n) +{ + wall_ticks = n; + + double wall_start,wall_finish,t; + t = wall_seconds(); + while (t == (wall_start=wall_seconds())) { + ; + } + int64 rtc_start = read_rtc(); + for (int i=0; i < wall_ticks; i++) { + t = wall_seconds(); + while (t == (wall_finish=wall_seconds())) { + ; + } + } + int64 rtc_finish = read_rtc(); + + wall_elapsed = wall_finish - wall_start; + rtc_elapsed = rtc_finish - rtc_start; + time_factor = wall_elapsed / (double) rtc_elapsed; +} + +static double +wall_seconds() +{ + struct timeval t; + gettimeofday(&t, NULL); + + return (double) t.tv_sec + (double) t.tv_usec * 1E-6; +} + +#else + +double +Timer::seconds() +{ + struct timeval t; + gettimeofday(&t, NULL); + + return (double) t.tv_sec + (double) t.tv_usec * 1E-6; +} + +int64 +Timer::ticks() +{ + struct timeval t; + gettimeofday(&t, NULL); + + return 1000000 * (int64) t.tv_sec + (int64) t.tv_usec; +} + +void +Timer::calibrate() +{ +} + +void +Timer::calibrate(int n) +{ +} + +#endif + +static double +min( double v1, double v2 ) +{ + if (v2 < v1) return v2; + return v1; +} + +double +Timer::resolution() +{ + double a,b,c=1E9; + for (int i=0; i < 10; i++) { + a = Timer::seconds(); + while (a == (b=Timer::seconds())) + ; + a = Timer::seconds(); + while (a == 
(b=Timer::seconds())) + ; + c = min(b - a, c); + } + + return c; +} diff --git a/src/Timer.h b/src/Timer.h new file mode 100644 index 0000000..ba2c503 --- /dev/null +++ b/src/Timer.h @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. * + * All rights reserved. This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M. Pase - initial API and implementation * + *******************************************************************************/ + + +#if !defined(Timer_h) +#define Timer_h + +#include "Types.h" + +class Timer { +public: + static double seconds(); + static double resolution(); + static int64 ticks(); + static void calibrate(); + static void calibrate(int n); +private: +}; + +#endif diff --git a/src/Types.cpp b/src/Types.cpp new file mode 100644 index 0000000..da5ecd0 --- /dev/null +++ b/src/Types.cpp @@ -0,0 +1,13 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. * + * All rights reserved. This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M. 
Pase - initial API and implementation * + *******************************************************************************/ + + +#include "Types.h" diff --git a/src/Types.h b/src/Types.h new file mode 100644 index 0000000..9e2eeb0 --- /dev/null +++ b/src/Types.h @@ -0,0 +1,29 @@ +/******************************************************************************* + * Copyright (c) 2006 International Business Machines Corporation. * + * All rights reserved. This program and the accompanying materials * + * are made available under the terms of the Common Public License v1.0 * + * which accompanies this distribution, and is available at * + * http://www.opensource.org/licenses/cpl1.0.php * + * * + * Contributors: * + * Douglas M. Pase - initial API and implementation * + *******************************************************************************/ + + +#if !defined(Types_h) +#define Types_h + +typedef long long int64; +typedef int int32; +typedef short int16; +typedef char int8; + +typedef unsigned long long uint64; +typedef unsigned int uint32; +typedef unsigned short uint16; +typedef unsigned char uint8; + +typedef double float64; +typedef float float32; + +#endif -- cgit v1.2.3