summary | refs | log | tree | commit | diff
path: root/src/Run.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/Run.cpp')
-rw-r--r-- src/Run.cpp 1206
1 files changed, 1206 insertions, 0 deletions
diff --git a/src/Run.cpp b/src/Run.cpp
new file mode 100644
index 0000000..4fb8057
--- /dev/null
+++ b/src/Run.cpp
@@ -0,0 +1,1206 @@
+/*******************************************************************************
+ * Copyright (c) 2006 International Business Machines Corporation. *
+ * All rights reserved. This program and the accompanying materials *
+ * are made available under the terms of the Common Public License v1.0 *
+ * which accompanies this distribution, and is available at *
+ * http://www.opensource.org/licenses/cpl1.0.php *
+ * *
+ * Contributors: *
+ * Douglas M. Pase - initial API and implementation *
+ *******************************************************************************/
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#if defined(NUMA)
+#include <numa.h>
+#endif
+
+#include "Run.h"
+
+#include "Chain.h"
+#include "Timer.h"
+#include "SpinBarrier.h"
+
+
+static double max( double v1, double v2 ); // file-local helpers; both are defined further down in this file
+static double min( double v1, double v2 );
+static void chase_pointers(int64 chains_per_thread, int64 iterations, Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride); // kernel that follows ->next links
+static void follow_streams(int64 chains_per_thread, int64 iterations, Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride); // kernel that sums strided doubles
+static void (*run_benchmark)(int64 chains_per_thread, int64 iterations, Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride) = chase_pointers; // kernel invoked by Run::run(); starts as chase_pointers and is reassigned there per access pattern
+
+Lock Run::global_mutex; // held around setstate()/random() calls and around writes to _ops_per_chain
+int64 Run::_ops_per_chain = 0; // overwritten by each *_mem_init() with the number of links/references it placed in one chain
+double Run::_seconds = 1E9; // smallest positive elapsed time recorded by thread 0 in run(); starts large so min() keeps the first measurement
+
+// Default constructor: the run starts with no experiment and no
+// barrier attached; both pointers are filled in later by set().
+Run::Run()
+    : exp(NULL),
+      bp(NULL)
+{}
+
+// Destructor: intentionally empty -- neither exp nor bp is deleted here.
+Run::~Run() {}
+
+// Attach this run to the experiment description `e` and to the
+// barrier `sbp` that run() uses to rendezvous with the other threads.
+// Only the addresses are stored; nothing is copied.
+void Run::set( Experiment &e, SpinBarrier* sbp )
+{
+    this->bp = sbp;
+    this->exp = &e;
+}
+
+// Execute this thread's share of the experiment.
+//
+// Steps, in order:
+//   1. allocate the memory for every chain (bound to the configured
+//      NUMA node when built with NUMA),
+//   2. initialize each chain for the selected access pattern and pick
+//      the matching benchmark kernel,
+//   3. if no iteration count was given (iterations <= 0), calibrate
+//      one by doubling a trial count until a trial takes longer than
+//      `bound` seconds,
+//   4. run exp->experiments timed passes; thread 0 does the timing and
+//      keeps the smallest positive elapsed time in Run::_seconds,
+//   5. release the memory allocated in step 1.
+//
+// Every thread must call run(), because the barriers below are
+// collective.  Always returns 0.
+int
+Run::run()
+{
+    // first allocate all memory for the chains,
+    // making sure it is allocated within the
+    // intended numa domains
+    Chain** chain_memory = new Chain* [ this->exp->chains_per_thread ];
+    Chain** root = new Chain* [ this->exp->chains_per_thread ];
+
+#if defined(NUMA)
+    // establish the node id where this thread
+    // will run. threads are mapped to nodes
+    // by the set-up code for Experiment.
+    int run_node_id = this->exp->thread_domain[this->thread_id()];
+    numa_run_on_node(run_node_id);
+
+    // establish the node id where this thread's
+    // memory will be allocated.
+    for (int i=0; i < this->exp->chains_per_thread; i++) {
+        int alloc_node_id = this->exp->chain_domain[this->thread_id()][i];
+        nodemask_t alloc_mask;
+        nodemask_zero(&alloc_mask);
+        nodemask_set(&alloc_mask, alloc_node_id);
+        numa_set_membind(&alloc_mask);
+
+        chain_memory[i] = new Chain[ this->exp->links_per_chain ];
+    }
+#else
+    for (int i=0; i < this->exp->chains_per_thread; i++) {
+        chain_memory[i] = new Chain[ this->exp->links_per_chain ];
+    }
+#endif
+
+    // initialize the chains and
+    // select the function that
+    // will execute the tests
+    for (int i=0; i < this->exp->chains_per_thread; i++) {
+        // never leave a root uninitialized, even when the
+        // access pattern is not one of the three handled below
+        root[i] = NULL;
+
+        if (this->exp->access_pattern == Experiment::RANDOM) {
+            root[i] = random_mem_init( chain_memory[i] );
+            run_benchmark = chase_pointers;
+        } else if (this->exp->access_pattern == Experiment::STRIDED) {
+            if (0 < this->exp->stride) {
+                root[i] = forward_mem_init( chain_memory[i] );
+            } else {
+                root[i] = reverse_mem_init( chain_memory[i] );
+            }
+            run_benchmark = chase_pointers;
+        } else if (this->exp->access_pattern == Experiment::STREAM) {
+            root[i] = stream_mem_init( chain_memory[i] );
+            run_benchmark = follow_streams;
+        }
+    }
+
+    if (this->exp->iterations <= 0) {
+        // istart and istop are only touched by thread 0.  elapsed is
+        // written by thread 0 and read by every thread in the loop
+        // test, so it has to be shared between threads (static).
+        // NOTE(review): elapsed is never reset, so a second call to
+        // run() in the same process would skip the doubling loop --
+        // confirm calibration happens only once per process.
+        volatile static double istart = 0;
+        volatile static double istop = 0;
+        volatile static double elapsed = 0;
+        volatile double bound = max(0.2, 10 * Timer::resolution());
+
+        // the trial count is private to each thread.  All threads
+        // leave the loop in the same round (they all test the shared
+        // elapsed between two barriers), so every copy follows the
+        // sequence 1, 2, 4, ...  A single shared counter would be
+        // shifted once per thread per round without synchronization.
+        int64 iters;
+        for (iters=1; elapsed <= bound; iters=iters<<1) {
+            this->bp->barrier();
+
+            // start timer
+            if (this->thread_id() == 0) {
+                istart = Timer::seconds();
+            }
+            this->bp->barrier();
+
+            // chase pointers
+            run_benchmark(this->exp->chains_per_thread, iters, root, this->exp->bytes_per_line, this->exp->bytes_per_chain, this->exp->stride);
+
+            // barrier
+            this->bp->barrier();
+
+            // stop timer
+            if (this->thread_id() == 0) {
+                istop = Timer::seconds();
+                elapsed = istop - istart;
+            }
+            this->bp->barrier();
+        }
+
+        // calculate the number of iterations.  iters has been
+        // doubled once more since the trial that produced elapsed.
+        if (this->thread_id() == 0) {
+            if (0 < this->exp->seconds) {
+                this->exp->iterations = max(1, 0.9999 + 0.5 * this->exp->seconds * iters / elapsed);
+            } else {
+                this->exp->iterations = max(1, 0.9999 + iters / elapsed);
+            }
+        }
+        this->bp->barrier();
+    }
+
+    // barrier
+    for (int e=0; e < this->exp->experiments; e++) {
+        this->bp->barrier();
+
+        // start timer
+        double start = 0;
+        if (this->thread_id() == 0) start = Timer::seconds();
+        this->bp->barrier();
+
+        // chase pointers
+        run_benchmark(this->exp->chains_per_thread, this->exp->iterations, root, this->exp->bytes_per_line, this->exp->bytes_per_chain, this->exp->stride);
+
+        // barrier
+        this->bp->barrier();
+
+        // stop timer
+        double stop = 0;
+        if (this->thread_id() == 0) stop = Timer::seconds();
+        this->bp->barrier();
+
+        // thread 0 keeps the best (smallest positive) time
+        if (this->thread_id() == 0) {
+            double delta = stop - start;
+            if (0 < delta) {
+                Run::_seconds = min( Run::_seconds, delta );
+            }
+        }
+    }
+
+    this->bp->barrier();
+
+    // release everything allocated above; root only holds pointers
+    // into chain_memory, so the array itself is all that is freed
+    for (int i=0; i < this->exp->chains_per_thread; i++) {
+        delete [] chain_memory[i];
+    }
+    delete [] chain_memory;
+    delete [] root;
+
+    return 0;
+}
+
+// Counter bumped by Run::mem_check(); declared without `static`, so it
+// has external linkage.
+int dummy = 0;
+
+// Add one to `dummy` when m is a null pointer; otherwise do nothing.
+void Run::mem_check( Chain *m )
+{
+    if (NULL != m) {
+        return;
+    }
+    dummy = dummy + 1;
+}
+
+// Larger of the two arguments.  v1 is returned whenever the test
+// v1 < v2 is false, which includes the case where both are equal.
+static double max( double v1, double v2 )
+{
+    return (v1 < v2) ? v2 : v1;
+}
+
+// Smaller of the two arguments.  v1 is returned whenever the test
+// v2 < v1 is false, which includes the case where both are equal.
+static double min( double v1, double v2 )
+{
+    return (v2 < v1) ? v2 : v1;
+}
+
+ // Multipliers used by Run::random_mem_init() to permute the page and line order.
+ // The table excludes 2 and the Mersenne primes, i.e., primes of the form
+ // 2**n - 1, e.g., 3, 7, 31, 127. NOTE(review): 29, 59, 67 and 107 are absent as well -- confirm intent.
+static const int prime_table[] = { 5, 11, 13, 17, 19, 23, 37, 41, 43, 47,
+ 53, 61, 71, 73, 79, 83, 89, 97, 101, 103, 109, 113, 131, 137, 139, 149,
+ 151, 157, 163, };
+static const int prime_table_size = sizeof prime_table / sizeof prime_table[0]; // number of entries in prime_table
+
+// Greatest common divisor by Euclid's algorithm.
+static int64
+random_init_gcd( int64 a, int64 b )
+{
+    while (b != 0) {
+        int64 t = a % b;
+        a = b;
+        b = t;
+    }
+    return a;
+}
+
+// Return the first prime_table entry, searching cyclically from index
+// `start`, that shares no divisor with `count`.  Falls back to 1 when
+// no entry qualifies.
+static int
+random_init_factor( int start, int64 count )
+{
+    for (int k=0; k < prime_table_size; k++) {
+        int candidate = prime_table[ (start + k) % prime_table_size ];
+        if (random_init_gcd( candidate, count ) == 1) return candidate;
+    }
+    return 1;
+}
+
+// Link the elements of `mem` into one chain that visits the pages in
+// a pseudo-random order and, within each page, the cache lines in a
+// pseudo-random order, using one link per cache line.
+//
+// Pages and lines are enumerated as (factor * index + offset) % count.
+// That formula only reaches every index exactly once when factor and
+// count have no common divisor; otherwise links would be revisited
+// and the chain would loop back on itself.  The factor drawn from
+// prime_table is therefore replaced by the next coprime table entry
+// when needed.  For counts that are powers of two the drawn factor is
+// always kept.
+//
+// The number of links placed in the chain is stored in
+// Run::_ops_per_chain.  Returns the first link of the chain.
+// NOTE(review): the last link's next pointer is not written here;
+// presumably Chain's constructor sets it to NULL -- confirm in Chain.h.
+Chain*
+Run::random_mem_init( Chain *mem )
+{
+    Chain* root = NULL;
+    Chain* prev = NULL;
+    int link_within_line = 0;
+    int64 local_ops_per_chain = 0;
+
+    // we must set a lock because random()
+    // is not thread safe
+    Run::global_mutex.lock();
+    setstate(this->exp->random_state[this->thread_id()]);
+    int page_factor = random_init_factor( random() % prime_table_size, this->exp->pages_per_chain );
+    int page_offset = random() % this->exp->pages_per_chain;
+    Run::global_mutex.unlock();
+
+    // loop through the pages
+    for (int i=0; i < this->exp->pages_per_chain; i++) {
+        int page = (page_factor * i + page_offset) % this->exp->pages_per_chain;
+
+        // every page gets its own line ordering
+        Run::global_mutex.lock();
+        setstate(this->exp->random_state[this->thread_id()]);
+        int line_factor = random_init_factor( random() % prime_table_size, this->exp->lines_per_page );
+        int line_offset = random() % this->exp->lines_per_page;
+        Run::global_mutex.unlock();
+
+        // loop through the lines within a page
+        for (int j=0; j < this->exp->lines_per_page; j++) {
+            int line_within_page = (line_factor * j + line_offset) % this->exp->lines_per_page;
+            int link = page * this->exp->links_per_page + line_within_page * this->exp->links_per_line + link_within_line;
+
+            if (root == NULL) {
+                // first link becomes the head of the chain
+                prev = root = mem + link;
+            } else {
+                // append to the tail
+                prev->next = mem + link;
+                prev = prev->next;
+            }
+            local_ops_per_chain += 1;
+        }
+    }
+
+    Run::global_mutex.lock();
+    Run::_ops_per_chain = local_ops_per_chain;
+    Run::global_mutex.unlock();
+
+    return root;
+}
+
+// Link the elements of `mem` into a chain that walks the cache lines
+// in ascending order, taking every exp->stride-th line and using the
+// first link of each visited line.  The number of links placed in the
+// chain is stored in Run::_ops_per_chain.  Returns the head of the
+// chain, or NULL when no line was visited.
+Chain*
+Run::forward_mem_init( Chain *mem )
+{
+    Chain* head = NULL;
+    Chain* tail = NULL;
+    int slot_in_line = 0;
+    int64 placed = 0;
+
+    int line = 0;
+    while (line < this->exp->lines_per_chain) {
+        int link = line * this->exp->links_per_line + slot_in_line;
+        Chain* node = mem + link;
+
+        if (head == NULL) {
+            head = node;
+        } else {
+            tail->next = node;
+        }
+        tail = node;
+        placed += 1;
+
+        line += this->exp->stride;
+    }
+
+    Run::global_mutex.lock();
+    Run::_ops_per_chain = placed;
+    Run::global_mutex.unlock();
+
+    return head;
+}
+
+// Link the elements of `mem` into a chain that walks the cache lines
+// in descending order.  The lines visited are the same ones a forward
+// walk with the stride's magnitude would visit (0, s, 2s, ...), taken
+// from the highest one down to line 0, using the first link of each.
+//
+// Run::run() calls this when exp->stride is not positive, so the
+// magnitude is -exp->stride.  A magnitude below 1 (a zero stride)
+// would never advance, so it is treated as 1.  With no lines at all
+// the chain is empty and NULL is returned.
+//
+// The number of links placed in the chain is stored in
+// Run::_ops_per_chain.  Returns the head of the chain.
+Chain*
+Run::reverse_mem_init( Chain *mem )
+{
+    Chain* root = NULL;
+    Chain* prev = NULL;
+    int link_within_line = 0;
+    int64 local_ops_per_chain = 0;
+
+    int stride = -this->exp->stride;
+    if (stride < 1) stride = 1;
+
+    // highest multiple of stride that is still a valid line index,
+    // or -1 when there are no lines to visit
+    int last = -1;
+    if (0 < this->exp->lines_per_chain) {
+        last = ((this->exp->lines_per_chain - 1) / stride) * stride;
+    }
+
+    for (int i=last; 0 <= i; i -= stride) {
+        int link = i * this->exp->links_per_line + link_within_line;
+        if (root == NULL) {
+            // first link becomes the head of the chain
+            prev = root = mem + link;
+        } else {
+            // append to the tail
+            prev->next = mem + link;
+            prev = prev->next;
+        }
+        local_ops_per_chain += 1;
+    }
+
+    Run::global_mutex.lock();
+    Run::_ops_per_chain = local_ops_per_chain;
+    Run::global_mutex.unlock();
+
+    return root;
+}
+
+// File-local tally of the null pointers passed to mem_chk().
+static int64 dumb_ck = 0;
+
+// Add one to dumb_ck when m is a null pointer; otherwise do nothing.
+// chase_pointers() passes every final chain pointer through here.
+void mem_chk( Chain *m )
+{
+    if (NULL != m) {
+        return;
+    }
+    dumb_ck = dumb_ck + 1;
+}
+
+static void
+chase_pointers(
+ int64 chains_per_thread, // number of chains followed side by side (1..16; any other value is handled as 1)
+ int64 iterations, // number of iterations per experiment
+ Chain** root, // root(s) of the chain(s) to follow
+ int64 bytes_per_line, // unused here; present to match the run_benchmark signature
+ int64 bytes_per_chain, // unused here; present to match the run_benchmark signature
+ int64 stride // unused here; present to match the run_benchmark signature
+)
+{
+ /* Chase pointers: for each of `iterations` passes, start at the root of
+  * every chain and follow the ->next links of all chains in lock step
+  * until the first chain (a) reaches NULL.  Only chain a is tested, so
+  * the other chains have to be at least as long as chain a.  Each chain
+  * count has its own hand-written case with one local pointer per chain
+  * (named a..h, j..q; i is the pass counter).  After every pass the final
+  * pointers are handed to mem_chk().
+  */
+ switch (chains_per_thread) {
+ default: // counts outside 1..16 share the single-chain code below
+ case 1:
+ for (int64 i=0; i < iterations; i++) {
+ Chain* a = root[0];
+ while (a != NULL) {
+ a = a->next;
+ }
+ mem_chk( a );
+ }
+ break;
+ case 2:
+ for (int64 i=0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ while (a != NULL) {
+ a = a->next;
+ b = b->next;
+ }
+ mem_chk( a );
+ mem_chk( b );
+ }
+ break;
+ case 3:
+ for (int64 i=0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ while (a != NULL) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ }
+ mem_chk( a );
+ mem_chk( b );
+ mem_chk( c );
+ }
+ break;
+ case 4:
+ for (int64 i=0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ while (a != NULL) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ }
+ mem_chk( a );
+ mem_chk( b );
+ mem_chk( c );
+ mem_chk( d );
+ }
+ break;
+ case 5:
+ for (int64 i=0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ while (a != NULL) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ }
+ mem_chk( a );
+ mem_chk( b );
+ mem_chk( c );
+ mem_chk( d );
+ mem_chk( e );
+ }
+ break;
+ case 6:
+ for (int64 i=0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ Chain* f = root[5];
+ while (a != NULL) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ f = f->next;
+ }
+ mem_chk( a );
+ mem_chk( b );
+ mem_chk( c );
+ mem_chk( d );
+ mem_chk( e );
+ mem_chk( f );
+ }
+ break;
+ case 7:
+ for (int64 i=0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ Chain* f = root[5];
+ Chain* g = root[6];
+ while (a != NULL) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ f = f->next;
+ g = g->next;
+ }
+ mem_chk( a );
+ mem_chk( b );
+ mem_chk( c );
+ mem_chk( d );
+ mem_chk( e );
+ mem_chk( f );
+ mem_chk( g );
+ }
+ break;
+ case 8:
+ for (int64 i=0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ Chain* f = root[5];
+ Chain* g = root[6];
+ Chain* h = root[7];
+ while (a != NULL) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ f = f->next;
+ g = g->next;
+ h = h->next;
+ }
+ mem_chk( a );
+ mem_chk( b );
+ mem_chk( c );
+ mem_chk( d );
+ mem_chk( e );
+ mem_chk( f );
+ mem_chk( g );
+ mem_chk( h );
+ }
+ break;
+ case 9:
+ for (int64 i=0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ Chain* f = root[5];
+ Chain* g = root[6];
+ Chain* h = root[7];
+ Chain* j = root[8];
+ while (a != NULL) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ f = f->next;
+ g = g->next;
+ h = h->next;
+ j = j->next;
+ }
+ mem_chk( a );
+ mem_chk( b );
+ mem_chk( c );
+ mem_chk( d );
+ mem_chk( e );
+ mem_chk( f );
+ mem_chk( g );
+ mem_chk( h );
+ mem_chk( j );
+ }
+ break;
+ case 10:
+ for (int64 i=0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ Chain* f = root[5];
+ Chain* g = root[6];
+ Chain* h = root[7];
+ Chain* j = root[8];
+ Chain* k = root[9];
+ while (a != NULL) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ f = f->next;
+ g = g->next;
+ h = h->next;
+ j = j->next;
+ k = k->next;
+ }
+ mem_chk( a );
+ mem_chk( b );
+ mem_chk( c );
+ mem_chk( d );
+ mem_chk( e );
+ mem_chk( f );
+ mem_chk( g );
+ mem_chk( h );
+ mem_chk( j );
+ mem_chk( k );
+ }
+ break;
+ case 11:
+ for (int64 i=0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ Chain* f = root[5];
+ Chain* g = root[6];
+ Chain* h = root[7];
+ Chain* j = root[8];
+ Chain* k = root[9];
+ Chain* l = root[10];
+ while (a != NULL) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ f = f->next;
+ g = g->next;
+ h = h->next;
+ j = j->next;
+ k = k->next;
+ l = l->next;
+ }
+ mem_chk( a );
+ mem_chk( b );
+ mem_chk( c );
+ mem_chk( d );
+ mem_chk( e );
+ mem_chk( f );
+ mem_chk( g );
+ mem_chk( h );
+ mem_chk( j );
+ mem_chk( k );
+ mem_chk( l );
+ }
+ break;
+ case 12:
+ for (int64 i=0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ Chain* f = root[5];
+ Chain* g = root[6];
+ Chain* h = root[7];
+ Chain* j = root[8];
+ Chain* k = root[9];
+ Chain* l = root[10];
+ Chain* m = root[11];
+ while (a != NULL) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ f = f->next;
+ g = g->next;
+ h = h->next;
+ j = j->next;
+ k = k->next;
+ l = l->next;
+ m = m->next;
+ }
+ mem_chk( a );
+ mem_chk( b );
+ mem_chk( c );
+ mem_chk( d );
+ mem_chk( e );
+ mem_chk( f );
+ mem_chk( g );
+ mem_chk( h );
+ mem_chk( j );
+ mem_chk( k );
+ mem_chk( l );
+ mem_chk( m );
+ }
+ break;
+ case 13:
+ for (int64 i=0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ Chain* f = root[5];
+ Chain* g = root[6];
+ Chain* h = root[7];
+ Chain* j = root[8];
+ Chain* k = root[9];
+ Chain* l = root[10];
+ Chain* m = root[11];
+ Chain* n = root[12];
+ while (a != NULL) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ f = f->next;
+ g = g->next;
+ h = h->next;
+ j = j->next;
+ k = k->next;
+ l = l->next;
+ m = m->next;
+ n = n->next;
+ }
+ mem_chk( a );
+ mem_chk( b );
+ mem_chk( c );
+ mem_chk( d );
+ mem_chk( e );
+ mem_chk( f );
+ mem_chk( g );
+ mem_chk( h );
+ mem_chk( j );
+ mem_chk( k );
+ mem_chk( l );
+ mem_chk( m );
+ mem_chk( n );
+ }
+ break;
+ case 14:
+ for (int64 i=0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ Chain* f = root[5];
+ Chain* g = root[6];
+ Chain* h = root[7];
+ Chain* j = root[8];
+ Chain* k = root[9];
+ Chain* l = root[10];
+ Chain* m = root[11];
+ Chain* n = root[12];
+ Chain* o = root[13];
+ while (a != NULL) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ f = f->next;
+ g = g->next;
+ h = h->next;
+ j = j->next;
+ k = k->next;
+ l = l->next;
+ m = m->next;
+ n = n->next;
+ o = o->next;
+ }
+ mem_chk( a );
+ mem_chk( b );
+ mem_chk( c );
+ mem_chk( d );
+ mem_chk( e );
+ mem_chk( f );
+ mem_chk( g );
+ mem_chk( h );
+ mem_chk( j );
+ mem_chk( k );
+ mem_chk( l );
+ mem_chk( m );
+ mem_chk( n );
+ mem_chk( o );
+ }
+ break;
+ case 15:
+ for (int64 i=0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ Chain* f = root[5];
+ Chain* g = root[6];
+ Chain* h = root[7];
+ Chain* j = root[8];
+ Chain* k = root[9];
+ Chain* l = root[10];
+ Chain* m = root[11];
+ Chain* n = root[12];
+ Chain* o = root[13];
+ Chain* p = root[14];
+ while (a != NULL) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ f = f->next;
+ g = g->next;
+ h = h->next;
+ j = j->next;
+ k = k->next;
+ l = l->next;
+ m = m->next;
+ n = n->next;
+ o = o->next;
+ p = p->next;
+ }
+ mem_chk( a );
+ mem_chk( b );
+ mem_chk( c );
+ mem_chk( d );
+ mem_chk( e );
+ mem_chk( f );
+ mem_chk( g );
+ mem_chk( h );
+ mem_chk( j );
+ mem_chk( k );
+ mem_chk( l );
+ mem_chk( m );
+ mem_chk( n );
+ mem_chk( o );
+ mem_chk( p );
+ }
+ break;
+ case 16:
+ for (int64 i=0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ Chain* f = root[5];
+ Chain* g = root[6];
+ Chain* h = root[7];
+ Chain* j = root[8];
+ Chain* k = root[9];
+ Chain* l = root[10];
+ Chain* m = root[11];
+ Chain* n = root[12];
+ Chain* o = root[13];
+ Chain* p = root[14];
+ Chain* q = root[15];
+ while (a != NULL) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ f = f->next;
+ g = g->next;
+ h = h->next;
+ j = j->next;
+ k = k->next;
+ l = l->next;
+ m = m->next;
+ n = n->next;
+ o = o->next;
+ p = p->next;
+ q = q->next;
+ }
+ mem_chk( a );
+ mem_chk( b );
+ mem_chk( c );
+ mem_chk( d );
+ mem_chk( e );
+ mem_chk( f );
+ mem_chk( g );
+ mem_chk( h );
+ mem_chk( j );
+ mem_chk( k );
+ mem_chk( l );
+ mem_chk( m );
+ mem_chk( n );
+ mem_chk( o );
+ mem_chk( p );
+ mem_chk( q );
+ }
+ }
+}
+
+// Original author's note (DMP): "NOT WRITTEN YET -- JUST A PLACE HOLDER!"
+//
+// Prepare one chain's memory for the STREAM access pattern.  The
+// block is viewed as an array of doubles, and starting at index 0 one
+// double is set to zero every stride * (doubles per line) positions.
+// The number of doubles written is stored in Run::_ops_per_chain.
+// Returns mem itself, which serves as the chain's root.
+Chain*
+Run::stream_mem_init( Chain *mem )
+{
+    double* values = (double *) mem;
+    int64 doubles_per_line = this->exp->bytes_per_line / sizeof(double);
+    int64 doubles_per_chain = this->exp->bytes_per_chain / sizeof(double);
+    int64 touched = 0;
+
+    int64 pos = 0;
+    while (pos < doubles_per_chain) {
+        values[pos] = 0;
+        touched += 1;
+        pos += this->exp->stride*doubles_per_line;
+    }
+
+    Run::global_mutex.lock();
+    Run::_ops_per_chain = touched;
+    Run::global_mutex.unlock();
+
+    return mem;
+}
+
+// File-local tally of the non-zero sums passed to sum_chk().
+static int64 summ_ck = 0;
+
+// Add one to summ_ck when t is not zero; otherwise do nothing.
+// follow_streams() passes the total of every pass through here.
+void sum_chk( double t )
+{
+    if (t == 0) {
+        return;
+    }
+    summ_ck = summ_ck + 1;
+}
+
+ // Original author's note (DMP): "NOT WRITTEN YET -- JUST A PLACE HOLDER!"
+ /* As written, this kernel performs `iterations` passes.  In each pass it
+  * views every chain root as an array of doubles and adds up, across all
+  * chains, the double at each index j = 0, step, 2*step, ... below
+  * refs_per_chain, where step = stride * refs_per_line.  The total of the
+  * pass is handed to sum_chk().
+  * NOTE(review): if stride * refs_per_line is zero or negative, j never
+  * moves past refs_per_chain -- confirm callers guarantee a positive
+  * stride and bytes_per_line >= sizeof(double).
+  */
+static void
+follow_streams(
+ int64 chains_per_thread, // number of streams summed side by side (1..16; any other value is handled as 1)
+ int64 iterations, // number of iterations per experiment
+ Chain** root, // root(s) of the chain(s) to follow
+ int64 bytes_per_line, // bytes per cache line; determines refs_per_line
+ int64 bytes_per_chain, // bytes per chain; determines refs_per_chain
+ int64 stride // multiplied by refs_per_line to get the index step
+)
+{
+ int64 refs_per_line = bytes_per_line / sizeof(double);
+ int64 refs_per_chain = bytes_per_chain / sizeof(double);
+
+ // sum the streams: one hand-written case per chain count
+ switch (chains_per_thread) {
+ default: // counts outside 1..16 share the single-stream code below
+ case 1:
+ for (int64 i=0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
+ t += a0[j];
+ }
+ sum_chk( t );
+ }
+ break;
+ case 2:
+ for (int64 i=0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
+ t += a0[j] + a1[j];
+ }
+ sum_chk( t );
+ }
+ break;
+ case 3:
+ for (int64 i=0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
+ t += a0[j] + a1[j] + a2[j];
+ }
+ sum_chk( t );
+ }
+ break;
+ case 4:
+ for (int64 i=0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j];
+ }
+ sum_chk( t );
+ }
+ break;
+ case 5:
+ for (int64 i=0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j];
+ }
+ sum_chk( t );
+ }
+ break;
+ case 6:
+ for (int64 i=0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ double* a5 = (double *) root[5];
+ for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j];
+ }
+ sum_chk( t );
+ }
+ break;
+ case 7:
+ for (int64 i=0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ double* a5 = (double *) root[5];
+ double* a6 = (double *) root[6];
+ for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j];
+ }
+ sum_chk( t );
+ }
+ break;
+ case 8:
+ for (int64 i=0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ double* a5 = (double *) root[5];
+ double* a6 = (double *) root[6];
+ double* a7 = (double *) root[7];
+ for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j];
+ }
+ sum_chk( t );
+ }
+ break;
+ case 9:
+ for (int64 i=0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ double* a5 = (double *) root[5];
+ double* a6 = (double *) root[6];
+ double* a7 = (double *) root[7];
+ double* a8 = (double *) root[8];
+ for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j] +
+ a8[j];
+ }
+ sum_chk( t );
+ }
+ break;
+ case 10:
+ for (int64 i=0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ double* a5 = (double *) root[5];
+ double* a6 = (double *) root[6];
+ double* a7 = (double *) root[7];
+ double* a8 = (double *) root[8];
+ double* a9 = (double *) root[9];
+ for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j] +
+ a8[j] + a9[j];
+ }
+ sum_chk( t );
+ }
+ break;
+ case 11:
+ for (int64 i=0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[ 0];
+ double* a1 = (double *) root[ 1];
+ double* a2 = (double *) root[ 2];
+ double* a3 = (double *) root[ 3];
+ double* a4 = (double *) root[ 4];
+ double* a5 = (double *) root[ 5];
+ double* a6 = (double *) root[ 6];
+ double* a7 = (double *) root[ 7];
+ double* a8 = (double *) root[ 8];
+ double* a9 = (double *) root[ 9];
+ double* a10 = (double *) root[10];
+ for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
+ t += a0[j] + a1[j] + a2 [j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j] +
+ a8[j] + a9[j] + a10[j];
+ }
+ sum_chk( t );
+ }
+ break;
+ case 12:
+ for (int64 i=0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[ 0];
+ double* a1 = (double *) root[ 1];
+ double* a2 = (double *) root[ 2];
+ double* a3 = (double *) root[ 3];
+ double* a4 = (double *) root[ 4];
+ double* a5 = (double *) root[ 5];
+ double* a6 = (double *) root[ 6];
+ double* a7 = (double *) root[ 7];
+ double* a8 = (double *) root[ 8];
+ double* a9 = (double *) root[ 9];
+ double* a10 = (double *) root[10];
+ double* a11 = (double *) root[11];
+ for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
+ t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4[j] + a5[j] + a6[j] + a7[j] +
+ a8[j] + a9[j] + a10[j] + a11[j];
+ }
+ sum_chk( t );
+ }
+ break;
+ case 13:
+ for (int64 i=0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[ 0];
+ double* a1 = (double *) root[ 1];
+ double* a2 = (double *) root[ 2];
+ double* a3 = (double *) root[ 3];
+ double* a4 = (double *) root[ 4];
+ double* a5 = (double *) root[ 5];
+ double* a6 = (double *) root[ 6];
+ double* a7 = (double *) root[ 7];
+ double* a8 = (double *) root[ 8];
+ double* a9 = (double *) root[ 9];
+ double* a10 = (double *) root[10];
+ double* a11 = (double *) root[11];
+ double* a12 = (double *) root[12];
+ for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
+ t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5[j] + a6[j] + a7[j] +
+ a8[j] + a9[j] + a10[j] + a11[j] + a12[j];
+ }
+ sum_chk( t );
+ }
+ break;
+ case 14:
+ for (int64 i=0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[ 0];
+ double* a1 = (double *) root[ 1];
+ double* a2 = (double *) root[ 2];
+ double* a3 = (double *) root[ 3];
+ double* a4 = (double *) root[ 4];
+ double* a5 = (double *) root[ 5];
+ double* a6 = (double *) root[ 6];
+ double* a7 = (double *) root[ 7];
+ double* a8 = (double *) root[ 8];
+ double* a9 = (double *) root[ 9];
+ double* a10 = (double *) root[10];
+ double* a11 = (double *) root[11];
+ double* a12 = (double *) root[12];
+ double* a13 = (double *) root[13];
+ for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
+ t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5 [j] + a6[j] + a7[j] +
+ a8[j] + a9[j] + a10[j] + a11[j] + a12[j] + a13[j];
+ }
+ sum_chk( t );
+ }
+ break;
+ case 15:
+ for (int64 i=0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[ 0];
+ double* a1 = (double *) root[ 1];
+ double* a2 = (double *) root[ 2];
+ double* a3 = (double *) root[ 3];
+ double* a4 = (double *) root[ 4];
+ double* a5 = (double *) root[ 5];
+ double* a6 = (double *) root[ 6];
+ double* a7 = (double *) root[ 7];
+ double* a8 = (double *) root[ 8];
+ double* a9 = (double *) root[ 9];
+ double* a10 = (double *) root[10];
+ double* a11 = (double *) root[11];
+ double* a12 = (double *) root[12];
+ double* a13 = (double *) root[13];
+ double* a14 = (double *) root[14];
+ for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
+ t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5 [j] + a6 [j] + a7[j] +
+ a8[j] + a9[j] + a10[j] + a11[j] + a12[j] + a13[j] + a14[j];
+ }
+ sum_chk( t );
+ }
+ break;
+ case 16:
+ for (int64 i=0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[ 0];
+ double* a1 = (double *) root[ 1];
+ double* a2 = (double *) root[ 2];
+ double* a3 = (double *) root[ 3];
+ double* a4 = (double *) root[ 4];
+ double* a5 = (double *) root[ 5];
+ double* a6 = (double *) root[ 6];
+ double* a7 = (double *) root[ 7];
+ double* a8 = (double *) root[ 8];
+ double* a9 = (double *) root[ 9];
+ double* a10 = (double *) root[10];
+ double* a11 = (double *) root[11];
+ double* a12 = (double *) root[12];
+ double* a13 = (double *) root[13];
+ double* a14 = (double *) root[14];
+ double* a15 = (double *) root[15];
+ for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
+ t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5 [j] + a6 [j] + a7 [j] +
+ a8[j] + a9[j] + a10[j] + a11[j] + a12[j] + a13[j] + a14[j] + a15[j];
+ }
+ sum_chk( t );
+ }
+ break;
+ }
+}