summaryrefslogtreecommitdiff
path: root/src/Run.cpp
diff options
context:
space:
mode:
authorTim Besard <tim.besard@gmail.com>2011-11-02 09:13:38 +0100
committerTim Besard <tim.besard@gmail.com>2011-11-02 09:13:38 +0100
commitc108197c20aa7b93849e383a3aaaf7b2bba30405 (patch)
treef05aa2ff019892dfc936ad2242d43373191f16cd /src/Run.cpp
parenteb6995fb5a0f4382cb4a01d301423e74ea8babe6 (diff)
Formatting the source.
Diffstat (limited to 'src/Run.cpp')
-rw-r--r--src/Run.cpp2259
1 files changed, 1130 insertions, 1129 deletions
diff --git a/src/Run.cpp b/src/Run.cpp
index c774b99..24435b4 100644
--- a/src/Run.cpp
+++ b/src/Run.cpp
@@ -9,7 +9,6 @@
* Douglas M. Pase - initial API and implementation *
*******************************************************************************/
-
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
@@ -24,1253 +23,1255 @@
#include "Timer.h"
#include "SpinBarrier.h"
+static double max(double v1, double v2);
+static double min(double v1, double v2);
+static void chase_pointers(int64 chains_per_thread, int64 iterations,
+ Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride,
+ int64 busy_cycles, bool prefetch);
+static void follow_streams(int64 chains_per_thread, int64 iterations,
+ Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride,
+ int64 busy_cycles, bool prefetch);
+static void (*run_benchmark)(int64 chains_per_thread, int64 iterations,
+ Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride,
+ int64 busy_cycles, bool prefetch) = chase_pointers;
-static double max( double v1, double v2 );
-static double min( double v1, double v2 );
-static void chase_pointers(int64 chains_per_thread, int64 iterations, Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride, int64 busy_cycles, bool prefetch);
-static void follow_streams(int64 chains_per_thread, int64 iterations, Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride, int64 busy_cycles, bool prefetch);
-static void (*run_benchmark)(int64 chains_per_thread, int64 iterations, Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride, int64 busy_cycles, bool prefetch) = chase_pointers;
-
-Lock Run::global_mutex;
-int64 Run::_ops_per_chain = 0;
-double Run::_seconds = 1E9;
+Lock Run::global_mutex;
+int64 Run::_ops_per_chain = 0;
+double Run::_seconds = 1E9;
#define prefetch(x) __builtin_prefetch(x)
-Run::Run()
-: exp(NULL), bp(NULL)
-{
+Run::Run() :
+ exp(NULL), bp(NULL) {
}
-Run::~Run()
-{
+Run::~Run() {
}
-void
-Run::set( Experiment &e, SpinBarrier* sbp )
-{
- this->exp = &e;
- this->bp = sbp;
+void Run::set(Experiment &e, SpinBarrier* sbp) {
+ this->exp = &e;
+ this->bp = sbp;
}
-int
-Run::run()
-{
- // first allocate all memory for the chains,
- // making sure it is allocated within the
- // intended numa domains
- Chain** chain_memory = new Chain* [ this->exp->chains_per_thread ];
- Chain** root = new Chain* [ this->exp->chains_per_thread ];
+int Run::run() {
+ // first allocate all memory for the chains,
+ // making sure it is allocated within the
+ // intended numa domains
+ Chain** chain_memory = new Chain*[this->exp->chains_per_thread];
+ Chain** root = new Chain*[this->exp->chains_per_thread];
#if defined(NUMA)
- // establish the node id where this thread
- // will run. threads are mapped to nodes
- // by the set-up code for Experiment.
- int run_node_id = this->exp->thread_domain[this->thread_id()];
- numa_run_on_node(run_node_id);
+ // establish the node id where this thread
+ // will run. threads are mapped to nodes
+ // by the set-up code for Experiment.
+ int run_node_id = this->exp->thread_domain[this->thread_id()];
+ numa_run_on_node(run_node_id);
- // establish the node id where this thread's
- // memory will be allocated.
- for (int i=0; i < this->exp->chains_per_thread; i++) {
- int alloc_node_id = this->exp->chain_domain[this->thread_id()][i];
- nodemask_t alloc_mask;
- nodemask_zero(&alloc_mask);
- nodemask_set(&alloc_mask, alloc_node_id);
- numa_set_membind(&alloc_mask);
+ // establish the node id where this thread's
+ // memory will be allocated.
+ for (int i=0; i < this->exp->chains_per_thread; i++) {
+ int alloc_node_id = this->exp->chain_domain[this->thread_id()][i];
+ nodemask_t alloc_mask;
+ nodemask_zero(&alloc_mask);
+ nodemask_set(&alloc_mask, alloc_node_id);
+ numa_set_membind(&alloc_mask);
- chain_memory[i] = new Chain[ this->exp->links_per_chain ];
- }
+ chain_memory[i] = new Chain[ this->exp->links_per_chain ];
+ }
#else
- for (int i=0; i < this->exp->chains_per_thread; i++) {
- chain_memory[i] = new Chain[ this->exp->links_per_chain ];
- }
+ for (int i = 0; i < this->exp->chains_per_thread; i++) {
+ chain_memory[i] = new Chain[this->exp->links_per_chain];
+ }
#endif
- // initialize the chains and
- // select the function that
- // will execute the tests
- for (int i=0; i < this->exp->chains_per_thread; i++) {
- if (this->exp->access_pattern == Experiment::RANDOM) {
- root[i] = random_mem_init( chain_memory[i] );
- run_benchmark = chase_pointers;
- } else if (this->exp->access_pattern == Experiment::STRIDED) {
- if (0 < this->exp->stride) {
- root[i] = forward_mem_init( chain_memory[i] );
- } else {
- root[i] = reverse_mem_init( chain_memory[i] );
- }
- run_benchmark = chase_pointers;
- } else if (this->exp->access_pattern == Experiment::STREAM) {
- root[i] = stream_mem_init( chain_memory[i] );
- run_benchmark = follow_streams;
+ // initialize the chains and
+ // select the function that
+ // will execute the tests
+ for (int i = 0; i < this->exp->chains_per_thread; i++) {
+ if (this->exp->access_pattern == Experiment::RANDOM) {
+ root[i] = random_mem_init(chain_memory[i]);
+ run_benchmark = chase_pointers;
+ } else if (this->exp->access_pattern == Experiment::STRIDED) {
+ if (0 < this->exp->stride) {
+ root[i] = forward_mem_init(chain_memory[i]);
+ } else {
+ root[i] = reverse_mem_init(chain_memory[i]);
+ }
+ run_benchmark = chase_pointers;
+ } else if (this->exp->access_pattern == Experiment::STREAM) {
+ root[i] = stream_mem_init(chain_memory[i]);
+ run_benchmark = follow_streams;
+ }
}
- }
- if (this->exp->iterations <= 0) {
- volatile static double istart = 0;
- volatile static double istop = 0;
- volatile static double elapsed = 0;
- volatile static int64 iters = 1;
- volatile double bound = max(0.2, 10 * Timer::resolution());
- for (iters=1; elapsed <= bound; iters=iters<<1) {
- this->bp->barrier();
+ if (this->exp->iterations <= 0) {
+ volatile static double istart = 0;
+ volatile static double istop = 0;
+ volatile static double elapsed = 0;
+ volatile static int64 iters = 1;
+ volatile double bound = max(0.2, 10 * Timer::resolution());
+ for (iters = 1; elapsed <= bound; iters = iters << 1) {
+ this->bp->barrier();
- // start timer
- if (this->thread_id() == 0) {
- istart = Timer::seconds();
- }
- this->bp->barrier();
+ // start timer
+ if (this->thread_id() == 0) {
+ istart = Timer::seconds();
+ }
+ this->bp->barrier();
- // chase pointers
- run_benchmark(this->exp->chains_per_thread, iters, root, this->exp->bytes_per_line, this->exp->bytes_per_chain, this->exp->stride, this->exp->busy_cycles, this->exp->prefetch);
+ // chase pointers
+ run_benchmark(this->exp->chains_per_thread, iters, root,
+ this->exp->bytes_per_line, this->exp->bytes_per_chain,
+ this->exp->stride, this->exp->busy_cycles,
+ this->exp->prefetch);
- // barrier
- this->bp->barrier();
+ // barrier
+ this->bp->barrier();
- // stop timer
- if (this->thread_id() == 0) {
- istop = Timer::seconds();
- elapsed = istop - istart;
- }
- this->bp->barrier();
- }
+ // stop timer
+ if (this->thread_id() == 0) {
+ istop = Timer::seconds();
+ elapsed = istop - istart;
+ }
+ this->bp->barrier();
+ }
- // calculate the number of iterations
- if (this->thread_id() == 0) {
- if (0 < this->exp->seconds) {
- this->exp->iterations = max(1, 0.9999 + 0.5 * this->exp->seconds * iters / elapsed);
- } else {
- this->exp->iterations = max(1, 0.9999 + iters / elapsed);
- }
+ // calculate the number of iterations
+ if (this->thread_id() == 0) {
+ if (0 < this->exp->seconds) {
+ this->exp->iterations = max(1,
+ 0.9999 + 0.5 * this->exp->seconds * iters / elapsed);
+ } else {
+ this->exp->iterations = max(1, 0.9999 + iters / elapsed);
+ }
+ }
+ this->bp->barrier();
}
- this->bp->barrier();
- }
#if defined(UNDEFINED)
#endif
- // barrier
- for (int e=0; e < this->exp->experiments; e++) {
- this->bp->barrier();
+ // barrier
+ for (int e = 0; e < this->exp->experiments; e++) {
+ this->bp->barrier();
- // start timer
- double start = 0;
- if (this->thread_id() == 0) start = Timer::seconds();
- this->bp->barrier();
+ // start timer
+ double start = 0;
+ if (this->thread_id() == 0)
+ start = Timer::seconds();
+ this->bp->barrier();
- // chase pointers
- run_benchmark(this->exp->chains_per_thread, this->exp->iterations, root, this->exp->bytes_per_line, this->exp->bytes_per_chain, this->exp->stride, this->exp->busy_cycles, this->exp->prefetch);
+ // chase pointers
+ run_benchmark(this->exp->chains_per_thread, this->exp->iterations, root,
+ this->exp->bytes_per_line, this->exp->bytes_per_chain,
+ this->exp->stride, this->exp->busy_cycles, this->exp->prefetch);
- // barrier
- this->bp->barrier();
+ // barrier
+ this->bp->barrier();
- // stop timer
- double stop = 0;
- if (this->thread_id() == 0) stop = Timer::seconds();
- this->bp->barrier();
+ // stop timer
+ double stop = 0;
+ if (this->thread_id() == 0)
+ stop = Timer::seconds();
+ this->bp->barrier();
- if (0 <= e) {
- if (this->thread_id() == 0) {
- double delta = stop - start;
- if (0 < delta) {
- Run::_seconds = min( Run::_seconds, delta );
+ if (0 <= e) {
+ if (this->thread_id() == 0) {
+ double delta = stop - start;
+ if (0 < delta) {
+ Run::_seconds = min(Run::_seconds, delta);
+ }
+ }
}
- }
}
- }
- this->bp->barrier();
+ this->bp->barrier();
- for (int i=0; i < this->exp->chains_per_thread; i++) {
- if (chain_memory[i] != NULL) delete [] chain_memory[i];
- }
- if (chain_memory != NULL) delete [] chain_memory;
+ for (int i = 0; i < this->exp->chains_per_thread; i++) {
+ if (chain_memory[i] != NULL
+ ) delete[] chain_memory[i];
+ }
+ if (chain_memory != NULL
+ ) delete[] chain_memory;
- return 0;
+ return 0;
}
int dummy = 0;
-void
-Run::mem_check( Chain *m )
-{
- if (m == NULL) dummy += 1;
+void Run::mem_check(Chain *m) {
+ if (m == NULL
+ ) dummy += 1;
}
-static double
-max( double v1, double v2 )
-{
- if (v1 < v2) return v2;
- return v1;
+static double max(double v1, double v2) {
+ if (v1 < v2)
+ return v2;
+ return v1;
}
-static double
-min( double v1, double v2 )
-{
- if (v2 < v1) return v2;
- return v1;
+static double min(double v1, double v2) {
+ if (v2 < v1)
+ return v2;
+ return v1;
}
- // exclude 2 and mersienne primes, i.e.,
- // primes of the form 2**n - 1, e.g.,
- // 3, 7, 31, 127
-static const int prime_table[] = { 5, 11, 13, 17, 19, 23, 37, 41, 43, 47,
- 53, 61, 71, 73, 79, 83, 89, 97, 101, 103, 109, 113, 131, 137, 139, 149,
- 151, 157, 163, };
+// exclude 2 and mersienne primes, i.e.,
+// primes of the form 2**n - 1, e.g.,
+// 3, 7, 31, 127
+static const int prime_table[] = { 5, 11, 13, 17, 19, 23, 37, 41, 43, 47, 53,
+ 61, 71, 73, 79, 83, 89, 97, 101, 103, 109, 113, 131, 137, 139, 149, 151,
+ 157, 163, };
static const int prime_table_size = sizeof prime_table / sizeof prime_table[0];
Chain*
-Run::random_mem_init( Chain *mem )
-{
- // initialize pointers --
- // choose a page at random, then use
- // one pointer from each cache line
- // within the page. all pages and
- // cache lines are chosen at random.
- Chain* root = END_OF_CHAIN;
- Chain* prev = END_OF_CHAIN;
- int link_within_line = 0;
- int64 local_ops_per_chain = 0;
+Run::random_mem_init(Chain *mem) {
+ // initialize pointers --
+ // choose a page at random, then use
+ // one pointer from each cache line
+ // within the page. all pages and
+ // cache lines are chosen at random.
+ Chain* root = END_OF_CHAIN;
+ Chain* prev = END_OF_CHAIN;
+ int link_within_line = 0;
+ int64 local_ops_per_chain = 0;
- // we must set a lock because random()
- // is not thread safe
- Run::global_mutex.lock();
- setstate(this->exp->random_state[this->thread_id()]);
- int page_factor = prime_table[ random() % prime_table_size ];
- int page_offset = random() % this->exp->pages_per_chain;
- Run::global_mutex.unlock();
-
- // loop through the pages
- for (int i=0; i < this->exp->pages_per_chain; i++) {
- int page = (page_factor * i + page_offset) % this->exp->pages_per_chain;
+ // we must set a lock because random()
+ // is not thread safe
Run::global_mutex.lock();
setstate(this->exp->random_state[this->thread_id()]);
- int line_factor = prime_table[ random() % prime_table_size ];
- int line_offset = random() % this->exp->lines_per_page;
+ int page_factor = prime_table[random() % prime_table_size];
+ int page_offset = random() % this->exp->pages_per_chain;
Run::global_mutex.unlock();
- // loop through the lines within a page
- for (int j=0; j < this->exp->lines_per_page; j++) {
- int line_within_page = (line_factor * j + line_offset) % this->exp->lines_per_page;
- int link = page * this->exp->links_per_page + line_within_page * this->exp->links_per_line + link_within_line;
+ // loop through the pages
+ for (int i = 0; i < this->exp->pages_per_chain; i++) {
+ int page = (page_factor * i + page_offset) % this->exp->pages_per_chain;
+ Run::global_mutex.lock();
+ setstate(this->exp->random_state[this->thread_id()]);
+ int line_factor = prime_table[random() % prime_table_size];
+ int line_offset = random() % this->exp->lines_per_page;
+ Run::global_mutex.unlock();
+
+ // loop through the lines within a page
+ for (int j = 0; j < this->exp->lines_per_page; j++) {
+ int line_within_page = (line_factor * j + line_offset)
+ % this->exp->lines_per_page;
+ int link = page * this->exp->links_per_page
+ + line_within_page * this->exp->links_per_line
+ + link_within_line;
- if (root == END_OF_CHAIN) {
+ if (root == END_OF_CHAIN) {
// printf("root = %d(%d)[0x%x].\n", page, line_within_page, mem+link);
- prev = root = mem + link;
- local_ops_per_chain += 1;
- } else {
+ prev = root = mem + link;
+ local_ops_per_chain += 1;
+ } else {
// printf("0x%x = %d(%d)[0x%x].\n", prev, page, line_within_page, mem+link);
- prev->next = mem + link;
- prev = prev->next;
- local_ops_per_chain += 1;
- }
+ prev->next = mem + link;
+ prev = prev->next;
+ local_ops_per_chain += 1;
+ }
+ }
}
- }
- Run::global_mutex.lock();
- Run::_ops_per_chain = local_ops_per_chain;
- Run::global_mutex.unlock();
+ Run::global_mutex.lock();
+ Run::_ops_per_chain = local_ops_per_chain;
+ Run::global_mutex.unlock();
- return root;
+ return root;
}
Chain*
-Run::forward_mem_init( Chain *mem )
-{
- Chain* root = END_OF_CHAIN;
- Chain* prev = END_OF_CHAIN;
- int link_within_line = 0;
- int64 local_ops_per_chain = 0;
+Run::forward_mem_init(Chain *mem) {
+ Chain* root = END_OF_CHAIN;
+ Chain* prev = END_OF_CHAIN;
+ int link_within_line = 0;
+ int64 local_ops_per_chain = 0;
- for (int i=0; i < this->exp->lines_per_chain; i += this->exp->stride) {
- int link = i * this->exp->links_per_line + link_within_line;
- if (root == NULL) {
+ for (int i = 0; i < this->exp->lines_per_chain; i += this->exp->stride) {
+ int link = i * this->exp->links_per_line + link_within_line;
+ if (root == NULL) {
// printf("root = %d(%d)[0x%x].\n", page, line_within_page, mem+link);
- prev = root = mem + link;
- local_ops_per_chain += 1;
- } else {
+ prev = root = mem + link;
+ local_ops_per_chain += 1;
+ } else {
// printf("0x%x = %d(%d)[0x%x].\n", prev, page, line_within_page, mem+link);
- prev->next = mem + link;
- prev = prev->next;
- local_ops_per_chain += 1;
+ prev->next = mem + link;
+ prev = prev->next;
+ local_ops_per_chain += 1;
+ }
}
- }
- Run::global_mutex.lock();
- Run::_ops_per_chain = local_ops_per_chain;
- Run::global_mutex.unlock();
+ Run::global_mutex.lock();
+ Run::_ops_per_chain = local_ops_per_chain;
+ Run::global_mutex.unlock();
- return root;
+ return root;
}
Chain*
-Run::reverse_mem_init( Chain *mem )
-{
- Chain* root = END_OF_CHAIN;
- Chain* prev = END_OF_CHAIN;
- int link_within_line = 0;
- int64 local_ops_per_chain = 0;
+Run::reverse_mem_init(Chain *mem) {
+ Chain* root = END_OF_CHAIN;
+ Chain* prev = END_OF_CHAIN;
+ int link_within_line = 0;
+ int64 local_ops_per_chain = 0;
- int stride = -this->exp->stride;
- int last;
- for (int i=0; i < this->exp->lines_per_chain; i += stride) {
- last = i;
- }
+ int stride = -this->exp->stride;
+ int last;
+ for (int i = 0; i < this->exp->lines_per_chain; i += stride) {
+ last = i;
+ }
- for (int i=last; 0 <= i; i -= stride) {
- int link = i * this->exp->links_per_line + link_within_line;
- if (root == END_OF_CHAIN) {
+ for (int i = last; 0 <= i; i -= stride) {
+ int link = i * this->exp->links_per_line + link_within_line;
+ if (root == END_OF_CHAIN) {
// printf("root = %d(%d)[0x%x].\n", page, line_within_page, mem+link);
- prev = root = mem + link;
- local_ops_per_chain += 1;
- } else {
+ prev = root = mem + link;
+ local_ops_per_chain += 1;
+ } else {
// printf("0x%x = %d(%d)[0x%x].\n", prev, page, line_within_page, mem+link);
- prev->next = mem + link;
- prev = prev->next;
- local_ops_per_chain += 1;
+ prev->next = mem + link;
+ prev = prev->next;
+ local_ops_per_chain += 1;
+ }
}
- }
- Run::global_mutex.lock();
- Run::_ops_per_chain = local_ops_per_chain;
- Run::global_mutex.unlock();
+ Run::global_mutex.lock();
+ Run::_ops_per_chain = local_ops_per_chain;
+ Run::global_mutex.unlock();
- return root;
+ return root;
}
static int64 dumb_ck = 0;
-void
-mem_chk( Chain *m )
-{
- if (m == END_OF_CHAIN) dumb_ck += 1;
+void mem_chk(Chain *m) {
+ if (m == END_OF_CHAIN)
+ dumb_ck += 1;
}
-static void
-chase_pointers(
- int64 chains_per_thread, // memory loading per thread
- int64 iterations, // number of iterations per experiment
- Chain** root, // root(s) of the chain(s) to follow
- int64 bytes_per_line, // ignored
- int64 bytes_per_chain, // ignored
- int64 stride, // ignored
- int64 busy_cycles, // processing cycles
- bool prefetch // prefetch?
-)
-{
- // chase pointers
- switch (chains_per_thread) {
- default:
- case 1:
- for (int64 i=0; i < iterations; i++) {
- Chain* a = root[0];
- while (a != END_OF_CHAIN) {
- a = a->next;
- if (prefetch)
- prefetch(a->next);
- for (int64 j=0; j < busy_cycles; j++)
- asm("nop");
- }
- mem_chk( a );
- }
- break;
- case 2:
- for (int64 i=0; i < iterations; i++) {
- Chain* a = root[0];
- Chain* b = root[1];
- while (a != END_OF_CHAIN) {
- a = a->next;
- b = b->next;
- if (prefetch)
+static void chase_pointers(int64 chains_per_thread, // memory loading per thread
+ int64 iterations, // number of iterations per experiment
+ Chain** root, // root(s) of the chain(s) to follow
+ int64 bytes_per_line, // ignored
+ int64 bytes_per_chain, // ignored
+ int64 stride, // ignored
+ int64 busy_cycles, // processing cycles
+ bool prefetch // prefetch?
+ ) {
+ // chase pointers
+ switch (chains_per_thread) {
+ default:
+ case 1:
+ for (int64 i = 0; i < iterations; i++) {
+ Chain* a = root[0];
+ while (a != END_OF_CHAIN) {
+ a = a->next;
+ if (prefetch)
prefetch(a->next);
- for (int64 j=0; j < busy_cycles; j++)
- asm("nop");
- }
- mem_chk( a );
- mem_chk( b );
- }
- break;
- case 3:
- for (int64 i=0; i < iterations; i++) {
- Chain* a = root[0];
- Chain* b = root[1];
- Chain* c = root[2];
- while (a != END_OF_CHAIN) {
- a = a->next;
- b = b->next;
- c = c->next;
- if (prefetch)
+ for (int64 j = 0; j < busy_cycles; j++)
+ asm("nop");
+ }
+ mem_chk(a);
+ }
+ break;
+ case 2:
+ for (int64 i = 0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ while (a != END_OF_CHAIN) {
+ a = a->next;
+ b = b->next;
+ if (prefetch)
prefetch(a->next);
- for (int64 j=0; j < busy_cycles; j++)
- asm("nop");
- }
- mem_chk( a );
- mem_chk( b );
- mem_chk( c );
- }
- break;
- case 4:
- for (int64 i=0; i < iterations; i++) {
- Chain* a = root[0];
- Chain* b = root[1];
- Chain* c = root[2];
- Chain* d = root[3];
- while (a != END_OF_CHAIN) {
- a = a->next;
- b = b->next;
- c = c->next;
- d = d->next;
- if (prefetch)
+ for (int64 j = 0; j < busy_cycles; j++)
+ asm("nop");
+ }
+ mem_chk(a);
+ mem_chk(b);
+ }
+ break;
+ case 3:
+ for (int64 i = 0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ while (a != END_OF_CHAIN) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ if (prefetch)
prefetch(a->next);
- for (int64 j=0; j < busy_cycles; j++)
- asm("nop");
- }
- mem_chk( a );
- mem_chk( b );
- mem_chk( c );
- mem_chk( d );
- }
- break;
- case 5:
- for (int64 i=0; i < iterations; i++) {
- Chain* a = root[0];
- Chain* b = root[1];
- Chain* c = root[2];
- Chain* d = root[3];
- Chain* e = root[4];
- while (a != END_OF_CHAIN) {
- a = a->next;
- b = b->next;
- c = c->next;
- d = d->next;
- e = e->next;
- if (prefetch)
+ for (int64 j = 0; j < busy_cycles; j++)
+ asm("nop");
+ }
+ mem_chk(a);
+ mem_chk(b);
+ mem_chk(c);
+ }
+ break;
+ case 4:
+ for (int64 i = 0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ while (a != END_OF_CHAIN) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ if (prefetch)
prefetch(a->next);
- for (int64 j=0; j < busy_cycles; j++)
- asm("nop");
- }
- mem_chk( a );
- mem_chk( b );
- mem_chk( c );
- mem_chk( d );
- mem_chk( e );
- }
- break;
- case 6:
- for (int64 i=0; i < iterations; i++) {
- Chain* a = root[0];
- Chain* b = root[1];
- Chain* c = root[2];
- Chain* d = root[3];
- Chain* e = root[4];
- Chain* f = root[5];
- while (a != END_OF_CHAIN) {
- a = a->next;
- b = b->next;
- c = c->next;
- d = d->next;
- e = e->next;
- f = f->next;
- if (prefetch)
+ for (int64 j = 0; j < busy_cycles; j++)
+ asm("nop");
+ }
+ mem_chk(a);
+ mem_chk(b);
+ mem_chk(c);
+ mem_chk(d);
+ }
+ break;
+ case 5:
+ for (int64 i = 0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ while (a != END_OF_CHAIN) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ if (prefetch)
prefetch(a->next);
- for (int64 j=0; j < busy_cycles; j++)
- asm("nop");
- }
- mem_chk( a );
- mem_chk( b );
- mem_chk( c );
- mem_chk( d );
- mem_chk( e );
- mem_chk( f );
- }
- break;
- case 7:
- for (int64 i=0; i < iterations; i++) {
- Chain* a = root[0];
- Chain* b = root[1];
- Chain* c = root[2];
- Chain* d = root[3];
- Chain* e = root[4];
- Chain* f = root[5];
- Chain* g = root[6];
- while (a != END_OF_CHAIN) {
- a = a->next;
- b = b->next;
- c = c->next;
- d = d->next;
- e = e->next;
- f = f->next;
- g = g->next;
- if (prefetch)
+ for (int64 j = 0; j < busy_cycles; j++)
+ asm("nop");
+ }
+ mem_chk(a);
+ mem_chk(b);
+ mem_chk(c);
+ mem_chk(d);
+ mem_chk(e);
+ }
+ break;
+ case 6:
+ for (int64 i = 0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ Chain* f = root[5];
+ while (a != END_OF_CHAIN) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ f = f->next;
+ if (prefetch)
prefetch(a->next);
- for (int64 j=0; j < busy_cycles; j++)
- asm("nop");
- }
- mem_chk( a );
- mem_chk( b );
- mem_chk( c );
- mem_chk( d );
- mem_chk( e );
- mem_chk( f );
- mem_chk( g );
- }
- break;
- case 8:
- for (int64 i=0; i < iterations; i++) {
- Chain* a = root[0];
- Chain* b = root[1];
- Chain* c = root[2];
- Chain* d = root[3];
- Chain* e = root[4];
- Chain* f = root[5];
- Chain* g = root[6];
- Chain* h = root[7];
- while (a != END_OF_CHAIN) {
- a = a->next;
- b = b->next;
- c = c->next;
- d = d->next;
- e = e->next;
- f = f->next;
- g = g->next;
- h = h->next;
- if (prefetch)
+ for (int64 j = 0; j < busy_cycles; j++)
+ asm("nop");
+ }
+ mem_chk(a);
+ mem_chk(b);
+ mem_chk(c);
+ mem_chk(d);
+ mem_chk(e);
+ mem_chk(f);
+ }
+ break;
+ case 7:
+ for (int64 i = 0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ Chain* f = root[5];
+ Chain* g = root[6];
+ while (a != END_OF_CHAIN) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ f = f->next;
+ g = g->next;
+ if (prefetch)
prefetch(a->next);
- for (int64 j=0; j < busy_cycles; j++)
- asm("nop");
- }
- mem_chk( a );
- mem_chk( b );
- mem_chk( c );
- mem_chk( d );
- mem_chk( e );
- mem_chk( f );
- mem_chk( g );
- mem_chk( h );
- }
- break;
- case 9:
- for (int64 i=0; i < iterations; i++) {
- Chain* a = root[0];
- Chain* b = root[1];
- Chain* c = root[2];
- Chain* d = root[3];
- Chain* e = root[4];
- Chain* f = root[5];
- Chain* g = root[6];
- Chain* h = root[7];
- Chain* j = root[8];
- while (a != END_OF_CHAIN) {
- a = a->next;
- b = b->next;
- c = c->next;
- d = d->next;
- e = e->next;
- f = f->next;
- g = g->next;
- h = h->next;
- j = j->next;
- if (prefetch)
+ for (int64 j = 0; j < busy_cycles; j++)
+ asm("nop");
+ }
+ mem_chk(a);
+ mem_chk(b);
+ mem_chk(c);
+ mem_chk(d);
+ mem_chk(e);
+ mem_chk(f);
+ mem_chk(g);
+ }
+ break;
+ case 8:
+ for (int64 i = 0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ Chain* f = root[5];
+ Chain* g = root[6];
+ Chain* h = root[7];
+ while (a != END_OF_CHAIN) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ f = f->next;
+ g = g->next;
+ h = h->next;
+ if (prefetch)
prefetch(a->next);
- for (int64 j=0; j < busy_cycles; j++)
- asm("nop");
- }
- mem_chk( a );
- mem_chk( b );
- mem_chk( c );
- mem_chk( d );
- mem_chk( e );
- mem_chk( f );
- mem_chk( g );
- mem_chk( h );
- mem_chk( j );
- }
- break;
- case 10:
- for (int64 i=0; i < iterations; i++) {
- Chain* a = root[0];
- Chain* b = root[1];
- Chain* c = root[2];
- Chain* d = root[3];
- Chain* e = root[4];
- Chain* f = root[5];
- Chain* g = root[6];
- Chain* h = root[7];
- Chain* j = root[8];
- Chain* k = root[9];
- while (a != END_OF_CHAIN) {
- a = a->next;
- b = b->next;
- c = c->next;
- d = d->next;
- e = e->next;
- f = f->next;
- g = g->next;
- h = h->next;
- j = j->next;
- k = k->next;
- if (prefetch)
+ for (int64 j = 0; j < busy_cycles; j++)
+ asm("nop");
+ }
+ mem_chk(a);
+ mem_chk(b);
+ mem_chk(c);
+ mem_chk(d);
+ mem_chk(e);
+ mem_chk(f);
+ mem_chk(g);
+ mem_chk(h);
+ }
+ break;
+ case 9:
+ for (int64 i = 0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ Chain* f = root[5];
+ Chain* g = root[6];
+ Chain* h = root[7];
+ Chain* j = root[8];
+ while (a != END_OF_CHAIN) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ f = f->next;
+ g = g->next;
+ h = h->next;
+ j = j->next;
+ if (prefetch)
prefetch(a->next);
- for (int64 j=0; j < busy_cycles; j++)
- asm("nop");
- }
- mem_chk( a );
- mem_chk( b );
- mem_chk( c );
- mem_chk( d );
- mem_chk( e );
- mem_chk( f );
- mem_chk( g );
- mem_chk( h );
- mem_chk( j );
- mem_chk( k );
- }
- break;
- case 11:
- for (int64 i=0; i < iterations; i++) {
- Chain* a = root[0];
- Chain* b = root[1];
- Chain* c = root[2];
- Chain* d = root[3];
- Chain* e = root[4];
- Chain* f = root[5];
- Chain* g = root[6];
- Chain* h = root[7];
- Chain* j = root[8];
- Chain* k = root[9];
- Chain* l = root[10];
- while (a != END_OF_CHAIN) {
- a = a->next;
- b = b->next;
- c = c->next;
- d = d->next;
- e = e->next;
- f = f->next;
- g = g->next;
- h = h->next;
- j = j->next;
- k = k->next;
- l = l->next;
- if (prefetch)
+ for (int64 j = 0; j < busy_cycles; j++)
+ asm("nop");
+ }
+ mem_chk(a);
+ mem_chk(b);
+ mem_chk(c);
+ mem_chk(d);
+ mem_chk(e);
+ mem_chk(f);
+ mem_chk(g);
+ mem_chk(h);
+ mem_chk(j);
+ }
+ break;
+ case 10:
+ for (int64 i = 0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ Chain* f = root[5];
+ Chain* g = root[6];
+ Chain* h = root[7];
+ Chain* j = root[8];
+ Chain* k = root[9];
+ while (a != END_OF_CHAIN) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ f = f->next;
+ g = g->next;
+ h = h->next;
+ j = j->next;
+ k = k->next;
+ if (prefetch)
prefetch(a->next);
- for (int64 j=0; j < busy_cycles; j++)
- asm("nop");
- }
- mem_chk( a );
- mem_chk( b );
- mem_chk( c );
- mem_chk( d );
- mem_chk( e );
- mem_chk( f );
- mem_chk( g );
- mem_chk( h );
- mem_chk( j );
- mem_chk( k );
- mem_chk( l );
- }
- break;
- case 12:
- for (int64 i=0; i < iterations; i++) {
- Chain* a = root[0];
- Chain* b = root[1];
- Chain* c = root[2];
- Chain* d = root[3];
- Chain* e = root[4];
- Chain* f = root[5];
- Chain* g = root[6];
- Chain* h = root[7];
- Chain* j = root[8];
- Chain* k = root[9];
- Chain* l = root[10];
- Chain* m = root[11];
- while (a != END_OF_CHAIN) {
- a = a->next;
- b = b->next;
- c = c->next;
- d = d->next;
- e = e->next;
- f = f->next;
- g = g->next;
- h = h->next;
- j = j->next;
- k = k->next;
- l = l->next;
- m = m->next;
- if (prefetch)
+ for (int64 j = 0; j < busy_cycles; j++)
+ asm("nop");
+ }
+ mem_chk(a);
+ mem_chk(b);
+ mem_chk(c);
+ mem_chk(d);
+ mem_chk(e);
+ mem_chk(f);
+ mem_chk(g);
+ mem_chk(h);
+ mem_chk(j);
+ mem_chk(k);
+ }
+ break;
+ case 11:
+ for (int64 i = 0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ Chain* f = root[5];
+ Chain* g = root[6];
+ Chain* h = root[7];
+ Chain* j = root[8];
+ Chain* k = root[9];
+ Chain* l = root[10];
+ while (a != END_OF_CHAIN) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ f = f->next;
+ g = g->next;
+ h = h->next;
+ j = j->next;
+ k = k->next;
+ l = l->next;
+ if (prefetch)
prefetch(a->next);
- for (int64 j=0; j < busy_cycles; j++)
- asm("nop");
- }
- mem_chk( a );
- mem_chk( b );
- mem_chk( c );
- mem_chk( d );
- mem_chk( e );
- mem_chk( f );
- mem_chk( g );
- mem_chk( h );
- mem_chk( j );
- mem_chk( k );
- mem_chk( l );
- mem_chk( m );
- }
- break;
- case 13:
- for (int64 i=0; i < iterations; i++) {
- Chain* a = root[0];
- Chain* b = root[1];
- Chain* c = root[2];
- Chain* d = root[3];
- Chain* e = root[4];
- Chain* f = root[5];
- Chain* g = root[6];
- Chain* h = root[7];
- Chain* j = root[8];
- Chain* k = root[9];
- Chain* l = root[10];
- Chain* m = root[11];
- Chain* n = root[12];
- while (a != END_OF_CHAIN) {
- a = a->next;
- b = b->next;
- c = c->next;
- d = d->next;
- e = e->next;
- f = f->next;
- g = g->next;
- h = h->next;
- j = j->next;
- k = k->next;
- l = l->next;
- m = m->next;
- n = n->next;
- if (prefetch)
+ for (int64 j = 0; j < busy_cycles; j++)
+ asm("nop");
+ }
+ mem_chk(a);
+ mem_chk(b);
+ mem_chk(c);
+ mem_chk(d);
+ mem_chk(e);
+ mem_chk(f);
+ mem_chk(g);
+ mem_chk(h);
+ mem_chk(j);
+ mem_chk(k);
+ mem_chk(l);
+ }
+ break;
+ case 12:
+ for (int64 i = 0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ Chain* f = root[5];
+ Chain* g = root[6];
+ Chain* h = root[7];
+ Chain* j = root[8];
+ Chain* k = root[9];
+ Chain* l = root[10];
+ Chain* m = root[11];
+ while (a != END_OF_CHAIN) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ f = f->next;
+ g = g->next;
+ h = h->next;
+ j = j->next;
+ k = k->next;
+ l = l->next;
+ m = m->next;
+ if (prefetch)
prefetch(a->next);
- for (int64 j=0; j < busy_cycles; j++)
- asm("nop");
- }
- mem_chk( a );
- mem_chk( b );
- mem_chk( c );
- mem_chk( d );
- mem_chk( e );
- mem_chk( f );
- mem_chk( g );
- mem_chk( h );
- mem_chk( j );
- mem_chk( k );
- mem_chk( l );
- mem_chk( m );
- mem_chk( n );
- }
- break;
- case 14:
- for (int64 i=0; i < iterations; i++) {
- Chain* a = root[0];
- Chain* b = root[1];
- Chain* c = root[2];
- Chain* d = root[3];
- Chain* e = root[4];
- Chain* f = root[5];
- Chain* g = root[6];
- Chain* h = root[7];
- Chain* j = root[8];
- Chain* k = root[9];
- Chain* l = root[10];
- Chain* m = root[11];
- Chain* n = root[12];
- Chain* o = root[13];
- while (a != END_OF_CHAIN) {
- a = a->next;
- b = b->next;
- c = c->next;
- d = d->next;
- e = e->next;
- f = f->next;
- g = g->next;
- h = h->next;
- j = j->next;
- k = k->next;
- l = l->next;
- m = m->next;
- n = n->next;
- o = o->next;
- if (prefetch)
+ for (int64 j = 0; j < busy_cycles; j++)
+ asm("nop");
+ }
+ mem_chk(a);
+ mem_chk(b);
+ mem_chk(c);
+ mem_chk(d);
+ mem_chk(e);
+ mem_chk(f);
+ mem_chk(g);
+ mem_chk(h);
+ mem_chk(j);
+ mem_chk(k);
+ mem_chk(l);
+ mem_chk(m);
+ }
+ break;
+ case 13:
+ for (int64 i = 0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ Chain* f = root[5];
+ Chain* g = root[6];
+ Chain* h = root[7];
+ Chain* j = root[8];
+ Chain* k = root[9];
+ Chain* l = root[10];
+ Chain* m = root[11];
+ Chain* n = root[12];
+ while (a != END_OF_CHAIN) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ f = f->next;
+ g = g->next;
+ h = h->next;
+ j = j->next;
+ k = k->next;
+ l = l->next;
+ m = m->next;
+ n = n->next;
+ if (prefetch)
prefetch(a->next);
- for (int64 j=0; j < busy_cycles; j++)
- asm("nop");
- }
- mem_chk( a );
- mem_chk( b );
- mem_chk( c );
- mem_chk( d );
- mem_chk( e );
- mem_chk( f );
- mem_chk( g );
- mem_chk( h );
- mem_chk( j );
- mem_chk( k );
- mem_chk( l );
- mem_chk( m );
- mem_chk( n );
- mem_chk( o );
- }
- break;
- case 15:
- for (int64 i=0; i < iterations; i++) {
- Chain* a = root[0];
- Chain* b = root[1];
- Chain* c = root[2];
- Chain* d = root[3];
- Chain* e = root[4];
- Chain* f = root[5];
- Chain* g = root[6];
- Chain* h = root[7];
- Chain* j = root[8];
- Chain* k = root[9];
- Chain* l = root[10];
- Chain* m = root[11];
- Chain* n = root[12];
- Chain* o = root[13];
- Chain* p = root[14];
- while (a != END_OF_CHAIN) {
- a = a->next;
- b = b->next;
- c = c->next;
- d = d->next;
- e = e->next;
- f = f->next;
- g = g->next;
- h = h->next;
- j = j->next;
- k = k->next;
- l = l->next;
- m = m->next;
- n = n->next;
- o = o->next;
- p = p->next;
- if (prefetch)
+ for (int64 j = 0; j < busy_cycles; j++)
+ asm("nop");
+ }
+ mem_chk(a);
+ mem_chk(b);
+ mem_chk(c);
+ mem_chk(d);
+ mem_chk(e);
+ mem_chk(f);
+ mem_chk(g);
+ mem_chk(h);
+ mem_chk(j);
+ mem_chk(k);
+ mem_chk(l);
+ mem_chk(m);
+ mem_chk(n);
+ }
+ break;
+ case 14:
+ for (int64 i = 0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ Chain* f = root[5];
+ Chain* g = root[6];
+ Chain* h = root[7];
+ Chain* j = root[8];
+ Chain* k = root[9];
+ Chain* l = root[10];
+ Chain* m = root[11];
+ Chain* n = root[12];
+ Chain* o = root[13];
+ while (a != END_OF_CHAIN) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ f = f->next;
+ g = g->next;
+ h = h->next;
+ j = j->next;
+ k = k->next;
+ l = l->next;
+ m = m->next;
+ n = n->next;
+ o = o->next;
+ if (prefetch)
prefetch(a->next);
- for (int64 j=0; j < busy_cycles; j++)
- asm("nop");
- }
- mem_chk( a );
- mem_chk( b );
- mem_chk( c );
- mem_chk( d );
- mem_chk( e );
- mem_chk( f );
- mem_chk( g );
- mem_chk( h );
- mem_chk( j );
- mem_chk( k );
- mem_chk( l );
- mem_chk( m );
- mem_chk( n );
- mem_chk( o );
- mem_chk( p );
- }
- break;
- case 16:
- for (int64 i=0; i < iterations; i++) {
- Chain* a = root[0];
- Chain* b = root[1];
- Chain* c = root[2];
- Chain* d = root[3];
- Chain* e = root[4];
- Chain* f = root[5];
- Chain* g = root[6];
- Chain* h = root[7];
- Chain* j = root[8];
- Chain* k = root[9];
- Chain* l = root[10];
- Chain* m = root[11];
- Chain* n = root[12];
- Chain* o = root[13];
- Chain* p = root[14];
- Chain* q = root[15];
- while (a != END_OF_CHAIN) {
- a = a->next;
- b = b->next;
- c = c->next;
- d = d->next;
- e = e->next;
- f = f->next;
- g = g->next;
- h = h->next;
- j = j->next;
- k = k->next;
- l = l->next;
- m = m->next;
- n = n->next;
- o = o->next;
- p = p->next;
- q = q->next;
- if (prefetch)
+ for (int64 j = 0; j < busy_cycles; j++)
+ asm("nop");
+ }
+ mem_chk(a);
+ mem_chk(b);
+ mem_chk(c);
+ mem_chk(d);
+ mem_chk(e);
+ mem_chk(f);
+ mem_chk(g);
+ mem_chk(h);
+ mem_chk(j);
+ mem_chk(k);
+ mem_chk(l);
+ mem_chk(m);
+ mem_chk(n);
+ mem_chk(o);
+ }
+ break;
+ case 15:
+ for (int64 i = 0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ Chain* f = root[5];
+ Chain* g = root[6];
+ Chain* h = root[7];
+ Chain* j = root[8];
+ Chain* k = root[9];
+ Chain* l = root[10];
+ Chain* m = root[11];
+ Chain* n = root[12];
+ Chain* o = root[13];
+ Chain* p = root[14];
+ while (a != END_OF_CHAIN) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ f = f->next;
+ g = g->next;
+ h = h->next;
+ j = j->next;
+ k = k->next;
+ l = l->next;
+ m = m->next;
+ n = n->next;
+ o = o->next;
+ p = p->next;
+ if (prefetch)
+ prefetch(a->next);
+ for (int64 j = 0; j < busy_cycles; j++)
+ asm("nop");
+ }
+ mem_chk(a);
+ mem_chk(b);
+ mem_chk(c);
+ mem_chk(d);
+ mem_chk(e);
+ mem_chk(f);
+ mem_chk(g);
+ mem_chk(h);
+ mem_chk(j);
+ mem_chk(k);
+ mem_chk(l);
+ mem_chk(m);
+ mem_chk(n);
+ mem_chk(o);
+ mem_chk(p);
+ }
+ break;
+ case 16:
+ for (int64 i = 0; i < iterations; i++) {
+ Chain* a = root[0];
+ Chain* b = root[1];
+ Chain* c = root[2];
+ Chain* d = root[3];
+ Chain* e = root[4];
+ Chain* f = root[5];
+ Chain* g = root[6];
+ Chain* h = root[7];
+ Chain* j = root[8];
+ Chain* k = root[9];
+ Chain* l = root[10];
+ Chain* m = root[11];
+ Chain* n = root[12];
+ Chain* o = root[13];
+ Chain* p = root[14];
+ Chain* q = root[15];
+ while (a != END_OF_CHAIN) {
+ a = a->next;
+ b = b->next;
+ c = c->next;
+ d = d->next;
+ e = e->next;
+ f = f->next;
+ g = g->next;
+ h = h->next;
+ j = j->next;
+ k = k->next;
+ l = l->next;
+ m = m->next;
+ n = n->next;
+ o = o->next;
+ p = p->next;
+ q = q->next;
+ if (prefetch)
prefetch(a->next);
- for (int64 j=0; j < busy_cycles; j++)
- asm("nop");
- }
- mem_chk( a );
- mem_chk( b );
- mem_chk( c );
- mem_chk( d );
- mem_chk( e );
- mem_chk( f );
- mem_chk( g );
- mem_chk( h );
- mem_chk( j );
- mem_chk( k );
- mem_chk( l );
- mem_chk( m );
- mem_chk( n );
- mem_chk( o );
- mem_chk( p );
- mem_chk( q );
+ for (int64 j = 0; j < busy_cycles; j++)
+ asm("nop");
+ }
+ mem_chk(a);
+ mem_chk(b);
+ mem_chk(c);
+ mem_chk(d);
+ mem_chk(e);
+ mem_chk(f);
+ mem_chk(g);
+ mem_chk(h);
+ mem_chk(j);
+ mem_chk(k);
+ mem_chk(l);
+ mem_chk(m);
+ mem_chk(n);
+ mem_chk(o);
+ mem_chk(p);
+ mem_chk(q);
+ }
}
- }
}
- // NOT WRITTEN YET -- DMP
- // JUST A PLACE HOLDER!
+// NOT WRITTEN YET -- DMP
+// JUST A PLACE HOLDER!
Chain*
-Run::stream_mem_init( Chain *mem )
-{
+Run::stream_mem_init(Chain *mem) {
// fprintf(stderr, "made it into stream_mem_init.\n");
// fprintf(stderr, "chains_per_thread = %ld\n", this->exp->chains_per_thread);
// fprintf(stderr, "iterations = %ld\n", this->exp->iterations);
// fprintf(stderr, "bytes_per_chain = %ld\n", this->exp->bytes_per_chain);
// fprintf(stderr, "stride = %ld\n", this->exp->stride);
- int64 local_ops_per_chain = 0;
- double* tmp = (double *) mem;
- int64 refs_per_line = this->exp->bytes_per_line / sizeof(double);
- int64 refs_per_chain = this->exp->bytes_per_chain / sizeof(double);
+ int64 local_ops_per_chain = 0;
+ double* tmp = (double *) mem;
+ int64 refs_per_line = this->exp->bytes_per_line / sizeof(double);
+ int64 refs_per_chain = this->exp->bytes_per_chain / sizeof(double);
// fprintf(stderr, "refs_per_chain = %ld\n", refs_per_chain);
- for (int64 i=0; i < refs_per_chain; i += this->exp->stride*refs_per_line) {
- tmp[i] = 0;
- local_ops_per_chain += 1;
- }
+ for (int64 i = 0; i < refs_per_chain;
+ i += this->exp->stride * refs_per_line) {
+ tmp[i] = 0;
+ local_ops_per_chain += 1;
+ }
- Run::global_mutex.lock();
- Run::_ops_per_chain = local_ops_per_chain;
- Run::global_mutex.unlock();
+ Run::global_mutex.lock();
+ Run::_ops_per_chain = local_ops_per_chain;
+ Run::global_mutex.unlock();
// fprintf(stderr, "made it out of stream_mem_init.\n");
- return mem;
+ return mem;
}
static int64 summ_ck = 0;
-void
-sum_chk( double t )
-{
- if (t != 0) summ_ck += 1;
+void sum_chk(double t) {
+ if (t != 0)
+ summ_ck += 1;
}
- // NOT WRITTEN YET -- DMP
- // JUST A PLACE HOLDER!
-static void
-follow_streams(
- int64 chains_per_thread, // memory loading per thread
- int64 iterations, // number of iterations per experiment
- Chain** root, // root(s) of the chain(s) to follow
- int64 bytes_per_line, // ignored
- int64 bytes_per_chain, // ignored
- int64 stride, // ignored
- int64 busy_cycles, // ignored
- bool prefetch // ignored
-)
-{
- int64 refs_per_line = bytes_per_line / sizeof(double);
- int64 refs_per_chain = bytes_per_chain / sizeof(double);
+// NOT WRITTEN YET -- DMP
+// JUST A PLACE HOLDER!
+static void follow_streams(int64 chains_per_thread, // memory loading per thread
+ int64 iterations, // number of iterations per experiment
+ Chain** root, // root(s) of the chain(s) to follow
+ int64 bytes_per_line, // ignored
+ int64 bytes_per_chain, // ignored
+ int64 stride, // ignored
+ int64 busy_cycles, // ignored
+ bool prefetch // ignored
+ ) {
+ int64 refs_per_line = bytes_per_line / sizeof(double);
+ int64 refs_per_chain = bytes_per_chain / sizeof(double);
- // chase pointers
- switch (chains_per_thread) {
- default:
- case 1:
- for (int64 i=0; i < iterations; i++) {
- double t = 0;
- double* a0 = (double *) root[0];
- for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
- t += a0[j];
- }
- sum_chk( t );
- }
- break;
- case 2:
- for (int64 i=0; i < iterations; i++) {
- double t = 0;
- double* a0 = (double *) root[0];
- double* a1 = (double *) root[1];
- for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
- t += a0[j] + a1[j];
- }
- sum_chk( t );
- }
- break;
- case 3:
- for (int64 i=0; i < iterations; i++) {
- double t = 0;
- double* a0 = (double *) root[0];
- double* a1 = (double *) root[1];
- double* a2 = (double *) root[2];
- for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
- t += a0[j] + a1[j] + a2[j];
- }
- sum_chk( t );
- }
- break;
- case 4:
- for (int64 i=0; i < iterations; i++) {
- double t = 0;
- double* a0 = (double *) root[0];
- double* a1 = (double *) root[1];
- double* a2 = (double *) root[2];
- double* a3 = (double *) root[3];
- for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
- t += a0[j] + a1[j] + a2[j] + a3[j];
- }
- sum_chk( t );
- }
- break;
- case 5:
- for (int64 i=0; i < iterations; i++) {
- double t = 0;
- double* a0 = (double *) root[0];
- double* a1 = (double *) root[1];
- double* a2 = (double *) root[2];
- double* a3 = (double *) root[3];
- double* a4 = (double *) root[4];
- for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
- t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j];
- }
- sum_chk( t );
- }
- break;
- case 6:
- for (int64 i=0; i < iterations; i++) {
- double t = 0;
- double* a0 = (double *) root[0];
- double* a1 = (double *) root[1];
- double* a2 = (double *) root[2];
- double* a3 = (double *) root[3];
- double* a4 = (double *) root[4];
- double* a5 = (double *) root[5];
- for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
- t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j];
- }
- sum_chk( t );
- }
- break;
- case 7:
- for (int64 i=0; i < iterations; i++) {
- double t = 0;
- double* a0 = (double *) root[0];
- double* a1 = (double *) root[1];
- double* a2 = (double *) root[2];
- double* a3 = (double *) root[3];
- double* a4 = (double *) root[4];
- double* a5 = (double *) root[5];
- double* a6 = (double *) root[6];
- for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
- t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j];
- }
- sum_chk( t );
- }
- break;
- case 8:
- for (int64 i=0; i < iterations; i++) {
- double t = 0;
- double* a0 = (double *) root[0];
- double* a1 = (double *) root[1];
- double* a2 = (double *) root[2];
- double* a3 = (double *) root[3];
- double* a4 = (double *) root[4];
- double* a5 = (double *) root[5];
- double* a6 = (double *) root[6];
- double* a7 = (double *) root[7];
- for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
- t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j];
- }
- sum_chk( t );
- }
- break;
- case 9:
- for (int64 i=0; i < iterations; i++) {
- double t = 0;
- double* a0 = (double *) root[0];
- double* a1 = (double *) root[1];
- double* a2 = (double *) root[2];
- double* a3 = (double *) root[3];
- double* a4 = (double *) root[4];
- double* a5 = (double *) root[5];
- double* a6 = (double *) root[6];
- double* a7 = (double *) root[7];
- double* a8 = (double *) root[8];
- for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
- t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j] +
- a8[j];
- }
- sum_chk( t );
- }
- break;
- case 10:
- for (int64 i=0; i < iterations; i++) {
- double t = 0;
- double* a0 = (double *) root[0];
- double* a1 = (double *) root[1];
- double* a2 = (double *) root[2];
- double* a3 = (double *) root[3];
- double* a4 = (double *) root[4];
- double* a5 = (double *) root[5];
- double* a6 = (double *) root[6];
- double* a7 = (double *) root[7];
- double* a8 = (double *) root[8];
- double* a9 = (double *) root[9];
- for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
- t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j] +
- a8[j] + a9[j];
- }
- sum_chk( t );
- }
- break;
- case 11:
- for (int64 i=0; i < iterations; i++) {
- double t = 0;
- double* a0 = (double *) root[ 0];
- double* a1 = (double *) root[ 1];
- double* a2 = (double *) root[ 2];
- double* a3 = (double *) root[ 3];
- double* a4 = (double *) root[ 4];
- double* a5 = (double *) root[ 5];
- double* a6 = (double *) root[ 6];
- double* a7 = (double *) root[ 7];
- double* a8 = (double *) root[ 8];
- double* a9 = (double *) root[ 9];
- double* a10 = (double *) root[10];
- for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
- t += a0[j] + a1[j] + a2 [j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j] +
- a8[j] + a9[j] + a10[j];
- }
- sum_chk( t );
- }
- break;
- case 12:
- for (int64 i=0; i < iterations; i++) {
- double t = 0;
- double* a0 = (double *) root[ 0];
- double* a1 = (double *) root[ 1];
- double* a2 = (double *) root[ 2];
- double* a3 = (double *) root[ 3];
- double* a4 = (double *) root[ 4];
- double* a5 = (double *) root[ 5];
- double* a6 = (double *) root[ 6];
- double* a7 = (double *) root[ 7];
- double* a8 = (double *) root[ 8];
- double* a9 = (double *) root[ 9];
- double* a10 = (double *) root[10];
- double* a11 = (double *) root[11];
- for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
- t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4[j] + a5[j] + a6[j] + a7[j] +
- a8[j] + a9[j] + a10[j] + a11[j];
- }
- sum_chk( t );
- }
- break;
- case 13:
- for (int64 i=0; i < iterations; i++) {
- double t = 0;
- double* a0 = (double *) root[ 0];
- double* a1 = (double *) root[ 1];
- double* a2 = (double *) root[ 2];
- double* a3 = (double *) root[ 3];
- double* a4 = (double *) root[ 4];
- double* a5 = (double *) root[ 5];
- double* a6 = (double *) root[ 6];
- double* a7 = (double *) root[ 7];
- double* a8 = (double *) root[ 8];
- double* a9 = (double *) root[ 9];
- double* a10 = (double *) root[10];
- double* a11 = (double *) root[11];
- double* a12 = (double *) root[12];
- for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
- t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5[j] + a6[j] + a7[j] +
- a8[j] + a9[j] + a10[j] + a11[j] + a12[j];
- }
- sum_chk( t );
- }
- break;
- case 14:
- for (int64 i=0; i < iterations; i++) {
- double t = 0;
- double* a0 = (double *) root[ 0];
- double* a1 = (double *) root[ 1];
- double* a2 = (double *) root[ 2];
- double* a3 = (double *) root[ 3];
- double* a4 = (double *) root[ 4];
- double* a5 = (double *) root[ 5];
- double* a6 = (double *) root[ 6];
- double* a7 = (double *) root[ 7];
- double* a8 = (double *) root[ 8];
- double* a9 = (double *) root[ 9];
- double* a10 = (double *) root[10];
- double* a11 = (double *) root[11];
- double* a12 = (double *) root[12];
- double* a13 = (double *) root[13];
- for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
- t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5 [j] + a6[j] + a7[j] +
- a8[j] + a9[j] + a10[j] + a11[j] + a12[j] + a13[j];
- }
- sum_chk( t );
- }
- break;
- case 15:
- for (int64 i=0; i < iterations; i++) {
- double t = 0;
- double* a0 = (double *) root[ 0];
- double* a1 = (double *) root[ 1];
- double* a2 = (double *) root[ 2];
- double* a3 = (double *) root[ 3];
- double* a4 = (double *) root[ 4];
- double* a5 = (double *) root[ 5];
- double* a6 = (double *) root[ 6];
- double* a7 = (double *) root[ 7];
- double* a8 = (double *) root[ 8];
- double* a9 = (double *) root[ 9];
- double* a10 = (double *) root[10];
- double* a11 = (double *) root[11];
- double* a12 = (double *) root[12];
- double* a13 = (double *) root[13];
- double* a14 = (double *) root[14];
- for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
- t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5 [j] + a6 [j] + a7[j] +
- a8[j] + a9[j] + a10[j] + a11[j] + a12[j] + a13[j] + a14[j];
- }
- sum_chk( t );
- }
- break;
- case 16:
- for (int64 i=0; i < iterations; i++) {
- double t = 0;
- double* a0 = (double *) root[ 0];
- double* a1 = (double *) root[ 1];
- double* a2 = (double *) root[ 2];
- double* a3 = (double *) root[ 3];
- double* a4 = (double *) root[ 4];
- double* a5 = (double *) root[ 5];
- double* a6 = (double *) root[ 6];
- double* a7 = (double *) root[ 7];
- double* a8 = (double *) root[ 8];
- double* a9 = (double *) root[ 9];
- double* a10 = (double *) root[10];
- double* a11 = (double *) root[11];
- double* a12 = (double *) root[12];
- double* a13 = (double *) root[13];
- double* a14 = (double *) root[14];
- double* a15 = (double *) root[15];
- for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
- t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5 [j] + a6 [j] + a7 [j] +
- a8[j] + a9[j] + a10[j] + a11[j] + a12[j] + a13[j] + a14[j] + a15[j];
- }
- sum_chk( t );
+ // chase pointers
+ switch (chains_per_thread) {
+ default:
+ case 1:
+ for (int64 i = 0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+ t += a0[j];
+ }
+ sum_chk(t);
+ }
+ break;
+ case 2:
+ for (int64 i = 0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+ t += a0[j] + a1[j];
+ }
+ sum_chk(t);
+ }
+ break;
+ case 3:
+ for (int64 i = 0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+ t += a0[j] + a1[j] + a2[j];
+ }
+ sum_chk(t);
+ }
+ break;
+ case 4:
+ for (int64 i = 0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j];
+ }
+ sum_chk(t);
+ }
+ break;
+ case 5:
+ for (int64 i = 0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j];
+ }
+ sum_chk(t);
+ }
+ break;
+ case 6:
+ for (int64 i = 0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ double* a5 = (double *) root[5];
+ for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j];
+ }
+ sum_chk(t);
+ }
+ break;
+ case 7:
+ for (int64 i = 0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ double* a5 = (double *) root[5];
+ double* a6 = (double *) root[6];
+ for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j];
+ }
+ sum_chk(t);
+ }
+ break;
+ case 8:
+ for (int64 i = 0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ double* a5 = (double *) root[5];
+ double* a6 = (double *) root[6];
+ double* a7 = (double *) root[7];
+ for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]
+ + a7[j];
+ }
+ sum_chk(t);
+ }
+ break;
+ case 9:
+ for (int64 i = 0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ double* a5 = (double *) root[5];
+ double* a6 = (double *) root[6];
+ double* a7 = (double *) root[7];
+ double* a8 = (double *) root[8];
+ for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]
+ + a7[j] + a8[j];
+ }
+ sum_chk(t);
+ }
+ break;
+ case 10:
+ for (int64 i = 0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ double* a5 = (double *) root[5];
+ double* a6 = (double *) root[6];
+ double* a7 = (double *) root[7];
+ double* a8 = (double *) root[8];
+ double* a9 = (double *) root[9];
+ for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]
+ + a7[j] + a8[j] + a9[j];
+ }
+ sum_chk(t);
+ }
+ break;
+ case 11:
+ for (int64 i = 0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ double* a5 = (double *) root[5];
+ double* a6 = (double *) root[6];
+ double* a7 = (double *) root[7];
+ double* a8 = (double *) root[8];
+ double* a9 = (double *) root[9];
+ double* a10 = (double *) root[10];
+ for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]
+ + a7[j] + a8[j] + a9[j] + a10[j];
+ }
+ sum_chk(t);
+ }
+ break;
+ case 12:
+ for (int64 i = 0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ double* a5 = (double *) root[5];
+ double* a6 = (double *) root[6];
+ double* a7 = (double *) root[7];
+ double* a8 = (double *) root[8];
+ double* a9 = (double *) root[9];
+ double* a10 = (double *) root[10];
+ double* a11 = (double *) root[11];
+ for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]
+ + a7[j] + a8[j] + a9[j] + a10[j] + a11[j];
+ }
+ sum_chk(t);
+ }
+ break;
+ case 13:
+ for (int64 i = 0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ double* a5 = (double *) root[5];
+ double* a6 = (double *) root[6];
+ double* a7 = (double *) root[7];
+ double* a8 = (double *) root[8];
+ double* a9 = (double *) root[9];
+ double* a10 = (double *) root[10];
+ double* a11 = (double *) root[11];
+ double* a12 = (double *) root[12];
+ for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]
+ + a7[j] + a8[j] + a9[j] + a10[j] + a11[j] + a12[j];
+ }
+ sum_chk(t);
+ }
+ break;
+ case 14:
+ for (int64 i = 0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ double* a5 = (double *) root[5];
+ double* a6 = (double *) root[6];
+ double* a7 = (double *) root[7];
+ double* a8 = (double *) root[8];
+ double* a9 = (double *) root[9];
+ double* a10 = (double *) root[10];
+ double* a11 = (double *) root[11];
+ double* a12 = (double *) root[12];
+ double* a13 = (double *) root[13];
+ for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]
+ + a7[j] + a8[j] + a9[j] + a10[j] + a11[j] + a12[j]
+ + a13[j];
+ }
+ sum_chk(t);
+ }
+ break;
+ case 15:
+ for (int64 i = 0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ double* a5 = (double *) root[5];
+ double* a6 = (double *) root[6];
+ double* a7 = (double *) root[7];
+ double* a8 = (double *) root[8];
+ double* a9 = (double *) root[9];
+ double* a10 = (double *) root[10];
+ double* a11 = (double *) root[11];
+ double* a12 = (double *) root[12];
+ double* a13 = (double *) root[13];
+ double* a14 = (double *) root[14];
+ for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]
+ + a7[j] + a8[j] + a9[j] + a10[j] + a11[j] + a12[j]
+ + a13[j] + a14[j];
+ }
+ sum_chk(t);
+ }
+ break;
+ case 16:
+ for (int64 i = 0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ double* a5 = (double *) root[5];
+ double* a6 = (double *) root[6];
+ double* a7 = (double *) root[7];
+ double* a8 = (double *) root[8];
+ double* a9 = (double *) root[9];
+ double* a10 = (double *) root[10];
+ double* a11 = (double *) root[11];
+ double* a12 = (double *) root[12];
+ double* a13 = (double *) root[13];
+ double* a14 = (double *) root[14];
+ double* a15 = (double *) root[15];
+ for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]
+ + a7[j] + a8[j] + a9[j] + a10[j] + a11[j] + a12[j]
+ + a13[j] + a14[j] + a15[j];
+ }
+ sum_chk(t);
+ }
+ break;
}
- break;
- }
}