From c108197c20aa7b93849e383a3aaaf7b2bba30405 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Wed, 2 Nov 2011 09:13:38 +0100 Subject: Formatting the source. --- src/Run.cpp | 2259 ++++++++++++++++++++++++++++++----------------------------- 1 file changed, 1130 insertions(+), 1129 deletions(-) (limited to 'src/Run.cpp') diff --git a/src/Run.cpp b/src/Run.cpp index c774b99..24435b4 100644 --- a/src/Run.cpp +++ b/src/Run.cpp @@ -9,7 +9,6 @@ * Douglas M. Pase - initial API and implementation * *******************************************************************************/ - #include #include #include @@ -24,1253 +23,1255 @@ #include "Timer.h" #include "SpinBarrier.h" +static double max(double v1, double v2); +static double min(double v1, double v2); +static void chase_pointers(int64 chains_per_thread, int64 iterations, + Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride, + int64 busy_cycles, bool prefetch); +static void follow_streams(int64 chains_per_thread, int64 iterations, + Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride, + int64 busy_cycles, bool prefetch); +static void (*run_benchmark)(int64 chains_per_thread, int64 iterations, + Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride, + int64 busy_cycles, bool prefetch) = chase_pointers; -static double max( double v1, double v2 ); -static double min( double v1, double v2 ); -static void chase_pointers(int64 chains_per_thread, int64 iterations, Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride, int64 busy_cycles, bool prefetch); -static void follow_streams(int64 chains_per_thread, int64 iterations, Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride, int64 busy_cycles, bool prefetch); -static void (*run_benchmark)(int64 chains_per_thread, int64 iterations, Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride, int64 busy_cycles, bool prefetch) = chase_pointers; - -Lock Run::global_mutex; -int64 Run::_ops_per_chain = 0; -double Run::_seconds = 1E9; +Lock Run::global_mutex; +int64 Run::_ops_per_chain = 0; +double Run::_seconds = 1E9; #define prefetch(x) __builtin_prefetch(x) -Run::Run() -: exp(NULL), bp(NULL) -{ +Run::Run() : + exp(NULL), bp(NULL) { } -Run::~Run() -{ +Run::~Run() { } -void -Run::set( Experiment &e, SpinBarrier* sbp ) -{ - this->exp = &e; - this->bp = sbp; +void Run::set(Experiment &e, SpinBarrier* sbp) { + this->exp = &e; + this->bp = sbp; } -int -Run::run() -{ - // first allocate all memory for the chains, - // making sure it is allocated within the - // intended numa domains - Chain** chain_memory = new Chain* [ this->exp->chains_per_thread ]; - Chain** root = new Chain* [ this->exp->chains_per_thread ]; +int Run::run() { + // first allocate all memory for the chains, + // making sure it is allocated within the + // intended numa domains + Chain** chain_memory = new Chain*[this->exp->chains_per_thread]; + Chain** root = new Chain*[this->exp->chains_per_thread]; #if defined(NUMA) - // establish the node id where this thread - // will run. threads are mapped to nodes - // by the set-up code for Experiment. - int run_node_id = this->exp->thread_domain[this->thread_id()]; - numa_run_on_node(run_node_id); + // establish the node id where this thread + // will run. threads are mapped to nodes + // by the set-up code for Experiment. + int run_node_id = this->exp->thread_domain[this->thread_id()]; + numa_run_on_node(run_node_id); - // establish the node id where this thread's - // memory will be allocated. - for (int i=0; i < this->exp->chains_per_thread; i++) { - int alloc_node_id = this->exp->chain_domain[this->thread_id()][i]; - nodemask_t alloc_mask; - nodemask_zero(&alloc_mask); - nodemask_set(&alloc_mask, alloc_node_id); - numa_set_membind(&alloc_mask); + // establish the node id where this thread's + // memory will be allocated. + for (int i=0; i < this->exp->chains_per_thread; i++) { + int alloc_node_id = this->exp->chain_domain[this->thread_id()][i]; + nodemask_t alloc_mask; + nodemask_zero(&alloc_mask); + nodemask_set(&alloc_mask, alloc_node_id); + numa_set_membind(&alloc_mask); - chain_memory[i] = new Chain[ this->exp->links_per_chain ]; - } + chain_memory[i] = new Chain[ this->exp->links_per_chain ]; + } #else - for (int i=0; i < this->exp->chains_per_thread; i++) { - chain_memory[i] = new Chain[ this->exp->links_per_chain ]; - } + for (int i = 0; i < this->exp->chains_per_thread; i++) { + chain_memory[i] = new Chain[this->exp->links_per_chain]; + } #endif - // initialize the chains and - // select the function that - // will execute the tests - for (int i=0; i < this->exp->chains_per_thread; i++) { - if (this->exp->access_pattern == Experiment::RANDOM) { - root[i] = random_mem_init( chain_memory[i] ); - run_benchmark = chase_pointers; - } else if (this->exp->access_pattern == Experiment::STRIDED) { - if (0 < this->exp->stride) { - root[i] = forward_mem_init( chain_memory[i] ); - } else { - root[i] = reverse_mem_init( chain_memory[i] ); - } - run_benchmark = chase_pointers; - } else if (this->exp->access_pattern == Experiment::STREAM) { - root[i] = stream_mem_init( chain_memory[i] ); - run_benchmark = follow_streams; + // initialize the chains and + // select the function that + // will execute the tests + for (int i = 0; i < this->exp->chains_per_thread; i++) { + if (this->exp->access_pattern == Experiment::RANDOM) { + root[i] = random_mem_init(chain_memory[i]); + run_benchmark = chase_pointers; + } else if (this->exp->access_pattern == Experiment::STRIDED) { + if (0 < this->exp->stride) { + root[i] = forward_mem_init(chain_memory[i]); + } else { + root[i] = reverse_mem_init(chain_memory[i]); + } + run_benchmark = chase_pointers; + } else if (this->exp->access_pattern == Experiment::STREAM) { + root[i] = stream_mem_init(chain_memory[i]); + run_benchmark = follow_streams; + } } - } - if (this->exp->iterations <= 0) { - volatile static double istart = 0; - volatile static double istop = 0; - volatile static double elapsed = 0; - volatile static int64 iters = 1; - volatile double bound = max(0.2, 10 * Timer::resolution()); - for (iters=1; elapsed <= bound; iters=iters<<1) { - this->bp->barrier(); + if (this->exp->iterations <= 0) { + volatile static double istart = 0; + volatile static double istop = 0; + volatile static double elapsed = 0; + volatile static int64 iters = 1; + volatile double bound = max(0.2, 10 * Timer::resolution()); + for (iters = 1; elapsed <= bound; iters = iters << 1) { + this->bp->barrier(); - // start timer - if (this->thread_id() == 0) { - istart = Timer::seconds(); - } - this->bp->barrier(); + // start timer + if (this->thread_id() == 0) { + istart = Timer::seconds(); + } + this->bp->barrier(); - // chase pointers - run_benchmark(this->exp->chains_per_thread, iters, root, this->exp->bytes_per_line, this->exp->bytes_per_chain, this->exp->stride, this->exp->busy_cycles, this->exp->prefetch); + // chase pointers + run_benchmark(this->exp->chains_per_thread, iters, root, + this->exp->bytes_per_line, this->exp->bytes_per_chain, + this->exp->stride, this->exp->busy_cycles, + this->exp->prefetch); - // barrier - this->bp->barrier(); + // barrier + this->bp->barrier(); - // stop timer - if (this->thread_id() == 0) { - istop = Timer::seconds(); - elapsed = istop - istart; - } - this->bp->barrier(); - } + // stop timer + if (this->thread_id() == 0) { + istop = Timer::seconds(); + elapsed = istop - istart; + } + this->bp->barrier(); + } - // calculate the number of iterations - if (this->thread_id() == 0) { - if (0 < this->exp->seconds) { - this->exp->iterations = max(1, 0.9999 + 0.5 * this->exp->seconds * iters / elapsed); - } else { - this->exp->iterations = max(1, 0.9999 + iters / elapsed); - } + // calculate the number of iterations + if (this->thread_id() == 0) { + if (0 < this->exp->seconds) { + this->exp->iterations = max(1, + 0.9999 + 0.5 * this->exp->seconds * iters / elapsed); + } else { + this->exp->iterations = max(1, 0.9999 + iters / elapsed); + } + } + this->bp->barrier(); } - this->bp->barrier(); - } #if defined(UNDEFINED) #endif - // barrier - for (int e=0; e < this->exp->experiments; e++) { - this->bp->barrier(); + // barrier + for (int e = 0; e < this->exp->experiments; e++) { + this->bp->barrier(); - // start timer - double start = 0; - if (this->thread_id() == 0) start = Timer::seconds(); - this->bp->barrier(); + // start timer + double start = 0; + if (this->thread_id() == 0) + start = Timer::seconds(); + this->bp->barrier(); - // chase pointers - run_benchmark(this->exp->chains_per_thread, this->exp->iterations, root, this->exp->bytes_per_line, this->exp->bytes_per_chain, this->exp->stride, this->exp->busy_cycles, this->exp->prefetch); + // chase pointers + run_benchmark(this->exp->chains_per_thread, this->exp->iterations, root, + this->exp->bytes_per_line, this->exp->bytes_per_chain, + this->exp->stride, this->exp->busy_cycles, this->exp->prefetch); - // barrier - this->bp->barrier(); + // barrier + this->bp->barrier(); - // stop timer - double stop = 0; - if (this->thread_id() == 0) stop = Timer::seconds(); - this->bp->barrier(); + // stop timer + double stop = 0; + if (this->thread_id() == 0) + stop = Timer::seconds(); + this->bp->barrier(); - if (0 <= e) { - if (this->thread_id() == 0) { - double delta = stop - start; - if (0 < delta) { - Run::_seconds = min( Run::_seconds, delta ); + if (0 <= e) { + if (this->thread_id() == 0) { + double delta = stop - start; + if (0 < delta) { + Run::_seconds = min(Run::_seconds, delta); + } + } } - } } - } - this->bp->barrier(); + this->bp->barrier(); - for (int i=0; i < this->exp->chains_per_thread; i++) { - if (chain_memory[i] != NULL) delete [] chain_memory[i]; - } - if (chain_memory != NULL) delete [] chain_memory; + for (int i = 0; i < this->exp->chains_per_thread; i++) { + if (chain_memory[i] != NULL + ) delete[] chain_memory[i]; + } + if (chain_memory != NULL + ) delete[] chain_memory; - return 0; + return 0; } int dummy = 0; -void -Run::mem_check( Chain *m ) -{ - if (m == NULL) dummy += 1; +void Run::mem_check(Chain *m) { + if (m == NULL + ) dummy += 1; } -static double -max( double v1, double v2 ) -{ - if (v1 < v2) return v2; - return v1; +static double max(double v1, double v2) { + if (v1 < v2) + return v2; + return v1; } -static double -min( double v1, double v2 ) -{ - if (v2 < v1) return v2; - return v1; +static double min(double v1, double v2) { + if (v2 < v1) + return v2; + return v1; } - // exclude 2 and mersienne primes, i.e., - // primes of the form 2**n - 1, e.g., - // 3, 7, 31, 127 -static const int prime_table[] = { 5, 11, 13, 17, 19, 23, 37, 41, 43, 47, - 53, 61, 71, 73, 79, 83, 89, 97, 101, 103, 109, 113, 131, 137, 139, 149, - 151, 157, 163, }; +// exclude 2 and mersienne primes, i.e., +// primes of the form 2**n - 1, e.g., +// 3, 7, 31, 127 +static const int prime_table[] = { 5, 11, 13, 17, 19, 23, 37, 41, 43, 47, 53, + 61, 71, 73, 79, 83, 89, 97, 101, 103, 109, 113, 131, 137, 139, 149, 151, + 157, 163, }; static const int prime_table_size = sizeof prime_table / sizeof prime_table[0]; Chain* -Run::random_mem_init( Chain *mem ) -{ - // initialize pointers -- - // choose a page at random, then use - // one pointer from each cache line - // within the page. all pages and - // cache lines are chosen at random. - Chain* root = END_OF_CHAIN; - Chain* prev = END_OF_CHAIN; - int link_within_line = 0; - int64 local_ops_per_chain = 0; +Run::random_mem_init(Chain *mem) { + // initialize pointers -- + // choose a page at random, then use + // one pointer from each cache line + // within the page. all pages and + // cache lines are chosen at random. + Chain* root = END_OF_CHAIN; + Chain* prev = END_OF_CHAIN; + int link_within_line = 0; + int64 local_ops_per_chain = 0; - // we must set a lock because random() - // is not thread safe - Run::global_mutex.lock(); - setstate(this->exp->random_state[this->thread_id()]); - int page_factor = prime_table[ random() % prime_table_size ]; - int page_offset = random() % this->exp->pages_per_chain; - Run::global_mutex.unlock(); - - // loop through the pages - for (int i=0; i < this->exp->pages_per_chain; i++) { - int page = (page_factor * i + page_offset) % this->exp->pages_per_chain; + // we must set a lock because random() + // is not thread safe Run::global_mutex.lock(); setstate(this->exp->random_state[this->thread_id()]); - int line_factor = prime_table[ random() % prime_table_size ]; - int line_offset = random() % this->exp->lines_per_page; + int page_factor = prime_table[random() % prime_table_size]; + int page_offset = random() % this->exp->pages_per_chain; Run::global_mutex.unlock(); - // loop through the lines within a page - for (int j=0; j < this->exp->lines_per_page; j++) { - int line_within_page = (line_factor * j + line_offset) % this->exp->lines_per_page; - int link = page * this->exp->links_per_page + line_within_page * this->exp->links_per_line + link_within_line; + // loop through the pages + for (int i = 0; i < this->exp->pages_per_chain; i++) { + int page = (page_factor * i + page_offset) % this->exp->pages_per_chain; + Run::global_mutex.lock(); + setstate(this->exp->random_state[this->thread_id()]); + int line_factor = prime_table[random() % prime_table_size]; + int line_offset = random() % this->exp->lines_per_page; + Run::global_mutex.unlock(); + + // loop through the lines within a page + for (int j = 0; j < this->exp->lines_per_page; j++) { + int line_within_page = (line_factor * j + line_offset) + % this->exp->lines_per_page; + int link = page * this->exp->links_per_page + + line_within_page * this->exp->links_per_line + + link_within_line; - if (root == END_OF_CHAIN) { + if (root == END_OF_CHAIN) { // printf("root = %d(%d)[0x%x].\n", page, line_within_page, mem+link); - prev = root = mem + link; - local_ops_per_chain += 1; - } else { + prev = root = mem + link; + local_ops_per_chain += 1; + } else { // printf("0x%x = %d(%d)[0x%x].\n", prev, page, line_within_page, mem+link); - prev->next = mem + link; - prev = prev->next; - local_ops_per_chain += 1; - } + prev->next = mem + link; + prev = prev->next; + local_ops_per_chain += 1; + } + } } - } - Run::global_mutex.lock(); - Run::_ops_per_chain = local_ops_per_chain; - Run::global_mutex.unlock(); + Run::global_mutex.lock(); + Run::_ops_per_chain = local_ops_per_chain; + Run::global_mutex.unlock(); - return root; + return root; } Chain* -Run::forward_mem_init( Chain *mem ) -{ - Chain* root = END_OF_CHAIN; - Chain* prev = END_OF_CHAIN; - int link_within_line = 0; - int64 local_ops_per_chain = 0; +Run::forward_mem_init(Chain *mem) { + Chain* root = END_OF_CHAIN; + Chain* prev = END_OF_CHAIN; + int link_within_line = 0; + int64 local_ops_per_chain = 0; - for (int i=0; i < this->exp->lines_per_chain; i += this->exp->stride) { - int link = i * this->exp->links_per_line + link_within_line; - if (root == NULL) { + for (int i = 0; i < this->exp->lines_per_chain; i += this->exp->stride) { + int link = i * this->exp->links_per_line + link_within_line; + if (root == NULL) { // printf("root = %d(%d)[0x%x].\n", page, line_within_page, mem+link); - prev = root = mem + link; - local_ops_per_chain += 1; - } else { + prev = root = mem + link; + local_ops_per_chain += 1; + } else { // printf("0x%x = %d(%d)[0x%x].\n", prev, page, line_within_page, mem+link); - prev->next = mem + link; - prev = prev->next; - local_ops_per_chain += 1; + prev->next = mem + link; + prev = prev->next; + local_ops_per_chain += 1; + } } - } - Run::global_mutex.lock(); - Run::_ops_per_chain = local_ops_per_chain; - Run::global_mutex.unlock(); + Run::global_mutex.lock(); + Run::_ops_per_chain = local_ops_per_chain; + Run::global_mutex.unlock(); - return root; + return root; } Chain* -Run::reverse_mem_init( Chain *mem ) -{ - Chain* root = END_OF_CHAIN; - Chain* prev = END_OF_CHAIN; - int link_within_line = 0; - int64 local_ops_per_chain = 0; +Run::reverse_mem_init(Chain *mem) { + Chain* root = END_OF_CHAIN; + Chain* prev = END_OF_CHAIN; + int link_within_line = 0; + int64 local_ops_per_chain = 0; - int stride = -this->exp->stride; - int last; - for (int i=0; i < this->exp->lines_per_chain; i += stride) { - last = i; - } + int stride = -this->exp->stride; + int last; + for (int i = 0; i < this->exp->lines_per_chain; i += stride) { + last = i; + } - for (int i=last; 0 <= i; i -= stride) { - int link = i * this->exp->links_per_line + link_within_line; - if (root == END_OF_CHAIN) { + for (int i = last; 0 <= i; i -= stride) { + int link = i * this->exp->links_per_line + link_within_line; + if (root == END_OF_CHAIN) { // printf("root = %d(%d)[0x%x].\n", page, line_within_page, mem+link); - prev = root = mem + link; - local_ops_per_chain += 1; - } else { + prev = root = mem + link; + local_ops_per_chain += 1; + } else { // printf("0x%x = %d(%d)[0x%x].\n", prev, page, line_within_page, mem+link); - prev->next = mem + link; - prev = prev->next; - local_ops_per_chain += 1; + prev->next = mem + link; + prev = prev->next; + local_ops_per_chain += 1; + } } - } - Run::global_mutex.lock(); - Run::_ops_per_chain = local_ops_per_chain; - Run::global_mutex.unlock(); + Run::global_mutex.lock(); + Run::_ops_per_chain = local_ops_per_chain; + Run::global_mutex.unlock(); - return root; + return root; } static int64 dumb_ck = 0; -void -mem_chk( Chain *m ) -{ - if (m == END_OF_CHAIN) dumb_ck += 1; +void mem_chk(Chain *m) { + if (m == END_OF_CHAIN) + dumb_ck += 1; } -static void -chase_pointers( - int64 chains_per_thread, // memory loading per thread - int64 iterations, // number of iterations per experiment - Chain** root, // root(s) of the chain(s) to follow - int64 bytes_per_line, // ignored - int64 bytes_per_chain, // ignored - int64 stride, // ignored - int64 busy_cycles, // processing cycles - bool prefetch // prefetch? -) -{ - // chase pointers - switch (chains_per_thread) { - default: - case 1: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - while (a != END_OF_CHAIN) { - a = a->next; - if (prefetch) - prefetch(a->next); - for (int64 j=0; j < busy_cycles; j++) - asm("nop"); - } - mem_chk( a ); - } - break; - case 2: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - while (a != END_OF_CHAIN) { - a = a->next; - b = b->next; - if (prefetch) +static void chase_pointers(int64 chains_per_thread, // memory loading per thread + int64 iterations, // number of iterations per experiment + Chain** root, // root(s) of the chain(s) to follow + int64 bytes_per_line, // ignored + int64 bytes_per_chain, // ignored + int64 stride, // ignored + int64 busy_cycles, // processing cycles + bool prefetch // prefetch? + ) { + // chase pointers + switch (chains_per_thread) { + default: + case 1: + for (int64 i = 0; i < iterations; i++) { + Chain* a = root[0]; + while (a != END_OF_CHAIN) { + a = a->next; + if (prefetch) prefetch(a->next); - for (int64 j=0; j < busy_cycles; j++) - asm("nop"); - } - mem_chk( a ); - mem_chk( b ); - } - break; - case 3: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - while (a != END_OF_CHAIN) { - a = a->next; - b = b->next; - c = c->next; - if (prefetch) + for (int64 j = 0; j < busy_cycles; j++) + asm("nop"); + } + mem_chk(a); + } + break; + case 2: + for (int64 i = 0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + while (a != END_OF_CHAIN) { + a = a->next; + b = b->next; + if (prefetch) prefetch(a->next); - for (int64 j=0; j < busy_cycles; j++) - asm("nop"); - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - } - break; - case 4: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - while (a != END_OF_CHAIN) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - if (prefetch) + for (int64 j = 0; j < busy_cycles; j++) + asm("nop"); + } + mem_chk(a); + mem_chk(b); + } + break; + case 3: + for (int64 i = 0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + while (a != END_OF_CHAIN) { + a = a->next; + b = b->next; + c = c->next; + if (prefetch) prefetch(a->next); - for (int64 j=0; j < busy_cycles; j++) - asm("nop"); - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - } - break; - case 5: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - while (a != END_OF_CHAIN) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - if (prefetch) + for (int64 j = 0; j < busy_cycles; j++) + asm("nop"); + } + mem_chk(a); + mem_chk(b); + mem_chk(c); + } + break; + case 4: + for (int64 i = 0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + while (a != END_OF_CHAIN) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + if (prefetch) prefetch(a->next); - for (int64 j=0; j < busy_cycles; j++) - asm("nop"); - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - } - break; - case 6: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - while (a != END_OF_CHAIN) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - if (prefetch) + for (int64 j = 0; j < busy_cycles; j++) + asm("nop"); + } + mem_chk(a); + mem_chk(b); + mem_chk(c); + mem_chk(d); + } + break; + case 5: + for (int64 i = 0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + while (a != END_OF_CHAIN) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + if (prefetch) prefetch(a->next); - for (int64 j=0; j < busy_cycles; j++) - asm("nop"); - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - mem_chk( f ); - } - break; - case 7: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - while (a != END_OF_CHAIN) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - if (prefetch) + for (int64 j = 0; j < busy_cycles; j++) + asm("nop"); + } + mem_chk(a); + mem_chk(b); + mem_chk(c); + mem_chk(d); + mem_chk(e); + } + break; + case 6: + for (int64 i = 0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + Chain* f = root[5]; + while (a != END_OF_CHAIN) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + f = f->next; + if (prefetch) prefetch(a->next); - for (int64 j=0; j < busy_cycles; j++) - asm("nop"); - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - mem_chk( f ); - mem_chk( g ); - } - break; - case 8: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - while (a != END_OF_CHAIN) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - if (prefetch) + for (int64 j = 0; j < busy_cycles; j++) + asm("nop"); + } + mem_chk(a); + mem_chk(b); + mem_chk(c); + mem_chk(d); + mem_chk(e); + mem_chk(f); + } + break; + case 7: + for (int64 i = 0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + Chain* f = root[5]; + Chain* g = root[6]; + while (a != END_OF_CHAIN) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + f = f->next; + g = g->next; + if (prefetch) prefetch(a->next); - for (int64 j=0; j < busy_cycles; j++) - asm("nop"); - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - mem_chk( f ); - mem_chk( g ); - mem_chk( h ); - } - break; - case 9: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - Chain* j = root[8]; - while (a != END_OF_CHAIN) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - j = j->next; - if (prefetch) + for (int64 j = 0; j < busy_cycles; j++) + asm("nop"); + } + mem_chk(a); + mem_chk(b); + mem_chk(c); + mem_chk(d); + mem_chk(e); + mem_chk(f); + mem_chk(g); + } + break; + case 8: + for (int64 i = 0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + Chain* f = root[5]; + Chain* g = root[6]; + Chain* h = root[7]; + while (a != END_OF_CHAIN) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + f = f->next; + g = g->next; + h = h->next; + if (prefetch) prefetch(a->next); - for (int64 j=0; j < busy_cycles; j++) - asm("nop"); - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - mem_chk( f ); - mem_chk( g ); - mem_chk( h ); - mem_chk( j ); - } - break; - case 10: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - Chain* j = root[8]; - Chain* k = root[9]; - while (a != END_OF_CHAIN) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - j = j->next; - k = k->next; - if (prefetch) + for (int64 j = 0; j < busy_cycles; j++) + asm("nop"); + } + mem_chk(a); + mem_chk(b); + mem_chk(c); + mem_chk(d); + mem_chk(e); + mem_chk(f); + mem_chk(g); + mem_chk(h); + } + break; + case 9: + for (int64 i = 0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + Chain* f = root[5]; + Chain* g = root[6]; + Chain* h = root[7]; + Chain* j = root[8]; + while (a != END_OF_CHAIN) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + f = f->next; + g = g->next; + h = h->next; + j = j->next; + if (prefetch) prefetch(a->next); - for (int64 j=0; j < busy_cycles; j++) - asm("nop"); - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - mem_chk( f ); - mem_chk( g ); - mem_chk( h ); - mem_chk( j ); - mem_chk( k ); - } - break; - case 11: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - Chain* j = root[8]; - Chain* k = root[9]; - Chain* l = root[10]; - while (a != END_OF_CHAIN) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - j = j->next; - k = k->next; - l = l->next; - if (prefetch) + for (int64 j = 0; j < busy_cycles; j++) + asm("nop"); + } + mem_chk(a); + mem_chk(b); + mem_chk(c); + mem_chk(d); + mem_chk(e); + mem_chk(f); + mem_chk(g); + mem_chk(h); + mem_chk(j); + } + break; + case 10: + for (int64 i = 0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + Chain* f = root[5]; + Chain* g = root[6]; + Chain* h = root[7]; + Chain* j = root[8]; + Chain* k = root[9]; + while (a != END_OF_CHAIN) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + f = f->next; + g = g->next; + h = h->next; + j = j->next; + k = k->next; + if (prefetch) prefetch(a->next); - for (int64 j=0; j < busy_cycles; j++) - asm("nop"); - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - mem_chk( f ); - mem_chk( g ); - mem_chk( h ); - mem_chk( j ); - mem_chk( k ); - mem_chk( l ); - } - break; - case 12: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - Chain* j = root[8]; - Chain* k = root[9]; - Chain* l = root[10]; - Chain* m = root[11]; - while (a != END_OF_CHAIN) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - j = j->next; - k = k->next; - l = l->next; - m = m->next; - if (prefetch) + for (int64 j = 0; j < busy_cycles; j++) + asm("nop"); + } + mem_chk(a); + mem_chk(b); + mem_chk(c); + mem_chk(d); + mem_chk(e); + mem_chk(f); + mem_chk(g); + mem_chk(h); + mem_chk(j); + mem_chk(k); + } + break; + case 11: + for (int64 i = 0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + Chain* f = root[5]; + Chain* g = root[6]; + Chain* h = root[7]; + Chain* j = root[8]; + Chain* k = root[9]; + Chain* l = root[10]; + while (a != END_OF_CHAIN) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + f = f->next; + g = g->next; + h = h->next; + j = j->next; + k = k->next; + l = l->next; + if (prefetch) prefetch(a->next); - for (int64 j=0; j < busy_cycles; j++) - asm("nop"); - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - mem_chk( f ); - mem_chk( g ); - mem_chk( h ); - mem_chk( j ); - mem_chk( k ); - mem_chk( l ); - mem_chk( m ); - } - break; - case 13: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - Chain* j = root[8]; - Chain* k = root[9]; - Chain* l = root[10]; - Chain* m = root[11]; - Chain* n = root[12]; - while (a != END_OF_CHAIN) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - j = j->next; - k = k->next; - l = l->next; - m = m->next; - n = n->next; - if (prefetch) + for (int64 j = 0; j < busy_cycles; j++) + asm("nop"); + } + mem_chk(a); + mem_chk(b); + mem_chk(c); + mem_chk(d); + mem_chk(e); + mem_chk(f); + mem_chk(g); + mem_chk(h); + mem_chk(j); + mem_chk(k); + mem_chk(l); + } + break; + case 12: + for (int64 i = 0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + Chain* f = root[5]; + Chain* g = root[6]; + Chain* h = root[7]; + Chain* j = root[8]; + Chain* k = root[9]; + Chain* l = root[10]; + Chain* m = root[11]; + while (a != END_OF_CHAIN) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + f = f->next; + g = g->next; + h = h->next; + j = j->next; + k = k->next; + l = l->next; + m = m->next; + if (prefetch) prefetch(a->next); - for (int64 j=0; j < busy_cycles; j++) - asm("nop"); - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - mem_chk( f ); - mem_chk( g ); - mem_chk( h ); - mem_chk( j ); - mem_chk( k ); - mem_chk( l ); - mem_chk( m ); - mem_chk( n ); - } - break; - case 14: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - Chain* j = root[8]; - Chain* k = root[9]; - Chain* l = root[10]; - Chain* m = root[11]; - Chain* n = root[12]; - Chain* o = root[13]; - while (a != END_OF_CHAIN) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - j = j->next; - k = k->next; - l = l->next; - m = m->next; - n = n->next; - o = o->next; - if (prefetch) + for (int64 j = 0; j < busy_cycles; j++) + asm("nop"); + } + mem_chk(a); + mem_chk(b); + mem_chk(c); + mem_chk(d); + mem_chk(e); + mem_chk(f); + mem_chk(g); + mem_chk(h); + mem_chk(j); + mem_chk(k); + mem_chk(l); + mem_chk(m); + } + break; + case 13: + for (int64 i = 0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + Chain* f = root[5]; + Chain* g = root[6]; + Chain* h = root[7]; + Chain* j = root[8]; + Chain* k = root[9]; + Chain* l = root[10]; + Chain* m = root[11]; + Chain* n = root[12]; + while (a != END_OF_CHAIN) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + f = f->next; + g = g->next; + h = h->next; + j = j->next; + k = k->next; + l = l->next; + m = m->next; + n = n->next; + if (prefetch) prefetch(a->next); - for (int64 j=0; j < busy_cycles; j++) - asm("nop"); - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - mem_chk( f ); - mem_chk( g ); - mem_chk( h ); - mem_chk( j ); - mem_chk( k ); - mem_chk( l ); - mem_chk( m ); - mem_chk( n ); - mem_chk( o ); - } - break; - case 15: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - Chain* j = root[8]; - Chain* k = root[9]; - Chain* l = root[10]; - Chain* m = root[11]; - Chain* n = root[12]; - Chain* o = root[13]; - Chain* p = root[14]; - while (a != END_OF_CHAIN) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - j = j->next; - k = k->next; - l = l->next; - m = m->next; - n = n->next; - o = o->next; - p = p->next; - if (prefetch) + for (int64 j = 0; j < busy_cycles; j++) + asm("nop"); + } + mem_chk(a); + mem_chk(b); + mem_chk(c); + mem_chk(d); + mem_chk(e); + mem_chk(f); + mem_chk(g); + mem_chk(h); + mem_chk(j); + mem_chk(k); + mem_chk(l); + mem_chk(m); + mem_chk(n); + } + break; + case 14: + for (int64 i = 0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + Chain* f = root[5]; + Chain* g = root[6]; + Chain* h = root[7]; + Chain* j = root[8]; + Chain* k = root[9]; + Chain* l = root[10]; + Chain* m = root[11]; + Chain* n = root[12]; + Chain* o = root[13]; + while (a != END_OF_CHAIN) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + f = f->next; + g = g->next; + h = h->next; + j = j->next; + k = k->next; + l = l->next; + m = m->next; + n = n->next; + o = o->next; + if (prefetch) prefetch(a->next); - for (int64 j=0; j < busy_cycles; j++) - asm("nop"); - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - mem_chk( f ); - mem_chk( g ); - mem_chk( h ); - mem_chk( j ); - mem_chk( k ); - mem_chk( l ); - mem_chk( m ); - mem_chk( n ); - mem_chk( o ); - mem_chk( p ); - } - break; - case 16: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - Chain* j = root[8]; - Chain* k = root[9]; - Chain* l = root[10]; - Chain* m = root[11]; - Chain* n = root[12]; - Chain* o = root[13]; - Chain* p = root[14]; - Chain* q = root[15]; - while (a != END_OF_CHAIN) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - j = j->next; - k = k->next; - l = l->next; - m = m->next; - n = n->next; - o = o->next; - p = p->next; - q = q->next; - if (prefetch) + for (int64 j = 0; j < busy_cycles; j++) + asm("nop"); + } + mem_chk(a); + mem_chk(b); + mem_chk(c); + mem_chk(d); + mem_chk(e); + mem_chk(f); + mem_chk(g); + mem_chk(h); + mem_chk(j); + mem_chk(k); + mem_chk(l); + mem_chk(m); + mem_chk(n); + mem_chk(o); + } + break; + case 15: + for (int64 i = 0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + Chain* f = root[5]; + Chain* g = root[6]; + Chain* h = root[7]; + Chain* j = root[8]; + Chain* k = root[9]; + Chain* l = root[10]; + Chain* m = root[11]; + Chain* n = root[12]; + Chain* o = root[13]; + Chain* p = root[14]; + while (a != END_OF_CHAIN) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + f = f->next; + g = g->next; + h = h->next; + j = j->next; + k = k->next; + l = l->next; + m = m->next; + n = n->next; + o = o->next; + p = p->next; + if (prefetch) + prefetch(a->next); + for (int64 j = 0; j < busy_cycles; j++) + asm("nop"); + } + mem_chk(a); + mem_chk(b); + mem_chk(c); + mem_chk(d); + mem_chk(e); + mem_chk(f); + mem_chk(g); + mem_chk(h); + mem_chk(j); + mem_chk(k); + mem_chk(l); + mem_chk(m); + mem_chk(n); + mem_chk(o); + mem_chk(p); + } + break; + case 16: + for (int64 i = 0; i < iterations; i++) { + Chain* a = root[0]; + Chain* b = root[1]; + Chain* c = root[2]; + Chain* d = root[3]; + Chain* e = root[4]; + Chain* f = root[5]; + Chain* g = root[6]; + Chain* h = root[7]; + Chain* j = root[8]; + Chain* k = root[9]; + Chain* l = root[10]; + Chain* m = root[11]; + Chain* n = root[12]; + Chain* o = root[13]; + Chain* p = root[14]; + Chain* q = root[15]; + while (a != END_OF_CHAIN) { + a = a->next; + b = b->next; + c = c->next; + d = d->next; + e = e->next; + f = f->next; + g = g->next; + h = h->next; + j = j->next; + k = k->next; + l = l->next; + m = m->next; + n = n->next; + o = o->next; + p = p->next; + q = q->next; + if (prefetch) prefetch(a->next); - for (int64 j=0; j < busy_cycles; j++) - asm("nop"); - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - mem_chk( f ); - mem_chk( g ); - mem_chk( h ); - mem_chk( j ); - mem_chk( k ); - mem_chk( l ); - mem_chk( m ); - mem_chk( n ); - mem_chk( o ); - mem_chk( p ); - mem_chk( q ); + for (int64 j = 0; j < busy_cycles; j++) + asm("nop"); + } + mem_chk(a); + mem_chk(b); + mem_chk(c); + mem_chk(d); + mem_chk(e); + mem_chk(f); + mem_chk(g); + mem_chk(h); + mem_chk(j); + mem_chk(k); + mem_chk(l); + mem_chk(m); + mem_chk(n); + mem_chk(o); + mem_chk(p); + mem_chk(q); + } } - } } - // NOT WRITTEN YET -- DMP - // JUST A PLACE HOLDER! +// NOT WRITTEN YET -- DMP +// JUST A PLACE HOLDER! Chain* -Run::stream_mem_init( Chain *mem ) -{ +Run::stream_mem_init(Chain *mem) { // fprintf(stderr, "made it into stream_mem_init.\n"); // fprintf(stderr, "chains_per_thread = %ld\n", this->exp->chains_per_thread); // fprintf(stderr, "iterations = %ld\n", this->exp->iterations); // fprintf(stderr, "bytes_per_chain = %ld\n", this->exp->bytes_per_chain); // fprintf(stderr, "stride = %ld\n", this->exp->stride); - int64 local_ops_per_chain = 0; - double* tmp = (double *) mem; - int64 refs_per_line = this->exp->bytes_per_line / sizeof(double); - int64 refs_per_chain = this->exp->bytes_per_chain / sizeof(double); + int64 local_ops_per_chain = 0; + double* tmp = (double *) mem; + int64 refs_per_line = this->exp->bytes_per_line / sizeof(double); + int64 refs_per_chain = this->exp->bytes_per_chain / sizeof(double); // fprintf(stderr, "refs_per_chain = %ld\n", refs_per_chain); - for (int64 i=0; i < refs_per_chain; i += this->exp->stride*refs_per_line) { - tmp[i] = 0; - local_ops_per_chain += 1; - } + for (int64 i = 0; i < refs_per_chain; + i += this->exp->stride * refs_per_line) { + tmp[i] = 0; + local_ops_per_chain += 1; + } - Run::global_mutex.lock(); - Run::_ops_per_chain = local_ops_per_chain; - Run::global_mutex.unlock(); + Run::global_mutex.lock(); + Run::_ops_per_chain = local_ops_per_chain; + Run::global_mutex.unlock(); // fprintf(stderr, "made it out of stream_mem_init.\n"); - return mem; + return mem; } static int64 summ_ck = 0; -void -sum_chk( double t ) -{ - if (t != 0) summ_ck += 1; +void sum_chk(double t) { + if (t != 0) + summ_ck += 1; } - // NOT WRITTEN YET -- DMP - // JUST A PLACE HOLDER! -static void -follow_streams( - int64 chains_per_thread, // memory loading per thread - int64 iterations, // number of iterations per experiment - Chain** root, // root(s) of the chain(s) to follow - int64 bytes_per_line, // ignored - int64 bytes_per_chain, // ignored - int64 stride, // ignored - int64 busy_cycles, // ignored - bool prefetch // ignored -) -{ - int64 refs_per_line = bytes_per_line / sizeof(double); - int64 refs_per_chain = bytes_per_chain / sizeof(double); +// NOT WRITTEN YET -- DMP +// JUST A PLACE HOLDER! +static void follow_streams(int64 chains_per_thread, // memory loading per thread + int64 iterations, // number of iterations per experiment + Chain** root, // root(s) of the chain(s) to follow + int64 bytes_per_line, // ignored + int64 bytes_per_chain, // ignored + int64 stride, // ignored + int64 busy_cycles, // ignored + bool prefetch // ignored + ) { + int64 refs_per_line = bytes_per_line / sizeof(double); + int64 refs_per_chain = bytes_per_chain / sizeof(double); - // chase pointers - switch (chains_per_thread) { - default: - case 1: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j]; - } - sum_chk( t ); - } - break; - case 2: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j]; - } - sum_chk( t ); - } - break; - case 3: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2[j]; - } - sum_chk( t ); - } - break; - case 4: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j]; - } - sum_chk( t ); - } - break; - case 5: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - double* a4 = (double *) root[4]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j]; - } - sum_chk( t ); - } - break; - case 6: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - double* a4 = (double *) root[4]; - double* a5 = (double *) root[5]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j]; - } - sum_chk( t ); - } - break; - case 7: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - double* a4 = (double *) root[4]; - double* a5 = (double *) root[5]; - double* a6 = (double *) root[6]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]; - } - sum_chk( t ); - } - break; - case 8: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - double* a4 = (double *) root[4]; - double* a5 = (double *) root[5]; - double* a6 = (double *) root[6]; - double* a7 = (double *) root[7]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j]; - } - sum_chk( t ); - } - break; - case 9: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - double* a4 = (double *) root[4]; - double* a5 = (double *) root[5]; - double* a6 = (double *) root[6]; - double* a7 = (double *) root[7]; - double* a8 = (double *) root[8]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j] + - a8[j]; - } - sum_chk( t ); - } - break; - case 10: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - double* a4 = (double *) root[4]; - double* a5 = (double *) root[5]; - double* a6 = (double *) root[6]; - double* a7 = (double *) root[7]; - double* a8 = (double *) root[8]; - double* a9 = (double *) root[9]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j] + - a8[j] + a9[j]; - } - sum_chk( t ); - } - break; - case 11: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[ 0]; - double* a1 = (double *) root[ 1]; - double* a2 = (double *) root[ 2]; - double* a3 = (double *) root[ 3]; - double* a4 = (double *) root[ 4]; - double* a5 = (double *) root[ 5]; - double* a6 = (double *) root[ 6]; - double* a7 = (double *) root[ 7]; - double* a8 = (double *) root[ 8]; - double* a9 = (double *) root[ 9]; - double* a10 = (double *) root[10]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2 [j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j] + - a8[j] + a9[j] + a10[j]; - } - sum_chk( t ); - } - break; - case 12: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[ 0]; - double* a1 = (double *) root[ 1]; - double* a2 = (double *) root[ 2]; - double* a3 = (double *) root[ 3]; - double* a4 = (double *) root[ 4]; - double* a5 = (double *) root[ 5]; - double* a6 = (double *) root[ 6]; - double* a7 = (double *) root[ 7]; - double* a8 = (double *) root[ 8]; - double* a9 = (double *) root[ 9]; - double* a10 = (double *) root[10]; - double* a11 = (double *) root[11]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4[j] + a5[j] + a6[j] + a7[j] + - a8[j] + a9[j] + a10[j] + a11[j]; - } - sum_chk( t ); - } - break; - case 13: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[ 0]; - double* a1 = (double *) root[ 1]; - double* a2 = (double *) root[ 2]; - double* a3 = (double *) root[ 3]; - double* a4 = (double *) root[ 4]; - double* a5 = (double *) root[ 5]; - double* a6 = (double *) root[ 6]; - double* a7 = (double *) root[ 7]; - double* a8 = (double *) root[ 8]; - double* a9 = (double *) root[ 9]; - double* a10 = (double *) root[10]; - double* a11 = (double *) root[11]; - double* a12 = (double *) root[12]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5[j] + a6[j] + a7[j] + - a8[j] + a9[j] + a10[j] + a11[j] + a12[j]; - } - sum_chk( t ); - } - break; - case 14: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[ 0]; - double* a1 = (double *) root[ 1]; - double* a2 = (double *) root[ 2]; - double* a3 = (double *) root[ 3]; - double* a4 = (double *) root[ 4]; - double* a5 = (double *) root[ 5]; - double* a6 = (double *) root[ 6]; - double* a7 = (double *) root[ 7]; - double* a8 = (double *) root[ 8]; - double* a9 = (double *) root[ 9]; - double* a10 = (double *) root[10]; - double* a11 = (double *) root[11]; - double* a12 = (double *) root[12]; - double* a13 = (double *) root[13]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5 [j] + a6[j] + a7[j] + - a8[j] + a9[j] + a10[j] + a11[j] + a12[j] + a13[j]; - } - sum_chk( t ); - } - break; - case 15: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[ 0]; - double* a1 = (double *) root[ 1]; - double* a2 = (double *) root[ 2]; - double* a3 = (double *) root[ 3]; - double* a4 = (double *) root[ 4]; - double* a5 = (double *) root[ 5]; - double* a6 = (double *) root[ 6]; - double* a7 = (double *) root[ 7]; - double* a8 = (double *) root[ 8]; - double* a9 = (double *) root[ 9]; - double* a10 = (double *) root[10]; - double* a11 = (double *) root[11]; - double* a12 = (double *) root[12]; - double* a13 = (double *) root[13]; - double* a14 = (double *) root[14]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5 [j] + a6 [j] + a7[j] + - a8[j] + a9[j] + a10[j] + a11[j] + a12[j] + a13[j] + a14[j]; - } - sum_chk( t ); - } - break; - case 16: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[ 0]; - double* a1 = (double *) root[ 1]; - double* a2 = (double *) root[ 2]; - double* a3 = (double *) root[ 3]; - double* a4 = (double *) root[ 4]; - double* a5 = (double *) root[ 5]; - double* a6 = (double *) root[ 6]; - double* a7 = (double *) root[ 7]; - double* a8 = (double *) root[ 8]; - double* a9 = (double *) root[ 9]; - double* a10 = (double *) root[10]; - double* a11 = (double *) root[11]; - double* a12 = (double *) root[12]; - double* a13 = (double *) root[13]; - double* a14 = (double *) root[14]; - double* a15 = (double *) root[15]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5 [j] + a6 [j] + a7 [j] + - a8[j] + a9[j] + a10[j] + a11[j] + a12[j] + a13[j] + a14[j] + a15[j]; - } - sum_chk( t ); + // chase pointers + switch (chains_per_thread) { + default: + case 1: + for (int64 i = 0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) { + t += a0[j]; + } + sum_chk(t); + } + break; + case 2: + for (int64 i = 0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) { + t += a0[j] + a1[j]; + } + sum_chk(t); + } + break; + case 3: + for (int64 i = 0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) { + t += a0[j] + a1[j] + a2[j]; + } + sum_chk(t); + } + break; + case 4: + for (int64 i = 0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j]; + } + sum_chk(t); + } + break; + case 5: + for (int64 i = 0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + double* a4 = (double *) root[4]; + for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j]; + } + sum_chk(t); + } + break; + case 6: + for (int64 i = 0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + double* a4 = (double *) root[4]; + double* a5 = (double *) root[5]; + for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j]; + } + sum_chk(t); + } + break; + case 7: + for (int64 i = 0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + double* a4 = (double *) root[4]; + double* a5 = (double *) root[5]; + double* a6 = (double *) root[6]; + for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]; + } + sum_chk(t); + } + break; + case 8: + for (int64 i = 0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + double* a4 = (double *) root[4]; + double* a5 = (double *) root[5]; + double* a6 = (double *) root[6]; + double* a7 = (double *) root[7]; + for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + + a7[j]; + } + sum_chk(t); + } + break; + case 9: + for (int64 i = 0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + double* a4 = (double *) root[4]; + double* a5 = (double *) root[5]; + double* a6 = (double *) root[6]; + double* a7 = (double *) root[7]; + double* a8 = (double *) root[8]; + for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + + a7[j] + a8[j]; + } + sum_chk(t); + } + break; + case 10: + for (int64 i = 0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + double* a4 = (double *) root[4]; + double* a5 = (double *) root[5]; + double* a6 = (double *) root[6]; + double* a7 = (double *) root[7]; + double* a8 = (double *) root[8]; + double* a9 = (double *) root[9]; + for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + + a7[j] + a8[j] + a9[j]; + } + sum_chk(t); + } + break; + case 11: + for (int64 i = 0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + double* a4 = (double *) root[4]; + double* a5 = (double *) root[5]; + double* a6 = (double *) root[6]; + double* a7 = (double *) root[7]; + double* a8 = (double *) root[8]; + double* a9 = (double *) root[9]; + double* a10 = (double *) root[10]; + for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + + a7[j] + a8[j] + a9[j] + a10[j]; + } + sum_chk(t); + } + break; + case 12: + for (int64 i = 0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + double* a4 = (double *) root[4]; + double* a5 = (double *) root[5]; + double* a6 = (double *) root[6]; + double* a7 = (double *) root[7]; + double* a8 = (double *) root[8]; + double* a9 = (double *) root[9]; + double* a10 = (double *) root[10]; + double* a11 = (double *) root[11]; + for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + + a7[j] + a8[j] + a9[j] + a10[j] + a11[j]; + } + sum_chk(t); + } + break; + case 13: + for (int64 i = 0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + double* a4 = (double *) root[4]; + double* a5 = (double *) root[5]; + double* a6 = (double *) root[6]; + double* a7 = (double *) root[7]; + double* a8 = (double *) root[8]; + double* a9 = (double *) root[9]; + double* a10 = (double *) root[10]; + double* a11 = (double *) root[11]; + double* a12 = (double *) root[12]; + for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + + a7[j] + a8[j] + a9[j] + a10[j] + a11[j] + a12[j]; + } + sum_chk(t); + } + break; + case 14: + for (int64 i = 0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + double* a4 = (double *) root[4]; + double* a5 = (double *) root[5]; + double* a6 = (double *) root[6]; + double* a7 = (double *) root[7]; + double* a8 = (double *) root[8]; + double* a9 = (double *) root[9]; + double* a10 = (double *) root[10]; + double* a11 = (double *) root[11]; + double* a12 = (double *) root[12]; + double* a13 = (double *) root[13]; + for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + + a7[j] + a8[j] + a9[j] + a10[j] + a11[j] + a12[j] + + a13[j]; + } + sum_chk(t); + } + break; + case 15: + for (int64 i = 0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + double* a4 = (double *) root[4]; + double* a5 = (double *) root[5]; + double* a6 = (double *) root[6]; + double* a7 = (double *) root[7]; + double* a8 = (double *) root[8]; + double* a9 = (double *) root[9]; + double* a10 = (double *) root[10]; + double* a11 = (double *) root[11]; + double* a12 = (double *) root[12]; + double* a13 = (double *) root[13]; + double* a14 = (double *) root[14]; + for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + + a7[j] + a8[j] + a9[j] + a10[j] + a11[j] + a12[j] + + a13[j] + a14[j]; + } + sum_chk(t); + } + break; + case 16: + for (int64 i = 0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + double* a4 = (double *) root[4]; + double* a5 = (double *) root[5]; + double* a6 = (double *) root[6]; + double* a7 = (double *) root[7]; + double* a8 = (double *) root[8]; + double* a9 = (double *) root[9]; + double* a10 = (double *) root[10]; + double* a11 = (double *) root[11]; + double* a12 = (double *) root[12]; + double* a13 = (double *) root[13]; + double* a14 = (double *) root[14]; + double* a15 = (double *) root[15]; + for (int64 j = 0; j < refs_per_chain; j += stride * refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + + a7[j] + a8[j] + a9[j] + a10[j] + a11[j] + a12[j] + + a13[j] + a14[j] + a15[j]; + } + sum_chk(t); + } + break; } - break; - } } -- cgit v1.2.3