diff options
author | Tim Besard <tim.besard@gmail.com> | 2011-10-27 16:51:48 +0200 |
---|---|---|
committer | Tim Besard <tim.besard@gmail.com> | 2011-10-27 16:51:48 +0200 |
commit | 122d27347290ff066635bc3005a332d9574bedb2 (patch) | |
tree | 32dfc9e9dfc73503606e85a5f469470486e841ee /Run.C | |
parent | a52db2ab61b21fe7721419747b96e1689c9069a0 (diff) |
Cleaning up the code a bit.
Diffstat (limited to 'Run.C')
-rw-r--r-- | Run.C | 1206 |
1 files changed, 0 insertions, 1206 deletions
@@ -1,1206 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2006 International Business Machines Corporation. * - * All rights reserved. This program and the accompanying materials * - * are made available under the terms of the Common Public License v1.0 * - * which accompanies this distribution, and is available at * - * http://www.opensource.org/licenses/cpl1.0.php * - * * - * Contributors: * - * Douglas M. Pase - initial API and implementation * - *******************************************************************************/ - - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> - -#if defined(NUMA) -#include <numa.h> -#endif - -#include "Run.h" - -#include "Chain.h" -#include "Timer.h" -#include "SpinBarrier.h" - - -static double max( double v1, double v2 ); -static double min( double v1, double v2 ); -static void chase_pointers(int64 chains_per_thread, int64 iterations, Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride); -static void follow_streams(int64 chains_per_thread, int64 iterations, Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride); -static void (*run_benchmark)(int64 chains_per_thread, int64 iterations, Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride) = chase_pointers; - -Lock Run::global_mutex; -int64 Run::_ops_per_chain = 0; -double Run::_seconds = 1E9; - -Run::Run() -: exp(NULL), bp(NULL) -{ -} - -Run::~Run() -{ -} - -void -Run::set( Experiment &e, SpinBarrier* sbp ) -{ - this->exp = &e; - this->bp = sbp; -} - -int -Run::run() -{ - // first allocate all memory for the chains, - // making sure it is allocated within the - // intended numa domains - Chain** chain_memory = new Chain* [ this->exp->chains_per_thread ]; - Chain** root = new Chain* [ this->exp->chains_per_thread ]; - -#if defined(NUMA) - // establish the node id where this thread - // will run. threads are mapped to nodes - // by the set-up code for Experiment. - int run_node_id = this->exp->thread_domain[this->thread_id()]; - numa_run_on_node(run_node_id); - - // establish the node id where this thread's - // memory will be allocated. - for (int i=0; i < this->exp->chains_per_thread; i++) { - int alloc_node_id = this->exp->chain_domain[this->thread_id()][i]; - nodemask_t alloc_mask; - nodemask_zero(&alloc_mask); - nodemask_set(&alloc_mask, alloc_node_id); - numa_set_membind(&alloc_mask); - - chain_memory[i] = new Chain[ this->exp->links_per_chain ]; - } -#else - for (int i=0; i < this->exp->chains_per_thread; i++) { - chain_memory[i] = new Chain[ this->exp->links_per_chain ]; - } -#endif - - // initialize the chains and - // select the function that - // will execute the tests - for (int i=0; i < this->exp->chains_per_thread; i++) { - if (this->exp->access_pattern == Experiment::RANDOM) { - root[i] = random_mem_init( chain_memory[i] ); - run_benchmark = chase_pointers; - } else if (this->exp->access_pattern == Experiment::STRIDED) { - if (0 < this->exp->stride) { - root[i] = forward_mem_init( chain_memory[i] ); - } else { - root[i] = reverse_mem_init( chain_memory[i] ); - } - run_benchmark = chase_pointers; - } else if (this->exp->access_pattern == Experiment::STREAM) { - root[i] = stream_mem_init( chain_memory[i] ); - run_benchmark = follow_streams; - } - } - - if (this->exp->iterations <= 0) { - volatile static double istart = 0; - volatile static double istop = 0; - volatile static double elapsed = 0; - volatile static int64 iters = 1; - volatile double bound = max(0.2, 10 * Timer::resolution()); - for (iters=1; elapsed <= bound; iters=iters<<1) { - this->bp->barrier(); - - // start timer - if (this->thread_id() == 0) { - istart = Timer::seconds(); - } - this->bp->barrier(); - - // chase pointers - run_benchmark(this->exp->chains_per_thread, iters, root, this->exp->bytes_per_line, this->exp->bytes_per_chain, this->exp->stride); - - // barrier - this->bp->barrier(); - - // stop timer - if (this->thread_id() == 0) { - istop = Timer::seconds(); - elapsed = istop - istart; - } - this->bp->barrier(); - } - - // calculate the number of iterations - if (this->thread_id() == 0) { - if (0 < this->exp->seconds) { - this->exp->iterations = max(1, 0.9999 + 0.5 * this->exp->seconds * iters / elapsed); - } else { - this->exp->iterations = max(1, 0.9999 + iters / elapsed); - } - } - this->bp->barrier(); - } -#if defined(UNDEFINED) -#endif - - // barrier - for (int e=0; e < this->exp->experiments; e++) { - this->bp->barrier(); - - // start timer - double start = 0; - if (this->thread_id() == 0) start = Timer::seconds(); - this->bp->barrier(); - - // chase pointers - run_benchmark(this->exp->chains_per_thread, this->exp->iterations, root, this->exp->bytes_per_line, this->exp->bytes_per_chain, this->exp->stride); - - // barrier - this->bp->barrier(); - - // stop timer - double stop = 0; - if (this->thread_id() == 0) stop = Timer::seconds(); - this->bp->barrier(); - - if (0 <= e) { - if (this->thread_id() == 0) { - double delta = stop - start; - if (0 < delta) { - Run::_seconds = min( Run::_seconds, delta ); - } - } - } - } - - this->bp->barrier(); - - for (int i=0; i < this->exp->chains_per_thread; i++) { - if (chain_memory[i] != NULL) delete [] chain_memory[i]; - } - if (chain_memory != NULL) delete [] chain_memory; - - return 0; -} - -int dummy = 0; -void -Run::mem_check( Chain *m ) -{ - if (m == NULL) dummy += 1; -} - -static double -max( double v1, double v2 ) -{ - if (v1 < v2) return v2; - return v1; -} - -static double -min( double v1, double v2 ) -{ - if (v2 < v1) return v2; - return v1; -} - - // exclude 2 and mersienne primes, i.e., - // primes of the form 2**n - 1, e.g., - // 3, 7, 31, 127 -static const int prime_table[] = { 5, 11, 13, 17, 19, 23, 37, 41, 43, 47, - 53, 61, 71, 73, 79, 83, 89, 97, 101, 103, 109, 113, 131, 137, 139, 149, - 151, 157, 163, }; -static const int prime_table_size = sizeof prime_table / sizeof prime_table[0]; - -Chain* -Run::random_mem_init( Chain *mem ) -{ - // initialize pointers -- - // choose a page at random, then use - // one pointer from each cache line - // within the page. all pages and - // cache lines are chosen at random. - Chain* root = NULL; - Chain* prev = NULL; - int link_within_line = 0; - int64 local_ops_per_chain = 0; - - // we must set a lock because random() - // is not thread safe - Run::global_mutex.lock(); - setstate(this->exp->random_state[this->thread_id()]); - int page_factor = prime_table[ random() % prime_table_size ]; - int page_offset = random() % this->exp->pages_per_chain; - Run::global_mutex.unlock(); - - // loop through the pages - for (int i=0; i < this->exp->pages_per_chain; i++) { - int page = (page_factor * i + page_offset) % this->exp->pages_per_chain; - Run::global_mutex.lock(); - setstate(this->exp->random_state[this->thread_id()]); - int line_factor = prime_table[ random() % prime_table_size ]; - int line_offset = random() % this->exp->lines_per_page; - Run::global_mutex.unlock(); - - // loop through the lines within a page - for (int j=0; j < this->exp->lines_per_page; j++) { - int line_within_page = (line_factor * j + line_offset) % this->exp->lines_per_page; - int link = page * this->exp->links_per_page + line_within_page * this->exp->links_per_line + link_within_line; - - if (root == NULL) { -// printf("root = %d(%d)[0x%x].\n", page, line_within_page, mem+link); - prev = root = mem + link; - local_ops_per_chain += 1; - } else { -// printf("0x%x = %d(%d)[0x%x].\n", prev, page, line_within_page, mem+link); - prev->next = mem + link; - prev = prev->next; - local_ops_per_chain += 1; - } - } - } - - Run::global_mutex.lock(); - Run::_ops_per_chain = local_ops_per_chain; - Run::global_mutex.unlock(); - - return root; -} - -Chain* -Run::forward_mem_init( Chain *mem ) -{ - Chain* root = NULL; - Chain* prev = NULL; - int link_within_line = 0; - int64 local_ops_per_chain = 0; - - for (int i=0; i < this->exp->lines_per_chain; i += this->exp->stride) { - int link = i * this->exp->links_per_line + link_within_line; - if (root == NULL) { -// printf("root = %d(%d)[0x%x].\n", page, line_within_page, mem+link); - prev = root = mem + link; - local_ops_per_chain += 1; - } else { -// printf("0x%x = %d(%d)[0x%x].\n", prev, page, line_within_page, mem+link); - prev->next = mem + link; - prev = prev->next; - local_ops_per_chain += 1; - } - } - - Run::global_mutex.lock(); - Run::_ops_per_chain = local_ops_per_chain; - Run::global_mutex.unlock(); - - return root; -} - -Chain* -Run::reverse_mem_init( Chain *mem ) -{ - Chain* root = NULL; - Chain* prev = NULL; - int link_within_line = 0; - int64 local_ops_per_chain = 0; - - int stride = -this->exp->stride; - int last; - for (int i=0; i < this->exp->lines_per_chain; i += stride) { - last = i; - } - - for (int i=last; 0 <= i; i -= stride) { - int link = i * this->exp->links_per_line + link_within_line; - if (root == NULL) { -// printf("root = %d(%d)[0x%x].\n", page, line_within_page, mem+link); - prev = root = mem + link; - local_ops_per_chain += 1; - } else { -// printf("0x%x = %d(%d)[0x%x].\n", prev, page, line_within_page, mem+link); - prev->next = mem + link; - prev = prev->next; - local_ops_per_chain += 1; - } - } - - Run::global_mutex.lock(); - Run::_ops_per_chain = local_ops_per_chain; - Run::global_mutex.unlock(); - - return root; -} - -static int64 dumb_ck = 0; -void -mem_chk( Chain *m ) -{ - if (m == NULL) dumb_ck += 1; -} - -static void -chase_pointers( - int64 chains_per_thread, // memory loading per thread - int64 iterations, // number of iterations per experiment - Chain** root, // root(s) of the chain(s) to follow - int64 bytes_per_line, // ignored - int64 bytes_per_chain, // ignored - int64 stride // ignored -) -{ - // chase pointers - switch (chains_per_thread) { - default: - case 1: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - while (a != NULL) { - a = a->next; - } - mem_chk( a ); - } - break; - case 2: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - while (a != NULL) { - a = a->next; - b = b->next; - } - mem_chk( a ); - mem_chk( b ); - } - break; - case 3: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - while (a != NULL) { - a = a->next; - b = b->next; - c = c->next; - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - } - break; - case 4: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - while (a != NULL) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - } - break; - case 5: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - while (a != NULL) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - } - break; - case 6: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - while (a != NULL) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - mem_chk( f ); - } - break; - case 7: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - while (a != NULL) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - mem_chk( f ); - mem_chk( g ); - } - break; - case 8: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - while (a != NULL) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - mem_chk( f ); - mem_chk( g ); - mem_chk( h ); - } - break; - case 9: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - Chain* j = root[8]; - while (a != NULL) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - j = j->next; - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - mem_chk( f ); - mem_chk( g ); - mem_chk( h ); - mem_chk( j ); - } - break; - case 10: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - Chain* j = root[8]; - Chain* k = root[9]; - while (a != NULL) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - j = j->next; - k = k->next; - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - mem_chk( f ); - mem_chk( g ); - mem_chk( h ); - mem_chk( j ); - mem_chk( k ); - } - break; - case 11: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - Chain* j = root[8]; - Chain* k = root[9]; - Chain* l = root[10]; - while (a != NULL) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - j = j->next; - k = k->next; - l = l->next; - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - mem_chk( f ); - mem_chk( g ); - mem_chk( h ); - mem_chk( j ); - mem_chk( k ); - mem_chk( l ); - } - break; - case 12: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - Chain* j = root[8]; - Chain* k = root[9]; - Chain* l = root[10]; - Chain* m = root[11]; - while (a != NULL) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - j = j->next; - k = k->next; - l = l->next; - m = m->next; - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - mem_chk( f ); - mem_chk( g ); - mem_chk( h ); - mem_chk( j ); - mem_chk( k ); - mem_chk( l ); - mem_chk( m ); - } - break; - case 13: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - Chain* j = root[8]; - Chain* k = root[9]; - Chain* l = root[10]; - Chain* m = root[11]; - Chain* n = root[12]; - while (a != NULL) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - j = j->next; - k = k->next; - l = l->next; - m = m->next; - n = n->next; - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - mem_chk( f ); - mem_chk( g ); - mem_chk( h ); - mem_chk( j ); - mem_chk( k ); - mem_chk( l ); - mem_chk( m ); - mem_chk( n ); - } - break; - case 14: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - Chain* j = root[8]; - Chain* k = root[9]; - Chain* l = root[10]; - Chain* m = root[11]; - Chain* n = root[12]; - Chain* o = root[13]; - while (a != NULL) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - j = j->next; - k = k->next; - l = l->next; - m = m->next; - n = n->next; - o = o->next; - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - mem_chk( f ); - mem_chk( g ); - mem_chk( h ); - mem_chk( j ); - mem_chk( k ); - mem_chk( l ); - mem_chk( m ); - mem_chk( n ); - mem_chk( o ); - } - break; - case 15: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - Chain* j = root[8]; - Chain* k = root[9]; - Chain* l = root[10]; - Chain* m = root[11]; - Chain* n = root[12]; - Chain* o = root[13]; - Chain* p = root[14]; - while (a != NULL) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - j = j->next; - k = k->next; - l = l->next; - m = m->next; - n = n->next; - o = o->next; - p = p->next; - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - mem_chk( f ); - mem_chk( g ); - mem_chk( h ); - mem_chk( j ); - mem_chk( k ); - mem_chk( l ); - mem_chk( m ); - mem_chk( n ); - mem_chk( o ); - mem_chk( p ); - } - break; - case 16: - for (int64 i=0; i < iterations; i++) { - Chain* a = root[0]; - Chain* b = root[1]; - Chain* c = root[2]; - Chain* d = root[3]; - Chain* e = root[4]; - Chain* f = root[5]; - Chain* g = root[6]; - Chain* h = root[7]; - Chain* j = root[8]; - Chain* k = root[9]; - Chain* l = root[10]; - Chain* m = root[11]; - Chain* n = root[12]; - Chain* o = root[13]; - Chain* p = root[14]; - Chain* q = root[15]; - while (a != NULL) { - a = a->next; - b = b->next; - c = c->next; - d = d->next; - e = e->next; - f = f->next; - g = g->next; - h = h->next; - j = j->next; - k = k->next; - l = l->next; - m = m->next; - n = n->next; - o = o->next; - p = p->next; - q = q->next; - } - mem_chk( a ); - mem_chk( b ); - mem_chk( c ); - mem_chk( d ); - mem_chk( e ); - mem_chk( f ); - mem_chk( g ); - mem_chk( h ); - mem_chk( j ); - mem_chk( k ); - mem_chk( l ); - mem_chk( m ); - mem_chk( n ); - mem_chk( o ); - mem_chk( p ); - mem_chk( q ); - } - } -} - - // NOT WRITTEN YET -- DMP - // JUST A PLACE HOLDER! -Chain* -Run::stream_mem_init( Chain *mem ) -{ -// fprintf(stderr, "made it into stream_mem_init.\n"); -// fprintf(stderr, "chains_per_thread = %ld\n", this->exp->chains_per_thread); -// fprintf(stderr, "iterations = %ld\n", this->exp->iterations); -// fprintf(stderr, "bytes_per_chain = %ld\n", this->exp->bytes_per_chain); -// fprintf(stderr, "stride = %ld\n", this->exp->stride); - int64 local_ops_per_chain = 0; - double* tmp = (double *) mem; - int64 refs_per_line = this->exp->bytes_per_line / sizeof(double); - int64 refs_per_chain = this->exp->bytes_per_chain / sizeof(double); -// fprintf(stderr, "refs_per_chain = %ld\n", refs_per_chain); - - for (int64 i=0; i < refs_per_chain; i += this->exp->stride*refs_per_line) { - tmp[i] = 0; - local_ops_per_chain += 1; - } - - Run::global_mutex.lock(); - Run::_ops_per_chain = local_ops_per_chain; - Run::global_mutex.unlock(); - -// fprintf(stderr, "made it out of stream_mem_init.\n"); - return mem; -} - -static int64 summ_ck = 0; -void -sum_chk( double t ) -{ - if (t != 0) summ_ck += 1; -} - - // NOT WRITTEN YET -- DMP - // JUST A PLACE HOLDER! -static void -follow_streams( - int64 chains_per_thread, // memory loading per thread - int64 iterations, // number of iterations per experiment - Chain** root, // root(s) of the chain(s) to follow - int64 bytes_per_line, // ignored - int64 bytes_per_chain, // ignored - int64 stride // ignored -) -{ - int64 refs_per_line = bytes_per_line / sizeof(double); - int64 refs_per_chain = bytes_per_chain / sizeof(double); - - // chase pointers - switch (chains_per_thread) { - default: - case 1: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j]; - } - sum_chk( t ); - } - break; - case 2: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j]; - } - sum_chk( t ); - } - break; - case 3: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2[j]; - } - sum_chk( t ); - } - break; - case 4: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j]; - } - sum_chk( t ); - } - break; - case 5: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - double* a4 = (double *) root[4]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j]; - } - sum_chk( t ); - } - break; - case 6: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - double* a4 = (double *) root[4]; - double* a5 = (double *) root[5]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j]; - } - sum_chk( t ); - } - break; - case 7: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - double* a4 = (double *) root[4]; - double* a5 = (double *) root[5]; - double* a6 = (double *) root[6]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]; - } - sum_chk( t ); - } - break; - case 8: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - double* a4 = (double *) root[4]; - double* a5 = (double *) root[5]; - double* a6 = (double *) root[6]; - double* a7 = (double *) root[7]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j]; - } - sum_chk( t ); - } - break; - case 9: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - double* a4 = (double *) root[4]; - double* a5 = (double *) root[5]; - double* a6 = (double *) root[6]; - double* a7 = (double *) root[7]; - double* a8 = (double *) root[8]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j] + - a8[j]; - } - sum_chk( t ); - } - break; - case 10: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[0]; - double* a1 = (double *) root[1]; - double* a2 = (double *) root[2]; - double* a3 = (double *) root[3]; - double* a4 = (double *) root[4]; - double* a5 = (double *) root[5]; - double* a6 = (double *) root[6]; - double* a7 = (double *) root[7]; - double* a8 = (double *) root[8]; - double* a9 = (double *) root[9]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j] + - a8[j] + a9[j]; - } - sum_chk( t ); - } - break; - case 11: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[ 0]; - double* a1 = (double *) root[ 1]; - double* a2 = (double *) root[ 2]; - double* a3 = (double *) root[ 3]; - double* a4 = (double *) root[ 4]; - double* a5 = (double *) root[ 5]; - double* a6 = (double *) root[ 6]; - double* a7 = (double *) root[ 7]; - double* a8 = (double *) root[ 8]; - double* a9 = (double *) root[ 9]; - double* a10 = (double *) root[10]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2 [j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j] + - a8[j] + a9[j] + a10[j]; - } - sum_chk( t ); - } - break; - case 12: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[ 0]; - double* a1 = (double *) root[ 1]; - double* a2 = (double *) root[ 2]; - double* a3 = (double *) root[ 3]; - double* a4 = (double *) root[ 4]; - double* a5 = (double *) root[ 5]; - double* a6 = (double *) root[ 6]; - double* a7 = (double *) root[ 7]; - double* a8 = (double *) root[ 8]; - double* a9 = (double *) root[ 9]; - double* a10 = (double *) root[10]; - double* a11 = (double *) root[11]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4[j] + a5[j] + a6[j] + a7[j] + - a8[j] + a9[j] + a10[j] + a11[j]; - } - sum_chk( t ); - } - break; - case 13: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[ 0]; - double* a1 = (double *) root[ 1]; - double* a2 = (double *) root[ 2]; - double* a3 = (double *) root[ 3]; - double* a4 = (double *) root[ 4]; - double* a5 = (double *) root[ 5]; - double* a6 = (double *) root[ 6]; - double* a7 = (double *) root[ 7]; - double* a8 = (double *) root[ 8]; - double* a9 = (double *) root[ 9]; - double* a10 = (double *) root[10]; - double* a11 = (double *) root[11]; - double* a12 = (double *) root[12]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5[j] + a6[j] + a7[j] + - a8[j] + a9[j] + a10[j] + a11[j] + a12[j]; - } - sum_chk( t ); - } - break; - case 14: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[ 0]; - double* a1 = (double *) root[ 1]; - double* a2 = (double *) root[ 2]; - double* a3 = (double *) root[ 3]; - double* a4 = (double *) root[ 4]; - double* a5 = (double *) root[ 5]; - double* a6 = (double *) root[ 6]; - double* a7 = (double *) root[ 7]; - double* a8 = (double *) root[ 8]; - double* a9 = (double *) root[ 9]; - double* a10 = (double *) root[10]; - double* a11 = (double *) root[11]; - double* a12 = (double *) root[12]; - double* a13 = (double *) root[13]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5 [j] + a6[j] + a7[j] + - a8[j] + a9[j] + a10[j] + a11[j] + a12[j] + a13[j]; - } - sum_chk( t ); - } - break; - case 15: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[ 0]; - double* a1 = (double *) root[ 1]; - double* a2 = (double *) root[ 2]; - double* a3 = (double *) root[ 3]; - double* a4 = (double *) root[ 4]; - double* a5 = (double *) root[ 5]; - double* a6 = (double *) root[ 6]; - double* a7 = (double *) root[ 7]; - double* a8 = (double *) root[ 8]; - double* a9 = (double *) root[ 9]; - double* a10 = (double *) root[10]; - double* a11 = (double *) root[11]; - double* a12 = (double *) root[12]; - double* a13 = (double *) root[13]; - double* a14 = (double *) root[14]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5 [j] + a6 [j] + a7[j] + - a8[j] + a9[j] + a10[j] + a11[j] + a12[j] + a13[j] + a14[j]; - } - sum_chk( t ); - } - break; - case 16: - for (int64 i=0; i < iterations; i++) { - double t = 0; - double* a0 = (double *) root[ 0]; - double* a1 = (double *) root[ 1]; - double* a2 = (double *) root[ 2]; - double* a3 = (double *) root[ 3]; - double* a4 = (double *) root[ 4]; - double* a5 = (double *) root[ 5]; - double* a6 = (double *) root[ 6]; - double* a7 = (double *) root[ 7]; - double* a8 = (double *) root[ 8]; - double* a9 = (double *) root[ 9]; - double* a10 = (double *) root[10]; - double* a11 = (double *) root[11]; - double* a12 = (double *) root[12]; - double* a13 = (double *) root[13]; - double* a14 = (double *) root[14]; - double* a15 = (double *) root[15]; - for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { - t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5 [j] + a6 [j] + a7 [j] + - a8[j] + a9[j] + a10[j] + a11[j] + a12[j] + a13[j] + a14[j] + a15[j]; - } - sum_chk( t ); - } - break; - } -} |