diff options
author | Doug Pase <douglas@pase.us> | 2008-03-05 00:00:00 +0000 |
---|---|---|
committer | Tim Besard <tim.besard@gmail.com> | 2011-10-27 16:18:30 +0200 |
commit | a52db2ab61b21fe7721419747b96e1689c9069a0 (patch) | |
tree | d23d4e442fda7504496ac25fb3f0901ca97a8316 | |
parent | 538302b72dcbe0b74acdfe3903a45193c7288d4a (diff) |
Latest upstream version.
-rw-r--r-- | Chain.C | 2 | ||||
-rw-r--r-- | Chain.h | 2 | ||||
-rw-r--r-- | Experiment.C | 11 | ||||
-rw-r--r-- | Experiment.h | 4 | ||||
-rw-r--r-- | Lock.C | 2 | ||||
-rw-r--r-- | Lock.h | 2 | ||||
-rw-r--r-- | Main.C | 2 | ||||
-rw-r--r-- | Main.h | 2 | ||||
-rw-r--r-- | Makefile | 38 | ||||
-rw-r--r-- | Output.C | 62 | ||||
-rw-r--r-- | Output.h | 2 | ||||
-rw-r--r-- | Run.C | 412 | ||||
-rw-r--r-- | Run.h | 3 | ||||
-rw-r--r-- | SpinBarrier.C | 2 | ||||
-rw-r--r-- | SpinBarrier.h | 2 | ||||
-rw-r--r-- | Thread.C | 2 | ||||
-rw-r--r-- | Thread.h | 2 | ||||
-rw-r--r-- | Timer.C | 2 | ||||
-rw-r--r-- | Timer.h | 2 | ||||
-rw-r--r-- | Types.C | 2 | ||||
-rw-r--r-- | Types.h | 2 | ||||
-rwxr-xr-x | pChase.sh | 6 | ||||
-rwxr-xr-x | pChase64_NUMA | bin | 50553 -> 0 bytes | |||
-rwxr-xr-x | pChase64_SMP | bin | 50417 -> 0 bytes |
24 files changed, 469 insertions, 97 deletions
@@ -6,7 +6,7 @@ * http://www.opensource.org/licenses/cpl1.0.php * * * * Contributors: * - * Douglas M. pase - initial API and implementation * + * Douglas M. Pase - initial API and implementation * *******************************************************************************/ @@ -6,7 +6,7 @@ * http://www.opensource.org/licenses/cpl1.0.php * * * * Contributors: * - * Douglas M. pase - initial API and implementation * + * Douglas M. Pase - initial API and implementation * *******************************************************************************/ diff --git a/Experiment.C b/Experiment.C index 9c73576..75b1cab 100644 --- a/Experiment.C +++ b/Experiment.C @@ -6,7 +6,7 @@ * http://www.opensource.org/licenses/cpl1.0.php * * * * Contributors: * - * Douglas M. pase - initial API and implementation * + * Douglas M. Pase - initial API and implementation * *******************************************************************************/ @@ -147,6 +147,12 @@ Experiment::parse_args(int argc, char* argv[]) if (i == argc) { error = 1; break; } this->stride = - Experiment::parse_number(argv[i]); if (this->stride == 0) { error = 1; break; } + } else if (strcasecmp(argv[i], "stream") == 0) { + this->access_pattern = STREAM; + i++; + if (i == argc) { error = 1; break; } + this->stride = Experiment::parse_number(argv[i]); + if (this->stride == 0) { error = 1; break; } } else { error = 1; break; @@ -221,6 +227,7 @@ Experiment::parse_args(int argc, char* argv[]) printf(" random # all chains are accessed randomly\n"); printf(" forward <stride> # chains are in forward order with constant stride\n"); printf(" reverse <stride> # chains are in reverse order with constant stride\n"); + printf(" stream <stride> # references are calculated rather than read from memory\n"); printf("\n"); printf("Note: <stride> is always a small positive integer.\n"); printf("\n"); @@ -559,6 +566,8 @@ Experiment::access() result = "forward"; } else if (this->access_pattern == STRIDED && this->stride < 0) { 
result = "reverse"; + } else if (this->access_pattern == STREAM) { + result = "stream"; } return result; diff --git a/Experiment.h b/Experiment.h index 5459949..2c749d3 100644 --- a/Experiment.h +++ b/Experiment.h @@ -6,7 +6,7 @@ * http://www.opensource.org/licenses/cpl1.0.php * * * * Contributors: * - * Douglas M. pase - initial API and implementation * + * Douglas M. Pase - initial API and implementation * *******************************************************************************/ @@ -51,7 +51,7 @@ public: enum { CSV, BOTH, HEADER, TABLE } output_mode; // results output mode - enum { RANDOM, STRIDED } + enum { RANDOM, STRIDED, STREAM } access_pattern; // memory access pattern int64 stride; @@ -6,7 +6,7 @@ * http://www.opensource.org/licenses/cpl1.0.php * * * * Contributors: * - * Douglas M. pase - initial API and implementation * + * Douglas M. Pase - initial API and implementation * *******************************************************************************/ @@ -6,7 +6,7 @@ * http://www.opensource.org/licenses/cpl1.0.php * * * * Contributors: * - * Douglas M. pase - initial API and implementation * + * Douglas M. Pase - initial API and implementation * *******************************************************************************/ @@ -6,7 +6,7 @@ * http://www.opensource.org/licenses/cpl1.0.php * * * * Contributors: * - * Douglas M. pase - initial API and implementation * + * Douglas M. Pase - initial API and implementation * *******************************************************************************/ @@ -6,7 +6,7 @@ * http://www.opensource.org/licenses/cpl1.0.php * * * * Contributors: * - * Douglas M. pase - initial API and implementation * + * Douglas M. 
Pase - initial API and implementation * *******************************************************************************/ @@ -1,42 +1,34 @@ -# MODE=NUMA make -j # # BIT = { 32 | 64 } # MODE = { NUMA | SMP } # -# BIT = 64 -# MODE = NUMA -# MODE = SMP - -ifneq ($(BIT), 64) -ifneq ($(BIT), 32) +ifndef BIT BIT = 64 endif +ifndef MODE +MODE = NUMA endif - -ifneq ($(MODE), NUMA) -ifneq ($(MODE), SMP) -MODE = SMP -endif -endif - ifeq ($(MODE), NUMA) LIB = -lpthread -lnuma -endif - -ifeq ($(MODE), SMP) -LIB = -lpthread +else +LIB = -lpthread endif SRC = Main.C Chain.C Experiment.C Lock.C Output.C Run.C SpinBarrier.C Timer.C Thread.C Types.C HDR = $(SRC:.C=.h) OBJ = $(SRC:.C=.o) EXE = pChase$(BIT)_$(MODE) +HYPDIR = /web/hypercomputing.org/www/doc/Guest/pChase +PCHDIR = /web/pchase.org/www/doc/Guest/pChase +TARFILE = tgz/pChase-`date +"%Y-%m-%d"`.tgz RM = /bin/rm MV = /bin/mv CI = /usr/bin/ci CO = /usr/bin/co +CP = /bin/cp +TAR = /bin/tar CXXFLAGS= -O3 -m$(BIT) -D$(MODE) @@ -59,3 +51,13 @@ ci: co: $(CO) -l $(SRC) $(HDR) Makefile + +tar: + $(TAR) -cvzf $(TARFILE) $(SRC) $(HDR) Makefile License.htm License.txt pChase.sh run-pChase.sh + +cptar: + $(TAR) -cvzf $(TARFILE) $(SRC) $(HDR) Makefile License.htm License.txt pChase.sh run-pChase.sh + $(CP) $(TARFILE) $(HYPDIR)/tgz + $(CP) $(SRC) $(HDR) Makefile License.htm License.txt pChase.sh run-pChase.sh $(HYPDIR) + $(CP) $(TARFILE) $(PCHDIR)/tgz + $(CP) $(SRC) $(HDR) Makefile License.htm License.txt pChase.sh run-pChase.sh $(PCHDIR) @@ -6,7 +6,7 @@ * http://www.opensource.org/licenses/cpl1.0.php * * * * Contributors: * - * Douglas M. pase - initial API and implementation * + * Douglas M. 
Pase - initial API and implementation * *******************************************************************************/ @@ -68,21 +68,21 @@ Output::header( Experiment &e, int64 ops, double secs, double ck_res ) void Output::csv( Experiment &e, int64 ops, double secs, double ck_res ) { - printf("%d,", e.pointer_size); - printf("%d,", e.bytes_per_line); - printf("%d,", e.bytes_per_page); - printf("%d,", e.bytes_per_chain); - printf("%d,", e.bytes_per_thread); - printf("%d,", e.bytes_per_test); - printf("%d,", e.chains_per_thread); - printf("%d,", e.num_threads); - printf("%d,", e.iterations); - printf("%d,", e.experiments); + printf("%ld,", e.pointer_size); + printf("%ld,", e.bytes_per_line); + printf("%ld,", e.bytes_per_page); + printf("%ld,", e.bytes_per_chain); + printf("%ld,", e.bytes_per_thread); + printf("%ld,", e.bytes_per_test); + printf("%lld,", e.chains_per_thread); + printf("%ld,", e.num_threads); + printf("%ld,", e.iterations); + printf("%ld,", e.experiments); printf("%s,", e.access()); - printf("%d,", e.stride); + printf("%ld,", e.stride); printf("%s,", e.placement()); - printf("%d,", e.offset_or_mask); - printf("%d,", e.num_numa_domains); + printf("%ld,", e.offset_or_mask); + printf("%ld,", e.num_numa_domains); printf("\""); printf("%d:", e.thread_domain[0]); printf("%d", e.chain_domain[0][0]); @@ -97,8 +97,8 @@ Output::csv( Experiment &e, int64 ops, double secs, double ck_res ) } } printf("\","); - printf("%d,", ops); - printf("%d,", ops * e.chains_per_thread * e.num_threads); + printf("%ld,", ops); + printf("%ld,", ops * e.chains_per_thread * e.num_threads); printf("%.3f,", secs); printf("%.0f,", secs/ck_res); printf("%.2f,", ck_res * 1E9); @@ -111,21 +111,21 @@ Output::csv( Experiment &e, int64 ops, double secs, double ck_res ) void Output::table( Experiment &e, int64 ops, double secs, double ck_res ) { - printf("pointer size = %d (bytes)\n", e.pointer_size); - printf("cache line size = %d (bytes)\n", e.bytes_per_line); - printf("page size = %d 
(bytes)\n", e.bytes_per_page); - printf("chain size = %d (bytes)\n", e.bytes_per_chain); - printf("thread size = %d (bytes)\n", e.bytes_per_thread); - printf("test size = %d (bytes)\n", e.bytes_per_test); - printf("chains per thread = %d\n", e.chains_per_thread); - printf("number of threads = %d\n", e.num_threads); - printf("iterations = %d\n", e.iterations); - printf("experiments = %d\n", e.experiments); + printf("pointer size = %ld (bytes)\n", e.pointer_size); + printf("cache line size = %ld (bytes)\n", e.bytes_per_line); + printf("page size = %ld (bytes)\n", e.bytes_per_page); + printf("chain size = %ld (bytes)\n", e.bytes_per_chain); + printf("thread size = %ld (bytes)\n", e.bytes_per_thread); + printf("test size = %ld (bytes)\n", e.bytes_per_test); + printf("chains per thread = %ld\n", e.chains_per_thread); + printf("number of threads = %ld\n", e.num_threads); + printf("iterations = %ld\n", e.iterations); + printf("experiments = %ld\n", e.experiments); printf("access pattern = %s\n", e.access()); - printf("stride = %d\n", e.stride); + printf("stride = %ld\n", e.stride); printf("numa placement = %s\n", e.placement()); - printf("offset or mask = %d\n", e.offset_or_mask); - printf("numa domains = %d\n", e.num_numa_domains); + printf("offset or mask = %ld\n", e.offset_or_mask); + printf("numa domains = %ld\n", e.num_numa_domains); printf("domain map = "); printf("\""); printf("%d:", e.thread_domain[0]); @@ -141,8 +141,8 @@ Output::table( Experiment &e, int64 ops, double secs, double ck_res ) } } printf("\"\n"); - printf("operations per chain = %d\n", ops); - printf("total operations = %d\n", ops * e.chains_per_thread * e.num_threads); + printf("operations per chain = %ld\n", ops); + printf("total operations = %ld\n", ops * e.chains_per_thread * e.num_threads); printf("elapsed time = %.3f (seconds)\n", secs); printf("elapsed time = %.0f (timer ticks)\n", secs/ck_res); printf("clock resolution = %.2f (ns)\n", ck_res * 1E9); @@ -6,7 +6,7 @@ * 
http://www.opensource.org/licenses/cpl1.0.php * * * * Contributors: * - * Douglas M. pase - initial API and implementation * + * Douglas M. Pase - initial API and implementation * *******************************************************************************/ @@ -6,7 +6,7 @@ * http://www.opensource.org/licenses/cpl1.0.php * * * * Contributors: * - * Douglas M. pase - initial API and implementation * + * Douglas M. Pase - initial API and implementation * *******************************************************************************/ @@ -27,7 +27,9 @@ static double max( double v1, double v2 ); static double min( double v1, double v2 ); -static void chase_pointers(int64 chains_per_thread, int64 iterations, Chain** root); +static void chase_pointers(int64 chains_per_thread, int64 iterations, Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride); +static void follow_streams(int64 chains_per_thread, int64 iterations, Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride); +static void (*run_benchmark)(int64 chains_per_thread, int64 iterations, Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride) = chase_pointers; Lock Run::global_mutex; int64 Run::_ops_per_chain = 0; @@ -82,13 +84,23 @@ Run::run() } #endif + // initialize the chains and + // select the function that + // will execute the tests for (int i=0; i < this->exp->chains_per_thread; i++) { if (this->exp->access_pattern == Experiment::RANDOM) { root[i] = random_mem_init( chain_memory[i] ); - } else if (0 < this->exp->stride) { - root[i] = forward_mem_init( chain_memory[i] ); - } else { - root[i] = reverse_mem_init( chain_memory[i] ); + run_benchmark = chase_pointers; + } else if (this->exp->access_pattern == Experiment::STRIDED) { + if (0 < this->exp->stride) { + root[i] = forward_mem_init( chain_memory[i] ); + } else { + root[i] = reverse_mem_init( chain_memory[i] ); + } + run_benchmark = chase_pointers; + } else if (this->exp->access_pattern == 
Experiment::STREAM) { + root[i] = stream_mem_init( chain_memory[i] ); + run_benchmark = follow_streams; } } @@ -97,7 +109,7 @@ Run::run() volatile static double istop = 0; volatile static double elapsed = 0; volatile static int64 iters = 1; - volatile static double bound = max(0.2, 10 * Timer::resolution()); + volatile double bound = max(0.2, 10 * Timer::resolution()); for (iters=1; elapsed <= bound; iters=iters<<1) { this->bp->barrier(); @@ -108,7 +120,7 @@ Run::run() this->bp->barrier(); // chase pointers - chase_pointers(this->exp->chains_per_thread, iters, root); + run_benchmark(this->exp->chains_per_thread, iters, root, this->exp->bytes_per_line, this->exp->bytes_per_chain, this->exp->stride); // barrier this->bp->barrier(); @@ -144,7 +156,7 @@ Run::run() this->bp->barrier(); // chase pointers - chase_pointers(this->exp->chains_per_thread, this->exp->iterations, root); + run_benchmark(this->exp->chains_per_thread, this->exp->iterations, root, this->exp->bytes_per_line, this->exp->bytes_per_chain, this->exp->stride); // barrier this->bp->barrier(); @@ -333,14 +345,17 @@ static void chase_pointers( int64 chains_per_thread, // memory loading per thread int64 iterations, // number of iterations per experiment - Chain** root // + Chain** root, // root(s) of the chain(s) to follow + int64 bytes_per_line, // ignored + int64 bytes_per_chain, // ignored + int64 stride // ignored ) { // chase pointers switch (chains_per_thread) { default: case 1: - for (int i=0; i < iterations; i++) { + for (int64 i=0; i < iterations; i++) { Chain* a = root[0]; while (a != NULL) { a = a->next; @@ -349,7 +364,7 @@ chase_pointers( } break; case 2: - for (int i=0; i < iterations; i++) { + for (int64 i=0; i < iterations; i++) { Chain* a = root[0]; Chain* b = root[1]; while (a != NULL) { @@ -361,7 +376,7 @@ chase_pointers( } break; case 3: - for (int i=0; i < iterations; i++) { + for (int64 i=0; i < iterations; i++) { Chain* a = root[0]; Chain* b = root[1]; Chain* c = root[2]; @@ -376,7 
+391,7 @@ chase_pointers( } break; case 4: - for (int i=0; i < iterations; i++) { + for (int64 i=0; i < iterations; i++) { Chain* a = root[0]; Chain* b = root[1]; Chain* c = root[2]; @@ -394,7 +409,7 @@ chase_pointers( } break; case 5: - for (int i=0; i < iterations; i++) { + for (int64 i=0; i < iterations; i++) { Chain* a = root[0]; Chain* b = root[1]; Chain* c = root[2]; @@ -415,7 +430,7 @@ chase_pointers( } break; case 6: - for (int i=0; i < iterations; i++) { + for (int64 i=0; i < iterations; i++) { Chain* a = root[0]; Chain* b = root[1]; Chain* c = root[2]; @@ -439,7 +454,7 @@ chase_pointers( } break; case 7: - for (int i=0; i < iterations; i++) { + for (int64 i=0; i < iterations; i++) { Chain* a = root[0]; Chain* b = root[1]; Chain* c = root[2]; @@ -466,7 +481,7 @@ chase_pointers( } break; case 8: - for (int i=0; i < iterations; i++) { + for (int64 i=0; i < iterations; i++) { Chain* a = root[0]; Chain* b = root[1]; Chain* c = root[2]; @@ -496,7 +511,7 @@ chase_pointers( } break; case 9: - for (int i=0; i < iterations; i++) { + for (int64 i=0; i < iterations; i++) { Chain* a = root[0]; Chain* b = root[1]; Chain* c = root[2]; @@ -529,7 +544,7 @@ chase_pointers( } break; case 10: - for (int i=0; i < iterations; i++) { + for (int64 i=0; i < iterations; i++) { Chain* a = root[0]; Chain* b = root[1]; Chain* c = root[2]; @@ -565,7 +580,7 @@ chase_pointers( } break; case 11: - for (int i=0; i < iterations; i++) { + for (int64 i=0; i < iterations; i++) { Chain* a = root[0]; Chain* b = root[1]; Chain* c = root[2]; @@ -604,7 +619,7 @@ chase_pointers( } break; case 12: - for (int i=0; i < iterations; i++) { + for (int64 i=0; i < iterations; i++) { Chain* a = root[0]; Chain* b = root[1]; Chain* c = root[2]; @@ -646,7 +661,7 @@ chase_pointers( } break; case 13: - for (int i=0; i < iterations; i++) { + for (int64 i=0; i < iterations; i++) { Chain* a = root[0]; Chain* b = root[1]; Chain* c = root[2]; @@ -691,7 +706,7 @@ chase_pointers( } break; case 14: - for (int i=0; i < 
iterations; i++) { + for (int64 i=0; i < iterations; i++) { Chain* a = root[0]; Chain* b = root[1]; Chain* c = root[2]; @@ -739,7 +754,7 @@ chase_pointers( } break; case 15: - for (int i=0; i < iterations; i++) { + for (int64 i=0; i < iterations; i++) { Chain* a = root[0]; Chain* b = root[1]; Chain* c = root[2]; @@ -790,7 +805,7 @@ chase_pointers( } break; case 16: - for (int i=0; i < iterations; i++) { + for (int64 i=0; i < iterations; i++) { Chain* a = root[0]; Chain* b = root[1]; Chain* c = root[2]; @@ -844,3 +859,348 @@ chase_pointers( } } } + + // NOT WRITTEN YET -- DMP + // JUST A PLACE HOLDER! +Chain* +Run::stream_mem_init( Chain *mem ) +{ +// fprintf(stderr, "made it into stream_mem_init.\n"); +// fprintf(stderr, "chains_per_thread = %ld\n", this->exp->chains_per_thread); +// fprintf(stderr, "iterations = %ld\n", this->exp->iterations); +// fprintf(stderr, "bytes_per_chain = %ld\n", this->exp->bytes_per_chain); +// fprintf(stderr, "stride = %ld\n", this->exp->stride); + int64 local_ops_per_chain = 0; + double* tmp = (double *) mem; + int64 refs_per_line = this->exp->bytes_per_line / sizeof(double); + int64 refs_per_chain = this->exp->bytes_per_chain / sizeof(double); +// fprintf(stderr, "refs_per_chain = %ld\n", refs_per_chain); + + for (int64 i=0; i < refs_per_chain; i += this->exp->stride*refs_per_line) { + tmp[i] = 0; + local_ops_per_chain += 1; + } + + Run::global_mutex.lock(); + Run::_ops_per_chain = local_ops_per_chain; + Run::global_mutex.unlock(); + +// fprintf(stderr, "made it out of stream_mem_init.\n"); + return mem; +} + +static int64 summ_ck = 0; +void +sum_chk( double t ) +{ + if (t != 0) summ_ck += 1; +} + + // NOT WRITTEN YET -- DMP + // JUST A PLACE HOLDER! 
+static void +follow_streams( + int64 chains_per_thread, // memory loading per thread + int64 iterations, // number of iterations per experiment + Chain** root, // root(s) of the chain(s) to follow + int64 bytes_per_line, // ignored + int64 bytes_per_chain, // ignored + int64 stride // ignored +) +{ + int64 refs_per_line = bytes_per_line / sizeof(double); + int64 refs_per_chain = bytes_per_chain / sizeof(double); + + // chase pointers + switch (chains_per_thread) { + default: + case 1: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j]; + } + sum_chk( t ); + } + break; + case 2: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j]; + } + sum_chk( t ); + } + break; + case 3: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2[j]; + } + sum_chk( t ); + } + break; + case 4: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j]; + } + sum_chk( t ); + } + break; + case 5: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + double* a4 = (double *) root[4]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j]; + } + sum_chk( t ); + } + break; + case 6: + for 
(int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + double* a4 = (double *) root[4]; + double* a5 = (double *) root[5]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j]; + } + sum_chk( t ); + } + break; + case 7: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + double* a4 = (double *) root[4]; + double* a5 = (double *) root[5]; + double* a6 = (double *) root[6]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j]; + } + sum_chk( t ); + } + break; + case 8: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + double* a4 = (double *) root[4]; + double* a5 = (double *) root[5]; + double* a6 = (double *) root[6]; + double* a7 = (double *) root[7]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j]; + } + sum_chk( t ); + } + break; + case 9: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + double* a4 = (double *) root[4]; + double* a5 = (double *) root[5]; + double* a6 = (double *) root[6]; + double* a7 = (double *) root[7]; + double* a8 = (double *) root[8]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j] + + a8[j]; + } + sum_chk( t ); + } + break; + case 10: + for (int64 i=0; i < 
iterations; i++) { + double t = 0; + double* a0 = (double *) root[0]; + double* a1 = (double *) root[1]; + double* a2 = (double *) root[2]; + double* a3 = (double *) root[3]; + double* a4 = (double *) root[4]; + double* a5 = (double *) root[5]; + double* a6 = (double *) root[6]; + double* a7 = (double *) root[7]; + double* a8 = (double *) root[8]; + double* a9 = (double *) root[9]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j] + + a8[j] + a9[j]; + } + sum_chk( t ); + } + break; + case 11: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[ 0]; + double* a1 = (double *) root[ 1]; + double* a2 = (double *) root[ 2]; + double* a3 = (double *) root[ 3]; + double* a4 = (double *) root[ 4]; + double* a5 = (double *) root[ 5]; + double* a6 = (double *) root[ 6]; + double* a7 = (double *) root[ 7]; + double* a8 = (double *) root[ 8]; + double* a9 = (double *) root[ 9]; + double* a10 = (double *) root[10]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2 [j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j] + + a8[j] + a9[j] + a10[j]; + } + sum_chk( t ); + } + break; + case 12: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[ 0]; + double* a1 = (double *) root[ 1]; + double* a2 = (double *) root[ 2]; + double* a3 = (double *) root[ 3]; + double* a4 = (double *) root[ 4]; + double* a5 = (double *) root[ 5]; + double* a6 = (double *) root[ 6]; + double* a7 = (double *) root[ 7]; + double* a8 = (double *) root[ 8]; + double* a9 = (double *) root[ 9]; + double* a10 = (double *) root[10]; + double* a11 = (double *) root[11]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4[j] + a5[j] + a6[j] + a7[j] + + a8[j] + a9[j] + a10[j] + a11[j]; + } + sum_chk( t ); + } + break; + case 13: + for (int64 i=0; i < iterations; i++) { + 
double t = 0; + double* a0 = (double *) root[ 0]; + double* a1 = (double *) root[ 1]; + double* a2 = (double *) root[ 2]; + double* a3 = (double *) root[ 3]; + double* a4 = (double *) root[ 4]; + double* a5 = (double *) root[ 5]; + double* a6 = (double *) root[ 6]; + double* a7 = (double *) root[ 7]; + double* a8 = (double *) root[ 8]; + double* a9 = (double *) root[ 9]; + double* a10 = (double *) root[10]; + double* a11 = (double *) root[11]; + double* a12 = (double *) root[12]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5[j] + a6[j] + a7[j] + + a8[j] + a9[j] + a10[j] + a11[j] + a12[j]; + } + sum_chk( t ); + } + break; + case 14: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[ 0]; + double* a1 = (double *) root[ 1]; + double* a2 = (double *) root[ 2]; + double* a3 = (double *) root[ 3]; + double* a4 = (double *) root[ 4]; + double* a5 = (double *) root[ 5]; + double* a6 = (double *) root[ 6]; + double* a7 = (double *) root[ 7]; + double* a8 = (double *) root[ 8]; + double* a9 = (double *) root[ 9]; + double* a10 = (double *) root[10]; + double* a11 = (double *) root[11]; + double* a12 = (double *) root[12]; + double* a13 = (double *) root[13]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5 [j] + a6[j] + a7[j] + + a8[j] + a9[j] + a10[j] + a11[j] + a12[j] + a13[j]; + } + sum_chk( t ); + } + break; + case 15: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[ 0]; + double* a1 = (double *) root[ 1]; + double* a2 = (double *) root[ 2]; + double* a3 = (double *) root[ 3]; + double* a4 = (double *) root[ 4]; + double* a5 = (double *) root[ 5]; + double* a6 = (double *) root[ 6]; + double* a7 = (double *) root[ 7]; + double* a8 = (double *) root[ 8]; + double* a9 = (double *) root[ 9]; + double* a10 = (double *) root[10]; + double* a11 = (double *) 
root[11]; + double* a12 = (double *) root[12]; + double* a13 = (double *) root[13]; + double* a14 = (double *) root[14]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5 [j] + a6 [j] + a7[j] + + a8[j] + a9[j] + a10[j] + a11[j] + a12[j] + a13[j] + a14[j]; + } + sum_chk( t ); + } + break; + case 16: + for (int64 i=0; i < iterations; i++) { + double t = 0; + double* a0 = (double *) root[ 0]; + double* a1 = (double *) root[ 1]; + double* a2 = (double *) root[ 2]; + double* a3 = (double *) root[ 3]; + double* a4 = (double *) root[ 4]; + double* a5 = (double *) root[ 5]; + double* a6 = (double *) root[ 6]; + double* a7 = (double *) root[ 7]; + double* a8 = (double *) root[ 8]; + double* a9 = (double *) root[ 9]; + double* a10 = (double *) root[10]; + double* a11 = (double *) root[11]; + double* a12 = (double *) root[12]; + double* a13 = (double *) root[13]; + double* a14 = (double *) root[14]; + double* a15 = (double *) root[15]; + for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) { + t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5 [j] + a6 [j] + a7 [j] + + a8[j] + a9[j] + a10[j] + a11[j] + a12[j] + a13[j] + a14[j] + a15[j]; + } + sum_chk( t ); + } + break; + } +} @@ -6,7 +6,7 @@ * http://www.opensource.org/licenses/cpl1.0.php * * * * Contributors: * - * Douglas M. pase - initial API and implementation * + * Douglas M. 
Pase - initial API and implementation * *******************************************************************************/ @@ -39,6 +39,7 @@ private: Chain* random_mem_init( Chain *m ); Chain* forward_mem_init( Chain *m ); Chain* reverse_mem_init( Chain *m ); + Chain* stream_mem_init( Chain *m ); static Lock global_mutex; // global lock static int64 _ops_per_chain; // total number of operations per chain diff --git a/SpinBarrier.C b/SpinBarrier.C index c150245..d3d2d7b 100644 --- a/SpinBarrier.C +++ b/SpinBarrier.C @@ -6,7 +6,7 @@ * http://www.opensource.org/licenses/cpl1.0.php *
* *
* Contributors: *
- * Douglas M. pase - initial API and implementation *
+ * Douglas M. Pase - initial API and implementation *
*******************************************************************************/
diff --git a/SpinBarrier.h b/SpinBarrier.h index 27cb28d..f0b76d3 100644 --- a/SpinBarrier.h +++ b/SpinBarrier.h @@ -6,7 +6,7 @@ * http://www.opensource.org/licenses/cpl1.0.php *
* *
* Contributors: *
- * Douglas M. pase - initial API and implementation *
+ * Douglas M. Pase - initial API and implementation *
*******************************************************************************/
@@ -6,7 +6,7 @@ * http://www.opensource.org/licenses/cpl1.0.php * * * * Contributors: * - * Douglas M. pase - initial API and implementation * + * Douglas M. Pase - initial API and implementation * *******************************************************************************/ @@ -6,7 +6,7 @@ * http://www.opensource.org/licenses/cpl1.0.php * * * * Contributors: * - * Douglas M. pase - initial API and implementation * + * Douglas M. Pase - initial API and implementation * *******************************************************************************/ @@ -6,7 +6,7 @@ * http://www.opensource.org/licenses/cpl1.0.php * * * * Contributors: * - * Douglas M. pase - initial API and implementation * + * Douglas M. Pase - initial API and implementation * *******************************************************************************/ @@ -6,7 +6,7 @@ * http://www.opensource.org/licenses/cpl1.0.php * * * * Contributors: * - * Douglas M. pase - initial API and implementation * + * Douglas M. Pase - initial API and implementation * *******************************************************************************/ @@ -6,7 +6,7 @@ * http://www.opensource.org/licenses/cpl1.0.php * * * * Contributors: * - * Douglas M. pase - initial API and implementation * + * Douglas M. Pase - initial API and implementation * *******************************************************************************/ @@ -6,7 +6,7 @@ * http://www.opensource.org/licenses/cpl1.0.php * * * * Contributors: * - * Douglas M. pase - initial API and implementation * + * Douglas M. 
Pase - initial API and implementation * *******************************************************************************/ @@ -2,14 +2,14 @@ pgm=./pChase64_NUMA -b=(8k 16k 24k 32k 48k 64k 96k 128k 192k 256k 384k 512k 768k 1m 1536k 2m 3m 4m 6m 8m 12m 16m ) +b=(8k 16k 24k 32k 48k 64k 96k 128k 192k 256k 384k 512k 768k 1m 1536k 2m 3m 4m 6m 8m 12m 16m) c=5 date uname -a echo $pgm -o hdr -for page in 4k 8k 16k +for page_size in 4k 8k 16k do for threads in 1 2 do @@ -23,7 +23,7 @@ do do for ((j=0; $j < $c; j++)) do - $pgm -p $page -t $threads -r $refs -n add $offset -a $access -c ${b[$i]} -s 1.0 -o csv + $pgm -p $page_size -t $threads -r $refs -n add $offset -a $access -c ${b[$i]} -s 1.0 -o csv done done done diff --git a/pChase64_NUMA b/pChase64_NUMA Binary files differ deleted file mode 100755 index f42a29a..0000000 --- a/pChase64_NUMA +++ /dev/null diff --git a/pChase64_SMP b/pChase64_SMP Binary files differ deleted file mode 100755 index b6ee9a2..0000000 --- a/pChase64_SMP +++ /dev/null |