summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Doug Pase <douglas@pase.us> 2008-03-05 00:00:00 +0000
committer Tim Besard <tim.besard@gmail.com> 2011-10-27 16:18:30 +0200
commit a52db2ab61b21fe7721419747b96e1689c9069a0 (patch)
tree d23d4e442fda7504496ac25fb3f0901ca97a8316
parent 538302b72dcbe0b74acdfe3903a45193c7288d4a (diff)
Latest upstream version.
-rw-r--r-- Chain.C 2
-rw-r--r-- Chain.h 2
-rw-r--r-- Experiment.C 11
-rw-r--r-- Experiment.h 4
-rw-r--r-- Lock.C 2
-rw-r--r-- Lock.h 2
-rw-r--r-- Main.C 2
-rw-r--r-- Main.h 2
-rw-r--r-- Makefile 38
-rw-r--r-- Output.C 62
-rw-r--r-- Output.h 2
-rw-r--r-- Run.C 412
-rw-r--r-- Run.h 3
-rw-r--r-- SpinBarrier.C 2
-rw-r--r-- SpinBarrier.h 2
-rw-r--r-- Thread.C 2
-rw-r--r-- Thread.h 2
-rw-r--r-- Timer.C 2
-rw-r--r-- Timer.h 2
-rw-r--r-- Types.C 2
-rw-r--r-- Types.h 2
-rwxr-xr-x pChase.sh 6
-rwxr-xr-x pChase64_NUMA bin 50553 -> 0 bytes
-rwxr-xr-x pChase64_SMP bin 50417 -> 0 bytes
24 files changed, 469 insertions, 97 deletions
diff --git a/Chain.C b/Chain.C
index d010878..ddbc104 100644
--- a/Chain.C
+++ b/Chain.C
@@ -6,7 +6,7 @@
* http://www.opensource.org/licenses/cpl1.0.php *
* *
* Contributors: *
- * Douglas M. pase - initial API and implementation *
+ * Douglas M. Pase - initial API and implementation *
*******************************************************************************/
diff --git a/Chain.h b/Chain.h
index cf1a51b..8bdb584 100644
--- a/Chain.h
+++ b/Chain.h
@@ -6,7 +6,7 @@
* http://www.opensource.org/licenses/cpl1.0.php *
* *
* Contributors: *
- * Douglas M. pase - initial API and implementation *
+ * Douglas M. Pase - initial API and implementation *
*******************************************************************************/
diff --git a/Experiment.C b/Experiment.C
index 9c73576..75b1cab 100644
--- a/Experiment.C
+++ b/Experiment.C
@@ -6,7 +6,7 @@
* http://www.opensource.org/licenses/cpl1.0.php *
* *
* Contributors: *
- * Douglas M. pase - initial API and implementation *
+ * Douglas M. Pase - initial API and implementation *
*******************************************************************************/
@@ -147,6 +147,12 @@ Experiment::parse_args(int argc, char* argv[])
if (i == argc) { error = 1; break; }
this->stride = - Experiment::parse_number(argv[i]);
if (this->stride == 0) { error = 1; break; }
+ } else if (strcasecmp(argv[i], "stream") == 0) {
+ this->access_pattern = STREAM;
+ i++;
+ if (i == argc) { error = 1; break; }
+ this->stride = Experiment::parse_number(argv[i]);
+ if (this->stride == 0) { error = 1; break; }
} else {
error = 1;
break;
@@ -221,6 +227,7 @@ Experiment::parse_args(int argc, char* argv[])
printf(" random # all chains are accessed randomly\n");
printf(" forward <stride> # chains are in forward order with constant stride\n");
printf(" reverse <stride> # chains are in reverse order with constant stride\n");
+ printf(" stream <stride> # references are calculated rather than read from memory\n");
printf("\n");
printf("Note: <stride> is always a small positive integer.\n");
printf("\n");
@@ -559,6 +566,8 @@ Experiment::access()
result = "forward";
} else if (this->access_pattern == STRIDED && this->stride < 0) {
result = "reverse";
+ } else if (this->access_pattern == STREAM) {
+ result = "stream";
}
return result;
diff --git a/Experiment.h b/Experiment.h
index 5459949..2c749d3 100644
--- a/Experiment.h
+++ b/Experiment.h
@@ -6,7 +6,7 @@
* http://www.opensource.org/licenses/cpl1.0.php *
* *
* Contributors: *
- * Douglas M. pase - initial API and implementation *
+ * Douglas M. Pase - initial API and implementation *
*******************************************************************************/
@@ -51,7 +51,7 @@ public:
enum { CSV, BOTH, HEADER, TABLE }
output_mode; // results output mode
- enum { RANDOM, STRIDED }
+ enum { RANDOM, STRIDED, STREAM }
access_pattern; // memory access pattern
int64 stride;
diff --git a/Lock.C b/Lock.C
index 8b59c3b..104dc81 100644
--- a/Lock.C
+++ b/Lock.C
@@ -6,7 +6,7 @@
* http://www.opensource.org/licenses/cpl1.0.php *
* *
* Contributors: *
- * Douglas M. pase - initial API and implementation *
+ * Douglas M. Pase - initial API and implementation *
*******************************************************************************/
diff --git a/Lock.h b/Lock.h
index a820bef..14bf1dc 100644
--- a/Lock.h
+++ b/Lock.h
@@ -6,7 +6,7 @@
* http://www.opensource.org/licenses/cpl1.0.php *
* *
* Contributors: *
- * Douglas M. pase - initial API and implementation *
+ * Douglas M. Pase - initial API and implementation *
*******************************************************************************/
diff --git a/Main.C b/Main.C
index ffd31d0..ebd276a 100644
--- a/Main.C
+++ b/Main.C
@@ -6,7 +6,7 @@
* http://www.opensource.org/licenses/cpl1.0.php *
* *
* Contributors: *
- * Douglas M. pase - initial API and implementation *
+ * Douglas M. Pase - initial API and implementation *
*******************************************************************************/
diff --git a/Main.h b/Main.h
index 98afdeb..1492291 100644
--- a/Main.h
+++ b/Main.h
@@ -6,7 +6,7 @@
* http://www.opensource.org/licenses/cpl1.0.php *
* *
* Contributors: *
- * Douglas M. pase - initial API and implementation *
+ * Douglas M. Pase - initial API and implementation *
*******************************************************************************/
diff --git a/Makefile b/Makefile
index 662696c..9ae95b9 100644
--- a/Makefile
+++ b/Makefile
@@ -1,42 +1,34 @@
-# MODE=NUMA make -j
#
# BIT = { 32 | 64 }
# MODE = { NUMA | SMP }
#
-# BIT = 64
-# MODE = NUMA
-# MODE = SMP
-
-ifneq ($(BIT), 64)
-ifneq ($(BIT), 32)
+ifndef BIT
BIT = 64
endif
+ifndef MODE
+MODE = NUMA
endif
-
-ifneq ($(MODE), NUMA)
-ifneq ($(MODE), SMP)
-MODE = SMP
-endif
-endif
-
ifeq ($(MODE), NUMA)
LIB = -lpthread -lnuma
-endif
-
-ifeq ($(MODE), SMP)
-LIB = -lpthread
+else
+LIB = -lpthread
endif
SRC = Main.C Chain.C Experiment.C Lock.C Output.C Run.C SpinBarrier.C Timer.C Thread.C Types.C
HDR = $(SRC:.C=.h)
OBJ = $(SRC:.C=.o)
EXE = pChase$(BIT)_$(MODE)
+HYPDIR = /web/hypercomputing.org/www/doc/Guest/pChase
+PCHDIR = /web/pchase.org/www/doc/Guest/pChase
+TARFILE = tgz/pChase-`date +"%Y-%m-%d"`.tgz
RM = /bin/rm
MV = /bin/mv
CI = /usr/bin/ci
CO = /usr/bin/co
+CP = /bin/cp
+TAR = /bin/tar
CXXFLAGS= -O3 -m$(BIT) -D$(MODE)
@@ -59,3 +51,13 @@ ci:
co:
$(CO) -l $(SRC) $(HDR) Makefile
+
+tar:
+ $(TAR) -cvzf $(TARFILE) $(SRC) $(HDR) Makefile License.htm License.txt pChase.sh run-pChase.sh
+
+cptar:
+ $(TAR) -cvzf $(TARFILE) $(SRC) $(HDR) Makefile License.htm License.txt pChase.sh run-pChase.sh
+ $(CP) $(TARFILE) $(HYPDIR)/tgz
+ $(CP) $(SRC) $(HDR) Makefile License.htm License.txt pChase.sh run-pChase.sh $(HYPDIR)
+ $(CP) $(TARFILE) $(PCHDIR)/tgz
+ $(CP) $(SRC) $(HDR) Makefile License.htm License.txt pChase.sh run-pChase.sh $(PCHDIR)
diff --git a/Output.C b/Output.C
index 98071ee..9f9c09a 100644
--- a/Output.C
+++ b/Output.C
@@ -6,7 +6,7 @@
* http://www.opensource.org/licenses/cpl1.0.php *
* *
* Contributors: *
- * Douglas M. pase - initial API and implementation *
+ * Douglas M. Pase - initial API and implementation *
*******************************************************************************/
@@ -68,21 +68,21 @@ Output::header( Experiment &e, int64 ops, double secs, double ck_res )
void
Output::csv( Experiment &e, int64 ops, double secs, double ck_res )
{
- printf("%d,", e.pointer_size);
- printf("%d,", e.bytes_per_line);
- printf("%d,", e.bytes_per_page);
- printf("%d,", e.bytes_per_chain);
- printf("%d,", e.bytes_per_thread);
- printf("%d,", e.bytes_per_test);
- printf("%d,", e.chains_per_thread);
- printf("%d,", e.num_threads);
- printf("%d,", e.iterations);
- printf("%d,", e.experiments);
+ printf("%ld,", e.pointer_size);
+ printf("%ld,", e.bytes_per_line);
+ printf("%ld,", e.bytes_per_page);
+ printf("%ld,", e.bytes_per_chain);
+ printf("%ld,", e.bytes_per_thread);
+ printf("%ld,", e.bytes_per_test);
+ printf("%lld,", e.chains_per_thread);
+ printf("%ld,", e.num_threads);
+ printf("%ld,", e.iterations);
+ printf("%ld,", e.experiments);
printf("%s,", e.access());
- printf("%d,", e.stride);
+ printf("%ld,", e.stride);
printf("%s,", e.placement());
- printf("%d,", e.offset_or_mask);
- printf("%d,", e.num_numa_domains);
+ printf("%ld,", e.offset_or_mask);
+ printf("%ld,", e.num_numa_domains);
printf("\"");
printf("%d:", e.thread_domain[0]);
printf("%d", e.chain_domain[0][0]);
@@ -97,8 +97,8 @@ Output::csv( Experiment &e, int64 ops, double secs, double ck_res )
}
}
printf("\",");
- printf("%d,", ops);
- printf("%d,", ops * e.chains_per_thread * e.num_threads);
+ printf("%ld,", ops);
+ printf("%ld,", ops * e.chains_per_thread * e.num_threads);
printf("%.3f,", secs);
printf("%.0f,", secs/ck_res);
printf("%.2f,", ck_res * 1E9);
@@ -111,21 +111,21 @@ Output::csv( Experiment &e, int64 ops, double secs, double ck_res )
void
Output::table( Experiment &e, int64 ops, double secs, double ck_res )
{
- printf("pointer size = %d (bytes)\n", e.pointer_size);
- printf("cache line size = %d (bytes)\n", e.bytes_per_line);
- printf("page size = %d (bytes)\n", e.bytes_per_page);
- printf("chain size = %d (bytes)\n", e.bytes_per_chain);
- printf("thread size = %d (bytes)\n", e.bytes_per_thread);
- printf("test size = %d (bytes)\n", e.bytes_per_test);
- printf("chains per thread = %d\n", e.chains_per_thread);
- printf("number of threads = %d\n", e.num_threads);
- printf("iterations = %d\n", e.iterations);
- printf("experiments = %d\n", e.experiments);
+ printf("pointer size = %ld (bytes)\n", e.pointer_size);
+ printf("cache line size = %ld (bytes)\n", e.bytes_per_line);
+ printf("page size = %ld (bytes)\n", e.bytes_per_page);
+ printf("chain size = %ld (bytes)\n", e.bytes_per_chain);
+ printf("thread size = %ld (bytes)\n", e.bytes_per_thread);
+ printf("test size = %ld (bytes)\n", e.bytes_per_test);
+ printf("chains per thread = %ld\n", e.chains_per_thread);
+ printf("number of threads = %ld\n", e.num_threads);
+ printf("iterations = %ld\n", e.iterations);
+ printf("experiments = %ld\n", e.experiments);
printf("access pattern = %s\n", e.access());
- printf("stride = %d\n", e.stride);
+ printf("stride = %ld\n", e.stride);
printf("numa placement = %s\n", e.placement());
- printf("offset or mask = %d\n", e.offset_or_mask);
- printf("numa domains = %d\n", e.num_numa_domains);
+ printf("offset or mask = %ld\n", e.offset_or_mask);
+ printf("numa domains = %ld\n", e.num_numa_domains);
printf("domain map = ");
printf("\"");
printf("%d:", e.thread_domain[0]);
@@ -141,8 +141,8 @@ Output::table( Experiment &e, int64 ops, double secs, double ck_res )
}
}
printf("\"\n");
- printf("operations per chain = %d\n", ops);
- printf("total operations = %d\n", ops * e.chains_per_thread * e.num_threads);
+ printf("operations per chain = %ld\n", ops);
+ printf("total operations = %ld\n", ops * e.chains_per_thread * e.num_threads);
printf("elapsed time = %.3f (seconds)\n", secs);
printf("elapsed time = %.0f (timer ticks)\n", secs/ck_res);
printf("clock resolution = %.2f (ns)\n", ck_res * 1E9);
diff --git a/Output.h b/Output.h
index 9216988..9ee2c80 100644
--- a/Output.h
+++ b/Output.h
@@ -6,7 +6,7 @@
* http://www.opensource.org/licenses/cpl1.0.php *
* *
* Contributors: *
- * Douglas M. pase - initial API and implementation *
+ * Douglas M. Pase - initial API and implementation *
*******************************************************************************/
diff --git a/Run.C b/Run.C
index 72f307c..4fb8057 100644
--- a/Run.C
+++ b/Run.C
@@ -6,7 +6,7 @@
* http://www.opensource.org/licenses/cpl1.0.php *
* *
* Contributors: *
- * Douglas M. pase - initial API and implementation *
+ * Douglas M. Pase - initial API and implementation *
*******************************************************************************/
@@ -27,7 +27,9 @@
static double max( double v1, double v2 );
static double min( double v1, double v2 );
-static void chase_pointers(int64 chains_per_thread, int64 iterations, Chain** root);
+static void chase_pointers(int64 chains_per_thread, int64 iterations, Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride);
+static void follow_streams(int64 chains_per_thread, int64 iterations, Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride);
+static void (*run_benchmark)(int64 chains_per_thread, int64 iterations, Chain** root, int64 bytes_per_line, int64 bytes_per_chain, int64 stride) = chase_pointers;
Lock Run::global_mutex;
int64 Run::_ops_per_chain = 0;
@@ -82,13 +84,23 @@ Run::run()
}
#endif
+ // initialize the chains and
+ // select the function that
+ // will execute the tests
for (int i=0; i < this->exp->chains_per_thread; i++) {
if (this->exp->access_pattern == Experiment::RANDOM) {
root[i] = random_mem_init( chain_memory[i] );
- } else if (0 < this->exp->stride) {
- root[i] = forward_mem_init( chain_memory[i] );
- } else {
- root[i] = reverse_mem_init( chain_memory[i] );
+ run_benchmark = chase_pointers;
+ } else if (this->exp->access_pattern == Experiment::STRIDED) {
+ if (0 < this->exp->stride) {
+ root[i] = forward_mem_init( chain_memory[i] );
+ } else {
+ root[i] = reverse_mem_init( chain_memory[i] );
+ }
+ run_benchmark = chase_pointers;
+ } else if (this->exp->access_pattern == Experiment::STREAM) {
+ root[i] = stream_mem_init( chain_memory[i] );
+ run_benchmark = follow_streams;
}
}
@@ -97,7 +109,7 @@ Run::run()
volatile static double istop = 0;
volatile static double elapsed = 0;
volatile static int64 iters = 1;
- volatile static double bound = max(0.2, 10 * Timer::resolution());
+ volatile double bound = max(0.2, 10 * Timer::resolution());
for (iters=1; elapsed <= bound; iters=iters<<1) {
this->bp->barrier();
@@ -108,7 +120,7 @@ Run::run()
this->bp->barrier();
// chase pointers
- chase_pointers(this->exp->chains_per_thread, iters, root);
+ run_benchmark(this->exp->chains_per_thread, iters, root, this->exp->bytes_per_line, this->exp->bytes_per_chain, this->exp->stride);
// barrier
this->bp->barrier();
@@ -144,7 +156,7 @@ Run::run()
this->bp->barrier();
// chase pointers
- chase_pointers(this->exp->chains_per_thread, this->exp->iterations, root);
+ run_benchmark(this->exp->chains_per_thread, this->exp->iterations, root, this->exp->bytes_per_line, this->exp->bytes_per_chain, this->exp->stride);
// barrier
this->bp->barrier();
@@ -333,14 +345,17 @@ static void
chase_pointers(
int64 chains_per_thread, // memory loading per thread
int64 iterations, // number of iterations per experiment
- Chain** root //
+ Chain** root, // root(s) of the chain(s) to follow
+ int64 bytes_per_line, // ignored
+ int64 bytes_per_chain, // ignored
+ int64 stride // ignored
)
{
// chase pointers
switch (chains_per_thread) {
default:
case 1:
- for (int i=0; i < iterations; i++) {
+ for (int64 i=0; i < iterations; i++) {
Chain* a = root[0];
while (a != NULL) {
a = a->next;
@@ -349,7 +364,7 @@ chase_pointers(
}
break;
case 2:
- for (int i=0; i < iterations; i++) {
+ for (int64 i=0; i < iterations; i++) {
Chain* a = root[0];
Chain* b = root[1];
while (a != NULL) {
@@ -361,7 +376,7 @@ chase_pointers(
}
break;
case 3:
- for (int i=0; i < iterations; i++) {
+ for (int64 i=0; i < iterations; i++) {
Chain* a = root[0];
Chain* b = root[1];
Chain* c = root[2];
@@ -376,7 +391,7 @@ chase_pointers(
}
break;
case 4:
- for (int i=0; i < iterations; i++) {
+ for (int64 i=0; i < iterations; i++) {
Chain* a = root[0];
Chain* b = root[1];
Chain* c = root[2];
@@ -394,7 +409,7 @@ chase_pointers(
}
break;
case 5:
- for (int i=0; i < iterations; i++) {
+ for (int64 i=0; i < iterations; i++) {
Chain* a = root[0];
Chain* b = root[1];
Chain* c = root[2];
@@ -415,7 +430,7 @@ chase_pointers(
}
break;
case 6:
- for (int i=0; i < iterations; i++) {
+ for (int64 i=0; i < iterations; i++) {
Chain* a = root[0];
Chain* b = root[1];
Chain* c = root[2];
@@ -439,7 +454,7 @@ chase_pointers(
}
break;
case 7:
- for (int i=0; i < iterations; i++) {
+ for (int64 i=0; i < iterations; i++) {
Chain* a = root[0];
Chain* b = root[1];
Chain* c = root[2];
@@ -466,7 +481,7 @@ chase_pointers(
}
break;
case 8:
- for (int i=0; i < iterations; i++) {
+ for (int64 i=0; i < iterations; i++) {
Chain* a = root[0];
Chain* b = root[1];
Chain* c = root[2];
@@ -496,7 +511,7 @@ chase_pointers(
}
break;
case 9:
- for (int i=0; i < iterations; i++) {
+ for (int64 i=0; i < iterations; i++) {
Chain* a = root[0];
Chain* b = root[1];
Chain* c = root[2];
@@ -529,7 +544,7 @@ chase_pointers(
}
break;
case 10:
- for (int i=0; i < iterations; i++) {
+ for (int64 i=0; i < iterations; i++) {
Chain* a = root[0];
Chain* b = root[1];
Chain* c = root[2];
@@ -565,7 +580,7 @@ chase_pointers(
}
break;
case 11:
- for (int i=0; i < iterations; i++) {
+ for (int64 i=0; i < iterations; i++) {
Chain* a = root[0];
Chain* b = root[1];
Chain* c = root[2];
@@ -604,7 +619,7 @@ chase_pointers(
}
break;
case 12:
- for (int i=0; i < iterations; i++) {
+ for (int64 i=0; i < iterations; i++) {
Chain* a = root[0];
Chain* b = root[1];
Chain* c = root[2];
@@ -646,7 +661,7 @@ chase_pointers(
}
break;
case 13:
- for (int i=0; i < iterations; i++) {
+ for (int64 i=0; i < iterations; i++) {
Chain* a = root[0];
Chain* b = root[1];
Chain* c = root[2];
@@ -691,7 +706,7 @@ chase_pointers(
}
break;
case 14:
- for (int i=0; i < iterations; i++) {
+ for (int64 i=0; i < iterations; i++) {
Chain* a = root[0];
Chain* b = root[1];
Chain* c = root[2];
@@ -739,7 +754,7 @@ chase_pointers(
}
break;
case 15:
- for (int i=0; i < iterations; i++) {
+ for (int64 i=0; i < iterations; i++) {
Chain* a = root[0];
Chain* b = root[1];
Chain* c = root[2];
@@ -790,7 +805,7 @@ chase_pointers(
}
break;
case 16:
- for (int i=0; i < iterations; i++) {
+ for (int64 i=0; i < iterations; i++) {
Chain* a = root[0];
Chain* b = root[1];
Chain* c = root[2];
@@ -844,3 +859,348 @@ chase_pointers(
}
}
}
+
+ // Stream initializer (author DMP flagged it as a placeholder): zeroes one double every
+ // stride*refs_per_line elements of mem, records that count, and returns mem as the root.
+Chain*
+Run::stream_mem_init( Chain *mem )
+{
+// fprintf(stderr, "made it into stream_mem_init.\n");
+// fprintf(stderr, "chains_per_thread = %ld\n", this->exp->chains_per_thread);
+// fprintf(stderr, "iterations = %ld\n", this->exp->iterations);
+// fprintf(stderr, "bytes_per_chain = %ld\n", this->exp->bytes_per_chain);
+// fprintf(stderr, "stride = %ld\n", this->exp->stride);
+ int64 local_ops_per_chain = 0; // number of elements written by the loop below
+ double* tmp = (double *) mem; // the chain buffer viewed as an array of doubles
+ int64 refs_per_line = this->exp->bytes_per_line / sizeof(double); // doubles per cache line
+ int64 refs_per_chain = this->exp->bytes_per_chain / sizeof(double); // doubles per chain
+// fprintf(stderr, "refs_per_chain = %ld\n", refs_per_chain);
+
+ for (int64 i=0; i < refs_per_chain; i += this->exp->stride*refs_per_line) { // step is stride cache lines, in doubles
+ tmp[i] = 0;
+ local_ops_per_chain += 1;
+ }
+
+ Run::global_mutex.lock();
+ Run::_ops_per_chain = local_ops_per_chain; // assignment, not accumulation: each call overwrites the shared count
+ Run::global_mutex.unlock();
+
+// fprintf(stderr, "made it out of stream_mem_init.\n");
+ return mem; // the buffer itself is returned as the root
+}
+
+static int64 summ_ck = 0; // count of nonzero sums handed to sum_chk()
+void
+sum_chk( double t ) // receives the sum of one pass from follow_streams
+{
+ if (t != 0) summ_ck += 1; // NOTE(review): presumably here so the summation cannot be optimized away -- confirm
+}
+
+ // Stream benchmark kernel (author DMP flagged it as a placeholder): on each iteration, sums
+ // one double every stride*refs_per_line elements of each root[] buffer, then calls sum_chk.
+static void
+follow_streams(
+ int64 chains_per_thread, // number of streams read together; selects the case below
+ int64 iterations, // number of iterations per experiment
+ Chain** root, // stream buffers; each is read as an array of doubles
+ int64 bytes_per_line, // cache line size in bytes; gives doubles per line
+ int64 bytes_per_chain, // chain size in bytes; gives doubles per chain (loop bound)
+ int64 stride // distance between references, in cache lines
+)
+{
+ int64 refs_per_line = bytes_per_line / sizeof(double); // doubles per cache line
+ int64 refs_per_chain = bytes_per_chain / sizeof(double); // doubles per chain
+
+ // sum strided doubles; one hand-unrolled case per stream count
+ switch (chains_per_thread) {
+ default: // counts without a case of their own fall through to the single-stream case
+ case 1:
+ for (int64 i=0; i < iterations; i++) {
+ double t = 0; // running sum for this pass
+ double* a0 = (double *) root[0];
+ for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
+ t += a0[j];
+ }
+ sum_chk( t ); // hand the finished sum to sum_chk
+ }
+ break;
+ case 2:
+ for (int64 i=0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
+ t += a0[j] + a1[j];
+ }
+ sum_chk( t );
+ }
+ break;
+ case 3:
+ for (int64 i=0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
+ t += a0[j] + a1[j] + a2[j];
+ }
+ sum_chk( t );
+ }
+ break;
+ case 4:
+ for (int64 i=0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j];
+ }
+ sum_chk( t );
+ }
+ break;
+ case 5:
+ for (int64 i=0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j];
+ }
+ sum_chk( t );
+ }
+ break;
+ case 6:
+ for (int64 i=0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ double* a5 = (double *) root[5];
+ for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j];
+ }
+ sum_chk( t );
+ }
+ break;
+ case 7:
+ for (int64 i=0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ double* a5 = (double *) root[5];
+ double* a6 = (double *) root[6];
+ for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j];
+ }
+ sum_chk( t );
+ }
+ break;
+ case 8:
+ for (int64 i=0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ double* a5 = (double *) root[5];
+ double* a6 = (double *) root[6];
+ double* a7 = (double *) root[7];
+ for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j];
+ }
+ sum_chk( t );
+ }
+ break;
+ case 9:
+ for (int64 i=0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ double* a5 = (double *) root[5];
+ double* a6 = (double *) root[6];
+ double* a7 = (double *) root[7];
+ double* a8 = (double *) root[8];
+ for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j] +
+ a8[j];
+ }
+ sum_chk( t );
+ }
+ break;
+ case 10:
+ for (int64 i=0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[0];
+ double* a1 = (double *) root[1];
+ double* a2 = (double *) root[2];
+ double* a3 = (double *) root[3];
+ double* a4 = (double *) root[4];
+ double* a5 = (double *) root[5];
+ double* a6 = (double *) root[6];
+ double* a7 = (double *) root[7];
+ double* a8 = (double *) root[8];
+ double* a9 = (double *) root[9];
+ for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
+ t += a0[j] + a1[j] + a2[j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j] +
+ a8[j] + a9[j];
+ }
+ sum_chk( t );
+ }
+ break;
+ case 11:
+ for (int64 i=0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[ 0];
+ double* a1 = (double *) root[ 1];
+ double* a2 = (double *) root[ 2];
+ double* a3 = (double *) root[ 3];
+ double* a4 = (double *) root[ 4];
+ double* a5 = (double *) root[ 5];
+ double* a6 = (double *) root[ 6];
+ double* a7 = (double *) root[ 7];
+ double* a8 = (double *) root[ 8];
+ double* a9 = (double *) root[ 9];
+ double* a10 = (double *) root[10];
+ for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
+ t += a0[j] + a1[j] + a2 [j] + a3[j] + a4[j] + a5[j] + a6[j] + a7[j] +
+ a8[j] + a9[j] + a10[j];
+ }
+ sum_chk( t );
+ }
+ break;
+ case 12:
+ for (int64 i=0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[ 0];
+ double* a1 = (double *) root[ 1];
+ double* a2 = (double *) root[ 2];
+ double* a3 = (double *) root[ 3];
+ double* a4 = (double *) root[ 4];
+ double* a5 = (double *) root[ 5];
+ double* a6 = (double *) root[ 6];
+ double* a7 = (double *) root[ 7];
+ double* a8 = (double *) root[ 8];
+ double* a9 = (double *) root[ 9];
+ double* a10 = (double *) root[10];
+ double* a11 = (double *) root[11];
+ for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
+ t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4[j] + a5[j] + a6[j] + a7[j] +
+ a8[j] + a9[j] + a10[j] + a11[j];
+ }
+ sum_chk( t );
+ }
+ break;
+ case 13:
+ for (int64 i=0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[ 0];
+ double* a1 = (double *) root[ 1];
+ double* a2 = (double *) root[ 2];
+ double* a3 = (double *) root[ 3];
+ double* a4 = (double *) root[ 4];
+ double* a5 = (double *) root[ 5];
+ double* a6 = (double *) root[ 6];
+ double* a7 = (double *) root[ 7];
+ double* a8 = (double *) root[ 8];
+ double* a9 = (double *) root[ 9];
+ double* a10 = (double *) root[10];
+ double* a11 = (double *) root[11];
+ double* a12 = (double *) root[12];
+ for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
+ t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5[j] + a6[j] + a7[j] +
+ a8[j] + a9[j] + a10[j] + a11[j] + a12[j];
+ }
+ sum_chk( t );
+ }
+ break;
+ case 14:
+ for (int64 i=0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[ 0];
+ double* a1 = (double *) root[ 1];
+ double* a2 = (double *) root[ 2];
+ double* a3 = (double *) root[ 3];
+ double* a4 = (double *) root[ 4];
+ double* a5 = (double *) root[ 5];
+ double* a6 = (double *) root[ 6];
+ double* a7 = (double *) root[ 7];
+ double* a8 = (double *) root[ 8];
+ double* a9 = (double *) root[ 9];
+ double* a10 = (double *) root[10];
+ double* a11 = (double *) root[11];
+ double* a12 = (double *) root[12];
+ double* a13 = (double *) root[13];
+ for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
+ t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5 [j] + a6[j] + a7[j] +
+ a8[j] + a9[j] + a10[j] + a11[j] + a12[j] + a13[j];
+ }
+ sum_chk( t );
+ }
+ break;
+ case 15:
+ for (int64 i=0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[ 0];
+ double* a1 = (double *) root[ 1];
+ double* a2 = (double *) root[ 2];
+ double* a3 = (double *) root[ 3];
+ double* a4 = (double *) root[ 4];
+ double* a5 = (double *) root[ 5];
+ double* a6 = (double *) root[ 6];
+ double* a7 = (double *) root[ 7];
+ double* a8 = (double *) root[ 8];
+ double* a9 = (double *) root[ 9];
+ double* a10 = (double *) root[10];
+ double* a11 = (double *) root[11];
+ double* a12 = (double *) root[12];
+ double* a13 = (double *) root[13];
+ double* a14 = (double *) root[14];
+ for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
+ t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5 [j] + a6 [j] + a7[j] +
+ a8[j] + a9[j] + a10[j] + a11[j] + a12[j] + a13[j] + a14[j];
+ }
+ sum_chk( t );
+ }
+ break;
+ case 16:
+ for (int64 i=0; i < iterations; i++) {
+ double t = 0;
+ double* a0 = (double *) root[ 0];
+ double* a1 = (double *) root[ 1];
+ double* a2 = (double *) root[ 2];
+ double* a3 = (double *) root[ 3];
+ double* a4 = (double *) root[ 4];
+ double* a5 = (double *) root[ 5];
+ double* a6 = (double *) root[ 6];
+ double* a7 = (double *) root[ 7];
+ double* a8 = (double *) root[ 8];
+ double* a9 = (double *) root[ 9];
+ double* a10 = (double *) root[10];
+ double* a11 = (double *) root[11];
+ double* a12 = (double *) root[12];
+ double* a13 = (double *) root[13];
+ double* a14 = (double *) root[14];
+ double* a15 = (double *) root[15];
+ for (int64 j=0; j < refs_per_chain; j+=stride*refs_per_line) {
+ t += a0[j] + a1[j] + a2 [j] + a3 [j] + a4 [j] + a5 [j] + a6 [j] + a7 [j] +
+ a8[j] + a9[j] + a10[j] + a11[j] + a12[j] + a13[j] + a14[j] + a15[j];
+ }
+ sum_chk( t );
+ }
+ break;
+ }
+}
diff --git a/Run.h b/Run.h
index 57a83bc..810c2e8 100644
--- a/Run.h
+++ b/Run.h
@@ -6,7 +6,7 @@
* http://www.opensource.org/licenses/cpl1.0.php *
* *
* Contributors: *
- * Douglas M. pase - initial API and implementation *
+ * Douglas M. Pase - initial API and implementation *
*******************************************************************************/
@@ -39,6 +39,7 @@ private:
Chain* random_mem_init( Chain *m );
Chain* forward_mem_init( Chain *m );
Chain* reverse_mem_init( Chain *m );
+ Chain* stream_mem_init( Chain *m );
static Lock global_mutex; // global lock
static int64 _ops_per_chain; // total number of operations per chain
diff --git a/SpinBarrier.C b/SpinBarrier.C
index c150245..d3d2d7b 100644
--- a/SpinBarrier.C
+++ b/SpinBarrier.C
@@ -6,7 +6,7 @@
* http://www.opensource.org/licenses/cpl1.0.php *
* *
* Contributors: *
- * Douglas M. pase - initial API and implementation *
+ * Douglas M. Pase - initial API and implementation *
*******************************************************************************/
diff --git a/SpinBarrier.h b/SpinBarrier.h
index 27cb28d..f0b76d3 100644
--- a/SpinBarrier.h
+++ b/SpinBarrier.h
@@ -6,7 +6,7 @@
* http://www.opensource.org/licenses/cpl1.0.php *
* *
* Contributors: *
- * Douglas M. pase - initial API and implementation *
+ * Douglas M. Pase - initial API and implementation *
*******************************************************************************/
diff --git a/Thread.C b/Thread.C
index 18bfefd..8908cfe 100644
--- a/Thread.C
+++ b/Thread.C
@@ -6,7 +6,7 @@
* http://www.opensource.org/licenses/cpl1.0.php *
* *
* Contributors: *
- * Douglas M. pase - initial API and implementation *
+ * Douglas M. Pase - initial API and implementation *
*******************************************************************************/
diff --git a/Thread.h b/Thread.h
index ddba894..3948f56 100644
--- a/Thread.h
+++ b/Thread.h
@@ -6,7 +6,7 @@
* http://www.opensource.org/licenses/cpl1.0.php *
* *
* Contributors: *
- * Douglas M. pase - initial API and implementation *
+ * Douglas M. Pase - initial API and implementation *
*******************************************************************************/
diff --git a/Timer.C b/Timer.C
index d450867..b326048 100644
--- a/Timer.C
+++ b/Timer.C
@@ -6,7 +6,7 @@
* http://www.opensource.org/licenses/cpl1.0.php *
* *
* Contributors: *
- * Douglas M. pase - initial API and implementation *
+ * Douglas M. Pase - initial API and implementation *
*******************************************************************************/
diff --git a/Timer.h b/Timer.h
index 164af1d..ba2c503 100644
--- a/Timer.h
+++ b/Timer.h
@@ -6,7 +6,7 @@
* http://www.opensource.org/licenses/cpl1.0.php *
* *
* Contributors: *
- * Douglas M. pase - initial API and implementation *
+ * Douglas M. Pase - initial API and implementation *
*******************************************************************************/
diff --git a/Types.C b/Types.C
index d1ba66f..da5ecd0 100644
--- a/Types.C
+++ b/Types.C
@@ -6,7 +6,7 @@
* http://www.opensource.org/licenses/cpl1.0.php *
* *
* Contributors: *
- * Douglas M. pase - initial API and implementation *
+ * Douglas M. Pase - initial API and implementation *
*******************************************************************************/
diff --git a/Types.h b/Types.h
index 294b9ce..9e2eeb0 100644
--- a/Types.h
+++ b/Types.h
@@ -6,7 +6,7 @@
* http://www.opensource.org/licenses/cpl1.0.php *
* *
* Contributors: *
- * Douglas M. pase - initial API and implementation *
+ * Douglas M. Pase - initial API and implementation *
*******************************************************************************/
diff --git a/pChase.sh b/pChase.sh
index e2e5eb6..e3191e9 100755
--- a/pChase.sh
+++ b/pChase.sh
@@ -2,14 +2,14 @@
pgm=./pChase64_NUMA
-b=(8k 16k 24k 32k 48k 64k 96k 128k 192k 256k 384k 512k 768k 1m 1536k 2m 3m 4m 6m 8m 12m 16m )
+b=(8k 16k 24k 32k 48k 64k 96k 128k 192k 256k 384k 512k 768k 1m 1536k 2m 3m 4m 6m 8m 12m 16m)
c=5
date
uname -a
echo
$pgm -o hdr
-for page in 4k 8k 16k
+for page_size in 4k 8k 16k
do
for threads in 1 2
do
@@ -23,7 +23,7 @@ do
do
for ((j=0; $j < $c; j++))
do
- $pgm -p $page -t $threads -r $refs -n add $offset -a $access -c ${b[$i]} -s 1.0 -o csv
+ $pgm -p $page_size -t $threads -r $refs -n add $offset -a $access -c ${b[$i]} -s 1.0 -o csv
done
done
done
diff --git a/pChase64_NUMA b/pChase64_NUMA
deleted file mode 100755
index f42a29a..0000000
--- a/pChase64_NUMA
+++ /dev/null
Binary files differ
diff --git a/pChase64_SMP b/pChase64_SMP
deleted file mode 100755
index b6ee9a2..0000000
--- a/pChase64_SMP
+++ /dev/null
Binary files differ