Diffstat (limited to 'HST-S/baselines/gpu/support/partitioner.h')
-rw-r--r-- | HST-S/baselines/gpu/support/partitioner.h | 213 |
1 file changed, 213 insertions, 0 deletions
diff --git a/HST-S/baselines/gpu/support/partitioner.h b/HST-S/baselines/gpu/support/partitioner.h
new file mode 100644
index 0000000..61dbe87
--- /dev/null
+++ b/HST-S/baselines/gpu/support/partitioner.h
@@ -0,0 +1,213 @@
+/*
+ * Copyright (c) 2016 University of Cordoba and University of Illinois
+ * All rights reserved.
+ *
+ * Developed by: IMPACT Research Group
+ * University of Cordoba and University of Illinois
+ * http://impact.crhc.illinois.edu/
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * with the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * > Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimers.
+ * > Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimers in the
+ * documentation and/or other materials provided with the distribution.
+ * > Neither the names of IMPACT Research Group, University of Cordoba,
+ * University of Illinois nor the names of its contributors may be used
+ * to endorse or promote products derived from this Software without
+ * specific prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
+ * THE SOFTWARE.
+ *
+ */
+
+#ifndef _PARTITIONER_H_
+#define _PARTITIONER_H_
+
+#ifndef _CUDA_COMPILER_
+#include <iostream>
+#endif
+
+#if !defined(_CUDA_COMPILER_) && defined(CUDA_8_0)
+#include <atomic>
+#endif
+
+// Partitioner definition -----------------------------------------------------
+
+typedef struct Partitioner {
+
+    int n_tasks;  // total number of tasks to partition
+    int cut;      // boundary index: tasks [0, cut) go to the CPU, [cut, n_tasks) to the GPU
+    int current;  // task currently assigned to this worker
+#ifndef _CUDA_COMPILER_
+    int thread_id;  // id of the calling CPU thread
+    int n_threads;  // total number of CPU worker threads
+#endif
+
+#ifdef CUDA_8_0
+    // CUDA 8.0 support for dynamic partitioning
+    int strategy;  // STATIC_PARTITIONING or DYNAMIC_PARTITIONING
+#ifdef _CUDA_COMPILER_
+    int *worklist;  // device view of the shared task counter
+    int *tmp;       // per-block slot used to broadcast the fetched task id
+#else
+    std::atomic_int *worklist;  // host view of the shared task counter
+#endif
+#endif
+
+} Partitioner;
+
+// Partitioning strategies
+#define STATIC_PARTITIONING 0
+#define DYNAMIC_PARTITIONING 1
+
+// Create a partitioner -------------------------------------------------------
+
+#ifdef _CUDA_COMPILER_
+__device__
+#endif
+inline Partitioner partitioner_create(int n_tasks, float alpha
+#ifndef _CUDA_COMPILER_
+    , int thread_id, int n_threads
+#endif
+#ifdef CUDA_8_0
+#ifdef _CUDA_COMPILER_
+    , int *worklist
+    , int *tmp
+#else
+    , std::atomic_int *worklist
+#endif
+#endif
+    ) {
+    Partitioner p;
+    p.n_tasks = n_tasks;
+#ifndef _CUDA_COMPILER_
+    p.thread_id = thread_id;
+    p.n_threads = n_threads;
+#endif
+    // An alpha in [0, 1] selects static partitioning: the first
+    // alpha * n_tasks tasks are assigned to the CPU, the rest to the GPU
+    if(alpha >= 0.0 && alpha <= 1.0) {
+        p.cut = p.n_tasks * alpha;
+#ifdef CUDA_8_0
+        p.strategy = STATIC_PARTITIONING;
+#endif
+    } else {
+        // Any other alpha selects dynamic partitioning over a shared
+        // worklist (only available when built with CUDA_8_0)
+#ifdef CUDA_8_0
+        p.strategy = DYNAMIC_PARTITIONING;
+        p.worklist = worklist;
+#ifdef _CUDA_COMPILER_
+        p.tmp = tmp;
+#endif
+#endif
+    }
+    return p;
+}
+
+// Partitioner iterators: first() ---------------------------------------------
+
+#ifndef _CUDA_COMPILER_
+
+inline int cpu_first(Partitioner *p) {
+#ifdef CUDA_8_0
+    if(p->strategy == DYNAMIC_PARTITIONING) {
+        // Atomically claim the next available task from the shared worklist
+        p->current = p->worklist->fetch_add(1);
+    } else
+#endif
+    {
+        // Static partitioning: thread i starts at task i
+        p->current = p->thread_id;
+    }
+    return p->current;
+}
+
+#else
+
+__device__ inline int gpu_first(Partitioner *p) {
+#ifdef CUDA_8_0
+    if(p->strategy == DYNAMIC_PARTITIONING) {
+        // One leader thread per block claims a task from the shared
+        // worklist, then broadcasts it to the block through p->tmp
+        if(threadIdx.y == 0 && threadIdx.x == 0) {
+            p->tmp[0] = atomicAdd_system(p->worklist, 1);
+        }
+        __syncthreads();
+        p->current = p->tmp[0];
+    } else
+#endif
+    {
+        // Static partitioning: GPU blocks start right after the CPU's share
+        p->current = p->cut + blockIdx.x;
+    }
+    return p->current;
+}
+
+#endif
+
+// Partitioner iterators: more() ----------------------------------------------
+
+#ifndef _CUDA_COMPILER_
+
+inline bool cpu_more(const Partitioner *p) {
+#ifdef CUDA_8_0
+    if(p->strategy == DYNAMIC_PARTITIONING) {
+        return (p->current < p->n_tasks);
+    } else
+#endif
+    {
+        // Static partitioning: the CPU only owns tasks below the cut
+        return (p->current < p->cut);
+    }
+}
+
+#else
+
+__device__ inline bool gpu_more(const Partitioner *p) {
+    return (p->current < p->n_tasks);
+}
+
+#endif
+
+// Partitioner iterators: next() ----------------------------------------------
+
+#ifndef _CUDA_COMPILER_
+
+inline int cpu_next(Partitioner *p) {
+#ifdef CUDA_8_0
+    if(p->strategy == DYNAMIC_PARTITIONING) {
+        p->current = p->worklist->fetch_add(1);
+    } else
+#endif
+    {
+        // Static partitioning: CPU threads stride through [0, cut)
+        p->current = p->current + p->n_threads;
+    }
+    return p->current;
+}
+
+#else
+
+__device__ inline int gpu_next(Partitioner *p) {
+#ifdef CUDA_8_0
+    if(p->strategy == DYNAMIC_PARTITIONING) {
+        // Same leader-and-broadcast pattern as gpu_first()
+        if(threadIdx.y == 0 && threadIdx.x == 0) {
+            p->tmp[0] = atomicAdd_system(p->worklist, 1);
+        }
+        __syncthreads();
+        p->current = p->tmp[0];
+    } else
+#endif
+    {
+        // Static partitioning: blocks stride through [cut, n_tasks)
+        p->current = p->current + gridDim.x;
+    }
+    return p->current;
+}
+
+#endif
+
+#endif
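
For reference, a minimal sketch of how a host thread might drive this iterator API. The worker function name and surrounding threading setup are illustrative assumptions, not part of this file:

    // Hypothetical host-side usage sketch (not part of partitioner.h):
    // each CPU thread claims tasks with cpu_first()/cpu_more()/cpu_next()
    #include <atomic>
    #include "partitioner.h"

    void cpu_worker(int n_tasks, float alpha, int thread_id, int n_threads
    #ifdef CUDA_8_0
                    , std::atomic_int *worklist
    #endif
                    ) {
        Partitioner p = partitioner_create(n_tasks, alpha, thread_id, n_threads
    #ifdef CUDA_8_0
                                           , worklist
    #endif
                                           );
        for(int t = cpu_first(&p); cpu_more(&p); t = cpu_next(&p)) {
            // ... process task t on the CPU ...
        }
    }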
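And a device-side counterpart, as it might appear in a kernel compiled with -D_CUDA_COMPILER_. The kernel name and task body are likewise illustrative assumptions:

    // Hypothetical kernel-side usage sketch (not part of partitioner.h):
    // all threads in a block walk the same sequence of tasks
    __global__ void gpu_worker(int n_tasks, float alpha, int *worklist, int *tmp) {
        Partitioner p = partitioner_create(n_tasks, alpha
    #ifdef CUDA_8_0
                                           , worklist, tmp
    #endif
                                           );
        for(int t = gpu_first(&p); gpu_more(&p); t = gpu_next(&p)) {
            // ... the whole block cooperates on task t ...
        }
    }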