/* * Copyright (c) 2016 University of Cordoba and University of Illinois * All rights reserved. * * Developed by: IMPACT Research Group * University of Cordoba and University of Illinois * http://impact.crhc.illinois.edu/ * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * with the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * > Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimers. * > Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimers in the * documentation and/or other materials provided with the distribution. * > Neither the names of IMPACT Research Group, University of Cordoba, * University of Illinois nor the names of its contributors may be used * to endorse or promote products derived from this Software without * specific prior written permission. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH * THE SOFTWARE. * */ #ifndef _PARTITIONER_H_ #define _PARTITIONER_H_ #ifndef _CUDA_COMPILER_ #include #endif #if !defined(_CUDA_COMPILER_) && defined(CUDA_8_0) #include #endif // Partitioner definition ----------------------------------------------------- typedef struct Partitioner { int n_tasks; int cut; int current; #ifndef _CUDA_COMPILER_ int thread_id; int n_threads; #endif #ifdef CUDA_8_0 // CUDA 8.0 support for dynamic partitioning int strategy; #ifdef _CUDA_COMPILER_ int *worklist; int *tmp; #else std::atomic_int *worklist; #endif #endif } Partitioner; // Partitioning strategies #define STATIC_PARTITIONING 0 #define DYNAMIC_PARTITIONING 1 // Create a partitioner ------------------------------------------------------- #ifdef _CUDA_COMPILER_ __device__ #endif inline Partitioner partitioner_create(int n_tasks, float alpha #ifndef _CUDA_COMPILER_ , int thread_id, int n_threads #endif #ifdef CUDA_8_0 #ifdef _CUDA_COMPILER_ , int *worklist , int *tmp #else , std::atomic_int *worklist #endif #endif ) { Partitioner p; p.n_tasks = n_tasks; #ifndef _CUDA_COMPILER_ p.thread_id = thread_id; p.n_threads = n_threads; #endif if(alpha >= 0.0 && alpha <= 1.0) { p.cut = p.n_tasks * alpha; #ifdef CUDA_8_0 p.strategy = STATIC_PARTITIONING; #endif } else { #ifdef CUDA_8_0 p.strategy = DYNAMIC_PARTITIONING; p.worklist = worklist; #ifdef _CUDA_COMPILER_ p.tmp = tmp; #endif #endif } return p; } // Partitioner iterators: first() --------------------------------------------- #ifndef _CUDA_COMPILER_ inline int cpu_first(Partitioner *p) { #ifdef CUDA_8_0 if(p->strategy == DYNAMIC_PARTITIONING) { p->current = p->worklist->fetch_add(1); } else #endif { p->current = p->thread_id; } return p->current; } #else __device__ inline int gpu_first(Partitioner *p) { #ifdef CUDA_8_0 if(p->strategy == DYNAMIC_PARTITIONING) { if(threadIdx.y == 0 && threadIdx.x == 0) { p->tmp[0] = atomicAdd_system(p->worklist, 1); } __syncthreads(); p->current = p->tmp[0]; } else #endif { p->current = p->cut + blockIdx.x; } return p->current; } #endif // Partitioner iterators: more() ---------------------------------------------- #ifndef _CUDA_COMPILER_ inline bool cpu_more(const Partitioner *p) { #ifdef CUDA_8_0 if(p->strategy == DYNAMIC_PARTITIONING) { return (p->current < p->n_tasks); } else #endif { return (p->current < p->cut); } } #else __device__ inline bool gpu_more(const Partitioner *p) { return (p->current < p->n_tasks); } #endif // Partitioner iterators: next() ---------------------------------------------- #ifndef _CUDA_COMPILER_ inline int cpu_next(Partitioner *p) { #ifdef CUDA_8_0 if(p->strategy == DYNAMIC_PARTITIONING) { p->current = p->worklist->fetch_add(1); } else #endif { p->current = p->current + p->n_threads; } return p->current; } #else __device__ inline int gpu_next(Partitioner *p) { #ifdef CUDA_8_0 if(p->strategy == DYNAMIC_PARTITIONING) { if(threadIdx.y == 0 && threadIdx.x == 0) { p->tmp[0] = atomicAdd_system(p->worklist, 1); } __syncthreads(); p->current = p->tmp[0]; } else #endif { p->current = p->current + gridDim.x; } return p->current; } #endif #endif