5 files changed, 475 insertions, 0 deletions
diff --git a/HST-S/baselines/gpu/support/common.h b/HST-S/baselines/gpu/support/common.h
new file mode 100644
index 0000000..2383eff
--- /dev/null
+++ b/HST-S/baselines/gpu/support/common.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2016 University of Cordoba and University of Illinois
+ * All rights reserved.
+ *
+ * Developed by:    IMPACT Research Group
+ *                  University of Cordoba and University of Illinois
+ *                  http://impact.crhc.illinois.edu/
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * with the Software without restriction, including without limitation the 
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ *      > Redistributions of source code must retain the above copyright notice,
+ *        this list of conditions and the following disclaimers.
+ *      > Redistributions in binary form must reproduce the above copyright
+ *        notice, this list of conditions and the following disclaimers in the
+ *        documentation and/or other materials provided with the distribution.
+ *      > Neither the names of IMPACT Research Group, University of Cordoba, 
+ *        University of Illinois nor the names of its contributors may be used 
+ *        to endorse or promote products derived from this Software without 
+ *        specific prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
+ * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
+ * THE SOFTWARE.
+ *
+ */
+
+#ifndef _COMMON_H_
+#define _COMMON_H_
+
+#define ByteSwap16(n) (((((unsigned int)n) << 8) & 0xFF00) | ((((unsigned int)n) >> 8) & 0x00FF))
+
+#define PRINT 0
+
+#define divceil(n, m) (((n)-1) / (m) + 1)
+
+#endif
diff --git a/HST-S/baselines/gpu/support/cuda-setup.h b/HST-S/baselines/gpu/support/cuda-setup.h
new file mode 100644
index 0000000..7b7eefe
--- /dev/null
+++ b/HST-S/baselines/gpu/support/cuda-setup.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2016 University of Cordoba and University of Illinois
+ * All rights reserved.
+ *
+ * Developed by:    IMPACT Research Group
+ *                  University of Cordoba and University of Illinois
+ *                  http://impact.crhc.illinois.edu/
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * with the Software without restriction, including without limitation the 
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ *      > Redistributions of source code must retain the above copyright notice,
+ *        this list of conditions and the following disclaimers.
+ *      > Redistributions in binary form must reproduce the above copyright
+ *        notice, this list of conditions and the following disclaimers in the
+ *        documentation and/or other materials provided with the distribution.
+ *      > Neither the names of IMPACT Research Group, University of Cordoba, 
+ *        University of Illinois nor the names of its contributors may be used 
+ *        to endorse or promote products derived from this Software without 
+ *        specific prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
+ * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
+ * THE SOFTWARE.
+ *
+ */
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <fstream>
+
+// Allocation error checking
+#define ERR_1(v1)                                                                                                      \
+    if(v1 == NULL) {                                                                                                   \
+        fprintf(stderr, "Allocation error at %s, %d\n", __FILE__, __LINE__);                                           \
+        exit(-1);                                                                                                      \
+    }
+#define ERR_2(v1,v2) ERR_1(v1) ERR_1(v2)
+#define ERR_3(v1,v2,v3) ERR_2(v1,v2) ERR_1(v3)
+#define ERR_4(v1,v2,v3,v4) ERR_3(v1,v2,v3) ERR_1(v4)
+#define ERR_5(v1,v2,v3,v4,v5) ERR_4(v1,v2,v3,v4) ERR_1(v5)
+#define ERR_6(v1,v2,v3,v4,v5,v6) ERR_5(v1,v2,v3,v4,v5) ERR_1(v6)
+#define GET_ERR_MACRO(_1,_2,_3,_4,_5,_6,NAME,...) NAME
+#define ALLOC_ERR(...) GET_ERR_MACRO(__VA_ARGS__,ERR_6,ERR_5,ERR_4,ERR_3,ERR_2,ERR_1)(__VA_ARGS__)
+
+#define CUDA_ERR()                                                                                                     \
+    if(cudaStatus != cudaSuccess) {                                                                                    \
+        fprintf(stderr, "CUDA error: %s\n at %s, %d\n", cudaGetErrorString(cudaStatus), __FILE__, __LINE__);           \
+        exit(-1);                                                                                                      \
+    }
+
+struct CUDASetup {
+
+    cudaDeviceProp device_prop;
+
+    CUDASetup(int device) {
+        cudaError_t cudaStatus;
+        cudaStatus = cudaSetDevice(device);
+        CUDA_ERR();
+
+        cudaStatus = cudaGetDeviceProperties(&device_prop, device);
+        CUDA_ERR();
+        fprintf(stderr, "%s\t", device_prop.name);
+
+    }
+
+    int max_gpu_threads() {
+        return device_prop.maxThreadsPerBlock;
+    }
+};
diff --git a/HST-S/baselines/gpu/support/partitioner.h b/HST-S/baselines/gpu/support/partitioner.h
new file mode 100644
index 0000000..61dbe87
--- /dev/null
+++ b/HST-S/baselines/gpu/support/partitioner.h
@@ -0,0 +1,213 @@
+/*
+ * Copyright (c) 2016 University of Cordoba and University of Illinois
+ * All rights reserved.
+ *
+ * Developed by:    IMPACT Research Group
+ *                  University of Cordoba and University of Illinois
+ *                  http://impact.crhc.illinois.edu/
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * with the Software without restriction, including without limitation the 
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ *      > Redistributions of source code must retain the above copyright notice,
+ *        this list of conditions and the following disclaimers.
+ *      > Redistributions in binary form must reproduce the above copyright
+ *        notice, this list of conditions and the following disclaimers in the
+ *        documentation and/or other materials provided with the distribution.
+ *      > Neither the names of IMPACT Research Group, University of Cordoba, 
+ *        University of Illinois nor the names of its contributors may be used 
+ *        to endorse or promote products derived from this Software without 
+ *        specific prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
+ * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
+ * THE SOFTWARE.
+ *
+ */
+
+#ifndef _PARTITIONER_H_
+#define _PARTITIONER_H_
+
+#ifndef _CUDA_COMPILER_
+#include <iostream>
+#endif
+
+#if !defined(_CUDA_COMPILER_) && defined(CUDA_8_0)
+#include <atomic>
+#endif
+
+// Partitioner definition -----------------------------------------------------
+
+typedef struct Partitioner {
+
+    int n_tasks;
+    int cut;
+    int current;
+#ifndef _CUDA_COMPILER_
+    int thread_id;
+    int n_threads;
+#endif
+
+
+#ifdef CUDA_8_0
+    // CUDA 8.0 support for dynamic partitioning
+    int strategy;
+#ifdef _CUDA_COMPILER_
+    int *worklist;
+    int *tmp;
+#else
+    std::atomic_int *worklist;
+#endif
+#endif
+
+} Partitioner;
+
+// Partitioning strategies
+#define STATIC_PARTITIONING 0
+#define DYNAMIC_PARTITIONING 1
+
+// Create a partitioner -------------------------------------------------------
+
+#ifdef _CUDA_COMPILER_
+__device__
+#endif
+inline Partitioner partitioner_create(int n_tasks, float alpha
+#ifndef _CUDA_COMPILER_
+    , int thread_id, int n_threads
+#endif
+#ifdef CUDA_8_0
+#ifdef _CUDA_COMPILER_
+    , int *worklist
+    , int *tmp
+#else
+    , std::atomic_int *worklist
+#endif
+#endif
+    ) {
+    Partitioner p;
+    p.n_tasks = n_tasks;
+#ifndef _CUDA_COMPILER_
+    p.thread_id = thread_id;
+    p.n_threads = n_threads;
+#endif
+    if(alpha >= 0.0 && alpha <= 1.0) {
+        p.cut = p.n_tasks * alpha;
+#ifdef CUDA_8_0
+        p.strategy = STATIC_PARTITIONING;
+#endif
+    } else {
+#ifdef CUDA_8_0
+        p.strategy = DYNAMIC_PARTITIONING;
+        p.worklist = worklist;
+#ifdef _CUDA_COMPILER_
+        p.tmp = tmp;
+#endif
+#endif
+    }
+    return p;
+}
+
+// Partitioner iterators: first() ---------------------------------------------
+
+#ifndef _CUDA_COMPILER_
+
+inline int cpu_first(Partitioner *p) {
+#ifdef CUDA_8_0
+    if(p->strategy == DYNAMIC_PARTITIONING) {
+        p->current = p->worklist->fetch_add(1);
+    } else
+#endif
+    {
+        p->current = p->thread_id;
+    }
+    return p->current;
+}
+
+#else
+
+__device__ inline int gpu_first(Partitioner *p) {
+#ifdef CUDA_8_0
+    if(p->strategy == DYNAMIC_PARTITIONING) {
+        if(threadIdx.y == 0 && threadIdx.x == 0) {
+            p->tmp[0] = atomicAdd_system(p->worklist, 1);
+        }
+        __syncthreads();
+        p->current = p->tmp[0];
+    } else
+#endif
+    {
+        p->current = p->cut + blockIdx.x;
+    }
+    return p->current;
+}
+
+#endif
+
+// Partitioner iterators: more() ----------------------------------------------
+
+#ifndef _CUDA_COMPILER_
+
+inline bool cpu_more(const Partitioner *p) {
+#ifdef CUDA_8_0
+    if(p->strategy == DYNAMIC_PARTITIONING) {
+        return (p->current < p->n_tasks);
+    } else
+#endif
+    {
+        return (p->current < p->cut);
+    }
+}
+
+#else
+
+__device__ inline bool gpu_more(const Partitioner *p) {
+    return (p->current < p->n_tasks);
+}
+
+#endif
+
+// Partitioner iterators: next() ----------------------------------------------
+
+#ifndef _CUDA_COMPILER_
+
+inline int cpu_next(Partitioner *p) {
+#ifdef CUDA_8_0
+    if(p->strategy == DYNAMIC_PARTITIONING) {
+        p->current = p->worklist->fetch_add(1);
+    } else
+#endif
+    {
+        p->current = p->current + p->n_threads;
+    }
+    return p->current;
+}
+
+#else
+
+__device__ inline int gpu_next(Partitioner *p) {
+#ifdef CUDA_8_0
+    if(p->strategy == DYNAMIC_PARTITIONING) {
+        if(threadIdx.y == 0 && threadIdx.x == 0) {
+            p->tmp[0] = atomicAdd_system(p->worklist, 1);
+        }
+        __syncthreads();
+        p->current = p->tmp[0];
+    } else
+#endif
+    {
+        p->current = p->current + gridDim.x;
+    }
+    return p->current;
+}
+
+#endif
+
+#endif
diff --git a/HST-S/baselines/gpu/support/timer.h b/HST-S/baselines/gpu/support/timer.h
new file mode 100644
index 0000000..fceab04
--- /dev/null
+++ b/HST-S/baselines/gpu/support/timer.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2016 University of Cordoba and University of Illinois
+ * All rights reserved.
+ *
+ * Developed by:    IMPACT Research Group
+ *                  University of Cordoba and University of Illinois
+ *                  http://impact.crhc.illinois.edu/
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * with the Software without restriction, including without limitation the 
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ *      > Redistributions of source code must retain the above copyright notice,
+ *        this list of conditions and the following disclaimers.
+ *      > Redistributions in binary form must reproduce the above copyright
+ *        notice, this list of conditions and the following disclaimers in the
+ *        documentation and/or other materials provided with the distribution.
+ *      > Neither the names of IMPACT Research Group, University of Cordoba, 
+ *        University of Illinois nor the names of its contributors may be used 
+ *        to endorse or promote products derived from this Software without 
+ *        specific prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
+ * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
+ * THE SOFTWARE.
+ *
+ */
+
+#include <cuda_runtime.h>
+#include <sys/time.h>
+#include <iostream>
+#include <map>
+#include <string>
+
+using namespace std;
+
+struct Timer {
+
+    map<string, cudaEvent_t> startTime;
+    map<string, cudaEvent_t> stopTime;
+    map<string, float>         time;
+
+    void start(string name) {
+        if(!time.count(name)) {
+            cudaEventCreate(&startTime[name]); 
+            cudaEventCreate(&stopTime[name]);
+            time[name] = 0.0;
+        }
+        cudaEventRecord(startTime[name], 0);
+    }
+
+    void stop(string name) {
+        cudaEventRecord(stopTime[name],0);
+        cudaEventSynchronize(stopTime[name]);
+        float part_time = 0.0;
+        cudaEventElapsedTime(&part_time, startTime[name], stopTime[name]);
+        time[name] += part_time;
+    }
+
+    void print(string name, unsigned int REP) { printf("%s Time (ms): %f\n", name.c_str(), time[name] / REP); }
+
+    void release(string name){
+        cudaEventDestroy(startTime[name]); 
+        cudaEventDestroy(stopTime[name]);
+    }
+};
diff --git a/HST-S/baselines/gpu/support/verify.h b/HST-S/baselines/gpu/support/verify.h
new file mode 100644
index 0000000..9cb9e53
--- /dev/null
+++ b/HST-S/baselines/gpu/support/verify.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2016 University of Cordoba and University of Illinois
+ * All rights reserved.
+ *
+ * Developed by:    IMPACT Research Group
+ *                  University of Cordoba and University of Illinois
+ *                  http://impact.crhc.illinois.edu/
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * with the Software without restriction, including without limitation the 
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ *      > Redistributions of source code must retain the above copyright notice,
+ *        this list of conditions and the following disclaimers.
+ *      > Redistributions in binary form must reproduce the above copyright
+ *        notice, this list of conditions and the following disclaimers in the
+ *        documentation and/or other materials provided with the distribution.
+ *      > Neither the names of IMPACT Research Group, University of Cordoba, 
+ *        University of Illinois nor the names of its contributors may be used 
+ *        to endorse or promote products derived from this Software without 
+ *        specific prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
+ * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
+ * THE SOFTWARE.
+ *
+ */
+
+#include "common.h"
+#include <math.h>
+#include <string.h>
+
+inline int compare_output(unsigned int *outp, unsigned int *outpCPU, int bins) {
+    for(int i = 0; i < bins; i++) {
+        if(outp[i] != outpCPU[i]) {
+            printf("Test failed\n");
+            exit(EXIT_FAILURE);
+        }
+    }
+    return 0;
+}
+
+// Sequential implementation for comparison purposes
+inline void HistogramCPU(unsigned int *histo, unsigned int *data, int size, int bins) {
+    for(int i = 0; i < size; i++) {
+        // Read pixel
+        unsigned int d = ((data[i] * bins) >> 12);
+        // Vote in histogram
+        histo[d]++;
+    }
+}
+
+inline void verify(unsigned int *histo, unsigned int *input, int size, int bins) {
+    unsigned int *gold = (unsigned int *)malloc(bins * sizeof(unsigned int));
+    memset(gold, 0, bins * sizeof(unsigned int));
+    HistogramCPU(gold, input, size, bins);
+    compare_output(histo, gold, bins);
+    free(gold);
+}