summaryrefslogtreecommitdiff
path: root/NW/baselines/gpu/common
diff options
context:
space:
mode:
authorJuan Gomez Luna <juan.gomez@safari.ethz.ch>2021-06-16 19:46:05 +0200
committerJuan Gomez Luna <juan.gomez@safari.ethz.ch>2021-06-16 19:46:05 +0200
commit3de4b495fb176eba9a0eb517a4ce05903cb67acb (patch)
treefc6776a94549d2d4039898f183dbbeb2ce013ba9 /NW/baselines/gpu/common
parentef5c3688c486b80a56d3c1cded25f2b2387f2668 (diff)
PrIM -- first commit
Diffstat (limited to 'NW/baselines/gpu/common')
-rw-r--r--NW/baselines/gpu/common/common.mk341
-rw-r--r--NW/baselines/gpu/common/make.config40
2 files changed, 381 insertions, 0 deletions
diff --git a/NW/baselines/gpu/common/common.mk b/NW/baselines/gpu/common/common.mk
new file mode 100644
index 0000000..4a5d800
--- /dev/null
+++ b/NW/baselines/gpu/common/common.mk
@@ -0,0 +1,341 @@
+################################################################################
+#
+# Copyright 1993-2006 NVIDIA Corporation. All rights reserved.
+#
+# NOTICE TO USER:
+#
+# This source code is subject to NVIDIA ownership rights under U.S. and
+# international Copyright laws.
+#
+# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
+# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
+# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
+# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
+# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
+# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
+# OR PERFORMANCE OF THIS SOURCE CODE.
+#
+# U.S. Government End Users. This source code is a "commercial item" as
+# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
+# "commercial computer software" and "commercial computer software
+# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
+# and is provided to the U.S. Government only as a commercial end item.
+# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
+# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
+# source code with only those rights set forth herein.
+#
+################################################################################
+#
+# Common build script
+#
+################################################################################
+
+# Register the suffixes listed here (CUDA sources, debug/release object
+# suffixes, cubin outputs) with make.
+.SUFFIXES : .cu .cu_dbg_o .c_dbg_o .cpp_dbg_o .cu_rel_o .c_rel_o .cpp_rel_o .cubin
+
+# Add new SM Versions here as devices with new Compute Capability are released
+SM_VERSIONS := sm_10 sm_11 sm_12 sm_13
+
+# Default toolkit location; ?= leaves an already-defined value untouched.
+CUDA_INSTALL_PATH ?= /usr/local/cuda
+
+# A non-empty `cuda-install` variable replaces the value chosen above.
+ifdef cuda-install
+ CUDA_INSTALL_PATH := $(cuda-install)
+endif
+
+# detect OS
+# Assigned with := so uname/tr run once at parse time instead of on every
+# expansion. The tr character classes are quoted: unquoted, the shell treats
+# [:lower:] as a glob and may replace it with a matching single-character
+# file name from the current directory.
+OSUPPER := $(shell uname -s 2>/dev/null | tr '[:lower:]' '[:upper:]')
+# 'linux' is output for Linux system, 'darwin' for OS X
+OSLOWER := $(shell uname -s 2>/dev/null | tr '[:upper:]' '[:lower:]')
+# Non-empty only when the upper-cased OS name contains "DARWIN".
+DARWIN := $(strip $(findstring DARWIN, $(OSUPPER)))
+
+# Basic directory setup for SDK
+# (override directories only if they are not already defined)
+# SRCDIR is prepended directly to source names in the build rules (no "/" is
+# inserted), so a non-empty value needs its own trailing slash.
+SRCDIR ?=
+ROOTDIR ?= ..
+ROOTBINDIR ?= $(ROOTDIR)/../bin
+# Binaries are grouped by lower-cased OS name.
+BINDIR ?= $(ROOTBINDIR)/$(OSLOWER)
+ROOTOBJDIR ?= obj
+# Unlike the variables above, these two are always reassigned (:=).
+LIBDIR := $(ROOTDIR)/../lib
+COMMONDIR := $(ROOTDIR)/../common
+
+# Compilers
+# nvcc comes from the selected CUDA installation; the host compilers and the
+# link driver are set unconditionally to gcc/g++.
+NVCC := $(CUDA_INSTALL_PATH)/bin/nvcc
+CXX := g++
+CC := gcc
+LINK := g++ -fPIC
+
+# Includes
+# Appended with += so include paths already present in INCLUDES are kept.
+INCLUDES += -I. -I$(CUDA_INSTALL_PATH)/include -I$(COMMONDIR)/inc
+
+# architecture flag for cubin build
+# Starts as -m32; it is changed to -m64 further down when USEGLLIB is 1.
+CUBIN_ARCH_FLAG := -m32
+
+# Warning flags
+# Host C++ warning set; also reused below as the base of the C set.
+CXXWARN_FLAGS := \
+  -W -Wall \
+  -Wimplicit \
+  -Wswitch \
+  -Wformat \
+  -Wchar-subscripts \
+  -Wparentheses \
+  -Wmultichar \
+  -Wtrigraphs \
+  -Wpointer-arith \
+  -Wcast-align \
+  -Wreturn-type \
+  -Wno-unused-function \
+  $(SPACE)
+
+# C warning set: the C++ flags plus prototype/declaration checks.
+# The list ends without a line continuation so it no longer depends on the
+# following line being blank.
+CWARN_FLAGS := $(CXXWARN_FLAGS) \
+  -Wstrict-prototypes \
+  -Wmissing-prototypes \
+  -Wmissing-declarations \
+  -Wnested-externs \
+  -Wmain
+
+# Compiler-specific flags
+# NVCCFLAGS starts empty; the host compilers start from their warning sets.
+NVCCFLAGS :=
+CXXFLAGS := $(CXXWARN_FLAGS)
+CFLAGS := $(CWARN_FLAGS)
+
+# Common flags
+# Shared by nvcc, g++ and gcc; merged into each compiler's flags later on.
+COMMONFLAGS += $(INCLUDES) -DUNIX
+
+# Debug/release configuration
+# dbg=1 adds -g and -D_DEBUG and selects the "debug" output directory and the
+# "D" library suffix; any other value builds with -O3 into "release" and
+# disables strict aliasing for all three compilers.
+ifeq ($(dbg),1)
+  COMMONFLAGS += -g
+  NVCCFLAGS += -D_DEBUG
+  BINSUBDIR := debug
+  LIBSUFFIX := D
+else
+  COMMONFLAGS += -O3
+  BINSUBDIR := release
+  LIBSUFFIX :=
+  NVCCFLAGS += --compiler-options -fno-strict-aliasing
+  CXXFLAGS += -fno-strict-aliasing
+  CFLAGS += -fno-strict-aliasing
+endif
+
+# append optional arch/SM version flags (such as -arch sm_11)
+# (left disabled: the .cu and .cubin rules pass $(SMVERSIONFLAGS) themselves)
+#NVCCFLAGS += $(SMVERSIONFLAGS)
+
+# CUBIN_ARCH_FLAG is not reassigned here: it already holds -m32 from the
+# compiler setup above and nothing in between changes it.
+
+# detect if 32 bit or 64 bit system
+# Non-empty when the machine name reported by uname contains "64".
+# := runs the shell pipeline once instead of on every expansion.
+HP_64 := $(shell uname -m | grep 64)
+
+# OpenGL is used or not (if it is used, then it is necessary to include GLEW)
+# With USEGLLIB=1, OPENGLLIB receives the GL/GLU/GLEW link flags for the host.
+ifeq ($(USEGLLIB),1)
+
+ # OS X: GL/GLU from the OpenGL framework directory plus the static GLEW
+ # archive under COMMONDIR.
+ ifneq ($(DARWIN),)
+ OPENGLLIB := -L/System/Library/Frameworks/OpenGL.framework/Libraries -lGL -lGLU $(COMMONDIR)/lib/$(OSLOWER)/libGLEW.a
+ else
+ OPENGLLIB := -lGL -lGLU
+
+ # HP_64 is empty when `uname -m` contains no "64"; choose the GLEW
+ # library name accordingly.
+ ifeq "$(strip $(HP_64))" ""
+ OPENGLLIB += -lGLEW
+ else
+ OPENGLLIB += -lGLEW_x86_64
+ endif
+ endif
+
+ # NOTE(review): -m64 is selected whenever USEGLLIB is 1, independent of
+ # HP_64 — confirm this coupling is intended.
+ CUBIN_ARCH_FLAG := -m64
+endif
+
+# GLUT: on request, append the platform's GLUT link flags to OPENGLLIB.
+ifeq (1,$(USEGLUT))
+  ifeq ($(DARWIN),)
+    OPENGLLIB += -lglut
+  else
+    OPENGLLIB += -framework GLUT
+  endif
+endif
+
+# paramgl helper library; LIBSUFFIX appends "D" for debug builds.
+ifeq (1,$(USEPARAMGL))
+  PARAMGLLIB := -lparamgl$(LIBSUFFIX)
+endif
+
+# rendercheckgl helper library, same suffix convention.
+ifeq (1,$(USERENDERCHECKGL))
+  RENDERCHECKGLLIB := -lrendercheckgl$(LIBSUFFIX)
+endif
+
+# CUDPP: the library name is assembled as
+#   -lcudpp [64 when HP_64 is non-empty] [LIBSUFFIX] [_emu when emu is 1]
+ifeq ($(USECUDPP), 1)
+  CUDPPLIB := -lcudpp$(if $(strip $(HP_64)),64)$(LIBSUFFIX)
+
+  ifeq ($(emu), 1)
+    CUDPPLIB := $(CUDPPLIB)_emu
+  endif
+endif
+
+# Libs
+# Library search path: CUDA toolkit, project lib directory, per-OS common libs.
+LIB := -L$(CUDA_INSTALL_PATH)/lib -L$(LIBDIR) -L$(COMMONDIR)/lib/$(OSLOWER)
+# On 64-bit hosts the toolkit may keep its libraries in lib64; search that
+# directory first when it exists ($(wildcard) is empty otherwise, so nothing
+# is added for toolkits that only ship lib).
+ifneq ($(strip $(HP_64)),)
+  LIB := $(addprefix -L,$(wildcard $(CUDA_INSTALL_PATH)/lib64)) $(LIB)
+endif
+
+# Driver API builds link libcuda; everything else links the CUDA runtime.
+# LIB is not re-appended to itself here: it is simply expanded, so doing so
+# would only repeat the -L flags already at the front of the list.
+ifeq ($(USEDRVAPI),1)
+  LIB += -lcuda $(OPENGLLIB) $(PARAMGLLIB) $(RENDERCHECKGLLIB) $(CUDPPLIB)
+else
+  LIB += -lcudart $(OPENGLLIB) $(PARAMGLLIB) $(RENDERCHECKGLLIB) $(CUDPPLIB)
+endif
+
+# Optional CUFFT; emu=1 selects the emulation build of the library.
+ifeq ($(USECUFFT),1)
+  ifeq ($(emu),1)
+    LIB += -lcufftemu
+  else
+    LIB += -lcufft
+  endif
+endif
+
+# Optional CUBLAS; emu=1 selects the emulation build of the library.
+ifeq ($(USECUBLAS),1)
+  ifeq ($(emu),1)
+    LIB += -lcublasemu
+  else
+    LIB += -lcublas
+  endif
+endif
+
+# Lib/exe configuration
+# A non-empty STATIC_LIB builds an archive in LIBDIR; otherwise an executable
+# is linked into BINDIR/BINSUBDIR. LINKLINE is deferred (=) so $(OBJS) is
+# expanded when the link recipe runs.
+ifneq ($(STATIC_LIB),)
+  TARGETDIR := $(LIBDIR)
+  # Insert LIBSUFFIX in front of the trailing ".a" only; a plain $(subst)
+  # would rewrite every ".a" occurring anywhere in the path.
+  TARGET := $(patsubst %.a,%$(LIBSUFFIX).a,$(LIBDIR)/$(STATIC_LIB))
+  # r replaces members that already exist (q would append duplicates each time
+  # the archive is rebuilt), c creates the archive silently, s writes the
+  # symbol index, v keeps the per-member listing.
+  LINKLINE = ar rcsv $(TARGET) $(OBJS)
+else
+  # LIB += -lcutil$(LIBSUFFIX)
+  # Device emulation configuration
+  ifeq ($(emu), 1)
+    NVCCFLAGS += -deviceemu
+    CUDACCFLAGS +=
+    # Emulation builds go to emudebug / emurelease.
+    BINSUBDIR := emu$(BINSUBDIR)
+    # consistency, makes developing easier
+    CXXFLAGS += -D__DEVICE_EMULATION__
+    CFLAGS += -D__DEVICE_EMULATION__
+  endif
+  TARGETDIR := $(BINDIR)/$(BINSUBDIR)
+  TARGET := $(TARGETDIR)/$(EXECUTABLE)
+  LINKLINE = $(LINK) -o $(TARGET) $(OBJS) $(LIB)
+endif
+
+# check if verbose
+# Every recipe line starts with $(VERBOSE). It is "@" (hide the command echo)
+# unless verbose is 1, in which case it is empty and commands are printed.
+ifneq ($(verbose), 1)
+  VERBOSE := @
+else
+  VERBOSE :=
+endif
+
+################################################################################
+# Check for input flags and set compiler flags appropriately
+################################################################################
+# fastmath=1: pass -use_fast_math to nvcc.
+ifeq ($(fastmath), 1)
+ NVCCFLAGS += -use_fast_math
+endif
+
+# keep=1: pass -keep to nvcc and record the file patterns that the clean
+# target removes via NVCC_KEEP_CLEAN.
+ifeq ($(keep), 1)
+ NVCCFLAGS += -keep
+ NVCC_KEEP_CLEAN := *.i* *.cubin *.cu.c *.cudafe* *.fatbin.c *.ptx
+endif
+
+# maxregisters=<n>: forwarded to nvcc as -maxrregcount <n>.
+ifdef maxregisters
+ NVCCFLAGS += -maxrregcount $(maxregisters)
+endif
+
+# Add cudacc flags
+NVCCFLAGS += $(CUDACCFLAGS)
+
+# workaround for mac os x cuda 1.1 compiler issues
+ifneq ($(DARWIN),)
+ NVCCFLAGS += --host-compilation=C
+endif
+
+# Add common flags
+# COMMONFLAGS (include paths, -DUNIX, -g or -O3) is appended for all three
+# compilers.
+NVCCFLAGS += $(COMMONFLAGS)
+CXXFLAGS += $(COMMONFLAGS)
+CFLAGS += $(COMMONFLAGS)
+
+# nvcc_warn_verbose=1: hand each host C++ warning flag to nvcc, each one
+# prefixed with its own --compiler-options.
+ifeq ($(nvcc_warn_verbose),1)
+ NVCCFLAGS += $(addprefix --compiler-options ,$(CXXWARN_FLAGS))
+ NVCCFLAGS += --compiler-options -fno-strict-aliasing
+endif
+
+################################################################################
+# Set up object files
+################################################################################
+# Objects go into a per-configuration directory under ROOTOBJDIR.
+OBJDIR := $(ROOTOBJDIR)/$(BINSUBDIR)
+# $(notdir) drops the directory part of each source, so every object lands
+# directly in OBJDIR. The source suffix is kept inside the object suffix
+# (.cpp_o, .c_o, .cu_o), which is what the pattern rules match on.
+OBJS += $(patsubst %.cpp,$(OBJDIR)/%.cpp_o,$(notdir $(CCFILES)))
+OBJS += $(patsubst %.c,$(OBJDIR)/%.c_o,$(notdir $(CFILES)))
+OBJS += $(patsubst %.cu,$(OBJDIR)/%.cu_o,$(notdir $(CUFILES)))
+
+################################################################################
+# Set up cubin files
+################################################################################
+# cubins are written to the "data" directory under SRCDIR (SRCDIR is used as a
+# raw prefix, without an added "/").
+CUBINDIR := $(SRCDIR)data
+CUBINS += $(patsubst %.cu,$(CUBINDIR)/%.cubin,$(notdir $(CUBINFILES)))
+
+################################################################################
+# Rules
+################################################################################
+# The directory-creating targets are listed as order-only prerequisites (after
+# "|"): they are run before the compile step, but because they never produce a
+# file of their own name they must not count when make decides whether an
+# output is out of date. As a normal prerequisite, cubindirectory forced every
+# cubin to be rebuilt on each invocation.
+
+# C source -> object
+$(OBJDIR)/%.c_o : $(SRCDIR)%.c $(C_DEPS) | makedirectories
+	$(VERBOSE)$(CC) $(CFLAGS) -o $@ -c $<
+
+# C++ source -> object
+$(OBJDIR)/%.cpp_o : $(SRCDIR)%.cpp $(C_DEPS) | makedirectories
+	$(VERBOSE)$(CXX) $(CXXFLAGS) -o $@ -c $<
+
+# CUDA source -> object, with the optional SM version flags
+$(OBJDIR)/%.cu_o : $(SRCDIR)%.cu $(CU_DEPS) | makedirectories
+	$(VERBOSE)$(NVCC) $(NVCCFLAGS) $(SMVERSIONFLAGS) -o $@ -c $<
+
+# CUDA source -> cubin, using the cubin architecture flag
+$(CUBINDIR)/%.cubin : $(SRCDIR)%.cu | cubindirectory
+	$(VERBOSE)$(NVCC) $(CUBIN_ARCH_FLAG) $(NVCCFLAGS) $(SMVERSIONFLAGS) -o $@ -cubin $<
+
+#
+# The following definition is a template that gets instantiated for each SM
+# version (sm_10, sm_13, etc.) stored in SM_VERSIONS. It does 2 things:
+# 1. It adds to OBJS a .cu_sm_XX_o for each .cu file it finds in CUFILES_sm_XX.
+# 2. It generates a rule for building .cu_sm_XX_o files from the corresponding
+# .cu file.
+#
+# The intended use for this is to allow Makefiles that use common.mk to compile
+# files to different Compute Capability targets (aka SM arch version). To do
+# so, in the Makefile, list files for each SM arch separately, like so:
+#
+# CUFILES_sm_10 := mycudakernel_sm10.cu app.cu
+# CUFILES_sm_12 := anothercudakernel_sm12.cu
+#
+# $(1) is the SM version name. $$@ and $$< are written with a doubled "$" so
+# they pass through $(call) unexpanded and are only resolved when the generated
+# recipe runs; the single-"$" references are expanded when the template is
+# instantiated.
+#
+define SMVERSION_template
+OBJS += $(patsubst %.cu,$(OBJDIR)/%.cu_$(1)_o,$(notdir $(CUFILES_$(1))))
+$(OBJDIR)/%.cu_$(1)_o : $(SRCDIR)%.cu $(CU_DEPS)
+ $(VERBOSE)$(NVCC) -o $$@ -c $$< $(NVCCFLAGS) -arch $(1)
+endef
+
+# This line invokes the above template for each arch version stored in
+# SM_VERSIONS. The call function invokes the template, and the eval
+# function interprets it as make commands.
+$(foreach smver,$(SM_VERSIONS),$(eval $(call SMVERSION_template,$(smver))))
+
+# Final link/archive step. This stays the first explicit rule of the file so
+# that it remains the default goal when the including Makefile defines none.
+# makedirectories is order-only ("|"): it never creates a file of that name, so
+# as a normal prerequisite it made the target look out of date and forced a
+# relink on every invocation.
+$(TARGET): $(OBJS) $(CUBINS) Makefile | makedirectories
+	$(VERBOSE)$(LINKLINE)
+
+# Order-only prerequisites are processed after normal ones, so the objects
+# need their own edge to makedirectories to guarantee OBJDIR exists before
+# the first compile (also under make -j).
+$(OBJS): | makedirectories
+
+# These names are commands, not files.
+.PHONY: makedirectories cubindirectory
+
+# Create the output directory for cubin files.
+cubindirectory:
+	$(VERBOSE)mkdir -p $(CUBINDIR)
+
+# Create the library, object and target output directories.
+makedirectories:
+	$(VERBOSE)mkdir -p $(LIBDIR)
+	$(VERBOSE)mkdir -p $(OBJDIR)
+	$(VERBOSE)mkdir -p $(TARGETDIR)
+
+
+# Housekeeping targets are commands, not files; without this declaration a
+# file named "clean" (or "tidy"/"clobber") would silently disable them.
+.PHONY: tidy clean clobber
+
+# Remove editor leftovers: regular files whose name contains "#" or "~".
+# find does the matching and deletion itself, so names with whitespace are
+# handled, only the file name (not a parent directory) is matched, and
+# directories are never handed to rm.
+tidy :
+	$(VERBOSE)find . -type f \( -name '*#*' -o -name '*~*' \) -exec rm -f {} +
+
+# Remove everything the build produced, plus nvcc -keep leftovers when
+# NVCC_KEEP_CLEAN is set.
+clean : tidy
+	$(VERBOSE)rm -f $(OBJS)
+	$(VERBOSE)rm -f $(CUBINS)
+	$(VERBOSE)rm -f $(TARGET)
+	$(VERBOSE)rm -f $(NVCC_KEEP_CLEAN)
+
+# clean, then delete the whole object tree.
+clobber : clean
+	$(VERBOSE)rm -rf $(ROOTOBJDIR)
diff --git a/NW/baselines/gpu/common/make.config b/NW/baselines/gpu/common/make.config
new file mode 100644
index 0000000..38c3157
--- /dev/null
+++ b/NW/baselines/gpu/common/make.config
@@ -0,0 +1,40 @@
+# CUDA toolkit installation path
+CUDA_DIR = /usr/local/cuda
+
+# CUDA toolkit libraries
+# Default to lib; switch to lib64 only on x86_64 hosts where that directory
+# exists inside the toolkit ("dir/." matches only an existing directory).
+CUDA_LIB_DIR := $(CUDA_DIR)/lib
+ifeq ($(shell uname -m), x86_64)
+  ifneq ($(wildcard $(CUDA_DIR)/lib64/.),)
+    CUDA_LIB_DIR := $(CUDA_DIR)/lib64
+  endif
+endif
+
+# CUDA SDK installation path
+SDK_DIR = /usr/local/cuda/samples/
+
+# OPENCL
+# One DIR/INC/LIB triple per vendor; the "DEFAULT OCL" section at the end
+# selects which triple the generic OPENCL_* variables point to.
+
+# NVIDIA_DIR
+# NOTE(review): NV_OPENCL_LIB is fixed to lib64, whereas CUDA_LIB_DIR above is
+# probed at parse time — confirm this is intended for hosts without lib64.
+NV_OPENCL_DIR =/usr/local/cuda
+NV_OPENCL_INC = $(NV_OPENCL_DIR)/include
+NV_OPENCL_LIB = $(NV_OPENCL_DIR)/lib64
+
+# INTEL_DIR
+# The Intel library path is the installation directory itself.
+INTEL_OPENCL_DIR = /opt/intel/opencl
+INTEL_OPENCL_INC = $(INTEL_OPENCL_DIR)/include
+INTEL_OPENCL_LIB = $(INTEL_OPENCL_DIR)
+
+# AMD_DIR
+# (disabled: the whole section is commented out)
+# OPENCL_DIR = /usr/local/cuda
+# OPENCL_INC = $(OPENCL_DIR)/include/
+# OPENCL_LIB = $(OPENCL_DIR)/lib/x86_64/ -lOpenCL
+#ifeq ($(shell uname -m), x86_64)
+# ifeq ($(shell if test -d $(OPENCL_DIR)/lib/x86_64/; then echo T; else echo F; fi), T)
+# OPENCL_LIB = $(OPENCL_DIR)/lib/x86_64/
+# endif
+#endif
+
+# DEFAULT OCL
+# The generic variables resolve to the NVIDIA paths.
+OPENCL_DIR = $(NV_OPENCL_DIR)
+OPENCL_INC = $(NV_OPENCL_INC)
+OPENCL_LIB = $(NV_OPENCL_LIB)