PrIM -- first commit

author: Juan Gomez Luna <juan.gomez@safari.ethz.ch> 2021-06-16 19:46:05 +0200
committer: Juan Gomez Luna <juan.gomez@safari.ethz.ch> 2021-06-16 19:46:05 +0200
commit: 3de4b495fb176eba9a0eb517a4ce05903cb67acb (patch)
tree: fc6776a94549d2d4039898f183dbbeb2ce013ba9 /NW/baselines/gpu/common
parent: ef5c3688c486b80a56d3c1cded25f2b2387f2668 (diff)
2 files changed, 381 insertions, 0 deletions
diff --git a/NW/baselines/gpu/common/common.mk b/NW/baselines/gpu/common/common.mk
new file mode 100644
index 0000000..4a5d800
--- /dev/null
+++ b/NW/baselines/gpu/common/common.mk
@@ -0,0 +1,341 @@
+################################################################################
+#
+# Copyright 1993-2006 NVIDIA Corporation.  All rights reserved.
+#
+# NOTICE TO USER:   
+#
+# This source code is subject to NVIDIA ownership rights under U.S. and 
+# international Copyright laws.  
+#
+# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE 
+# CODE FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR 
+# IMPLIED WARRANTY OF ANY KIND.  NVIDIA DISCLAIMS ALL WARRANTIES WITH 
+# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF 
+# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.   
+# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, 
+# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS 
+# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE 
+# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE 
+# OR PERFORMANCE OF THIS SOURCE CODE.  
+#
+# U.S. Government End Users.  This source code is a "commercial item" as 
+# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting  of 
+# "commercial computer software" and "commercial computer software 
+# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) 
+# and is provided to the U.S. Government only as a commercial end item.  
+# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through 
+# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the 
+# source code with only those rights set forth herein.
+#
+################################################################################
+#
+# Common build script
+#
+################################################################################
+
+.SUFFIXES : .cu .cu_dbg_o .c_dbg_o .cpp_dbg_o .cu_rel_o .c_rel_o .cpp_rel_o .cubin
+
+# Add new SM Versions here as devices with new Compute Capability are released
+SM_VERSIONS := sm_10 sm_11 sm_12 sm_13
+
+CUDA_INSTALL_PATH ?= /usr/local/cuda
+
+ifdef cuda-install
+	CUDA_INSTALL_PATH := $(cuda-install)
+endif
+
+# detect OS
+OSUPPER = $(shell uname -s 2>/dev/null | tr [:lower:] [:upper:])
+OSLOWER = $(shell uname -s 2>/dev/null | tr [:upper:] [:lower:])
+# 'linux' is output for Linux system, 'darwin' for OS X
+DARWIN = $(strip $(findstring DARWIN, $(OSUPPER)))
+
+# Basic directory setup for SDK
+# (override directories only if they are not already defined)
+SRCDIR     ?= 
+ROOTDIR    ?= ..
+ROOTBINDIR ?= $(ROOTDIR)/../bin
+BINDIR     ?= $(ROOTBINDIR)/$(OSLOWER)
+ROOTOBJDIR ?= obj
+LIBDIR     := $(ROOTDIR)/../lib
+COMMONDIR  := $(ROOTDIR)/../common
+
+# Compilers
+NVCC       := $(CUDA_INSTALL_PATH)/bin/nvcc 
+CXX        := g++
+CC         := gcc
+LINK       := g++ -fPIC
+
+# Includes
+INCLUDES  += -I. -I$(CUDA_INSTALL_PATH)/include -I$(COMMONDIR)/inc
+
+# architecture flag for cubin build
+CUBIN_ARCH_FLAG := -m32
+
+# Warning flags
+CXXWARN_FLAGS := \
+	-W -Wall \
+	-Wimplicit \
+	-Wswitch \
+	-Wformat \
+	-Wchar-subscripts \
+	-Wparentheses \
+	-Wmultichar \
+	-Wtrigraphs \
+	-Wpointer-arith \
+	-Wcast-align \
+	-Wreturn-type \
+	-Wno-unused-function \
+	$(SPACE)
+
+CWARN_FLAGS := $(CXXWARN_FLAGS) \
+	-Wstrict-prototypes \
+	-Wmissing-prototypes \
+	-Wmissing-declarations \
+	-Wnested-externs \
+	-Wmain \
+
+# Compiler-specific flags
+NVCCFLAGS := 
+CXXFLAGS  := $(CXXWARN_FLAGS)
+CFLAGS    := $(CWARN_FLAGS)
+
+# Common flags
+COMMONFLAGS += $(INCLUDES) -DUNIX
+
+# Debug/release configuration
+ifeq ($(dbg),1)
+	COMMONFLAGS += -g
+	NVCCFLAGS   += -D_DEBUG
+	BINSUBDIR   := debug
+	LIBSUFFIX   := D
+else 
+	COMMONFLAGS += -O3 
+	BINSUBDIR   := release
+	LIBSUFFIX   :=
+	NVCCFLAGS   += --compiler-options -fno-strict-aliasing
+	CXXFLAGS    += -fno-strict-aliasing
+	CFLAGS      += -fno-strict-aliasing
+endif
+
+# append optional arch/SM version flags (such as -arch sm_11)
+#NVCCFLAGS += $(SMVERSIONFLAGS)
+
+# architecture flag for cubin build
+CUBIN_ARCH_FLAG := -m32
+
+# detect if 32 bit or 64 bit system
+HP_64 =	$(shell uname -m | grep 64)
+
+# OpenGL is used or not (if it is used, then it is necessary to include GLEW)
+ifeq ($(USEGLLIB),1)
+
+	ifneq ($(DARWIN),)
+		OPENGLLIB := -L/System/Library/Frameworks/OpenGL.framework/Libraries -lGL -lGLU $(COMMONDIR)/lib/$(OSLOWER)/libGLEW.a
+	else
+		OPENGLLIB := -lGL -lGLU
+
+		ifeq "$(strip $(HP_64))" ""
+			OPENGLLIB += -lGLEW
+		else
+			OPENGLLIB += -lGLEW_x86_64
+		endif
+	endif
+
+	CUBIN_ARCH_FLAG := -m64
+endif
+
+ifeq ($(USEGLUT),1)
+	ifneq ($(DARWIN),)
+		OPENGLLIB += -framework GLUT
+	else
+		OPENGLLIB += -lglut
+	endif
+endif
+
+ifeq ($(USEPARAMGL),1)
+	PARAMGLLIB := -lparamgl$(LIBSUFFIX)
+endif
+
+ifeq ($(USERENDERCHECKGL),1)
+	RENDERCHECKGLLIB := -lrendercheckgl$(LIBSUFFIX)
+endif
+
+ifeq ($(USECUDPP), 1)
+	ifeq "$(strip $(HP_64))" ""
+		CUDPPLIB := -lcudpp
+	else
+		CUDPPLIB := -lcudpp64
+	endif
+
+	CUDPPLIB := $(CUDPPLIB)$(LIBSUFFIX)
+
+	ifeq ($(emu), 1)
+		CUDPPLIB := $(CUDPPLIB)_emu
+	endif
+endif
+
+# Libs
+LIB       := -L$(CUDA_INSTALL_PATH)/lib -L$(LIBDIR) -L$(COMMONDIR)/lib/$(OSLOWER)
+ifeq ($(USEDRVAPI),1)
+   LIB += -lcuda ${OPENGLLIB} $(PARAMGLLIB) $(RENDERCHECKGLLIB) $(CUDPPLIB) ${LIB} 
+else
+   LIB += -lcudart ${OPENGLLIB} $(PARAMGLLIB) $(RENDERCHECKGLLIB) $(CUDPPLIB) ${LIB}
+endif
+
+ifeq ($(USECUFFT),1)
+  ifeq ($(emu),1)
+    LIB += -lcufftemu
+  else
+    LIB += -lcufft
+  endif
+endif
+
+ifeq ($(USECUBLAS),1)
+  ifeq ($(emu),1)
+    LIB += -lcublasemu
+  else
+    LIB += -lcublas
+  endif
+endif
+
+# Lib/exe configuration
+ifneq ($(STATIC_LIB),)
+	TARGETDIR := $(LIBDIR)
+	TARGET   := $(subst .a,$(LIBSUFFIX).a,$(LIBDIR)/$(STATIC_LIB))
+	LINKLINE  = ar qv $(TARGET) $(OBJS) 
+else
+	# LIB += -lcutil$(LIBSUFFIX)
+	# Device emulation configuration
+	ifeq ($(emu), 1)
+		NVCCFLAGS   += -deviceemu
+		CUDACCFLAGS += 
+		BINSUBDIR   := emu$(BINSUBDIR)
+		# consistency, makes developing easier
+		CXXFLAGS		+= -D__DEVICE_EMULATION__
+		CFLAGS			+= -D__DEVICE_EMULATION__
+	endif
+	TARGETDIR := $(BINDIR)/$(BINSUBDIR)
+	TARGET    := $(TARGETDIR)/$(EXECUTABLE)
+	LINKLINE  = $(LINK) -o $(TARGET) $(OBJS) $(LIB)
+endif
+
+# check if verbose 
+ifeq ($(verbose), 1)
+	VERBOSE :=
+else
+	VERBOSE := @
+endif
+
+################################################################################
+# Check for input flags and set compiler flags appropriately
+################################################################################
+ifeq ($(fastmath), 1)
+	NVCCFLAGS += -use_fast_math
+endif
+
+ifeq ($(keep), 1)
+	NVCCFLAGS += -keep
+	NVCC_KEEP_CLEAN := *.i* *.cubin *.cu.c *.cudafe* *.fatbin.c *.ptx
+endif
+
+ifdef maxregisters
+	NVCCFLAGS += -maxrregcount $(maxregisters)
+endif
+
+# Add cudacc flags
+NVCCFLAGS += $(CUDACCFLAGS)
+
+# workaround for mac os x cuda 1.1 compiler issues
+ifneq ($(DARWIN),)
+	NVCCFLAGS += --host-compilation=C
+endif
+
+# Add common flags
+NVCCFLAGS += $(COMMONFLAGS)
+CXXFLAGS  += $(COMMONFLAGS)
+CFLAGS    += $(COMMONFLAGS)
+
+ifeq ($(nvcc_warn_verbose),1)
+	NVCCFLAGS += $(addprefix --compiler-options ,$(CXXWARN_FLAGS)) 
+	NVCCFLAGS += --compiler-options -fno-strict-aliasing
+endif
+
+################################################################################
+# Set up object files
+################################################################################
+OBJDIR := $(ROOTOBJDIR)/$(BINSUBDIR)
+OBJS +=  $(patsubst %.cpp,$(OBJDIR)/%.cpp_o,$(notdir $(CCFILES)))
+OBJS +=  $(patsubst %.c,$(OBJDIR)/%.c_o,$(notdir $(CFILES)))
+OBJS +=  $(patsubst %.cu,$(OBJDIR)/%.cu_o,$(notdir $(CUFILES)))
+
+################################################################################
+# Set up cubin files
+################################################################################
+CUBINDIR := $(SRCDIR)data
+CUBINS +=  $(patsubst %.cu,$(CUBINDIR)/%.cubin,$(notdir $(CUBINFILES)))
+
+################################################################################
+# Rules
+################################################################################
+$(OBJDIR)/%.c_o : $(SRCDIR)%.c $(C_DEPS)
+	$(VERBOSE)$(CC) $(CFLAGS) -o $@ -c $<
+
+$(OBJDIR)/%.cpp_o : $(SRCDIR)%.cpp $(C_DEPS)
+	$(VERBOSE)$(CXX) $(CXXFLAGS) -o $@ -c $<
+
+$(OBJDIR)/%.cu_o : $(SRCDIR)%.cu $(CU_DEPS)
+	$(VERBOSE)$(NVCC) $(NVCCFLAGS) $(SMVERSIONFLAGS) -o $@ -c $<
+
+$(CUBINDIR)/%.cubin : $(SRCDIR)%.cu cubindirectory
+	$(VERBOSE)$(NVCC) $(CUBIN_ARCH_FLAG) $(NVCCFLAGS) $(SMVERSIONFLAGS) -o $@ -cubin $<
+
+#
+# The following definition is a template that gets instantiated for each SM
+# version (sm_10, sm_13, etc.) stored in SMVERSIONS.  It does 2 things:
+# 1. It adds to OBJS a .cu_sm_XX_o for each .cu file it finds in CUFILES_sm_XX.
+# 2. It generates a rule for building .cu_sm_XX_o files from the corresponding 
+#    .cu file.
+#
+# The intended use for this is to allow Makefiles that use common.mk to compile
+# files to different Compute Capability targets (aka SM arch version).  To do
+# so, in the Makefile, list files for each SM arch separately, like so:
+#
+# CUFILES_sm_10 := mycudakernel_sm10.cu app.cu
+# CUFILES_sm_12 := anothercudakernel_sm12.cu
+#
+define SMVERSION_template
+OBJS += $(patsubst %.cu,$(OBJDIR)/%.cu_$(1)_o,$(notdir $(CUFILES_$(1))))
+$(OBJDIR)/%.cu_$(1)_o : $(SRCDIR)%.cu $(CU_DEPS)
+	$(VERBOSE)$(NVCC) -o $$@ -c $$< $(NVCCFLAGS) -arch $(1)
+endef
+
+# This line invokes the above template for each arch version stored in
+# SM_VERSIONS.  The call funtion invokes the template, and the eval
+# function interprets it as make commands.
+$(foreach smver,$(SM_VERSIONS),$(eval $(call SMVERSION_template,$(smver))))
+
+$(TARGET): makedirectories $(OBJS) $(CUBINS) Makefile
+	$(VERBOSE)$(LINKLINE)
+
+cubindirectory:
+	$(VERBOSE)mkdir -p $(CUBINDIR)
+
+makedirectories:
+	$(VERBOSE)mkdir -p $(LIBDIR)
+	$(VERBOSE)mkdir -p $(OBJDIR)
+	$(VERBOSE)mkdir -p $(TARGETDIR)
+
+
+tidy :
+	$(VERBOSE)find . | egrep "#" | xargs rm -f
+	$(VERBOSE)find . | egrep "\~" | xargs rm -f
+
+clean : tidy
+	$(VERBOSE)rm -f $(OBJS)
+	$(VERBOSE)rm -f $(CUBINS)
+	$(VERBOSE)rm -f $(TARGET)
+	$(VERBOSE)rm -f $(NVCC_KEEP_CLEAN)
+
+clobber : clean
+	$(VERBOSE)rm -rf $(ROOTOBJDIR)
diff --git a/NW/baselines/gpu/common/make.config b/NW/baselines/gpu/common/make.config
new file mode 100644
index 0000000..38c3157
--- /dev/null
+++ b/NW/baselines/gpu/common/make.config
@@ -0,0 +1,40 @@
+# CUDA toolkit installation path
+CUDA_DIR = /usr/local/cuda
+
+# CUDA toolkit libraries
+CUDA_LIB_DIR := $(CUDA_DIR)/lib
+ifeq ($(shell uname -m), x86_64)
+     ifeq ($(shell if test -d $(CUDA_DIR)/lib64; then echo T; else echo F; fi), T)
+     	CUDA_LIB_DIR := $(CUDA_DIR)/lib64
+     endif
+endif
+
+# CUDA SDK installation path
+SDK_DIR = /usr/local/cuda/samples/
+
+# OPENCL
+
+# NVIDIA_DIR
+NV_OPENCL_DIR =/usr/local/cuda
+NV_OPENCL_INC = $(NV_OPENCL_DIR)/include
+NV_OPENCL_LIB = $(NV_OPENCL_DIR)/lib64
+
+# INTEL_DIR
+INTEL_OPENCL_DIR = /opt/intel/opencl
+INTEL_OPENCL_INC = $(INTEL_OPENCL_DIR)/include
+INTEL_OPENCL_LIB = $(INTEL_OPENCL_DIR)
+
+# AMD_DIR
+# OPENCL_DIR = /usr/local/cuda
+# OPENCL_INC = $(OPENCL_DIR)/include/ 
+# OPENCL_LIB = $(OPENCL_DIR)/lib/x86_64/ -lOpenCL
+#ifeq ($(shell uname -m), x86_64)
+#     ifeq ($(shell if test -d $(OPENCL_DIR)/lib/x86_64/; then echo T; else echo F; fi), T)
+#     	OPENCL_LIB = $(OPENCL_DIR)/lib/x86_64/
+#     endif
+#endif
+
+# DEFAULT OCL
+OPENCL_DIR = $(NV_OPENCL_DIR)
+OPENCL_INC = $(NV_OPENCL_INC)
+OPENCL_LIB = $(NV_OPENCL_LIB)
author	Juan Gomez Luna <juan.gomez@safari.ethz.ch>	2021-06-16 19:46:05 +0200
committer	Juan Gomez Luna <juan.gomez@safari.ethz.ch>	2021-06-16 19:46:05 +0200
commit	3de4b495fb176eba9a0eb517a4ce05903cb67acb (patch)
tree	fc6776a94549d2d4039898f183dbbeb2ce013ba9 /NW/baselines/gpu/common
parent	ef5c3688c486b80a56d3c1cded25f2b2387f2668 (diff)