diff options
author | Juan Gomez Luna <juan.gomez@safari.ethz.ch> | 2021-06-16 19:46:05 +0200 |
---|---|---|
committer | Juan Gomez Luna <juan.gomez@safari.ethz.ch> | 2021-06-16 19:46:05 +0200 |
commit | 3de4b495fb176eba9a0eb517a4ce05903cb67acb (patch) | |
tree | fc6776a94549d2d4039898f183dbbeb2ce013ba9 /NW/baselines/gpu/common | |
parent | ef5c3688c486b80a56d3c1cded25f2b2387f2668 (diff) |
PrIM -- first commit
Diffstat (limited to 'NW/baselines/gpu/common')
-rw-r--r-- | NW/baselines/gpu/common/common.mk | 341 | ||||
-rw-r--r-- | NW/baselines/gpu/common/make.config | 40 |
2 files changed, 381 insertions, 0 deletions
diff --git a/NW/baselines/gpu/common/common.mk b/NW/baselines/gpu/common/common.mk new file mode 100644 index 0000000..4a5d800 --- /dev/null +++ b/NW/baselines/gpu/common/common.mk @@ -0,0 +1,341 @@ +################################################################################ +# +# Copyright 1993-2006 NVIDIA Corporation. All rights reserved. +# +# NOTICE TO USER: +# +# This source code is subject to NVIDIA ownership rights under U.S. and +# international Copyright laws. +# +# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE +# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR +# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH +# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. +# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, +# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +# OR PERFORMANCE OF THIS SOURCE CODE. +# +# U.S. Government End Users. This source code is a "commercial item" as +# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of +# "commercial computer software" and "commercial computer software +# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) +# and is provided to the U.S. Government only as a commercial end item. +# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through +# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the +# source code with only those rights set forth herein. 
#
################################################################################
#
# Common build script
#
# Shared settings for makefiles that include this file.  Knobs read below
# (set them on the command line or in the including Makefile):
#   cuda-install=<dir>    override CUDA_INSTALL_PATH
#   dbg=1                 debug build (-g, -D_DEBUG) instead of -O3
#   emu=1                 device-emulation build
#   verbose=1             echo commands instead of prefixing them with '@'
#   fastmath=1, keep=1, maxregisters=<n>    extra nvcc options
#   USEGLLIB, USEGLUT, USEPARAMGL, USERENDERCHECKGL, USECUDPP, USEDRVAPI,
#   USECUFFT, USECUBLAS (=1)                select libraries to link against
#   STATIC_LIB=<name>.a   build a static library instead of $(EXECUTABLE)
#
################################################################################

.SUFFIXES : .cu .cu_dbg_o .c_dbg_o .cpp_dbg_o .cu_rel_o .c_rel_o .cpp_rel_o .cubin

# Add new SM Versions here as devices with new Compute Capability are released
SM_VERSIONS := sm_10 sm_11 sm_12 sm_13

CUDA_INSTALL_PATH ?= /usr/local/cuda

ifdef cuda-install
  CUDA_INSTALL_PATH := $(cuda-install)
endif

# detect OS
# The character classes are quoted so the shell cannot glob-expand them against
# files in the current directory; ':=' makes each command run exactly once.
OSUPPER := $(shell uname -s 2>/dev/null | tr '[:lower:]' '[:upper:]')
OSLOWER := $(shell uname -s 2>/dev/null | tr '[:upper:]' '[:lower:]')
# 'linux' is output for Linux system, 'darwin' for OS X
DARWIN  := $(strip $(findstring DARWIN, $(OSUPPER)))

# detect if 32 bit or 64 bit system (non-empty on a 64-bit machine name)
HP_64   := $(shell uname -m | grep 64)

# Basic directory setup for SDK
# (override directories only if they are not already defined)
SRCDIR     ?=
ROOTDIR    ?= ..
ROOTBINDIR ?= $(ROOTDIR)/../bin
BINDIR     ?= $(ROOTBINDIR)/$(OSLOWER)
ROOTOBJDIR ?= obj
LIBDIR     := $(ROOTDIR)/../lib
COMMONDIR  := $(ROOTDIR)/../common

# Compilers
NVCC := $(CUDA_INSTALL_PATH)/bin/nvcc
CXX  := g++
CC   := gcc
LINK := g++ -fPIC

# Includes
INCLUDES += -I. -I$(CUDA_INSTALL_PATH)/include -I$(COMMONDIR)/inc

# architecture flag for cubin build (switched to -m64 further down when
# USEGLLIB=1)
CUBIN_ARCH_FLAG := -m32

# Warning flags
CXXWARN_FLAGS := \
  -W -Wall \
  -Wimplicit \
  -Wswitch \
  -Wformat \
  -Wchar-subscripts \
  -Wparentheses \
  -Wmultichar \
  -Wtrigraphs \
  -Wpointer-arith \
  -Wcast-align \
  -Wreturn-type \
  -Wno-unused-function

CWARN_FLAGS := $(CXXWARN_FLAGS) \
  -Wstrict-prototypes \
  -Wmissing-prototypes \
  -Wmissing-declarations \
  -Wnested-externs \
  -Wmain

# Compiler-specific flags
NVCCFLAGS :=
CXXFLAGS  := $(CXXWARN_FLAGS)
CFLAGS    := $(CWARN_FLAGS)

# Common flags
COMMONFLAGS += $(INCLUDES) -DUNIX

# Debug/release configuration
ifeq ($(dbg),1)
  COMMONFLAGS += -g
  NVCCFLAGS   += -D_DEBUG
  BINSUBDIR   := debug
  LIBSUFFIX   := D
else
  COMMONFLAGS += -O3
  BINSUBDIR   := release
  LIBSUFFIX   :=
  NVCCFLAGS   += --compiler-options -fno-strict-aliasing
  CXXFLAGS    += -fno-strict-aliasing
  CFLAGS      += -fno-strict-aliasing
endif

# append optional arch/SM version flags (such as -arch sm_11)
#NVCCFLAGS += $(SMVERSIONFLAGS)

# OpenGL is used or not (if it is used, then it is necessary to include GLEW)
ifeq ($(USEGLLIB),1)
  ifneq ($(DARWIN),)
    OPENGLLIB := -L/System/Library/Frameworks/OpenGL.framework/Libraries -lGL -lGLU $(COMMONDIR)/lib/$(OSLOWER)/libGLEW.a
  else
    OPENGLLIB := -lGL -lGLU
    ifeq "$(strip $(HP_64))" ""
      OPENGLLIB += -lGLEW
    else
      OPENGLLIB += -lGLEW_x86_64
    endif
  endif

  # NOTE(review): -m64 is selected whenever USEGLLIB=1, independent of HP_64 --
  # confirm this is intended.
  CUBIN_ARCH_FLAG := -m64
endif

ifeq ($(USEGLUT),1)
  ifneq ($(DARWIN),)
    OPENGLLIB += -framework GLUT
  else
    OPENGLLIB += -lglut
  endif
endif

ifeq ($(USEPARAMGL),1)
  PARAMGLLIB := -lparamgl$(LIBSUFFIX)
endif

ifeq ($(USERENDERCHECKGL),1)
  RENDERCHECKGLLIB := -lrendercheckgl$(LIBSUFFIX)
endif

# CUDPP library name: [64-bit suffix][debug suffix][_emu]
ifeq ($(USECUDPP),1)
  ifeq "$(strip $(HP_64))" ""
    CUDPPLIB := -lcudpp
  else
    CUDPPLIB := -lcudpp64
  endif

  CUDPPLIB := $(CUDPPLIB)$(LIBSUFFIX)

  ifeq ($(emu),1)
    CUDPPLIB := $(CUDPPLIB)_emu
  endif
endif

# Libs
# Library search path.  On a 64-bit host the toolkit's lib64 directory is
# searched as well, but only when it actually exists.
LIB := -L$(CUDA_INSTALL_PATH)/lib
ifneq ($(strip $(HP_64)),)
  LIB += $(addprefix -L,$(wildcard $(CUDA_INSTALL_PATH)/lib64))
endif
LIB += -L$(LIBDIR) -L$(COMMONDIR)/lib/$(OSLOWER)

# Driver API links libcuda, everything else links the runtime (libcudart).
# (The search path is no longer appended to itself a second time.)
ifeq ($(USEDRVAPI),1)
  LIB += -lcuda $(OPENGLLIB) $(PARAMGLLIB) $(RENDERCHECKGLLIB) $(CUDPPLIB)
else
  LIB += -lcudart $(OPENGLLIB) $(PARAMGLLIB) $(RENDERCHECKGLLIB) $(CUDPPLIB)
endif

ifeq ($(USECUFFT),1)
  ifeq ($(emu),1)
    LIB += -lcufftemu
  else
    LIB += -lcufft
  endif
endif

ifeq ($(USECUBLAS),1)
  ifeq ($(emu),1)
    LIB += -lcublasemu
  else
    LIB += -lcublas
  endif
endif

# Lib/exe configuration
# LINKLINE stays recursively expanded ('=') because OBJS is only complete
# after the object-file section further down has been read.
ifneq ($(STATIC_LIB),)
  TARGETDIR := $(LIBDIR)
  TARGET    := $(subst .a,$(LIBSUFFIX).a,$(LIBDIR)/$(STATIC_LIB))
  LINKLINE   = ar qv $(TARGET) $(OBJS)
else
  # LIB += -lcutil$(LIBSUFFIX)
  # Device emulation configuration
  ifeq ($(emu),1)
    NVCCFLAGS   += -deviceemu
    CUDACCFLAGS +=
    BINSUBDIR   := emu$(BINSUBDIR)
    # consistency, makes developing easier
    CXXFLAGS    += -D__DEVICE_EMULATION__
    CFLAGS      += -D__DEVICE_EMULATION__
  endif
  TARGETDIR := $(BINDIR)/$(BINSUBDIR)
  TARGET    := $(TARGETDIR)/$(EXECUTABLE)
  LINKLINE   = $(LINK) -o $(TARGET) $(OBJS) $(LIB)
endif

# check if verbose
# VERBOSE is prepended to every recipe line: empty echoes the command,
# '@' silences it.
ifeq ($(verbose),1)
  VERBOSE :=
else
  VERBOSE := @
endif

################################################################################
# Check for input flags and set compiler flags appropriately
################################################################################
ifeq ($(fastmath),1)
  NVCCFLAGS += -use_fast_math
endif

ifeq ($(keep),1)
  NVCCFLAGS       += -keep
  # intermediate files left behind by 'nvcc -keep'; removed by 'clean'
  NVCC_KEEP_CLEAN := *.i* *.cubin *.cu.c *.cudafe* *.fatbin.c *.ptx
endif

ifdef maxregisters
  NVCCFLAGS += -maxrregcount $(maxregisters)
endif

# Add cudacc flags
NVCCFLAGS += $(CUDACCFLAGS)

# workaround for mac os x cuda 1.1 compiler issues
ifneq ($(DARWIN),)
  NVCCFLAGS += --host-compilation=C
endif

# Add common flags
NVCCFLAGS += $(COMMONFLAGS)
CXXFLAGS  += $(COMMONFLAGS)
CFLAGS    += $(COMMONFLAGS)

# nvcc_warn_verbose=1: hand every host warning flag to nvcc, each one wrapped
# in its own --compiler-options.
ifeq ($(nvcc_warn_verbose),1)
  NVCCFLAGS += $(addprefix --compiler-options ,$(CXXWARN_FLAGS))
  NVCCFLAGS += --compiler-options -fno-strict-aliasing
endif

################################################################################
# Set up object files
################################################################################
# Every source listed in CCFILES / CFILES / CUFILES maps to one object in
# OBJDIR; directory parts of the source names are dropped by $(notdir).
OBJDIR := $(ROOTOBJDIR)/$(BINSUBDIR)
OBJS   += $(patsubst %.cpp,$(OBJDIR)/%.cpp_o,$(notdir $(CCFILES)))
OBJS   += $(patsubst %.c,$(OBJDIR)/%.c_o,$(notdir $(CFILES)))
OBJS   += $(patsubst %.cu,$(OBJDIR)/%.cu_o,$(notdir $(CUFILES)))

################################################################################
# Set up cubin files
################################################################################
CUBINDIR := $(SRCDIR)data
CUBINS   += $(patsubst %.cu,$(CUBINDIR)/%.cubin,$(notdir $(CUBINFILES)))

################################################################################
# Rules
################################################################################
# Each object rule creates its own output directory first, so the rules do not
# depend on another target having run before them (safe under 'make -j').
$(OBJDIR)/%.c_o : $(SRCDIR)%.c $(C_DEPS)
	$(VERBOSE)mkdir -p $(@D)
	$(VERBOSE)$(CC) $(CFLAGS) -o $@ -c $<

$(OBJDIR)/%.cpp_o : $(SRCDIR)%.cpp $(C_DEPS)
	$(VERBOSE)mkdir -p $(@D)
	$(VERBOSE)$(CXX) $(CXXFLAGS) -o $@ -c $<

$(OBJDIR)/%.cu_o : $(SRCDIR)%.cu $(CU_DEPS)
	$(VERBOSE)mkdir -p $(@D)
	$(VERBOSE)$(NVCC) $(NVCCFLAGS) $(SMVERSIONFLAGS) -o $@ -c $<

# 'cubindirectory' is order-only (after '|'): it must run before the cubin is
# built, but because it never produces a file of that name it must not count
# as "newer than the target", which would rebuild every cubin on every run.
$(CUBINDIR)/%.cubin : $(SRCDIR)%.cu | cubindirectory
	$(VERBOSE)$(NVCC) $(CUBIN_ARCH_FLAG) $(NVCCFLAGS) $(SMVERSIONFLAGS) -o $@ -cubin $<

#
# The following definition is a template that gets instantiated for each SM
# version (sm_10, sm_13, etc.) stored in SM_VERSIONS. It does 2 things:
# 1. It adds to OBJS a .cu_sm_XX_o for each .cu file it finds in CUFILES_sm_XX.
# 2. It generates a rule for building .cu_sm_XX_o files from the corresponding
#    .cu file.
#
# The intended use for this is to allow Makefiles that use common.mk to compile
# files to different Compute Capability targets (aka SM arch version).  To do
# so, in the Makefile, list files for each SM arch separately, like so:
#
# CUFILES_sm_10 := mycudakernel_sm10.cu app.cu
# CUFILES_sm_12 := anothercudakernel_sm12.cu
#
# $(1) is the SM version name.  '$$@' and '$$<' are doubled so that they
# survive the $(call) expansion and are only resolved when the recipe runs.
define SMVERSION_template
OBJS += $(patsubst %.cu,$(OBJDIR)/%.cu_$(1)_o,$(notdir $(CUFILES_$(1))))
$(OBJDIR)/%.cu_$(1)_o : $(SRCDIR)%.cu $(CU_DEPS)
	$(VERBOSE)$(NVCC) -o $$@ -c $$< $(NVCCFLAGS) -arch $(1)
endef

# This line invokes the above template for each arch version stored in
# SM_VERSIONS.  The call function invokes the template, and the eval
# function interprets it as make commands.
$(foreach smver,$(SM_VERSIONS),$(eval $(call SMVERSION_template,$(smver))))

# None of these targets names a file that the recipes create.
.PHONY: cubindirectory makedirectories tidy clean clobber

# The output directories must exist before any object is compiled or the
# target is linked.  They are order-only prerequisites (after '|'): required
# to run first, but never a reason to rebuild -- as a normal prerequisite
# 'makedirectories' forced a relink on every invocation.
$(OBJS): | makedirectories

$(TARGET): $(OBJS) $(CUBINS) Makefile | makedirectories
	$(VERBOSE)$(LINKLINE)

cubindirectory:
	$(VERBOSE)mkdir -p $(CUBINDIR)

makedirectories:
	$(VERBOSE)mkdir -p $(LIBDIR)
	$(VERBOSE)mkdir -p $(OBJDIR)
	$(VERBOSE)mkdir -p $(TARGETDIR)

# Remove editor leftovers: regular files whose name contains '#' or '~'.
# find deletes them itself, so names containing whitespace are handled.
tidy :
	$(VERBOSE)find . -type f \( -name '*#*' -o -name '*~*' \) -exec rm -f {} +

# Remove everything this makefile builds, plus 'nvcc -keep' intermediates.
clean : tidy
	$(VERBOSE)rm -f $(OBJS)
	$(VERBOSE)rm -f $(CUBINS)
	$(VERBOSE)rm -f $(TARGET)
	$(VERBOSE)rm -f $(NVCC_KEEP_CLEAN)

# 'clean' plus the whole object tree.
clobber : clean
	$(VERBOSE)rm -rf $(ROOTOBJDIR)

################################################################################
# File boundary taken from the commit diff:
#   diff --git a/NW/baselines/gpu/common/make.config b/NW/baselines/gpu/common/make.config
#   new file mode 100644, index 0000000..38c3157
# Everything below is the content of NW/baselines/gpu/common/make.config.
################################################################################

# CUDA toolkit installation path
# ('?=' keeps the default but lets the environment or an including makefile
# choose another location)
CUDA_DIR ?= /usr/local/cuda

# CUDA toolkit libraries
# Use lib64 on x86_64 when the toolkit ships that directory, otherwise lib.
CUDA_LIB_DIR := $(CUDA_DIR)/lib
ifeq ($(shell uname -m), x86_64)
  ifneq ($(wildcard $(CUDA_DIR)/lib64/.),)
    CUDA_LIB_DIR := $(CUDA_DIR)/lib64
  endif
endif

# CUDA SDK installation path
SDK_DIR = $(CUDA_DIR)/samples/

# OPENCL

# NVIDIA_DIR
NV_OPENCL_DIR = $(CUDA_DIR)
NV_OPENCL_INC = $(NV_OPENCL_DIR)/include
NV_OPENCL_LIB = $(NV_OPENCL_DIR)/lib64

# INTEL_DIR
INTEL_OPENCL_DIR = /opt/intel/opencl
INTEL_OPENCL_INC = $(INTEL_OPENCL_DIR)/include
INTEL_OPENCL_LIB = $(INTEL_OPENCL_DIR)

# AMD_DIR
# OPENCL_DIR = /usr/local/cuda
# OPENCL_INC = $(OPENCL_DIR)/include/
# OPENCL_LIB = $(OPENCL_DIR)/lib/x86_64/ -lOpenCL
#ifeq ($(shell uname -m), x86_64)
#     ifeq ($(shell if test -d $(OPENCL_DIR)/lib/x86_64/; then echo T; else echo F; fi), T)
#     	OPENCL_LIB = $(OPENCL_DIR)/lib/x86_64/
#     endif
#endif

# DEFAULT OCL
OPENCL_DIR = $(NV_OPENCL_DIR)
OPENCL_INC = $(NV_OPENCL_INC)
OPENCL_LIB = $(NV_OPENCL_LIB)