From 8be6f1ac64cb0338d55e706e0ff1c60a7ceeaf38 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Szil=C3=A1rd=20P=C3=A1ll?= Date: Wed, 25 Jan 2017 02:53:17 +0100 Subject: [PATCH] Enable compiling CUDA device code with clang clang can be used as a device compiler by setting GMX_CLANG_CUDA=ON. A CUDA toolkit (>=7.0) is also needed. Workarounds required: - texture operations are not supported, so the LDG/direct load-based fallback is used in such cases; - CMake does not natively support clang for CUDA, but it is easy to convince it by setting CXX as the compiler for *.cu files and adding a few extra flags. Note that clang support is experimental; it is aimed at improving portability and at allowing the use of clang sanitizers without hassle in CUDA builds. TODO/investigate: - CMake does not seem to track some files properly with clang: changes to nbnxn_cuda_kernel{,_fermi}.cuh do not trigger a recompile (likely due to the indirect include through a macro in nbnxn_cuda_kernels.cuh). - A full rebuild is triggered even if only CUDA compile flags are changed. 
Change-Id: I3543469d9f0fda37c186ba8bb474980018bd5c54 --- CMakeLists.txt | 4 + admin/builds/gromacs.py | 6 +- admin/builds/post-submit-matrix.txt | 5 +- .../gmxClangCudaUtils.cmake | 40 ++------ cmake/gmxManageClangCudaConfig.cmake | 107 +++++++++++++++++++++ cmake/gmxManageGPU.cmake | 91 ++++++++++++------ docs/dev-manual/build-system.rst | 8 ++ docs/install-guide/index.rst | 20 ++++ src/CMakeLists.txt | 9 +- src/buildinfo.h.cmakein | 10 +- src/gromacs/CMakeLists.txt | 20 +++- src/gromacs/gpu_utils/cuda_arch_utils.cuh | 4 +- src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu | 25 ++++- .../mdlib/nbnxn_cuda/nbnxn_cuda_kernel_utils.cuh | 7 +- src/gromacs/utility/binaryinformation.cpp | 4 +- 15 files changed, 270 insertions(+), 90 deletions(-) copy src/CMakeLists.txt => cmake/gmxClangCudaUtils.cmake (60%) create mode 100644 cmake/gmxManageClangCudaConfig.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 3aa958f4b1..4d6d437327 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -574,6 +574,10 @@ include(gmxManageLmfit) if(GMX_GPU) # now that we have detected the dependencies, do the second configure pass gmx_gpu_setup() + if (GMX_CLANG_CUDA) + list(APPEND GMX_EXTRA_LIBRARIES ${GMX_CUDA_CLANG_LINK_LIBS}) + link_directories("${GMX_CUDA_CLANG_LINK_DIRS}") + endif() endif() if(CYGWIN) diff --git a/admin/builds/gromacs.py b/admin/builds/gromacs.py index 657a14f96a..06640c16f1 100644 --- a/admin/builds/gromacs.py +++ b/admin/builds/gromacs.py @@ -50,6 +50,7 @@ extra_options = { 'thread-mpi': Option.bool, 'gpu': Option.bool, 'opencl': Option.bool, + 'clang_cuda': Option.bool, 'openmp': Option.bool, 'nranks': Option.string, 'npme': Option.string, @@ -99,7 +100,10 @@ def do_build(context): cmake_opts['GMX_USE_OPENCL'] = 'ON' else: cmake_opts['CUDA_TOOLKIT_ROOT_DIR'] = context.env.cuda_root - cmake_opts['CUDA_HOST_COMPILER'] = context.env.cuda_host_compiler + if context.opts.clang_cuda: + cmake_opts['GMX_CLANG_CUDA'] = 'ON' + else: + cmake_opts['CUDA_HOST_COMPILER'] = 
context.env.cuda_host_compiler else: cmake_opts['GMX_GPU'] = 'OFF' if context.opts.thread_mpi is False: diff --git a/admin/builds/post-submit-matrix.txt b/admin/builds/post-submit-matrix.txt index eabee21af5..fa6babcb7d 100644 --- a/admin/builds/post-submit-matrix.txt +++ b/admin/builds/post-submit-matrix.txt @@ -35,9 +35,8 @@ gcc-7 npme=1 nranks=2 no-openmp fftpack release-with-assert # Test SSE4.1 SIMD # Test single-rank GPU -# TODO Test clang + OpenMP + CUDA -# TODO change to clang-4 and cuda-8.0 -gcc-4.8 openmp nranks=1 gpu cuda-7.5 simd=sse4.1 +# Test clang + OpenMP + CUDA +clang-4 simd=sse4.1 openmp nranks=1 gpu cuda-8.0 clang_cuda # Test MPMD PME with library MPI # Test clang + OpenMP diff --git a/src/CMakeLists.txt b/cmake/gmxClangCudaUtils.cmake similarity index 60% copy from src/CMakeLists.txt copy to cmake/gmxClangCudaUtils.cmake index f120b01add..e47a25766d 100644 --- a/src/CMakeLists.txt +++ b/cmake/gmxClangCudaUtils.cmake @@ -1,7 +1,7 @@ # # This file is part of the GROMACS molecular simulation package. # -# Copyright (c) 2009,2010,2011,2012,2013,2014,2015,2016, by the GROMACS development team, led by +# Copyright (c) 2017, by the GROMACS development team, led by # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, # and including many others, as listed in the AUTHORS file in the # top-level source directory and at http://www.gromacs.org. @@ -32,35 +32,9 @@ # To help us fund GROMACS development, we humbly ask that you cite # the research papers on the package. Check out http://www.gromacs.org. 
-###################################### -# Output compiler and CFLAGS used -###################################### -include(GetCompilerInfo.cmake) -get_compiler_info(C BUILD_C_COMPILER BUILD_CFLAGS) -get_compiler_info(CXX BUILD_CXX_COMPILER BUILD_CXXFLAGS) -if(GMX_USE_CUDA) - GMX_SET_CUDA_NVCC_FLAGS() - get_cuda_compiler_info(CUDA_NVCC_COMPILER_INFO CUDA_NVCC_COMPILER_FLAGS) -endif() - -configure_file(config.h.cmakein config.h) -configure_file(gmxpre-config.h.cmakein gmxpre-config.h) -configure_file(buildinfo.h.cmakein buildinfo.h ESCAPE_QUOTES) - -if (BUILD_TESTING) - if(NOT GMX_DEVELOPER_BUILD) - set(UNITTEST_TARGET_OPTIONS EXCLUDE_FROM_ALL) - endif() - if (GMX_BUILD_UNITTESTS) - add_subdirectory(external/gmock-1.7.0) - endif() - include(testutils/TestMacros.cmake) - add_subdirectory(testutils) -endif() - -add_subdirectory(gromacs) -add_subdirectory(programs) - -if (NOT GMX_FAHCORE) - add_subdirectory(contrib) -endif() +function(gmx_compile_cuda_file_with_clang) + foreach(_file ${ARGN}) + set_source_files_properties(${_file} PROPERTIES LANGUAGE CXX) + set_source_files_properties(${_file} PROPERTIES COMPILE_FLAGS "${GMX_CUDA_CLANG_FLAGS}") + endforeach() +endfunction() diff --git a/cmake/gmxManageClangCudaConfig.cmake b/cmake/gmxManageClangCudaConfig.cmake new file mode 100644 index 0000000000..40a492b002 --- /dev/null +++ b/cmake/gmxManageClangCudaConfig.cmake @@ -0,0 +1,107 @@ +# +# This file is part of the GROMACS molecular simulation package. +# +# Copyright (c) 2017, by the GROMACS development team, led by +# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, +# and including many others, as listed in the AUTHORS file in the +# top-level source directory and at http://www.gromacs.org. +# +# GROMACS is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public License +# as published by the Free Software Foundation; either version 2.1 +# of the License, or (at your option) any later version. 
+# +# GROMACS is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with GROMACS; if not, see +# http://www.gnu.org/licenses, or write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# If you want to redistribute modifications to GROMACS, please +# consider that scientific software is very special. Version +# control is crucial - bugs must be traceable. We will be happy to +# consider code for inclusion in the official distribution, but +# derived work must not be called official GROMACS. Details are found +# in the README & COPYING files - if they are missing, get the +# official version at http://www.gromacs.org. +# +# To help us fund GROMACS development, we humbly ask that you cite +# the research papers on the package. Check out http://www.gromacs.org. + +function (gmx_test_clang_cuda_support) + + if ((NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang") OR + (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "3.9")) + message(FATAL_ERROR "clang 3.9 or later required with GMX_CLANG_CUDA=ON!") + endif() + + # NOTE: we'd ideally like to use a compile check here, but the link-stage + # fails as the clang invocation generated seems to not handle well some + # (GPU code) in the object file generated during compilation. 
+ # SET(CMAKE_REQUIRED_FLAGS ${FLAGS}) + # SET(CMAKE_REQUIRED_LIBRARIES ${LIBS}) + # CHECK_CXX_SOURCE_COMPILES("int main() { int c; cudaGetDeviceCount(&c); return 0; }" _CLANG_CUDA_COMPILES) +endfunction () + + +if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.0" AND + NOT CUDA_VERSION VERSION_LESS "8.0") + message(FATAL_ERROR "clang ${CMAKE_CXX_COMPILER_VERSION} for CUDA is only compatible with CUDA version <8.0") +endif() + +if (GMX_CUDA_TARGET_COMPUTE) + message(WARNING "Values passed in GMX_CUDA_TARGET_COMPUTE will be ignored; clang will by default include PTX in the binary.") +endif() + +if (GMX_CUDA_TARGET_SM) + set(_CUDA_CLANG_GENCODE_FLAGS) + set(_target_sm_list ${GMX_CUDA_TARGET_SM}) + foreach(_target ${_target_sm_list}) + list(APPEND _CUDA_CLANG_GENCODE_FLAGS "--cuda-gpu-arch=sm_${_target}") + endforeach() +else() + list(APPEND _CUDA_CLANG_GENCODE_FLAGS "--cuda-gpu-arch=sm_20") + list(APPEND _CUDA_CLANG_GENCODE_FLAGS "--cuda-gpu-arch=sm_30") + list(APPEND _CUDA_CLANG_GENCODE_FLAGS "--cuda-gpu-arch=sm_35") + list(APPEND _CUDA_CLANG_GENCODE_FLAGS "--cuda-gpu-arch=sm_37") + list(APPEND _CUDA_CLANG_GENCODE_FLAGS "--cuda-gpu-arch=sm_50") + list(APPEND _CUDA_CLANG_GENCODE_FLAGS "--cuda-gpu-arch=sm_52") + if (NOT CUDA_VERSION VERSION_LESS 8.0) + list(APPEND _CUDA_CLANG_GENCODE_FLAGS "--cuda-gpu-arch=sm_60") + list(APPEND _CUDA_CLANG_GENCODE_FLAGS "--cuda-gpu-arch=sm_61") + endif() + # TODO: test CUDA 9.0 and figure out which clang releases support it + # and the sm_70 arch. 
+endif() +if (GMX_CUDA_TARGET_SM) + set_property(CACHE GMX_CUDA_TARGET_SM PROPERTY HELPSTRING "List of CUDA GPU architecture codes to compile for (without the sm_ prefix)") + set_property(CACHE GMX_CUDA_TARGET_SM PROPERTY TYPE STRING) +endif() + +# default flags +list(APPEND _CUDA_CLANG_FLAGS "-x cuda" "-ffast-math") +# CUDA toolkit +list(APPEND _CUDA_CLANG_FLAGS "--cuda-path=${CUDA_TOOLKIT_ROOT_DIR}") +# codegen flags +list(APPEND _CUDA_CLANG_FLAGS "${_CUDA_CLANG_GENCODE_FLAGS}") +foreach(_flag ${_CUDA_CLANG_FLAGS}) + set(GMX_CUDA_CLANG_FLAGS "${GMX_CUDA_CLANG_FLAGS} ${_flag}") +endforeach() + +if (CUDA_USE_STATIC_CUDA_RUNTIME) + set(GMX_CUDA_CLANG_LINK_LIBS "cudart_static") +else() + set(GMX_CUDA_CLANG_LINK_LIBS "cudart") +endif() +set(GMX_CUDA_CLANG_LINK_LIBS "${GMX_CUDA_CLANG_LINK_LIBS}" "dl" "rt") +if (CUDA_64_BIT_DEVICE_CODE) + set(GMX_CUDA_CLANG_LINK_DIRS "${CUDA_TOOLKIT_ROOT_DIR}/lib64") +else() + set(GMX_CUDA_CLANG_LINK_DIRS "${CUDA_TOOLKIT_ROOT_DIR}/lib") +endif() + +gmx_test_clang_cuda_support() diff --git a/cmake/gmxManageGPU.cmake b/cmake/gmxManageGPU.cmake index 60a74291ec..435ed0c17e 100644 --- a/cmake/gmxManageGPU.cmake +++ b/cmake/gmxManageGPU.cmake @@ -1,7 +1,7 @@ # # This file is part of the GROMACS molecular simulation package. # -# Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by +# Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, # and including many others, as listed in the AUTHORS file in the # top-level source directory and at http://www.gromacs.org. @@ -42,6 +42,12 @@ if (NOT DEFINED GMX_GPU) endif() option(GMX_GPU "Enable GPU acceleration" OFF) +option(GMX_CLANG_CUDA "Use clang for CUDA" OFF) +if (GMX_CLANG_CUDA) + # CUDA 7.0 or later required, override req. 
version + set(REQUIRED_CUDA_VERSION 7.0) +endif() + if(GMX_GPU AND GMX_DOUBLE) message(FATAL_ERROR "GPU acceleration is not available in double precision!") endif() @@ -174,44 +180,59 @@ endif() # COMPILER_FLAGS - [output variable] flags for the compiler # macro(get_cuda_compiler_info COMPILER_INFO COMPILER_FLAGS) - if(CUDA_NVCC_EXECUTABLE) + if(NOT GMX_CLANG_CUDA) + if(CUDA_NVCC_EXECUTABLE) - # Get the nvcc version string. This is multi-line, but since it is only 4 lines - # and might change in the future it is better to store than trying to parse out - # the version from the current format. - execute_process(COMMAND ${CUDA_NVCC_EXECUTABLE} --version - RESULT_VARIABLE _nvcc_version_res - OUTPUT_VARIABLE _nvcc_version_out - ERROR_VARIABLE _nvcc_version_err - OUTPUT_STRIP_TRAILING_WHITESPACE) - if (${_nvcc_version_res} EQUAL 0) - # Fix multi-line mess: Replace newline with ";" so we can use it in a define - string(REPLACE "\n" ";" _nvcc_info_singleline ${_nvcc_version_out}) - SET(${COMPILER_INFO} "${CUDA_NVCC_EXECUTABLE} ${_nvcc_info_singleline}") - string(TOUPPER ${CMAKE_BUILD_TYPE} _build_type) - SET(_compiler_flags "${CUDA_NVCC_FLAGS_${_build_type}}") - if(CUDA_PROPAGATE_HOST_FLAGS) - string(REGEX REPLACE "[ ]+" ";" _cxx_flags_nospace "${BUILD_CXXFLAGS}") + # Get the nvcc version string. This is multi-line, but since it is only 4 lines + # and might change in the future it is better to store than trying to parse out + # the version from the current format. 
+ execute_process(COMMAND ${CUDA_NVCC_EXECUTABLE} --version + RESULT_VARIABLE _nvcc_version_res + OUTPUT_VARIABLE _nvcc_version_out + ERROR_VARIABLE _nvcc_version_err + OUTPUT_STRIP_TRAILING_WHITESPACE) + if (${_nvcc_version_res} EQUAL 0) + # Fix multi-line mess: Replace newline with ";" so we can use it in a define + string(REPLACE "\n" ";" _nvcc_info_singleline ${_nvcc_version_out}) + SET(${COMPILER_INFO} "${CUDA_NVCC_EXECUTABLE} ${_nvcc_info_singleline}") + string(TOUPPER ${CMAKE_BUILD_TYPE} _build_type) + SET(_compiler_flags "${CUDA_NVCC_FLAGS_${_build_type}}") + if(CUDA_PROPAGATE_HOST_FLAGS) + string(REGEX REPLACE "[ ]+" ";" _cxx_flags_nospace "${BUILD_CXXFLAGS}") + endif() + SET(${COMPILER_FLAGS} "${CUDA_NVCC_FLAGS}${CUDA_NVCC_FLAGS_${_build_type}}; ${_cxx_flags_nospace}") + else() + SET(${COMPILER_INFO} "N/A") + SET(${COMPILER_FLAGS} "N/A") endif() - SET(${COMPILER_FLAGS} "${CUDA_NVCC_FLAGS}${CUDA_NVCC_FLAGS_${_build_type}}; ${_cxx_flags_nospace}") - else() - SET(${COMPILER_INFO} "N/A") - SET(${COMPILER_FLAGS} "N/A") endif() + else() + # CXX compiler is the CUDA compiler + set(${COMPILER_INFO} "${CMAKE_CXX_COMPILER} ${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}") + # there are some extra flags + set(${COMPILER_FLAGS} "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${_build_type}} ${GMX_CUDA_CLANG_FLAGS}") endif() endmacro () +macro(enable_multiple_cuda_compilation_units) + message(STATUS "Enabling multiple compilation units for the CUDA non-bonded module.") + set_property(CACHE GMX_CUDA_NB_SINGLE_COMPILATION_UNIT PROPERTY VALUE OFF) +endmacro() + include(CMakeDependentOption) include(gmxOptionUtilities) macro(gmx_gpu_setup) if(GMX_GPU) - if(NOT CUDA_NVCC_EXECUTABLE) - message(FATAL_ERROR "nvcc is required for a CUDA build, please set CUDA_TOOLKIT_ROOT_DIR appropriately") + if(NOT GMX_CLANG_CUDA) + if(NOT CUDA_NVCC_EXECUTABLE) + message(FATAL_ERROR "nvcc is required for a CUDA build, please set CUDA_TOOLKIT_ROOT_DIR appropriately") + endif() + # set up nvcc 
options + include(gmxManageNvccConfig) + else() + include(gmxManageClangCudaConfig) endif() - # set up nvcc options - include(gmxManageNvccConfig) - gmx_check_if_changed(_cuda_version_changed CUDA_VERSION) # Generate CUDA RT API version string which will end up in config.h @@ -249,19 +270,25 @@ macro(gmx_gpu_setup) endif() endif() # GMX_GPU + if (GMX_CLANG_CUDA) + set (_GMX_CUDA_NB_SINGLE_COMPILATION_UNIT_DEFAULT FALSE) + else() + set (_GMX_CUDA_NB_SINGLE_COMPILATION_UNIT_DEFAULT TRUE) + endif() cmake_dependent_option(GMX_CUDA_NB_SINGLE_COMPILATION_UNIT - "Whether to compile the CUDA non-bonded module using a single compilation unit." ON + "Whether to compile the CUDA non-bonded module using a single compilation unit." ${_GMX_CUDA_NB_SINGLE_COMPILATION_UNIT_DEFAULT} "GMX_GPU" ON) mark_as_advanced(GMX_CUDA_NB_SINGLE_COMPILATION_UNIT) - if (GMX_GPU) + if (GMX_GPU AND NOT GMX_CLANG_CUDA) # We need to use single compilation unit for kernels: - # - when compiling for CC 2.x devices where buggy kernel code is generated + # when compiling with nvcc for CC 2.x devices where buggy kernel code is generated gmx_check_if_changed(_gmx_cuda_target_changed GMX_CUDA_TARGET_SM GMX_CUDA_TARGET_COMPUTE CUDA_NVCC_FLAGS) + if(_gmx_cuda_target_changed OR NOT GMX_GPU_DETECTION_DONE) if((NOT GMX_CUDA_TARGET_SM AND NOT GMX_CUDA_TARGET_COMPUTE) OR - (GMX_CUDA_TARGET_SM MATCHES "2[01]" OR GMX_CUDA_TARGET_COMPUTE MATCHES "2[01]")) - message(STATUS "Enabling single compilation unit for the CUDA non-bonded module. Multiple compilation units are not compatible with CC 2.x devices, to enable the feature specify only CC >=3.0 target architectures in GMX_CUDA_TARGET_SM/GMX_CUDA_TARGET_COMPUTE.") + (GMX_CUDA_TARGET_SM MATCHES "2[01]" OR GMX_CUDA_TARGET_COMPUTE MATCHES "2[01]")) + message(STATUS "Enabling single compilation unit for the CUDA non-bonded module. 
Multiple compilation units are not compatible with CC 2.x devices, to enable the feature specify only CC >=3.0 target architectures in GMX_CUDA_TARGET_SM/GMX_CUDA_TARGET_COMPUTE.") set_property(CACHE GMX_CUDA_NB_SINGLE_COMPILATION_UNIT PROPERTY VALUE ON) else() message(STATUS "Enabling multiple compilation units for the CUDA non-bonded module.") diff --git a/docs/dev-manual/build-system.rst b/docs/dev-manual/build-system.rst index b055116cf6..459a9a737a 100644 --- a/docs/dev-manual/build-system.rst +++ b/docs/dev-manual/build-system.rst @@ -257,6 +257,14 @@ Variables affecting compilation/linking .. cmake:: GMX_GPU +.. cmake:: GMX_CLANG_CUDA + + Use clang for compiling CUDA GPU code, both host and device. + +.. cmake:: GMX_CUDA_CLANG_FLAGS + + Pass additional CUDA-only compiler flags to clang using this variable. + .. cmake:: GMX_LIB_INSTALL_DIR Sets the installation directory for libraries (default is determined by diff --git a/docs/install-guide/index.rst b/docs/install-guide/index.rst index 98b6fa8b07..e7c4a0110a 100644 --- a/docs/install-guide/index.rst +++ b/docs/install-guide/index.rst @@ -612,6 +612,26 @@ Linux, Mac OS X and Windows operating systems, but Linux is the best-tested and supported of these. Linux running on POWER 8, ARM v7 and v8 CPUs also works well. +Experimental support is available for compiling CUDA code, both for host and +device, using clang (version 3.9 or later). +A CUDA toolkit (>= v7.0) is still required but it is used only for GPU device code +generation and to link against the CUDA runtime library. +The clang CUDA support simplifies compilation and provides benefits for development +(e.g. allows the use of code sanitizers in CUDA host-code). +Additionally, using clang for both CPU and GPU compilation can be beneficial +to avoid compatibility issues between the GNU toolchain and the CUDA toolkit. +clang for CUDA can be triggered using the ``GMX_CLANG_CUDA=ON`` CMake option. 
+Target architectures can be selected with ``GMX_CUDA_TARGET_SM``, +virtual architecture code is always embedded for all requested architectures +(hence GMX_CUDA_TARGET_COMPUTE is ignored). +Note that this is mainly a developer-oriented feature and it is not recommended +for production use as the performance can be significantly lower than that +of code compiled with nvcc (and it has also received less testing). +However, note that with clang 5.0 the performance gap is significantly narrowed +(at the time of writing, about 20% slower GPU kernels), so this version +could be considered in non performance-critical use-cases. + + OpenCL GPU acceleration ^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f120b01add..517ca813bc 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,7 +1,7 @@ # # This file is part of the GROMACS molecular simulation package. # -# Copyright (c) 2009,2010,2011,2012,2013,2014,2015,2016, by the GROMACS development team, led by +# Copyright (c) 2009,2010,2011,2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, # and including many others, as listed in the AUTHORS file in the # top-level source directory and at http://www.gromacs.org. @@ -39,8 +39,11 @@ include(GetCompilerInfo.cmake) get_compiler_info(C BUILD_C_COMPILER BUILD_CFLAGS) get_compiler_info(CXX BUILD_CXX_COMPILER BUILD_CXXFLAGS) if(GMX_USE_CUDA) - GMX_SET_CUDA_NVCC_FLAGS() - get_cuda_compiler_info(CUDA_NVCC_COMPILER_INFO CUDA_NVCC_COMPILER_FLAGS) + if(NOT GMX_CLANG_CUDA) + GMX_SET_CUDA_NVCC_FLAGS() + endif() + + get_cuda_compiler_info(CUDA_COMPILER_INFO CUDA_COMPILER_FLAGS) endif() configure_file(config.h.cmakein config.h) diff --git a/src/buildinfo.h.cmakein b/src/buildinfo.h.cmakein index 1f03cdf30a..21a31e23b3 100644 --- a/src/buildinfo.h.cmakein +++ b/src/buildinfo.h.cmakein @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. 
* - * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015,2017, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -90,11 +90,11 @@ /** Location of data files in the installation directory */ #define DATA_INSTALL_DIR "@DATA_INSTALL_DIR@" -/** CUDA nvcc compiler version information */ -#define CUDA_NVCC_COMPILER_INFO "@CUDA_NVCC_COMPILER_INFO@" +/** CUDA compiler version information */ +#define CUDA_COMPILER_INFO "@CUDA_COMPILER_INFO@" -/** CUDA nvcc compiler flags */ -#define CUDA_NVCC_COMPILER_FLAGS "@CUDA_NVCC_COMPILER_FLAGS@" +/** CUDA compiler flags */ +#define CUDA_COMPILER_FLAGS "@CUDA_COMPILER_FLAGS@" /** OpenCL include dir */ #define OPENCL_INCLUDE_DIR "@OPENCL_INCLUDE_DIR@" diff --git a/src/gromacs/CMakeLists.txt b/src/gromacs/CMakeLists.txt index 4ee526788a..ba8f6d2a55 100644 --- a/src/gromacs/CMakeLists.txt +++ b/src/gromacs/CMakeLists.txt @@ -34,6 +34,10 @@ set(LIBGROMACS_SOURCES) +if (GMX_CLANG_CUDA) + include(gmxClangCudaUtils) +endif() + set_property(GLOBAL PROPERTY GMX_LIBGROMACS_SOURCES) set_property(GLOBAL PROPERTY GMX_INSTALLED_HEADERS) @@ -151,11 +155,25 @@ gmx_configure_version_file( REMOTE_HASH) list(APPEND LIBGROMACS_SOURCES ${GENERATED_VERSION_FILE}) +# set up CUDA compilation with clang +if (GMX_CLANG_CUDA) + foreach (_file ${LIBGROMACS_SOURCES}) + get_filename_component(_ext ${_file} EXT) + if (${_ext} STREQUAL ".cu") + gmx_compile_cuda_file_with_clang(${_file}) + endif() + endforeach() +endif() + if (GMX_USE_CUDA) # Work around FindCUDA that prevents using target_link_libraries() # with keywords otherwise... 
set(CUDA_LIBRARIES PRIVATE ${CUDA_LIBRARIES}) - cuda_add_library(libgromacs ${LIBGROMACS_SOURCES}) + if (NOT GMX_CLANG_CUDA) + cuda_add_library(libgromacs ${LIBGROMACS_SOURCES}) + else() + add_library(libgromacs ${LIBGROMACS_SOURCES}) + endif() else() add_library(libgromacs ${LIBGROMACS_SOURCES}) endif() diff --git a/src/gromacs/gpu_utils/cuda_arch_utils.cuh b/src/gromacs/gpu_utils/cuda_arch_utils.cuh index 4639acdd23..e1bf50cc3b 100644 --- a/src/gromacs/gpu_utils/cuda_arch_utils.cuh +++ b/src/gromacs/gpu_utils/cuda_arch_utils.cuh @@ -140,11 +140,13 @@ T gmx_shfl_down_sync(const unsigned int activeMask, /*! \brief Allow disabling CUDA textures using the GMX_DISABLE_CUDA_TEXTURES macro. * + * Disable texture support-missing in clang (all versions up to <=5.0-dev as of writing). + * * This option will not influence functionality. All features using textures ought * to have fallback for texture-less reads (direct/LDG loads), all new code needs * to provide fallback code. */ -#if defined GMX_DISABLE_CUDA_TEXTURES +#if defined(GMX_DISABLE_CUDA_TEXTURES) || (defined(__clang__) && defined(__CUDA__)) #define DISABLE_CUDA_TEXTURES 1 #else #define DISABLE_CUDA_TEXTURES 0 diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu index bb72046172..bcdd4aed0c 100644 --- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu +++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu @@ -128,8 +128,8 @@ texture coulomb_tab_texref; * build-time checks to prevent this, the user could manually tweaks nvcc flags * which would lead to buggy kernels getting compiled. */ -#if GMX_PTX_ARCH > 0 && GMX_PTX_ARCH <= 210 -#error Due to an CUDA compiler bug, the CUDA non-bonded module can not be compiled with multiple compilation units for CC 2.x devices. If you have changed the nvcc flags manually, either use the GMX_CUDA_TARGET_* variables instead or set GMX_CUDA_NB_SINGLE_COMPILATION_UNIT=ON CMake option. 
+#if GMX_PTX_ARCH > 0 && GMX_PTX_ARCH <= 210 && !defined(__clang__) +#error Due to an CUDA nvcc compiler bug, the CUDA non-bonded module can not be compiled with multiple compilation units for CC 2.x devices. If you have changed the nvcc flags manually, either use the GMX_CUDA_TARGET_* variables instead or set GMX_CUDA_NB_SINGLE_COMPILATION_UNIT=ON CMake option. #endif #endif /* GMX_CUDA_NB_SINGLE_COMPILATION_UNIT */ @@ -932,21 +932,36 @@ void nbnxn_gpu_wait_for_gpu(gmx_nbnxn_cuda_t *nb, plist->haveFreshList = false; } +/*! \brief Return the reference to the nbfp texture. + * + * Note: it can return junk when c_disableCudaTextures==true, but we don't + * assert on that condition because the data_mgmt module ends up calling this + * function even if texture references are not used. + */ const struct texture &nbnxn_cuda_get_nbfp_texref() { - assert(!c_disableCudaTextures); return nbfp_texref; } +/*! \brief Return the reference to the nbfp_comb texture. + * + * Note: it can return junk when c_disableCudaTextures==true, but we don't + * assert on that condition because the data_mgmt module ends up calling this + * function even if texture references are not used. + */ const struct texture &nbnxn_cuda_get_nbfp_comb_texref() { - assert(!c_disableCudaTextures); return nbfp_comb_texref; } +/*! \brief Return the reference to the coulomb_tab texture. + * + * Note: it can return junk when c_disableCudaTextures==true, but we don't + * assert on that condition because the data_mgmt module ends up calling this + * function even if texture references are not used. 
+ */ const struct texture &nbnxn_cuda_get_coulomb_tab_texref() { - assert(!c_disableCudaTextures); return coulomb_tab_texref; } diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_utils.cuh b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_utils.cuh index 71f1901434..2626bf101d 100644 --- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_utils.cuh +++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_utils.cuh @@ -706,14 +706,13 @@ void reduce_energy_pow2(volatile float *buf, float *e_lj, float *e_el, unsigned int tidx) { - int i, j; - float e1, e2; + float e1, e2; - i = warp_size/2; + unsigned int i = warp_size/2; /* Can't just use i as loop variable because than nvcc refuses to unroll. */ #pragma unroll 10 - for (j = warp_size_log2 - 1; j > 0; j--) + for (int j = warp_size_log2 - 1; j > 0; j--) { if (tidx < i) { diff --git a/src/gromacs/utility/binaryinformation.cpp b/src/gromacs/utility/binaryinformation.cpp index a222478069..efa2c0f287 100644 --- a/src/gromacs/utility/binaryinformation.cpp +++ b/src/gromacs/utility/binaryinformation.cpp @@ -302,8 +302,8 @@ void gmx_print_version_info(gmx::TextWriter *writer) writer->writeLine(formatString("OpenCL version: %s", OPENCL_VERSION_STRING)); #endif #if GMX_GPU == GMX_GPU_CUDA - writer->writeLine(formatString("CUDA compiler: %s\n", CUDA_NVCC_COMPILER_INFO)); - writer->writeLine(formatString("CUDA compiler flags:%s\n", CUDA_NVCC_COMPILER_FLAGS)); + writer->writeLine(formatString("CUDA compiler: %s\n", CUDA_COMPILER_INFO)); + writer->writeLine(formatString("CUDA compiler flags:%s\n", CUDA_COMPILER_FLAGS)); auto driverVersion = gmx::getCudaDriverVersion(); writer->writeLine(formatString("CUDA driver: %d.%d\n", driverVersion.first, driverVersion.second)); auto runtimeVersion = gmx::getCudaRuntimeVersion(); -- 2.11.4.GIT