offload/DeviceRTL/CMakeLists.txt

   1 set(LIBOMPTARGET_BUILD_DEVICERTL_BCLIB TRUE CACHE BOOL
   2   "Can be set to false to disable building this library.")
   3
   4 if (NOT LIBOMPTARGET_BUILD_DEVICERTL_BCLIB)
   5   message(STATUS "Not building DeviceRTL: Disabled by LIBOMPTARGET_BUILD_DEVICERTL_BCLIB")
   6   return()
   7 endif()
   8
   9 # Check to ensure the host system is a supported host architecture.
  10 if(NOT ${CMAKE_SIZEOF_VOID_P} EQUAL "8")
  11   message(STATUS "Not building DeviceRTL: Runtime does not support 32-bit hosts")
  12   return()
  13 endif()
  14
  15 if (LLVM_DIR)
  16   # Builds that use pre-installed LLVM have LLVM_DIR set.
  17   # A standalone or LLVM_ENABLE_RUNTIMES=openmp build takes this route
  18   find_program(CLANG_TOOL clang PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH)
  19   find_program(PACKAGER_TOOL clang-offload-packager PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH)
  20   find_program(LINK_TOOL llvm-link PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH)
  21   find_program(OPT_TOOL opt PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH)
  22   if ((NOT CLANG_TOOL) OR (NOT LINK_TOOL) OR (NOT OPT_TOOL) OR (NOT PACKAGER_TOOL))
  23     message(STATUS "Not building DeviceRTL. Missing clang: ${CLANG_TOOL}, llvm-link: ${LINK_TOOL}, opt: ${OPT_TOOL}, or clang-offload-packager: ${PACKAGER_TOOL}")
  24     return()
  25   else()
  26     message(STATUS "Building DeviceRTL. Using clang: ${CLANG_TOOL}, llvm-link: ${LINK_TOOL} and opt: ${OPT_TOOL}")
  27   endif()
  28 elseif (LLVM_TOOL_CLANG_BUILD AND NOT CMAKE_CROSSCOMPILING AND NOT OPENMP_STANDALONE_BUILD)
  29   # LLVM in-tree builds may use CMake target names to discover the tools.
  30   # A LLVM_ENABLE_PROJECTS=openmp build takes this route
  31   set(CLANG_TOOL $<TARGET_FILE:clang>)
  32   set(PACKAGER_TOOL $<TARGET_FILE:clang-offload-packager>)
  33   set(LINK_TOOL $<TARGET_FILE:llvm-link>)
  34   set(OPT_TOOL $<TARGET_FILE:opt>)
  35   message(STATUS "Building DeviceRTL. Using clang from in-tree build")
  36 else()
  37   message(STATUS "Not building DeviceRTL. No appropriate clang found")
  38   return()
  39 endif()
  40
  41 set(devicertl_base_directory ${CMAKE_CURRENT_SOURCE_DIR})
  42 set(include_directory ${devicertl_base_directory}/include)
  43 set(source_directory ${devicertl_base_directory}/src)
  44
  45 set(all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906"
  46                              "gfx908;gfx90a;gfx90c;gfx940;gfx941;gfx942;gfx1010"
  47                              "gfx1030;gfx1031;gfx1032;gfx1033;gfx1034;gfx1035"
  48                              "gfx1036;gfx1100;gfx1101;gfx1102;gfx1103;gfx1150"
  49                              "gfx1151;gfx1152")
  50 set(all_nvptx_architectures "sm_35;sm_37;sm_50;sm_52;sm_53;sm_60;sm_61;sm_62"
  51                             "sm_70;sm_72;sm_75;sm_80;sm_86;sm_87;sm_89;sm_90")
  52 set(all_gpu_architectures
  53     "${all_amdgpu_architectures};${all_nvptx_architectures}")
  54
  55 set(LIBOMPTARGET_DEVICE_ARCHITECTURES "all" CACHE STRING
  56     "List of device architectures to be used to compile the OpenMP DeviceRTL.")
  57
  58 if(LIBOMPTARGET_DEVICE_ARCHITECTURES STREQUAL "all")
  59   set(LIBOMPTARGET_DEVICE_ARCHITECTURES ${all_gpu_architectures})
  60 elseif(LIBOMPTARGET_DEVICE_ARCHITECTURES STREQUAL "auto" OR
  61        LIBOMPTARGET_DEVICE_ARCHITECTURES STREQUAL "native")
  62   if(NOT LIBOMPTARGET_NVPTX_ARCH AND NOT LIBOMPTARGET_AMDGPU_ARCH)
  63     libomptarget_error_say(
  64       "Could not find 'amdgpu-arch' and 'nvptx-arch' tools required for 'auto'")
  65   elseif(NOT LIBOMPTARGET_FOUND_NVIDIA_GPU AND NOT LIBOMPTARGET_FOUND_AMDGPU_GPU)
  66     libomptarget_error_say("No AMD or NVIDIA GPU found on the system when using 'auto'")
  67   endif()
  68   set(LIBOMPTARGET_DEVICE_ARCHITECTURES
  69       "${LIBOMPTARGET_NVPTX_DETECTED_ARCH_LIST};${LIBOMPTARGET_AMDGPU_DETECTED_ARCH_LIST}")
  70 endif()
  71 list(REMOVE_DUPLICATES LIBOMPTARGET_DEVICE_ARCHITECTURES)
  72
  73 set(include_files
  74   ${include_directory}/Allocator.h
  75   ${include_directory}/Configuration.h
  76   ${include_directory}/Debug.h
  77   ${include_directory}/Interface.h
  78   ${include_directory}/LibC.h
  79   ${include_directory}/Mapping.h
  80   ${include_directory}/State.h
  81   ${include_directory}/Synchronization.h
  82   ${include_directory}/Types.h
  83   ${include_directory}/Utils.h
  84   ${include_directory}/Workshare.h
  85 )
  86
  87 set(src_files
  88   ${source_directory}/Allocator.cpp
  89   ${source_directory}/Configuration.cpp
  90   ${source_directory}/Debug.cpp
  91   ${source_directory}/Kernel.cpp
  92   ${source_directory}/LibC.cpp
  93   ${source_directory}/Mapping.cpp
  94   ${source_directory}/Misc.cpp
  95   ${source_directory}/Parallelism.cpp
  96   ${source_directory}/Reduction.cpp
  97   ${source_directory}/State.cpp
  98   ${source_directory}/Synchronization.cpp
  99   ${source_directory}/Tasking.cpp
 100   ${source_directory}/Utils.cpp
 101   ${source_directory}/Workshare.cpp
 102 )
 103
 104 # We disable the slp vectorizer during the runtime optimization to avoid
 105 # vectorized accesses to the shared state. Generally, those are "good" but
 106 # the optimizer pipeline (esp. Attributor) does not fully support vectorized
 107 # instructions yet and we end up missing out on way more important constant
 108 # propagation. That said, we will run the vectorizer again after the runtime
 109 # has been linked into the user program.
 110 set(clang_opt_flags -O3 -mllvm -openmp-opt-disable -DSHARED_SCRATCHPAD_SIZE=512 -mllvm -vectorize-slp=false )
 111 set(link_opt_flags  -O3        -openmp-opt-disable -attributor-enable=module -vectorize-slp=false )
 112 set(link_export_flag -passes=internalize -internalize-public-api-file=${source_directory}/exports)
 113
 114 # If the user built with the GPU C library enabled we will use that instead.
 115 if(${LIBOMPTARGET_GPU_LIBC_SUPPORT})
 116   list(APPEND clang_opt_flags -DOMPTARGET_HAS_LIBC)
 117 endif()
 118
 119 # Prepend -I to each list element
 120 set (LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL "${LIBOMPTARGET_LLVM_INCLUDE_DIRS}")
 121 list(TRANSFORM LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL PREPEND "-I")
 122
 123 # Set flags for LLVM Bitcode compilation.
 124 set(bc_flags -c -foffload-lto -std=c++17 -fvisibility=hidden
 125               ${clang_opt_flags} --offload-device-only
 126              -nocudalib -nogpulib -nostdinc
 127              -fopenmp -fopenmp-cuda-mode
 128              -Wno-unknown-cuda-version
 129              -DOMPTARGET_DEVICE_RUNTIME
 130              -I${include_directory}
 131              -I${devicertl_base_directory}/../include
 132              ${LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL}
 133 )
 134
 135 # first create an object target
 136 add_library(omptarget.devicertl.all_objs OBJECT IMPORTED)
 137 function(compileDeviceRTLLibrary target_cpu target_name target_triple)
 138   set(target_bc_flags ${ARGN})
 139
 140   set(bc_files "")
 141   foreach(src ${src_files})
 142     get_filename_component(infile ${src} ABSOLUTE)
 143     get_filename_component(outfile ${src} NAME)
 144     set(outfile "${outfile}-${target_cpu}.bc")
 145     set(depfile "${outfile}.d")
 146
 147     add_custom_command(OUTPUT ${outfile}
 148       COMMAND ${CLANG_TOOL}
 149       ${bc_flags}
 150       --offload-arch=${target_cpu}
 151       ${target_bc_flags}
 152       -MD -MF ${depfile}
 153       ${infile} -o ${outfile}
 154       DEPENDS ${infile}
 155       DEPFILE ${depfile}
 156       COMMENT "Building LLVM bitcode ${outfile}"
 157       VERBATIM
 158     )
 159     if(TARGET clang)
 160       # Add a file-level dependency to ensure that clang is up-to-date.
 161       # By default, add_custom_command only builds clang if the
 162       # executable is missing.
 163       add_custom_command(OUTPUT ${outfile}
 164         DEPENDS clang
 165         APPEND
 166       )
 167     endif()
 168     set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${outfile})
 169
 170     list(APPEND bc_files ${outfile})
 171   endforeach()
 172
 173   set(bclib_name "libomptarget-${target_name}-${target_cpu}.bc")
 174
 175   # Link to a bitcode library.
 176   add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/linked_${bclib_name}
 177       COMMAND ${LINK_TOOL}
 178         -o ${CMAKE_CURRENT_BINARY_DIR}/linked_${bclib_name} ${bc_files}
 179       DEPENDS ${bc_files}
 180       COMMENT "Linking LLVM bitcode ${bclib_name}"
 181   )
 182
 183   if(TARGET llvm-link)
 184     add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/linked_${bclib_name}
 185       DEPENDS llvm-link
 186       APPEND)
 187   endif()
 188
 189   add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/internalized_${bclib_name}
 190       COMMAND ${OPT_TOOL} ${link_export_flag} ${CMAKE_CURRENT_BINARY_DIR}/linked_${bclib_name}
 191                       -o ${CMAKE_CURRENT_BINARY_DIR}/internalized_${bclib_name}
 192       DEPENDS ${source_directory}/exports ${CMAKE_CURRENT_BINARY_DIR}/linked_${bclib_name}
 193       COMMENT "Internalizing LLVM bitcode ${bclib_name}"
 194   )
 195   if(TARGET opt)
 196     add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/internalized_${bclib_name}
 197       DEPENDS opt
 198       APPEND)
 199   endif()
 200
 201   add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
 202       COMMAND ${OPT_TOOL} ${link_opt_flags} ${CMAKE_CURRENT_BINARY_DIR}/internalized_${bclib_name}
 203                       -o ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
 204       DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/internalized_${bclib_name}
 205       COMMENT "Optimizing LLVM bitcode ${bclib_name}"
 206   )
 207   if(TARGET opt)
 208     add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
 209       DEPENDS opt
 210       APPEND)
 211   endif()
 212
 213   set(bclib_target_name "omptarget-${target_name}-${target_cpu}-bc")
 214   add_custom_target(${bclib_target_name} DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name})
 215
 216   # Copy library to destination.
 217   add_custom_command(TARGET ${bclib_target_name} POST_BUILD
 218                     COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
 219                     ${LIBOMPTARGET_LIBRARY_DIR})
 220   add_dependencies(omptarget.devicertl.${target_name} ${bclib_target_name})
 221
 222   set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${bclib_name} ${LIBOMPTARGET_LIBRARY_DIR}/${bclib_name})
 223
 224   # Install bitcode library under the lib destination folder.
 225   install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} DESTINATION "${OFFLOAD_INSTALL_LIBDIR}")
 226
 227   set(target_feature "")
 228   if("${target_triple}" STREQUAL "nvptx64-nvidia-cuda")
 229     set(target_feature "feature=+ptx63")
 230   endif()
 231
 232   # Package the bitcode in the bitcode and embed it in an ELF for the static library
 233   add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/packaged_${bclib_name}
 234       COMMAND ${PACKAGER_TOOL} -o ${CMAKE_CURRENT_BINARY_DIR}/packaged_${bclib_name}
 235         "--image=file=${CMAKE_CURRENT_BINARY_DIR}/${bclib_name},${target_feature},triple=${target_triple},arch=${target_cpu},kind=openmp"
 236       DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
 237       COMMENT "Packaging LLVM offloading binary ${bclib_name}.out"
 238   )
 239   if(TARGET clang-offload-packager)
 240     add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/packaged_${bclib_name}
 241       DEPENDS clang-offload-packager
 242       APPEND)
 243   endif()
 244
 245   set(output_name "${CMAKE_CURRENT_BINARY_DIR}/devicertl-${target_name}-${target_cpu}.o")
 246   add_custom_command(OUTPUT ${output_name}
 247     COMMAND ${CLANG_TOOL} --std=c++17 -c -nostdlib
 248             -Xclang -fembed-offload-object=${CMAKE_CURRENT_BINARY_DIR}/packaged_${bclib_name}
 249             -o ${output_name}
 250             ${source_directory}/Stub.cpp
 251     DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/packaged_${bclib_name} ${source_directory}/Stub.cpp
 252     COMMENT "Embedding LLVM offloading binary in devicertl-${target_name}-${target_cpu}.o"
 253     VERBATIM
 254   )
 255   if(TARGET clang)
 256     add_custom_command(OUTPUT ${output_name}
 257       DEPENDS clang
 258       APPEND)
 259   endif()
 260
 261   set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${output_name})
 262   set_property(TARGET omptarget.devicertl.all_objs APPEND PROPERTY IMPORTED_OBJECTS ${output_name})
 263
 264   if (CMAKE_EXPORT_COMPILE_COMMANDS)
 265     set(ide_target_name omptarget-ide-${target_name}-${target_cpu})
 266     add_library(${ide_target_name} STATIC EXCLUDE_FROM_ALL ${src_files})
 267     target_compile_options(${ide_target_name} PRIVATE
 268       -fopenmp --offload-arch=${target_cpu} -fopenmp-cuda-mode
 269       -mllvm -openmp-opt-disable
 270       -foffload-lto -fvisibility=hidden --offload-device-only
 271       -nocudalib -nogpulib -nostdinc -Wno-unknown-cuda-version
 272     )
 273     target_compile_definitions(${ide_target_name} PRIVATE SHARED_SCRATCHPAD_SIZE=512)
 274     target_include_directories(${ide_target_name} PRIVATE
 275       ${include_directory}
 276       ${devicertl_base_directory}/../include
 277       ${LIBOMPTARGET_LLVM_INCLUDE_DIRS}
 278     )
 279     install(TARGETS ${ide_target_name} EXCLUDE_FROM_ALL)
 280   endif()
 281 endfunction()
 282
 283 # Generate a Bitcode library for all the gpu architectures the user requested.
 284 add_custom_target(omptarget.devicertl.nvptx)
 285 add_custom_target(omptarget.devicertl.amdgpu)
 286 foreach(gpu_arch ${LIBOMPTARGET_DEVICE_ARCHITECTURES})
 287   if("${gpu_arch}" IN_LIST all_amdgpu_architectures)
 288     compileDeviceRTLLibrary(${gpu_arch} amdgpu amdgcn-amd-amdhsa -Xclang -mcode-object-version=none)
 289   elseif("${gpu_arch}" IN_LIST all_nvptx_architectures)
 290     compileDeviceRTLLibrary(${gpu_arch} nvptx nvptx64-nvidia-cuda --cuda-feature=+ptx63)
 291   else()
 292     libomptarget_error_say("Unknown GPU architecture '${gpu_arch}'")
 293   endif()
 294 endforeach()
 295
 296 # Archive all the object files generated above into a static library
 297 add_library(omptarget.devicertl STATIC)
 298 set_target_properties(omptarget.devicertl PROPERTIES
 299   ARCHIVE_OUTPUT_DIRECTORY "${LIBOMPTARGET_LLVM_LIBRARY_INTDIR}"
 300   LINKER_LANGUAGE CXX
 301 )
 302 target_link_libraries(omptarget.devicertl PRIVATE omptarget.devicertl.all_objs)
 303
 304 install(TARGETS omptarget.devicertl ARCHIVE DESTINATION ${OFFLOAD_INSTALL_LIBDIR})