//===------------ target_impl.h - NVPTX OpenMP GPU options ------- CUDA -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Definitions of target specific functions
//
//===----------------------------------------------------------------------===//
#ifndef _TARGET_IMPL_H_
#define _TARGET_IMPL_H_
#include "nvptx_interface.h"

#include <cuda.h>
#include <stdint.h>

// subset of inttypes.h
#define PRId64 "ld"
#define PRIu64 "lu"

typedef uint32_t __kmpc_impl_lanemask_t;
#define INLINE inline __attribute__((always_inline))
#define NOINLINE __attribute__((noinline))
#define ALIGN(N) __attribute__((aligned(N)))
#define PLUGIN_ACCESSIBLE /* no annotation needed for cuda plugin */
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
INLINE constexpr const llvm::omp::GV &getGridValue() {
  return llvm::omp::NVPTXGridValues;
}
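// Usage sketch: because getGridValue() is constexpr, the NVPTX grid
// parameters can seed compile-time constants, as the enums below do. For
// instance (the variable name KWarpSize is illustrative only):
//   constexpr unsigned KWarpSize = getGridValue().GV_Warp_Size;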
////////////////////////////////////////////////////////////////////////////////
// Kernel options
////////////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////////////
// The following definition must match the absolute limit hardwired in the
// host RTL: the maximum number of threads per team.
enum { MAX_THREADS_PER_TEAM = getGridValue().GV_Max_WG_Size };
enum { WARPSIZE = getGridValue().GV_Warp_Size };
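// Sanity-check sketch, assuming the usual NVPTX grid values (1024 threads per
// team, 32 lanes per warp); the exact numbers come from NVPTXGridValues in
// OMPGridValues.h:
static_assert(MAX_THREADS_PER_TEAM % WARPSIZE == 0,
              "a team must be a whole number of warps");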
// Maximum number of omp state objects per SM allocated statically in global
// memory.
#if __CUDA_ARCH__ >= 600
#define OMP_STATE_COUNT 32
#else
#define OMP_STATE_COUNT 16
#endif
#if !defined(MAX_SM)
#if __CUDA_ARCH__ >= 900
#error unsupported compute capability, define MAX_SM via LIBOMPTARGET_NVPTX_MAX_SM cmake option
#elif __CUDA_ARCH__ >= 800
// GA100 design has a maximum of 128 SMs but A100 product only has 108 SMs
// GA102 design has a maximum of 84 SMs
#define MAX_SM 108
#elif __CUDA_ARCH__ >= 700
#define MAX_SM 84
#elif __CUDA_ARCH__ >= 600
#define MAX_SM 56
#else
#define MAX_SM 16
#endif
#endif
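// Sketch of how the two sizing constants above combine: the device RTL keeps
// a statically allocated pool of omp state objects in global memory, with
// OMP_STATE_COUNT objects per SM. The type and variable names below are
// illustrative only, not the RTL's real declarations:
//   static omptarget_StateTy StatePool[MAX_SM][OMP_STATE_COUNT];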
#define OMP_ACTIVE_PARALLEL_LEVEL 128
// Data sharing related quantities; these need to match what is used in the
// compiler.
enum DATA_SHARING_SIZES {
  // The size reserved for data in a shared memory slot.
  DS_Slot_Size = getGridValue().GV_Slot_Size,
  // The slot size that should be reserved for a working warp.
  DS_Worker_Warp_Slot_Size = getGridValue().warpSlotSize(),
  // The maximum number of warps in use.
  DS_Max_Warp_Number = getGridValue().maxWarpNumber(),
};
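// Relation sketch, assuming warpSlotSize() and maxWarpNumber() are the usual
// derived quantities (warp size times slot size, and team size divided by
// warp size, respectively):
//   DS_Worker_Warp_Slot_Size == WARPSIZE * DS_Slot_Size
//   DS_Max_Warp_Number == MAX_THREADS_PER_TEAM / WARPSIZE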
enum : __kmpc_impl_lanemask_t {
  __kmpc_impl_all_lanes = ~(__kmpc_impl_lanemask_t)0
};
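// Usage sketch, assuming a 32-lane NVPTX warp so the 32-bit mask covers every
// lane (CUDA's __activemask() returns the mask of currently converged lanes):
//   __kmpc_impl_lanemask_t Active = __activemask();
//   bool FullWarp = (Active == __kmpc_impl_all_lanes);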