//===------------ target_impl.h - NVPTX OpenMP GPU options ------- CUDA -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Definitions of target specific functions
//
//===----------------------------------------------------------------------===//
#ifndef _TARGET_IMPL_H_
#define _TARGET_IMPL_H_
#include "nvptx_interface.h"

#include <cuda.h>
#include <stdint.h>

// subset of inttypes.h
#define PRId64 "ld"
#define PRIu64 "lu"

typedef uint32_t __kmpc_impl_lanemask_t;
#define INLINE inline __attribute__((always_inline))
#define NOINLINE __attribute__((noinline))
#define ALIGN(N) __attribute__((aligned(N)))
#define PLUGIN_ACCESSIBLE /* no annotation needed for cuda plugin */
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
INLINE constexpr const llvm::omp::GV &getGridValue() {
  return llvm::omp::NVPTXGridValues;
}
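// Usage sketch: because getGridValue() is constexpr, the NVPTX grid
// parameters can seed compile-time constants, as the enums below do. For
// instance (the variable name KWarpSize is illustrative only):
//   constexpr unsigned KWarpSize = getGridValue().GV_Warp_Size;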
////////////////////////////////////////////////////////////////////////////////
// Kernel options
////////////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////////////
// The following definition must match the absolute limit hardwired in the
// host RTL: the maximum number of threads per team.
enum { MAX_THREADS_PER_TEAM = getGridValue().GV_Max_WG_Size };
enum { WARPSIZE = getGridValue().GV_Warp_Size };
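// Sanity-check sketch, assuming the usual NVPTX grid values (1024 threads per
// team, 32 lanes per warp); the exact numbers come from NVPTXGridValues in
// OMPGridValues.h:
static_assert(MAX_THREADS_PER_TEAM % WARPSIZE == 0,
              "a team must be a whole number of warps");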
// Maximum number of omp state objects per SM allocated statically in global
// memory.
#if __CUDA_ARCH__ >= 600
#define OMP_STATE_COUNT 32
#else
#define OMP_STATE_COUNT 16
#endif
#if !defined(MAX_SM)
#if __CUDA_ARCH__ >= 900
#error unsupported compute capability, define MAX_SM via LIBOMPTARGET_NVPTX_MAX_SM cmake option
#elif __CUDA_ARCH__ >= 800
// GA100 design has a maximum of 128 SMs but A100 product only has 108 SMs
// GA102 design has a maximum of 84 SMs
#define MAX_SM 108
#elif __CUDA_ARCH__ >= 700
#define MAX_SM 84
#elif __CUDA_ARCH__ >= 600
#define MAX_SM 56
#else
#define MAX_SM 16
#endif
#endif
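// Sketch of how the two sizing constants above combine: the device RTL keeps
// a statically allocated pool of omp state objects in global memory, with
// OMP_STATE_COUNT objects per SM. The type and variable names below are
// illustrative only, not the RTL's real declarations:
//   static omptarget_StateTy StatePool[MAX_SM][OMP_STATE_COUNT];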
#define OMP_ACTIVE_PARALLEL_LEVEL 128
// Data sharing related quantities; these need to match what is used in the
// compiler.
enum DATA_SHARING_SIZES {
  // The size reserved for data in a shared memory slot.
  DS_Slot_Size = getGridValue().GV_Slot_Size,
  // The slot size that should be reserved for a working warp.
  DS_Worker_Warp_Slot_Size = getGridValue().warpSlotSize(),
  // The maximum number of warps in use.
  DS_Max_Warp_Number = getGridValue().maxWarpNumber(),
};
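// Relation sketch, assuming warpSlotSize() and maxWarpNumber() are the usual
// derived quantities (warp size times slot size, and team size divided by
// warp size, respectively):
//   DS_Worker_Warp_Slot_Size == WARPSIZE * DS_Slot_Size
//   DS_Max_Warp_Number == MAX_THREADS_PER_TEAM / WARPSIZE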
enum : __kmpc_impl_lanemask_t {
  __kmpc_impl_all_lanes = ~(__kmpc_impl_lanemask_t)0
};
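// Usage sketch, assuming a 32-lane NVPTX warp so the 32-bit mask covers every
// lane (CUDA's __activemask() returns the mask of currently converged lanes):
//   __kmpc_impl_lanemask_t Active = __activemask();
//   bool FullWarp = (Active == __kmpc_impl_all_lanes);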