1 //===------- Mapping.cpp - OpenMP device runtime mapping helpers -- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 //===----------------------------------------------------------------------===//
13 #include "Interface.h"
18 #pragma omp declare target
20 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
27 /// AMDGCN Implementation
30 #pragma omp begin declare variant match(device = {arch(amdgcn)})
32 constexpr const llvm::omp::GV
&getGridValue() {
33 return llvm::omp::getAMDGPUGridValues
<__AMDGCN_WAVEFRONT_SIZE
>();
36 uint32_t getGridDim(uint32_t n
, uint16_t d
) {
38 return q
+ (n
> q
* d
);
41 uint32_t getWorkgroupDim(uint32_t group_id
, uint32_t grid_size
,
42 uint16_t group_size
) {
43 uint32_t r
= grid_size
- group_id
* group_size
;
44 return (r
< group_size
) ? r
: group_size
;
47 uint32_t getNumHardwareThreadsInBlock() {
48 return getWorkgroupDim(__builtin_amdgcn_workgroup_id_x(),
49 __builtin_amdgcn_grid_size_x(),
50 __builtin_amdgcn_workgroup_size_x());
53 LaneMaskTy
activemask() { return __builtin_amdgcn_read_exec(); }
55 LaneMaskTy
lanemaskLT() {
56 uint32_t Lane
= mapping::getThreadIdInWarp();
57 int64_t Ballot
= mapping::activemask();
58 uint64_t Mask
= ((uint64_t)1 << Lane
) - (uint64_t)1;
62 LaneMaskTy
lanemaskGT() {
63 uint32_t Lane
= mapping::getThreadIdInWarp();
64 if (Lane
== (mapping::getWarpSize() - 1))
66 int64_t Ballot
= mapping::activemask();
67 uint64_t Mask
= (~((uint64_t)0)) << (Lane
+ 1);
71 uint32_t getThreadIdInWarp() {
72 return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
75 uint32_t getThreadIdInBlock() { return __builtin_amdgcn_workitem_id_x(); }
77 uint32_t getKernelSize() { return __builtin_amdgcn_grid_size_x(); }
79 uint32_t getBlockId() { return __builtin_amdgcn_workgroup_id_x(); }
81 uint32_t getNumberOfBlocks() {
82 return getGridDim(__builtin_amdgcn_grid_size_x(),
83 __builtin_amdgcn_workgroup_size_x());
86 uint32_t getWarpId() {
87 return impl::getThreadIdInBlock() / mapping::getWarpSize();
90 uint32_t getNumberOfWarpsInBlock() {
91 return mapping::getBlockSize() / mapping::getWarpSize();
94 #pragma omp end declare variant
97 /// NVPTX Implementation
100 #pragma omp begin declare variant match( \
101 device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
103 uint32_t getNumHardwareThreadsInBlock() {
104 return __nvvm_read_ptx_sreg_ntid_x();
107 constexpr const llvm::omp::GV
&getGridValue() {
108 return llvm::omp::NVPTXGridValues
;
111 LaneMaskTy
activemask() {
113 asm("activemask.b32 %0;" : "=r"(Mask
));
117 LaneMaskTy
lanemaskLT() {
118 __kmpc_impl_lanemask_t Res
;
119 asm("mov.u32 %0, %%lanemask_lt;" : "=r"(Res
));
123 LaneMaskTy
lanemaskGT() {
124 __kmpc_impl_lanemask_t Res
;
125 asm("mov.u32 %0, %%lanemask_gt;" : "=r"(Res
));
129 uint32_t getThreadIdInBlock() { return __nvvm_read_ptx_sreg_tid_x(); }
131 uint32_t getThreadIdInWarp() {
132 return impl::getThreadIdInBlock() & (mapping::getWarpSize() - 1);
135 uint32_t getKernelSize() {
136 return __nvvm_read_ptx_sreg_nctaid_x() *
137 mapping::getNumberOfProcessorElements();
140 uint32_t getBlockId() { return __nvvm_read_ptx_sreg_ctaid_x(); }
142 uint32_t getNumberOfBlocks() { return __nvvm_read_ptx_sreg_nctaid_x(); }
144 uint32_t getWarpId() {
145 return impl::getThreadIdInBlock() / mapping::getWarpSize();
148 uint32_t getNumberOfWarpsInBlock() {
149 return (mapping::getBlockSize() + mapping::getWarpSize() - 1) /
150 mapping::getWarpSize();
153 #pragma omp end declare variant
156 uint32_t getWarpSize() { return getGridValue().GV_Warp_Size
; }
161 /// We have to be deliberate about the distinction of `mapping::` and `impl::`
162 /// below to avoid repeating assumptions or including irrelevant ones.
165 static bool isInLastWarp() {
166 uint32_t MainTId
= (mapping::getNumberOfProcessorElements() - 1) &
167 ~(mapping::getWarpSize() - 1);
168 return mapping::getThreadIdInBlock() == MainTId
;
171 bool mapping::isMainThreadInGenericMode(bool IsSPMD
) {
172 if (IsSPMD
|| icv::Level
)
175 // Check if this is the last warp in the block.
176 return isInLastWarp();
179 bool mapping::isMainThreadInGenericMode() {
180 return mapping::isMainThreadInGenericMode(mapping::isSPMDMode());
183 bool mapping::isInitialThreadInLevel0(bool IsSPMD
) {
185 return mapping::getThreadIdInBlock() == 0;
186 return isInLastWarp();
189 bool mapping::isLeaderInWarp() {
190 __kmpc_impl_lanemask_t Active
= mapping::activemask();
191 __kmpc_impl_lanemask_t LaneMaskLT
= mapping::lanemaskLT();
192 return utils::popc(Active
& LaneMaskLT
) == 0;
195 LaneMaskTy
mapping::activemask() { return impl::activemask(); }
197 LaneMaskTy
mapping::lanemaskLT() { return impl::lanemaskLT(); }
199 LaneMaskTy
mapping::lanemaskGT() { return impl::lanemaskGT(); }
201 uint32_t mapping::getThreadIdInWarp() {
202 uint32_t ThreadIdInWarp
= impl::getThreadIdInWarp();
203 ASSERT(ThreadIdInWarp
< impl::getWarpSize());
204 return ThreadIdInWarp
;
207 uint32_t mapping::getThreadIdInBlock() {
208 uint32_t ThreadIdInBlock
= impl::getThreadIdInBlock();
209 ASSERT(ThreadIdInBlock
< impl::getNumHardwareThreadsInBlock());
210 return ThreadIdInBlock
;
213 uint32_t mapping::getWarpSize() { return impl::getWarpSize(); }
215 uint32_t mapping::getBlockSize() {
216 uint32_t BlockSize
= mapping::getNumberOfProcessorElements() -
217 (!mapping::isSPMDMode() * impl::getWarpSize());
221 uint32_t mapping::getKernelSize() { return impl::getKernelSize(); }
223 uint32_t mapping::getWarpId() {
224 uint32_t WarpID
= impl::getWarpId();
225 ASSERT(WarpID
< impl::getNumberOfWarpsInBlock());
229 uint32_t mapping::getBlockId() {
230 uint32_t BlockId
= impl::getBlockId();
231 ASSERT(BlockId
< impl::getNumberOfBlocks());
235 uint32_t mapping::getNumberOfWarpsInBlock() {
236 uint32_t NumberOfWarpsInBlocks
= impl::getNumberOfWarpsInBlock();
237 ASSERT(impl::getWarpId() < NumberOfWarpsInBlocks
);
238 return NumberOfWarpsInBlocks
;
241 uint32_t mapping::getNumberOfBlocks() {
242 uint32_t NumberOfBlocks
= impl::getNumberOfBlocks();
243 ASSERT(impl::getBlockId() < NumberOfBlocks
);
244 return NumberOfBlocks
;
247 uint32_t mapping::getNumberOfProcessorElements() {
248 uint32_t NumberOfProcessorElements
= impl::getNumHardwareThreadsInBlock();
249 ASSERT(impl::getThreadIdInBlock() < NumberOfProcessorElements
);
250 return NumberOfProcessorElements
;
258 static int SHARED(IsSPMDMode
);
260 void mapping::init(bool IsSPMD
) {
261 if (mapping::isInitialThreadInLevel0(IsSPMD
))
265 bool mapping::isSPMDMode() { return IsSPMDMode
; }
267 bool mapping::isGenericMode() { return !isSPMDMode(); }
271 __attribute__((noinline
)) uint32_t __kmpc_get_hardware_thread_id_in_block() {
272 FunctionTracingRAII();
273 return mapping::getThreadIdInBlock();
276 __attribute__((noinline
)) uint32_t __kmpc_get_hardware_num_threads_in_block() {
277 FunctionTracingRAII();
278 return impl::getNumHardwareThreadsInBlock();
281 __attribute__((noinline
)) uint32_t __kmpc_get_warp_size() {
282 FunctionTracingRAII();
283 return impl::getWarpSize();
286 #pragma omp end declare target