1 //===----------------------------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
12 #ifdef __AMDGCN_WAVEFRONT_SIZE
13 #define __WARP_SIZE __AMDGCN_WAVEFRONT_SIZE
15 #define __WARP_SIZE 32
/// 64-bit unsigned integer type used by this header (e.g. for the `mask`
/// arguments of the ballot/shuffle entry points).
///
/// The compiler-provided __UINT64_TYPE__ is preferred: it is exactly 64 bits
/// wide on every target and is the same underlying type <stdint.h> uses, so
/// this typedef neither truncates on targets where `unsigned long` is 32 bits
/// nor conflicts with a previously included <stdint.h>. The historical
/// `unsigned long` definition is kept as a fallback.
#ifdef __UINT64_TYPE__
typedef __UINT64_TYPE__ uint64_t;
#else
typedef unsigned long uint64_t;
#endif
/// OpenMP runtime queries used by the host fallbacks of ompx_thread_id and
/// ompx_block_dim, which call them with `Dim + 1` as the argument.
int omp_get_ancestor_thread_num(int level);
int omp_get_team_size(int level);
31 /// Target kernel language extensions
/// These extensions exist for the host to allow fallback implementations;
/// however, they cannot be arbitrarily composed with OpenMP. If the rules of
/// the kernel language are followed, the host fallbacks should behave as
/// expected, since the kernel is represented as three sequential outer loops,
/// one for each grid dimension, and three (nested) parallel loops, one for
/// each block dimension. This fallback is not supposed to be optimal and
/// should be configurable by the user.
48 ompx_relaxed = __ATOMIC_RELAXED,
49 ompx_aquire = __ATOMIC_ACQUIRE,
50 ompx_release = __ATOMIC_RELEASE,
51 ompx_acq_rel = __ATOMIC_ACQ_REL,
52 ompx_seq_cst = __ATOMIC_SEQ_CST,
61 // TODO: The following implementation is for host fallback. We need to disable
62 // generation of host fallback in kernel language mode.
63 #pragma omp begin declare variant match(device = {kind(cpu)})
/// Host fallbacks for ompx_{thread_id,block_dim,block_id,grid_dim}

/// Thread index in dimension `Dim`: the ancestor thread number queried at
/// level `Dim + 1`.
static inline int ompx_thread_id(int Dim) {
  return omp_get_ancestor_thread_num(Dim + 1);
}

/// Block extent in dimension `Dim`: the team size queried at level `Dim + 1`.
static inline int ompx_block_dim(int Dim) {
  return omp_get_team_size(Dim + 1);
}

/// Block index: the host fallback always reports 0, whatever `Dim` is.
static inline int ompx_block_id(int Dim) { return 0; }

/// Grid extent: the host fallback always reports 1, whatever `Dim` is.
static inline int ompx_grid_dim(int Dim) { return 1; }
78 /// ompx_{sync_block}_{,divergent}
80 #define _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(RETTY, NAME, ARGS, BODY) \
81 static inline RETTY ompx_##NAME(ARGS) { BODY; }
83 _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block, int Ordering,
84 _Pragma("omp barrier"))
85 _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block_acq_rel, void,
86 ompx_sync_block(ompx_acq_rel))
87 _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block_divergent, int Ordering,
88 ompx_sync_block(Ordering))
89 #undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C
92 static inline uint64_t ompx_ballot_sync(uint64_t mask, int pred) {
96 /// ompx_shfl_down_sync_{i,f,l,d}
98 #define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(TYPE, TY) \
99 static inline TYPE ompx_shfl_down_sync_##TY(uint64_t mask, TYPE var, \
100 unsigned delta, int width) { \
104 _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(int, i)
105 _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(float, f)
106 _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(long, l)
107 _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(double, d)
109 #undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL
112 #pragma omp end declare variant
/// ompx_sync_block{,_acq_rel,_divergent}
///
/// Declarations of the block-synchronization entry points. `Ordering` is an
/// int; the host fallback of the acq_rel flavor passes ompx_acq_rel for it.
void ompx_sync_block(int Ordering);
void ompx_sync_block_acq_rel(void);
void ompx_sync_block_divergent(int Ordering);
125 /// ompx_{thread,block}_{id,dim}_{x,y,z}
127 #define _TGT_KERNEL_LANGUAGE_DECL_GRID_C(NAME) \
128 int ompx_##NAME(int Dim); \
129 static inline int ompx_##NAME##_x() { return ompx_##NAME(ompx_dim_x); } \
130 static inline int ompx_##NAME##_y() { return ompx_##NAME(ompx_dim_y); } \
131 static inline int ompx_##NAME##_z() { return ompx_##NAME(ompx_dim_z); }
133 _TGT_KERNEL_LANGUAGE_DECL_GRID_C(thread_id)
134 _TGT_KERNEL_LANGUAGE_DECL_GRID_C(block_dim)
135 _TGT_KERNEL_LANGUAGE_DECL_GRID_C(block_id)
136 _TGT_KERNEL_LANGUAGE_DECL_GRID_C(grid_dim)
137 #undef _TGT_KERNEL_LANGUAGE_DECL_GRID_C
/// Declaration of ompx_ballot_sync. A host variant with the same signature is
/// defined inside the `device = {kind(cpu)}` declare-variant region of this
/// header.
/// NOTE(review): presumably returns a per-lane bit mask of `pred` restricted
/// to the lanes selected by `mask` -- confirm against the device runtime.
uint64_t ompx_ballot_sync(uint64_t mask, int pred);
142 /// ompx_shfl_down_sync_{i,f,l,d}
144 #define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(TYPE, TY) \
145 TYPE ompx_shfl_down_sync_##TY(uint64_t mask, TYPE var, unsigned delta, \
148 _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(int, i)
149 _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(float, f)
150 _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(long, l)
151 _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(double, d)
153 #undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC
171 relaxed = ompx_relaxed ,
172 aquire = ompx_aquire,
173 release = ompx_release,
174 acc_rel = ompx_acq_rel,
175 seq_cst = ompx_seq_cst,
178 /// ompx::{thread,block}_{id,dim}_{,x,y,z}
180 #define _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(NAME) \
181 static inline int NAME(int Dim) noexcept { return ompx_##NAME(Dim); } \
182 static inline int NAME##_x() noexcept { return NAME(ompx_dim_x); } \
183 static inline int NAME##_y() noexcept { return NAME(ompx_dim_y); } \
184 static inline int NAME##_z() noexcept { return NAME(ompx_dim_z); }
186 _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(thread_id)
187 _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(block_dim)
188 _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(block_id)
189 _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(grid_dim)
190 #undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX
/// ompx::sync_block{,_divergent}
195 #define _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(RETTY, NAME, ARGS, CALL_ARGS) \
196 static inline RETTY NAME(ARGS) { \
197 return ompx_##NAME(CALL_ARGS); \
200 _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(void, sync_block, int Ordering = acc_rel,
202 _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(void, sync_block_divergent,
203 int Ordering = acc_rel, Ordering)
204 #undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX
207 static inline uint64_t ballot_sync(uint64_t mask, int pred) {
208 return ompx_ballot_sync(mask, pred);
213 #define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(TYPE, TY) \
214 static inline TYPE shfl_down_sync(uint64_t mask, TYPE var, unsigned delta, \
215 int width = __WARP_SIZE) { \
216 return ompx_shfl_down_sync_##TY(mask, var, delta, width); \
219 _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(int, i)
220 _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(float, f)
221 _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(long, l)
222 _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(double, d)
224 #undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC
232 #endif /* __OMPX_H */