[sanitizer] Improve FreeBSD ASLR detection
[llvm-project.git] / openmp / libomptarget / deviceRTLs / common / omptarget.h
blob417c22d607d5e9d01a174a75f3adcc2dda19694c
1 //===---- omptarget.h - OpenMP GPU initialization ---------------- CUDA -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the declarations of all library macros, types,
10 // and functions.
12 //===----------------------------------------------------------------------===//
14 #ifndef OMPTARGET_H
15 #define OMPTARGET_H
17 #include "common/allocator.h"
18 #include "common/debug.h" // debug
19 #include "common/state-queue.h"
20 #include "common/support.h"
21 #include "interface.h" // interfaces with omp, compiler, and user
22 #include "target_impl.h"
// Version number of this device runtime library.
#define OMPTARGET_NVPTX_VERSION 1.1

// used by the library for the interface with the app
#define DISPATCH_FINISHED 0
#define DISPATCH_NOTFINISHED 1

// used by dynamic scheduling
#define FINISHED 0
#define NOT_FINISHED 1
#define LAST_CHUNK 2

// Counter selectors (presumably indices into a per-team counter table;
// TODO confirm against the users of these macros).
#define BARRIER_COUNTER 0
#define ORDERED_COUNTER 1
38 // Worker slot type which is initialized with the default worker slot
39 // size of 4*32 bytes.
40 struct __kmpc_data_sharing_slot {
41 __kmpc_data_sharing_slot *Next;
42 __kmpc_data_sharing_slot *Prev;
43 void *PrevSlotStackPtr;
44 void *DataEnd;
45 char Data[DS_Worker_Warp_Slot_Size];
48 ////////////////////////////////////////////////////////////////////////////////
49 // task ICV and (implicit & explicit) task state
51 class omptarget_nvptx_TaskDescr {
52 public:
53 // methods for flags
54 INLINE omp_sched_t GetRuntimeSched() const;
55 INLINE void SetRuntimeSched(omp_sched_t sched);
56 INLINE int InParallelRegion() const { return items.flags & TaskDescr_InPar; }
57 INLINE int InL2OrHigherParallelRegion() const {
58 return items.flags & TaskDescr_InParL2P;
60 INLINE int IsParallelConstruct() const {
61 return items.flags & TaskDescr_IsParConstr;
63 INLINE int IsTaskConstruct() const { return !IsParallelConstruct(); }
64 // methods for other fields
65 INLINE uint16_t &ThreadId() { return items.threadId; }
66 INLINE uint64_t &RuntimeChunkSize() { return items.runtimeChunkSize; }
67 INLINE omptarget_nvptx_TaskDescr *GetPrevTaskDescr() const { return prev; }
68 INLINE void SetPrevTaskDescr(omptarget_nvptx_TaskDescr *taskDescr) {
69 prev = taskDescr;
71 // init & copy
72 INLINE void InitLevelZeroTaskDescr();
73 INLINE void InitLevelOneTaskDescr(omptarget_nvptx_TaskDescr *parentTaskDescr);
74 INLINE void Copy(omptarget_nvptx_TaskDescr *sourceTaskDescr);
75 INLINE void CopyData(omptarget_nvptx_TaskDescr *sourceTaskDescr);
76 INLINE void CopyParent(omptarget_nvptx_TaskDescr *parentTaskDescr);
77 INLINE void CopyForExplicitTask(omptarget_nvptx_TaskDescr *parentTaskDescr);
78 INLINE void CopyToWorkDescr(omptarget_nvptx_TaskDescr *masterTaskDescr);
79 INLINE void CopyFromWorkDescr(omptarget_nvptx_TaskDescr *workTaskDescr);
80 INLINE void CopyConvergentParent(omptarget_nvptx_TaskDescr *parentTaskDescr,
81 uint16_t tid, uint16_t tnum);
82 INLINE void SaveLoopData();
83 INLINE void RestoreLoopData() const;
85 private:
86 // bits for flags: (6 used, 2 free)
87 // 3 bits (SchedMask) for runtime schedule
88 // 1 bit (InPar) if this thread has encountered one or more parallel region
89 // 1 bit (IsParConstr) if ICV for a parallel region (false = explicit task)
90 // 1 bit (InParL2+) if this thread has encountered L2 or higher parallel
91 // region
92 static const uint8_t TaskDescr_SchedMask = (0x1 | 0x2 | 0x4);
93 static const uint8_t TaskDescr_InPar = 0x10;
94 static const uint8_t TaskDescr_IsParConstr = 0x20;
95 static const uint8_t TaskDescr_InParL2P = 0x40;
97 struct SavedLoopDescr_items {
98 int64_t loopUpperBound;
99 int64_t nextLowerBound;
100 int64_t chunk;
101 int64_t stride;
102 kmp_sched_t schedule;
103 } loopData;
105 struct TaskDescr_items {
106 uint8_t flags; // 6 bit used (see flag above)
107 uint8_t unused;
108 uint16_t threadId; // thread id
109 uint64_t runtimeChunkSize; // runtime chunk size
110 } items;
111 omptarget_nvptx_TaskDescr *prev;
114 // build on kmp
115 typedef struct omptarget_nvptx_ExplicitTaskDescr {
116 omptarget_nvptx_TaskDescr
117 taskDescr; // omptarget_nvptx task description (must be first)
118 kmp_TaskDescr kmpTaskDescr; // kmp task description (must be last)
119 } omptarget_nvptx_ExplicitTaskDescr;
121 ////////////////////////////////////////////////////////////////////////////////
122 // Descriptor of a parallel region (worksharing in general)
124 class omptarget_nvptx_WorkDescr {
126 public:
127 // access to data
128 INLINE omptarget_nvptx_TaskDescr *WorkTaskDescr() { return &masterTaskICV; }
130 private:
131 omptarget_nvptx_TaskDescr masterTaskICV;
134 ////////////////////////////////////////////////////////////////////////////////
136 class omptarget_nvptx_TeamDescr {
137 public:
138 // access to data
139 INLINE omptarget_nvptx_TaskDescr *LevelZeroTaskDescr() {
140 return &levelZeroTaskDescr;
142 INLINE omptarget_nvptx_WorkDescr &WorkDescr() {
143 return workDescrForActiveParallel;
146 // init
147 INLINE void InitTeamDescr();
149 INLINE __kmpc_data_sharing_slot *GetPreallocatedSlotAddr(int wid) {
150 worker_rootS[wid].DataEnd =
151 &worker_rootS[wid].Data[0] + DS_Worker_Warp_Slot_Size;
152 // We currently do not have a next slot.
153 worker_rootS[wid].Next = 0;
154 worker_rootS[wid].Prev = 0;
155 worker_rootS[wid].PrevSlotStackPtr = 0;
156 return (__kmpc_data_sharing_slot *)&worker_rootS[wid];
159 private:
160 omptarget_nvptx_TaskDescr
161 levelZeroTaskDescr; // icv for team master initial thread
162 omptarget_nvptx_WorkDescr
163 workDescrForActiveParallel; // one, ONLY for the active par
165 ALIGN(16)
166 __kmpc_data_sharing_slot worker_rootS[DS_Max_Warp_Number];
169 ////////////////////////////////////////////////////////////////////////////////
170 // thread private data (struct of arrays for better coalescing)
171 // tid refers here to the global thread id
172 // multiple concurrent kernels are not supported at this time
173 class omptarget_nvptx_ThreadPrivateContext {
174 public:
175 // task
176 INLINE omptarget_nvptx_TaskDescr *Level1TaskDescr(int tid) {
177 return &levelOneTaskDescr[tid];
179 INLINE void SetTopLevelTaskDescr(int tid,
180 omptarget_nvptx_TaskDescr *taskICV) {
181 topTaskDescr[tid] = taskICV;
183 INLINE omptarget_nvptx_TaskDescr *GetTopLevelTaskDescr(int tid) const;
184 // schedule (for dispatch)
185 INLINE kmp_sched_t &ScheduleType(int tid) { return schedule[tid]; }
186 INLINE int64_t &Chunk(int tid) { return chunk[tid]; }
187 INLINE int64_t &LoopUpperBound(int tid) { return loopUpperBound[tid]; }
188 INLINE int64_t &NextLowerBound(int tid) { return nextLowerBound[tid]; }
189 INLINE int64_t &Stride(int tid) { return stride[tid]; }
191 INLINE omptarget_nvptx_TeamDescr &TeamContext() { return teamContext; }
193 INLINE void InitThreadPrivateContext(int tid);
194 INLINE uint64_t &Cnt() { return cnt; }
196 private:
197 // team context for this team
198 omptarget_nvptx_TeamDescr teamContext;
199 // task ICV for implicit threads in the only parallel region
200 omptarget_nvptx_TaskDescr levelOneTaskDescr[MAX_THREADS_PER_TEAM];
201 // pointer where to find the current task ICV (top of the stack)
202 omptarget_nvptx_TaskDescr *topTaskDescr[MAX_THREADS_PER_TEAM];
203 // schedule (for dispatch)
204 kmp_sched_t schedule[MAX_THREADS_PER_TEAM]; // remember schedule type for #for
205 int64_t chunk[MAX_THREADS_PER_TEAM];
206 int64_t loopUpperBound[MAX_THREADS_PER_TEAM];
207 // state for dispatch with dyn/guided OR static (never use both at a time)
208 int64_t nextLowerBound[MAX_THREADS_PER_TEAM];
209 int64_t stride[MAX_THREADS_PER_TEAM];
210 uint64_t cnt;
213 /// Memory manager for statically allocated memory.
214 class omptarget_nvptx_SimpleMemoryManager {
215 private:
216 struct MemDataTy {
217 volatile unsigned keys[OMP_STATE_COUNT];
218 } MemData[MAX_SM] ALIGN(128);
220 INLINE static uint32_t hash(unsigned key) {
221 return key & (OMP_STATE_COUNT - 1);
224 public:
225 INLINE void Release();
226 INLINE const void *Acquire(const void *buf, size_t size);
229 ////////////////////////////////////////////////////////////////////////////////
231 ////////////////////////////////////////////////////////////////////////////////
232 // global data tables
233 ////////////////////////////////////////////////////////////////////////////////
235 extern omptarget_nvptx_SimpleMemoryManager omptarget_nvptx_simpleMemoryManager;
236 extern uint32_t EXTERN_SHARED(usedMemIdx);
237 extern uint32_t EXTERN_SHARED(usedSlotIdx);
238 #if _OPENMP
239 extern uint8_t parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE];
240 #pragma omp allocate(parallelLevel) allocator(omp_pteam_mem_alloc)
241 #else
242 extern uint8_t EXTERN_SHARED(parallelLevel)[MAX_THREADS_PER_TEAM / WARPSIZE];
243 #endif
244 extern uint16_t EXTERN_SHARED(threadLimit);
245 extern uint16_t EXTERN_SHARED(threadsInTeam);
246 extern uint16_t EXTERN_SHARED(nThreads);
247 extern omptarget_nvptx_ThreadPrivateContext *
248 EXTERN_SHARED(omptarget_nvptx_threadPrivateContext);
250 extern int8_t EXTERN_SHARED(execution_param);
251 extern void *EXTERN_SHARED(ReductionScratchpadPtr);
253 ////////////////////////////////////////////////////////////////////////////////
254 // work function (outlined parallel/simd functions) and arguments.
255 // needed for L1 parallelism only.
256 ////////////////////////////////////////////////////////////////////////////////
258 typedef void *omptarget_nvptx_WorkFn;
259 extern omptarget_nvptx_WorkFn EXTERN_SHARED(omptarget_nvptx_workFn);
261 ////////////////////////////////////////////////////////////////////////////////
262 // get private data structures
263 ////////////////////////////////////////////////////////////////////////////////
265 INLINE omptarget_nvptx_TeamDescr &getMyTeamDescriptor();
266 INLINE omptarget_nvptx_WorkDescr &getMyWorkDescriptor();
267 INLINE omptarget_nvptx_TaskDescr *
268 getMyTopTaskDescriptor(bool isSPMDExecutionMode);
269 INLINE omptarget_nvptx_TaskDescr *getMyTopTaskDescriptor(int globalThreadId);
271 ////////////////////////////////////////////////////////////////////////////////
272 // inlined implementation
273 ////////////////////////////////////////////////////////////////////////////////
275 INLINE uint32_t __kmpc_impl_ffs(uint32_t x) { return __builtin_ffs(x); }
276 INLINE uint32_t __kmpc_impl_popc(uint32_t x) { return __builtin_popcount(x); }
277 INLINE uint32_t __kmpc_impl_ffs(uint64_t x) { return __builtin_ffsl(x); }
278 INLINE uint32_t __kmpc_impl_popc(uint64_t x) { return __builtin_popcountl(x); }
280 #include "common/omptargeti.h"
282 #endif