[sanitizer] Improve FreeBSD ASLR detection
[llvm-project.git] / openmp / libomptarget / deviceRTLs / common / omptargeti.h
blob93831c895273958044ff78956debc3d91d1053ef
1 //===---- omptargeti.h - OpenMP GPU initialization --------------- CUDA -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the inline definitions of the task descriptor, thread
10 // private context, team descriptor, and memory manager functions.
12 //===----------------------------------------------------------------------===//
14 ////////////////////////////////////////////////////////////////////////////////
15 // Task Descriptor
16 ////////////////////////////////////////////////////////////////////////////////
18 INLINE omp_sched_t omptarget_nvptx_TaskDescr::GetRuntimeSched() const {
19 // sched starts from 1..4; encode it as 0..3; so add 1 here
20 uint8_t rc = (items.flags & TaskDescr_SchedMask) + 1;
21 return (omp_sched_t)rc;
24 INLINE void omptarget_nvptx_TaskDescr::SetRuntimeSched(omp_sched_t sched) {
25 // sched starts from 1..4; encode it as 0..3; so sub 1 here
26 uint8_t val = ((uint8_t)sched) - 1;
27 // clear current sched
28 items.flags &= ~TaskDescr_SchedMask;
29 // set new sched
30 items.flags |= val;
33 INLINE void omptarget_nvptx_TaskDescr::InitLevelZeroTaskDescr() {
34 // slow method
35 // flag:
36 // default sched is static,
37 // dyn is off (unused now anyway, but may need to sample from host ?)
38 // not in parallel
40 items.flags = 0;
41 items.threadId = 0; // is master
42 items.runtimeChunkSize = 1; // preferred chunking statik with chunk 1
45 // This is called when all threads are started together in SPMD mode.
46 // OMP directives include target parallel, target distribute parallel for, etc.
47 INLINE void omptarget_nvptx_TaskDescr::InitLevelOneTaskDescr(
48 omptarget_nvptx_TaskDescr *parentTaskDescr) {
49 // slow method
50 // flag:
51 // default sched is static,
52 // dyn is off (unused now anyway, but may need to sample from host ?)
53 // in L1 parallel
55 items.flags = TaskDescr_InPar | TaskDescr_IsParConstr; // set flag to parallel
56 items.threadId =
57 __kmpc_get_hardware_thread_id_in_block(); // get ids from cuda (only
58 // called for 1st level)
59 items.runtimeChunkSize = 1; // preferred chunking statik with chunk 1
60 prev = parentTaskDescr;
63 INLINE void omptarget_nvptx_TaskDescr::CopyData(
64 omptarget_nvptx_TaskDescr *sourceTaskDescr) {
65 items = sourceTaskDescr->items;
68 INLINE void
69 omptarget_nvptx_TaskDescr::Copy(omptarget_nvptx_TaskDescr *sourceTaskDescr) {
70 CopyData(sourceTaskDescr);
71 prev = sourceTaskDescr->prev;
74 INLINE void omptarget_nvptx_TaskDescr::CopyParent(
75 omptarget_nvptx_TaskDescr *parentTaskDescr) {
76 CopyData(parentTaskDescr);
77 prev = parentTaskDescr;
80 INLINE void omptarget_nvptx_TaskDescr::CopyForExplicitTask(
81 omptarget_nvptx_TaskDescr *parentTaskDescr) {
82 CopyParent(parentTaskDescr);
83 items.flags = items.flags & ~TaskDescr_IsParConstr;
84 ASSERT0(LT_FUSSY, IsTaskConstruct(), "expected task");
87 INLINE void omptarget_nvptx_TaskDescr::CopyToWorkDescr(
88 omptarget_nvptx_TaskDescr *masterTaskDescr) {
89 CopyParent(masterTaskDescr);
90 // overwrite specific items;
91 items.flags |=
92 TaskDescr_InPar | TaskDescr_IsParConstr; // set flag to parallel
95 INLINE void omptarget_nvptx_TaskDescr::CopyFromWorkDescr(
96 omptarget_nvptx_TaskDescr *workTaskDescr) {
97 Copy(workTaskDescr);
99 // overwrite specific items;
101 // The threadID should be __kmpc_get_hardware_thread_id_in_block() %
102 // GetMasterThreadID(). This is so that the serial master (first lane in the
103 // master warp) gets a threadId of 0. However, we know that this function is
104 // always called in a parallel region where only workers are active. The
105 // serial master thread never enters this region. When a parallel region is
106 // executed serially, the threadId is set to 0 elsewhere and the
107 // kmpc_serialized_* functions are called, which never activate this region.
108 items.threadId =
109 __kmpc_get_hardware_thread_id_in_block(); // get ids from cuda (only
110 // called for 1st level)
113 INLINE void omptarget_nvptx_TaskDescr::CopyConvergentParent(
114 omptarget_nvptx_TaskDescr *parentTaskDescr, uint16_t tid, uint16_t tnum) {
115 CopyParent(parentTaskDescr);
116 items.flags |= TaskDescr_InParL2P; // In L2+ parallelism
117 items.threadId = tid;
120 INLINE void omptarget_nvptx_TaskDescr::SaveLoopData() {
121 loopData.loopUpperBound =
122 omptarget_nvptx_threadPrivateContext->LoopUpperBound(items.threadId);
123 loopData.nextLowerBound =
124 omptarget_nvptx_threadPrivateContext->NextLowerBound(items.threadId);
125 loopData.schedule =
126 omptarget_nvptx_threadPrivateContext->ScheduleType(items.threadId);
127 loopData.chunk = omptarget_nvptx_threadPrivateContext->Chunk(items.threadId);
128 loopData.stride =
129 omptarget_nvptx_threadPrivateContext->Stride(items.threadId);
132 INLINE void omptarget_nvptx_TaskDescr::RestoreLoopData() const {
133 omptarget_nvptx_threadPrivateContext->Chunk(items.threadId) = loopData.chunk;
134 omptarget_nvptx_threadPrivateContext->LoopUpperBound(items.threadId) =
135 loopData.loopUpperBound;
136 omptarget_nvptx_threadPrivateContext->NextLowerBound(items.threadId) =
137 loopData.nextLowerBound;
138 omptarget_nvptx_threadPrivateContext->Stride(items.threadId) =
139 loopData.stride;
140 omptarget_nvptx_threadPrivateContext->ScheduleType(items.threadId) =
141 loopData.schedule;
144 ////////////////////////////////////////////////////////////////////////////////
145 // Thread Private Context
146 ////////////////////////////////////////////////////////////////////////////////
148 INLINE omptarget_nvptx_TaskDescr *
149 omptarget_nvptx_ThreadPrivateContext::GetTopLevelTaskDescr(int tid) const {
150 ASSERT0(
151 LT_FUSSY, tid < MAX_THREADS_PER_TEAM,
152 "Getting top level, tid is larger than allocated data structure size");
153 return topTaskDescr[tid];
156 INLINE void
157 omptarget_nvptx_ThreadPrivateContext::InitThreadPrivateContext(int tid) {
158 // levelOneTaskDescr is init when starting the parallel region
159 // top task descr is NULL (team master version will be fixed separately)
160 topTaskDescr[tid] = NULL;
161 // the following don't need to be init here; they are init when using dyn
162 // sched
163 // current_Event, events_Number, chunk, num_Iterations, schedule
166 ////////////////////////////////////////////////////////////////////////////////
167 // Team Descriptor
168 ////////////////////////////////////////////////////////////////////////////////
170 INLINE void omptarget_nvptx_TeamDescr::InitTeamDescr() {
171 levelZeroTaskDescr.InitLevelZeroTaskDescr();
174 ////////////////////////////////////////////////////////////////////////////////
175 // Get private data structure for thread
176 ////////////////////////////////////////////////////////////////////////////////
178 // Utility routines for CUDA threads
179 INLINE omptarget_nvptx_TeamDescr &getMyTeamDescriptor() {
180 return omptarget_nvptx_threadPrivateContext->TeamContext();
183 INLINE omptarget_nvptx_WorkDescr &getMyWorkDescriptor() {
184 omptarget_nvptx_TeamDescr &currTeamDescr = getMyTeamDescriptor();
185 return currTeamDescr.WorkDescr();
188 INLINE omptarget_nvptx_TaskDescr *getMyTopTaskDescriptor(int threadId) {
189 return omptarget_nvptx_threadPrivateContext->GetTopLevelTaskDescr(threadId);
192 INLINE omptarget_nvptx_TaskDescr *
193 getMyTopTaskDescriptor(bool isSPMDExecutionMode) {
194 return getMyTopTaskDescriptor(GetLogicalThreadIdInBlock());
197 ////////////////////////////////////////////////////////////////////////////////
198 // Memory management runtime functions.
199 ////////////////////////////////////////////////////////////////////////////////
201 INLINE void omptarget_nvptx_SimpleMemoryManager::Release() {
202 ASSERT0(LT_FUSSY, usedSlotIdx < MAX_SM,
203 "SlotIdx is too big or uninitialized.");
204 ASSERT0(LT_FUSSY, usedMemIdx < OMP_STATE_COUNT,
205 "MemIdx is too big or uninitialized.");
206 MemDataTy &MD = MemData[usedSlotIdx];
207 __kmpc_atomic_exchange((unsigned *)&MD.keys[usedMemIdx], 0u);
210 INLINE const void *omptarget_nvptx_SimpleMemoryManager::Acquire(const void *buf,
211 size_t size) {
212 ASSERT0(LT_FUSSY, usedSlotIdx < MAX_SM,
213 "SlotIdx is too big or uninitialized.");
214 const unsigned sm = usedSlotIdx;
215 MemDataTy &MD = MemData[sm];
216 unsigned i = hash(GetBlockIdInKernel());
217 while (__kmpc_atomic_cas((unsigned *)&MD.keys[i], 0u, 1u) != 0) {
218 i = hash(i + 1);
220 usedSlotIdx = sm;
221 usedMemIdx = i;
222 return static_cast<const char *>(buf) + (sm * OMP_STATE_COUNT + i) * size;