//===----- Workshare.cpp - OpenMP workshare implementation ------ C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the KMPC interface
// for the loop construct plus other worksharing constructs that use the same
// interface as loops.
//
//===----------------------------------------------------------------------===//
#include "Debug.h"
#include "Interface.h"
#include "Mapping.h"
#include "State.h"
#include "Synchronization.h"
#include "Types.h"
#include "Utils.h"

using namespace _OMP;
// TODO:
struct DynamicScheduleTracker {
  int64_t Chunk;
  int64_t LoopUpperBound;
  int64_t NextLowerBound;
  int64_t Stride;
  kmp_sched_t ScheduleType;
  DynamicScheduleTracker *NextDST;
};

#define ASSERT0(...)

// used by the library for the interface with the app
#define DISPATCH_FINISHED 0
#define DISPATCH_NOTFINISHED 1

// used by dynamic scheduling
#define FINISHED 0
#define NOT_FINISHED 1
#define LAST_CHUNK 2

#pragma omp declare target

// TODO: This variable is a hack inherited from the old runtime.
uint64_t SHARED(Cnt);
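// Note (added commentary, not part of the original source): Cnt is a
// team-shared chunk counter used by the dynamic/guided schedules below. It is
// reset by thread 0 in dispatch_init and incremented atomically in NextIter to
// hand out chunk indices to the team.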
template <typename T, typename ST> struct omptarget_nvptx_LoopSupport {
  ////////////////////////////////////////////////////////////////////////////////
  // Loop with static scheduling with chunk

  // Generic implementation of OMP loop scheduling with static policy
  /*! \brief Calculate initial bounds for static loop and stride
   *  @param[in] loc location in code of the call (not used here)
   *  @param[in] global_tid global thread id
   *  @param[in] schedtype type of scheduling (see omptarget-nvptx.h)
   *  @param[in] plastiter pointer to last iteration
   *  @param[in,out] pointer to loop lower bound. It will contain the value of
   *  the lower bound of the first chunk
   *  @param[in,out] pointer to loop upper bound. It will contain the value of
   *  the upper bound of the first chunk
   *  @param[in,out] pointer to loop stride. It will contain the value of the
   *  stride between two successive chunks executed by the same thread
   *  @param[in] loop increment bump
   *  @param[in] chunk size
   */
  // helper function for static chunk
  static void ForStaticChunk(int &last, T &lb, T &ub, ST &stride, ST chunk,
                             T entityId, T numberOfEntities) {
    // Each thread executes multiple chunks, all of the same size, except
    // the last one.
    // Distance between two successive chunks:
    stride = numberOfEntities * chunk;
    lb = lb + entityId * chunk;
    T inputUb = ub;
    ub = lb + chunk - 1; // Clang uses i <= ub
    // Say ub' is the beginning of the last chunk. Then whoever has a
    // lower bound plus a multiple of the increment equal to ub' is
    // the last one.
    T beginingLastChunk = inputUb - (inputUb % chunk);
    last = ((beginingLastChunk - lb) % stride) == 0;
  }
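  // Worked example for ForStaticChunk (illustrative, not part of the original
  // source): with lb = 0, ub = 99, chunk = 10 and numberOfEntities = 4,
  // entity 1 gets stride = 40 and first chunk [10, 19], then executes [50, 59]
  // and [90, 99]. The last chunk starts at 90 = 99 - (99 % 10), and since
  // (90 - 10) % 40 == 0, entity 1 is flagged as executing the last iteration.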
  ////////////////////////////////////////////////////////////////////////////////
  // Loop with static scheduling without chunk

  // helper function for static no chunk
  static void ForStaticNoChunk(int &last, T &lb, T &ub, ST &stride, ST &chunk,
                               T entityId, T numberOfEntities) {
    // No chunk size specified. Each thread or warp gets at most one
    // chunk; chunks are all almost of equal size
    T loopSize = ub - lb + 1;

    chunk = loopSize / numberOfEntities;
    T leftOver = loopSize - chunk * numberOfEntities;

    if (entityId < leftOver) {
      chunk++;
      lb = lb + entityId * chunk;
    } else {
      lb = lb + entityId * chunk + leftOver;
    }

    T inputUb = ub;
    ub = lb + chunk - 1; // Clang uses i <= ub
    last = lb <= inputUb && inputUb <= ub;
    stride = loopSize; // make sure we only do 1 chunk per warp
  }
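  // Worked example for ForStaticNoChunk (illustrative, not part of the
  // original source): loopSize = 10 and numberOfEntities = 3 give chunk = 3
  // and leftOver = 1, so entity 0 gets [0, 3] (its chunk is bumped to 4),
  // entity 1 gets [4, 6], and entity 2 gets [7, 9]; only entity 2 contains the
  // original upper bound and sets last.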
  ////////////////////////////////////////////////////////////////////////////////
  // Support for Static Init

  static void for_static_init(int32_t, int32_t schedtype,
                              int32_t *plastiter, T *plower, T *pupper,
                              ST *pstride, ST chunk, bool IsSPMDExecutionMode) {
    int32_t gtid = omp_get_thread_num();
    int numberOfActiveOMPThreads = omp_get_num_threads();

    // All warps that are in excess of the maximum requested do
    // not execute the loop
    ASSERT0(LT_FUSSY, gtid < numberOfActiveOMPThreads,
            "current thread is not needed here; error");

    // copy
    int lastiter = 0;
    T lb = *plower;
    T ub = *pupper;
    ST stride = *pstride;

    // init
    switch (SCHEDULE_WITHOUT_MODIFIERS(schedtype)) {
    case kmp_sched_static_chunk: {
      if (chunk > 0) {
        ForStaticChunk(lastiter, lb, ub, stride, chunk, gtid,
                       numberOfActiveOMPThreads);
        break;
      }
    } // note: if chunk <= 0, fall through and use nochunk
    case kmp_sched_static_balanced_chunk: {
      if (chunk > 0) {
        // round up to make sure the chunk is enough to cover all iterations
        T tripCount = ub - lb + 1; // +1 because ub is inclusive
        T span = (tripCount + numberOfActiveOMPThreads - 1) /
                 numberOfActiveOMPThreads;
        // perform chunk adjustment
        chunk = (span + chunk - 1) & ~(chunk - 1);
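        // Note (added for clarity, not part of the original source): the
        // bit-mask form rounds span up to the next multiple of chunk and is
        // exact when chunk is a power of two; the general equivalent would be
        // ((span + chunk - 1) / chunk) * chunk.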
        ASSERT0(LT_FUSSY, ub >= lb, "ub must be >= lb.");
        T oldUb = ub;
        ForStaticChunk(lastiter, lb, ub, stride, chunk, gtid,
                       numberOfActiveOMPThreads);
        if (ub > oldUb)
          ub = oldUb;
        break;
      }
    } // note: if chunk <= 0, fall through and use nochunk
    case kmp_sched_static_nochunk: {
      ForStaticNoChunk(lastiter, lb, ub, stride, chunk, gtid,
                       numberOfActiveOMPThreads);
      break;
    }
    case kmp_sched_distr_static_chunk: {
      if (chunk > 0) {
        ForStaticChunk(lastiter, lb, ub, stride, chunk, omp_get_team_num(),
                       omp_get_num_teams());
        break;
      } // note: if chunk <= 0, fall through and use nochunk
    }
    case kmp_sched_distr_static_nochunk: {
      ForStaticNoChunk(lastiter, lb, ub, stride, chunk, omp_get_team_num(),
                       omp_get_num_teams());
      break;
    }
    case kmp_sched_distr_static_chunk_sched_static_chunkone: {
      ForStaticChunk(lastiter, lb, ub, stride, chunk,
                     numberOfActiveOMPThreads * omp_get_team_num() + gtid,
                     omp_get_num_teams() * numberOfActiveOMPThreads);
      break;
    }
    default: {
      // ASSERT(LT_FUSSY, 0, "unknown schedtype %d", (int)schedtype);
      ForStaticChunk(lastiter, lb, ub, stride, chunk, gtid,
                     numberOfActiveOMPThreads);
      break;
    }
    }
    // copy back
    *plastiter = lastiter;
    *plower = lb;
    *pupper = ub;
    *pstride = stride;
  }
  ////////////////////////////////////////////////////////////////////////////////
  // Support for dispatch Init

  static int OrderedSchedule(kmp_sched_t schedule) {
    return schedule >= kmp_sched_ordered_first &&
           schedule <= kmp_sched_ordered_last;
  }

  static void dispatch_init(IdentTy *loc, int32_t threadId,
                            kmp_sched_t schedule, T lb, T ub, ST st, ST chunk,
                            DynamicScheduleTracker *DST) {
    int tid = mapping::getThreadIdInBlock();
    T tnum = omp_get_num_threads();
    T tripCount = ub - lb + 1; // +1 because ub is inclusive
    ASSERT0(LT_FUSSY, threadId < tnum,
            "current thread is not needed here; error");

    /* Currently just ignore the monotonic and non-monotonic modifiers
     * (the compiler isn't producing them yet anyway).
     * When it is, we'll want to look at them somewhere here and use that
     * information to add to our schedule choice. We shouldn't need to pass
     * them on, they merely affect which schedule we can legally choose for
     * various dynamic cases. (In particular, whether or not a stealing scheme
     * is legal).
     */
    schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule);
    // Process schedule.
    if (tnum == 1 || tripCount <= 1 || OrderedSchedule(schedule)) {
      if (OrderedSchedule(schedule))
        __kmpc_barrier(loc, threadId);
      schedule = kmp_sched_static_chunk;
      chunk = tripCount; // one thread gets the whole loop
    } else if (schedule == kmp_sched_runtime) {
      // process runtime
      omp_sched_t rtSched;
      int ChunkInt;
      omp_get_schedule(&rtSched, &ChunkInt);
      chunk = ChunkInt;
      switch (rtSched) {
      case omp_sched_static: {
        if (chunk > 0)
          schedule = kmp_sched_static_chunk;
        else
          schedule = kmp_sched_static_nochunk;
        break;
      }
      case omp_sched_auto: {
        schedule = kmp_sched_static_chunk;
        chunk = 1;
        break;
      }
      case omp_sched_dynamic:
      case omp_sched_guided: {
        schedule = kmp_sched_dynamic;
        break;
      }
      }
    } else if (schedule == kmp_sched_auto) {
      schedule = kmp_sched_static_chunk;
      chunk = 1;
    } else {
      // ASSERT(LT_FUSSY,
      //        schedule == kmp_sched_dynamic || schedule == kmp_sched_guided,
      //        "unknown schedule %d & chunk %lld\n", (int)schedule,
      //        (long long)chunk);
    }
    // init schedules
    if (schedule == kmp_sched_static_chunk) {
      ASSERT0(LT_FUSSY, chunk > 0, "bad chunk value");
      // save sched state
      DST->ScheduleType = schedule;
      // save ub
      DST->LoopUpperBound = ub;
      // compute static chunk
      ST stride;
      int lastiter = 0;
      ForStaticChunk(lastiter, lb, ub, stride, chunk, threadId, tnum);
      // save computed params
      DST->Chunk = chunk;
      DST->NextLowerBound = lb;
      DST->Stride = stride;
    } else if (schedule == kmp_sched_static_balanced_chunk) {
      ASSERT0(LT_FUSSY, chunk > 0, "bad chunk value");
      // save sched state
      DST->ScheduleType = schedule;
      // save ub
      DST->LoopUpperBound = ub;
      // compute static chunk
      ST stride;
      int lastiter = 0;
      // round up to make sure the chunk is enough to cover all iterations
      T span = (tripCount + tnum - 1) / tnum;
      // perform chunk adjustment
      chunk = (span + chunk - 1) & ~(chunk - 1);

      T oldUb = ub;
      ForStaticChunk(lastiter, lb, ub, stride, chunk, threadId, tnum);
      ASSERT0(LT_FUSSY, ub >= lb, "ub must be >= lb.");
      if (ub > oldUb)
        ub = oldUb;
      // save computed params
      DST->Chunk = chunk;
      DST->NextLowerBound = lb;
      DST->Stride = stride;
    } else if (schedule == kmp_sched_static_nochunk) {
      ASSERT0(LT_FUSSY, chunk == 0, "bad chunk value");
      // save sched state
      DST->ScheduleType = schedule;
      // save ub
      DST->LoopUpperBound = ub;
      // compute static chunk
      ST stride;
      int lastiter = 0;
      ForStaticNoChunk(lastiter, lb, ub, stride, chunk, threadId, tnum);
      // save computed params
      DST->Chunk = chunk;
      DST->NextLowerBound = lb;
      DST->Stride = stride;
    } else if (schedule == kmp_sched_dynamic || schedule == kmp_sched_guided) {
      // save data
      DST->ScheduleType = schedule;
      if (chunk < 1)
        chunk = 1;
      DST->Chunk = chunk;
      DST->LoopUpperBound = ub;
      DST->NextLowerBound = lb;
      __kmpc_barrier(loc, threadId);
      if (tid == 0) {
        Cnt = 0;
        fence::team(__ATOMIC_SEQ_CST);
      }
      __kmpc_barrier(loc, threadId);
    }
  }
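  // Note (added commentary, not part of the original source): for dynamic and
  // guided schedules each thread records its bounds in its DST, while thread 0
  // resets the team-shared counter Cnt that NextIter() increments. The two
  // barriers around the reset ensure that no thread starts grabbing chunks
  // before Cnt has been zeroed and that the reset is visible to the whole
  // team.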
  ////////////////////////////////////////////////////////////////////////////////
  // Support for dispatch next

  static uint64_t NextIter() {
    __kmpc_impl_lanemask_t active = mapping::activemask();
    uint32_t leader = utils::ffs(active) - 1;
    uint32_t change = utils::popc(active);
    __kmpc_impl_lanemask_t lane_mask_lt = mapping::lanemaskLT();
    unsigned int rank = utils::popc(active & lane_mask_lt);
    uint64_t warp_res;
    if (rank == 0) {
      warp_res = atomic::add(&Cnt, change, __ATOMIC_SEQ_CST);
    }
    warp_res = utils::shuffle(active, warp_res, leader);
    return warp_res + rank;
  }
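  // Note (added commentary, not part of the original source): NextIter() is a
  // warp-aggregated increment of Cnt. The lowest active lane (found via ffs)
  // issues a single atomic::add of popc(active), i.e. one atomic per warp
  // rather than one per lane; the previous value is shuffled from that leader
  // to all active lanes, and each lane offsets it by its rank among the active
  // lanes. For example, with 4 active lanes and Cnt == 12, the lanes return
  // 12, 13, 14 and 15, and Cnt becomes 16.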
  static int DynamicNextChunk(T &lb, T &ub, T chunkSize, T loopLowerBound,
                              T loopUpperBound) {
    T N = NextIter();
    lb = loopLowerBound + N * chunkSize;
    ub = lb + chunkSize - 1; // Clang uses i <= ub

    // 3 result cases:
    //  a. lb and ub < loopUpperBound --> NOT_FINISHED
    //  b. lb < loopUpperBound and ub >= loopUpperBound: last chunk -->
    //     LAST_CHUNK
    //  c. lb and ub >= loopUpperBound: empty chunk --> FINISHED
    // a.
    if (lb <= loopUpperBound && ub < loopUpperBound) {
      return NOT_FINISHED;
    }
    // b.
    if (lb <= loopUpperBound) {
      ub = loopUpperBound;
      return LAST_CHUNK;
    }
    // c. if we are here, we are in case 'c'
    lb = loopUpperBound + 2;
    ub = loopUpperBound + 1;
    return FINISHED;
  }
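  // Worked example for DynamicNextChunk (illustrative, not part of the
  // original source): with loopLowerBound = 0, loopUpperBound = 9 and
  // chunkSize = 4, NextIter() values 0, 1 and 2 yield [0, 3] (NOT_FINISHED),
  // [4, 7] (NOT_FINISHED) and [8, 9] after clamping (LAST_CHUNK); any later
  // value produces an empty chunk and FINISHED.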
  static int dispatch_next(IdentTy *loc, int32_t gtid, int32_t *plast,
                           T *plower, T *pupper, ST *pstride,
                           DynamicScheduleTracker *DST) {
    // ID of a thread in its own warp

    // automatically selects thread or warp ID based on selected implementation
    ASSERT0(LT_FUSSY, gtid < omp_get_num_threads(),
            "current thread is not needed here; error");
    // retrieve schedule
    kmp_sched_t schedule = DST->ScheduleType;

    // xxx reduce to one
    if (schedule == kmp_sched_static_chunk ||
        schedule == kmp_sched_static_nochunk) {
      T myLb = DST->NextLowerBound;
      T ub = DST->LoopUpperBound;
      // finished?
      if (myLb > ub) {
        return DISPATCH_FINISHED;
      }
      // not finished, save current bounds
      ST chunk = DST->Chunk;
      *plower = myLb;
      T myUb = myLb + chunk - 1; // Clang uses i <= ub
      if (myUb > ub)
        myUb = ub;
      *pupper = myUb;
      *plast = (int32_t)(myUb == ub);

      // increment next lower bound by the stride
      ST stride = DST->Stride;
      DST->NextLowerBound = myLb + stride;
      return DISPATCH_NOTFINISHED;
    }
    ASSERT0(LT_FUSSY,
            schedule == kmp_sched_dynamic || schedule == kmp_sched_guided,
            "bad sched");
    T myLb, myUb;
    int finished = DynamicNextChunk(myLb, myUb, DST->Chunk, DST->NextLowerBound,
                                    DST->LoopUpperBound);

    if (finished == FINISHED)
      return DISPATCH_FINISHED;

    // not finished (either not finished or last chunk)
    *plast = (int32_t)(finished == LAST_CHUNK);
    *plower = myLb;
    *pupper = myUb;
    *pstride = 1;

    return DISPATCH_NOTFINISHED;
  }

  static void dispatch_fini() {
    // nothing
  }
  ////////////////////////////////////////////////////////////////////////////////
  // end of template class that encapsulates all the helper functions
  ////////////////////////////////////////////////////////////////////////////////
};
////////////////////////////////////////////////////////////////////////////////
// KMP interface implementation (dyn loops)
////////////////////////////////////////////////////////////////////////////////

// TODO: This is a stopgap. We probably want to expand the dispatch API to take
// a DST pointer which can then be allocated properly without malloc.
DynamicScheduleTracker *THREAD_LOCAL(ThreadDSTPtr);

// Create a new DST, link the current one, and define the new one as current.
static DynamicScheduleTracker *pushDST() {
  DynamicScheduleTracker *NewDST = static_cast<DynamicScheduleTracker *>(
      memory::allocGlobal(sizeof(DynamicScheduleTracker), "new DST"));
  *NewDST = DynamicScheduleTracker({0});
  NewDST->NextDST = ThreadDSTPtr;
  ThreadDSTPtr = NewDST;
  return ThreadDSTPtr;
}

// Return the current DST.
static DynamicScheduleTracker *peekDST() { return ThreadDSTPtr; }

// Pop the current DST and restore the last one.
static void popDST() {
  DynamicScheduleTracker *OldDST = ThreadDSTPtr->NextDST;
  memory::freeGlobal(ThreadDSTPtr, "remove DST");
  ThreadDSTPtr = OldDST;
}
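// Note (added commentary, not part of the original source): the DSTs form a
// per-thread linked stack so that nested non-static worksharing loops each get
// their own tracker: __kmpc_dispatch_init_* calls pushDST(), the matching
// __kmpc_dispatch_next_* calls use peekDST(), and __kmpc_dispatch_fini_* calls
// popDST() to free the tracker and restore the enclosing loop's state.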
extern "C" {

// init
void __kmpc_dispatch_init_4(IdentTy *loc, int32_t tid, int32_t schedule,
                            int32_t lb, int32_t ub, int32_t st, int32_t chunk) {
  FunctionTracingRAII();
  DynamicScheduleTracker *DST = pushDST();
  omptarget_nvptx_LoopSupport<int32_t, int32_t>::dispatch_init(
      loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk, DST);
}

void __kmpc_dispatch_init_4u(IdentTy *loc, int32_t tid, int32_t schedule,
                             uint32_t lb, uint32_t ub, int32_t st,
                             int32_t chunk) {
  FunctionTracingRAII();
  DynamicScheduleTracker *DST = pushDST();
  omptarget_nvptx_LoopSupport<uint32_t, int32_t>::dispatch_init(
      loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk, DST);
}

void __kmpc_dispatch_init_8(IdentTy *loc, int32_t tid, int32_t schedule,
                            int64_t lb, int64_t ub, int64_t st, int64_t chunk) {
  FunctionTracingRAII();
  DynamicScheduleTracker *DST = pushDST();
  omptarget_nvptx_LoopSupport<int64_t, int64_t>::dispatch_init(
      loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk, DST);
}

void __kmpc_dispatch_init_8u(IdentTy *loc, int32_t tid, int32_t schedule,
                             uint64_t lb, uint64_t ub, int64_t st,
                             int64_t chunk) {
  FunctionTracingRAII();
  DynamicScheduleTracker *DST = pushDST();
  omptarget_nvptx_LoopSupport<uint64_t, int64_t>::dispatch_init(
      loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk, DST);
}
// next
int __kmpc_dispatch_next_4(IdentTy *loc, int32_t tid, int32_t *p_last,
                           int32_t *p_lb, int32_t *p_ub, int32_t *p_st) {
  FunctionTracingRAII();
  DynamicScheduleTracker *DST = peekDST();
  return omptarget_nvptx_LoopSupport<int32_t, int32_t>::dispatch_next(
      loc, tid, p_last, p_lb, p_ub, p_st, DST);
}

int __kmpc_dispatch_next_4u(IdentTy *loc, int32_t tid, int32_t *p_last,
                            uint32_t *p_lb, uint32_t *p_ub, int32_t *p_st) {
  FunctionTracingRAII();
  DynamicScheduleTracker *DST = peekDST();
  return omptarget_nvptx_LoopSupport<uint32_t, int32_t>::dispatch_next(
      loc, tid, p_last, p_lb, p_ub, p_st, DST);
}

int __kmpc_dispatch_next_8(IdentTy *loc, int32_t tid, int32_t *p_last,
                           int64_t *p_lb, int64_t *p_ub, int64_t *p_st) {
  FunctionTracingRAII();
  DynamicScheduleTracker *DST = peekDST();
  return omptarget_nvptx_LoopSupport<int64_t, int64_t>::dispatch_next(
      loc, tid, p_last, p_lb, p_ub, p_st, DST);
}

int __kmpc_dispatch_next_8u(IdentTy *loc, int32_t tid, int32_t *p_last,
                            uint64_t *p_lb, uint64_t *p_ub, int64_t *p_st) {
  FunctionTracingRAII();
  DynamicScheduleTracker *DST = peekDST();
  return omptarget_nvptx_LoopSupport<uint64_t, int64_t>::dispatch_next(
      loc, tid, p_last, p_lb, p_ub, p_st, DST);
}
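// Illustrative usage (added commentary, not part of the original source): a
// dynamically scheduled loop is typically driven by compiler-generated code
// along the lines of
//
//   __kmpc_dispatch_init_4(loc, tid, schedule, lb, ub, st, chunk);
//   while (__kmpc_dispatch_next_4(loc, tid, &last, &lb, &ub, &st))
//     for (int32_t i = lb; i <= ub; ++i)
//       body(i);
//   __kmpc_dispatch_fini_4(loc, tid);
//
// relying on dispatch_next returning DISPATCH_NOTFINISHED until all chunks
// have been handed out.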
// fini
void __kmpc_dispatch_fini_4(IdentTy *loc, int32_t tid) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<int32_t, int32_t>::dispatch_fini();
  popDST();
}

void __kmpc_dispatch_fini_4u(IdentTy *loc, int32_t tid) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<uint32_t, int32_t>::dispatch_fini();
  popDST();
}

void __kmpc_dispatch_fini_8(IdentTy *loc, int32_t tid) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<int64_t, int64_t>::dispatch_fini();
  popDST();
}

void __kmpc_dispatch_fini_8u(IdentTy *loc, int32_t tid) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<uint64_t, int64_t>::dispatch_fini();
  popDST();
}
////////////////////////////////////////////////////////////////////////////////
// KMP interface implementation (static loops)
////////////////////////////////////////////////////////////////////////////////

void __kmpc_for_static_init_4(IdentTy *loc, int32_t global_tid,
                              int32_t schedtype, int32_t *plastiter,
                              int32_t *plower, int32_t *pupper,
                              int32_t *pstride, int32_t incr, int32_t chunk) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<int32_t, int32_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}
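// Illustrative usage (added commentary, not part of the original source): for
// a statically scheduled loop with no chunk, compiler-generated code roughly
// does
//
//   int32_t last = 0, lb = 0, ub = N - 1, stride = 1;
//   __kmpc_for_static_init_4(loc, tid, kmp_sched_static_nochunk, &last,
//                            &lb, &ub, &stride, 1, 0);
//   for (int32_t i = lb; i <= ub; ++i)
//     body(i);
//   __kmpc_for_static_fini(loc, tid);
//
// i.e. each thread adjusts its own [lb, ub] once and then runs its share
// directly.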
void __kmpc_for_static_init_4u(IdentTy *loc, int32_t global_tid,
                               int32_t schedtype, int32_t *plastiter,
                               uint32_t *plower, uint32_t *pupper,
                               int32_t *pstride, int32_t incr, int32_t chunk) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<uint32_t, int32_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_for_static_init_8(IdentTy *loc, int32_t global_tid,
                              int32_t schedtype, int32_t *plastiter,
                              int64_t *plower, int64_t *pupper,
                              int64_t *pstride, int64_t incr, int64_t chunk) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<int64_t, int64_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_for_static_init_8u(IdentTy *loc, int32_t global_tid,
                               int32_t schedtype, int32_t *plastiter,
                               uint64_t *plower, uint64_t *pupper,
                               int64_t *pstride, int64_t incr, int64_t chunk) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<uint64_t, int64_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_distribute_static_init_4(IdentTy *loc, int32_t global_tid,
                                     int32_t schedtype, int32_t *plastiter,
                                     int32_t *plower, int32_t *pupper,
                                     int32_t *pstride, int32_t incr,
                                     int32_t chunk) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<int32_t, int32_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_distribute_static_init_4u(IdentTy *loc, int32_t global_tid,
                                      int32_t schedtype, int32_t *plastiter,
                                      uint32_t *plower, uint32_t *pupper,
                                      int32_t *pstride, int32_t incr,
                                      int32_t chunk) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<uint32_t, int32_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_distribute_static_init_8(IdentTy *loc, int32_t global_tid,
                                     int32_t schedtype, int32_t *plastiter,
                                     int64_t *plower, int64_t *pupper,
                                     int64_t *pstride, int64_t incr,
                                     int64_t chunk) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<int64_t, int64_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_distribute_static_init_8u(IdentTy *loc, int32_t global_tid,
                                      int32_t schedtype, int32_t *plastiter,
                                      uint64_t *plower, uint64_t *pupper,
                                      int64_t *pstride, int64_t incr,
                                      int64_t chunk) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<uint64_t, int64_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_for_static_fini(IdentTy *loc, int32_t global_tid) {
  FunctionTracingRAII();
}

void __kmpc_distribute_static_fini(IdentTy *loc, int32_t global_tid) {
  FunctionTracingRAII();
}

} // extern "C"

#pragma omp end declare target