//===----- Workshare.cpp -  OpenMP workshare implementation ------ C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the KMPC interface
// for the loop construct plus other worksharing constructs that use the same
// interface as loops.
//
//===----------------------------------------------------------------------===//

#include "Debug.h"
#include "Interface.h"
#include "Mapping.h"
#include "State.h"
#include "Synchronization.h"
#include "Types.h"
#include "Utils.h"

struct DynamicScheduleTracker {
  int64_t Chunk;
  int64_t LoopUpperBound;
  int64_t NextLowerBound;
  int64_t Stride;
  kmp_sched_t ScheduleType;
  DynamicScheduleTracker *NextDST;
};

// used by the library for the interface with the app
#define DISPATCH_FINISHED 0
#define DISPATCH_NOTFINISHED 1

// used by dynamic scheduling
#define FINISHED 0
#define NOT_FINISHED 1
#define LAST_CHUNK 2

#pragma omp declare target

// TODO: This variable is a hack inherited from the old runtime.
static uint64_t SHARED(Cnt);

template <typename T, typename ST> struct omptarget_nvptx_LoopSupport {
  ////////////////////////////////////////////////////////////////////////////////
  // Loop with static scheduling with chunk

  // Generic implementation of OMP loop scheduling with static policy
  /*! \brief Calculate initial bounds for static loop and stride
   *  @param[in] loc location in code of the call (not used here)
   *  @param[in] global_tid global thread id
   *  @param[in] schedtype type of scheduling (see omptarget-nvptx.h)
   *  @param[in] plastiter pointer to last iteration
   *  @param[in,out] pointer to loop lower bound. It will contain the value of
   *  the lower bound of the first chunk
   *  @param[in,out] pointer to loop upper bound. It will contain the value of
   *  the upper bound of the first chunk
   *  @param[in,out] pointer to loop stride. It will contain the value of the
   *  stride between two successive chunks executed by the same thread
   *  @param[in] loop increment bump
   *  @param[in] chunk size
   */

  // helper function for static chunk
  static void ForStaticChunk(int &last, T &lb, T &ub, ST &stride, ST chunk,
                             T entityId, T numberOfEntities) {
    // Each thread executes multiple chunks, all of the same size, except
    // possibly the last one.
    // distance between two successive chunks
    stride = numberOfEntities * chunk;
    lb = lb + entityId * chunk;
    T inputUb = ub;
    ub = lb + chunk - 1; // Clang uses i <= ub
    // Say ub' is the beginning of the last chunk. Then whoever has a
    // lower bound plus a multiple of the increment equal to ub' is
    // the last one.
    T beginningLastChunk = inputUb - (inputUb % chunk);
    last = ((beginningLastChunk - lb) % stride) == 0;
  }
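
  // Worked example (illustrative): with lb = 0, ub = 31, chunk = 8 and
  // numberOfEntities = 4, stride becomes 32; entity 2 gets [16, 23], the last
  // chunk starts at 31 - (31 % 8) = 24, so only entity 3 (lb = 24) reports
  // last = 1.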

  ////////////////////////////////////////////////////////////////////////////////
  // Loop with static scheduling without chunk

  // helper function for static no chunk
  static void ForStaticNoChunk(int &last, T &lb, T &ub, ST &stride, ST &chunk,
                               T entityId, T numberOfEntities) {
    // No chunk size specified. Each thread or warp gets at most one
    // chunk; chunks are all almost of equal size
    T loopSize = ub - lb + 1;

    chunk = loopSize / numberOfEntities;
    T leftOver = loopSize - chunk * numberOfEntities;

    if (entityId < leftOver) {
      chunk++;
      lb = lb + entityId * chunk;
    } else {
      lb = lb + entityId * chunk + leftOver;
    }

    T inputUb = ub;
    ub = lb + chunk - 1; // Clang uses i <= ub
    last = lb <= inputUb && inputUb <= ub;
    stride = loopSize; // make sure we only do 1 chunk per warp
  }
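
  // Worked example (illustrative): a 10-iteration loop over 4 entities gives
  // chunk = 2 and leftOver = 2, so entities 0 and 1 take 3 iterations each
  // and entities 2 and 3 take 2 each; entity 3 covers the original upper
  // bound and therefore reports last = 1.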

  ////////////////////////////////////////////////////////////////////////////////
  // Support for Static Init

  static void for_static_init(int32_t, int32_t schedtype,
                              int32_t *plastiter, T *plower, T *pupper,
                              ST *pstride, ST chunk, bool IsSPMDExecutionMode) {
    int32_t gtid = omp_get_thread_num();
    int numberOfActiveOMPThreads = omp_get_num_threads();

    // All warps that are in excess of the maximum requested, do
    // not execute the loop
    ASSERT0(LT_FUSSY, gtid < numberOfActiveOMPThreads,
            "current thread is not needed here; error");

    // copy
    int lastiter = 0;
    T lb = *plower;
    T ub = *pupper;
    ST stride = *pstride;

    // init
    switch (SCHEDULE_WITHOUT_MODIFIERS(schedtype)) {
    case kmp_sched_static_chunk: {
      if (chunk > 0) {
        ForStaticChunk(lastiter, lb, ub, stride, chunk, gtid,
                       numberOfActiveOMPThreads);
        break;
      }
    } // note: if chunk <=0, use nochunk
    case kmp_sched_static_balanced_chunk: {
      if (chunk > 0) {
        // round up to make sure the chunk is enough to cover all iterations
        T tripCount = ub - lb + 1; // +1 because ub is inclusive
        T span = (tripCount + numberOfActiveOMPThreads - 1) /
                 numberOfActiveOMPThreads;
        // perform chunk adjustment
        chunk = (span + chunk - 1) & ~(chunk - 1);
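        // The expression above rounds span up to the next multiple of chunk,
        // e.g. span = 13 with chunk = 4 gives (13 + 3) & ~3 = 16; the bit
        // trick relies on chunk being a power of two.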

        ASSERT0(LT_FUSSY, ub >= lb, "ub must be >= lb.");
        T oldUb = ub;
        ForStaticChunk(lastiter, lb, ub, stride, chunk, gtid,
                       numberOfActiveOMPThreads);
        if (ub > oldUb)
          ub = oldUb;
        break;
      }
    } // note: if chunk <=0, use nochunk
    case kmp_sched_static_nochunk: {
      ForStaticNoChunk(lastiter, lb, ub, stride, chunk, gtid,
                       numberOfActiveOMPThreads);
      break;
    }
    case kmp_sched_distr_static_chunk: {
      if (chunk > 0) {
        ForStaticChunk(lastiter, lb, ub, stride, chunk, omp_get_team_num(),
                       omp_get_num_teams());
        break;
      } // note: if chunk <=0, use nochunk
    }
    case kmp_sched_distr_static_nochunk: {
      ForStaticNoChunk(lastiter, lb, ub, stride, chunk, omp_get_team_num(),
                       omp_get_num_teams());
      break;
    }
    case kmp_sched_distr_static_chunk_sched_static_chunkone: {
      ForStaticChunk(lastiter, lb, ub, stride, chunk,
                     numberOfActiveOMPThreads * omp_get_team_num() + gtid,
                     omp_get_num_teams() * numberOfActiveOMPThreads);
      break;
    }
    default: {
      // ASSERT(LT_FUSSY, 0, "unknown schedtype %d", (int)schedtype);
      ForStaticChunk(lastiter, lb, ub, stride, chunk, gtid,
                     numberOfActiveOMPThreads);
      break;
    }
    }
    // copy back
    *plastiter = lastiter;
    *plower = lb;
    *pupper = ub;
    *pstride = stride;
  }
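
  // On return from for_static_init, *plower/*pupper hold the calling
  // thread's (or team's) first chunk and *pstride the distance to its next
  // chunk, as described in the Doxygen comment above; the generated code
  // keeps advancing by *pstride until the bounds pass the loop's upper bound.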

  ////////////////////////////////////////////////////////////////////////////////
  // Support for dispatch Init

  static int OrderedSchedule(kmp_sched_t schedule) {
    return schedule >= kmp_sched_ordered_first &&
           schedule <= kmp_sched_ordered_last;
  }

  static void dispatch_init(IdentTy *loc, int32_t threadId,
                            kmp_sched_t schedule, T lb, T ub, ST st, ST chunk,
                            DynamicScheduleTracker *DST) {
    int tid = mapping::getThreadIdInBlock();
    T tnum = omp_get_num_threads();
    T tripCount = ub - lb + 1; // +1 because ub is inclusive
    ASSERT0(LT_FUSSY, threadId < tnum,
            "current thread is not needed here; error");

    /* Currently just ignore the monotonic and non-monotonic modifiers
     * (the compiler isn't producing them yet anyway).
     * When it is, we'll want to look at them somewhere here and use that
     * information to add to our schedule choice. We shouldn't need to pass
     * them on; they merely affect which schedule we can legally choose for
     * various dynamic cases (in particular, whether or not a stealing scheme
     * is allowed).
     */
    schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule);

    if (tnum == 1 || tripCount <= 1 || OrderedSchedule(schedule)) {
      if (OrderedSchedule(schedule))
        __kmpc_barrier(loc, threadId);
      schedule = kmp_sched_static_chunk;
      chunk = tripCount; // one thread gets the whole loop
    } else if (schedule == kmp_sched_runtime) {
      // process runtime schedule
      omp_sched_t rtSched;
      int ChunkInt;
      omp_get_schedule(&rtSched, &ChunkInt);
      chunk = ChunkInt;
      switch (rtSched) {
      case omp_sched_static: {
        if (chunk > 0)
          schedule = kmp_sched_static_chunk;
        else
          schedule = kmp_sched_static_nochunk;
        break;
      }
      case omp_sched_auto: {
        schedule = kmp_sched_static_chunk;
        chunk = 1;
        break;
      }
      case omp_sched_dynamic:
      case omp_sched_guided: {
        schedule = kmp_sched_dynamic;
        break;
      }
      }
    } else if (schedule == kmp_sched_auto) {
      schedule = kmp_sched_static_chunk;
      chunk = 1;
    } else {
      // ASSERT(LT_FUSSY,
      //        schedule == kmp_sched_dynamic || schedule == kmp_sched_guided,
      //        "unknown schedule %d & chunk %lld\n", (int)schedule,
      //        (long long)chunk);
    }

    // init schedules
    if (schedule == kmp_sched_static_chunk) {
      ASSERT0(LT_FUSSY, chunk > 0, "bad chunk value");
      // save sched state
      DST->ScheduleType = schedule;
      // save ub
      DST->LoopUpperBound = ub;
      // compute static chunk
      ST stride;
      int lastiter = 0;
      ForStaticChunk(lastiter, lb, ub, stride, chunk, threadId, tnum);
      // save computed params
      DST->Chunk = chunk;
      DST->NextLowerBound = lb;
      DST->Stride = stride;
    } else if (schedule == kmp_sched_static_balanced_chunk) {
      ASSERT0(LT_FUSSY, chunk > 0, "bad chunk value");
      // save sched state
      DST->ScheduleType = schedule;
      // save ub
      DST->LoopUpperBound = ub;
      // compute static chunk
      ST stride;
      int lastiter = 0;
      // round up to make sure the chunk is enough to cover all iterations
      T span = (tripCount + tnum - 1) / tnum;
      // perform chunk adjustment
      chunk = (span + chunk - 1) & ~(chunk - 1);

      T oldUb = ub;
      ForStaticChunk(lastiter, lb, ub, stride, chunk, threadId, tnum);
      ASSERT0(LT_FUSSY, ub >= lb, "ub must be >= lb.");
      if (ub > oldUb)
        ub = oldUb;
      // save computed params
      DST->Chunk = chunk;
      DST->NextLowerBound = lb;
      DST->Stride = stride;
    } else if (schedule == kmp_sched_static_nochunk) {
      ASSERT0(LT_FUSSY, chunk == 0, "bad chunk value");
      // save sched state
      DST->ScheduleType = schedule;
      // save ub
      DST->LoopUpperBound = ub;
      // compute static chunk
      ST stride;
      int lastiter = 0;
      ForStaticNoChunk(lastiter, lb, ub, stride, chunk, threadId, tnum);
      // save computed params
      DST->Chunk = chunk;
      DST->NextLowerBound = lb;
      DST->Stride = stride;
    } else if (schedule == kmp_sched_dynamic || schedule == kmp_sched_guided) {
      // save data
      DST->ScheduleType = schedule;
      if (chunk < 1)
        chunk = 1;
      DST->Chunk = chunk;
      DST->LoopUpperBound = ub;
      DST->NextLowerBound = lb;
      __kmpc_barrier(loc, threadId);
      if (tid == 0) {
        Cnt = 0;
        fence::team(__ATOMIC_SEQ_CST);
      }
      __kmpc_barrier(loc, threadId);
    }
  }
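
  // After dispatch_init the tracker describes the whole loop: NextLowerBound
  // is the first unclaimed iteration, Chunk/Stride say how the bounds
  // advance, and for dynamic/guided schedules the shared counter Cnt has been
  // reset so that NextIter() can hand out chunk indices.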

  ////////////////////////////////////////////////////////////////////////////////
  // Support for dispatch next

  static uint64_t NextIter() {
    __kmpc_impl_lanemask_t active = mapping::activemask();
    uint32_t leader = utils::ffs(active) - 1;
    uint32_t change = utils::popc(active);
    __kmpc_impl_lanemask_t lane_mask_lt = mapping::lanemaskLT();
    unsigned int rank = utils::popc(active & lane_mask_lt);
    uint64_t warp_res;
    if (rank == 0) {
      warp_res = atomic::add(&Cnt, change, __ATOMIC_SEQ_CST);
    }
    warp_res = utils::shuffle(active, warp_res, leader);
    return warp_res + rank;
  }
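
  // NextIter is a warp-aggregated atomic: the lowest active lane adds the
  // number of active lanes to Cnt once, shuffles the old value to the rest of
  // the warp, and every lane adds its rank among the active lanes. E.g. with
  // three active lanes and Cnt = 10, the lanes get 10, 11 and 12 while Cnt
  // becomes 13.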

  static int DynamicNextChunk(T &lb, T &ub, T chunkSize, T loopLowerBound,
                              T loopUpperBound) {
    T N = NextIter();
    lb = loopLowerBound + N * chunkSize;
    ub = lb + chunkSize - 1; // Clang uses i <= ub

    // 3 result cases:
    //  a. lb and ub < loopUpperBound --> NOT_FINISHED
    //  b. lb < loopUpperBound and ub >= loopUpperBound: last chunk -->
    //     LAST_CHUNK
    //  c. lb and ub >= loopUpperBound: empty chunk --> FINISHED
    // a.
    if (lb <= loopUpperBound && ub < loopUpperBound) {
      return NOT_FINISHED;
    }
    // b.
    if (lb <= loopUpperBound) {
      ub = loopUpperBound;
      return LAST_CHUNK;
    }
    // c. if we are here, we are in case 'c'
    lb = loopUpperBound + 2;
    ub = loopUpperBound + 1;
    return FINISHED;
  }
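
  // In case 'c' the returned bounds form an empty range (lb > ub), so a
  // caller iterating with i <= ub executes no iterations for that chunk.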

  static int dispatch_next(IdentTy *loc, int32_t gtid, int32_t *plast,
                           T *plower, T *pupper, ST *pstride,
                           DynamicScheduleTracker *DST) {
    // ID of a thread in its own warp

    // automatically selects thread or warp ID based on selected implementation
    ASSERT0(LT_FUSSY, gtid < omp_get_num_threads(),
            "current thread is not needed here; error");

    kmp_sched_t schedule = DST->ScheduleType;

    // Static schedules: hand out this thread's next saved chunk.
    if (schedule == kmp_sched_static_chunk ||
        schedule == kmp_sched_static_nochunk) {
      T myLb = DST->NextLowerBound;
      T ub = DST->LoopUpperBound;
      // finished?
      if (myLb > ub) {
        return DISPATCH_FINISHED;
      }
      // not finished, save current bounds
      ST chunk = DST->Chunk;
      *plower = myLb;
      T myUb = myLb + chunk - 1; // Clang uses i <= ub
      if (myUb > ub)
        myUb = ub;
      *pupper = myUb;
      *plast = (int32_t)(myUb == ub);

      // increment next lower bound by the stride
      ST stride = DST->Stride;
      DST->NextLowerBound = myLb + stride;
      return DISPATCH_NOTFINISHED;
    }

    ASSERT0(LT_FUSSY,
            schedule == kmp_sched_dynamic || schedule == kmp_sched_guided,
            "bad sched");
    T myLb, myUb;
    int finished = DynamicNextChunk(myLb, myUb, DST->Chunk, DST->NextLowerBound,
                                    DST->LoopUpperBound);

    if (finished == FINISHED)
      return DISPATCH_FINISHED;

    // not finished (either not finished or last chunk)
    *plast = (int32_t)(finished == LAST_CHUNK);
    *plower = myLb;
    *pupper = myUb;
    *pstride = 1;

    return DISPATCH_NOTFINISHED;
  }

  static void dispatch_fini() {
    // nothing
  }

  ////////////////////////////////////////////////////////////////////////////////
  // end of template class that encapsulates all the helper functions
  ////////////////////////////////////////////////////////////////////////////////
};

////////////////////////////////////////////////////////////////////////////////
// KMP interface implementation (dyn loops)
////////////////////////////////////////////////////////////////////////////////
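
// Typical (simplified) call sequence emitted by the compiler for a loop with
// '#pragma omp for schedule(dynamic, chunk)'; names are illustrative:
//
//   __kmpc_dispatch_init_4(loc, tid, sched, lb, ub, st, chunk);
//   while (__kmpc_dispatch_next_4(loc, tid, &last, &lb, &ub, &st))
//     for (int32_t i = lb; i <= ub; i += st)
//       body(i);
//   __kmpc_dispatch_fini_4(loc, tid);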

// TODO: This is a stopgap. We probably want to expand the dispatch API to take
// a DST pointer which can then be allocated properly without malloc.
DynamicScheduleTracker *THREAD_LOCAL(ThreadDSTPtr);

// Create a new DST, link the current one, and define the new as current.
static DynamicScheduleTracker *pushDST() {
  DynamicScheduleTracker *NewDST = static_cast<DynamicScheduleTracker *>(
      memory::allocGlobal(sizeof(DynamicScheduleTracker), "new DST"));
  *NewDST = DynamicScheduleTracker({0});
  NewDST->NextDST = ThreadDSTPtr;
  ThreadDSTPtr = NewDST;
  return ThreadDSTPtr;
}

// Return the current DST.
static DynamicScheduleTracker *peekDST() { return ThreadDSTPtr; }

// Pop the current DST and restore the last one.
static void popDST() {
  DynamicScheduleTracker *OldDST = ThreadDSTPtr->NextDST;
  memory::freeGlobal(ThreadDSTPtr, "remove DST");
  ThreadDSTPtr = OldDST;
}
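
// Together these helpers keep a per-thread stack of trackers so that nested
// dispatch regions do not clobber each other: every __kmpc_dispatch_init_*
// pushes a fresh DST, __kmpc_dispatch_next_* peeks at the current one, and
// __kmpc_dispatch_fini_* pops it again.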

extern "C" {

void __kmpc_dispatch_init_4(IdentTy *loc, int32_t tid, int32_t schedule,
                            int32_t lb, int32_t ub, int32_t st, int32_t chunk) {
  FunctionTracingRAII();
  DynamicScheduleTracker *DST = pushDST();
  omptarget_nvptx_LoopSupport<int32_t, int32_t>::dispatch_init(
      loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk, DST);
}

void __kmpc_dispatch_init_4u(IdentTy *loc, int32_t tid, int32_t schedule,
                             uint32_t lb, uint32_t ub, int32_t st,
                             int32_t chunk) {
  FunctionTracingRAII();
  DynamicScheduleTracker *DST = pushDST();
  omptarget_nvptx_LoopSupport<uint32_t, int32_t>::dispatch_init(
      loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk, DST);
}

void __kmpc_dispatch_init_8(IdentTy *loc, int32_t tid, int32_t schedule,
                            int64_t lb, int64_t ub, int64_t st, int64_t chunk) {
  FunctionTracingRAII();
  DynamicScheduleTracker *DST = pushDST();
  omptarget_nvptx_LoopSupport<int64_t, int64_t>::dispatch_init(
      loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk, DST);
}

void __kmpc_dispatch_init_8u(IdentTy *loc, int32_t tid, int32_t schedule,
                             uint64_t lb, uint64_t ub, int64_t st,
                             int64_t chunk) {
  FunctionTracingRAII();
  DynamicScheduleTracker *DST = pushDST();
  omptarget_nvptx_LoopSupport<uint64_t, int64_t>::dispatch_init(
      loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk, DST);
}

int __kmpc_dispatch_next_4(IdentTy *loc, int32_t tid, int32_t *p_last,
                           int32_t *p_lb, int32_t *p_ub, int32_t *p_st) {
  FunctionTracingRAII();
  DynamicScheduleTracker *DST = peekDST();
  return omptarget_nvptx_LoopSupport<int32_t, int32_t>::dispatch_next(
      loc, tid, p_last, p_lb, p_ub, p_st, DST);
}

int __kmpc_dispatch_next_4u(IdentTy *loc, int32_t tid, int32_t *p_last,
                            uint32_t *p_lb, uint32_t *p_ub, int32_t *p_st) {
  FunctionTracingRAII();
  DynamicScheduleTracker *DST = peekDST();
  return omptarget_nvptx_LoopSupport<uint32_t, int32_t>::dispatch_next(
      loc, tid, p_last, p_lb, p_ub, p_st, DST);
}

int __kmpc_dispatch_next_8(IdentTy *loc, int32_t tid, int32_t *p_last,
                           int64_t *p_lb, int64_t *p_ub, int64_t *p_st) {
  FunctionTracingRAII();
  DynamicScheduleTracker *DST = peekDST();
  return omptarget_nvptx_LoopSupport<int64_t, int64_t>::dispatch_next(
      loc, tid, p_last, p_lb, p_ub, p_st, DST);
}

int __kmpc_dispatch_next_8u(IdentTy *loc, int32_t tid, int32_t *p_last,
                            uint64_t *p_lb, uint64_t *p_ub, int64_t *p_st) {
  FunctionTracingRAII();
  DynamicScheduleTracker *DST = peekDST();
  return omptarget_nvptx_LoopSupport<uint64_t, int64_t>::dispatch_next(
      loc, tid, p_last, p_lb, p_ub, p_st, DST);
}

void __kmpc_dispatch_fini_4(IdentTy *loc, int32_t tid) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<int32_t, int32_t>::dispatch_fini();
  popDST();
}

void __kmpc_dispatch_fini_4u(IdentTy *loc, int32_t tid) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<uint32_t, int32_t>::dispatch_fini();
  popDST();
}

void __kmpc_dispatch_fini_8(IdentTy *loc, int32_t tid) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<int64_t, int64_t>::dispatch_fini();
  popDST();
}

void __kmpc_dispatch_fini_8u(IdentTy *loc, int32_t tid) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<uint64_t, int64_t>::dispatch_fini();
  popDST();
}

////////////////////////////////////////////////////////////////////////////////
// KMP interface implementation (static loops)
////////////////////////////////////////////////////////////////////////////////
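
// For static schedules the runtime only computes the first chunk and the
// stride; the generated code then walks the remaining chunks itself, e.g.
// (illustrative sketch):
//
//   __kmpc_for_static_init_4(loc, tid, /*schedtype=*/kmp_sched_static_chunk,
//                            &last, &lb, &ub, &stride, /*incr=*/1, chunk);
//   for (int32_t i = lb; i <= ub; ++i)
//     body(i);                     // first chunk
//   // the next chunk for this thread starts at lb + stride
//   __kmpc_for_static_fini(loc, tid);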

void __kmpc_for_static_init_4(IdentTy *loc, int32_t global_tid,
                              int32_t schedtype, int32_t *plastiter,
                              int32_t *plower, int32_t *pupper,
                              int32_t *pstride, int32_t incr, int32_t chunk) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<int32_t, int32_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_for_static_init_4u(IdentTy *loc, int32_t global_tid,
                               int32_t schedtype, int32_t *plastiter,
                               uint32_t *plower, uint32_t *pupper,
                               int32_t *pstride, int32_t incr, int32_t chunk) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<uint32_t, int32_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_for_static_init_8(IdentTy *loc, int32_t global_tid,
                              int32_t schedtype, int32_t *plastiter,
                              int64_t *plower, int64_t *pupper,
                              int64_t *pstride, int64_t incr, int64_t chunk) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<int64_t, int64_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_for_static_init_8u(IdentTy *loc, int32_t global_tid,
                               int32_t schedtype, int32_t *plastiter,
                               uint64_t *plower, uint64_t *pupper,
                               int64_t *pstride, int64_t incr, int64_t chunk) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<uint64_t, int64_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_distribute_static_init_4(IdentTy *loc, int32_t global_tid,
                                     int32_t schedtype, int32_t *plastiter,
                                     int32_t *plower, int32_t *pupper,
                                     int32_t *pstride, int32_t incr,
                                     int32_t chunk) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<int32_t, int32_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_distribute_static_init_4u(IdentTy *loc, int32_t global_tid,
                                      int32_t schedtype, int32_t *plastiter,
                                      uint32_t *plower, uint32_t *pupper,
                                      int32_t *pstride, int32_t incr,
                                      int32_t chunk) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<uint32_t, int32_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_distribute_static_init_8(IdentTy *loc, int32_t global_tid,
                                     int32_t schedtype, int32_t *plastiter,
                                     int64_t *plower, int64_t *pupper,
                                     int64_t *pstride, int64_t incr,
                                     int64_t chunk) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<int64_t, int64_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_distribute_static_init_8u(IdentTy *loc, int32_t global_tid,
                                      int32_t schedtype, int32_t *plastiter,
                                      uint64_t *plower, uint64_t *pupper,
                                      int64_t *pstride, int64_t incr,
                                      int64_t chunk) {
  FunctionTracingRAII();
  omptarget_nvptx_LoopSupport<uint64_t, int64_t>::for_static_init(
      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
      mapping::isSPMDMode());
}

void __kmpc_for_static_fini(IdentTy *loc, int32_t global_tid) {
  FunctionTracingRAII();
}

void __kmpc_distribute_static_fini(IdentTy *loc, int32_t global_tid) {
  FunctionTracingRAII();
}
}

#pragma omp end declare target