/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */

//===----------------------------------------------------------------------===//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//===----------------------------------------------------------------------===//

/* Static scheduling initialization.

   NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
   it may change values between parallel regions. __kmp_max_nth
   is the largest value __kmp_nth may take, 1 is the smallest. */

#include "kmp_error.h"
#include "kmp_stats.h"
#include "ompt-specific.h"
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#if KMP_STATS_ENABLED
#define KMP_STATS_LOOP_END(stat) \
  { \
    kmp_int64 t; \
    kmp_int64 u = (kmp_int64)(*pupper); \
    kmp_int64 l = (kmp_int64)(*plower); \
    kmp_int64 i = (kmp_int64)incr; \
    if (i == 1) { \
      t = u - l + 1; \
    } else if (i == -1) { \
      t = l - u + 1; \
    } else if (i > 0) { \
      t = (u - l) / i + 1; \
    } else { \
      KMP_DEBUG_ASSERT(i != 0); \
      t = (l - u) / (-i) + 1; \
    } \
    KMP_COUNT_VALUE(stat, t); \
    KMP_POP_PARTITIONED_TIMER(); \
  }
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif
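// Worked example (illustrative, not part of the runtime): for a loop with
// *plower = 0, *pupper = 10 and incr = 3, the "i > 0" branch above applies and
// the recorded iteration count is t = (10 - 0) / 3 + 1 = 4, i.e. the
// iterations 0, 3, 6 and 9.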
#if USE_ITT_BUILD || defined KMP_DEBUG
static ident_t loc_stub = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
static inline void check_loc(ident_t *&loc) {
  if (loc == NULL)
    loc = &loc_stub; // may need to report location info to ittnotify
}
#endif
template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  // Clear monotonic/nonmonotonic bits (ignore it)
  schedtype = SCHEDULE_WITHOUT_MODIFIERS(schedtype);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /* this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop_static;

  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
      ompt_work_type = ompt_work_loop_static;
    } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
      ompt_work_type = ompt_work_sections;
    } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
      ompt_work_type = ompt_work_distribute;
    } else {
      kmp_int8 bool_res =
          KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
      if (bool_res)
        KMP_WARNING(OmptOutdatedWorkshare);
    }
    KMP_DEBUG_ASSERT(ompt_work_type);
  }
#endif
  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
  // create format specifiers before the debug output
  buff = __kmp_str_format(
      "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
      " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
      traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
      traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
  KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                 *pstride, incr, chunk));
  __kmp_str_free(&buff);

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0)
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
    // *plower = *pupper - incr;
    // let compiler bypass the illegal loop (like for(i=1;i<10;i--))
    // THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
    // ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
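    // Illustrative example (not part of the runtime): a loop such as
    //   for (i = 1; i <= 0; ++i) {...}
    // arrives here with *plower = 1, *pupper = 0 and incr = 1; the test above
    // (incr > 0 and *pupper < *plower) classifies it as zero-trip, so the
    // thread gets no iterations and only *pstride/*plastiter are initialized.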
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                            "lower=%%%s upper=%%%s stride = %%%s "
                            "signed?<%s>, loc = %%s\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
    __kmp_str_free(&buff);
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  // Although there are schedule enumerations above kmp_ord_upper which are not
  // schedules for "distribute", the only ones which are useful are dynamic, so
  // cannot be seen here, since this codepath is only executed for static
  // schedules.
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    if (th->th.th_team->t.t_serialized > 1) {
      team = th->th.th_team;
    } else {
      tid = th->th.th_team->t.t_master_tid;
      team = th->th.th_team->t.t_parent;
    }
  } else {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                            "lower=%%%s upper=%%%s stride = %%%s\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  nth = team->t.t_nproc;
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                            "lower=%%%s upper=%%%s stride = %%%s\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    KMP_DEBUG_ASSERT(incr != 0);
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }
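  // Worked example (illustrative): for *plower = 0, *pupper = 99, incr = 4 the
  // third branch applies and trip_count = (UT)(99 - 0) / 4 + 1 = 25, matching
  // the iterations 0, 4, 8, ..., 96. The cast to UT guards against overflow
  // when *pupper - *plower does not fit in the signed type.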
#if KMP_STATS_ENABLED
  if (KMP_MASTER_GTID(gtid)) {
    KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
  }
#endif

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        // set bounds so non-active threads execute no iterations
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      KMP_DEBUG_ASSERT(nth != 0);
      if (__kmp_static == kmp_sch_static_balanced) {
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
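        // Worked example (illustrative): trip_count = 10, nth = 4 gives
        // small_chunk = 2 and extras = 2, so threads 0..3 receive 3, 3, 2 and
        // 2 iterations respectively; the first "extras" threads take one
        // extra iteration each and only thread nth - 1 reports *plastiter.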
      } else {
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
  case kmp_sch_static_chunked: {
    ST span;
    UT nchunks;
    KMP_DEBUG_ASSERT(chunk != 0);
    if (chunk < 1)
      chunk = 1;
    else if ((UT)chunk > trip_count)
      chunk = trip_count;
    nchunks = (trip_count) / (UT)chunk + (trip_count % (UT)chunk ? 1 : 0);
    span = chunk * incr;
    if (nchunks < nth) {
      *pstride = span * nchunks;
      if (tid < nchunks) {
        *plower = *plower + (span * tid);
        *pupper = *plower + span - incr;
      } else {
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
    } else {
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
    }
    if (plastiter != NULL)
      *plastiter = (tid == (nchunks - 1) % nth);
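    // Worked example (illustrative): trip_count = 7, chunk = 2, nth = 2 and
    // incr = 1 give nchunks = 4 and span = 2; thread 0 starts at *plower with
    // *pstride = 4, thread 1 starts at *plower + 2, and the last (4th) chunk
    // belongs to tid == (4 - 1) % 2 == 1, which therefore sets *plastiter.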
    break;
  }
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    KMP_DEBUG_ASSERT(nth != 0);
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL) {
      KMP_DEBUG_ASSERT(chunk != 0);
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    }
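    // Worked example (illustrative): trip_count = 100, nth = 3, chunk = 8 and
    // incr = 1 give a raw span of (100 + 2) / 3 = 34, which the adjustment
    // rounds up to the next multiple of the chunk: (34 + 7) & ~7 = 40. The
    // last iteration then lives in chunk (100 - 1) / 40 == 2, i.e. on tid 2.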
    break;
  }
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      KMP_DEBUG_ASSERT(nth != 0);
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
  // create format specifiers before the debug output
  buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                          "upper=%%%s stride = %%%s signed?<%s>\n",
                          traits_t<T>::spec, traits_t<T>::spec,
                          traits_t<ST>::spec, traits_t<T>::spec);
  KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
  __kmp_str_free(&buff);
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
  if (ompt_enabled.ompt_callback_dispatch) {
    ompt_dispatch_t dispatch_type;
    ompt_data_t instance = ompt_data_none;
    ompt_dispatch_chunk_t dispatch_chunk;
    if (ompt_work_type == ompt_work_sections) {
      dispatch_type = ompt_dispatch_section;
      instance.ptr = codeptr;
    } else {
      OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupper, incr);
      dispatch_type = (ompt_work_type == ompt_work_distribute)
                          ? ompt_dispatch_distribute_chunk
                          : ompt_dispatch_ws_loop_chunk;
      instance.ptr = &dispatch_chunk;
    }
    ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
        &(team_info->parallel_data), &(task_info->task_data), dispatch_type,
        instance);
  }
#endif

  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
  return;
}
template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                       ,
                                       void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
  // create format specifiers before the debug output
  buff = __kmp_str_format(
      "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
      "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
      traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
      traits_t<ST>::spec, traits_t<T>::spec);
  KD_TRACE(100,
           (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
  __kmp_str_free(&buff);

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0)
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0 - compile-time check
      //   for(i=10;i<0;--i) // incr < 0 - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    KMP_DEBUG_ASSERT(incr != 0);
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only primary threads of some teams get single iteration, other threads
    // get nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    KMP_DEBUG_ASSERT(nteams != 0);
    if (__kmp_static == kmp_sch_static_balanced) {
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
  }
  // Get the parallel loop chunk now (for thread)
  // compute trip count for team's chunk
  if (incr == 1) {
    trip_count = *pupperDist - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupperDist + 1;
  } else if (incr > 1) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupperDist - *plower) / incr + 1;
  } else {
    KMP_DEBUG_ASSERT(incr != 0);
    trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
  }
  KMP_DEBUG_ASSERT(trip_count);
  switch (schedule) {
  case kmp_sch_static: {
    if (trip_count <= nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count)
        *pupper = *plower = *plower + tid * incr;
      else
        *plower = *pupper + incr; // no iterations available
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == trip_count - 1))
          *plastiter = 0;
    } else {
      KMP_DEBUG_ASSERT(nth != 0);
      if (__kmp_static == kmp_sch_static_balanced) {
        UT chunkL = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
        *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == nth - 1))
            *plastiter = 0;
      } else {
        T chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T upper = *pupperDist;
        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.
        *plower += tid * chunk_inc_count;
        *pupper = *plower + chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            if (*plastiter != 0 &&
                !(*plower <= upper && *pupper > upper - incr))
              *plastiter = 0;
          if (*pupper > upper)
            *pupper = upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            if (*plastiter != 0 &&
                !(*plower >= upper && *pupper < upper - incr))
              *plastiter = 0;
          if (*pupper < upper)
            *pupper = upper; // tracker C73258
        }
      }
    }
    break;
  }
  case kmp_sch_static_chunked: {
    ST span;
    if (chunk < 1)
      chunk = 1;
    span = chunk * incr;
    *pstride = span * nth;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (plastiter != NULL) {
      KMP_DEBUG_ASSERT(chunk != 0);
      if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
        *plastiter = 0;
    }
    break;
  }
  default:
    KMP_ASSERT2(0,
                "__kmpc_dist_for_static_init: unknown loop scheduling type");
    break;
  }
end:;
  // create format specifiers before the debug output
  buff = __kmp_str_format(
      "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
      "stride=%%%s signed?<%s>\n",
      traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
      traits_t<ST>::spec, traits_t<T>::spec);
  KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
  __kmp_str_free(&buff);
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_distribute, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
    if (ompt_enabled.ompt_callback_dispatch) {
      ompt_data_t instance = ompt_data_none;
      ompt_dispatch_chunk_t dispatch_chunk;
      OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupperDist, incr);
      instance.ptr = &dispatch_chunk;
      ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
          &(team_info->parallel_data), &(task_info->task_data),
          ompt_dispatch_distribute_chunk, instance);
    }
  }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
  KMP_STATS_LOOP_END(OMP_distribute_iterations);
  return;
}
template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and
  // stride for next chunks calculation.
  // Last iteration flag set for the team that will execute
  // the last iteration of the loop.
  // The routine is called for dist_schedule(static,chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
  // create format specifiers before the debug output
  buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                          "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                          traits_t<T>::spec, traits_t<T>::spec,
                          traits_t<ST>::spec, traits_t<ST>::spec,
                          traits_t<T>::spec);
  KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
  __kmp_str_free(&buff);
  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0)
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0 - compile-time check
      //   for(i=10;i<0;--i) // incr < 0 - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    KMP_DEBUG_ASSERT(incr != 0);
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL) {
    KMP_DEBUG_ASSERT(chunk != 0);
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
  }
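  // Worked example (illustrative): lower = 0, upper = 99, incr = 1, chunk = 10
  // and nteams = 4 give span = 10 and *p_st = 40, so team 0 gets [0,9] first,
  // then [40,49], and so on; the last of the 10 chunks is chunk 9, and
  // 9 % 4 == 1, so team 1 executes the final chunk and sets *p_last.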
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else {
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
  // create format specifiers before the debug output
  buff =
      __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                       "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                       traits_t<T>::spec, traits_t<T>::spec,
                       traits_t<ST>::spec, traits_t<ST>::spec);
  KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
  __kmp_str_free(&buff);
}
//------------------------------------------------------------------------------
/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedtype Scheduling type
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound
@param pstride Pointer to the stride
@param incr Loop increment
@param chunk The chunk size

Each of the four functions here is identical apart from the argument types.

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk size.
*/
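// Illustrative usage sketch (not part of the runtime): roughly what a compiler
// emits for "#pragma omp for schedule(static)" over i = 0..99. The bound and
// stride variables are hypothetical locals; only the entry points are real.
//
//   kmp_int32 last = 0, lb = 0, ub = 99, st = 1;
//   __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lb, &ub, &st,
//                            1, 1);
//   for (kmp_int32 i = lb; i <= ub; ++i)
//     body(i);
//   __kmpc_for_static_fini(&loc, gtid);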
void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int32 *plower,
                              kmp_int32 *pupper, kmp_int32 *pstride,
                              kmp_int32 incr, kmp_int32 chunk) {
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
See @ref __kmpc_for_static_init_4
*/
void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint32 *plower, kmp_uint32 *pupper,
                               kmp_int32 *pstride, kmp_int32 incr,
                               kmp_int32 chunk) {
  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
See @ref __kmpc_for_static_init_4
*/
void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int64 *plower,
                              kmp_int64 *pupper, kmp_int64 *pstride,
                              kmp_int64 incr, kmp_int64 chunk) {
  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
See @ref __kmpc_for_static_init_4
*/
void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint64 *plower, kmp_uint64 *pupper,
                               kmp_int64 *pstride, kmp_int64 incr,
                               kmp_int64 chunk) {
  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

#if OMPT_SUPPORT && OMPT_OPTIONAL
#define OMPT_CODEPTR_ARG , OMPT_GET_RETURN_ADDRESS(0)
#else
#define OMPT_CODEPTR_ARG
#endif
/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedule Scheduling type for the parallel loop
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound of loop chunk
@param pupperD Pointer to the upper bound of dist_chunk
@param pstride Pointer to the stride for parallel loop
@param incr Loop increment
@param chunk The chunk size for the parallel loop

Each of the four functions here is identical apart from the argument types.

The functions compute the upper and lower bounds and strides to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, strides,
increment and chunks for parallel loop and distribute constructs.
*/
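// Illustrative usage sketch (not part of the runtime): the entry point below
// is what a compiler calls for "#pragma omp distribute parallel for
// schedule(static)" inside a teams region; the locals are hypothetical.
//
//   kmp_int32 last = 0, lb = 0, ub = 999, ubD = 0, st = 1;
//   __kmpc_dist_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lb, &ub,
//                                 &ubD, &st, 1, 1);
//   // [lb, ubD] is this team's slice of 0..999; [lb, ub] is this thread's
//   // portion of that slice.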
void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr,
                                        chunk OMPT_CODEPTR_ARG);
}

/*!
See @ref __kmpc_dist_for_static_init_4
*/
void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr,
                                         chunk OMPT_CODEPTR_ARG);
}

/*!
See @ref __kmpc_dist_for_static_init_4
*/
void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int64 *plower, kmp_int64 *pupper,
                                   kmp_int64 *pupperD, kmp_int64 *pstride,
                                   kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr,
                                        chunk OMPT_CODEPTR_ARG);
}

/*!
See @ref __kmpc_dist_for_static_init_4
*/
void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint64 *plower, kmp_uint64 *pupper,
                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
                                    kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr,
                                         chunk OMPT_CODEPTR_ARG);
}
//------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
//   Transfer call to template< type T >
//   __kmp_team_static_init( ident_t *loc, int gtid,
//   int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

/*!
@ingroup WORK_SHARING
@param loc Source location
@param gtid Global thread id
@param p_last pointer to last iteration flag
@param p_lb pointer to Lower bound
@param p_ub pointer to Upper bound
@param p_st Step (or increment if you prefer)
@param incr Loop increment
@param chunk The chunk size to block with

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current team from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk for the distribute construct as part of composite
distribute parallel loop construct. These functions are all identical apart
from the types of their arguments.
*/
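// Illustrative usage sketch (not part of the runtime): for the distribute part
// of "#pragma omp distribute parallel for dist_schedule(static, chunk)" the
// compiler asks for the team's first chunk and the stride to the next one; the
// locals are hypothetical.
//
//   kmp_int32 last = 0, lb = 0, ub = 999, st = 1;
//   __kmpc_team_static_init_4(&loc, gtid, &last, &lb, &ub, &st, 1, 100);
//   // [lb, ub] is this team's first chunk of 100 iterations; advancing both
//   // bounds by st yields the team's subsequent chunks.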
void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int32 *p_lb, kmp_int32 *p_ub,
                               kmp_int32 *p_st, kmp_int32 incr,
                               kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
See @ref __kmpc_team_static_init_4
*/
void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                                kmp_int32 *p_st, kmp_int32 incr,
                                kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}

/*!
See @ref __kmpc_team_static_init_4
*/
void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int64 *p_lb, kmp_int64 *p_ub,
                               kmp_int64 *p_st, kmp_int64 incr,
                               kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
See @ref __kmpc_team_static_init_4
*/
void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                                kmp_int64 *p_st, kmp_int64 incr,
                                kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}