/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

/* Static scheduling initialization.

   NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
         it may change values between parallel regions.  __kmp_max_nth
         is the largest value __kmp_nth may take, 1 is the smallest. */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_str.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif
#if KMP_STATS_ENABLED
#define KMP_STATS_LOOP_END(stat)                                               \
  {                                                                            \
    kmp_int64 t;                                                               \
    kmp_int64 u = (kmp_int64)(*pupper);                                        \
    kmp_int64 l = (kmp_int64)(*plower);                                        \
    kmp_int64 i = (kmp_int64)incr;                                             \
    if (i == 1) {                                                              \
      t = u - l + 1;                                                           \
    } else if (i == -1) {                                                      \
      t = l - u + 1;                                                           \
    } else if (i > 0) {                                                        \
      t = (u - l) / i + 1;                                                     \
    } else {                                                                   \
      KMP_DEBUG_ASSERT(i != 0);                                                \
      t = (l - u) / (-i) + 1;                                                  \
    }                                                                          \
    KMP_COUNT_VALUE(stat, t);                                                  \
    KMP_POP_PARTITIONED_TIMER();                                               \
  }
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif
#if USE_ITT_BUILD || defined KMP_DEBUG
static ident_t loc_stub = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
static inline void check_loc(ident_t *&loc) {
  if (loc == NULL)
    loc = &loc_stub; // may need to report location info to ittnotify
}
#endif
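
// __kmp_for_static_init computes the chunk of a statically scheduled loop that
// the calling thread should execute. On return, *plower and *pupper bound that
// thread's iterations, *pstride is the distance to the thread's next chunk
// (meaningful for chunked schedules), and *plastiter reports whether this
// thread executes the last iteration of the loop.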
template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  // Clear monotonic/nonmonotonic bits (ignore it)
  schedtype = SCHEDULE_WITHOUT_MODIFIERS(schedtype);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /* this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop_static;

  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop_static;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif
  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
// *plower = *pupper - incr;
// let compiler bypass the illegal loop (like for(i=1;i<10;i--))
// THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
// ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      check_loc(loc);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  // Although there are schedule enumerations above kmp_ord_upper which are not
  // schedules for "distribute", the only ones which are useful are dynamic, so
  // cannot be seen here, since this codepath is only executed for static
  // schedules.
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    if (th->th.th_team->t.t_serialized > 1) {
      tid = 0;
      team = th->th.th_team;
    } else {
      tid = th->th.th_team->t.t_master_tid;
      team = th->th.th_team->t.t_parent;
    }
  } else {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  nth = team->t.t_nproc;
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    KMP_DEBUG_ASSERT(incr != 0);
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }
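  // Illustrative example of the trip count formula: with *plower = 0,
  // *pupper = 9 and incr = 3 the loop runs over {0, 3, 6, 9}, and indeed
  // (9 - 0) / 3 + 1 == 4. The unsigned cast matters because *pupper - *plower
  // may not fit in the signed type.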

#if KMP_STATS_ENABLED
  if (KMP_MASTER_GTID(gtid)) {
    KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
  }
#endif

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        // set bounds so non-active threads execute no iterations
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      KMP_DEBUG_ASSERT(nth != 0);
      if (__kmp_static == kmp_sch_static_balanced) {
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
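  // Illustrative example for the balanced split above: with trip_count = 10,
  // nth = 4 and incr = 1, small_chunk = 2 and extras = 2, so threads 0..3
  // receive 3, 3, 2 and 2 consecutive iterations respectively; only thread
  // nth - 1 reports the last iteration.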
  case kmp_sch_static_chunked: {
    ST span;
    UT nchunks;
    KMP_DEBUG_ASSERT(chunk != 0);
    if (chunk < 1)
      chunk = 1;
    else if ((UT)chunk > trip_count)
      chunk = trip_count;
    nchunks = (trip_count) / (UT)chunk + (trip_count % (UT)chunk ? 1 : 0);
    span = chunk * incr;
    if (nchunks < nth) {
      *pstride = span * nchunks;
      if (tid < nchunks) {
        *plower = *plower + (span * tid);
        *pupper = *plower + span - incr;
      } else {
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
    } else {
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
    }
    if (plastiter != NULL)
      *plastiter = (tid == (nchunks - 1) % nth);
    break;
  }
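  // Illustrative example for the chunked case above: with trip_count = 10,
  // chunk = 2, nth = 3 and incr = 1 there are nchunks = 5 chunks and
  // *pstride = 6; thread tid starts on chunk tid (iterations 2*tid and
  // 2*tid + 1) and advances by *pstride to its next chunk. The last chunk,
  // index 4, belongs to thread (5 - 1) % 3 == 1.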
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    KMP_DEBUG_ASSERT(nth != 0);
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL) {
      KMP_DEBUG_ASSERT(chunk != 0);
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    }
    break;
  }
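  // Illustrative example for the balanced-chunked case above (the incoming
  // chunk is expected to be a power of two, e.g. a SIMD width): with
  // trip_count = 100, nth = 4 and chunk = 8, span is first ceil(100 / 4) = 25
  // and the bitwise adjustment rounds it up to the next multiple of 8, giving
  // chunk = 32; thread 3 then reports the last iteration since
  // (100 - 1) / 32 == 3.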
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    check_loc(loc);
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      KMP_DEBUG_ASSERT(nth != 0);
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
  if (ompt_enabled.ompt_callback_dispatch) {
    ompt_dispatch_t dispatch_type;
    ompt_data_t instance = ompt_data_none;
    ompt_dispatch_chunk_t dispatch_chunk;
    if (ompt_work_type == ompt_work_sections) {
      dispatch_type = ompt_dispatch_section;
      instance.ptr = codeptr;
    } else {
      OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupper, incr);
      dispatch_type = (ompt_work_type == ompt_work_distribute)
                          ? ompt_dispatch_distribute_chunk
                          : ompt_dispatch_ws_loop_chunk;
      instance.ptr = &dispatch_chunk;
    }
    ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
        &(team_info->parallel_data), &(task_info->task_data), dispatch_type,
        instance);
  }
#endif

  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
  return;
}

template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                       ,
                                       void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0     - compile-time check
      //   for(i=10;i<0;--i) // incr < 0     - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    KMP_DEBUG_ASSERT(incr != 0);
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only primary threads of some teams get single iteration, other threads
    // get nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    KMP_DEBUG_ASSERT(nteams != 0);
    if (__kmp_static == kmp_sch_static_balanced) {
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      KMP_DEBUG_ASSERT(incr != 0);
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        KMP_DEBUG_ASSERT(nth != 0);
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL) {
        KMP_DEBUG_ASSERT(chunk != 0);
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      }
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_distribute, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
    if (ompt_enabled.ompt_callback_dispatch) {
      ompt_data_t instance = ompt_data_none;
      ompt_dispatch_chunk_t dispatch_chunk;
      OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupperDist, incr);
      instance.ptr = &dispatch_chunk;
      ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
          &(team_info->parallel_data), &(task_info->task_data),
          ompt_dispatch_distribute_chunk, instance);
    }
  }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
  KMP_STATS_LOOP_END(OMP_distribute_iterations);
  return;
}

template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and
  // stride for next chunks calculation.
  // Last iteration flag set for the team that will execute
  // the last iteration of the loop.
  // The routine is called for dist_schedule(static,chunk) only.
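  // Illustrative example: for a loop with lower = 0, upper = 99, incr = 1,
  // chunk = 10 and nteams = 4, each call returns *p_st = 40 and team 0 gets
  // [0, 9], team 1 [10, 19], team 2 [20, 29], team 3 [30, 39]; teams step by
  // *p_st to reach their later chunks, and *p_last is set for team 1, which
  // owns the final chunk [90, 99] since ((100 - 1) / 10) % 4 == 1.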
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0     - compile-time check
      //   for(i=10;i<0;--i) // incr < 0     - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    KMP_DEBUG_ASSERT(incr != 0);
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL) {
    KMP_DEBUG_ASSERT(chunk != 0);
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
  }
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}

//------------------------------------------------------------------------------
extern "C" {
/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedtype Scheduling type
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound
@param pstride Pointer to the stride
@param incr Loop increment
@param chunk The chunk size

Each of the four functions here is identical apart from the argument types.

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk size. An illustrative calling sketch follows the four
definitions below.

@{
*/
void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int32 *plower,
                              kmp_int32 *pupper, kmp_int32 *pstride,
                              kmp_int32 incr, kmp_int32 chunk) {
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint32 *plower, kmp_uint32 *pupper,
                               kmp_int32 *pstride, kmp_int32 incr,
                               kmp_int32 chunk) {
  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int64 *plower,
                              kmp_int64 *pupper, kmp_int64 *pstride,
                              kmp_int64 incr, kmp_int64 chunk) {
  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint64 *plower, kmp_uint64 *pupper,
                               kmp_int64 *pstride, kmp_int64 incr,
                               kmp_int64 chunk) {
  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
/*!
@}
*/
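
// Illustrative calling sketch (kept as a comment, not part of the runtime):
// roughly how a compiler might lower
//   #pragma omp for schedule(static)
//   for (int i = 0; i < n; ++i) body(i);
// inside an outlined parallel region. The identifiers loc_example, n and
// body() are hypothetical; __kmpc_for_static_init_4, __kmpc_for_static_fini
// and kmp_sch_static are the real entry points and schedule value.
//
//   static ident_t loc_example = {0, KMP_IDENT_KMPC, 0, 0,
//                                 ";unknown;unknown;0;0;;"};
//   void outlined_loop(kmp_int32 gtid, int n) {
//     kmp_int32 lastiter = 0, lower = 0, upper = n - 1, stride = 1;
//     __kmpc_for_static_init_4(&loc_example, gtid, kmp_sch_static, &lastiter,
//                              &lower, &upper, &stride, /*incr=*/1,
//                              /*chunk=*/1);
//     for (kmp_int32 i = lower; i <= upper; ++i)
//       body(i);
//     __kmpc_for_static_fini(&loc_example, gtid);
//   }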

#if OMPT_SUPPORT && OMPT_OPTIONAL
#define OMPT_CODEPTR_ARG , OMPT_GET_RETURN_ADDRESS(0)
#else
#define OMPT_CODEPTR_ARG
#endif

/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedule Scheduling type for the parallel loop
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound of loop chunk
@param pupperD Pointer to the upper bound of dist_chunk
@param pstride Pointer to the stride for parallel loop
@param incr Loop increment
@param chunk The chunk size for the parallel loop

Each of the four functions here is identical apart from the argument types.

The functions compute the upper and lower bounds and strides to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, strides,
increment and chunks for parallel loop and distribute constructs. A worked
example follows the four definitions below.

@{
*/
void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr,
                                        chunk OMPT_CODEPTR_ARG);
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr,
                                         chunk OMPT_CODEPTR_ARG);
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int64 *plower, kmp_int64 *pupper,
                                   kmp_int64 *pupperD, kmp_int64 *pstride,
                                   kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr,
                                        chunk OMPT_CODEPTR_ARG);
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint64 *plower, kmp_uint64 *pupper,
                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
                                    kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr,
                                         chunk OMPT_CODEPTR_ARG);
}
/*!
@}
*/
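
// Worked example (illustrative, assuming the library's default balanced
// static subdivision): for a loop of 8 iterations (lower = 0, upper = 7,
// incr = 1) run by nteams = 2 teams of nth = 2 threads each with schedule
// kmp_sch_static, the distribute step first gives team 0 the range [0, 3] and
// team 1 the range [4, 7] (reported through pupperD); within each team the
// parallel-for step then splits the team's range in half, so team 0 ends with
// thread 0 on [0, 1] and thread 1 on [2, 3] (reported through plower/pupper).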

//------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
//   Transfer call to template< type T >
//   __kmp_team_static_init( ident_t *loc, int gtid,
//       int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

/*!
@ingroup WORK_SHARING

@param loc Source location
@param gtid Global thread id
@param p_last pointer to last iteration flag
@param p_lb pointer to Lower bound
@param p_ub pointer to Upper bound
@param p_st Step (or increment if you prefer)
@param incr Loop increment
@param chunk The chunk size to block with

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current team from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk for the distribute construct as part of the composite
distribute parallel loop construct. These functions are all identical apart
from the types of the arguments.
@{
*/
void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int32 *p_lb, kmp_int32 *p_ub,
                               kmp_int32 *p_st, kmp_int32 incr,
                               kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                                kmp_int32 *p_st, kmp_int32 incr,
                                kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int64 *p_lb, kmp_int64 *p_ub,
                               kmp_int64 *p_st, kmp_int64 incr,
                               kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                                kmp_int64 *p_st, kmp_int64 incr,
                                kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}
/*!
@}
*/

} // extern "C"