// Web-view header captured together with this file (not part of the source):
//   Run DCE after a LoopFlatten test to reduce spurious output [nfc]
//   [llvm-project.git] / openmp / runtime / src / kmp_sched.cpp
//   blob 53182bef58732e2522a53f7f188bdf391b16f290
/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/* Static scheduling initialization.

  NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
        it may change values between parallel regions.  __kmp_max_nth
        is the largest value __kmp_nth may take, 1 is the smallest. */
19 #include "kmp.h"
20 #include "kmp_error.h"
21 #include "kmp_i18n.h"
22 #include "kmp_itt.h"
23 #include "kmp_stats.h"
24 #include "kmp_str.h"
26 #if OMPT_SUPPORT
27 #include "ompt-specific.h"
28 #endif
#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// printf-style conversion specifiers used when building debug trace format
// strings for each supported integer type ( d, u, lld, llu, ld )
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif
#if KMP_STATS_ENABLED
// Records the number of iterations described by the *plower / *pupper / incr
// variables of the enclosing function under the counter `stat`, then pops the
// current partitioned timer. The macro reads `plower`, `pupper` and `incr`
// from the scope in which it is expanded, so it can only be used inside the
// static-init routines of this file.
#define KMP_STATS_LOOP_END(stat)                                               \
  {                                                                            \
    kmp_int64 t;                                                               \
    kmp_int64 u = (kmp_int64)(*pupper);                                        \
    kmp_int64 l = (kmp_int64)(*plower);                                        \
    kmp_int64 i = (kmp_int64)incr;                                             \
    if (i == 1) {                                                              \
      t = u - l + 1;                                                           \
    } else if (i == -1) {                                                      \
      t = l - u + 1;                                                           \
    } else if (i > 0) {                                                        \
      t = (u - l) / i + 1;                                                     \
    } else {                                                                   \
      t = (l - u) / (-i) + 1;                                                  \
    }                                                                          \
    KMP_COUNT_VALUE(stat, t);                                                  \
    KMP_POP_PARTITIONED_TIMER();                                               \
  }
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif
#if USE_ITT_BUILD || defined KMP_DEBUG
// Placeholder location used when the caller did not supply one.
static ident_t loc_stub = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};

// Replaces a NULL location reference with the address of loc_stub so that
// later code may dereference `loc` unconditionally. `loc` is taken by
// reference and is modified in place.
static inline void check_loc(ident_t *&loc) {
  if (loc == NULL)
    loc = &loc_stub; // may need to report location info to ittnotify
}
#endif
72 template <typename T>
73 static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
74 kmp_int32 schedtype, kmp_int32 *plastiter,
75 T *plower, T *pupper,
76 typename traits_t<T>::signed_t *pstride,
77 typename traits_t<T>::signed_t incr,
78 typename traits_t<T>::signed_t chunk
79 #if OMPT_SUPPORT && OMPT_OPTIONAL
81 void *codeptr
82 #endif
83 ) {
84 KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
85 KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
86 KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);
88 // Clear monotonic/nonmonotonic bits (ignore it)
89 schedtype = SCHEDULE_WITHOUT_MODIFIERS(schedtype);
91 typedef typename traits_t<T>::unsigned_t UT;
92 typedef typename traits_t<T>::signed_t ST;
93 /* this all has to be changed back to TID and such.. */
94 kmp_int32 gtid = global_tid;
95 kmp_uint32 tid;
96 kmp_uint32 nth;
97 UT trip_count;
98 kmp_team_t *team;
99 __kmp_assert_valid_gtid(gtid);
100 kmp_info_t *th = __kmp_threads[gtid];
102 #if OMPT_SUPPORT && OMPT_OPTIONAL
103 ompt_team_info_t *team_info = NULL;
104 ompt_task_info_t *task_info = NULL;
105 ompt_work_t ompt_work_type = ompt_work_loop;
107 static kmp_int8 warn = 0;
109 if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
110 // Only fully initialize variables needed by OMPT if OMPT is enabled.
111 team_info = __ompt_get_teaminfo(0, NULL);
112 task_info = __ompt_get_task_info_object(0);
113 // Determine workshare type
114 if (loc != NULL) {
115 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
116 ompt_work_type = ompt_work_loop;
117 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
118 ompt_work_type = ompt_work_sections;
119 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
120 ompt_work_type = ompt_work_distribute;
121 } else {
122 kmp_int8 bool_res =
123 KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
124 if (bool_res)
125 KMP_WARNING(OmptOutdatedWorkshare);
127 KMP_DEBUG_ASSERT(ompt_work_type);
130 #endif
132 KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
133 KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
134 #ifdef KMP_DEBUG
136 char *buff;
137 // create format specifiers before the debug output
138 buff = __kmp_str_format(
139 "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
140 " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
141 traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
142 traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
143 KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
144 *pstride, incr, chunk));
145 __kmp_str_free(&buff);
147 #endif
149 if (__kmp_env_consistency_check) {
150 __kmp_push_workshare(global_tid, ct_pdo, loc);
151 if (incr == 0) {
152 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
153 loc);
156 /* special handling for zero-trip loops */
157 if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
158 if (plastiter != NULL)
159 *plastiter = FALSE;
160 /* leave pupper and plower set to entire iteration space */
161 *pstride = incr; /* value should never be used */
162 // *plower = *pupper - incr;
163 // let compiler bypass the illegal loop (like for(i=1;i<10;i--))
164 // THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
165 // ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
166 #ifdef KMP_DEBUG
168 char *buff;
169 // create format specifiers before the debug output
170 buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
171 "lower=%%%s upper=%%%s stride = %%%s "
172 "signed?<%s>, loc = %%s\n",
173 traits_t<T>::spec, traits_t<T>::spec,
174 traits_t<ST>::spec, traits_t<T>::spec);
175 check_loc(loc);
176 KD_TRACE(100,
177 (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
178 __kmp_str_free(&buff);
180 #endif
181 KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
183 #if OMPT_SUPPORT && OMPT_OPTIONAL
184 if (ompt_enabled.ompt_callback_work) {
185 ompt_callbacks.ompt_callback(ompt_callback_work)(
186 ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
187 &(task_info->task_data), 0, codeptr);
189 #endif
190 KMP_STATS_LOOP_END(OMP_loop_static_iterations);
191 return;
194 // Although there are schedule enumerations above kmp_ord_upper which are not
195 // schedules for "distribute", the only ones which are useful are dynamic, so
196 // cannot be seen here, since this codepath is only executed for static
197 // schedules.
198 if (schedtype > kmp_ord_upper) {
199 // we are in DISTRIBUTE construct
200 schedtype += kmp_sch_static -
201 kmp_distribute_static; // AC: convert to usual schedule type
202 if (th->th.th_team->t.t_serialized > 1) {
203 tid = 0;
204 team = th->th.th_team;
205 } else {
206 tid = th->th.th_team->t.t_master_tid;
207 team = th->th.th_team->t.t_parent;
209 } else {
210 tid = __kmp_tid_from_gtid(global_tid);
211 team = th->th.th_team;
214 /* determine if "for" loop is an active worksharing construct */
215 if (team->t.t_serialized) {
216 /* serialized parallel, each thread executes whole iteration space */
217 if (plastiter != NULL)
218 *plastiter = TRUE;
219 /* leave pupper and plower set to entire iteration space */
220 *pstride =
221 (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
223 #ifdef KMP_DEBUG
225 char *buff;
226 // create format specifiers before the debug output
227 buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
228 "lower=%%%s upper=%%%s stride = %%%s\n",
229 traits_t<T>::spec, traits_t<T>::spec,
230 traits_t<ST>::spec);
231 KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
232 __kmp_str_free(&buff);
234 #endif
235 KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
237 #if OMPT_SUPPORT && OMPT_OPTIONAL
238 if (ompt_enabled.ompt_callback_work) {
239 ompt_callbacks.ompt_callback(ompt_callback_work)(
240 ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
241 &(task_info->task_data), *pstride, codeptr);
243 #endif
244 KMP_STATS_LOOP_END(OMP_loop_static_iterations);
245 return;
247 nth = team->t.t_nproc;
248 if (nth == 1) {
249 if (plastiter != NULL)
250 *plastiter = TRUE;
251 *pstride =
252 (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
253 #ifdef KMP_DEBUG
255 char *buff;
256 // create format specifiers before the debug output
257 buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
258 "lower=%%%s upper=%%%s stride = %%%s\n",
259 traits_t<T>::spec, traits_t<T>::spec,
260 traits_t<ST>::spec);
261 KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
262 __kmp_str_free(&buff);
264 #endif
265 KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
267 #if OMPT_SUPPORT && OMPT_OPTIONAL
268 if (ompt_enabled.ompt_callback_work) {
269 ompt_callbacks.ompt_callback(ompt_callback_work)(
270 ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
271 &(task_info->task_data), *pstride, codeptr);
273 #endif
274 KMP_STATS_LOOP_END(OMP_loop_static_iterations);
275 return;
278 /* compute trip count */
279 if (incr == 1) {
280 trip_count = *pupper - *plower + 1;
281 } else if (incr == -1) {
282 trip_count = *plower - *pupper + 1;
283 } else if (incr > 0) {
284 // upper-lower can exceed the limit of signed type
285 trip_count = (UT)(*pupper - *plower) / incr + 1;
286 } else {
287 trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
290 #if KMP_STATS_ENABLED
291 if (KMP_MASTER_GTID(gtid)) {
292 KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
294 #endif
296 if (__kmp_env_consistency_check) {
297 /* tripcount overflow? */
298 if (trip_count == 0 && *pupper != *plower) {
299 __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
300 loc);
304 /* compute remaining parameters */
305 switch (schedtype) {
306 case kmp_sch_static: {
307 if (trip_count < nth) {
308 KMP_DEBUG_ASSERT(
309 __kmp_static == kmp_sch_static_greedy ||
310 __kmp_static ==
311 kmp_sch_static_balanced); // Unknown static scheduling type.
312 if (tid < trip_count) {
313 *pupper = *plower = *plower + tid * incr;
314 } else {
315 // set bounds so non-active threads execute no iterations
316 *plower = *pupper + (incr > 0 ? 1 : -1);
318 if (plastiter != NULL)
319 *plastiter = (tid == trip_count - 1);
320 } else {
321 if (__kmp_static == kmp_sch_static_balanced) {
322 UT small_chunk = trip_count / nth;
323 UT extras = trip_count % nth;
324 *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
325 *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
326 if (plastiter != NULL)
327 *plastiter = (tid == nth - 1);
328 } else {
329 T big_chunk_inc_count =
330 (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
331 T old_upper = *pupper;
333 KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
334 // Unknown static scheduling type.
336 *plower += tid * big_chunk_inc_count;
337 *pupper = *plower + big_chunk_inc_count - incr;
338 if (incr > 0) {
339 if (*pupper < *plower)
340 *pupper = traits_t<T>::max_value;
341 if (plastiter != NULL)
342 *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
343 if (*pupper > old_upper)
344 *pupper = old_upper; // tracker C73258
345 } else {
346 if (*pupper > *plower)
347 *pupper = traits_t<T>::min_value;
348 if (plastiter != NULL)
349 *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
350 if (*pupper < old_upper)
351 *pupper = old_upper; // tracker C73258
355 *pstride = trip_count;
356 break;
358 case kmp_sch_static_chunked: {
359 ST span;
360 UT nchunks;
361 if (chunk < 1)
362 chunk = 1;
363 else if ((UT)chunk > trip_count)
364 chunk = trip_count;
365 nchunks = (trip_count) / (UT)chunk + (trip_count % (UT)chunk ? 1 : 0);
366 span = chunk * incr;
367 if (nchunks < nth) {
368 *pstride = span * nchunks;
369 if (tid < nchunks) {
370 *plower = *plower + (span * tid);
371 *pupper = *plower + span - incr;
372 } else {
373 *plower = *pupper + (incr > 0 ? 1 : -1);
375 } else {
376 *pstride = span * nth;
377 *plower = *plower + (span * tid);
378 *pupper = *plower + span - incr;
380 if (plastiter != NULL)
381 *plastiter = (tid == (nchunks - 1) % nth);
382 break;
384 case kmp_sch_static_balanced_chunked: {
385 T old_upper = *pupper;
386 // round up to make sure the chunk is enough to cover all iterations
387 UT span = (trip_count + nth - 1) / nth;
389 // perform chunk adjustment
390 chunk = (span + chunk - 1) & ~(chunk - 1);
392 span = chunk * incr;
393 *plower = *plower + (span * tid);
394 *pupper = *plower + span - incr;
395 if (incr > 0) {
396 if (*pupper > old_upper)
397 *pupper = old_upper;
398 } else if (*pupper < old_upper)
399 *pupper = old_upper;
401 if (plastiter != NULL)
402 *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
403 break;
405 default:
406 KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
407 break;
410 #if USE_ITT_BUILD
411 // Report loop metadata
412 if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
413 __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
414 team->t.t_active_level == 1) {
415 kmp_uint64 cur_chunk = chunk;
416 check_loc(loc);
417 // Calculate chunk in case it was not specified; it is specified for
418 // kmp_sch_static_chunked
419 if (schedtype == kmp_sch_static) {
420 cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
422 // 0 - "static" schedule
423 __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
425 #endif
426 #ifdef KMP_DEBUG
428 char *buff;
429 // create format specifiers before the debug output
430 buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
431 "upper=%%%s stride = %%%s signed?<%s>\n",
432 traits_t<T>::spec, traits_t<T>::spec,
433 traits_t<ST>::spec, traits_t<T>::spec);
434 KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
435 __kmp_str_free(&buff);
437 #endif
438 KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
440 #if OMPT_SUPPORT && OMPT_OPTIONAL
441 if (ompt_enabled.ompt_callback_work) {
442 ompt_callbacks.ompt_callback(ompt_callback_work)(
443 ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
444 &(task_info->task_data), trip_count, codeptr);
446 if (ompt_enabled.ompt_callback_dispatch) {
447 ompt_dispatch_t dispatch_type;
448 ompt_data_t instance = ompt_data_none;
449 ompt_dispatch_chunk_t dispatch_chunk;
450 if (ompt_work_type == ompt_work_sections) {
451 dispatch_type = ompt_dispatch_section;
452 instance.ptr = codeptr;
453 } else {
454 OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupper, incr);
455 dispatch_type = (ompt_work_type == ompt_work_distribute)
456 ? ompt_dispatch_distribute_chunk
457 : ompt_dispatch_ws_loop_chunk;
458 instance.ptr = &dispatch_chunk;
460 ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
461 &(team_info->parallel_data), &(task_info->task_data), dispatch_type,
462 instance);
464 #endif
466 KMP_STATS_LOOP_END(OMP_loop_static_iterations);
467 return;
470 template <typename T>
471 static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
472 kmp_int32 schedule, kmp_int32 *plastiter,
473 T *plower, T *pupper, T *pupperDist,
474 typename traits_t<T>::signed_t *pstride,
475 typename traits_t<T>::signed_t incr,
476 typename traits_t<T>::signed_t chunk
477 #if OMPT_SUPPORT && OMPT_OPTIONAL
479 void *codeptr
480 #endif
482 KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
483 KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
484 KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
485 typedef typename traits_t<T>::unsigned_t UT;
486 typedef typename traits_t<T>::signed_t ST;
487 kmp_uint32 tid;
488 kmp_uint32 nth;
489 kmp_uint32 team_id;
490 kmp_uint32 nteams;
491 UT trip_count;
492 kmp_team_t *team;
493 kmp_info_t *th;
495 KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
496 KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
497 __kmp_assert_valid_gtid(gtid);
498 #ifdef KMP_DEBUG
500 char *buff;
501 // create format specifiers before the debug output
502 buff = __kmp_str_format(
503 "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
504 "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
505 traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
506 traits_t<ST>::spec, traits_t<T>::spec);
507 KD_TRACE(100,
508 (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
509 __kmp_str_free(&buff);
511 #endif
513 if (__kmp_env_consistency_check) {
514 __kmp_push_workshare(gtid, ct_pdo, loc);
515 if (incr == 0) {
516 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
517 loc);
519 if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
520 // The loop is illegal.
521 // Some zero-trip loops maintained by compiler, e.g.:
522 // for(i=10;i<0;++i) // lower >= upper - run-time check
523 // for(i=0;i>10;--i) // lower <= upper - run-time check
524 // for(i=0;i>10;++i) // incr > 0 - compile-time check
525 // for(i=10;i<0;--i) // incr < 0 - compile-time check
526 // Compiler does not check the following illegal loops:
527 // for(i=0;i<10;i+=incr) // where incr<0
528 // for(i=10;i>0;i-=incr) // where incr<0
529 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
532 tid = __kmp_tid_from_gtid(gtid);
533 th = __kmp_threads[gtid];
534 nth = th->th.th_team_nproc;
535 team = th->th.th_team;
536 KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
537 nteams = th->th.th_teams_size.nteams;
538 team_id = team->t.t_master_tid;
539 KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);
541 // compute global trip count
542 if (incr == 1) {
543 trip_count = *pupper - *plower + 1;
544 } else if (incr == -1) {
545 trip_count = *plower - *pupper + 1;
546 } else if (incr > 0) {
547 // upper-lower can exceed the limit of signed type
548 trip_count = (UT)(*pupper - *plower) / incr + 1;
549 } else {
550 trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
553 *pstride = *pupper - *plower; // just in case (can be unused)
554 if (trip_count <= nteams) {
555 KMP_DEBUG_ASSERT(
556 __kmp_static == kmp_sch_static_greedy ||
557 __kmp_static ==
558 kmp_sch_static_balanced); // Unknown static scheduling type.
559 // only primary threads of some teams get single iteration, other threads
560 // get nothing
561 if (team_id < trip_count && tid == 0) {
562 *pupper = *pupperDist = *plower = *plower + team_id * incr;
563 } else {
564 *pupperDist = *pupper;
565 *plower = *pupper + incr; // compiler should skip loop body
567 if (plastiter != NULL)
568 *plastiter = (tid == 0 && team_id == trip_count - 1);
569 } else {
570 // Get the team's chunk first (each team gets at most one chunk)
571 if (__kmp_static == kmp_sch_static_balanced) {
572 UT chunkD = trip_count / nteams;
573 UT extras = trip_count % nteams;
574 *plower +=
575 incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
576 *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
577 if (plastiter != NULL)
578 *plastiter = (team_id == nteams - 1);
579 } else {
580 T chunk_inc_count =
581 (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
582 T upper = *pupper;
583 KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
584 // Unknown static scheduling type.
585 *plower += team_id * chunk_inc_count;
586 *pupperDist = *plower + chunk_inc_count - incr;
587 // Check/correct bounds if needed
588 if (incr > 0) {
589 if (*pupperDist < *plower)
590 *pupperDist = traits_t<T>::max_value;
591 if (plastiter != NULL)
592 *plastiter = *plower <= upper && *pupperDist > upper - incr;
593 if (*pupperDist > upper)
594 *pupperDist = upper; // tracker C73258
595 if (*plower > *pupperDist) {
596 *pupper = *pupperDist; // no iterations available for the team
597 goto end;
599 } else {
600 if (*pupperDist > *plower)
601 *pupperDist = traits_t<T>::min_value;
602 if (plastiter != NULL)
603 *plastiter = *plower >= upper && *pupperDist < upper - incr;
604 if (*pupperDist < upper)
605 *pupperDist = upper; // tracker C73258
606 if (*plower < *pupperDist) {
607 *pupper = *pupperDist; // no iterations available for the team
608 goto end;
612 // Get the parallel loop chunk now (for thread)
613 // compute trip count for team's chunk
614 if (incr == 1) {
615 trip_count = *pupperDist - *plower + 1;
616 } else if (incr == -1) {
617 trip_count = *plower - *pupperDist + 1;
618 } else if (incr > 1) {
619 // upper-lower can exceed the limit of signed type
620 trip_count = (UT)(*pupperDist - *plower) / incr + 1;
621 } else {
622 trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
624 KMP_DEBUG_ASSERT(trip_count);
625 switch (schedule) {
626 case kmp_sch_static: {
627 if (trip_count <= nth) {
628 KMP_DEBUG_ASSERT(
629 __kmp_static == kmp_sch_static_greedy ||
630 __kmp_static ==
631 kmp_sch_static_balanced); // Unknown static scheduling type.
632 if (tid < trip_count)
633 *pupper = *plower = *plower + tid * incr;
634 else
635 *plower = *pupper + incr; // no iterations available
636 if (plastiter != NULL)
637 if (*plastiter != 0 && !(tid == trip_count - 1))
638 *plastiter = 0;
639 } else {
640 if (__kmp_static == kmp_sch_static_balanced) {
641 UT chunkL = trip_count / nth;
642 UT extras = trip_count % nth;
643 *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
644 *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
645 if (plastiter != NULL)
646 if (*plastiter != 0 && !(tid == nth - 1))
647 *plastiter = 0;
648 } else {
649 T chunk_inc_count =
650 (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
651 T upper = *pupperDist;
652 KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
653 // Unknown static scheduling type.
654 *plower += tid * chunk_inc_count;
655 *pupper = *plower + chunk_inc_count - incr;
656 if (incr > 0) {
657 if (*pupper < *plower)
658 *pupper = traits_t<T>::max_value;
659 if (plastiter != NULL)
660 if (*plastiter != 0 &&
661 !(*plower <= upper && *pupper > upper - incr))
662 *plastiter = 0;
663 if (*pupper > upper)
664 *pupper = upper; // tracker C73258
665 } else {
666 if (*pupper > *plower)
667 *pupper = traits_t<T>::min_value;
668 if (plastiter != NULL)
669 if (*plastiter != 0 &&
670 !(*plower >= upper && *pupper < upper - incr))
671 *plastiter = 0;
672 if (*pupper < upper)
673 *pupper = upper; // tracker C73258
677 break;
679 case kmp_sch_static_chunked: {
680 ST span;
681 if (chunk < 1)
682 chunk = 1;
683 span = chunk * incr;
684 *pstride = span * nth;
685 *plower = *plower + (span * tid);
686 *pupper = *plower + span - incr;
687 if (plastiter != NULL)
688 if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
689 *plastiter = 0;
690 break;
692 default:
693 KMP_ASSERT2(0,
694 "__kmpc_dist_for_static_init: unknown loop scheduling type");
695 break;
698 end:;
699 #ifdef KMP_DEBUG
701 char *buff;
702 // create format specifiers before the debug output
703 buff = __kmp_str_format(
704 "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
705 "stride=%%%s signed?<%s>\n",
706 traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
707 traits_t<ST>::spec, traits_t<T>::spec);
708 KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
709 __kmp_str_free(&buff);
711 #endif
712 KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
713 #if OMPT_SUPPORT && OMPT_OPTIONAL
714 if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
715 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
716 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
717 if (ompt_enabled.ompt_callback_work) {
718 ompt_callbacks.ompt_callback(ompt_callback_work)(
719 ompt_work_distribute, ompt_scope_begin, &(team_info->parallel_data),
720 &(task_info->task_data), 0, codeptr);
722 if (ompt_enabled.ompt_callback_dispatch) {
723 ompt_data_t instance = ompt_data_none;
724 ompt_dispatch_chunk_t dispatch_chunk;
725 OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupperDist, incr);
726 instance.ptr = &dispatch_chunk;
727 ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
728 &(team_info->parallel_data), &(task_info->task_data),
729 ompt_dispatch_distribute_chunk, instance);
732 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
733 KMP_STATS_LOOP_END(OMP_distribute_iterations);
734 return;
737 template <typename T>
738 static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
739 kmp_int32 *p_last, T *p_lb, T *p_ub,
740 typename traits_t<T>::signed_t *p_st,
741 typename traits_t<T>::signed_t incr,
742 typename traits_t<T>::signed_t chunk) {
743 // The routine returns the first chunk distributed to the team and
744 // stride for next chunks calculation.
745 // Last iteration flag set for the team that will execute
746 // the last iteration of the loop.
747 // The routine is called for dist_schedule(static,chunk) only.
748 typedef typename traits_t<T>::unsigned_t UT;
749 typedef typename traits_t<T>::signed_t ST;
750 kmp_uint32 team_id;
751 kmp_uint32 nteams;
752 UT trip_count;
753 T lower;
754 T upper;
755 ST span;
756 kmp_team_t *team;
757 kmp_info_t *th;
759 KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
760 KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
761 __kmp_assert_valid_gtid(gtid);
762 #ifdef KMP_DEBUG
764 char *buff;
765 // create format specifiers before the debug output
766 buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
767 "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
768 traits_t<T>::spec, traits_t<T>::spec,
769 traits_t<ST>::spec, traits_t<ST>::spec,
770 traits_t<T>::spec);
771 KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
772 __kmp_str_free(&buff);
774 #endif
776 lower = *p_lb;
777 upper = *p_ub;
778 if (__kmp_env_consistency_check) {
779 if (incr == 0) {
780 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
781 loc);
783 if (incr > 0 ? (upper < lower) : (lower < upper)) {
784 // The loop is illegal.
785 // Some zero-trip loops maintained by compiler, e.g.:
786 // for(i=10;i<0;++i) // lower >= upper - run-time check
787 // for(i=0;i>10;--i) // lower <= upper - run-time check
788 // for(i=0;i>10;++i) // incr > 0 - compile-time check
789 // for(i=10;i<0;--i) // incr < 0 - compile-time check
790 // Compiler does not check the following illegal loops:
791 // for(i=0;i<10;i+=incr) // where incr<0
792 // for(i=10;i>0;i-=incr) // where incr<0
793 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
796 th = __kmp_threads[gtid];
797 team = th->th.th_team;
798 KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
799 nteams = th->th.th_teams_size.nteams;
800 team_id = team->t.t_master_tid;
801 KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);
803 // compute trip count
804 if (incr == 1) {
805 trip_count = upper - lower + 1;
806 } else if (incr == -1) {
807 trip_count = lower - upper + 1;
808 } else if (incr > 0) {
809 // upper-lower can exceed the limit of signed type
810 trip_count = (UT)(upper - lower) / incr + 1;
811 } else {
812 trip_count = (UT)(lower - upper) / (-incr) + 1;
814 if (chunk < 1)
815 chunk = 1;
816 span = chunk * incr;
817 *p_st = span * nteams;
818 *p_lb = lower + (span * team_id);
819 *p_ub = *p_lb + span - incr;
820 if (p_last != NULL)
821 *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
822 // Correct upper bound if needed
823 if (incr > 0) {
824 if (*p_ub < *p_lb) // overflow?
825 *p_ub = traits_t<T>::max_value;
826 if (*p_ub > upper)
827 *p_ub = upper; // tracker C73258
828 } else { // incr < 0
829 if (*p_ub > *p_lb)
830 *p_ub = traits_t<T>::min_value;
831 if (*p_ub < upper)
832 *p_ub = upper; // tracker C73258
834 #ifdef KMP_DEBUG
836 char *buff;
837 // create format specifiers before the debug output
838 buff =
839 __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
840 "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
841 traits_t<T>::spec, traits_t<T>::spec,
842 traits_t<ST>::spec, traits_t<ST>::spec);
843 KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
844 __kmp_str_free(&buff);
846 #endif
849 //------------------------------------------------------------------------------
850 extern "C" {
852 @ingroup WORK_SHARING
853 @param loc Source code location
854 @param gtid Global thread id of this thread
855 @param schedtype Scheduling type
856 @param plastiter Pointer to the "last iteration" flag
857 @param plower Pointer to the lower bound
858 @param pupper Pointer to the upper bound
859 @param pstride Pointer to the stride
860 @param incr Loop increment
861 @param chunk The chunk size
863 Each of the four functions here are identical apart from the argument types.
865 The functions compute the upper and lower bounds and stride to be used for the
866 set of iterations to be executed by the current thread from the statically
867 scheduled loop that is described by the initial values of the bounds, stride,
868 increment and chunk size.
872 void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
873 kmp_int32 *plastiter, kmp_int32 *plower,
874 kmp_int32 *pupper, kmp_int32 *pstride,
875 kmp_int32 incr, kmp_int32 chunk) {
876 __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
877 pupper, pstride, incr, chunk
878 #if OMPT_SUPPORT && OMPT_OPTIONAL
880 OMPT_GET_RETURN_ADDRESS(0)
881 #endif
886 See @ref __kmpc_for_static_init_4
888 void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
889 kmp_int32 schedtype, kmp_int32 *plastiter,
890 kmp_uint32 *plower, kmp_uint32 *pupper,
891 kmp_int32 *pstride, kmp_int32 incr,
892 kmp_int32 chunk) {
893 __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
894 pupper, pstride, incr, chunk
895 #if OMPT_SUPPORT && OMPT_OPTIONAL
897 OMPT_GET_RETURN_ADDRESS(0)
898 #endif
903 See @ref __kmpc_for_static_init_4
905 void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
906 kmp_int32 *plastiter, kmp_int64 *plower,
907 kmp_int64 *pupper, kmp_int64 *pstride,
908 kmp_int64 incr, kmp_int64 chunk) {
909 __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
910 pupper, pstride, incr, chunk
911 #if OMPT_SUPPORT && OMPT_OPTIONAL
913 OMPT_GET_RETURN_ADDRESS(0)
914 #endif
919 See @ref __kmpc_for_static_init_4
921 void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
922 kmp_int32 schedtype, kmp_int32 *plastiter,
923 kmp_uint64 *plower, kmp_uint64 *pupper,
924 kmp_int64 *pstride, kmp_int64 incr,
925 kmp_int64 chunk) {
926 __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
927 pupper, pstride, incr, chunk
928 #if OMPT_SUPPORT && OMPT_OPTIONAL
930 OMPT_GET_RETURN_ADDRESS(0)
931 #endif
// Trailing call argument that passes the caller's return address to the
// templated implementation when the optional OMPT interface is built in;
// expands to nothing otherwise. The leading comma is part of the expansion,
// so it is written directly after the preceding argument.
#if OMPT_SUPPORT && OMPT_OPTIONAL
#define OMPT_CODEPTR_ARG , OMPT_GET_RETURN_ADDRESS(0)
#else
#define OMPT_CODEPTR_ARG
#endif
945 @ingroup WORK_SHARING
946 @param loc Source code location
947 @param gtid Global thread id of this thread
948 @param schedule Scheduling type for the parallel loop
949 @param plastiter Pointer to the "last iteration" flag
950 @param plower Pointer to the lower bound
951 @param pupper Pointer to the upper bound of loop chunk
952 @param pupperD Pointer to the upper bound of dist_chunk
953 @param pstride Pointer to the stride for parallel loop
954 @param incr Loop increment
955 @param chunk The chunk size for the parallel loop
957 Each of the four functions here are identical apart from the argument types.
959 The functions compute the upper and lower bounds and strides to be used for the
960 set of iterations to be executed by the current thread from the statically
961 scheduled loop that is described by the initial values of the bounds, strides,
962 increment and chunks for parallel loop and distribute constructs.
966 void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
967 kmp_int32 schedule, kmp_int32 *plastiter,
968 kmp_int32 *plower, kmp_int32 *pupper,
969 kmp_int32 *pupperD, kmp_int32 *pstride,
970 kmp_int32 incr, kmp_int32 chunk) {
971 __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
972 pupper, pupperD, pstride, incr,
973 chunk OMPT_CODEPTR_ARG);
977 See @ref __kmpc_dist_for_static_init_4
979 void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
980 kmp_int32 schedule, kmp_int32 *plastiter,
981 kmp_uint32 *plower, kmp_uint32 *pupper,
982 kmp_uint32 *pupperD, kmp_int32 *pstride,
983 kmp_int32 incr, kmp_int32 chunk) {
984 __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
985 pupper, pupperD, pstride, incr,
986 chunk OMPT_CODEPTR_ARG);
990 See @ref __kmpc_dist_for_static_init_4
992 void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
993 kmp_int32 schedule, kmp_int32 *plastiter,
994 kmp_int64 *plower, kmp_int64 *pupper,
995 kmp_int64 *pupperD, kmp_int64 *pstride,
996 kmp_int64 incr, kmp_int64 chunk) {
997 __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
998 pupper, pupperD, pstride, incr,
999 chunk OMPT_CODEPTR_ARG);
1003 See @ref __kmpc_dist_for_static_init_4
1005 void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
1006 kmp_int32 schedule, kmp_int32 *plastiter,
1007 kmp_uint64 *plower, kmp_uint64 *pupper,
1008 kmp_uint64 *pupperD, kmp_int64 *pstride,
1009 kmp_int64 incr, kmp_int64 chunk) {
1010 __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
1011 pupper, pupperD, pstride, incr,
1012 chunk OMPT_CODEPTR_ARG);
1018 //------------------------------------------------------------------------------
1019 // Auxiliary routines for Distribute Parallel Loop construct implementation
1020 // Transfer call to template< type T >
1021 // __kmp_team_static_init( ident_t *loc, int gtid,
1022 // int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )
1025 @ingroup WORK_SHARING
1027 @param loc Source location
1028 @param gtid Global thread id
1029 @param p_last pointer to last iteration flag
1030 @param p_lb pointer to Lower bound
1031 @param p_ub pointer to Upper bound
1032 @param p_st Step (or increment if you prefer)
1033 @param incr Loop increment
1034 @param chunk The chunk size to block with
1036 The functions compute the upper and lower bounds and stride to be used for the
1037 set of iterations to be executed by the current team from the statically
1038 scheduled loop that is described by the initial values of the bounds, stride,
1039 increment and chunk for the distribute construct as part of composite distribute
1040 parallel loop construct. These functions are all identical apart from the types
1041 of the arguments.
1044 void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
1045 kmp_int32 *p_lb, kmp_int32 *p_ub,
1046 kmp_int32 *p_st, kmp_int32 incr,
1047 kmp_int32 chunk) {
1048 KMP_DEBUG_ASSERT(__kmp_init_serial);
1049 __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
1050 chunk);
1054 See @ref __kmpc_team_static_init_4
1056 void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
1057 kmp_uint32 *p_lb, kmp_uint32 *p_ub,
1058 kmp_int32 *p_st, kmp_int32 incr,
1059 kmp_int32 chunk) {
1060 KMP_DEBUG_ASSERT(__kmp_init_serial);
1061 __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
1062 chunk);
1066 See @ref __kmpc_team_static_init_4
1068 void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
1069 kmp_int64 *p_lb, kmp_int64 *p_ub,
1070 kmp_int64 *p_st, kmp_int64 incr,
1071 kmp_int64 chunk) {
1072 KMP_DEBUG_ASSERT(__kmp_init_serial);
1073 __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
1074 chunk);
1078 See @ref __kmpc_team_static_init_4
1080 void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
1081 kmp_uint64 *p_lb, kmp_uint64 *p_ub,
1082 kmp_int64 *p_st, kmp_int64 incr,
1083 kmp_int64 chunk) {
1084 KMP_DEBUG_ASSERT(__kmp_init_serial);
1085 __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
1086 chunk);
1092 } // extern "C"