/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp_error.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512
// flags will be used in the future, e.g. to implement openmp_strict library
// restrictions
/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    __kmp_assign_root_init_mask();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}
/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shutdown the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
  // returns FALSE and __kmpc_end() will unregister this root (it can cause
  // library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));
    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}
/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the primary thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}
/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly, the runtime maintains
underlying threads even when they are not active (since the cost of creating
and destroying OS threads is high); this call counts all such threads even if
they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}
/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel
construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}
/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}
/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
 */
kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
#ifndef KMP_DEBUG

  return TRUE;

#else

  const char *semi2;
  const char *semi3;
  int line_no;

  if (__kmp_par_range == 0) {
    return TRUE;
  }
  semi2 = loc->psource;
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2 + 1, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
  return TRUE;

#endif /* KMP_DEBUG */
}
/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if
not.
*/
kmp_int32 __kmpc_in_parallel(ident_t *loc) {
  return __kmp_entry_thread()->th.th_root->r.r_active;
}
/*!
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct

Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
*/
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_threads(loc, global_tid, num_threads);
}
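
// Illustrative sketch (assumed lowering, not code generated by this file): a
// construct such as
//   #pragma omp parallel num_threads(4)
// is typically compiled so the clause value is pushed immediately before the
// fork, e.g. with a hypothetical outlined microtask `outlined_fn`:
//
//   kmp_int32 gtid = __kmpc_global_thread_num(&loc);
//   __kmpc_push_num_threads(&loc, gtid, 4);
//   __kmpc_fork_call(&loc, /*argc=*/0, (kmpc_micro)outlined_fn);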
void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
  /* the num_threads are automatically popped */
}
void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}
/*!
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  // If we were in a serial region, then stop the serial timer, record
  // the event, and start parallel region timer
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe to save thr_state is enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
                    kmp_va_addr_of(ap));
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
    );

    va_end(ap);

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif
  }

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}
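
// Illustrative sketch (assumed lowering; `outlined_fn`, `loc` and `x` are
// hypothetical names): a directive such as
//   #pragma omp parallel shared(x)
// is typically outlined by the compiler and forked through this entry point,
// with one trailing vararg per shared variable and argc giving their count:
//
//   void outlined_fn(kmp_int32 *global_tid, kmp_int32 *bound_tid, int *x) {
//     /* body of the parallel region, operating on *x */
//   }
//   ...
//   __kmpc_fork_call(&loc, /*argc=*/1, (kmpc_micro)outlined_fn, &x);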
/*!
@param loc source location information
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param cond condition for running in parallel
@param args struct of pointers to shared variables that aren't global

Perform a fork only if the condition is true.
*/
void __kmpc_fork_call_if(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                         kmp_int32 cond, void *args) {
  int gtid = __kmp_entry_gtid();
  if (cond) {
    if (args)
      __kmpc_fork_call(loc, argc, microtask, args);
    else
      __kmpc_fork_call(loc, argc, microtask);
  } else {
    __kmpc_serialized_parallel(loc, gtid);

#if OMPT_SUPPORT
    void *exit_frame_ptr;
#endif

    if (args)
      __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
                             /*npr=*/0, /*argc=*/1, &args
#if OMPT_SUPPORT
                             ,
                             &exit_frame_ptr
#endif
      );
    else
      __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
                             /*npr=*/0, /*argc=*/0, /*args=*/nullptr
#if OMPT_SUPPORT
                             ,
                             &exit_frame_ptr
#endif
      );

    __kmpc_end_serialized_parallel(loc, gtid);
  }
}
/*!
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}
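
// Illustrative sketch (assumed lowering, names hypothetical): for
//   #pragma omp teams num_teams(8) thread_limit(16)
// a compiler typically pushes the clause values and then forks the teams
// microtask:
//
//   kmp_int32 gtid = __kmpc_global_thread_num(&loc);
//   __kmpc_push_num_teams(&loc, gtid, /*num_teams=*/8, /*num_threads=*/16);
//   __kmpc_fork_teams(&loc, /*argc=*/0, (kmpc_micro)teams_outlined_fn);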
/*!
@param loc source location information
@param global_tid global thread number
@param thread_limit limit on number of threads which can be created within the
current task

Set the thread_limit for the current task.
This call is there to support the `thread_limit` clause on the `target` construct.
*/
void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 thread_limit) {
  __kmp_assert_valid_gtid(global_tid);
  kmp_info_t *thread = __kmp_threads[global_tid];
  if (thread_limit > 0)
    thread->th.th_current_task->td_icvs.task_thread_limit = thread_limit;
}
/*!
@param loc source location information
@param global_tid global thread number
@param num_teams_lb lower bound on number of teams requested for the teams
construct
@param num_teams_ub upper bound on number of teams requested for the teams
construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct. The number of initial
teams created will be greater than or equal to the lower bound and less than or
equal to the upper bound.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
                              kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
                              kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
                " num_teams_ub=%d num_threads=%d\n",
                global_tid, num_teams_lb, num_teams_ub, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
                          num_threads);
}
/*!
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...) {
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

#if KMP_STATS_ENABLED
  KMP_COUNT_BLOCK(OMP_TEAMS);
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
  }
#endif

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams called, set default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(
      loc, gtid, fork_context_intel, argc,
      VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
      VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
  );

  // Pop current CG root off list
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;
  if (i == 1) { // check if we are the last thread in CG (not always the case)
    __kmp_free(tmp);
  }
  // Restore current task's thread_limit from CG root
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}
// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
/*!
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  // The implementation is now in kmp_runtime.cpp so that it can share static
  // functions with kmp_fork_call since the tasks to be done are similar in
  // each case.
  __kmp_assert_valid_gtid(global_tid);
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_serialized_parallel(loc, global_tid);
}
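
// Illustrative sketch (assumed lowering, names hypothetical): for
//   #pragma omp parallel if (cond)
// the false branch serializes the region around a direct call of the outlined
// body instead of forking, as the documentation above describes:
//
//   kmp_int32 gtid = __kmpc_global_thread_num(&loc);
//   if (cond) {
//     __kmpc_fork_call(&loc, /*argc=*/1, (kmpc_micro)outlined_fn, &x);
//   } else {
//     __kmpc_serialized_parallel(&loc, gtid);
//     kmp_int32 bound_tid = 0;
//     outlined_fn(&gtid, &bound_tid, &x);
//     __kmpc_end_serialized_parallel(&loc, gtid);
//   }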
/*!
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
*/
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  /* skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  __kmp_assert_valid_gtid(global_tid);
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  kmp_task_team_t *task_team = this_thr->th.th_task_team;
  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
                            task_team->tt.tt_hidden_helper_task_encountered))
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));

  KMP_MB();
  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
    }

    // reset clear the task id only after unlinking the task
    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program | ompt_parallel_team,
          OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  /* If necessary, pop the internal control stack values and replace the team
   * values */
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  /* pop dispatch buffers stack */
  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  {
    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);
  }

  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

    /* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    __kmp_pop_current_task_from_thread(this_thr);
#if OMPD_SUPPORT
    if (ompd_state & OMPD_ENABLE_BP)
      ompd_bp_parallel_end();
#endif

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0]; /* JPH */
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
#if KMP_AFFINITY_SUPPORTED
    if (this_thr->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
      __kmp_reset_root_init_mask(global_tid);
    }
#endif
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  serial_team->t.t_level--;
  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
#endif
}
/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
*/
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need explicit __mf() here since use volatile instead in library */
  KMP_MFENCE(); /* Flush all pending memory write invalidates. */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}
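
// Illustrative sketch (assumed lowering): an explicit
//   #pragma omp flush
// carries no other state and becomes a single call with the source location:
//
//   __kmpc_flush(&loc);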
/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // TODO: explicit barrier_wait_id:
  // this function is called when 'barrier' directive is present or
  // implicit barrier at the end of a worksharing construct.
  // 1) better to add a per-thread barrier counter to a thread data structure
  // 2) set to 0 when a new team is created
  // 4) no sync is required

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}
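
// Illustrative sketch (assumed lowering): an explicit
//   #pragma omp barrier
// (and the implicit barrier ending most worksharing constructs) is emitted as:
//
//   kmp_int32 gtid = __kmpc_global_thread_num(&loc);
//   __kmpc_barrier(&loc, gtid);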
/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
*/
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
  int status = 0;

  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_COUNT_BLOCK(OMP_MASTER);
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_masked) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_masked)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }

  return status;
}
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>master</tt> region. This should only be called by the
thread that executes the <tt>master</tt> region.
*/
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
}
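
// Illustrative sketch (assumed lowering): the return value of __kmpc_master()
// guards the block, so
//   #pragma omp master
//   { body(); }
// becomes roughly:
//
//   if (__kmpc_master(&loc, gtid)) {
//     body();
//     __kmpc_end_master(&loc, gtid);
//   }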
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param filter result of evaluating filter clause on thread global_tid, or zero
if no filter clause present
@return 1 if this thread should execute the <tt>masked</tt> block, 0 otherwise.
*/
kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
  int status = 0;
  int tid;
  KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  tid = __kmp_tid_from_gtid(global_tid);
  if (tid == filter) {
    KMP_COUNT_BLOCK(OMP_MASKED);
    KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_masked) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;
      ompt_callbacks.ompt_callback(ompt_callback_masked)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_masked, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_masked, loc, NULL);
#endif
  }

  return status;
}
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>masked</tt> region. This should only be called by the
thread that executes the <tt>masked</tt> region.
*/
void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_pop_sync(global_tid, ct_masked, loc);
  }
}
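
// Illustrative sketch (assumed lowering): for
//   #pragma omp masked filter(expr)
// the compiler evaluates the filter expression and passes the result, guarding
// the block on the return value:
//
//   kmp_int32 filter = expr;
//   if (__kmpc_masked(&loc, gtid, filter)) {
//     body();
//     __kmpc_end_masked(&loc, gtid);
//   }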
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

Start execution of an <tt>ordered</tt> construct.
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.enabled) {
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}
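
// Illustrative sketch (assumed lowering): inside a loop compiled from
//   #pragma omp for ordered
// each `#pragma omp ordered` block is bracketed by the pair of calls above:
//
//   __kmpc_ordered(&loc, gtid);
//   ordered_body();
//   __kmpc_end_ordered(&loc, gtid);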
#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
  void *idx;
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
  __kmp_itt_critical_creating(ilk->lock, loc);
#endif
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
#if USE_ITT_BUILD
    __kmp_itt_critical_destroyed(ilk->lock);
#endif
    // We don't really need to destroy the unclaimed lock here since it will be
    // cleaned up at program exit.
    // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
}
// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid)                                       \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                          \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {    \
      kmp_uint32 spins;                                                        \
      KMP_FSYNC_PREPARE(l);                                                    \
      KMP_INIT_YIELD(spins);                                                   \
      kmp_backoff_t backoff = __kmp_spin_backoff_params;                       \
      do {                                                                     \
        if (TCR_4(__kmp_nth) >                                                 \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {             \
          KMP_YIELD(TRUE);                                                     \
        } else {                                                               \
          KMP_YIELD_SPIN(spins);                                               \
        }                                                                      \
        __kmp_spin_backoff(&backoff);                                          \
      } while (                                                                \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                        \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy));   \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(l);                                                     \
  }

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc)                                      \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free &&                         \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy);      \
  }

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid)                                       \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
#if KMP_USE_FUTEX

#include <sys/syscall.h>
#include <unistd.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    kmp_int32 gtid_code = (gtid + 1) << 1;                                     \
    KMP_MB();                                                                  \
    KMP_FSYNC_PREPARE(ftx);                                                    \
    kmp_int32 poll_val;                                                        \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(                            \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex),                         \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {   \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;                           \
      if (!cond) {                                                             \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val,            \
                                         poll_val |                            \
                                             KMP_LOCK_BUSY(1, futex))) {       \
          continue;                                                            \
        }                                                                      \
        poll_val |= KMP_LOCK_BUSY(1, futex);                                   \
      }                                                                        \
      kmp_int32 rc;                                                            \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val,     \
                        NULL, NULL, 0)) != 0) {                                \
        continue;                                                              \
      }                                                                        \
      gtid_code |= 1;                                                          \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(ftx);                                                   \
  }

// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc)                                    \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),     \
                                    KMP_LOCK_BUSY(gtid + 1 << 1, futex))) {    \
      KMP_FSYNC_ACQUIRED(ftx);                                                 \
      rc = TRUE;                                                               \
    } else {                                                                   \
      rc = FALSE;                                                              \
    }                                                                          \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    KMP_MB();                                                                  \
    KMP_FSYNC_RELEASING(ftx);                                                  \
    kmp_int32 poll_val =                                                       \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex));               \
    if (KMP_LOCK_STRIP(poll_val) & 1) {                                        \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE,                         \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0);                         \
    }                                                                          \
    KMP_MB();                                                                  \
    KMP_YIELD_OVERSUB();                                                       \
  }

#endif // KMP_USE_FUTEX
#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load doesn't need to be volatile
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock.
    // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(lck);
// __kmp_itt_critical_creating() should be called *before* the first usage
// of the underlying lock. It is the only place where we can guarantee it. There
// is a chance the lock will be destroyed with no usage, but that is not a
// problem, because this is not a real event seen by the user but rather setting
// a name for the object (lock). See more details in kmp_itt.h.
#endif /* USE_ITT_BUILD */

    // Use a cmpxchg instruction to slam the start of the critical section with
    // the lock pointer. If another thread beat us to it, deallocate the lock,
    // and use the lock that the other thread allocated.
    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

    if (status == 0) {
      // Deallocate the lock and reload the value.
#if USE_ITT_BUILD
      __kmp_itt_critical_destroyed(lck);
// Let ITT know the lock is destroyed and the same memory location may be reused
// for another purpose.
#endif /* USE_ITT_BUILD */
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }
  }
  return lck;
}

#endif // KMP_USE_DYNAMIC_LOCK
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
*/
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif // OMPT_SUPPORT
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
  KMP_COUNT_BLOCK(OMP_CRITICAL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
#endif
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  // TODO: add THR_OVHD_STATE

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

// since the critical directive binds to all threads, not just the current
// team we have to check this even if we are in a serialized team.
// also, even if we are the uber thread, we still have to conduct the lock,
// as we have to contend with sibling threads.

#if USE_ITT_BUILD
  __kmp_itt_critical_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;
    /* OMPT state update */
    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
    ti.state = ompt_state_wait_critical;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  KMP_POP_PARTITIONED_TIMER();

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif // KMP_USE_DYNAMIC_LOCK
}
#if KMP_USE_DYNAMIC_LOCK

// Converts the given hint to an internal lock implementation
static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#if KMP_USE_TSX
#define KMP_TSX_LOCK(seq) lockseq_##seq
#else
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif

  // Hints that do not require further logic
  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  // Rule out conflicting hints first by returning the default lock
  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  // Do not even consider speculation when it appears to be contended
  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  // Uncontended lock without speculation
  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
    return lockseq_tas;

  // Use RTM lock for speculation
  if (hint & omp_lock_hint_speculative)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;

  return __kmp_user_lock_seq;
}
#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
  if (user_lock) {
    switch (KMP_EXTRACT_D_TAG(user_lock)) {
    case 0:
      break;
#if KMP_USE_FUTEX
    case locktag_futex:
      return kmp_mutex_impl_queuing;
#endif
    case locktag_tas:
      return kmp_mutex_impl_spin;
#if KMP_USE_TSX
    case locktag_hle:
    case locktag_rtm_spin:
      return kmp_mutex_impl_speculative;
#endif
    default:
      return kmp_mutex_impl_none;
    }
    ilock = KMP_LOOKUP_I_LOCK(user_lock);
  }
  KMP_ASSERT(ilock);
  switch (ilock->type) {
#if KMP_USE_TSX
  case locktag_adaptive:
  case locktag_rtm_queuing:
    return kmp_mutex_impl_speculative;
#endif
  case locktag_nested_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case locktag_nested_futex:
#endif
  case locktag_ticket:
  case locktag_queuing:
  case locktag_drdpa:
  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;
  default:
    return kmp_mutex_impl_none;
  }
}
#else
// For locks without dynamic binding
static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {
  case lk_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case lk_futex:
#endif
  case lk_ticket:
  case lk_queuing:
  case lk_drdpa:
    return kmp_mutex_impl_queuing;
#if KMP_USE_TSX
  case lk_hle:
  case lk_rtm_queuing:
  case lk_rtm_spin:
  case lk_adaptive:
    return kmp_mutex_impl_speculative;
#endif
  default:
    return kmp_mutex_impl_none;
  }
}
#endif // KMP_USE_DYNAMIC_LOCK
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.
@param hint the lock hint.

Enter code protected by a `critical` construct with a hint. The hint value is
used to suggest a lock implementation. This function blocks until the executing
thread can enter the critical section unless the hint suggests use of
speculative execution and the hardware supports it.
*/
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {
  KMP_COUNT_BLOCK(OMP_CRITICAL);
  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
  // This is the case, if called from __kmpc_critical:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
#endif

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
  if (*lk == 0) {
    if (KMP_IS_D_LOCK(lockseq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(lockseq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
#if KMP_USE_INLINED_TAS
    if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }
  KMP_POP_PARTITIONED_TIMER();

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
} // __kmpc_critical_with_hint

#endif // KMP_USE_DYNAMIC_LOCK
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Leave a critical section, releasing any lock that was held during its execution.
*/
void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
                         kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  int locktag = KMP_EXTRACT_D_TAG(crit);
  if (locktag) {
    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
#if KMP_USE_INLINED_TAS
    if (locktag == locktag_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (locktag == locktag_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
  }

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

#if USE_ITT_BUILD
  __kmp_itt_critical_releasing(lck);
#endif /* USE_ITT_BUILD */
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK

#if OMPT_SUPPORT && OMPT_OPTIONAL
  /* OMPT release event triggers after lock is released; place here to trigger
   * for all #if branches */
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
        OMPT_LOAD_RETURN_ADDRESS(0));
  }
#endif

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
}
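
// Illustrative sketch (assumed lowering, names hypothetical): for
//   #pragma omp critical(name)
// the compiler emits one zero-initialized kmp_critical_name per construct name
// and brackets the protected code with the enter/exit calls:
//
//   static kmp_critical_name crit_name; // shared by every use of this name
//   __kmpc_critical(&loc, gtid, &crit_name);
//   protected_body();
//   __kmpc_end_critical(&loc, gtid, &crit_name);
//
// When a hint clause is present, the entry call is assumed to become
// __kmpc_critical_with_hint(&loc, gtid, &crit_name, hint) instead.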
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master. The barrier is executed inside
this function.
*/
kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  int status;
  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  return (status != 0) ? 0 : 1;
}
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Complete the execution of a combined barrier and master. This function should
only be called at the completion of the <tt>master</tt> code. Other threads will
still be waiting at the barrier and this call releases them.
*/
void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
}
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master(nowait) construct.
The barrier is executed inside this function.
There is no equivalent "end" function, since the
*/
kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 ret;
  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  ret = __kmpc_master(loc, global_tid);

  if (__kmp_env_consistency_check) {
    /* there's no __kmpc_end_master called; so the (stats) */
    /* actions of __kmpc_end_master are done here */
    if (ret) {
      /* only one thread should do the pop since only */
      /* one did the push (see __kmpc_master()) */
      __kmp_pop_sync(global_tid, ct_master, loc);
    }
  }

  return (ret);
}
/* The BARRIER for a SINGLE process section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number
@return One if this thread should execute the single construct, zero otherwise.

Test whether to execute a <tt>single</tt> construct.
There are no implicit barriers in the two "single" calls; rather, the compiler
should introduce an explicit barrier if it is required.
*/
1866 kmp_int32
__kmpc_single(ident_t
*loc
, kmp_int32 global_tid
) {
1867 __kmp_assert_valid_gtid(global_tid
);
1868 kmp_int32 rc
= __kmp_enter_single(global_tid
, loc
, TRUE
);
1871 // We are going to execute the single statement, so we should count it.
1872 KMP_COUNT_BLOCK(OMP_SINGLE
);
1873 KMP_PUSH_PARTITIONED_TIMER(OMP_single
);
1876 #if OMPT_SUPPORT && OMPT_OPTIONAL
1877 kmp_info_t
*this_thr
= __kmp_threads
[global_tid
];
1878 kmp_team_t
*team
= this_thr
->th
.th_team
;
1879 int tid
= __kmp_tid_from_gtid(global_tid
);
1881 if (ompt_enabled
.enabled
) {
1883 if (ompt_enabled
.ompt_callback_work
) {
1884 ompt_callbacks
.ompt_callback(ompt_callback_work
)(
1885 ompt_work_single_executor
, ompt_scope_begin
,
1886 &(team
->t
.ompt_team_info
.parallel_data
),
1887 &(team
->t
.t_implicit_task_taskdata
[tid
].ompt_task_info
.task_data
),
1888 1, OMPT_GET_RETURN_ADDRESS(0));
1891 if (ompt_enabled
.ompt_callback_work
) {
1892 ompt_callbacks
.ompt_callback(ompt_callback_work
)(
1893 ompt_work_single_other
, ompt_scope_begin
,
1894 &(team
->t
.ompt_team_info
.parallel_data
),
1895 &(team
->t
.t_implicit_task_taskdata
[tid
].ompt_task_info
.task_data
),
1896 1, OMPT_GET_RETURN_ADDRESS(0));
1897 ompt_callbacks
.ompt_callback(ompt_callback_work
)(
1898 ompt_work_single_other
, ompt_scope_end
,
1899 &(team
->t
.ompt_team_info
.parallel_data
),
1900 &(team
->t
.t_implicit_task_taskdata
[tid
].ompt_task_info
.task_data
),
1901 1, OMPT_GET_RETURN_ADDRESS(0));
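/* Illustrative sketch (assumption): since neither __kmpc_single() nor
   __kmpc_end_single() performs a barrier, a compiler lowering
   `#pragma omp single` (without `nowait`) is expected to emit the closing
   barrier itself, e.g.:

     if (__kmpc_single(&loc, gtid)) {
       // ... body of the single region ...
       __kmpc_end_single(&loc, gtid);
     }
     __kmpc_barrier(&loc, gtid); // explicit barrier added by the compiler
*/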
/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number

Mark the end of a <tt>single</tt> construct. This function should
only be called by the thread that executed the block of code protected
by the `single` construct.
*/
void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
  __kmp_assert_valid_gtid(global_tid);
  __kmp_exit_single(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_single_executor, ompt_scope_end,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}
/*!
@ingroup WORK_SHARING
@param loc Source location
@param global_tid Global thread id

Mark the end of a statically scheduled loop.
*/
void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
  KMP_POP_PARTITIONED_TIMER();
  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_work_t ompt_work_type = ompt_work_loop;
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
      ompt_work_type = ompt_work_loop;
    } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
      ompt_work_type = ompt_work_sections;
    } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
      ompt_work_type = ompt_work_distribute;
    } else {
      // use default set above.
      // a warning about this case is provided in __kmpc_for_static_init
    }
    KMP_DEBUG_ASSERT(ompt_work_type);

    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(global_tid, ct_pdo, loc);
}
// User routines which take C-style arguments (call by value)
// different from the Fortran equivalent routines

void ompc_set_num_threads(int arg) {
  // !!!!! TODO: check the per-task binding
  __kmp_set_num_threads(arg, __kmp_entry_gtid());
}

void ompc_set_dynamic(int flag) {
  kmp_info_t *thread;

  /* For the thread-private implementation of the internal controls */
  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__dynamic(thread, flag ? true : false);
}

void ompc_set_nested(int flag) {
  kmp_info_t *thread;

  /* For the thread-private internal controls implementation */
  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
}

void ompc_set_max_active_levels(int max_active_levels) {
  /* we want per-task implementation of this internal control */

  /* For the per-thread internal controls implementation */
  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
}

void ompc_set_schedule(omp_sched_t kind, int modifier) {
  // !!!!! TODO: check the per-task binding
  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
}

int ompc_get_ancestor_thread_num(int level) {
  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
}

int ompc_get_team_size(int level) {
  return __kmp_get_team_size(__kmp_entry_gtid(), level);
}
/* OpenMP 5.0 Affinity Format API */
void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
                         format, KMP_STRLEN(format) + 1);
}

size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {
  size_t format_size;
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  format_size = KMP_STRLEN(__kmp_affinity_format);
  if (buffer && size) {
    __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
                           format_size + 1);
  }
  return format_size;
}

void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
  int gtid;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  __kmp_assign_root_init_mask();
  gtid = __kmp_get_gtid();
#if KMP_AFFINITY_SUPPORTED
  if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
      __kmp_affinity.flags.reset) {
    __kmp_reset_root_init_mask(gtid);
  }
#endif
  __kmp_aux_display_affinity(gtid, format);
}

size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
                                              char const *format) {
  int gtid;
  size_t num_required;
  kmp_str_buf_t capture_buf;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  __kmp_assign_root_init_mask();
  gtid = __kmp_get_gtid();
#if KMP_AFFINITY_SUPPORTED
  if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
      __kmp_affinity.flags.reset) {
    __kmp_reset_root_init_mask(gtid);
  }
#endif
  __kmp_str_buf_init(&capture_buf);
  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
  if (buffer && buf_size) {
    __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
                           capture_buf.used + 1);
  }
  __kmp_str_buf_free(&capture_buf);
  return num_required;
}
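/* Usage sketch (user-level view, an assumption for illustration): the OpenMP
   5.0 routine omp_capture_affinity(), which maps onto ompc_capture_affinity()
   above, can be called twice -- once to learn the required size (a NULL
   buffer is tolerated because the copy is guarded by `if (buffer && buf_size)`),
   once to fill an allocated buffer. A NULL format selects affinity-format-var.

     size_t need = omp_capture_affinity(NULL, 0, NULL);
     char *buf = (char *)malloc(need + 1);
     omp_capture_affinity(buf, need + 1, NULL);
     // ... use buf ...
     free(buf);
*/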
void kmpc_set_stacksize(int arg) {
  // __kmp_aux_set_stacksize initializes the library if needed
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_stacksize_s(size_t arg) {
  // __kmp_aux_set_stacksize initializes the library if needed
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_blocktime(int arg) {
  int gtid, tid, bt = arg;
  kmp_info_t *thread;

  gtid = __kmp_entry_gtid();
  tid = __kmp_tid_from_gtid(gtid);
  thread = __kmp_thread_from_gtid(gtid);

  __kmp_aux_convert_blocktime(&bt);
  __kmp_aux_set_blocktime(bt, thread, tid);
}

void kmpc_set_library(int arg) {
  // __kmp_user_set_library initializes the library if needed
  __kmp_user_set_library((enum library_type)arg);
}

void kmpc_set_defaults(char const *str) {
  // __kmp_aux_set_defaults initializes the library if needed
  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
}

void kmpc_set_disp_num_buffers(int arg) {
  // ignore after initialization because some teams have already
  // allocated dispatch buffers
  if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&
      arg <= KMP_MAX_DISP_NUM_BUFF) {
    __kmp_dispatch_num_buffers = arg;
  }
}

int kmpc_set_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  __kmp_assign_root_init_mask();
  return __kmp_aux_set_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  __kmp_assign_root_init_mask();
  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_get_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  __kmp_assign_root_init_mask();
  return __kmp_aux_get_affinity_mask_proc(proc, mask);
#endif
}
/* -------------------------------------------------------------------------- */
/*!
@ingroup THREADPRIVATE
@param loc source location information
@param gtid global thread number
@param cpy_size size of the cpy_data buffer
@param cpy_data pointer to data to be copied
@param cpy_func helper function to call for copying data
@param didit flag variable: 1=single thread; 0=not single thread

__kmpc_copyprivate implements the interface for the private data broadcast
needed for the copyprivate clause associated with a single region in an
OpenMP<sup>*</sup> program (both C and Fortran).
All threads participating in the parallel region call this routine.
One of the threads (called the single thread) should have the <tt>didit</tt>
variable set to 1 and all other threads should have that variable set to 0.
All threads pass a pointer to a data buffer (cpy_data) that they have built.

The OpenMP specification forbids the use of nowait on the single region when a
copyprivate clause is present. However, @ref __kmpc_copyprivate implements a
barrier internally to avoid race conditions, so the code generation for the
single region should avoid generating a barrier after the call to @ref
__kmpc_copyprivate.

The <tt>gtid</tt> parameter is the global thread id for the current thread.
The <tt>loc</tt> parameter is a pointer to source location information.

Internal implementation: The single thread will first copy its descriptor
address (cpy_data) to a team-private location, then the other threads will each
call the function pointed to by the parameter cpy_func, which carries out the
copy by copying the data using the cpy_data buffer.

The cpy_func routine used for the copy and the contents of the data area defined
by cpy_data and cpy_size may be built in any fashion that will allow the copy
to be done. For instance, the cpy_data buffer can hold the actual data to be
copied or it may hold a list of pointers to the data. The cpy_func routine must
interpret the cpy_data buffer appropriately.

The interface to cpy_func is as follows:

  void cpy_func( void *destination, void *source )

where void *destination is the cpy_data pointer for the thread being copied to
and void *source is the cpy_data pointer for the thread being copied from.
*/
void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
                        void *cpy_data, void (*cpy_func)(void *, void *),
                        kmp_int32 didit) {
  void **data_ptr;
  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
  }

  // ToDo: Optimize the following two barriers into some kind of split barrier

  if (didit)
    *data_ptr = cpy_data;

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  /* This barrier is not a barrier region boundary */
  __kmp_threads[gtid]->th.th_ident = loc;
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);

  if (!didit)
    (*cpy_func)(cpy_data, *data_ptr);

// Consider next barrier a user-visible barrier for barrier region boundaries
// Nesting checks are already handled by the single construct checks
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
  // tasks can overwrite the location)
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}
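/* Illustrative sketch (assumption, not part of the runtime): a cpy_func
   matching the documented interface, where each thread's cpy_data points at a
   small descriptor holding the address of its copyprivate variable. The type
   and helper names below are made up for the example.

     typedef struct { int *var; } cpy_desc_t;

     static void example_cpy_func(void *destination, void *source) {
       cpy_desc_t *dst = (cpy_desc_t *)destination; // descriptor of the copying thread
       cpy_desc_t *src = (cpy_desc_t *)source;      // descriptor of the single thread
       *dst->var = *src->var; // broadcast the value produced in the single region
     }
*/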
/* -------------------------------------------------------------------------- */
/*!
@ingroup THREADPRIVATE
@param loc source location information
@param gtid global thread number
@param cpy_data pointer to the data to be saved/copied or 0
@return the saved pointer to the data

__kmpc_copyprivate_light is a lighter version of __kmpc_copyprivate:
__kmpc_copyprivate_light only saves the pointer it's given (if it's not 0, so
coming from single), and returns that pointer in all calls (for single thread
it's not needed). This version doesn't do any actual data copying. Data copying
has to be done somewhere else, e.g. inline in the generated code. Due to this,
this function doesn't have any barrier at the end of the function, like
__kmpc_copyprivate does, so generated code needs a barrier after copying of all
data is done.
*/
void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) {
  void **data_ptr;

  KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid));

  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
  }

  // ToDo: Optimize the following barrier

  if (cpy_data)
    *data_ptr = cpy_data;

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif
  /* This barrier is not a barrier region boundary */
  __kmp_threads[gtid]->th.th_ident = loc;
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);

  return *data_ptr;
}
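/* Illustrative sketch (assumption): generated code using the light variant is
   expected to perform the copy inline and to supply its own closing barrier,
   as the comment above requires:

     void *src = __kmpc_copyprivate_light(&loc, gtid, didit ? my_data : NULL);
     if (!didit)
       my_var = *(int *)src;       // inline copy emitted by the compiler
     __kmpc_barrier(&loc, gtid);   // barrier after all copies are done
*/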
/* -------------------------------------------------------------------------- */

#define INIT_LOCK __kmp_init_user_lock_with_checks
#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
#define ACQUIRE_NESTED_LOCK_TIMED                                              \
  __kmp_acquire_nested_user_lock_with_checks_timed
#define RELEASE_LOCK __kmp_release_user_lock_with_checks
#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
#define TEST_LOCK __kmp_test_user_lock_with_checks
#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks

// TODO: Make check abort messages use location info & pass it into
// with_checks routines

#if KMP_USE_DYNAMIC_LOCK
// internal lock initializer
static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
                                                    kmp_dyna_lockseq_t seq) {
  if (KMP_IS_D_LOCK(seq)) {
    KMP_INIT_D_LOCK(lock, seq);
    __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
  } else {
    KMP_INIT_I_LOCK(lock, seq);
    kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
    __kmp_itt_lock_creating(ilk->lock, loc);
  }
}

// internal nest lock initializer
static __forceinline void
__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
                               kmp_dyna_lockseq_t seq) {
  // Don't have nested lock implementation for speculative locks
  if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
      seq == lockseq_rtm_spin || seq == lockseq_adaptive)
    seq = __kmp_user_lock_seq;

  switch (seq) {
  case lockseq_tas:
    seq = lockseq_nested_tas;
    break;
  case lockseq_futex:
    seq = lockseq_nested_futex;
    break;
  case lockseq_ticket:
    seq = lockseq_nested_ticket;
    break;
  case lockseq_queuing:
    seq = lockseq_nested_queuing;
    break;
  case lockseq_drdpa:
    seq = lockseq_nested_drdpa;
    break;
  default:
    seq = lockseq_nested_queuing;
  }
  KMP_INIT_I_LOCK(lock, seq);
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
  __kmp_itt_lock_creating(ilk->lock, loc);
}
/* initialize the lock with a hint */
void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
                                uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
  }

  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
}

/* initialize the lock with a hint */
void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
                                     void **user_lock, uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
  }

  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
}

#endif // KMP_USE_DYNAMIC_LOCK
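/* Usage sketch (user-level view, an assumption for illustration): these entry
   points back the hinted lock API from omp.h. The hint is advisory only;
   __kmp_map_hint_to_lock() above picks the closest supported lock sequence.

     omp_lock_t l;
     omp_init_lock_with_hint(&l, omp_lock_hint_speculative);
     omp_set_lock(&l);
     // ... critical work ...
     omp_unset_lock(&l);
     omp_destroy_lock(&l);
*/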
/* initialize the lock */
void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock");
  }
  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }
  INIT_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_creating(lck);
#endif /* USE_ITT_BUILD */

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_lock
/* initialize the lock */
void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
  }
  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_nest_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }

  INIT_NESTED_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_creating(lck);
#endif /* USE_ITT_BUILD */

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_nest_lock
void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  kmp_user_lock_p lck;
  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
    lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
  } else {
    lck = (kmp_user_lock_p)user_lock;
  }
  __kmp_itt_lock_destroyed(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_destroyed(lck);
#endif /* USE_ITT_BUILD */
  DESTROY_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  } else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_lock
/* destroy the lock */
void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
  __kmp_itt_lock_destroyed(ilk->lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_destroyed(lck);
#endif /* USE_ITT_BUILD */

  DESTROY_NESTED_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    ;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    ;
  } else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_nest_lock
void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  KMP_COUNT_BLOCK(OMP_set_lock);
#if KMP_USE_DYNAMIC_LOCK
  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(
      user_lock); // itt function will get to the right lock object.
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
#if USE_ITT_BUILD
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  ACQUIRE_LOCK(lck, gtid);

#if USE_ITT_BUILD
  __kmp_itt_lock_acquired(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(user_lock),
          (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
  }
#endif
  int acquire_status =
      KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)acquire_status;
#if USE_ITT_BUILD
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK
  int acquire_status;
  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
          codeptr);
    }
  }
#endif

  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);

#if USE_ITT_BUILD
  __kmp_itt_lock_acquired(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_RELEASE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  /* Can't use serial interval since not block structured */
  /* release the lock */

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
// "fast" path implemented to fix customer performance issue
#if USE_ITT_BUILD
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif /* USE_ITT_BUILD */
    TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
    KMP_MB();

#if OMPT_SUPPORT && OMPT_OPTIONAL
    // This is the case, if called from omp_init_lock_with_hint:
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.ompt_callback_mutex_released) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
          ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing(lck);
#endif /* USE_ITT_BUILD */

  RELEASE_LOCK(lck, gtid);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
/* release the lock */
void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif
  int release_status =
      KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)release_status;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_prev
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  /* Can't use serial interval since not block structured */

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    // "fast" path implemented to fix customer performance issue
    kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
#if USE_ITT_BUILD
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
    int release_status = KMP_LOCK_STILL_HELD;
#endif

    if (--(tl->lk.depth_locked) == 0) {
      TCW_4(tl->lk.poll, 0);
#if OMPT_SUPPORT && OMPT_OPTIONAL
      release_status = KMP_LOCK_RELEASED;
#endif
    }
    KMP_MB();

#if OMPT_SUPPORT && OMPT_OPTIONAL
    // This is the case, if called from omp_init_lock_with_hint:
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.enabled) {
      if (release_status == KMP_LOCK_RELEASED) {
        if (ompt_enabled.ompt_callback_mutex_released) {
          // release_lock_last
          ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
              ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
        }
      } else if (ompt_enabled.ompt_callback_nest_lock) {
        // release_lock_previous
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing(lck);
#endif /* USE_ITT_BUILD */

  int release_status;
  release_status = RELEASE_NESTED_LOCK(lck, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_previous
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
/* try to acquire the lock */
int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  KMP_COUNT_BLOCK(OMP_test_lock);

#if KMP_USE_DYNAMIC_LOCK
  int rc;
  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_test_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
  } else
#endif
  {
    rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
  if (rc) {
#if USE_ITT_BUILD
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
#endif
    return FTN_TRUE;
  } else {
#if USE_ITT_BUILD
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
#endif
    return FTN_FALSE;
  }

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_test_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  rc = TEST_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  return (rc ? FTN_TRUE : FTN_FALSE);

  /* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}
/* try to acquire the lock */
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_test_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
  }
#endif
  return rc;

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled &&
      ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_test_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
        codeptr);
  }
#endif

  rc = TEST_NESTED_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
  }
#endif
  return rc;

  /* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}
// Interface to fast scalable reduce methods routines

// keep the selected method in a thread local structure for cross-function
// usage: will be used in __kmpc_end_reduce* functions;
// another solution: to re-determine the method one more time in
// __kmpc_end_reduce* functions (new prototype required then)
// AT: which solution is better?
#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod)                              \
  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))

#define __KMP_GET_REDUCTION_METHOD(gtid)                                       \
  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)

// description of the packed_reduction_method variable: look at the macros in
// kmp.h
// used in a critical section reduce block
static __forceinline void
__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                          kmp_critical_name *crit) {

  // this lock was visible to a customer and to the threading profile tool as a
  // serial overhead span (although it's used for an internal purpose only)
  // why was it visible in previous implementation?
  // should we keep it visible in new reduce block?
  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  if (*((kmp_int32 *)crit) == 0) {
    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(__kmp_user_lock_seq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid,
                                KMP_GET_I_TAG(__kmp_user_lock_seq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32 byte critical sections. If there isn't enough space, then we
  // have to use a pointer.
  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
    lck = (kmp_user_lock_p)crit;
  } else {
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }
  KMP_DEBUG_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
}
// used in a critical section reduce block
static __forceinline void
__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                        kmp_critical_name *crit) {

  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32 byte critical sections. If there isn't enough space, then we have
  // to use a pointer.
  if (__kmp_base_user_lock_size > 32) {
    lck = *((kmp_user_lock_p *)crit);
    KMP_ASSERT(lck != NULL);
  } else {
    lck = (kmp_user_lock_p)crit;
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmp_end_critical_section_reduce_block
static __forceinline int
__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
                                     int *task_state) {
  kmp_team_t *team;

  // Check if we are inside the teams construct?
  if (th->th.th_teams_microtask) {
    *team_p = team = th->th.th_team;
    if (team->t.t_level == th->th.th_teams_level) {
      // This is reduction at teams construct.
      KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
      // Let's swap teams temporarily for the reduction.
      th->th.th_info.ds.ds_tid = team->t.t_master_tid;
      th->th.th_team = team->t.t_parent;
      th->th.th_team_nproc = th->th.th_team->t.t_nproc;
      th->th.th_task_team = th->th.th_team->t.t_task_team[0];
      *task_state = th->th.th_task_state;
      th->th.th_task_state = 0;

      return 1;
    }
  }
  return 0;
}
static __forceinline void
__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
  th->th.th_info.ds.ds_tid = 0;
  th->th.th_team = team;
  th->th.th_team_nproc = team->t.t_nproc;
  th->th.th_task_team = team->t.t_task_team[task_state];
  __kmp_type_convert(task_state, &(th->th.th_task_state));
}
3556 /* 2.a.i. Reduce Block without a terminating barrier */
3558 @ingroup SYNCHRONIZATION
3559 @param loc source location information
3560 @param global_tid global thread number
3561 @param num_vars number of items (variables) to be reduced
3562 @param reduce_size size of data in bytes to be reduced
3563 @param reduce_data pointer to data to be reduced
3564 @param reduce_func callback function providing reduction operation on two
3565 operands and returning result of reduction in lhs_data
3566 @param lck pointer to the unique lock data structure
3567 @result 1 for the primary thread, 0 for all other team threads, 2 for all team
3568 threads if atomic reduction needed
3570 The nowait version is used for a reduce clause with the nowait argument.
3573 __kmpc_reduce_nowait(ident_t
*loc
, kmp_int32 global_tid
, kmp_int32 num_vars
,
3574 size_t reduce_size
, void *reduce_data
,
3575 void (*reduce_func
)(void *lhs_data
, void *rhs_data
),
3576 kmp_critical_name
*lck
) {
3578 KMP_COUNT_BLOCK(REDUCE_nowait
);
3580 PACKED_REDUCTION_METHOD_T packed_reduction_method
;
3583 int teams_swapped
= 0, task_state
;
3584 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid
));
3585 __kmp_assert_valid_gtid(global_tid
);
3587 // why do we need this initialization here at all?
3588 // Reduction clause can not be used as a stand-alone directive.
3590 // do not call __kmp_serial_initialize(), it will be called by
3591 // __kmp_parallel_initialize() if needed
3592 // possible detection of false-positive race by the threadchecker ???
3593 if (!TCR_4(__kmp_init_parallel
))
3594 __kmp_parallel_initialize();
3596 __kmp_resume_if_soft_paused();
3598 // check correctness of reduce block nesting
3599 #if KMP_USE_DYNAMIC_LOCK
3600 if (__kmp_env_consistency_check
)
3601 __kmp_push_sync(global_tid
, ct_reduce
, loc
, NULL
, 0);
3603 if (__kmp_env_consistency_check
)
3604 __kmp_push_sync(global_tid
, ct_reduce
, loc
, NULL
);
3607 th
= __kmp_thread_from_gtid(global_tid
);
3608 teams_swapped
= __kmp_swap_teams_for_teams_reduction(th
, &team
, &task_state
);
3610 // packed_reduction_method value will be reused by __kmp_end_reduce* function,
3611 // the value should be kept in a variable
3612 // the variable should be either a construct-specific or thread-specific
  // property, not a team specific property
  //     (a thread can reach the next reduce block on the next construct,
  //     reduce method may differ on the next construct)
  // an ident_t "loc" parameter could be used as a construct-specific property
  //     (what if loc == 0?)
  //     (if both construct-specific and team-specific variables were shared,
  //     then unnecessary extra syncs should be needed)
  // a thread-specific variable is better regarding two issues above (next
  // construct and extra syncs)
  // a thread-specific "th_local.reduction_method" variable is used currently
  // each thread executes 'determine' and 'set' lines (no need to execute by one
  // thread, to avoid unnecessary extra syncs)

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  OMPT_REDUCTION_DECL(th, global_tid);
  if (packed_reduction_method == critical_reduce_block) {

    OMPT_REDUCTION_BEGIN;

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    OMPT_REDUCTION_BEGIN;

    // usage: if team size == 1, no synchronization is required (Intel
    // platforms only)
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

    // all threads should do this pop here (because __kmpc_end_reduce_nowait()
    // won't be called by the code gen)
    // (it's not quite good, because the checking block has been closed by
    // this 'pop', but the atomic operation has not been executed yet; it will
    // be executed slightly later, literally on the next instruction)
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_reduce, loc);

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // AT: performance issue: a real barrier here
    // AT: (if the primary thread is slow, other threads are blocked here
    //     waiting for the primary thread to come and release them)
    // AT: (it's not what a customer might expect specifying the NOWAIT clause)
    // AT: (specifying NOWAIT won't result in improvement of performance; it'll
    //     be confusing to a customer)
    // AT: another implementation of *barrier_gather*nowait() (or some other
    //     design) might go faster and be more in line with the sense of NOWAIT
    // AT: TO DO: do epcc test and compare times

    // this barrier should be invisible to a customer and to the threading
    // profile tool (it's neither a terminating barrier nor customer's code;
    // it's used for an internal purpose)
#if OMPT_SUPPORT
    // JP: can this barrier potentially lead to task scheduling?
    // JP: as long as there is a barrier in the implementation, OMPT should and
    // will provide the barrier events, so we set up the necessary
    // frame/return addresses.
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all other workers except the primary thread should do this pop here
    // (none of the other workers will get to __kmpc_end_reduce_nowait())
    if (__kmp_env_consistency_check) {
      if (retval == 0) {
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
  KA_TRACE(
      10,
      ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
       global_tid, packed_reduction_method, retval));

  return retval;
}
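
/* Illustrative sketch (not part of the runtime): user code of roughly this
   shape is what an OpenMP compiler typically lowers to the
   __kmpc_reduce_nowait()/__kmpc_end_reduce_nowait() pair above; the exact
   calls emitted are compiler-dependent.

     int sum_nowait(const int *a, int n) {
       int s = 0;
     #pragma omp parallel
       {
     #pragma omp for reduction(+ : s) nowait
         for (int i = 0; i < n; ++i)
           s += a[i];
       }
       return s;
     }
*/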
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a reduce nowait.
*/
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                              kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
    OMPT_REDUCTION_END;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required (on Intel
    // platforms only)

    OMPT_REDUCTION_END;

  } else if (packed_reduction_method == atomic_reduce_block) {

    // neither the primary thread nor the other workers should get here
    // (code gen does not generate this call in case 2: atomic reduce block)
    // actually it's better to remove this elseif at all;
    // after removal this value will be checked by the 'else' and will assert

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only the primary thread gets here
    // OMPT: tree reduction is annotated in the barrier code

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}
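
/* Schematic of the expected code-generation contract (a sketch under assumed
   names, not code this runtime emits): the compiler switches on the value
   returned by __kmpc_reduce_nowait(). All of loc, gtid, data, reducer, crit,
   s and s_private below are hypothetical compiler-generated entities.

     switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(s), data, reducer,
                                  &crit)) {
     case 1: // critical/empty/tree path: combine, then end the block
       s += s_private;
       __kmpc_end_reduce_nowait(loc, gtid, &crit);
       break;
     case 2: // atomic path: one atomic update per reduction variable
     #pragma omp atomic
       s += s_private;
       break;
     default: // 0: this thread already contributed via the tree barrier
       break;
     }
*/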
/* 2.a.ii. Reduce Block with a terminating barrier */

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two
operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the primary thread, 0 for all other team threads, 2 for all team
threads if atomic reduction needed

A blocking reduce that includes an implicit barrier.
*/
kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                        size_t reduce_size, void *reduce_data,
                        void (*reduce_func)(void *lhs_data, void *rhs_data),
                        kmp_critical_name *lck) {
  KMP_COUNT_BLOCK(REDUCE_wait);
  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;

  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  // why do we need this initialization here at all?
  // Reduction clause can not be a stand-alone directive.

  // do not call __kmp_serial_initialize(), it will be called by
  // __kmp_parallel_initialize() if needed
  // possible detection of false-positive race by the threadchecker ???
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  OMPT_REDUCTION_DECL(th, global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    OMPT_REDUCTION_BEGIN;
    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    OMPT_REDUCTION_BEGIN;
    // usage: if team size == 1, no synchronization is required (Intel
    // platforms only)
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // case tree_reduce_block:
    // this barrier should be visible to a customer and to the threading
    // profile tool (it's a terminating barrier on constructs if NOWAIT not
    // specified)
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident =
        loc; // needed for correct notification of frames
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, TRUE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all other workers except the primary thread should do this pop here
    // (none of the other workers except the primary will enter
    // __kmpc_end_reduce())
    if (__kmp_env_consistency_check) {
      if (retval == 0) { // 0: all other workers; 1: primary thread
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }

  KA_TRACE(10,
           ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
            global_tid, packed_reduction_method, retval));
  return retval;
}
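
/* Illustrative sketch (not part of the runtime): without the nowait clause,
   the same kind of reduction typically lowers to the blocking
   __kmpc_reduce()/__kmpc_end_reduce() pair, which includes the terminating
   barrier handled above; the exact lowering is compiler-dependent.

     double dot(const double *x, const double *y, int n) {
       double r = 0.0;
     #pragma omp parallel for reduction(+ : r)
       for (int i = 0; i < n; ++i)
         r += x[i] * y[i];
       return r;
     }
*/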
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a blocking reduce.
The <tt>lck</tt> pointer must be the same as that used in the corresponding
start function.
*/
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
                       kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;

  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  // this barrier should be visible to a customer and to the threading profile
  // tool (it's a terminating barrier on constructs if NOWAIT not specified)
  OMPT_REDUCTION_DECL(th, global_tid);

  if (packed_reduction_method == critical_reduce_block) {
    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

    OMPT_REDUCTION_END;

// TODO: implicit barrier: should be exposed
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == empty_reduce_block) {

    OMPT_REDUCTION_END;

// usage: if team size == 1, no synchronization is required (Intel platforms
// only)

// TODO: implicit barrier: should be exposed
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == atomic_reduce_block) {

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
// TODO: implicit barrier: should be exposed
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only the primary thread executes here (primary releases all other
    // workers)
    __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                            global_tid);

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}
#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD

/* end of interface to fast scalable reduce routines */
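
/* Sketch of the shape of the reduce_func callback the entry points above
   expect (an illustration, not a callback this file defines; real callbacks
   are generated by the compiler). It combines the right-hand thread-private
   copy into the left-hand one.

     struct two_sums { double a; long b; }; // hypothetical reduction payload

     static void example_reduce_func(void *lhs_data, void *rhs_data) {
       struct two_sums *lhs = (struct two_sums *)lhs_data;
       struct two_sums *rhs = (struct two_sums *)rhs_data;
       lhs->a += rhs->a;
       lhs->b += rhs->b;
     }
*/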
kmp_uint64 __kmpc_get_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  return thread->th.th_current_task->td_task_id;

} // __kmpc_get_taskid

kmp_uint64 __kmpc_get_parent_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;
  kmp_taskdata_t *parent_task;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  parent_task = thread->th.th_current_task->td_parent;
  return (parent_task == NULL ? 0 : parent_task->td_task_id);

} // __kmpc_get_parent_taskid
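
/* Illustrative sketch: these two entry points back tool/debugger-oriented
   task-id queries. A hypothetical use from instrumentation code (assuming
   <stdio.h>) might look like this:

     void log_current_task(void) {
       kmp_uint64 id = __kmpc_get_taskid();
       kmp_uint64 parent = __kmpc_get_parent_taskid();
       printf("task %llu (parent %llu)\n", (unsigned long long)id,
              (unsigned long long)parent);
     }
*/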
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.
@param num_dims number of associated doacross loops.
@param dims info on loops bounds.

Initialize doacross loop information.
Expect compiler send us inclusive bounds,
e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2.
*/
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
                          const struct kmp_dim *dims) {
  __kmp_assert_valid_gtid(gtid);
  int j, idx;
  kmp_int64 last, trace_count;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_uint32 *flags;
  kmp_disp_t *pr_buf = th->th.th_dispatch;
  dispatch_shared_info_t *sh_buf;

  KA_TRACE(
      20,
      ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
       gtid, num_dims, !team->t.t_serialized));
  KMP_DEBUG_ASSERT(dims != NULL);
  KMP_DEBUG_ASSERT(num_dims > 0);

  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }
  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
  // the next loop
  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];

  // Save bounds info into allocated private buffer
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
      th, sizeof(kmp_int64) * (4 * num_dims + 1));
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  pr_buf->th_doacross_info[0] =
      (kmp_int64)num_dims; // first element is number of dimensions
  // Save also address of num_done in order to access it later without knowing
  // the buffer index
  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
  pr_buf->th_doacross_info[2] = dims[0].lo;
  pr_buf->th_doacross_info[3] = dims[0].up;
  pr_buf->th_doacross_info[4] = dims[0].st;
  last = 5;
  for (j = 1; j < num_dims; ++j) {
    kmp_int64
        range_length; // To keep ranges of all dimensions but the first dims[0]
    if (dims[j].st == 1) { // most common case
      // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
      range_length = dims[j].up - dims[j].lo + 1;
    } else {
      if (dims[j].st > 0) {
        KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
        range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
      } else { // negative increment
        KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
        range_length =
            (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
      }
    }
    pr_buf->th_doacross_info[last++] = range_length;
    pr_buf->th_doacross_info[last++] = dims[j].lo;
    pr_buf->th_doacross_info[last++] = dims[j].up;
    pr_buf->th_doacross_info[last++] = dims[j].st;
  }

  // Compute total trip count.
  // Start with range of dims[0] which we don't need to keep in the buffer.
  if (dims[0].st == 1) { // most common case
    trace_count = dims[0].up - dims[0].lo + 1;
  } else if (dims[0].st > 0) {
    KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
    trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
  } else { // negative increment
    KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
    trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
  }
  for (j = 1; j < num_dims; ++j) {
    trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
  }
  KMP_DEBUG_ASSERT(trace_count > 0);

  // Check if shared buffer is not occupied by other loop (idx -
  // __kmp_dispatch_num_buffers)
  if (idx != sh_buf->doacross_buf_idx) {
    // Shared buffer is occupied, wait for it to be free
    __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
                 __kmp_eq_4, NULL);
  }
#if KMP_32_BIT_ARCH
  // Check if we are the first thread. After the CAS the first thread gets 0,
  // others get 1 if initialization is in progress, allocated pointer otherwise.
  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
      (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
#else
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
      (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
#endif
  if (flags == NULL) {
    // we are the first thread, allocate the array of flags
    size_t size =
        (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration
    flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
    KMP_MB();
    sh_buf->doacross_flags = flags;
  } else if (flags == (kmp_uint32 *)1) {
#if KMP_32_BIT_ARCH
    // initialization is still in progress, need to wait
    while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
      KMP_YIELD(TRUE);
#else
    while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
      KMP_YIELD(TRUE);
#endif
    KMP_MB();
  } else {
    KMP_MB();
  }
  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
  pr_buf->th_doacross_flags =
      sh_buf->doacross_flags; // save private copy in order to not
  // touch shared buffer on each iteration
  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
}
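
/* Illustrative sketch (not part of the runtime): an ordered(n) loop with
   depend(sink)/depend(source) clauses is the construct that typically lowers
   to __kmpc_doacross_init()/_wait()/_post()/_fini(); the exact lowering is
   compiler-dependent.

     void wavefront(double *a, int n, int m) {
     #pragma omp parallel for ordered(2)
       for (int i = 1; i < n; ++i)
         for (int j = 1; j < m; ++j) {
     #pragma omp ordered depend(sink : i - 1, j) depend(sink : i, j - 1)
           a[i * m + j] += a[(i - 1) * m + j] + a[i * m + j - 1];
     #pragma omp ordered depend(source)
         }
     }
*/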
void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int64 shft;
  size_t num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, up, st;

  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number and check out-of-bounds condition
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = (size_t)pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  up = pr_buf->th_doacross_info[3];
  st = pr_buf->th_doacross_info[4];
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_dependence_t deps[num_dims];
#endif
  if (st == 1) { // most common case
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    if (vec[0] > lo || vec[0] < up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  deps[0].variable.value = iter_number;
  deps[0].dependence_type = ompt_dependence_type_sink;
#endif
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    size_t j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    up = pr_buf->th_doacross_info[j + 3];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = vec[i] - lo;
    } else if (st > 0) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // negative increment
      if (vec[i] > lo || vec[i] < up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
#if OMPT_SUPPORT && OMPT_OPTIONAL
    deps[i].variable.value = iter;
    deps[i].dependence_type = ompt_dependence_type_sink;
#endif
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
    KMP_YIELD(TRUE);
  }
  KMP_MB();
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_dependences) {
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
  }
#endif
  KA_TRACE(20,
           ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
            gtid, (iter_number << 5) + shft));
}
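
/* Worked example of the linearization above (assumed numbers, for
   illustration only): for a 2-D doacross with dims[0] = {lo=0, up=9, st=1}
   and dims[1] = {lo=0, up=4, st=1}, the kept range of dims[1] is ln = 5, so
   the sink vector (i=3, j=2) maps to iter_number = 2 + 5 * 3 = 17. The wait
   then spins on bit (17 % 32) = 17 of
   th_doacross_flags[17 >> 5] = th_doacross_flags[0].
*/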
void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int64 shft;
  size_t num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, st;

  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number (same as in "wait" but no
  // out-of-bounds checks)
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = (size_t)pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  st = pr_buf->th_doacross_info[4];
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_dependence_t deps[num_dims];
#endif
  if (st == 1) { // most common case
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  deps[0].variable.value = iter_number;
  deps[0].dependence_type = ompt_dependence_type_source;
#endif
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    size_t j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      iter = vec[i] - lo;
    } else if (st > 0) {
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // negative increment
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
#if OMPT_SUPPORT && OMPT_OPTIONAL
    deps[i].variable.value = iter;
    deps[i].dependence_type = ompt_dependence_type_source;
#endif
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_dependences) {
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
  }
#endif
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  KMP_MB();
  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
    KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
                (iter_number << 5) + shft));
}
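
/* Continuing the worked example from __kmpc_doacross_wait() above (assumed
   numbers): when iteration (i=3, j=2) reaches its depend(source) point, this
   post ORs flag = 1 << 17 into th_doacross_flags[0], which releases any
   thread spinning on the matching depend(sink : 3, 2).
*/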
void __kmpc_doacross_fini(ident_t *loc, int gtid) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int32 num_done;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf = th->th.th_dispatch;

  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
    return; // nothing to do
  }
  num_done =
      KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
  if (num_done == th->th.th_team_nproc) {
    // we are the last thread, need to free shared resources
    int idx = pr_buf->th_doacross_buf_idx - 1;
    dispatch_shared_info_t *sh_buf =
        &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
                     (kmp_int64)&sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
    __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
    sh_buf->doacross_flags = NULL;
    sh_buf->doacross_num_done = 0;
    sh_buf->doacross_buf_idx +=
        __kmp_dispatch_num_buffers; // free buffer for future re-use
  }
  // free private resources (need to keep buffer index forever)
  pr_buf->th_doacross_flags = NULL;
  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
  pr_buf->th_doacross_info = NULL;
  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}
/* OpenMP 5.1 Memory Management routines */
void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
  return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
}

void *omp_aligned_alloc(size_t align, size_t size,
                        omp_allocator_handle_t allocator) {
  return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
}

void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
  return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
}

void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
                         omp_allocator_handle_t allocator) {
  return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
}

void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
                  omp_allocator_handle_t free_allocator) {
  return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
                       free_allocator);
}

void omp_free(void *ptr, omp_allocator_handle_t allocator) {
  ___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
}
/* end of OpenMP 5.1 Memory Management routines */
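
/* Illustrative sketch of the user-facing API these thin wrappers implement
   (allocator handles come from omp.h; omp_default_mem_alloc is a predefined
   allocator; buffer names below are hypothetical).

     double *buf = (double *)omp_alloc(1024 * sizeof(double),
                                       omp_default_mem_alloc);
     double *abuf = (double *)omp_aligned_alloc(64, 1024 * sizeof(double),
                                                omp_default_mem_alloc);
     // ... use buf and abuf ...
     omp_free(buf, omp_default_mem_alloc);
     omp_free(abuf, omp_default_mem_alloc);
*/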
int __kmpc_get_target_offload(void) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  return __kmp_target_offload;
}

int __kmpc_pause_resource(kmp_pause_status_t level) {
  if (!__kmp_init_serial) {
    return 1; // Can't pause if runtime is not initialized
  }
  return __kmp_pause_resource(level);
}
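
/* Illustrative sketch: __kmpc_pause_resource() sits underneath the standard
   omp_pause_resource()/omp_pause_resource_all() API, e.g.

     // release OpenMP threads/buffers before a long serial phase
     if (omp_pause_resource_all(omp_pause_soft) != 0) {
       // pausing was not possible; continue normally
     }
*/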
void __kmpc_error(ident_t *loc, int severity, const char *message) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

  KMP_ASSERT(severity == severity_warning || severity == severity_fatal);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
    ompt_callbacks.ompt_callback(ompt_callback_error)(
        (ompt_severity_t)severity, message, KMP_STRLEN(message),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif // OMPT_SUPPORT

  char *src_loc;
  if (loc && loc->psource) {
    kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
    src_loc =
        __kmp_str_format("%s:%d:%d", str_loc.file, str_loc.line, str_loc.col);
    __kmp_str_loc_free(&str_loc);
  } else {
    src_loc = __kmp_str_format("unknown");
  }

  if (severity == severity_warning)
    KMP_WARNING(UserDirectedWarning, src_loc, message);
  else
    KMP_FATAL(UserDirectedError, src_loc, message);

  __kmp_str_free(&src_loc);
}
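
/* Illustrative sketch (compiler-dependent lowering): the OpenMP 5.1 `error`
   directive with at(execution) is the construct expected to reach
   __kmpc_error() at run time.

     void check_input(int n) {
       if (n <= 0) {
     #pragma omp error at(execution) severity(warning) message("n must be positive")
       }
     }
*/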
// Mark begin of scope directive.
void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
// reserved is for extension of scope directive and not used.
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
    kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
    int tid = __kmp_tid_from_gtid(gtid);
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_scope, ompt_scope_begin,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
}

// Mark end of scope directive
void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
// reserved is for extension of scope directive and not used.
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
    kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
    int tid = __kmp_tid_from_gtid(gtid);
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_scope, ompt_scope_end,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
}
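
/* Illustrative sketch (not part of the runtime): the OpenMP 5.1 `scope`
   construct is what the two OMPT bookkeeping entry points above mark, e.g.
   (work() and s are hypothetical):

     #pragma omp parallel
     {
     #pragma omp scope reduction(+ : s)
       {
         s += work();
       } // implicit barrier here unless nowait is specified
     }
*/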
#ifdef KMP_USE_VERSION_SYMBOLS
// For GOMP compatibility there are two versions of each omp_* API.
// One is the plain C symbol and one is the Fortran symbol with an appended
// underscore. When we implement a specific ompc_* version of an omp_*
// function, we want the plain GOMP versioned symbol to alias the ompc_*
// version instead of the Fortran versions in kmp_ftn_entry.h
extern "C" {
// Have to undef these from omp.h so they aren't translated into
// their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below
#ifdef omp_set_affinity_format
#undef omp_set_affinity_format
#endif
#ifdef omp_get_affinity_format
#undef omp_get_affinity_format
#endif
#ifdef omp_display_affinity
#undef omp_display_affinity
#endif
#ifdef omp_capture_affinity
#undef omp_capture_affinity
#endif

KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
                        "OMP_5.0");
KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
                        "OMP_5.0");
KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
                        "OMP_5.0");
KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,