/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp_error.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512
// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc   in   source location information
 * @param flags in   for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}
/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shutdown the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
  // returns FALSE and __kmpc_end() will unregister this root (it can cause
  // library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed at the end of the main program.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}
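// Illustrative sketch (not part of the runtime): a compiler that chooses to
// emit the optional begin/end calls would bracket the user's main roughly as
// below. The ident_t initializer shown here is schematic, not the exact layout
// a particular compiler produces.
//
//   static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";file;main;1;1;;"};
//   int main() {
//     __kmpc_begin(&loc, 0); // optional; the runtime also self-initializes
//     /* ... user code ... */
//     __kmpc_end(&loc); // honored only when KMP_IGNORE_MPPEND=0
//     return 0;
//   }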
/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the master thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}
/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly the runtime maintains
underlying threads even when they are not active (since the cost of creating
and destroying OS threads is high); this call counts all such threads even if
they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}
/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel
construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}
/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}
/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
 */
kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
#ifndef KMP_DEBUG

  return TRUE;

#else

  const char *semi2;
  const char *semi3;
  int line_no;

  if (__kmp_par_range == 0) {
    return TRUE;
  }
  semi2 = loc->psource;
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2 + 1, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
  return TRUE;

#endif /* KMP_DEBUG */
}
/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if
not.
*/
kmp_int32 __kmpc_in_parallel(ident_t *loc) {
  return __kmp_entry_thread()->th.th_root->r.r_active;
}
/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct

Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
*/
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_threads(loc, global_tid, num_threads);
}
void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
  /* the num_threads are automatically popped */
}
void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}
/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  // If we were in a serial region, then stop the serial timer, record
  // the event, and start parallel region timer
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe to save thr_state is enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      kmp_team_t *parent_team = master_th->th.th_team;
      ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
      if (lwt)
        ompt_frame = &(lwt->ompt_task_info.frame);
      else {
        int tid = __kmp_tid_from_gtid(gtid);
        ompt_frame = &(
            parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
      }
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(gtid);
    }
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
                    kmp_va_addr_of(ap));
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
                    );

    va_end(ap);
  }

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}
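// Illustrative sketch (not part of the runtime): the compiler outlines a
// `#pragma omp parallel` region into a microtask and passes the shared
// variables through the ellipsis. The outlined name and body below are
// hypothetical; the actual outlining is compiler-specific.
//
//   // #pragma omp parallel shared(a)
//   //   a[omp_get_thread_num()] += 1;
//   static void outlined(kmp_int32 *gtid, kmp_int32 *bound_tid, int *a) {
//     a[omp_get_thread_num()] += 1;
//   }
//   ...
//   __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined, a); // one shared argument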
/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}
/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                       ...) {
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];
  va_list ap;
  va_start(ap, microtask);

#if KMP_STATS_ENABLED
  KMP_COUNT_BLOCK(OMP_TEAMS);
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
  }
#endif

  // remember teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level; // AC: can be >0 on host

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // check if __kmpc_push_num_teams called, set default number of teams
  // otherwise
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(
      loc, gtid, fork_context_intel, argc,
      VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
      VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  );

  // Pop current CG root off list
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;
  if (i == 1) { // check if we are the last thread in CG (not always the case)
    __kmp_free(tmp);
  }
  // Restore current task's thread_limit from CG root
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
  va_end(ap);
#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}
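// Illustrative sketch (not part of the runtime): for
//   #pragma omp teams num_teams(4) thread_limit(8)
// the compiler outlines the teams region into a microtask and emits roughly
//   __kmpc_push_num_teams(&loc, gtid, 4, 8);
//   __kmpc_fork_teams(&loc, argc, (kmpc_micro)outlined_teams, /* shared args */);
// If neither clause is present, the push call is omitted and this function
// picks defaults via __kmp_push_num_teams(loc, gtid, 0, 0) as seen above.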
// I don't think this function should ever have been exported.
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
// openmp code ever called it, but it's been exported from the RTL for so
// long that I'm afraid to remove the definition.
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Enter a serialized parallel construct. This interface is used to handle a
conditional parallel region, like this,
@code
#pragma omp parallel if (condition)
@endcode
when the condition is false.
*/
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  // The implementation is now in kmp_runtime.cpp so that it can share static
  // functions with kmp_fork_call since the tasks to be done are similar in
  // each.
  __kmp_assert_valid_gtid(global_tid);
#if OMPT_SUPPORT
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmp_serialized_parallel(loc, global_tid);
}
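// Illustrative sketch (not part of the runtime): for
//   #pragma omp parallel if (cond)
// the compiler typically emits a runtime branch along these lines:
//   if (cond) {
//     __kmpc_fork_call(&loc, argc, (kmpc_micro)outlined, ...);
//   } else {
//     __kmpc_serialized_parallel(&loc, gtid);
//     outlined(&gtid, &bound_tid, ...); // run by the encountering thread
//     __kmpc_end_serialized_parallel(&loc, gtid);
//   }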
/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number

Leave a serialized parallel construct.
*/
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  /* skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  __kmp_assert_valid_gtid(global_tid);
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  kmp_task_team_t *task_team = this_thr->th.th_task_team;
  // we need to wait for the proxy tasks before finishing the thread
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));

  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
    }

    // clear the task id only after unlinking the task
    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program | ompt_parallel_team,
          OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  }
#endif

  /* If necessary, pop the internal control stack values and replace the team
   * values */
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  // if( serial_team -> t.t_serialized > 1 )
  serial_team->t.t_level--;

  /* pop dispatch buffers stack */
  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  {
    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);
  }
  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

    /* return to the parallel section */

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    /* restore values cached in the thread */
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0]; /* JPH */
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    /* TODO the below shouldn't need to be adjusted for serialized teams */
    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // Copy the task team from the new child / old parent team to the thread.
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
#endif
}
/*!
@ingroup SYNCHRONIZATION
@param loc source location information.

Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
depending on the memory ordering convention obeyed by the compiler
even that may not be necessary).
*/
void __kmpc_flush(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_flush: called\n"));

  /* need explicit __mf() here since use volatile instead in library */
  KMP_MB(); /* Flush all pending memory write invalidates. */

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
#if KMP_MIC
// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
// We shouldn't need it, though, since the ABI rules require that
// * If the compiler generates NGO stores it also generates the fence
// * If users hand-code NGO stores they should insert the fence
// therefore no incomplete unordered stores should be visible.
#else
  // This is to address non-temporal store instructions (sfence needed).
  // The clflush instruction is also addressed (mfence needed).
  // Probably the non-temporal load movntdqa instruction should also be
  // addressed.
  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);
  }
  if (!__kmp_cpuinfo.sse2) {
    // CPU cannot execute SSE2 instructions.
  } else {
#if KMP_COMPILER_ICC
    _mm_mfence();
#elif KMP_COMPILER_MSVC
    MemoryBarrier();
#else
    __sync_synchronize();
#endif // KMP_COMPILER_ICC
  }
#endif // KMP_MIC
#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
       KMP_ARCH_RISCV64)
// Nothing to see here move along
#elif KMP_ARCH_PPC64
// Nothing needed here (we have a real MB above).
#else
#error Unknown or unsupported architecture
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}
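// Illustrative sketch (not part of the runtime): `#pragma omp flush` lowers to
// a single call,
//   __kmpc_flush(&loc);
// Any flush list in the directive does not reach the runtime; the function
// issues a full memory fence regardless.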
/* -------------------------------------------------------------------------- */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Execute a barrier.
*/
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
  KMP_COUNT_BLOCK(OMP_BARRIER);
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;
  // TODO: explicit barrier_wait_id:
  //   this function is called when 'barrier' directive is present or
  //   implicit barrier at the end of a worksharing construct.
  // 1) better to add a per-thread barrier counter to a thread data structure
  // 2) set to 0 when a new team is created
  // 3) no sync is required

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}
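// Illustrative sketch (not part of the runtime): an explicit
//   #pragma omp barrier
// and the implicit barrier at the end of a worksharing construct both lower to
//   __kmpc_barrier(&loc, gtid);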
/* The BARRIER for a MASTER section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
*/
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
  int status = 0;

  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_COUNT_BLOCK(OMP_MASTER);
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_master) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_master)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }

  return status;
}
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.

Mark the end of a <tt>master</tt> region. This should only be called by the
thread that executes the <tt>master</tt> region.
*/
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_master) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_master)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  }
}
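// Illustrative sketch (not part of the runtime): `#pragma omp master` lowers
// to a guarded block; there is no implied barrier:
//   if (__kmpc_master(&loc, gtid)) {
//     /* ... master-only code ... */
//     __kmpc_end_master(&loc, gtid);
//   }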
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

Start execution of an <tt>ordered</tt> construct.
*/
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

#if USE_ITT_BUILD
  __kmp_itt_ordered_prep(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_wait_id_t lck;
  void *codeptr_ra;
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    /* OMPT state update */
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_ordered_start(gtid);
#endif /* USE_ITT_BUILD */
}
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param gtid global thread number.

End execution of an <tt>ordered</tt> construct.
*/
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
  int cid = 0;
  kmp_info_t *th;

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

#if USE_ITT_BUILD
  __kmp_itt_ordered_end(gtid);
// TODO: ordered_wait_id
#endif /* USE_ITT_BUILD */

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
}
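// Illustrative sketch (not part of the runtime): inside a loop with an
// `ordered` clause, each `#pragma omp ordered` block is bracketed as
//   __kmpc_ordered(&loc, gtid);
//   /* ... code that must run in iteration order ... */
//   __kmpc_end_ordered(&loc, gtid);
// The dispatcher's th_deo_fcn/th_dxo_fcn hooks used above let the dynamic-loop
// code substitute schedule-specific enter/exit protocols.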
#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  // Pointer to the allocated indirect lock is written to crit, while indexing
  // is ignored.
  void *idx;
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
#if USE_ITT_BUILD
  __kmp_itt_critical_creating(ilk->lock, loc);
#endif
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
#if USE_ITT_BUILD
    __kmp_itt_critical_destroyed(ilk->lock);
#endif
    // We don't really need to destroy the unclaimed lock here since it will be
    // cleaned up at program exit.
    // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
}
// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid)                                       \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                          \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {    \
      kmp_uint32 spins;                                                        \
      KMP_FSYNC_PREPARE(l);                                                    \
      KMP_INIT_YIELD(spins);                                                   \
      kmp_backoff_t backoff = __kmp_spin_backoff_params;                       \
      do {                                                                     \
        if (TCR_4(__kmp_nth) >                                                 \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {             \
          KMP_YIELD(TRUE);                                                     \
        } else {                                                               \
          KMP_YIELD_SPIN(spins);                                               \
        }                                                                      \
        __kmp_spin_backoff(&backoff);                                          \
      } while (                                                                \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                        \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy));   \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(l);                                                     \
  }

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc)                                      \
  {                                                                            \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                                \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                   \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                         \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free &&                         \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy);      \
  }

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid)                                       \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
#if KMP_USE_FUTEX

#include <sys/syscall.h>
#include <unistd.h>
#ifndef FUTEX_WAIT
#define FUTEX_WAIT 0
#endif
#ifndef FUTEX_WAKE
#define FUTEX_WAKE 1
#endif

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    kmp_int32 gtid_code = (gtid + 1) << 1;                                     \
    KMP_MB();                                                                  \
    KMP_FSYNC_PREPARE(ftx);                                                    \
    kmp_int32 poll_val;                                                        \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(                            \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex),                         \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {   \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;                           \
      if (!cond) {                                                             \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val,            \
                                         poll_val |                            \
                                             KMP_LOCK_BUSY(1, futex))) {       \
          continue;                                                            \
        }                                                                      \
        poll_val |= KMP_LOCK_BUSY(1, futex);                                   \
      }                                                                        \
      kmp_int32 rc;                                                            \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val,     \
                        NULL, NULL, 0)) != 0) {                                \
        continue;                                                              \
      }                                                                        \
      gtid_code |= 1;                                                          \
    }                                                                          \
    KMP_FSYNC_ACQUIRED(ftx);                                                   \
  }

// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc)                                    \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),     \
                                    KMP_LOCK_BUSY(gtid + 1 << 1, futex))) {    \
      KMP_FSYNC_ACQUIRED(ftx);                                                 \
      rc = TRUE;                                                               \
    } else {                                                                   \
      rc = FALSE;                                                              \
    }                                                                          \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid)                                     \
  {                                                                            \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                          \
    KMP_MB();                                                                  \
    KMP_FSYNC_RELEASING(ftx);                                                  \
    kmp_int32 poll_val =                                                       \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex));               \
    if (KMP_LOCK_STRIP(poll_val) & 1) {                                        \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE,                         \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0);                         \
    }                                                                          \
    KMP_MB();                                                                  \
    KMP_YIELD_OVERSUB();                                                       \
  }

#endif // KMP_USE_FUTEX
#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  // Because of the double-check, the following load doesn't need to be volatile
  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    void *idx;

    // Allocate & initialize the lock.
    // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
#if USE_ITT_BUILD
    __kmp_itt_critical_creating(lck);
// __kmp_itt_critical_creating() should be called *before* the first usage
// of underlying lock. It is the only place where we can guarantee it. There
// are chances the lock will be destroyed with no usage, but it is not a
// problem, because this is not a real event seen by the user but rather
// setting a name for the object (lock). See more details in kmp_itt.h.
#endif /* USE_ITT_BUILD */

    // Use a cmpxchg instruction to slam the start of the critical section with
    // the lock pointer. If another thread beat us to it, deallocate the lock,
    // and use the lock that the other thread allocated.
    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

    if (status == 0) { // Deallocate the lock and reload the value.
#if USE_ITT_BUILD
      __kmp_itt_critical_destroyed(lck);
// Let ITT know the lock is destroyed and the same memory location may be reused
// for another purpose.
#endif /* USE_ITT_BUILD */
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
    }
  }
  return lck;
}

#endif // KMP_USE_DYNAMIC_LOCK
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Enter code protected by a `critical` construct.
This function blocks until the executing thread can enter the critical section.
*/
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif // OMPT_SUPPORT
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
  KMP_COUNT_BLOCK(OMP_CRITICAL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
#endif
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  // TODO: add THR_OVHD_STATE

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

// since the critical directive binds to all threads, not just the current
// team we have to check this even if we are in a serialized team.
// also, even if we are the uber thread, we still have to conduct the lock,
// as we have to contend with sibling threads.

#if USE_ITT_BUILD
  __kmp_itt_critical_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;
    /* OMPT state update */
    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
    ti.state = ompt_state_wait_critical;

    /* OMPT event callback */
    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  KMP_POP_PARTITIONED_TIMER();

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif // KMP_USE_DYNAMIC_LOCK
}
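// Illustrative sketch (not part of the runtime): for
//   #pragma omp critical (name)
// the compiler emits one kmp_critical_name object per critical name (a
// zero-initialized, cache-line-sized blob, typically resolved so that the same
// name maps to the same object program-wide) and brackets the block:
//   static kmp_critical_name crit_name; // zero-initialized
//   __kmpc_critical(&loc, gtid, &crit_name);
//   /* ... protected code ... */
//   __kmpc_end_critical(&loc, gtid, &crit_name);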
#if KMP_USE_DYNAMIC_LOCK

// Converts the given hint to an internal lock implementation
static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#if KMP_USE_TSX
#define KMP_TSX_LOCK(seq) lockseq_##seq
#else
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif

  // Hints that do not require further logic
  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  // Rule out conflicting hints first by returning the default lock
  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  // Do not even consider speculation when it appears to be contended
  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  // Uncontended lock without speculation
  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
    return lockseq_tas;

  // HLE lock for speculation
  if (hint & omp_lock_hint_speculative)
    return KMP_TSX_LOCK(hle);

  return __kmp_user_lock_seq;
}
#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
  if (user_lock) {
    switch (KMP_EXTRACT_D_TAG(user_lock)) {
    case 0:
      break;
#if KMP_USE_FUTEX
    case locktag_futex:
      return kmp_mutex_impl_queuing;
#endif
    case locktag_tas:
      return kmp_mutex_impl_spin;
#if KMP_USE_TSX
    case locktag_hle:
      return kmp_mutex_impl_speculative;
#endif
    default:
      return kmp_mutex_impl_none;
    }
    ilock = KMP_LOOKUP_I_LOCK(user_lock);
  }
  KMP_ASSERT(ilock);
  switch (ilock->type) {
#if KMP_USE_TSX
  case locktag_adaptive:
  case locktag_rtm:
    return kmp_mutex_impl_speculative;
#endif
  case locktag_nested_tas:
    return kmp_mutex_impl_spin;
#if KMP_USE_FUTEX
  case locktag_nested_futex:
#endif
  case locktag_ticket:
  case locktag_queuing:
  case locktag_drdpa:
  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;
  default:
    return kmp_mutex_impl_none;
  }
}
#else
// For locks without dynamic binding
static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {
  case lk_tas:
#if KMP_USE_FUTEX
  case lk_futex:
#endif
    return kmp_mutex_impl_spin;
  case lk_ticket:
  case lk_queuing:
  case lk_drdpa:
    return kmp_mutex_impl_queuing;
#if KMP_USE_TSX
  case lk_hle:
  case lk_rtm:
  case lk_adaptive:
    return kmp_mutex_impl_speculative;
#endif
  default:
    return kmp_mutex_impl_none;
  }
}
#endif // KMP_USE_DYNAMIC_LOCK
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.
@param hint the lock hint.

Enter code protected by a `critical` construct with a hint. The hint value is
used to suggest a lock implementation. This function blocks until the executing
thread can enter the critical section unless the hint suggests use of
speculative execution and the hardware supports it.
*/
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {
  KMP_COUNT_BLOCK(OMP_CRITICAL);
  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
  // This is the case, if called from __kmpc_critical:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
#endif

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  if (*lk == 0) {
    kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
    if (KMP_IS_D_LOCK(lckseq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(lckseq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_acquiring(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      /* OMPT state update */
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      /* OMPT event callback */
      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }
  KMP_POP_PARTITIONED_TIMER();

#if USE_ITT_BUILD
  __kmp_itt_critical_acquired(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    /* OMPT state update */
    ti.state = prev_state;
    ti.wait_id = 0;

    /* OMPT event callback */
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif
  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
} // __kmpc_critical_with_hint

#endif // KMP_USE_DYNAMIC_LOCK
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Leave a critical section, releasing any lock that was held during its execution.
*/
void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
                         kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
#if USE_ITT_BUILD
    __kmp_itt_critical_releasing(lck);
#endif
    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  }
#endif
  else { // ticket, queuing or drdpa
    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
  }

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

#if USE_ITT_BUILD
  __kmp_itt_critical_releasing(lck);
#endif /* USE_ITT_BUILD */
  // Value of 'crit' should be good for using as a critical_id of the critical
  // section directive.
  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK

#if OMPT_SUPPORT && OMPT_OPTIONAL
  /* OMPT release event triggers after lock is released; place here to trigger
   * for all #if branches */
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
        OMPT_LOAD_RETURN_ADDRESS(0));
  }
#endif

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
}
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master. The barrier is executed inside
this function.
*/
kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  int status;
  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;

  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  return (status != 0) ? 0 : 1;
}
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Complete the execution of a combined barrier and master. This function should
only be called at the completion of the <tt>master</tt> code. Other threads will
still be waiting at the barrier and this call releases them.
*/
void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
}
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master(nowait) construct.
The barrier is executed inside this function.
There is no equivalent "end" function, since the
non-master threads do not wait for the master and simply continue past the
construct.
*/
kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 ret;
  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);
  }
#endif
  __kmp_threads[global_tid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  ret = __kmpc_master(loc, global_tid);

  if (__kmp_env_consistency_check) {
    /* there's no __kmpc_end_master called; so the (stats) */
    /* actions of __kmpc_end_master are done here */
    if (ret) {
      /* only one thread should do the pop since only */
      /* one did the push (see __kmpc_master()) */
      __kmp_pop_sync(global_tid, ct_master, loc);
    }
  }

  return (ret);
}
/* The BARRIER for a SINGLE process section is always explicit */
/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number
@return One if this thread should execute the single construct, zero otherwise.

Test whether to execute a <tt>single</tt> construct.
There are no implicit barriers in the two "single" calls, rather the compiler
should introduce an explicit barrier if it is required.
*/

kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
  __kmp_assert_valid_gtid(global_tid);
  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);

  if (rc) {
    // We are going to execute the single statement, so we should count it.
    KMP_COUNT_BLOCK(OMP_SINGLE);
    KMP_PUSH_PARTITIONED_TIMER(OMP_single);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.enabled) {
    if (rc) {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_executor, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    } else {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_end,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    }
  }
#endif

  return rc;
}
/*!
@ingroup WORK_SHARING
@param loc source location information
@param global_tid global thread number

Mark the end of a <tt>single</tt> construct. This function should
only be called by the thread that executed the block of code protected
by the `single` construct.
*/
void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
  __kmp_assert_valid_gtid(global_tid);
  __kmp_exit_single(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_single_executor, ompt_scope_end,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}
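// Illustrative sketch (not part of the runtime): `#pragma omp single` without
// nowait lowers to a guarded block followed by an explicit barrier, since
// __kmpc_single/__kmpc_end_single themselves do not imply one:
//   if (__kmpc_single(&loc, gtid)) {
//     /* ... single-only code ... */
//     __kmpc_end_single(&loc, gtid);
//   }
//   __kmpc_barrier(&loc, gtid);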
/*!
@ingroup WORK_SHARING
@param loc Source location
@param global_tid Global thread id

Mark the end of a statically scheduled loop.
*/
void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
  KMP_POP_PARTITIONED_TIMER();
  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_work_t ompt_work_type = ompt_work_loop;
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        // use default set above.
        // a warning about this case is provided in __kmpc_for_static_init
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(global_tid, ct_pdo, loc);
}
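// Illustrative sketch (not part of the runtime): a statically scheduled loop is
// bracketed by an init/fini pair. Schematically, for
//   #pragma omp for schedule(static)
//   for (i = 0; i < n; ++i) body(i);
// the compiler emits roughly the following (the exact init entry point and
// arguments depend on the loop's index type and schedule):
//   kmp_int32 last, lower = 0, upper = n - 1, stride = 1;
//   __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lower,
//                            &upper, &stride, 1, 0);
//   for (i = lower; i <= upper; ++i) body(i);
//   __kmpc_for_static_fini(&loc, gtid);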
1824 // User routines which take C-style arguments (call by value)
1825 // different from the Fortran equivalent routines
1827 void ompc_set_num_threads(int arg
) {
1828 // !!!!! TODO: check the per-task binding
1829 __kmp_set_num_threads(arg
, __kmp_entry_gtid());
1832 void ompc_set_dynamic(int flag
) {
1835 /* For the thread-private implementation of the internal controls */
1836 thread
= __kmp_entry_thread();
1838 __kmp_save_internal_controls(thread
);
1840 set__dynamic(thread
, flag
? TRUE
: FALSE
);
1843 void ompc_set_nested(int flag
) {
1846 /* For the thread-private internal controls implementation */
1847 thread
= __kmp_entry_thread();
1849 __kmp_save_internal_controls(thread
);
1851 set__max_active_levels(thread
, flag
? __kmp_dflt_max_active_levels
: 1);
void ompc_set_max_active_levels(int max_active_levels) {
  /* we want per-task implementation of this internal control */

  /* For the per-thread internal controls implementation */
  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
}
void ompc_set_schedule(omp_sched_t kind, int modifier) {
  // !!!!! TODO: check the per-task binding
  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
}
int ompc_get_ancestor_thread_num(int level) {
  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
}
int ompc_get_team_size(int level) {
  return __kmp_get_team_size(__kmp_entry_gtid(), level);
}
/* OpenMP 5.0 Affinity Format API */

void ompc_set_affinity_format(char const *format) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
                         format, KMP_STRLEN(format) + 1);
}
size_t ompc_get_affinity_format(char *buffer, size_t size) {
  size_t format_size;
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  format_size = KMP_STRLEN(__kmp_affinity_format);
  if (buffer && size) {
    __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
                           format_size + 1);
  }
  return format_size;
}
void ompc_display_affinity(char const *format) {
  int gtid;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  gtid = __kmp_get_gtid();
  __kmp_aux_display_affinity(gtid, format);
}
size_t ompc_capture_affinity(char *buffer, size_t buf_size,
                             char const *format) {
  int gtid;
  size_t num_required;
  kmp_str_buf_t capture_buf;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  gtid = __kmp_get_gtid();
  __kmp_str_buf_init(&capture_buf);
  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
  if (buffer && buf_size) {
    __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
                           capture_buf.used + 1);
  }
  __kmp_str_buf_free(&capture_buf);
  return num_required;
}
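// Illustrative sketch (not part of the runtime): both ompc_get_affinity_format
// and ompc_capture_affinity return the number of characters required regardless
// of the buffer supplied, so a caller can query first and then allocate. The
// format string and names below are placeholders.
//
//   size_t needed = ompc_capture_affinity(NULL, 0, "%P %i %A");
//   char *buf = (char *)malloc(needed + 1); // +1 for the terminating NUL
//   ompc_capture_affinity(buf, needed + 1, "%P %i %A");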
void kmpc_set_stacksize(int arg) {
  // __kmp_aux_set_stacksize initializes the library if needed
  __kmp_aux_set_stacksize(arg);
}
void kmpc_set_stacksize_s(size_t arg) {
  // __kmp_aux_set_stacksize initializes the library if needed
  __kmp_aux_set_stacksize(arg);
}
void kmpc_set_blocktime(int arg) {
  int gtid, tid;
  kmp_info_t *thread;

  gtid = __kmp_entry_gtid();
  tid = __kmp_tid_from_gtid(gtid);
  thread = __kmp_thread_from_gtid(gtid);

  __kmp_aux_set_blocktime(arg, thread, tid);
}
void kmpc_set_library(int arg) {
  // __kmp_user_set_library initializes the library if needed
  __kmp_user_set_library((enum library_type)arg);
}
void kmpc_set_defaults(char const *str) {
  // __kmp_aux_set_defaults initializes the library if needed
  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
}
void kmpc_set_disp_num_buffers(int arg) {
  // ignore after initialization because some teams have already
  // allocated dispatch buffers
  if (__kmp_init_serial == 0 && arg > 0)
    __kmp_dispatch_num_buffers = arg;
}
int kmpc_set_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_set_affinity_mask_proc(proc, mask);
#endif
}
int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
#endif
}
int kmpc_get_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  return __kmp_aux_get_affinity_mask_proc(proc, mask);
#endif
}
/* -------------------------------------------------------------------------- */
/*!
@ingroup THREADPRIVATE
@param loc source location information
@param gtid global thread number
@param cpy_size size of the cpy_data buffer
@param cpy_data pointer to data to be copied
@param cpy_func helper function to call for copying data
@param didit flag variable: 1=single thread; 0=not single thread

__kmpc_copyprivate implements the interface for the private data broadcast
needed for the copyprivate clause associated with a single region in an
OpenMP<sup>*</sup> program (both C and Fortran).
All threads participating in the parallel region call this routine.
One of the threads (called the single thread) should have the <tt>didit</tt>
variable set to 1 and all other threads should have that variable set to 0.
All threads pass a pointer to a data buffer (cpy_data) that they have built.

The OpenMP specification forbids the use of nowait on the single region when a
copyprivate clause is present. However, @ref __kmpc_copyprivate implements a
barrier internally to avoid race conditions, so the code generation for the
single region should avoid generating a barrier after the call to @ref
__kmpc_copyprivate.

The <tt>gtid</tt> parameter is the global thread id for the current thread.
The <tt>loc</tt> parameter is a pointer to source location information.

Internal implementation: The single thread will first copy its descriptor
address (cpy_data) to a team-private location, then the other threads will each
call the function pointed to by the parameter cpy_func, which carries out the
copy by copying the data using the cpy_data buffer.

The cpy_func routine used for the copy and the contents of the data area defined
by cpy_data and cpy_size may be built in any fashion that will allow the copy
to be done. For instance, the cpy_data buffer can hold the actual data to be
copied or it may hold a list of pointers to the data. The cpy_func routine must
interpret the cpy_data buffer appropriately.

The interface to cpy_func is as follows:
@code
void cpy_func( void *destination, void *source )
@endcode
where void *destination is the cpy_data pointer for the thread being copied to
and void *source is the cpy_data pointer for the thread being copied from.
*/
void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
                        void *cpy_data, void (*cpy_func)(void *, void *),
                        kmp_int32 didit) {
  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  void **data_ptr;

  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
  }

  // ToDo: Optimize the following two barriers into some kind of split barrier

  if (didit)
    *data_ptr = cpy_data;

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif
/* This barrier is not a barrier region boundary */
#if USE_ITT_NOTIFY
  __kmp_threads[gtid]->th.th_ident = loc;
#endif
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);

  if (!didit)
    (*cpy_func)(cpy_data, *data_ptr);

  // Consider next barrier a user-visible barrier for barrier region boundaries
  // Nesting checks are already handled by the single construct checks
#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
// tasks can overwrite the location)
#endif
  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
}
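// Illustrative sketch (not part of the runtime): roughly the sequence a
// compiler might emit for "#pragma omp single copyprivate(x)". The helper
// my_copy_fn and the variable names are hypothetical; only __kmpc_single,
// __kmpc_end_single and __kmpc_copyprivate are entry points defined here.
//
//   static void my_copy_fn(void *dst, void *src) {
//     *(int *)dst = *(int *)src; // both pointers are the threads' cpy_data
//   }
//   ...
//   kmp_int32 didit = __kmpc_single(&loc, gtid);
//   if (didit) {
//     x = compute_value(); // body executed by the single thread only
//     __kmpc_end_single(&loc, gtid);
//   }
//   // every thread passes the address of its own x as cpy_data
//   __kmpc_copyprivate(&loc, gtid, sizeof(x), &x, my_copy_fn, didit);
//   // no extra barrier is emitted here: __kmpc_copyprivate barriers internally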
/* -------------------------------------------------------------------------- */

#define INIT_LOCK __kmp_init_user_lock_with_checks
#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
#define ACQUIRE_NESTED_LOCK_TIMED                                              \
  __kmp_acquire_nested_user_lock_with_checks_timed
#define RELEASE_LOCK __kmp_release_user_lock_with_checks
#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
#define TEST_LOCK __kmp_test_user_lock_with_checks
#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks

// TODO: Make check abort messages use location info & pass it into
// with_checks routines

#if KMP_USE_DYNAMIC_LOCK
// internal lock initializer
static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
                                                    kmp_dyna_lockseq_t seq) {
  if (KMP_IS_D_LOCK(seq)) {
    KMP_INIT_D_LOCK(lock, seq);
#if USE_ITT_BUILD
    __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
#endif
  } else {
    KMP_INIT_I_LOCK(lock, seq);
#if USE_ITT_BUILD
    kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
    __kmp_itt_lock_creating(ilk->lock, loc);
#endif
  }
}

// internal nest lock initializer
static __forceinline void
__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
                               kmp_dyna_lockseq_t seq) {
  // Don't have nested lock implementation for speculative locks
  if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
    seq = __kmp_user_lock_seq;
  switch (seq) {
  case lockseq_tas:
    seq = lockseq_nested_tas;
    break;
  case lockseq_futex:
    seq = lockseq_nested_futex;
    break;
  case lockseq_ticket:
    seq = lockseq_nested_ticket;
    break;
  case lockseq_queuing:
    seq = lockseq_nested_queuing;
    break;
  case lockseq_drdpa:
    seq = lockseq_nested_drdpa;
    break;
  default:
    seq = lockseq_nested_queuing;
  }
  KMP_INIT_I_LOCK(lock, seq);
#if USE_ITT_BUILD
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
  __kmp_itt_lock_creating(ilk->lock, loc);
#endif
}
/* initialize the lock with a hint */
void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
                                uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
  }

  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
}
/* initialize the lock with a hint */
void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
                                     void **user_lock, uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
  }

  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
}

#endif // KMP_USE_DYNAMIC_LOCK
/* initialize the lock */
void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock");
  }
  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }

  INIT_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_creating(lck);
#endif /* USE_ITT_BUILD */

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_lock
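// Illustrative sketch (not part of the runtime): in this runtime the user-level
// omp_*_lock routines typically forward to these __kmpc_*_lock entry points,
// and compilers may also call them directly. loc and gtid are placeholders; the
// lock storage is passed as a void ** that the runtime interprets itself.
//
//   omp_lock_t lk;
//   __kmpc_init_lock(&loc, gtid, (void **)&lk);
//   __kmpc_set_lock(&loc, gtid, (void **)&lk);   // blocks until acquired
//   /* ... protected work ... */
//   __kmpc_unset_lock(&loc, gtid, (void **)&lk);
//   if (__kmpc_test_lock(&loc, gtid, (void **)&lk)) { // non-blocking attempt
//     __kmpc_unset_lock(&loc, gtid, (void **)&lk);
//   }
//   __kmpc_destroy_lock(&loc, gtid, (void **)&lk);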
/* initialize the lock */
void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
  }
  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_nest_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }

  INIT_NESTED_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_creating(lck);
#endif /* USE_ITT_BUILD */

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_init_nest_lock
void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  kmp_user_lock_p lck;
  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
    lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
  } else {
    lck = (kmp_user_lock_p)user_lock;
  }
  __kmp_itt_lock_destroyed(lck);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    kmp_user_lock_p lck;
    if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
      lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
    } else {
      lck = (kmp_user_lock_p)user_lock;
    }
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_destroyed(lck);
#endif /* USE_ITT_BUILD */
  DESTROY_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  }
  else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_lock
/* destroy the lock */
void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
  __kmp_itt_lock_destroyed(ilk->lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#if USE_ITT_BUILD
  __kmp_itt_lock_destroyed(lck);
#endif /* USE_ITT_BUILD */

  DESTROY_NESTED_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    ;
  }
  else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
} // __kmpc_destroy_nest_lock
void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  KMP_COUNT_BLOCK(OMP_set_lock);
#if KMP_USE_DYNAMIC_LOCK
  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(
      (kmp_user_lock_p)
          user_lock); // itt function will get to the right lock object.
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
#if USE_ITT_BUILD
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  ACQUIRE_LOCK(lck, gtid);

#if USE_ITT_BUILD
  __kmp_itt_lock_acquired(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(user_lock),
          (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
  }
#endif
  int acquire_status =
      KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)acquire_status;
#if USE_ITT_BUILD
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  int acquire_status;
  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
          codeptr);
    }
  }
#endif

  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);

#if USE_ITT_BUILD
  __kmp_itt_lock_acquired(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_RELEASE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  /* Can't use serial interval since not block structured */
  /* release the lock */

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
// "fast" path implemented to fix customer performance issue
#if USE_ITT_BUILD
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif /* USE_ITT_BUILD */
    TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
    KMP_MB();

#if OMPT_SUPPORT && OMPT_OPTIONAL
    // This is the case, if called from omp_init_lock_with_hint:
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.ompt_callback_mutex_released) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
          ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  }
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing(lck);
#endif /* USE_ITT_BUILD */

  RELEASE_LOCK(lck, gtid);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
/* release the lock */
void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif
  int release_status =
      KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)release_status;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_prev
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  /* Can't use serial interval since not block structured */

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    // "fast" path implemented to fix customer performance issue
    kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
#if USE_ITT_BUILD
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
    int release_status = KMP_LOCK_STILL_HELD;
#endif

    if (--(tl->lk.depth_locked) == 0) {
      TCW_4(tl->lk.poll, 0);
#if OMPT_SUPPORT && OMPT_OPTIONAL
      release_status = KMP_LOCK_RELEASED;
#endif
    }
    KMP_MB();

#if OMPT_SUPPORT && OMPT_OPTIONAL
    // This is the case, if called from omp_init_lock_with_hint:
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.enabled) {
      if (release_status == KMP_LOCK_RELEASED) {
        if (ompt_enabled.ompt_callback_mutex_released) {
          // release_lock_last
          ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
              ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
        }
      } else if (ompt_enabled.ompt_callback_nest_lock) {
        // release_lock_previous
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  }
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_releasing(lck);
#endif /* USE_ITT_BUILD */

  int release_status;
  release_status = RELEASE_NESTED_LOCK(lck, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_previous
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
/* try to acquire the lock */
int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
  KMP_COUNT_BLOCK(OMP_test_lock);

#if KMP_USE_DYNAMIC_LOCK
  int rc;
  int tag = KMP_EXTRACT_D_TAG(user_lock);
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
  } else
#endif
  {
    rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
  if (rc) {
#if USE_ITT_BUILD
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
#endif
    return FTN_TRUE;
  } else {
#if USE_ITT_BUILD
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
#endif
    return FTN_FALSE;
  }

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  rc = TEST_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  return (rc ? FTN_TRUE : FTN_FALSE);

  /* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}
/* try to acquire the lock */
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
  }
#endif
  return rc;

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled &&
      ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
        codeptr);
  }
#endif

  rc = TEST_NESTED_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
  }
#endif
  return rc;

  /* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}
// Interface to fast scalable reduce methods routines

// keep the selected method in a thread local structure for cross-function
// usage: will be used in __kmpc_end_reduce* functions;
// another solution: to re-determine the method one more time in
// __kmpc_end_reduce* functions (new prototype required then)
// AT: which solution is better?
#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod)                              \
  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))

#define __KMP_GET_REDUCTION_METHOD(gtid)                                       \
  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)

// description of the packed_reduction_method variable: look at the macros in
// kmp.h

// used in a critical section reduce block
static __forceinline void
__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                          kmp_critical_name *crit) {

  // this lock was visible to a customer and to the threading profile tool as a
  // serial overhead span (although it's used for an internal purpose only)
  // why was it visible in previous implementation?
  // should we keep it visible in new reduce block?
  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  if (*lk == 0) {
    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(__kmp_user_lock_seq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid,
                                KMP_GET_I_TAG(__kmp_user_lock_seq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow the
  // normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32 byte critical sections. If there isn't enough space, then we
  // have to use a pointer.
  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
    lck = (kmp_user_lock_p)crit;
  } else {
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }
  KMP_DEBUG_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
}
// used in a critical section reduce block
static __forceinline void
__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                        kmp_critical_name *crit) {

  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // We know that the fast reduction code is only emitted by Intel compilers
  // with 32 byte critical sections. If there isn't enough space, then we have
  // to use a pointer.
  if (__kmp_base_user_lock_size > 32) {
    lck = *((kmp_user_lock_p *)crit);
    KMP_ASSERT(lck != NULL);
  } else {
    lck = (kmp_user_lock_p)crit;
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
} // __kmp_end_critical_section_reduce_block
static __forceinline int
__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
                                     int *task_state) {
  kmp_team_t *team;

  // Check if we are inside the teams construct?
  if (th->th.th_teams_microtask) {
    *team_p = team = th->th.th_team;
    if (team->t.t_level == th->th.th_teams_level) {
      // This is reduction at teams construct.
      KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
      // Let's swap teams temporarily for the reduction.
      th->th.th_info.ds.ds_tid = team->t.t_master_tid;
      th->th.th_team = team->t.t_parent;
      th->th.th_team_nproc = th->th.th_team->t.t_nproc;
      th->th.th_task_team = th->th.th_team->t.t_task_team[0];
      *task_state = th->th.th_task_state;
      th->th.th_task_state = 0;

      return 1;
    }
  }
  return 0;
}
static __forceinline void
__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
  th->th.th_info.ds.ds_tid = 0;
  th->th.th_team = team;
  th->th.th_team_nproc = team->t.t_nproc;
  th->th.th_task_team = team->t.t_task_team[task_state];
  th->th.th_task_state = task_state;
}
/* 2.a.i. Reduce Block without a terminating barrier */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two
operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the master thread, 0 for all other team threads, 2 for all team
threads if atomic reduction needed

The nowait version is used for a reduce clause with the nowait argument.
*/
kmp_int32
__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                     size_t reduce_size, void *reduce_data,
                     void (*reduce_func)(void *lhs_data, void *rhs_data),
                     kmp_critical_name *lck) {

  KMP_COUNT_BLOCK(REDUCE_nowait);
  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  // why do we need this initialization here at all?
  // Reduction clause can not be used as a stand-alone directive.

  // do not call __kmp_serial_initialize(), it will be called by
  // __kmp_parallel_initialize() if needed
  // possible detection of false-positive race by the threadchecker ???
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  // packed_reduction_method value will be reused by __kmp_end_reduce* function,
  // the value should be kept in a variable
  // the variable should be either a construct-specific or thread-specific
  // property, not a team specific property
  // (a thread can reach the next reduce block on the next construct, reduce
  // method may differ on the next construct)
  // an ident_t "loc" parameter could be used as a construct-specific property
  // (what if loc == 0?)
  // (if both construct-specific and team-specific variables were shared,
  // then unnecessary extra syncs should be needed)
  // a thread-specific variable is better regarding two issues above (next
  // construct and extra syncs)
  // a thread-specific "th_local.reduction_method" variable is used currently
  // each thread executes 'determine' and 'set' lines (no need to execute by one
  // thread, to avoid unnecessary extra syncs)

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  OMPT_REDUCTION_DECL(th, global_tid);
  if (packed_reduction_method == critical_reduce_block) {

    OMPT_REDUCTION_BEGIN;

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    OMPT_REDUCTION_BEGIN;

    // usage: if team size == 1, no synchronization is required ( Intel
    // platforms only )
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

    // all threads should do this pop here (because __kmpc_end_reduce_nowait()
    // won't be called by the code gen)
    // (it's not quite good, because the checking block has been closed by
    // this 'pop',
    // but atomic operation has not been executed yet, will be executed
    // slightly later, literally on next instruction)
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_reduce, loc);

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // AT: performance issue: a real barrier here
    // AT: (if master goes slow, other threads are blocked here waiting for the
    // master to come and release them)
    // AT: (it's not what a customer might expect specifying NOWAIT clause)
    // AT: (specifying NOWAIT won't result in improvement of performance, it'll
    // be confusing to a customer)
    // AT: another implementation of *barrier_gather*nowait() (or some other design)
    // might go faster and be more in line with sense of NOWAIT
    // AT: TO DO: do epcc test and compare times

    // this barrier should be invisible to a customer and to the threading profile
    // tool (it's neither a terminating barrier nor customer's code, it's
    // used for an internal purpose)
#if OMPT_SUPPORT
    // JP: can this barrier potentially lead to task scheduling?
    // JP: as long as there is a barrier in the implementation, OMPT should and
    // will provide the barrier events
    // so we set-up the necessary frame/return addresses.
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all other workers except master should do this pop here
    // ( none of other workers will get to __kmpc_end_reduce_nowait() )
    if (__kmp_env_consistency_check) {
      if (retval == 0) {
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
  KA_TRACE(
      10,
      ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
       global_tid, packed_reduction_method, retval));

  return retval;
}
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a reduce nowait.
*/
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                              kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
    OMPT_REDUCTION_END;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required ( on Intel
    // platforms only )

    OMPT_REDUCTION_END;

  } else if (packed_reduction_method == atomic_reduce_block) {

    // neither master nor other workers should get here
    // (code gen does not generate this call in case 2: atomic reduce block)
    // actually it's better to remove this elseif at all;
    // after removal this value will checked by the 'else' and will assert

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only master gets here
    // OMPT: tree reduction is annotated in the barrier code

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}
/* 2.a.ii. Reduce Block with a terminating barrier */

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two
operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the master thread, 0 for all other team threads, 2 for all team
threads if atomic reduction needed

A blocking reduce that includes an implicit barrier.
*/
kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                        size_t reduce_size, void *reduce_data,
                        void (*reduce_func)(void *lhs_data, void *rhs_data),
                        kmp_critical_name *lck) {
  KMP_COUNT_BLOCK(REDUCE_wait);
  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;

  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  // why do we need this initialization here at all?
  // Reduction clause can not be a stand-alone directive.

  // do not call __kmp_serial_initialize(), it will be called by
  // __kmp_parallel_initialize() if needed
  // possible detection of false-positive race by the threadchecker ???
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  OMPT_REDUCTION_DECL(th, global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    OMPT_REDUCTION_BEGIN;
    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    OMPT_REDUCTION_BEGIN;
    // usage: if team size == 1, no synchronization is required ( Intel
    // platforms only )
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // case tree_reduce_block:
    // this barrier should be visible to a customer and to the threading profile
    // tool (it's a terminating barrier on constructs if NOWAIT not specified)
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident =
        loc; // needed for correct notification of frames
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, TRUE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all other workers except master should do this pop here
    // ( none of other workers except master will enter __kmpc_end_reduce() )
    if (__kmp_env_consistency_check) {
      if (retval == 0) { // 0: all other workers; 1: master
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }

  KA_TRACE(10,
           ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
            global_tid, packed_reduction_method, retval));
  return retval;
}
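// Illustrative sketch (not part of the runtime): the blocking form differs from
// the nowait sketch shown after __kmpc_reduce_nowait in that the construct ends
// with a terminating barrier, so generated code is expected to call
// __kmpc_end_reduce() on both the "1" and "2" paths; names are placeholders.
//
//   switch (__kmpc_reduce(&loc, gtid, 1, sizeof(priv), &priv, my_reduce_fn,
//                         &crit_name)) {
//   case 1:
//     shared += priv;
//     __kmpc_end_reduce(&loc, gtid, &crit_name);
//     break;
//   case 2:
//     // atomic update of shared, then still close the region
//     __kmpc_end_reduce(&loc, gtid, &crit_name);
//     break;
//   }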
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a blocking reduce.
The <tt>lck</tt> pointer must be the same as that used in the corresponding
start function.
*/
3702 void __kmpc_end_reduce(ident_t
*loc
, kmp_int32 global_tid
,
3703 kmp_critical_name
*lck
) {
3705 PACKED_REDUCTION_METHOD_T packed_reduction_method
;
3708 int teams_swapped
= 0, task_state
;
3710 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid
));
3711 __kmp_assert_valid_gtid(global_tid
);
3713 th
= __kmp_thread_from_gtid(global_tid
);
3714 teams_swapped
= __kmp_swap_teams_for_teams_reduction(th
, &team
, &task_state
);
3716 packed_reduction_method
= __KMP_GET_REDUCTION_METHOD(global_tid
);
3718 // this barrier should be visible to a customer and to the threading profile
3719 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3720 OMPT_REDUCTION_DECL(th
, global_tid
);
3722 if (packed_reduction_method
== critical_reduce_block
) {
3723 __kmp_end_critical_section_reduce_block(loc
, global_tid
, lck
);
3727 // TODO: implicit barrier: should be exposed
3729 ompt_frame_t
*ompt_frame
;
3730 if (ompt_enabled
.enabled
) {
3731 __ompt_get_task_info_internal(0, NULL
, NULL
, &ompt_frame
, NULL
, NULL
);
3732 if (ompt_frame
->enter_frame
.ptr
== NULL
)
3733 ompt_frame
->enter_frame
.ptr
= OMPT_GET_FRAME_ADDRESS(0);
3734 OMPT_STORE_RETURN_ADDRESS(global_tid
);
3738 __kmp_threads
[global_tid
]->th
.th_ident
= loc
;
3740 __kmp_barrier(bs_plain_barrier
, global_tid
, FALSE
, 0, NULL
, NULL
);
3741 #if OMPT_SUPPORT && OMPT_OPTIONAL
3742 if (ompt_enabled
.enabled
) {
3743 ompt_frame
->enter_frame
= ompt_data_none
;
3747 } else if (packed_reduction_method
== empty_reduce_block
) {
3751 // usage: if team size==1, no synchronization is required (Intel platforms only)
3753 // TODO: implicit barrier: should be exposed
3755 ompt_frame_t
*ompt_frame
;
3756 if (ompt_enabled
.enabled
) {
3757 __ompt_get_task_info_internal(0, NULL
, NULL
, &ompt_frame
, NULL
, NULL
);
3758 if (ompt_frame
->enter_frame
.ptr
== NULL
)
3759 ompt_frame
->enter_frame
.ptr
= OMPT_GET_FRAME_ADDRESS(0);
3760 OMPT_STORE_RETURN_ADDRESS(global_tid
);
3764 __kmp_threads
[global_tid
]->th
.th_ident
= loc
;
3766 __kmp_barrier(bs_plain_barrier
, global_tid
, FALSE
, 0, NULL
, NULL
);
3767 #if OMPT_SUPPORT && OMPT_OPTIONAL
3768 if (ompt_enabled
.enabled
) {
3769 ompt_frame
->enter_frame
= ompt_data_none
;
3773 } else if (packed_reduction_method
== atomic_reduce_block
) {
3776 ompt_frame_t
*ompt_frame
;
3777 if (ompt_enabled
.enabled
) {
3778 __ompt_get_task_info_internal(0, NULL
, NULL
, &ompt_frame
, NULL
, NULL
);
3779 if (ompt_frame
->enter_frame
.ptr
== NULL
)
3780 ompt_frame
->enter_frame
.ptr
= OMPT_GET_FRAME_ADDRESS(0);
3781 OMPT_STORE_RETURN_ADDRESS(global_tid
);
3784 // TODO: implicit barrier: should be exposed
3786 __kmp_threads
[global_tid
]->th
.th_ident
= loc
;
3788 __kmp_barrier(bs_plain_barrier
, global_tid
, FALSE
, 0, NULL
, NULL
);
3789 #if OMPT_SUPPORT && OMPT_OPTIONAL
3790 if (ompt_enabled
.enabled
) {
3791 ompt_frame
->enter_frame
= ompt_data_none
;
3795 } else if (TEST_REDUCTION_METHOD(packed_reduction_method
,
3796 tree_reduce_block
)) {
3798 // only master executes here (master releases all other workers)
3799 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method
),
3804 // should never reach this block
3805 KMP_ASSERT(0); // "unexpected method"
3807 if (teams_swapped
) {
3808 __kmp_restore_swapped_teams(th
, team
, task_state
);
3811 if (__kmp_env_consistency_check
)
3812 __kmp_pop_sync(global_tid
, ct_reduce
, loc
);
3814 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3815 global_tid
, packed_reduction_method
));

#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD

/* end of interface to fast scalable reduce routines */

kmp_uint64 __kmpc_get_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  return thread->th.th_current_task->td_task_id;

} // __kmpc_get_taskid

kmp_uint64 __kmpc_get_parent_taskid() {

  kmp_int32 gtid;
  kmp_info_t *thread;
  kmp_taskdata_t *parent_task;

  gtid = __kmp_get_gtid();
  if (gtid < 0) {
    return 0;
  }
  thread = __kmp_thread_from_gtid(gtid);
  parent_task = thread->th.th_current_task->td_parent;
  return (parent_task == NULL ? 0 : parent_task->td_task_id);

} // __kmpc_get_parent_taskid

/*!
@ingroup WORK_SHARING
@param loc  source location information.
@param gtid  global thread number.
@param num_dims  number of associated doacross loops.
@param dims  info on loops bounds.

Initialize doacross loop information.
Expect compiler send us inclusive bounds,
e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2.
*/
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
                          const struct kmp_dim *dims) {
  __kmp_assert_valid_gtid(gtid);
  int j, idx;
  kmp_int64 last, trace_count;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_uint32 *flags;
  kmp_disp_t *pr_buf = th->th.th_dispatch;
  dispatch_shared_info_t *sh_buf;

  KA_TRACE(
      20,
      ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
       gtid, num_dims, !team->t.t_serialized));
  KMP_DEBUG_ASSERT(dims != NULL);
  KMP_DEBUG_ASSERT(num_dims > 0);

  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }
  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
  // the next loop
  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];

  // Save bounds info into allocated private buffer
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
      th, sizeof(kmp_int64) * (4 * num_dims + 1));
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  pr_buf->th_doacross_info[0] =
      (kmp_int64)num_dims; // first element is number of dimensions
  // Save also address of num_done in order to access it later without knowing
  // the buffer index
  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
  pr_buf->th_doacross_info[2] = dims[0].lo;
  pr_buf->th_doacross_info[3] = dims[0].up;
  pr_buf->th_doacross_info[4] = dims[0].st;
  last = 5;
  for (j = 1; j < num_dims; ++j) {
    kmp_int64
        range_length; // To keep ranges of all dimensions but the first dims[0]
    if (dims[j].st == 1) { // most common case
      // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
      range_length = dims[j].up - dims[j].lo + 1;
    } else {
      if (dims[j].st > 0) {
        KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
        range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
      } else { // negative increment
        KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
        range_length =
            (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
      }
    }
    pr_buf->th_doacross_info[last++] = range_length;
    pr_buf->th_doacross_info[last++] = dims[j].lo;
    pr_buf->th_doacross_info[last++] = dims[j].up;
    pr_buf->th_doacross_info[last++] = dims[j].st;
  }

  // Compute total trip count.
  // Start with range of dims[0] which we don't need to keep in the buffer.
  if (dims[0].st == 1) { // most common case
    trace_count = dims[0].up - dims[0].lo + 1;
  } else if (dims[0].st > 0) {
    KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
    trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
  } else { // negative increment
    KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
    trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
  }
  for (j = 1; j < num_dims; ++j) {
    trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
  }
  KMP_DEBUG_ASSERT(trace_count > 0);

  // Check if shared buffer is not occupied by other loop (idx -
  // __kmp_dispatch_num_buffers)
  if (idx != sh_buf->doacross_buf_idx) {
    // Shared buffer is occupied, wait for it to be free
    __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
                 __kmp_eq_4, NULL);
  }
#if KMP_32_BIT_ARCH
  // Check if we are the first thread. After the CAS the first thread gets 0,
  // others get 1 if initialization is in progress, allocated pointer otherwise.
  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
      (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
#else
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
      (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
#endif
  if (flags == NULL) {
    // we are the first thread, allocate the array of flags
    size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
    flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
    KMP_MB();
    sh_buf->doacross_flags = flags;
  } else if (flags == (kmp_uint32 *)1) {
#if KMP_32_BIT_ARCH
    // initialization is still in progress, need to wait
    while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
#else
    while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
#endif
      KMP_YIELD(TRUE);
    KMP_MB();
  } else {
    KMP_MB();
  }
  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
  pr_buf->th_doacross_flags =
      sh_buf->doacross_flags; // save private copy in order to not
  // touch shared buffer on each iteration
  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
}
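
/* Illustrative sketch (not part of the runtime): the bounds a compiler would
   pass for the single loop mentioned in the comment above, for(i=2;i<9;i+=2),
   i.e. inclusive lo=2, up=8 and stride st=2. The dims array and the loc/gtid
   arguments are hypothetical; only the kmp_dim fields lo/up/st are the ones
   actually read by __kmpc_doacross_init().

     struct kmp_dim dims[1];
     dims[0].lo = 2; // lower bound, inclusive
     dims[0].up = 8; // upper bound, inclusive
     dims[0].st = 2; // loop stride
     __kmpc_doacross_init(loc, gtid, 1, dims);
*/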

void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, up, st;

  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number and check out-of-bounds condition
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  up = pr_buf->th_doacross_info[3];
  st = pr_buf->th_doacross_info[4];
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_dependence_t deps[num_dims];
#endif
  if (st == 1) { // most common case
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    if (vec[0] > lo || vec[0] < up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  deps[0].variable.value = iter_number;
  deps[0].dependence_type = ompt_dependence_type_sink;
#endif
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    up = pr_buf->th_doacross_info[j + 3];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = vec[i] - lo;
    } else if (st > 0) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // negative increment
      if (vec[i] > lo || vec[i] < up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
#if OMPT_SUPPORT && OMPT_OPTIONAL
    deps[i].variable.value = iter;
    deps[i].dependence_type = ompt_dependence_type_sink;
#endif
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
    KMP_YIELD(TRUE);
  }
  KMP_MB();
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_dependences) {
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        &(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims);
  }
#endif
  KA_TRACE(20,
           ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
            gtid, (iter_number << 5) + shft));
}
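
/* Illustrative sketch (not part of the runtime): the flag addressing used by
   __kmpc_doacross_wait() above and __kmpc_doacross_post() below. Each
   flattened iteration number owns one bit in th_doacross_flags: word index =
   iter >> 5, bit index = iter % 32. The helper name is hypothetical; it only
   restates that arithmetic.

     static inline int example_iter_posted(const kmp_uint32 *flags,
                                           kmp_uint64 iter) {
       kmp_uint32 flag = 1u << (iter % 32); // bit within the 32-bit word
       return (flags[iter >> 5] & flag) != 0; // word holding this iteration
     }
*/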

void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, st;

  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number (same as in "wait" but no
  // out-of-bounds checks)
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  st = pr_buf->th_doacross_info[4];
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_dependence_t deps[num_dims];
#endif
  if (st == 1) { // most common case
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  deps[0].variable.value = iter_number;
  deps[0].dependence_type = ompt_dependence_type_source;
#endif
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      iter = vec[i] - lo;
    } else if (st > 0) {
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // negative increment
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
#if OMPT_SUPPORT && OMPT_OPTIONAL
    deps[i].variable.value = iter;
    deps[i].dependence_type = ompt_dependence_type_source;
#endif
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_dependences) {
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        &(OMPT_CUR_TASK_INFO(th)->task_data), deps, num_dims);
  }
#endif
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  KMP_MB();
  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
    KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
                (iter_number << 5) + shft));
}

void __kmpc_doacross_fini(ident_t *loc, int gtid) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int32 num_done;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf = th->th.th_dispatch;

  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
    return; // nothing to do
  }
  num_done =
      KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
  if (num_done == th->th.th_team_nproc) {
    // we are the last thread, need to free shared resources
    int idx = pr_buf->th_doacross_buf_idx - 1;
    dispatch_shared_info_t *sh_buf =
        &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
                     (kmp_int64)&sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
    __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
    sh_buf->doacross_flags = NULL;
    sh_buf->doacross_num_done = 0;
    sh_buf->doacross_buf_idx +=
        __kmp_dispatch_num_buffers; // free buffer for future re-use
  }
  // free private resources (need to keep buffer index forever)
  pr_buf->th_doacross_flags = NULL;
  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
  pr_buf->th_doacross_info = NULL;
  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}
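
/* Illustrative sketch (not part of the runtime): how the four doacross entry
   points above cooperate for a loop such as

     #pragma omp for ordered(1)
     for (i = 1; i < N; ++i) {
       #pragma omp ordered depend(sink : i - 1)
       // body that consumes the result of iteration i-1
       #pragma omp ordered depend(source)
     }

   The code below is hypothetical compiler output (N, lb, ub, loc and gtid are
   assumed to exist in the enclosing generated code), shown only to make the
   expected per-thread call sequence explicit. Sink vectors that fall outside
   the bounds recorded by __kmpc_doacross_init() are simply ignored by
   __kmpc_doacross_wait(), as its bounds checks above show.

     struct kmp_dim dims[1] = {{1, N - 1, 1}}; // inclusive bounds, stride 1
     __kmpc_doacross_init(loc, gtid, 1, dims);
     for (kmp_int64 i = lb; i <= ub; ++i) { // this thread's chunk
       kmp_int64 sink_vec[1] = {i - 1};
       __kmpc_doacross_wait(loc, gtid, sink_vec); // wait until i-1 is posted
       // ... loop body ...
       kmp_int64 src_vec[1] = {i};
       __kmpc_doacross_post(loc, gtid, src_vec); // mark iteration i as done
     }
     __kmpc_doacross_fini(loc, gtid);
*/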

/* omp_alloc/omp_free only defined for C/C++, not for Fortran */
void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
  return __kmpc_alloc(__kmp_entry_gtid(), size, allocator);
}

void omp_free(void *ptr, omp_allocator_handle_t allocator) {
  __kmpc_free(__kmp_entry_gtid(), ptr, allocator);
}
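
/* Usage sketch (user code, not part of the runtime): omp_alloc/omp_free as a
   program would call them, here with the predefined omp_default_mem_alloc
   allocator; the buffer name and size are arbitrary.

     double *buf = (double *)omp_alloc(1024 * sizeof(double),
                                       omp_default_mem_alloc);
     if (buf != NULL) {
       // ... use buf ...
       omp_free(buf, omp_default_mem_alloc);
     }
*/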

int __kmpc_get_target_offload(void) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  return __kmp_target_offload;
}

int __kmpc_pause_resource(kmp_pause_status_t level) {
  if (!__kmp_init_serial) {
    return 1; // Can't pause if runtime is not initialized
  }
  return __kmp_pause_resource(level);
}
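
/* Usage sketch (user code, not part of the runtime): the user-facing OpenMP
   5.0 counterpart of the pause entry point above is omp_pause_resource /
   omp_pause_resource_all (whether those routines route through
   __kmpc_pause_resource is an implementation detail). A soft pause asks the
   runtime to release reusable resources while staying initialized; the
   routines return zero on success.

     if (omp_pause_resource_all(omp_pause_soft) != 0) {
       // the pause request was not honored
     }
*/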