2 * kmp_lock.cpp -- lock-related functions
5 //===----------------------------------------------------------------------===//
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11 //===----------------------------------------------------------------------===//
21 #include "kmp_wait_release.h"
22 #include "kmp_wrapper_getpid.h"
25 #include <sys/syscall.h>
27 // We should really include <futex.h>, but that causes compatibility problems on
28 // different Linux* OS distributions that either require that you include (or
29 // break when you try to include) <pci/types.h>. Since all we need is the two
30 // macros below (which are part of the kernel ABI, so can't change) we just
31 // define the constants here and don't include <futex.h>
40 /* Implement spin locks for internal library use. */
41 /* The algorithm implemented is Lamport's bakery lock [1974]. */
43 void __kmp_validate_locks(void) {
47 /* Check to make sure unsigned arithmetic wraps properly */
48 x
= ~((kmp_uint32
)0) - 2;
51 for (i
= 0; i
< 8; ++i
, ++x
, ++y
) {
52 kmp_uint32 z
= (x
- y
);
56 KMP_ASSERT(offsetof(kmp_base_queuing_lock
, tail_id
) % 8 == 0);
59 /* ------------------------------------------------------------------------ */
60 /* test and set locks */
62 // For the non-nested locks, we can only assume that the first 4 bytes were
63 // allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
64 // compiler only allocates a 4 byte pointer on IA-32 architecture. On
65 // Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
67 // gcc reserves >= 8 bytes for nested locks, so we can assume that the
68 // entire 8 bytes were allocated for nested locks on all 64-bit platforms.
// Decodes the owner of a test-and-set lock from its poll word: the value is
// read with KMP_ATOMIC_LD_RLX, passed through KMP_LOCK_STRIP, and reduced by
// one. The acquire/test routines in this file store
// KMP_LOCK_BUSY(gtid + 1, tas) into poll, so the result is the holder's gtid;
// the *_with_checks routines compare the result against -1 to detect a lock
// that nobody holds.
70 static kmp_int32
__kmp_get_tas_lock_owner(kmp_tas_lock_t
*lck
) {
71 return KMP_LOCK_STRIP(KMP_ATOMIC_LD_RLX(&lck
->lk
.poll
)) - 1;
// Reports whether this test-and-set lock is being used as a nestable lock:
// true when depth_locked is anything other than -1. The convention noted in
// __kmp_init_nested_tas_lock is ">= 0 for nestable locks, -1 for simple
// locks".
74 static inline bool __kmp_is_tas_lock_nestable(kmp_tas_lock_t
*lck
) {
75 return lck
->lk
.depth_locked
!= -1;
78 __forceinline
static int
79 __kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t
*lck
, kmp_int32 gtid
) {
82 #ifdef USE_LOCK_PROFILE
83 kmp_uint32 curr
= KMP_LOCK_STRIP(lck
->lk
.poll
);
84 if ((curr
!= 0) && (curr
!= gtid
+ 1))
85 __kmp_printf("LOCK CONTENTION: %p\n", lck
);
86 /* else __kmp_printf( "." );*/
87 #endif /* USE_LOCK_PROFILE */
89 kmp_int32 tas_free
= KMP_LOCK_FREE(tas
);
90 kmp_int32 tas_busy
= KMP_LOCK_BUSY(gtid
+ 1, tas
);
92 if (KMP_ATOMIC_LD_RLX(&lck
->lk
.poll
) == tas_free
&&
93 __kmp_atomic_compare_store_acq(&lck
->lk
.poll
, tas_free
, tas_busy
)) {
94 KMP_FSYNC_ACQUIRED(lck
);
95 return KMP_LOCK_ACQUIRED_FIRST
;
100 KMP_FSYNC_PREPARE(lck
);
101 KMP_INIT_YIELD(spins
);
102 KMP_INIT_BACKOFF(time
);
103 kmp_backoff_t backoff
= __kmp_spin_backoff_params
;
106 __kmp_spin_backoff(&backoff
);
108 if (!__kmp_tpause_enabled
)
109 __kmp_spin_backoff(&backoff
);
111 KMP_YIELD_OVERSUB_ELSE_SPIN(spins
, time
);
112 } while (KMP_ATOMIC_LD_RLX(&lck
->lk
.poll
) != tas_free
||
113 !__kmp_atomic_compare_store_acq(&lck
->lk
.poll
, tas_free
, tas_busy
));
114 KMP_FSYNC_ACQUIRED(lck
);
115 return KMP_LOCK_ACQUIRED_FIRST
;
118 int __kmp_acquire_tas_lock(kmp_tas_lock_t
*lck
, kmp_int32 gtid
) {
119 int retval
= __kmp_acquire_tas_lock_timed_template(lck
, gtid
);
123 static int __kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t
*lck
,
125 char const *const func
= "omp_set_lock";
126 if ((sizeof(kmp_tas_lock_t
) <= OMP_LOCK_T_SIZE
) &&
127 __kmp_is_tas_lock_nestable(lck
)) {
128 KMP_FATAL(LockNestableUsedAsSimple
, func
);
130 if ((gtid
>= 0) && (__kmp_get_tas_lock_owner(lck
) == gtid
)) {
131 KMP_FATAL(LockIsAlreadyOwned
, func
);
133 return __kmp_acquire_tas_lock(lck
, gtid
);
136 int __kmp_test_tas_lock(kmp_tas_lock_t
*lck
, kmp_int32 gtid
) {
137 kmp_int32 tas_free
= KMP_LOCK_FREE(tas
);
138 kmp_int32 tas_busy
= KMP_LOCK_BUSY(gtid
+ 1, tas
);
139 if (KMP_ATOMIC_LD_RLX(&lck
->lk
.poll
) == tas_free
&&
140 __kmp_atomic_compare_store_acq(&lck
->lk
.poll
, tas_free
, tas_busy
)) {
141 KMP_FSYNC_ACQUIRED(lck
);
147 static int __kmp_test_tas_lock_with_checks(kmp_tas_lock_t
*lck
,
149 char const *const func
= "omp_test_lock";
150 if ((sizeof(kmp_tas_lock_t
) <= OMP_LOCK_T_SIZE
) &&
151 __kmp_is_tas_lock_nestable(lck
)) {
152 KMP_FATAL(LockNestableUsedAsSimple
, func
);
154 return __kmp_test_tas_lock(lck
, gtid
);
157 int __kmp_release_tas_lock(kmp_tas_lock_t
*lck
, kmp_int32 gtid
) {
158 KMP_MB(); /* Flush all pending memory write invalidates. */
160 KMP_FSYNC_RELEASING(lck
);
161 KMP_ATOMIC_ST_REL(&lck
->lk
.poll
, KMP_LOCK_FREE(tas
));
162 KMP_MB(); /* Flush all pending memory write invalidates. */
165 return KMP_LOCK_RELEASED
;
168 static int __kmp_release_tas_lock_with_checks(kmp_tas_lock_t
*lck
,
170 char const *const func
= "omp_unset_lock";
171 KMP_MB(); /* in case another processor initialized lock */
172 if ((sizeof(kmp_tas_lock_t
) <= OMP_LOCK_T_SIZE
) &&
173 __kmp_is_tas_lock_nestable(lck
)) {
174 KMP_FATAL(LockNestableUsedAsSimple
, func
);
176 if (__kmp_get_tas_lock_owner(lck
) == -1) {
177 KMP_FATAL(LockUnsettingFree
, func
);
179 if ((gtid
>= 0) && (__kmp_get_tas_lock_owner(lck
) >= 0) &&
180 (__kmp_get_tas_lock_owner(lck
) != gtid
)) {
181 KMP_FATAL(LockUnsettingSetByAnother
, func
);
183 return __kmp_release_tas_lock(lck
, gtid
);
// Initializes a simple test-and-set lock by assigning the free value,
// KMP_LOCK_FREE(tas), to its poll word.
186 void __kmp_init_tas_lock(kmp_tas_lock_t
*lck
) {
187 lck
->lk
.poll
= KMP_LOCK_FREE(tas
);
// Destroys a simple test-and-set lock: the only action is to assign 0 to the
// poll word. No ownership check is done here; that is the job of
// __kmp_destroy_tas_lock_with_checks.
190 void __kmp_destroy_tas_lock(kmp_tas_lock_t
*lck
) { lck
->lk
.poll
= 0; }
192 static void __kmp_destroy_tas_lock_with_checks(kmp_tas_lock_t
*lck
) {
193 char const *const func
= "omp_destroy_lock";
194 if ((sizeof(kmp_tas_lock_t
) <= OMP_LOCK_T_SIZE
) &&
195 __kmp_is_tas_lock_nestable(lck
)) {
196 KMP_FATAL(LockNestableUsedAsSimple
, func
);
198 if (__kmp_get_tas_lock_owner(lck
) != -1) {
199 KMP_FATAL(LockStillOwned
, func
);
201 __kmp_destroy_tas_lock(lck
);
204 // nested test and set locks
206 int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t
*lck
, kmp_int32 gtid
) {
207 KMP_DEBUG_ASSERT(gtid
>= 0);
209 if (__kmp_get_tas_lock_owner(lck
) == gtid
) {
210 lck
->lk
.depth_locked
+= 1;
211 return KMP_LOCK_ACQUIRED_NEXT
;
213 __kmp_acquire_tas_lock_timed_template(lck
, gtid
);
214 lck
->lk
.depth_locked
= 1;
215 return KMP_LOCK_ACQUIRED_FIRST
;
219 static int __kmp_acquire_nested_tas_lock_with_checks(kmp_tas_lock_t
*lck
,
221 char const *const func
= "omp_set_nest_lock";
222 if (!__kmp_is_tas_lock_nestable(lck
)) {
223 KMP_FATAL(LockSimpleUsedAsNestable
, func
);
225 return __kmp_acquire_nested_tas_lock(lck
, gtid
);
228 int __kmp_test_nested_tas_lock(kmp_tas_lock_t
*lck
, kmp_int32 gtid
) {
231 KMP_DEBUG_ASSERT(gtid
>= 0);
233 if (__kmp_get_tas_lock_owner(lck
) == gtid
) {
234 retval
= ++lck
->lk
.depth_locked
;
235 } else if (!__kmp_test_tas_lock(lck
, gtid
)) {
239 retval
= lck
->lk
.depth_locked
= 1;
244 static int __kmp_test_nested_tas_lock_with_checks(kmp_tas_lock_t
*lck
,
246 char const *const func
= "omp_test_nest_lock";
247 if (!__kmp_is_tas_lock_nestable(lck
)) {
248 KMP_FATAL(LockSimpleUsedAsNestable
, func
);
250 return __kmp_test_nested_tas_lock(lck
, gtid
);
253 int __kmp_release_nested_tas_lock(kmp_tas_lock_t
*lck
, kmp_int32 gtid
) {
254 KMP_DEBUG_ASSERT(gtid
>= 0);
257 if (--(lck
->lk
.depth_locked
) == 0) {
258 __kmp_release_tas_lock(lck
, gtid
);
259 return KMP_LOCK_RELEASED
;
261 return KMP_LOCK_STILL_HELD
;
264 static int __kmp_release_nested_tas_lock_with_checks(kmp_tas_lock_t
*lck
,
266 char const *const func
= "omp_unset_nest_lock";
267 KMP_MB(); /* in case another processor initialized lock */
268 if (!__kmp_is_tas_lock_nestable(lck
)) {
269 KMP_FATAL(LockSimpleUsedAsNestable
, func
);
271 if (__kmp_get_tas_lock_owner(lck
) == -1) {
272 KMP_FATAL(LockUnsettingFree
, func
);
274 if (__kmp_get_tas_lock_owner(lck
) != gtid
) {
275 KMP_FATAL(LockUnsettingSetByAnother
, func
);
277 return __kmp_release_nested_tas_lock(lck
, gtid
);
// Initializes a nestable test-and-set lock: runs the simple-lock initializer
// and then sets depth_locked to 0, which is what
// __kmp_is_tas_lock_nestable later uses to tell nestable locks from simple
// ones.
280 void __kmp_init_nested_tas_lock(kmp_tas_lock_t
*lck
) {
281 __kmp_init_tas_lock(lck
);
282 lck
->lk
.depth_locked
= 0; // >= 0 for nestable locks, -1 for simple locks
// Destroys a nestable test-and-set lock: runs the simple-lock destroy routine
// and then resets the nesting depth (depth_locked) to 0.
285 void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t
*lck
) {
286 __kmp_destroy_tas_lock(lck
);
287 lck
->lk
.depth_locked
= 0;
290 static void __kmp_destroy_nested_tas_lock_with_checks(kmp_tas_lock_t
*lck
) {
291 char const *const func
= "omp_destroy_nest_lock";
292 if (!__kmp_is_tas_lock_nestable(lck
)) {
293 KMP_FATAL(LockSimpleUsedAsNestable
, func
);
295 if (__kmp_get_tas_lock_owner(lck
) != -1) {
296 KMP_FATAL(LockStillOwned
, func
);
298 __kmp_destroy_nested_tas_lock(lck
);
303 /* ------------------------------------------------------------------------ */
306 // futex locks are really just test and set locks, with a different method
307 // of handling contention. They take the same amount of space as test and
308 // set locks, and are allocated the same way (i.e. use the area allocated by
309 // the compiler for non-nested locks / allocate nested locks on the heap).
// Decodes the owner of a futex lock from its poll word. The acquire path in
// this file stores (gtid + 1) << 1 as the owner code and uses the lowest bit
// to tell the owner that a waiter needs waking. The value read via TCR_4 is
// therefore shifted right by one to drop that bit, passed through
// KMP_LOCK_STRIP, and reduced by one to recover the gtid. The *_with_checks
// routines treat a result of -1 as "not owned".
311 static kmp_int32
__kmp_get_futex_lock_owner(kmp_futex_lock_t
*lck
) {
312 return KMP_LOCK_STRIP((TCR_4(lck
->lk
.poll
) >> 1)) - 1;
// Reports whether this futex lock is being used as a nestable lock: true when
// depth_locked is anything other than -1. The convention noted in
// __kmp_init_nested_futex_lock is ">= 0 for nestable locks, -1 for simple
// locks".
315 static inline bool __kmp_is_futex_lock_nestable(kmp_futex_lock_t
*lck
) {
316 return lck
->lk
.depth_locked
!= -1;
319 __forceinline
static int
320 __kmp_acquire_futex_lock_timed_template(kmp_futex_lock_t
*lck
, kmp_int32 gtid
) {
321 kmp_int32 gtid_code
= (gtid
+ 1) << 1;
325 #ifdef USE_LOCK_PROFILE
326 kmp_uint32 curr
= KMP_LOCK_STRIP(TCR_4(lck
->lk
.poll
));
327 if ((curr
!= 0) && (curr
!= gtid_code
))
328 __kmp_printf("LOCK CONTENTION: %p\n", lck
);
329 /* else __kmp_printf( "." );*/
330 #endif /* USE_LOCK_PROFILE */
332 KMP_FSYNC_PREPARE(lck
);
333 KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
334 lck
, lck
->lk
.poll
, gtid
));
338 while ((poll_val
= KMP_COMPARE_AND_STORE_RET32(
339 &(lck
->lk
.poll
), KMP_LOCK_FREE(futex
),
340 KMP_LOCK_BUSY(gtid_code
, futex
))) != KMP_LOCK_FREE(futex
)) {
342 kmp_int32 cond
= KMP_LOCK_STRIP(poll_val
) & 1;
345 ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
346 lck
, gtid
, poll_val
, cond
));
348 // NOTE: if you try to use the following condition for this branch
350 // if ( poll_val & 1 == 0 )
352 // Then the 12.0 compiler has a bug where the following block will
353 // always be skipped, regardless of the value of the LSB of poll_val.
355 // Try to set the lsb in the poll to indicate to the owner
356 // thread that they need to wake this thread up.
357 if (!KMP_COMPARE_AND_STORE_REL32(&(lck
->lk
.poll
), poll_val
,
358 poll_val
| KMP_LOCK_BUSY(1, futex
))) {
361 ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
362 lck
, lck
->lk
.poll
, gtid
));
365 poll_val
|= KMP_LOCK_BUSY(1, futex
);
368 ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", lck
,
369 lck
->lk
.poll
, gtid
));
374 ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
375 lck
, gtid
, poll_val
));
378 if ((rc
= syscall(__NR_futex
, &(lck
->lk
.poll
), FUTEX_WAIT
, poll_val
, NULL
,
380 KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) "
381 "failed (rc=%ld errno=%d)\n",
382 lck
, gtid
, poll_val
, rc
, errno
));
387 ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
388 lck
, gtid
, poll_val
));
389 // This thread has now done a successful futex wait call and was entered on
390 // the OS futex queue. We must now perform a futex wake call when releasing
391 // the lock, as we have no idea how many other threads are in the queue.
395 KMP_FSYNC_ACQUIRED(lck
);
396 KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck
,
397 lck
->lk
.poll
, gtid
));
398 return KMP_LOCK_ACQUIRED_FIRST
;
401 int __kmp_acquire_futex_lock(kmp_futex_lock_t
*lck
, kmp_int32 gtid
) {
402 int retval
= __kmp_acquire_futex_lock_timed_template(lck
, gtid
);
406 static int __kmp_acquire_futex_lock_with_checks(kmp_futex_lock_t
*lck
,
408 char const *const func
= "omp_set_lock";
409 if ((sizeof(kmp_futex_lock_t
) <= OMP_LOCK_T_SIZE
) &&
410 __kmp_is_futex_lock_nestable(lck
)) {
411 KMP_FATAL(LockNestableUsedAsSimple
, func
);
413 if ((gtid
>= 0) && (__kmp_get_futex_lock_owner(lck
) == gtid
)) {
414 KMP_FATAL(LockIsAlreadyOwned
, func
);
416 return __kmp_acquire_futex_lock(lck
, gtid
);
419 int __kmp_test_futex_lock(kmp_futex_lock_t
*lck
, kmp_int32 gtid
) {
420 if (KMP_COMPARE_AND_STORE_ACQ32(&(lck
->lk
.poll
), KMP_LOCK_FREE(futex
),
421 KMP_LOCK_BUSY((gtid
+ 1) << 1, futex
))) {
422 KMP_FSYNC_ACQUIRED(lck
);
428 static int __kmp_test_futex_lock_with_checks(kmp_futex_lock_t
*lck
,
430 char const *const func
= "omp_test_lock";
431 if ((sizeof(kmp_futex_lock_t
) <= OMP_LOCK_T_SIZE
) &&
432 __kmp_is_futex_lock_nestable(lck
)) {
433 KMP_FATAL(LockNestableUsedAsSimple
, func
);
435 return __kmp_test_futex_lock(lck
, gtid
);
438 int __kmp_release_futex_lock(kmp_futex_lock_t
*lck
, kmp_int32 gtid
) {
439 KMP_MB(); /* Flush all pending memory write invalidates. */
441 KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
442 lck
, lck
->lk
.poll
, gtid
));
444 KMP_FSYNC_RELEASING(lck
);
446 kmp_int32 poll_val
= KMP_XCHG_FIXED32(&(lck
->lk
.poll
), KMP_LOCK_FREE(futex
));
449 ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
450 lck
, gtid
, poll_val
));
452 if (KMP_LOCK_STRIP(poll_val
) & 1) {
454 ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
456 syscall(__NR_futex
, &(lck
->lk
.poll
), FUTEX_WAKE
, KMP_LOCK_BUSY(1, futex
),
460 KMP_MB(); /* Flush all pending memory write invalidates. */
462 KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck
,
463 lck
->lk
.poll
, gtid
));
466 return KMP_LOCK_RELEASED
;
469 static int __kmp_release_futex_lock_with_checks(kmp_futex_lock_t
*lck
,
471 char const *const func
= "omp_unset_lock";
472 KMP_MB(); /* in case another processor initialized lock */
473 if ((sizeof(kmp_futex_lock_t
) <= OMP_LOCK_T_SIZE
) &&
474 __kmp_is_futex_lock_nestable(lck
)) {
475 KMP_FATAL(LockNestableUsedAsSimple
, func
);
477 if (__kmp_get_futex_lock_owner(lck
) == -1) {
478 KMP_FATAL(LockUnsettingFree
, func
);
480 if ((gtid
>= 0) && (__kmp_get_futex_lock_owner(lck
) >= 0) &&
481 (__kmp_get_futex_lock_owner(lck
) != gtid
)) {
482 KMP_FATAL(LockUnsettingSetByAnother
, func
);
484 return __kmp_release_futex_lock(lck
, gtid
);
// Initializes a simple futex lock by writing the free value,
// KMP_LOCK_FREE(futex), to its poll word through the TCW_4 macro.
487 void __kmp_init_futex_lock(kmp_futex_lock_t
*lck
) {
488 TCW_4(lck
->lk
.poll
, KMP_LOCK_FREE(futex
));
// Destroys a simple futex lock: the only action is to assign 0 to the poll
// word. No ownership check is done here; that is the job of
// __kmp_destroy_futex_lock_with_checks.
491 void __kmp_destroy_futex_lock(kmp_futex_lock_t
*lck
) { lck
->lk
.poll
= 0; }
493 static void __kmp_destroy_futex_lock_with_checks(kmp_futex_lock_t
*lck
) {
494 char const *const func
= "omp_destroy_lock";
495 if ((sizeof(kmp_futex_lock_t
) <= OMP_LOCK_T_SIZE
) &&
496 __kmp_is_futex_lock_nestable(lck
)) {
497 KMP_FATAL(LockNestableUsedAsSimple
, func
);
499 if (__kmp_get_futex_lock_owner(lck
) != -1) {
500 KMP_FATAL(LockStillOwned
, func
);
502 __kmp_destroy_futex_lock(lck
);
505 // nested futex locks
507 int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t
*lck
, kmp_int32 gtid
) {
508 KMP_DEBUG_ASSERT(gtid
>= 0);
510 if (__kmp_get_futex_lock_owner(lck
) == gtid
) {
511 lck
->lk
.depth_locked
+= 1;
512 return KMP_LOCK_ACQUIRED_NEXT
;
514 __kmp_acquire_futex_lock_timed_template(lck
, gtid
);
515 lck
->lk
.depth_locked
= 1;
516 return KMP_LOCK_ACQUIRED_FIRST
;
520 static int __kmp_acquire_nested_futex_lock_with_checks(kmp_futex_lock_t
*lck
,
522 char const *const func
= "omp_set_nest_lock";
523 if (!__kmp_is_futex_lock_nestable(lck
)) {
524 KMP_FATAL(LockSimpleUsedAsNestable
, func
);
526 return __kmp_acquire_nested_futex_lock(lck
, gtid
);
529 int __kmp_test_nested_futex_lock(kmp_futex_lock_t
*lck
, kmp_int32 gtid
) {
532 KMP_DEBUG_ASSERT(gtid
>= 0);
534 if (__kmp_get_futex_lock_owner(lck
) == gtid
) {
535 retval
= ++lck
->lk
.depth_locked
;
536 } else if (!__kmp_test_futex_lock(lck
, gtid
)) {
540 retval
= lck
->lk
.depth_locked
= 1;
545 static int __kmp_test_nested_futex_lock_with_checks(kmp_futex_lock_t
*lck
,
547 char const *const func
= "omp_test_nest_lock";
548 if (!__kmp_is_futex_lock_nestable(lck
)) {
549 KMP_FATAL(LockSimpleUsedAsNestable
, func
);
551 return __kmp_test_nested_futex_lock(lck
, gtid
);
554 int __kmp_release_nested_futex_lock(kmp_futex_lock_t
*lck
, kmp_int32 gtid
) {
555 KMP_DEBUG_ASSERT(gtid
>= 0);
558 if (--(lck
->lk
.depth_locked
) == 0) {
559 __kmp_release_futex_lock(lck
, gtid
);
560 return KMP_LOCK_RELEASED
;
562 return KMP_LOCK_STILL_HELD
;
565 static int __kmp_release_nested_futex_lock_with_checks(kmp_futex_lock_t
*lck
,
567 char const *const func
= "omp_unset_nest_lock";
568 KMP_MB(); /* in case another processor initialized lock */
569 if (!__kmp_is_futex_lock_nestable(lck
)) {
570 KMP_FATAL(LockSimpleUsedAsNestable
, func
);
572 if (__kmp_get_futex_lock_owner(lck
) == -1) {
573 KMP_FATAL(LockUnsettingFree
, func
);
575 if (__kmp_get_futex_lock_owner(lck
) != gtid
) {
576 KMP_FATAL(LockUnsettingSetByAnother
, func
);
578 return __kmp_release_nested_futex_lock(lck
, gtid
);
// Initializes a nestable futex lock: runs the simple-lock initializer and
// then sets depth_locked to 0, which is what __kmp_is_futex_lock_nestable
// later uses to tell nestable locks from simple ones.
581 void __kmp_init_nested_futex_lock(kmp_futex_lock_t
*lck
) {
582 __kmp_init_futex_lock(lck
);
583 lck
->lk
.depth_locked
= 0; // >= 0 for nestable locks, -1 for simple locks
// Destroys a nestable futex lock: runs the simple-lock destroy routine and
// then resets the nesting depth (depth_locked) to 0.
586 void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t
*lck
) {
587 __kmp_destroy_futex_lock(lck
);
588 lck
->lk
.depth_locked
= 0;
591 static void __kmp_destroy_nested_futex_lock_with_checks(kmp_futex_lock_t
*lck
) {
592 char const *const func
= "omp_destroy_nest_lock";
593 if (!__kmp_is_futex_lock_nestable(lck
)) {
594 KMP_FATAL(LockSimpleUsedAsNestable
, func
);
596 if (__kmp_get_futex_lock_owner(lck
) != -1) {
597 KMP_FATAL(LockStillOwned
, func
);
599 __kmp_destroy_nested_futex_lock(lck
);
602 #endif // KMP_USE_FUTEX
604 /* ------------------------------------------------------------------------ */
605 /* ticket (bakery) locks */
607 static kmp_int32
__kmp_get_ticket_lock_owner(kmp_ticket_lock_t
*lck
) {
608 return std::atomic_load_explicit(&lck
->lk
.owner_id
,
609 std::memory_order_relaxed
) -
// Reports whether this ticket lock is a nestable lock: loads depth_locked
// with relaxed ordering and returns true unless it is -1.
// __kmp_init_ticket_lock stores -1 there ("not a nested lock") and
// __kmp_init_nested_ticket_lock overwrites it with 0.
613 static inline bool __kmp_is_ticket_lock_nestable(kmp_ticket_lock_t
*lck
) {
614 return std::atomic_load_explicit(&lck
->lk
.depth_locked
,
615 std::memory_order_relaxed
) != -1;
// Wait predicate for the ticket lock. Treats now_serving as a pointer to
// std::atomic<unsigned>, loads it with acquire ordering, and returns nonzero
// when the loaded value equals my_ticket (i.e. it is this ticket's turn).
// The ticket-lock acquire path passes this function to KMP_WAIT_PTR.
618 static kmp_uint32
__kmp_bakery_check(void *now_serving
, kmp_uint32 my_ticket
) {
619 return std::atomic_load_explicit((std::atomic
<unsigned> *)now_serving
,
620 std::memory_order_acquire
) == my_ticket
;
623 __forceinline
static int
624 __kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t
*lck
,
626 kmp_uint32 my_ticket
= std::atomic_fetch_add_explicit(
627 &lck
->lk
.next_ticket
, 1U, std::memory_order_relaxed
);
629 #ifdef USE_LOCK_PROFILE
630 if (std::atomic_load_explicit(&lck
->lk
.now_serving
,
631 std::memory_order_relaxed
) != my_ticket
)
632 __kmp_printf("LOCK CONTENTION: %p\n", lck
);
633 /* else __kmp_printf( "." );*/
634 #endif /* USE_LOCK_PROFILE */
636 if (std::atomic_load_explicit(&lck
->lk
.now_serving
,
637 std::memory_order_acquire
) == my_ticket
) {
638 return KMP_LOCK_ACQUIRED_FIRST
;
640 KMP_WAIT_PTR(&lck
->lk
.now_serving
, my_ticket
, __kmp_bakery_check
, lck
);
641 return KMP_LOCK_ACQUIRED_FIRST
;
644 int __kmp_acquire_ticket_lock(kmp_ticket_lock_t
*lck
, kmp_int32 gtid
) {
645 int retval
= __kmp_acquire_ticket_lock_timed_template(lck
, gtid
);
649 static int __kmp_acquire_ticket_lock_with_checks(kmp_ticket_lock_t
*lck
,
651 char const *const func
= "omp_set_lock";
653 if (!std::atomic_load_explicit(&lck
->lk
.initialized
,
654 std::memory_order_relaxed
)) {
655 KMP_FATAL(LockIsUninitialized
, func
);
657 if (lck
->lk
.self
!= lck
) {
658 KMP_FATAL(LockIsUninitialized
, func
);
660 if (__kmp_is_ticket_lock_nestable(lck
)) {
661 KMP_FATAL(LockNestableUsedAsSimple
, func
);
663 if ((gtid
>= 0) && (__kmp_get_ticket_lock_owner(lck
) == gtid
)) {
664 KMP_FATAL(LockIsAlreadyOwned
, func
);
667 __kmp_acquire_ticket_lock(lck
, gtid
);
669 std::atomic_store_explicit(&lck
->lk
.owner_id
, gtid
+ 1,
670 std::memory_order_relaxed
);
671 return KMP_LOCK_ACQUIRED_FIRST
;
674 int __kmp_test_ticket_lock(kmp_ticket_lock_t
*lck
, kmp_int32 gtid
) {
675 kmp_uint32 my_ticket
= std::atomic_load_explicit(&lck
->lk
.next_ticket
,
676 std::memory_order_relaxed
);
678 if (std::atomic_load_explicit(&lck
->lk
.now_serving
,
679 std::memory_order_relaxed
) == my_ticket
) {
680 kmp_uint32 next_ticket
= my_ticket
+ 1;
681 if (std::atomic_compare_exchange_strong_explicit(
682 &lck
->lk
.next_ticket
, &my_ticket
, next_ticket
,
683 std::memory_order_acquire
, std::memory_order_acquire
)) {
690 static int __kmp_test_ticket_lock_with_checks(kmp_ticket_lock_t
*lck
,
692 char const *const func
= "omp_test_lock";
694 if (!std::atomic_load_explicit(&lck
->lk
.initialized
,
695 std::memory_order_relaxed
)) {
696 KMP_FATAL(LockIsUninitialized
, func
);
698 if (lck
->lk
.self
!= lck
) {
699 KMP_FATAL(LockIsUninitialized
, func
);
701 if (__kmp_is_ticket_lock_nestable(lck
)) {
702 KMP_FATAL(LockNestableUsedAsSimple
, func
);
705 int retval
= __kmp_test_ticket_lock(lck
, gtid
);
708 std::atomic_store_explicit(&lck
->lk
.owner_id
, gtid
+ 1,
709 std::memory_order_relaxed
);
714 int __kmp_release_ticket_lock(kmp_ticket_lock_t
*lck
, kmp_int32 gtid
) {
715 kmp_uint32 distance
= std::atomic_load_explicit(&lck
->lk
.next_ticket
,
716 std::memory_order_relaxed
) -
717 std::atomic_load_explicit(&lck
->lk
.now_serving
,
718 std::memory_order_relaxed
);
720 std::atomic_fetch_add_explicit(&lck
->lk
.now_serving
, 1U,
721 std::memory_order_release
);
724 (kmp_uint32
)(__kmp_avail_proc
? __kmp_avail_proc
: __kmp_xproc
));
725 return KMP_LOCK_RELEASED
;
728 static int __kmp_release_ticket_lock_with_checks(kmp_ticket_lock_t
*lck
,
730 char const *const func
= "omp_unset_lock";
732 if (!std::atomic_load_explicit(&lck
->lk
.initialized
,
733 std::memory_order_relaxed
)) {
734 KMP_FATAL(LockIsUninitialized
, func
);
736 if (lck
->lk
.self
!= lck
) {
737 KMP_FATAL(LockIsUninitialized
, func
);
739 if (__kmp_is_ticket_lock_nestable(lck
)) {
740 KMP_FATAL(LockNestableUsedAsSimple
, func
);
742 if (__kmp_get_ticket_lock_owner(lck
) == -1) {
743 KMP_FATAL(LockUnsettingFree
, func
);
745 if ((gtid
>= 0) && (__kmp_get_ticket_lock_owner(lck
) >= 0) &&
746 (__kmp_get_ticket_lock_owner(lck
) != gtid
)) {
747 KMP_FATAL(LockUnsettingSetByAnother
, func
);
749 std::atomic_store_explicit(&lck
->lk
.owner_id
, 0, std::memory_order_relaxed
);
750 return __kmp_release_ticket_lock(lck
, gtid
);
753 void __kmp_init_ticket_lock(kmp_ticket_lock_t
*lck
) {
754 lck
->lk
.location
= NULL
;
756 std::atomic_store_explicit(&lck
->lk
.next_ticket
, 0U,
757 std::memory_order_relaxed
);
758 std::atomic_store_explicit(&lck
->lk
.now_serving
, 0U,
759 std::memory_order_relaxed
);
760 std::atomic_store_explicit(
761 &lck
->lk
.owner_id
, 0,
762 std::memory_order_relaxed
); // no thread owns the lock.
763 std::atomic_store_explicit(
764 &lck
->lk
.depth_locked
, -1,
765 std::memory_order_relaxed
); // -1 => not a nested lock.
766 std::atomic_store_explicit(&lck
->lk
.initialized
, true,
767 std::memory_order_release
);
770 void __kmp_destroy_ticket_lock(kmp_ticket_lock_t
*lck
) {
771 std::atomic_store_explicit(&lck
->lk
.initialized
, false,
772 std::memory_order_release
);
774 lck
->lk
.location
= NULL
;
775 std::atomic_store_explicit(&lck
->lk
.next_ticket
, 0U,
776 std::memory_order_relaxed
);
777 std::atomic_store_explicit(&lck
->lk
.now_serving
, 0U,
778 std::memory_order_relaxed
);
779 std::atomic_store_explicit(&lck
->lk
.owner_id
, 0, std::memory_order_relaxed
);
780 std::atomic_store_explicit(&lck
->lk
.depth_locked
, -1,
781 std::memory_order_relaxed
);
784 static void __kmp_destroy_ticket_lock_with_checks(kmp_ticket_lock_t
*lck
) {
785 char const *const func
= "omp_destroy_lock";
787 if (!std::atomic_load_explicit(&lck
->lk
.initialized
,
788 std::memory_order_relaxed
)) {
789 KMP_FATAL(LockIsUninitialized
, func
);
791 if (lck
->lk
.self
!= lck
) {
792 KMP_FATAL(LockIsUninitialized
, func
);
794 if (__kmp_is_ticket_lock_nestable(lck
)) {
795 KMP_FATAL(LockNestableUsedAsSimple
, func
);
797 if (__kmp_get_ticket_lock_owner(lck
) != -1) {
798 KMP_FATAL(LockStillOwned
, func
);
800 __kmp_destroy_ticket_lock(lck
);
803 // nested ticket locks
805 int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t
*lck
, kmp_int32 gtid
) {
806 KMP_DEBUG_ASSERT(gtid
>= 0);
808 if (__kmp_get_ticket_lock_owner(lck
) == gtid
) {
809 std::atomic_fetch_add_explicit(&lck
->lk
.depth_locked
, 1,
810 std::memory_order_relaxed
);
811 return KMP_LOCK_ACQUIRED_NEXT
;
813 __kmp_acquire_ticket_lock_timed_template(lck
, gtid
);
814 std::atomic_store_explicit(&lck
->lk
.depth_locked
, 1,
815 std::memory_order_relaxed
);
816 std::atomic_store_explicit(&lck
->lk
.owner_id
, gtid
+ 1,
817 std::memory_order_relaxed
);
818 return KMP_LOCK_ACQUIRED_FIRST
;
822 static int __kmp_acquire_nested_ticket_lock_with_checks(kmp_ticket_lock_t
*lck
,
824 char const *const func
= "omp_set_nest_lock";
826 if (!std::atomic_load_explicit(&lck
->lk
.initialized
,
827 std::memory_order_relaxed
)) {
828 KMP_FATAL(LockIsUninitialized
, func
);
830 if (lck
->lk
.self
!= lck
) {
831 KMP_FATAL(LockIsUninitialized
, func
);
833 if (!__kmp_is_ticket_lock_nestable(lck
)) {
834 KMP_FATAL(LockSimpleUsedAsNestable
, func
);
836 return __kmp_acquire_nested_ticket_lock(lck
, gtid
);
839 int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t
*lck
, kmp_int32 gtid
) {
842 KMP_DEBUG_ASSERT(gtid
>= 0);
844 if (__kmp_get_ticket_lock_owner(lck
) == gtid
) {
845 retval
= std::atomic_fetch_add_explicit(&lck
->lk
.depth_locked
, 1,
846 std::memory_order_relaxed
) +
848 } else if (!__kmp_test_ticket_lock(lck
, gtid
)) {
851 std::atomic_store_explicit(&lck
->lk
.depth_locked
, 1,
852 std::memory_order_relaxed
);
853 std::atomic_store_explicit(&lck
->lk
.owner_id
, gtid
+ 1,
854 std::memory_order_relaxed
);
860 static int __kmp_test_nested_ticket_lock_with_checks(kmp_ticket_lock_t
*lck
,
862 char const *const func
= "omp_test_nest_lock";
864 if (!std::atomic_load_explicit(&lck
->lk
.initialized
,
865 std::memory_order_relaxed
)) {
866 KMP_FATAL(LockIsUninitialized
, func
);
868 if (lck
->lk
.self
!= lck
) {
869 KMP_FATAL(LockIsUninitialized
, func
);
871 if (!__kmp_is_ticket_lock_nestable(lck
)) {
872 KMP_FATAL(LockSimpleUsedAsNestable
, func
);
874 return __kmp_test_nested_ticket_lock(lck
, gtid
);
877 int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t
*lck
, kmp_int32 gtid
) {
878 KMP_DEBUG_ASSERT(gtid
>= 0);
880 if ((std::atomic_fetch_add_explicit(&lck
->lk
.depth_locked
, -1,
881 std::memory_order_relaxed
) -
883 std::atomic_store_explicit(&lck
->lk
.owner_id
, 0, std::memory_order_relaxed
);
884 __kmp_release_ticket_lock(lck
, gtid
);
885 return KMP_LOCK_RELEASED
;
887 return KMP_LOCK_STILL_HELD
;
890 static int __kmp_release_nested_ticket_lock_with_checks(kmp_ticket_lock_t
*lck
,
892 char const *const func
= "omp_unset_nest_lock";
894 if (!std::atomic_load_explicit(&lck
->lk
.initialized
,
895 std::memory_order_relaxed
)) {
896 KMP_FATAL(LockIsUninitialized
, func
);
898 if (lck
->lk
.self
!= lck
) {
899 KMP_FATAL(LockIsUninitialized
, func
);
901 if (!__kmp_is_ticket_lock_nestable(lck
)) {
902 KMP_FATAL(LockSimpleUsedAsNestable
, func
);
904 if (__kmp_get_ticket_lock_owner(lck
) == -1) {
905 KMP_FATAL(LockUnsettingFree
, func
);
907 if (__kmp_get_ticket_lock_owner(lck
) != gtid
) {
908 KMP_FATAL(LockUnsettingSetByAnother
, func
);
910 return __kmp_release_nested_ticket_lock(lck
, gtid
);
// Initializes a nestable ticket lock: runs the simple ticket-lock initializer
// (which leaves depth_locked at -1) and then stores 0 into depth_locked with
// relaxed ordering so that __kmp_is_ticket_lock_nestable reports true.
913 void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t
*lck
) {
914 __kmp_init_ticket_lock(lck
);
915 std::atomic_store_explicit(&lck
->lk
.depth_locked
, 0,
916 std::memory_order_relaxed
);
917 // >= 0 for nestable locks, -1 for simple locks
// Destroys a nestable ticket lock: runs the simple ticket-lock destroy
// routine (which stores -1 into depth_locked) and then stores 0 into
// depth_locked with relaxed ordering.
920 void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t
*lck
) {
921 __kmp_destroy_ticket_lock(lck
);
922 std::atomic_store_explicit(&lck
->lk
.depth_locked
, 0,
923 std::memory_order_relaxed
);
927 __kmp_destroy_nested_ticket_lock_with_checks(kmp_ticket_lock_t
*lck
) {
928 char const *const func
= "omp_destroy_nest_lock";
930 if (!std::atomic_load_explicit(&lck
->lk
.initialized
,
931 std::memory_order_relaxed
)) {
932 KMP_FATAL(LockIsUninitialized
, func
);
934 if (lck
->lk
.self
!= lck
) {
935 KMP_FATAL(LockIsUninitialized
, func
);
937 if (!__kmp_is_ticket_lock_nestable(lck
)) {
938 KMP_FATAL(LockSimpleUsedAsNestable
, func
);
940 if (__kmp_get_ticket_lock_owner(lck
) != -1) {
941 KMP_FATAL(LockStillOwned
, func
);
943 __kmp_destroy_nested_ticket_lock(lck
);
946 // access functions to fields which don't exist for all lock kinds.
// Accessor: returns the ident_t pointer stored in the ticket lock's location
// field (NULL after __kmp_init_ticket_lock until a location is set).
948 static const ident_t
*__kmp_get_ticket_lock_location(kmp_ticket_lock_t
*lck
) {
949 return lck
->lk
.location
;
// Accessor: stores loc in the ticket lock's location field. The pointer is
// stored as given; nothing is copied.
952 static void __kmp_set_ticket_lock_location(kmp_ticket_lock_t
*lck
,
953 const ident_t
*loc
) {
954 lck
->lk
.location
= loc
;
// Accessor: returns the value of the ticket lock's flags field.
957 static kmp_lock_flags_t
__kmp_get_ticket_lock_flags(kmp_ticket_lock_t
*lck
) {
958 return lck
->lk
.flags
;
// Accessor: overwrites the ticket lock's flags field with flags.
961 static void __kmp_set_ticket_lock_flags(kmp_ticket_lock_t
*lck
,
962 kmp_lock_flags_t flags
) {
963 lck
->lk
.flags
= flags
;
966 /* ------------------------------------------------------------------------ */
970 (head,tail) = 0, 0 means lock is unheld, nobody on queue
971 UINT_MAX or -1, 0 means lock is held, nobody on queue
972 h, h means lock held or about to transition,
974 h, t h <> t, means lock is held or about to
975 transition, >1 elements on queue
980 Acquire(-1,0) = h ,h h > 0
982 Acquire(h,h) = h ,t h > 0, t > 0, h <> t
983 Release(h,h) = -1 ,0 h > 0
984 Acquire(h,t) = h ,t' h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
985 Release(h,t) = h',t h > 0, t > 0, h <> t, h <> h', h' maybe = t
990 | 0, 0|------- release -------> Error
1014 | h, t|----- acquire, release loopback ---+
1018 +------------------------------------+
1021 #ifdef DEBUG_QUEUING_LOCKS
1023 /* Stuff for circular trace buffer */
1024 #define TRACE_BUF_ELE 1024
1025 static char traces
[TRACE_BUF_ELE
][128] = {0};
1027 #define TRACE_LOCK(X, Y) \
1028 KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y);
1029 #define TRACE_LOCK_T(X, Y, Z) \
1030 KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z);
1031 #define TRACE_LOCK_HT(X, Y, Z, Q) \
1032 KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, \
1035 static void __kmp_dump_queuing_lock(kmp_info_t
*this_thr
, kmp_int32 gtid
,
1036 kmp_queuing_lock_t
*lck
, kmp_int32 head_id
,
1037 kmp_int32 tail_id
) {
1040 __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n");
1042 i
= tc
% TRACE_BUF_ELE
;
1043 __kmp_printf_no_lock("%s\n", traces
[i
]);
1044 i
= (i
+ 1) % TRACE_BUF_ELE
;
1045 while (i
!= (tc
% TRACE_BUF_ELE
)) {
1046 __kmp_printf_no_lock("%s", traces
[i
]);
1047 i
= (i
+ 1) % TRACE_BUF_ELE
;
1049 __kmp_printf_no_lock("\n");
1051 __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, "
1052 "next_wait:%d, head_id:%d, tail_id:%d\n",
1053 gtid
+ 1, this_thr
->th
.th_spin_here
,
1054 this_thr
->th
.th_next_waiting
, head_id
, tail_id
);
1056 __kmp_printf_no_lock("\t\thead: %d ", lck
->lk
.head_id
);
1058 if (lck
->lk
.head_id
>= 1) {
1059 t
= __kmp_threads
[lck
->lk
.head_id
- 1]->th
.th_next_waiting
;
1061 __kmp_printf_no_lock("-> %d ", t
);
1062 t
= __kmp_threads
[t
- 1]->th
.th_next_waiting
;
1065 __kmp_printf_no_lock("; tail: %d ", lck
->lk
.tail_id
);
1066 __kmp_printf_no_lock("\n\n");
1069 #endif /* DEBUG_QUEUING_LOCKS */
// Returns the queuing lock's owner_id field (read via TCR_4) minus one.
// NOTE(review): presumably owner_id holds gtid + 1 with 0 meaning "no owner",
// as for the ticket lock above; the code that writes owner_id for queuing
// locks is not visible here, so confirm against it.
1071 static kmp_int32
__kmp_get_queuing_lock_owner(kmp_queuing_lock_t
*lck
) {
1072 return TCR_4(lck
->lk
.owner_id
) - 1;
// Reports whether this queuing lock is being used as a nestable lock: true
// when depth_locked is anything other than -1.
1075 static inline bool __kmp_is_queuing_lock_nestable(kmp_queuing_lock_t
*lck
) {
1076 return lck
->lk
.depth_locked
!= -1;
1079 /* Acquire a lock using the queuing lock implementation */
1080 template <bool takeTime
>
1081 /* [TLW] The unused template above is left behind because of what BEB believes
1082 is a potential compiler problem with __forceinline. */
1083 __forceinline
static int
1084 __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t
*lck
,
1086 kmp_info_t
*this_thr
= __kmp_thread_from_gtid(gtid
);
1087 volatile kmp_int32
*head_id_p
= &lck
->lk
.head_id
;
1088 volatile kmp_int32
*tail_id_p
= &lck
->lk
.tail_id
;
1089 volatile kmp_uint32
*spin_here_p
;
1092 ompt_state_t prev_state
= ompt_state_undefined
;
1096 ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck
, gtid
));
1098 KMP_FSYNC_PREPARE(lck
);
1099 KMP_DEBUG_ASSERT(this_thr
!= NULL
);
1100 spin_here_p
= &this_thr
->th
.th_spin_here
;
1102 #ifdef DEBUG_QUEUING_LOCKS
1103 TRACE_LOCK(gtid
+ 1, "acq ent");
1105 __kmp_dump_queuing_lock(this_thr
, gtid
, lck
, *head_id_p
, *tail_id_p
);
1106 if (this_thr
->th
.th_next_waiting
!= 0)
1107 __kmp_dump_queuing_lock(this_thr
, gtid
, lck
, *head_id_p
, *tail_id_p
);
1109 KMP_DEBUG_ASSERT(!*spin_here_p
);
1110 KMP_DEBUG_ASSERT(this_thr
->th
.th_next_waiting
== 0);
1112 /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to
1113 head_id_p that may follow, not just in execution order, but also in
1114 visibility order. This way, when a releasing thread observes the changes to
1115 the queue by this thread, it can rightly assume that spin_here_p has
1116 already been set to TRUE, so that when it sets spin_here_p to FALSE, it is
1117 not premature. If the releasing thread sets spin_here_p to FALSE before
1118 this thread sets it to TRUE, this thread will hang. */
1119 *spin_here_p
= TRUE
; /* before enqueuing to prevent race */
1131 #ifdef DEBUG_QUEUING_LOCKS
1133 TRACE_LOCK_HT(gtid
+ 1, "acq read: ", head
, tail
);
1135 tail
= 0; /* to make sure next link asynchronously read is not set
1136 accidentally; this assignment prevents us from entering the
1137 if ( t > 0 ) condition in the enqueued case below, which is not
1138 necessary for this state transition */
1140 /* try (-1,0)->(tid,tid) */
1141 enqueued
= KMP_COMPARE_AND_STORE_ACQ64((volatile kmp_int64
*)tail_id_p
,
1143 KMP_PACK_64(gtid
+ 1, gtid
+ 1));
1144 #ifdef DEBUG_QUEUING_LOCKS
1146 TRACE_LOCK(gtid
+ 1, "acq enq: (-1,0)->(tid,tid)");
1152 KMP_DEBUG_ASSERT(tail
!= gtid
+ 1);
1154 #ifdef DEBUG_QUEUING_LOCKS
1155 TRACE_LOCK_HT(gtid
+ 1, "acq read: ", head
, tail
);
1161 /* try (h,t) or (h,h)->(h,tid) */
1162 enqueued
= KMP_COMPARE_AND_STORE_ACQ32(tail_id_p
, tail
, gtid
+ 1);
1164 #ifdef DEBUG_QUEUING_LOCKS
1166 TRACE_LOCK(gtid
+ 1, "acq enq: (h,t)->(h,tid)");
1171 case 0: /* empty queue */
1173 kmp_int32 grabbed_lock
;
1175 #ifdef DEBUG_QUEUING_LOCKS
1177 TRACE_LOCK_HT(gtid
+ 1, "acq read: ", head
, tail
);
1179 /* try (0,0)->(-1,0) */
1181 /* only legal transition out of head = 0 is head = -1 with no change to
1183 grabbed_lock
= KMP_COMPARE_AND_STORE_ACQ32(head_id_p
, 0, -1);
1187 *spin_here_p
= FALSE
;
1191 ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1193 #ifdef DEBUG_QUEUING_LOCKS
1194 TRACE_LOCK_HT(gtid
+ 1, "acq exit: ", head
, 0);
1198 if (ompt_enabled
.enabled
&& prev_state
!= ompt_state_undefined
) {
1199 /* change the state before clearing wait_id */
1200 this_thr
->th
.ompt_thread_info
.state
= prev_state
;
1201 this_thr
->th
.ompt_thread_info
.wait_id
= 0;
1205 KMP_FSYNC_ACQUIRED(lck
);
1206 return KMP_LOCK_ACQUIRED_FIRST
; /* lock holder cannot be on queue */
1213 if (ompt_enabled
.enabled
&& prev_state
== ompt_state_undefined
) {
1214 /* this thread will spin; set wait_id before entering wait state */
1215 prev_state
= this_thr
->th
.ompt_thread_info
.state
;
1216 this_thr
->th
.ompt_thread_info
.wait_id
= (uint64_t)lck
;
1217 this_thr
->th
.ompt_thread_info
.state
= ompt_state_wait_lock
;
1223 kmp_info_t
*tail_thr
= __kmp_thread_from_gtid(tail
- 1);
1224 KMP_ASSERT(tail_thr
!= NULL
);
1225 tail_thr
->th
.th_next_waiting
= gtid
+ 1;
1226 /* corresponding wait for this write in release code */
1229 ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n",
1233 // ToDo: Use __kmp_wait_sleep or similar when blocktime != inf
1234 KMP_WAIT(spin_here_p
, FALSE
, KMP_EQ
, lck
);
1235 // Synchronize writes to both runtime thread structures
1236 // and writes in user code.
1239 #ifdef DEBUG_QUEUING_LOCKS
1240 TRACE_LOCK(gtid
+ 1, "acq spin");
1242 if (this_thr
->th
.th_next_waiting
!= 0)
1243 __kmp_dump_queuing_lock(this_thr
, gtid
, lck
, *head_id_p
, *tail_id_p
);
1245 KMP_DEBUG_ASSERT(this_thr
->th
.th_next_waiting
== 0);
1246 KA_TRACE(1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after "
1247 "waiting on queue\n",
1250 #ifdef DEBUG_QUEUING_LOCKS
1251 TRACE_LOCK(gtid
+ 1, "acq exit 2");
1255 /* change the state before clearing wait_id */
1256 this_thr
->th
.ompt_thread_info
.state
= prev_state
;
1257 this_thr
->th
.ompt_thread_info
.wait_id
= 0;
1260 /* got lock, we were dequeued by the thread that released lock */
1261 return KMP_LOCK_ACQUIRED_FIRST
;
1264 /* Yield if number of threads > number of logical processors */
1265 /* ToDo: Not sure why this should only be in oversubscription case,
1266 maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1267 KMP_YIELD_OVERSUB();
1269 #ifdef DEBUG_QUEUING_LOCKS
1270 TRACE_LOCK(gtid
+ 1, "acq retry");
1273 KMP_ASSERT2(0, "should not get here");
1274 return KMP_LOCK_ACQUIRED_FIRST
;
// Entry point for acquiring a simple queuing lock: debug-asserts gtid >= 0
// and forwards to the <false> instantiation of the acquire template, keeping
// its result in retval.
1277 int __kmp_acquire_queuing_lock(kmp_queuing_lock_t
*lck
, kmp_int32 gtid
) {
1278 KMP_DEBUG_ASSERT(gtid
>= 0);
1280 int retval
= __kmp_acquire_queuing_lock_timed_template
<false>(lck
, gtid
);
// Checked variant used for "omp_set_lock". Raises KMP_FATAL when the lock is
// uninitialized (initialized != lck), when a nestable lock is used as a simple
// one, or when the calling gtid already owns the lock. Otherwise acquires the
// lock, records gtid + 1 in owner_id and returns KMP_LOCK_ACQUIRED_FIRST.
1284 static int __kmp_acquire_queuing_lock_with_checks(kmp_queuing_lock_t
*lck
,
1286 char const *const func
= "omp_set_lock";
1287 if (lck
->lk
.initialized
!= lck
) {
1288 KMP_FATAL(LockIsUninitialized
, func
);
1290 if (__kmp_is_queuing_lock_nestable(lck
)) {
1291 KMP_FATAL(LockNestableUsedAsSimple
, func
);
1293 if (__kmp_get_queuing_lock_owner(lck
) == gtid
) {
1294 KMP_FATAL(LockIsAlreadyOwned
, func
);
1297 __kmp_acquire_queuing_lock(lck
, gtid
);
1299 lck
->lk
.owner_id
= gtid
+ 1;
1300 return KMP_LOCK_ACQUIRED_FIRST
;
// Non-blocking acquire attempt. Only when head is 0 (nobody queued, nobody
// holding) does it try the (0,0)->(-1,0) compare-and-store on head_id; the
// trace messages distinguish "holding lock" from "without lock" on exit.
// The calling thread must not currently be spinning (th_spin_here asserted
// false in debug builds).
1303 int __kmp_test_queuing_lock(kmp_queuing_lock_t
*lck
, kmp_int32 gtid
) {
1304 volatile kmp_int32
*head_id_p
= &lck
->lk
.head_id
;
1307 kmp_info_t
*this_thr
;
1310 KA_TRACE(1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid
));
1311 KMP_DEBUG_ASSERT(gtid
>= 0);
1313 this_thr
= __kmp_thread_from_gtid(gtid
);
1314 KMP_DEBUG_ASSERT(this_thr
!= NULL
);
1315 KMP_DEBUG_ASSERT(!this_thr
->th
.th_spin_here
);
1320 if (head
== 0) { /* nobody on queue, nobody holding */
1321 /* try (0,0)->(-1,0) */
1322 if (KMP_COMPARE_AND_STORE_ACQ32(head_id_p
, 0, -1)) {
1324 ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid
));
1325 KMP_FSYNC_ACQUIRED(lck
);
1331 ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid
));
// Checked variant used for "omp_test_lock". Raises KMP_FATAL for an
// uninitialized lock or for a nestable lock used as a simple one, then calls
// __kmp_test_queuing_lock; owner_id is set to gtid + 1 in the visible code
// that follows the test.
1335 static int __kmp_test_queuing_lock_with_checks(kmp_queuing_lock_t
*lck
,
1337 char const *const func
= "omp_test_lock";
1338 if (lck
->lk
.initialized
!= lck
) {
1339 KMP_FATAL(LockIsUninitialized
, func
);
1341 if (__kmp_is_queuing_lock_nestable(lck
)) {
1342 KMP_FATAL(LockNestableUsedAsSimple
, func
);
1345 int retval
= __kmp_test_queuing_lock(lck
, gtid
);
1348 lck
->lk
.owner_id
= gtid
+ 1;
// Queuing-lock release path. Visible steps:
//   head == -1 (no waiters): try (-1,0)->(0,0) on head_id and return.
//   head == tail (one waiter): try the 64-bit (h,h)->(-1,0) transition.
//   otherwise: wait until the head thread's th_next_waiting is non-zero
//   (KMP_WAIT ... KMP_NEQ) before advancing the head.
// After a successful dequeue the head thread's th_next_waiting is cleared and
// its th_spin_here is set to FALSE, which is the value the acquire path waits
// for. Returns KMP_LOCK_RELEASED.
1353 int __kmp_release_queuing_lock(kmp_queuing_lock_t
*lck
, kmp_int32 gtid
) {
1354 volatile kmp_int32
*head_id_p
= &lck
->lk
.head_id
;
1355 volatile kmp_int32
*tail_id_p
= &lck
->lk
.tail_id
;
1358 ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck
, gtid
));
1359 KMP_DEBUG_ASSERT(gtid
>= 0);
1360 #if KMP_DEBUG || DEBUG_QUEUING_LOCKS
1361 kmp_info_t
*this_thr
= __kmp_thread_from_gtid(gtid
);
1363 KMP_DEBUG_ASSERT(this_thr
!= NULL
);
1364 #ifdef DEBUG_QUEUING_LOCKS
1365 TRACE_LOCK(gtid
+ 1, "rel ent");
1367 if (this_thr
->th
.th_spin_here
)
1368 __kmp_dump_queuing_lock(this_thr
, gtid
, lck
, *head_id_p
, *tail_id_p
);
1369 if (this_thr
->th
.th_next_waiting
!= 0)
1370 __kmp_dump_queuing_lock(this_thr
, gtid
, lck
, *head_id_p
, *tail_id_p
);
1372 KMP_DEBUG_ASSERT(!this_thr
->th
.th_spin_here
);
1373 KMP_DEBUG_ASSERT(this_thr
->th
.th_next_waiting
== 0);
1375 KMP_FSYNC_RELEASING(lck
);
1384 #ifdef DEBUG_QUEUING_LOCKS
1386 TRACE_LOCK_HT(gtid
+ 1, "rel read: ", head
, tail
);
1388 __kmp_dump_queuing_lock(this_thr
, gtid
, lck
, head
, tail
);
1390 KMP_DEBUG_ASSERT(head
!=
1391 0); /* holding the lock, head must be -1 or queue head */
1393 if (head
== -1) { /* nobody on queue */
1394 /* try (-1,0)->(0,0) */
1395 if (KMP_COMPARE_AND_STORE_REL32(head_id_p
, -1, 0)) {
1398 ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1400 #ifdef DEBUG_QUEUING_LOCKS
1401 TRACE_LOCK_HT(gtid
+ 1, "rel exit: ", 0, 0);
1405 /* nothing to do - no other thread is trying to shift blame */
1407 return KMP_LOCK_RELEASED
;
1413 if (head
== tail
) { /* only one thread on the queue */
1414 #ifdef DEBUG_QUEUING_LOCKS
1416 __kmp_dump_queuing_lock(this_thr
, gtid
, lck
, head
, tail
);
1418 KMP_DEBUG_ASSERT(head
> 0);
1420 /* try (h,h)->(-1,0) */
1421 dequeued
= KMP_COMPARE_AND_STORE_REL64(
1422 RCAST(volatile kmp_int64
*, tail_id_p
), KMP_PACK_64(head
, head
),
1423 KMP_PACK_64(-1, 0));
1424 #ifdef DEBUG_QUEUING_LOCKS
1425 TRACE_LOCK(gtid
+ 1, "rel deq: (h,h)->(-1,0)");
1429 volatile kmp_int32
*waiting_id_p
;
1430 kmp_info_t
*head_thr
= __kmp_thread_from_gtid(head
- 1);
1431 KMP_DEBUG_ASSERT(head_thr
!= NULL
);
1432 waiting_id_p
= &head_thr
->th
.th_next_waiting
;
1434 /* Does this require synchronous reads? */
1435 #ifdef DEBUG_QUEUING_LOCKS
1436 if (head
<= 0 || tail
<= 0)
1437 __kmp_dump_queuing_lock(this_thr
, gtid
, lck
, head
, tail
);
1439 KMP_DEBUG_ASSERT(head
> 0 && tail
> 0);
1441 /* try (h,t)->(h',t) or (t,t) */
1443 /* make sure enqueuing thread has time to update next waiting thread
1446 KMP_WAIT((volatile kmp_uint32
*)waiting_id_p
, 0, KMP_NEQ
, NULL
);
1447 #ifdef DEBUG_QUEUING_LOCKS
1448 TRACE_LOCK(gtid
+ 1, "rel deq: (h,t)->(h',t)");
1455 kmp_info_t
*head_thr
= __kmp_thread_from_gtid(head
- 1);
1456 KMP_DEBUG_ASSERT(head_thr
!= NULL
);
1458 /* Does this require synchronous reads? */
1459 #ifdef DEBUG_QUEUING_LOCKS
1460 if (head
<= 0 || tail
<= 0)
1461 __kmp_dump_queuing_lock(this_thr
, gtid
, lck
, head
, tail
);
1463 KMP_DEBUG_ASSERT(head
> 0 && tail
> 0);
1465 /* For clean code only. Thread not released until next statement prevents
1466 race with acquire code. */
1467 head_thr
->th
.th_next_waiting
= 0;
1468 #ifdef DEBUG_QUEUING_LOCKS
1469 TRACE_LOCK_T(gtid
+ 1, "rel nw=0 for t=", head
);
1473 /* reset spin value */
1474 head_thr
->th
.th_spin_here
= FALSE
;
1476 KA_TRACE(1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after "
1479 #ifdef DEBUG_QUEUING_LOCKS
1480 TRACE_LOCK(gtid
+ 1, "rel exit 2");
1482 return KMP_LOCK_RELEASED
;
1484 /* KMP_CPU_PAUSE(); don't want to make releasing thread hold up acquiring
1487 #ifdef DEBUG_QUEUING_LOCKS
1488 TRACE_LOCK(gtid
+ 1, "rel retry");
1492 KMP_ASSERT2(0, "should not get here");
1493 return KMP_LOCK_RELEASED
;
// Checked variant used for "omp_unset_lock". After a memory barrier it raises
// KMP_FATAL when the lock is uninitialized, is a nestable lock, has no owner
// (owner == -1), or is owned by a different gtid. Otherwise it clears
// owner_id and returns the result of __kmp_release_queuing_lock.
1496 static int __kmp_release_queuing_lock_with_checks(kmp_queuing_lock_t
*lck
,
1498 char const *const func
= "omp_unset_lock";
1499 KMP_MB(); /* in case another processor initialized lock */
1500 if (lck
->lk
.initialized
!= lck
) {
1501 KMP_FATAL(LockIsUninitialized
, func
);
1503 if (__kmp_is_queuing_lock_nestable(lck
)) {
1504 KMP_FATAL(LockNestableUsedAsSimple
, func
);
1506 if (__kmp_get_queuing_lock_owner(lck
) == -1) {
1507 KMP_FATAL(LockUnsettingFree
, func
);
1509 if (__kmp_get_queuing_lock_owner(lck
) != gtid
) {
1510 KMP_FATAL(LockUnsettingSetByAnother
, func
);
1512 lck
->lk
.owner_id
= 0;
1513 return __kmp_release_queuing_lock(lck
, gtid
);
1516 void __kmp_init_queuing_lock(kmp_queuing_lock_t
*lck
) {
1517 lck
->lk
.location
= NULL
;
1518 lck
->lk
.head_id
= 0;
1519 lck
->lk
.tail_id
= 0;
1520 lck
->lk
.next_ticket
= 0;
1521 lck
->lk
.now_serving
= 0;
1522 lck
->lk
.owner_id
= 0; // no thread owns the lock.
1523 lck
->lk
.depth_locked
= -1; // >= 0 for nestable locks, -1 for simple locks.
1524 lck
->lk
.initialized
= lck
;
1526 KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck
));
1529 void __kmp_destroy_queuing_lock(kmp_queuing_lock_t
*lck
) {
1530 lck
->lk
.initialized
= NULL
;
1531 lck
->lk
.location
= NULL
;
1532 lck
->lk
.head_id
= 0;
1533 lck
->lk
.tail_id
= 0;
1534 lck
->lk
.next_ticket
= 0;
1535 lck
->lk
.now_serving
= 0;
1536 lck
->lk
.owner_id
= 0;
1537 lck
->lk
.depth_locked
= -1;
1540 static void __kmp_destroy_queuing_lock_with_checks(kmp_queuing_lock_t
*lck
) {
1541 char const *const func
= "omp_destroy_lock";
1542 if (lck
->lk
.initialized
!= lck
) {
1543 KMP_FATAL(LockIsUninitialized
, func
);
1545 if (__kmp_is_queuing_lock_nestable(lck
)) {
1546 KMP_FATAL(LockNestableUsedAsSimple
, func
);
1548 if (__kmp_get_queuing_lock_owner(lck
) != -1) {
1549 KMP_FATAL(LockStillOwned
, func
);
1551 __kmp_destroy_queuing_lock(lck
);
1554 // nested queuing locks
// Nestable acquire. If the calling gtid already owns the lock, only the
// nesting depth is incremented and KMP_LOCK_ACQUIRED_NEXT is returned.
// Otherwise the underlying queuing lock is acquired, depth_locked is set to 1,
// owner_id to gtid + 1, and KMP_LOCK_ACQUIRED_FIRST is returned.
1556 int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t
*lck
, kmp_int32 gtid
) {
1557 KMP_DEBUG_ASSERT(gtid
>= 0);
1559 if (__kmp_get_queuing_lock_owner(lck
) == gtid
) {
1560 lck
->lk
.depth_locked
+= 1;
1561 return KMP_LOCK_ACQUIRED_NEXT
;
1563 __kmp_acquire_queuing_lock_timed_template
<false>(lck
, gtid
);
1565 lck
->lk
.depth_locked
= 1;
1567 lck
->lk
.owner_id
= gtid
+ 1;
1568 return KMP_LOCK_ACQUIRED_FIRST
;
// Checked variant used for "omp_set_nest_lock". Raises KMP_FATAL for an
// uninitialized lock or a simple lock used as nestable, then forwards to
// __kmp_acquire_nested_queuing_lock and returns its result.
1573 __kmp_acquire_nested_queuing_lock_with_checks(kmp_queuing_lock_t
*lck
,
1575 char const *const func
= "omp_set_nest_lock";
1576 if (lck
->lk
.initialized
!= lck
) {
1577 KMP_FATAL(LockIsUninitialized
, func
);
1579 if (!__kmp_is_queuing_lock_nestable(lck
)) {
1580 KMP_FATAL(LockSimpleUsedAsNestable
, func
);
1582 return __kmp_acquire_nested_queuing_lock(lck
, gtid
);
// Non-blocking nestable acquire. When the caller already owns the lock the
// result is the incremented nesting depth. Otherwise __kmp_test_queuing_lock
// is tried; in the visible success path depth_locked (and the result) become
// 1 and owner_id becomes gtid + 1.
1585 int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t
*lck
, kmp_int32 gtid
) {
1588 KMP_DEBUG_ASSERT(gtid
>= 0);
1590 if (__kmp_get_queuing_lock_owner(lck
) == gtid
) {
1591 retval
= ++lck
->lk
.depth_locked
;
1592 } else if (!__kmp_test_queuing_lock(lck
, gtid
)) {
1596 retval
= lck
->lk
.depth_locked
= 1;
1598 lck
->lk
.owner_id
= gtid
+ 1;
// Checked variant used for "omp_test_nest_lock". Raises KMP_FATAL for an
// uninitialized lock or a simple lock used as nestable, then forwards to
// __kmp_test_nested_queuing_lock and returns its result.
1603 static int __kmp_test_nested_queuing_lock_with_checks(kmp_queuing_lock_t
*lck
,
1605 char const *const func
= "omp_test_nest_lock";
1606 if (lck
->lk
.initialized
!= lck
) {
1607 KMP_FATAL(LockIsUninitialized
, func
);
1609 if (!__kmp_is_queuing_lock_nestable(lck
)) {
1610 KMP_FATAL(LockSimpleUsedAsNestable
, func
);
1612 return __kmp_test_nested_queuing_lock(lck
, gtid
);
// Nestable release. Decrements the nesting depth; only when it reaches zero
// is owner_id cleared and the underlying queuing lock released
// (KMP_LOCK_RELEASED). Otherwise the lock stays held by the caller and
// KMP_LOCK_STILL_HELD is returned.
1615 int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t
*lck
, kmp_int32 gtid
) {
1616 KMP_DEBUG_ASSERT(gtid
>= 0);
1619 if (--(lck
->lk
.depth_locked
) == 0) {
1621 lck
->lk
.owner_id
= 0;
1622 __kmp_release_queuing_lock(lck
, gtid
);
1623 return KMP_LOCK_RELEASED
;
1625 return KMP_LOCK_STILL_HELD
;
// Checked variant used for "omp_unset_nest_lock". After a memory barrier it
// raises KMP_FATAL when the lock is uninitialized, is a simple lock, has no
// owner (owner == -1), or is owned by a different gtid; otherwise it forwards
// to __kmp_release_nested_queuing_lock and returns its result.
1629 __kmp_release_nested_queuing_lock_with_checks(kmp_queuing_lock_t
*lck
,
1631 char const *const func
= "omp_unset_nest_lock";
1632 KMP_MB(); /* in case another processor initialized lock */
1633 if (lck
->lk
.initialized
!= lck
) {
1634 KMP_FATAL(LockIsUninitialized
, func
);
1636 if (!__kmp_is_queuing_lock_nestable(lck
)) {
1637 KMP_FATAL(LockSimpleUsedAsNestable
, func
);
1639 if (__kmp_get_queuing_lock_owner(lck
) == -1) {
1640 KMP_FATAL(LockUnsettingFree
, func
);
1642 if (__kmp_get_queuing_lock_owner(lck
) != gtid
) {
1643 KMP_FATAL(LockUnsettingSetByAnother
, func
);
1645 return __kmp_release_nested_queuing_lock(lck
, gtid
);
1648 void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t
*lck
) {
1649 __kmp_init_queuing_lock(lck
);
1650 lck
->lk
.depth_locked
= 0; // >= 0 for nestable locks, -1 for simple locks
1653 void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t
*lck
) {
1654 __kmp_destroy_queuing_lock(lck
);
1655 lck
->lk
.depth_locked
= 0;
// Checked variant used for "omp_destroy_nest_lock". Raises KMP_FATAL when the
// lock is uninitialized, is a simple lock, or still has an owner; otherwise
// destroys the nestable lock.
1659 __kmp_destroy_nested_queuing_lock_with_checks(kmp_queuing_lock_t
*lck
) {
1660 char const *const func
= "omp_destroy_nest_lock";
1661 if (lck
->lk
.initialized
!= lck
) {
1662 KMP_FATAL(LockIsUninitialized
, func
);
1664 if (!__kmp_is_queuing_lock_nestable(lck
)) {
1665 KMP_FATAL(LockSimpleUsedAsNestable
, func
);
1667 if (__kmp_get_queuing_lock_owner(lck
) != -1) {
1668 KMP_FATAL(LockStillOwned
, func
);
1670 __kmp_destroy_nested_queuing_lock(lck
);
1673 // access functions to fields which don't exist for all lock kinds.
1675 static const ident_t
*__kmp_get_queuing_lock_location(kmp_queuing_lock_t
*lck
) {
1676 return lck
->lk
.location
;
1679 static void __kmp_set_queuing_lock_location(kmp_queuing_lock_t
*lck
,
1680 const ident_t
*loc
) {
1681 lck
->lk
.location
= loc
;
1684 static kmp_lock_flags_t
__kmp_get_queuing_lock_flags(kmp_queuing_lock_t
*lck
) {
1685 return lck
->lk
.flags
;
1688 static void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t
*lck
,
1689 kmp_lock_flags_t flags
) {
1690 lck
->lk
.flags
= flags
;
1693 #if KMP_USE_ADAPTIVE_LOCKS
1695 /* RTM Adaptive locks */
1697 #if KMP_HAVE_RTM_INTRINSICS
1698 #include <immintrin.h>
1699 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1703 // Values from the status register after failed speculation.
1704 #define _XBEGIN_STARTED (~0u)
1705 #define _XABORT_EXPLICIT (1 << 0)
1706 #define _XABORT_RETRY (1 << 1)
1707 #define _XABORT_CONFLICT (1 << 2)
1708 #define _XABORT_CAPACITY (1 << 3)
1709 #define _XABORT_DEBUG (1 << 4)
1710 #define _XABORT_NESTED (1 << 5)
1711 #define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
1713 // Aborts for which it's worth trying again immediately
1714 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1716 #define STRINGIZE_INTERNAL(arg) #arg
1717 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1719 // Access to RTM instructions
// The visible inline-asm variant emits the instruction as raw bytes
// (.byte 0xC7, 0xF8 ...) and, on the abort path, copies %eax into `res`.
1720 /* A version of XBegin which returns -1 on speculation, and the value of EAX on
1721 an abort. This is the same definition as the compiler intrinsic that will be
1722 supported at some point. */
1723 static __inline
int _xbegin() {
1751 #endif // KMP_ARCH_X86_64
1753 /* Note that %eax must be noted as killed (clobbered), because the XSR is
1754 returned in %eax(%rax) on abort. Other register values are restored, so
1755 don't need to be killed.
1757 We must also mark 'res' as an input and an output, since otherwise
1758 'res=-1' may be dropped as being dead, whereas we do need the assignment on
1759 the successful (i.e., non-abort) path. */
1760 __asm__
volatile("1: .byte 0xC7; .byte 0xF8;\n"
1763 "1: movl %%eax,%0\n"
1765 : "+r"(res
)::"memory", "%eax");
1766 #endif // KMP_OS_WINDOWS
// The visible inline-asm variant emits the instruction as raw bytes
// (0x0f 0x01 0xd5) with a "memory" clobber.
1770 /* Transaction end */
1771 static __inline
void _xend() {
1779 __asm__
volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory");
1783 /* This is a macro, the argument must be a single byte constant which can be
1784 evaluated by the inline assembler, since it is emitted as a byte into the
1788 #define _xabort(ARG) _asm _emit 0xc6 _asm _emit 0xf8 _asm _emit ARG
1790 #define _xabort(ARG) \
1791 __asm__ volatile(".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG):::"memory");
1794 #endif // KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300
1796 // Statistics is collected for testing purpose
1797 #if KMP_DEBUG_ADAPTIVE_LOCKS
1799 // We accumulate speculative lock statistics when the lock is destroyed. We
1800 // keep locks that haven't been destroyed in the liveLocks list so that we can
1801 // grab their statistics too.
1802 static kmp_adaptive_lock_statistics_t destroyedStats
;
1804 // To hold the list of live locks.
1805 static kmp_adaptive_lock_info_t liveLocks
;
1807 // A lock so we can safely update the list of locks.
1808 static kmp_bootstrap_lock_t chain_lock
=
1809 KMP_BOOTSTRAP_LOCK_INITIALIZER(chain_lock
);
1811 // Initialize the list of stats.
1812 void __kmp_init_speculative_stats() {
1813 kmp_adaptive_lock_info_t
*lck
= &liveLocks
;
1815 memset(CCAST(kmp_adaptive_lock_statistics_t
*, &(lck
->stats
)), 0,
1816 sizeof(lck
->stats
));
1817 lck
->stats
.next
= lck
;
1818 lck
->stats
.prev
= lck
;
1820 KMP_ASSERT(lck
->stats
.next
->stats
.prev
== lck
);
1821 KMP_ASSERT(lck
->stats
.prev
->stats
.next
== lck
);
1823 __kmp_init_bootstrap_lock(&chain_lock
);
1826 // Insert the lock into the circular list
1827 static void __kmp_remember_lock(kmp_adaptive_lock_info_t
*lck
) {
1828 __kmp_acquire_bootstrap_lock(&chain_lock
);
1830 lck
->stats
.next
= liveLocks
.stats
.next
;
1831 lck
->stats
.prev
= &liveLocks
;
1833 liveLocks
.stats
.next
= lck
;
1834 lck
->stats
.next
->stats
.prev
= lck
;
1836 KMP_ASSERT(lck
->stats
.next
->stats
.prev
== lck
);
1837 KMP_ASSERT(lck
->stats
.prev
->stats
.next
== lck
);
1839 __kmp_release_bootstrap_lock(&chain_lock
);
// Removes `lck` from the circular list of live locks. The list invariants
// around `lck` are asserted first, then its two neighbours are captured in
// `n` (next) and `p` (prev).
// NOTE(review): no locking is visible in this function; the visible caller
// __kmp_accumulate_speculative_stats invokes it while holding chain_lock.
1842 static void __kmp_forget_lock(kmp_adaptive_lock_info_t
*lck
) {
1843 KMP_ASSERT(lck
->stats
.next
->stats
.prev
== lck
);
1844 KMP_ASSERT(lck
->stats
.prev
->stats
.next
== lck
);
1846 kmp_adaptive_lock_info_t
*n
= lck
->stats
.next
;
1847 kmp_adaptive_lock_info_t
*p
= lck
->stats
.prev
;
1853 static void __kmp_zero_speculative_stats(kmp_adaptive_lock_info_t
*lck
) {
1854 memset(CCAST(kmp_adaptive_lock_statistics_t
*, &lck
->stats
), 0,
1855 sizeof(lck
->stats
));
1856 __kmp_remember_lock(lck
);
1859 static void __kmp_add_stats(kmp_adaptive_lock_statistics_t
*t
,
1860 kmp_adaptive_lock_info_t
*lck
) {
1861 kmp_adaptive_lock_statistics_t
volatile *s
= &lck
->stats
;
1863 t
->nonSpeculativeAcquireAttempts
+= lck
->acquire_attempts
;
1864 t
->successfulSpeculations
+= s
->successfulSpeculations
;
1865 t
->hardFailedSpeculations
+= s
->hardFailedSpeculations
;
1866 t
->softFailedSpeculations
+= s
->softFailedSpeculations
;
1867 t
->nonSpeculativeAcquires
+= s
->nonSpeculativeAcquires
;
1868 t
->lemmingYields
+= s
->lemmingYields
;
1871 static void __kmp_accumulate_speculative_stats(kmp_adaptive_lock_info_t
*lck
) {
1872 __kmp_acquire_bootstrap_lock(&chain_lock
);
1874 __kmp_add_stats(&destroyedStats
, lck
);
1875 __kmp_forget_lock(lck
);
1877 __kmp_release_bootstrap_lock(&chain_lock
);
1880 static float percent(kmp_uint32 count
, kmp_uint32 total
) {
1881 return (total
== 0) ? 0.0 : (100.0 * count
) / total
;
// Writes a report of the speculative-lock statistics. The totals start from
// destroyedStats and add the counters of every lock still on the liveLocks
// list. Output goes to stdout when __kmp_speculative_statsfile is "-",
// otherwise to a file whose name is produced by formatting
// __kmp_speculative_statsfile with the current pid.
// NOTE(review): totalSections is a kmp_uint32, so "totalSections <= 0" can
//   only be true for 0.
// NOTE(review): "char buffer[buffLen]" has a runtime size (variable-length
//   array), which is a compiler extension in C++.
// NOTE(review): __kmp_speculative_statsfile is passed as the *format string*
//   to KMP_SNPRINTF with a single kmp_int32 argument; presumably the setting
//   is expected to contain at most one %d -- confirm where it is parsed.
// NOTE(review): the live-lock list is walked without chain_lock being taken
//   in the visible code.
1884 void __kmp_print_speculative_stats() {
1885 kmp_adaptive_lock_statistics_t total
= destroyedStats
;
1886 kmp_adaptive_lock_info_t
*lck
;
1888 for (lck
= liveLocks
.stats
.next
; lck
!= &liveLocks
; lck
= lck
->stats
.next
) {
1889 __kmp_add_stats(&total
, lck
);
1891 kmp_adaptive_lock_statistics_t
*t
= &total
;
1892 kmp_uint32 totalSections
=
1893 t
->nonSpeculativeAcquires
+ t
->successfulSpeculations
;
1894 kmp_uint32 totalSpeculations
= t
->successfulSpeculations
+
1895 t
->hardFailedSpeculations
+
1896 t
->softFailedSpeculations
;
1897 if (totalSections
<= 0)
1900 kmp_safe_raii_file_t statsFile
;
1901 if (strcmp(__kmp_speculative_statsfile
, "-") == 0) {
1902 statsFile
.set_stdout();
1904 size_t buffLen
= KMP_STRLEN(__kmp_speculative_statsfile
) + 20;
1905 char buffer
[buffLen
];
1906 KMP_SNPRINTF(&buffer
[0], buffLen
, __kmp_speculative_statsfile
,
1907 (kmp_int32
)getpid());
1908 statsFile
.open(buffer
, "w");
1911 fprintf(statsFile
, "Speculative lock statistics (all approximate!)\n");
1913 " Lock parameters: \n"
1914 " max_soft_retries : %10d\n"
1915 " max_badness : %10d\n",
1916 __kmp_adaptive_backoff_params
.max_soft_retries
,
1917 __kmp_adaptive_backoff_params
.max_badness
);
1918 fprintf(statsFile
, " Non-speculative acquire attempts : %10d\n",
1919 t
->nonSpeculativeAcquireAttempts
);
1920 fprintf(statsFile
, " Total critical sections : %10d\n",
1922 fprintf(statsFile
, " Successful speculations : %10d (%5.1f%%)\n",
1923 t
->successfulSpeculations
,
1924 percent(t
->successfulSpeculations
, totalSections
));
1925 fprintf(statsFile
, " Non-speculative acquires : %10d (%5.1f%%)\n",
1926 t
->nonSpeculativeAcquires
,
1927 percent(t
->nonSpeculativeAcquires
, totalSections
));
1928 fprintf(statsFile
, " Lemming yields : %10d\n\n",
1931 fprintf(statsFile
, " Speculative acquire attempts : %10d\n",
1933 fprintf(statsFile
, " Successes : %10d (%5.1f%%)\n",
1934 t
->successfulSpeculations
,
1935 percent(t
->successfulSpeculations
, totalSpeculations
));
1936 fprintf(statsFile
, " Soft failures : %10d (%5.1f%%)\n",
1937 t
->softFailedSpeculations
,
1938 percent(t
->softFailedSpeculations
, totalSpeculations
));
1939 fprintf(statsFile
, " Hard failures : %10d (%5.1f%%)\n",
1940 t
->hardFailedSpeculations
,
1941 percent(t
->hardFailedSpeculations
, totalSpeculations
));
1944 #define KMP_INC_STAT(lck, stat) (lck->lk.adaptive.stats.stat++)
1946 #define KMP_INC_STAT(lck, stat)
1948 #endif // KMP_DEBUG_ADAPTIVE_LOCKS
// Reports whether the queuing lock currently looks free, i.e. head_id == 0.
// The read is followed by a full fence (__sync_synchronize in the visible
// branch) so later memory operations of this thread are not moved above it.
1950 static inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t
*lck
) {
1951 // It is enough to check that the head_id is zero.
1952 // We don't also need to check the tail.
1953 bool res
= lck
->lk
.head_id
== 0;
1955 // We need a fence here, since we must ensure that no memory operations
1956 // from later in this thread float above that read.
1957 #if KMP_COMPILER_ICC || KMP_COMPILER_ICX
1960 __sync_synchronize();
1966 // Functions for manipulating the badness
1967 static __inline
void
1968 __kmp_update_badness_after_success(kmp_adaptive_lock_t
*lck
) {
1969 // Reset the badness to zero so we eagerly try to speculate again
1970 lck
->lk
.adaptive
.badness
= 0;
1971 KMP_INC_STAT(lck
, successfulSpeculations
);
// Backs off speculation by one step after a failed attempt.
1974 // Create a bit mask with one more set bit.
1975 static __inline
void __kmp_step_badness(kmp_adaptive_lock_t
*lck
) {
// Shift in one more low-order 1 bit: 0 -> 1 -> 3 -> 7 -> ...
1976 kmp_uint32 newBadness
= (lck
->lk
.adaptive
.badness
<< 1) | 1;
// The candidate value is compared against the per-lock max_badness limit
// before it is stored.
1977 if (newBadness
> lck
->lk
.adaptive
.max_badness
) {
1980 lck
->lk
.adaptive
.badness
= newBadness
;
// Speculation is indicated when no bit of the current badness mask is set in
// the non-speculative acquire-attempt counter, i.e. (attempts & badness) == 0.
// With badness == 0 that is always the case; a larger mask makes it rarer.
1984 // Check whether speculation should be attempted.
1985 KMP_ATTRIBUTE_TARGET_RTM
1986 static __inline
int __kmp_should_speculate(kmp_adaptive_lock_t
*lck
,
1988 kmp_uint32 badness
= lck
->lk
.adaptive
.badness
;
1989 kmp_uint32 attempts
= lck
->lk
.adaptive
.acquire_attempts
;
1990 int res
= (attempts
& badness
) == 0;
// Tries to enter the critical section transactionally, retrying up to
// max_soft_retries times after soft aborts (status & SOFT_ABORT_MASK). A
// started transaction first checks that the underlying queuing lock is free;
// success returns 1 with the transaction still open. When the attempts are
// exhausted or a hard abort occurs, the badness is stepped up.
1994 // Attempt to acquire only the speculative lock.
1995 // Does not back off to the non-speculative lock.
1996 KMP_ATTRIBUTE_TARGET_RTM
1997 static int __kmp_test_adaptive_lock_only(kmp_adaptive_lock_t
*lck
,
1999 int retries
= lck
->lk
.adaptive
.max_soft_retries
;
2001 // We don't explicitly count the start of speculation, rather we record the
2002 // results (success, hard fail, soft fail). The sum of all of those is the
2003 // total number of times we started speculation since all speculations must
2004 // end one of those ways.
2006 kmp_uint32 status
= _xbegin();
2007 // Switch this in to disable actual speculation but exercise at least some
2008 // of the rest of the code. Useful for debugging...
2009 // kmp_uint32 status = _XABORT_NESTED;
2011 if (status
== _XBEGIN_STARTED
) {
2012 /* We have successfully started speculation. Check that no-one acquired
2013 the lock for real between when we last looked and now. This also gets
2014 the lock cache line into our read-set, which we need so that we'll
2015 abort if anyone later claims it for real. */
2016 if (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck
))) {
2017 // Lock is now visibly acquired, so someone beat us to it. Abort the
2018 // transaction so we'll restart from _xbegin with the failure status.
2020 KMP_ASSERT2(0, "should not get here");
2022 return 1; // Lock has been acquired (speculatively)
2024 // We have aborted, update the statistics
2025 if (status
& SOFT_ABORT_MASK
) {
2026 KMP_INC_STAT(lck
, softFailedSpeculations
);
2027 // and loop round to retry.
2029 KMP_INC_STAT(lck
, hardFailedSpeculations
);
2030 // Give up if we had a hard failure.
2034 } while (retries
--); // Loop while we have retries, and didn't fail hard.
2036 // Either we had a hard failure or we didn't succeed softly after
2037 // the full set of attempts, so back off the badness.
2038 __kmp_step_badness(lck
);
// Non-blocking acquire for adaptive locks. Speculation is tried first when
// __kmp_should_speculate allows it; otherwise (or on failure) the attempt
// counter is incremented and the base queuing lock is tested. Returns 1 when
// the base lock was obtained and 0 when it is already held.
2042 // Attempt to acquire the speculative lock, or back off to the non-speculative
2043 // one if the speculative lock cannot be acquired.
2044 // We can succeed speculatively, non-speculatively, or fail.
2045 static int __kmp_test_adaptive_lock(kmp_adaptive_lock_t
*lck
, kmp_int32 gtid
) {
2046 // First try to acquire the lock speculatively
2047 if (__kmp_should_speculate(lck
, gtid
) &&
2048 __kmp_test_adaptive_lock_only(lck
, gtid
))
2051 // Speculative acquisition failed, so try to acquire it non-speculatively.
2052 // Count the non-speculative acquire attempt
2053 lck
->lk
.adaptive
.acquire_attempts
++;
2055 // Use base, non-speculative lock.
2056 if (__kmp_test_queuing_lock(GET_QLK_PTR(lck
), gtid
)) {
2057 KMP_INC_STAT(lck
, nonSpeculativeAcquires
);
2058 return 1; // Lock is acquired (non-speculatively)
2060 return 0; // Failed to acquire the lock, it's already visibly locked.
// Checked variant used for "omp_test_lock" on adaptive locks. Raises
// KMP_FATAL when the embedded queuing lock is uninitialized, then calls
// __kmp_test_adaptive_lock; owner_id of the embedded queuing lock is set to
// gtid + 1 in the visible code that follows the test.
2064 static int __kmp_test_adaptive_lock_with_checks(kmp_adaptive_lock_t
*lck
,
2066 char const *const func
= "omp_test_lock";
2067 if (lck
->lk
.qlk
.initialized
!= GET_QLK_PTR(lck
)) {
2068 KMP_FATAL(LockIsUninitialized
, func
);
2071 int retval
= __kmp_test_adaptive_lock(lck
, gtid
);
2074 lck
->lk
.qlk
.owner_id
= gtid
+ 1;
2079 // Block until we can acquire a speculative, adaptive lock. We check whether we
2080 // should be trying to speculate. If we should be, we check the real lock to see
2081 // if it is free, and, if not, pause without attempting to acquire it until it
2082 // is. Then we try the speculative acquire. This means that although we suffer
2083 // from lemmings a little (because we all can't acquire the lock speculatively
2084 // until the queue of threads waiting has cleared), we don't get into a state
2085 // where we can never acquire the lock speculatively (because we force the queue
2086 // to clear by preventing new arrivals from entering the queue). This does mean
2087 // that when we're trying to break lemmings, the lock is no longer fair. However
2088 // OpenMP makes no guarantee that its locks are fair, so this isn't a real
// problem.
2090 static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t
*lck
,
2092 if (__kmp_should_speculate(lck
, gtid
)) {
2093 if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck
))) {
2094 if (__kmp_test_adaptive_lock_only(lck
, gtid
))
2096 // We tried speculation and failed, so give up.
2098 // We can't try speculation until the lock is free, so we pause here
2099 // (without suspending on the queueing lock, to allow it to drain), then
2100 // try again. All other threads will also see the same result for
2101 // shouldSpeculate, so will be doing the same if they try to claim the
2102 // lock from now on.
2103 while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck
))) {
2104 KMP_INC_STAT(lck
, lemmingYields
);
2108 if (__kmp_test_adaptive_lock_only(lck
, gtid
))
2113 // Speculative acquisition failed, so acquire it non-speculatively.
2114 // Count the non-speculative acquire attempt
2115 lck
->lk
.adaptive
.acquire_attempts
++;
2117 __kmp_acquire_queuing_lock_timed_template
<FALSE
>(GET_QLK_PTR(lck
), gtid
);
2118 // We have acquired the base lock, so count that.
2119 KMP_INC_STAT(lck
, nonSpeculativeAcquires
);
// Checked variant used for "omp_set_lock" on adaptive locks. Raises KMP_FATAL
// when the embedded queuing lock is uninitialized or already owned by the
// calling gtid; otherwise acquires the adaptive lock and records gtid + 1 as
// the owner of the embedded queuing lock.
2122 static void __kmp_acquire_adaptive_lock_with_checks(kmp_adaptive_lock_t
*lck
,
2124 char const *const func
= "omp_set_lock";
2125 if (lck
->lk
.qlk
.initialized
!= GET_QLK_PTR(lck
)) {
2126 KMP_FATAL(LockIsUninitialized
, func
);
2128 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck
)) == gtid
) {
2129 KMP_FATAL(LockIsAlreadyOwned
, func
);
2132 __kmp_acquire_adaptive_lock(lck
, gtid
);
2134 lck
->lk
.qlk
.owner_id
= gtid
+ 1;
// Releases an adaptive lock. If the underlying queuing lock looks free, the
// caller is taken to be inside a transaction: the transaction is ended with
// _xend() and the badness is reset. Otherwise the queuing lock is released
// normally. Returns KMP_LOCK_RELEASED in both cases.
2137 KMP_ATTRIBUTE_TARGET_RTM
2138 static int __kmp_release_adaptive_lock(kmp_adaptive_lock_t
*lck
,
2140 if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(
2141 lck
))) { // If the lock doesn't look claimed we must be speculating.
2142 // (Or the user's code is buggy and they're releasing without locking;
2143 // if we had XTEST we'd be able to check that case...)
2144 _xend(); // Exit speculation
2145 __kmp_update_badness_after_success(lck
);
2146 } else { // Since the lock *is* visibly locked we're not speculating,
2147 // so should use the underlying lock's release scheme.
2148 __kmp_release_queuing_lock(GET_QLK_PTR(lck
), gtid
);
2150 return KMP_LOCK_RELEASED
;
// Checked variant used for "omp_unset_lock" on adaptive locks. After a memory
// barrier it raises KMP_FATAL when the embedded queuing lock is
// uninitialized, has no owner (owner == -1), or is owned by a different gtid.
// Otherwise it clears owner_id, releases the adaptive lock and returns
// KMP_LOCK_RELEASED.
2153 static int __kmp_release_adaptive_lock_with_checks(kmp_adaptive_lock_t
*lck
,
2155 char const *const func
= "omp_unset_lock";
2156 KMP_MB(); /* in case another processor initialized lock */
2157 if (lck
->lk
.qlk
.initialized
!= GET_QLK_PTR(lck
)) {
2158 KMP_FATAL(LockIsUninitialized
, func
);
2160 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck
)) == -1) {
2161 KMP_FATAL(LockUnsettingFree
, func
);
2163 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck
)) != gtid
) {
2164 KMP_FATAL(LockUnsettingSetByAnother
, func
);
2166 lck
->lk
.qlk
.owner_id
= 0;
2167 __kmp_release_adaptive_lock(lck
, gtid
);
2168 return KMP_LOCK_RELEASED
;
2171 static void __kmp_init_adaptive_lock(kmp_adaptive_lock_t
*lck
) {
2172 __kmp_init_queuing_lock(GET_QLK_PTR(lck
));
2173 lck
->lk
.adaptive
.badness
= 0;
2174 lck
->lk
.adaptive
.acquire_attempts
= 0; // nonSpeculativeAcquireAttempts = 0;
2175 lck
->lk
.adaptive
.max_soft_retries
=
2176 __kmp_adaptive_backoff_params
.max_soft_retries
;
2177 lck
->lk
.adaptive
.max_badness
= __kmp_adaptive_backoff_params
.max_badness
;
2178 #if KMP_DEBUG_ADAPTIVE_LOCKS
2179 __kmp_zero_speculative_stats(&lck
->lk
.adaptive
);
2181 KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck
));
2184 static void __kmp_destroy_adaptive_lock(kmp_adaptive_lock_t
*lck
) {
2185 #if KMP_DEBUG_ADAPTIVE_LOCKS
2186 __kmp_accumulate_speculative_stats(&lck
->lk
.adaptive
);
2188 __kmp_destroy_queuing_lock(GET_QLK_PTR(lck
));
2189 // Nothing needed for the speculative part.
2192 static void __kmp_destroy_adaptive_lock_with_checks(kmp_adaptive_lock_t
*lck
) {
2193 char const *const func
= "omp_destroy_lock";
2194 if (lck
->lk
.qlk
.initialized
!= GET_QLK_PTR(lck
)) {
2195 KMP_FATAL(LockIsUninitialized
, func
);
2197 if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck
)) != -1) {
2198 KMP_FATAL(LockStillOwned
, func
);
2200 __kmp_destroy_adaptive_lock(lck
);
2203 #endif // KMP_USE_ADAPTIVE_LOCKS
2205 /* ------------------------------------------------------------------------ */
2206 /* DRDPA ticket locks */
2207 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2209 static kmp_int32
__kmp_get_drdpa_lock_owner(kmp_drdpa_lock_t
*lck
) {
2210 return lck
->lk
.owner_id
- 1;
2213 static inline bool __kmp_is_drdpa_lock_nestable(kmp_drdpa_lock_t
*lck
) {
2214 return lck
->lk
.depth_locked
!= -1;
// Acquire a DRDPA ticket lock.
//
// Takes a ticket from lk.next_ticket, then spins until the poll slot selected
// by (ticket & mask) holds a value >= ticket. mask and polls are re-read on
// every spin iteration because another thread may reconfigure the polling
// area while we wait.
//
// Once the lock is held, the routine:
//   - records the ticket in lk.now_serving;
//   - frees a retired polling area (lk.old_polls) when ticket has reached
//     lk.cleanup_ticket;
//   - if no retired area is pending, may resize the polling area: shrink it
//     when the runtime is oversubscribed, or grow it when more threads are
//     waiting than there are poll slots.
//
// Returns KMP_LOCK_ACQUIRED_FIRST.
2217 __forceinline
static int
2218 __kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t
*lck
, kmp_int32 gtid
) {
// Draw our ticket (presumably KMP_ATOMIC_INC yields the pre-increment
// value -- TODO confirm).
2219 kmp_uint64 ticket
= KMP_ATOMIC_INC(&lck
->lk
.next_ticket
);
2220 kmp_uint64 mask
= lck
->lk
.mask
; // atomic load
2221 std::atomic
<kmp_uint64
> *polls
= lck
->lk
.polls
;
2223 #ifdef USE_LOCK_PROFILE
2224 if (polls
[ticket
& mask
] != ticket
)
2225 __kmp_printf("LOCK CONTENTION: %p\n", lck
);
2226 /* else __kmp_printf( "." );*/
2227 #endif /* USE_LOCK_PROFILE */
2229 // Now spin-wait, but reload the polls pointer and mask, in case the
2230 // polling area has been reconfigured. Unless it is reconfigured, the
2231 // reloads stay in L1 cache and are cheap.
2233 // Keep this code in sync with KMP_WAIT, in kmp_dispatch.cpp !!!
2234 // The current implementation of KMP_WAIT doesn't allow for mask
2235 // and poll to be re-read every spin iteration.
2238 KMP_FSYNC_PREPARE(lck
);
2239 KMP_INIT_YIELD(spins
);
2240 KMP_INIT_BACKOFF(time
);
2241 while (polls
[ticket
& mask
] < ticket
) { // atomic load
2242 KMP_YIELD_OVERSUB_ELSE_SPIN(spins
, time
);
2243 // Re-read the mask and the poll pointer from the lock structure.
2245 // Make certain that "mask" is read before "polls" !!!
2247 // If another thread reconfigures the polling area and updates their
2248 // values, and we get the new value of mask and the old polls pointer, we
2249 // could access memory beyond the end of the old polling area.
2250 mask
= lck
->lk
.mask
; // atomic load
2251 polls
= lck
->lk
.polls
; // atomic load
2254 // Critical section starts here
2255 KMP_FSYNC_ACQUIRED(lck
);
2256 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2258 lck
->lk
.now_serving
= ticket
; // non-volatile store
2260 // Deallocate a garbage polling area if we know that we are the last
2261 // thread that could possibly access it.
2263 // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2265 if ((lck
->lk
.old_polls
!= NULL
) && (ticket
>= lck
->lk
.cleanup_ticket
)) {
2266 __kmp_free(lck
->lk
.old_polls
);
2267 lck
->lk
.old_polls
= NULL
;
2268 lck
->lk
.cleanup_ticket
= 0;
2271 // Check to see if we should reconfigure the polling area.
2272 // If there is still a garbage polling area to be deallocated from a
2273 // previous reconfiguration, let a later thread reconfigure it.
2274 if (lck
->lk
.old_polls
== NULL
) {
2275 bool reconfigure
= false;
2276 std::atomic
<kmp_uint64
> *old_polls
= polls
;
2277 kmp_uint32 num_polls
= TCR_4(lck
->lk
.num_polls
);
// Oversubscribed: more runtime threads (__kmp_nth) than processors
// (__kmp_avail_proc, falling back to __kmp_xproc when that is zero).
2279 if (TCR_4(__kmp_nth
) >
2280 (__kmp_avail_proc
? __kmp_avail_proc
: __kmp_xproc
)) {
2281 // We are in oversubscription mode. Contract the polling area
2282 // down to a single location, if that hasn't been done already.
2283 if (num_polls
> 1) {
2285 num_polls
= TCR_4(lck
->lk
.num_polls
);
2288 polls
= (std::atomic
<kmp_uint64
> *)__kmp_allocate(num_polls
*
2293 // We are in under/fully subscribed mode. Check the number of
2294 // threads waiting on the lock. The size of the polling area
2295 // should be at least the number of threads waiting.
2296 kmp_uint64 num_waiting
= TCR_8(lck
->lk
.next_ticket
) - ticket
- 1;
2297 if (num_waiting
> num_polls
) {
2298 kmp_uint32 old_num_polls
= num_polls
;
// Each step widens the mask by one low bit, until the polling area is
// larger than the number of waiters.
2301 mask
= (mask
<< 1) | 1;
2303 } while (num_polls
<= num_waiting
);
2305 // Allocate the new polling area, and copy the relevant portion
2306 // of the old polling area to the new area. __kmp_allocate()
2307 // zeroes the memory it allocates, and most of the old area is
2308 // just zero padding, so we only copy the release counters.
2309 polls
= (std::atomic
<kmp_uint64
> *)__kmp_allocate(num_polls
*
2312 for (i
= 0; i
< old_num_polls
; i
++) {
2313 polls
[i
].store(old_polls
[i
]);
2319 // Now write the updated fields back to the lock structure.
2321 // Make certain that "polls" is written before "mask" !!!
2323 // If another thread picks up the new value of mask and the old polls
2324 // pointer , it could access memory beyond the end of the old polling
2327 // On x86, we need memory fences.
2328 KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring "
2329 "lock %p to %d polls\n",
2330 ticket
, lck
, num_polls
));
// Retire the previous area; it is freed by a later acquirer once its ticket
// reaches cleanup_ticket.
2332 lck
->lk
.old_polls
= old_polls
;
2333 lck
->lk
.polls
= polls
; // atomic store
2337 lck
->lk
.num_polls
= num_polls
;
2338 lck
->lk
.mask
= mask
; // atomic store
2342 // Only after the new polling area and mask have been flushed
2343 // to main memory can we update the cleanup ticket field.
2345 // volatile load / non-volatile store
2346 lck
->lk
.cleanup_ticket
= lck
->lk
.next_ticket
;
2349 return KMP_LOCK_ACQUIRED_FIRST
;
2352 int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t
*lck
, kmp_int32 gtid
) {
2353 int retval
= __kmp_acquire_drdpa_lock_timed_template(lck
, gtid
);
2357 static int __kmp_acquire_drdpa_lock_with_checks(kmp_drdpa_lock_t
*lck
,
2359 char const *const func
= "omp_set_lock";
2360 if (lck
->lk
.initialized
!= lck
) {
2361 KMP_FATAL(LockIsUninitialized
, func
);
2363 if (__kmp_is_drdpa_lock_nestable(lck
)) {
2364 KMP_FATAL(LockNestableUsedAsSimple
, func
);
2366 if ((gtid
>= 0) && (__kmp_get_drdpa_lock_owner(lck
) == gtid
)) {
2367 KMP_FATAL(LockIsAlreadyOwned
, func
);
2370 __kmp_acquire_drdpa_lock(lck
, gtid
);
2372 lck
->lk
.owner_id
= gtid
+ 1;
2373 return KMP_LOCK_ACQUIRED_FIRST
;
// Non-blocking acquire attempt for a simple DRDPA lock.
//
// Reads the next ticket and checks whether the poll slot it maps to already
// holds that ticket value (i.e. the lock is free for that ticket). If so, it
// tries to claim the ticket with a compare-and-store on lk.next_ticket and, on
// success, records the ticket in lk.now_serving. The polling area is never
// reconfigured or freed here (see the comment at the end of the body).
2376 int __kmp_test_drdpa_lock(kmp_drdpa_lock_t
*lck
, kmp_int32 gtid
) {
2377 // First get a ticket, then read the polls pointer and the mask.
2378 // The polls pointer must be read before the mask!!! (See above)
2379 kmp_uint64 ticket
= lck
->lk
.next_ticket
; // atomic load
2380 std::atomic
<kmp_uint64
> *polls
= lck
->lk
.polls
;
2381 kmp_uint64 mask
= lck
->lk
.mask
; // atomic load
2382 if (polls
[ticket
& mask
] == ticket
) {
2383 kmp_uint64 next_ticket
= ticket
+ 1;
// Claim the ticket only if next_ticket still equals the value read above.
2384 if (__kmp_atomic_compare_store_acq(&lck
->lk
.next_ticket
, ticket
,
2386 KMP_FSYNC_ACQUIRED(lck
);
2387 KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2389 lck
->lk
.now_serving
= ticket
; // non-volatile store
2391 // Since no threads are waiting, there is no possibility that we would
2392 // want to reconfigure the polling area. We might have the cleanup ticket
2393 // value (which says that it is now safe to deallocate old_polls), but
2394 // we'll let a later thread which calls __kmp_acquire_lock do that - this
2395 // routine isn't supposed to block, and we would risk blocks if we called
2396 // __kmp_free() to do the deallocation.
// Consistency-checked test (try-acquire) for simple DRDPA locks; errors are
// reported against "omp_test_lock". Raises a fatal error when the lock is
// uninitialized or is a nestable lock, then attempts the acquire via
// __kmp_test_drdpa_lock() and records the caller as owner (gtid + 1).
// NOTE(review): presumably owner_id is only written when the test succeeded
// and retval is returned to the caller -- confirm.
2403 static int __kmp_test_drdpa_lock_with_checks(kmp_drdpa_lock_t
*lck
,
2405 char const *const func
= "omp_test_lock";
2406 if (lck
->lk
.initialized
!= lck
) {
2407 KMP_FATAL(LockIsUninitialized
, func
);
2409 if (__kmp_is_drdpa_lock_nestable(lck
)) {
2410 KMP_FATAL(LockNestableUsedAsSimple
, func
);
2413 int retval
= __kmp_test_drdpa_lock(lck
, gtid
);
2416 lck
->lk
.owner_id
= gtid
+ 1;
// Release a simple DRDPA lock: compute the next ticket to be served
// (now_serving + 1) and publish it in the poll slot selected by
// (ticket & mask), which is what the holder of that ticket spins on.
// Returns KMP_LOCK_RELEASED.
2421 int __kmp_release_drdpa_lock(kmp_drdpa_lock_t
*lck
, kmp_int32 gtid
) {
2422 // Read the ticket value from the lock data struct, then the polls pointer and
2423 // the mask. The polls pointer must be read before the mask!!! (See above)
2424 kmp_uint64 ticket
= lck
->lk
.now_serving
+ 1; // non-atomic load
2425 std::atomic
<kmp_uint64
> *polls
= lck
->lk
.polls
; // atomic load
2426 kmp_uint64 mask
= lck
->lk
.mask
; // atomic load
2427 KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2429 KMP_FSYNC_RELEASING(lck
);
// Hand the lock to the waiter (if any) holding the next ticket.
2430 polls
[ticket
& mask
] = ticket
; // atomic store
2431 return KMP_LOCK_RELEASED
;
2434 static int __kmp_release_drdpa_lock_with_checks(kmp_drdpa_lock_t
*lck
,
2436 char const *const func
= "omp_unset_lock";
2437 KMP_MB(); /* in case another processor initialized lock */
2438 if (lck
->lk
.initialized
!= lck
) {
2439 KMP_FATAL(LockIsUninitialized
, func
);
2441 if (__kmp_is_drdpa_lock_nestable(lck
)) {
2442 KMP_FATAL(LockNestableUsedAsSimple
, func
);
2444 if (__kmp_get_drdpa_lock_owner(lck
) == -1) {
2445 KMP_FATAL(LockUnsettingFree
, func
);
2447 if ((gtid
>= 0) && (__kmp_get_drdpa_lock_owner(lck
) >= 0) &&
2448 (__kmp_get_drdpa_lock_owner(lck
) != gtid
)) {
2449 KMP_FATAL(LockUnsettingSetByAnother
, func
);
2451 lck
->lk
.owner_id
= 0;
2452 return __kmp_release_drdpa_lock(lck
, gtid
);
// Initialize a simple DRDPA lock: allocate a polling area with a single slot,
// reset the ticket counters, mark the lock as unowned and non-nestable, and
// finally stamp 'initialized' with the lock's own address, which is the value
// the *_with_checks routines compare against.
2455 void __kmp_init_drdpa_lock(kmp_drdpa_lock_t
*lck
) {
2456 lck
->lk
.location
= NULL
;
2458 lck
->lk
.num_polls
= 1;
// One poll slot to start with; the acquire path may resize the area later.
2459 lck
->lk
.polls
= (std::atomic
<kmp_uint64
> *)__kmp_allocate(
2460 lck
->lk
.num_polls
* sizeof(*(lck
->lk
.polls
)));
2461 lck
->lk
.cleanup_ticket
= 0;
2462 lck
->lk
.old_polls
= NULL
;
2463 lck
->lk
.next_ticket
= 0;
2464 lck
->lk
.now_serving
= 0;
2465 lck
->lk
.owner_id
= 0; // no thread owns the lock.
2466 lck
->lk
.depth_locked
= -1; // >= 0 for nestable locks, -1 for simple locks.
2467 lck
->lk
.initialized
= lck
;
2469 KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck
));
// Destroy a DRDPA lock: mark it uninitialized, free the current polling area
// and any retired one (old_polls), and reset the remaining fields to their
// "unowned simple lock" values.
2472 void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t
*lck
) {
2473 lck
->lk
.initialized
= NULL
;
2474 lck
->lk
.location
= NULL
;
2475 if (lck
->lk
.polls
.load() != NULL
) {
2476 __kmp_free(lck
->lk
.polls
.load());
2477 lck
->lk
.polls
= NULL
;
// A retired polling area may still be pending from an earlier
// reconfiguration.
2479 if (lck
->lk
.old_polls
!= NULL
) {
2480 __kmp_free(lck
->lk
.old_polls
);
2481 lck
->lk
.old_polls
= NULL
;
2484 lck
->lk
.num_polls
= 0;
2485 lck
->lk
.cleanup_ticket
= 0;
2486 lck
->lk
.next_ticket
= 0;
2487 lck
->lk
.now_serving
= 0;
2488 lck
->lk
.owner_id
= 0;
2489 lck
->lk
.depth_locked
= -1;
2492 static void __kmp_destroy_drdpa_lock_with_checks(kmp_drdpa_lock_t
*lck
) {
2493 char const *const func
= "omp_destroy_lock";
2494 if (lck
->lk
.initialized
!= lck
) {
2495 KMP_FATAL(LockIsUninitialized
, func
);
2497 if (__kmp_is_drdpa_lock_nestable(lck
)) {
2498 KMP_FATAL(LockNestableUsedAsSimple
, func
);
2500 if (__kmp_get_drdpa_lock_owner(lck
) != -1) {
2501 KMP_FATAL(LockStillOwned
, func
);
2503 __kmp_destroy_drdpa_lock(lck
);
2506 // nested drdpa ticket locks
// Acquire a nestable DRDPA lock.
// If the calling thread already owns the lock, only the nesting depth is
// incremented and KMP_LOCK_ACQUIRED_NEXT is returned. Otherwise the lock is
// acquired through the shared acquire template, the depth is set to 1, the
// caller is recorded as owner (gtid + 1) and KMP_LOCK_ACQUIRED_FIRST is
// returned. gtid must be non-negative.
2508 int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t
*lck
, kmp_int32 gtid
) {
2509 KMP_DEBUG_ASSERT(gtid
>= 0);
// Re-entrant acquire by the current owner.
2511 if (__kmp_get_drdpa_lock_owner(lck
) == gtid
) {
2512 lck
->lk
.depth_locked
+= 1;
2513 return KMP_LOCK_ACQUIRED_NEXT
;
2515 __kmp_acquire_drdpa_lock_timed_template(lck
, gtid
);
2517 lck
->lk
.depth_locked
= 1;
2519 lck
->lk
.owner_id
= gtid
+ 1;
2520 return KMP_LOCK_ACQUIRED_FIRST
;
2524 static void __kmp_acquire_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t
*lck
,
2526 char const *const func
= "omp_set_nest_lock";
2527 if (lck
->lk
.initialized
!= lck
) {
2528 KMP_FATAL(LockIsUninitialized
, func
);
2530 if (!__kmp_is_drdpa_lock_nestable(lck
)) {
2531 KMP_FATAL(LockSimpleUsedAsNestable
, func
);
2533 __kmp_acquire_nested_drdpa_lock(lck
, gtid
);
// Non-blocking acquire attempt for a nestable DRDPA lock.
// If the caller already owns the lock the nesting depth is incremented and the
// new depth becomes the result. Otherwise __kmp_test_drdpa_lock() is tried;
// when it succeeds the depth is set to 1 (also the result) and the caller is
// recorded as owner (gtid + 1). gtid must be non-negative.
// NOTE(review): presumably the result is 0 when the test fails -- confirm.
2536 int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t
*lck
, kmp_int32 gtid
) {
2539 KMP_DEBUG_ASSERT(gtid
>= 0);
2541 if (__kmp_get_drdpa_lock_owner(lck
) == gtid
) {
2542 retval
= ++lck
->lk
.depth_locked
;
2543 } else if (!__kmp_test_drdpa_lock(lck
, gtid
)) {
2547 retval
= lck
->lk
.depth_locked
= 1;
2549 lck
->lk
.owner_id
= gtid
+ 1;
2554 static int __kmp_test_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t
*lck
,
2556 char const *const func
= "omp_test_nest_lock";
2557 if (lck
->lk
.initialized
!= lck
) {
2558 KMP_FATAL(LockIsUninitialized
, func
);
2560 if (!__kmp_is_drdpa_lock_nestable(lck
)) {
2561 KMP_FATAL(LockSimpleUsedAsNestable
, func
);
2563 return __kmp_test_nested_drdpa_lock(lck
, gtid
);
// Release one nesting level of a nestable DRDPA lock.
// Decrements the nesting depth; when it reaches zero the owner is cleared, the
// underlying lock is released and KMP_LOCK_RELEASED is returned. Otherwise the
// lock stays held and KMP_LOCK_STILL_HELD is returned. gtid must be
// non-negative.
2566 int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t
*lck
, kmp_int32 gtid
) {
2567 KMP_DEBUG_ASSERT(gtid
>= 0);
2570 if (--(lck
->lk
.depth_locked
) == 0) {
2572 lck
->lk
.owner_id
= 0;
2573 __kmp_release_drdpa_lock(lck
, gtid
);
2574 return KMP_LOCK_RELEASED
;
2576 return KMP_LOCK_STILL_HELD
;
2579 static int __kmp_release_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t
*lck
,
2581 char const *const func
= "omp_unset_nest_lock";
2582 KMP_MB(); /* in case another processor initialized lock */
2583 if (lck
->lk
.initialized
!= lck
) {
2584 KMP_FATAL(LockIsUninitialized
, func
);
2586 if (!__kmp_is_drdpa_lock_nestable(lck
)) {
2587 KMP_FATAL(LockSimpleUsedAsNestable
, func
);
2589 if (__kmp_get_drdpa_lock_owner(lck
) == -1) {
2590 KMP_FATAL(LockUnsettingFree
, func
);
2592 if (__kmp_get_drdpa_lock_owner(lck
) != gtid
) {
2593 KMP_FATAL(LockUnsettingSetByAnother
, func
);
2595 return __kmp_release_nested_drdpa_lock(lck
, gtid
);
2598 void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t
*lck
) {
2599 __kmp_init_drdpa_lock(lck
);
2600 lck
->lk
.depth_locked
= 0; // >= 0 for nestable locks, -1 for simple locks
2603 void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t
*lck
) {
2604 __kmp_destroy_drdpa_lock(lck
);
2605 lck
->lk
.depth_locked
= 0;
2608 static void __kmp_destroy_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t
*lck
) {
2609 char const *const func
= "omp_destroy_nest_lock";
2610 if (lck
->lk
.initialized
!= lck
) {
2611 KMP_FATAL(LockIsUninitialized
, func
);
2613 if (!__kmp_is_drdpa_lock_nestable(lck
)) {
2614 KMP_FATAL(LockSimpleUsedAsNestable
, func
);
2616 if (__kmp_get_drdpa_lock_owner(lck
) != -1) {
2617 KMP_FATAL(LockStillOwned
, func
);
2619 __kmp_destroy_nested_drdpa_lock(lck
);
2622 // access functions to fields which don't exist for all lock kinds.
2624 static const ident_t
*__kmp_get_drdpa_lock_location(kmp_drdpa_lock_t
*lck
) {
2625 return lck
->lk
.location
;
2628 static void __kmp_set_drdpa_lock_location(kmp_drdpa_lock_t
*lck
,
2629 const ident_t
*loc
) {
2630 lck
->lk
.location
= loc
;
2633 static kmp_lock_flags_t
__kmp_get_drdpa_lock_flags(kmp_drdpa_lock_t
*lck
) {
2634 return lck
->lk
.flags
;
2637 static void __kmp_set_drdpa_lock_flags(kmp_drdpa_lock_t
*lck
,
2638 kmp_lock_flags_t flags
) {
2639 lck
->lk
.flags
= flags
;
// Clock source and default parameters for __kmp_spin_backoff().
// __kmp_tsc() maps to the hardware timestamp on x86 / x86_64 and to a
// nanosecond clock elsewhere, so each platform gets its own default values.
// NOTE(review): the initializers presumably follow the kmp_backoff_t field
// order (step, max_backoff, min_tick) -- confirm against the type definition.
2642 // Time stamp counter
2643 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2644 #define __kmp_tsc() __kmp_hardware_timestamp()
2645 // Runtime's default backoff parameters
2646 kmp_backoff_t __kmp_spin_backoff_params
= {1, 4096, 100};
2648 // Use nanoseconds for other platforms
2649 extern kmp_uint64
__kmp_now_nsec();
2650 kmp_backoff_t __kmp_spin_backoff_params
= {1, 256, 100};
2651 #define __kmp_tsc() __kmp_now_nsec()
2654 // A useful predicate for dealing with timestamps that may wrap.
2655 // Is a before b? Since the timestamps may wrap, this is asking whether it's
2656 // shorter to go clockwise from a to b around the clock-face, or anti-clockwise.
2657 // Times where going clockwise is less distance than going anti-clockwise
2658 // are in the future, others are in the past. e.g. a = MAX-1, b = MAX+1 (=0),
2659 // then a > b (true) does not mean a reached b; whereas signed(a) = -2,
2660 // signed(b) = 0 captures the actual difference
2661 static inline bool before(kmp_uint64 a
, kmp_uint64 b
) {
2662 return ((kmp_int64
)b
- (kmp_int64
)a
) > 0;
2665 // Truncated binary exponential backoff function
// Waits boff->step rounds, each lasting until the clock (__kmp_tsc()) passes
// "now + boff->min_tick", then roughly doubles the step for the next call
// while keeping it below boff->max_backoff.
2666 void __kmp_spin_backoff(kmp_backoff_t
*boff
) {
2667 // We could flatten this loop, but making it a nested loop gives better result
2669 for (i
= boff
->step
; i
> 0; i
--) {
2670 kmp_uint64 goal
= __kmp_tsc() + boff
->min_tick
;
2672 if (__kmp_umwait_enabled
) {
2673 __kmp_tpause(0, boff
->min_tick
);
// before() is wrap-safe, so this works even if the clock value wraps.
2678 } while (before(__kmp_tsc(), goal
));
// step takes the values 1, 3, 7, ...; the mask truncates it.
// NOTE(review): the mask assumes max_backoff is a power of two -- confirm.
2683 boff
->step
= (boff
->step
<< 1 | 1) & (boff
->max_backoff
- 1);
2686 #if KMP_USE_DYNAMIC_LOCK
2688 // Direct lock initializers. It simply writes a tag to the low 8 bits of the
// The tag written into the lock word is derived from the requested lock
// sequence 'seq' via KMP_GET_D_TAG.
2690 static void __kmp_init_direct_lock(kmp_dyna_lock_t
*lck
,
2691 kmp_dyna_lockseq_t seq
) {
2692 TCW_4(*lck
, KMP_GET_D_TAG(seq
));
2695 ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq
));
2700 // HLE lock functions - imported from the testbed runtime.
2701 #define HLE_ACQUIRE ".byte 0xf2;"
2702 #define HLE_RELEASE ".byte 0xf3;"
// Exchange the 32-bit value at *p with v using an xchg instruction that
// carries the HLE_ACQUIRE prefix byte. After the exchange v holds the previous
// contents of *p.
// NOTE(review): presumably that previous value is what the function returns --
// confirm.
2704 static inline kmp_uint32
swap4(kmp_uint32
volatile *p
, kmp_uint32 v
) {
2705 __asm__
volatile(HLE_ACQUIRE
"xchg %1,%0" : "+r"(v
), "+m"(*p
) : : "memory");
2709 static void __kmp_destroy_hle_lock(kmp_dyna_lock_t
*lck
) { TCW_4(*lck
, 0); }
// "With checks" entry point for destroying an HLE lock.
2711 static void __kmp_destroy_hle_lock_with_checks(kmp_dyna_lock_t
*lck
) {
// Acquire an HLE lock.
// Tries to swap the busy value into the lock word; if the previous value was
// not "free", spins reading the lock word until it looks free (with a small,
// growing delay between reads) and then retries the swap until it succeeds.
2715 static void __kmp_acquire_hle_lock(kmp_dyna_lock_t
*lck
, kmp_int32 gtid
) {
2716 // Use gtid for KMP_LOCK_BUSY if necessary
2717 if (swap4(lck
, KMP_LOCK_BUSY(1, hle
)) != KMP_LOCK_FREE(hle
)) {
// Spin on plain reads before retrying the swap.
2720 while (*(kmp_uint32
volatile *)lck
!= KMP_LOCK_FREE(hle
)) {
2721 for (int i
= delay
; i
!= 0; --i
)
// delay grows as 1, 3, 7 and is capped at 7 by the mask.
2723 delay
= ((delay
<< 1) | 1) & 7;
2725 } while (swap4(lck
, KMP_LOCK_BUSY(1, hle
)) != KMP_LOCK_FREE(hle
));
2729 static void __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t
*lck
,
2731 __kmp_acquire_hle_lock(lck
, gtid
); // TODO: add checks
// Release an HLE lock: store the "free" value into the lock word with a movl
// instruction that carries the HLE_RELEASE prefix byte.
// Returns KMP_LOCK_RELEASED.
2734 static int __kmp_release_hle_lock(kmp_dyna_lock_t
*lck
, kmp_int32 gtid
) {
2735 __asm__
volatile(HLE_RELEASE
"movl %1,%0"
2737 : "r"(KMP_LOCK_FREE(hle
))
2739 return KMP_LOCK_RELEASED
;
2742 static int __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t
*lck
,
2744 return __kmp_release_hle_lock(lck
, gtid
); // TODO: add checks
2747 static int __kmp_test_hle_lock(kmp_dyna_lock_t
*lck
, kmp_int32 gtid
) {
2748 return swap4(lck
, KMP_LOCK_BUSY(1, hle
)) == KMP_LOCK_FREE(hle
);
2751 static int __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t
*lck
,
2753 return __kmp_test_hle_lock(lck
, gtid
); // TODO: add checks
2756 static void __kmp_init_rtm_queuing_lock(kmp_queuing_lock_t
*lck
) {
2757 __kmp_init_queuing_lock(lck
);
2760 static void __kmp_destroy_rtm_queuing_lock(kmp_queuing_lock_t
*lck
) {
2761 __kmp_destroy_queuing_lock(lck
);
// Checked destroy for RTM queuing locks: delegates to the checked
// queuing-lock destructor.
2765 __kmp_destroy_rtm_queuing_lock_with_checks(kmp_queuing_lock_t
*lck
) {
2766 __kmp_destroy_queuing_lock_with_checks(lck
);
// Acquire an RTM queuing lock.
// Makes a bounded number of speculative (transactional) attempts: inside a
// started transaction the lock only needs to be observed as unlocked. After an
// explicit abort with code 0xff the routine waits for the lock to become free
// before retrying; an abort without the retry hint ends the speculative
// attempts. If speculation does not succeed, the lock is acquired
// non-speculatively through the regular queuing-lock acquire.
2769 KMP_ATTRIBUTE_TARGET_RTM
2770 static void __kmp_acquire_rtm_queuing_lock(kmp_queuing_lock_t
*lck
,
2772 unsigned retries
= 3, status
;
2775 if (status
== _XBEGIN_STARTED
) {
2776 if (__kmp_is_unlocked_queuing_lock(lck
))
2780 if ((status
& _XABORT_EXPLICIT
) && _XABORT_CODE(status
) == 0xff) {
2781 // Wait until lock becomes free
2782 while (!__kmp_is_unlocked_queuing_lock(lck
)) {
2785 } else if (!(status
& _XABORT_RETRY
))
2787 } while (retries
--);
2789 // Fall-back non-speculative lock (xchg)
2790 __kmp_acquire_queuing_lock(lck
, gtid
);
2793 static void __kmp_acquire_rtm_queuing_lock_with_checks(kmp_queuing_lock_t
*lck
,
2795 __kmp_acquire_rtm_queuing_lock(lck
, gtid
);
// Release an RTM queuing lock.
// If the lock word looks unlocked, the lock was taken speculatively and the
// release belongs to the speculation path; otherwise the lock is really held
// and is released through the regular queuing-lock release.
// Returns KMP_LOCK_RELEASED.
2798 KMP_ATTRIBUTE_TARGET_RTM
2799 static int __kmp_release_rtm_queuing_lock(kmp_queuing_lock_t
*lck
,
2801 if (__kmp_is_unlocked_queuing_lock(lck
)) {
2802 // Releasing from speculation
2805 // Releasing from a real lock
2806 __kmp_release_queuing_lock(lck
, gtid
);
2808 return KMP_LOCK_RELEASED
;
2811 static int __kmp_release_rtm_queuing_lock_with_checks(kmp_queuing_lock_t
*lck
,
2813 return __kmp_release_rtm_queuing_lock(lck
, gtid
);
// Non-blocking acquire attempt for an RTM queuing lock.
// Makes a bounded number of speculative attempts that succeed when a
// transaction starts and the lock is observed unlocked; an abort without the
// retry hint ends the attempts. If speculation does not succeed, the result of
// the regular queuing-lock test is returned.
2816 KMP_ATTRIBUTE_TARGET_RTM
2817 static int __kmp_test_rtm_queuing_lock(kmp_queuing_lock_t
*lck
,
2819 unsigned retries
= 3, status
;
2822 if (status
== _XBEGIN_STARTED
&& __kmp_is_unlocked_queuing_lock(lck
)) {
2825 if (!(status
& _XABORT_RETRY
))
2827 } while (retries
--);
2829 return __kmp_test_queuing_lock(lck
, gtid
);
2832 static int __kmp_test_rtm_queuing_lock_with_checks(kmp_queuing_lock_t
*lck
,
2834 return __kmp_test_rtm_queuing_lock(lck
, gtid
);
2837 // Reuse kmp_tas_lock_t for TSX lock which use RTM with fall-back spin lock.
2838 typedef kmp_tas_lock_t kmp_rtm_spin_lock_t
;
2840 static void __kmp_destroy_rtm_spin_lock(kmp_rtm_spin_lock_t
*lck
) {
2841 KMP_ATOMIC_ST_REL(&lck
->lk
.poll
, 0);
2844 static void __kmp_destroy_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t
*lck
) {
2845 __kmp_destroy_rtm_spin_lock(lck
);
// Acquire an RTM spin lock.
// Makes a bounded number of speculative attempts: inside a started transaction
// the lock is considered acquired if the poll word reads as free. After an
// explicit abort with code 0xff the routine waits for the poll word to become
// free before retrying; an abort without the retry hint ends the attempts.
// If speculation does not succeed, falls back to a test-and-test-and-set spin
// lock with exponential backoff.
// Returns KMP_LOCK_ACQUIRED_FIRST on both paths.
2848 KMP_ATTRIBUTE_TARGET_RTM
2849 static int __kmp_acquire_rtm_spin_lock(kmp_rtm_spin_lock_t
*lck
,
2851 unsigned retries
= 3, status
;
2852 kmp_int32 lock_free
= KMP_LOCK_FREE(rtm_spin
);
2853 kmp_int32 lock_busy
= KMP_LOCK_BUSY(1, rtm_spin
);
2856 if (status
== _XBEGIN_STARTED
) {
2857 if (KMP_ATOMIC_LD_RLX(&lck
->lk
.poll
) == lock_free
)
2858 return KMP_LOCK_ACQUIRED_FIRST
;
2861 if ((status
& _XABORT_EXPLICIT
) && _XABORT_CODE(status
) == 0xff) {
2862 // Wait until lock becomes free
2863 while (KMP_ATOMIC_LD_RLX(&lck
->lk
.poll
) != lock_free
) {
2866 } else if (!(status
& _XABORT_RETRY
))
2868 } while (retries
--);
2870 // Fall-back spin lock
2871 KMP_FSYNC_PREPARE(lck
);
// Start from the runtime-wide default backoff parameters (local copy).
2872 kmp_backoff_t backoff
= __kmp_spin_backoff_params
;
// Cheap relaxed read first; attempt the compare-and-store only when the
// lock looks free.
2873 while (KMP_ATOMIC_LD_RLX(&lck
->lk
.poll
) != lock_free
||
2874 !__kmp_atomic_compare_store_acq(&lck
->lk
.poll
, lock_free
, lock_busy
)) {
2875 __kmp_spin_backoff(&backoff
);
2877 KMP_FSYNC_ACQUIRED(lck
);
2878 return KMP_LOCK_ACQUIRED_FIRST
;
2881 static int __kmp_acquire_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t
*lck
,
2883 return __kmp_acquire_rtm_spin_lock(lck
, gtid
);
// Release an RTM spin lock.
// If the poll word reads as free, the lock was taken speculatively and the
// release belongs to the speculation path; otherwise the lock is really held
// and is released by storing the free value into the poll word.
// Returns KMP_LOCK_RELEASED.
2886 KMP_ATTRIBUTE_TARGET_RTM
2887 static int __kmp_release_rtm_spin_lock(kmp_rtm_spin_lock_t
*lck
,
2889 if (KMP_ATOMIC_LD_RLX(&lck
->lk
.poll
) == KMP_LOCK_FREE(rtm_spin
)) {
2890 // Releasing from speculation
2893 // Releasing from a real lock
2894 KMP_FSYNC_RELEASING(lck
);
2895 KMP_ATOMIC_ST_REL(&lck
->lk
.poll
, KMP_LOCK_FREE(rtm_spin
));
2897 return KMP_LOCK_RELEASED
;
2900 static int __kmp_release_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t
*lck
,
2902 return __kmp_release_rtm_spin_lock(lck
, gtid
);
// Non-blocking acquire attempt for an RTM spin lock.
// Makes a bounded number of speculative attempts that succeed when a
// transaction starts and the poll word reads as free; an abort without the
// retry hint ends the attempts. If speculation does not succeed, makes a
// single non-speculative attempt: a relaxed read followed by a
// compare-and-store from the free value to the busy value.
2905 KMP_ATTRIBUTE_TARGET_RTM
2906 static int __kmp_test_rtm_spin_lock(kmp_rtm_spin_lock_t
*lck
, kmp_int32 gtid
) {
2907 unsigned retries
= 3, status
;
2908 kmp_int32 lock_free
= KMP_LOCK_FREE(rtm_spin
);
2909 kmp_int32 lock_busy
= KMP_LOCK_BUSY(1, rtm_spin
);
2912 if (status
== _XBEGIN_STARTED
&&
2913 KMP_ATOMIC_LD_RLX(&lck
->lk
.poll
) == lock_free
) {
2916 if (!(status
& _XABORT_RETRY
))
2918 } while (retries
--);
// Non-speculative fallback: one attempt only, never spins.
2920 if (KMP_ATOMIC_LD_RLX(&lck
->lk
.poll
) == lock_free
&&
2921 __kmp_atomic_compare_store_acq(&lck
->lk
.poll
, lock_free
, lock_busy
)) {
2922 KMP_FSYNC_ACQUIRED(lck
);
2928 static int __kmp_test_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t
*lck
,
2930 return __kmp_test_rtm_spin_lock(lck
, gtid
);
2933 #endif // KMP_USE_TSX
2935 // Entry functions for indirect locks (first element of direct lock jump tables)
// Forward declarations: these functions are referenced by the jump tables
// defined before their definitions appear.
2936 static void __kmp_init_indirect_lock(kmp_dyna_lock_t
*l
,
2937 kmp_dyna_lockseq_t tag
);
2938 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t
*lock
);
2939 static int __kmp_set_indirect_lock(kmp_dyna_lock_t
*lock
, kmp_int32
);
2940 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t
*lock
, kmp_int32
);
2941 static int __kmp_test_indirect_lock(kmp_dyna_lock_t
*lock
, kmp_int32
);
2942 static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t
*lock
,
2944 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t
*lock
,
2946 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t
*lock
,
2949 // Lock function definitions for the union parameter type
// KMP_FOREACH_LOCK_KIND applies macro m to each of the ticket, queuing and
// drdpa lock kinds. The expandN macros generate overloads that take a
// kmp_user_lock_p and forward to the kind-specific function on &lock->lk.
2950 #define KMP_FOREACH_LOCK_KIND(m, a) m(ticket, a) m(queuing, a) m(drdpa, a)
// expand1: void operations taking only the lock (init / destroy).
2952 #define expand1(lk, op) \
2953 static void __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock) { \
2954 __kmp_##op##_##lk##_##lock(&lock->lk); \
// expand2: int-returning operations taking the lock and a gtid.
2956 #define expand2(lk, op) \
2957 static int __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock, \
2959 return __kmp_##op##_##lk##_##lock(&lock->lk, gtid); \
// expand3: flags setter (the op argument is unused).
2961 #define expand3(lk, op) \
2962 static void __kmp_set_##lk##_##lock_flags(kmp_user_lock_p lock, \
2963 kmp_lock_flags_t flags) { \
2964 __kmp_set_##lk##_lock_flags(&lock->lk, flags); \
// expand4: location setter (the op argument is unused).
2966 #define expand4(lk, op) \
2967 static void __kmp_set_##lk##_##lock_location(kmp_user_lock_p lock, \
2968 const ident_t *loc) { \
2969 __kmp_set_##lk##_lock_location(&lock->lk, loc); \
// Instantiate the wrappers for every lock kind and operation.
2972 KMP_FOREACH_LOCK_KIND(expand1
, init
)
2973 KMP_FOREACH_LOCK_KIND(expand1
, init_nested
)
2974 KMP_FOREACH_LOCK_KIND(expand1
, destroy
)
2975 KMP_FOREACH_LOCK_KIND(expand1
, destroy_nested
)
2976 KMP_FOREACH_LOCK_KIND(expand2
, acquire
)
2977 KMP_FOREACH_LOCK_KIND(expand2
, acquire_nested
)
2978 KMP_FOREACH_LOCK_KIND(expand2
, release
)
2979 KMP_FOREACH_LOCK_KIND(expand2
, release_nested
)
2980 KMP_FOREACH_LOCK_KIND(expand2
, test
)
2981 KMP_FOREACH_LOCK_KIND(expand2
, test_nested
)
2982 KMP_FOREACH_LOCK_KIND(expand3
, )
2983 KMP_FOREACH_LOCK_KIND(expand4
, )
2990 // Jump tables for the direct lock functions
2991 // Only fill in the odd entries, that avoids the need to shift out the low bit
// Entry 0 of every table is the indirect-lock entry function; each direct lock
// kind then contributes a "0, function" pair, so its function lands on an odd
// index. The "expand" macro is redefined before each table to pick the
// function family and to cast it to the table's element type.
2994 #define expand(l, op) 0, __kmp_init_direct_lock,
2995 void (*__kmp_direct_init
[])(kmp_dyna_lock_t
*, kmp_dyna_lockseq_t
) = {
2996 __kmp_init_indirect_lock
, 0, KMP_FOREACH_D_LOCK(expand
, init
)};
2999 // destroy functions
3000 #define expand(l, op) 0, (void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock,
3001 static void (*direct_destroy
[])(kmp_dyna_lock_t
*) = {
3002 __kmp_destroy_indirect_lock
, 0, KMP_FOREACH_D_LOCK(expand
, destroy
)};
3004 #define expand(l, op) \
3005 0, (void (*)(kmp_dyna_lock_t *))__kmp_destroy_##l##_lock_with_checks,
3006 static void (*direct_destroy_check
[])(kmp_dyna_lock_t
*) = {
3007 __kmp_destroy_indirect_lock
, 0, KMP_FOREACH_D_LOCK(expand
, destroy
)};
3010 // set/acquire functions
3011 #define expand(l, op) \
3012 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3013 static int (*direct_set
[])(kmp_dyna_lock_t
*, kmp_int32
) = {
3014 __kmp_set_indirect_lock
, 0, KMP_FOREACH_D_LOCK(expand
, acquire
)};
3016 #define expand(l, op) \
3017 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3018 static int (*direct_set_check
[])(kmp_dyna_lock_t
*, kmp_int32
) = {
3019 __kmp_set_indirect_lock_with_checks
, 0,
3020 KMP_FOREACH_D_LOCK(expand
, acquire
)};
3023 // unset/release and test functions
3024 #define expand(l, op) \
3025 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3026 static int (*direct_unset
[])(kmp_dyna_lock_t
*, kmp_int32
) = {
3027 __kmp_unset_indirect_lock
, 0, KMP_FOREACH_D_LOCK(expand
, release
)};
3028 static int (*direct_test
[])(kmp_dyna_lock_t
*, kmp_int32
) = {
3029 __kmp_test_indirect_lock
, 0, KMP_FOREACH_D_LOCK(expand
, test
)};
3031 #define expand(l, op) \
3032 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3033 static int (*direct_unset_check
[])(kmp_dyna_lock_t
*, kmp_int32
) = {
3034 __kmp_unset_indirect_lock_with_checks
, 0,
3035 KMP_FOREACH_D_LOCK(expand
, release
)};
3036 static int (*direct_test_check
[])(kmp_dyna_lock_t
*, kmp_int32
) = {
3037 __kmp_test_indirect_lock_with_checks
, 0, KMP_FOREACH_D_LOCK(expand
, test
)};
3040 // Exposes only one set of jump tables (*lock or *lock_with_checks).
// These pointers start out null; presumably they are bound to either the
// plain or the *_check tables elsewhere in the runtime -- TODO confirm.
3041 void (**__kmp_direct_destroy
)(kmp_dyna_lock_t
*) = 0;
3042 int (**__kmp_direct_set
)(kmp_dyna_lock_t
*, kmp_int32
) = 0;
3043 int (**__kmp_direct_unset
)(kmp_dyna_lock_t
*, kmp_int32
) = 0;
3044 int (**__kmp_direct_test
)(kmp_dyna_lock_t
*, kmp_int32
) = 0;
3046 // Jump tables for the indirect lock functions
// One entry per indirect lock kind (KMP_FOREACH_I_LOCK). The "expand" macro is
// redefined before each table to select the function family and to cast it to
// the table's element type.
3047 #define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
3048 void (*__kmp_indirect_init
[])(kmp_user_lock_p
) = {
3049 KMP_FOREACH_I_LOCK(expand
, init
)};
3052 #define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
3053 static void (*indirect_destroy
[])(kmp_user_lock_p
) = {
3054 KMP_FOREACH_I_LOCK(expand
, destroy
)};
3056 #define expand(l, op) \
3057 (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock_with_checks,
3058 static void (*indirect_destroy_check
[])(kmp_user_lock_p
) = {
3059 KMP_FOREACH_I_LOCK(expand
, destroy
)};
3062 // set/acquire functions
3063 #define expand(l, op) \
3064 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
3065 static int (*indirect_set
[])(kmp_user_lock_p
,
3066 kmp_int32
) = {KMP_FOREACH_I_LOCK(expand
, acquire
)};
3068 #define expand(l, op) \
3069 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
3070 static int (*indirect_set_check
[])(kmp_user_lock_p
, kmp_int32
) = {
3071 KMP_FOREACH_I_LOCK(expand
, acquire
)};
3074 // unset/release and test functions
3075 #define expand(l, op) \
3076 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
3077 static int (*indirect_unset
[])(kmp_user_lock_p
, kmp_int32
) = {
3078 KMP_FOREACH_I_LOCK(expand
, release
)};
3079 static int (*indirect_test
[])(kmp_user_lock_p
,
3080 kmp_int32
) = {KMP_FOREACH_I_LOCK(expand
, test
)};
3082 #define expand(l, op) \
3083 (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
3084 static int (*indirect_unset_check
[])(kmp_user_lock_p
, kmp_int32
) = {
3085 KMP_FOREACH_I_LOCK(expand
, release
)};
3086 static int (*indirect_test_check
[])(kmp_user_lock_p
, kmp_int32
) = {
3087 KMP_FOREACH_I_LOCK(expand
, test
)};
3090 // Exposes only one set of jump tables (*lock or *lock_with_checks).
// These pointers start out null; presumably they are bound to either the
// plain or the *_check tables elsewhere in the runtime -- TODO confirm.
3091 void (**__kmp_indirect_destroy
)(kmp_user_lock_p
) = 0;
3092 int (**__kmp_indirect_set
)(kmp_user_lock_p
, kmp_int32
) = 0;
3093 int (**__kmp_indirect_unset
)(kmp_user_lock_p
, kmp_int32
) = 0;
3094 int (**__kmp_indirect_test
)(kmp_user_lock_p
, kmp_int32
) = 0;
3096 // Lock index table.
// Head of the chain of tables that hold the allocated indirect locks.
3097 kmp_indirect_lock_table_t __kmp_i_lock_table
;
3099 // Size of indirect locks.
// Indexed by indirect lock tag; gives the allocation size of the base lock
// object for each kind.
3100 static kmp_uint32 __kmp_indirect_lock_size
[KMP_NUM_I_LOCKS
] = {0};
3102 // Jump tables for lock accessor/modifier.
// All entries start out null and are indexed by indirect lock tag.
3103 void (*__kmp_indirect_set_location
[KMP_NUM_I_LOCKS
])(kmp_user_lock_p
,
3104 const ident_t
*) = {0};
3105 void (*__kmp_indirect_set_flags
[KMP_NUM_I_LOCKS
])(kmp_user_lock_p
,
3106 kmp_lock_flags_t
) = {0};
3107 const ident_t
*(*__kmp_indirect_get_location
[KMP_NUM_I_LOCKS
])(
3108 kmp_user_lock_p
) = {0};
3109 kmp_lock_flags_t (*__kmp_indirect_get_flags
[KMP_NUM_I_LOCKS
])(
3110 kmp_user_lock_p
) = {0};
3112 // Use different lock pools for different lock types.
// Each entry heads a free list of destroyed locks of that tag, linked
// through lock->pool.next.
3113 static kmp_indirect_lock_t
*__kmp_indirect_lock_pool
[KMP_NUM_I_LOCKS
] = {0};
3115 // User lock allocator for dynamically dispatched indirect locks. Every entry of
3116 // the indirect lock table holds the address and type of the allocated indirect
3117 // lock (kmp_indirect_lock_t), and the size of the table doubles when it is
3118 // full. A destroyed indirect lock object is returned to the reusable pool of
3119 // locks, unique to each lock type.
//
// The whole allocation runs under __kmp_global_lock. When the pool for 'tag'
// is non-empty its head is reused; otherwise a free slot is located in the
// chain of lock tables (allocating a row, or a further table with twice as
// many row pointers, as needed) and a new base lock object of
// __kmp_indirect_lock_size[tag] bytes is allocated.
//
// On return the user's lock word holds either the lock's table index shifted
// left by one (when omp_lock_t is smaller than a pointer) or the
// kmp_indirect_lock_t pointer itself.
3120 kmp_indirect_lock_t
*__kmp_allocate_indirect_lock(void **user_lock
,
3122 kmp_indirect_locktag_t tag
) {
3123 kmp_indirect_lock_t
*lck
;
3124 kmp_lock_index_t idx
, table_idx
;
3126 __kmp_acquire_lock(&__kmp_global_lock
, gtid
);
3128 if (__kmp_indirect_lock_pool
[tag
] != NULL
) {
3129 // Reuse the allocated and destroyed lock object
3130 lck
= __kmp_indirect_lock_pool
[tag
];
// The index is only needed when the user's lock word stores an index
// rather than a pointer.
3131 if (OMP_LOCK_T_SIZE
< sizeof(void *))
3132 idx
= lck
->lock
->pool
.index
;
// Pop the reused lock off the pool's free list.
3133 __kmp_indirect_lock_pool
[tag
] = (kmp_indirect_lock_t
*)lck
->lock
->pool
.next
;
3134 KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n",
3137 kmp_uint32 row
, col
;
3138 kmp_indirect_lock_table_t
*lock_table
= &__kmp_i_lock_table
;
3140 // Find location in list of lock tables to put new lock
3142 table_idx
= lock_table
->next
; // index within this table
3143 idx
+= lock_table
->next
; // global index within list of tables
3144 if (table_idx
< lock_table
->nrow_ptrs
* KMP_I_LOCK_CHUNK
) {
3145 row
= table_idx
/ KMP_I_LOCK_CHUNK
;
3146 col
= table_idx
% KMP_I_LOCK_CHUNK
;
3147 // Allocate a new row of locks if necessary
3148 if (!lock_table
->table
[row
]) {
3149 lock_table
->table
[row
] = (kmp_indirect_lock_t
*)__kmp_allocate(
3150 sizeof(kmp_indirect_lock_t
) * KMP_I_LOCK_CHUNK
);
3154 // Allocate a new lock table if necessary with double the capacity
3155 if (!lock_table
->next_table
) {
3156 kmp_indirect_lock_table_t
*next_table
=
3157 (kmp_indirect_lock_table_t
*)__kmp_allocate(
3158 sizeof(kmp_indirect_lock_table_t
));
3159 next_table
->table
= (kmp_indirect_lock_t
**)__kmp_allocate(
3160 sizeof(kmp_indirect_lock_t
*) * 2 * lock_table
->nrow_ptrs
);
3161 next_table
->nrow_ptrs
= 2 * lock_table
->nrow_ptrs
;
3162 next_table
->next
= 0;
3163 next_table
->next_table
= nullptr;
3164 lock_table
->next_table
= next_table
;
3166 lock_table
= lock_table
->next_table
;
3167 KMP_ASSERT(lock_table
);
3171 lck
= &lock_table
->table
[row
][col
];
3172 // Allocate a new base lock object
3173 lck
->lock
= (kmp_user_lock_p
)__kmp_allocate(__kmp_indirect_lock_size
[tag
]);
3175 ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck
));
3178 __kmp_release_lock(&__kmp_global_lock
, gtid
);
// Publish the handle in the user's lock word.
3182 if (OMP_LOCK_T_SIZE
< sizeof(void *)) {
3183 *((kmp_lock_index_t
*)user_lock
) = idx
3184 << 1; // indirect lock word must be even
3186 *((kmp_indirect_lock_t
**)user_lock
) = lck
;
3192 // User lock lookup for dynamically dispatched locks.
3193 static __forceinline kmp_indirect_lock_t
*
3194 __kmp_lookup_indirect_lock(void **user_lock
, const char *func
) {
3195 if (__kmp_env_consistency_check
) {
3196 kmp_indirect_lock_t
*lck
= NULL
;
3197 if (user_lock
== NULL
) {
3198 KMP_FATAL(LockIsUninitialized
, func
);
3200 if (OMP_LOCK_T_SIZE
< sizeof(void *)) {
3201 kmp_lock_index_t idx
= KMP_EXTRACT_I_INDEX(user_lock
);
3202 lck
= __kmp_get_i_lock(idx
);
3204 lck
= *((kmp_indirect_lock_t
**)user_lock
);
3207 KMP_FATAL(LockIsUninitialized
, func
);
3211 if (OMP_LOCK_T_SIZE
< sizeof(void *)) {
3212 return __kmp_get_i_lock(KMP_EXTRACT_I_INDEX(user_lock
));
3214 return *((kmp_indirect_lock_t
**)user_lock
);
3219 static void __kmp_init_indirect_lock(kmp_dyna_lock_t
*lock
,
3220 kmp_dyna_lockseq_t seq
) {
3221 #if KMP_USE_ADAPTIVE_LOCKS
3222 if (seq
== lockseq_adaptive
&& !__kmp_cpuinfo
.flags
.rtm
) {
3223 KMP_WARNING(AdaptiveNotSupported
, "kmp_lockseq_t", "adaptive");
3224 seq
= lockseq_queuing
;
3228 if (seq
== lockseq_rtm_queuing
&& !__kmp_cpuinfo
.flags
.rtm
) {
3229 seq
= lockseq_queuing
;
3232 kmp_indirect_locktag_t tag
= KMP_GET_I_TAG(seq
);
3233 kmp_indirect_lock_t
*l
=
3234 __kmp_allocate_indirect_lock((void **)lock
, __kmp_entry_gtid(), tag
);
3235 KMP_I_LOCK_FUNC(l
, init
)(l
->lock
);
3237 20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n",
3241 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t
*lock
) {
3242 kmp_uint32 gtid
= __kmp_entry_gtid();
3243 kmp_indirect_lock_t
*l
=
3244 __kmp_lookup_indirect_lock((void **)lock
, "omp_destroy_lock");
3245 KMP_I_LOCK_FUNC(l
, destroy
)(l
->lock
);
3246 kmp_indirect_locktag_t tag
= l
->type
;
3248 __kmp_acquire_lock(&__kmp_global_lock
, gtid
);
3250 // Use the base lock's space to keep the pool chain.
3251 l
->lock
->pool
.next
= (kmp_user_lock_p
)__kmp_indirect_lock_pool
[tag
];
3252 if (OMP_LOCK_T_SIZE
< sizeof(void *)) {
3253 l
->lock
->pool
.index
= KMP_EXTRACT_I_INDEX(lock
);
3255 __kmp_indirect_lock_pool
[tag
] = l
;
3257 __kmp_release_lock(&__kmp_global_lock
, gtid
);
3260 static int __kmp_set_indirect_lock(kmp_dyna_lock_t
*lock
, kmp_int32 gtid
) {
3261 kmp_indirect_lock_t
*l
= KMP_LOOKUP_I_LOCK(lock
);
3262 return KMP_I_LOCK_FUNC(l
, set
)(l
->lock
, gtid
);
3265 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t
*lock
, kmp_int32 gtid
) {
3266 kmp_indirect_lock_t
*l
= KMP_LOOKUP_I_LOCK(lock
);
3267 return KMP_I_LOCK_FUNC(l
, unset
)(l
->lock
, gtid
);
3270 static int __kmp_test_indirect_lock(kmp_dyna_lock_t
*lock
, kmp_int32 gtid
) {
3271 kmp_indirect_lock_t
*l
= KMP_LOOKUP_I_LOCK(lock
);
3272 return KMP_I_LOCK_FUNC(l
, test
)(l
->lock
, gtid
);
3275 static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t
*lock
,
3277 kmp_indirect_lock_t
*l
=
3278 __kmp_lookup_indirect_lock((void **)lock
, "omp_set_lock");
3279 return KMP_I_LOCK_FUNC(l
, set
)(l
->lock
, gtid
);
3282 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t
*lock
,
3284 kmp_indirect_lock_t
*l
=
3285 __kmp_lookup_indirect_lock((void **)lock
, "omp_unset_lock");
3286 return KMP_I_LOCK_FUNC(l
, unset
)(l
->lock
, gtid
);
3289 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t
*lock
,
3291 kmp_indirect_lock_t
*l
=
3292 __kmp_lookup_indirect_lock((void **)lock
, "omp_test_lock");
3293 return KMP_I_LOCK_FUNC(l
, test
)(l
->lock
, gtid
);
3296 kmp_dyna_lockseq_t __kmp_user_lock_seq
= lockseq_queuing
;
3298 // This is used only in kmp_error.cpp when consistency checking is on.
3299 kmp_int32
__kmp_get_user_lock_owner(kmp_user_lock_p lck
, kmp_uint32 seq
) {
3302 case lockseq_nested_tas
:
3303 return __kmp_get_tas_lock_owner((kmp_tas_lock_t
*)lck
);
3306 case lockseq_nested_futex
:
3307 return __kmp_get_futex_lock_owner((kmp_futex_lock_t
*)lck
);
3309 case lockseq_ticket
:
3310 case lockseq_nested_ticket
:
3311 return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t
*)lck
);
3312 case lockseq_queuing
:
3313 case lockseq_nested_queuing
:
3314 #if KMP_USE_ADAPTIVE_LOCKS
3315 case lockseq_adaptive
:
3317 return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t
*)lck
);
3319 case lockseq_nested_drdpa
:
3320 return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t
*)lck
);
3326 // Initializes data for dynamic user locks.
3327 void __kmp_init_dynamic_user_locks() {
3328 // Initialize jump table for the lock functions
3329 if (__kmp_env_consistency_check
) {
3330 __kmp_direct_set
= direct_set_check
;
3331 __kmp_direct_unset
= direct_unset_check
;
3332 __kmp_direct_test
= direct_test_check
;
3333 __kmp_direct_destroy
= direct_destroy_check
;
3334 __kmp_indirect_set
= indirect_set_check
;
3335 __kmp_indirect_unset
= indirect_unset_check
;
3336 __kmp_indirect_test
= indirect_test_check
;
3337 __kmp_indirect_destroy
= indirect_destroy_check
;
3339 __kmp_direct_set
= direct_set
;
3340 __kmp_direct_unset
= direct_unset
;
3341 __kmp_direct_test
= direct_test
;
3342 __kmp_direct_destroy
= direct_destroy
;
3343 __kmp_indirect_set
= indirect_set
;
3344 __kmp_indirect_unset
= indirect_unset
;
3345 __kmp_indirect_test
= indirect_test
;
3346 __kmp_indirect_destroy
= indirect_destroy
;
3348 // If the user locks have already been initialized, then return. Allow the
3349 // switch between different KMP_CONSISTENCY_CHECK values, but do not allocate
3350 // new lock tables if they have already been allocated.
3351 if (__kmp_init_user_locks
)
3354 // Initialize lock index table
3355 __kmp_i_lock_table
.nrow_ptrs
= KMP_I_LOCK_TABLE_INIT_NROW_PTRS
;
3356 __kmp_i_lock_table
.table
= (kmp_indirect_lock_t
**)__kmp_allocate(
3357 sizeof(kmp_indirect_lock_t
*) * KMP_I_LOCK_TABLE_INIT_NROW_PTRS
);
3358 *(__kmp_i_lock_table
.table
) = (kmp_indirect_lock_t
*)__kmp_allocate(
3359 KMP_I_LOCK_CHUNK
* sizeof(kmp_indirect_lock_t
));
3360 __kmp_i_lock_table
.next
= 0;
3361 __kmp_i_lock_table
.next_table
= nullptr;
3363 // Indirect lock size
3364 __kmp_indirect_lock_size
[locktag_ticket
] = sizeof(kmp_ticket_lock_t
);
3365 __kmp_indirect_lock_size
[locktag_queuing
] = sizeof(kmp_queuing_lock_t
);
3366 #if KMP_USE_ADAPTIVE_LOCKS
3367 __kmp_indirect_lock_size
[locktag_adaptive
] = sizeof(kmp_adaptive_lock_t
);
3369 __kmp_indirect_lock_size
[locktag_drdpa
] = sizeof(kmp_drdpa_lock_t
);
3371 __kmp_indirect_lock_size
[locktag_rtm_queuing
] = sizeof(kmp_queuing_lock_t
);
3373 __kmp_indirect_lock_size
[locktag_nested_tas
] = sizeof(kmp_tas_lock_t
);
3375 __kmp_indirect_lock_size
[locktag_nested_futex
] = sizeof(kmp_futex_lock_t
);
3377 __kmp_indirect_lock_size
[locktag_nested_ticket
] = sizeof(kmp_ticket_lock_t
);
3378 __kmp_indirect_lock_size
[locktag_nested_queuing
] = sizeof(kmp_queuing_lock_t
);
3379 __kmp_indirect_lock_size
[locktag_nested_drdpa
] = sizeof(kmp_drdpa_lock_t
);
3381 // Initialize lock accessor/modifier
3382 #define fill_jumps(table, expand, sep) \
3384 table[locktag##sep##ticket] = expand(ticket); \
3385 table[locktag##sep##queuing] = expand(queuing); \
3386 table[locktag##sep##drdpa] = expand(drdpa); \
3389 #if KMP_USE_ADAPTIVE_LOCKS
3390 #define fill_table(table, expand) \
3392 fill_jumps(table, expand, _); \
3393 table[locktag_adaptive] = expand(queuing); \
3394 fill_jumps(table, expand, _nested_); \
3397 #define fill_table(table, expand) \
3399 fill_jumps(table, expand, _); \
3400 fill_jumps(table, expand, _nested_); \
3402 #endif // KMP_USE_ADAPTIVE_LOCKS
3405 (void (*)(kmp_user_lock_p, const ident_t *)) __kmp_set_##l##_lock_location
3406 fill_table(__kmp_indirect_set_location
, expand
);
3409 (void (*)(kmp_user_lock_p, kmp_lock_flags_t)) __kmp_set_##l##_lock_flags
3410 fill_table(__kmp_indirect_set_flags
, expand
);
3413 (const ident_t *(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_location
3414 fill_table(__kmp_indirect_get_location
, expand
);
3417 (kmp_lock_flags_t(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_flags
3418 fill_table(__kmp_indirect_get_flags
, expand
);
3421 __kmp_init_user_locks
= TRUE
;
3424 // Clean up the lock table.
3425 void __kmp_cleanup_indirect_user_locks() {
3428 // Clean up locks in the pools first (they were already destroyed before going
3430 for (k
= 0; k
< KMP_NUM_I_LOCKS
; ++k
) {
3431 kmp_indirect_lock_t
*l
= __kmp_indirect_lock_pool
[k
];
3433 kmp_indirect_lock_t
*ll
= l
;
3434 l
= (kmp_indirect_lock_t
*)l
->lock
->pool
.next
;
3435 KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n",
3437 __kmp_free(ll
->lock
);
3440 __kmp_indirect_lock_pool
[k
] = NULL
;
3442 // Clean up the remaining undestroyed locks.
3443 kmp_indirect_lock_table_t
*ptr
= &__kmp_i_lock_table
;
3445 for (kmp_uint32 row
= 0; row
< ptr
->nrow_ptrs
; ++row
) {
3446 if (!ptr
->table
[row
])
3448 for (kmp_uint32 col
= 0; col
< KMP_I_LOCK_CHUNK
; ++col
) {
3449 kmp_indirect_lock_t
*l
= &ptr
->table
[row
][col
];
3451 // Locks not destroyed explicitly need to be destroyed here.
3452 KMP_I_LOCK_FUNC(l
, destroy
)(l
->lock
);
3453 KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p "
3456 __kmp_free(l
->lock
);
3459 __kmp_free(ptr
->table
[row
]);
3461 kmp_indirect_lock_table_t
*next_table
= ptr
->next_table
;
3462 if (ptr
!= &__kmp_i_lock_table
)
3467 __kmp_init_user_locks
= FALSE
;
3470 enum kmp_lock_kind __kmp_user_lock_kind
= lk_default
;
3471 int __kmp_num_locks_in_block
= 1; // FIXME - tune this value
3473 #else // KMP_USE_DYNAMIC_LOCK
3475 static void __kmp_init_tas_lock_with_checks(kmp_tas_lock_t
*lck
) {
3476 __kmp_init_tas_lock(lck
);
3479 static void __kmp_init_nested_tas_lock_with_checks(kmp_tas_lock_t
*lck
) {
3480 __kmp_init_nested_tas_lock(lck
);
#if KMP_USE_FUTEX
// "With checks" initializer for futex locks; it forwards to the plain
// initializer without adding any check of its own.
static void __kmp_init_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  __kmp_init_futex_lock(lck);
}

// Nested counterpart of the wrapper above.
static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  __kmp_init_nested_futex_lock(lck);
}
#endif
3493 static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t
*lck
) {
3494 return lck
== lck
->lk
.self
;
3497 static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t
*lck
) {
3498 __kmp_init_ticket_lock(lck
);
3501 static void __kmp_init_nested_ticket_lock_with_checks(kmp_ticket_lock_t
*lck
) {
3502 __kmp_init_nested_ticket_lock(lck
);
3505 static int __kmp_is_queuing_lock_initialized(kmp_queuing_lock_t
*lck
) {
3506 return lck
== lck
->lk
.initialized
;
3509 static void __kmp_init_queuing_lock_with_checks(kmp_queuing_lock_t
*lck
) {
3510 __kmp_init_queuing_lock(lck
);
3514 __kmp_init_nested_queuing_lock_with_checks(kmp_queuing_lock_t
*lck
) {
3515 __kmp_init_nested_queuing_lock(lck
);
#if KMP_USE_ADAPTIVE_LOCKS
// "With checks" initializer for adaptive locks; forwards to the plain
// initializer.
static void __kmp_init_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
  __kmp_init_adaptive_lock(lck);
}
#endif
3524 static int __kmp_is_drdpa_lock_initialized(kmp_drdpa_lock_t
*lck
) {
3525 return lck
== lck
->lk
.initialized
;
3528 static void __kmp_init_drdpa_lock_with_checks(kmp_drdpa_lock_t
*lck
) {
3529 __kmp_init_drdpa_lock(lck
);
3532 static void __kmp_init_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t
*lck
) {
3533 __kmp_init_nested_drdpa_lock(lck
);
3537 * They are implemented as a table of function pointers which are set to the
3538 * lock functions of the appropriate kind, once that has been determined. */
3540 enum kmp_lock_kind __kmp_user_lock_kind
= lk_default
;
3542 size_t __kmp_base_user_lock_size
= 0;
3543 size_t __kmp_user_lock_size
= 0;
3545 kmp_int32 (*__kmp_get_user_lock_owner_
)(kmp_user_lock_p lck
) = NULL
;
3546 int (*__kmp_acquire_user_lock_with_checks_
)(kmp_user_lock_p lck
,
3547 kmp_int32 gtid
) = NULL
;
3549 int (*__kmp_test_user_lock_with_checks_
)(kmp_user_lock_p lck
,
3550 kmp_int32 gtid
) = NULL
;
3551 int (*__kmp_release_user_lock_with_checks_
)(kmp_user_lock_p lck
,
3552 kmp_int32 gtid
) = NULL
;
3553 void (*__kmp_init_user_lock_with_checks_
)(kmp_user_lock_p lck
) = NULL
;
3554 void (*__kmp_destroy_user_lock_
)(kmp_user_lock_p lck
) = NULL
;
3555 void (*__kmp_destroy_user_lock_with_checks_
)(kmp_user_lock_p lck
) = NULL
;
3556 int (*__kmp_acquire_nested_user_lock_with_checks_
)(kmp_user_lock_p lck
,
3557 kmp_int32 gtid
) = NULL
;
3559 int (*__kmp_test_nested_user_lock_with_checks_
)(kmp_user_lock_p lck
,
3560 kmp_int32 gtid
) = NULL
;
3561 int (*__kmp_release_nested_user_lock_with_checks_
)(kmp_user_lock_p lck
,
3562 kmp_int32 gtid
) = NULL
;
3563 void (*__kmp_init_nested_user_lock_with_checks_
)(kmp_user_lock_p lck
) = NULL
;
3564 void (*__kmp_destroy_nested_user_lock_with_checks_
)(kmp_user_lock_p lck
) = NULL
;
3566 int (*__kmp_is_user_lock_initialized_
)(kmp_user_lock_p lck
) = NULL
;
3567 const ident_t
*(*__kmp_get_user_lock_location_
)(kmp_user_lock_p lck
) = NULL
;
3568 void (*__kmp_set_user_lock_location_
)(kmp_user_lock_p lck
,
3569 const ident_t
*loc
) = NULL
;
3570 kmp_lock_flags_t (*__kmp_get_user_lock_flags_
)(kmp_user_lock_p lck
) = NULL
;
3571 void (*__kmp_set_user_lock_flags_
)(kmp_user_lock_p lck
,
3572 kmp_lock_flags_t flags
) = NULL
;
3574 void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind
) {
3575 switch (user_lock_kind
) {
3581 __kmp_base_user_lock_size
= sizeof(kmp_base_tas_lock_t
);
3582 __kmp_user_lock_size
= sizeof(kmp_tas_lock_t
);
3584 __kmp_get_user_lock_owner_
=
3585 (kmp_int32(*)(kmp_user_lock_p
))(&__kmp_get_tas_lock_owner
);
3587 if (__kmp_env_consistency_check
) {
3588 KMP_BIND_USER_LOCK_WITH_CHECKS(tas
);
3589 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas
);
3591 KMP_BIND_USER_LOCK(tas
);
3592 KMP_BIND_NESTED_USER_LOCK(tas
);
3595 __kmp_destroy_user_lock_
=
3596 (void (*)(kmp_user_lock_p
))(&__kmp_destroy_tas_lock
);
3598 __kmp_is_user_lock_initialized_
= (int (*)(kmp_user_lock_p
))NULL
;
3600 __kmp_get_user_lock_location_
= (const ident_t
*(*)(kmp_user_lock_p
))NULL
;
3602 __kmp_set_user_lock_location_
=
3603 (void (*)(kmp_user_lock_p
, const ident_t
*))NULL
;
3605 __kmp_get_user_lock_flags_
= (kmp_lock_flags_t(*)(kmp_user_lock_p
))NULL
;
3607 __kmp_set_user_lock_flags_
=
3608 (void (*)(kmp_user_lock_p
, kmp_lock_flags_t
))NULL
;
3614 __kmp_base_user_lock_size
= sizeof(kmp_base_futex_lock_t
);
3615 __kmp_user_lock_size
= sizeof(kmp_futex_lock_t
);
3617 __kmp_get_user_lock_owner_
=
3618 (kmp_int32(*)(kmp_user_lock_p
))(&__kmp_get_futex_lock_owner
);
3620 if (__kmp_env_consistency_check
) {
3621 KMP_BIND_USER_LOCK_WITH_CHECKS(futex
);
3622 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex
);
3624 KMP_BIND_USER_LOCK(futex
);
3625 KMP_BIND_NESTED_USER_LOCK(futex
);
3628 __kmp_destroy_user_lock_
=
3629 (void (*)(kmp_user_lock_p
))(&__kmp_destroy_futex_lock
);
3631 __kmp_is_user_lock_initialized_
= (int (*)(kmp_user_lock_p
))NULL
;
3633 __kmp_get_user_lock_location_
= (const ident_t
*(*)(kmp_user_lock_p
))NULL
;
3635 __kmp_set_user_lock_location_
=
3636 (void (*)(kmp_user_lock_p
, const ident_t
*))NULL
;
3638 __kmp_get_user_lock_flags_
= (kmp_lock_flags_t(*)(kmp_user_lock_p
))NULL
;
3640 __kmp_set_user_lock_flags_
=
3641 (void (*)(kmp_user_lock_p
, kmp_lock_flags_t
))NULL
;
3644 #endif // KMP_USE_FUTEX
3647 __kmp_base_user_lock_size
= sizeof(kmp_base_ticket_lock_t
);
3648 __kmp_user_lock_size
= sizeof(kmp_ticket_lock_t
);
3650 __kmp_get_user_lock_owner_
=
3651 (kmp_int32(*)(kmp_user_lock_p
))(&__kmp_get_ticket_lock_owner
);
3653 if (__kmp_env_consistency_check
) {
3654 KMP_BIND_USER_LOCK_WITH_CHECKS(ticket
);
3655 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket
);
3657 KMP_BIND_USER_LOCK(ticket
);
3658 KMP_BIND_NESTED_USER_LOCK(ticket
);
3661 __kmp_destroy_user_lock_
=
3662 (void (*)(kmp_user_lock_p
))(&__kmp_destroy_ticket_lock
);
3664 __kmp_is_user_lock_initialized_
=
3665 (int (*)(kmp_user_lock_p
))(&__kmp_is_ticket_lock_initialized
);
3667 __kmp_get_user_lock_location_
=
3668 (const ident_t
*(*)(kmp_user_lock_p
))(&__kmp_get_ticket_lock_location
);
3670 __kmp_set_user_lock_location_
= (void (*)(
3671 kmp_user_lock_p
, const ident_t
*))(&__kmp_set_ticket_lock_location
);
3673 __kmp_get_user_lock_flags_
=
3674 (kmp_lock_flags_t(*)(kmp_user_lock_p
))(&__kmp_get_ticket_lock_flags
);
3676 __kmp_set_user_lock_flags_
= (void (*)(kmp_user_lock_p
, kmp_lock_flags_t
))(
3677 &__kmp_set_ticket_lock_flags
);
3681 __kmp_base_user_lock_size
= sizeof(kmp_base_queuing_lock_t
);
3682 __kmp_user_lock_size
= sizeof(kmp_queuing_lock_t
);
3684 __kmp_get_user_lock_owner_
=
3685 (kmp_int32(*)(kmp_user_lock_p
))(&__kmp_get_queuing_lock_owner
);
3687 if (__kmp_env_consistency_check
) {
3688 KMP_BIND_USER_LOCK_WITH_CHECKS(queuing
);
3689 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing
);
3691 KMP_BIND_USER_LOCK(queuing
);
3692 KMP_BIND_NESTED_USER_LOCK(queuing
);
3695 __kmp_destroy_user_lock_
=
3696 (void (*)(kmp_user_lock_p
))(&__kmp_destroy_queuing_lock
);
3698 __kmp_is_user_lock_initialized_
=
3699 (int (*)(kmp_user_lock_p
))(&__kmp_is_queuing_lock_initialized
);
3701 __kmp_get_user_lock_location_
=
3702 (const ident_t
*(*)(kmp_user_lock_p
))(&__kmp_get_queuing_lock_location
);
3704 __kmp_set_user_lock_location_
= (void (*)(
3705 kmp_user_lock_p
, const ident_t
*))(&__kmp_set_queuing_lock_location
);
3707 __kmp_get_user_lock_flags_
=
3708 (kmp_lock_flags_t(*)(kmp_user_lock_p
))(&__kmp_get_queuing_lock_flags
);
3710 __kmp_set_user_lock_flags_
= (void (*)(kmp_user_lock_p
, kmp_lock_flags_t
))(
3711 &__kmp_set_queuing_lock_flags
);
3714 #if KMP_USE_ADAPTIVE_LOCKS
3716 __kmp_base_user_lock_size
= sizeof(kmp_base_adaptive_lock_t
);
3717 __kmp_user_lock_size
= sizeof(kmp_adaptive_lock_t
);
3719 __kmp_get_user_lock_owner_
=
3720 (kmp_int32(*)(kmp_user_lock_p
))(&__kmp_get_queuing_lock_owner
);
3722 if (__kmp_env_consistency_check
) {
3723 KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive
);
3725 KMP_BIND_USER_LOCK(adaptive
);
3728 __kmp_destroy_user_lock_
=
3729 (void (*)(kmp_user_lock_p
))(&__kmp_destroy_adaptive_lock
);
3731 __kmp_is_user_lock_initialized_
=
3732 (int (*)(kmp_user_lock_p
))(&__kmp_is_queuing_lock_initialized
);
3734 __kmp_get_user_lock_location_
=
3735 (const ident_t
*(*)(kmp_user_lock_p
))(&__kmp_get_queuing_lock_location
);
3737 __kmp_set_user_lock_location_
= (void (*)(
3738 kmp_user_lock_p
, const ident_t
*))(&__kmp_set_queuing_lock_location
);
3740 __kmp_get_user_lock_flags_
=
3741 (kmp_lock_flags_t(*)(kmp_user_lock_p
))(&__kmp_get_queuing_lock_flags
);
3743 __kmp_set_user_lock_flags_
= (void (*)(kmp_user_lock_p
, kmp_lock_flags_t
))(
3744 &__kmp_set_queuing_lock_flags
);
3747 #endif // KMP_USE_ADAPTIVE_LOCKS
3750 __kmp_base_user_lock_size
= sizeof(kmp_base_drdpa_lock_t
);
3751 __kmp_user_lock_size
= sizeof(kmp_drdpa_lock_t
);
3753 __kmp_get_user_lock_owner_
=
3754 (kmp_int32(*)(kmp_user_lock_p
))(&__kmp_get_drdpa_lock_owner
);
3756 if (__kmp_env_consistency_check
) {
3757 KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa
);
3758 KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa
);
3760 KMP_BIND_USER_LOCK(drdpa
);
3761 KMP_BIND_NESTED_USER_LOCK(drdpa
);
3764 __kmp_destroy_user_lock_
=
3765 (void (*)(kmp_user_lock_p
))(&__kmp_destroy_drdpa_lock
);
3767 __kmp_is_user_lock_initialized_
=
3768 (int (*)(kmp_user_lock_p
))(&__kmp_is_drdpa_lock_initialized
);
3770 __kmp_get_user_lock_location_
=
3771 (const ident_t
*(*)(kmp_user_lock_p
))(&__kmp_get_drdpa_lock_location
);
3773 __kmp_set_user_lock_location_
= (void (*)(
3774 kmp_user_lock_p
, const ident_t
*))(&__kmp_set_drdpa_lock_location
);
3776 __kmp_get_user_lock_flags_
=
3777 (kmp_lock_flags_t(*)(kmp_user_lock_p
))(&__kmp_get_drdpa_lock_flags
);
3779 __kmp_set_user_lock_flags_
= (void (*)(kmp_user_lock_p
, kmp_lock_flags_t
))(
3780 &__kmp_set_drdpa_lock_flags
);
3785 // ----------------------------------------------------------------------------
3786 // User lock table & lock allocation
3788 kmp_lock_table_t __kmp_user_lock_table
= {1, 0, NULL
};
3789 kmp_user_lock_p __kmp_lock_pool
= NULL
;
3791 // Lock block-allocation support.
3792 kmp_block_of_locks
*__kmp_lock_blocks
= NULL
;
3793 int __kmp_num_locks_in_block
= 1; // FIXME - tune this value
3795 static kmp_lock_index_t
__kmp_lock_table_insert(kmp_user_lock_p lck
) {
3796 // Assume that kmp_global_lock is held upon entry/exit.
3797 kmp_lock_index_t index
;
3798 if (__kmp_user_lock_table
.used
>= __kmp_user_lock_table
.allocated
) {
3799 kmp_lock_index_t size
;
3800 kmp_user_lock_p
*table
;
3801 // Reallocate lock table.
3802 if (__kmp_user_lock_table
.allocated
== 0) {
3805 size
= __kmp_user_lock_table
.allocated
* 2;
3807 table
= (kmp_user_lock_p
*)__kmp_allocate(sizeof(kmp_user_lock_p
) * size
);
3808 KMP_MEMCPY(table
+ 1, __kmp_user_lock_table
.table
+ 1,
3809 sizeof(kmp_user_lock_p
) * (__kmp_user_lock_table
.used
- 1));
3810 table
[0] = (kmp_user_lock_p
)__kmp_user_lock_table
.table
;
3811 // We cannot free the previous table now, since it may be in use by other
3812 // threads. So save the pointer to the previous table in the first
3813 // element of the new table. All the tables will be organized into a list,
3814 // and could be freed when library shutting down.
3815 __kmp_user_lock_table
.table
= table
;
3816 __kmp_user_lock_table
.allocated
= size
;
3818 KMP_DEBUG_ASSERT(__kmp_user_lock_table
.used
<
3819 __kmp_user_lock_table
.allocated
);
3820 index
= __kmp_user_lock_table
.used
;
3821 __kmp_user_lock_table
.table
[index
] = lck
;
3822 ++__kmp_user_lock_table
.used
;
3826 static kmp_user_lock_p
__kmp_lock_block_allocate() {
3827 // Assume that kmp_global_lock is held upon entry/exit.
3828 static int last_index
= 0;
3829 if ((last_index
>= __kmp_num_locks_in_block
) || (__kmp_lock_blocks
== NULL
)) {
3830 // Restart the index.
3832 // Need to allocate a new block.
3833 KMP_DEBUG_ASSERT(__kmp_user_lock_size
> 0);
3834 size_t space_for_locks
= __kmp_user_lock_size
* __kmp_num_locks_in_block
;
3836 (char *)__kmp_allocate(space_for_locks
+ sizeof(kmp_block_of_locks
));
3837 // Set up the new block.
3838 kmp_block_of_locks
*new_block
=
3839 (kmp_block_of_locks
*)(&buffer
[space_for_locks
]);
3840 new_block
->next_block
= __kmp_lock_blocks
;
3841 new_block
->locks
= (void *)buffer
;
3842 // Publish the new block.
3844 __kmp_lock_blocks
= new_block
;
3846 kmp_user_lock_p ret
= (kmp_user_lock_p
)(&(
3847 ((char *)(__kmp_lock_blocks
->locks
))[last_index
* __kmp_user_lock_size
]));
3852 // Get memory for a lock. It may be freshly allocated memory or reused memory
3854 kmp_user_lock_p
__kmp_user_lock_allocate(void **user_lock
, kmp_int32 gtid
,
3855 kmp_lock_flags_t flags
) {
3856 kmp_user_lock_p lck
;
3857 kmp_lock_index_t index
;
3858 KMP_DEBUG_ASSERT(user_lock
);
3860 __kmp_acquire_lock(&__kmp_global_lock
, gtid
);
3862 if (__kmp_lock_pool
== NULL
) {
3863 // Lock pool is empty. Allocate new memory.
3865 if (__kmp_num_locks_in_block
<= 1) { // Tune this cutoff point.
3866 lck
= (kmp_user_lock_p
)__kmp_allocate(__kmp_user_lock_size
);
3868 lck
= __kmp_lock_block_allocate();
3871 // Insert lock in the table so that it can be freed in __kmp_cleanup,
3872 // and debugger has info on all allocated locks.
3873 index
= __kmp_lock_table_insert(lck
);
3875 // Pick up lock from pool.
3876 lck
= __kmp_lock_pool
;
3877 index
= __kmp_lock_pool
->pool
.index
;
3878 __kmp_lock_pool
= __kmp_lock_pool
->pool
.next
;
3881 // We could potentially differentiate between nested and regular locks
3882 // here, and do the lock table lookup for regular locks only.
3883 if (OMP_LOCK_T_SIZE
< sizeof(void *)) {
3884 *((kmp_lock_index_t
*)user_lock
) = index
;
3886 *((kmp_user_lock_p
*)user_lock
) = lck
;
3889 // mark the lock if it is critical section lock.
3890 __kmp_set_user_lock_flags(lck
, flags
);
3892 __kmp_release_lock(&__kmp_global_lock
, gtid
); // AC: TODO move this line upper
3897 // Put lock's memory to pool for reusing.
3898 void __kmp_user_lock_free(void **user_lock
, kmp_int32 gtid
,
3899 kmp_user_lock_p lck
) {
3900 KMP_DEBUG_ASSERT(user_lock
!= NULL
);
3901 KMP_DEBUG_ASSERT(lck
!= NULL
);
3903 __kmp_acquire_lock(&__kmp_global_lock
, gtid
);
3905 lck
->pool
.next
= __kmp_lock_pool
;
3906 __kmp_lock_pool
= lck
;
3907 if (OMP_LOCK_T_SIZE
< sizeof(void *)) {
3908 kmp_lock_index_t index
= *((kmp_lock_index_t
*)user_lock
);
3909 KMP_DEBUG_ASSERT(0 < index
&& index
<= __kmp_user_lock_table
.used
);
3910 lck
->pool
.index
= index
;
3913 __kmp_release_lock(&__kmp_global_lock
, gtid
);
3916 kmp_user_lock_p
__kmp_lookup_user_lock(void **user_lock
, char const *func
) {
3917 kmp_user_lock_p lck
= NULL
;
3919 if (__kmp_env_consistency_check
) {
3920 if (user_lock
== NULL
) {
3921 KMP_FATAL(LockIsUninitialized
, func
);
3925 if (OMP_LOCK_T_SIZE
< sizeof(void *)) {
3926 kmp_lock_index_t index
= *((kmp_lock_index_t
*)user_lock
);
3927 if (__kmp_env_consistency_check
) {
3928 if (!(0 < index
&& index
< __kmp_user_lock_table
.used
)) {
3929 KMP_FATAL(LockIsUninitialized
, func
);
3932 KMP_DEBUG_ASSERT(0 < index
&& index
< __kmp_user_lock_table
.used
);
3933 KMP_DEBUG_ASSERT(__kmp_user_lock_size
> 0);
3934 lck
= __kmp_user_lock_table
.table
[index
];
3936 lck
= *((kmp_user_lock_p
*)user_lock
);
3939 if (__kmp_env_consistency_check
) {
3941 KMP_FATAL(LockIsUninitialized
, func
);
3948 void __kmp_cleanup_user_locks(void) {
3949 // Reset lock pool. Don't worry about lock in the pool--we will free them when
3950 // iterating through lock table (it includes all the locks, dead or alive).
3951 __kmp_lock_pool
= NULL
;
3953 #define IS_CRITICAL(lck) \
3954 ((__kmp_get_user_lock_flags_ != NULL) && \
3955 ((*__kmp_get_user_lock_flags_)(lck)&kmp_lf_critical_section))
3957 // Loop through lock table, free all locks.
3958 // Do not free item [0], it is reserved for lock tables list.
3960 // FIXME - we are iterating through a list of (pointers to) objects of type
3961 // union kmp_user_lock, but we have no way of knowing whether the base type is
3962 // currently "pool" or whatever the global user lock type is.
3964 // We are relying on the fact that for all of the user lock types
3965 // (except "tas"), the first field in the lock struct is the "initialized"
3966 // field, which is set to the address of the lock object itself when
3967 // the lock is initialized. When the union is of type "pool", the
3968 // first field is a pointer to the next object in the free list, which
3969 // will not be the same address as the object itself.
3971 // This means that the check (*__kmp_is_user_lock_initialized_)(lck) will fail
3972 // for "pool" objects on the free list. This must happen as the "location"
3973 // field of real user locks overlaps the "index" field of "pool" objects.
3975 // It would be better to run through the free list, and remove all "pool"
3976 // objects from the lock table before executing this loop. However,
3977 // "pool" objects do not always have their index field set (only on
3978 // lin_32e), and I don't want to search the lock table for the address
3979 // of every "pool" object on the free list.
3980 while (__kmp_user_lock_table
.used
> 1) {
3983 // reduce __kmp_user_lock_table.used before freeing the lock,
3984 // so that state of locks is consistent
3985 kmp_user_lock_p lck
=
3986 __kmp_user_lock_table
.table
[--__kmp_user_lock_table
.used
];
3988 if ((__kmp_is_user_lock_initialized_
!= NULL
) &&
3989 (*__kmp_is_user_lock_initialized_
)(lck
)) {
3990 // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is initialized AND
3991 // it is NOT a critical section (user is not responsible for destroying
3992 // criticals) AND we know source location to report.
3993 if (__kmp_env_consistency_check
&& (!IS_CRITICAL(lck
)) &&
3994 ((loc
= __kmp_get_user_lock_location(lck
)) != NULL
) &&
3995 (loc
->psource
!= NULL
)) {
3996 kmp_str_loc_t str_loc
= __kmp_str_loc_init(loc
->psource
, false);
3997 KMP_WARNING(CnsLockNotDestroyed
, str_loc
.file
, str_loc
.line
);
3998 __kmp_str_loc_free(&str_loc
);
4002 if (IS_CRITICAL(lck
)) {
4005 ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n",
4006 lck
, *(void **)lck
));
4008 KA_TRACE(20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck
,
4013 // Cleanup internal lock dynamic resources (for drdpa locks particularly).
4014 __kmp_destroy_user_lock(lck
);
4017 // Free the lock if block allocation of locks is not used.
4018 if (__kmp_lock_blocks
== NULL
) {
4025 // delete lock table(s).
4026 kmp_user_lock_p
*table_ptr
= __kmp_user_lock_table
.table
;
4027 __kmp_user_lock_table
.table
= NULL
;
4028 __kmp_user_lock_table
.allocated
= 0;
4030 while (table_ptr
!= NULL
) {
4031 // In the first element we saved the pointer to the previous
4032 // (smaller) lock table.
4033 kmp_user_lock_p
*next
= (kmp_user_lock_p
*)(table_ptr
[0]);
4034 __kmp_free(table_ptr
);
4038 // Free buffers allocated for blocks of locks.
4039 kmp_block_of_locks_t
*block_ptr
= __kmp_lock_blocks
;
4040 __kmp_lock_blocks
= NULL
;
4042 while (block_ptr
!= NULL
) {
4043 kmp_block_of_locks_t
*next
= block_ptr
->next_block
;
4044 __kmp_free(block_ptr
->locks
);
4045 // *block_ptr itself was allocated at the end of the locks vector.
4049 TCW_4(__kmp_init_user_locks
, FALSE
);
4052 #endif // KMP_USE_DYNAMIC_LOCK