Linux 6.13-rc7
[linux.git] / fs / bcachefs / six.c
blob617d07e53b20ccaba8552dc7c1b2faeaeff39aa4
1 // SPDX-License-Identifier: GPL-2.0
3 #include <linux/export.h>
4 #include <linux/log2.h>
5 #include <linux/percpu.h>
6 #include <linux/preempt.h>
7 #include <linux/rcupdate.h>
8 #include <linux/sched.h>
9 #include <linux/sched/clock.h>
10 #include <linux/sched/rt.h>
11 #include <linux/sched/task.h>
12 #include <linux/slab.h>
14 #include <trace/events/lock.h>
16 #include "six.h"
/*
 * EBUG_ON(): assertion that is only active when DEBUG is defined.  Without
 * DEBUG it expands to an empty statement and @cond is not evaluated.
 */
#ifdef DEBUG
#define EBUG_ON(cond)			BUG_ON(cond)
#else
#define EBUG_ON(cond)			do {} while (0)
#endif

/* Thin wrappers around lock_acquire()/lock_release() with fixed arguments: */
#define six_acquire(l, t, r, ip)	lock_acquire(l, 0, t, r, 1, NULL, ip)
#define six_release(l, ip)		lock_release(l, ip)
/* Forward declaration: used by six_lock_slowpath() before its definition. */
static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type);
/*
 * Layout of the 32 bit lock->state word:
 *
 *   bits  0-25: count of read locks held (non percpu mode)
 *   bit     26: held for intent
 *   bit     27: held for write
 *   bits 28+  : "waiters present" flag, one bit per lock type
 *               (bit 28 + lock type)
 *   bit     31: optimistic spinning disabled
 */
#define SIX_LOCK_HELD_read_OFFSET	0
#define SIX_LOCK_HELD_read		(~(~0U << 26))
#define SIX_LOCK_HELD_intent		(1U << 26)
#define SIX_LOCK_HELD_write		(1U << 27)
#define SIX_LOCK_WAITING_read		(1U << (28 + SIX_LOCK_read))
#define SIX_LOCK_WAITING_write		(1U << (28 + SIX_LOCK_write))
#define SIX_LOCK_NOSPIN			(1U << 31)
37 struct six_lock_vals {
38 /* Value we add to the lock in order to take the lock: */
39 u32 lock_val;
41 /* If the lock has this value (used as a mask), taking the lock fails: */
42 u32 lock_fail;
44 /* Mask that indicates lock is held for this type: */
45 u32 held_mask;
47 /* Waitlist we wakeup when releasing the lock: */
48 enum six_lock_type unlock_wakeup;
51 static const struct six_lock_vals l[] = {
52 [SIX_LOCK_read] = {
53 .lock_val = 1U << SIX_LOCK_HELD_read_OFFSET,
54 .lock_fail = SIX_LOCK_HELD_write,
55 .held_mask = SIX_LOCK_HELD_read,
56 .unlock_wakeup = SIX_LOCK_write,
58 [SIX_LOCK_intent] = {
59 .lock_val = SIX_LOCK_HELD_intent,
60 .lock_fail = SIX_LOCK_HELD_intent,
61 .held_mask = SIX_LOCK_HELD_intent,
62 .unlock_wakeup = SIX_LOCK_intent,
64 [SIX_LOCK_write] = {
65 .lock_val = SIX_LOCK_HELD_write,
66 .lock_fail = SIX_LOCK_HELD_read,
67 .held_mask = SIX_LOCK_HELD_write,
68 .unlock_wakeup = SIX_LOCK_read,
72 static inline void six_set_bitmask(struct six_lock *lock, u32 mask)
74 if ((atomic_read(&lock->state) & mask) != mask)
75 atomic_or(mask, &lock->state);
78 static inline void six_clear_bitmask(struct six_lock *lock, u32 mask)
80 if (atomic_read(&lock->state) & mask)
81 atomic_and(~mask, &lock->state);
84 static inline void six_set_owner(struct six_lock *lock, enum six_lock_type type,
85 u32 old, struct task_struct *owner)
87 if (type != SIX_LOCK_intent)
88 return;
90 if (!(old & SIX_LOCK_HELD_intent)) {
91 EBUG_ON(lock->owner);
92 lock->owner = owner;
93 } else {
94 EBUG_ON(lock->owner != current);
98 static inline unsigned pcpu_read_count(struct six_lock *lock)
100 unsigned read_count = 0;
101 int cpu;
103 for_each_possible_cpu(cpu)
104 read_count += *per_cpu_ptr(lock->readers, cpu);
105 return read_count;
109 * __do_six_trylock() - main trylock routine
111 * Returns 1 on success, 0 on failure
113 * In percpu reader mode, a failed trylock may cause a spurious trylock failure
114 * for anoter thread taking the competing lock type, and we may havve to do a
115 * wakeup: when a wakeup is required, we return -1 - wakeup_type.
117 static int __do_six_trylock(struct six_lock *lock, enum six_lock_type type,
118 struct task_struct *task, bool try)
120 int ret;
121 u32 old;
123 EBUG_ON(type == SIX_LOCK_write && lock->owner != task);
124 EBUG_ON(type == SIX_LOCK_write &&
125 (try != !(atomic_read(&lock->state) & SIX_LOCK_HELD_write)));
128 * Percpu reader mode:
130 * The basic idea behind this algorithm is that you can implement a lock
131 * between two threads without any atomics, just memory barriers:
133 * For two threads you'll need two variables, one variable for "thread a
134 * has the lock" and another for "thread b has the lock".
136 * To take the lock, a thread sets its variable indicating that it holds
137 * the lock, then issues a full memory barrier, then reads from the
138 * other thread's variable to check if the other thread thinks it has
139 * the lock. If we raced, we backoff and retry/sleep.
141 * Failure to take the lock may cause a spurious trylock failure in
142 * another thread, because we temporarily set the lock to indicate that
143 * we held it. This would be a problem for a thread in six_lock(), when
144 * they are calling trylock after adding themself to the waitlist and
145 * prior to sleeping.
147 * Therefore, if we fail to get the lock, and there were waiters of the
148 * type we conflict with, we will have to issue a wakeup.
150 * Since we may be called under wait_lock (and by the wakeup code
151 * itself), we return that the wakeup has to be done instead of doing it
152 * here.
154 if (type == SIX_LOCK_read && lock->readers) {
155 preempt_disable();
156 this_cpu_inc(*lock->readers); /* signal that we own lock */
158 smp_mb();
160 old = atomic_read(&lock->state);
161 ret = !(old & l[type].lock_fail);
163 this_cpu_sub(*lock->readers, !ret);
164 preempt_enable();
166 if (!ret) {
167 smp_mb();
168 if (atomic_read(&lock->state) & SIX_LOCK_WAITING_write)
169 ret = -1 - SIX_LOCK_write;
171 } else if (type == SIX_LOCK_write && lock->readers) {
172 if (try)
173 atomic_add(SIX_LOCK_HELD_write, &lock->state);
176 * Make sure atomic_add happens before pcpu_read_count and
177 * six_set_bitmask in slow path happens before pcpu_read_count.
179 * Paired with the smp_mb() in read lock fast path (per-cpu mode)
180 * and the one before atomic_read in read unlock path.
182 smp_mb();
183 ret = !pcpu_read_count(lock);
185 if (try && !ret) {
186 old = atomic_sub_return(SIX_LOCK_HELD_write, &lock->state);
187 if (old & SIX_LOCK_WAITING_read)
188 ret = -1 - SIX_LOCK_read;
190 } else {
191 old = atomic_read(&lock->state);
192 do {
193 ret = !(old & l[type].lock_fail);
194 if (!ret || (type == SIX_LOCK_write && !try)) {
195 smp_mb();
196 break;
198 } while (!atomic_try_cmpxchg_acquire(&lock->state, &old, old + l[type].lock_val));
200 EBUG_ON(ret && !(atomic_read(&lock->state) & l[type].held_mask));
203 if (ret > 0)
204 six_set_owner(lock, type, old, task);
206 EBUG_ON(type == SIX_LOCK_write && try && ret <= 0 &&
207 (atomic_read(&lock->state) & SIX_LOCK_HELD_write));
209 return ret;
212 static void __six_lock_wakeup(struct six_lock *lock, enum six_lock_type lock_type)
214 struct six_lock_waiter *w, *next;
215 struct task_struct *task;
216 bool saw_one;
217 int ret;
218 again:
219 ret = 0;
220 saw_one = false;
221 raw_spin_lock(&lock->wait_lock);
223 list_for_each_entry_safe(w, next, &lock->wait_list, list) {
224 if (w->lock_want != lock_type)
225 continue;
227 if (saw_one && lock_type != SIX_LOCK_read)
228 goto unlock;
229 saw_one = true;
231 ret = __do_six_trylock(lock, lock_type, w->task, false);
232 if (ret <= 0)
233 goto unlock;
236 * Similar to percpu_rwsem_wake_function(), we need to guard
237 * against the wakee noticing w->lock_acquired, returning, and
238 * then exiting before we do the wakeup:
240 task = get_task_struct(w->task);
241 __list_del(w->list.prev, w->list.next);
243 * The release barrier here ensures the ordering of the
244 * __list_del before setting w->lock_acquired; @w is on the
245 * stack of the thread doing the waiting and will be reused
246 * after it sees w->lock_acquired with no other locking:
247 * pairs with smp_load_acquire() in six_lock_slowpath()
249 smp_store_release(&w->lock_acquired, true);
250 wake_up_process(task);
251 put_task_struct(task);
254 six_clear_bitmask(lock, SIX_LOCK_WAITING_read << lock_type);
255 unlock:
256 raw_spin_unlock(&lock->wait_lock);
258 if (ret < 0) {
259 lock_type = -ret - 1;
260 goto again;
264 __always_inline
265 static void six_lock_wakeup(struct six_lock *lock, u32 state,
266 enum six_lock_type lock_type)
268 if (lock_type == SIX_LOCK_write && (state & SIX_LOCK_HELD_read))
269 return;
271 if (!(state & (SIX_LOCK_WAITING_read << lock_type)))
272 return;
274 __six_lock_wakeup(lock, lock_type);
277 __always_inline
278 static bool do_six_trylock(struct six_lock *lock, enum six_lock_type type, bool try)
280 int ret;
282 ret = __do_six_trylock(lock, type, current, try);
283 if (ret < 0)
284 __six_lock_wakeup(lock, -ret - 1);
286 return ret > 0;
290 * six_trylock_ip - attempt to take a six lock without blocking
291 * @lock: lock to take
292 * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
293 * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_
295 * Return: true on success, false on failure.
297 bool six_trylock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip)
299 if (!do_six_trylock(lock, type, true))
300 return false;
302 if (type != SIX_LOCK_write)
303 six_acquire(&lock->dep_map, 1, type == SIX_LOCK_read, ip);
304 return true;
306 EXPORT_SYMBOL_GPL(six_trylock_ip);
309 * six_relock_ip - attempt to re-take a lock that was held previously
310 * @lock: lock to take
311 * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
312 * @seq: lock sequence number obtained from six_lock_seq() while lock was
313 * held previously
314 * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_
316 * Return: true on success, false on failure.
318 bool six_relock_ip(struct six_lock *lock, enum six_lock_type type,
319 unsigned seq, unsigned long ip)
321 if (six_lock_seq(lock) != seq || !six_trylock_ip(lock, type, ip))
322 return false;
324 if (six_lock_seq(lock) != seq) {
325 six_unlock_ip(lock, type, ip);
326 return false;
329 return true;
331 EXPORT_SYMBOL_GPL(six_relock_ip);
333 #ifdef CONFIG_BCACHEFS_SIX_OPTIMISTIC_SPIN
335 static inline bool six_owner_running(struct six_lock *lock)
338 * When there's no owner, we might have preempted between the owner
339 * acquiring the lock and setting the owner field. If we're an RT task
340 * that will live-lock because we won't let the owner complete.
342 rcu_read_lock();
343 struct task_struct *owner = READ_ONCE(lock->owner);
344 bool ret = owner ? owner_on_cpu(owner) : !rt_or_dl_task(current);
345 rcu_read_unlock();
347 return ret;
350 static inline bool six_optimistic_spin(struct six_lock *lock,
351 struct six_lock_waiter *wait,
352 enum six_lock_type type)
354 unsigned loop = 0;
355 u64 end_time;
357 if (type == SIX_LOCK_write)
358 return false;
360 if (lock->wait_list.next != &wait->list)
361 return false;
363 if (atomic_read(&lock->state) & SIX_LOCK_NOSPIN)
364 return false;
366 preempt_disable();
367 end_time = sched_clock() + 10 * NSEC_PER_USEC;
369 while (!need_resched() && six_owner_running(lock)) {
371 * Ensures that writes to the waitlist entry happen after we see
372 * wait->lock_acquired: pairs with the smp_store_release in
373 * __six_lock_wakeup
375 if (smp_load_acquire(&wait->lock_acquired)) {
376 preempt_enable();
377 return true;
380 if (!(++loop & 0xf) && (time_after64(sched_clock(), end_time))) {
381 six_set_bitmask(lock, SIX_LOCK_NOSPIN);
382 break;
386 * The cpu_relax() call is a compiler barrier which forces
387 * everything in this loop to be re-loaded. We don't need
388 * memory barriers as we'll eventually observe the right
389 * values at the cost of a few extra spins.
391 cpu_relax();
394 preempt_enable();
395 return false;
398 #else /* !CONFIG_BCACHEFS_SIX_OPTIMISTIC_SPIN */
400 static inline bool six_optimistic_spin(struct six_lock *lock,
401 struct six_lock_waiter *wait,
402 enum six_lock_type type)
404 return false;
407 #endif
409 noinline
410 static int six_lock_slowpath(struct six_lock *lock, enum six_lock_type type,
411 struct six_lock_waiter *wait,
412 six_lock_should_sleep_fn should_sleep_fn, void *p,
413 unsigned long ip)
415 int ret = 0;
417 if (type == SIX_LOCK_write) {
418 EBUG_ON(atomic_read(&lock->state) & SIX_LOCK_HELD_write);
419 atomic_add(SIX_LOCK_HELD_write, &lock->state);
420 smp_mb__after_atomic();
423 trace_contention_begin(lock, 0);
424 lock_contended(&lock->dep_map, ip);
426 wait->task = current;
427 wait->lock_want = type;
428 wait->lock_acquired = false;
430 raw_spin_lock(&lock->wait_lock);
431 six_set_bitmask(lock, SIX_LOCK_WAITING_read << type);
433 * Retry taking the lock after taking waitlist lock, in case we raced
434 * with an unlock:
436 ret = __do_six_trylock(lock, type, current, false);
437 if (ret <= 0) {
438 wait->start_time = local_clock();
440 if (!list_empty(&lock->wait_list)) {
441 struct six_lock_waiter *last =
442 list_last_entry(&lock->wait_list,
443 struct six_lock_waiter, list);
445 if (time_before_eq64(wait->start_time, last->start_time))
446 wait->start_time = last->start_time + 1;
449 list_add_tail(&wait->list, &lock->wait_list);
451 raw_spin_unlock(&lock->wait_lock);
453 if (unlikely(ret > 0)) {
454 ret = 0;
455 goto out;
458 if (unlikely(ret < 0)) {
459 __six_lock_wakeup(lock, -ret - 1);
460 ret = 0;
463 if (six_optimistic_spin(lock, wait, type))
464 goto out;
466 while (1) {
467 set_current_state(TASK_UNINTERRUPTIBLE);
470 * Ensures that writes to the waitlist entry happen after we see
471 * wait->lock_acquired: pairs with the smp_store_release in
472 * __six_lock_wakeup
474 if (smp_load_acquire(&wait->lock_acquired))
475 break;
477 ret = should_sleep_fn ? should_sleep_fn(lock, p) : 0;
478 if (unlikely(ret)) {
479 bool acquired;
482 * If should_sleep_fn() returns an error, we are
483 * required to return that error even if we already
484 * acquired the lock - should_sleep_fn() might have
485 * modified external state (e.g. when the deadlock cycle
486 * detector in bcachefs issued a transaction restart)
488 raw_spin_lock(&lock->wait_lock);
489 acquired = wait->lock_acquired;
490 if (!acquired)
491 list_del(&wait->list);
492 raw_spin_unlock(&lock->wait_lock);
494 if (unlikely(acquired))
495 do_six_unlock_type(lock, type);
496 break;
499 schedule();
502 __set_current_state(TASK_RUNNING);
503 out:
504 if (ret && type == SIX_LOCK_write) {
505 six_clear_bitmask(lock, SIX_LOCK_HELD_write);
506 six_lock_wakeup(lock, atomic_read(&lock->state), SIX_LOCK_read);
508 trace_contention_end(lock, 0);
510 return ret;
514 * six_lock_ip_waiter - take a lock, with full waitlist interface
515 * @lock: lock to take
516 * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
517 * @wait: pointer to wait object, which will be added to lock's waitlist
518 * @should_sleep_fn: callback run after adding to waitlist, immediately prior
519 * to scheduling
520 * @p: passed through to @should_sleep_fn
521 * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_
523 * This is the most general six_lock() variant, with parameters to support full
524 * cycle detection for deadlock avoidance.
526 * The code calling this function must implement tracking of held locks, and the
527 * @wait object should be embedded into the struct that tracks held locks -
528 * which must also be accessible in a thread-safe way.
530 * @should_sleep_fn should invoke the cycle detector; it should walk each
531 * lock's waiters, and for each waiter recursively walk their held locks.
533 * When this function must block, @wait will be added to @lock's waitlist before
534 * calling trylock, and before calling @should_sleep_fn, and @wait will not be
535 * removed from the lock waitlist until the lock has been successfully acquired,
536 * or we abort.
538 * @wait.start_time will be monotonically increasing for any given waitlist, and
539 * thus may be used as a loop cursor.
541 * Return: 0 on success, or the return code from @should_sleep_fn on failure.
543 int six_lock_ip_waiter(struct six_lock *lock, enum six_lock_type type,
544 struct six_lock_waiter *wait,
545 six_lock_should_sleep_fn should_sleep_fn, void *p,
546 unsigned long ip)
548 int ret;
550 wait->start_time = 0;
552 if (type != SIX_LOCK_write)
553 six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, ip);
555 ret = do_six_trylock(lock, type, true) ? 0
556 : six_lock_slowpath(lock, type, wait, should_sleep_fn, p, ip);
558 if (ret && type != SIX_LOCK_write)
559 six_release(&lock->dep_map, ip);
560 if (!ret)
561 lock_acquired(&lock->dep_map, ip);
563 return ret;
565 EXPORT_SYMBOL_GPL(six_lock_ip_waiter);
567 __always_inline
568 static void do_six_unlock_type(struct six_lock *lock, enum six_lock_type type)
570 u32 state;
572 if (type == SIX_LOCK_intent)
573 lock->owner = NULL;
575 if (type == SIX_LOCK_read &&
576 lock->readers) {
577 smp_mb(); /* unlock barrier */
578 this_cpu_dec(*lock->readers);
579 smp_mb(); /* between unlocking and checking for waiters */
580 state = atomic_read(&lock->state);
581 } else {
582 u32 v = l[type].lock_val;
584 if (type != SIX_LOCK_read)
585 v += atomic_read(&lock->state) & SIX_LOCK_NOSPIN;
587 EBUG_ON(!(atomic_read(&lock->state) & l[type].held_mask));
588 state = atomic_sub_return_release(v, &lock->state);
591 six_lock_wakeup(lock, state, l[type].unlock_wakeup);
595 * six_unlock_ip - drop a six lock
596 * @lock: lock to unlock
597 * @type: SIX_LOCK_read, SIX_LOCK_intent, or SIX_LOCK_write
598 * @ip: ip parameter for lockdep/lockstat, i.e. _THIS_IP_
600 * When a lock is held multiple times (because six_lock_incement()) was used),
601 * this decrements the 'lock held' counter by one.
603 * For example:
604 * six_lock_read(&foo->lock); read count 1
605 * six_lock_increment(&foo->lock, SIX_LOCK_read); read count 2
606 * six_lock_unlock(&foo->lock, SIX_LOCK_read); read count 1
607 * six_lock_unlock(&foo->lock, SIX_LOCK_read); read count 0
609 void six_unlock_ip(struct six_lock *lock, enum six_lock_type type, unsigned long ip)
611 EBUG_ON(type == SIX_LOCK_write &&
612 !(atomic_read(&lock->state) & SIX_LOCK_HELD_intent));
613 EBUG_ON((type == SIX_LOCK_write ||
614 type == SIX_LOCK_intent) &&
615 lock->owner != current);
617 if (type != SIX_LOCK_write)
618 six_release(&lock->dep_map, ip);
619 else
620 lock->seq++;
622 if (type == SIX_LOCK_intent &&
623 lock->intent_lock_recurse) {
624 --lock->intent_lock_recurse;
625 return;
628 do_six_unlock_type(lock, type);
630 EXPORT_SYMBOL_GPL(six_unlock_ip);
633 * six_lock_downgrade - convert an intent lock to a read lock
634 * @lock: lock to dowgrade
636 * @lock will have read count incremented and intent count decremented
638 void six_lock_downgrade(struct six_lock *lock)
640 six_lock_increment(lock, SIX_LOCK_read);
641 six_unlock_intent(lock);
643 EXPORT_SYMBOL_GPL(six_lock_downgrade);
646 * six_lock_tryupgrade - attempt to convert read lock to an intent lock
647 * @lock: lock to upgrade
649 * On success, @lock will have intent count incremented and read count
650 * decremented
652 * Return: true on success, false on failure
654 bool six_lock_tryupgrade(struct six_lock *lock)
656 u32 old = atomic_read(&lock->state), new;
658 do {
659 new = old;
661 if (new & SIX_LOCK_HELD_intent)
662 return false;
664 if (!lock->readers) {
665 EBUG_ON(!(new & SIX_LOCK_HELD_read));
666 new -= l[SIX_LOCK_read].lock_val;
669 new |= SIX_LOCK_HELD_intent;
670 } while (!atomic_try_cmpxchg_acquire(&lock->state, &old, new));
672 if (lock->readers)
673 this_cpu_dec(*lock->readers);
675 six_set_owner(lock, SIX_LOCK_intent, old, current);
677 return true;
679 EXPORT_SYMBOL_GPL(six_lock_tryupgrade);
682 * six_trylock_convert - attempt to convert a held lock from one type to another
683 * @lock: lock to upgrade
684 * @from: SIX_LOCK_read or SIX_LOCK_intent
685 * @to: SIX_LOCK_read or SIX_LOCK_intent
687 * On success, @lock will have intent count incremented and read count
688 * decremented
690 * Return: true on success, false on failure
692 bool six_trylock_convert(struct six_lock *lock,
693 enum six_lock_type from,
694 enum six_lock_type to)
696 EBUG_ON(to == SIX_LOCK_write || from == SIX_LOCK_write);
698 if (to == from)
699 return true;
701 if (to == SIX_LOCK_read) {
702 six_lock_downgrade(lock);
703 return true;
704 } else {
705 return six_lock_tryupgrade(lock);
708 EXPORT_SYMBOL_GPL(six_trylock_convert);
711 * six_lock_increment - increase held lock count on a lock that is already held
712 * @lock: lock to increment
713 * @type: SIX_LOCK_read or SIX_LOCK_intent
715 * @lock must already be held, with a lock type that is greater than or equal to
716 * @type
718 * A corresponding six_unlock_type() call will be required for @lock to be fully
719 * unlocked.
721 void six_lock_increment(struct six_lock *lock, enum six_lock_type type)
723 six_acquire(&lock->dep_map, 0, type == SIX_LOCK_read, _RET_IP_);
725 /* XXX: assert already locked, and that we don't overflow: */
727 switch (type) {
728 case SIX_LOCK_read:
729 if (lock->readers) {
730 this_cpu_inc(*lock->readers);
731 } else {
732 EBUG_ON(!(atomic_read(&lock->state) &
733 (SIX_LOCK_HELD_read|
734 SIX_LOCK_HELD_intent)));
735 atomic_add(l[type].lock_val, &lock->state);
737 break;
738 case SIX_LOCK_intent:
739 EBUG_ON(!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent));
740 lock->intent_lock_recurse++;
741 break;
742 case SIX_LOCK_write:
743 BUG();
744 break;
747 EXPORT_SYMBOL_GPL(six_lock_increment);
750 * six_lock_wakeup_all - wake up all waiters on @lock
751 * @lock: lock to wake up waiters for
753 * Wakeing up waiters will cause them to re-run should_sleep_fn, which may then
754 * abort the lock operation.
756 * This function is never needed in a bug-free program; it's only useful in
757 * debug code, e.g. to determine if a cycle detector is at fault.
759 void six_lock_wakeup_all(struct six_lock *lock)
761 u32 state = atomic_read(&lock->state);
762 struct six_lock_waiter *w;
764 six_lock_wakeup(lock, state, SIX_LOCK_read);
765 six_lock_wakeup(lock, state, SIX_LOCK_intent);
766 six_lock_wakeup(lock, state, SIX_LOCK_write);
768 raw_spin_lock(&lock->wait_lock);
769 list_for_each_entry(w, &lock->wait_list, list)
770 wake_up_process(w->task);
771 raw_spin_unlock(&lock->wait_lock);
773 EXPORT_SYMBOL_GPL(six_lock_wakeup_all);
776 * six_lock_counts - return held lock counts, for each lock type
777 * @lock: lock to return counters for
779 * Return: the number of times a lock is held for read, intent and write.
781 struct six_lock_count six_lock_counts(struct six_lock *lock)
783 struct six_lock_count ret;
785 ret.n[SIX_LOCK_read] = !lock->readers
786 ? atomic_read(&lock->state) & SIX_LOCK_HELD_read
787 : pcpu_read_count(lock);
788 ret.n[SIX_LOCK_intent] = !!(atomic_read(&lock->state) & SIX_LOCK_HELD_intent) +
789 lock->intent_lock_recurse;
790 ret.n[SIX_LOCK_write] = !!(atomic_read(&lock->state) & SIX_LOCK_HELD_write);
792 return ret;
794 EXPORT_SYMBOL_GPL(six_lock_counts);
797 * six_lock_readers_add - directly manipulate reader count of a lock
798 * @lock: lock to add/subtract readers for
799 * @nr: reader count to add/subtract
801 * When an upper layer is implementing lock reentrency, we may have both read
802 * and intent locks on the same lock.
804 * When we need to take a write lock, the read locks will cause self-deadlock,
805 * because six locks themselves do not track which read locks are held by the
806 * current thread and which are held by a different thread - it does no
807 * per-thread tracking of held locks.
809 * The upper layer that is tracking held locks may however, if trylock() has
810 * failed, count up its own read locks, subtract them, take the write lock, and
811 * then re-add them.
813 * As in any other situation when taking a write lock, @lock must be held for
814 * intent one (or more) times, so @lock will never be left unlocked.
816 void six_lock_readers_add(struct six_lock *lock, int nr)
818 if (lock->readers) {
819 this_cpu_add(*lock->readers, nr);
820 } else {
821 EBUG_ON((int) (atomic_read(&lock->state) & SIX_LOCK_HELD_read) + nr < 0);
822 /* reader count starts at bit 0 */
823 atomic_add(nr, &lock->state);
826 EXPORT_SYMBOL_GPL(six_lock_readers_add);
829 * six_lock_exit - release resources held by a lock prior to freeing
830 * @lock: lock to exit
832 * When a lock was initialized in percpu mode (SIX_OLCK_INIT_PCPU), this is
833 * required to free the percpu read counts.
835 void six_lock_exit(struct six_lock *lock)
837 WARN_ON(lock->readers && pcpu_read_count(lock));
838 WARN_ON(atomic_read(&lock->state) & SIX_LOCK_HELD_read);
840 free_percpu(lock->readers);
841 lock->readers = NULL;
843 EXPORT_SYMBOL_GPL(six_lock_exit);
845 void __six_lock_init(struct six_lock *lock, const char *name,
846 struct lock_class_key *key, enum six_lock_init_flags flags)
848 atomic_set(&lock->state, 0);
849 raw_spin_lock_init(&lock->wait_lock);
850 INIT_LIST_HEAD(&lock->wait_list);
851 #ifdef CONFIG_DEBUG_LOCK_ALLOC
852 debug_check_no_locks_freed((void *) lock, sizeof(*lock));
853 lockdep_init_map(&lock->dep_map, name, key, 0);
854 #endif
857 * Don't assume that we have real percpu variables available in
858 * userspace:
860 #ifdef __KERNEL__
861 if (flags & SIX_LOCK_INIT_PCPU) {
863 * We don't return an error here on memory allocation failure
864 * since percpu is an optimization, and locks will work with the
865 * same semantics in non-percpu mode: callers can check for
866 * failure if they wish by checking lock->readers, but generally
867 * will not want to treat it as an error.
869 lock->readers = alloc_percpu(unsigned);
871 #endif
873 EXPORT_SYMBOL_GPL(__six_lock_init);