/* rwsem.c: R/W semaphores: contention handling functions
 *
 * Written by David Howells (dhowells@redhat.com).
 * Derived from arch/i386/kernel/semaphore.c
 *
 * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
 * and Michel Lespinasse <walken@google.com>
 *
 * Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
 * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
 */
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/sched/rt.h>

#include "mcs_spinlock.h"
/*
 * Guide to the rw_semaphore's count field for common values.
 * (32-bit case illustrated, similar for 64-bit)
 *
 * 0x0000000X   (1) X readers active or attempting lock, no writer waiting
 *                  X = #active_readers + #readers attempting to lock
 *                  (X*ACTIVE_BIAS)
 *
 * 0x00000000   rwsem is unlocked, and no one is waiting for the lock or
 *              attempting to read lock or write lock.
 *
 * 0xffff000X   (1) X readers active or attempting lock, with waiters for lock
 *                  X = #active readers + #readers attempting lock
 *                  (X*ACTIVE_BIAS + WAITING_BIAS)
 *              (2) 1 writer attempting lock, no waiters for lock
 *                  X-1 = #active readers + #readers attempting lock
 *                  ((X-1)*ACTIVE_BIAS + ACTIVE_WRITE_BIAS)
 *              (3) 1 writer active, no waiters for lock
 *                  X-1 = #active readers + #readers attempting lock
 *                  ((X-1)*ACTIVE_BIAS + ACTIVE_WRITE_BIAS)
 *
 * 0xffff0001   (1) 1 reader active or attempting lock, waiters for lock
 *                  (WAITING_BIAS + ACTIVE_BIAS)
 *              (2) 1 writer active or attempting lock, no waiters for lock
 *                  (ACTIVE_WRITE_BIAS)
 *
 * 0xffff0000   (1) There are writers or readers queued but none active
 *                  or in the process of attempting lock.
 *                  (WAITING_BIAS)
 *              Note: a writer can attempt to steal the lock for this count by
 *              adding ACTIVE_WRITE_BIAS in cmpxchg and checking the old count.
 *
 * 0xfffe0001   (1) 1 writer active, or attempting lock. Waiters on queue.
 *                  (ACTIVE_WRITE_BIAS + WAITING_BIAS)
 *
 * Note: Readers attempt to lock by adding ACTIVE_BIAS in down_read and checking
 *       that the count becomes greater than 0 for successful lock acquisition,
 *       i.e. the case where there are only readers or nobody has the lock
 *       (1st and 2nd case above).
 *
 *       Writers attempt to lock by adding ACTIVE_WRITE_BIAS in down_write and
 *       checking that the count becomes ACTIVE_WRITE_BIAS for successful lock
 *       acquisition (i.e. nobody else has the lock or attempts the lock).  If
 *       unsuccessful, in rwsem_down_write_failed, we'll check to see if there
 *       are only waiters but none active (5th case above), and attempt to
 *       steal the lock.
 */
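/*
 * Illustrative sketch only, never called: it walks the count field through
 * the common transitions listed in the table above.  The helper name and
 * the __maybe_unused annotation are ours; the bias constants come from the
 * rwsem headers, and the hex values in the comments are the 32-bit ones
 * from the table (the 64-bit constants differ but the logic is the same).
 */
static long __maybe_unused rwsem_count_walkthrough(void)
{
        long count = 0;                         /* 0x00000000: unlocked */

        count += RWSEM_ACTIVE_READ_BIAS;        /* 0x00000001: one reader */
        count += RWSEM_ACTIVE_READ_BIAS;        /* 0x00000002: two readers */
        count += RWSEM_WAITING_BIAS;            /* 0xffff0002: plus queued waiter(s) */
        count -= 2 * RWSEM_ACTIVE_READ_BIAS;    /* 0xffff0000: queued but none active */

        /*
         * From 0xffff0000 a writer may steal the lock by cmpxchg'ing in
         * ACTIVE_WRITE_BIAS, giving 0xfffe0001: writer active, waiters
         * still queued.
         */
        count += RWSEM_ACTIVE_WRITE_BIAS;

        return count & RWSEM_ACTIVE_MASK;       /* 0x00000001: one active locker */
}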
/*
 * Initialize an rwsem:
 */
void __init_rwsem(struct rw_semaphore *sem, const char *name,
                  struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
        /*
         * Make sure we are not reinitializing a held semaphore:
         */
        debug_check_no_locks_freed((void *)sem, sizeof(*sem));
        lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
        sem->count = RWSEM_UNLOCKED_VALUE;
        raw_spin_lock_init(&sem->wait_lock);
        INIT_LIST_HEAD(&sem->wait_list);
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
        sem->owner = NULL;
        osq_lock_init(&sem->osq);
#endif
}

EXPORT_SYMBOL(__init_rwsem);
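/*
 * Illustrative caller, not part of the original file: the slow paths below
 * (rwsem_down_read_failed() and friends) are only entered on contention; a
 * user of the lock only sees the down_xxxx()/up_xxxx() API declared in
 * <linux/rwsem.h>.  The function name here is made up for the example.
 */
static void __maybe_unused rwsem_usage_sketch(struct rw_semaphore *sem)
{
        down_read(sem);         /* shared: many readers may hold the lock */
        /* ... read the data protected by sem ... */
        up_read(sem);

        down_write(sem);        /* exclusive: one writer, no readers */
        /* ... modify the data protected by sem ... */
        up_write(sem);
}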
enum rwsem_waiter_type {
        RWSEM_WAITING_FOR_WRITE,
        RWSEM_WAITING_FOR_READ
};

struct rwsem_waiter {
        struct list_head list;
        struct task_struct *task;
        enum rwsem_waiter_type type;
};

enum rwsem_wake_type {
        RWSEM_WAKE_ANY,         /* Wake whatever's at head of wait list */
        RWSEM_WAKE_READERS,     /* Wake readers only */
        RWSEM_WAKE_READ_OWNED   /* Waker thread holds the read lock */
};
/*
 * handle the lock release when there are processes blocked on it that can now run
 * - if we come here from up_xxxx(), then:
 *   - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
 *   - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
 * - there must be someone on the queue
 * - the spinlock must be held by the caller
 * - woken process blocks are discarded from the list after having task zeroed
 * - writers are only woken if downgrading is false
 */
static struct rw_semaphore *
__rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
{
        struct rwsem_waiter *waiter;
        struct task_struct *tsk;
        struct list_head *next;
        long oldcount, woken, loop, adjustment;

        waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
        if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
                if (wake_type == RWSEM_WAKE_ANY)
                        /* Wake writer at the front of the queue, but do not
                         * grant it the lock yet as we want other writers
                         * to be able to steal it.  Readers, on the other hand,
                         * will block as they will notice the queued writer.
                         */
                        wake_up_process(waiter->task);
                goto out;
        }

        /* Writers might steal the lock before we grant it to the next reader.
         * We prefer to do the first reader grant before counting readers
         * so we can bail out early if a writer stole the lock.
         */
        adjustment = 0;
        if (wake_type != RWSEM_WAKE_READ_OWNED) {
                adjustment = RWSEM_ACTIVE_READ_BIAS;
 try_reader_grant:
                oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
                if (unlikely(oldcount < RWSEM_WAITING_BIAS)) {
                        /* A writer stole the lock. Undo our reader grant. */
                        if (rwsem_atomic_update(-adjustment, sem) &
                                                RWSEM_ACTIVE_MASK)
                                goto out;
                        /* Last active locker left. Retry waking readers. */
                        goto try_reader_grant;
                }
        }

        /* Grant an infinite number of read locks to the readers at the front
         * of the queue.  Note we increment the 'active part' of the count by
         * the number of readers before waking any processes up.
         */
        woken = 0;
        do {
                woken++;

                if (waiter->list.next == &sem->wait_list)
                        break;

                waiter = list_entry(waiter->list.next,
                                        struct rwsem_waiter, list);

        } while (waiter->type != RWSEM_WAITING_FOR_WRITE);

        adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
        if (waiter->type != RWSEM_WAITING_FOR_WRITE)
                /* hit end of list above */
                adjustment -= RWSEM_WAITING_BIAS;

        if (adjustment)
                rwsem_atomic_add(adjustment, sem);

        next = sem->wait_list.next;
        loop = woken;
        do {
                waiter = list_entry(next, struct rwsem_waiter, list);
                next = waiter->list.next;
                tsk = waiter->task;
                smp_mb();
                waiter->task = NULL;
                wake_up_process(tsk);
                put_task_struct(tsk);
        } while (--loop);

        sem->wait_list.next = next;
        next->prev = &sem->wait_list;

 out:
        return sem;
}
/*
 * Wait for the read lock to be granted
 */
struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
{
        long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
        struct rwsem_waiter waiter;
        struct task_struct *tsk = current;

        /* set up my own style of waitqueue */
        waiter.task = tsk;
        waiter.type = RWSEM_WAITING_FOR_READ;
        get_task_struct(tsk);

        raw_spin_lock_irq(&sem->wait_lock);
        if (list_empty(&sem->wait_list))
                adjustment += RWSEM_WAITING_BIAS;
        list_add_tail(&waiter.list, &sem->wait_list);

        /* we're now waiting on the lock, but no longer actively locking */
        count = rwsem_atomic_update(adjustment, sem);

        /* If there are no active locks, wake the front queued process(es).
         *
         * If there are no writers and we are first in the queue,
         * wake our own waiter to join the existing active readers !
         */
        if (count == RWSEM_WAITING_BIAS ||
            (count > RWSEM_WAITING_BIAS &&
             adjustment != -RWSEM_ACTIVE_READ_BIAS))
                sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);

        raw_spin_unlock_irq(&sem->wait_lock);

        /* wait to be given the lock */
        while (true) {
                set_task_state(tsk, TASK_UNINTERRUPTIBLE);
                if (!waiter.task)
                        break;
                schedule();
        }

        tsk->state = TASK_RUNNING;

        return sem;
}
static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
{
        if (!(count & RWSEM_ACTIVE_MASK)) {
                /* try acquiring the write lock */
                if (sem->count == RWSEM_WAITING_BIAS &&
                    cmpxchg(&sem->count, RWSEM_WAITING_BIAS,
                            RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) {
                        if (!list_is_singular(&sem->wait_list))
                                rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
                        return true;
                }
        }
        return false;
}
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
/*
 * Try to acquire the write lock before the writer has been put on the wait queue.
 */
static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
{
        long old, count = ACCESS_ONCE(sem->count);

        while (true) {
                if (!(count == 0 || count == RWSEM_WAITING_BIAS))
                        return false;

                old = cmpxchg(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS);
                if (old == count)
                        return true;

                /* cmpxchg failed: somebody else changed count, try again */
                count = old;
        }
}
static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
{
        struct task_struct *owner;
        bool on_cpu = false;

        if (need_resched())
                return false;

        rcu_read_lock();
        owner = ACCESS_ONCE(sem->owner);
        if (owner)
                on_cpu = owner->on_cpu;
        rcu_read_unlock();

        /*
         * If sem->owner is not set, yet we have just recently entered the
         * slowpath, then there is a possibility reader(s) may have the lock.
         * To be safe, avoid spinning in these situations.
         */
        return on_cpu;
}

static inline bool owner_running(struct rw_semaphore *sem,
                                 struct task_struct *owner)
{
        if (sem->owner != owner)
                return false;

        /*
         * Ensure we emit the owner->on_cpu dereference _after_ checking that
         * sem->owner still matches owner.  If that check fails, owner might
         * point to free()d memory; if it still matches, the rcu_read_lock()
         * ensures the memory stays valid.
         */
        barrier();

        return owner->on_cpu;
}
static noinline
bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner)
{
        rcu_read_lock();
        while (owner_running(sem, owner)) {
                if (need_resched())
                        break;

                cpu_relax_lowlatency();
        }
        rcu_read_unlock();

        /*
         * We break out of the loop above on need_resched() or when the
         * owner changed, which is a sign of heavy contention. Return
         * success only when sem->owner is NULL.
         */
        return sem->owner == NULL;
}
static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
        struct task_struct *owner;
        bool taken = false;

        preempt_disable();

        /* sem->wait_lock should not be held when doing optimistic spinning */
        if (!rwsem_can_spin_on_owner(sem))
                goto done;

        if (!osq_lock(&sem->osq))
                goto done;

        while (true) {
                owner = ACCESS_ONCE(sem->owner);
                if (owner && !rwsem_spin_on_owner(sem, owner))
                        break;

                /* wait_lock will be acquired if write_lock is obtained */
                if (rwsem_try_write_lock_unqueued(sem)) {
                        taken = true;
                        break;
                }

                /*
                 * When there's no owner, we might have been preempted between
                 * the owner acquiring the lock and setting the owner field.
                 * If we're an RT task, this can live-lock, because we won't
                 * let the owner complete.
                 */
                if (!owner && (need_resched() || rt_task(current)))
                        break;

                /*
                 * The cpu_relax() call is a compiler barrier which forces
                 * everything in this loop to be re-loaded. We don't need
                 * memory barriers as we'll eventually observe the right
                 * values at the cost of a few extra spins.
                 */
                cpu_relax_lowlatency();
        }
        osq_unlock(&sem->osq);
done:
        preempt_enable();
        return taken;
}

#else
static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
        return false;
}
#endif
/*
 * Wait until we successfully acquire the write lock
 */
struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
{
        long count;
        bool waiting = true; /* any queued threads before us */
        struct rwsem_waiter waiter;

        /* undo write bias from down_write operation, stop active locking */
        count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem);

        /* do optimistic spinning and steal lock if possible */
        if (rwsem_optimistic_spin(sem))
                return sem;

        /*
         * Optimistic spinning failed, proceed to the slowpath
         * and block until we can acquire the sem.
         */
        waiter.task = current;
        waiter.type = RWSEM_WAITING_FOR_WRITE;

        raw_spin_lock_irq(&sem->wait_lock);

        /* account for this before adding a new element to the list */
        if (list_empty(&sem->wait_list))
                waiting = false;

        list_add_tail(&waiter.list, &sem->wait_list);

        /* we're now waiting on the lock, but no longer actively locking */
        if (waiting) {
                count = ACCESS_ONCE(sem->count);

                /*
                 * If there were already threads queued before us and there are
                 * no active writers, the lock must be read owned; so we try to
                 * wake any read locks that were queued ahead of us.
                 */
                if (count > RWSEM_WAITING_BIAS)
                        sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);

        } else
                count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);

        /* wait until we successfully acquire the lock */
        set_current_state(TASK_UNINTERRUPTIBLE);
        while (true) {
                if (rwsem_try_write_lock(count, sem))
                        break;
                raw_spin_unlock_irq(&sem->wait_lock);

                /* Block until there are no active lockers. */
                do {
                        schedule();
                        set_current_state(TASK_UNINTERRUPTIBLE);
                } while ((count = sem->count) & RWSEM_ACTIVE_MASK);

                raw_spin_lock_irq(&sem->wait_lock);
        }
        __set_current_state(TASK_RUNNING);

        list_del(&waiter.list);
        raw_spin_unlock_irq(&sem->wait_lock);

        return sem;
}
/*
 * handle waking up a waiter on the semaphore
 * - up_read/up_write has decremented the active part of count if we come here
 */
struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&sem->wait_lock, flags);

        /* do nothing if list empty */
        if (!list_empty(&sem->wait_list))
                sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);

        raw_spin_unlock_irqrestore(&sem->wait_lock, flags);

        return sem;
}
/*
 * downgrade a write lock into a read lock
 * - caller incremented the waiting part of count and discovered it still negative
 * - just wake up any readers at the front of the queue
 */
struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&sem->wait_lock, flags);

        /* do nothing if list empty */
        if (!list_empty(&sem->wait_list))
                sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);

        raw_spin_unlock_irqrestore(&sem->wait_lock, flags);

        return sem;
}
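/*
 * Illustrative (hypothetical) caller, not part of the original file: the
 * downgrade pattern that ends up in rwsem_downgrade_wake() on contention.
 * A writer that has finished updating the data but still wants a stable
 * view converts its write lock into a read lock, letting queued readers in
 * without a window where the data is unlocked.  The function name is made
 * up; down_write(), downgrade_write() and up_read() are the regular API.
 */
static void __maybe_unused rwsem_downgrade_sketch(struct rw_semaphore *sem)
{
        down_write(sem);
        /* ... publish/modify the protected data ... */
        downgrade_write(sem);   /* now held for read; queued readers can run */
        /* ... keep reading the data just published ... */
        up_read(sem);
}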
EXPORT_SYMBOL(rwsem_down_read_failed);
EXPORT_SYMBOL(rwsem_down_write_failed);
EXPORT_SYMBOL(rwsem_wake);
EXPORT_SYMBOL(rwsem_downgrade_wake);