/* rwsem.c: R/W semaphores: contention handling functions
 *
 * Written by David Howells (dhowells@redhat.com).
 * Derived from arch/i386/kernel/semaphore.c
 *
 * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
 * and Michel Lespinasse <walken@google.com>
 *
 * Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
 * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
 */
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/sched/rt.h>

#include "mcs_spinlock.h"
/*
 * Guide to the rw_semaphore's count field for common values.
 * (32-bit case illustrated, similar for 64-bit)
 *
 * 0x0000000X	(1) X readers active or attempting lock, no writer waiting
 *		    X = #active_readers + #readers attempting to lock
 *		    (X*ACTIVE_BIAS)
 *
 * 0x00000000	rwsem is unlocked, and no one is waiting for the lock or
 *		attempting to read lock or write lock.
 *
 * 0xffff000X	(1) X readers active or attempting lock, with waiters for lock
 *		    X = #active readers + # readers attempting lock
 *		    (X*ACTIVE_BIAS + WAITING_BIAS)
 *		(2) 1 writer attempting lock, no waiters for lock
 *		    X-1 = #active readers + #readers attempting lock
 *		    ((X-1)*ACTIVE_BIAS + ACTIVE_WRITE_BIAS)
 *		(3) 1 writer active, no waiters for lock
 *		    X-1 = #active readers + #readers attempting lock
 *		    ((X-1)*ACTIVE_BIAS + ACTIVE_WRITE_BIAS)
 *
 * 0xffff0001	(1) 1 reader active or attempting lock, waiters for lock
 *		    (WAITING_BIAS + ACTIVE_BIAS)
 *		(2) 1 writer active or attempting lock, no waiters for lock
 *		    (ACTIVE_WRITE_BIAS)
 *
 * 0xffff0000	(1) There are writers or readers queued but none active
 *		    or in the process of attempting lock.
 *		    (WAITING_BIAS)
 *		Note: writer can attempt to steal lock for this count by adding
 *		ACTIVE_WRITE_BIAS in cmpxchg and checking the old count
 *
 * 0xfffe0001	(1) 1 writer active, or attempting lock. Waiters on queue.
 *		    (ACTIVE_WRITE_BIAS + WAITING_BIAS)
 *
 * Note: Readers attempt to lock by adding ACTIVE_BIAS in down_read and checking
 *	 the count becomes more than 0 for successful lock acquisition,
 *	 i.e. the case where there are only readers or nobody has lock.
 *	 (1st and 2nd case above).
 *
 *	 Writers attempt to lock by adding ACTIVE_WRITE_BIAS in down_write and
 *	 checking the count becomes ACTIVE_WRITE_BIAS for successful lock
 *	 acquisition (i.e. nobody else has lock or attempts lock).  If
 *	 unsuccessful, in rwsem_down_write_failed, we'll check to see if there
 *	 are only waiters but none active (5th case above), and attempt to
 *	 steal the lock.
 */
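
/*
 * Worked example (illustration only, using the 32-bit values above, where
 * ACTIVE_BIAS == 0x00000001, WAITING_BIAS == 0xffff0000 and
 * ACTIVE_WRITE_BIAS == WAITING_BIAS + ACTIVE_BIAS == 0xffff0001):
 *
 *	2 readers hold the lock, 1 writer queued:
 *		count = 2*ACTIVE_BIAS + WAITING_BIAS = 0xffff0002
 *	both readers drop the lock:
 *		count = WAITING_BIAS                 = 0xffff0000
 *	the queued (or a stealing) writer grabs it:
 *		count = ACTIVE_WRITE_BIAS            = 0xffff0001
 */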
/*
 * Initialize an rwsem:
 */
void __init_rwsem(struct rw_semaphore *sem, const char *name,
		  struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	/*
	 * Make sure we are not reinitializing a held semaphore:
	 */
	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
	lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
	sem->count = RWSEM_UNLOCKED_VALUE;
	raw_spin_lock_init(&sem->wait_lock);
	INIT_LIST_HEAD(&sem->wait_list);
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
	sem->owner = NULL;
	osq_lock_init(&sem->osq);
#endif
}

EXPORT_SYMBOL(__init_rwsem);
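
/*
 * Usage sketch (caller side, for context): callers normally do not invoke
 * __init_rwsem() directly; they use the init_rwsem() wrapper from
 * <linux/rwsem.h>, which supplies a per-site lock_class_key for lockdep:
 *
 *	static struct rw_semaphore my_sem;	// hypothetical example lock
 *
 *	init_rwsem(&my_sem);	// ends up in __init_rwsem(&my_sem, ...)
 *	down_read(&my_sem);	// fast path; slow paths below on contention
 *	up_read(&my_sem);
 */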
enum rwsem_waiter_type {
	RWSEM_WAITING_FOR_WRITE,
	RWSEM_WAITING_FOR_READ
};

struct rwsem_waiter {
	struct list_head list;
	struct task_struct *task;
	enum rwsem_waiter_type type;
};

enum rwsem_wake_type {
	RWSEM_WAKE_ANY,		/* Wake whatever's at head of wait list */
	RWSEM_WAKE_READERS,	/* Wake readers only */
	RWSEM_WAKE_READ_OWNED	/* Waker thread holds the read lock */
};
/*
 * handle the lock release when processes blocked on it that can now run
 * - if we come here from up_xxxx(), then:
 *   - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
 *   - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
 * - there must be someone on the queue
 * - the spinlock must be held by the caller
 * - woken process blocks are discarded from the list after having task zeroed
 * - writers are only woken if downgrading is false
 */
static struct rw_semaphore *
__rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
{
	struct rwsem_waiter *waiter;
	struct task_struct *tsk;
	struct list_head *next;
	long oldcount, woken, loop, adjustment;

	waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
	if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
		if (wake_type == RWSEM_WAKE_ANY)
			/* Wake writer at the front of the queue, but do not
			 * grant it the lock yet as we want other writers
			 * to be able to steal it.  Readers, on the other hand,
			 * will block as they will notice the queued writer.
			 */
			wake_up_process(waiter->task);
		goto out;
	}

	/* Writers might steal the lock before we grant it to the next reader.
	 * We prefer to do the first reader grant before counting readers
	 * so we can bail out early if a writer stole the lock.
	 */
	adjustment = 0;
	if (wake_type != RWSEM_WAKE_READ_OWNED) {
		adjustment = RWSEM_ACTIVE_READ_BIAS;
 try_reader_grant:
		oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
		if (unlikely(oldcount < RWSEM_WAITING_BIAS)) {
			/* A writer stole the lock. Undo our reader grant. */
			if (rwsem_atomic_update(-adjustment, sem) &
						RWSEM_ACTIVE_MASK)
				goto out;
			/* Last active locker left. Retry waking readers. */
			goto try_reader_grant;
		}
	}

	/* Grant an infinite number of read locks to the readers at the front
	 * of the queue.  Note we increment the 'active part' of the count by
	 * the number of readers before waking any processes up.
	 */
	woken = 0;
	do {
		woken++;

		if (waiter->list.next == &sem->wait_list)
			break;

		waiter = list_entry(waiter->list.next,
					struct rwsem_waiter, list);

	} while (waiter->type != RWSEM_WAITING_FOR_WRITE);

	adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
	if (waiter->type != RWSEM_WAITING_FOR_WRITE)
		/* hit end of list above */
		adjustment -= RWSEM_WAITING_BIAS;

	if (adjustment)
		rwsem_atomic_add(adjustment, sem);

	next = sem->wait_list.next;
	loop = woken;
	do {
		waiter = list_entry(next, struct rwsem_waiter, list);
		next = waiter->list.next;
		tsk = waiter->task;
		smp_mb();
		waiter->task = NULL;
		wake_up_process(tsk);
		put_task_struct(tsk);
	} while (--loop);

	sem->wait_list.next = next;
	next->prev = &sem->wait_list;

 out:
	return sem;
}
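
/*
 * Worked example for the adjustment arithmetic above (illustration only):
 * RWSEM_WAKE_ANY with three readers at the head of the queue and nothing
 * queued behind them.  The first reader grant adds ACTIVE_READ_BIAS
 * (adjustment = 1).  After the counting loop, woken == 3, so:
 *
 *	adjustment = 3*RWSEM_ACTIVE_READ_BIAS - 1 = 2	(two more read grants)
 *	adjustment -= RWSEM_WAITING_BIAS		(list is now empty;
 *							 WAITING_BIAS is negative,
 *							 so this removes it)
 *
 * and a single rwsem_atomic_add() applies both changes before the readers
 * are actually woken.
 */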
/*
 * Wait for the read lock to be granted
 */
__visible
struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
{
	long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
	struct rwsem_waiter waiter;
	struct task_struct *tsk = current;

	/* set up my own style of waitqueue */
	waiter.task = tsk;
	waiter.type = RWSEM_WAITING_FOR_READ;
	get_task_struct(tsk);

	raw_spin_lock_irq(&sem->wait_lock);
	if (list_empty(&sem->wait_list))
		adjustment += RWSEM_WAITING_BIAS;
	list_add_tail(&waiter.list, &sem->wait_list);

	/* we're now waiting on the lock, but no longer actively locking */
	count = rwsem_atomic_update(adjustment, sem);

	/* If there are no active locks, wake the front queued process(es).
	 *
	 * If there are no writers and we are first in the queue,
	 * wake our own waiter to join the existing active readers !
	 */
	if (count == RWSEM_WAITING_BIAS ||
	    (count > RWSEM_WAITING_BIAS &&
	     adjustment != -RWSEM_ACTIVE_READ_BIAS))
		sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);

	raw_spin_unlock_irq(&sem->wait_lock);

	/* wait to be given the lock */
	while (true) {
		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
		if (!waiter.task)
			break;
		schedule();
	}

	__set_task_state(tsk, TASK_RUNNING);
	return sem;
}
EXPORT_SYMBOL(rwsem_down_read_failed);
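
/*
 * For context (sketch of the arch fast path, which differs per architecture):
 * down_read() optimistically adds RWSEM_ACTIVE_READ_BIAS to sem->count and
 * only falls into rwsem_down_read_failed() when the result is not positive,
 * i.e. when a writer is active or waiting; roughly:
 *
 *	if (unlikely(atomic_long_inc_return((atomic_long_t *)&sem->count) <= 0))
 *		rwsem_down_read_failed(sem);
 */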
static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
{
	/*
	 * Try acquiring the write lock. Check count first in order
	 * to reduce unnecessary expensive cmpxchg() operations.
	 */
	if (count == RWSEM_WAITING_BIAS &&
	    cmpxchg(&sem->count, RWSEM_WAITING_BIAS,
		    RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) {
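		/*
		 * The cmpxchg above consumed the queue's WAITING_BIAS as part
		 * of this writer's ACTIVE_WRITE_BIAS.  If other waiters remain
		 * queued behind us, put a WAITING_BIAS back so the count still
		 * shows a non-empty queue (the 0xfffe0001 case in the guide
		 * above).
		 */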
		if (!list_is_singular(&sem->wait_list))
			rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
		return true;
	}

	return false;
}
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
/*
 * Try to acquire write lock before the writer has been put on wait queue.
 */
static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
{
	long old, count = ACCESS_ONCE(sem->count);

	while (true) {
		if (!(count == 0 || count == RWSEM_WAITING_BIAS))
			return false;

		old = cmpxchg(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS);
		if (old == count)
			return true;

		count = old;
	}
}
static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
{
	struct task_struct *owner;
	bool on_cpu = false;

	if (need_resched())
		return false;

	rcu_read_lock();
	owner = ACCESS_ONCE(sem->owner);
	if (owner)
		on_cpu = owner->on_cpu;
	rcu_read_unlock();

	/*
	 * If sem->owner is not set, yet we have just recently entered the
	 * slowpath, then there is a possibility reader(s) may have the lock.
	 * To be safe, avoid spinning in these situations.
	 */
	return on_cpu;
}
static inline bool owner_running(struct rw_semaphore *sem,
				 struct task_struct *owner)
{
	if (sem->owner != owner)
		return false;

	/*
	 * Ensure we emit the owner->on_cpu, dereference _after_ checking
	 * sem->owner still matches owner, if that fails, owner might
	 * point to free()d memory, if it still matches, the rcu_read_lock()
	 * ensures the memory stays valid.
	 */
	barrier();

	return owner->on_cpu;
}
static noinline
bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner)
{
	rcu_read_lock();
	while (owner_running(sem, owner)) {
		if (need_resched())
			break;

		cpu_relax_lowlatency();
	}
	rcu_read_unlock();

	/*
	 * We break out the loop above on need_resched() or when the
	 * owner changed, which is a sign for heavy contention. Return
	 * success only when sem->owner is NULL.
	 */
	return sem->owner == NULL;
}
static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
	struct task_struct *owner;
	bool taken = false;

	preempt_disable();

	/* sem->wait_lock should not be held when doing optimistic spinning */
	if (!rwsem_can_spin_on_owner(sem))
		goto done;

	if (!osq_lock(&sem->osq))
		goto done;

	while (true) {
		owner = ACCESS_ONCE(sem->owner);
		if (owner && !rwsem_spin_on_owner(sem, owner))
			break;

		/* wait_lock will be acquired if write_lock is obtained */
		if (rwsem_try_write_lock_unqueued(sem)) {
			taken = true;
			break;
		}

		/*
		 * When there's no owner, we might have preempted between the
		 * owner acquiring the lock and setting the owner field. If
		 * we're an RT task that will live-lock because we won't let
		 * the owner complete.
		 */
		if (!owner && (need_resched() || rt_task(current)))
			break;

		/*
		 * The cpu_relax() call is a compiler barrier which forces
		 * everything in this loop to be re-loaded. We don't need
		 * memory barriers as we'll eventually observe the right
		 * values at the cost of a few extra spins.
		 */
		cpu_relax_lowlatency();
	}
	osq_unlock(&sem->osq);
done:
	preempt_enable();
	return taken;
}
#else
static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
	return false;
}
#endif
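
/*
 * Design note on the spinning above: osq_lock() queues would-be spinners on
 * the semaphore's MCS-style optimistic spin queue, so at most one task at a
 * time busy-waits on sem->owner while the others spin locally on their own
 * queue node.  This keeps the owner field's cache line from bouncing between
 * many spinning CPUs.
 */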
/*
 * Wait until we successfully acquire the write lock
 */
__visible
struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
{
	long count;
	bool waiting = true; /* any queued threads before us */
	struct rwsem_waiter waiter;

	/* undo write bias from down_write operation, stop active locking */
	count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem);

	/* do optimistic spinning and steal lock if possible */
	if (rwsem_optimistic_spin(sem))
		return sem;

	/*
	 * Optimistic spinning failed, proceed to the slowpath
	 * and block until we can acquire the sem.
	 */
	waiter.task = current;
	waiter.type = RWSEM_WAITING_FOR_WRITE;

	raw_spin_lock_irq(&sem->wait_lock);

	/* account for this before adding a new element to the list */
	if (list_empty(&sem->wait_list))
		waiting = false;

	list_add_tail(&waiter.list, &sem->wait_list);

	/* we're now waiting on the lock, but no longer actively locking */
	if (waiting) {
		count = ACCESS_ONCE(sem->count);

		/*
		 * If there were already threads queued before us and there are
		 * no active writers, the lock must be read owned; so we try to
		 * wake any read locks that were queued ahead of us.
		 */
		if (count > RWSEM_WAITING_BIAS)
			sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);

	} else
		count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);

	/* wait until we successfully acquire the lock */
	set_current_state(TASK_UNINTERRUPTIBLE);
	while (true) {
		if (rwsem_try_write_lock(count, sem))
			break;
		raw_spin_unlock_irq(&sem->wait_lock);

		/* Block until there are no active lockers. */
		do {
			schedule();
			set_current_state(TASK_UNINTERRUPTIBLE);
		} while ((count = sem->count) & RWSEM_ACTIVE_MASK);

		raw_spin_lock_irq(&sem->wait_lock);
	}
	__set_current_state(TASK_RUNNING);

	list_del(&waiter.list);
	raw_spin_unlock_irq(&sem->wait_lock);

	return sem;
}
EXPORT_SYMBOL(rwsem_down_write_failed);
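
/*
 * Count trajectory for the write slowpath above (illustration): the
 * down_write() fast path added ACTIVE_WRITE_BIAS; on contention we subtract
 * it again, add WAITING_BIAS if we are the first waiter, and finally
 * rwsem_try_write_lock() cmpxchg()es WAITING_BIAS into ACTIVE_WRITE_BIAS once
 * no lockers remain active.
 */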
/*
 * handle waking up a waiter on the semaphore
 * - up_read/up_write has decremented the active part of count if we come here
 */
__visible
struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&sem->wait_lock, flags);

	/* do nothing if list empty */
	if (!list_empty(&sem->wait_list))
		sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);

	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);

	return sem;
}
EXPORT_SYMBOL(rwsem_wake);
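
/*
 * For context (sketch): the up_read()/up_write() fast paths subtract their
 * bias from sem->count and call rwsem_wake() only when the result suggests
 * queued waiters with no remaining active lockers; the list-empty test above
 * is a cheap re-check under wait_lock, since a racing waker or lock stealer
 * may already have emptied the queue.
 */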
/*
 * downgrade a write lock into a read lock
 * - caller incremented waiting part of count and discovered it still negative
 * - just wake up any readers at the front of the queue
 */
__visible
struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&sem->wait_lock, flags);

	/* do nothing if list empty */
	if (!list_empty(&sem->wait_list))
		sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);

	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);

	return sem;
}
EXPORT_SYMBOL(rwsem_downgrade_wake);
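
/*
 * Usage sketch (caller side, hypothetical helpers): downgrade_write() is the
 * public entry point that ends up here when waiters are queued:
 *
 *	down_write(&my_sem);		// exclusive access while updating
 *	update_shared_state();		// hypothetical helper
 *	downgrade_write(&my_sem);	// keep the lock, but for read only
 *	consume_shared_state();		// hypothetical helper
 *	up_read(&my_sem);
 */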