kernel/locking/percpu-rwsem.c

   1 #include <linux/atomic.h>
   2 #include <linux/rwsem.h>
   3 #include <linux/percpu.h>
   4 #include <linux/lockdep.h>
   5 #include <linux/percpu-rwsem.h>
   6 #include <linux/rcupdate.h>
   7 #include <linux/sched.h>
   8 #include <linux/errno.h>
   9
  10 #include "rwsem.h"
  11
  12 int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
  13                         const char *name, struct lock_class_key *rwsem_key)
  14 {
             /*
              * Initialize every field of @sem.
              *
              * @name and @rwsem_key are only forwarded to __init_rwsem() for
              * the embedded ->rw_sem.
              *
              * Returns 0 on success, or -ENOMEM if the per-CPU reader counter
              * cannot be allocated; in that case only ->read_count (now NULL)
              * has been written.
              */
  15         sem->read_count = alloc_percpu(int);
  16         if (unlikely(!sem->read_count))
  17                 return -ENOMEM;
  18
  19         /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
  20         rcu_sync_init(&sem->rss, RCU_SCHED_SYNC);
  21         __init_rwsem(&sem->rw_sem, name, rwsem_key);
  22         rcuwait_init(&sem->writer);
             /* Only percpu_down_write() sets this; start with readers unblocked. */
  23         sem->readers_block = 0;
  24         return 0;
  25 }
  26 EXPORT_SYMBOL_GPL(__percpu_init_rwsem);
  27
  28 void percpu_free_rwsem(struct percpu_rw_semaphore *sem)
  29 {
  30         /*
              * Tear down @sem: destroy its rcu_sync state and free the per-CPU
              * reader counter.
              *
              * A semaphore whose ->read_count is NULL is left untouched.  That
              * covers a zero-filled, never-initialized structure (see below),
              * and, because ->read_count is reset to NULL at the end of this
              * function, it also turns a repeated call into a no-op.
              *
  31          * XXX: temporary kludge. The error path in alloc_super()
  32          * assumes that percpu_free_rwsem() is safe after kzalloc().
  33          */
  34         if (!sem->read_count)
  35                 return;
  36
  37         rcu_sync_dtor(&sem->rss);
  38         free_percpu(sem->read_count);
  39         sem->read_count = NULL; /* catch use after free bugs */
  40 }
  41 EXPORT_SYMBOL_GPL(percpu_free_rwsem);
  42
  43 int __percpu_down_read(struct percpu_rw_semaphore *sem, int try)
  44 {
  45         /*
              * Reader-side slow path.
              *
              * Returns 1 once the caller holds @sem for reading.  Returns 0
              * only when @try is non-zero and readers_block was observed set,
              * i.e. a writer is in the way and the caller asked not to sleep.
              *
              * NOTE(review): the caller (presumably percpu_down_read(), which
              * is not part of this file) is expected to have disabled
              * preemption and already incremented this CPU's read_count; the
              * comments below rely on both -- confirm against the header.
              *
  46          * Due to having preemption disabled the decrement happens on
  47          * the same CPU as the increment, avoiding the
  48          * increment-on-one-CPU-and-decrement-on-another problem.
  49          *
  50          * If the reader misses the writer's assignment of readers_block, then
  51          * the writer is guaranteed to see the reader's increment.
  52          *
  53          * Conversely, any readers that increment their sem->read_count after
  54          * the writer looks are guaranteed to see the readers_block value,
  55          * which in turn means that they are guaranteed to immediately
  56          * decrement their sem->read_count, so that it doesn't matter that the
  57          * writer missed them.
  58          */
  59
  60         smp_mb(); /* A matches D */
  61
  62         /*
  63          * If !readers_block the critical section starts here, matched by the
  64          * release in percpu_up_write().
  65          */
  66         if (likely(!smp_load_acquire(&sem->readers_block)))
  67                 return 1;
  68
  69         /*
  70          * Per the above comment; we still have preemption disabled and
  71          * will thus decrement on the same CPU as we incremented.
  72          */
  73         __percpu_up_read(sem);
  74
             /* Trylock: the increment is undone above, report failure. */
  75         if (try)
  76                 return 0;
  77
  78         /*
  79          * We either call schedule() in the wait, or we'll fall through
  80          * and reschedule on the preempt_enable() in percpu_down_read().
  81          */
  82         preempt_enable_no_resched();
  83
  84         /*
  85          * Avoid lockdep for the down/up_read(); we already have them.
              *
              * percpu_down_write() holds rw_sem via down_write() until
              * percpu_up_write(), so __down_read() waits here for as long as a
              * writer is active.  Once rw_sem is obtained, count ourselves as
              * a reader and release rw_sem again right away.
  86          */
  87         __down_read(&sem->rw_sem);
  88         this_cpu_inc(*sem->read_count);
  89         __up_read(&sem->rw_sem);
  90
             /* Return with preemption disabled again, as it was on entry. */
  91         preempt_disable();
  92         return 1;
  93 }
  94 EXPORT_SYMBOL_GPL(__percpu_down_read);
  95
  96 void __percpu_up_read(struct percpu_rw_semaphore *sem)
  97 {
             /*
              * Drop one reader reference: decrement this CPU's read_count and
              * wake the task, if any, waiting on sem->writer (the writer
              * sleeping in percpu_down_write()).
              *
              * NOTE(review): __this_cpu_dec() is used, so callers presumably
              * have preemption disabled, as __percpu_down_read() does when it
              * calls in here -- confirm for the other callers.
              */
  98         smp_mb(); /* B matches C */
  99         /*
 100          * In other words, if they see our decrement (presumably to aggregate
 101          * zero, as that is the only time it matters) they will also see our
 102          * critical section.
 103          */
 104         __this_cpu_dec(*sem->read_count);
 105
 106         /* Prod writer to recheck readers_active_check() */
 107         rcuwait_wake_up(&sem->writer);
 108 }
 109 EXPORT_SYMBOL_GPL(__percpu_up_read);
 110
 111 #define per_cpu_sum(var)                                                \
     /* Evaluates to the sum of @var's instances over all possible CPUs. */  \
     /* The result has the same type as @var.                            */  \
     /* Declares a local named 'cpu', so @var must not refer to one.     */  \
 112 ({                                                                      \
 113         typeof(var) __sum = 0;                                          \
 114         int cpu;                                                        \
             /* NOTE(review): presumably rejects types that cannot be    */  \
             /* accessed atomically -- confirm.                          */  \
 115         compiletime_assert_atomic_type(__sum);                          \
 116         for_each_possible_cpu(cpu)                                      \
 117                 __sum += per_cpu(var, cpu);                             \
 118         __sum;                                                          \
 119 })
 120
 121 /*
 122  * Return true if the modular sum of the sem->read_count per-CPU variable is
 123  * zero.  If this sum is zero, then it is stable due to the fact that if any
 124  * newly arriving readers increment a given counter, they will immediately
 125  * decrement that same counter.
      *
      * Used as the wait condition of percpu_down_write(), i.e. after the writer
      * has set readers_block.  Returns false as soon as the sum is non-zero,
      * without issuing the barrier below.
 126  */
 127 static bool readers_active_check(struct percpu_rw_semaphore *sem)
 128 {
 129         if (per_cpu_sum(*sem->read_count) != 0)
 130                 return false;
 131
 132         /*
 133          * If we observed the decrement; ensure we see the entire critical
 134          * section.
 135          */
 136
 137         smp_mb(); /* C matches B */
 138
 139         return true;
 140 }
 141
 142 void percpu_down_write(struct percpu_rw_semaphore *sem)
 143 {
             /*
              * Acquire @sem for writing.  Returns once rw_sem is held for
              * write, readers_block is set, and readers_active_check() has
              * reported that the summed per-CPU reader count is zero.
              *
              * Undone, in reverse order, by percpu_up_write().
              */

 144         /* Notify readers to take the slow path. */
 145         rcu_sync_enter(&sem->rss);
 146
             /*
              * Excludes other writers, and holds back slow-path readers in
              * __percpu_down_read(), which take rw_sem for read.
              */
 147         down_write(&sem->rw_sem);
 148
 149         /*
 150          * Notify new readers to block; up until now, and thus throughout the
 151          * longish rcu_sync_enter() above, new readers could still come in.
 152          */
 153         WRITE_ONCE(sem->readers_block, 1);
 154
 155         smp_mb(); /* D matches A */
 156
 157         /*
 158          * If they don't see our write of readers_block, then we are
 159          * guaranteed to see their sem->read_count increment, and therefore
 160          * will wait for them.
 161          */
 162
 163         /* Wait for all now active readers to complete. */
 164         rcuwait_wait_event(&sem->writer, readers_active_check(sem));
 165 }
 166 EXPORT_SYMBOL_GPL(percpu_down_write);
 167
 168 void percpu_up_write(struct percpu_rw_semaphore *sem)
 169 {
             /*
              * Release @sem after percpu_down_write(), undoing its steps in
              * reverse order: clear readers_block, drop rw_sem, then leave the
              * rcu_sync section.
              */

 170         /*
 171          * Signal the writer is done, no fast path yet.
 172          *
 173          * One reason that we cannot just immediately flip to readers_fast is
 174          * that new readers might fail to see the results of this writer's
 175          * critical section.
 176          *
 177          * Therefore we force it through the slow path which guarantees an
 178          * acquire and thereby guarantees the critical section's consistency.
              *
              * The release here pairs with the smp_load_acquire() of
              * readers_block in __percpu_down_read().
 179          */
 180         smp_store_release(&sem->readers_block, 0);
 181
 182         /*
 183          * Release the write lock, this will allow readers back in the game.
 184          */
 185         up_write(&sem->rw_sem);
 186
 187         /*
 188          * Once this completes (at least one RCU-sched grace period hence) the
 189          * reader fast path will be available again. Safe to use outside the
 190          * exclusive write lock because it's counting.
 191          */
 192         rcu_sync_exit(&sem->rss);
 193 }
 194 EXPORT_SYMBOL_GPL(percpu_up_write);