kernel/rtmutex.c

   1 /*
   2  * RT-Mutexes: simple blocking mutual exclusion locks with PI support
   3  *
   4  * started by Ingo Molnar and Thomas Gleixner.
   5  *
   6  *  Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
   7  *  Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
   8  *  Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
   9  *  Copyright (C) 2006 Esben Nielsen
  10  *
  11  *  See Documentation/rt-mutex-design.txt for details.
  12  */
  13 #include <linux/spinlock.h>
  14 #include <linux/export.h>
  15 #include <linux/sched.h>
  16 #include <linux/sched/rt.h>
  17 #include <linux/timer.h>
  18
  19 #include "rtmutex_common.h"
  20
  21 /*
  22  * lock->owner state tracking:
  23  *
  24  * lock->owner holds the task_struct pointer of the owner. Bit 0
  25  * is used to keep track of the "lock has waiters" state.
  26  *
  27  * owner        bit0
  28  * NULL         0       lock is free (fast acquire possible)
  29  * NULL         1       lock is free and has waiters and the top waiter
  30  *                              is going to take the lock*
  31  * taskpointer  0       lock is held (fast release possible)
  32  * taskpointer  1       lock is held and has waiters**
  33  *
  34  * The fast atomic compare exchange based acquire and release is only
  35  * possible when bit 0 of lock->owner is 0.
  36  *
   37  * (*) It can also be a transitional state while grabbing the lock
   38  * with ->wait_lock held. To prevent any fast path cmpxchg on the lock,
   39  * we need to set bit 0 before looking at the lock, and the owner may be
   40  * NULL for this short time, hence this can be a transitional state.
  41  *
  42  * (**) There is a small time when bit 0 is set but there are no
  43  * waiters. This can happen when grabbing the lock in the slow path.
  44  * To prevent a cmpxchg of the owner releasing the lock, we need to
  45  * set this bit before looking at the lock.
  46  */
  47
  48 static void
  49 rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner)
  50 {
  51         unsigned long val = (unsigned long)owner;
  52
  53         if (rt_mutex_has_waiters(lock))
  54                 val |= RT_MUTEX_HAS_WAITERS;
  55
  56         lock->owner = (struct task_struct *)val;
  57 }
  58
  59 static inline void clear_rt_mutex_waiters(struct rt_mutex *lock)
  60 {
  61         lock->owner = (struct task_struct *)
  62                         ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
  63 }
  64
  65 static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
  66 {
  67         if (!rt_mutex_has_waiters(lock))
  68                 clear_rt_mutex_waiters(lock);
  69 }
  70
  71 /*
  72  * We can speed up the acquire/release, if the architecture
  73  * supports cmpxchg and if there's no debugging state to be set up
  74  */
  75 #if defined(__HAVE_ARCH_CMPXCHG) && !defined(CONFIG_DEBUG_RT_MUTEXES)
  76 # define rt_mutex_cmpxchg(l,c,n)        (cmpxchg(&l->owner, c, n) == c)
  77 static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
  78 {
  79         unsigned long owner, *p = (unsigned long *) &lock->owner;
  80
  81         do {
  82                 owner = *p;
  83         } while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner);
  84 }
  85
  86 /*
  87  * Safe fastpath aware unlock:
  88  * 1) Clear the waiters bit
  89  * 2) Drop lock->wait_lock
  90  * 3) Try to unlock the lock with cmpxchg
  91  */
  92 static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
  93         __releases(lock->wait_lock)
  94 {
  95         struct task_struct *owner = rt_mutex_owner(lock);
  96
  97         clear_rt_mutex_waiters(lock);
  98         raw_spin_unlock(&lock->wait_lock);
  99         /*
 100          * If a new waiter comes in between the unlock and the cmpxchg
 101          * we have two situations:
 102          *
 103          * unlock(wait_lock);
 104          *                                      lock(wait_lock);
 105          * cmpxchg(p, owner, 0) == owner
 106          *                                      mark_rt_mutex_waiters(lock);
 107          *                                      acquire(lock);
 108          * or:
 109          *
 110          * unlock(wait_lock);
 111          *                                      lock(wait_lock);
 112          *                                      mark_rt_mutex_waiters(lock);
 113          *
 114          * cmpxchg(p, owner, 0) != owner
 115          *                                      enqueue_waiter();
 116          *                                      unlock(wait_lock);
 117          * lock(wait_lock);
 118          * wake waiter();
 119          * unlock(wait_lock);
 120          *                                      lock(wait_lock);
 121          *                                      acquire(lock);
 122          */
 123         return rt_mutex_cmpxchg(lock, owner, NULL);
 124 }
 125
 126 #else
 127 # define rt_mutex_cmpxchg(l,c,n)        (0)
 128 static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
 129 {
 130         lock->owner = (struct task_struct *)
 131                         ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
 132 }
 133
 134 /*
 135  * Simple slow path only version: lock->owner is protected by lock->wait_lock.
 136  */
 137 static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
 138         __releases(lock->wait_lock)
 139 {
 140         lock->owner = NULL;
 141         raw_spin_unlock(&lock->wait_lock);
 142         return true;
 143 }
 144 #endif
 145
 146 /*
 147  * Calculate task priority from the waiter list priority
 148  *
 149  * Return task->normal_prio when the waiter list is empty or when
 150  * the waiter is not allowed to do priority boosting
 151  */
 152 int rt_mutex_getprio(struct task_struct *task)
 153 {
 154         if (likely(!task_has_pi_waiters(task)))
 155                 return task->normal_prio;
 156
 157         return min(task_top_pi_waiter(task)->pi_list_entry.prio,
 158                    task->normal_prio);
 159 }
 160
 161 /*
 162  * Adjust the priority of a task, after its pi_waiters got modified.
 163  *
 164  * This can be both boosting and unboosting. task->pi_lock must be held.
 165  */
 166 static void __rt_mutex_adjust_prio(struct task_struct *task)
 167 {
 168         int prio = rt_mutex_getprio(task);
 169
 170         if (task->prio != prio)
 171                 rt_mutex_setprio(task, prio);
 172 }
 173
 174 /*
 175  * Adjust task priority (undo boosting). Called from the exit path of
 176  * rt_mutex_slowunlock() and rt_mutex_slowlock().
 177  *
 178  * (Note: We do this outside of the protection of lock->wait_lock to
 179  * allow the lock to be taken while or before we readjust the priority
 180  * of task. We do not use the spin_xx_mutex() variants here as we are
 181  * outside of the debug path.)
 182  */
 183 static void rt_mutex_adjust_prio(struct task_struct *task)
 184 {
 185         unsigned long flags;
 186
 187         raw_spin_lock_irqsave(&task->pi_lock, flags);
 188         __rt_mutex_adjust_prio(task);
 189         raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 190 }
 191
  192 /*
  193  * Max number of times we'll walk the boosting chain:
       * rt_mutex_adjust_prio_chain() gives up with -EDEADLK and prints a
       * warning once its step counter exceeds this value. The warning text
       * mentions that the admin can change the limit at run time.
  194  */
  195 int max_lock_depth = 1024;
 196
 197 static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
 198 {
 199         return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
 200 }
 201
  202 /*
  203  * Adjust the priority chain. Also used for deadlock detection.
  204  * Drops one reference on @task (put_task_struct()) on every return
       * path, so the caller must hold a reference - this may free the task.
  205  *
  206  * @task:       the task owning the mutex (owner) for which a chain walk is
  207  *              probably needed
  208  * @deadlock_detect: do we have to carry out deadlock detection?
  209  * @orig_lock:  the mutex (can be NULL if we are walking the chain to recheck
  210  *              things for a task that has just got its priority adjusted, and
  211  *              is waiting on a mutex)
  212  * @next_lock:  the mutex on which the owner of @orig_lock was blocked before
  213  *              we dropped its pi_lock. Is never dereferenced, only used for
  214  *              comparison to detect lock chain changes.
  215  * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
  216  *              its priority to the mutex owner (can be NULL in the case
  217  *              depicted above or if the top waiter is gone away and we are
  218  *              actually deboosting the owner)
  219  * @top_task:   the task which initiated the chain walk. It is named in the
       *              max_lock_depth warning, and a lock on the chain that is
       *              owned by it is treated as a deadlock.
  220  *
  221  * Returns 0, or -EDEADLK when a deadlock is detected or when more than
       * max_lock_depth chain steps were taken.
  222  */
  223 static int rt_mutex_adjust_prio_chain(struct task_struct *task,
  224                                       int deadlock_detect,
  225                                       struct rt_mutex *orig_lock,
  226                                       struct rt_mutex *next_lock,
  227                                       struct rt_mutex_waiter *orig_waiter,
  228                                       struct task_struct *top_task)
  229 {
  230         struct rt_mutex *lock;
  231         struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
  232         int detect_deadlock, ret = 0, depth = 0;
  233         unsigned long flags;
  234
  235         detect_deadlock = debug_rt_mutex_detect_deadlock(orig_waiter,
  236                                                          deadlock_detect);
  237
  238         /*
  239          * The (de)boosting is a step by step approach with a lot of
  240          * pitfalls. We want this to be preemptible and we want to hold a
  241          * maximum of two locks per step. So we have to check
  242          * carefully whether things change under us.
  243          */
              /*
               * 'again' starts the next chain step and counts it against
               * max_lock_depth; 'retry' below repeats the current step
               * without counting it.
               */
  244  again:
  245         if (++depth > max_lock_depth) {
  246                 static int prev_max;
  247
  248                 /*
  249                  * Print this only once. If the admin changes the limit,
  250                  * print a new message when reaching the limit again.
  251                  */
  252                 if (prev_max != max_lock_depth) {
  253                         prev_max = max_lock_depth;
  254                         printk(KERN_WARNING "Maximum lock depth %d reached "
  255                                "task: %s (%d)\n", max_lock_depth,
  256                                top_task->comm, task_pid_nr(top_task));
  257                 }
  258                 put_task_struct(task);
  259
  260                 return -EDEADLK;
  261         }
  262  retry:
  263         /*
  264          * Task can not go away as we did a get_task_struct() before !
  265          */
  266         raw_spin_lock_irqsave(&task->pi_lock, flags);
  267
  268         waiter = task->pi_blocked_on;
  269         /*
  270          * Check whether the end of the boosting chain has been
  271          * reached or the state of the chain has changed while we
  272          * dropped the locks.
  273          */
  274         if (!waiter)
  275                 goto out_unlock_pi;
  276
  277         /*
  278          * Check the orig_waiter state. After we dropped the locks,
  279          * the previous owner of the lock might have released the lock.
  280          */
  281         if (orig_waiter && !rt_mutex_owner(orig_lock))
  282                 goto out_unlock_pi;
  283
  284         /*
  285          * We dropped all locks after taking a refcount on @task, so
  286          * the task might have moved on in the lock chain or even left
  287          * the chain completely and blocks now on an unrelated lock or
  288          * on @orig_lock.
  289          *
  290          * We stored the lock on which @task was blocked in @next_lock,
  291          * so we can detect the chain change.
  292          */
  293         if (next_lock != waiter->lock)
  294                 goto out_unlock_pi;
  295
  296         /*
  297          * Drop out, when the task has no waiters. Note,
  298          * top_waiter can be NULL, when we are in the deboosting
  299          * mode!
  300          */
  301         if (top_waiter) {
  302                 if (!task_has_pi_waiters(task))
  303                         goto out_unlock_pi;
  304                 /*
  305                  * If deadlock detection is off, we stop here if we
  306                  * are not the top pi waiter of the task.
  307                  */
  308                 if (!detect_deadlock && top_waiter != task_top_pi_waiter(task))
  309                         goto out_unlock_pi;
  310         }
  311
  312         /*
  313          * When deadlock detection is off then we check, if further
  314          * priority adjustment is necessary.
  315          */
  316         if (!detect_deadlock && waiter->list_entry.prio == task->prio)
  317                 goto out_unlock_pi;
  318
              /*
               * wait_lock is only trylock'ed while task->pi_lock is held. On
               * contention pi_lock is dropped and the whole step is redone.
               * NOTE(review): presumably required because other paths take
               * wait_lock before pi_lock - confirm.
               */
  319         lock = waiter->lock;
  320         if (!raw_spin_trylock(&lock->wait_lock)) {
  321                 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  322                 cpu_relax();
  323                 goto retry;
  324         }
  325
  326         /*
  327          * Deadlock detection. If the lock is the same as the original
  328          * lock which caused us to walk the lock chain or if the
  329          * current lock is owned by the task which initiated the chain
  330          * walk, we detected a deadlock.
  331          */
  332         if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
  333                 debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock);
  334                 raw_spin_unlock(&lock->wait_lock);
  335                 ret = -EDEADLK;
  336                 goto out_unlock_pi;
  337         }
  338
              /* Remember the top waiter of the lock from before the requeue. */
  339         top_waiter = rt_mutex_top_waiter(lock);
  340
  341         /* Requeue the waiter */
  342         plist_del(&waiter->list_entry, &lock->wait_list);
  343         waiter->list_entry.prio = task->prio;
  344         plist_add(&waiter->list_entry, &lock->wait_list);
  345
  346         /* Release the task */
  347         raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  348         if (!rt_mutex_owner(lock)) {
  349                 /*
  350                  * If the requeue above changed the top waiter, then we need
  351                  * to wake the new top waiter up to try to get the lock.
  352                  */
  353
  354                 if (top_waiter != rt_mutex_top_waiter(lock))
  355                         wake_up_process(rt_mutex_top_waiter(lock)->task);
  356                 raw_spin_unlock(&lock->wait_lock);
  357                 goto out_put_task;
  358         }
  359         put_task_struct(task);
  360
  361         /* Grab the next task */
  362         task = rt_mutex_owner(lock);
  363         get_task_struct(task);
  364         raw_spin_lock_irqsave(&task->pi_lock, flags);
  365
              /*
               * Propagate the requeue into the owner's pi_waiters. If the
               * requeued waiter is the top waiter of the lock now, its pi
               * entry replaces the one of the previous top waiter. If it was
               * the top waiter but no longer is, the entry of the new top
               * waiter replaces it. In any other case the owner's pi_waiters
               * list is left alone.
               */
  366         if (waiter == rt_mutex_top_waiter(lock)) {
  367                 /* Boost the owner */
  368                 plist_del(&top_waiter->pi_list_entry, &task->pi_waiters);
  369                 waiter->pi_list_entry.prio = waiter->list_entry.prio;
  370                 plist_add(&waiter->pi_list_entry, &task->pi_waiters);
  371                 __rt_mutex_adjust_prio(task);
  372
  373         } else if (top_waiter == waiter) {
  374                 /* Deboost the owner */
  375                 plist_del(&waiter->pi_list_entry, &task->pi_waiters);
  376                 waiter = rt_mutex_top_waiter(lock);
  377                 waiter->pi_list_entry.prio = waiter->list_entry.prio;
  378                 plist_add(&waiter->pi_list_entry, &task->pi_waiters);
  379                 __rt_mutex_adjust_prio(task);
  380         }
  381
  382         /*
  383          * Check whether the task which owns the current lock is pi
  384          * blocked itself. If yes we store a pointer to the lock for
  385          * the lock chain change detection above. After we dropped
  386          * task->pi_lock next_lock cannot be dereferenced anymore.
  387          */
  388         next_lock = task_blocked_on_lock(task);
  389
  390         raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  391
  392         top_waiter = rt_mutex_top_waiter(lock);
  393         raw_spin_unlock(&lock->wait_lock);
  394
  395         /*
  396          * We reached the end of the lock chain. Stop right here. No
  397          * point to go back just to figure that out.
  398          */
  399         if (!next_lock)
  400                 goto out_put_task;
  401
              /*
               * Without deadlock detection the walk only continues if
               * 'waiter' is the top waiter of the lock we just handled.
               */
  402         if (!detect_deadlock && waiter != top_waiter)
  403                 goto out_put_task;
  404
  405         goto again;
  406
  407  out_unlock_pi:
  408         raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  409  out_put_task:
  410         put_task_struct(task);
  411
  412         return ret;
  413 }
 414
 415 /*
 416  * Try to take an rt-mutex
 417  *
 418  * Must be called with lock->wait_lock held.
 419  *
 420  * @lock:   the lock to be acquired.
 421  * @task:   the task which wants to acquire the lock
 422  * @waiter: the waiter that is queued to the lock's wait list. (could be NULL)
 423  */
 424 static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
 425                 struct rt_mutex_waiter *waiter)
 426 {
 427         /*
 428          * We have to be careful here if the atomic speedups are
 429          * enabled, such that, when
 430          *  - no other waiter is on the lock
 431          *  - the lock has been released since we did the cmpxchg
 432          * the lock can be released or taken while we are doing the
 433          * checks and marking the lock with RT_MUTEX_HAS_WAITERS.
 434          *
 435          * The atomic acquire/release aware variant of
 436          * mark_rt_mutex_waiters uses a cmpxchg loop. After setting
 437          * the WAITERS bit, the atomic release / acquire can not
 438          * happen anymore and lock->wait_lock protects us from the
 439          * non-atomic case.
 440          *
 441          * Note, that this might set lock->owner =
 442          * RT_MUTEX_HAS_WAITERS in the case the lock is not contended
 443          * any more. This is fixed up when we take the ownership.
 444          * This is the transitional state explained at the top of this file.
 445          */
 446         mark_rt_mutex_waiters(lock);
 447
 448         if (rt_mutex_owner(lock))
 449                 return 0;
 450
 451         /*
 452          * It will get the lock because of one of these conditions:
 453          * 1) there is no waiter
 454          * 2) higher priority than waiters
 455          * 3) it is top waiter
 456          */
 457         if (rt_mutex_has_waiters(lock)) {
 458                 if (task->prio >= rt_mutex_top_waiter(lock)->list_entry.prio) {
 459                         if (!waiter || waiter != rt_mutex_top_waiter(lock))
 460                                 return 0;
 461                 }
 462         }
 463
 464         if (waiter || rt_mutex_has_waiters(lock)) {
 465                 unsigned long flags;
 466                 struct rt_mutex_waiter *top;
 467
 468                 raw_spin_lock_irqsave(&task->pi_lock, flags);
 469
 470                 /* remove the queued waiter. */
 471                 if (waiter) {
 472                         plist_del(&waiter->list_entry, &lock->wait_list);
 473                         task->pi_blocked_on = NULL;
 474                 }
 475
 476                 /*
 477                  * We have to enqueue the top waiter(if it exists) into
 478                  * task->pi_waiters list.
 479                  */
 480                 if (rt_mutex_has_waiters(lock)) {
 481                         top = rt_mutex_top_waiter(lock);
 482                         top->pi_list_entry.prio = top->list_entry.prio;
 483                         plist_add(&top->pi_list_entry, &task->pi_waiters);
 484                 }
 485                 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 486         }
 487
 488         /* We got the lock. */
 489         debug_rt_mutex_lock(lock);
 490
 491         rt_mutex_set_owner(lock, task);
 492
 493         rt_mutex_deadlock_account_lock(lock, task);
 494
 495         return 1;
 496 }
 497
 498 /*
 499  * Task blocks on lock.
 500  *
 501  * Prepare waiter and propagate pi chain
 502  *
 503  * This must be called with lock->wait_lock held.
 504  */
 505 static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 506                                    struct rt_mutex_waiter *waiter,
 507                                    struct task_struct *task,
 508                                    int detect_deadlock)
 509 {
 510         struct task_struct *owner = rt_mutex_owner(lock);
 511         struct rt_mutex_waiter *top_waiter = waiter;
 512         struct rt_mutex *next_lock;
 513         int chain_walk = 0, res;
 514         unsigned long flags;
 515
 516         /*
 517          * Early deadlock detection. We really don't want the task to
 518          * enqueue on itself just to untangle the mess later. It's not
 519          * only an optimization. We drop the locks, so another waiter
 520          * can come in before the chain walk detects the deadlock. So
 521          * the other will detect the deadlock and return -EDEADLOCK,
 522          * which is wrong, as the other waiter is not in a deadlock
 523          * situation.
 524          */
 525         if (owner == task)
 526                 return -EDEADLK;
 527
 528         raw_spin_lock_irqsave(&task->pi_lock, flags);
 529         __rt_mutex_adjust_prio(task);
 530         waiter->task = task;
 531         waiter->lock = lock;
 532         plist_node_init(&waiter->list_entry, task->prio);
 533         plist_node_init(&waiter->pi_list_entry, task->prio);
 534
 535         /* Get the top priority waiter on the lock */
 536         if (rt_mutex_has_waiters(lock))
 537                 top_waiter = rt_mutex_top_waiter(lock);
 538         plist_add(&waiter->list_entry, &lock->wait_list);
 539
 540         task->pi_blocked_on = waiter;
 541
 542         raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 543
 544         if (!owner)
 545                 return 0;
 546
 547         raw_spin_lock_irqsave(&owner->pi_lock, flags);
 548         if (waiter == rt_mutex_top_waiter(lock)) {
 549                 plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
 550                 plist_add(&waiter->pi_list_entry, &owner->pi_waiters);
 551
 552                 __rt_mutex_adjust_prio(owner);
 553                 if (owner->pi_blocked_on)
 554                         chain_walk = 1;
 555         } else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) {
 556                 chain_walk = 1;
 557         }
 558
 559         /* Store the lock on which owner is blocked or NULL */
 560         next_lock = task_blocked_on_lock(owner);
 561
 562         raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
 563         /*
 564          * Even if full deadlock detection is on, if the owner is not
 565          * blocked itself, we can avoid finding this out in the chain
 566          * walk.
 567          */
 568         if (!chain_walk || !next_lock)
 569                 return 0;
 570
 571         /*
 572          * The owner can't disappear while holding a lock,
 573          * so the owner struct is protected by wait_lock.
 574          * Gets dropped in rt_mutex_adjust_prio_chain()!
 575          */
 576         get_task_struct(owner);
 577
 578         raw_spin_unlock(&lock->wait_lock);
 579
 580         res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock,
 581                                          next_lock, waiter, task);
 582
 583         raw_spin_lock(&lock->wait_lock);
 584
 585         return res;
 586 }
 587
 588 /*
 589  * Wake up the next waiter on the lock.
 590  *
 591  * Remove the top waiter from the current tasks pi waiter list and
 592  * wake it up.
 593  *
 594  * Called with lock->wait_lock held.
 595  */
 596 static void wakeup_next_waiter(struct rt_mutex *lock)
 597 {
 598         struct rt_mutex_waiter *waiter;
 599         unsigned long flags;
 600
 601         raw_spin_lock_irqsave(&current->pi_lock, flags);
 602
 603         waiter = rt_mutex_top_waiter(lock);
 604
 605         /*
 606          * Remove it from current->pi_waiters. We do not adjust a
 607          * possible priority boost right now. We execute wakeup in the
 608          * boosted mode and go back to normal after releasing
 609          * lock->wait_lock.
 610          */
 611         plist_del(&waiter->pi_list_entry, &current->pi_waiters);
 612
 613         /*
 614          * As we are waking up the top waiter, and the waiter stays
 615          * queued on the lock until it gets the lock, this lock
 616          * obviously has waiters. Just set the bit here and this has
 617          * the added benefit of forcing all new tasks into the
 618          * slow path making sure no task of lower priority than
 619          * the top waiter can steal this lock.
 620          */
 621         lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
 622
 623         raw_spin_unlock_irqrestore(&current->pi_lock, flags);
 624
 625         /*
 626          * It's safe to dereference waiter as it cannot go away as
 627          * long as we hold lock->wait_lock. The waiter task needs to
 628          * acquire it in order to dequeue the waiter.
 629          */
 630         wake_up_process(waiter->task);
 631 }
 632
 633 /*
 634  * Remove a waiter from a lock and give up
 635  *
 636  * Must be called with lock->wait_lock held and
 637  * have just failed to try_to_take_rt_mutex().
 638  */
 639 static void remove_waiter(struct rt_mutex *lock,
 640                           struct rt_mutex_waiter *waiter)
 641 {
 642         int first = (waiter == rt_mutex_top_waiter(lock));
 643         struct task_struct *owner = rt_mutex_owner(lock);
 644         struct rt_mutex *next_lock = NULL;
 645         unsigned long flags;
 646
 647         raw_spin_lock_irqsave(&current->pi_lock, flags);
 648         plist_del(&waiter->list_entry, &lock->wait_list);
 649         current->pi_blocked_on = NULL;
 650         raw_spin_unlock_irqrestore(&current->pi_lock, flags);
 651
 652         if (!owner)
 653                 return;
 654
 655         if (first) {
 656
 657                 raw_spin_lock_irqsave(&owner->pi_lock, flags);
 658
 659                 plist_del(&waiter->pi_list_entry, &owner->pi_waiters);
 660
 661                 if (rt_mutex_has_waiters(lock)) {
 662                         struct rt_mutex_waiter *next;
 663
 664                         next = rt_mutex_top_waiter(lock);
 665                         plist_add(&next->pi_list_entry, &owner->pi_waiters);
 666                 }
 667                 __rt_mutex_adjust_prio(owner);
 668
 669                 /* Store the lock on which owner is blocked or NULL */
 670                 next_lock = task_blocked_on_lock(owner);
 671
 672                 raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
 673         }
 674
 675         WARN_ON(!plist_node_empty(&waiter->pi_list_entry));
 676
 677         if (!next_lock)
 678                 return;
 679
 680         /* gets dropped in rt_mutex_adjust_prio_chain()! */
 681         get_task_struct(owner);
 682
 683         raw_spin_unlock(&lock->wait_lock);
 684
 685         rt_mutex_adjust_prio_chain(owner, 0, lock, next_lock, NULL, current);
 686
 687         raw_spin_lock(&lock->wait_lock);
 688 }
 689
 690 /*
 691  * Recheck the pi chain, in case we got a priority setting
 692  *
 693  * Called from sched_setscheduler
 694  */
 695 void rt_mutex_adjust_pi(struct task_struct *task)
 696 {
 697         struct rt_mutex_waiter *waiter;
 698         struct rt_mutex *next_lock;
 699         unsigned long flags;
 700
 701         raw_spin_lock_irqsave(&task->pi_lock, flags);
 702
 703         waiter = task->pi_blocked_on;
 704         if (!waiter || waiter->list_entry.prio == task->prio) {
 705                 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 706                 return;
 707         }
 708         next_lock = waiter->lock;
 709         raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 710
 711         /* gets dropped in rt_mutex_adjust_prio_chain()! */
 712         get_task_struct(task);
 713
 714         rt_mutex_adjust_prio_chain(task, 0, NULL, next_lock, NULL, task);
 715 }
 716
 717 /**
 718  * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
 719  * @lock:                the rt_mutex to take
 720  * @state:               the state the task should block in (TASK_INTERRUPTIBLE
 721  *                       or TASK_UNINTERRUPTIBLE)
 722  * @timeout:             the pre-initialized and started timer, or NULL for none
 723  * @waiter:              the pre-initialized rt_mutex_waiter
 724  *
 725  * lock->wait_lock must be held by the caller.
 726  */
 727 static int __sched
 728 __rt_mutex_slowlock(struct rt_mutex *lock, int state,
 729                     struct hrtimer_sleeper *timeout,
 730                     struct rt_mutex_waiter *waiter)
 731 {
 732         int ret = 0;
 733
 734         for (;;) {
 735                 /* Try to acquire the lock: */
 736                 if (try_to_take_rt_mutex(lock, current, waiter))
 737                         break;
 738
 739                 /*
 740                  * TASK_INTERRUPTIBLE checks for signals and
 741                  * timeout. Ignored otherwise.
 742                  */
 743                 if (unlikely(state == TASK_INTERRUPTIBLE)) {
 744                         /* Signal pending? */
 745                         if (signal_pending(current))
 746                                 ret = -EINTR;
 747                         if (timeout && !timeout->task)
 748                                 ret = -ETIMEDOUT;
 749                         if (ret)
 750                                 break;
 751                 }
 752
 753                 raw_spin_unlock(&lock->wait_lock);
 754
 755                 debug_rt_mutex_print_deadlock(waiter);
 756
 757                 schedule_rt_mutex(lock);
 758
 759                 raw_spin_lock(&lock->wait_lock);
 760                 set_current_state(state);
 761         }
 762
 763         return ret;
 764 }
 765
 766 static void rt_mutex_handle_deadlock(int res, int detect_deadlock,
 767                                      struct rt_mutex_waiter *w)
 768 {
 769         /*
 770          * If the result is not -EDEADLOCK or the caller requested
 771          * deadlock detection, nothing to do here.
 772          */
 773         if (res != -EDEADLOCK || detect_deadlock)
 774                 return;
 775
 776         /*
 777          * Yell lowdly and stop the task right here.
 778          */
 779         rt_mutex_print_deadlock(w);
 780         while (1) {
 781                 set_current_state(TASK_INTERRUPTIBLE);
 782                 schedule();
 783         }
 784 }
 785
 786 /*
 787  * Slow path lock function:
 788  */
 789 static int __sched
 790 rt_mutex_slowlock(struct rt_mutex *lock, int state,
 791                   struct hrtimer_sleeper *timeout,
 792                   int detect_deadlock)
 793 {
 794         struct rt_mutex_waiter waiter;
 795         int ret = 0;
 796
 797         debug_rt_mutex_init_waiter(&waiter);
 798
 799         raw_spin_lock(&lock->wait_lock);
 800
 801         /* Try to acquire the lock again: */
 802         if (try_to_take_rt_mutex(lock, current, NULL)) {
 803                 raw_spin_unlock(&lock->wait_lock);
 804                 return 0;
 805         }
 806
 807         set_current_state(state);
 808
 809         /* Setup the timer, when timeout != NULL */
 810         if (unlikely(timeout)) {
 811                 hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
 812                 if (!hrtimer_active(&timeout->timer))
 813                         timeout->task = NULL;
 814         }
 815
 816         ret = task_blocks_on_rt_mutex(lock, &waiter, current, detect_deadlock);
 817
 818         if (likely(!ret))
 819                 ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
 820
 821         set_current_state(TASK_RUNNING);
 822
 823         if (unlikely(ret)) {
 824                 remove_waiter(lock, &waiter);
 825                 rt_mutex_handle_deadlock(ret, detect_deadlock, &waiter);
 826         }
 827
 828         /*
 829          * try_to_take_rt_mutex() sets the waiter bit
 830          * unconditionally. We might have to fix that up.
 831          */
 832         fixup_rt_mutex_waiters(lock);
 833
 834         raw_spin_unlock(&lock->wait_lock);
 835
 836         /* Remove pending timer: */
 837         if (unlikely(timeout))
 838                 hrtimer_cancel(&timeout->timer);
 839
 840         debug_rt_mutex_free_waiter(&waiter);
 841
 842         return ret;
 843 }
 844
 845 /*
 846  * Slow path try-lock function:
 847  */
 848 static inline int
 849 rt_mutex_slowtrylock(struct rt_mutex *lock)
 850 {
 851         int ret = 0;
 852
 853         raw_spin_lock(&lock->wait_lock);
 854
 855         if (likely(rt_mutex_owner(lock) != current)) {
 856
 857                 ret = try_to_take_rt_mutex(lock, current, NULL);
 858                 /*
 859                  * try_to_take_rt_mutex() sets the lock waiters
 860                  * bit unconditionally. Clean this up.
 861                  */
 862                 fixup_rt_mutex_waiters(lock);
 863         }
 864
 865         raw_spin_unlock(&lock->wait_lock);
 866
 867         return ret;
 868 }
 869
 870 /*
 871  * Slow path to release a rt-mutex:
 872  */
 873 static void __sched
 874 rt_mutex_slowunlock(struct rt_mutex *lock)
 875 {
 876         raw_spin_lock(&lock->wait_lock);
 877
 878         debug_rt_mutex_unlock(lock);
 879
 880         rt_mutex_deadlock_account_unlock(current);
 881
 882         /*
 883          * We must be careful here if the fast path is enabled. If we
 884          * have no waiters queued we cannot set owner to NULL here
 885          * because of:
 886          *
 887          * foo->lock->owner = NULL;
 888          *                      rtmutex_lock(foo->lock);   <- fast path
 889          *                      free = atomic_dec_and_test(foo->refcnt);
 890          *                      rtmutex_unlock(foo->lock); <- fast path
 891          *                      if (free)
 892          *                              kfree(foo);
 893          * raw_spin_unlock(foo->lock->wait_lock);
 894          *
 895          * So for the fastpath enabled kernel:
 896          *
 897          * Nothing can set the waiters bit as long as we hold
 898          * lock->wait_lock. So we do the following sequence:
 899          *
 900          *      owner = rt_mutex_owner(lock);
 901          *      clear_rt_mutex_waiters(lock);
 902          *      raw_spin_unlock(&lock->wait_lock);
 903          *      if (cmpxchg(&lock->owner, owner, 0) == owner)
 904          *              return;
 905          *      goto retry;
 906          *
 907          * The fastpath disabled variant is simple as all access to
 908          * lock->owner is serialized by lock->wait_lock:
 909          *
 910          *      lock->owner = NULL;
 911          *      raw_spin_unlock(&lock->wait_lock);
 912          */
 913         while (!rt_mutex_has_waiters(lock)) {
 914                 /* Drops lock->wait_lock ! */
 915                 if (unlock_rt_mutex_safe(lock) == true)
 916                         return;
 917                 /* Relock the rtmutex and try again */
 918                 raw_spin_lock(&lock->wait_lock);
 919         }
 920
 921         /*
 922          * The wakeup next waiter path does not suffer from the above
 923          * race. See the comments there.
 924          */
 925         wakeup_next_waiter(lock);
 926
 927         raw_spin_unlock(&lock->wait_lock);
 928
 929         /* Undo pi boosting if necessary: */
 930         rt_mutex_adjust_prio(current);
 931 }
 932
 933 /*
 934  * debug aware fast / slowpath lock,trylock,unlock
 935  *
 936  * The atomic acquire/release ops are compiled away, when either the
 937  * architecture does not support cmpxchg or when debugging is enabled.
 938  */
 939 static inline int
 940 rt_mutex_fastlock(struct rt_mutex *lock, int state,
 941                   int detect_deadlock,
 942                   int (*slowfn)(struct rt_mutex *lock, int state,
 943                                 struct hrtimer_sleeper *timeout,
 944                                 int detect_deadlock))
 945 {
 946         if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
 947                 rt_mutex_deadlock_account_lock(lock, current);
 948                 return 0;
 949         } else
 950                 return slowfn(lock, state, NULL, detect_deadlock);
 951 }
 952
 953 static inline int
 954 rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
 955                         struct hrtimer_sleeper *timeout, int detect_deadlock,
 956                         int (*slowfn)(struct rt_mutex *lock, int state,
 957                                       struct hrtimer_sleeper *timeout,
 958                                       int detect_deadlock))
 959 {
 960         if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
 961                 rt_mutex_deadlock_account_lock(lock, current);
 962                 return 0;
 963         } else
 964                 return slowfn(lock, state, timeout, detect_deadlock);
 965 }
 966
 967 static inline int
 968 rt_mutex_fasttrylock(struct rt_mutex *lock,
 969                      int (*slowfn)(struct rt_mutex *lock))
 970 {
 971         if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
 972                 rt_mutex_deadlock_account_lock(lock, current);
 973                 return 1;
 974         }
 975         return slowfn(lock);
 976 }
 977
 978 static inline void
 979 rt_mutex_fastunlock(struct rt_mutex *lock,
 980                     void (*slowfn)(struct rt_mutex *lock))
 981 {
 982         if (likely(rt_mutex_cmpxchg(lock, current, NULL)))
 983                 rt_mutex_deadlock_account_unlock(current);
 984         else
 985                 slowfn(lock);
 986 }
 987
 988 /**
 989  * rt_mutex_lock - lock a rt_mutex
 990  *
 991  * @lock: the rt_mutex to be locked
 992  */
 993 void __sched rt_mutex_lock(struct rt_mutex *lock)
 994 {
 995         might_sleep();
 996
 997         rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, 0, rt_mutex_slowlock);
 998 }
 999 EXPORT_SYMBOL_GPL(rt_mutex_lock);
1000
1001 /**
1002  * rt_mutex_lock_interruptible - lock a rt_mutex interruptible
1003  *
1004  * @lock:               the rt_mutex to be locked
1005  * @detect_deadlock:    deadlock detection on/off
1006  *
1007  * Returns:
1008  *  0           on success
1009  * -EINTR       when interrupted by a signal
1010  * -EDEADLK     when the lock would deadlock (when deadlock detection is on)
1011  */
1012 int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock,
1013                                                  int detect_deadlock)
1014 {
1015         might_sleep();
1016
1017         return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE,
1018                                  detect_deadlock, rt_mutex_slowlock);
1019 }
1020 EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
1021
1022 /**
1023  * rt_mutex_timed_lock - lock a rt_mutex interruptible
1024  *                      the timeout structure is provided
1025  *                      by the caller
1026  *
1027  * @lock:               the rt_mutex to be locked
1028  * @timeout:            timeout structure or NULL (no timeout)
1029  * @detect_deadlock:    deadlock detection on/off
1030  *
1031  * Returns:
1032  *  0           on success
1033  * -EINTR       when interrupted by a signal
1034  * -ETIMEDOUT   when the timeout expired
1035  * -EDEADLK     when the lock would deadlock (when deadlock detection is on)
1036  */
1037 int
1038 rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout,
1039                     int detect_deadlock)
1040 {
1041         might_sleep();
1042
1043         return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
1044                                        detect_deadlock, rt_mutex_slowlock);
1045 }
1046 EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
1047
1048 /**
1049  * rt_mutex_trylock - try to lock a rt_mutex
1050  *
1051  * @lock:       the rt_mutex to be locked
1052  *
1053  * Returns 1 on success and 0 on contention
1054  */
1055 int __sched rt_mutex_trylock(struct rt_mutex *lock)
1056 {
1057         return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
1058 }
1059 EXPORT_SYMBOL_GPL(rt_mutex_trylock);
1060
1061 /**
1062  * rt_mutex_unlock - unlock a rt_mutex
1063  *
1064  * @lock: the rt_mutex to be unlocked
1065  */
1066 void __sched rt_mutex_unlock(struct rt_mutex *lock)
1067 {
1068         rt_mutex_fastunlock(lock, rt_mutex_slowunlock);
1069 }
1070 EXPORT_SYMBOL_GPL(rt_mutex_unlock);
1071
/**
 * rt_mutex_destroy - mark a mutex unusable
 * @lock: the mutex to be destroyed
 *
 * Marks the mutex as uninitialized; it must not be used any more
 * afterwards. The mutex must not be locked when this function is
 * called (a warning is issued if it is).
 */
void rt_mutex_destroy(struct rt_mutex *lock)
{
	WARN_ON(rt_mutex_is_locked(lock));
#if defined(CONFIG_DEBUG_RT_MUTEXES)
	lock->magic = NULL;
#endif
}

EXPORT_SYMBOL_GPL(rt_mutex_destroy);
1089
1090 /**
1091  * __rt_mutex_init - initialize the rt lock
1092  *
1093  * @lock: the rt lock to be initialized
1094  *
1095  * Initialize the rt lock to unlocked state.
1096  *
1097  * Initializing of a locked rt lock is not allowed
1098  */
1099 void __rt_mutex_init(struct rt_mutex *lock, const char *name)
1100 {
1101         lock->owner = NULL;
1102         raw_spin_lock_init(&lock->wait_lock);
1103         plist_head_init(&lock->wait_list);
1104
1105         debug_rt_mutex_init(lock, name);
1106 }
1107 EXPORT_SYMBOL_GPL(__rt_mutex_init);
1108
1109 /**
1110  * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
1111  *                              proxy owner
1112  *
1113  * @lock:       the rt_mutex to be locked
1114  * @proxy_owner:the task to set as owner
1115  *
1116  * No locking. Caller has to do serializing itself
1117  * Special API call for PI-futex support
1118  */
1119 void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
1120                                 struct task_struct *proxy_owner)
1121 {
1122         __rt_mutex_init(lock, NULL);
1123         debug_rt_mutex_proxy_lock(lock, proxy_owner);
1124         rt_mutex_set_owner(lock, proxy_owner);
1125         rt_mutex_deadlock_account_lock(lock, proxy_owner);
1126 }
1127
1128 /**
1129  * rt_mutex_proxy_unlock - release a lock on behalf of owner
1130  *
1131  * @lock:       the rt_mutex to be locked
1132  *
1133  * No locking. Caller has to do serializing itself
1134  * Special API call for PI-futex support
1135  */
1136 void rt_mutex_proxy_unlock(struct rt_mutex *lock,
1137                            struct task_struct *proxy_owner)
1138 {
1139         debug_rt_mutex_proxy_unlock(lock);
1140         rt_mutex_set_owner(lock, NULL);
1141         rt_mutex_deadlock_account_unlock(proxy_owner);
1142 }
1143
1144 /**
1145  * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
1146  * @lock:               the rt_mutex to take
1147  * @waiter:             the pre-initialized rt_mutex_waiter
1148  * @task:               the task to prepare
1149  * @detect_deadlock:    perform deadlock detection (1) or not (0)
1150  *
1151  * Returns:
1152  *  0 - task blocked on lock
1153  *  1 - acquired the lock for task, caller should wake it up
1154  * <0 - error
1155  *
1156  * Special API call for FUTEX_REQUEUE_PI support.
1157  */
1158 int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1159                               struct rt_mutex_waiter *waiter,
1160                               struct task_struct *task, int detect_deadlock)
1161 {
1162         int ret;
1163
1164         raw_spin_lock(&lock->wait_lock);
1165
1166         if (try_to_take_rt_mutex(lock, task, NULL)) {
1167                 raw_spin_unlock(&lock->wait_lock);
1168                 return 1;
1169         }
1170
1171         /* We enforce deadlock detection for futexes */
1172         ret = task_blocks_on_rt_mutex(lock, waiter, task, 1);
1173
1174         if (ret && !rt_mutex_owner(lock)) {
1175                 /*
1176                  * Reset the return value. We might have
1177                  * returned with -EDEADLK and the owner
1178                  * released the lock while we were walking the
1179                  * pi chain.  Let the waiter sort it out.
1180                  */
1181                 ret = 0;
1182         }
1183
1184         if (unlikely(ret))
1185                 remove_waiter(lock, waiter);
1186
1187         raw_spin_unlock(&lock->wait_lock);
1188
1189         debug_rt_mutex_print_deadlock(waiter);
1190
1191         return ret;
1192 }
1193
1194 /**
1195  * rt_mutex_next_owner - return the next owner of the lock
1196  *
1197  * @lock: the rt lock query
1198  *
1199  * Returns the next owner of the lock or NULL
1200  *
1201  * Caller has to serialize against other accessors to the lock
1202  * itself.
1203  *
1204  * Special API call for PI-futex support
1205  */
1206 struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
1207 {
1208         if (!rt_mutex_has_waiters(lock))
1209                 return NULL;
1210
1211         return rt_mutex_top_waiter(lock)->task;
1212 }
1213
1214 /**
1215  * rt_mutex_finish_proxy_lock() - Complete lock acquisition
1216  * @lock:               the rt_mutex we were woken on
1217  * @to:                 the timeout, null if none. hrtimer should already have
1218  *                      been started.
1219  * @waiter:             the pre-initialized rt_mutex_waiter
1220  * @detect_deadlock:    perform deadlock detection (1) or not (0)
1221  *
1222  * Complete the lock acquisition started our behalf by another thread.
1223  *
1224  * Returns:
1225  *  0 - success
1226  * <0 - error, one of -EINTR, -ETIMEDOUT, or -EDEADLK
1227  *
1228  * Special API call for PI-futex requeue support
1229  */
1230 int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
1231                                struct hrtimer_sleeper *to,
1232                                struct rt_mutex_waiter *waiter,
1233                                int detect_deadlock)
1234 {
1235         int ret;
1236
1237         raw_spin_lock(&lock->wait_lock);
1238
1239         set_current_state(TASK_INTERRUPTIBLE);
1240
1241         ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
1242
1243         set_current_state(TASK_RUNNING);
1244
1245         if (unlikely(ret))
1246                 remove_waiter(lock, waiter);
1247
1248         /*
1249          * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
1250          * have to fix that up.
1251          */
1252         fixup_rt_mutex_waiters(lock);
1253
1254         raw_spin_unlock(&lock->wait_lock);
1255
1256         return ret;
1257 }