/*
 *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
 *  Copyright (C) 2007 The Regents of the University of California.
 *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
 *
 *  This file is part of the SPL, Solaris Porting Layer.
 *
 *  The SPL is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License as published by the
 *  Free Software Foundation; either version 2 of the License, or (at your
 *  option) any later version.
 *
 *  The SPL is distributed in the hope that it will be useful, but WITHOUT
 *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
 *
 *  Solaris Porting Layer (SPL) Condition Variables Implementation.
 */

#include <sys/condvar.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <linux/hrtimer.h>
#include <linux/compiler_compat.h>
#include <linux/mod_compat.h>

#include <linux/sched.h>

#ifdef HAVE_SCHED_SIGNAL_HEADER
#include <linux/sched/signal.h>
#endif

#define MAX_HRTIMEOUT_SLACK_US  1000
unsigned int spl_schedule_hrtimeout_slack_us = 0;

static int
param_set_hrtimeout_slack(const char *buf, zfs_kernel_param_t *kp)
{
        unsigned long val;
        int error;

        error = kstrtoul(buf, 0, &val);
        if (error)
                return (error);

        if (val > MAX_HRTIMEOUT_SLACK_US)
                return (-EINVAL);

        error = param_set_uint(buf, kp);
        if (error < 0)
                return (error);

        return (0);
}

module_param_call(spl_schedule_hrtimeout_slack_us, param_set_hrtimeout_slack,
    param_get_uint, &spl_schedule_hrtimeout_slack_us, 0644);
MODULE_PARM_DESC(spl_schedule_hrtimeout_slack_us,
    "schedule_hrtimeout_range() delta/slack value in us, default(0)");

void
__cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg)
{
        ASSERT(type == CV_DEFAULT);

        cvp->cv_magic = CV_MAGIC;
        init_waitqueue_head(&cvp->cv_event);
        init_waitqueue_head(&cvp->cv_destroy);
        atomic_set(&cvp->cv_waiters, 0);
        atomic_set(&cvp->cv_refs, 1);
        cvp->cv_mutex = NULL;
}
EXPORT_SYMBOL(__cv_init);

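/*
 * Usage sketch: the canonical Illumos-style wait loop built on these
 * primitives (the 'example_*' names below are illustrative, not part of
 * this file):
 *
 *      kmutex_t example_lock;
 *      kcondvar_t example_cv;
 *      boolean_t example_ready;
 *
 *      mutex_enter(&example_lock);
 *      while (!example_ready)
 *              cv_wait(&example_cv, &example_lock);
 *      mutex_exit(&example_lock);
 *
 * A producer sets 'example_ready' while holding the same mutex and then
 * calls cv_signal() or cv_broadcast() to wake the waiter(s).
 */
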
static int
cv_destroy_wakeup(kcondvar_t *cvp)
{
        if (!atomic_read(&cvp->cv_waiters) && !atomic_read(&cvp->cv_refs)) {
                ASSERT(cvp->cv_mutex == NULL);
                ASSERT(!waitqueue_active(&cvp->cv_event));
                return (1);
        }

        return (0);
}

void
__cv_destroy(kcondvar_t *cvp)
{
        ASSERT(cvp->cv_magic == CV_MAGIC);

        cvp->cv_magic = CV_DESTROY;
        atomic_dec(&cvp->cv_refs);

        /* Block until all waiters are woken and all references are dropped. */
        while (cv_destroy_wakeup(cvp) == 0)
                wait_event_timeout(cvp->cv_destroy, cv_destroy_wakeup(cvp), 1);

        ASSERT3P(cvp->cv_mutex, ==, NULL);
        ASSERT3S(atomic_read(&cvp->cv_refs), ==, 0);
        ASSERT3S(atomic_read(&cvp->cv_waiters), ==, 0);
        ASSERT3S(waitqueue_active(&cvp->cv_event), ==, 0);
}
EXPORT_SYMBOL(__cv_destroy);

static void
cv_wait_common(kcondvar_t *cvp, kmutex_t *mp, int state, int io)
{
        DEFINE_WAIT(wait);
        kmutex_t *m;

        ASSERT(cvp->cv_magic == CV_MAGIC);
        ASSERT(mutex_owned(mp));
        atomic_inc(&cvp->cv_refs);

        m = READ_ONCE(cvp->cv_mutex);
        if (!m)
                m = xchg(&cvp->cv_mutex, mp);
        /* Ensure the same mutex is used by all callers */
        ASSERT(m == NULL || m == mp);

        prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
        atomic_inc(&cvp->cv_waiters);

        /*
         * The mutex is dropped only after prepare_to_wait(); this ensures
         * we are linked onto the waiters list and avoids the race where
         * 'cvp->cv_waiters > 0' but the list is empty.
         */
        mutex_exit(mp);
        if (io)
                io_schedule();
        else
                schedule();

        /* No more waiters; a different mutex could be used. */
        if (atomic_dec_and_test(&cvp->cv_waiters)) {
                /*
                 * This is set without any lock, so it's racy.  But this is
                 * just for debug anyway, so keep it best-effort.
                 */
                cvp->cv_mutex = NULL;
                wake_up(&cvp->cv_destroy);
        }

        finish_wait(&cvp->cv_event, &wait);
        atomic_dec(&cvp->cv_refs);

        /*
         * Take the mutex only after we have released the cvp; otherwise we
         * could deadlock with a thread holding the mutex and calling
         * cv_destroy().
         */
        mutex_enter(mp);
}

void
__cv_wait(kcondvar_t *cvp, kmutex_t *mp)
{
        cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 0);
}
EXPORT_SYMBOL(__cv_wait);

void
__cv_wait_io(kcondvar_t *cvp, kmutex_t *mp)
{
        cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 1);
}
EXPORT_SYMBOL(__cv_wait_io);

int
__cv_wait_io_sig(kcondvar_t *cvp, kmutex_t *mp)
{
        cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 1);

        return (signal_pending(current) ? 0 : 1);
}
EXPORT_SYMBOL(__cv_wait_io_sig);

int
__cv_wait_sig(kcondvar_t *cvp, kmutex_t *mp)
{
        cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);

        return (signal_pending(current) ? 0 : 1);
}
EXPORT_SYMBOL(__cv_wait_sig);

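/*
 * Usage sketch for the signal-aware variant: cv_wait_sig() returns 0 when
 * the sleep was cut short by a pending signal, so callers typically recheck
 * their condition and back out (the 'example_*' names are illustrative):
 *
 *      mutex_enter(&example_lock);
 *      while (!example_ready) {
 *              if (cv_wait_sig(&example_cv, &example_lock) == 0) {
 *                      mutex_exit(&example_lock);
 *                      return (EINTR);
 *              }
 *      }
 *      mutex_exit(&example_lock);
 */
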
void
__cv_wait_idle(kcondvar_t *cvp, kmutex_t *mp)
{
        sigset_t blocked, saved;

        sigfillset(&blocked);
        (void) sigprocmask(SIG_BLOCK, &blocked, &saved);
        cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);
        (void) sigprocmask(SIG_SETMASK, &saved, NULL);
}
EXPORT_SYMBOL(__cv_wait_idle);

#if defined(HAVE_IO_SCHEDULE_TIMEOUT)
#define spl_io_schedule_timeout(t)      io_schedule_timeout(t)
#else

struct spl_task_timer {
        struct timer_list timer;
        struct task_struct *task;
};

static void
__cv_wakeup(spl_timer_list_t t)
{
        struct timer_list *tmr = (struct timer_list *)t;
        struct spl_task_timer *task_timer = from_timer(task_timer, tmr, timer);

        wake_up_process(task_timer->task);
}

static long
spl_io_schedule_timeout(long time_left)
{
        long expire_time = jiffies + time_left;
        struct spl_task_timer task_timer;
        struct timer_list *timer = &task_timer.timer;

        task_timer.task = current;

        timer_setup(timer, __cv_wakeup, 0);

        timer->expires = expire_time;
        add_timer(timer);

        io_schedule();

        del_timer_sync(timer);

        time_left = expire_time - jiffies;

        return (time_left < 0 ? 0 : time_left);
}
#endif

/*
 * The 'expire_time' argument is an absolute wall clock time in jiffies.
 * The return value is 1 if we were woken before the timeout expired, or
 * -1 if the timeout occurred.
 */
static clock_t
__cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time,
    int state, int io)
{
        DEFINE_WAIT(wait);
        kmutex_t *m;
        clock_t time_left;

        ASSERT(cvp->cv_magic == CV_MAGIC);
        ASSERT(mutex_owned(mp));

        /* XXX - Does not handle jiffie wrap properly */
        time_left = expire_time - jiffies;
        if (time_left <= 0)
                return (-1);

        atomic_inc(&cvp->cv_refs);
        m = READ_ONCE(cvp->cv_mutex);
        if (!m)
                m = xchg(&cvp->cv_mutex, mp);
        /* Ensure the same mutex is used by all callers */
        ASSERT(m == NULL || m == mp);

        prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
        atomic_inc(&cvp->cv_waiters);

        /*
         * The mutex is dropped only after prepare_to_wait(); this ensures
         * we are linked onto the waiters list and avoids the race where
         * 'cvp->cv_waiters > 0' but the list is empty.
         */
        mutex_exit(mp);
        if (io)
                time_left = spl_io_schedule_timeout(time_left);
        else
                time_left = schedule_timeout(time_left);

        /* No more waiters; a different mutex could be used. */
        if (atomic_dec_and_test(&cvp->cv_waiters)) {
                /*
                 * This is set without any lock, so it's racy.  But this is
                 * just for debug anyway, so keep it best-effort.
                 */
                cvp->cv_mutex = NULL;
                wake_up(&cvp->cv_destroy);
        }

        finish_wait(&cvp->cv_event, &wait);
        atomic_dec(&cvp->cv_refs);

        /*
         * Take the mutex only after we have released the cvp; otherwise we
         * could deadlock with a thread holding the mutex and calling
         * cv_destroy().
         */
        mutex_enter(mp);
        return (time_left > 0 ? 1 : -1);
}

int
__cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
        return (__cv_timedwait_common(cvp, mp, exp_time,
            TASK_UNINTERRUPTIBLE, 0));
}
EXPORT_SYMBOL(__cv_timedwait);

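/*
 * Usage sketch: 'exp_time' is absolute, so a relative delay is normally
 * expressed against the current lbolt value. Waiting for roughly one
 * second might look like this (the 'example_*' names are illustrative,
 * and ddi_get_lbolt()/SEC_TO_TICK() are assumed to be provided by the
 * SPL time headers):
 *
 *      mutex_enter(&example_lock);
 *      while (!example_ready) {
 *              if (cv_timedwait(&example_cv, &example_lock,
 *                  ddi_get_lbolt() + SEC_TO_TICK(1)) == -1)
 *                      break;
 *      }
 *      mutex_exit(&example_lock);
 *
 * A return value of -1 indicates the timeout expired before the condition
 * variable was signaled.
 */
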
int
__cv_timedwait_io(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
        return (__cv_timedwait_common(cvp, mp, exp_time,
            TASK_UNINTERRUPTIBLE, 1));
}
EXPORT_SYMBOL(__cv_timedwait_io);

int
__cv_timedwait_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
        int rc;

        rc = __cv_timedwait_common(cvp, mp, exp_time, TASK_INTERRUPTIBLE, 0);
        return (signal_pending(current) ? 0 : rc);
}
EXPORT_SYMBOL(__cv_timedwait_sig);

int
__cv_timedwait_idle(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
        sigset_t blocked, saved;
        int rc;

        sigfillset(&blocked);
        (void) sigprocmask(SIG_BLOCK, &blocked, &saved);
        rc = __cv_timedwait_common(cvp, mp, exp_time,
            TASK_INTERRUPTIBLE, 0);
        (void) sigprocmask(SIG_SETMASK, &saved, NULL);

        return (rc);
}
EXPORT_SYMBOL(__cv_timedwait_idle);

/*
 * The 'expire_time' argument is an absolute clock time in nanoseconds.
 * The return value is 1 if we were woken before the timeout expired, or
 * -1 if the timeout occurred.
 */
static int
__cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time,
    hrtime_t res, int state)
{
        DEFINE_WAIT(wait);
        kmutex_t *m;
        hrtime_t time_left;
        ktime_t ktime_left;
        u64 slack = 0;
        int rc;

        ASSERT(cvp->cv_magic == CV_MAGIC);
        ASSERT(mutex_owned(mp));

        time_left = expire_time - gethrtime();
        if (time_left <= 0)
                return (-1);

        atomic_inc(&cvp->cv_refs);
        m = READ_ONCE(cvp->cv_mutex);
        if (!m)
                m = xchg(&cvp->cv_mutex, mp);
        /* Ensure the same mutex is used by all callers */
        ASSERT(m == NULL || m == mp);

        prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
        atomic_inc(&cvp->cv_waiters);

        /*
         * The mutex is dropped only after prepare_to_wait(); this ensures
         * we are linked onto the waiters list and avoids the race where
         * 'cvp->cv_waiters > 0' but the list is empty.
         */
        mutex_exit(mp);

        ktime_left = ktime_set(0, time_left);
        slack = MIN(MAX(res, spl_schedule_hrtimeout_slack_us * NSEC_PER_USEC),
            MAX_HRTIMEOUT_SLACK_US * NSEC_PER_USEC);
        rc = schedule_hrtimeout_range(&ktime_left, slack, HRTIMER_MODE_REL);

        /* No more waiters; a different mutex could be used. */
        if (atomic_dec_and_test(&cvp->cv_waiters)) {
                /*
                 * This is set without any lock, so it's racy.  But this is
                 * just for debug anyway, so keep it best-effort.
                 */
                cvp->cv_mutex = NULL;
                wake_up(&cvp->cv_destroy);
        }

        finish_wait(&cvp->cv_event, &wait);
        atomic_dec(&cvp->cv_refs);

        mutex_enter(mp);
        return (rc == -EINTR ? 1 : -1);
}

/*
 * Compatibility wrapper for the cv_timedwait_hires() Illumos interface.
 */
static int
cv_timedwait_hires_common(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag, int state)
{
        if (!(flag & CALLOUT_FLAG_ABSOLUTE))
                tim += gethrtime();

        return (__cv_timedwait_hires(cvp, mp, tim, res, state));
}

int
cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res,
    int flag)
{
        return (cv_timedwait_hires_common(cvp, mp, tim, res, flag,
            TASK_UNINTERRUPTIBLE));
}
EXPORT_SYMBOL(cv_timedwait_hires);

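/*
 * Usage sketch: without CALLOUT_FLAG_ABSOLUTE in 'flag' the timeout is
 * treated as relative and converted with gethrtime() above. Waiting about
 * ten milliseconds with microsecond resolution might look like this (the
 * 'example_*' names are illustrative, and MSEC2NSEC()/USEC2NSEC() are
 * assumed to be provided by the SPL time headers):
 *
 *      mutex_enter(&example_lock);
 *      (void) cv_timedwait_hires(&example_cv, &example_lock,
 *          MSEC2NSEC(10), USEC2NSEC(1), 0);
 *      mutex_exit(&example_lock);
 */
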
int
cv_timedwait_sig_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag)
{
        int rc;

        rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag,
            TASK_INTERRUPTIBLE);
        return (signal_pending(current) ? 0 : rc);
}
EXPORT_SYMBOL(cv_timedwait_sig_hires);

int
cv_timedwait_idle_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag)
{
        sigset_t blocked, saved;
        int rc;

        sigfillset(&blocked);
        (void) sigprocmask(SIG_BLOCK, &blocked, &saved);
        rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag,
            TASK_INTERRUPTIBLE);
        (void) sigprocmask(SIG_SETMASK, &saved, NULL);

        return (rc);
}
EXPORT_SYMBOL(cv_timedwait_idle_hires);

void
__cv_signal(kcondvar_t *cvp)
{
        ASSERT(cvp->cv_magic == CV_MAGIC);
        atomic_inc(&cvp->cv_refs);

        /*
         * All waiters are added with WQ_FLAG_EXCLUSIVE, so only one
         * waiter will be set runnable with each call to wake_up().
         * Additionally, wake_up() holds a spinlock associated with
         * the wait queue to ensure we don't race waking up processes.
         */
        if (atomic_read(&cvp->cv_waiters) > 0)
                wake_up(&cvp->cv_event);

        atomic_dec(&cvp->cv_refs);
}
EXPORT_SYMBOL(__cv_signal);

void
__cv_broadcast(kcondvar_t *cvp)
{
        ASSERT(cvp->cv_magic == CV_MAGIC);
        atomic_inc(&cvp->cv_refs);

        /*
         * wake_up_all() will wake up all waiters, even those which
         * have the WQ_FLAG_EXCLUSIVE flag set.
         */
        if (atomic_read(&cvp->cv_waiters) > 0)
                wake_up_all(&cvp->cv_event);

        atomic_dec(&cvp->cv_refs);
}
EXPORT_SYMBOL(__cv_broadcast);