usr/src/uts/common/os/timers.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21
  22 /*
  23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26
  27 /*
  28  * Copyright (c) 1982, 1986 Regents of the University of California.
  29  * All rights reserved.  The Berkeley software License Agreement
  30  * specifies the terms and conditions for redistribution.
  31  */
  32
  33 #include <sys/param.h>
  34 #include <sys/user.h>
  35 #include <sys/vnode.h>
  36 #include <sys/proc.h>
  37 #include <sys/time.h>
  38 #include <sys/systm.h>
  39 #include <sys/kmem.h>
  40 #include <sys/cmn_err.h>
  41 #include <sys/cpuvar.h>
  42 #include <sys/timer.h>
  43 #include <sys/debug.h>
  44 #include <sys/sysmacros.h>
  45 #include <sys/cyclic.h>
  46
  47 static void     realitexpire(void *);
  48 static void     realprofexpire(void *);
  49 static void     timeval_advance(struct timeval *, struct timeval *);
  50
  51 kmutex_t tod_lock;      /* protects time-of-day stuff */
  52
  53 /*
  54  * Constant to define the minimum interval value of the ITIMER_REALPROF timer.
  55  * Value is in microseconds; defaults to 500 usecs.  Setting this value
  56  * significantly lower may allow for denial-of-service attacks.
  57  */
  58 int itimer_realprof_minimum = 500;
  59
  60 /*
  61  * macro to compare a timeval to a timestruc
  62  */
  63
  64 #define TVTSCMP(tvp, tsp, cmp) \
  65         /* CSTYLED */ \
  66         ((tvp)->tv_sec cmp (tsp)->tv_sec || \
  67         ((tvp)->tv_sec == (tsp)->tv_sec && \
  68         /* CSTYLED */ \
  69         (tvp)->tv_usec * 1000 cmp (tsp)->tv_nsec))
  70
  71 /*
  72  * Time of day and interval timer support.
  73  *
  74  * These routines provide the kernel entry points to get and set
  75  * the time-of-day and per-process interval timers.  Subroutines
  76  * here provide support for adding and subtracting timeval structures
  77  * and decrementing interval timers, optionally reloading the interval
  78  * timers when they expire.
  79  */
  80
  81 /*
  82  * SunOS function to generate monotonically increasing time values.
  83  */
  84 void
  85 uniqtime(struct timeval *tv)
  86 {
  87         static struct timeval last;
  88         static int last_timechanged;
  89         timestruc_t ts;
  90         time_t sec;
  91         int usec, nsec;
  92
  93         /*
  94          * protect modification of last
  95          */
  96         mutex_enter(&tod_lock);
  97         gethrestime(&ts);
  98
  99         /*
 100          * Fast algorithm to convert nsec to usec -- see hrt2ts()
 101          * in common/os/timers.c for a full description.
 102          */
 103         nsec = ts.tv_nsec;
 104         usec = nsec + (nsec >> 2);
 105         usec = nsec + (usec >> 1);
 106         usec = nsec + (usec >> 2);
 107         usec = nsec + (usec >> 4);
 108         usec = nsec - (usec >> 3);
 109         usec = nsec + (usec >> 2);
 110         usec = nsec + (usec >> 3);
 111         usec = nsec + (usec >> 4);
 112         usec = nsec + (usec >> 1);
 113         usec = nsec + (usec >> 6);
 114         usec = usec >> 10;
 115         sec = ts.tv_sec;
 116
 117         /*
 118          * If the system hres time has been changed since the last time
 119          * we are called. then all bets are off; just update our
 120          * local copy of timechanged and accept the reported time as is.
 121          */
 122         if (last_timechanged != timechanged) {
 123                 last_timechanged = timechanged;
 124         }
 125         /*
 126          * Try to keep timestamps unique, but don't be obsessive about
 127          * it in the face of large differences.
 128          */
 129         else if ((sec <= last.tv_sec) &&        /* same or lower seconds, and */
 130             ((sec != last.tv_sec) ||            /* either different second or */
 131             (usec <= last.tv_usec)) &&          /* lower microsecond, and */
 132             ((last.tv_sec - sec) <= 5)) {       /* not way back in time */
 133                 sec = last.tv_sec;
 134                 usec = last.tv_usec + 1;
 135                 if (usec >= MICROSEC) {
 136                         usec -= MICROSEC;
 137                         sec++;
 138                 }
 139         }
 140         last.tv_sec = sec;
 141         last.tv_usec = usec;
 142         mutex_exit(&tod_lock);
 143
 144         tv->tv_sec = sec;
 145         tv->tv_usec = usec;
 146 }
 147
 148 /*
 149  * Timestamps are exported from the kernel in several places.
 150  * Such timestamps are commonly used for either uniqueness or for
 151  * sequencing - truncation to 32-bits is fine for uniqueness,
 152  * but sequencing is going to take more work as we get closer to 2038!
 153  */
 154 void
 155 uniqtime32(struct timeval32 *tv32p)
 156 {
 157         struct timeval tv;
 158
 159         uniqtime(&tv);
 160         TIMEVAL_TO_TIMEVAL32(tv32p, &tv);
 161 }
 162
 163 int
 164 gettimeofday(struct timeval *tp)
 165 {
 166         struct timeval atv;
 167
 168         if (tp) {
 169                 uniqtime(&atv);
 170                 if (get_udatamodel() == DATAMODEL_NATIVE) {
 171                         if (copyout(&atv, tp, sizeof (atv)))
 172                                 return (set_errno(EFAULT));
 173                 } else {
 174                         struct timeval32 tv32;
 175
 176                         if (TIMEVAL_OVERFLOW(&atv))
 177                                 return (set_errno(EOVERFLOW));
 178                         TIMEVAL_TO_TIMEVAL32(&tv32, &atv);
 179
 180                         if (copyout(&tv32, tp, sizeof (tv32)))
 181                                 return (set_errno(EFAULT));
 182                 }
 183         }
 184         return (0);
 185 }
 186
 187 int
 188 getitimer(uint_t which, struct itimerval *itv)
 189 {
 190         int error;
 191
 192         if (get_udatamodel() == DATAMODEL_NATIVE)
 193                 error = xgetitimer(which, itv, 0);
 194         else {
 195                 struct itimerval kitv;
 196
 197                 if ((error = xgetitimer(which, &kitv, 1)) == 0) {
 198                         if (ITIMERVAL_OVERFLOW(&kitv)) {
 199                                 error = EOVERFLOW;
 200                         } else {
 201                                 struct itimerval32 itv32;
 202
 203                                 ITIMERVAL_TO_ITIMERVAL32(&itv32, &kitv);
 204                                 if (copyout(&itv32, itv, sizeof (itv32)) != 0)
 205                                         error = EFAULT;
 206                         }
 207                 }
 208         }
 209
 210         return (error ? (set_errno(error)) : 0);
 211 }
 212
 213 int
 214 xgetitimer(uint_t which, struct itimerval *itv, int iskaddr)
 215 {
 216         struct proc *p = curproc;
 217         struct timeval now;
 218         struct itimerval aitv;
 219         hrtime_t ts, first, interval, remain;
 220
 221         mutex_enter(&p->p_lock);
 222
 223         switch (which) {
 224         case ITIMER_VIRTUAL:
 225         case ITIMER_PROF:
 226                 aitv = ttolwp(curthread)->lwp_timer[which];
 227                 break;
 228
 229         case ITIMER_REAL:
 230                 uniqtime(&now);
 231                 aitv = p->p_realitimer;
 232
 233                 if (timerisset(&aitv.it_value)) {
 234                         /*CSTYLED*/
 235                         if (timercmp(&aitv.it_value, &now, <)) {
 236                                 timerclear(&aitv.it_value);
 237                         } else {
 238                                 timevalsub(&aitv.it_value, &now);
 239                         }
 240                 }
 241                 break;
 242
 243         case ITIMER_REALPROF:
 244                 if (curproc->p_rprof_cyclic == CYCLIC_NONE) {
 245                         bzero(&aitv, sizeof (aitv));
 246                         break;
 247                 }
 248
 249                 aitv = curproc->p_rprof_timer;
 250
 251                 first = tv2hrt(&aitv.it_value);
 252                 interval = tv2hrt(&aitv.it_interval);
 253
 254                 if ((ts = gethrtime()) < first) {
 255                         /*
 256                          * We haven't gone off for the first time; the time
 257                          * remaining is simply the first time we will go
 258                          * off minus the current time.
 259                          */
 260                         remain = first - ts;
 261                 } else {
 262                         if (interval == 0) {
 263                                 /*
 264                                  * This was set as a one-shot, and we've
 265                                  * already gone off; there is no time
 266                                  * remaining.
 267                                  */
 268                                 remain = 0;
 269                         } else {
 270                                 /*
 271                                  * We have a non-zero interval; we need to
 272                                  * determine how far we are into the current
 273                                  * interval, and subtract that from the
 274                                  * interval to determine the time remaining.
 275                                  */
 276                                 remain = interval - ((ts - first) % interval);
 277                         }
 278                 }
 279
 280                 hrt2tv(remain, &aitv.it_value);
 281                 break;
 282
 283         default:
 284                 mutex_exit(&p->p_lock);
 285                 return (EINVAL);
 286         }
 287
 288         mutex_exit(&p->p_lock);
 289
 290         if (iskaddr) {
 291                 bcopy(&aitv, itv, sizeof (*itv));
 292         } else {
 293                 ASSERT(get_udatamodel() == DATAMODEL_NATIVE);
 294                 if (copyout(&aitv, itv, sizeof (*itv)))
 295                         return (EFAULT);
 296         }
 297
 298         return (0);
 299 }
 300
 301
 302 int
 303 setitimer(uint_t which, struct itimerval *itv, struct itimerval *oitv)
 304 {
 305         int error;
 306
 307         if (oitv != NULL)
 308                 if ((error = getitimer(which, oitv)) != 0)
 309                         return (error);
 310
 311         if (itv == NULL)
 312                 return (0);
 313
 314         if (get_udatamodel() == DATAMODEL_NATIVE)
 315                 error = xsetitimer(which, itv, 0);
 316         else {
 317                 struct itimerval32 itv32;
 318                 struct itimerval kitv;
 319
 320                 if (copyin(itv, &itv32, sizeof (itv32)))
 321                         error = EFAULT;
 322                 ITIMERVAL32_TO_ITIMERVAL(&kitv, &itv32);
 323                 error = xsetitimer(which, &kitv, 1);
 324         }
 325
 326         return (error ? (set_errno(error)) : 0);
 327 }
 328
 329 int
 330 xsetitimer(uint_t which, struct itimerval *itv, int iskaddr)
 331 {
 332         struct itimerval aitv;
 333         struct timeval now;
 334         struct proc *p = curproc;
 335         kthread_t *t;
 336         timeout_id_t tmp_id;
 337         cyc_handler_t hdlr;
 338         cyc_time_t when;
 339         cyclic_id_t cyclic;
 340         hrtime_t ts;
 341         int min;
 342
 343         if (itv == NULL)
 344                 return (0);
 345
 346         if (iskaddr) {
 347                 bcopy(itv, &aitv, sizeof (aitv));
 348         } else {
 349                 ASSERT(get_udatamodel() == DATAMODEL_NATIVE);
 350                 if (copyin(itv, &aitv, sizeof (aitv)))
 351                         return (EFAULT);
 352         }
 353
 354         if (which == ITIMER_REALPROF) {
 355                 min = MAX((int)(cyclic_getres() / (NANOSEC / MICROSEC)),
 356                     itimer_realprof_minimum);
 357         } else {
 358                 min = usec_per_tick;
 359         }
 360
 361         if (itimerfix(&aitv.it_value, min) ||
 362             (itimerfix(&aitv.it_interval, min) && timerisset(&aitv.it_value)))
 363                 return (EINVAL);
 364
 365         mutex_enter(&p->p_lock);
 366         switch (which) {
 367         case ITIMER_REAL:
 368                 /*
 369                  * The SITBUSY flag prevents conflicts with multiple
 370                  * threads attempting to perform setitimer(ITIMER_REAL)
 371                  * at the same time, even when we drop p->p_lock below.
 372                  * Any blocked thread returns successfully because the
 373                  * effect is the same as if it got here first, finished,
 374                  * and the other thread then came through and destroyed
 375                  * what it did.  We are just protecting the system from
 376                  * malfunctioning due to the race condition.
 377                  */
 378                 if (p->p_flag & SITBUSY) {
 379                         mutex_exit(&p->p_lock);
 380                         return (0);
 381                 }
 382                 p->p_flag |= SITBUSY;
 383                 while ((tmp_id = p->p_itimerid) != 0) {
 384                         /*
 385                          * Avoid deadlock in callout_delete (called from
 386                          * untimeout) which may go to sleep (while holding
 387                          * p_lock). Drop p_lock and re-acquire it after
 388                          * untimeout returns. Need to clear p_itimerid
 389                          * while holding p_lock.
 390                          */
 391                         p->p_itimerid = 0;
 392                         mutex_exit(&p->p_lock);
 393                         (void) untimeout(tmp_id);
 394                         mutex_enter(&p->p_lock);
 395                 }
 396                 if (timerisset(&aitv.it_value)) {
 397                         uniqtime(&now);
 398                         timevaladd(&aitv.it_value, &now);
 399                         p->p_itimerid = realtime_timeout(realitexpire,
 400                             p, hzto(&aitv.it_value));
 401                 }
 402                 p->p_realitimer = aitv;
 403                 p->p_flag &= ~SITBUSY;
 404                 break;
 405
 406         case ITIMER_REALPROF:
 407                 cyclic = p->p_rprof_cyclic;
 408                 p->p_rprof_cyclic = CYCLIC_NONE;
 409
 410                 mutex_exit(&p->p_lock);
 411
 412                 /*
 413                  * We're now going to acquire cpu_lock, remove the old cyclic
 414                  * if necessary, and add our new cyclic.
 415                  */
 416                 mutex_enter(&cpu_lock);
 417
 418                 if (cyclic != CYCLIC_NONE)
 419                         cyclic_remove(cyclic);
 420
 421                 if (!timerisset(&aitv.it_value)) {
 422                         /*
 423                          * If we were passed a value of 0, we're done.
 424                          */
 425                         mutex_exit(&cpu_lock);
 426                         return (0);
 427                 }
 428
 429                 hdlr.cyh_func = realprofexpire;
 430                 hdlr.cyh_arg = p;
 431                 hdlr.cyh_level = CY_LOW_LEVEL;
 432
 433                 when.cyt_when = (ts = gethrtime() + tv2hrt(&aitv.it_value));
 434                 when.cyt_interval = tv2hrt(&aitv.it_interval);
 435
 436                 if (when.cyt_interval == 0) {
 437                         /*
 438                          * Using the same logic as for CLOCK_HIGHRES timers, we
 439                          * set the interval to be INT64_MAX - when.cyt_when to
 440                          * effect a one-shot; see the comment in clock_highres.c
 441                          * for more details on why this works.
 442                          */
 443                         when.cyt_interval = INT64_MAX - when.cyt_when;
 444                 }
 445
 446                 cyclic = cyclic_add(&hdlr, &when);
 447
 448                 mutex_exit(&cpu_lock);
 449
 450                 /*
 451                  * We have now successfully added the cyclic.  Reacquire
 452                  * p_lock, and see if anyone has snuck in.
 453                  */
 454                 mutex_enter(&p->p_lock);
 455
 456                 if (p->p_rprof_cyclic != CYCLIC_NONE) {
 457                         /*
 458                          * We're racing with another thread establishing an
 459                          * ITIMER_REALPROF interval timer.  We'll let the other
 460                          * thread win (this is a race at the application level,
 461                          * so letting the other thread win is acceptable).
 462                          */
 463                         mutex_exit(&p->p_lock);
 464                         mutex_enter(&cpu_lock);
 465                         cyclic_remove(cyclic);
 466                         mutex_exit(&cpu_lock);
 467
 468                         return (0);
 469                 }
 470
 471                 /*
 472                  * Success.  Set our tracking variables in the proc structure,
 473                  * cancel any outstanding ITIMER_PROF, and allocate the
 474                  * per-thread SIGPROF buffers, if possible.
 475                  */
 476                 hrt2tv(ts, &aitv.it_value);
 477                 p->p_rprof_timer = aitv;
 478                 p->p_rprof_cyclic = cyclic;
 479
 480                 t = p->p_tlist;
 481                 do {
 482                         struct itimerval *itvp;
 483
 484                         itvp = &ttolwp(t)->lwp_timer[ITIMER_PROF];
 485                         timerclear(&itvp->it_interval);
 486                         timerclear(&itvp->it_value);
 487
 488                         if (t->t_rprof != NULL)
 489                                 continue;
 490
 491                         t->t_rprof =
 492                             kmem_zalloc(sizeof (struct rprof), KM_NOSLEEP);
 493                         aston(t);
 494                 } while ((t = t->t_forw) != p->p_tlist);
 495
 496                 break;
 497
 498         case ITIMER_VIRTUAL:
 499                 ttolwp(curthread)->lwp_timer[ITIMER_VIRTUAL] = aitv;
 500                 break;
 501
 502         case ITIMER_PROF:
 503                 if (p->p_rprof_cyclic != CYCLIC_NONE) {
 504                         /*
 505                          * Silently ignore ITIMER_PROF if ITIMER_REALPROF
 506                          * is in effect.
 507                          */
 508                         break;
 509                 }
 510
 511                 ttolwp(curthread)->lwp_timer[ITIMER_PROF] = aitv;
 512                 break;
 513
 514         default:
 515                 mutex_exit(&p->p_lock);
 516                 return (EINVAL);
 517         }
 518         mutex_exit(&p->p_lock);
 519         return (0);
 520 }
 521
 522 /*
 523  * Delete the ITIMER_REALPROF interval timer.
 524  * Called only from exec_args() when exec occurs.
 525  * The other ITIMER_* interval timers are specified
 526  * to be inherited across exec(), so leave them alone.
 527  */
 528 void
 529 delete_itimer_realprof(void)
 530 {
 531         kthread_t *t = curthread;
 532         struct proc *p = ttoproc(t);
 533         klwp_t *lwp = ttolwp(t);
 534         cyclic_id_t cyclic;
 535
 536         mutex_enter(&p->p_lock);
 537
 538         /* we are performing execve(); assert we are single-threaded */
 539         ASSERT(t == p->p_tlist && t == t->t_forw);
 540
 541         if ((cyclic = p->p_rprof_cyclic) == CYCLIC_NONE) {
 542                 mutex_exit(&p->p_lock);
 543         } else {
 544                 p->p_rprof_cyclic = CYCLIC_NONE;
 545                 /*
 546                  * Delete any current instance of SIGPROF.
 547                  */
 548                 if (lwp->lwp_cursig == SIGPROF) {
 549                         lwp->lwp_cursig = 0;
 550                         lwp->lwp_extsig = 0;
 551                         if (lwp->lwp_curinfo) {
 552                                 siginfofree(lwp->lwp_curinfo);
 553                                 lwp->lwp_curinfo = NULL;
 554                         }
 555                 }
 556                 /*
 557                  * Delete any pending instances of SIGPROF.
 558                  */
 559                 sigdelset(&p->p_sig, SIGPROF);
 560                 sigdelset(&p->p_extsig, SIGPROF);
 561                 sigdelq(p, NULL, SIGPROF);
 562                 sigdelset(&t->t_sig, SIGPROF);
 563                 sigdelset(&t->t_extsig, SIGPROF);
 564                 sigdelq(p, t, SIGPROF);
 565
 566                 mutex_exit(&p->p_lock);
 567
 568                 /*
 569                  * Remove the ITIMER_REALPROF cyclic.
 570                  */
 571                 mutex_enter(&cpu_lock);
 572                 cyclic_remove(cyclic);
 573                 mutex_exit(&cpu_lock);
 574         }
 575 }
 576
 577 /*
 578  * Real interval timer expired:
 579  * send process whose timer expired an alarm signal.
 580  * If time is not set up to reload, then just return.
 581  * Else compute next time timer should go off which is > current time.
 582  * This is where delay in processing this timeout causes multiple
 583  * SIGALRM calls to be compressed into one.
 584  */
 585 static void
 586 realitexpire(void *arg)
 587 {
 588         struct proc *p = arg;
 589         struct timeval *valp = &p->p_realitimer.it_value;
 590         struct timeval *intervalp = &p->p_realitimer.it_interval;
 591 #if !defined(_LP64)
 592         clock_t ticks;
 593 #endif
 594
 595         mutex_enter(&p->p_lock);
 596 #if !defined(_LP64)
 597         if ((ticks = hzto(valp)) > 1) {
 598                 /*
 599                  * If we are executing before we were meant to, it must be
 600                  * because of an overflow in a prior hzto() calculation.
 601                  * In this case, we want to go to sleep for the recalculated
 602                  * number of ticks. For the special meaning of the value "1"
 603                  * see comment in timespectohz().
 604                  */
 605                 p->p_itimerid = realtime_timeout(realitexpire, p, ticks);
 606                 mutex_exit(&p->p_lock);
 607                 return;
 608         }
 609 #endif
 610         sigtoproc(p, NULL, SIGALRM);
 611         if (!timerisset(intervalp)) {
 612                 timerclear(valp);
 613                 p->p_itimerid = 0;
 614         } else {
 615                 /* advance timer value past current time */
 616                 timeval_advance(valp, intervalp);
 617                 p->p_itimerid = realtime_timeout(realitexpire, p, hzto(valp));
 618         }
 619         mutex_exit(&p->p_lock);
 620 }
 621
 622 /*
 623  * Real time profiling interval timer expired:
 624  * Increment microstate counters for each lwp in the process
 625  * and ensure that running lwps are kicked into the kernel.
 626  * If time is not set up to reload, then just return.
 627  * Else compute next time timer should go off which is > current time,
 628  * as above.
 629  */
 630 static void
 631 realprofexpire(void *arg)
 632 {
 633         struct proc *p = arg;
 634         kthread_t *t;
 635
 636         mutex_enter(&p->p_lock);
 637         if (p->p_rprof_cyclic == CYCLIC_NONE ||
 638             (t = p->p_tlist) == NULL) {
 639                 mutex_exit(&p->p_lock);
 640                 return;
 641         }
 642         do {
 643                 int mstate;
 644
 645                 /*
 646                  * Attempt to allocate the SIGPROF buffer, but don't sleep.
 647                  */
 648                 if (t->t_rprof == NULL)
 649                         t->t_rprof = kmem_zalloc(sizeof (struct rprof),
 650                             KM_NOSLEEP);
 651                 if (t->t_rprof == NULL)
 652                         continue;
 653
 654                 thread_lock(t);
 655                 switch (t->t_state) {
 656                 case TS_SLEEP:
 657                         /*
 658                          * Don't touch the lwp is it is swapped out.
 659                          */
 660                         if (!(t->t_schedflag & TS_LOAD)) {
 661                                 mstate = LMS_SLEEP;
 662                                 break;
 663                         }
 664                         switch (mstate = ttolwp(t)->lwp_mstate.ms_prev) {
 665                         case LMS_TFAULT:
 666                         case LMS_DFAULT:
 667                         case LMS_KFAULT:
 668                         case LMS_USER_LOCK:
 669                                 break;
 670                         default:
 671                                 mstate = LMS_SLEEP;
 672                                 break;
 673                         }
 674                         break;
 675                 case TS_RUN:
 676                 case TS_WAIT:
 677                         mstate = LMS_WAIT_CPU;
 678                         break;
 679                 case TS_ONPROC:
 680                         switch (mstate = t->t_mstate) {
 681                         case LMS_USER:
 682                         case LMS_SYSTEM:
 683                         case LMS_TRAP:
 684                                 break;
 685                         default:
 686                                 mstate = LMS_SYSTEM;
 687                                 break;
 688                         }
 689                         break;
 690                 default:
 691                         mstate = t->t_mstate;
 692                         break;
 693                 }
 694                 t->t_rprof->rp_anystate = 1;
 695                 t->t_rprof->rp_state[mstate]++;
 696                 aston(t);
 697                 /*
 698                  * force the thread into the kernel
 699                  * if it is not already there.
 700                  */
 701                 if (t->t_state == TS_ONPROC && t->t_cpu != CPU)
 702                         poke_cpu(t->t_cpu->cpu_id);
 703                 thread_unlock(t);
 704         } while ((t = t->t_forw) != p->p_tlist);
 705
 706         mutex_exit(&p->p_lock);
 707 }
 708
 709 /*
 710  * Advances timer value past the current time of day.  See the detailed
 711  * comment for this logic in realitsexpire(), above.
 712  */
 713 static void
 714 timeval_advance(struct timeval *valp, struct timeval *intervalp)
 715 {
 716         int cnt2nth;
 717         struct timeval interval2nth;
 718
 719         for (;;) {
 720                 interval2nth = *intervalp;
 721                 for (cnt2nth = 0; ; cnt2nth++) {
 722                         timevaladd(valp, &interval2nth);
 723                         /*CSTYLED*/
 724                         if (TVTSCMP(valp, &hrestime, >))
 725                                 break;
 726                         timevaladd(&interval2nth, &interval2nth);
 727                 }
 728                 if (cnt2nth == 0)
 729                         break;
 730                 timevalsub(valp, &interval2nth);
 731         }
 732 }
 733
 734 /*
 735  * Check that a proposed value to load into the .it_value or .it_interval
 736  * part of an interval timer is acceptable, and set it to at least a
 737  * specified minimal value.
 738  */
 739 int
 740 itimerfix(struct timeval *tv, int minimum)
 741 {
 742         if (tv->tv_sec < 0 || tv->tv_sec > 100000000 ||
 743             tv->tv_usec < 0 || tv->tv_usec >= MICROSEC)
 744                 return (EINVAL);
 745         if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < minimum)
 746                 tv->tv_usec = minimum;
 747         return (0);
 748 }
 749
 750 /*
 751  * Same as itimerfix, except a) it takes a timespec instead of a timeval and
 752  * b) it doesn't truncate based on timeout granularity; consumers of this
 753  * interface (e.g. timer_settime()) depend on the passed timespec not being
 754  * modified implicitly.
 755  */
 756 int
 757 itimerspecfix(timespec_t *tv)
 758 {
 759         if (tv->tv_sec < 0 || tv->tv_nsec < 0 || tv->tv_nsec >= NANOSEC)
 760                 return (EINVAL);
 761         return (0);
 762 }
 763
 764 /*
 765  * Decrement an interval timer by a specified number
 766  * of microseconds, which must be less than a second,
 767  * i.e. < 1000000.  If the timer expires, then reload
 768  * it.  In this case, carry over (usec - old value) to
 769  * reducint the value reloaded into the timer so that
 770  * the timer does not drift.  This routine assumes
 771  * that it is called in a context where the timers
 772  * on which it is operating cannot change in value.
 773  */
 774 int
 775 itimerdecr(struct itimerval *itp, int usec)
 776 {
 777         if (itp->it_value.tv_usec < usec) {
 778                 if (itp->it_value.tv_sec == 0) {
 779                         /* expired, and already in next interval */
 780                         usec -= itp->it_value.tv_usec;
 781                         goto expire;
 782                 }
 783                 itp->it_value.tv_usec += MICROSEC;
 784                 itp->it_value.tv_sec--;
 785         }
 786         itp->it_value.tv_usec -= usec;
 787         usec = 0;
 788         if (timerisset(&itp->it_value))
 789                 return (1);
 790         /* expired, exactly at end of interval */
 791 expire:
 792         if (timerisset(&itp->it_interval)) {
 793                 itp->it_value = itp->it_interval;
 794                 itp->it_value.tv_usec -= usec;
 795                 if (itp->it_value.tv_usec < 0) {
 796                         itp->it_value.tv_usec += MICROSEC;
 797                         itp->it_value.tv_sec--;
 798                 }
 799         } else
 800                 itp->it_value.tv_usec = 0;              /* sec is already 0 */
 801         return (0);
 802 }
 803
 804 /*
 805  * Add and subtract routines for timevals.
 806  * N.B.: subtract routine doesn't deal with
 807  * results which are before the beginning,
 808  * it just gets very confused in this case.
 809  * Caveat emptor.
 810  */
 811 void
 812 timevaladd(struct timeval *t1, struct timeval *t2)
 813 {
 814         t1->tv_sec += t2->tv_sec;
 815         t1->tv_usec += t2->tv_usec;
 816         timevalfix(t1);
 817 }
 818
 819 void
 820 timevalsub(struct timeval *t1, struct timeval *t2)
 821 {
 822         t1->tv_sec -= t2->tv_sec;
 823         t1->tv_usec -= t2->tv_usec;
 824         timevalfix(t1);
 825 }
 826
 827 void
 828 timevalfix(struct timeval *t1)
 829 {
 830         if (t1->tv_usec < 0) {
 831                 t1->tv_sec--;
 832                 t1->tv_usec += MICROSEC;
 833         }
 834         if (t1->tv_usec >= MICROSEC) {
 835                 t1->tv_sec++;
 836                 t1->tv_usec -= MICROSEC;
 837         }
 838 }
 839
 840 /*
 841  * Same as the routines above. These routines take a timespec instead
 842  * of a timeval.
 843  */
 844 void
 845 timespecadd(timespec_t *t1, timespec_t *t2)
 846 {
 847         t1->tv_sec += t2->tv_sec;
 848         t1->tv_nsec += t2->tv_nsec;
 849         timespecfix(t1);
 850 }
 851
 852 void
 853 timespecsub(timespec_t *t1, timespec_t *t2)
 854 {
 855         t1->tv_sec -= t2->tv_sec;
 856         t1->tv_nsec -= t2->tv_nsec;
 857         timespecfix(t1);
 858 }
 859
 860 void
 861 timespecfix(timespec_t *t1)
 862 {
 863         if (t1->tv_nsec < 0) {
 864                 t1->tv_sec--;
 865                 t1->tv_nsec += NANOSEC;
 866         } else {
 867                 if (t1->tv_nsec >= NANOSEC) {
 868                         t1->tv_sec++;
 869                         t1->tv_nsec -= NANOSEC;
 870                 }
 871         }
 872 }
 873
 874 /*
 875  * Compute number of hz until specified time.
 876  * Used to compute third argument to timeout() from an absolute time.
 877  */
 878 clock_t
 879 hzto(struct timeval *tv)
 880 {
 881         timespec_t ts, now;
 882
 883         ts.tv_sec = tv->tv_sec;
 884         ts.tv_nsec = tv->tv_usec * 1000;
 885         gethrestime_lasttick(&now);
 886
 887         return (timespectohz(&ts, now));
 888 }
 889
 890 /*
 891  * Compute number of hz until specified time for a given timespec value.
 892  * Used to compute third argument to timeout() from an absolute time.
 893  */
 894 clock_t
 895 timespectohz(timespec_t *tv, timespec_t now)
 896 {
 897         clock_t ticks;
 898         time_t  sec;
 899         int     nsec;
 900
 901         /*
 902          * Compute number of ticks we will see between now and
 903          * the target time; returns "1" if the destination time
 904          * is before the next tick, so we always get some delay,
 905          * and returns LONG_MAX ticks if we would overflow.
 906          */
 907         sec = tv->tv_sec - now.tv_sec;
 908         nsec = tv->tv_nsec - now.tv_nsec + nsec_per_tick - 1;
 909
 910         if (nsec < 0) {
 911                 sec--;
 912                 nsec += NANOSEC;
 913         } else if (nsec >= NANOSEC) {
 914                 sec++;
 915                 nsec -= NANOSEC;
 916         }
 917
 918         ticks = NSEC_TO_TICK(nsec);
 919
 920         /*
 921          * Compute ticks, accounting for negative and overflow as above.
 922          * Overflow protection kicks in at about 70 weeks for hz=50
 923          * and at about 35 weeks for hz=100. (Rather longer for the 64-bit
 924          * kernel :-)
 925          */
 926         if (sec < 0 || (sec == 0 && ticks < 1))
 927                 ticks = 1;                      /* protect vs nonpositive */
 928         else if (sec > (LONG_MAX - ticks) / hz)
 929                 ticks = LONG_MAX;               /* protect vs overflow */
 930         else
 931                 ticks += sec * hz;              /* common case */
 932
 933         return (ticks);
 934 }
 935
 936 /*
 937  * Compute number of hz with the timespec tv specified.
 938  * The return type must be 64 bit integer.
 939  */
 940 int64_t
 941 timespectohz64(timespec_t *tv)
 942 {
 943         int64_t ticks;
 944         int64_t sec;
 945         int64_t nsec;
 946
 947         sec = tv->tv_sec;
 948         nsec = tv->tv_nsec + nsec_per_tick - 1;
 949
 950         if (nsec < 0) {
 951                 sec--;
 952                 nsec += NANOSEC;
 953         } else if (nsec >= NANOSEC) {
 954                 sec++;
 955                 nsec -= NANOSEC;
 956         }
 957
 958         ticks = NSEC_TO_TICK(nsec);
 959
 960         /*
 961          * Compute ticks, accounting for negative and overflow as above.
 962          * Overflow protection kicks in at about 70 weeks for hz=50
 963          * and at about 35 weeks for hz=100. (Rather longer for the 64-bit
 964          * kernel
 965          */
 966         if (sec < 0 || (sec == 0 && ticks < 1))
 967                 ticks = 1;                      /* protect vs nonpositive */
 968         else if (sec > (((~0ULL) >> 1) - ticks) / hz)
 969                 ticks = (~0ULL) >> 1;           /* protect vs overflow */
 970         else
 971                 ticks += sec * hz;              /* common case */
 972
 973         return (ticks);
 974 }
 975
 976 /*
 977  * hrt2ts(): convert from hrtime_t to timestruc_t.
 978  *
 979  * All this routine really does is:
 980  *
 981  *      tsp->sec  = hrt / NANOSEC;
 982  *      tsp->nsec = hrt % NANOSEC;
 983  *
 984  * The black magic below avoids doing a 64-bit by 32-bit integer divide,
 985  * which is quite expensive.  There's actually much more going on here than
 986  * it might first appear -- don't try this at home.
 987  *
 988  * For the adventuresome, here's an explanation of how it works.
 989  *
 990  * Multiplication by a fixed constant is easy -- you just do the appropriate
 991  * shifts and adds.  For example, to multiply by 10, we observe that
 992  *
 993  *      x * 10  = x * (8 + 2)
 994  *              = (x * 8) + (x * 2)
 995  *              = (x << 3) + (x << 1).
 996  *
 997  * In general, you can read the algorithm right off the bits: the number 10
 998  * is 1010 in binary; bits 1 and 3 are ones, so x * 10 = (x << 1) + (x << 3).
 999  *
1000  * Sometimes you can do better.  For example, 15 is 1111 binary, so the normal
1001  * shift/add computation is x * 15 = (x << 0) + (x << 1) + (x << 2) + (x << 3).
1002  * But, it's cheaper if you capitalize on the fact that you have a run of ones:
1003  * 1111 = 10000 - 1, hence x * 15 = (x << 4) - (x << 0).  [You would never
1004  * actually perform the operation << 0, since it's a no-op; I'm just writing
1005  * it that way for clarity.]
1006  *
1007  * The other way you can win is if you get lucky with the prime factorization
1008  * of your constant.  The number 1,000,000,000, which we have to multiply
1009  * by below, is a good example.  One billion is 111011100110101100101000000000
1010  * in binary.  If you apply the bit-grouping trick, it doesn't buy you very
1011  * much, because it's only a win for groups of three or more equal bits:
1012  *
1013  * 111011100110101100101000000000 = 1000000000000000000000000000000
1014  *                                -  000100011001010011011000000000
1015  *
1016  * Thus, instead of the 13 shift/add pairs (26 operations) implied by the LHS,
1017  * we have reduced this to 10 shift/add pairs (20 operations) on the RHS.
1018  * This is better, but not great.
1019  *
1020  * However, we can factor 1,000,000,000 = 2^9 * 5^9 = 2^9 * 125 * 125 * 125,
1021  * and multiply by each factor.  Multiplication by 125 is particularly easy,
1022  * since 128 is nearby: x * 125 = (x << 7) - x - x - x, which is just four
1023  * operations.  So, to multiply by 1,000,000,000, we perform three multipli-
1024  * cations by 125, then << 9, a total of only 3 * 4 + 1 = 13 operations.
1025  * This is the algorithm we actually use in both hrt2ts() and ts2hrt().
1026  *
1027  * Division is harder; there is no equivalent of the simple shift-add algorithm
1028  * we used for multiplication.  However, we can convert the division problem
1029  * into a multiplication problem by pre-computing the binary representation
1030  * of the reciprocal of the divisor.  For the case of interest, we have
1031  *
1032  *      1 / 1,000,000,000 = 1.0001001011100000101111101000001B-30,
1033  *
1034  * to 32 bits of precision.  (The notation B-30 means "* 2^-30", just like
1035  * E-18 means "* 10^-18".)
1036  *
1037  * So, to compute x / 1,000,000,000, we just multiply x by the 32-bit
1038  * integer 10001001011100000101111101000001, then normalize (shift) the
 * result.  This constant has several long runs of identical bits, so
 * the multiply is relatively cheap:
1041  *
1042  *      10001001011100000101111101000001 = 10001001100000000110000001000001
1043  *                                       - 00000000000100000000000100000000
1044  *
1045  * Again, you can just read the algorithm right off the bits:
1046  *
1047  *                      sec = hrt;
1048  *                      sec += (hrt << 6);
1049  *                      sec -= (hrt << 8);
1050  *                      sec += (hrt << 13);
1051  *                      sec += (hrt << 14);
1052  *                      sec -= (hrt << 20);
1053  *                      sec += (hrt << 23);
1054  *                      sec += (hrt << 24);
1055  *                      sec += (hrt << 27);
1056  *                      sec += (hrt << 31);
1057  *                      sec >>= (32 + 30);
1058  *
1059  * Voila!  The only problem is, since hrt is 64 bits, we need to use 96-bit
1060  * arithmetic to perform this calculation.  That's a waste, because ultimately
1061  * we only need the highest 32 bits of the result.
1062  *
1063  * The first thing we do is to realize that we don't need to use all of hrt
1064  * in the calculation.  The lowest 30 bits can contribute at most 1 to the
1065  * quotient (2^30 / 1,000,000,000 = 1.07...), so we'll deal with them later.
1066  * The highest 2 bits have to be zero, or hrt won't fit in a timestruc_t.
1067  * Thus, the only bits of hrt that matter for division are bits 30..61.
1068  * These 32 bits are just the lower-order word of (hrt >> 30).  This brings
1069  * us down from 96-bit math to 64-bit math, and our algorithm becomes:
1070  *
1071  *                      tmp = (uint32_t) (hrt >> 30);
1072  *                      sec = tmp;
1073  *                      sec += (tmp << 6);
1074  *                      sec -= (tmp << 8);
1075  *                      sec += (tmp << 13);
1076  *                      sec += (tmp << 14);
1077  *                      sec -= (tmp << 20);
1078  *                      sec += (tmp << 23);
1079  *                      sec += (tmp << 24);
1080  *                      sec += (tmp << 27);
1081  *                      sec += (tmp << 31);
1082  *                      sec >>= 32;
1083  *
1084  * Next, we're going to reduce this 64-bit computation to a 32-bit
1085  * computation.  We begin by rewriting the above algorithm to use relative
1086  * shifts instead of absolute shifts.  That is, instead of computing
1087  * tmp << 6, tmp << 8, tmp << 13, etc, we'll just shift incrementally:
1088  * tmp <<= 6, tmp <<= 2 (== 8 - 6), tmp <<= 5 (== 13 - 8), etc:
1089  *
1090  *                      tmp = (uint32_t) (hrt >> 30);
1091  *                      sec = tmp;
1092  *                      tmp <<= 6; sec += tmp;
1093  *                      tmp <<= 2; sec -= tmp;
1094  *                      tmp <<= 5; sec += tmp;
1095  *                      tmp <<= 1; sec += tmp;
1096  *                      tmp <<= 6; sec -= tmp;
1097  *                      tmp <<= 3; sec += tmp;
1098  *                      tmp <<= 1; sec += tmp;
1099  *                      tmp <<= 3; sec += tmp;
1100  *                      tmp <<= 4; sec += tmp;
1101  *                      sec >>= 32;
1102  *
1103  * Now for the final step.  Instead of throwing away the low 32 bits at
1104  * the end, we can throw them away as we go, only keeping the high 32 bits
1105  * of the product at each step.  So, for example, where we now have
1106  *
1107  *                      tmp <<= 6; sec = sec + tmp;
1108  * we will instead have
1109  *                      tmp <<= 6; sec = (sec + tmp) >> 6;
1110  * which is equivalent to
1111  *                      sec = (sec >> 6) + tmp;
1112  *
1113  * The final shift ("sec >>= 32") goes away.
1114  *
1115  * All we're really doing here is long multiplication, just like we learned in
1116  * grade school, except that at each step, we only look at the leftmost 32
1117  * columns.  The cumulative error is, at most, the sum of all the bits we
1118  * throw away, which is 2^-32 + 2^-31 + ... + 2^-2 + 2^-1 == 1 - 2^-32.
1119  * Thus, the final result ("sec") is correct to +/- 1.
1120  *
1121  * It turns out to be important to keep "sec" positive at each step, because
1122  * we don't want to have to explicitly extend the sign bit.  Therefore,
1123  * starting with the last line of code above, each line that would have read
1124  * "sec = (sec >> n) - tmp" must be changed to "sec = tmp - (sec >> n)", and
1125  * the operators (+ or -) in all previous lines must be toggled accordingly.
1126  * Thus, we end up with:
1127  *
1128  *                      tmp = (uint32_t) (hrt >> 30);
1129  *                      sec = tmp + (sec >> 6);
1130  *                      sec = tmp - (tmp >> 2);
1131  *                      sec = tmp - (sec >> 5);
1132  *                      sec = tmp + (sec >> 1);
1133  *                      sec = tmp - (sec >> 6);
1134  *                      sec = tmp - (sec >> 3);
1135  *                      sec = tmp + (sec >> 1);
1136  *                      sec = tmp + (sec >> 3);
1137  *                      sec = tmp + (sec >> 4);
1138  *
1139  * This yields a value for sec that is accurate to +1/-1, so we have two
1140  * cases to deal with.  The mysterious-looking "+ 7" in the code below biases
1141  * the rounding toward zero, so that sec is always less than or equal to
1142  * the correct value.  With this modified code, sec is accurate to +0/-2, with
1143  * the -2 case being very rare in practice.  With this change, we only have to
1144  * deal with one case (sec too small) in the cleanup code.
1145  *
1146  * The other modification we make is to delete the second line above
1147  * ("sec = tmp + (sec >> 6);"), since it only has an effect when bit 31 is
1148  * set, and the cleanup code can handle that rare case.  This reduces the
1149  * *guaranteed* accuracy of sec to +0/-3, but speeds up the common cases.
1150  *
1151  * Finally, we compute nsec = hrt - (sec * 1,000,000,000).  nsec will always
1152  * be positive (since sec is never too large), and will at most be equal to
1153  * the error in sec (times 1,000,000,000) plus the low-order 30 bits of hrt.
1154  * Thus, nsec < 3 * 1,000,000,000 + 2^30, which is less than 2^32, so we can
1155  * safely assume that nsec fits in 32 bits.  Consequently, when we compute
1156  * sec * 1,000,000,000, we only need the low 32 bits, so we can just do 32-bit
1157  * arithmetic and let the high-order bits fall off the end.
1158  *
1159  * Since nsec < 3 * 1,000,000,000 + 2^30 == 4,073,741,824, the cleanup loop:
1160  *
1161  *                      while (nsec >= NANOSEC) {
1162  *                              nsec -= NANOSEC;
1163  *                              sec++;
1164  *                      }
1165  *
1166  * is guaranteed to complete in at most 4 iterations.  In practice, the loop
1167  * completes in 0 or 1 iteration over 95% of the time.
1168  *
1169  * On an SS2, this implementation of hrt2ts() takes 1.7 usec, versus about
1170  * 35 usec for software division -- about 20 times faster.
1171  */
void
hrt2ts(hrtime_t hrt, timestruc_t *tsp)
{
        uint32_t sec, nsec, tmp;

        /*
         * Only bits 30..61 of hrt matter for the quotient; see the
         * derivation in the block comment above.
         */
        tmp = (uint32_t)(hrt >> 30);

        /*
         * Multiply by the reciprocal of 1,000,000,000 one term at a time,
         * keeping only the high 32 bits of the running product.  The
         * statement order and the +/- pattern come straight from the
         * derivation; do not reorder.
         */
        sec = tmp - (tmp >> 2);
        sec = tmp - (sec >> 5);
        sec = tmp + (sec >> 1);
        /* "+ 7" biases rounding so that sec is never too large */
        sec = tmp - (sec >> 6) + 7;
        sec = tmp - (sec >> 3);
        sec = tmp + (sec >> 1);
        sec = tmp + (sec >> 3);
        sec = tmp + (sec >> 4);

        /*
         * tmp = sec * 125 * 125 * 125, using x * 125 == (x << 7) - 3x.
         * Only the low 32 bits are kept.
         */
        tmp = (sec << 7) - sec - sec - sec;
        tmp = (tmp << 7) - tmp - tmp - tmp;
        tmp = (tmp << 7) - tmp - tmp - tmp;

        /*
         * (tmp << 9) is sec * 1,000,000,000 modulo 2^32, so this is
         * hrt - sec * NANOSEC computed in 32-bit arithmetic.
         */
        nsec = (uint32_t)hrt - (tmp << 9);

        /*
         * sec may be short by a few seconds; move whole seconds out of
         * nsec until it is in range (at most 4 iterations per the
         * analysis above).
         */
        while (nsec >= NANOSEC) {
                nsec -= NANOSEC;
                sec++;
        }
        tsp->tv_sec = (time_t)sec;
        tsp->tv_nsec = nsec;
}
1197
/*
 * Convert from timestruc_t to hrtime_t.
 *
 * The result is the same as
 *
 *      hrt = tsp->tv_sec * NANOSEC + tsp->tv_nsec;
 *
 * but is computed without an integer multiply: 1,000,000,000 is
 * 125 * 125 * 125 * 2^9, and x * 125 is (x << 7) - x - x - x.
 */
hrtime_t
ts2hrt(const timestruc_t *tsp)
{
        hrtime_t nsecs = tsp->tv_sec;
        int pass;

        /* three successive multiplications by 125 */
        for (pass = 0; pass < 3; pass++)
                nsecs = (nsecs << 7) - nsecs - nsecs - nsecs;

        /* the remaining factor of 2^9, plus the sub-second part */
        return ((nsecs << 9) + tsp->tv_nsec);
}
1219
1220 /*
1221  * For the various 32-bit "compatibility" paths in the system.
1222  */
1223 void
1224 hrt2ts32(hrtime_t hrt, timestruc32_t *ts32p)
1225 {
1226         timestruc_t ts;
1227
1228         hrt2ts(hrt, &ts);
1229         TIMESPEC_TO_TIMESPEC32(ts32p, &ts);
1230 }
1231
/*
 * Convert from struct timeval to hrtime_t (nanoseconds).
 *
 * If this ever becomes performance critical (ha!), we can borrow the
 * code from ts2hrt(), above, to multiply tv_sec by 1,000,000,000 and use
 * the straightforward (x << 10) - (x << 5) + (x << 3) to multiply tv_usec
 * by 1,000.  For now, we'll opt for readability (besides, the compiler
 * does a passable job of optimizing constant multiplication into shifts
 * and adds).
 */
hrtime_t
tv2hrt(struct timeval *tvp)
{
        /* both terms are widened to hrtime_t before being scaled */
        hrtime_t from_sec = (hrtime_t)tvp->tv_sec * NANOSEC;
        hrtime_t from_usec = (hrtime_t)tvp->tv_usec * (NANOSEC / MICROSEC);

        return (from_sec + from_usec);
}
1245
/*
 * Convert from hrtime_t to struct timeval.
 *
 * The split into seconds and nanoseconds is exactly the one hrt2ts()
 * performs, so it is delegated to hrt2ts() rather than repeating that
 * sequence here.  All that remains is to turn the nanosecond remainder
 * into microseconds, which is an integer divide by 1000.
 */
void
hrt2tv(hrtime_t hrt, struct timeval *tvp)
{
        timestruc_t ts;
        uint32_t nsec;
        uint32_t q, r, t;

        hrt2ts(hrt, &ts);
        tvp->tv_sec = ts.tv_sec;
        nsec = (uint32_t)ts.tv_nsec;    /* hrt2ts() leaves this < NANOSEC */

        /*
         * Divide nsec by 1000 without a divide instruction: build an
         * estimate q of the quotient from shifted copies of nsec, take
         * the remainder r that the estimate leaves, and let the final
         * term add the correction that r calls for.
         */
        t = (nsec >> 7) + (nsec >> 8) + (nsec >> 12);
        q = (nsec >> 1) + t + (nsec >> 15) + (t >> 11) + (t >> 14);
        q = q >> 9;
        r = nsec - q * 1000;
        tvp->tv_usec = q + ((r + 24) >> 10);
}
1282
/*
 * nanosleep(): sleep for the interval given in the user-space timespec
 * *rqtp and, if rmtp is non-NULL, store the unslept time in *rmtp.
 *
 * Both pointers are user addresses; they are read and written with
 * copyin()/copyout() using the 32-bit layout when the caller's data model
 * is not native.
 *
 * Returns 0 on success.  Otherwise returns set_errno() of:
 *      EFAULT  a copyin() or copyout() failed
 *      EINVAL  tv_sec is negative or tv_nsec is outside [0, NANOSEC)
 *      EINTR   the wait ended with cv_waituntil_sig() returning 0
 */
int
nanosleep(timespec_t *rqtp, timespec_t *rmtp)
{
        timespec_t rqtime;
        timespec_t rmtime;
        timespec_t now;
        int timecheck;
        /* stays 1 (not interrupted) when no wait is performed at all */
        int ret = 1;
        model_t datamodel = get_udatamodel();

        /*
         * Snapshot timechanged before reading the clock; the snapshot is
         * handed to cv_waituntil_sig() below.
         * NOTE(review): presumably this lets the wait notice a clock
         * change made after "now" was sampled -- confirm.
         */
        timecheck = timechanged;
        gethrestime(&now);

        if (datamodel == DATAMODEL_NATIVE) {
                if (copyin(rqtp, &rqtime, sizeof (rqtime)))
                        return (set_errno(EFAULT));
        } else {
                timespec32_t rqtime32;

                if (copyin(rqtp, &rqtime32, sizeof (rqtime32)))
                        return (set_errno(EFAULT));
                TIMESPEC32_TO_TIMESPEC(&rqtime, &rqtime32);
        }

        if (rqtime.tv_sec < 0 || rqtime.tv_nsec < 0 ||
            rqtime.tv_nsec >= NANOSEC)
                return (set_errno(EINVAL));

        /* a zero-length request skips the wait entirely */
        if (timerspecisset(&rqtime)) {
                /* turn the relative request into an absolute wakeup time */
                timespecadd(&rqtime, &now);
                mutex_enter(&curthread->t_delay_lock);
                /*
                 * Re-issue the wait for as long as it returns a positive
                 * value; leave the loop on zero or a negative return.
                 */
                while ((ret = cv_waituntil_sig(&curthread->t_delay_cv,
                    &curthread->t_delay_lock, &rqtime, timecheck)) > 0)
                        continue;
                mutex_exit(&curthread->t_delay_lock);
        }

        if (rmtp) {
                /*
                 * If cv_waituntil_sig() returned due to a signal, and
                 * there is time remaining, then set the time remaining.
                 * Else set time remaining to zero
                 */
                rmtime.tv_sec = rmtime.tv_nsec = 0;
                if (ret == 0) {
                        /* rqtime is the absolute wakeup time at this point */
                        timespec_t delta = rqtime;

                        gethrestime(&now);
                        timespecsub(&delta, &now);
                        /* report only a strictly positive remainder */
                        if (delta.tv_sec > 0 || (delta.tv_sec == 0 &&
                            delta.tv_nsec > 0))
                                rmtime = delta;
                }

                if (datamodel == DATAMODEL_NATIVE) {
                        if (copyout(&rmtime, rmtp, sizeof (rmtime)))
                                return (set_errno(EFAULT));
                } else {
                        timespec32_t rmtime32;

                        TIMESPEC_TO_TIMESPEC32(&rmtime32, &rmtime);
                        if (copyout(&rmtime32, rmtp, sizeof (rmtime32)))
                                return (set_errno(EFAULT));
                }
        }

        /* a failed copyout above takes precedence over EINTR */
        if (ret == 0)
                return (set_errno(EINTR));
        return (0);
}
1353
/*
 * Routines to convert standard UNIX time (seconds since Jan 1, 1970)
 * into year/month/day/hour/minute/second format, and back again.
 * Note: these routines require tod_lock held to protect cached state.
 *
 * days_thru_month[] holds four rows of 16 entries, one row per value of
 * (year & 3); the converters index it as ((year & 3) << 4) + month.
 * Within a row, entry m (1..12) is the number of days of the year that
 * precede month m, so entry m + 1 is the count through the end of month
 * m and entry 13 is the length of the year.  Row 0 is the leap-year row
 * (366 days); the other three rows are identical 365-day rows.  Entries
 * 0, 14 and 15 of each row are zero padding.
 */
static int days_thru_month[64] = {
        0, 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366, 0, 0,
        0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 0, 0,
        0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 0, 0,
        0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 0, 0,
};

/*
 * One-entry cache used by utc_to_tod(): saved_tod is the broken-down
 * result last computed and saved_utc the time it was computed for.
 * saved_utc starts at -60 so that the first call, which finds saved_tod
 * zero-filled and clamps utc to be non-negative, computes a seconds
 * value of at least 60 and therefore does the full conversion.
 */
todinfo_t saved_tod;
int saved_utc = -60;
1368
/*
 * utc_to_tod(): convert seconds since the epoch into a todinfo_t.
 *
 * tod_lock must be held.  A negative utc is treated as 0.  The result is
 * cached in saved_tod/saved_utc; when a call lands in the same minute as
 * the cached value, only the cached seconds field is adjusted and the
 * cached structure is returned.
 */
todinfo_t
utc_to_tod(time_t utc)
{
        long dse, day, month, year;
        todinfo_t tod;

        ASSERT(MUTEX_HELD(&tod_lock));

        /*
         * Note that tod_set_prev() assumes utc will be set to zero in
         * the case of it being negative.  Consequently, any change made
         * to this behavior would have to be reflected in that function
         * as well.
         */
        if (utc < 0)                    /* should never happen */
                utc = 0;

        /* fast path: slide the cached seconds by the elapsed time */
        saved_tod.tod_sec += utc - saved_utc;
        saved_utc = utc;
        if (saved_tod.tod_sec >= 0 && saved_tod.tod_sec < 60)
                return (saved_tod);     /* only the seconds changed */

        /* slow path: rebuild every field from scratch */
        dse = utc / 86400;              /* days since epoch */

        tod.tod_dow = (dse + 4) % 7 + 1;        /* epoch was a Thursday */
        tod.tod_hour = (utc % 86400) / 3600;
        tod.tod_min = (utc % 3600) / 60;
        tod.tod_sec = utc % 60;

        /*
         * The first guess for the year is always a bit too large; step
         * it down until the day-of-year is no longer negative.
         */
        for (year = dse / 365 + 71; ; year--) {
                day = dse - 365 * (year - 70) - ((year - 69) >> 2);
                if (day >= 0)
                        break;
        }

        /* walk this year's row of the table to find the month */
        for (month = ((year & 3) << 4) + 1;
            day >= days_thru_month[month + 1]; month++)
                continue;

        tod.tod_year = year;
        tod.tod_month = month & 15;     /* strip the row offset */
        tod.tod_day = day - days_thru_month[month] + 1;

        saved_tod = tod;
        return (tod);
}
1415
/*
 * tod_to_utc(): convert a todinfo_t into seconds since the epoch.
 *
 * tod_lock must be held.  The fields of tod are not required to be in
 * range: on DEBUG kernels each out-of-range field produces a one-time
 * warning, and the arithmetic is carried out on whatever values were
 * supplied.  The one exception is the days_thru_month[] lookup, which
 * is skipped (treated as zero days) when the month would index outside
 * the table, so that a garbage month from the hardware clock cannot
 * cause an out-of-bounds read.
 */
time_t
tod_to_utc(todinfo_t tod)
{
        time_t utc;
        int year = tod.tod_year;
        int month = tod.tod_month + ((year & 3) << 4);
        int nentries = sizeof (days_thru_month) / sizeof (days_thru_month[0]);
        int days_before = 0;    /* days of the year preceding this month */
#ifdef DEBUG
        /* only warn once, not each time called */
        static int year_warn = 1;
        static int month_warn = 1;
        static int day_warn = 1;
        static int hour_warn = 1;
        static int min_warn = 1;
        static int sec_warn = 1;
        int days_diff = 0;      /* length of this month, 0 if unknown */
#endif

        ASSERT(MUTEX_HELD(&tod_lock));

        /* every in-bounds index reads the same entry it always has */
        if (month >= 0 && month < nentries)
                days_before = days_thru_month[month];

#ifdef DEBUG
        if (month >= 0 && month + 1 < nentries)
                days_diff = days_thru_month[month + 1] - days_before;

        if (year_warn && (year < 70 || year > 8029)) {
                cmn_err(CE_WARN,
                    "The hardware real-time clock appears to have the "
                    "wrong years value %d -- time needs to be reset\n",
                    year);
                year_warn = 0;
        }

        if (month_warn && (tod.tod_month < 1 || tod.tod_month > 12)) {
                cmn_err(CE_WARN,
                    "The hardware real-time clock appears to have the "
                    "wrong months value %d -- time needs to be reset\n",
                    tod.tod_month);
                month_warn = 0;
        }

        if (day_warn && (tod.tod_day < 1 || tod.tod_day > days_diff)) {
                cmn_err(CE_WARN,
                    "The hardware real-time clock appears to have the "
                    "wrong days value %d -- time needs to be reset\n",
                    tod.tod_day);
                day_warn = 0;
        }

        if (hour_warn && (tod.tod_hour < 0 || tod.tod_hour > 23)) {
                cmn_err(CE_WARN,
                    "The hardware real-time clock appears to have the "
                    "wrong hours value %d -- time needs to be reset\n",
                    tod.tod_hour);
                hour_warn = 0;
        }

        if (min_warn && (tod.tod_min < 0 || tod.tod_min > 59)) {
                cmn_err(CE_WARN,
                    "The hardware real-time clock appears to have the "
                    "wrong minutes value %d -- time needs to be reset\n",
                    tod.tod_min);
                min_warn = 0;
        }

        if (sec_warn && (tod.tod_sec < 0 || tod.tod_sec > 59)) {
                cmn_err(CE_WARN,
                    "The hardware real-time clock appears to have the "
                    "wrong seconds value %d -- time needs to be reset\n",
                    tod.tod_sec);
                sec_warn = 0;
        }
#endif

        utc = (year - 70);              /* next 3 lines: utc = 365y + y/4 */
        utc += (utc << 3) + (utc << 6);
        utc += (utc << 2) + ((year - 69) >> 2);
        utc += days_before + tod.tod_day - 1;
        utc = (utc << 3) + (utc << 4) + tod.tod_hour;   /* 24 * day + hour */
        utc = (utc << 6) - (utc << 2) + tod.tod_min;    /* 60 * hour + min */
        utc = (utc << 6) - (utc << 2) + tod.tod_sec;    /* 60 * min + sec */

        return (utc);
}