sys/arch/i386/isa/npx.c

   1 /*      $NetBSD: npx.c,v 1.134 2008/11/25 21:53:50 bouyer Exp $ */
   2
   3 /*-
   4  * Copyright (c) 2008 The NetBSD Foundation, Inc.
   5  * All rights reserved.
   6  *
   7  * This code is derived from software developed for The NetBSD Foundation
   8  * by Andrew Doran.
   9  *
  10  * Redistribution and use in source and binary forms, with or without
  11  * modification, are permitted provided that the following conditions
  12  * are met:
  13  * 1. Redistributions of source code must retain the above copyright
  14  *    notice, this list of conditions and the following disclaimer.
  15  * 2. Redistributions in binary form must reproduce the above copyright
  16  *    notice, this list of conditions and the following disclaimer in the
  17  *    documentation and/or other materials provided with the distribution.
  18  *
  19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  29  * POSSIBILITY OF SUCH DAMAGE.
  30  */
  31
  32 /*-
  33  * Copyright (c) 1991 The Regents of the University of California.
  34  * All rights reserved.
  35  *
  36  * Redistribution and use in source and binary forms, with or without
  37  * modification, are permitted provided that the following conditions
  38  * are met:
  39  * 1. Redistributions of source code must retain the above copyright
  40  *    notice, this list of conditions and the following disclaimer.
  41  * 2. Redistributions in binary form must reproduce the above copyright
  42  *    notice, this list of conditions and the following disclaimer in the
  43  *    documentation and/or other materials provided with the distribution.
  44  * 3. Neither the name of the University nor the names of its contributors
  45  *    may be used to endorse or promote products derived from this software
  46  *    without specific prior written permission.
  47  *
  48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  58  * SUCH DAMAGE.
  59  *
  60  *      @(#)npx.c       7.2 (Berkeley) 5/12/91
  61  */
  62
  63 /*-
  64  * Copyright (c) 1994, 1995, 1998 Charles M. Hannum.  All rights reserved.
  65  * Copyright (c) 1990 William Jolitz.
  66  *
  67  * Redistribution and use in source and binary forms, with or without
  68  * modification, are permitted provided that the following conditions
  69  * are met:
  70  * 1. Redistributions of source code must retain the above copyright
  71  *    notice, this list of conditions and the following disclaimer.
  72  * 2. Redistributions in binary form must reproduce the above copyright
  73  *    notice, this list of conditions and the following disclaimer in the
  74  *    documentation and/or other materials provided with the distribution.
  75  * 3. All advertising materials mentioning features or use of this software
  76  *    must display the following acknowledgement:
  77  *      This product includes software developed by the University of
  78  *      California, Berkeley and its contributors.
  79  * 4. Neither the name of the University nor the names of its contributors
  80  *    may be used to endorse or promote products derived from this software
  81  *    without specific prior written permission.
  82  *
  83  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  84  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  85  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  86  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  87  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  88  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  89  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  90  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  91  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  92  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  93  * SUCH DAMAGE.
  94  *
  95  *      @(#)npx.c       7.2 (Berkeley) 5/12/91
  96  */
  97
  98 #include <sys/cdefs.h>
  99 __KERNEL_RCSID(0, "$NetBSD: npx.c,v 1.134 2008/11/25 21:53:50 bouyer Exp $");
 100
 101 #if 0
 102 #define IPRINTF(x)      printf x
 103 #else
 104 #define IPRINTF(x)
 105 #endif
 106
 107 #include "opt_multiprocessor.h"
 108 #include "opt_xen.h"
 109
 110 #include <sys/param.h>
 111 #include <sys/systm.h>
 112 #include <sys/conf.h>
 113 #include <sys/file.h>
 114 #include <sys/proc.h>
 115 #include <sys/ioctl.h>
 116 #include <sys/device.h>
 117 #include <sys/vmmeter.h>
 118 #include <sys/kernel.h>
 119 #include <sys/bus.h>
 120 #include <sys/cpu.h>
 121 #include <sys/intr.h>
 122
 123 #include <uvm/uvm_extern.h>
 124
 125 #include <machine/cpufunc.h>
 126 #include <machine/pcb.h>
 127 #include <machine/trap.h>
 128 #include <machine/specialreg.h>
 129 #include <machine/pio.h>
 130 #include <machine/i8259.h>
 131
 132 #include <dev/isa/isareg.h>
 133 #include <dev/isa/isavar.h>
 134
 135 #include <i386/isa/npxvar.h>
 136
 137 /*
 138  * 387 and 287 Numeric Coprocessor Extension (NPX) Driver.
 139  *
 140  * We do lazy initialization and switching using the TS bit in cr0 and the
 141  * MDL_USEDFPU bit in mdlwp.
 142  *
 143  * DNA exceptions are handled like this:
 144  *
 145  * 1) If there is no NPX, return and go to the emulator.
 146  * 2) If someone else has used the NPX, save its state into that process's PCB.
 147  * 3a) If MDL_USEDFPU is not set, set it and initialize the NPX.
 148  * 3b) Otherwise, reload the process's previous NPX state.
 149  *
 150  * When a process is created or exec()s, its saved cr0 image has the TS bit
 151  * set and the MDL_USEDFPU bit clear.  The MDL_USEDFPU bit is set when the
 152  * process first gets a DNA and the NPX is initialized.  The TS bit is turned
 153  * off when the NPX is used, and turned on again later when the process's NPX
 154  * state is saved.
 155  */
 156
 157 static int      x86fpflags_to_ksiginfo(uint32_t flags);
 158 static int      npxdna(struct cpu_info *);
 159
 160 #ifdef XEN
 161 #define clts()
 162 #define stts()
 163 #endif
 164
 165 static  enum npx_type           npx_type;
 166 volatile u_int                  npx_intrs_while_probing;
 167 volatile u_int                  npx_traps_while_probing;
 168
 169 extern int i386_fpu_present;
 170 extern int i386_fpu_exception;
 171 extern int i386_fpu_fdivbug;
 172
 173 struct npx_softc                *npx_softc;
 174
 175 static inline void
 176 fpu_save(union savefpu *addr)
 177 {
 178         if (i386_use_fxsave)
 179         {
 180                 fxsave(&addr->sv_xmm);
 181
 182                 /* FXSAVE doesn't FNINIT like FNSAVE does -- so do it here. */
 183                 fninit();
 184         } else
 185                 fnsave(&addr->sv_87);
 186 }
 187
 188 static int
 189 npxdna_empty(struct cpu_info *ci)
 190 {
 191
 192 #ifndef XEN
 193         panic("npxdna vector not initialized");
 194 #endif
 195         return 0;
 196 }
 197
 198
 199 int    (*npxdna_func)(struct cpu_info *) = npxdna_empty;
 200
 201 #ifndef XEN
 202 /*
 203  * This calls i8259_* directly, but currently we can count on systems
 204  * having a i8259 compatible setup all the time. Maybe have to change
 205  * that in the future.
 206  */
 207 enum npx_type
 208 npxprobe1(bus_space_tag_t iot, bus_space_handle_t ioh, int irq)
 209 {
 210         struct gate_descriptor save_idt_npxintr;
 211         struct gate_descriptor save_idt_npxtrap;
 212         enum npx_type rv = NPX_NONE;
 213         u_long  save_eflags;
 214         int control;
 215         int status;
 216         unsigned irqmask;
 217
 218         if (cpu_feature & CPUID_FPU) {
 219                 i386_fpu_exception = 1;
 220                 return NPX_CPUID;
 221         }
 222         save_eflags = x86_read_psl();
 223         x86_disable_intr();
 224         save_idt_npxintr = idt[NRSVIDT + irq];
 225         save_idt_npxtrap = idt[16];
 226         setgate(&idt[NRSVIDT + irq], probeintr, 0, SDT_SYS386IGT, SEL_KPL,
 227             GSEL(GCODE_SEL, SEL_KPL));
 228         setgate(&idt[16], probetrap, 0, SDT_SYS386TGT, SEL_KPL,
 229             GSEL(GCODE_SEL, SEL_KPL));
 230
 231         irqmask = i8259_setmask(~((1 << IRQ_SLAVE) | (1 << irq)));
 232
 233         /*
 234          * Partially reset the coprocessor, if any.  Some BIOS's don't reset
 235          * it after a warm boot.
 236          */
 237         /* full reset on some systems, NOP on others */
 238         bus_space_write_1(iot, ioh, 1, 0);
 239         delay(1000);
 240         /* clear BUSY# latch */
 241         bus_space_write_1(iot, ioh, 0, 0);
 242
 243         /*
 244          * We set CR0 in locore to trap all ESC and WAIT instructions.
 245          * We have to turn off the CR0_EM bit temporarily while probing.
 246          */
 247         lcr0(rcr0() & ~(CR0_EM|CR0_TS));
 248         x86_enable_intr();
 249
 250         /*
 251          * Finish resetting the coprocessor, if any.  If there is an error
 252          * pending, then we may get a bogus IRQ13, but probeintr() will handle
 253          * it OK.  Bogus halts have never been observed, but we enabled
 254          * IRQ13 and cleared the BUSY# latch early to handle them anyway.
 255          */
 256         fninit();
 257         delay(1000);            /* wait for any IRQ13 (fwait might hang) */
 258
 259         /*
 260          * Check for a status of mostly zero.
 261          */
 262         status = 0x5a5a;
 263         fnstsw(&status);
 264         if ((status & 0xb8ff) == 0) {
 265                 /*
 266                  * Good, now check for a proper control word.
 267                  */
 268                 control = 0x5a5a;
 269                 fnstcw(&control);
 270                 if ((control & 0x1f3f) == 0x033f) {
 271                         /*
 272                          * We have an npx, now divide by 0 to see if exception
 273                          * 16 works.
 274                          */
 275                         control &= ~(1 << 2);   /* enable divide by 0 trap */
 276                         fldcw(&control);
 277                         npx_traps_while_probing = npx_intrs_while_probing = 0;
 278                         fp_divide_by_0();
 279                         if (npx_traps_while_probing != 0) {
 280                                 /*
 281                                  * Good, exception 16 works.
 282                                  */
 283                                 rv = NPX_EXCEPTION;
 284                                 i386_fpu_exception = 1;
 285                         } else if (npx_intrs_while_probing != 0) {
 286                                 /*
 287                                  * Bad, we are stuck with IRQ13.
 288                                  */
 289                                 rv = NPX_INTERRUPT;
 290                         } else {
 291                                 /*
 292                                  * Worse, even IRQ13 is broken.  Use emulator.
 293                                  */
 294                                 rv = NPX_BROKEN;
 295                         }
 296                 }
 297         }
 298
 299         x86_disable_intr();
 300         lcr0(rcr0() | (CR0_EM|CR0_TS));
 301
 302         irqmask = i8259_setmask(irqmask);
 303
 304         idt[NRSVIDT + irq] = save_idt_npxintr;
 305
 306         idt[16] = save_idt_npxtrap;
 307         x86_write_psl(save_eflags);
 308
 309         return (rv);
 310 }
 311
 312 void npxinit(struct cpu_info *ci)
 313 {
 314         lcr0(rcr0() & ~(CR0_EM|CR0_TS));
 315         fninit();
 316         if (npx586bug1(4195835, 3145727) != 0) {
 317                 i386_fpu_fdivbug = 1;
 318                 aprint_normal_dev(ci->ci_dev,
 319                     "WARNING: Pentium FDIV bug detected!\n");
 320         }
 321         lcr0(rcr0() | (CR0_TS));
 322 }
 323 #endif
 324
 325 /*
 326  * Common attach routine.
 327  */
 328 void
 329 npxattach(struct npx_softc *sc)
 330 {
 331
 332         npx_softc = sc;
 333         npx_type = sc->sc_type;
 334
 335 #ifndef XEN
 336         npxinit(&cpu_info_primary);
 337 #endif
 338         i386_fpu_present = 1;
 339         npxdna_func = npxdna;
 340
 341         if (!pmf_device_register(sc->sc_dev, NULL, NULL))
 342                 aprint_error_dev(sc->sc_dev, "couldn't establish power handler\n");
 343 }
 344
 345 int
 346 npxdetach(device_t self, int flags)
 347 {
 348         struct npx_softc *sc = device_private(self);
 349
 350         if (sc->sc_type == NPX_INTERRUPT)
 351                 return EBUSY;
 352
 353         pmf_device_deregister(self);
 354
 355         return 0;
 356 }
 357
 358 /*
 359  * Record the FPU state and reinitialize it all except for the control word.
 360  * Then generate a SIGFPE.
 361  *
 362  * Reinitializing the state allows naive SIGFPE handlers to longjmp without
 363  * doing any fixups.
 364  *
 365  * XXX there is currently no way to pass the full error state to signal
 366  * handlers, and if this is a nested interrupt there is no way to pass even
 367  * a status code!  So there is no way to have a non-naive SIGFPE handler.  At
 368  * best a handler could do an fninit followed by an fldcw of a static value.
 369  * fnclex would be of little use because it would leave junk on the FPU stack.
 370  * Returning from the handler would be even less safe than usual because
 371  * IRQ13 exception handling makes exceptions even less precise than usual.
 372  */
 373 int
 374 npxintr(void *arg, struct intrframe *frame)
 375 {
 376         struct cpu_info *ci = curcpu();
 377         struct lwp *l = ci->ci_fpcurlwp;
 378         union savefpu *addr;
 379         struct npx_softc *sc;
 380         struct pcb *pcb;
 381         ksiginfo_t ksi;
 382
 383         sc = npx_softc;
 384
 385         kpreempt_disable();
 386 #ifndef XEN
 387         KASSERT((x86_read_psl() & PSL_I) == 0);
 388         x86_enable_intr();
 389 #endif
 390
 391         uvmexp.traps++;
 392         IPRINTF(("%s: fp intr\n", device_xname(ci->ci_dev)));
 393
 394 #ifndef XEN
 395         /*
 396          * Clear the interrupt latch.
 397          */
 398         bus_space_write_1(sc->sc_iot, sc->sc_ioh, 0, 0);
 399 #endif
 400
 401         /*
 402          * If we're saving, ignore the interrupt.  The FPU will generate
 403          * another one when we restore the state later.
 404          */
 405         if (ci->ci_fpsaving) {
 406                 kpreempt_enable();
 407                 return (1);
 408         }
 409
 410         if (l == NULL || npx_type == NPX_NONE) {
 411                 printf("npxintr: l = %p, curproc = %p, npx_type = %d\n",
 412                     l, curproc, npx_type);
 413                 printf("npxintr: came from nowhere");
 414                 kpreempt_enable();
 415                 return 1;
 416         }
 417
 418         /*
 419          * At this point, fpcurlwp should be curlwp.  If it wasn't, the TS
 420          * bit should be set, and we should have gotten a DNA exception.
 421          */
 422         KASSERT(l == curlwp);
 423         pcb = lwp_getpcb(l);
 424
 425         /*
 426          * Find the address of fpcurproc's saved FPU state.  (Given the
 427          * invariant above, this is always the one in curpcb.)
 428          */
 429         addr = &pcb->pcb_savefpu;
 430
 431         /*
 432          * Save state.  This does an implied fninit.  It had better not halt
 433          * the CPU or we'll hang.
 434          */
 435         fpu_save(addr);
 436         fwait();
 437         if (i386_use_fxsave) {
 438                 fldcw(&addr->sv_xmm.sv_env.en_cw);
 439                 /*
 440                  * FNINIT doesn't affect MXCSR or the XMM registers;
 441                  * no need to re-load MXCSR here.
 442                  */
 443         } else
 444                 fldcw(&addr->sv_87.sv_env.en_cw);
 445         fwait();
 446         /*
 447          * Remember the exception status word and tag word.  The current
 448          * (almost fninit'ed) fpu state is in the fpu and the exception
 449          * state just saved will soon be junk.  However, the implied fninit
 450          * doesn't change the error pointers or register contents, and we
 451          * preserved the control word and will copy the status and tag
 452          * words, so the complete exception state can be recovered.
 453          */
 454         if (i386_use_fxsave) {
 455                 addr->sv_xmm.sv_ex_sw = addr->sv_xmm.sv_env.en_sw;
 456                 addr->sv_xmm.sv_ex_tw = addr->sv_xmm.sv_env.en_tw;
 457         } else {
 458                 addr->sv_87.sv_ex_sw = addr->sv_87.sv_env.en_sw;
 459                 addr->sv_87.sv_ex_tw = addr->sv_87.sv_env.en_tw;
 460         }
 461         /*
 462          * Pass exception to process.
 463          */
 464         if (USERMODE(frame->if_cs, frame->if_eflags)) {
 465                 /*
 466                  * Interrupt is essentially a trap, so we can afford to call
 467                  * the SIGFPE handler (if any) as soon as the interrupt
 468                  * returns.
 469                  *
 470                  * XXX little or nothing is gained from this, and plenty is
 471                  * lost - the interrupt frame has to contain the trap frame
 472                  * (this is otherwise only necessary for the rescheduling trap
 473                  * in doreti, and the frame for that could easily be set up
 474                  * just before it is used).
 475                  */
 476                 l->l_md.md_regs = (struct trapframe *)&frame->if_gs;
 477
 478                 KSI_INIT_TRAP(&ksi);
 479                 ksi.ksi_signo = SIGFPE;
 480                 ksi.ksi_addr = (void *)frame->if_eip;
 481
 482                 /*
 483                  * Encode the appropriate code for detailed information on
 484                  * this exception.
 485                  */
 486
 487                 if (i386_use_fxsave) {
 488                         ksi.ksi_code =
 489                                 x86fpflags_to_ksiginfo(addr->sv_xmm.sv_ex_sw);
 490                         ksi.ksi_trap = (int)addr->sv_xmm.sv_ex_sw;
 491                 } else {
 492                         ksi.ksi_code =
 493                                 x86fpflags_to_ksiginfo(addr->sv_87.sv_ex_sw);
 494                         ksi.ksi_trap = (int)addr->sv_87.sv_ex_sw;
 495                 }
 496
 497                 trapsignal(l, &ksi);
 498         } else {
 499                 /*
 500                  * This is a nested interrupt.  This should only happen when
 501                  * an IRQ13 occurs at the same time as a higher-priority
 502                  * interrupt.
 503                  *
 504                  * XXX
 505                  * Currently, we treat this like an asynchronous interrupt, but
 506                  * this has disadvantages.
 507                  */
 508                 psignal(l->l_proc, SIGFPE);
 509         }
 510
 511         kpreempt_enable();
 512         return (1);
 513 }
 514
 515 /* map x86 fp flags to ksiginfo fp codes                */
 516 /* see table 8-4 of the IA-32 Intel Architecture        */
 517 /* Software Developer's Manual, Volume 1                */
 518 /* XXX punting on the stack fault with FLTINV           */
 519 static int
 520 x86fpflags_to_ksiginfo(uint32_t flags)
 521 {
 522         int i;
 523         static int x86fp_ksiginfo_table[] = {
 524                 FPE_FLTINV, /* bit 0 - invalid operation */
 525                 FPE_FLTRES, /* bit 1 - denormal operand */
 526                 FPE_FLTDIV, /* bit 2 - divide by zero   */
 527                 FPE_FLTOVF, /* bit 3 - fp overflow      */
 528                 FPE_FLTUND, /* bit 4 - fp underflow     */
 529                 FPE_FLTRES, /* bit 5 - fp precision     */
 530                 FPE_FLTINV, /* bit 6 - stack fault      */
 531         };
 532
 533         for(i=0;i < sizeof(x86fp_ksiginfo_table)/sizeof(int); i++) {
 534                 if (flags & (1 << i))
 535                         return(x86fp_ksiginfo_table[i]);
 536         }
 537         /* punt if flags not set */
 538         return(0);
 539 }
 540
 541 /*
 542  * Implement device not available (DNA) exception
 543  *
 544  * If we were the last lwp to use the FPU, we can simply return.
 545  * Otherwise, we save the previous state, if necessary, and restore
 546  * our last saved state.
 547  */
 548 static int
 549 npxdna(struct cpu_info *ci)
 550 {
 551         struct lwp *l, *fl;
 552         struct pcb *pcb;
 553         int s;
 554
 555         if (ci->ci_fpsaving) {
 556                 /* Recursive trap. */
 557                 return 1;
 558         }
 559
 560         /* Lock out IPIs and disable preemption. */
 561         s = splhigh();
 562 #ifndef XEN
 563         x86_enable_intr();
 564 #endif
 565         /* Save state on current CPU. */
 566         l = ci->ci_curlwp;
 567         pcb = lwp_getpcb(l);
 568
 569         fl = ci->ci_fpcurlwp;
 570         if (fl != NULL) {
 571                 /*
 572                  * It seems we can get here on Xen even if we didn't
 573                  * switch lwp.  In this case do nothing
 574                  */
 575                 if (fl == l) {
 576                         KASSERT(pcb->pcb_fpcpu == ci);
 577                         ci->ci_fpused = 1;
 578                         clts();
 579                         splx(s);
 580                         return 1;
 581                 }
 582                 KASSERT(fl != l);
 583                 npxsave_cpu(true);
 584                 KASSERT(ci->ci_fpcurlwp == NULL);
 585         }
 586
 587         /* Save our state if on a remote CPU. */
 588         if (pcb->pcb_fpcpu != NULL) {
 589                 /* Explicitly disable preemption before dropping spl. */
 590                 KPREEMPT_DISABLE(l);
 591                 splx(s);
 592                 npxsave_lwp(l, true);
 593                 KASSERT(pcb->pcb_fpcpu == NULL);
 594                 s = splhigh();
 595                 KPREEMPT_ENABLE(l);
 596         }
 597
 598         /*
 599          * Restore state on this CPU, or initialize.  Ensure that
 600          * the entire update is atomic with respect to FPU-sync IPIs.
 601          */
 602         clts();
 603         ci->ci_fpcurlwp = l;
 604         pcb->pcb_fpcpu = ci;
 605         ci->ci_fpused = 1;
 606
 607         if ((l->l_md.md_flags & MDL_USEDFPU) == 0) {
 608                 fninit();
 609                 if (i386_use_fxsave) {
 610                         fldcw(&pcb->pcb_savefpu.
 611                             sv_xmm.sv_env.en_cw);
 612                 } else {
 613                         fldcw(&pcb->pcb_savefpu.
 614                             sv_87.sv_env.en_cw);
 615                 }
 616                 l->l_md.md_flags |= MDL_USEDFPU;
 617         } else if (i386_use_fxsave) {
 618                 /*
 619                  * AMD FPU's do not restore FIP, FDP, and FOP on fxrstor,
 620                  * leaking other process's execution history. Clear them
 621                  * manually.
 622                  */
 623                 static const double zero = 0.0;
 624                 int status;
 625                 /*
 626                  * Clear the ES bit in the x87 status word if it is currently
 627                  * set, in order to avoid causing a fault in the upcoming load.
 628                  */
 629                 fnstsw(&status);
 630                 if (status & 0x80)
 631                         fnclex();
 632                 /*
 633                  * Load the dummy variable into the x87 stack.  This mangles
 634                  * the x87 stack, but we don't care since we're about to call
 635                  * fxrstor() anyway.
 636                  */
 637                 fldummy(&zero);
 638                 fxrstor(&pcb->pcb_savefpu.sv_xmm);
 639         } else {
 640                 frstor(&pcb->pcb_savefpu.sv_87);
 641         }
 642
 643         KASSERT(ci == curcpu());
 644         splx(s);
 645         return 1;
 646 }
 647
 648 /*
 649  * Save current CPU's FPU state.  Must be called at IPL_HIGH.
 650  */
 651 void
 652 npxsave_cpu(bool save)
 653 {
 654         struct cpu_info *ci;
 655         struct lwp *l;
 656         struct pcb *pcb;
 657
 658         KASSERT(curcpu()->ci_ilevel == IPL_HIGH);
 659
 660         ci = curcpu();
 661         l = ci->ci_fpcurlwp;
 662         if (l == NULL)
 663                 return;
 664
 665         pcb = lwp_getpcb(l);
 666
 667         if (save) {
 668                  /*
 669                   * Set ci->ci_fpsaving, so that any pending exception will
 670                   * be thrown away.  It will be caught again if/when the
 671                   * FPU state is restored.
 672                   */
 673                 KASSERT(ci->ci_fpsaving == 0);
 674                 clts();
 675                 ci->ci_fpsaving = 1;
 676                 if (i386_use_fxsave) {
 677                         fxsave(&pcb->pcb_savefpu.sv_xmm);
 678                 } else {
 679                         fnsave(&pcb->pcb_savefpu.sv_87);
 680                 }
 681                 ci->ci_fpsaving = 0;
 682         }
 683
 684         stts();
 685         pcb->pcb_fpcpu = NULL;
 686         ci->ci_fpcurlwp = NULL;
 687         ci->ci_fpused = 1;
 688 }
 689
 690 /*
 691  * Save l's FPU state, which may be on this processor or another processor.
 692  * It may take some time, so we avoid disabling preemption where possible.
 693  * Caller must know that the target LWP is stopped, otherwise this routine
 694  * may race against it.
 695  */
 696 void
 697 npxsave_lwp(struct lwp *l, bool save)
 698 {
 699         struct cpu_info *oci;
 700         struct pcb *pcb;
 701         int s, spins, ticks;
 702
 703         spins = 0;
 704         ticks = hardclock_ticks;
 705         for (;;) {
 706                 s = splhigh();
 707                 pcb = lwp_getpcb(l);
 708                 oci = pcb->pcb_fpcpu;
 709                 if (oci == NULL) {
 710                         splx(s);
 711                         break;
 712                 }
 713                 if (oci == curcpu()) {
 714                         KASSERT(oci->ci_fpcurlwp == l);
 715                         npxsave_cpu(save);
 716                         splx(s);
 717                         break;
 718                 }
 719                 splx(s);
 720                 x86_send_ipi(oci, X86_IPI_SYNCH_FPU);
 721                 while (pcb->pcb_fpcpu == oci &&
 722                     ticks == hardclock_ticks) {
 723                         x86_pause();
 724                         spins++;
 725                 }
 726                 if (spins > 100000000) {
 727                         panic("npxsave_lwp: did not");
 728                 }
 729         }
 730
 731         if (!save) {
 732                 /* Ensure we restart with a clean slate. */
 733                 l->l_md.md_flags &= ~MDL_USEDFPU;
 734         }
 735 }