1 /* $NetBSD: trap.c,v 1.249 2009/11/06 18:18:57 dyoung Exp $ */
4 * Copyright (c) 1998, 2000, 2005, 2006, 2007, 2008 The NetBSD Foundation, Inc.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Charles M. Hannum.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
33 * Copyright (c) 1990 The Regents of the University of California.
34 * All rights reserved.
36 * This code is derived from software contributed to Berkeley by
37 * the University of Utah, and William Jolitz.
39 * Redistribution and use in source and binary forms, with or without
40 * modification, are permitted provided that the following conditions
42 * 1. Redistributions of source code must retain the above copyright
43 * notice, this list of conditions and the following disclaimer.
44 * 2. Redistributions in binary form must reproduce the above copyright
45 * notice, this list of conditions and the following disclaimer in the
46 * documentation and/or other materials provided with the distribution.
47 * 3. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * @(#)trap.c 7.4 (Berkeley) 5/13/91
67 * 386 Trap and System call handling
70 #include <sys/cdefs.h>
71 __KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.249 2009/11/06 18:18:57 dyoung Exp $");
75 #include "opt_lockdebug.h"
76 #include "opt_multiprocessor.h"
78 #include "opt_kvm86.h"
79 #include "opt_kstack_dr0.h"
82 #include <sys/param.h>
83 #include <sys/systm.h>
86 #include <sys/kauth.h>
87 #include <sys/kernel.h>
90 #include <sys/signal.h>
91 #include <sys/syscall.h>
93 #include <sys/ucontext.h>
95 #include <sys/savar.h>
97 #include <uvm/uvm_extern.h>
99 #include <machine/cpufunc.h>
100 #include <machine/psl.h>
101 #include <machine/reg.h>
102 #include <machine/trap.h>
103 #include <machine/userret.h>
105 #include <machine/db_machdep.h>
110 #include <machine/mca_machdep.h>
118 #include <sys/kgdb.h>
123 static inline int xmm_si_code(struct lwp
*);
124 void trap(struct trapframe
*);
125 void trap_tss(struct i386tss
*, int, int);
128 #include <machine/kvm86.h>
129 #define KVM86MODE (kvm86_incall)
131 #define KVM86MODE (0)
134 const char * const trap_type
[] = {
135 "privileged instruction fault", /* 0 T_PRIVINFLT */
136 "breakpoint trap", /* 1 T_BPTFLT */
137 "arithmetic trap", /* 2 T_ARITHTRAP */
138 "asynchronous system trap", /* 3 T_ASTFLT */
139 "protection fault", /* 4 T_PROTFLT */
140 "trace trap", /* 5 T_TRCTRAP */
141 "page fault", /* 6 T_PAGEFLT */
142 "alignment fault", /* 7 T_ALIGNFLT */
143 "integer divide fault", /* 8 T_DIVIDE */
144 "non-maskable interrupt", /* 9 T_NMI */
145 "overflow trap", /* 10 T_OFLOW */
146 "bounds check fault", /* 11 T_BOUND */
147 "FPU not available fault", /* 12 T_DNA */
148 "double fault", /* 13 T_DOUBLEFLT */
149 "FPU operand fetch fault", /* 14 T_FPOPFLT */
150 "invalid TSS fault", /* 15 T_TSSFLT */
151 "segment not present fault", /* 16 T_SEGNPFLT */
152 "stack fault", /* 17 T_STKFLT */
153 "machine check fault", /* 18 T_MCA */
154 "SSE FP exception", /* 19 T_XMM */
155 "reserved trap", /* 20 T_RESERVED */
157 int trap_types
= __arraycount(trap_type
);
163 #define IDTVEC(name) __CONCAT(X, name)
166 trap_tss(struct i386tss
*tss
, int trapno
, int code
)
170 tf
.tf_gs
= tss
->tss_gs
;
171 tf
.tf_fs
= tss
->tss_fs
;
172 tf
.tf_es
= tss
->__tss_es
;
173 tf
.tf_ds
= tss
->__tss_ds
;
174 tf
.tf_edi
= tss
->__tss_edi
;
175 tf
.tf_esi
= tss
->__tss_esi
;
176 tf
.tf_ebp
= tss
->tss_ebp
;
177 tf
.tf_ebx
= tss
->__tss_ebx
;
178 tf
.tf_edx
= tss
->__tss_edx
;
179 tf
.tf_ecx
= tss
->__tss_ecx
;
180 tf
.tf_eax
= tss
->__tss_eax
;
181 tf
.tf_trapno
= trapno
;
182 tf
.tf_err
= code
| TC_TSS
;
183 tf
.tf_eip
= tss
->__tss_eip
;
184 tf
.tf_cs
= tss
->__tss_cs
;
185 tf
.tf_eflags
= tss
->__tss_eflags
;
186 tf
.tf_esp
= tss
->tss_esp
;
187 tf
.tf_ss
= tss
->__tss_ss
;
192 xmm_si_code(struct lwp
*l
)
195 uint32_t mxcsr
, mask
;
197 if (!i386_use_fxsave
) {
199 panic("SSE FP Exception, but no SSE");
204 mxcsr
= pcb
->pcb_savefpu
.sv_xmm
.sv_env
.en_mxcsr
;
207 * Since we only have a single status and control register,
208 * we use the exception mask bits to mask disabled exceptions
210 mask
= ~((mxcsr
& __INITIAL_MXCSR__
) >> 7) & 0xff;
211 switch (mask
& mxcsr
) {
221 case EN_SW_UNDERFLOW
:
223 case EN_SW_DATACHAIN
:
232 onfault_handler(const struct pcb
*pcb
, const struct trapframe
*tf
)
234 struct onfault_table
{
239 extern const struct onfault_table onfault_table
[];
240 const struct onfault_table
*p
;
243 if (pcb
->pcb_onfault
!= NULL
) {
244 return pcb
->pcb_onfault
;
248 for (p
= onfault_table
; p
->start
; p
++) {
249 if (p
->start
<= pc
&& pc
< p
->end
) {
257 trap_print(int type
, struct trapframe
*frame
)
259 if (frame
->tf_trapno
< trap_types
)
260 printf("fatal %s", trap_type
[frame
->tf_trapno
]);
262 printf("unknown trap %d", frame
->tf_trapno
);
263 printf(" in %s mode\n", (type
& T_USER
) ? "user" : "supervisor");
264 printf("trap type %d code %x eip %x cs %x eflags %x cr2 %lx ilevel %x\n",
265 type
, frame
->tf_err
, frame
->tf_eip
, frame
->tf_cs
,
266 frame
->tf_eflags
, (long)rcr2(), curcpu()->ci_ilevel
);
272 #ifdef KSTACK_CHECK_DR0
273 u_int mask
, dr6
= rdr6();
275 mask
= 1 << 0; /* dr0 */
277 panic("trap on DR0: maybe kernel stack overflow\n");
288 * trap(frame): exception, fault, and trap interface to BSD kernel.
290 * This common code is called from assembly language IDT gate entry routines
291 * that prepare a suitable stack frame, and restore this frame after the
292 * exception has been processed. Note that the effect is as if the arguments
293 * were passed call by reference.
296 trap(struct trapframe
*frame
)
298 struct lwp
*l
= curlwp
;
301 extern char fusubail
[], kcopy_fault
[], return_address_fault
[],
302 trapreturn
[], IDTVEC(osyscall
)[];
303 struct trapframe
*vframe
;
310 if (__predict_true(l
!= NULL
)) {
315 * this can happen eg. on break points in early on boot.
320 type
= frame
->tf_trapno
;
324 printf("trap %d code %x eip %x cs %x eflags %x cr2 %lx cpl %x\n",
325 type
, frame
->tf_err
, frame
->tf_eip
, frame
->tf_cs
,
326 frame
->tf_eflags
, rcr2(), curcpu()->ci_ilevel
);
327 printf("curlwp %p%s", curlwp
, curlwp
? " " : "\n");
329 printf("pid %d lid %d\n", l
->l_proc
->p_pid
, l
->l_lid
);
332 if (type
!= T_NMI
&& !KVM86MODE
&&
333 !KERNELMODE(frame
->tf_cs
, frame
->tf_eflags
)) {
335 l
->l_md
.md_regs
= frame
;
337 LWP_CACHE_CREDS(l
, p
);
350 if (type
== T_TRCTRAP
)
353 trap_print(type
, frame
);
355 if (kdb_trap(type
, 0, frame
))
359 if (kgdb_trap(type
, frame
))
363 * If this is a breakpoint, don't panic
364 * if we're not connected.
366 if (type
== T_BPTFLT
) {
367 printf("kgdb: ignored %s\n", trap_type
[type
]);
378 kvm86_gpfault(frame
);
387 /* Check for copyin/copyout fault. */
388 onfault
= onfault_handler(pcb
, frame
);
389 if (onfault
!= NULL
) {
393 frame
->tf_eip
= (uintptr_t)onfault
;
394 frame
->tf_eax
= error
;
399 * Check for failure during return to user mode.
401 * We do this by looking at the instruction we faulted on.
402 * The specific instructions we recognize only happen when
403 * returning from a trap, syscall, or interrupt.
405 * At this point, there are (at least) two trap frames on
406 * the kernel stack; we presume here that we faulted while
407 * loading our registers out of the outer one.
409 switch (*(u_char
*)frame
->tf_eip
) {
410 case 0xcf: /* iret */
411 vframe
= (void *)((int)&frame
->tf_esp
-
412 offsetof(struct trapframe
, tf_eip
));
415 switch (*(uint32_t *)frame
->tf_eip
) {
416 case 0x0c245c8e: /* movl 0xc(%esp,1),%ds */
417 case 0x0824448e: /* movl 0x8(%esp,1),%es */
418 case 0x0424648e: /* movl 0x4(%esp,1),%fs */
419 case 0x00246c8e: /* movl 0x0(%esp,1),%gs */
424 vframe
= (void *)(int)&frame
->tf_esp
;
429 if (KERNELMODE(vframe
->tf_cs
, vframe
->tf_eflags
))
433 * Arrange to signal the thread, which will reset its
434 * registers in the outer frame. This also allows us to
435 * capture the invalid register state in sigcontext,
436 * packaged up with the signal delivery. We restart
437 * on return at 'trapreturn', acting as if nothing
438 * happened, restarting the return to user with our new
441 * Clear PSL_NT. It can be set by userland because setting
442 * it isn't a privileged operation.
444 * Set PSL_I. Otherwise, if SIGSEGV is ignored, we'll
445 * continue to generate traps infinitely with
446 * interrupts disabled.
448 frame
->tf_ds
= GSEL(GDATA_SEL
, SEL_KPL
);
449 frame
->tf_es
= GSEL(GDATA_SEL
, SEL_KPL
);
450 frame
->tf_gs
= GSEL(GDATA_SEL
, SEL_KPL
);
451 frame
->tf_fs
= GSEL(GCPU_SEL
, SEL_KPL
);
452 frame
->tf_eip
= (uintptr_t)trapreturn
;
453 frame
->tf_eflags
= (frame
->tf_eflags
& ~PSL_NT
) | PSL_I
;
455 ksi
.ksi_signo
= SIGSEGV
;
456 ksi
.ksi_addr
= (void *)rcr2();
457 ksi
.ksi_code
= SEGV_ACCERR
;
458 ksi
.ksi_trap
= type
& ~T_USER
;
459 (*p
->p_emul
->e_trapsignal
)(l
, &ksi
);
462 case T_PROTFLT
|T_USER
: /* protection fault */
463 case T_TSSFLT
|T_USER
:
464 case T_SEGNPFLT
|T_USER
:
465 case T_STKFLT
|T_USER
:
466 case T_ALIGNFLT
|T_USER
:
469 ksi
.ksi_addr
= (void *)rcr2();
471 case T_SEGNPFLT
|T_USER
:
472 case T_STKFLT
|T_USER
:
473 ksi
.ksi_signo
= SIGBUS
;
474 ksi
.ksi_code
= BUS_ADRERR
;
476 case T_TSSFLT
|T_USER
:
477 ksi
.ksi_signo
= SIGBUS
;
478 ksi
.ksi_code
= BUS_OBJERR
;
480 case T_ALIGNFLT
|T_USER
:
481 ksi
.ksi_signo
= SIGBUS
;
482 ksi
.ksi_code
= BUS_ADRALN
;
484 case T_PROTFLT
|T_USER
:
486 if (frame
->tf_eflags
& PSL_VM
) {
487 vm86_gpfault(l
, type
& ~T_USER
);
492 * If pmap_exec_fixup does something,
493 * let's retry the trap.
495 if (pmap_exec_fixup(&p
->p_vmspace
->vm_map
, frame
,
499 ksi
.ksi_signo
= SIGSEGV
;
500 ksi
.ksi_code
= SEGV_ACCERR
;
508 case T_PRIVINFLT
|T_USER
: /* privileged instruction fault */
509 case T_FPOPFLT
|T_USER
: /* coprocessor operand fault */
511 ksi
.ksi_signo
= SIGILL
;
512 ksi
.ksi_addr
= (void *)rcr2();
514 case T_PRIVINFLT
|T_USER
:
515 ksi
.ksi_code
= ILL_PRVOPC
;
517 case T_FPOPFLT
|T_USER
:
518 ksi
.ksi_code
= ILL_COPROC
;
526 case T_ASTFLT
|T_USER
:
527 /* Allow process switch. */
529 if (l
->l_pflag
& LP_OWEUPC
) {
530 l
->l_pflag
&= ~LP_OWEUPC
;
533 /* Allow a forced task switch. */
534 if (curcpu()->ci_want_resched
) {
541 ksi
.ksi_signo
= SIGKILL
;
542 ksi
.ksi_addr
= (void *)frame
->tf_eip
;
543 printf("pid %d killed due to lack of floating point\n",
551 case T_DIVIDE
|T_USER
:
552 case T_ARITHTRAP
|T_USER
:
554 ksi
.ksi_signo
= SIGFPE
;
555 ksi
.ksi_addr
= (void *)frame
->tf_eip
;
558 ksi
.ksi_code
= xmm_si_code(l
);
562 ksi
.ksi_code
= FPE_FLTOVF
;
564 case T_DIVIDE
|T_USER
:
565 ksi
.ksi_code
= FPE_FLTDIV
;
567 case T_ARITHTRAP
|T_USER
:
568 ksi
.ksi_code
= FPE_INTOVF
;
577 /* Allow page faults in kernel mode. */
578 if (__predict_false(l
== NULL
))
582 * fusubail is used by [fs]uswintr() to prevent page faulting
583 * from inside the profiling interrupt.
585 onfault
= pcb
->pcb_onfault
;
586 if (onfault
== fusubail
|| onfault
== return_address_fault
) {
589 if (cpu_intr_p() || (l
->l_pflag
& LP_INTR
) != 0) {
596 case T_PAGEFLT
|T_USER
: { /* page fault */
598 register struct vmspace
*vm
;
599 register struct vm_map
*map
;
601 extern struct vm_map
*kernel_map
;
604 if (l
->l_flag
& LW_SA
) {
605 l
->l_savp
->savp_faultaddr
= (vaddr_t
)cr2
;
606 l
->l_pflag
|= LP_SA_PAGEFAULT
;
610 if (__predict_false(vm
== NULL
)) {
614 va
= trunc_page((vaddr_t
)cr2
);
616 * It is only a kernel address space fault iff:
617 * 1. (type & T_USER) == 0 and
618 * 2. pcb_onfault not set or
619 * 3. pcb_onfault set but supervisor space fault
620 * The last can occur during an exec() copyin where the
621 * argument space is lazy-allocated.
623 if (type
== T_PAGEFLT
&& va
>= KERNBASE
)
627 if (frame
->tf_err
& PGEX_W
)
628 ftype
= VM_PROT_WRITE
;
630 ftype
= VM_PROT_READ
;
633 if (map
== kernel_map
&& va
== 0) {
634 printf("trap: bad kernel access at %lx\n", va
);
638 /* Fault the original page in. */
639 onfault
= pcb
->pcb_onfault
;
640 pcb
->pcb_onfault
= NULL
;
641 error
= uvm_fault(map
, va
, ftype
);
642 pcb
->pcb_onfault
= onfault
;
644 if (map
!= kernel_map
&& (void *)va
>= vm
->vm_maxsaddr
)
648 while (type
== T_PAGEFLT
) {
650 * we need to switch pmap now if we're in
651 * the middle of copyin/out.
653 * but we don't need to do so for kcopy as
654 * it never touch userspace.
657 if (curcpu()->ci_want_pmapload
) {
658 onfault
= onfault_handler(pcb
, frame
);
659 if (onfault
!= kcopy_fault
) {
664 * We need to keep the pmap loaded and
665 * so avoid being preempted until back
666 * into the copy functions. Disable
667 * interrupts at the hardware level before
668 * re-enabling preemption. Interrupts
669 * will be re-enabled by 'iret' when
670 * returning back out of the trap stub.
671 * They'll only be re-enabled when the
672 * program counter is once again in
673 * the copy functions, and so visible
674 * to cpu_kpreempt_exit().
680 if (l
->l_nopreempt
> 0 || !l
->l_dopreempt
||
688 * If preemption fails for some reason,
689 * don't retry it. The conditions won't
690 * change under our nose.
694 l
->l_pflag
&= ~LP_SA_PAGEFAULT
;
698 ksi
.ksi_trap
= type
& ~T_USER
;
699 ksi
.ksi_addr
= (void *)cr2
;
700 if (error
== EACCES
) {
701 ksi
.ksi_code
= SEGV_ACCERR
;
704 ksi
.ksi_code
= SEGV_MAPERR
;
707 if (type
== T_PAGEFLT
) {
708 onfault
= onfault_handler(pcb
, frame
);
711 printf("uvm_fault(%p, %#lx, %d) -> %#x\n",
712 map
, va
, ftype
, error
);
715 if (error
== ENOMEM
) {
716 ksi
.ksi_signo
= SIGKILL
;
717 printf("UVM: pid %d (%s), uid %d killed: out of swap\n",
720 kauth_cred_geteuid(l
->l_cred
) : -1);
722 ksi
.ksi_signo
= SIGSEGV
;
724 (*p
->p_emul
->e_trapsignal
)(l
, &ksi
);
725 l
->l_pflag
&= ~LP_SA_PAGEFAULT
;
730 /* Check whether they single-stepped into a lcall. */
731 if (frame
->tf_eip
== (int)IDTVEC(osyscall
))
733 if (frame
->tf_eip
== (int)IDTVEC(osyscall
) + 1) {
734 frame
->tf_eflags
&= ~PSL_T
;
739 case T_BPTFLT
|T_USER
: /* bpt instruction fault */
740 case T_TRCTRAP
|T_USER
: /* trace trap */
742 * Don't go single-stepping into a RAS.
744 if (p
->p_raslist
== NULL
||
745 (ras_lookup(p
, (void *)frame
->tf_eip
) == (void *)-1)) {
747 ksi
.ksi_signo
= SIGTRAP
;
748 ksi
.ksi_trap
= type
& ~T_USER
;
749 if (type
== (T_BPTFLT
|T_USER
))
750 ksi
.ksi_code
= TRAP_BRKPT
;
752 ksi
.ksi_code
= TRAP_TRACE
;
753 ksi
.ksi_addr
= (void *)frame
->tf_eip
;
754 (*p
->p_emul
->e_trapsignal
)(l
, &ksi
);
760 if (nmi_dispatch(frame
))
762 #if (NISA > 0 || NMCA > 0)
763 #if defined(KGDB) || defined(DDB)
764 /* NMI can be hooked up to a pushbutton for debugging */
765 printf ("NMI ... going to debugger\n");
768 if (kgdb_trap(type
, frame
))
772 if (kdb_trap(type
, 0, frame
))
775 #endif /* KGDB || DDB */
776 /* machine/parity/power fail/"kitchen sink" faults */
779 /* mca_nmi() takes care to call x86_nmi() if appropriate */
789 #endif /* NMCA > 0 */
790 #endif /* (NISA > 0 || NMCA > 0) */
791 #endif /* !defined(XEN) */
792 ; /* avoid a label at end of compound statement */
795 if ((type
& T_USER
) == 0)
801 ksi
.ksi_trap
= type
& ~T_USER
;
802 (*p
->p_emul
->e_trapsignal
)(l
, &ksi
);
807 * startlwp: start of a new LWP.
812 ucontext_t
*uc
= arg
;
816 error
= cpu_setmcontext(l
, &uc
->uc_mcontext
, uc
->uc_flags
);
818 pool_put(&lwp_uc_pool
, uc
);
823 * XXX_SA: This is a terrible name.
826 upcallret(struct lwp
*l
)
828 KERNEL_UNLOCK_LAST(l
);