Fix up mix of man(7)/mdoc(7).
[netbsd-mini2440.git] / sys / compat / linux / arch / i386 / linux_machdep.c
blob4abf5ac18763007dc6a520dbc242e37ed9d87811
1 /* $NetBSD: linux_machdep.c,v 1.146 2009/11/23 00:46:06 rmind Exp $ */
3 /*-
4 * Copyright (c) 1995, 2000, 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Frank van der Linden, and by Andrew Doran.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.146 2009/11/23 00:46:06 rmind Exp $");
35 #if defined(_KERNEL_OPT)
36 #include "opt_vm86.h"
37 #include "opt_user_ldt.h"
38 #endif
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/signalvar.h>
43 #include <sys/kernel.h>
44 #include <sys/proc.h>
45 #include <sys/buf.h>
46 #include <sys/reboot.h>
47 #include <sys/conf.h>
48 #include <sys/exec.h>
49 #include <sys/file.h>
50 #include <sys/callout.h>
51 #include <sys/malloc.h>
52 #include <sys/mbuf.h>
53 #include <sys/msgbuf.h>
54 #include <sys/mount.h>
55 #include <sys/vnode.h>
56 #include <sys/device.h>
57 #include <sys/syscallargs.h>
58 #include <sys/filedesc.h>
59 #include <sys/exec_elf.h>
60 #include <sys/disklabel.h>
61 #include <sys/ioctl.h>
62 #include <sys/wait.h>
63 #include <sys/kauth.h>
64 #include <sys/kmem.h>
66 #include <miscfs/specfs/specdev.h>
68 #include <compat/linux/common/linux_types.h>
69 #include <compat/linux/common/linux_signal.h>
70 #include <compat/linux/common/linux_util.h>
71 #include <compat/linux/common/linux_ioctl.h>
72 #include <compat/linux/common/linux_hdio.h>
73 #include <compat/linux/common/linux_exec.h>
74 #include <compat/linux/common/linux_machdep.h>
75 #include <compat/linux/common/linux_errno.h>
77 #include <compat/linux/linux_syscallargs.h>
79 #include <sys/cpu.h>
80 #include <machine/cpufunc.h>
81 #include <machine/psl.h>
82 #include <machine/reg.h>
83 #include <machine/segments.h>
84 #include <machine/specialreg.h>
85 #include <machine/sysarch.h>
86 #include <machine/vm86.h>
87 #include <machine/vmparam.h>
90 * To see whether wscons is configured (for virtual console ioctl calls).
92 #if defined(_KERNEL_OPT)
93 #include "wsdisplay.h"
94 #endif
95 #if (NWSDISPLAY > 0)
96 #include <dev/wscons/wsconsio.h>
97 #include <dev/wscons/wsdisplay_usl_io.h>
98 #if defined(_KERNEL_OPT)
99 #include "opt_xserver.h"
100 #endif
101 #endif
103 #ifdef DEBUG_LINUX
104 #define DPRINTF(a) uprintf a
105 #else
106 #define DPRINTF(a)
107 #endif
109 static struct biosdisk_info *fd2biosinfo(struct proc *, struct file *);
110 extern struct disklist *x86_alldisks;
111 static void linux_save_ucontext(struct lwp *, struct trapframe *,
112 const sigset_t *, struct sigaltstack *, struct linux_ucontext *);
113 static void linux_save_sigcontext(struct lwp *, struct trapframe *,
114 const sigset_t *, struct linux_sigcontext *);
115 static int linux_restore_sigcontext(struct lwp *,
116 struct linux_sigcontext *, register_t *);
117 static void linux_rt_sendsig(const ksiginfo_t *, const sigset_t *);
118 static void linux_old_sendsig(const ksiginfo_t *, const sigset_t *);
120 extern char linux_sigcode[], linux_rt_sigcode[];
122 * Deal with some i386-specific things in the Linux emulation code.
125 void
126 linux_setregs(struct lwp *l, struct exec_package *epp, u_long stack)
128 struct pcb *pcb = lwp_getpcb(l);
129 struct trapframe *tf;
131 #if NNPX > 0
132 /* If we were using the FPU, forget about it. */
133 if (npxproc == l)
134 npxdrop();
135 #endif
137 #ifdef USER_LDT
138 pmap_ldt_cleanup(l);
139 #endif
141 l->l_md.md_flags &= ~MDL_USEDFPU;
143 if (i386_use_fxsave) {
144 pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
145 pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
146 } else
147 pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
149 tf = l->l_md.md_regs;
150 tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
151 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
152 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
153 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
154 tf->tf_edi = 0;
155 tf->tf_esi = 0;
156 tf->tf_ebp = 0;
157 tf->tf_ebx = (int)l->l_proc->p_psstr;
158 tf->tf_edx = 0;
159 tf->tf_ecx = 0;
160 tf->tf_eax = 0;
161 tf->tf_eip = epp->ep_entry;
162 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
163 tf->tf_eflags = PSL_USERSET;
164 tf->tf_esp = stack;
165 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
169 * Send an interrupt to process.
171 * Stack is set up to allow sigcode stored
172 * in u. to call routine, followed by kcall
173 * to sigreturn routine below. After sigreturn
174 * resets the signal mask, the stack, and the
175 * frame pointer, it returns to the user
176 * specified pc, psl.
179 void
180 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
182 if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
183 linux_rt_sendsig(ksi, mask);
184 else
185 linux_old_sendsig(ksi, mask);
189 static void
190 linux_save_ucontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask, struct sigaltstack *sas, struct linux_ucontext *uc)
192 uc->uc_flags = 0;
193 uc->uc_link = NULL;
194 native_to_linux_sigaltstack(&uc->uc_stack, sas);
195 linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
196 native_to_linux_sigset(&uc->uc_sigmask, mask);
197 (void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
200 static void
201 linux_save_sigcontext(struct lwp *l, struct trapframe *tf,
202 const sigset_t *mask, struct linux_sigcontext *sc)
204 struct pcb *pcb = lwp_getpcb(l);
206 /* Save register context. */
207 #ifdef VM86
208 if (tf->tf_eflags & PSL_VM) {
209 sc->sc_gs = tf->tf_vm86_gs;
210 sc->sc_fs = tf->tf_vm86_fs;
211 sc->sc_es = tf->tf_vm86_es;
212 sc->sc_ds = tf->tf_vm86_ds;
213 sc->sc_eflags = get_vflags(l);
214 } else
215 #endif
217 sc->sc_gs = tf->tf_gs;
218 sc->sc_fs = tf->tf_fs;
219 sc->sc_es = tf->tf_es;
220 sc->sc_ds = tf->tf_ds;
221 sc->sc_eflags = tf->tf_eflags;
223 sc->sc_edi = tf->tf_edi;
224 sc->sc_esi = tf->tf_esi;
225 sc->sc_esp = tf->tf_esp;
226 sc->sc_ebp = tf->tf_ebp;
227 sc->sc_ebx = tf->tf_ebx;
228 sc->sc_edx = tf->tf_edx;
229 sc->sc_ecx = tf->tf_ecx;
230 sc->sc_eax = tf->tf_eax;
231 sc->sc_eip = tf->tf_eip;
232 sc->sc_cs = tf->tf_cs;
233 sc->sc_esp_at_signal = tf->tf_esp;
234 sc->sc_ss = tf->tf_ss;
235 sc->sc_err = tf->tf_err;
236 sc->sc_trapno = tf->tf_trapno;
237 sc->sc_cr2 = pcb->pcb_cr2;
238 sc->sc_387 = NULL;
240 /* Save signal stack. */
241 /* Linux doesn't save the onstack flag in sigframe */
243 /* Save signal mask. */
244 native_to_linux_old_sigset(&sc->sc_mask, mask);
247 static void
248 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
250 struct lwp *l = curlwp;
251 struct proc *p = l->l_proc;
252 struct trapframe *tf;
253 struct linux_rt_sigframe *fp, frame;
254 int onstack, error;
255 linux_siginfo_t *lsi;
256 int sig = ksi->ksi_signo;
257 sig_t catcher = SIGACTION(p, sig).sa_handler;
258 struct sigaltstack *sas = &l->l_sigstk;
260 tf = l->l_md.md_regs;
261 /* Do we need to jump onto the signal stack? */
262 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
263 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
266 /* Allocate space for the signal handler context. */
267 if (onstack)
268 fp = (struct linux_rt_sigframe *)((char *)sas->ss_sp +
269 sas->ss_size);
270 else
271 fp = (struct linux_rt_sigframe *)tf->tf_esp;
272 fp--;
274 DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
275 onstack, fp, sig, tf->tf_eip,
276 ((struct pcb *)lwp_getpcb(l))->pcb_cr2));
278 /* Build stack frame for signal trampoline. */
279 frame.sf_handler = catcher;
280 frame.sf_sig = native_to_linux_signo[sig];
281 frame.sf_sip = &fp->sf_si;
282 frame.sf_ucp = &fp->sf_uc;
285 * XXX: the following code assumes that the constants for
286 * siginfo are the same between linux and NetBSD.
288 (void)memset(lsi = &frame.sf_si, 0, sizeof(frame.sf_si));
289 lsi->lsi_errno = native_to_linux_errno[ksi->ksi_errno];
290 lsi->lsi_code = native_to_linux_si_code(ksi->ksi_code);
291 switch (lsi->lsi_signo = frame.sf_sig) {
292 case LINUX_SIGILL:
293 case LINUX_SIGFPE:
294 case LINUX_SIGSEGV:
295 case LINUX_SIGBUS:
296 case LINUX_SIGTRAP:
297 lsi->lsi_addr = ksi->ksi_addr;
298 break;
299 case LINUX_SIGCHLD:
300 lsi->lsi_uid = ksi->ksi_uid;
301 lsi->lsi_pid = ksi->ksi_pid;
302 lsi->lsi_utime = ksi->ksi_utime;
303 lsi->lsi_stime = ksi->ksi_stime;
304 lsi->lsi_status =
305 native_to_linux_si_status(ksi->ksi_code, ksi->ksi_status);
306 break;
307 case LINUX_SIGIO:
308 lsi->lsi_band = ksi->ksi_band;
309 lsi->lsi_fd = ksi->ksi_fd;
310 break;
311 default:
312 lsi->lsi_uid = ksi->ksi_uid;
313 lsi->lsi_pid = ksi->ksi_pid;
314 if (lsi->lsi_signo == LINUX_SIGALRM ||
315 lsi->lsi_signo >= LINUX_SIGRTMIN)
316 lsi->lsi_value.sival_ptr = ksi->ksi_value.sival_ptr;
317 break;
320 /* Save register context. */
321 linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
322 sendsig_reset(l, sig);
324 mutex_exit(p->p_lock);
325 error = copyout(&frame, fp, sizeof(frame));
326 mutex_enter(p->p_lock);
328 if (error != 0) {
330 * Process has trashed its stack; give it an illegal
331 * instruction to halt it in its tracks.
333 sigexit(l, SIGILL);
334 /* NOTREACHED */
338 * Build context to run handler in.
340 tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
341 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
342 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
343 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
344 tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
345 (linux_rt_sigcode - linux_sigcode);
346 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
347 tf->tf_eflags &= ~PSL_CLEARSIG;
348 tf->tf_esp = (int)fp;
349 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
351 /* Remember that we're now on the signal stack. */
352 if (onstack)
353 sas->ss_flags |= SS_ONSTACK;
356 static void
357 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
359 struct lwp *l = curlwp;
360 struct proc *p = l->l_proc;
361 struct trapframe *tf;
362 struct linux_sigframe *fp, frame;
363 int onstack, error;
364 int sig = ksi->ksi_signo;
365 sig_t catcher = SIGACTION(p, sig).sa_handler;
366 struct sigaltstack *sas = &l->l_sigstk;
368 tf = l->l_md.md_regs;
370 /* Do we need to jump onto the signal stack? */
371 onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
372 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
374 /* Allocate space for the signal handler context. */
375 if (onstack)
376 fp = (struct linux_sigframe *) ((char *)sas->ss_sp +
377 sas->ss_size);
378 else
379 fp = (struct linux_sigframe *)tf->tf_esp;
380 fp--;
382 DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
383 onstack, fp, sig, tf->tf_eip,
384 ((struct pcb *)lwp_getpcb(l))->pcb_cr2));
386 /* Build stack frame for signal trampoline. */
387 frame.sf_handler = catcher;
388 frame.sf_sig = native_to_linux_signo[sig];
390 linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
391 sendsig_reset(l, sig);
393 mutex_exit(p->p_lock);
394 error = copyout(&frame, fp, sizeof(frame));
395 mutex_enter(p->p_lock);
397 if (error != 0) {
399 * Process has trashed its stack; give it an illegal
400 * instruction to halt it in its tracks.
402 sigexit(l, SIGILL);
403 /* NOTREACHED */
407 * Build context to run handler in.
409 tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
410 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
411 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
412 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
413 tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
414 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
415 tf->tf_eflags &= ~PSL_CLEARSIG;
416 tf->tf_esp = (int)fp;
417 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
419 /* Remember that we're now on the signal stack. */
420 if (onstack)
421 sas->ss_flags |= SS_ONSTACK;
425 * System call to cleanup state after a signal
426 * has been taken. Reset signal mask and
427 * stack state from context left by sendsig (above).
428 * Return to previous pc and psl as specified by
429 * context left by sendsig. Check carefully to
430 * make sure that the user has not modified the
431 * psl to gain improper privileges or to cause
432 * a machine fault.
435 linux_sys_rt_sigreturn(struct lwp *l, const struct linux_sys_rt_sigreturn_args *uap, register_t *retval)
437 /* {
438 syscallarg(struct linux_ucontext *) ucp;
439 } */
440 struct linux_ucontext context, *ucp = SCARG(uap, ucp);
441 int error;
444 * The trampoline code hands us the context.
445 * It is unsafe to keep track of it ourselves, in the event that a
446 * program jumps out of a signal handler.
448 if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
449 return error;
451 /* XXX XAX we can do better here by using more of the ucontext */
452 return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
456 linux_sys_sigreturn(struct lwp *l, const struct linux_sys_sigreturn_args *uap, register_t *retval)
458 /* {
459 syscallarg(struct linux_sigcontext *) scp;
460 } */
461 struct linux_sigcontext context, *scp = SCARG(uap, scp);
462 int error;
465 * The trampoline code hands us the context.
466 * It is unsafe to keep track of it ourselves, in the event that a
467 * program jumps out of a signal handler.
469 if ((error = copyin((void *)scp, &context, sizeof(*scp))) != 0)
470 return error;
471 return linux_restore_sigcontext(l, &context, retval);
474 static int
475 linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp,
476 register_t *retval)
478 struct proc *p = l->l_proc;
479 struct sigaltstack *sas = &l->l_sigstk;
480 struct trapframe *tf;
481 sigset_t mask;
482 ssize_t ss_gap;
483 /* Restore register context. */
484 tf = l->l_md.md_regs;
486 DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
487 #ifdef VM86
488 if (scp->sc_eflags & PSL_VM) {
489 void syscall_vm86(struct trapframe *);
491 tf->tf_vm86_gs = scp->sc_gs;
492 tf->tf_vm86_fs = scp->sc_fs;
493 tf->tf_vm86_es = scp->sc_es;
494 tf->tf_vm86_ds = scp->sc_ds;
495 set_vflags(l, scp->sc_eflags);
496 p->p_md.md_syscall = syscall_vm86;
497 } else
498 #endif
501 * Check for security violations. If we're returning to
502 * protected mode, the CPU will validate the segment registers
503 * automatically and generate a trap on violations. We handle
504 * the trap, rather than doing all of the checking here.
506 if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
507 !USERMODE(scp->sc_cs, scp->sc_eflags))
508 return EINVAL;
510 tf->tf_gs = scp->sc_gs;
511 tf->tf_fs = scp->sc_fs;
512 tf->tf_es = scp->sc_es;
513 tf->tf_ds = scp->sc_ds;
514 #ifdef VM86
515 if (tf->tf_eflags & PSL_VM)
516 (*p->p_emul->e_syscall_intern)(p);
517 #endif
518 tf->tf_eflags = scp->sc_eflags;
520 tf->tf_edi = scp->sc_edi;
521 tf->tf_esi = scp->sc_esi;
522 tf->tf_ebp = scp->sc_ebp;
523 tf->tf_ebx = scp->sc_ebx;
524 tf->tf_edx = scp->sc_edx;
525 tf->tf_ecx = scp->sc_ecx;
526 tf->tf_eax = scp->sc_eax;
527 tf->tf_eip = scp->sc_eip;
528 tf->tf_cs = scp->sc_cs;
529 tf->tf_esp = scp->sc_esp_at_signal;
530 tf->tf_ss = scp->sc_ss;
532 /* Restore signal stack. */
534 * Linux really does it this way; it doesn't have space in sigframe
535 * to save the onstack flag.
537 mutex_enter(p->p_lock);
538 ss_gap = (ssize_t)((char *)scp->sc_esp_at_signal - (char *)sas->ss_sp);
539 if (ss_gap >= 0 && ss_gap < sas->ss_size)
540 sas->ss_flags |= SS_ONSTACK;
541 else
542 sas->ss_flags &= ~SS_ONSTACK;
544 /* Restore signal mask. */
545 linux_old_to_native_sigset(&mask, &scp->sc_mask);
546 (void) sigprocmask1(l, SIG_SETMASK, &mask, 0);
547 mutex_exit(p->p_lock);
549 DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
550 return EJUSTRETURN;
553 #ifdef USER_LDT
555 static int
556 linux_read_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
557 register_t *retval)
559 struct x86_get_ldt_args gl;
560 int error;
561 union descriptor *ldt_buf;
562 size_t sz;
565 * I've checked the linux code - this function is asymetric with
566 * linux_write_ldt, and returns raw ldt entries.
567 * NB, the code I saw zerod the spare parts of the user buffer.
570 DPRINTF(("linux_read_ldt!"));
572 sz = 8192 * sizeof(*ldt_buf);
573 ldt_buf = kmem_zalloc(sz, KM_SLEEP);
574 gl.start = 0;
575 gl.desc = NULL;
576 gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
577 error = x86_get_ldt1(l, &gl, ldt_buf);
578 /* NB gl.num might have changed */
579 if (error == 0) {
580 *retval = gl.num * sizeof *ldt;
581 error = copyout(ldt_buf, SCARG(uap, ptr),
582 gl.num * sizeof *ldt_buf);
584 kmem_free(ldt_buf, sz);
586 return error;
589 struct linux_ldt_info {
590 u_int entry_number;
591 u_long base_addr;
592 u_int limit;
593 u_int seg_32bit:1;
594 u_int contents:2;
595 u_int read_exec_only:1;
596 u_int limit_in_pages:1;
597 u_int seg_not_present:1;
598 u_int useable:1;
601 static int
602 linux_write_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
603 int oldmode)
605 struct linux_ldt_info ldt_info;
606 union descriptor d;
607 struct x86_set_ldt_args sl;
608 int error;
610 DPRINTF(("linux_write_ldt %d\n", oldmode));
611 if (SCARG(uap, bytecount) != sizeof(ldt_info))
612 return (EINVAL);
613 if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
614 return error;
615 if (ldt_info.entry_number >= 8192)
616 return (EINVAL);
617 if (ldt_info.contents == 3) {
618 if (oldmode)
619 return (EINVAL);
620 if (ldt_info.seg_not_present)
621 return (EINVAL);
624 if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
625 (oldmode || (ldt_info.contents == 0 &&
626 ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
627 ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
628 ldt_info.useable == 0))) {
629 /* this means you should zero the ldt */
630 (void)memset(&d, 0, sizeof(d));
631 } else {
632 d.sd.sd_lobase = ldt_info.base_addr & 0xffffff;
633 d.sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
634 d.sd.sd_lolimit = ldt_info.limit & 0xffff;
635 d.sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
636 d.sd.sd_type = 16 | (ldt_info.contents << 2) |
637 (!ldt_info.read_exec_only << 1);
638 d.sd.sd_dpl = SEL_UPL;
639 d.sd.sd_p = !ldt_info.seg_not_present;
640 d.sd.sd_def32 = ldt_info.seg_32bit;
641 d.sd.sd_gran = ldt_info.limit_in_pages;
642 if (!oldmode)
643 d.sd.sd_xx = ldt_info.useable;
644 else
645 d.sd.sd_xx = 0;
647 sl.start = ldt_info.entry_number;
648 sl.desc = NULL;
649 sl.num = 1;
651 DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
652 ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
654 return x86_set_ldt1(l, &sl, &d);
657 #endif /* USER_LDT */
660 linux_sys_modify_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, register_t *retval)
662 /* {
663 syscallarg(int) func;
664 syscallarg(void *) ptr;
665 syscallarg(size_t) bytecount;
666 } */
668 switch (SCARG(uap, func)) {
669 #ifdef USER_LDT
670 case 0:
671 return linux_read_ldt(l, (const void *)uap, retval);
672 case 1:
673 return linux_write_ldt(l, (const void *)uap, 1);
674 case 2:
675 #ifdef notyet
676 return (linux_read_default_ldt(l, (const void *)uap, retval);
677 #else
678 return (ENOSYS);
679 #endif
680 case 0x11:
681 return linux_write_ldt(l, (const void *)uap, 0);
682 #endif /* USER_LDT */
684 default:
685 return (ENOSYS);
690 * XXX Pathetic hack to make svgalib work. This will fake the major
691 * device number of an opened VT so that svgalib likes it. grmbl.
692 * Should probably do it 'wrong the right way' and use a mapping
693 * array for all major device numbers, and map linux_mknod too.
695 dev_t
696 linux_fakedev(dev_t dev, int raw)
698 extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
699 const struct cdevsw *cd = cdevsw_lookup(dev);
701 if (raw) {
702 #if (NWSDISPLAY > 0)
703 extern const struct cdevsw wsdisplay_cdevsw;
704 if (cd == &wsdisplay_cdevsw)
705 return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
706 #endif
709 if (cd == &ptc_cdevsw)
710 return makedev(LINUX_PTC_MAJOR, minor(dev));
711 if (cd == &pts_cdevsw)
712 return makedev(LINUX_PTS_MAJOR, minor(dev));
714 return dev;
717 #if (NWSDISPLAY > 0)
719 * That's not complete, but enough to get an X server running.
721 #define NR_KEYS 128
722 static const u_short plain_map[NR_KEYS] = {
723 0x0200, 0x001b, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036,
724 0x0037, 0x0038, 0x0039, 0x0030, 0x002d, 0x003d, 0x007f, 0x0009,
725 0x0b71, 0x0b77, 0x0b65, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69,
726 0x0b6f, 0x0b70, 0x005b, 0x005d, 0x0201, 0x0702, 0x0b61, 0x0b73,
727 0x0b64, 0x0b66, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x003b,
728 0x0027, 0x0060, 0x0700, 0x005c, 0x0b7a, 0x0b78, 0x0b63, 0x0b76,
729 0x0b62, 0x0b6e, 0x0b6d, 0x002c, 0x002e, 0x002f, 0x0700, 0x030c,
730 0x0703, 0x0020, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104,
731 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0209, 0x0307,
732 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
733 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003c, 0x010a,
734 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
735 0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603,
736 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
737 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
738 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
739 }, shift_map[NR_KEYS] = {
740 0x0200, 0x001b, 0x0021, 0x0040, 0x0023, 0x0024, 0x0025, 0x005e,
741 0x0026, 0x002a, 0x0028, 0x0029, 0x005f, 0x002b, 0x007f, 0x0009,
742 0x0b51, 0x0b57, 0x0b45, 0x0b52, 0x0b54, 0x0b59, 0x0b55, 0x0b49,
743 0x0b4f, 0x0b50, 0x007b, 0x007d, 0x0201, 0x0702, 0x0b41, 0x0b53,
744 0x0b44, 0x0b46, 0x0b47, 0x0b48, 0x0b4a, 0x0b4b, 0x0b4c, 0x003a,
745 0x0022, 0x007e, 0x0700, 0x007c, 0x0b5a, 0x0b58, 0x0b43, 0x0b56,
746 0x0b42, 0x0b4e, 0x0b4d, 0x003c, 0x003e, 0x003f, 0x0700, 0x030c,
747 0x0703, 0x0020, 0x0207, 0x010a, 0x010b, 0x010c, 0x010d, 0x010e,
748 0x010f, 0x0110, 0x0111, 0x0112, 0x0113, 0x0213, 0x0203, 0x0307,
749 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
750 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003e, 0x010a,
751 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
752 0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603,
753 0x020b, 0x0601, 0x0602, 0x0117, 0x0600, 0x020a, 0x0115, 0x0116,
754 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
755 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
756 }, altgr_map[NR_KEYS] = {
757 0x0200, 0x0200, 0x0200, 0x0040, 0x0200, 0x0024, 0x0200, 0x0200,
758 0x007b, 0x005b, 0x005d, 0x007d, 0x005c, 0x0200, 0x0200, 0x0200,
759 0x0b71, 0x0b77, 0x0918, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69,
760 0x0b6f, 0x0b70, 0x0200, 0x007e, 0x0201, 0x0702, 0x0914, 0x0b73,
761 0x0917, 0x0919, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x0200,
762 0x0200, 0x0200, 0x0700, 0x0200, 0x0b7a, 0x0b78, 0x0916, 0x0b76,
763 0x0915, 0x0b6e, 0x0b6d, 0x0200, 0x0200, 0x0200, 0x0700, 0x030c,
764 0x0703, 0x0200, 0x0207, 0x050c, 0x050d, 0x050e, 0x050f, 0x0510,
765 0x0511, 0x0512, 0x0513, 0x0514, 0x0515, 0x0208, 0x0202, 0x0911,
766 0x0912, 0x0913, 0x030b, 0x090e, 0x090f, 0x0910, 0x030a, 0x090b,
767 0x090c, 0x090d, 0x090a, 0x0310, 0x0206, 0x0200, 0x007c, 0x0516,
768 0x0517, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
769 0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603,
770 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
771 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
772 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
773 }, ctrl_map[NR_KEYS] = {
774 0x0200, 0x0200, 0x0200, 0x0000, 0x001b, 0x001c, 0x001d, 0x001e,
775 0x001f, 0x007f, 0x0200, 0x0200, 0x001f, 0x0200, 0x0008, 0x0200,
776 0x0011, 0x0017, 0x0005, 0x0012, 0x0014, 0x0019, 0x0015, 0x0009,
777 0x000f, 0x0010, 0x001b, 0x001d, 0x0201, 0x0702, 0x0001, 0x0013,
778 0x0004, 0x0006, 0x0007, 0x0008, 0x000a, 0x000b, 0x000c, 0x0200,
779 0x0007, 0x0000, 0x0700, 0x001c, 0x001a, 0x0018, 0x0003, 0x0016,
780 0x0002, 0x000e, 0x000d, 0x0200, 0x020e, 0x007f, 0x0700, 0x030c,
781 0x0703, 0x0000, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104,
782 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0204, 0x0307,
783 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
784 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x0200, 0x010a,
785 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
786 0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603,
787 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
788 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
789 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
792 const u_short * const linux_keytabs[] = {
793 plain_map, shift_map, altgr_map, altgr_map, ctrl_map
795 #endif
797 static struct biosdisk_info *
798 fd2biosinfo(struct proc *p, struct file *fp)
800 struct vnode *vp;
801 const char *blkname;
802 char diskname[16];
803 int i;
804 struct nativedisk_info *nip;
805 struct disklist *dl = x86_alldisks;
807 if (fp->f_type != DTYPE_VNODE)
808 return NULL;
809 vp = (struct vnode *)fp->f_data;
811 if (vp->v_type != VBLK)
812 return NULL;
814 blkname = devsw_blk2name(major(vp->v_rdev));
815 snprintf(diskname, sizeof diskname, "%s%llu", blkname,
816 (unsigned long long)DISKUNIT(vp->v_rdev));
818 for (i = 0; i < dl->dl_nnativedisks; i++) {
819 nip = &dl->dl_nativedisks[i];
820 if (strcmp(diskname, nip->ni_devname))
821 continue;
822 if (nip->ni_nmatches != 0)
823 return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
826 return NULL;
831 * We come here in a last attempt to satisfy a Linux ioctl() call
834 linux_machdepioctl(struct lwp *l, const struct linux_sys_ioctl_args *uap, register_t *retval)
836 /* {
837 syscallarg(int) fd;
838 syscallarg(u_long) com;
839 syscallarg(void *) data;
840 } */
841 struct sys_ioctl_args bia;
842 u_long com;
843 int error, error1;
844 #if (NWSDISPLAY > 0)
845 struct vt_mode lvt;
846 struct kbentry kbe;
847 #endif
848 struct linux_hd_geometry hdg;
849 struct linux_hd_big_geometry hdg_big;
850 struct biosdisk_info *bip;
851 file_t *fp;
852 int fd;
853 struct disklabel label, *labp;
854 struct partinfo partp;
855 int (*ioctlf)(struct file *, u_long, void *);
856 u_long start, biostotal, realtotal;
857 u_char heads, sectors;
858 u_int cylinders;
859 struct ioctl_pt pt;
861 fd = SCARG(uap, fd);
862 SCARG(&bia, fd) = fd;
863 SCARG(&bia, data) = SCARG(uap, data);
864 com = SCARG(uap, com);
866 if ((fp = fd_getfile(fd)) == NULL)
867 return (EBADF);
869 switch (com) {
870 #if (NWSDISPLAY > 0)
871 case LINUX_KDGKBMODE:
872 com = KDGKBMODE;
873 break;
874 case LINUX_KDSKBMODE:
875 com = KDSKBMODE;
876 if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
877 SCARG(&bia, data) = (void *)K_RAW;
878 break;
879 case LINUX_KIOCSOUND:
880 SCARG(&bia, data) =
881 (void *)(((unsigned long)SCARG(&bia, data)) & 0xffff);
882 /* fall through */
883 case LINUX_KDMKTONE:
884 com = KDMKTONE;
885 break;
886 case LINUX_KDSETMODE:
887 com = KDSETMODE;
888 break;
889 case LINUX_KDGETMODE:
890 /* KD_* values are equal to the wscons numbers */
891 com = WSDISPLAYIO_GMODE;
892 break;
893 case LINUX_KDENABIO:
894 com = KDENABIO;
895 break;
896 case LINUX_KDDISABIO:
897 com = KDDISABIO;
898 break;
899 case LINUX_KDGETLED:
900 com = KDGETLED;
901 break;
902 case LINUX_KDSETLED:
903 com = KDSETLED;
904 break;
905 case LINUX_VT_OPENQRY:
906 com = VT_OPENQRY;
907 break;
908 case LINUX_VT_GETMODE:
909 error = fp->f_ops->fo_ioctl(fp, VT_GETMODE, &lvt);
910 if (error != 0)
911 goto out;
912 lvt.relsig = native_to_linux_signo[lvt.relsig];
913 lvt.acqsig = native_to_linux_signo[lvt.acqsig];
914 lvt.frsig = native_to_linux_signo[lvt.frsig];
915 error = copyout(&lvt, SCARG(uap, data), sizeof (lvt));
916 goto out;
917 case LINUX_VT_SETMODE:
918 error = copyin(SCARG(uap, data), &lvt, sizeof (lvt));
919 if (error != 0)
920 goto out;
921 lvt.relsig = linux_to_native_signo[lvt.relsig];
922 lvt.acqsig = linux_to_native_signo[lvt.acqsig];
923 lvt.frsig = linux_to_native_signo[lvt.frsig];
924 error = fp->f_ops->fo_ioctl(fp, VT_SETMODE, &lvt);
925 goto out;
926 case LINUX_VT_DISALLOCATE:
927 /* XXX should use WSDISPLAYIO_DELSCREEN */
928 error = 0;
929 goto out;
930 case LINUX_VT_RELDISP:
931 com = VT_RELDISP;
932 break;
933 case LINUX_VT_ACTIVATE:
934 com = VT_ACTIVATE;
935 break;
936 case LINUX_VT_WAITACTIVE:
937 com = VT_WAITACTIVE;
938 break;
939 case LINUX_VT_GETSTATE:
940 com = VT_GETSTATE;
941 break;
942 case LINUX_KDGKBTYPE:
944 static const u_int8_t kb101 = KB_101;
946 /* This is what Linux does. */
947 error = copyout(&kb101, SCARG(uap, data), 1);
948 goto out;
950 case LINUX_KDGKBENT:
952 * The Linux KDGKBENT ioctl is different from the
953 * SYSV original. So we handle it in machdep code.
954 * XXX We should use keyboard mapping information
955 * from wsdisplay, but this would be expensive.
957 if ((error = copyin(SCARG(uap, data), &kbe,
958 sizeof(struct kbentry))))
959 goto out;
960 if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
961 || kbe.kb_index >= NR_KEYS) {
962 error = EINVAL;
963 goto out;
965 kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
966 error = copyout(&kbe, SCARG(uap, data),
967 sizeof(struct kbentry));
968 goto out;
969 #endif
970 case LINUX_HDIO_GETGEO:
971 case LINUX_HDIO_GETGEO_BIG:
973 * Try to mimic Linux behaviour: return the BIOS geometry
974 * if possible (extending its # of cylinders if it's beyond
975 * the 1023 limit), fall back to the MI geometry (i.e.
976 * the real geometry) if not found, by returning an
977 * error. See common/linux_hdio.c
979 bip = fd2biosinfo(curproc, fp);
980 ioctlf = fp->f_ops->fo_ioctl;
981 error = ioctlf(fp, DIOCGDEFLABEL, (void *)&label);
982 error1 = ioctlf(fp, DIOCGPART, (void *)&partp);
983 if (error != 0 && error1 != 0) {
984 error = error1;
985 goto out;
987 labp = error != 0 ? &label : partp.disklab;
988 start = error1 != 0 ? partp.part->p_offset : 0;
989 if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
990 && bip->bi_cyl != 0) {
991 heads = bip->bi_head;
992 sectors = bip->bi_sec;
993 cylinders = bip->bi_cyl;
994 biostotal = heads * sectors * cylinders;
995 realtotal = labp->d_ntracks * labp->d_nsectors *
996 labp->d_ncylinders;
997 if (realtotal > biostotal)
998 cylinders = realtotal / (heads * sectors);
999 } else {
1000 heads = labp->d_ntracks;
1001 cylinders = labp->d_ncylinders;
1002 sectors = labp->d_nsectors;
1004 if (com == LINUX_HDIO_GETGEO) {
1005 hdg.start = start;
1006 hdg.heads = heads;
1007 hdg.cylinders = cylinders;
1008 hdg.sectors = sectors;
1009 error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
1010 goto out;
1011 } else {
1012 hdg_big.start = start;
1013 hdg_big.heads = heads;
1014 hdg_big.cylinders = cylinders;
1015 hdg_big.sectors = sectors;
1016 error = copyout(&hdg_big, SCARG(uap, data),
1017 sizeof hdg_big);
1018 goto out;
1021 default:
1023 * Unknown to us. If it's on a device, just pass it through
1024 * using PTIOCLINUX, the device itself might be able to
1025 * make some sense of it.
1026 * XXX hack: if the function returns EJUSTRETURN,
1027 * it has stuffed a sysctl return value in pt.data.
1029 ioctlf = fp->f_ops->fo_ioctl;
1030 pt.com = SCARG(uap, com);
1031 pt.data = SCARG(uap, data);
1032 error = ioctlf(fp, PTIOCLINUX, &pt);
1033 if (error == EJUSTRETURN) {
1034 retval[0] = (register_t)pt.data;
1035 error = 0;
1038 if (error == ENOTTY) {
1039 DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
1040 com));
1042 goto out;
1044 SCARG(&bia, com) = com;
1045 error = sys_ioctl(curlwp, &bia, retval);
1046 out:
1047 fd_putfile(fd);
1048 return error;
1052 * Set I/O permissions for a process. Just set the maximum level
1053 * right away (ignoring the argument), otherwise we would have
1054 * to rely on I/O permission maps, which are not implemented.
1057 linux_sys_iopl(struct lwp *l, const struct linux_sys_iopl_args *uap, register_t *retval)
1059 /* {
1060 syscallarg(int) level;
1061 } */
1062 struct trapframe *fp = l->l_md.md_regs;
1064 if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL,
1065 NULL, NULL, NULL, NULL) != 0)
1066 return EPERM;
1067 fp->tf_eflags |= PSL_IOPL;
1068 *retval = 0;
1069 return 0;
1073 * See above. If a root process tries to set access to an I/O port,
1074 * just let it have the whole range.
1077 linux_sys_ioperm(struct lwp *l, const struct linux_sys_ioperm_args *uap, register_t *retval)
1079 /* {
1080 syscallarg(unsigned int) lo;
1081 syscallarg(unsigned int) hi;
1082 syscallarg(int) val;
1083 } */
1084 struct trapframe *fp = l->l_md.md_regs;
1086 if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ?
1087 KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL,
1088 NULL, NULL) != 0)
1089 return EPERM;
1090 if (SCARG(uap, val))
1091 fp->tf_eflags |= PSL_IOPL;
1092 *retval = 0;
1093 return 0;
1097 linux_usertrap(struct lwp *l, vaddr_t trapaddr,
1098 void *arg)
1100 return 0;
1103 const char *
1104 linux_get_uname_arch(void)
1106 static char uname_arch[5] = "i386";
1108 if (uname_arch[1] == '3')
1109 uname_arch[1] += cpu_class;
1110 return uname_arch;
1113 #ifdef LINUX_NPTL
/*
 * Placeholder for fetching the new TLS pointer of an LWP; not yet
 * implemented, always returns NULL.
 */
void *
linux_get_newtls(struct lwp *l)
{
#if 0
	struct trapframe *tf = l->l_md.md_regs;
#endif
	void *newtls = NULL;

	/* XXX: Implement me */
	return newtls;
}
/*
 * Placeholder for installing a new TLS pointer on an LWP; not yet
 * implemented, ignores both arguments and reports success (0).
 */
int
linux_set_newtls(struct lwp *l, void *tls)
{
	int error = 0;

	/* XXX: Implement me */
	return error;
}
1131 #endif