/*	$NetBSD: linux_machdep.c,v 1.146 2009/11/23 00:46:06 rmind Exp $	*/

/*-
 * Copyright (c) 1995, 2000, 2008, 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Frank van der Linden, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.146 2009/11/23 00:46:06 rmind Exp $");
35 #if defined(_KERNEL_OPT)
37 #include "opt_user_ldt.h"
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/signalvar.h>
43 #include <sys/kernel.h>
46 #include <sys/reboot.h>
50 #include <sys/callout.h>
51 #include <sys/malloc.h>
53 #include <sys/msgbuf.h>
54 #include <sys/mount.h>
55 #include <sys/vnode.h>
56 #include <sys/device.h>
57 #include <sys/syscallargs.h>
58 #include <sys/filedesc.h>
59 #include <sys/exec_elf.h>
60 #include <sys/disklabel.h>
61 #include <sys/ioctl.h>
63 #include <sys/kauth.h>
66 #include <miscfs/specfs/specdev.h>
68 #include <compat/linux/common/linux_types.h>
69 #include <compat/linux/common/linux_signal.h>
70 #include <compat/linux/common/linux_util.h>
71 #include <compat/linux/common/linux_ioctl.h>
72 #include <compat/linux/common/linux_hdio.h>
73 #include <compat/linux/common/linux_exec.h>
74 #include <compat/linux/common/linux_machdep.h>
75 #include <compat/linux/common/linux_errno.h>
77 #include <compat/linux/linux_syscallargs.h>
80 #include <machine/cpufunc.h>
81 #include <machine/psl.h>
82 #include <machine/reg.h>
83 #include <machine/segments.h>
84 #include <machine/specialreg.h>
85 #include <machine/sysarch.h>
86 #include <machine/vm86.h>
87 #include <machine/vmparam.h>
/*
 * To see whether wscons is configured (for virtual console ioctl calls).
 */
92 #if defined(_KERNEL_OPT)
93 #include "wsdisplay.h"
96 #include <dev/wscons/wsconsio.h>
97 #include <dev/wscons/wsdisplay_usl_io.h>
98 #if defined(_KERNEL_OPT)
99 #include "opt_xserver.h"
104 #define DPRINTF(a) uprintf a
109 static struct biosdisk_info
*fd2biosinfo(struct proc
*, struct file
*);
110 extern struct disklist
*x86_alldisks
;
111 static void linux_save_ucontext(struct lwp
*, struct trapframe
*,
112 const sigset_t
*, struct sigaltstack
*, struct linux_ucontext
*);
113 static void linux_save_sigcontext(struct lwp
*, struct trapframe
*,
114 const sigset_t
*, struct linux_sigcontext
*);
115 static int linux_restore_sigcontext(struct lwp
*,
116 struct linux_sigcontext
*, register_t
*);
117 static void linux_rt_sendsig(const ksiginfo_t
*, const sigset_t
*);
118 static void linux_old_sendsig(const ksiginfo_t
*, const sigset_t
*);
120 extern char linux_sigcode
[], linux_rt_sigcode
[];
/*
 * Deal with some i386-specific things in the Linux emulation code.
 */

126 linux_setregs(struct lwp
*l
, struct exec_package
*epp
, u_long stack
)
128 struct pcb
*pcb
= lwp_getpcb(l
);
129 struct trapframe
*tf
;
132 /* If we were using the FPU, forget about it. */
141 l
->l_md
.md_flags
&= ~MDL_USEDFPU
;
143 if (i386_use_fxsave
) {
144 pcb
->pcb_savefpu
.sv_xmm
.sv_env
.en_cw
= __Linux_NPXCW__
;
145 pcb
->pcb_savefpu
.sv_xmm
.sv_env
.en_mxcsr
= __INITIAL_MXCSR__
;
147 pcb
->pcb_savefpu
.sv_87
.sv_env
.en_cw
= __Linux_NPXCW__
;
149 tf
= l
->l_md
.md_regs
;
150 tf
->tf_gs
= GSEL(GUDATA_SEL
, SEL_UPL
);
151 tf
->tf_fs
= GSEL(GUDATA_SEL
, SEL_UPL
);
152 tf
->tf_es
= GSEL(GUDATA_SEL
, SEL_UPL
);
153 tf
->tf_ds
= GSEL(GUDATA_SEL
, SEL_UPL
);
157 tf
->tf_ebx
= (int)l
->l_proc
->p_psstr
;
161 tf
->tf_eip
= epp
->ep_entry
;
162 tf
->tf_cs
= GSEL(GUCODEBIG_SEL
, SEL_UPL
);
163 tf
->tf_eflags
= PSL_USERSET
;
165 tf
->tf_ss
= GSEL(GUDATA_SEL
, SEL_UPL
);
169 * Send an interrupt to process.
171 * Stack is set up to allow sigcode stored
172 * in u. to call routine, followed by kcall
173 * to sigreturn routine below. After sigreturn
174 * resets the signal mask, the stack, and the
175 * frame pointer, it returns to the user
180 linux_sendsig(const ksiginfo_t
*ksi
, const sigset_t
*mask
)
182 if (SIGACTION(curproc
, ksi
->ksi_signo
).sa_flags
& SA_SIGINFO
)
183 linux_rt_sendsig(ksi
, mask
);
185 linux_old_sendsig(ksi
, mask
);
190 linux_save_ucontext(struct lwp
*l
, struct trapframe
*tf
, const sigset_t
*mask
, struct sigaltstack
*sas
, struct linux_ucontext
*uc
)
194 native_to_linux_sigaltstack(&uc
->uc_stack
, sas
);
195 linux_save_sigcontext(l
, tf
, mask
, &uc
->uc_mcontext
);
196 native_to_linux_sigset(&uc
->uc_sigmask
, mask
);
197 (void)memset(&uc
->uc_fpregs_mem
, 0, sizeof(uc
->uc_fpregs_mem
));
201 linux_save_sigcontext(struct lwp
*l
, struct trapframe
*tf
,
202 const sigset_t
*mask
, struct linux_sigcontext
*sc
)
204 struct pcb
*pcb
= lwp_getpcb(l
);
206 /* Save register context. */
208 if (tf
->tf_eflags
& PSL_VM
) {
209 sc
->sc_gs
= tf
->tf_vm86_gs
;
210 sc
->sc_fs
= tf
->tf_vm86_fs
;
211 sc
->sc_es
= tf
->tf_vm86_es
;
212 sc
->sc_ds
= tf
->tf_vm86_ds
;
213 sc
->sc_eflags
= get_vflags(l
);
217 sc
->sc_gs
= tf
->tf_gs
;
218 sc
->sc_fs
= tf
->tf_fs
;
219 sc
->sc_es
= tf
->tf_es
;
220 sc
->sc_ds
= tf
->tf_ds
;
221 sc
->sc_eflags
= tf
->tf_eflags
;
223 sc
->sc_edi
= tf
->tf_edi
;
224 sc
->sc_esi
= tf
->tf_esi
;
225 sc
->sc_esp
= tf
->tf_esp
;
226 sc
->sc_ebp
= tf
->tf_ebp
;
227 sc
->sc_ebx
= tf
->tf_ebx
;
228 sc
->sc_edx
= tf
->tf_edx
;
229 sc
->sc_ecx
= tf
->tf_ecx
;
230 sc
->sc_eax
= tf
->tf_eax
;
231 sc
->sc_eip
= tf
->tf_eip
;
232 sc
->sc_cs
= tf
->tf_cs
;
233 sc
->sc_esp_at_signal
= tf
->tf_esp
;
234 sc
->sc_ss
= tf
->tf_ss
;
235 sc
->sc_err
= tf
->tf_err
;
236 sc
->sc_trapno
= tf
->tf_trapno
;
237 sc
->sc_cr2
= pcb
->pcb_cr2
;
240 /* Save signal stack. */
241 /* Linux doesn't save the onstack flag in sigframe */
243 /* Save signal mask. */
244 native_to_linux_old_sigset(&sc
->sc_mask
, mask
);
248 linux_rt_sendsig(const ksiginfo_t
*ksi
, const sigset_t
*mask
)
250 struct lwp
*l
= curlwp
;
251 struct proc
*p
= l
->l_proc
;
252 struct trapframe
*tf
;
253 struct linux_rt_sigframe
*fp
, frame
;
255 linux_siginfo_t
*lsi
;
256 int sig
= ksi
->ksi_signo
;
257 sig_t catcher
= SIGACTION(p
, sig
).sa_handler
;
258 struct sigaltstack
*sas
= &l
->l_sigstk
;
260 tf
= l
->l_md
.md_regs
;
261 /* Do we need to jump onto the signal stack? */
262 onstack
= (sas
->ss_flags
& (SS_DISABLE
| SS_ONSTACK
)) == 0 &&
263 (SIGACTION(p
, sig
).sa_flags
& SA_ONSTACK
) != 0;
266 /* Allocate space for the signal handler context. */
268 fp
= (struct linux_rt_sigframe
*)((char *)sas
->ss_sp
+
271 fp
= (struct linux_rt_sigframe
*)tf
->tf_esp
;
274 DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
275 onstack
, fp
, sig
, tf
->tf_eip
,
276 ((struct pcb
*)lwp_getpcb(l
))->pcb_cr2
));
278 /* Build stack frame for signal trampoline. */
279 frame
.sf_handler
= catcher
;
280 frame
.sf_sig
= native_to_linux_signo
[sig
];
281 frame
.sf_sip
= &fp
->sf_si
;
282 frame
.sf_ucp
= &fp
->sf_uc
;
285 * XXX: the following code assumes that the constants for
286 * siginfo are the same between linux and NetBSD.
288 (void)memset(lsi
= &frame
.sf_si
, 0, sizeof(frame
.sf_si
));
289 lsi
->lsi_errno
= native_to_linux_errno
[ksi
->ksi_errno
];
290 lsi
->lsi_code
= native_to_linux_si_code(ksi
->ksi_code
);
291 switch (lsi
->lsi_signo
= frame
.sf_sig
) {
297 lsi
->lsi_addr
= ksi
->ksi_addr
;
300 lsi
->lsi_uid
= ksi
->ksi_uid
;
301 lsi
->lsi_pid
= ksi
->ksi_pid
;
302 lsi
->lsi_utime
= ksi
->ksi_utime
;
303 lsi
->lsi_stime
= ksi
->ksi_stime
;
305 native_to_linux_si_status(ksi
->ksi_code
, ksi
->ksi_status
);
308 lsi
->lsi_band
= ksi
->ksi_band
;
309 lsi
->lsi_fd
= ksi
->ksi_fd
;
312 lsi
->lsi_uid
= ksi
->ksi_uid
;
313 lsi
->lsi_pid
= ksi
->ksi_pid
;
314 if (lsi
->lsi_signo
== LINUX_SIGALRM
||
315 lsi
->lsi_signo
>= LINUX_SIGRTMIN
)
316 lsi
->lsi_value
.sival_ptr
= ksi
->ksi_value
.sival_ptr
;
320 /* Save register context. */
321 linux_save_ucontext(l
, tf
, mask
, sas
, &frame
.sf_uc
);
322 sendsig_reset(l
, sig
);
324 mutex_exit(p
->p_lock
);
325 error
= copyout(&frame
, fp
, sizeof(frame
));
326 mutex_enter(p
->p_lock
);
330 * Process has trashed its stack; give it an illegal
331 * instruction to halt it in its tracks.
338 * Build context to run handler in.
340 tf
->tf_gs
= GSEL(GUDATA_SEL
, SEL_UPL
);
341 tf
->tf_fs
= GSEL(GUDATA_SEL
, SEL_UPL
);
342 tf
->tf_es
= GSEL(GUDATA_SEL
, SEL_UPL
);
343 tf
->tf_ds
= GSEL(GUDATA_SEL
, SEL_UPL
);
344 tf
->tf_eip
= ((int)p
->p_sigctx
.ps_sigcode
) +
345 (linux_rt_sigcode
- linux_sigcode
);
346 tf
->tf_cs
= GSEL(GUCODE_SEL
, SEL_UPL
);
347 tf
->tf_eflags
&= ~PSL_CLEARSIG
;
348 tf
->tf_esp
= (int)fp
;
349 tf
->tf_ss
= GSEL(GUDATA_SEL
, SEL_UPL
);
351 /* Remember that we're now on the signal stack. */
353 sas
->ss_flags
|= SS_ONSTACK
;
357 linux_old_sendsig(const ksiginfo_t
*ksi
, const sigset_t
*mask
)
359 struct lwp
*l
= curlwp
;
360 struct proc
*p
= l
->l_proc
;
361 struct trapframe
*tf
;
362 struct linux_sigframe
*fp
, frame
;
364 int sig
= ksi
->ksi_signo
;
365 sig_t catcher
= SIGACTION(p
, sig
).sa_handler
;
366 struct sigaltstack
*sas
= &l
->l_sigstk
;
368 tf
= l
->l_md
.md_regs
;
370 /* Do we need to jump onto the signal stack? */
371 onstack
= (sas
->ss_flags
& (SS_DISABLE
| SS_ONSTACK
)) == 0 &&
372 (SIGACTION(p
, sig
).sa_flags
& SA_ONSTACK
) != 0;
374 /* Allocate space for the signal handler context. */
376 fp
= (struct linux_sigframe
*) ((char *)sas
->ss_sp
+
379 fp
= (struct linux_sigframe
*)tf
->tf_esp
;
382 DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
383 onstack
, fp
, sig
, tf
->tf_eip
,
384 ((struct pcb
*)lwp_getpcb(l
))->pcb_cr2
));
386 /* Build stack frame for signal trampoline. */
387 frame
.sf_handler
= catcher
;
388 frame
.sf_sig
= native_to_linux_signo
[sig
];
390 linux_save_sigcontext(l
, tf
, mask
, &frame
.sf_sc
);
391 sendsig_reset(l
, sig
);
393 mutex_exit(p
->p_lock
);
394 error
= copyout(&frame
, fp
, sizeof(frame
));
395 mutex_enter(p
->p_lock
);
399 * Process has trashed its stack; give it an illegal
400 * instruction to halt it in its tracks.
407 * Build context to run handler in.
409 tf
->tf_gs
= GSEL(GUDATA_SEL
, SEL_UPL
);
410 tf
->tf_fs
= GSEL(GUDATA_SEL
, SEL_UPL
);
411 tf
->tf_es
= GSEL(GUDATA_SEL
, SEL_UPL
);
412 tf
->tf_ds
= GSEL(GUDATA_SEL
, SEL_UPL
);
413 tf
->tf_eip
= (int)p
->p_sigctx
.ps_sigcode
;
414 tf
->tf_cs
= GSEL(GUCODEBIG_SEL
, SEL_UPL
);
415 tf
->tf_eflags
&= ~PSL_CLEARSIG
;
416 tf
->tf_esp
= (int)fp
;
417 tf
->tf_ss
= GSEL(GUDATA_SEL
, SEL_UPL
);
419 /* Remember that we're now on the signal stack. */
421 sas
->ss_flags
|= SS_ONSTACK
;
425 * System call to cleanup state after a signal
426 * has been taken. Reset signal mask and
427 * stack state from context left by sendsig (above).
428 * Return to previous pc and psl as specified by
429 * context left by sendsig. Check carefully to
430 * make sure that the user has not modified the
431 * psl to gain improper privileges or to cause
435 linux_sys_rt_sigreturn(struct lwp
*l
, const struct linux_sys_rt_sigreturn_args
*uap
, register_t
*retval
)
438 syscallarg(struct linux_ucontext *) ucp;
440 struct linux_ucontext context
, *ucp
= SCARG(uap
, ucp
);
444 * The trampoline code hands us the context.
445 * It is unsafe to keep track of it ourselves, in the event that a
446 * program jumps out of a signal handler.
448 if ((error
= copyin(ucp
, &context
, sizeof(*ucp
))) != 0)
451 /* XXX XAX we can do better here by using more of the ucontext */
452 return linux_restore_sigcontext(l
, &context
.uc_mcontext
, retval
);
456 linux_sys_sigreturn(struct lwp
*l
, const struct linux_sys_sigreturn_args
*uap
, register_t
*retval
)
459 syscallarg(struct linux_sigcontext *) scp;
461 struct linux_sigcontext context
, *scp
= SCARG(uap
, scp
);
465 * The trampoline code hands us the context.
466 * It is unsafe to keep track of it ourselves, in the event that a
467 * program jumps out of a signal handler.
469 if ((error
= copyin((void *)scp
, &context
, sizeof(*scp
))) != 0)
471 return linux_restore_sigcontext(l
, &context
, retval
);
475 linux_restore_sigcontext(struct lwp
*l
, struct linux_sigcontext
*scp
,
478 struct proc
*p
= l
->l_proc
;
479 struct sigaltstack
*sas
= &l
->l_sigstk
;
480 struct trapframe
*tf
;
483 /* Restore register context. */
484 tf
= l
->l_md
.md_regs
;
486 DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf
->tf_esp
, tf
->tf_eip
));
488 if (scp
->sc_eflags
& PSL_VM
) {
489 void syscall_vm86(struct trapframe
*);
491 tf
->tf_vm86_gs
= scp
->sc_gs
;
492 tf
->tf_vm86_fs
= scp
->sc_fs
;
493 tf
->tf_vm86_es
= scp
->sc_es
;
494 tf
->tf_vm86_ds
= scp
->sc_ds
;
495 set_vflags(l
, scp
->sc_eflags
);
496 p
->p_md
.md_syscall
= syscall_vm86
;
501 * Check for security violations. If we're returning to
502 * protected mode, the CPU will validate the segment registers
503 * automatically and generate a trap on violations. We handle
504 * the trap, rather than doing all of the checking here.
506 if (((scp
->sc_eflags
^ tf
->tf_eflags
) & PSL_USERSTATIC
) != 0 ||
507 !USERMODE(scp
->sc_cs
, scp
->sc_eflags
))
510 tf
->tf_gs
= scp
->sc_gs
;
511 tf
->tf_fs
= scp
->sc_fs
;
512 tf
->tf_es
= scp
->sc_es
;
513 tf
->tf_ds
= scp
->sc_ds
;
515 if (tf
->tf_eflags
& PSL_VM
)
516 (*p
->p_emul
->e_syscall_intern
)(p
);
518 tf
->tf_eflags
= scp
->sc_eflags
;
520 tf
->tf_edi
= scp
->sc_edi
;
521 tf
->tf_esi
= scp
->sc_esi
;
522 tf
->tf_ebp
= scp
->sc_ebp
;
523 tf
->tf_ebx
= scp
->sc_ebx
;
524 tf
->tf_edx
= scp
->sc_edx
;
525 tf
->tf_ecx
= scp
->sc_ecx
;
526 tf
->tf_eax
= scp
->sc_eax
;
527 tf
->tf_eip
= scp
->sc_eip
;
528 tf
->tf_cs
= scp
->sc_cs
;
529 tf
->tf_esp
= scp
->sc_esp_at_signal
;
530 tf
->tf_ss
= scp
->sc_ss
;
532 /* Restore signal stack. */
534 * Linux really does it this way; it doesn't have space in sigframe
535 * to save the onstack flag.
537 mutex_enter(p
->p_lock
);
538 ss_gap
= (ssize_t
)((char *)scp
->sc_esp_at_signal
- (char *)sas
->ss_sp
);
539 if (ss_gap
>= 0 && ss_gap
< sas
->ss_size
)
540 sas
->ss_flags
|= SS_ONSTACK
;
542 sas
->ss_flags
&= ~SS_ONSTACK
;
544 /* Restore signal mask. */
545 linux_old_to_native_sigset(&mask
, &scp
->sc_mask
);
546 (void) sigprocmask1(l
, SIG_SETMASK
, &mask
, 0);
547 mutex_exit(p
->p_lock
);
549 DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf
->tf_esp
, tf
->tf_eip
));
#ifdef USER_LDT
/*
 * modify_ldt(2) read request: fetch raw LDT entries with x86_get_ldt1()
 * into a zeroed kernel buffer and copy them to the user pointer.
 *
 * uap->bytecount is converted to a descriptor count; on success *retval
 * is the number of bytes copied out.  Returns 0 or an errno from
 * x86_get_ldt1()/copyout().
 */
static int
linux_read_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
    register_t *retval)
{
	struct x86_get_ldt_args gl;
	int error;
	union descriptor *ldt_buf;
	size_t sz;

	/*
	 * I've checked the linux code - this function is asymmetric with
	 * linux_write_ldt, and returns raw ldt entries.
	 * NB, the code I saw zeroed the spare parts of the user buffer.
	 */

	DPRINTF(("linux_read_ldt!"));

	sz = 8192 * sizeof(*ldt_buf);
	ldt_buf = kmem_zalloc(sz, KM_SLEEP);
	/* NOTE(review): start/desc initialisation reconstructed -- confirm. */
	gl.start = 0;
	gl.desc = NULL;
	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
	error = x86_get_ldt1(l, &gl, ldt_buf);
	/* NB gl.num might have changed */
	if (error == 0) {
		/*
		 * Size the result by the buffer's element type; the former
		 * "sizeof *ldt" named an identifier that is not declared in
		 * this function.
		 */
		*retval = gl.num * sizeof(*ldt_buf);
		error = copyout(ldt_buf, SCARG(uap, ptr),
		    gl.num * sizeof(*ldt_buf));
	}
	kmem_free(ldt_buf, sz);

	return error;
}
#endif /* USER_LDT */

589 struct linux_ldt_info
{
595 u_int read_exec_only
:1;
596 u_int limit_in_pages
:1;
597 u_int seg_not_present
:1;
#ifdef USER_LDT
/*
 * modify_ldt(2) write request: translate one Linux LDT entry description
 * into a native segment descriptor and install it with x86_set_ldt1().
 *
 * oldmode selects the legacy call variant, which never sets the
 * "useable" bit and treats a zero base/limit as "clear this entry".
 * Returns EINVAL for a wrong-sized or out-of-range request, the copyin
 * error, or the result of x86_set_ldt1().
 */
static int
linux_write_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
    int oldmode)
{
	struct linux_ldt_info ldt_info;
	int error;
	struct x86_set_ldt_args sl;
	union descriptor d;

	DPRINTF(("linux_write_ldt %d\n", oldmode));
	if (SCARG(uap, bytecount) != sizeof(ldt_info))
		return (EINVAL);
	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
		return error;
	if (ldt_info.entry_number >= 8192)
		return (EINVAL);
	if (ldt_info.contents == 3) {
		if (oldmode)
			return (EINVAL);
		if (ldt_info.seg_not_present)
			return (EINVAL);
	}

	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
	    (oldmode || (ldt_info.contents == 0 &&
	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
	    ldt_info.useable == 0))) {
		/* this means you should zero the ldt */
		(void)memset(&d, 0, sizeof(d));
	} else {
		d.sd.sd_lobase = ldt_info.base_addr & 0xffffff;
		d.sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
		d.sd.sd_lolimit = ldt_info.limit & 0xffff;
		d.sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
		d.sd.sd_type = 16 | (ldt_info.contents << 2) |
		    (!ldt_info.read_exec_only << 1);
		d.sd.sd_dpl = SEL_UPL;
		d.sd.sd_p = !ldt_info.seg_not_present;
		d.sd.sd_def32 = ldt_info.seg_32bit;
		d.sd.sd_gran = ldt_info.limit_in_pages;
		if (!oldmode)
			d.sd.sd_xx = ldt_info.useable;
		else
			d.sd.sd_xx = 0;
	}
	/* NOTE(review): desc/num initialisation reconstructed -- confirm. */
	sl.start = ldt_info.entry_number;
	sl.desc = NULL;
	sl.num = 1;

	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));

	return x86_set_ldt1(l, &sl, &d);
}

#endif /* USER_LDT */

660 linux_sys_modify_ldt(struct lwp
*l
, const struct linux_sys_modify_ldt_args
*uap
, register_t
*retval
)
663 syscallarg(int) func;
664 syscallarg(void *) ptr;
665 syscallarg(size_t) bytecount;
668 switch (SCARG(uap
, func
)) {
671 return linux_read_ldt(l
, (const void *)uap
, retval
);
673 return linux_write_ldt(l
, (const void *)uap
, 1);
676 return (linux_read_default_ldt(l
, (const void *)uap
, retval
);
681 return linux_write_ldt(l
, (const void *)uap
, 0);
682 #endif /* USER_LDT */
690 * XXX Pathetic hack to make svgalib work. This will fake the major
691 * device number of an opened VT so that svgalib likes it. grmbl.
692 * Should probably do it 'wrong the right way' and use a mapping
693 * array for all major device numbers, and map linux_mknod too.
696 linux_fakedev(dev_t dev
, int raw
)
698 extern const struct cdevsw ptc_cdevsw
, pts_cdevsw
;
699 const struct cdevsw
*cd
= cdevsw_lookup(dev
);
703 extern const struct cdevsw wsdisplay_cdevsw
;
704 if (cd
== &wsdisplay_cdevsw
)
705 return makedev(LINUX_CONS_MAJOR
, (minor(dev
) + 1));
709 if (cd
== &ptc_cdevsw
)
710 return makedev(LINUX_PTC_MAJOR
, minor(dev
));
711 if (cd
== &pts_cdevsw
)
712 return makedev(LINUX_PTS_MAJOR
, minor(dev
));
#if (NWSDISPLAY > 0)
/*
 * That's not complete, but enough to get an X server running.
 *
 * Fixed Linux keymaps handed out by the KDGKBENT ioctl: one table of
 * NR_KEYS entries per modifier state, selected through linux_keytabs[]
 * (the AltGr table is used for two consecutive slots).
 */
#ifndef NR_KEYS
#define NR_KEYS 128	/* each table below has 16 rows of 8 entries */
#endif
static const u_short plain_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, shift_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, altgr_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, ctrl_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

const u_short * const linux_keytabs[] = {
	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
};
#endif

797 static struct biosdisk_info
*
798 fd2biosinfo(struct proc
*p
, struct file
*fp
)
804 struct nativedisk_info
*nip
;
805 struct disklist
*dl
= x86_alldisks
;
807 if (fp
->f_type
!= DTYPE_VNODE
)
809 vp
= (struct vnode
*)fp
->f_data
;
811 if (vp
->v_type
!= VBLK
)
814 blkname
= devsw_blk2name(major(vp
->v_rdev
));
815 snprintf(diskname
, sizeof diskname
, "%s%llu", blkname
,
816 (unsigned long long)DISKUNIT(vp
->v_rdev
));
818 for (i
= 0; i
< dl
->dl_nnativedisks
; i
++) {
819 nip
= &dl
->dl_nativedisks
[i
];
820 if (strcmp(diskname
, nip
->ni_devname
))
822 if (nip
->ni_nmatches
!= 0)
823 return &dl
->dl_biosdisks
[nip
->ni_biosmatches
[0]];
831 * We come here in a last attempt to satisfy a Linux ioctl() call
834 linux_machdepioctl(struct lwp
*l
, const struct linux_sys_ioctl_args
*uap
, register_t
*retval
)
838 syscallarg(u_long) com;
839 syscallarg(void *) data;
841 struct sys_ioctl_args bia
;
848 struct linux_hd_geometry hdg
;
849 struct linux_hd_big_geometry hdg_big
;
850 struct biosdisk_info
*bip
;
853 struct disklabel label
, *labp
;
854 struct partinfo partp
;
855 int (*ioctlf
)(struct file
*, u_long
, void *);
856 u_long start
, biostotal
, realtotal
;
857 u_char heads
, sectors
;
862 SCARG(&bia
, fd
) = fd
;
863 SCARG(&bia
, data
) = SCARG(uap
, data
);
864 com
= SCARG(uap
, com
);
866 if ((fp
= fd_getfile(fd
)) == NULL
)
871 case LINUX_KDGKBMODE
:
874 case LINUX_KDSKBMODE
:
876 if ((unsigned)SCARG(uap
, data
) == LINUX_K_MEDIUMRAW
)
877 SCARG(&bia
, data
) = (void *)K_RAW
;
879 case LINUX_KIOCSOUND
:
881 (void *)(((unsigned long)SCARG(&bia
, data
)) & 0xffff);
886 case LINUX_KDSETMODE
:
889 case LINUX_KDGETMODE
:
890 /* KD_* values are equal to the wscons numbers */
891 com
= WSDISPLAYIO_GMODE
;
896 case LINUX_KDDISABIO
:
905 case LINUX_VT_OPENQRY
:
908 case LINUX_VT_GETMODE
:
909 error
= fp
->f_ops
->fo_ioctl(fp
, VT_GETMODE
, &lvt
);
912 lvt
.relsig
= native_to_linux_signo
[lvt
.relsig
];
913 lvt
.acqsig
= native_to_linux_signo
[lvt
.acqsig
];
914 lvt
.frsig
= native_to_linux_signo
[lvt
.frsig
];
915 error
= copyout(&lvt
, SCARG(uap
, data
), sizeof (lvt
));
917 case LINUX_VT_SETMODE
:
918 error
= copyin(SCARG(uap
, data
), &lvt
, sizeof (lvt
));
921 lvt
.relsig
= linux_to_native_signo
[lvt
.relsig
];
922 lvt
.acqsig
= linux_to_native_signo
[lvt
.acqsig
];
923 lvt
.frsig
= linux_to_native_signo
[lvt
.frsig
];
924 error
= fp
->f_ops
->fo_ioctl(fp
, VT_SETMODE
, &lvt
);
926 case LINUX_VT_DISALLOCATE
:
927 /* XXX should use WSDISPLAYIO_DELSCREEN */
930 case LINUX_VT_RELDISP
:
933 case LINUX_VT_ACTIVATE
:
936 case LINUX_VT_WAITACTIVE
:
939 case LINUX_VT_GETSTATE
:
942 case LINUX_KDGKBTYPE
:
944 static const u_int8_t kb101
= KB_101
;
946 /* This is what Linux does. */
947 error
= copyout(&kb101
, SCARG(uap
, data
), 1);
952 * The Linux KDGKBENT ioctl is different from the
953 * SYSV original. So we handle it in machdep code.
954 * XXX We should use keyboard mapping information
955 * from wsdisplay, but this would be expensive.
957 if ((error
= copyin(SCARG(uap
, data
), &kbe
,
958 sizeof(struct kbentry
))))
960 if (kbe
.kb_table
>= sizeof(linux_keytabs
) / sizeof(u_short
*)
961 || kbe
.kb_index
>= NR_KEYS
) {
965 kbe
.kb_value
= linux_keytabs
[kbe
.kb_table
][kbe
.kb_index
];
966 error
= copyout(&kbe
, SCARG(uap
, data
),
967 sizeof(struct kbentry
));
970 case LINUX_HDIO_GETGEO
:
971 case LINUX_HDIO_GETGEO_BIG
:
973 * Try to mimic Linux behaviour: return the BIOS geometry
974 * if possible (extending its # of cylinders if it's beyond
975 * the 1023 limit), fall back to the MI geometry (i.e.
976 * the real geometry) if not found, by returning an
977 * error. See common/linux_hdio.c
979 bip
= fd2biosinfo(curproc
, fp
);
980 ioctlf
= fp
->f_ops
->fo_ioctl
;
981 error
= ioctlf(fp
, DIOCGDEFLABEL
, (void *)&label
);
982 error1
= ioctlf(fp
, DIOCGPART
, (void *)&partp
);
983 if (error
!= 0 && error1
!= 0) {
987 labp
= error
!= 0 ? &label
: partp
.disklab
;
988 start
= error1
!= 0 ? partp
.part
->p_offset
: 0;
989 if (bip
!= NULL
&& bip
->bi_head
!= 0 && bip
->bi_sec
!= 0
990 && bip
->bi_cyl
!= 0) {
991 heads
= bip
->bi_head
;
992 sectors
= bip
->bi_sec
;
993 cylinders
= bip
->bi_cyl
;
994 biostotal
= heads
* sectors
* cylinders
;
995 realtotal
= labp
->d_ntracks
* labp
->d_nsectors
*
997 if (realtotal
> biostotal
)
998 cylinders
= realtotal
/ (heads
* sectors
);
1000 heads
= labp
->d_ntracks
;
1001 cylinders
= labp
->d_ncylinders
;
1002 sectors
= labp
->d_nsectors
;
1004 if (com
== LINUX_HDIO_GETGEO
) {
1007 hdg
.cylinders
= cylinders
;
1008 hdg
.sectors
= sectors
;
1009 error
= copyout(&hdg
, SCARG(uap
, data
), sizeof hdg
);
1012 hdg_big
.start
= start
;
1013 hdg_big
.heads
= heads
;
1014 hdg_big
.cylinders
= cylinders
;
1015 hdg_big
.sectors
= sectors
;
1016 error
= copyout(&hdg_big
, SCARG(uap
, data
),
1023 * Unknown to us. If it's on a device, just pass it through
1024 * using PTIOCLINUX, the device itself might be able to
1025 * make some sense of it.
1026 * XXX hack: if the function returns EJUSTRETURN,
1027 * it has stuffed a sysctl return value in pt.data.
1029 ioctlf
= fp
->f_ops
->fo_ioctl
;
1030 pt
.com
= SCARG(uap
, com
);
1031 pt
.data
= SCARG(uap
, data
);
1032 error
= ioctlf(fp
, PTIOCLINUX
, &pt
);
1033 if (error
== EJUSTRETURN
) {
1034 retval
[0] = (register_t
)pt
.data
;
1038 if (error
== ENOTTY
) {
1039 DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
1044 SCARG(&bia
, com
) = com
;
1045 error
= sys_ioctl(curlwp
, &bia
, retval
);
1052 * Set I/O permissions for a process. Just set the maximum level
1053 * right away (ignoring the argument), otherwise we would have
1054 * to rely on I/O permission maps, which are not implemented.
1057 linux_sys_iopl(struct lwp
*l
, const struct linux_sys_iopl_args
*uap
, register_t
*retval
)
1060 syscallarg(int) level;
1062 struct trapframe
*fp
= l
->l_md
.md_regs
;
1064 if (kauth_authorize_machdep(l
->l_cred
, KAUTH_MACHDEP_IOPL
,
1065 NULL
, NULL
, NULL
, NULL
) != 0)
1067 fp
->tf_eflags
|= PSL_IOPL
;
1073 * See above. If a root process tries to set access to an I/O port,
1074 * just let it have the whole range.
1077 linux_sys_ioperm(struct lwp
*l
, const struct linux_sys_ioperm_args
*uap
, register_t
*retval
)
1080 syscallarg(unsigned int) lo;
1081 syscallarg(unsigned int) hi;
1082 syscallarg(int) val;
1084 struct trapframe
*fp
= l
->l_md
.md_regs
;
1086 if (kauth_authorize_machdep(l
->l_cred
, SCARG(uap
, val
) ?
1087 KAUTH_MACHDEP_IOPERM_SET
: KAUTH_MACHDEP_IOPERM_GET
, NULL
, NULL
,
1090 if (SCARG(uap
, val
))
1091 fp
->tf_eflags
|= PSL_IOPL
;
1097 linux_usertrap(struct lwp
*l
, vaddr_t trapaddr
,
1104 linux_get_uname_arch(void)
1106 static char uname_arch
[5] = "i386";
1108 if (uname_arch
[1] == '3')
1109 uname_arch
[1] += cpu_class
;
1115 linux_get_newtls(struct lwp
*l
)
1118 struct trapframe
*tf
= l
->l_md
.md_regs
;
1121 /* XXX: Implement me */
1126 linux_set_newtls(struct lwp
*l
, void *tls
)
1128 /* XXX: Implement me */