1 /* $NetBSD: linux_misc.c,v 1.212 2009/11/24 10:42:44 njoly Exp $ */
4 * Copyright (c) 1995, 1998, 1999, 2008 The NetBSD Foundation, Inc.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Frank van der Linden and Eric Haszlakiewicz; by Jason R. Thorpe
9 * of the Numerical Aerospace Simulation Facility, NASA Ames Research Center.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
34 * Linux compatibility module. Try to deal with various Linux system calls.
38 * These functions have been moved to multiarch to allow
39 * selection of which machines include them to be
40 * determined by the individual files.linux_<arch> files.
42 * Function in multiarch:
43 * linux_sys_break : linux_break.c
44 * linux_sys_alarm : linux_misc_notalpha.c
45 * linux_sys_getresgid : linux_misc_notalpha.c
46 * linux_sys_nice : linux_misc_notalpha.c
47 * linux_sys_readdir : linux_misc_notalpha.c
48 * linux_sys_setresgid : linux_misc_notalpha.c
49 * linux_sys_time : linux_misc_notalpha.c
50 * linux_sys_utime : linux_misc_notalpha.c
51 * linux_sys_waitpid : linux_misc_notalpha.c
52 * linux_sys_old_mmap : linux_oldmmap.c
53 * linux_sys_oldolduname : linux_oldolduname.c
54 * linux_sys_oldselect : linux_oldselect.c
55 * linux_sys_olduname : linux_olduname.c
56 * linux_sys_pipe : linux_pipe.c
59 #include <sys/cdefs.h>
60 __KERNEL_RCSID(0, "$NetBSD: linux_misc.c,v 1.212 2009/11/24 10:42:44 njoly Exp $");
62 #include <sys/param.h>
63 #include <sys/systm.h>
64 #include <sys/namei.h>
66 #include <sys/dirent.h>
69 #include <sys/filedesc.h>
70 #include <sys/ioctl.h>
71 #include <sys/kernel.h>
72 #include <sys/malloc.h>
75 #include <sys/mount.h>
77 #include <sys/reboot.h>
78 #include <sys/resource.h>
79 #include <sys/resourcevar.h>
80 #include <sys/select.h>
81 #include <sys/signal.h>
82 #include <sys/signalvar.h>
83 #include <sys/socket.h>
85 #include <sys/times.h>
86 #include <sys/vnode.h>
89 #include <sys/utsname.h>
90 #include <sys/unistd.h>
91 #include <sys/vfs_syscalls.h>
92 #include <sys/swap.h> /* for SWAP_ON */
93 #include <sys/sysctl.h> /* for KERN_DOMAINNAME */
94 #include <sys/kauth.h>
96 #include <sys/ptrace.h>
97 #include <machine/ptrace.h>
99 #include <sys/syscall.h>
100 #include <sys/syscallargs.h>
102 #include <compat/sys/resource.h>
104 #include <compat/linux/common/linux_machdep.h>
105 #include <compat/linux/common/linux_types.h>
106 #include <compat/linux/common/linux_signal.h>
107 #include <compat/linux/common/linux_ipc.h>
108 #include <compat/linux/common/linux_sem.h>
110 #include <compat/linux/linux_syscallargs.h>
112 #include <compat/linux/common/linux_fcntl.h>
113 #include <compat/linux/common/linux_mmap.h>
114 #include <compat/linux/common/linux_dirent.h>
115 #include <compat/linux/common/linux_util.h>
116 #include <compat/linux/common/linux_misc.h>
117 #ifndef COMPAT_LINUX32
118 #include <compat/linux/common/linux_statfs.h>
119 #include <compat/linux/common/linux_limit.h>
121 #include <compat/linux/common/linux_ptrace.h>
122 #include <compat/linux/common/linux_reboot.h>
123 #include <compat/linux/common/linux_emuldata.h>
125 #ifndef COMPAT_LINUX32
126 const int linux_ptrace_request_map
[] = {
127 LINUX_PTRACE_TRACEME
, PT_TRACE_ME
,
128 LINUX_PTRACE_PEEKTEXT
, PT_READ_I
,
129 LINUX_PTRACE_PEEKDATA
, PT_READ_D
,
130 LINUX_PTRACE_POKETEXT
, PT_WRITE_I
,
131 LINUX_PTRACE_POKEDATA
, PT_WRITE_D
,
132 LINUX_PTRACE_CONT
, PT_CONTINUE
,
133 LINUX_PTRACE_KILL
, PT_KILL
,
134 LINUX_PTRACE_ATTACH
, PT_ATTACH
,
135 LINUX_PTRACE_DETACH
, PT_DETACH
,
137 LINUX_PTRACE_SINGLESTEP
, PT_STEP
,
139 LINUX_PTRACE_SYSCALL
, PT_SYSCALL
,
143 const struct linux_mnttypes linux_fstypes
[] = {
144 { MOUNT_FFS
, LINUX_DEFAULT_SUPER_MAGIC
},
145 { MOUNT_NFS
, LINUX_NFS_SUPER_MAGIC
},
146 { MOUNT_MFS
, LINUX_DEFAULT_SUPER_MAGIC
},
147 { MOUNT_MSDOS
, LINUX_MSDOS_SUPER_MAGIC
},
148 { MOUNT_LFS
, LINUX_DEFAULT_SUPER_MAGIC
},
149 { MOUNT_FDESC
, LINUX_DEFAULT_SUPER_MAGIC
},
150 { MOUNT_NULL
, LINUX_DEFAULT_SUPER_MAGIC
},
151 { MOUNT_OVERLAY
, LINUX_DEFAULT_SUPER_MAGIC
},
152 { MOUNT_UMAP
, LINUX_DEFAULT_SUPER_MAGIC
},
153 { MOUNT_KERNFS
, LINUX_DEFAULT_SUPER_MAGIC
},
154 { MOUNT_PROCFS
, LINUX_PROC_SUPER_MAGIC
},
155 { MOUNT_AFS
, LINUX_DEFAULT_SUPER_MAGIC
},
156 { MOUNT_CD9660
, LINUX_ISOFS_SUPER_MAGIC
},
157 { MOUNT_UNION
, LINUX_DEFAULT_SUPER_MAGIC
},
158 { MOUNT_ADOSFS
, LINUX_ADFS_SUPER_MAGIC
},
159 { MOUNT_EXT2FS
, LINUX_EXT2_SUPER_MAGIC
},
160 { MOUNT_CFS
, LINUX_DEFAULT_SUPER_MAGIC
},
161 { MOUNT_CODA
, LINUX_CODA_SUPER_MAGIC
},
162 { MOUNT_FILECORE
, LINUX_DEFAULT_SUPER_MAGIC
},
163 { MOUNT_NTFS
, LINUX_DEFAULT_SUPER_MAGIC
},
164 { MOUNT_SMBFS
, LINUX_SMB_SUPER_MAGIC
},
165 { MOUNT_PTYFS
, LINUX_DEVPTS_SUPER_MAGIC
},
166 { MOUNT_TMPFS
, LINUX_TMPFS_SUPER_MAGIC
}
168 const int linux_fstypes_cnt
= sizeof(linux_fstypes
) / sizeof(linux_fstypes
[0]);
171 #define DPRINTF(a) uprintf a
176 /* Local linux_misc.c functions: */
177 static void linux_to_bsd_mmap_args(struct sys_mmap_args
*,
178 const struct linux_sys_mmap_args
*);
179 static int linux_mmap(struct lwp
*, const struct linux_sys_mmap_args
*,
180 register_t
*, off_t
);
184 * The information on a terminated (or stopped) process needs
185 * to be converted in order for Linux binaries to get a valid signal
189 bsd_to_linux_wstat(int st
)
194 if (WIFSIGNALED(st
)) {
196 if (sig
>= 0 && sig
< NSIG
)
197 st
= (st
& ~0177) | native_to_linux_signo
[sig
];
198 } else if (WIFSTOPPED(st
)) {
200 if (sig
>= 0 && sig
< NSIG
)
201 st
= (st
& ~0xff00) |
202 (native_to_linux_signo
[sig
] << 8);
208 * wait4(2). Passed on to the NetBSD call, surrounded by code to
209 * reserve some space for a NetBSD-style wait status, and converting
210 * it to what Linux wants.
213 linux_sys_wait4(struct lwp
*l
, const struct linux_sys_wait4_args
*uap
, register_t
*retval
)
217 syscallarg(int *) status;
218 syscallarg(int) options;
219 syscallarg(struct rusage50 *) rusage;
221 int error
, status
, options
, linux_options
, pid
= SCARG(uap
, pid
);
222 struct rusage50 ru50
;
226 linux_options
= SCARG(uap
, options
);
227 options
= WOPTSCHECKED
;
228 if (linux_options
& ~(LINUX_WAIT4_KNOWNFLAGS
))
231 if (linux_options
& LINUX_WAIT4_WNOHANG
)
233 if (linux_options
& LINUX_WAIT4_WUNTRACED
)
234 options
|= WUNTRACED
;
235 if (linux_options
& LINUX_WAIT4_WALL
)
237 if (linux_options
& LINUX_WAIT4_WCLONE
)
240 if (linux_options
& LINUX_WAIT4_WNOTHREAD
)
241 printf("WARNING: %s: linux process %d.%d called "
242 "waitpid with __WNOTHREAD set!",
243 __FILE__
, l
->l_proc
->p_pid
, l
->l_lid
);
247 error
= do_sys_wait(&pid
, &status
, options
,
248 SCARG(uap
, rusage
) != NULL
? &ru
: NULL
);
255 mutex_enter(p
->p_lock
);
256 sigdelset(&p
->p_sigpend
.sp_set
, SIGCHLD
); /* XXXAD ksiginfo leak */
257 mutex_exit(p
->p_lock
);
259 if (SCARG(uap
, rusage
) != NULL
) {
260 rusage_to_rusage50(&ru
, &ru50
);
261 error
= copyout(&ru
, SCARG(uap
, rusage
), sizeof(ru
));
264 if (error
== 0 && SCARG(uap
, status
) != NULL
) {
265 status
= bsd_to_linux_wstat(status
);
266 error
= copyout(&status
, SCARG(uap
, status
), sizeof status
);
273 * Linux brk(2). The check if the new address is >= the old one is
274 * done in the kernel in Linux. NetBSD does it in the library.
277 linux_sys_brk(struct lwp
*l
, const struct linux_sys_brk_args
*uap
, register_t
*retval
)
280 syscallarg(char *) nsize;
282 struct proc
*p
= l
->l_proc
;
283 char *nbrk
= SCARG(uap
, nsize
);
284 struct sys_obreak_args oba
;
285 struct vmspace
*vm
= p
->p_vmspace
;
286 struct linux_emuldata
*ed
= (struct linux_emuldata
*)p
->p_emuldata
;
288 SCARG(&oba
, nsize
) = nbrk
;
290 if ((void *) nbrk
> vm
->vm_daddr
&& sys_obreak(l
, &oba
, retval
) == 0)
291 ed
->s
->p_break
= (char*)nbrk
;
293 nbrk
= ed
->s
->p_break
;
295 retval
[0] = (register_t
)nbrk
;
301 * Implement the fs stat functions. Straightforward.
304 linux_sys_statfs(struct lwp
*l
, const struct linux_sys_statfs_args
*uap
, register_t
*retval
)
307 syscallarg(const char *) path;
308 syscallarg(struct linux_statfs *) sp;
311 struct linux_statfs ltmp
;
314 sb
= STATVFSBUF_GET();
315 error
= do_sys_pstatvfs(l
, SCARG(uap
, path
), ST_WAIT
, sb
);
317 bsd_to_linux_statfs(sb
, <mp
);
318 error
= copyout(<mp
, SCARG(uap
, sp
), sizeof ltmp
);
326 linux_sys_fstatfs(struct lwp
*l
, const struct linux_sys_fstatfs_args
*uap
, register_t
*retval
)
330 syscallarg(struct linux_statfs *) sp;
333 struct linux_statfs ltmp
;
336 sb
= STATVFSBUF_GET();
337 error
= do_sys_fstatvfs(l
, SCARG(uap
, fd
), ST_WAIT
, sb
);
339 bsd_to_linux_statfs(sb
, <mp
);
340 error
= copyout(<mp
, SCARG(uap
, sp
), sizeof ltmp
);
348 * uname(). Just copy the info from the various strings stored in the
349 * kernel, and put it in the Linux utsname structure. That structure
350 * is almost the same as the NetBSD one, only it has fields 65 characters
351 * long, and an extra domainname field.
354 linux_sys_uname(struct lwp
*l
, const struct linux_sys_uname_args
*uap
, register_t
*retval
)
357 syscallarg(struct linux_utsname *) up;
359 struct linux_utsname luts
;
361 strlcpy(luts
.l_sysname
, linux_sysname
, sizeof(luts
.l_sysname
));
362 strlcpy(luts
.l_nodename
, hostname
, sizeof(luts
.l_nodename
));
363 strlcpy(luts
.l_release
, linux_release
, sizeof(luts
.l_release
));
364 strlcpy(luts
.l_version
, linux_version
, sizeof(luts
.l_version
));
365 strlcpy(luts
.l_machine
, LINUX_UNAME_ARCH
, sizeof(luts
.l_machine
));
366 strlcpy(luts
.l_domainname
, domainname
, sizeof(luts
.l_domainname
));
368 return copyout(&luts
, SCARG(uap
, up
), sizeof(luts
));
371 /* Used directly on: alpha, mips, ppc, sparc, sparc64 */
372 /* Used indirectly on: arm, i386, m68k */
375 * New type Linux mmap call.
376 * Only called directly on machines with >= 6 free regs.
379 linux_sys_mmap(struct lwp
*l
, const struct linux_sys_mmap_args
*uap
, register_t
*retval
)
382 syscallarg(unsigned long) addr;
383 syscallarg(size_t) len;
384 syscallarg(int) prot;
385 syscallarg(int) flags;
387 syscallarg(linux_off_t) offset;
390 if (SCARG(uap
, offset
) & PAGE_MASK
)
393 return linux_mmap(l
, uap
, retval
, SCARG(uap
, offset
));
397 * Guts of most architectures' mmap64() implementations. This shares
398 * its list of arguments with linux_sys_mmap().
400 * The difference in linux_sys_mmap2() is that "offset" is actually
401 * (offset / pagesize), not an absolute byte count. This translation
402 * to pagesize offsets is done inside glibc between the mmap64() call
403 * point, and the actual syscall.
406 linux_sys_mmap2(struct lwp
*l
, const struct linux_sys_mmap2_args
*uap
, register_t
*retval
)
409 syscallarg(unsigned long) addr;
410 syscallarg(size_t) len;
411 syscallarg(int) prot;
412 syscallarg(int) flags;
414 syscallarg(linux_off_t) offset;
417 return linux_mmap(l
, uap
, retval
,
418 ((off_t
)SCARG(uap
, offset
)) << PAGE_SHIFT
);
422 * Massage arguments and call system mmap(2).
425 linux_mmap(struct lwp
*l
, const struct linux_sys_mmap_args
*uap
, register_t
*retval
, off_t offset
)
427 struct sys_mmap_args cma
;
431 linux_to_bsd_mmap_args(&cma
, uap
);
432 SCARG(&cma
, pos
) = offset
;
434 if (SCARG(uap
, flags
) & LINUX_MAP_GROWSDOWN
) {
436 * Request for stack-like memory segment. On linux, this
437 * works by mmap()ping (small) segment, which is automatically
438 * extended when page fault happens below the currently
439 * allocated area. We emulate this by allocating (typically
440 * bigger) segment sized at current stack size limit, and
441 * offsetting the requested and returned address accordingly.
442 * Since physical pages are only allocated on-demand, this
443 * is effectively identical.
445 rlim_t ssl
= l
->l_proc
->p_rlimit
[RLIMIT_STACK
].rlim_cur
;
447 if (SCARG(&cma
, len
) < ssl
) {
448 /* Compute the address offset */
449 mmoff
= round_page(ssl
) - SCARG(uap
, len
);
451 if (SCARG(&cma
, addr
))
452 SCARG(&cma
, addr
) = (char *)SCARG(&cma
, addr
) - mmoff
;
454 SCARG(&cma
, len
) = (size_t) ssl
;
458 error
= sys_mmap(l
, &cma
, retval
);
462 /* Shift the returned address for stack-like segment if necessary */
469 linux_to_bsd_mmap_args(struct sys_mmap_args
*cma
, const struct linux_sys_mmap_args
*uap
)
471 int flags
= MAP_TRYFIXED
, fl
= SCARG(uap
, flags
);
473 flags
|= cvtto_bsd_mask(fl
, LINUX_MAP_SHARED
, MAP_SHARED
);
474 flags
|= cvtto_bsd_mask(fl
, LINUX_MAP_PRIVATE
, MAP_PRIVATE
);
475 flags
|= cvtto_bsd_mask(fl
, LINUX_MAP_FIXED
, MAP_FIXED
);
476 flags
|= cvtto_bsd_mask(fl
, LINUX_MAP_ANON
, MAP_ANON
);
477 /* XXX XAX ERH: Any other flags here? There are more defined... */
479 SCARG(cma
, addr
) = (void *)SCARG(uap
, addr
);
480 SCARG(cma
, len
) = SCARG(uap
, len
);
481 SCARG(cma
, prot
) = SCARG(uap
, prot
);
482 if (SCARG(cma
, prot
) & VM_PROT_WRITE
) /* XXX */
483 SCARG(cma
, prot
) |= VM_PROT_READ
;
484 SCARG(cma
, flags
) = flags
;
485 SCARG(cma
, fd
) = flags
& MAP_ANON
? -1 : SCARG(uap
, fd
);
489 #define LINUX_MREMAP_MAYMOVE 1
490 #define LINUX_MREMAP_FIXED 2
493 linux_sys_mremap(struct lwp
*l
, const struct linux_sys_mremap_args
*uap
, register_t
*retval
)
496 syscallarg(void *) old_address;
497 syscallarg(size_t) old_size;
498 syscallarg(size_t) new_size;
499 syscallarg(u_long) flags;
512 flags
= SCARG(uap
, flags
);
513 oldva
= (vaddr_t
)SCARG(uap
, old_address
);
514 oldsize
= round_page(SCARG(uap
, old_size
));
515 newsize
= round_page(SCARG(uap
, new_size
));
516 if ((flags
& ~(LINUX_MREMAP_FIXED
|LINUX_MREMAP_MAYMOVE
)) != 0) {
520 if ((flags
& LINUX_MREMAP_FIXED
) != 0) {
521 if ((flags
& LINUX_MREMAP_MAYMOVE
) == 0) {
526 newva
= SCARG(uap
, new_address
);
527 uvmflags
= MAP_FIXED
;
532 } else if ((flags
& LINUX_MREMAP_MAYMOVE
) != 0) {
536 uvmflags
= MAP_FIXED
;
539 map
= &p
->p_vmspace
->vm_map
;
540 error
= uvm_mremap(map
, oldva
, oldsize
, map
, &newva
, newsize
, p
,
544 *retval
= (error
!= 0) ? 0 : (register_t
)newva
;
549 linux_sys_mprotect(struct lwp
*l
, const struct linux_sys_mprotect_args
*uap
, register_t
*retval
)
552 syscallarg(const void *) start;
553 syscallarg(unsigned long) len;
554 syscallarg(int) prot;
556 struct vm_map_entry
*entry
;
559 vaddr_t end
, start
, len
, stacklim
;
562 start
= (vaddr_t
)SCARG(uap
, start
);
563 len
= round_page(SCARG(uap
, len
));
564 prot
= SCARG(uap
, prot
);
565 grows
= prot
& (LINUX_PROT_GROWSDOWN
| LINUX_PROT_GROWSUP
);
569 if (start
& PAGE_MASK
)
576 if (prot
& ~(PROT_READ
| PROT_WRITE
| PROT_EXEC
))
578 if (grows
== (LINUX_PROT_GROWSDOWN
| LINUX_PROT_GROWSUP
))
582 map
= &p
->p_vmspace
->vm_map
;
585 VM_MAP_RANGE_CHECK(map
, start
, end
);
587 if (!uvm_map_lookup_entry(map
, start
, &entry
) || entry
->start
> start
) {
593 * Approximate the behaviour of PROT_GROWS{DOWN,UP}.
596 stacklim
= (vaddr_t
)p
->p_limit
->pl_rlimit
[RLIMIT_STACK
].rlim_cur
;
597 if (grows
& LINUX_PROT_GROWSDOWN
) {
598 if (USRSTACK
- stacklim
<= start
&& start
< USRSTACK
) {
599 start
= USRSTACK
- stacklim
;
601 start
= entry
->start
;
603 } else if (grows
& LINUX_PROT_GROWSUP
) {
604 if (USRSTACK
<= end
&& end
< USRSTACK
+ stacklim
) {
605 end
= USRSTACK
+ stacklim
;
611 return uvm_map_protect(map
, start
, end
, prot
, FALSE
);
615 * This code is partly stolen from src/lib/libc/compat-43/times.c
618 #define CONVTCK(r) (r.tv_sec * hz + r.tv_usec / (1000000 / hz))
621 linux_sys_times(struct lwp
*l
, const struct linux_sys_times_args
*uap
, register_t
*retval
)
624 syscallarg(struct times *) tms;
626 struct proc
*p
= l
->l_proc
;
630 if (SCARG(uap
, tms
)) {
631 struct linux_tms ltms
;
634 mutex_enter(p
->p_lock
);
635 calcru(p
, &ru
.ru_utime
, &ru
.ru_stime
, NULL
, NULL
);
636 ltms
.ltms_utime
= CONVTCK(ru
.ru_utime
);
637 ltms
.ltms_stime
= CONVTCK(ru
.ru_stime
);
638 ltms
.ltms_cutime
= CONVTCK(p
->p_stats
->p_cru
.ru_utime
);
639 ltms
.ltms_cstime
= CONVTCK(p
->p_stats
->p_cru
.ru_stime
);
640 mutex_exit(p
->p_lock
);
642 if ((error
= copyout(<ms
, SCARG(uap
, tms
), sizeof ltms
)))
648 retval
[0] = ((linux_clock_t
)(CONVTCK(t
)));
655 * Linux 'readdir' call. This code is mostly taken from the
656 * SunOS getdents call (see compat/sunos/sunos_misc.c), though
657 * an attempt has been made to keep it a little cleaner (failing
658 * miserably, because of the cruft needed if count 1 is passed).
660 * The d_off field should contain the offset of the next valid entry,
661 * but in Linux it has the offset of the entry itself. We emulate
664 * Read in BSD-style entries, convert them, and copy them out.
666 * Note that this doesn't handle union-mounted filesystems.
669 linux_sys_getdents(struct lwp
*l
, const struct linux_sys_getdents_args
*uap
, register_t
*retval
)
673 syscallarg(struct linux_dirent *) dent;
674 syscallarg(unsigned int) count;
678 char *inp
, *tbuf
; /* BSD-format */
679 int len
, reclen
; /* BSD-format */
680 char *outp
; /* Linux-format */
681 int resid
, linux_reclen
= 0; /* Linux-format */
685 struct linux_dirent idb
;
686 off_t off
; /* true file offset */
687 int buflen
, error
, eofflag
, nbytes
, oldcall
;
689 off_t
*cookiebuf
= NULL
, *cookie
;
692 /* fd_getvnode() will use the descriptor for us */
693 if ((error
= fd_getvnode(SCARG(uap
, fd
), &fp
)) != 0)
696 if ((fp
->f_flag
& FREAD
) == 0) {
701 vp
= (struct vnode
*)fp
->f_data
;
702 if (vp
->v_type
!= VDIR
) {
707 if ((error
= VOP_GETATTR(vp
, &va
, l
->l_cred
)))
710 nbytes
= SCARG(uap
, count
);
711 if (nbytes
== 1) { /* emulating old, broken behaviour */
712 nbytes
= sizeof (idb
);
713 buflen
= max(va
.va_blocksize
, nbytes
);
716 buflen
= min(MAXBSIZE
, nbytes
);
717 if (buflen
< va
.va_blocksize
)
718 buflen
= va
.va_blocksize
;
721 tbuf
= malloc(buflen
, M_TEMP
, M_WAITOK
);
723 vn_lock(vp
, LK_EXCLUSIVE
| LK_RETRY
);
726 aiov
.iov_base
= tbuf
;
727 aiov
.iov_len
= buflen
;
728 auio
.uio_iov
= &aiov
;
730 auio
.uio_rw
= UIO_READ
;
731 auio
.uio_resid
= buflen
;
732 auio
.uio_offset
= off
;
733 UIO_SETUP_SYSSPACE(&auio
);
735 * First we read into the malloc'ed buffer, then
736 * we massage it into user space, one record at a time.
738 error
= VOP_READDIR(vp
, &auio
, fp
->f_cred
, &eofflag
, &cookiebuf
,
744 outp
= (void *)SCARG(uap
, dent
);
746 if ((len
= buflen
- auio
.uio_resid
) == 0)
749 for (cookie
= cookiebuf
; len
> 0; len
-= reclen
) {
750 bdp
= (struct dirent
*)inp
;
751 reclen
= bdp
->d_reclen
;
753 panic("linux_readdir");
754 if (bdp
->d_fileno
== 0) {
755 inp
+= reclen
; /* it is a hole; squish it out */
762 linux_reclen
= LINUX_RECLEN(&idb
, bdp
->d_namlen
);
763 if (reclen
> len
|| resid
< linux_reclen
) {
764 /* entry too big for buffer, so just stop */
769 * Massage in place to make a Linux-shaped dirent (otherwise
770 * we have to worry about touching user memory outside of
771 * the copyout() call).
773 idb
.d_ino
= bdp
->d_fileno
;
775 * The old readdir() call misuses the offset and reclen fields.
778 idb
.d_off
= (linux_off_t
)linux_reclen
;
779 idb
.d_reclen
= (u_short
)bdp
->d_namlen
;
781 if (sizeof (idb
.d_off
) <= 4 && (off
>> 32) != 0) {
782 compat_offseterr(vp
, "linux_getdents");
786 idb
.d_off
= (linux_off_t
)off
;
787 idb
.d_reclen
= (u_short
)linux_reclen
;
789 strcpy(idb
.d_name
, bdp
->d_name
);
790 if ((error
= copyout((void *)&idb
, outp
, linux_reclen
)))
792 /* advance past this real entry */
795 off
= *cookie
++; /* each entry points to itself */
798 /* advance output past Linux-shaped entry */
799 outp
+= linux_reclen
;
800 resid
-= linux_reclen
;
805 /* if we squished out the whole block, try again */
806 if (outp
== (void *)SCARG(uap
, dent
))
808 fp
->f_offset
= off
; /* update the vnode offset */
811 nbytes
= resid
+ linux_reclen
;
814 *retval
= nbytes
- resid
;
818 free(cookiebuf
, M_TEMP
);
821 fd_putfile(SCARG(uap
, fd
));
826 * Even when just using registers to pass arguments to syscalls you can
827 * have 5 of them on the i386. So this newer version of select() does
831 linux_sys_select(struct lwp
*l
, const struct linux_sys_select_args
*uap
, register_t
*retval
)
834 syscallarg(int) nfds;
835 syscallarg(fd_set *) readfds;
836 syscallarg(fd_set *) writefds;
837 syscallarg(fd_set *) exceptfds;
838 syscallarg(struct timeval50 *) timeout;
841 return linux_select1(l
, retval
, SCARG(uap
, nfds
), SCARG(uap
, readfds
),
842 SCARG(uap
, writefds
), SCARG(uap
, exceptfds
),
843 (struct linux_timeval
*)SCARG(uap
, timeout
));
847 * Common code for the old and new versions of select(). A couple of
848 * things are important:
849 * 1) return the amount of time left in the 'timeout' parameter
850 * 2) select never returns ERESTART on Linux, always return EINTR
853 linux_select1(struct lwp
*l
, register_t
*retval
, int nfds
, fd_set
*readfds
,
854 fd_set
*writefds
, fd_set
*exceptfds
, struct linux_timeval
*timeout
)
856 struct timespec ts0
, ts1
, uts
, *ts
= NULL
;
857 struct linux_timeval ltv
;
861 * Store current time for computation of the amount of
865 if ((error
= copyin(timeout
, <v
, sizeof(ltv
))))
867 uts
.tv_sec
= ltv
.tv_sec
;
868 uts
.tv_nsec
= ltv
.tv_usec
* 1000;
869 if (itimespecfix(&uts
)) {
871 * The timeval was invalid. Convert it to something
872 * valid that will act as it does under Linux.
874 uts
.tv_sec
+= uts
.tv_nsec
/ 1000000000;
875 uts
.tv_nsec
%= 1000000000;
876 if (uts
.tv_nsec
< 0) {
878 uts
.tv_nsec
+= 1000000000;
887 error
= selcommon(retval
, nfds
, readfds
, writefds
, exceptfds
, ts
, NULL
);
891 * See fs/select.c in the Linux kernel. Without this,
892 * Maelstrom doesn't work.
894 if (error
== ERESTART
)
902 * Compute how much time was left of the timeout,
903 * by subtracting the current time and the time
904 * before we started the call, and subtracting
905 * that result from the user-supplied value.
908 timespecsub(&ts1
, &ts0
, &ts1
);
909 timespecsub(&uts
, &ts1
, &uts
);
914 ltv
.tv_sec
= uts
.tv_sec
;
915 ltv
.tv_usec
= uts
.tv_nsec
/ 1000;
916 if ((error
= copyout(<v
, timeout
, sizeof(ltv
))))
924 * Set the 'personality' (emulation mode) for the current process. Only
925 * accept the Linux personality here (0). This call is needed because
926 * the Linux ELF crt0 issues it in an ugly kludge to make sure that
927 * ELF binaries run in Linux mode, not SVR4 mode.
930 linux_sys_personality(struct lwp
*l
, const struct linux_sys_personality_args
*uap
, register_t
*retval
)
933 syscallarg(unsigned long) per;
936 switch (SCARG(uap
, per
)) {
937 case LINUX_PER_LINUX
:
938 case LINUX_PER_QUERY
:
944 retval
[0] = LINUX_PER_LINUX
;
949 * We have nonexistent fsuid equal to uid.
950 * If modification is requested, refuse.
953 linux_sys_setfsuid(struct lwp
*l
, const struct linux_sys_setfsuid_args
*uap
, register_t
*retval
)
956 syscallarg(uid_t) uid;
960 uid
= SCARG(uap
, uid
);
961 if (kauth_cred_getuid(l
->l_cred
) != uid
)
962 return sys_nosys(l
, uap
, retval
);
969 linux_sys_setfsgid(struct lwp
*l
, const struct linux_sys_setfsgid_args
*uap
, register_t
*retval
)
972 syscallarg(gid_t) gid;
976 gid
= SCARG(uap
, gid
);
977 if (kauth_cred_getgid(l
->l_cred
) != gid
)
978 return sys_nosys(l
, uap
, retval
);
985 linux_sys_setresuid(struct lwp
*l
, const struct linux_sys_setresuid_args
*uap
, register_t
*retval
)
988 syscallarg(uid_t) ruid;
989 syscallarg(uid_t) euid;
990 syscallarg(uid_t) suid;
994 * Note: These checks are a little different than the NetBSD
995 * setreuid(2) call performs. This precisely follows the
996 * behavior of the Linux kernel.
999 return do_setresuid(l
, SCARG(uap
, ruid
), SCARG(uap
, euid
),
1001 ID_R_EQ_R
| ID_R_EQ_E
| ID_R_EQ_S
|
1002 ID_E_EQ_R
| ID_E_EQ_E
| ID_E_EQ_S
|
1003 ID_S_EQ_R
| ID_S_EQ_E
| ID_S_EQ_S
);
1007 linux_sys_getresuid(struct lwp
*l
, const struct linux_sys_getresuid_args
*uap
, register_t
*retval
)
1010 syscallarg(uid_t *) ruid;
1011 syscallarg(uid_t *) euid;
1012 syscallarg(uid_t *) suid;
1014 kauth_cred_t pc
= l
->l_cred
;
1019 * Linux copies these values out to userspace like so:
1022 * 2. If that succeeds, copy out euid.
1023 * 3. If both of those succeed, copy out suid.
1025 uid
= kauth_cred_getuid(pc
);
1026 if ((error
= copyout(&uid
, SCARG(uap
, ruid
), sizeof(uid_t
))) != 0)
1029 uid
= kauth_cred_geteuid(pc
);
1030 if ((error
= copyout(&uid
, SCARG(uap
, euid
), sizeof(uid_t
))) != 0)
1033 uid
= kauth_cred_getsvuid(pc
);
1035 return (copyout(&uid
, SCARG(uap
, suid
), sizeof(uid_t
)));
1039 linux_sys_ptrace(struct lwp
*l
, const struct linux_sys_ptrace_args
*uap
, register_t
*retval
)
1042 i386, m68k, powerpc: T=int
1043 alpha, amd64: T=long
1044 syscallarg(T) request;
1053 ptr
= linux_ptrace_request_map
;
1054 request
= SCARG(uap
, request
);
1056 if (*ptr
++ == request
) {
1057 struct sys_ptrace_args pta
;
1059 SCARG(&pta
, req
) = *ptr
;
1060 SCARG(&pta
, pid
) = SCARG(uap
, pid
);
1061 SCARG(&pta
, addr
) = (void *)SCARG(uap
, addr
);
1062 SCARG(&pta
, data
) = SCARG(uap
, data
);
1065 * Linux ptrace(PTRACE_CONT, pid, 0, 0) means actually
1066 * to continue where the process left off previously.
1067 * The same thing is achieved by addr == (void *) 1
1068 * on NetBSD, so rewrite 'addr' appropriately.
1070 if (request
== LINUX_PTRACE_CONT
&& SCARG(uap
, addr
)==0)
1071 SCARG(&pta
, addr
) = (void *) 1;
1073 error
= sysent
[SYS_ptrace
].sy_call(l
, &pta
, retval
);
1077 case LINUX_PTRACE_PEEKTEXT
:
1078 case LINUX_PTRACE_PEEKDATA
:
1079 error
= copyout (retval
,
1080 (void *)SCARG(uap
, data
),
1082 *retval
= SCARG(uap
, data
);
1092 return LINUX_SYS_PTRACE_ARCH(l
, uap
, retval
);
1096 linux_sys_reboot(struct lwp
*l
, const struct linux_sys_reboot_args
*uap
, register_t
*retval
)
1099 syscallarg(int) magic1;
1100 syscallarg(int) magic2;
1101 syscallarg(int) cmd;
1102 syscallarg(void *) arg;
1104 struct sys_reboot_args
/* {
1105 syscallarg(int) opt;
1106 syscallarg(char *) bootstr;
1110 if ((error
= kauth_authorize_system(l
->l_cred
,
1111 KAUTH_SYSTEM_REBOOT
, 0, NULL
, NULL
, NULL
)) != 0)
1114 if (SCARG(uap
, magic1
) != LINUX_REBOOT_MAGIC1
)
1116 if (SCARG(uap
, magic2
) != LINUX_REBOOT_MAGIC2
&&
1117 SCARG(uap
, magic2
) != LINUX_REBOOT_MAGIC2A
&&
1118 SCARG(uap
, magic2
) != LINUX_REBOOT_MAGIC2B
)
1121 switch ((unsigned long)SCARG(uap
, cmd
)) {
1122 case LINUX_REBOOT_CMD_RESTART
:
1123 SCARG(&sra
, opt
) = RB_AUTOBOOT
;
1125 case LINUX_REBOOT_CMD_HALT
:
1126 SCARG(&sra
, opt
) = RB_HALT
;
1128 case LINUX_REBOOT_CMD_POWER_OFF
:
1129 SCARG(&sra
, opt
) = RB_HALT
|RB_POWERDOWN
;
1131 case LINUX_REBOOT_CMD_RESTART2
:
1132 /* Reboot with an argument. */
1133 SCARG(&sra
, opt
) = RB_AUTOBOOT
|RB_STRING
;
1134 SCARG(&sra
, bootstr
) = SCARG(uap
, arg
);
1136 case LINUX_REBOOT_CMD_CAD_ON
:
1137 return(EINVAL
); /* We don't implement ctrl-alt-delete */
1138 case LINUX_REBOOT_CMD_CAD_OFF
:
1144 return(sys_reboot(l
, &sra
, retval
));
1148 * Copy of compat_12_sys_swapon().
1151 linux_sys_swapon(struct lwp
*l
, const struct linux_sys_swapon_args
*uap
, register_t
*retval
)
1154 syscallarg(const char *) name;
1156 struct sys_swapctl_args ua
;
1158 SCARG(&ua
, cmd
) = SWAP_ON
;
1159 SCARG(&ua
, arg
) = (void *)__UNCONST(SCARG(uap
, name
));
1160 SCARG(&ua
, misc
) = 0; /* priority */
1161 return (sys_swapctl(l
, &ua
, retval
));
1165 * Stop swapping to the file or block device specified by path.
1168 linux_sys_swapoff(struct lwp
*l
, const struct linux_sys_swapoff_args
*uap
, register_t
*retval
)
1171 syscallarg(const char *) path;
1173 struct sys_swapctl_args ua
;
1175 SCARG(&ua
, cmd
) = SWAP_OFF
;
1176 SCARG(&ua
, arg
) = __UNCONST(SCARG(uap
, path
)); /*XXXUNCONST*/
1177 return (sys_swapctl(l
, &ua
, retval
));
1181 * Copy of compat_09_sys_setdomainname()
1185 linux_sys_setdomainname(struct lwp
*l
, const struct linux_sys_setdomainname_args
*uap
, register_t
*retval
)
1188 syscallarg(char *) domainname;
1189 syscallarg(int) len;
1194 name
[1] = KERN_DOMAINNAME
;
1195 return (old_sysctl(&name
[0], 2, 0, 0, SCARG(uap
, domainname
),
1196 SCARG(uap
, len
), l
));
1204 linux_sys_sysinfo(struct lwp
*l
, const struct linux_sys_sysinfo_args
*uap
, register_t
*retval
)
1207 syscallarg(struct linux_sysinfo *) arg;
1209 struct linux_sysinfo si
;
1212 si
.uptime
= time_uptime
;
1214 si
.loads
[0] = la
->ldavg
[0] * LINUX_SYSINFO_LOADS_SCALE
/ la
->fscale
;
1215 si
.loads
[1] = la
->ldavg
[1] * LINUX_SYSINFO_LOADS_SCALE
/ la
->fscale
;
1216 si
.loads
[2] = la
->ldavg
[2] * LINUX_SYSINFO_LOADS_SCALE
/ la
->fscale
;
1217 si
.totalram
= ctob((u_long
)physmem
);
1218 si
.freeram
= (u_long
)uvmexp
.free
* uvmexp
.pagesize
;
1219 si
.sharedram
= 0; /* XXX */
1220 si
.bufferram
= (u_long
)uvmexp
.filepages
* uvmexp
.pagesize
;
1221 si
.totalswap
= (u_long
)uvmexp
.swpages
* uvmexp
.pagesize
;
1223 (u_long
)(uvmexp
.swpages
- uvmexp
.swpginuse
) * uvmexp
.pagesize
;
1226 /* The following are only present in newer Linux kernels. */
1231 return (copyout(&si
, SCARG(uap
, arg
), sizeof si
));
1235 linux_sys_getrlimit(struct lwp
*l
, const struct linux_sys_getrlimit_args
*uap
, register_t
*retval
)
1238 syscallarg(int) which;
1239 # ifdef LINUX_LARGEFILE64
1240 syscallarg(struct rlimit *) rlp;
1242 syscallarg(struct orlimit *) rlp;
1245 # ifdef LINUX_LARGEFILE64
1252 which
= linux_to_bsd_limit(SCARG(uap
, which
));
1256 bsd_to_linux_rlimit(&orl
, &l
->l_proc
->p_rlimit
[which
]);
1258 return copyout(&orl
, SCARG(uap
, rlp
), sizeof(orl
));
1262 linux_sys_setrlimit(struct lwp
*l
, const struct linux_sys_setrlimit_args
*uap
, register_t
*retval
)
1265 syscallarg(int) which;
1266 # ifdef LINUX_LARGEFILE64
1267 syscallarg(struct rlimit *) rlp;
1269 syscallarg(struct orlimit *) rlp;
1273 # ifdef LINUX_LARGEFILE64
1281 if ((error
= copyin(SCARG(uap
, rlp
), &orl
, sizeof(orl
))) != 0)
1284 which
= linux_to_bsd_limit(SCARG(uap
, which
));
1288 linux_to_bsd_rlimit(&rl
, &orl
);
1289 return dosetrlimit(l
, l
->l_proc
, which
, &rl
);
1292 # if !defined(__mips__) && !defined(__amd64__)
1293 /* XXX: this doesn't look 100% common, at least mips doesn't have it */
1295 linux_sys_ugetrlimit(struct lwp
*l
, const struct linux_sys_ugetrlimit_args
*uap
, register_t
*retval
)
1297 return linux_sys_getrlimit(l
, (const void *)uap
, retval
);
1302 * This gets called for unsupported syscalls. The difference to sys_nosys()
1303 * is that process does not get SIGSYS, the call just returns with ENOSYS.
1304 * This is the way Linux does it and glibc depends on this behaviour.
1307 linux_sys_nosys(struct lwp
*l
, const void *v
, register_t
*retval
)
1313 linux_sys_getpriority(struct lwp
*l
, const struct linux_sys_getpriority_args
*uap
, register_t
*retval
)
1316 syscallarg(int) which;
1317 syscallarg(int) who;
1319 struct sys_getpriority_args bsa
;
1322 SCARG(&bsa
, which
) = SCARG(uap
, which
);
1323 SCARG(&bsa
, who
) = SCARG(uap
, who
);
1325 if ((error
= sys_getpriority(l
, &bsa
, retval
)))
1328 *retval
= NZERO
- *retval
;
1333 #endif /* !COMPAT_LINUX32 */