Try to fixup the mess of mdoc(7)/man(7) mixture as created by the merge.
[netbsd-mini2440.git] / sys / kern / sys_descrip.c
blobf12fd5cd5bb008eb3332cc9eca45332ce26e6b2e
1 /* $NetBSD: sys_descrip.c,v 1.16 2009/06/10 23:48:10 yamt Exp $ */
3 /*-
4 * Copyright (c) 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
30 * Copyright (c) 1982, 1986, 1989, 1991, 1993
31 * The Regents of the University of California. All rights reserved.
32 * (c) UNIX System Laboratories, Inc.
33 * All or some portions of this file are derived from material licensed
34 * to the University of California by American Telephone and Telegraph
35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
36 * the permission of UNIX System Laboratories, Inc.
38 * Redistribution and use in source and binary forms, with or without
39 * modification, are permitted provided that the following conditions
40 * are met:
41 * 1. Redistributions of source code must retain the above copyright
42 * notice, this list of conditions and the following disclaimer.
43 * 2. Redistributions in binary form must reproduce the above copyright
44 * notice, this list of conditions and the following disclaimer in the
45 * documentation and/or other materials provided with the distribution.
46 * 3. Neither the name of the University nor the names of its contributors
47 * may be used to endorse or promote products derived from this software
48 * without specific prior written permission.
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
62 * @(#)kern_descrip.c 8.8 (Berkeley) 2/14/95
66 * System calls on descriptors.
69 #include <sys/cdefs.h>
70 __KERNEL_RCSID(0, "$NetBSD: sys_descrip.c,v 1.16 2009/06/10 23:48:10 yamt Exp $");
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/filedesc.h>
75 #include <sys/kernel.h>
76 #include <sys/vnode.h>
77 #include <sys/proc.h>
78 #include <sys/file.h>
79 #include <sys/namei.h>
80 #include <sys/socket.h>
81 #include <sys/socketvar.h>
82 #include <sys/stat.h>
83 #include <sys/ioctl.h>
84 #include <sys/fcntl.h>
85 #include <sys/kmem.h>
86 #include <sys/pool.h>
87 #include <sys/syslog.h>
88 #include <sys/unistd.h>
89 #include <sys/resourcevar.h>
90 #include <sys/conf.h>
91 #include <sys/event.h>
92 #include <sys/kauth.h>
93 #include <sys/atomic.h>
94 #include <sys/mount.h>
95 #include <sys/syscallargs.h>
97 #include <uvm/uvm_readahead.h>
100 * Duplicate a file descriptor.
103 sys_dup(struct lwp *l, const struct sys_dup_args *uap, register_t *retval)
105 /* {
106 syscallarg(int) fd;
107 } */
108 int new, error, old;
109 file_t *fp;
111 old = SCARG(uap, fd);
113 if ((fp = fd_getfile(old)) == NULL) {
114 return EBADF;
116 error = fd_dup(fp, 0, &new, false);
117 fd_putfile(old);
118 *retval = new;
119 return error;
123 * Duplicate a file descriptor to a particular value.
126 sys_dup2(struct lwp *l, const struct sys_dup2_args *uap, register_t *retval)
128 /* {
129 syscallarg(int) from;
130 syscallarg(int) to;
131 } */
132 int old, new, error;
133 file_t *fp;
135 old = SCARG(uap, from);
136 new = SCARG(uap, to);
138 if ((fp = fd_getfile(old)) == NULL) {
139 return EBADF;
141 mutex_enter(&fp->f_lock);
142 fp->f_count++;
143 mutex_exit(&fp->f_lock);
144 fd_putfile(old);
146 if ((u_int)new >= curproc->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
147 (u_int)new >= maxfiles) {
148 error = EBADF;
149 } else if (old == new) {
150 error = 0;
151 } else {
152 error = fd_dup2(fp, new);
154 closef(fp);
155 *retval = new;
157 return error;
161 * fcntl call which is being passed to the file's fs.
163 static int
164 fcntl_forfs(int fd, file_t *fp, int cmd, void *arg)
166 int error;
167 u_int size;
168 void *data, *memp;
169 #define STK_PARAMS 128
170 char stkbuf[STK_PARAMS];
172 if ((fp->f_flag & (FREAD | FWRITE)) == 0)
173 return (EBADF);
176 * Interpret high order word to find amount of data to be
177 * copied to/from the user's address space.
179 size = (size_t)F_PARAM_LEN(cmd);
180 if (size > F_PARAM_MAX)
181 return (EINVAL);
182 memp = NULL;
183 if (size > sizeof(stkbuf)) {
184 memp = kmem_alloc(size, KM_SLEEP);
185 data = memp;
186 } else
187 data = stkbuf;
188 if (cmd & F_FSIN) {
189 if (size) {
190 error = copyin(arg, data, size);
191 if (error) {
192 if (memp)
193 kmem_free(memp, size);
194 return (error);
196 } else
197 *(void **)data = arg;
198 } else if ((cmd & F_FSOUT) != 0 && size != 0) {
200 * Zero the buffer so the user always
201 * gets back something deterministic.
203 memset(data, 0, size);
204 } else if (cmd & F_FSVOID)
205 *(void **)data = arg;
208 error = (*fp->f_ops->fo_fcntl)(fp, cmd, data);
211 * Copy any data to user, size was
212 * already set and checked above.
214 if (error == 0 && (cmd & F_FSOUT) && size)
215 error = copyout(data, arg, size);
216 if (memp)
217 kmem_free(memp, size);
218 return (error);
222 do_fcntl_lock(int fd, int cmd, struct flock *fl)
224 file_t *fp;
225 vnode_t *vp;
226 proc_t *p;
227 int error, flg;
229 if ((fp = fd_getfile(fd)) == NULL)
230 return EBADF;
231 if (fp->f_type != DTYPE_VNODE) {
232 fd_putfile(fd);
233 return EINVAL;
235 vp = fp->f_data;
236 if (fl->l_whence == SEEK_CUR)
237 fl->l_start += fp->f_offset;
239 flg = F_POSIX;
240 p = curproc;
242 switch (cmd) {
243 case F_SETLKW:
244 flg |= F_WAIT;
245 /* Fall into F_SETLK */
247 case F_SETLK:
248 switch (fl->l_type) {
249 case F_RDLCK:
250 if ((fp->f_flag & FREAD) == 0) {
251 error = EBADF;
252 break;
254 if ((p->p_flag & PK_ADVLOCK) == 0) {
255 mutex_enter(p->p_lock);
256 p->p_flag |= PK_ADVLOCK;
257 mutex_exit(p->p_lock);
259 error = VOP_ADVLOCK(vp, p, F_SETLK, fl, flg);
260 break;
262 case F_WRLCK:
263 if ((fp->f_flag & FWRITE) == 0) {
264 error = EBADF;
265 break;
267 if ((p->p_flag & PK_ADVLOCK) == 0) {
268 mutex_enter(p->p_lock);
269 p->p_flag |= PK_ADVLOCK;
270 mutex_exit(p->p_lock);
272 error = VOP_ADVLOCK(vp, p, F_SETLK, fl, flg);
273 break;
275 case F_UNLCK:
276 error = VOP_ADVLOCK(vp, p, F_UNLCK, fl, F_POSIX);
277 break;
279 default:
280 error = EINVAL;
281 break;
283 break;
285 case F_GETLK:
286 if (fl->l_type != F_RDLCK &&
287 fl->l_type != F_WRLCK &&
288 fl->l_type != F_UNLCK) {
289 error = EINVAL;
290 break;
292 error = VOP_ADVLOCK(vp, p, F_GETLK, fl, F_POSIX);
293 break;
295 default:
296 error = EINVAL;
297 break;
300 fd_putfile(fd);
301 return error;
305 * The file control system call.
308 sys_fcntl(struct lwp *l, const struct sys_fcntl_args *uap, register_t *retval)
310 /* {
311 syscallarg(int) fd;
312 syscallarg(int) cmd;
313 syscallarg(void *) arg;
314 } */
315 int fd, i, tmp, error, cmd, newmin;
316 filedesc_t *fdp;
317 file_t *fp;
318 fdfile_t *ff;
319 struct flock fl;
321 fd = SCARG(uap, fd);
322 cmd = SCARG(uap, cmd);
323 fdp = l->l_fd;
324 error = 0;
326 switch (cmd) {
327 case F_CLOSEM:
328 if (fd < 0)
329 return EBADF;
330 while ((i = fdp->fd_lastfile) >= fd) {
331 if (fd_getfile(i) == NULL) {
332 /* Another thread has updated. */
333 continue;
335 fd_close(i);
337 return 0;
339 case F_MAXFD:
340 *retval = fdp->fd_lastfile;
341 return 0;
343 case F_SETLKW:
344 case F_SETLK:
345 case F_GETLK:
346 error = copyin(SCARG(uap, arg), &fl, sizeof(fl));
347 if (error)
348 return error;
349 error = do_fcntl_lock(fd, cmd, &fl);
350 if (cmd == F_GETLK && error == 0)
351 error = copyout(&fl, SCARG(uap, arg), sizeof(fl));
352 return error;
354 default:
355 /* Handled below */
356 break;
359 if ((fp = fd_getfile(fd)) == NULL)
360 return (EBADF);
361 ff = fdp->fd_dt->dt_ff[fd];
363 if ((cmd & F_FSCTL)) {
364 error = fcntl_forfs(fd, fp, cmd, SCARG(uap, arg));
365 fd_putfile(fd);
366 return error;
369 switch (cmd) {
370 case F_DUPFD:
371 newmin = (long)SCARG(uap, arg);
372 if ((u_int)newmin >=
373 l->l_proc->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
374 (u_int)newmin >= maxfiles) {
375 fd_putfile(fd);
376 return EINVAL;
378 error = fd_dup(fp, newmin, &i, false);
379 *retval = i;
380 break;
382 case F_GETFD:
383 *retval = ff->ff_exclose;
384 break;
386 case F_SETFD:
387 if ((long)SCARG(uap, arg) & FD_CLOEXEC) {
388 ff->ff_exclose = true;
389 fdp->fd_exclose = true;
390 } else {
391 ff->ff_exclose = false;
393 break;
395 case F_GETFL:
396 *retval = OFLAGS(fp->f_flag);
397 break;
399 case F_SETFL:
400 /* XXX not guaranteed to be atomic. */
401 tmp = FFLAGS((long)SCARG(uap, arg)) & FCNTLFLAGS;
402 error = (*fp->f_ops->fo_fcntl)(fp, F_SETFL, &tmp);
403 if (error)
404 break;
405 i = tmp ^ fp->f_flag;
406 if (i & FNONBLOCK) {
407 int flgs = tmp & FNONBLOCK;
408 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, &flgs);
409 if (error) {
410 (*fp->f_ops->fo_fcntl)(fp, F_SETFL,
411 &fp->f_flag);
412 break;
415 if (i & FASYNC) {
416 int flgs = tmp & FASYNC;
417 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, &flgs);
418 if (error) {
419 if (i & FNONBLOCK) {
420 tmp = fp->f_flag & FNONBLOCK;
421 (void)(*fp->f_ops->fo_ioctl)(fp,
422 FIONBIO, &tmp);
424 (*fp->f_ops->fo_fcntl)(fp, F_SETFL,
425 &fp->f_flag);
426 break;
429 fp->f_flag = (fp->f_flag & ~FCNTLFLAGS) | tmp;
430 break;
432 case F_GETOWN:
433 error = (*fp->f_ops->fo_ioctl)(fp, FIOGETOWN, &tmp);
434 *retval = tmp;
435 break;
437 case F_SETOWN:
438 tmp = (int)(uintptr_t) SCARG(uap, arg);
439 error = (*fp->f_ops->fo_ioctl)(fp, FIOSETOWN, &tmp);
440 break;
442 default:
443 error = EINVAL;
446 fd_putfile(fd);
447 return (error);
451 * Close a file descriptor.
454 sys_close(struct lwp *l, const struct sys_close_args *uap, register_t *retval)
456 /* {
457 syscallarg(int) fd;
458 } */
460 if (fd_getfile(SCARG(uap, fd)) == NULL) {
461 return EBADF;
463 return fd_close(SCARG(uap, fd));
467 * Return status information about a file descriptor.
468 * Common function for compat code.
471 do_sys_fstat(int fd, struct stat *sb)
473 file_t *fp;
474 int error;
476 if ((fp = fd_getfile(fd)) == NULL) {
477 return EBADF;
479 error = (*fp->f_ops->fo_stat)(fp, sb);
480 fd_putfile(fd);
482 return error;
486 * Return status information about a file descriptor.
489 sys___fstat50(struct lwp *l, const struct sys___fstat50_args *uap,
490 register_t *retval)
492 /* {
493 syscallarg(int) fd;
494 syscallarg(struct stat *) sb;
495 } */
496 struct stat sb;
497 int error;
499 error = do_sys_fstat(SCARG(uap, fd), &sb);
500 if (error == 0) {
501 error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
503 return error;
507 * Return pathconf information about a file descriptor.
510 sys_fpathconf(struct lwp *l, const struct sys_fpathconf_args *uap,
511 register_t *retval)
513 /* {
514 syscallarg(int) fd;
515 syscallarg(int) name;
516 } */
517 int fd, error;
518 file_t *fp;
520 fd = SCARG(uap, fd);
521 error = 0;
523 if ((fp = fd_getfile(fd)) == NULL) {
524 return (EBADF);
526 switch (fp->f_type) {
527 case DTYPE_SOCKET:
528 case DTYPE_PIPE:
529 if (SCARG(uap, name) != _PC_PIPE_BUF)
530 error = EINVAL;
531 else
532 *retval = PIPE_BUF;
533 break;
535 case DTYPE_VNODE:
536 error = VOP_PATHCONF(fp->f_data, SCARG(uap, name), retval);
537 break;
539 case DTYPE_KQUEUE:
540 error = EINVAL;
541 break;
543 default:
544 error = EOPNOTSUPP;
545 break;
548 fd_putfile(fd);
549 return (error);
553 * Apply an advisory lock on a file descriptor.
555 * Just attempt to get a record lock of the requested type on
556 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
558 /* ARGSUSED */
560 sys_flock(struct lwp *l, const struct sys_flock_args *uap, register_t *retval)
562 /* {
563 syscallarg(int) fd;
564 syscallarg(int) how;
565 } */
566 int fd, how, error;
567 file_t *fp;
568 vnode_t *vp;
569 struct flock lf;
570 proc_t *p;
572 fd = SCARG(uap, fd);
573 how = SCARG(uap, how);
574 error = 0;
576 if ((fp = fd_getfile(fd)) == NULL) {
577 return EBADF;
579 if (fp->f_type != DTYPE_VNODE) {
580 fd_putfile(fd);
581 return EOPNOTSUPP;
584 vp = fp->f_data;
585 lf.l_whence = SEEK_SET;
586 lf.l_start = 0;
587 lf.l_len = 0;
589 switch (how & ~LOCK_NB) {
590 case LOCK_UN:
591 lf.l_type = F_UNLCK;
592 atomic_and_uint(&fp->f_flag, ~FHASLOCK);
593 error = VOP_ADVLOCK(vp, fp, F_UNLCK, &lf, F_FLOCK);
594 fd_putfile(fd);
595 return error;
596 case LOCK_EX:
597 lf.l_type = F_WRLCK;
598 break;
599 case LOCK_SH:
600 lf.l_type = F_RDLCK;
601 break;
602 default:
603 fd_putfile(fd);
604 return EINVAL;
607 atomic_or_uint(&fp->f_flag, FHASLOCK);
608 p = curproc;
609 if (how & LOCK_NB) {
610 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, F_FLOCK);
611 } else {
612 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, F_FLOCK|F_WAIT);
614 fd_putfile(fd);
615 return error;
619 do_posix_fadvise(int fd, off_t offset, off_t len, int advice)
621 file_t *fp;
622 vnode_t *vp;
623 off_t endoffset;
624 int error;
625 CTASSERT(POSIX_FADV_NORMAL == UVM_ADV_NORMAL);
626 CTASSERT(POSIX_FADV_RANDOM == UVM_ADV_RANDOM);
627 CTASSERT(POSIX_FADV_SEQUENTIAL == UVM_ADV_SEQUENTIAL);
629 if (len == 0) {
630 endoffset = INT64_MAX;
631 } else if (INT64_MAX - offset >= len) {
632 endoffset = offset + len;
633 } else {
634 return EINVAL;
636 if ((fp = fd_getfile(fd)) == NULL) {
637 return EBADF;
639 if (fp->f_type != DTYPE_VNODE) {
640 if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) {
641 error = ESPIPE;
642 } else {
643 error = EOPNOTSUPP;
645 fd_putfile(fd);
646 return error;
649 switch (advice) {
650 case POSIX_FADV_WILLNEED:
651 case POSIX_FADV_DONTNEED:
652 vp = fp->f_data;
653 if (vp->v_type != VREG && vp->v_type != VBLK) {
654 fd_putfile(fd);
655 return 0;
657 break;
660 switch (advice) {
661 case POSIX_FADV_NORMAL:
662 case POSIX_FADV_RANDOM:
663 case POSIX_FADV_SEQUENTIAL:
666 * We ignore offset and size. must lock the file to
667 * do this, as f_advice is sub-word sized.
669 mutex_enter(&fp->f_lock);
670 fp->f_advice = (u_char)advice;
671 mutex_exit(&fp->f_lock);
672 error = 0;
673 break;
675 case POSIX_FADV_WILLNEED:
676 vp = fp->f_data;
677 error = uvm_readahead(&vp->v_uobj, offset, endoffset - offset);
678 break;
680 case POSIX_FADV_DONTNEED:
681 vp = fp->f_data;
682 mutex_enter(&vp->v_interlock);
683 error = VOP_PUTPAGES(vp, round_page(offset),
684 trunc_page(endoffset), PGO_DEACTIVATE | PGO_CLEANIT);
685 break;
687 case POSIX_FADV_NOREUSE:
688 /* Not implemented yet. */
689 error = 0;
690 break;
691 default:
692 error = EINVAL;
693 break;
696 fd_putfile(fd);
697 return error;
701 sys___posix_fadvise50(struct lwp *l,
702 const struct sys___posix_fadvise50_args *uap,
703 register_t *retval)
705 /* {
706 syscallarg(int) fd;
707 syscallarg(int) pad;
708 syscallarg(off_t) offset;
709 syscallarg(off_t) len;
710 syscallarg(int) advice;
711 } */
713 *retval = do_posix_fadvise(SCARG(uap, fd), SCARG(uap, offset),
714 SCARG(uap, len), SCARG(uap, advice));
716 return 0;