/*	$NetBSD: tmpfs_subr.c,v 1.55 2009/09/03 11:22:05 pooka Exp $	*/

/*
 * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
 * 2005 program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Efficient memory file system supporting functions.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.55 2009/09/03 11:22:05 pooka Exp $");

#include <sys/param.h>
#include <sys/dirent.h>
#include <sys/event.h>
#include <sys/kmem.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/kauth.h>
#include <sys/atomic.h>

#include <uvm/uvm.h>

#include <miscfs/specfs/specdev.h>
#include <miscfs/genfs/genfs.h>
#include <fs/tmpfs/tmpfs.h>
#include <fs/tmpfs/tmpfs_fifoops.h>
#include <fs/tmpfs/tmpfs_specops.h>
#include <fs/tmpfs/tmpfs_vnops.h>
/* --------------------------------------------------------------------- */

/*
 * Allocates a new node of type 'type' inside the 'tmp' mount point, with
 * its owner set to 'uid', its group to 'gid' and its mode set to 'mode',
 * using the credentials of the process 'p'.
 *
 * If the node type is set to 'VDIR', then the parent parameter must point
 * to the parent directory of the node being created.  It may only be NULL
 * while allocating the root node.
 *
 * If the node type is set to 'VBLK' or 'VCHR', then the rdev parameter
 * specifies the device the node represents.
 *
 * If the node type is set to 'VLNK', then the parameter target specifies
 * the file name of the target file for the symbolic link that is being
 * created.
 *
 * Note that new nodes are retrieved from the available list if it has
 * items or, if it is empty, from the node pool as long as there is enough
 * space to create them.
 *
 * Returns zero on success or an appropriate error code on failure.
 */
int
tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type,
    uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent,
    char *target, dev_t rdev, struct tmpfs_node **node)
{
	struct tmpfs_node *nnode;

	/* If the root directory of the 'tmp' file system is not yet
	 * allocated, this must be the request to do it. */
	KASSERT(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR));

	KASSERT(IFF(type == VLNK, target != NULL));
	KASSERT(IFF(type == VBLK || type == VCHR, rdev != VNOVAL));

	KASSERT(uid != VNOVAL && gid != VNOVAL && mode != VNOVAL);

	if (atomic_inc_uint_nv(&tmp->tm_nodes_cnt) >= tmp->tm_nodes_max) {
		atomic_dec_uint(&tmp->tm_nodes_cnt);
		return ENOSPC;
	}

	nnode = (struct tmpfs_node *)TMPFS_POOL_GET(&tmp->tm_node_pool, 0);
	if (nnode == NULL) {
		atomic_dec_uint(&tmp->tm_nodes_cnt);
		return ENOSPC;
	}

	/*
	 * XXX Where the pool is backed by a map larger than (4GB *
	 * sizeof(*nnode)), this may produce duplicate inode numbers
	 * for applications that do not understand 64-bit ino_t.
	 */
	nnode->tn_id = (ino_t)((uintptr_t)nnode / sizeof(*nnode));
	nnode->tn_gen = arc4random();
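	/*
	 * Illustrative note (not part of the original code): the division
	 * above makes tn_id the node's slot index within the pool's backing
	 * map.  Assuming, say, sizeof(*nnode) == 232, two nodes can only
	 * collide in the low 32 bits of tn_id if their addresses lie
	 * (1ULL << 32) * 232 bytes apart, which is the "4GB * sizeof(*nnode)"
	 * situation the XXX comment warns about for programs that truncate
	 * ino_t to 32 bits.
	 */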
	/* Generic initialization. */
	nnode->tn_type = type;
	nnode->tn_size = 0;
	nnode->tn_status = 0;
	nnode->tn_flags = 0;
	nnode->tn_links = 0;

	vfs_timestamp(&nnode->tn_atime);
	nnode->tn_birthtime = nnode->tn_atime;
	nnode->tn_ctime = nnode->tn_atime;
	nnode->tn_mtime = nnode->tn_atime;

	nnode->tn_uid = uid;
	nnode->tn_gid = gid;
	nnode->tn_mode = mode;
	nnode->tn_lockf = NULL;
	nnode->tn_vnode = NULL;

	/* Type-specific initialization. */
	switch (nnode->tn_type) {
	case VBLK:
	case VCHR:
		nnode->tn_spec.tn_dev.tn_rdev = rdev;
		break;

	case VDIR:
		TAILQ_INIT(&nnode->tn_spec.tn_dir.tn_dir);
		nnode->tn_spec.tn_dir.tn_parent =
		    (parent == NULL) ? nnode : parent;
		nnode->tn_spec.tn_dir.tn_readdir_lastn = 0;
		nnode->tn_spec.tn_dir.tn_readdir_lastp = NULL;
		break;

	case VLNK:
		KASSERT(strlen(target) < MAXPATHLEN);
		nnode->tn_size = strlen(target);
		nnode->tn_spec.tn_lnk.tn_link =
		    tmpfs_str_pool_get(&tmp->tm_str_pool, nnode->tn_size, 0);
		if (nnode->tn_spec.tn_lnk.tn_link == NULL) {
			atomic_dec_uint(&tmp->tm_nodes_cnt);
			TMPFS_POOL_PUT(&tmp->tm_node_pool, nnode);
			return ENOSPC;
		}
		memcpy(nnode->tn_spec.tn_lnk.tn_link, target, nnode->tn_size);
		break;

	case VREG:
		nnode->tn_spec.tn_reg.tn_aobj =
		    uao_create(INT32_MAX - PAGE_SIZE, 0);
		nnode->tn_spec.tn_reg.tn_aobj_pages = 0;
		break;
	}

	mutex_init(&nnode->tn_vlock, MUTEX_DEFAULT, IPL_NONE);

	mutex_enter(&tmp->tm_lock);
	LIST_INSERT_HEAD(&tmp->tm_nodes, nnode, tn_entries);
	mutex_exit(&tmp->tm_lock);

	*node = nnode;
	return 0;
}
/* --------------------------------------------------------------------- */

/*
 * Destroys the node pointed to by node from the file system 'tmp'.
 * If the node does not belong to the given mount point, the results are
 * unpredictable.
 *
 * If the node references a directory, no entries are allowed, because
 * their removal could need a recursive algorithm, something forbidden in
 * kernel space.  Furthermore, there is no need to provide such
 * functionality (recursive removal) because the only primitives offered
 * to the user are the removal of empty directories and the deletion of
 * individual files.
 *
 * Note that nodes are not really deleted; in fact, when a node has been
 * allocated, it cannot be deleted during the whole life of the file
 * system.  Instead, they are moved to the available list and remain there
 * until reused.
 */
void
tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node)
{

	if (node->tn_type == VREG) {
		atomic_add_int(&tmp->tm_pages_used,
		    -node->tn_spec.tn_reg.tn_aobj_pages);
	}

	atomic_dec_uint(&tmp->tm_nodes_cnt);
	mutex_enter(&tmp->tm_lock);
	LIST_REMOVE(node, tn_entries);
	mutex_exit(&tmp->tm_lock);

	switch (node->tn_type) {
	case VLNK:
		tmpfs_str_pool_put(&tmp->tm_str_pool,
		    node->tn_spec.tn_lnk.tn_link, node->tn_size);
		break;

	case VREG:
		if (node->tn_spec.tn_reg.tn_aobj != NULL)
			uao_detach(node->tn_spec.tn_reg.tn_aobj);
		break;

	default:
		break;
	}

	mutex_destroy(&node->tn_vlock);
	TMPFS_POOL_PUT(&tmp->tm_node_pool, node);
}
/* --------------------------------------------------------------------- */

/*
 * Allocates a new directory entry for the node node with a name of name.
 * The new directory entry is returned in *de.
 *
 * The link count of node is increased by one to reflect the new object
 * referencing it.  This takes care of notifying kqueue listeners about
 * this change.
 *
 * Returns zero on success or an appropriate error code on failure.
 */
int
tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node,
    const char *name, uint16_t len, struct tmpfs_dirent **de)
{
	struct tmpfs_dirent *nde;

	nde = (struct tmpfs_dirent *)TMPFS_POOL_GET(&tmp->tm_dirent_pool, 0);
	if (nde == NULL)
		return ENOSPC;

	nde->td_name = tmpfs_str_pool_get(&tmp->tm_str_pool, len, 0);
	if (nde->td_name == NULL) {
		TMPFS_POOL_PUT(&tmp->tm_dirent_pool, nde);
		return ENOSPC;
	}
	nde->td_namelen = len;
	memcpy(nde->td_name, name, len);
	nde->td_node = node;

	node->tn_links++;
	if (node->tn_links > 1 && node->tn_vnode != NULL)
		VN_KNOTE(node->tn_vnode, NOTE_LINK);
	*de = nde;

	return 0;
}
/* --------------------------------------------------------------------- */

/*
 * Frees a directory entry.  It is the caller's responsibility to destroy
 * the node referenced by it if needed.
 *
 * The link count of node is decreased by one to reflect the removal of an
 * object that referenced it.  This only happens if 'node_exists' is true;
 * otherwise the function will not access the node referred to by the
 * directory entry, as it may already have been released from the outside.
 *
 * Interested parties (kqueue) are notified of the link count change; note
 * that this can include both the node pointed to by the directory entry
 * as well as its parent.
 */
void
tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de,
    bool node_exists)
{
	if (node_exists) {
		struct tmpfs_node *node;

		node = de->td_node;

		KASSERT(node->tn_links > 0);
		node->tn_links--;
		if (node->tn_vnode != NULL)
			VN_KNOTE(node->tn_vnode, node->tn_links == 0 ?
			    NOTE_DELETE : NOTE_LINK);
		if (node->tn_type == VDIR)
			VN_KNOTE(node->tn_spec.tn_dir.tn_parent->tn_vnode,
			    NOTE_LINK);
	}

	tmpfs_str_pool_put(&tmp->tm_str_pool, de->td_name, de->td_namelen);
	TMPFS_POOL_PUT(&tmp->tm_dirent_pool, de);
}
/* --------------------------------------------------------------------- */

/*
 * Allocates a new vnode for the node node or returns a new reference to
 * an existing one if the node already had a vnode referencing it.  The
 * resulting locked vnode is returned in *vpp.
 *
 * Returns zero on success or an appropriate error code on failure.
 */
int
tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, struct vnode **vpp)
{
	int error;
	struct vnode *vp;

	/* If there is already a vnode, then lock it. */
again:
	mutex_enter(&node->tn_vlock);
	if ((vp = node->tn_vnode) != NULL) {
		mutex_enter(&vp->v_interlock);
		mutex_exit(&node->tn_vlock);
		error = vget(vp, LK_EXCLUSIVE | LK_INTERLOCK);
		if (error == ENOENT) {
			/* vnode was reclaimed. */
			goto again;
		}
		*vpp = vp;
		return error;
	}

	/* Get a new vnode and associate it with our node. */
	error = getnewvnode(VT_TMPFS, mp, tmpfs_vnodeop_p, &vp);
	if (error != 0) {
		mutex_exit(&node->tn_vlock);
		return error;
	}

	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (error != 0) {
		mutex_exit(&node->tn_vlock);
		return error;
	}

	vp->v_type = node->tn_type;

	/* Type-specific initialization. */
	switch (node->tn_type) {
	case VBLK:
	case VCHR:
		vp->v_op = tmpfs_specop_p;
		spec_node_init(vp, node->tn_spec.tn_dev.tn_rdev);
		break;

	case VDIR:
		vp->v_vflag |= node->tn_spec.tn_dir.tn_parent == node ?
		    VV_ROOT : 0;
		break;

	case VFIFO:
		vp->v_op = tmpfs_fifoop_p;
		break;

	default:
		break;
	}

	uvm_vnp_setsize(vp, node->tn_size);
	vp->v_data = node;
	node->tn_vnode = vp;
	mutex_exit(&node->tn_vlock);
	*vpp = vp;

	KASSERT(IFF(error == 0, *vpp != NULL && VOP_ISLOCKED(*vpp)));
	KASSERT(*vpp == node->tn_vnode);

	return error;
}
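/*
 * Usage sketch (illustrative, not part of the original file): callers such
 * as the lookup and create paths receive a locked, referenced vnode from
 * this function and are expected to release it with vput() once they are
 * done, e.g.:
 *
 *	struct vnode *vp;
 *	if (tmpfs_alloc_vp(mp, node, &vp) == 0) {
 *		... operate on the locked vnode ...
 *		vput(vp);	-- drops both the lock and the reference
 *	}
 */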
/* --------------------------------------------------------------------- */

/*
 * Destroys the association between the vnode vp and the node it
 * references.
 */
void
tmpfs_free_vp(struct vnode *vp)
{
	struct tmpfs_node *node;

	node = VP_TO_TMPFS_NODE(vp);

	mutex_enter(&node->tn_vlock);
	node->tn_vnode = NULL;
	mutex_exit(&node->tn_vlock);
	vp->v_data = NULL;
}
/* --------------------------------------------------------------------- */

/*
 * Allocates a new file of type 'type' and adds it to the parent directory
 * 'dvp'; this addition is done using the component name given in 'cnp'.
 * The ownership of the new file is automatically assigned based on the
 * credentials of the caller (through 'cnp'), the group is set based on
 * the parent directory and the mode is determined from the 'vap' argument.
 * If successful, *vpp holds a vnode to the newly created file and zero
 * is returned.  Otherwise *vpp is NULL and the function returns an
 * appropriate error code.
 */
int
tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap,
    struct componentname *cnp, char *target)
{
	int error;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *dnode;
	struct tmpfs_node *node;
	struct tmpfs_node *parent;

	KASSERT(VOP_ISLOCKED(dvp));
	KASSERT(cnp->cn_flags & HASBUF);

	tmp = VFS_TO_TMPFS(dvp->v_mount);
	dnode = VP_TO_TMPFS_DIR(dvp);
	*vpp = NULL;

	/* If the entry we are creating is a directory, we cannot overflow
	 * the number of links of its parent, because it will get a new
	 * link. */
	if (vap->va_type == VDIR) {
		/* Ensure that we do not overflow the maximum number of links
		 * imposed by the system. */
		KASSERT(dnode->tn_links <= LINK_MAX);
		if (dnode->tn_links == LINK_MAX) {
			error = EMLINK;
			goto out;
		}

		parent = dnode;
	} else
		parent = NULL;

	/* Allocate a node that represents the new file. */
	error = tmpfs_alloc_node(tmp, vap->va_type, kauth_cred_geteuid(cnp->cn_cred),
	    dnode->tn_gid, vap->va_mode, parent, target, vap->va_rdev, &node);
	if (error != 0)
		goto out;

	/* Allocate a directory entry that points to the new file. */
	error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen,
	    &de);
	if (error != 0) {
		tmpfs_free_node(tmp, node);
		goto out;
	}

	/* Allocate a vnode for the new file. */
	error = tmpfs_alloc_vp(dvp->v_mount, node, vpp);
	if (error != 0) {
		tmpfs_free_dirent(tmp, de, true);
		tmpfs_free_node(tmp, node);
		goto out;
	}

	/* Now that all required items are allocated, we can proceed to
	 * insert the new node into the directory, an operation that
	 * cannot fail. */
	tmpfs_dir_attach(dvp, de);
	if (vap->va_type == VDIR) {
		VN_KNOTE(dvp, NOTE_LINK);
		dnode->tn_links++;
		KASSERT(dnode->tn_links <= LINK_MAX);
	}

out:
	if (error != 0 || !(cnp->cn_flags & SAVESTART))
		PNBUF_PUT(cnp->cn_pnbuf);

	KASSERT(IFF(error == 0, *vpp != NULL));

	return error;
}
/* --------------------------------------------------------------------- */

/*
 * Attaches the directory entry de to the directory represented by vp.
 * Note that this does not change the link count of the node pointed to by
 * the directory entry, as this is done by tmpfs_alloc_dirent.
 *
 * As the "parent" directory changes, interested parties are notified of
 * this change.
 */
void
tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de)
{
	struct tmpfs_node *dnode;

	dnode = VP_TO_TMPFS_DIR(vp);

	TAILQ_INSERT_TAIL(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries);
	dnode->tn_size += sizeof(struct tmpfs_dirent);
	dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED |
	    TMPFS_NODE_MODIFIED;
	uvm_vnp_setsize(vp, dnode->tn_size);

	VN_KNOTE(vp, NOTE_WRITE);
}
/* --------------------------------------------------------------------- */

/*
 * Detaches the directory entry de from the directory represented by vp.
 * Note that this does not change the link count of the node pointed to by
 * the directory entry, as this is done by tmpfs_free_dirent.
 *
 * As the "parent" directory changes, interested parties are notified of
 * this change.
 */
void
tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de)
{
	struct tmpfs_node *dnode;

	KASSERT(VOP_ISLOCKED(vp));

	dnode = VP_TO_TMPFS_DIR(vp);

	if (dnode->tn_spec.tn_dir.tn_readdir_lastp == de) {
		dnode->tn_spec.tn_dir.tn_readdir_lastn = 0;
		dnode->tn_spec.tn_dir.tn_readdir_lastp = NULL;
	}

	TAILQ_REMOVE(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries);
	dnode->tn_size -= sizeof(struct tmpfs_dirent);
	dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED |
	    TMPFS_NODE_MODIFIED;
	uvm_vnp_setsize(vp, dnode->tn_size);

	VN_KNOTE(vp, NOTE_WRITE);
}
/* --------------------------------------------------------------------- */

/*
 * Looks for a directory entry in the directory represented by node.
 * 'cnp' describes the name of the entry to look for.  Note that the .
 * and .. components are not allowed as they do not physically exist
 * within directories.
 *
 * Returns a pointer to the entry when found, otherwise NULL.
 */
struct tmpfs_dirent *
tmpfs_dir_lookup(struct tmpfs_node *node, struct componentname *cnp)
{
	struct tmpfs_dirent *de;

	KASSERT(VOP_ISLOCKED(node->tn_vnode));
	KASSERT(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.'));
	KASSERT(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' &&
	    cnp->cn_nameptr[1] == '.')));
	TMPFS_VALIDATE_DIR(node);

	node->tn_status |= TMPFS_NODE_ACCESSED;

	TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
		KASSERT(cnp->cn_namelen < 0xffff);
		if (de->td_namelen == (uint16_t)cnp->cn_namelen &&
		    memcmp(de->td_name, cnp->cn_nameptr, de->td_namelen) == 0) {
			break;
		}
	}

	return de;
}
/* --------------------------------------------------------------------- */

/*
 * Helper function for tmpfs_readdir.  Creates a '.' entry for the given
 * directory and returns it in the uio space.  The function returns 0
 * on success, -1 if there was not enough space in the uio structure to
 * hold the directory entry or an appropriate error code if another
 * error happens.
 */
int
tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio)
{
	int error;
	struct dirent *dentp;

	TMPFS_VALIDATE_DIR(node);
	KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOT);

	dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP);

	dentp->d_fileno = node->tn_id;
	dentp->d_type = DT_DIR;
	dentp->d_namlen = 1;
	dentp->d_name[0] = '.';
	dentp->d_name[1] = '\0';
	dentp->d_reclen = _DIRENT_SIZE(dentp);

	if (dentp->d_reclen > uio->uio_resid)
		error = -1;
	else {
		error = uiomove(dentp, dentp->d_reclen, uio);
		if (error == 0)
			uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT;
	}

	node->tn_status |= TMPFS_NODE_ACCESSED;

	kmem_free(dentp, sizeof(struct dirent));
	return error;
}
/* --------------------------------------------------------------------- */

/*
 * Helper function for tmpfs_readdir.  Creates a '..' entry for the given
 * directory and returns it in the uio space.  The function returns 0
 * on success, -1 if there was not enough space in the uio structure to
 * hold the directory entry or an appropriate error code if another
 * error happens.
 */
int
tmpfs_dir_getdotdotdent(struct tmpfs_node *node, struct uio *uio)
{
	int error;
	struct dirent *dentp;

	TMPFS_VALIDATE_DIR(node);
	KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT);

	dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP);

	dentp->d_fileno = node->tn_spec.tn_dir.tn_parent->tn_id;
	dentp->d_type = DT_DIR;
	dentp->d_namlen = 2;
	dentp->d_name[0] = '.';
	dentp->d_name[1] = '.';
	dentp->d_name[2] = '\0';
	dentp->d_reclen = _DIRENT_SIZE(dentp);

	if (dentp->d_reclen > uio->uio_resid)
		error = -1;
	else {
		error = uiomove(dentp, dentp->d_reclen, uio);
		if (error == 0) {
			struct tmpfs_dirent *de;

			de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir);
			if (de == NULL)
				uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
			else
				uio->uio_offset = tmpfs_dircookie(de);
		}
	}

	node->tn_status |= TMPFS_NODE_ACCESSED;

	kmem_free(dentp, sizeof(struct dirent));
	return error;
}
/* --------------------------------------------------------------------- */

/*
 * Lookup a directory entry by its associated cookie.
 */
struct tmpfs_dirent *
tmpfs_dir_lookupbycookie(struct tmpfs_node *node, off_t cookie)
{
	struct tmpfs_dirent *de;

	KASSERT(VOP_ISLOCKED(node->tn_vnode));

	if (cookie == node->tn_spec.tn_dir.tn_readdir_lastn &&
	    node->tn_spec.tn_dir.tn_readdir_lastp != NULL) {
		return node->tn_spec.tn_dir.tn_readdir_lastp;
	}

	TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
		if (tmpfs_dircookie(de) == cookie) {
			break;
		}
	}

	return de;
}
/* --------------------------------------------------------------------- */

/*
 * Helper function for tmpfs_readdir.  Returns as many directory entries
 * as can fit in the uio space.  The read starts at uio->uio_offset.
 * The function returns 0 on success, -1 if there was not enough space
 * in the uio structure to hold the directory entry or an appropriate
 * error code if another error happens.
 */
int
tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, off_t *cntp)
{
	int error;
	off_t startcookie;
	struct dirent *dentp;
	struct tmpfs_dirent *de;

	KASSERT(VOP_ISLOCKED(node->tn_vnode));
	TMPFS_VALIDATE_DIR(node);

	/* Locate the first directory entry we have to return.  We have cached
	 * the last readdir in the node, so use those values if appropriate.
	 * Otherwise do a linear scan to find the requested entry. */
	startcookie = uio->uio_offset;
	KASSERT(startcookie != TMPFS_DIRCOOKIE_DOT);
	KASSERT(startcookie != TMPFS_DIRCOOKIE_DOTDOT);
	if (startcookie == TMPFS_DIRCOOKIE_EOF) {
		return 0;
	} else {
		de = tmpfs_dir_lookupbycookie(node, startcookie);
		if (de == NULL) {
			return EINVAL;
		}
	}

	dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP);

	/* Read as many entries as possible; i.e., until we reach the end of
	 * the directory or we exhaust uio space. */
	do {
		/* Create a dirent structure representing the current
		 * tmpfs_node and fill it. */
		dentp->d_fileno = de->td_node->tn_id;
		switch (de->td_node->tn_type) {
		case VBLK:
			dentp->d_type = DT_BLK;
			break;

		case VCHR:
			dentp->d_type = DT_CHR;
			break;

		case VDIR:
			dentp->d_type = DT_DIR;
			break;

		case VFIFO:
			dentp->d_type = DT_FIFO;
			break;

		case VLNK:
			dentp->d_type = DT_LNK;
			break;

		case VREG:
			dentp->d_type = DT_REG;
			break;

		case VSOCK:
			dentp->d_type = DT_SOCK;
			break;

		default:
			KASSERT(0);
		}
		dentp->d_namlen = de->td_namelen;
		KASSERT(de->td_namelen < sizeof(dentp->d_name));
		(void)memcpy(dentp->d_name, de->td_name, de->td_namelen);
		dentp->d_name[de->td_namelen] = '\0';
		dentp->d_reclen = _DIRENT_SIZE(dentp);

		/* Stop reading if the directory entry we are treating is
		 * bigger than the amount of data that can be returned. */
		if (dentp->d_reclen > uio->uio_resid) {
			error = -1;
			break;
		}

		/* Copy the new dirent structure into the output buffer and
		 * advance pointers. */
		error = uiomove(dentp, dentp->d_reclen, uio);

		(*cntp)++;
		de = TAILQ_NEXT(de, td_entries);
	} while (error == 0 && uio->uio_resid > 0 && de != NULL);

	/* Update the offset and cache. */
	if (de == NULL) {
		uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
		node->tn_spec.tn_dir.tn_readdir_lastn = 0;
		node->tn_spec.tn_dir.tn_readdir_lastp = NULL;
	} else {
		node->tn_spec.tn_dir.tn_readdir_lastn = uio->uio_offset =
		    tmpfs_dircookie(de);
		node->tn_spec.tn_dir.tn_readdir_lastp = de;
	}

	node->tn_status |= TMPFS_NODE_ACCESSED;

	kmem_free(dentp, sizeof(struct dirent));
	return error;
}
/* --------------------------------------------------------------------- */

/*
 * Resizes the aobj associated with the regular file pointed to by vp to
 * the size newsize.  'vp' must point to a vnode that represents a regular
 * file.  'newsize' must be positive.
 *
 * If the file is extended, the appropriate kevent is raised.  This does
 * not raise a write event though because resizing is not the same as
 * writing.
 *
 * Returns zero on success or an appropriate error code on failure.
 */
int
tmpfs_reg_resize(struct vnode *vp, off_t newsize)
{
	unsigned int newpages, oldpages;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *node;
	off_t oldsize;

	KASSERT(vp->v_type == VREG);
	KASSERT(newsize >= 0);

	node = VP_TO_TMPFS_NODE(vp);
	tmp = VFS_TO_TMPFS(vp->v_mount);

	/* Convert the old and new sizes to the number of pages needed to
	 * store them.  It may happen that we do not need to do anything
	 * because the last allocated page can accommodate the change on
	 * its own. */
	oldsize = node->tn_size;
	oldpages = round_page(oldsize) / PAGE_SIZE;
	KASSERT(oldpages == node->tn_spec.tn_reg.tn_aobj_pages);
	newpages = round_page(newsize) / PAGE_SIZE;

	if (newpages > oldpages &&
	    (ssize_t)(newpages - oldpages) > TMPFS_PAGES_AVAIL(tmp)) {
		return ENOSPC;
	}
	atomic_add_int(&tmp->tm_pages_used, newpages - oldpages);

	if (newsize < oldsize) {
		int zerolen = MIN(round_page(newsize), node->tn_size) - newsize;

		/*
		 * zero out the truncated part of the last page.
		 */
		uvm_vnp_zerorange(vp, newsize, zerolen);
	}

	node->tn_spec.tn_reg.tn_aobj_pages = newpages;
	node->tn_size = newsize;
	uvm_vnp_setsize(vp, newsize);

	/*
	 * free "backing store"
	 */
	if (newpages < oldpages) {
		struct uvm_object *uobj;

		uobj = node->tn_spec.tn_reg.tn_aobj;

		mutex_enter(&uobj->vmobjlock);
		uao_dropswap_range(uobj, newpages, oldpages);
		mutex_exit(&uobj->vmobjlock);
	}

	if (newsize > oldsize)
		VN_KNOTE(vp, NOTE_EXTEND);

	return 0;
}
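/*
 * Worked example (illustrative, not part of the original code), assuming
 * PAGE_SIZE == 4096: growing a file from 5000 to 10000 bytes gives
 * oldpages = round_page(5000) / PAGE_SIZE = 8192 / 4096 = 2 and
 * newpages = round_page(10000) / PAGE_SIZE = 12288 / 4096 = 3, so exactly
 * one extra page is charged against tm_pages_used.  Shrinking from 10000
 * back to 5000 bytes instead yields
 * zerolen = MIN(round_page(5000), 10000) - 5000 = 8192 - 5000 = 3192, i.e.
 * the tail of the last remaining page is zeroed so stale data cannot be
 * read back if the file is later extended again.
 */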
/* --------------------------------------------------------------------- */

/*
 * Returns information about the number of available memory pages,
 * including physical and virtual ones.
 *
 * If 'total' is true, the value returned is the total amount of memory
 * pages configured for the system (either in use or free).
 * If it is false, the value returned is the amount of free memory pages.
 *
 * Remember to remove TMPFS_PAGES_RESERVED from the returned value to avoid
 * excessive memory usage.
 */
unsigned int
tmpfs_mem_info(bool total)
{
	unsigned int size;

	size = 0;
	size += uvmexp.swpgavail;
	if (!total) {
		size -= uvmexp.swpgonly;
	}
	size += uvmexp.free;
	size += uvmexp.filepages;
	if (size > uvmexp.wired) {
		size -= uvmexp.wired;
	} else {
		size = 0;
	}

	return size;
}
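/*
 * Illustrative note (not part of the original code): callers do not use
 * this value raw.  The TMPFS_PAGES_AVAIL() macro from tmpfs.h is the usual
 * consumer; conceptually it evaluates something like
 *
 *	tmpfs_mem_info(false) - TMPFS_PAGES_RESERVED - tmp->tm_pages_used
 *
 * so that a tmpfs mount always leaves a reserve of pages for the rest of
 * the system, as the comment above suggests.  The exact expression lives
 * in tmpfs.h and may differ from this sketch.
 */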
/* --------------------------------------------------------------------- */

/*
 * Change flags of the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chflags(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l)
{
	int error;
	struct tmpfs_node *node;
	kauth_action_t action = KAUTH_VNODE_WRITE_FLAGS;
	int fs_decision = 0;

	KASSERT(VOP_ISLOCKED(vp));

	node = VP_TO_TMPFS_NODE(vp);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	if (kauth_cred_geteuid(cred) != node->tn_uid)
		fs_decision = EACCES;

	/*
	 * If the new flags have non-user flags that are different than
	 * those on the node, we need special permission to change them.
	 */
	if ((flags & SF_SETTABLE) != (node->tn_flags & SF_SETTABLE)) {
		action |= KAUTH_VNODE_WRITE_SYSFLAGS;
	}

	/*
	 * Indicate that this node's flags have system attributes in them if
	 * that is the case.
	 */
	if (node->tn_flags & (SF_IMMUTABLE | SF_APPEND)) {
		action |= KAUTH_VNODE_HAS_SYSFLAGS;
	}

	error = kauth_authorize_vnode(cred, action, vp, NULL, fs_decision);
	if (error)
		return error;

	/*
	 * Set the flags.  If we're not setting non-user flags, be careful not
	 * to overwrite them.
	 *
	 * XXX: Can't we always assign here? if the system flags are different,
	 *      the code above should catch attempts to change them without
	 *      proper permissions, and if we're here it means it's okay to
	 *      change them...
	 */
	if (action & KAUTH_VNODE_WRITE_SYSFLAGS) {
		node->tn_flags = flags;
	} else {
		/* Clear all user-settable flags and re-set them. */
		node->tn_flags &= SF_SETTABLE;
		node->tn_flags |= (flags & UF_SETTABLE);
	}
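	/*
	 * Illustrative example (not part of the original code): if the node
	 * currently has SF_ARCHIVED set and the caller passes
	 * flags == UF_NODUMP, the non-sysflags branch above first keeps the
	 * superuser-only bits (tn_flags & SF_SETTABLE leaves SF_ARCHIVED in
	 * place) and then ORs in only the user-settable bits of the request,
	 * so tn_flags ends up as SF_ARCHIVED | UF_NODUMP.
	 */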
	node->tn_status |= TMPFS_NODE_CHANGED;
	VN_KNOTE(vp, NOTE_ATTRIB);

	KASSERT(VOP_ISLOCKED(vp));

	return 0;
}
/* --------------------------------------------------------------------- */

/*
 * Change access mode on the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chmod(struct vnode *vp, mode_t mode, kauth_cred_t cred, struct lwp *l)
{
	int error;
	struct tmpfs_node *node;

	KASSERT(VOP_ISLOCKED(vp));

	node = VP_TO_TMPFS_NODE(vp);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	error = genfs_can_chmod(vp, cred, node->tn_uid, node->tn_gid,
	    mode);
	error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_SECURITY, vp,
	    NULL, error);
	if (error)
		return error;

	node->tn_mode = (mode & ALLPERMS);

	node->tn_status |= TMPFS_NODE_CHANGED;
	VN_KNOTE(vp, NOTE_ATTRIB);

	KASSERT(VOP_ISLOCKED(vp));

	return 0;
}
/* --------------------------------------------------------------------- */

/*
 * Change ownership of the given vnode.  At least one of uid or gid must
 * be different than VNOVAL.  If one is set to that value, the attribute
 * remains unchanged.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, kauth_cred_t cred,
    struct lwp *l)
{
	int error;
	struct tmpfs_node *node;

	KASSERT(VOP_ISLOCKED(vp));

	node = VP_TO_TMPFS_NODE(vp);

	/* Assign default values if they are unknown. */
	KASSERT(uid != VNOVAL || gid != VNOVAL);
	if (uid == VNOVAL)
		uid = node->tn_uid;
	if (gid == VNOVAL)
		gid = node->tn_gid;
	KASSERT(uid != VNOVAL && gid != VNOVAL);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	error = genfs_can_chown(vp, cred, node->tn_uid, node->tn_gid, uid,
	    gid);
	error = kauth_authorize_vnode(cred, KAUTH_VNODE_CHANGE_OWNERSHIP, vp,
	    NULL, error);
	if (error)
		return error;

	node->tn_uid = uid;
	node->tn_gid = gid;

	node->tn_status |= TMPFS_NODE_CHANGED;
	VN_KNOTE(vp, NOTE_ATTRIB);

	KASSERT(VOP_ISLOCKED(vp));

	return 0;
}
/* --------------------------------------------------------------------- */

/*
 * Change size of the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chsize(struct vnode *vp, u_quad_t size, kauth_cred_t cred,
    struct lwp *l)
{
	int error;
	struct tmpfs_node *node;

	KASSERT(VOP_ISLOCKED(vp));

	node = VP_TO_TMPFS_NODE(vp);

	/* Decide whether this is a valid operation based on the file type. */
	error = 0;
	switch (vp->v_type) {
	case VDIR:
		return EISDIR;

	case VREG:
		if (vp->v_mount->mnt_flag & MNT_RDONLY)
			return EROFS;
		break;

	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		/* FALLTHROUGH */
	case VFIFO:
		/* Allow modifications of special files even if the file
		 * system is mounted read-only (we are not modifying the
		 * files themselves, but the objects they represent). */
		return 0;

	default:
		/* Anything else is unsupported. */
		return EOPNOTSUPP;
	}

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	error = tmpfs_truncate(vp, size);
	/* tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents
	 * for us, as well as update tn_status; no need to do that here. */

	KASSERT(VOP_ISLOCKED(vp));

	return error;
}
/* --------------------------------------------------------------------- */

/*
 * Change access and modification times of the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chtimes(struct vnode *vp, const struct timespec *atime,
    const struct timespec *mtime, const struct timespec *btime,
    int vaflags, kauth_cred_t cred, struct lwp *l)
{
	int error;
	struct tmpfs_node *node;

	KASSERT(VOP_ISLOCKED(vp));

	node = VP_TO_TMPFS_NODE(vp);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	error = genfs_can_chtimes(vp, vaflags, node->tn_uid, cred);

	error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_TIMES, vp, NULL,
	    error);
	if (error)
		return error;

	if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL)
		node->tn_status |= TMPFS_NODE_ACCESSED;

	if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL)
		node->tn_status |= TMPFS_NODE_MODIFIED;

	if (btime->tv_sec == VNOVAL && btime->tv_nsec == VNOVAL)
		btime = NULL;

	tmpfs_update(vp, atime, mtime, btime, 0);
	VN_KNOTE(vp, NOTE_ATTRIB);

	KASSERT(VOP_ISLOCKED(vp));

	return 0;
}
/* --------------------------------------------------------------------- */

/* Sync timestamps */
void
tmpfs_itimes(struct vnode *vp, const struct timespec *acc,
    const struct timespec *mod, const struct timespec *birth)
{
	struct tmpfs_node *node;
	struct timespec nowtm;

	node = VP_TO_TMPFS_NODE(vp);

	if ((node->tn_status & (TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED |
	    TMPFS_NODE_CHANGED)) == 0)
		return;

	if (birth != NULL) {
		node->tn_birthtime = *birth;
	}
	vfs_timestamp(&nowtm);

	if (node->tn_status & TMPFS_NODE_ACCESSED) {
		node->tn_atime = acc ? *acc : nowtm;
	}
	if (node->tn_status & TMPFS_NODE_MODIFIED) {
		node->tn_mtime = mod ? *mod : nowtm;
	}
	if (node->tn_status & TMPFS_NODE_CHANGED) {
		node->tn_ctime = nowtm;
	}

	node->tn_status &=
	    ~(TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED);
}
/* --------------------------------------------------------------------- */

void
tmpfs_update(struct vnode *vp, const struct timespec *acc,
    const struct timespec *mod, const struct timespec *birth, int flags)
{
	struct tmpfs_node *node;

	KASSERT(VOP_ISLOCKED(vp));

	node = VP_TO_TMPFS_NODE(vp);

	if (flags & UPDATE_CLOSE)
		; /* XXX Need to do anything special? */

	tmpfs_itimes(vp, acc, mod, birth);

	KASSERT(VOP_ISLOCKED(vp));
}
/* --------------------------------------------------------------------- */

int
tmpfs_truncate(struct vnode *vp, off_t length)
{
	bool extended;
	int error;
	struct tmpfs_node *node;

	node = VP_TO_TMPFS_NODE(vp);
	extended = length > node->tn_size;

	if (length < 0) {
		error = EINVAL;
		goto out;
	}

	if (node->tn_size == length) {
		error = 0;
		goto out;
	}

	error = tmpfs_reg_resize(vp, length);
	if (error == 0)
		node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;

out:
	tmpfs_update(vp, NULL, NULL, NULL, 0);

	return error;
}