Expand PMF_FN_* macros.
[netbsd-mini2440.git] / sys / fs / tmpfs / tmpfs_subr.c
blobe6300a34155313038161c06a73672ed3c1520547
1 /* $NetBSD: tmpfs_subr.c,v 1.55 2009/09/03 11:22:05 pooka Exp $ */
3 /*
4 * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
9 * 2005 program.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
34 * Efficient memory file system supporting functions.
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.55 2009/09/03 11:22:05 pooka Exp $");
40 #include <sys/param.h>
41 #include <sys/dirent.h>
42 #include <sys/event.h>
43 #include <sys/kmem.h>
44 #include <sys/mount.h>
45 #include <sys/namei.h>
46 #include <sys/time.h>
47 #include <sys/stat.h>
48 #include <sys/systm.h>
49 #include <sys/swap.h>
50 #include <sys/vnode.h>
51 #include <sys/kauth.h>
52 #include <sys/proc.h>
53 #include <sys/atomic.h>
55 #include <uvm/uvm.h>
57 #include <miscfs/specfs/specdev.h>
58 #include <miscfs/genfs/genfs.h>
59 #include <fs/tmpfs/tmpfs.h>
60 #include <fs/tmpfs/tmpfs_fifoops.h>
61 #include <fs/tmpfs/tmpfs_specops.h>
62 #include <fs/tmpfs/tmpfs_vnops.h>
64 /* --------------------------------------------------------------------- */
67 * Allocates a new node of type 'type' inside the 'tmp' mount point, with
68 * its owner set to 'uid', its group to 'gid' and its mode set to 'mode',
69 * using the credentials of the process 'p'.
71 * If the node type is set to 'VDIR', then the parent parameter must point
72 * to the parent directory of the node being created. It may only be NULL
73 * while allocating the root node.
75 * If the node type is set to 'VBLK' or 'VCHR', then the rdev parameter
76 * specifies the device the node represents.
78 * If the node type is set to 'VLNK', then the parameter target specifies
79 * the file name of the target file for the symbolic link that is being
80 * created.
82 * Note that new nodes are retrieved from the available list if it has
83 * items or, if it is empty, from the node pool as long as there is enough
84 * space to create them.
86 * Returns zero on success or an appropriate error code on failure.
88 int
89 tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type,
90 uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent,
91 char *target, dev_t rdev, struct tmpfs_node **node)
93 struct tmpfs_node *nnode;
95 /* If the root directory of the 'tmp' file system is not yet
96 * allocated, this must be the request to do it. */
97 KASSERT(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR));
99 KASSERT(IFF(type == VLNK, target != NULL));
100 KASSERT(IFF(type == VBLK || type == VCHR, rdev != VNOVAL));
102 KASSERT(uid != VNOVAL && gid != VNOVAL && mode != VNOVAL);
104 nnode = NULL;
105 if (atomic_inc_uint_nv(&tmp->tm_nodes_cnt) >= tmp->tm_nodes_max) {
106 atomic_dec_uint(&tmp->tm_nodes_cnt);
107 return ENOSPC;
110 nnode = (struct tmpfs_node *)TMPFS_POOL_GET(&tmp->tm_node_pool, 0);
111 if (nnode == NULL) {
112 atomic_dec_uint(&tmp->tm_nodes_cnt);
113 return ENOSPC;
117 * XXX Where the pool is backed by a map larger than (4GB *
118 * sizeof(*nnode)), this may produce duplicate inode numbers
119 * for applications that do not understand 64-bit ino_t.
121 nnode->tn_id = (ino_t)((uintptr_t)nnode / sizeof(*nnode));
122 nnode->tn_gen = arc4random();
124 /* Generic initialization. */
125 nnode->tn_type = type;
126 nnode->tn_size = 0;
127 nnode->tn_status = 0;
128 nnode->tn_flags = 0;
129 nnode->tn_links = 0;
131 vfs_timestamp(&nnode->tn_atime);
132 nnode->tn_birthtime = nnode->tn_atime;
133 nnode->tn_ctime = nnode->tn_atime;
134 nnode->tn_mtime = nnode->tn_atime;
136 nnode->tn_uid = uid;
137 nnode->tn_gid = gid;
138 nnode->tn_mode = mode;
139 nnode->tn_lockf = NULL;
140 nnode->tn_vnode = NULL;
142 /* Type-specific initialization. */
143 switch (nnode->tn_type) {
144 case VBLK:
145 case VCHR:
146 nnode->tn_spec.tn_dev.tn_rdev = rdev;
147 break;
149 case VDIR:
150 TAILQ_INIT(&nnode->tn_spec.tn_dir.tn_dir);
151 nnode->tn_spec.tn_dir.tn_parent =
152 (parent == NULL) ? nnode : parent;
153 nnode->tn_spec.tn_dir.tn_readdir_lastn = 0;
154 nnode->tn_spec.tn_dir.tn_readdir_lastp = NULL;
155 nnode->tn_links++;
156 break;
158 case VFIFO:
159 /* FALLTHROUGH */
160 case VSOCK:
161 break;
163 case VLNK:
164 KASSERT(strlen(target) < MAXPATHLEN);
165 nnode->tn_size = strlen(target);
166 nnode->tn_spec.tn_lnk.tn_link =
167 tmpfs_str_pool_get(&tmp->tm_str_pool, nnode->tn_size, 0);
168 if (nnode->tn_spec.tn_lnk.tn_link == NULL) {
169 atomic_dec_uint(&tmp->tm_nodes_cnt);
170 TMPFS_POOL_PUT(&tmp->tm_node_pool, nnode);
171 return ENOSPC;
173 memcpy(nnode->tn_spec.tn_lnk.tn_link, target, nnode->tn_size);
174 break;
176 case VREG:
177 nnode->tn_spec.tn_reg.tn_aobj =
178 uao_create(INT32_MAX - PAGE_SIZE, 0);
179 nnode->tn_spec.tn_reg.tn_aobj_pages = 0;
180 break;
182 default:
183 KASSERT(0);
186 mutex_init(&nnode->tn_vlock, MUTEX_DEFAULT, IPL_NONE);
188 mutex_enter(&tmp->tm_lock);
189 LIST_INSERT_HEAD(&tmp->tm_nodes, nnode, tn_entries);
190 mutex_exit(&tmp->tm_lock);
192 *node = nnode;
193 return 0;
196 /* --------------------------------------------------------------------- */
199 * Destroys the node pointed to by node from the file system 'tmp'.
200 * If the node does not belong to the given mount point, the results are
201 * unpredicted.
203 * If the node references a directory; no entries are allowed because
204 * their removal could need a recursive algorithm, something forbidden in
205 * kernel space. Furthermore, there is not need to provide such
206 * functionality (recursive removal) because the only primitives offered
207 * to the user are the removal of empty directories and the deletion of
208 * individual files.
210 * Note that nodes are not really deleted; in fact, when a node has been
211 * allocated, it cannot be deleted during the whole life of the file
212 * system. Instead, they are moved to the available list and remain there
213 * until reused.
215 void
216 tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node)
219 if (node->tn_type == VREG) {
220 atomic_add_int(&tmp->tm_pages_used,
221 -node->tn_spec.tn_reg.tn_aobj_pages);
223 atomic_dec_uint(&tmp->tm_nodes_cnt);
224 mutex_enter(&tmp->tm_lock);
225 LIST_REMOVE(node, tn_entries);
226 mutex_exit(&tmp->tm_lock);
228 switch (node->tn_type) {
229 case VLNK:
230 tmpfs_str_pool_put(&tmp->tm_str_pool,
231 node->tn_spec.tn_lnk.tn_link, node->tn_size);
232 break;
234 case VREG:
235 if (node->tn_spec.tn_reg.tn_aobj != NULL)
236 uao_detach(node->tn_spec.tn_reg.tn_aobj);
237 break;
239 default:
240 break;
243 mutex_destroy(&node->tn_vlock);
244 TMPFS_POOL_PUT(&tmp->tm_node_pool, node);
247 /* --------------------------------------------------------------------- */
250 * Allocates a new directory entry for the node node with a name of name.
251 * The new directory entry is returned in *de.
253 * The link count of node is increased by one to reflect the new object
254 * referencing it. This takes care of notifying kqueue listeners about
255 * this change.
257 * Returns zero on success or an appropriate error code on failure.
260 tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node,
261 const char *name, uint16_t len, struct tmpfs_dirent **de)
263 struct tmpfs_dirent *nde;
265 nde = (struct tmpfs_dirent *)TMPFS_POOL_GET(&tmp->tm_dirent_pool, 0);
266 if (nde == NULL)
267 return ENOSPC;
269 nde->td_name = tmpfs_str_pool_get(&tmp->tm_str_pool, len, 0);
270 if (nde->td_name == NULL) {
271 TMPFS_POOL_PUT(&tmp->tm_dirent_pool, nde);
272 return ENOSPC;
274 nde->td_namelen = len;
275 memcpy(nde->td_name, name, len);
276 nde->td_node = node;
278 node->tn_links++;
279 if (node->tn_links > 1 && node->tn_vnode != NULL)
280 VN_KNOTE(node->tn_vnode, NOTE_LINK);
281 *de = nde;
283 return 0;
286 /* --------------------------------------------------------------------- */
289 * Frees a directory entry. It is the caller's responsibility to destroy
290 * the node referenced by it if needed.
292 * The link count of node is decreased by one to reflect the removal of an
293 * object that referenced it. This only happens if 'node_exists' is true;
294 * otherwise the function will not access the node referred to by the
295 * directory entry, as it may already have been released from the outside.
297 * Interested parties (kqueue) are notified of the link count change; note
298 * that this can include both the node pointed to by the directory entry
299 * as well as its parent.
301 void
302 tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de,
303 bool node_exists)
305 if (node_exists) {
306 struct tmpfs_node *node;
308 node = de->td_node;
310 KASSERT(node->tn_links > 0);
311 node->tn_links--;
312 if (node->tn_vnode != NULL)
313 VN_KNOTE(node->tn_vnode, node->tn_links == 0 ?
314 NOTE_DELETE : NOTE_LINK);
315 if (node->tn_type == VDIR)
316 VN_KNOTE(node->tn_spec.tn_dir.tn_parent->tn_vnode,
317 NOTE_LINK);
320 tmpfs_str_pool_put(&tmp->tm_str_pool, de->td_name, de->td_namelen);
321 TMPFS_POOL_PUT(&tmp->tm_dirent_pool, de);
324 /* --------------------------------------------------------------------- */
327 * Allocates a new vnode for the node node or returns a new reference to
328 * an existing one if the node had already a vnode referencing it. The
329 * resulting locked vnode is returned in *vpp.
331 * Returns zero on success or an appropriate error code on failure.
334 tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, struct vnode **vpp)
336 int error;
337 struct vnode *vp;
339 /* If there is already a vnode, then lock it. */
340 for (;;) {
341 mutex_enter(&node->tn_vlock);
342 if ((vp = node->tn_vnode) != NULL) {
343 mutex_enter(&vp->v_interlock);
344 mutex_exit(&node->tn_vlock);
345 error = vget(vp, LK_EXCLUSIVE | LK_INTERLOCK);
346 if (error == ENOENT) {
347 /* vnode was reclaimed. */
348 continue;
350 *vpp = vp;
351 return error;
353 break;
356 /* Get a new vnode and associate it with our node. */
357 error = getnewvnode(VT_TMPFS, mp, tmpfs_vnodeop_p, &vp);
358 if (error != 0) {
359 mutex_exit(&node->tn_vlock);
360 return error;
363 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
364 if (error != 0) {
365 mutex_exit(&node->tn_vlock);
366 ungetnewvnode(vp);
367 return error;
370 vp->v_type = node->tn_type;
372 /* Type-specific initialization. */
373 switch (node->tn_type) {
374 case VBLK:
375 /* FALLTHROUGH */
376 case VCHR:
377 vp->v_op = tmpfs_specop_p;
378 spec_node_init(vp, node->tn_spec.tn_dev.tn_rdev);
379 break;
381 case VDIR:
382 vp->v_vflag |= node->tn_spec.tn_dir.tn_parent == node ?
383 VV_ROOT : 0;
384 break;
386 case VFIFO:
387 vp->v_op = tmpfs_fifoop_p;
388 break;
390 case VLNK:
391 /* FALLTHROUGH */
392 case VREG:
393 /* FALLTHROUGH */
394 case VSOCK:
395 break;
397 default:
398 KASSERT(0);
401 uvm_vnp_setsize(vp, node->tn_size);
402 vp->v_data = node;
403 node->tn_vnode = vp;
404 mutex_exit(&node->tn_vlock);
405 *vpp = vp;
407 KASSERT(IFF(error == 0, *vpp != NULL && VOP_ISLOCKED(*vpp)));
408 KASSERT(*vpp == node->tn_vnode);
410 return error;
413 /* --------------------------------------------------------------------- */
416 * Destroys the association between the vnode vp and the node it
417 * references.
419 void
420 tmpfs_free_vp(struct vnode *vp)
422 struct tmpfs_node *node;
424 node = VP_TO_TMPFS_NODE(vp);
426 mutex_enter(&node->tn_vlock);
427 node->tn_vnode = NULL;
428 mutex_exit(&node->tn_vlock);
429 vp->v_data = NULL;
432 /* --------------------------------------------------------------------- */
435 * Allocates a new file of type 'type' and adds it to the parent directory
436 * 'dvp'; this addition is done using the component name given in 'cnp'.
437 * The ownership of the new file is automatically assigned based on the
438 * credentials of the caller (through 'cnp'), the group is set based on
439 * the parent directory and the mode is determined from the 'vap' argument.
440 * If successful, *vpp holds a vnode to the newly created file and zero
441 * is returned. Otherwise *vpp is NULL and the function returns an
442 * appropriate error code.
445 tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap,
446 struct componentname *cnp, char *target)
448 int error;
449 struct tmpfs_dirent *de;
450 struct tmpfs_mount *tmp;
451 struct tmpfs_node *dnode;
452 struct tmpfs_node *node;
453 struct tmpfs_node *parent;
455 KASSERT(VOP_ISLOCKED(dvp));
456 KASSERT(cnp->cn_flags & HASBUF);
458 tmp = VFS_TO_TMPFS(dvp->v_mount);
459 dnode = VP_TO_TMPFS_DIR(dvp);
460 *vpp = NULL;
462 /* If the entry we are creating is a directory, we cannot overflow
463 * the number of links of its parent, because it will get a new
464 * link. */
465 if (vap->va_type == VDIR) {
466 /* Ensure that we do not overflow the maximum number of links
467 * imposed by the system. */
468 KASSERT(dnode->tn_links <= LINK_MAX);
469 if (dnode->tn_links == LINK_MAX) {
470 error = EMLINK;
471 goto out;
474 parent = dnode;
475 } else
476 parent = NULL;
478 /* Allocate a node that represents the new file. */
479 error = tmpfs_alloc_node(tmp, vap->va_type, kauth_cred_geteuid(cnp->cn_cred),
480 dnode->tn_gid, vap->va_mode, parent, target, vap->va_rdev, &node);
481 if (error != 0)
482 goto out;
484 /* Allocate a directory entry that points to the new file. */
485 error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen,
486 &de);
487 if (error != 0) {
488 tmpfs_free_node(tmp, node);
489 goto out;
492 /* Allocate a vnode for the new file. */
493 error = tmpfs_alloc_vp(dvp->v_mount, node, vpp);
494 if (error != 0) {
495 tmpfs_free_dirent(tmp, de, true);
496 tmpfs_free_node(tmp, node);
497 goto out;
500 /* Now that all required items are allocated, we can proceed to
501 * insert the new node into the directory, an operation that
502 * cannot fail. */
503 tmpfs_dir_attach(dvp, de);
504 if (vap->va_type == VDIR) {
505 VN_KNOTE(dvp, NOTE_LINK);
506 dnode->tn_links++;
507 KASSERT(dnode->tn_links <= LINK_MAX);
510 out:
511 if (error != 0 || !(cnp->cn_flags & SAVESTART))
512 PNBUF_PUT(cnp->cn_pnbuf);
513 vput(dvp);
515 KASSERT(IFF(error == 0, *vpp != NULL));
517 return error;
520 /* --------------------------------------------------------------------- */
523 * Attaches the directory entry de to the directory represented by vp.
524 * Note that this does not change the link count of the node pointed by
525 * the directory entry, as this is done by tmpfs_alloc_dirent.
527 * As the "parent" directory changes, interested parties are notified of
528 * a write to it.
530 void
531 tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de)
533 struct tmpfs_node *dnode;
535 dnode = VP_TO_TMPFS_DIR(vp);
537 TAILQ_INSERT_TAIL(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries);
538 dnode->tn_size += sizeof(struct tmpfs_dirent);
539 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
540 TMPFS_NODE_MODIFIED;
541 uvm_vnp_setsize(vp, dnode->tn_size);
543 VN_KNOTE(vp, NOTE_WRITE);
546 /* --------------------------------------------------------------------- */
549 * Detaches the directory entry de from the directory represented by vp.
550 * Note that this does not change the link count of the node pointed by
551 * the directory entry, as this is done by tmpfs_free_dirent.
553 * As the "parent" directory changes, interested parties are notified of
554 * a write to it.
556 void
557 tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de)
559 struct tmpfs_node *dnode;
561 KASSERT(VOP_ISLOCKED(vp));
563 dnode = VP_TO_TMPFS_DIR(vp);
565 if (dnode->tn_spec.tn_dir.tn_readdir_lastp == de) {
566 dnode->tn_spec.tn_dir.tn_readdir_lastn = 0;
567 dnode->tn_spec.tn_dir.tn_readdir_lastp = NULL;
570 TAILQ_REMOVE(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries);
571 dnode->tn_size -= sizeof(struct tmpfs_dirent);
572 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
573 TMPFS_NODE_MODIFIED;
574 uvm_vnp_setsize(vp, dnode->tn_size);
576 VN_KNOTE(vp, NOTE_WRITE);
579 /* --------------------------------------------------------------------- */
582 * Looks for a directory entry in the directory represented by node.
583 * 'cnp' describes the name of the entry to look for. Note that the .
584 * and .. components are not allowed as they do not physically exist
585 * within directories.
587 * Returns a pointer to the entry when found, otherwise NULL.
589 struct tmpfs_dirent *
590 tmpfs_dir_lookup(struct tmpfs_node *node, struct componentname *cnp)
592 struct tmpfs_dirent *de;
594 KASSERT(VOP_ISLOCKED(node->tn_vnode));
595 KASSERT(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.'));
596 KASSERT(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' &&
597 cnp->cn_nameptr[1] == '.')));
598 TMPFS_VALIDATE_DIR(node);
600 node->tn_status |= TMPFS_NODE_ACCESSED;
602 TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
603 KASSERT(cnp->cn_namelen < 0xffff);
604 if (de->td_namelen == (uint16_t)cnp->cn_namelen &&
605 memcmp(de->td_name, cnp->cn_nameptr, de->td_namelen) == 0) {
606 break;
610 return de;
613 /* --------------------------------------------------------------------- */
616 * Helper function for tmpfs_readdir. Creates a '.' entry for the given
617 * directory and returns it in the uio space. The function returns 0
618 * on success, -1 if there was not enough space in the uio structure to
619 * hold the directory entry or an appropriate error code if another
620 * error happens.
623 tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio)
625 int error;
626 struct dirent *dentp;
628 TMPFS_VALIDATE_DIR(node);
629 KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOT);
631 dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP);
633 dentp->d_fileno = node->tn_id;
634 dentp->d_type = DT_DIR;
635 dentp->d_namlen = 1;
636 dentp->d_name[0] = '.';
637 dentp->d_name[1] = '\0';
638 dentp->d_reclen = _DIRENT_SIZE(dentp);
640 if (dentp->d_reclen > uio->uio_resid)
641 error = -1;
642 else {
643 error = uiomove(dentp, dentp->d_reclen, uio);
644 if (error == 0)
645 uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT;
648 node->tn_status |= TMPFS_NODE_ACCESSED;
650 kmem_free(dentp, sizeof(struct dirent));
651 return error;
654 /* --------------------------------------------------------------------- */
657 * Helper function for tmpfs_readdir. Creates a '..' entry for the given
658 * directory and returns it in the uio space. The function returns 0
659 * on success, -1 if there was not enough space in the uio structure to
660 * hold the directory entry or an appropriate error code if another
661 * error happens.
664 tmpfs_dir_getdotdotdent(struct tmpfs_node *node, struct uio *uio)
666 int error;
667 struct dirent *dentp;
669 TMPFS_VALIDATE_DIR(node);
670 KASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT);
672 dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP);
674 dentp->d_fileno = node->tn_spec.tn_dir.tn_parent->tn_id;
675 dentp->d_type = DT_DIR;
676 dentp->d_namlen = 2;
677 dentp->d_name[0] = '.';
678 dentp->d_name[1] = '.';
679 dentp->d_name[2] = '\0';
680 dentp->d_reclen = _DIRENT_SIZE(dentp);
682 if (dentp->d_reclen > uio->uio_resid)
683 error = -1;
684 else {
685 error = uiomove(dentp, dentp->d_reclen, uio);
686 if (error == 0) {
687 struct tmpfs_dirent *de;
689 de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir);
690 if (de == NULL)
691 uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
692 else
693 uio->uio_offset = tmpfs_dircookie(de);
697 node->tn_status |= TMPFS_NODE_ACCESSED;
699 kmem_free(dentp, sizeof(struct dirent));
700 return error;
703 /* --------------------------------------------------------------------- */
706 * Lookup a directory entry by its associated cookie.
708 struct tmpfs_dirent *
709 tmpfs_dir_lookupbycookie(struct tmpfs_node *node, off_t cookie)
711 struct tmpfs_dirent *de;
713 KASSERT(VOP_ISLOCKED(node->tn_vnode));
715 if (cookie == node->tn_spec.tn_dir.tn_readdir_lastn &&
716 node->tn_spec.tn_dir.tn_readdir_lastp != NULL) {
717 return node->tn_spec.tn_dir.tn_readdir_lastp;
720 TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
721 if (tmpfs_dircookie(de) == cookie) {
722 break;
726 return de;
729 /* --------------------------------------------------------------------- */
732 * Helper function for tmpfs_readdir. Returns as much directory entries
733 * as can fit in the uio space. The read starts at uio->uio_offset.
734 * The function returns 0 on success, -1 if there was not enough space
735 * in the uio structure to hold the directory entry or an appropriate
736 * error code if another error happens.
739 tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, off_t *cntp)
741 int error;
742 off_t startcookie;
743 struct dirent *dentp;
744 struct tmpfs_dirent *de;
746 KASSERT(VOP_ISLOCKED(node->tn_vnode));
747 TMPFS_VALIDATE_DIR(node);
749 /* Locate the first directory entry we have to return. We have cached
750 * the last readdir in the node, so use those values if appropriate.
751 * Otherwise do a linear scan to find the requested entry. */
752 startcookie = uio->uio_offset;
753 KASSERT(startcookie != TMPFS_DIRCOOKIE_DOT);
754 KASSERT(startcookie != TMPFS_DIRCOOKIE_DOTDOT);
755 if (startcookie == TMPFS_DIRCOOKIE_EOF) {
756 return 0;
757 } else {
758 de = tmpfs_dir_lookupbycookie(node, startcookie);
760 if (de == NULL) {
761 return EINVAL;
764 dentp = kmem_alloc(sizeof(struct dirent), KM_SLEEP);
766 /* Read as much entries as possible; i.e., until we reach the end of
767 * the directory or we exhaust uio space. */
768 do {
769 /* Create a dirent structure representing the current
770 * tmpfs_node and fill it. */
771 dentp->d_fileno = de->td_node->tn_id;
772 switch (de->td_node->tn_type) {
773 case VBLK:
774 dentp->d_type = DT_BLK;
775 break;
777 case VCHR:
778 dentp->d_type = DT_CHR;
779 break;
781 case VDIR:
782 dentp->d_type = DT_DIR;
783 break;
785 case VFIFO:
786 dentp->d_type = DT_FIFO;
787 break;
789 case VLNK:
790 dentp->d_type = DT_LNK;
791 break;
793 case VREG:
794 dentp->d_type = DT_REG;
795 break;
797 case VSOCK:
798 dentp->d_type = DT_SOCK;
799 break;
801 default:
802 KASSERT(0);
804 dentp->d_namlen = de->td_namelen;
805 KASSERT(de->td_namelen < sizeof(dentp->d_name));
806 (void)memcpy(dentp->d_name, de->td_name, de->td_namelen);
807 dentp->d_name[de->td_namelen] = '\0';
808 dentp->d_reclen = _DIRENT_SIZE(dentp);
810 /* Stop reading if the directory entry we are treating is
811 * bigger than the amount of data that can be returned. */
812 if (dentp->d_reclen > uio->uio_resid) {
813 error = -1;
814 break;
817 /* Copy the new dirent structure into the output buffer and
818 * advance pointers. */
819 error = uiomove(dentp, dentp->d_reclen, uio);
821 (*cntp)++;
822 de = TAILQ_NEXT(de, td_entries);
823 } while (error == 0 && uio->uio_resid > 0 && de != NULL);
825 /* Update the offset and cache. */
826 if (de == NULL) {
827 uio->uio_offset = TMPFS_DIRCOOKIE_EOF;
828 node->tn_spec.tn_dir.tn_readdir_lastn = 0;
829 node->tn_spec.tn_dir.tn_readdir_lastp = NULL;
830 } else {
831 node->tn_spec.tn_dir.tn_readdir_lastn = uio->uio_offset =
832 tmpfs_dircookie(de);
833 node->tn_spec.tn_dir.tn_readdir_lastp = de;
836 node->tn_status |= TMPFS_NODE_ACCESSED;
838 kmem_free(dentp, sizeof(struct dirent));
839 return error;
842 /* --------------------------------------------------------------------- */
845 * Resizes the aobj associated to the regular file pointed to by vp to
846 * the size newsize. 'vp' must point to a vnode that represents a regular
847 * file. 'newsize' must be positive.
849 * If the file is extended, the appropriate kevent is raised. This does
850 * not rise a write event though because resizing is not the same as
851 * writing.
853 * Returns zero on success or an appropriate error code on failure.
856 tmpfs_reg_resize(struct vnode *vp, off_t newsize)
858 int error;
859 unsigned int newpages, oldpages;
860 struct tmpfs_mount *tmp;
861 struct tmpfs_node *node;
862 off_t oldsize;
864 KASSERT(vp->v_type == VREG);
865 KASSERT(newsize >= 0);
867 node = VP_TO_TMPFS_NODE(vp);
868 tmp = VFS_TO_TMPFS(vp->v_mount);
870 /* Convert the old and new sizes to the number of pages needed to
871 * store them. It may happen that we do not need to do anything
872 * because the last allocated page can accommodate the change on
873 * its own. */
874 oldsize = node->tn_size;
875 oldpages = round_page(oldsize) / PAGE_SIZE;
876 KASSERT(oldpages == node->tn_spec.tn_reg.tn_aobj_pages);
877 newpages = round_page(newsize) / PAGE_SIZE;
879 if (newpages > oldpages &&
880 (ssize_t)(newpages - oldpages) > TMPFS_PAGES_AVAIL(tmp)) {
881 error = ENOSPC;
882 goto out;
884 atomic_add_int(&tmp->tm_pages_used, newpages - oldpages);
886 if (newsize < oldsize) {
887 int zerolen = MIN(round_page(newsize), node->tn_size) - newsize;
890 * zero out the truncated part of the last page.
893 uvm_vnp_zerorange(vp, newsize, zerolen);
896 node->tn_spec.tn_reg.tn_aobj_pages = newpages;
897 node->tn_size = newsize;
898 uvm_vnp_setsize(vp, newsize);
901 * free "backing store"
904 if (newpages < oldpages) {
905 struct uvm_object *uobj;
907 uobj = node->tn_spec.tn_reg.tn_aobj;
909 mutex_enter(&uobj->vmobjlock);
910 uao_dropswap_range(uobj, newpages, oldpages);
911 mutex_exit(&uobj->vmobjlock);
914 error = 0;
916 if (newsize > oldsize)
917 VN_KNOTE(vp, NOTE_EXTEND);
919 out:
920 return error;
923 /* --------------------------------------------------------------------- */
926 * Returns information about the number of available memory pages,
927 * including physical and virtual ones.
929 * If 'total' is true, the value returned is the total amount of memory
930 * pages configured for the system (either in use or free).
931 * If it is FALSE, the value returned is the amount of free memory pages.
933 * Remember to remove TMPFS_PAGES_RESERVED from the returned value to avoid
934 * excessive memory usage.
937 size_t
938 tmpfs_mem_info(bool total)
940 size_t size;
942 size = 0;
943 size += uvmexp.swpgavail;
944 if (!total) {
945 size -= uvmexp.swpgonly;
947 size += uvmexp.free;
948 size += uvmexp.filepages;
949 if (size > uvmexp.wired) {
950 size -= uvmexp.wired;
951 } else {
952 size = 0;
955 return size;
958 /* --------------------------------------------------------------------- */
961 * Change flags of the given vnode.
962 * Caller should execute tmpfs_update on vp after a successful execution.
963 * The vnode must be locked on entry and remain locked on exit.
966 tmpfs_chflags(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l)
968 int error;
969 struct tmpfs_node *node;
970 kauth_action_t action = KAUTH_VNODE_WRITE_FLAGS;
971 int fs_decision = 0;
973 KASSERT(VOP_ISLOCKED(vp));
975 node = VP_TO_TMPFS_NODE(vp);
977 /* Disallow this operation if the file system is mounted read-only. */
978 if (vp->v_mount->mnt_flag & MNT_RDONLY)
979 return EROFS;
981 if (kauth_cred_geteuid(cred) != node->tn_uid)
982 fs_decision = EACCES;
985 * If the new flags have non-user flags that are different than
986 * those on the node, we need special permission to change them.
988 if ((flags & SF_SETTABLE) != (node->tn_flags & SF_SETTABLE)) {
989 action |= KAUTH_VNODE_WRITE_SYSFLAGS;
990 if (!fs_decision)
991 fs_decision = EPERM;
995 * Indicate that this node's flags have system attributes in them if
996 * that's the case.
998 if (node->tn_flags & (SF_IMMUTABLE | SF_APPEND)) {
999 action |= KAUTH_VNODE_HAS_SYSFLAGS;
1002 error = kauth_authorize_vnode(cred, action, vp, NULL, fs_decision);
1003 if (error)
1004 return error;
1007 * Set the flags. If we're not setting non-user flags, be careful not
1008 * to overwrite them.
1010 * XXX: Can't we always assign here? if the system flags are different,
1011 * the code above should catch attempts to change them without
1012 * proper permissions, and if we're here it means it's okay to
1013 * change them...
1015 if (action & KAUTH_VNODE_WRITE_SYSFLAGS) {
1016 node->tn_flags = flags;
1017 } else {
1018 /* Clear all user-settable flags and re-set them. */
1019 node->tn_flags &= SF_SETTABLE;
1020 node->tn_flags |= (flags & UF_SETTABLE);
1023 node->tn_status |= TMPFS_NODE_CHANGED;
1024 VN_KNOTE(vp, NOTE_ATTRIB);
1026 KASSERT(VOP_ISLOCKED(vp));
1028 return 0;
1031 /* --------------------------------------------------------------------- */
1034 * Change access mode on the given vnode.
1035 * Caller should execute tmpfs_update on vp after a successful execution.
1036 * The vnode must be locked on entry and remain locked on exit.
1039 tmpfs_chmod(struct vnode *vp, mode_t mode, kauth_cred_t cred, struct lwp *l)
1041 int error;
1042 struct tmpfs_node *node;
1044 KASSERT(VOP_ISLOCKED(vp));
1046 node = VP_TO_TMPFS_NODE(vp);
1048 /* Disallow this operation if the file system is mounted read-only. */
1049 if (vp->v_mount->mnt_flag & MNT_RDONLY)
1050 return EROFS;
1052 /* Immutable or append-only files cannot be modified, either. */
1053 if (node->tn_flags & (IMMUTABLE | APPEND))
1054 return EPERM;
1056 error = genfs_can_chmod(vp, cred, node->tn_uid, node->tn_gid,
1057 mode);
1059 error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_SECURITY, vp,
1060 NULL, error);
1061 if (error)
1062 return (error);
1064 node->tn_mode = (mode & ALLPERMS);
1066 node->tn_status |= TMPFS_NODE_CHANGED;
1067 VN_KNOTE(vp, NOTE_ATTRIB);
1069 KASSERT(VOP_ISLOCKED(vp));
1071 return 0;
1074 /* --------------------------------------------------------------------- */
1077 * Change ownership of the given vnode. At least one of uid or gid must
1078 * be different than VNOVAL. If one is set to that value, the attribute
1079 * is unchanged.
1080 * Caller should execute tmpfs_update on vp after a successful execution.
1081 * The vnode must be locked on entry and remain locked on exit.
1084 tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, kauth_cred_t cred,
1085 struct lwp *l)
1087 int error;
1088 struct tmpfs_node *node;
1090 KASSERT(VOP_ISLOCKED(vp));
1092 node = VP_TO_TMPFS_NODE(vp);
1094 /* Assign default values if they are unknown. */
1095 KASSERT(uid != VNOVAL || gid != VNOVAL);
1096 if (uid == VNOVAL)
1097 uid = node->tn_uid;
1098 if (gid == VNOVAL)
1099 gid = node->tn_gid;
1100 KASSERT(uid != VNOVAL && gid != VNOVAL);
1102 /* Disallow this operation if the file system is mounted read-only. */
1103 if (vp->v_mount->mnt_flag & MNT_RDONLY)
1104 return EROFS;
1106 /* Immutable or append-only files cannot be modified, either. */
1107 if (node->tn_flags & (IMMUTABLE | APPEND))
1108 return EPERM;
1110 error = genfs_can_chown(vp, cred, node->tn_uid, node->tn_gid, uid,
1111 gid);
1113 error = kauth_authorize_vnode(cred, KAUTH_VNODE_CHANGE_OWNERSHIP, vp,
1114 NULL, error);
1115 if (error)
1116 return (error);
1118 node->tn_uid = uid;
1119 node->tn_gid = gid;
1121 node->tn_status |= TMPFS_NODE_CHANGED;
1122 VN_KNOTE(vp, NOTE_ATTRIB);
1124 KASSERT(VOP_ISLOCKED(vp));
1126 return 0;
1129 /* --------------------------------------------------------------------- */
1132 * Change size of the given vnode.
1133 * Caller should execute tmpfs_update on vp after a successful execution.
1134 * The vnode must be locked on entry and remain locked on exit.
1137 tmpfs_chsize(struct vnode *vp, u_quad_t size, kauth_cred_t cred,
1138 struct lwp *l)
1140 int error;
1141 struct tmpfs_node *node;
1143 KASSERT(VOP_ISLOCKED(vp));
1145 node = VP_TO_TMPFS_NODE(vp);
1147 /* Decide whether this is a valid operation based on the file type. */
1148 error = 0;
1149 switch (vp->v_type) {
1150 case VDIR:
1151 return EISDIR;
1153 case VREG:
1154 if (vp->v_mount->mnt_flag & MNT_RDONLY)
1155 return EROFS;
1156 break;
1158 case VBLK:
1159 /* FALLTHROUGH */
1160 case VCHR:
1161 /* FALLTHROUGH */
1162 case VFIFO:
1163 /* Allow modifications of special files even if in the file
1164 * system is mounted read-only (we are not modifying the
1165 * files themselves, but the objects they represent). */
1166 return 0;
1168 default:
1169 /* Anything else is unsupported. */
1170 return EOPNOTSUPP;
1173 /* Immutable or append-only files cannot be modified, either. */
1174 if (node->tn_flags & (IMMUTABLE | APPEND))
1175 return EPERM;
1177 error = tmpfs_truncate(vp, size);
1178 /* tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents
1179 * for us, as will update tn_status; no need to do that here. */
1181 KASSERT(VOP_ISLOCKED(vp));
1183 return error;
1186 /* --------------------------------------------------------------------- */
1189 * Change access and modification times of the given vnode.
1190 * Caller should execute tmpfs_update on vp after a successful execution.
1191 * The vnode must be locked on entry and remain locked on exit.
1194 tmpfs_chtimes(struct vnode *vp, const struct timespec *atime,
1195 const struct timespec *mtime, const struct timespec *btime,
1196 int vaflags, kauth_cred_t cred, struct lwp *l)
1198 int error;
1199 struct tmpfs_node *node;
1201 KASSERT(VOP_ISLOCKED(vp));
1203 node = VP_TO_TMPFS_NODE(vp);
1205 /* Disallow this operation if the file system is mounted read-only. */
1206 if (vp->v_mount->mnt_flag & MNT_RDONLY)
1207 return EROFS;
1209 /* Immutable or append-only files cannot be modified, either. */
1210 if (node->tn_flags & (IMMUTABLE | APPEND))
1211 return EPERM;
1213 error = genfs_can_chtimes(vp, vaflags, node->tn_uid, cred);
1215 error = kauth_authorize_vnode(cred, KAUTH_VNODE_WRITE_TIMES, vp, NULL,
1216 error);
1217 if (error)
1218 return (error);
1220 if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL)
1221 node->tn_status |= TMPFS_NODE_ACCESSED;
1223 if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL)
1224 node->tn_status |= TMPFS_NODE_MODIFIED;
1226 if (btime->tv_sec == VNOVAL && btime->tv_nsec == VNOVAL)
1227 btime = NULL;
1229 tmpfs_update(vp, atime, mtime, btime, 0);
1230 VN_KNOTE(vp, NOTE_ATTRIB);
1232 KASSERT(VOP_ISLOCKED(vp));
1234 return 0;
1237 /* --------------------------------------------------------------------- */
1239 /* Sync timestamps */
1240 void
1241 tmpfs_itimes(struct vnode *vp, const struct timespec *acc,
1242 const struct timespec *mod, const struct timespec *birth)
1244 struct tmpfs_node *node;
1245 struct timespec nowtm;
1247 node = VP_TO_TMPFS_NODE(vp);
1249 if ((node->tn_status & (TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED |
1250 TMPFS_NODE_CHANGED)) == 0)
1251 return;
1253 if (birth != NULL) {
1254 node->tn_birthtime = *birth;
1256 vfs_timestamp(&nowtm);
1258 if (node->tn_status & TMPFS_NODE_ACCESSED) {
1259 node->tn_atime = acc ? *acc : nowtm;
1261 if (node->tn_status & TMPFS_NODE_MODIFIED) {
1262 node->tn_mtime = mod ? *mod : nowtm;
1264 if (node->tn_status & TMPFS_NODE_CHANGED) {
1265 node->tn_ctime = nowtm;
1268 node->tn_status &=
1269 ~(TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED);
1272 /* --------------------------------------------------------------------- */
/*
 * Bring the timestamps of the node behind vp up to date by handing acc,
 * mod and birth on to tmpfs_itimes.
 *
 * flags is accepted but not examined.
 * The vnode must be locked on entry and remain locked on exit.
 */
void
tmpfs_update(struct vnode *vp, const struct timespec *acc,
    const struct timespec *mod, const struct timespec *birth, int flags)
{

	KASSERT(VOP_ISLOCKED(vp));

	/* XXX Does UPDATE_CLOSE in flags need any special handling? */

	tmpfs_itimes(vp, acc, mod, birth);

	KASSERT(VOP_ISLOCKED(vp));
}
1295 /* --------------------------------------------------------------------- */
1298 tmpfs_truncate(struct vnode *vp, off_t length)
1300 bool extended;
1301 int error;
1302 struct tmpfs_node *node;
1304 node = VP_TO_TMPFS_NODE(vp);
1305 extended = length > node->tn_size;
1307 if (length < 0) {
1308 error = EINVAL;
1309 goto out;
1312 if (node->tn_size == length) {
1313 error = 0;
1314 goto out;
1317 error = tmpfs_reg_resize(vp, length);
1318 if (error == 0)
1319 node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
1321 out:
1322 tmpfs_update(vp, NULL, NULL, NULL, 0);
1324 return error;