1 .\" $NetBSD: vnode.9,v 1.43 2010/01/08 13:10:48 pooka Exp $
3 .\" Copyright (c) 2001, 2005, 2006 The NetBSD Foundation, Inc.
4 .\" All rights reserved.
6 .\" This code is derived from software contributed to The NetBSD Foundation
7 .\" by Gregory McGarry.
9 .\" Redistribution and use in source and binary forms, with or without
10 .\" modification, are permitted provided that the following conditions
12 .\" 1. Redistributions of source code must retain the above copyright
13 .\" notice, this list of conditions and the following disclaimer.
14 .\" 2. Redistributions in binary form must reproduce the above copyright
15 .\" notice, this list of conditions and the following disclaimer in the
16 .\" documentation and/or other materials provided with the distribution.
18 .\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19 .\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 .\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 .\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22 .\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 .\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 .\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 .\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 .\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 .\" POSSIBILITY OF SUCH DAMAGE.
57 .Nd kernel representation of a file or directory
62 .Fn vref "struct vnode *vp"
64 .Fn vrele "struct vnode *vp"
66 .Fn vget "struct vnode *vp" "int lockflag"
68 .Fn vput "struct vnode *vp"
70 .Fn vhold "struct vnode *vp"
72 .Fn holdrele "struct vnode *vp"
74 .Fn getnewvnode "enum vtagtype tag" "struct mount *mp" "int (**vops)(void *)" "struct vnode **vpp"
76 .Fn ungetnewvnode "struct vnode *vp"
78 .Fn vrecycle "struct vnode *vp" "struct simplelock *inter_lkp" "struct lwp *l"
80 .Fn vgone "struct vnode *vp"
82 .Fn vgonel "struct vnode *vp" "struct lwp *l"
84 .Fn vflush "struct mount *mp" "struct vnode *skipvp" "int flags"
86 .Fn vaccess "enum vtype type" "mode_t file_mode" "uid_t uid" "gid_t gid" "mode_t acc_mode" "kauth_cred_t cred"
88 .Fn bdevvp "dev_t dev" "struct vnode **vpp"
90 .Fn cdevvp "dev_t dev" "struct vnode **vpp"
92 .Fn vfinddev "dev_t dev" "enum vtype" "struct vnode **vpp"
94 .Fn vdevgone "int maj" "int minl" "int minh" "enum vtype type"
96 .Fn vwakeup "struct buf *bp"
98 .Fn vflushbuf "struct vnode *vp" "int sync"
100 .Fn vinvalbuf "struct vnode *vp" "int flags" "kauth_cred_t cred" "struct lwp *l" "int slpflag" "int slptimeo"
102 .Fn vtruncbuf "struct vnode *vp" "daddr_t lbn" "int slpflag" "int slptimeo"
104 .Fn vprint "const char *label" "struct vnode *vp"
106 The vnode is the focus of all file activity in
108 There is a unique vnode allocated for each active file, directory,
109 mounted-on file, fifo, domain socket, symbolic link and device.
110 The kernel has no concept of a file's underlying structure and so it
111 relies on the information stored in the vnode to describe the file.
112 Thus, the vnode associated with a file holds all the administrative
113 information pertaining to it.
115 When a process requests an operation on a file, the
117 interface passes control to a file system type dependent function to carry
119 If the file system type dependent function finds that a vnode
120 representing the file is not in main memory, it dynamically allocates
121 a new vnode from the system main memory pool.
122 Once allocated, the vnode is attached to the data structure pointer
123 associated with the cause of the vnode allocation and it remains
124 resident in the main memory until the system decides that it is no
125 longer needed and can be recycled.
127 The vnode has the following structure:
130 struct uvm_object v_uobj; /* uvm object */
131 #define v_usecount v_uobj.uo_refs
132 #define v_interlock v_uobj.vmobjlock
133 voff_t v_size; /* size of file */
134 int v_flag; /* flags */
135 int v_numoutput; /* num pending writes */
136 long v_writecount; /* ref count of writers */
137 long v_holdcnt; /* page \*[Am] buffer refs */
138 struct mount *v_mount; /* ptr to vfs we are in */
139 int (**v_op)(void *); /* vnode ops vector */
140 TAILQ_ENTRY(vnode) v_freelist; /* vnode freelist */
141 LIST_ENTRY(vnode) v_mntvnodes; /* vnodes for mount pt */
142 struct buflists v_cleanblkhd; /* clean blocklist head */
143 struct buflists v_dirtyblkhd; /* dirty blocklist head */
144 LIST_ENTRY(vnode) v_synclist; /* dirty vnodes */
145 LIST_HEAD(, namecache) v_dnclist; /* namecaches for children */
146 LIST_HEAD(, namecache) v_nclist; /* namecaches for our parent */
148 struct mount *vu_mountedhere;/* ptr to mounted vfs */
149 struct socket *vu_socket; /* unix ipc (VSOCK) */
150 struct specinfo *vu_specinfo; /* device (VCHR, VBLK) */
151 struct fifoinfo *vu_fifoinfo; /* fifo (VFIFO) */
153 #define v_mountedhere v_un.vu_mountedhere
154 #define v_socket v_un.vu_socket
155 #define v_specinfo v_un.vu_specinfo
156 #define v_fifoinfo v_un.vu_fifoinfo
157 struct nqlease *v_lease; /* Soft ref to lease */
158 enum vtype v_type; /* vnode type */
159 enum vtagtype v_tag; /* underlying data type */
160 struct lock v_lock; /* lock for this vnode */
161 struct lock *v_vnlock; /* ptr to vnode lock */
162 void *v_data; /* private data for fs */
163 struct klist v_klist; /* knotes attached to vnode */
167 Most members of the vnode structure should be treated as opaque and
168 only manipulated using the proper functions.
169 There are some rather common exceptions detailed throughout this page.
171 Files and file systems are inextricably linked with the virtual memory
174 contains the data maintained by the virtual memory system.
175 For compatibility with code written before the integration of
179 C-preprocessor directives are used to alias the members of
182 Vnode flags are recorded by
186 .Bl -tag -offset indent -width VONWORKLST -compact
188 This vnode is the root of its file system.
190 This vnode is a pure text prototype.
192 This vnode is being used by the kernel; only used to skip quota files in
195 This vnode represents a tty; used when reading dead vnodes.
197 This vnode has executable mappings.
199 This vnode might have PROT_WRITE user mappings.
201 This vnode might have dirty pages due to VWRITEMAP.
203 This vnode's file system supports locking.
205 This vnode is currently locked to change underlying type.
207 A process is waiting for this vnode.
209 Waiting for output associated with this vnode to complete.
211 This vnode has an alias.
213 This vnode is involved in a directory operation.
214 This flag is used exclusively by LFS.
216 This vnode is on a layered file system.
218 This vnode is on the syncer work-list.
220 This vnode is being freed.
222 This vnode might have user mappings.
225 The VXLOCK flag is used to prevent multiple processes from entering
226 the vnode reclamation code.
227 It is also used as a flag to indicate that reclamation is in progress.
228 The VXWANT flag is set by threads that wish to be awakened when
229 reclamation is finished.
234 simplelock must be acquired.
237 for details on the kernel locking API.
239 Each vnode has three reference counts:
244 The first is the number of active references within the
246 This count is maintained by
251 The second is the number of active references within the kernel to the
252 vnode performing write access to the file.
253 It is maintained by the
258 The third is the number of references within the kernel
259 requiring the vnode to remain active and not be recycled.
260 This count is maintained by
268 reach zero, the vnode is recycled to the freelist and may be reused
270 The transition to and from the freelist is handled by
280 is also protected by the
284 The number of pending synchronous and asynchronous writes on the
285 vnode is recorded in
289 to wait for all writes to complete before returning to the user.
290 Its value must only be modified at splbio (see
292 It does not track the number of dirty buffers attached to the
300 to maintain the list of associated entries so that
304 The link to the file system which owns the vnode is recorded by
308 for further information on file system mount status.
312 pointer points to its vnode operations vector.
313 This vector describes what operations can be done to the file associated
315 The system maintains one vnode operations vector for each file system
316 type configured into the kernel.
317 The vnode operations vector contains a pointer to a function for
318 each operation supported by the file system.
321 for a description of vnode operations.
323 When not in use, vnodes are kept on the freelist through
325 The vnodes still reference valid files but may be reused to refer to a
326 new file at any time.
327 When a valid vnode which is on the freelist is used again, the user
330 to increment the reference count and retrieve it from the freelist.
331 When a user wants a new vnode for another file,
333 is invoked to remove a vnode from the freelist and initialize it for
336 The type of object the vnode represents is recorded by
338 It is used by generic code to perform checks to ensure operations are
339 performed on valid file system objects.
342 .Bl -tag -offset indent -width VFIFO -compact
344 The vnode has no type.
346 The vnode represents a regular file.
348 The vnode represents a directory.
350 The vnode represents a block special device.
352 The vnode represents a character special device.
354 The vnode represents a symbolic link.
356 The vnode represents a socket.
358 The vnode represents a pipe.
360 The vnode represents a bad file (not currently used).
363 Vnode tag types are used by external programs only (e.g.,
365 and should never be inspected by the kernel.
366 Its use is deprecated
369 values cannot be defined for loadable file systems.
375 .Bl -tag -offset indent -width "VT_FILECORE " -compact
379 universal file system
387 log-structured file system
391 file descriptor file system
393 null file system layer
395 uid/gid remapping file system layer
397 kernel interface file system
399 process interface file system
403 ISO 9660 file system(s)
409 Linux's EXT2 file system
415 Microsoft NT's file system
423 pseudo-terminal device file system
425 efficient memory file system
427 universal disk format file system
429 System V boot file system
432 All vnode locking operations use
434 This lock is acquired by calling
436 and released by calling
438 The reason for this asymmetry is that
442 with extra checks, while the unlocking step usually does not need
443 additional checks and thus has no wrapper.
445 The vnode locking operation is complicated because it is used for many
447 Sometimes it is used to bundle a series of vnode operations (see
449 into an atomic group.
450 Many file systems rely on it to prevent race conditions in updating
451 file system type specific data structures rather than using their
453 The vnode lock can operate as a multiple-reader (shared-access lock)
454 or single-writer lock (exclusive access lock); however, many current file
455 system implementations were written assuming only single-writer
457 Multiple-reader locking functions equivalently only in the presence
458 of big-lock SMP locking or a uni-processor machine.
459 The lock may be held while sleeping.
462 is acquired, the holder is guaranteed that the vnode will not be
463 reclaimed or invalidated.
464 Most file system functions require that you hold the vnode lock on entry.
467 for details on the kernel locking API.
469 For leaf file systems (such as ffs, lfs, msdosfs, etc.),
473 For stacked file systems,
475 will generally point to
477 of the lowest file system.
478 Additionally, the implementation of the vnode lock is the
479 responsibility of the individual file systems and
481 may also be NULL indicating that a leaf node does not export a lock
483 In this case, stacked file systems (such as nullfs) must call the
484 underlying file system directly for locking.
486 Each file system underlying a vnode allocates its own private area and
490 Most functions discussed in this page that operate on vnodes cannot be
491 called from interrupt context.
500 are modified in interrupt context and must be protected by
502 unless it is certain that there is no chance an interrupt handler will
504 The vnode lock must not be acquired within interrupt context.
506 .Bl -tag -width compact
512 Any kernel thread system which uses a vnode (e.g., during the operation
513 of some algorithm or to store in a data structure) should call
520 Any code in the system which is using a vnode should call
522 when it is finished with the vnode.
525 of the vnode reaches zero and
527 is greater than zero, the vnode is placed on the holdlist.
532 are zero, the vnode is placed on the freelist.
533 .It Fn vget "vp" "lockflags"
536 from the freelist, increment its reference count and lock it.
541 flags used to lock the vnode.
542 If the VXLOCK is set in
549 and the calling thread sleeps until the transition is complete.
550 When it is awakened, an error is returned to indicate that the vnode is
551 no longer usable (possibly having been recycled to a new file system type).
557 Depending on the reference counts, move the vnode to the holdlist or
559 This operation is functionally equivalent to calling
566 as active by incrementing
567 .Em vp-\*[Gt]v_holdcnt
568 and moving the vnode from the freelist to the holdlist.
569 Once on the holdlist, the vnode will not be recycled until it is
575 as inactive by decrementing
576 .Em vp-\*[Gt]v_holdcnt
577 and moving the vnode from the holdlist to the freelist.
578 .It Fn getnewvnode "tag" "mp" "vops" "vpp"
579 Retrieve the next vnode from the freelist.
581 must choose whether to allocate a new vnode or recycle an existing
583 The criterion for allocating a new one is that the total number of
584 vnodes is less than the number desired or there are no vnodes on either
586 Generally only vnodes that have no buffers associated with them are
587 recycled and the next vnode from the freelist is retrieved.
588 If the freelist is empty, vnodes on the holdlist are considered.
589 The new vnode is returned in the address specified by
594 is the mount point for the file system which requested the new vnode.
595 Before retrieving the new vnode, the file system is checked to see whether it is
596 busy (such as currently unmounting).
597 An error is returned if the file system is unmounted.
601 is the vnode tag assigned to
602 .Fa *vpp-\*[Gt]v_tag .
605 is the vnode operations vector of the file system requesting the new
607 If a vnode is successfully retrieved, zero is returned, otherwise an
608 appropriate error code is returned.
609 .It Fn ungetnewvnode "vp"
610 Undo the operation of
614 is the vnode to return to the freelist.
615 This function is needed for
617 which may need to push back a vnode in case of a locking race
619 .It Fn vrecycle "vp" "inter_lkp" "l"
620 Recycle the unused vnode
622 to the front of the freelist.
624 is a null operation if the reference count is greater than zero.
626 Eliminate all activity associated with the unlocked vnode
628 in preparation for recycling.
629 .It Fn vgonel "vp" "l"
630 Eliminate all activity associated with the locked vnode
632 in preparation for recycling.
633 .It Fn vflush "mp" "skipvp" "flags"
634 Remove any vnodes in the vnode table belonging to mount point
638 is not NULL it is exempt from being flushed.
641 is a set of flags modifying the operation of
643 If FORCECLOSE is not specified, there should not be any active vnodes and
646 is returned if any are found (this is a user error, not a system error).
647 If FORCECLOSE is specified, active vnodes that are found are detached.
648 If WRITECLOSE is set, only flush out regular file vnodes open for
650 SKIPSYSTEM causes any vnodes marked V_SYSTEM to be skipped.
651 .It Fn vaccess "type" "file_mode" "uid" "gid" "acc_mode" "cred"
652 Do access checking by comparing the file's permissions to the caller's
657 .It Fn bdevvp "dev" "vpp"
658 Create a vnode for a block device.
660 is used for root file systems, swap areas and for memory file system
662 .It Fn cdevvp "dev" "vpp"
663 Create a vnode for a character device.
665 is used for the console and kernfs special devices.
666 .It Fn vfinddev "dev" "vtype" "vpp"
667 Lookup a vnode by device number.
668 The vnode is returned in the address specified by
670 .It Fn vdevgone "maj" "minl" "minh" "type"
671 Reclaim all vnodes that correspond to the specified minor number range
675 (endpoints inclusive) of the specified major
678 Update outstanding I/O count
679 .Em vp-\*[Gt]v_numoutput
682 and do a wakeup if requested and
685 .It Fn vflushbuf "vp" "sync"
686 Flush all dirty buffers to disk for the file with the locked vnode
690 specifies whether the I/O should be synchronous and
693 .Em vp-\*[Gt]v_numoutput
695 .Em vp-\*[Gt]v_dirtyblkhd
697 .It Fn vinvalbuf "vp" "flags" "cred" "l" "slpflag" "slptimeo"
698 Flush out and invalidate all buffers associated with locked vnode
704 specify the calling process and its credentials.
707 flag and timeout are specified by the arguments
712 If the operation is successful, zero is returned, otherwise an
713 appropriate error code is returned.
714 .It Fn vtruncbuf "vp" "lbn" "slpflag" "slptimeo"
715 Destroy any in-core buffers past the file truncation length for the
718 The truncation length is specified by
721 will sleep while the I/O is performed. The
723 flag and timeout are specified by the arguments
728 If the operation is successful, zero is returned, otherwise an
729 appropriate error code is returned.
730 .It Fn vprint "label" "vp"
731 This function is used by the kernel to dump vnode information during a
733 It is only used if the kernel option DIAGNOSTIC is compiled into the kernel.
736 is a string to prefix the information dump of vnode
740 This section describes places within the
742 source tree where actual code implementing or using the vnode
743 framework can be found.
744 All pathnames are relative to
747 The vnode framework is implemented within the file
748 .Pa sys/kern/vfs_subr.c .
761 The locking protocol is inconsistent.
762 Many vnode operations are passed locked vnodes on entry but release
763 the lock before they exit.
764 The locking protocol is used in some places to attempt to make a
765 series of operations atomic (e.g., access check then operation).
766 This does not work for non-local file systems that do not support locking
770 interface would benefit from a simpler locking protocol.