1 /* $NetBSD: ulfs_lookup.c,v 1.34 2015/09/21 01:24:23 dholland Exp $ */
2 /* from NetBSD: ufs_lookup.c,v 1.122 2013/01/22 09:39:18 dholland Exp */
5 * Copyright (c) 1989, 1993
6 * The Regents of the University of California. All rights reserved.
7 * (c) UNIX System Laboratories, Inc.
8 * All or some portions of this file are derived from material licensed
9 * to the University of California by American Telephone and Telegraph
10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11 * the permission of UNIX System Laboratories, Inc.
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * @(#)ufs_lookup.c 8.9 (Berkeley) 8/11/94
40 #include <sys/cdefs.h>
41 __KERNEL_RCSID(0, "$NetBSD: ulfs_lookup.c,v 1.34 2015/09/21 01:24:23 dholland Exp $");
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/namei.h>
53 #include <sys/mount.h>
54 #include <sys/vnode.h>
55 #include <sys/kernel.h>
56 #include <sys/kauth.h>
57 #include <sys/wapbl.h>
58 #include <sys/fstrans.h>
62 #include <ufs/lfs/lfs.h>
63 #include <ufs/lfs/lfs_accessors.h>
64 #include <ufs/lfs/lfs_extern.h>
66 #include <ufs/lfs/ulfs_inode.h>
68 #include <ufs/lfs/ulfs_dirhash.h>
70 #include <ufs/lfs/ulfsmount.h>
71 #include <ufs/lfs/ulfs_extern.h>
72 #include <ufs/lfs/ulfs_bswap.h>
74 #include <miscfs/genfs/genfs.h>
83 * Convert a component of a pathname into a pointer to a locked inode.
84 * This is a very central and rather complicated routine.
85 * If the file system is not maintained in a strict tree hierarchy,
86 * this can result in a deadlock situation (see comments in code below).
88 * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending
89 * on whether the name is to be looked up, created, renamed, or deleted.
90 * When CREATE, RENAME, or DELETE is specified, information usable in
91 * creating, renaming, or deleting a directory entry may be calculated.
92 * If flag has LOCKPARENT or'ed into it and the target of the pathname
93 * exists, lookup returns both the target and its parent directory locked.
94 * When creating or renaming and LOCKPARENT is specified, the target may
95 * not be ".". When deleting and LOCKPARENT is specified, the target may
96 * be ".", but the caller must check to ensure it does a vrele and vput
97 * instead of two vputs.
99 * Overall outline of ulfs_lookup:
101 * check accessibility of directory
102 * look for name in cache, if found, then if at end of path
103 * and deleting or creating, drop it, else return name
104 * search for name in directory, to found or notfound
106 * if creating, return locked directory, leaving info on available slots
109 * if at end of path and deleting, return information to allow delete
110 * if at end of path and rewriting (RENAME and LOCKPARENT), lock target
111 * inode and return info to allow rewrite
112 * if not at end, add name to cache; if at end and neither creating
113 * nor deleting, add name to cache
118 struct vop_lookup_v2_args
/* {
120 struct vnode **a_vpp;
121 struct componentname *a_cnp;
123 struct vnode
*vdp
= ap
->a_dvp
; /* vnode for directory being searched */
124 struct inode
*dp
= VTOI(vdp
); /* inode for directory being searched */
125 struct buf
*bp
; /* a buffer of directory entries */
126 LFS_DIRHEADER
*ep
; /* the current directory entry */
127 int entryoffsetinblock
; /* offset of ep in bp's buffer */
129 NONE
, /* need to search a slot for our new entry */
130 COMPACT
, /* a compaction can make a slot in the current
132 FOUND
, /* found a slot (or no need to search) */
134 doff_t slotoffset
; /* offset of area with free space.
135 a special value -1 for invalid */
136 int slotsize
; /* size of area at slotoffset */
137 int slotfreespace
; /* accumulated amount of space free in
138 the current DIRBLKSIZ block */
139 int slotneeded
; /* size of the entry we're seeking */
140 int numdirpasses
; /* strategy for directory search */
141 doff_t endsearch
; /* offset to end directory search */
142 doff_t prevoff
; /* previous value of ulr_offset */
143 struct vnode
*tdp
; /* returned by vcache_get */
144 doff_t enduseful
; /* pointer past last used dir slot.
145 used for directory truncation. */
146 u_long bmask
; /* block offset mask */
148 struct vnode
**vpp
= ap
->a_vpp
;
149 struct componentname
*cnp
= ap
->a_cnp
;
150 kauth_cred_t cred
= cnp
->cn_cred
;
152 int nameiop
= cnp
->cn_nameiop
;
153 struct lfs
*fs
= dp
->i_lfs
;
154 int dirblksiz
= fs
->um_dirblksiz
;
156 struct ulfs_lookup_results
*results
;
157 int iswhiteout
; /* temp result from cache_lookup() */
159 flags
= cnp
->cn_flags
;
164 endsearch
= 0; /* silence compiler warning */
167 * Produce the auxiliary lookup results into i_crap. Increment
168 * its serial number so elsewhere we can tell if we're using
169 * stale results. This should not be done this way. XXX.
171 results
= &dp
->i_crap
;
175 * Check accessibility of directory.
177 if ((error
= VOP_ACCESS(vdp
, VEXEC
, cred
)) != 0)
180 if ((flags
& ISLASTCN
) && (vdp
->v_mount
->mnt_flag
& MNT_RDONLY
) &&
181 (nameiop
== DELETE
|| nameiop
== RENAME
))
185 * We now have a segment name to search for, and a directory to search.
187 * Before tediously performing a linear scan of the directory,
188 * check the name cache to see if the directory/name pair
189 * we are looking for is known already.
191 if (cache_lookup(vdp
, cnp
->cn_nameptr
, cnp
->cn_namelen
,
192 cnp
->cn_nameiop
, cnp
->cn_flags
, &iswhiteout
, vpp
)) {
194 cnp
->cn_flags
|= ISWHITEOUT
;
196 return *vpp
== NULLVP
? ENOENT
: 0;
200 * The namecache set iswhiteout without finding a
201 * cache entry. As of this writing (20121014), this
202 * can happen if there was a whiteout entry that has
203 * been invalidated by the lookup. It is not clear if
204 * it is correct to set ISWHITEOUT in this case or
205 * not; however, doing so retains the prior behavior,
206 * so we'll go with that until some clearer answer
209 cnp
->cn_flags
|= ISWHITEOUT
;
212 fstrans_start(vdp
->v_mount
, FSTRANS_SHARED
);
215 * Suppress search for slots unless creating
216 * file and at end of pathname, in which case
217 * we watch for a place to put the new file in
218 * case it doesn't already exist.
221 slotfreespace
= slotsize
= slotneeded
= 0;
222 if ((nameiop
== CREATE
|| nameiop
== RENAME
) && (flags
& ISLASTCN
)) {
224 slotneeded
= LFS_DIRECTSIZ(fs
, cnp
->cn_namelen
);
228 * If there is cached information on a previous search of
229 * this directory, pick up where we last left off.
230 * We cache only lookups as these are the most common
231 * and have the greatest payoff. Caching CREATE has little
232 * benefit as it usually must search the entire directory
233 * to determine that the entry does not exist. Caching the
234 * location of the last DELETE or RENAME has not reduced
235 * profiling time and hence has been removed in the interest
238 bmask
= vdp
->v_mount
->mnt_stat
.f_iosize
- 1;
242 * Use dirhash for fast operations on large directories. The logic
243 * to determine whether to hash the directory is contained within
244 * ulfsdirhash_build(); a zero return means that it decided to hash
245 * this directory and it successfully built up the hash table.
247 if (ulfsdirhash_build(dp
) == 0) {
248 /* Look for a free slot if needed. */
249 enduseful
= dp
->i_size
;
250 if (slotstatus
!= FOUND
) {
251 slotoffset
= ulfsdirhash_findfree(dp
, slotneeded
,
253 if (slotoffset
>= 0) {
254 slotstatus
= COMPACT
;
255 enduseful
= ulfsdirhash_enduseful(dp
);
257 enduseful
= dp
->i_size
;
260 /* Look up the component. */
262 entryoffsetinblock
= 0; /* silence compiler warning */
263 switch (ulfsdirhash_lookup(dp
, cnp
->cn_nameptr
, cnp
->cn_namelen
,
264 &results
->ulr_offset
, &bp
, nameiop
== DELETE
? &prevoff
: NULL
)) {
266 ep
= (LFS_DIRHEADER
*)((char *)bp
->b_data
+
267 (results
->ulr_offset
& bmask
));
270 results
->ulr_offset
= roundup(dp
->i_size
, dirblksiz
);
273 /* Something failed; just do a linear search. */
277 #endif /* LFS_DIRHASH */
279 if (nameiop
!= LOOKUP
|| results
->ulr_diroff
== 0 ||
280 results
->ulr_diroff
>= dp
->i_size
) {
281 entryoffsetinblock
= 0;
282 results
->ulr_offset
= 0;
285 results
->ulr_offset
= results
->ulr_diroff
;
286 if ((entryoffsetinblock
= results
->ulr_offset
& bmask
) &&
287 (error
= ulfs_blkatoff(vdp
, (off_t
)results
->ulr_offset
,
291 namecache_count_2passes();
293 prevoff
= results
->ulr_offset
;
294 endsearch
= roundup(dp
->i_size
, dirblksiz
);
298 while (results
->ulr_offset
< endsearch
) {
299 if (curcpu()->ci_schedstate
.spc_flags
& SPCF_SHOULDYIELD
)
302 * If necessary, get the next directory block.
304 if ((results
->ulr_offset
& bmask
) == 0) {
307 error
= ulfs_blkatoff(vdp
, (off_t
)results
->ulr_offset
,
311 entryoffsetinblock
= 0;
314 * If still looking for a slot, and at a DIRBLKSIZ
315 * boundary, have to start looking for free space again.
317 if (slotstatus
== NONE
&&
318 (entryoffsetinblock
& (dirblksiz
- 1)) == 0) {
323 * Get pointer to next entry.
324 * Full validation checks are slow, so we only check
325 * enough to insure forward progress through the
326 * directory. Complete checks can be run by patching
327 * "lfs_dirchk" to be true.
330 ep
= (LFS_DIRHEADER
*)((char *)bp
->b_data
+ entryoffsetinblock
);
331 if (lfs_dir_getreclen(fs
, ep
) == 0 ||
332 (lfs_dirchk
&& ulfs_dirbadentry(vdp
, ep
, entryoffsetinblock
))) {
335 ulfs_dirbad(dp
, results
->ulr_offset
, "mangled entry");
336 i
= dirblksiz
- (entryoffsetinblock
& (dirblksiz
- 1));
337 results
->ulr_offset
+= i
;
338 entryoffsetinblock
+= i
;
343 * If an appropriate sized slot has not yet been found,
344 * check to see if one is available. Also accumulate space
345 * in the current block so that we can determine if
346 * compaction is viable.
348 if (slotstatus
!= FOUND
) {
349 int size
= lfs_dir_getreclen(fs
, ep
);
351 if (lfs_dir_getino(fs
, ep
) != 0)
352 size
-= LFS_DIRSIZ(fs
, ep
);
354 if (size
>= slotneeded
) {
356 slotoffset
= results
->ulr_offset
;
357 slotsize
= lfs_dir_getreclen(fs
, ep
);
358 } else if (slotstatus
== NONE
) {
359 slotfreespace
+= size
;
360 if (slotoffset
== -1)
361 slotoffset
= results
->ulr_offset
;
362 if (slotfreespace
>= slotneeded
) {
363 slotstatus
= COMPACT
;
364 slotsize
= results
->ulr_offset
+
365 lfs_dir_getreclen(fs
, ep
) -
373 * Check for a name match.
375 if (lfs_dir_getino(fs
, ep
)) {
378 namlen
= lfs_dir_getnamlen(fs
, ep
);
379 if (namlen
== cnp
->cn_namelen
&&
380 !memcmp(cnp
->cn_nameptr
, lfs_dir_nameptr(fs
, ep
),
386 * Save directory entry's inode number and
387 * reclen, and release directory buffer.
389 if (!FSFMT(vdp
) && lfs_dir_gettype(fs
, ep
) == LFS_DT_WHT
) {
391 slotoffset
= results
->ulr_offset
;
392 slotsize
= lfs_dir_getreclen(fs
, ep
);
393 results
->ulr_reclen
= slotsize
;
395 * This is used to set
396 * results->ulr_endoff,
397 * which may be used by ulfs_direnter()
398 * as a length to truncate the
399 * directory to. Therefore, it must
400 * point past the end of the last
401 * non-empty directory entry. We don't
402 * know where that is in this case, so
403 * we effectively disable shrinking by
404 * using the existing size of the
407 * Note that we wouldn't expect to
408 * shrink the directory while rewriting
409 * an existing entry anyway.
411 enduseful
= endsearch
;
412 cnp
->cn_flags
|= ISWHITEOUT
;
416 foundino
= lfs_dir_getino(fs
, ep
);
417 results
->ulr_reclen
= lfs_dir_getreclen(fs
, ep
);
421 prevoff
= results
->ulr_offset
;
422 results
->ulr_offset
+= lfs_dir_getreclen(fs
, ep
);
423 entryoffsetinblock
+= lfs_dir_getreclen(fs
, ep
);
424 if (lfs_dir_getino(fs
, ep
))
425 enduseful
= results
->ulr_offset
;
429 * If we started in the middle of the directory and failed
430 * to find our target, we must check the beginning as well.
432 if (numdirpasses
== 2) {
434 results
->ulr_offset
= 0;
435 endsearch
= results
->ulr_diroff
;
441 * If creating, and at end of pathname and current
442 * directory has not been removed, then can consider
443 * allowing file to be created.
445 if ((nameiop
== CREATE
|| nameiop
== RENAME
||
446 (nameiop
== DELETE
&&
447 (cnp
->cn_flags
& DOWHITEOUT
) &&
448 (cnp
->cn_flags
& ISWHITEOUT
))) &&
449 (flags
& ISLASTCN
) && dp
->i_nlink
!= 0) {
451 * Access for write is interpreted as allowing
452 * creation of files in the directory.
454 error
= VOP_ACCESS(vdp
, VWRITE
, cred
);
458 * Return an indication of where the new directory
459 * entry should be put. If we didn't find a slot,
460 * then set results->ulr_count to 0 indicating
461 * that the new slot belongs at the end of the
462 * directory. If we found a slot, then the new entry
463 * can be put in the range from results->ulr_offset to
464 * results->ulr_offset + results->ulr_count.
466 if (slotstatus
== NONE
) {
467 results
->ulr_offset
= roundup(dp
->i_size
, dirblksiz
);
468 results
->ulr_count
= 0;
469 enduseful
= results
->ulr_offset
;
470 } else if (nameiop
== DELETE
) {
471 results
->ulr_offset
= slotoffset
;
472 if ((results
->ulr_offset
& (dirblksiz
- 1)) == 0)
473 results
->ulr_count
= 0;
476 results
->ulr_offset
- prevoff
;
478 results
->ulr_offset
= slotoffset
;
479 results
->ulr_count
= slotsize
;
480 if (enduseful
< slotoffset
+ slotsize
)
481 enduseful
= slotoffset
+ slotsize
;
483 results
->ulr_endoff
= roundup(enduseful
, dirblksiz
);
484 #if 0 /* commented out by dbj. none of the on disk fields changed */
485 dp
->i_flag
|= IN_CHANGE
| IN_UPDATE
;
488 * We return with the directory locked, so that
489 * the parameters we set up above will still be
490 * valid if we actually decide to do a direnter().
491 * We return ni_vp == NULL to indicate that the entry
492 * does not currently exist; we leave a pointer to
493 * the (locked) directory inode in ndp->ni_dvp.
495 * NB - if the directory is unlocked, then this
496 * information cannot be used.
502 * Insert name into cache (as non-existent) if appropriate.
504 if (nameiop
!= CREATE
) {
505 cache_enter(vdp
, *vpp
, cnp
->cn_nameptr
, cnp
->cn_namelen
,
512 if (numdirpasses
== 2)
513 namecache_count_pass2();
515 * Check that directory length properly reflects presence
518 if (results
->ulr_offset
+ LFS_DIRSIZ(fs
, ep
) > dp
->i_size
) {
519 ulfs_dirbad(dp
, results
->ulr_offset
, "i_size too small");
521 results
->ulr_offset
+ LFS_DIRSIZ(fs
, ep
);
522 DIP_ASSIGN(dp
, size
, dp
->i_size
);
523 dp
->i_flag
|= IN_CHANGE
| IN_UPDATE
;
528 * Found component in pathname.
529 * If the final component of path name, save information
530 * in the cache as to where the entry was found.
532 if ((flags
& ISLASTCN
) && nameiop
== LOOKUP
)
533 results
->ulr_diroff
= results
->ulr_offset
&~ (dirblksiz
- 1);
536 * If deleting, and at end of pathname, return
537 * parameters which can be used to remove file.
538 * Lock the inode, being careful with ".".
540 if (nameiop
== DELETE
&& (flags
& ISLASTCN
)) {
542 * Return pointer to current entry in results->ulr_offset,
543 * and distance past previous entry (if there
544 * is a previous entry in this block) in results->ulr_count.
545 * Save directory inode pointer in ndp->ni_dvp for dirremove().
547 if ((results
->ulr_offset
& (dirblksiz
- 1)) == 0)
548 results
->ulr_count
= 0;
550 results
->ulr_count
= results
->ulr_offset
- prevoff
;
551 if (dp
->i_number
== foundino
) {
555 error
= vcache_get(vdp
->v_mount
,
556 &foundino
, sizeof(foundino
), &tdp
);
561 * Write access to directory required to delete files.
563 error
= VOP_ACCESS(vdp
, VWRITE
, cred
);
569 * If directory is "sticky", then user must own
570 * the directory, or the file in it, else she
571 * may not delete it (unless she's root). This
572 * implements append-only directories.
574 if (dp
->i_mode
& ISVTX
) {
575 error
= kauth_authorize_vnode(cred
, KAUTH_VNODE_DELETE
,
576 tdp
, vdp
, genfs_can_sticky(cred
, dp
->i_uid
,
590 * If rewriting (RENAME), return the inode and the
591 * information required to rewrite the present directory
592 * Must get inode of directory entry to verify it's a
593 * regular file, or empty directory.
595 if (nameiop
== RENAME
&& (flags
& ISLASTCN
)) {
596 error
= VOP_ACCESS(vdp
, VWRITE
, cred
);
600 * Careful about locking second inode.
601 * This can only occur if the target is ".".
603 if (dp
->i_number
== foundino
) {
607 error
= vcache_get(vdp
->v_mount
,
608 &foundino
, sizeof(foundino
), &tdp
);
616 if (dp
->i_number
== foundino
) {
617 vref(vdp
); /* we want ourself, ie "." */
620 error
= vcache_get(vdp
->v_mount
,
621 &foundino
, sizeof(foundino
), &tdp
);
628 * Insert name into cache if appropriate.
630 cache_enter(vdp
, *vpp
, cnp
->cn_nameptr
, cnp
->cn_namelen
, cnp
->cn_flags
);
634 fstrans_done(vdp
->v_mount
);
/*
 * ulfs_dirbad: report a damaged directory.
 *
 * IP is the inode of the directory, OFFSET is the position in the
 * directory that the caller is complaining about, and HOW is a short
 * description of the problem (callers in this file pass "mangled entry"
 * and "i_size too small").
 *
 * NOTE(review): parts of this function are not visible in this view
 * (the trailing printf arguments and the statement guarded by the
 * MNT_RDONLY test); confirm them against the complete file.
 */
639 ulfs_dirbad(struct inode
*ip
, doff_t offset
, const char *how
)
/* Find the mount that the directory's vnode belongs to. */
643 mp
= ITOV(ip
)->v_mount
;
/* Log the mount point name and the directory's inode number. */
644 printf("%s: bad dir ino %llu at offset %d: %s\n",
645 mp
->mnt_stat
.f_mntonname
, (unsigned long long)ip
->i_number
,
/* The guarded statement runs only when the mount is not read-only. */
647 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0)
652 * Do consistency checking on a directory entry:
653 * record length must be multiple of 4
654 * entry must fit in rest of its DIRBLKSIZ block
655 * record must be large enough to contain entry
656 * name is not longer than LFS_MAXNAMLEN
657 * name must be as long as advertised, and null terminated
/*
 * ulfs_dirbadentry: consistency-check one directory entry.
 *
 * DP is the vnode of the directory (note: a vnode, not an inode), EP is
 * the entry to examine, and ENTRYOFFSETINBLOCK is the offset of EP within
 * the buffer it was read from.
 *
 * NOTE(review): the return statements are not visible in this view; the
 * caller in this file treats a true result as "entry is bad".
 */
660 ulfs_dirbadentry(struct vnode
*dp
, LFS_DIRHEADER
*ep
, int entryoffsetinblock
)
/* Reach the lfs structure and its directory block size via the mount. */
665 struct ulfsmount
*ump
= VFSTOULFS(dp
->v_mount
);
666 struct lfs
*fs
= ump
->um_lfs
;
667 int dirblksiz
= fs
->um_dirblksiz
;
670 namlen
= lfs_dir_getnamlen(fs
, ep
);
671 reclen
= lfs_dir_getreclen(fs
, ep
);
/*
 * First group of checks. The entry is rejected if any of these hold:
 *  - the record length is not a multiple of 4;
 *  - the record extends past the end of its dirblksiz-sized chunk;
 *  - the record is shorter than LFS_DIRSIZ() says the entry needs;
 *  - the name length exceeds LFS_MAXNAMLEN.
 */
672 if ((reclen
& 0x3) != 0 ||
673 reclen
> dirblksiz
- (entryoffsetinblock
& (dirblksiz
- 1)) ||
674 reclen
< LFS_DIRSIZ(fs
, ep
) || namlen
> LFS_MAXNAMLEN
) {
/* Diagnostic dump; the "flags" value printed is the mount's mnt_flag. */
676 printf("First bad, reclen=%#x, DIRSIZ=%lu, namlen=%d, "
677 "flags=%#x, entryoffsetinblock=%d, dirblksiz = %d\n",
678 lfs_dir_getreclen(fs
, ep
),
679 (u_long
)LFS_DIRSIZ(fs
, ep
),
680 namlen
, dp
->v_mount
->mnt_flag
, entryoffsetinblock
,
/* Entries with inode number 0 are special-cased (statement not visible). */
684 if (lfs_dir_getino(fs
, ep
) == 0)
/* Second check: no NUL byte may appear within the first namlen bytes. */
686 name
= lfs_dir_nameptr(fs
, ep
);
687 for (i
= 0; i
< namlen
; i
++)
688 if (name
[i
] == '\0') {
690 printf("Second bad\n");
701 * Assign the contents of directory entry DIRP, on volume FS.
703 * NAME/NAMLEN is the name, which is not necessarily null terminated.
704 * INUM is the inode number, and DTYPE is the type code (LFS_DT_*).
706 * Note that these values typically come from:
710 * LFS_IFTODT(ip->i_mode)
712 * Does not set d_reclen.
/*
 * ulfs_direntry_assign: fill in the fields of directory entry DIRP on
 * volume FS: the inode number (INUM), the name length (NAMLEN), the type
 * code (DTYPE) and the name bytes themselves.
 *
 * NAME need not be NUL-terminated; exactly NAMLEN bytes are copied and a
 * terminating NUL is stored after them, so the entry's name area must
 * have room for NAMLEN + 1 bytes.
 *
 * The record length is not touched here; ulfs_direnter() calls
 * lfs_dir_setreclen() itself after calling this function.
 */
715 ulfs_direntry_assign(struct lfs
*fs
, LFS_DIRHEADER
*dirp
,
716 const char *name
, size_t namlen
,
717 ino_t inum
, unsigned dtype
)
/* Header fields first. */
719 lfs_dir_setino(fs
, dirp
, inum
);
720 lfs_dir_setnamlen(fs
, dirp
, namlen
);
721 lfs_dir_settype(fs
, dirp
, dtype
);
/* Then the name, followed by an explicit terminator. */
722 memcpy(lfs_dir_nameptr(fs
, dirp
), name
, namlen
);
723 lfs_dir_nameptr(fs
, dirp
)[namlen
] = '\0';
727 * Write a directory entry after a call to namei, using the parameters
728 * that ulfs_lookup left in nameidata and in the ulfs_lookup_results.
730 * DVP is the directory to be updated. It must be locked.
731 * ULR is the ulfs_lookup_results structure from the final lookup step.
732 * TVP is not used. (XXX: why is it here? remove it)
733 * CNP is the componentname from the final lookup step.
734 * INUM is the inode number to insert into the new directory entry.
735 * DTYPE is the type code (LFS_DT_*) to insert into the new directory entry.
736 * NEWDIRBP is not used and (XXX) should be removed. The previous
737 * comment here said it was used by the now-removed softupdates code.
739 * The link count of the target inode is *not* incremented; the
742 * If ulr->ulr_count is 0, ulfs_lookup did not find space to insert the
743 * directory entry. ulr_offset, which is the place to put the entry,
744 * should be on a block boundary (and should be at the end of the
745 * directory AFAIK) and a fresh block is allocated to put the new
746 * directory entry in.
748 * If ulr->ulr_count is not zero, ulfs_lookup found a slot to insert
749 * the entry into. This slot ranges from ulr_offset to ulr_offset +
750 * ulr_count. However, this slot may already be partially populated
751 * requiring compaction. See notes below.
753 * Furthermore, if ulr_count is not zero and ulr_endoff is not the
754 * same as i_size, the directory is truncated to size ulr_endoff.
/*
 * ulfs_direnter: write a new directory entry named by CNP, referring to
 * inode number INUM with type code DTYPE, into directory DVP, using the
 * placement information in ULR.
 *
 * Two cases are handled:
 *  - ulr_count == 0: a fresh directory block is allocated at ulr_offset
 *    and the new entry is given the whole block;
 *  - ulr_count != 0: the region [ulr_offset, ulr_offset + ulr_count) is
 *    compacted if necessary and the new entry is placed in the free
 *    space that results.
 * Afterwards the directory may be truncated to ulr_endoff.
 *
 * NOTE(review): several declarations, returns and closing braces of this
 * function are not visible in this view; confirm the control flow
 * against the complete file.
 */
757 ulfs_direnter(struct vnode
*dvp
, const struct ulfs_lookup_results
*ulr
,
759 struct componentname
*cnp
, ino_t inum
, unsigned dtype
,
760 struct buf
*newdirbp
)
767 LFS_DIRHEADER
*ep
, *nep
;
768 int error
, ret
, lfs_blkoff
, loc
, spacefree
;
771 struct ulfsmount
*ump
= VFSTOULFS(dvp
->v_mount
);
772 struct lfs
*fs
= ump
->um_lfs
;
773 int dirblksiz
= fs
->um_dirblksiz
;
775 unsigned namlen
, reclen
;
/* The name to enter comes from the componentname. */
781 name
= cnp
->cn_nameptr
; /* note: not null-terminated */
782 namlen
= cnp
->cn_namelen
;
/* Space a new entry with a name of this length requires. */
786 newentrysize
= LFS_DIRECTSIZ(fs
, namlen
);
/* Case 1: no slot was found; extend the directory by one block. */
788 if (ulr
->ulr_count
== 0) {
790 * If ulr_count is 0, then namei could find no
791 * space in the directory. Here, ulr_offset will
792 * be on a directory block boundary and we will write the
793 * new entry into a fresh block.
795 if (ulr
->ulr_offset
& (dirblksiz
- 1))
796 panic("ulfs_direnter: newblk");
/* Allocate dirblksiz bytes at ulr_offset, cleared, synchronously. */
797 if ((error
= lfs_balloc(dvp
, (off_t
)ulr
->ulr_offset
, dirblksiz
,
798 cr
, B_CLRBUF
| B_SYNC
, &bp
)) != 0) {
/* The directory now ends after the new block; update both size copies. */
801 dp
->i_size
= ulr
->ulr_offset
+ dirblksiz
;
802 DIP_ASSIGN(dp
, size
, dp
->i_size
);
803 dp
->i_flag
|= IN_CHANGE
| IN_UPDATE
;
804 uvm_vnp_setsize(dvp
, dp
->i_size
);
/* Locate the new entry inside the buffer (offset modulo f_iosize). */
805 lfs_blkoff
= ulr
->ulr_offset
& (ump
->um_mountp
->mnt_stat
.f_iosize
- 1);
806 ep
= (LFS_DIRHEADER
*)((char *)bp
->b_data
+ lfs_blkoff
);
807 ulfs_direntry_assign(fs
, ep
, name
, namlen
, inum
, dtype
);
/* The sole entry of the new block owns the whole dirblksiz bytes. */
808 lfs_dir_setreclen(fs
, ep
, dirblksiz
);
/* Keep the directory hash, if one exists, in step with the new block. */
810 if (dp
->i_dirhash
!= NULL
) {
811 ulfsdirhash_newblk(dp
, ulr
->ulr_offset
);
812 ulfsdirhash_add(dp
, ep
, ulr
->ulr_offset
);
813 ulfsdirhash_checkblock(dp
, (char *)bp
->b_data
+ lfs_blkoff
,
/* Write the block, then update the directory inode. */
817 error
= VOP_BWRITE(bp
->b_vp
, bp
);
819 ret
= lfs_update(dvp
, &ts
, &ts
, UPDATE_DIROP
);
826 * If ulr_count is non-zero, then namei found space for the new
827 * entry in the range ulr_offset to ulr_offset + ulr_count
828 * in the directory. To use this space, we may have to compact
829 * the entries located there, by copying them together towards the
830 * beginning of the block, leaving the free space in one usable
835 * Increase size of directory if entry eats into new space.
836 * This should never push the size past a new multiple of
839 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
841 if (ulr
->ulr_offset
+ ulr
->ulr_count
> dp
->i_size
) {
843 printf("ulfs_direnter: reached 4.2-only block, "
844 "not supposed to happen\n");
846 dp
->i_size
= ulr
->ulr_offset
+ ulr
->ulr_count
;
847 DIP_ASSIGN(dp
, size
, dp
->i_size
);
848 dp
->i_flag
|= IN_CHANGE
| IN_UPDATE
;
851 * Get the block containing the space for the new directory entry.
/* dirbuf is set to point at ulr_offset within the buffer; modify = true. */
853 error
= ulfs_blkatoff(dvp
, (off_t
)ulr
->ulr_offset
, &dirbuf
, &bp
, true);
858 * Find space for the new entry. In the simple case, the entry at
859 * offset base will have the space. If it does not, then namei
860 * arranged that compacting the region ulr_offset to
861 * ulr_offset + ulr_count would yield the space.
/* Start with the first entry of the slot: dsize bytes used, rest free. */
863 ep
= (LFS_DIRHEADER
*)dirbuf
;
864 dsize
= (lfs_dir_getino(fs
, ep
) != 0) ? LFS_DIRSIZ(fs
, ep
) : 0;
865 spacefree
= lfs_dir_getreclen(fs
, ep
) - dsize
;
/*
 * Compaction loop over the rest of the slot. The for header has no
 * increment; loc is presumably advanced in the body on a line that is
 * not visible here -- TODO confirm.
 */
866 for (loc
= lfs_dir_getreclen(fs
, ep
); loc
< ulr
->ulr_count
; ) {
867 nep
= (LFS_DIRHEADER
*)(dirbuf
+ loc
);
869 /* Trim the existing slot (NB: dsize may be zero). */
870 lfs_dir_setreclen(fs
, ep
, dsize
);
871 ep
= LFS_NEXTDIR(fs
, ep
);
873 reclen
= lfs_dir_getreclen(fs
, nep
);
875 if (lfs_dir_getino(fs
, nep
) == 0) {
877 * A mid-block unused entry. Such entries are
878 * never created by the kernel, but fsck_ffs
879 * can create them (and it doesn't fix them).
881 * Add up the free space, and initialise the
882 * relocated entry since we don't memcpy it.
885 lfs_dir_setino(fs
, ep
, 0);
889 dsize
= LFS_DIRSIZ(fs
, nep
);
890 spacefree
+= reclen
- dsize
;
/* Tell the directory hash that this entry moves from nep to ep. */
892 if (dp
->i_dirhash
!= NULL
)
893 ulfsdirhash_move(dp
, nep
,
894 ulr
->ulr_offset
+ ((char *)nep
- dirbuf
),
895 ulr
->ulr_offset
+ ((char *)ep
- dirbuf
));
/*
 * NOTE(review): ep and nep both point into dirbuf. If the gap between
 * them can be smaller than dsize the two regions overlap, which memcpy
 * does not permit -- confirm whether memmove is needed here.
 */
897 memcpy((void *)ep
, (void *)nep
, dsize
);
900 * Here, `ep' points to a directory entry containing `dsize' in-use
901 * bytes followed by `spacefree' unused bytes. If ep->d_ino == 0,
902 * then the entry is completely unused (dsize == 0). The value
903 * of ep->d_reclen is always indeterminate.
905 * Update the pointer fields in the previous entry (if any),
906 * copy in the new entry, and write out the block.
/*
 * If ep is unused, or is a whiteout whose first namlen name bytes match
 * the new name, the new entry overwrites ep in place.
 */
908 if (lfs_dir_getino(fs
, ep
) == 0 ||
909 (lfs_dir_getino(fs
, ep
) == ULFS_WINO
&&
910 memcmp(lfs_dir_nameptr(fs
, ep
), name
, namlen
) == 0)) {
911 if (spacefree
+ dsize
< newentrysize
)
912 panic("ulfs_direnter: compact1");
913 reclen
= spacefree
+ dsize
;
/* Only a previously unused slot needs a fresh dirhash entry. */
915 dohashadd
= (lfs_dir_getino(fs
, ep
) == 0);
/* Otherwise ep stays, trimmed to dsize, and the new entry follows it. */
918 if (spacefree
< newentrysize
)
919 panic("ulfs_direnter: compact2");
921 lfs_dir_setreclen(fs
, ep
, dsize
);
922 ep
= LFS_NEXTDIR(fs
, ep
);
/* Fill in the new entry and give it the record length chosen above. */
928 ulfs_direntry_assign(fs
, ep
, name
, namlen
, inum
, dtype
);
929 lfs_dir_setreclen(fs
, ep
, reclen
);
931 if (dp
->i_dirhash
!= NULL
&& dohashadd
)
932 ulfsdirhash_add(dp
, ep
, ulr
->ulr_offset
+ ((char *)ep
- dirbuf
));
/* Hash self-check over the dirblksiz chunk that contains ulr_offset. */
933 if (dp
->i_dirhash
!= NULL
)
934 ulfsdirhash_checkblock(dp
, dirbuf
-
935 (ulr
->ulr_offset
& (dirblksiz
- 1)),
936 ulr
->ulr_offset
& ~(dirblksiz
- 1));
938 error
= VOP_BWRITE(bp
->b_vp
, bp
);
939 dp
->i_flag
|= IN_CHANGE
| IN_UPDATE
;
941 * If all went well, and the directory can be shortened, proceed
942 * with the truncation. Note that we have to unlock the inode for
943 * the entry that we just entered, as the truncation may need to
944 * lock other inodes which can lead to deadlock if we also hold a
945 * lock on the newly entered node.
/* Truncate only on success and when ulr_endoff is set and below i_size. */
947 if (error
== 0 && ulr
->ulr_endoff
&& ulr
->ulr_endoff
< dp
->i_size
) {
949 if (dp
->i_dirhash
!= NULL
)
950 ulfsdirhash_dirtrunc(dp
, ulr
->ulr_endoff
);
/* The result of the truncation is deliberately ignored. */
952 (void) lfs_truncate(dvp
, (off_t
)ulr
->ulr_endoff
, IO_SYNC
, cr
);
958 * Remove a directory entry after a call to namei, using the
959 * parameters that ulfs_lookup left in nameidata and in the
960 * ulfs_lookup_results.
962 * DVP is the directory to be updated. It must be locked.
963 * ULR is the ulfs_lookup_results structure from the final lookup step.
964 * IP, if not null, is the inode being unlinked.
965 * FLAGS may contain DOWHITEOUT.
966 * ISRMDIR is not used and (XXX) should be removed.
968 * If FLAGS contains DOWHITEOUT the entry is replaced with a whiteout
969 * instead of being cleared.
971 * ulr->ulr_offset contains the position of the directory entry
974 * ulr->ulr_reclen contains the size of the directory entry to be
977 * ulr->ulr_count contains the size of the *previous* directory
978 * entry. This allows finding it, for free space management. If
979 * ulr_count is 0, the target entry is at the beginning of the
980 * directory. (Does this ever happen? The first entry should be ".",
981 * which should only be removed at rmdir time. Does rmdir come here
982 * to clear out the "." and ".." entries? Perhaps, but I doubt it.)
984 * The space is marked free by adding it to the record length (not
985 * name length) of the preceding entry. If the first entry becomes
986 * free, it is marked free by setting the inode number to 0.
988 * The link count of IP is decremented. Note that this is not the
989 * inverse behavior of ulfs_direnter, which does not adjust link
/*
 * ulfs_dirremove: remove (or, with DOWHITEOUT in FLAGS, white out) the
 * directory entry described by ULR from directory DVP.
 *
 * IP, if not null, is the inode being unlinked. Because IP is optional,
 * nothing that has to run for every removal may dereference it; the
 * directory block size used for the dirhash self-check is therefore
 * read through the directory inode (dp) rather than through IP.
 *
 * NOTE(review): several declarations, returns, braces and the link count
 * adjustment of this function are not visible in this view; confirm the
 * control flow against the complete file.
 */
993 ulfs_dirremove(struct vnode
*dvp
, const struct ulfs_lookup_results
*ulr
,
994 struct inode
*ip
, int flags
, int isrmdir
)
996 struct inode
*dp
= VTOI(dvp
);
997 struct lfs
*fs
= dp
->i_lfs
;
/* Whiteout case: the entry at ulr_offset is rewritten in place. */
1002 if (flags
& DOWHITEOUT
) {
1004 * Whiteout entry: set d_ino to ULFS_WINO.
1006 error
= ulfs_blkatoff(dvp
, (off_t
)ulr
->ulr_offset
, (void *)&ep
,
1010 lfs_dir_setino(fs
, ep
, ULFS_WINO
);
1011 lfs_dir_settype(fs
, ep
, LFS_DT_WHT
);
/*
 * Normal case: fetch the block at (ulr_offset - ulr_count), so that ep
 * is the previous entry when ulr_count != 0 and the target entry itself
 * when ulr_count == 0.
 */
1015 if ((error
= ulfs_blkatoff(dvp
,
1016 (off_t
)(ulr
->ulr_offset
- ulr
->ulr_count
), (void *)&ep
, &bp
, true)) != 0)
1021 * Remove the dirhash entry. This is complicated by the fact
1022 * that `ep' is the previous entry when ulr_count != 0.
1024 if (dp
->i_dirhash
!= NULL
)
1025 ulfsdirhash_remove(dp
, (ulr
->ulr_count
== 0) ? ep
:
1026 LFS_NEXTDIR(fs
, ep
), ulr
->ulr_offset
);
1029 if (ulr
->ulr_count
== 0) {
1031 * First entry in block: set d_ino to zero.
1033 lfs_dir_setino(fs
, ep
, 0);
1036 * Collapse new free space into previous entry.
1038 lfs_dir_setreclen(fs
, ep
,
1039 lfs_dir_getreclen(fs
, ep
) + ulr
->ulr_reclen
);
/*
 * Dirhash self-check of the affected chunk. The block size comes from
 * the directory inode: ip may be null here, dp never is.
 */
1043 if (dp
->i_dirhash
!= NULL
) {
1044 int dirblksiz
= dp
->i_lfs
->um_dirblksiz
;
1045 ulfsdirhash_checkblock(dp
, (char *)ep
-
1046 ((ulr
->ulr_offset
- ulr
->ulr_count
) & (dirblksiz
- 1)),
1047 ulr
->ulr_offset
& ~(dirblksiz
- 1));
/* Propagate the in-core link count of ip and mark ip changed. */
1054 DIP_ASSIGN(ip
, nlink
, ip
->i_nlink
);
1055 ip
->i_flag
|= IN_CHANGE
;
1058 * XXX did it ever occur to anyone that it might be a good
1059 * idea to restore ip->i_nlink if this fails? Or something?
1060 * Currently on error return from this function the state of
1061 * ip->i_nlink depends on what happened, and callers
1062 * definitely do not take this into account.
/* Write the modified directory block and mark the directory changed. */
1064 error
= VOP_BWRITE(bp
->b_vp
, bp
);
1065 dp
->i_flag
|= IN_CHANGE
| IN_UPDATE
;
1067 * If the last named reference to a snapshot goes away,
1068 * drop its snapshot reference so that it will be reclaimed
1069 * when last open reference goes away.
1071 if (ip
!= 0 && (ip
->i_flags
& SF_SNAPSHOT
) != 0 &&
1078 * Rewrite an existing directory entry to point at the inode supplied.
1080 * DP is the directory to update.
1081 * OFFSET is the position of the entry in question. It may come
1082 * from ulr_offset of a ulfs_lookup_results.
1083 * OIP is the old inode the directory previously pointed to.
1084 * NEWINUM is the number of the new inode.
1085 * NEWTYPE is the new value for the type field of the directory entry.
1086 * (This is ignored if the fs doesn't support that.)
1087 * ISRMDIR is not used and (XXX) should be removed.
1088 * IFLAGS are added to DP's inode flags.
1090 * The link count of OIP is decremented. Note that the link count of
1091 * the new inode is *not* incremented. Yay for symmetry.
/*
 * ulfs_dirrewrite: make the existing directory entry at OFFSET in
 * directory DP refer to inode number NEWINUM with type NEWTYPE.
 *
 * OIP is the inode the entry referred to before; its link count field is
 * propagated with DIP_ASSIGN and it is flagged IN_CHANGE. IFLAGS are
 * or'ed into DP's i_flag after the block has been written.
 *
 * NOTE(review): the error check after ulfs_blkatoff(), the link count
 * decrement and the return statements are not visible in this view.
 */
1094 ulfs_dirrewrite(struct inode
*dp
, off_t offset
,
1095 struct inode
*oip
, ino_t newinum
, int newtype
,
1096 int isrmdir
, int iflags
)
1098 struct lfs
*fs
= dp
->i_lfs
;
1101 struct vnode
*vdp
= ITOV(dp
);
/* Fetch the block with the entry; ep is set to point at OFFSET in it. */
1104 error
= ulfs_blkatoff(vdp
, offset
, (void *)&ep
, &bp
, true);
/* Redirect the entry to the new inode. */
1107 lfs_dir_setino(fs
, ep
, newinum
);
1108 lfs_dir_settype(fs
, ep
, newtype
);
/* Propagate the old inode's in-core link count and mark it changed. */
1110 DIP_ASSIGN(oip
, nlink
, oip
->i_nlink
);
1111 oip
->i_flag
|= IN_CHANGE
;
/* Write the directory block, then apply the caller's inode flags. */
1112 error
= VOP_BWRITE(bp
->b_vp
, bp
);
1113 dp
->i_flag
|= iflags
;
1115 * If the last named reference to a snapshot goes away,
1116 * drop its snapshot reference so that it will be reclaimed
1117 * when last open reference goes away.
1119 if ((oip
->i_flags
& SF_SNAPSHOT
) != 0 && oip
->i_nlink
== 0)
1125 * Check if a directory is empty or not.
1126 * Inode supplied must be locked.
1128 * Using a struct lfs_dirtemplate here is not precisely
1129 * what we want, but better than using a struct lfs_direct.
1131 * NB: does not handle corrupted directories.
/*
 * ulfs_dirempty: scan directory IP and decide whether it contains
 * anything besides "." and "..".
 *
 * PARENTINO is the inode number the ".." entry is compared against, and
 * CRED is the credential passed to ulfs_bufio() for the reads.
 *
 * NOTE(review): the return statements and some of the name-length tests
 * are not visible in this view, so the exact return values are not
 * documented here.
 */
1134 ulfs_dirempty(struct inode
*ip
, ino_t parentino
, kauth_cred_t cred
)
1136 struct lfs
*fs
= ip
->i_lfs
;
/* Entry headers are read into this local buffer, viewed through dp. */
1138 union lfs_dirtemplate dbuf
;
1139 LFS_DIRHEADER
*dp
= (LFS_DIRHEADER
*)&dbuf
;
1143 /* XXX this should probably use LFS_DIRECTSIZ(fs, 2) */
1144 #define MINDIRSIZ (sizeof (struct lfs_dirtemplate64) / 2)
/* Walk from offset 0 to i_size, advancing by each record's length. */
1146 for (off
= 0; off
< ip
->i_size
; off
+= lfs_dir_getreclen(fs
, dp
)) {
/* Read MINDIRSIZ bytes at off; count receives the residual. */
1147 error
= ulfs_bufio(UIO_READ
, ITOV(ip
), (void *)dp
, MINDIRSIZ
,
1148 off
, IO_NODELOCKED
, cred
, &count
, NULL
);
1150 * Since we read MINDIRSIZ, residual must
1151 * be 0 unless we're at end of file.
1153 if (error
|| count
!= 0)
1155 /* avoid infinite loops */
1156 if (lfs_dir_getreclen(fs
, dp
) == 0)
1158 /* skip empty entries */
1159 if (lfs_dir_getino(fs
, dp
) == 0 ||
1160 lfs_dir_getino(fs
, dp
) == ULFS_WINO
)
1162 /* accept only "." and ".." */
1163 namlen
= lfs_dir_getnamlen(fs
, dp
);
1164 name
= lfs_dir_nameptr(fs
, dp
);
1170 * At this point namlen must be 1 or 2.
1171 * 1 implies ".", 2 implies ".." if second
/* A one-character name must refer to the directory itself. */
1174 if (namlen
== 1 && lfs_dir_getino(fs
, dp
) == ip
->i_number
)
/* Otherwise the second character must be '.' and refer to PARENTINO. */
1176 if (name
[1] == '.' && lfs_dir_getino(fs
, dp
) == parentino
)
/*
 * Tunable read by ulfs_blkatoff(), which sizes its block-number and
 * block-size arrays as (1 + ulfs_dirrablks) entries. The default of 0
 * leaves room for the requested block only.
 */
1183 #define ULFS_DIRRABLKS 0
1184 int ulfs_dirrablks
= ULFS_DIRRABLKS
;
1187 * ulfs_blkatoff: Return buffer with the contents of block "offset" from
1188 * the beginning of directory "vp". If "res" is non-NULL, fill it in with
1189 * a pointer to the remaining space in the directory. If the caller intends
1190 * to modify the buffer returned, "modify" must be true.
/*
 * ulfs_blkatoff: read the block of directory VP that contains OFFSET.
 *
 * The block is read with breadn(); *RES is set to point at OFFSET within
 * the buffer's data. B_MODIFY is passed to breadn() when "modify" is
 * true.
 *
 * NOTE(review): the last parameter of the signature, some local
 * declarations, the filling of blks[], the error handling and the
 * assignment through BPP are not visible in this view.
 */
1194 ulfs_blkatoff(struct vnode
*vp
, off_t offset
, char **res
, struct buf
**bpp
,
1197 struct inode
*ip __diagused
;
/* Snapshot the tunable so both arrays and the loop use one value. */
1200 const int dirrablks
= ulfs_dirrablks
;
/* Block geometry comes from the mount: bsize = 1 << mnt_fs_bshift. */
1204 struct mount
*mp
= vp
->v_mount
;
1205 const int bshift
= mp
->mnt_fs_bshift
;
1206 const int bsize
= 1 << bshift
;
/* One slot for the requested block plus dirrablks further slots. */
1209 blks
= kmem_alloc((1 + dirrablks
) * sizeof(daddr_t
), KM_SLEEP
);
1210 blksizes
= kmem_alloc((1 + dirrablks
) * sizeof(int), KM_SLEEP
);
/* The vnode and inode must agree on the size; eof is set by GOP_SIZE. */
1212 KASSERT(vp
->v_size
== ip
->i_size
);
1213 GOP_SIZE(vp
, vp
->v_size
, &eof
, 0);
/* Logical block number that contains OFFSET. */
1214 lbn
= offset
>> bshift
;
/* Collect up to 1 + dirrablks blocks; run is advanced in the body. */
1216 for (run
= 0; run
<= dirrablks
;) {
1217 const off_t curoff
= lbn
<< bshift
;
/* The block size is clamped to what remains before eof. */
1218 const int size
= MIN(eof
- curoff
, bsize
);
1223 KASSERT(curoff
< eof
);
1225 blksizes
[run
] = size
;
/* A short block differs from bsize and is special-cased (body not visible). */
1228 if (size
!= bsize
) {
/* Slot 0 is the wanted block; the run - 1 slots after it are extras. */
1233 error
= breadn(vp
, blks
[0], blksizes
[0], &blks
[1], &blksizes
[1],
1234 run
- 1, (modify
? B_MODIFY
: 0), &bp
);
/* Point the caller at OFFSET inside the buffer. */
1240 *res
= (char *)bp
->b_data
+ (offset
& (bsize
- 1));
/* Free both arrays with the same sizes they were allocated with. */
1245 kmem_free(blks
, (1 + dirrablks
) * sizeof(daddr_t
));
1246 kmem_free(blksizes
, (1 + dirrablks
) * sizeof(int));