sys/ufs/lfs/ulfs_lookup.c

   1 /*      $NetBSD: ulfs_lookup.c,v 1.34 2015/09/21 01:24:23 dholland Exp $        */
   2 /*  from NetBSD: ufs_lookup.c,v 1.122 2013/01/22 09:39:18 dholland Exp  */
   3
   4 /*
   5  * Copyright (c) 1989, 1993
   6  *      The Regents of the University of California.  All rights reserved.
   7  * (c) UNIX System Laboratories, Inc.
   8  * All or some portions of this file are derived from material licensed
   9  * to the University of California by American Telephone and Telegraph
  10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  11  * the permission of UNIX System Laboratories, Inc.
  12  *
  13  * Redistribution and use in source and binary forms, with or without
  14  * modification, are permitted provided that the following conditions
  15  * are met:
  16  * 1. Redistributions of source code must retain the above copyright
  17  *    notice, this list of conditions and the following disclaimer.
  18  * 2. Redistributions in binary form must reproduce the above copyright
  19  *    notice, this list of conditions and the following disclaimer in the
  20  *    documentation and/or other materials provided with the distribution.
  21  * 3. Neither the name of the University nor the names of its contributors
  22  *    may be used to endorse or promote products derived from this software
  23  *    without specific prior written permission.
  24  *
  25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  35  * SUCH DAMAGE.
  36  *
  37  *      @(#)ufs_lookup.c        8.9 (Berkeley) 8/11/94
  38  */
  39
  40 #include <sys/cdefs.h>
  41 __KERNEL_RCSID(0, "$NetBSD: ulfs_lookup.c,v 1.34 2015/09/21 01:24:23 dholland Exp $");
  42
  43 #ifdef _KERNEL_OPT
  44 #include "opt_lfs.h"
  45 #endif
  46
  47 #include <sys/param.h>
  48 #include <sys/systm.h>
  49 #include <sys/namei.h>
  50 #include <sys/buf.h>
  51 #include <sys/file.h>
  52 #include <sys/stat.h>
  53 #include <sys/mount.h>
  54 #include <sys/vnode.h>
  55 #include <sys/kernel.h>
  56 #include <sys/kauth.h>
  57 #include <sys/wapbl.h>
  58 #include <sys/fstrans.h>
  59 #include <sys/proc.h>
  60 #include <sys/kmem.h>
  61
  62 #include <ufs/lfs/lfs.h>
  63 #include <ufs/lfs/lfs_accessors.h>
  64 #include <ufs/lfs/lfs_extern.h>
  65
  66 #include <ufs/lfs/ulfs_inode.h>
  67 #ifdef LFS_DIRHASH
  68 #include <ufs/lfs/ulfs_dirhash.h>
  69 #endif
  70 #include <ufs/lfs/ulfsmount.h>
  71 #include <ufs/lfs/ulfs_extern.h>
  72 #include <ufs/lfs/ulfs_bswap.h>
  73
  74 #include <miscfs/genfs/genfs.h>
  75
     /*
      * When nonzero, ulfs_lookup() runs ulfs_dirbadentry() on every directory
      * entry it scans instead of only checking for a zero record length.
      * Defaults to 1 when DIAGNOSTIC is defined and to 0 otherwise.
      */
  76 #ifdef DIAGNOSTIC
  77 int     lfs_dirchk = 1;
  78 #else
  79 int     lfs_dirchk = 0;
  80 #endif
  81
  82 /*
  83  * Convert a component of a pathname into a pointer to a locked inode.
  84  * This is a very central and rather complicated routine.
  85  * If the file system is not maintained in a strict tree hierarchy,
  86  * this can result in a deadlock situation (see comments in code below).
  87  *
  88  * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending
  89  * on whether the name is to be looked up, created, renamed, or deleted.
  90  * When CREATE, RENAME, or DELETE is specified, information usable in
  91  * creating, renaming, or deleting a directory entry may be calculated.
  92  * If flag has LOCKPARENT or'ed into it and the target of the pathname
  93  * exists, lookup returns both the target and its parent directory locked.
  94  * When creating or renaming and LOCKPARENT is specified, the target may
  95  * not be ".".  When deleting and LOCKPARENT is specified, the target may
  96  * be ".", but the caller must check to ensure it does a vrele and vput
  97  * instead of two vputs.
  98  *
  99  * Overall outline of ulfs_lookup:
 100  *
 101  *      check accessibility of directory
 102  *      look for name in cache, if found, then if at end of path
 103  *        and deleting or creating, drop it, else return name
 104  *      search for name in directory, to found or notfound
 105  * notfound:
 106  *      if creating, return locked directory, leaving info on available slots
 107  *      else return error
 108  * found:
 109  *      if at end of path and deleting, return information to allow delete
 110  *      if at end of path and rewriting (RENAME and LOCKPARENT), lock target
 111  *        inode and return info to allow rewrite
 112  *      if not at end, add name to cache; if at end and neither creating
 113  *        nor deleting, add name to cache
      *
      * Return values, as produced by the code below:
      *      0               name found; *a_vpp is set to the target vnode
      *      EJUSTRETURN     name absent at the last component and an entry may
      *                      be written; the slot to use is recorded in the
      *                      directory inode's i_crap lookup results
      *      ENOENT          name absent (also returned on a name-cache hit
      *                      that carries no vnode)
      *      EROFS           DELETE or RENAME of the last component on a
      *                      read-only mount
      *      EISDIR          RENAME whose target is the directory itself
      *      EPERM           sticky-directory check refused a DELETE
      *      other           whatever VOP_ACCESS(), ulfs_blkatoff() or
      *                      vcache_get() returned
 114  */
 115 int
 116 ulfs_lookup(void *v)
 117 {
 118         struct vop_lookup_v2_args /* {
 119                 struct vnode *a_dvp;
 120                 struct vnode **a_vpp;
 121                 struct componentname *a_cnp;
 122         } */ *ap = v;
 123         struct vnode *vdp = ap->a_dvp;  /* vnode for directory being searched */
 124         struct inode *dp = VTOI(vdp);   /* inode for directory being searched */
 125         struct buf *bp;                 /* a buffer of directory entries */
 126         LFS_DIRHEADER *ep;              /* the current directory entry */
 127         int entryoffsetinblock;         /* offset of ep in bp's buffer */
 128         enum {
 129                 NONE,           /* need to search a slot for our new entry */
 130                 COMPACT,        /* a compaction can make a slot in the current
 131                                    DIRBLKSIZ block */
 132                 FOUND,          /* found a slot (or no need to search) */
 133         } slotstatus;
 134         doff_t slotoffset;              /* offset of area with free space.
 135                                            a special value -1 for invalid */
 136         int slotsize;                   /* size of area at slotoffset */
 137         int slotfreespace;              /* accumulated amount of space free in
 138                                            the current DIRBLKSIZ block */
 139         int slotneeded;                 /* size of the entry we're seeking */
 140         int numdirpasses;               /* strategy for directory search */
 141         doff_t endsearch;               /* offset to end directory search */
 142         doff_t prevoff;                 /* previous value of ulr_offset */
 143         struct vnode *tdp;              /* returned by vcache_get */
 144         doff_t enduseful;               /* pointer past last used dir slot.
 145                                            used for directory truncation. */
 146         u_long bmask;                   /* block offset mask */
 147         int error;
 148         struct vnode **vpp = ap->a_vpp;
 149         struct componentname *cnp = ap->a_cnp;
 150         kauth_cred_t cred = cnp->cn_cred;
 151         int flags;
 152         int nameiop = cnp->cn_nameiop;
 153         struct lfs *fs = dp->i_lfs;
 154         int dirblksiz = fs->um_dirblksiz;
 155         ino_t foundino;
 156         struct ulfs_lookup_results *results;
 157         int iswhiteout;                 /* temp result from cache_lookup() */
 158
 159         flags = cnp->cn_flags;
 160
 161         bp = NULL;
 162         slotoffset = -1;
 163         *vpp = NULL;
 164         endsearch = 0; /* silence compiler warning */
 165
 166         /*
 167          * Produce the auxiliary lookup results into i_crap. Increment
 168          * its serial number so elsewhere we can tell if we're using
 169          * stale results. This should not be done this way. XXX.
 170          */
 171         results = &dp->i_crap;
 172         dp->i_crapcounter++;
 173
 174         /*
 175          * Check accessibility of directory.
 176          */
 177         if ((error = VOP_ACCESS(vdp, VEXEC, cred)) != 0)
 178                 return (error);
 179
 180         if ((flags & ISLASTCN) && (vdp->v_mount->mnt_flag & MNT_RDONLY) &&
 181             (nameiop == DELETE || nameiop == RENAME))
 182                 return (EROFS);
 183
 184         /*
 185          * We now have a segment name to search for, and a directory to search.
 186          *
 187          * Before tediously performing a linear scan of the directory,
 188          * check the name cache to see if the directory/name pair
 189          * we are looking for is known already.
 190          */
 191         if (cache_lookup(vdp, cnp->cn_nameptr, cnp->cn_namelen,
 192                          cnp->cn_nameiop, cnp->cn_flags, &iswhiteout, vpp)) {
 193                 if (iswhiteout) {
 194                         cnp->cn_flags |= ISWHITEOUT;
 195                 }
                     /*
                      * Cache hit.  A hit that left *vpp NULL is reported as
                      * ENOENT (the notfound code below enters such names into
                      * the cache with a NULL vnode).
                      */
 196                 return *vpp == NULLVP ? ENOENT : 0;
 197         }
 198         if (iswhiteout) {
 199                 /*
 200                  * The namecache set iswhiteout without finding a
 201                  * cache entry. As of this writing (20121014), this
 202                  * can happen if there was a whiteout entry that has
 203                  * been invalidated by the lookup. It is not clear if
 204                  * it is correct to set ISWHITEOUT in this case or
 205                  * not; however, doing so retains the prior behavior,
 206                  * so we'll go with that until some clearer answer
 207                  * appears. XXX
 208                  */
 209                 cnp->cn_flags |= ISWHITEOUT;
 210         }
 211
             /*
              * From here on every exit goes through "out", so that this
              * fstrans_start() is always paired with the fstrans_done() there.
              */
 212         fstrans_start(vdp->v_mount, FSTRANS_SHARED);
 213
 214         /*
 215          * Suppress search for slots unless creating
 216          * file and at end of pathname, in which case
 217          * we watch for a place to put the new file in
 218          * case it doesn't already exist.
 219          */
 220         slotstatus = FOUND;
 221         slotfreespace = slotsize = slotneeded = 0;
 222         if ((nameiop == CREATE || nameiop == RENAME) && (flags & ISLASTCN)) {
 223                 slotstatus = NONE;
 224                 slotneeded = LFS_DIRECTSIZ(fs, cnp->cn_namelen);
 225         }
 226
 227         /*
 228          * If there is cached information on a previous search of
 229          * this directory, pick up where we last left off.
 230          * We cache only lookups as these are the most common
 231          * and have the greatest payoff. Caching CREATE has little
 232          * benefit as it usually must search the entire directory
 233          * to determine that the entry does not exist. Caching the
 234          * location of the last DELETE or RENAME has not reduced
 235          * profiling time and hence has been removed in the interest
 236          * of simplicity.
 237          */
 238         bmask = vdp->v_mount->mnt_stat.f_iosize - 1;
 239
 240 #ifdef LFS_DIRHASH
 241         /*
 242          * Use dirhash for fast operations on large directories. The logic
 243          * to determine whether to hash the directory is contained within
 244          * ulfsdirhash_build(); a zero return means that it decided to hash
 245          * this directory and it successfully built up the hash table.
 246          */
 247         if (ulfsdirhash_build(dp) == 0) {
 248                 /* Look for a free slot if needed. */
 249                 enduseful = dp->i_size;
 250                 if (slotstatus != FOUND) {
 251                         slotoffset = ulfsdirhash_findfree(dp, slotneeded,
 252                             &slotsize);
 253                         if (slotoffset >= 0) {
 254                                 slotstatus = COMPACT;
 255                                 enduseful = ulfsdirhash_enduseful(dp);
 256                                 if (enduseful < 0)
 257                                         enduseful = dp->i_size;
 258                         }
 259                 }
 260                 /* Look up the component. */
 261                 numdirpasses = 1;
 262                 entryoffsetinblock = 0; /* silence compiler warning */
 263                 switch (ulfsdirhash_lookup(dp, cnp->cn_nameptr, cnp->cn_namelen,
 264                     &results->ulr_offset, &bp, nameiop == DELETE ? &prevoff : NULL)) {
 265                 case 0:
 266                         ep = (LFS_DIRHEADER *)((char *)bp->b_data +
 267                             (results->ulr_offset & bmask));
 268                         goto foundentry;
 269                 case ENOENT:
 270                         results->ulr_offset = roundup(dp->i_size, dirblksiz);
 271                         goto notfound;
 272                 default:
 273                         /* Something failed; just do a linear search. */
 274                         break;
 275                 }
 276         }
 277 #endif /* LFS_DIRHASH */
 278
             /*
              * Choose where the linear scan starts: at offset 0 in a single
              * pass, or (LOOKUP only) at the remembered ulr_diroff with a
              * second pass over the beginning if the first one fails.
              */
 279         if (nameiop != LOOKUP || results->ulr_diroff == 0 ||
 280             results->ulr_diroff >= dp->i_size) {
 281                 entryoffsetinblock = 0;
 282                 results->ulr_offset = 0;
 283                 numdirpasses = 1;
 284         } else {
 285                 results->ulr_offset = results->ulr_diroff;
 286                 if ((entryoffsetinblock = results->ulr_offset & bmask) &&
 287                     (error = ulfs_blkatoff(vdp, (off_t)results->ulr_offset,
 288                     NULL, &bp, false)))
 289                         goto out;
 290                 numdirpasses = 2;
 291                 namecache_count_2passes();
 292         }
 293         prevoff = results->ulr_offset;
 294         endsearch = roundup(dp->i_size, dirblksiz);
 295         enduseful = 0;
 296
 297 searchloop:
 298         while (results->ulr_offset < endsearch) {
 299                 if (curcpu()->ci_schedstate.spc_flags & SPCF_SHOULDYIELD)
 300                         preempt();
 301                 /*
 302                  * If necessary, get the next directory block.
 303                  */
 304                 if ((results->ulr_offset & bmask) == 0) {
 305                         if (bp != NULL)
 306                                 brelse(bp, 0);
 307                         error = ulfs_blkatoff(vdp, (off_t)results->ulr_offset,
 308                             NULL, &bp, false);
 309                         if (error)
 310                                 goto out;
 311                         entryoffsetinblock = 0;
 312                 }
 313                 /*
 314                  * If still looking for a slot, and at a DIRBLKSIZ
 315                  * boundary, have to start looking for free space again.
 316                  */
 317                 if (slotstatus == NONE &&
 318                     (entryoffsetinblock & (dirblksiz - 1)) == 0) {
 319                         slotoffset = -1;
 320                         slotfreespace = 0;
 321                 }
 322                 /*
 323                  * Get pointer to next entry.
 324                  * Full validation checks are slow, so we only check
 325                  * enough to ensure forward progress through the
 326                  * directory. Complete checks can be run by patching
 327                  * "lfs_dirchk" to be true.
 328                  */
 329                 KASSERT(bp != NULL);
 330                 ep = (LFS_DIRHEADER *)((char *)bp->b_data + entryoffsetinblock);
 331                 if (lfs_dir_getreclen(fs, ep) == 0 ||
 332                     (lfs_dirchk && ulfs_dirbadentry(vdp, ep, entryoffsetinblock))) {
 333                         int i;
 334
                             /*
                              * Report the damage, then skip ahead to the start
                              * of the next dirblksiz-sized chunk and carry on.
                              */
 335                         ulfs_dirbad(dp, results->ulr_offset, "mangled entry");
 336                         i = dirblksiz - (entryoffsetinblock & (dirblksiz - 1));
 337                         results->ulr_offset += i;
 338                         entryoffsetinblock += i;
 339                         continue;
 340                 }
 341
 342                 /*
 343                  * If an appropriate sized slot has not yet been found,
 344                  * check to see if one is available. Also accumulate space
 345                  * in the current block so that we can determine if
 346                  * compaction is viable.
 347                  */
 348                 if (slotstatus != FOUND) {
 349                         int size = lfs_dir_getreclen(fs, ep);
 350
                             /*
                              * An entry with inode number 0 carries no name
                              * (the name match below skips it), so its whole
                              * record counts as free; otherwise only the part
                              * of the record beyond LFS_DIRSIZ() is spare.
                              */
 351                         if (lfs_dir_getino(fs, ep) != 0)
 352                                 size -= LFS_DIRSIZ(fs, ep);
 353                         if (size > 0) {
 354                                 if (size >= slotneeded) {
 355                                         slotstatus = FOUND;
 356                                         slotoffset = results->ulr_offset;
 357                                         slotsize = lfs_dir_getreclen(fs, ep);
 358                                 } else if (slotstatus == NONE) {
 359                                         slotfreespace += size;
 360                                         if (slotoffset == -1)
 361                                                 slotoffset = results->ulr_offset;
 362                                         if (slotfreespace >= slotneeded) {
 363                                                 slotstatus = COMPACT;
 364                                                 slotsize = results->ulr_offset +
 365                                                     lfs_dir_getreclen(fs, ep) -
 366                                                     slotoffset;
 367                                         }
 368                                 }
 369                         }
 370                 }
 371
 372                 /*
 373                  * Check for a name match.
 374                  */
 375                 if (lfs_dir_getino(fs, ep)) {
 376                         int namlen;
 377
 378                         namlen = lfs_dir_getnamlen(fs, ep);
 379                         if (namlen == cnp->cn_namelen &&
 380                             !memcmp(cnp->cn_nameptr, lfs_dir_nameptr(fs, ep),
 381                             (unsigned)namlen)) {
 382 #ifdef LFS_DIRHASH
 383 foundentry:
 384 #endif
 385                                 /*
 386                                  * Save directory entry's inode number and
 387                                  * reclen, and release directory buffer.
 388                                  */
 389                                 if (!FSFMT(vdp) && lfs_dir_gettype(fs, ep) == LFS_DT_WHT) {
 390                                         slotstatus = FOUND;
 391                                         slotoffset = results->ulr_offset;
 392                                         slotsize = lfs_dir_getreclen(fs, ep);
 393                                         results->ulr_reclen = slotsize;
 394                                         /*
 395                                          * This is used to set
 396                                          * results->ulr_endoff,
 397                                          * which may be used by ulfs_direnter()
 398                                          * as a length to truncate the
 399                                          * directory to.  Therefore, it must
 400                                          * point past the end of the last
 401                                          * non-empty directory entry.  We don't
 402                                          * know where that is in this case, so
 403                                          * we effectively disable shrinking by
 404                                          * using the existing size of the
 405                                          * directory.
 406                                          *
 407                                          * Note that we wouldn't expect to
 408                                          * shrink the directory while rewriting
 409                                          * an existing entry anyway.
 410                                          */
 411                                         enduseful = endsearch;
 412                                         cnp->cn_flags |= ISWHITEOUT;
                                             /*
                                              * Decrement so that the notfound
                                              * code does not start a second
                                              * pass over the directory.
                                              */
 413                                         numdirpasses--;
 414                                         goto notfound;
 415                                 }
 416                                 foundino = lfs_dir_getino(fs, ep);
 417                                 results->ulr_reclen = lfs_dir_getreclen(fs, ep);
 418                                 goto found;
 419                         }
 420                 }
                     /*
                      * No match: step to the next entry, remembering where this
                      * one started and where the last in-use entry ended.
                      */
 421                 prevoff = results->ulr_offset;
 422                 results->ulr_offset += lfs_dir_getreclen(fs, ep);
 423                 entryoffsetinblock += lfs_dir_getreclen(fs, ep);
 424                 if (lfs_dir_getino(fs, ep))
 425                         enduseful = results->ulr_offset;
 426         }
 427 notfound:
 428         /*
 429          * If we started in the middle of the directory and failed
 430          * to find our target, we must check the beginning as well.
 431          */
 432         if (numdirpasses == 2) {
 433                 numdirpasses--;
                     /*
                      * The second pass covers offsets 0 up to ulr_diroff,
                      * which is where the first pass began.
                      */
 434                 results->ulr_offset = 0;
 435                 endsearch = results->ulr_diroff;
 436                 goto searchloop;
 437         }
 438         if (bp != NULL)
 439                 brelse(bp, 0);
 440         /*
 441          * If creating, and at end of pathname and current
 442          * directory has not been removed, then can consider
 443          * allowing file to be created.
 444          */
 445         if ((nameiop == CREATE || nameiop == RENAME ||
 446              (nameiop == DELETE &&
 447               (cnp->cn_flags & DOWHITEOUT) &&
 448               (cnp->cn_flags & ISWHITEOUT))) &&
 449             (flags & ISLASTCN) && dp->i_nlink != 0) {
 450                 /*
 451                  * Access for write is interpreted as allowing
 452                  * creation of files in the directory.
 453                  */
 454                 error = VOP_ACCESS(vdp, VWRITE, cred);
 455                 if (error)
 456                         goto out;
 457                 /*
 458                  * Return an indication of where the new directory
 459                  * entry should be put.  If we didn't find a slot,
 460                  * then set results->ulr_count to 0 indicating
 461                  * that the new slot belongs at the end of the
 462                  * directory. If we found a slot, then the new entry
 463                  * can be put in the range from results->ulr_offset to
 464                  * results->ulr_offset + results->ulr_count.
 465                  */
 466                 if (slotstatus == NONE) {
 467                         results->ulr_offset = roundup(dp->i_size, dirblksiz);
 468                         results->ulr_count = 0;
 469                         enduseful = results->ulr_offset;
 470                 } else if (nameiop == DELETE) {
 471                         results->ulr_offset = slotoffset;
 472                         if ((results->ulr_offset & (dirblksiz - 1)) == 0)
 473                                 results->ulr_count = 0;
 474                         else
 475                                 results->ulr_count =
 476                                     results->ulr_offset - prevoff;
 477                 } else {
 478                         results->ulr_offset = slotoffset;
 479                         results->ulr_count = slotsize;
 480                         if (enduseful < slotoffset + slotsize)
 481                                 enduseful = slotoffset + slotsize;
 482                 }
 483                 results->ulr_endoff = roundup(enduseful, dirblksiz);
 484 #if 0 /* commented out by dbj. none of the on disk fields changed */
 485                 dp->i_flag |= IN_CHANGE | IN_UPDATE;
 486 #endif
 487                 /*
 488                  * We return with the directory locked, so that
 489                  * the parameters we set up above will still be
 490                  * valid if we actually decide to do a direnter().
 491                  * We return ni_vp == NULL to indicate that the entry
 492                  * does not currently exist; we leave a pointer to
 493                  * the (locked) directory inode in ndp->ni_dvp.
 494                  *
 495                  * NB - if the directory is unlocked, then this
 496                  * information cannot be used.
 497                  */
 498                 error = EJUSTRETURN;
 499                 goto out;
 500         }
 501         /*
 502          * Insert name into cache (as non-existent) if appropriate.
 503          */
 504         if (nameiop != CREATE) {
 505                 cache_enter(vdp, *vpp, cnp->cn_nameptr, cnp->cn_namelen,
 506                             cnp->cn_flags);
 507         }
 508         error = ENOENT;
 509         goto out;
 510
 511 found:
 512         if (numdirpasses == 2)
 513                 namecache_count_pass2();
 514         /*
 515          * Check that directory length properly reflects presence
 516          * of this entry.
 517          */
 518         if (results->ulr_offset + LFS_DIRSIZ(fs, ep) > dp->i_size) {
 519                 ulfs_dirbad(dp, results->ulr_offset, "i_size too small");
 520                 dp->i_size =
 521                     results->ulr_offset + LFS_DIRSIZ(fs, ep);
 522                 DIP_ASSIGN(dp, size, dp->i_size);
 523                 dp->i_flag |= IN_CHANGE | IN_UPDATE;
 524         }
             /* ep points into bp's data and is not used after this release. */
 525         brelse(bp, 0);
 526
 527         /*
 528          * Found component in pathname.
 529          * If the final component of path name, save information
 530          * in the cache as to where the entry was found.
 531          */
 532         if ((flags & ISLASTCN) && nameiop == LOOKUP)
 533                 results->ulr_diroff = results->ulr_offset &~ (dirblksiz - 1);
 534
 535         /*
 536          * If deleting, and at end of pathname, return
 537          * parameters which can be used to remove file.
 538          * Lock the inode, being careful with ".".
 539          */
 540         if (nameiop == DELETE && (flags & ISLASTCN)) {
 541                 /*
 542                  * Return pointer to current entry in results->ulr_offset,
 543                  * and distance past previous entry (if there
 544                  * is a previous entry in this block) in results->ulr_count.
 545                  * Save directory inode pointer in ndp->ni_dvp for dirremove().
 546                  */
 547                 if ((results->ulr_offset & (dirblksiz - 1)) == 0)
 548                         results->ulr_count = 0;
 549                 else
 550                         results->ulr_count = results->ulr_offset - prevoff;
                     /*
                      * The entry names the directory itself: take another
                      * reference on vdp rather than calling vcache_get().
                      */
 551                 if (dp->i_number == foundino) {
 552                         vref(vdp);
 553                         tdp = vdp;
 554                 } else {
 555                         error = vcache_get(vdp->v_mount,
 556                             &foundino, sizeof(foundino), &tdp);
 557                         if (error)
 558                                 goto out;
 559                 }
 560                 /*
 561                  * Write access to directory required to delete files.
 562                  */
 563                 error = VOP_ACCESS(vdp, VWRITE, cred);
 564                 if (error) {
 565                         vrele(tdp);
 566                         goto out;
 567                 }
 568                 /*
 569                  * If directory is "sticky", then user must own
 570                  * the directory, or the file in it, else she
 571                  * may not delete it (unless she's root). This
 572                  * implements append-only directories.
 573                  */
 574                 if (dp->i_mode & ISVTX) {
 575                         error = kauth_authorize_vnode(cred, KAUTH_VNODE_DELETE,
 576                             tdp, vdp, genfs_can_sticky(cred, dp->i_uid,
 577                             VTOI(tdp)->i_uid));
 578                         if (error) {
 579                                 vrele(tdp);
                                     /* Any refusal here is reported as EPERM. */
 580                                 error = EPERM;
 581                                 goto out;
 582                         }
 583                 }
 584                 *vpp = tdp;
 585                 error = 0;
 586                 goto out;
 587         }
 588
 589         /*
 590          * If rewriting (RENAME), return the inode and the
 591          * information required to rewrite the present directory
 592          * Must get inode of directory entry to verify it's a
 593          * regular file, or empty directory.
 594          */
 595         if (nameiop == RENAME && (flags & ISLASTCN)) {
 596                 error = VOP_ACCESS(vdp, VWRITE, cred);
 597                 if (error)
 598                         goto out;
 599                 /*
 600                  * Careful about locking second inode.
 601                  * This can only occur if the target is ".".
 602                  */
 603                 if (dp->i_number == foundino) {
 604                         error = EISDIR;
 605                         goto out;
 606                 }
 607                 error = vcache_get(vdp->v_mount,
 608                     &foundino, sizeof(foundino), &tdp);
 609                 if (error)
 610                         goto out;
 611                 *vpp = tdp;
 612                 error = 0;
 613                 goto out;
 614         }
 615
             /*
              * Remaining cases (not DELETE or RENAME of the last component):
              * hand back the vnode and enter the name into the cache.
              */
 616         if (dp->i_number == foundino) {
 617                 vref(vdp);      /* we want ourself, ie "." */
 618                 *vpp = vdp;
 619         } else {
 620                 error = vcache_get(vdp->v_mount,
 621                     &foundino, sizeof(foundino), &tdp);
 622                 if (error)
 623                         goto out;
 624                 *vpp = tdp;
 625         }
 626
 627         /*
 628          * Insert name into cache if appropriate.
 629          */
 630         cache_enter(vdp, *vpp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_flags);
 631         error = 0;
 632
 633 out:
 634         fstrans_done(vdp->v_mount);
 635         return error;
 636 }
 637
     /*
      * Report a bad directory.
      *
      * Prints the mounted-on name of the file system, the inode number of
      * directory IP, the byte OFFSET at which the problem was seen and the
      * caller-supplied description HOW.  Unless the file system is mounted
      * read-only, it then panics; on a read-only mount it simply returns.
      */
 638 void
 639 ulfs_dirbad(struct inode *ip, doff_t offset, const char *how)
 640 {
 641         struct mount *mp;
 642
 643         mp = ITOV(ip)->v_mount;
 644         printf("%s: bad dir ino %llu at offset %d: %s\n",
 645             mp->mnt_stat.f_mntonname, (unsigned long long)ip->i_number,
 646             offset, how);
             /* Only a read-only mount is allowed to carry on past this point. */
 647         if ((mp->mnt_flag & MNT_RDONLY) == 0)
 648                 panic("bad dir");
 649 }
 650
 651 /*
 652  * Do consistency checking on a directory entry:
 653  *      record length must be multiple of 4
 654  *      entry must fit in rest of its DIRBLKSIZ block
 655  *      record must be large enough to contain entry
 656  *      name is not longer than LFS_MAXNAMLEN
 657  *      name must be as long as advertised, and null terminated
      *
      * DP is the vnode of the directory (used only to reach the mount and its
      * struct lfs), EP is the entry to check, and ENTRYOFFSETINBLOCK is EP's
      * offset, from which the room left in the current dirblksiz-sized chunk
      * is computed.
      *
      * An entry whose inode number is 0 only gets the length checks; its name
      * bytes are not examined.
      *
      * Returns 0 if the entry passes and 1 if it is bad.  A failed length
      * check and an embedded NUL each print a diagnostic with printf(); a
      * missing terminator after the name is rejected silently.
 658  */
 659 int
 660 ulfs_dirbadentry(struct vnode *dp, LFS_DIRHEADER *ep, int entryoffsetinblock)
 661 {
 662         int i;
 663         int namlen;
 664         unsigned reclen;
 665         struct ulfsmount *ump = VFSTOULFS(dp->v_mount);
 666         struct lfs *fs = ump->um_lfs;
 667         int dirblksiz = fs->um_dirblksiz;
 668         const char *name;
 669
 670         namlen = lfs_dir_getnamlen(fs, ep);
 671         reclen = lfs_dir_getreclen(fs, ep);
             /* Length checks: alignment, fit in chunk, room for entry, name cap. */
 672         if ((reclen & 0x3) != 0 ||
 673             reclen > dirblksiz - (entryoffsetinblock & (dirblksiz - 1)) ||
 674             reclen < LFS_DIRSIZ(fs, ep) || namlen > LFS_MAXNAMLEN) {
 675                 /*return (1); */
 676                 printf("First bad, reclen=%#x, DIRSIZ=%lu, namlen=%d, "
 677                         "flags=%#x, entryoffsetinblock=%d, dirblksiz = %d\n",
 678                         lfs_dir_getreclen(fs, ep),
 679                         (u_long)LFS_DIRSIZ(fs, ep),
 680                         namlen, dp->v_mount->mnt_flag, entryoffsetinblock,
 681                         dirblksiz);
 682                 goto bad;
 683         }
             /* Inode number 0: nothing further is checked. */
 684         if (lfs_dir_getino(fs, ep) == 0)
 685                 return (0);
 686         name = lfs_dir_nameptr(fs, ep);
             /* No NUL byte may appear within the first namlen bytes of the name. */
 687         for (i = 0; i < namlen; i++)
 688                 if (name[i] == '\0') {
 689                         /*return (1); */
 690                         printf("Second bad\n");
 691                         goto bad;
 692         }       /* closes the "if" inside the loop, despite its indentation */
             /* Here i == namlen: the byte right after the name must be NUL. */
 693         if (name[i])
 694                 goto bad;
 695         return (0);
 696 bad:
 697         return (1);
 698 }
 699
 700 /*
 701  * Assign the contents of directory entry DIRP, on volume FS.
 702  *
 703  * NAME/NAMLEN is the name, which is not necessarily null terminated.
 704  * INUM is the inode number, and DTYPE is the type code (LFS_DT_*).
 705  *
 706  * Note that these values typically come from:
 707  *    cnp->cn_nameptr
 708  *    cnp->cn_namelen
 709  *    ip->i_number
 710  *    LFS_IFTODT(ip->i_mode)
 711  *
 712  * Does not set d_reclen.
 713  */
 714 static void
 715 ulfs_direntry_assign(struct lfs *fs, LFS_DIRHEADER *dirp,
 716                      const char *name, size_t namlen,
 717                      ino_t inum, unsigned dtype)
 718 {
 719         lfs_dir_setino(fs, dirp, inum);
 720         lfs_dir_setnamlen(fs, dirp, namlen);
 721         lfs_dir_settype(fs, dirp, dtype);
 722         memcpy(lfs_dir_nameptr(fs, dirp), name, namlen);
 723         lfs_dir_nameptr(fs, dirp)[namlen] = '\0';
 724 }
 725
 726 /*
 727  * Write a directory entry after a call to namei, using the parameters
 728  * that ulfs_lookup left in nameidata and in the ulfs_lookup_results.
 729  *
 730  * DVP is the directory to be updated. It must be locked.
 731  * ULR is the ulfs_lookup_results structure from the final lookup step.
 732  * TVP is not used. (XXX: why is it here? remove it)
 733  * CNP is the componentname from the final lookup step.
 734  * INUM is the inode number to insert into the new directory entry.
 735  * DTYPE is the type code (LFS_DT_*) to insert into the new directory entry.
 736  * NEWDIRBP is not used and (XXX) should be removed. The previous
 737  * comment here said it was used by the now-removed softupdates code.
 738  *
 739  * The link count of the target inode is *not* incremented; the
 740  * caller does that.
 741  *
 742  * If ulr->ulr_count is 0, ulfs_lookup did not find space to insert the
 743  * directory entry. ulr_offset, which is the place to put the entry,
 744  * should be on a block boundary (and should be at the end of the
 745  * directory AFAIK) and a fresh block is allocated to put the new
 746  * directory entry in.
 747  *
 748  * If ulr->ulr_count is not zero, ulfs_lookup found a slot to insert
 749  * the entry into. This slot ranges from ulr_offset to ulr_offset +
 750  * ulr_count. However, this slot may already be partially populated
 751  * requiring compaction. See notes below.
 752  *
 753  * Furthermore, if ulr_count is not zero and ulr_endoff is not the
 754  * same as i_size, the directory is truncated to size ulr_endoff.
 755  */
 756 int
 757 ulfs_direnter(struct vnode *dvp, const struct ulfs_lookup_results *ulr,
 758     struct vnode *tvp,
 759     struct componentname *cnp, ino_t inum, unsigned dtype,
 760     struct buf *newdirbp)
 761 {
 762         kauth_cred_t cr;
 763         int newentrysize;
 764         struct inode *dp;
 765         struct buf *bp;
 766         u_int dsize;
 767         LFS_DIRHEADER *ep, *nep;
 768         int error, ret, lfs_blkoff, loc, spacefree;
 769         char *dirbuf;
 770         struct timespec ts;
 771         struct ulfsmount *ump = VFSTOULFS(dvp->v_mount);
 772         struct lfs *fs = ump->um_lfs;
 773         int dirblksiz = fs->um_dirblksiz;
 774         const char *name;
 775         unsigned namlen, reclen;
 776 #ifdef LFS_DIRHASH
 777         int dohashadd;
 778 #endif
 779
 780         error = 0;
 781         name = cnp->cn_nameptr; /* note: not null-terminated */
 782         namlen = cnp->cn_namelen;
 783         cr = cnp->cn_cred;
 784
 785         dp = VTOI(dvp);
 786         newentrysize = LFS_DIRECTSIZ(fs, namlen);
 787
 788         if (ulr->ulr_count == 0) {
 789                 /*
 790                  * If ulr_count is 0, then namei could find no
 791                  * space in the directory. Here, ulr_offset will
 792                  * be on a directory block boundary and we will write the
 793                  * new entry into a fresh block.
 794                  */
 795                 if (ulr->ulr_offset & (dirblksiz - 1))
 796                         panic("ulfs_direnter: newblk");
 797                 if ((error = lfs_balloc(dvp, (off_t)ulr->ulr_offset, dirblksiz,
 798                     cr, B_CLRBUF | B_SYNC, &bp)) != 0) {
 799                         return (error);
 800                 }
 801                 dp->i_size = ulr->ulr_offset + dirblksiz;
 802                 DIP_ASSIGN(dp, size, dp->i_size);
 803                 dp->i_flag |= IN_CHANGE | IN_UPDATE;
 804                 uvm_vnp_setsize(dvp, dp->i_size);
 805                 lfs_blkoff = ulr->ulr_offset & (ump->um_mountp->mnt_stat.f_iosize - 1);
 806                 ep = (LFS_DIRHEADER *)((char *)bp->b_data + lfs_blkoff);
 807                 ulfs_direntry_assign(fs, ep, name, namlen, inum, dtype);
 808                 lfs_dir_setreclen(fs, ep, dirblksiz);
 809 #ifdef LFS_DIRHASH
 810                 if (dp->i_dirhash != NULL) {
 811                         ulfsdirhash_newblk(dp, ulr->ulr_offset);
 812                         ulfsdirhash_add(dp, ep, ulr->ulr_offset);
 813                         ulfsdirhash_checkblock(dp, (char *)bp->b_data + lfs_blkoff,
 814                             ulr->ulr_offset);
 815                 }
 816 #endif
 817                 error = VOP_BWRITE(bp->b_vp, bp);
 818                 vfs_timestamp(&ts);
 819                 ret = lfs_update(dvp, &ts, &ts, UPDATE_DIROP);
 820                 if (error == 0)
 821                         return (ret);
 822                 return (error);
 823         }
 824
 825         /*
 826          * If ulr_count is non-zero, then namei found space for the new
 827          * entry in the range ulr_offset to ulr_offset + ulr_count
 828          * in the directory. To use this space, we may have to compact
 829          * the entries located there, by copying them together towards the
 830          * beginning of the block, leaving the free space in one usable
 831          * chunk at the end.
 832          */
 833
 834         /*
 835          * Increase size of directory if entry eats into new space.
 836          * This should never push the size past a new multiple of
 837          * DIRBLKSIZ.
 838          *
 839          * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
 840          */
 841         if (ulr->ulr_offset + ulr->ulr_count > dp->i_size) {
 842 #ifdef DIAGNOSTIC
 843                 printf("ulfs_direnter: reached 4.2-only block, "
 844                        "not supposed to happen\n");
 845 #endif
 846                 dp->i_size = ulr->ulr_offset + ulr->ulr_count;
 847                 DIP_ASSIGN(dp, size, dp->i_size);
 848                 dp->i_flag |= IN_CHANGE | IN_UPDATE;
 849         }
 850         /*
 851          * Get the block containing the space for the new directory entry.
 852          */
 853         error = ulfs_blkatoff(dvp, (off_t)ulr->ulr_offset, &dirbuf, &bp, true);
 854         if (error) {
 855                 return (error);
 856         }
 857         /*
 858          * Find space for the new entry. In the simple case, the entry at
 859          * offset base will have the space. If it does not, then namei
 860          * arranged that compacting the region ulr_offset to
 861          * ulr_offset + ulr_count would yield the space.
 862          */
 863         ep = (LFS_DIRHEADER *)dirbuf;
 864         dsize = (lfs_dir_getino(fs, ep) != 0) ? LFS_DIRSIZ(fs, ep) : 0;
 865         spacefree = lfs_dir_getreclen(fs, ep) - dsize;
 866         for (loc = lfs_dir_getreclen(fs, ep); loc < ulr->ulr_count; ) {
 867                 nep = (LFS_DIRHEADER *)(dirbuf + loc);
 868
 869                 /* Trim the existing slot (NB: dsize may be zero). */
 870                 lfs_dir_setreclen(fs, ep, dsize);
 871                 ep = LFS_NEXTDIR(fs, ep);
 872
 873                 reclen = lfs_dir_getreclen(fs, nep);
 874                 loc += reclen;
 875                 if (lfs_dir_getino(fs, nep) == 0) {
 876                         /*
 877                          * A mid-block unused entry. Such entries are
 878                          * never created by the kernel, but fsck_ffs
 879                          * can create them (and it doesn't fix them).
 880                          *
 881                          * Add up the free space, and initialise the
 882                          * relocated entry since we don't memcpy it.
 883                          */
 884                         spacefree += reclen;
 885                         lfs_dir_setino(fs, ep, 0);
 886                         dsize = 0;
 887                         continue;
 888                 }
 889                 dsize = LFS_DIRSIZ(fs, nep);
 890                 spacefree += reclen - dsize;
 891 #ifdef LFS_DIRHASH
 892                 if (dp->i_dirhash != NULL)
 893                         ulfsdirhash_move(dp, nep,
 894                             ulr->ulr_offset + ((char *)nep - dirbuf),
 895                             ulr->ulr_offset + ((char *)ep - dirbuf));
 896 #endif
 897                 memcpy((void *)ep, (void *)nep, dsize);
 898         }
 899         /*
 900          * Here, `ep' points to a directory entry containing `dsize' in-use
 901          * bytes followed by `spacefree' unused bytes. If ep->d_ino == 0,
 902          * then the entry is completely unused (dsize == 0). The value
 903          * of ep->d_reclen is always indeterminate.
 904          *
 905          * Update the pointer fields in the previous entry (if any),
 906          * copy in the new entry, and write out the block.
 907          */
 908         if (lfs_dir_getino(fs, ep) == 0 ||
 909             (lfs_dir_getino(fs, ep) == ULFS_WINO &&
 910              memcmp(lfs_dir_nameptr(fs, ep), name, namlen) == 0)) {
 911                 if (spacefree + dsize < newentrysize)
 912                         panic("ulfs_direnter: compact1");
 913                 reclen = spacefree + dsize;
 914 #ifdef LFS_DIRHASH
 915                 dohashadd = (lfs_dir_getino(fs, ep) == 0);
 916 #endif
 917         } else {
 918                 if (spacefree < newentrysize)
 919                         panic("ulfs_direnter: compact2");
 920                 reclen = spacefree;
 921                 lfs_dir_setreclen(fs, ep, dsize);
 922                 ep = LFS_NEXTDIR(fs, ep);
 923 #ifdef LFS_DIRHASH
 924                 dohashadd = 1;
 925 #endif
 926         }
 927
 928         ulfs_direntry_assign(fs, ep, name, namlen, inum, dtype);
 929         lfs_dir_setreclen(fs, ep, reclen);
 930 #ifdef LFS_DIRHASH
 931         if (dp->i_dirhash != NULL && dohashadd)
 932                 ulfsdirhash_add(dp, ep, ulr->ulr_offset + ((char *)ep - dirbuf));
 933         if (dp->i_dirhash != NULL)
 934                 ulfsdirhash_checkblock(dp, dirbuf -
 935                     (ulr->ulr_offset & (dirblksiz - 1)),
 936                     ulr->ulr_offset & ~(dirblksiz - 1));
 937 #endif
 938         error = VOP_BWRITE(bp->b_vp, bp);
 939         dp->i_flag |= IN_CHANGE | IN_UPDATE;
 940         /*
 941          * If all went well, and the directory can be shortened, proceed
 942          * with the truncation. Note that we have to unlock the inode for
 943          * the entry that we just entered, as the truncation may need to
 944          * lock other inodes which can lead to deadlock if we also hold a
 945          * lock on the newly entered node.
 946          */
 947         if (error == 0 && ulr->ulr_endoff && ulr->ulr_endoff < dp->i_size) {
 948 #ifdef LFS_DIRHASH
 949                 if (dp->i_dirhash != NULL)
 950                         ulfsdirhash_dirtrunc(dp, ulr->ulr_endoff);
 951 #endif
 952                 (void) lfs_truncate(dvp, (off_t)ulr->ulr_endoff, IO_SYNC, cr);
 953         }
 954         return (error);
 955 }
 956
 957 /*
 958  * Remove a directory entry after a call to namei, using the
 959  * parameters that ulfs_lookup left in nameidata and in the
 960  * ulfs_lookup_results.
 961  *
 962  * DVP is the directory to be updated. It must be locked.
 963  * ULR is the ulfs_lookup_results structure from the final lookup step.
 964  * IP, if not null, is the inode being unlinked.
 965  * FLAGS may contain DOWHITEOUT.
 966  * ISRMDIR is not used and (XXX) should be removed.
 967  *
 968  * If FLAGS contains DOWHITEOUT the entry is replaced with a whiteout
 969  * instead of being cleared.
 970  *
 971  * ulr->ulr_offset contains the position of the directory entry
 972  * to be removed.
 973  *
 974  * ulr->ulr_reclen contains the size of the directory entry to be
 975  * removed.
 976  *
 977  * ulr->ulr_count contains the size of the *previous* directory
 978  * entry. This allows finding it, for free space management. If
 979  * ulr_count is 0, the target entry is at the beginning of the
 980  * directory. (Does this ever happen? The first entry should be ".",
 981  * which should only be removed at rmdir time. Does rmdir come here
 982  * to clear out the "." and ".." entries? Perhaps, but I doubt it.)
 983  *
 984  * The space is marked free by adding it to the record length (not
 985  * name length) of the preceding entry. If the first entry becomes
 986  * free, it is marked free by setting the inode number to 0.
 987  *
 988  * The link count of IP is decremented. Note that this is not the
 989  * inverse behavior of ulfs_direnter, which does not adjust link
 990  * counts. Sigh.
 991  */
 992 int
 993 ulfs_dirremove(struct vnode *dvp, const struct ulfs_lookup_results *ulr,
 994               struct inode *ip, int flags, int isrmdir)
 995 {
 996         struct inode *dp = VTOI(dvp);
 997         struct lfs *fs = dp->i_lfs;
 998         LFS_DIRHEADER *ep;
 999         struct buf *bp;
1000         int error;
1001
1002         if (flags & DOWHITEOUT) {
1003                 /*
1004                  * Whiteout entry: set d_ino to ULFS_WINO.
1005                  */
1006                 error = ulfs_blkatoff(dvp, (off_t)ulr->ulr_offset, (void *)&ep,
1007                                      &bp, true);
1008                 if (error)
1009                         return (error);
1010                 lfs_dir_setino(fs, ep, ULFS_WINO);
1011                 lfs_dir_settype(fs, ep, LFS_DT_WHT);
1012                 goto out;
1013         }
1014
1015         if ((error = ulfs_blkatoff(dvp,
1016             (off_t)(ulr->ulr_offset - ulr->ulr_count), (void *)&ep, &bp, true)) != 0)
1017                 return (error);
1018
1019 #ifdef LFS_DIRHASH
1020         /*
1021          * Remove the dirhash entry. This is complicated by the fact
1022          * that `ep' is the previous entry when ulr_count != 0.
1023          */
1024         if (dp->i_dirhash != NULL)
1025                 ulfsdirhash_remove(dp, (ulr->ulr_count == 0) ? ep :
1026                    LFS_NEXTDIR(fs, ep), ulr->ulr_offset);
1027 #endif
1028
1029         if (ulr->ulr_count == 0) {
1030                 /*
1031                  * First entry in block: set d_ino to zero.
1032                  */
1033                 lfs_dir_setino(fs, ep, 0);
1034         } else {
1035                 /*
1036                  * Collapse new free space into previous entry.
1037                  */
1038                 lfs_dir_setreclen(fs, ep,
1039                         lfs_dir_getreclen(fs, ep) + ulr->ulr_reclen);
1040         }
1041
1042 #ifdef LFS_DIRHASH
1043         if (dp->i_dirhash != NULL) {
1044                 int dirblksiz = ip->i_lfs->um_dirblksiz;
1045                 ulfsdirhash_checkblock(dp, (char *)ep -
1046                     ((ulr->ulr_offset - ulr->ulr_count) & (dirblksiz - 1)),
1047                     ulr->ulr_offset & ~(dirblksiz - 1));
1048         }
1049 #endif
1050
1051 out:
1052         if (ip) {
1053                 ip->i_nlink--;
1054                 DIP_ASSIGN(ip, nlink, ip->i_nlink);
1055                 ip->i_flag |= IN_CHANGE;
1056         }
1057         /*
1058          * XXX did it ever occur to anyone that it might be a good
1059          * idea to restore ip->i_nlink if this fails? Or something?
1060          * Currently on error return from this function the state of
1061          * ip->i_nlink depends on what happened, and callers
1062          * definitely do not take this into account.
1063          */
1064         error = VOP_BWRITE(bp->b_vp, bp);
1065         dp->i_flag |= IN_CHANGE | IN_UPDATE;
1066         /*
1067          * If the last named reference to a snapshot goes away,
1068          * drop its snapshot reference so that it will be reclaimed
1069          * when last open reference goes away.
1070          */
1071         if (ip != 0 && (ip->i_flags & SF_SNAPSHOT) != 0 &&
1072             ip->i_nlink == 0)
1073                 ulfs_snapgone(ip);
1074         return (error);
1075 }
1076
1077 /*
1078  * Rewrite an existing directory entry to point at the inode supplied.
1079  *
1080  * DP is the directory to update.
1081  * OFFSET is the position of the entry in question. It may come
1082  * from ulr_offset of a ulfs_lookup_results.
1083  * OIP is the old inode the directory previously pointed to.
1084  * NEWINUM is the number of the new inode.
1085  * NEWTYPE is the new value for the type field of the directory entry.
1086  * (This is ignored if the fs doesn't support that.)
1087  * ISRMDIR is not used and (XXX) should be removed.
1088  * IFLAGS are added to DP's inode flags.
1089  *
1090  * The link count of OIP is decremented. Note that the link count of
1091  * the new inode is *not* incremented. Yay for symmetry.
1092  */
1093 int
1094 ulfs_dirrewrite(struct inode *dp, off_t offset,
1095     struct inode *oip, ino_t newinum, int newtype,
1096     int isrmdir, int iflags)
1097 {
1098         struct lfs *fs = dp->i_lfs;
1099         struct buf *bp;
1100         LFS_DIRHEADER *ep;
1101         struct vnode *vdp = ITOV(dp);
1102         int error;
1103
1104         error = ulfs_blkatoff(vdp, offset, (void *)&ep, &bp, true);
1105         if (error)
1106                 return (error);
1107         lfs_dir_setino(fs, ep, newinum);
1108         lfs_dir_settype(fs, ep, newtype);
1109         oip->i_nlink--;
1110         DIP_ASSIGN(oip, nlink, oip->i_nlink);
1111         oip->i_flag |= IN_CHANGE;
1112         error = VOP_BWRITE(bp->b_vp, bp);
1113         dp->i_flag |= iflags;
1114         /*
1115          * If the last named reference to a snapshot goes away,
1116          * drop its snapshot reference so that it will be reclaimed
1117          * when last open reference goes away.
1118          */
1119         if ((oip->i_flags & SF_SNAPSHOT) != 0 && oip->i_nlink == 0)
1120                 ulfs_snapgone(oip);
1121         return (error);
1122 }
1123
1124 /*
1125  * Check if a directory is empty or not.
1126  * Inode supplied must be locked.
1127  *
1128  * Using a struct lfs_dirtemplate here is not precisely
1129  * what we want, but better than using a struct lfs_direct.
1130  *
1131  * NB: does not handle corrupted directories.
1132  */
1133 int
1134 ulfs_dirempty(struct inode *ip, ino_t parentino, kauth_cred_t cred)
1135 {
1136         struct lfs *fs = ip->i_lfs;
1137         doff_t off;
1138         union lfs_dirtemplate dbuf;
1139         LFS_DIRHEADER *dp = (LFS_DIRHEADER *)&dbuf;
1140         int error, namlen;
1141         const char *name;
1142         size_t count;
1143 /* XXX this should probably use LFS_DIRECTSIZ(fs, 2) */
1144 #define MINDIRSIZ (sizeof (struct lfs_dirtemplate64) / 2)
1145
1146         for (off = 0; off < ip->i_size; off += lfs_dir_getreclen(fs, dp)) {
1147                 error = ulfs_bufio(UIO_READ, ITOV(ip), (void *)dp, MINDIRSIZ,
1148                     off, IO_NODELOCKED, cred, &count, NULL);
1149                 /*
1150                  * Since we read MINDIRSIZ, residual must
1151                  * be 0 unless we're at end of file.
1152                  */
1153                 if (error || count != 0)
1154                         return (0);
1155                 /* avoid infinite loops */
1156                 if (lfs_dir_getreclen(fs, dp) == 0)
1157                         return (0);
1158                 /* skip empty entries */
1159                 if (lfs_dir_getino(fs, dp) == 0 ||
1160                     lfs_dir_getino(fs, dp) == ULFS_WINO)
1161                         continue;
1162                 /* accept only "." and ".." */
1163                 namlen = lfs_dir_getnamlen(fs, dp);
1164                 name = lfs_dir_nameptr(fs, dp);
1165                 if (namlen > 2)
1166                         return (0);
1167                 if (name[0] != '.')
1168                         return (0);
1169                 /*
1170                  * At this point namlen must be 1 or 2.
1171                  * 1 implies ".", 2 implies ".." if second
1172                  * char is also "."
1173                  */
1174                 if (namlen == 1 && lfs_dir_getino(fs, dp) == ip->i_number)
1175                         continue;
1176                 if (name[1] == '.' && lfs_dir_getino(fs, dp) == parentino)
1177                         continue;
1178                 return (0);
1179         }
1180         return (1);
1181 }
1182
/*
 * Number of blocks, beyond the one actually requested, that
 * ulfs_blkatoff() may pass along to breadn() in the same call.
 * ULFS_DIRRABLKS is the compiled-in default; with 0 only the
 * requested block is read.
 */
#define ULFS_DIRRABLKS 0
int ulfs_dirrablks = ULFS_DIRRABLKS;
1185
1186 /*
1187  * ulfs_blkatoff: Return buffer with the contents of block "offset" from
1188  * the beginning of directory "vp".  If "res" is non-NULL, fill it in with
1189  * a pointer to the remaining space in the directory.  If the caller intends
1190  * to modify the buffer returned, "modify" must be true.
1191  */
1192
1193 int
1194 ulfs_blkatoff(struct vnode *vp, off_t offset, char **res, struct buf **bpp,
1195     bool modify)
1196 {
1197         struct inode *ip __diagused;
1198         struct buf *bp;
1199         daddr_t lbn;
1200         const int dirrablks = ulfs_dirrablks;
1201         daddr_t *blks;
1202         int *blksizes;
1203         int run, error;
1204         struct mount *mp = vp->v_mount;
1205         const int bshift = mp->mnt_fs_bshift;
1206         const int bsize = 1 << bshift;
1207         off_t eof;
1208
1209         blks = kmem_alloc((1 + dirrablks) * sizeof(daddr_t), KM_SLEEP);
1210         blksizes = kmem_alloc((1 + dirrablks) * sizeof(int), KM_SLEEP);
1211         ip = VTOI(vp);
1212         KASSERT(vp->v_size == ip->i_size);
1213         GOP_SIZE(vp, vp->v_size, &eof, 0);
1214         lbn = offset >> bshift;
1215
1216         for (run = 0; run <= dirrablks;) {
1217                 const off_t curoff = lbn << bshift;
1218                 const int size = MIN(eof - curoff, bsize);
1219
1220                 if (size == 0) {
1221                         break;
1222                 }
1223                 KASSERT(curoff < eof);
1224                 blks[run] = lbn;
1225                 blksizes[run] = size;
1226                 lbn++;
1227                 run++;
1228                 if (size != bsize) {
1229                         break;
1230                 }
1231         }
1232         KASSERT(run >= 1);
1233         error = breadn(vp, blks[0], blksizes[0], &blks[1], &blksizes[1],
1234             run - 1, (modify ? B_MODIFY : 0), &bp);
1235         if (error != 0) {
1236                 *bpp = NULL;
1237                 goto out;
1238         }
1239         if (res) {
1240                 *res = (char *)bp->b_data + (offset & (bsize - 1));
1241         }
1242         *bpp = bp;
1243
1244  out:
1245         kmem_free(blks, (1 + dirrablks) * sizeof(daddr_t));
1246         kmem_free(blksizes, (1 + dirrablks) * sizeof(int));
1247         return error;
1248 }