/*	$NetBSD: lfs_subr.c,v 1.76 2010/06/25 10:03:52 hannken Exp $	*/

/*-
 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Konrad E. Schroder <perseant@hhhh.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)lfs_subr.c	8.4 (Berkeley) 5/8/95
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_subr.c,v 1.76 2010/06/25 10:03:52 hannken Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/kauth.h>

#include <ufs/ufs/inode.h>
#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_extern.h>
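
/*
 * Names and quantities of the per-filesystem reserve blocks, indexed by
 * reserve-block type.  lfs_malloc() locates the blocks for a given type by
 * summing lfs_res_qty[] over the preceding types, so the order here must
 * match the order in which lfs_setup_resblks() assigns sizes below.
 */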
const char *lfs_res_names[LFS_NB_COUNT] = {
	"summary",
	"superblock",
	"file block",
	"cluster",
	"clean",
	"blkiov",
};

int lfs_res_qty[LFS_NB_COUNT] = {
	LFS_N_SUMMARIES,
	LFS_N_SBLOCKS,
	LFS_N_IBLOCKS,
	LFS_N_CLUSTERS,
	LFS_N_CLEAN,
	LFS_N_BLKIOV,
};
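
/*
 * lfs_setup_resblks --
 *	Allocate the reserve ("last resort") buffers used when malloc fails
 *	during a segment write, and create the cluster, segment, and bpp
 *	memory pools.
 */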
void
lfs_setup_resblks(struct lfs *fs)
{
	int i, j;
	int maxbpp;

	ASSERT_NO_SEGLOCK(fs);
	fs->lfs_resblk = (res_t *)malloc(LFS_N_TOTAL * sizeof(res_t), M_SEGMENT,
					 M_WAITOK);
	for (i = 0; i < LFS_N_TOTAL; i++) {
		fs->lfs_resblk[i].inuse = 0;
		fs->lfs_resblk[i].p = NULL;
	}
	for (i = 0; i < LFS_RESHASH_WIDTH; i++)
		LIST_INIT(fs->lfs_reshash + i);
	/*
	 * These types of allocations can be larger than a page,
	 * so we can't use the pool subsystem for them.
	 */
	for (i = 0, j = 0; j < LFS_N_SUMMARIES; j++, i++)
		fs->lfs_resblk[i].size = fs->lfs_sumsize;
	for (j = 0; j < LFS_N_SBLOCKS; j++, i++)
		fs->lfs_resblk[i].size = LFS_SBPAD;
	for (j = 0; j < LFS_N_IBLOCKS; j++, i++)
		fs->lfs_resblk[i].size = fs->lfs_bsize;
	for (j = 0; j < LFS_N_CLUSTERS; j++, i++)
		fs->lfs_resblk[i].size = MAXPHYS;
	for (j = 0; j < LFS_N_CLEAN; j++, i++)
		fs->lfs_resblk[i].size = MAXPHYS;
	for (j = 0; j < LFS_N_BLKIOV; j++, i++)
		fs->lfs_resblk[i].size = LFS_MARKV_MAXBLKCNT * sizeof(BLOCK_INFO);
	for (i = 0; i < LFS_N_TOTAL; i++) {
		fs->lfs_resblk[i].p = malloc(fs->lfs_resblk[i].size,
					     M_SEGMENT, M_WAITOK);
	}
	/*
	 * Initialize pools for small types (XXX is BPP small?)
	 */
	pool_init(&fs->lfs_clpool, sizeof(struct lfs_cluster), 0, 0, 0,
		"lfsclpl", &pool_allocator_nointr, IPL_NONE);
	pool_init(&fs->lfs_segpool, sizeof(struct segment), 0, 0, 0,
		"lfssegpool", &pool_allocator_nointr, IPL_NONE);
	maxbpp = ((fs->lfs_sumsize - SEGSUM_SIZE(fs)) / sizeof(int32_t) + 2);
	maxbpp = MIN(maxbpp, segsize(fs) / fs->lfs_fsize + 2);
	pool_init(&fs->lfs_bpppool, maxbpp * sizeof(struct buf *), 0, 0, 0,
		"lfsbpppl", &pool_allocator_nointr, IPL_NONE);
}
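
/*
 * lfs_free_resblks --
 *	Tear down what lfs_setup_resblks() created: destroy the pools and,
 *	once no reserve block is still in use, free the reserve buffers and
 *	the lfs_resblk array itself.
 */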
void
lfs_free_resblks(struct lfs *fs)
{
	int i;

	pool_destroy(&fs->lfs_bpppool);
	pool_destroy(&fs->lfs_segpool);
	pool_destroy(&fs->lfs_clpool);

	mutex_enter(&lfs_lock);
	for (i = 0; i < LFS_N_TOTAL; i++) {
		while (fs->lfs_resblk[i].inuse)
			mtsleep(&fs->lfs_resblk, PRIBIO + 1, "lfs_free", 0,
				&lfs_lock);
		if (fs->lfs_resblk[i].p != NULL)
			free(fs->lfs_resblk[i].p, M_SEGMENT);
	}
	free(fs->lfs_resblk, M_SEGMENT);
	mutex_exit(&lfs_lock);
}
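
/*
 * Hash a block pointer into the reserve-block hash table.
 */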
static unsigned int
lfs_mhash(void *vp)
{
	return (unsigned int)(((unsigned long)vp) >> 2) % LFS_RESHASH_WIDTH;
}
/*
 * Return memory of the given size for the given purpose, or use one of a
 * number of spare last-resort buffers, if malloc returns NULL.
 */
void *
lfs_malloc(struct lfs *fs, size_t size, int type)
{
	struct lfs_res_blk *re;
	void *r;
	int i, start;
	unsigned int h;

	ASSERT_MAYBE_SEGLOCK(fs);
	r = NULL;

	/* If no mem allocated for this type, it just waits */
	if (lfs_res_qty[type] == 0) {
		r = malloc(size, M_SEGMENT, M_WAITOK);
		return r;
	}

	/* Otherwise try a quick malloc, and if it works, great */
	if ((r = malloc(size, M_SEGMENT, M_NOWAIT)) != NULL) {
		return r;
	}

	/*
	 * If malloc returned NULL, we are forced to use one of our
	 * reserve blocks.  We have on hand at least one summary block,
	 * at least one cluster block, at least one superblock,
	 * and several indirect blocks.
	 */
	mutex_enter(&lfs_lock);
	/* skip over blocks of other types */
	for (i = 0, start = 0; i < type; i++)
		start += lfs_res_qty[i];
	while (r == NULL) {
		for (i = 0; i < lfs_res_qty[type]; i++) {
			if (fs->lfs_resblk[start + i].inuse == 0) {
				re = fs->lfs_resblk + start + i;
				re->inuse = 1;
				r = re->p;
				KASSERT(re->size >= size);
				h = lfs_mhash(r);
				LIST_INSERT_HEAD(&fs->lfs_reshash[h], re, res);
				mutex_exit(&lfs_lock);
				return r;
			}
		}
		DLOG((DLOG_MALLOC, "sleeping on %s (%d)\n",
		      lfs_res_names[type], lfs_res_qty[type]));
		mtsleep(&fs->lfs_resblk, PVM, "lfs_malloc", 0,
			&lfs_lock);
		DLOG((DLOG_MALLOC, "done sleeping on %s\n",
		      lfs_res_names[type]));
	}
	/* NOTREACHED */
	mutex_exit(&lfs_lock);
	return r;
}
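
/*
 * Free memory obtained from lfs_malloc().  If the block is one of the
 * reserve blocks, take it off its hash list, mark it available again and
 * wake any waiters; otherwise it came straight from malloc(9), so free it.
 */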
void
lfs_free(struct lfs *fs, void *p, int type)
{
	int i;
	unsigned int h;
	res_t *re;

	ASSERT_MAYBE_SEGLOCK(fs);
	h = lfs_mhash(p);
	mutex_enter(&lfs_lock);

	LIST_FOREACH(re, &fs->lfs_reshash[h], res) {
		if (re->p == p) {
			KASSERT(re->inuse == 1);
			LIST_REMOVE(re, res);
			re->inuse = 0;
			wakeup(&fs->lfs_resblk);
			mutex_exit(&lfs_lock);
			return;
		}
	}

	for (i = 0; i < LFS_N_TOTAL; i++) {
		if (fs->lfs_resblk[i].p == p)
			panic("lfs_free: inconsistent reserved block");
	}

	mutex_exit(&lfs_lock);

	/*
	 * If we didn't find it, free it.
	 */
	free(p, M_SEGMENT);
}
/*
 * lfs_seglock --
 *	Single thread the segment writer.
 */
int
lfs_seglock(struct lfs *fs, unsigned long flags)
{
	struct segment *sp;

	mutex_enter(&lfs_lock);
	if (fs->lfs_seglock) {
		if (fs->lfs_lockpid == curproc->p_pid &&
		    fs->lfs_locklwp == curlwp->l_lid) {
			++fs->lfs_seglock;
			fs->lfs_sp->seg_flags |= flags;
			mutex_exit(&lfs_lock);
			return 0;
		} else if (flags & SEGM_PAGEDAEMON) {
			mutex_exit(&lfs_lock);
			return EWOULDBLOCK;
		} else {
			while (fs->lfs_seglock) {
				(void)mtsleep(&fs->lfs_seglock, PRIBIO + 1,
					"lfs_seglock", 0, &lfs_lock);
			}
		}
	}

	fs->lfs_seglock = 1;
	fs->lfs_lockpid = curproc->p_pid;
	fs->lfs_locklwp = curlwp->l_lid;
	mutex_exit(&lfs_lock);
	fs->lfs_cleanind = 0;

#ifdef DEBUG
	LFS_ENTER_LOG("seglock", __FILE__, __LINE__, 0, flags, curproc->p_pid);
#endif
	/* Drain fragment size changes out */
	rw_enter(&fs->lfs_fraglock, RW_WRITER);

	sp = fs->lfs_sp = pool_get(&fs->lfs_segpool, PR_WAITOK);
	sp->bpp = pool_get(&fs->lfs_bpppool, PR_WAITOK);
	sp->seg_flags = flags;
	sp->vp = NULL;
	sp->seg_iocount = 0;
	(void) lfs_initseg(fs);

	/*
	 * Keep a cumulative count of the outstanding I/O operations.  If the
	 * disk drive catches up with us it could go to zero before we finish,
	 * so we artificially increment it by one until we've scheduled all of
	 * the writes we intend to do.
	 */
	mutex_enter(&lfs_lock);
	++fs->lfs_iocount;
	mutex_exit(&lfs_lock);
	return 0;
}
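
/*
 * lfs_unmark_dirop --
 *	Walk the list of vnodes involved in directory operations and clear
 *	VU_DIROP on those whose dirop state has already been written,
 *	dropping the reference held on each.
 */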
static void lfs_unmark_dirop(struct lfs *);

static void
lfs_unmark_dirop(struct lfs *fs)
{
	struct inode *ip, *nip;
	struct vnode *vp;
	int doit;

	ASSERT_NO_SEGLOCK(fs);
	mutex_enter(&lfs_lock);
	doit = !(fs->lfs_flags & LFS_UNDIROP);
	if (doit)
		fs->lfs_flags |= LFS_UNDIROP;
	if (!doit) {
		mutex_exit(&lfs_lock);
		return;
	}

	for (ip = TAILQ_FIRST(&fs->lfs_dchainhd); ip != NULL; ip = nip) {
		nip = TAILQ_NEXT(ip, i_lfs_dchain);
		vp = ITOV(ip);
		if ((VTOI(vp)->i_flag & (IN_ADIROP | IN_ALLMOD)) == 0) {
			--lfs_dirvcount;
			--fs->lfs_dirvcount;
			vp->v_uflag &= ~VU_DIROP;
			TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain);
			wakeup(&lfs_dirvcount);
			fs->lfs_unlockvp = vp;
			mutex_exit(&lfs_lock);
			vrele(vp);
			mutex_enter(&lfs_lock);
			fs->lfs_unlockvp = NULL;
		}
	}

	fs->lfs_flags &= ~LFS_UNDIROP;
	wakeup(&fs->lfs_flags);
	mutex_exit(&lfs_lock);
}
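
/*
 * lfs_auto_segclean --
 *	Mark segments that are both dirty and empty, in both superblock
 *	views, clean, so that the cleaner does not have to be involved.
 */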
static void
lfs_auto_segclean(struct lfs *fs)
{
	int i, error, s, waited;

	ASSERT_SEGLOCK(fs);
	/*
	 * Now that we've swapped lfs_activesb, but while we still
	 * hold the segment lock, run through the segment list marking
	 * the empty ones clean.
	 * XXX - do we really need to do them all at once?
	 */
	waited = 0;
	for (i = 0; i < fs->lfs_nseg; i++) {
		if ((fs->lfs_suflags[0][i] &
		     (SEGUSE_ACTIVE | SEGUSE_DIRTY | SEGUSE_EMPTY)) ==
		    (SEGUSE_DIRTY | SEGUSE_EMPTY) &&
		    (fs->lfs_suflags[1][i] &
		     (SEGUSE_ACTIVE | SEGUSE_DIRTY | SEGUSE_EMPTY)) ==
		    (SEGUSE_DIRTY | SEGUSE_EMPTY)) {

			/* Make sure the sb is written before we clean */
			mutex_enter(&lfs_lock);
			while (waited == 0 && fs->lfs_sbactive)
				mtsleep(&fs->lfs_sbactive, PRIBIO+1, "lfs asb",
					0, &lfs_lock);
			mutex_exit(&lfs_lock);
			waited = 1;

			if ((error = lfs_do_segclean(fs, i)) != 0) {
				DLOG((DLOG_CLEAN, "lfs_auto_segclean: lfs_do_segclean returned %d for seg %d\n", error, i));
			}
		}
		fs->lfs_suflags[1 - fs->lfs_activesb][i] =
			fs->lfs_suflags[fs->lfs_activesb][i];
	}
}
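
/*
 * Note that the segment lock is recursive: lfs_seglock() may be taken
 * again by the LWP that already holds it, and lfs_segunlock() only tears
 * the write state down when the last hold is released.
 */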
/*
 * lfs_segunlock --
 *	Single thread the segment writer.
 */
void
lfs_segunlock(struct lfs *fs)
{
	struct segment *sp;
	unsigned long sync, ckp;
	struct buf *bp;
	int do_unmark_dirop = 0;

	sp = fs->lfs_sp;

	mutex_enter(&lfs_lock);
	KASSERT(LFS_SEGLOCK_HELD(fs));
	if (fs->lfs_seglock == 1) {
		if ((sp->seg_flags & (SEGM_PROT | SEGM_CLEAN)) == 0 &&
		    LFS_STARVED_FOR_SEGS(fs) == 0)
			do_unmark_dirop = 1;
		mutex_exit(&lfs_lock);
		sync = sp->seg_flags & SEGM_SYNC;
		ckp = sp->seg_flags & SEGM_CKP;

		/* We should have a segment summary, and nothing else */
		KASSERT(sp->cbpp == sp->bpp + 1);

		/* Free allocated segment summary */
		fs->lfs_offset -= btofsb(fs, fs->lfs_sumsize);
		bp = *sp->bpp;
		lfs_freebuf(fs, bp);

		pool_put(&fs->lfs_bpppool, sp->bpp);
		sp->bpp = NULL;

		/*
		 * If we're not sync, we're done with sp, get rid of it.
		 * Otherwise, we keep a local copy around but free
		 * fs->lfs_sp so another process can use it (we have to
		 * wait but they don't have to wait for us).
		 */
		if (!sync)
			pool_put(&fs->lfs_segpool, sp);
		fs->lfs_sp = NULL;
		/*
		 * If the I/O count is non-zero, sleep until it reaches zero.
		 * At the moment, the user's process hangs around so we can
		 * sleep.
		 */
		mutex_enter(&lfs_lock);
		if (--fs->lfs_iocount == 0) {
			LFS_DEBUG_COUNTLOCKED("lfs_segunlock");
		}
		if (fs->lfs_iocount <= 1)
			wakeup(&fs->lfs_iocount);
		mutex_exit(&lfs_lock);

		/*
		 * If we're not checkpointing, we don't have to block
		 * other processes to wait for a synchronous write
		 * to complete.
		 */
		if (!ckp) {
#ifdef DEBUG
			LFS_ENTER_LOG("segunlock_std", __FILE__, __LINE__, 0, 0, curproc->p_pid);
#endif
			mutex_enter(&lfs_lock);
			--fs->lfs_seglock;
			fs->lfs_lockpid = 0;
			fs->lfs_locklwp = 0;
			mutex_exit(&lfs_lock);
			wakeup(&fs->lfs_seglock);
		}

		/*
		 * We let checkpoints happen asynchronously.  That means
		 * that during recovery, we have to roll forward between
		 * the two segments described by the first and second
		 * superblocks to make sure that the checkpoint described
		 * by a superblock completed.
		 */
		mutex_enter(&lfs_lock);
		while (ckp && sync && fs->lfs_iocount) {
			(void)mtsleep(&fs->lfs_iocount, PRIBIO + 1,
				      "lfs_iocount", 0, &lfs_lock);
			DLOG((DLOG_SEG, "sleeping on iocount %x == %d\n", fs, fs->lfs_iocount));
		}
		while (sync && sp->seg_iocount) {
			(void)mtsleep(&sp->seg_iocount, PRIBIO + 1,
				     "seg_iocount", 0, &lfs_lock);
			DLOG((DLOG_SEG, "sleeping on iocount %x == %d\n", sp, sp->seg_iocount));
		}
		mutex_exit(&lfs_lock);

		if (sync)
			pool_put(&fs->lfs_segpool, sp);
		if (ckp) {
			fs->lfs_nactive = 0;
			/* If we *know* everything's on disk, write both sbs */
			/* XXX should wait for this one	 */
			if (sync)
				lfs_writesuper(fs, fs->lfs_sboffs[fs->lfs_activesb]);
			lfs_writesuper(fs, fs->lfs_sboffs[1 - fs->lfs_activesb]);
			if (!(fs->lfs_ivnode->v_mount->mnt_iflag & IMNT_UNMOUNT)) {
				lfs_auto_segclean(fs);
				/* If sync, we can clean the remainder too */
				if (sync)
					lfs_auto_segclean(fs);
			}
			fs->lfs_activesb = 1 - fs->lfs_activesb;
#ifdef DEBUG
			LFS_ENTER_LOG("segunlock_ckp", __FILE__, __LINE__, 0, 0, curproc->p_pid);
#endif
			mutex_enter(&lfs_lock);
			--fs->lfs_seglock;
			fs->lfs_lockpid = 0;
			fs->lfs_locklwp = 0;
			mutex_exit(&lfs_lock);
			wakeup(&fs->lfs_seglock);
		}
		/* Reenable fragment size changes */
		rw_exit(&fs->lfs_fraglock);
		if (do_unmark_dirop)
			lfs_unmark_dirop(fs);
	} else if (fs->lfs_seglock == 0) {
		mutex_exit(&lfs_lock);
		panic("Seglock not held");
	} else {
		--fs->lfs_seglock;
		mutex_exit(&lfs_lock);
	}
}
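
/*
 * lfs_writer_enter() keeps new directory operations from starting and
 * waits for those already in progress to drain; lfs_writer_leave() lets
 * them continue.  The two calls must be paired.
 */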
/*
 * Drain dirops and start writer.
 *
 * No simple_locks are held when we enter and none are held when we return.
 */
int
lfs_writer_enter(struct lfs *fs, const char *wmesg)
{
	int error = 0;

	ASSERT_MAYBE_SEGLOCK(fs);
	mutex_enter(&lfs_lock);

	/* disallow dirops during flush */
	fs->lfs_writer++;

	while (fs->lfs_dirops > 0) {
		++fs->lfs_diropwait;
		error = mtsleep(&fs->lfs_writer, PRIBIO+1, wmesg, 0,
				&lfs_lock);
		--fs->lfs_diropwait;
	}

	if (error)
		fs->lfs_writer--;

	mutex_exit(&lfs_lock);

	return error;
}
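
/*
 * lfs_writer_leave --
 *	Drop the writer hold taken by lfs_writer_enter() and, if it was the
 *	last one, wake anyone waiting to start a directory operation.
 */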
void
lfs_writer_leave(struct lfs *fs)
{
	bool dowakeup;

	ASSERT_MAYBE_SEGLOCK(fs);
	mutex_enter(&lfs_lock);
	dowakeup = !(--fs->lfs_writer);
	mutex_exit(&lfs_lock);
	if (dowakeup)
		wakeup(&fs->lfs_dirops);
}
/*
 * Unlock, wait for the cleaner, then relock to where we were before.
 * To be used only at a fairly high level, to address a paucity of free
 * segments propagated back from lfs_gop_write().
 */
void
lfs_segunlock_relock(struct lfs *fs)
{
	int n = fs->lfs_seglock;
	u_int16_t seg_flags;
	CLEANERINFO *cip;
	struct buf *bp;

	if (n == 0)
		return;

	/* Write anything we've already gathered to disk */
	lfs_writeseg(fs, fs->lfs_sp);

	/* Tell cleaner */
	LFS_CLEANERINFO(cip, fs, bp);
	cip->flags |= LFS_CLEANER_MUST_CLEAN;
	LFS_SYNC_CLEANERINFO(cip, fs, bp, 1);

	/* Save segment flags for later */
	seg_flags = fs->lfs_sp->seg_flags;

	fs->lfs_sp->seg_flags |= SEGM_PROT; /* Don't unmark dirop nodes */
	while (fs->lfs_seglock)
		lfs_segunlock(fs);

	/* Wait for the cleaner */
	lfs_wakeup_cleaner(fs);
	mutex_enter(&lfs_lock);
	while (LFS_STARVED_FOR_SEGS(fs))
		mtsleep(&fs->lfs_avail, PRIBIO, "relock", 0,
			&lfs_lock);
	mutex_exit(&lfs_lock);

	/* Put the segment lock back the way it was. */
	while (n--)
		lfs_seglock(fs, seg_flags);

	/* Cleaner can relax now */
	LFS_CLEANERINFO(cip, fs, bp);
	cip->flags &= ~LFS_CLEANER_MUST_CLEAN;
	LFS_SYNC_CLEANERINFO(cip, fs, bp, 1);

	return;
}
/*
 * Wake up the cleaner, provided that nowrap is not set.
 */
void
lfs_wakeup_cleaner(struct lfs *fs)
{
	if (fs->lfs_nowrap > 0)
		return;

	wakeup(&fs->lfs_nextseg);
	wakeup(&lfs_allclean_wakeup);
}
);