/*	$NetBSD: lfs_subr.c,v 1.86 2015/10/03 08:28:16 dholland Exp $	*/

/*-
 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Konrad E. Schroder <perseant@hhhh.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)lfs_subr.c	8.4 (Berkeley) 5/8/95
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_subr.c,v 1.86 2015/10/03 08:28:16 dholland Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/kauth.h>

#include <ufs/lfs/ulfs_inode.h>
#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_accessors.h>
#include <ufs/lfs/lfs_kernel.h>
#include <ufs/lfs/lfs_extern.h>
const char *lfs_res_names[LFS_NB_COUNT] = {
	"summary",
	"superblock",
	"file block",
	"cluster",
	"clean",
	"blkiov",
};

int lfs_res_qty[LFS_NB_COUNT] = {
	LFS_N_SUMMARIES,
	LFS_N_SBLOCKS,
	LFS_N_IBLOCKS,
	LFS_N_CLUSTERS,
	LFS_N_CLEAN,
	LFS_N_BLKIOV,
};
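
/*
 * Both tables above are indexed by the LFS_NB_* reserved-buffer type.
 * Their order must match the size-assignment loops in
 * lfs_setup_resblks() below: lfs_malloc() locates the reserve blocks
 * for a type by summing lfs_res_qty[] over all preceding types.
 */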
void
lfs_setup_resblks(struct lfs *fs)
{
	int i, j;
	int maxbpp;

	ASSERT_NO_SEGLOCK(fs);
	fs->lfs_resblk = malloc(LFS_N_TOTAL * sizeof(res_t), M_SEGMENT,
				M_WAITOK);
	for (i = 0; i < LFS_N_TOTAL; i++) {
		fs->lfs_resblk[i].inuse = 0;
		fs->lfs_resblk[i].p = NULL;
	}
	for (i = 0; i < LFS_RESHASH_WIDTH; i++)
		LIST_INIT(fs->lfs_reshash + i);

	/*
	 * These types of allocations can be larger than a page,
	 * so we can't use the pool subsystem for them.
	 */
	for (i = 0, j = 0; j < LFS_N_SUMMARIES; j++, i++)
		fs->lfs_resblk[i].size = lfs_sb_getsumsize(fs);
	for (j = 0; j < LFS_N_SBLOCKS; j++, i++)
		fs->lfs_resblk[i].size = LFS_SBPAD;
	for (j = 0; j < LFS_N_IBLOCKS; j++, i++)
		fs->lfs_resblk[i].size = lfs_sb_getbsize(fs);
	for (j = 0; j < LFS_N_CLUSTERS; j++, i++)
		fs->lfs_resblk[i].size = MAXPHYS;
	for (j = 0; j < LFS_N_CLEAN; j++, i++)
		fs->lfs_resblk[i].size = MAXPHYS;
	for (j = 0; j < LFS_N_BLKIOV; j++, i++)
		fs->lfs_resblk[i].size = LFS_MARKV_MAXBLKCNT * sizeof(BLOCK_INFO);

	for (i = 0; i < LFS_N_TOTAL; i++) {
		fs->lfs_resblk[i].p = malloc(fs->lfs_resblk[i].size,
					     M_SEGMENT, M_WAITOK);
	}

	/*
	 * Initialize pools for small types (XXX is BPP small?)
	 */
	pool_init(&fs->lfs_clpool, sizeof(struct lfs_cluster), 0, 0, 0,
		"lfsclpl", &pool_allocator_nointr, IPL_NONE);
	pool_init(&fs->lfs_segpool, sizeof(struct segment), 0, 0, 0,
		"lfssegpool", &pool_allocator_nointr, IPL_NONE);
	/* XXX: should this int32 be 32/64? */
	maxbpp = ((lfs_sb_getsumsize(fs) - SEGSUM_SIZE(fs)) / sizeof(int32_t) + 2);
	maxbpp = MIN(maxbpp, lfs_segsize(fs) / lfs_sb_getfsize(fs) + 2);
	pool_init(&fs->lfs_bpppool, maxbpp * sizeof(struct buf *), 0, 0, 0,
		"lfsbpppl", &pool_allocator_nointr, IPL_NONE);
}
void
lfs_free_resblks(struct lfs *fs)
{
	int i;

	pool_destroy(&fs->lfs_bpppool);
	pool_destroy(&fs->lfs_segpool);
	pool_destroy(&fs->lfs_clpool);

	mutex_enter(&lfs_lock);
	for (i = 0; i < LFS_N_TOTAL; i++) {
		while (fs->lfs_resblk[i].inuse)
			mtsleep(&fs->lfs_resblk, PRIBIO + 1, "lfs_free", 0,
				&lfs_lock);
		if (fs->lfs_resblk[i].p != NULL)
			free(fs->lfs_resblk[i].p, M_SEGMENT);
	}
	free(fs->lfs_resblk, M_SEGMENT);
	mutex_exit(&lfs_lock);
}
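
/*
 * Hash a pointer into the reserve-block hash table.  The low bits of
 * any pointer we hand out are zero (all reserve blocks are at least
 * word-aligned), so shift two bits off before taking the modulus to
 * spread entries across the LFS_RESHASH_WIDTH chains.
 */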
static unsigned int
lfs_mhash(void *vp)
{
	return (unsigned int)(((unsigned long)vp) >> 2) % LFS_RESHASH_WIDTH;
}
/*
 * Return memory of the given size for the given purpose, or use one of a
 * number of spare last-resort buffers, if malloc returns NULL.
 */
void *
lfs_malloc(struct lfs *fs, size_t size, int type)
{
	struct lfs_res_blk *re;
	int i;
	int start;
	unsigned int h;
	void *r;

	ASSERT_MAYBE_SEGLOCK(fs);
	r = NULL;

	/* If no mem allocated for this type, it just waits */
	if (lfs_res_qty[type] == 0) {
		r = malloc(size, M_SEGMENT, M_WAITOK);
		return r;
	}

	/* Otherwise try a quick malloc, and if it works, great */
	if ((r = malloc(size, M_SEGMENT, M_NOWAIT)) != NULL) {
		return r;
	}

	/*
	 * If malloc returned NULL, we are forced to use one of our
	 * reserve blocks.  We have on hand at least one summary block,
	 * at least one cluster block, at least one superblock,
	 * and several indirect blocks.
	 */

	mutex_enter(&lfs_lock);
	/* skip over blocks of other types */
	for (i = 0, start = 0; i < type; i++)
		start += lfs_res_qty[i];
	while (r == NULL) {
		for (i = 0; i < lfs_res_qty[type]; i++) {
			if (fs->lfs_resblk[start + i].inuse == 0) {
				re = fs->lfs_resblk + start + i;
				re->inuse = 1;
				r = re->p;
				KASSERT(re->size >= size);
				h = lfs_mhash(r);
				LIST_INSERT_HEAD(&fs->lfs_reshash[h], re, res);
				mutex_exit(&lfs_lock);
				return r;
			}
		}
		DLOG((DLOG_MALLOC, "sleeping on %s (%d)\n",
		      lfs_res_names[type], lfs_res_qty[type]));
		mtsleep(&fs->lfs_resblk, PVM, "lfs_malloc", 0,
			&lfs_lock);
		DLOG((DLOG_MALLOC, "done sleeping on %s\n",
		      lfs_res_names[type]));
	}
	/* NOTREACHED */
	mutex_exit(&lfs_lock);
	return r;
}
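
/*
 * Typical usage pairs lfs_malloc() with lfs_free() of the same type,
 * e.g. (illustrative only):
 *
 *	void *sum = lfs_malloc(fs, lfs_sb_getsumsize(fs), LFS_NB_SUMMARY);
 *	...
 *	lfs_free(fs, sum, LFS_NB_SUMMARY);
 *
 * lfs_free() below consults the reserve hash to tell whether a pointer
 * is a reserve block (marked free and returned to the pool) or plain
 * malloc'ed memory (released to M_SEGMENT).
 */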
void
lfs_free(struct lfs *fs, void *p, int type)
{
	unsigned int h;
	res_t *re;
#ifdef DEBUG
	int i;
#endif

	ASSERT_MAYBE_SEGLOCK(fs);
	h = lfs_mhash(p);
	mutex_enter(&lfs_lock);

	LIST_FOREACH(re, &fs->lfs_reshash[h], res) {
		if (re->p == p) {
			KASSERT(re->inuse == 1);
			LIST_REMOVE(re, res);
			re->inuse = 0;
			wakeup(&fs->lfs_resblk);
			mutex_exit(&lfs_lock);
			return;
		}
	}

#ifdef DEBUG
	for (i = 0; i < LFS_N_TOTAL; i++) {
		if (fs->lfs_resblk[i].p == p)
			panic("lfs_free: inconsistent reserved block");
	}
#endif

	mutex_exit(&lfs_lock);

	/*
	 * If we didn't find it, free it.
	 */
	free(p, M_SEGMENT);
}
/*
 * Single thread the segment writer.
 */
int
lfs_seglock(struct lfs *fs, unsigned long flags)
{
	struct segment *sp;

	mutex_enter(&lfs_lock);
	if (fs->lfs_seglock) {
		if (fs->lfs_lockpid == curproc->p_pid &&
		    fs->lfs_locklwp == curlwp->l_lid) {
			++fs->lfs_seglock;
			fs->lfs_sp->seg_flags |= flags;
			mutex_exit(&lfs_lock);
			return 0;
		} else if (flags & SEGM_PAGEDAEMON) {
			mutex_exit(&lfs_lock);
			return EWOULDBLOCK;
		} else {
			while (fs->lfs_seglock) {
				(void)mtsleep(&fs->lfs_seglock, PRIBIO + 1,
					"lfs_seglock", 0, &lfs_lock);
			}
		}
	}

	fs->lfs_seglock = 1;
	fs->lfs_lockpid = curproc->p_pid;
	fs->lfs_locklwp = curlwp->l_lid;
	mutex_exit(&lfs_lock);
	fs->lfs_cleanind = 0;

#ifdef DEBUG
	LFS_ENTER_LOG("seglock", __FILE__, __LINE__, 0, flags, curproc->p_pid);
#endif
	/* Drain fragment size changes out */
	rw_enter(&fs->lfs_fraglock, RW_WRITER);

	sp = fs->lfs_sp = pool_get(&fs->lfs_segpool, PR_WAITOK);
	sp->bpp = pool_get(&fs->lfs_bpppool, PR_WAITOK);
	sp->seg_flags = flags;
	sp->vp = NULL;
	sp->seg_iocount = 0;
	(void) lfs_initseg(fs);

	/*
	 * Keep a cumulative count of the outstanding I/O operations.  If the
	 * disk drive catches up with us it could go to zero before we finish,
	 * so we artificially increment it by one until we've scheduled all of
	 * the writes we intend to do.
	 */
	mutex_enter(&lfs_lock);
	++fs->lfs_iocount;
	fs->lfs_startseg = lfs_sb_getcurseg(fs);
	mutex_exit(&lfs_lock);
	return 0;
}
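
/*
 * Callers bracket segment writing with the segment lock, e.g.
 * (illustrative only):
 *
 *	lfs_seglock(fs, SEGM_PROT);
 *	... gather and write blocks through fs->lfs_sp ...
 *	lfs_segunlock(fs);
 *
 * The lock counts recursively for the owning LWP, so nested
 * seglock/segunlock pairs are safe; only the outermost unlock tears
 * down fs->lfs_sp and wakes waiters.
 */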
static void lfs_unmark_dirop(struct lfs *);

static void
lfs_unmark_dirop(struct lfs *fs)
{
	struct inode *ip, *nip;
	struct vnode *vp;
	int doit;

	ASSERT_NO_SEGLOCK(fs);
	mutex_enter(&lfs_lock);
	doit = !(fs->lfs_flags & LFS_UNDIROP);
	if (doit)
		fs->lfs_flags |= LFS_UNDIROP;
	if (!doit) {
		mutex_exit(&lfs_lock);
		return;
	}

	for (ip = TAILQ_FIRST(&fs->lfs_dchainhd); ip != NULL; ip = nip) {
		nip = TAILQ_NEXT(ip, i_lfs_dchain);
		vp = ITOV(ip);
		if ((ip->i_flag & (IN_ADIROP | IN_CDIROP)) == IN_CDIROP) {
			--lfs_dirvcount;
			--fs->lfs_dirvcount;
			vp->v_uflag &= ~VU_DIROP;
			TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain);
			wakeup(&lfs_dirvcount);
			fs->lfs_unlockvp = vp;
			mutex_exit(&lfs_lock);
			vrele(vp);
			mutex_enter(&lfs_lock);
			fs->lfs_unlockvp = NULL;
			ip->i_flag &= ~IN_CDIROP;
		}
	}

	fs->lfs_flags &= ~LFS_UNDIROP;
	wakeup(&fs->lfs_flags);
	mutex_exit(&lfs_lock);
}
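
/*
 * Note that vrele() may take locks or reclaim the vnode, so it cannot
 * be called with lfs_lock held; fs->lfs_unlockvp marks the vnode that
 * is in transit while the lock is dropped.
 */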
static void
lfs_auto_segclean(struct lfs *fs)
{
	int i, error, s, waited;

	ASSERT_SEGLOCK(fs);
	/*
	 * Now that we've swapped lfs_activesb, but while we still
	 * hold the segment lock, run through the segment list marking
	 * the empty ones clean.
	 * XXX - do we really need to do them all at once?
	 */
	waited = 0;
	for (i = 0; i < lfs_sb_getnseg(fs); i++) {
		if ((fs->lfs_suflags[0][i] &
		     (SEGUSE_ACTIVE | SEGUSE_DIRTY | SEGUSE_EMPTY)) ==
		    (SEGUSE_DIRTY | SEGUSE_EMPTY) &&
		    (fs->lfs_suflags[1][i] &
		     (SEGUSE_ACTIVE | SEGUSE_DIRTY | SEGUSE_EMPTY)) ==
		    (SEGUSE_DIRTY | SEGUSE_EMPTY)) {

			/* Make sure the sb is written before we clean */
			mutex_enter(&lfs_lock);
			s = splbio();
			while (waited == 0 && fs->lfs_sbactive)
				mtsleep(&fs->lfs_sbactive, PRIBIO+1, "lfs asb",
					0, &lfs_lock);
			splx(s);
			mutex_exit(&lfs_lock);
			waited = 1;

			if ((error = lfs_do_segclean(fs, i)) != 0) {
				DLOG((DLOG_CLEAN, "lfs_auto_segclean: lfs_do_segclean returned %d for seg %d\n", error, i));
			}
		}
		fs->lfs_suflags[1 - fs->lfs_activesb][i] =
			fs->lfs_suflags[fs->lfs_activesb][i];
	}
}
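
/*
 * A segment is reclaimed above only when both superblocks' flag tables
 * agree that it is dirty and empty, i.e. its emptiness has survived two
 * checkpoints; cleaning it any earlier could discard blocks still
 * needed to roll forward from the older checkpoint.
 */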
/*
 * Single thread the segment writer.
 */
void
lfs_segunlock(struct lfs *fs)
{
	struct segment *sp;
	unsigned long sync, ckp;
	struct buf *bp;
	int do_unmark_dirop = 0;

	sp = fs->lfs_sp;

	mutex_enter(&lfs_lock);
	KASSERT(LFS_SEGLOCK_HELD(fs));
	if (fs->lfs_seglock == 1) {
		if ((sp->seg_flags & (SEGM_PROT | SEGM_CLEAN)) == 0)
			do_unmark_dirop = 1;
		mutex_exit(&lfs_lock);
		sync = sp->seg_flags & SEGM_SYNC;
		ckp = sp->seg_flags & SEGM_CKP;

		/* We should have a segment summary, and nothing else */
		KASSERT(sp->cbpp == sp->bpp + 1);

		/* Free allocated segment summary */
		lfs_sb_suboffset(fs, lfs_btofsb(fs, lfs_sb_getsumsize(fs)));
		bp = *sp->bpp;
		lfs_freebuf(fs, bp);

		pool_put(&fs->lfs_bpppool, sp->bpp);
		sp->bpp = NULL;

		/*
		 * If we're not sync, we're done with sp, get rid of it.
		 * Otherwise, we keep a local copy around but free
		 * fs->lfs_sp so another process can use it (we have to
		 * wait but they don't have to wait for us).
		 */
		if (!sync)
			pool_put(&fs->lfs_segpool, sp);
		fs->lfs_sp = NULL;

		/*
		 * If the I/O count is non-zero, sleep until it reaches zero.
		 * At the moment, the user's process hangs around so we can
		 * sleep.
		 */
		mutex_enter(&lfs_lock);
		if (--fs->lfs_iocount == 0) {
			LFS_DEBUG_COUNTLOCKED("lfs_segunlock");
		}
		if (fs->lfs_iocount <= 1)
			wakeup(&fs->lfs_iocount);
		mutex_exit(&lfs_lock);
		/*
		 * If we're not checkpointing, we don't have to block
		 * other processes to wait for a synchronous write
		 * to complete.
		 */
		if (!ckp) {
#ifdef DEBUG
			LFS_ENTER_LOG("segunlock_std", __FILE__, __LINE__, 0, 0, curproc->p_pid);
#endif
			mutex_enter(&lfs_lock);
			--fs->lfs_seglock;
			fs->lfs_lockpid = 0;
			fs->lfs_locklwp = 0;
			mutex_exit(&lfs_lock);
			wakeup(&fs->lfs_seglock);
		}
		/*
		 * We let checkpoints happen asynchronously.  That means
		 * that during recovery, we have to roll forward between
		 * the two segments described by the first and second
		 * superblocks to make sure that the checkpoint described
		 * by a superblock completed.
		 */
		mutex_enter(&lfs_lock);
		while (ckp && sync && fs->lfs_iocount) {
			(void)mtsleep(&fs->lfs_iocount, PRIBIO + 1,
				      "lfs_iocount", 0, &lfs_lock);
			DLOG((DLOG_SEG, "sleeping on iocount %x == %d\n", fs, fs->lfs_iocount));
		}
		while (sync && sp->seg_iocount) {
			(void)mtsleep(&sp->seg_iocount, PRIBIO + 1,
				     "seg_iocount", 0, &lfs_lock);
			DLOG((DLOG_SEG, "sleeping on iocount %x == %d\n", sp, sp->seg_iocount));
		}
		mutex_exit(&lfs_lock);
		if (sync)
			pool_put(&fs->lfs_segpool, sp);

		if (ckp) {
			fs->lfs_nactive = 0;
			/* If we *know* everything's on disk, write both sbs */
			/* XXX should wait for this one */
			if (sync)
				lfs_writesuper(fs, lfs_sb_getsboff(fs, fs->lfs_activesb));
			lfs_writesuper(fs, lfs_sb_getsboff(fs, 1 - fs->lfs_activesb));
			if (!(fs->lfs_ivnode->v_mount->mnt_iflag & IMNT_UNMOUNT)) {
				lfs_auto_segclean(fs);
				/* If sync, we can clean the remainder too */
				if (sync)
					lfs_auto_segclean(fs);
			}
			fs->lfs_activesb = 1 - fs->lfs_activesb;
#ifdef DEBUG
			LFS_ENTER_LOG("segunlock_ckp", __FILE__, __LINE__, 0, 0, curproc->p_pid);
#endif
			mutex_enter(&lfs_lock);
			--fs->lfs_seglock;
			fs->lfs_lockpid = 0;
			fs->lfs_locklwp = 0;
			mutex_exit(&lfs_lock);
			wakeup(&fs->lfs_seglock);
		}
		/* Reenable fragment size changes */
		rw_exit(&fs->lfs_fraglock);
		if (do_unmark_dirop)
			lfs_unmark_dirop(fs);
	} else if (fs->lfs_seglock == 0) {
		mutex_exit(&lfs_lock);
		panic ("Seglock not held");
	} else {
		--fs->lfs_seglock;
		mutex_exit(&lfs_lock);
	}
}
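
/*
 * Note the checkpoint ordering above: both superblocks are rewritten
 * before lfs_activesb is flipped and the newly empty segments are
 * reused, so recovery can always find at least one superblock that
 * describes a completed checkpoint.
 */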
/*
 * Drain dirops and start writer.
 *
 * No simple_locks are held when we enter and none are held when we return.
 */
int
lfs_writer_enter(struct lfs *fs, const char *wmesg)
{
	int error = 0;

	ASSERT_MAYBE_SEGLOCK(fs);
	mutex_enter(&lfs_lock);

	/* disallow dirops during flush */
	fs->lfs_writer++;

	while (fs->lfs_dirops > 0) {
		++fs->lfs_diropwait;
		error = mtsleep(&fs->lfs_writer, PRIBIO+1, wmesg, 0,
				&lfs_lock);
		--fs->lfs_diropwait;
	}

	if (error)
		fs->lfs_writer--;

	mutex_exit(&lfs_lock);

	return error;
}

void
lfs_writer_leave(struct lfs *fs)
{
	bool dowakeup;

	ASSERT_MAYBE_SEGLOCK(fs);
	mutex_enter(&lfs_lock);
	dowakeup = !(--fs->lfs_writer);
	mutex_exit(&lfs_lock);
	if (dowakeup)
		wakeup(&fs->lfs_dirops);
}
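
/*
 * Callers bracket a flush with this pair, e.g. (illustrative only):
 *
 *	lfs_writer_enter(fs, "flushdirop");
 *	... flush, with new dirops held off ...
 *	lfs_writer_leave(fs);
 *
 * While fs->lfs_writer is nonzero, new directory operations wait; the
 * last leaver wakes anything sleeping on fs->lfs_dirops.
 */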
/*
 * Unlock, wait for the cleaner, then relock to where we were before.
 * To be used only at a fairly high level, to address a paucity of free
 * segments propagated back from lfs_gop_write().
 */
void
lfs_segunlock_relock(struct lfs *fs)
{
	int n = fs->lfs_seglock;
	u_int16_t seg_flags;
	CLEANERINFO *cip;
	struct buf *bp;

	if (n == 0)
		return;

	/* Write anything we've already gathered to disk */
	lfs_writeseg(fs, fs->lfs_sp);

	/* Tell cleaner */
	LFS_CLEANERINFO(cip, fs, bp);
	lfs_ci_setflags(fs, cip,
			lfs_ci_getflags(fs, cip) | LFS_CLEANER_MUST_CLEAN);
	LFS_SYNC_CLEANERINFO(cip, fs, bp, 1);

	/* Save segment flags for later */
	seg_flags = fs->lfs_sp->seg_flags;

	fs->lfs_sp->seg_flags |= SEGM_PROT; /* Don't unmark dirop nodes */
	while (fs->lfs_seglock)
		lfs_segunlock(fs);

	/* Wait for the cleaner */
	lfs_wakeup_cleaner(fs);
	mutex_enter(&lfs_lock);
	while (LFS_STARVED_FOR_SEGS(fs))
		mtsleep(&fs->lfs_availsleep, PRIBIO, "relock", 0,
			&lfs_lock);
	mutex_exit(&lfs_lock);

	/* Put the segment lock back the way it was. */
	while (n--)
		lfs_seglock(fs, seg_flags);

	/* Cleaner can relax now */
	LFS_CLEANERINFO(cip, fs, bp);
	lfs_ci_setflags(fs, cip,
			lfs_ci_getflags(fs, cip) & ~LFS_CLEANER_MUST_CLEAN);
	LFS_SYNC_CLEANERINFO(cip, fs, bp, 1);

	return;
}
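
/*
 * Because the segment lock counts recursively, the saved depth n is
 * restored above by simply taking the lock n times with the original
 * seg_flags.
 */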
/*
 * Wake up the cleaner, provided that nowrap is not set.
 */
void
lfs_wakeup_cleaner(struct lfs *fs)
{
	if (fs->lfs_nowrap > 0)
		return;

	wakeup(&fs->lfs_nextsegsleep);
	wakeup(&lfs_allclean_wakeup);
}