1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
30 * University Copyright- Copyright (c) 1982, 1986, 1988
31 * The Regents of the University of California
32 * All Rights Reserved
34 * University Acknowledgment- Portions of this document are derived from
35 * software developed by the University of California, Berkeley, and its
36 * contributors.
39 #include <sys/types.h>
40 #include <sys/t_lock.h>
41 #include <sys/param.h>
42 #include <sys/time.h>
43 #include <sys/fs/ufs_fs.h>
44 #include <sys/cmn_err.h>
46 #ifdef _KERNEL
48 #include <sys/systm.h>
49 #include <sys/sysmacros.h>
50 #include <sys/buf.h>
51 #include <sys/conf.h>
52 #include <sys/user.h>
53 #include <sys/var.h>
54 #include <sys/vfs.h>
55 #include <sys/vnode.h>
56 #include <sys/proc.h>
57 #include <sys/debug.h>
58 #include <sys/fssnap_if.h>
59 #include <sys/fs/ufs_inode.h>
60 #include <sys/fs/ufs_trans.h>
61 #include <sys/fs/ufs_panic.h>
62 #include <sys/fs/ufs_bio.h>
63 #include <sys/fs/ufs_log.h>
64 #include <sys/kmem.h>
65 #include <sys/policy.h>
66 #include <vm/hat.h>
67 #include <vm/as.h>
68 #include <vm/seg.h>
69 #include <vm/pvn.h>
70 #include <vm/seg_map.h>
71 #include <sys/swap.h>
72 #include <vm/seg_kmem.h>
74 #else /* _KERNEL */
76 #define ASSERT(x) /* don't use asserts for fsck et al */
78 #endif /* _KERNEL */
80 #ifdef _KERNEL
83 * Used to verify that a given entry on the ufs_instances list (see below)
84 * still refers to a mounted file system.
86 * XXX: This is a crock that substitutes for proper locking to coordinate
87 * updates to and uses of the entries in ufs_instances.
89 struct check_node {
90 struct vfs *vfsp;
91 struct ufsvfs *ufsvfs;
92 dev_t vfs_dev;
95 static vfs_t *still_mounted(struct check_node *);
98 * All ufs file system instances are linked together into a list starting at
99 * ufs_instances. The list is updated as part of mount and unmount. It's
100 * consulted in ufs_update, to allow syncing out all ufs file system instances
101 * in a batch.
103 * ufsvfs_mutex guards access to this list and to the {,old}ufsvfslist
104 * manipulated in ufs_funmount_cleanup. (A given ufs instance is always on
105 * exactly one of these lists except while it's being allocated or
106 * deallocated.)
108 struct ufsvfs *ufs_instances;
109 extern kmutex_t ufsvfs_mutex; /* XXX: move this to ufs_inode.h? */
112 * ufsvfs list manipulation routines
116 * Link ufsp in at the head of the list of ufs_instances.
118 void
119 ufs_vfs_add(struct ufsvfs *ufsp)
121 mutex_enter(&ufsvfs_mutex);
122 ufsp->vfs_next = ufs_instances;
123 ufs_instances = ufsp;
124 mutex_exit(&ufsvfs_mutex);
128 * Remove ufsp from the list of ufs_instances.
130 * Does no error checking; ufsp is assumed to actually be on the list.
132 void
133 ufs_vfs_remove(struct ufsvfs *ufsp)
135 struct ufsvfs **delpt = &ufs_instances;
137 mutex_enter(&ufsvfs_mutex);
138 for (; *delpt != NULL; delpt = &((*delpt)->vfs_next)) {
139 if (*delpt == ufsp) {
140 *delpt = ufsp->vfs_next;
141 ufsp->vfs_next = NULL;
142 break;
145 mutex_exit(&ufsvfs_mutex);
149 * Clean up state resulting from a forcible unmount that couldn't be handled
150 * directly during the unmount. (See commentary in the unmount code for more
151 * info.)
153 static void
154 ufs_funmount_cleanup()
156 struct ufsvfs *ufsvfsp;
157 extern struct ufsvfs *oldufsvfslist, *ufsvfslist;
160 * Assumption: it's now safe to blow away the entries on
161 * oldufsvfslist.
163 mutex_enter(&ufsvfs_mutex);
164 while ((ufsvfsp = oldufsvfslist) != NULL) {
165 oldufsvfslist = ufsvfsp->vfs_next;
167 mutex_destroy(&ufsvfsp->vfs_lock);
168 kmem_free(ufsvfsp, sizeof (struct ufsvfs));
171 * Rotate more recent unmount entries into place in preparation for
172 * the next time around.
174 oldufsvfslist = ufsvfslist;
175 ufsvfslist = NULL;
176 mutex_exit(&ufsvfs_mutex);
181 * ufs_update performs the ufs part of `sync'. It goes through the disk
182 * queues to initiate sandbagged IO; goes through the inodes to write
183 * modified nodes; and it goes through the mount table to initiate
184 * the writing of the modified super blocks.
186 extern time_t time;
187 time_t ufs_sync_time;
188 time_t ufs_sync_time_secs = 1;
190 extern kmutex_t ufs_scan_lock;
192 void
193 ufs_update(int flag)
195 struct vfs *vfsp;
196 struct fs *fs;
197 struct ufsvfs *ufsp;
198 struct ufsvfs *ufsnext;
199 struct ufsvfs *update_list = NULL;
200 int check_cnt = 0;
201 size_t check_size;
202 struct check_node *check_list, *ptr;
203 int cheap = flag & SYNC_ATTR;
206 * This is a hack. A design flaw in the forced unmount protocol
207 * could allow a thread to attempt to use a kmem_freed ufsvfs
208 * structure in ufs_lockfs_begin/ufs_check_lockfs. This window
209 * is difficult to hit, even during the lockfs stress tests.
210 * So the hacky fix is to wait awhile before kmem_free'ing the
211 * ufsvfs structures for forcibly unmounted file systems. `Awhile'
212 * is defined as every other call from fsflush (~60 seconds).
214 if (cheap)
215 ufs_funmount_cleanup();
218 * Examine all ufsvfs structures and add those that we can lock to the
219 * update list. This is so that we don't hold the list lock for a
220 * long time. If vfs_lock fails for a file system instance, then skip
 221  * it because somebody is doing an unmount on it.
223 mutex_enter(&ufsvfs_mutex);
224 for (ufsp = ufs_instances; ufsp != NULL; ufsp = ufsp->vfs_next) {
225 vfsp = ufsp->vfs_vfs;
226 if (vfs_lock(vfsp) != 0)
227 continue;
228 ufsp->vfs_wnext = update_list;
229 update_list = ufsp;
230 check_cnt++;
232 mutex_exit(&ufsvfs_mutex);
234 if (update_list == NULL)
235 return;
237 check_size = sizeof (struct check_node) * check_cnt;
238 check_list = ptr = kmem_alloc(check_size, KM_NOSLEEP);
241 * Write back modified superblocks.
242 * Consistency check that the superblock of
243 * each file system is still in the buffer cache.
245 * Note that the update_list traversal is done without the protection
246 * of an overall list lock, so it's necessary to rely on the fact that
247 * each entry of the list is vfs_locked when moving from one entry to
248 * the next. This works because a concurrent attempt to add an entry
249 * to another thread's update_list won't find it, since it'll already
250 * be locked.
252 check_cnt = 0;
253 for (ufsp = update_list; ufsp != NULL; ufsp = ufsnext) {
255 * Need to grab the next ptr before we unlock this one so
256 * another thread doesn't grab it and change it before we move
257 * on to the next vfs. (Once we unlock it, it's ok if another
258 * thread finds it to add it to its own update_list; we don't
259 * attempt to refer to it through our list any more.)
261 ufsnext = ufsp->vfs_wnext;
262 vfsp = ufsp->vfs_vfs;
265 * Seems like this can't happen, so perhaps it should become
266 * an ASSERT(vfsp->vfs_data != NULL).
268 if (!vfsp->vfs_data) {
269 vfs_unlock(vfsp);
270 continue;
273 fs = ufsp->vfs_fs;
276 * don't update a locked superblock during a panic; it
277 * may be in an inconsistent state
279 if (panicstr) {
280 if (!mutex_tryenter(&ufsp->vfs_lock)) {
281 vfs_unlock(vfsp);
282 continue;
284 } else
285 mutex_enter(&ufsp->vfs_lock);
287 * Build up the STABLE check list, so we can unlock the vfs
288 * until we do the actual checking.
290 if (check_list != NULL) {
291 if ((fs->fs_ronly == 0) &&
292 (fs->fs_clean != FSBAD) &&
293 (fs->fs_clean != FSSUSPEND)) {
294 ptr->vfsp = vfsp;
295 ptr->ufsvfs = ufsp;
296 ptr->vfs_dev = vfsp->vfs_dev;
297 ptr++;
298 check_cnt++;
303 * superblock is not modified
305 if (fs->fs_fmod == 0) {
306 mutex_exit(&ufsp->vfs_lock);
307 vfs_unlock(vfsp);
308 continue;
310 if (fs->fs_ronly != 0) {
311 mutex_exit(&ufsp->vfs_lock);
312 vfs_unlock(vfsp);
313 (void) ufs_fault(ufsp->vfs_root,
314 "fs = %s update: ro fs mod\n", fs->fs_fsmnt);
316 * XXX: Why is this a return instead of a continue?
317 * This may be an attempt to replace a panic with
318 * something less drastic, but there's cleanup we
319 * should be doing that's not being done (e.g.,
320 * unlocking the remaining entries on the list).
322 return;
324 fs->fs_fmod = 0;
325 mutex_exit(&ufsp->vfs_lock);
326 TRANS_SBUPDATE(ufsp, vfsp, TOP_SBUPDATE_UPDATE);
327 vfs_unlock(vfsp);
330 ufs_sync_time = time;
333 * Avoid racing with ufs_unmount() and ufs_sync().
335 mutex_enter(&ufs_scan_lock);
337 (void) ufs_scan_inodes(1, ufs_sync_inode, (void *)(uintptr_t)cheap,
338 NULL);
340 mutex_exit(&ufs_scan_lock);
343 * Force stale buffer cache information to be flushed,
344 * for all devices. This should cause any remaining control
345 * information (e.g., cg and inode info) to be flushed back.
347 bflush((dev_t)NODEV);
349 if (check_list == NULL)
350 return;
353 * For each UFS filesystem in the STABLE check_list, update
354 * the clean flag if warranted.
356 for (ptr = check_list; check_cnt > 0; check_cnt--, ptr++) {
357 int error;
360 * still_mounted() returns with vfsp and the vfs_reflock
361 * held if ptr refers to a vfs that is still mounted.
363 if ((vfsp = still_mounted(ptr)) == NULL)
364 continue;
365 ufs_checkclean(vfsp);
367 * commit any outstanding async transactions
369 ufsp = (struct ufsvfs *)vfsp->vfs_data;
370 curthread->t_flag |= T_DONTBLOCK;
371 TRANS_BEGIN_SYNC(ufsp, TOP_COMMIT_UPDATE, TOP_COMMIT_SIZE,
372 &error);
373 if (!error) {
374 TRANS_END_SYNC(ufsp, &error, TOP_COMMIT_UPDATE,
375 TOP_COMMIT_SIZE);
377 curthread->t_flag &= ~T_DONTBLOCK;
379 vfs_unlock(vfsp);
382 kmem_free(check_list, check_size);
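/*
 * Per-inode callback handed to ufs_scan_inodes() by ufs_update() above.
 * It decides whether this inode needs to be pushed at all and, if so,
 * either does a delayed inode update (when running on behalf of the flush
 * thread or for swap files) or pushes the pages asynchronously and then
 * does a delayed inode update.
 */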
386 ufs_sync_inode(struct inode *ip, void *arg)
388 int cheap = (int)(uintptr_t)arg;
389 struct ufsvfs *ufsvfsp;
390 uint_t flag = ip->i_flag;
392 if (cheap && ((flag & (IUPD|IACC|ICHG|IMOD|IMODACC|IATTCHG)) == 0))
393 return (0);
 396  * if we are panic'ing, then don't update the inode if this
397 * file system is FSSTABLE. Otherwise, we would have to
398 * force the superblock to FSACTIVE and the superblock
399 * may not be in a good state. Also, if the inode is
400 * IREF'ed then it may be in an inconsistent state. Don't
401 * push it. Finally, don't push the inode if the fs is
402 * logging; the transaction will be discarded at boot.
404 if (panicstr) {
406 if (flag & IREF)
407 return (0);
409 if (ip->i_ufsvfs == NULL ||
410 (ip->i_fs->fs_clean == FSSTABLE ||
411 ip->i_fs->fs_clean == FSLOG))
412 return (0);
415 ufsvfsp = ip->i_ufsvfs;
 418  * Limit access-time-only updates
420 if (((flag & (IMOD|IMODACC|IUPD|ICHG|IACC)) == IMODACC) && ufsvfsp) {
422 * if file system has deferred access time turned on and there
423 * was no IO recently, don't bother flushing it. It will be
424 * flushed when I/Os start again.
426 if (cheap && (ufsvfsp->vfs_dfritime & UFS_DFRATIME) &&
427 (ufsvfsp->vfs_iotstamp + ufs_iowait < ddi_get_lbolt()))
428 return (0);
 430  * an app issuing a sync() can take forever on a trans device
431 * when NetWorker or find is running because all of the
 432  * directories' access times have to be updated. So, we limit
433 * the time we spend updating access times per sync.
435 if (TRANS_ISTRANS(ufsvfsp) && ((ufs_sync_time +
436 ufs_sync_time_secs) < time))
437 return (0);
441 * if we are running on behalf of the flush thread or this is
 442  * a swap file, then simply do a delayed update of the inode.
443 * Otherwise, push the pages and then do a delayed inode update.
445 if (cheap || IS_SWAPVP(ITOV(ip))) {
446 TRANS_IUPDAT(ip, 0);
447 } else {
448 (void) TRANS_SYNCIP(ip, B_ASYNC, I_ASYNC, TOP_SYNCIP_SYNC);
450 return (0);
454 * Flush all the pages associated with an inode using the given 'flags',
455 * then force inode information to be written back using the given 'waitfor'.
458 ufs_syncip(struct inode *ip, int flags, int waitfor, top_t topid)
460 int error;
461 struct vnode *vp = ITOV(ip);
462 struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
463 int dotrans = 0;
466 * Return if file system has been forcibly umounted.
468 if (ufsvfsp == NULL)
469 return (EIO);
471 * don't need to fop_putpage if there are no pages
473 if (!vn_has_cached_data(vp) || vp->v_type == VCHR) {
474 error = 0;
475 } else {
477 * if the inode we're working on is a shadow inode
478 * or quota inode we need to make sure that the
479 * ufs_putpage call is inside a transaction as this
480 * could include meta data changes.
482 if ((ip->i_mode & IFMT) == IFSHAD ||
483 ufsvfsp->vfs_qinod == ip) {
484 dotrans = 1;
485 curthread->t_flag |= T_DONTBLOCK;
486 TRANS_BEGIN_ASYNC(ufsvfsp, TOP_PUTPAGE,
487 TOP_PUTPAGE_SIZE(ip));
489 error = fop_putpage(vp, (offset_t)0, (size_t)0,
490 flags, CRED(), NULL);
491 if (dotrans) {
492 TRANS_END_ASYNC(ufsvfsp, TOP_PUTPAGE,
493 TOP_PUTPAGE_SIZE(ip));
494 curthread->t_flag &= ~T_DONTBLOCK;
495 dotrans = 0;
498 if (panicstr && TRANS_ISTRANS(ufsvfsp))
499 goto out;
501 * waitfor represents two things -
502 * 1. whether data sync or file sync.
503 * 2. if file sync then ufs_iupdat should 'waitfor' disk i/o or not.
505 if (waitfor == I_DSYNC) {
507 * If data sync, only IATTCHG (size/block change) requires
508 * inode update, fdatasync()/FDSYNC implementation.
510 if (ip->i_flag & (IBDWRITE|IATTCHG)) {
512 * Enter a transaction to provide mutual exclusion
513 * with deltamap_push and avoid a race where
514 * the inode flush could get dropped.
516 if ((curthread->t_flag & T_DONTBLOCK) == 0) {
517 dotrans = 1;
518 curthread->t_flag |= T_DONTBLOCK;
519 TRANS_BEGIN_ASYNC(ufsvfsp, topid,
520 TOP_SYNCIP_SIZE);
522 rw_enter(&ip->i_contents, RW_READER);
523 mutex_enter(&ip->i_tlock);
524 ip->i_flag &= ~IMODTIME;
525 mutex_exit(&ip->i_tlock);
526 ufs_iupdat(ip, 1);
527 rw_exit(&ip->i_contents);
528 if (dotrans) {
529 TRANS_END_ASYNC(ufsvfsp, topid,
530 TOP_SYNCIP_SIZE);
531 curthread->t_flag &= ~T_DONTBLOCK;
534 } else {
535 /* For file sync, any inode change requires inode update */
536 if (ip->i_flag & (IBDWRITE|IUPD|IACC|ICHG|IMOD|IMODACC)) {
538 * Enter a transaction to provide mutual exclusion
539 * with deltamap_push and avoid a race where
540 * the inode flush could get dropped.
542 if ((curthread->t_flag & T_DONTBLOCK) == 0) {
543 dotrans = 1;
544 curthread->t_flag |= T_DONTBLOCK;
545 TRANS_BEGIN_ASYNC(ufsvfsp, topid,
546 TOP_SYNCIP_SIZE);
548 rw_enter(&ip->i_contents, RW_READER);
549 mutex_enter(&ip->i_tlock);
550 ip->i_flag &= ~IMODTIME;
551 mutex_exit(&ip->i_tlock);
552 ufs_iupdat(ip, waitfor);
553 rw_exit(&ip->i_contents);
554 if (dotrans) {
555 TRANS_END_ASYNC(ufsvfsp, topid,
556 TOP_SYNCIP_SIZE);
557 curthread->t_flag &= ~T_DONTBLOCK;
562 out:
563 return (error);
566 * Flush all indirect blocks related to an inode.
567 * Supports triple indirect blocks also.
570 ufs_sync_indir(struct inode *ip)
572 int i;
573 daddr_t blkno;
574 daddr_t lbn; /* logical blkno of last blk in file */
575 daddr_t clbn; /* current logical blk */
576 daddr32_t *bap;
577 struct fs *fs;
578 struct buf *bp;
579 int bsize;
580 struct ufsvfs *ufsvfsp;
581 int j;
582 daddr_t indirect_blkno;
583 daddr32_t *indirect_bap;
584 struct buf *indirect_bp;
586 ufsvfsp = ip->i_ufsvfs;
588 * unnecessary when logging; allocation blocks are kept up-to-date
590 if (TRANS_ISTRANS(ufsvfsp))
591 return (0);
593 fs = ufsvfsp->vfs_fs;
594 bsize = fs->fs_bsize;
595 lbn = (daddr_t)lblkno(fs, ip->i_size - 1);
596 if (lbn < NDADDR)
597 return (0); /* No indirect blocks used */
598 if (lbn < NDADDR + NINDIR(fs)) {
599 /* File has one indirect block. */
600 blkflush(ip->i_dev, (daddr_t)fsbtodb(fs, ip->i_ib[0]));
601 return (0);
604 /* Write out all the first level indirect blocks */
605 for (i = 0; i < NIADDR; i++) {
606 if ((blkno = ip->i_ib[i]) == 0)
607 continue;
608 blkflush(ip->i_dev, (daddr_t)fsbtodb(fs, blkno));
610 /* Write out second level of indirect blocks */
611 if ((blkno = ip->i_ib[1]) == 0)
612 return (0);
613 bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, blkno), bsize);
614 if (bp->b_flags & B_ERROR) {
615 brelse(bp);
616 return (EIO);
618 bap = bp->b_un.b_daddr;
619 clbn = NDADDR + NINDIR(fs);
620 for (i = 0; i < NINDIR(fs); i++) {
621 if (clbn > lbn)
622 break;
623 clbn += NINDIR(fs);
624 if ((blkno = bap[i]) == 0)
625 continue;
626 blkflush(ip->i_dev, (daddr_t)fsbtodb(fs, blkno));
629 brelse(bp);
 630     /* Write out third level indirect blocks */
632 if ((blkno = ip->i_ib[2]) == 0)
633 return (0);
635 bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, blkno), bsize);
636 if (bp->b_flags & B_ERROR) {
637 brelse(bp);
638 return (EIO);
640 bap = bp->b_un.b_daddr;
641 clbn = NDADDR + NINDIR(fs) + (NINDIR(fs) * NINDIR(fs));
643 for (i = 0; i < NINDIR(fs); i++) {
644 if (clbn > lbn)
645 break;
646 if ((indirect_blkno = bap[i]) == 0)
647 continue;
648 blkflush(ip->i_dev, (daddr_t)fsbtodb(fs, indirect_blkno));
649 indirect_bp = UFS_BREAD(ufsvfsp, ip->i_dev,
650 (daddr_t)fsbtodb(fs, indirect_blkno), bsize);
651 if (indirect_bp->b_flags & B_ERROR) {
652 brelse(indirect_bp);
653 brelse(bp);
654 return (EIO);
656 indirect_bap = indirect_bp->b_un.b_daddr;
657 for (j = 0; j < NINDIR(fs); j++) {
658 if (clbn > lbn)
659 break;
660 clbn += NINDIR(fs);
661 if ((blkno = indirect_bap[j]) == 0)
662 continue;
663 blkflush(ip->i_dev, (daddr_t)fsbtodb(fs, blkno));
665 brelse(indirect_bp);
667 brelse(bp);
669 return (0);
673 * Flush all indirect blocks related to an offset of a file.
674 * read/write in sync mode may have to flush indirect blocks.
677 ufs_indirblk_sync(struct inode *ip, offset_t off)
679 daddr_t lbn;
680 struct fs *fs;
681 struct buf *bp;
682 int i, j, shft;
683 daddr_t ob, nb, tbn;
684 daddr32_t *bap;
685 int nindirshift, nindiroffset;
686 struct ufsvfs *ufsvfsp;
688 ufsvfsp = ip->i_ufsvfs;
690 * unnecessary when logging; allocation blocks are kept up-to-date
692 if (TRANS_ISTRANS(ufsvfsp))
693 return (0);
695 fs = ufsvfsp->vfs_fs;
697 lbn = (daddr_t)lblkno(fs, off);
698 if (lbn < 0)
699 return (EFBIG);
701 /* The first NDADDR are direct so nothing to do */
702 if (lbn < NDADDR)
703 return (0);
705 nindirshift = ip->i_ufsvfs->vfs_nindirshift;
706 nindiroffset = ip->i_ufsvfs->vfs_nindiroffset;
708 /* Determine level of indirect blocks */
709 shft = 0;
710 tbn = lbn - NDADDR;
711 for (j = NIADDR; j > 0; j--) {
712 longlong_t sh;
714 shft += nindirshift;
715 sh = 1LL << shft;
716 if (tbn < sh)
717 break;
718 tbn -= (daddr_t)sh;
721 if (j == 0)
722 return (EFBIG);
724 if ((nb = ip->i_ib[NIADDR - j]) == 0)
725 return (0); /* UFS Hole */
727 /* Flush first level indirect block */
728 blkflush(ip->i_dev, fsbtodb(fs, nb));
730 /* Fetch through next levels */
731 for (; j < NIADDR; j++) {
732 ob = nb;
733 bp = UFS_BREAD(ufsvfsp,
734 ip->i_dev, fsbtodb(fs, ob), fs->fs_bsize);
735 if (bp->b_flags & B_ERROR) {
736 brelse(bp);
737 return (EIO);
739 bap = bp->b_un.b_daddr;
740 shft -= nindirshift; /* sh / nindir */
741 i = (tbn >> shft) & nindiroffset; /* (tbn /sh) & nindir */
742 nb = bap[i];
743 brelse(bp);
744 if (nb == 0) {
745 return (0); /* UFS hole */
747 blkflush(ip->i_dev, fsbtodb(fs, nb));
749 return (0);
752 #ifdef DEBUG
755 * The bad block checking routines: ufs_indir_badblock() and ufs_badblock()
756 * are very expensive. It's been found from profiling that we're
757 * spending 6-7% of our time in ufs_badblock, and another 1-2% in
758 * ufs_indir_badblock. They are only called via ASSERTs (from debug kernels).
 759  * In addition, from experience, no failures have been found in recent
 760  * years, so the following tunable can be set to enable checking.
762 int ufs_badblock_checks = 0;
765 * Check that a given indirect block contains blocks in range
768 ufs_indir_badblock(struct inode *ip, daddr32_t *bap)
770 int i;
771 int err = 0;
773 if (ufs_badblock_checks) {
774 for (i = 0; i < NINDIR(ip->i_fs) - 1; i++)
775 if (bap[i] != 0 && (err = ufs_badblock(ip, bap[i])))
776 break;
778 return (err);
782 * Check that a specified block number is in range.
785 ufs_badblock(struct inode *ip, daddr_t bn)
787 long c;
788 daddr_t sum;
790 if (!ufs_badblock_checks)
791 return (0);
792 ASSERT(bn);
793 if (bn <= 0 || bn > ip->i_fs->fs_size)
794 return (bn);
796 sum = 0;
797 c = dtog(ip->i_fs, bn);
798 if (c == 0) {
799 sum = howmany(ip->i_fs->fs_cssize, ip->i_fs->fs_fsize);
 802  * If the block number is below this cylinder group, within the space
 803  * reserved for the superblock, inodes, and summary data, or above this
 804  * cylinder group, then it is invalid.
806 * It's hard to see how we'd be outside this cyl, but let's be careful.
808 if ((bn < cgbase(ip->i_fs, c)) ||
809 (bn >= cgsblock(ip->i_fs, c) && bn < cgdmin(ip->i_fs, c)+sum) ||
810 (bn >= (unsigned)cgbase(ip->i_fs, c+1)))
811 return (bn);
813 return (0); /* not a bad block */
816 #endif /* DEBUG */
 819  * When i_rwlock is write-locked or has a writer pending, the inode
 820  * is going to change in a way that will mark the filesystem as
 821  * active, so there is no need to mark the filesystem as stable now.
 822  * Also, to ensure filesystem consistency during directory
 823  * operations, the filesystem cannot be marked as stable if the
 824  * i_rwlock of the directory inode is write-locked.
828 * Check for busy inodes for this filesystem.
829 * NOTE: Needs better way to do this expensive operation in the future.
831 static void
832 ufs_icheck(struct ufsvfs *ufsvfsp, int *isbusyp, int *isreclaimp)
834 union ihead *ih;
835 struct inode *ip;
836 int i;
837 int isnottrans = !TRANS_ISTRANS(ufsvfsp);
838 int isbusy = *isbusyp;
839 int isreclaim = *isreclaimp;
841 for (i = 0, ih = ihead; i < inohsz; i++, ih++) {
842 mutex_enter(&ih_lock[i]);
843 for (ip = ih->ih_chain[0];
844 ip != (struct inode *)ih;
845 ip = ip->i_forw) {
847 * if inode is busy/modified/deleted, filesystem is busy
849 if (ip->i_ufsvfs != ufsvfsp)
850 continue;
851 if ((ip->i_flag & (IMOD | IUPD | ICHG)) ||
852 (RW_ISWRITER(&ip->i_rwlock)))
853 isbusy = 1;
854 if ((ip->i_nlink <= 0) && (ip->i_flag & IREF))
855 isreclaim = 1;
856 if (isbusy && (isreclaim || isnottrans))
857 break;
859 mutex_exit(&ih_lock[i]);
860 if (isbusy && (isreclaim || isnottrans))
861 break;
863 *isbusyp = isbusy;
864 *isreclaimp = isreclaim;
868 * As part of the ufs 'sync' operation, this routine is called to mark
869 * the filesystem as STABLE if there is no modified metadata in memory.
871 void
872 ufs_checkclean(struct vfs *vfsp)
874 struct ufsvfs *ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
875 struct fs *fs = ufsvfsp->vfs_fs;
876 int isbusy;
877 int isreclaim;
878 int updatesb;
880 ASSERT(vfs_lock_held(vfsp));
883 * filesystem is stable or cleanflag processing is disabled; do nothing
884 * no transitions when panic'ing
886 if (fs->fs_ronly ||
887 fs->fs_clean == FSBAD ||
888 fs->fs_clean == FSSUSPEND ||
889 fs->fs_clean == FSSTABLE ||
890 panicstr)
891 return;
894 * if logging and nothing to reclaim; do nothing
896 if ((fs->fs_clean == FSLOG) &&
897 (((fs->fs_reclaim & FS_RECLAIM) == 0) ||
898 (fs->fs_reclaim & FS_RECLAIMING)))
899 return;
902 * FS_CHECKCLEAN is reset if the file system goes dirty
903 * FS_CHECKRECLAIM is reset if a file gets deleted
905 mutex_enter(&ufsvfsp->vfs_lock);
906 fs->fs_reclaim |= (FS_CHECKCLEAN | FS_CHECKRECLAIM);
907 mutex_exit(&ufsvfsp->vfs_lock);
909 updatesb = 0;
912 * if logging or buffers are busy; do nothing
914 isbusy = isreclaim = 0;
915 if ((fs->fs_clean == FSLOG) ||
916 (bcheck(vfsp->vfs_dev, ufsvfsp->vfs_bufp)))
917 isbusy = 1;
920 * isreclaim == TRUE means can't change the state of fs_reclaim
922 isreclaim =
923 ((fs->fs_clean == FSLOG) &&
924 (((fs->fs_reclaim & FS_RECLAIM) == 0) ||
925 (fs->fs_reclaim & FS_RECLAIMING)));
928 * if fs is busy or can't change the state of fs_reclaim; do nothing
930 if (isbusy && isreclaim)
931 return;
934 * look for busy or deleted inodes; (deleted == needs reclaim)
936 ufs_icheck(ufsvfsp, &isbusy, &isreclaim);
938 mutex_enter(&ufsvfsp->vfs_lock);
941 * IF POSSIBLE, RESET RECLAIM
944 * the reclaim thread is not running
946 if ((fs->fs_reclaim & FS_RECLAIMING) == 0)
948 * no files were deleted during the scan
950 if (fs->fs_reclaim & FS_CHECKRECLAIM)
952 * no deleted files were found in the inode cache
954 if ((isreclaim == 0) && (fs->fs_reclaim & FS_RECLAIM)) {
955 fs->fs_reclaim &= ~FS_RECLAIM;
956 updatesb = 1;
959 * IF POSSIBLE, SET STABLE
962 * not logging
964 if (fs->fs_clean != FSLOG)
966 * file system has not gone dirty since the scan began
968 if (fs->fs_reclaim & FS_CHECKCLEAN)
970 * nothing dirty was found in the buffer or inode cache
972 if ((isbusy == 0) && (isreclaim == 0) &&
973 (fs->fs_clean != FSSTABLE)) {
974 fs->fs_clean = FSSTABLE;
975 updatesb = 1;
978 mutex_exit(&ufsvfsp->vfs_lock);
979 if (updatesb) {
980 TRANS_SBWRITE(ufsvfsp, TOP_SBWRITE_STABLE);
985 * called whenever an unlink occurs
987 void
988 ufs_setreclaim(struct inode *ip)
990 struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
991 struct fs *fs = ufsvfsp->vfs_fs;
993 if (ip->i_nlink || fs->fs_ronly || (fs->fs_clean != FSLOG))
994 return;
997 * reclaim-needed bit is already set or we need to tell
998 * ufs_checkclean that a file has been deleted
1000 if ((fs->fs_reclaim & (FS_RECLAIM | FS_CHECKRECLAIM)) == FS_RECLAIM)
1001 return;
1003 mutex_enter(&ufsvfsp->vfs_lock);
1005 * inform ufs_checkclean that the file system has gone dirty
1007 fs->fs_reclaim &= ~FS_CHECKRECLAIM;
1010 * set the reclaim-needed bit
1012 if ((fs->fs_reclaim & FS_RECLAIM) == 0) {
1013 fs->fs_reclaim |= FS_RECLAIM;
1014 ufs_sbwrite(ufsvfsp);
1016 mutex_exit(&ufsvfsp->vfs_lock);
1020  * Before any modified metadata is written back to the disk, this routine
1021  * is called to mark the filesystem as ACTIVE.
1023 void
1024 ufs_notclean(struct ufsvfs *ufsvfsp)
1026 struct fs *fs = ufsvfsp->vfs_fs;
1028 ASSERT(MUTEX_HELD(&ufsvfsp->vfs_lock));
1029 ULOCKFS_SET_MOD((&ufsvfsp->vfs_ulockfs));
1032 * inform ufs_checkclean that the file system has gone dirty
1034 fs->fs_reclaim &= ~FS_CHECKCLEAN;
1037 * ignore if active or bad or suspended or readonly or logging
1039 if ((fs->fs_clean == FSACTIVE) || (fs->fs_clean == FSLOG) ||
1040 (fs->fs_clean == FSBAD) || (fs->fs_clean == FSSUSPEND) ||
1041 (fs->fs_ronly)) {
1042 mutex_exit(&ufsvfsp->vfs_lock);
1043 return;
1045 fs->fs_clean = FSACTIVE;
1047 * write superblock synchronously
1049 ufs_sbwrite(ufsvfsp);
1050 mutex_exit(&ufsvfsp->vfs_lock);
1054 * ufs specific fbwrite()
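 * If the file system is logging, fall through to a normal fbwrite();
 * otherwise mark the file system as not clean first, and honor the
 * delayed-IO setting (vfs_dio) by using fbdwrite() instead of fbwrite().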
1057 ufs_fbwrite(struct fbuf *fbp, struct inode *ip)
1059 struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
1061 if (TRANS_ISTRANS(ufsvfsp))
1062 return (fbwrite(fbp));
1063 mutex_enter(&ufsvfsp->vfs_lock);
1064 ufs_notclean(ufsvfsp);
1065 return ((ufsvfsp->vfs_dio) ? fbdwrite(fbp) : fbwrite(fbp));
1069 * ufs specific fbiwrite()
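 * Marks the file system as not clean, records a transaction delta for
 * metadata inodes (directories, shadow/attribute-dir inodes, and the quota
 * inode), and then performs an inlined fbiwrite(), routing the buffer
 * through the log or snapshot driver when one is configured.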
1072 ufs_fbiwrite(struct fbuf *fbp, struct inode *ip, daddr_t bn, long bsize)
1074 struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
1075 o_mode_t ifmt = ip->i_mode & IFMT;
1076 buf_t *bp;
1077 int error;
1079 mutex_enter(&ufsvfsp->vfs_lock);
1080 ufs_notclean(ufsvfsp);
1081 if (ifmt == IFDIR || ifmt == IFSHAD || ifmt == IFATTRDIR ||
1082 (ip->i_ufsvfs->vfs_qinod == ip)) {
1083 TRANS_DELTA(ufsvfsp, ldbtob(bn * (offset_t)(btod(bsize))),
1084 fbp->fb_count, DT_FBI, 0, 0);
1087 * Inlined version of fbiwrite()
1089 bp = pageio_setup(NULL, fbp->fb_count, ip->i_devvp, B_WRITE);
1090 bp->b_flags &= ~B_PAGEIO;
1091 bp->b_un.b_addr = fbp->fb_addr;
1093 bp->b_blkno = bn * btod(bsize);
1094 bp->b_dev = cmpdev(ip->i_dev); /* store in old dev format */
1095 bp->b_edev = ip->i_dev;
1096 bp->b_proc = NULL; /* i.e. the kernel */
1097 bp->b_file = ip->i_vnode;
1098 bp->b_offset = -1;
1100 if (ufsvfsp->vfs_log) {
1101 lufs_write_strategy(ufsvfsp->vfs_log, bp);
1102 } else if (ufsvfsp->vfs_snapshot) {
1103 fssnap_strategy(&ufsvfsp->vfs_snapshot, bp);
1104 } else {
1105 ufsvfsp->vfs_iotstamp = ddi_get_lbolt();
1106 ub.ub_fbiwrites.value.ul++;
1107 (void) bdev_strategy(bp);
1108 lwp_stat_update(LWP_STAT_OUBLK, 1);
1110 error = biowait(bp);
1111 pageio_done(bp);
1112 fbrelse(fbp, S_OTHER);
1113 return (error);
1117 * Write the ufs superblock only.
1119 void
1120 ufs_sbwrite(struct ufsvfs *ufsvfsp)
1122 char sav_fs_fmod;
1123 struct fs *fs = ufsvfsp->vfs_fs;
1124 struct buf *bp = ufsvfsp->vfs_bufp;
1126 ASSERT(MUTEX_HELD(&ufsvfsp->vfs_lock));
1129 * for ulockfs processing, limit the superblock writes
1131 if ((ufsvfsp->vfs_ulockfs.ul_sbowner) &&
1132 (curthread != ufsvfsp->vfs_ulockfs.ul_sbowner)) {
1133 /* try again later */
1134 fs->fs_fmod = 1;
1135 return;
1138 ULOCKFS_SET_MOD((&ufsvfsp->vfs_ulockfs));
1140 * update superblock timestamp and fs_clean checksum
1141 * if marked FSBAD, we always want an erroneous
1142 * checksum to force repair
1144 fs->fs_time = gethrestime_sec();
1145 fs->fs_state = (fs->fs_clean != FSBAD) ?
1146 FSOKAY - fs->fs_time : -(FSOKAY - fs->fs_time);
1147 switch (fs->fs_clean) {
1148 case FSCLEAN:
1149 case FSSTABLE:
1150 fs->fs_reclaim &= ~FS_RECLAIM;
1151 break;
1152 case FSACTIVE:
1153 case FSSUSPEND:
1154 case FSBAD:
1155 case FSLOG:
1156 break;
1157 default:
1158 fs->fs_clean = FSACTIVE;
1159 break;
1162 * reset incore only bits
1164 fs->fs_reclaim &= ~(FS_CHECKCLEAN | FS_CHECKRECLAIM);
1167 * delta the whole superblock
1169 TRANS_DELTA(ufsvfsp, ldbtob(SBLOCK), sizeof (struct fs),
1170 DT_SB, NULL, 0);
1172 * retain the incore state of fs_fmod; set the ondisk state to 0
1174 sav_fs_fmod = fs->fs_fmod;
1175 fs->fs_fmod = 0;
1178  * Don't release the buffer after it is written to the disk
1180 UFS_BWRITE2(ufsvfsp, bp);
1181 fs->fs_fmod = sav_fs_fmod; /* reset fs_fmod's incore state */
1185  * Returns the vfs pointer, with the vfs lock held, if the file system is
1186  * still mounted.  Otherwise, returns NULL.
1188 * For our purposes, "still mounted" means that the file system still appears
1189 * on the list of UFS file system instances.
1191 static vfs_t *
1192 still_mounted(struct check_node *checkp)
1194 struct vfs *vfsp;
1195 struct ufsvfs *ufsp;
1197 mutex_enter(&ufsvfs_mutex);
1198 for (ufsp = ufs_instances; ufsp != NULL; ufsp = ufsp->vfs_next) {
1199 if (ufsp != checkp->ufsvfs)
1200 continue;
1202 * Tentative match: verify it and try to lock. (It's not at
1203 * all clear how the verification could fail, given that we've
1204 * gotten this far. We would have had to reallocate the
1205 * ufsvfs struct at hand for a new incarnation; is that really
1206 * possible in the interval from constructing the check_node
1207 * to here?)
1209 vfsp = ufsp->vfs_vfs;
1210 if (vfsp != checkp->vfsp)
1211 continue;
1212 if (vfsp->vfs_dev != checkp->vfs_dev)
1213 continue;
1214 if (vfs_lock(vfsp) != 0)
1215 continue;
1217 mutex_exit(&ufsvfs_mutex);
1218 return (vfsp);
1220 mutex_exit(&ufsvfs_mutex);
1221 return (NULL);
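/*
 * I/O-done routine for the summary-information buffers used below; it
 * simply signals the semaphore that the issuing thread waits on with
 * sema_p().
 */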
1225 ufs_si_io_done(struct buf *bp)
1227 sema_v(&bp->b_io);
1228 return (0);
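/*
 * SI_BUFSZ is the size of one cylinder-group read buffer (a struct cg
 * rounded up to a device block); NSIBUF bounds how many cylinder-group
 * reads ufs_construct_si() has outstanding at once.
 */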
1231 #define SI_BUFSZ roundup(sizeof (struct cg), DEV_BSIZE)
1232 #define NSIBUF 32
1235 * ufs_construct_si()
1236 * Read each cylinder group in turn and construct the summary information
1238 static int
1239 ufs_construct_si(dev_t dev, struct fs *fs, struct ufsvfs *ufsvfsp)
1241 buf_t *bps, *bp;
1242 char *bufs;
1243 struct csum *sip = fs->fs_u.fs_csp;
1244 struct cg *cgp;
1245 int i, ncg;
1246 int error = 0, cg = 0;
1248 bps = kmem_alloc(NSIBUF * sizeof (buf_t), KM_SLEEP);
1249 bufs = kmem_alloc(NSIBUF * SI_BUFSZ, KM_SLEEP);
1252 * Initialise the buffer headers
1254 for (bp = bps, i = 0; i < NSIBUF; i++, bp++) {
1255 bioinit(bp);
1256 bp->b_iodone = ufs_si_io_done;
1257 bp->b_bufsize = bp->b_bcount = SI_BUFSZ;
1258 bp->b_flags = B_READ;
1259 bp->b_un.b_addr = bufs + (i * SI_BUFSZ);
1260 bp->b_edev = dev;
1264 * Repeat while there are cylinder groups left to read.
1266 do {
1268          * Issue up to NSIBUF asynchronous reads
1270 ncg = MIN(NSIBUF, (fs->fs_ncg - cg));
1271 for (bp = bps, i = 0; i < ncg; i++, bp++) {
1272 bp->b_blkno = (daddr_t)fsbtodb(fs, cgtod(fs, cg + i));
1273 if (ufsvfsp->vfs_log) {
1274 lufs_read_strategy(ufsvfsp->vfs_log, bp);
1275 } else {
1276 (void) bdev_strategy(bp);
1281 * wait for each read to finish;
1282 * check for errors and copy the csum info
1284 for (bp = bps, i = 0; i < ncg; i++, bp++) {
1285 sema_p(&bp->b_io);
1286 if (!error) {
1287 cgp = bp->b_un.b_cg;
1288 sip[cg + i] = cgp->cg_cs;
1289 error = geterror(bp);
1292 if (error) {
1293 goto err;
1295 cg += ncg;
1296 } while (cg < fs->fs_ncg);
1298 err:
1299 kmem_free(bps, NSIBUF * sizeof (buf_t));
1300 kmem_free(bufs, NSIBUF * SI_BUFSZ);
1301 return (error);
1305 * ufs_getsummaryinfo
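 * Read the per-cylinder-group summary information into fs_u.fs_csp and
 * recompute fs_cstotal.  If the on-disk summary area is marked FS_SI_BAD,
 * rebuild it by reading every cylinder group via ufs_construct_si().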
1308 ufs_getsummaryinfo(dev_t dev, struct ufsvfs *ufsvfsp, struct fs *fs)
1310 int i; /* `for' loop counter */
1311 ssize_t size; /* bytes of summary info to read */
1312 daddr_t frags; /* frags of summary info to read */
1313 caddr_t sip; /* summary info */
1314 struct buf *tp; /* tmp buf */
1317 * maintain metadata map for trans device (debug only)
1319 TRANS_MATA_SI(ufsvfsp, fs);
1322 * Compute #frags and allocate space for summary info
1324 frags = howmany(fs->fs_cssize, fs->fs_fsize);
1325 sip = kmem_alloc((size_t)fs->fs_cssize, KM_SLEEP);
1326 fs->fs_u.fs_csp = (struct csum *)sip;
1328 if (fs->fs_si == FS_SI_BAD) {
1330 * The summary information is unknown, read it in from
1331 * the cylinder groups.
1333 if (TRANS_ISTRANS(ufsvfsp) && !TRANS_ISERROR(ufsvfsp) &&
1334 ufsvfsp->vfs_log->un_logmap) {
1335 logmap_roll_dev(ufsvfsp->vfs_log); /* flush the log */
1337 bzero(sip, (size_t)fs->fs_cssize);
1338 if (ufs_construct_si(dev, fs, ufsvfsp)) {
1339 kmem_free(fs->fs_u.fs_csp, fs->fs_cssize);
1340 fs->fs_u.fs_csp = NULL;
1341 return (EIO);
1343 } else {
1344 /* Read summary info a fs block at a time */
1345 size = fs->fs_bsize;
1346 for (i = 0; i < frags; i += fs->fs_frag) {
1347 if (i + fs->fs_frag > frags)
1349                  * This happens only on the last iteration, so
1350 * don't worry about size being reset
1352 size = (frags - i) * fs->fs_fsize;
1353 tp = UFS_BREAD(ufsvfsp, dev,
1354 (daddr_t)fsbtodb(fs, fs->fs_csaddr+i), size);
1355 tp->b_flags |= B_STALE | B_AGE;
1356 if (tp->b_flags & B_ERROR) {
1357 kmem_free(fs->fs_u.fs_csp, fs->fs_cssize);
1358 fs->fs_u.fs_csp = NULL;
1359 brelse(tp);
1360 return (EIO);
1362 bcopy(tp->b_un.b_addr, sip, size);
1363 sip += size;
1364 brelse(tp);
1367 bzero((caddr_t)&fs->fs_cstotal, sizeof (fs->fs_cstotal));
1368 for (i = 0; i < fs->fs_ncg; ++i) {
1369 fs->fs_cstotal.cs_ndir += fs->fs_cs(fs, i).cs_ndir;
1370 fs->fs_cstotal.cs_nbfree += fs->fs_cs(fs, i).cs_nbfree;
1371 fs->fs_cstotal.cs_nifree += fs->fs_cs(fs, i).cs_nifree;
1372 fs->fs_cstotal.cs_nffree += fs->fs_cs(fs, i).cs_nffree;
1374 return (0);
1378  * ufs_putsummaryinfo() stores all the cylinder group summary information.
1379  * This is only used when logging, but the file system may not
1380  * be logging at the time, e.g. a read-only mount to flush the log
1381  * may push the summary info out.
1384 ufs_putsummaryinfo(dev_t dev, struct ufsvfs *ufsvfsp, struct fs *fs)
1386 struct buf b, *bp; /* tmp buf */
1387 caddr_t sip; /* summary info */
1388 ssize_t size; /* bytes of summary info to write */
1389 daddr_t frags; /* frags of summary info to write */
1390 int i; /* `for' loop counter */
1391 int error; /* error */
1393 if (TRANS_ISERROR(ufsvfsp)) {
1394 return (EIO);
1397 if ((fs->fs_si != FS_SI_BAD) || !ufsvfsp->vfs_nolog_si) {
1398 return (0);
1401 bp = &b;
1402 bioinit(bp);
1403 bp->b_iodone = ufs_si_io_done;
1404 bp->b_bufsize = size = fs->fs_bsize;
1405 bp->b_flags = B_WRITE;
1406 bp->b_un.b_addr = kmem_alloc(size, KM_SLEEP);
1407 bp->b_edev = dev;
1408 frags = howmany(fs->fs_cssize, fs->fs_fsize);
1409 sip = (caddr_t)fs->fs_u.fs_csp;
1411 /* Write summary info one fs block at a time */
1412 for (error = 0, i = 0; (i < frags) && (error == 0); i += fs->fs_frag) {
1413 if (i + fs->fs_frag > frags) {
1415                  * This happens only on the last iteration, so
1416 * don't worry about size being reset
1418 size = (frags - i) * fs->fs_fsize;
1420 bcopy(sip, bp->b_un.b_addr, size);
1421 bp->b_blkno = (daddr_t)fsbtodb(fs, fs->fs_csaddr+i);
1422 bp->b_bcount = size;
1423 (void) bdev_strategy(bp);
1424 sema_p(&bp->b_io); /* wait for write to complete */
1425 error = geterror(bp);
1426 sip += size;
1428 kmem_free(bp->b_un.b_addr, fs->fs_bsize);
1429 if (!error) {
1430 fs->fs_si = FS_SI_OK;
1432 return (error);
1436 * Decide whether it is okay to remove within a sticky directory.
1437  * Write access to the directory is needed, but in sticky directories
1438  * write access alone is not sufficient;
1439 * you can remove entries from a directory only if you own the directory,
1440 * if you are privileged, if you own the entry or if the entry is
1441 * a plain file and you have write access to that file.
1442 * Function returns 0 if remove access is granted.
1443 * Note, the caller is responsible for holding the i_contents lock
1444 * at least as reader on the inquired inode 'ip'.
1447 ufs_sticky_remove_access(struct inode *dp, struct inode *ip, struct cred *cr)
1449 uid_t uid;
1451 ASSERT(RW_LOCK_HELD(&ip->i_contents));
1453 if ((dp->i_mode & ISVTX) &&
1454 (uid = crgetuid(cr)) != dp->i_uid &&
1455 uid != ip->i_uid &&
1456 ((ip->i_mode & IFMT) != IFREG ||
1457 ufs_iaccess(ip, IWRITE, cr, 0) != 0))
1458 return (secpolicy_vnode_remove(cr));
1460 return (0);
1462 #endif /* _KERNEL */
1464 extern int around[9];
1465 extern int inside[9];
1466 extern uchar_t *fragtbl[];
1469 * Update the frsum fields to reflect addition or deletion
1470 * of some frags.
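 * fragtbl[fs_frag][fragmap] encodes which free-fragment run sizes occur in
 * 'fragmap'; for each run size present, the around[]/inside[] masks are slid
 * across the map and fraglist[size] is adjusted by 'cnt' once for every run
 * of exactly that size.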
1472 void
1473 fragacct(struct fs *fs, int fragmap, int32_t *fraglist, int cnt)
1475 int inblk;
1476 int field, subfield;
1477 int siz, pos;
1480 * ufsvfsp->vfs_lock is held when calling this.
1482 inblk = (int)(fragtbl[fs->fs_frag][fragmap]) << 1;
1483 fragmap <<= 1;
1484 for (siz = 1; siz < fs->fs_frag; siz++) {
1485 if ((inblk & (1 << (siz + (fs->fs_frag % NBBY)))) == 0)
1486 continue;
1487 field = around[siz];
1488 subfield = inside[siz];
1489 for (pos = siz; pos <= fs->fs_frag; pos++) {
1490 if ((fragmap & field) == subfield) {
1491 fraglist[siz] += cnt;
1492 ASSERT(fraglist[siz] >= 0);
1493 pos += siz;
1494 field <<= siz;
1495 subfield <<= siz;
1497 field <<= 1;
1498 subfield <<= 1;
1504 * Block operations
1508 * Check if a block is available
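 * For example, with fs_frag == 4 each byte of the map covers two blocks:
 * block h occupies the nibble selected by (h & 0x1) in byte h >> 1, and the
 * block is considered available only when all four fragment bits in that
 * nibble are set.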
1511 isblock(struct fs *fs, uchar_t *cp, daddr_t h)
1513 uchar_t mask;
1515 ASSERT(fs->fs_frag == 8 || fs->fs_frag == 4 || fs->fs_frag == 2 || \
1516 fs->fs_frag == 1);
1518 * ufsvfsp->vfs_lock is held when calling this.
1520 switch ((int)fs->fs_frag) {
1521 case 8:
1522 return (cp[h] == 0xff);
1523 case 4:
1524 mask = 0x0f << ((h & 0x1) << 2);
1525 return ((cp[h >> 1] & mask) == mask);
1526 case 2:
1527 mask = 0x03 << ((h & 0x3) << 1);
1528 return ((cp[h >> 2] & mask) == mask);
1529 case 1:
1530 mask = 0x01 << (h & 0x7);
1531 return ((cp[h >> 3] & mask) == mask);
1532 default:
1533 #ifndef _KERNEL
1534 cmn_err(CE_PANIC, "isblock: illegal fs->fs_frag value (%d)",
1535 fs->fs_frag);
1536 #endif /* _KERNEL */
1537 return (0);
1542 * Take a block out of the map
1544 void
1545 clrblock(struct fs *fs, uchar_t *cp, daddr_t h)
1547 ASSERT(fs->fs_frag == 8 || fs->fs_frag == 4 || fs->fs_frag == 2 || \
1548 fs->fs_frag == 1);
1550 * ufsvfsp->vfs_lock is held when calling this.
1552 switch ((int)fs->fs_frag) {
1553 case 8:
1554 cp[h] = 0;
1555 return;
1556 case 4:
1557 cp[h >> 1] &= ~(0x0f << ((h & 0x1) << 2));
1558 return;
1559 case 2:
1560 cp[h >> 2] &= ~(0x03 << ((h & 0x3) << 1));
1561 return;
1562 case 1:
1563 cp[h >> 3] &= ~(0x01 << (h & 0x7));
1564 return;
1565 default:
1566 #ifndef _KERNEL
1567 cmn_err(CE_PANIC, "clrblock: illegal fs->fs_frag value (%d)",
1568 fs->fs_frag);
1569 #endif /* _KERNEL */
1570 return;
1575 * Is block allocated?
1578 isclrblock(struct fs *fs, uchar_t *cp, daddr_t h)
1580 uchar_t mask;
1581 int frag;
1583 * ufsvfsp->vfs_lock is held when calling this.
1585 frag = fs->fs_frag;
1586 ASSERT(frag == 8 || frag == 4 || frag == 2 || frag == 1);
1587 switch (frag) {
1588 case 8:
1589 return (cp[h] == 0);
1590 case 4:
1591 mask = ~(0x0f << ((h & 0x1) << 2));
1592 return (cp[h >> 1] == (cp[h >> 1] & mask));
1593 case 2:
1594 mask = ~(0x03 << ((h & 0x3) << 1));
1595 return (cp[h >> 2] == (cp[h >> 2] & mask));
1596 case 1:
1597 mask = ~(0x01 << (h & 0x7));
1598 return (cp[h >> 3] == (cp[h >> 3] & mask));
1599 default:
1600 #ifndef _KERNEL
1601 cmn_err(CE_PANIC, "isclrblock: illegal fs->fs_frag value (%d)",
1602 fs->fs_frag);
1603 #endif /* _KERNEL */
1604 break;
1606 return (0);
1610 * Put a block into the map
1612 void
1613 setblock(struct fs *fs, uchar_t *cp, daddr_t h)
1615 ASSERT(fs->fs_frag == 8 || fs->fs_frag == 4 || fs->fs_frag == 2 || \
1616 fs->fs_frag == 1);
1618 * ufsvfsp->vfs_lock is held when calling this.
1620 switch ((int)fs->fs_frag) {
1621 case 8:
1622 cp[h] = 0xff;
1623 return;
1624 case 4:
1625 cp[h >> 1] |= (0x0f << ((h & 0x1) << 2));
1626 return;
1627 case 2:
1628 cp[h >> 2] |= (0x03 << ((h & 0x3) << 1));
1629 return;
1630 case 1:
1631 cp[h >> 3] |= (0x01 << (h & 0x7));
1632 return;
1633 default:
1634 #ifndef _KERNEL
1635 cmn_err(CE_PANIC, "setblock: illegal fs->fs_frag value (%d)",
1636 fs->fs_frag);
1637 #endif /* _KERNEL */
1638 return;
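/*
 * skpc() scans up to 'len' characters at 'cp', skipping those equal to 'c'.
 * It returns the count of characters remaining from the first mismatch on,
 * or 0 if all 'len' characters matched.
 */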
1643 skpc(char c, uint_t len, char *cp)
1645 if (len == 0)
1646 return (0);
1647 while (*cp++ == c && --len)
1649 return (len);