/*	$NetBSD: ffs_alloc.c,v 1.145 2013/11/12 03:29:22 dholland Exp $	*/

/*-
 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Marshall
 * Kirk McKusick and Network Associates Laboratories, the Security
 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
 * research program.
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_alloc.c	8.19 (Berkeley) 7/13/95
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ffs_alloc.c,v 1.145 2013/11/12 03:29:22 dholland Exp $");

#if defined(_KERNEL_OPT)
#include "opt_quota.h"
#include "opt_uvm_page_trkown.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cprng.h>
#include <sys/fstrans.h>
#include <sys/kauth.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/syslog.h>
#include <sys/vnode.h>
#include <sys/wapbl.h>

#include <miscfs/specfs/specdev.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufs_bswap.h>
#include <ufs/ufs/ufs_wapbl.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

#ifdef UVM_PAGE_TRKOWN
#include <uvm/uvm.h>
#endif
static daddr_t ffs_alloccg(struct inode *, int, daddr_t, int, int);
static daddr_t ffs_alloccgblk(struct inode *, struct buf *, daddr_t, int);
static ino_t ffs_dirpref(struct inode *);
static daddr_t ffs_fragextend(struct inode *, int, daddr_t, int, int);
static void ffs_fserr(struct fs *, u_int, const char *);
static daddr_t ffs_hashalloc(struct inode *, int, daddr_t, int, int,
    daddr_t (*)(struct inode *, int, daddr_t, int, int));
static daddr_t ffs_nodealloccg(struct inode *, int, daddr_t, int, int);
static int32_t ffs_mapsearch(struct fs *, struct cg *, daddr_t, int);
static void ffs_blkfree_common(struct ufsmount *, struct fs *, dev_t,
    struct buf *, daddr_t, long, bool);
static void ffs_freefile_common(struct ufsmount *, struct fs *, dev_t,
    struct buf *, ino_t, int, bool);

/* if 1, changes in optimization strategy are logged */
int ffs_log_changeopt = 0;

/* in ffs_tables.c */
extern const int inside[], around[];
extern const u_char * const fragtbl[];
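
/*
 * around[], inside[] and fragtbl[] are lookup tables used by ffs_mapsearch()
 * below: fragtbl[fs_frag] summarizes, for each possible byte of the free
 * fragment bitmap, which free-run lengths occur in it, while around[] and
 * inside[] are the mask/pattern pairs used to pin down the exact position of
 * a run of the requested length.  (This note is descriptive only; the tables
 * themselves live in ffs_tables.c.)
 */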
/*
 * Basic consistency check for block allocations.
 */
int
ffs_check_bad_allocation(const char *func, struct fs *fs, daddr_t bno,
    long size, dev_t dev, ino_t inum)
{
	if ((u_int)size > fs->fs_bsize || ffs_fragoff(fs, size) != 0 ||
	    ffs_fragnum(fs, bno) + ffs_numfrags(fs, size) > fs->fs_frag) {
		printf("dev = 0x%llx, bno = %" PRId64 " bsize = %d, "
		    "size = %ld, fs = %s\n",
		    (long long)dev, bno, fs->fs_bsize, size, fs->fs_fsmnt);
		panic("%s: bad size", func);
	}

	if (bno >= fs->fs_size) {
		printf("bad block %" PRId64 ", ino %llu\n", bno,
		    (unsigned long long)inum);
		ffs_fserr(fs, inum, "bad block");
		return EINVAL;
	}
	return 0;
}
/*
 * Allocate a block in the file system.
 *
 * The size of the requested block is given, which must be some
 * multiple of fs_fsize and <= fs_bsize.
 * A preference may be optionally specified. If a preference is given
 * the following hierarchy is used to allocate a block:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate a block in the same cylinder group.
 *   4) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 * If no block preference is given the following hierarchy is used
 * to allocate a block:
 *   1) allocate a block in the cylinder group that contains the
 *      inode for the file.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 *
 * => called with um_lock held
 * => releases um_lock before returning
 */
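/*
 * Note: the 'flags' argument carries allocation hints from the caller
 * (ffs_balloc and friends); within this file only B_CONTIG ("the caller
 * wants a contiguous allocation, fail rather than fall back") and
 * B_METAONLY ("this request is for an indirect block") are examined.
 */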
int
ffs_alloc(struct inode *ip, daddr_t lbn, daddr_t bpref, int size, int flags,
    kauth_cred_t cred, daddr_t *bnp)
{
	struct ufsmount *ump;
	struct fs *fs;
	daddr_t bno;
	int cg;
#if defined(QUOTA) || defined(QUOTA2)
	int error;
#endif

	fs = ip->i_fs;
	ump = ip->i_ump;

	KASSERT(mutex_owned(&ump->um_lock));

#ifdef UVM_PAGE_TRKOWN

	/*
	 * Sanity-check that allocations within the file size
	 * do not allow other threads to read the stale contents
	 * of newly allocated blocks.
	 * Usually pages will exist to cover the new allocation.
	 * There is an optimization in ffs_write() where we skip
	 * creating pages if several conditions are met:
	 *  - the file must not be mapped (in any user address space).
	 *  - the write must cover whole pages and whole blocks.
	 * If those conditions are not met then pages must exist and
	 * be locked by the current thread.
	 */

	if (ITOV(ip)->v_type == VREG &&
	    ffs_lblktosize(fs, (voff_t)lbn) < round_page(ITOV(ip)->v_size)) {
		struct vm_page *pg;
		struct vnode *vp = ITOV(ip);
		struct uvm_object *uobj = &vp->v_uobj;
		voff_t off = trunc_page(ffs_lblktosize(fs, lbn));
		voff_t endoff = round_page(ffs_lblktosize(fs, lbn) + size);

		mutex_enter(uobj->vmobjlock);
		while (off < endoff) {
			pg = uvm_pagelookup(uobj, off);
			KASSERT((pg == NULL && (vp->v_vflag & VV_MAPPED) == 0 &&
				 (size & PAGE_MASK) == 0 &&
				 ffs_blkoff(fs, size) == 0) ||
				(pg != NULL && pg->owner == curproc->p_pid &&
				 pg->lowner == curlwp->l_lid));
			off += PAGE_SIZE;
		}
		mutex_exit(uobj->vmobjlock);
	}
#endif /* UVM_PAGE_TRKOWN */

	*bnp = 0;
#ifdef DIAGNOSTIC
	if ((u_int)size > fs->fs_bsize || ffs_fragoff(fs, size) != 0) {
		printf("dev = 0x%llx, bsize = %d, size = %d, fs = %s\n",
		    (unsigned long long)ip->i_dev, fs->fs_bsize, size,
		    fs->fs_fsmnt);
		panic("ffs_alloc: bad size");
	}
	if (cred == NOCRED)
		panic("ffs_alloc: missing credential");
#endif /* DIAGNOSTIC */
	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
		goto nospace;
	if (freespace(fs, fs->fs_minfree) <= 0 &&
	    kauth_authorize_system(cred, KAUTH_SYSTEM_FS_RESERVEDSPACE, 0, NULL,
	    NULL, NULL) != 0)
		goto nospace;
#if defined(QUOTA) || defined(QUOTA2)
	mutex_exit(&ump->um_lock);
	if ((error = chkdq(ip, btodb(size), cred, 0)) != 0)
		return (error);
	mutex_enter(&ump->um_lock);
#endif
	if (bpref >= fs->fs_size)
		bpref = 0;
	if (bpref == 0)
		cg = ino_to_cg(fs, ip->i_number);
	else
		cg = dtog(fs, bpref);
	bno = ffs_hashalloc(ip, cg, bpref, size, flags, ffs_alloccg);
	if (bno > 0) {
		DIP_ADD(ip, blocks, btodb(size));
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bnp = bno;
		return (0);
	}
#if defined(QUOTA) || defined(QUOTA2)
	/*
	 * Restore user's disk quota because allocation failed.
	 */
	(void) chkdq(ip, -btodb(size), cred, FORCE);
#endif
	if (flags & B_CONTIG) {
		/*
		 * XXX ump->um_lock handling is "suspect" at best.
		 * For the case where ffs_hashalloc() fails early
		 * in the B_CONTIG case we reach here with um_lock
		 * already unlocked, so we can't release it again
		 * like in the normal error path.  See kern/39206.
		 *
		 * Fail silently - it's up to our caller to report
		 * errors.
		 */
		return (ENOSPC);
	}
nospace:
	mutex_exit(&ump->um_lock);
	ffs_fserr(fs, kauth_cred_geteuid(cred), "file system full");
	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
	return (ENOSPC);
}
/*
 * Reallocate a fragment to a bigger size.
 *
 * The number and size of the old block is given, and a preference
 * and new size is also specified.  The allocator attempts to extend
 * the original block.  Failing that, the regular block allocator is
 * invoked to get an appropriate block.
 *
 * => called with um_lock held
 * => return with um_lock released
 */
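/*
 * Illustrative example: with 8 KB blocks and 1 KB fragments, growing a file's
 * last 2 KB fragment to 5 KB first asks ffs_fragextend() for the three
 * adjacent free fragments in place; if they are taken, a new location is
 * chosen according to fs_optim (an exact fit under FS_OPTSPACE, a whole block
 * under FS_OPTTIME) and any unused tail of a full block is freed again.
 */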
int
ffs_realloccg(struct inode *ip, daddr_t lbprev, daddr_t bpref, int osize,
    int nsize, kauth_cred_t cred, struct buf **bpp, daddr_t *blknop)
{
	struct ufsmount *ump;
	struct fs *fs;
	struct buf *bp;
	int cg, request, error;
	daddr_t bprev, bno;

	fs = ip->i_fs;
	ump = ip->i_ump;

	KASSERT(mutex_owned(&ump->um_lock));

#ifdef UVM_PAGE_TRKOWN

	/*
	 * Sanity-check that allocations within the file size
	 * do not allow other threads to read the stale contents
	 * of newly allocated blocks.
	 * Unlike in ffs_alloc(), here pages must always exist
	 * for such allocations, because only the last block of a file
	 * can be a fragment and ffs_write() will reallocate the
	 * fragment to the new size using ufs_balloc_range(),
	 * which always creates pages to cover blocks it allocates.
	 */

	if (ITOV(ip)->v_type == VREG) {
		struct vm_page *pg;
		struct uvm_object *uobj = &ITOV(ip)->v_uobj;
		voff_t off = trunc_page(ffs_lblktosize(fs, lbprev));
		voff_t endoff = round_page(ffs_lblktosize(fs, lbprev) + osize);

		mutex_enter(uobj->vmobjlock);
		while (off < endoff) {
			pg = uvm_pagelookup(uobj, off);
			KASSERT(pg->owner == curproc->p_pid &&
				pg->lowner == curlwp->l_lid);
			off += PAGE_SIZE;
		}
		mutex_exit(uobj->vmobjlock);
	}
#endif /* UVM_PAGE_TRKOWN */

#ifdef DIAGNOSTIC
	if ((u_int)osize > fs->fs_bsize || ffs_fragoff(fs, osize) != 0 ||
	    (u_int)nsize > fs->fs_bsize || ffs_fragoff(fs, nsize) != 0) {
		printf(
		    "dev = 0x%llx, bsize = %d, osize = %d, nsize = %d, fs = %s\n",
		    (unsigned long long)ip->i_dev, fs->fs_bsize, osize, nsize,
		    fs->fs_fsmnt);
		panic("ffs_realloccg: bad size");
	}
	if (cred == NOCRED)
		panic("ffs_realloccg: missing credential");
#endif /* DIAGNOSTIC */
	if (freespace(fs, fs->fs_minfree) <= 0 &&
	    kauth_authorize_system(cred, KAUTH_SYSTEM_FS_RESERVEDSPACE, 0, NULL,
	    NULL, NULL) != 0) {
		mutex_exit(&ump->um_lock);
		goto nospace;
	}
	if (fs->fs_magic == FS_UFS2_MAGIC)
		bprev = ufs_rw64(ip->i_ffs2_db[lbprev], UFS_FSNEEDSWAP(fs));
	else
		bprev = ufs_rw32(ip->i_ffs1_db[lbprev], UFS_FSNEEDSWAP(fs));

	if (bprev == 0) {
		printf("dev = 0x%llx, bsize = %d, bprev = %" PRId64 ", fs = %s\n",
		    (unsigned long long)ip->i_dev, fs->fs_bsize, bprev,
		    fs->fs_fsmnt);
		panic("ffs_realloccg: bad bprev");
	}
	mutex_exit(&ump->um_lock);

	/*
	 * Allocate the extra space in the buffer.
	 */
	if (bpp != NULL &&
	    (error = bread(ITOV(ip), lbprev, osize, NOCRED, 0, &bp)) != 0) {
		return (error);
	}
#if defined(QUOTA) || defined(QUOTA2)
	if ((error = chkdq(ip, btodb(nsize - osize), cred, 0)) != 0) {
		if (bpp != NULL) {
			brelse(bp, 0);
		}
		return (error);
	}
#endif
	/*
	 * Check for extension in the existing location.
	 */
	cg = dtog(fs, bprev);
	mutex_enter(&ump->um_lock);
	if ((bno = ffs_fragextend(ip, cg, bprev, osize, nsize)) != 0) {
		DIP_ADD(ip, blocks, btodb(nsize - osize));
		ip->i_flag |= IN_CHANGE | IN_UPDATE;

		if (bpp != NULL) {
			if (bp->b_blkno != FFS_FSBTODB(fs, bno))
				panic("bad blockno");
			allocbuf(bp, nsize, 1);
			memset((char *)bp->b_data + osize, 0, nsize - osize);
			mutex_enter(bp->b_objlock);
			KASSERT(!cv_has_waiters(&bp->b_done));
			bp->b_oflags |= BO_DONE;
			mutex_exit(bp->b_objlock);
			*bpp = bp;
		}
		if (blknop != NULL) {
			*blknop = bno;
		}
		return (0);
	}
	/*
	 * Allocate a new disk location.
	 */
	if (bpref >= fs->fs_size)
		bpref = 0;
	switch ((int)fs->fs_optim) {
	case FS_OPTSPACE:
		/*
		 * Allocate an exact sized fragment. Although this makes
		 * best use of space, we will waste time relocating it if
		 * the file continues to grow. If the fragmentation is
		 * less than half of the minimum free reserve, we choose
		 * to begin optimizing for time.
		 */
		request = nsize;
		if (fs->fs_minfree < 5 ||
		    fs->fs_cstotal.cs_nffree >
		    fs->fs_dsize * fs->fs_minfree / (2 * 100))
			break;

		if (ffs_log_changeopt) {
			log(LOG_NOTICE,
				"%s: optimization changed from SPACE to TIME\n",
				fs->fs_fsmnt);
		}

		fs->fs_optim = FS_OPTTIME;
		break;
	case FS_OPTTIME:
		/*
		 * At this point we have discovered a file that is trying to
		 * grow a small fragment to a larger fragment. To save time,
		 * we allocate a full sized block, then free the unused portion.
		 * If the file continues to grow, the `ffs_fragextend' call
		 * above will be able to grow it in place without further
		 * copying. If aberrant programs cause disk fragmentation to
		 * grow within 2% of the free reserve, we choose to begin
		 * optimizing for space.
		 */
		request = fs->fs_bsize;
		if (fs->fs_cstotal.cs_nffree <
		    fs->fs_dsize * (fs->fs_minfree - 2) / 100)
			break;

		if (ffs_log_changeopt) {
			log(LOG_NOTICE,
				"%s: optimization changed from TIME to SPACE\n",
				fs->fs_fsmnt);
		}

		fs->fs_optim = FS_OPTSPACE;
		break;
	default:
		printf("dev = 0x%llx, optim = %d, fs = %s\n",
		    (unsigned long long)ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
		panic("ffs_realloccg: bad optim");
		/* NOTREACHED */
	}
	bno = ffs_hashalloc(ip, cg, bpref, request, 0, ffs_alloccg);
	if (bno > 0) {
		if ((ip->i_ump->um_mountp->mnt_wapbl) &&
		    (ITOV(ip)->v_type != VREG)) {
			UFS_WAPBL_REGISTER_DEALLOCATION(
			    ip->i_ump->um_mountp, FFS_FSBTODB(fs, bprev),
			    osize);
		} else {
			ffs_blkfree(fs, ip->i_devvp, bprev, (long)osize,
			    ip->i_number);
		}
		if (nsize < request) {
			if ((ip->i_ump->um_mountp->mnt_wapbl) &&
			    (ITOV(ip)->v_type != VREG)) {
				UFS_WAPBL_REGISTER_DEALLOCATION(
				    ip->i_ump->um_mountp,
				    FFS_FSBTODB(fs, (bno + ffs_numfrags(fs, nsize))),
				    request - nsize);
			} else
				ffs_blkfree(fs, ip->i_devvp,
				    bno + ffs_numfrags(fs, nsize),
				    (long)(request - nsize), ip->i_number);
		}
		DIP_ADD(ip, blocks, btodb(nsize - osize));
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		if (bpp != NULL) {
			bp->b_blkno = FFS_FSBTODB(fs, bno);
			allocbuf(bp, nsize, 1);
			memset((char *)bp->b_data + osize, 0, (u_int)nsize - osize);
			mutex_enter(bp->b_objlock);
			KASSERT(!cv_has_waiters(&bp->b_done));
			bp->b_oflags |= BO_DONE;
			mutex_exit(bp->b_objlock);
			*bpp = bp;
		}
		if (blknop != NULL) {
			*blknop = bno;
		}
		return (0);
	}
	mutex_exit(&ump->um_lock);

#if defined(QUOTA) || defined(QUOTA2)
	/*
	 * Restore user's disk quota because allocation failed.
	 */
	(void) chkdq(ip, -btodb(nsize - osize), cred, FORCE);
#endif
	if (bpp != NULL) {
		brelse(bp, 0);
	}

nospace:
	/*
	 * no space available
	 */
	ffs_fserr(fs, kauth_cred_geteuid(cred), "file system full");
	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
	return (ENOSPC);
}
/*
 * Allocate an inode in the file system.
 *
 * If allocating a directory, use ffs_dirpref to select the inode.
 * If allocating in a directory, the following hierarchy is followed:
 *   1) allocate the preferred inode.
 *   2) allocate an inode in the same cylinder group.
 *   3) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 * If no inode preference is given the following hierarchy is used
 * to allocate an inode:
 *   1) allocate an inode in cylinder group 0.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 *
 * => um_lock not held upon entry or return
 */
int
ffs_valloc(struct vnode *pvp, int mode, kauth_cred_t cred,
    struct vnode **vpp)
{
	struct ufsmount *ump;
	struct inode *pip;
	struct fs *fs;
	struct inode *ip;
	struct buf *bp;
	struct timespec ts;
	ino_t ino, ipref;
	int cg, error;

	UFS_WAPBL_JUNLOCK_ASSERT(pvp->v_mount);

	*vpp = NULL;

	pip = VTOI(pvp);
	fs = pip->i_fs;
	ump = pip->i_ump;

	error = UFS_WAPBL_BEGIN(pvp->v_mount);
	if (error) {
		return error;
	}
	mutex_enter(&ump->um_lock);
	if (fs->fs_cstotal.cs_nifree == 0)
		goto noinodes;

	if ((mode & IFMT) == IFDIR)
		ipref = ffs_dirpref(pip);
	else
		ipref = pip->i_number;
	if (ipref >= fs->fs_ncg * fs->fs_ipg)
		ipref = 0;
	cg = ino_to_cg(fs, ipref);
	/*
	 * Track number of dirs created one after another
	 * in a same cg without intervening by files.
	 */
	if ((mode & IFMT) == IFDIR) {
		if (fs->fs_contigdirs[cg] < 255)
			fs->fs_contigdirs[cg]++;
	} else {
		if (fs->fs_contigdirs[cg] > 0)
			fs->fs_contigdirs[cg]--;
	}
	ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode, 0, ffs_nodealloccg);
	if (ino == 0)
		goto noinodes;
	UFS_WAPBL_END(pvp->v_mount);
	error = VFS_VGET(pvp->v_mount, ino, vpp);
	if (error) {
		int err;
		err = UFS_WAPBL_BEGIN(pvp->v_mount);
		if (err == 0)
			ffs_vfree(pvp, ino, mode);
		if (err == 0)
			UFS_WAPBL_END(pvp->v_mount);
		return (error);
	}
	KASSERT((*vpp)->v_type == VNON);
	ip = VTOI(*vpp);
	if (ip->i_mode) {
		printf("mode = 0%o, inum = %d, fs = %s\n",
		    ip->i_mode, ip->i_number, fs->fs_fsmnt);
		printf("dmode %x mode %x dgen %x gen %x\n",
		    DIP(ip, mode), ip->i_mode,
		    DIP(ip, gen), ip->i_gen);
		printf("size %llx blocks %llx\n",
		    (long long)DIP(ip, size), (long long)DIP(ip, blocks));
		printf("ino %llu ipref %llu\n", (unsigned long long)ino,
		    (unsigned long long)ipref);
		error = bread(ump->um_devvp, FFS_FSBTODB(fs, ino_to_fsba(fs, ino)),
		    (int)fs->fs_bsize, NOCRED, 0, &bp);
		panic("ffs_valloc: dup alloc");
	}
	if (DIP(ip, blocks)) {				/* XXX */
		printf("free inode %llu on %s had %" PRId64 " blocks\n",
		    (unsigned long long)ino, fs->fs_fsmnt, DIP(ip, blocks));
		DIP_ASSIGN(ip, blocks, 0);
	}
	ip->i_flag &= ~IN_SPACECOUNTED;
	ip->i_flags = 0;
	DIP_ASSIGN(ip, flags, 0);
	/*
	 * Set up a new generation number for this inode.
	 */
	ip->i_gen++;
	DIP_ASSIGN(ip, gen, ip->i_gen);
	if (fs->fs_magic == FS_UFS2_MAGIC) {
		vfs_timestamp(&ts);
		ip->i_ffs2_birthtime = ts.tv_sec;
		ip->i_ffs2_birthnsec = ts.tv_nsec;
	}
	return (0);
noinodes:
	mutex_exit(&ump->um_lock);
	UFS_WAPBL_END(pvp->v_mount);
	ffs_fserr(fs, kauth_cred_geteuid(cred), "out of inodes");
	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
	return (ENOSPC);
}
/*
 * Find a cylinder group in which to place a directory.
 *
 * The policy implemented by this algorithm is to allocate a
 * directory inode in the same cylinder group as its parent
 * directory, but also to reserve space for its files' inodes
 * and data. Restrict the number of directories which may be
 * allocated one after another in the same cylinder group
 * without intervening allocation of files.
 *
 * If we allocate a first level directory then force allocation
 * in another cylinder group.
 */
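/*
 * Rough numbers, assuming the newfs defaults fs_avgfilesize = 16384 and
 * fs_avgfpdir = 64: dirsize starts at 16384 * 64 = 1 MB per directory, so a
 * cylinder group with about 64 MB of free blocks yields maxcontigdirs ~= 64,
 * i.e. roughly 64 directories may be created back to back in that group
 * before the allocator moves on; fs_contigdirs[] (updated in ffs_valloc())
 * is the counter that enforces this.
 */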
static ino_t
ffs_dirpref(struct inode *pip)
{
	register struct fs *fs;
	int cg, prefcg;
	int64_t dirsize, cgsize, curdsz;
	int avgifree, avgbfree, avgndir;
	int minifree, minbfree, maxndir;
	int mincg, minndir;
	int maxcontigdirs;

	KASSERT(mutex_owned(&pip->i_ump->um_lock));

	fs = pip->i_fs;

	avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
	avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
	avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg;

	/*
	 * Force allocation in another cg if creating a first level dir.
	 */
	if (ITOV(pip)->v_vflag & VV_ROOT) {
		prefcg = random() % fs->fs_ncg;
		mincg = prefcg;
		minndir = fs->fs_ipg;
		for (cg = prefcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
			    fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
			    fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				mincg = cg;
				minndir = fs->fs_cs(fs, cg).cs_ndir;
			}
		for (cg = 0; cg < prefcg; cg++)
			if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
			    fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
			    fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				mincg = cg;
				minndir = fs->fs_cs(fs, cg).cs_ndir;
			}
		return ((ino_t)(fs->fs_ipg * mincg));
	}

	/*
	 * Count various limits which are used for
	 * optimal allocation of a directory inode.
	 * Try cylinder groups with >75% avgifree and avgbfree.
	 * Avoid cylinder groups with no free blocks or inodes as that
	 * triggers an I/O-expensive cylinder group scan.
	 */
	maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg);
	minifree = avgifree - avgifree / 4;
	if (minifree < 1)
		minifree = 1;
	minbfree = avgbfree - avgbfree / 4;
	if (minbfree < 1)
		minbfree = 1;
	cgsize = (int64_t)fs->fs_fsize * fs->fs_fpg;
	dirsize = (int64_t)fs->fs_avgfilesize * fs->fs_avgfpdir;
	if (avgndir != 0) {
		curdsz = (cgsize - (int64_t)avgbfree * fs->fs_bsize) / avgndir;
		if (dirsize < curdsz)
			dirsize = curdsz;
	}
	if (cgsize < dirsize * 255)
		maxcontigdirs = (avgbfree * fs->fs_bsize) / dirsize;
	else
		maxcontigdirs = 255;
	if (fs->fs_avgfpdir > 0)
		maxcontigdirs = min(maxcontigdirs,
				    fs->fs_ipg / fs->fs_avgfpdir);
	if (maxcontigdirs == 0)
		maxcontigdirs = 1;

	/*
	 * Limit number of dirs in one cg and reserve space for
	 * regular files, but only if we have no deficit in
	 * inodes or space.
	 */
	prefcg = ino_to_cg(fs, pip->i_number);
	for (cg = prefcg; cg < fs->fs_ncg; cg++)
		if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
		    fs->fs_cs(fs, cg).cs_nifree >= minifree &&
		    fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
			if (fs->fs_contigdirs[cg] < maxcontigdirs)
				return ((ino_t)(fs->fs_ipg * cg));
		}
	for (cg = 0; cg < prefcg; cg++)
		if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
		    fs->fs_cs(fs, cg).cs_nifree >= minifree &&
		    fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
			if (fs->fs_contigdirs[cg] < maxcontigdirs)
				return ((ino_t)(fs->fs_ipg * cg));
		}
	/*
	 * This is a backstop when we are deficient in space.
	 */
	for (cg = prefcg; cg < fs->fs_ncg; cg++)
		if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
			return ((ino_t)(fs->fs_ipg * cg));
	for (cg = 0; cg < prefcg; cg++)
		if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
			break;
	return ((ino_t)(fs->fs_ipg * cg));
}
/*
 * Select the desired position for the next block in a file.  The file is
 * logically divided into sections. The first section is composed of the
 * direct blocks. Each additional section contains fs_maxbpg blocks.
 *
 * If no blocks have been allocated in the first section, the policy is to
 * request a block in the same cylinder group as the inode that describes
 * the file. If no blocks have been allocated in any other section, the
 * policy is to place the section in a cylinder group with a greater than
 * average number of free blocks. An appropriate cylinder group is found
 * by using a rotor that sweeps the cylinder groups. When a new group of
 * blocks is needed, the sweep begins in the cylinder group following the
 * cylinder group from which the previous allocation was made. The sweep
 * continues until a cylinder group with greater than the average number
 * of free blocks is found. If the allocation is for the first block in an
 * indirect block, the information on the previous allocation is unavailable;
 * here a best guess is made based upon the logical block number being
 * allocated.
 *
 * If a section is already partially allocated, the policy is to
 * contiguously allocate fs_maxcontig blocks. The end of one of these
 * contiguous blocks and the beginning of the next is laid out
 * contiguously if possible.
 *
 * => um_lock held on entry and exit
 */
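/*
 * For example, if fs_maxbpg is 2048 and blocks are 16 KB, a file enters a new
 * "section" -- and therefore asks for a cylinder group chosen by the rotor --
 * every 32 MB of data; within a section the preference is simply the
 * previously allocated block plus fs_frag, i.e. the next contiguous block.
 */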
daddr_t
ffs_blkpref_ufs1(struct inode *ip, daddr_t lbn, int indx, int flags,
    int32_t *bap /* XXX ondisk32 */)
{
	struct fs *fs;
	int cg;
	int avgbfree, startcg;

	KASSERT(mutex_owned(&ip->i_ump->um_lock));

	fs = ip->i_fs;

	/*
	 * If allocating a contiguous file with B_CONTIG, use the hints
	 * in the inode extensions to return the desired block.
	 *
	 * For metadata (indirect blocks) return the address of where
	 * the first indirect block resides - we'll scan for the next
	 * available slot if we need to allocate more than one indirect
	 * block.  For data, return the address of the actual block
	 * relative to the address of the first data block.
	 */
	if (flags & B_CONTIG) {
		KASSERT(ip->i_ffs_first_data_blk != 0);
		KASSERT(ip->i_ffs_first_indir_blk != 0);
		if (flags & B_METAONLY)
			return ip->i_ffs_first_indir_blk;
		else
			return ip->i_ffs_first_data_blk + ffs_blkstofrags(fs, lbn);
	}

	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
		if (lbn < UFS_NDADDR + FFS_NINDIR(fs)) {
			cg = ino_to_cg(fs, ip->i_number);
			return (cgbase(fs, cg) + fs->fs_frag);
		}
		/*
		 * Find a cylinder with greater than average number of
		 * unused data blocks.
		 */
		if (indx == 0 || bap[indx - 1] == 0)
			startcg =
			    ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
		else
			startcg = dtog(fs,
			    ufs_rw32(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + 1);
		startcg %= fs->fs_ncg;
		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		for (cg = startcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				return (cgbase(fs, cg) + fs->fs_frag);
			}
		for (cg = 0; cg < startcg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				return (cgbase(fs, cg) + fs->fs_frag);
			}
		return (0);
	}
	/*
	 * We just always try to lay things out contiguously.
	 */
	return ufs_rw32(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + fs->fs_frag;
}
daddr_t
ffs_blkpref_ufs2(struct inode *ip, daddr_t lbn, int indx, int flags,
    int64_t *bap)
{
	struct fs *fs;
	int cg;
	int avgbfree, startcg;

	KASSERT(mutex_owned(&ip->i_ump->um_lock));

	fs = ip->i_fs;

	/*
	 * If allocating a contiguous file with B_CONTIG, use the hints
	 * in the inode extensions to return the desired block.
	 *
	 * For metadata (indirect blocks) return the address of where
	 * the first indirect block resides - we'll scan for the next
	 * available slot if we need to allocate more than one indirect
	 * block.  For data, return the address of the actual block
	 * relative to the address of the first data block.
	 */
	if (flags & B_CONTIG) {
		KASSERT(ip->i_ffs_first_data_blk != 0);
		KASSERT(ip->i_ffs_first_indir_blk != 0);
		if (flags & B_METAONLY)
			return ip->i_ffs_first_indir_blk;
		else
			return ip->i_ffs_first_data_blk + ffs_blkstofrags(fs, lbn);
	}

	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
		if (lbn < UFS_NDADDR + FFS_NINDIR(fs)) {
			cg = ino_to_cg(fs, ip->i_number);
			return (cgbase(fs, cg) + fs->fs_frag);
		}
		/*
		 * Find a cylinder with greater than average number of
		 * unused data blocks.
		 */
		if (indx == 0 || bap[indx - 1] == 0)
			startcg =
			    ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
		else
			startcg = dtog(fs,
			    ufs_rw64(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + 1);
		startcg %= fs->fs_ncg;
		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		for (cg = startcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				return (cgbase(fs, cg) + fs->fs_frag);
			}
		for (cg = 0; cg < startcg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				return (cgbase(fs, cg) + fs->fs_frag);
			}
		return (0);
	}
	/*
	 * We just always try to lay things out contiguously.
	 */
	return ufs_rw64(bap[indx - 1], UFS_FSNEEDSWAP(fs)) + fs->fs_frag;
}
/*
 * Implement the cylinder overflow algorithm.
 *
 * The policy implemented by this algorithm is:
 *   1) allocate the block in its requested cylinder group.
 *   2) quadratically rehash on the cylinder group number.
 *   3) brute force search for a free block.
 *
 * => called with um_lock held
 * => returns with um_lock released on success, held on failure
 *    (*allocator releases lock on success, retains lock on failure)
 */
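/*
 * Probe order, e.g. starting from cg 10 on a filesystem with 40 cylinder
 * groups: step 1 tries cg 10; step 2 (quadratic rehash) tries 11, 13, 17,
 * 25, ... (adding 1, 2, 4, 8, ... modulo fs_ncg); step 3 falls back to a
 * linear sweep beginning at cg 12.
 */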
static daddr_t
ffs_hashalloc(struct inode *ip, int cg, daddr_t pref,
    int size /* size for data blocks, mode for inodes */,
    int flags, daddr_t (*allocator)(struct inode *, int, daddr_t, int, int))
{
	struct fs *fs;
	daddr_t result;
	int i, icg = cg;

	fs = ip->i_fs;
	/*
	 * 1: preferred cylinder group
	 */
	result = (*allocator)(ip, cg, pref, size, flags);
	if (result)
		return (result);

	if (flags & B_CONTIG)
		return (result);
	/*
	 * 2: quadratic rehash
	 */
	for (i = 1; i < fs->fs_ncg; i *= 2) {
		cg += i;
		if (cg >= fs->fs_ncg)
			cg -= fs->fs_ncg;
		result = (*allocator)(ip, cg, 0, size, flags);
		if (result)
			return (result);
	}
	/*
	 * 3: brute force search
	 * Note that we start at i == 2, since 0 was checked initially,
	 * and 1 is always checked in the quadratic rehash.
	 */
	cg = (icg + 2) % fs->fs_ncg;
	for (i = 2; i < fs->fs_ncg; i++) {
		result = (*allocator)(ip, cg, 0, size, flags);
		if (result)
			return (result);
		cg++;
		if (cg == fs->fs_ncg)
			cg = 0;
	}
	return (0);
}
/*
 * Determine whether a fragment can be extended.
 *
 * Check to see if the necessary fragments are available, and
 * if they are, allocate them.
 *
 * => called with um_lock held
 * => returns with um_lock released on success, held on failure
 */
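/*
 * A fragment may only grow toward the end of the fs_frag-sized block that
 * contains it: e.g. with fs_frag = 8, a 2-fragment piece starting at fragment
 * offset 6 of its block cannot grow to 4 fragments, since that would spill
 * into the next block; the bbase/ffs_fragnum() test below rejects such a
 * request before the cylinder group is even read.
 */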
static daddr_t
ffs_fragextend(struct inode *ip, int cg, daddr_t bprev, int osize, int nsize)
{
	struct ufsmount *ump;
	struct fs *fs;
	struct cg *cgp;
	struct buf *bp;
	daddr_t bno;
	int frags, bbase;
	int i, error;
	u_int8_t *blksfree;

	fs = ip->i_fs;
	ump = ip->i_ump;

	KASSERT(mutex_owned(&ump->um_lock));

	if (fs->fs_cs(fs, cg).cs_nffree < ffs_numfrags(fs, nsize - osize))
		return (0);
	frags = ffs_numfrags(fs, nsize);
	bbase = ffs_fragnum(fs, bprev);
	if (bbase > ffs_fragnum(fs, (bprev + frags - 1))) {
		/* cannot extend across a block boundary */
		return (0);
	}
	mutex_exit(&ump->um_lock);
	error = bread(ip->i_devvp, FFS_FSBTODB(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, NOCRED, B_MODIFY, &bp);
	if (error)
		goto fail;
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs)))
		goto fail;
	cgp->cg_old_time = ufs_rw32(time_second, UFS_FSNEEDSWAP(fs));
	if ((fs->fs_magic != FS_UFS1_MAGIC) ||
	    (fs->fs_old_flags & FS_FLAGS_UPDATED))
		cgp->cg_time = ufs_rw64(time_second, UFS_FSNEEDSWAP(fs));
	bno = dtogd(fs, bprev);
	blksfree = cg_blksfree(cgp, UFS_FSNEEDSWAP(fs));
	for (i = ffs_numfrags(fs, osize); i < frags; i++)
		if (isclr(blksfree, bno + i))
			goto fail;
	/*
	 * the current fragment can be extended
	 * deduct the count on fragment being extended into
	 * increase the count on the remaining fragment (if any)
	 * allocate the extended piece
	 */
	for (i = frags; i < fs->fs_frag - bbase; i++)
		if (isclr(blksfree, bno + i))
			break;
	ufs_add32(cgp->cg_frsum[i - ffs_numfrags(fs, osize)], -1, UFS_FSNEEDSWAP(fs));
	if (i != frags)
		ufs_add32(cgp->cg_frsum[i - frags], 1, UFS_FSNEEDSWAP(fs));
	mutex_enter(&ump->um_lock);
	for (i = ffs_numfrags(fs, osize); i < frags; i++) {
		clrbit(blksfree, bno + i);
		ufs_add32(cgp->cg_cs.cs_nffree, -1, UFS_FSNEEDSWAP(fs));
		fs->fs_cstotal.cs_nffree--;
		fs->fs_cs(fs, cg).cs_nffree--;
	}
	fs->fs_fmod = 1;
	ACTIVECG_CLR(fs, cg);
	mutex_exit(&ump->um_lock);
	bdwrite(bp);
	return (bprev);

 fail:
	if (bp != NULL)
		brelse(bp, 0);
	mutex_enter(&ump->um_lock);
	return (0);
}
/*
 * Determine whether a block can be allocated.
 *
 * Check to see if a block of the appropriate size is available,
 * and if it is, allocate it.
 */
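/*
 * cg_frsum[] counts the free fragment runs in this cylinder group by length
 * (cg_frsum[3] is the number of free 3-fragment runs), so the loop below can
 * pick the smallest run length >= the request without scanning the block map;
 * only if no suitable run exists is a whole block carved up.
 */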
static daddr_t
ffs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size, int flags)
{
	struct ufsmount *ump;
	struct fs *fs = ip->i_fs;
	struct cg *cgp;
	struct buf *bp;
	int32_t bno;
	daddr_t blkno;
	int error, frags, allocsiz, i;
	u_int8_t *blksfree;
	const int needswap = UFS_FSNEEDSWAP(fs);

	ump = ip->i_ump;

	KASSERT(mutex_owned(&ump->um_lock));

	if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
		return (0);
	mutex_exit(&ump->um_lock);
	error = bread(ip->i_devvp, FFS_FSBTODB(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, NOCRED, B_MODIFY, &bp);
	if (error)
		goto fail;
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp, needswap) ||
	    (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize))
		goto fail;
	cgp->cg_old_time = ufs_rw32(time_second, needswap);
	if ((fs->fs_magic != FS_UFS1_MAGIC) ||
	    (fs->fs_old_flags & FS_FLAGS_UPDATED))
		cgp->cg_time = ufs_rw64(time_second, needswap);
	if (size == fs->fs_bsize) {
		mutex_enter(&ump->um_lock);
		blkno = ffs_alloccgblk(ip, bp, bpref, flags);
		ACTIVECG_CLR(fs, cg);
		mutex_exit(&ump->um_lock);
		bdwrite(bp);
		return (blkno);
	}
	/*
	 * check to see if any fragments are already available
	 * allocsiz is the size which will be allocated, hacking
	 * it down to a smaller size if necessary
	 */
	blksfree = cg_blksfree(cgp, needswap);
	frags = ffs_numfrags(fs, size);
	for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
		if (cgp->cg_frsum[allocsiz] != 0)
			break;
	if (allocsiz == fs->fs_frag) {
		/*
		 * no fragments were available, so a block will be
		 * allocated, and hacked up
		 */
		if (cgp->cg_cs.cs_nbfree == 0)
			goto fail;
		mutex_enter(&ump->um_lock);
		blkno = ffs_alloccgblk(ip, bp, bpref, flags);
		bno = dtogd(fs, blkno);
		for (i = frags; i < fs->fs_frag; i++)
			setbit(blksfree, bno + i);
		i = fs->fs_frag - frags;
		ufs_add32(cgp->cg_cs.cs_nffree, i, needswap);
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		fs->fs_fmod = 1;
		ufs_add32(cgp->cg_frsum[i], 1, needswap);
		ACTIVECG_CLR(fs, cg);
		mutex_exit(&ump->um_lock);
		bdwrite(bp);
		return (blkno);
	}
	bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
#if 0
	/*
	 * XXX fvdl mapsearch will panic, and never return -1
	 *          also: returning NULL as daddr_t ?
	 */
	if (bno < 0)
		goto fail;
#endif
	for (i = 0; i < frags; i++)
		clrbit(blksfree, bno + i);
	mutex_enter(&ump->um_lock);
	ufs_add32(cgp->cg_cs.cs_nffree, -frags, needswap);
	fs->fs_cstotal.cs_nffree -= frags;
	fs->fs_cs(fs, cg).cs_nffree -= frags;
	fs->fs_fmod = 1;
	ufs_add32(cgp->cg_frsum[allocsiz], -1, needswap);
	if (frags != allocsiz)
		ufs_add32(cgp->cg_frsum[allocsiz - frags], 1, needswap);
	blkno = cgbase(fs, cg) + bno;
	ACTIVECG_CLR(fs, cg);
	mutex_exit(&ump->um_lock);
	bdwrite(bp);
	return blkno;

 fail:
	if (bp != NULL)
		brelse(bp, 0);
	mutex_enter(&ump->um_lock);
	return (0);
}
/*
 * Allocate a block in a cylinder group.
 *
 * This algorithm implements the following policy:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate the next available block on the block rotor for the
 *      specified cylinder group.
 * Note that this routine only allocates fs_bsize blocks; these
 * blocks may be fragmented by the routine that allocates them.
 */
static daddr_t
ffs_alloccgblk(struct inode *ip, struct buf *bp, daddr_t bpref, int flags)
{
	struct fs *fs = ip->i_fs;
	struct cg *cgp;
	int cg;
	daddr_t blkno;
	int32_t bno;
	u_int8_t *blksfree;
	const int needswap = UFS_FSNEEDSWAP(fs);

	KASSERT(mutex_owned(&ip->i_ump->um_lock));

	cgp = (struct cg *)bp->b_data;
	blksfree = cg_blksfree(cgp, needswap);
	if (bpref == 0 || dtog(fs, bpref) != ufs_rw32(cgp->cg_cgx, needswap)) {
		bpref = ufs_rw32(cgp->cg_rotor, needswap);
	} else {
		bpref = ffs_blknum(fs, bpref);
		bno = dtogd(fs, bpref);
		/*
		 * if the requested block is available, use it
		 */
		if (ffs_isblock(fs, blksfree, ffs_fragstoblks(fs, bno)))
			goto gotit;
		/*
		 * if the requested data block isn't available and we are
		 * trying to allocate a contiguous file, return an error.
		 */
		if ((flags & (B_CONTIG | B_METAONLY)) == B_CONTIG)
			return (0);
	}

	/*
	 * Take the next available block in this cylinder group.
	 */
	bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
	if (bno < 0)
		return (0);
	cgp->cg_rotor = ufs_rw32(bno, needswap);
gotit:
	blkno = ffs_fragstoblks(fs, bno);
	ffs_clrblock(fs, blksfree, blkno);
	ffs_clusteracct(fs, cgp, blkno, -1);
	ufs_add32(cgp->cg_cs.cs_nbfree, -1, needswap);
	fs->fs_cstotal.cs_nbfree--;
	fs->fs_cs(fs, ufs_rw32(cgp->cg_cgx, needswap)).cs_nbfree--;
	if ((fs->fs_magic == FS_UFS1_MAGIC) &&
	    ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0)) {
		int cylno;
		cylno = old_cbtocylno(fs, bno);
		KASSERT(cylno >= 0);
		KASSERT(cylno < fs->fs_old_ncyl);
		KASSERT(old_cbtorpos(fs, bno) >= 0);
		KASSERT(fs->fs_old_nrpos == 0 || old_cbtorpos(fs, bno) < fs->fs_old_nrpos);
		ufs_add16(old_cg_blks(fs, cgp, cylno, needswap)[old_cbtorpos(fs, bno)], -1,
		    needswap);
		ufs_add32(old_cg_blktot(cgp, needswap)[cylno], -1, needswap);
	}
	fs->fs_fmod = 1;
	cg = ufs_rw32(cgp->cg_cgx, needswap);
	blkno = cgbase(fs, cg) + bno;
	return (blkno);
}
/*
 * Determine whether an inode can be allocated.
 *
 * Check to see if an inode is available, and if it is,
 * allocate it using the following policy:
 *   1) allocate the requested inode.
 *   2) allocate the next available inode after the requested
 *      inode in the specified cylinder group.
 */
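/*
 * The used-inode bitmap (cg_inosused) is scanned a byte at a time with
 * skpc(), starting from the cg_irotor hint, for the first byte that is not
 * 0xff; ffs() then picks the first clear bit within it.  For UFS2, inode
 * blocks beyond cg_initediblk are initialized lazily here, just before they
 * are handed out.
 */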
static daddr_t
ffs_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode, int flags)
{
	struct ufsmount *ump = ip->i_ump;
	struct fs *fs = ip->i_fs;
	struct cg *cgp;
	struct buf *bp, *ibp;
	u_int8_t *inosused;
	int error, start, len, loc, map, i;
	int32_t initediblk;
	daddr_t nalloc;
	struct ufs2_dinode *dp2;
	const int needswap = UFS_FSNEEDSWAP(fs);

	KASSERT(mutex_owned(&ump->um_lock));
	UFS_WAPBL_JLOCK_ASSERT(ip->i_ump->um_mountp);

	if (fs->fs_cs(fs, cg).cs_nifree == 0)
		return (0);
	mutex_exit(&ump->um_lock);
	ibp = NULL;
	initediblk = -1;
retry:
	error = bread(ip->i_devvp, FFS_FSBTODB(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, NOCRED, B_MODIFY, &bp);
	if (error)
		goto fail;
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp, needswap) || cgp->cg_cs.cs_nifree == 0)
		goto fail;

	if (ibp != NULL &&
	    initediblk != ufs_rw32(cgp->cg_initediblk, needswap)) {
		/* Another thread allocated more inodes so we retry the test. */
		brelse(ibp, 0);
		ibp = NULL;
	}
	/*
	 * Check to see if we need to initialize more inodes.
	 */
	if (fs->fs_magic == FS_UFS2_MAGIC && ibp == NULL) {
		initediblk = ufs_rw32(cgp->cg_initediblk, needswap);
		nalloc = fs->fs_ipg - ufs_rw32(cgp->cg_cs.cs_nifree, needswap);
		if (nalloc + FFS_INOPB(fs) > initediblk &&
		    initediblk < ufs_rw32(cgp->cg_niblk, needswap)) {
			/*
			 * We have to release the cg buffer here to prevent
			 * a deadlock when reading the inode block will
			 * run a copy-on-write that might use this cg.
			 */
			brelse(bp, 0);
			bp = NULL;
			error = ffs_getblk(ip->i_devvp, FFS_FSBTODB(fs,
			    ino_to_fsba(fs, cg * fs->fs_ipg + initediblk)),
			    FFS_NOBLK, fs->fs_bsize, false, &ibp);
			if (error)
				goto fail;
			goto retry;
		}
	}

	cgp->cg_old_time = ufs_rw32(time_second, needswap);
	if ((fs->fs_magic != FS_UFS1_MAGIC) ||
	    (fs->fs_old_flags & FS_FLAGS_UPDATED))
		cgp->cg_time = ufs_rw64(time_second, needswap);
	inosused = cg_inosused(cgp, needswap);
	if (ipref) {
		ipref %= fs->fs_ipg;
		if (isclr(inosused, ipref))
			goto gotit;
	}
	start = ufs_rw32(cgp->cg_irotor, needswap) / NBBY;
	len = howmany(fs->fs_ipg - ufs_rw32(cgp->cg_irotor, needswap),
		NBBY);
	loc = skpc(0xff, len, &inosused[start]);
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = skpc(0xff, len, &inosused[0]);
		if (loc == 0) {
			printf("cg = %d, irotor = %d, fs = %s\n",
			    cg, ufs_rw32(cgp->cg_irotor, needswap),
			    fs->fs_fsmnt);
			panic("ffs_nodealloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	i = start + len - loc;
	map = inosused[i] ^ 0xff;
	if (map == 0) {
		printf("fs = %s\n", fs->fs_fsmnt);
		panic("ffs_nodealloccg: block not in map");
	}
	ipref = i * NBBY + ffs(map) - 1;
	cgp->cg_irotor = ufs_rw32(ipref, needswap);
gotit:
	UFS_WAPBL_REGISTER_INODE(ip->i_ump->um_mountp, cg * fs->fs_ipg + ipref,
	    mode);
	/*
	 * Check to see if we need to initialize more inodes.
	 */
	if (ibp != NULL) {
		KASSERT(initediblk == ufs_rw32(cgp->cg_initediblk, needswap));
		memset(ibp->b_data, 0, fs->fs_bsize);
		dp2 = (struct ufs2_dinode *)(ibp->b_data);
		for (i = 0; i < FFS_INOPB(fs); i++) {
			/*
			 * Don't bother to swap, it's supposed to be
			 * random, after all.
			 */
			dp2->di_gen = (cprng_fast32() & INT32_MAX) / 2 + 1;
			dp2++;
		}
		initediblk += FFS_INOPB(fs);
		cgp->cg_initediblk = ufs_rw32(initediblk, needswap);
	}

	mutex_enter(&ump->um_lock);
	ACTIVECG_CLR(fs, cg);
	setbit(inosused, ipref);
	ufs_add32(cgp->cg_cs.cs_nifree, -1, needswap);
	fs->fs_cstotal.cs_nifree--;
	fs->fs_cs(fs, cg).cs_nifree--;
	fs->fs_fmod = 1;
	if ((mode & IFMT) == IFDIR) {
		ufs_add32(cgp->cg_cs.cs_ndir, 1, needswap);
		fs->fs_cstotal.cs_ndir++;
		fs->fs_cs(fs, cg).cs_ndir++;
	}
	mutex_exit(&ump->um_lock);
	if (ibp != NULL) {
		bwrite(ibp);
		bawrite(bp);
	} else
		bdwrite(bp);
	return (cg * fs->fs_ipg + ipref);
 fail:
	if (bp != NULL)
		brelse(bp, 0);
	if (ibp != NULL)
		brelse(ibp, 0);
	mutex_enter(&ump->um_lock);
	return (0);
}
/*
 * Allocate a block or fragment.
 *
 * The specified block or fragment is removed from the
 * free map, possibly fragmenting a block in the process.
 *
 * This implementation should mirror fs_blkfree
 *
 * => um_lock not held on entry or exit
 */
int
ffs_blkalloc(struct inode *ip, daddr_t bno, long size)
{
	int error;

	error = ffs_check_bad_allocation(__func__, ip->i_fs, bno, size,
	    ip->i_dev, ip->i_uid);
	if (error)
		return error;

	return ffs_blkalloc_ump(ip->i_ump, bno, size);
}
int
ffs_blkalloc_ump(struct ufsmount *ump, daddr_t bno, long size)
{
	struct fs *fs = ump->um_fs;
	struct cg *cgp;
	struct buf *bp;
	int32_t fragno, cgbno;
	int i, error, cg, blk, frags, bbase;
	u_int8_t *blksfree;
	const int needswap = UFS_FSNEEDSWAP(fs);

	KASSERT((u_int)size <= fs->fs_bsize && ffs_fragoff(fs, size) == 0 &&
	    ffs_fragnum(fs, bno) + ffs_numfrags(fs, size) <= fs->fs_frag);
	KASSERT(bno < fs->fs_size);

	cg = dtog(fs, bno);
	error = bread(ump->um_devvp, FFS_FSBTODB(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, NOCRED, B_MODIFY, &bp);
	if (error) {
		return error;
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp, needswap)) {
		brelse(bp, 0);
		return EIO;
	}
	cgp->cg_old_time = ufs_rw32(time_second, needswap);
	cgp->cg_time = ufs_rw64(time_second, needswap);
	cgbno = dtogd(fs, bno);
	blksfree = cg_blksfree(cgp, needswap);

	mutex_enter(&ump->um_lock);
	if (size == fs->fs_bsize) {
		fragno = ffs_fragstoblks(fs, cgbno);
		if (!ffs_isblock(fs, blksfree, fragno)) {
			mutex_exit(&ump->um_lock);
			brelse(bp, 0);
			return EBUSY;
		}
		ffs_clrblock(fs, blksfree, fragno);
		ffs_clusteracct(fs, cgp, fragno, -1);
		ufs_add32(cgp->cg_cs.cs_nbfree, -1, needswap);
		fs->fs_cstotal.cs_nbfree--;
		fs->fs_cs(fs, cg).cs_nbfree--;
	} else {
		bbase = cgbno - ffs_fragnum(fs, cgbno);

		frags = ffs_numfrags(fs, size);
		for (i = 0; i < frags; i++) {
			if (isclr(blksfree, cgbno + i)) {
				mutex_exit(&ump->um_lock);
				brelse(bp, 0);
				return EBUSY;
			}
		}
		/*
		 * if a complete block is being split, account for it
		 */
		fragno = ffs_fragstoblks(fs, bbase);
		if (ffs_isblock(fs, blksfree, fragno)) {
			ufs_add32(cgp->cg_cs.cs_nffree, fs->fs_frag, needswap);
			fs->fs_cstotal.cs_nffree += fs->fs_frag;
			fs->fs_cs(fs, cg).cs_nffree += fs->fs_frag;
			ffs_clusteracct(fs, cgp, fragno, -1);
			ufs_add32(cgp->cg_cs.cs_nbfree, -1, needswap);
			fs->fs_cstotal.cs_nbfree--;
			fs->fs_cs(fs, cg).cs_nbfree--;
		}
		/*
		 * decrement the counts associated with the old frags
		 */
		blk = blkmap(fs, blksfree, bbase);
		ffs_fragacct(fs, blk, cgp->cg_frsum, -1, needswap);
		/*
		 * allocate the fragment
		 */
		for (i = 0; i < frags; i++) {
			clrbit(blksfree, cgbno + i);
		}
		ufs_add32(cgp->cg_cs.cs_nffree, -i, needswap);
		fs->fs_cstotal.cs_nffree -= i;
		fs->fs_cs(fs, cg).cs_nffree -= i;
		/*
		 * add back in counts associated with the new frags
		 */
		blk = blkmap(fs, blksfree, bbase);
		ffs_fragacct(fs, blk, cgp->cg_frsum, 1, needswap);
	}
	fs->fs_fmod = 1;
	ACTIVECG_CLR(fs, cg);
	mutex_exit(&ump->um_lock);
	bdwrite(bp);
	return 0;
}
/*
 * Free a block or fragment.
 *
 * The specified block or fragment is placed back in the
 * free map. If a fragment is deallocated, a possible
 * block reassembly is checked.
 *
 * => um_lock not held on entry or exit
 */
static void
ffs_blkfree_cg(struct fs *fs, struct vnode *devvp, daddr_t bno, long size)
{
	struct cg *cgp;
	struct buf *bp;
	struct ufsmount *ump;
	daddr_t cgblkno;
	int error, cg;
	dev_t dev;
	const bool devvp_is_snapshot = (devvp->v_type != VBLK);
	const int needswap = UFS_FSNEEDSWAP(fs);

	KASSERT(!devvp_is_snapshot);

	cg = dtog(fs, bno);
	dev = devvp->v_rdev;
	ump = VFSTOUFS(spec_node_getmountedfs(devvp));
	KASSERT(fs == ump->um_fs);
	cgblkno = FFS_FSBTODB(fs, cgtod(fs, cg));

	error = bread(devvp, cgblkno, (int)fs->fs_cgsize,
	    NOCRED, B_MODIFY, &bp);
	if (error) {
		return;
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp, needswap)) {
		brelse(bp, 0);
		return;
	}

	ffs_blkfree_common(ump, fs, dev, bp, bno, size, devvp_is_snapshot);

	bdwrite(bp);
}
struct discardopdata {
	struct work wk; /* must be first */
	struct vnode *devvp;
	daddr_t bno;
	long size;
};

struct discarddata {
	struct fs *fs;
	struct discardopdata *entry;
	long maxsize;
	kmutex_t entrylk;
	struct workqueue *wq;
	int wqcnt, wqdraining;
	kmutex_t wqlk;
	kcondvar_t wqcv;
	/* timer for flush? */
};
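
/*
 * Discard (TRIM) batching: at most one pending discardopdata is kept in
 * ts->entry.  Because ffs deallocates a file's blocks from the end backwards,
 * a newly freed range can usually be prepended to the pending one; once it
 * can no longer be merged, or would exceed ts->maxsize, it is pushed onto the
 * workqueue, where ffs_discardcb() issues the advisory DIOCDISCARD ioctl and
 * then marks the blocks free in the cylinder group maps via ffs_blkfree_td().
 */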
static void
ffs_blkfree_td(struct fs *fs, struct discardopdata *td)
{
	long todo;

	while (td->size) {
		todo = min(td->size,
		    ffs_lfragtosize(fs, (fs->fs_frag - ffs_fragnum(fs, td->bno))));
		ffs_blkfree_cg(fs, td->devvp, td->bno, todo);
		td->bno += ffs_numfrags(fs, todo);
		td->size -= todo;
	}
}
static void
ffs_discardcb(struct work *wk, void *arg)
{
	struct discardopdata *td = (void *)wk;
	struct discarddata *ts = arg;
	struct fs *fs = ts->fs;
	struct disk_discard_range ta;
#ifdef TRIMDEBUG
	int error;
#endif

	ta.bno = FFS_FSBTODB(fs, td->bno);
	ta.size = td->size >> DEV_BSHIFT;
#ifdef TRIMDEBUG
	error =
#endif
	VOP_IOCTL(td->devvp, DIOCDISCARD, &ta, FWRITE, FSCRED);
#ifdef TRIMDEBUG
	printf("trim(%" PRId64 ",%ld):%d\n", td->bno, td->size, error);
#endif

	ffs_blkfree_td(fs, td);
	kmem_free(td, sizeof(*td));
	mutex_enter(&ts->wqlk);
	ts->wqcnt--;
	if (ts->wqdraining && !ts->wqcnt)
		cv_signal(&ts->wqcv);
	mutex_exit(&ts->wqlk);
}
void *
ffs_discard_init(struct vnode *devvp, struct fs *fs)
{
	struct disk_discard_params tp;
	struct discarddata *ts;
	int error;

	error = VOP_IOCTL(devvp, DIOCGDISCARDPARAMS, &tp, FREAD, FSCRED);
	if (error) {
		printf("DIOCGDISCARDPARAMS: %d\n", error);
		return NULL;
	}
	if (tp.maxsize * DEV_BSIZE < fs->fs_bsize) {
		printf("tp.maxsize=%ld, fs_bsize=%d\n", tp.maxsize, fs->fs_bsize);
		return NULL;
	}

	ts = kmem_zalloc(sizeof (*ts), KM_SLEEP);
	error = workqueue_create(&ts->wq, "trimwq", ffs_discardcb, ts,
				 PRI_USER, IPL_NONE, 0);
	if (error) {
		kmem_free(ts, sizeof (*ts));
		return NULL;
	}

	mutex_init(&ts->entrylk, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&ts->wqlk, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&ts->wqcv, "trimwqcv");
	ts->maxsize = max(tp.maxsize * DEV_BSIZE, 100*1024); /* XXX */
	ts->fs = fs;
	return ts;
}
void
ffs_discard_finish(void *vts, int flags)
{
	struct discarddata *ts = vts;
	struct discardopdata *td = NULL;
	int res = 0;

	/* wait for workqueue to drain */
	mutex_enter(&ts->wqlk);
	if (ts->wqcnt) {
		ts->wqdraining = 1;
		res = cv_timedwait(&ts->wqcv, &ts->wqlk, mstohz(5000));
	}
	mutex_exit(&ts->wqlk);
	if (res)
		printf("ffs_discarddata drain timeout\n");

	mutex_enter(&ts->entrylk);
	if (ts->entry) {
		td = ts->entry;
		ts->entry = NULL;
	}
	mutex_exit(&ts->entrylk);
	if (td) {
		/* XXX don't tell disk, its optional */
		ffs_blkfree_td(ts->fs, td);
#ifdef TRIMDEBUG
		printf("finish(%" PRId64 ",%ld)\n", td->bno, td->size);
#endif
		kmem_free(td, sizeof(*td));
	}

	cv_destroy(&ts->wqcv);
	mutex_destroy(&ts->entrylk);
	mutex_destroy(&ts->wqlk);
	workqueue_destroy(ts->wq);
	kmem_free(ts, sizeof(*ts));
}
void
ffs_blkfree(struct fs *fs, struct vnode *devvp, daddr_t bno, long size,
    ino_t inum)
{
	struct ufsmount *ump;
	int error;
	dev_t dev;
	struct discarddata *ts;
	struct discardopdata *td;

	dev = devvp->v_rdev;
	ump = VFSTOUFS(spec_node_getmountedfs(devvp));
	if (ffs_snapblkfree(fs, devvp, bno, size, inum))
		return;

	error = ffs_check_bad_allocation(__func__, fs, bno, size, dev, inum);
	if (error)
		return;

	if (!ump->um_discarddata) {
		ffs_blkfree_cg(fs, devvp, bno, size);
		return;
	}

#ifdef TRIMDEBUG
	printf("blkfree(%" PRId64 ",%ld)\n", bno, size);
#endif
	ts = ump->um_discarddata;
	td = NULL;

	mutex_enter(&ts->entrylk);
	if (ts->entry) {
		td = ts->entry;
		/* ffs deallocs backwards, check for prepend only */
		if (td->bno == bno + ffs_numfrags(fs, size)
		    && td->size + size <= ts->maxsize) {
			td->bno = bno;
			td->size += size;
			if (td->size < ts->maxsize) {
#ifdef TRIMDEBUG
				printf("defer(%" PRId64 ",%ld)\n", td->bno, td->size);
#endif
				mutex_exit(&ts->entrylk);
				return;
			}
			size = 0; /* mark done */
		}
		ts->entry = NULL;
	}
	mutex_exit(&ts->entrylk);

	if (td) {
#ifdef TRIMDEBUG
		printf("enq old(%" PRId64 ",%ld)\n", td->bno, td->size);
#endif
		mutex_enter(&ts->wqlk);
		ts->wqcnt++;
		mutex_exit(&ts->wqlk);
		workqueue_enqueue(ts->wq, &td->wk, NULL);
	}
	if (!size)
		return;

	td = kmem_alloc(sizeof(*td), KM_SLEEP);
	td->devvp = devvp;
	td->bno = bno;
	td->size = size;

	if (td->size < ts->maxsize) { /* XXX always the case */
		mutex_enter(&ts->entrylk);
		if (!ts->entry) { /* possible race? */
#ifdef TRIMDEBUG
			printf("defer(%" PRId64 ",%ld)\n", td->bno, td->size);
#endif
			ts->entry = td;
			td = NULL;
		}
		mutex_exit(&ts->entrylk);
	}
	if (td) {
#ifdef TRIMDEBUG
		printf("enq new(%" PRId64 ",%ld)\n", td->bno, td->size);
#endif
		mutex_enter(&ts->wqlk);
		ts->wqcnt++;
		mutex_exit(&ts->wqlk);
		workqueue_enqueue(ts->wq, &td->wk, NULL);
	}
}
/*
 * Free a block or fragment from a snapshot cg copy.
 *
 * The specified block or fragment is placed back in the
 * free map. If a fragment is deallocated, a possible
 * block reassembly is checked.
 *
 * => um_lock not held on entry or exit
 */
void
ffs_blkfree_snap(struct fs *fs, struct vnode *devvp, daddr_t bno, long size,
    ino_t inum)
{
	struct cg *cgp;
	struct buf *bp;
	struct ufsmount *ump;
	daddr_t cgblkno;
	int error, cg;
	dev_t dev;
	const bool devvp_is_snapshot = (devvp->v_type != VBLK);
	const int needswap = UFS_FSNEEDSWAP(fs);

	KASSERT(devvp_is_snapshot);

	cg = dtog(fs, bno);
	dev = VTOI(devvp)->i_devvp->v_rdev;
	ump = VFSTOUFS(devvp->v_mount);
	cgblkno = ffs_fragstoblks(fs, cgtod(fs, cg));

	error = ffs_check_bad_allocation(__func__, fs, bno, size, dev, inum);
	if (error)
		return;

	error = bread(devvp, cgblkno, (int)fs->fs_cgsize,
	    NOCRED, B_MODIFY, &bp);
	if (error) {
		return;
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp, needswap)) {
		brelse(bp, 0);
		return;
	}

	ffs_blkfree_common(ump, fs, dev, bp, bno, size, devvp_is_snapshot);

	bdwrite(bp);
}
static void
ffs_blkfree_common(struct ufsmount *ump, struct fs *fs, dev_t dev,
    struct buf *bp, daddr_t bno, long size, bool devvp_is_snapshot)
{
	struct cg *cgp;
	int32_t fragno, cgbno;
	int i, cg, blk, frags, bbase;
	u_int8_t *blksfree;
	const int needswap = UFS_FSNEEDSWAP(fs);

	cg = dtog(fs, bno);
	cgp = (struct cg *)bp->b_data;
	cgp->cg_old_time = ufs_rw32(time_second, needswap);
	if ((fs->fs_magic != FS_UFS1_MAGIC) ||
	    (fs->fs_old_flags & FS_FLAGS_UPDATED))
		cgp->cg_time = ufs_rw64(time_second, needswap);
	cgbno = dtogd(fs, bno);
	blksfree = cg_blksfree(cgp, needswap);
	mutex_enter(&ump->um_lock);
	if (size == fs->fs_bsize) {
		fragno = ffs_fragstoblks(fs, cgbno);
		if (!ffs_isfreeblock(fs, blksfree, fragno)) {
			if (devvp_is_snapshot) {
				mutex_exit(&ump->um_lock);
				return;
			}
			printf("dev = 0x%llx, block = %" PRId64 ", fs = %s\n",
			    (unsigned long long)dev, bno, fs->fs_fsmnt);
			panic("blkfree: freeing free block");
		}
		ffs_setblock(fs, blksfree, fragno);
		ffs_clusteracct(fs, cgp, fragno, 1);
		ufs_add32(cgp->cg_cs.cs_nbfree, 1, needswap);
		fs->fs_cstotal.cs_nbfree++;
		fs->fs_cs(fs, cg).cs_nbfree++;
		if ((fs->fs_magic == FS_UFS1_MAGIC) &&
		    ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0)) {
			i = old_cbtocylno(fs, cgbno);
			KASSERT(i >= 0);
			KASSERT(i < fs->fs_old_ncyl);
			KASSERT(old_cbtorpos(fs, cgbno) >= 0);
			KASSERT(fs->fs_old_nrpos == 0 || old_cbtorpos(fs, cgbno) < fs->fs_old_nrpos);
			ufs_add16(old_cg_blks(fs, cgp, i, needswap)[old_cbtorpos(fs, cgbno)], 1,
			    needswap);
			ufs_add32(old_cg_blktot(cgp, needswap)[i], 1, needswap);
		}
	} else {
		bbase = cgbno - ffs_fragnum(fs, cgbno);
		/*
		 * decrement the counts associated with the old frags
		 */
		blk = blkmap(fs, blksfree, bbase);
		ffs_fragacct(fs, blk, cgp->cg_frsum, -1, needswap);
		/*
		 * deallocate the fragment
		 */
		frags = ffs_numfrags(fs, size);
		for (i = 0; i < frags; i++) {
			if (isset(blksfree, cgbno + i)) {
				printf("dev = 0x%llx, block = %" PRId64
				    ", fs = %s\n",
				    (unsigned long long)dev, bno + i,
				    fs->fs_fsmnt);
				panic("blkfree: freeing free frag");
			}
			setbit(blksfree, cgbno + i);
		}
		ufs_add32(cgp->cg_cs.cs_nffree, i, needswap);
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		/*
		 * add back in counts associated with the new frags
		 */
		blk = blkmap(fs, blksfree, bbase);
		ffs_fragacct(fs, blk, cgp->cg_frsum, 1, needswap);
		/*
		 * if a complete block has been reassembled, account for it
		 */
		fragno = ffs_fragstoblks(fs, bbase);
		if (ffs_isblock(fs, blksfree, fragno)) {
			ufs_add32(cgp->cg_cs.cs_nffree, -fs->fs_frag, needswap);
			fs->fs_cstotal.cs_nffree -= fs->fs_frag;
			fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
			ffs_clusteracct(fs, cgp, fragno, 1);
			ufs_add32(cgp->cg_cs.cs_nbfree, 1, needswap);
			fs->fs_cstotal.cs_nbfree++;
			fs->fs_cs(fs, cg).cs_nbfree++;
			if ((fs->fs_magic == FS_UFS1_MAGIC) &&
			    ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0)) {
				i = old_cbtocylno(fs, bbase);
				KASSERT(i >= 0);
				KASSERT(i < fs->fs_old_ncyl);
				KASSERT(old_cbtorpos(fs, bbase) >= 0);
				KASSERT(fs->fs_old_nrpos == 0 || old_cbtorpos(fs, bbase) < fs->fs_old_nrpos);
				ufs_add16(old_cg_blks(fs, cgp, i, needswap)[old_cbtorpos(fs,
				    bbase)], 1, needswap);
				ufs_add32(old_cg_blktot(cgp, needswap)[i], 1, needswap);
			}
		}
	}
	fs->fs_fmod = 1;
	ACTIVECG_CLR(fs, cg);
	mutex_exit(&ump->um_lock);
}
int
ffs_vfree(struct vnode *vp, ino_t ino, int mode)
{

	return ffs_freefile(vp->v_mount, ino, mode);
}
/*
 * Do the actual free operation.
 * The specified inode is placed back in the free map.
 *
 * => um_lock not held on entry or exit
 */
int
ffs_freefile(struct mount *mp, ino_t ino, int mode)
{
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs = ump->um_fs;
	struct vnode *devvp;
	struct cg *cgp;
	struct buf *bp;
	int error, cg;
	daddr_t cgbno;
	dev_t dev;
	const int needswap = UFS_FSNEEDSWAP(fs);

	cg = ino_to_cg(fs, ino);
	devvp = ump->um_devvp;
	dev = devvp->v_rdev;
	cgbno = FFS_FSBTODB(fs, cgtod(fs, cg));

	if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
		panic("ifree: range: dev = 0x%llx, ino = %llu, fs = %s",
		    (long long)dev, (unsigned long long)ino, fs->fs_fsmnt);
	error = bread(devvp, cgbno, (int)fs->fs_cgsize,
	    NOCRED, B_MODIFY, &bp);
	if (error) {
		return (error);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp, needswap)) {
		brelse(bp, 0);
		return (0);
	}

	ffs_freefile_common(ump, fs, dev, bp, ino, mode, false);

	bdwrite(bp);

	return 0;
}
int
ffs_freefile_snap(struct fs *fs, struct vnode *devvp, ino_t ino, int mode)
{
	struct ufsmount *ump;
	struct cg *cgp;
	struct buf *bp;
	int error, cg;
	daddr_t cgbno;
	dev_t dev;
	const int needswap = UFS_FSNEEDSWAP(fs);

	KASSERT(devvp->v_type != VBLK);

	cg = ino_to_cg(fs, ino);
	dev = VTOI(devvp)->i_devvp->v_rdev;
	ump = VFSTOUFS(devvp->v_mount);
	cgbno = ffs_fragstoblks(fs, cgtod(fs, cg));
	if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
		panic("ifree: range: dev = 0x%llx, ino = %llu, fs = %s",
		    (unsigned long long)dev, (unsigned long long)ino,
		    fs->fs_fsmnt);
	error = bread(devvp, cgbno, (int)fs->fs_cgsize,
	    NOCRED, B_MODIFY, &bp);
	if (error) {
		return (error);
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp, needswap)) {
		brelse(bp, 0);
		return (0);
	}
	ffs_freefile_common(ump, fs, dev, bp, ino, mode, true);

	bdwrite(bp);

	return 0;
}
static void
ffs_freefile_common(struct ufsmount *ump, struct fs *fs, dev_t dev,
    struct buf *bp, ino_t ino, int mode, bool devvp_is_snapshot)
{
	int cg;
	struct cg *cgp;
	u_int8_t *inosused;
	const int needswap = UFS_FSNEEDSWAP(fs);

	cg = ino_to_cg(fs, ino);
	cgp = (struct cg *)bp->b_data;
	cgp->cg_old_time = ufs_rw32(time_second, needswap);
	if ((fs->fs_magic != FS_UFS1_MAGIC) ||
	    (fs->fs_old_flags & FS_FLAGS_UPDATED))
		cgp->cg_time = ufs_rw64(time_second, needswap);
	inosused = cg_inosused(cgp, needswap);
	ino %= fs->fs_ipg;
	if (isclr(inosused, ino)) {
		printf("ifree: dev = 0x%llx, ino = %llu, fs = %s\n",
		    (unsigned long long)dev, (unsigned long long)ino +
		    cg * fs->fs_ipg, fs->fs_fsmnt);
		if (fs->fs_ronly == 0)
			panic("ifree: freeing free inode");
	}
	clrbit(inosused, ino);
	if (!devvp_is_snapshot)
		UFS_WAPBL_UNREGISTER_INODE(ump->um_mountp,
		    ino + cg * fs->fs_ipg, mode);
	if (ino < ufs_rw32(cgp->cg_irotor, needswap))
		cgp->cg_irotor = ufs_rw32(ino, needswap);
	ufs_add32(cgp->cg_cs.cs_nifree, 1, needswap);
	mutex_enter(&ump->um_lock);
	fs->fs_cstotal.cs_nifree++;
	fs->fs_cs(fs, cg).cs_nifree++;
	if ((mode & IFMT) == IFDIR) {
		ufs_add32(cgp->cg_cs.cs_ndir, -1, needswap);
		fs->fs_cstotal.cs_ndir--;
		fs->fs_cs(fs, cg).cs_ndir--;
	}
	fs->fs_fmod = 1;
	ACTIVECG_CLR(fs, cg);
	mutex_exit(&ump->um_lock);
}
/*
 * Check to see if a file is free.
 */
int
ffs_checkfreefile(struct fs *fs, struct vnode *devvp, ino_t ino)
{
	struct cg *cgp;
	struct buf *bp;
	daddr_t cgbno;
	int ret, cg;
	u_int8_t *inosused;
	const bool devvp_is_snapshot = (devvp->v_type != VBLK);

	KASSERT(devvp_is_snapshot);

	cg = ino_to_cg(fs, ino);
	if (devvp_is_snapshot)
		cgbno = ffs_fragstoblks(fs, cgtod(fs, cg));
	else
		cgbno = FFS_FSBTODB(fs, cgtod(fs, cg));
	if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
		return 1;
	if (bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, 0, &bp)) {
		return 1;
	}
	cgp = (struct cg *)bp->b_data;
	if (!cg_chkmagic(cgp, UFS_FSNEEDSWAP(fs))) {
		brelse(bp, 0);
		return 1;
	}
	inosused = cg_inosused(cgp, UFS_FSNEEDSWAP(fs));
	ino %= fs->fs_ipg;
	ret = isclr(inosused, ino);
	brelse(bp, 0);
	return ret;
}
/*
 * Find a block of the specified size in the specified cylinder group.
 *
 * It is a panic if a request is made to find a block if none are
 * available.
 */
static int32_t
ffs_mapsearch(struct fs *fs, struct cg *cgp, daddr_t bpref, int allocsiz)
{
	int32_t bno;
	int start, len, loc, i;
	int blk, field, subfield, pos;
	int ostart, olen;
	u_int8_t *blksfree;
	const int needswap = UFS_FSNEEDSWAP(fs);

	/* KASSERT(mutex_owned(&ump->um_lock)); */

	/*
	 * find the fragment by searching through the free block
	 * map for an appropriate bit pattern
	 */
	if (bpref)
		start = dtogd(fs, bpref) / NBBY;
	else
		start = ufs_rw32(cgp->cg_frotor, needswap) / NBBY;
	blksfree = cg_blksfree(cgp, needswap);
	len = howmany(fs->fs_fpg, NBBY) - start;
	ostart = start;
	olen = len;
	loc = scanc((u_int)len,
		(const u_char *)&blksfree[start],
		(const u_char *)fragtbl[fs->fs_frag],
		(1 << (allocsiz - 1 + (fs->fs_frag & (NBBY - 1)))));
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = scanc((u_int)len,
			(const u_char *)&blksfree[0],
			(const u_char *)fragtbl[fs->fs_frag],
			(1 << (allocsiz - 1 + (fs->fs_frag & (NBBY - 1)))));
		if (loc == 0) {
			printf("start = %d, len = %d, fs = %s\n",
			    ostart, olen, fs->fs_fsmnt);
			printf("offset=%d %ld\n",
			    ufs_rw32(cgp->cg_freeoff, needswap),
			    (long)blksfree - (long)cgp);
			printf("cg %d\n", cgp->cg_cgx);
			panic("ffs_alloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	bno = (start + len - loc) * NBBY;
	cgp->cg_frotor = ufs_rw32(bno, needswap);
	/*
	 * found the byte in the map
	 * sift through the bits to find the selected frag
	 */
	for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
		blk = blkmap(fs, blksfree, bno);
		blk <<= 1;
		field = around[allocsiz];
		subfield = inside[allocsiz];
		for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
			if ((blk & field) == subfield)
				return (bno + pos);
			field <<= 1;
			subfield <<= 1;
		}
	}
	printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt);
	panic("ffs_alloccg: block not in map");
	/* NOTREACHED */
}
/*
 * Fserr prints the name of a file system with an error diagnostic.
 *
 * The form of the error message is:
 *	fs: error message
 */
static void
ffs_fserr(struct fs *fs, u_int uid, const char *cp)
{

	log(LOG_ERR, "uid %d, pid %d, command %s, on %s: %s\n",
	    uid, curproc->p_pid, curproc->p_comm, fs->fs_fsmnt, cp);
}