dmake: do not set MAKEFLAGS=k
[unleashed/tickless.git] / kernel / fs / ufs / ufs_alloc.c
blobbcd3cf0571372eb2e587071e8e2e21cd96fa8bda
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
30 * University Copyright- Copyright (c) 1982, 1986, 1988
31 * The Regents of the University of California
32 * All Rights Reserved
34 * University Acknowledgment- Portions of this document are derived from
35 * software developed by the University of California, Berkeley, and its
36 * contributors.
39 #include <sys/condvar_impl.h>
40 #include <sys/types.h>
41 #include <sys/t_lock.h>
42 #include <sys/debug.h>
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/signal.h>
46 #include <sys/cred.h>
47 #include <sys/proc.h>
48 #include <sys/disp.h>
49 #include <sys/user.h>
50 #include <sys/buf.h>
51 #include <sys/vfs.h>
52 #include <sys/vnode.h>
53 #include <sys/acl.h>
54 #include <sys/fs/ufs_fs.h>
55 #include <sys/fs/ufs_inode.h>
56 #include <sys/fs/ufs_acl.h>
57 #include <sys/fs/ufs_bio.h>
58 #include <sys/fs/ufs_quota.h>
59 #include <sys/kmem.h>
60 #include <sys/fs/ufs_trans.h>
61 #include <sys/fs/ufs_panic.h>
62 #include <sys/errno.h>
63 #include <sys/time.h>
64 #include <sys/sysmacros.h>
65 #include <sys/file.h>
66 #include <sys/fcntl.h>
67 #include <sys/flock.h>
68 #include <sys/fs_subr.h>
69 #include <sys/cmn_err.h>
70 #include <sys/policy.h>
71 #include <sys/fs/ufs_log.h>
/*
 * Forward declarations of the file-local allocation helpers.
 *
 * They are declared without parameter lists: hashalloc() receives
 * alloccg() or ialloccg() through a cast to the unprototyped pointer
 * type ulong_t (*)(), and invokes it as (ip, cg, pref, size).
 */
static ino_t hashalloc();
static daddr_t fragextend();
static daddr_t alloccg();
static daddr_t alloccgblk();
static ino_t ialloccg();
static daddr_t mapsearch();
static int findlogstartcg();

/* Lookup tables and helpers that are defined outside this file. */
extern int inside[], around[];
extern uchar_t *fragtbl[];
void delay();
86 * Allocate a block in the file system.
88 * The size of the requested block is given, which must be some
89 * multiple of fs_fsize and <= fs_bsize.
90 * A preference may be optionally specified. If a preference is given
91 * the following hierarchy is used to allocate a block:
92 * 1) allocate the requested block.
93 * 2) allocate a rotationally optimal block in the same cylinder.
94 * 3) allocate a block in the same cylinder group.
95 * 4) quadratically rehash into other cylinder groups, until an
96 * available block is located.
97 * If no block preference is given the following hierarchy is used
98 * to allocate a block:
99 * 1) allocate a block in the cylinder group that contains the
100 * inode for the file.
101 * 2) quadratically rehash into other cylinder groups, until an
102 * available block is located.
105 alloc(struct inode *ip, daddr_t bpref, int size, daddr_t *bnp, cred_t *cr)
107 struct fs *fs;
108 struct ufsvfs *ufsvfsp;
109 daddr_t bno;
110 int cg;
111 int err;
112 char *errmsg = NULL;
113 size_t len;
114 clock_t now;
116 ufsvfsp = ip->i_ufsvfs;
117 fs = ufsvfsp->vfs_fs;
118 if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) {
119 err = ufs_fault(ITOV(ip), "alloc: bad size, dev = 0x%lx,"
120 " bsize = %d, size = %d, fs = %s\n",
121 ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
122 return (err);
124 if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
125 goto nospace;
126 if (freespace(fs, ufsvfsp) <= 0 &&
127 secpolicy_fs_minfree(cr, ufsvfsp->vfs_vfs) != 0)
128 goto nospace;
129 err = chkdq(ip, (long)btodb(size), 0, cr, &errmsg, &len);
130 /* Note that may not have err, but may have errmsg */
131 if (errmsg != NULL) {
132 uprintf(errmsg);
133 kmem_free(errmsg, len);
134 errmsg = NULL;
136 if (err)
137 return (err);
138 if (bpref >= fs->fs_size)
139 bpref = 0;
140 if (bpref == 0)
141 cg = (int)itog(fs, ip->i_number);
142 else
143 cg = dtog(fs, bpref);
145 bno = (daddr_t)hashalloc(ip, cg, (long)bpref, size,
146 (ulong_t (*)())alloccg);
147 if (bno > 0) {
148 *bnp = bno;
149 return (0);
153 * hashalloc() failed because some other thread grabbed
154 * the last block so unwind the quota operation. We can
155 * ignore the return because subtractions don't fail and
156 * size is guaranteed to be >= zero by our caller.
158 (void) chkdq(ip, -(long)btodb(size), 0, cr, (char **)NULL, NULL);
160 nospace:
161 now = ddi_get_lbolt();
162 mutex_enter(&ufsvfsp->vfs_lock);
163 if ((now - ufsvfsp->vfs_lastwhinetime) > (hz << 2) &&
164 (!(TRANS_ISTRANS(ufsvfsp)) || !(ip->i_flag & IQUIET))) {
165 ufsvfsp->vfs_lastwhinetime = now;
166 cmn_err(CE_NOTE, "alloc: %s: file system full", fs->fs_fsmnt);
168 mutex_exit(&ufsvfsp->vfs_lock);
169 return (ENOSPC);
173 * Reallocate a fragment to a bigger size
175 * The number and size of the old block is given, and a preference
176 * and new size is also specified. The allocator attempts to extend
177 * the original block. Failing that, the regular block allocator is
178 * invoked to get an appropriate block.
181 realloccg(struct inode *ip, daddr_t bprev, daddr_t bpref, int osize,
182 int nsize, daddr_t *bnp, cred_t *cr)
184 daddr_t bno;
185 struct fs *fs;
186 struct ufsvfs *ufsvfsp;
187 int cg, request;
188 int err;
189 char *errmsg = NULL;
190 size_t len;
191 clock_t now;
193 ufsvfsp = ip->i_ufsvfs;
194 fs = ufsvfsp->vfs_fs;
195 if ((unsigned)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
196 (unsigned)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
197 err = ufs_fault(ITOV(ip),
198 "realloccg: bad size, dev=0x%lx, bsize=%d, "
199 "osize=%d, nsize=%d, fs=%s\n",
200 ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt);
201 return (err);
203 if (freespace(fs, ufsvfsp) <= 0 &&
204 secpolicy_fs_minfree(cr, ufsvfsp->vfs_vfs) != 0)
205 goto nospace;
206 if (bprev == 0) {
207 err = ufs_fault(ITOV(ip),
208 "realloccg: bad bprev, dev = 0x%lx, bsize = %d,"
209 " bprev = %ld, fs = %s\n", ip->i_dev, fs->fs_bsize, bprev,
210 fs->fs_fsmnt);
211 return (err);
213 err = chkdq(ip, (long)btodb(nsize - osize), 0, cr, &errmsg, &len);
214 /* Note that may not have err, but may have errmsg */
215 if (errmsg != NULL) {
216 uprintf(errmsg);
217 kmem_free(errmsg, len);
218 errmsg = NULL;
220 if (err)
221 return (err);
222 cg = dtog(fs, bprev);
223 bno = fragextend(ip, cg, (long)bprev, osize, nsize);
224 if (bno != 0) {
225 *bnp = bno;
226 return (0);
228 if (bpref >= fs->fs_size)
229 bpref = 0;
232 * When optimizing for time we allocate a full block and
233 * then only use the upper portion for this request. When
234 * this file grows again it will grow into the unused portion
235 * of the block (See fragextend() above). This saves time
236 * because an extra disk write would be needed if the frags
237 * following the current allocation were not free. The extra
238 * disk write is needed to move the data from its current
239 * location into the newly allocated position.
241 * When optimizing for space we allocate a run of frags
242 * that is just the right size for this request.
244 request = (fs->fs_optim == FS_OPTTIME) ? fs->fs_bsize : nsize;
245 bno = (daddr_t)hashalloc(ip, cg, (long)bpref, request,
246 (ulong_t (*)())alloccg);
247 if (bno > 0) {
248 *bnp = bno;
249 if (nsize < request)
250 (void) free(ip, bno + numfrags(fs, nsize),
251 (off_t)(request - nsize), I_NOCANCEL);
252 return (0);
256 * hashalloc() failed because some other thread grabbed
257 * the last block so unwind the quota operation. We can
258 * ignore the return because subtractions don't fail, and
259 * our caller guarantees nsize >= osize.
261 (void) chkdq(ip, -(long)btodb(nsize - osize), 0, cr, (char **)NULL,
262 NULL);
264 nospace:
265 now = ddi_get_lbolt();
266 mutex_enter(&ufsvfsp->vfs_lock);
267 if ((now - ufsvfsp->vfs_lastwhinetime) > (hz << 2) &&
268 (!(TRANS_ISTRANS(ufsvfsp)) || !(ip->i_flag & IQUIET))) {
269 ufsvfsp->vfs_lastwhinetime = now;
270 cmn_err(CE_NOTE,
271 "realloccg %s: file system full", fs->fs_fsmnt);
273 mutex_exit(&ufsvfsp->vfs_lock);
274 return (ENOSPC);
278 * Allocate an inode in the file system.
280 * A preference may be optionally specified. If a preference is given
281 * the following hierarchy is used to allocate an inode:
282 * 1) allocate the requested inode.
283 * 2) allocate an inode in the same cylinder group.
284 * 3) quadratically rehash into other cylinder groups, until an
285 * available inode is located.
286 * If no inode preference is given the following hierarchy is used
287 * to allocate an inode:
288 * 1) allocate an inode in cylinder group 0.
289 * 2) quadratically rehash into other cylinder groups, until an
290 * available inode is located.
293 ufs_ialloc(struct inode *pip,
294 ino_t ipref, mode_t mode, struct inode **ipp, cred_t *cr)
296 struct inode *ip;
297 struct fs *fs;
298 int cg;
299 ino_t ino;
300 int err;
301 int nifree;
302 struct ufsvfs *ufsvfsp = pip->i_ufsvfs;
303 char *errmsg = NULL;
304 size_t len;
306 ASSERT(RW_WRITE_HELD(&pip->i_rwlock));
307 fs = pip->i_fs;
308 loop:
309 nifree = fs->fs_cstotal.cs_nifree;
311 if (nifree == 0)
312 goto noinodes;
314 * Shadow inodes don't count against a user's inode allocation.
315 * They are an implementation method and not a resource.
317 if ((mode != IFSHAD) && (mode != IFATTRDIR)) {
318 err = chkiq((struct ufsvfs *)ITOV(pip)->v_vfsp->vfs_data,
319 /* change */ 1, NULL, crgetuid(cr), 0,
320 cr, &errmsg, &len);
322 * As we haven't acquired any locks yet, dump the message
323 * now.
325 if (errmsg != NULL) {
326 uprintf(errmsg);
327 kmem_free(errmsg, len);
328 errmsg = NULL;
330 if (err)
331 return (err);
334 if (ipref >= (ulong_t)(fs->fs_ncg * fs->fs_ipg))
335 ipref = 0;
336 cg = (int)itog(fs, ipref);
337 ino = (ino_t)hashalloc(pip, cg, (long)ipref, (int)mode,
338 (ulong_t (*)())ialloccg);
339 if (ino == 0) {
340 if ((mode != IFSHAD) && (mode != IFATTRDIR)) {
342 * We can safely ignore the return from chkiq()
343 * because deallocations can only fail if we
344 * can't get the user's quota info record off
345 * the disk due to an I/O error. In that case,
346 * the quota subsystem is already messed up.
348 (void) chkiq(ufsvfsp, /* change */ -1, NULL,
349 crgetuid(cr), 0, cr, (char **)NULL, NULL);
351 goto noinodes;
353 err = ufs_iget(pip->i_vfs, ino, ipp, cr);
354 if (err) {
355 if ((mode != IFSHAD) && (mode != IFATTRDIR)) {
357 * See above comment about why it is safe to ignore an
358 * error return here.
360 (void) chkiq(ufsvfsp, /* change */ -1, NULL,
361 crgetuid(cr), 0, cr, (char **)NULL, NULL);
363 ufs_ifree(pip, ino, 0);
364 return (err);
366 ip = *ipp;
367 ASSERT(!ip->i_ufs_acl);
368 ASSERT(!ip->i_dquot);
369 rw_enter(&ip->i_contents, RW_WRITER);
372 * Check if we really got a free inode, if not then complain
373 * and mark the inode ISTALE so that it will be freed by the
374 * ufs idle thread eventually and will not be sent to ufs_delete().
376 if (ip->i_mode || (ip->i_nlink > 0)) {
377 ip->i_flag |= ISTALE;
378 rw_exit(&ip->i_contents);
379 VN_RELE(ITOV(ip));
380 cmn_err(CE_WARN,
381 "%s: unexpected allocated inode %d, run fsck(1M)%s",
382 fs->fs_fsmnt, (int)ino,
383 (TRANS_ISTRANS(ufsvfsp) ? " -o f" : ""));
384 goto loop;
388 * Check the inode has no size or data blocks.
389 * This could have happened if the truncation failed when
390 * deleting the inode. It used to be possible for this to occur
391 * if a block allocation failed when iteratively truncating a
392 * large file using logging and with a full file system.
393 * This was fixed with bug fix 4348738. However, truncation may
394 * still fail on an IO error. So in all cases for safety and
395 * security we clear out the size; the blocks allocated; and
396 * pointers to the blocks. This will ultimately cause a fsck
397 * error of un-accounted for blocks, but its a fairly benign error,
398 * and possibly the correct thing to do anyway as accesssing those
399 * blocks agains may lead to more IO errors.
401 if (ip->i_size || ip->i_blocks) {
402 int i;
404 if (ip->i_size) {
405 cmn_err(CE_WARN,
406 "%s: free inode %d had size 0x%llx, run fsck(1M)%s",
407 fs->fs_fsmnt, (int)ino, ip->i_size,
408 (TRANS_ISTRANS(ufsvfsp) ? " -o f" : ""));
411 * Clear any garbage left behind.
413 ip->i_size = 0;
414 ip->i_blocks = 0;
415 for (i = 0; i < NDADDR; i++)
416 ip->i_db[i] = 0;
417 for (i = 0; i < NIADDR; i++)
418 ip->i_ib[i] = 0;
422 * Initialize the link count
424 ip->i_nlink = 0;
427 * Clear the old flags
429 ip->i_flag &= IREF;
432 * Access times are not really defined if the fs is mounted
433 * with 'noatime'. But it can cause nfs clients to fail
434 * open() if the atime is not a legal value. Set a legal value
435 * here when the inode is allocated.
437 if (ufsvfsp->vfs_noatime) {
438 mutex_enter(&ufs_iuniqtime_lock);
439 ip->i_atime = iuniqtime;
440 mutex_exit(&ufs_iuniqtime_lock);
442 rw_exit(&ip->i_contents);
443 return (0);
444 noinodes:
445 if (!(TRANS_ISTRANS(ufsvfsp)) || !(pip->i_flag & IQUIET))
446 cmn_err(CE_NOTE, "%s: out of inodes\n", fs->fs_fsmnt);
447 return (ENOSPC);
451 * Find a cylinder group to place a directory.
452 * Returns an inumber within the selected cylinder group.
453 * Note, the vfs_lock is not needed as we don't require exact cg summary info.
455 * If the switch ufs_close_dirs is set, then the policy is to use
456 * the current cg if it has more than 25% free inodes and more
457 * than 25% free blocks. Otherwise the cgs are searched from
458 * the beginning and the first cg with the same criteria is
459 * used. If that is also null then we revert to the old algorithm.
460 * This tends to cluster files at the beginning of the disk
461 * until the disk gets full.
463 * Otherwise if ufs_close_dirs is not set then the original policy is
464 * used which is to select from among those cylinder groups with
465 * above the average number of free inodes, the one with the smallest
466 * number of directories.
469 int ufs_close_dirs = 1; /* allocate directories close as possible */
471 ino_t
472 dirpref(inode_t *dp)
474 int cg, minndir, mincg, avgifree, mininode, minbpg, ifree;
475 struct fs *fs = dp->i_fs;
477 cg = itog(fs, dp->i_number);
478 mininode = fs->fs_ipg >> 2;
479 minbpg = fs->fs_maxbpg >> 2;
480 if (ufs_close_dirs &&
481 (fs->fs_cs(fs, cg).cs_nifree > mininode) &&
482 (fs->fs_cs(fs, cg).cs_nbfree > minbpg)) {
483 return (dp->i_number);
486 avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
487 minndir = fs->fs_ipg;
488 mincg = 0;
489 for (cg = 0; cg < fs->fs_ncg; cg++) {
490 ifree = fs->fs_cs(fs, cg).cs_nifree;
491 if (ufs_close_dirs &&
492 (ifree > mininode) &&
493 (fs->fs_cs(fs, cg).cs_nbfree > minbpg)) {
494 return ((ino_t)(fs->fs_ipg * cg));
496 if ((fs->fs_cs(fs, cg).cs_ndir < minndir) &&
497 (ifree >= avgifree)) {
498 mincg = cg;
499 minndir = fs->fs_cs(fs, cg).cs_ndir;
502 return ((ino_t)(fs->fs_ipg * mincg));
506 * Select the desired position for the next block in a file. The file is
507 * logically divided into sections. The first section is composed of the
508 * direct blocks. Each additional section contains fs_maxbpg blocks.
510 * If no blocks have been allocated in the first section, the policy is to
511 * request a block in the same cylinder group as the inode that describes
512 * the file. If no blocks have been allocated in any other section, the
513 * policy is to place the section in a cylinder group with a greater than
514 * average number of free blocks. An appropriate cylinder group is found
515 * by using a rotor that sweeps the cylinder groups. When a new group of
516 * blocks is needed, the sweep begins in the cylinder group following the
517 * cylinder group from which the previous allocation was made. The sweep
518 * continues until a cylinder group with greater than the average number
519 * of free blocks is found. If the allocation is for the first block in an
520 * indirect block, the information on the previous allocation is unavailable;
521 * here a best guess is made based upon the logical block number being
522 * allocated.
524 * If a section is already partially allocated, the policy is to
525 * contiguously allocate fs_maxcontig blocks. The end of one of these
526 * contiguous blocks and the beginning of the next is physically separated
527 * so that the disk head will be in transit between them for at least
528 * fs_rotdelay milliseconds. This is to allow time for the processor to
529 * schedule another I/O transfer.
531 daddr_t
532 blkpref(struct inode *ip, daddr_t lbn, int indx, daddr32_t *bap)
534 struct fs *fs;
535 struct ufsvfs *ufsvfsp;
536 int cg;
537 int avgbfree, startcg;
538 daddr_t nextblk;
540 ufsvfsp = ip->i_ufsvfs;
541 fs = ip->i_fs;
542 if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
543 if (lbn < NDADDR) {
544 cg = itog(fs, ip->i_number);
545 return (fs->fs_fpg * cg + fs->fs_frag);
548 * Find a cylinder with greater than average
549 * number of unused data blocks.
551 if (indx == 0 || bap[indx - 1] == 0)
552 startcg = itog(fs, ip->i_number) + lbn / fs->fs_maxbpg;
553 else
554 startcg = dtog(fs, bap[indx - 1]) + 1;
555 startcg %= fs->fs_ncg;
557 mutex_enter(&ufsvfsp->vfs_lock);
558 avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
560 * used for computing log space for writes/truncs
562 ufsvfsp->vfs_avgbfree = avgbfree;
563 for (cg = startcg; cg < fs->fs_ncg; cg++)
564 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
565 fs->fs_cgrotor = cg;
566 mutex_exit(&ufsvfsp->vfs_lock);
567 return (fs->fs_fpg * cg + fs->fs_frag);
569 for (cg = 0; cg <= startcg; cg++)
570 if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
571 fs->fs_cgrotor = cg;
572 mutex_exit(&ufsvfsp->vfs_lock);
573 return (fs->fs_fpg * cg + fs->fs_frag);
575 mutex_exit(&ufsvfsp->vfs_lock);
576 return (0);
579 * One or more previous blocks have been laid out. If less
580 * than fs_maxcontig previous blocks are contiguous, the
581 * next block is requested contiguously, otherwise it is
582 * requested rotationally delayed by fs_rotdelay milliseconds.
585 nextblk = bap[indx - 1];
587 * Provision for fallocate to return positive
588 * blk preference based on last allocation
590 if (nextblk < 0 && nextblk != UFS_HOLE) {
591 nextblk = (-bap[indx - 1]) + fs->fs_frag;
592 } else {
593 nextblk = bap[indx - 1] + fs->fs_frag;
596 if (indx > fs->fs_maxcontig && bap[indx - fs->fs_maxcontig] +
597 blkstofrags(fs, fs->fs_maxcontig) != nextblk) {
598 return (nextblk);
600 if (fs->fs_rotdelay != 0)
602 * Here we convert ms of delay to frags as:
603 * (frags) = (ms) * (rev/sec) * (sect/rev) /
604 * ((sect/frag) * (ms/sec))
605 * then round up to the next block.
607 nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect /
608 (NSPF(fs) * 1000), fs->fs_frag);
609 return (nextblk);
613 * Free a block or fragment.
615 * The specified block or fragment is placed back in the
616 * free map. If a fragment is deallocated, a possible
617 * block reassembly is checked.
619 void
620 free(struct inode *ip, daddr_t bno, off_t size, int flags)
622 struct fs *fs = ip->i_fs;
623 struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
624 struct ufs_q *delq = &ufsvfsp->vfs_delete;
625 struct ufs_delq_info *delq_info = &ufsvfsp->vfs_delete_info;
626 struct cg *cgp;
627 struct buf *bp;
628 int cg, bmap, bbase;
629 int i;
630 uchar_t *blksfree;
631 int *blktot;
632 short *blks;
633 daddr_t blkno, cylno, rpos;
636 * fallocate'd files will have negative block address.
637 * So negate it again to get original block address.
639 if (bno < 0 && (bno % fs->fs_frag == 0) && bno != UFS_HOLE) {
640 bno = -bno;
643 if ((unsigned long)size > fs->fs_bsize || fragoff(fs, size) != 0) {
644 (void) ufs_fault(ITOV(ip),
645 "free: bad size, dev = 0x%lx, bsize = %d, size = %d, "
646 "fs = %s\n", ip->i_dev, fs->fs_bsize,
647 (int)size, fs->fs_fsmnt);
648 return;
650 cg = dtog(fs, bno);
651 ASSERT(!ufs_badblock(ip, bno));
652 bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, cgtod(fs, cg)),
653 (int)fs->fs_cgsize);
655 cgp = bp->b_un.b_cg;
656 if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp)) {
657 brelse(bp);
658 return;
661 if (!(flags & I_NOCANCEL))
662 TRANS_CANCEL(ufsvfsp, ldbtob(fsbtodb(fs, bno)), size, flags);
663 if (flags & (I_DIR|I_IBLK|I_SHAD|I_QUOTA)) {
664 TRANS_MATA_FREE(ufsvfsp, ldbtob(fsbtodb(fs, bno)), size);
666 blksfree = cg_blksfree(cgp);
667 blktot = cg_blktot(cgp);
668 mutex_enter(&ufsvfsp->vfs_lock);
669 cgp->cg_time = gethrestime_sec();
670 bno = dtogd(fs, bno);
671 if (size == fs->fs_bsize) {
672 blkno = fragstoblks(fs, bno);
673 cylno = cbtocylno(fs, bno);
674 rpos = cbtorpos(ufsvfsp, bno);
675 blks = cg_blks(ufsvfsp, cgp, cylno);
676 if (!isclrblock(fs, blksfree, blkno)) {
677 mutex_exit(&ufsvfsp->vfs_lock);
678 brelse(bp);
679 (void) ufs_fault(ITOV(ip), "free: freeing free block, "
680 "dev:0x%lx, block:%ld, ino:%lu, fs:%s",
681 ip->i_dev, bno, ip->i_number, fs->fs_fsmnt);
682 return;
684 setblock(fs, blksfree, blkno);
685 blks[rpos]++;
686 blktot[cylno]++;
687 cgp->cg_cs.cs_nbfree++; /* Log below */
688 fs->fs_cstotal.cs_nbfree++;
689 fs->fs_cs(fs, cg).cs_nbfree++;
690 if (TRANS_ISTRANS(ufsvfsp) && (flags & I_ACCT)) {
691 mutex_enter(&delq->uq_mutex);
692 delq_info->delq_unreclaimed_blocks -=
693 btodb(fs->fs_bsize);
694 mutex_exit(&delq->uq_mutex);
696 } else {
697 bbase = bno - fragnum(fs, bno);
699 * Decrement the counts associated with the old frags
701 bmap = blkmap(fs, blksfree, bbase);
702 fragacct(fs, bmap, cgp->cg_frsum, -1);
704 * Deallocate the fragment
706 for (i = 0; i < numfrags(fs, size); i++) {
707 if (isset(blksfree, bno + i)) {
708 brelse(bp);
709 mutex_exit(&ufsvfsp->vfs_lock);
710 (void) ufs_fault(ITOV(ip),
711 "free: freeing free frag, "
712 "dev:0x%lx, blk:%ld, cg:%d, "
713 "ino:%lu, fs:%s",
714 ip->i_dev,
715 bno + i,
716 cgp->cg_cgx,
717 ip->i_number,
718 fs->fs_fsmnt);
719 return;
721 setbit(blksfree, bno + i);
723 cgp->cg_cs.cs_nffree += i;
724 fs->fs_cstotal.cs_nffree += i;
725 fs->fs_cs(fs, cg).cs_nffree += i;
726 if (TRANS_ISTRANS(ufsvfsp) && (flags & I_ACCT)) {
727 mutex_enter(&delq->uq_mutex);
728 delq_info->delq_unreclaimed_blocks -=
729 btodb(i * fs->fs_fsize);
730 mutex_exit(&delq->uq_mutex);
733 * Add back in counts associated with the new frags
735 bmap = blkmap(fs, blksfree, bbase);
736 fragacct(fs, bmap, cgp->cg_frsum, 1);
738 * If a complete block has been reassembled, account for it
740 blkno = fragstoblks(fs, bbase);
741 if (isblock(fs, blksfree, blkno)) {
742 cylno = cbtocylno(fs, bbase);
743 rpos = cbtorpos(ufsvfsp, bbase);
744 blks = cg_blks(ufsvfsp, cgp, cylno);
745 blks[rpos]++;
746 blktot[cylno]++;
747 cgp->cg_cs.cs_nffree -= fs->fs_frag;
748 fs->fs_cstotal.cs_nffree -= fs->fs_frag;
749 fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
750 cgp->cg_cs.cs_nbfree++;
751 fs->fs_cstotal.cs_nbfree++;
752 fs->fs_cs(fs, cg).cs_nbfree++;
755 fs->fs_fmod = 1;
756 ufs_notclean(ufsvfsp);
757 TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG);
758 TRANS_SI(ufsvfsp, fs, cg);
759 bdrwrite(bp);
763 * Free an inode.
765 * The specified inode is placed back in the free map.
767 void
768 ufs_ifree(struct inode *ip, ino_t ino, mode_t mode)
770 struct fs *fs = ip->i_fs;
771 struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
772 struct cg *cgp;
773 struct buf *bp;
774 unsigned int inot;
775 int cg;
776 char *iused;
778 if (ip->i_number == ino && ip->i_mode != 0) {
779 (void) ufs_fault(ITOV(ip),
780 "ufs_ifree: illegal mode: (imode) %o, (omode) %o, ino %d, "
781 "fs = %s\n",
782 ip->i_mode, mode, (int)ip->i_number, fs->fs_fsmnt);
783 return;
785 if (ino >= fs->fs_ipg * fs->fs_ncg) {
786 (void) ufs_fault(ITOV(ip),
787 "ifree: range, dev = 0x%x, ino = %d, fs = %s\n",
788 (int)ip->i_dev, (int)ino, fs->fs_fsmnt);
789 return;
791 cg = (int)itog(fs, ino);
792 bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, cgtod(fs, cg)),
793 (int)fs->fs_cgsize);
795 cgp = bp->b_un.b_cg;
796 if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp)) {
797 brelse(bp);
798 return;
800 mutex_enter(&ufsvfsp->vfs_lock);
801 cgp->cg_time = gethrestime_sec();
802 iused = cg_inosused(cgp);
803 inot = (unsigned int)(ino % (ulong_t)fs->fs_ipg);
804 if (isclr(iused, inot)) {
805 mutex_exit(&ufsvfsp->vfs_lock);
806 brelse(bp);
807 (void) ufs_fault(ITOV(ip), "ufs_ifree: freeing free inode, "
808 "mode: (imode) %o, (omode) %o, ino:%d, "
809 "fs:%s",
810 ip->i_mode, mode, (int)ino, fs->fs_fsmnt);
811 return;
813 clrbit(iused, inot);
815 if (inot < (ulong_t)cgp->cg_irotor)
816 cgp->cg_irotor = inot;
817 cgp->cg_cs.cs_nifree++;
818 fs->fs_cstotal.cs_nifree++;
819 fs->fs_cs(fs, cg).cs_nifree++;
820 if (((mode & IFMT) == IFDIR) || ((mode & IFMT) == IFATTRDIR)) {
821 cgp->cg_cs.cs_ndir--;
822 fs->fs_cstotal.cs_ndir--;
823 fs->fs_cs(fs, cg).cs_ndir--;
825 fs->fs_fmod = 1;
826 ufs_notclean(ufsvfsp);
827 TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG);
828 TRANS_SI(ufsvfsp, fs, cg);
829 bdrwrite(bp);
833 * Implement the cylinder overflow algorithm.
835 * The policy implemented by this algorithm is:
836 * 1) allocate the block in its requested cylinder group.
837 * 2) quadratically rehash on the cylinder group number.
838 * 3) brute force search for a free block.
839 * The size parameter means size for data blocks, mode for inodes.
841 static ino_t
842 hashalloc(struct inode *ip, int cg, long pref, int size, ulong_t (*allocator)())
844 struct fs *fs;
845 int i;
846 long result;
847 int icg = cg;
849 fs = ip->i_fs;
851 * 1: preferred cylinder group
853 result = (*allocator)(ip, cg, pref, size);
854 if (result)
855 return (result);
857 * 2: quadratic rehash
859 for (i = 1; i < fs->fs_ncg; i *= 2) {
860 cg += i;
861 if (cg >= fs->fs_ncg)
862 cg -= fs->fs_ncg;
863 result = (*allocator)(ip, cg, 0, size);
864 if (result)
865 return (result);
868 * 3: brute force search
869 * Note that we start at i == 2, since 0 was checked initially,
870 * and 1 is always checked in the quadratic rehash.
872 cg = (icg + 2) % fs->fs_ncg;
873 for (i = 2; i < fs->fs_ncg; i++) {
874 result = (*allocator)(ip, cg, 0, size);
875 if (result)
876 return (result);
877 cg++;
878 if (cg == fs->fs_ncg)
879 cg = 0;
881 return (0);
885 * Determine whether a fragment can be extended.
887 * Check to see if the necessary fragments are available, and
888 * if they are, allocate them.
890 static daddr_t
891 fragextend(struct inode *ip, int cg, long bprev, int osize, int nsize)
893 struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
894 struct fs *fs = ip->i_fs;
895 struct buf *bp;
896 struct cg *cgp;
897 uchar_t *blksfree;
898 long bno;
899 int frags, bbase;
900 int i, j;
902 if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
903 return (0);
904 frags = numfrags(fs, nsize);
905 bbase = (int)fragnum(fs, bprev);
906 if (bbase > fragnum(fs, (bprev + frags - 1))) {
907 /* cannot extend across a block boundary */
908 return (0);
911 bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, cgtod(fs, cg)),
912 (int)fs->fs_cgsize);
913 cgp = bp->b_un.b_cg;
914 if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp)) {
915 brelse(bp);
916 return (0);
919 blksfree = cg_blksfree(cgp);
920 mutex_enter(&ufsvfsp->vfs_lock);
921 bno = dtogd(fs, bprev);
922 for (i = numfrags(fs, osize); i < frags; i++) {
923 if (isclr(blksfree, bno + i)) {
924 mutex_exit(&ufsvfsp->vfs_lock);
925 brelse(bp);
926 return (0);
928 if ((TRANS_ISCANCEL(ufsvfsp, ldbtob(fsbtodb(fs, bprev + i)),
929 fs->fs_fsize))) {
930 mutex_exit(&ufsvfsp->vfs_lock);
931 brelse(bp);
932 return (0);
936 cgp->cg_time = gethrestime_sec();
938 * The current fragment can be extended,
939 * deduct the count on fragment being extended into
940 * increase the count on the remaining fragment (if any)
941 * allocate the extended piece.
943 for (i = frags; i < fs->fs_frag - bbase; i++)
944 if (isclr(blksfree, bno + i))
945 break;
946 j = i - numfrags(fs, osize);
947 cgp->cg_frsum[j]--;
948 ASSERT(cgp->cg_frsum[j] >= 0);
949 if (i != frags)
950 cgp->cg_frsum[i - frags]++;
951 for (i = numfrags(fs, osize); i < frags; i++) {
952 clrbit(blksfree, bno + i);
953 cgp->cg_cs.cs_nffree--;
954 fs->fs_cs(fs, cg).cs_nffree--;
955 fs->fs_cstotal.cs_nffree--;
957 fs->fs_fmod = 1;
958 ufs_notclean(ufsvfsp);
959 TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG);
960 TRANS_SI(ufsvfsp, fs, cg);
961 bdrwrite(bp);
962 return ((daddr_t)bprev);
966 * Determine whether a block can be allocated.
968 * Check to see if a block of the apprpriate size
969 * is available, and if it is, allocate it.
971 static daddr_t
972 alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
974 struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
975 struct fs *fs = ip->i_fs;
976 struct buf *bp;
977 struct cg *cgp;
978 uchar_t *blksfree;
979 int bno, frags;
980 int allocsiz;
981 int i;
984 * Searching for space could be time expensive so do some
985 * up front checking to verify that there is actually space
986 * available (free blocks or free frags).
988 if (fs->fs_cs(fs, cg).cs_nbfree == 0) {
989 if (size == fs->fs_bsize)
990 return (0);
993 * If there are not enough free frags then return.
995 if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, size))
996 return (0);
999 bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, cgtod(fs, cg)),
1000 (int)fs->fs_cgsize);
1002 cgp = bp->b_un.b_cg;
1003 if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp) ||
1004 (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
1005 brelse(bp);
1006 return (0);
1008 blksfree = cg_blksfree(cgp);
1009 mutex_enter(&ufsvfsp->vfs_lock);
1010 cgp->cg_time = gethrestime_sec();
1011 if (size == fs->fs_bsize) {
1012 if ((bno = alloccgblk(ufsvfsp, cgp, bpref, bp)) == 0)
1013 goto errout;
1014 fs->fs_fmod = 1;
1015 ufs_notclean(ufsvfsp);
1016 TRANS_SI(ufsvfsp, fs, cg);
1017 bdrwrite(bp);
1018 return (bno);
1021 * Check fragment bitmap to see if any fragments are already available.
1022 * mapsearch() may fail because the fragment that fits this request
1023 * might still be on the cancel list and not available for re-use yet.
1024 * Look for a bigger sized fragment to allocate first before we have
1025 * to give up and fragment a whole new block eventually.
1027 frags = numfrags(fs, size);
1028 allocsiz = frags;
1029 next_size:
1030 for (; allocsiz < fs->fs_frag; allocsiz++)
1031 if (cgp->cg_frsum[allocsiz] != 0)
1032 break;
1034 if (allocsiz != fs->fs_frag) {
1035 bno = mapsearch(ufsvfsp, cgp, bpref, allocsiz);
1036 if (bno < 0 && allocsiz < (fs->fs_frag - 1)) {
1037 allocsiz++;
1038 goto next_size;
1042 if (allocsiz == fs->fs_frag || bno < 0) {
1044 * No fragments were available, so a block
1045 * will be allocated and hacked up.
1047 if (cgp->cg_cs.cs_nbfree == 0)
1048 goto errout;
1049 if ((bno = alloccgblk(ufsvfsp, cgp, bpref, bp)) == 0)
1050 goto errout;
1051 bpref = dtogd(fs, bno);
1052 for (i = frags; i < fs->fs_frag; i++)
1053 setbit(blksfree, bpref + i);
1054 i = fs->fs_frag - frags;
1055 cgp->cg_cs.cs_nffree += i;
1056 fs->fs_cstotal.cs_nffree += i;
1057 fs->fs_cs(fs, cg).cs_nffree += i;
1058 cgp->cg_frsum[i]++;
1059 fs->fs_fmod = 1;
1060 ufs_notclean(ufsvfsp);
1061 TRANS_SI(ufsvfsp, fs, cg);
1062 bdrwrite(bp);
1063 return (bno);
1066 for (i = 0; i < frags; i++)
1067 clrbit(blksfree, bno + i);
1068 cgp->cg_cs.cs_nffree -= frags;
1069 fs->fs_cstotal.cs_nffree -= frags;
1070 fs->fs_cs(fs, cg).cs_nffree -= frags;
1071 cgp->cg_frsum[allocsiz]--;
1072 ASSERT(cgp->cg_frsum[allocsiz] >= 0);
1073 if (frags != allocsiz) {
1074 cgp->cg_frsum[allocsiz - frags]++;
1076 fs->fs_fmod = 1;
1077 ufs_notclean(ufsvfsp);
1078 TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG);
1079 TRANS_SI(ufsvfsp, fs, cg);
1080 bdrwrite(bp);
1081 return (cg * fs->fs_fpg + bno);
1082 errout:
1083 mutex_exit(&ufsvfsp->vfs_lock);
1084 brelse(bp);
1085 return (0);
1089 * Allocate a block in a cylinder group.
1091 * This algorithm implements the following policy:
1092 * 1) allocate the requested block.
1093 * 2) allocate a rotationally optimal block in the same cylinder.
1094 * 3) allocate the next available block on the block rotor for the
1095 * specified cylinder group.
1096 * Note that this routine only allocates fs_bsize blocks; these
1097 * blocks may be fragmented by the routine that allocates them.
1099 static daddr_t
1100 alloccgblk(
1101 struct ufsvfs *ufsvfsp,
1102 struct cg *cgp,
1103 daddr_t bpref,
1104 struct buf *bp)
1106 daddr_t bno;
1107 int cylno, pos, delta, rotbl_size;
1108 short *cylbp;
1109 int i;
1110 struct fs *fs;
1111 uchar_t *blksfree;
1112 daddr_t blkno, rpos, frag;
1113 short *blks;
1114 int32_t *blktot;
1116 ASSERT(MUTEX_HELD(&ufsvfsp->vfs_lock));
1117 fs = ufsvfsp->vfs_fs;
1118 blksfree = cg_blksfree(cgp);
1119 if (bpref == 0) {
1120 bpref = cgp->cg_rotor;
1121 goto norot;
1123 bpref = blknum(fs, bpref);
1124 bpref = dtogd(fs, bpref);
1126 * If the requested block is available, use it.
1128 if (isblock(fs, blksfree, (daddr_t)fragstoblks(fs, bpref))) {
1129 bno = bpref;
1130 goto gotit;
1133 * Check for a block available on the same cylinder.
1135 cylno = cbtocylno(fs, bpref);
1136 if (cg_blktot(cgp)[cylno] == 0)
1137 goto norot;
1138 if (fs->fs_cpc == 0) {
1140 * Block layout info is not available, so just
1141 * have to take any block in this cylinder.
1143 bpref = howmany(fs->fs_spc * cylno, NSPF(fs));
1144 goto norot;
1147 * Check the summary information to see if a block is
1148 * available in the requested cylinder starting at the
1149 * requested rotational position and proceeding around.
1151 cylbp = cg_blks(ufsvfsp, cgp, cylno);
1152 pos = cbtorpos(ufsvfsp, bpref);
1153 for (i = pos; i < ufsvfsp->vfs_nrpos; i++)
1154 if (cylbp[i] > 0)
1155 break;
1156 if (i == ufsvfsp->vfs_nrpos)
1157 for (i = 0; i < pos; i++)
1158 if (cylbp[i] > 0)
1159 break;
1160 if (cylbp[i] > 0) {
1162 * Found a rotational position, now find the actual
1163 * block. A "panic" if none is actually there.
1167 * Up to this point, "pos" has referred to the rotational
1168 * position of the desired block. From now on, it holds
1169 * the offset of the current cylinder within a cylinder
1170 * cycle. (A cylinder cycle refers to a set of cylinders
1171 * which are described by a single rotational table; the
1172 * size of the cycle is fs_cpc.)
1174 * bno is set to the block number of the first block within
1175 * the current cylinder cycle.
1178 pos = cylno % fs->fs_cpc;
1179 bno = (cylno - pos) * fs->fs_spc / NSPB(fs);
1182 * The blocks within a cylinder are grouped into equivalence
1183 * classes according to their "rotational position." There
1184 * are two tables used to determine these classes.
1186 * The positional offset table (fs_postbl) has an entry for
1187 * each rotational position of each cylinder in a cylinder
1188 * cycle. This entry contains the relative block number
1189 * (counting from the start of the cylinder cycle) of the
1190 * first block in the equivalence class for that position
1191 * and that cylinder. Positions for which no blocks exist
1192 * are indicated by a -1.
1194 * The rotational delta table (fs_rotbl) has an entry for
1195 * each block in a cylinder cycle. This entry contains
1196 * the offset from that block to the next block in the
1197 * same equivalence class. The last block in the class
1198 * is indicated by a zero in the table.
1200 * The following code, then, walks through all of the blocks
1201 * in the cylinder (cylno) which we're allocating within
1202 * which are in the equivalence class for the rotational
1203 * position (i) which we're allocating within.
1206 if (fs_postbl(ufsvfsp, pos)[i] == -1) {
1207 (void) ufs_fault(ufsvfsp->vfs_root,
1208 "alloccgblk: cyl groups corrupted, pos = %d, "
1209 "i = %d, fs = %s\n", pos, i, fs->fs_fsmnt);
1210 return (0);
1214 * There is one entry in the rotational table for each block
1215 * in the cylinder cycle. These are whole blocks, not frags.
1218 rotbl_size = (fs->fs_cpc * fs->fs_spc) >>
1219 (fs->fs_fragshift + fs->fs_fsbtodb);
1222 * As we start, "i" is the rotational position within which
1223 * we're searching. After the next line, it will be a block
1224 * number (relative to the start of the cylinder cycle)
1225 * within the equivalence class of that rotational position.
1228 i = fs_postbl(ufsvfsp, pos)[i];
1230 for (;;) {
1231 if (isblock(fs, blksfree, (daddr_t)(bno + i))) {
1232 bno = blkstofrags(fs, (bno + i));
1233 goto gotit;
1235 delta = fs_rotbl(fs)[i];
1236 if (delta <= 0 || /* End of chain, or */
1237 delta + i > rotbl_size) /* end of table? */
1238 break; /* If so, panic. */
1239 i += delta;
1241 (void) ufs_fault(ufsvfsp->vfs_root,
1242 "alloccgblk: can't find blk in cyl, pos:%d, i:%d, "
1243 "fs:%s bno: %x\n", pos, i, fs->fs_fsmnt, (int)bno);
1244 return (0);
1246 norot:
1248 * No blocks in the requested cylinder, so take
1249 * next available one in this cylinder group.
1251 bno = mapsearch(ufsvfsp, cgp, bpref, (int)fs->fs_frag);
1252 if (bno < 0)
1253 return (0);
1254 cgp->cg_rotor = bno;
1255 gotit:
1256 blkno = fragstoblks(fs, bno);
1257 frag = (cgp->cg_cgx * fs->fs_fpg) + bno;
1258 if (TRANS_ISCANCEL(ufsvfsp, ldbtob(fsbtodb(fs, frag)), fs->fs_bsize))
1259 goto norot;
1260 clrblock(fs, blksfree, (long)blkno);
1262 * the other cg/sb/si fields are TRANS'ed by the caller
1264 cgp->cg_cs.cs_nbfree--;
1265 fs->fs_cstotal.cs_nbfree--;
1266 fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
1267 cylno = cbtocylno(fs, bno);
1268 blks = cg_blks(ufsvfsp, cgp, cylno);
1269 rpos = cbtorpos(ufsvfsp, bno);
1270 blktot = cg_blktot(cgp);
1271 blks[rpos]--;
1272 blktot[cylno]--;
1273 TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG);
1274 fs->fs_fmod = 1;
1275 return (frag);
1279 * Determine whether an inode can be allocated.
1281 * Check to see if an inode is available, and if it is,
1282 * allocate it using the following policy:
1283 * 1) allocate the requested inode.
1284 * 2) allocate the next available inode after the requested
1285 * inode in the specified cylinder group.
1287 static ino_t
1288 ialloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
1290 struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
1291 struct fs *fs = ip->i_fs;
1292 struct cg *cgp;
1293 struct buf *bp;
1294 int start, len, loc, map, i;
1295 char *iused;
1297 if (fs->fs_cs(fs, cg).cs_nifree == 0)
1298 return (0);
1299 bp = UFS_BREAD(ufsvfsp, ip->i_dev, (daddr_t)fsbtodb(fs, cgtod(fs, cg)),
1300 (int)fs->fs_cgsize);
1302 cgp = bp->b_un.b_cg;
1303 if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp) ||
1304 cgp->cg_cs.cs_nifree == 0) {
1305 brelse(bp);
1306 return (0);
1308 iused = cg_inosused(cgp);
1309 mutex_enter(&ufsvfsp->vfs_lock);
1311 * While we are waiting for the mutex, someone may have taken
1312 * the last available inode. Need to recheck.
1314 if (cgp->cg_cs.cs_nifree == 0) {
1315 mutex_exit(&ufsvfsp->vfs_lock);
1316 brelse(bp);
1317 return (0);
1320 cgp->cg_time = gethrestime_sec();
1321 if (ipref) {
1322 ipref %= fs->fs_ipg;
1323 if (isclr(iused, ipref))
1324 goto gotit;
1326 start = cgp->cg_irotor / NBBY;
1327 len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
1328 loc = skpc(0xff, (uint_t)len, &iused[start]);
1329 if (loc == 0) {
1330 len = start + 1;
1331 start = 0;
1332 loc = skpc(0xff, (uint_t)len, &iused[0]);
1333 if (loc == 0) {
1334 mutex_exit(&ufsvfsp->vfs_lock);
1335 (void) ufs_fault(ITOV(ip),
1336 "ialloccg: map corrupted, cg = %d, irotor = %d, "
1337 "fs = %s\n", cg, (int)cgp->cg_irotor, fs->fs_fsmnt);
1338 return (0);
1341 i = start + len - loc;
1342 map = iused[i];
1343 ipref = i * NBBY;
1344 for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
1345 if ((map & i) == 0) {
1346 cgp->cg_irotor = ipref;
1347 goto gotit;
1351 mutex_exit(&ufsvfsp->vfs_lock);
1352 (void) ufs_fault(ITOV(ip), "ialloccg: block not in mapfs = %s",
1353 fs->fs_fsmnt);
1354 return (0);
1355 gotit:
1356 setbit(iused, ipref);
1357 cgp->cg_cs.cs_nifree--;
1358 fs->fs_cstotal.cs_nifree--;
1359 fs->fs_cs(fs, cg).cs_nifree--;
1360 if (((mode & IFMT) == IFDIR) || ((mode & IFMT) == IFATTRDIR)) {
1361 cgp->cg_cs.cs_ndir++;
1362 fs->fs_cstotal.cs_ndir++;
1363 fs->fs_cs(fs, cg).cs_ndir++;
1365 fs->fs_fmod = 1;
1366 ufs_notclean(ufsvfsp);
1367 TRANS_BUF(ufsvfsp, 0, fs->fs_cgsize, bp, DT_CG);
1368 TRANS_SI(ufsvfsp, fs, cg);
1369 bdrwrite(bp);
1370 return (cg * fs->fs_ipg + ipref);
1374 * Find a block of the specified size in the specified cylinder group.
1376 * It is a panic if a request is made to find a block if none are
1377 * available.
1379 static daddr_t
1380 mapsearch(struct ufsvfs *ufsvfsp, struct cg *cgp, daddr_t bpref,
1381 int allocsiz)
1383 struct fs *fs = ufsvfsp->vfs_fs;
1384 daddr_t bno, cfrag;
1385 int start, len, loc, i, last, first, secondtime;
1386 int blk, field, subfield, pos;
1387 int gotit;
1390 * ufsvfs->vfs_lock is held when calling this.
1393 * Find the fragment by searching through the
1394 * free block map for an appropriate bit pattern.
1396 if (bpref)
1397 start = dtogd(fs, bpref) / NBBY;
1398 else
1399 start = cgp->cg_frotor / NBBY;
1401 * the following loop performs two scans -- the first scan
1402 * searches the bottom half of the array for a match and the
1403 * second scan searches the top half of the array. The loops
1404 * have been merged just to make things difficult.
1406 first = start;
1407 last = howmany(fs->fs_fpg, NBBY);
1408 secondtime = 0;
1409 cfrag = cgp->cg_cgx * fs->fs_fpg;
1410 while (first < last) {
1411 len = last - first;
1413 * search the array for a match
1415 loc = scanc((unsigned)len, (uchar_t *)&cg_blksfree(cgp)[first],
1416 (uchar_t *)fragtbl[fs->fs_frag],
1417 (int)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
1419 * match found
1421 if (loc) {
1422 bno = (last - loc) * NBBY;
1425 * Found the byte in the map, sift
1426 * through the bits to find the selected frag
1428 cgp->cg_frotor = bno;
1429 gotit = 0;
1430 for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
1431 blk = blkmap(fs, cg_blksfree(cgp), bno);
1432 blk <<= 1;
1433 field = around[allocsiz];
1434 subfield = inside[allocsiz];
1435 for (pos = 0;
1436 pos <= fs->fs_frag - allocsiz;
1437 pos++) {
1438 if ((blk & field) == subfield) {
1439 gotit++;
1440 break;
1442 field <<= 1;
1443 subfield <<= 1;
1445 if (gotit)
1446 break;
1448 bno += pos;
1451 * success if block is *not* being converted from
1452 * metadata into userdata (harpy). If so, ignore.
1454 if (!TRANS_ISCANCEL(ufsvfsp,
1455 ldbtob(fsbtodb(fs, (cfrag+bno))),
1456 allocsiz * fs->fs_fsize))
1457 return (bno);
1460 * keep looking -- this block is being converted
1462 first = (last - loc) + 1;
1463 loc = 0;
1464 if (first < last)
1465 continue;
1468 * no usable matches in bottom half -- now search the top half
1470 if (secondtime)
1472 * no usable matches in top half -- all done
1474 break;
1475 secondtime = 1;
1476 last = start + 1;
1477 first = 0;
1480 * no usable matches
1482 return ((daddr_t)-1);
/*
 * Block-pointer layout helpers: direct pointers come first, followed by
 * the indirect pointers.
 */
#define	UFSNADDR (NDADDR + NIADDR)	/* NADDR applies to (obsolete) S5FS */
#define	IB(i)	(NDADDR + (i))	/* index of i'th indirect block ptr */
#define	SINGLE	0		/* single indirect block ptr */
#define	DOUBLE	1		/* double indirect block ptr */
#define	TRIPLE	2		/* triple indirect block ptr */
1492 * Acquire a write lock, and keep trying till we get it
1494 static int
1495 allocsp_wlockfs(struct vnode *vp, struct lockfs *lf)
1497 int err = 0;
1499 lockagain:
1500 do {
1501 err = ufs_fiolfss(vp, lf);
1502 if (err)
1503 return (err);
1504 } while (!LOCKFS_IS_ULOCK(lf));
1506 lf->lf_lock = LOCKFS_WLOCK;
1507 lf->lf_flags = 0;
1508 lf->lf_comment = NULL;
1509 err = ufs__fiolfs(vp, lf, 1, 0);
1511 if (err == EBUSY || err == EINVAL)
1512 goto lockagain;
1514 return (err);
1518 * Release the write lock
1520 static int
1521 allocsp_unlockfs(struct vnode *vp, struct lockfs *lf)
1523 int err = 0;
1525 lf->lf_lock = LOCKFS_ULOCK;
1526 lf->lf_flags = 0;
1527 err = ufs__fiolfs(vp, lf, 1, 0);
1528 return (err);
1531 struct allocsp_undo {
1532 daddr_t offset;
1533 daddr_t blk;
1534 struct allocsp_undo *next;
1538 * ufs_allocsp() can be used to pre-allocate blocks for a file on a given
1539 * file system. For direct blocks, the blocks are allocated from the offset
1540 * requested to the block boundary, then any full blocks are allocated,
1541 * and finally any remainder.
1542 * For indirect blocks the blocks are not initialized and are
1543 * only marked as allocated. These addresses are then stored as negative
1544 * block numbers in the inode to imply special handling. UFS has been modified
1545 * where necessary to understand this new notion.
1546 * Successfully fallocated files will have IFALLOCATE cflag set in the inode.
1549 ufs_allocsp(struct vnode *vp, struct flock64 *lp, cred_t *cr)
1551 struct lockfs lf;
1552 int berr, err, resv, issync;
1553 off_t istart, len; /* istart, special for idb */
1554 struct inode *ip;
1555 struct fs *fs;
1556 struct ufsvfs *ufsvfsp;
1557 uoff_t resid, i, uoff;
1558 daddr32_t db_undo[NDADDR]; /* old direct blocks */
1559 struct allocsp_undo *ib_undo = NULL; /* ib undo */
1560 struct allocsp_undo *undo = NULL;
1561 uoff_t osz; /* old file size */
1562 int chunkblks = 0; /* # of blocks in 1 allocation */
1563 int cnt = 0;
1564 daddr_t allocblk;
1565 daddr_t totblks = 0;
1566 struct ulockfs *ulp;
1567 size_t done_len;
1568 int nbytes, offsetn;
1571 ASSERT(vp->v_type == VREG);
1573 ip = VTOI(vp);
1574 fs = ip->i_fs;
1575 if ((ufsvfsp = ip->i_ufsvfs) == NULL) {
1576 err = EIO;
1577 goto out_allocsp;
1580 istart = blkroundup(fs, (lp->l_start));
1581 len = blkroundup(fs, (lp->l_len));
1582 chunkblks = blkroundup(fs, ufsvfsp->vfs_iotransz) / fs->fs_bsize;
1583 ulp = &ufsvfsp->vfs_ulockfs;
1585 if (lp->l_start < 0 || lp->l_len <= 0)
1586 return (EINVAL);
1588 /* Quickly check to make sure we have space before we proceed */
1589 if (lblkno(fs, len) > fs->fs_cstotal.cs_nbfree) {
1590 if (TRANS_ISTRANS(ufsvfsp)) {
1591 ufs_delete_drain_wait(ufsvfsp, 1);
1592 if (lblkno(fs, len) > fs->fs_cstotal.cs_nbfree)
1593 return (ENOSPC);
1594 } else
1595 return (ENOSPC);
1599 * We will keep i_rwlock locked as WRITER through out the function
1600 * since we don't want anyone else reading or writing to the inode
1601 * while we are in the middle of fallocating the file.
1603 rw_enter(&ip->i_rwlock, RW_WRITER);
1605 /* Back up the direct block list, used for undo later if necessary */
1606 rw_enter(&ip->i_contents, RW_READER);
1607 for (i = 0; i < NDADDR; i++)
1608 db_undo[i] = ip->i_db[i];
1609 osz = ip->i_size;
1610 rw_exit(&ip->i_contents);
1612 /* Write lock the file system */
1613 if (err = allocsp_wlockfs(vp, &lf))
1614 goto exit;
1617 * Allocate any direct blocks now.
1618 * Blocks are allocated from the offset requested to the block
1619 * boundary, then any full blocks are allocated, and finally any
1620 * remainder.
1622 if (lblkno(fs, lp->l_start) < NDADDR) {
1623 ufs_trans_trunc_resv(ip, ip->i_size + (NDADDR * fs->fs_bsize),
1624 &resv, &resid);
1625 TRANS_BEGIN_CSYNC(ufsvfsp, &issync, TOP_ALLOCSP, resv);
1627 rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
1628 rw_enter(&ip->i_contents, RW_WRITER);
1630 done_len = 0;
1631 while ((done_len < lp->l_len) &&
1632 (lblkno(fs, lp->l_start + done_len) < NDADDR)) {
1633 uoff = (offset_t)(lp->l_start + done_len);
1634 offsetn = (int)blkoff(fs, uoff);
1635 nbytes = (int)MIN(fs->fs_bsize - offsetn,
1636 lp->l_len - done_len);
1638 berr = bmap_write(ip, uoff, offsetn + nbytes,
1639 BI_FALLOCATE, &allocblk, cr);
1640 /* Yikes error, quit */
1641 if (berr) {
1642 TRANS_INODE(ufsvfsp, ip);
1643 rw_exit(&ip->i_contents);
1644 rw_exit(&ufsvfsp->vfs_dqrwlock);
1645 TRANS_END_CSYNC(ufsvfsp, &err, issync,
1646 TOP_ALLOCSP, resv);
1647 err = allocsp_unlockfs(vp, &lf);
1648 goto exit;
1651 if (allocblk) {
1652 totblks++;
1653 if ((uoff + nbytes) > ip->i_size)
1654 ip->i_size = (uoff + nbytes);
1656 done_len += nbytes;
1659 TRANS_INODE(ufsvfsp, ip);
1660 rw_exit(&ip->i_contents);
1661 rw_exit(&ufsvfsp->vfs_dqrwlock);
1662 TRANS_END_CSYNC(ufsvfsp, &err, issync, TOP_ALLOCSP, resv);
1664 /* start offset for indirect allocation */
1665 istart = (uoff + nbytes);
1668 /* Break the transactions into vfs_iotransz units */
1669 ufs_trans_trunc_resv(ip, ip->i_size +
1670 blkroundup(fs, ufsvfsp->vfs_iotransz), &resv, &resid);
1671 TRANS_BEGIN_CSYNC(ufsvfsp, &issync, TOP_ALLOCSP, resv);
1673 rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
1674 rw_enter(&ip->i_contents, RW_WRITER);
1676 /* Now go about fallocating necessary indirect blocks */
1677 for (i = istart; i < (lp->l_start + lp->l_len); i += fs->fs_bsize) {
1678 berr = bmap_write(ip, i, fs->fs_bsize, BI_FALLOCATE,
1679 &allocblk, cr);
1680 if (berr) {
1681 TRANS_INODE(ufsvfsp, ip);
1682 rw_exit(&ip->i_contents);
1683 rw_exit(&ufsvfsp->vfs_dqrwlock);
1684 TRANS_END_CSYNC(ufsvfsp, &err, issync, TOP_ALLOCSP,
1685 resv);
1686 err = allocsp_unlockfs(vp, &lf);
1687 goto exit;
1690 /* Update the blk counter only if new block was added */
1691 if (allocblk) {
1692 /* Save undo information */
1693 undo = kmem_alloc(sizeof (struct allocsp_undo),
1694 KM_SLEEP);
1695 undo->offset = i;
1696 undo->blk = allocblk;
1697 undo->next = ib_undo;
1698 ib_undo = undo;
1699 totblks++;
1701 if (i >= ip->i_size)
1702 ip->i_size += fs->fs_bsize;
1704 cnt++;
1706 /* Being a good UFS citizen, let others get a share */
1707 if (cnt == chunkblks) {
1709 * If there are waiters or the fs is hard locked,
1710 * error locked, or read-only error locked,
1711 * quit with EIO
1713 if (ULOCKFS_IS_HLOCK(ulp) || ULOCKFS_IS_ELOCK(ulp) ||
1714 ULOCKFS_IS_ROELOCK(ulp)) {
1715 ip->i_cflags |= IFALLOCATE;
1716 TRANS_INODE(ufsvfsp, ip);
1717 rw_exit(&ip->i_contents);
1718 rw_exit(&ufsvfsp->vfs_dqrwlock);
1720 TRANS_END_CSYNC(ufsvfsp, &err, issync,
1721 TOP_ALLOCSP, resv);
1722 rw_exit(&ip->i_rwlock);
1723 (void) allocsp_unlockfs(vp, &lf);
1724 return (EIO);
1727 TRANS_INODE(ufsvfsp, ip);
1728 rw_exit(&ip->i_contents);
1729 rw_exit(&ufsvfsp->vfs_dqrwlock);
1731 /* End the current transaction */
1732 TRANS_END_CSYNC(ufsvfsp, &err, issync, TOP_ALLOCSP,
1733 resv);
1735 if (CV_HAS_WAITERS(&ulp->ul_cv)) {
1736 /* Release the write lock */
1737 if (err = allocsp_unlockfs(vp, &lf))
1738 goto exit;
1740 /* Wake up others waiting to do operations */
1741 mutex_enter(&ulp->ul_lock);
1742 cv_broadcast(&ulp->ul_cv);
1743 mutex_exit(&ulp->ul_lock);
1745 /* Grab the write lock again */
1746 if (err = allocsp_wlockfs(vp, &lf))
1747 goto exit;
1748 } /* end of CV_HAS_WAITERS(&ulp->ul_cv) */
1750 /* Reserve more space in log for this file */
1751 ufs_trans_trunc_resv(ip,
1752 ip->i_size + blkroundup(fs, ufsvfsp->vfs_iotransz),
1753 &resv, &resid);
1754 TRANS_BEGIN_CSYNC(ufsvfsp, &issync, TOP_ALLOCSP, resv);
1756 rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
1757 rw_enter(&ip->i_contents, RW_WRITER);
1759 cnt = 0; /* reset cnt b/c of new transaction */
1763 if (!err && !berr)
1764 ip->i_cflags |= IFALLOCATE;
1766 /* If the file has grown then correct the file size */
1767 if (osz < (lp->l_start + lp->l_len))
1768 ip->i_size = (lp->l_start + lp->l_len);
1770 /* Release locks, end log transaction and unlock fs */
1771 TRANS_INODE(ufsvfsp, ip);
1772 rw_exit(&ip->i_contents);
1773 rw_exit(&ufsvfsp->vfs_dqrwlock);
1775 TRANS_END_CSYNC(ufsvfsp, &err, issync, TOP_ALLOCSP, resv);
1776 err = allocsp_unlockfs(vp, &lf);
1779 * @ exit label, we should no longer be holding the fs write lock, and
1780 * all logging transactions should have been ended. We still hold
1781 * ip->i_rwlock.
1783 exit:
1785 * File has grown larger than 2GB. Set flag
1786 * in superblock to indicate this, if it
1787 * is not already set.
1789 if ((ip->i_size > MAXOFF32_T) &&
1790 !(fs->fs_flags & FSLARGEFILES)) {
1791 ASSERT(ufsvfsp->vfs_lfflags & UFS_LARGEFILES);
1792 mutex_enter(&ufsvfsp->vfs_lock);
1793 fs->fs_flags |= FSLARGEFILES;
1794 ufs_sbwrite(ufsvfsp);
1795 mutex_exit(&ufsvfsp->vfs_lock);
1799 * Since we couldn't allocate completely, we will undo the allocations.
1801 if (berr) {
1802 ufs_trans_trunc_resv(ip, totblks * fs->fs_bsize, &resv, &resid);
1803 TRANS_BEGIN_CSYNC(ufsvfsp, &issync, TOP_ALLOCSP, resv);
1805 rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
1806 rw_enter(&ip->i_contents, RW_WRITER);
1808 /* Direct blocks */
1809 for (i = 0; i < NDADDR; i++) {
1811 * Only free the block if they are not same, and
1812 * the old one isn't zero (the fragment was
1813 * re-allocated).
1815 if (db_undo[i] != ip->i_db[i] && db_undo[i] == 0) {
1816 free(ip, ip->i_db[i], fs->fs_bsize, 0);
1817 ip->i_db[i] = 0;
1821 /* Undo the indirect blocks */
1822 while (ib_undo != NULL) {
1823 undo = ib_undo;
1824 err = bmap_set_bn(vp, undo->offset, 0);
1825 if (err)
1826 cmn_err(CE_PANIC, "ufs_allocsp(): failed to "
1827 "undo allocation of block %ld",
1828 undo->offset);
1829 free(ip, undo->blk, fs->fs_bsize, I_IBLK);
1830 ib_undo = undo->next;
1831 kmem_free(undo, sizeof (struct allocsp_undo));
1834 ip->i_size = osz;
1835 TRANS_INODE(ufsvfsp, ip);
1837 rw_exit(&ip->i_contents);
1838 rw_exit(&ufsvfsp->vfs_dqrwlock);
1840 TRANS_END_CSYNC(ufsvfsp, &err, issync, TOP_ALLOCSP, resv);
1842 rw_exit(&ip->i_rwlock);
1843 return (berr);
1847 * Don't forget to free the undo chain :)
1849 while (ib_undo != NULL) {
1850 undo = ib_undo;
1851 ib_undo = undo->next;
1852 kmem_free(undo, sizeof (struct allocsp_undo));
1855 rw_exit(&ip->i_rwlock);
1857 out_allocsp:
1858 return (err);
1862 * Free storage space associated with the specified inode. The portion
1863 * to be freed is specified by lp->l_start and lp->l_len (already
1864 * normalized to a "whence" of 0).
1866 * This is an experimental facility whose continued existence is not
1867 * guaranteed. Currently, we only support the special case
1868 * of l_len == 0, meaning free to end of file.
1870 * Blocks are freed in reverse order. This FILO algorithm will tend to
1871 * maintain a contiguous free list much longer than FIFO.
1872 * See also ufs_itrunc() in ufs_inode.c.
1874 * Bug: unused bytes in the last retained block are not cleared.
1875 * This may result in a "hole" in the file that does not read as zeroes.
1877 /* ARGSUSED */
1879 ufs_freesp(struct vnode *vp, struct flock64 *lp, int flag, cred_t *cr)
1881 int i;
1882 struct inode *ip = VTOI(vp);
1883 int error;
1885 ASSERT(vp->v_type == VREG);
1886 ASSERT(lp->l_start >= 0); /* checked by convoff */
1888 if (lp->l_len != 0)
1889 return (EINVAL);
1891 rw_enter(&ip->i_contents, RW_READER);
1892 if (ip->i_size == (uoff_t)lp->l_start) {
1893 rw_exit(&ip->i_contents);
1894 return (0);
1898 * Check if there is any active mandatory lock on the
1899 * range that will be truncated/expanded.
1901 if (MANDLOCK(vp, ip->i_mode)) {
1902 offset_t save_start;
1904 save_start = lp->l_start;
1906 if (ip->i_size < lp->l_start) {
1908 * "Truncate up" case: need to make sure there
1909 * is no lock beyond current end-of-file. To
1910 * do so, we need to set l_start to the size
1911 * of the file temporarily.
1913 lp->l_start = ip->i_size;
1915 lp->l_type = F_WRLCK;
1916 lp->l_sysid = 0;
1917 lp->l_pid = ttoproc(curthread)->p_pid;
1918 i = (flag & (FNDELAY|FNONBLOCK)) ? 0 : SLPFLCK;
1919 rw_exit(&ip->i_contents);
1920 if ((i = reclock(vp, lp, i, 0, lp->l_start, NULL)) != 0 ||
1921 lp->l_type != F_UNLCK) {
1922 return (i ? i : EAGAIN);
1924 rw_enter(&ip->i_contents, RW_READER);
1926 lp->l_start = save_start;
1930 * Make sure a write isn't in progress (allocating blocks)
1931 * by acquiring i_rwlock (we promised ufs_bmap we wouldn't
1932 * truncate while it was allocating blocks).
1933 * Grab the locks in the right order.
1935 rw_exit(&ip->i_contents);
1936 rw_enter(&ip->i_rwlock, RW_WRITER);
1937 error = TRANS_ITRUNC(ip, (uoff_t)lp->l_start, 0, cr);
1938 rw_exit(&ip->i_rwlock);
1939 return (error);
1943 * Find a cg with as close to nb contiguous bytes as possible
1944 * THIS MAY TAKE MANY DISK READS!
1946 * Implemented in an attempt to allocate contiguous blocks for
1947 * writing the ufs log file to, minimizing future disk head seeking
1949 daddr_t
1950 contigpref(ufsvfs_t *ufsvfsp, size_t nb, size_t minb)
1952 struct fs *fs = ufsvfsp->vfs_fs;
1953 daddr_t nblk = lblkno(fs, blkroundup(fs, nb));
1954 daddr_t minblk = lblkno(fs, blkroundup(fs, minb));
1955 daddr_t savebno, curbno, cgbno;
1956 int cg, cgblks, savecg, savenblk, curnblk, startcg;
1957 uchar_t *blksfree;
1958 buf_t *bp;
1959 struct cg *cgp;
1961 savenblk = 0;
1962 savecg = 0;
1963 savebno = 0;
1965 if ((startcg = findlogstartcg(fs, nblk, minblk)) == -1)
1966 cg = 0; /* Nothing suitable found */
1967 else
1968 cg = startcg;
1970 for (; cg < fs->fs_ncg; ++cg) {
1972 * find the largest contiguous range in this cg
1974 bp = UFS_BREAD(ufsvfsp, ufsvfsp->vfs_dev,
1975 (daddr_t)fsbtodb(fs, cgtod(fs, cg)),
1976 (int)fs->fs_cgsize);
1977 cgp = bp->b_un.b_cg;
1978 if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp)) {
1979 brelse(bp);
1980 continue;
1982 blksfree = cg_blksfree(cgp); /* free array */
1983 cgblks = fragstoblks(fs, fs->fs_fpg); /* blks in free array */
1984 cgbno = 0;
1985 while (cgbno < cgblks && savenblk < nblk) {
1986 /* find a free block */
1987 for (; cgbno < cgblks; ++cgbno) {
1988 if (isblock(fs, blksfree, cgbno)) {
1989 if (startcg != -1) {
1990 brelse(bp);
1991 savecg = startcg;
1992 savebno = cgbno;
1993 goto done;
1994 } else
1995 break;
1998 curbno = cgbno;
1999 /* count the number of free blocks */
2000 for (curnblk = 0; cgbno < cgblks; ++cgbno) {
2001 if (!isblock(fs, blksfree, cgbno))
2002 break;
2003 if (++curnblk >= nblk)
2004 break;
2006 if (curnblk > savenblk) {
2007 savecg = cg;
2008 savenblk = curnblk;
2009 savebno = curbno;
2012 brelse(bp);
2013 if (savenblk >= nblk)
2014 break;
2017 done:
2019 /* convert block offset in cg to frag offset in cg */
2020 savebno = blkstofrags(fs, savebno);
2022 /* convert frag offset in cg to frag offset in fs */
2023 savebno += (savecg * fs->fs_fpg);
2025 return (savebno);
2029 * The object of this routine is to find a start point for the UFS log.
2030 * Ideally the space should be allocated from the smallest possible number
2031 * of contiguous cylinder groups. This is found by using a sliding window
2032 * technique. The smallest window of contiguous cylinder groups, which is
2033 * still able to accommodate the target, is found by moving the window
2034 * through the cylinder groups in a single pass. The end of the window is
2035 * advanced until the space is accommodated, then the start is advanced until
2036 * it no longer fits, the end is then advanced again and so on until the
2037 * final cylinder group is reached. The first suitable instance is recorded
2038 * and its starting cg number is returned.
2040 * If we are not able to find a minimum amount of space, represented by
2041 * minblk, or to do so uses more than the available extents, then return -1.
2045 findlogstartcg(struct fs *fs, daddr_t requested, daddr_t minblk)
2047 int ncgs; /* number of cylinder groups */
2048 daddr_t target; /* amount of space sought */
2049 int cwidth, ctotal; /* current window width and total */
2050 int bwidth, btotal; /* best window width and total so far */
2051 int s; /* index of the first element in the current window */
2052 int e; /* index of the first element + the width */
2053 /* (i.e. 1 + index of last element) */
2054 int bs; /* index of the first element in the best window so far */
2055 int header, max_extents;
2057 target = requested;
2058 ncgs = fs->fs_ncg;
2060 header = sizeof (extent_block_t) - sizeof (extent_t);
2061 max_extents = ((fs->fs_bsize)-header) / sizeof (extent_t);
2062 cwidth = ctotal = 0;
2063 btotal = -1;
2064 bwidth = ncgs;
2065 s = e = 0;
2066 while (e < ncgs) {
2067 /* Advance the end of the window until it accommodates the target. */
2068 while (ctotal < target && e < ncgs) {
2069 ctotal += fs->fs_cs(fs, e).cs_nbfree;
2070 e++;
2074 * Advance the start of the window until it no longer
2075 * accommodates the target.
2077 while (ctotal >= target && s < e) {
2078 /* See if this is the smallest window so far. */
2079 cwidth = e - s;
2080 if (cwidth <= bwidth) {
2081 if (cwidth == bwidth && ctotal <= btotal)
2082 goto more;
2083 bwidth = cwidth;
2084 btotal = ctotal;
2085 bs = s;
2087 more:
2088 ctotal -= fs->fs_cs(fs, s).cs_nbfree;
2089 s++;
2094 * If we cannot allocate the minimum required or we use too many
2095 * extents to do so, return -1.
2097 if (btotal < minblk || bwidth > max_extents)
2098 bs = -1;
2100 return (bs);