1 /* $NetBSD: ffs_wapbl.c,v 1.25 2013/10/25 11:35:55 martin Exp $ */
4 * Copyright (c) 2003,2006,2008 The NetBSD Foundation, Inc.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Wasabi Systems, Inc.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: ffs_wapbl.c,v 1.25 2013/10/25 11:35:55 martin Exp $");
35 #define WAPBL_INTERNAL
37 #if defined(_KERNEL_OPT)
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/kernel.h>
44 #include <sys/vnode.h>
45 #include <sys/mount.h>
48 #include <sys/ioctl.h>
49 #include <sys/errno.h>
50 #include <sys/kauth.h>
51 #include <sys/wapbl.h>
53 #include <ufs/ufs/inode.h>
54 #include <ufs/ufs/quota.h>
55 #include <ufs/ufs/ufsmount.h>
56 #include <ufs/ufs/ufs_bswap.h>
57 #include <ufs/ufs/ufs_extern.h>
58 #include <ufs/ufs/ufs_wapbl.h>
60 #include <ufs/ffs/fs.h>
61 #include <ufs/ffs/ffs_extern.h>
/*
 * Debug switch and trace macro for this file.
 *
 * ffs_wapbl_debug is initialised to 1.  The first DPRINTF definition
 * prints its message, prefixed with the calling function name and the
 * source line number, only when ffs_wapbl_debug is non-zero.  A second
 * DPRINTF definition follows whose body is not visible in this view.
 * NOTE(review): the two definitions are presumably selected by a
 * preprocessor conditional that is not shown here -- confirm.
 */
65 int ffs_wapbl_debug
= 1;
66 #define DPRINTF(fmt, args...) \
68 if (ffs_wapbl_debug) \
69 printf("%s:%d "fmt, __func__ , __LINE__, ##args); \
70 } while (/* CONSTCOND */0)
72 #define DPRINTF(fmt, args...) \
75 } while (/* CONSTCOND */0)
/*
 * Forward declarations of the file-local (static) helpers that are
 * defined further down in this file: superblock layout check, journal
 * position lookup/creation, in-filesystem log creation, free-extent
 * search, log removal and log file allocation.
 */
78 static int ffs_superblock_layout(struct fs
*);
79 static int wapbl_log_position(struct mount
*, struct fs
*, struct vnode
*,
80 daddr_t
*, size_t *, size_t *, uint64_t *);
81 static int wapbl_create_infs_log(struct mount
*, struct fs
*, struct vnode
*,
82 daddr_t
*, size_t *, uint64_t *);
83 static void wapbl_find_log_start(struct mount
*, struct vnode
*, off_t
,
84 daddr_t
*, daddr_t
*, size_t *);
85 static int wapbl_remove_log(struct mount
*);
86 static int wapbl_allocate_log_file(struct mount
*, struct vnode
*,
87 daddr_t
*, size_t *, uint64_t *);
90 * Return the super block layout format - UFS1 or UFS2.
91 * WAPBL only works with UFS2 layout (which is still available
94 * XXX Should this be in ufs/ffs/fs.h? Same style of check is
95 * also used in ffs_alloc.c in a few places.
/*
 * ffs_superblock_layout(fs): classify the superblock layout of `fs'.
 *
 * The visible test singles out a superblock that carries the UFS1 magic
 * number and whose FS_FLAGS_UPDATED bit in fs_old_flags is clear.  The
 * return statements are not visible in this view; callers in this file
 * treat a result below 2 as "too old to support WAPBL".
 */
98 ffs_superblock_layout(struct fs
*fs
)
100 if ((fs
->fs_magic
== FS_UFS1_MAGIC
) &&
101 ((fs
->fs_old_flags
& FS_FLAGS_UPDATED
) == 0))
108 * This function is invoked after a log is replayed to
109 * disk to perform logical cleanup actions as described by
/*
 * ffs_wapbl_replay_finish(mp): post-replay cleanup for mount `mp'.
 *
 * Walks the wr_inodes[] table of the replay state hanging off
 * mp->mnt_wapbl_replay (wr_inodescnt entries).  Each recorded inode is
 * fetched with VFS_VGET(); the visible assertions require that the inode
 * number matches the table entry and that its link count is zero.  An
 * inode whose mode is zero is released with ffs_vfree(), using the mode
 * recorded in the replay table.  After the loop the replay is stopped,
 * its state freed, and mp->mnt_wapbl_replay is reset to NULL.
 *
 * The mount must not be read-only (asserted with KDASSERT).
 */
113 ffs_wapbl_replay_finish(struct mount
*mp
)
115 struct wapbl_replay
*wr
= mp
->mnt_wapbl_replay
;
122 KDASSERT((mp
->mnt_flag
& MNT_RDONLY
) == 0);
124 for (i
= 0; i
< wr
->wr_inodescnt
; i
++) {
127 error
= VFS_VGET(mp
, wr
->wr_inodes
[i
].wr_inumber
, &vp
);
/* NOTE(review): presumably printed only when VFS_VGET fails -- the guarding test is not visible. */
129 printf("ffs_wapbl_replay_finish: "
130 "unable to cleanup inode %" PRIu32
"\n",
131 wr
->wr_inodes
[i
].wr_inumber
);
135 KDASSERT(wr
->wr_inodes
[i
].wr_inumber
== ip
->i_number
);
137 printf("ffs_wapbl_replay_finish: "
138 "cleaning inode %" PRIu64
" size=%" PRIu64
" mode=%o nlink=%d\n",
139 ip
->i_number
, ip
->i_size
, ip
->i_mode
, ip
->i_nlink
);
141 KASSERT(ip
->i_nlink
== 0);
144 * The journal may have left partially allocated inodes in mode
145 * zero. This may occur if a crash occurs betweeen the node
146 * allocation in ffs_nodeallocg and when the node is properly
147 * initialized in ufs_makeinode. If so, just dallocate them.
149 if (ip
->i_mode
== 0) {
151 ffs_vfree(vp
, ip
->i_number
, wr
->wr_inodes
[i
].wr_imode
);
/* All recorded inodes handled: tear down the replay state. */
156 wapbl_replay_stop(wr
);
157 wapbl_replay_free(wr
);
158 mp
->mnt_wapbl_replay
= NULL
;
161 /* Callback for wapbl */
/*
 * ffs_wapbl_sync_metadata(mp, deallocblks, dealloclens, dealloccnt):
 * first of the two callbacks handed to wapbl_start() by
 * ffs_wapbl_start().
 *
 * For each of the `dealloccnt' entries, the block number in
 * deallocblks[i] is converted with FFS_DBTOFSB() and freed with
 * ffs_blkfree() using length dealloclens[i].  Afterwards fs_time is set
 * to time_second and the cylinder group data is written with
 * ffs_cgupdate(ump, 0).  `error' is marked __diagused; how it is
 * consumed is not visible in this view.
 *
 * With WAPBL_DEBUG_INODES defined, ufs_wapbl_verify_inodes() is called
 * first.
 */
163 ffs_wapbl_sync_metadata(struct mount
*mp
, daddr_t
*deallocblks
,
164 int *dealloclens
, int dealloccnt
)
166 struct ufsmount
*ump
= VFSTOUFS(mp
);
167 struct fs
*fs
= ump
->um_fs
;
168 int i
, error __diagused
;
170 #ifdef WAPBL_DEBUG_INODES
171 ufs_wapbl_verify_inodes(mp
, "ffs_wapbl_sync_metadata");
174 for (i
= 0; i
< dealloccnt
; i
++) {
176 * blkfree errors are unreported, might silently fail
177 * if it cannot read the cylinder group block
179 ffs_blkfree(fs
, ump
->um_devvp
,
180 FFS_DBTOFSB(fs
, deallocblks
[i
]), dealloclens
[i
], -1);
184 fs
->fs_time
= time_second
;
185 error
= ffs_cgupdate(ump
, 0);
/*
 * ffs_wapbl_abort_sync_metadata(mp, deallocblks, dealloclens,
 * dealloccnt): second of the two callbacks handed to wapbl_start() by
 * ffs_wapbl_start().
 *
 * For each of the `dealloccnt' entries, ffs_blkalloc_ump() is called on
 * the block number deallocblks[i] after conversion with FFS_DBTOFSB().
 * The remaining arguments of that call are not visible in this view.
 * Its result is deliberately not checked, as the in-body comment
 * explains.
 */
190 ffs_wapbl_abort_sync_metadata(struct mount
*mp
, daddr_t
*deallocblks
,
191 int *dealloclens
, int dealloccnt
)
193 struct ufsmount
*ump
= VFSTOUFS(mp
);
194 struct fs
*fs
= ump
->um_fs
;
197 for (i
= 0; i
< dealloccnt
; i
++) {
199 * Since the above blkfree may have failed, this blkalloc might
200 * fail as well, so don't check its error. Note that if the
201 * blkfree succeeded above, then this shouldn't fail because
202 * the buffer will be locked in the current transaction.
204 ffs_blkalloc_ump(ump
, FFS_DBTOFSB(fs
, deallocblks
[i
]),
/*
 * wapbl_remove_log(mp): forget the journal recorded in the superblock
 * of mount `mp'.
 *
 * Steps visible here:
 *  - bail out when ffs_superblock_layout() reports a layout below 2;
 *  - when all four fs_journallocs[] entries are zero, only the
 *    bookkeeping needs clearing;
 *  - otherwise dispatch on fs_journal_location.  For an in-filesystem
 *    log the log inode number is taken from
 *    fs_journallocs[UFS_WAPBL_INFS_INO], the inode is fetched with
 *    VFS_VGET(), it must carry SF_LOG in i_flags, and its link count is
 *    set to 0 with DIP_ASSIGN().  An end-of-partition log needs no
 *    extra work.  An unknown location is reported with printf();
 *  - finally fs_journal_version, fs_journal_location, fs_journal_flags
 *    and all fs_journallocs[] entries are zeroed and the superblock is
 *    written with ffs_sbupdate(ump, MNT_WAIT), whose result is
 *    discarded.
 *
 * The return statements are not visible in this view.
 */
210 wapbl_remove_log(struct mount
*mp
)
212 struct ufsmount
*ump
= VFSTOUFS(mp
);
213 struct fs
*fs
= ump
->um_fs
;
219 /* If super block layout is too old to support WAPBL, return */
220 if (ffs_superblock_layout(fs
) < 2)
223 /* If all the log locators are 0, just clean up */
224 if (fs
->fs_journallocs
[0] == 0 &&
225 fs
->fs_journallocs
[1] == 0 &&
226 fs
->fs_journallocs
[2] == 0 &&
227 fs
->fs_journallocs
[3] == 0) {
228 DPRINTF("empty locators, just clear\n");
232 switch (fs
->fs_journal_location
) {
233 case UFS_WAPBL_JOURNALLOC_NONE
:
238 case UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM
:
239 log_ino
= fs
->fs_journallocs
[UFS_WAPBL_INFS_INO
];
240 DPRINTF("in-fs log, ino = %" PRId64
"\n",log_ino
);
242 /* if no existing log inode, just clear all fields and bail */
245 error
= VFS_VGET(mp
, log_ino
, &vp
);
247 printf("ffs_wapbl: vget failed %d\n",
249 /* clear out log info on error */
253 KASSERT(log_ino
== ip
->i_number
);
254 if ((ip
->i_flags
& SF_LOG
) == 0) {
255 printf("ffs_wapbl: try to clear non-log inode "
256 "%" PRId64
"\n", log_ino
);
258 /* clear out log info on error */
263 * remove the log inode by setting its link count back
267 DIP_ASSIGN(ip
, nlink
, 0);
270 case UFS_WAPBL_JOURNALLOC_END_PARTITION
:
271 DPRINTF("end-of-partition log\n");
272 /* no extra work required */
276 printf("ffs_wapbl: unknown journal type %d\n",
277 fs
->fs_journal_location
);
283 /* Clear out all previous knowledge of journal */
284 fs
->fs_journal_version
= 0;
285 fs
->fs_journal_location
= 0;
286 fs
->fs_journal_flags
= 0;
287 fs
->fs_journallocs
[0] = 0;
288 fs
->fs_journallocs
[1] = 0;
289 fs
->fs_journallocs
[2] = 0;
290 fs
->fs_journallocs
[3] = 0;
291 (void) ffs_sbupdate(ump
, MNT_WAIT
);
/*
 * ffs_wapbl_start(mp): bring journaling for mount `mp' in line with its
 * mount flags.
 *
 * When no journal is attached yet (mp->mnt_wapbl == NULL):
 *  - if the superblock carries UFS_WAPBL_FLAGS_CLEAR_LOG, the existing
 *    journal is removed with wapbl_remove_log();
 *  - if MNT_LOG is set, the superblock layout is checked (MNT_LOG is
 *    cleared again when the layout is below 2), the journal position is
 *    obtained from wapbl_log_position(), and wapbl_start() is called
 *    with ffs_wapbl_sync_metadata / ffs_wapbl_abort_sync_metadata as
 *    callbacks.  mp->mnt_wapbl_op is pointed at wapbl_ops, and
 *    FS_DOWAPBL is set in fs_flags and written with ffs_sbupdate() if
 *    it was not set already.  ffs_wapbl_stop(mp, MNT_FORCE) appears
 *    after both ffs_sbupdate() and wapbl_flush() (NOTE(review):
 *    presumably on their failure paths -- the guarding tests are not
 *    visible);
 *  - an else-branch clears FS_DOWAPBL when it is set (the matching
 *    `if' is not visible in this view).
 *
 * Finally, if the filesystem is writable (fs_ronly == 0) and a replay
 * is pending, MNT_RDONLY is masked out around the call to
 * ffs_wapbl_replay_finish() and the saved bit is OR-ed back afterwards.
 */
297 ffs_wapbl_start(struct mount
*mp
)
299 struct ufsmount
*ump
= VFSTOUFS(mp
);
300 struct fs
*fs
= ump
->um_fs
;
301 struct vnode
*devvp
= ump
->um_devvp
;
308 if (mp
->mnt_wapbl
== NULL
) {
309 if (fs
->fs_journal_flags
& UFS_WAPBL_FLAGS_CLEAR_LOG
) {
310 /* Clear out any existing journal file */
311 error
= wapbl_remove_log(mp
);
316 if (mp
->mnt_flag
& MNT_LOG
) {
317 KDASSERT(fs
->fs_ronly
== 0);
319 /* WAPBL needs UFS2 format super block */
320 if (ffs_superblock_layout(fs
) < 2) {
321 printf("%s fs superblock in old format, "
323 VFSTOUFS(mp
)->um_fs
->fs_fsmnt
);
324 mp
->mnt_flag
&= ~MNT_LOG
;
328 error
= wapbl_log_position(mp
, fs
, devvp
, &off
,
329 &count
, &blksize
, &extradata
);
333 error
= wapbl_start(&mp
->mnt_wapbl
, mp
, devvp
, off
,
334 count
, blksize
, mp
->mnt_wapbl_replay
,
335 ffs_wapbl_sync_metadata
,
336 ffs_wapbl_abort_sync_metadata
);
340 mp
->mnt_wapbl_op
= &wapbl_ops
;
343 printf("%s: enabling logging\n", fs
->fs_fsmnt
);
346 if ((fs
->fs_flags
& FS_DOWAPBL
) == 0) {
348 fs
->fs_flags
|= FS_DOWAPBL
;
349 error
= ffs_sbupdate(ump
, MNT_WAIT
);
352 ffs_wapbl_stop(mp
, MNT_FORCE
);
356 error
= wapbl_flush(mp
->mnt_wapbl
, 1);
358 ffs_wapbl_stop(mp
, MNT_FORCE
);
362 } else if (fs
->fs_flags
& FS_DOWAPBL
) {
364 fs
->fs_flags
&= ~FS_DOWAPBL
;
369 * It is recommended that you finish replay with logging enabled.
370 * However, even if logging is not enabled, the remaining log
371 * replay should be safely recoverable with an fsck, so perform
374 if ((fs
->fs_ronly
== 0) && mp
->mnt_wapbl_replay
) {
375 int saveflag
= mp
->mnt_flag
& MNT_RDONLY
;
377 * Make sure MNT_RDONLY is not set so that the inode
378 * cleanup in ufs_inactive will actually do its work.
380 mp
->mnt_flag
&= ~MNT_RDONLY
;
381 ffs_wapbl_replay_finish(mp
);
382 mp
->mnt_flag
|= saveflag
;
383 KASSERT(fs
->fs_ronly
== 0);
/*
 * ffs_wapbl_stop(mp, force): shut down journaling on mount `mp'.
 *
 * Visible sequence: the journal is flushed with
 * wapbl_flush(mp->mnt_wapbl, 1); a transaction is opened with
 * UFS_WAPBL_BEGIN(); FS_DOWAPBL (asserted to be set) is cleared from
 * fs_flags and the superblock is written with ffs_sbupdate(), which is
 * asserted to succeed; then wapbl_stop() is called with `force'.
 * FS_DOWAPBL is set again at one point (NOTE(review): presumably when
 * wapbl_stop() fails -- the guarding test is not visible).  On the way
 * out the flag is cleared once more, mp->mnt_wapbl is reset to NULL and
 * a "disabled logging" message is printed.
 *
 * The filesystem must be writable (fs_ronly == 0, checked with
 * KDASSERT).
 */
390 ffs_wapbl_stop(struct mount
*mp
, int force
)
392 struct ufsmount
*ump
= VFSTOUFS(mp
);
393 struct fs
*fs
= ump
->um_fs
;
397 KDASSERT(fs
->fs_ronly
== 0);
400 * Make sure turning off FS_DOWAPBL is only removed
401 * as the only change in the final flush since otherwise
402 * a transaction may reorder writes.
404 error
= wapbl_flush(mp
->mnt_wapbl
, 1);
409 error
= UFS_WAPBL_BEGIN(mp
);
414 KASSERT(fs
->fs_flags
& FS_DOWAPBL
);
416 fs
->fs_flags
&= ~FS_DOWAPBL
;
417 error
= ffs_sbupdate(ump
, MNT_WAIT
);
418 KASSERT(error
== 0); /* XXX a bit drastic! */
421 error
= wapbl_stop(mp
->mnt_wapbl
, force
);
424 fs
->fs_flags
|= FS_DOWAPBL
;
427 fs
->fs_flags
&= ~FS_DOWAPBL
; /* Repeat in case of forced error */
428 mp
->mnt_wapbl
= NULL
;
431 printf("%s: disabled logging\n", fs
->fs_fsmnt
);
/*
 * ffs_wapbl_replay_start(mp, fs, devvp): begin journal replay for the
 * filesystem `fs' on device vnode `devvp'.
 *
 * The superblock layout is checked first (a result below 2 is
 * rejected; the action taken is not visible in this view).  The journal
 * position is then obtained from wapbl_log_position() and handed to
 * wapbl_replay_start(), which fills in mp->mnt_wapbl_replay.  Finally
 * mp->mnt_wapbl_op is pointed at wapbl_ops.
 */
439 ffs_wapbl_replay_start(struct mount
*mp
, struct fs
*fs
, struct vnode
*devvp
)
448 * WAPBL needs UFS2 format super block, if we got here with a
449 * UFS1 format super block something is amiss...
451 if (ffs_superblock_layout(fs
) < 2)
454 error
= wapbl_log_position(mp
, fs
, devvp
, &off
, &count
, &blksize
,
460 error
= wapbl_replay_start(&mp
->mnt_wapbl_replay
, devvp
, off
,
465 mp
->mnt_wapbl_op
= &wapbl_ops
;
471 * If the superblock doesn't already have a recorded journal location
472 * then we allocate the journal in one of two positions:
474 * - At the end of the partition after the filesystem if there's
475 * enough space. "Enough space" is defined as >= 1MB of journal
476 * per 1GB of filesystem or 64MB, whichever is smaller.
478 * - Inside the filesystem. We try to allocate a contiguous journal
479 * based on the total filesystem size - the target is 1MB of journal
480 * per 1GB of filesystem, up to a maximum journal size of 64MB. As
481 * a worst case allowing for fragmentation, we'll allocate a journal
482 * 1/4 of the desired size but never smaller than 1MB.
484 * XXX In the future if we allow for non-contiguous journal files we
485 * can tighten the above restrictions.
488 * This seems like a lot of duplication both here and in some of
489 * the userland tools (fsck_ffs, dumpfs, tunefs) with similar
490 * "switch (fs_journal_location)" constructs. Can we centralise
491 * this sort of code somehow/somewhere?
/*
 * wapbl_log_position(mp, fs, devvp, startp, countp, blksizep,
 * extradatap): determine where the journal lives, creating one if the
 * superblock does not record it yet.
 *
 * If fs_journal_version equals UFS_WAPBL_VERSION, the start, count and
 * block size are read back from fs_journallocs[] according to
 * fs_journal_location (end-of-partition or in-filesystem); an unknown
 * location is reported with printf().
 *
 * Otherwise a desired log size is derived from the filesystem size
 * divided by UFS_WAPBL_JOURNAL_SCALE and clamped to
 * [UFS_WAPBL_MIN_JOURNAL_SIZE, UFS_WAPBL_MAX_JOURNAL_SIZE].  The space
 * between the end of the filesystem and the end of the device (from
 * getdisksize()) is used when it is large enough; otherwise
 * wapbl_create_infs_log() allocates a log inside the filesystem and
 * ffs_sync() is called.  In both cases *startp and *countp are
 * converted to physical block numbers using the sector size, the
 * values are stored in fs_journallocs[], and the superblock is updated
 * with the version, the location and zeroed flags, then written with
 * ffs_sbupdate(ump, MNT_WAIT).
 *
 * The return statements are not visible in this view.
 */
494 wapbl_log_position(struct mount
*mp
, struct fs
*fs
, struct vnode
*devvp
,
495 daddr_t
*startp
, size_t *countp
, size_t *blksizep
, uint64_t *extradatap
)
497 struct ufsmount
*ump
= VFSTOUFS(mp
);
498 daddr_t logstart
, logend
, desired_logsize
;
503 if (fs
->fs_journal_version
== UFS_WAPBL_VERSION
) {
504 switch (fs
->fs_journal_location
) {
505 case UFS_WAPBL_JOURNALLOC_END_PARTITION
:
506 DPRINTF("found existing end-of-partition log\n");
507 *startp
= fs
->fs_journallocs
[UFS_WAPBL_EPART_ADDR
];
508 *countp
= fs
->fs_journallocs
[UFS_WAPBL_EPART_COUNT
];
509 *blksizep
= fs
->fs_journallocs
[UFS_WAPBL_EPART_BLKSZ
];
510 DPRINTF(" start = %" PRId64
", size = %zu, "
511 "blksize = %zu\n", *startp
, *countp
, *blksizep
);
514 case UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM
:
515 DPRINTF("found existing in-filesystem log\n");
516 *startp
= fs
->fs_journallocs
[UFS_WAPBL_INFS_ADDR
];
517 *countp
= fs
->fs_journallocs
[UFS_WAPBL_INFS_COUNT
];
518 *blksizep
= fs
->fs_journallocs
[UFS_WAPBL_INFS_BLKSZ
];
519 DPRINTF(" start = %" PRId64
", size = %zu, "
520 "blksize = %zu\n", *startp
, *countp
, *blksizep
);
524 printf("ffs_wapbl: unknown journal type %d\n",
525 fs
->fs_journal_location
);
531 ffs_lfragtosize(fs
, fs
->fs_size
) / UFS_WAPBL_JOURNAL_SCALE
;
532 DPRINTF("desired log size = %" PRId64
" kB\n", desired_logsize
/ 1024);
533 desired_logsize
= max(desired_logsize
, UFS_WAPBL_MIN_JOURNAL_SIZE
);
534 desired_logsize
= min(desired_logsize
, UFS_WAPBL_MAX_JOURNAL_SIZE
);
535 DPRINTF("adjusted desired log size = %" PRId64
" kB\n",
536 desired_logsize
/ 1024);
538 /* Is there space after the filesystem on the partition for the log? */
539 logstart
= FFS_FSBTODB(fs
, fs
->fs_size
);
540 error
= getdisksize(devvp
, &numsecs
, &secsize
);
543 KDASSERT(secsize
!= 0);
544 logend
= btodb(numsecs
* secsize
);
546 if (dbtob(logend
- logstart
) >= desired_logsize
) {
547 DPRINTF("enough space, use end-of-partition log\n");
549 location
= UFS_WAPBL_JOURNALLOC_END_PARTITION
;
553 *countp
= (logend
- logstart
);
556 /* convert to physical block numbers */
557 *startp
= dbtob(*startp
) / secsize
;
558 *countp
= dbtob(*countp
) / secsize
;
560 fs
->fs_journallocs
[UFS_WAPBL_EPART_ADDR
] = *startp
;
561 fs
->fs_journallocs
[UFS_WAPBL_EPART_COUNT
] = *countp
;
562 fs
->fs_journallocs
[UFS_WAPBL_EPART_BLKSZ
] = *blksizep
;
563 fs
->fs_journallocs
[UFS_WAPBL_EPART_UNUSED
] = *extradatap
;
565 DPRINTF("end-of-partition has only %" PRId64
" free\n",
568 location
= UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM
;
571 error
= wapbl_create_infs_log(mp
, fs
, devvp
,
572 startp
, countp
, extradatap
);
573 ffs_sync(mp
, MNT_WAIT
, FSCRED
);
575 /* convert to physical block numbers */
576 *startp
= dbtob(*startp
) / secsize
;
577 *countp
= dbtob(*countp
) / secsize
;
579 fs
->fs_journallocs
[UFS_WAPBL_INFS_ADDR
] = *startp
;
580 fs
->fs_journallocs
[UFS_WAPBL_INFS_COUNT
] = *countp
;
581 fs
->fs_journallocs
[UFS_WAPBL_INFS_BLKSZ
] = *blksizep
;
582 fs
->fs_journallocs
[UFS_WAPBL_INFS_INO
] = *extradatap
;
586 /* update superblock with log location */
587 fs
->fs_journal_version
= UFS_WAPBL_VERSION
;
588 fs
->fs_journal_location
= location
;
589 fs
->fs_journal_flags
= 0;
591 error
= ffs_sbupdate(ump
, MNT_WAIT
);
598 * Try to create a journal log inside the filesystem.
/*
 * wapbl_create_infs_log(mp, fs, devvp, startp, countp, extradatap):
 * create a journal file inside the filesystem.
 *
 * The root vnode is obtained with VFS_ROOT() and a new regular-file
 * inode is allocated relative to it with UFS_VALLOC().  The new inode
 * is marked IN_ACCESS | IN_CHANGE | IN_UPDATE, given mode IFREG, flags
 * SF_LOG and link count 1, then written synchronously with
 * ffs_update(..., UPDATE_WAIT).  The journal space itself is obtained
 * from wapbl_allocate_log_file(), which fills in *startp, *countp and
 * *extradatap.  If that fails, the inode's link count is set back to 0.
 *
 * NOTE(review): the MNT_UPDATE branch body and all return/vput
 * statements are not visible in this view.
 */
601 wapbl_create_infs_log(struct mount
*mp
, struct fs
*fs
, struct vnode
*devvp
,
602 daddr_t
*startp
, size_t *countp
, uint64_t *extradatap
)
604 struct vnode
*vp
, *rvp
;
608 if ((error
= VFS_ROOT(mp
, &rvp
)) != 0)
611 error
= UFS_VALLOC(rvp
, 0 | S_IFREG
, NOCRED
, &vp
);
612 if (mp
->mnt_flag
& MNT_UPDATE
) {
623 ip
->i_flag
|= IN_ACCESS
| IN_CHANGE
| IN_UPDATE
;
624 ip
->i_mode
= 0 | IFREG
;
625 DIP_ASSIGN(ip
, mode
, ip
->i_mode
);
626 ip
->i_flags
= SF_LOG
;
627 DIP_ASSIGN(ip
, flags
, ip
->i_flags
);
629 DIP_ASSIGN(ip
, nlink
, 1);
630 ffs_update(vp
, NULL
, NULL
, UPDATE_WAIT
);
632 if ((error
= wapbl_allocate_log_file(mp
, vp
,
633 startp
, countp
, extradatap
)) != 0) {
635 * If we couldn't allocate the space for the log file,
636 * remove the inode by setting its link count back to
640 DIP_ASSIGN(ip
, nlink
, 0);
648 * Now that we have the place-holder inode for the journal,
649 * we don't need the vnode ever again.
/*
 * wapbl_allocate_log_file(mp, vp, startp, countp, extradatap): allocate
 * the on-disk space for the journal file `vp'.
 *
 * A suggested size is taken from fs_journallocs[UFS_WAPBL_INFS_COUNT]
 * when UFS_WAPBL_FLAGS_CREATE_LOG is set and the recorded location is
 * in-filesystem.  A file that already has a non-zero size is reported
 * with printf().  wapbl_find_log_start() then picks the data and
 * indirect block addresses and the extent size (in blocks).  The final
 * log size is that extent converted to bytes with ffs_lblktosize().
 * The chosen addresses are stored in the inode's i_ffs_first_data_blk
 * and i_ffs_first_indir_blk fields before GOP_ALLOC() is called with
 * B_CONTIG.
 *
 * On the visible success path *startp is the data address converted
 * with FFS_FSBTODB(), *countp is the log size converted with btodb(),
 * and *extradatap is the inode number of `vp'.
 */
658 wapbl_allocate_log_file(struct mount
*mp
, struct vnode
*vp
,
659 daddr_t
*startp
, size_t *countp
, uint64_t *extradatap
)
661 struct ufsmount
*ump
= VFSTOUFS(mp
);
662 struct fs
*fs
= ump
->um_fs
;
663 daddr_t addr
, indir_addr
;
669 /* check if there's a suggested log size */
670 if (fs
->fs_journal_flags
& UFS_WAPBL_FLAGS_CREATE_LOG
&&
671 fs
->fs_journal_location
== UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM
)
672 logsize
= fs
->fs_journallocs
[UFS_WAPBL_INFS_COUNT
];
674 if (vp
->v_size
> 0) {
675 printf("%s: file size (%" PRId64
") non zero\n", __func__
,
679 wapbl_find_log_start(mp
, vp
, logsize
, &addr
, &indir_addr
, &size
);
681 printf("%s: log not allocated, largest extent is "
682 "%" PRId64
"MB\n", __func__
,
683 ffs_lblktosize(fs
, size
) / (1024 * 1024));
687 logsize
= ffs_lblktosize(fs
, size
); /* final log size */
689 VTOI(vp
)->i_ffs_first_data_blk
= addr
;
690 VTOI(vp
)->i_ffs_first_indir_blk
= indir_addr
;
692 error
= GOP_ALLOC(vp
, 0, logsize
, B_CONTIG
, FSCRED
);
694 printf("%s: GOP_ALLOC error %d\n", __func__
, error
);
698 *startp
= FFS_FSBTODB(fs
, addr
);
699 *countp
= btodb(logsize
);
700 *extradatap
= VTOI(vp
)->i_number
;
706 * Find a suitable location for the journal in the filesystem.
708 * Our strategy here is to look for a contiguous block of free space
709 * at least "logfile" MB in size (plus room for any indirect blocks).
710 * We start at the middle of the filesystem and check each cylinder
711 * group working outwards. If "logfile" MB is not available as a
712 * single contiguous chunk, then return the address and size of the
713 * largest chunk found.
716 * At what stage does the search fail? Is if the largest space we could
717 * find is less than a quarter the requested space reasonable? If the
718 * search fails entirely, return a block address of "0" to indicate this.
/*
 * wapbl_find_log_start(mp, vp, logsize, addr, indir_addr, size): search
 * the cylinder groups for a run of free blocks to hold the journal.
 *
 * Visible behaviour:
 *  - a log size is derived from fs_dsize / UFS_WAPBL_JOURNAL_SCALE and
 *    clamped to [UFS_WAPBL_MIN_JOURNAL_SIZE, UFS_WAPBL_MAX_JOURNAL_SIZE]
 *    (NOTE(review): presumably only when the caller passed no explicit
 *    size; the guarding test is not visible);
 *  - the size is converted to filesystem blocks, and when it reaches
 *    UFS_NDADDR the indirect blocks needed, as computed from
 *    ufs_getlbns(), are added;
 *  - a minimum acceptable size is either the full size or a quarter of
 *    it;
 *  - the search bails out early when the minimum exceeds the blocks per
 *    cylinder group;
 *  - cylinder groups are visited starting at fs_ncg / 2 and alternating
 *    outwards.  Each group's block is read with bread() and its magic
 *    checked, then its free map is scanned for runs of free blocks,
 *    remembering the longest run seen so far;
 *  - if the best run is at least the minimum, *indir_addr is set to its
 *    start, *addr to the start plus the indirect blocks, and *size to
 *    min(desired, best) minus the indirect blocks.
 *
 * The handling when the best run is too small is not visible here.
 */
721 wapbl_find_log_start(struct mount
*mp
, struct vnode
*vp
, off_t logsize
,
722 daddr_t
*addr
, daddr_t
*indir_addr
, size_t *size
)
724 struct ufsmount
*ump
= VFSTOUFS(mp
);
725 struct fs
*fs
= ump
->um_fs
;
726 struct vnode
*devvp
= ump
->um_devvp
;
730 daddr_t blkno
, best_addr
, start_addr
;
731 daddr_t desired_blks
, min_desired_blks
;
732 daddr_t freeblks
, best_blks
;
733 int bpcg
, cg
, error
, fixedsize
, indir_blks
, n
, s
;
734 const int needswap
= UFS_FSNEEDSWAP(fs
);
737 fixedsize
= 0; /* We can adjust the size if tight */
738 logsize
= ffs_lfragtosize(fs
, fs
->fs_dsize
) /
739 UFS_WAPBL_JOURNAL_SCALE
;
740 DPRINTF("suggested log size = %" PRId64
"\n", logsize
);
741 logsize
= max(logsize
, UFS_WAPBL_MIN_JOURNAL_SIZE
);
742 logsize
= min(logsize
, UFS_WAPBL_MAX_JOURNAL_SIZE
);
743 DPRINTF("adjusted log size = %" PRId64
"\n", logsize
);
746 DPRINTF("fixed log size = %" PRId64
"\n", logsize
);
749 desired_blks
= logsize
/ fs
->fs_bsize
;
750 DPRINTF("desired blocks = %" PRId64
"\n", desired_blks
);
752 /* add in number of indirect blocks needed */
754 if (desired_blks
>= UFS_NDADDR
) {
755 struct indir indirs
[UFS_NIADDR
+ 2];
758 error
= ufs_getlbns(vp
, desired_blks
, indirs
, &num
);
760 printf("%s: ufs_getlbns failed, error %d!\n",
767 indir_blks
= 1; /* 1st level indirect */
770 indir_blks
= 1 + /* 1st level indirect */
771 1 + /* 2nd level indirect */
772 indirs
[1].in_off
+ 1; /* extra 1st level indirect */
775 printf("%s: unexpected numlevels %d from ufs_getlbns\n",
780 desired_blks
+= indir_blks
;
782 DPRINTF("desired blocks = %" PRId64
" (including indirect)\n",
786 * If a specific size wasn't requested, allow for a smaller log
787 * if we're really tight for space...
789 min_desired_blks
= desired_blks
;
791 min_desired_blks
= desired_blks
/ 4;
793 /* Look at number of blocks per CG. If it's too small, bail early. */
794 bpcg
= ffs_fragstoblks(fs
, fs
->fs_fpg
);
795 if (min_desired_blks
> bpcg
) {
796 printf("ffs_wapbl: cylinder group size of %" PRId64
" MB "
797 " is not big enough for journal\n",
798 ffs_lblktosize(fs
, bpcg
) / (1024 * 1024));
803 * Start with the middle cylinder group, and search outwards in
804 * both directions until we either find the requested log size
805 * or reach the start/end of the file system. If we reach the
806 * start/end without finding enough space for the full requested
807 * log size, use the largest extent found if it is large enough
808 * to satisfy the our minimum size.
811 * Can we just use the cluster contigsum stuff (esp on UFS2)
812 * here to simplify this search code?
/* The step n * s flips sign and grows by one on each iteration, so cg alternates around the middle group. */
816 for (cg
= fs
->fs_ncg
/ 2, s
= 0, n
= 1;
817 best_blks
< desired_blks
&& cg
>= 0 && cg
< fs
->fs_ncg
;
818 s
++, n
= -n
, cg
+= n
* s
) {
819 DPRINTF("check cg %d of %d\n", cg
, fs
->fs_ncg
);
820 error
= bread(devvp
, FFS_FSBTODB(fs
, cgtod(fs
, cg
)),
821 fs
->fs_cgsize
, FSCRED
, 0, &bp
);
825 cgp
= (struct cg
*)bp
->b_data
;
826 if (!cg_chkmagic(cgp
, UFS_FSNEEDSWAP(fs
))) {
831 blksfree
= cg_blksfree(cgp
, needswap
);
833 for (blkno
= 0; blkno
< bpcg
;) {
834 /* look for next free block */
835 /* XXX use scanc() and fragtbl[] here? */
836 for (; blkno
< bpcg
- min_desired_blks
; blkno
++)
837 if (ffs_isblock(fs
, blksfree
, blkno
))
840 /* past end of search space in this CG? */
841 if (blkno
>= bpcg
- min_desired_blks
)
844 /* count how many free blocks in this extent */
846 for (freeblks
= 0; blkno
< bpcg
; blkno
++, freeblks
++)
847 if (!ffs_isblock(fs
, blksfree
, blkno
))
850 if (freeblks
> best_blks
) {
851 best_blks
= freeblks
;
852 best_addr
= ffs_blkstofrags(fs
, start_addr
) +
855 if (freeblks
>= desired_blks
) {
856 DPRINTF("found len %" PRId64
857 " at offset %" PRId64
" in gc\n",
858 freeblks
, start_addr
);
865 DPRINTF("best found len = %" PRId64
", wanted %" PRId64
866 " at addr %" PRId64
"\n", best_blks
, desired_blks
, best_addr
);
868 if (best_blks
< min_desired_blks
) {
872 /* put indirect blocks at start, and data blocks after */
873 *addr
= best_addr
+ ffs_blkstofrags(fs
, indir_blks
);
874 *indir_addr
= best_addr
;
876 *size
= min(desired_blks
, best_blks
) - indir_blks
;