1 /* $NetBSD: ffs_wapbl.c,v 1.12 2009/02/22 20:28:06 ad Exp $ */
4 * Copyright (c) 2003,2006,2008 The NetBSD Foundation, Inc.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Wasabi Systems, Inc.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: ffs_wapbl.c,v 1.12 2009/02/22 20:28:06 ad Exp $");
35 #define WAPBL_INTERNAL
37 #if defined(_KERNEL_OPT)
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/kernel.h>
44 #include <sys/vnode.h>
45 #include <sys/mount.h>
48 #include <sys/disklabel.h>
49 #include <sys/ioctl.h>
50 #include <sys/errno.h>
51 #include <sys/kauth.h>
52 #include <sys/wapbl.h>
54 #include <ufs/ufs/inode.h>
55 #include <ufs/ufs/quota.h>
56 #include <ufs/ufs/ufsmount.h>
57 #include <ufs/ufs/ufs_bswap.h>
58 #include <ufs/ufs/ufs_extern.h>
59 #include <ufs/ufs/ufs_wapbl.h>
61 #include <ufs/ffs/fs.h>
62 #include <ufs/ffs/ffs_extern.h>
66 int ffs_wapbl_debug
= 1;
67 #define DPRINTF(fmt, args...) \
69 if (ffs_wapbl_debug) \
70 printf("%s:%d "fmt, __func__ , __LINE__, ##args); \
71 } while (/* CONSTCOND */0)
73 #define DPRINTF(fmt, args...) \
76 } while (/* CONSTCOND */0)
79 static int ffs_superblock_layout(struct fs
*);
80 static int wapbl_log_position(struct mount
*, struct fs
*, struct vnode
*,
81 daddr_t
*, size_t *, size_t *, uint64_t *);
82 static int wapbl_create_infs_log(struct mount
*, struct fs
*, struct vnode
*,
83 daddr_t
*, size_t *, size_t *, uint64_t *);
84 static void wapbl_find_log_start(struct mount
*, struct vnode
*, off_t
,
85 daddr_t
*, daddr_t
*, size_t *);
86 static int wapbl_remove_log(struct mount
*);
87 static int wapbl_allocate_log_file(struct mount
*, struct vnode
*);
90 * Return the super block layout format - UFS1 or UFS2.
91 * WAPBL only works with UFS2 layout (which is still available
94 * XXX Should this be in ufs/ffs/fs.h? Same style of check is
95 * also used in ffs_alloc.c in a few places.
98 ffs_superblock_layout(struct fs
*fs
)
100 if ((fs
->fs_magic
== FS_UFS1_MAGIC
) &&
101 ((fs
->fs_old_flags
& FS_FLAGS_UPDATED
) == 0))
108 * This function is invoked after a log is replayed to
109 * disk to perform logical cleanup actions as described by
113 ffs_wapbl_replay_finish(struct mount
*mp
)
115 struct wapbl_replay
*wr
= mp
->mnt_wapbl_replay
;
122 KDASSERT((mp
->mnt_flag
& MNT_RDONLY
) == 0);
124 for (i
= 0; i
< wr
->wr_inodescnt
; i
++) {
127 error
= VFS_VGET(mp
, wr
->wr_inodes
[i
].wr_inumber
, &vp
);
129 printf("ffs_wapbl_replay_finish: "
130 "unable to cleanup inode %" PRIu32
"\n",
131 wr
->wr_inodes
[i
].wr_inumber
);
135 KDASSERT(wr
->wr_inodes
[i
].wr_inumber
== ip
->i_number
);
137 printf("ffs_wapbl_replay_finish: "
138 "cleaning inode %" PRIu64
" size=%" PRIu64
" mode=%o nlink=%d\n",
139 ip
->i_number
, ip
->i_size
, ip
->i_mode
, ip
->i_nlink
);
141 KASSERT(ip
->i_nlink
== 0);
144 * The journal may have left partially allocated inodes in mode
145 * zero. This may occur if a crash occurs between the node
146 * allocation in ffs_nodeallocg and when the node is properly
147 * initialized in ufs_makeinode. If so, just deallocate them.
149 if (ip
->i_mode
== 0) {
151 ffs_vfree(vp
, ip
->i_number
, wr
->wr_inodes
[i
].wr_imode
);
156 wapbl_replay_stop(wr
);
157 wapbl_replay_free(wr
);
158 mp
->mnt_wapbl_replay
= NULL
;
161 /* Callback for wapbl */
163 ffs_wapbl_sync_metadata(struct mount
*mp
, daddr_t
*deallocblks
,
164 int *dealloclens
, int dealloccnt
)
166 struct ufsmount
*ump
= VFSTOUFS(mp
);
167 struct fs
*fs
= ump
->um_fs
;
170 #ifdef WAPBL_DEBUG_INODES
171 ufs_wapbl_verify_inodes(mp
, "ffs_wapbl_sync_metadata");
174 for (i
= 0; i
< dealloccnt
; i
++) {
176 * blkfree errors are unreported, might silently fail
177 * if it cannot read the cylinder group block
179 ffs_blkfree(fs
, ump
->um_devvp
,
180 dbtofsb(fs
, deallocblks
[i
]), dealloclens
[i
], -1);
184 fs
->fs_time
= time_second
;
185 error
= ffs_cgupdate(ump
, 0);
190 ffs_wapbl_abort_sync_metadata(struct mount
*mp
, daddr_t
*deallocblks
,
191 int *dealloclens
, int dealloccnt
)
193 struct ufsmount
*ump
= VFSTOUFS(mp
);
194 struct fs
*fs
= ump
->um_fs
;
197 for (i
= 0; i
< dealloccnt
; i
++) {
199 * Since the above blkfree may have failed, this blkalloc might
200 * fail as well, so don't check its error. Note that if the
201 * blkfree succeeded above, then this shouldn't fail because
202 * the buffer will be locked in the current transaction.
204 ffs_blkalloc_ump(ump
, dbtofsb(fs
, deallocblks
[i
]),
210 wapbl_remove_log(struct mount
*mp
)
212 struct ufsmount
*ump
= VFSTOUFS(mp
);
213 struct fs
*fs
= ump
->um_fs
;
219 /* If super block layout is too old to support WAPBL, return */
220 if (ffs_superblock_layout(fs
) < 2)
223 /* If all the log locators are 0, just clean up */
224 if (fs
->fs_journallocs
[0] == 0 &&
225 fs
->fs_journallocs
[1] == 0 &&
226 fs
->fs_journallocs
[2] == 0 &&
227 fs
->fs_journallocs
[3] == 0) {
228 DPRINTF("empty locators, just clear\n");
232 switch (fs
->fs_journal_location
) {
233 case UFS_WAPBL_JOURNALLOC_NONE
:
238 case UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM
:
239 log_ino
= fs
->fs_journallocs
[UFS_WAPBL_INFS_INO
];
240 DPRINTF("in-fs log, ino = %" PRId64
"\n",log_ino
);
242 /* if no existing log inode, just clear all fields and bail */
245 error
= VFS_VGET(mp
, log_ino
, &vp
);
247 printf("ffs_wapbl: vget failed %d\n",
249 /* clear out log info on error */
253 KASSERT(log_ino
== ip
->i_number
);
254 if ((ip
->i_flags
& SF_LOG
) == 0) {
255 printf("ffs_wapbl: try to clear non-log inode "
256 "%" PRId64
"\n", log_ino
);
258 /* clear out log info on error */
263 * remove the log inode by setting its link count back
267 DIP_ASSIGN(ip
, nlink
, 0);
270 case UFS_WAPBL_JOURNALLOC_END_PARTITION
:
271 DPRINTF("end-of-partition log\n");
272 /* no extra work required */
276 printf("ffs_wapbl: unknown journal type %d\n",
277 fs
->fs_journal_location
);
283 /* Clear out all previous knowledge of journal */
284 fs
->fs_journal_version
= 0;
285 fs
->fs_journal_location
= 0;
286 fs
->fs_journal_flags
= 0;
287 fs
->fs_journallocs
[0] = 0;
288 fs
->fs_journallocs
[1] = 0;
289 fs
->fs_journallocs
[2] = 0;
290 fs
->fs_journallocs
[3] = 0;
291 (void) ffs_sbupdate(ump
, MNT_WAIT
);
297 ffs_wapbl_start(struct mount
*mp
)
299 struct ufsmount
*ump
= VFSTOUFS(mp
);
300 struct fs
*fs
= ump
->um_fs
;
301 struct vnode
*devvp
= ump
->um_devvp
;
308 if (mp
->mnt_wapbl
== NULL
) {
309 if (fs
->fs_journal_flags
& UFS_WAPBL_FLAGS_CLEAR_LOG
) {
310 /* Clear out any existing journal file */
311 error
= wapbl_remove_log(mp
);
316 if (mp
->mnt_flag
& MNT_LOG
) {
317 KDASSERT(fs
->fs_ronly
== 0);
319 /* WAPBL needs UFS2 format super block */
320 if (ffs_superblock_layout(fs
) < 2) {
321 printf("%s fs superblock in old format, "
323 VFSTOUFS(mp
)->um_fs
->fs_fsmnt
);
324 mp
->mnt_flag
&= ~MNT_LOG
;
328 error
= wapbl_log_position(mp
, fs
, devvp
, &off
,
329 &count
, &blksize
, &extradata
);
333 /* XXX any other consistency checks here? */
334 if (blksize
!= DEV_BSIZE
) {
335 printf("%s: bad blocksize %zu\n", __func__
,
340 error
= wapbl_start(&mp
->mnt_wapbl
, mp
, devvp
, off
,
341 count
, blksize
, mp
->mnt_wapbl_replay
,
342 ffs_wapbl_sync_metadata
,
343 ffs_wapbl_abort_sync_metadata
);
347 mp
->mnt_wapbl_op
= &wapbl_ops
;
350 printf("%s: enabling logging\n", fs
->fs_fsmnt
);
353 if ((fs
->fs_flags
& FS_DOWAPBL
) == 0) {
355 fs
->fs_flags
|= FS_DOWAPBL
;
356 error
= ffs_sbupdate(ump
, MNT_WAIT
);
359 ffs_wapbl_stop(mp
, MNT_FORCE
);
363 error
= wapbl_flush(mp
->mnt_wapbl
, 1);
365 ffs_wapbl_stop(mp
, MNT_FORCE
);
369 } else if (fs
->fs_flags
& FS_DOWAPBL
) {
371 fs
->fs_flags
&= ~FS_DOWAPBL
;
376 * It is recommended that you finish replay with logging enabled.
377 * However, even if logging is not enabled, the remaining log
378 * replay should be safely recoverable with an fsck, so perform
381 if ((fs
->fs_ronly
== 0) && mp
->mnt_wapbl_replay
) {
382 int saveflag
= mp
->mnt_flag
& MNT_RDONLY
;
384 * Make sure MNT_RDONLY is not set so that the inode
385 * cleanup in ufs_inactive will actually do its work.
387 mp
->mnt_flag
&= ~MNT_RDONLY
;
388 ffs_wapbl_replay_finish(mp
);
389 mp
->mnt_flag
|= saveflag
;
390 KASSERT(fs
->fs_ronly
== 0);
397 ffs_wapbl_stop(struct mount
*mp
, int force
)
399 struct ufsmount
*ump
= VFSTOUFS(mp
);
400 struct fs
*fs
= ump
->um_fs
;
404 KDASSERT(fs
->fs_ronly
== 0);
407 * Make sure that turning off FS_DOWAPBL is the only change
408 * made in the final flush, since otherwise a transaction
409 * may reorder writes.
411 error
= wapbl_flush(mp
->mnt_wapbl
, 1);
416 error
= UFS_WAPBL_BEGIN(mp
);
421 KASSERT(fs
->fs_flags
& FS_DOWAPBL
);
423 fs
->fs_flags
&= ~FS_DOWAPBL
;
424 error
= ffs_sbupdate(ump
, MNT_WAIT
);
425 KASSERT(error
== 0); /* XXX a bit drastic! */
428 error
= wapbl_stop(mp
->mnt_wapbl
, force
);
431 fs
->fs_flags
|= FS_DOWAPBL
;
434 fs
->fs_flags
&= ~FS_DOWAPBL
; /* Repeat in case of forced error */
435 mp
->mnt_wapbl
= NULL
;
438 printf("%s: disabled logging\n", fs
->fs_fsmnt
);
446 ffs_wapbl_replay_start(struct mount
*mp
, struct fs
*fs
, struct vnode
*devvp
)
455 * WAPBL needs UFS2 format super block, if we got here with a
456 * UFS1 format super block something is amiss...
458 if (ffs_superblock_layout(fs
) < 2)
461 error
= wapbl_log_position(mp
, fs
, devvp
, &off
, &count
, &blksize
,
467 error
= wapbl_replay_start(&mp
->mnt_wapbl_replay
, devvp
, off
,
472 mp
->mnt_wapbl_op
= &wapbl_ops
;
478 * If the superblock doesn't already have a recorded journal location
479 * then we allocate the journal in one of two positions:
481 * - At the end of the partition after the filesystem if there's
482 * enough space. "Enough space" is defined as >= 1MB of journal
483 * per 1GB of filesystem or 64MB, whichever is smaller.
485 * - Inside the filesystem. We try to allocate a contiguous journal
486 * based on the total filesystem size - the target is 1MB of journal
487 * per 1GB of filesystem, up to a maximum journal size of 64MB. As
488 * a worst case allowing for fragmentation, we'll allocate a journal
489 * 1/4 of the desired size but never smaller than 1MB.
491 * XXX In the future if we allow for non-contiguous journal files we
492 * can tighten the above restrictions.
495 * This seems like a lot of duplication both here and in some of
496 * the userland tools (fsck_ffs, dumpfs, tunefs) with similar
497 * "switch (fs_journal_location)" constructs. Can we centralise
498 * this sort of code somehow/somewhere?
501 wapbl_log_position(struct mount
*mp
, struct fs
*fs
, struct vnode
*devvp
,
502 daddr_t
*startp
, size_t *countp
, size_t *blksizep
, uint64_t *extradatap
)
504 struct ufsmount
*ump
= VFSTOUFS(mp
);
505 struct partinfo dpart
;
506 daddr_t logstart
, logend
, desired_logsize
;
510 if (fs
->fs_journal_version
== UFS_WAPBL_VERSION
) {
511 switch (fs
->fs_journal_location
) {
512 case UFS_WAPBL_JOURNALLOC_END_PARTITION
:
513 DPRINTF("found existing end-of-partition log\n");
514 *startp
= fs
->fs_journallocs
[UFS_WAPBL_EPART_ADDR
];
515 *countp
= fs
->fs_journallocs
[UFS_WAPBL_EPART_COUNT
];
516 *blksizep
= fs
->fs_journallocs
[UFS_WAPBL_EPART_BLKSZ
];
517 DPRINTF(" start = %" PRId64
", size = %zu, "
518 "blksize = %zu\n", *startp
, *countp
, *blksizep
);
521 case UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM
:
522 DPRINTF("found existing in-filesystem log\n");
523 *startp
= fs
->fs_journallocs
[UFS_WAPBL_INFS_ADDR
];
524 *countp
= fs
->fs_journallocs
[UFS_WAPBL_INFS_COUNT
];
525 *blksizep
= fs
->fs_journallocs
[UFS_WAPBL_INFS_BLKSZ
];
526 DPRINTF(" start = %" PRId64
", size = %zu, "
527 "blksize = %zu\n", *startp
, *countp
, *blksizep
);
531 printf("ffs_wapbl: unknown journal type %d\n",
532 fs
->fs_journal_location
);
538 lfragtosize(fs
, fs
->fs_size
) / UFS_WAPBL_JOURNAL_SCALE
;
539 DPRINTF("desired log size = %" PRId64
" kB\n", desired_logsize
/ 1024);
540 desired_logsize
= max(desired_logsize
, UFS_WAPBL_MIN_JOURNAL_SIZE
);
541 desired_logsize
= min(desired_logsize
, UFS_WAPBL_MAX_JOURNAL_SIZE
);
542 DPRINTF("adjusted desired log size = %" PRId64
" kB\n",
543 desired_logsize
/ 1024);
545 /* Is there space after the filesystem on the partition for the log? */
546 logstart
= fsbtodb(fs
, fs
->fs_size
);
547 error
= VOP_IOCTL(devvp
, DIOCGPART
, &dpart
, FREAD
, FSCRED
);
549 logend
= dpart
.part
->p_size
;
550 blksize
= dpart
.disklab
->d_secsize
;
552 struct dkwedge_info dkw
;
553 error
= VOP_IOCTL(devvp
, DIOCGWEDGEINFO
, &dkw
, FREAD
, FSCRED
);
558 logend
= dkw
.dkw_size
;
561 if ((logend
- logstart
) * blksize
>= desired_logsize
) {
562 KDASSERT(blksize
!= 0);
563 DPRINTF("enough space, use end-of-partition log\n");
566 *countp
= (logend
- logstart
);
570 /* update superblock with log location */
571 fs
->fs_journal_version
= UFS_WAPBL_VERSION
;
572 fs
->fs_journal_location
= UFS_WAPBL_JOURNALLOC_END_PARTITION
;
573 fs
->fs_journal_flags
= 0;
574 fs
->fs_journallocs
[UFS_WAPBL_EPART_ADDR
] = *startp
;
575 fs
->fs_journallocs
[UFS_WAPBL_EPART_COUNT
] = *countp
;
576 fs
->fs_journallocs
[UFS_WAPBL_EPART_BLKSZ
] = *blksizep
;
577 fs
->fs_journallocs
[UFS_WAPBL_EPART_UNUSED
] = *extradatap
;
579 error
= ffs_sbupdate(ump
, MNT_WAIT
);
582 DPRINTF("end-of-partition has only %" PRId64
" free\n",
585 error
= wapbl_create_infs_log(mp
, fs
, devvp
, startp
, countp
, blksizep
,
588 ffs_sync(mp
, MNT_WAIT
, FSCRED
);
594 * Try to create a journal log inside the filesystem.
597 wapbl_create_infs_log(struct mount
*mp
, struct fs
*fs
, struct vnode
*devvp
,
598 daddr_t
*startp
, size_t *countp
, size_t *blksizep
, uint64_t *extradatap
)
600 struct vnode
*vp
, *rvp
;
604 if ((error
= VFS_ROOT(mp
, &rvp
)) != 0)
607 if ((error
= UFS_VALLOC(rvp
, 0 | S_IFREG
, NOCRED
, &vp
)) != 0) {
615 ip
->i_flag
|= IN_ACCESS
| IN_CHANGE
| IN_UPDATE
;
616 ip
->i_mode
= 0 | IFREG
;
617 DIP_ASSIGN(ip
, mode
, ip
->i_mode
);
618 ip
->i_flags
= SF_LOG
;
619 DIP_ASSIGN(ip
, flags
, ip
->i_flags
);
621 DIP_ASSIGN(ip
, nlink
, 1);
622 ffs_update(vp
, NULL
, NULL
, UPDATE_WAIT
);
624 if ((error
= wapbl_allocate_log_file(mp
, vp
)) != 0) {
626 * If we couldn't allocate the space for the log file,
627 * remove the inode by setting its link count back to
631 DIP_ASSIGN(ip
, nlink
, 0);
638 * Now that we have the place-holder inode for the journal,
639 * we don't need the vnode ever again.
643 *startp
= fs
->fs_journallocs
[UFS_WAPBL_INFS_ADDR
];
644 *countp
= fs
->fs_journallocs
[UFS_WAPBL_INFS_COUNT
];
645 *blksizep
= fs
->fs_journallocs
[UFS_WAPBL_INFS_BLKSZ
];
646 *extradatap
= fs
->fs_journallocs
[UFS_WAPBL_INFS_INO
];
652 wapbl_allocate_log_file(struct mount
*mp
, struct vnode
*vp
)
654 struct ufsmount
*ump
= VFSTOUFS(mp
);
655 struct fs
*fs
= ump
->um_fs
;
656 daddr_t addr
, indir_addr
;
662 /* check if there's a suggested log size */
663 if (fs
->fs_journal_flags
& UFS_WAPBL_FLAGS_CREATE_LOG
&&
664 fs
->fs_journal_location
== UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM
)
665 logsize
= fs
->fs_journallocs
[UFS_WAPBL_INFS_COUNT
];
667 if (vp
->v_size
> 0) {
668 printf("%s: file size (%" PRId64
") non zero\n", __func__
,
672 wapbl_find_log_start(mp
, vp
, logsize
, &addr
, &indir_addr
, &size
);
674 printf("%s: log not allocated, largest extent is "
675 "%" PRId64
"MB\n", __func__
,
676 lblktosize(fs
, size
) / (1024 * 1024));
680 logsize
= lblktosize(fs
, size
); /* final log size */
682 VTOI(vp
)->i_ffs_first_data_blk
= addr
;
683 VTOI(vp
)->i_ffs_first_indir_blk
= indir_addr
;
685 error
= GOP_ALLOC(vp
, 0, logsize
, B_CONTIG
, FSCRED
);
687 printf("%s: GOP_ALLOC error %d\n", __func__
, error
);
691 fs
->fs_journal_version
= UFS_WAPBL_VERSION
;
692 fs
->fs_journal_location
= UFS_WAPBL_JOURNALLOC_IN_FILESYSTEM
;
693 fs
->fs_journal_flags
= 0;
694 fs
->fs_journallocs
[UFS_WAPBL_INFS_ADDR
] =
695 lfragtosize(fs
, addr
) / DEV_BSIZE
;
696 fs
->fs_journallocs
[UFS_WAPBL_INFS_COUNT
] = logsize
/ DEV_BSIZE
;
697 fs
->fs_journallocs
[UFS_WAPBL_INFS_BLKSZ
] = DEV_BSIZE
;
698 fs
->fs_journallocs
[UFS_WAPBL_INFS_INO
] = VTOI(vp
)->i_number
;
700 error
= ffs_sbupdate(ump
, MNT_WAIT
);
705 * Find a suitable location for the journal in the filesystem.
707 * Our strategy here is to look for a contiguous block of free space
708 * at least "logfile" MB in size (plus room for any indirect blocks).
709 * We start at the middle of the filesystem and check each cylinder
710 * group working outwards. If "logfile" MB is not available as a
711 * single contiguous chunk, then return the address and size of the
712 * largest chunk found.
715 * At what stage does the search fail? Is failing reasonable if the largest
716 * space we could find is less than a quarter of the requested space? If the
717 * search fails entirely, return a block address of "0" to indicate this.
720 wapbl_find_log_start(struct mount
*mp
, struct vnode
*vp
, off_t logsize
,
721 daddr_t
*addr
, daddr_t
*indir_addr
, size_t *size
)
723 struct ufsmount
*ump
= VFSTOUFS(mp
);
724 struct fs
*fs
= ump
->um_fs
;
725 struct vnode
*devvp
= ump
->um_devvp
;
729 daddr_t blkno
, best_addr
, start_addr
;
730 daddr_t desired_blks
, min_desired_blks
;
731 daddr_t freeblks
, best_blks
;
732 int bpcg
, cg
, error
, fixedsize
, indir_blks
, n
, s
;
734 const int needswap
= UFS_FSNEEDSWAP(fs
);
738 fixedsize
= 0; /* We can adjust the size if tight */
739 logsize
= lfragtosize(fs
, fs
->fs_dsize
) /
740 UFS_WAPBL_JOURNAL_SCALE
;
741 DPRINTF("suggested log size = %" PRId64
"\n", logsize
);
742 logsize
= max(logsize
, UFS_WAPBL_MIN_JOURNAL_SIZE
);
743 logsize
= min(logsize
, UFS_WAPBL_MAX_JOURNAL_SIZE
);
744 DPRINTF("adjusted log size = %" PRId64
"\n", logsize
);
747 DPRINTF("fixed log size = %" PRId64
"\n", logsize
);
750 desired_blks
= logsize
/ fs
->fs_bsize
;
751 DPRINTF("desired blocks = %" PRId64
"\n", desired_blks
);
753 /* add in number of indirect blocks needed */
755 if (desired_blks
>= NDADDR
) {
756 struct indir indirs
[NIADDR
+ 2];
759 error
= ufs_getlbns(vp
, desired_blks
, indirs
, &num
);
761 printf("%s: ufs_getlbns failed, error %d!\n",
768 indir_blks
= 1; /* 1st level indirect */
771 indir_blks
= 1 + /* 1st level indirect */
772 1 + /* 2nd level indirect */
773 indirs
[1].in_off
+ 1; /* extra 1st level indirect */
776 printf("%s: unexpected numlevels %d from ufs_getlbns\n",
781 desired_blks
+= indir_blks
;
783 DPRINTF("desired blocks = %" PRId64
" (including indirect)\n",
787 * If a specific size wasn't requested, allow for a smaller log
788 * if we're really tight for space...
790 min_desired_blks
= desired_blks
;
792 min_desired_blks
= desired_blks
/ 4;
794 /* Look at number of blocks per CG. If it's too small, bail early. */
795 bpcg
= fragstoblks(fs
, fs
->fs_fpg
);
796 if (min_desired_blks
> bpcg
) {
797 printf("ffs_wapbl: cylinder group size of %" PRId64
" MB "
798 " is not big enough for journal\n",
799 lblktosize(fs
, bpcg
) / (1024 * 1024));
804 * Start with the middle cylinder group, and search outwards in
805 * both directions until we either find the requested log size
806 * or reach the start/end of the file system. If we reach the
807 * start/end without finding enough space for the full requested
808 * log size, use the largest extent found if it is large enough
809 * to satisfy our minimum size.
812 * Can we just use the cluster contigsum stuff (esp on UFS2)
813 * here to simplify this search code?
817 for (cg
= fs
->fs_ncg
/ 2, s
= 0, n
= 1;
818 best_blks
< desired_blks
&& cg
>= 0 && cg
< fs
->fs_ncg
;
819 s
++, n
= -n
, cg
+= n
* s
) {
820 DPRINTF("check cg %d of %d\n", cg
, fs
->fs_ncg
);
821 error
= bread(devvp
, fsbtodb(fs
, cgtod(fs
, cg
)),
822 fs
->fs_cgsize
, FSCRED
, 0, &bp
);
823 cgp
= (struct cg
*)bp
->b_data
;
824 if (error
|| !cg_chkmagic(cgp
, UFS_FSNEEDSWAP(fs
))) {
829 blksfree
= cg_blksfree(cgp
, needswap
);
831 for (blkno
= 0; blkno
< bpcg
;) {
832 /* look for next free block */
833 /* XXX use scanc() and fragtbl[] here? */
834 for (; blkno
< bpcg
- min_desired_blks
; blkno
++)
835 if (ffs_isblock(fs
, blksfree
, blkno
))
838 /* past end of search space in this CG? */
839 if (blkno
>= bpcg
- min_desired_blks
)
842 /* count how many free blocks in this extent */
844 for (freeblks
= 0; blkno
< bpcg
; blkno
++, freeblks
++)
845 if (!ffs_isblock(fs
, blksfree
, blkno
))
848 if (freeblks
> best_blks
) {
849 best_blks
= freeblks
;
850 best_addr
= blkstofrags(fs
, start_addr
) +
853 if (freeblks
>= desired_blks
) {
854 DPRINTF("found len %" PRId64
855 " at offset %" PRId64
" in gc\n",
856 freeblks
, start_addr
);
863 DPRINTF("best found len = %" PRId64
", wanted %" PRId64
864 " at addr %" PRId64
"\n", best_blks
, desired_blks
, best_addr
);
866 if (best_blks
< min_desired_blks
) {
870 /* put indirect blocks at start, and data blocks after */
871 *addr
= best_addr
+ blkstofrags(fs
, indir_blks
);
872 *indir_addr
= best_addr
;
874 *size
= min(desired_blks
, best_blks
) - indir_blks
;