// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2022-2024 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_rmap_btree.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_ag_resv.h"
#include "xfs_health.h"
#include "xfs_error.h"
#include "xfs_defer.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_trace.h"
#include "xfs_inode.h"
#include "xfs_icache.h"
#include "xfs_buf_item.h"
#include "xfs_rtgroup.h"
#include "xfs_rtbitmap.h"
#include "xfs_metafile.h"
#include "xfs_metadir.h"
/* Find the first usable fsblock in this rtgroup. */
static inline uint32_t
xfs_rtgroup_min_block(
	struct xfs_mount	*mp,
	xfs_rgnumber_t		rgno)
{
	if (xfs_has_rtsb(mp) && rgno == 0)
		return mp->m_sb.sb_rextsize;

	return 0;
}
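/*
 * Illustrative note (not from the original source): when the rt superblock
 * feature is enabled, the first rt extent of group 0 is occupied by the rt
 * superblock, so group 0 only hands out space starting sb_rextsize blocks
 * into the group.  Assuming sb_rextsize == 8, the first usable rtgroup
 * block number in group 0 is 8, while every other group starts at 0.
 */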
/* Precompute this group's geometry */
void
xfs_rtgroup_calc_geometry(
	struct xfs_mount	*mp,
	struct xfs_rtgroup	*rtg,
	xfs_rgnumber_t		rgno,
	xfs_rgnumber_t		rgcount,
	xfs_rtbxlen_t		rextents)
{
	rtg->rtg_extents = __xfs_rtgroup_extents(mp, rgno, rgcount, rextents);
	rtg_group(rtg)->xg_block_count = rtg->rtg_extents * mp->m_sb.sb_rextsize;
	rtg_group(rtg)->xg_min_gbno = xfs_rtgroup_min_block(mp, rgno);
}
/* Allocate and initialize an incore rtgroup structure. */
int
xfs_rtgroup_alloc(
	struct xfs_mount	*mp,
	xfs_rgnumber_t		rgno,
	xfs_rgnumber_t		rgcount,
	xfs_rtbxlen_t		rextents)
{
	struct xfs_rtgroup	*rtg;
	int			error;

	rtg = kzalloc(sizeof(struct xfs_rtgroup), GFP_KERNEL);
	if (!rtg)
		return -ENOMEM;

	xfs_rtgroup_calc_geometry(mp, rtg, rgno, rgcount, rextents);

	error = xfs_group_insert(mp, rtg_group(rtg), rgno, XG_TYPE_RTG);
	if (error) {
		kfree(rtg);
		return error;
	}
	return 0;
}

/* Free an incore rtgroup object. */
void
xfs_rtgroup_free(
	struct xfs_mount	*mp,
	xfs_rgnumber_t		rgno)
{
	xfs_group_free(mp, rgno, XG_TYPE_RTG, NULL);
}
/* Free a range of incore rtgroup objects. */
void
xfs_free_rtgroups(
	struct xfs_mount	*mp,
	xfs_rgnumber_t		first_rgno,
	xfs_rgnumber_t		end_rgno)
{
	xfs_rgnumber_t		rgno;

	for (rgno = first_rgno; rgno < end_rgno; rgno++)
		xfs_rtgroup_free(mp, rgno);
}
/* Initialize some range of incore rtgroup objects. */
int
xfs_initialize_rtgroups(
	struct xfs_mount	*mp,
	xfs_rgnumber_t		first_rgno,
	xfs_rgnumber_t		end_rgno,
	xfs_rtbxlen_t		rextents)
{
	xfs_rgnumber_t		index;
	int			error;

	if (first_rgno >= end_rgno)
		return 0;

	for (index = first_rgno; index < end_rgno; index++) {
		error = xfs_rtgroup_alloc(mp, index, end_rgno, rextents);
		if (error)
			goto out_unwind_new_rtgs;
	}

	return 0;

out_unwind_new_rtgs:
	xfs_free_rtgroups(mp, first_rgno, index);
	return error;
}
/* Compute the number of rt extents in this realtime group. */
xfs_rtxnum_t
__xfs_rtgroup_extents(
	struct xfs_mount	*mp,
	xfs_rgnumber_t		rgno,
	xfs_rgnumber_t		rgcount,
	xfs_rtbxlen_t		rextents)
{
	ASSERT(rgno < rgcount);

	/* The last group gets whatever is left over. */
	if (rgno == rgcount - 1)
		return rextents - ((xfs_rtxnum_t)rgno * mp->m_sb.sb_rgextents);

	ASSERT(xfs_has_rtgroups(mp));
	return mp->m_sb.sb_rgextents;
}
xfs_rtxnum_t
xfs_rtgroup_extents(
	struct xfs_mount	*mp,
	xfs_rgnumber_t		rgno)
{
	return __xfs_rtgroup_extents(mp, rgno, mp->m_sb.sb_rgcount,
			mp->m_sb.sb_rextents);
}
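/*
 * Worked example (illustrative, values assumed): every group holds
 * sb_rgextents rt extents except the last one, which gets the remainder.
 * With sb_rgextents == 1000, sb_rextents == 2500 and sb_rgcount == 3,
 * groups 0 and 1 report 1000 extents each and group 2 reports
 * 2500 - 2 * 1000 = 500.
 */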
/*
 * Update the rt extent count of the previous tail rtgroup if it changed during
 * recovery (i.e. recovery of a growfs).
 */
int
xfs_update_last_rtgroup_size(
	struct xfs_mount	*mp,
	xfs_rgnumber_t		prev_rgcount)
{
	struct xfs_rtgroup	*rtg;

	ASSERT(prev_rgcount > 0);

	rtg = xfs_rtgroup_grab(mp, prev_rgcount - 1);
	if (!rtg)
		return -EFSCORRUPTED;

	rtg->rtg_extents = __xfs_rtgroup_extents(mp, prev_rgcount - 1,
			mp->m_sb.sb_rgcount, mp->m_sb.sb_rextents);
	rtg_group(rtg)->xg_block_count = rtg->rtg_extents * mp->m_sb.sb_rextsize;
	xfs_rtgroup_rele(rtg);
	return 0;
}
/* Lock metadata inodes associated with this rt group. */
void
xfs_rtgroup_lock(
	struct xfs_rtgroup	*rtg,
	unsigned int		rtglock_flags)
{
	ASSERT(!(rtglock_flags & ~XFS_RTGLOCK_ALL_FLAGS));
	ASSERT(!(rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) ||
	       !(rtglock_flags & XFS_RTGLOCK_BITMAP));

	if (rtglock_flags & XFS_RTGLOCK_BITMAP) {
		/*
		 * Lock both realtime free space metadata inodes for a
		 * freespace update.
		 */
		xfs_ilock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_EXCL);
		xfs_ilock(rtg->rtg_inodes[XFS_RTGI_SUMMARY], XFS_ILOCK_EXCL);
	} else if (rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) {
		xfs_ilock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_SHARED);
	}
}
/* Unlock metadata inodes associated with this rt group. */
void
xfs_rtgroup_unlock(
	struct xfs_rtgroup	*rtg,
	unsigned int		rtglock_flags)
{
	ASSERT(!(rtglock_flags & ~XFS_RTGLOCK_ALL_FLAGS));
	ASSERT(!(rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) ||
	       !(rtglock_flags & XFS_RTGLOCK_BITMAP));

	if (rtglock_flags & XFS_RTGLOCK_BITMAP) {
		xfs_iunlock(rtg->rtg_inodes[XFS_RTGI_SUMMARY], XFS_ILOCK_EXCL);
		xfs_iunlock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_EXCL);
	} else if (rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) {
		xfs_iunlock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_SHARED);
	}
}
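/*
 * Usage sketch (illustrative, not copied from any caller): a free space
 * update takes both metadata ILOCKs exclusively and later drops them with
 * matching flags:
 *
 *	xfs_rtgroup_lock(rtg, XFS_RTGLOCK_BITMAP);
 *	... modify the rt bitmap and summary ...
 *	xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP);
 *
 * Readers that only need to look at the bitmap would pass
 * XFS_RTGLOCK_BITMAP_SHARED instead.
 */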
/*
 * Join realtime group metadata inodes to the transaction.  The ILOCKs will be
 * released on transaction commit.
 */
void
xfs_rtgroup_trans_join(
	struct xfs_trans	*tp,
	struct xfs_rtgroup	*rtg,
	unsigned int		rtglock_flags)
{
	ASSERT(!(rtglock_flags & ~XFS_RTGLOCK_ALL_FLAGS));
	ASSERT(!(rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED));

	if (rtglock_flags & XFS_RTGLOCK_BITMAP) {
		xfs_trans_ijoin(tp, rtg->rtg_inodes[XFS_RTGI_BITMAP],
				XFS_ILOCK_EXCL);
		xfs_trans_ijoin(tp, rtg->rtg_inodes[XFS_RTGI_SUMMARY],
				XFS_ILOCK_EXCL);
	}
}
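/*
 * Usage sketch (illustrative only): transactional callers typically take the
 * ILOCKs first and then join the inodes to the transaction so that commit
 * releases the locks for them:
 *
 *	xfs_rtgroup_lock(rtg, XFS_RTGLOCK_BITMAP);
 *	xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_BITMAP);
 *	... dirty the bitmap/summary inodes under tp ...
 *	xfs_trans_commit(tp);
 *
 * No explicit xfs_rtgroup_unlock() is needed on that path because the joined
 * ILOCKs are dropped at commit time, as the comment above notes.
 */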
/* Retrieve rt group geometry. */
int
xfs_rtgroup_get_geometry(
	struct xfs_rtgroup	*rtg,
	struct xfs_rtgroup_geometry	*rgeo)
{
	memset(rgeo, 0, sizeof(*rgeo));
	rgeo->rg_number = rtg_rgno(rtg);
	rgeo->rg_length = rtg_group(rtg)->xg_block_count;
	xfs_rtgroup_geom_health(rtg, rgeo);
	return 0;
}
#ifdef CONFIG_PROVE_LOCKING
static struct lock_class_key xfs_rtginode_lock_class;

static int
xfs_rtginode_ilock_cmp_fn(
	const struct lockdep_map	*m1,
	const struct lockdep_map	*m2)
{
	const struct xfs_inode *ip1 =
		container_of(m1, struct xfs_inode, i_lock.dep_map);
	const struct xfs_inode *ip2 =
		container_of(m2, struct xfs_inode, i_lock.dep_map);

	if (ip1->i_projid < ip2->i_projid)
		return -1;
	if (ip1->i_projid > ip2->i_projid)
		return 1;
	return 0;
}
static void
xfs_rtginode_ilock_print_fn(
	const struct lockdep_map	*m)
{
	const struct xfs_inode *ip =
		container_of(m, struct xfs_inode, i_lock.dep_map);

	printk(KERN_CONT " rgno=%u", ip->i_projid);
}
/*
 * Most of the time each of the RTG inode locks is only taken one at a time.
 * But when committing deferred ops, more than one of a kind can be taken.
 * However, deferred rt ops will be committed in rgno order so there is no
 * potential for deadlocks.  The code here is needed to tell lockdep about this
 * order.
 */
static inline void
xfs_rtginode_lockdep_setup(
	struct xfs_inode	*ip,
	xfs_rgnumber_t		rgno,
	enum xfs_rtg_inodes	type)
{
	lockdep_set_class_and_subclass(&ip->i_lock, &xfs_rtginode_lock_class,
			type);
	lock_set_cmp_fn(&ip->i_lock, xfs_rtginode_ilock_cmp_fn,
			xfs_rtginode_ilock_print_fn);
}
#else
#define xfs_rtginode_lockdep_setup(ip, rgno, type)	do { } while (0)
#endif /* CONFIG_PROVE_LOCKING */
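/*
 * Illustrative note (not part of the original file): with the comparison
 * function above, lockdep accepts same-type ILOCKs that are acquired in
 * increasing rgno (i_projid) order, matching the rgno-ordered commit of
 * deferred rt ops described above.  For example, taking the bitmap ILOCK
 * of rtgroup 1 and then of rtgroup 2 is fine, while the reverse order
 * would be reported as a potential deadlock.
 */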
struct xfs_rtginode_ops {
	const char		*name;	/* short name */

	enum xfs_metafile_type	metafile_type;

	unsigned int		sick;	/* rtgroup sickness flag */

	/* Does the fs have this feature? */
	bool			(*enabled)(struct xfs_mount *mp);

	/* Create this rtgroup metadata inode and initialize it. */
	int			(*create)(struct xfs_rtgroup *rtg,
					  struct xfs_inode *ip,
					  struct xfs_trans *tp,
					  bool init);
};

static const struct xfs_rtginode_ops xfs_rtginode_ops[XFS_RTGI_MAX] = {
	[XFS_RTGI_BITMAP] = {
		.name		= "bitmap",
		.metafile_type	= XFS_METAFILE_RTBITMAP,
		.sick		= XFS_SICK_RG_BITMAP,
		.create		= xfs_rtbitmap_create,
	},
	[XFS_RTGI_SUMMARY] = {
		.name		= "summary",
		.metafile_type	= XFS_METAFILE_RTSUMMARY,
		.sick		= XFS_SICK_RG_SUMMARY,
		.create		= xfs_rtsummary_create,
	},
};
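/*
 * Illustrative sketch (hypothetical, not in the original file): a new
 * per-rtgroup metadata inode type would be wired up by adding another entry
 * to the table above, e.g.:
 *
 *	[XFS_RTGI_FOO] = {
 *		.name		= "foo",
 *		.metafile_type	= XFS_METAFILE_FOO,
 *		.sick		= XFS_SICK_RG_FOO,
 *		.enabled	= xfs_has_foo,
 *		.create		= xfs_foo_create,
 *	},
 *
 * where every "foo" identifier is made up for the example.  A NULL ->enabled
 * hook means the inode type exists on all rtgroups filesystems, as
 * xfs_rtginode_enabled() below shows.
 */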
/* Return the shortname of this rtgroup inode. */
const char *
xfs_rtginode_name(
	enum xfs_rtg_inodes	type)
{
	return xfs_rtginode_ops[type].name;
}
/* Return the metafile type of this rtgroup inode. */
enum xfs_metafile_type
xfs_rtginode_metafile_type(
	enum xfs_rtg_inodes	type)
{
	return xfs_rtginode_ops[type].metafile_type;
}
/* Should this rtgroup inode be present? */
bool
xfs_rtginode_enabled(
	struct xfs_rtgroup	*rtg,
	enum xfs_rtg_inodes	type)
{
	const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type];

	if (!ops->enabled)
		return true;
	return ops->enabled(rtg_mount(rtg));
}
/* Mark an rtgroup inode sick. */
void
xfs_rtginode_mark_sick(
	struct xfs_rtgroup	*rtg,
	enum xfs_rtg_inodes	type)
{
	const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type];

	xfs_group_mark_sick(rtg_group(rtg), ops->sick);
}
/* Load an existing rtgroup inode into the rtgroup structure. */
int
xfs_rtginode_load(
	struct xfs_rtgroup	*rtg,
	enum xfs_rtg_inodes	type,
	struct xfs_trans	*tp)
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_inode	*ip;
	const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type];
	int			error;

	if (!xfs_rtginode_enabled(rtg, type))
		return 0;

	if (!xfs_has_rtgroups(mp)) {
		xfs_ino_t	ino;

		switch (type) {
		case XFS_RTGI_BITMAP:
			ino = mp->m_sb.sb_rbmino;
			break;
		case XFS_RTGI_SUMMARY:
			ino = mp->m_sb.sb_rsumino;
			break;
		default:
			/* None of the other types exist on !rtgroups */
			return 0;
		}

		error = xfs_trans_metafile_iget(tp, ino, ops->metafile_type,
				&ip);
	} else {
		const char	*path;

		if (!mp->m_rtdirip) {
			xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
			return -EFSCORRUPTED;
		}

		path = xfs_rtginode_path(rtg_rgno(rtg), type);
		if (!path)
			return -ENOMEM;
		error = xfs_metadir_load(tp, mp->m_rtdirip, path,
				ops->metafile_type, &ip);
		kfree(path);
	}

	if (error) {
		if (xfs_metadata_is_sick(error))
			xfs_rtginode_mark_sick(rtg, type);
		return error;
	}

	if (XFS_IS_CORRUPT(mp, ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
			   ip->i_df.if_format != XFS_DINODE_FMT_BTREE)) {
		xfs_irele(ip);
		xfs_rtginode_mark_sick(rtg, type);
		return -EFSCORRUPTED;
	}

	if (XFS_IS_CORRUPT(mp, ip->i_projid != rtg_rgno(rtg))) {
		xfs_irele(ip);
		xfs_rtginode_mark_sick(rtg, type);
		return -EFSCORRUPTED;
	}

	xfs_rtginode_lockdep_setup(ip, rtg_rgno(rtg), type);
	rtg->rtg_inodes[type] = ip;
	return 0;
}
/* Release an rtgroup metadata inode. */
void
xfs_rtginode_irele(
	struct xfs_inode	**ipp)
{
	if (*ipp)
		xfs_irele(*ipp);
	*ipp = NULL;
}
/* Create a metadata inode for this realtime group and initialize it. */
int
xfs_rtginode_create(
	struct xfs_rtgroup		*rtg,
	enum xfs_rtg_inodes		type,
	bool				init)
{
	const struct xfs_rtginode_ops	*ops = &xfs_rtginode_ops[type];
	struct xfs_mount		*mp = rtg_mount(rtg);
	struct xfs_metadir_update	upd = {
		.dp		= mp->m_rtdirip,
		.metafile_type	= ops->metafile_type,
	};
	int				error;

	if (!xfs_rtginode_enabled(rtg, type))
		return 0;

	if (!mp->m_rtdirip) {
		xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
		return -EFSCORRUPTED;
	}

	upd.path = xfs_rtginode_path(rtg_rgno(rtg), type);
	if (!upd.path)
		return -ENOMEM;

	error = xfs_metadir_start_create(&upd);
	if (error)
		goto out_path;

	error = xfs_metadir_create(&upd, S_IFREG);
	if (error)
		goto out_path;

	xfs_rtginode_lockdep_setup(upd.ip, rtg_rgno(rtg), type);

	upd.ip->i_projid = rtg_rgno(rtg);
	error = ops->create(rtg, upd.ip, upd.tp, init);
	if (error)
		goto out_cancel;

	error = xfs_metadir_commit(&upd);
	if (error)
		goto out_path;

	xfs_finish_inode_setup(upd.ip);
	rtg->rtg_inodes[type] = upd.ip;
	kfree(upd.path);
	return 0;

out_cancel:
	xfs_metadir_cancel(&upd, error);
	/* Have to finish setting up the inode to ensure it's deleted. */
	xfs_finish_inode_setup(upd.ip);
	xfs_irele(upd.ip);
out_path:
	kfree(upd.path);
	return error;
}
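/*
 * Usage sketch (illustrative; the loop below is assumed, not copied from a
 * caller): when a new realtime group is set up, each metadata inode type is
 * created in turn, and types that are not enabled are skipped inside
 * xfs_rtginode_create() itself:
 *
 *	for (type = 0; type < XFS_RTGI_MAX; type++) {
 *		error = xfs_rtginode_create(rtg, type, true);
 *		if (error)
 *			return error;
 *	}
 */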
/* Create the parent directory for all rtgroup inodes and load it. */
int
xfs_rtginode_mkdir_parent(
	struct xfs_mount	*mp)
{
	if (!mp->m_metadirip) {
		xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
		return -EFSCORRUPTED;
	}

	return xfs_metadir_mkdir(mp->m_metadirip, "rtgroups", &mp->m_rtdirip);
}
/* Load the parent directory of all rtgroup inodes. */
int
xfs_rtginode_load_parent(
	struct xfs_trans	*tp)
{
	struct xfs_mount	*mp = tp->t_mountp;

	if (!mp->m_metadirip) {
		xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
		return -EFSCORRUPTED;
	}

	return xfs_metadir_load(tp, mp->m_metadirip, "rtgroups",
			XFS_METAFILE_DIR, &mp->m_rtdirip);
}
/* Check superblock fields for a read or a write. */
static xfs_failaddr_t
xfs_rtsb_verify_common(
	struct xfs_buf		*bp)
{
	struct xfs_rtsb		*rsb = bp->b_addr;

	if (!xfs_verify_magic(bp, rsb->rsb_magicnum))
		return __this_address;
	if (rsb->rsb_pad)
		return __this_address;

	/* Everything to the end of the fs block must be zero */
	if (memchr_inv(rsb + 1, 0, BBTOB(bp->b_length) - sizeof(*rsb)))
		return __this_address;

	return NULL;
}
/* Check superblock fields for a read or revalidation. */
static inline xfs_failaddr_t
xfs_rtsb_verify_all(
	struct xfs_buf		*bp)
{
	struct xfs_rtsb		*rsb = bp->b_addr;
	struct xfs_mount	*mp = bp->b_mount;
	xfs_failaddr_t		fa;

	fa = xfs_rtsb_verify_common(bp);
	if (fa)
		return fa;

	if (memcmp(&rsb->rsb_fname, &mp->m_sb.sb_fname, XFSLABEL_MAX))
		return __this_address;
	if (!uuid_equal(&rsb->rsb_uuid, &mp->m_sb.sb_uuid))
		return __this_address;
	if (!uuid_equal(&rsb->rsb_meta_uuid, &mp->m_sb.sb_meta_uuid))
		return __this_address;

	return NULL;
}
static void
xfs_rtsb_read_verify(
	struct xfs_buf	*bp)
{
	xfs_failaddr_t	fa;

	if (!xfs_buf_verify_cksum(bp, XFS_RTSB_CRC_OFF)) {
		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
		return;
	}

	fa = xfs_rtsb_verify_all(bp);
	if (fa)
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
}
static void
xfs_rtsb_write_verify(
	struct xfs_buf	*bp)
{
	xfs_failaddr_t	fa;

	fa = xfs_rtsb_verify_common(bp);
	if (fa) {
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
		return;
	}

	xfs_buf_update_cksum(bp, XFS_RTSB_CRC_OFF);
}
const struct xfs_buf_ops xfs_rtsb_buf_ops = {
	.name		= "xfs_rtsb",
	.magic		= { 0, cpu_to_be32(XFS_RTSB_MAGIC) },
	.verify_read	= xfs_rtsb_read_verify,
	.verify_write	= xfs_rtsb_write_verify,
	.verify_struct	= xfs_rtsb_verify_all,
};
/* Update a realtime superblock from the primary fs super */
void
xfs_update_rtsb(
	struct xfs_buf		*rtsb_bp,
	const struct xfs_buf	*sb_bp)
{
	const struct xfs_dsb	*dsb = sb_bp->b_addr;
	struct xfs_rtsb		*rsb = rtsb_bp->b_addr;
	const uuid_t		*meta_uuid;

	rsb->rsb_magicnum = cpu_to_be32(XFS_RTSB_MAGIC);

	memcpy(&rsb->rsb_fname, &dsb->sb_fname, XFSLABEL_MAX);

	memcpy(&rsb->rsb_uuid, &dsb->sb_uuid, sizeof(rsb->rsb_uuid));

	/*
	 * The metadata uuid is the fs uuid if the metauuid feature is not
	 * enabled.
	 */
	if (dsb->sb_features_incompat &
			cpu_to_be32(XFS_SB_FEAT_INCOMPAT_META_UUID))
		meta_uuid = &dsb->sb_meta_uuid;
	else
		meta_uuid = &dsb->sb_uuid;
	memcpy(&rsb->rsb_meta_uuid, meta_uuid, sizeof(rsb->rsb_meta_uuid));
}
/*
 * Update the realtime superblock from a filesystem superblock and log it to
 * the given transaction.
 */
struct xfs_buf *
xfs_log_rtsb(
	struct xfs_trans	*tp,
	const struct xfs_buf	*sb_bp)
{
	struct xfs_buf		*rtsb_bp;

	if (!xfs_has_rtsb(tp->t_mountp))
		return NULL;

	rtsb_bp = xfs_trans_getrtsb(tp);
	if (!rtsb_bp) {
		/*
		 * It's possible for the rtgroups feature to be enabled but
		 * there is no incore rt superblock buffer if the rt geometry
		 * was specified at mkfs time but the rt section has not yet
		 * been attached.  In this case, rblocks must be zero.
		 */
		ASSERT(tp->t_mountp->m_sb.sb_rblocks == 0);
		return NULL;
	}

	xfs_update_rtsb(rtsb_bp, sb_bp);
	xfs_trans_ordered_buf(tp, rtsb_bp);
	return rtsb_bp;
}