1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
13 #include "xfs_mount.h"
14 #include "xfs_defer.h"
15 #include "xfs_inode.h"
16 #include "xfs_trans.h"
17 #include "xfs_inode_item.h"
18 #include "xfs_alloc.h"
19 #include "xfs_btree.h"
20 #include "xfs_bmap_btree.h"
22 #include "xfs_error.h"
23 #include "xfs_quota.h"
24 #include "xfs_trace.h"
25 #include "xfs_cksum.h"
29 * Convert on-disk form of btree root to in-memory form.
/*
 * Fill the in-memory root block (rblock) from the on-disk inode root
 * (dblock). The rblock header is initialised through
 * xfs_btree_init_block_int() with a null disk address and the inode number,
 * bb_level and bb_numrecs are copied across without byte swapping, and the
 * keys and pointers are then copied with memcpy into their in-memory
 * positions. The level is asserted to be greater than zero.
 */
34 xfs_bmdr_block_t
*dblock
,
36 struct xfs_btree_block
*rblock
,
39 struct xfs_mount
*mp
= ip
->i_mount
;
46 xfs_btree_init_block_int(mp
, rblock
, XFS_BUF_DADDR_NULL
,
47 XFS_BTNUM_BMAP
, 0, 0, ip
->i_ino
,
49 rblock
->bb_level
= dblock
->bb_level
;
50 ASSERT(be16_to_cpu(rblock
->bb_level
) > 0);
51 rblock
->bb_numrecs
= dblock
->bb_numrecs
;
/*
 * dmxr first holds the maximum record count for dblocklen; it is passed to
 * XFS_BMDR_PTR_ADDR to find the pointer array in the disk root.
 */
52 dmxr
= xfs_bmdr_maxrecs(dblocklen
, 0);
53 fkp
= XFS_BMDR_KEY_ADDR(dblock
, 1);
54 tkp
= XFS_BMBT_KEY_ADDR(mp
, rblock
, 1);
55 fpp
= XFS_BMDR_PTR_ADDR(dblock
, 1, dmxr
);
56 tpp
= XFS_BMAP_BROOT_PTR_ADDR(mp
, rblock
, 1, rblocklen
);
/* dmxr is reused as the actual record count for the two copies. */
57 dmxr
= be16_to_cpu(dblock
->bb_numrecs
);
58 memcpy(tkp
, fkp
, sizeof(*fkp
) * dmxr
);
59 memcpy(tpp
, fpp
, sizeof(*fpp
) * dmxr
);
/*
 * Unpack an on-disk extent record (rec) into the in-core form (irec).
 *
 * Both 64-bit words are read as unaligned big-endian values. From them:
 *   br_startoff   = l0 with the top BMBT_EXNTFLAG_BITLEN bits masked off,
 *                   shifted right by 9
 *   br_startblock = low 9 bits of l0 shifted left by 43, OR-ed with l1 >> 21
 *   br_blockcount = low 21 bits of l1
 *   br_state      = XFS_EXT_UNWRITTEN when any of the top
 *                   BMBT_EXNTFLAG_BITLEN bits of l0 is set; XFS_EXT_NORM is
 *                   the other value assigned
 */
63 xfs_bmbt_disk_get_all(
64 struct xfs_bmbt_rec
*rec
,
65 struct xfs_bmbt_irec
*irec
)
67 uint64_t l0
= get_unaligned_be64(&rec
->l0
);
68 uint64_t l1
= get_unaligned_be64(&rec
->l1
);
70 irec
->br_startoff
= (l0
& xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN
)) >> 9;
71 irec
->br_startblock
= ((l0
& xfs_mask64lo(9)) << 43) | (l1
>> 21);
72 irec
->br_blockcount
= l1
& xfs_mask64lo(21);
73 if (l0
>> (64 - BMBT_EXNTFLAG_BITLEN
))
74 irec
->br_state
= XFS_EXT_UNWRITTEN
;
76 irec
->br_state
= XFS_EXT_NORM
;
80 * Extract the blockcount field from an on disk bmap extent record.
/*
 * Returns the low 21 bits of the big-endian l1 word of record r, converted
 * to CPU byte order and cast to xfs_filblks_t.
 */
83 xfs_bmbt_disk_get_blockcount(
86 return (xfs_filblks_t
)(be64_to_cpu(r
->l1
) & xfs_mask64lo(21));
90 * Extract the startoff field from a disk format bmap extent record.
/*
 * Returns the big-endian l0 word of record r in CPU byte order, with the top
 * BMBT_EXNTFLAG_BITLEN bits masked off and the result shifted right by 9.
 */
93 xfs_bmbt_disk_get_startoff(
96 return ((xfs_fileoff_t
)be64_to_cpu(r
->l0
) &
97 xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN
)) >> 9;
101 * Set all the fields in a bmap extent record from the uncompressed form.
/*
 * Pack the in-core extent s into the two 64-bit words of disk record r.
 *
 * The value built for r->l0 is:
 *   (extent_flag << 63) | (br_startoff << 9) | (br_startblock >> 43)
 * The value built for r->l1 is:
 *   (br_startblock << 21) | (br_blockcount & low 21 bits)
 *
 * extent_flag is 1 when br_state is not XFS_EXT_NORM. The ASSERTs check that
 * br_state is one of the two known states and that startoff, blockcount and
 * startblock have no bits set above their respective BMBT_*_BITLEN widths.
 * NOTE(review): the call that stores each value is elided in this listing;
 * only its trailing destination argument is present - confirm the byte order
 * used for the store.
 */
104 xfs_bmbt_disk_set_all(
105 struct xfs_bmbt_rec
*r
,
106 struct xfs_bmbt_irec
*s
)
108 int extent_flag
= (s
->br_state
!= XFS_EXT_NORM
);
110 ASSERT(s
->br_state
== XFS_EXT_NORM
|| s
->br_state
== XFS_EXT_UNWRITTEN
);
111 ASSERT(!(s
->br_startoff
& xfs_mask64hi(64-BMBT_STARTOFF_BITLEN
)));
112 ASSERT(!(s
->br_blockcount
& xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN
)));
113 ASSERT(!(s
->br_startblock
& xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN
)));
116 ((xfs_bmbt_rec_base_t
)extent_flag
<< 63) |
117 ((xfs_bmbt_rec_base_t
)s
->br_startoff
<< 9) |
118 ((xfs_bmbt_rec_base_t
)s
->br_startblock
>> 43), &r
->l0
);
120 ((xfs_bmbt_rec_base_t
)s
->br_startblock
<< 21) |
121 ((xfs_bmbt_rec_base_t
)s
->br_blockcount
&
122 (xfs_bmbt_rec_base_t
)xfs_mask64lo(21)), &r
->l1
);
126 * Convert in-memory form of btree root to on-disk form.
/*
 * Fill the on-disk inode root (dblock) from the in-memory root block
 * (rblock).
 *
 * The ASSERTs document what the in-memory root must look like: with CRCs
 * enabled in the superblock it carries XFS_BMAP_CRC_MAGIC, the filesystem
 * meta UUID and a null disk address; the XFS_BMAP_MAGIC check covers the
 * other format. Both sibling pointers must be NULLFSBLOCK and the level must
 * be non-zero.
 *
 * bb_level and bb_numrecs are copied without byte swapping, then the keys and
 * pointers are copied with memcpy. dmxr is first the maximum record count
 * for dblocklen (used to locate the disk pointer array) and is then reused as
 * the actual record count for the copies.
 */
130 struct xfs_mount
*mp
,
131 struct xfs_btree_block
*rblock
,
133 xfs_bmdr_block_t
*dblock
,
142 if (xfs_sb_version_hascrc(&mp
->m_sb
)) {
143 ASSERT(rblock
->bb_magic
== cpu_to_be32(XFS_BMAP_CRC_MAGIC
));
144 ASSERT(uuid_equal(&rblock
->bb_u
.l
.bb_uuid
,
145 &mp
->m_sb
.sb_meta_uuid
));
146 ASSERT(rblock
->bb_u
.l
.bb_blkno
==
147 cpu_to_be64(XFS_BUF_DADDR_NULL
));
149 ASSERT(rblock
->bb_magic
== cpu_to_be32(XFS_BMAP_MAGIC
));
150 ASSERT(rblock
->bb_u
.l
.bb_leftsib
== cpu_to_be64(NULLFSBLOCK
));
151 ASSERT(rblock
->bb_u
.l
.bb_rightsib
== cpu_to_be64(NULLFSBLOCK
));
152 ASSERT(rblock
->bb_level
!= 0);
153 dblock
->bb_level
= rblock
->bb_level
;
154 dblock
->bb_numrecs
= rblock
->bb_numrecs
;
155 dmxr
= xfs_bmdr_maxrecs(dblocklen
, 0);
156 fkp
= XFS_BMBT_KEY_ADDR(mp
, rblock
, 1);
157 tkp
= XFS_BMDR_KEY_ADDR(dblock
, 1);
158 fpp
= XFS_BMAP_BROOT_PTR_ADDR(mp
, rblock
, 1, rblocklen
);
159 tpp
= XFS_BMDR_PTR_ADDR(dblock
, 1, dmxr
);
160 dmxr
= be16_to_cpu(dblock
->bb_numrecs
);
161 memcpy(tkp
, fkp
, sizeof(*fkp
) * dmxr
);
162 memcpy(tpp
, fpp
, sizeof(*fpp
) * dmxr
);
/*
 * Build a second cursor for the same btree: xfs_bmbt_init_cursor() is called
 * with the mount, transaction, inode and fork of the existing cursor, and the
 * private flags are then copied over.
 * NOTE(review): the inner comment also mentions firstblock and dfops, but the
 * only assignment present in this listing copies flags - confirm whether the
 * comment is stale.
 */
165 STATIC
struct xfs_btree_cur
*
167 struct xfs_btree_cur
*cur
)
169 struct xfs_btree_cur
*new;
171 new = xfs_bmbt_init_cursor(cur
->bc_mp
, cur
->bc_tp
,
172 cur
->bc_private
.b
.ip
, cur
->bc_private
.b
.whichfork
);
175 * Copy the firstblock, dfops, and flags values,
176 * since init cursor doesn't get them.
178 new->bc_private
.b
.flags
= cur
->bc_private
.b
.flags
;
/*
 * Fold the allocation state of cursor src into cursor dst.
 *
 * The allocated block count of src is added to dst, the t_firstblock of the
 * src transaction is copied to the dst transaction, and the src count is
 * reset to zero. The ASSERT requires that the dst transaction already has a
 * firstblock set, unless the inode has XFS_DIFLAG_REALTIME.
 */
184 xfs_bmbt_update_cursor(
185 struct xfs_btree_cur
*src
,
186 struct xfs_btree_cur
*dst
)
188 ASSERT((dst
->bc_tp
->t_firstblock
!= NULLFSBLOCK
) ||
189 (dst
->bc_private
.b
.ip
->i_d
.di_flags
& XFS_DIFLAG_REALTIME
));
191 dst
->bc_private
.b
.allocated
+= src
->bc_private
.b
.allocated
;
192 dst
->bc_tp
->t_firstblock
= src
->bc_tp
->t_firstblock
;
194 src
->bc_private
.b
.allocated
= 0;
/*
 * Allocate one block for the bmap btree and store its address, big-endian,
 * in new->l.
 *
 * The allocation request is seeded from the t_firstblock of the transaction.
 * When that is NULLFSBLOCK the start hint is used instead, with type
 * XFS_ALLOCTYPE_START_BNO and minleft taken from the block reservation of
 * the transaction. Otherwise the type is XFS_ALLOCTYPE_START_BNO when the
 * transaction has XFS_TRANS_LOWMODE set; XFS_ALLOCTYPE_NEAR_BNO is the other
 * type assigned.
 *
 * If the first xfs_alloc_vextent() call yields NULLFSBLOCK while minleft was
 * set, the request is retried with XFS_ALLOCTYPE_FIRST_AG and the transaction
 * is flagged XFS_TRANS_LOWMODE.
 *
 * On success exactly one block was allocated (asserted). The t_firstblock of
 * the transaction is updated, the allocated count of the cursor and the
 * di_nblocks of the inode are incremented, the inode core is logged, and the
 * dquot block count is adjusted by +1.
 * NOTE(review): the error-return paths are elided in this listing.
 */
198 xfs_bmbt_alloc_block(
199 struct xfs_btree_cur
*cur
,
200 union xfs_btree_ptr
*start
,
201 union xfs_btree_ptr
*new,
204 xfs_alloc_arg_t args
; /* block allocation args */
205 int error
; /* error return value */
207 memset(&args
, 0, sizeof(args
));
208 args
.tp
= cur
->bc_tp
;
209 args
.mp
= cur
->bc_mp
;
210 args
.fsbno
= cur
->bc_tp
->t_firstblock
;
211 xfs_rmap_ino_bmbt_owner(&args
.oinfo
, cur
->bc_private
.b
.ip
->i_ino
,
212 cur
->bc_private
.b
.whichfork
);
214 if (args
.fsbno
== NULLFSBLOCK
) {
215 args
.fsbno
= be64_to_cpu(start
->l
);
216 args
.type
= XFS_ALLOCTYPE_START_BNO
;
218 * Make sure there is sufficient room left in the AG to
219 * complete a full tree split for an extent insert. If
220 * we are converting the middle part of an extent then
221 * we may need space for two tree splits.
223 * We are relying on the caller to make the correct block
224 * reservation for this operation to succeed. If the
225 * reservation amount is insufficient then we may fail a
226 * block allocation here and corrupt the filesystem.
228 args
.minleft
= args
.tp
->t_blk_res
;
229 } else if (cur
->bc_tp
->t_flags
& XFS_TRANS_LOWMODE
) {
230 args
.type
= XFS_ALLOCTYPE_START_BNO
;
232 args
.type
= XFS_ALLOCTYPE_NEAR_BNO
;
235 args
.minlen
= args
.maxlen
= args
.prod
= 1;
236 args
.wasdel
= cur
->bc_private
.b
.flags
& XFS_BTCUR_BPRV_WASDEL
;
237 if (!args
.wasdel
&& args
.tp
->t_blk_res
== 0) {
241 error
= xfs_alloc_vextent(&args
);
245 if (args
.fsbno
== NULLFSBLOCK
&& args
.minleft
) {
247 * Could not find an AG with enough free space to satisfy
248 * a full btree split. Try again and if
249 * successful activate the lowspace algorithm.
252 args
.type
= XFS_ALLOCTYPE_FIRST_AG
;
253 error
= xfs_alloc_vextent(&args
);
256 cur
->bc_tp
->t_flags
|= XFS_TRANS_LOWMODE
;
258 if (WARN_ON_ONCE(args
.fsbno
== NULLFSBLOCK
)) {
263 ASSERT(args
.len
== 1);
264 cur
->bc_tp
->t_firstblock
= args
.fsbno
;
265 cur
->bc_private
.b
.allocated
++;
266 cur
->bc_private
.b
.ip
->i_d
.di_nblocks
++;
267 xfs_trans_log_inode(args
.tp
, cur
->bc_private
.b
.ip
, XFS_ILOG_CORE
);
268 xfs_trans_mod_dquot_byino(args
.tp
, cur
->bc_private
.b
.ip
,
269 XFS_TRANS_DQ_BCOUNT
, 1L);
271 new->l
= cpu_to_be64(args
.fsbno
);
/*
 * Release the btree block held in buffer bp.
 *
 * The filesystem block number is derived from the disk address of the
 * buffer, and the block is handed to xfs_bmap_add_free() together with
 * owner info built from the inode number and fork. The di_nblocks of the
 * inode is then decremented, the inode core is logged in the transaction,
 * and the dquot block count is adjusted by -1.
 */
282 struct xfs_btree_cur
*cur
,
285 struct xfs_mount
*mp
= cur
->bc_mp
;
286 struct xfs_inode
*ip
= cur
->bc_private
.b
.ip
;
287 struct xfs_trans
*tp
= cur
->bc_tp
;
288 xfs_fsblock_t fsbno
= XFS_DADDR_TO_FSB(mp
, XFS_BUF_ADDR(bp
));
289 struct xfs_owner_info oinfo
;
291 xfs_rmap_ino_bmbt_owner(&oinfo
, ip
->i_ino
, cur
->bc_private
.b
.whichfork
);
292 xfs_bmap_add_free(cur
->bc_tp
, fsbno
, 1, &oinfo
);
293 ip
->i_d
.di_nblocks
--;
295 xfs_trans_log_inode(tp
, ip
, XFS_ILOG_CORE
);
296 xfs_trans_mod_dquot_byino(tp
, ip
, XFS_TRANS_DQ_BCOUNT
, -1L);
/*
 * Minimum number of records for a block at the given level.
 *
 * For the top level of the tree (level == bc_nlevels - 1) the value is half
 * of xfs_bmbt_maxrecs() computed from the if_broot_bytes of the fork. For
 * every other level it is read from the per-mount table m_bmap_dmnr, indexed
 * by whether level is non-zero.
 */
301 xfs_bmbt_get_minrecs(
302 struct xfs_btree_cur
*cur
,
305 if (level
== cur
->bc_nlevels
- 1) {
306 struct xfs_ifork
*ifp
;
308 ifp
= XFS_IFORK_PTR(cur
->bc_private
.b
.ip
,
309 cur
->bc_private
.b
.whichfork
);
311 return xfs_bmbt_maxrecs(cur
->bc_mp
,
312 ifp
->if_broot_bytes
, level
== 0) / 2;
315 return cur
->bc_mp
->m_bmap_dmnr
[level
!= 0];
/*
 * Maximum number of records for a block at the given level.
 *
 * For the top level of the tree (level == bc_nlevels - 1) the value is
 * xfs_bmbt_maxrecs() computed from the if_broot_bytes of the fork. For every
 * other level it is read from the per-mount table m_bmap_dmxr, indexed by
 * whether level is non-zero.
 */
319 xfs_bmbt_get_maxrecs(
320 struct xfs_btree_cur
*cur
,
323 if (level
== cur
->bc_nlevels
- 1) {
324 struct xfs_ifork
*ifp
;
326 ifp
= XFS_IFORK_PTR(cur
->bc_private
.b
.ip
,
327 cur
->bc_private
.b
.whichfork
);
329 return xfs_bmbt_maxrecs(cur
->bc_mp
,
330 ifp
->if_broot_bytes
, level
== 0);
333 return cur
->bc_mp
->m_bmap_dmxr
[level
!= 0];
338 * Get the maximum records we could store in the on-disk format.
340 * For non-root nodes this is equivalent to xfs_bmbt_get_maxrecs, but
341 * for the root node this checks the available space in the dinode fork
342 * so that we can resize the in-memory buffer to match it. After a
343 * resize to the maximum size this function returns the same value
344 * as xfs_bmbt_get_maxrecs for the root node, too.
/*
 * Levels below the top of the tree take their limit from the per-mount table
 * m_bmap_dmxr, indexed by whether level is non-zero. The top level is sized
 * with xfs_bmdr_maxrecs() from the forksize recorded in the cursor.
 */
347 xfs_bmbt_get_dmaxrecs(
348 struct xfs_btree_cur
*cur
,
351 if (level
!= cur
->bc_nlevels
- 1)
352 return cur
->bc_mp
->m_bmap_dmxr
[level
!= 0];
353 return xfs_bmdr_maxrecs(cur
->bc_private
.b
.forksize
, level
== 0);
/*
 * Set the key to the start offset of the record: the value returned by
 * xfs_bmbt_disk_get_startoff() is stored big-endian in key->bmbt.br_startoff.
 */
357 xfs_bmbt_init_key_from_rec(
358 union xfs_btree_key
*key
,
359 union xfs_btree_rec
*rec
)
361 key
->bmbt
.br_startoff
=
362 cpu_to_be64(xfs_bmbt_disk_get_startoff(&rec
->bmbt
));
/*
 * Set the key to the last file offset covered by the record, that is
 * startoff + blockcount - 1, stored big-endian in key->bmbt.br_startoff.
 */
366 xfs_bmbt_init_high_key_from_rec(
367 union xfs_btree_key
*key
,
368 union xfs_btree_rec
*rec
)
370 key
->bmbt
.br_startoff
= cpu_to_be64(
371 xfs_bmbt_disk_get_startoff(&rec
->bmbt
) +
372 xfs_bmbt_disk_get_blockcount(&rec
->bmbt
) - 1);
/*
 * Encode the in-core extent held in the cursor (cur->bc_rec.b) into the
 * on-disk record rec->bmbt via xfs_bmbt_disk_set_all().
 */
376 xfs_bmbt_init_rec_from_cur(
377 struct xfs_btree_cur
*cur
,
378 union xfs_btree_rec
*rec
)
380 xfs_bmbt_disk_set_all(&rec
->bmbt
, &cur
->bc_rec
.b
);
/*
 * Takes the btree cursor and the pointer to fill in; installed as the
 * init_ptr_from_cur operation in xfs_bmbt_ops.
 * NOTE(review): the body is elided in this listing - confirm what value is
 * stored through ptr.
 */
384 xfs_bmbt_init_ptr_from_cur(
385 struct xfs_btree_cur
*cur
,
386 union xfs_btree_ptr
*ptr
)
/*
 * Returns the start offset of the key (converted from big-endian and cast to
 * int64_t) minus the start offset of the in-core record held in the cursor.
 */
393 struct xfs_btree_cur
*cur
,
394 union xfs_btree_key
*key
)
396 return (int64_t)be64_to_cpu(key
->bmbt
.br_startoff
) -
397 cur
->bc_rec
.b
.br_startoff
;
/*
 * Returns the start offset of k1 minus the start offset of k2, both converted
 * from big-endian; the k1 term is cast to int64_t before the subtraction.
 * The cursor argument is not used by the expression.
 */
401 xfs_bmbt_diff_two_keys(
402 struct xfs_btree_cur
*cur
,
403 union xfs_btree_key
*k1
,
404 union xfs_btree_key
*k2
)
406 return (int64_t)be64_to_cpu(k1
->bmbt
.br_startoff
) -
407 be64_to_cpu(k2
->bmbt
.br_startoff
);
/*
 * Structural check of a bmap btree block held in buffer bp. A returned
 * address identifies the failing check.
 *
 * The block magic selects the path: XFS_BMAP_CRC_MAGIC blocks go through
 * xfs_btree_lblock_v5hdr_verify() with XFS_RMAP_OWN_UNKNOWN as the owner;
 * XFS_BMAP_MAGIC blocks have their own case. The level is then rejected if
 * it exceeds the larger of the two m_bm_maxlevels entries, and the final
 * result comes from xfs_btree_lblock_verify() with the m_bmap_dmxr limit for
 * leaf or node blocks.
 * NOTE(review): the case bodies and the default label are elided in this
 * listing - the first return of __this_address presumably belongs to the
 * default case; confirm.
 */
410 static xfs_failaddr_t
414 struct xfs_mount
*mp
= bp
->b_target
->bt_mount
;
415 struct xfs_btree_block
*block
= XFS_BUF_TO_BLOCK(bp
);
419 switch (block
->bb_magic
) {
420 case cpu_to_be32(XFS_BMAP_CRC_MAGIC
):
422 * XXX: need a better way of verifying the owner here. Right now
423 * just make sure there has been one set.
425 fa
= xfs_btree_lblock_v5hdr_verify(bp
, XFS_RMAP_OWN_UNKNOWN
);
429 case cpu_to_be32(XFS_BMAP_MAGIC
):
432 return __this_address
;
436 * numrecs and level verification.
438 * We don't know what fork we belong to, so just verify that the level
439 * is less than the maximum of the two. Later checks will be more
442 level
= be16_to_cpu(block
->bb_level
);
443 if (level
> max(mp
->m_bm_maxlevels
[0], mp
->m_bm_maxlevels
[1]))
444 return __this_address
;
446 return xfs_btree_lblock_verify(bp
, mp
->m_bmap_dmxr
[level
!= 0]);
/*
 * Read-side buffer verifier. A failed xfs_btree_lblock_verify_crc() is
 * reported through xfs_verifier_error() with -EFSBADCRC. The structural
 * result of xfs_bmbt_verify() is reported with -EFSCORRUPTED, and
 * trace_xfs_btree_corrupt() records the event.
 * NOTE(review): the conditions guarding the structural check, its error
 * report and the trace call are elided in this listing - confirm them.
 */
450 xfs_bmbt_read_verify(
455 if (!xfs_btree_lblock_verify_crc(bp
))
456 xfs_verifier_error(bp
, -EFSBADCRC
, __this_address
);
458 fa
= xfs_bmbt_verify(bp
);
460 xfs_verifier_error(bp
, -EFSCORRUPTED
, fa
);
464 trace_xfs_btree_corrupt(bp
, _RET_IP_
);
/*
 * Write-side buffer verifier. The block is checked with xfs_bmbt_verify(); a
 * failure is traced and reported through xfs_verifier_error() with
 * -EFSCORRUPTED. xfs_btree_lblock_calc_crc() then computes the block CRC.
 * NOTE(review): the condition on fa and any early return are elided in this
 * listing - confirm that the CRC is only computed for blocks that passed.
 */
468 xfs_bmbt_write_verify(
473 fa
= xfs_bmbt_verify(bp
);
475 trace_xfs_btree_corrupt(bp
, _RET_IP_
);
476 xfs_verifier_error(bp
, -EFSCORRUPTED
, fa
);
479 xfs_btree_lblock_calc_crc(bp
);
/*
 * Buffer operations for bmap btree blocks: wires up the read verifier, the
 * write verifier and the structure-only verifier. Referenced from
 * xfs_bmbt_ops through its buf_ops member.
 */
482 const struct xfs_buf_ops xfs_bmbt_buf_ops
= {
484 .verify_read
= xfs_bmbt_read_verify
,
485 .verify_write
= xfs_bmbt_write_verify
,
486 .verify_struct
= xfs_bmbt_verify
,
/*
 * Returns non-zero when the start offset of k1 is strictly less than the
 * start offset of k2 (both converted from big-endian). The cursor argument
 * is not used by the comparison.
 */
491 xfs_bmbt_keys_inorder(
492 struct xfs_btree_cur
*cur
,
493 union xfs_btree_key
*k1
,
494 union xfs_btree_key
*k2
)
496 return be64_to_cpu(k1
->bmbt
.br_startoff
) <
497 be64_to_cpu(k2
->bmbt
.br_startoff
);
/*
 * Returns non-zero when record r1 ends at or before the start of record r2,
 * that is startoff(r1) + blockcount(r1) <= startoff(r2). The cursor argument
 * is not used by the comparison.
 */
501 xfs_bmbt_recs_inorder(
502 struct xfs_btree_cur
*cur
,
503 union xfs_btree_rec
*r1
,
504 union xfs_btree_rec
*r2
)
506 return xfs_bmbt_disk_get_startoff(&r1
->bmbt
) +
507 xfs_bmbt_disk_get_blockcount(&r1
->bmbt
) <=
508 xfs_bmbt_disk_get_startoff(&r2
->bmbt
);
/*
 * Operations table for the bmap btree. Record and key lengths come from
 * xfs_bmbt_rec_t and xfs_bmbt_key_t; every callback is one of the xfs_bmbt_*
 * functions in this file. xfs_bmbt_init_cursor() installs this table in
 * cur->bc_ops.
 */
511 static const struct xfs_btree_ops xfs_bmbt_ops
= {
512 .rec_len
= sizeof(xfs_bmbt_rec_t
),
513 .key_len
= sizeof(xfs_bmbt_key_t
),
515 .dup_cursor
= xfs_bmbt_dup_cursor
,
516 .update_cursor
= xfs_bmbt_update_cursor
,
517 .alloc_block
= xfs_bmbt_alloc_block
,
518 .free_block
= xfs_bmbt_free_block
,
519 .get_maxrecs
= xfs_bmbt_get_maxrecs
,
520 .get_minrecs
= xfs_bmbt_get_minrecs
,
521 .get_dmaxrecs
= xfs_bmbt_get_dmaxrecs
,
522 .init_key_from_rec
= xfs_bmbt_init_key_from_rec
,
523 .init_high_key_from_rec
= xfs_bmbt_init_high_key_from_rec
,
524 .init_rec_from_cur
= xfs_bmbt_init_rec_from_cur
,
525 .init_ptr_from_cur
= xfs_bmbt_init_ptr_from_cur
,
526 .key_diff
= xfs_bmbt_key_diff
,
527 .diff_two_keys
= xfs_bmbt_diff_two_keys
,
528 .buf_ops
= &xfs_bmbt_buf_ops
,
529 .keys_inorder
= xfs_bmbt_keys_inorder
,
530 .recs_inorder
= xfs_bmbt_recs_inorder
,
534 * Allocate a new bmap btree cursor.
/*
 * Builds a cursor for the bmap btree rooted in the given fork of inode ip.
 *
 * The cursor is zero-allocated from xfs_btree_cur_zone with KM_NOFS. Its
 * level count is the bb_level of the in-core root plus one. It is marked as
 * a long-pointer, root-in-inode btree, with XFS_BTREE_CRC_BLOCKS added when
 * the superblock has CRCs enabled. The private area records the fork size,
 * the inode and the fork, and starts with zero allocated blocks and no
 * flags. The COW fork is not accepted (asserted).
 * NOTE(review): the assignments of the mount and transaction into the cursor
 * and the return statement are elided in this listing.
 */
536 struct xfs_btree_cur
* /* new bmap btree cursor */
537 xfs_bmbt_init_cursor(
538 struct xfs_mount
*mp
, /* file system mount point */
539 struct xfs_trans
*tp
, /* transaction pointer */
540 struct xfs_inode
*ip
, /* inode owning the btree */
541 int whichfork
) /* data or attr fork */
543 struct xfs_ifork
*ifp
= XFS_IFORK_PTR(ip
, whichfork
);
544 struct xfs_btree_cur
*cur
;
545 ASSERT(whichfork
!= XFS_COW_FORK
);
547 cur
= kmem_zone_zalloc(xfs_btree_cur_zone
, KM_NOFS
);
551 cur
->bc_nlevels
= be16_to_cpu(ifp
->if_broot
->bb_level
) + 1;
552 cur
->bc_btnum
= XFS_BTNUM_BMAP
;
553 cur
->bc_blocklog
= mp
->m_sb
.sb_blocklog
;
554 cur
->bc_statoff
= XFS_STATS_CALC_INDEX(xs_bmbt_2
);
556 cur
->bc_ops
= &xfs_bmbt_ops
;
557 cur
->bc_flags
= XFS_BTREE_LONG_PTRS
| XFS_BTREE_ROOT_IN_INODE
;
558 if (xfs_sb_version_hascrc(&mp
->m_sb
))
559 cur
->bc_flags
|= XFS_BTREE_CRC_BLOCKS
;
561 cur
->bc_private
.b
.forksize
= XFS_IFORK_SIZE(ip
, whichfork
);
562 cur
->bc_private
.b
.ip
= ip
;
563 cur
->bc_private
.b
.allocated
= 0;
564 cur
->bc_private
.b
.flags
= 0;
565 cur
->bc_private
.b
.whichfork
= whichfork
;
571 * Calculate number of records in a bmap btree block.
/*
 * The block header length XFS_BMBT_BLOCK_LEN(mp) is subtracted from blocklen
 * first. One return divides the remainder by the record size, the other by
 * the combined size of a key and a pointer.
 * NOTE(review): the condition selecting between the two returns is elided in
 * this listing; callers pass (level == 0) as the third argument, so the
 * record-size form presumably applies to leaf blocks - confirm.
 */
575 struct xfs_mount
*mp
,
579 blocklen
-= XFS_BMBT_BLOCK_LEN(mp
);
582 return blocklen
/ sizeof(xfs_bmbt_rec_t
);
583 return blocklen
/ (sizeof(xfs_bmbt_key_t
) + sizeof(xfs_bmbt_ptr_t
));
587 * Calculate number of records in a bmap btree inode root.
/*
 * The size of the inode root header (xfs_bmdr_block_t) is subtracted from
 * blocklen first. One return divides the remainder by the record size, the
 * other by the combined size of a key and a pointer.
 * NOTE(review): the condition selecting between the two returns is elided in
 * this listing - confirm which form applies to leaf roots.
 */
594 blocklen
-= sizeof(xfs_bmdr_block_t
);
597 return blocklen
/ sizeof(xfs_bmdr_rec_t
);
598 return blocklen
/ (sizeof(xfs_bmdr_key_t
) + sizeof(xfs_bmdr_ptr_t
));
602 * Change the owner of a btree format fork of the inode passed in. Change it to
603 * the owner that is passed in so that we can change owners before or after
604 * we switch forks between inodes. The operation that the caller is doing will
605 * determine whether it needs to change owner before or after the switch.
607 * For demand paged transactional modification, the fork switch should be done
608 * after reading in all the blocks, modifying them and pinning them in the
609 * transaction. For modification when the buffers are already pinned in memory,
610 * the fork switch can be done before changing the owner as we won't need to
611 * validate the owner until the btree buffers are unpinned and writes can occur
614 * For recovery based ownership change, there is no transactional context and
615 * so a buffer list must be supplied so that we can record the buffers that we
616 * modified for the caller to issue IO on.
/*
 * Exactly one of tp and buffer_list must be supplied (both ASSERTs together
 * enforce this). The fork being changed must be in btree format, checked
 * against di_format for the data fork and di_aformat in the other assertion.
 *
 * A cursor is created for the fork and flagged
 * XFS_BTCUR_BPRV_INVALID_OWNER before xfs_btree_change_owner() is called
 * with new_owner and buffer_list. The cursor is then deleted with the
 * resulting error code.
 * NOTE(review): cursor allocation failure handling and the return statement
 * are elided in this listing.
 */
619 xfs_bmbt_change_owner(
620 struct xfs_trans
*tp
,
621 struct xfs_inode
*ip
,
624 struct list_head
*buffer_list
)
626 struct xfs_btree_cur
*cur
;
629 ASSERT(tp
|| buffer_list
);
630 ASSERT(!(tp
&& buffer_list
));
631 if (whichfork
== XFS_DATA_FORK
)
632 ASSERT(ip
->i_d
.di_format
== XFS_DINODE_FMT_BTREE
);
634 ASSERT(ip
->i_d
.di_aformat
== XFS_DINODE_FMT_BTREE
);
636 cur
= xfs_bmbt_init_cursor(ip
->i_mount
, tp
, ip
, whichfork
);
639 cur
->bc_private
.b
.flags
|= XFS_BTCUR_BPRV_INVALID_OWNER
;
641 error
= xfs_btree_change_owner(cur
, new_owner
, buffer_list
);
642 xfs_btree_del_cursor(cur
, error
);
646 /* Calculate the bmap btree size for some records. */
/*
 * Delegates to xfs_btree_calc_size() with the per-mount minimum-records
 * table m_bmap_dmnr and the record count len.
 */
649 struct xfs_mount
*mp
,
650 unsigned long long len
)
652 return xfs_btree_calc_size(mp
->m_bmap_dmnr
, len
);