// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2017 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_btree.h"
#include "xfs_rmap_btree.h"
#include "xfs_trace.h"
#include "xfs_rmap.h"
#include "xfs_alloc.h"
#include "xfs_bit.h"
#include <linux/fsmap.h>
#include "xfs_fsmap.h"
#include "xfs_refcount.h"
#include "xfs_refcount_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_rtbitmap.h"
#include "xfs_ag.h"
#include "xfs_rtgroup.h"

/* Convert an xfs_fsmap to an fsmap. */
static void
xfs_fsmap_from_internal(
	struct fsmap		*dest,
	struct xfs_fsmap	*src)
{
	dest->fmr_device = src->fmr_device;
	dest->fmr_flags = src->fmr_flags;
	dest->fmr_physical = BBTOB(src->fmr_physical);
	dest->fmr_owner = src->fmr_owner;
	dest->fmr_offset = BBTOB(src->fmr_offset);
	dest->fmr_length = BBTOB(src->fmr_length);
	dest->fmr_reserved[0] = 0;
	dest->fmr_reserved[1] = 0;
	dest->fmr_reserved[2] = 0;
}

/* Convert an fsmap to an xfs_fsmap. */
static void
xfs_fsmap_to_internal(
	struct xfs_fsmap	*dest,
	struct fsmap		*src)
{
	dest->fmr_device = src->fmr_device;
	dest->fmr_flags = src->fmr_flags;
	dest->fmr_physical = BTOBBT(src->fmr_physical);
	dest->fmr_owner = src->fmr_owner;
	dest->fmr_offset = BTOBBT(src->fmr_offset);
	dest->fmr_length = BTOBBT(src->fmr_length);
}

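/*
 * Note on units: struct fsmap (the user ABI) carries byte values, while
 * struct xfs_fsmap (the internal form) carries 512-byte basic blocks, so
 * the two converters above shift by BBSHIFT in opposite directions.  A
 * quick illustrative check (not from the original source):
 *
 *	BBTOB(8)	== 4096		-- 8 basic blocks out to userspace
 *	BTOBBT(4096)	== 8		-- and back again, rounding down
 */
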
/* Convert an fsmap owner into an rmapbt owner. */
static int
xfs_fsmap_owner_to_rmap(
	struct xfs_rmap_irec	*dest,
	const struct xfs_fsmap	*src)
{
	if (!(src->fmr_flags & FMR_OF_SPECIAL_OWNER)) {
		dest->rm_owner = src->fmr_owner;
		return 0;
	}

	switch (src->fmr_owner) {
	case 0:			/* "lowest owner id possible" */
	case -1ULL:		/* "highest owner id possible" */
		dest->rm_owner = src->fmr_owner;
		break;
	case XFS_FMR_OWN_FREE:
		dest->rm_owner = XFS_RMAP_OWN_NULL;
		break;
	case XFS_FMR_OWN_UNKNOWN:
		dest->rm_owner = XFS_RMAP_OWN_UNKNOWN;
		break;
	case XFS_FMR_OWN_FS:
		dest->rm_owner = XFS_RMAP_OWN_FS;
		break;
	case XFS_FMR_OWN_LOG:
		dest->rm_owner = XFS_RMAP_OWN_LOG;
		break;
	case XFS_FMR_OWN_AG:
		dest->rm_owner = XFS_RMAP_OWN_AG;
		break;
	case XFS_FMR_OWN_INOBT:
		dest->rm_owner = XFS_RMAP_OWN_INOBT;
		break;
	case XFS_FMR_OWN_INODES:
		dest->rm_owner = XFS_RMAP_OWN_INODES;
		break;
	case XFS_FMR_OWN_REFC:
		dest->rm_owner = XFS_RMAP_OWN_REFC;
		break;
	case XFS_FMR_OWN_COW:
		dest->rm_owner = XFS_RMAP_OWN_COW;
		break;
	case XFS_FMR_OWN_DEFECTIVE:	/* not implemented */
		fallthrough;
	default:
		return -EINVAL;
	}
	return 0;
}

/* Convert an rmapbt owner into an fsmap owner. */
static int
xfs_fsmap_owner_from_frec(
	struct xfs_fsmap		*dest,
	const struct xfs_fsmap_irec	*frec)
{
	dest->fmr_flags = 0;
	if (!XFS_RMAP_NON_INODE_OWNER(frec->owner)) {
		dest->fmr_owner = frec->owner;
		return 0;
	}
	dest->fmr_flags |= FMR_OF_SPECIAL_OWNER;
	switch (frec->owner) {
	case XFS_RMAP_OWN_FS:
		dest->fmr_owner = XFS_FMR_OWN_FS;
		break;
	case XFS_RMAP_OWN_LOG:
		dest->fmr_owner = XFS_FMR_OWN_LOG;
		break;
	case XFS_RMAP_OWN_AG:
		dest->fmr_owner = XFS_FMR_OWN_AG;
		break;
	case XFS_RMAP_OWN_INOBT:
		dest->fmr_owner = XFS_FMR_OWN_INOBT;
		break;
	case XFS_RMAP_OWN_INODES:
		dest->fmr_owner = XFS_FMR_OWN_INODES;
		break;
	case XFS_RMAP_OWN_REFC:
		dest->fmr_owner = XFS_FMR_OWN_REFC;
		break;
	case XFS_RMAP_OWN_COW:
		dest->fmr_owner = XFS_FMR_OWN_COW;
		break;
	case XFS_RMAP_OWN_NULL:	/* "free" */
		dest->fmr_owner = XFS_FMR_OWN_FREE;
		break;
	default:
		ASSERT(0);
		return -EFSCORRUPTED;
	}
	return 0;
}

/* getfsmap query state */
struct xfs_getfsmap_info {
	struct xfs_fsmap_head	*head;
	struct fsmap		*fsmap_recs;	/* mapping records */
	struct xfs_buf		*agf_bp;	/* AGF, for refcount queries */
	struct xfs_group	*group;		/* group info, if applicable */
	xfs_daddr_t		next_daddr;	/* next daddr we expect */
	/* daddr of low fsmap key when we're using the rtbitmap */
	xfs_daddr_t		low_daddr;
	xfs_daddr_t		end_daddr;	/* daddr of high fsmap key */
	u64			missing_owner;	/* owner of holes */
	u32			dev;		/* device id */
	/*
	 * Low rmap key for the query.  If low.rm_blockcount is nonzero, this
	 * is the second (or later) call to retrieve the recordset in pieces.
	 * xfs_getfsmap_rec_before_start will compare all records retrieved
	 * by the rmapbt query to filter out any records that start before
	 * the last one.
	 */
	struct xfs_rmap_irec	low;
	struct xfs_rmap_irec	high;		/* high rmap key */
	bool			last;		/* last extent? */
};

/* Associate a device with a getfsmap handler. */
struct xfs_getfsmap_dev {
	u32			dev;
	int			(*fn)(struct xfs_trans *tp,
				      const struct xfs_fsmap *keys,
				      struct xfs_getfsmap_info *info);
	sector_t		nr_sectors;
};

/* Compare two getfsmap device handlers. */
static int
xfs_getfsmap_dev_compare(
	const void			*p1,
	const void			*p2)
{
	const struct xfs_getfsmap_dev	*d1 = p1;
	const struct xfs_getfsmap_dev	*d2 = p2;

	return d1->dev - d2->dev;
}

/* Decide if this mapping is shared. */
STATIC int
xfs_getfsmap_is_shared(
	struct xfs_trans		*tp,
	struct xfs_getfsmap_info	*info,
	const struct xfs_fsmap_irec	*frec,
	bool				*shared)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_btree_cur		*cur;
	xfs_agblock_t			fbno;
	xfs_extlen_t			flen;
	int				error;

	*shared = false;
	if (!xfs_has_reflink(mp))
		return 0;
	/* rt files will have no perag structure */
	if (!info->group)
		return 0;

	/* Are there any shared blocks here? */
	flen = 0;
	cur = xfs_refcountbt_init_cursor(mp, tp, info->agf_bp,
			to_perag(info->group));

	error = xfs_refcount_find_shared(cur, frec->rec_key,
			XFS_BB_TO_FSBT(mp, frec->len_daddr), &fbno, &flen,
			false);

	xfs_btree_del_cursor(cur, error);
	if (error)
		return error;

	*shared = (flen > 0);
	return 0;
}

static inline void
xfs_getfsmap_format(
	struct xfs_mount		*mp,
	struct xfs_fsmap		*xfm,
	struct xfs_getfsmap_info	*info)
{
	struct fsmap			*rec;

	trace_xfs_getfsmap_mapping(mp, xfm);

	rec = &info->fsmap_recs[info->head->fmh_entries++];
	xfs_fsmap_from_internal(rec, xfm);
}

static inline bool
xfs_getfsmap_frec_before_start(
	struct xfs_getfsmap_info	*info,
	const struct xfs_fsmap_irec	*frec)
{
	if (info->low_daddr != XFS_BUF_DADDR_NULL)
		return frec->start_daddr < info->low_daddr;
	if (info->low.rm_blockcount) {
		struct xfs_rmap_irec	rec = {
			.rm_startblock	= frec->rec_key,
			.rm_owner	= frec->owner,
			.rm_flags	= frec->rm_flags,
		};

		return xfs_rmap_compare(&rec, &info->low) < 0;
	}

	return false;
}

/*
 * Format a reverse mapping for getfsmap, having translated rm_startblock
 * into the appropriate daddr units.  Pass in a nonzero @len_daddr if the
 * length could be larger than the rm_blockcount in struct xfs_rmap_irec.
 */
STATIC int
xfs_getfsmap_helper(
	struct xfs_trans		*tp,
	struct xfs_getfsmap_info	*info,
	const struct xfs_fsmap_irec	*frec)
{
	struct xfs_fsmap		fmr;
	struct xfs_mount		*mp = tp->t_mountp;
	bool				shared;
	int				error = 0;

	if (fatal_signal_pending(current))
		return -EINTR;

	/*
	 * Filter out records that start before our startpoint, if the
	 * caller requested that.
	 */
	if (xfs_getfsmap_frec_before_start(info, frec))
		goto out;

	/* Are we just counting mappings? */
	if (info->head->fmh_count == 0) {
		if (info->head->fmh_entries == UINT_MAX)
			return -ECANCELED;

		if (frec->start_daddr > info->next_daddr)
			info->head->fmh_entries++;

		if (info->last)
			return 0;

		info->head->fmh_entries++;
		goto out;
	}

	/*
	 * If the record starts past the last physical block we saw,
	 * then we've found a gap.  Report the gap as being owned by
	 * whatever the caller specified is the missing owner.
	 */
	if (frec->start_daddr > info->next_daddr) {
		if (info->head->fmh_entries >= info->head->fmh_count)
			return -ECANCELED;

		fmr.fmr_device = info->dev;
		fmr.fmr_physical = info->next_daddr;
		fmr.fmr_owner = info->missing_owner;
		fmr.fmr_offset = 0;
		fmr.fmr_length = frec->start_daddr - info->next_daddr;
		fmr.fmr_flags = FMR_OF_SPECIAL_OWNER;
		xfs_getfsmap_format(mp, &fmr, info);
	}

	if (info->last)
		goto out;

	/* Fill out the extent we found */
	if (info->head->fmh_entries >= info->head->fmh_count)
		return -ECANCELED;

	trace_xfs_fsmap_mapping(mp, info->dev,
			info->group ? info->group->xg_gno : NULLAGNUMBER,
			frec);

	fmr.fmr_device = info->dev;
	fmr.fmr_physical = frec->start_daddr;
	error = xfs_fsmap_owner_from_frec(&fmr, frec);
	if (error)
		return error;
	fmr.fmr_offset = XFS_FSB_TO_BB(mp, frec->offset);
	fmr.fmr_length = frec->len_daddr;
	if (frec->rm_flags & XFS_RMAP_UNWRITTEN)
		fmr.fmr_flags |= FMR_OF_PREALLOC;
	if (frec->rm_flags & XFS_RMAP_ATTR_FORK)
		fmr.fmr_flags |= FMR_OF_ATTR_FORK;
	if (frec->rm_flags & XFS_RMAP_BMBT_BLOCK)
		fmr.fmr_flags |= FMR_OF_EXTENT_MAP;
	if (fmr.fmr_flags == 0) {
		error = xfs_getfsmap_is_shared(tp, info, frec, &shared);
		if (error)
			return error;
		if (shared)
			fmr.fmr_flags |= FMR_OF_SHARED;
	}

	xfs_getfsmap_format(mp, &fmr, info);
out:
	info->next_daddr = max(info->next_daddr,
			       frec->start_daddr + frec->len_daddr);
	return 0;
}

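/*
 * Worked example for the gap logic above (illustrative numbers, not from
 * the original source): if the previous record ended at daddr 1000 (so
 * info->next_daddr == 1000) and the next record from the btree starts at
 * daddr 1200, xfs_getfsmap_helper() first emits a synthesized 200-sector
 * record at physical 1000 owned by info->missing_owner (XFS_FMR_OWN_FREE
 * for rmapbt queries, XFS_FMR_OWN_UNKNOWN for bnobt/rtbitmap queries),
 * flagged FMR_OF_SPECIAL_OWNER, and only then formats the real record.
 */
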
static int
xfs_getfsmap_group_helper(
	struct xfs_getfsmap_info	*info,
	struct xfs_trans		*tp,
	struct xfs_group		*xg,
	xfs_agblock_t			startblock,
	xfs_extlen_t			blockcount,
	struct xfs_fsmap_irec		*frec)
{
	/*
	 * For an info->last query, we're looking for a gap between the last
	 * mapping emitted and the high key specified by userspace.  If the
	 * user's query spans less than 1 fsblock, then info->high and
	 * info->low will have the same rm_startblock, which causes rec_daddr
	 * and next_daddr to be the same.  Therefore, use the end_daddr that
	 * we calculated from userspace's high key to synthesize the record.
	 * Note that if the btree query found a mapping, there won't be a gap.
	 */
	if (info->last && info->end_daddr != XFS_BUF_DADDR_NULL)
		frec->start_daddr = info->end_daddr;
	else
		frec->start_daddr = xfs_gbno_to_daddr(xg, startblock);

	frec->len_daddr = XFS_FSB_TO_BB(xg->xg_mount, blockcount);
	return xfs_getfsmap_helper(tp, info, frec);
}

/* Transform a rmapbt irec into a fsmap */
STATIC int
xfs_getfsmap_rmapbt_helper(
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*rec,
	void				*priv)
{
	struct xfs_fsmap_irec		frec = {
		.owner			= rec->rm_owner,
		.offset			= rec->rm_offset,
		.rm_flags		= rec->rm_flags,
		.rec_key		= rec->rm_startblock,
	};
	struct xfs_getfsmap_info	*info = priv;

	return xfs_getfsmap_group_helper(info, cur->bc_tp, cur->bc_group,
			rec->rm_startblock, rec->rm_blockcount, &frec);
}

/* Transform a bnobt irec into a fsmap */
STATIC int
xfs_getfsmap_datadev_bnobt_helper(
	struct xfs_btree_cur		*cur,
	const struct xfs_alloc_rec_incore *rec,
	void				*priv)
{
	struct xfs_fsmap_irec		frec = {
		.owner			= XFS_RMAP_OWN_NULL, /* "free" */
		.rec_key		= rec->ar_startblock,
	};
	struct xfs_getfsmap_info	*info = priv;

	return xfs_getfsmap_group_helper(info, cur->bc_tp, cur->bc_group,
			rec->ar_startblock, rec->ar_blockcount, &frec);
}

/* Set rmap flags based on the getfsmap flags */
static void
xfs_getfsmap_set_irec_flags(
	struct xfs_rmap_irec	*irec,
	const struct xfs_fsmap	*fmr)
{
	irec->rm_flags = 0;
	if (fmr->fmr_flags & FMR_OF_ATTR_FORK)
		irec->rm_flags |= XFS_RMAP_ATTR_FORK;
	if (fmr->fmr_flags & FMR_OF_EXTENT_MAP)
		irec->rm_flags |= XFS_RMAP_BMBT_BLOCK;
	if (fmr->fmr_flags & FMR_OF_PREALLOC)
		irec->rm_flags |= XFS_RMAP_UNWRITTEN;
}

static inline bool
rmap_not_shareable(struct xfs_mount *mp, const struct xfs_rmap_irec *r)
{
	if (!xfs_has_reflink(mp))
		return true;
	if (XFS_RMAP_NON_INODE_OWNER(r->rm_owner))
		return true;
	if (r->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK |
			   XFS_RMAP_UNWRITTEN))
		return true;
	return false;
}

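/*
 * Why shareability matters for continuation (summary; see also the longer
 * comment in xfs_getfsmap() below): when userspace resumes a query with a
 * nonzero-length low key, an unshareable extent can only have one mapping,
 * so __xfs_getfsmap_datadev() may skip past its physical blocks entirely;
 * a shareable data extent may be mapped again by other owners, so only the
 * (owner, offset) portion of the key is advanced and records for the same
 * physical range are filtered by xfs_getfsmap_frec_before_start().
 */
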
/* Execute a getfsmap query against the regular data device. */
STATIC int
__xfs_getfsmap_datadev(
	struct xfs_trans		*tp,
	const struct xfs_fsmap		*keys,
	struct xfs_getfsmap_info	*info,
	int				(*query_fn)(struct xfs_trans *,
						    struct xfs_getfsmap_info *,
						    struct xfs_btree_cur **,
						    void *),
	void				*priv)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_perag		*pag = NULL;
	struct xfs_btree_cur		*bt_cur = NULL;
	xfs_fsblock_t			start_fsb;
	xfs_fsblock_t			end_fsb;
	xfs_agnumber_t			start_ag, end_ag;
	uint64_t			eofs;
	int				error = 0;

	eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
	if (keys[0].fmr_physical >= eofs)
		return 0;
	start_fsb = XFS_DADDR_TO_FSB(mp, keys[0].fmr_physical);
	end_fsb = XFS_DADDR_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));

	/*
	 * Convert the fsmap low/high keys to AG based keys.  Initialize
	 * low to the fsmap low key and max out the high key to the end
	 * of the AG.
	 */
	info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset);
	error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]);
	if (error)
		return error;
	info->low.rm_blockcount = XFS_BB_TO_FSBT(mp, keys[0].fmr_length);
	xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);

	/* Adjust the low key if we are continuing from where we left off. */
	if (info->low.rm_blockcount == 0) {
		/* No previous record from which to continue */
	} else if (rmap_not_shareable(mp, &info->low)) {
		/* Last record seen was an unshareable extent */
		info->low.rm_owner = 0;
		info->low.rm_offset = 0;

		start_fsb += info->low.rm_blockcount;
		if (XFS_FSB_TO_DADDR(mp, start_fsb) >= eofs)
			return 0;
	} else {
		/* Last record seen was a shareable file data extent */
		info->low.rm_offset += info->low.rm_blockcount;
	}
	info->low.rm_startblock = XFS_FSB_TO_AGBNO(mp, start_fsb);

	info->high.rm_startblock = -1U;
	info->high.rm_owner = ULLONG_MAX;
	info->high.rm_offset = ULLONG_MAX;
	info->high.rm_blockcount = 0;
	info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS;

	start_ag = XFS_FSB_TO_AGNO(mp, start_fsb);
	end_ag = XFS_FSB_TO_AGNO(mp, end_fsb);

	while ((pag = xfs_perag_next_range(mp, pag, start_ag, end_ag))) {
		/*
		 * Set the AG high key from the fsmap high key if this
		 * is the last AG that we're querying.
		 */
		info->group = pag_group(pag);
		if (pag_agno(pag) == end_ag) {
			info->high.rm_startblock = XFS_FSB_TO_AGBNO(mp,
					end_fsb);
			info->high.rm_offset = XFS_BB_TO_FSBT(mp,
					keys[1].fmr_offset);
			error = xfs_fsmap_owner_to_rmap(&info->high, &keys[1]);
			if (error)
				break;
			xfs_getfsmap_set_irec_flags(&info->high, &keys[1]);
		}

		if (bt_cur) {
			xfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
			bt_cur = NULL;
			xfs_trans_brelse(tp, info->agf_bp);
			info->agf_bp = NULL;
		}

		error = xfs_alloc_read_agf(pag, tp, 0, &info->agf_bp);
		if (error)
			break;

		trace_xfs_fsmap_low_group_key(mp, info->dev, pag_agno(pag),
				&info->low);
		trace_xfs_fsmap_high_group_key(mp, info->dev, pag_agno(pag),
				&info->high);

		error = query_fn(tp, info, &bt_cur, priv);
		if (error)
			break;

		/*
		 * Set the AG low key to the start of the AG prior to
		 * moving on to the next AG.
		 */
		if (pag_agno(pag) == start_ag)
			memset(&info->low, 0, sizeof(info->low));

		/*
		 * If this is the last AG, report any gap at the end of it
		 * before we drop the reference to the perag when the loop
		 * finishes.
		 */
		if (pag_agno(pag) == end_ag) {
			info->last = true;
			error = query_fn(tp, info, &bt_cur, priv);
			if (error)
				break;
		}
	}

	if (bt_cur)
		xfs_btree_del_cursor(bt_cur, error < 0 ? XFS_BTREE_ERROR :
							 XFS_BTREE_NOERROR);
	if (info->agf_bp) {
		xfs_trans_brelse(tp, info->agf_bp);
		info->agf_bp = NULL;
	}
	if (pag) {
		/* loop termination case */
		xfs_perag_rele(pag);
	}
	info->group = NULL;

	return error;
}

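/*
 * Sketch of how __xfs_getfsmap_datadev() narrows the caller's keys
 * (illustrative numbers, not from the original source): the fs-wide daddr
 * keys become fsblocks and then (AG number, AG block) pairs; only the first
 * AG visited keeps the caller's low key (later AGs restart from AG block 0),
 * and only the last AG gets the caller's high key (earlier AGs are queried
 * through to their end).  For example, with 16 MiB AGs and 4 KiB blocks, a
 * low key at daddr 8388608 is fsblock 1048576, i.e. AG 256, AG block 0.
 */
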
/* Actually query the rmap btree. */
STATIC int
xfs_getfsmap_datadev_rmapbt_query(
	struct xfs_trans		*tp,
	struct xfs_getfsmap_info	*info,
	struct xfs_btree_cur		**curpp,
	void				*priv)
{
	/* Report any gap at the end of the last AG. */
	if (info->last)
		return xfs_getfsmap_rmapbt_helper(*curpp, &info->high, info);

	/* Allocate cursor for this AG and query_range it. */
	*curpp = xfs_rmapbt_init_cursor(tp->t_mountp, tp, info->agf_bp,
			to_perag(info->group));
	return xfs_rmap_query_range(*curpp, &info->low, &info->high,
			xfs_getfsmap_rmapbt_helper, info);
}

/* Execute a getfsmap query against the regular data device rmapbt. */
STATIC int
xfs_getfsmap_datadev_rmapbt(
	struct xfs_trans		*tp,
	const struct xfs_fsmap		*keys,
	struct xfs_getfsmap_info	*info)
{
	info->missing_owner = XFS_FMR_OWN_FREE;
	return __xfs_getfsmap_datadev(tp, keys, info,
			xfs_getfsmap_datadev_rmapbt_query, NULL);
}

/* Actually query the bno btree. */
STATIC int
xfs_getfsmap_datadev_bnobt_query(
	struct xfs_trans		*tp,
	struct xfs_getfsmap_info	*info,
	struct xfs_btree_cur		**curpp,
	void				*priv)
{
	struct xfs_alloc_rec_incore	*key = priv;

	/* Report any gap at the end of the last AG. */
	if (info->last)
		return xfs_getfsmap_datadev_bnobt_helper(*curpp, &key[1], info);

	/* Allocate cursor for this AG and query_range it. */
	*curpp = xfs_bnobt_init_cursor(tp->t_mountp, tp, info->agf_bp,
			to_perag(info->group));
	key->ar_startblock = info->low.rm_startblock;
	key[1].ar_startblock = info->high.rm_startblock;
	return xfs_alloc_query_range(*curpp, key, &key[1],
			xfs_getfsmap_datadev_bnobt_helper, info);
}

/* Execute a getfsmap query against the regular data device's bnobt. */
STATIC int
xfs_getfsmap_datadev_bnobt(
	struct xfs_trans		*tp,
	const struct xfs_fsmap		*keys,
	struct xfs_getfsmap_info	*info)
{
	struct xfs_alloc_rec_incore	akeys[2];

	memset(akeys, 0, sizeof(akeys));
	info->missing_owner = XFS_FMR_OWN_UNKNOWN;
	return __xfs_getfsmap_datadev(tp, keys, info,
			xfs_getfsmap_datadev_bnobt_query, &akeys[0]);
}

/* Execute a getfsmap query against the log device. */
STATIC int
xfs_getfsmap_logdev(
	struct xfs_trans		*tp,
	const struct xfs_fsmap		*keys,
	struct xfs_getfsmap_info	*info)
{
	struct xfs_fsmap_irec		frec = {
		.start_daddr		= 0,
		.rec_key		= 0,
		.owner			= XFS_RMAP_OWN_LOG,
	};
	struct xfs_mount		*mp = tp->t_mountp;
	xfs_fsblock_t			start_fsb, end_fsb;
	uint64_t			eofs;

	eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
	if (keys[0].fmr_physical >= eofs)
		return 0;
	start_fsb = XFS_BB_TO_FSBT(mp,
				keys[0].fmr_physical + keys[0].fmr_length);
	end_fsb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));

	/* Adjust the low key if we are continuing from where we left off. */
	if (keys[0].fmr_length > 0)
		info->low_daddr = XFS_FSB_TO_BB(mp, start_fsb);

	trace_xfs_fsmap_low_linear_key(mp, info->dev, start_fsb);
	trace_xfs_fsmap_high_linear_key(mp, info->dev, end_fsb);

	if (start_fsb > 0)
		return 0;

	/* Fabricate an rmap entry for the external log device. */
	frec.len_daddr = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
	return xfs_getfsmap_helper(tp, info, &frec);
}

#ifdef CONFIG_XFS_RT
/* Transform a rtbitmap "record" into a fsmap */
STATIC int
xfs_getfsmap_rtdev_rtbitmap_helper(
	struct xfs_rtgroup		*rtg,
	struct xfs_trans		*tp,
	const struct xfs_rtalloc_rec	*rec,
	void				*priv)
{
	struct xfs_fsmap_irec		frec = {
		.owner			= XFS_RMAP_OWN_NULL, /* "free" */
	};
	struct xfs_mount		*mp = rtg_mount(rtg);
	struct xfs_getfsmap_info	*info = priv;
	xfs_rtblock_t			start_rtb =
				xfs_rtx_to_rtb(rtg, rec->ar_startext);
	uint64_t			rtbcount =
				xfs_rtbxlen_to_blen(mp, rec->ar_extcount);

	/*
	 * For an info->last query, we're looking for a gap between the last
	 * mapping emitted and the high key specified by userspace.  If the
	 * user's query spans less than 1 fsblock, then info->high and
	 * info->low will have the same rm_startblock, which causes rec_daddr
	 * and next_daddr to be the same.  Therefore, use the end_daddr that
	 * we calculated from userspace's high key to synthesize the record.
	 * Note that if the btree query found a mapping, there won't be a gap.
	 */
	if (info->last && info->end_daddr != XFS_BUF_DADDR_NULL) {
		frec.start_daddr = info->end_daddr;
	} else {
		frec.start_daddr = xfs_rtb_to_daddr(mp, start_rtb);
	}

	frec.len_daddr = XFS_FSB_TO_BB(mp, rtbcount);
	return xfs_getfsmap_helper(tp, info, &frec);
}

/* Execute a getfsmap query against the realtime device rtbitmap. */
STATIC int
xfs_getfsmap_rtdev_rtbitmap(
	struct xfs_trans		*tp,
	const struct xfs_fsmap		*keys,
	struct xfs_getfsmap_info	*info)
{
	struct xfs_mount		*mp = tp->t_mountp;
	xfs_rtblock_t			start_rtbno, end_rtbno;
	xfs_rtxnum_t			start_rtx, end_rtx;
	xfs_rgnumber_t			start_rgno, end_rgno;
	struct xfs_rtgroup		*rtg = NULL;
	uint64_t			eofs;
	int				error = 0;

	eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
	if (keys[0].fmr_physical >= eofs)
		return 0;

	info->missing_owner = XFS_FMR_OWN_UNKNOWN;

	/* Adjust the low key if we are continuing from where we left off. */
	start_rtbno = xfs_daddr_to_rtb(mp,
			keys[0].fmr_physical + keys[0].fmr_length);
	if (keys[0].fmr_length > 0) {
		info->low_daddr = xfs_rtb_to_daddr(mp, start_rtbno);
		if (info->low_daddr >= eofs)
			return 0;
	}
	start_rtx = xfs_rtb_to_rtx(mp, start_rtbno);
	start_rgno = xfs_rtb_to_rgno(mp, start_rtbno);

	end_rtbno = xfs_daddr_to_rtb(mp, min(eofs - 1, keys[1].fmr_physical));
	end_rgno = xfs_rtb_to_rgno(mp, end_rtbno);

	trace_xfs_fsmap_low_linear_key(mp, info->dev, start_rtbno);
	trace_xfs_fsmap_high_linear_key(mp, info->dev, end_rtbno);

	end_rtx = -1ULL;

	while ((rtg = xfs_rtgroup_next_range(mp, rtg, start_rgno, end_rgno))) {
		if (rtg_rgno(rtg) == end_rgno)
			end_rtx = xfs_rtb_to_rtx(mp,
					end_rtbno + mp->m_sb.sb_rextsize - 1);

		info->group = rtg_group(rtg);
		xfs_rtgroup_lock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
		error = xfs_rtalloc_query_range(rtg, tp, start_rtx, end_rtx,
				xfs_getfsmap_rtdev_rtbitmap_helper, info);
		if (error)
			break;

		/*
		 * Report any gaps at the end of the rtbitmap by simulating a
		 * zero-length free extent starting at the rtx after the end
		 * of the query range.
		 */
		if (rtg_rgno(rtg) == end_rgno) {
			struct xfs_rtalloc_rec	ahigh = {
				.ar_startext	= min(end_rtx + 1,
						      rtg->rtg_extents),
			};

			info->last = true;
			error = xfs_getfsmap_rtdev_rtbitmap_helper(rtg, tp,
					&ahigh, info);
			if (error)
				break;
		}

		xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
		info->group = NULL;
	}

	/* loop termination case */
	if (rtg) {
		if (info->group) {
			xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
			info->group = NULL;
		}
		xfs_rtgroup_rele(rtg);
	}

	return error;
}
#endif /* CONFIG_XFS_RT */

/* Do we recognize the device? */
STATIC bool
xfs_getfsmap_is_valid_device(
	struct xfs_mount	*mp,
	struct xfs_fsmap	*fm)
{
	if (fm->fmr_device == 0 || fm->fmr_device == UINT_MAX ||
	    fm->fmr_device == new_encode_dev(mp->m_ddev_targp->bt_dev))
		return true;
	if (mp->m_logdev_targp &&
	    fm->fmr_device == new_encode_dev(mp->m_logdev_targp->bt_dev))
		return true;
	if (mp->m_rtdev_targp &&
	    fm->fmr_device == new_encode_dev(mp->m_rtdev_targp->bt_dev))
		return true;
	return false;
}

/* Ensure that the low key is less than the high key. */
STATIC bool
xfs_getfsmap_check_keys(
	struct xfs_fsmap		*low_key,
	struct xfs_fsmap		*high_key)
{
	if (low_key->fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) {
		if (low_key->fmr_offset)
			return false;
	}
	if (high_key->fmr_flags != -1U &&
	    (high_key->fmr_flags & (FMR_OF_SPECIAL_OWNER |
				    FMR_OF_EXTENT_MAP))) {
		if (high_key->fmr_offset && high_key->fmr_offset != -1ULL)
			return false;
	}
	if (high_key->fmr_length && high_key->fmr_length != -1ULL)
		return false;

	if (low_key->fmr_device > high_key->fmr_device)
		return false;
	if (low_key->fmr_device < high_key->fmr_device)
		return true;

	if (low_key->fmr_physical > high_key->fmr_physical)
		return false;
	if (low_key->fmr_physical < high_key->fmr_physical)
		return true;

	if (low_key->fmr_owner > high_key->fmr_owner)
		return false;
	if (low_key->fmr_owner < high_key->fmr_owner)
		return true;

	if (low_key->fmr_offset > high_key->fmr_offset)
		return false;
	if (low_key->fmr_offset < high_key->fmr_offset)
		return true;

	return false;
}

/*
 * There are only two devices if we didn't configure RT devices at build time.
 */
#ifdef CONFIG_XFS_RT
#define XFS_GETFSMAP_DEVS	3
#else
#define XFS_GETFSMAP_DEVS	2
#endif /* CONFIG_XFS_RT */

/*
 * Get filesystem's extents as described in head, and format for output. Fills
 * in the supplied records array until there are no more reverse mappings to
 * return or head.fmh_entries == head.fmh_count.  In the second case, this
 * function returns -ECANCELED to indicate that more records would have been
 * returned.
 *
 * There are multiple levels of keys and counters at work here:
 * xfs_fsmap_head.fmh_keys	-- low and high fsmap keys passed in;
 *				   these reflect fs-wide sector addrs.
 * dkeys			-- fmh_keys used to query each device;
 *				   these are fmh_keys but w/ the low key
 *				   bumped up by fmr_length.
 * xfs_getfsmap_info.next_daddr	-- next disk addr we expect to see; this
 *				   is how we detect gaps in the fsmap
 *				   records and report them.
 * xfs_getfsmap_info.low/high	-- per-AG low/high keys computed from
 *				   dkeys; used to query the metadata.
 */
STATIC int
xfs_getfsmap(
	struct xfs_mount		*mp,
	struct xfs_fsmap_head		*head,
	struct fsmap			*fsmap_recs)
{
	struct xfs_trans		*tp = NULL;
	struct xfs_fsmap		dkeys[2];	/* per-dev keys */
	struct xfs_getfsmap_dev	handlers[XFS_GETFSMAP_DEVS];
	struct xfs_getfsmap_info	info = { NULL };
	bool				use_rmap;
	int				i;
	int				error = 0;

	if (head->fmh_iflags & ~FMH_IF_VALID)
		return -EINVAL;
	if (!xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[0]) ||
	    !xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[1]))
		return -EINVAL;
	if (!xfs_getfsmap_check_keys(&head->fmh_keys[0], &head->fmh_keys[1]))
		return -EINVAL;

	use_rmap = xfs_has_rmapbt(mp) &&
		   has_capability_noaudit(current, CAP_SYS_ADMIN);
	head->fmh_entries = 0;

	/* Set up our device handlers. */
	memset(handlers, 0, sizeof(handlers));
	handlers[0].nr_sectors = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
	handlers[0].dev = new_encode_dev(mp->m_ddev_targp->bt_dev);
	if (use_rmap)
		handlers[0].fn = xfs_getfsmap_datadev_rmapbt;
	else
		handlers[0].fn = xfs_getfsmap_datadev_bnobt;
	if (mp->m_logdev_targp != mp->m_ddev_targp) {
		handlers[1].nr_sectors = XFS_FSB_TO_BB(mp,
						       mp->m_sb.sb_logblocks);
		handlers[1].dev = new_encode_dev(mp->m_logdev_targp->bt_dev);
		handlers[1].fn = xfs_getfsmap_logdev;
	}
#ifdef CONFIG_XFS_RT
	if (mp->m_rtdev_targp) {
		handlers[2].nr_sectors = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
		handlers[2].dev = new_encode_dev(mp->m_rtdev_targp->bt_dev);
		handlers[2].fn = xfs_getfsmap_rtdev_rtbitmap;
	}
#endif /* CONFIG_XFS_RT */

	xfs_sort(handlers, XFS_GETFSMAP_DEVS, sizeof(struct xfs_getfsmap_dev),
			xfs_getfsmap_dev_compare);

	/*
	 * To continue where we left off, we allow userspace to use the
	 * last mapping from a previous call as the low key of the next.
	 * This is identified by a non-zero length in the low key. We
	 * have to increment the low key in this scenario to ensure we
	 * don't return the same mapping again, and instead return the
	 * very next mapping.
	 *
	 * If the low key mapping refers to file data, the same physical
	 * blocks could be mapped to several other files/offsets.
	 * According to rmapbt record ordering, the minimal next
	 * possible record for the block range is the next starting
	 * offset in the same inode. Therefore, each fsmap backend bumps
	 * the file offset to continue the search appropriately. For
	 * all other low key mapping types (attr blocks, metadata), each
	 * fsmap backend bumps the physical offset as there can be no
	 * other mapping for the same physical block range.
	 */
	dkeys[0] = head->fmh_keys[0];
	memset(&dkeys[1], 0xFF, sizeof(struct xfs_fsmap));

	info.next_daddr = head->fmh_keys[0].fmr_physical +
			  head->fmh_keys[0].fmr_length;
	info.end_daddr = XFS_BUF_DADDR_NULL;
	info.fsmap_recs = fsmap_recs;
	info.head = head;

	/* For each device we support... */
	for (i = 0; i < XFS_GETFSMAP_DEVS; i++) {
		/* Is this device within the range the user asked for? */
		if (!handlers[i].fn)
			continue;
		if (head->fmh_keys[0].fmr_device > handlers[i].dev)
			continue;
		if (head->fmh_keys[1].fmr_device < handlers[i].dev)
			break;

		/*
		 * If this device number matches the high key, we have
		 * to pass the high key to the handler to limit the
		 * query results.  If the device number exceeds the
		 * low key, zero out the low key so that we get
		 * everything from the beginning.
		 */
		if (handlers[i].dev == head->fmh_keys[1].fmr_device) {
			dkeys[1] = head->fmh_keys[1];
			info.end_daddr = min(handlers[i].nr_sectors - 1,
					     dkeys[1].fmr_physical);
		}
		if (handlers[i].dev > head->fmh_keys[0].fmr_device)
			memset(&dkeys[0], 0, sizeof(struct xfs_fsmap));

		/*
		 * Grab an empty transaction so that we can use its recursive
		 * buffer locking abilities to detect cycles in the rmapbt
		 * without deadlocking.
		 */
		error = xfs_trans_alloc_empty(mp, &tp);
		if (error)
			break;

		info.dev = handlers[i].dev;
		info.last = false;
		info.group = NULL;
		info.low_daddr = XFS_BUF_DADDR_NULL;
		info.low.rm_blockcount = 0;
		error = handlers[i].fn(tp, dkeys, &info);
		if (error)
			break;
		xfs_trans_cancel(tp);
		tp = NULL;
		info.next_daddr = 0;
	}

	if (tp)
		xfs_trans_cancel(tp);
	head->fmh_oflags = FMH_OF_DEV_T;
	return error;
}

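/*
 * Illustrative userspace usage (a sketch, not part of this file): callers
 * normally drive GETFSMAP in a loop, copying the last record returned into
 * the low key of the next call, until a record carries FMR_OF_LAST.  The
 * fd, buffer size, and process() callback below are hypothetical; fd is an
 * open descriptor on any file in the target filesystem.
 *
 *	struct fsmap_head *head;
 *	unsigned int i;
 *
 *	head = calloc(1, sizeof(*head) + 128 * sizeof(struct fsmap));
 *	head->fmh_count = 128;
 *	head->fmh_keys[1].fmr_device = UINT_MAX;
 *	head->fmh_keys[1].fmr_flags = UINT_MAX;
 *	head->fmh_keys[1].fmr_physical = ULLONG_MAX;
 *	head->fmh_keys[1].fmr_owner = ULLONG_MAX;
 *	head->fmh_keys[1].fmr_offset = ULLONG_MAX;
 *
 *	for (;;) {
 *		if (ioctl(fd, FS_IOC_GETFSMAP, head) < 0)
 *			break;
 *		if (head->fmh_entries == 0)
 *			break;
 *		for (i = 0; i < head->fmh_entries; i++)
 *			process(&head->fmh_recs[i]);
 *		if (head->fmh_recs[head->fmh_entries - 1].fmr_flags &
 *		    FMR_OF_LAST)
 *			break;
 *		head->fmh_keys[0] = head->fmh_recs[head->fmh_entries - 1];
 *	}
 */
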
int
xfs_ioc_getfsmap(
	struct xfs_inode	*ip,
	struct fsmap_head	__user *arg)
{
	struct xfs_fsmap_head	xhead = {0};
	struct fsmap_head	head;
	struct fsmap		*recs;
	unsigned int		count;
	__u32			last_flags = 0;
	bool			done = false;
	int			error;

	if (copy_from_user(&head, arg, sizeof(struct fsmap_head)))
		return -EFAULT;
	if (memchr_inv(head.fmh_reserved, 0, sizeof(head.fmh_reserved)) ||
	    memchr_inv(head.fmh_keys[0].fmr_reserved, 0,
		       sizeof(head.fmh_keys[0].fmr_reserved)) ||
	    memchr_inv(head.fmh_keys[1].fmr_reserved, 0,
		       sizeof(head.fmh_keys[1].fmr_reserved)))
		return -EINVAL;

	/*
	 * Use an internal memory buffer so that we don't have to copy fsmap
	 * data to userspace while holding locks.  Start by trying to allocate
	 * up to 128k for the buffer, but fall back to a single page if needed.
	 */
	count = min_t(unsigned int, head.fmh_count,
			131072 / sizeof(struct fsmap));
	recs = kvcalloc(count, sizeof(struct fsmap), GFP_KERNEL);
	if (!recs) {
		count = min_t(unsigned int, head.fmh_count,
				PAGE_SIZE / sizeof(struct fsmap));
		recs = kvcalloc(count, sizeof(struct fsmap), GFP_KERNEL);
		if (!recs)
			return -ENOMEM;
	}

	xhead.fmh_iflags = head.fmh_iflags;
	xfs_fsmap_to_internal(&xhead.fmh_keys[0], &head.fmh_keys[0]);
	xfs_fsmap_to_internal(&xhead.fmh_keys[1], &head.fmh_keys[1]);

	trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]);
	trace_xfs_getfsmap_high_key(ip->i_mount, &xhead.fmh_keys[1]);

	head.fmh_entries = 0;
	do {
		struct fsmap __user	*user_recs;
		struct fsmap		*last_rec;

		user_recs = &arg->fmh_recs[head.fmh_entries];
		xhead.fmh_entries = 0;
		xhead.fmh_count = min_t(unsigned int, count,
					head.fmh_count - head.fmh_entries);

		/* Run query, record how many entries we got. */
		error = xfs_getfsmap(ip->i_mount, &xhead, recs);
		switch (error) {
		case 0:
			/*
			 * There are no more records in the result set.  Copy
			 * whatever we got to userspace and break out.
			 */
			done = true;
			break;
		case -ECANCELED:
			/*
			 * The internal memory buffer is full.  Copy whatever
			 * records we got to userspace and go again if we have
			 * not yet filled the userspace buffer.
			 */
			error = 0;
			break;
		default:
			goto out_free;
		}
		head.fmh_entries += xhead.fmh_entries;
		head.fmh_oflags = xhead.fmh_oflags;

		/*
		 * If the caller wanted a record count or there aren't any
		 * new records to return, we're done.
		 */
		if (head.fmh_count == 0 || xhead.fmh_entries == 0)
			break;

		/* Copy all the records we got out to userspace. */
		if (copy_to_user(user_recs, recs,
				 xhead.fmh_entries * sizeof(struct fsmap))) {
			error = -EFAULT;
			goto out_free;
		}

		/* Remember the last record flags we copied to userspace. */
		last_rec = &recs[xhead.fmh_entries - 1];
		last_flags = last_rec->fmr_flags;

		/* Set up the low key for the next iteration. */
		xfs_fsmap_to_internal(&xhead.fmh_keys[0], last_rec);
		trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]);
	} while (!done && head.fmh_entries < head.fmh_count);

	/*
	 * If there are no more records in the query result set and we're not
	 * in counting mode, mark the last record returned with the LAST flag.
	 */
	if (done && head.fmh_count > 0 && head.fmh_entries > 0) {
		struct fsmap __user	*user_rec;

		last_flags |= FMR_OF_LAST;
		user_rec = &arg->fmh_recs[head.fmh_entries - 1];

		if (copy_to_user(&user_rec->fmr_flags, &last_flags,
					sizeof(last_flags))) {
			error = -EFAULT;
			goto out_free;
		}
	}

	/* copy back header */
	if (copy_to_user(arg, &head, sizeof(struct fsmap_head))) {