// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2017 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_btree.h"
#include "xfs_rmap_btree.h"
#include "xfs_trace.h"
#include "xfs_rmap.h"
#include "xfs_alloc.h"
#include "xfs_bit.h"
#include <linux/fsmap.h>
#include "xfs_fsmap.h"
#include "xfs_refcount.h"
#include "xfs_refcount_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_rtbitmap.h"
#include "xfs_ag.h"
/* Convert an xfs_fsmap to an fsmap. */
static void
xfs_fsmap_from_internal(
	struct fsmap		*dest,
	struct xfs_fsmap	*src)
{
	dest->fmr_device = src->fmr_device;
	dest->fmr_flags = src->fmr_flags;
	dest->fmr_physical = BBTOB(src->fmr_physical);
	dest->fmr_owner = src->fmr_owner;
	dest->fmr_offset = BBTOB(src->fmr_offset);
	dest->fmr_length = BBTOB(src->fmr_length);
	dest->fmr_reserved[0] = 0;
	dest->fmr_reserved[1] = 0;
	dest->fmr_reserved[2] = 0;
}

/* Convert an fsmap to an xfs_fsmap. */
static void
xfs_fsmap_to_internal(
	struct xfs_fsmap	*dest,
	struct fsmap		*src)
{
	dest->fmr_device = src->fmr_device;
	dest->fmr_flags = src->fmr_flags;
	dest->fmr_physical = BTOBBT(src->fmr_physical);
	dest->fmr_owner = src->fmr_owner;
	dest->fmr_offset = BTOBBT(src->fmr_offset);
	dest->fmr_length = BTOBBT(src->fmr_length);
}
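/*
 * Unit note on the two converters above: the kernel-internal struct
 * xfs_fsmap keeps fmr_physical, fmr_offset, and fmr_length in 512-byte
 * basic blocks (daddr units), while the userspace struct fsmap uses bytes.
 * As an illustration, BBTOB(8) == 4096 bytes on the way out to userspace,
 * and BTOBBT(4096) == 8 basic blocks on the way back in; BTOBBT truncates,
 * so byte values that are not 512-byte aligned round down.
 */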
/* Convert an fsmap owner into an rmapbt owner. */
static int
xfs_fsmap_owner_to_rmap(
	struct xfs_rmap_irec	*dest,
	const struct xfs_fsmap	*src)
{
	if (!(src->fmr_flags & FMR_OF_SPECIAL_OWNER)) {
		dest->rm_owner = src->fmr_owner;
		return 0;
	}

	switch (src->fmr_owner) {
	case 0:			/* "lowest owner id possible" */
	case -1ULL:		/* "highest owner id possible" */
		dest->rm_owner = src->fmr_owner;
		break;
	case XFS_FMR_OWN_FREE:
		dest->rm_owner = XFS_RMAP_OWN_NULL;
		break;
	case XFS_FMR_OWN_UNKNOWN:
		dest->rm_owner = XFS_RMAP_OWN_UNKNOWN;
		break;
	case XFS_FMR_OWN_FS:
		dest->rm_owner = XFS_RMAP_OWN_FS;
		break;
	case XFS_FMR_OWN_LOG:
		dest->rm_owner = XFS_RMAP_OWN_LOG;
		break;
	case XFS_FMR_OWN_AG:
		dest->rm_owner = XFS_RMAP_OWN_AG;
		break;
	case XFS_FMR_OWN_INOBT:
		dest->rm_owner = XFS_RMAP_OWN_INOBT;
		break;
	case XFS_FMR_OWN_INODES:
		dest->rm_owner = XFS_RMAP_OWN_INODES;
		break;
	case XFS_FMR_OWN_REFC:
		dest->rm_owner = XFS_RMAP_OWN_REFC;
		break;
	case XFS_FMR_OWN_COW:
		dest->rm_owner = XFS_RMAP_OWN_COW;
		break;
	case XFS_FMR_OWN_DEFECTIVE:	/* not implemented */
		fallthrough;
	default:
		return -EINVAL;
	}
	return 0;
}
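/*
 * For example, a caller asking for free space records passes an fsmap key
 * with FMR_OF_SPECIAL_OWNER set and fmr_owner == XFS_FMR_OWN_FREE; the
 * helper above translates that into the rmapbt's XFS_RMAP_OWN_NULL owner
 * code before the btree query runs.  Ordinary inode numbers pass through
 * unchanged because they are not special owners.
 */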
/* Convert an rmapbt owner into an fsmap owner. */
static int
xfs_fsmap_owner_from_rmap(
	struct xfs_fsmap		*dest,
	const struct xfs_rmap_irec	*src)
{
	dest->fmr_flags = 0;
	if (!XFS_RMAP_NON_INODE_OWNER(src->rm_owner)) {
		dest->fmr_owner = src->rm_owner;
		return 0;
	}
	dest->fmr_flags |= FMR_OF_SPECIAL_OWNER;

	switch (src->rm_owner) {
	case XFS_RMAP_OWN_FS:
		dest->fmr_owner = XFS_FMR_OWN_FS;
		break;
	case XFS_RMAP_OWN_LOG:
		dest->fmr_owner = XFS_FMR_OWN_LOG;
		break;
	case XFS_RMAP_OWN_AG:
		dest->fmr_owner = XFS_FMR_OWN_AG;
		break;
	case XFS_RMAP_OWN_INOBT:
		dest->fmr_owner = XFS_FMR_OWN_INOBT;
		break;
	case XFS_RMAP_OWN_INODES:
		dest->fmr_owner = XFS_FMR_OWN_INODES;
		break;
	case XFS_RMAP_OWN_REFC:
		dest->fmr_owner = XFS_FMR_OWN_REFC;
		break;
	case XFS_RMAP_OWN_COW:
		dest->fmr_owner = XFS_FMR_OWN_COW;
		break;
	case XFS_RMAP_OWN_NULL:	/* "free" */
		dest->fmr_owner = XFS_FMR_OWN_FREE;
		break;
	default:
		ASSERT(0);
		return -EFSCORRUPTED;
	}
	return 0;
}
/* getfsmap query state */
struct xfs_getfsmap_info {
	struct xfs_fsmap_head	*head;
	struct fsmap		*fsmap_recs;	/* mapping records */
	struct xfs_buf		*agf_bp;	/* AGF, for refcount queries */
	struct xfs_perag	*pag;		/* AG info, if applicable */
	xfs_daddr_t		next_daddr;	/* next daddr we expect */
	/* daddr of low fsmap key when we're using the rtbitmap */
	xfs_daddr_t		low_daddr;
	xfs_daddr_t		end_daddr;	/* daddr of high fsmap key */
	u64			missing_owner;	/* owner of holes */
	u32			dev;		/* device id */
	/*
	 * Low rmap key for the query.  If low.rm_blockcount is nonzero, this
	 * is the second (or later) call to retrieve the recordset in pieces.
	 * xfs_getfsmap_rec_before_start will compare all records retrieved
	 * by the rmapbt query to filter out any records that start before
	 * the last record.
	 */
	struct xfs_rmap_irec	low;
	struct xfs_rmap_irec	high;		/* high rmap key */
	bool			last;		/* last extent? */
};
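/*
 * The low/low_daddr fields double as continuation state.  On the first
 * call for a device, low.rm_blockcount is zero; if userspace passes the
 * last record it received back in as the low key, low.rm_blockcount is
 * nonzero and the backends use it (or low_daddr, for the linearly
 * addressed log and realtime devices) to skip everything at or before
 * that record on the next pass.
 */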
/* Associate a device with a getfsmap handler. */
struct xfs_getfsmap_dev {
	u32			dev;
	int			(*fn)(struct xfs_trans *tp,
				      const struct xfs_fsmap *keys,
				      struct xfs_getfsmap_info *info);
	sector_t		nr_sectors;
};
/* Compare two getfsmap device handlers. */
static int
xfs_getfsmap_dev_compare(
	const void			*p1,
	const void			*p2)
{
	const struct xfs_getfsmap_dev	*d1 = p1;
	const struct xfs_getfsmap_dev	*d2 = p2;

	return d1->dev - d2->dev;
}
/* Decide if this mapping is shared. */
STATIC int
xfs_getfsmap_is_shared(
	struct xfs_trans		*tp,
	struct xfs_getfsmap_info	*info,
	const struct xfs_rmap_irec	*rec,
	bool				*stat)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_btree_cur		*cur;
	xfs_agblock_t			fbno;
	xfs_extlen_t			flen;
	int				error;

	*stat = false;
	if (!xfs_has_reflink(mp))
		return 0;
	/* rt files will have no perag structure */
	if (!info->pag)
		return 0;

	/* Are there any shared blocks here? */
	flen = 0;
	cur = xfs_refcountbt_init_cursor(mp, tp, info->agf_bp, info->pag);

	error = xfs_refcount_find_shared(cur, rec->rm_startblock,
			rec->rm_blockcount, &fbno, &flen, false);

	xfs_btree_del_cursor(cur, error);
	if (error)
		return error;

	*stat = flen > 0;
	return 0;
}
static void
xfs_getfsmap_format(
	struct xfs_mount		*mp,
	struct xfs_fsmap		*xfm,
	struct xfs_getfsmap_info	*info)
{
	struct fsmap			*rec;

	trace_xfs_getfsmap_mapping(mp, xfm);

	rec = &info->fsmap_recs[info->head->fmh_entries++];
	xfs_fsmap_from_internal(rec, xfm);
}
static inline bool
xfs_getfsmap_rec_before_start(
	struct xfs_getfsmap_info	*info,
	const struct xfs_rmap_irec	*rec,
	xfs_daddr_t			rec_daddr)
{
	if (info->low_daddr != XFS_BUF_DADDR_NULL)
		return rec_daddr < info->low_daddr;
	if (info->low.rm_blockcount)
		return xfs_rmap_compare(rec, &info->low) < 0;
	return false;
}
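/*
 * Two filters are in play here: btree-backed data device queries compare
 * whole rmap records against info->low (start block, then owner, then
 * offset), while the linearly addressed log and realtime backends only
 * set info->low_daddr and filter on the record's disk address.  For
 * example, a continued rtbitmap query that already returned everything
 * below daddr 4096 sets low_daddr = 4096 and drops any record that starts
 * before that sector.
 */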
/*
 * Format a reverse mapping for getfsmap, having translated rm_startblock
 * into the appropriate daddr units.  Pass in a nonzero @len_daddr if the
 * length could be larger than rm_blockcount in struct xfs_rmap_irec.
 */
STATIC int
xfs_getfsmap_helper(
	struct xfs_trans		*tp,
	struct xfs_getfsmap_info	*info,
	const struct xfs_rmap_irec	*rec,
	xfs_daddr_t			rec_daddr,
	xfs_daddr_t			len_daddr)
{
	struct xfs_fsmap		fmr;
	struct xfs_mount		*mp = tp->t_mountp;
	bool				shared;
	int				error = 0;

	if (fatal_signal_pending(current))
		return -EINTR;

	if (len_daddr == 0)
		len_daddr = XFS_FSB_TO_BB(mp, rec->rm_blockcount);

	/*
	 * Filter out records that start before our startpoint, if the
	 * caller requested that.
	 */
	if (xfs_getfsmap_rec_before_start(info, rec, rec_daddr)) {
		rec_daddr += len_daddr;
		if (info->next_daddr < rec_daddr)
			info->next_daddr = rec_daddr;
		return 0;
	}

	/*
	 * For an info->last query, we're looking for a gap between the last
	 * mapping emitted and the high key specified by userspace.  If the
	 * user's query spans less than 1 fsblock, then info->high and
	 * info->low will have the same rm_startblock, which causes rec_daddr
	 * and next_daddr to be the same.  Therefore, use the end_daddr that
	 * we calculated from userspace's high key to synthesize the record.
	 * Note that if the btree query found a mapping, there won't be a gap.
	 */
	if (info->last && info->end_daddr != XFS_BUF_DADDR_NULL)
		rec_daddr = info->end_daddr;

	/* Are we just counting mappings? */
	if (info->head->fmh_count == 0) {
		if (info->head->fmh_entries == UINT_MAX)
			return -ECANCELED;

		if (rec_daddr > info->next_daddr)
			info->head->fmh_entries++;

		if (info->last)
			return 0;

		info->head->fmh_entries++;

		rec_daddr += len_daddr;
		if (info->next_daddr < rec_daddr)
			info->next_daddr = rec_daddr;
		return 0;
	}

	/*
	 * If the record starts past the last physical block we saw,
	 * then we've found a gap.  Report the gap as being owned by
	 * whatever the caller specified is the missing owner.
	 */
	if (rec_daddr > info->next_daddr) {
		if (info->head->fmh_entries >= info->head->fmh_count)
			return -ECANCELED;

		fmr.fmr_device = info->dev;
		fmr.fmr_physical = info->next_daddr;
		fmr.fmr_owner = info->missing_owner;
		fmr.fmr_offset = 0;
		fmr.fmr_length = rec_daddr - info->next_daddr;
		fmr.fmr_flags = FMR_OF_SPECIAL_OWNER;
		xfs_getfsmap_format(mp, &fmr, info);
	}

	if (info->last)
		goto out;

	/* Fill out the extent we found */
	if (info->head->fmh_entries >= info->head->fmh_count)
		return -ECANCELED;

	trace_xfs_fsmap_mapping(mp, info->dev,
			info->pag ? info->pag->pag_agno : NULLAGNUMBER, rec);

	fmr.fmr_device = info->dev;
	fmr.fmr_physical = rec_daddr;
	error = xfs_fsmap_owner_from_rmap(&fmr, rec);
	if (error)
		return error;
	fmr.fmr_offset = XFS_FSB_TO_BB(mp, rec->rm_offset);
	fmr.fmr_length = len_daddr;
	if (rec->rm_flags & XFS_RMAP_UNWRITTEN)
		fmr.fmr_flags |= FMR_OF_PREALLOC;
	if (rec->rm_flags & XFS_RMAP_ATTR_FORK)
		fmr.fmr_flags |= FMR_OF_ATTR_FORK;
	if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK)
		fmr.fmr_flags |= FMR_OF_EXTENT_MAP;
	if (fmr.fmr_flags == 0) {
		error = xfs_getfsmap_is_shared(tp, info, rec, &shared);
		if (error)
			return error;
		if (shared)
			fmr.fmr_flags |= FMR_OF_SHARED;
	}

	xfs_getfsmap_format(mp, &fmr, info);
out:
	rec_daddr += len_daddr;
	if (info->next_daddr < rec_daddr)
		info->next_daddr = rec_daddr;
	return 0;
}
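/*
 * Worked example of the gap logic above: if the previous record ended at
 * info->next_daddr == 1000 and the btree hands us a record starting at
 * rec_daddr == 1500, we first emit a synthetic 500-sector mapping owned
 * by info->missing_owner (XFS_FMR_OWN_FREE when the rmapbt is backing
 * the query, XFS_FMR_OWN_UNKNOWN for the bnobt/rtbitmap fallbacks) and
 * only then format the real record.
 */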
/* Transform a rmapbt irec into a fsmap */
STATIC int
xfs_getfsmap_datadev_helper(
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*rec,
	void				*priv)
{
	struct xfs_mount		*mp = cur->bc_mp;
	struct xfs_getfsmap_info	*info = priv;
	xfs_fsblock_t			fsb;
	xfs_daddr_t			rec_daddr;

	fsb = XFS_AGB_TO_FSB(mp, cur->bc_ag.pag->pag_agno, rec->rm_startblock);
	rec_daddr = XFS_FSB_TO_DADDR(mp, fsb);

	return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr, 0);
}
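/*
 * Address translation used above, as a concrete example: an rmapbt record
 * at agbno 100 in AG 3 becomes the filesystem block
 * XFS_AGB_TO_FSB(mp, 3, 100) == (3 << mp->m_sb.sb_agblklog) | 100, and
 * XFS_FSB_TO_DADDR() then turns that block number into the 512-byte
 * sector address that getfsmap reports.
 */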
/* Transform a bnobt irec into a fsmap */
STATIC int
xfs_getfsmap_datadev_bnobt_helper(
	struct xfs_btree_cur		*cur,
	const struct xfs_alloc_rec_incore *rec,
	void				*priv)
{
	struct xfs_mount		*mp = cur->bc_mp;
	struct xfs_getfsmap_info	*info = priv;
	struct xfs_rmap_irec		irec;
	xfs_daddr_t			rec_daddr;

	rec_daddr = XFS_AGB_TO_DADDR(mp, cur->bc_ag.pag->pag_agno,
			rec->ar_startblock);

	irec.rm_startblock = rec->ar_startblock;
	irec.rm_blockcount = rec->ar_blockcount;
	irec.rm_owner = XFS_RMAP_OWN_NULL;	/* "free" */
	irec.rm_offset = 0;
	irec.rm_flags = 0;

	return xfs_getfsmap_helper(cur->bc_tp, info, &irec, rec_daddr, 0);
}
/* Set rmap flags based on the getfsmap flags */
static void
xfs_getfsmap_set_irec_flags(
	struct xfs_rmap_irec	*irec,
	const struct xfs_fsmap	*fmr)
{
	irec->rm_flags = 0;
	if (fmr->fmr_flags & FMR_OF_ATTR_FORK)
		irec->rm_flags |= XFS_RMAP_ATTR_FORK;
	if (fmr->fmr_flags & FMR_OF_EXTENT_MAP)
		irec->rm_flags |= XFS_RMAP_BMBT_BLOCK;
	if (fmr->fmr_flags & FMR_OF_PREALLOC)
		irec->rm_flags |= XFS_RMAP_UNWRITTEN;
}
static inline bool
rmap_not_shareable(struct xfs_mount *mp, const struct xfs_rmap_irec *r)
{
	if (!xfs_has_reflink(mp))
		return true;
	if (XFS_RMAP_NON_INODE_OWNER(r->rm_owner))
		return true;
	if (r->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK |
			   XFS_RMAP_UNWRITTEN))
		return true;
	return false;
}
/* Execute a getfsmap query against the regular data device. */
STATIC int
__xfs_getfsmap_datadev(
	struct xfs_trans		*tp,
	const struct xfs_fsmap		*keys,
	struct xfs_getfsmap_info	*info,
	int				(*query_fn)(struct xfs_trans *,
						    struct xfs_getfsmap_info *,
						    struct xfs_btree_cur **,
						    void *),
	void				*priv)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_perag		*pag;
	struct xfs_btree_cur		*bt_cur = NULL;
	xfs_fsblock_t			start_fsb;
	xfs_fsblock_t			end_fsb;
	xfs_agnumber_t			start_ag;
	xfs_agnumber_t			end_ag;
	uint64_t			eofs;
	int				error = 0;

	eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
	if (keys[0].fmr_physical >= eofs)
		return 0;
	start_fsb = XFS_DADDR_TO_FSB(mp, keys[0].fmr_physical);
	end_fsb = XFS_DADDR_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));

	/*
	 * Convert the fsmap low/high keys to AG based keys.  Initialize
	 * low to the fsmap low key and max out the high key to the end
	 * of the AG.
	 */
	info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset);
	error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]);
	if (error)
		return error;
	info->low.rm_blockcount = XFS_BB_TO_FSBT(mp, keys[0].fmr_length);
	xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);

	/* Adjust the low key if we are continuing from where we left off. */
	if (info->low.rm_blockcount == 0) {
		/* No previous record from which to continue */
	} else if (rmap_not_shareable(mp, &info->low)) {
		/* Last record seen was an unshareable extent */
		info->low.rm_owner = 0;
		info->low.rm_offset = 0;

		start_fsb += info->low.rm_blockcount;
		if (XFS_FSB_TO_DADDR(mp, start_fsb) >= eofs)
			return 0;
	} else {
		/* Last record seen was a shareable file data extent */
		info->low.rm_offset += info->low.rm_blockcount;
	}
	info->low.rm_startblock = XFS_FSB_TO_AGBNO(mp, start_fsb);

	info->high.rm_startblock = -1U;
	info->high.rm_owner = ULLONG_MAX;
	info->high.rm_offset = ULLONG_MAX;
	info->high.rm_blockcount = 0;
	info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS;

	start_ag = XFS_FSB_TO_AGNO(mp, start_fsb);
	end_ag = XFS_FSB_TO_AGNO(mp, end_fsb);

	for_each_perag_range(mp, start_ag, end_ag, pag) {
		/*
		 * Set the AG high key from the fsmap high key if this
		 * is the last AG that we're querying.
		 */
		info->pag = pag;
		if (pag->pag_agno == end_ag) {
			info->high.rm_startblock = XFS_FSB_TO_AGBNO(mp,
					end_fsb);
			info->high.rm_offset = XFS_BB_TO_FSBT(mp,
					keys[1].fmr_offset);
			error = xfs_fsmap_owner_to_rmap(&info->high, &keys[1]);
			if (error)
				break;
			xfs_getfsmap_set_irec_flags(&info->high, &keys[1]);
		}

		if (bt_cur) {
			xfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
			bt_cur = NULL;
			xfs_trans_brelse(tp, info->agf_bp);
			info->agf_bp = NULL;
		}

		error = xfs_alloc_read_agf(pag, tp, 0, &info->agf_bp);
		if (error)
			break;

		trace_xfs_fsmap_low_key(mp, info->dev, pag->pag_agno,
				&info->low);
		trace_xfs_fsmap_high_key(mp, info->dev, pag->pag_agno,
				&info->high);

		error = query_fn(tp, info, &bt_cur, priv);
		if (error)
			break;

		/*
		 * Set the AG low key to the start of the AG prior to
		 * moving on to the next AG.
		 */
		if (pag->pag_agno == start_ag)
			memset(&info->low, 0, sizeof(info->low));

		/*
		 * If this is the last AG, report any gap at the end of it
		 * before we drop the reference to the perag when the loop
		 * finishes.
		 */
		if (pag->pag_agno == end_ag) {
			info->last = true;
			error = query_fn(tp, info, &bt_cur, priv);
			if (error)
				break;
			info->last = false;
		}
		info->pag = NULL;
	}

	if (bt_cur)
		xfs_btree_del_cursor(bt_cur, error < 0 ? XFS_BTREE_ERROR :
							 XFS_BTREE_NOERROR);
	if (info->agf_bp) {
		xfs_trans_brelse(tp, info->agf_bp);
		info->agf_bp = NULL;
	}
	if (info->pag) {
		xfs_perag_rele(info->pag);
		info->pag = NULL;
	} else if (pag) {
		/* loop termination case */
		xfs_perag_rele(pag);
	}

	return error;
}
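/*
 * To make the key conversion above concrete: a caller's low key of
 * fmr_physical == 8192 sectors on a filesystem with 4096-byte blocks
 * (values chosen only for illustration) becomes start_fsb == 1024, and
 * XFS_FSB_TO_AGNO()/XFS_FSB_TO_AGBNO() split that into the starting AG
 * and the AG-relative block that seeds info->low.rm_startblock.  Each
 * later AG in the loop then restarts from a zeroed low key, and only the
 * final AG gets a high key tighter than "end of AG".
 */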
/* Actually query the rmap btree. */
STATIC int
xfs_getfsmap_datadev_rmapbt_query(
	struct xfs_trans		*tp,
	struct xfs_getfsmap_info	*info,
	struct xfs_btree_cur		**curpp,
	void				*priv)
{
	/* Report any gap at the end of the last AG. */
	if (info->last)
		return xfs_getfsmap_datadev_helper(*curpp, &info->high, info);

	/* Allocate cursor for this AG and query_range it. */
	*curpp = xfs_rmapbt_init_cursor(tp->t_mountp, tp, info->agf_bp,
			info->pag);
	return xfs_rmap_query_range(*curpp, &info->low, &info->high,
			xfs_getfsmap_datadev_helper, info);
}
/* Execute a getfsmap query against the regular data device rmapbt. */
STATIC int
xfs_getfsmap_datadev_rmapbt(
	struct xfs_trans		*tp,
	const struct xfs_fsmap		*keys,
	struct xfs_getfsmap_info	*info)
{
	info->missing_owner = XFS_FMR_OWN_FREE;
	return __xfs_getfsmap_datadev(tp, keys, info,
			xfs_getfsmap_datadev_rmapbt_query, NULL);
}
/* Actually query the bno btree. */
STATIC int
xfs_getfsmap_datadev_bnobt_query(
	struct xfs_trans		*tp,
	struct xfs_getfsmap_info	*info,
	struct xfs_btree_cur		**curpp,
	void				*priv)
{
	struct xfs_alloc_rec_incore	*key = priv;

	/* Report any gap at the end of the last AG. */
	if (info->last)
		return xfs_getfsmap_datadev_bnobt_helper(*curpp, &key[1], info);

	/* Allocate cursor for this AG and query_range it. */
	*curpp = xfs_bnobt_init_cursor(tp->t_mountp, tp, info->agf_bp,
			info->pag);
	key->ar_startblock = info->low.rm_startblock;
	key[1].ar_startblock = info->high.rm_startblock;
	return xfs_alloc_query_range(*curpp, key, &key[1],
			xfs_getfsmap_datadev_bnobt_helper, info);
}
/* Execute a getfsmap query against the regular data device's bnobt. */
STATIC int
xfs_getfsmap_datadev_bnobt(
	struct xfs_trans		*tp,
	const struct xfs_fsmap		*keys,
	struct xfs_getfsmap_info	*info)
{
	struct xfs_alloc_rec_incore	akeys[2];

	memset(akeys, 0, sizeof(akeys));
	info->missing_owner = XFS_FMR_OWN_UNKNOWN;
	return __xfs_getfsmap_datadev(tp, keys, info,
			xfs_getfsmap_datadev_bnobt_query, &akeys[0]);
}
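/*
 * The bnobt path is the fallback when the filesystem has no rmapbt (or
 * the caller lacks CAP_SYS_ADMIN): the free space btree can only report
 * which blocks are free, so every allocated region shows up as a hole
 * and is reported with the XFS_FMR_OWN_UNKNOWN missing owner set above,
 * rather than with a real file or metadata owner.
 */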
/* Execute a getfsmap query against the log device. */
STATIC int
xfs_getfsmap_logdev(
	struct xfs_trans		*tp,
	const struct xfs_fsmap		*keys,
	struct xfs_getfsmap_info	*info)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_rmap_irec		rmap;
	xfs_daddr_t			rec_daddr, len_daddr;
	xfs_fsblock_t			start_fsb, end_fsb;
	uint64_t			eofs;

	eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
	if (keys[0].fmr_physical >= eofs)
		return 0;
	start_fsb = XFS_BB_TO_FSBT(mp,
				keys[0].fmr_physical + keys[0].fmr_length);
	end_fsb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));

	/* Adjust the low key if we are continuing from where we left off. */
	if (keys[0].fmr_length > 0)
		info->low_daddr = XFS_FSB_TO_BB(mp, start_fsb);

	trace_xfs_fsmap_low_key_linear(mp, info->dev, start_fsb);
	trace_xfs_fsmap_high_key_linear(mp, info->dev, end_fsb);

	if (start_fsb > 0)
		return 0;

	/* Fabricate an rmap entry for the external log device. */
	rmap.rm_startblock = 0;
	rmap.rm_blockcount = mp->m_sb.sb_logblocks;
	rmap.rm_owner = XFS_RMAP_OWN_LOG;
	rmap.rm_offset = 0;
	rmap.rm_flags = 0;

	rec_daddr = XFS_FSB_TO_BB(mp, rmap.rm_startblock);
	len_daddr = XFS_FSB_TO_BB(mp, rmap.rm_blockcount);
	return xfs_getfsmap_helper(tp, info, &rmap, rec_daddr, len_daddr);
}
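/*
 * An external log has no btree describing it, so the query above reports
 * at most one mapping: a single fabricated rmap covering sb_logblocks
 * blocks owned by XFS_RMAP_OWN_LOG (surfaced to userspace as the special
 * owner XFS_FMR_OWN_LOG).  A continuation call whose low key already has
 * a nonzero length therefore normally returns nothing further.
 */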
#ifdef CONFIG_XFS_RT
/* Transform a rtbitmap "record" into a fsmap */
STATIC int
xfs_getfsmap_rtdev_rtbitmap_helper(
	struct xfs_mount		*mp,
	struct xfs_trans		*tp,
	const struct xfs_rtalloc_rec	*rec,
	void				*priv)
{
	struct xfs_getfsmap_info	*info = priv;
	struct xfs_rmap_irec		irec;
	xfs_rtblock_t			rtbno;
	xfs_daddr_t			rec_daddr, len_daddr;

	rtbno = xfs_rtx_to_rtb(mp, rec->ar_startext);
	rec_daddr = XFS_FSB_TO_BB(mp, rtbno);
	irec.rm_startblock = rtbno;

	rtbno = xfs_rtx_to_rtb(mp, rec->ar_extcount);
	len_daddr = XFS_FSB_TO_BB(mp, rtbno);
	irec.rm_blockcount = rtbno;

	irec.rm_owner = XFS_RMAP_OWN_NULL;	/* "free" */
	irec.rm_offset = 0;
	irec.rm_flags = 0;

	return xfs_getfsmap_helper(tp, info, &irec, rec_daddr, len_daddr);
}
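/*
 * Realtime unit note: rtbitmap records are expressed in realtime extents
 * (ar_startext/ar_extcount), so xfs_rtx_to_rtb() multiplies by the rt
 * extent size to get filesystem blocks before XFS_FSB_TO_BB() converts
 * to sectors.  For example, with a 4-block rt extent size, ar_extcount
 * == 2 covers 8 blocks of free space.
 */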
/* Execute a getfsmap query against the realtime device rtbitmap. */
STATIC int
xfs_getfsmap_rtdev_rtbitmap(
	struct xfs_trans		*tp,
	const struct xfs_fsmap		*keys,
	struct xfs_getfsmap_info	*info)
{
	struct xfs_rtalloc_rec		ahigh = { 0 };
	struct xfs_mount		*mp = tp->t_mountp;
	xfs_rtblock_t			start_rtb;
	xfs_rtblock_t			end_rtb;
	xfs_rtxnum_t			high;
	uint64_t			eofs;
	int				error;

	eofs = XFS_FSB_TO_BB(mp, xfs_rtx_to_rtb(mp, mp->m_sb.sb_rextents));
	if (keys[0].fmr_physical >= eofs)
		return 0;
	start_rtb = XFS_BB_TO_FSBT(mp,
			keys[0].fmr_physical + keys[0].fmr_length);
	end_rtb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));

	info->missing_owner = XFS_FMR_OWN_UNKNOWN;

	/* Adjust the low key if we are continuing from where we left off. */
	if (keys[0].fmr_length > 0) {
		info->low_daddr = XFS_FSB_TO_BB(mp, start_rtb);
		if (info->low_daddr >= eofs)
			return 0;
	}

	trace_xfs_fsmap_low_key_linear(mp, info->dev, start_rtb);
	trace_xfs_fsmap_high_key_linear(mp, info->dev, end_rtb);

	xfs_rtbitmap_lock_shared(mp, XFS_RBMLOCK_BITMAP);

	/*
	 * Set up query parameters to return free rtextents covering the range
	 * we want.
	 */
	high = xfs_rtb_to_rtxup(mp, end_rtb);
	error = xfs_rtalloc_query_range(mp, tp, xfs_rtb_to_rtx(mp, start_rtb),
			high, xfs_getfsmap_rtdev_rtbitmap_helper, info);
	if (error)
		goto err;

	/*
	 * Report any gaps at the end of the rtbitmap by simulating a null
	 * rmap starting at the block after the end of the query range.
	 */
	info->last = true;
	ahigh.ar_startext = min(mp->m_sb.sb_rextents, high);

	error = xfs_getfsmap_rtdev_rtbitmap_helper(mp, tp, &ahigh, info);
	if (error)
		goto err;
err:
	xfs_rtbitmap_unlock_shared(mp, XFS_RBMLOCK_BITMAP);
	return error;
}
#endif /* CONFIG_XFS_RT */
/* Do we recognize the device? */
STATIC bool
xfs_getfsmap_is_valid_device(
	struct xfs_mount	*mp,
	struct xfs_fsmap	*fm)
{
	if (fm->fmr_device == 0 || fm->fmr_device == UINT_MAX ||
	    fm->fmr_device == new_encode_dev(mp->m_ddev_targp->bt_dev))
		return true;
	if (mp->m_logdev_targp &&
	    fm->fmr_device == new_encode_dev(mp->m_logdev_targp->bt_dev))
		return true;
	if (mp->m_rtdev_targp &&
	    fm->fmr_device == new_encode_dev(mp->m_rtdev_targp->bt_dev))
		return true;
	return false;
}
/* Ensure that the low key is less than the high key. */
STATIC bool
xfs_getfsmap_check_keys(
	struct xfs_fsmap		*low_key,
	struct xfs_fsmap		*high_key)
{
	if (low_key->fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) {
		if (low_key->fmr_offset)
			return false;
	}
	if (high_key->fmr_flags != -1U &&
	    (high_key->fmr_flags & (FMR_OF_SPECIAL_OWNER |
				    FMR_OF_EXTENT_MAP))) {
		if (high_key->fmr_offset && high_key->fmr_offset != -1ULL)
			return false;
	}
	if (high_key->fmr_length && high_key->fmr_length != -1ULL)
		return false;

	if (low_key->fmr_device > high_key->fmr_device)
		return false;
	if (low_key->fmr_device < high_key->fmr_device)
		return true;

	if (low_key->fmr_physical > high_key->fmr_physical)
		return false;
	if (low_key->fmr_physical < high_key->fmr_physical)
		return true;

	if (low_key->fmr_owner > high_key->fmr_owner)
		return false;
	if (low_key->fmr_owner < high_key->fmr_owner)
		return true;

	if (low_key->fmr_offset > high_key->fmr_offset)
		return false;
	if (low_key->fmr_offset < high_key->fmr_offset)
		return true;

	return false;
}
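/*
 * Example of the ordering enforced above: a low key of (dev A, physical
 * 0, owner 0, offset 0) and a high key of (dev A, physical ~0ULL, owner
 * ~0ULL, offset ~0ULL) is accepted, while swapping the two keys, or
 * passing identical keys, makes this helper return false and the query
 * fail with -EINVAL.  The device number is compared first, then physical
 * address, owner, and offset.
 */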
/*
 * There are only two devices if we didn't configure RT devices at build time.
 */
#ifdef CONFIG_XFS_RT
#define XFS_GETFSMAP_DEVS	3
#else
#define XFS_GETFSMAP_DEVS	2
#endif /* CONFIG_XFS_RT */
/*
 * Get filesystem's extents as described in head, and format for output. Fills
 * in the supplied records array until there are no more reverse mappings to
 * return or head.fmh_entries == head.fmh_count.  In the second case, this
 * function returns -ECANCELED to indicate that more records would have been
 * returned.
 *
 * There are multiple levels of keys and counters at work here:
 * xfs_fsmap_head.fmh_keys	-- low and high fsmap keys passed in;
 *				   these reflect fs-wide sector addrs.
 * dkeys			-- fmh_keys used to query each device;
 *				   these are fmh_keys but w/ the low key
 *				   bumped up by fmr_length.
 * xfs_getfsmap_info.next_daddr	-- next disk addr we expect to see; this
 *				   is how we detect gaps in the fsmap
 *				   records and report them.
 * xfs_getfsmap_info.low/high	-- per-AG low/high keys computed from
 *				   dkeys; used to query the metadata.
 */
STATIC int
xfs_getfsmap(
	struct xfs_mount	*mp,
	struct xfs_fsmap_head	*head,
	struct fsmap		*fsmap_recs)
{
	struct xfs_trans	*tp = NULL;
	struct xfs_fsmap	dkeys[2];	/* per-dev keys */
	struct xfs_getfsmap_dev	handlers[XFS_GETFSMAP_DEVS];
	struct xfs_getfsmap_info	info = { NULL };
	bool			use_rmap;
	int			i;
	int			error = 0;

	if (head->fmh_iflags & ~FMH_IF_VALID)
		return -EINVAL;
	if (!xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[0]) ||
	    !xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[1]))
		return -EINVAL;
	if (!xfs_getfsmap_check_keys(&head->fmh_keys[0], &head->fmh_keys[1]))
		return -EINVAL;

	use_rmap = xfs_has_rmapbt(mp) &&
		   has_capability_noaudit(current, CAP_SYS_ADMIN);
	head->fmh_entries = 0;

	/* Set up our device handlers. */
	memset(handlers, 0, sizeof(handlers));
	handlers[0].nr_sectors = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
	handlers[0].dev = new_encode_dev(mp->m_ddev_targp->bt_dev);
	if (use_rmap)
		handlers[0].fn = xfs_getfsmap_datadev_rmapbt;
	else
		handlers[0].fn = xfs_getfsmap_datadev_bnobt;
	if (mp->m_logdev_targp != mp->m_ddev_targp) {
		handlers[1].nr_sectors = XFS_FSB_TO_BB(mp,
						       mp->m_sb.sb_logblocks);
		handlers[1].dev = new_encode_dev(mp->m_logdev_targp->bt_dev);
		handlers[1].fn = xfs_getfsmap_logdev;
	}
#ifdef CONFIG_XFS_RT
	if (mp->m_rtdev_targp) {
		handlers[2].nr_sectors = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
		handlers[2].dev = new_encode_dev(mp->m_rtdev_targp->bt_dev);
		handlers[2].fn = xfs_getfsmap_rtdev_rtbitmap;
	}
#endif /* CONFIG_XFS_RT */

	xfs_sort(handlers, XFS_GETFSMAP_DEVS, sizeof(struct xfs_getfsmap_dev),
			xfs_getfsmap_dev_compare);

	/*
	 * To continue where we left off, we allow userspace to use the
	 * last mapping from a previous call as the low key of the next.
	 * This is identified by a non-zero length in the low key. We
	 * have to increment the low key in this scenario to ensure we
	 * don't return the same mapping again, and instead return the
	 * very next mapping.
	 *
	 * If the low key mapping refers to file data, the same physical
	 * blocks could be mapped to several other files/offsets.
	 * According to rmapbt record ordering, the minimal next
	 * possible record for the block range is the next starting
	 * offset in the same inode. Therefore, each fsmap backend bumps
	 * the file offset to continue the search appropriately. For
	 * all other low key mapping types (attr blocks, metadata), each
	 * fsmap backend bumps the physical offset as there can be no
	 * other mapping for the same physical block range.
	 */
	dkeys[0] = head->fmh_keys[0];
	memset(&dkeys[1], 0xFF, sizeof(struct xfs_fsmap));

	info.next_daddr = head->fmh_keys[0].fmr_physical +
			  head->fmh_keys[0].fmr_length;
	info.end_daddr = XFS_BUF_DADDR_NULL;
	info.fsmap_recs = fsmap_recs;
	info.head = head;

	/* For each device we support... */
	for (i = 0; i < XFS_GETFSMAP_DEVS; i++) {
		/* Is this device within the range the user asked for? */
		if (!handlers[i].fn)
			continue;
		if (head->fmh_keys[0].fmr_device > handlers[i].dev)
			continue;
		if (head->fmh_keys[1].fmr_device < handlers[i].dev)
			break;

		/*
		 * If this device number matches the high key, we have
		 * to pass the high key to the handler to limit the
		 * query results.  If the device number exceeds the
		 * low key, zero out the low key so that we get
		 * everything from the beginning.
		 */
		if (handlers[i].dev == head->fmh_keys[1].fmr_device) {
			dkeys[1] = head->fmh_keys[1];
			info.end_daddr = min(handlers[i].nr_sectors - 1,
					     dkeys[1].fmr_physical);
		}
		if (handlers[i].dev > head->fmh_keys[0].fmr_device)
			memset(&dkeys[0], 0, sizeof(struct xfs_fsmap));

		/*
		 * Grab an empty transaction so that we can use its recursive
		 * buffer locking abilities to detect cycles in the rmapbt
		 * without deadlocking.
		 */
		error = xfs_trans_alloc_empty(mp, &tp);
		if (error)
			break;

		info.dev = handlers[i].dev;
		info.last = false;
		info.pag = NULL;
		info.low_daddr = XFS_BUF_DADDR_NULL;
		info.low.rm_blockcount = 0;
		error = handlers[i].fn(tp, dkeys, &info);
		if (error)
			break;
		xfs_trans_cancel(tp);
		tp = NULL;
		info.next_daddr = 0;
	}

	if (tp)
		xfs_trans_cancel(tp);
	head->fmh_oflags = FMH_OF_DEV_T;
	return error;
}
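/*
 * Continuation example for the loop above: next_daddr starts at the low
 * key's fmr_physical + fmr_length, so if a previous call ended with a
 * record covering sectors [1000, 1100), passing that record back as the
 * low key makes the next call start scanning for gaps at sector 1100
 * instead of re-reporting the hole before it.  dkeys[1] stays all-ones
 * for every device except the one named in the caller's high key.
 */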
int
xfs_ioc_getfsmap(
	struct xfs_inode	*ip,
	struct fsmap_head	__user *arg)
{
	struct xfs_fsmap_head	xhead = {0};
	struct fsmap_head	head;
	struct fsmap		*recs;
	unsigned int		count;
	__u32			last_flags = 0;
	bool			done = false;
	int			error;

	if (copy_from_user(&head, arg, sizeof(struct fsmap_head)))
		return -EFAULT;
	if (memchr_inv(head.fmh_reserved, 0, sizeof(head.fmh_reserved)) ||
	    memchr_inv(head.fmh_keys[0].fmr_reserved, 0,
		       sizeof(head.fmh_keys[0].fmr_reserved)) ||
	    memchr_inv(head.fmh_keys[1].fmr_reserved, 0,
		       sizeof(head.fmh_keys[1].fmr_reserved)))
		return -EINVAL;

	/*
	 * Use an internal memory buffer so that we don't have to copy fsmap
	 * data to userspace while holding locks. Start by trying to allocate
	 * up to 128k for the buffer, but fall back to a single page if needed.
	 */
	count = min_t(unsigned int, head.fmh_count,
			131072 / sizeof(struct fsmap));
	recs = kvcalloc(count, sizeof(struct fsmap), GFP_KERNEL);
	if (!recs) {
		count = min_t(unsigned int, head.fmh_count,
				PAGE_SIZE / sizeof(struct fsmap));
		recs = kvcalloc(count, sizeof(struct fsmap), GFP_KERNEL);
		if (!recs)
			return -ENOMEM;
	}

	xhead.fmh_iflags = head.fmh_iflags;
	xfs_fsmap_to_internal(&xhead.fmh_keys[0], &head.fmh_keys[0]);
	xfs_fsmap_to_internal(&xhead.fmh_keys[1], &head.fmh_keys[1]);

	trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]);
	trace_xfs_getfsmap_high_key(ip->i_mount, &xhead.fmh_keys[1]);

	head.fmh_entries = 0;
	do {
		struct fsmap __user	*user_recs;
		struct fsmap		*last_rec;

		user_recs = &arg->fmh_recs[head.fmh_entries];
		xhead.fmh_entries = 0;
		xhead.fmh_count = min_t(unsigned int, count,
					head.fmh_count - head.fmh_entries);

		/* Run query, record how many entries we got. */
		error = xfs_getfsmap(ip->i_mount, &xhead, recs);
		switch (error) {
		case 0:
			/*
			 * There are no more records in the result set.  Copy
			 * whatever we got to userspace and break out.
			 */
			done = true;
			break;
		case -ECANCELED:
			/*
			 * The internal memory buffer is full.  Copy whatever
			 * records we got to userspace and go again if we have
			 * not yet filled the userspace buffer.
			 */
			error = 0;
			break;
		default:
			goto out_free;
		}
		head.fmh_entries += xhead.fmh_entries;
		head.fmh_oflags = xhead.fmh_oflags;

		/*
		 * If the caller wanted a record count or there aren't any
		 * new records to return, we're done.
		 */
		if (head.fmh_count == 0 || xhead.fmh_entries == 0)
			break;

		/* Copy all the records we got out to userspace. */
		if (copy_to_user(user_recs, recs,
				 xhead.fmh_entries * sizeof(struct fsmap))) {
			error = -EFAULT;
			goto out_free;
		}

		/* Remember the last record flags we copied to userspace. */
		last_rec = &recs[xhead.fmh_entries - 1];
		last_flags = last_rec->fmr_flags;

		/* Set up the low key for the next iteration. */
		xfs_fsmap_to_internal(&xhead.fmh_keys[0], last_rec);
		trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]);
	} while (!done && head.fmh_entries < head.fmh_count);

	/*
	 * If there are no more records in the query result set and we're not
	 * in counting mode, mark the last record returned with the LAST flag.
	 */
	if (done && head.fmh_count > 0 && head.fmh_entries > 0) {
		struct fsmap __user	*user_rec;

		last_flags |= FMR_OF_LAST;
		user_rec = &arg->fmh_recs[head.fmh_entries - 1];

		if (copy_to_user(&user_rec->fmr_flags, &last_flags,
					sizeof(last_flags))) {
			error = -EFAULT;
			goto out_free;
		}
	}

	/* copy back header */
	if (copy_to_user(arg, &head, sizeof(struct fsmap_head))) {