fs/xfs/xfs_fsmap.c
// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2017 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_btree.h"
#include "xfs_rmap_btree.h"
#include "xfs_trace.h"
#include "xfs_rmap.h"
#include "xfs_alloc.h"
#include "xfs_bit.h"
#include <linux/fsmap.h>
#include "xfs_fsmap.h"
#include "xfs_refcount.h"
#include "xfs_refcount_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_rtbitmap.h"
#include "xfs_ag.h"
#include "xfs_rtgroup.h"

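/*
 * Note on units: the userspace-visible struct fsmap expresses fmr_physical,
 * fmr_offset, and fmr_length in bytes, while the in-kernel struct xfs_fsmap
 * carries the same fields in 512-byte basic blocks, so the two conversion
 * helpers below translate with BBTOB()/BTOBBT().
 */
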
/* Convert an xfs_fsmap to an fsmap. */
static void
xfs_fsmap_from_internal(
	struct fsmap		*dest,
	struct xfs_fsmap	*src)
{
	dest->fmr_device = src->fmr_device;
	dest->fmr_flags = src->fmr_flags;
	dest->fmr_physical = BBTOB(src->fmr_physical);
	dest->fmr_owner = src->fmr_owner;
	dest->fmr_offset = BBTOB(src->fmr_offset);
	dest->fmr_length = BBTOB(src->fmr_length);
	dest->fmr_reserved[0] = 0;
	dest->fmr_reserved[1] = 0;
	dest->fmr_reserved[2] = 0;
}

/* Convert an fsmap to an xfs_fsmap. */
static void
xfs_fsmap_to_internal(
	struct xfs_fsmap	*dest,
	struct fsmap		*src)
{
	dest->fmr_device = src->fmr_device;
	dest->fmr_flags = src->fmr_flags;
	dest->fmr_physical = BTOBBT(src->fmr_physical);
	dest->fmr_owner = src->fmr_owner;
	dest->fmr_offset = BTOBBT(src->fmr_offset);
	dest->fmr_length = BTOBBT(src->fmr_length);
}

/* Convert an fsmap owner into an rmapbt owner. */
static int
xfs_fsmap_owner_to_rmap(
	struct xfs_rmap_irec	*dest,
	const struct xfs_fsmap	*src)
{
	if (!(src->fmr_flags & FMR_OF_SPECIAL_OWNER)) {
		dest->rm_owner = src->fmr_owner;
		return 0;
	}

	switch (src->fmr_owner) {
	case 0:			/* "lowest owner id possible" */
	case -1ULL:		/* "highest owner id possible" */
		dest->rm_owner = src->fmr_owner;
		break;
	case XFS_FMR_OWN_FREE:
		dest->rm_owner = XFS_RMAP_OWN_NULL;
		break;
	case XFS_FMR_OWN_UNKNOWN:
		dest->rm_owner = XFS_RMAP_OWN_UNKNOWN;
		break;
	case XFS_FMR_OWN_FS:
		dest->rm_owner = XFS_RMAP_OWN_FS;
		break;
	case XFS_FMR_OWN_LOG:
		dest->rm_owner = XFS_RMAP_OWN_LOG;
		break;
	case XFS_FMR_OWN_AG:
		dest->rm_owner = XFS_RMAP_OWN_AG;
		break;
	case XFS_FMR_OWN_INOBT:
		dest->rm_owner = XFS_RMAP_OWN_INOBT;
		break;
	case XFS_FMR_OWN_INODES:
		dest->rm_owner = XFS_RMAP_OWN_INODES;
		break;
	case XFS_FMR_OWN_REFC:
		dest->rm_owner = XFS_RMAP_OWN_REFC;
		break;
	case XFS_FMR_OWN_COW:
		dest->rm_owner = XFS_RMAP_OWN_COW;
		break;
	case XFS_FMR_OWN_DEFECTIVE:	/* not implemented */
		/* fall through */
	default:
		return -EINVAL;
	}
	return 0;
}

/* Convert an rmapbt owner into an fsmap owner. */
static int
xfs_fsmap_owner_from_frec(
	struct xfs_fsmap		*dest,
	const struct xfs_fsmap_irec	*frec)
{
	dest->fmr_flags = 0;
	if (!XFS_RMAP_NON_INODE_OWNER(frec->owner)) {
		dest->fmr_owner = frec->owner;
		return 0;
	}
	dest->fmr_flags |= FMR_OF_SPECIAL_OWNER;

	switch (frec->owner) {
	case XFS_RMAP_OWN_FS:
		dest->fmr_owner = XFS_FMR_OWN_FS;
		break;
	case XFS_RMAP_OWN_LOG:
		dest->fmr_owner = XFS_FMR_OWN_LOG;
		break;
	case XFS_RMAP_OWN_AG:
		dest->fmr_owner = XFS_FMR_OWN_AG;
		break;
	case XFS_RMAP_OWN_INOBT:
		dest->fmr_owner = XFS_FMR_OWN_INOBT;
		break;
	case XFS_RMAP_OWN_INODES:
		dest->fmr_owner = XFS_FMR_OWN_INODES;
		break;
	case XFS_RMAP_OWN_REFC:
		dest->fmr_owner = XFS_FMR_OWN_REFC;
		break;
	case XFS_RMAP_OWN_COW:
		dest->fmr_owner = XFS_FMR_OWN_COW;
		break;
	case XFS_RMAP_OWN_NULL:	/* "free" */
		dest->fmr_owner = XFS_FMR_OWN_FREE;
		break;
	default:
		ASSERT(0);
		return -EFSCORRUPTED;
	}
	return 0;
}

/* getfsmap query state */
struct xfs_getfsmap_info {
	struct xfs_fsmap_head	*head;
	struct fsmap		*fsmap_recs;	/* mapping records */
	struct xfs_buf		*agf_bp;	/* AGF, for refcount queries */
	struct xfs_group	*group;		/* group info, if applicable */
	xfs_daddr_t		next_daddr;	/* next daddr we expect */
	/* daddr of low fsmap key when we're using the rtbitmap */
	xfs_daddr_t		low_daddr;
	xfs_daddr_t		end_daddr;	/* daddr of high fsmap key */
	u64			missing_owner;	/* owner of holes */
	u32			dev;		/* device id */
	/*
	 * Low rmap key for the query. If low.rm_blockcount is nonzero, this
	 * is the second (or later) call to retrieve the recordset in pieces.
	 * xfs_getfsmap_rec_before_start will compare all records retrieved
	 * by the rmapbt query to filter out any records that start before
	 * the last record.
	 */
	struct xfs_rmap_irec	low;
	struct xfs_rmap_irec	high;		/* high rmap key */
	bool			last;		/* last extent? */
};

/* Associate a device with a getfsmap handler. */
struct xfs_getfsmap_dev {
	u32			dev;
	int			(*fn)(struct xfs_trans *tp,
				      const struct xfs_fsmap *keys,
				      struct xfs_getfsmap_info *info);
	sector_t		nr_sectors;
};

/* Compare two getfsmap device handlers. */
static int
xfs_getfsmap_dev_compare(
	const void		*p1,
	const void		*p2)
{
	const struct xfs_getfsmap_dev	*d1 = p1;
	const struct xfs_getfsmap_dev	*d2 = p2;

	return d1->dev - d2->dev;
}

/* Decide if this mapping is shared. */
STATIC int
xfs_getfsmap_is_shared(
	struct xfs_trans		*tp,
	struct xfs_getfsmap_info	*info,
	const struct xfs_fsmap_irec	*frec,
	bool				*stat)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_btree_cur		*cur;
	xfs_agblock_t			fbno;
	xfs_extlen_t			flen;
	int				error;

	*stat = false;
	if (!xfs_has_reflink(mp))
		return 0;
	/* rt files will have no perag structure */
	if (!info->group)
		return 0;

	/* Are there any shared blocks here? */
	flen = 0;
	cur = xfs_refcountbt_init_cursor(mp, tp, info->agf_bp,
			to_perag(info->group));

	error = xfs_refcount_find_shared(cur, frec->rec_key,
			XFS_BB_TO_FSBT(mp, frec->len_daddr), &fbno, &flen,
			false);

	xfs_btree_del_cursor(cur, error);
	if (error)
		return error;

	*stat = flen > 0;
	return 0;
}

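/* Copy an xfs_fsmap into the next slot of the caller's output array. */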
static inline void
xfs_getfsmap_format(
	struct xfs_mount		*mp,
	struct xfs_fsmap		*xfm,
	struct xfs_getfsmap_info	*info)
{
	struct fsmap			*rec;

	trace_xfs_getfsmap_mapping(mp, xfm);

	rec = &info->fsmap_recs[info->head->fmh_entries++];
	xfs_fsmap_from_internal(rec, xfm);
}

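/* Decide if a mapping starts before the low key of this query. */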
static inline bool
xfs_getfsmap_frec_before_start(
	struct xfs_getfsmap_info	*info,
	const struct xfs_fsmap_irec	*frec)
{
	if (info->low_daddr != XFS_BUF_DADDR_NULL)
		return frec->start_daddr < info->low_daddr;
	if (info->low.rm_blockcount) {
		struct xfs_rmap_irec	rec = {
			.rm_startblock	= frec->rec_key,
			.rm_owner	= frec->owner,
			.rm_flags	= frec->rm_flags,
		};

		return xfs_rmap_compare(&rec, &info->low) < 0;
	}

	return false;
}

/*
 * Format a reverse mapping for getfsmap, having translated rm_startblock
 * into the appropriate daddr units. Pass in a nonzero @len_daddr if the
 * length could be larger than rm_blockcount in struct xfs_rmap_irec.
 */
STATIC int
xfs_getfsmap_helper(
	struct xfs_trans		*tp,
	struct xfs_getfsmap_info	*info,
	const struct xfs_fsmap_irec	*frec)
{
	struct xfs_fsmap		fmr;
	struct xfs_mount		*mp = tp->t_mountp;
	bool				shared;
	int				error = 0;

	if (fatal_signal_pending(current))
		return -EINTR;

	/*
	 * Filter out records that start before our startpoint, if the
	 * caller requested that.
	 */
	if (xfs_getfsmap_frec_before_start(info, frec))
		goto out;

	/* Are we just counting mappings? */
	if (info->head->fmh_count == 0) {
		if (info->head->fmh_entries == UINT_MAX)
			return -ECANCELED;

		if (frec->start_daddr > info->next_daddr)
			info->head->fmh_entries++;

		if (info->last)
			return 0;

		info->head->fmh_entries++;
		goto out;
	}

	/*
	 * If the record starts past the last physical block we saw,
	 * then we've found a gap. Report the gap as being owned by
	 * whatever the caller specified is the missing owner.
	 */
	if (frec->start_daddr > info->next_daddr) {
		if (info->head->fmh_entries >= info->head->fmh_count)
			return -ECANCELED;

		fmr.fmr_device = info->dev;
		fmr.fmr_physical = info->next_daddr;
		fmr.fmr_owner = info->missing_owner;
		fmr.fmr_offset = 0;
		fmr.fmr_length = frec->start_daddr - info->next_daddr;
		fmr.fmr_flags = FMR_OF_SPECIAL_OWNER;
		xfs_getfsmap_format(mp, &fmr, info);
	}

	if (info->last)
		goto out;

	/* Fill out the extent we found */
	if (info->head->fmh_entries >= info->head->fmh_count)
		return -ECANCELED;

	trace_xfs_fsmap_mapping(mp, info->dev,
			info->group ? info->group->xg_gno : NULLAGNUMBER,
			frec);

	fmr.fmr_device = info->dev;
	fmr.fmr_physical = frec->start_daddr;
	error = xfs_fsmap_owner_from_frec(&fmr, frec);
	if (error)
		return error;
	fmr.fmr_offset = XFS_FSB_TO_BB(mp, frec->offset);
	fmr.fmr_length = frec->len_daddr;
	if (frec->rm_flags & XFS_RMAP_UNWRITTEN)
		fmr.fmr_flags |= FMR_OF_PREALLOC;
	if (frec->rm_flags & XFS_RMAP_ATTR_FORK)
		fmr.fmr_flags |= FMR_OF_ATTR_FORK;
	if (frec->rm_flags & XFS_RMAP_BMBT_BLOCK)
		fmr.fmr_flags |= FMR_OF_EXTENT_MAP;
	if (fmr.fmr_flags == 0) {
		error = xfs_getfsmap_is_shared(tp, info, frec, &shared);
		if (error)
			return error;
		if (shared)
			fmr.fmr_flags |= FMR_OF_SHARED;
	}

	xfs_getfsmap_format(mp, &fmr, info);
out:
	info->next_daddr = max(info->next_daddr,
			       frec->start_daddr + frec->len_daddr);
	return 0;
}

static inline int
xfs_getfsmap_group_helper(
	struct xfs_getfsmap_info	*info,
	struct xfs_trans		*tp,
	struct xfs_group		*xg,
	xfs_agblock_t			startblock,
	xfs_extlen_t			blockcount,
	struct xfs_fsmap_irec		*frec)
{
	/*
	 * For an info->last query, we're looking for a gap between the last
	 * mapping emitted and the high key specified by userspace. If the
	 * user's query spans less than 1 fsblock, then info->high and
	 * info->low will have the same rm_startblock, which causes rec_daddr
	 * and next_daddr to be the same. Therefore, use the end_daddr that
	 * we calculated from userspace's high key to synthesize the record.
	 * Note that if the btree query found a mapping, there won't be a gap.
	 */
	if (info->last && info->end_daddr != XFS_BUF_DADDR_NULL)
		frec->start_daddr = info->end_daddr;
	else
		frec->start_daddr = xfs_gbno_to_daddr(xg, startblock);

	frec->len_daddr = XFS_FSB_TO_BB(xg->xg_mount, blockcount);
	return xfs_getfsmap_helper(tp, info, frec);
}

/* Transform a rmapbt irec into a fsmap */
STATIC int
xfs_getfsmap_rmapbt_helper(
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*rec,
	void				*priv)
{
	struct xfs_fsmap_irec		frec = {
		.owner			= rec->rm_owner,
		.offset			= rec->rm_offset,
		.rm_flags		= rec->rm_flags,
		.rec_key		= rec->rm_startblock,
	};
	struct xfs_getfsmap_info	*info = priv;

	return xfs_getfsmap_group_helper(info, cur->bc_tp, cur->bc_group,
			rec->rm_startblock, rec->rm_blockcount, &frec);
}

/* Transform a bnobt irec into a fsmap */
STATIC int
xfs_getfsmap_datadev_bnobt_helper(
	struct xfs_btree_cur		*cur,
	const struct xfs_alloc_rec_incore *rec,
	void				*priv)
{
	struct xfs_fsmap_irec		frec = {
		.owner			= XFS_RMAP_OWN_NULL, /* "free" */
		.rec_key		= rec->ar_startblock,
	};
	struct xfs_getfsmap_info	*info = priv;

	return xfs_getfsmap_group_helper(info, cur->bc_tp, cur->bc_group,
			rec->ar_startblock, rec->ar_blockcount, &frec);
}

/* Set rmap flags based on the getfsmap flags */
static void
xfs_getfsmap_set_irec_flags(
	struct xfs_rmap_irec	*irec,
	const struct xfs_fsmap	*fmr)
{
	irec->rm_flags = 0;
	if (fmr->fmr_flags & FMR_OF_ATTR_FORK)
		irec->rm_flags |= XFS_RMAP_ATTR_FORK;
	if (fmr->fmr_flags & FMR_OF_EXTENT_MAP)
		irec->rm_flags |= XFS_RMAP_BMBT_BLOCK;
	if (fmr->fmr_flags & FMR_OF_PREALLOC)
		irec->rm_flags |= XFS_RMAP_UNWRITTEN;
}

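/* Decide if a mapping can never be part of a shared extent. */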
static inline bool
rmap_not_shareable(struct xfs_mount *mp, const struct xfs_rmap_irec *r)
{
	if (!xfs_has_reflink(mp))
		return true;
	if (XFS_RMAP_NON_INODE_OWNER(r->rm_owner))
		return true;
	if (r->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK |
			   XFS_RMAP_UNWRITTEN))
		return true;
	return false;
}

/* Execute a getfsmap query against the regular data device. */
STATIC int
__xfs_getfsmap_datadev(
	struct xfs_trans		*tp,
	const struct xfs_fsmap		*keys,
	struct xfs_getfsmap_info	*info,
	int				(*query_fn)(struct xfs_trans *,
						    struct xfs_getfsmap_info *,
						    struct xfs_btree_cur **,
						    void *),
	void				*priv)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_perag		*pag = NULL;
	struct xfs_btree_cur		*bt_cur = NULL;
	xfs_fsblock_t			start_fsb;
	xfs_fsblock_t			end_fsb;
	xfs_agnumber_t			start_ag, end_ag;
	uint64_t			eofs;
	int				error = 0;

	eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
	if (keys[0].fmr_physical >= eofs)
		return 0;
	start_fsb = XFS_DADDR_TO_FSB(mp, keys[0].fmr_physical);
	end_fsb = XFS_DADDR_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));

	/*
	 * Convert the fsmap low/high keys to AG based keys. Initialize
	 * low to the fsmap low key and max out the high key to the end
	 * of the AG.
	 */
	info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset);
	error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]);
	if (error)
		return error;
	info->low.rm_blockcount = XFS_BB_TO_FSBT(mp, keys[0].fmr_length);
	xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);

	/* Adjust the low key if we are continuing from where we left off. */
	if (info->low.rm_blockcount == 0) {
		/* No previous record from which to continue */
	} else if (rmap_not_shareable(mp, &info->low)) {
		/* Last record seen was an unshareable extent */
		info->low.rm_owner = 0;
		info->low.rm_offset = 0;

		start_fsb += info->low.rm_blockcount;
		if (XFS_FSB_TO_DADDR(mp, start_fsb) >= eofs)
			return 0;
	} else {
		/* Last record seen was a shareable file data extent */
		info->low.rm_offset += info->low.rm_blockcount;
	}
	info->low.rm_startblock = XFS_FSB_TO_AGBNO(mp, start_fsb);

	info->high.rm_startblock = -1U;
	info->high.rm_owner = ULLONG_MAX;
	info->high.rm_offset = ULLONG_MAX;
	info->high.rm_blockcount = 0;
	info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS;

	start_ag = XFS_FSB_TO_AGNO(mp, start_fsb);
	end_ag = XFS_FSB_TO_AGNO(mp, end_fsb);

	while ((pag = xfs_perag_next_range(mp, pag, start_ag, end_ag))) {
		/*
		 * Set the AG high key from the fsmap high key if this
		 * is the last AG that we're querying.
		 */
		info->group = pag_group(pag);
		if (pag_agno(pag) == end_ag) {
			info->high.rm_startblock = XFS_FSB_TO_AGBNO(mp,
					end_fsb);
			info->high.rm_offset = XFS_BB_TO_FSBT(mp,
					keys[1].fmr_offset);
			error = xfs_fsmap_owner_to_rmap(&info->high, &keys[1]);
			if (error)
				break;
			xfs_getfsmap_set_irec_flags(&info->high, &keys[1]);
		}

		if (bt_cur) {
			xfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
			bt_cur = NULL;
			xfs_trans_brelse(tp, info->agf_bp);
			info->agf_bp = NULL;
		}

		error = xfs_alloc_read_agf(pag, tp, 0, &info->agf_bp);
		if (error)
			break;

		trace_xfs_fsmap_low_group_key(mp, info->dev, pag_agno(pag),
				&info->low);
		trace_xfs_fsmap_high_group_key(mp, info->dev, pag_agno(pag),
				&info->high);

		error = query_fn(tp, info, &bt_cur, priv);
		if (error)
			break;

		/*
		 * Set the AG low key to the start of the AG prior to
		 * moving on to the next AG.
		 */
		if (pag_agno(pag) == start_ag)
			memset(&info->low, 0, sizeof(info->low));

		/*
		 * If this is the last AG, report any gap at the end of it
		 * before we drop the reference to the perag when the loop
		 * terminates.
		 */
		if (pag_agno(pag) == end_ag) {
			info->last = true;
			error = query_fn(tp, info, &bt_cur, priv);
			if (error)
				break;
		}
		info->group = NULL;
	}

	if (bt_cur)
		xfs_btree_del_cursor(bt_cur, error < 0 ? XFS_BTREE_ERROR :
							 XFS_BTREE_NOERROR);
	if (info->agf_bp) {
		xfs_trans_brelse(tp, info->agf_bp);
		info->agf_bp = NULL;
	}
	if (info->group) {
		xfs_perag_rele(pag);
		info->group = NULL;
	} else if (pag) {
		/* loop termination case */
		xfs_perag_rele(pag);
	}

	return error;
}

/* Actually query the rmap btree. */
STATIC int
xfs_getfsmap_datadev_rmapbt_query(
	struct xfs_trans		*tp,
	struct xfs_getfsmap_info	*info,
	struct xfs_btree_cur		**curpp,
	void				*priv)
{
	/* Report any gap at the end of the last AG. */
	if (info->last)
		return xfs_getfsmap_rmapbt_helper(*curpp, &info->high, info);

	/* Allocate cursor for this AG and query_range it. */
	*curpp = xfs_rmapbt_init_cursor(tp->t_mountp, tp, info->agf_bp,
			to_perag(info->group));
	return xfs_rmap_query_range(*curpp, &info->low, &info->high,
			xfs_getfsmap_rmapbt_helper, info);
}

/* Execute a getfsmap query against the regular data device rmapbt. */
STATIC int
xfs_getfsmap_datadev_rmapbt(
	struct xfs_trans		*tp,
	const struct xfs_fsmap		*keys,
	struct xfs_getfsmap_info	*info)
{
	info->missing_owner = XFS_FMR_OWN_FREE;
	return __xfs_getfsmap_datadev(tp, keys, info,
			xfs_getfsmap_datadev_rmapbt_query, NULL);
}

/* Actually query the bno btree. */
STATIC int
xfs_getfsmap_datadev_bnobt_query(
	struct xfs_trans		*tp,
	struct xfs_getfsmap_info	*info,
	struct xfs_btree_cur		**curpp,
	void				*priv)
{
	struct xfs_alloc_rec_incore	*key = priv;

	/* Report any gap at the end of the last AG. */
	if (info->last)
		return xfs_getfsmap_datadev_bnobt_helper(*curpp, &key[1], info);

	/* Allocate cursor for this AG and query_range it. */
	*curpp = xfs_bnobt_init_cursor(tp->t_mountp, tp, info->agf_bp,
			to_perag(info->group));
	key->ar_startblock = info->low.rm_startblock;
	key[1].ar_startblock = info->high.rm_startblock;
	return xfs_alloc_query_range(*curpp, key, &key[1],
			xfs_getfsmap_datadev_bnobt_helper, info);
}

/* Execute a getfsmap query against the regular data device's bnobt. */
STATIC int
xfs_getfsmap_datadev_bnobt(
	struct xfs_trans		*tp,
	const struct xfs_fsmap		*keys,
	struct xfs_getfsmap_info	*info)
{
	struct xfs_alloc_rec_incore	akeys[2];

	memset(akeys, 0, sizeof(akeys));
	info->missing_owner = XFS_FMR_OWN_UNKNOWN;
	return __xfs_getfsmap_datadev(tp, keys, info,
			xfs_getfsmap_datadev_bnobt_query, &akeys[0]);
}

/* Execute a getfsmap query against the log device. */
STATIC int
xfs_getfsmap_logdev(
	struct xfs_trans		*tp,
	const struct xfs_fsmap		*keys,
	struct xfs_getfsmap_info	*info)
{
	struct xfs_fsmap_irec		frec = {
		.start_daddr		= 0,
		.rec_key		= 0,
		.owner			= XFS_RMAP_OWN_LOG,
	};
	struct xfs_mount		*mp = tp->t_mountp;
	xfs_fsblock_t			start_fsb, end_fsb;
	uint64_t			eofs;

	eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
	if (keys[0].fmr_physical >= eofs)
		return 0;
	start_fsb = XFS_BB_TO_FSBT(mp,
				keys[0].fmr_physical + keys[0].fmr_length);
	end_fsb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));

	/* Adjust the low key if we are continuing from where we left off. */
	if (keys[0].fmr_length > 0)
		info->low_daddr = XFS_FSB_TO_BB(mp, start_fsb);

	trace_xfs_fsmap_low_linear_key(mp, info->dev, start_fsb);
	trace_xfs_fsmap_high_linear_key(mp, info->dev, end_fsb);

	if (start_fsb > 0)
		return 0;

	/* Fabricate an rmap entry for the external log device. */
	frec.len_daddr = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
	return xfs_getfsmap_helper(tp, info, &frec);
}

#ifdef CONFIG_XFS_RT
/* Transform a rtbitmap "record" into a fsmap */
STATIC int
xfs_getfsmap_rtdev_rtbitmap_helper(
	struct xfs_rtgroup		*rtg,
	struct xfs_trans		*tp,
	const struct xfs_rtalloc_rec	*rec,
	void				*priv)
{
	struct xfs_fsmap_irec		frec = {
		.owner			= XFS_RMAP_OWN_NULL, /* "free" */
	};
	struct xfs_mount		*mp = rtg_mount(rtg);
	struct xfs_getfsmap_info	*info = priv;
	xfs_rtblock_t			start_rtb =
				xfs_rtx_to_rtb(rtg, rec->ar_startext);
	uint64_t			rtbcount =
				xfs_rtbxlen_to_blen(mp, rec->ar_extcount);

	/*
	 * For an info->last query, we're looking for a gap between the last
	 * mapping emitted and the high key specified by userspace. If the
	 * user's query spans less than 1 fsblock, then info->high and
	 * info->low will have the same rm_startblock, which causes rec_daddr
	 * and next_daddr to be the same. Therefore, use the end_daddr that
	 * we calculated from userspace's high key to synthesize the record.
	 * Note that if the btree query found a mapping, there won't be a gap.
	 */
	if (info->last && info->end_daddr != XFS_BUF_DADDR_NULL) {
		frec.start_daddr = info->end_daddr;
	} else {
		frec.start_daddr = xfs_rtb_to_daddr(mp, start_rtb);
	}

	frec.len_daddr = XFS_FSB_TO_BB(mp, rtbcount);
	return xfs_getfsmap_helper(tp, info, &frec);
}

/* Execute a getfsmap query against the realtime device rtbitmap. */
STATIC int
xfs_getfsmap_rtdev_rtbitmap(
	struct xfs_trans		*tp,
	const struct xfs_fsmap		*keys,
	struct xfs_getfsmap_info	*info)
{
	struct xfs_mount		*mp = tp->t_mountp;
	xfs_rtblock_t			start_rtbno, end_rtbno;
	xfs_rtxnum_t			start_rtx, end_rtx;
	xfs_rgnumber_t			start_rgno, end_rgno;
	struct xfs_rtgroup		*rtg = NULL;
	uint64_t			eofs;
	int				error;

	eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
	if (keys[0].fmr_physical >= eofs)
		return 0;

	info->missing_owner = XFS_FMR_OWN_UNKNOWN;

	/* Adjust the low key if we are continuing from where we left off. */
	start_rtbno = xfs_daddr_to_rtb(mp,
			keys[0].fmr_physical + keys[0].fmr_length);
	if (keys[0].fmr_length > 0) {
		info->low_daddr = xfs_rtb_to_daddr(mp, start_rtbno);
		if (info->low_daddr >= eofs)
			return 0;
	}
	start_rtx = xfs_rtb_to_rtx(mp, start_rtbno);
	start_rgno = xfs_rtb_to_rgno(mp, start_rtbno);

	end_rtbno = xfs_daddr_to_rtb(mp, min(eofs - 1, keys[1].fmr_physical));
	end_rgno = xfs_rtb_to_rgno(mp, end_rtbno);

	trace_xfs_fsmap_low_linear_key(mp, info->dev, start_rtbno);
	trace_xfs_fsmap_high_linear_key(mp, info->dev, end_rtbno);

	end_rtx = -1ULL;

	while ((rtg = xfs_rtgroup_next_range(mp, rtg, start_rgno, end_rgno))) {
		if (rtg_rgno(rtg) == end_rgno)
			end_rtx = xfs_rtb_to_rtx(mp,
					end_rtbno + mp->m_sb.sb_rextsize - 1);

		info->group = rtg_group(rtg);
		xfs_rtgroup_lock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
		error = xfs_rtalloc_query_range(rtg, tp, start_rtx, end_rtx,
				xfs_getfsmap_rtdev_rtbitmap_helper, info);
		if (error)
			break;

		/*
		 * Report any gaps at the end of the rtbitmap by simulating a
		 * zero-length free extent starting at the rtx after the end
		 * of the query range.
		 */
		if (rtg_rgno(rtg) == end_rgno) {
			struct xfs_rtalloc_rec	ahigh = {
				.ar_startext	= min(end_rtx + 1,
						      rtg->rtg_extents),
			};

			info->last = true;
			error = xfs_getfsmap_rtdev_rtbitmap_helper(rtg, tp,
					&ahigh, info);
			if (error)
				break;
		}

		xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
		info->group = NULL;
		start_rtx = 0;
	}

	/* loop termination case */
	if (rtg) {
		if (info->group) {
			xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
			info->group = NULL;
		}
		xfs_rtgroup_rele(rtg);
	}

	return error;
}
#endif /* CONFIG_XFS_RT */

/* Do we recognize the device? */
STATIC bool
xfs_getfsmap_is_valid_device(
	struct xfs_mount	*mp,
	struct xfs_fsmap	*fm)
{
	if (fm->fmr_device == 0 || fm->fmr_device == UINT_MAX ||
	    fm->fmr_device == new_encode_dev(mp->m_ddev_targp->bt_dev))
		return true;
	if (mp->m_logdev_targp &&
	    fm->fmr_device == new_encode_dev(mp->m_logdev_targp->bt_dev))
		return true;
	if (mp->m_rtdev_targp &&
	    fm->fmr_device == new_encode_dev(mp->m_rtdev_targp->bt_dev))
		return true;
	return false;
}

/* Ensure that the low key is less than the high key. */
STATIC bool
xfs_getfsmap_check_keys(
	struct xfs_fsmap		*low_key,
	struct xfs_fsmap		*high_key)
{
	if (low_key->fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) {
		if (low_key->fmr_offset)
			return false;
	}
	if (high_key->fmr_flags != -1U &&
	    (high_key->fmr_flags & (FMR_OF_SPECIAL_OWNER |
				    FMR_OF_EXTENT_MAP))) {
		if (high_key->fmr_offset && high_key->fmr_offset != -1ULL)
			return false;
	}
	if (high_key->fmr_length && high_key->fmr_length != -1ULL)
		return false;

	if (low_key->fmr_device > high_key->fmr_device)
		return false;
	if (low_key->fmr_device < high_key->fmr_device)
		return true;

	if (low_key->fmr_physical > high_key->fmr_physical)
		return false;
	if (low_key->fmr_physical < high_key->fmr_physical)
		return true;

	if (low_key->fmr_owner > high_key->fmr_owner)
		return false;
	if (low_key->fmr_owner < high_key->fmr_owner)
		return true;

	if (low_key->fmr_offset > high_key->fmr_offset)
		return false;
	if (low_key->fmr_offset < high_key->fmr_offset)
		return true;

	return false;
}

/*
 * There are only two devices if we didn't configure RT devices at build time.
 */
#ifdef CONFIG_XFS_RT
#define XFS_GETFSMAP_DEVS	3
#else
#define XFS_GETFSMAP_DEVS	2
#endif /* CONFIG_XFS_RT */

/*
 * Get filesystem's extents as described in head, and format for output. Fills
 * in the supplied records array until there are no more reverse mappings to
 * return or head.fmh_entries == head.fmh_count. In the second case, this
 * function returns -ECANCELED to indicate that more records would have been
 * returned.
 *
 * Key to Confusion
 * ----------------
 * There are multiple levels of keys and counters at work here:
 * xfs_fsmap_head.fmh_keys	-- low and high fsmap keys passed in;
 *				   these reflect fs-wide sector addrs.
 * dkeys			-- fmh_keys used to query each device;
 *				   these are fmh_keys but w/ the low key
 *				   bumped up by fmr_length.
 * xfs_getfsmap_info.next_daddr	-- next disk addr we expect to see; this
 *				   is how we detect gaps in the fsmap
 *				   records and report them.
 * xfs_getfsmap_info.low/high	-- per-AG low/high keys computed from
 *				   dkeys; used to query the metadata.
 */
STATIC int
xfs_getfsmap(
	struct xfs_mount		*mp,
	struct xfs_fsmap_head		*head,
	struct fsmap			*fsmap_recs)
{
	struct xfs_trans		*tp = NULL;
	struct xfs_fsmap		dkeys[2];	/* per-dev keys */
	struct xfs_getfsmap_dev	handlers[XFS_GETFSMAP_DEVS];
	struct xfs_getfsmap_info	info = { NULL };
	bool				use_rmap;
	int				i;
	int				error = 0;

	if (head->fmh_iflags & ~FMH_IF_VALID)
		return -EINVAL;
	if (!xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[0]) ||
	    !xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[1]))
		return -EINVAL;
	if (!xfs_getfsmap_check_keys(&head->fmh_keys[0], &head->fmh_keys[1]))
		return -EINVAL;

	use_rmap = xfs_has_rmapbt(mp) &&
		   has_capability_noaudit(current, CAP_SYS_ADMIN);
	head->fmh_entries = 0;

	/* Set up our device handlers. */
	memset(handlers, 0, sizeof(handlers));
	handlers[0].nr_sectors = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
	handlers[0].dev = new_encode_dev(mp->m_ddev_targp->bt_dev);
	if (use_rmap)
		handlers[0].fn = xfs_getfsmap_datadev_rmapbt;
	else
		handlers[0].fn = xfs_getfsmap_datadev_bnobt;
	if (mp->m_logdev_targp != mp->m_ddev_targp) {
		handlers[1].nr_sectors = XFS_FSB_TO_BB(mp,
						       mp->m_sb.sb_logblocks);
		handlers[1].dev = new_encode_dev(mp->m_logdev_targp->bt_dev);
		handlers[1].fn = xfs_getfsmap_logdev;
	}
#ifdef CONFIG_XFS_RT
	if (mp->m_rtdev_targp) {
		handlers[2].nr_sectors = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
		handlers[2].dev = new_encode_dev(mp->m_rtdev_targp->bt_dev);
		handlers[2].fn = xfs_getfsmap_rtdev_rtbitmap;
	}
#endif /* CONFIG_XFS_RT */

	xfs_sort(handlers, XFS_GETFSMAP_DEVS, sizeof(struct xfs_getfsmap_dev),
			xfs_getfsmap_dev_compare);

	/*
	 * To continue where we left off, we allow userspace to use the
	 * last mapping from a previous call as the low key of the next.
	 * This is identified by a non-zero length in the low key. We
	 * have to increment the low key in this scenario to ensure we
	 * don't return the same mapping again, and instead return the
	 * very next mapping.
	 *
	 * If the low key mapping refers to file data, the same physical
	 * blocks could be mapped to several other files/offsets.
	 * According to rmapbt record ordering, the minimal next
	 * possible record for the block range is the next starting
	 * offset in the same inode. Therefore, each fsmap backend bumps
	 * the file offset to continue the search appropriately. For
	 * all other low key mapping types (attr blocks, metadata), each
	 * fsmap backend bumps the physical offset as there can be no
	 * other mapping for the same physical block range.
	 */
	dkeys[0] = head->fmh_keys[0];
	memset(&dkeys[1], 0xFF, sizeof(struct xfs_fsmap));

	info.next_daddr = head->fmh_keys[0].fmr_physical +
			  head->fmh_keys[0].fmr_length;
	info.end_daddr = XFS_BUF_DADDR_NULL;
	info.fsmap_recs = fsmap_recs;
	info.head = head;

	/* For each device we support... */
	for (i = 0; i < XFS_GETFSMAP_DEVS; i++) {
		/* Is this device within the range the user asked for? */
		if (!handlers[i].fn)
			continue;
		if (head->fmh_keys[0].fmr_device > handlers[i].dev)
			continue;
		if (head->fmh_keys[1].fmr_device < handlers[i].dev)
			break;

		/*
		 * If this device number matches the high key, we have
		 * to pass the high key to the handler to limit the
		 * query results. If the device number exceeds the
		 * low key, zero out the low key so that we get
		 * everything from the beginning.
		 */
		if (handlers[i].dev == head->fmh_keys[1].fmr_device) {
			dkeys[1] = head->fmh_keys[1];
			info.end_daddr = min(handlers[i].nr_sectors - 1,
					     dkeys[1].fmr_physical);
		}
		if (handlers[i].dev > head->fmh_keys[0].fmr_device)
			memset(&dkeys[0], 0, sizeof(struct xfs_fsmap));

		/*
		 * Grab an empty transaction so that we can use its recursive
		 * buffer locking abilities to detect cycles in the rmapbt
		 * without deadlocking.
		 */
		error = xfs_trans_alloc_empty(mp, &tp);
		if (error)
			break;

		info.dev = handlers[i].dev;
		info.last = false;
		info.group = NULL;
		info.low_daddr = XFS_BUF_DADDR_NULL;
		info.low.rm_blockcount = 0;
		error = handlers[i].fn(tp, dkeys, &info);
		if (error)
			break;
		xfs_trans_cancel(tp);
		tp = NULL;
		info.next_daddr = 0;
	}

	if (tp)
		xfs_trans_cancel(tp);
	head->fmh_oflags = FMH_OF_DEV_T;
	return error;
}

int
xfs_ioc_getfsmap(
	struct xfs_inode	*ip,
	struct fsmap_head	__user *arg)
{
	struct xfs_fsmap_head	xhead = {0};
	struct fsmap_head	head;
	struct fsmap		*recs;
	unsigned int		count;
	__u32			last_flags = 0;
	bool			done = false;
	int			error;

	if (copy_from_user(&head, arg, sizeof(struct fsmap_head)))
		return -EFAULT;
	if (memchr_inv(head.fmh_reserved, 0, sizeof(head.fmh_reserved)) ||
	    memchr_inv(head.fmh_keys[0].fmr_reserved, 0,
		       sizeof(head.fmh_keys[0].fmr_reserved)) ||
	    memchr_inv(head.fmh_keys[1].fmr_reserved, 0,
		       sizeof(head.fmh_keys[1].fmr_reserved)))
		return -EINVAL;

	/*
	 * Use an internal memory buffer so that we don't have to copy fsmap
	 * data to userspace while holding locks. Start by trying to allocate
	 * up to 128k for the buffer, but fall back to a single page if needed.
	 */
	count = min_t(unsigned int, head.fmh_count,
			131072 / sizeof(struct fsmap));
	recs = kvcalloc(count, sizeof(struct fsmap), GFP_KERNEL);
	if (!recs) {
		count = min_t(unsigned int, head.fmh_count,
				PAGE_SIZE / sizeof(struct fsmap));
		recs = kvcalloc(count, sizeof(struct fsmap), GFP_KERNEL);
		if (!recs)
			return -ENOMEM;
	}

	xhead.fmh_iflags = head.fmh_iflags;
	xfs_fsmap_to_internal(&xhead.fmh_keys[0], &head.fmh_keys[0]);
	xfs_fsmap_to_internal(&xhead.fmh_keys[1], &head.fmh_keys[1]);

	trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]);
	trace_xfs_getfsmap_high_key(ip->i_mount, &xhead.fmh_keys[1]);

	head.fmh_entries = 0;
	do {
		struct fsmap __user	*user_recs;
		struct fsmap		*last_rec;

		user_recs = &arg->fmh_recs[head.fmh_entries];
		xhead.fmh_entries = 0;
		xhead.fmh_count = min_t(unsigned int, count,
					head.fmh_count - head.fmh_entries);

		/* Run query, record how many entries we got. */
		error = xfs_getfsmap(ip->i_mount, &xhead, recs);
		switch (error) {
		case 0:
			/*
			 * There are no more records in the result set. Copy
			 * whatever we got to userspace and break out.
			 */
			done = true;
			break;
		case -ECANCELED:
			/*
			 * The internal memory buffer is full. Copy whatever
			 * records we got to userspace and go again if we have
			 * not yet filled the userspace buffer.
			 */
			error = 0;
			break;
		default:
			goto out_free;
		}
		head.fmh_entries += xhead.fmh_entries;
		head.fmh_oflags = xhead.fmh_oflags;

		/*
		 * If the caller wanted a record count or there aren't any
		 * new records to return, we're done.
		 */
		if (head.fmh_count == 0 || xhead.fmh_entries == 0)
			break;

		/* Copy all the records we got out to userspace. */
		if (copy_to_user(user_recs, recs,
				 xhead.fmh_entries * sizeof(struct fsmap))) {
			error = -EFAULT;
			goto out_free;
		}

		/* Remember the last record flags we copied to userspace. */
		last_rec = &recs[xhead.fmh_entries - 1];
		last_flags = last_rec->fmr_flags;

		/* Set up the low key for the next iteration. */
		xfs_fsmap_to_internal(&xhead.fmh_keys[0], last_rec);
		trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]);
	} while (!done && head.fmh_entries < head.fmh_count);

	/*
	 * If there are no more records in the query result set and we're not
	 * in counting mode, mark the last record returned with the LAST flag.
	 */
	if (done && head.fmh_count > 0 && head.fmh_entries > 0) {
		struct fsmap __user	*user_rec;

		last_flags |= FMR_OF_LAST;
		user_rec = &arg->fmh_recs[head.fmh_entries - 1];

		if (copy_to_user(&user_rec->fmr_flags, &last_flags,
				 sizeof(last_flags))) {
			error = -EFAULT;
			goto out_free;
		}
	}

	/* copy back header */
	if (copy_to_user(arg, &head, sizeof(struct fsmap_head))) {
		error = -EFAULT;
		goto out_free;
	}

out_free:
	kvfree(recs);
	return error;
}
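
/*
 * A minimal userspace sketch of the query loop that xfs_ioc_getfsmap()
 * serves, shown only to illustrate the continuation protocol described
 * above xfs_getfsmap(): the caller reuses the last record returned as the
 * low key of the next call.  The record count (128) and the mount point
 * taken from argv[1] are arbitrary choices, and error handling is pared
 * down to the minimum.
 *
 *	#include <stdio.h>
 *	#include <stdlib.h>
 *	#include <limits.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/fsmap.h>
 *
 *	#define NR_RECS	128
 *
 *	int main(int argc, char *argv[])
 *	{
 *		struct fsmap_head	*head;
 *		struct fsmap		*rec;
 *		unsigned int		i;
 *		int			fd;
 *
 *		if (argc < 2)
 *			return 1;
 *		fd = open(argv[1], O_RDONLY);
 *		if (fd < 0)
 *			return 1;
 *
 *		head = calloc(1, sizeof(*head) + NR_RECS * sizeof(*rec));
 *		if (!head)
 *			return 1;
 *		head->fmh_count = NR_RECS;
 *		// Low key stays zeroed; max out the high key.
 *		head->fmh_keys[1].fmr_device = UINT_MAX;
 *		head->fmh_keys[1].fmr_flags = UINT_MAX;
 *		head->fmh_keys[1].fmr_physical = ULLONG_MAX;
 *		head->fmh_keys[1].fmr_owner = ULLONG_MAX;
 *		head->fmh_keys[1].fmr_offset = ULLONG_MAX;
 *
 *		for (;;) {
 *			if (ioctl(fd, FS_IOC_GETFSMAP, head) < 0)
 *				break;
 *			if (head->fmh_entries == 0)
 *				break;
 *			for (i = 0; i < head->fmh_entries; i++) {
 *				rec = &head->fmh_recs[i];
 *				printf("dev %u: phys %llu len %llu owner %llu\n",
 *						rec->fmr_device,
 *						(unsigned long long)rec->fmr_physical,
 *						(unsigned long long)rec->fmr_length,
 *						(unsigned long long)rec->fmr_owner);
 *			}
 *			rec = &head->fmh_recs[head->fmh_entries - 1];
 *			if (rec->fmr_flags & FMR_OF_LAST)
 *				break;
 *			// Restart with the last record as the new low key.
 *			head->fmh_keys[0] = *rec;
 *		}
 *
 *		free(head);
 *		close(fd);
 *		return 0;
 *	}
 */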