1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (C) 2010, 2023 Red Hat, Inc.
7 #include "xfs_shared.h"
8 #include "xfs_format.h"
9 #include "xfs_log_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_trans.h"
12 #include "xfs_mount.h"
13 #include "xfs_btree.h"
14 #include "xfs_alloc_btree.h"
15 #include "xfs_alloc.h"
16 #include "xfs_discard.h"
17 #include "xfs_error.h"
18 #include "xfs_extent_busy.h"
19 #include "xfs_trace.h"
22 #include "xfs_health.h"
23 #include "xfs_rtbitmap.h"
24 #include "xfs_rtgroup.h"
27 * Notes on an efficient, low latency fstrim algorithm
29 * We need to walk the filesystem free space and issue discards on the free
30 * space that meet the search criteria (size and location). We cannot issue
31 * discards on extents that might be in use, or are so recently in use they are
32 * still marked as busy. To serialise against extent state changes whilst we are
33 * gathering extents to trim, we must hold the AGF lock to lock out other
34 * allocations and extent free operations that might change extent state.
36 * However, we cannot just hold the AGF for the entire AG free space walk whilst
37 * we issue discards on each free space that is found. Storage devices can have
38 * extremely slow discard implementations (e.g. ceph RBD) and so walking a
39 * couple of million free extents and issuing synchronous discards on each
40 * extent can take a *long* time. Whilst we are doing this walk, nothing else
41 * can access the AGF, and we can stall transactions and hence the log whilst
42 * modifications wait for the AGF lock to be released. This can lead hung tasks
43 * kicking the hung task timer and rebooting the system. This is bad.
45 * Hence we need to take a leaf from the bulkstat playbook. It takes the AGI
46 * lock, gathers a range of inode cluster buffers that are allocated, drops the
47 * AGI lock and then reads all the inode cluster buffers and processes them. It
48 * loops doing this, using a cursor to keep track of where it is up to in the AG
49 * for each iteration to restart the INOBT lookup from.
51 * We can't do this exactly with free space - once we drop the AGF lock, the
52 * state of the free extent is out of our control and we cannot run a discard
53 * safely on it in this situation. Unless, of course, we've marked the free
54 * extent as busy and undergoing a discard operation whilst we held the AGF
57 * This is exactly how online discard works - free extents are marked busy when
58 * they are freed, and once the extent free has been committed to the journal,
59 * the busy extent record is marked as "undergoing discard" and the discard is
60 * then issued on the free extent. Once the discard completes, the busy extent
61 * record is removed and the extent is able to be allocated again.
63 * In the context of fstrim, if we find a free extent we need to discard, we
64 * don't have to discard it immediately. All we need to do it record that free
65 * extent as being busy and under discard, and all the allocation routines will
66 * now avoid trying to allocate it. Hence if we mark the extent as busy under
67 * the AGF lock, we can safely discard it without holding the AGF lock because
68 * nothing will attempt to allocate that free space until the discard completes.
70 * This also allows us to issue discards asynchronously like we do with online
71 * discard, and so for fast devices fstrim will run much faster as we can have
72 * multiple discard operations in flight at once, as well as pipeline the free
73 * extent search so that it overlaps in flight discard IO.
76 #define XFS_DISCARD_MAX_EXAMINE (100)
78 struct workqueue_struct
*xfs_discard_wq
;
81 xfs_discard_endio_work(
82 struct work_struct
*work
)
84 struct xfs_busy_extents
*extents
=
85 container_of(work
, struct xfs_busy_extents
, endio_work
);
87 xfs_extent_busy_clear(&extents
->extent_list
, false);
88 kfree(extents
->owner
);
92 * Queue up the actual completion to a thread to avoid IRQ-safe locking for
99 struct xfs_busy_extents
*extents
= bio
->bi_private
;
101 INIT_WORK(&extents
->endio_work
, xfs_discard_endio_work
);
102 queue_work(xfs_discard_wq
, &extents
->endio_work
);
106 static inline struct block_device
*
108 const struct xfs_group
*xg
)
110 struct xfs_mount
*mp
= xg
->xg_mount
;
112 switch (xg
->xg_type
) {
114 return mp
->m_ddev_targp
->bt_bdev
;
116 return mp
->m_rtdev_targp
->bt_bdev
;
125 * Walk the discard list and issue discards on all the busy extents in the
126 * list. We plug and chain the bios so that we only need a single completion
127 * call to clear all the busy extents once the discards are complete.
131 struct xfs_mount
*mp
,
132 struct xfs_busy_extents
*extents
)
134 struct xfs_extent_busy
*busyp
;
135 struct bio
*bio
= NULL
;
136 struct blk_plug plug
;
139 blk_start_plug(&plug
);
140 list_for_each_entry(busyp
, &extents
->extent_list
, list
) {
141 trace_xfs_discard_extent(busyp
->group
, busyp
->bno
,
144 error
= __blkdev_issue_discard(xfs_group_bdev(busyp
->group
),
145 xfs_gbno_to_daddr(busyp
->group
, busyp
->bno
),
146 XFS_FSB_TO_BB(mp
, busyp
->length
),
148 if (error
&& error
!= -EOPNOTSUPP
) {
150 "discard failed for extent [0x%llx,%u], error %d",
151 (unsigned long long)busyp
->bno
,
159 bio
->bi_private
= extents
;
160 bio
->bi_end_io
= xfs_discard_endio
;
163 xfs_discard_endio_work(&extents
->endio_work
);
165 blk_finish_plug(&plug
);
170 struct xfs_trim_cur
{
179 xfs_trim_gather_extents(
180 struct xfs_perag
*pag
,
181 struct xfs_trim_cur
*tcur
,
182 struct xfs_busy_extents
*extents
)
184 struct xfs_mount
*mp
= pag_mount(pag
);
185 struct xfs_trans
*tp
;
186 struct xfs_btree_cur
*cur
;
187 struct xfs_buf
*agbp
;
190 int batch
= XFS_DISCARD_MAX_EXAMINE
;
193 * Force out the log. This means any transactions that might have freed
194 * space before we take the AGF buffer lock are now on disk, and the
195 * volatile disk cache is flushed.
197 xfs_log_force(mp
, XFS_LOG_SYNC
);
199 error
= xfs_trans_alloc_empty(mp
, &tp
);
203 error
= xfs_alloc_read_agf(pag
, tp
, 0, &agbp
);
205 goto out_trans_cancel
;
208 /* sub-AG discard request always starts at tcur->start */
209 cur
= xfs_bnobt_init_cursor(mp
, tp
, agbp
, pag
);
210 error
= xfs_alloc_lookup_le(cur
, tcur
->start
, 0, &i
);
212 error
= xfs_alloc_lookup_ge(cur
, tcur
->start
, 0, &i
);
213 } else if (tcur
->start
== 0) {
214 /* first time through a by-len starts with max length */
215 cur
= xfs_cntbt_init_cursor(mp
, tp
, agbp
, pag
);
216 error
= xfs_alloc_lookup_ge(cur
, 0, tcur
->count
, &i
);
218 /* nth time through a by-len starts where we left off */
219 cur
= xfs_cntbt_init_cursor(mp
, tp
, agbp
, pag
);
220 error
= xfs_alloc_lookup_le(cur
, tcur
->start
, tcur
->count
, &i
);
225 /* nothing of that length left in the AG, we are done */
231 * Loop until we are done with all extents that are large
232 * enough to be worth discarding or we hit batch limits.
238 error
= xfs_alloc_get_rec(cur
, &fbno
, &flen
, &i
);
241 if (XFS_IS_CORRUPT(mp
, i
!= 1)) {
242 xfs_btree_mark_sick(cur
);
243 error
= -EFSCORRUPTED
;
249 * Update the cursor to point at this extent so we
250 * restart the next batch from this extent.
258 * If the extent is entirely outside of the range we are
259 * supposed to skip it. Do not bother to trim down partially
260 * overlapping ranges for now.
262 if (fbno
+ flen
< tcur
->start
) {
263 trace_xfs_discard_exclude(pag_group(pag
), fbno
, flen
);
266 if (fbno
> tcur
->end
) {
267 trace_xfs_discard_exclude(pag_group(pag
), fbno
, flen
);
275 /* Trim the extent returned to the range we want. */
276 if (fbno
< tcur
->start
) {
277 flen
-= tcur
->start
- fbno
;
280 if (fbno
+ flen
> tcur
->end
+ 1)
281 flen
= tcur
->end
- fbno
+ 1;
283 /* Too small? Give up. */
284 if (flen
< tcur
->minlen
) {
285 trace_xfs_discard_toosmall(pag_group(pag
), fbno
, flen
);
293 * If any blocks in the range are still busy, skip the
294 * discard and try again the next time.
296 if (xfs_extent_busy_search(pag_group(pag
), fbno
, flen
)) {
297 trace_xfs_discard_busy(pag_group(pag
), fbno
, flen
);
301 xfs_extent_busy_insert_discard(pag_group(pag
), fbno
, flen
,
302 &extents
->extent_list
);
305 error
= xfs_btree_increment(cur
, 0, &i
);
307 error
= xfs_btree_decrement(cur
, 0, &i
);
312 * If there's no more records in the tree, we are done. Set the
313 * cursor block count to 0 to indicate to the caller that there
314 * is no more extents to search.
321 * If there was an error, release all the gathered busy extents because
322 * we aren't going to issue a discard on them any more.
325 xfs_extent_busy_clear(&extents
->extent_list
, false);
327 xfs_btree_del_cursor(cur
, error
);
329 xfs_trans_cancel(tp
);
334 xfs_trim_should_stop(void)
336 return fatal_signal_pending(current
) || freezing(current
);
340 * Iterate the free list gathering extents and discarding them. We need a cursor
341 * for the repeated iteration of gather/discard loop, so use the longest extent
342 * we found in the last batch as the key to start the next.
345 xfs_trim_perag_extents(
346 struct xfs_perag
*pag
,
351 struct xfs_trim_cur tcur
= {
353 .count
= pag
->pagf_longest
,
359 if (start
!= 0 || end
!= pag_group(pag
)->xg_block_count
)
363 struct xfs_busy_extents
*extents
;
365 extents
= kzalloc(sizeof(*extents
), GFP_KERNEL
);
371 extents
->owner
= extents
;
372 INIT_LIST_HEAD(&extents
->extent_list
);
374 error
= xfs_trim_gather_extents(pag
, &tcur
, extents
);
381 * We hand the extent list to the discard function here so the
382 * discarded extents can be removed from the busy extent list.
383 * This allows the discards to run asynchronously with gathering
384 * the next round of extents to discard.
386 * However, we must ensure that we do not reference the extent
387 * list after this function call, as it may have been freed by
388 * the time control returns to us.
390 error
= xfs_discard_extents(pag_mount(pag
), extents
);
394 if (xfs_trim_should_stop())
397 } while (tcur
.count
!= 0);
404 xfs_trim_datadev_extents(
405 struct xfs_mount
*mp
,
410 xfs_agnumber_t start_agno
, end_agno
;
411 xfs_agblock_t start_agbno
, end_agbno
;
412 struct xfs_perag
*pag
= NULL
;
413 xfs_daddr_t ddev_end
;
414 int last_error
= 0, error
;
416 ddev_end
= min_t(xfs_daddr_t
, end
,
417 XFS_FSB_TO_BB(mp
, mp
->m_sb
.sb_dblocks
) - 1);
419 start_agno
= xfs_daddr_to_agno(mp
, start
);
420 start_agbno
= xfs_daddr_to_agbno(mp
, start
);
421 end_agno
= xfs_daddr_to_agno(mp
, ddev_end
);
422 end_agbno
= xfs_daddr_to_agbno(mp
, ddev_end
);
424 while ((pag
= xfs_perag_next_range(mp
, pag
, start_agno
, end_agno
))) {
425 xfs_agblock_t agend
= pag_group(pag
)->xg_block_count
;
427 if (pag_agno(pag
) == end_agno
)
429 error
= xfs_trim_perag_extents(pag
, start_agbno
, agend
, minlen
);
433 if (xfs_trim_should_stop()) {
444 struct xfs_trim_rtdev
{
445 /* list of rt extents to free */
446 struct list_head extent_list
;
448 /* minimum length that caller allows us to trim */
449 xfs_rtblock_t minlen_fsb
;
451 /* restart point for the rtbitmap walk */
452 xfs_rtxnum_t restart_rtx
;
454 /* stopping point for the current rtbitmap walk */
455 xfs_rtxnum_t stop_rtx
;
458 struct xfs_rtx_busy
{
459 struct list_head list
;
461 xfs_rtblock_t length
;
465 xfs_discard_free_rtdev_extents(
466 struct xfs_trim_rtdev
*tr
)
468 struct xfs_rtx_busy
*busyp
, *n
;
470 list_for_each_entry_safe(busyp
, n
, &tr
->extent_list
, list
) {
471 list_del_init(&busyp
->list
);
477 * Walk the discard list and issue discards on all the busy extents in the
478 * list. We plug and chain the bios so that we only need a single completion
479 * call to clear all the busy extents once the discards are complete.
482 xfs_discard_rtdev_extents(
483 struct xfs_mount
*mp
,
484 struct xfs_trim_rtdev
*tr
)
486 struct block_device
*bdev
= mp
->m_rtdev_targp
->bt_bdev
;
487 struct xfs_rtx_busy
*busyp
;
488 struct bio
*bio
= NULL
;
489 struct blk_plug plug
;
490 xfs_rtblock_t start
= NULLRTBLOCK
, length
= 0;
493 blk_start_plug(&plug
);
494 list_for_each_entry(busyp
, &tr
->extent_list
, list
) {
495 if (start
== NULLRTBLOCK
)
497 length
+= busyp
->length
;
499 trace_xfs_discard_rtextent(mp
, busyp
->bno
, busyp
->length
);
501 error
= __blkdev_issue_discard(bdev
,
502 xfs_rtb_to_daddr(mp
, busyp
->bno
),
503 XFS_FSB_TO_BB(mp
, busyp
->length
),
508 xfs_discard_free_rtdev_extents(tr
);
511 error
= submit_bio_wait(bio
);
512 if (error
== -EOPNOTSUPP
)
516 "discard failed for rtextent [0x%llx,%llu], error %d",
517 (unsigned long long)start
,
518 (unsigned long long)length
,
522 blk_finish_plug(&plug
);
528 xfs_trim_gather_rtextent(
529 struct xfs_rtgroup
*rtg
,
530 struct xfs_trans
*tp
,
531 const struct xfs_rtalloc_rec
*rec
,
534 struct xfs_trim_rtdev
*tr
= priv
;
535 struct xfs_rtx_busy
*busyp
;
536 xfs_rtblock_t rbno
, rlen
;
538 if (rec
->ar_startext
> tr
->stop_rtx
) {
540 * If we've scanned a large number of rtbitmap blocks, update
541 * the cursor to point at this extent so we restart the next
542 * batch from this extent.
544 tr
->restart_rtx
= rec
->ar_startext
;
548 rbno
= xfs_rtx_to_rtb(rtg
, rec
->ar_startext
);
549 rlen
= xfs_rtbxlen_to_blen(rtg_mount(rtg
), rec
->ar_extcount
);
551 /* Ignore too small. */
552 if (rlen
< tr
->minlen_fsb
) {
553 trace_xfs_discard_rttoosmall(rtg_mount(rtg
), rbno
, rlen
);
557 busyp
= kzalloc(sizeof(struct xfs_rtx_busy
), GFP_KERNEL
);
562 busyp
->length
= rlen
;
563 INIT_LIST_HEAD(&busyp
->list
);
564 list_add_tail(&busyp
->list
, &tr
->extent_list
);
566 tr
->restart_rtx
= rec
->ar_startext
+ rec
->ar_extcount
;
570 /* Trim extents on an !rtgroups realtime device */
573 struct xfs_rtgroup
*rtg
,
578 struct xfs_mount
*mp
= rtg_mount(rtg
);
579 struct xfs_trim_rtdev tr
= {
580 .minlen_fsb
= XFS_BB_TO_FSB(mp
, minlen
),
581 .extent_list
= LIST_HEAD_INIT(tr
.extent_list
),
583 struct xfs_trans
*tp
;
586 error
= xfs_trans_alloc_empty(mp
, &tp
);
591 * Walk the free ranges between low and high. The query_range function
592 * trims the extents returned.
595 tr
.stop_rtx
= low
+ xfs_rtbitmap_rtx_per_rbmblock(mp
);
596 xfs_rtgroup_lock(rtg
, XFS_RTGLOCK_BITMAP_SHARED
);
597 error
= xfs_rtalloc_query_range(rtg
, tp
, low
, high
,
598 xfs_trim_gather_rtextent
, &tr
);
600 if (error
== -ECANCELED
)
603 xfs_rtgroup_unlock(rtg
, XFS_RTGLOCK_BITMAP_SHARED
);
604 xfs_discard_free_rtdev_extents(&tr
);
608 if (list_empty(&tr
.extent_list
)) {
609 xfs_rtgroup_unlock(rtg
, XFS_RTGLOCK_BITMAP_SHARED
);
613 error
= xfs_discard_rtdev_extents(mp
, &tr
);
614 xfs_rtgroup_unlock(rtg
, XFS_RTGLOCK_BITMAP_SHARED
);
618 low
= tr
.restart_rtx
;
619 } while (!xfs_trim_should_stop() && low
<= high
);
621 xfs_trans_cancel(tp
);
625 struct xfs_trim_rtgroup
{
626 /* list of rtgroup extents to free */
627 struct xfs_busy_extents
*extents
;
629 /* minimum length that caller allows us to trim */
630 xfs_rtblock_t minlen_fsb
;
632 /* restart point for the rtbitmap walk */
633 xfs_rtxnum_t restart_rtx
;
635 /* number of extents to examine before stopping to issue discard ios */
638 /* number of extents queued for discard */
643 xfs_trim_gather_rtgroup_extent(
644 struct xfs_rtgroup
*rtg
,
645 struct xfs_trans
*tp
,
646 const struct xfs_rtalloc_rec
*rec
,
649 struct xfs_trim_rtgroup
*tr
= priv
;
653 if (--tr
->batch
<= 0) {
655 * If we've checked a large number of extents, update the
656 * cursor to point at this extent so we restart the next batch
659 tr
->restart_rtx
= rec
->ar_startext
;
663 rgbno
= xfs_rtx_to_rgbno(rtg
, rec
->ar_startext
);
664 len
= xfs_rtxlen_to_extlen(rtg_mount(rtg
), rec
->ar_extcount
);
666 /* Ignore too small. */
667 if (len
< tr
->minlen_fsb
) {
668 trace_xfs_discard_toosmall(rtg_group(rtg
), rgbno
, len
);
673 * If any blocks in the range are still busy, skip the discard and try
674 * again the next time.
676 if (xfs_extent_busy_search(rtg_group(rtg
), rgbno
, len
)) {
677 trace_xfs_discard_busy(rtg_group(rtg
), rgbno
, len
);
681 xfs_extent_busy_insert_discard(rtg_group(rtg
), rgbno
, len
,
682 &tr
->extents
->extent_list
);
685 tr
->restart_rtx
= rec
->ar_startext
+ rec
->ar_extcount
;
689 /* Trim extents in this rtgroup using the busy extent machinery. */
691 xfs_trim_rtgroup_extents(
692 struct xfs_rtgroup
*rtg
,
697 struct xfs_mount
*mp
= rtg_mount(rtg
);
698 struct xfs_trim_rtgroup tr
= {
699 .minlen_fsb
= XFS_BB_TO_FSB(mp
, minlen
),
701 struct xfs_trans
*tp
;
704 error
= xfs_trans_alloc_empty(mp
, &tp
);
709 * Walk the free ranges between low and high. The query_range function
710 * trims the extents returned.
713 tr
.extents
= kzalloc(sizeof(*tr
.extents
), GFP_KERNEL
);
720 tr
.batch
= XFS_DISCARD_MAX_EXAMINE
;
721 tr
.extents
->owner
= tr
.extents
;
722 INIT_LIST_HEAD(&tr
.extents
->extent_list
);
724 xfs_rtgroup_lock(rtg
, XFS_RTGLOCK_BITMAP_SHARED
);
725 error
= xfs_rtalloc_query_range(rtg
, tp
, low
, high
,
726 xfs_trim_gather_rtgroup_extent
, &tr
);
727 xfs_rtgroup_unlock(rtg
, XFS_RTGLOCK_BITMAP_SHARED
);
728 if (error
== -ECANCELED
)
739 * We hand the extent list to the discard function here so the
740 * discarded extents can be removed from the busy extent list.
741 * This allows the discards to run asynchronously with
742 * gathering the next round of extents to discard.
744 * However, we must ensure that we do not reference the extent
745 * list after this function call, as it may have been freed by
746 * the time control returns to us.
748 error
= xfs_discard_extents(rtg_mount(rtg
), tr
.extents
);
752 low
= tr
.restart_rtx
;
753 } while (!xfs_trim_should_stop() && low
<= high
);
755 xfs_trans_cancel(tp
);
760 xfs_trim_rtdev_extents(
761 struct xfs_mount
*mp
,
766 xfs_rtblock_t start_rtbno
, end_rtbno
;
767 xfs_rtxnum_t start_rtx
, end_rtx
;
768 xfs_rgnumber_t start_rgno
, end_rgno
;
769 xfs_daddr_t daddr_offset
;
770 int last_error
= 0, error
;
771 struct xfs_rtgroup
*rtg
= NULL
;
773 /* Shift the start and end downwards to match the rt device. */
774 daddr_offset
= XFS_FSB_TO_BB(mp
, mp
->m_sb
.sb_dblocks
);
775 if (start
> daddr_offset
)
776 start
-= daddr_offset
;
779 start_rtbno
= xfs_daddr_to_rtb(mp
, start
);
780 start_rtx
= xfs_rtb_to_rtx(mp
, start_rtbno
);
781 start_rgno
= xfs_rtb_to_rgno(mp
, start_rtbno
);
783 if (end
<= daddr_offset
)
787 end_rtbno
= xfs_daddr_to_rtb(mp
, end
);
788 end_rtx
= xfs_rtb_to_rtx(mp
, end_rtbno
+ mp
->m_sb
.sb_rextsize
- 1);
789 end_rgno
= xfs_rtb_to_rgno(mp
, end_rtbno
);
791 while ((rtg
= xfs_rtgroup_next_range(mp
, rtg
, start_rgno
, end_rgno
))) {
792 xfs_rtxnum_t rtg_end
= rtg
->rtg_extents
;
794 if (rtg_rgno(rtg
) == end_rgno
)
795 rtg_end
= min(rtg_end
, end_rtx
);
797 if (xfs_has_rtgroups(mp
))
798 error
= xfs_trim_rtgroup_extents(rtg
, start_rtx
,
801 error
= xfs_trim_rtextents(rtg
, start_rtx
, rtg_end
,
806 if (xfs_trim_should_stop()) {
807 xfs_rtgroup_rele(rtg
);
816 # define xfs_trim_rtdev_extents(...) (-EOPNOTSUPP)
817 #endif /* CONFIG_XFS_RT */
820 * trim a range of the filesystem.
822 * Note: the parameters passed from userspace are byte ranges into the
823 * filesystem which does not match to the format we use for filesystem block
824 * addressing. FSB addressing is sparse (AGNO|AGBNO), while the incoming format
825 * is a linear address range. Hence we need to use DADDR based conversions and
826 * comparisons for determining the correct offset and regions to trim.
828 * The realtime device is mapped into the FITRIM "address space" immediately
829 * after the data device.
833 struct xfs_mount
*mp
,
834 struct fstrim_range __user
*urange
)
836 unsigned int granularity
=
837 bdev_discard_granularity(mp
->m_ddev_targp
->bt_bdev
);
838 struct block_device
*rt_bdev
= NULL
;
839 struct fstrim_range range
;
840 xfs_daddr_t start
, end
;
842 xfs_rfsblock_t max_blocks
;
843 int error
, last_error
= 0;
845 if (!capable(CAP_SYS_ADMIN
))
847 if (mp
->m_rtdev_targp
&&
848 bdev_max_discard_sectors(mp
->m_rtdev_targp
->bt_bdev
))
849 rt_bdev
= mp
->m_rtdev_targp
->bt_bdev
;
850 if (!bdev_max_discard_sectors(mp
->m_ddev_targp
->bt_bdev
) && !rt_bdev
)
854 granularity
= max(granularity
,
855 bdev_discard_granularity(rt_bdev
));
858 * We haven't recovered the log, so we cannot use our bnobt-guided
859 * storage zapping commands.
861 if (xfs_has_norecovery(mp
))
864 if (copy_from_user(&range
, urange
, sizeof(range
)))
867 range
.minlen
= max_t(u64
, granularity
, range
.minlen
);
868 minlen
= XFS_B_TO_FSB(mp
, range
.minlen
);
871 * Truncating down the len isn't actually quite correct, but using
872 * BBTOB would mean we trivially get overflows for values
873 * of ULLONG_MAX or slightly lower. And ULLONG_MAX is the default
874 * used by the fstrim application. In the end it really doesn't
875 * matter as trimming blocks is an advisory interface.
877 max_blocks
= mp
->m_sb
.sb_dblocks
+ mp
->m_sb
.sb_rblocks
;
878 if (range
.start
>= XFS_FSB_TO_B(mp
, max_blocks
) ||
879 range
.minlen
> XFS_FSB_TO_B(mp
, mp
->m_ag_max_usable
) ||
880 range
.len
< mp
->m_sb
.sb_blocksize
)
883 start
= BTOBB(range
.start
);
884 end
= start
+ BTOBBT(range
.len
) - 1;
886 if (bdev_max_discard_sectors(mp
->m_ddev_targp
->bt_bdev
)) {
887 error
= xfs_trim_datadev_extents(mp
, start
, end
, minlen
);
892 if (rt_bdev
&& !xfs_trim_should_stop()) {
893 error
= xfs_trim_rtdev_extents(mp
, start
, end
, minlen
);
901 range
.len
= min_t(unsigned long long, range
.len
,
902 XFS_FSB_TO_B(mp
, max_blocks
) - range
.start
);
903 if (copy_to_user(urange
, &range
, sizeof(range
)))