// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to segment and merge handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/blk-integrity.h>
#include <linux/scatterlist.h>
#include <linux/part_stat.h>
#include <linux/blk-cgroup.h>

#include <trace/events/block.h>

#include "blk-mq-sched.h"
#include "blk-rq-qos.h"
#include "blk-throttle.h"

21 static inline void bio_get_first_bvec(struct bio
*bio
, struct bio_vec
*bv
)
23 *bv
= mp_bvec_iter_bvec(bio
->bi_io_vec
, bio
->bi_iter
);
26 static inline void bio_get_last_bvec(struct bio
*bio
, struct bio_vec
*bv
)
28 struct bvec_iter iter
= bio
->bi_iter
;
31 bio_get_first_bvec(bio
, bv
);
32 if (bv
->bv_len
== bio
->bi_iter
.bi_size
)
33 return; /* this bio only has a single bvec */
35 bio_advance_iter(bio
, &iter
, iter
.bi_size
);
37 if (!iter
.bi_bvec_done
)
38 idx
= iter
.bi_idx
- 1;
39 else /* in the middle of bvec */
42 *bv
= bio
->bi_io_vec
[idx
];
45 * iter.bi_bvec_done records actual length of the last bvec
46 * if this bio ends in the middle of one io vector
48 if (iter
.bi_bvec_done
)
49 bv
->bv_len
= iter
.bi_bvec_done
;
52 static inline bool bio_will_gap(struct request_queue
*q
,
53 struct request
*prev_rq
, struct bio
*prev
, struct bio
*next
)
55 struct bio_vec pb
, nb
;
57 if (!bio_has_data(prev
) || !queue_virt_boundary(q
))
61 * Don't merge if the 1st bio starts with non-zero offset, otherwise it
62 * is quite difficult to respect the sg gap limit. We work hard to
63 * merge a huge number of small single bios in case of mkfs.
66 bio_get_first_bvec(prev_rq
->bio
, &pb
);
68 bio_get_first_bvec(prev
, &pb
);
69 if (pb
.bv_offset
& queue_virt_boundary(q
))
73 * We don't need to worry about the situation that the merged segment
74 * ends in unaligned virt boundary:
76 * - if 'pb' ends aligned, the merged segment ends aligned
77 * - if 'pb' ends unaligned, the next bio must include
78 * one single bvec of 'nb', otherwise the 'nb' can't
81 bio_get_last_bvec(prev
, &pb
);
82 bio_get_first_bvec(next
, &nb
);
83 if (biovec_phys_mergeable(q
, &pb
, &nb
))
85 return __bvec_gap_to_prev(&q
->limits
, &pb
, nb
.bv_offset
);
88 static inline bool req_gap_back_merge(struct request
*req
, struct bio
*bio
)
90 return bio_will_gap(req
->q
, req
, req
->biotail
, bio
);
93 static inline bool req_gap_front_merge(struct request
*req
, struct bio
*bio
)
95 return bio_will_gap(req
->q
, NULL
, bio
, req
->bio
);
99 * The max size one bio can handle is UINT_MAX becasue bvec_iter.bi_size
100 * is defined as 'unsigned int', meantime it has to be aligned to with the
101 * logical block size, which is the minimum accepted unit by hardware.
103 static unsigned int bio_allowed_max_sectors(const struct queue_limits
*lim
)
105 return round_down(UINT_MAX
, lim
->logical_block_size
) >> SECTOR_SHIFT
;
108 static struct bio
*bio_submit_split(struct bio
*bio
, int split_sectors
)
110 if (unlikely(split_sectors
< 0)) {
111 bio
->bi_status
= errno_to_blk_status(split_sectors
);
119 split
= bio_split(bio
, split_sectors
, GFP_NOIO
,
120 &bio
->bi_bdev
->bd_disk
->bio_split
);
121 split
->bi_opf
|= REQ_NOMERGE
;
122 blkcg_bio_issue_init(split
);
123 bio_chain(split
, bio
);
124 trace_block_split(split
, bio
->bi_iter
.bi_sector
);
125 WARN_ON_ONCE(bio_zone_write_plugging(bio
));
126 submit_bio_noacct(bio
);
133 struct bio
*bio_split_discard(struct bio
*bio
, const struct queue_limits
*lim
,
136 unsigned int max_discard_sectors
, granularity
;
138 unsigned split_sectors
;
142 granularity
= max(lim
->discard_granularity
>> 9, 1U);
144 max_discard_sectors
=
145 min(lim
->max_discard_sectors
, bio_allowed_max_sectors(lim
));
146 max_discard_sectors
-= max_discard_sectors
% granularity
;
147 if (unlikely(!max_discard_sectors
))
150 if (bio_sectors(bio
) <= max_discard_sectors
)
153 split_sectors
= max_discard_sectors
;
156 * If the next starting sector would be misaligned, stop the discard at
157 * the previous aligned sector.
159 tmp
= bio
->bi_iter
.bi_sector
+ split_sectors
-
160 ((lim
->discard_alignment
>> 9) % granularity
);
161 tmp
= sector_div(tmp
, granularity
);
163 if (split_sectors
> tmp
)
164 split_sectors
-= tmp
;
166 return bio_submit_split(bio
, split_sectors
);
169 struct bio
*bio_split_write_zeroes(struct bio
*bio
,
170 const struct queue_limits
*lim
, unsigned *nsegs
)
173 if (!lim
->max_write_zeroes_sectors
)
175 if (bio_sectors(bio
) <= lim
->max_write_zeroes_sectors
)
177 return bio_submit_split(bio
, lim
->max_write_zeroes_sectors
);
180 static inline unsigned int blk_boundary_sectors(const struct queue_limits
*lim
,
184 * chunk_sectors must be a multiple of atomic_write_boundary_sectors if
187 if (is_atomic
&& lim
->atomic_write_boundary_sectors
)
188 return lim
->atomic_write_boundary_sectors
;
190 return lim
->chunk_sectors
;
194 * Return the maximum number of sectors from the start of a bio that may be
195 * submitted as a single request to a block device. If enough sectors remain,
196 * align the end to the physical block size. Otherwise align the end to the
197 * logical block size. This approach minimizes the number of non-aligned
198 * requests that are submitted to a block device if the start of a bio is not
199 * aligned to a physical block boundary.
201 static inline unsigned get_max_io_size(struct bio
*bio
,
202 const struct queue_limits
*lim
)
204 unsigned pbs
= lim
->physical_block_size
>> SECTOR_SHIFT
;
205 unsigned lbs
= lim
->logical_block_size
>> SECTOR_SHIFT
;
206 bool is_atomic
= bio
->bi_opf
& REQ_ATOMIC
;
207 unsigned boundary_sectors
= blk_boundary_sectors(lim
, is_atomic
);
208 unsigned max_sectors
, start
, end
;
211 * We ignore lim->max_sectors for atomic writes because it may less
212 * than the actual bio size, which we cannot tolerate.
215 max_sectors
= lim
->atomic_write_max_sectors
;
217 max_sectors
= lim
->max_sectors
;
219 if (boundary_sectors
) {
220 max_sectors
= min(max_sectors
,
221 blk_boundary_sectors_left(bio
->bi_iter
.bi_sector
,
225 start
= bio
->bi_iter
.bi_sector
& (pbs
- 1);
226 end
= (start
+ max_sectors
) & ~(pbs
- 1);
229 return max_sectors
& ~(lbs
- 1);
233 * get_max_segment_size() - maximum number of bytes to add as a single segment
234 * @lim: Request queue limits.
235 * @paddr: address of the range to add
236 * @len: maximum length available to add at @paddr
238 * Returns the maximum number of bytes of the range starting at @paddr that can
239 * be added to a single segment.
241 static inline unsigned get_max_segment_size(const struct queue_limits
*lim
,
242 phys_addr_t paddr
, unsigned int len
)
245 * Prevent an overflow if mask = ULONG_MAX and offset = 0 by adding 1
246 * after having calculated the minimum.
248 return min_t(unsigned long, len
,
249 min(lim
->seg_boundary_mask
- (lim
->seg_boundary_mask
& paddr
),
250 (unsigned long)lim
->max_segment_size
- 1) + 1);
254 * bvec_split_segs - verify whether or not a bvec should be split in the middle
255 * @lim: [in] queue limits to split based on
256 * @bv: [in] bvec to examine
257 * @nsegs: [in,out] Number of segments in the bio being built. Incremented
258 * by the number of segments from @bv that may be appended to that
259 * bio without exceeding @max_segs
260 * @bytes: [in,out] Number of bytes in the bio being built. Incremented
261 * by the number of bytes from @bv that may be appended to that
262 * bio without exceeding @max_bytes
263 * @max_segs: [in] upper bound for *@nsegs
264 * @max_bytes: [in] upper bound for *@bytes
266 * When splitting a bio, it can happen that a bvec is encountered that is too
267 * big to fit in a single segment and hence that it has to be split in the
268 * middle. This function verifies whether or not that should happen. The value
269 * %true is returned if and only if appending the entire @bv to a bio with
270 * *@nsegs segments and *@sectors sectors would make that bio unacceptable for
273 static bool bvec_split_segs(const struct queue_limits
*lim
,
274 const struct bio_vec
*bv
, unsigned *nsegs
, unsigned *bytes
,
275 unsigned max_segs
, unsigned max_bytes
)
277 unsigned max_len
= min(max_bytes
, UINT_MAX
) - *bytes
;
278 unsigned len
= min(bv
->bv_len
, max_len
);
279 unsigned total_len
= 0;
280 unsigned seg_size
= 0;
282 while (len
&& *nsegs
< max_segs
) {
283 seg_size
= get_max_segment_size(lim
, bvec_phys(bv
) + total_len
, len
);
286 total_len
+= seg_size
;
289 if ((bv
->bv_offset
+ total_len
) & lim
->virt_boundary_mask
)
295 /* tell the caller to split the bvec if it is too big to fit */
296 return len
> 0 || bv
->bv_len
> max_len
;
300 * bio_split_rw_at - check if and where to split a read/write bio
301 * @bio: [in] bio to be split
302 * @lim: [in] queue limits to split based on
303 * @segs: [out] number of segments in the bio with the first half of the sectors
304 * @max_bytes: [in] maximum number of bytes per bio
306 * Find out if @bio needs to be split to fit the queue limits in @lim and a
307 * maximum size of @max_bytes. Returns a negative error number if @bio can't be
308 * split, 0 if the bio doesn't have to be split, or a positive sector offset if
309 * @bio needs to be split.
311 int bio_split_rw_at(struct bio
*bio
, const struct queue_limits
*lim
,
312 unsigned *segs
, unsigned max_bytes
)
314 struct bio_vec bv
, bvprv
, *bvprvp
= NULL
;
315 struct bvec_iter iter
;
316 unsigned nsegs
= 0, bytes
= 0;
318 bio_for_each_bvec(bv
, bio
, iter
) {
320 * If the queue doesn't support SG gaps and adding this
321 * offset would create a gap, disallow it.
323 if (bvprvp
&& bvec_gap_to_prev(lim
, bvprvp
, bv
.bv_offset
))
326 if (nsegs
< lim
->max_segments
&&
327 bytes
+ bv
.bv_len
<= max_bytes
&&
328 bv
.bv_offset
+ bv
.bv_len
<= PAGE_SIZE
) {
332 if (bvec_split_segs(lim
, &bv
, &nsegs
, &bytes
,
333 lim
->max_segments
, max_bytes
))
344 if (bio
->bi_opf
& REQ_ATOMIC
)
348 * We can't sanely support splitting for a REQ_NOWAIT bio. End it
349 * with EAGAIN if splitting is required and return an error pointer.
351 if (bio
->bi_opf
& REQ_NOWAIT
)
357 * Individual bvecs might not be logical block aligned. Round down the
358 * split size so that each bio is properly block size aligned, even if
359 * we do not use the full hardware limits.
361 bytes
= ALIGN_DOWN(bytes
, lim
->logical_block_size
);
364 * Bio splitting may cause subtle trouble such as hang when doing sync
365 * iopoll in direct IO routine. Given performance gain of iopoll for
366 * big IO can be trival, disable iopoll when split needed.
368 bio_clear_polled(bio
);
369 return bytes
>> SECTOR_SHIFT
;
371 EXPORT_SYMBOL_GPL(bio_split_rw_at
);
373 struct bio
*bio_split_rw(struct bio
*bio
, const struct queue_limits
*lim
,
376 return bio_submit_split(bio
,
377 bio_split_rw_at(bio
, lim
, nr_segs
,
378 get_max_io_size(bio
, lim
) << SECTOR_SHIFT
));
382 * REQ_OP_ZONE_APPEND bios must never be split by the block layer.
384 * But we want the nr_segs calculation provided by bio_split_rw_at, and having
385 * a good sanity check that the submitter built the bio correctly is nice to
388 struct bio
*bio_split_zone_append(struct bio
*bio
,
389 const struct queue_limits
*lim
, unsigned *nr_segs
)
391 unsigned int max_sectors
= queue_limits_max_zone_append_sectors(lim
);
394 split_sectors
= bio_split_rw_at(bio
, lim
, nr_segs
,
395 max_sectors
<< SECTOR_SHIFT
);
396 if (WARN_ON_ONCE(split_sectors
> 0))
397 split_sectors
= -EINVAL
;
398 return bio_submit_split(bio
, split_sectors
);
402 * bio_split_to_limits - split a bio to fit the queue limits
403 * @bio: bio to be split
405 * Check if @bio needs splitting based on the queue limits of @bio->bi_bdev, and
406 * if so split off a bio fitting the limits from the beginning of @bio and
407 * return it. @bio is shortened to the remainder and re-submitted.
409 * The split bio is allocated from @q->bio_split, which is provided by the
412 struct bio
*bio_split_to_limits(struct bio
*bio
)
414 const struct queue_limits
*lim
= &bdev_get_queue(bio
->bi_bdev
)->limits
;
415 unsigned int nr_segs
;
417 return __bio_split_to_limits(bio
, lim
, &nr_segs
);
419 EXPORT_SYMBOL(bio_split_to_limits
);
421 unsigned int blk_recalc_rq_segments(struct request
*rq
)
423 unsigned int nr_phys_segs
= 0;
424 unsigned int bytes
= 0;
425 struct req_iterator iter
;
431 switch (bio_op(rq
->bio
)) {
433 case REQ_OP_SECURE_ERASE
:
434 if (queue_max_discard_segments(rq
->q
) > 1) {
435 struct bio
*bio
= rq
->bio
;
442 case REQ_OP_WRITE_ZEROES
:
448 rq_for_each_bvec(bv
, rq
, iter
)
449 bvec_split_segs(&rq
->q
->limits
, &bv
, &nr_phys_segs
, &bytes
,
/*
 * Return the scatterlist entry to fill next: @sglist itself when nothing has
 * been mapped yet (*@sg is NULL), otherwise the entry following *@sg.
 *
 * NOTE(review): recovered from a damaged listing; only the signature and the
 * comment survived, so the whole body ('!*sg' check, sg_unmark_end(),
 * sg_next()) was reinstated - confirm against the tree.
 */
static inline struct scatterlist *blk_next_sg(struct scatterlist **sg,
		struct scatterlist *sglist)
{
	if (!*sg)
		return sglist;

	/*
	 * If the driver previously mapped a shorter list, we could see a
	 * termination bit prematurely unless it fully inits the sg table
	 * on each mapping. We KNOW that there must be more entries here
	 * or the driver would be buggy, so force clear the termination bit
	 * to avoid doing a full sg_init_table() in drivers for each command.
	 */
	sg_unmark_end(*sg);
	return sg_next(*sg);
}

471 static unsigned blk_bvec_map_sg(struct request_queue
*q
,
472 struct bio_vec
*bvec
, struct scatterlist
*sglist
,
473 struct scatterlist
**sg
)
475 unsigned nbytes
= bvec
->bv_len
;
476 unsigned nsegs
= 0, total
= 0;
479 unsigned offset
= bvec
->bv_offset
+ total
;
480 unsigned len
= get_max_segment_size(&q
->limits
,
481 bvec_phys(bvec
) + total
, nbytes
);
482 struct page
*page
= bvec
->bv_page
;
485 * Unfortunately a fair number of drivers barf on scatterlists
486 * that have an offset larger than PAGE_SIZE, despite other
487 * subsystems dealing with that invariant just fine. For now
488 * stick to the legacy format where we never present those from
489 * the block layer, but the code below should be removed once
490 * these offenders (mostly MMC/SD drivers) are fixed.
492 page
+= (offset
>> PAGE_SHIFT
);
493 offset
&= ~PAGE_MASK
;
495 *sg
= blk_next_sg(sg
, sglist
);
496 sg_set_page(*sg
, page
, len
, offset
);
506 static inline int __blk_bvec_map_sg(struct bio_vec bv
,
507 struct scatterlist
*sglist
, struct scatterlist
**sg
)
509 *sg
= blk_next_sg(sg
, sglist
);
510 sg_set_page(*sg
, bv
.bv_page
, bv
.bv_len
, bv
.bv_offset
);
514 /* only try to merge bvecs into one sg if they are from two bios */
516 __blk_segment_map_sg_merge(struct request_queue
*q
, struct bio_vec
*bvec
,
517 struct bio_vec
*bvprv
, struct scatterlist
**sg
)
520 int nbytes
= bvec
->bv_len
;
525 if ((*sg
)->length
+ nbytes
> queue_max_segment_size(q
))
528 if (!biovec_phys_mergeable(q
, bvprv
, bvec
))
531 (*sg
)->length
+= nbytes
;
536 static int __blk_bios_map_sg(struct request_queue
*q
, struct bio
*bio
,
537 struct scatterlist
*sglist
,
538 struct scatterlist
**sg
)
540 struct bio_vec bvec
, bvprv
= { NULL
};
541 struct bvec_iter iter
;
543 bool new_bio
= false;
546 bio_for_each_bvec(bvec
, bio
, iter
) {
548 * Only try to merge bvecs from two bios given we
549 * have done bio internal merge when adding pages
553 __blk_segment_map_sg_merge(q
, &bvec
, &bvprv
, sg
))
556 if (bvec
.bv_offset
+ bvec
.bv_len
<= PAGE_SIZE
)
557 nsegs
+= __blk_bvec_map_sg(bvec
, sglist
, sg
);
559 nsegs
+= blk_bvec_map_sg(q
, &bvec
, sglist
, sg
);
563 if (likely(bio
->bi_iter
.bi_size
)) {
573 * map a request to scatterlist, return number of sg entries setup. Caller
574 * must make sure sg can hold rq->nr_phys_segments entries
576 int __blk_rq_map_sg(struct request_queue
*q
, struct request
*rq
,
577 struct scatterlist
*sglist
, struct scatterlist
**last_sg
)
581 if (rq
->rq_flags
& RQF_SPECIAL_PAYLOAD
)
582 nsegs
= __blk_bvec_map_sg(rq
->special_vec
, sglist
, last_sg
);
584 nsegs
= __blk_bios_map_sg(q
, rq
->bio
, sglist
, last_sg
);
587 sg_mark_end(*last_sg
);
590 * Something must have been wrong if the figured number of
591 * segment is bigger than number of req's physical segments
593 WARN_ON(nsegs
> blk_rq_nr_phys_segments(rq
));
597 EXPORT_SYMBOL(__blk_rq_map_sg
);
599 static inline unsigned int blk_rq_get_max_sectors(struct request
*rq
,
602 struct request_queue
*q
= rq
->q
;
603 struct queue_limits
*lim
= &q
->limits
;
604 unsigned int max_sectors
, boundary_sectors
;
605 bool is_atomic
= rq
->cmd_flags
& REQ_ATOMIC
;
607 if (blk_rq_is_passthrough(rq
))
608 return q
->limits
.max_hw_sectors
;
610 boundary_sectors
= blk_boundary_sectors(lim
, is_atomic
);
611 max_sectors
= blk_queue_get_max_sectors(rq
);
613 if (!boundary_sectors
||
614 req_op(rq
) == REQ_OP_DISCARD
||
615 req_op(rq
) == REQ_OP_SECURE_ERASE
)
617 return min(max_sectors
,
618 blk_boundary_sectors_left(offset
, boundary_sectors
));
621 static inline int ll_new_hw_segment(struct request
*req
, struct bio
*bio
,
622 unsigned int nr_phys_segs
)
624 if (!blk_cgroup_mergeable(req
, bio
))
627 if (blk_integrity_merge_bio(req
->q
, req
, bio
) == false)
630 /* discard request merge won't add new segment */
631 if (req_op(req
) == REQ_OP_DISCARD
)
634 if (req
->nr_phys_segments
+ nr_phys_segs
> blk_rq_get_max_segments(req
))
638 * This will form the start of a new hw segment. Bump both
641 req
->nr_phys_segments
+= nr_phys_segs
;
642 if (bio_integrity(bio
))
643 req
->nr_integrity_segments
+= blk_rq_count_integrity_sg(req
->q
,
648 req_set_nomerge(req
->q
, req
);
652 int ll_back_merge_fn(struct request
*req
, struct bio
*bio
, unsigned int nr_segs
)
654 if (req_gap_back_merge(req
, bio
))
656 if (blk_integrity_rq(req
) &&
657 integrity_req_gap_back_merge(req
, bio
))
659 if (!bio_crypt_ctx_back_mergeable(req
, bio
))
661 if (blk_rq_sectors(req
) + bio_sectors(bio
) >
662 blk_rq_get_max_sectors(req
, blk_rq_pos(req
))) {
663 req_set_nomerge(req
->q
, req
);
667 return ll_new_hw_segment(req
, bio
, nr_segs
);
670 static int ll_front_merge_fn(struct request
*req
, struct bio
*bio
,
671 unsigned int nr_segs
)
673 if (req_gap_front_merge(req
, bio
))
675 if (blk_integrity_rq(req
) &&
676 integrity_req_gap_front_merge(req
, bio
))
678 if (!bio_crypt_ctx_front_mergeable(req
, bio
))
680 if (blk_rq_sectors(req
) + bio_sectors(bio
) >
681 blk_rq_get_max_sectors(req
, bio
->bi_iter
.bi_sector
)) {
682 req_set_nomerge(req
->q
, req
);
686 return ll_new_hw_segment(req
, bio
, nr_segs
);
689 static bool req_attempt_discard_merge(struct request_queue
*q
, struct request
*req
,
690 struct request
*next
)
692 unsigned short segments
= blk_rq_nr_discard_segments(req
);
694 if (segments
>= queue_max_discard_segments(q
))
696 if (blk_rq_sectors(req
) + bio_sectors(next
->bio
) >
697 blk_rq_get_max_sectors(req
, blk_rq_pos(req
)))
700 req
->nr_phys_segments
= segments
+ blk_rq_nr_discard_segments(next
);
703 req_set_nomerge(q
, req
);
707 static int ll_merge_requests_fn(struct request_queue
*q
, struct request
*req
,
708 struct request
*next
)
710 int total_phys_segments
;
712 if (req_gap_back_merge(req
, next
->bio
))
716 * Will it become too large?
718 if ((blk_rq_sectors(req
) + blk_rq_sectors(next
)) >
719 blk_rq_get_max_sectors(req
, blk_rq_pos(req
)))
722 total_phys_segments
= req
->nr_phys_segments
+ next
->nr_phys_segments
;
723 if (total_phys_segments
> blk_rq_get_max_segments(req
))
726 if (!blk_cgroup_mergeable(req
, next
->bio
))
729 if (blk_integrity_merge_rq(q
, req
, next
) == false)
732 if (!bio_crypt_ctx_merge_rq(req
, next
))
736 req
->nr_phys_segments
= total_phys_segments
;
737 req
->nr_integrity_segments
+= next
->nr_integrity_segments
;
742 * blk_rq_set_mixed_merge - mark a request as mixed merge
743 * @rq: request to mark as mixed merge
746 * @rq is about to be mixed merged. Make sure the attributes
747 * which can be mixed are set in each bio and mark @rq as mixed
750 static void blk_rq_set_mixed_merge(struct request
*rq
)
752 blk_opf_t ff
= rq
->cmd_flags
& REQ_FAILFAST_MASK
;
755 if (rq
->rq_flags
& RQF_MIXED_MERGE
)
759 * @rq will no longer represent mixable attributes for all the
760 * contained bios. It will just track those of the first one.
761 * Distributes the attributs to each bio.
763 for (bio
= rq
->bio
; bio
; bio
= bio
->bi_next
) {
764 WARN_ON_ONCE((bio
->bi_opf
& REQ_FAILFAST_MASK
) &&
765 (bio
->bi_opf
& REQ_FAILFAST_MASK
) != ff
);
768 rq
->rq_flags
|= RQF_MIXED_MERGE
;
771 static inline blk_opf_t
bio_failfast(const struct bio
*bio
)
773 if (bio
->bi_opf
& REQ_RAHEAD
)
774 return REQ_FAILFAST_MASK
;
776 return bio
->bi_opf
& REQ_FAILFAST_MASK
;
780 * After we are marked as MIXED_MERGE, any new RA bio has to be updated
781 * as failfast, and request's failfast has to be updated in case of
784 static inline void blk_update_mixed_merge(struct request
*req
,
785 struct bio
*bio
, bool front_merge
)
787 if (req
->rq_flags
& RQF_MIXED_MERGE
) {
788 if (bio
->bi_opf
& REQ_RAHEAD
)
789 bio
->bi_opf
|= REQ_FAILFAST_MASK
;
792 req
->cmd_flags
&= ~REQ_FAILFAST_MASK
;
793 req
->cmd_flags
|= bio
->bi_opf
& REQ_FAILFAST_MASK
;
798 static void blk_account_io_merge_request(struct request
*req
)
800 if (blk_do_io_stat(req
)) {
802 part_stat_inc(req
->part
, merges
[op_stat_group(req_op(req
))]);
803 part_stat_local_dec(req
->part
,
804 in_flight
[op_is_write(req_op(req
))]);
809 static enum elv_merge
blk_try_req_merge(struct request
*req
,
810 struct request
*next
)
812 if (blk_discard_mergable(req
))
813 return ELEVATOR_DISCARD_MERGE
;
814 else if (blk_rq_pos(req
) + blk_rq_sectors(req
) == blk_rq_pos(next
))
815 return ELEVATOR_BACK_MERGE
;
817 return ELEVATOR_NO_MERGE
;
820 static bool blk_atomic_write_mergeable_rq_bio(struct request
*rq
,
823 return (rq
->cmd_flags
& REQ_ATOMIC
) == (bio
->bi_opf
& REQ_ATOMIC
);
826 static bool blk_atomic_write_mergeable_rqs(struct request
*rq
,
827 struct request
*next
)
829 return (rq
->cmd_flags
& REQ_ATOMIC
) == (next
->cmd_flags
& REQ_ATOMIC
);
833 * For non-mq, this has to be called with the request spinlock acquired.
834 * For mq with scheduling, the appropriate queue wide lock should be held.
836 static struct request
*attempt_merge(struct request_queue
*q
,
837 struct request
*req
, struct request
*next
)
839 if (!rq_mergeable(req
) || !rq_mergeable(next
))
842 if (req_op(req
) != req_op(next
))
845 if (rq_data_dir(req
) != rq_data_dir(next
))
848 /* Don't merge requests with different write hints. */
849 if (req
->write_hint
!= next
->write_hint
)
852 if (req
->ioprio
!= next
->ioprio
)
855 if (!blk_atomic_write_mergeable_rqs(req
, next
))
859 * If we are allowed to merge, then append bio list
860 * from next to rq and release next. merge_requests_fn
861 * will have updated segment counts, update sector
862 * counts here. Handle DISCARDs separately, as they
863 * have separate settings.
866 switch (blk_try_req_merge(req
, next
)) {
867 case ELEVATOR_DISCARD_MERGE
:
868 if (!req_attempt_discard_merge(q
, req
, next
))
871 case ELEVATOR_BACK_MERGE
:
872 if (!ll_merge_requests_fn(q
, req
, next
))
880 * If failfast settings disagree or any of the two is already
881 * a mixed merge, mark both as mixed before proceeding. This
882 * makes sure that all involved bios have mixable attributes
885 if (((req
->rq_flags
| next
->rq_flags
) & RQF_MIXED_MERGE
) ||
886 (req
->cmd_flags
& REQ_FAILFAST_MASK
) !=
887 (next
->cmd_flags
& REQ_FAILFAST_MASK
)) {
888 blk_rq_set_mixed_merge(req
);
889 blk_rq_set_mixed_merge(next
);
893 * At this point we have either done a back merge or front merge. We
894 * need the smaller start_time_ns of the merged requests to be the
895 * current request for accounting purposes.
897 if (next
->start_time_ns
< req
->start_time_ns
)
898 req
->start_time_ns
= next
->start_time_ns
;
900 req
->biotail
->bi_next
= next
->bio
;
901 req
->biotail
= next
->biotail
;
903 req
->__data_len
+= blk_rq_bytes(next
);
905 if (!blk_discard_mergable(req
))
906 elv_merge_requests(q
, req
, next
);
908 blk_crypto_rq_put_keyslot(next
);
911 * 'next' is going away, so update stats accordingly
913 blk_account_io_merge_request(next
);
915 trace_block_rq_merge(next
);
918 * ownership of bio passed from next to req, return 'next' for
925 static struct request
*attempt_back_merge(struct request_queue
*q
,
928 struct request
*next
= elv_latter_request(q
, rq
);
931 return attempt_merge(q
, rq
, next
);
936 static struct request
*attempt_front_merge(struct request_queue
*q
,
939 struct request
*prev
= elv_former_request(q
, rq
);
942 return attempt_merge(q
, prev
, rq
);
948 * Try to merge 'next' into 'rq'. Return true if the merge happened, false
949 * otherwise. The caller is responsible for freeing 'next' if the merge
952 bool blk_attempt_req_merge(struct request_queue
*q
, struct request
*rq
,
953 struct request
*next
)
955 return attempt_merge(q
, rq
, next
);
958 bool blk_rq_merge_ok(struct request
*rq
, struct bio
*bio
)
960 if (!rq_mergeable(rq
) || !bio_mergeable(bio
))
963 if (req_op(rq
) != bio_op(bio
))
966 /* different data direction or already started, don't merge */
967 if (bio_data_dir(bio
) != rq_data_dir(rq
))
970 /* don't merge across cgroup boundaries */
971 if (!blk_cgroup_mergeable(rq
, bio
))
974 /* only merge integrity protected bio into ditto rq */
975 if (blk_integrity_merge_bio(rq
->q
, rq
, bio
) == false)
978 /* Only merge if the crypt contexts are compatible */
979 if (!bio_crypt_rq_ctx_compatible(rq
, bio
))
982 /* Don't merge requests with different write hints. */
983 if (rq
->write_hint
!= bio
->bi_write_hint
)
986 if (rq
->ioprio
!= bio_prio(bio
))
989 if (blk_atomic_write_mergeable_rq_bio(rq
, bio
) == false)
995 enum elv_merge
blk_try_merge(struct request
*rq
, struct bio
*bio
)
997 if (blk_discard_mergable(rq
))
998 return ELEVATOR_DISCARD_MERGE
;
999 else if (blk_rq_pos(rq
) + blk_rq_sectors(rq
) == bio
->bi_iter
.bi_sector
)
1000 return ELEVATOR_BACK_MERGE
;
1001 else if (blk_rq_pos(rq
) - bio_sectors(bio
) == bio
->bi_iter
.bi_sector
)
1002 return ELEVATOR_FRONT_MERGE
;
1003 return ELEVATOR_NO_MERGE
;
1006 static void blk_account_io_merge_bio(struct request
*req
)
1008 if (!blk_do_io_stat(req
))
1012 part_stat_inc(req
->part
, merges
[op_stat_group(req_op(req
))]);
1016 enum bio_merge_status
bio_attempt_back_merge(struct request
*req
,
1017 struct bio
*bio
, unsigned int nr_segs
)
1019 const blk_opf_t ff
= bio_failfast(bio
);
1021 if (!ll_back_merge_fn(req
, bio
, nr_segs
))
1022 return BIO_MERGE_FAILED
;
1024 trace_block_bio_backmerge(bio
);
1025 rq_qos_merge(req
->q
, req
, bio
);
1027 if ((req
->cmd_flags
& REQ_FAILFAST_MASK
) != ff
)
1028 blk_rq_set_mixed_merge(req
);
1030 blk_update_mixed_merge(req
, bio
, false);
1032 if (req
->rq_flags
& RQF_ZONE_WRITE_PLUGGING
)
1033 blk_zone_write_plug_bio_merged(bio
);
1035 req
->biotail
->bi_next
= bio
;
1037 req
->__data_len
+= bio
->bi_iter
.bi_size
;
1039 bio_crypt_free_ctx(bio
);
1041 blk_account_io_merge_bio(req
);
1042 return BIO_MERGE_OK
;
1045 static enum bio_merge_status
bio_attempt_front_merge(struct request
*req
,
1046 struct bio
*bio
, unsigned int nr_segs
)
1048 const blk_opf_t ff
= bio_failfast(bio
);
1051 * A front merge for writes to sequential zones of a zoned block device
1052 * can happen only if the user submitted writes out of order. Do not
1053 * merge such write to let it fail.
1055 if (req
->rq_flags
& RQF_ZONE_WRITE_PLUGGING
)
1056 return BIO_MERGE_FAILED
;
1058 if (!ll_front_merge_fn(req
, bio
, nr_segs
))
1059 return BIO_MERGE_FAILED
;
1061 trace_block_bio_frontmerge(bio
);
1062 rq_qos_merge(req
->q
, req
, bio
);
1064 if ((req
->cmd_flags
& REQ_FAILFAST_MASK
) != ff
)
1065 blk_rq_set_mixed_merge(req
);
1067 blk_update_mixed_merge(req
, bio
, true);
1069 bio
->bi_next
= req
->bio
;
1072 req
->__sector
= bio
->bi_iter
.bi_sector
;
1073 req
->__data_len
+= bio
->bi_iter
.bi_size
;
1075 bio_crypt_do_front_merge(req
, bio
);
1077 blk_account_io_merge_bio(req
);
1078 return BIO_MERGE_OK
;
1081 static enum bio_merge_status
bio_attempt_discard_merge(struct request_queue
*q
,
1082 struct request
*req
, struct bio
*bio
)
1084 unsigned short segments
= blk_rq_nr_discard_segments(req
);
1086 if (segments
>= queue_max_discard_segments(q
))
1088 if (blk_rq_sectors(req
) + bio_sectors(bio
) >
1089 blk_rq_get_max_sectors(req
, blk_rq_pos(req
)))
1092 rq_qos_merge(q
, req
, bio
);
1094 req
->biotail
->bi_next
= bio
;
1096 req
->__data_len
+= bio
->bi_iter
.bi_size
;
1097 req
->nr_phys_segments
= segments
+ 1;
1099 blk_account_io_merge_bio(req
);
1100 return BIO_MERGE_OK
;
1102 req_set_nomerge(q
, req
);
1103 return BIO_MERGE_FAILED
;
1106 static enum bio_merge_status
blk_attempt_bio_merge(struct request_queue
*q
,
1109 unsigned int nr_segs
,
1110 bool sched_allow_merge
)
1112 if (!blk_rq_merge_ok(rq
, bio
))
1113 return BIO_MERGE_NONE
;
1115 switch (blk_try_merge(rq
, bio
)) {
1116 case ELEVATOR_BACK_MERGE
:
1117 if (!sched_allow_merge
|| blk_mq_sched_allow_merge(q
, rq
, bio
))
1118 return bio_attempt_back_merge(rq
, bio
, nr_segs
);
1120 case ELEVATOR_FRONT_MERGE
:
1121 if (!sched_allow_merge
|| blk_mq_sched_allow_merge(q
, rq
, bio
))
1122 return bio_attempt_front_merge(rq
, bio
, nr_segs
);
1124 case ELEVATOR_DISCARD_MERGE
:
1125 return bio_attempt_discard_merge(q
, rq
, bio
);
1127 return BIO_MERGE_NONE
;
1130 return BIO_MERGE_FAILED
;
1134 * blk_attempt_plug_merge - try to merge with %current's plugged list
1135 * @q: request_queue new bio is being queued at
1136 * @bio: new bio being queued
1137 * @nr_segs: number of segments in @bio
1138 * from the passed in @q already in the plug list
1140 * Determine whether @bio being queued on @q can be merged with the previous
1141 * request on %current's plugged list. Returns %true if merge was successful,
1144 * Plugging coalesces IOs from the same issuer for the same purpose without
1145 * going through @q->queue_lock. As such it's more of an issuing mechanism
1146 * than scheduling, and the request, while may have elvpriv data, is not
1147 * added on the elevator at this point. In addition, we don't have
1148 * reliable access to the elevator outside queue lock. Only check basic
1149 * merging parameters without querying the elevator.
1151 * Caller must ensure !blk_queue_nomerges(q) beforehand.
1153 bool blk_attempt_plug_merge(struct request_queue
*q
, struct bio
*bio
,
1154 unsigned int nr_segs
)
1156 struct blk_plug
*plug
= current
->plug
;
1159 if (!plug
|| rq_list_empty(plug
->mq_list
))
1162 rq_list_for_each(&plug
->mq_list
, rq
) {
1164 if (blk_attempt_bio_merge(q
, rq
, bio
, nr_segs
, false) ==
1171 * Only keep iterating plug list for merges if we have multiple
1174 if (!plug
->multiple_queues
)
1181 * Iterate list of requests and see if we can merge this bio with any
1184 bool blk_bio_list_merge(struct request_queue
*q
, struct list_head
*list
,
1185 struct bio
*bio
, unsigned int nr_segs
)
1190 list_for_each_entry_reverse(rq
, list
, queuelist
) {
1194 switch (blk_attempt_bio_merge(q
, rq
, bio
, nr_segs
, true)) {
1195 case BIO_MERGE_NONE
:
1199 case BIO_MERGE_FAILED
:
1207 EXPORT_SYMBOL_GPL(blk_bio_list_merge
);
1209 bool blk_mq_sched_try_merge(struct request_queue
*q
, struct bio
*bio
,
1210 unsigned int nr_segs
, struct request
**merged_request
)
1214 switch (elv_merge(q
, &rq
, bio
)) {
1215 case ELEVATOR_BACK_MERGE
:
1216 if (!blk_mq_sched_allow_merge(q
, rq
, bio
))
1218 if (bio_attempt_back_merge(rq
, bio
, nr_segs
) != BIO_MERGE_OK
)
1220 *merged_request
= attempt_back_merge(q
, rq
);
1221 if (!*merged_request
)
1222 elv_merged_request(q
, rq
, ELEVATOR_BACK_MERGE
);
1224 case ELEVATOR_FRONT_MERGE
:
1225 if (!blk_mq_sched_allow_merge(q
, rq
, bio
))
1227 if (bio_attempt_front_merge(rq
, bio
, nr_segs
) != BIO_MERGE_OK
)
1229 *merged_request
= attempt_front_merge(q
, rq
);
1230 if (!*merged_request
)
1231 elv_merged_request(q
, rq
, ELEVATOR_FRONT_MERGE
);
1233 case ELEVATOR_DISCARD_MERGE
:
1234 return bio_attempt_discard_merge(q
, rq
, bio
) == BIO_MERGE_OK
;
1239 EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge
);