1 // SPDX-License-Identifier: GPL-2.0
/*
 * Bounce buffer handling for block devices.
 *
 * - Split from highmem.c
 */
7 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10 #include <linux/export.h>
11 #include <linux/swap.h>
12 #include <linux/gfp.h>
13 #include <linux/bio.h>
14 #include <linux/pagemap.h>
15 #include <linux/mempool.h>
16 #include <linux/blkdev.h>
17 #include <linux/backing-dev.h>
18 #include <linux/init.h>
19 #include <linux/hash.h>
20 #include <linux/highmem.h>
21 #include <linux/memblock.h>
22 #include <linux/printk.h>
23 #include <asm/tlbflush.h>
25 #include <trace/events/block.h>
29 #define ISA_POOL_SIZE 16
31 static struct bio_set bounce_bio_set
, bounce_bio_split
;
32 static mempool_t page_pool
, isa_page_pool
;
/*
 * init_bounce_bioset() - set up the two bio sets used for bouncing.
 *
 * Initialises bounce_bio_set (BIO_POOL_SIZE entries, BIOSET_NEED_BVECS) and
 * bounce_bio_split (BIO_POOL_SIZE entries, no flags) through bioset_init(),
 * asks for an integrity pool on bounce_bio_set, and finally records that the
 * work is done in the function-local static flag bounce_bs_setup.
 *
 * NOTE(review): the declaration of ret, the braces, the consumer of ret and
 * of the bioset_integrity_create() result, and any test of bounce_bs_setup
 * on entry are not visible in this view - confirm against the complete file.
 */
34 static void init_bounce_bioset(void)
36 static bool bounce_bs_setup
;
42 ret
= bioset_init(&bounce_bio_set
, BIO_POOL_SIZE
, 0, BIOSET_NEED_BVECS
);
44 if (bioset_integrity_create(&bounce_bio_set
, BIO_POOL_SIZE
))
47 ret
= bioset_init(&bounce_bio_split
, BIO_POOL_SIZE
, 0, 0);
49 bounce_bs_setup
= true;
52 #if defined(CONFIG_HIGHMEM)
/*
 * init_emergency_pool() - boot-time setup of the regular bounce page pool.
 *
 * Registered through __initcall(). Creates page_pool with POOL_SIZE order-0
 * pages via mempool_init_page_pool() and logs the pool size. When
 * CONFIG_MEMORY_HOTPLUG is not set, max_pfn is first compared against
 * max_low_pfn.
 *
 * NOTE(review): the body of the max_pfn test, the matching #endif, the
 * declaration and checking of ret, and the return statements are not
 * visible in this view; presumably the function returns early when there is
 * no memory above max_low_pfn - confirm against the complete file.
 */
53 static __init
int init_emergency_pool(void)
56 #if defined(CONFIG_HIGHMEM) && !defined(CONFIG_MEMORY_HOTPLUG)
57 if (max_pfn
<= max_low_pfn
)
61 ret
= mempool_init_page_pool(&page_pool
, POOL_SIZE
, 0);
63 pr_info("pool size: %d pages\n", POOL_SIZE
);
69 __initcall(init_emergency_pool
);
74 * highmem version, map in to vec
76 static void bounce_copy_vec(struct bio_vec
*to
, unsigned char *vfrom
)
80 vto
= kmap_atomic(to
->bv_page
);
81 memcpy(vto
+ to
->bv_offset
, vfrom
, to
->bv_len
);
85 #else /* CONFIG_HIGHMEM */
/*
 * Non-highmem variant: no temporary mapping is taken; the destination is
 * addressed through page_address() plus the offset stored in the vec.
 */
#define bounce_copy_vec(to, vfrom)					\
	memcpy(page_address((to)->bv_page) + (to)->bv_offset,		\
	       vfrom,							\
	       (to)->bv_len)
90 #endif /* CONFIG_HIGHMEM */
93 * allocate pages in the DMA region for the ISA pool
95 static void *mempool_alloc_pages_isa(gfp_t gfp_mask
, void *data
)
97 return mempool_alloc_pages(gfp_mask
| GFP_DMA
, data
);
/* Held by init_emergency_isa_pool() while it checks and sets up isa_page_pool. */
static DEFINE_MUTEX(isa_mutex);
/*
 * Gets called "every" time someone initialises a queue with BLK_BOUNCE_ISA
 * as the max address, so check whether the pool has already been created.
 */
/*
 * init_emergency_isa_pool() - create the ISA bounce page pool on first use.
 *
 * Takes isa_mutex, and if isa_page_pool is already initialised drops the
 * mutex again without re-creating it. Otherwise the pool is set up with
 * ISA_POOL_SIZE elements, using mempool_alloc_pages_isa() to allocate and
 * mempool_free_pages() to free, with a pool_data value of 0. The pool size
 * is then logged, init_bounce_bioset() is called, and the mutex is released.
 *
 * NOTE(review): the declaration and checking of ret, the return statements
 * and the closing braces are not visible in this view - confirm the return
 * values against the complete file.
 */
106 int init_emergency_isa_pool(void)
110 mutex_lock(&isa_mutex
);
112 if (mempool_initialized(&isa_page_pool
)) {
113 mutex_unlock(&isa_mutex
);
117 ret
= mempool_init(&isa_page_pool
, ISA_POOL_SIZE
, mempool_alloc_pages_isa
,
118 mempool_free_pages
, (void *) 0);
121 pr_info("isa pool size: %d pages\n", ISA_POOL_SIZE
);
122 init_bounce_bioset();
123 mutex_unlock(&isa_mutex
);
/*
 * Simple bounce buffer support for highmem pages. Depending on the
 * queue gfp mask set, *to may or may not be a highmem page. kmap it
 * always; it will do the Right Thing.
 */
/*
 * copy_to_high_bio_irq() - copy bounced data from @from back into @to.
 *
 * Walks the segments of @to with bio_for_each_segment(). For each segment
 * the matching vec of @from is fetched through a private iterator
 * (from_iter, started from BVEC_ITER_ALL_INIT rather than @from->bi_iter).
 * When the two vecs refer to different pages, the data is copied with
 * bounce_copy_vec() and the destination page is passed to
 * flush_dcache_page(). from_iter is advanced by tovec.bv_len each time.
 *
 * NOTE(review): the second operand of the vfrom expression and the closing
 * braces are not visible in this view, so the exact source offset and the
 * nesting of the bio_advance_iter() call must be confirmed against the
 * complete file.
 */
132 static void copy_to_high_bio_irq(struct bio
*to
, struct bio
*from
)
134 unsigned char *vfrom
;
135 struct bio_vec tovec
, fromvec
;
136 struct bvec_iter iter
;
138 * The bio of @from is created by bounce, so we can iterate
139 * its bvec from start to end, but the @from->bi_iter can't be
140 * trusted because it might be changed by splitting.
142 struct bvec_iter from_iter
= BVEC_ITER_ALL_INIT
;
144 bio_for_each_segment(tovec
, to
, iter
) {
145 fromvec
= bio_iter_iovec(from
, from_iter
);
146 if (tovec
.bv_page
!= fromvec
.bv_page
) {
148 * fromvec->bv_offset and fromvec->bv_len might have
149 * been modified by the block layer, so use the original
150 * copy, bounce_copy_vec already uses tovec->bv_len
152 vfrom
= page_address(fromvec
.bv_page
) +
155 bounce_copy_vec(&tovec
, vfrom
);
156 flush_dcache_page(tovec
.bv_page
);
158 bio_advance_iter(from
, &from_iter
, tovec
.bv_len
);
/*
 * bounce_end_io() - common completion work for a bounce bio.
 *
 * @bio:  the bounce bio; bi_private holds the original bio
 * @pool: mempool that bounce pages are returned to
 *
 * Iterates over every bvec of @bio in step with the original bio (using a
 * copy of bio_orig->bi_iter). A page that differs from the page of the
 * original vec is treated as a bounce page: NR_BOUNCE is decremented for it
 * and it is freed back to @pool. The status of @bio is then copied to the
 * original bio.
 *
 * NOTE(review): the declaration of i and whatever follows the status copy
 * are not visible in this view - confirm against the complete file.
 */
162 static void bounce_end_io(struct bio
*bio
, mempool_t
*pool
)
164 struct bio
*bio_orig
= bio
->bi_private
;
165 struct bio_vec
*bvec
, orig_vec
;
/* Private copy, so the iterator of the original bio is not modified here. */
167 struct bvec_iter orig_iter
= bio_orig
->bi_iter
;
168 struct bvec_iter_all iter_all
;
171 * free up bounce indirect pages used
173 bio_for_each_segment_all(bvec
, bio
, i
, iter_all
) {
174 orig_vec
= bio_iter_iovec(bio_orig
, orig_iter
);
/* Only pages that were substituted by the bounce code are released. */
175 if (bvec
->bv_page
!= orig_vec
.bv_page
) {
176 dec_zone_page_state(bvec
->bv_page
, NR_BOUNCE
);
177 mempool_free(bvec
->bv_page
, pool
);
179 bio_advance_iter(bio_orig
, &orig_iter
, orig_vec
.bv_len
);
/* Hand the completion status of the bounce bio to the original bio. */
182 bio_orig
->bi_status
= bio
->bi_status
;
187 static void bounce_end_io_write(struct bio
*bio
)
189 bounce_end_io(bio
, &page_pool
);
192 static void bounce_end_io_write_isa(struct bio
*bio
)
195 bounce_end_io(bio
, &isa_page_pool
);
/*
 * __bounce_end_io_read() - completion work for a bounced read.
 *
 * @bio:  the bounce bio; bi_private holds the original bio
 * @pool: mempool that the bounce pages were taken from
 *
 * Copies the data of @bio into the original bio with
 * copy_to_high_bio_irq(), then finishes through bounce_end_io().
 *
 * NOTE(review): lines between the declaration and the copy call are not
 * visible in this view, so the copy may be conditional (for instance on the
 * bio status) - confirm against the complete file.
 */
198 static void __bounce_end_io_read(struct bio
*bio
, mempool_t
*pool
)
200 struct bio
*bio_orig
= bio
->bi_private
;
203 copy_to_high_bio_irq(bio_orig
, bio
);
205 bounce_end_io(bio
, pool
);
208 static void bounce_end_io_read(struct bio
*bio
)
210 __bounce_end_io_read(bio
, &page_pool
);
213 static void bounce_end_io_read_isa(struct bio
*bio
)
215 __bounce_end_io_read(bio
, &isa_page_pool
);
/*
 * bounce_clone_bio() - build a new bio with its own bvec table from @bio_src.
 *
 * @bio_src:  bio to copy
 * @gfp_mask: allocation flags for the new bio and its integrity payload
 *
 * Allocates a bio from the bio_set bs with room for bio_segments(bio_src)
 * vecs, then copies bi_disk, bi_opf, bi_ioprio, bi_write_hint and the
 * sector and size of bi_iter. The bvec table is filled depending on
 * bio_op(): the REQ_OP_WRITE_SAME arm copies only bi_io_vec[0] of the
 * source, and a bio_for_each_segment() loop copies every segment. If the
 * source carries integrity data it is cloned with bio_integrity_clone().
 * Finally the blkg association is cloned and blkcg_bio_issue_init() is
 * called on the new bio.
 *
 * NOTE(review): the remainder of the parameter list (where bs comes from),
 * the declarations of bio, bv and ret, the break/default structure of the
 * switch, the handling of REQ_OP_SECURE_ERASE and REQ_OP_WRITE_ZEROES, all
 * failure paths and the return statement are not visible in this view -
 * confirm against the complete file.
 */
218 static struct bio
*bounce_clone_bio(struct bio
*bio_src
, gfp_t gfp_mask
,
221 struct bvec_iter iter
;
226 * Pre immutable biovecs, __bio_clone() used to just do a memcpy from
227 * bio_src->bi_io_vec to bio->bi_io_vec.
229 * We can't do that anymore, because:
231 * - The point of cloning the biovec is to produce a bio with a biovec
232 * the caller can modify: bi_idx and bi_bvec_done should be 0.
234 * - The original bio could've had more than BIO_MAX_PAGES biovecs; if
235 * we tried to clone the whole thing bio_alloc_bioset() would fail.
236 * But the clone should succeed as long as the number of biovecs we
237 * actually need to allocate is fewer than BIO_MAX_PAGES.
239 * - Lastly, bi_vcnt should not be looked at or relied upon by code
240 * that does not own the bio - reason being drivers don't use it for
241 * iterating over the biovec anymore, so expecting it to be kept up
242 * to date (i.e. for clones that share the parent biovec) is just
243 * asking for trouble and would force extra work on
244 * __bio_clone_fast() anyways.
247 bio
= bio_alloc_bioset(gfp_mask
, bio_segments(bio_src
), bs
);
250 bio
->bi_disk
= bio_src
->bi_disk
;
251 bio
->bi_opf
= bio_src
->bi_opf
;
252 bio
->bi_ioprio
= bio_src
->bi_ioprio
;
253 bio
->bi_write_hint
= bio_src
->bi_write_hint
;
254 bio
->bi_iter
.bi_sector
= bio_src
->bi_iter
.bi_sector
;
255 bio
->bi_iter
.bi_size
= bio_src
->bi_iter
.bi_size
;
257 switch (bio_op(bio
)) {
259 case REQ_OP_SECURE_ERASE
:
260 case REQ_OP_WRITE_ZEROES
:
262 case REQ_OP_WRITE_SAME
:
263 bio
->bi_io_vec
[bio
->bi_vcnt
++] = bio_src
->bi_io_vec
[0];
266 bio_for_each_segment(bv
, bio_src
, iter
)
267 bio
->bi_io_vec
[bio
->bi_vcnt
++] = bv
;
271 if (bio_integrity(bio_src
)) {
274 ret
= bio_integrity_clone(bio
, bio_src
, gfp_mask
);
281 bio_clone_blkg_association(bio
, bio_src
);
282 blkcg_bio_issue_init(bio
);
/*
 * __blk_queue_bounce() - replace out-of-range pages of a bio by bounce pages.
 *
 * @q:        request queue; limits.bounce_pfn is the highest usable pfn and
 *            bounce_gfp supplies the allocation flags
 * @bio_orig: in/out pointer to the bio being submitted
 *
 * Visible steps, in order:
 *  - Walk the segments of *bio_orig, summing the sectors of the first
 *    BIO_MAX_PAGES segments and testing each page against bounce_pfn.
 *  - For a non-passthrough bio that covers more sectors than were counted,
 *    split off the counted part with bio_split() from bounce_bio_split,
 *    chain it to the remainder and resubmit the remainder through
 *    generic_make_request().
 *  - Clone the bio with bounce_clone_bio() using GFP_NOIO.
 *  - Walk the bvec table of the clone directly. A page above bounce_pfn is
 *    replaced by a page from pool and NR_BOUNCE is incremented; the old
 *    page is passed to flush_dcache_page() and its data is copied into the
 *    new page through a kmap_atomic() mapping.
 *  - Emit the block_bio_bounce trace event, set BIO_BOUNCED, select the
 *    end-io handler matching the pool (regular or ISA) and the direction,
 *    and store *bio_orig in bi_private of the clone.
 *
 * NOTE(review): the rest of the parameter list (where pool comes from),
 * several local declarations, the bodies of the pfn tests, the conditions
 * guarding the data copy and the handler selection, and the final update of
 * *bio_orig are not visible in this view - confirm against the complete
 * file.
 */
287 static void __blk_queue_bounce(struct request_queue
*q
, struct bio
**bio_orig
,
291 int rw
= bio_data_dir(*bio_orig
);
292 struct bio_vec
*to
, from
;
293 struct bvec_iter iter
;
297 bool passthrough
= bio_is_passthrough(*bio_orig
);
299 bio_for_each_segment(from
, *bio_orig
, iter
) {
/* Only the first BIO_MAX_PAGES segments contribute to the sector count. */
300 if (i
++ < BIO_MAX_PAGES
)
301 sectors
+= from
.bv_len
>> 9;
302 if (page_to_pfn(from
.bv_page
) > q
->limits
.bounce_pfn
)
/* The bio is larger than what was counted: bounce only the leading part. */
308 if (!passthrough
&& sectors
< bio_sectors(*bio_orig
)) {
309 bio
= bio_split(*bio_orig
, sectors
, GFP_NOIO
, &bounce_bio_split
);
310 bio_chain(bio
, *bio_orig
);
311 generic_make_request(*bio_orig
);
314 bio
= bounce_clone_bio(*bio_orig
, GFP_NOIO
, passthrough
? NULL
:
318 * Bvec table can't be updated by bio_for_each_segment_all(),
319 * so retrieve bvec from the table directly. This way is safe
320 * because the 'bio' is single-page bvec.
322 for (i
= 0, to
= bio
->bi_io_vec
; i
< bio
->bi_vcnt
; to
++, i
++) {
323 struct page
*page
= to
->bv_page
;
325 if (page_to_pfn(page
) <= q
->limits
.bounce_pfn
)
328 to
->bv_page
= mempool_alloc(pool
, q
->bounce_gfp
);
329 inc_zone_page_state(to
->bv_page
, NR_BOUNCE
);
334 flush_dcache_page(page
);
336 vto
= page_address(to
->bv_page
) + to
->bv_offset
;
337 vfrom
= kmap_atomic(page
) + to
->bv_offset
;
338 memcpy(vto
, vfrom
, to
->bv_len
);
339 kunmap_atomic(vfrom
);
343 trace_block_bio_bounce(q
, *bio_orig
);
345 bio
->bi_flags
|= (1 << BIO_BOUNCED
);
347 if (pool
== &page_pool
) {
348 bio
->bi_end_io
= bounce_end_io_write
;
350 bio
->bi_end_io
= bounce_end_io_read
;
352 bio
->bi_end_io
= bounce_end_io_write_isa
;
354 bio
->bi_end_io
= bounce_end_io_read_isa
;
357 bio
->bi_private
= *bio_orig
;
/*
 * blk_queue_bounce() - entry point: pick the page pool and bounce a bio.
 *
 * @q:        request queue the bio is headed for
 * @bio_orig: in/out pointer to the bio, passed on to __blk_queue_bounce()
 *
 * A bio without data is tested for first. For a queue whose bounce_gfp
 * lacks GFP_DMA, limits.bounce_pfn is compared with blk_max_pfn. On the
 * ISA path the pool must already exist (BUG_ON otherwise) and
 * isa_page_pool is selected. The actual work is done by
 * __blk_queue_bounce() with the chosen pool.
 *
 * NOTE(review): the declaration of pool, the bodies of the two early tests
 * (presumably plain returns) and the selection of the non-ISA pool are not
 * visible in this view - confirm against the complete file.
 */
361 void blk_queue_bounce(struct request_queue
*q
, struct bio
**bio_orig
)
366 * Data-less bio, nothing to bounce
368 if (!bio_has_data(*bio_orig
))
372 * for non-isa bounce case, just check if the bounce pfn is equal
373 * to or bigger than the highest pfn in the system -- in that case,
374 * don't waste time iterating over bio segments
376 if (!(q
->bounce_gfp
& GFP_DMA
)) {
377 if (q
->limits
.bounce_pfn
>= blk_max_pfn
)
381 BUG_ON(!mempool_initialized(&isa_page_pool
));
382 pool
= &isa_page_pool
;
388 __blk_queue_bounce(q
, bio_orig
, pool
);