// SPDX-License-Identifier: GPL-2.0
/*
 * bio-integrity.c - bio data integrity extensions
 *
 * Copyright (C) 2007, 2008, 2009 Oracle Corporation
 * Written by: Martin K. Petersen <martin.petersen@oracle.com>
 */

#include <linux/blk-integrity.h>
#include <linux/mempool.h>
#include <linux/export.h>
#include <linux/bio.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include "blk.h"

static struct kmem_cache *bip_slab;
static struct workqueue_struct *kintegrityd_wq;
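
/* Drain any integrity verification work still queued on kintegrityd_wq. */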
void blk_flush_integrity(void)
{
	flush_workqueue(kintegrityd_wq);
}

/**
 * bio_integrity_free - Free bio integrity payload
 * @bio: bio containing bip to be freed
 *
 * Description: Free the integrity portion of a bio.
 */
void bio_integrity_free(struct bio *bio)
{
	struct bio_integrity_payload *bip = bio_integrity(bio);
	struct bio_set *bs = bio->bi_pool;

	if (bs && mempool_initialized(&bs->bio_integrity_pool)) {
		bvec_free(&bs->bvec_integrity_pool, bip->bip_vec,
			  bip->bip_max_vcnt);
		mempool_free(bip, &bs->bio_integrity_pool);
	} else {
		kfree(bip);
	}
	bio->bi_integrity = NULL;
	bio->bi_opf &= ~REQ_INTEGRITY;
}

/**
 * bio_integrity_alloc - Allocate integrity payload and attach it to bio
 * @bio: bio to attach integrity metadata to
 * @gfp_mask: Memory allocation mask
 * @nr_vecs: Number of integrity metadata scatter-gather elements
 *
 * Description: This function prepares a bio for attaching integrity
 * metadata. nr_vecs specifies the maximum number of pages containing
 * integrity metadata that can be attached.
 */
struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
						  gfp_t gfp_mask,
						  unsigned int nr_vecs)
{
	struct bio_integrity_payload *bip;
	struct bio_set *bs = bio->bi_pool;
	unsigned inline_vecs;

	if (WARN_ON_ONCE(bio_has_crypt_ctx(bio)))
		return ERR_PTR(-EOPNOTSUPP);

	if (!bs || !mempool_initialized(&bs->bio_integrity_pool)) {
		bip = kmalloc(struct_size(bip, bip_inline_vecs, nr_vecs), gfp_mask);
		inline_vecs = nr_vecs;
	} else {
		bip = mempool_alloc(&bs->bio_integrity_pool, gfp_mask);
		inline_vecs = BIO_INLINE_VECS;
	}

	if (unlikely(!bip))
		return ERR_PTR(-ENOMEM);

	memset(bip, 0, sizeof(*bip));

	/* always report as many vecs as asked explicitly, not inline vecs */
	bip->bip_max_vcnt = nr_vecs;
	if (nr_vecs > inline_vecs) {
		bip->bip_vec = bvec_alloc(&bs->bvec_integrity_pool,
					  &bip->bip_max_vcnt, gfp_mask);
		if (!bip->bip_vec)
			goto err;
	} else {
		bip->bip_vec = bip->bip_inline_vecs;
	}

	bip->bip_bio = bio;
	bio->bi_integrity = bip;
	bio->bi_opf |= REQ_INTEGRITY;

	return bip;
err:
	if (bs && mempool_initialized(&bs->bio_integrity_pool))
		mempool_free(bip, &bs->bio_integrity_pool);
	else
		kfree(bip);
	return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL(bio_integrity_alloc);
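
/*
 * Release the page pins taken when a user buffer was mapped as an integrity
 * payload.  For READs the device wrote into the pages, so mark them dirty
 * before unpinning.
 */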
static void bio_integrity_unpin_bvec(struct bio_vec *bv, int nr_vecs,
				     bool dirty)
{
	int i;

	for (i = 0; i < nr_vecs; i++) {
		if (dirty && !PageCompound(bv[i].bv_page))
			set_page_dirty_lock(bv[i].bv_page);
		unpin_user_page(bv[i].bv_page);
	}
}
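
/*
 * For a bounced READ, copy the integrity data the device deposited in the
 * kernel bounce buffer (bip_vec[0]) back to the original pinned user pages,
 * which were stashed at bip_vec[1..] when the payload was set up.
 */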
static void bio_integrity_uncopy_user(struct bio_integrity_payload *bip)
{
	unsigned short nr_vecs = bip->bip_max_vcnt - 1;
	struct bio_vec *copy = &bip->bip_vec[1];
	size_t bytes = bip->bip_iter.bi_size;
	struct iov_iter iter;
	int ret;

	iov_iter_bvec(&iter, ITER_DEST, copy, nr_vecs, bytes);
	ret = copy_to_iter(bvec_virt(bip->bip_vec), bytes, &iter);
	WARN_ON_ONCE(ret != bytes);

	bio_integrity_unpin_bvec(copy, nr_vecs, true);
}

/**
 * bio_integrity_unmap_user - Unmap user integrity payload
 * @bio: bio containing bip to be unmapped
 *
 * Unmap the user mapped integrity portion of a bio.
 */
void bio_integrity_unmap_user(struct bio *bio)
{
	struct bio_integrity_payload *bip = bio_integrity(bio);

	if (bip->bip_flags & BIP_COPY_USER) {
		if (bio_data_dir(bio) == READ)
			bio_integrity_uncopy_user(bip);
		kfree(bvec_virt(bip->bip_vec));
		return;
	}

	bio_integrity_unpin_bvec(bip->bip_vec, bip->bip_max_vcnt,
			bio_data_dir(bio) == READ);
}

/**
 * bio_integrity_add_page - Attach integrity metadata
 * @bio: bio to update
 * @page: page containing integrity metadata
 * @len: number of bytes of integrity metadata in page
 * @offset: start offset within page
 *
 * Description: Attach a page containing integrity metadata to bio.
 */
int bio_integrity_add_page(struct bio *bio, struct page *page,
			   unsigned int len, unsigned int offset)
{
	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
	struct bio_integrity_payload *bip = bio_integrity(bio);

	if (bip->bip_vcnt > 0) {
		struct bio_vec *bv = &bip->bip_vec[bip->bip_vcnt - 1];
		bool same_page = false;

		if (bvec_try_merge_hw_page(q, bv, page, len, offset,
					   &same_page)) {
			bip->bip_iter.bi_size += len;
			return len;
		}

		if (bip->bip_vcnt >=
		    min(bip->bip_max_vcnt, queue_max_integrity_segments(q)))
			return 0;

		/*
		 * If the queue doesn't support SG gaps and adding this segment
		 * would create a gap, disallow it.
		 */
		if (bvec_gap_to_prev(&q->limits, bv, offset))
			return 0;
	}

	bvec_set_page(&bip->bip_vec[bip->bip_vcnt], page, len, offset);
	bip->bip_vcnt++;
	bip->bip_iter.bi_size += len;

	return len;
}
EXPORT_SYMBOL(bio_integrity_add_page);
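
/*
 * Copy-based variant of the user mapping: the integrity data is staged in a
 * contiguous kernel bounce buffer instead of being used in place.  For a
 * WRITE the user data is copied in up front and the user pages are unpinned
 * immediately; for a READ the original user bvecs are preserved at
 * bip_vec[1..] so bio_integrity_unmap_user() can copy the result back.
 */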
static int bio_integrity_copy_user(struct bio *bio, struct bio_vec *bvec,
				   int nr_vecs, unsigned int len,
				   unsigned int direction)
{
	bool write = direction == ITER_SOURCE;
	struct bio_integrity_payload *bip;
	struct iov_iter iter;
	void *buf;
	int ret;

	buf = kmalloc(len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (write) {
		iov_iter_bvec(&iter, direction, bvec, nr_vecs, len);
		if (!copy_from_iter_full(buf, len, &iter)) {
			ret = -EFAULT;
			goto free_buf;
		}

		bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
	} else {
		memset(buf, 0, len);

		/*
		 * We need to preserve the original bvec and the number of vecs
		 * in it for completion handling
		 */
		bip = bio_integrity_alloc(bio, GFP_KERNEL, nr_vecs + 1);
	}

	if (IS_ERR(bip)) {
		ret = PTR_ERR(bip);
		goto free_buf;
	}

	if (write)
		bio_integrity_unpin_bvec(bvec, nr_vecs, false);
	else
		memcpy(&bip->bip_vec[1], bvec, nr_vecs * sizeof(*bvec));

	ret = bio_integrity_add_page(bio, virt_to_page(buf), len,
				     offset_in_page(buf));
	if (ret != len) {
		ret = -ENOMEM;
		goto free_bip;
	}

	bip->bip_flags |= BIP_COPY_USER;
	bip->bip_vcnt = nr_vecs;
	return 0;
free_bip:
	bio_integrity_free(bio);
free_buf:
	kfree(buf);
	return ret;
}
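
/*
 * Fast path for the user mapping: the pinned user pages already satisfy the
 * queue's alignment and segment limits, so just record their bvecs directly
 * in the integrity payload.
 */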
static int bio_integrity_init_user(struct bio *bio, struct bio_vec *bvec,
				   int nr_vecs, unsigned int len)
{
	struct bio_integrity_payload *bip;

	bip = bio_integrity_alloc(bio, GFP_KERNEL, nr_vecs);
	if (IS_ERR(bip))
		return PTR_ERR(bip);

	memcpy(bip->bip_vec, bvec, nr_vecs * sizeof(*bvec));
	bip->bip_iter.bi_size = len;
	bip->bip_vcnt = nr_vecs;
	return 0;
}
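
/*
 * Fold the array of pinned pages into bio_vecs, merging pages that are
 * physically contiguous and belong to the same folio.  The extra pins on
 * merged pages are dropped so only the first page of each bvec stays pinned.
 */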
static unsigned int bvec_from_pages(struct bio_vec *bvec, struct page **pages,
				    int nr_vecs, ssize_t bytes, ssize_t offset)
{
	unsigned int nr_bvecs = 0;
	int i, j;

	for (i = 0; i < nr_vecs; i = j) {
		size_t size = min_t(size_t, bytes, PAGE_SIZE - offset);
		struct folio *folio = page_folio(pages[i]);

		bytes -= size;
		for (j = i + 1; j < nr_vecs; j++) {
			size_t next = min_t(size_t, PAGE_SIZE, bytes);

			if (page_folio(pages[j]) != folio ||
			    pages[j] != pages[j - 1] + 1)
				break;

			unpin_user_page(pages[j]);
			size += next;
			bytes -= next;
		}

		bvec_set_page(&bvec[nr_bvecs], pages[i], size, offset);
		offset = 0;
		nr_bvecs++;
	}

	return nr_bvecs;
}
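
/*
 * Map a user buffer of protection information onto @bio.  The user pages are
 * pinned and described in place when possible; if the buffer does not meet
 * the queue's DMA alignment or integrity segment limits, fall back to
 * bio_integrity_copy_user() and bounce through a kernel buffer instead.
 */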
int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t bytes)
{
	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
	unsigned int align = blk_lim_dma_alignment_and_pad(&q->limits);
	struct page *stack_pages[UIO_FASTIOV], **pages = stack_pages;
	struct bio_vec stack_vec[UIO_FASTIOV], *bvec = stack_vec;
	unsigned int direction, nr_bvecs;
	struct iov_iter iter;
	int ret, nr_vecs;
	size_t offset;
	bool copy;

	if (bio_integrity(bio))
		return -EINVAL;
	if (bytes >> SECTOR_SHIFT > queue_max_hw_sectors(q))
		return -E2BIG;

	if (bio_data_dir(bio) == READ)
		direction = ITER_DEST;
	else
		direction = ITER_SOURCE;

	iov_iter_ubuf(&iter, direction, ubuf, bytes);
	nr_vecs = iov_iter_npages(&iter, BIO_MAX_VECS + 1);
	if (nr_vecs > BIO_MAX_VECS)
		return -E2BIG;
	if (nr_vecs > UIO_FASTIOV) {
		bvec = kcalloc(nr_vecs, sizeof(*bvec), GFP_KERNEL);
		if (!bvec)
			return -ENOMEM;
		pages = NULL;
	}

	copy = !iov_iter_is_aligned(&iter, align, align);
	ret = iov_iter_extract_pages(&iter, &pages, bytes, nr_vecs, 0, &offset);
	if (unlikely(ret < 0))
		goto free_bvec;

	nr_bvecs = bvec_from_pages(bvec, pages, nr_vecs, bytes, offset);
	if (pages != stack_pages)
		kvfree(pages);
	if (nr_bvecs > queue_max_integrity_segments(q))
		copy = true;

	if (copy)
		ret = bio_integrity_copy_user(bio, bvec, nr_bvecs, bytes,
					      direction);
	else
		ret = bio_integrity_init_user(bio, bvec, nr_bvecs, bytes);
	if (ret)
		goto release_pages;
	if (bvec != stack_vec)
		kfree(bvec);

	return 0;

release_pages:
	bio_integrity_unpin_bvec(bvec, nr_bvecs, false);
free_bvec:
	if (bvec != stack_vec)
		kfree(bvec);
	return ret;
}

/**
 * bio_integrity_prep - Prepare bio for integrity I/O
 * @bio: bio to prepare
 *
 * Description: Checks if the bio already has an integrity payload attached.
 * If it does, the payload has been generated by another kernel subsystem,
 * and we just pass it through. Otherwise allocates integrity payload.
 * The bio must have data direction, target device and start sector set prior
 * to calling. In the WRITE case, integrity metadata will be generated using
 * the block device's integrity function. In the READ case, the buffer
 * will be prepared for DMA and a suitable end_io handler set up.
 */
bool bio_integrity_prep(struct bio *bio)
{
	struct bio_integrity_payload *bip;
	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
	unsigned int len;
	void *buf;
	gfp_t gfp = GFP_NOIO;

	if (!bi)
		return true;

	if (!bio_sectors(bio))
		return true;

	/* Already protected? */
	if (bio_integrity(bio))
		return true;

	switch (bio_op(bio)) {
	case REQ_OP_READ:
		if (bi->flags & BLK_INTEGRITY_NOVERIFY)
			return true;
		break;
	case REQ_OP_WRITE:
		if (bi->flags & BLK_INTEGRITY_NOGENERATE)
			return true;

		/*
		 * Zero the memory allocated to not leak uninitialized kernel
		 * memory to disk for non-integrity metadata where nothing else
		 * initializes the memory.
		 */
		if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE)
			gfp |= __GFP_ZERO;
		break;
	default:
		return true;
	}

	/* Allocate kernel buffer for protection data */
	len = bio_integrity_bytes(bi, bio_sectors(bio));
	buf = kmalloc(len, gfp);
	if (unlikely(buf == NULL))
		goto err_end_io;

	bip = bio_integrity_alloc(bio, GFP_NOIO, 1);
	if (IS_ERR(bip)) {
		kfree(buf);
		goto err_end_io;
	}

	bip->bip_flags |= BIP_BLOCK_INTEGRITY;
	bip_set_seed(bip, bio->bi_iter.bi_sector);

	if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
		bip->bip_flags |= BIP_IP_CHECKSUM;

	if (bio_integrity_add_page(bio, virt_to_page(buf), len,
			offset_in_page(buf)) < len) {
		printk(KERN_ERR "could not attach integrity payload\n");
		goto err_end_io;
	}

	/* Auto-generate integrity metadata if this is a write */
	if (bio_data_dir(bio) == WRITE)
		blk_integrity_generate(bio);
	else
		bip->bio_iter = bio->bi_iter;
	return true;

err_end_io:
	bio->bi_status = BLK_STS_RESOURCE;
	bio_endio(bio);
	return false;
}
EXPORT_SYMBOL(bio_integrity_prep);

/**
 * bio_integrity_verify_fn - Integrity I/O completion worker
 * @work: Work struct stored in bio to be verified
 *
 * Description: This workqueue function is called to complete a READ
 * request. The function verifies the transferred integrity metadata
 * and then calls the original bio end_io function.
 */
static void bio_integrity_verify_fn(struct work_struct *work)
{
	struct bio_integrity_payload *bip =
		container_of(work, struct bio_integrity_payload, bip_work);
	struct bio *bio = bip->bip_bio;

	blk_integrity_verify(bio);

	kfree(bvec_virt(bip->bip_vec));
	bio_integrity_free(bio);
	bio_endio(bio);
}

/**
 * __bio_integrity_endio - Integrity I/O completion function
 * @bio: Protected bio
 *
 * Description: Completion for integrity I/O
 *
 * Normally I/O completion is done in interrupt context. However,
 * verifying I/O integrity is a time-consuming task which must be run
 * in process context. This function postpones completion accordingly.
 */
bool __bio_integrity_endio(struct bio *bio)
{
	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
	struct bio_integrity_payload *bip = bio_integrity(bio);

	if (bio_op(bio) == REQ_OP_READ && !bio->bi_status && bi->csum_type) {
		INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
		queue_work(kintegrityd_wq, &bip->bip_work);
		return false;
	}

	kfree(bvec_virt(bip->bip_vec));
	bio_integrity_free(bio);
	return true;
}

/**
 * bio_integrity_advance - Advance integrity vector
 * @bio: bio whose integrity vector to update
 * @bytes_done: number of data bytes that have been completed
 *
 * Description: This function calculates how many integrity bytes the
 * number of completed data bytes correspond to and advances the
 * integrity vector accordingly.
 */
void bio_integrity_advance(struct bio *bio, unsigned int bytes_done)
{
	struct bio_integrity_payload *bip = bio_integrity(bio);
	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
	unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9);

	bip->bip_iter.bi_sector += bio_integrity_intervals(bi, bytes_done >> 9);
	bvec_iter_advance(bip->bip_vec, &bip->bip_iter, bytes);
}

/**
 * bio_integrity_trim - Trim integrity vector
 * @bio: bio whose integrity vector to update
 *
 * Description: Used to trim the integrity vector in a cloned bio.
 */
void bio_integrity_trim(struct bio *bio)
{
	struct bio_integrity_payload *bip = bio_integrity(bio);
	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);

	bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
}
EXPORT_SYMBOL(bio_integrity_trim);

/**
 * bio_integrity_clone - Callback for cloning bios with integrity metadata
 * @bio: New bio
 * @bio_src: Original bio
 * @gfp_mask: Memory allocation mask
 *
 * Description: Called to allocate a bip when cloning a bio
 */
int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
			gfp_t gfp_mask)
{
	struct bio_integrity_payload *bip_src = bio_integrity(bio_src);
	struct bio_integrity_payload *bip;

	BUG_ON(bip_src == NULL);

	bip = bio_integrity_alloc(bio, gfp_mask, 0);
	if (IS_ERR(bip))
		return PTR_ERR(bip);

	bip->bip_vec = bip_src->bip_vec;
	bip->bip_iter = bip_src->bip_iter;
	bip->bip_flags = bip_src->bip_flags & ~BIP_BLOCK_INTEGRITY;

	return 0;
}

int bioset_integrity_create(struct bio_set *bs, int pool_size)
{
	if (mempool_initialized(&bs->bio_integrity_pool))
		return 0;

	if (mempool_init_slab_pool(&bs->bio_integrity_pool,
				   pool_size, bip_slab))
		return -1;

	if (biovec_init_pool(&bs->bvec_integrity_pool, pool_size)) {
		mempool_exit(&bs->bio_integrity_pool);
		return -1;
	}

	return 0;
}
EXPORT_SYMBOL(bioset_integrity_create);

void bioset_integrity_free(struct bio_set *bs)
{
	mempool_exit(&bs->bio_integrity_pool);
	mempool_exit(&bs->bvec_integrity_pool);
}

void __init bio_integrity_init(void)
{
	/*
	 * kintegrityd won't block much but may burn a lot of CPU cycles.
	 * Make it highpri CPU intensive wq with max concurrency of 1.
	 */
	kintegrityd_wq = alloc_workqueue("kintegrityd", WQ_MEM_RECLAIM |
					 WQ_HIGHPRI | WQ_CPU_INTENSIVE, 1);
	if (!kintegrityd_wq)
		panic("Failed to create kintegrityd\n");

	bip_slab = kmem_cache_create("bio_integrity_payload",
				     sizeof(struct bio_integrity_payload) +
				     sizeof(struct bio_vec) * BIO_INLINE_VECS,
				     0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
}