1 // SPDX-License-Identifier: GPL-2.0-only
3 #include <linux/spinlock.h>
4 #include <linux/slab.h>
5 #include <linux/blkdev.h>
6 #include <linux/hdreg.h>
7 #include <linux/module.h>
8 #include <linux/mutex.h>
9 #include <linux/interrupt.h>
10 #include <linux/virtio.h>
11 #include <linux/virtio_blk.h>
12 #include <linux/scatterlist.h>
13 #include <linux/string_helpers.h>
14 #include <linux/idr.h>
15 #include <linux/blk-mq.h>
16 #include <linux/blk-mq-virtio.h>
17 #include <linux/numa.h>
18 #include <linux/vmalloc.h>
19 #include <uapi/linux/virtio_ring.h>
22 #define VQ_NAME_LEN 16
23 #define MAX_DISCARD_SEGMENTS 256u
25 /* The maximum number of sg elements that fit into a virtqueue */
26 #define VIRTIO_BLK_MAX_SG_ELEMS 32768
28 #ifdef CONFIG_ARCH_NO_SG_CHAIN
29 #define VIRTIO_BLK_INLINE_SG_CNT 0
31 #define VIRTIO_BLK_INLINE_SG_CNT 2
34 static unsigned int num_request_queues
;
35 module_param(num_request_queues
, uint
, 0644);
36 MODULE_PARM_DESC(num_request_queues
,
37 "Limit the number of request queues to use for blk device. "
39 "Values > nr_cpu_ids truncated to nr_cpu_ids.");
41 static unsigned int poll_queues
;
42 module_param(poll_queues
, uint
, 0644);
43 MODULE_PARM_DESC(poll_queues
, "The number of dedicated virtqueues for polling I/O");
46 static DEFINE_IDA(vd_index_ida
);
48 static struct workqueue_struct
*virtblk_wq
;
50 struct virtio_blk_vq
{
53 char name
[VQ_NAME_LEN
];
54 } ____cacheline_aligned_in_smp
;
58 * This mutex must be held by anything that may run after
59 * virtblk_remove() sets vblk->vdev to NULL.
61 * blk-mq, virtqueue processing, and sysfs attribute code paths are
62 * shut down before vblk->vdev is set to NULL and therefore do not need
65 struct mutex vdev_mutex
;
66 struct virtio_device
*vdev
;
68 /* The disk structure for the kernel. */
71 /* Block layer tags. */
72 struct blk_mq_tag_set tag_set
;
74 /* Process context for config space updates */
75 struct work_struct config_work
;
77 /* Ida index - used to track minor number allocations. */
82 int io_queues
[HCTX_MAX_TYPES
];
83 struct virtio_blk_vq
*vqs
;
85 /* For zoned device */
86 unsigned int zone_sectors
;
91 struct virtio_blk_outhdr out_hdr
;
98 * The zone append command has an extended in header.
99 * The status field in zone_append_in_hdr must always
110 struct sg_table sg_table
;
111 struct scatterlist sg
[];
114 static inline blk_status_t
virtblk_result(u8 status
)
117 case VIRTIO_BLK_S_OK
:
119 case VIRTIO_BLK_S_UNSUPP
:
120 return BLK_STS_NOTSUPP
;
121 case VIRTIO_BLK_S_ZONE_OPEN_RESOURCE
:
122 return BLK_STS_ZONE_OPEN_RESOURCE
;
123 case VIRTIO_BLK_S_ZONE_ACTIVE_RESOURCE
:
124 return BLK_STS_ZONE_ACTIVE_RESOURCE
;
125 case VIRTIO_BLK_S_IOERR
:
126 case VIRTIO_BLK_S_ZONE_UNALIGNED_WP
:
128 return BLK_STS_IOERR
;
132 static inline struct virtio_blk_vq
*get_virtio_blk_vq(struct blk_mq_hw_ctx
*hctx
)
134 struct virtio_blk
*vblk
= hctx
->queue
->queuedata
;
135 struct virtio_blk_vq
*vq
= &vblk
->vqs
[hctx
->queue_num
];
140 static int virtblk_add_req(struct virtqueue
*vq
, struct virtblk_req
*vbr
)
142 struct scatterlist out_hdr
, in_hdr
, *sgs
[3];
143 unsigned int num_out
= 0, num_in
= 0;
145 sg_init_one(&out_hdr
, &vbr
->out_hdr
, sizeof(vbr
->out_hdr
));
146 sgs
[num_out
++] = &out_hdr
;
148 if (vbr
->sg_table
.nents
) {
149 if (vbr
->out_hdr
.type
& cpu_to_virtio32(vq
->vdev
, VIRTIO_BLK_T_OUT
))
150 sgs
[num_out
++] = vbr
->sg_table
.sgl
;
152 sgs
[num_out
+ num_in
++] = vbr
->sg_table
.sgl
;
155 sg_init_one(&in_hdr
, &vbr
->in_hdr
.status
, vbr
->in_hdr_len
);
156 sgs
[num_out
+ num_in
++] = &in_hdr
;
158 return virtqueue_add_sgs(vq
, sgs
, num_out
, num_in
, vbr
, GFP_ATOMIC
);
161 static int virtblk_setup_discard_write_zeroes_erase(struct request
*req
, bool unmap
)
163 unsigned short segments
= blk_rq_nr_discard_segments(req
);
164 unsigned short n
= 0;
165 struct virtio_blk_discard_write_zeroes
*range
;
170 flags
|= VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP
;
172 range
= kmalloc_array(segments
, sizeof(*range
), GFP_ATOMIC
);
177 * Single max discard segment means multi-range discard isn't
178 * supported, and block layer only runs contiguity merge like
179 * normal RW request. So we can't rely on bio for retrieving
182 if (queue_max_discard_segments(req
->q
) == 1) {
183 range
[0].flags
= cpu_to_le32(flags
);
184 range
[0].num_sectors
= cpu_to_le32(blk_rq_sectors(req
));
185 range
[0].sector
= cpu_to_le64(blk_rq_pos(req
));
188 __rq_for_each_bio(bio
, req
) {
189 u64 sector
= bio
->bi_iter
.bi_sector
;
190 u32 num_sectors
= bio
->bi_iter
.bi_size
>> SECTOR_SHIFT
;
192 range
[n
].flags
= cpu_to_le32(flags
);
193 range
[n
].num_sectors
= cpu_to_le32(num_sectors
);
194 range
[n
].sector
= cpu_to_le64(sector
);
199 WARN_ON_ONCE(n
!= segments
);
201 bvec_set_virt(&req
->special_vec
, range
, sizeof(*range
) * segments
);
202 req
->rq_flags
|= RQF_SPECIAL_PAYLOAD
;
207 static void virtblk_unmap_data(struct request
*req
, struct virtblk_req
*vbr
)
209 if (blk_rq_nr_phys_segments(req
))
210 sg_free_table_chained(&vbr
->sg_table
,
211 VIRTIO_BLK_INLINE_SG_CNT
);
214 static int virtblk_map_data(struct blk_mq_hw_ctx
*hctx
, struct request
*req
,
215 struct virtblk_req
*vbr
)
219 if (!blk_rq_nr_phys_segments(req
))
222 vbr
->sg_table
.sgl
= vbr
->sg
;
223 err
= sg_alloc_table_chained(&vbr
->sg_table
,
224 blk_rq_nr_phys_segments(req
),
226 VIRTIO_BLK_INLINE_SG_CNT
);
230 return blk_rq_map_sg(hctx
->queue
, req
, vbr
->sg_table
.sgl
);
233 static void virtblk_cleanup_cmd(struct request
*req
)
235 if (req
->rq_flags
& RQF_SPECIAL_PAYLOAD
)
236 kfree(bvec_virt(&req
->special_vec
));
239 static blk_status_t
virtblk_setup_cmd(struct virtio_device
*vdev
,
241 struct virtblk_req
*vbr
)
243 size_t in_hdr_len
= sizeof(vbr
->in_hdr
.status
);
248 if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED
) && op_is_zone_mgmt(req_op(req
)))
249 return BLK_STS_NOTSUPP
;
251 /* Set fields for all request types */
252 vbr
->out_hdr
.ioprio
= cpu_to_virtio32(vdev
, req_get_ioprio(req
));
254 switch (req_op(req
)) {
256 type
= VIRTIO_BLK_T_IN
;
257 sector
= blk_rq_pos(req
);
260 type
= VIRTIO_BLK_T_OUT
;
261 sector
= blk_rq_pos(req
);
264 type
= VIRTIO_BLK_T_FLUSH
;
267 type
= VIRTIO_BLK_T_DISCARD
;
269 case REQ_OP_WRITE_ZEROES
:
270 type
= VIRTIO_BLK_T_WRITE_ZEROES
;
271 unmap
= !(req
->cmd_flags
& REQ_NOUNMAP
);
273 case REQ_OP_SECURE_ERASE
:
274 type
= VIRTIO_BLK_T_SECURE_ERASE
;
276 case REQ_OP_ZONE_OPEN
:
277 type
= VIRTIO_BLK_T_ZONE_OPEN
;
278 sector
= blk_rq_pos(req
);
280 case REQ_OP_ZONE_CLOSE
:
281 type
= VIRTIO_BLK_T_ZONE_CLOSE
;
282 sector
= blk_rq_pos(req
);
284 case REQ_OP_ZONE_FINISH
:
285 type
= VIRTIO_BLK_T_ZONE_FINISH
;
286 sector
= blk_rq_pos(req
);
288 case REQ_OP_ZONE_APPEND
:
289 type
= VIRTIO_BLK_T_ZONE_APPEND
;
290 sector
= blk_rq_pos(req
);
291 in_hdr_len
= sizeof(vbr
->in_hdr
.zone_append
);
293 case REQ_OP_ZONE_RESET
:
294 type
= VIRTIO_BLK_T_ZONE_RESET
;
295 sector
= blk_rq_pos(req
);
297 case REQ_OP_ZONE_RESET_ALL
:
298 type
= VIRTIO_BLK_T_ZONE_RESET_ALL
;
302 * Out header has already been prepared by the caller (virtblk_get_id()
303 * or virtblk_submit_zone_report()), nothing to do here.
308 return BLK_STS_IOERR
;
311 /* Set fields for non-REQ_OP_DRV_IN request types */
312 vbr
->in_hdr_len
= in_hdr_len
;
313 vbr
->out_hdr
.type
= cpu_to_virtio32(vdev
, type
);
314 vbr
->out_hdr
.sector
= cpu_to_virtio64(vdev
, sector
);
316 if (type
== VIRTIO_BLK_T_DISCARD
|| type
== VIRTIO_BLK_T_WRITE_ZEROES
||
317 type
== VIRTIO_BLK_T_SECURE_ERASE
) {
318 if (virtblk_setup_discard_write_zeroes_erase(req
, unmap
))
319 return BLK_STS_RESOURCE
;
326 * The status byte is always the last byte of the virtblk request
327 * in-header. This helper fetches its value for all in-header formats
328 * that are currently defined.
330 static inline u8
virtblk_vbr_status(struct virtblk_req
*vbr
)
332 return *((u8
*)&vbr
->in_hdr
+ vbr
->in_hdr_len
- 1);
335 static inline void virtblk_request_done(struct request
*req
)
337 struct virtblk_req
*vbr
= blk_mq_rq_to_pdu(req
);
338 blk_status_t status
= virtblk_result(virtblk_vbr_status(vbr
));
339 struct virtio_blk
*vblk
= req
->mq_hctx
->queue
->queuedata
;
341 virtblk_unmap_data(req
, vbr
);
342 virtblk_cleanup_cmd(req
);
344 if (req_op(req
) == REQ_OP_ZONE_APPEND
)
345 req
->__sector
= virtio64_to_cpu(vblk
->vdev
,
346 vbr
->in_hdr
.zone_append
.sector
);
348 blk_mq_end_request(req
, status
);
351 static void virtblk_done(struct virtqueue
*vq
)
353 struct virtio_blk
*vblk
= vq
->vdev
->priv
;
354 bool req_done
= false;
356 struct virtblk_req
*vbr
;
360 spin_lock_irqsave(&vblk
->vqs
[qid
].lock
, flags
);
362 virtqueue_disable_cb(vq
);
363 while ((vbr
= virtqueue_get_buf(vblk
->vqs
[qid
].vq
, &len
)) != NULL
) {
364 struct request
*req
= blk_mq_rq_from_pdu(vbr
);
366 if (likely(!blk_should_fake_timeout(req
->q
)))
367 blk_mq_complete_request(req
);
370 } while (!virtqueue_enable_cb(vq
));
372 /* In case queue is stopped waiting for more buffers. */
374 blk_mq_start_stopped_hw_queues(vblk
->disk
->queue
, true);
375 spin_unlock_irqrestore(&vblk
->vqs
[qid
].lock
, flags
);
378 static void virtio_commit_rqs(struct blk_mq_hw_ctx
*hctx
)
380 struct virtio_blk
*vblk
= hctx
->queue
->queuedata
;
381 struct virtio_blk_vq
*vq
= &vblk
->vqs
[hctx
->queue_num
];
384 spin_lock_irq(&vq
->lock
);
385 kick
= virtqueue_kick_prepare(vq
->vq
);
386 spin_unlock_irq(&vq
->lock
);
389 virtqueue_notify(vq
->vq
);
392 static blk_status_t
virtblk_fail_to_queue(struct request
*req
, int rc
)
394 virtblk_cleanup_cmd(req
);
397 return BLK_STS_DEV_RESOURCE
;
399 return BLK_STS_RESOURCE
;
401 return BLK_STS_IOERR
;
405 static blk_status_t
virtblk_prep_rq(struct blk_mq_hw_ctx
*hctx
,
406 struct virtio_blk
*vblk
,
408 struct virtblk_req
*vbr
)
413 status
= virtblk_setup_cmd(vblk
->vdev
, req
, vbr
);
414 if (unlikely(status
))
417 num
= virtblk_map_data(hctx
, req
, vbr
);
418 if (unlikely(num
< 0))
419 return virtblk_fail_to_queue(req
, -ENOMEM
);
420 vbr
->sg_table
.nents
= num
;
422 blk_mq_start_request(req
);
427 static blk_status_t
virtio_queue_rq(struct blk_mq_hw_ctx
*hctx
,
428 const struct blk_mq_queue_data
*bd
)
430 struct virtio_blk
*vblk
= hctx
->queue
->queuedata
;
431 struct request
*req
= bd
->rq
;
432 struct virtblk_req
*vbr
= blk_mq_rq_to_pdu(req
);
434 int qid
= hctx
->queue_num
;
439 status
= virtblk_prep_rq(hctx
, vblk
, req
, vbr
);
440 if (unlikely(status
))
443 spin_lock_irqsave(&vblk
->vqs
[qid
].lock
, flags
);
444 err
= virtblk_add_req(vblk
->vqs
[qid
].vq
, vbr
);
446 virtqueue_kick(vblk
->vqs
[qid
].vq
);
447 /* Don't stop the queue if -ENOMEM: we may have failed to
448 * bounce the buffer due to global resource outage.
451 blk_mq_stop_hw_queue(hctx
);
452 spin_unlock_irqrestore(&vblk
->vqs
[qid
].lock
, flags
);
453 virtblk_unmap_data(req
, vbr
);
454 return virtblk_fail_to_queue(req
, err
);
457 if (bd
->last
&& virtqueue_kick_prepare(vblk
->vqs
[qid
].vq
))
459 spin_unlock_irqrestore(&vblk
->vqs
[qid
].lock
, flags
);
462 virtqueue_notify(vblk
->vqs
[qid
].vq
);
466 static bool virtblk_prep_rq_batch(struct request
*req
)
468 struct virtio_blk
*vblk
= req
->mq_hctx
->queue
->queuedata
;
469 struct virtblk_req
*vbr
= blk_mq_rq_to_pdu(req
);
471 return virtblk_prep_rq(req
->mq_hctx
, vblk
, req
, vbr
) == BLK_STS_OK
;
474 static bool virtblk_add_req_batch(struct virtio_blk_vq
*vq
,
475 struct request
**rqlist
)
481 spin_lock_irqsave(&vq
->lock
, flags
);
483 while (!rq_list_empty(*rqlist
)) {
484 struct request
*req
= rq_list_pop(rqlist
);
485 struct virtblk_req
*vbr
= blk_mq_rq_to_pdu(req
);
487 err
= virtblk_add_req(vq
->vq
, vbr
);
489 virtblk_unmap_data(req
, vbr
);
490 virtblk_cleanup_cmd(req
);
491 blk_mq_requeue_request(req
, true);
495 kick
= virtqueue_kick_prepare(vq
->vq
);
496 spin_unlock_irqrestore(&vq
->lock
, flags
);
501 static void virtio_queue_rqs(struct request
**rqlist
)
503 struct request
*req
, *next
, *prev
= NULL
;
504 struct request
*requeue_list
= NULL
;
506 rq_list_for_each_safe(rqlist
, req
, next
) {
507 struct virtio_blk_vq
*vq
= get_virtio_blk_vq(req
->mq_hctx
);
510 if (!virtblk_prep_rq_batch(req
)) {
511 rq_list_move(rqlist
, &requeue_list
, req
, prev
);
517 if (!next
|| req
->mq_hctx
!= next
->mq_hctx
) {
519 kick
= virtblk_add_req_batch(vq
, rqlist
);
521 virtqueue_notify(vq
->vq
);
529 *rqlist
= requeue_list
;
532 #ifdef CONFIG_BLK_DEV_ZONED
533 static void *virtblk_alloc_report_buffer(struct virtio_blk
*vblk
,
534 unsigned int nr_zones
,
537 struct request_queue
*q
= vblk
->disk
->queue
;
541 nr_zones
= min_t(unsigned int, nr_zones
,
542 get_capacity(vblk
->disk
) >> ilog2(vblk
->zone_sectors
));
544 bufsize
= sizeof(struct virtio_blk_zone_report
) +
545 nr_zones
* sizeof(struct virtio_blk_zone_descriptor
);
546 bufsize
= min_t(size_t, bufsize
,
547 queue_max_hw_sectors(q
) << SECTOR_SHIFT
);
548 bufsize
= min_t(size_t, bufsize
, queue_max_segments(q
) << PAGE_SHIFT
);
550 while (bufsize
>= sizeof(struct virtio_blk_zone_report
)) {
551 buf
= __vmalloc(bufsize
, GFP_KERNEL
| __GFP_NORETRY
);
562 static int virtblk_submit_zone_report(struct virtio_blk
*vblk
,
563 char *report_buf
, size_t report_len
,
566 struct request_queue
*q
= vblk
->disk
->queue
;
568 struct virtblk_req
*vbr
;
571 req
= blk_mq_alloc_request(q
, REQ_OP_DRV_IN
, 0);
575 vbr
= blk_mq_rq_to_pdu(req
);
576 vbr
->in_hdr_len
= sizeof(vbr
->in_hdr
.status
);
577 vbr
->out_hdr
.type
= cpu_to_virtio32(vblk
->vdev
, VIRTIO_BLK_T_ZONE_REPORT
);
578 vbr
->out_hdr
.sector
= cpu_to_virtio64(vblk
->vdev
, sector
);
580 err
= blk_rq_map_kern(q
, req
, report_buf
, report_len
, GFP_KERNEL
);
584 blk_execute_rq(req
, false);
585 err
= blk_status_to_errno(virtblk_result(vbr
->in_hdr
.status
));
587 blk_mq_free_request(req
);
591 static int virtblk_parse_zone(struct virtio_blk
*vblk
,
592 struct virtio_blk_zone_descriptor
*entry
,
593 unsigned int idx
, report_zones_cb cb
, void *data
)
595 struct blk_zone zone
= { };
597 zone
.start
= virtio64_to_cpu(vblk
->vdev
, entry
->z_start
);
598 if (zone
.start
+ vblk
->zone_sectors
<= get_capacity(vblk
->disk
))
599 zone
.len
= vblk
->zone_sectors
;
601 zone
.len
= get_capacity(vblk
->disk
) - zone
.start
;
602 zone
.capacity
= virtio64_to_cpu(vblk
->vdev
, entry
->z_cap
);
603 zone
.wp
= virtio64_to_cpu(vblk
->vdev
, entry
->z_wp
);
605 switch (entry
->z_type
) {
606 case VIRTIO_BLK_ZT_SWR
:
607 zone
.type
= BLK_ZONE_TYPE_SEQWRITE_REQ
;
609 case VIRTIO_BLK_ZT_SWP
:
610 zone
.type
= BLK_ZONE_TYPE_SEQWRITE_PREF
;
612 case VIRTIO_BLK_ZT_CONV
:
613 zone
.type
= BLK_ZONE_TYPE_CONVENTIONAL
;
616 dev_err(&vblk
->vdev
->dev
, "zone %llu: invalid type %#x\n",
617 zone
.start
, entry
->z_type
);
621 switch (entry
->z_state
) {
622 case VIRTIO_BLK_ZS_EMPTY
:
623 zone
.cond
= BLK_ZONE_COND_EMPTY
;
625 case VIRTIO_BLK_ZS_CLOSED
:
626 zone
.cond
= BLK_ZONE_COND_CLOSED
;
628 case VIRTIO_BLK_ZS_FULL
:
629 zone
.cond
= BLK_ZONE_COND_FULL
;
630 zone
.wp
= zone
.start
+ zone
.len
;
632 case VIRTIO_BLK_ZS_EOPEN
:
633 zone
.cond
= BLK_ZONE_COND_EXP_OPEN
;
635 case VIRTIO_BLK_ZS_IOPEN
:
636 zone
.cond
= BLK_ZONE_COND_IMP_OPEN
;
638 case VIRTIO_BLK_ZS_NOT_WP
:
639 zone
.cond
= BLK_ZONE_COND_NOT_WP
;
641 case VIRTIO_BLK_ZS_RDONLY
:
642 zone
.cond
= BLK_ZONE_COND_READONLY
;
645 case VIRTIO_BLK_ZS_OFFLINE
:
646 zone
.cond
= BLK_ZONE_COND_OFFLINE
;
650 dev_err(&vblk
->vdev
->dev
, "zone %llu: invalid condition %#x\n",
651 zone
.start
, entry
->z_state
);
656 * The callback below checks the validity of the reported
657 * entry data, no need to further validate it here.
659 return cb(&zone
, idx
, data
);
662 static int virtblk_report_zones(struct gendisk
*disk
, sector_t sector
,
663 unsigned int nr_zones
, report_zones_cb cb
,
666 struct virtio_blk
*vblk
= disk
->private_data
;
667 struct virtio_blk_zone_report
*report
;
668 unsigned long long nz
, i
;
670 unsigned int zone_idx
= 0;
673 if (WARN_ON_ONCE(!vblk
->zone_sectors
))
676 report
= virtblk_alloc_report_buffer(vblk
, nr_zones
, &buflen
);
680 mutex_lock(&vblk
->vdev_mutex
);
687 while (zone_idx
< nr_zones
&& sector
< get_capacity(vblk
->disk
)) {
688 memset(report
, 0, buflen
);
690 ret
= virtblk_submit_zone_report(vblk
, (char *)report
,
695 nz
= min_t(u64
, virtio64_to_cpu(vblk
->vdev
, report
->nr_zones
),
700 for (i
= 0; i
< nz
&& zone_idx
< nr_zones
; i
++) {
701 ret
= virtblk_parse_zone(vblk
, &report
->zones
[i
],
706 sector
= virtio64_to_cpu(vblk
->vdev
,
707 report
->zones
[i
].z_start
) +
718 mutex_unlock(&vblk
->vdev_mutex
);
723 static int virtblk_read_zoned_limits(struct virtio_blk
*vblk
,
724 struct queue_limits
*lim
)
726 struct virtio_device
*vdev
= vblk
->vdev
;
729 dev_dbg(&vdev
->dev
, "probing host-managed zoned device\n");
731 lim
->features
|= BLK_FEAT_ZONED
;
733 virtio_cread(vdev
, struct virtio_blk_config
,
734 zoned
.max_open_zones
, &v
);
735 lim
->max_open_zones
= v
;
736 dev_dbg(&vdev
->dev
, "max open zones = %u\n", v
);
738 virtio_cread(vdev
, struct virtio_blk_config
,
739 zoned
.max_active_zones
, &v
);
740 lim
->max_active_zones
= v
;
741 dev_dbg(&vdev
->dev
, "max active zones = %u\n", v
);
743 virtio_cread(vdev
, struct virtio_blk_config
,
744 zoned
.write_granularity
, &wg
);
746 dev_warn(&vdev
->dev
, "zero write granularity reported\n");
749 lim
->physical_block_size
= wg
;
752 dev_dbg(&vdev
->dev
, "write granularity = %u\n", wg
);
755 * virtio ZBD specification doesn't require zones to be a power of
756 * two sectors in size, but the code in this driver expects that.
758 virtio_cread(vdev
, struct virtio_blk_config
, zoned
.zone_sectors
,
759 &vblk
->zone_sectors
);
760 if (vblk
->zone_sectors
== 0 || !is_power_of_2(vblk
->zone_sectors
)) {
762 "zoned device with non power of two zone size %u\n",
766 lim
->chunk_sectors
= vblk
->zone_sectors
;
767 dev_dbg(&vdev
->dev
, "zone sectors = %u\n", vblk
->zone_sectors
);
769 if (virtio_has_feature(vdev
, VIRTIO_BLK_F_DISCARD
)) {
770 dev_warn(&vblk
->vdev
->dev
,
771 "ignoring negotiated F_DISCARD for zoned device\n");
772 lim
->max_hw_discard_sectors
= 0;
775 virtio_cread(vdev
, struct virtio_blk_config
,
776 zoned
.max_append_sectors
, &v
);
778 dev_warn(&vdev
->dev
, "zero max_append_sectors reported\n");
781 if ((v
<< SECTOR_SHIFT
) < wg
) {
783 "write granularity %u exceeds max_append_sectors %u limit\n",
787 lim
->max_zone_append_sectors
= v
;
788 dev_dbg(&vdev
->dev
, "max append sectors = %u\n", v
);
794 * Zoned block device support is not configured in this kernel, host-managed
795 * zoned devices can't be supported.
797 #define virtblk_report_zones NULL
798 static inline int virtblk_read_zoned_limits(struct virtio_blk
*vblk
,
799 struct queue_limits
*lim
)
801 dev_err(&vblk
->vdev
->dev
,
802 "virtio_blk: zoned devices are not supported");
805 #endif /* CONFIG_BLK_DEV_ZONED */
807 /* return id (s/n) string for *disk to *id_str
809 static int virtblk_get_id(struct gendisk
*disk
, char *id_str
)
811 struct virtio_blk
*vblk
= disk
->private_data
;
812 struct request_queue
*q
= vblk
->disk
->queue
;
814 struct virtblk_req
*vbr
;
817 req
= blk_mq_alloc_request(q
, REQ_OP_DRV_IN
, 0);
821 vbr
= blk_mq_rq_to_pdu(req
);
822 vbr
->in_hdr_len
= sizeof(vbr
->in_hdr
.status
);
823 vbr
->out_hdr
.type
= cpu_to_virtio32(vblk
->vdev
, VIRTIO_BLK_T_GET_ID
);
824 vbr
->out_hdr
.sector
= 0;
826 err
= blk_rq_map_kern(q
, req
, id_str
, VIRTIO_BLK_ID_BYTES
, GFP_KERNEL
);
830 blk_execute_rq(req
, false);
831 err
= blk_status_to_errno(virtblk_result(vbr
->in_hdr
.status
));
833 blk_mq_free_request(req
);
837 /* We provide getgeo only to please some old bootloader/partitioning tools */
838 static int virtblk_getgeo(struct block_device
*bd
, struct hd_geometry
*geo
)
840 struct virtio_blk
*vblk
= bd
->bd_disk
->private_data
;
843 mutex_lock(&vblk
->vdev_mutex
);
850 /* see if the host passed in geometry config */
851 if (virtio_has_feature(vblk
->vdev
, VIRTIO_BLK_F_GEOMETRY
)) {
852 virtio_cread(vblk
->vdev
, struct virtio_blk_config
,
853 geometry
.cylinders
, &geo
->cylinders
);
854 virtio_cread(vblk
->vdev
, struct virtio_blk_config
,
855 geometry
.heads
, &geo
->heads
);
856 virtio_cread(vblk
->vdev
, struct virtio_blk_config
,
857 geometry
.sectors
, &geo
->sectors
);
859 /* some standard values, similar to sd */
861 geo
->sectors
= 1 << 5;
862 geo
->cylinders
= get_capacity(bd
->bd_disk
) >> 11;
865 mutex_unlock(&vblk
->vdev_mutex
);
869 static void virtblk_free_disk(struct gendisk
*disk
)
871 struct virtio_blk
*vblk
= disk
->private_data
;
873 ida_free(&vd_index_ida
, vblk
->index
);
874 mutex_destroy(&vblk
->vdev_mutex
);
878 static const struct block_device_operations virtblk_fops
= {
879 .owner
= THIS_MODULE
,
880 .getgeo
= virtblk_getgeo
,
881 .free_disk
= virtblk_free_disk
,
882 .report_zones
= virtblk_report_zones
,
885 static int index_to_minor(int index
)
887 return index
<< PART_BITS
;
890 static int minor_to_index(int minor
)
892 return minor
>> PART_BITS
;
895 static ssize_t
serial_show(struct device
*dev
,
896 struct device_attribute
*attr
, char *buf
)
898 struct gendisk
*disk
= dev_to_disk(dev
);
901 /* sysfs gives us a PAGE_SIZE buffer */
902 BUILD_BUG_ON(PAGE_SIZE
< VIRTIO_BLK_ID_BYTES
);
904 buf
[VIRTIO_BLK_ID_BYTES
] = '\0';
905 err
= virtblk_get_id(disk
, buf
);
909 if (err
== -EIO
) /* Unsupported? Make it empty. */
915 static DEVICE_ATTR_RO(serial
);
917 /* The queue's logical block size must be set before calling this */
918 static void virtblk_update_capacity(struct virtio_blk
*vblk
, bool resize
)
920 struct virtio_device
*vdev
= vblk
->vdev
;
921 struct request_queue
*q
= vblk
->disk
->queue
;
922 char cap_str_2
[10], cap_str_10
[10];
923 unsigned long long nblocks
;
926 /* Host must always specify the capacity. */
927 virtio_cread(vdev
, struct virtio_blk_config
, capacity
, &capacity
);
929 nblocks
= DIV_ROUND_UP_ULL(capacity
, queue_logical_block_size(q
) >> 9);
931 string_get_size(nblocks
, queue_logical_block_size(q
),
932 STRING_UNITS_2
, cap_str_2
, sizeof(cap_str_2
));
933 string_get_size(nblocks
, queue_logical_block_size(q
),
934 STRING_UNITS_10
, cap_str_10
, sizeof(cap_str_10
));
936 dev_notice(&vdev
->dev
,
937 "[%s] %s%llu %d-byte logical blocks (%s/%s)\n",
938 vblk
->disk
->disk_name
,
939 resize
? "new size: " : "",
941 queue_logical_block_size(q
),
945 set_capacity_and_notify(vblk
->disk
, capacity
);
948 static void virtblk_config_changed_work(struct work_struct
*work
)
950 struct virtio_blk
*vblk
=
951 container_of(work
, struct virtio_blk
, config_work
);
953 virtblk_update_capacity(vblk
, true);
956 static void virtblk_config_changed(struct virtio_device
*vdev
)
958 struct virtio_blk
*vblk
= vdev
->priv
;
960 queue_work(virtblk_wq
, &vblk
->config_work
);
963 static int init_vq(struct virtio_blk
*vblk
)
967 struct virtqueue_info
*vqs_info
;
968 struct virtqueue
**vqs
;
969 unsigned short num_vqs
;
970 unsigned short num_poll_vqs
;
971 struct virtio_device
*vdev
= vblk
->vdev
;
972 struct irq_affinity desc
= { 0, };
974 err
= virtio_cread_feature(vdev
, VIRTIO_BLK_F_MQ
,
975 struct virtio_blk_config
, num_queues
,
980 if (!err
&& !num_vqs
) {
981 dev_err(&vdev
->dev
, "MQ advertised but zero queues reported\n");
985 num_vqs
= min_t(unsigned int,
986 min_not_zero(num_request_queues
, nr_cpu_ids
),
989 num_poll_vqs
= min_t(unsigned int, poll_queues
, num_vqs
- 1);
991 vblk
->io_queues
[HCTX_TYPE_DEFAULT
] = num_vqs
- num_poll_vqs
;
992 vblk
->io_queues
[HCTX_TYPE_READ
] = 0;
993 vblk
->io_queues
[HCTX_TYPE_POLL
] = num_poll_vqs
;
995 dev_info(&vdev
->dev
, "%d/%d/%d default/read/poll queues\n",
996 vblk
->io_queues
[HCTX_TYPE_DEFAULT
],
997 vblk
->io_queues
[HCTX_TYPE_READ
],
998 vblk
->io_queues
[HCTX_TYPE_POLL
]);
1000 vblk
->vqs
= kmalloc_array(num_vqs
, sizeof(*vblk
->vqs
), GFP_KERNEL
);
1004 vqs_info
= kcalloc(num_vqs
, sizeof(*vqs_info
), GFP_KERNEL
);
1005 vqs
= kmalloc_array(num_vqs
, sizeof(*vqs
), GFP_KERNEL
);
1006 if (!vqs_info
|| !vqs
) {
1011 for (i
= 0; i
< num_vqs
- num_poll_vqs
; i
++) {
1012 vqs_info
[i
].callback
= virtblk_done
;
1013 snprintf(vblk
->vqs
[i
].name
, VQ_NAME_LEN
, "req.%u", i
);
1014 vqs_info
[i
].name
= vblk
->vqs
[i
].name
;
1017 for (; i
< num_vqs
; i
++) {
1018 snprintf(vblk
->vqs
[i
].name
, VQ_NAME_LEN
, "req_poll.%u", i
);
1019 vqs_info
[i
].name
= vblk
->vqs
[i
].name
;
1022 /* Discover virtqueues and write information to configuration. */
1023 err
= virtio_find_vqs(vdev
, num_vqs
, vqs
, vqs_info
, &desc
);
1027 for (i
= 0; i
< num_vqs
; i
++) {
1028 spin_lock_init(&vblk
->vqs
[i
].lock
);
1029 vblk
->vqs
[i
].vq
= vqs
[i
];
1031 vblk
->num_vqs
= num_vqs
;
1042 * Legacy naming scheme used for virtio devices. We are stuck with it for
1043 * virtio blk but don't ever use it for any new driver.
1045 static int virtblk_name_format(char *prefix
, int index
, char *buf
, int buflen
)
1047 const int base
= 'z' - 'a' + 1;
1048 char *begin
= buf
+ strlen(prefix
);
1049 char *end
= buf
+ buflen
;
1059 *--p
= 'a' + (index
% unit
);
1060 index
= (index
/ unit
) - 1;
1061 } while (index
>= 0);
1063 memmove(begin
, p
, end
- p
);
1064 memcpy(buf
, prefix
, strlen(prefix
));
1069 static int virtblk_get_cache_mode(struct virtio_device
*vdev
)
1074 err
= virtio_cread_feature(vdev
, VIRTIO_BLK_F_CONFIG_WCE
,
1075 struct virtio_blk_config
, wce
,
1079 * If WCE is not configurable and flush is not available,
1080 * assume no writeback cache is in use.
1083 writeback
= virtio_has_feature(vdev
, VIRTIO_BLK_F_FLUSH
);
1088 static const char *const virtblk_cache_types
[] = {
1089 "write through", "write back"
1093 cache_type_store(struct device
*dev
, struct device_attribute
*attr
,
1094 const char *buf
, size_t count
)
1096 struct gendisk
*disk
= dev_to_disk(dev
);
1097 struct virtio_blk
*vblk
= disk
->private_data
;
1098 struct virtio_device
*vdev
= vblk
->vdev
;
1099 struct queue_limits lim
;
1102 BUG_ON(!virtio_has_feature(vblk
->vdev
, VIRTIO_BLK_F_CONFIG_WCE
));
1103 i
= sysfs_match_string(virtblk_cache_types
, buf
);
1107 virtio_cwrite8(vdev
, offsetof(struct virtio_blk_config
, wce
), i
);
1109 lim
= queue_limits_start_update(disk
->queue
);
1110 if (virtblk_get_cache_mode(vdev
))
1111 lim
.features
|= BLK_FEAT_WRITE_CACHE
;
1113 lim
.features
&= ~BLK_FEAT_WRITE_CACHE
;
1114 blk_mq_freeze_queue(disk
->queue
);
1115 i
= queue_limits_commit_update(disk
->queue
, &lim
);
1116 blk_mq_unfreeze_queue(disk
->queue
);
1123 cache_type_show(struct device
*dev
, struct device_attribute
*attr
, char *buf
)
1125 struct gendisk
*disk
= dev_to_disk(dev
);
1126 struct virtio_blk
*vblk
= disk
->private_data
;
1127 u8 writeback
= virtblk_get_cache_mode(vblk
->vdev
);
1129 BUG_ON(writeback
>= ARRAY_SIZE(virtblk_cache_types
));
1130 return sysfs_emit(buf
, "%s\n", virtblk_cache_types
[writeback
]);
1133 static DEVICE_ATTR_RW(cache_type
);
1135 static struct attribute
*virtblk_attrs
[] = {
1136 &dev_attr_serial
.attr
,
1137 &dev_attr_cache_type
.attr
,
1141 static umode_t
virtblk_attrs_are_visible(struct kobject
*kobj
,
1142 struct attribute
*a
, int n
)
1144 struct device
*dev
= kobj_to_dev(kobj
);
1145 struct gendisk
*disk
= dev_to_disk(dev
);
1146 struct virtio_blk
*vblk
= disk
->private_data
;
1147 struct virtio_device
*vdev
= vblk
->vdev
;
1149 if (a
== &dev_attr_cache_type
.attr
&&
1150 !virtio_has_feature(vdev
, VIRTIO_BLK_F_CONFIG_WCE
))
1156 static const struct attribute_group virtblk_attr_group
= {
1157 .attrs
= virtblk_attrs
,
1158 .is_visible
= virtblk_attrs_are_visible
,
1161 static const struct attribute_group
*virtblk_attr_groups
[] = {
1162 &virtblk_attr_group
,
1166 static void virtblk_map_queues(struct blk_mq_tag_set
*set
)
1168 struct virtio_blk
*vblk
= set
->driver_data
;
1171 for (i
= 0, qoff
= 0; i
< set
->nr_maps
; i
++) {
1172 struct blk_mq_queue_map
*map
= &set
->map
[i
];
1174 map
->nr_queues
= vblk
->io_queues
[i
];
1175 map
->queue_offset
= qoff
;
1176 qoff
+= map
->nr_queues
;
1178 if (map
->nr_queues
== 0)
1182 * Regular queues have interrupts and hence CPU affinity is
1183 * defined by the core virtio code, but polling queues have
1184 * no interrupts so we let the block layer assign CPU affinity.
1186 if (i
== HCTX_TYPE_POLL
)
1187 blk_mq_map_queues(&set
->map
[i
]);
1189 blk_mq_virtio_map_queues(&set
->map
[i
], vblk
->vdev
, 0);
1193 static void virtblk_complete_batch(struct io_comp_batch
*iob
)
1195 struct request
*req
;
1197 rq_list_for_each(&iob
->req_list
, req
) {
1198 virtblk_unmap_data(req
, blk_mq_rq_to_pdu(req
));
1199 virtblk_cleanup_cmd(req
);
1201 blk_mq_end_request_batch(iob
);
1204 static int virtblk_poll(struct blk_mq_hw_ctx
*hctx
, struct io_comp_batch
*iob
)
1206 struct virtio_blk
*vblk
= hctx
->queue
->queuedata
;
1207 struct virtio_blk_vq
*vq
= get_virtio_blk_vq(hctx
);
1208 struct virtblk_req
*vbr
;
1209 unsigned long flags
;
1213 spin_lock_irqsave(&vq
->lock
, flags
);
1215 while ((vbr
= virtqueue_get_buf(vq
->vq
, &len
)) != NULL
) {
1216 struct request
*req
= blk_mq_rq_from_pdu(vbr
);
1219 if (!blk_mq_complete_request_remote(req
) &&
1220 !blk_mq_add_to_batch(req
, iob
, virtblk_vbr_status(vbr
),
1221 virtblk_complete_batch
))
1222 virtblk_request_done(req
);
1226 blk_mq_start_stopped_hw_queues(vblk
->disk
->queue
, true);
1228 spin_unlock_irqrestore(&vq
->lock
, flags
);
1233 static const struct blk_mq_ops virtio_mq_ops
= {
1234 .queue_rq
= virtio_queue_rq
,
1235 .queue_rqs
= virtio_queue_rqs
,
1236 .commit_rqs
= virtio_commit_rqs
,
1237 .complete
= virtblk_request_done
,
1238 .map_queues
= virtblk_map_queues
,
1239 .poll
= virtblk_poll
,
1242 static unsigned int virtblk_queue_depth
;
1243 module_param_named(queue_depth
, virtblk_queue_depth
, uint
, 0444);
1245 static int virtblk_read_limits(struct virtio_blk
*vblk
,
1246 struct queue_limits
*lim
)
1248 struct virtio_device
*vdev
= vblk
->vdev
;
1249 u32 v
, max_size
, sg_elems
, opt_io_size
;
1250 u32 max_discard_segs
= 0;
1251 u32 discard_granularity
= 0;
1253 u8 physical_block_exp
, alignment_offset
;
1254 size_t max_dma_size
;
1257 /* We need to know how many segments before we allocate. */
1258 err
= virtio_cread_feature(vdev
, VIRTIO_BLK_F_SEG_MAX
,
1259 struct virtio_blk_config
, seg_max
,
1262 /* We need at least one SG element, whatever they say. */
1263 if (err
|| !sg_elems
)
1266 /* Prevent integer overflows and honor max vq size */
1267 sg_elems
= min_t(u32
, sg_elems
, VIRTIO_BLK_MAX_SG_ELEMS
- 2);
1269 /* We can handle whatever the host told us to handle. */
1270 lim
->max_segments
= sg_elems
;
1272 /* No real sector limit. */
1273 lim
->max_hw_sectors
= UINT_MAX
;
1275 max_dma_size
= virtio_max_dma_size(vdev
);
1276 max_size
= max_dma_size
> U32_MAX
? U32_MAX
: max_dma_size
;
1278 /* Host can optionally specify maximum segment size and number of
1280 err
= virtio_cread_feature(vdev
, VIRTIO_BLK_F_SIZE_MAX
,
1281 struct virtio_blk_config
, size_max
, &v
);
1283 max_size
= min(max_size
, v
);
1285 lim
->max_segment_size
= max_size
;
1287 /* Host can optionally specify the block size of the device */
1288 virtio_cread_feature(vdev
, VIRTIO_BLK_F_BLK_SIZE
,
1289 struct virtio_blk_config
, blk_size
,
1290 &lim
->logical_block_size
);
1292 /* Use topology information if available */
1293 err
= virtio_cread_feature(vdev
, VIRTIO_BLK_F_TOPOLOGY
,
1294 struct virtio_blk_config
, physical_block_exp
,
1295 &physical_block_exp
);
1296 if (!err
&& physical_block_exp
)
1297 lim
->physical_block_size
=
1298 lim
->logical_block_size
* (1 << physical_block_exp
);
1300 err
= virtio_cread_feature(vdev
, VIRTIO_BLK_F_TOPOLOGY
,
1301 struct virtio_blk_config
, alignment_offset
,
1303 if (!err
&& alignment_offset
)
1304 lim
->alignment_offset
=
1305 lim
->logical_block_size
* alignment_offset
;
1307 err
= virtio_cread_feature(vdev
, VIRTIO_BLK_F_TOPOLOGY
,
1308 struct virtio_blk_config
, min_io_size
,
1310 if (!err
&& min_io_size
)
1311 lim
->io_min
= lim
->logical_block_size
* min_io_size
;
1313 err
= virtio_cread_feature(vdev
, VIRTIO_BLK_F_TOPOLOGY
,
1314 struct virtio_blk_config
, opt_io_size
,
1316 if (!err
&& opt_io_size
)
1317 lim
->io_opt
= lim
->logical_block_size
* opt_io_size
;
1319 if (virtio_has_feature(vdev
, VIRTIO_BLK_F_DISCARD
)) {
1320 virtio_cread(vdev
, struct virtio_blk_config
,
1321 discard_sector_alignment
, &discard_granularity
);
1323 virtio_cread(vdev
, struct virtio_blk_config
,
1324 max_discard_sectors
, &v
);
1325 lim
->max_hw_discard_sectors
= v
? v
: UINT_MAX
;
1327 virtio_cread(vdev
, struct virtio_blk_config
, max_discard_seg
,
1331 if (virtio_has_feature(vdev
, VIRTIO_BLK_F_WRITE_ZEROES
)) {
1332 virtio_cread(vdev
, struct virtio_blk_config
,
1333 max_write_zeroes_sectors
, &v
);
1334 lim
->max_write_zeroes_sectors
= v
? v
: UINT_MAX
;
1337 /* The discard and secure erase limits are combined since the Linux
1338 * block layer uses the same limit for both commands.
1340 * If both VIRTIO_BLK_F_SECURE_ERASE and VIRTIO_BLK_F_DISCARD features
1341 * are negotiated, we will use the minimum between the limits.
1343 * discard sector alignment is set to the minimum between discard_sector_alignment
1344 * and secure_erase_sector_alignment.
1346 * max discard sectors is set to the minimum between max_discard_seg and
1347 * max_secure_erase_seg.
1349 if (virtio_has_feature(vdev
, VIRTIO_BLK_F_SECURE_ERASE
)) {
1351 virtio_cread(vdev
, struct virtio_blk_config
,
1352 secure_erase_sector_alignment
, &v
);
1354 /* secure_erase_sector_alignment should not be zero, the device should set a
1355 * valid number of sectors.
1359 "virtio_blk: secure_erase_sector_alignment can't be 0\n");
1363 discard_granularity
= min_not_zero(discard_granularity
, v
);
1365 virtio_cread(vdev
, struct virtio_blk_config
,
1366 max_secure_erase_sectors
, &v
);
1368 /* max_secure_erase_sectors should not be zero, the device should set a
1369 * valid number of sectors.
1373 "virtio_blk: max_secure_erase_sectors can't be 0\n");
1377 lim
->max_secure_erase_sectors
= v
;
1379 virtio_cread(vdev
, struct virtio_blk_config
,
1380 max_secure_erase_seg
, &v
);
1382 /* max_secure_erase_seg should not be zero, the device should set a
1383 * valid number of segments
1387 "virtio_blk: max_secure_erase_seg can't be 0\n");
1391 max_discard_segs
= min_not_zero(max_discard_segs
, v
);
1394 if (virtio_has_feature(vdev
, VIRTIO_BLK_F_DISCARD
) ||
1395 virtio_has_feature(vdev
, VIRTIO_BLK_F_SECURE_ERASE
)) {
1396 /* max_discard_seg and discard_granularity will be 0 only
1397 * if max_discard_seg and discard_sector_alignment fields in the virtio
1398 * config are 0 and VIRTIO_BLK_F_SECURE_ERASE feature is not negotiated.
1399 * In this case, we use default values.
1401 if (!max_discard_segs
)
1402 max_discard_segs
= sg_elems
;
1404 lim
->max_discard_segments
=
1405 min(max_discard_segs
, MAX_DISCARD_SEGMENTS
);
1407 if (discard_granularity
)
1408 lim
->discard_granularity
=
1409 discard_granularity
<< SECTOR_SHIFT
;
1411 lim
->discard_granularity
= lim
->logical_block_size
;
1414 if (virtio_has_feature(vdev
, VIRTIO_BLK_F_ZONED
)) {
1417 virtio_cread(vdev
, struct virtio_blk_config
, zoned
.model
, &model
);
1419 case VIRTIO_BLK_Z_NONE
:
1420 case VIRTIO_BLK_Z_HA
:
1421 /* treat host-aware devices as non-zoned */
1423 case VIRTIO_BLK_Z_HM
:
1424 err
= virtblk_read_zoned_limits(vblk
, lim
);
1429 dev_err(&vdev
->dev
, "unsupported zone model %d\n", model
);
1437 static int virtblk_probe(struct virtio_device
*vdev
)
1439 struct virtio_blk
*vblk
;
1440 struct queue_limits lim
= {
1441 .features
= BLK_FEAT_ROTATIONAL
,
1442 .logical_block_size
= SECTOR_SIZE
,
1445 unsigned int queue_depth
;
1447 if (!vdev
->config
->get
) {
1448 dev_err(&vdev
->dev
, "%s failure: config access disabled\n",
1453 err
= ida_alloc_range(&vd_index_ida
, 0,
1454 minor_to_index(1 << MINORBITS
) - 1, GFP_KERNEL
);
1459 vdev
->priv
= vblk
= kmalloc(sizeof(*vblk
), GFP_KERNEL
);
1462 goto out_free_index
;
1465 mutex_init(&vblk
->vdev_mutex
);
1469 INIT_WORK(&vblk
->config_work
, virtblk_config_changed_work
);
1471 err
= init_vq(vblk
);
1475 /* Default queue sizing is to fill the ring. */
1476 if (!virtblk_queue_depth
) {
1477 queue_depth
= vblk
->vqs
[0].vq
->num_free
;
1478 /* ... but without indirect descs, we use 2 descs per req */
1479 if (!virtio_has_feature(vdev
, VIRTIO_RING_F_INDIRECT_DESC
))
1482 queue_depth
= virtblk_queue_depth
;
1485 memset(&vblk
->tag_set
, 0, sizeof(vblk
->tag_set
));
1486 vblk
->tag_set
.ops
= &virtio_mq_ops
;
1487 vblk
->tag_set
.queue_depth
= queue_depth
;
1488 vblk
->tag_set
.numa_node
= NUMA_NO_NODE
;
1489 vblk
->tag_set
.flags
= BLK_MQ_F_SHOULD_MERGE
;
1490 vblk
->tag_set
.cmd_size
=
1491 sizeof(struct virtblk_req
) +
1492 sizeof(struct scatterlist
) * VIRTIO_BLK_INLINE_SG_CNT
;
1493 vblk
->tag_set
.driver_data
= vblk
;
1494 vblk
->tag_set
.nr_hw_queues
= vblk
->num_vqs
;
1495 vblk
->tag_set
.nr_maps
= 1;
1496 if (vblk
->io_queues
[HCTX_TYPE_POLL
])
1497 vblk
->tag_set
.nr_maps
= 3;
1499 err
= blk_mq_alloc_tag_set(&vblk
->tag_set
);
1503 err
= virtblk_read_limits(vblk
, &lim
);
1507 if (virtblk_get_cache_mode(vdev
))
1508 lim
.features
|= BLK_FEAT_WRITE_CACHE
;
1510 vblk
->disk
= blk_mq_alloc_disk(&vblk
->tag_set
, &lim
, vblk
);
1511 if (IS_ERR(vblk
->disk
)) {
1512 err
= PTR_ERR(vblk
->disk
);
1516 virtblk_name_format("vd", index
, vblk
->disk
->disk_name
, DISK_NAME_LEN
);
1518 vblk
->disk
->major
= major
;
1519 vblk
->disk
->first_minor
= index_to_minor(index
);
1520 vblk
->disk
->minors
= 1 << PART_BITS
;
1521 vblk
->disk
->private_data
= vblk
;
1522 vblk
->disk
->fops
= &virtblk_fops
;
1523 vblk
->index
= index
;
1525 /* If disk is read-only in the host, the guest should obey */
1526 if (virtio_has_feature(vdev
, VIRTIO_BLK_F_RO
))
1527 set_disk_ro(vblk
->disk
, 1);
1529 virtblk_update_capacity(vblk
, false);
1530 virtio_device_ready(vdev
);
1533 * All steps that follow use the VQs therefore they need to be
1534 * placed after the virtio_device_ready() call above.
1536 if (IS_ENABLED(CONFIG_BLK_DEV_ZONED
) &&
1537 (lim
.features
& BLK_FEAT_ZONED
)) {
1538 err
= blk_revalidate_disk_zones(vblk
->disk
);
1540 goto out_cleanup_disk
;
1543 err
= device_add_disk(&vdev
->dev
, vblk
->disk
, virtblk_attr_groups
);
1545 goto out_cleanup_disk
;
1550 put_disk(vblk
->disk
);
1552 blk_mq_free_tag_set(&vblk
->tag_set
);
1554 vdev
->config
->del_vqs(vdev
);
1559 ida_free(&vd_index_ida
, index
);
1564 static void virtblk_remove(struct virtio_device
*vdev
)
1566 struct virtio_blk
*vblk
= vdev
->priv
;
1568 /* Make sure no work handler is accessing the device. */
1569 flush_work(&vblk
->config_work
);
1571 del_gendisk(vblk
->disk
);
1572 blk_mq_free_tag_set(&vblk
->tag_set
);
1574 mutex_lock(&vblk
->vdev_mutex
);
1576 /* Stop all the virtqueues. */
1577 virtio_reset_device(vdev
);
1579 /* Virtqueues are stopped, nothing can use vblk->vdev anymore. */
1582 vdev
->config
->del_vqs(vdev
);
1585 mutex_unlock(&vblk
->vdev_mutex
);
1587 put_disk(vblk
->disk
);
1590 #ifdef CONFIG_PM_SLEEP
1591 static int virtblk_freeze(struct virtio_device
*vdev
)
1593 struct virtio_blk
*vblk
= vdev
->priv
;
1595 /* Ensure no requests in virtqueues before deleting vqs. */
1596 blk_mq_freeze_queue(vblk
->disk
->queue
);
1598 /* Ensure we don't receive any more interrupts */
1599 virtio_reset_device(vdev
);
1601 /* Make sure no work handler is accessing the device. */
1602 flush_work(&vblk
->config_work
);
1604 vdev
->config
->del_vqs(vdev
);
1610 static int virtblk_restore(struct virtio_device
*vdev
)
1612 struct virtio_blk
*vblk
= vdev
->priv
;
1615 ret
= init_vq(vdev
->priv
);
1619 virtio_device_ready(vdev
);
1621 blk_mq_unfreeze_queue(vblk
->disk
->queue
);
1626 static const struct virtio_device_id id_table
[] = {
1627 { VIRTIO_ID_BLOCK
, VIRTIO_DEV_ANY_ID
},
1631 static unsigned int features_legacy
[] = {
1632 VIRTIO_BLK_F_SEG_MAX
, VIRTIO_BLK_F_SIZE_MAX
, VIRTIO_BLK_F_GEOMETRY
,
1633 VIRTIO_BLK_F_RO
, VIRTIO_BLK_F_BLK_SIZE
,
1634 VIRTIO_BLK_F_FLUSH
, VIRTIO_BLK_F_TOPOLOGY
, VIRTIO_BLK_F_CONFIG_WCE
,
1635 VIRTIO_BLK_F_MQ
, VIRTIO_BLK_F_DISCARD
, VIRTIO_BLK_F_WRITE_ZEROES
,
1636 VIRTIO_BLK_F_SECURE_ERASE
,
1639 static unsigned int features
[] = {
1640 VIRTIO_BLK_F_SEG_MAX
, VIRTIO_BLK_F_SIZE_MAX
, VIRTIO_BLK_F_GEOMETRY
,
1641 VIRTIO_BLK_F_RO
, VIRTIO_BLK_F_BLK_SIZE
,
1642 VIRTIO_BLK_F_FLUSH
, VIRTIO_BLK_F_TOPOLOGY
, VIRTIO_BLK_F_CONFIG_WCE
,
1643 VIRTIO_BLK_F_MQ
, VIRTIO_BLK_F_DISCARD
, VIRTIO_BLK_F_WRITE_ZEROES
,
1644 VIRTIO_BLK_F_SECURE_ERASE
, VIRTIO_BLK_F_ZONED
,
1647 static struct virtio_driver virtio_blk
= {
1648 .feature_table
= features
,
1649 .feature_table_size
= ARRAY_SIZE(features
),
1650 .feature_table_legacy
= features_legacy
,
1651 .feature_table_size_legacy
= ARRAY_SIZE(features_legacy
),
1652 .driver
.name
= KBUILD_MODNAME
,
1653 .id_table
= id_table
,
1654 .probe
= virtblk_probe
,
1655 .remove
= virtblk_remove
,
1656 .config_changed
= virtblk_config_changed
,
1657 #ifdef CONFIG_PM_SLEEP
1658 .freeze
= virtblk_freeze
,
1659 .restore
= virtblk_restore
,
1663 static int __init
virtio_blk_init(void)
1667 virtblk_wq
= alloc_workqueue("virtio-blk", 0, 0);
1671 major
= register_blkdev(0, "virtblk");
1674 goto out_destroy_workqueue
;
1677 error
= register_virtio_driver(&virtio_blk
);
1679 goto out_unregister_blkdev
;
1682 out_unregister_blkdev
:
1683 unregister_blkdev(major
, "virtblk");
1684 out_destroy_workqueue
:
1685 destroy_workqueue(virtblk_wq
);
1689 static void __exit
virtio_blk_fini(void)
1691 unregister_virtio_driver(&virtio_blk
);
1692 unregister_blkdev(major
, "virtblk");
1693 destroy_workqueue(virtblk_wq
);
1695 module_init(virtio_blk_init
);
1696 module_exit(virtio_blk_fini
);
1698 MODULE_DEVICE_TABLE(virtio
, id_table
);
1699 MODULE_DESCRIPTION("Virtio block driver");
1700 MODULE_LICENSE("GPL");