// SPDX-License-Identifier: GPL-2.0-only
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/hdreg.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/interrupt.h>
#include <linux/virtio.h>
#include <linux/virtio_blk.h>
#include <linux/scatterlist.h>
#include <linux/string_helpers.h>
#include <linux/idr.h>
#include <linux/blk-mq.h>
#include <linux/blk-mq-virtio.h>
#include <linux/numa.h>
#include <linux/vmalloc.h>
#include <uapi/linux/virtio_ring.h>

#define PART_BITS 4
#define VQ_NAME_LEN 16
#define MAX_DISCARD_SEGMENTS 256u

/* The maximum number of sg elements that fit into a virtqueue */
#define VIRTIO_BLK_MAX_SG_ELEMS 32768

#ifdef CONFIG_ARCH_NO_SG_CHAIN
#define VIRTIO_BLK_INLINE_SG_CNT	0
#else
#define VIRTIO_BLK_INLINE_SG_CNT	2
#endif

static unsigned int num_request_queues;
module_param(num_request_queues, uint, 0644);
MODULE_PARM_DESC(num_request_queues,
		 "Limit the number of request queues to use for blk device. "
		 "Values > nr_cpu_ids truncated to nr_cpu_ids.");

static unsigned int poll_queues;
module_param(poll_queues, uint, 0644);
MODULE_PARM_DESC(poll_queues, "The number of dedicated virtqueues for polling I/O");

static int major;
static DEFINE_IDA(vd_index_ida);

static struct workqueue_struct *virtblk_wq;

struct virtio_blk_vq {
	struct virtqueue *vq;
	spinlock_t lock;
	char name[VQ_NAME_LEN];
} ____cacheline_aligned_in_smp;

struct virtio_blk {
	/*
	 * This mutex must be held by anything that may run after
	 * virtblk_remove() sets vblk->vdev to NULL.
	 *
	 * blk-mq, virtqueue processing, and sysfs attribute code paths are
	 * shut down before vblk->vdev is set to NULL and therefore do not need
	 * to hold the mutex.
	 */
	struct mutex vdev_mutex;
	struct virtio_device *vdev;

	/* The disk structure for the kernel. */
	struct gendisk *disk;

	/* Block layer tags. */
	struct blk_mq_tag_set tag_set;

	/* Process context for config space updates */
	struct work_struct config_work;

	/* Ida index - used to track minor number allocations. */
	int index;

	/* num of vqs */
	int num_vqs;
	int io_queues[HCTX_MAX_TYPES];
	struct virtio_blk_vq *vqs;

	/* For zoned device */
	unsigned int zone_sectors;
};

struct virtblk_req {
	/* Out header */
	struct virtio_blk_outhdr out_hdr;

	/* In header */
	union {
		u8 status;

		/*
		 * The zone append command has an extended in header.
		 * The status field in zone_append_in_hdr must always
		 * be the last byte.
		 */
		struct {
			__virtio64 sector;
			u8 status;
		} zone_append;
	} in_hdr;

	size_t in_hdr_len;

	struct sg_table sg_table;
	struct scatterlist sg[];
};

static inline blk_status_t virtblk_result(u8 status)
{
	switch (status) {
	case VIRTIO_BLK_S_OK:
		return BLK_STS_OK;
	case VIRTIO_BLK_S_UNSUPP:
		return BLK_STS_NOTSUPP;
	case VIRTIO_BLK_S_ZONE_OPEN_RESOURCE:
		return BLK_STS_ZONE_OPEN_RESOURCE;
	case VIRTIO_BLK_S_ZONE_ACTIVE_RESOURCE:
		return BLK_STS_ZONE_ACTIVE_RESOURCE;
	case VIRTIO_BLK_S_IOERR:
	case VIRTIO_BLK_S_ZONE_UNALIGNED_WP:
	default:
		return BLK_STS_IOERR;
	}
}

static inline struct virtio_blk_vq *get_virtio_blk_vq(struct blk_mq_hw_ctx *hctx)
{
	struct virtio_blk *vblk = hctx->queue->queuedata;
	struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];

	return vq;
}

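/*
 * Build the descriptor chain for one request: the out header first, the
 * optional data scatterlist (driver-to-device for writes, device-to-driver
 * for reads), and finally the in header that carries the completion status.
 */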
static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr)
{
	struct scatterlist out_hdr, in_hdr, *sgs[3];
	unsigned int num_out = 0, num_in = 0;

	sg_init_one(&out_hdr, &vbr->out_hdr, sizeof(vbr->out_hdr));
	sgs[num_out++] = &out_hdr;

	if (vbr->sg_table.nents) {
		if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT))
			sgs[num_out++] = vbr->sg_table.sgl;
		else
			sgs[num_out + num_in++] = vbr->sg_table.sgl;
	}

	sg_init_one(&in_hdr, &vbr->in_hdr.status, vbr->in_hdr_len);
	sgs[num_out + num_in++] = &in_hdr;

	return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
}

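/*
 * Translate a discard, write-zeroes or secure-erase request into an array of
 * virtio_blk_discard_write_zeroes ranges and attach it to the request as its
 * special payload.
 */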
static int virtblk_setup_discard_write_zeroes_erase(struct request *req, bool unmap)
{
	unsigned short segments = blk_rq_nr_discard_segments(req);
	unsigned short n = 0;
	struct virtio_blk_discard_write_zeroes *range;
	struct bio *bio;
	u32 flags = 0;

	if (unmap)
		flags |= VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP;

	range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
	if (!range)
		return -ENOMEM;

	/*
	 * Single max discard segment means multi-range discard isn't
	 * supported, and block layer only runs contiguity merge like
	 * normal RW request. So we can't rely on bio for retrieving
	 * each range info.
	 */
	if (queue_max_discard_segments(req->q) == 1) {
		range[0].flags = cpu_to_le32(flags);
		range[0].num_sectors = cpu_to_le32(blk_rq_sectors(req));
		range[0].sector = cpu_to_le64(blk_rq_pos(req));
		n = 1;
	} else {
		__rq_for_each_bio(bio, req) {
			u64 sector = bio->bi_iter.bi_sector;
			u32 num_sectors = bio->bi_iter.bi_size >> SECTOR_SHIFT;

			range[n].flags = cpu_to_le32(flags);
			range[n].num_sectors = cpu_to_le32(num_sectors);
			range[n].sector = cpu_to_le64(sector);
			n++;
		}
	}

	WARN_ON_ONCE(n != segments);

	bvec_set_virt(&req->special_vec, range, sizeof(*range) * segments);
	req->rq_flags |= RQF_SPECIAL_PAYLOAD;

	return 0;
}

static void virtblk_unmap_data(struct request *req, struct virtblk_req *vbr)
{
	if (blk_rq_nr_phys_segments(req))
		sg_free_table_chained(&vbr->sg_table,
				      VIRTIO_BLK_INLINE_SG_CNT);
}

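/*
 * Map the request's data pages into the chained scatterlist. Returns the
 * number of mapped segments, 0 for requests that carry no data, or a
 * negative errno on allocation failure.
 */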
static int virtblk_map_data(struct blk_mq_hw_ctx *hctx, struct request *req,
			    struct virtblk_req *vbr)
{
	int err;

	if (!blk_rq_nr_phys_segments(req))
		return 0;

	vbr->sg_table.sgl = vbr->sg;
	err = sg_alloc_table_chained(&vbr->sg_table,
				     blk_rq_nr_phys_segments(req),
				     vbr->sg_table.sgl,
				     VIRTIO_BLK_INLINE_SG_CNT);
	if (unlikely(err))
		return -ENOMEM;

	return blk_rq_map_sg(hctx->queue, req, vbr->sg_table.sgl);
}

static void virtblk_cleanup_cmd(struct request *req)
{
	if (req->rq_flags & RQF_SPECIAL_PAYLOAD)
		kfree(bvec_virt(&req->special_vec));
}

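/*
 * Fill in the virtio-blk out header (type, sector, ioprio) for the block
 * layer operation at hand and pick the matching in-header length.
 */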
static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev,
				      struct request *req,
				      struct virtblk_req *vbr)
{
	size_t in_hdr_len = sizeof(vbr->in_hdr.status);
	bool unmap = false;
	u32 type;
	u64 sector = 0;

	if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) && op_is_zone_mgmt(req_op(req)))
		return BLK_STS_NOTSUPP;

	/* Set fields for all request types */
	vbr->out_hdr.ioprio = cpu_to_virtio32(vdev, req_get_ioprio(req));

	switch (req_op(req)) {
	case REQ_OP_READ:
		type = VIRTIO_BLK_T_IN;
		sector = blk_rq_pos(req);
		break;
	case REQ_OP_WRITE:
		type = VIRTIO_BLK_T_OUT;
		sector = blk_rq_pos(req);
		break;
	case REQ_OP_FLUSH:
		type = VIRTIO_BLK_T_FLUSH;
		break;
	case REQ_OP_DISCARD:
		type = VIRTIO_BLK_T_DISCARD;
		break;
	case REQ_OP_WRITE_ZEROES:
		type = VIRTIO_BLK_T_WRITE_ZEROES;
		unmap = !(req->cmd_flags & REQ_NOUNMAP);
		break;
	case REQ_OP_SECURE_ERASE:
		type = VIRTIO_BLK_T_SECURE_ERASE;
		break;
	case REQ_OP_ZONE_OPEN:
		type = VIRTIO_BLK_T_ZONE_OPEN;
		sector = blk_rq_pos(req);
		break;
	case REQ_OP_ZONE_CLOSE:
		type = VIRTIO_BLK_T_ZONE_CLOSE;
		sector = blk_rq_pos(req);
		break;
	case REQ_OP_ZONE_FINISH:
		type = VIRTIO_BLK_T_ZONE_FINISH;
		sector = blk_rq_pos(req);
		break;
	case REQ_OP_ZONE_APPEND:
		type = VIRTIO_BLK_T_ZONE_APPEND;
		sector = blk_rq_pos(req);
		in_hdr_len = sizeof(vbr->in_hdr.zone_append);
		break;
	case REQ_OP_ZONE_RESET:
		type = VIRTIO_BLK_T_ZONE_RESET;
		sector = blk_rq_pos(req);
		break;
	case REQ_OP_ZONE_RESET_ALL:
		type = VIRTIO_BLK_T_ZONE_RESET_ALL;
		break;
	case REQ_OP_DRV_IN:
		/*
		 * Out header has already been prepared by the caller (virtblk_get_id()
		 * or virtblk_submit_zone_report()), nothing to do here.
		 */
		return 0;
	default:
		return BLK_STS_IOERR;
	}

	/* Set fields for non-REQ_OP_DRV_IN request types */
	vbr->in_hdr_len = in_hdr_len;
	vbr->out_hdr.type = cpu_to_virtio32(vdev, type);
	vbr->out_hdr.sector = cpu_to_virtio64(vdev, sector);

	if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES ||
	    type == VIRTIO_BLK_T_SECURE_ERASE) {
		if (virtblk_setup_discard_write_zeroes_erase(req, unmap))
			return BLK_STS_RESOURCE;
	}

	return 0;
}

/*
 * The status byte is always the last byte of the virtblk request
 * in-header. This helper fetches its value for all in-header formats
 * that are currently defined.
 */
static inline u8 virtblk_vbr_status(struct virtblk_req *vbr)
{
	return *((u8 *)&vbr->in_hdr + vbr->in_hdr_len - 1);
}

static inline void virtblk_request_done(struct request *req)
{
	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
	blk_status_t status = virtblk_result(virtblk_vbr_status(vbr));
	struct virtio_blk *vblk = req->mq_hctx->queue->queuedata;

	virtblk_unmap_data(req, vbr);
	virtblk_cleanup_cmd(req);

	if (req_op(req) == REQ_OP_ZONE_APPEND)
		req->__sector = virtio64_to_cpu(vblk->vdev,
						vbr->in_hdr.zone_append.sector);

	blk_mq_end_request(req, status);
}

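/*
 * Completion callback for the interrupt-driven virtqueues: reap every
 * finished request under the vq lock and restart any stopped hw queues.
 */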
static void virtblk_done(struct virtqueue *vq)
{
	struct virtio_blk *vblk = vq->vdev->priv;
	bool req_done = false;
	int qid = vq->index;
	struct virtblk_req *vbr;
	unsigned long flags;
	unsigned int len;

	spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
	do {
		virtqueue_disable_cb(vq);
		while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) {
			struct request *req = blk_mq_rq_from_pdu(vbr);

			if (likely(!blk_should_fake_timeout(req->q)))
				blk_mq_complete_request(req);
			req_done = true;
		}
	} while (!virtqueue_enable_cb(vq));

	/* In case queue is stopped waiting for more buffers. */
	if (req_done)
		blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
}

static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx)
{
	struct virtio_blk *vblk = hctx->queue->queuedata;
	struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];
	bool kick;

	spin_lock_irq(&vq->lock);
	kick = virtqueue_kick_prepare(vq->vq);
	spin_unlock_irq(&vq->lock);

	if (kick)
		virtqueue_notify(vq->vq);
}

static blk_status_t virtblk_fail_to_queue(struct request *req, int rc)
{
	virtblk_cleanup_cmd(req);
	switch (rc) {
	case -ENOSPC:
		return BLK_STS_DEV_RESOURCE;
	case -ENOMEM:
		return BLK_STS_RESOURCE;
	default:
		return BLK_STS_IOERR;
	}
}

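/*
 * Common preparation for both the single-request and the batched submission
 * paths: build the command header, map the data and start the request.
 */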
static blk_status_t virtblk_prep_rq(struct blk_mq_hw_ctx *hctx,
				    struct virtio_blk *vblk,
				    struct request *req,
				    struct virtblk_req *vbr)
{
	blk_status_t status;
	int num;

	status = virtblk_setup_cmd(vblk->vdev, req, vbr);
	if (unlikely(status))
		return status;

	num = virtblk_map_data(hctx, req, vbr);
	if (unlikely(num < 0))
		return virtblk_fail_to_queue(req, -ENOMEM);
	vbr->sg_table.nents = num;

	blk_mq_start_request(req);

	return BLK_STS_OK;
}

static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
				    const struct blk_mq_queue_data *bd)
{
	struct virtio_blk *vblk = hctx->queue->queuedata;
	struct request *req = bd->rq;
	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
	unsigned long flags;
	int qid = hctx->queue_num;
	bool notify = false;
	blk_status_t status;
	int err;

	status = virtblk_prep_rq(hctx, vblk, req, vbr);
	if (unlikely(status))
		return status;

	spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
	err = virtblk_add_req(vblk->vqs[qid].vq, vbr);
	if (err) {
		virtqueue_kick(vblk->vqs[qid].vq);
		/* Don't stop the queue if -ENOMEM: we may have failed to
		 * bounce the buffer due to global resource outage.
		 */
		if (err == -ENOSPC)
			blk_mq_stop_hw_queue(hctx);
		spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
		virtblk_unmap_data(req, vbr);
		return virtblk_fail_to_queue(req, err);
	}

	if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
		notify = true;
	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);

	if (notify)
		virtqueue_notify(vblk->vqs[qid].vq);
	return BLK_STS_OK;
}

static bool virtblk_prep_rq_batch(struct request *req)
{
	struct virtio_blk *vblk = req->mq_hctx->queue->queuedata;
	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);

	return virtblk_prep_rq(req->mq_hctx, vblk, req, vbr) == BLK_STS_OK;
}

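/*
 * Submit a list of prepared requests to one virtqueue under a single lock
 * acquisition; requests the ring cannot accept are unwound and requeued.
 */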
static void virtblk_add_req_batch(struct virtio_blk_vq *vq,
				  struct rq_list *rqlist)
{
	struct request *req;
	unsigned long flags;
	bool kick;

	spin_lock_irqsave(&vq->lock, flags);

	while ((req = rq_list_pop(rqlist))) {
		struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
		int err;

		err = virtblk_add_req(vq->vq, vbr);
		if (err) {
			virtblk_unmap_data(req, vbr);
			virtblk_cleanup_cmd(req);
			blk_mq_requeue_request(req, true);
		}
	}

	kick = virtqueue_kick_prepare(vq->vq);
	spin_unlock_irqrestore(&vq->lock, flags);

	if (kick)
		virtqueue_notify(vq->vq);
}

static void virtio_queue_rqs(struct rq_list *rqlist)
{
	struct rq_list submit_list = { };
	struct rq_list requeue_list = { };
	struct virtio_blk_vq *vq = NULL;
	struct request *req;

	while ((req = rq_list_pop(rqlist))) {
		struct virtio_blk_vq *this_vq = get_virtio_blk_vq(req->mq_hctx);

		if (vq && vq != this_vq)
			virtblk_add_req_batch(vq, &submit_list);
		vq = this_vq;

		if (virtblk_prep_rq_batch(req))
			rq_list_add_tail(&submit_list, req);
		else
			rq_list_add_tail(&requeue_list, req);
	}

	if (vq)
		virtblk_add_req_batch(vq, &submit_list);
	*rqlist = requeue_list;
}

#ifdef CONFIG_BLK_DEV_ZONED
static void *virtblk_alloc_report_buffer(struct virtio_blk *vblk,
					 unsigned int nr_zones,
					 size_t *buflen)
{
	struct request_queue *q = vblk->disk->queue;
	size_t bufsize;
	void *buf;

	nr_zones = min_t(unsigned int, nr_zones,
			 get_capacity(vblk->disk) >> ilog2(vblk->zone_sectors));

	bufsize = sizeof(struct virtio_blk_zone_report) +
		nr_zones * sizeof(struct virtio_blk_zone_descriptor);
	bufsize = min_t(size_t, bufsize,
			queue_max_hw_sectors(q) << SECTOR_SHIFT);
	bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT);

	while (bufsize >= sizeof(struct virtio_blk_zone_report)) {
		buf = __vmalloc(bufsize, GFP_KERNEL | __GFP_NORETRY);
		if (buf) {
			*buflen = bufsize;
			return buf;
		}
		bufsize >>= 1;
	}

	return NULL;
}

static int virtblk_submit_zone_report(struct virtio_blk *vblk,
				      char *report_buf, size_t report_len,
				      sector_t sector)
{
	struct request_queue *q = vblk->disk->queue;
	struct request *req;
	struct virtblk_req *vbr;
	int err;

	req = blk_mq_alloc_request(q, REQ_OP_DRV_IN, 0);
	if (IS_ERR(req))
		return PTR_ERR(req);

	vbr = blk_mq_rq_to_pdu(req);
	vbr->in_hdr_len = sizeof(vbr->in_hdr.status);
	vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_ZONE_REPORT);
	vbr->out_hdr.sector = cpu_to_virtio64(vblk->vdev, sector);

	err = blk_rq_map_kern(q, req, report_buf, report_len, GFP_KERNEL);
	if (err)
		goto out;

	blk_execute_rq(req, false);
	err = blk_status_to_errno(virtblk_result(vbr->in_hdr.status));
out:
	blk_mq_free_request(req);
	return err;
}

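/*
 * Convert one virtio zone descriptor into a struct blk_zone and hand it to
 * the block layer report_zones callback.
 */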
static int virtblk_parse_zone(struct virtio_blk *vblk,
			      struct virtio_blk_zone_descriptor *entry,
			      unsigned int idx, report_zones_cb cb, void *data)
{
	struct blk_zone zone = { };

	zone.start = virtio64_to_cpu(vblk->vdev, entry->z_start);
	if (zone.start + vblk->zone_sectors <= get_capacity(vblk->disk))
		zone.len = vblk->zone_sectors;
	else
		zone.len = get_capacity(vblk->disk) - zone.start;
	zone.capacity = virtio64_to_cpu(vblk->vdev, entry->z_cap);
	zone.wp = virtio64_to_cpu(vblk->vdev, entry->z_wp);

	switch (entry->z_type) {
	case VIRTIO_BLK_ZT_SWR:
		zone.type = BLK_ZONE_TYPE_SEQWRITE_REQ;
		break;
	case VIRTIO_BLK_ZT_SWP:
		zone.type = BLK_ZONE_TYPE_SEQWRITE_PREF;
		break;
	case VIRTIO_BLK_ZT_CONV:
		zone.type = BLK_ZONE_TYPE_CONVENTIONAL;
		break;
	default:
		dev_err(&vblk->vdev->dev, "zone %llu: invalid type %#x\n",
			zone.start, entry->z_type);
		return -EIO;
	}

	switch (entry->z_state) {
	case VIRTIO_BLK_ZS_EMPTY:
		zone.cond = BLK_ZONE_COND_EMPTY;
		break;
	case VIRTIO_BLK_ZS_CLOSED:
		zone.cond = BLK_ZONE_COND_CLOSED;
		break;
	case VIRTIO_BLK_ZS_FULL:
		zone.cond = BLK_ZONE_COND_FULL;
		zone.wp = zone.start + zone.len;
		break;
	case VIRTIO_BLK_ZS_EOPEN:
		zone.cond = BLK_ZONE_COND_EXP_OPEN;
		break;
	case VIRTIO_BLK_ZS_IOPEN:
		zone.cond = BLK_ZONE_COND_IMP_OPEN;
		break;
	case VIRTIO_BLK_ZS_NOT_WP:
		zone.cond = BLK_ZONE_COND_NOT_WP;
		break;
	case VIRTIO_BLK_ZS_RDONLY:
		zone.cond = BLK_ZONE_COND_READONLY;
		break;
	case VIRTIO_BLK_ZS_OFFLINE:
		zone.cond = BLK_ZONE_COND_OFFLINE;
		break;
	default:
		dev_err(&vblk->vdev->dev, "zone %llu: invalid condition %#x\n",
			zone.start, entry->z_state);
		return -EIO;
	}

	/*
	 * The callback below checks the validity of the reported
	 * entry data, no need to further validate it here.
	 */
	return cb(&zone, idx, data);
}

static int virtblk_report_zones(struct gendisk *disk, sector_t sector,
				unsigned int nr_zones, report_zones_cb cb,
				void *data)
{
	struct virtio_blk *vblk = disk->private_data;
	struct virtio_blk_zone_report *report;
	unsigned long long nz, i;
	size_t buflen;
	unsigned int zone_idx = 0;
	int ret;

	if (WARN_ON_ONCE(!vblk->zone_sectors))
		return -EOPNOTSUPP;

	report = virtblk_alloc_report_buffer(vblk, nr_zones, &buflen);
	if (!report)
		return -ENOMEM;

	mutex_lock(&vblk->vdev_mutex);

	if (!vblk->vdev) {
		ret = -ENXIO;
		goto fail_report;
	}

	while (zone_idx < nr_zones && sector < get_capacity(vblk->disk)) {
		memset(report, 0, buflen);

		ret = virtblk_submit_zone_report(vblk, (char *)report,
						 buflen, sector);
		if (ret)
			goto fail_report;

		nz = min_t(u64, virtio64_to_cpu(vblk->vdev, report->nr_zones),
			   nr_zones);
		if (!nz)
			break;

		for (i = 0; i < nz && zone_idx < nr_zones; i++) {
			ret = virtblk_parse_zone(vblk, &report->zones[i],
						 zone_idx, cb, data);
			if (ret)
				goto fail_report;

			sector = virtio64_to_cpu(vblk->vdev,
						 report->zones[i].z_start) +
				 vblk->zone_sectors;
			zone_idx++;
		}
	}

	if (zone_idx > 0)
		ret = zone_idx;
	else
		ret = -EINVAL;
fail_report:
	mutex_unlock(&vblk->vdev_mutex);
	kvfree(report);
	return ret;
}

static int virtblk_read_zoned_limits(struct virtio_blk *vblk,
				     struct queue_limits *lim)
{
	struct virtio_device *vdev = vblk->vdev;
	u32 v, wg;

	dev_dbg(&vdev->dev, "probing host-managed zoned device\n");

	lim->features |= BLK_FEAT_ZONED;

	virtio_cread(vdev, struct virtio_blk_config,
		     zoned.max_open_zones, &v);
	lim->max_open_zones = v;
	dev_dbg(&vdev->dev, "max open zones = %u\n", v);

	virtio_cread(vdev, struct virtio_blk_config,
		     zoned.max_active_zones, &v);
	lim->max_active_zones = v;
	dev_dbg(&vdev->dev, "max active zones = %u\n", v);

	virtio_cread(vdev, struct virtio_blk_config,
		     zoned.write_granularity, &wg);
	if (!wg) {
		dev_warn(&vdev->dev, "zero write granularity reported\n");
		return -ENODEV;
	}
	lim->physical_block_size = wg;
	lim->io_min = wg;

	dev_dbg(&vdev->dev, "write granularity = %u\n", wg);

	/*
	 * virtio ZBD specification doesn't require zones to be a power of
	 * two sectors in size, but the code in this driver expects that.
	 */
	virtio_cread(vdev, struct virtio_blk_config, zoned.zone_sectors,
		     &vblk->zone_sectors);
	if (vblk->zone_sectors == 0 || !is_power_of_2(vblk->zone_sectors)) {
		dev_err(&vdev->dev,
			"zoned device with non power of two zone size %u\n",
			vblk->zone_sectors);
		return -ENODEV;
	}
	lim->chunk_sectors = vblk->zone_sectors;
	dev_dbg(&vdev->dev, "zone sectors = %u\n", vblk->zone_sectors);

	if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
		dev_warn(&vblk->vdev->dev,
			 "ignoring negotiated F_DISCARD for zoned device\n");
		lim->max_hw_discard_sectors = 0;
	}

	virtio_cread(vdev, struct virtio_blk_config,
		     zoned.max_append_sectors, &v);
	if (!v) {
		dev_warn(&vdev->dev, "zero max_append_sectors reported\n");
		return -ENODEV;
	}
	if ((v << SECTOR_SHIFT) < wg) {
		dev_err(&vdev->dev,
			"write granularity %u exceeds max_append_sectors %u limit\n",
			wg, v);
		return -ENODEV;
	}
	lim->max_hw_zone_append_sectors = v;
	dev_dbg(&vdev->dev, "max append sectors = %u\n", v);

	return 0;
}

#else
/*
 * Zoned block device support is not configured in this kernel, host-managed
 * zoned devices can't be supported.
 */
#define virtblk_report_zones NULL
static inline int virtblk_read_zoned_limits(struct virtio_blk *vblk,
					    struct queue_limits *lim)
{
	dev_err(&vblk->vdev->dev,
		"virtio_blk: zoned devices are not supported");
	return -EOPNOTSUPP;
}
#endif /* CONFIG_BLK_DEV_ZONED */

/* return id (s/n) string for *disk to *id_str */
static int virtblk_get_id(struct gendisk *disk, char *id_str)
{
	struct virtio_blk *vblk = disk->private_data;
	struct request_queue *q = vblk->disk->queue;
	struct request *req;
	struct virtblk_req *vbr;
	int err;

	req = blk_mq_alloc_request(q, REQ_OP_DRV_IN, 0);
	if (IS_ERR(req))
		return PTR_ERR(req);

	vbr = blk_mq_rq_to_pdu(req);
	vbr->in_hdr_len = sizeof(vbr->in_hdr.status);
	vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_GET_ID);
	vbr->out_hdr.sector = 0;

	err = blk_rq_map_kern(q, req, id_str, VIRTIO_BLK_ID_BYTES, GFP_KERNEL);
	if (err)
		goto out;

	blk_execute_rq(req, false);
	err = blk_status_to_errno(virtblk_result(vbr->in_hdr.status));
out:
	blk_mq_free_request(req);
	return err;
}

/* We provide getgeo only to please some old bootloader/partitioning tools */
static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
{
	struct virtio_blk *vblk = bd->bd_disk->private_data;
	int ret = 0;

	mutex_lock(&vblk->vdev_mutex);

	if (!vblk->vdev) {
		ret = -ENXIO;
		goto out;
	}

	/* see if the host passed in geometry config */
	if (virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_GEOMETRY)) {
		virtio_cread(vblk->vdev, struct virtio_blk_config,
			     geometry.cylinders, &geo->cylinders);
		virtio_cread(vblk->vdev, struct virtio_blk_config,
			     geometry.heads, &geo->heads);
		virtio_cread(vblk->vdev, struct virtio_blk_config,
			     geometry.sectors, &geo->sectors);
	} else {
		/* some standard values, similar to sd */
		geo->heads = 1 << 6;
		geo->sectors = 1 << 5;
		geo->cylinders = get_capacity(bd->bd_disk) >> 11;
	}
out:
	mutex_unlock(&vblk->vdev_mutex);
	return ret;
}

static void virtblk_free_disk(struct gendisk *disk)
{
	struct virtio_blk *vblk = disk->private_data;

	ida_free(&vd_index_ida, vblk->index);
	mutex_destroy(&vblk->vdev_mutex);
	kfree(vblk);
}

static const struct block_device_operations virtblk_fops = {
	.owner			= THIS_MODULE,
	.getgeo			= virtblk_getgeo,
	.free_disk		= virtblk_free_disk,
	.report_zones		= virtblk_report_zones,
};

static int index_to_minor(int index)
{
	return index << PART_BITS;
}

static int minor_to_index(int minor)
{
	return minor >> PART_BITS;
}

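/*
 * Expose the device serial number (fetched with VIRTIO_BLK_T_GET_ID) as a
 * read-only sysfs attribute.
 */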
static ssize_t serial_show(struct device *dev,
			   struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	int err;

	/* sysfs gives us a PAGE_SIZE buffer */
	BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES);

	buf[VIRTIO_BLK_ID_BYTES] = '\0';
	err = virtblk_get_id(disk, buf);
	if (!err)
		return strlen(buf);

	if (err == -EIO) /* Unsupported? Make it empty. */
		return 0;

	return err;
}

static DEVICE_ATTR_RO(serial);

/* The queue's logical block size must be set before calling this */
static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize)
{
	struct virtio_device *vdev = vblk->vdev;
	struct request_queue *q = vblk->disk->queue;
	char cap_str_2[10], cap_str_10[10];
	unsigned long long nblocks;
	u64 capacity;

	/* Host must always specify the capacity. */
	virtio_cread(vdev, struct virtio_blk_config, capacity, &capacity);

	nblocks = DIV_ROUND_UP_ULL(capacity, queue_logical_block_size(q) >> 9);

	string_get_size(nblocks, queue_logical_block_size(q),
			STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
	string_get_size(nblocks, queue_logical_block_size(q),
			STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));

	dev_notice(&vdev->dev,
		   "[%s] %s%llu %d-byte logical blocks (%s/%s)\n",
		   vblk->disk->disk_name,
		   resize ? "new size: " : "",
		   nblocks,
		   queue_logical_block_size(q),
		   cap_str_10,
		   cap_str_2);

	set_capacity_and_notify(vblk->disk, capacity);
}

static void virtblk_config_changed_work(struct work_struct *work)
{
	struct virtio_blk *vblk =
		container_of(work, struct virtio_blk, config_work);

	virtblk_update_capacity(vblk, true);
}

static void virtblk_config_changed(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;

	queue_work(virtblk_wq, &vblk->config_work);
}

static int init_vq(struct virtio_blk *vblk)
{
	int err;
	unsigned short i;
	struct virtqueue_info *vqs_info;
	struct virtqueue **vqs;
	unsigned short num_vqs;
	unsigned short num_poll_vqs;
	struct virtio_device *vdev = vblk->vdev;
	struct irq_affinity desc = { 0, };

	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ,
				   struct virtio_blk_config, num_queues,
				   &num_vqs);
	if (err)
		num_vqs = 1;

	if (!err && !num_vqs) {
		dev_err(&vdev->dev, "MQ advertised but zero queues reported\n");
		return -EINVAL;
	}

	num_vqs = min_t(unsigned int,
			min_not_zero(num_request_queues, nr_cpu_ids),
			num_vqs);

	num_poll_vqs = min_t(unsigned int, poll_queues, num_vqs - 1);

	vblk->io_queues[HCTX_TYPE_DEFAULT] = num_vqs - num_poll_vqs;
	vblk->io_queues[HCTX_TYPE_READ] = 0;
	vblk->io_queues[HCTX_TYPE_POLL] = num_poll_vqs;

	dev_info(&vdev->dev, "%d/%d/%d default/read/poll queues\n",
		 vblk->io_queues[HCTX_TYPE_DEFAULT],
		 vblk->io_queues[HCTX_TYPE_READ],
		 vblk->io_queues[HCTX_TYPE_POLL]);

	vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), GFP_KERNEL);
	if (!vblk->vqs)
		return -ENOMEM;

	vqs_info = kcalloc(num_vqs, sizeof(*vqs_info), GFP_KERNEL);
	vqs = kmalloc_array(num_vqs, sizeof(*vqs), GFP_KERNEL);
	if (!vqs_info || !vqs) {
		err = -ENOMEM;
		goto out;
	}

	for (i = 0; i < num_vqs - num_poll_vqs; i++) {
		vqs_info[i].callback = virtblk_done;
		snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req.%u", i);
		vqs_info[i].name = vblk->vqs[i].name;
	}

	for (; i < num_vqs; i++) {
		snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req_poll.%u", i);
		vqs_info[i].name = vblk->vqs[i].name;
	}

	/* Discover virtqueues and write information to configuration. */
	err = virtio_find_vqs(vdev, num_vqs, vqs, vqs_info, &desc);
	if (err)
		goto out;

	for (i = 0; i < num_vqs; i++) {
		spin_lock_init(&vblk->vqs[i].lock);
		vblk->vqs[i].vq = vqs[i];
	}
	vblk->num_vqs = num_vqs;

out:
	kfree(vqs);
	kfree(vqs_info);
	if (err)
		kfree(vblk->vqs);
	return err;
}

/*
 * Legacy naming scheme used for virtio devices. We are stuck with it for
 * virtio blk but don't ever use it for any new driver.
 */
static int virtblk_name_format(char *prefix, int index, char *buf, int buflen)
{
	const int base = 'z' - 'a' + 1;
	char *begin = buf + strlen(prefix);
	char *end = buf + buflen;
	char *p;
	int unit;

	p = end - 1;
	*p = '\0';
	unit = base;
	do {
		if (p == begin)
			return -EINVAL;
		*--p = 'a' + (index % unit);
		index = (index / unit) - 1;
	} while (index >= 0);

	memmove(begin, p, end - p);
	memcpy(buf, prefix, strlen(prefix));

	return 0;
}

static int virtblk_get_cache_mode(struct virtio_device *vdev)
{
	u8 writeback;
	int err;

	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE,
				   struct virtio_blk_config, wce,
				   &writeback);

	/*
	 * If WCE is not configurable and flush is not available,
	 * assume no writeback cache is in use.
	 */
	if (err)
		writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH);

	return writeback;
}

static const char *const virtblk_cache_types[] = {
	"write through", "write back"
};

static ssize_t
cache_type_store(struct device *dev, struct device_attribute *attr,
		 const char *buf, size_t count)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct virtio_blk *vblk = disk->private_data;
	struct virtio_device *vdev = vblk->vdev;
	struct queue_limits lim;
	int i;

	BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE));
	i = sysfs_match_string(virtblk_cache_types, buf);
	if (i < 0)
		return i;

	virtio_cwrite8(vdev, offsetof(struct virtio_blk_config, wce), i);

	lim = queue_limits_start_update(disk->queue);
	if (virtblk_get_cache_mode(vdev))
		lim.features |= BLK_FEAT_WRITE_CACHE;
	else
		lim.features &= ~BLK_FEAT_WRITE_CACHE;
	blk_mq_freeze_queue(disk->queue);
	i = queue_limits_commit_update(disk->queue, &lim);
	blk_mq_unfreeze_queue(disk->queue);
	if (i)
		return i;
	return count;
}

static ssize_t
cache_type_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct virtio_blk *vblk = disk->private_data;
	u8 writeback = virtblk_get_cache_mode(vblk->vdev);

	BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types));
	return sysfs_emit(buf, "%s\n", virtblk_cache_types[writeback]);
}

static DEVICE_ATTR_RW(cache_type);

static struct attribute *virtblk_attrs[] = {
	&dev_attr_serial.attr,
	&dev_attr_cache_type.attr,
	NULL,
};

static umode_t virtblk_attrs_are_visible(struct kobject *kobj,
		struct attribute *a, int n)
{
	struct device *dev = kobj_to_dev(kobj);
	struct gendisk *disk = dev_to_disk(dev);
	struct virtio_blk *vblk = disk->private_data;
	struct virtio_device *vdev = vblk->vdev;

	if (a == &dev_attr_cache_type.attr &&
	    !virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE))
		return S_IRUGO;

	return a->mode;
}

static const struct attribute_group virtblk_attr_group = {
	.attrs = virtblk_attrs,
	.is_visible = virtblk_attrs_are_visible,
};

static const struct attribute_group *virtblk_attr_groups[] = {
	&virtblk_attr_group,
	NULL,
};

static void virtblk_map_queues(struct blk_mq_tag_set *set)
{
	struct virtio_blk *vblk = set->driver_data;
	int i, qoff;

	for (i = 0, qoff = 0; i < set->nr_maps; i++) {
		struct blk_mq_queue_map *map = &set->map[i];

		map->nr_queues = vblk->io_queues[i];
		map->queue_offset = qoff;
		qoff += map->nr_queues;

		if (map->nr_queues == 0)
			continue;

		/*
		 * Regular queues have interrupts and hence CPU affinity is
		 * defined by the core virtio code, but polling queues have
		 * no interrupts so we let the block layer assign CPU affinity.
		 */
		if (i == HCTX_TYPE_POLL)
			blk_mq_map_queues(&set->map[i]);
		else
			blk_mq_virtio_map_queues(&set->map[i], vblk->vdev, 0);
	}
}

static void virtblk_complete_batch(struct io_comp_batch *iob)
{
	struct request *req;

	rq_list_for_each(&iob->req_list, req) {
		virtblk_unmap_data(req, blk_mq_rq_to_pdu(req));
		virtblk_cleanup_cmd(req);
	}
	blk_mq_end_request_batch(iob);
}

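/*
 * Poll handler for the dedicated poll virtqueues: reap completions without
 * interrupts and batch-complete them where possible.
 */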
static int virtblk_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
{
	struct virtio_blk *vblk = hctx->queue->queuedata;
	struct virtio_blk_vq *vq = get_virtio_blk_vq(hctx);
	struct virtblk_req *vbr;
	unsigned long flags;
	unsigned int len;
	int found = 0;

	spin_lock_irqsave(&vq->lock, flags);

	while ((vbr = virtqueue_get_buf(vq->vq, &len)) != NULL) {
		struct request *req = blk_mq_rq_from_pdu(vbr);

		found++;
		if (!blk_mq_complete_request_remote(req) &&
		    !blk_mq_add_to_batch(req, iob, virtblk_vbr_status(vbr),
					 virtblk_complete_batch))
			virtblk_request_done(req);
	}

	if (found)
		blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);

	spin_unlock_irqrestore(&vq->lock, flags);

	return found;
}

static const struct blk_mq_ops virtio_mq_ops = {
	.queue_rq	= virtio_queue_rq,
	.queue_rqs	= virtio_queue_rqs,
	.commit_rqs	= virtio_commit_rqs,
	.complete	= virtblk_request_done,
	.map_queues	= virtblk_map_queues,
	.poll		= virtblk_poll,
};

static unsigned int virtblk_queue_depth;
module_param_named(queue_depth, virtblk_queue_depth, uint, 0444);

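/*
 * Translate the negotiated virtio-blk features and configuration space
 * fields into block layer queue limits.
 */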
static int virtblk_read_limits(struct virtio_blk *vblk,
			       struct queue_limits *lim)
{
	struct virtio_device *vdev = vblk->vdev;
	u32 v, max_size, sg_elems, opt_io_size;
	u32 max_discard_segs = 0;
	u32 discard_granularity = 0;
	u16 min_io_size;
	u8 physical_block_exp, alignment_offset;
	size_t max_dma_size;
	int err;

	/* We need to know how many segments before we allocate. */
	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SEG_MAX,
				   struct virtio_blk_config, seg_max,
				   &sg_elems);

	/* We need at least one SG element, whatever they say. */
	if (err || !sg_elems)
		sg_elems = 1;

	/* Prevent integer overflows and honor max vq size */
	sg_elems = min_t(u32, sg_elems, VIRTIO_BLK_MAX_SG_ELEMS - 2);

	/* We can handle whatever the host told us to handle. */
	lim->max_segments = sg_elems;

	/* No real sector limit. */
	lim->max_hw_sectors = UINT_MAX;

	max_dma_size = virtio_max_dma_size(vdev);
	max_size = max_dma_size > U32_MAX ? U32_MAX : max_dma_size;

	/* Host can optionally specify maximum segment size and number of
	 * segments.
	 */
	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SIZE_MAX,
				   struct virtio_blk_config, size_max, &v);
	if (!err)
		max_size = min(max_size, v);

	lim->max_segment_size = max_size;

	/* Host can optionally specify the block size of the device */
	virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE,
			     struct virtio_blk_config, blk_size,
			     &lim->logical_block_size);

	/* Use topology information if available */
	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
				   struct virtio_blk_config, physical_block_exp,
				   &physical_block_exp);
	if (!err && physical_block_exp)
		lim->physical_block_size =
			lim->logical_block_size * (1 << physical_block_exp);

	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
				   struct virtio_blk_config, alignment_offset,
				   &alignment_offset);
	if (!err && alignment_offset)
		lim->alignment_offset =
			lim->logical_block_size * alignment_offset;

	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
				   struct virtio_blk_config, min_io_size,
				   &min_io_size);
	if (!err && min_io_size)
		lim->io_min = lim->logical_block_size * min_io_size;

	err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
				   struct virtio_blk_config, opt_io_size,
				   &opt_io_size);
	if (!err && opt_io_size)
		lim->io_opt = lim->logical_block_size * opt_io_size;

	if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
		virtio_cread(vdev, struct virtio_blk_config,
			     discard_sector_alignment, &discard_granularity);

		virtio_cread(vdev, struct virtio_blk_config,
			     max_discard_sectors, &v);
		lim->max_hw_discard_sectors = v ? v : UINT_MAX;

		virtio_cread(vdev, struct virtio_blk_config, max_discard_seg,
			     &max_discard_segs);
	}

	if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
		virtio_cread(vdev, struct virtio_blk_config,
			     max_write_zeroes_sectors, &v);
		lim->max_write_zeroes_sectors = v ? v : UINT_MAX;
	}

	/* The discard and secure erase limits are combined since the Linux
	 * block layer uses the same limit for both commands.
	 *
	 * If both VIRTIO_BLK_F_SECURE_ERASE and VIRTIO_BLK_F_DISCARD features
	 * are negotiated, we will use the minimum between the limits.
	 *
	 * discard sector alignment is set to the minimum between discard_sector_alignment
	 * and secure_erase_sector_alignment.
	 *
	 * max discard sectors is set to the minimum between max_discard_seg and
	 * max_secure_erase_seg.
	 */
	if (virtio_has_feature(vdev, VIRTIO_BLK_F_SECURE_ERASE)) {

		virtio_cread(vdev, struct virtio_blk_config,
			     secure_erase_sector_alignment, &v);

		/* secure_erase_sector_alignment should not be zero, the device should set a
		 * valid number of sectors.
		 */
		if (!v) {
			dev_err(&vdev->dev,
				"virtio_blk: secure_erase_sector_alignment can't be 0\n");
			return -EINVAL;
		}

		discard_granularity = min_not_zero(discard_granularity, v);

		virtio_cread(vdev, struct virtio_blk_config,
			     max_secure_erase_sectors, &v);

		/* max_secure_erase_sectors should not be zero, the device should set a
		 * valid number of sectors.
		 */
		if (!v) {
			dev_err(&vdev->dev,
				"virtio_blk: max_secure_erase_sectors can't be 0\n");
			return -EINVAL;
		}

		lim->max_secure_erase_sectors = v;

		virtio_cread(vdev, struct virtio_blk_config,
			     max_secure_erase_seg, &v);

		/* max_secure_erase_seg should not be zero, the device should set a
		 * valid number of segments
		 */
		if (!v) {
			dev_err(&vdev->dev,
				"virtio_blk: max_secure_erase_seg can't be 0\n");
			return -EINVAL;
		}

		max_discard_segs = min_not_zero(max_discard_segs, v);
	}

	if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD) ||
	    virtio_has_feature(vdev, VIRTIO_BLK_F_SECURE_ERASE)) {
		/* max_discard_seg and discard_granularity will be 0 only
		 * if max_discard_seg and discard_sector_alignment fields in the virtio
		 * config are 0 and VIRTIO_BLK_F_SECURE_ERASE feature is not negotiated.
		 * In this case, we use default values.
		 */
		if (!max_discard_segs)
			max_discard_segs = sg_elems;

		lim->max_discard_segments =
			min(max_discard_segs, MAX_DISCARD_SEGMENTS);

		if (discard_granularity)
			lim->discard_granularity =
				discard_granularity << SECTOR_SHIFT;
		else
			lim->discard_granularity = lim->logical_block_size;
	}

	if (virtio_has_feature(vdev, VIRTIO_BLK_F_ZONED)) {
		u8 model;

		virtio_cread(vdev, struct virtio_blk_config, zoned.model, &model);
		switch (model) {
		case VIRTIO_BLK_Z_NONE:
		case VIRTIO_BLK_Z_HA:
			/* treat host-aware devices as non-zoned */
			return 0;
		case VIRTIO_BLK_Z_HM:
			err = virtblk_read_zoned_limits(vblk, lim);
			if (err)
				return err;
			break;
		default:
			dev_err(&vdev->dev, "unsupported zone model %d\n", model);
			return -EINVAL;
		}
	}

	return 0;
}

static int virtblk_probe(struct virtio_device *vdev)
{
	struct virtio_blk *vblk;
	struct queue_limits lim = {
		.features		= BLK_FEAT_ROTATIONAL,
		.logical_block_size	= SECTOR_SIZE,
	};
	int err, index;
	unsigned int queue_depth;

	if (!vdev->config->get) {
		dev_err(&vdev->dev, "%s failure: config access disabled\n",
			__func__);
		return -EINVAL;
	}

	err = ida_alloc_range(&vd_index_ida, 0,
			      minor_to_index(1 << MINORBITS) - 1, GFP_KERNEL);
	if (err < 0)
		goto out;
	index = err;

	vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
	if (!vblk) {
		err = -ENOMEM;
		goto out_free_index;
	}

	mutex_init(&vblk->vdev_mutex);

	vblk->vdev = vdev;

	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);

	err = init_vq(vblk);
	if (err)
		goto out_free_vblk;

	/* Default queue sizing is to fill the ring. */
	if (!virtblk_queue_depth) {
		queue_depth = vblk->vqs[0].vq->num_free;
		/* ... but without indirect descs, we use 2 descs per req */
		if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
			queue_depth /= 2;
	} else {
		queue_depth = virtblk_queue_depth;
	}

	memset(&vblk->tag_set, 0, sizeof(vblk->tag_set));
	vblk->tag_set.ops = &virtio_mq_ops;
	vblk->tag_set.queue_depth = queue_depth;
	vblk->tag_set.numa_node = NUMA_NO_NODE;
	vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
	vblk->tag_set.cmd_size =
		sizeof(struct virtblk_req) +
		sizeof(struct scatterlist) * VIRTIO_BLK_INLINE_SG_CNT;
	vblk->tag_set.driver_data = vblk;
	vblk->tag_set.nr_hw_queues = vblk->num_vqs;
	vblk->tag_set.nr_maps = 1;
	if (vblk->io_queues[HCTX_TYPE_POLL])
		vblk->tag_set.nr_maps = 3;

	err = blk_mq_alloc_tag_set(&vblk->tag_set);
	if (err)
		goto out_free_vq;

	err = virtblk_read_limits(vblk, &lim);
	if (err)
		goto out_free_tags;

	if (virtblk_get_cache_mode(vdev))
		lim.features |= BLK_FEAT_WRITE_CACHE;

	vblk->disk = blk_mq_alloc_disk(&vblk->tag_set, &lim, vblk);
	if (IS_ERR(vblk->disk)) {
		err = PTR_ERR(vblk->disk);
		goto out_free_tags;
	}

	virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);

	vblk->disk->major = major;
	vblk->disk->first_minor = index_to_minor(index);
	vblk->disk->minors = 1 << PART_BITS;
	vblk->disk->private_data = vblk;
	vblk->disk->fops = &virtblk_fops;
	vblk->index = index;

	/* If disk is read-only in the host, the guest should obey */
	if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
		set_disk_ro(vblk->disk, 1);

	virtblk_update_capacity(vblk, false);
	virtio_device_ready(vdev);

	/*
	 * All steps that follow use the VQs therefore they need to be
	 * placed after the virtio_device_ready() call above.
	 */
	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
	    (lim.features & BLK_FEAT_ZONED)) {
		err = blk_revalidate_disk_zones(vblk->disk);
		if (err)
			goto out_cleanup_disk;
	}

	err = device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups);
	if (err)
		goto out_cleanup_disk;

	return 0;

out_cleanup_disk:
	put_disk(vblk->disk);
out_free_tags:
	blk_mq_free_tag_set(&vblk->tag_set);
out_free_vq:
	vdev->config->del_vqs(vdev);
	kfree(vblk->vqs);
out_free_vblk:
	kfree(vblk);
out_free_index:
	ida_free(&vd_index_ida, index);
out:
	return err;
}

static void virtblk_remove(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;

	/* Make sure no work handler is accessing the device. */
	flush_work(&vblk->config_work);

	del_gendisk(vblk->disk);
	blk_mq_free_tag_set(&vblk->tag_set);

	mutex_lock(&vblk->vdev_mutex);

	/* Stop all the virtqueues. */
	virtio_reset_device(vdev);

	/* Virtqueues are stopped, nothing can use vblk->vdev anymore. */
	vblk->vdev = NULL;

	vdev->config->del_vqs(vdev);
	kfree(vblk->vqs);

	mutex_unlock(&vblk->vdev_mutex);

	put_disk(vblk->disk);
}

#ifdef CONFIG_PM_SLEEP
static int virtblk_freeze(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;

	/* Ensure no requests in virtqueues before deleting vqs. */
	blk_mq_freeze_queue(vblk->disk->queue);

	/* Ensure we don't receive any more interrupts */
	virtio_reset_device(vdev);

	/* Make sure no work handler is accessing the device. */
	flush_work(&vblk->config_work);

	vdev->config->del_vqs(vdev);
	kfree(vblk->vqs);

	return 0;
}

static int virtblk_restore(struct virtio_device *vdev)
{
	struct virtio_blk *vblk = vdev->priv;
	int ret;

	ret = init_vq(vdev->priv);
	if (ret)
		return ret;

	virtio_device_ready(vdev);

	blk_mq_unfreeze_queue(vblk->disk->queue);
	return 0;
}
#endif

static const struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static unsigned int features_legacy[] = {
	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
	VIRTIO_BLK_F_SECURE_ERASE,
};

static unsigned int features[] = {
	VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
	VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
	VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
	VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
	VIRTIO_BLK_F_SECURE_ERASE, VIRTIO_BLK_F_ZONED,
};

static struct virtio_driver virtio_blk = {
	.feature_table			= features,
	.feature_table_size		= ARRAY_SIZE(features),
	.feature_table_legacy		= features_legacy,
	.feature_table_size_legacy	= ARRAY_SIZE(features_legacy),
	.driver.name			= KBUILD_MODNAME,
	.id_table			= id_table,
	.probe				= virtblk_probe,
	.remove				= virtblk_remove,
	.config_changed			= virtblk_config_changed,
#ifdef CONFIG_PM_SLEEP
	.freeze				= virtblk_freeze,
	.restore			= virtblk_restore,
#endif
};

static int __init virtio_blk_init(void)
{
	int error;

	virtblk_wq = alloc_workqueue("virtio-blk", 0, 0);
	if (!virtblk_wq)
		return -ENOMEM;

	major = register_blkdev(0, "virtblk");
	if (major < 0) {
		error = major;
		goto out_destroy_workqueue;
	}

	error = register_virtio_driver(&virtio_blk);
	if (error)
		goto out_unregister_blkdev;
	return 0;

out_unregister_blkdev:
	unregister_blkdev(major, "virtblk");
out_destroy_workqueue:
	destroy_workqueue(virtblk_wq);
	return error;
}

static void __exit virtio_blk_fini(void)
{
	unregister_virtio_driver(&virtio_blk);
	unregister_blkdev(major, "virtblk");
	destroy_workqueue(virtblk_wq);
}
module_init(virtio_blk_init);
module_exit(virtio_blk_fini);

MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio block driver");
MODULE_LICENSE("GPL");