// SPDX-License-Identifier: GPL-2.0-only
/*
 * VDPA simulator for block device.
 *
 * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
 * Copyright (c) 2021, Red Hat Inc. All rights reserved.
 */
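/*
 * Usage sketch (hedged: the module and tool names below are the usual
 * ones for this simulator, not defined in this file):
 *
 *   # modprobe vdpa_sim_blk
 *   # vdpa dev add mgmtdev vdpasim_blk name blk0
 *
 * instantiates a simulated virtio-blk device backed by an in-memory
 * buffer, via the "vdpasim_blk" management device registered below.
 */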
#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/vringh.h>
#include <linux/vdpa.h>
#include <uapi/linux/virtio_blk.h>

#include "vdpa_sim.h"
#define DRV_VERSION  "0.1"
#define DRV_AUTHOR   "Max Gurtovoy <mgurtovoy@nvidia.com>"
#define DRV_DESC     "vDPA Device Simulator for block device"
#define DRV_LICENSE  "GPL v2"
#define VDPASIM_BLK_FEATURES	(VDPASIM_FEATURES | \
				 (1ULL << VIRTIO_BLK_F_FLUSH) | \
				 (1ULL << VIRTIO_BLK_F_SIZE_MAX) | \
				 (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
				 (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
				 (1ULL << VIRTIO_BLK_F_TOPOLOGY) | \
				 (1ULL << VIRTIO_BLK_F_MQ) | \
				 (1ULL << VIRTIO_BLK_F_DISCARD) | \
				 (1ULL << VIRTIO_BLK_F_WRITE_ZEROES))
#define VDPASIM_BLK_CAPACITY	0x40000
#define VDPASIM_BLK_SIZE_MAX	0x1000
#define VDPASIM_BLK_SEG_MAX	32
#define VDPASIM_BLK_DWZ_MAX_SECTORS	UINT_MAX
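/* The simulated disk lives entirely in memory: VDPASIM_BLK_CAPACITY
 * sectors of SECTOR_SIZE (512) bytes each, i.e. 128 MiB.
 */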
/* 1 virtqueue, 1 address space, 1 virtqueue group */
#define VDPASIM_BLK_VQ_NUM	1
#define VDPASIM_BLK_AS_NUM	1
#define VDPASIM_BLK_GROUP_NUM	1
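/* Per-device state. The generic vdpasim instance is embedded so that
 * sim_to_blk() can recover the block-specific state with container_of().
 * 'buffer' points either to a private allocation or to the module-wide
 * shared_buffer, depending on 'shared_backend'.
 */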
struct vdpasim_blk {
	struct vdpasim vdpasim;
	void *buffer;
	bool shared_backend;
};

static struct vdpasim_blk *sim_to_blk(struct vdpasim *vdpasim)
{
	return container_of(vdpasim, struct vdpasim_blk, vdpasim);
}
static char vdpasim_blk_id[VIRTIO_BLK_ID_BYTES] = "vdpa_blk_sim";
static bool shared_backend;
module_param(shared_backend, bool, 0444);
MODULE_PARM_DESC(shared_backend, "Enable the shared backend between virtio-blk devices");

static void *shared_buffer;
/* mutex to synchronize shared_buffer access */
static DEFINE_MUTEX(shared_buffer_mutex);
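/* Only the shared backend needs locking: a private buffer is only ever
 * touched by its own device's work function, while shared_buffer can be
 * accessed by several devices concurrently.
 */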
static void vdpasim_blk_buffer_lock(struct vdpasim_blk *blk)
{
	if (blk->shared_backend)
		mutex_lock(&shared_buffer_mutex);
}

static void vdpasim_blk_buffer_unlock(struct vdpasim_blk *blk)
{
	if (blk->shared_backend)
		mutex_unlock(&shared_buffer_mutex);
}
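/* Returns 'true' only if [start_sector, start_sector + num_sectors) fits
 * on the simulated disk and num_sectors does not exceed the per-request
 * limit; otherwise logs the reason and returns 'false'.
 */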
static bool vdpasim_blk_check_range(struct vdpasim *vdpasim, u64 start_sector,
				    u64 num_sectors, u64 max_sectors)
{
	if (start_sector > VDPASIM_BLK_CAPACITY) {
		dev_dbg(&vdpasim->vdpa.dev,
			"starting sector exceeds the capacity - start: 0x%llx capacity: 0x%x\n",
			start_sector, VDPASIM_BLK_CAPACITY);
		return false;
	}

	if (num_sectors > max_sectors) {
		dev_dbg(&vdpasim->vdpa.dev,
			"number of sectors exceeds the max allowed in a request - num: 0x%llx max: 0x%llx\n",
			num_sectors, max_sectors);
		return false;
	}

	if (num_sectors > VDPASIM_BLK_CAPACITY - start_sector) {
		dev_dbg(&vdpasim->vdpa.dev,
			"request exceeds the capacity - start: 0x%llx num: 0x%llx capacity: 0x%x\n",
			start_sector, num_sectors, VDPASIM_BLK_CAPACITY);
		return false;
	}

	return true;
}
/* Returns 'true' if the request is handled (with or without an I/O error)
 * and the status is correctly written in the last byte of the 'in iov',
 * 'false' otherwise.
 */
static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
				   struct vdpasim_virtqueue *vq)
{
	struct vdpasim_blk *blk = sim_to_blk(vdpasim);
	size_t pushed = 0, to_pull, to_push;
	struct virtio_blk_outhdr hdr;
	bool handled = false;
	ssize_t bytes;
	loff_t offset;
	u64 sector;
	u8 status;
	u32 type;
	int ret;
	ret = vringh_getdesc_iotlb(&vq->vring, &vq->out_iov, &vq->in_iov,
				   &vq->head, GFP_ATOMIC);
	if (ret != 1)
		return false;
	if (vq->out_iov.used < 1 || vq->in_iov.used < 1) {
		dev_dbg(&vdpasim->vdpa.dev, "missing headers - out_iov: %u in_iov %u\n",
			vq->out_iov.used, vq->in_iov.used);
		goto err;
	}
	if (vq->in_iov.iov[vq->in_iov.used - 1].iov_len < 1) {
		dev_dbg(&vdpasim->vdpa.dev, "request in header too short\n");
		goto err;
	}
	/* The last byte is the status and we checked if the last iov has
	 * enough room for it.
	 */
	to_push = vringh_kiov_length(&vq->in_iov) - 1;

	to_pull = vringh_kiov_length(&vq->out_iov);
	bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &hdr,
				      sizeof(hdr));
	if (bytes != sizeof(hdr)) {
		dev_dbg(&vdpasim->vdpa.dev, "request out header too short\n");
		goto err;
	}

	to_pull -= bytes;
	type = vdpasim32_to_cpu(vdpasim, hdr.type);
	sector = vdpasim64_to_cpu(vdpasim, hdr.sector);
	offset = sector << SECTOR_SHIFT;
	status = VIRTIO_BLK_S_OK;
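	/* The sector field is only meaningful for read/write requests;
	 * every other request type must have it set to zero.
	 */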
	if (type != VIRTIO_BLK_T_IN && type != VIRTIO_BLK_T_OUT &&
	    sector != 0) {
		dev_dbg(&vdpasim->vdpa.dev,
			"sector must be 0 for %u request - sector: 0x%llx\n",
			type, sector);
		status = VIRTIO_BLK_S_IOERR;
		goto err_status;
	}
	switch (type) {
	case VIRTIO_BLK_T_IN:
		if (!vdpasim_blk_check_range(vdpasim, sector,
					     to_push >> SECTOR_SHIFT,
					     VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)) {
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		vdpasim_blk_buffer_lock(blk);
		bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov,
					      blk->buffer + offset, to_push);
		vdpasim_blk_buffer_unlock(blk);
		if (bytes < 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"vringh_iov_push_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
				bytes, offset, to_push);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		pushed += bytes;
		break;
	case VIRTIO_BLK_T_OUT:
		if (!vdpasim_blk_check_range(vdpasim, sector,
					     to_pull >> SECTOR_SHIFT,
					     VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)) {
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		vdpasim_blk_buffer_lock(blk);
		bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov,
					      blk->buffer + offset, to_pull);
		vdpasim_blk_buffer_unlock(blk);
		if (bytes < 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
				bytes, offset, to_pull);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}
		break;
	case VIRTIO_BLK_T_GET_ID:
		bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov,
					      vdpasim_blk_id,
					      VIRTIO_BLK_ID_BYTES);
		if (bytes < 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"vringh_iov_push_iotlb() error: %zd\n", bytes);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		pushed += bytes;
		break;
	case VIRTIO_BLK_T_FLUSH:
		/* nothing to do */
		break;
	case VIRTIO_BLK_T_DISCARD:
	case VIRTIO_BLK_T_WRITE_ZEROES: {
		struct virtio_blk_discard_write_zeroes range;
		u32 num_sectors, flags;

		if (to_pull != sizeof(range)) {
			dev_dbg(&vdpasim->vdpa.dev,
				"discard/write_zeroes header len: 0x%zx [expected: 0x%zx]\n",
				to_pull, sizeof(range));
			status = VIRTIO_BLK_S_IOERR;
			break;
		}
		bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &range,
					      to_pull);
		if (bytes < 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
				bytes, offset, to_pull);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}
		sector = le64_to_cpu(range.sector);
		offset = sector << SECTOR_SHIFT;
		num_sectors = le32_to_cpu(range.num_sectors);
		flags = le32_to_cpu(range.flags);
		if (type == VIRTIO_BLK_T_DISCARD && flags != 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"discard unexpected flags set - flags: 0x%x\n",
				flags);
			status = VIRTIO_BLK_S_UNSUPP;
			break;
		}
		if (type == VIRTIO_BLK_T_WRITE_ZEROES &&
		    flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
			dev_dbg(&vdpasim->vdpa.dev,
				"write_zeroes unexpected flags set - flags: 0x%x\n",
				flags);
			status = VIRTIO_BLK_S_UNSUPP;
			break;
		}
		if (!vdpasim_blk_check_range(vdpasim, sector, num_sectors,
					     VDPASIM_BLK_DWZ_MAX_SECTORS)) {
			status = VIRTIO_BLK_S_IOERR;
			break;
		}
		if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
			vdpasim_blk_buffer_lock(blk);
			memset(blk->buffer + offset, 0,
			       num_sectors << SECTOR_SHIFT);
			vdpasim_blk_buffer_unlock(blk);
		}

		break;
	}
	default:
		dev_dbg(&vdpasim->vdpa.dev,
			"Unsupported request type %d\n", type);
		status = VIRTIO_BLK_S_IOERR;
		break;
	}
err_status:
	/* If some operations fail, we need to skip the remaining bytes
	 * to put the status in the last byte
	 */
	if (to_push - pushed > 0)
		vringh_kiov_advance(&vq->in_iov, to_push - pushed);
	/* Last byte is the status */
	bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov, &status, 1);
	if (bytes != 1)
		goto err;

	pushed += bytes;

	/* Make sure data is written before advancing index */
	smp_wmb();

	handled = true;

err:
	vringh_complete_iotlb(&vq->vring, vq->head, pushed);

	return handled;
}
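/* Work function: drain each ready virtqueue, notifying the driver after
 * every handled request. After a small batch the work is rescheduled so
 * a busy device does not monopolize the workqueue.
 */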
static void vdpasim_blk_work(struct vdpasim *vdpasim)
{
	bool reschedule = false;
	int i;

	mutex_lock(&vdpasim->mutex);

	if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
		goto out;

	if (!vdpasim->running)
		goto out;

	for (i = 0; i < VDPASIM_BLK_VQ_NUM; i++) {
		struct vdpasim_virtqueue *vq = &vdpasim->vqs[i];
		int reqs = 0;

		if (!vq->ready)
			continue;

		while (vdpasim_blk_handle_req(vdpasim, vq)) {
			/* Make sure used is visible before raising the interrupt. */
			smp_wmb();

			local_bh_disable();
			if (vringh_need_notify_iotlb(&vq->vring) > 0)
				vringh_notify(&vq->vring);
			local_bh_enable();

			if (++reqs > 4) {
				reschedule = true;
				break;
			}
		}
	}
out:
	mutex_unlock(&vdpasim->mutex);

	if (reschedule)
		vdpasim_schedule_work(vdpasim);
}
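/* Fill the virtio-blk config space; values are converted to the device
 * byte order negotiated with the driver via the cpu_to_vdpasim*() helpers.
 */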
static void vdpasim_blk_get_config(struct vdpasim *vdpasim, void *config)
{
	struct virtio_blk_config *blk_config = config;

	memset(config, 0, sizeof(struct virtio_blk_config));

	blk_config->capacity = cpu_to_vdpasim64(vdpasim, VDPASIM_BLK_CAPACITY);
	blk_config->size_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SIZE_MAX);
	blk_config->seg_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SEG_MAX);
	blk_config->num_queues = cpu_to_vdpasim16(vdpasim, VDPASIM_BLK_VQ_NUM);
	blk_config->min_io_size = cpu_to_vdpasim16(vdpasim, 1);
	blk_config->opt_io_size = cpu_to_vdpasim32(vdpasim, 1);
	blk_config->blk_size = cpu_to_vdpasim32(vdpasim, SECTOR_SIZE);
	/* VIRTIO_BLK_F_DISCARD */
	blk_config->discard_sector_alignment =
		cpu_to_vdpasim32(vdpasim, SECTOR_SIZE);
	blk_config->max_discard_sectors =
		cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_DWZ_MAX_SECTORS);
	blk_config->max_discard_seg = cpu_to_vdpasim32(vdpasim, 1);
	/* VIRTIO_BLK_F_WRITE_ZEROES */
	blk_config->max_write_zeroes_sectors =
		cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_DWZ_MAX_SECTORS);
	blk_config->max_write_zeroes_seg = cpu_to_vdpasim32(vdpasim, 1);
}
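/* Device release: only a private buffer is freed here; the shared buffer
 * belongs to the module and is freed in vdpasim_blk_exit().
 */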
static void vdpasim_blk_free(struct vdpasim *vdpasim)
{
	struct vdpasim_blk *blk = sim_to_blk(vdpasim);

	if (!blk->shared_backend)
		kvfree(blk->buffer);
}
static void vdpasim_blk_mgmtdev_release(struct device *dev)
{
}

static struct device vdpasim_blk_mgmtdev = {
	.init_name = "vdpasim_blk",
	.release = vdpasim_blk_mgmtdev_release,
};
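/* Management-ops "dev add": describe the device through vdpasim_dev_attr,
 * create the simulator instance, attach the (possibly shared) backend
 * buffer and register the vDPA device. On failure after creation,
 * dropping the device reference releases everything, including the
 * private buffer via vdpasim_blk_free().
 */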
static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
			       const struct vdpa_dev_set_config *config)
{
	struct vdpasim_dev_attr dev_attr = {};
	struct vdpasim_blk *blk;
	struct vdpasim *simdev;
	int ret;

	dev_attr.mgmt_dev = mdev;
	dev_attr.name = name;
	dev_attr.id = VIRTIO_ID_BLOCK;
	dev_attr.supported_features = VDPASIM_BLK_FEATURES;
	dev_attr.nvqs = VDPASIM_BLK_VQ_NUM;
	dev_attr.ngroups = VDPASIM_BLK_GROUP_NUM;
	dev_attr.nas = VDPASIM_BLK_AS_NUM;
	dev_attr.alloc_size = sizeof(struct vdpasim_blk);
	dev_attr.config_size = sizeof(struct virtio_blk_config);
	dev_attr.get_config = vdpasim_blk_get_config;
	dev_attr.work_fn = vdpasim_blk_work;
	dev_attr.free = vdpasim_blk_free;

	simdev = vdpasim_create(&dev_attr, config);
	if (IS_ERR(simdev))
		return PTR_ERR(simdev);
	blk = sim_to_blk(simdev);
	blk->shared_backend = shared_backend;

	if (blk->shared_backend) {
		blk->buffer = shared_buffer;
	} else {
		blk->buffer = kvzalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
				       GFP_KERNEL);
		if (!blk->buffer) {
			ret = -ENOMEM;
			goto put_dev;
		}
	}

	ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_BLK_VQ_NUM);
	if (ret)
		goto put_dev;

	return 0;

put_dev:
	put_device(&simdev->vdpa.dev);
	return ret;
}
static void vdpasim_blk_dev_del(struct vdpa_mgmt_dev *mdev,
				struct vdpa_device *dev)
{
	struct vdpasim *simdev = container_of(dev, struct vdpasim, vdpa);

	_vdpa_unregister_device(&simdev->vdpa);
}
static const struct vdpa_mgmtdev_ops vdpasim_blk_mgmtdev_ops = {
	.dev_add = vdpasim_blk_dev_add,
	.dev_del = vdpasim_blk_dev_del
};
static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};
static struct vdpa_mgmt_dev mgmt_dev = {
	.device = &vdpasim_blk_mgmtdev,
	.id_table = id_table,
	.ops = &vdpasim_blk_mgmtdev_ops,
};
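/* Module init: register the parent device first (the management device
 * points at it), then the management device, and allocate the shared
 * buffer last so the error path can unwind in reverse order.
 */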
static int __init vdpasim_blk_init(void)
{
	int ret;

	ret = device_register(&vdpasim_blk_mgmtdev);
	if (ret) {
		put_device(&vdpasim_blk_mgmtdev);
		return ret;
	}

	ret = vdpa_mgmtdev_register(&mgmt_dev);
	if (ret)
		goto parent_err;

	if (shared_backend) {
		shared_buffer = kvzalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
					 GFP_KERNEL);
		if (!shared_buffer) {
			ret = -ENOMEM;
			goto mgmt_dev_err;
		}
	}

	return 0;

mgmt_dev_err:
	vdpa_mgmtdev_unregister(&mgmt_dev);
parent_err:
	device_unregister(&vdpasim_blk_mgmtdev);
	return ret;
}
static void __exit vdpasim_blk_exit(void)
{
	kvfree(shared_buffer);
	vdpa_mgmtdev_unregister(&mgmt_dev);
	device_unregister(&vdpasim_blk_mgmtdev);
}
module_init(vdpasim_blk_init)
module_exit(vdpasim_blk_exit)
MODULE_VERSION(DRV_VERSION);
MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);