/*
 * Copyright IBM, Corp. 2007
 *
 * Anthony Liguori <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 */
#include <qemu-common.h>
#include <sysemu.h>
#include "virtio-blk.h"
#include "block_int.h"
#ifdef __linux__
# include <scsi/sg.h>
#endif
22 typedef struct VirtIOBlock
28 char serial_str
[BLOCK_SERIAL_STRLEN
+ 1];
32 static VirtIOBlock
*to_virtio_blk(VirtIODevice
*vdev
)
34 return (VirtIOBlock
*)vdev
;
/* store identify data in little endian format
 *
 * Writes the low 16 bits of v to *p, least significant byte first, on any
 * host byte order.  Bits of v above bit 15 are discarded.
 *
 * NOTE(review): the original body was lost from the damaged source; it is
 * re-implemented here as an explicit byte-wise store.
 */
static inline void put_le16(uint16_t *p, unsigned int v)
{
    uint8_t *bytes = (uint8_t *)p;

    bytes[0] = (uint8_t)(v & 0xff);
    bytes[1] = (uint8_t)((v >> 8) & 0xff);
}
/* copy to *dst from *src, nul pad dst tail as needed to len bytes
 *
 * Exactly len bytes of dst are written: characters of src up to (not
 * including) its terminator, then '\0' for the remainder.  If src is longer
 * than len the copy is truncated and dst is NOT nul terminated.  A len of
 * zero or less writes nothing.
 */
static inline void padstr(char *dst, const char *src, int len)
{
    /* "> 0" so that a negative length cannot turn into a runaway loop */
    while (len-- > 0)
        *dst++ = *src ? *src++ : '\0';
}
52 /* setup simulated identify data as appropriate for virtio block device
54 * ref: AT Attachment 8 - ATA/ATAPI Command Set (ATA8-ACS)
56 static inline void virtio_identify_template(struct virtio_blk_config
*bc
)
58 uint16_t *p
= &bc
->identify
[0];
59 uint64_t lba_sectors
= bc
->capacity
;
61 memset(p
, 0, sizeof(bc
->identify
));
62 put_le16(p
+ 0, 0x0); /* ATA device */
63 padstr((char *)(p
+ 23), QEMU_VERSION
, 8); /* firmware revision */
64 padstr((char *)(p
+ 27), "QEMU VIRT_BLK", 40); /* model# */
65 put_le16(p
+ 47, 0x80ff); /* max xfer 255 sectors */
66 put_le16(p
+ 49, 0x0b00); /* support IORDY/LBA/DMA */
67 put_le16(p
+ 59, 0x1ff); /* cur xfer 255 sectors */
68 put_le16(p
+ 80, 0x1f0); /* support ATA8/7/6/5/4 */
69 put_le16(p
+ 81, 0x16);
70 put_le16(p
+ 82, 0x400);
71 put_le16(p
+ 83, 0x400);
72 put_le16(p
+ 100, lba_sectors
);
73 put_le16(p
+ 101, lba_sectors
>> 16);
74 put_le16(p
+ 102, lba_sectors
>> 32);
75 put_le16(p
+ 103, lba_sectors
>> 48);
78 typedef struct VirtIOBlockReq
81 VirtQueueElement elem
;
82 struct virtio_blk_inhdr
*in
;
83 struct virtio_blk_outhdr
*out
;
84 struct virtio_scsi_inhdr
*scsi
;
86 struct VirtIOBlockReq
*next
;
89 static void virtio_blk_req_complete(VirtIOBlockReq
*req
, int status
)
91 VirtIOBlock
*s
= req
->dev
;
93 req
->in
->status
= status
;
94 virtqueue_push(s
->vq
, &req
->elem
, req
->qiov
.size
+ sizeof(*req
->in
));
95 virtio_notify(&s
->vdev
, s
->vq
);
100 static int virtio_blk_handle_write_error(VirtIOBlockReq
*req
, int error
)
102 BlockInterfaceErrorAction action
= drive_get_onerror(req
->dev
->bs
);
103 VirtIOBlock
*s
= req
->dev
;
105 if (action
== BLOCK_ERR_IGNORE
)
108 if ((error
== ENOSPC
&& action
== BLOCK_ERR_STOP_ENOSPC
)
109 || action
== BLOCK_ERR_STOP_ANY
) {
114 virtio_blk_req_complete(req
, VIRTIO_BLK_S_IOERR
);
120 static void virtio_blk_rw_complete(void *opaque
, int ret
)
122 VirtIOBlockReq
*req
= opaque
;
124 if (ret
&& (req
->out
->type
& VIRTIO_BLK_T_OUT
)) {
125 if (virtio_blk_handle_write_error(req
, -ret
))
129 virtio_blk_req_complete(req
, VIRTIO_BLK_S_OK
);
132 static VirtIOBlockReq
*virtio_blk_alloc_request(VirtIOBlock
*s
)
134 VirtIOBlockReq
*req
= qemu_mallocz(sizeof(*req
));
139 static VirtIOBlockReq
*virtio_blk_get_request(VirtIOBlock
*s
)
141 VirtIOBlockReq
*req
= virtio_blk_alloc_request(s
);
144 if (!virtqueue_pop(s
->vq
, &req
->elem
)) {
154 static void virtio_blk_handle_scsi(VirtIOBlockReq
*req
)
156 struct sg_io_hdr hdr
;
162 * We require at least one output segment each for the virtio_blk_outhdr
163 * and the SCSI command block.
165 * We also at least require the virtio_blk_inhdr, the virtio_scsi_inhdr
166 * and the sense buffer pointer in the input segments.
168 if (req
->elem
.out_num
< 2 || req
->elem
.in_num
< 3) {
169 virtio_blk_req_complete(req
, VIRTIO_BLK_S_IOERR
);
174 * No support for bidirection commands yet.
176 if (req
->elem
.out_num
> 2 && req
->elem
.in_num
> 3) {
177 virtio_blk_req_complete(req
, VIRTIO_BLK_S_UNSUPP
);
182 * The scsi inhdr is placed in the second-to-last input segment, just
183 * before the regular inhdr.
185 req
->scsi
= (void *)req
->elem
.in_sg
[req
->elem
.in_num
- 2].iov_base
;
186 size
= sizeof(*req
->in
) + sizeof(*req
->scsi
);
188 memset(&hdr
, 0, sizeof(struct sg_io_hdr
));
189 hdr
.interface_id
= 'S';
190 hdr
.cmd_len
= req
->elem
.out_sg
[1].iov_len
;
191 hdr
.cmdp
= req
->elem
.out_sg
[1].iov_base
;
194 if (req
->elem
.out_num
> 2) {
196 * If there are more than the minimally required 2 output segments
197 * there is write payload starting from the third iovec.
199 hdr
.dxfer_direction
= SG_DXFER_TO_DEV
;
200 hdr
.iovec_count
= req
->elem
.out_num
- 2;
202 for (i
= 0; i
< hdr
.iovec_count
; i
++)
203 hdr
.dxfer_len
+= req
->elem
.out_sg
[i
+ 2].iov_len
;
205 hdr
.dxferp
= req
->elem
.out_sg
+ 2;
207 } else if (req
->elem
.in_num
> 3) {
209 * If we have more than 3 input segments the guest wants to actually
212 hdr
.dxfer_direction
= SG_DXFER_FROM_DEV
;
213 hdr
.iovec_count
= req
->elem
.in_num
- 3;
214 for (i
= 0; i
< hdr
.iovec_count
; i
++)
215 hdr
.dxfer_len
+= req
->elem
.in_sg
[i
].iov_len
;
217 hdr
.dxferp
= req
->elem
.in_sg
;
218 size
+= hdr
.dxfer_len
;
221 * Some SCSI commands don't actually transfer any data.
223 hdr
.dxfer_direction
= SG_DXFER_NONE
;
226 hdr
.sbp
= req
->elem
.in_sg
[req
->elem
.in_num
- 3].iov_base
;
227 hdr
.mx_sb_len
= req
->elem
.in_sg
[req
->elem
.in_num
- 3].iov_len
;
228 size
+= hdr
.mx_sb_len
;
230 ret
= bdrv_ioctl(req
->dev
->bs
, SG_IO
, &hdr
);
232 status
= VIRTIO_BLK_S_UNSUPP
;
234 hdr
.resid
= hdr
.dxfer_len
;
235 } else if (hdr
.status
) {
236 status
= VIRTIO_BLK_S_IOERR
;
238 status
= VIRTIO_BLK_S_OK
;
241 req
->scsi
->errors
= hdr
.status
;
242 req
->scsi
->residual
= hdr
.resid
;
243 req
->scsi
->sense_len
= hdr
.sb_len_wr
;
244 req
->scsi
->data_len
= hdr
.dxfer_len
;
246 virtio_blk_req_complete(req
, status
);
249 static void virtio_blk_handle_scsi(VirtIOBlockReq
*req
)
251 virtio_blk_req_complete(req
, VIRTIO_BLK_S_UNSUPP
);
253 #endif /* __linux__ */
255 static void virtio_blk_handle_write(VirtIOBlockReq
*req
)
257 bdrv_aio_writev(req
->dev
->bs
, req
->out
->sector
, &req
->qiov
,
258 req
->qiov
.size
/ 512, virtio_blk_rw_complete
, req
);
261 static void virtio_blk_handle_read(VirtIOBlockReq
*req
)
263 bdrv_aio_readv(req
->dev
->bs
, req
->out
->sector
, &req
->qiov
,
264 req
->qiov
.size
/ 512, virtio_blk_rw_complete
, req
);
267 static void virtio_blk_handle_output(VirtIODevice
*vdev
, VirtQueue
*vq
)
269 VirtIOBlock
*s
= to_virtio_blk(vdev
);
272 while ((req
= virtio_blk_get_request(s
))) {
273 if (req
->elem
.out_num
< 1 || req
->elem
.in_num
< 1) {
274 fprintf(stderr
, "virtio-blk missing headers\n");
278 if (req
->elem
.out_sg
[0].iov_len
< sizeof(*req
->out
) ||
279 req
->elem
.in_sg
[req
->elem
.in_num
- 1].iov_len
< sizeof(*req
->in
)) {
280 fprintf(stderr
, "virtio-blk header not in correct element\n");
284 req
->out
= (void *)req
->elem
.out_sg
[0].iov_base
;
285 req
->in
= (void *)req
->elem
.in_sg
[req
->elem
.in_num
- 1].iov_base
;
287 if (req
->out
->type
& VIRTIO_BLK_T_SCSI_CMD
) {
288 virtio_blk_handle_scsi(req
);
289 } else if (req
->out
->type
& VIRTIO_BLK_T_OUT
) {
290 qemu_iovec_init_external(&req
->qiov
, &req
->elem
.out_sg
[1],
291 req
->elem
.out_num
- 1);
292 virtio_blk_handle_write(req
);
294 qemu_iovec_init_external(&req
->qiov
, &req
->elem
.in_sg
[0],
295 req
->elem
.in_num
- 1);
296 virtio_blk_handle_read(req
);
300 * FIXME: Want to check for completions before returning to guest mode,
301 * so cached reads and writes are reported as quickly as possible. But
302 * that should be done in the generic block layer.
306 static void virtio_blk_dma_restart_bh(void *opaque
)
308 VirtIOBlock
*s
= opaque
;
309 VirtIOBlockReq
*req
= s
->rq
;
311 qemu_bh_delete(s
->bh
);
317 virtio_blk_handle_write(req
);
322 static void virtio_blk_dma_restart_cb(void *opaque
, int running
, int reason
)
324 VirtIOBlock
*s
= opaque
;
330 s
->bh
= qemu_bh_new(virtio_blk_dma_restart_bh
, s
);
331 qemu_bh_schedule(s
->bh
);
335 static void virtio_blk_reset(VirtIODevice
*vdev
)
338 * This should cancel pending requests, but can't do nicely until there
339 * are per-device request lists.
344 /* coalesce internal state, copy to pci i/o region 0
346 static void virtio_blk_update_config(VirtIODevice
*vdev
, uint8_t *config
)
348 VirtIOBlock
*s
= to_virtio_blk(vdev
);
349 struct virtio_blk_config blkcfg
;
351 int cylinders
, heads
, secs
;
353 bdrv_get_geometry(s
->bs
, &capacity
);
354 bdrv_get_geometry_hint(s
->bs
, &cylinders
, &heads
, &secs
);
355 memset(&blkcfg
, 0, sizeof(blkcfg
));
356 stq_raw(&blkcfg
.capacity
, capacity
);
357 stl_raw(&blkcfg
.seg_max
, 128 - 2);
358 stw_raw(&blkcfg
.cylinders
, cylinders
);
359 blkcfg
.heads
= heads
;
360 blkcfg
.sectors
= secs
;
362 virtio_identify_template(&blkcfg
);
363 memcpy(&blkcfg
.identify
[VIRTIO_BLK_ID_SN
], s
->serial_str
,
364 VIRTIO_BLK_ID_SN_BYTES
);
365 memcpy(config
, &blkcfg
, sizeof(blkcfg
));
368 static uint32_t virtio_blk_get_features(VirtIODevice
*vdev
)
370 VirtIOBlock
*s
= to_virtio_blk(vdev
);
371 uint32_t features
= 0;
373 features
|= (1 << VIRTIO_BLK_F_SEG_MAX
);
374 features
|= (1 << VIRTIO_BLK_F_GEOMETRY
);
376 features
|= (1 << VIRTIO_BLK_F_SCSI
);
378 if (strcmp(s
->serial_str
, "0"))
379 features
|= 1 << VIRTIO_BLK_F_IDENTIFY
;
384 static void virtio_blk_save(QEMUFile
*f
, void *opaque
)
386 VirtIOBlock
*s
= opaque
;
387 VirtIOBlockReq
*req
= s
->rq
;
389 virtio_save(&s
->vdev
, f
);
392 qemu_put_sbyte(f
, 1);
393 qemu_put_buffer(f
, (unsigned char*)&req
->elem
, sizeof(req
->elem
));
396 qemu_put_sbyte(f
, 0);
399 static int virtio_blk_load(QEMUFile
*f
, void *opaque
, int version_id
)
401 VirtIOBlock
*s
= opaque
;
406 virtio_load(&s
->vdev
, f
);
407 while (qemu_get_sbyte(f
)) {
408 VirtIOBlockReq
*req
= virtio_blk_alloc_request(s
);
409 qemu_get_buffer(f
, (unsigned char*)&req
->elem
, sizeof(req
->elem
));
417 VirtIODevice
*virtio_blk_init(DeviceState
*dev
)
420 int cylinders
, heads
, secs
;
421 static int virtio_blk_id
;
422 BlockDriverState
*bs
;
425 s
= (VirtIOBlock
*)virtio_common_init("virtio-blk", VIRTIO_ID_BLOCK
,
426 sizeof(struct virtio_blk_config
),
427 sizeof(VirtIOBlock
));
429 bs
= qdev_init_bdrv(dev
, IF_VIRTIO
);
430 s
->vdev
.get_config
= virtio_blk_update_config
;
431 s
->vdev
.get_features
= virtio_blk_get_features
;
432 s
->vdev
.reset
= virtio_blk_reset
;
435 if (strlen(ps
= (char *)drive_get_serial(bs
)))
436 strncpy(s
->serial_str
, ps
, sizeof(s
->serial_str
));
438 snprintf(s
->serial_str
, sizeof(s
->serial_str
), "0");
440 bdrv_guess_geometry(s
->bs
, &cylinders
, &heads
, &secs
);
441 bdrv_set_geometry_hint(s
->bs
, cylinders
, heads
, secs
);
443 s
->vq
= virtio_add_queue(&s
->vdev
, 128, virtio_blk_handle_output
);
445 qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb
, s
);
446 register_savevm("virtio-blk", virtio_blk_id
++, 2,
447 virtio_blk_save
, virtio_blk_load
, s
);