/*
 * Block driver for the Virtual Disk Image (VDI) format
 *
 * Copyright (c) 2009, 2012 Stefan Weil
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 2 of the License, or
 * (at your option) version 3 or any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * http://forums.virtualbox.org/viewtopic.php?t=8046
 *
 * This driver supports create / read / write operations on VDI images.
 *
 * Todo (see also TODO in code):
 *
 * Some features like snapshots are still missing.
 *
 * Deallocation of zero-filled blocks and shrinking images are missing, too
 * (might be added to common block layer).
 *
 * Allocation of blocks could be optimized (fewer writes to block map and
 * header).
 *
 * Read and write of adjacent blocks could be done in one operation
 * (current code uses one operation per block (1 MiB)).
 *
 * The code is not thread safe (missing locks for changes in header and
 * block table, no problem with current QEMU).
 *
 * Hints:
 *
 * Blocks (VDI documentation) correspond to clusters (QEMU).
 * QEMU's backing files could be implemented using VDI snapshot files (TODO).
 * VDI snapshot files may also contain the complete machine state.
 * Maybe this machine state can be converted to QEMU PC machine snapshot data.
 *
 * The driver keeps a block cache (little endian entries) in memory.
 * For the standard block size (1 MiB), a 1 TiB disk will use 4 MiB RAM,
 * so this seems to be reasonable.
 */
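/* Worked example for the RAM estimate above: a 1 TiB disk with 1 MiB blocks
 * needs 2^20 block map entries; at 4 bytes (uint32_t) per entry the
 * in-memory block map takes 2^20 * 4 B = 4 MiB. */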
#include "qemu-common.h"
#include "block_int.h"
#include "module.h"
#include "migration.h"
#if defined(CONFIG_UUID)
#include <uuid/uuid.h>
#else
/* TODO: move uuid emulation to some central place in QEMU. */
#include "sysemu.h"     /* UUID_FMT */
typedef unsigned char uuid_t[16];
void uuid_generate(uuid_t out);
int uuid_is_null(const uuid_t uu);
void uuid_unparse(const uuid_t uu, char *out);
#endif
/* Code configuration options. */

/* Enable debug messages. */
//~ #define CONFIG_VDI_DEBUG

/* Support write operations on VDI images. */
#define CONFIG_VDI_WRITE

/* Support non-standard block (cluster) size. This is untested.
 * Maybe it will be needed for very large images.
 */
//~ #define CONFIG_VDI_BLOCK_SIZE

/* Support static (fixed, pre-allocated) images. */
#define CONFIG_VDI_STATIC_IMAGE

/* Command line option for static images. */
#define BLOCK_OPT_STATIC "static"

#define KiB     1024
#define MiB     (KiB * KiB)

#define SECTOR_SIZE 512
#define DEFAULT_CLUSTER_SIZE (1 * MiB)
#if defined(CONFIG_VDI_DEBUG)
#define logout(fmt, ...) \
                fprintf(stderr, "vdi\t%-24s" fmt, __func__, ##__VA_ARGS__)
#else
#define logout(fmt, ...) ((void)0)
#endif
/* Image signature. */
#define VDI_SIGNATURE 0xbeda107f

/* Image version. */
#define VDI_VERSION_1_1 0x00010001

/* Image type. */
#define VDI_TYPE_DYNAMIC 1
#define VDI_TYPE_STATIC  2

/* Innotek / SUN images use these strings in header.text:
 * "<<< innotek VirtualBox Disk Image >>>\n"
 * "<<< Sun xVM VirtualBox Disk Image >>>\n"
 * "<<< Sun VirtualBox Disk Image >>>\n"
 * The value does not matter, so QEMU created images use a different text.
 */
#define VDI_TEXT "<<< QEMU VM Virtual Disk Image >>>\n"

/* A never-allocated block; semantically arbitrary content. */
#define VDI_UNALLOCATED 0xffffffffU

/* A discarded (no longer allocated) block; semantically zero-filled. */
#define VDI_DISCARDED   0xfffffffeU

#define VDI_IS_ALLOCATED(X) ((X) < VDI_DISCARDED)
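/* Each block map entry is a uint32_t: either the index of the data block
 * inside the image file, or one of the two special markers above. Any
 * entry below VDI_DISCARDED therefore refers to an allocated block. */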
#if !defined(CONFIG_UUID)
void uuid_generate(uuid_t out)
{
    memset(out, 0, sizeof(uuid_t));
}

int uuid_is_null(const uuid_t uu)
{
    uuid_t null_uuid = { 0 };
    return memcmp(uu, null_uuid, sizeof(uuid_t)) == 0;
}

void uuid_unparse(const uuid_t uu, char *out)
{
    snprintf(out, 37, UUID_FMT,
             uu[0], uu[1], uu[2], uu[3], uu[4], uu[5], uu[6], uu[7],
             uu[8], uu[9], uu[10], uu[11], uu[12], uu[13], uu[14], uu[15]);
}
#endif
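/* Control block for one asynchronous VDI request. A request is split into
 * per-block operations; the fields below track the remaining work and any
 * pending block map / header updates. */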
typedef struct {
    BlockDriverAIOCB common;
    int64_t sector_num;
    QEMUIOVector *qiov;
    uint8_t *buf;
    /* Bounce buffer for request buffers with more than one iovec. */
    uint8_t *orig_buf;
    int is_write;
    /* Total number of sectors. */
    int nb_sectors;
    /* Number of sectors for current AIO. */
    int n_sectors;
    /* New allocated block map entry. */
    uint32_t bmap_first;
    uint32_t bmap_last;
    /* Buffer for new allocated block. */
    void *block_buffer;
    int header_modified;
    BlockDriverAIOCB *hd_aiocb;
    struct iovec hd_iov;
    QEMUIOVector hd_qiov;
    QEMUBH *bh;
} VdiAIOCB;
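/* On-disk VDI header. All multi-byte fields are little endian on disk;
 * vdi_header_to_cpu / vdi_header_to_le below convert them in place. */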
typedef struct {
    char text[0x40];
    uint32_t signature;
    uint32_t version;
    uint32_t header_size;
    uint32_t image_type;
    uint32_t image_flags;
    char description[256];
    uint32_t offset_bmap;
    uint32_t offset_data;
    uint32_t cylinders;         /* disk geometry, unused here */
    uint32_t heads;             /* disk geometry, unused here */
    uint32_t sectors;           /* disk geometry, unused here */
    uint32_t sector_size;
    uint32_t unused1;
    uint64_t disk_size;
    uint32_t block_size;
    uint32_t block_extra;       /* unused here */
    uint32_t blocks_in_image;
    uint32_t blocks_allocated;
    uuid_t uuid_image;
    uuid_t uuid_last_snap;
    uuid_t uuid_link;
    uuid_t uuid_parent;
    uint64_t unused2[7];
} VdiHeader;
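/* Run-time state of an open VDI image, stored in BlockDriverState.opaque. */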
typedef struct {
    /* The block map entries are little endian (even in memory). */
    uint32_t *bmap;
    /* Size of block (bytes). */
    uint32_t block_size;
    /* Size of block (sectors). */
    uint32_t block_sectors;
    /* First sector of block map. */
    uint32_t bmap_sector;
    /* VDI header (converted to host endianness). */
    VdiHeader header;

    Error *migration_blocker;
} BDRVVdiState;
/* Change UUID from little endian (IPRT = VirtualBox format) to big endian
 * format (network byte order, standard, see RFC 4122) and vice versa.
 */
static void uuid_convert(uuid_t uuid)
{
    bswap32s((uint32_t *)&uuid[0]);
    bswap16s((uint16_t *)&uuid[4]);
    bswap16s((uint16_t *)&uuid[6]);
}
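/* Byte-swap all multi-byte header fields from the on-disk little endian
 * layout to host byte order. The swap helpers work in place, so the
 * _to_le variant further below performs the inverse conversion. */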
static void vdi_header_to_cpu(VdiHeader *header)
{
    le32_to_cpus(&header->signature);
    le32_to_cpus(&header->version);
    le32_to_cpus(&header->header_size);
    le32_to_cpus(&header->image_type);
    le32_to_cpus(&header->image_flags);
    le32_to_cpus(&header->offset_bmap);
    le32_to_cpus(&header->offset_data);
    le32_to_cpus(&header->cylinders);
    le32_to_cpus(&header->heads);
    le32_to_cpus(&header->sectors);
    le32_to_cpus(&header->sector_size);
    le64_to_cpus(&header->disk_size);
    le32_to_cpus(&header->block_size);
    le32_to_cpus(&header->block_extra);
    le32_to_cpus(&header->blocks_in_image);
    le32_to_cpus(&header->blocks_allocated);
    uuid_convert(header->uuid_image);
    uuid_convert(header->uuid_last_snap);
    uuid_convert(header->uuid_link);
    uuid_convert(header->uuid_parent);
}
static void vdi_header_to_le(VdiHeader *header)
{
    cpu_to_le32s(&header->signature);
    cpu_to_le32s(&header->version);
    cpu_to_le32s(&header->header_size);
    cpu_to_le32s(&header->image_type);
    cpu_to_le32s(&header->image_flags);
    cpu_to_le32s(&header->offset_bmap);
    cpu_to_le32s(&header->offset_data);
    cpu_to_le32s(&header->cylinders);
    cpu_to_le32s(&header->heads);
    cpu_to_le32s(&header->sectors);
    cpu_to_le32s(&header->sector_size);
    cpu_to_le64s(&header->disk_size);
    cpu_to_le32s(&header->block_size);
    cpu_to_le32s(&header->block_extra);
    cpu_to_le32s(&header->blocks_in_image);
    /* Swap blocks_allocated exactly once: swapping it twice (as the old
     * code did) would undo the conversion and write a host-endian value
     * on big endian hosts. */
    cpu_to_le32s(&header->blocks_allocated);
    uuid_convert(header->uuid_image);
    uuid_convert(header->uuid_last_snap);
    uuid_convert(header->uuid_link);
    uuid_convert(header->uuid_parent);
}
#if defined(CONFIG_VDI_DEBUG)
static void vdi_header_print(VdiHeader *header)
{
    char uuid[37];
    logout("text        %s", header->text);
    logout("signature   0x%04x\n", header->signature);
    logout("header size 0x%04x\n", header->header_size);
    logout("image type  0x%04x\n", header->image_type);
    logout("image flags 0x%04x\n", header->image_flags);
    logout("description %s\n", header->description);
    logout("offset bmap 0x%04x\n", header->offset_bmap);
    logout("offset data 0x%04x\n", header->offset_data);
    logout("cylinders   0x%04x\n", header->cylinders);
    logout("heads       0x%04x\n", header->heads);
    logout("sectors     0x%04x\n", header->sectors);
    logout("sector size 0x%04x\n", header->sector_size);
    logout("image size  0x%" PRIx64 " B (%" PRIu64 " MiB)\n",
           header->disk_size, header->disk_size / MiB);
    logout("block size  0x%04x\n", header->block_size);
    logout("block extra 0x%04x\n", header->block_extra);
    logout("blocks tot. 0x%04x\n", header->blocks_in_image);
    logout("blocks all. 0x%04x\n", header->blocks_allocated);
    uuid_unparse(header->uuid_image, uuid);
    logout("uuid image  %s\n", uuid);
    uuid_unparse(header->uuid_last_snap, uuid);
    logout("uuid snap   %s\n", uuid);
    uuid_unparse(header->uuid_link, uuid);
    logout("uuid link   %s\n", uuid);
    uuid_unparse(header->uuid_parent, uuid);
    logout("uuid parent %s\n", uuid);
}
#endif /* CONFIG_VDI_DEBUG */
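/* Consistency check (bdrv_check callback): walk the block map, report
 * out-of-range and doubly-used block indices, and verify that the
 * blocks_allocated counter in the header matches the block map. */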
static int vdi_check(BlockDriverState *bs, BdrvCheckResult *res)
{
    /* TODO: additional checks possible. */
    BDRVVdiState *s = (BDRVVdiState *)bs->opaque;
    uint32_t blocks_allocated = 0;
    uint32_t block;
    uint32_t *bmap;
    logout("\n");

    bmap = g_malloc(s->header.blocks_in_image * sizeof(uint32_t));
    memset(bmap, 0xff, s->header.blocks_in_image * sizeof(uint32_t));

    /* Check block map and value of blocks_allocated. */
    for (block = 0; block < s->header.blocks_in_image; block++) {
        uint32_t bmap_entry = le32_to_cpu(s->bmap[block]);
        if (VDI_IS_ALLOCATED(bmap_entry)) {
            if (bmap_entry < s->header.blocks_in_image) {
                blocks_allocated++;
                if (!VDI_IS_ALLOCATED(bmap[bmap_entry])) {
                    bmap[bmap_entry] = bmap_entry;
                } else {
                    fprintf(stderr, "ERROR: block index %" PRIu32
                            " also used by %" PRIu32 "\n",
                            bmap[bmap_entry], bmap_entry);
                    res->corruptions++;
                }
            } else {
                fprintf(stderr, "ERROR: block index %" PRIu32
                        " too large, is %" PRIu32 "\n", block, bmap_entry);
                res->corruptions++;
            }
        }
    }
    if (blocks_allocated != s->header.blocks_allocated) {
        fprintf(stderr, "ERROR: allocated blocks mismatch, is %" PRIu32
                ", should be %" PRIu32 "\n",
                blocks_allocated, s->header.blocks_allocated);
        res->corruptions++;
    }

    g_free(bmap);

    return 0;
}
static int vdi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
    /* TODO: vdi_get_info would be needed for machine snapshots.
       vm_state_offset is still missing. */
    BDRVVdiState *s = (BDRVVdiState *)bs->opaque;
    logout("\n");
    bdi->cluster_size = s->block_size;
    bdi->vm_state_offset = 0;
    return 0;
}
static int vdi_make_empty(BlockDriverState *bs)
{
    /* TODO: missing code. */
    logout("\n");
    /* The return value for missing code must be 0, see block.c. */
    return 0;
}
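/* bdrv_probe callback: report a score of 100 (certain match) if the buffer
 * is large enough and carries the VDI signature, else 0. */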
static int vdi_probe(const uint8_t *buf, int buf_size, const char *filename)
{
    int result = 0;
    const VdiHeader *header = (const VdiHeader *)buf;

    logout("\n");

    if (buf_size < sizeof(*header)) {
        /* Header too small, no VDI. */
    } else if (le32_to_cpu(header->signature) == VDI_SIGNATURE) {
        result = 100;
    }

    if (result == 0) {
        logout("no vdi image\n");
    } else {
        logout("%s", header->text);
    }

    return result;
}
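/* Open an existing VDI image: read and validate the header, then load the
 * complete block map into memory (see the RAM estimate at the top of this
 * file). Live migration is blocked while a vdi image is open (see the
 * migration_blocker set at the end of this function). */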
static int vdi_open(BlockDriverState *bs, int flags)
{
    BDRVVdiState *s = bs->opaque;
    VdiHeader header;
    size_t bmap_size;

    logout("\n");

    if (bdrv_read(bs->file, 0, (uint8_t *)&header, 1) < 0) {
        goto fail;
    }

    vdi_header_to_cpu(&header);
#if defined(CONFIG_VDI_DEBUG)
    vdi_header_print(&header);
#endif

    if (header.disk_size % SECTOR_SIZE != 0) {
        /* 'VBoxManage convertfromraw' can create images with odd disk sizes.
           We accept them but round the disk size to the next multiple of
           SECTOR_SIZE. */
        logout("odd disk size %" PRIu64 " B, round up\n", header.disk_size);
        header.disk_size += SECTOR_SIZE - 1;
        header.disk_size &= ~(SECTOR_SIZE - 1);
    }

    if (header.version != VDI_VERSION_1_1) {
        logout("unsupported version %u.%u\n",
               header.version >> 16, header.version & 0xffff);
        goto fail;
    } else if (header.offset_bmap % SECTOR_SIZE != 0) {
        /* We only support block maps which start on a sector boundary. */
        logout("unsupported block map offset 0x%x B\n", header.offset_bmap);
        goto fail;
    } else if (header.offset_data % SECTOR_SIZE != 0) {
        /* We only support data blocks which start on a sector boundary. */
        logout("unsupported data offset 0x%x B\n", header.offset_data);
        goto fail;
    } else if (header.sector_size != SECTOR_SIZE) {
        logout("unsupported sector size %u B\n", header.sector_size);
        goto fail;
    } else if (header.block_size != 1 * MiB) {
        logout("unsupported block size %u B\n", header.block_size);
        goto fail;
    } else if (header.disk_size >
               (uint64_t)header.blocks_in_image * header.block_size) {
        logout("unsupported disk size %" PRIu64 " B\n", header.disk_size);
        goto fail;
    } else if (!uuid_is_null(header.uuid_link)) {
        logout("link uuid != 0, unsupported\n");
        goto fail;
    } else if (!uuid_is_null(header.uuid_parent)) {
        logout("parent uuid != 0, unsupported\n");
        goto fail;
    }

    bs->total_sectors = header.disk_size / SECTOR_SIZE;

    s->block_size = header.block_size;
    s->block_sectors = header.block_size / SECTOR_SIZE;
    s->bmap_sector = header.offset_bmap / SECTOR_SIZE;
    s->header = header;

    bmap_size = header.blocks_in_image * sizeof(uint32_t);
    bmap_size = (bmap_size + SECTOR_SIZE - 1) / SECTOR_SIZE;
    if (bmap_size > 0) {
        s->bmap = g_malloc(bmap_size * SECTOR_SIZE);
    }
    if (bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap,
                  bmap_size) < 0) {
        goto fail_free_bmap;
    }

    /* Disable migration when vdi images are used */
    error_set(&s->migration_blocker,
              QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
              "vdi", bs->device_name, "live migration");
    migrate_add_blocker(s->migration_blocker);

    return 0;

 fail_free_bmap:
    g_free(s->bmap);

 fail:
    return -1;
}
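/* bdrv_co_is_allocated callback: *pnum receives the number of contiguous
 * sectors (within one block) that share the allocation status of
 * sector_num; the return value says whether that range is allocated. */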
static int coroutine_fn vdi_co_is_allocated(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, int *pnum)
{
    /* TODO: Check for too large sector_num (in bdrv_is_allocated or here). */
    BDRVVdiState *s = (BDRVVdiState *)bs->opaque;
    size_t bmap_index = sector_num / s->block_sectors;
    size_t sector_in_block = sector_num % s->block_sectors;
    int n_sectors = s->block_sectors - sector_in_block;
    uint32_t bmap_entry = le32_to_cpu(s->bmap[bmap_index]);
    logout("%p, %" PRId64 ", %d, %p\n", bs, sector_num, nb_sectors, pnum);
    if (n_sectors > nb_sectors) {
        n_sectors = nb_sectors;
    }
    *pnum = n_sectors;
    return VDI_IS_ALLOCATED(bmap_entry);
}
static void vdi_aio_cancel(BlockDriverAIOCB *blockacb)
{
    /* TODO: This code is untested. How can I get it executed? */
    VdiAIOCB *acb = container_of(blockacb, VdiAIOCB, common);
    if (acb->hd_aiocb) {
        bdrv_aio_cancel(acb->hd_aiocb);
    }
    qemu_aio_release(acb);
}
static AIOPool vdi_aio_pool = {
    .aiocb_size = sizeof(VdiAIOCB),
    .cancel = vdi_aio_cancel,
};
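/* Allocate and initialize the control block for one request. For vectored
 * buffers (more than one iovec) a linear bounce buffer is allocated; for
 * writes it is filled from the caller's iovecs here, for reads it is
 * copied back on completion. */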
static VdiAIOCB *vdi_aio_setup(BlockDriverState *bs, int64_t sector_num,
        QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque, int is_write)
{
    VdiAIOCB *acb;

    logout("%p, %" PRId64 ", %p, %d, %p, %p, %d\n",
           bs, sector_num, qiov, nb_sectors, cb, opaque, is_write);

    acb = qemu_aio_get(&vdi_aio_pool, bs, cb, opaque);
    acb->hd_aiocb = NULL;
    acb->sector_num = sector_num;
    acb->qiov = qiov;
    acb->is_write = is_write;

    if (qiov->niov > 1) {
        acb->buf = qemu_blockalign(bs, qiov->size);
        acb->orig_buf = acb->buf;
        if (is_write) {
            qemu_iovec_to_buffer(qiov, acb->buf);
        }
    } else {
        acb->buf = (uint8_t *)qiov->iov->iov_base;
    }
    acb->nb_sectors = nb_sectors;
    acb->n_sectors = 0;
    acb->bmap_first = VDI_UNALLOCATED;
    acb->bmap_last = VDI_UNALLOCATED;
    acb->block_buffer = NULL;
    acb->header_modified = 0;
    return acb;
}
static int vdi_schedule_bh(QEMUBHFunc *cb, VdiAIOCB *acb)
{
    logout("\n");

    if (acb->bh) {
        return -EIO;
    }

    acb->bh = qemu_bh_new(cb, acb);
    if (!acb->bh) {
        return -EIO;
    }

    qemu_bh_schedule(acb->bh);

    return 0;
}
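/* The first read/write step runs from a bottom half scheduled by
 * vdi_aio_readv/vdi_aio_writev below, so the completion callback can never
 * fire before the emulated AIO call has returned to its caller. */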
static void vdi_aio_read_cb(void *opaque, int ret);
static void vdi_aio_write_cb(void *opaque, int ret);

static void vdi_aio_rw_bh(void *opaque)
{
    VdiAIOCB *acb = opaque;
    logout("\n");
    qemu_bh_delete(acb->bh);
    acb->bh = NULL;

    if (acb->is_write) {
        vdi_aio_write_cb(opaque, 0);
    } else {
        vdi_aio_read_cb(opaque, 0);
    }
}
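/* Completion callback which doubles as the read loop: each invocation
 * accounts for the sectors just read, then either finishes the request or
 * issues the next per-block read. Unallocated blocks are satisfied with
 * zeros without touching the image file. */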
static void vdi_aio_read_cb(void *opaque, int ret)
{
    VdiAIOCB *acb = opaque;
    BlockDriverState *bs = acb->common.bs;
    BDRVVdiState *s = bs->opaque;
    uint32_t bmap_entry;
    uint32_t block_index;
    uint32_t sector_in_block;
    uint32_t n_sectors;

    logout("%u sectors read\n", acb->n_sectors);

    acb->hd_aiocb = NULL;

    if (ret < 0) {
        goto done;
    }

    acb->nb_sectors -= acb->n_sectors;

    if (acb->nb_sectors == 0) {
        /* request completed */
        ret = 0;
        goto done;
    }

    acb->sector_num += acb->n_sectors;
    acb->buf += acb->n_sectors * SECTOR_SIZE;

    block_index = acb->sector_num / s->block_sectors;
    sector_in_block = acb->sector_num % s->block_sectors;
    n_sectors = s->block_sectors - sector_in_block;
    if (n_sectors > acb->nb_sectors) {
        n_sectors = acb->nb_sectors;
    }

    logout("will read %u sectors starting at sector %" PRIu64 "\n",
           n_sectors, acb->sector_num);

    /* prepare next AIO request */
    acb->n_sectors = n_sectors;
    bmap_entry = le32_to_cpu(s->bmap[block_index]);
    if (!VDI_IS_ALLOCATED(bmap_entry)) {
        /* Block not allocated, return zeros, no need to wait. */
        memset(acb->buf, 0, n_sectors * SECTOR_SIZE);
        ret = vdi_schedule_bh(vdi_aio_rw_bh, acb);
        if (ret < 0) {
            goto done;
        }
    } else {
        uint64_t offset = s->header.offset_data / SECTOR_SIZE +
                          (uint64_t)bmap_entry * s->block_sectors +
                          sector_in_block;
        acb->hd_iov.iov_base = (void *)acb->buf;
        acb->hd_iov.iov_len = n_sectors * SECTOR_SIZE;
        qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
        acb->hd_aiocb = bdrv_aio_readv(bs->file, offset, &acb->hd_qiov,
                                       n_sectors, vdi_aio_read_cb, acb);
    }
    return;

done:
    if (acb->qiov->niov > 1) {
        qemu_iovec_from_buffer(acb->qiov, acb->orig_buf, acb->qiov->size);
        qemu_vfree(acb->orig_buf);
    }
    acb->common.cb(acb->common.opaque, ret);
    qemu_aio_release(acb);
}
static BlockDriverAIOCB *vdi_aio_readv(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    VdiAIOCB *acb;
    int ret;

    logout("\n");
    acb = vdi_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
    ret = vdi_schedule_bh(vdi_aio_rw_bh, acb);
    if (ret < 0) {
        if (acb->qiov->niov > 1) {
            qemu_vfree(acb->orig_buf);
        }
        qemu_aio_release(acb);
        return NULL;
    }

    return &acb->common;
}
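/* Completion callback which doubles as the write loop. A write to an
 * unallocated block first allocates a new data block (appended at index
 * blocks_allocated) and pads it with zeros around the payload. Once all
 * data is written, the modified block map sectors and, for newly allocated
 * blocks, the updated header are written out as well. */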
static void vdi_aio_write_cb(void *opaque, int ret)
{
    VdiAIOCB *acb = opaque;
    BlockDriverState *bs = acb->common.bs;
    BDRVVdiState *s = bs->opaque;
    uint32_t bmap_entry;
    uint32_t block_index;
    uint32_t sector_in_block;
    uint32_t n_sectors;

    acb->hd_aiocb = NULL;

    if (ret < 0) {
        goto done;
    }

    acb->nb_sectors -= acb->n_sectors;
    acb->sector_num += acb->n_sectors;
    acb->buf += acb->n_sectors * SECTOR_SIZE;

    if (acb->nb_sectors == 0) {
        logout("finished data write\n");
        acb->n_sectors = 0;
        if (acb->header_modified) {
            VdiHeader *header = acb->block_buffer;
            logout("now writing modified header\n");
            assert(VDI_IS_ALLOCATED(acb->bmap_first));
            *header = s->header;
            vdi_header_to_le(header);
            acb->header_modified = 0;
            acb->hd_iov.iov_base = acb->block_buffer;
            acb->hd_iov.iov_len = SECTOR_SIZE;
            qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
            acb->hd_aiocb = bdrv_aio_writev(bs->file, 0, &acb->hd_qiov, 1,
                                            vdi_aio_write_cb, acb);
        } else if (VDI_IS_ALLOCATED(acb->bmap_first)) {
            /* One or more new blocks were allocated. */
            uint64_t offset;
            uint32_t bmap_first;
            uint32_t bmap_last;
            g_free(acb->block_buffer);
            acb->block_buffer = NULL;
            bmap_first = acb->bmap_first;
            bmap_last = acb->bmap_last;
            logout("now writing modified block map entry %u...%u\n",
                   bmap_first, bmap_last);
            /* Write modified sectors from block map. */
            bmap_first /= (SECTOR_SIZE / sizeof(uint32_t));
            bmap_last /= (SECTOR_SIZE / sizeof(uint32_t));
            n_sectors = bmap_last - bmap_first + 1;
            offset = s->bmap_sector + bmap_first;
            acb->bmap_first = VDI_UNALLOCATED;
            acb->hd_iov.iov_base = (void *)((uint8_t *)&s->bmap[0] +
                                            bmap_first * SECTOR_SIZE);
            acb->hd_iov.iov_len = n_sectors * SECTOR_SIZE;
            qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
            logout("will write %u block map sectors starting from entry %u\n",
                   n_sectors, bmap_first);
            acb->hd_aiocb = bdrv_aio_writev(bs->file, offset, &acb->hd_qiov,
                                            n_sectors, vdi_aio_write_cb, acb);
        } else {
            ret = 0;
            goto done;
        }
        return;
    }

    logout("%u sectors written\n", acb->n_sectors);

    block_index = acb->sector_num / s->block_sectors;
    sector_in_block = acb->sector_num % s->block_sectors;
    n_sectors = s->block_sectors - sector_in_block;
    if (n_sectors > acb->nb_sectors) {
        n_sectors = acb->nb_sectors;
    }

    logout("will write %u sectors starting at sector %" PRIu64 "\n",
           n_sectors, acb->sector_num);

    /* prepare next AIO request */
    acb->n_sectors = n_sectors;
    bmap_entry = le32_to_cpu(s->bmap[block_index]);
    if (!VDI_IS_ALLOCATED(bmap_entry)) {
        /* Allocate new block and write to it. */
        uint64_t offset;
        uint8_t *block;
        bmap_entry = s->header.blocks_allocated;
        s->bmap[block_index] = cpu_to_le32(bmap_entry);
        s->header.blocks_allocated++;
        offset = s->header.offset_data / SECTOR_SIZE +
                 (uint64_t)bmap_entry * s->block_sectors;
        block = acb->block_buffer;
        if (block == NULL) {
            block = g_malloc(s->block_size);
            acb->block_buffer = block;
            acb->bmap_first = block_index;
            assert(!acb->header_modified);
            acb->header_modified = 1;
        }
        acb->bmap_last = block_index;
        /* Copy data to be written to new block and zero unused parts. */
        memset(block, 0, sector_in_block * SECTOR_SIZE);
        memcpy(block + sector_in_block * SECTOR_SIZE,
               acb->buf, n_sectors * SECTOR_SIZE);
        memset(block + (sector_in_block + n_sectors) * SECTOR_SIZE, 0,
               (s->block_sectors - n_sectors - sector_in_block) * SECTOR_SIZE);
        acb->hd_iov.iov_base = (void *)block;
        acb->hd_iov.iov_len = s->block_size;
        qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
        acb->hd_aiocb = bdrv_aio_writev(bs->file, offset,
                                        &acb->hd_qiov, s->block_sectors,
                                        vdi_aio_write_cb, acb);
    } else {
        uint64_t offset = s->header.offset_data / SECTOR_SIZE +
                          (uint64_t)bmap_entry * s->block_sectors +
                          sector_in_block;
        acb->hd_iov.iov_base = (void *)acb->buf;
        acb->hd_iov.iov_len = n_sectors * SECTOR_SIZE;
        qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
        acb->hd_aiocb = bdrv_aio_writev(bs->file, offset, &acb->hd_qiov,
                                        n_sectors, vdi_aio_write_cb, acb);
    }

    return;

done:
    if (acb->qiov->niov > 1) {
        qemu_vfree(acb->orig_buf);
    }
    acb->common.cb(acb->common.opaque, ret);
    qemu_aio_release(acb);
}
static BlockDriverAIOCB *vdi_aio_writev(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    VdiAIOCB *acb;
    int ret;

    logout("\n");
    acb = vdi_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
    ret = vdi_schedule_bh(vdi_aio_rw_bh, acb);
    if (ret < 0) {
        if (acb->qiov->niov > 1) {
            qemu_vfree(acb->orig_buf);
        }
        qemu_aio_release(acb);
        return NULL;
    }

    return &acb->common;
}
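/* Create a new VDI image. The file layout is: header in the first sector,
 * block map at offset 0x200 (rounded up to whole sectors), data blocks
 * after the block map. Static images are fully pre-allocated with
 * ftruncate and get an identity block map; dynamic images start with all
 * entries VDI_UNALLOCATED. */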
static int vdi_create(const char *filename, QEMUOptionParameter *options)
{
    int fd;
    int result = 0;
    uint64_t bytes = 0;
    uint32_t blocks;
    size_t block_size = DEFAULT_CLUSTER_SIZE;
    uint32_t image_type = VDI_TYPE_DYNAMIC;
    VdiHeader header;
    size_t i;
    size_t bmap_size;
    uint32_t *bmap;

    logout("\n");

    /* Read out options. */
    while (options && options->name) {
        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
            bytes = options->value.n;
#if defined(CONFIG_VDI_BLOCK_SIZE)
        } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) {
            if (options->value.n) {
                /* TODO: Additional checks (SECTOR_SIZE * 2^n, ...). */
                block_size = options->value.n;
            }
#endif
#if defined(CONFIG_VDI_STATIC_IMAGE)
        } else if (!strcmp(options->name, BLOCK_OPT_STATIC)) {
            if (options->value.n) {
                image_type = VDI_TYPE_STATIC;
            }
#endif
        }
        options++;
    }

    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
              0644);
    if (fd < 0) {
        return -errno;
    }

    /* We need enough blocks to store the given disk size,
       so always round up. */
    blocks = (bytes + block_size - 1) / block_size;

    bmap_size = blocks * sizeof(uint32_t);
    bmap_size = ((bmap_size + SECTOR_SIZE - 1) & ~(SECTOR_SIZE - 1));

    memset(&header, 0, sizeof(header));
    pstrcpy(header.text, sizeof(header.text), VDI_TEXT);
    header.signature = VDI_SIGNATURE;
    header.version = VDI_VERSION_1_1;
    header.header_size = 0x180;
    header.image_type = image_type;
    header.offset_bmap = 0x200;
    header.offset_data = 0x200 + bmap_size;
    header.sector_size = SECTOR_SIZE;
    header.disk_size = bytes;
    header.block_size = block_size;
    header.blocks_in_image = blocks;
    if (image_type == VDI_TYPE_STATIC) {
        header.blocks_allocated = blocks;
    }
    uuid_generate(header.uuid_image);
    uuid_generate(header.uuid_last_snap);
    /* There is no need to set header.uuid_link or header.uuid_parent here. */
#if defined(CONFIG_VDI_DEBUG)
    vdi_header_print(&header);
#endif
    vdi_header_to_le(&header);
    if (write(fd, &header, sizeof(header)) < 0) {
        result = -errno;
    }

    bmap = NULL;
    if (bmap_size > 0) {
        bmap = (uint32_t *)g_malloc0(bmap_size);
    }
    for (i = 0; i < blocks; i++) {
        if (image_type == VDI_TYPE_STATIC) {
            bmap[i] = i;
        } else {
            bmap[i] = VDI_UNALLOCATED;
        }
    }
    if (write(fd, bmap, bmap_size) < 0) {
        result = -errno;
    }
    g_free(bmap);

    if (image_type == VDI_TYPE_STATIC) {
        if (ftruncate(fd, sizeof(header) + bmap_size + blocks * block_size)) {
            result = -errno;
        }
    }

    if (close(fd) < 0) {
        result = -errno;
    }

    return result;
}
static void vdi_close(BlockDriverState *bs)
{
    BDRVVdiState *s = bs->opaque;

    g_free(s->bmap);

    migrate_del_blocker(s->migration_blocker);
    error_free(s->migration_blocker);
}
static coroutine_fn int vdi_co_flush(BlockDriverState *bs)
{
    logout("\n");
    return bdrv_co_flush(bs->file);
}
static QEMUOptionParameter vdi_create_options[] = {
    {
        .name = BLOCK_OPT_SIZE,
        .type = OPT_SIZE,
        .help = "Virtual disk size"
    },
#if defined(CONFIG_VDI_BLOCK_SIZE)
    {
        .name = BLOCK_OPT_CLUSTER_SIZE,
        .type = OPT_SIZE,
        .help = "VDI cluster (block) size",
        .value = { .n = DEFAULT_CLUSTER_SIZE },
    },
#endif
#if defined(CONFIG_VDI_STATIC_IMAGE)
    {
        .name = BLOCK_OPT_STATIC,
        .type = OPT_FLAG,
        .help = "VDI static (pre-allocated) image"
    },
#endif
    /* TODO: An additional option to set UUID values might be useful. */
    { NULL }
};
static BlockDriver bdrv_vdi = {
    .format_name = "vdi",
    .instance_size = sizeof(BDRVVdiState),
    .bdrv_probe = vdi_probe,
    .bdrv_open = vdi_open,
    .bdrv_close = vdi_close,
    .bdrv_create = vdi_create,
    .bdrv_co_flush_to_disk = vdi_co_flush,
    .bdrv_co_is_allocated = vdi_co_is_allocated,
    .bdrv_make_empty = vdi_make_empty,

    .bdrv_aio_readv = vdi_aio_readv,
#if defined(CONFIG_VDI_WRITE)
    .bdrv_aio_writev = vdi_aio_writev,
#endif

    .bdrv_get_info = vdi_get_info,

    .create_options = vdi_create_options,
    .bdrv_check = vdi_check,
};
static void bdrv_vdi_init(void)
{
    bdrv_register(&bdrv_vdi);
}

block_init(bdrv_vdi_init);