configure: check for EFD_NONBLOCK | EFD_CLOEXEC flags
[qemu/qmp-unstable.git] / block.c
blobd0158877d662c47598f0c8e55290a186d15a8945
1 /*
2 * QEMU System Emulator block driver
4 * Copyright (c) 2003 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "monitor.h"
28 #include "block_int.h"
29 #include "module.h"
30 #include "qjson.h"
31 #include "qemu-coroutine.h"
32 #include "qmp-commands.h"
34 #ifdef CONFIG_BSD
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <sys/ioctl.h>
38 #include <sys/queue.h>
39 #ifndef __DragonFly__
40 #include <sys/disk.h>
41 #endif
42 #endif
44 #ifdef _WIN32
45 #include <windows.h>
46 #endif
48 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
50 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
51 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
52 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
53 BlockDriverCompletionFunc *cb, void *opaque);
54 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
55 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
56 BlockDriverCompletionFunc *cb, void *opaque);
57 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
58 int64_t sector_num, int nb_sectors,
59 QEMUIOVector *iov);
60 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
61 int64_t sector_num, int nb_sectors,
62 QEMUIOVector *iov);
63 static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
64 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
65 static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
66 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
67 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
68 int64_t sector_num,
69 QEMUIOVector *qiov,
70 int nb_sectors,
71 BlockDriverCompletionFunc *cb,
72 void *opaque,
73 bool is_write);
74 static void coroutine_fn bdrv_co_do_rw(void *opaque);
76 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
77 QTAILQ_HEAD_INITIALIZER(bdrv_states);
79 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
80 QLIST_HEAD_INITIALIZER(bdrv_drivers);
82 /* The device to use for VM snapshots */
83 static BlockDriverState *bs_snapshots;
85 /* If non-zero, use only whitelisted block drivers */
86 static int use_bdrv_whitelist;
88 #ifdef _WIN32
/* Return non-zero when the name begins with a single ASCII drive letter
 * followed immediately by a colon, e.g. "c:" or "Z:\foo". */
static int is_windows_drive_prefix(const char *filename)
{
    char c = filename[0];
    int is_letter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');

    return is_letter && filename[1] == ':';
}
96 int is_windows_drive(const char *filename)
98 if (is_windows_drive_prefix(filename) &&
99 filename[2] == '\0')
100 return 1;
101 if (strstart(filename, "\\\\.\\", NULL) ||
102 strstart(filename, "//./", NULL))
103 return 1;
104 return 0;
106 #endif
/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
    const char *p;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    p = path + strcspn(path, ":/\\");
#else
    /* Fix: the original used strchr(path, ':'), which misclassified a
     * relative name like "dir/file:name" as a protocol path.  Only a
     * colon appearing before the first directory separator marks a
     * protocol prefix. */
    p = path + strcspn(path, ":/");
#endif

    return *p == ':';
}
/* Return 1 when 'path' is absolute.  A "protocol:" prefix, if present,
 * is skipped first so that "file:/x" counts as absolute. */
int path_is_absolute(const char *path)
{
    const char *p;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif
    p = strchr(path, ':');
    p = p ? p + 1 : path;
#ifdef _WIN32
    return (*p == '/' || *p == '\\');
#else
    return (*p == '/');
#endif
}
/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URL are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *prefix_end, *last_sep;
    int len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        /* Absolute names (and URLs) are taken verbatim. */
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Keep everything in base_path up to and including the last
     * separator (past any "protocol:" prefix), then append filename. */
    prefix_end = strchr(base_path, ':');
    prefix_end = prefix_end ? prefix_end + 1 : base_path;

    last_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!last_sep || backslash > last_sep) {
            last_sep = backslash;
        }
    }
#endif
    last_sep = last_sep ? last_sep + 1 : base_path;

    if (last_sep > prefix_end) {
        prefix_end = last_sep;
    }

    len = prefix_end - base_path;
    if (len > dest_size - 1) {
        len = dest_size - 1;
    }
    memcpy(dest, base_path, len);
    dest[len] = '\0';
    pstrcat(dest, dest_size, filename);
}
185 void bdrv_register(BlockDriver *bdrv)
187 /* Block drivers without coroutine functions need emulation */
188 if (!bdrv->bdrv_co_readv) {
189 bdrv->bdrv_co_readv = bdrv_co_readv_em;
190 bdrv->bdrv_co_writev = bdrv_co_writev_em;
192 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
193 * the block driver lacks aio we need to emulate that too.
195 if (!bdrv->bdrv_aio_readv) {
196 /* add AIO emulation layer */
197 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
198 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
202 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
205 /* create a new block device (by default it is empty) */
206 BlockDriverState *bdrv_new(const char *device_name)
208 BlockDriverState *bs;
210 bs = g_malloc0(sizeof(BlockDriverState));
211 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
212 if (device_name[0] != '\0') {
213 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
215 bdrv_iostatus_disable(bs);
216 return bs;
219 BlockDriver *bdrv_find_format(const char *format_name)
221 BlockDriver *drv1;
222 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
223 if (!strcmp(drv1->format_name, format_name)) {
224 return drv1;
227 return NULL;
230 static int bdrv_is_whitelisted(BlockDriver *drv)
232 static const char *whitelist[] = {
233 CONFIG_BDRV_WHITELIST
235 const char **p;
237 if (!whitelist[0])
238 return 1; /* no whitelist, anything goes */
240 for (p = whitelist; *p; p++) {
241 if (!strcmp(drv->format_name, *p)) {
242 return 1;
245 return 0;
248 BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
250 BlockDriver *drv = bdrv_find_format(format_name);
251 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
254 int bdrv_create(BlockDriver *drv, const char* filename,
255 QEMUOptionParameter *options)
257 if (!drv->bdrv_create)
258 return -ENOTSUP;
260 return drv->bdrv_create(filename, options);
263 int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
265 BlockDriver *drv;
267 drv = bdrv_find_protocol(filename);
268 if (drv == NULL) {
269 return -ENOENT;
272 return bdrv_create(drv, filename, options);
275 #ifdef _WIN32
276 void get_tmp_filename(char *filename, int size)
278 char temp_dir[MAX_PATH];
280 GetTempPath(MAX_PATH, temp_dir);
281 GetTempFileName(temp_dir, "qem", 0, filename);
283 #else
/* Produce (and create) a fresh temporary file under $TMPDIR or /tmp.
 * The name is written into 'filename' (at most 'size' bytes). */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;
    /* XXX: race condition possible -- the file is created then closed,
     * and the caller reopens it by name later. */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    if (fd >= 0) {
        /* Fix: the original close()d unconditionally, calling close(-1)
         * when mkstemp() failed (e.g. unwritable tmpdir). */
        close(fd);
    }
}
296 #endif
299 * Detect host devices. By convention, /dev/cdrom[N] is always
300 * recognized as a host CDROM.
302 static BlockDriver *find_hdev_driver(const char *filename)
304 int score_max = 0, score;
305 BlockDriver *drv = NULL, *d;
307 QLIST_FOREACH(d, &bdrv_drivers, list) {
308 if (d->bdrv_probe_device) {
309 score = d->bdrv_probe_device(filename);
310 if (score > score_max) {
311 score_max = score;
312 drv = d;
317 return drv;
320 BlockDriver *bdrv_find_protocol(const char *filename)
322 BlockDriver *drv1;
323 char protocol[128];
324 int len;
325 const char *p;
327 /* TODO Drivers without bdrv_file_open must be specified explicitly */
330 * XXX(hch): we really should not let host device detection
331 * override an explicit protocol specification, but moving this
332 * later breaks access to device names with colons in them.
333 * Thanks to the brain-dead persistent naming schemes on udev-
334 * based Linux systems those actually are quite common.
336 drv1 = find_hdev_driver(filename);
337 if (drv1) {
338 return drv1;
341 if (!path_has_protocol(filename)) {
342 return bdrv_find_format("file");
344 p = strchr(filename, ':');
345 assert(p != NULL);
346 len = p - filename;
347 if (len > sizeof(protocol) - 1)
348 len = sizeof(protocol) - 1;
349 memcpy(protocol, filename, len);
350 protocol[len] = '\0';
351 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
352 if (drv1->protocol_name &&
353 !strcmp(drv1->protocol_name, protocol)) {
354 return drv1;
357 return NULL;
360 static int find_image_format(const char *filename, BlockDriver **pdrv)
362 int ret, score, score_max;
363 BlockDriver *drv1, *drv;
364 uint8_t buf[2048];
365 BlockDriverState *bs;
367 ret = bdrv_file_open(&bs, filename, 0);
368 if (ret < 0) {
369 *pdrv = NULL;
370 return ret;
373 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
374 if (bs->sg || !bdrv_is_inserted(bs)) {
375 bdrv_delete(bs);
376 drv = bdrv_find_format("raw");
377 if (!drv) {
378 ret = -ENOENT;
380 *pdrv = drv;
381 return ret;
384 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
385 bdrv_delete(bs);
386 if (ret < 0) {
387 *pdrv = NULL;
388 return ret;
391 score_max = 0;
392 drv = NULL;
393 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
394 if (drv1->bdrv_probe) {
395 score = drv1->bdrv_probe(buf, ret, filename);
396 if (score > score_max) {
397 score_max = score;
398 drv = drv1;
402 if (!drv) {
403 ret = -ENOENT;
405 *pdrv = drv;
406 return ret;
410 * Set the current 'total_sectors' value
412 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
414 BlockDriver *drv = bs->drv;
416 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
417 if (bs->sg)
418 return 0;
420 /* query actual device if possible, otherwise just trust the hint */
421 if (drv->bdrv_getlength) {
422 int64_t length = drv->bdrv_getlength(bs);
423 if (length < 0) {
424 return length;
426 hint = length >> BDRV_SECTOR_BITS;
429 bs->total_sectors = hint;
430 return 0;
434 * Set open flags for a given cache mode
436 * Return 0 on success, -1 if the cache mode was invalid.
438 int bdrv_parse_cache_flags(const char *mode, int *flags)
440 *flags &= ~BDRV_O_CACHE_MASK;
442 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
443 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
444 } else if (!strcmp(mode, "directsync")) {
445 *flags |= BDRV_O_NOCACHE;
446 } else if (!strcmp(mode, "writeback")) {
447 *flags |= BDRV_O_CACHE_WB;
448 } else if (!strcmp(mode, "unsafe")) {
449 *flags |= BDRV_O_CACHE_WB;
450 *flags |= BDRV_O_NO_FLUSH;
451 } else if (!strcmp(mode, "writethrough")) {
452 /* this is the default */
453 } else {
454 return -1;
457 return 0;
461 * Common part for opening disk images and files
463 static int bdrv_open_common(BlockDriverState *bs, const char *filename,
464 int flags, BlockDriver *drv)
466 int ret, open_flags;
468 assert(drv != NULL);
470 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
472 bs->file = NULL;
473 bs->total_sectors = 0;
474 bs->encrypted = 0;
475 bs->valid_key = 0;
476 bs->sg = 0;
477 bs->open_flags = flags;
478 bs->growable = 0;
479 bs->buffer_alignment = 512;
481 pstrcpy(bs->filename, sizeof(bs->filename), filename);
482 bs->backing_file[0] = '\0';
484 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
485 return -ENOTSUP;
488 bs->drv = drv;
489 bs->opaque = g_malloc0(drv->instance_size);
491 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
494 * Clear flags that are internal to the block layer before opening the
495 * image.
497 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
500 * Snapshots should be writable.
502 if (bs->is_temporary) {
503 open_flags |= BDRV_O_RDWR;
506 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
508 /* Open the image, either directly or using a protocol */
509 if (drv->bdrv_file_open) {
510 ret = drv->bdrv_file_open(bs, filename, open_flags);
511 } else {
512 ret = bdrv_file_open(&bs->file, filename, open_flags);
513 if (ret >= 0) {
514 ret = drv->bdrv_open(bs, open_flags);
518 if (ret < 0) {
519 goto free_and_fail;
522 ret = refresh_total_sectors(bs, bs->total_sectors);
523 if (ret < 0) {
524 goto free_and_fail;
527 #ifndef _WIN32
528 if (bs->is_temporary) {
529 unlink(filename);
531 #endif
532 return 0;
534 free_and_fail:
535 if (bs->file) {
536 bdrv_delete(bs->file);
537 bs->file = NULL;
539 g_free(bs->opaque);
540 bs->opaque = NULL;
541 bs->drv = NULL;
542 return ret;
546 * Opens a file using a protocol (file, host_device, nbd, ...)
548 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
550 BlockDriverState *bs;
551 BlockDriver *drv;
552 int ret;
554 drv = bdrv_find_protocol(filename);
555 if (!drv) {
556 return -ENOENT;
559 bs = bdrv_new("");
560 ret = bdrv_open_common(bs, filename, flags, drv);
561 if (ret < 0) {
562 bdrv_delete(bs);
563 return ret;
565 bs->growable = 1;
566 *pbs = bs;
567 return 0;
571 * Opens a disk image (raw, qcow2, vmdk, ...)
573 int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
574 BlockDriver *drv)
576 int ret;
577 char tmp_filename[PATH_MAX];
579 if (flags & BDRV_O_SNAPSHOT) {
580 BlockDriverState *bs1;
581 int64_t total_size;
582 int is_protocol = 0;
583 BlockDriver *bdrv_qcow2;
584 QEMUOptionParameter *options;
585 char backing_filename[PATH_MAX];
587 /* if snapshot, we create a temporary backing file and open it
588 instead of opening 'filename' directly */
590 /* if there is a backing file, use it */
591 bs1 = bdrv_new("");
592 ret = bdrv_open(bs1, filename, 0, drv);
593 if (ret < 0) {
594 bdrv_delete(bs1);
595 return ret;
597 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
599 if (bs1->drv && bs1->drv->protocol_name)
600 is_protocol = 1;
602 bdrv_delete(bs1);
604 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
606 /* Real path is meaningless for protocols */
607 if (is_protocol)
608 snprintf(backing_filename, sizeof(backing_filename),
609 "%s", filename);
610 else if (!realpath(filename, backing_filename))
611 return -errno;
613 bdrv_qcow2 = bdrv_find_format("qcow2");
614 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
616 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
617 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
618 if (drv) {
619 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
620 drv->format_name);
623 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
624 free_option_parameters(options);
625 if (ret < 0) {
626 return ret;
629 filename = tmp_filename;
630 drv = bdrv_qcow2;
631 bs->is_temporary = 1;
634 /* Find the right image format driver */
635 if (!drv) {
636 ret = find_image_format(filename, &drv);
639 if (!drv) {
640 goto unlink_and_fail;
643 /* Open the image */
644 ret = bdrv_open_common(bs, filename, flags, drv);
645 if (ret < 0) {
646 goto unlink_and_fail;
649 /* If there is a backing file, use it */
650 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
651 char backing_filename[PATH_MAX];
652 int back_flags;
653 BlockDriver *back_drv = NULL;
655 bs->backing_hd = bdrv_new("");
657 if (path_has_protocol(bs->backing_file)) {
658 pstrcpy(backing_filename, sizeof(backing_filename),
659 bs->backing_file);
660 } else {
661 path_combine(backing_filename, sizeof(backing_filename),
662 filename, bs->backing_file);
665 if (bs->backing_format[0] != '\0') {
666 back_drv = bdrv_find_format(bs->backing_format);
669 /* backing files always opened read-only */
670 back_flags =
671 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
673 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
674 if (ret < 0) {
675 bdrv_close(bs);
676 return ret;
678 if (bs->is_temporary) {
679 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
680 } else {
681 /* base image inherits from "parent" */
682 bs->backing_hd->keep_read_only = bs->keep_read_only;
686 if (!bdrv_key_required(bs)) {
687 bdrv_dev_change_media_cb(bs, true);
690 return 0;
692 unlink_and_fail:
693 if (bs->is_temporary) {
694 unlink(filename);
696 return ret;
699 void bdrv_close(BlockDriverState *bs)
701 if (bs->drv) {
702 if (bs == bs_snapshots) {
703 bs_snapshots = NULL;
705 if (bs->backing_hd) {
706 bdrv_delete(bs->backing_hd);
707 bs->backing_hd = NULL;
709 bs->drv->bdrv_close(bs);
710 g_free(bs->opaque);
711 #ifdef _WIN32
712 if (bs->is_temporary) {
713 unlink(bs->filename);
715 #endif
716 bs->opaque = NULL;
717 bs->drv = NULL;
719 if (bs->file != NULL) {
720 bdrv_close(bs->file);
723 bdrv_dev_change_media_cb(bs, false);
727 void bdrv_close_all(void)
729 BlockDriverState *bs;
731 QTAILQ_FOREACH(bs, &bdrv_states, list) {
732 bdrv_close(bs);
736 /* make a BlockDriverState anonymous by removing from bdrv_state list.
737 Also, NULL terminate the device_name to prevent double remove */
738 void bdrv_make_anon(BlockDriverState *bs)
740 if (bs->device_name[0] != '\0') {
741 QTAILQ_REMOVE(&bdrv_states, bs, list);
743 bs->device_name[0] = '\0';
746 void bdrv_delete(BlockDriverState *bs)
748 assert(!bs->dev);
750 /* remove from list, if necessary */
751 bdrv_make_anon(bs);
753 bdrv_close(bs);
754 if (bs->file != NULL) {
755 bdrv_delete(bs->file);
758 assert(bs != bs_snapshots);
759 g_free(bs);
762 int bdrv_attach_dev(BlockDriverState *bs, void *dev)
763 /* TODO change to DeviceState *dev when all users are qdevified */
765 if (bs->dev) {
766 return -EBUSY;
768 bs->dev = dev;
769 bdrv_iostatus_reset(bs);
770 return 0;
773 /* TODO qdevified devices don't use this, remove when devices are qdevified */
774 void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
776 if (bdrv_attach_dev(bs, dev) < 0) {
777 abort();
781 void bdrv_detach_dev(BlockDriverState *bs, void *dev)
782 /* TODO change to DeviceState *dev when all users are qdevified */
784 assert(bs->dev == dev);
785 bs->dev = NULL;
786 bs->dev_ops = NULL;
787 bs->dev_opaque = NULL;
788 bs->buffer_alignment = 512;
791 /* TODO change to return DeviceState * when all users are qdevified */
792 void *bdrv_get_attached_dev(BlockDriverState *bs)
794 return bs->dev;
797 void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
798 void *opaque)
800 bs->dev_ops = ops;
801 bs->dev_opaque = opaque;
802 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
803 bs_snapshots = NULL;
807 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
809 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
810 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
814 bool bdrv_dev_has_removable_media(BlockDriverState *bs)
816 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
819 void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
821 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
822 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
826 bool bdrv_dev_is_tray_open(BlockDriverState *bs)
828 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
829 return bs->dev_ops->is_tray_open(bs->dev_opaque);
831 return false;
834 static void bdrv_dev_resize_cb(BlockDriverState *bs)
836 if (bs->dev_ops && bs->dev_ops->resize_cb) {
837 bs->dev_ops->resize_cb(bs->dev_opaque);
841 bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
843 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
844 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
846 return false;
850 * Run consistency checks on an image
852 * Returns 0 if the check could be completed (it doesn't mean that the image is
853 * free of errors) or -errno when an internal error occurred. The results of the
854 * check are stored in res.
856 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
858 if (bs->drv->bdrv_check == NULL) {
859 return -ENOTSUP;
862 memset(res, 0, sizeof(*res));
863 return bs->drv->bdrv_check(bs, res);
866 #define COMMIT_BUF_SECTORS 2048
868 /* commit COW file into the raw image */
869 int bdrv_commit(BlockDriverState *bs)
871 BlockDriver *drv = bs->drv;
872 BlockDriver *backing_drv;
873 int64_t sector, total_sectors;
874 int n, ro, open_flags;
875 int ret = 0, rw_ret = 0;
876 uint8_t *buf;
877 char filename[1024];
878 BlockDriverState *bs_rw, *bs_ro;
880 if (!drv)
881 return -ENOMEDIUM;
883 if (!bs->backing_hd) {
884 return -ENOTSUP;
887 if (bs->backing_hd->keep_read_only) {
888 return -EACCES;
891 backing_drv = bs->backing_hd->drv;
892 ro = bs->backing_hd->read_only;
893 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
894 open_flags = bs->backing_hd->open_flags;
896 if (ro) {
897 /* re-open as RW */
898 bdrv_delete(bs->backing_hd);
899 bs->backing_hd = NULL;
900 bs_rw = bdrv_new("");
901 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
902 backing_drv);
903 if (rw_ret < 0) {
904 bdrv_delete(bs_rw);
905 /* try to re-open read-only */
906 bs_ro = bdrv_new("");
907 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
908 backing_drv);
909 if (ret < 0) {
910 bdrv_delete(bs_ro);
911 /* drive not functional anymore */
912 bs->drv = NULL;
913 return ret;
915 bs->backing_hd = bs_ro;
916 return rw_ret;
918 bs->backing_hd = bs_rw;
921 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
922 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
924 for (sector = 0; sector < total_sectors; sector += n) {
925 if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
927 if (bdrv_read(bs, sector, buf, n) != 0) {
928 ret = -EIO;
929 goto ro_cleanup;
932 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
933 ret = -EIO;
934 goto ro_cleanup;
939 if (drv->bdrv_make_empty) {
940 ret = drv->bdrv_make_empty(bs);
941 bdrv_flush(bs);
945 * Make sure all data we wrote to the backing device is actually
946 * stable on disk.
948 if (bs->backing_hd)
949 bdrv_flush(bs->backing_hd);
951 ro_cleanup:
952 g_free(buf);
954 if (ro) {
955 /* re-open as RO */
956 bdrv_delete(bs->backing_hd);
957 bs->backing_hd = NULL;
958 bs_ro = bdrv_new("");
959 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
960 backing_drv);
961 if (ret < 0) {
962 bdrv_delete(bs_ro);
963 /* drive not functional anymore */
964 bs->drv = NULL;
965 return ret;
967 bs->backing_hd = bs_ro;
968 bs->backing_hd->keep_read_only = 0;
971 return ret;
974 void bdrv_commit_all(void)
976 BlockDriverState *bs;
978 QTAILQ_FOREACH(bs, &bdrv_states, list) {
979 bdrv_commit(bs);
984 * Return values:
985 * 0 - success
986 * -EINVAL - backing format specified, but no file
987 * -ENOSPC - can't update the backing file because no space is left in the
988 * image file header
989 * -ENOTSUP - format driver doesn't support changing the backing file
991 int bdrv_change_backing_file(BlockDriverState *bs,
992 const char *backing_file, const char *backing_fmt)
994 BlockDriver *drv = bs->drv;
996 if (drv->bdrv_change_backing_file != NULL) {
997 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
998 } else {
999 return -ENOTSUP;
1003 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1004 size_t size)
1006 int64_t len;
1008 if (!bdrv_is_inserted(bs))
1009 return -ENOMEDIUM;
1011 if (bs->growable)
1012 return 0;
1014 len = bdrv_getlength(bs);
1016 if (offset < 0)
1017 return -EIO;
1019 if ((offset > len) || (len - offset < size))
1020 return -EIO;
1022 return 0;
1025 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1026 int nb_sectors)
1028 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1029 nb_sectors * BDRV_SECTOR_SIZE);
1032 typedef struct RwCo {
1033 BlockDriverState *bs;
1034 int64_t sector_num;
1035 int nb_sectors;
1036 QEMUIOVector *qiov;
1037 bool is_write;
1038 int ret;
1039 } RwCo;
1041 static void coroutine_fn bdrv_rw_co_entry(void *opaque)
1043 RwCo *rwco = opaque;
1045 if (!rwco->is_write) {
1046 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
1047 rwco->nb_sectors, rwco->qiov);
1048 } else {
1049 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
1050 rwco->nb_sectors, rwco->qiov);
1055 * Process a synchronous request using coroutines
1057 static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1058 int nb_sectors, bool is_write)
1060 QEMUIOVector qiov;
1061 struct iovec iov = {
1062 .iov_base = (void *)buf,
1063 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1065 Coroutine *co;
1066 RwCo rwco = {
1067 .bs = bs,
1068 .sector_num = sector_num,
1069 .nb_sectors = nb_sectors,
1070 .qiov = &qiov,
1071 .is_write = is_write,
1072 .ret = NOT_DONE,
1075 qemu_iovec_init_external(&qiov, &iov, 1);
1077 if (qemu_in_coroutine()) {
1078 /* Fast-path if already in coroutine context */
1079 bdrv_rw_co_entry(&rwco);
1080 } else {
1081 co = qemu_coroutine_create(bdrv_rw_co_entry);
1082 qemu_coroutine_enter(co, &rwco);
1083 while (rwco.ret == NOT_DONE) {
1084 qemu_aio_wait();
1087 return rwco.ret;
1090 /* return < 0 if error. See bdrv_write() for the return codes */
1091 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
1092 uint8_t *buf, int nb_sectors)
1094 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
1097 static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
1098 int nb_sectors, int dirty)
1100 int64_t start, end;
1101 unsigned long val, idx, bit;
1103 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
1104 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
1106 for (; start <= end; start++) {
1107 idx = start / (sizeof(unsigned long) * 8);
1108 bit = start % (sizeof(unsigned long) * 8);
1109 val = bs->dirty_bitmap[idx];
1110 if (dirty) {
1111 if (!(val & (1UL << bit))) {
1112 bs->dirty_count++;
1113 val |= 1UL << bit;
1115 } else {
1116 if (val & (1UL << bit)) {
1117 bs->dirty_count--;
1118 val &= ~(1UL << bit);
1121 bs->dirty_bitmap[idx] = val;
1125 /* Return < 0 if error. Important errors are:
1126 -EIO generic I/O error (may happen for all errors)
1127 -ENOMEDIUM No media inserted.
1128 -EINVAL Invalid sector number or nb_sectors
1129 -EACCES Trying to write a read-only device
1131 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
1132 const uint8_t *buf, int nb_sectors)
1134 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
1137 int bdrv_pread(BlockDriverState *bs, int64_t offset,
1138 void *buf, int count1)
1140 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
1141 int len, nb_sectors, count;
1142 int64_t sector_num;
1143 int ret;
1145 count = count1;
1146 /* first read to align to sector start */
1147 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
1148 if (len > count)
1149 len = count;
1150 sector_num = offset >> BDRV_SECTOR_BITS;
1151 if (len > 0) {
1152 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1153 return ret;
1154 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
1155 count -= len;
1156 if (count == 0)
1157 return count1;
1158 sector_num++;
1159 buf += len;
1162 /* read the sectors "in place" */
1163 nb_sectors = count >> BDRV_SECTOR_BITS;
1164 if (nb_sectors > 0) {
1165 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1166 return ret;
1167 sector_num += nb_sectors;
1168 len = nb_sectors << BDRV_SECTOR_BITS;
1169 buf += len;
1170 count -= len;
1173 /* add data from the last sector */
1174 if (count > 0) {
1175 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1176 return ret;
1177 memcpy(buf, tmp_buf, count);
1179 return count1;
1182 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1183 const void *buf, int count1)
1185 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
1186 int len, nb_sectors, count;
1187 int64_t sector_num;
1188 int ret;
1190 count = count1;
1191 /* first write to align to sector start */
1192 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
1193 if (len > count)
1194 len = count;
1195 sector_num = offset >> BDRV_SECTOR_BITS;
1196 if (len > 0) {
1197 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1198 return ret;
1199 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
1200 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1201 return ret;
1202 count -= len;
1203 if (count == 0)
1204 return count1;
1205 sector_num++;
1206 buf += len;
1209 /* write the sectors "in place" */
1210 nb_sectors = count >> BDRV_SECTOR_BITS;
1211 if (nb_sectors > 0) {
1212 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1213 return ret;
1214 sector_num += nb_sectors;
1215 len = nb_sectors << BDRV_SECTOR_BITS;
1216 buf += len;
1217 count -= len;
1220 /* add data from the last sector */
1221 if (count > 0) {
1222 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1223 return ret;
1224 memcpy(tmp_buf, buf, count);
1225 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1226 return ret;
1228 return count1;
1232 * Writes to the file and ensures that no writes are reordered across this
1233 * request (acts as a barrier)
1235 * Returns 0 on success, -errno in error cases.
1237 int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1238 const void *buf, int count)
1240 int ret;
1242 ret = bdrv_pwrite(bs, offset, buf, count);
1243 if (ret < 0) {
1244 return ret;
1247 /* No flush needed for cache modes that use O_DSYNC */
1248 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
1249 bdrv_flush(bs);
1252 return 0;
1256 * Handle a read request in coroutine context
1258 static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
1259 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1261 BlockDriver *drv = bs->drv;
1263 if (!drv) {
1264 return -ENOMEDIUM;
1266 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1267 return -EIO;
1270 return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1273 int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
1274 int nb_sectors, QEMUIOVector *qiov)
1276 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
1278 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
1282 * Handle a write request in coroutine context
1284 static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
1285 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1287 BlockDriver *drv = bs->drv;
1288 int ret;
1290 if (!bs->drv) {
1291 return -ENOMEDIUM;
1293 if (bs->read_only) {
1294 return -EACCES;
1296 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1297 return -EIO;
1300 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1302 if (bs->dirty_bitmap) {
1303 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1306 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1307 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1310 return ret;
1313 int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1314 int nb_sectors, QEMUIOVector *qiov)
1316 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1318 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
1322 * Truncate file to 'offset' bytes (needed only for file protocols)
1324 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1326 BlockDriver *drv = bs->drv;
1327 int ret;
1328 if (!drv)
1329 return -ENOMEDIUM;
1330 if (!drv->bdrv_truncate)
1331 return -ENOTSUP;
1332 if (bs->read_only)
1333 return -EACCES;
1334 if (bdrv_in_use(bs))
1335 return -EBUSY;
1336 ret = drv->bdrv_truncate(bs, offset);
1337 if (ret == 0) {
1338 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
1339 bdrv_dev_resize_cb(bs);
1341 return ret;
1345 * Length of a allocated file in bytes. Sparse files are counted by actual
1346 * allocated space. Return < 0 if error or unknown.
1348 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1350 BlockDriver *drv = bs->drv;
1351 if (!drv) {
1352 return -ENOMEDIUM;
1354 if (drv->bdrv_get_allocated_file_size) {
1355 return drv->bdrv_get_allocated_file_size(bs);
1357 if (bs->file) {
1358 return bdrv_get_allocated_file_size(bs->file);
1360 return -ENOTSUP;
1364 * Length of a file in bytes. Return < 0 if error or unknown.
1366 int64_t bdrv_getlength(BlockDriverState *bs)
1368 BlockDriver *drv = bs->drv;
1369 if (!drv)
1370 return -ENOMEDIUM;
1372 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
1373 if (drv->bdrv_getlength) {
1374 return drv->bdrv_getlength(bs);
1377 return bs->total_sectors * BDRV_SECTOR_SIZE;
1380 /* return 0 as number of sectors if no device present or error */
1381 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
1383 int64_t length;
1384 length = bdrv_getlength(bs);
1385 if (length < 0)
1386 length = 0;
1387 else
1388 length = length >> BDRV_SECTOR_BITS;
1389 *nb_sectors_ptr = length;
/* On-disk MS-DOS partition table entry (little-endian fields, packed). */
struct partition {
    uint8_t boot_ind;       /* 0x80 - active */
    uint8_t head;           /* starting head */
    uint8_t sector;         /* starting sector */
    uint8_t cyl;            /* starting cylinder */
    uint8_t sys_ind;        /* What partition type */
    uint8_t end_head;       /* end head */
    uint8_t end_sector;     /* end sector */
    uint8_t end_cyl;        /* end cylinder */
    uint32_t start_sect;    /* starting sector counting from 0 */
    uint32_t nr_sects;      /* nr of sectors in partition */
} QEMU_PACKED;
1405 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1406 static int guess_disk_lchs(BlockDriverState *bs,
1407 int *pcylinders, int *pheads, int *psectors)
1409 uint8_t buf[BDRV_SECTOR_SIZE];
1410 int ret, i, heads, sectors, cylinders;
1411 struct partition *p;
1412 uint32_t nr_sects;
1413 uint64_t nb_sectors;
1415 bdrv_get_geometry(bs, &nb_sectors);
1417 ret = bdrv_read(bs, 0, buf, 1);
1418 if (ret < 0)
1419 return -1;
1420 /* test msdos magic */
1421 if (buf[510] != 0x55 || buf[511] != 0xaa)
1422 return -1;
1423 for(i = 0; i < 4; i++) {
1424 p = ((struct partition *)(buf + 0x1be)) + i;
1425 nr_sects = le32_to_cpu(p->nr_sects);
1426 if (nr_sects && p->end_head) {
1427 /* We make the assumption that the partition terminates on
1428 a cylinder boundary */
1429 heads = p->end_head + 1;
1430 sectors = p->end_sector & 63;
1431 if (sectors == 0)
1432 continue;
1433 cylinders = nb_sectors / (heads * sectors);
1434 if (cylinders < 1 || cylinders > 16383)
1435 continue;
1436 *pheads = heads;
1437 *psectors = sectors;
1438 *pcylinders = cylinders;
1439 #if 0
1440 printf("guessed geometry: LCHS=%d %d %d\n",
1441 cylinders, heads, sectors);
1442 #endif
1443 return 0;
1446 return -1;
1449 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1451 int translation, lba_detected = 0;
1452 int cylinders, heads, secs;
1453 uint64_t nb_sectors;
1455 /* if a geometry hint is available, use it */
1456 bdrv_get_geometry(bs, &nb_sectors);
1457 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1458 translation = bdrv_get_translation_hint(bs);
1459 if (cylinders != 0) {
1460 *pcyls = cylinders;
1461 *pheads = heads;
1462 *psecs = secs;
1463 } else {
1464 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1465 if (heads > 16) {
1466 /* if heads > 16, it means that a BIOS LBA
1467 translation was active, so the default
1468 hardware geometry is OK */
1469 lba_detected = 1;
1470 goto default_geometry;
1471 } else {
1472 *pcyls = cylinders;
1473 *pheads = heads;
1474 *psecs = secs;
1475 /* disable any translation to be in sync with
1476 the logical geometry */
1477 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1478 bdrv_set_translation_hint(bs,
1479 BIOS_ATA_TRANSLATION_NONE);
1482 } else {
1483 default_geometry:
1484 /* if no geometry, use a standard physical disk geometry */
1485 cylinders = nb_sectors / (16 * 63);
1487 if (cylinders > 16383)
1488 cylinders = 16383;
1489 else if (cylinders < 2)
1490 cylinders = 2;
1491 *pcyls = cylinders;
1492 *pheads = 16;
1493 *psecs = 63;
1494 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1495 if ((*pcyls * *pheads) <= 131072) {
1496 bdrv_set_translation_hint(bs,
1497 BIOS_ATA_TRANSLATION_LARGE);
1498 } else {
1499 bdrv_set_translation_hint(bs,
1500 BIOS_ATA_TRANSLATION_LBA);
1504 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1508 void bdrv_set_geometry_hint(BlockDriverState *bs,
1509 int cyls, int heads, int secs)
1511 bs->cyls = cyls;
1512 bs->heads = heads;
1513 bs->secs = secs;
1516 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1518 bs->translation = translation;
1521 void bdrv_get_geometry_hint(BlockDriverState *bs,
1522 int *pcyls, int *pheads, int *psecs)
1524 *pcyls = bs->cyls;
1525 *pheads = bs->heads;
1526 *psecs = bs->secs;
1529 /* Recognize floppy formats */
1530 typedef struct FDFormat {
1531 FDriveType drive;
1532 uint8_t last_sect;
1533 uint8_t max_track;
1534 uint8_t max_head;
1535 } FDFormat;
1537 static const FDFormat fd_formats[] = {
1538 /* First entry is default format */
1539 /* 1.44 MB 3"1/2 floppy disks */
1540 { FDRIVE_DRV_144, 18, 80, 1, },
1541 { FDRIVE_DRV_144, 20, 80, 1, },
1542 { FDRIVE_DRV_144, 21, 80, 1, },
1543 { FDRIVE_DRV_144, 21, 82, 1, },
1544 { FDRIVE_DRV_144, 21, 83, 1, },
1545 { FDRIVE_DRV_144, 22, 80, 1, },
1546 { FDRIVE_DRV_144, 23, 80, 1, },
1547 { FDRIVE_DRV_144, 24, 80, 1, },
1548 /* 2.88 MB 3"1/2 floppy disks */
1549 { FDRIVE_DRV_288, 36, 80, 1, },
1550 { FDRIVE_DRV_288, 39, 80, 1, },
1551 { FDRIVE_DRV_288, 40, 80, 1, },
1552 { FDRIVE_DRV_288, 44, 80, 1, },
1553 { FDRIVE_DRV_288, 48, 80, 1, },
1554 /* 720 kB 3"1/2 floppy disks */
1555 { FDRIVE_DRV_144, 9, 80, 1, },
1556 { FDRIVE_DRV_144, 10, 80, 1, },
1557 { FDRIVE_DRV_144, 10, 82, 1, },
1558 { FDRIVE_DRV_144, 10, 83, 1, },
1559 { FDRIVE_DRV_144, 13, 80, 1, },
1560 { FDRIVE_DRV_144, 14, 80, 1, },
1561 /* 1.2 MB 5"1/4 floppy disks */
1562 { FDRIVE_DRV_120, 15, 80, 1, },
1563 { FDRIVE_DRV_120, 18, 80, 1, },
1564 { FDRIVE_DRV_120, 18, 82, 1, },
1565 { FDRIVE_DRV_120, 18, 83, 1, },
1566 { FDRIVE_DRV_120, 20, 80, 1, },
1567 /* 720 kB 5"1/4 floppy disks */
1568 { FDRIVE_DRV_120, 9, 80, 1, },
1569 { FDRIVE_DRV_120, 11, 80, 1, },
1570 /* 360 kB 5"1/4 floppy disks */
1571 { FDRIVE_DRV_120, 9, 40, 1, },
1572 { FDRIVE_DRV_120, 9, 40, 0, },
1573 { FDRIVE_DRV_120, 10, 41, 1, },
1574 { FDRIVE_DRV_120, 10, 42, 1, },
1575 /* 320 kB 5"1/4 floppy disks */
1576 { FDRIVE_DRV_120, 8, 40, 1, },
1577 { FDRIVE_DRV_120, 8, 40, 0, },
1578 /* 360 kB must match 5"1/4 better than 3"1/2... */
1579 { FDRIVE_DRV_144, 9, 80, 0, },
1580 /* end */
1581 { FDRIVE_DRV_NONE, -1, -1, 0, },
1584 void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1585 int *max_track, int *last_sect,
1586 FDriveType drive_in, FDriveType *drive)
1588 const FDFormat *parse;
1589 uint64_t nb_sectors, size;
1590 int i, first_match, match;
1592 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1593 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1594 /* User defined disk */
1595 } else {
1596 bdrv_get_geometry(bs, &nb_sectors);
1597 match = -1;
1598 first_match = -1;
1599 for (i = 0; ; i++) {
1600 parse = &fd_formats[i];
1601 if (parse->drive == FDRIVE_DRV_NONE) {
1602 break;
1604 if (drive_in == parse->drive ||
1605 drive_in == FDRIVE_DRV_NONE) {
1606 size = (parse->max_head + 1) * parse->max_track *
1607 parse->last_sect;
1608 if (nb_sectors == size) {
1609 match = i;
1610 break;
1612 if (first_match == -1) {
1613 first_match = i;
1617 if (match == -1) {
1618 if (first_match == -1) {
1619 match = 1;
1620 } else {
1621 match = first_match;
1623 parse = &fd_formats[match];
1625 *nb_heads = parse->max_head + 1;
1626 *max_track = parse->max_track;
1627 *last_sect = parse->last_sect;
1628 *drive = parse->drive;
1632 int bdrv_get_translation_hint(BlockDriverState *bs)
1634 return bs->translation;
1637 void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1638 BlockErrorAction on_write_error)
1640 bs->on_read_error = on_read_error;
1641 bs->on_write_error = on_write_error;
1644 BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1646 return is_read ? bs->on_read_error : bs->on_write_error;
1649 int bdrv_is_read_only(BlockDriverState *bs)
1651 return bs->read_only;
1654 int bdrv_is_sg(BlockDriverState *bs)
1656 return bs->sg;
1659 int bdrv_enable_write_cache(BlockDriverState *bs)
1661 return bs->enable_write_cache;
1664 int bdrv_is_encrypted(BlockDriverState *bs)
1666 if (bs->backing_hd && bs->backing_hd->encrypted)
1667 return 1;
1668 return bs->encrypted;
1671 int bdrv_key_required(BlockDriverState *bs)
1673 BlockDriverState *backing_hd = bs->backing_hd;
1675 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1676 return 1;
1677 return (bs->encrypted && !bs->valid_key);
1680 int bdrv_set_key(BlockDriverState *bs, const char *key)
1682 int ret;
1683 if (bs->backing_hd && bs->backing_hd->encrypted) {
1684 ret = bdrv_set_key(bs->backing_hd, key);
1685 if (ret < 0)
1686 return ret;
1687 if (!bs->encrypted)
1688 return 0;
1690 if (!bs->encrypted) {
1691 return -EINVAL;
1692 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
1693 return -ENOMEDIUM;
1695 ret = bs->drv->bdrv_set_key(bs, key);
1696 if (ret < 0) {
1697 bs->valid_key = 0;
1698 } else if (!bs->valid_key) {
1699 bs->valid_key = 1;
1700 /* call the change callback now, we skipped it on open */
1701 bdrv_dev_change_media_cb(bs, true);
1703 return ret;
1706 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1708 if (!bs->drv) {
1709 buf[0] = '\0';
1710 } else {
1711 pstrcpy(buf, buf_size, bs->drv->format_name);
1715 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1716 void *opaque)
1718 BlockDriver *drv;
1720 QLIST_FOREACH(drv, &bdrv_drivers, list) {
1721 it(opaque, drv->format_name);
1725 BlockDriverState *bdrv_find(const char *name)
1727 BlockDriverState *bs;
1729 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1730 if (!strcmp(name, bs->device_name)) {
1731 return bs;
1734 return NULL;
1737 BlockDriverState *bdrv_next(BlockDriverState *bs)
1739 if (!bs) {
1740 return QTAILQ_FIRST(&bdrv_states);
1742 return QTAILQ_NEXT(bs, list);
1745 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
1747 BlockDriverState *bs;
1749 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1750 it(opaque, bs);
1754 const char *bdrv_get_device_name(BlockDriverState *bs)
1756 return bs->device_name;
1759 void bdrv_flush_all(void)
1761 BlockDriverState *bs;
1763 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1764 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
1765 bdrv_flush(bs);
1770 int bdrv_has_zero_init(BlockDriverState *bs)
1772 assert(bs->drv);
1774 if (bs->drv->bdrv_has_zero_init) {
1775 return bs->drv->bdrv_has_zero_init(bs);
1778 return 1;
1782 * Returns true iff the specified sector is present in the disk image. Drivers
1783 * not implementing the functionality are assumed to not support backing files,
1784 * hence all their sectors are reported as allocated.
1786 * 'pnum' is set to the number of sectors (including and immediately following
1787 * the specified sector) that are known to be in the same
1788 * allocated/unallocated state.
1790 * 'nb_sectors' is the max value 'pnum' should be set to.
1792 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1793 int *pnum)
1795 int64_t n;
1796 if (!bs->drv->bdrv_is_allocated) {
1797 if (sector_num >= bs->total_sectors) {
1798 *pnum = 0;
1799 return 0;
1801 n = bs->total_sectors - sector_num;
1802 *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1803 return 1;
1805 return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1808 void bdrv_mon_event(const BlockDriverState *bdrv,
1809 BlockMonEventAction action, int is_read)
1811 QObject *data;
1812 const char *action_str;
1814 switch (action) {
1815 case BDRV_ACTION_REPORT:
1816 action_str = "report";
1817 break;
1818 case BDRV_ACTION_IGNORE:
1819 action_str = "ignore";
1820 break;
1821 case BDRV_ACTION_STOP:
1822 action_str = "stop";
1823 break;
1824 default:
1825 abort();
1828 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1829 bdrv->device_name,
1830 action_str,
1831 is_read ? "read" : "write");
1832 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1834 qobject_decref(data);
1837 BlockInfoList *qmp_query_block(Error **errp)
1839 BlockInfoList *head = NULL, *cur_item = NULL;
1840 BlockDriverState *bs;
1842 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1843 BlockInfoList *info = g_malloc0(sizeof(*info));
1845 info->value = g_malloc0(sizeof(*info->value));
1846 info->value->device = g_strdup(bs->device_name);
1847 info->value->type = g_strdup("unknown");
1848 info->value->locked = bdrv_dev_is_medium_locked(bs);
1849 info->value->removable = bdrv_dev_has_removable_media(bs);
1851 if (bdrv_dev_has_removable_media(bs)) {
1852 info->value->has_tray_open = true;
1853 info->value->tray_open = bdrv_dev_is_tray_open(bs);
1856 if (bdrv_iostatus_is_enabled(bs)) {
1857 info->value->has_io_status = true;
1858 info->value->io_status = bs->iostatus;
1861 if (bs->drv) {
1862 info->value->has_inserted = true;
1863 info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
1864 info->value->inserted->file = g_strdup(bs->filename);
1865 info->value->inserted->ro = bs->read_only;
1866 info->value->inserted->drv = g_strdup(bs->drv->format_name);
1867 info->value->inserted->encrypted = bs->encrypted;
1868 if (bs->backing_file[0]) {
1869 info->value->inserted->has_backing_file = true;
1870 info->value->inserted->backing_file = g_strdup(bs->backing_file);
1874 /* XXX: waiting for the qapi to support GSList */
1875 if (!cur_item) {
1876 head = cur_item = info;
1877 } else {
1878 cur_item->next = info;
1879 cur_item = info;
1883 return head;
1886 /* Consider exposing this as a full fledged QMP command */
1887 static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
1889 BlockStats *s;
1891 s = g_malloc0(sizeof(*s));
1893 if (bs->device_name[0]) {
1894 s->has_device = true;
1895 s->device = g_strdup(bs->device_name);
1898 s->stats = g_malloc0(sizeof(*s->stats));
1899 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
1900 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
1901 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
1902 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
1903 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
1904 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
1905 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
1906 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
1907 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
1909 if (bs->file) {
1910 s->has_parent = true;
1911 s->parent = qmp_query_blockstat(bs->file, NULL);
1914 return s;
1917 BlockStatsList *qmp_query_blockstats(Error **errp)
1919 BlockStatsList *head = NULL, *cur_item = NULL;
1920 BlockDriverState *bs;
1922 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1923 BlockStatsList *info = g_malloc0(sizeof(*info));
1924 info->value = qmp_query_blockstat(bs, NULL);
1926 /* XXX: waiting for the qapi to support GSList */
1927 if (!cur_item) {
1928 head = cur_item = info;
1929 } else {
1930 cur_item->next = info;
1931 cur_item = info;
1935 return head;
1938 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
1940 if (bs->backing_hd && bs->backing_hd->encrypted)
1941 return bs->backing_file;
1942 else if (bs->encrypted)
1943 return bs->filename;
1944 else
1945 return NULL;
1948 void bdrv_get_backing_filename(BlockDriverState *bs,
1949 char *filename, int filename_size)
1951 pstrcpy(filename, filename_size, bs->backing_file);
1954 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
1955 const uint8_t *buf, int nb_sectors)
1957 BlockDriver *drv = bs->drv;
1958 if (!drv)
1959 return -ENOMEDIUM;
1960 if (!drv->bdrv_write_compressed)
1961 return -ENOTSUP;
1962 if (bdrv_check_request(bs, sector_num, nb_sectors))
1963 return -EIO;
1965 if (bs->dirty_bitmap) {
1966 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1969 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
1972 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1974 BlockDriver *drv = bs->drv;
1975 if (!drv)
1976 return -ENOMEDIUM;
1977 if (!drv->bdrv_get_info)
1978 return -ENOTSUP;
1979 memset(bdi, 0, sizeof(*bdi));
1980 return drv->bdrv_get_info(bs, bdi);
1983 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
1984 int64_t pos, int size)
1986 BlockDriver *drv = bs->drv;
1987 if (!drv)
1988 return -ENOMEDIUM;
1989 if (drv->bdrv_save_vmstate)
1990 return drv->bdrv_save_vmstate(bs, buf, pos, size);
1991 if (bs->file)
1992 return bdrv_save_vmstate(bs->file, buf, pos, size);
1993 return -ENOTSUP;
1996 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
1997 int64_t pos, int size)
1999 BlockDriver *drv = bs->drv;
2000 if (!drv)
2001 return -ENOMEDIUM;
2002 if (drv->bdrv_load_vmstate)
2003 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2004 if (bs->file)
2005 return bdrv_load_vmstate(bs->file, buf, pos, size);
2006 return -ENOTSUP;
2009 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2011 BlockDriver *drv = bs->drv;
2013 if (!drv || !drv->bdrv_debug_event) {
2014 return;
2017 return drv->bdrv_debug_event(bs, event);
2021 /**************************************************************/
2022 /* handling of snapshots */
2024 int bdrv_can_snapshot(BlockDriverState *bs)
2026 BlockDriver *drv = bs->drv;
2027 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
2028 return 0;
2031 if (!drv->bdrv_snapshot_create) {
2032 if (bs->file != NULL) {
2033 return bdrv_can_snapshot(bs->file);
2035 return 0;
2038 return 1;
2041 int bdrv_is_snapshot(BlockDriverState *bs)
2043 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2046 BlockDriverState *bdrv_snapshots(void)
2048 BlockDriverState *bs;
2050 if (bs_snapshots) {
2051 return bs_snapshots;
2054 bs = NULL;
2055 while ((bs = bdrv_next(bs))) {
2056 if (bdrv_can_snapshot(bs)) {
2057 bs_snapshots = bs;
2058 return bs;
2061 return NULL;
2064 int bdrv_snapshot_create(BlockDriverState *bs,
2065 QEMUSnapshotInfo *sn_info)
2067 BlockDriver *drv = bs->drv;
2068 if (!drv)
2069 return -ENOMEDIUM;
2070 if (drv->bdrv_snapshot_create)
2071 return drv->bdrv_snapshot_create(bs, sn_info);
2072 if (bs->file)
2073 return bdrv_snapshot_create(bs->file, sn_info);
2074 return -ENOTSUP;
2077 int bdrv_snapshot_goto(BlockDriverState *bs,
2078 const char *snapshot_id)
2080 BlockDriver *drv = bs->drv;
2081 int ret, open_ret;
2083 if (!drv)
2084 return -ENOMEDIUM;
2085 if (drv->bdrv_snapshot_goto)
2086 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2088 if (bs->file) {
2089 drv->bdrv_close(bs);
2090 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2091 open_ret = drv->bdrv_open(bs, bs->open_flags);
2092 if (open_ret < 0) {
2093 bdrv_delete(bs->file);
2094 bs->drv = NULL;
2095 return open_ret;
2097 return ret;
2100 return -ENOTSUP;
2103 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2105 BlockDriver *drv = bs->drv;
2106 if (!drv)
2107 return -ENOMEDIUM;
2108 if (drv->bdrv_snapshot_delete)
2109 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2110 if (bs->file)
2111 return bdrv_snapshot_delete(bs->file, snapshot_id);
2112 return -ENOTSUP;
2115 int bdrv_snapshot_list(BlockDriverState *bs,
2116 QEMUSnapshotInfo **psn_info)
2118 BlockDriver *drv = bs->drv;
2119 if (!drv)
2120 return -ENOMEDIUM;
2121 if (drv->bdrv_snapshot_list)
2122 return drv->bdrv_snapshot_list(bs, psn_info);
2123 if (bs->file)
2124 return bdrv_snapshot_list(bs->file, psn_info);
2125 return -ENOTSUP;
2128 int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2129 const char *snapshot_name)
2131 BlockDriver *drv = bs->drv;
2132 if (!drv) {
2133 return -ENOMEDIUM;
2135 if (!bs->read_only) {
2136 return -EINVAL;
2138 if (drv->bdrv_snapshot_load_tmp) {
2139 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2141 return -ENOTSUP;
#define NB_SUFFIXES 4

/*
 * Format a byte count into buf using 1024-based units.
 * Values up to 999 are printed verbatim; larger values get one of the
 * K/M/G/T suffixes, with one decimal place when below ten units.
 * Returns buf for convenient chaining into printf-style calls.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    /* no terminating NUL: entries are only indexed individually */
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t base = 1024;
    int i;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (i = 0; i < NB_SUFFIXES; i++) {
        if (size < (10 * base)) {
            /* small multiple of the unit: keep one fractional digit */
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / base, suffixes[i]);
            break;
        } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
            /* round to the nearest whole unit */
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     ((size + (base >> 1)) / base), suffixes[i]);
            break;
        }
        base *= 1024;
    }
    return buf;
}
2174 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2176 char buf1[128], date_buf[128], clock_buf[128];
2177 #ifdef _WIN32
2178 struct tm *ptm;
2179 #else
2180 struct tm tm;
2181 #endif
2182 time_t ti;
2183 int64_t secs;
2185 if (!sn) {
2186 snprintf(buf, buf_size,
2187 "%-10s%-20s%7s%20s%15s",
2188 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2189 } else {
2190 ti = sn->date_sec;
2191 #ifdef _WIN32
2192 ptm = localtime(&ti);
2193 strftime(date_buf, sizeof(date_buf),
2194 "%Y-%m-%d %H:%M:%S", ptm);
2195 #else
2196 localtime_r(&ti, &tm);
2197 strftime(date_buf, sizeof(date_buf),
2198 "%Y-%m-%d %H:%M:%S", &tm);
2199 #endif
2200 secs = sn->vm_clock_nsec / 1000000000;
2201 snprintf(clock_buf, sizeof(clock_buf),
2202 "%02d:%02d:%02d.%03d",
2203 (int)(secs / 3600),
2204 (int)((secs / 60) % 60),
2205 (int)(secs % 60),
2206 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2207 snprintf(buf, buf_size,
2208 "%-10s%-20s%7s%20s%15s",
2209 sn->id_str, sn->name,
2210 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2211 date_buf,
2212 clock_buf);
2214 return buf;
2217 /**************************************************************/
2218 /* async I/Os */
2220 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
2221 QEMUIOVector *qiov, int nb_sectors,
2222 BlockDriverCompletionFunc *cb, void *opaque)
2224 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2226 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
2227 cb, opaque, false);
2230 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2231 QEMUIOVector *qiov, int nb_sectors,
2232 BlockDriverCompletionFunc *cb, void *opaque)
2234 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2236 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
2237 cb, opaque, true);
2241 typedef struct MultiwriteCB {
2242 int error;
2243 int num_requests;
2244 int num_callbacks;
2245 struct {
2246 BlockDriverCompletionFunc *cb;
2247 void *opaque;
2248 QEMUIOVector *free_qiov;
2249 void *free_buf;
2250 } callbacks[];
2251 } MultiwriteCB;
2253 static void multiwrite_user_cb(MultiwriteCB *mcb)
2255 int i;
2257 for (i = 0; i < mcb->num_callbacks; i++) {
2258 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
2259 if (mcb->callbacks[i].free_qiov) {
2260 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2262 g_free(mcb->callbacks[i].free_qiov);
2263 qemu_vfree(mcb->callbacks[i].free_buf);
2267 static void multiwrite_cb(void *opaque, int ret)
2269 MultiwriteCB *mcb = opaque;
2271 trace_multiwrite_cb(mcb, ret);
2273 if (ret < 0 && !mcb->error) {
2274 mcb->error = ret;
2277 mcb->num_requests--;
2278 if (mcb->num_requests == 0) {
2279 multiwrite_user_cb(mcb);
2280 g_free(mcb);
2284 static int multiwrite_req_compare(const void *a, const void *b)
2286 const BlockRequest *req1 = a, *req2 = b;
2289 * Note that we can't simply subtract req2->sector from req1->sector
2290 * here as that could overflow the return value.
2292 if (req1->sector > req2->sector) {
2293 return 1;
2294 } else if (req1->sector < req2->sector) {
2295 return -1;
2296 } else {
2297 return 0;
2302 * Takes a bunch of requests and tries to merge them. Returns the number of
2303 * requests that remain after merging.
2305 static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2306 int num_reqs, MultiwriteCB *mcb)
2308 int i, outidx;
2310 // Sort requests by start sector
2311 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2313 // Check if adjacent requests touch the same clusters. If so, combine them,
2314 // filling up gaps with zero sectors.
2315 outidx = 0;
2316 for (i = 1; i < num_reqs; i++) {
2317 int merge = 0;
2318 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2320 // This handles the cases that are valid for all block drivers, namely
2321 // exactly sequential writes and overlapping writes.
2322 if (reqs[i].sector <= oldreq_last) {
2323 merge = 1;
2326 // The block driver may decide that it makes sense to combine requests
2327 // even if there is a gap of some sectors between them. In this case,
2328 // the gap is filled with zeros (therefore only applicable for yet
2329 // unused space in format like qcow2).
2330 if (!merge && bs->drv->bdrv_merge_requests) {
2331 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2334 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2335 merge = 0;
2338 if (merge) {
2339 size_t size;
2340 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
2341 qemu_iovec_init(qiov,
2342 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2344 // Add the first request to the merged one. If the requests are
2345 // overlapping, drop the last sectors of the first request.
2346 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2347 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2349 // We might need to add some zeros between the two requests
2350 if (reqs[i].sector > oldreq_last) {
2351 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2352 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2353 memset(buf, 0, zero_bytes);
2354 qemu_iovec_add(qiov, buf, zero_bytes);
2355 mcb->callbacks[i].free_buf = buf;
2358 // Add the second request
2359 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2361 reqs[outidx].nb_sectors = qiov->size >> 9;
2362 reqs[outidx].qiov = qiov;
2364 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2365 } else {
2366 outidx++;
2367 reqs[outidx].sector = reqs[i].sector;
2368 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2369 reqs[outidx].qiov = reqs[i].qiov;
2373 return outidx + 1;
2377 * Submit multiple AIO write requests at once.
2379 * On success, the function returns 0 and all requests in the reqs array have
2380 * been submitted. In error case this function returns -1, and any of the
2381 * requests may or may not be submitted yet. In particular, this means that the
2382 * callback will be called for some of the requests, for others it won't. The
2383 * caller must check the error field of the BlockRequest to wait for the right
2384 * callbacks (if error != 0, no callback will be called).
2386 * The implementation may modify the contents of the reqs array, e.g. to merge
2387 * requests. However, the fields opaque and error are left unmodified as they
2388 * are used to signal failure for a single request to the caller.
2390 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2392 BlockDriverAIOCB *acb;
2393 MultiwriteCB *mcb;
2394 int i;
2396 /* don't submit writes if we don't have a medium */
2397 if (bs->drv == NULL) {
2398 for (i = 0; i < num_reqs; i++) {
2399 reqs[i].error = -ENOMEDIUM;
2401 return -1;
2404 if (num_reqs == 0) {
2405 return 0;
2408 // Create MultiwriteCB structure
2409 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
2410 mcb->num_requests = 0;
2411 mcb->num_callbacks = num_reqs;
2413 for (i = 0; i < num_reqs; i++) {
2414 mcb->callbacks[i].cb = reqs[i].cb;
2415 mcb->callbacks[i].opaque = reqs[i].opaque;
2418 // Check for mergable requests
2419 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2421 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2424 * Run the aio requests. As soon as one request can't be submitted
2425 * successfully, fail all requests that are not yet submitted (we must
2426 * return failure for all requests anyway)
2428 * num_requests cannot be set to the right value immediately: If
2429 * bdrv_aio_writev fails for some request, num_requests would be too high
2430 * and therefore multiwrite_cb() would never recognize the multiwrite
2431 * request as completed. We also cannot use the loop variable i to set it
2432 * when the first request fails because the callback may already have been
2433 * called for previously submitted requests. Thus, num_requests must be
2434 * incremented for each request that is submitted.
2436 * The problem that callbacks may be called early also means that we need
2437 * to take care that num_requests doesn't become 0 before all requests are
2438 * submitted - multiwrite_cb() would consider the multiwrite request
2439 * completed. A dummy request that is "completed" by a manual call to
2440 * multiwrite_cb() takes care of this.
2442 mcb->num_requests = 1;
2444 // Run the aio requests
2445 for (i = 0; i < num_reqs; i++) {
2446 mcb->num_requests++;
2447 acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2448 reqs[i].nb_sectors, multiwrite_cb, mcb);
2450 if (acb == NULL) {
2451 // We can only fail the whole thing if no request has been
2452 // submitted yet. Otherwise we'll wait for the submitted AIOs to
2453 // complete and report the error in the callback.
2454 if (i == 0) {
2455 trace_bdrv_aio_multiwrite_earlyfail(mcb);
2456 goto fail;
2457 } else {
2458 trace_bdrv_aio_multiwrite_latefail(mcb, i);
2459 multiwrite_cb(mcb, -EIO);
2460 break;
2465 /* Complete the dummy request */
2466 multiwrite_cb(mcb, 0);
2468 return 0;
2470 fail:
2471 for (i = 0; i < mcb->num_callbacks; i++) {
2472 reqs[i].error = -EIO;
2474 g_free(mcb);
2475 return -1;
2478 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
2480 acb->pool->cancel(acb);
2484 /**************************************************************/
2485 /* async block device emulation */
2487 typedef struct BlockDriverAIOCBSync {
2488 BlockDriverAIOCB common;
2489 QEMUBH *bh;
2490 int ret;
2491 /* vector translation state */
2492 QEMUIOVector *qiov;
2493 uint8_t *bounce;
2494 int is_write;
2495 } BlockDriverAIOCBSync;
2497 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2499 BlockDriverAIOCBSync *acb =
2500 container_of(blockacb, BlockDriverAIOCBSync, common);
2501 qemu_bh_delete(acb->bh);
2502 acb->bh = NULL;
2503 qemu_aio_release(acb);
2506 static AIOPool bdrv_em_aio_pool = {
2507 .aiocb_size = sizeof(BlockDriverAIOCBSync),
2508 .cancel = bdrv_aio_cancel_em,
2511 static void bdrv_aio_bh_cb(void *opaque)
2513 BlockDriverAIOCBSync *acb = opaque;
2515 if (!acb->is_write)
2516 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
2517 qemu_vfree(acb->bounce);
2518 acb->common.cb(acb->common.opaque, acb->ret);
2519 qemu_bh_delete(acb->bh);
2520 acb->bh = NULL;
2521 qemu_aio_release(acb);
/*
 * Emulate AIO on top of a driver's synchronous bdrv_read/bdrv_write.
 *
 * The I/O is performed synchronously through a linear bounce buffer and
 * the completion callback is deferred to a bottom half, so callers
 * always observe asynchronous completion semantics.
 */
static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
                                            int64_t sector_num,
                                            QEMUIOVector *qiov,
                                            int nb_sectors,
                                            BlockDriverCompletionFunc *cb,
                                            void *opaque,
                                            int is_write)
{
    BlockDriverAIOCBSync *acb;

    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
    acb->is_write = is_write;
    acb->qiov = qiov;
    /* Suitably aligned linear buffer covering the whole vector. */
    acb->bounce = qemu_blockalign(bs, qiov->size);

    /* bh is normally NULL here (cleared on completion/cancel); create
     * one lazily if this AIOCB doesn't own one yet. */
    if (!acb->bh)
        acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);

    if (is_write) {
        qemu_iovec_to_buffer(acb->qiov, acb->bounce);
        acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
    } else {
        acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
    }

    /* Completion (bdrv_aio_bh_cb) runs later from BH context. */
    qemu_bh_schedule(acb->bh);

    return &acb->common;
}
2555 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2556 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2557 BlockDriverCompletionFunc *cb, void *opaque)
2559 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2562 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2563 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2564 BlockDriverCompletionFunc *cb, void *opaque)
2566 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
/* AIOCB for AIO implemented on top of the coroutine read/write path. */
typedef struct BlockDriverAIOCBCoroutine {
    BlockDriverAIOCB common;    /* base AIOCB; must be the first member */
    BlockRequest req;           /* sector range, qiov and result (req.error) */
    bool is_write;              /* true for write requests */
    QEMUBH* bh;                 /* bottom half that delivers the completion */
} BlockDriverAIOCBCoroutine;
/*
 * Cancel a coroutine-based AIO request.  There is no per-request
 * cancellation here: draining *all* pending AIO guarantees that this
 * request's completion has run too, so it is safe for the caller to
 * reuse its buffers afterwards.  Heavyweight, but correct.
 */
static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
{
    qemu_aio_flush();
}
/* Pool for BlockDriverAIOCBCoroutine requests (coroutine-backed AIO). */
static AIOPool bdrv_em_co_aio_pool = {
    .aiocb_size         = sizeof(BlockDriverAIOCBCoroutine),
    .cancel             = bdrv_aio_co_cancel_em,
};
2587 static void bdrv_co_em_bh(void *opaque)
2589 BlockDriverAIOCBCoroutine *acb = opaque;
2591 acb->common.cb(acb->common.opaque, acb->req.error);
2592 qemu_bh_delete(acb->bh);
2593 qemu_aio_release(acb);
2596 /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
2597 static void coroutine_fn bdrv_co_do_rw(void *opaque)
2599 BlockDriverAIOCBCoroutine *acb = opaque;
2600 BlockDriverState *bs = acb->common.bs;
2602 if (!acb->is_write) {
2603 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
2604 acb->req.nb_sectors, acb->req.qiov);
2605 } else {
2606 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
2607 acb->req.nb_sectors, acb->req.qiov);
2610 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
2611 qemu_bh_schedule(acb->bh);
2614 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
2615 int64_t sector_num,
2616 QEMUIOVector *qiov,
2617 int nb_sectors,
2618 BlockDriverCompletionFunc *cb,
2619 void *opaque,
2620 bool is_write)
2622 Coroutine *co;
2623 BlockDriverAIOCBCoroutine *acb;
2625 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2626 acb->req.sector = sector_num;
2627 acb->req.nb_sectors = nb_sectors;
2628 acb->req.qiov = qiov;
2629 acb->is_write = is_write;
2631 co = qemu_coroutine_create(bdrv_co_do_rw);
2632 qemu_coroutine_enter(co, acb);
2634 return &acb->common;
2637 static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
2639 BlockDriverAIOCBCoroutine *acb = opaque;
2640 BlockDriverState *bs = acb->common.bs;
2642 acb->req.error = bdrv_co_flush(bs);
2643 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
2644 qemu_bh_schedule(acb->bh);
2647 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2648 BlockDriverCompletionFunc *cb, void *opaque)
2650 trace_bdrv_aio_flush(bs, opaque);
2652 Coroutine *co;
2653 BlockDriverAIOCBCoroutine *acb;
2655 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2656 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
2657 qemu_coroutine_enter(co, acb);
2659 return &acb->common;
2662 static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
2664 BlockDriverAIOCBCoroutine *acb = opaque;
2665 BlockDriverState *bs = acb->common.bs;
2667 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
2668 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
2669 qemu_bh_schedule(acb->bh);
2672 BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
2673 int64_t sector_num, int nb_sectors,
2674 BlockDriverCompletionFunc *cb, void *opaque)
2676 Coroutine *co;
2677 BlockDriverAIOCBCoroutine *acb;
2679 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
2681 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2682 acb->req.sector = sector_num;
2683 acb->req.nb_sectors = nb_sectors;
2684 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
2685 qemu_coroutine_enter(co, acb);
2687 return &acb->common;
/* Register all built-in block drivers. */
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}
/* Like bdrv_init(), but restrict format selection to the configured
 * driver whitelist. */
void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}
2701 void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2702 BlockDriverCompletionFunc *cb, void *opaque)
2704 BlockDriverAIOCB *acb;
2706 if (pool->free_aiocb) {
2707 acb = pool->free_aiocb;
2708 pool->free_aiocb = acb->next;
2709 } else {
2710 acb = g_malloc0(pool->aiocb_size);
2711 acb->pool = pool;
2713 acb->bs = bs;
2714 acb->cb = cb;
2715 acb->opaque = opaque;
2716 return acb;
2719 void qemu_aio_release(void *p)
2721 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2722 AIOPool *pool = acb->pool;
2723 acb->next = pool->free_aiocb;
2724 pool->free_aiocb = acb;
2727 /**************************************************************/
2728 /* Coroutine block device emulation */
/* Rendezvous between a waiting coroutine and an AIO completion. */
typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;   /* coroutine to re-enter on completion */
    int ret;                /* completion status handed to the callback */
} CoroutineIOCompletion;
2735 static void bdrv_co_io_em_complete(void *opaque, int ret)
2737 CoroutineIOCompletion *co = opaque;
2739 co->ret = ret;
2740 qemu_coroutine_enter(co->coroutine, NULL);
/*
 * Run the driver's callback-style bdrv_aio_readv/writev from coroutine
 * context and wait for completion.
 *
 * Returns the request's completion status, or -EIO if the driver failed
 * to even submit the request (NULL AIOCB).
 */
static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *iov,
                                      bool is_write)
{
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockDriverAIOCB *acb;

    if (is_write) {
        acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
                                       bdrv_co_io_em_complete, &co);
    } else {
        acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
                                      bdrv_co_io_em_complete, &co);
    }

    trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
    if (!acb) {
        return -EIO;
    }
    /* Sleep until bdrv_co_io_em_complete() re-enters this coroutine. */
    qemu_coroutine_yield();

    return co.ret;
}
2769 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
2770 int64_t sector_num, int nb_sectors,
2771 QEMUIOVector *iov)
2773 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
2776 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
2777 int64_t sector_num, int nb_sectors,
2778 QEMUIOVector *iov)
2780 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
2783 static void coroutine_fn bdrv_flush_co_entry(void *opaque)
2785 RwCo *rwco = opaque;
2787 rwco->ret = bdrv_co_flush(rwco->bs);
/*
 * Flush the device from coroutine context.
 *
 * First flushes format-driver caches to the OS (bdrv_co_flush_to_os),
 * then — unless the device was opened with BDRV_O_NO_FLUSH — forces the
 * data to stable storage through whichever flush interface the driver
 * implements (coroutine, AIO, or none).
 *
 * Returns 0 on success or a negative errno value.
 */
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    int ret;

    if (!bs->drv) {
        /* No medium: nothing to flush. */
        return 0;
    }

    /* Write back cached data to the OS even with cache=unsafe */
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
            return ret;
        }
    }

    /* But don't actually force it to the disk with cache=unsafe */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        return 0;
    }

    if (bs->drv->bdrv_co_flush_to_disk) {
        return bs->drv->bdrv_co_flush_to_disk(bs);
    } else if (bs->drv->bdrv_aio_flush) {
        /* Bridge the callback-based AIO flush into this coroutine. */
        BlockDriverAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            return -EIO;
        } else {
            qemu_coroutine_yield();
            return co.ret;
        }
    } else {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and don't support bdrv_flush therefore. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work because
         * that would break guests even if the server operates in writethrough
         * mode.
         *
         * Let's hope the user knows what he's doing.
         */
        return 0;
    }
}
2842 void bdrv_invalidate_cache(BlockDriverState *bs)
2844 if (bs->drv && bs->drv->bdrv_invalidate_cache) {
2845 bs->drv->bdrv_invalidate_cache(bs);
2849 void bdrv_invalidate_cache_all(void)
2851 BlockDriverState *bs;
2853 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2854 bdrv_invalidate_cache(bs);
2858 int bdrv_flush(BlockDriverState *bs)
2860 Coroutine *co;
2861 RwCo rwco = {
2862 .bs = bs,
2863 .ret = NOT_DONE,
2866 if (qemu_in_coroutine()) {
2867 /* Fast-path if already in coroutine context */
2868 bdrv_flush_co_entry(&rwco);
2869 } else {
2870 co = qemu_coroutine_create(bdrv_flush_co_entry);
2871 qemu_coroutine_enter(co, &rwco);
2872 while (rwco.ret == NOT_DONE) {
2873 qemu_aio_wait();
2877 return rwco.ret;
2880 static void coroutine_fn bdrv_discard_co_entry(void *opaque)
2882 RwCo *rwco = opaque;
2884 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
/*
 * Discard (trim/unmap) a sector range from coroutine context.
 *
 * Validates medium presence, request bounds and writability, then uses
 * the driver's coroutine or AIO discard hook.  Drivers with no discard
 * support succeed silently (discard is only advisory).
 *
 * Returns 0 on success or a negative errno value.
 */
int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
                                 int nb_sectors)
{
    if (!bs->drv) {
        return -ENOMEDIUM;
    } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    } else if (bs->read_only) {
        return -EROFS;
    } else if (bs->drv->bdrv_co_discard) {
        return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
    } else if (bs->drv->bdrv_aio_discard) {
        /* Bridge the callback-based AIO discard into this coroutine. */
        BlockDriverAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
                                        bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            return -EIO;
        } else {
            qemu_coroutine_yield();
            return co.ret;
        }
    } else {
        /* No discard support: treat as a successful no-op. */
        return 0;
    }
}
2917 int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
2919 Coroutine *co;
2920 RwCo rwco = {
2921 .bs = bs,
2922 .sector_num = sector_num,
2923 .nb_sectors = nb_sectors,
2924 .ret = NOT_DONE,
2927 if (qemu_in_coroutine()) {
2928 /* Fast-path if already in coroutine context */
2929 bdrv_discard_co_entry(&rwco);
2930 } else {
2931 co = qemu_coroutine_create(bdrv_discard_co_entry);
2932 qemu_coroutine_enter(co, &rwco);
2933 while (rwco.ret == NOT_DONE) {
2934 qemu_aio_wait();
2938 return rwco.ret;
2941 /**************************************************************/
2942 /* removable device support */
2945 * Return TRUE if the media is present
2947 int bdrv_is_inserted(BlockDriverState *bs)
2949 BlockDriver *drv = bs->drv;
2951 if (!drv)
2952 return 0;
2953 if (!drv->bdrv_is_inserted)
2954 return 1;
2955 return drv->bdrv_is_inserted(bs);
2959 * Return whether the media changed since the last call to this
2960 * function, or -ENOTSUP if we don't know. Most drivers don't know.
2962 int bdrv_media_changed(BlockDriverState *bs)
2964 BlockDriver *drv = bs->drv;
2966 if (drv && drv->bdrv_media_changed) {
2967 return drv->bdrv_media_changed(bs);
2969 return -ENOTSUP;
2973 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
2975 void bdrv_eject(BlockDriverState *bs, int eject_flag)
2977 BlockDriver *drv = bs->drv;
2979 if (drv && drv->bdrv_eject) {
2980 drv->bdrv_eject(bs, eject_flag);
2985 * Lock or unlock the media (if it is locked, the user won't be able
2986 * to eject it manually).
2988 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
2990 BlockDriver *drv = bs->drv;
2992 trace_bdrv_lock_medium(bs, locked);
2994 if (drv && drv->bdrv_lock_medium) {
2995 drv->bdrv_lock_medium(bs, locked);
2999 /* needed for generic scsi interface */
3001 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3003 BlockDriver *drv = bs->drv;
3005 if (drv && drv->bdrv_ioctl)
3006 return drv->bdrv_ioctl(bs, req, buf);
3007 return -ENOTSUP;
3010 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3011 unsigned long int req, void *buf,
3012 BlockDriverCompletionFunc *cb, void *opaque)
3014 BlockDriver *drv = bs->drv;
3016 if (drv && drv->bdrv_aio_ioctl)
3017 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3018 return NULL;
/* Set the memory alignment required for I/O buffers on this device. */
void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
{
    bs->buffer_alignment = align;
}
3026 void *qemu_blockalign(BlockDriverState *bs, size_t size)
3028 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
/*
 * Enable or disable dirty-block tracking.  Enabling allocates a bitmap
 * with one bit per BDRV_SECTORS_PER_DIRTY_CHUNK sectors (kept if one
 * already exists); disabling frees it.  The dirty counter is reset in
 * both cases.
 */
void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
{
    int64_t bitmap_size;

    bs->dirty_count = 0;
    if (enable) {
        if (!bs->dirty_bitmap) {
            /* Bytes needed: one bit per chunk, rounded up. */
            bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
                    BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
            bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;

            bs->dirty_bitmap = g_malloc0(bitmap_size);
        }
    } else {
        if (bs->dirty_bitmap) {
            g_free(bs->dirty_bitmap);
            bs->dirty_bitmap = NULL;
        }
    }
}
/*
 * Return 1 if the dirty chunk containing @sector is marked dirty,
 * 0 otherwise (including when tracking is disabled or @sector lies
 * beyond the end of the device).
 */
int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
{
    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;

    if (bs->dirty_bitmap &&
        (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
        /* Index the bitmap as an array of unsigned long words. */
        return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
            (1UL << (chunk % (sizeof(unsigned long) * 8))));
    } else {
        return 0;
    }
}
/* Clear the dirty bits covering [cur_sector, cur_sector + nr_sectors). */
void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                      int nr_sectors)
{
    set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
}
/* Return the number of dirty chunks currently tracked on @bs. */
int64_t bdrv_get_dirty_count(BlockDriverState *bs)
{
    return bs->dirty_count;
}
/* Mark/unmark @bs as in use by a job; asserts the flag actually
 * toggles (no redundant set/clear). */
void bdrv_set_in_use(BlockDriverState *bs, int in_use)
{
    assert(bs->in_use != in_use);
    bs->in_use = in_use;
}
/* Return non-zero if @bs is currently marked in use. */
int bdrv_in_use(BlockDriverState *bs)
{
    return bs->in_use;
}
/* Enable I/O status reporting and reset the status to OK. */
void bdrv_iostatus_enable(BlockDriverState *bs)
{
    bs->iostatus_enabled = true;
    bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}
3093 /* The I/O status is only enabled if the drive explicitly
3094 * enables it _and_ the VM is configured to stop on errors */
3095 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3097 return (bs->iostatus_enabled &&
3098 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3099 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3100 bs->on_read_error == BLOCK_ERR_STOP_ANY));
/* Disable I/O status reporting for @bs. */
void bdrv_iostatus_disable(BlockDriverState *bs)
{
    bs->iostatus_enabled = false;
}
3108 void bdrv_iostatus_reset(BlockDriverState *bs)
3110 if (bdrv_iostatus_is_enabled(bs)) {
3111 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3115 /* XXX: Today this is set by device models because it makes the implementation
3116 quite simple. However, the block layer knows about the error, so it's
3117 possible to implement this without device models being involved */
3118 void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3120 if (bdrv_iostatus_is_enabled(bs) &&
3121 bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
3122 assert(error >= 0);
3123 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3124 BLOCK_DEVICE_IO_STATUS_FAILED;
3128 void
3129 bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3130 enum BlockAcctType type)
3132 assert(type < BDRV_MAX_IOTYPE);
3134 cookie->bytes = bytes;
3135 cookie->start_time_ns = get_clock();
3136 cookie->type = type;
3139 void
3140 bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3142 assert(cookie->type < BDRV_MAX_IOTYPE);
3144 bs->nr_bytes[cookie->type] += cookie->bytes;
3145 bs->nr_ops[cookie->type]++;
3146 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
3149 int bdrv_img_create(const char *filename, const char *fmt,
3150 const char *base_filename, const char *base_fmt,
3151 char *options, uint64_t img_size, int flags)
3153 QEMUOptionParameter *param = NULL, *create_options = NULL;
3154 QEMUOptionParameter *backing_fmt, *backing_file, *size;
3155 BlockDriverState *bs = NULL;
3156 BlockDriver *drv, *proto_drv;
3157 BlockDriver *backing_drv = NULL;
3158 int ret = 0;
3160 /* Find driver and parse its options */
3161 drv = bdrv_find_format(fmt);
3162 if (!drv) {
3163 error_report("Unknown file format '%s'", fmt);
3164 ret = -EINVAL;
3165 goto out;
3168 proto_drv = bdrv_find_protocol(filename);
3169 if (!proto_drv) {
3170 error_report("Unknown protocol '%s'", filename);
3171 ret = -EINVAL;
3172 goto out;
3175 create_options = append_option_parameters(create_options,
3176 drv->create_options);
3177 create_options = append_option_parameters(create_options,
3178 proto_drv->create_options);
3180 /* Create parameter list with default values */
3181 param = parse_option_parameters("", create_options, param);
3183 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3185 /* Parse -o options */
3186 if (options) {
3187 param = parse_option_parameters(options, create_options, param);
3188 if (param == NULL) {
3189 error_report("Invalid options for file format '%s'.", fmt);
3190 ret = -EINVAL;
3191 goto out;
3195 if (base_filename) {
3196 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3197 base_filename)) {
3198 error_report("Backing file not supported for file format '%s'",
3199 fmt);
3200 ret = -EINVAL;
3201 goto out;
3205 if (base_fmt) {
3206 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3207 error_report("Backing file format not supported for file "
3208 "format '%s'", fmt);
3209 ret = -EINVAL;
3210 goto out;
3214 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3215 if (backing_file && backing_file->value.s) {
3216 if (!strcmp(filename, backing_file->value.s)) {
3217 error_report("Error: Trying to create an image with the "
3218 "same filename as the backing file");
3219 ret = -EINVAL;
3220 goto out;
3224 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3225 if (backing_fmt && backing_fmt->value.s) {
3226 backing_drv = bdrv_find_format(backing_fmt->value.s);
3227 if (!backing_drv) {
3228 error_report("Unknown backing file format '%s'",
3229 backing_fmt->value.s);
3230 ret = -EINVAL;
3231 goto out;
3235 // The size for the image must always be specified, with one exception:
3236 // If we are using a backing file, we can obtain the size from there
3237 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3238 if (size && size->value.n == -1) {
3239 if (backing_file && backing_file->value.s) {
3240 uint64_t size;
3241 char buf[32];
3243 bs = bdrv_new("");
3245 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
3246 if (ret < 0) {
3247 error_report("Could not open '%s'", backing_file->value.s);
3248 goto out;
3250 bdrv_get_geometry(bs, &size);
3251 size *= 512;
3253 snprintf(buf, sizeof(buf), "%" PRId64, size);
3254 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3255 } else {
3256 error_report("Image creation needs a size parameter");
3257 ret = -EINVAL;
3258 goto out;
3262 printf("Formatting '%s', fmt=%s ", filename, fmt);
3263 print_option_parameters(param);
3264 puts("");
3266 ret = bdrv_create(drv, filename, param);
3268 if (ret < 0) {
3269 if (ret == -ENOTSUP) {
3270 error_report("Formatting or formatting option not supported for "
3271 "file format '%s'", fmt);
3272 } else if (ret == -EFBIG) {
3273 error_report("The image size is too large for file format '%s'",
3274 fmt);
3275 } else {
3276 error_report("%s: error while creating %s: %s", filename, fmt,
3277 strerror(-ret));
3281 out:
3282 free_option_parameters(create_options);
3283 free_option_parameters(param);
3285 if (bs) {
3286 bdrv_delete(bs);
3289 return ret;