kvm tools: Add ivshmem device
[linux-2.6/next.git] / tools / kvm / disk / qcow.c
blob2471aebc81cb44fa8dae9417950fadb43e848c8a
1 #include "kvm/qcow.h"
3 #include "kvm/disk-image.h"
4 #include "kvm/read-write.h"
5 #include "kvm/mutex.h"
6 #include "kvm/util.h"
8 #include <sys/types.h>
9 #include <sys/stat.h>
10 #include <stdbool.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <unistd.h>
14 #include <fcntl.h>
16 #include <linux/byteorder.h>
17 #include <linux/kernel.h>
18 #include <linux/types.h>
20 static int l2_table_insert(struct rb_root *root, struct qcow_l2_table *new)
22 struct rb_node **link = &(root->rb_node), *parent = NULL;
23 u64 offset = new->offset;
25 /* search the tree */
26 while (*link) {
27 struct qcow_l2_table *t;
29 t = rb_entry(*link, struct qcow_l2_table, node);
30 if (!t)
31 goto error;
33 parent = *link;
35 if (t->offset > offset)
36 link = &(*link)->rb_left;
37 else if (t->offset < offset)
38 link = &(*link)->rb_right;
39 else
40 goto out;
43 /* add new node */
44 rb_link_node(&new->node, parent, link);
45 rb_insert_color(&new->node, root);
46 out:
47 return 0;
48 error:
49 return -1;
52 static struct qcow_l2_table *l2_table_lookup(struct rb_root *root, u64 offset)
54 struct rb_node *link = root->rb_node;
56 while (link) {
57 struct qcow_l2_table *t;
59 t = rb_entry(link, struct qcow_l2_table, node);
60 if (!t)
61 goto out;
63 if (t->offset > offset)
64 link = link->rb_left;
65 else if (t->offset < offset)
66 link = link->rb_right;
67 else
68 return t;
70 out:
71 return NULL;
74 static void l1_table_free_cache(struct qcow_l1_table *l1t)
76 struct rb_root *r = &l1t->root;
77 struct list_head *pos, *n;
78 struct qcow_l2_table *t;
80 list_for_each_safe(pos, n, &l1t->lru_list) {
81 /* Remove cache table from the list and RB tree */
82 list_del(pos);
83 t = list_entry(pos, struct qcow_l2_table, list);
84 rb_erase(&t->node, r);
86 /* Free the cached node */
87 free(t);
91 static int qcow_l2_cache_write(struct qcow *q, struct qcow_l2_table *c)
93 struct qcow_header *header = q->header;
94 u64 size;
96 if (!c->dirty)
97 return 0;
99 size = 1 << header->l2_bits;
101 if (pwrite_in_full(q->fd, c->table, size * sizeof(u64), c->offset) < 0)
102 return -1;
104 c->dirty = 0;
106 return 0;
109 static int cache_table(struct qcow *q, struct qcow_l2_table *c)
111 struct qcow_l1_table *l1t = &q->table;
112 struct rb_root *r = &l1t->root;
113 struct qcow_l2_table *lru;
115 if (l1t->nr_cached == MAX_CACHE_NODES) {
117 * The node at the head of the list is least recently used
118 * node. Remove it from the list and replaced with a new node.
120 lru = list_first_entry(&l1t->lru_list, struct qcow_l2_table, list);
122 if (qcow_l2_cache_write(q, lru) < 0)
123 goto error;
125 /* Remove the node from the cache */
126 rb_erase(&lru->node, r);
127 list_del_init(&lru->list);
128 l1t->nr_cached--;
130 /* Free the LRUed node */
131 free(lru);
134 /* Add new node in RB Tree: Helps in searching faster */
135 if (l2_table_insert(r, c) < 0)
136 goto error;
138 /* Add in LRU replacement list */
139 list_add_tail(&c->list, &l1t->lru_list);
140 l1t->nr_cached++;
142 return 0;
143 error:
144 return -1;
147 static struct qcow_l2_table *l2_table_search(struct qcow *q, u64 offset)
149 struct qcow_l1_table *l1t = &q->table;
150 struct qcow_l2_table *l2t;
152 l2t = l2_table_lookup(&l1t->root, offset);
153 if (!l2t)
154 return NULL;
156 /* Update the LRU state, by moving the searched node to list tail */
157 list_move_tail(&l2t->list, &l1t->lru_list);
159 return l2t;
162 /* Allocates a new node for caching L2 table */
163 static struct qcow_l2_table *new_cache_table(struct qcow *q, u64 offset)
165 struct qcow_header *header = q->header;
166 struct qcow_l2_table *c;
167 u64 l2t_sz;
168 u64 size;
170 l2t_sz = 1 << header->l2_bits;
171 size = sizeof(*c) + l2t_sz * sizeof(u64);
172 c = calloc(1, size);
173 if (!c)
174 goto out;
176 c->offset = offset;
177 RB_CLEAR_NODE(&c->node);
178 INIT_LIST_HEAD(&c->list);
179 out:
180 return c;
183 static inline u64 get_l1_index(struct qcow *q, u64 offset)
185 struct qcow_header *header = q->header;
187 return offset >> (header->l2_bits + header->cluster_bits);
190 static inline u64 get_l2_index(struct qcow *q, u64 offset)
192 struct qcow_header *header = q->header;
194 return (offset >> (header->cluster_bits)) & ((1 << header->l2_bits)-1);
197 static inline u64 get_cluster_offset(struct qcow *q, u64 offset)
199 struct qcow_header *header = q->header;
201 return offset & ((1 << header->cluster_bits)-1);
204 static struct qcow_l2_table *qcow_read_l2_table(struct qcow *q, u64 offset)
206 struct qcow_header *header = q->header;
207 struct qcow_l2_table *l2t;
208 u64 size;
210 size = 1 << header->l2_bits;
212 /* search an entry for offset in cache */
213 l2t = l2_table_search(q, offset);
214 if (l2t)
215 return l2t;
217 /* allocate new node for caching l2 table */
218 l2t = new_cache_table(q, offset);
219 if (!l2t)
220 goto error;
222 /* table not cached: read from the disk */
223 if (pread_in_full(q->fd, l2t->table, size * sizeof(u64), offset) < 0)
224 goto error;
226 /* cache the table */
227 if (cache_table(q, l2t) < 0)
228 goto error;
230 return l2t;
231 error:
232 free(l2t);
233 return NULL;
236 static ssize_t qcow_read_cluster(struct qcow *q, u64 offset, void *dst, u32 dst_len)
238 struct qcow_header *header = q->header;
239 struct qcow_l1_table *l1t = &q->table;
240 struct qcow_l2_table *l2t;
241 u64 cluster_size;
242 u64 clust_offset;
243 u64 clust_start;
244 u64 l2t_offset;
245 size_t length;
246 u64 l2t_size;
247 u64 l1_idx;
248 u64 l2_idx;
250 cluster_size = 1 << header->cluster_bits;
252 l1_idx = get_l1_index(q, offset);
253 if (l1_idx >= l1t->table_size)
254 return -1;
256 clust_offset = get_cluster_offset(q, offset);
257 if (clust_offset >= cluster_size)
258 return -1;
260 length = cluster_size - clust_offset;
261 if (length > dst_len)
262 length = dst_len;
264 mutex_lock(&q->mutex);
266 l2t_offset = be64_to_cpu(l1t->l1_table[l1_idx]);
267 if (l2t_offset & QCOW_OFLAG_COMPRESSED) {
268 pr_warning("compressed sectors are not supported");
269 goto out_error;
272 l2t_offset &= QCOW_OFFSET_MASK;
273 if (!l2t_offset)
274 goto zero_cluster;
276 l2t_size = 1 << header->l2_bits;
278 /* read and cache level 2 table */
279 l2t = qcow_read_l2_table(q, l2t_offset);
280 if (!l2t)
281 goto out_error;
283 l2_idx = get_l2_index(q, offset);
284 if (l2_idx >= l2t_size)
285 goto out_error;
287 clust_start = be64_to_cpu(l2t->table[l2_idx]);
288 if (clust_start & QCOW_OFLAG_COMPRESSED) {
289 pr_warning("compressed sectors are not supported");
290 goto out_error;
293 clust_start &= QCOW_OFFSET_MASK;
294 if (!clust_start)
295 goto zero_cluster;
297 mutex_unlock(&q->mutex);
299 if (pread_in_full(q->fd, dst, length, clust_start + clust_offset) < 0)
300 return -1;
302 return length;
304 zero_cluster:
305 mutex_unlock(&q->mutex);
306 memset(dst, 0, length);
307 return length;
309 out_error:
310 mutex_unlock(&q->mutex);
311 length = -1;
312 return -1;
315 static ssize_t qcow_read_sector(struct disk_image *disk, u64 sector, void *dst, u32 dst_len)
317 struct qcow *q = disk->priv;
318 struct qcow_header *header = q->header;
319 u32 nr_read;
320 u64 offset;
321 char *buf;
322 u32 nr;
324 buf = dst;
325 nr_read = 0;
327 while (nr_read < dst_len) {
328 offset = sector << SECTOR_SHIFT;
329 if (offset >= header->size)
330 return -1;
332 nr = qcow_read_cluster(q, offset, buf, dst_len - nr_read);
333 if (nr <= 0)
334 return -1;
336 nr_read += nr;
337 buf += nr;
338 sector += (nr >> SECTOR_SHIFT);
341 return dst_len;
344 static inline u64 file_size(int fd)
346 struct stat st;
348 if (fstat(fd, &st) < 0)
349 return 0;
351 return st.st_size;
/*
 * Write @count bytes from @buf at @offset and then fdatasync() the file.
 * Returns -1 if the write fails, otherwise the result of fdatasync().
 */
static inline int qcow_pwrite_sync(int fd, void *buf, size_t count, off_t offset)
{
	int write_failed = pwrite_in_full(fd, buf, count, offset) < 0;

	if (write_failed)
		return -1;

	return fdatasync(fd);
}
362 /* Writes a level 2 table at the end of the file. */
363 static u64 qcow_write_l2_table(struct qcow *q, u64 *table)
365 struct qcow_header *header = q->header;
366 u64 clust_sz;
367 u64 f_sz;
368 u64 off;
369 u64 sz;
371 f_sz = file_size(q->fd);
372 if (!f_sz)
373 return 0;
375 sz = 1 << header->l2_bits;
376 clust_sz = 1 << header->cluster_bits;
377 off = ALIGN(f_sz, clust_sz);
379 if (pwrite_in_full(q->fd, table, sz * sizeof(u64), off) < 0)
380 return 0;
382 return off;
385 static void refcount_table_free_cache(struct qcow_refcount_table *rft)
387 struct rb_root *r = &rft->root;
388 struct list_head *pos, *n;
389 struct qcow_refcount_block *t;
391 list_for_each_safe(pos, n, &rft->lru_list) {
392 list_del(pos);
393 t = list_entry(pos, struct qcow_refcount_block, list);
394 rb_erase(&t->node, r);
396 free(t);
400 static int refcount_block_insert(struct rb_root *root, struct qcow_refcount_block *new)
402 struct rb_node **link = &(root->rb_node), *parent = NULL;
403 u64 offset = new->offset;
405 /* search the tree */
406 while (*link) {
407 struct qcow_refcount_block *t;
409 t = rb_entry(*link, struct qcow_refcount_block, node);
410 if (!t)
411 goto error;
413 parent = *link;
415 if (t->offset > offset)
416 link = &(*link)->rb_left;
417 else if (t->offset < offset)
418 link = &(*link)->rb_right;
419 else
420 goto out;
423 /* add new node */
424 rb_link_node(&new->node, parent, link);
425 rb_insert_color(&new->node, root);
426 out:
427 return 0;
428 error:
429 return -1;
432 static int write_refcount_block(struct qcow *q, struct qcow_refcount_block *rfb)
434 if (!rfb->dirty)
435 return 0;
437 if (pwrite_in_full(q->fd, rfb->entries, rfb->size * sizeof(u16), rfb->offset) < 0)
438 return -1;
440 rfb->dirty = 0;
442 return 0;
445 static int cache_refcount_block(struct qcow *q, struct qcow_refcount_block *c)
447 struct qcow_refcount_table *rft = &q->refcount_table;
448 struct rb_root *r = &rft->root;
449 struct qcow_refcount_block *lru;
451 if (rft->nr_cached == MAX_CACHE_NODES) {
452 lru = list_first_entry(&rft->lru_list, struct qcow_refcount_block, list);
454 if (write_refcount_block(q, lru) < 0)
455 goto error;
457 rb_erase(&lru->node, r);
458 list_del_init(&lru->list);
459 rft->nr_cached--;
461 free(lru);
464 if (refcount_block_insert(r, c) < 0)
465 goto error;
467 list_add_tail(&c->list, &rft->lru_list);
468 rft->nr_cached++;
470 return 0;
471 error:
472 return -1;
475 static struct qcow_refcount_block *new_refcount_block(struct qcow *q, u64 rfb_offset)
477 struct qcow_header *header = q->header;
478 struct qcow_refcount_block *rfb;
479 u64 cluster_size;
481 cluster_size = 1 << header->cluster_bits;
483 rfb = malloc(sizeof *rfb + cluster_size);
484 if (!rfb)
485 return NULL;
487 rfb->offset = rfb_offset;
488 rfb->size = cluster_size / sizeof(u16);
489 RB_CLEAR_NODE(&rfb->node);
490 INIT_LIST_HEAD(&rfb->list);
492 return rfb;
495 static struct qcow_refcount_block *refcount_block_lookup(struct rb_root *root, u64 offset)
497 struct rb_node *link = root->rb_node;
499 while (link) {
500 struct qcow_refcount_block *t;
502 t = rb_entry(link, struct qcow_refcount_block, node);
503 if (!t)
504 goto out;
506 if (t->offset > offset)
507 link = link->rb_left;
508 else if (t->offset < offset)
509 link = link->rb_right;
510 else
511 return t;
513 out:
514 return NULL;
517 static struct qcow_refcount_block *refcount_block_search(struct qcow *q, u64 offset)
519 struct qcow_refcount_table *rft = &q->refcount_table;
520 struct qcow_refcount_block *rfb;
522 rfb = refcount_block_lookup(&rft->root, offset);
523 if (!rfb)
524 return NULL;
526 /* Update the LRU state, by moving the searched node to list tail */
527 list_move_tail(&rfb->list, &rft->lru_list);
529 return rfb;
532 static struct qcow_refcount_block *qcow_read_refcount_block(struct qcow *q, u64 clust_idx)
534 struct qcow_header *header = q->header;
535 struct qcow_refcount_table *rft = &q->refcount_table;
536 struct qcow_refcount_block *rfb;
537 u64 rfb_offset;
538 u64 rft_idx;
540 rft_idx = clust_idx >> (header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT);
541 if (rft_idx >= rft->rf_size)
542 return NULL;
544 rfb_offset = be64_to_cpu(rft->rf_table[rft_idx]);
546 rfb = refcount_block_search(q, rfb_offset);
547 if (rfb)
548 return rfb;
550 rfb = new_refcount_block(q, rfb_offset);
551 if (!rfb)
552 return NULL;
554 if (pread_in_full(q->fd, rfb->entries, rfb->size * sizeof(u16), rfb_offset) < 0)
555 goto error_free_rfb;
557 if (cache_refcount_block(q, rfb) < 0)
558 goto error_free_rfb;
560 return rfb;
562 error_free_rfb:
563 free(rfb);
565 return NULL;
569 * QCOW file might grow during a write operation. Not only data but metadata is
570 * also written at the end of the file. Therefore it is necessary to ensure
571 * every write is committed to disk. Hence we use uses qcow_pwrite_sync() to
572 * synchronize the in-core state of QCOW image to disk.
574 * We also try to restore the image to a consistent state if the metdata
575 * operation fails. The two metadat operations are: level 1 and level 2 table
576 * update. If either of them fails the image is truncated to a consistent state.
578 static ssize_t qcow_write_cluster(struct qcow *q, u64 offset, void *buf, u32 src_len)
580 struct qcow_header *header = q->header;
581 struct qcow_l1_table *l1t = &q->table;
582 struct qcow_l2_table *l2t;
583 u64 clust_start;
584 u64 clust_flags;
585 u64 l2t_offset;
586 u64 clust_off;
587 u64 l2t_size;
588 u64 clust_sz;
589 u64 l1t_idx;
590 u64 l2t_idx;
591 u64 f_sz;
592 u64 len;
594 l2t = NULL;
595 l2t_size = 1 << header->l2_bits;
596 clust_sz = 1 << header->cluster_bits;
598 l1t_idx = get_l1_index(q, offset);
599 if (l1t_idx >= l1t->table_size)
600 return -1;
602 l2t_idx = get_l2_index(q, offset);
603 if (l2t_idx >= l2t_size)
604 return -1;
606 clust_off = get_cluster_offset(q, offset);
607 if (clust_off >= clust_sz)
608 return -1;
610 len = clust_sz - clust_off;
611 if (len > src_len)
612 len = src_len;
614 mutex_lock(&q->mutex);
616 l2t_offset = be64_to_cpu(l1t->l1_table[l1t_idx]);
617 if (l2t_offset & QCOW_OFLAG_COMPRESSED) {
618 pr_warning("compressed clusters are not supported");
619 goto error;
621 if (!(l2t_offset & QCOW_OFLAG_COPIED)) {
622 pr_warning("L2 copy-on-write clusters are not supported");
623 goto error;
626 l2t_offset &= QCOW_OFFSET_MASK;
627 if (l2t_offset) {
628 /* read and cache l2 table */
629 l2t = qcow_read_l2_table(q, l2t_offset);
630 if (!l2t)
631 goto error;
632 } else {
633 l2t = new_cache_table(q, l2t_offset);
634 if (!l2t)
635 goto error;
637 /* Capture the state of the consistent QCOW image */
638 f_sz = file_size(q->fd);
639 if (!f_sz)
640 goto free_cache;
642 /* Write the l2 table of 0's at the end of the file */
643 l2t_offset = qcow_write_l2_table(q, l2t->table);
644 if (!l2t_offset)
645 goto free_cache;
647 if (cache_table(q, l2t) < 0) {
648 if (ftruncate(q->fd, f_sz) < 0)
649 goto free_cache;
651 goto free_cache;
654 /* Update the in-core entry */
655 l1t->l1_table[l1t_idx] = cpu_to_be64(l2t_offset);
658 /* Capture the state of the consistent QCOW image */
659 f_sz = file_size(q->fd);
660 if (!f_sz)
661 goto error;
663 clust_start = be64_to_cpu(l2t->table[l2t_idx]);
665 clust_flags = clust_start & QCOW_OFLAGS_MASK;
666 if (clust_flags & QCOW_OFLAG_COMPRESSED) {
667 pr_warning("compressed clusters are not supported");
668 goto error;
671 clust_start &= QCOW_OFFSET_MASK;
672 if (!clust_start) {
673 clust_start = ALIGN(f_sz, clust_sz);
674 l2t->table[l2t_idx] = cpu_to_be64(clust_start | QCOW_OFLAG_COPIED);
675 l2t->dirty = 1;
678 if (!(clust_flags & QCOW_OFLAG_COPIED)) {
679 struct qcow_refcount_block *rfb = NULL;
680 u16 clust_refcount;
681 u64 clust_idx;
682 u64 rfb_idx;
684 clust_idx = (clust_start & QCOW_OFFSET_MASK) >> (header->cluster_bits);
686 rfb = qcow_read_refcount_block(q, clust_idx);
687 if (!rfb) {
688 pr_warning("L1: error while reading refcount table");
689 goto error;
692 rfb_idx = clust_idx & (((1ULL << (header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT)) - 1));
693 if (rfb_idx >= rfb->size) {
694 pr_warning("L1: refcount block index out of bounds");
695 goto error;
698 clust_refcount = be16_to_cpu(rfb->entries[rfb_idx]);
699 if (!clust_refcount) {
700 clust_refcount = 1;
701 rfb->entries[rfb_idx] = cpu_to_be16(clust_refcount);
702 rfb->dirty = 1;
705 if (clust_refcount > 1) {
706 pr_warning("L1 copy-on-write clusters are not supported");
707 goto error;
711 mutex_unlock(&q->mutex);
713 /* Write actual data */
714 if (pwrite_in_full(q->fd, buf, len, clust_start + clust_off) < 0)
715 return -1;
717 return len;
719 free_cache:
720 free(l2t);
721 error:
722 mutex_unlock(&q->mutex);
723 return -1;
726 static ssize_t qcow_write_sector(struct disk_image *disk, u64 sector, void *src, u32 src_len)
728 struct qcow *q = disk->priv;
729 struct qcow_header *header = q->header;
730 u32 nr_written;
731 char *buf;
732 u64 offset;
733 ssize_t nr;
735 buf = src;
736 nr_written = 0;
737 offset = sector << SECTOR_SHIFT;
739 while (nr_written < src_len) {
740 if (offset >= header->size)
741 return -1;
743 nr = qcow_write_cluster(q, offset, buf, src_len - nr_written);
744 if (nr < 0)
745 return -1;
747 nr_written += nr;
748 buf += nr;
749 offset += nr;
752 return nr_written;
755 static ssize_t qcow_nowrite_sector(struct disk_image *disk, u64 sector, void *src, u32 src_len)
757 /* I/O error */
758 pr_info("%s: no write support\n", __func__);
759 return -1;
762 static int qcow_disk_flush(struct disk_image *disk)
764 struct qcow *q = disk->priv;
765 struct qcow_refcount_table *rft;
766 struct qcow_header *header;
767 struct list_head *pos, *n;
768 struct qcow_l1_table *l1t;
770 header = q->header;
771 l1t = &q->table;
772 rft = &q->refcount_table;
774 mutex_lock(&q->mutex);
776 list_for_each_safe(pos, n, &rft->lru_list) {
777 struct qcow_refcount_block *c = list_entry(pos, struct qcow_refcount_block, list);
779 if (write_refcount_block(q, c) < 0)
780 goto error_unlock;
783 if (fdatasync(disk->fd) < 0)
784 goto error_unlock;
786 list_for_each_safe(pos, n, &l1t->lru_list) {
787 struct qcow_l2_table *c = list_entry(pos, struct qcow_l2_table, list);
789 if (qcow_l2_cache_write(q, c) < 0)
790 goto error_unlock;
793 if (fdatasync(disk->fd) < 0)
794 goto error_unlock;
796 if (pwrite_in_full(disk->fd, l1t->l1_table, l1t->table_size * sizeof(u64), header->l1_table_offset) < 0)
797 goto error_unlock;
799 mutex_unlock(&q->mutex);
801 return fsync(disk->fd);
803 error_unlock:
804 mutex_unlock(&q->mutex);
805 return -1;
808 static int qcow_disk_close(struct disk_image *disk)
810 struct qcow *q;
812 if (!disk)
813 return 0;
815 q = disk->priv;
817 refcount_table_free_cache(&q->refcount_table);
818 l1_table_free_cache(&q->table);
819 free(q->refcount_table.rf_table);
820 free(q->table.l1_table);
821 free(q->header);
822 free(q);
824 return 0;
827 static struct disk_image_operations qcow_disk_readonly_ops = {
828 .read_sector = qcow_read_sector,
829 .write_sector = qcow_nowrite_sector,
830 .close = qcow_disk_close,
833 static struct disk_image_operations qcow_disk_ops = {
834 .read_sector = qcow_read_sector,
835 .write_sector = qcow_write_sector,
836 .flush = qcow_disk_flush,
837 .close = qcow_disk_close,
840 static int qcow_read_refcount_table(struct qcow *q)
842 struct qcow_header *header = q->header;
843 struct qcow_refcount_table *rft = &q->refcount_table;
844 u64 cluster_size;
846 cluster_size = 1 << header->cluster_bits;
848 rft->rf_size = (header->refcount_table_size * cluster_size) / sizeof(u64);
850 rft->rf_table = calloc(rft->rf_size, sizeof(u64));
851 if (!rft->rf_table)
852 return -1;
854 rft->root = RB_ROOT;
855 INIT_LIST_HEAD(&rft->lru_list);
857 return pread_in_full(q->fd, rft->rf_table, sizeof(u64) * rft->rf_size, header->refcount_table_offset);
860 static int qcow_read_l1_table(struct qcow *q)
862 struct qcow_header *header = q->header;
863 struct qcow_l1_table *table = &q->table;
865 table->table_size = header->l1_size;
867 table->l1_table = calloc(table->table_size, sizeof(u64));
868 if (!table->l1_table)
869 return -1;
871 return pread_in_full(q->fd, table->l1_table, sizeof(u64) * table->table_size, header->l1_table_offset);
874 static void *qcow2_read_header(int fd)
876 struct qcow2_header_disk f_header;
877 struct qcow_header *header;
879 header = malloc(sizeof(struct qcow_header));
880 if (!header)
881 return NULL;
883 if (pread_in_full(fd, &f_header, sizeof(struct qcow2_header_disk), 0) < 0) {
884 free(header);
885 return NULL;
888 be32_to_cpus(&f_header.magic);
889 be32_to_cpus(&f_header.version);
890 be64_to_cpus(&f_header.backing_file_offset);
891 be32_to_cpus(&f_header.backing_file_size);
892 be32_to_cpus(&f_header.cluster_bits);
893 be64_to_cpus(&f_header.size);
894 be32_to_cpus(&f_header.crypt_method);
895 be32_to_cpus(&f_header.l1_size);
896 be64_to_cpus(&f_header.l1_table_offset);
897 be64_to_cpus(&f_header.refcount_table_offset);
898 be32_to_cpus(&f_header.refcount_table_clusters);
899 be32_to_cpus(&f_header.nb_snapshots);
900 be64_to_cpus(&f_header.snapshots_offset);
902 *header = (struct qcow_header) {
903 .size = f_header.size,
904 .l1_table_offset = f_header.l1_table_offset,
905 .l1_size = f_header.l1_size,
906 .cluster_bits = f_header.cluster_bits,
907 .l2_bits = f_header.cluster_bits - 3,
908 .refcount_table_offset = f_header.refcount_table_offset,
909 .refcount_table_size = f_header.refcount_table_clusters,
912 return header;
915 static struct disk_image *qcow2_probe(int fd, bool readonly)
917 struct disk_image *disk_image;
918 struct qcow_l1_table *l1t;
919 struct qcow_header *h;
920 struct qcow *q;
922 q = calloc(1, sizeof(struct qcow));
923 if (!q)
924 goto error;
926 mutex_init(&q->mutex);
927 q->fd = fd;
929 l1t = &q->table;
931 l1t->root = RB_ROOT;
932 INIT_LIST_HEAD(&l1t->lru_list);
934 h = q->header = qcow2_read_header(fd);
935 if (!h)
936 goto error;
938 if (qcow_read_l1_table(q) < 0)
939 goto error;
941 if (qcow_read_refcount_table(q) < 0)
942 goto error;
945 * Do not use mmap use read/write instead
947 if (readonly)
948 disk_image = disk_image__new(fd, h->size, &qcow_disk_readonly_ops, DISK_IMAGE_NOMMAP);
949 else
950 disk_image = disk_image__new(fd, h->size, &qcow_disk_ops, DISK_IMAGE_NOMMAP);
952 if (!disk_image)
953 goto error;
954 disk_image->priv = q;
956 return disk_image;
957 error:
958 if (!q)
959 return NULL;
961 free(q->table.l1_table);
962 free(q->header);
963 free(q);
965 return NULL;
968 static bool qcow2_check_image(int fd)
970 struct qcow2_header_disk f_header;
972 if (pread_in_full(fd, &f_header, sizeof(struct qcow2_header_disk), 0) < 0)
973 return false;
975 be32_to_cpus(&f_header.magic);
976 be32_to_cpus(&f_header.version);
978 if (f_header.magic != QCOW_MAGIC)
979 return false;
981 if (f_header.version != QCOW2_VERSION)
982 return false;
984 return true;
987 static void *qcow1_read_header(int fd)
989 struct qcow1_header_disk f_header;
990 struct qcow_header *header;
992 header = malloc(sizeof(struct qcow_header));
993 if (!header)
994 return NULL;
996 if (pread_in_full(fd, &f_header, sizeof(struct qcow1_header_disk), 0) < 0) {
997 free(header);
998 return NULL;
1001 be32_to_cpus(&f_header.magic);
1002 be32_to_cpus(&f_header.version);
1003 be64_to_cpus(&f_header.backing_file_offset);
1004 be32_to_cpus(&f_header.backing_file_size);
1005 be32_to_cpus(&f_header.mtime);
1006 be64_to_cpus(&f_header.size);
1007 be32_to_cpus(&f_header.crypt_method);
1008 be64_to_cpus(&f_header.l1_table_offset);
1010 *header = (struct qcow_header) {
1011 .size = f_header.size,
1012 .l1_table_offset = f_header.l1_table_offset,
1013 .l1_size = f_header.size / ((1 << f_header.l2_bits) * (1 << f_header.cluster_bits)),
1014 .cluster_bits = f_header.cluster_bits,
1015 .l2_bits = f_header.l2_bits,
1018 return header;
1021 static struct disk_image *qcow1_probe(int fd, bool readonly)
1023 struct disk_image *disk_image;
1024 struct qcow_l1_table *l1t;
1025 struct qcow_header *h;
1026 struct qcow *q;
1028 q = calloc(1, sizeof(struct qcow));
1029 if (!q)
1030 goto error;
1032 mutex_init(&q->mutex);
1033 q->fd = fd;
1035 l1t = &q->table;
1037 l1t->root = RB_ROOT;
1038 INIT_LIST_HEAD(&l1t->lru_list);
1040 h = q->header = qcow1_read_header(fd);
1041 if (!h)
1042 goto error;
1044 if (qcow_read_l1_table(q) < 0)
1045 goto error;
1048 * Do not use mmap use read/write instead
1050 if (readonly)
1051 disk_image = disk_image__new(fd, h->size, &qcow_disk_readonly_ops, DISK_IMAGE_NOMMAP);
1052 else
1053 disk_image = disk_image__new(fd, h->size, &qcow_disk_ops, DISK_IMAGE_NOMMAP);
1055 if (!disk_image)
1056 goto error;
1057 disk_image->priv = q;
1059 return disk_image;
1060 error:
1061 if (!q)
1062 return NULL;
1064 free(q->table.l1_table);
1065 free(q->header);
1066 free(q);
1068 return NULL;
1071 static bool qcow1_check_image(int fd)
1073 struct qcow1_header_disk f_header;
1075 if (pread_in_full(fd, &f_header, sizeof(struct qcow1_header_disk), 0) < 0)
1076 return false;
1078 be32_to_cpus(&f_header.magic);
1079 be32_to_cpus(&f_header.version);
1081 if (f_header.magic != QCOW_MAGIC)
1082 return false;
1084 if (f_header.version != QCOW1_VERSION)
1085 return false;
1087 return true;
/*
 * Try to open @fd as a QCOW image. The QCOW1 header check is tried first,
 * then QCOW2; the matching probe function builds the disk image.
 *
 * Returns the disk image, or NULL if the file matches neither format or
 * the matching probe fails.
 */
struct disk_image *qcow_probe(int fd, bool readonly)
{
	struct disk_image *image = NULL;

	if (qcow1_check_image(fd))
		image = qcow1_probe(fd, readonly);
	else if (qcow2_check_image(fd))
		image = qcow2_probe(fd, readonly);

	return image;
}