1 # Generator of fuzzed qcow2 images
3 # Copyright (C) 2014 Maria Kustova <maria.k@catit.be>
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 2 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
23 from os
import urandom
24 from itertools
import chain
26 MAX_IMAGE_SIZE
= 10 * (1 << 20)
34 """Atomic image element (field).
36 The class represents an image field as quadruple of a data format
37 of value necessary for its packing to binary form, an offset from
38 the beginning of the image, a value and a name.
40 The field can be iterated as a list [format, offset, value, name].
43 __slots__
= ('fmt', 'offset', 'value', 'name')
45 def __init__(self
, fmt
, offset
, val
, name
):
52 return iter([self
.fmt
, self
.offset
, self
.value
, self
.name
])
55 return "Field(fmt=%r, offset=%r, value=%r, name=%r)" % \
56 (self
.fmt
, self
.offset
, self
.value
, self
.name
)
59 class FieldsList(object):
63 The class allows access to a field in the list by its name.
66 def __init__(self
, meta_data
=None):
70 self
.data
= [Field(*f
)
73 def __getitem__(self
, name
):
74 return [x
for x
in self
.data
if x
.name
== name
]
77 return iter(self
.data
)
85 """ Qcow2 image object.
87 This class allows to create qcow2 images with random valid structures and
88 values, fuzz them via external qcow2.fuzz module and write the result to
92 def __init__(self
, backing_file_name
=None):
93 """Create a random valid qcow2 image with the correct header and stored
96 cluster_bits
, self
.image_size
= self
._size
_params
()
97 self
.cluster_size
= 1 << cluster_bits
98 self
.header
= FieldsList()
99 self
.backing_file_name
= FieldsList()
100 self
.backing_file_format
= FieldsList()
101 self
.feature_name_table
= FieldsList()
102 self
.end_of_extension_area
= FieldsList()
103 self
.l2_tables
= FieldsList()
104 self
.l1_table
= FieldsList()
105 self
.refcount_table
= FieldsList()
106 self
.refcount_blocks
= FieldsList()
108 self
.create_header(cluster_bits
, backing_file_name
)
109 self
.set_backing_file_name(backing_file_name
)
110 self
.data_clusters
= self
._alloc
_data
(self
.image_size
,
112 # Percentage of fields will be fuzzed
113 self
.bias
= random
.uniform(0.2, 0.5)
116 return chain(self
.header
, self
.backing_file_format
,
117 self
.feature_name_table
, self
.end_of_extension_area
,
118 self
.backing_file_name
, self
.l1_table
, self
.l2_tables
,
119 self
.refcount_table
, self
.refcount_blocks
)
121 def create_header(self
, cluster_bits
, backing_file_name
=None):
122 """Generate a random valid header."""
124 ['>4s', 0, b
"QFI\xfb", 'magic'],
125 ['>I', 4, random
.randint(2, 3), 'version'],
126 ['>Q', 8, 0, 'backing_file_offset'],
127 ['>I', 16, 0, 'backing_file_size'],
128 ['>I', 20, cluster_bits
, 'cluster_bits'],
129 ['>Q', 24, self
.image_size
, 'size'],
130 ['>I', 32, 0, 'crypt_method'],
131 ['>I', 36, 0, 'l1_size'],
132 ['>Q', 40, 0, 'l1_table_offset'],
133 ['>Q', 48, 0, 'refcount_table_offset'],
134 ['>I', 56, 0, 'refcount_table_clusters'],
135 ['>I', 60, 0, 'nb_snapshots'],
136 ['>Q', 64, 0, 'snapshots_offset'],
137 ['>Q', 72, 0, 'incompatible_features'],
138 ['>Q', 80, 0, 'compatible_features'],
139 ['>Q', 88, 0, 'autoclear_features'],
140 # Only refcount_order = 4 is supported by current (07.2014)
141 # implementation of QEMU
142 ['>I', 96, 4, 'refcount_order'],
143 ['>I', 100, 0, 'header_length']
145 self
.header
= FieldsList(meta_header
)
147 if self
.header
['version'][0].value
== 2:
148 self
.header
['header_length'][0].value
= 72
150 self
.header
['incompatible_features'][0].value
= \
151 random
.getrandbits(2)
152 self
.header
['compatible_features'][0].value
= random
.getrandbits(1)
153 self
.header
['header_length'][0].value
= 104
154 # Extensions start at the header last field offset and the field size
155 self
.ext_offset
= struct
.calcsize(
156 self
.header
['header_length'][0].fmt
) + \
157 self
.header
['header_length'][0].offset
158 end_of_extension_area_len
= 2 * UINT32_S
159 free_space
= self
.cluster_size
- self
.ext_offset
- \
160 end_of_extension_area_len
161 # If the backing file name specified and there is enough space for it
162 # in the first cluster, then it's placed in the very end of the first
164 if (backing_file_name
is not None) and \
165 (free_space
>= len(backing_file_name
)):
166 self
.header
['backing_file_size'][0].value
= len(backing_file_name
)
167 self
.header
['backing_file_offset'][0].value
= \
168 self
.cluster_size
- len(backing_file_name
)
170 def set_backing_file_name(self
, backing_file_name
=None):
171 """Add the name of the backing file at the offset specified
174 if (backing_file_name
is not None) and \
175 (not self
.header
['backing_file_offset'][0].value
== 0):
176 data_len
= len(backing_file_name
)
177 data_fmt
= '>' + str(data_len
) + 's'
178 self
.backing_file_name
= FieldsList([
179 [data_fmt
, self
.header
['backing_file_offset'][0].value
,
180 backing_file_name
, 'bf_name']
183 def set_backing_file_format(self
, backing_file_fmt
=None):
184 """Generate the header extension for the backing file format."""
185 if backing_file_fmt
is not None:
186 # Calculation of the free space available in the first cluster
187 end_of_extension_area_len
= 2 * UINT32_S
188 high_border
= (self
.header
['backing_file_offset'][0].value
or
189 (self
.cluster_size
- 1)) - \
190 end_of_extension_area_len
191 free_space
= high_border
- self
.ext_offset
192 ext_size
= 2 * UINT32_S
+ ((len(backing_file_fmt
) + 7) & ~
7)
194 if free_space
>= ext_size
:
195 ext_data_len
= len(backing_file_fmt
)
196 ext_data_fmt
= '>' + str(ext_data_len
) + 's'
197 ext_padding_len
= 7 - (ext_data_len
- 1) % 8
198 self
.backing_file_format
= FieldsList([
199 ['>I', self
.ext_offset
, 0xE2792ACA, 'ext_magic'],
200 ['>I', self
.ext_offset
+ UINT32_S
, ext_data_len
,
202 [ext_data_fmt
, self
.ext_offset
+ UINT32_S
* 2,
203 backing_file_fmt
, 'bf_format']
207 self
.backing_file_format
['bf_format'][0].fmt
) + \
209 self
.backing_file_format
['bf_format'][0].offset
211 def create_feature_name_table(self
):
212 """Generate a random header extension for names of features used in
216 """Return random feature type and feature bit."""
217 return (random
.randint(0, 2), random
.randint(0, 63))
219 end_of_extension_area_len
= 2 * UINT32_S
220 high_border
= (self
.header
['backing_file_offset'][0].value
or
221 (self
.cluster_size
- 1)) - \
222 end_of_extension_area_len
223 free_space
= high_border
- self
.ext_offset
224 # Sum of sizes of 'magic' and 'length' header extension fields
225 ext_header_len
= 2 * UINT32_S
226 fnt_entry_size
= 6 * UINT64_S
227 num_fnt_entries
= min(10, (free_space
- ext_header_len
) /
229 if not num_fnt_entries
== 0:
232 inner_offset
= self
.ext_offset
+ ext_header_len
233 feat_name
= b
'some cool feature'
234 while len(feature_tables
) < num_fnt_entries
* 3:
235 feat_type
, feat_bit
= gen_feat_ids()
237 while (feat_type
, feat_bit
) in feature_ids
:
238 feat_type
, feat_bit
= gen_feat_ids()
239 feature_ids
.append((feat_type
, feat_bit
))
240 feat_fmt
= '>' + str(len(feat_name
)) + 's'
241 feature_tables
+= [['B', inner_offset
,
242 feat_type
, 'feature_type'],
243 ['B', inner_offset
+ 1, feat_bit
,
244 'feature_bit_number'],
245 [feat_fmt
, inner_offset
+ 2,
246 feat_name
, 'feature_name']
248 inner_offset
+= fnt_entry_size
249 # No padding for the extension is necessary, because
250 # the extension length is multiple of 8
251 self
.feature_name_table
= FieldsList([
252 ['>I', self
.ext_offset
, 0x6803f857, 'ext_magic'],
253 # One feature table contains 3 fields and takes 48 bytes
254 ['>I', self
.ext_offset
+ UINT32_S
,
255 len(feature_tables
) // 3 * 48, 'ext_length']
257 self
.ext_offset
= inner_offset
259 def set_end_of_extension_area(self
):
260 """Generate a mandatory header extension marking end of header
263 self
.end_of_extension_area
= FieldsList([
264 ['>I', self
.ext_offset
, 0, 'ext_magic'],
265 ['>I', self
.ext_offset
+ UINT32_S
, 0, 'ext_length']
268 def create_l_structures(self
):
269 """Generate random valid L1 and L2 tables."""
270 def create_l2_entry(host
, guest
, l2_cluster
):
271 """Generate one L2 entry."""
272 offset
= l2_cluster
* self
.cluster_size
273 l2_size
= self
.cluster_size
// UINT64_S
274 entry_offset
= offset
+ UINT64_S
* (guest
% l2_size
)
275 cluster_descriptor
= host
* self
.cluster_size
276 if not self
.header
['version'][0].value
== 2:
277 cluster_descriptor
+= random
.randint(0, 1)
278 # While snapshots are not supported, bit #63 = 1
279 # Compressed clusters are not supported => bit #62 = 0
280 entry_val
= (1 << 63) + cluster_descriptor
281 return ['>Q', entry_offset
, entry_val
, 'l2_entry']
283 def create_l1_entry(l2_cluster
, l1_offset
, guest
):
284 """Generate one L1 entry."""
285 l2_size
= self
.cluster_size
// UINT64_S
286 entry_offset
= l1_offset
+ UINT64_S
* (guest
// l2_size
)
287 # While snapshots are not supported bit #63 = 1
288 entry_val
= (1 << 63) + l2_cluster
* self
.cluster_size
289 return ['>Q', entry_offset
, entry_val
, 'l1_entry']
291 if len(self
.data_clusters
) == 0:
292 # All metadata for an empty guest image needs 4 clusters:
293 # header, rfc table, rfc block, L1 table.
294 # Header takes cluster #0, other clusters ##1-3 can be used
295 l1_offset
= random
.randint(1, 3) * self
.cluster_size
296 l1
= [['>Q', l1_offset
, 0, 'l1_entry']]
299 meta_data
= self
._get
_metadata
()
300 guest_clusters
= random
.sample(range(self
.image_size
//
302 len(self
.data_clusters
))
303 # Number of entries in a L1/L2 table
304 l_size
= self
.cluster_size
// UINT64_S
305 # Number of clusters necessary for L1 table
306 l1_size
= int(ceil((max(guest_clusters
) + 1) / float(l_size
**2)))
307 l1_start
= self
._get
_adjacent
_clusters
(self
.data_clusters |
309 meta_data |
= set(range(l1_start
, l1_start
+ l1_size
))
310 l1_offset
= l1_start
* self
.cluster_size
311 # Indices of L2 tables
313 # Host clusters allocated for L2 tables
319 for host
, guest
in zip(self
.data_clusters
, guest_clusters
):
320 l2_id
= guest
// l_size
321 if l2_id
not in l2_ids
:
323 l2_clusters
.append(self
._get
_adjacent
_clusters
(
324 self
.data_clusters | meta_data |
set(l2_clusters
),
326 l1
.append(create_l1_entry(l2_clusters
[-1], l1_offset
,
328 l2
.append(create_l2_entry(host
, guest
,
329 l2_clusters
[l2_ids
.index(l2_id
)]))
330 self
.l2_tables
= FieldsList(l2
)
331 self
.l1_table
= FieldsList(l1
)
332 self
.header
['l1_size'][0].value
= int(ceil(UINT64_S
* self
.image_size
/
333 float(self
.cluster_size
**2)))
334 self
.header
['l1_table_offset'][0].value
= l1_offset
336 def create_refcount_structures(self
):
337 """Generate random refcount blocks and refcount table."""
338 def allocate_rfc_blocks(data
, size
):
339 """Return indices of clusters allocated for refcount blocks."""
341 diff
= block_ids
= set([x
// size
for x
in data
])
342 while len(diff
) != 0:
343 # Allocate all yet not allocated clusters
344 new
= self
._get
_available
_clusters
(data | cluster_ids
,
346 # Indices of new refcount blocks necessary to cover clusters
348 diff
= set([x
// size
for x
in new
]) - block_ids
351 return cluster_ids
, block_ids
353 def allocate_rfc_table(data
, init_blocks
, block_size
):
354 """Return indices of clusters allocated for the refcount table
355 and updated indices of clusters allocated for blocks and indices
358 blocks
= set(init_blocks
)
360 # Number of entries in one cluster of the refcount table
361 size
= self
.cluster_size
// UINT64_S
362 # Number of clusters necessary for the refcount table based on
363 # the current number of refcount blocks
364 table_size
= int(ceil((max(blocks
) + 1) / float(size
)))
365 # Index of the first cluster of the refcount table
366 table_start
= self
._get
_adjacent
_clusters
(data
, table_size
+ 1)
367 # Clusters allocated for the current length of the refcount table
368 table_clusters
= set(range(table_start
, table_start
+ table_size
))
369 # Clusters allocated for the refcount table including
370 # last optional one for potential l1 growth
371 table_clusters_allocated
= set(range(table_start
, table_start
+
373 # New refcount blocks necessary for clusters occupied by the
375 diff
= set([c
// block_size
for c
in table_clusters
]) - blocks
377 while len(diff
) != 0:
378 # Allocate clusters for new refcount blocks
379 new
= self
._get
_available
_clusters
((data | clusters
) |
380 table_clusters_allocated
,
382 # Indices of new refcount blocks necessary to cover
384 diff
= set([x
// block_size
for x
in new
]) - blocks
387 # Check if the refcount table needs one more cluster
388 if int(ceil((max(blocks
) + 1) / float(size
))) > table_size
:
389 new_block_id
= (table_start
+ table_size
) // block_size
390 # Check if the additional table cluster needs
391 # one more refcount block
392 if new_block_id
not in blocks
:
393 diff
.add(new_block_id
)
394 table_clusters
.add(table_start
+ table_size
)
396 return table_clusters
, blocks
, clusters
398 def create_table_entry(table_offset
, block_cluster
, block_size
,
400 """Generate a refcount table entry."""
401 offset
= table_offset
+ UINT64_S
* (cluster
// block_size
)
402 return ['>Q', offset
, block_cluster
* self
.cluster_size
,
403 'refcount_table_entry']
405 def create_block_entry(block_cluster
, block_size
, cluster
):
406 """Generate a list of entries for the current block."""
407 entry_size
= self
.cluster_size
// block_size
408 offset
= block_cluster
* self
.cluster_size
409 entry_offset
= offset
+ entry_size
* (cluster
% block_size
)
410 # While snapshots are not supported all refcounts are set to 1
411 return ['>H', entry_offset
, 1, 'refcount_block_entry']
412 # Size of a block entry in bits
413 refcount_bits
= 1 << self
.header
['refcount_order'][0].value
414 # Number of refcount entries per refcount block
415 # Convert self.cluster_size from bytes to bits to have the same
416 # base for the numerator and denominator
417 block_size
= self
.cluster_size
* 8 // refcount_bits
418 meta_data
= self
._get
_metadata
()
419 if len(self
.data_clusters
) == 0:
420 # All metadata for an empty guest image needs 4 clusters:
421 # header, rfc table, rfc block, L1 table.
422 # Header takes cluster #0, other clusters ##1-3 can be used
423 block_clusters
= set([random
.choice(list(set(range(1, 4)) -
426 table_clusters
= set([random
.choice(list(set(range(1, 4)) -
430 block_clusters
, block_ids
= \
431 allocate_rfc_blocks(self
.data_clusters |
432 meta_data
, block_size
)
433 table_clusters
, block_ids
, new_clusters
= \
434 allocate_rfc_table(self
.data_clusters |
439 block_clusters |
= new_clusters
441 meta_data |
= block_clusters | table_clusters
442 table_offset
= min(table_clusters
) * self
.cluster_size
444 # Clusters allocated for refcount blocks
445 block_clusters
= list(block_clusters
)
446 # Indices of refcount blocks
447 block_ids
= list(block_ids
)
448 # Refcount table entries
453 for cluster
in sorted(self
.data_clusters | meta_data
):
454 if cluster
// block_size
!= block_id
:
455 block_id
= cluster
// block_size
456 block_cluster
= block_clusters
[block_ids
.index(block_id
)]
457 rfc_table
.append(create_table_entry(table_offset
,
459 block_size
, cluster
))
460 rfc_blocks
.append(create_block_entry(block_cluster
, block_size
,
462 self
.refcount_table
= FieldsList(rfc_table
)
463 self
.refcount_blocks
= FieldsList(rfc_blocks
)
465 self
.header
['refcount_table_offset'][0].value
= table_offset
466 self
.header
['refcount_table_clusters'][0].value
= len(table_clusters
)
468 def fuzz(self
, fields_to_fuzz
=None):
469 """Fuzz an image by corrupting values of a random subset of its fields.
471 Without parameters the method fuzzes an entire image.
473 If 'fields_to_fuzz' is specified then only fields in this list will be
474 fuzzed. 'fields_to_fuzz' can contain both individual fields and more
475 general image elements as a header or tables.
477 In the first case the field will be fuzzed always.
478 In the second a random subset of fields will be selected and fuzzed.
481 """Return boolean value proportional to a portion of fields to be
484 return random
.random() < self
.bias
486 if fields_to_fuzz
is None:
489 field
.value
= getattr(fuzz
, field
.name
)(field
.value
)
491 for item
in fields_to_fuzz
:
493 for field
in getattr(self
, item
[0]):
495 field
.value
= getattr(fuzz
,
496 field
.name
)(field
.value
)
498 # If fields with the requested name were not generated
499 # getattr(self, item[0])[item[1]] returns an empty list
500 for field
in getattr(self
, item
[0])[item
[1]]:
501 field
.value
= getattr(fuzz
, field
.name
)(field
.value
)
503 def write(self
, filename
):
504 """Write an entire image to the file."""
505 image_file
= open(filename
, 'wb')
507 image_file
.seek(field
.offset
)
508 image_file
.write(struct
.pack(field
.fmt
, field
.value
))
510 for cluster
in sorted(self
.data_clusters
):
511 image_file
.seek(cluster
* self
.cluster_size
)
512 image_file
.write(urandom(self
.cluster_size
))
514 # Align the real image size to the cluster size
515 image_file
.seek(0, 2)
516 size
= image_file
.tell()
517 rounded
= (size
+ self
.cluster_size
- 1) & ~
(self
.cluster_size
- 1)
519 image_file
.seek(rounded
- 1)
520 image_file
.write(b
'\x00')
525 """Generate a random image size aligned to a random correct
528 cluster_bits
= random
.randrange(9, 21)
529 cluster_size
= 1 << cluster_bits
530 img_size
= random
.randrange(0, MAX_IMAGE_SIZE
+ 1, cluster_size
)
531 return (cluster_bits
, img_size
)
534 def _get_available_clusters(used
, number
):
535 """Return a set of indices of not allocated clusters.
537 'used' contains indices of currently allocated clusters.
538 All clusters that cannot be allocated between 'used' clusters will have
539 indices appended to the end of 'used'.
541 append_id
= max(used
) + 1
542 free
= set(range(1, append_id
)) - used
543 if len(free
) >= number
:
544 return set(random
.sample(free
, number
))
546 return free |
set(range(append_id
, append_id
+ number
- len(free
)))
549 def _get_adjacent_clusters(used
, size
):
550 """Return an index of the first cluster in the sequence of free ones.
552 'used' contains indices of currently allocated clusters. 'size' is the
553 length of the sequence of free clusters.
554 If the sequence of 'size' is not available between 'used' clusters, its
555 first index will be append to the end of 'used'.
557 def get_cluster_id(lst
, length
):
558 """Return the first index of the sequence of the specified length
559 or None if the sequence cannot be inserted in the list.
564 for i
in range(1, len(lst
)):
565 if lst
[i
] == lst
[i
-1] + 1:
566 pair
= (lst
[i
], pair
[1] + 1)
571 random
.shuffle(pairs
)
574 return x
- length
+ 1
577 append_id
= max(used
) + 1
578 free
= list(set(range(1, append_id
)) - used
)
579 idx
= get_cluster_id(free
, size
)
586 def _alloc_data(img_size
, cluster_size
):
587 """Return a set of random indices of clusters allocated for guest data.
589 num_of_cls
= img_size
// cluster_size
590 return set(random
.sample(range(1, num_of_cls
+ 1),
591 random
.randint(0, num_of_cls
)))
593 def _get_metadata(self
):
594 """Return indices of clusters allocated for image metadata."""
597 ids
.add(x
.offset
// self
.cluster_size
)
601 def create_image(test_img_path
, backing_file_name
=None, backing_file_fmt
=None,
602 fields_to_fuzz
=None):
603 """Create a fuzzed image and write it to the specified file."""
604 image
= Image(backing_file_name
.encode())
605 image
.set_backing_file_format(backing_file_fmt
.encode())
606 image
.create_feature_name_table()
607 image
.set_end_of_extension_area()
608 image
.create_l_structures()
609 image
.create_refcount_structures()
610 image
.fuzz(fields_to_fuzz
)
611 image
.write(test_img_path
)
612 return image
.image_size