1 # Generator of fuzzed qcow2 images
3 # Copyright (C) 2014 Maria Kustova <maria.k@catit.be>
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 2 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 from __future__
import absolute_import
24 from os
import urandom
25 from itertools
import chain
27 MAX_IMAGE_SIZE
= 10 * (1 << 20)
35 """Atomic image element (field).
37 The class represents an image field as quadruple of a data format
38 of value necessary for its packing to binary form, an offset from
39 the beginning of the image, a value and a name.
41 The field can be iterated as a list [format, offset, value, name].
44 __slots__
= ('fmt', 'offset', 'value', 'name')
46 def __init__(self
, fmt
, offset
, val
, name
):
53 return iter([self
.fmt
, self
.offset
, self
.value
, self
.name
])
56 return "Field(fmt='%s', offset=%d, value=%s, name=%s)" % \
57 (self
.fmt
, self
.offset
, str(self
.value
), self
.name
)
60 class FieldsList(object):
64 The class allows access to a field in the list by its name.
67 def __init__(self
, meta_data
=None):
71 self
.data
= [Field(*f
)
def __getitem__(self, name):
    """Return all fields of the list that are called 'name'.

    The result is always a list: it holds every element of 'self.data'
    whose 'name' attribute equals the requested one, in their stored
    order, and is empty when nothing matches.
    """
    matching = [field for field in self.data if field.name == name]
    return matching
78 return iter(self
.data
)
86 """ Qcow2 image object.
88 This class allows to create qcow2 images with random valid structures and
89 values, fuzz them via external qcow2.fuzz module and write the result to
93 def __init__(self
, backing_file_name
=None):
94 """Create a random valid qcow2 image with the correct header and stored
97 cluster_bits
, self
.image_size
= self
._size
_params
()
98 self
.cluster_size
= 1 << cluster_bits
99 self
.header
= FieldsList()
100 self
.backing_file_name
= FieldsList()
101 self
.backing_file_format
= FieldsList()
102 self
.feature_name_table
= FieldsList()
103 self
.end_of_extension_area
= FieldsList()
104 self
.l2_tables
= FieldsList()
105 self
.l1_table
= FieldsList()
106 self
.refcount_table
= FieldsList()
107 self
.refcount_blocks
= FieldsList()
109 self
.create_header(cluster_bits
, backing_file_name
)
110 self
.set_backing_file_name(backing_file_name
)
111 self
.data_clusters
= self
._alloc
_data
(self
.image_size
,
113 # Percentage of fields will be fuzzed
114 self
.bias
= random
.uniform(0.2, 0.5)
117 return chain(self
.header
, self
.backing_file_format
,
118 self
.feature_name_table
, self
.end_of_extension_area
,
119 self
.backing_file_name
, self
.l1_table
, self
.l2_tables
,
120 self
.refcount_table
, self
.refcount_blocks
)
122 def create_header(self
, cluster_bits
, backing_file_name
=None):
123 """Generate a random valid header."""
125 ['>4s', 0, "QFI\xfb", 'magic'],
126 ['>I', 4, random
.randint(2, 3), 'version'],
127 ['>Q', 8, 0, 'backing_file_offset'],
128 ['>I', 16, 0, 'backing_file_size'],
129 ['>I', 20, cluster_bits
, 'cluster_bits'],
130 ['>Q', 24, self
.image_size
, 'size'],
131 ['>I', 32, 0, 'crypt_method'],
132 ['>I', 36, 0, 'l1_size'],
133 ['>Q', 40, 0, 'l1_table_offset'],
134 ['>Q', 48, 0, 'refcount_table_offset'],
135 ['>I', 56, 0, 'refcount_table_clusters'],
136 ['>I', 60, 0, 'nb_snapshots'],
137 ['>Q', 64, 0, 'snapshots_offset'],
138 ['>Q', 72, 0, 'incompatible_features'],
139 ['>Q', 80, 0, 'compatible_features'],
140 ['>Q', 88, 0, 'autoclear_features'],
141 # Only refcount_order = 4 is supported by current (07.2014)
142 # implementation of QEMU
143 ['>I', 96, 4, 'refcount_order'],
144 ['>I', 100, 0, 'header_length']
146 self
.header
= FieldsList(meta_header
)
148 if self
.header
['version'][0].value
== 2:
149 self
.header
['header_length'][0].value
= 72
151 self
.header
['incompatible_features'][0].value
= \
152 random
.getrandbits(2)
153 self
.header
['compatible_features'][0].value
= random
.getrandbits(1)
154 self
.header
['header_length'][0].value
= 104
155 # Extensions start at the header last field offset and the field size
156 self
.ext_offset
= struct
.calcsize(
157 self
.header
['header_length'][0].fmt
) + \
158 self
.header
['header_length'][0].offset
159 end_of_extension_area_len
= 2 * UINT32_S
160 free_space
= self
.cluster_size
- self
.ext_offset
- \
161 end_of_extension_area_len
162 # If the backing file name specified and there is enough space for it
163 # in the first cluster, then it's placed in the very end of the first
165 if (backing_file_name
is not None) and \
166 (free_space
>= len(backing_file_name
)):
167 self
.header
['backing_file_size'][0].value
= len(backing_file_name
)
168 self
.header
['backing_file_offset'][0].value
= \
169 self
.cluster_size
- len(backing_file_name
)
171 def set_backing_file_name(self
, backing_file_name
=None):
172 """Add the name of the backing file at the offset specified
175 if (backing_file_name
is not None) and \
176 (not self
.header
['backing_file_offset'][0].value
== 0):
177 data_len
= len(backing_file_name
)
178 data_fmt
= '>' + str(data_len
) + 's'
179 self
.backing_file_name
= FieldsList([
180 [data_fmt
, self
.header
['backing_file_offset'][0].value
,
181 backing_file_name
, 'bf_name']
184 def set_backing_file_format(self
, backing_file_fmt
=None):
185 """Generate the header extension for the backing file format."""
186 if backing_file_fmt
is not None:
187 # Calculation of the free space available in the first cluster
188 end_of_extension_area_len
= 2 * UINT32_S
189 high_border
= (self
.header
['backing_file_offset'][0].value
or
190 (self
.cluster_size
- 1)) - \
191 end_of_extension_area_len
192 free_space
= high_border
- self
.ext_offset
193 ext_size
= 2 * UINT32_S
+ ((len(backing_file_fmt
) + 7) & ~
7)
195 if free_space
>= ext_size
:
196 ext_data_len
= len(backing_file_fmt
)
197 ext_data_fmt
= '>' + str(ext_data_len
) + 's'
198 ext_padding_len
= 7 - (ext_data_len
- 1) % 8
199 self
.backing_file_format
= FieldsList([
200 ['>I', self
.ext_offset
, 0xE2792ACA, 'ext_magic'],
201 ['>I', self
.ext_offset
+ UINT32_S
, ext_data_len
,
203 [ext_data_fmt
, self
.ext_offset
+ UINT32_S
* 2,
204 backing_file_fmt
, 'bf_format']
208 self
.backing_file_format
['bf_format'][0].fmt
) + \
210 self
.backing_file_format
['bf_format'][0].offset
212 def create_feature_name_table(self
):
213 """Generate a random header extension for names of features used in
217 """Return random feature type and feature bit."""
218 return (random
.randint(0, 2), random
.randint(0, 63))
220 end_of_extension_area_len
= 2 * UINT32_S
221 high_border
= (self
.header
['backing_file_offset'][0].value
or
222 (self
.cluster_size
- 1)) - \
223 end_of_extension_area_len
224 free_space
= high_border
- self
.ext_offset
225 # Sum of sizes of 'magic' and 'length' header extension fields
226 ext_header_len
= 2 * UINT32_S
227 fnt_entry_size
= 6 * UINT64_S
228 num_fnt_entries
= min(10, (free_space
- ext_header_len
) /
230 if not num_fnt_entries
== 0:
233 inner_offset
= self
.ext_offset
+ ext_header_len
234 feat_name
= 'some cool feature'
235 while len(feature_tables
) < num_fnt_entries
* 3:
236 feat_type
, feat_bit
= gen_feat_ids()
238 while (feat_type
, feat_bit
) in feature_ids
:
239 feat_type
, feat_bit
= gen_feat_ids()
240 feature_ids
.append((feat_type
, feat_bit
))
241 feat_fmt
= '>' + str(len(feat_name
)) + 's'
242 feature_tables
+= [['B', inner_offset
,
243 feat_type
, 'feature_type'],
244 ['B', inner_offset
+ 1, feat_bit
,
245 'feature_bit_number'],
246 [feat_fmt
, inner_offset
+ 2,
247 feat_name
, 'feature_name']
249 inner_offset
+= fnt_entry_size
250 # No padding for the extension is necessary, because
251 # the extension length is multiple of 8
252 self
.feature_name_table
= FieldsList([
253 ['>I', self
.ext_offset
, 0x6803f857, 'ext_magic'],
254 # One feature table contains 3 fields and takes 48 bytes
255 ['>I', self
.ext_offset
+ UINT32_S
,
256 len(feature_tables
) / 3 * 48, 'ext_length']
258 self
.ext_offset
= inner_offset
260 def set_end_of_extension_area(self
):
261 """Generate a mandatory header extension marking end of header
264 self
.end_of_extension_area
= FieldsList([
265 ['>I', self
.ext_offset
, 0, 'ext_magic'],
266 ['>I', self
.ext_offset
+ UINT32_S
, 0, 'ext_length']
269 def create_l_structures(self
):
270 """Generate random valid L1 and L2 tables."""
def create_l2_entry(host, guest, l2_cluster):
    """Generate one L2 entry.

    'host' is the index of the host cluster the entry points to,
    'guest' is the index of the guest cluster being mapped and
    'l2_cluster' is the index of the host cluster that stores the
    L2 table. 'self' is taken from the enclosing method.

    Return the entry as a list [format, offset, value, name].
    """
    offset = l2_cluster * self.cluster_size
    # Number of entries in one L2 table. Floor division keeps the value
    # an integer under both Python 2 and Python 3, so the entry offset
    # stays usable as a file position.
    l2_size = self.cluster_size // UINT64_S
    entry_offset = offset + UINT64_S * (guest % l2_size)
    cluster_descriptor = host * self.cluster_size
    if not self.header['version'][0].value == 2:
        # For images that are not version 2 the lowest bit of the
        # descriptor is chosen at random
        cluster_descriptor += random.randint(0, 1)
    # While snapshots are not supported, bit #63 = 1
    # Compressed clusters are not supported => bit #62 = 0
    entry_val = (1 << 63) + cluster_descriptor
    return ['>Q', entry_offset, entry_val, 'l2_entry']
def create_l1_entry(l2_cluster, l1_offset, guest):
    """Generate one L1 entry.

    'l2_cluster' is the index of the host cluster that stores the L2
    table referenced by the entry, 'l1_offset' is the offset of the L1
    table in bytes and 'guest' is the index of a guest cluster covered
    by that L2 table. 'self' is taken from the enclosing method.

    Return the entry as a list [format, offset, value, name].
    """
    # Number of entries in one L2 table. Floor division is used so that
    # the results stay integers under both Python 2 and Python 3.
    l2_size = self.cluster_size // UINT64_S
    # Every L1 entry covers 'l2_size' consecutive guest clusters
    entry_offset = l1_offset + UINT64_S * (guest // l2_size)
    # While snapshots are not supported bit #63 = 1
    entry_val = (1 << 63) + l2_cluster * self.cluster_size
    return ['>Q', entry_offset, entry_val, 'l1_entry']
292 if len(self
.data_clusters
) == 0:
293 # All metadata for an empty guest image needs 4 clusters:
294 # header, rfc table, rfc block, L1 table.
295 # Header takes cluster #0, other clusters ##1-3 can be used
296 l1_offset
= random
.randint(1, 3) * self
.cluster_size
297 l1
= [['>Q', l1_offset
, 0, 'l1_entry']]
300 meta_data
= self
._get
_metadata
()
301 guest_clusters
= random
.sample(range(self
.image_size
/
303 len(self
.data_clusters
))
304 # Number of entries in a L1/L2 table
305 l_size
= self
.cluster_size
/ UINT64_S
306 # Number of clusters necessary for L1 table
307 l1_size
= int(ceil((max(guest_clusters
) + 1) / float(l_size
**2)))
308 l1_start
= self
._get
_adjacent
_clusters
(self
.data_clusters |
310 meta_data |
= set(range(l1_start
, l1_start
+ l1_size
))
311 l1_offset
= l1_start
* self
.cluster_size
312 # Indices of L2 tables
314 # Host clusters allocated for L2 tables
320 for host
, guest
in zip(self
.data_clusters
, guest_clusters
):
321 l2_id
= guest
/ l_size
322 if l2_id
not in l2_ids
:
324 l2_clusters
.append(self
._get
_adjacent
_clusters
(
325 self
.data_clusters | meta_data |
set(l2_clusters
),
327 l1
.append(create_l1_entry(l2_clusters
[-1], l1_offset
,
329 l2
.append(create_l2_entry(host
, guest
,
330 l2_clusters
[l2_ids
.index(l2_id
)]))
331 self
.l2_tables
= FieldsList(l2
)
332 self
.l1_table
= FieldsList(l1
)
333 self
.header
['l1_size'][0].value
= int(ceil(UINT64_S
* self
.image_size
/
334 float(self
.cluster_size
**2)))
335 self
.header
['l1_table_offset'][0].value
= l1_offset
337 def create_refcount_structures(self
):
338 """Generate random refcount blocks and refcount table."""
339 def allocate_rfc_blocks(data
, size
):
340 """Return indices of clusters allocated for refcount blocks."""
342 diff
= block_ids
= set([x
/ size
for x
in data
])
343 while len(diff
) != 0:
344 # Allocate all yet not allocated clusters
345 new
= self
._get
_available
_clusters
(data | cluster_ids
,
347 # Indices of new refcount blocks necessary to cover clusters
349 diff
= set([x
/ size
for x
in new
]) - block_ids
352 return cluster_ids
, block_ids
354 def allocate_rfc_table(data
, init_blocks
, block_size
):
355 """Return indices of clusters allocated for the refcount table
356 and updated indices of clusters allocated for blocks and indices
359 blocks
= set(init_blocks
)
361 # Number of entries in one cluster of the refcount table
362 size
= self
.cluster_size
/ UINT64_S
363 # Number of clusters necessary for the refcount table based on
364 # the current number of refcount blocks
365 table_size
= int(ceil((max(blocks
) + 1) / float(size
)))
366 # Index of the first cluster of the refcount table
367 table_start
= self
._get
_adjacent
_clusters
(data
, table_size
+ 1)
368 # Clusters allocated for the current length of the refcount table
369 table_clusters
= set(range(table_start
, table_start
+ table_size
))
370 # Clusters allocated for the refcount table including
371 # last optional one for potential l1 growth
372 table_clusters_allocated
= set(range(table_start
, table_start
+
374 # New refcount blocks necessary for clusters occupied by the
376 diff
= set([c
/ block_size
for c
in table_clusters
]) - blocks
378 while len(diff
) != 0:
379 # Allocate clusters for new refcount blocks
380 new
= self
._get
_available
_clusters
((data | clusters
) |
381 table_clusters_allocated
,
383 # Indices of new refcount blocks necessary to cover
385 diff
= set([x
/ block_size
for x
in new
]) - blocks
388 # Check if the refcount table needs one more cluster
389 if int(ceil((max(blocks
) + 1) / float(size
))) > table_size
:
390 new_block_id
= (table_start
+ table_size
) / block_size
391 # Check if the additional table cluster needs
392 # one more refcount block
393 if new_block_id
not in blocks
:
394 diff
.add(new_block_id
)
395 table_clusters
.add(table_start
+ table_size
)
397 return table_clusters
, blocks
, clusters
399 def create_table_entry(table_offset
, block_cluster
, block_size
,
401 """Generate a refcount table entry."""
402 offset
= table_offset
+ UINT64_S
* (cluster
/ block_size
)
403 return ['>Q', offset
, block_cluster
* self
.cluster_size
,
404 'refcount_table_entry']
def create_block_entry(block_cluster, block_size, cluster):
    """Generate one refcount block entry for 'cluster'.

    'block_cluster' is the index of the host cluster that stores the
    refcount block, 'block_size' is the number of entries in one
    refcount block and 'cluster' is the index of the cluster the entry
    describes. 'self' is taken from the enclosing method.

    Return the entry as a list [format, offset, value, name].
    """
    # Floor division keeps the entry size an integer for integer inputs
    # under both Python 2 and Python 3.
    entry_size = self.cluster_size // block_size
    offset = block_cluster * self.cluster_size
    entry_offset = offset + entry_size * (cluster % block_size)
    # While snapshots are not supported all refcounts are set to 1
    return ['>H', entry_offset, 1, 'refcount_block_entry']
413 # Size of a block entry in bits
414 refcount_bits
= 1 << self
.header
['refcount_order'][0].value
415 # Number of refcount entries per refcount block
416 # Convert self.cluster_size from bytes to bits to have the same
417 # base for the numerator and denominator
418 block_size
= self
.cluster_size
* 8 / refcount_bits
419 meta_data
= self
._get
_metadata
()
420 if len(self
.data_clusters
) == 0:
421 # All metadata for an empty guest image needs 4 clusters:
422 # header, rfc table, rfc block, L1 table.
423 # Header takes cluster #0, other clusters ##1-3 can be used
424 block_clusters
= set([random
.choice(list(set(range(1, 4)) -
427 table_clusters
= set([random
.choice(list(set(range(1, 4)) -
431 block_clusters
, block_ids
= \
432 allocate_rfc_blocks(self
.data_clusters |
433 meta_data
, block_size
)
434 table_clusters
, block_ids
, new_clusters
= \
435 allocate_rfc_table(self
.data_clusters |
440 block_clusters |
= new_clusters
442 meta_data |
= block_clusters | table_clusters
443 table_offset
= min(table_clusters
) * self
.cluster_size
445 # Clusters allocated for refcount blocks
446 block_clusters
= list(block_clusters
)
447 # Indices of refcount blocks
448 block_ids
= list(block_ids
)
449 # Refcount table entries
454 for cluster
in sorted(self
.data_clusters | meta_data
):
455 if cluster
/ block_size
!= block_id
:
456 block_id
= cluster
/ block_size
457 block_cluster
= block_clusters
[block_ids
.index(block_id
)]
458 rfc_table
.append(create_table_entry(table_offset
,
460 block_size
, cluster
))
461 rfc_blocks
.append(create_block_entry(block_cluster
, block_size
,
463 self
.refcount_table
= FieldsList(rfc_table
)
464 self
.refcount_blocks
= FieldsList(rfc_blocks
)
466 self
.header
['refcount_table_offset'][0].value
= table_offset
467 self
.header
['refcount_table_clusters'][0].value
= len(table_clusters
)
469 def fuzz(self
, fields_to_fuzz
=None):
470 """Fuzz an image by corrupting values of a random subset of its fields.
472 Without parameters the method fuzzes an entire image.
474 If 'fields_to_fuzz' is specified then only fields in this list will be
475 fuzzed. 'fields_to_fuzz' can contain both individual fields and more
476 general image elements as a header or tables.
478 In the first case the field will be fuzzed always.
479 In the second a random subset of fields will be selected and fuzzed.
482 """Return boolean value proportional to a portion of fields to be
485 return random
.random() < self
.bias
487 if fields_to_fuzz
is None:
490 field
.value
= getattr(fuzz
, field
.name
)(field
.value
)
492 for item
in fields_to_fuzz
:
494 for field
in getattr(self
, item
[0]):
496 field
.value
= getattr(fuzz
,
497 field
.name
)(field
.value
)
499 # If fields with the requested name were not generated
500 # getattr(self, item[0])[item[1]] returns an empty list
501 for field
in getattr(self
, item
[0])[item
[1]]:
502 field
.value
= getattr(fuzz
, field
.name
)(field
.value
)
504 def write(self
, filename
):
505 """Write an entire image to the file."""
506 image_file
= open(filename
, 'w')
508 image_file
.seek(field
.offset
)
509 image_file
.write(struct
.pack(field
.fmt
, field
.value
))
511 for cluster
in sorted(self
.data_clusters
):
512 image_file
.seek(cluster
* self
.cluster_size
)
513 image_file
.write(urandom(self
.cluster_size
))
515 # Align the real image size to the cluster size
516 image_file
.seek(0, 2)
517 size
= image_file
.tell()
518 rounded
= (size
+ self
.cluster_size
- 1) & ~
(self
.cluster_size
- 1)
520 image_file
.seek(rounded
- 1)
521 image_file
.write("\0")
526 """Generate a random image size aligned to a random correct
529 cluster_bits
= random
.randrange(9, 21)
530 cluster_size
= 1 << cluster_bits
531 img_size
= random
.randrange(0, MAX_IMAGE_SIZE
+ 1, cluster_size
)
532 return (cluster_bits
, img_size
)
535 def _get_available_clusters(used
, number
):
536 """Return a set of indices of not allocated clusters.
538 'used' contains indices of currently allocated clusters.
539 All clusters that cannot be allocated between 'used' clusters will have
540 indices appended to the end of 'used'.
542 append_id
= max(used
) + 1
543 free
= set(range(1, append_id
)) - used
544 if len(free
) >= number
:
545 return set(random
.sample(free
, number
))
547 return free |
set(range(append_id
, append_id
+ number
- len(free
)))
550 def _get_adjacent_clusters(used
, size
):
551 """Return an index of the first cluster in the sequence of free ones.
553 'used' contains indices of currently allocated clusters. 'size' is the
554 length of the sequence of free clusters.
555 If the sequence of 'size' is not available between 'used' clusters, its
556 first index will be append to the end of 'used'.
558 def get_cluster_id(lst
, length
):
559 """Return the first index of the sequence of the specified length
560 or None if the sequence cannot be inserted in the list.
565 for i
in range(1, len(lst
)):
566 if lst
[i
] == lst
[i
-1] + 1:
567 pair
= (lst
[i
], pair
[1] + 1)
572 random
.shuffle(pairs
)
575 return x
- length
+ 1
578 append_id
= max(used
) + 1
579 free
= list(set(range(1, append_id
)) - used
)
580 idx
= get_cluster_id(free
, size
)
def _alloc_data(img_size, cluster_size):
    """Return a set of random indices of clusters allocated for guest data.

    'img_size' and 'cluster_size' are given in the same units; the number
    of whole clusters in the image is 'img_size' // 'cluster_size'.
    A random amount of indices, from zero up to that number, is drawn
    from 1..number inclusive, so index 0 is never returned.
    """
    # Floor division: range() and random.randint() need an integer under
    # both Python 2 and Python 3.
    num_of_cls = img_size // cluster_size
    return set(random.sample(range(1, num_of_cls + 1),
                             random.randint(0, num_of_cls)))
594 def _get_metadata(self
):
595 """Return indices of clusters allocated for image metadata."""
598 ids
.add(x
.offset
/self
.cluster_size
)
def create_image(test_img_path, backing_file_name=None, backing_file_fmt=None,
                 fields_to_fuzz=None):
    """Generate a qcow2 image, fuzz it and store it in 'test_img_path'.

    'backing_file_name' is handed to the Image constructor and
    'backing_file_fmt' to its set_backing_file_format() method.
    'fields_to_fuzz' is passed unchanged to Image.fuzz().

    Return the 'image_size' attribute of the generated image.
    """
    img = Image(backing_file_name)
    # Header extensions come first, followed by the L1/L2 tables and the
    # refcount structures
    img.set_backing_file_format(backing_file_fmt)
    img.create_feature_name_table()
    img.set_end_of_extension_area()
    img.create_l_structures()
    img.create_refcount_structures()
    # Values are corrupted only after the whole layout has been generated
    img.fuzz(fields_to_fuzz)
    img.write(test_img_path)
    return img.image_size