Support for Data Descriptor
[zip-parser.git] / zip_parser
blob5ea1333e1f1e5f605605b2106302fc919572f099
1 #!/usr/bin/perl
3 use strict;
4 use warnings;
5 use Data::ParseBinary;
6 use Data::Dumper;
8 # Lacks the capability to parse Zip64 and encrypted .Zip files
10 # Check if a file has been passed as an argument or not
11 if( @ARGV == 0 ) {
12 print " No .zip file has been passed.\n";
13 exit;
16 # Slurp!
17 undef $/;
19 # Open the file and get filehandle
20 my $filename = shift;
21 open my $fh, '<', $filename or die 'can not open $filename';
22 binmode $fh;
24 # List the printables
25 my @printable_local_file_header = qw(
26 Filename Version_Needed_To_Extract General_Purpose_Bit_Flag Compression_Method
29 my @printable_central_directory_record = qw(
30 Filename General_Purpose_Bit_Flag
33 my @printable_end_central_directory_record = qw(
34 Total_Number_Of_Entries_In_The_Central_Directory
37 sub printable {#{{{
38 my ( $var, $header ) = @_;
39 my @printable;
41 if ( $header eq 'lfh' ) { @printable = @printable_local_file_header }
42 elsif( $header eq 'cdr' ) { @printable = @printable_central_directory_record }
43 elsif( $header eq 'ecdr' ) { @printable = @printable_end_central_directory_record }
45 map { return 1 if($var eq $_) } @printable;
47 return 0;
49 #}}}
51 sub dissect {#{{{
53 my ( $data, $header ) = @_;
55 if ( $header eq 'lfh' ) { print "\tLOCAL FILE HEADER\n" , '-' x 50, "\n\n" }
56 elsif( $header eq 'cdr' ) { print "\tCENTRAL DIRECTORY RECORD\n" , '-' x 50, "\n\n" }
57 elsif( $header eq 'ecdr' ) { print "\tEND CENTRAL DIRECTORY RECORD\n", '-' x 50, "\n\n" }
59 if( $header eq 'lfh' || $header eq 'cdr' ) {
60 &mod_file_date_time ($data);
61 &extra_field ($data);
62 &compression_method ($data);
63 &general_purpose_bit_flag ($data);
64 &version_needed_to_extract($data);
65 } else { $data = [$data] }
67 for( @$data ) {
68 my %temp = %$_;
70 map {
71 if( &printable($_, $header) ) {
73 if( ref($temp{$_}) eq '' || ref($temp{$_}) eq 'SCALAR' ) {
74 printf "%s -> \n\t%s\n", $_, $temp{$_};
75 } elsif( ref($temp{$_}) eq 'ARRAY' ) {
76 printf "%s ->", $_;
77 map print("\n\t$_"), @{ $temp{$_} };
78 print "\n";
79 } elsif( ref($temp{$_}) eq 'HASH' ) {
80 printf "%s ->", $_;
81 my %hash = %{$temp{$_}};
83 map {
84 if( ref($hash{$_}) eq 'ARRAY' ) {
85 printf "\n%s ->", $_;
86 map print("\n\t$_"), @{ $hash{$_} };
87 } else { printf "\n%s -> %s", $_, $hash{$_} }
88 } keys %hash;
90 print "\n";
94 } keys %temp;
96 print "\n\n";
99 #}}}
101 sub compression_method {#{{{
102 my $data = shift;
104 for( my $i = 0; $i < @$data; $i++ ) {
105 exists $data->[$i]{'Compression_Method'}
106 or die " Compression_Method is not present\n";
108 my %compression_method = (
109 '0' => 'The file is stored (no compression)',
110 '1' => 'The file is Shrunk',
111 '2' => 'The file is Reduced with compression factor 1',
112 '3' => 'The file is Reduced with compression factor 2',
113 '4' => 'The file is Reduced with compression factor 3',
114 '5' => 'The file is Reduced with compression factor 4',
115 '6' => 'The file is Imploded',
116 '7' => 'Reserved for Tokenizing compression algorithm',
117 '8' => 'The file is Deflated',
118 '9' => 'Enhanced Deflating using Deflate64(tm)',
119 '10' => 'PKWARE Data Compression Library Imploding (old IBM TERSE)',
120 '11' => 'Reserved by PKWARE',
121 '12' => 'File is compressed using BZIP2 algorithm',
122 '13' => 'Reserved by PKWARE',
123 '14' => 'LZMA (EFS)',
124 '15' => 'Reserved by PKWARE',
125 '16' => 'Reserved by PKWARE',
126 '17' => 'Reserved by PKWARE',
127 '18' => 'File is compressed using IBM TERSE (new)',
128 '19' => 'IBM LZ77 z Architecture (PFS)',
129 '97' => 'WavPack compressed data',
130 '98' => 'PPMd version I, Rev 1',
133 $data->[$i]{'Compression_Method'} = $compression_method{ $data->[$i]{'Compression_Method'} };
137 #}}}
139 sub general_purpose_bit_flag {#{{{
140 my $data = shift;
142 for( my $i = 0; $i < @$data; $i++ ) {
143 exists $data->[$i]{'General_Purpose_Bit_Flag'}
144 or die " General_Purpose_Bit_Flag is not present\n";
146 my $bit0 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit0' };
147 my $bit1 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit1' };
148 my $bit2 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit2' };
149 my $bit3 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit3' };
150 my $bit4 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit4' };
151 my $bit5 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit5' };
152 my $bit6 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit6' };
153 my $bit11 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit11'};
154 my $bit12 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit12'};
155 my $bit13 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit13'};
157 my @general_purpose_bit_flag;
158 push @general_purpose_bit_flag, 'File is encrypted' if $bit0 == 1;
160 if( $data->[$i]{'Compression_Method'} eq 'The file is Imploded' ) {
162 if( $bit1 == 1 ) { push @general_purpose_bit_flag, '8K sliding dictionary' }
163 else { push @general_purpose_bit_flag, '4K sliding dictionary' }
164 if( $bit2 == 1 ) { push @general_purpose_bit_flag, '3 Shannon-Fano trees were used to encode the sliding dictionary output' }
165 else { push @general_purpose_bit_flag, '2 Shannon-Fano trees were used to encode the sliding dictionary output' }
167 } elsif( $data->[$i]{'Compression_Method'} eq 'The file is Deflated' ||
168 $data->[$i]{'Compression_Method'} eq 'Enhanced Deflating using Deflate64(tm)' ) {
170 push @general_purpose_bit_flag, 'Normal (-en) compression option was used' if $bit2 == 0 && $bit1 == 0;
171 push @general_purpose_bit_flag, 'Maximum (-exx/-ex) compression option was used' if $bit2 == 0 && $bit1 == 1;
172 push @general_purpose_bit_flag, 'Fast (-ef) compression option was used' if $bit2 == 1 && $bit1 == 0;
173 push @general_purpose_bit_flag, 'Super Fast (-es) compression option was used' if $bit2 == 1 && $bit1 == 1;
175 } elsif( $data->[$i]{'Compression_Method'} eq 'LZMA (EFS)' ) {
177 if( $bit1 == 1 ) {
178 push @general_purpose_bit_flag, 'End-of-stream (EOS) marker is used to mark the end of the compressed data stream';
179 } else {
180 push @general_purpose_bit_flag, 'End-of-stream (EOS) marker is not present and the compressed data size must be known to extract';
183 } else {
185 if( $bit3 == 1 ) {
187 $data->[$i]{'CRC-32' } = 0;
188 $data->[$i]{'Compressed_Size' } = 0;
189 $data->[$i]{'Uncompressed_Size'} = 0;
191 push @general_purpose_bit_flag, 'Data Descriptor contains CRC-32, Compressed_Size and Uncompressed_Size';
194 if( $bit4 == 1 && $data->[$i]{'Compression_Method'} eq 'The file is Deflated' ) {
195 push @general_purpose_bit_flag, 'Enhanced deflating';
196 } elsif( $bit4 == 1 && $data->[$i]{'Compression_Method'} ne 'The file is Deflated' ) {
197 die ' Enhanced deflating cannot be done on a file that is not deflated';
200 if( $bit5 == 1 && $data->[$i]{'Version_Needed_To_Extract'} >= 27 ) {
201 push @general_purpose_bit_flag, 'Compressed patched data'
202 } elsif( $bit5 == 1 ) {
203 die ' Incompatible Version_Needed_To_Extract for patched compressed data';
206 if( $bit6 == 1 && $data->[$i]{'Version_Needed_To_Extract'} >= 50 && $bit0 == 1 ) {
207 push @general_purpose_bit_flag, 'Strong encryption'
208 } elsif( $bit6 == 1 && $data->[$i]{'Version_Needed_To_Extract'} < 50 && $bit0 == 1 ) {
209 die ' Incompatible Version_Needed_To_Extract for strong encryption';
210 } elsif( $bit6 == 1 && $bit0 == 0 ) {
211 die ' Non-encrypted file cannot be strong encrypted';
214 push @general_purpose_bit_flag, 'Filename and comment fields for this file must be encoded using UTF-8' if $bit11 == 1;
215 push @general_purpose_bit_flag, 'Enhanced compression' if $bit12 == 1;
216 push @general_purpose_bit_flag, 'Selected data values in the Local Header are masked' if $bit13 == 1;
220 $data->[$i]{'General_Purpose_Bit_Flag'} = [ @general_purpose_bit_flag ];
225 #}}}
227 sub version_needed_to_extract {#{{{
228 my $data = shift;
230 my %version_mappings = (
232 '10' => 'Default value',
233 '11' => 'File is a volume label',
235 '20' => "File is a folder (directory)" .
236 "\n\tFile is compressed using Deflate compression" .
237 "\n\tFile is encrypted using traditional PKWARE encryption",
239 '21' => 'File is compressed using Deflate64(tm)',
240 '25' => 'File is compressed using PKWARE DCL Implode ',
241 '27' => 'File is a patch data set ',
242 '45' => 'File uses ZIP64 format extensions',
243 '46' => 'File is compressed using BZIP2 compression*',
245 '50' => "File is encrypted using DES" .
246 "\n\tFile is encrypted using 3DES" .
247 "\n\tFile is encrypted using original RC2 encryption" .
248 "\n\tFile is encrypted using RC4 encryption",
250 '51' => "File is encrypted using AES encryption" .
251 "\n\tFile is encrypted using corrected RC2 encryption",
253 '52' => 'File is encrypted using corrected RC2-64 encryption',
254 '61' => 'File is encrypted using non-OAEP key wrapping',
255 '62' => 'Central directory encryption',
257 '63' => "File is compressed using LZMA" .
258 "\n\tFile is compressed using PPMd" .
259 "\n\tFile is encrypted using Blowfish" .
260 "\n\tFile is encrypted using Twofish",
264 for( my $i = 0; $i < @$data; $i++ ) {
266 exists $data->[$i]{'Version_Needed_To_Extract'}
267 or die " Version_Needed_To_Extract is not present\n";
268 exists $version_mappings{ $data->[$i]{'Version_Needed_To_Extract'} }
269 or die " Version_Needed_To_Extract has an illegal value\n";
271 $data->[$i]{'Version_Needed_To_Extract'} =
272 $version_mappings{ $data->[$i]{'Version_Needed_To_Extract'} },
278 #}}}
280 sub extra_field {#{{{
281 my $data = shift;
282 my ( @header, @data );
283 my %header_mappings = (
284 '0001' => 'Zip64 extended information extra field',
285 '0007' => 'AV Info',
286 '0008' => 'Reserved for extended language encoding data (PFS)',
287 '0009' => 'OS/2',
288 '000a' => 'NTFS ',
289 '000c' => 'OpenVMS',
290 '000d' => 'UNIX',
291 '000e' => 'Reserved for file stream and fork descriptors',
292 '000f' => 'Patch Descriptor',
293 '0014' => 'PKCS#7 Store for X.509 Certificates',
294 '0015' => 'X.509 Certificate ID and Signature for individual file',
295 '0016' => 'X.509 Certificate ID for Central Directory',
296 '0017' => 'Strong Encryption Header',
297 '0018' => 'Record Management Controls',
298 '0019' => 'PKCS#7 Encryption Recipient Certificate List',
299 '0065' => 'IBM S/390 (Z390), AS/400 (I400) attributes - uncompressed',
300 '0066' => 'Reserved for IBM S/390 (Z390), AS/400 (I400) attributes - compressed',
301 '4690' => 'POSZIP 4690 (reserved) ',
302 '07c8' => 'Macintosh',
303 '2605' => 'ZipIt Macintosh',
304 '2705' => 'ZipIt Macintosh 1.3.5+',
305 '2805' => 'ZipIt Macintosh 1.3.5+',
306 '334d' => 'Info-ZIP Macintosh',
307 '4341' => 'Acorn/SparkFS ',
308 '4453' => 'Windows NT security descriptor (binary ACL)',
309 '4704' => 'VM/CMS',
310 '470f' => 'MVS',
311 '4b46' => 'FWKCS MD5 (see below)',
312 '4c41' => 'OS/2 access control list (text ACL)',
313 '4d49' => 'Info-ZIP OpenVMS',
314 '4f4c' => 'Xceed original location extra field',
315 '5356' => 'AOS/VS (ACL)',
316 '5455' => 'extended timestamp',
317 '554e' => 'Xceed unicode extra field',
318 '5855' => 'Info-ZIP UNIX (original, also OS/2, NT, etc)',
319 '6375' => 'Info-ZIP Unicode Comment Extra Field',
320 '6542' => 'BeOS/BeBox',
321 '7075' => 'Info-ZIP Unicode Path Extra Field',
322 '756e' => 'ASi UNIX',
323 '7855' => 'Info-ZIP UNIX (new)',
324 'a220' => 'Microsoft Open Packaging Growth Hint',
325 'fd4a' => 'SMS/QDOS',
328 for( my $i = 0; $i < @$data; $i++ ) {
330 if( exists $data->[$i]{'Extra_Field'} ) {
332 for( my $j = 0; $j < length $data->[$i]{'Extra_Field'}; $j += 4 ) {
334 my $header = unpack("H4", pack('Z*', substr($data->[$i]{'Extra_Field'}, $j, 2)));
335 $header = substr($header, 2, 2) . substr($header, 0, 2);
336 $header = $header_mappings{$header} if exists $header_mappings{$header};
337 push @header, $header;
339 my $data = unpack("H4", pack('Z*', substr($data->[$i]{'Extra_Field'}, $j + 2, 2)));
340 push @data, substr($data, 2, 2) . substr($data, 0, 2);
344 $data->[$i]{'Extra_Field'} = {
345 Header => \@header,
346 Data => \@data,
351 #}}}
353 sub mod_file_date_time {#{{{
354 my $data = shift;
355 for( 0 .. @$data - 1 ) {
356 # Convert Last Mod File Time to Hour, Minute and Second
357 $data->[$_]{'Last_Mod_File_Time'} = pack('n', $data->[$_]{'Last_Mod_File_Time'});
358 my $convert = BitStruct('Last_Mod_File_Time',
359 BitField('Hour' , 5),
360 BitField('Minute', 6),
361 BitField('Second', 5),
363 $data->[$_]{'Last_Mod_File_Time'} = $convert->parse(CreateStreamReader($data->[$_]{'Last_Mod_File_Time'}));
364 # Convert Last Mod File Date to Year, Month and Day
365 $data->[$_]{'Last_Mod_File_Date'} = pack('n', $data->[$_]{'Last_Mod_File_Date'});
366 $convert = BitStruct('Last Mod File Date',
367 BitField('Year' , 7),
368 BitField('Month', 4),
369 BitField('Day' , 5),
371 $data->[$_]{'Last_Mod_File_Date'} = $convert->parse(CreateStreamReader($data->[$_]{'Last_Mod_File_Date'}));
372 $data->[$_]{'Last_Mod_File_Date'}{'Year'} = $data->[$_]{'Last_Mod_File_Date'}{'Year'} + 1980;
375 #}}}
377 # Zip is little endian
379 my $parser_end_central_directory_record = Struct('zip',
380 Const(
381 Bytes('End_Of_Central_Dir_Signature', 4), "\x50\x4B\x05\x06"
383 Anchor('Position'),
384 ULInt16('Number_Of_This_Disk' ),
385 ULInt16('Number_Of_The_Disk_With_The_Start_Of_The_Central_Directory' ),
386 ULInt16('Total_Number_Of_Entries_In_The_Central_Directory_On_This_Disk' ),
387 ULInt16('Total_Number_Of_Entries_In_The_Central_Directory' ),
388 ULInt32('Size_Of_The_Central_Directory' ),
389 ULInt32('Offset_Of_Start_Of_Central_Directory_With_Respect_To_The_Starting_Disk_Number'),
390 ULInt16('.ZIP_File_Comment_Length' ),
391 Field('.ZIP_File_Comment', sub { $_->ctx->{'.ZIP_File_Comment_Length'} }),
394 seek $fh, -22, 2;
396 my $number_of_files;
397 my $stream = CreateStreamReader(File => $fh);
398 my $pecdr = $parser_end_central_directory_record->parse($stream);
400 &dissect($pecdr, 'ecdr');
401 $number_of_files = $pecdr->{'Total_Number_Of_Entries_In_The_Central_Directory'};
403 my $parser_local_file_header = Array($number_of_files,
404 Struct('zip',
405 Const(
406 Bytes('Local_File_Header_Signature', 4), "\x50\x4B\x03\x04"
408 Pointer( sub { 0 }, Bytes("\x50\x4B\x03\x04", 4)),
409 ULInt16('Version_Needed_To_Extract'),
410 BitStruct('General_Purpose_Bit_Flag',
411 Padding(1),
412 Flag('Bit6' ),
413 Flag('Bit5' ),
414 Flag('Bit4' ),
415 Flag('Bit3' ),
416 Flag('Bit2' ),
417 Flag('Bit1' ),
418 Flag('Bit0' ),
419 Padding(2),
420 Flag('Bit13'),
421 Flag('Bit12'),
422 Flag('Bit11'),
423 Padding(3),
425 ULInt16('Compression_Method'),
426 ULInt16('Last_Mod_File_Time'),
427 ULInt16('Last_Mod_File_Date'),
428 ULInt32('CRC-32' ),
429 ULInt32('Compressed_Size' ),
430 ULInt32('Uncompressed_Size' ),
431 ULInt16('Filename_Length' ),
432 ULInt16('Extra_Field_Length'),
433 String('Filename' , sub { $_->ctx->{'Filename_Length' } }),
434 Field ('Extra_Field' , sub { $_->ctx->{'Extra_Field_Length'} }),
435 Field ('Compressed_Data', sub { $_->ctx->{'Compressed_Size' } }),
436 If(sub { $_->ctx->{'General_Purpose_Bit_Flag'}->{'Bit3'} },
437 Struct('Data_Descriptor',
438 ULInt32('CRC-32' ),
439 ULInt32('Compressed_Size' ),
440 ULInt32('Uncompressed_Size'),
443 Anchor('Position'),
447 my $parser_central_directory_record = Array($number_of_files,
448 Struct('zip',
449 Const(
450 Bytes('Central_File_Header_Signature', 4), "\x50\x4B\x01\x02"
452 Struct('Version_Made_By',
453 ULInt8('Specification'),
454 ULInt8('Compatibility'),
456 ULInt16('Version_Needed_To_Extract'),
457 BitStruct('General_Purpose_Bit_Flag',
458 Padding(1 ),
459 Flag('Bit6' ),
460 Flag('Bit5' ),
461 Flag('Bit4' ),
462 Flag('Bit3' ),
463 Flag('Bit2' ),
464 Flag('Bit1' ),
465 Flag('Bit0' ),
466 Padding(2 ),
467 Flag('Bit13'),
468 Flag('Bit12'),
469 Flag('Bit11'),
470 Padding(3 ),
472 ULInt16('Compression_Method' ),
473 ULInt16('Last_Mod_File_Time' ),
474 ULInt16('Last_Mod_File_Date' ),
475 ULInt32('CRC-32' ),
476 ULInt32('Compressed_Size' ),
477 ULInt32('Uncompressed_Size' ),
478 ULInt16('Filename_Length' ),
479 ULInt16('Extra_Field_Length' ),
480 ULInt16('File_Comment_Length' ),
481 ULInt16('Disk_Number_Start' ),
482 ULInt16('Internal_File_Attributes' ),
483 ULInt32('External_File_Attributes' ),
484 ULInt32('Relative_Offset_Of_Local_Header'),
485 String('Filename' , sub { $_->ctx->{'Filename_Length' } }),
486 Field ('Extra_Field' , sub { $_->ctx->{'Extra_Field_Length' } }),
487 Field ('File_Comment', sub { $_->ctx->{'File_Comment_Length'} }),
491 seek $fh, 0, 0;
492 $stream = CreateStreamReader(File => $fh);
494 &dissect( $parser_local_file_header->parse($stream), 'lfh');
495 &dissect($parser_central_directory_record->parse($stream), 'cdr');