Updated General Purpose Bit Flags
[zip-parser.git] / zip_parser
blobf2dcd2dc673a40e33965c0250bfacc190b6a9d06
1 #!/usr/bin/perl
3 use strict;
4 use warnings;
5 use Data::ParseBinary;
6 use Data::Dumper;
8 # Check if a file has been passed as an argument or not
9 if( @ARGV == 0 ) {
10 print " No .zip file has been passed.\n";
11 exit;
14 undef $/;
16 my $filename = shift;
17 open my $fh, '<', $filename or die 'can not open $filename';
18 binmode $fh;
21 my @printable_local_file_header = qw(
22 Filename Version_Needed_To_Extract General_Purpose_Bit_Flag Compression_Method
25 my @printable_central_directory_record = qw(
26 Filename General_Purpose_Bit_Flag
29 my @printable_end_central_directory_record = qw(
30 Total_Number_Of_Entries_In_The_Central_Directory
33 sub printable {#{{{
34 my ( $var, $header ) = @_;
35 my @printable;
37 if( $header eq 'lfh' ) {
38 @printable = @printable_local_file_header;
39 } elsif( $header eq 'cdr' ) {
40 @printable = @printable_central_directory_record;
41 } elsif( $header eq 'ecdr' ) {
42 @printable = @printable_end_central_directory_record;
45 for( @printable ) {
46 return 1 if($var eq $_);
48 return 0;
50 #}}}
52 sub dissect {#{{{
54 my ( $data, $header ) = @_;
56 if ( $header eq 'lfh' ) { print "\tLOCAL FILE HEADER\n" , '-' x 50, "\n\n" }
57 elsif( $header eq 'cdr' ) { print "\tCENTRAL DIRECTORY RECORD\n" , '-' x 50, "\n\n" }
58 elsif( $header eq 'ecdr' ) { print "\tEND CENTRAL DIRECTORY RECORD\n", '-' x 50, "\n\n" }
60 if( $header eq 'lfh' || $header eq 'cdr' ) {
62 &mod_file_date_time ($data);
63 &extra_field ($data);
64 &compression_method ($data);
65 &general_purpose_bit_flag ($data);
66 &version_needed_to_extract($data);
68 } else { $data = [$data] }
70 for( @$data ) {
72 my %temp = %$_;
74 map {
76 if( &printable($_, $header) ) {
78 if( ref($temp{$_}) eq '' || ref($temp{$_}) eq 'SCALAR' ) {
80 printf "%s -> \n\t%s\n", $_, $temp{$_}
82 } elsif( ref($temp{$_}) eq 'ARRAY' ) {
84 printf "%s ->", $_;
85 map print("\n\t$_"), @{ $temp{$_} };
86 print "\n";
88 } elsif( ref($temp{$_} eq 'HASH') ) {
90 printf "%s ->", $_;
92 my %hash = %{$temp{$_}};
94 map {
96 if( ref($hash{$_}) eq 'ARRAY' ) {
98 printf "\n%s ->", $_;
99 map print("\n\t$_"), @{ $hash{$_} };
101 } else { printf "\n%s -> %s", $_, $hash{$_} }
103 } keys %hash;
105 print "\n";
110 } keys %temp;
112 print "\n\n";
115 #}}}
117 sub compression_method {#{{{
118 my $data = shift;
120 for( my $i = 0; $i < @$data; $i++ ) {
122 exists $data->[$i]{'Compression_Method'}
123 or die " Compression_Method is not present\n";
125 my %compression_method = (
127 '0' => 'The file is stored (no compression)',
128 '1' => 'The file is Shrunk',
129 '2' => 'The file is Reduced with compression factor 1',
130 '3' => 'The file is Reduced with compression factor 2',
131 '4' => 'The file is Reduced with compression factor 3',
132 '5' => 'The file is Reduced with compression factor 4',
133 '6' => 'The file is Imploded',
134 '7' => 'Reserved for Tokenizing compression algorithm',
135 '8' => 'The file is Deflated',
136 '9' => 'Enhanced Deflating using Deflate64(tm)',
137 '10' => 'PKWARE Data Compression Library Imploding (old IBM TERSE)',
138 '11' => 'Reserved by PKWARE',
139 '12' => 'File is compressed using BZIP2 algorithm',
140 '13' => 'Reserved by PKWARE',
141 '14' => 'LZMA (EFS)',
142 '15' => 'Reserved by PKWARE',
143 '16' => 'Reserved by PKWARE',
144 '17' => 'Reserved by PKWARE',
145 '18' => 'File is compressed using IBM TERSE (new)',
146 '19' => 'IBM LZ77 z Architecture (PFS)',
147 '97' => 'WavPack compressed data',
148 '98' => 'PPMd version I, Rev 1',
152 $data->[$i]{'Compression_Method'} = $compression_method{ $data->[$i]{'Compression_Method'} };
157 #}}}
159 sub general_purpose_bit_flag {#{{{
161 my $data = shift;
163 for( my $i = 0; $i < @$data; $i++ ) {
165 exists $data->[$i]{'General_Purpose_Bit_Flag'}
166 or die " General_Purpose_Bit_Flag is not present\n";
168 my $bit0 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit0' };
169 my $bit1 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit1' };
170 my $bit2 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit2' };
171 my $bit3 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit3' };
172 my $bit4 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit4' };
173 my $bit5 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit5' };
174 my $bit6 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit6' };
175 my $bit11 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit11'};
176 my $bit12 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit12'};
177 my $bit13 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit13'};
179 my @general_purpose_bit_flag;
181 push @general_purpose_bit_flag, 'File is encrypted' if $bit0 == 1;
183 if( $data->[$i]{'Compression_Method'} eq 'The file is Imploded' ) {
185 if( $bit1 == 1 ) { push @general_purpose_bit_flag, '8K sliding dictionary' }
186 else { push @general_purpose_bit_flag, '4K sliding dictionary' }
187 if( $bit2 == 1 ) { push @general_purpose_bit_flag, '3 Shannon-Fano trees were used to encode the sliding dictionary output' }
188 else { push @general_purpose_bit_flag, '2 Shannon-Fano trees were used to encode the sliding dictionary output' }
190 } elsif( $data->[$i]{'Compression_Method'} eq 'The file is Deflated' ||
191 $data->[$i]{'Compression_Method'} eq 'Enhanced Deflating using Deflate64(tm)' ) {
193 push @general_purpose_bit_flag, 'Normal (-en) compression option was used' if $bit2 == 0 && $bit1 == 0;
194 push @general_purpose_bit_flag, 'Maximum (-exx/-ex) compression option was used' if $bit2 == 0 && $bit1 == 1;
195 push @general_purpose_bit_flag, 'Fast (-ef) compression option was used' if $bit2 == 1 && $bit1 == 0;
196 push @general_purpose_bit_flag, 'Super Fast (-es) compression option was used' if $bit2 == 1 && $bit1 == 1;
198 } elsif( $data->[$i]{'Compression_Method'} eq 'LZMA (EFS)' ) {
200 if( $bit1 == 1 ) {
201 push @general_purpose_bit_flag, 'End-of-stream (EOS) marker is used to mark the end of the compressed data stream';
202 } else {
203 push @general_purpose_bit_flag, 'End-of-stream (EOS) marker is not present and the compressed data size must be known to extract';
206 } else {
208 if( $bit3 == 1 ) {
210 $data->[$i]{'CRC-32' } = 0;
211 $data->[$i]{'Compressed_Size' } = 0;
212 $data->[$i]{'Uncompressed_Size'} = 0;
214 push @general_purpose_bit_flag, 'Data Descriptor contains CRC-32, Compressed_Size and Uncompressed_Size';
217 if( $bit4 == 1 && $data->[$i]{'Compression_Method'} eq 'The file is Deflated' ) {
218 push @general_purpose_bit_flag, 'Enhanced deflating';
219 } elsif( $bit4 == 1 && $data->[$i]{'Compression_Method'} ne 'The file is Deflated' ) {
220 die ' Enhanced deflating cannot be done on a file that is not deflated';
223 if( $bit5 == 1 && $data->[$i]{'Version_Needed_To_Extract'} >= 27 ) {
224 push @general_purpose_bit_flag, 'Compressed patched data'
225 } elsif( $bit5 == 1 ) {
226 die ' Incompatible Version_Needed_To_Extract for patched compressed data';
229 if( $bit6 == 1 && $data->[$i]{'Version_Needed_To_Extract'} >= 50 && $bit0 == 1 ) {
230 push @general_purpose_bit_flag, 'Strong encryption'
231 } elsif( $bit6 == 1 && $data->[$i]{'Version_Needed_To_Extract'} < 50 && $bit0 == 1 ) {
232 die ' Incompatible Version_Needed_To_Extract for strong encryption';
233 } elsif( $bit6 == 1 && $bit0 == 0 ) {
234 die ' Non-encrypted file cannot be strong encrypted';
237 push @general_purpose_bit_flag, 'Filename and comment fields for this file must be encoded using UTF-8' if $bit11 == 1;
238 push @general_purpose_bit_flag, 'Enhanced compression' if $bit12 == 1;
239 push @general_purpose_bit_flag, 'Selected data values in the Local Header are masked' if $bit13 == 1;
243 $data->[$i]{'General_Purpose_Bit_Flag'} = [ @general_purpose_bit_flag ];
249 #}}}
251 sub version_needed_to_extract {#{{{
252 my $data = shift;
254 my %version_mappings = (
256 '10' => 'Default value',
257 '11' => 'File is a volume label',
259 '20' => "File is a folder (directory)" .
260 "\n\tFile is compressed using Deflate compression" .
261 "\n\tFile is encrypted using traditional PKWARE encryption",
263 '21' => 'File is compressed using Deflate64(tm)',
264 '25' => 'File is compressed using PKWARE DCL Implode ',
265 '27' => 'File is a patch data set ',
266 '45' => 'File uses ZIP64 format extensions',
267 '46' => 'File is compressed using BZIP2 compression*',
269 '50' => "File is encrypted using DES" .
270 "\n\tFile is encrypted using 3DES" .
271 "\n\tFile is encrypted using original RC2 encryption" .
272 "\n\tFile is encrypted using RC4 encryption",
274 '51' => "File is encrypted using AES encryption" .
275 "\n\tFile is encrypted using corrected RC2 encryption",
277 '52' => 'File is encrypted using corrected RC2-64 encryption',
278 '61' => 'File is encrypted using non-OAEP key wrapping',
279 '62' => 'Central directory encryption',
281 '63' => "File is compressed using LZMA" .
282 "\n\tFile is compressed using PPMd" .
283 "\n\tFile is encrypted using Blowfish" .
284 "\n\tFile is encrypted using Twofish",
288 for( my $i = 0; $i < @$data; $i++ ) {
290 exists $data->[$i]{'Version_Needed_To_Extract'}
291 or die " Version_Needed_To_Extract is not present\n";
292 exists $version_mappings{ $data->[$i]{'Version_Needed_To_Extract'} }
293 or die " Version_Needed_To_Extract has an illegal value\n";
295 $data->[$i]{'Version_Needed_To_Extract'} =
296 $version_mappings{ $data->[$i]{'Version_Needed_To_Extract'} },
302 #}}}
304 sub extra_field {#{{{
305 my $data = shift;
306 my ( @header, @data );
307 my %header_mappings = (
308 '0001' => 'Zip64 extended information extra field',
309 '0007' => 'AV Info',
310 '0008' => 'Reserved for extended language encoding data (PFS)',
311 '0009' => 'OS/2',
312 '000a' => 'NTFS ',
313 '000c' => 'OpenVMS',
314 '000d' => 'UNIX',
315 '000e' => 'Reserved for file stream and fork descriptors',
316 '000f' => 'Patch Descriptor',
317 '0014' => 'PKCS#7 Store for X.509 Certificates',
318 '0015' => 'X.509 Certificate ID and Signature for individual file',
319 '0016' => 'X.509 Certificate ID for Central Directory',
320 '0017' => 'Strong Encryption Header',
321 '0018' => 'Record Management Controls',
322 '0019' => 'PKCS#7 Encryption Recipient Certificate List',
323 '0065' => 'IBM S/390 (Z390), AS/400 (I400) attributes - uncompressed',
324 '0066' => 'Reserved for IBM S/390 (Z390), AS/400 (I400) attributes - compressed',
325 '4690' => 'POSZIP 4690 (reserved) ',
326 '07c8' => 'Macintosh',
327 '2605' => 'ZipIt Macintosh',
328 '2705' => 'ZipIt Macintosh 1.3.5+',
329 '2805' => 'ZipIt Macintosh 1.3.5+',
330 '334d' => 'Info-ZIP Macintosh',
331 '4341' => 'Acorn/SparkFS ',
332 '4453' => 'Windows NT security descriptor (binary ACL)',
333 '4704' => 'VM/CMS',
334 '470f' => 'MVS',
335 '4b46' => 'FWKCS MD5 (see below)',
336 '4c41' => 'OS/2 access control list (text ACL)',
337 '4d49' => 'Info-ZIP OpenVMS',
338 '4f4c' => 'Xceed original location extra field',
339 '5356' => 'AOS/VS (ACL)',
340 '5455' => 'extended timestamp',
341 '554e' => 'Xceed unicode extra field',
342 '5855' => 'Info-ZIP UNIX (original, also OS/2, NT, etc)',
343 '6375' => 'Info-ZIP Unicode Comment Extra Field',
344 '6542' => 'BeOS/BeBox',
345 '7075' => 'Info-ZIP Unicode Path Extra Field',
346 '756e' => 'ASi UNIX',
347 '7855' => 'Info-ZIP UNIX (new)',
348 'a220' => 'Microsoft Open Packaging Growth Hint',
349 'fd4a' => 'SMS/QDOS',
352 for( my $i = 0; $i < @$data; $i++ ) {
354 if( exists $data->[$i]{'Extra_Field'} ) {
356 for( my $j = 0; $j < length $data->[$i]{'Extra_Field'}; $j += 4 ) {
358 my $header = unpack("H4", pack('Z*', substr($data->[$i]{'Extra_Field'}, $j, 2)));
359 $header = substr($header, 2, 2) . substr($header, 0, 2);
360 $header = $header_mappings{$header} if exists $header_mappings{$header};
361 push @header, $header;
363 my $data = unpack("H4", pack('Z*', substr($data->[$i]{'Extra_Field'}, $j + 2, 2)));
364 push @data, substr($data, 2, 2) . substr($data, 0, 2);
368 $data->[$i]{'Extra_Field'} = {
369 Header => \@header,
370 Data => \@data,
375 #}}}
377 sub mod_file_date_time {#{{{
378 my $data = shift;
379 for( 0 .. @$data - 1 ) {
380 # Convert Last Mod File Time to Hour, Minute and Second
381 $data->[$_]{'Last_Mod_File_Time'} = pack('n', $data->[$_]{'Last_Mod_File_Time'});
382 my $convert = BitStruct('Last_Mod_File_Time',
383 BitField('Hour' , 5),
384 BitField('Minute', 6),
385 BitField('Second', 5),
387 $data->[$_]{'Last_Mod_File_Time'} = $convert->parse(CreateStreamReader($data->[$_]{'Last_Mod_File_Time'}));
388 # Convert Last Mod File Date to Year, Month and Day
389 $data->[$_]{'Last_Mod_File_Date'} = pack('n', $data->[$_]{'Last_Mod_File_Date'});
390 $convert = BitStruct('Last Mod File Date',
391 BitField('Year' , 7),
392 BitField('Month', 4),
393 BitField('Day' , 5),
395 $data->[$_]{'Last_Mod_File_Date'} = $convert->parse(CreateStreamReader($data->[$_]{'Last_Mod_File_Date'}));
396 $data->[$_]{'Last_Mod_File_Date'}{'Year'} = $data->[$_]{'Last_Mod_File_Date'}{'Year'} + 1980;
399 #}}}
401 # Zip is little endian
403 my $parser_end_central_directory_record = Struct('zip',
405 Const(
406 Bytes('End_Of_Central_Dir_Signature', 4), "\x50\x4B\x05\x06"
409 Anchor('Position'),
411 ULInt16('Number_Of_This_Disk' ),
412 ULInt16('Number_Of_The_Disk_With_The_Start_Of_The_Central_Directory' ),
413 ULInt16('Total_Number_Of_Entries_In_The_Central_Directory_On_This_Disk' ),
414 ULInt16('Total_Number_Of_Entries_In_The_Central_Directory' ),
416 ULInt32('Size_Of_The_Central_Directory' ),
417 ULInt32('Offset_Of_Start_Of_Central_Directory_With_Respect_To_The_Starting_Disk_Number'),
419 ULInt16('.ZIP_File_Comment_Length' ),
421 Field('.ZIP_File_Comment', sub { $_->ctx->{'.ZIP_File_Comment_Length'} }),
425 seek $fh, -22, 2;
427 my $number_of_files;
428 my $stream = CreateStreamReader(File => $fh);
429 my $pecdr = $parser_end_central_directory_record->parse($stream);
431 &dissect($pecdr, 'ecdr');
432 $number_of_files = $pecdr->{'Total_Number_Of_Entries_In_The_Central_Directory'},
434 my $parser_local_file_header = Array($number_of_files,
436 Struct('zip',
438 Const(
439 Bytes('Local_File_Header_Signature', 4), "\x50\x4B\x03\x04"
442 Pointer( sub { 0 }, Bytes("\x50\x4B\x03\x04", 4)),
444 ULInt16('Version_Needed_To_Extract'),
446 BitStruct('General_Purpose_Bit_Flag',
448 Padding(1),
450 Flag('Bit6' ),
451 Flag('Bit5' ),
452 Flag('Bit4' ),
453 Flag('Bit3' ),
454 Flag('Bit2' ),
455 Flag('Bit1' ),
456 Flag('Bit0' ),
458 Padding(2),
460 Flag('Bit13'),
461 Flag('Bit12'),
462 Flag('Bit11'),
464 Padding(3),
468 ULInt16('Compression_Method'),
469 ULInt16('Last_Mod_File_Time'),
470 ULInt16('Last_Mod_File_Date'),
472 ULInt32('CRC-32' ),
473 ULInt32('Compressed_Size' ),
474 ULInt32('Uncompressed_Size' ),
476 ULInt16('Filename_Length' ),
477 ULInt16('Extra_Field_Length'),
479 String('Filename' , sub { $_->ctx->{'Filename_Length' } }),
481 Field('Extra_Field' , sub { $_->ctx->{'Extra_Field_Length'} }),
482 Field('Compressed_Data', sub { $_->ctx->{'Compressed_Size' } }),
484 Anchor('Position'),
490 my $parser_central_directory_record = Array($number_of_files,
492 Struct('zip',
494 Const(
495 Bytes('Central_File_Header_Signature', 4), "\x50\x4B\x01\x02"
498 Struct('Version_Made_By',
499 ULInt8('Specification'),
500 ULInt8('Compatibility'),
503 ULInt16('Version_Needed_To_Extract'),
505 BitStruct('General_Purpose_Bit_Flag',
507 Padding(1),
509 Flag('Bit6' ),
510 Flag('Bit5' ),
511 Flag('Bit4' ),
512 Flag('Bit3' ),
513 Flag('Bit2' ),
514 Flag('Bit1' ),
515 Flag('Bit0' ),
517 Padding(2),
519 Flag('Bit13'),
520 Flag('Bit12'),
521 Flag('Bit11'),
523 Padding(3),
527 ULInt16('Compression_Method' ),
528 ULInt16('Last_Mod_File_Time' ),
529 ULInt16('Last_Mod_File_Date' ),
531 ULInt32('CRC-32' ),
532 ULInt32('Compressed_Size' ),
533 ULInt32('Uncompressed_Size' ),
535 ULInt16('Filename_Length' ),
536 ULInt16('Extra_Field_Length' ),
537 ULInt16('File_Comment_Length' ),
538 ULInt16('Disk_Number_Start' ),
539 ULInt16('Internal_File_Attributes' ),
541 ULInt32('External_File_Attributes' ),
542 ULInt32('Relative_Offset_Of_Local_Header'),
544 String('Filename' , sub { $_->ctx->{'Filename_Length' } }),
546 Field('Extra_Field' , sub { $_->ctx->{'Extra_Field_Length' } }),
547 Field('File_Comment', sub { $_->ctx->{'File_Comment_Length'} }),
553 seek $fh, 0, 0;
554 $stream = CreateStreamReader(File => $fh);
555 &dissect($parser_local_file_header->parse($stream), 'lfh');
556 #print Dumper $parser_local_file_header->parse($stream);
557 &dissect($parser_central_directory_record->parse($stream), 'cdr');