External_File_Attributes are output in hexadecimal
[zip-parser.git] / zip_parser
blob85f2f8bddd289f2d7adce763706422d6f33558db
1 #!/usr/bin/perl
3 use strict;
4 use warnings;
5 use Data::ParseBinary;
6 use Data::Dumper;
7 use Cwd;
8 use File::Spec;
10 # *** Lacks the capability to parse Zip64 and encrypted .Zip files ***
12 # Check if a file has been passed as an argument or not
13 die "Usage: $0 zipfile\n" if @ARGV == 0;
15 # Slurp!
16 undef $/;
18 # Open the file and get filehandle
19 my $filename = shift;
20 open my $fh, '<', $filename
21 or die 'can not open $filename';
22 binmode $fh;
24 my ($printable_local_file_header, $printable_central_directory_record,
25 $printable_end_central_directory_record) =
26 do "$0.conf";
28 sub printable
29 {#{{{
30 my ($var, $header) = @_;
31 my @printable;
32 if ($header eq 'lfh' )
34 @printable = @{ $printable_local_file_header }
36 elsif ($header eq 'cdr' )
38 @printable = @{ $printable_central_directory_record }
40 elsif ($header eq 'ecdr')
42 @printable = @{ $printable_end_central_directory_record }
44 map
46 return 1 if $var eq $_
47 } @printable;
49 return 0;
50 }#}}}
52 sub dissect
53 {#{{{
54 my ($data, $header) = @_;
55 if ($header eq 'lfh' )
57 print "\tLOCAL FILE HEADER\n", '-' x 50, "\n\n"
59 elsif ($header eq 'cdr' )
61 print "\tCENTRAL DIRECTORY RECORD\n" , '-' x 50, "\n\n"
63 elsif ($header eq 'ecdr')
65 print "\tEND CENTRAL DIRECTORY RECORD\n", '-' x 50, "\n\n"
67 if ($header eq 'lfh' || $header eq 'cdr')
69 &mod_file_date_time ($data);
70 &extra_field ($data);
71 &compression_method ($data);
72 &general_purpose_bit_flag ($data);
73 &version_needed_to_extract($data);
74 &crc32 ($data);
75 &external_file_attributes ($data);
76 &data_descriptor ($data);
78 else
80 $data = [$data]
82 for( @$data )
84 my %temp = %$_;
85 map
87 if (&printable($_, $header))
89 # Have to clean up the following, or maybe ponder of a better method
90 if (ref($temp{$_}) eq '' || ref($temp{$_}) eq 'SCALAR')
92 printf "%s -> \n\t%s\n", $_, $temp{$_}
94 elsif (ref($temp{$_}) eq 'ARRAY')
96 printf "%s ->", $_;
97 map print("\n\t$_"), @{ $temp{$_} };
98 print "\n";
100 elsif (ref($temp{$_}) eq 'HASH')
102 printf "%s ->", $_;
103 my %hash = %{ $temp{$_} };
106 if (ref($hash{$_}) eq 'ARRAY')
108 printf "\n%s ->", $_;
109 map print("\n\t$_"), @{ $hash{$_} };
111 else
113 printf "\n%s -> %s", $_, $hash{$_}
115 } keys %hash;
116 print "\n";
119 } sort keys %temp;
120 print "\n\n";
122 }#}}}
124 sub data_descriptor
125 {#{{{
126 my $data = shift;
127 for (my $i = 0; $i < @$data; $i++)
129 if (ref $data->[$i]{'General_Purpose_Bit_Flag'} eq 'ARRAY')
131 ${$data->[$i]{'Data_Descriptor'}} = {}
133 else
135 ${$data->[$i]{'Data_Descriptor'}} = ''
138 }#}}}
140 sub crc32
141 {#{{{
142 my $data = shift;
143 for (my $i = 0; $i < @$data; $i++)
145 $data->[$i]{'CRC-32'} = unpack 'H*', (pack 'N', $data->[$i]{'CRC-32'});
147 }#}}}
149 sub external_file_attributes
150 {#{{{
151 my $data = shift;
152 for (my $i = 0; $i < @$data; $i++)
154 $data->[$i]{'External_File_Attributes'} = unpack 'H*', (pack 'N', $data->[$i]{'External_File_Attributes'});
156 }#}}}
158 sub compression_method
159 {#{{{
160 my $data = shift;
161 for (my $i = 0; $i < @$data; $i++)
163 exists $data->[$i]{'Compression_Method'}
164 or die " Compression_Method is not present\n";
165 my %compression_method =
167 '0' => 'The file is stored (no compression)',
168 '1' => 'The file is Shrunk',
169 '2' => 'The file is Reduced with compression factor 1',
170 '3' => 'The file is Reduced with compression factor 2',
171 '4' => 'The file is Reduced with compression factor 3',
172 '5' => 'The file is Reduced with compression factor 4',
173 '6' => 'The file is Imploded',
174 '7' => 'Reserved for Tokenizing compression algorithm',
175 '8' => 'The file is Deflated',
176 '9' => 'Enhanced Deflating using Deflate64(tm)',
177 '10' => 'PKWARE Data Compression Library Imploding (old IBM TERSE)',
178 '11' => 'Reserved by PKWARE',
179 '12' => 'File is compressed using BZIP2 algorithm',
180 '13' => 'Reserved by PKWARE',
181 '14' => 'LZMA (EFS)',
182 '15' => 'Reserved by PKWARE',
183 '16' => 'Reserved by PKWARE',
184 '17' => 'Reserved by PKWARE',
185 '18' => 'File is compressed using IBM TERSE (new)',
186 '19' => 'IBM LZ77 z Architecture (PFS)',
187 '97' => 'WavPack compressed data',
188 '98' => 'PPMd version I, Rev 1',
190 $data->[$i]{'Compression_Method'} = $compression_method{ $data->[$i]{'Compression_Method'} };
192 }#}}}
194 sub general_purpose_bit_flag
195 {#{{{
196 my $data = shift;
197 for (my $i = 0; $i < @$data; $i++)
199 exists $data->[$i]{'General_Purpose_Bit_Flag'}
200 or die " General_Purpose_Bit_Flag is not present\n";
201 my $bit0 = ${ $data->[$i]{'General_Purpose_Bit_Flag'} }{'Bit0' };
202 my $bit1 = ${ $data->[$i]{'General_Purpose_Bit_Flag'} }{'Bit1' };
203 my $bit2 = ${ $data->[$i]{'General_Purpose_Bit_Flag'} }{'Bit2' };
204 my $bit3 = ${ $data->[$i]{'General_Purpose_Bit_Flag'} }{'Bit3' };
205 my $bit4 = ${ $data->[$i]{'General_Purpose_Bit_Flag'} }{'Bit4' };
206 my $bit5 = ${ $data->[$i]{'General_Purpose_Bit_Flag'} }{'Bit5' };
207 my $bit6 = ${ $data->[$i]{'General_Purpose_Bit_Flag'} }{'Bit6' };
208 my $bit11 = ${ $data->[$i]{'General_Purpose_Bit_Flag'} }{'Bit11'};
209 my $bit12 = ${ $data->[$i]{'General_Purpose_Bit_Flag'} }{'Bit12'};
210 my $bit13 = ${ $data->[$i]{'General_Purpose_Bit_Flag'} }{'Bit13'};
211 my @general_purpose_bit_flag;
212 push @general_purpose_bit_flag, 'File is encrypted' if $bit0 == 1;
213 if ($data->[$i]{'Compression_Method'} eq 'The file is Imploded')
215 if ($bit1 == 1)
217 push @general_purpose_bit_flag, '8K sliding dictionary'
219 else
221 push @general_purpose_bit_flag, '4K sliding dictionary'
223 if ($bit2 == 1)
225 push @general_purpose_bit_flag, '3 Shannon-Fano trees were used to encode the sliding dictionary output'
227 else
229 push @general_purpose_bit_flag, '2 Shannon-Fano trees were used to encode the sliding dictionary output'
232 elsif ($data->[$i]{'Compression_Method'} eq 'The file is Deflated' ||
233 $data->[$i]{'Compression_Method'} eq 'Enhanced Deflating using Deflate64(tm)')
235 push @general_purpose_bit_flag, 'Normal (-en) compression option was used' if $bit2 == 0 && $bit1 == 0;
236 push @general_purpose_bit_flag, 'Maximum (-exx/-ex) compression option was used' if $bit2 == 0 && $bit1 == 1;
237 push @general_purpose_bit_flag, 'Fast (-ef) compression option was used' if $bit2 == 1 && $bit1 == 0;
238 push @general_purpose_bit_flag, 'Super Fast (-es) compression option was used' if $bit2 == 1 && $bit1 == 1;
239 } elsif ($data->[$i]{'Compression_Method'} eq 'LZMA (EFS)')
241 if ($bit1 == 1)
243 push @general_purpose_bit_flag, 'End-of-stream (EOS) marker is used to mark the end of the compressed data stream'
245 else
247 push @general_purpose_bit_flag, 'End-of-stream (EOS) marker is not present and the compressed data size must be known to extract'
250 else
252 if ($bit3 == 1)
254 $data->[$i]{'CRC-32' } = 0;
255 $data->[$i]{'Compressed_Size' } = 0;
256 $data->[$i]{'Uncompressed_Size'} = 0;
257 push @general_purpose_bit_flag, 'Data Descriptor contains CRC-32, Compressed_Size and Uncompressed_Size';
259 if ($bit4 == 1 && $data->[$i]{'Compression_Method'} eq 'The file is Deflated')
261 push @general_purpose_bit_flag, 'Enhanced deflating'
263 elsif ($bit4 == 1 && $data->[$i]{'Compression_Method'} ne 'The file is Deflated')
265 die ' Enhanced deflating cannot be done on a file that is not deflated'
267 if ($bit5 == 1 && $data->[$i]{'Version_Needed_To_Extract'} >= 27)
269 push @general_purpose_bit_flag, 'Compressed patched data'
271 elsif ($bit5 == 1)
273 die ' Incompatible Version_Needed_To_Extract for patched compressed data'
275 if ($bit6 == 1 && $data->[$i]{'Version_Needed_To_Extract'} >= 50 && $bit0 == 1)
277 push @general_purpose_bit_flag, 'Strong encryption'
279 elsif ($bit6 == 1 && $data->[$i]{'Version_Needed_To_Extract'} < 50 && $bit0 == 1)
281 die ' Incompatible Version_Needed_To_Extract for strong encryption'
283 elsif ($bit6 == 1 && $bit0 == 0)
285 die ' Non-encrypted file cannot be strong encrypted'
287 push @general_purpose_bit_flag, 'Filename and comment fields for this file must be encoded using UTF-8' if $bit11 == 1;
288 push @general_purpose_bit_flag, 'Enhanced compression' if $bit12 == 1;
289 push @general_purpose_bit_flag, 'Selected data values in the Local Header are masked' if $bit13 == 1;
291 $data->[$i]{'General_Purpose_Bit_Flag'} = [ @general_purpose_bit_flag ];
293 }#}}}
295 sub version_needed_to_extract
296 {#{{{
297 my $data = shift;
298 my %version_mappings =
300 '10' => 'Default value',
301 '11' => 'File is a volume label',
302 '20' => "File is a folder (directory)" .
303 "\n\tFile is compressed using Deflate compression" .
304 "\n\tFile is encrypted using traditional PKWARE encryption",
305 '21' => 'File is compressed using Deflate64(tm)',
306 '25' => 'File is compressed using PKWARE DCL Implode ',
307 '27' => 'File is a patch data set ',
308 '45' => 'File uses ZIP64 format extensions',
309 '46' => 'File is compressed using BZIP2 compression*',
310 '50' => "File is encrypted using DES" .
311 "\n\tFile is encrypted using 3DES" .
312 "\n\tFile is encrypted using original RC2 encryption" .
313 "\n\tFile is encrypted using RC4 encryption",
314 '51' => "File is encrypted using AES encryption" .
315 "\n\tFile is encrypted using corrected RC2 encryption",
316 '52' => 'File is encrypted using corrected RC2-64 encryption',
317 '61' => 'File is encrypted using non-OAEP key wrapping',
318 '62' => 'Central directory encryption',
319 '63' => "File is compressed using LZMA" .
320 "\n\tFile is compressed using PPMd" .
321 "\n\tFile is encrypted using Blowfish" .
322 "\n\tFile is encrypted using Twofish",
324 for (my $i = 0; $i < @$data; $i++)
326 exists $data->[$i]{'Version_Needed_To_Extract'}
327 or die " Version_Needed_To_Extract is not present\n";
328 exists $version_mappings{ $data->[$i]{'Version_Needed_To_Extract'} }
329 or die " Version_Needed_To_Extract has an illegal value\n";
330 $data->[$i]{'Version_Needed_To_Extract'} = $version_mappings{ $data->[$i]{'Version_Needed_To_Extract'} };
333 #}}}
335 sub extra_field
336 {#{{{
337 my $data = shift;
338 my (@header, @data);
339 my %header_mappings =
341 '0001' => 'Zip64 extended information extra field',
342 '0007' => 'AV Info',
343 '0008' => 'Reserved for extended language encoding data (PFS)',
344 '0009' => 'OS/2',
345 '000a' => 'NTFS ',
346 '000c' => 'OpenVMS',
347 '000d' => 'UNIX',
348 '000e' => 'Reserved for file stream and fork descriptors',
349 '000f' => 'Patch Descriptor',
350 '0014' => 'PKCS#7 Store for X.509 Certificates',
351 '0015' => 'X.509 Certificate ID and Signature for individual file',
352 '0016' => 'X.509 Certificate ID for Central Directory',
353 '0017' => 'Strong Encryption Header',
354 '0018' => 'Record Management Controls',
355 '0019' => 'PKCS#7 Encryption Recipient Certificate List',
356 '0065' => 'IBM S/390 (Z390), AS/400 (I400) attributes - uncompressed',
357 '0066' => 'Reserved for IBM S/390 (Z390), AS/400 (I400) attributes - compressed',
358 '4690' => 'POSZIP 4690 (reserved) ',
359 '07c8' => 'Macintosh',
360 '2605' => 'ZipIt Macintosh',
361 '2705' => 'ZipIt Macintosh 1.3.5+',
362 '2805' => 'ZipIt Macintosh 1.3.5+',
363 '334d' => 'Info-ZIP Macintosh',
364 '4341' => 'Acorn/SparkFS ',
365 '4453' => 'Windows NT security descriptor (binary ACL)',
366 '4704' => 'VM/CMS',
367 '470f' => 'MVS',
368 '4b46' => 'FWKCS MD5 (see below)',
369 '4c41' => 'OS/2 access control list (text ACL)',
370 '4d49' => 'Info-ZIP OpenVMS',
371 '4f4c' => 'Xceed original location extra field',
372 '5356' => 'AOS/VS (ACL)',
373 '5455' => 'extended timestamp',
374 '554e' => 'Xceed unicode extra field',
375 '5855' => 'Info-ZIP UNIX (original, also OS/2, NT, etc)',
376 '6375' => 'Info-ZIP Unicode Comment Extra Field',
377 '6542' => 'BeOS/BeBox',
378 '7075' => 'Info-ZIP Unicode Path Extra Field',
379 '756e' => 'ASi UNIX',
380 '7855' => 'Info-ZIP UNIX (new)',
381 'a220' => 'Microsoft Open Packaging Growth Hint',
382 'fd4a' => 'SMS/QDOS',
384 for (my $i = 0; $i < @$data; $i++)
386 if (exists $data->[$i]{'Extra_Field'})
388 for (my $j = 0; $j < length $data->[$i]{'Extra_Field'}; $j += 4)
390 my $header = unpack("H4", pack('Z*', substr($data->[$i]{'Extra_Field'}, $j, 2)));
391 $header = substr($header, 2, 2) . substr($header, 0, 2);
392 $header = $header_mappings{$header} if exists $header_mappings{$header};
393 push @header, $header;
394 my $data = unpack("H4", pack('Z*', substr($data->[$i]{'Extra_Field'}, $j + 2, 2)));
395 push @data, substr($data, 2, 2) . substr($data, 0, 2);
397 $data->[$i]{'Extra_Field'} =
399 Header => \@header,
400 Data => \@data,
404 }#}}}
406 sub mod_file_date_time
407 {#{{{
408 my $data = shift;
409 for (0 .. @$data - 1)
411 # Convert Last Mod File Time to Hour, Minute and Second
412 $data->[$_]{'Last_Mod_File_Time'} = pack('n', $data->[$_]{'Last_Mod_File_Time'});
413 my $convert =
414 BitStruct
416 'Last_Mod_File_Time',
417 BitField('Hour' , 5),
418 BitField('Minute', 6),
419 BitField('Second', 5),
421 $data->[$_]{'Last_Mod_File_Time'} = $convert->parse(CreateStreamReader($data->[$_]{'Last_Mod_File_Time'}));
422 # Convert Last Mod File Date to Year, Month and Day
423 $data->[$_]{'Last_Mod_File_Date'} = pack('n', $data->[$_]{'Last_Mod_File_Date'});
424 $convert =
425 BitStruct
427 'Last Mod File Date',
428 BitField('Year' , 7),
429 BitField('Month', 4),
430 BitField('Day' , 5),
432 $data->[$_]{'Last_Mod_File_Date'} = $convert->parse(CreateStreamReader($data->[$_]{'Last_Mod_File_Date'}));
433 $data->[$_]{'Last_Mod_File_Date'}{'Year'} = $data->[$_]{'Last_Mod_File_Date'}{'Year'} + 1980;
435 }#}}}
437 # Zip is little endian
438 my $parser_end_central_directory_record =
439 Struct
441 'zip',
442 Const
444 Bytes('End_Of_Central_Dir_Signature', 4), "\x50\x4B\x05\x06"
446 ULInt16('Number_Of_This_Disk' ),
447 ULInt16('Number_Of_The_Disk_With_The_Start_Of_The_Central_Directory' ),
448 ULInt16('Total_Number_Of_Entries_In_The_Central_Directory_On_This_Disk' ),
449 ULInt16('Total_Number_Of_Entries_In_The_Central_Directory' ),
450 ULInt32('Size_Of_The_Central_Directory' ),
451 ULInt32('Offset_Of_Start_Of_Central_Directory_With_Respect_To_The_Starting_Disk_Number'),
452 ULInt16('.ZIP_File_Comment_Length' ),
453 Field
455 '.ZIP_File_Comment',
458 $_->ctx->{'.ZIP_File_Comment_Length'}
463 seek $fh, -22, 2;
465 my $number_of_files;
466 my $stream = CreateStreamReader(File => $fh);
467 my $pecdr = $parser_end_central_directory_record->parse($stream);
469 &dissect($pecdr, 'ecdr');
470 $number_of_files = $pecdr->{'Total_Number_Of_Entries_In_The_Central_Directory'};
472 my $parser_local_file_header =
473 Array
475 $number_of_files,
476 Struct
478 'zip',
479 Const
481 Bytes
483 'Local_File_Header_Signature', 4
485 "\x50\x4B\x03\x04"
487 Pointer
493 Bytes
495 "\x50\x4B\x03\x04", 4
498 ULInt16('Version_Needed_To_Extract'),
499 BitStruct
501 'General_Purpose_Bit_Flag',
502 Padding(1 ),
503 Flag('Bit6' ),
504 Flag('Bit5' ),
505 Flag('Bit4' ),
506 Flag('Bit3' ),
507 Flag('Bit2' ),
508 Flag('Bit1' ),
509 Flag('Bit0' ),
510 Padding(2 ),
511 Flag('Bit13'),
512 Flag('Bit12'),
513 Flag('Bit11'),
514 Padding(3 ),
516 ULInt16('Compression_Method'),
517 ULInt16('Last_Mod_File_Time'),
518 ULInt16('Last_Mod_File_Date'),
519 ULInt32('CRC-32' ),
520 ULInt32('Compressed_Size' ),
521 ULInt32('Uncompressed_Size' ),
522 ULInt16('Filename_Length' ),
523 ULInt16('Extra_Field_Length'),
524 String
526 'Filename',
529 $_->ctx->{'Filename_Length' }
532 Field
534 'Extra_Field',
537 $_->ctx->{'Extra_Field_Length'}
540 Anchor('Position'),
541 Field
543 'Compressed_Data',
546 $_->ctx->{'Compressed_Size'}
552 my $parser_central_directory_record =
553 Array
555 $number_of_files,
556 Struct
558 'zip',
559 Const
561 Bytes
563 'Central_File_Header_Signature',
566 "\x50\x4B\x01\x02"
568 Struct
570 'Version_Made_By',
571 ULInt8('Specification'),
572 ULInt8('Compatibility'),
574 ULInt16('Version_Needed_To_Extract'),
575 BitStruct
577 'General_Purpose_Bit_Flag',
578 Padding(1 ),
579 Flag('Bit6' ),
580 Flag('Bit5' ),
581 Flag('Bit4' ),
582 Flag('Bit3' ),
583 Flag('Bit2' ),
584 Flag('Bit1' ),
585 Flag('Bit0' ),
586 Padding(2 ),
587 Flag('Bit13'),
588 Flag('Bit12'),
589 Flag('Bit11'),
590 Padding(3 ),
592 ULInt16('Compression_Method' ),
593 ULInt16('Last_Mod_File_Time' ),
594 ULInt16('Last_Mod_File_Date' ),
595 ULInt32('CRC-32' ),
596 ULInt32('Compressed_Size' ),
597 ULInt32('Uncompressed_Size' ),
598 ULInt16('Filename_Length' ),
599 ULInt16('Extra_Field_Length' ),
600 ULInt16('File_Comment_Length' ),
601 ULInt16('Disk_Number_Start' ),
602 ULInt16('Internal_File_Attributes' ),
603 Anchor('Position' ),
604 ULInt32('External_File_Attributes' ),
605 ULInt32('Relative_Offset_Of_Local_Header'),
606 String
608 'Filename',
611 $_->ctx->{'Filename_Length'}
614 Field
616 'Extra_Field',
619 $_->ctx->{'Extra_Field_Length'}
622 Field
624 'File_Comment',
627 $_->ctx->{'File_Comment_Length'}
633 seek $fh, 0, 0;
634 $stream = CreateStreamReader(File => $fh);
636 &dissect($parser_local_file_header->parse($stream) , 'lfh');
637 &dissect($parser_central_directory_record->parse($stream), 'cdr');