Shebang line now uses /usr/bin/env perl
[zip-parser.git] / zip_parser
blobf146c95dc708f080111a4f717ff7b87ab117a569
1 #!/usr/bin/env perl
3 use strict;
4 use warnings;
5 use Data::ParseBinary;
6 use Cwd;
7 use File::Spec;
9 # *** Lacks the capability to parse Zip64 and encrypted .Zip files ***
11 # Check if a file has been passed as an argument or not
12 die "Usage: $0 zipfile\n" if @ARGV == 0;
14 # Slurp!
15 undef $/;
17 # Open the file and get filehandle
18 my $filename = shift;
19 open my $fh, '<', $filename
20 or die 'can not open $filename';
21 binmode $fh;
23 my (
24 $printable_local_file_header,
25 $printable_central_directory_record,
26 $printable_end_central_directory_record
27 ) = do "$0.conf";
29 sub printable {
30 my ( $var, $header ) = @_;
31 my @printable;
32 if ( $header eq 'lfh' ) {
33 @printable = @{$printable_local_file_header};
35 elsif ( $header eq 'cdr' ) {
36 @printable = @{$printable_central_directory_record};
38 elsif ( $header eq 'ecdr' ) {
39 @printable = @{$printable_end_central_directory_record};
41 map { return 1 if $var eq $_ } @printable;
43 return 0;
46 sub dissect {
47 my ( $data, $header ) = @_;
48 if ( $header eq 'lfh' ) {
49 print "\tLOCAL FILE HEADER\n", '-' x 50, "\n\n";
51 elsif ( $header eq 'cdr' ) {
52 print "\tCENTRAL DIRECTORY RECORD\n", '-' x 50, "\n\n";
54 elsif ( $header eq 'ecdr' ) {
55 print "\tEND CENTRAL DIRECTORY RECORD\n", '-' x 50, "\n\n";
57 if ( $header eq 'lfh' || $header eq 'cdr' ) {
58 &mod_file_date_time($data);
59 &extra_field($data);
60 &compression_method($data);
61 &general_purpose_bit_flag($data);
62 &version_needed_to_extract($data);
63 &crc32($data);
64 &external_file_attributes($data);
65 &data_descriptor($data);
67 else {
68 $data = [$data];
70 for (@$data) {
71 my %temp = %$_;
72 map {
73 if ( &printable( $_, $header ) )
76 # Have to clean up the following, or maybe ponder of a better method
77 if ( ref( $temp{$_} ) eq '' || ref( $temp{$_} ) eq 'SCALAR' ) {
78 printf "%s -> \n\t%s\n", $_, $temp{$_};
80 elsif ( ref( $temp{$_} ) eq 'ARRAY' ) {
81 printf "%s ->", $_;
82 map print("\n\t$_"), @{ $temp{$_} };
83 print "\n";
85 elsif ( ref( $temp{$_} ) eq 'HASH' ) {
86 printf "%s ->", $_;
87 my %hash = %{ $temp{$_} };
88 map {
89 if ( ref( $hash{$_} ) eq 'ARRAY' )
91 printf "\n%s ->", $_;
92 map print("\n\t$_"), @{ $hash{$_} };
94 else {
95 printf "\n%s -> %s", $_, $hash{$_};
97 } keys %hash;
98 print "\n";
101 } sort keys %temp;
102 print "\n\n";
106 sub data_descriptor {
107 my $data = shift;
108 for ( my $i = 0 ; $i < @$data ; $i++ ) {
109 if ( ref $data->[$i]{'General_Purpose_Bit_Flag'} eq 'ARRAY' ) {
110 ${ $data->[$i]{'Data_Descriptor'} } = {};
112 else {
113 ${ $data->[$i]{'Data_Descriptor'} } = '';
118 sub crc32 {
119 my $data = shift;
120 for ( my $i = 0 ; $i < @$data ; $i++ ) {
121 $data->[$i]{'CRC-32'} = unpack 'H*',
122 ( pack 'N', $data->[$i]{'CRC-32'} );
126 sub external_file_attributes {
127 my $data = shift;
128 for ( my $i = 0 ; $i < @$data ; $i++ ) {
129 $data->[$i]{'External_File_Attributes'} = unpack 'H*',
130 ( pack 'N', $data->[$i]{'External_File_Attributes'} );
134 sub compression_method {
135 my $data = shift;
136 for ( my $i = 0 ; $i < @$data ; $i++ ) {
137 exists $data->[$i]{'Compression_Method'}
138 or die " Compression_Method is not present\n";
139 my %compression_method = (
140 '0' => 'The file is stored (no compression)',
141 '1' => 'The file is Shrunk',
142 '2' => 'The file is Reduced with compression factor 1',
143 '3' => 'The file is Reduced with compression factor 2',
144 '4' => 'The file is Reduced with compression factor 3',
145 '5' => 'The file is Reduced with compression factor 4',
146 '6' => 'The file is Imploded',
147 '7' => 'Reserved for Tokenizing compression algorithm',
148 '8' => 'The file is Deflated',
149 '9' => 'Enhanced Deflating using Deflate64(tm)',
150 '10' => 'PKWARE Data Compression Library Imploding (old IBM TERSE)',
151 '11' => 'Reserved by PKWARE',
152 '12' => 'File is compressed using BZIP2 algorithm',
153 '13' => 'Reserved by PKWARE',
154 '14' => 'LZMA (EFS)',
155 '15' => 'Reserved by PKWARE',
156 '16' => 'Reserved by PKWARE',
157 '17' => 'Reserved by PKWARE',
158 '18' => 'File is compressed using IBM TERSE (new)',
159 '19' => 'IBM LZ77 z Architecture (PFS)',
160 '97' => 'WavPack compressed data',
161 '98' => 'PPMd version I, Rev 1',
163 $data->[$i]{'Compression_Method'} =
164 $compression_method{ $data->[$i]{'Compression_Method'} };
168 sub general_purpose_bit_flag {
169 my $data = shift;
170 for ( my $i = 0 ; $i < @$data ; $i++ ) {
171 exists $data->[$i]{'General_Purpose_Bit_Flag'}
172 or die " General_Purpose_Bit_Flag is not present\n";
173 my $bit0 = ${ $data->[$i]{'General_Purpose_Bit_Flag'} }{'Bit0'};
174 my $bit1 = ${ $data->[$i]{'General_Purpose_Bit_Flag'} }{'Bit1'};
175 my $bit2 = ${ $data->[$i]{'General_Purpose_Bit_Flag'} }{'Bit2'};
176 my $bit3 = ${ $data->[$i]{'General_Purpose_Bit_Flag'} }{'Bit3'};
177 my $bit4 = ${ $data->[$i]{'General_Purpose_Bit_Flag'} }{'Bit4'};
178 my $bit5 = ${ $data->[$i]{'General_Purpose_Bit_Flag'} }{'Bit5'};
179 my $bit6 = ${ $data->[$i]{'General_Purpose_Bit_Flag'} }{'Bit6'};
180 my $bit11 = ${ $data->[$i]{'General_Purpose_Bit_Flag'} }{'Bit11'};
181 my $bit12 = ${ $data->[$i]{'General_Purpose_Bit_Flag'} }{'Bit12'};
182 my $bit13 = ${ $data->[$i]{'General_Purpose_Bit_Flag'} }{'Bit13'};
183 my @general_purpose_bit_flag;
184 push @general_purpose_bit_flag, 'File is encrypted' if $bit0 == 1;
186 if ( $data->[$i]{'Compression_Method'} eq 'The file is Imploded' ) {
187 if ( $bit1 == 1 ) {
188 push @general_purpose_bit_flag, '8K sliding dictionary';
190 else {
191 push @general_purpose_bit_flag, '4K sliding dictionary';
193 if ( $bit2 == 1 ) {
194 push @general_purpose_bit_flag, '3 Shannon-Fano trees were used'
195 . ' to encode the sliding dictionary output';
197 else {
198 push @general_purpose_bit_flag, '2 Shannon-Fano trees were used'
199 . ' to encode the sliding dictionary output';
202 elsif ($data->[$i]{'Compression_Method'} eq 'The file is Deflated'
203 || $data->[$i]{'Compression_Method'} eq
204 'Enhanced Deflating using Deflate64(tm)' )
206 push @general_purpose_bit_flag,
207 'Normal (-en) compression option was used'
208 if $bit2 == 0 && $bit1 == 0;
209 push @general_purpose_bit_flag,
210 'Maximum (-exx/-ex) compression option was used'
211 if $bit2 == 0 && $bit1 == 1;
212 push @general_purpose_bit_flag,
213 'Fast (-ef) compression option was used'
214 if $bit2 == 1 && $bit1 == 0;
215 push @general_purpose_bit_flag,
216 'Super Fast (-es) compression option was used'
217 if $bit2 == 1 && $bit1 == 1;
219 elsif ( $data->[$i]{'Compression_Method'} eq 'LZMA (EFS)' ) {
220 if ( $bit1 == 1 ) {
221 push @general_purpose_bit_flag, 'End-of-stream (EOS) marker is '
222 . 'used to mark the end of the compressed data stream';
224 else {
225 push @general_purpose_bit_flag,
226 'End-of-stream (EOS) marker is '
227 . 'not present and the compressed data size must be known to '
228 . 'extract';
231 else {
232 if ( $bit3 == 1 ) {
233 $data->[$i]{'CRC-32'} = 0;
234 $data->[$i]{'Compressed_Size'} = 0;
235 $data->[$i]{'Uncompressed_Size'} = 0;
236 push @general_purpose_bit_flag, 'Data Descriptor contains '
237 . 'CRC-32, Compressed_Size and Uncompressed_Size';
239 if ( $bit4 == 1
240 && $data->[$i]{'Compression_Method'} eq 'The file is Deflated' )
242 push @general_purpose_bit_flag, 'Enhanced deflating';
244 elsif ($bit4 == 1
245 && $data->[$i]{'Compression_Method'} ne 'The file is Deflated' )
247 die ' Enhanced deflating cannot be done on a file that is not '
248 . 'deflated';
250 if ( $bit5 == 1 && $data->[$i]{'Version_Needed_To_Extract'} >= 27 )
252 push @general_purpose_bit_flag, 'Compressed patched data';
254 elsif ( $bit5 == 1 ) {
255 die ' Incompatible Version_Needed_To_Extract for patched '
256 . 'compressed data';
258 if ( $bit6 == 1
259 && $data->[$i]{'Version_Needed_To_Extract'} >= 50
260 && $bit0 == 1 )
262 push @general_purpose_bit_flag, 'Strong encryption';
264 elsif ($bit6 == 1
265 && $data->[$i]{'Version_Needed_To_Extract'} < 50
266 && $bit0 == 1 )
268 die ' Incompatible Version_Needed_To_Extract for strong '
269 . 'encryption';
271 elsif ( $bit6 == 1 && $bit0 == 0 ) {
272 die ' Non-encrypted file cannot be strong encrypted';
274 push @general_purpose_bit_flag,
275 'Filename and comment fields for '
276 . 'this file must be encoded using UTF-8'
277 if $bit11 == 1;
278 push @general_purpose_bit_flag, 'Enhanced compression'
279 if $bit12 == 1;
280 push @general_purpose_bit_flag,
281 'Selected data values in the Local Header are masked'
282 if $bit13 == 1;
284 $data->[$i]{'General_Purpose_Bit_Flag'} = [@general_purpose_bit_flag];
288 sub version_needed_to_extract {
289 my $data = shift;
290 my %version_mappings = (
291 '10' => 'Default value',
292 '11' => 'File is a volume label',
293 '20' => "File is a folder (directory)"
294 . "\n\tFile is compressed using Deflate compression"
295 . "\n\tFile is encrypted using traditional PKWARE encryption",
296 '21' => 'File is compressed using Deflate64(tm)',
297 '25' => 'File is compressed using PKWARE DCL Implode ',
298 '27' => 'File is a patch data set ',
299 '45' => 'File uses ZIP64 format extensions',
300 '46' => 'File is compressed using BZIP2 compression*',
301 '50' => "File is encrypted using DES"
302 . "\n\tFile is encrypted using 3DES"
303 . "\n\tFile is encrypted using original RC2 encryption"
304 . "\n\tFile is encrypted using RC4 encryption",
305 '51' => "File is encrypted using AES encryption"
306 . "\n\tFile is encrypted using corrected RC2 encryption",
307 '52' => 'File is encrypted using corrected RC2-64 encryption',
308 '61' => 'File is encrypted using non-OAEP key wrapping',
309 '62' => 'Central directory encryption',
310 '63' => "File is compressed using LZMA"
311 . "\n\tFile is compressed using PPMd"
312 . "\n\tFile is encrypted using Blowfish"
313 . "\n\tFile is encrypted using Twofish",
315 for ( my $i = 0 ; $i < @$data ; $i++ ) {
316 exists $data->[$i]{'Version_Needed_To_Extract'}
317 or die " Version_Needed_To_Extract is not present\n";
318 exists $version_mappings{ $data->[$i]{'Version_Needed_To_Extract'} }
319 or die " Version_Needed_To_Extract has an illegal value\n";
320 $data->[$i]{'Version_Needed_To_Extract'} =
321 $version_mappings{ $data->[$i]{'Version_Needed_To_Extract'} };
325 sub extra_field {
326 my $data = shift;
327 my ( @header, @data );
328 my %header_mappings = (
329 '0001' => 'Zip64 extended information extra field',
330 '0007' => 'AV Info',
331 '0008' => 'Reserved for extended language encoding data (PFS)',
332 '0009' => 'OS/2',
333 '000a' => 'NTFS ',
334 '000c' => 'OpenVMS',
335 '000d' => 'UNIX',
336 '000e' => 'Reserved for file stream and fork descriptors',
337 '000f' => 'Patch Descriptor',
338 '0014' => 'PKCS#7 Store for X.509 Certificates',
339 '0015' => 'X.509 Certificate ID and Signature for individual file',
340 '0016' => 'X.509 Certificate ID for Central Directory',
341 '0017' => 'Strong Encryption Header',
342 '0018' => 'Record Management Controls',
343 '0019' => 'PKCS#7 Encryption Recipient Certificate List',
344 '0065' => 'IBM S/390 (Z390), AS/400 (I400) attributes - uncompressed',
345 '0066' => 'Reserved for IBM S/390 (Z390), AS/400 (I400) attributes - '
346 . 'compressed',
347 '4690' => 'POSZIP 4690 (reserved) ',
348 '07c8' => 'Macintosh',
349 '2605' => 'ZipIt Macintosh',
350 '2705' => 'ZipIt Macintosh 1.3.5+',
351 '2805' => 'ZipIt Macintosh 1.3.5+',
352 '334d' => 'Info-ZIP Macintosh',
353 '4341' => 'Acorn/SparkFS ',
354 '4453' => 'Windows NT security descriptor (binary ACL)',
355 '4704' => 'VM/CMS',
356 '470f' => 'MVS',
357 '4b46' => 'FWKCS MD5 (see below)',
358 '4c41' => 'OS/2 access control list (text ACL)',
359 '4d49' => 'Info-ZIP OpenVMS',
360 '4f4c' => 'Xceed original location extra field',
361 '5356' => 'AOS/VS (ACL)',
362 '5455' => 'extended timestamp',
363 '554e' => 'Xceed unicode extra field',
364 '5855' => 'Info-ZIP UNIX (original, also OS/2, NT, etc)',
365 '6375' => 'Info-ZIP Unicode Comment Extra Field',
366 '6542' => 'BeOS/BeBox',
367 '7075' => 'Info-ZIP Unicode Path Extra Field',
368 '756e' => 'ASi UNIX',
369 '7855' => 'Info-ZIP UNIX (new)',
370 'a220' => 'Microsoft Open Packaging Growth Hint',
371 'fd4a' => 'SMS/QDOS',
373 for ( my $i = 0 ; $i < @$data ; $i++ ) {
374 if ( exists $data->[$i]{'Extra_Field'} ) {
375 for ( my $j = 0 ; $j < length $data->[$i]{'Extra_Field'} ; $j += 4 )
377 my $header =
378 unpack( "H4",
379 pack( 'Z*', substr( $data->[$i]{'Extra_Field'}, $j, 2 ) ) );
380 $header = substr( $header, 2, 2 ) . substr( $header, 0, 2 );
381 $header = $header_mappings{$header}
382 if exists $header_mappings{$header};
383 push @header, $header;
384 my $data = unpack(
385 "H4",
386 pack( 'Z*',
387 substr( $data->[$i]{'Extra_Field'}, $j + 2, 2 ) )
389 push @data, substr( $data, 2, 2 ) . substr( $data, 0, 2 );
391 $data->[$i]{'Extra_Field'} = {
392 Header => \@header,
393 Data => \@data,
399 sub mod_file_date_time {
400 my $data = shift;
401 for ( 0 .. @$data - 1 ) {
403 # Convert Last Mod File Time to Hour, Minute and Second
404 $data->[$_]{'Last_Mod_File_Time'} =
405 pack( 'n', $data->[$_]{'Last_Mod_File_Time'} );
406 my $convert = BitStruct(
407 'Last_Mod_File_Time',
408 BitField( 'Hour', 5 ),
409 BitField( 'Minute', 6 ),
410 BitField( 'Second', 5 ),
412 $data->[$_]{'Last_Mod_File_Time'} = $convert->parse(
413 CreateStreamReader( $data->[$_]{'Last_Mod_File_Time'} ) );
415 # Convert Last Mod File Date to Year, Month and Day
416 $data->[$_]{'Last_Mod_File_Date'} =
417 pack( 'n', $data->[$_]{'Last_Mod_File_Date'} );
418 $convert = BitStruct(
419 'Last Mod File Date',
420 BitField( 'Year', 7 ),
421 BitField( 'Month', 4 ),
422 BitField( 'Day', 5 ),
424 $data->[$_]{'Last_Mod_File_Date'} = $convert->parse(
425 CreateStreamReader( $data->[$_]{'Last_Mod_File_Date'} ) );
426 $data->[$_]{'Last_Mod_File_Date'}{'Year'} =
427 $data->[$_]{'Last_Mod_File_Date'}{'Year'} + 1980;
431 # Zip is little endian
432 my $parser_end_central_directory_record = Struct(
433 'zip',
434 Const( Bytes( 'End_Of_Central_Dir_Signature', 4 ), "\x50\x4B\x05\x06" ),
435 ULInt16('Number_Of_This_Disk'),
436 ULInt16('Number_Of_The_Disk_With_The_Start_Of_The_Central_Directory'),
437 ULInt16('Total_Number_Of_Entries_In_The_Central_Directory_On_This_Disk'),
438 ULInt16('Total_Number_Of_Entries_In_The_Central_Directory'),
439 ULInt32('Size_Of_The_Central_Directory'),
440 ULInt32(
441 'Offset_Of_Start_Of_Central_Directory_With_Respect_To_The_Starting_Disk_Number'
443 ULInt16('.ZIP_File_Comment_Length'),
444 Field(
445 '.ZIP_File_Comment',
446 sub {
447 $_->ctx->{'.ZIP_File_Comment_Length'};
452 seek $fh, -22, 2;
454 my $number_of_files;
455 my $stream = CreateStreamReader( File => $fh );
456 my $pecdr = $parser_end_central_directory_record->parse($stream);
458 &dissect( $pecdr, 'ecdr' );
459 $number_of_files = $pecdr->{'Total_Number_Of_Entries_In_The_Central_Directory'};
461 my $parser_local_file_header = Array(
462 $number_of_files,
463 Struct(
464 'zip',
465 Const( Bytes( 'Local_File_Header_Signature', 4 ), "\x50\x4B\x03\x04" ),
466 Pointer(
467 sub {
470 Bytes( "\x50\x4B\x03\x04", 4 )
472 ULInt16('Version_Needed_To_Extract'),
473 BitStruct(
474 'General_Purpose_Bit_Flag',
475 Padding(1),
476 Flag('Bit6'),
477 Flag('Bit5'),
478 Flag('Bit4'),
479 Flag('Bit3'),
480 Flag('Bit2'),
481 Flag('Bit1'),
482 Flag('Bit0'),
483 Padding(2),
484 Flag('Bit13'),
485 Flag('Bit12'),
486 Flag('Bit11'),
487 Padding(3),
489 ULInt16('Compression_Method'),
490 ULInt16('Last_Mod_File_Time'),
491 ULInt16('Last_Mod_File_Date'),
492 ULInt32('CRC-32'),
493 ULInt32('Compressed_Size'),
494 ULInt32('Uncompressed_Size'),
495 ULInt16('Filename_Length'),
496 ULInt16('Extra_Field_Length'),
497 String(
498 'Filename',
499 sub {
500 $_->ctx->{'Filename_Length'};
503 Field(
504 'Extra_Field',
505 sub {
506 $_->ctx->{'Extra_Field_Length'};
509 Anchor('Position'),
510 Field(
511 'Compressed_Data',
512 sub {
513 $_->ctx->{'Compressed_Size'};
519 my $parser_central_directory_record = Array(
520 $number_of_files,
521 Struct(
522 'zip',
523 Const(
524 Bytes( 'Central_File_Header_Signature', 4 ), "\x50\x4B\x01\x02"
526 Struct(
527 'Version_Made_By', ULInt8('Specification'),
528 ULInt8('Compatibility'),
530 ULInt16('Version_Needed_To_Extract'),
531 BitStruct(
532 'General_Purpose_Bit_Flag', Padding(1),
533 Flag('Bit6'), Flag('Bit5'),
534 Flag('Bit4'), Flag('Bit3'),
535 Flag('Bit2'), Flag('Bit1'),
536 Flag('Bit0'), Padding(2),
537 Flag('Bit13'), Flag('Bit12'),
538 Flag('Bit11'), Padding(3),
540 ULInt16('Compression_Method'),
541 ULInt16('Last_Mod_File_Time'),
542 ULInt16('Last_Mod_File_Date'),
543 ULInt32('CRC-32'),
544 ULInt32('Compressed_Size'),
545 ULInt32('Uncompressed_Size'),
546 ULInt16('Filename_Length'),
547 ULInt16('Extra_Field_Length'),
548 ULInt16('File_Comment_Length'),
549 ULInt16('Disk_Number_Start'),
550 ULInt16('Internal_File_Attributes'),
551 Anchor('Position'),
552 ULInt32('External_File_Attributes'),
553 ULInt32('Relative_Offset_Of_Local_Header'),
554 String(
555 'Filename',
556 sub {
557 $_->ctx->{'Filename_Length'};
560 Field(
561 'Extra_Field',
562 sub {
563 $_->ctx->{'Extra_Field_Length'};
566 Field(
567 'File_Comment',
568 sub {
569 $_->ctx->{'File_Comment_Length'};
575 seek $fh, 0, 0;
576 $stream = CreateStreamReader( File => $fh );
578 &dissect( $parser_local_file_header->parse($stream), 'lfh' );
579 &dissect( $parser_central_directory_record->parse($stream), 'cdr' );