Locates the configuration file even when the script is run from elsewhere
[zip-parser.git] / zip_parser
blob82d65d4d3e19da61dd2ada3f7413977e3b643d1e
1 #!/usr/bin/perl
3 use strict;
4 use warnings;
5 use Data::ParseBinary;
6 use Data::Dumper;
7 use Cwd;
8 use File::Spec;
10 # *** Lacks the capability to parse Zip64 and encrypted .Zip files ***
12 # Check if a file has been passed as an argument or not
13 die "Usage: $0 zipfile\n" if @ARGV == 0;
15 # Slurp!
16 undef $/;
18 # Open the file and get filehandle
19 my $filename = shift;
20 open my $fh, '<', $filename
21 or die 'can not open $filename';
22 binmode $fh;
24 my ($printable_local_file_header, $printable_central_directory_record,
25 $printable_end_central_directory_record) =
26 do "$0.conf";
28 sub printable
29 {#{{{
30 my ($var, $header) = @_;
31 my @printable;
32 if ($header eq 'lfh' )
34 @printable = @{ $printable_local_file_header }
36 elsif ($header eq 'cdr' )
38 @printable = @{ $printable_central_directory_record }
40 elsif ($header eq 'ecdr')
42 @printable = @{ $printable_end_central_directory_record }
44 map
46 return 1 if $var eq $_
47 } @printable;
49 return 0;
50 }#}}}
52 sub dissect
53 {#{{{
54 my ($data, $header) = @_;
55 if ($header eq 'lfh' )
57 print "\tLOCAL FILE HEADER\n", '-' x 50, "\n\n"
59 elsif ($header eq 'cdr' )
61 print "\tCENTRAL DIRECTORY RECORD\n" , '-' x 50, "\n\n"
63 elsif ($header eq 'ecdr')
65 print "\tEND CENTRAL DIRECTORY RECORD\n", '-' x 50, "\n\n"
67 if ($header eq 'lfh' || $header eq 'cdr')
69 &mod_file_date_time ($data);
70 &extra_field ($data);
71 &compression_method ($data);
72 &general_purpose_bit_flag ($data);
73 &version_needed_to_extract($data);
74 &crc32 ($data);
75 &data_descriptor ($data);
77 else
79 $data = [$data]
81 for( @$data )
83 my %temp = %$_;
84 map
86 if (&printable($_, $header))
88 # Have to clean up the following, or maybe ponder of a better method
89 if (ref($temp{$_}) eq '' || ref($temp{$_}) eq 'SCALAR')
91 printf "%s -> \n\t%s\n", $_, $temp{$_}
93 elsif (ref($temp{$_}) eq 'ARRAY')
95 printf "%s ->", $_;
96 map print("\n\t$_"), @{ $temp{$_} };
97 print "\n";
99 elsif (ref($temp{$_}) eq 'HASH')
101 printf "%s ->", $_;
102 my %hash = %{ $temp{$_} };
105 if (ref($hash{$_}) eq 'ARRAY')
107 printf "\n%s ->", $_;
108 map print("\n\t$_"), @{ $hash{$_} };
110 else
112 printf "\n%s -> %s", $_, $hash{$_}
114 } keys %hash;
115 print "\n";
118 } sort keys %temp;
119 print "\n\n";
121 }#}}}
123 sub data_descriptor
124 {#{{{
125 my $data = shift;
126 for (my $i = 0; $i < @$data; $i++)
128 if (ref $data->[$i]{'General_Purpose_Bit_Flag'} eq 'ARRAY')
130 ${$data->[$i]{'Data_Descriptor'}} = {}
132 else
134 ${$data->[$i]{'Data_Descriptor'}} = ''
137 }#}}}
139 sub crc32
140 {#{{{
141 my $data = shift;
142 for (my $i = 0; $i < @$data; $i++)
144 $data->[$i]{'CRC-32'} = unpack 'H*', (pack 'N', $data->[$i]{'CRC-32'});
146 }#}}}
148 sub compression_method
149 {#{{{
150 my $data = shift;
151 for (my $i = 0; $i < @$data; $i++)
153 exists $data->[$i]{'Compression_Method'}
154 or die " Compression_Method is not present\n";
155 my %compression_method =
157 '0' => 'The file is stored (no compression)',
158 '1' => 'The file is Shrunk',
159 '2' => 'The file is Reduced with compression factor 1',
160 '3' => 'The file is Reduced with compression factor 2',
161 '4' => 'The file is Reduced with compression factor 3',
162 '5' => 'The file is Reduced with compression factor 4',
163 '6' => 'The file is Imploded',
164 '7' => 'Reserved for Tokenizing compression algorithm',
165 '8' => 'The file is Deflated',
166 '9' => 'Enhanced Deflating using Deflate64(tm)',
167 '10' => 'PKWARE Data Compression Library Imploding (old IBM TERSE)',
168 '11' => 'Reserved by PKWARE',
169 '12' => 'File is compressed using BZIP2 algorithm',
170 '13' => 'Reserved by PKWARE',
171 '14' => 'LZMA (EFS)',
172 '15' => 'Reserved by PKWARE',
173 '16' => 'Reserved by PKWARE',
174 '17' => 'Reserved by PKWARE',
175 '18' => 'File is compressed using IBM TERSE (new)',
176 '19' => 'IBM LZ77 z Architecture (PFS)',
177 '97' => 'WavPack compressed data',
178 '98' => 'PPMd version I, Rev 1',
180 $data->[$i]{'Compression_Method'} = $compression_method{ $data->[$i]{'Compression_Method'} };
182 }#}}}
184 sub general_purpose_bit_flag
185 {#{{{
186 my $data = shift;
187 for (my $i = 0; $i < @$data; $i++)
189 exists $data->[$i]{'General_Purpose_Bit_Flag'}
190 or die " General_Purpose_Bit_Flag is not present\n";
191 my $bit0 = ${ $data->[$i]{'General_Purpose_Bit_Flag'} }{'Bit0' };
192 my $bit1 = ${ $data->[$i]{'General_Purpose_Bit_Flag'} }{'Bit1' };
193 my $bit2 = ${ $data->[$i]{'General_Purpose_Bit_Flag'} }{'Bit2' };
194 my $bit3 = ${ $data->[$i]{'General_Purpose_Bit_Flag'} }{'Bit3' };
195 my $bit4 = ${ $data->[$i]{'General_Purpose_Bit_Flag'} }{'Bit4' };
196 my $bit5 = ${ $data->[$i]{'General_Purpose_Bit_Flag'} }{'Bit5' };
197 my $bit6 = ${ $data->[$i]{'General_Purpose_Bit_Flag'} }{'Bit6' };
198 my $bit11 = ${ $data->[$i]{'General_Purpose_Bit_Flag'} }{'Bit11'};
199 my $bit12 = ${ $data->[$i]{'General_Purpose_Bit_Flag'} }{'Bit12'};
200 my $bit13 = ${ $data->[$i]{'General_Purpose_Bit_Flag'} }{'Bit13'};
201 my @general_purpose_bit_flag;
202 push @general_purpose_bit_flag, 'File is encrypted' if $bit0 == 1;
203 if ($data->[$i]{'Compression_Method'} eq 'The file is Imploded')
205 if ($bit1 == 1)
207 push @general_purpose_bit_flag, '8K sliding dictionary'
209 else
211 push @general_purpose_bit_flag, '4K sliding dictionary'
213 if ($bit2 == 1)
215 push @general_purpose_bit_flag, '3 Shannon-Fano trees were used to encode the sliding dictionary output'
217 else
219 push @general_purpose_bit_flag, '2 Shannon-Fano trees were used to encode the sliding dictionary output'
222 elsif ($data->[$i]{'Compression_Method'} eq 'The file is Deflated' ||
223 $data->[$i]{'Compression_Method'} eq 'Enhanced Deflating using Deflate64(tm)')
225 push @general_purpose_bit_flag, 'Normal (-en) compression option was used' if $bit2 == 0 && $bit1 == 0;
226 push @general_purpose_bit_flag, 'Maximum (-exx/-ex) compression option was used' if $bit2 == 0 && $bit1 == 1;
227 push @general_purpose_bit_flag, 'Fast (-ef) compression option was used' if $bit2 == 1 && $bit1 == 0;
228 push @general_purpose_bit_flag, 'Super Fast (-es) compression option was used' if $bit2 == 1 && $bit1 == 1;
229 } elsif ($data->[$i]{'Compression_Method'} eq 'LZMA (EFS)')
231 if ($bit1 == 1)
233 push @general_purpose_bit_flag, 'End-of-stream (EOS) marker is used to mark the end of the compressed data stream'
235 else
237 push @general_purpose_bit_flag, 'End-of-stream (EOS) marker is not present and the compressed data size must be known to extract'
240 else
242 if ($bit3 == 1)
244 $data->[$i]{'CRC-32' } = 0;
245 $data->[$i]{'Compressed_Size' } = 0;
246 $data->[$i]{'Uncompressed_Size'} = 0;
247 push @general_purpose_bit_flag, 'Data Descriptor contains CRC-32, Compressed_Size and Uncompressed_Size';
249 if ($bit4 == 1 && $data->[$i]{'Compression_Method'} eq 'The file is Deflated')
251 push @general_purpose_bit_flag, 'Enhanced deflating'
253 elsif ($bit4 == 1 && $data->[$i]{'Compression_Method'} ne 'The file is Deflated')
255 die ' Enhanced deflating cannot be done on a file that is not deflated'
257 if ($bit5 == 1 && $data->[$i]{'Version_Needed_To_Extract'} >= 27)
259 push @general_purpose_bit_flag, 'Compressed patched data'
261 elsif ($bit5 == 1)
263 die ' Incompatible Version_Needed_To_Extract for patched compressed data'
265 if ($bit6 == 1 && $data->[$i]{'Version_Needed_To_Extract'} >= 50 && $bit0 == 1)
267 push @general_purpose_bit_flag, 'Strong encryption'
269 elsif ($bit6 == 1 && $data->[$i]{'Version_Needed_To_Extract'} < 50 && $bit0 == 1)
271 die ' Incompatible Version_Needed_To_Extract for strong encryption'
273 elsif ($bit6 == 1 && $bit0 == 0)
275 die ' Non-encrypted file cannot be strong encrypted'
277 push @general_purpose_bit_flag, 'Filename and comment fields for this file must be encoded using UTF-8' if $bit11 == 1;
278 push @general_purpose_bit_flag, 'Enhanced compression' if $bit12 == 1;
279 push @general_purpose_bit_flag, 'Selected data values in the Local Header are masked' if $bit13 == 1;
281 $data->[$i]{'General_Purpose_Bit_Flag'} = [ @general_purpose_bit_flag ];
283 }#}}}
285 sub version_needed_to_extract
286 {#{{{
287 my $data = shift;
288 my %version_mappings =
290 '10' => 'Default value',
291 '11' => 'File is a volume label',
292 '20' => "File is a folder (directory)" .
293 "\n\tFile is compressed using Deflate compression" .
294 "\n\tFile is encrypted using traditional PKWARE encryption",
295 '21' => 'File is compressed using Deflate64(tm)',
296 '25' => 'File is compressed using PKWARE DCL Implode ',
297 '27' => 'File is a patch data set ',
298 '45' => 'File uses ZIP64 format extensions',
299 '46' => 'File is compressed using BZIP2 compression*',
300 '50' => "File is encrypted using DES" .
301 "\n\tFile is encrypted using 3DES" .
302 "\n\tFile is encrypted using original RC2 encryption" .
303 "\n\tFile is encrypted using RC4 encryption",
304 '51' => "File is encrypted using AES encryption" .
305 "\n\tFile is encrypted using corrected RC2 encryption",
306 '52' => 'File is encrypted using corrected RC2-64 encryption',
307 '61' => 'File is encrypted using non-OAEP key wrapping',
308 '62' => 'Central directory encryption',
309 '63' => "File is compressed using LZMA" .
310 "\n\tFile is compressed using PPMd" .
311 "\n\tFile is encrypted using Blowfish" .
312 "\n\tFile is encrypted using Twofish",
314 for (my $i = 0; $i < @$data; $i++)
316 exists $data->[$i]{'Version_Needed_To_Extract'}
317 or die " Version_Needed_To_Extract is not present\n";
318 exists $version_mappings{ $data->[$i]{'Version_Needed_To_Extract'} }
319 or die " Version_Needed_To_Extract has an illegal value\n";
320 $data->[$i]{'Version_Needed_To_Extract'} = $version_mappings{ $data->[$i]{'Version_Needed_To_Extract'} };
323 #}}}
325 sub extra_field
326 {#{{{
327 my $data = shift;
328 my (@header, @data);
329 my %header_mappings =
331 '0001' => 'Zip64 extended information extra field',
332 '0007' => 'AV Info',
333 '0008' => 'Reserved for extended language encoding data (PFS)',
334 '0009' => 'OS/2',
335 '000a' => 'NTFS ',
336 '000c' => 'OpenVMS',
337 '000d' => 'UNIX',
338 '000e' => 'Reserved for file stream and fork descriptors',
339 '000f' => 'Patch Descriptor',
340 '0014' => 'PKCS#7 Store for X.509 Certificates',
341 '0015' => 'X.509 Certificate ID and Signature for individual file',
342 '0016' => 'X.509 Certificate ID for Central Directory',
343 '0017' => 'Strong Encryption Header',
344 '0018' => 'Record Management Controls',
345 '0019' => 'PKCS#7 Encryption Recipient Certificate List',
346 '0065' => 'IBM S/390 (Z390), AS/400 (I400) attributes - uncompressed',
347 '0066' => 'Reserved for IBM S/390 (Z390), AS/400 (I400) attributes - compressed',
348 '4690' => 'POSZIP 4690 (reserved) ',
349 '07c8' => 'Macintosh',
350 '2605' => 'ZipIt Macintosh',
351 '2705' => 'ZipIt Macintosh 1.3.5+',
352 '2805' => 'ZipIt Macintosh 1.3.5+',
353 '334d' => 'Info-ZIP Macintosh',
354 '4341' => 'Acorn/SparkFS ',
355 '4453' => 'Windows NT security descriptor (binary ACL)',
356 '4704' => 'VM/CMS',
357 '470f' => 'MVS',
358 '4b46' => 'FWKCS MD5 (see below)',
359 '4c41' => 'OS/2 access control list (text ACL)',
360 '4d49' => 'Info-ZIP OpenVMS',
361 '4f4c' => 'Xceed original location extra field',
362 '5356' => 'AOS/VS (ACL)',
363 '5455' => 'extended timestamp',
364 '554e' => 'Xceed unicode extra field',
365 '5855' => 'Info-ZIP UNIX (original, also OS/2, NT, etc)',
366 '6375' => 'Info-ZIP Unicode Comment Extra Field',
367 '6542' => 'BeOS/BeBox',
368 '7075' => 'Info-ZIP Unicode Path Extra Field',
369 '756e' => 'ASi UNIX',
370 '7855' => 'Info-ZIP UNIX (new)',
371 'a220' => 'Microsoft Open Packaging Growth Hint',
372 'fd4a' => 'SMS/QDOS',
374 for (my $i = 0; $i < @$data; $i++)
376 if (exists $data->[$i]{'Extra_Field'})
378 for (my $j = 0; $j < length $data->[$i]{'Extra_Field'}; $j += 4)
380 my $header = unpack("H4", pack('Z*', substr($data->[$i]{'Extra_Field'}, $j, 2)));
381 $header = substr($header, 2, 2) . substr($header, 0, 2);
382 $header = $header_mappings{$header} if exists $header_mappings{$header};
383 push @header, $header;
384 my $data = unpack("H4", pack('Z*', substr($data->[$i]{'Extra_Field'}, $j + 2, 2)));
385 push @data, substr($data, 2, 2) . substr($data, 0, 2);
387 $data->[$i]{'Extra_Field'} =
389 Header => \@header,
390 Data => \@data,
394 }#}}}
396 sub mod_file_date_time
397 {#{{{
398 my $data = shift;
399 for (0 .. @$data - 1)
401 # Convert Last Mod File Time to Hour, Minute and Second
402 $data->[$_]{'Last_Mod_File_Time'} = pack('n', $data->[$_]{'Last_Mod_File_Time'});
403 my $convert =
404 BitStruct
406 'Last_Mod_File_Time',
407 BitField('Hour' , 5),
408 BitField('Minute', 6),
409 BitField('Second', 5),
411 $data->[$_]{'Last_Mod_File_Time'} = $convert->parse(CreateStreamReader($data->[$_]{'Last_Mod_File_Time'}));
412 # Convert Last Mod File Date to Year, Month and Day
413 $data->[$_]{'Last_Mod_File_Date'} = pack('n', $data->[$_]{'Last_Mod_File_Date'});
414 $convert =
415 BitStruct
417 'Last Mod File Date',
418 BitField('Year' , 7),
419 BitField('Month', 4),
420 BitField('Day' , 5),
422 $data->[$_]{'Last_Mod_File_Date'} = $convert->parse(CreateStreamReader($data->[$_]{'Last_Mod_File_Date'}));
423 $data->[$_]{'Last_Mod_File_Date'}{'Year'} = $data->[$_]{'Last_Mod_File_Date'}{'Year'} + 1980;
425 }#}}}
427 # Zip is little endian
428 my $parser_end_central_directory_record =
429 Struct
431 'zip',
432 Const
434 Bytes('End_Of_Central_Dir_Signature', 4), "\x50\x4B\x05\x06"
436 Anchor('Position'),
437 ULInt16('Number_Of_This_Disk' ),
438 ULInt16('Number_Of_The_Disk_With_The_Start_Of_The_Central_Directory' ),
439 ULInt16('Total_Number_Of_Entries_In_The_Central_Directory_On_This_Disk' ),
440 ULInt16('Total_Number_Of_Entries_In_The_Central_Directory' ),
441 ULInt32('Size_Of_The_Central_Directory' ),
442 ULInt32('Offset_Of_Start_Of_Central_Directory_With_Respect_To_The_Starting_Disk_Number'),
443 ULInt16('.ZIP_File_Comment_Length' ),
444 Field
446 '.ZIP_File_Comment',
449 $_->ctx->{'.ZIP_File_Comment_Length'}
454 seek $fh, -22, 2;
456 my $number_of_files;
457 my $stream = CreateStreamReader(File => $fh);
458 my $pecdr = $parser_end_central_directory_record->parse($stream);
460 &dissect($pecdr, 'ecdr');
461 $number_of_files = $pecdr->{'Total_Number_Of_Entries_In_The_Central_Directory'};
463 my $parser_local_file_header =
464 Array
466 $number_of_files,
467 Struct
469 'zip',
470 Const
472 Bytes
474 'Local_File_Header_Signature', 4
476 "\x50\x4B\x03\x04"
478 Pointer
484 Bytes
486 "\x50\x4B\x03\x04", 4
489 ULInt16('Version_Needed_To_Extract'),
490 BitStruct
492 'General_Purpose_Bit_Flag',
493 Padding(1 ),
494 Flag('Bit6' ),
495 Flag('Bit5' ),
496 Flag('Bit4' ),
497 Flag('Bit3' ),
498 Flag('Bit2' ),
499 Flag('Bit1' ),
500 Flag('Bit0' ),
501 Padding(2 ),
502 Flag('Bit13'),
503 Flag('Bit12'),
504 Flag('Bit11'),
505 Padding(3 ),
507 ULInt16('Compression_Method'),
508 ULInt16('Last_Mod_File_Time'),
509 ULInt16('Last_Mod_File_Date'),
510 ULInt32('CRC-32' ),
511 ULInt32('Compressed_Size' ),
512 ULInt32('Uncompressed_Size' ),
513 ULInt16('Filename_Length' ),
514 ULInt16('Extra_Field_Length'),
515 String
517 'Filename',
520 $_->ctx->{'Filename_Length' }
523 Field
525 'Extra_Field',
528 $_->ctx->{'Extra_Field_Length'}
531 Field
533 'Compressed_Data',
536 $_->ctx->{'Compressed_Size'}
543 $_->ctx->{'General_Purpose_Bit_Flag'}->{'Bit3'}
545 Struct
547 'Data_Descriptor',
548 ULInt32('CRC-32' ),
549 ULInt32('Compressed_Size' ),
550 ULInt32('Uncompressed_Size'),
553 Anchor('Position'),
557 my $parser_central_directory_record =
558 Array
560 $number_of_files,
561 Struct
563 'zip',
564 Const
566 Bytes
568 'Central_File_Header_Signature',
571 "\x50\x4B\x01\x02"
573 Struct
575 'Version_Made_By',
576 ULInt8('Specification'),
577 ULInt8('Compatibility'),
579 ULInt16('Version_Needed_To_Extract'),
580 BitStruct
582 'General_Purpose_Bit_Flag',
583 Padding(1 ),
584 Flag('Bit6' ),
585 Flag('Bit5' ),
586 Flag('Bit4' ),
587 Flag('Bit3' ),
588 Flag('Bit2' ),
589 Flag('Bit1' ),
590 Flag('Bit0' ),
591 Padding(2 ),
592 Flag('Bit13'),
593 Flag('Bit12'),
594 Flag('Bit11'),
595 Padding(3 ),
597 ULInt16('Compression_Method' ),
598 ULInt16('Last_Mod_File_Time' ),
599 ULInt16('Last_Mod_File_Date' ),
600 ULInt32('CRC-32' ),
601 ULInt32('Compressed_Size' ),
602 ULInt32('Uncompressed_Size' ),
603 ULInt16('Filename_Length' ),
604 ULInt16('Extra_Field_Length' ),
605 ULInt16('File_Comment_Length' ),
606 ULInt16('Disk_Number_Start' ),
607 ULInt16('Internal_File_Attributes' ),
608 ULInt32('External_File_Attributes' ),
609 ULInt32('Relative_Offset_Of_Local_Header'),
610 String
612 'Filename',
615 $_->ctx->{'Filename_Length'}
618 Field
620 'Extra_Field',
623 $_->ctx->{'Extra_Field_Length'}
626 Field
628 'File_Comment',
631 $_->ctx->{'File_Comment_Length'}
637 seek $fh, 0, 0;
638 $stream = CreateStreamReader(File => $fh);
640 &dissect($parser_local_file_header->parse($stream) , 'lfh');
641 &dissect($parser_central_directory_record->parse($stream), 'cdr');