1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
7 #include <deark-config.h>
8 #include <deark-private.h>
9 #include <deark-fmtutil.h>
11 DE_DECLARE_MODULE(de_module_zip
);
13 struct localctx_struct
;
14 typedef struct localctx_struct lctx
;
16 #define CODE_PK12 0x02014b50U
17 #define CODE_PK34 0x04034b50U
18 static const u8 g_zipsig34
[4] = {'P', 'K', 0x03, 0x04};
19 static const u8 g_zipsig56
[4] = {'P', 'K', 0x05, 0x06};
20 static const u8 g_zipsig66
[4] = {'P', 'K', 0x06, 0x06};
21 static const u8 g_zipsig67
[4] = {'P', 'K', 0x06, 0x07};
23 struct compression_params
{
24 // ZIP-specific params (not in de_dfilter_*_params) that may be needed
25 // to decompress something.
27 unsigned int bit_flags
;
30 typedef void (*decompressor_fn
)(deark
*c
, lctx
*d
, struct compression_params
*cparams
,
31 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
32 struct de_dfilter_results
*dres
);
34 struct cmpr_meth_info
{
38 decompressor_fn decompressor
;
41 struct dir_entry_data
{
42 unsigned int ver_needed
;
43 unsigned int ver_needed_hi
, ver_needed_lo
;
44 i64 cmpr_size
, uncmpr_size
;
46 const struct cmpr_meth_info
*cmi
;
47 unsigned int bit_flags
;
54 struct timestamp_data
{
55 struct de_timestamp ts
; // The best timestamp of this type found so far
60 unsigned int ver_made_by
;
61 unsigned int ver_made_by_hi
, ver_made_by_lo
;
62 unsigned int attr_i
, attr_e
;
63 i64 offset_of_local_header
;
64 i64 disk_number_start
;
70 struct timestamp_data tsdata
[DE_TIMESTAMPIDX_COUNT
];
72 struct dir_entry_data central_dir_entry_data
;
73 struct dir_entry_data local_dir_entry_data
;
75 i64 cmpr_size
, uncmpr_size
;
79 struct extra_item_type_info_struct
;
81 struct extra_item_info_struct
{
85 const struct extra_item_type_info_struct
*eiti
;
86 struct member_data
*md
;
87 struct dir_entry_data
*dd
;
91 struct localctx_struct
{
92 de_encoding default_enc_for_filenames
;
93 de_encoding default_enc_for_comments
;
94 i64 end_of_central_dir_pos
;
95 i64 central_dir_num_entries
;
96 i64 central_dir_byte_size
;
97 i64 central_dir_offset
;
101 i64 zip64_num_centr_dir_entries_this_disk
;
102 i64 zip64_num_centr_dir_entries_total
;
103 i64 zip64_centr_dir_byte_size
;
104 unsigned int zip64_eocd_disknum
;
105 unsigned int zip64_cd_disknum
;
106 i64 offset_discrepancy
;
107 int used_offset_discrepancy
;
110 struct de_crcobj
*crco
;
113 typedef void (*extrafield_decoder_fn
)(deark
*c
, lctx
*d
,
114 struct extra_item_info_struct
*eii
);
116 static int is_compression_method_supported(lctx
*d
, const struct cmpr_meth_info
*cmi
)
118 if(cmi
&& cmi
->decompressor
) return 1;
122 static void do_decompress_shrink(deark
*c
, lctx
*d
, struct compression_params
*cparams
,
123 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
124 struct de_dfilter_results
*dres
)
126 fmtutil_decompress_zip_shrink(c
, dcmpri
, dcmpro
, dres
, NULL
);
129 static void do_decompress_reduce(deark
*c
, lctx
*d
, struct compression_params
*cparams
,
130 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
131 struct de_dfilter_results
*dres
)
133 struct de_zipreduce_params params
;
135 de_zeromem(¶ms
, sizeof(struct de_zipreduce_params
));
136 params
.cmpr_factor
= (unsigned int)(cparams
->cmpr_meth
-1);
137 fmtutil_decompress_zip_reduce(c
, dcmpri
, dcmpro
, dres
, ¶ms
);
140 static void do_decompress_implode(deark
*c
, lctx
*d
, struct compression_params
*cparams
,
141 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
142 struct de_dfilter_results
*dres
)
144 struct de_zipimplode_params params
;
146 de_zeromem(¶ms
, sizeof(struct de_zipimplode_params
));
147 params
.bit_flags
= cparams
->bit_flags
;
148 params
.mml_bug
= (u8
)de_get_ext_option_bool(c
, "zip:implodebug", 0);
149 fmtutil_decompress_zip_implode(c
, dcmpri
, dcmpro
, dres
, ¶ms
);
152 static void do_decompress_deflate(deark
*c
, lctx
*d
, struct compression_params
*cparams
,
153 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
154 struct de_dfilter_results
*dres
)
156 struct de_deflate_params inflparams
;
158 de_zeromem(&inflparams
, sizeof(struct de_deflate_params
));
159 if(cparams
->cmpr_meth
==9) {
160 inflparams
.flags
|= DE_DEFLATEFLAG_DEFLATE64
;
162 fmtutil_decompress_deflate_ex(c
, dcmpri
, dcmpro
, dres
, &inflparams
);
165 static void do_decompress_dclimplode(deark
*c
, lctx
*d
, struct compression_params
*cparams
,
166 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
167 struct de_dfilter_results
*dres
)
169 fmtutil_dclimplode_codectype1(c
, dcmpri
, dcmpro
, dres
, NULL
);
172 static void do_decompress_stored(deark
*c
, lctx
*d
, struct compression_params
*cparams
,
173 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
174 struct de_dfilter_results
*dres
)
176 fmtutil_decompress_uncompressed(c
, dcmpri
, dcmpro
, dres
, 0);
179 static const struct cmpr_meth_info cmpr_meth_info_arr
[] = {
180 { 0, 0x00, "stored", do_decompress_stored
},
181 { 1, 0x00, "shrink", do_decompress_shrink
},
182 { 2, 0x00, "reduce, CF=1", do_decompress_reduce
},
183 { 3, 0x00, "reduce, CF=2", do_decompress_reduce
},
184 { 4, 0x00, "reduce, CF=3", do_decompress_reduce
},
185 { 5, 0x00, "reduce, CF=4", do_decompress_reduce
},
186 { 6, 0x00, "implode", do_decompress_implode
},
187 { 8, 0x00, "deflate", do_decompress_deflate
},
188 { 9, 0x00, "deflate64", do_decompress_deflate
},
189 { 10, 0x00, "PKWARE DCL implode", do_decompress_dclimplode
},
190 { 12, 0x00, "bzip2", NULL
},
191 { 14, 0x00, "LZMA", NULL
},
192 { 16, 0x00, "IBM z/OS CMPSC", NULL
},
193 { 18, 0x00, "IBM TERSE (new)", NULL
},
194 { 19, 0x00, "IBM LZ77 z Architecture", NULL
},
195 { 94, 0x00, "MP3", NULL
},
196 { 95, 0x00, "XZ", NULL
},
197 { 96, 0x00, "JPEG", NULL
},
198 { 97, 0x00, "WavPack", NULL
},
199 { 98, 0x00, "PPMd", NULL
},
200 { 99, 0x00, "AES", NULL
}
203 static const struct cmpr_meth_info
*get_cmpr_meth_info(int cmpr_meth
)
207 for(k
=0; k
<DE_ARRAYCOUNT(cmpr_meth_info_arr
); k
++) {
208 if(cmpr_meth_info_arr
[k
].cmpr_meth
== cmpr_meth
) {
209 return &cmpr_meth_info_arr
[k
];
215 // Decompress some data, using the given ZIP compression method.
216 // On failure, dres->errcode will be set.
217 static void do_decompress_lowlevel(deark
*c
, lctx
*d
, struct de_dfilter_in_params
*dcmpri
,
218 struct de_dfilter_out_params
*dcmpro
, struct de_dfilter_results
*dres
,
219 int cmpr_meth
, const struct cmpr_meth_info
*cmi
, unsigned int bit_flags
)
221 struct compression_params cparams
;
223 de_zeromem(&cparams
, sizeof(struct compression_params
));
224 cparams
.cmpr_meth
= cmpr_meth
;
225 cparams
.bit_flags
= bit_flags
;
227 if(cmi
&& cmi
->decompressor
) {
228 cmi
->decompressor(c
, d
, &cparams
, dcmpri
, dcmpro
, dres
);
231 de_internal_err_nonfatal(c
, "Unsupported compression method (%d)", cmpr_meth
);
232 de_dfilter_set_generic_error(c
, dres
, NULL
);
236 static void our_writelistener_cb(dbuf
*f
, void *userdata
, const u8
*buf
, i64 buf_len
)
238 struct de_crcobj
*crco
= (struct de_crcobj
*)userdata
;
239 de_crcobj_addbuf(crco
, buf
, buf_len
);
242 // Decompress a Zip member file, writing to outf.
243 // Does CRC calculation.
244 // Reports errors to the user.
245 // Only call this if the compression method is supported -- Call
246 // is_compression_method_supported() first.
247 // Assumes ldd->cmi has been set, by calling get_cmpr_meth_info().
// Input is the slice [md->file_data_pos, +md->cmpr_size) of c->infile;
// the expected output length is md->uncmpr_size.
// The CRC is accumulated in d->crco (shared with other functions in this
// file) and compared against md->crc_reported.
248 static int do_decompress_member(deark
*c
, lctx
*d
, struct member_data
*md
, dbuf
*outf
)
250 struct dir_entry_data
*ldd
= &md
->local_dir_entry_data
;
251 struct de_dfilter_in_params dcmpri
;
252 struct de_dfilter_out_params dcmpro
;
253 struct de_dfilter_results dres
;
257 de_dfilter_init_objects(c
, &dcmpri
, &dcmpro
, &dres
);
258 dcmpri
.f
= c
->infile
;
259 dcmpri
.pos
= md
->file_data_pos
;
260 dcmpri
.len
= md
->cmpr_size
;
262 dcmpro
.expected_len
= md
->uncmpr_size
;
263 dcmpro
.len_known
= 1;
// Everything written to outf is also passed to our_writelistener_cb(),
// which adds it to the CRC object.
265 dbuf_set_writelistener(outf
, our_writelistener_cb
, (void*)d
->crco
);
266 de_crcobj_reset(d
->crco
);
268 do_decompress_lowlevel(c
, d
, &dcmpri
, &dcmpro
, &dres
, ldd
->cmpr_meth
,
269 ldd
->cmi
, ldd
->bit_flags
);
272 de_err(c
, "%s: %s", ucstring_getpsz_d(ldd
->fname
),
273 de_dfilter_get_errmsg(c
, &dres
));
277 crc_calculated
= de_crcobj_getval(d
->crco
);
278 de_dbg(c
, "crc (calculated): 0x%08x", (unsigned int)crc_calculated
);
280 if(crc_calculated
!= md
->crc_reported
) {
281 de_err(c
, "%s: CRC check failed: Expected 0x%08x, got 0x%08x",
282 ucstring_getpsz_d(ldd
->fname
),
283 (unsigned int)md
->crc_reported
, (unsigned int)crc_calculated
);
// If the codec reported how much input it used, and it stopped early,
// tell the user -- it may help to diagnose the CRC failure.
284 if(dres
.bytes_consumed_valid
&& (dres
.bytes_consumed
< dcmpri
.len
)) {
285 de_info(c
, "Note: Only used %"I64_FMT
" of %"I64_FMT
" compressed bytes.",
286 dres
.bytes_consumed
, dcmpri
.len
);
296 // A variation of do_decompress_member() -
297 // works for Finder attribute data, and OS/2 extended attributes.
298 // Only call this if the compression method is supported -- Call
299 // is_compression_method_supported() first.
300 // outf is assumed to be a membuf.
301 // flags: 0x1 = Validate the crc_reported param. (Callers name this value "dcflags".)
// The compressed data is read from c->infile; uncmprsize is the expected
// decompressed length. No bit flags are passed to the codec (0).
// The CRC is computed over the whole of outf after decompression, using
// the shared d->crco object; 'name' is only used to label debug output.
302 static int do_decompress_attrib_data(deark
*c
, lctx
*d
,
303 i64 dpos
, i64 dlen
, dbuf
*outf
, i64 uncmprsize
, u32 crc_reported
,
304 int cmpr_meth
, const struct cmpr_meth_info
*cmi
, UI flags
, const char *name
)
306 struct de_dfilter_in_params dcmpri
;
307 struct de_dfilter_out_params dcmpro
;
308 struct de_dfilter_results dres
;
312 de_dfilter_init_objects(c
, &dcmpri
, &dcmpro
, &dres
);
313 dcmpri
.f
= c
->infile
;
317 dcmpro
.expected_len
= uncmprsize
;
318 dcmpro
.len_known
= 1;
320 do_decompress_lowlevel(c
, d
, &dcmpri
, &dcmpro
, &dres
, cmpr_meth
, cmi
, 0);
322 goto done
; // Could report the error, but this isn't critical data
326 de_crcobj_reset(d
->crco
);
327 de_crcobj_addslice(d
->crco
, outf
, 0, outf
->len
);
328 crc_calculated
= de_crcobj_getval(d
->crco
);
329 de_dbg(c
, "%s crc (calculated): 0x%08x", name
, (UI
)crc_calculated
);
330 if(crc_calculated
!= crc_reported
) goto done
;
338 // As we read a member file's attributes, we may encounter multiple timestamps,
339 // which can differ in their precision, and whether they use UTC.
340 // This function is called to remember the "best" file modification time
341 // encountered so far.
342 static void apply_timestamp(deark
*c
, lctx
*d
, struct member_data
*md
, int tstype
,
343 const struct de_timestamp
*ts
, int quality
)
345 if(!ts
->is_valid
) return;
347 // In case of a tie, we prefer the later timestamp that we encountered.
348 // This makes local headers have priority over central headers, for
350 if(quality
>= md
->tsdata
[tstype
].quality
) {
351 md
->tsdata
[tstype
].ts
= *ts
;
352 md
->tsdata
[tstype
].quality
= quality
;
356 static void do_read_filename(deark
*c
, lctx
*d
,
357 struct member_data
*md
, struct dir_entry_data
*dd
,
358 i64 pos
, i64 len
, int utf8_flag
)
360 de_encoding from_encoding
;
362 ucstring_empty(dd
->fname
);
363 from_encoding
= utf8_flag
? DE_ENCODING_UTF8
: d
->default_enc_for_filenames
;
364 dbuf_read_to_ucstring(c
->infile
, pos
, len
, dd
->fname
, 0, from_encoding
);
365 de_dbg(c
, "filename: \"%s\"", ucstring_getpsz_d(dd
->fname
));
// Reads the comment at [pos, pos+len) of the input file into a newly
// created string, decoding it with encoding 'ee', and prints it to the
// debug log, labeled with 'name'.
368 static void do_comment_display(deark
*c
, lctx
*d
, i64 pos
, i64 len
, de_ext_encoding ee
,
371 de_ucstring
*s
= NULL
;
373 s
= ucstring_create(c
);
374 dbuf_read_to_ucstring(c
->infile
, pos
, len
, s
, 0, ee
);
375 de_dbg(c
, "%s: \"%s\"", name
, ucstring_getpsz_d(s
));
// Extracts the comment at [pos, pos+len) of the input file to an auxiliary
// output file (DE_CREATEFLAG_IS_AUX) whose extension is 'ext'.
// The comment is decoded with encoding 'ee' and written as UTF-8.
// NOTE(review): the last argument (1) to ucstring_write_as_utf8()
// presumably requests a BOM -- confirm against its declaration.
379 static void do_comment_extract(deark
*c
, lctx
*d
, i64 pos
, i64 len
, de_ext_encoding ee
,
383 de_ucstring
*s
= NULL
;
385 f
= dbuf_create_output_file(c
, ext
, NULL
, DE_CREATEFLAG_IS_AUX
);
386 s
= ucstring_create(c
);
387 dbuf_read_to_ucstring(c
->infile
, pos
, len
, s
, 0, ee
);
388 ucstring_write_as_utf8(c
, s
, f
, 1);
// Handles a comment field at [pos, pos+len): chooses the encoding (UTF-8
// if utf8_flag is set, otherwise the default comment encoding from d,
// in both cases with the DE_ENCSUBTYPE_HYBRID subtype), then hands the
// comment to do_comment_extract() and/or do_comment_display().
// The extract path is tied to c->extract_level>=2.
// 'name' labels the debug output; 'ext' is the extension used for an
// extracted comment file.
393 static void do_comment(deark
*c
, lctx
*d
, i64 pos
, i64 len
, int utf8_flag
,
394 const char *name
, const char *ext
)
399 ee
= utf8_flag
? DE_ENCODING_UTF8
: d
->default_enc_for_comments
;
400 ee
= DE_EXTENC_MAKE(ee
, DE_ENCSUBTYPE_HYBRID
);
401 if(c
->extract_level
>=2) {
402 do_comment_extract(c
, d
, pos
, len
, ee
, ext
);
405 do_comment_display(c
, d
, pos
, len
, ee
, name
);
409 static void read_unix_timestamp(deark
*c
, lctx
*d
, i64 pos
,
410 struct de_timestamp
*timestamp
, const char *name
)
413 char timestamp_buf
[64];
415 t
= de_geti32le(pos
);
416 de_unix_time_to_timestamp(t
, timestamp
, 0x1);
417 de_dbg_timestamp_to_string(c
, timestamp
, timestamp_buf
, sizeof(timestamp_buf
), 0);
418 de_dbg(c
, "%s: %"I64_FMT
" (%s)", name
, t
, timestamp_buf
);
421 static void read_FILETIME(deark
*c
, lctx
*d
, i64 pos
,
422 struct de_timestamp
*timestamp
, const char *name
)
425 char timestamp_buf
[64];
427 t_FILETIME
= de_geti64le(pos
);
428 de_FILETIME_to_timestamp(t_FILETIME
, timestamp
, 0x1);
429 de_dbg_timestamp_to_string(c
, timestamp
, timestamp_buf
, sizeof(timestamp_buf
), 0);
430 de_dbg(c
, "%s: %s", name
, timestamp_buf
);
// Extra field 0x0001 - Zip64 extended information
// Reads, in order: an 8-byte uncompressed size, an 8-byte compressed size,
// an 8-byte local header offset, and a 4-byte disk start number. Each value
// is read only if it fits entirely within the item's data
// (eii->dpos .. eii->dpos+eii->dlen); otherwise decoding stops.
// A size read here replaces the one in eii->dd only when the existing
// value is the 0xffffffff placeholder.
433 static void ef_zip64extinfo(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
438 if(pos
+8 > eii
->dpos
+eii
->dlen
) goto done
;
439 n
= de_geti64le(pos
); pos
+= 8;
440 de_dbg(c
, "orig uncmpr file size: %"I64_FMT
, n
);
441 if(eii
->dd
->uncmpr_size
==0xffffffffLL
) {
442 eii
->dd
->uncmpr_size
= n
;
445 if(pos
+8 > eii
->dpos
+eii
->dlen
) goto done
;
446 n
= de_geti64le(pos
); pos
+= 8;
447 de_dbg(c
, "cmpr data size: %"I64_FMT
, n
);
448 if(eii
->dd
->cmpr_size
==0xffffffffLL
) {
449 eii
->dd
->cmpr_size
= n
;
452 if(pos
+8 > eii
->dpos
+eii
->dlen
) goto done
;
453 n
= de_geti64le(pos
); pos
+= 8;
454 de_dbg(c
, "offset of local header record: %"I64_FMT
, n
);
456 if(pos
+4 > eii
->dpos
+eii
->dlen
) goto done
;
457 n
= de_getu32le_p(&pos
);
458 de_dbg(c
, "disk start number: %"I64_FMT
, n
);
463 // Extra field 0x5455
// "Extended timestamp": a flags byte followed by up to three 32-bit Unix
// times (mtime, atime, creation time). Flag bits 0x01/0x02/0x04 indicate
// which times are present. In the central directory, the presence of mtime
// is decided by the item length (dlen>=5) instead.
// Each timestamp found is recorded via apply_timestamp() with quality 50.
// Every read is bounds-checked against the end of the item.
464 static void ef_extended_timestamp(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
469 int has_mtime
, has_atime
, has_ctime
;
470 struct de_timestamp timestamp_tmp
;
472 endpos
= pos
+ eii
->dlen
;
473 if(pos
+1>endpos
) return;
474 flags
= de_getbyte_p(&pos
);
475 if(eii
->is_central
) {
476 has_mtime
= (eii
->dlen
>=5);
481 has_mtime
= (flags
& 0x01)?1:0;
482 has_atime
= (flags
& 0x02)?1:0;
483 has_ctime
= (flags
& 0x04)?1:0;
486 if(pos
+4>endpos
) return;
487 read_unix_timestamp(c
, d
, pos
, &timestamp_tmp
, "mtime");
488 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_MODIFY
, &timestamp_tmp
, 50);
492 if(pos
+4>endpos
) return;
493 read_unix_timestamp(c
, d
, pos
, &timestamp_tmp
, "atime");
494 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_ACCESS
, &timestamp_tmp
, 50);
498 if(pos
+4>endpos
) return;
499 read_unix_timestamp(c
, d
, pos
, &timestamp_tmp
, "creation time");
500 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_CREATE
, &timestamp_tmp
, 50);
505 // Extra field 0x5855
506 static void ef_infozip1(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
509 struct de_timestamp timestamp_tmp
;
511 if(eii
->is_central
&& eii
->dlen
<8) return;
512 if(!eii
->is_central
&& eii
->dlen
<12) return;
513 read_unix_timestamp(c
, d
, eii
->dpos
, ×tamp_tmp
, "atime");
514 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_ACCESS
, ×tamp_tmp
, 45);
515 read_unix_timestamp(c
, d
, eii
->dpos
+4, ×tamp_tmp
, "mtime");
516 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_MODIFY
, ×tamp_tmp
, 45);
517 if(!eii
->is_central
) {
518 uidnum
= de_getu16le(eii
->dpos
+8);
519 gidnum
= de_getu16le(eii
->dpos
+10);
520 de_dbg(c
, "uid: %d, gid: %d", (int)uidnum
, (int)gidnum
);
524 // Extra field 0x7075 - Info-ZIP Unicode Path
// Layout as read here: 1-byte version (only version 1 is handled), 4-byte
// CRC of the main filename field, then the UTF-8 name (the rest of the
// item). If the stored CRC matches the CRC of the main filename bytes,
// eii->dd->fname is replaced by the Unicode name.
// Uses (and resets) the shared CRC object d->crco.
525 static void ef_unicodepath(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
528 de_ucstring
*fn
= NULL
;
530 u32 crc_reported
, crc_calculated
;
532 if(eii
->dlen
<1) goto done
;
533 ver
= de_getbyte(eii
->dpos
);
534 de_dbg(c
, "version: %u", (unsigned int)ver
);
535 if(ver
!=1) goto done
;
536 if(eii
->dlen
<6) goto done
;
537 crc_reported
= (u32
)de_getu32le(eii
->dpos
+1);
538 de_dbg(c
, "name-crc (reported): 0x%08x", (unsigned int)crc_reported
);
539 fn
= ucstring_create(c
);
540 fnlen
= eii
->dlen
- 5;
541 dbuf_read_to_ucstring(c
->infile
, eii
->dpos
+5, fnlen
, fn
, 0, DE_ENCODING_UTF8
);
542 de_dbg(c
, "unicode name: \"%s\"", ucstring_getpsz_d(fn
));
544 // Need to go back and calculate a CRC of the main filename. This is
545 // protection against the case where a ZIP editor may have changed the
546 // original filename, but retained a now-orphaned Unicode Path field.
547 de_crcobj_reset(d
->crco
);
548 de_crcobj_addslice(d
->crco
, c
->infile
, eii
->dd
->main_fname_pos
, eii
->dd
->main_fname_len
);
549 crc_calculated
= de_crcobj_getval(d
->crco
);
550 de_dbg(c
, "name-crc (calculated): 0x%08x", (unsigned int)crc_calculated
);
552 if(crc_calculated
== crc_reported
) {
553 ucstring_empty(eii
->dd
->fname
);
554 ucstring_append_ucstring(eii
->dd
->fname
, fn
);
558 ucstring_destroy(fn
);
561 // Extra field 0x7855
562 static void ef_infozip2(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
566 if(eii
->is_central
) return;
567 if(eii
->dlen
<4) return;
568 uidnum
= de_getu16le(eii
->dpos
);
569 gidnum
= de_getu16le(eii
->dpos
+2);
570 de_dbg(c
, "uid: %d, gid: %d", (int)uidnum
, (int)gidnum
);
573 // Extra field 0x7875
// "Info-ZIP Unix, third version": a version byte, then a uid and a gid,
// each stored as a 1-byte size followed by that many bytes (read with
// dbuf_getint_ext). Every read is bounds-checked against the end of the
// item. The values are only printed to the debug log.
574 static void ef_infozip3(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
582 endpos
= pos
+eii
->dlen
;
584 if(pos
+1>endpos
) return;
585 ver
= de_getbyte_p(&pos
);
586 de_dbg(c
, "version: %d", (int)ver
);
589 if(pos
+1>endpos
) return;
590 sz
= (i64
)de_getbyte_p(&pos
);
591 if(pos
+sz
>endpos
) return;
592 uidnum
= dbuf_getint_ext(c
->infile
, pos
, (unsigned int)sz
, 1, 0);
595 if(pos
+1>endpos
) return;
596 sz
= (i64
)de_getbyte_p(&pos
);
597 if(pos
+sz
>endpos
) return;
598 gidnum
= dbuf_getint_ext(c
->infile
, pos
, (unsigned int)sz
, 1, 0);
601 de_dbg(c
, "uid: %d, gid: %d", (int)uidnum
, (int)gidnum
);
604 // Extra field 0x000a
// "NTFS": a 4-byte reserved field followed by a sequence of attributes,
// each with a 2-byte tag and a 2-byte size. Parsing stops when an
// attribute header or its data would run past the end of the item.
// Tag 0x0001 with at least 24 bytes of data holds three FILETIME values
// (mtime, atime, creation time), which are recorded with quality 90.
605 static void ef_ntfs(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
612 struct de_timestamp timestamp_tmp
;
614 endpos
= pos
+eii
->dlen
;
615 pos
+= 4; // skip reserved field
618 if(pos
+4>endpos
) break;
619 attr_tag
= de_getu16le_p(&pos
);
620 attr_size
= de_getu16le_p(&pos
);
621 if(attr_tag
==0x0001) name
="NTFS filetimes";
623 de_dbg(c
, "tag: 0x%04x (%s), dlen: %d", (unsigned int)attr_tag
, name
,
625 if(pos
+attr_size
>endpos
) break;
628 if(attr_tag
==0x0001 && attr_size
>=24) {
629 read_FILETIME(c
, d
, pos
, &timestamp_tmp
, "mtime");
630 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_MODIFY
, &timestamp_tmp
, 90);
631 read_FILETIME(c
, d
, pos
+8, &timestamp_tmp
, "atime");
632 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_ACCESS
, &timestamp_tmp
, 90);
633 read_FILETIME(c
, d
, pos
+16, &timestamp_tmp
, "creation time");
634 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_CREATE
, &timestamp_tmp
, 90);
636 de_dbg_indent(c
, -1);
642 // Extra field 0x0009
// "OS/2": a 4-byte uncompressed size, then (local header only) a 2-byte
// compression method, a 4-byte CRC, and the compressed extended-attribute
// data occupying the rest of the item.
// Method 6 (implode) is treated as unsupported here, in addition to any
// method with no decompressor.
// The data is decompressed into a membuf with do_decompress_attrib_data()
// (CRC validation requested, flag 0x1), then handed to the "ea_data" module.
// mparams is allocated with de_malloc(); attr_data is released with
// dbuf_close().
643 static void ef_os2(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
651 const struct cmpr_meth_info
*cmi
= NULL
;
652 const char *name
= "OS/2 ext. attr. data";
653 dbuf
*attr_data
= NULL
;
654 de_module_params
*mparams
= NULL
;
657 endpos
= pos
+eii
->dlen
;
658 if(pos
+4>endpos
) goto done
;
659 ulen
= de_getu32le_p(&pos
);
660 de_dbg(c
, "uncmpr ext attr data size: %"I64_FMT
, ulen
);
661 if(eii
->is_central
) goto done
;
663 if(pos
+2>endpos
) goto done
;
664 cmpr_meth
= (int)de_getu16le_p(&pos
);
665 de_dbg(c
, "ext attr cmpr method: %d", cmpr_meth
);
667 if(pos
+4>endpos
) goto done
;
668 crc_reported
= (u32
)de_getu32le_p(&pos
);
669 de_dbg(c
, "ext attr crc (reported): 0x%08x", (unsigned int)crc_reported
);
671 cmpr_attr_size
= endpos
-pos
;
672 de_dbg(c
, "cmpr ext attr data at %"I64_FMT
", len=%"I64_FMT
, pos
, cmpr_attr_size
);
// Since cmpr_attr_size was just computed as endpos-pos, this test cannot
// be true; it is a defensive check only.
673 if(pos
+ cmpr_attr_size
> endpos
) goto done
;
675 cmi
= get_cmpr_meth_info(cmpr_meth
);
676 if(cmpr_meth
==6 || !is_compression_method_supported(d
, cmi
)) {
677 de_warn(c
, "%s: Unsupported compression method: %d (%s)",
678 name
, cmpr_meth
, (cmi
? cmi
->name
: "?"));
682 attr_data
= dbuf_create_membuf(c
, ulen
, 0x1);
683 ret
= do_decompress_attrib_data(c
, d
, pos
, cmpr_attr_size
,
684 attr_data
, ulen
, crc_reported
, cmpr_meth
, cmi
, 0x1, name
);
686 de_warn(c
, "Failed to decompress %s", name
);
690 // attr_data contains an OS/2 extended attribute structure (FEA2LIST)
691 mparams
= de_malloc(c
, sizeof(de_module_params
));
692 mparams
->in_params
.codes
= "L";
693 de_dbg(c
, "decoding OS/2 ext. attribs., unc. len=%"I64_FMT
, attr_data
->len
);
695 de_run_module_by_id_on_slice(c
, "ea_data", mparams
, attr_data
, 0, attr_data
->len
);
696 de_dbg_indent(c
, -1);
699 dbuf_close(attr_data
);
703 // Extra field 0x2705 (ZipIt Macintosh 1.3.5+)
704 static void ef_zipitmac_2705(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
706 struct de_fourcc sig
;
707 struct de_fourcc filetype
;
708 struct de_fourcc creator
;
710 if(eii
->dlen
<4) goto done
;
711 dbuf_read_fourcc(c
->infile
, eii
->dpos
, &sig
, 4, 0x0);
712 de_dbg(c
, "signature: '%s'", sig
.id_dbgstr
);
713 if(sig
.id
!=0x5a504954U
) goto done
; // expecting 'ZPIT'
714 if(eii
->dlen
<12) goto done
;
715 dbuf_read_fourcc(c
->infile
, eii
->dpos
+4, &filetype
, 4, 0x0);
716 de_dbg(c
, "filetype: '%s'", filetype
.id_dbgstr
);
717 dbuf_read_fourcc(c
->infile
, eii
->dpos
+8, &creator
, 4, 0x0);
718 de_dbg(c
, "creator: '%s'", creator
.id_dbgstr
);
724 // The time will be returned in the caller-supplied 'ts'
725 static void handle_mac_time(deark
*c
, lctx
*d
,
726 i64 mt_raw
, i64 mt_offset
,
727 struct de_timestamp
*ts
, const char *name
)
729 char timestamp_buf
[64];
730 de_mac_time_to_timestamp(mt_raw
- mt_offset
, ts
);
731 ts
->tzcode
= DE_TZCODE_UTC
;
732 de_dbg_timestamp_to_string(c
, ts
, timestamp_buf
, sizeof(timestamp_buf
), 0);
733 de_dbg(c
, "%s: %"I64_FMT
" %+"I64_FMT
" (%s)", name
,
734 mt_raw
, -mt_offset
, timestamp_buf
);
737 // Extra field 0x334d (Info-ZIP Macintosh)
// Header (at least 14 bytes): 4-byte uncompressed attribute size, 2-byte
// flags, 4-byte file type, 4-byte creator. Nothing more is decoded for
// central directory entries.
// When the attribute data is compressed, a 2-byte compression method and a
// 4-byte CRC follow; the rest of the item is the Finder attribute data.
// It is decompressed into a membuf (sizes outside 1..1000000 are rejected;
// method 6 is treated as unsupported), and then the create/modify/backup
// times, with their timezone offsets, are read from it and recorded with
// quality 40. Times are skipped if the flags indicate 64-bit times or no
// timezone offsets. Finally a charset code and two NUL-terminated strings
// (fullpath, comment) are printed.
738 static void ef_infozipmac(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
746 const struct cmpr_meth_info
*cmi
= NULL
;
747 struct de_fourcc filetype
;
748 struct de_fourcc creator
;
749 de_ucstring
*flags_str
= NULL
;
750 dbuf
*attr_data
= NULL
;
753 i64 create_time_offset
;
757 i64 backup_time_offset
;
758 struct de_timestamp tmp_timestamp
;
760 u32 crc_reported
= 0;
762 struct de_stringreaderdata
*srd
;
764 if(eii
->dlen
<14) goto done
;
766 ulen
= de_getu32le_p(&pos
);
767 de_dbg(c
, "uncmpr. finder attr. size: %d", (int)ulen
);
769 flags
= (unsigned int)de_getu16le_p(&pos
);
770 flags_str
= ucstring_create(c
);
771 if(flags
&0x0001) ucstring_append_flags_item(flags_str
, "data_fork");
772 if(flags
&0x0002) ucstring_append_flags_item(flags_str
, "0x0002"); // something about the filename
773 ucstring_append_flags_item(flags_str
,
774 (flags
&0x0004)?"uncmpressed_attribute_data":"compressed_attribute_data");
775 if(flags
&0x0008) ucstring_append_flags_item(flags_str
, "64-bit_times");
776 if(flags
&0x0010) ucstring_append_flags_item(flags_str
, "no_timezone_offsets");
777 de_dbg(c
, "flags: 0x%04x (%s)", flags
, ucstring_getpsz(flags_str
));
779 dbuf_read_fourcc(c
->infile
, pos
, &filetype
, 4, 0x0);
780 de_dbg(c
, "filetype: '%s'", filetype
.id_dbgstr
);
782 dbuf_read_fourcc(c
->infile
, pos
, &creator
, 4, 0x0);
783 de_dbg(c
, "creator: '%s'", creator
.id_dbgstr
);
786 if(eii
->is_central
) goto done
;
788 if(flags
&0x0004) { // Uncompressed attribute data
792 dcflags
|= 0x1; // CRC is known
793 cmpr_meth
= (int)de_getu16le_p(&pos
);
794 cmi
= get_cmpr_meth_info(cmpr_meth
);
795 de_dbg(c
, "finder attr. cmpr. method: %d (%s)", cmpr_meth
, (cmi
? cmi
->name
: "?"));
797 crc_reported
= (u32
)de_getu32le_p(&pos
);
798 de_dbg(c
, "finder attr. data crc (reported): 0x%08x", (UI
)crc_reported
);
801 // The rest of the data is Finder attribute data
802 cmpr_attr_size
= eii
->dpos
+eii
->dlen
- pos
;
803 de_dbg(c
, "cmpr. finder attr. size: %d", (int)cmpr_attr_size
);
804 if(ulen
<1 || ulen
>1000000) goto done
;
806 // Type 6 (implode) compression won't work here, because it needs
807 // additional parameters seemingly not provided by the Finder attr data.
808 if(cmpr_meth
==6 || !is_compression_method_supported(d
, cmi
)) {
809 de_warn(c
, "Finder attribute data: Unsupported compression method: %d (%s)",
810 cmpr_meth
, (cmi
? cmi
->name
: "?"));
814 // Decompress and decode the Finder attribute data
815 attr_data
= dbuf_create_membuf(c
, ulen
, 0x1);
816 ret
= do_decompress_attrib_data(c
, d
, pos
, cmpr_attr_size
,
817 attr_data
, ulen
, crc_reported
, cmpr_meth
, cmi
, dcflags
, "finder attr. data");
819 de_warn(c
, "Failed to decompress finder attribute data");
824 dpos
+= 2; // Finder flags
825 dpos
+= 4; // Icon location
827 dpos
+= 16; // FXInfo
828 dpos
+= 1; // file version number
829 dpos
+= 1; // dir access rights
831 if(flags
&0x0008) goto done
; // We don't support 64-bit times
832 if(flags
&0x0010) goto done
; // We want timezone offsets
// Three 4-byte raw times followed by three 4-byte offsets.
833 if(attr_data
->len
- dpos
< 6*4) goto done
;
835 create_time_raw
= dbuf_getu32le_p(attr_data
, &dpos
);
836 mod_time_raw
= dbuf_getu32le_p(attr_data
, &dpos
);
837 backup_time_raw
= dbuf_getu32le_p(attr_data
, &dpos
);
838 create_time_offset
= dbuf_geti32le(attr_data
, dpos
); dpos
+= 4;
839 mod_time_offset
= dbuf_geti32le(attr_data
, dpos
); dpos
+= 4;
840 backup_time_offset
= dbuf_geti32le(attr_data
, dpos
); dpos
+= 4;
842 handle_mac_time(c
, d
, create_time_raw
, create_time_offset
, &tmp_timestamp
, "create time");
843 if(create_time_raw
>0) {
844 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_CREATE
, &tmp_timestamp
, 40);
846 handle_mac_time(c
, d
, mod_time_raw
, mod_time_offset
, &tmp_timestamp
, "mod time ");
848 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_MODIFY
, &tmp_timestamp
, 40);
850 handle_mac_time(c
, d
, backup_time_raw
, backup_time_offset
, &tmp_timestamp
, "backup time");
851 if(backup_time_raw
>0) {
852 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_BACKUP
, &tmp_timestamp
, 40);
855 // Expecting 2 bytes for charset, and at least 2 more for the 2 NUL-terminated
856 // strings that follow.
857 if(attr_data
->len
- dpos
< 4) goto done
;
859 charset
= (int)dbuf_getu16le_p(attr_data
, &dpos
);
860 de_dbg(c
, "charset for fullpath/comment: %d", charset
);
862 // TODO: Can we use the correct encoding?
863 srd
= dbuf_read_string(attr_data
, dpos
, attr_data
->len
-dpos
, DE_DBG_MAX_STRLEN
,
864 DE_CONVFLAG_STOP_AT_NUL
, DE_ENCODING_ASCII
);
865 de_dbg(c
, "fullpath: \"%s\"", ucstring_getpsz(srd
->str
));
866 dpos
+= srd
->bytes_consumed
;
867 de_destroy_stringreaderdata(c
, srd
);
869 srd
= dbuf_read_string(attr_data
, dpos
, attr_data
->len
-dpos
, DE_DBG_MAX_STRLEN
,
870 DE_CONVFLAG_STOP_AT_NUL
, DE_ENCODING_ASCII
);
871 de_dbg(c
, "comment: \"%s\"", ucstring_getpsz(srd
->str
));
872 dpos
+= srd
->bytes_consumed
;
873 de_destroy_stringreaderdata(c
, srd
);
876 ucstring_destroy(flags_str
);
877 dbuf_close(attr_data
);
880 // Acorn / SparkFS / RISC OS
// Extra field 0x4341. Requires at least 16 bytes of data, and checks for
// an "ARC0" signature at the start of the item; other types are reported
// as unsupported.
// The load/exec fields are decoded with fmtutil_riscos_read_load_exec();
// if that yields a valid modification time, it is recorded with quality 70.
// The attributes field is decoded too, for debug output.
881 static void ef_acorn(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
884 struct de_riscos_file_attrs rfa
;
886 if(eii
->dlen
<16) return;
887 if(dbuf_memcmp(c
->infile
, eii
->dpos
, "ARC0", 4)) {
888 de_dbg(c
, "[unsupported Acorn extra-field type]");
893 de_zeromem(&rfa
, sizeof(struct de_riscos_file_attrs
));
894 fmtutil_riscos_read_load_exec(c
, c
->infile
, &rfa
, pos
);
896 if(rfa
.mod_time
.is_valid
) {
897 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_MODIFY
, &rfa
.mod_time
, 70);
900 fmtutil_riscos_read_attribs_field(c
, c
->infile
, &rfa
, pos
, 0);
901 // Note: attribs does not have any information that we care about (no
902 // 'executable' or 'is-directory' flag).
905 struct extra_item_type_info_struct
{
908 extrafield_decoder_fn fn
;
910 static const struct extra_item_type_info_struct extra_item_type_info_arr
[] = {
911 { 0x0001 /* */, "Zip64 extended information", ef_zip64extinfo
},
912 { 0x0007 /* */, "AV Info", NULL
},
913 { 0x0008 /* */, "extended language encoding data", NULL
},
914 { 0x0009 /* */, "OS/2", ef_os2
},
915 { 0x000a /* */, "NTFS", ef_ntfs
},
916 { 0x000c /* */, "OpenVMS", NULL
},
917 { 0x000d /* */, "Unix", NULL
},
918 { 0x000e /* */, "file stream and fork descriptors", NULL
},
919 { 0x000f /* */, "Patch Descriptor", NULL
},
920 { 0x0014 /* */, "PKCS#7 Store for X.509 Certificates", NULL
},
921 { 0x0015 /* */, "X.509 Certificate ID and Signature for individual file", NULL
},
922 { 0x0016 /* */, "X.509 Certificate ID for Central Directory", NULL
},
923 { 0x0017 /* */, "Strong Encryption Header", NULL
},
924 { 0x0018 /* */, "Record Management Controls", NULL
},
925 { 0x0019 /* */, "PKCS#7 Encryption Recipient Certificate List", NULL
},
926 { 0x0021 /* */, "Policy Decryption Key", NULL
},
927 { 0x0022 /* */, "Smartcrypt Key Provider", NULL
},
928 { 0x0023 /* */, "Smartcrypt Policy Key Data", NULL
},
929 { 0x0065 /* */, "IBM S/390 (Z390), AS/400 (I400) attributes", NULL
},
930 { 0x0066 /* */, "IBM S/390 (Z390), AS/400 (I400) attributes - compressed", NULL
},
931 { 0x07c8 /* */, "Macintosh", NULL
},
932 { 0x2605 /* */, "ZipIt Macintosh", NULL
},
933 { 0x2705 /* */, "ZipIt Macintosh 1.3.5+", ef_zipitmac_2705
},
934 { 0x2805 /* */, "ZipIt Macintosh 1.3.5+", NULL
},
935 { 0x334d /* M3 */, "Info-ZIP Macintosh", ef_infozipmac
},
936 { 0x4154 /* TA */, "Tandem NSK", NULL
},
937 { 0x4341 /* AC */, "Acorn/SparkFS", ef_acorn
},
938 { 0x4453 /* SE */, "Windows NT security descriptor (binary ACL)", NULL
},
939 { 0x4690 /* */, "POSZIP 4690", NULL
},
940 { 0x4704 /* */, "VM/CMS", NULL
},
941 { 0x470f /* */, "MVS", NULL
},
942 { 0x4854 /* TH */, "Theos, old unofficial port", NULL
}, // unzip:extrafld.txt says "inofficial"
943 { 0x4b46 /* FK */, "FWKCS MD5", NULL
},
944 { 0x4c41 /* AL */, "OS/2 access control list (text ACL)", NULL
},
945 { 0x4d49 /* IM */, "Info-ZIP OpenVMS", NULL
},
946 { 0x4d63 /* cM */, "Macintosh SmartZIP", NULL
},
947 { 0x4f4c /* LO */, "Xceed original location", NULL
},
948 { 0x5350 /* PS */, "Psion?", NULL
}, // observed in some Psion files
949 { 0x5356 /* VS */, "AOS/VS (ACL)", NULL
},
950 { 0x5455 /* UT */, "extended timestamp", ef_extended_timestamp
},
951 { 0x554e /* NU */, "Xceed unicode", NULL
},
952 { 0x5855 /* UX */, "Info-ZIP Unix, first version", ef_infozip1
},
953 { 0x6375 /* uc */, "Info-ZIP Unicode Comment", NULL
},
954 { 0x6542 /* Be */, "BeOS/BeBox", NULL
},
955 { 0x6854 /* Th */, "Theos", NULL
},
956 { 0x7075 /* up */, "Info-ZIP Unicode Path", ef_unicodepath
},
957 { 0x7441 /* At */, "AtheOS", NULL
},
958 { 0x756e /* nu */, "ASi Unix", NULL
},
959 { 0x7855 /* Ux */, "Info-ZIP Unix, second version", ef_infozip2
},
960 { 0x7875 /* ux */, "Info-ZIP Unix, third version", ef_infozip3
},
961 { 0xa220 /* */, "Microsoft Open Packaging Growth Hint", NULL
},
962 { 0xfb4a /* */, "SMS/QDOS", NULL
}, // according to Info-ZIP zip 3.0
963 { 0xfd4a /* */, "SMS/QDOS", NULL
} // according to ZIP v6.3.4 APPNOTE
// Looks up an extra-field header ID in extra_item_type_info_arr, and
// returns a pointer to the matching (static) entry.
// NOTE(review): a static fallback entry (default_ei) is declared here;
// presumably it is returned when the ID is not in the table, so that the
// result is never NULL (do_extra_data() dereferences it unconditionally).
// Confirm against the full function.
966 static const struct extra_item_type_info_struct
*get_extra_item_type_info(i64 id
)
968 static const struct extra_item_type_info_struct default_ei
=
972 for(i
=0; i
<DE_ARRAYCOUNT(extra_item_type_info_arr
); i
++) {
973 if(id
== (i64
)extra_item_type_info_arr
[i
].id
) {
974 return &extra_item_type_info_arr
[i
];
// Walks the "extra data" area at [pos1, pos1+len) of a central or local
// directory entry. Each item has a 4-byte header (2-byte ID, 2-byte data
// length) followed by its data. For each item, the type is looked up with
// get_extra_item_type_info(), a summary is printed, and the type's decoder
// function is called with a struct extra_item_info_struct describing it.
// Parsing stops at the first item that does not fit within the area.
980 static void do_extra_data(deark
*c
, lctx
*d
,
981 struct member_data
*md
, struct dir_entry_data
*dd
,
982 i64 pos1
, i64 len
, int is_central
)
986 de_dbg(c
, "extra data at %"I64_FMT
", len=%d", pos1
, (int)len
);
991 struct extra_item_info_struct eii
;
// NOTE(review): because this uses ">=", an item whose 4-byte header ends
// exactly at the end of the area (i.e. a final item with dlen==0) is not
// processed. Confirm whether ">" was intended.
993 if(pos
+4 >= pos1
+len
) break;
994 de_zeromem(&eii
, sizeof(struct extra_item_info_struct
));
997 eii
.is_central
= is_central
;
1000 eii
.id
= (u32
)de_getu16le(pos
);
1001 eii
.dlen
= de_getu16le(pos
+2);
1003 eii
.eiti
= get_extra_item_type_info(eii
.id
);
1005 de_dbg(c
, "item id=0x%04x (%s), dlen=%d", (unsigned int)eii
.id
, eii
.eiti
->name
,
// The item's data must lie entirely within the extra data area.
1007 if(pos
+4+eii
.dlen
> pos1
+len
) break;
1010 de_dbg_indent(c
, 1);
1011 eii
.eiti
->fn(c
, d
, &eii
);
1012 de_dbg_indent(c
, -1);
1018 de_dbg_indent(c
, -1);
// Extracts one member file, using the fields already collected in md.
// Reports an error if the member is encrypted (general purpose bit 0), if
// its compression method is not supported, or if its compressed data would
// extend beyond the end of the input file. Symbolic links only trigger a
// warning. The name, timestamps, and directory/executable flags are copied
// into a de_finfo object, an output file is created from it, and
// do_decompress_member() writes the data. The finfo object and the debug
// indentation level are released/restored before returning.
1021 static void do_extract_file(deark
*c
, lctx
*d
, struct member_data
*md
)
1024 de_finfo
*fi
= NULL
;
1025 struct dir_entry_data
*ldd
= &md
->local_dir_entry_data
;
1027 int saved_indent_level
;
1029 de_dbg_indent_save(c
, &saved_indent_level
);
1030 de_dbg(c
, "file data at %"I64_FMT
", len=%"I64_FMT
, md
->file_data_pos
,
1032 de_dbg_indent(c
, 1);
// Bit 0 of the general purpose flags is reported as "encrypted".
1034 if(ldd
->bit_flags
& 0x1) {
1035 de_err(c
, "%s: Encryption is not supported", ucstring_getpsz_d(ldd
->fname
));
1039 if(!is_compression_method_supported(d
, ldd
->cmi
)) {
1040 de_err(c
, "%s: Unsupported compression method: %d (%s)",
1041 ucstring_getpsz_d(ldd
->fname
),
1042 ldd
->cmpr_meth
, (ldd
->cmi
? ldd
->cmi
->name
: "?"));
// Sanity check: the compressed data must lie entirely inside the input file.
1046 if(md
->file_data_pos
+md
->cmpr_size
> c
->infile
->len
) {
1047 de_err(c
, "%s: Data goes beyond end of file", ucstring_getpsz_d(ldd
->fname
));
1051 if(md
->is_symlink
) {
1052 de_warn(c
, "\"%s\" is a symbolic link. It will not be extracted as a link.",
1053 ucstring_getpsz_d(ldd
->fname
));
1056 fi
= de_finfo_create(c
);
1057 fi
->detect_root_dot_dir
= 1;
// Only set an output name if the member actually has one.
1059 if(ucstring_isnonempty(ldd
->fname
)) {
1060 unsigned int snflags
= DE_SNFLAG_FULLPATH
;
1061 if(md
->is_dir
) snflags
|= DE_SNFLAG_STRIPTRAILINGSLASH
;
1062 de_finfo_set_name_from_ucstring(c
, fi
, ldd
->fname
, snflags
);
1063 fi
->original_filename_flag
= 1;
// Copy over every timestamp type for which a valid value was found.
1066 for(tsidx
=0; tsidx
<DE_TIMESTAMPIDX_COUNT
; tsidx
++) {
1067 if(md
->tsdata
[tsidx
].ts
.is_valid
) {
1068 fi
->timestamp
[tsidx
] = md
->tsdata
[tsidx
].ts
;
1073 fi
->is_directory
= 1;
1075 else if(md
->is_executable
) {
1076 fi
->mode_flags
|= DE_MODEFLAG_EXE
;
1078 else if(md
->is_nonexecutable
) {
1079 fi
->mode_flags
|= DE_MODEFLAG_NONEXE
;
1082 outf
= dbuf_create_output_file(c
, NULL
, fi
, 0);
// The return value is deliberately ignored.
1087 (void)do_decompress_member(c
, d
, md
, outf
);
1091 de_finfo_destroy(c
, fi
);
1092 de_dbg_indent_restore(c
, saved_indent_level
);
// Returns a human-readable name for the "platform" code stored in the high
// byte of a ZIP "version made by" / "version needed to extract" field.
//
// ver_hi: the platform code, taken directly from the file (so it may hold
//   any value, not just a known one).
//
// Codes 0 through 19 are looked up in a table, code 30 is special-cased, and
// every other value yields "?". The result always points to a string literal:
// it is never NULL and must not be modified or freed.
static const char *get_platform_name(unsigned int ver_hi)
{
	static const char *pltf_names[20] = {
		"MS-DOS, etc.", "Amiga", "OpenVMS", "Unix",
		"VM/CMS", "Atari ST", "HPFS", "Macintosh",
		"Z-System", "CP/M", "NTFS or TOPS-20", "MVS or NTFS",
		"VSE or SMS/QDOS", "Acorn RISC OS", "VFAT", "MVS",
		"BeOS", "Tandem", "OS/400", "OS X" };

	// Bounds-check before indexing: the code is untrusted input, and an
	// unchecked lookup would read past the end of the table.
	if(ver_hi < (unsigned int)(sizeof(pltf_names)/sizeof(pltf_names[0]))) {
		return pltf_names[ver_hi];
	}
	if(ver_hi==30) {
		return "AtheOS/Syllable";
	}
	// Unknown platform code. Callers print the result with "%s", so return a
	// placeholder rather than NULL.
	return "?";
}
1110 // Look at the attributes, and set some other fields based on them.
// Reads md->ver_made_by_hi, md->attr_e and md->uncmpr_size, and derives
// flags on md such as is_executable / is_nonexecutable.
// When the creating platform is Unix (ver_made_by_hi==3), the high 16 bits of
// attr_e are treated as a Unix mode word. Independently of the platform, bit
// 0x10 of attr_e is tested as an MS-DOS-style attribute.
1111 static void process_ext_attr(deark
*c
, lctx
*d
, struct member_data
*md
)
1113 if(d
->using_scanmode
) {
1114 // In this mode, there is no 'external attribs' field.
1118 if(md
->ver_made_by_hi
==3) { // Unix
1119 unsigned int unix_filetype
;
// Isolate the file-type bits of the mode word.
1120 unix_filetype
= (md
->attr_e
>>16)&0170000;
// NOTE(review): 0040000 and 0120000 are presumably the conventional Unix
// S_IFDIR (directory) and S_IFLNK (symlink) type codes; the statements that
// act on them are not shown here -- confirm.
1121 if(unix_filetype
== 0040000) {
1124 else if(unix_filetype
== 0120000) {
// 0111 covers the three execute permission bits of the mode word.
1128 if((md
->attr_e
>>16)&0111) {
1129 md
->is_executable
= 1;
1132 md
->is_nonexecutable
= 1;
1136 // MS-DOS-style attributes.
1137 // Technically, we should only do this if
1138 // md->central_dir_entry_data.ver_made_by_hi==0.
1139 // However, most(?) zip programs set the low byte of the external attribs
1140 // to the equivalent MS-DOS attribs, at least in cases where it matters.
1141 if(md
->attr_e
& 0x10) {
1145 // TODO: Support more platforms.
1146 // TODO: The 0x756e (ASi Unix) extra field might be important, as it contains
1147 // file permissions.
1149 if(md
->is_dir
&& md
->uncmpr_size
!=0) {
1150 // I'd expect a subdirectory entry to have zero size. If it doesn't,
1151 // let's just assume we misidentified it as a subdirectory, and
1152 // extract its data.
// Builds a human-readable description of the "internal file attributes"
// field (md->attr_i) by appending one item per flag to the string s.
// bf starts as a copy of the field; whatever is still nonzero in bf at the
// end is appended in hex as an unrecognized remainder.
1157 static void describe_internal_attr(deark
*c
, struct member_data
*md
,
1160 unsigned int bf
= md
->attr_i
;
1163 ucstring_append_flags_item(s
, "text file");
1167 if(bf
!=0) { // Report any unrecognized flags
1168 ucstring_append_flags_itemf(s
, "0x%04x", bf
);
1172 // Uses dd->bit_flags, dd->cmpr_method
// Builds a human-readable description of the "general purpose bit flags"
// field by appending one item per flag to the string s.
// The meaning of some bits depends on the compression method: method 6
// (implode) and methods 8/9 (deflate) each get method-specific items.
// bf starts as a copy of the field; whatever is still nonzero in bf at the
// end is appended in hex as an unrecognized remainder.
1173 static void describe_general_purpose_bit_flags(deark
*c
, struct dir_entry_data
*dd
,
1177 unsigned int bf
= dd
->bit_flags
;
1180 ucstring_append_flags_item(s
, "encrypted");
1184 if(dd
->cmpr_meth
==6) { // implode
1192 ucstring_append_flags_itemf(s
, "%s sliding dictionary", name
);
1201 ucstring_append_flags_itemf(s
, "%s trees", name
);
1204 if(dd
->cmpr_meth
==8 || dd
->cmpr_meth
==9) { // deflate flags
// Bits 1-2 form a 2-bit compression level code.
1207 code
= (bf
& 0x0006)>>1;
1209 case 1: name
="max"; break;
1210 case 2: name
="fast"; break;
1211 case 3: name
="super_fast"; break;
1212 default: name
="normal";
1214 ucstring_append_flags_itemf(s
, "cmprlevel=%s", name
);
// Clear the bits just described, so they are not reported as unrecognized.
1215 bf
-= (bf
& 0x0006);
1219 ucstring_append_flags_item(s
, "uses data descriptor");
1224 ucstring_append_flags_item(s
, "UTF-8");
1228 if(bf
!=0) { // Report any unrecognized flags
1229 ucstring_append_flags_itemf(s
, "0x%04x", bf
);
1233 // Read either a central directory entry (a.k.a. central directory file header),
1234 // or a local file header.
//
// md: the member being processed; fields common to the whole member are
//   stored here.
// is_central: nonzero to parse a central directory entry, zero to parse a
//   local file header.
// pos1: file offset of the header's signature.
// p_entry_size: receives the total size of the header: fixed part + filename
//   + extra data + comment.
//
// Per-header fields are stored in md->central_dir_entry_data or
// md->local_dir_entry_data (via dd), depending on is_central. After the
// fixed fields, the filename, extra data and comment are handed to
// do_read_filename(), do_extra_data() and do_comment().
1235 static int do_file_header(deark
*c
, lctx
*d
, struct member_data
*md
,
1236 int is_central
, i64 pos1
, i64
*p_entry_size
)
1240 i64 fn_len
, extra_len
, comment_len
;
1243 i64 fixed_header_size
;
1244 i64 mod_time_raw
, mod_date_raw
;
1245 struct dir_entry_data
*dd
; // Points to either md->central or md->local
1246 de_ucstring
*descr
= NULL
;
1247 struct de_timestamp dos_timestamp
;
1248 char timestamp_buf
[64];
1251 descr
= ucstring_create(c
);
// The fixed-size part is 46 bytes for a central directory entry and 30 bytes
// for a local file header.
1253 dd
= &md
->central_dir_entry_data
;
1254 fixed_header_size
= 46;
1255 de_dbg(c
, "central dir entry at %"I64_FMT
, pos
);
1258 dd
= &md
->local_dir_entry_data
;
1259 fixed_header_size
= 30;
1260 if(md
->disk_number_start
!=d
->this_disk_num
) {
1261 de_err(c
, "Member file not in this ZIP file");
1264 de_dbg(c
, "local file header at %"I64_FMT
, pos
);
1266 de_dbg_indent(c
, 1);
// Verify the 4-byte signature that matches the kind of header being read.
1268 sig
= (u32
)de_getu32le_p(&pos
);
1269 if(is_central
&& sig
!=CODE_PK12
) {
1270 de_err(c
, "Central dir file header not found at %"I64_FMT
, pos1
);
1273 else if(!is_central
&& sig
!=CODE_PK34
) {
1274 de_err(c
, "Local file header not found at %"I64_FMT
, pos1
);
// "Version" fields: the high byte is a platform code, and the low byte is
// the ZIP spec version times 10 (printed as major.minor).
1279 md
->ver_made_by
= (unsigned int)de_getu16le_p(&pos
);
1280 md
->ver_made_by_hi
= (unsigned int)((md
->ver_made_by
&0xff00)>>8);
1281 md
->ver_made_by_lo
= (unsigned int)(md
->ver_made_by
&0x00ff);
1282 de_dbg(c
, "version made by: platform=%u (%s), ZIP spec=%u.%u",
1283 md
->ver_made_by_hi
, get_platform_name(md
->ver_made_by_hi
),
1284 (unsigned int)(md
->ver_made_by_lo
/10), (unsigned int)(md
->ver_made_by_lo
%10));
1287 dd
->ver_needed
= (unsigned int)de_getu16le_p(&pos
);
1288 dd
->ver_needed_hi
= (unsigned int)((dd
->ver_needed
&0xff00)>>8);
1289 dd
->ver_needed_lo
= (unsigned int)(dd
->ver_needed
&0x00ff);
1290 de_dbg(c
, "version needed to extract: platform=%u (%s), ZIP spec=%u.%u",
1291 dd
->ver_needed_hi
, get_platform_name(dd
->ver_needed_hi
),
1292 (unsigned int)(dd
->ver_needed_lo
/10), (unsigned int)(dd
->ver_needed_lo
%10));
1294 dd
->bit_flags
= (unsigned int)de_getu16le_p(&pos
);
1295 dd
->cmpr_meth
= (int)de_getu16le_p(&pos
);
1296 dd
->cmi
= get_cmpr_meth_info(dd
->cmpr_meth
);
// Flag bit 0x800 selects UTF-8; it is passed on to the filename and comment
// readers below.
1298 utf8_flag
= (dd
->bit_flags
& 0x800)?1:0;
1299 ucstring_empty(descr
);
1300 describe_general_purpose_bit_flags(c
, dd
, descr
);
1301 de_dbg(c
, "flags: 0x%04x (%s)", dd
->bit_flags
, ucstring_getpsz(descr
));
1303 de_dbg(c
, "cmpr method: %d (%s)", dd
->cmpr_meth
,
1304 (dd
->cmi
? dd
->cmi
->name
: "?"));
// The modification time is stored as a DOS time word followed by a DOS date
// word. It is tagged as local time.
1306 mod_time_raw
= de_getu16le_p(&pos
);
1307 mod_date_raw
= de_getu16le_p(&pos
);
1308 de_dos_datetime_to_timestamp(&dos_timestamp
, mod_date_raw
, mod_time_raw
);
1309 dos_timestamp
.tzcode
= DE_TZCODE_LOCAL
;
1310 de_dbg_timestamp_to_string(c
, &dos_timestamp
, timestamp_buf
, sizeof(timestamp_buf
), 0);
1311 de_dbg(c
, "mod time: %s", timestamp_buf
);
1312 apply_timestamp(c
, d
, md
, DE_TIMESTAMPIDX_MODIFY
, &dos_timestamp
, 10);
1314 dd
->crc_reported
= (u32
)de_getu32le_p(&pos
);
1315 de_dbg(c
, "crc (reported): 0x%08x", (unsigned int)dd
->crc_reported
);
1317 dd
->cmpr_size
= de_getu32le_p(&pos
);
1318 dd
->uncmpr_size
= de_getu32le_p(&pos
);
1319 de_dbg(c
, "cmpr size: %" I64_FMT
", uncmpr size: %" I64_FMT
, dd
->cmpr_size
, dd
->uncmpr_size
);
1321 fn_len
= de_getu16le_p(&pos
);
1323 extra_len
= de_getu16le_p(&pos
);
1326 comment_len
= de_getu16le_p(&pos
);
// The member's data begins after the filename and the extra data.
1333 md
->file_data_pos
= pos
+ fn_len
+ extra_len
;
1337 md
->disk_number_start
= de_getu16le_p(&pos
);
1339 md
->attr_i
= (unsigned int)de_getu16le_p(&pos
);
1340 ucstring_empty(descr
);
1341 describe_internal_attr(c
, md
, descr
);
1342 de_dbg(c
, "internal file attributes: 0x%04x (%s)", md
->attr_i
,
1343 ucstring_getpsz(descr
));
1345 md
->attr_e
= (unsigned int)de_getu32le_p(&pos
);
1346 de_dbg(c
, "external file attributes: 0x%08x", md
->attr_e
);
1347 de_dbg_indent(c
, 1);
1350 // The low byte is, AFAIK, *almost* universally used for MS-DOS-style
1352 unsigned int dos_attrs
= (md
->attr_e
& 0xff);
1353 ucstring_empty(descr
);
1354 de_describe_dos_attribs(c
, dos_attrs
, descr
, 0);
// The interpretation is labeled "(hypothetical)" unless the creating platform
// code is 0.
1355 de_dbg(c
, "%sMS-DOS attribs: 0x%02x (%s)",
1356 (md
->ver_made_by_hi
==0)?"":"(hypothetical) ",
1357 dos_attrs
, ucstring_getpsz(descr
));
1360 if((md
->attr_e
>>16) != 0) {
1361 // A number of platforms put Unix-style file attributes here, so
1362 // decode them as such whenever they are nonzero.
1363 de_dbg(c
, "%sUnix attribs: octal(%06o)",
1364 (md
->ver_made_by_hi
==3)?"":"(hypothetical) ",
1365 (unsigned int)(md
->attr_e
>>16));
1368 de_dbg_indent(c
, -1);
1370 md
->offset_of_local_header
= de_getu32le_p(&pos
);
1371 de_dbg(c
, "offset of local header: %"I64_FMT
", disk: %d", md
->offset_of_local_header
,
1372 (int)md
->disk_number_start
);
1376 de_dbg(c
, "filename_len: %d, extra_len: %d, comment_len: %d", (int)fn_len
,
1377 (int)extra_len
, (int)comment_len
);
1380 de_dbg(c
, "filename_len: %d, extra_len: %d", (int)fn_len
,
1384 *p_entry_size
= fixed_header_size
+ fn_len
+ extra_len
+ comment_len
;
// The variable-length fields follow the fixed part, in this order: filename,
// extra data, comment.
1386 dd
->main_fname_pos
= pos1
+fixed_header_size
;
1387 dd
->main_fname_len
= fn_len
;
1388 do_read_filename(c
, d
, md
, dd
, pos1
+fixed_header_size
, fn_len
, utf8_flag
);
1391 do_extra_data(c
, d
, md
, dd
, pos1
+fixed_header_size
+fn_len
, extra_len
, is_central
);
1395 do_comment(c
, d
, pos1
+fixed_header_size
+fn_len
+extra_len
, comment_len
, utf8_flag
,
1396 "member file comment", "fcomment.txt");
// Once the offset discrepancy has been applied to one member, apply it to
// every member after that.
1400 if(d
->used_offset_discrepancy
) {
1401 md
->offset_of_local_header
+= d
->offset_discrepancy
;
1402 de_dbg(c
, "assuming local header is really at %"I64_FMT
, md
->offset_of_local_header
);
// Otherwise, adopt the discrepancy only if the local header signature is
// missing at the reported offset but present at the adjusted offset.
1404 else if(d
->offset_discrepancy
!=0) {
1408 sig1
= (u32
)de_getu32le(md
->offset_of_local_header
);
1409 if(sig1
!=CODE_PK34
) {
1410 alt_pos
= md
->offset_of_local_header
+ d
->offset_discrepancy
;
1411 sig2
= (u32
)de_getu32le(alt_pos
);
1412 if(sig2
==CODE_PK34
) {
1413 de_warn(c
, "Local file header found at %"I64_FMT
" instead of %"I64_FMT
". "
1414 "Assuming offsets are wrong by %"I64_FMT
" bytes.",
1415 alt_pos
, md
->offset_of_local_header
, d
->offset_discrepancy
);
1416 md
->offset_of_local_header
+= d
->offset_discrepancy
;
1417 d
->used_offset_discrepancy
= 1;
1426 de_dbg_indent(c
, -1);
1427 ucstring_destroy(descr
);
1431 static struct member_data
*create_member_data(deark
*c
, lctx
*d
)
1433 struct member_data
*md
;
1435 md
= de_malloc(c
, sizeof(struct member_data
));
1436 md
->local_dir_entry_data
.fname
= ucstring_create(c
);
1437 md
->central_dir_entry_data
.fname
= ucstring_create(c
);
// Releases the resources held by a member_data record: destroys the
// filename strings of both its central and local directory-entry
// sub-records.
// NOTE(review): any NULL check on md, and the freeing of md itself, are not
// shown here -- confirm.
1441 static void destroy_member_data(deark
*c
, struct member_data
*md
)
1444 ucstring_destroy(md
->central_dir_entry_data
.fname
);
1445 ucstring_destroy(md
->local_dir_entry_data
.fname
);
1449 static i32
ucstring_lastchar(de_ucstring
*s
)
1451 if(!s
|| s
->len
<1) return 0;
1452 return s
->str
[s
->len
-1];
1455 // Things to do after both the central and local headers have been read.
1456 // E.g., extract the file.
//
// Steps, in order: fall back to the central-dir filename if the local one is
// empty; choose the authoritative compressed size, uncompressed size and CRC
// (from the central dir entry when flag bit 0x0008 is set, otherwise from the
// local header); interpret the external attributes; apply a filename-based
// directory heuristic; extract the member.
1457 static int do_process_member(deark
*c
, lctx
*d
, struct member_data
*md
)
1461 // If for some reason we have a central-dir filename but not a local-dir
1462 // filename, use the central-dir filename.
1463 if(ucstring_isempty(md
->local_dir_entry_data
.fname
) &&
1464 ucstring_isnonempty(md
->central_dir_entry_data
.fname
))
1466 ucstring_append_ucstring(md
->local_dir_entry_data
.fname
,
1467 md
->central_dir_entry_data
.fname
);
1470 // Set the final file size and crc fields.
1471 if(md
->local_dir_entry_data
.bit_flags
& 0x0008) {
// Scan mode never reads a central directory, so there is nowhere to get
// these fields from.
1472 if(d
->using_scanmode
) {
1473 de_err(c
, "File is incompatible with scan mode");
1477 // Indicates that certain fields are not present in the local file header,
1478 // and are instead in a "data descriptor" after the file data.
1479 // Let's hope they are also in the central file header.
1480 md
->cmpr_size
= md
->central_dir_entry_data
.cmpr_size
;
1481 md
->uncmpr_size
= md
->central_dir_entry_data
.uncmpr_size
;
1482 md
->crc_reported
= md
->central_dir_entry_data
.crc_reported
;
1485 md
->cmpr_size
= md
->local_dir_entry_data
.cmpr_size
;
1486 md
->uncmpr_size
= md
->local_dir_entry_data
.uncmpr_size
;
1487 md
->crc_reported
= md
->local_dir_entry_data
.crc_reported
;
1490 process_ext_attr(c
, d
, md
);
1492 // In some cases, detect directories by checking whether the filename ends
// The heuristic is limited to zero-length members, and to scan mode or a
// "version made by" spec version below 2.0 (ver_made_by_lo<20).
1494 if(!md
->is_dir
&& md
->uncmpr_size
==0 &&
1495 (d
->using_scanmode
|| (md
->ver_made_by_lo
<20)))
1497 if(ucstring_lastchar(md
->local_dir_entry_data
.fname
) == '/') {
1498 de_dbg(c
, "[assuming this is a subdirectory]");
1503 do_extract_file(c
, d
, md
);
1510 // In *entry_size, returns the size of the central dir entry.
1511 // Returns 0 if the central dir entry could not even be parsed.
//
// Processes one member, starting from its central directory entry at 'pos':
// parses the central dir entry, then the local file header it points to
// (md->offset_of_local_header), then calls do_process_member().
// central_index is used only for the debug message. The debug indentation
// level is saved on entry and restored on exit.
1512 static int do_member_from_central_dir_entry(deark
*c
, lctx
*d
,
1513 struct member_data
*md
, i64 central_index
, i64 pos
, i64
*entry_size
)
1517 int saved_indent_level
;
1519 de_dbg_indent_save(c
, &saved_indent_level
);
// The entry must start inside the central directory's reported extent.
1523 if(pos
>= d
->central_dir_offset
+d
->central_dir_byte_size
) {
1527 de_dbg(c
, "central dir entry #%d", (int)central_index
);
1528 de_dbg_indent(c
, 1);
1530 // Read the central dir file header
1531 if(!do_file_header(c
, d
, md
, 1, pos
, entry_size
)) {
1535 // If we were able to read the central dir file header, we might be able
1536 // to continue and read more files, even if the local file header fails.
1539 // Read the local file header
1540 if(!do_file_header(c
, d
, md
, 0, md
->offset_of_local_header
, &tmp_entry_size
)) {
1544 do_process_member(c
, d
, md
);
1547 de_dbg_indent_restore(c
, saved_indent_level
);
// Wrapper that manages the lifetime of a member_data record: creates one,
// processes the central directory entry at 'pos' with
// do_member_from_central_dir_entry(), then destroys the record.
// *entry_size is filled in by the callee.
1551 static int do_central_dir_entry(deark
*c
, lctx
*d
,
1552 i64 central_index
, i64 pos
, i64
*entry_size
)
1554 struct member_data
*md
= NULL
;
1557 md
= create_member_data(c
, d
);
1558 ret
= do_member_from_central_dir_entry(c
, d
, md
, central_index
, pos
, entry_size
);
1559 destroy_member_data(c
, md
);
// Processes one member using only its local file header at pos1, without
// consulting a central directory entry. Called from de_run_zip_scanmode().
// On success, *pmember_size is set to the distance from pos1 to the end of
// the member's compressed data.
1563 static int do_local_dir_only(deark
*c
, lctx
*d
, i64 pos1
, i64
*pmember_size
)
1565 struct member_data
*md
= NULL
;
1569 md
= create_member_data(c
, d
);
1571 md
->offset_of_local_header
= pos1
;
1573 // Read the local file header
1574 if(!do_file_header(c
, d
, md
, 0, md
->offset_of_local_header
, &tmp_entry_size
)) {
1578 if(!do_process_member(c
, d
, md
)) goto done
;
// Member size = header (including filename and extra data) + compressed data.
1580 *pmember_size
= md
->file_data_pos
+ md
->cmpr_size
- pos1
;
1584 destroy_member_data(c
, md
);
// "Scan mode": instead of using the central directory, searches the input
// file for local file header signatures ("PK\x03\x04") and processes each
// member found with do_local_dir_only().
1588 static void de_run_zip_scanmode(deark
*c
, lctx
*d
)
1592 d
->using_scanmode
= 1;
1597 i64 member_size
= 0;
// Stop when fewer than 4 bytes (the signature length) remain.
1599 if(pos
> c
->infile
->len
-4) break;
1600 ret
= dbuf_search(c
->infile
, g_zipsig34
, 4, pos
, c
->infile
->len
-pos
, &foundpos
);
1603 de_dbg(c
, "zip member at %"I64_FMT
, pos
);
1604 de_dbg_indent(c
, 1);
1605 ret
= do_local_dir_only(c
, d
, pos
, &member_size
);
1606 de_dbg_indent(c
, -1);
// A member size below 1 ends the scan.
1608 if(member_size
<1) break;
// Processes the central directory: starting at d->central_dir_offset, calls
// do_central_dir_entry() once for each of the d->central_dir_num_entries
// entries.
1613 static int do_central_dir(deark
*c
, lctx
*d
)
1620 pos
= d
->central_dir_offset
;
1621 de_dbg(c
, "central dir at %"I64_FMT
, pos
);
1622 de_dbg_indent(c
, 1);
1624 for(i
=0; i
<d
->central_dir_num_entries
; i
++) {
1625 if(!do_central_dir_entry(c
, d
, i
, pos
, &entry_size
)) {
1626 // TODO: Decide exactly what to do if something fails.
1634 de_dbg_indent(c
, -1);
// Reads the Zip64 end-of-central-directory record at d->zip64_eocd_pos.
// Stores the central directory's disk number, entry counts, byte size and
// offset in the d->zip64_* fields, for later use by do_end_of_central_dir().
// Warns if the locator reported a nonzero disk number, or if the expected
// signature ("PK\x06\x06") is not found at that position.
1638 static int do_zip64_eocd(deark
*c
, lctx
*d
)
1643 int saved_indent_level
;
1644 UI ver
, ver_hi
, ver_lo
;
1646 de_dbg_indent_save(c
, &saved_indent_level
);
1648 if(d
->zip64_eocd_disknum
!=0) {
1649 de_warn(c
, "This might be a multi-disk Zip64 archive, which is not supported");
1655 pos
= d
->zip64_eocd_pos
;
// dbuf_memcmp() is nonzero when the bytes do not match the signature.
1656 if(dbuf_memcmp(c
->infile
, pos
, g_zipsig66
, 4)) {
1657 de_warn(c
, "Expected Zip64 end-of-central-directory record not found at %"I64_FMT
, pos
);
1658 retval
= 1; // Maybe the eocd locator sig was a false positive?
1663 de_dbg(c
, "zip64 end-of-central-dir record at %"I64_FMT
, pos
);
1665 de_dbg_indent(c
, 1);
1667 n
= de_geti64le(pos
); pos
+= 8;
1668 de_dbg(c
, "size of zip64 eocd record: (12+)%"I64_FMT
, n
);
// "Version" fields: high byte = platform code, low byte = ZIP spec version
// times 10.
1670 ver
= (UI
)de_getu16le_p(&pos
);
1671 ver_hi
= (ver
&0xff00)>>8;
1672 ver_lo
= ver
&0x00ff;
1673 de_dbg(c
, "version made by: platform=%u (%s), ZIP spec=%u.%u",
1674 ver_hi
, get_platform_name(ver_hi
), (UI
)(ver_lo
/10), (UI
)(ver_lo
%10));
1676 ver
= (UI
)de_getu16le_p(&pos
);
1677 ver_hi
= (ver
&0xff00)>>8;
1678 ver_lo
= ver
&0x00ff;
1679 de_dbg(c
, "version needed: platform=%u (%s), ZIP spec=%u.%u",
1680 ver_hi
, get_platform_name(ver_hi
), (UI
)(ver_lo
/10), (UI
)(ver_lo
%10));
1682 n
= de_getu32le_p(&pos
);
1683 de_dbg(c
, "this disk num: %"I64_FMT
, n
);
1685 d
->zip64_cd_disknum
= (unsigned int)de_getu32le_p(&pos
);
1686 d
->zip64_num_centr_dir_entries_this_disk
= de_geti64le(pos
); pos
+= 8;
1687 de_dbg(c
, "central dir num entries on this disk: %"I64_FMT
, d
->zip64_num_centr_dir_entries_this_disk
);
1688 d
->zip64_num_centr_dir_entries_total
= de_geti64le(pos
); pos
+= 8;
1689 de_dbg(c
, "central dir num entries: %"I64_FMT
, d
->zip64_num_centr_dir_entries_total
);
1690 d
->zip64_centr_dir_byte_size
= de_geti64le(pos
); pos
+= 8;
1691 de_dbg(c
, "central dir size: %"I64_FMT
, d
->zip64_centr_dir_byte_size
);
1692 d
->zip64_cd_pos
= de_geti64le(pos
); pos
+= 8;
1693 de_dbg(c
, "central dir offset: %"I64_FMT
", disk: %u",
1694 d
->zip64_cd_pos
, d
->zip64_cd_disknum
);
1698 de_dbg_indent_restore(c
, saved_indent_level
);
// Looks for a Zip64 end-of-central-directory locator, and if one is found,
// records where the Zip64 EOCD record is (d->zip64_eocd_pos) and which disk
// it is on (d->zip64_eocd_disknum).
1702 static void do_zip64_eocd_locator(deark
*c
, lctx
*d
)
// The locator is looked for in the 20 bytes immediately before the regular
// end-of-central-directory record.
1705 i64 pos
= d
->end_of_central_dir_pos
- 20;
// dbuf_memcmp() is nonzero when the "PK\x06\x07" signature is absent.
1707 if(dbuf_memcmp(c
->infile
, pos
, g_zipsig67
, 4)) {
1710 de_dbg(c
, "zip64 eocd locator found at %"I64_FMT
, pos
);
1713 de_dbg_indent(c
, 1);
1714 d
->zip64_eocd_disknum
= (unsigned int)de_getu32le_p(&pos
);
1715 d
->zip64_eocd_pos
= de_geti64le(pos
); pos
+= 8;
1716 de_dbg(c
, "offset of zip64 eocd: %"I64_FMT
", disk: %u",
1717 d
->zip64_eocd_pos
, d
->zip64_eocd_disknum
);
1718 n
= de_getu32le_p(&pos
);
1719 de_dbg(c
, "total number of disks: %u", (unsigned int)n
);
1720 de_dbg_indent(c
, -1);
// Reads the end-of-central-directory (EOCD) record at
// d->end_of_central_dir_pos, and sets d->this_disk_num,
// d->central_dir_num_entries, d->central_dir_byte_size and
// d->central_dir_offset.
// For Zip64 files, a field holding its maximum value (0xffff or 0xffffffff)
// is replaced by the corresponding value read earlier from the Zip64 EOCD
// record.
// Also extracts the archive comment if there is one, reports signs of disk
// spanning, and cross-checks the reported central directory offset against
// the offset implied by its size.
1723 static int do_end_of_central_dir(deark
*c
, lctx
*d
)
1726 i64 num_entries_this_disk
;
1727 i64 disk_num_with_central_dir_start
;
1729 i64 alt_central_dir_offset
;
1732 pos
= d
->end_of_central_dir_pos
;
1733 de_dbg(c
, "end-of-central-dir record at %"I64_FMT
, pos
);
1734 de_dbg_indent(c
, 1);
// Field offsets below are relative to the start of the record; the first 4
// bytes (the signature) are not re-read here.
1736 d
->this_disk_num
= de_getu16le(pos
+4);
1737 de_dbg(c
, "this disk num: %"I64_FMT
, d
->this_disk_num
);
1738 disk_num_with_central_dir_start
= de_getu16le(pos
+6);
1740 num_entries_this_disk
= de_getu16le(pos
+8);
1741 de_dbg(c
, "central dir num entries on this disk: %"I64_FMT
, num_entries_this_disk
);
1742 if(d
->is_zip64
&& (num_entries_this_disk
==0xffff)) {
1743 num_entries_this_disk
= d
->zip64_num_centr_dir_entries_this_disk
;
1746 d
->central_dir_num_entries
= de_getu16le(pos
+10);
1747 d
->central_dir_byte_size
= de_getu32le(pos
+12);
1748 d
->central_dir_offset
= de_getu32le(pos
+16);
1749 de_dbg(c
, "central dir num entries: %"I64_FMT
, d
->central_dir_num_entries
);
1750 if(d
->is_zip64
&& (d
->central_dir_num_entries
==0xffff)) {
1751 d
->central_dir_num_entries
= d
->zip64_num_centr_dir_entries_total
;
1754 de_dbg(c
, "central dir size: %"I64_FMT
, d
->central_dir_byte_size
);
1755 if(d
->is_zip64
&& (d
->central_dir_byte_size
==0xffffffffLL
)) {
1756 d
->central_dir_byte_size
= d
->zip64_centr_dir_byte_size
;
1759 de_dbg(c
, "central dir offset: %"I64_FMT
", disk: %"I64_FMT
, d
->central_dir_offset
,
1760 disk_num_with_central_dir_start
);
1761 if(d
->is_zip64
&& (d
->central_dir_offset
==0xffffffffLL
)) {
1762 d
->central_dir_offset
= d
->zip64_cd_pos
;
// The archive comment, if any, immediately follows the 22-byte fixed part
// of the record.
1765 comment_length
= de_getu16le(pos
+20);
1766 de_dbg(c
, "comment length: %d", (int)comment_length
);
1767 if(comment_length
>0) {
1768 // The comment for the whole .ZIP file presumably has to use
1769 // cp437 encoding. There's no flag that could indicate otherwise.
1770 do_comment(c
, d
, pos
+22, comment_length
, 0,
1771 "ZIP file comment", "comment.txt");
1774 // TODO: Figure out exactly how to detect disk spanning.
1775 if(disk_num_with_central_dir_start
!=d
->this_disk_num
||
1776 (d
->is_zip64
&& d
->zip64_eocd_disknum
!=d
->this_disk_num
))
1778 de_err(c
, "Disk spanning not supported");
1782 if(d
->this_disk_num
!=0) {
1783 de_warn(c
, "This ZIP file might be part of a multi-part archive, and "
1784 "might not be supported correctly");
1787 if(num_entries_this_disk
!=d
->central_dir_num_entries
) {
1788 de_warn(c
, "This ZIP file might not be supported correctly "
1789 "(number-of-entries-this-disk=%d, number-of-entries-total=%d)",
1790 (int)num_entries_this_disk
, (int)d
->central_dir_num_entries
);
// Compute where the central directory would start if it ended exactly where
// the EOCD record (or, for Zip64, the Zip64 EOCD record) begins.
1793 alt_central_dir_offset
=
1794 (d
->is_zip64
? d
->zip64_eocd_pos
: d
->end_of_central_dir_pos
) -
1795 d
->central_dir_byte_size
;
1797 if(alt_central_dir_offset
!= d
->central_dir_offset
) {
1800 de_warn(c
, "Inconsistent central directory offset. Reported to be %"I64_FMT
", "
1801 "but based on its reported size, it should be %"I64_FMT
".",
1802 d
->central_dir_offset
, alt_central_dir_offset
);
// Only adopt the computed offset if a central dir entry signature is
// actually present there. The difference is remembered in
// d->offset_discrepancy.
1804 sig
= (u32
)de_getu32le(alt_central_dir_offset
);
1805 if(sig
==CODE_PK12
) {
1806 d
->offset_discrepancy
= alt_central_dir_offset
- d
->central_dir_offset
;
1807 de_dbg(c
, "likely central dir found at %"I64_FMT
, alt_central_dir_offset
);
1808 d
->central_dir_offset
= alt_central_dir_offset
;
1815 de_dbg_indent(c
, -1);
// Normal (central-directory-based) processing of a ZIP file:
// locate the end-of-central-directory record, look for Zip64 structures,
// declare the format, read the EOCD record, then walk the central directory.
1819 static void de_run_zip_normally(deark
*c
, lctx
*d
)
// Reuse the result of the EOCD search done during format detection, if
// there was one; otherwise search now.
1823 if(c
->detection_data
&& c
->detection_data
->zip_eocd_looked_for
) {
1824 eocd_found
= (int)c
->detection_data
->zip_eocd_found
;
1825 d
->end_of_central_dir_pos
= c
->detection_data
->zip_eocd_pos
;
1828 eocd_found
= fmtutil_find_zip_eocd(c
, c
->infile
, &d
->end_of_central_dir_pos
);
1831 if(c
->module_disposition
==DE_MODDISP_AUTODETECT
||
1832 c
->module_disposition
==DE_MODDISP_EXPLICIT
)
// If the file at least starts with a local file header signature, suggest
// scan mode to the user.
1834 if(de_getu32le(0)==CODE_PK34
) {
1835 de_err(c
, "ZIP central directory not found. "
1836 "You could try \"-opt zip:scanmode\".");
1840 de_err(c
, "Not a valid ZIP file");
1844 de_dbg(c
, "end-of-central-dir record found at %"I64_FMT
,
1845 d
->end_of_central_dir_pos
);
1847 do_zip64_eocd_locator(c
, d
);
1850 if(!do_zip64_eocd(c
, d
)) goto done
;
1854 de_declare_fmt(c
, "ZIP-Zip64");
1856 de_declare_fmt(c
, "ZIP");
1858 if(!do_end_of_central_dir(c
, d
)) {
1862 if(!do_central_dir(c
, d
)) {
// Main entry point of the ZIP module (installed as mi->run_fn).
// Allocates the module context, selects the default encoding for filenames
// and comments (the input encoding, defaulting to CP437), creates a CRC-32
// object, and then runs in scan mode if the "zip:scanmode" option is set,
// or in normal mode otherwise. The CRC object is destroyed before returning.
1870 static void de_run_zip(deark
*c
, de_module_params
*mparams
)
1875 d
= de_malloc(c
, sizeof(lctx
));
1877 enc
= de_get_input_encoding(c
, NULL
, DE_ENCODING_CP437
);
1878 d
->default_enc_for_filenames
= enc
;
1879 d
->default_enc_for_comments
= enc
;
1881 d
->crco
= de_crcobj_create(c
, DE_CRCOBJ_CRC32_IEEE
);
1883 if(de_get_ext_option(c
, "zip:scanmode")) {
1884 de_run_zip_scanmode(c
, d
);
1887 de_run_zip_normally(c
, d
);
1891 de_crcobj_destroy(d
->crco
);
// Format detection for ZIP (installed as mi->identify_fn). Returns a
// confidence level.
// A buffer matching the local file header signature yields 90 (100 with a
// ".zip" extension). An end-of-central-directory signature exactly 22 bytes
// before the end of the file (i.e. no archive comment) yields 19 (100 with a
// ".zip" extension). Otherwise, for files with an "MZ" signature or a ".zip"
// extension, a full EOCD search is done, and its result is cached in
// c->detection_data for later use by de_run_zip_normally().
// NOTE(review): c->detection_data is dereferenced here without a NULL check,
// while de_run_zip_normally() does check it -- confirm that it is always
// non-NULL during identification.
1896 static int de_identify_zip(deark
*c
)
1902 has_zip_ext
= de_input_file_has_ext(c
, "zip");
1907 if(!de_memcmp(b
, g_zipsig34
, 4)) {
1908 return has_zip_ext
? 100 : 90;
// Remember whether this looks like an executable (which could be a
// self-extracting ZIP file).
1910 if(b
[0]=='M' && b
[1]=='Z') has_mz_sig
= 1;
// Cheap test: an EOCD record with no comment occupies exactly the last 22
// bytes of the file.
1912 if(c
->infile
->len
>= 22) {
1913 de_read(b
, c
->infile
->len
- 22, 4);
1914 if(!de_memcmp(b
, g_zipsig56
, 4)) {
1915 return has_zip_ext
? 100 : 19;
1919 // Things to consider:
1920 // * We want de_fmtutil_find_zip_eocd() to be called no more than once, and
1921 // only on files that for some reason we suspect could be ZIP files.
1922 // * If the user disables exe format detection (e.g. with "-onlydetect zip"),
1923 // we want self-extracting-ZIP .exe files to be detected as ZIP instead.
1924 // * And we want the above to work even if the file has a ZIP file comment,
1925 // making it expensive to detect as ZIP.
1927 // Tests below can't return a confidence higher than this.
1928 if(c
->detection_data
->best_confidence_so_far
>= 19) return 0;
1932 if(has_mz_sig
|| has_zip_ext
) {
1935 c
->detection_data
->zip_eocd_looked_for
= 1;
1936 if(fmtutil_find_zip_eocd(c
, c
->infile
, &eocd_pos
)) {
1937 c
->detection_data
->zip_eocd_found
= 1;
1938 c
->detection_data
->zip_eocd_pos
= eocd_pos
;
1946 static void de_help_zip(deark
*c
)
1948 de_msg(c
, "-opt zip:scanmode : Do not use the \"central directory\"");
1949 de_msg(c
, "-opt zip:implodebug : Behave like PKZIP 1.01/1.02");
// Module registration function (declared via DE_DECLARE_MODULE). Fills in
// the module info record with the module's description and its run,
// identify, and help callbacks.
1952 void de_module_zip(deark
*c
, struct deark_module_info
*mi
)
1955 mi
->desc
= "ZIP archive";
1956 mi
->run_fn
= de_run_zip
;
1957 mi
->identify_fn
= de_identify_zip
;
1958 mi
->help_fn
= de_help_zip
;