Refactoring the iff decoder
[deark.git] / modules / zip.c
blob87b1e5096d6d4186fce814f3f2a7eb3c1ef243c5
1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
5 // ZIP format
7 #include <deark-config.h>
8 #include <deark-private.h>
9 #include <deark-fmtutil.h>
11 DE_DECLARE_MODULE(de_module_zip);
13 struct localctx_struct;
14 typedef struct localctx_struct lctx;
16 #define CODE_PK12 0x02014b50U
17 #define CODE_PK34 0x04034b50U
18 static const u8 g_zipsig34[4] = {'P', 'K', 0x03, 0x04};
19 static const u8 g_zipsig56[4] = {'P', 'K', 0x05, 0x06};
20 static const u8 g_zipsig66[4] = {'P', 'K', 0x06, 0x06};
21 static const u8 g_zipsig67[4] = {'P', 'K', 0x06, 0x07};
// ZIP-specific parameters (i.e. things not found in de_dfilter_*_params)
// that a decompressor may need.
struct compression_params {
	// The ZIP compression method number. The "reduce" and "deflate"
	// decompressors in this file consult it.
	int cmpr_meth;

	// Bit flags, passed through to the "implode" decompressor.
	unsigned int bit_flags;
};
30 typedef void (*decompressor_fn)(deark *c, lctx *d, struct compression_params *cparams,
31 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
32 struct de_dfilter_results *dres);
34 struct cmpr_meth_info {
35 int cmpr_meth;
36 unsigned int flags;
37 const char *name;
38 decompressor_fn decompressor;
41 struct dir_entry_data {
42 unsigned int ver_needed;
43 unsigned int ver_needed_hi, ver_needed_lo;
44 i64 cmpr_size, uncmpr_size;
45 int cmpr_meth;
46 const struct cmpr_meth_info *cmi;
47 unsigned int bit_flags;
48 u32 crc_reported;
49 i64 main_fname_pos;
50 i64 main_fname_len;
51 de_ucstring *fname;
54 struct timestamp_data {
55 struct de_timestamp ts; // The best timestamp of this type found so far
56 int quality;
59 struct member_data {
60 unsigned int ver_made_by;
61 unsigned int ver_made_by_hi, ver_made_by_lo;
62 unsigned int attr_i, attr_e;
63 i64 offset_of_local_header;
64 i64 disk_number_start;
65 i64 file_data_pos;
66 int is_nonexecutable;
67 int is_executable;
68 int is_dir;
69 int is_symlink;
70 struct timestamp_data tsdata[DE_TIMESTAMPIDX_COUNT];
71 u8 has_riscos_data;
72 struct de_riscos_file_attrs rfa;
74 struct dir_entry_data central_dir_entry_data;
75 struct dir_entry_data local_dir_entry_data;
77 i64 cmpr_size, uncmpr_size;
78 u32 crc_reported;
79 u8 has_extts, has_extts_atime, has_extts_crtime;
80 u8 questionable_atime, questionable_crtime;
83 struct extra_item_type_info_struct;
85 struct extra_item_info_struct {
86 u32 id;
87 i64 dpos;
88 i64 dlen;
89 const struct extra_item_type_info_struct *eiti;
90 struct member_data *md;
91 struct dir_entry_data *dd;
92 int is_central;
95 struct localctx_struct {
96 de_encoding default_enc_for_filenames;
97 de_encoding default_enc_for_comments;
98 i64 end_of_central_dir_pos;
99 i64 central_dir_num_entries;
100 i64 central_dir_byte_size;
101 i64 central_dir_offset;
102 i64 this_disk_num;
103 i64 zip64_eocd_pos;
104 i64 zip64_cd_pos;
105 i64 zip64_num_centr_dir_entries_this_disk;
106 i64 zip64_num_centr_dir_entries_total;
107 i64 zip64_centr_dir_byte_size;
108 unsigned int zip64_eocd_disknum;
109 unsigned int zip64_cd_disknum;
110 i64 offset_discrepancy;
111 int used_offset_discrepancy;
112 int is_zip64;
113 int using_scanmode;
114 struct de_crcobj *crco;
117 typedef void (*extrafield_decoder_fn)(deark *c, lctx *d,
118 struct extra_item_info_struct *eii);
120 // (Timezone info and precision are ignored.)
121 static int timestamps_are_valid_and_equal(const struct de_timestamp *ts1,
122 const struct de_timestamp *ts2)
124 if(!ts1->is_valid || !ts2->is_valid) return 0;
125 return (ts1->ts_FILETIME == ts2->ts_FILETIME);
128 static int is_compression_method_supported(lctx *d, const struct cmpr_meth_info *cmi)
130 if(cmi && cmi->decompressor) return 1;
131 return 0;
134 static void do_decompress_shrink(deark *c, lctx *d, struct compression_params *cparams,
135 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
136 struct de_dfilter_results *dres)
138 fmtutil_decompress_zip_shrink(c, dcmpri, dcmpro, dres, NULL);
141 static void do_decompress_reduce(deark *c, lctx *d, struct compression_params *cparams,
142 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
143 struct de_dfilter_results *dres)
145 struct de_zipreduce_params params;
147 de_zeromem(&params, sizeof(struct de_zipreduce_params));
148 params.cmpr_factor = (unsigned int)(cparams->cmpr_meth-1);
149 fmtutil_decompress_zip_reduce(c, dcmpri, dcmpro, dres, &params);
152 static void do_decompress_implode(deark *c, lctx *d, struct compression_params *cparams,
153 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
154 struct de_dfilter_results *dres)
156 struct de_zipimplode_params params;
158 de_zeromem(&params, sizeof(struct de_zipimplode_params));
159 params.bit_flags = cparams->bit_flags;
160 params.mml_bug = (u8)de_get_ext_option_bool(c, "zip:implodebug", 0);
161 fmtutil_decompress_zip_implode(c, dcmpri, dcmpro, dres, &params);
164 static void do_decompress_deflate(deark *c, lctx *d, struct compression_params *cparams,
165 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
166 struct de_dfilter_results *dres)
168 struct de_deflate_params inflparams;
170 de_zeromem(&inflparams, sizeof(struct de_deflate_params));
171 if(cparams->cmpr_meth==9) {
172 inflparams.flags |= DE_DEFLATEFLAG_DEFLATE64;
174 fmtutil_decompress_deflate_ex(c, dcmpri, dcmpro, dres, &inflparams);
177 static void do_decompress_dclimplode(deark *c, lctx *d, struct compression_params *cparams,
178 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
179 struct de_dfilter_results *dres)
181 fmtutil_dclimplode_codectype1(c, dcmpri, dcmpro, dres, NULL);
184 static void do_decompress_stored(deark *c, lctx *d, struct compression_params *cparams,
185 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
186 struct de_dfilter_results *dres)
188 fmtutil_decompress_uncompressed(c, dcmpri, dcmpro, dres, 0);
191 static const struct cmpr_meth_info cmpr_meth_info_arr[] = {
192 { 0, 0x00, "stored", do_decompress_stored },
193 { 1, 0x00, "shrink", do_decompress_shrink },
194 { 2, 0x00, "reduce, CF=1", do_decompress_reduce },
195 { 3, 0x00, "reduce, CF=2", do_decompress_reduce },
196 { 4, 0x00, "reduce, CF=3", do_decompress_reduce },
197 { 5, 0x00, "reduce, CF=4", do_decompress_reduce },
198 { 6, 0x00, "implode", do_decompress_implode },
199 { 8, 0x00, "deflate", do_decompress_deflate },
200 { 9, 0x00, "deflate64", do_decompress_deflate },
201 { 10, 0x00, "PKWARE DCL implode", do_decompress_dclimplode },
202 { 12, 0x00, "bzip2", NULL },
203 { 14, 0x00, "LZMA", NULL },
204 { 16, 0x00, "IBM z/OS CMPSC", NULL },
205 { 18, 0x00, "IBM TERSE (new)", NULL },
206 { 19, 0x00, "IBM LZ77 z Architecture", NULL },
207 { 94, 0x00, "MP3", NULL },
208 { 95, 0x00, "XZ", NULL },
209 { 96, 0x00, "JPEG", NULL },
210 { 97, 0x00, "WavPack", NULL },
211 { 98, 0x00, "PPMd", NULL },
212 { 99, 0x00, "AES", NULL }
215 static const struct cmpr_meth_info *get_cmpr_meth_info(int cmpr_meth)
217 size_t k;
219 for(k=0; k<DE_ARRAYCOUNT(cmpr_meth_info_arr); k++) {
220 if(cmpr_meth_info_arr[k].cmpr_meth == cmpr_meth) {
221 return &cmpr_meth_info_arr[k];
224 return NULL;
227 // Decompress some data, using the given ZIP compression method.
228 // On failure, dres->errcode will be set.
229 static void do_decompress_lowlevel(deark *c, lctx *d, struct de_dfilter_in_params *dcmpri,
230 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres,
231 int cmpr_meth, const struct cmpr_meth_info *cmi, unsigned int bit_flags)
233 struct compression_params cparams;
235 de_zeromem(&cparams, sizeof(struct compression_params));
236 cparams.cmpr_meth = cmpr_meth;
237 cparams.bit_flags = bit_flags;
239 if(cmi && cmi->decompressor) {
240 cmi->decompressor(c, d, &cparams, dcmpri, dcmpro, dres);
242 else {
243 de_internal_err_nonfatal(c, "Unsupported compression method (%d)", cmpr_meth);
244 de_dfilter_set_generic_error(c, dres, NULL);
248 static void our_writelistener_cb(dbuf *f, void *userdata, const u8 *buf, i64 buf_len)
250 struct de_crcobj *crco = (struct de_crcobj *)userdata;
251 de_crcobj_addbuf(crco, buf, buf_len);
254 // Decompress a Zip member file, writing to outf.
255 // Does CRC calculation.
256 // Reports errors to the user.
257 // Only call this if the compression method is supported -- Call
258 // is_compression_method_supported() first.
259 // Assumes ldd->cmi has been set, by calling get_cmpr_meth_info().
260 static int do_decompress_member(deark *c, lctx *d, struct member_data *md, dbuf *outf)
262 struct dir_entry_data *ldd = &md->local_dir_entry_data;
263 struct de_dfilter_in_params dcmpri;
264 struct de_dfilter_out_params dcmpro;
265 struct de_dfilter_results dres;
266 u32 crc_calculated;
267 int retval = 0;
269 de_dfilter_init_objects(c, &dcmpri, &dcmpro, &dres);
270 dcmpri.f = c->infile;
271 dcmpri.pos = md->file_data_pos;
272 dcmpri.len = md->cmpr_size;
273 dcmpro.f = outf;
274 dcmpro.expected_len = md->uncmpr_size;
275 dcmpro.len_known = 1;
277 dbuf_set_writelistener(outf, our_writelistener_cb, (void*)d->crco);
278 de_crcobj_reset(d->crco);
280 do_decompress_lowlevel(c, d, &dcmpri, &dcmpro, &dres, ldd->cmpr_meth,
281 ldd->cmi, ldd->bit_flags);
283 if(dres.errcode) {
284 de_err(c, "%s: %s", ucstring_getpsz_d(ldd->fname),
285 de_dfilter_get_errmsg(c, &dres));
286 goto done;
289 crc_calculated = de_crcobj_getval(d->crco);
290 de_dbg(c, "crc (calculated): 0x%08x", (unsigned int)crc_calculated);
292 if(crc_calculated != md->crc_reported) {
293 de_err(c, "%s: CRC check failed: Expected 0x%08x, got 0x%08x",
294 ucstring_getpsz_d(ldd->fname),
295 (unsigned int)md->crc_reported, (unsigned int)crc_calculated);
296 if(dres.bytes_consumed_valid && (dres.bytes_consumed < dcmpri.len)) {
297 de_info(c, "Note: Only used %"I64_FMT" of %"I64_FMT" compressed bytes.",
298 dres.bytes_consumed, dcmpri.len);
300 goto done;
303 retval = 1;
304 done:
305 return retval;
308 // A variation of do_decompress_member() -
309 // works for Finder attribute data, and OS/2 extended attributes.
310 // Only call this if the compression method is supported -- Call
311 // is_compression_method_supported() first.
312 // outf is assumed to be a membuf.
313 // dcflags: 0x1 = Validate the crc_reported param.
314 static int do_decompress_attrib_data(deark *c, lctx *d,
315 i64 dpos, i64 dlen, dbuf *outf, i64 uncmprsize, u32 crc_reported,
316 int cmpr_meth, const struct cmpr_meth_info *cmi, UI flags, const char *name)
318 struct de_dfilter_in_params dcmpri;
319 struct de_dfilter_out_params dcmpro;
320 struct de_dfilter_results dres;
321 u32 crc_calculated;
322 int retval = 0;
324 de_dfilter_init_objects(c, &dcmpri, &dcmpro, &dres);
325 dcmpri.f = c->infile;
326 dcmpri.pos = dpos;
327 dcmpri.len = dlen;
328 dcmpro.f = outf;
329 dcmpro.expected_len = uncmprsize;
330 dcmpro.len_known = 1;
332 do_decompress_lowlevel(c, d, &dcmpri, &dcmpro, &dres, cmpr_meth, cmi, 0);
333 if(dres.errcode) {
334 goto done; // Could report the error, but this isn't critical data
337 if(flags & 0x1) {
338 de_crcobj_reset(d->crco);
339 de_crcobj_addslice(d->crco, outf, 0, outf->len);
340 crc_calculated = de_crcobj_getval(d->crco);
341 de_dbg(c, "%s crc (calculated): 0x%08x", name, (UI)crc_calculated);
342 if(crc_calculated != crc_reported) goto done;
345 retval = 1;
346 done:
347 return retval;
350 // As we read a member file's attributes, we may encounter multiple timestamps,
351 // which can differ in their precision, and whether they use UTC.
352 // This function is called to remember the "best" file modification time
353 // encountered so far.
354 static void apply_timestamp(deark *c, lctx *d, struct member_data *md, int tstype,
355 const struct de_timestamp *ts, int quality)
357 if(!ts->is_valid) return;
359 // In case of a tie, we prefer the later timestamp that we encountered.
360 // This makes local headers have priority over central headers, for
361 // example.
362 if(quality >= md->tsdata[tstype].quality) {
363 md->tsdata[tstype].ts = *ts;
364 md->tsdata[tstype].quality = quality;
368 static void do_read_filename(deark *c, lctx *d,
369 struct member_data *md, struct dir_entry_data *dd,
370 i64 pos, i64 len, int utf8_flag)
372 de_encoding from_encoding;
374 ucstring_empty(dd->fname);
375 from_encoding = utf8_flag ? DE_ENCODING_UTF8 : d->default_enc_for_filenames;
376 dbuf_read_to_ucstring(c->infile, pos, len, dd->fname, 0, from_encoding);
377 de_dbg(c, "filename: \"%s\"", ucstring_getpsz_d(dd->fname));
380 static void do_comment_display(deark *c, lctx *d, i64 pos, i64 len, de_ext_encoding ee,
381 const char *name)
383 de_ucstring *s = NULL;
385 s = ucstring_create(c);
386 dbuf_read_to_ucstring(c->infile, pos, len, s, 0, ee);
387 de_dbg(c, "%s: \"%s\"", name, ucstring_getpsz_d(s));
388 ucstring_destroy(s);
391 static void do_comment_extract(deark *c, lctx *d, i64 pos, i64 len, de_ext_encoding ee,
392 const char *ext)
394 dbuf *f = NULL;
395 de_ucstring *s = NULL;
397 f = dbuf_create_output_file(c, ext, NULL, DE_CREATEFLAG_IS_AUX);
398 s = ucstring_create(c);
399 dbuf_read_to_ucstring(c->infile, pos, len, s, 0, ee);
400 ucstring_write_as_utf8(c, s, f, 1);
401 ucstring_destroy(s);
402 dbuf_close(f);
405 static void do_comment(deark *c, lctx *d, i64 pos, i64 len, int utf8_flag,
406 const char *name, const char *ext)
408 de_ext_encoding ee;
410 if(len<1) return;
411 ee = utf8_flag ? DE_ENCODING_UTF8 : d->default_enc_for_comments;
412 ee = DE_EXTENC_MAKE(ee, DE_ENCSUBTYPE_HYBRID);
413 if(c->extract_level>=2) {
414 do_comment_extract(c, d, pos, len, ee, ext);
416 else {
417 do_comment_display(c, d, pos, len, ee, name);
421 static void read_unix_timestamp(deark *c, lctx *d, i64 pos,
422 struct de_timestamp *timestamp, const char *name)
424 i64 t;
425 char timestamp_buf[64];
427 t = de_geti32le(pos);
428 de_unix_time_to_timestamp(t, timestamp, 0x1);
429 de_dbg_timestamp_to_string(c, timestamp, timestamp_buf, sizeof(timestamp_buf), 0);
430 de_dbg(c, "%s: %"I64_FMT" (%s)", name, t, timestamp_buf);
433 static void read_FILETIME(deark *c, lctx *d, i64 pos,
434 struct de_timestamp *timestamp, const char *name)
436 i64 t_FILETIME;
437 char timestamp_buf[64];
439 t_FILETIME = de_geti64le(pos);
440 de_FILETIME_to_timestamp(t_FILETIME, timestamp, 0x1);
441 de_dbg_timestamp_to_string(c, timestamp, timestamp_buf, sizeof(timestamp_buf), 0);
442 de_dbg(c, "%s: %s", name, timestamp_buf);
445 static void ef_zip64extinfo(deark *c, lctx *d, struct extra_item_info_struct *eii)
447 i64 n;
448 i64 pos = eii->dpos;
450 if(pos+8 > eii->dpos+eii->dlen) goto done;
451 n = de_geti64le(pos); pos += 8;
452 de_dbg(c, "orig uncmpr file size: %"I64_FMT, n);
453 if(eii->dd->uncmpr_size==0xffffffffLL) {
454 eii->dd->uncmpr_size = n;
457 if(pos+8 > eii->dpos+eii->dlen) goto done;
458 n = de_geti64le(pos); pos += 8;
459 de_dbg(c, "cmpr data size: %"I64_FMT, n);
460 if(eii->dd->cmpr_size==0xffffffffLL) {
461 eii->dd->cmpr_size = n;
464 if(pos+8 > eii->dpos+eii->dlen) goto done;
465 n = de_geti64le(pos); pos += 8;
466 de_dbg(c, "offset of local header record: %"I64_FMT, n);
468 if(pos+4 > eii->dpos+eii->dlen) goto done;
469 n = de_getu32le_p(&pos);
470 de_dbg(c, "disk start number: %"I64_FMT, n);
471 done:
475 // Extra field 0x5455
476 static void ef_extended_timestamp(deark *c, lctx *d, struct extra_item_info_struct *eii)
478 i64 pos = eii->dpos;
479 u8 flags;
480 i64 endpos;
481 int has_mtime, has_atime, has_ctime;
482 struct de_timestamp timestamp_tmp;
484 endpos = pos + eii->dlen;
485 if(pos+1>endpos) return;
486 flags = de_getbyte_p(&pos);
487 de_dbg2(c, "flags: 0x%02x", (UI)flags);
488 if(eii->is_central) {
489 has_mtime = (eii->dlen>=5);
490 has_atime = 0;
491 has_ctime = 0;
493 else {
494 eii->md->has_extts = 1;
495 has_mtime = (flags & 0x01)?1:0;
496 has_atime = (flags & 0x02)?1:0;
497 has_ctime = (flags & 0x04)?1:0;
499 if(has_mtime) {
500 if(pos+4>endpos) return;
501 read_unix_timestamp(c, d, pos, &timestamp_tmp, "mtime");
502 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_MODIFY, &timestamp_tmp, 50);
503 pos+=4;
505 if(has_atime) {
506 if(pos+4>endpos) return;
507 read_unix_timestamp(c, d, pos, &timestamp_tmp, "atime");
508 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_ACCESS, &timestamp_tmp, 50);
509 eii->md->has_extts_atime = 1;
510 pos+=4;
512 if(has_ctime) {
513 if(pos+4>endpos) return;
514 read_unix_timestamp(c, d, pos, &timestamp_tmp, "creation time");
515 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_CREATE, &timestamp_tmp, 50);
516 eii->md->has_extts_crtime = 1;
517 pos+=4;
521 // Extra field 0x5855
522 static void ef_infozip1(deark *c, lctx *d, struct extra_item_info_struct *eii)
524 i64 uidnum, gidnum;
525 struct de_timestamp timestamp_tmp;
527 if(eii->dlen<8) return;
528 read_unix_timestamp(c, d, eii->dpos, &timestamp_tmp, "atime");
529 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_ACCESS, &timestamp_tmp, 45);
530 read_unix_timestamp(c, d, eii->dpos+4, &timestamp_tmp, "mtime");
531 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_MODIFY, &timestamp_tmp, 45);
532 if(!eii->is_central && eii->dlen>=12) {
533 uidnum = de_getu16le(eii->dpos+8);
534 gidnum = de_getu16le(eii->dpos+10);
535 de_dbg(c, "uid: %d, gid: %d", (int)uidnum, (int)gidnum);
539 // Extra field 0x7075 - Info-ZIP Unicode Path
540 static void ef_unicodepath(deark *c, lctx *d, struct extra_item_info_struct *eii)
542 u8 ver;
543 de_ucstring *fn = NULL;
544 i64 fnlen;
545 u32 crc_reported, crc_calculated;
547 if(eii->dlen<1) goto done;
548 ver = de_getbyte(eii->dpos);
549 de_dbg(c, "version: %u", (unsigned int)ver);
550 if(ver!=1) goto done;
551 if(eii->dlen<6) goto done;
552 crc_reported = (u32)de_getu32le(eii->dpos+1);
553 de_dbg(c, "name-crc (reported): 0x%08x", (unsigned int)crc_reported);
554 fn = ucstring_create(c);
555 fnlen = eii->dlen - 5;
556 dbuf_read_to_ucstring(c->infile, eii->dpos+5, fnlen, fn, 0, DE_ENCODING_UTF8);
557 de_dbg(c, "unicode name: \"%s\"", ucstring_getpsz_d(fn));
559 // Need to go back and calculate a CRC of the main filename. This is
560 // protection against the case where a ZIP editor may have changed the
561 // original filename, but retained a now-orphaned Unicode Path field.
562 de_crcobj_reset(d->crco);
563 de_crcobj_addslice(d->crco, c->infile, eii->dd->main_fname_pos, eii->dd->main_fname_len);
564 crc_calculated = de_crcobj_getval(d->crco);
565 de_dbg(c, "name-crc (calculated): 0x%08x", (unsigned int)crc_calculated);
567 if(crc_calculated == crc_reported) {
568 ucstring_empty(eii->dd->fname);
569 ucstring_append_ucstring(eii->dd->fname, fn);
572 done:
573 ucstring_destroy(fn);
576 // Extra field 0x7855
577 static void ef_infozip2(deark *c, lctx *d, struct extra_item_info_struct *eii)
579 i64 uidnum, gidnum;
581 if(eii->is_central) return;
582 if(eii->dlen<4) return;
583 uidnum = de_getu16le(eii->dpos);
584 gidnum = de_getu16le(eii->dpos+2);
585 de_dbg(c, "uid: %d, gid: %d", (int)uidnum, (int)gidnum);
588 // Extra field 0x7875
589 static void ef_infozip3(deark *c, lctx *d, struct extra_item_info_struct *eii)
591 i64 pos = eii->dpos;
592 i64 uidnum, gidnum;
593 u8 ver;
594 i64 endpos;
595 i64 sz;
597 endpos = pos+eii->dlen;
599 if(pos+1>endpos) return;
600 ver = de_getbyte_p(&pos);
601 de_dbg(c, "version: %d", (int)ver);
602 if(ver!=1) return;
604 if(pos+1>endpos) return;
605 sz = (i64)de_getbyte_p(&pos);
606 if(pos+sz>endpos) return;
607 uidnum = dbuf_getint_ext(c->infile, pos, (unsigned int)sz, 1, 0);
608 pos += sz;
610 if(pos+1>endpos) return;
611 sz = (i64)de_getbyte_p(&pos);
612 if(pos+sz>endpos) return;
613 gidnum = dbuf_getint_ext(c->infile, pos, (unsigned int)sz, 1, 0);
614 pos += sz;
616 de_dbg(c, "uid: %d, gid: %d", (int)uidnum, (int)gidnum);
619 // Extra field 0x000a
620 static void ef_ntfs(deark *c, lctx *d, struct extra_item_info_struct *eii)
622 i64 pos = eii->dpos;
623 i64 endpos;
624 i64 attr_tag;
625 i64 attr_size;
626 const char *name;
627 struct de_timestamp timestamp_tmp_m;
628 struct de_timestamp timestamp_tmp;
630 endpos = pos+eii->dlen;
631 pos += 4; // skip reserved field
633 while(1) {
634 if(pos+4>endpos) break;
635 attr_tag = de_getu16le_p(&pos);
636 attr_size = de_getu16le_p(&pos);
637 if(attr_tag==0x0001) name="NTFS filetimes";
638 else name="?";
639 de_dbg(c, "tag: 0x%04x (%s), dlen: %d", (unsigned int)attr_tag, name,
640 (int)attr_size);
641 if(pos+attr_size>endpos) break;
643 de_dbg_indent(c, 1);
644 if(attr_tag==0x0001 && attr_size>=24) {
645 read_FILETIME(c, d, pos, &timestamp_tmp_m, "mtime");
646 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_MODIFY, &timestamp_tmp_m, 90);
648 read_FILETIME(c, d, pos+8, &timestamp_tmp, "atime");
649 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_ACCESS, &timestamp_tmp, 90);
650 if(timestamps_are_valid_and_equal(&timestamp_tmp, &timestamp_tmp_m)) {
651 eii->md->questionable_atime = 1;
654 read_FILETIME(c, d, pos+16, &timestamp_tmp, "creation time");
655 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_CREATE, &timestamp_tmp, 90);
656 if(timestamps_are_valid_and_equal(&timestamp_tmp, &timestamp_tmp_m)) {
657 eii->md->questionable_crtime = 1;
660 de_dbg_indent(c, -1);
662 pos += attr_size;
666 // Extra field 0x0009
667 static void ef_os2(deark *c, lctx *d, struct extra_item_info_struct *eii)
669 i64 pos = eii->dpos;
670 i64 endpos;
671 i64 ulen;
672 i64 cmpr_attr_size;
673 int cmpr_meth;
674 u32 crc_reported;
675 const struct cmpr_meth_info *cmi = NULL;
676 const char *name = "OS/2 ext. attr. data";
677 dbuf *attr_data = NULL;
678 de_module_params *mparams = NULL;
679 int ret;
681 endpos = pos+eii->dlen;
682 if(pos+4>endpos) goto done;
683 ulen = de_getu32le_p(&pos);
684 de_dbg(c, "uncmpr ext attr data size: %"I64_FMT, ulen);
685 if(eii->is_central) goto done;
687 if(pos+2>endpos) goto done;
688 cmpr_meth = (int)de_getu16le_p(&pos);
689 de_dbg(c, "ext attr cmpr method: %d", cmpr_meth);
691 if(pos+4>endpos) goto done;
692 crc_reported = (u32)de_getu32le_p(&pos);
693 de_dbg(c, "ext attr crc (reported): 0x%08x", (unsigned int)crc_reported);
695 cmpr_attr_size = endpos-pos;
696 de_dbg(c, "cmpr ext attr data at %"I64_FMT", len=%"I64_FMT, pos, cmpr_attr_size);
697 if(pos + cmpr_attr_size > endpos) goto done;
699 cmi = get_cmpr_meth_info(cmpr_meth);
700 if(cmpr_meth==6 || !is_compression_method_supported(d, cmi)) {
701 de_warn(c, "%s: Unsupported compression method: %d (%s)",
702 name, cmpr_meth, (cmi ? cmi->name : "?"));
703 goto done;
706 attr_data = dbuf_create_membuf(c, ulen, 0x1);
707 ret = do_decompress_attrib_data(c, d, pos, cmpr_attr_size,
708 attr_data, ulen, crc_reported, cmpr_meth, cmi, 0x1, name);
709 if(!ret) {
710 de_warn(c, "Failed to decompress %s", name);
711 goto done;
714 // attr_data contains an OS/2 extended attribute structure (FEA2LIST)
715 mparams = de_malloc(c, sizeof(de_module_params));
716 mparams->in_params.codes = "L";
717 de_dbg(c, "decoding OS/2 ext. attribs., unc. len=%"I64_FMT, attr_data->len);
718 de_dbg_indent(c, 1);
719 de_run_module_by_id_on_slice(c, "ea_data", mparams, attr_data, 0, attr_data->len);
720 de_dbg_indent(c, -1);
722 done:
723 dbuf_close(attr_data);
724 de_free(c, mparams);
727 // Extra field 0x2705 (ZipIt Macintosh 1.3.5+)
728 static void ef_zipitmac_2705(deark *c, lctx *d, struct extra_item_info_struct *eii)
730 struct de_fourcc sig;
731 struct de_fourcc filetype;
732 struct de_fourcc creator;
734 if(eii->dlen<4) goto done;
735 dbuf_read_fourcc(c->infile, eii->dpos, &sig, 4, 0x0);
736 de_dbg(c, "signature: '%s'", sig.id_dbgstr);
737 if(sig.id!=0x5a504954U) goto done; // expecting 'ZPIT'
738 if(eii->dlen<12) goto done;
739 dbuf_read_fourcc(c->infile, eii->dpos+4, &filetype, 4, 0x0);
740 de_dbg(c, "filetype: '%s'", filetype.id_dbgstr);
741 dbuf_read_fourcc(c->infile, eii->dpos+8, &creator, 4, 0x0);
742 de_dbg(c, "creator: '%s'", creator.id_dbgstr);
744 done:
748 // The time will be returned in the caller-supplied 'ts'
749 static void handle_mac_time(deark *c, lctx *d,
750 i64 mt_raw, i64 mt_offset,
751 struct de_timestamp *ts, const char *name)
753 char timestamp_buf[64];
754 de_mac_time_to_timestamp(mt_raw - mt_offset, ts);
755 ts->tzcode = DE_TZCODE_UTC;
756 de_dbg_timestamp_to_string(c, ts, timestamp_buf, sizeof(timestamp_buf), 0);
757 de_dbg(c, "%s: %"I64_FMT" %+"I64_FMT" (%s)", name,
758 mt_raw, -mt_offset, timestamp_buf);
761 // Extra field 0x334d (Info-ZIP Macintosh)
762 static void ef_infozipmac(deark *c, lctx *d, struct extra_item_info_struct *eii)
764 i64 pos = eii->dpos;
765 i64 dpos;
766 i64 ulen;
767 i64 cmpr_attr_size;
768 unsigned int flags;
769 int cmpr_meth;
770 const struct cmpr_meth_info *cmi = NULL;
771 struct de_fourcc filetype;
772 struct de_fourcc creator;
773 de_ucstring *flags_str = NULL;
774 dbuf *attr_data = NULL;
775 int ret;
776 i64 create_time_raw;
777 i64 create_time_offset;
778 i64 mod_time_raw;
779 i64 mod_time_offset;
780 i64 backup_time_raw;
781 i64 backup_time_offset;
782 struct de_timestamp tmp_timestamp;
783 int charset;
784 u32 crc_reported = 0;
785 UI dcflags = 0;
786 struct de_stringreaderdata *srd;
788 if(eii->dlen<14) goto done;
790 ulen = de_getu32le_p(&pos);
791 de_dbg(c, "uncmpr. finder attr. size: %d", (int)ulen);
793 flags = (unsigned int)de_getu16le_p(&pos);
794 flags_str = ucstring_create(c);
795 if(flags&0x0001) ucstring_append_flags_item(flags_str, "data_fork");
796 if(flags&0x0002) ucstring_append_flags_item(flags_str, "0x0002"); // something about the filename
797 ucstring_append_flags_item(flags_str,
798 (flags&0x0004)?"uncmpressed_attribute_data":"compressed_attribute_data");
799 if(flags&0x0008) ucstring_append_flags_item(flags_str, "64-bit_times");
800 if(flags&0x0010) ucstring_append_flags_item(flags_str, "no_timezone_offsets");
801 de_dbg(c, "flags: 0x%04x (%s)", flags, ucstring_getpsz(flags_str));
803 dbuf_read_fourcc(c->infile, pos, &filetype, 4, 0x0);
804 de_dbg(c, "filetype: '%s'", filetype.id_dbgstr);
805 pos += 4;
806 dbuf_read_fourcc(c->infile, pos, &creator, 4, 0x0);
807 de_dbg(c, "creator: '%s'", creator.id_dbgstr);
808 pos += 4;
810 if(eii->is_central) goto done;
812 if(flags&0x0004) { // Uncompressed attribute data
813 cmpr_meth = 0;
815 else {
816 dcflags |= 0x1; // CRC is known
817 cmpr_meth = (int)de_getu16le_p(&pos);
818 cmi = get_cmpr_meth_info(cmpr_meth);
819 de_dbg(c, "finder attr. cmpr. method: %d (%s)", cmpr_meth, (cmi ? cmi->name : "?"));
821 crc_reported = (u32)de_getu32le_p(&pos);
822 de_dbg(c, "finder attr. data crc (reported): 0x%08x", (UI)crc_reported);
825 // The rest of the data is Finder attribute data
826 cmpr_attr_size = eii->dpos+eii->dlen - pos;
827 de_dbg(c, "cmpr. finder attr. size: %d", (int)cmpr_attr_size);
828 if(ulen<1 || ulen>1000000) goto done;
830 // Type 6 (implode) compression won't work here, because it needs
831 // additional parameters seemingly not provided by the Finder attr data.
832 if(cmpr_meth==6 || !is_compression_method_supported(d, cmi)) {
833 de_warn(c, "Finder attribute data: Unsupported compression method: %d (%s)",
834 cmpr_meth, (cmi ? cmi->name : "?"));
835 goto done;
838 // Decompress and decode the Finder attribute data
839 attr_data = dbuf_create_membuf(c, ulen, 0x1);
840 ret = do_decompress_attrib_data(c, d, pos, cmpr_attr_size,
841 attr_data, ulen, crc_reported, cmpr_meth, cmi, dcflags, "finder attr. data");
842 if(!ret) {
843 de_warn(c, "Failed to decompress finder attribute data");
844 goto done;
847 dpos = 0;
848 dpos += 2; // Finder flags
849 dpos += 4; // Icon location
850 dpos += 2; // Folder
851 dpos += 16; // FXInfo
852 dpos += 1; // file version number
853 dpos += 1; // dir access rights
855 if(flags&0x0008) goto done; // We don't support 64-bit times
856 if(flags&0x0010) goto done; // We want timezone offsets
857 if(attr_data->len - dpos < 6*4) goto done;
859 create_time_raw = dbuf_getu32le_p(attr_data, &dpos);
860 mod_time_raw = dbuf_getu32le_p(attr_data, &dpos);
861 backup_time_raw = dbuf_getu32le_p(attr_data, &dpos);
862 create_time_offset = dbuf_geti32le(attr_data, dpos); dpos += 4;
863 mod_time_offset = dbuf_geti32le(attr_data, dpos); dpos += 4;
864 backup_time_offset = dbuf_geti32le(attr_data, dpos); dpos += 4;
866 handle_mac_time(c, d, create_time_raw, create_time_offset, &tmp_timestamp, "create time");
867 if(create_time_raw>0) {
868 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_CREATE, &tmp_timestamp, 40);
870 handle_mac_time(c, d, mod_time_raw, mod_time_offset, &tmp_timestamp, "mod time ");
871 if(mod_time_raw>0) {
872 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_MODIFY, &tmp_timestamp, 40);
874 handle_mac_time(c, d, backup_time_raw, backup_time_offset, &tmp_timestamp, "backup time");
875 if(backup_time_raw>0) {
876 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_BACKUP, &tmp_timestamp, 40);
879 // Expecting 2 bytes for charset, and at least 2 more for the 2 NUL-terminated
880 // strings that follow.
881 if(attr_data->len - dpos < 4) goto done;
883 charset = (int)dbuf_getu16le_p(attr_data, &dpos);
884 de_dbg(c, "charset for fullpath/comment: %d", charset);
886 // TODO: Can we use the correct encoding?
887 srd = dbuf_read_string(attr_data, dpos, attr_data->len-dpos, DE_DBG_MAX_STRLEN,
888 DE_CONVFLAG_STOP_AT_NUL, DE_ENCODING_ASCII);
889 de_dbg(c, "fullpath: \"%s\"", ucstring_getpsz(srd->str));
890 dpos += srd->bytes_consumed;
891 de_destroy_stringreaderdata(c, srd);
893 srd = dbuf_read_string(attr_data, dpos, attr_data->len-dpos, DE_DBG_MAX_STRLEN,
894 DE_CONVFLAG_STOP_AT_NUL, DE_ENCODING_ASCII);
895 de_dbg(c, "comment: \"%s\"", ucstring_getpsz(srd->str));
896 dpos += srd->bytes_consumed;
897 de_destroy_stringreaderdata(c, srd);
899 done:
900 ucstring_destroy(flags_str);
901 dbuf_close(attr_data);
904 // Acorn / SparkFS / RISC OS
905 static void ef_acorn(deark *c, lctx *d, struct extra_item_info_struct *eii)
907 i64 pos = eii->dpos;
908 struct de_riscos_file_attrs rfa;
910 if(eii->dlen<16) return;
911 if(dbuf_memcmp(c->infile, eii->dpos, "ARC0", 4)) {
912 de_dbg(c, "[unsupported Acorn extra-field type]");
913 return;
915 pos += 4;
917 de_zeromem(&rfa, sizeof(struct de_riscos_file_attrs));
918 fmtutil_riscos_read_load_exec(c, c->infile, &rfa, pos);
919 pos += 8;
920 if(rfa.mod_time.is_valid) {
921 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_MODIFY, &rfa.mod_time, 70);
924 fmtutil_riscos_read_attribs_field(c, c->infile, &rfa, pos, 0);
926 if(!eii->is_central && !eii->md->has_riscos_data) {
927 eii->md->has_riscos_data = 1;
928 eii->md->rfa = rfa;
932 struct extra_item_type_info_struct {
933 u16 id;
934 const char *name;
935 extrafield_decoder_fn fn;
// Table of the extra field item types we know about. It is searched
// linearly, by ID, by get_extra_item_type_info().
// An entry whose decoder is NULL is only identified by name; its data is
// not decoded.
// Where an ID's two bytes (in file order, low byte first) are printable
// ASCII, they are shown in the comment following the ID.
937 static const struct extra_item_type_info_struct extra_item_type_info_arr[] = {
938 { 0x0001 /* */, "Zip64 extended information", ef_zip64extinfo },
939 { 0x0007 /* */, "AV Info", NULL },
940 { 0x0008 /* */, "extended language encoding data", NULL },
941 { 0x0009 /* */, "OS/2", ef_os2 },
942 { 0x000a /* */, "NTFS", ef_ntfs },
943 { 0x000c /* */, "OpenVMS", NULL },
944 { 0x000d /* */, "Unix", NULL },
945 { 0x000e /* */, "file stream and fork descriptors", NULL },
946 { 0x000f /* */, "Patch Descriptor", NULL },
947 { 0x0014 /* */, "PKCS#7 Store for X.509 Certificates", NULL },
948 { 0x0015 /* */, "X.509 Certificate ID and Signature for individual file", NULL },
949 { 0x0016 /* */, "X.509 Certificate ID for Central Directory", NULL },
950 { 0x0017 /* */, "Strong Encryption Header", NULL },
951 { 0x0018 /* */, "Record Management Controls", NULL },
952 { 0x0019 /* */, "PKCS#7 Encryption Recipient Certificate List", NULL },
953 { 0x0021 /* */, "Policy Decryption Key", NULL },
954 { 0x0022 /* */, "Smartcrypt Key Provider", NULL },
955 { 0x0023 /* */, "Smartcrypt Policy Key Data", NULL },
956 { 0x0065 /* */, "IBM S/390 (Z390), AS/400 (I400) attributes", NULL },
957 { 0x0066 /* */, "IBM S/390 (Z390), AS/400 (I400) attributes - compressed", NULL },
958 { 0x07c8 /* */, "Macintosh", NULL },
959 { 0x2605 /* */, "ZipIt Macintosh", NULL },
960 { 0x2705 /* */, "ZipIt Macintosh 1.3.5+", ef_zipitmac_2705 },
961 { 0x2805 /* */, "ZipIt Macintosh 1.3.5+", NULL },
962 { 0x334d /* M3 */, "Info-ZIP Macintosh", ef_infozipmac },
963 { 0x4154 /* TA */, "Tandem NSK", NULL },
964 { 0x4341 /* AC */, "Acorn/SparkFS", ef_acorn },
965 { 0x4453 /* SE */, "Windows NT security descriptor (binary ACL)", NULL },
966 { 0x4690 /* */, "POSZIP 4690", NULL },
967 { 0x4704 /* */, "VM/CMS", NULL },
968 { 0x470f /* */, "MVS", NULL },
969 { 0x4854 /* TH */, "Theos, old unofficial port", NULL }, // unzip:extrafld.txt says "inofficial"
970 { 0x4b46 /* FK */, "FWKCS MD5", NULL },
971 { 0x4c41 /* AL */, "OS/2 access control list (text ACL)", NULL },
972 { 0x4d49 /* IM */, "Info-ZIP OpenVMS", NULL },
973 { 0x4d63 /* cM */, "Macintosh SmartZIP", NULL },
974 { 0x4f4c /* LO */, "Xceed original location", NULL },
975 { 0x5350 /* PS */, "Psion?", NULL }, // observed in some Psion files
976 { 0x5356 /* VS */, "AOS/VS (ACL)", NULL },
977 { 0x5455 /* UT */, "extended timestamp", ef_extended_timestamp },
978 { 0x554e /* NU */, "Xceed unicode", NULL },
979 { 0x5855 /* UX */, "Info-ZIP Unix, first version", ef_infozip1 },
980 { 0x6375 /* uc */, "Info-ZIP Unicode Comment", NULL },
981 { 0x6542 /* Be */, "BeOS/BeBox", NULL },
982 { 0x6854 /* Th */, "Theos", NULL },
983 { 0x7075 /* up */, "Info-ZIP Unicode Path", ef_unicodepath },
984 { 0x7441 /* At */, "AtheOS", NULL },
985 { 0x756e /* nu */, "ASi Unix", NULL },
986 { 0x7855 /* Ux */, "Info-ZIP Unix, second version", ef_infozip2 },
987 { 0x7875 /* ux */, "Info-ZIP Unix, third version", ef_infozip3 },
988 { 0xa220 /* */, "Microsoft Open Packaging Growth Hint", NULL },
989 { 0xfb4a /* */, "SMS/QDOS", NULL }, // according to Info-ZIP zip 3.0
990 { 0xfd4a /* */, "SMS/QDOS", NULL } // according to ZIP v6.3.4 APPNOTE
993 static const struct extra_item_type_info_struct *get_extra_item_type_info(i64 id)
995 static const struct extra_item_type_info_struct default_ei =
996 { 0, "?", NULL };
997 size_t i;
999 for(i=0; i<DE_ARRAYCOUNT(extra_item_type_info_arr); i++) {
1000 if(id == (i64)extra_item_type_info_arr[i].id) {
1001 return &extra_item_type_info_arr[i];
1004 return &default_ei;
1007 static void do_extra_data(deark *c, lctx *d,
1008 struct member_data *md, struct dir_entry_data *dd,
1009 i64 pos1, i64 len, int is_central)
1011 i64 pos;
1013 de_dbg(c, "extra data at %"I64_FMT", len=%d", pos1, (int)len);
1014 de_dbg_indent(c, 1);
1016 pos = pos1;
1017 while(1) {
1018 struct extra_item_info_struct eii;
1020 if(pos+4 >= pos1+len) break;
1021 de_zeromem(&eii, sizeof(struct extra_item_info_struct));
1022 eii.md = md;
1023 eii.dd = dd;
1024 eii.is_central = is_central;
1025 eii.dpos = pos+4;
1027 eii.id = (u32)de_getu16le(pos);
1028 eii.dlen = de_getu16le(pos+2);
1030 eii.eiti = get_extra_item_type_info(eii.id);
1032 de_dbg(c, "item id=0x%04x (%s), dlen=%d", (unsigned int)eii.id, eii.eiti->name,
1033 (int)eii.dlen);
1034 if(pos+4+eii.dlen > pos1+len) break;
1036 if(eii.eiti->fn) {
1037 de_dbg_indent(c, 1);
1038 eii.eiti->fn(c, d, &eii);
1039 de_dbg_indent(c, -1);
1042 pos += 4+eii.dlen;
1045 de_dbg_indent(c, -1);
1048 static void do_extract_file(deark *c, lctx *d, struct member_data *md)
1050 dbuf *outf = NULL;
1051 de_finfo *fi = NULL;
1052 struct dir_entry_data *ldd = &md->local_dir_entry_data;
1053 int tsidx;
1054 int saved_indent_level;
1056 de_dbg_indent_save(c, &saved_indent_level);
1057 de_dbg(c, "file data at %"I64_FMT", len=%"I64_FMT, md->file_data_pos,
1058 md->cmpr_size);
1059 de_dbg_indent(c, 1);
1061 if(ldd->bit_flags & 0x1) {
1062 de_err(c, "%s: Encryption is not supported", ucstring_getpsz_d(ldd->fname));
1063 goto done;
1066 if(!is_compression_method_supported(d, ldd->cmi)) {
1067 de_err(c, "%s: Unsupported compression method: %d (%s)",
1068 ucstring_getpsz_d(ldd->fname),
1069 ldd->cmpr_meth, (ldd->cmi ? ldd->cmi->name : "?"));
1070 goto done;
1073 if(md->file_data_pos+md->cmpr_size > c->infile->len) {
1074 de_err(c, "%s: Data goes beyond end of file", ucstring_getpsz_d(ldd->fname));
1075 goto done;
1078 if(md->is_symlink) {
1079 de_warn(c, "\"%s\" is a symbolic link. It will not be extracted as a link.",
1080 ucstring_getpsz_d(ldd->fname));
1083 fi = de_finfo_create(c);
1084 fi->detect_root_dot_dir = 1;
1086 if(ucstring_isnonempty(ldd->fname)) {
1087 unsigned int snflags = DE_SNFLAG_FULLPATH;
1089 if(md->has_riscos_data) {
1090 fmtutil_riscos_append_type_to_filename(c, fi, ldd->fname, &md->rfa, md->is_dir, 0);
1092 if(md->is_dir) snflags |= DE_SNFLAG_STRIPTRAILINGSLASH;
1093 de_finfo_set_name_from_ucstring(c, fi, ldd->fname, snflags);
1094 fi->original_filename_flag = 1;
1097 // This is basically a hack to better deal with Deark's ZIP writer's habit of
1098 // using the NTFS field to store high resolution timestamps. The problem is
1099 // that there seems to be no standard way to indicate the lack of a particular
1100 // timestamp.
1101 // We disregard the NTFS Access or Creation timestamp in some cases, to make it
1102 // more likely that a ZIP file can be round-tripped through Deark, without
1103 // spurious timestamps appearing in the 0x5455 (extended timestamp) field.
1104 if(md->questionable_atime && md->has_extts && !md->has_extts_atime) {
1105 md->tsdata[DE_TIMESTAMPIDX_ACCESS].ts.is_valid = 0;
1107 if(md->questionable_crtime && md->has_extts && !md->has_extts_crtime) {
1108 md->tsdata[DE_TIMESTAMPIDX_CREATE].ts.is_valid = 0;
1111 for(tsidx=0; tsidx<DE_TIMESTAMPIDX_COUNT; tsidx++) {
1112 if(md->tsdata[tsidx].ts.is_valid) {
1113 fi->timestamp[tsidx] = md->tsdata[tsidx].ts;
1117 if(md->has_riscos_data) {
1118 fi->has_riscos_data = 1;
1119 fi->riscos_attribs = md->rfa.attribs;
1120 fi->load_addr = md->rfa.load_addr;
1121 fi->exec_addr = md->rfa.exec_addr;
1124 if(md->is_dir) {
1125 fi->is_directory = 1;
1127 else if(md->is_executable) {
1128 fi->mode_flags |= DE_MODEFLAG_EXE;
1130 else if(md->is_nonexecutable) {
1131 fi->mode_flags |= DE_MODEFLAG_NONEXE;
1134 outf = dbuf_create_output_file(c, NULL, fi, 0);
1135 if(md->is_dir) {
1136 goto done;
1139 (void)do_decompress_member(c, d, md, outf);
1141 done:
1142 dbuf_close(outf);
1143 de_finfo_destroy(c, fi);
1144 de_dbg_indent_restore(c, saved_indent_level);
// Returns a name for the platform code that is stored in the high byte of a
// "version made by" or "version needed" field.
// Codes 0 through 19 and code 30 have names; "?" is returned for any other
// code. The returned string is static, and must not be freed.
static const char *get_platform_name(unsigned int ver_hi)
{
	static const char *const known_platforms[] = {
		"MS-DOS, etc.",
		"Amiga",
		"OpenVMS",
		"Unix",
		"VM/CMS",
		"Atari ST",
		"HPFS",
		"Macintosh",
		"Z-System",
		"CP/M",
		"NTFS or TOPS-20",
		"MVS or NTFS",
		"VSE or SMS/QDOS",
		"Acorn RISC OS",
		"VFAT",
		"MVS",
		"BeOS",
		"Tandem",
		"OS/400",
		"OS X"
	};
	const unsigned int num_known =
		(unsigned int)(sizeof(known_platforms)/sizeof(known_platforms[0]));

	if(ver_hi==30) {
		return "AtheOS/Syllable";
	}
	if(ver_hi>=num_known) {
		return "?";
	}
	return known_platforms[ver_hi];
}
1162 // Look at the attributes, and set some other fields based on them.
1163 static void process_ext_attr(deark *c, lctx *d, struct member_data *md)
1165 if(d->using_scanmode) {
1166 // In this mode, there is no 'external attribs' field.
1167 return;
1170 if(md->ver_made_by_hi==3) { // Unix
1171 unsigned int unix_filetype;
1172 unix_filetype = (md->attr_e>>16)&0170000;
1173 if(unix_filetype == 0040000) {
1174 md->is_dir = 1;
1176 else if(unix_filetype == 0120000) {
1177 md->is_symlink = 1;
1180 if((md->attr_e>>16)&0111) {
1181 md->is_executable = 1;
1183 else {
1184 md->is_nonexecutable = 1;
1188 // MS-DOS-style attributes.
1189 // Technically, we should only do this if
1190 // md->central_dir_entry_data.ver_made_by_hi==0.
1191 // However, most(?) zip programs set the low byte of the external attribs
1192 // to the equivalent MS-DOS attribs, at least in cases where it matters.
1193 if(md->attr_e & 0x10) {
1194 md->is_dir = 1;
1197 // TODO: Support more platforms.
1198 // TODO: The 0x756e (ASi Unix) extra field might be important, as it contains
1199 // file permissions.
1201 if(md->is_dir && md->uncmpr_size!=0) {
1202 // I'd expect a subdirectory entry to have zero size. If it doesn't,
1203 // let's just assume we misidentified it as a subdirectory, and
1204 // extract its data.
1205 md->is_dir = 0;
1209 static void describe_internal_attr(deark *c, struct member_data *md,
1210 de_ucstring *s)
1212 unsigned int bf = md->attr_i;
1214 if(bf & 0x0001) {
1215 ucstring_append_flags_item(s, "text file");
1216 bf -= 0x0001;
1219 if(bf!=0) { // Report any unrecognized flags
1220 ucstring_append_flags_itemf(s, "0x%04x", bf);
1224 // Uses dd->bit_flags, dd->cmpr_method
1225 static void describe_general_purpose_bit_flags(deark *c, struct dir_entry_data *dd,
1226 de_ucstring *s)
1228 const char *name;
1229 unsigned int bf = dd->bit_flags;
1231 if(bf & 0x0001) {
1232 ucstring_append_flags_item(s, "encrypted");
1233 bf -= 0x0001;
1236 if(dd->cmpr_meth==6) { // implode
1237 if(bf & 0x0002) {
1238 name = "8K";
1239 bf -= 0x0002;
1241 else {
1242 name = "4K";
1244 ucstring_append_flags_itemf(s, "%s sliding dictionary", name);
1246 if(bf & 0x0004) {
1247 name = "3";
1248 bf -= 0x0004;
1250 else {
1251 name = "2";
1253 ucstring_append_flags_itemf(s, "%s trees", name);
1256 if(dd->cmpr_meth==8 || dd->cmpr_meth==9) { // deflate flags
1257 unsigned int code;
1259 code = (bf & 0x0006)>>1;
1260 switch(code) {
1261 case 1: name="max"; break;
1262 case 2: name="fast"; break;
1263 case 3: name="super_fast"; break;
1264 default: name="normal";
1266 ucstring_append_flags_itemf(s, "cmprlevel=%s", name);
1267 bf -= (bf & 0x0006);
1270 if(bf & 0x0008) {
1271 ucstring_append_flags_item(s, "uses data descriptor");
1272 bf -= 0x0008;
1275 if(bf & 0x0800) {
1276 ucstring_append_flags_item(s, "UTF-8");
1277 bf -= 0x0800;
1280 if(bf!=0) { // Report any unrecognized flags
1281 ucstring_append_flags_itemf(s, "0x%04x", bf);
1285 // Read either a central directory entry (a.k.a. central directory file header),
1286 // or a local file header.
1287 static int do_file_header(deark *c, lctx *d, struct member_data *md,
1288 int is_central, i64 pos1, i64 *p_entry_size)
1290 i64 pos;
1291 u32 sig;
1292 i64 fn_len, extra_len, comment_len;
1293 int utf8_flag;
1294 int retval = 0;
1295 i64 fixed_header_size;
1296 i64 mod_time_raw, mod_date_raw;
1297 struct dir_entry_data *dd; // Points to either md->central or md->local
1298 de_ucstring *descr = NULL;
1299 struct de_timestamp dos_timestamp;
1300 char timestamp_buf[64];
1302 pos = pos1;
1303 descr = ucstring_create(c);
1304 if(is_central) {
1305 dd = &md->central_dir_entry_data;
1306 fixed_header_size = 46;
1307 de_dbg(c, "central dir entry at %"I64_FMT, pos);
1309 else {
1310 dd = &md->local_dir_entry_data;
1311 fixed_header_size = 30;
1312 if(md->disk_number_start!=d->this_disk_num) {
1313 de_err(c, "Member file not in this ZIP file");
1314 return 0;
1316 de_dbg(c, "local file header at %"I64_FMT, pos);
1318 de_dbg_indent(c, 1);
1320 sig = (u32)de_getu32le_p(&pos);
1321 if(is_central && sig!=CODE_PK12) {
1322 de_err(c, "Central dir file header not found at %"I64_FMT, pos1);
1323 goto done;
1325 else if(!is_central && sig!=CODE_PK34) {
1326 de_err(c, "Local file header not found at %"I64_FMT, pos1);
1327 goto done;
1330 if(is_central) {
1331 md->ver_made_by = (unsigned int)de_getu16le_p(&pos);
1332 md->ver_made_by_hi = (unsigned int)((md->ver_made_by&0xff00)>>8);
1333 md->ver_made_by_lo = (unsigned int)(md->ver_made_by&0x00ff);
1334 de_dbg(c, "version made by: platform=%u (%s), ZIP spec=%u.%u",
1335 md->ver_made_by_hi, get_platform_name(md->ver_made_by_hi),
1336 (unsigned int)(md->ver_made_by_lo/10), (unsigned int)(md->ver_made_by_lo%10));
1339 dd->ver_needed = (unsigned int)de_getu16le_p(&pos);
1340 dd->ver_needed_hi = (unsigned int)((dd->ver_needed&0xff00)>>8);
1341 dd->ver_needed_lo = (unsigned int)(dd->ver_needed&0x00ff);
1342 de_dbg(c, "version needed to extract: platform=%u (%s), ZIP spec=%u.%u",
1343 dd->ver_needed_hi, get_platform_name(dd->ver_needed_hi),
1344 (unsigned int)(dd->ver_needed_lo/10), (unsigned int)(dd->ver_needed_lo%10));
1346 dd->bit_flags = (unsigned int)de_getu16le_p(&pos);
1347 dd->cmpr_meth = (int)de_getu16le_p(&pos);
1348 dd->cmi = get_cmpr_meth_info(dd->cmpr_meth);
1350 utf8_flag = (dd->bit_flags & 0x800)?1:0;
1351 ucstring_empty(descr);
1352 describe_general_purpose_bit_flags(c, dd, descr);
1353 de_dbg(c, "flags: 0x%04x (%s)", dd->bit_flags, ucstring_getpsz(descr));
1355 de_dbg(c, "cmpr method: %d (%s)", dd->cmpr_meth,
1356 (dd->cmi ? dd->cmi->name : "?"));
1358 mod_time_raw = de_getu16le_p(&pos);
1359 mod_date_raw = de_getu16le_p(&pos);
1360 de_dos_datetime_to_timestamp(&dos_timestamp, mod_date_raw, mod_time_raw);
1361 dos_timestamp.tzcode = DE_TZCODE_LOCAL;
1362 de_dbg_timestamp_to_string(c, &dos_timestamp, timestamp_buf, sizeof(timestamp_buf), 0);
1363 de_dbg(c, "mod time: %s", timestamp_buf);
1364 apply_timestamp(c, d, md, DE_TIMESTAMPIDX_MODIFY, &dos_timestamp, 10);
1366 dd->crc_reported = (u32)de_getu32le_p(&pos);
1367 de_dbg(c, "crc (reported): 0x%08x", (unsigned int)dd->crc_reported);
1369 dd->cmpr_size = de_getu32le_p(&pos);
1370 dd->uncmpr_size = de_getu32le_p(&pos);
1371 de_dbg(c, "cmpr size: %" I64_FMT ", uncmpr size: %" I64_FMT, dd->cmpr_size, dd->uncmpr_size);
1373 fn_len = de_getu16le_p(&pos);
1375 extra_len = de_getu16le_p(&pos);
1377 if(is_central) {
1378 comment_len = de_getu16le_p(&pos);
1380 else {
1381 comment_len = 0;
1384 if(!is_central) {
1385 md->file_data_pos = pos + fn_len + extra_len;
1388 if(is_central) {
1389 md->disk_number_start = de_getu16le_p(&pos);
1391 md->attr_i = (unsigned int)de_getu16le_p(&pos);
1392 ucstring_empty(descr);
1393 describe_internal_attr(c, md, descr);
1394 de_dbg(c, "internal file attributes: 0x%04x (%s)", md->attr_i,
1395 ucstring_getpsz(descr));
1397 md->attr_e = (unsigned int)de_getu32le_p(&pos);
1398 de_dbg(c, "external file attributes: 0x%08x", md->attr_e);
1399 de_dbg_indent(c, 1);
1402 // The low byte is, AFAIK, *almost* universally used for MS-DOS-style
1403 // attributes.
1404 unsigned int dos_attrs = (md->attr_e & 0xff);
1405 ucstring_empty(descr);
1406 de_describe_dos_attribs(c, dos_attrs, descr, 0);
1407 de_dbg(c, "%sMS-DOS attribs: 0x%02x (%s)",
1408 (md->ver_made_by_hi==0)?"":"(hypothetical) ",
1409 dos_attrs, ucstring_getpsz(descr));
1412 if((md->attr_e>>16) != 0) {
1413 // A number of platforms put Unix-style file attributes here, so
1414 // decode them as such whenever they are nonzero.
1415 de_dbg(c, "%sUnix attribs: octal(%06o)",
1416 (md->ver_made_by_hi==3)?"":"(hypothetical) ",
1417 (unsigned int)(md->attr_e>>16));
1420 de_dbg_indent(c, -1);
1422 md->offset_of_local_header = de_getu32le_p(&pos);
1423 de_dbg(c, "offset of local header: %"I64_FMT", disk: %d", md->offset_of_local_header,
1424 (int)md->disk_number_start);
1427 if(is_central) {
1428 de_dbg(c, "filename_len: %d, extra_len: %d, comment_len: %d", (int)fn_len,
1429 (int)extra_len, (int)comment_len);
1431 else {
1432 de_dbg(c, "filename_len: %d, extra_len: %d", (int)fn_len,
1433 (int)extra_len);
1436 *p_entry_size = fixed_header_size + fn_len + extra_len + comment_len;
1438 dd->main_fname_pos = pos1+fixed_header_size;
1439 dd->main_fname_len = fn_len;
1440 do_read_filename(c, d, md, dd, pos1+fixed_header_size, fn_len, utf8_flag);
1442 if(extra_len>0) {
1443 do_extra_data(c, d, md, dd, pos1+fixed_header_size+fn_len, extra_len, is_central);
1446 if(comment_len>0) {
1447 do_comment(c, d, pos1+fixed_header_size+fn_len+extra_len, comment_len, utf8_flag,
1448 "member file comment", "fcomment.txt");
1451 if(is_central) {
1452 if(d->used_offset_discrepancy) {
1453 md->offset_of_local_header += d->offset_discrepancy;
1454 de_dbg(c, "assuming local header is really at %"I64_FMT, md->offset_of_local_header);
1456 else if(d->offset_discrepancy!=0) {
1457 u32 sig1, sig2;
1458 i64 alt_pos;
1460 sig1 = (u32)de_getu32le(md->offset_of_local_header);
1461 if(sig1!=CODE_PK34) {
1462 alt_pos = md->offset_of_local_header + d->offset_discrepancy;
1463 sig2 = (u32)de_getu32le(alt_pos);
1464 if(sig2==CODE_PK34) {
1465 de_warn(c, "Local file header found at %"I64_FMT" instead of %"I64_FMT". "
1466 "Assuming offsets are wrong by %"I64_FMT" bytes.",
1467 alt_pos, md->offset_of_local_header, d->offset_discrepancy);
1468 md->offset_of_local_header += d->offset_discrepancy;
1469 d->used_offset_discrepancy = 1;
1475 retval = 1;
1477 done:
1478 de_dbg_indent(c, -1);
1479 ucstring_destroy(descr);
1480 return retval;
1483 static struct member_data *create_member_data(deark *c, lctx *d)
1485 struct member_data *md;
1487 md = de_malloc(c, sizeof(struct member_data));
1488 md->local_dir_entry_data.fname = ucstring_create(c);
1489 md->central_dir_entry_data.fname = ucstring_create(c);
1490 return md;
1493 static void destroy_member_data(deark *c, struct member_data *md)
1495 if(!md) return;
1496 ucstring_destroy(md->central_dir_entry_data.fname);
1497 ucstring_destroy(md->local_dir_entry_data.fname);
1498 de_free(c, md);
1501 static i32 ucstring_lastchar(de_ucstring *s)
1503 if(!s || s->len<1) return 0;
1504 return s->str[s->len-1];
1507 // Things to do after both the central and local headers have been read.
1508 // E.g., extract the file.
1509 static int do_process_member(deark *c, lctx *d, struct member_data *md)
1511 int retval = 0;
1513 // If for some reason we have a central-dir filename but not a local-dir
1514 // filename, use the central-dir filename.
1515 if(ucstring_isempty(md->local_dir_entry_data.fname) &&
1516 ucstring_isnonempty(md->central_dir_entry_data.fname))
1518 ucstring_append_ucstring(md->local_dir_entry_data.fname,
1519 md->central_dir_entry_data.fname);
1522 // Set the final file size and crc fields.
1523 if(md->local_dir_entry_data.bit_flags & 0x0008) {
1524 if(d->using_scanmode) {
1525 de_err(c, "File is incompatible with scan mode");
1526 goto done;
1529 // Indicates that certain fields are not present in the local file header,
1530 // and are instead in a "data descriptor" after the file data.
1531 // Let's hope they are also in the central file header.
1532 md->cmpr_size = md->central_dir_entry_data.cmpr_size;
1533 md->uncmpr_size = md->central_dir_entry_data.uncmpr_size;
1534 md->crc_reported = md->central_dir_entry_data.crc_reported;
1536 else {
1537 md->cmpr_size = md->local_dir_entry_data.cmpr_size;
1538 md->uncmpr_size = md->local_dir_entry_data.uncmpr_size;
1539 md->crc_reported = md->local_dir_entry_data.crc_reported;
1542 process_ext_attr(c, d, md);
1544 // In some cases, detect directories by checking whether the filename ends
1545 // with a slash.
1546 if(!md->is_dir && md->uncmpr_size==0 &&
1547 (d->using_scanmode || (md->ver_made_by_lo<20)))
1549 if(ucstring_lastchar(md->local_dir_entry_data.fname) == '/') {
1550 de_dbg(c, "[assuming this is a subdirectory]");
1551 md->is_dir = 1;
1555 do_extract_file(c, d, md);
1556 retval = 1;
1558 done:
1559 return retval;
1562 // In *entry_size, returns the size of the central dir entry.
1563 // Returns 0 if the central dir entry could not even be parsed.
1564 static int do_member_from_central_dir_entry(deark *c, lctx *d,
1565 struct member_data *md, i64 central_index, i64 pos, i64 *entry_size)
1567 i64 tmp_entry_size;
1568 int retval = 0;
1569 int saved_indent_level;
1571 de_dbg_indent_save(c, &saved_indent_level);
1573 *entry_size = 0;
1575 if(pos >= d->central_dir_offset+d->central_dir_byte_size) {
1576 goto done;
1579 de_dbg(c, "central dir entry #%d", (int)central_index);
1580 de_dbg_indent(c, 1);
1582 // Read the central dir file header
1583 if(!do_file_header(c, d, md, 1, pos, entry_size)) {
1584 goto done;
1587 // If we were able to read the central dir file header, we might be able
1588 // to continue and read more files, even if the local file header fails.
1589 retval = 1;
1591 // Read the local file header
1592 if(!do_file_header(c, d, md, 0, md->offset_of_local_header, &tmp_entry_size)) {
1593 goto done;
1596 do_process_member(c, d, md);
1598 done:
1599 de_dbg_indent_restore(c, saved_indent_level);
1600 return retval;
1603 static int do_central_dir_entry(deark *c, lctx *d,
1604 i64 central_index, i64 pos, i64 *entry_size)
1606 struct member_data *md = NULL;
1607 int ret;
1609 md = create_member_data(c, d);
1610 ret = do_member_from_central_dir_entry(c, d, md, central_index, pos, entry_size);
1611 destroy_member_data(c, md);
1612 return ret;
1615 static int do_local_dir_only(deark *c, lctx *d, i64 pos1, i64 *pmember_size)
1617 struct member_data *md = NULL;
1618 i64 tmp_entry_size;
1619 int retval = 0;
1621 md = create_member_data(c, d);
1623 md->offset_of_local_header = pos1;
1625 // Read the local file header
1626 if(!do_file_header(c, d, md, 0, md->offset_of_local_header, &tmp_entry_size)) {
1627 goto done;
1630 if(!do_process_member(c, d, md)) goto done;
1632 *pmember_size = md->file_data_pos + md->cmpr_size - pos1;
1633 retval = 1;
1635 done:
1636 destroy_member_data(c, md);
1637 return retval;
1640 static void de_run_zip_scanmode(deark *c, lctx *d)
1642 i64 pos = 0;
1644 d->using_scanmode = 1;
1646 while(1) {
1647 int ret;
1648 i64 foundpos = 0;
1649 i64 member_size = 0;
1651 if(pos > c->infile->len-4) break;
1652 ret = dbuf_search(c->infile, g_zipsig34, 4, pos, c->infile->len-pos, &foundpos);
1653 if(!ret) break;
1654 pos = foundpos;
1655 de_dbg(c, "zip member at %"I64_FMT, pos);
1656 de_dbg_indent(c, 1);
1657 ret = do_local_dir_only(c, d, pos, &member_size);
1658 de_dbg_indent(c, -1);
1659 if(!ret) break;
1660 if(member_size<1) break;
1661 pos += member_size;
1665 static int do_central_dir(deark *c, lctx *d)
1667 i64 i;
1668 i64 pos;
1669 i64 entry_size;
1670 int retval = 0;
1672 pos = d->central_dir_offset;
1673 de_dbg(c, "central dir at %"I64_FMT, pos);
1674 de_dbg_indent(c, 1);
1676 for(i=0; i<d->central_dir_num_entries; i++) {
1677 if(!do_central_dir_entry(c, d, i, pos, &entry_size)) {
1678 // TODO: Decide exactly what to do if something fails.
1679 goto done;
1681 pos += entry_size;
1683 retval = 1;
1685 done:
1686 de_dbg_indent(c, -1);
1687 return retval;
1690 static int do_zip64_eocd(deark *c, lctx *d)
1692 i64 pos;
1693 i64 n;
1694 int retval = 0;
1695 int saved_indent_level;
1696 UI ver, ver_hi, ver_lo;
1698 de_dbg_indent_save(c, &saved_indent_level);
1700 if(d->zip64_eocd_disknum!=0) {
1701 de_warn(c, "This might be a multi-disk Zip64 archive, which is not supported");
1702 retval = 1;
1703 d->is_zip64 = 0;
1704 goto done;
1707 pos = d->zip64_eocd_pos;
1708 if(dbuf_memcmp(c->infile, pos, g_zipsig66, 4)) {
1709 de_warn(c, "Expected Zip64 end-of-central-directory record not found at %"I64_FMT, pos);
1710 retval = 1; // Maybe the eocd locator sig was a false positive?
1711 d->is_zip64 = 0;
1712 goto done;
1715 de_dbg(c, "zip64 end-of-central-dir record at %"I64_FMT, pos);
1716 pos += 4;
1717 de_dbg_indent(c, 1);
1719 n = de_geti64le(pos); pos += 8;
1720 de_dbg(c, "size of zip64 eocd record: (12+)%"I64_FMT, n);
1722 ver = (UI)de_getu16le_p(&pos);
1723 ver_hi = (ver&0xff00)>>8;
1724 ver_lo = ver&0x00ff;
1725 de_dbg(c, "version made by: platform=%u (%s), ZIP spec=%u.%u",
1726 ver_hi, get_platform_name(ver_hi), (UI)(ver_lo/10), (UI)(ver_lo%10));
1728 ver = (UI)de_getu16le_p(&pos);
1729 ver_hi = (ver&0xff00)>>8;
1730 ver_lo = ver&0x00ff;
1731 de_dbg(c, "version needed: platform=%u (%s), ZIP spec=%u.%u",
1732 ver_hi, get_platform_name(ver_hi), (UI)(ver_lo/10), (UI)(ver_lo%10));
1734 n = de_getu32le_p(&pos);
1735 de_dbg(c, "this disk num: %"I64_FMT, n);
1737 d->zip64_cd_disknum = (unsigned int)de_getu32le_p(&pos);
1738 d->zip64_num_centr_dir_entries_this_disk = de_geti64le(pos); pos += 8;
1739 de_dbg(c, "central dir num entries on this disk: %"I64_FMT, d->zip64_num_centr_dir_entries_this_disk);
1740 d->zip64_num_centr_dir_entries_total = de_geti64le(pos); pos += 8;
1741 de_dbg(c, "central dir num entries: %"I64_FMT, d->zip64_num_centr_dir_entries_total);
1742 d->zip64_centr_dir_byte_size = de_geti64le(pos); pos += 8;
1743 de_dbg(c, "central dir size: %"I64_FMT, d->zip64_centr_dir_byte_size);
1744 d->zip64_cd_pos = de_geti64le(pos); pos += 8;
1745 de_dbg(c, "central dir offset: %"I64_FMT", disk: %u",
1746 d->zip64_cd_pos, d->zip64_cd_disknum);
1748 retval = 1;
1749 done:
1750 de_dbg_indent_restore(c, saved_indent_level);
1751 return retval;
1754 static void do_zip64_eocd_locator(deark *c, lctx *d)
1756 i64 n;
1757 i64 pos = d->end_of_central_dir_pos - 20;
1759 if(dbuf_memcmp(c->infile, pos, g_zipsig67, 4)) {
1760 return;
1762 de_dbg(c, "zip64 eocd locator found at %"I64_FMT, pos);
1763 pos += 4;
1764 d->is_zip64 = 1;
1765 de_dbg_indent(c, 1);
1766 d->zip64_eocd_disknum = (unsigned int)de_getu32le_p(&pos);
1767 d->zip64_eocd_pos = de_geti64le(pos); pos += 8;
1768 de_dbg(c, "offset of zip64 eocd: %"I64_FMT", disk: %u",
1769 d->zip64_eocd_pos, d->zip64_eocd_disknum);
1770 n = de_getu32le_p(&pos);
1771 de_dbg(c, "total number of disks: %u", (unsigned int)n);
1772 de_dbg_indent(c, -1);
1775 static int do_end_of_central_dir(deark *c, lctx *d)
1777 i64 pos;
1778 i64 num_entries_this_disk;
1779 i64 disk_num_with_central_dir_start;
1780 i64 comment_length;
1781 i64 alt_central_dir_offset;
1782 int retval = 0;
1784 pos = d->end_of_central_dir_pos;
1785 de_dbg(c, "end-of-central-dir record at %"I64_FMT, pos);
1786 de_dbg_indent(c, 1);
1788 d->this_disk_num = de_getu16le(pos+4);
1789 de_dbg(c, "this disk num: %"I64_FMT, d->this_disk_num);
1790 disk_num_with_central_dir_start = de_getu16le(pos+6);
1792 num_entries_this_disk = de_getu16le(pos+8);
1793 de_dbg(c, "central dir num entries on this disk: %"I64_FMT, num_entries_this_disk);
1794 if(d->is_zip64 && (num_entries_this_disk==0xffff)) {
1795 num_entries_this_disk = d->zip64_num_centr_dir_entries_this_disk;
1798 d->central_dir_num_entries = de_getu16le(pos+10);
1799 d->central_dir_byte_size = de_getu32le(pos+12);
1800 d->central_dir_offset = de_getu32le(pos+16);
1801 de_dbg(c, "central dir num entries: %"I64_FMT, d->central_dir_num_entries);
1802 if(d->is_zip64 && (d->central_dir_num_entries==0xffff)) {
1803 d->central_dir_num_entries = d->zip64_num_centr_dir_entries_total;
1806 de_dbg(c, "central dir size: %"I64_FMT, d->central_dir_byte_size);
1807 if(d->is_zip64 && (d->central_dir_byte_size==0xffffffffLL)) {
1808 d->central_dir_byte_size = d->zip64_centr_dir_byte_size;
1811 de_dbg(c, "central dir offset: %"I64_FMT", disk: %"I64_FMT, d->central_dir_offset,
1812 disk_num_with_central_dir_start);
1813 if(d->is_zip64 && (d->central_dir_offset==0xffffffffLL)) {
1814 d->central_dir_offset = d->zip64_cd_pos;
1817 comment_length = de_getu16le(pos+20);
1818 de_dbg(c, "comment length: %d", (int)comment_length);
1819 if(comment_length>0) {
1820 // The comment for the whole .ZIP file presumably has to use
1821 // cp437 encoding. There's no flag that could indicate otherwise.
1822 do_comment(c, d, pos+22, comment_length, 0,
1823 "ZIP file comment", "comment.txt");
1826 // TODO: Figure out exactly how to detect disk spanning.
1827 if(disk_num_with_central_dir_start!=d->this_disk_num ||
1828 (d->is_zip64 && d->zip64_eocd_disknum!=d->this_disk_num))
1830 de_err(c, "Disk spanning not supported");
1831 goto done;
1834 if(d->this_disk_num!=0) {
1835 de_warn(c, "This ZIP file might be part of a multi-part archive, and "
1836 "might not be supported correctly");
1839 if(num_entries_this_disk!=d->central_dir_num_entries) {
1840 de_warn(c, "This ZIP file might not be supported correctly "
1841 "(number-of-entries-this-disk=%d, number-of-entries-total=%d)",
1842 (int)num_entries_this_disk, (int)d->central_dir_num_entries);
1845 alt_central_dir_offset =
1846 (d->is_zip64 ? d->zip64_eocd_pos : d->end_of_central_dir_pos) -
1847 d->central_dir_byte_size;
1849 if(alt_central_dir_offset != d->central_dir_offset) {
1850 u32 sig;
1852 de_warn(c, "Inconsistent central directory offset. Reported to be %"I64_FMT", "
1853 "but based on its reported size, it should be %"I64_FMT".",
1854 d->central_dir_offset, alt_central_dir_offset);
1856 sig = (u32)de_getu32le(alt_central_dir_offset);
1857 if(sig==CODE_PK12) {
1858 d->offset_discrepancy = alt_central_dir_offset - d->central_dir_offset;
1859 de_dbg(c, "likely central dir found at %"I64_FMT, alt_central_dir_offset);
1860 d->central_dir_offset = alt_central_dir_offset;
1864 retval = 1;
1866 done:
1867 de_dbg_indent(c, -1);
1868 return retval;
1871 static void de_run_zip_normally(deark *c, lctx *d)
1873 int eocd_found;
1875 if(c->detection_data && c->detection_data->zip_eocd_looked_for) {
1876 eocd_found = (int)c->detection_data->zip_eocd_found;
1877 d->end_of_central_dir_pos = c->detection_data->zip_eocd_pos;
1879 else {
1880 eocd_found = fmtutil_find_zip_eocd(c, c->infile, &d->end_of_central_dir_pos);
1882 if(!eocd_found) {
1883 if(c->module_disposition==DE_MODDISP_AUTODETECT ||
1884 c->module_disposition==DE_MODDISP_EXPLICIT)
1886 if(de_getu32le(0)==CODE_PK34) {
1887 de_err(c, "ZIP central directory not found. "
1888 "You could try \"-opt zip:scanmode\".");
1889 goto done;
1892 de_err(c, "Not a valid ZIP file");
1893 goto done;
1896 de_dbg(c, "end-of-central-dir record found at %"I64_FMT,
1897 d->end_of_central_dir_pos);
1899 do_zip64_eocd_locator(c, d);
1901 if(d->is_zip64) {
1902 if(!do_zip64_eocd(c, d)) goto done;
1905 if(d->is_zip64)
1906 de_declare_fmt(c, "ZIP-Zip64");
1907 else
1908 de_declare_fmt(c, "ZIP");
1910 if(!do_end_of_central_dir(c, d)) {
1911 goto done;
1914 if(!do_central_dir(c, d)) {
1915 goto done;
1918 done:
1922 static void de_run_zip(deark *c, de_module_params *mparams)
1924 lctx *d = NULL;
1925 de_encoding enc;
1927 d = de_malloc(c, sizeof(lctx));
1929 enc = de_get_input_encoding(c, NULL, DE_ENCODING_CP437);
1930 d->default_enc_for_filenames = enc;
1931 d->default_enc_for_comments = enc;
1933 d->crco = de_crcobj_create(c, DE_CRCOBJ_CRC32_IEEE);
1935 if(de_get_ext_option(c, "zip:scanmode")) {
1936 de_run_zip_scanmode(c, d);
1938 else {
1939 de_run_zip_normally(c, d);
1942 if(d) {
1943 de_crcobj_destroy(d->crco);
1944 de_free(c, d);
1948 static int de_identify_zip(deark *c)
1950 u8 b[4];
1951 int has_zip_ext;
1952 int has_mz_sig = 0;
1954 has_zip_ext = de_input_file_has_ext(c, "zip");
1956 // Fast tests:
1958 de_read(b, 0, 4);
1959 if(!de_memcmp(b, g_zipsig34, 4)) {
1960 return has_zip_ext ? 100 : 90;
1962 if(b[0]=='M' && b[1]=='Z') has_mz_sig = 1;
1964 if(c->infile->len >= 22) {
1965 de_read(b, c->infile->len - 22, 4);
1966 if(!de_memcmp(b, g_zipsig56, 4)) {
1967 return has_zip_ext ? 100 : 19;
1971 // Things to consider:
1972 // * We want de_fmtutil_find_zip_eocd() to be called no more than once, and
1973 // only on files that for some reason we suspect could be ZIP files.
1974 // * If the user disables exe format detection (e.g. with "-onlydetect zip"),
1975 // we want self-extracting-ZIP .exe files to be detected as ZIP instead.
1976 // * And we want the above to work even if the file has a ZIP file comment,
1977 // making it expensive to detect as ZIP.
1979 // Tests below can't return a confidence higher than this.
1980 if(c->detection_data->best_confidence_so_far >= 19) return 0;
1982 // Slow tests:
1984 if(has_mz_sig || has_zip_ext) {
1985 i64 eocd_pos = 0;
1987 c->detection_data->zip_eocd_looked_for = 1;
1988 if(fmtutil_find_zip_eocd(c, c->infile, &eocd_pos)) {
1989 c->detection_data->zip_eocd_found = 1;
1990 c->detection_data->zip_eocd_pos = eocd_pos;
1991 return 19;
1995 return 0;
1998 static void de_help_zip(deark *c)
2000 de_msg(c, "-opt zip:scanmode : Do not use the \"central directory\"");
2001 de_msg(c, "-opt zip:implodebug : Behave like PKZIP 1.01/1.02");
// Module registration: Fills in the module's id and description, and its
// run, identify, and help functions.
2004 void de_module_zip(deark *c, struct deark_module_info *mi)
2006 mi->id = "zip";
2007 mi->desc = "ZIP archive";
2008 mi->run_fn = de_run_zip;
2009 mi->identify_fn = de_identify_zip;
2010 mi->help_fn = de_help_zip;