zip: Better parsing of Info-ZIP type 1 extra field
[deark.git] / modules / zip.c
blob09184ab2c3ab8dc49a0bacd13fdeca9845b808e9
1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
5 // ZIP format
7 #include <deark-config.h>
8 #include <deark-private.h>
9 #include <deark-fmtutil.h>
11 DE_DECLARE_MODULE(de_module_zip);
13 struct localctx_struct;
14 typedef struct localctx_struct lctx;
16 #define CODE_PK12 0x02014b50U
17 #define CODE_PK34 0x04034b50U
18 static const u8 g_zipsig34[4] = {'P', 'K', 0x03, 0x04};
19 static const u8 g_zipsig56[4] = {'P', 'K', 0x05, 0x06};
20 static const u8 g_zipsig66[4] = {'P', 'K', 0x06, 0x06};
21 static const u8 g_zipsig67[4] = {'P', 'K', 0x06, 0x07};
// ZIP-specific settings that a decompressor may need, and that have no
// place in the generic de_dfilter_*_params structs.
struct compression_params {
	// ZIP compression method number
	int cmpr_meth;
	// "bit flags" field taken from the directory entry
	unsigned int bit_flags;
};
30 typedef void (*decompressor_fn)(deark *c, lctx *d, struct compression_params *cparams,
31 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
32 struct de_dfilter_results *dres);
34 struct cmpr_meth_info {
35 int cmpr_meth;
36 unsigned int flags;
37 const char *name;
38 decompressor_fn decompressor;
41 struct dir_entry_data {
42 unsigned int ver_needed;
43 unsigned int ver_needed_hi, ver_needed_lo;
44 i64 cmpr_size, uncmpr_size;
45 int cmpr_meth;
46 const struct cmpr_meth_info *cmi;
47 unsigned int bit_flags;
48 u32 crc_reported;
49 i64 main_fname_pos;
50 i64 main_fname_len;
51 de_ucstring *fname;
54 struct timestamp_data {
55 struct de_timestamp ts; // The best timestamp of this type found so far
56 int quality;
59 struct member_data {
60 unsigned int ver_made_by;
61 unsigned int ver_made_by_hi, ver_made_by_lo;
62 unsigned int attr_i, attr_e;
63 i64 offset_of_local_header;
64 i64 disk_number_start;
65 i64 file_data_pos;
66 int is_nonexecutable;
67 int is_executable;
68 int is_dir;
69 int is_symlink;
70 struct timestamp_data tsdata[DE_TIMESTAMPIDX_COUNT];
71 u8 has_riscos_data;
72 struct de_riscos_file_attrs rfa;
74 struct dir_entry_data central_dir_entry_data;
75 struct dir_entry_data local_dir_entry_data;
77 i64 cmpr_size, uncmpr_size;
78 u32 crc_reported;
81 struct extra_item_type_info_struct;
83 struct extra_item_info_struct {
84 u32 id;
85 i64 dpos;
86 i64 dlen;
87 const struct extra_item_type_info_struct *eiti;
88 struct member_data *md;
89 struct dir_entry_data *dd;
90 int is_central;
93 struct localctx_struct {
94 de_encoding default_enc_for_filenames;
95 de_encoding default_enc_for_comments;
96 i64 end_of_central_dir_pos;
97 i64 central_dir_num_entries;
98 i64 central_dir_byte_size;
99 i64 central_dir_offset;
100 i64 this_disk_num;
101 i64 zip64_eocd_pos;
102 i64 zip64_cd_pos;
103 i64 zip64_num_centr_dir_entries_this_disk;
104 i64 zip64_num_centr_dir_entries_total;
105 i64 zip64_centr_dir_byte_size;
106 unsigned int zip64_eocd_disknum;
107 unsigned int zip64_cd_disknum;
108 i64 offset_discrepancy;
109 int used_offset_discrepancy;
110 int is_zip64;
111 int using_scanmode;
112 struct de_crcobj *crco;
115 typedef void (*extrafield_decoder_fn)(deark *c, lctx *d,
116 struct extra_item_info_struct *eii);
118 static int is_compression_method_supported(lctx *d, const struct cmpr_meth_info *cmi)
120 if(cmi && cmi->decompressor) return 1;
121 return 0;
124 static void do_decompress_shrink(deark *c, lctx *d, struct compression_params *cparams,
125 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
126 struct de_dfilter_results *dres)
128 fmtutil_decompress_zip_shrink(c, dcmpri, dcmpro, dres, NULL);
131 static void do_decompress_reduce(deark *c, lctx *d, struct compression_params *cparams,
132 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
133 struct de_dfilter_results *dres)
135 struct de_zipreduce_params params;
137 de_zeromem(&params, sizeof(struct de_zipreduce_params));
138 params.cmpr_factor = (unsigned int)(cparams->cmpr_meth-1);
139 fmtutil_decompress_zip_reduce(c, dcmpri, dcmpro, dres, &params);
142 static void do_decompress_implode(deark *c, lctx *d, struct compression_params *cparams,
143 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
144 struct de_dfilter_results *dres)
146 struct de_zipimplode_params params;
148 de_zeromem(&params, sizeof(struct de_zipimplode_params));
149 params.bit_flags = cparams->bit_flags;
150 params.mml_bug = (u8)de_get_ext_option_bool(c, "zip:implodebug", 0);
151 fmtutil_decompress_zip_implode(c, dcmpri, dcmpro, dres, &params);
154 static void do_decompress_deflate(deark *c, lctx *d, struct compression_params *cparams,
155 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
156 struct de_dfilter_results *dres)
158 struct de_deflate_params inflparams;
160 de_zeromem(&inflparams, sizeof(struct de_deflate_params));
161 if(cparams->cmpr_meth==9) {
162 inflparams.flags |= DE_DEFLATEFLAG_DEFLATE64;
164 fmtutil_decompress_deflate_ex(c, dcmpri, dcmpro, dres, &inflparams);
167 static void do_decompress_dclimplode(deark *c, lctx *d, struct compression_params *cparams,
168 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
169 struct de_dfilter_results *dres)
171 fmtutil_dclimplode_codectype1(c, dcmpri, dcmpro, dres, NULL);
174 static void do_decompress_stored(deark *c, lctx *d, struct compression_params *cparams,
175 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
176 struct de_dfilter_results *dres)
178 fmtutil_decompress_uncompressed(c, dcmpri, dcmpro, dres, 0);
181 static const struct cmpr_meth_info cmpr_meth_info_arr[] = {
182 { 0, 0x00, "stored", do_decompress_stored },
183 { 1, 0x00, "shrink", do_decompress_shrink },
184 { 2, 0x00, "reduce, CF=1", do_decompress_reduce },
185 { 3, 0x00, "reduce, CF=2", do_decompress_reduce },
186 { 4, 0x00, "reduce, CF=3", do_decompress_reduce },
187 { 5, 0x00, "reduce, CF=4", do_decompress_reduce },
188 { 6, 0x00, "implode", do_decompress_implode },
189 { 8, 0x00, "deflate", do_decompress_deflate },
190 { 9, 0x00, "deflate64", do_decompress_deflate },
191 { 10, 0x00, "PKWARE DCL implode", do_decompress_dclimplode },
192 { 12, 0x00, "bzip2", NULL },
193 { 14, 0x00, "LZMA", NULL },
194 { 16, 0x00, "IBM z/OS CMPSC", NULL },
195 { 18, 0x00, "IBM TERSE (new)", NULL },
196 { 19, 0x00, "IBM LZ77 z Architecture", NULL },
197 { 94, 0x00, "MP3", NULL },
198 { 95, 0x00, "XZ", NULL },
199 { 96, 0x00, "JPEG", NULL },
200 { 97, 0x00, "WavPack", NULL },
201 { 98, 0x00, "PPMd", NULL },
202 { 99, 0x00, "AES", NULL }
205 static const struct cmpr_meth_info *get_cmpr_meth_info(int cmpr_meth)
207 size_t k;
209 for(k=0; k<DE_ARRAYCOUNT(cmpr_meth_info_arr); k++) {
210 if(cmpr_meth_info_arr[k].cmpr_meth == cmpr_meth) {
211 return &cmpr_meth_info_arr[k];
214 return NULL;
217 // Decompress some data, using the given ZIP compression method.
218 // On failure, dres->errcode will be set.
219 static void do_decompress_lowlevel(deark *c, lctx *d, struct de_dfilter_in_params *dcmpri,
220 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres,
221 int cmpr_meth, const struct cmpr_meth_info *cmi, unsigned int bit_flags)
223 struct compression_params cparams;
225 de_zeromem(&cparams, sizeof(struct compression_params));
226 cparams.cmpr_meth = cmpr_meth;
227 cparams.bit_flags = bit_flags;
229 if(cmi && cmi->decompressor) {
230 cmi->decompressor(c, d, &cparams, dcmpri, dcmpro, dres);
232 else {
233 de_internal_err_nonfatal(c, "Unsupported compression method (%d)", cmpr_meth);
234 de_dfilter_set_generic_error(c, dres, NULL);
238 static void our_writelistener_cb(dbuf *f, void *userdata, const u8 *buf, i64 buf_len)
240 struct de_crcobj *crco = (struct de_crcobj *)userdata;
241 de_crcobj_addbuf(crco, buf, buf_len);
244 // Decompress a Zip member file, writing to outf.
245 // Does CRC calculation.
246 // Reports errors to the user.
247 // Only call this if the compression method is supported -- Call
248 // is_compression_method_supported() first.
249 // Assumes ldd->cmi has been set, by calling get_cmpr_meth_info().
250 static int do_decompress_member(deark *c, lctx *d, struct member_data *md, dbuf *outf)
252 struct dir_entry_data *ldd = &md->local_dir_entry_data;
253 struct de_dfilter_in_params dcmpri;
254 struct de_dfilter_out_params dcmpro;
255 struct de_dfilter_results dres;
256 u32 crc_calculated;
257 int retval = 0;
259 de_dfilter_init_objects(c, &dcmpri, &dcmpro, &dres);
260 dcmpri.f = c->infile;
261 dcmpri.pos = md->file_data_pos;
262 dcmpri.len = md->cmpr_size;
263 dcmpro.f = outf;
264 dcmpro.expected_len = md->uncmpr_size;
265 dcmpro.len_known = 1;
267 dbuf_set_writelistener(outf, our_writelistener_cb, (void*)d->crco);
268 de_crcobj_reset(d->crco);
270 do_decompress_lowlevel(c, d, &dcmpri, &dcmpro, &dres, ldd->cmpr_meth,
271 ldd->cmi, ldd->bit_flags);
273 if(dres.errcode) {
274 de_err(c, "%s: %s", ucstring_getpsz_d(ldd->fname),
275 de_dfilter_get_errmsg(c, &dres));
276 goto done;
279 crc_calculated = de_crcobj_getval(d->crco);
280 de_dbg(c, "crc (calculated): 0x%08x", (unsigned int)crc_calculated);
282 if(crc_calculated != md->crc_reported) {
283 de_err(c, "%s: CRC check failed: Expected 0x%08x, got 0x%08x",
284 ucstring_getpsz_d(ldd->fname),
285 (unsigned int)md->crc_reported, (unsigned int)crc_calculated);
286 if(dres.bytes_consumed_valid && (dres.bytes_consumed < dcmpri.len)) {
287 de_info(c, "Note: Only used %"I64_FMT" of %"I64_FMT" compressed bytes.",
288 dres.bytes_consumed, dcmpri.len);
290 goto done;
293 retval = 1;
294 done:
295 return retval;
298 // A variation of do_decompress_member() -
299 // works for Finder attribute data, and OS/2 extended attributes.
300 // Only call this if the compression method is supported -- Call
301 // is_compression_method_supported() first.
302 // outf is assumed to be a membuf.
303 // dcflags: 0x1 = Validate the crc_reported param.
304 static int do_decompress_attrib_data(deark *c, lctx *d,
305 i64 dpos, i64 dlen, dbuf *outf, i64 uncmprsize, u32 crc_reported,
306 int cmpr_meth, const struct cmpr_meth_info *cmi, UI flags, const char *name)
308 struct de_dfilter_in_params dcmpri;
309 struct de_dfilter_out_params dcmpro;
310 struct de_dfilter_results dres;
311 u32 crc_calculated;
312 int retval = 0;
314 de_dfilter_init_objects(c, &dcmpri, &dcmpro, &dres);
315 dcmpri.f = c->infile;
316 dcmpri.pos = dpos;
317 dcmpri.len = dlen;
318 dcmpro.f = outf;
319 dcmpro.expected_len = uncmprsize;
320 dcmpro.len_known = 1;
322 do_decompress_lowlevel(c, d, &dcmpri, &dcmpro, &dres, cmpr_meth, cmi, 0);
323 if(dres.errcode) {
324 goto done; // Could report the error, but this isn't critical data
327 if(flags & 0x1) {
328 de_crcobj_reset(d->crco);
329 de_crcobj_addslice(d->crco, outf, 0, outf->len);
330 crc_calculated = de_crcobj_getval(d->crco);
331 de_dbg(c, "%s crc (calculated): 0x%08x", name, (UI)crc_calculated);
332 if(crc_calculated != crc_reported) goto done;
335 retval = 1;
336 done:
337 return retval;
340 // As we read a member file's attributes, we may encounter multiple timestamps,
341 // which can differ in their precision, and whether they use UTC.
342 // This function is called to remember the "best" file modification time
343 // encountered so far.
344 static void apply_timestamp(deark *c, lctx *d, struct member_data *md, int tstype,
345 const struct de_timestamp *ts, int quality)
347 if(!ts->is_valid) return;
349 // In case of a tie, we prefer the later timestamp that we encountered.
350 // This makes local headers have priority over central headers, for
351 // example.
352 if(quality >= md->tsdata[tstype].quality) {
353 md->tsdata[tstype].ts = *ts;
354 md->tsdata[tstype].quality = quality;
358 static void do_read_filename(deark *c, lctx *d,
359 struct member_data *md, struct dir_entry_data *dd,
360 i64 pos, i64 len, int utf8_flag)
362 de_encoding from_encoding;
364 ucstring_empty(dd->fname);
365 from_encoding = utf8_flag ? DE_ENCODING_UTF8 : d->default_enc_for_filenames;
366 dbuf_read_to_ucstring(c->infile, pos, len, dd->fname, 0, from_encoding);
367 de_dbg(c, "filename: \"%s\"", ucstring_getpsz_d(dd->fname));
370 static void do_comment_display(deark *c, lctx *d, i64 pos, i64 len, de_ext_encoding ee,
371 const char *name)
373 de_ucstring *s = NULL;
375 s = ucstring_create(c);
376 dbuf_read_to_ucstring(c->infile, pos, len, s, 0, ee);
377 de_dbg(c, "%s: \"%s\"", name, ucstring_getpsz_d(s));
378 ucstring_destroy(s);
381 static void do_comment_extract(deark *c, lctx *d, i64 pos, i64 len, de_ext_encoding ee,
382 const char *ext)
384 dbuf *f = NULL;
385 de_ucstring *s = NULL;
387 f = dbuf_create_output_file(c, ext, NULL, DE_CREATEFLAG_IS_AUX);
388 s = ucstring_create(c);
389 dbuf_read_to_ucstring(c->infile, pos, len, s, 0, ee);
390 ucstring_write_as_utf8(c, s, f, 1);
391 ucstring_destroy(s);
392 dbuf_close(f);
395 static void do_comment(deark *c, lctx *d, i64 pos, i64 len, int utf8_flag,
396 const char *name, const char *ext)
398 de_ext_encoding ee;
400 if(len<1) return;
401 ee = utf8_flag ? DE_ENCODING_UTF8 : d->default_enc_for_comments;
402 ee = DE_EXTENC_MAKE(ee, DE_ENCSUBTYPE_HYBRID);
403 if(c->extract_level>=2) {
404 do_comment_extract(c, d, pos, len, ee, ext);
406 else {
407 do_comment_display(c, d, pos, len, ee, name);
411 static void read_unix_timestamp(deark *c, lctx *d, i64 pos,
412 struct de_timestamp *timestamp, const char *name)
414 i64 t;
415 char timestamp_buf[64];
417 t = de_geti32le(pos);
418 de_unix_time_to_timestamp(t, timestamp, 0x1);
419 de_dbg_timestamp_to_string(c, timestamp, timestamp_buf, sizeof(timestamp_buf), 0);
420 de_dbg(c, "%s: %"I64_FMT" (%s)", name, t, timestamp_buf);
423 static void read_FILETIME(deark *c, lctx *d, i64 pos,
424 struct de_timestamp *timestamp, const char *name)
426 i64 t_FILETIME;
427 char timestamp_buf[64];
429 t_FILETIME = de_geti64le(pos);
430 de_FILETIME_to_timestamp(t_FILETIME, timestamp, 0x1);
431 de_dbg_timestamp_to_string(c, timestamp, timestamp_buf, sizeof(timestamp_buf), 0);
432 de_dbg(c, "%s: %s", name, timestamp_buf);
435 static void ef_zip64extinfo(deark *c, lctx *d, struct extra_item_info_struct *eii)
437 i64 n;
438 i64 pos = eii->dpos;
440 if(pos+8 > eii->dpos+eii->dlen) goto done;
441 n = de_geti64le(pos); pos += 8;
442 de_dbg(c, "orig uncmpr file size: %"I64_FMT, n);
443 if(eii->dd->uncmpr_size==0xffffffffLL) {
444 eii->dd->uncmpr_size = n;
447 if(pos+8 > eii->dpos+eii->dlen) goto done;
448 n = de_geti64le(pos); pos += 8;
449 de_dbg(c, "cmpr data size: %"I64_FMT, n);
450 if(eii->dd->cmpr_size==0xffffffffLL) {
451 eii->dd->cmpr_size = n;
454 if(pos+8 > eii->dpos+eii->dlen) goto done;
455 n = de_geti64le(pos); pos += 8;
456 de_dbg(c, "offset of local header record: %"I64_FMT, n);
458 if(pos+4 > eii->dpos+eii->dlen) goto done;
459 n = de_getu32le_p(&pos);
460 de_dbg(c, "disk start number: %"I64_FMT, n);
461 done:
465 // Extra field 0x5455
466 static void ef_extended_timestamp(deark *c, lctx *d, struct extra_item_info_struct *eii)
468 i64 pos = eii->dpos;
469 u8 flags;
470 i64 endpos;
471 int has_mtime, has_atime, has_ctime;
472 struct de_timestamp timestamp_tmp;
474 endpos = pos + eii->dlen;
475 if(pos+1>endpos) return;
476 flags = de_getbyte_p(&pos);
477 de_dbg2(c, "flags: 0x%02x", (UI)flags);
478 if(eii->is_central) {
479 has_mtime = (eii->dlen>=5);
480 has_atime = 0;
481 has_ctime = 0;
483 else {
484 has_mtime = (flags & 0x01)?1:0;
485 has_atime = (flags & 0x02)?1:0;
486 has_ctime = (flags & 0x04)?1:0;
488 if(has_mtime) {
489 if(pos+4>endpos) return;
490 read_unix_timestamp(c, d, pos, &timestamp_tmp, "mtime");
491 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_MODIFY, &timestamp_tmp, 50);
492 pos+=4;
494 if(has_atime) {
495 if(pos+4>endpos) return;
496 read_unix_timestamp(c, d, pos, &timestamp_tmp, "atime");
497 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_ACCESS, &timestamp_tmp, 50);
498 pos+=4;
500 if(has_ctime) {
501 if(pos+4>endpos) return;
502 read_unix_timestamp(c, d, pos, &timestamp_tmp, "creation time");
503 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_CREATE, &timestamp_tmp, 50);
504 pos+=4;
508 // Extra field 0x5855
509 static void ef_infozip1(deark *c, lctx *d, struct extra_item_info_struct *eii)
511 i64 uidnum, gidnum;
512 struct de_timestamp timestamp_tmp;
514 if(eii->dlen<8) return;
515 read_unix_timestamp(c, d, eii->dpos, &timestamp_tmp, "atime");
516 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_ACCESS, &timestamp_tmp, 45);
517 read_unix_timestamp(c, d, eii->dpos+4, &timestamp_tmp, "mtime");
518 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_MODIFY, &timestamp_tmp, 45);
519 if(!eii->is_central && eii->dlen>=12) {
520 uidnum = de_getu16le(eii->dpos+8);
521 gidnum = de_getu16le(eii->dpos+10);
522 de_dbg(c, "uid: %d, gid: %d", (int)uidnum, (int)gidnum);
526 // Extra field 0x7075 - Info-ZIP Unicode Path
527 static void ef_unicodepath(deark *c, lctx *d, struct extra_item_info_struct *eii)
529 u8 ver;
530 de_ucstring *fn = NULL;
531 i64 fnlen;
532 u32 crc_reported, crc_calculated;
534 if(eii->dlen<1) goto done;
535 ver = de_getbyte(eii->dpos);
536 de_dbg(c, "version: %u", (unsigned int)ver);
537 if(ver!=1) goto done;
538 if(eii->dlen<6) goto done;
539 crc_reported = (u32)de_getu32le(eii->dpos+1);
540 de_dbg(c, "name-crc (reported): 0x%08x", (unsigned int)crc_reported);
541 fn = ucstring_create(c);
542 fnlen = eii->dlen - 5;
543 dbuf_read_to_ucstring(c->infile, eii->dpos+5, fnlen, fn, 0, DE_ENCODING_UTF8);
544 de_dbg(c, "unicode name: \"%s\"", ucstring_getpsz_d(fn));
546 // Need to go back and calculate a CRC of the main filename. This is
547 // protection against the case where a ZIP editor may have changed the
548 // original filename, but retained a now-orphaned Unicode Path field.
549 de_crcobj_reset(d->crco);
550 de_crcobj_addslice(d->crco, c->infile, eii->dd->main_fname_pos, eii->dd->main_fname_len);
551 crc_calculated = de_crcobj_getval(d->crco);
552 de_dbg(c, "name-crc (calculated): 0x%08x", (unsigned int)crc_calculated);
554 if(crc_calculated == crc_reported) {
555 ucstring_empty(eii->dd->fname);
556 ucstring_append_ucstring(eii->dd->fname, fn);
559 done:
560 ucstring_destroy(fn);
563 // Extra field 0x7855
564 static void ef_infozip2(deark *c, lctx *d, struct extra_item_info_struct *eii)
566 i64 uidnum, gidnum;
568 if(eii->is_central) return;
569 if(eii->dlen<4) return;
570 uidnum = de_getu16le(eii->dpos);
571 gidnum = de_getu16le(eii->dpos+2);
572 de_dbg(c, "uid: %d, gid: %d", (int)uidnum, (int)gidnum);
575 // Extra field 0x7875
576 static void ef_infozip3(deark *c, lctx *d, struct extra_item_info_struct *eii)
578 i64 pos = eii->dpos;
579 i64 uidnum, gidnum;
580 u8 ver;
581 i64 endpos;
582 i64 sz;
584 endpos = pos+eii->dlen;
586 if(pos+1>endpos) return;
587 ver = de_getbyte_p(&pos);
588 de_dbg(c, "version: %d", (int)ver);
589 if(ver!=1) return;
591 if(pos+1>endpos) return;
592 sz = (i64)de_getbyte_p(&pos);
593 if(pos+sz>endpos) return;
594 uidnum = dbuf_getint_ext(c->infile, pos, (unsigned int)sz, 1, 0);
595 pos += sz;
597 if(pos+1>endpos) return;
598 sz = (i64)de_getbyte_p(&pos);
599 if(pos+sz>endpos) return;
600 gidnum = dbuf_getint_ext(c->infile, pos, (unsigned int)sz, 1, 0);
601 pos += sz;
603 de_dbg(c, "uid: %d, gid: %d", (int)uidnum, (int)gidnum);
606 // Extra field 0x000a
607 static void ef_ntfs(deark *c, lctx *d, struct extra_item_info_struct *eii)
609 i64 pos = eii->dpos;
610 i64 endpos;
611 i64 attr_tag;
612 i64 attr_size;
613 const char *name;
614 struct de_timestamp timestamp_tmp;
616 endpos = pos+eii->dlen;
617 pos += 4; // skip reserved field
619 while(1) {
620 if(pos+4>endpos) break;
621 attr_tag = de_getu16le_p(&pos);
622 attr_size = de_getu16le_p(&pos);
623 if(attr_tag==0x0001) name="NTFS filetimes";
624 else name="?";
625 de_dbg(c, "tag: 0x%04x (%s), dlen: %d", (unsigned int)attr_tag, name,
626 (int)attr_size);
627 if(pos+attr_size>endpos) break;
629 de_dbg_indent(c, 1);
630 if(attr_tag==0x0001 && attr_size>=24) {
631 read_FILETIME(c, d, pos, &timestamp_tmp, "mtime");
632 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_MODIFY, &timestamp_tmp, 90);
633 read_FILETIME(c, d, pos+8, &timestamp_tmp, "atime");
634 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_ACCESS, &timestamp_tmp, 90);
635 read_FILETIME(c, d, pos+16, &timestamp_tmp, "creation time");
636 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_CREATE, &timestamp_tmp, 90);
638 de_dbg_indent(c, -1);
640 pos += attr_size;
644 // Extra field 0x0009
645 static void ef_os2(deark *c, lctx *d, struct extra_item_info_struct *eii)
647 i64 pos = eii->dpos;
648 i64 endpos;
649 i64 ulen;
650 i64 cmpr_attr_size;
651 int cmpr_meth;
652 u32 crc_reported;
653 const struct cmpr_meth_info *cmi = NULL;
654 const char *name = "OS/2 ext. attr. data";
655 dbuf *attr_data = NULL;
656 de_module_params *mparams = NULL;
657 int ret;
659 endpos = pos+eii->dlen;
660 if(pos+4>endpos) goto done;
661 ulen = de_getu32le_p(&pos);
662 de_dbg(c, "uncmpr ext attr data size: %"I64_FMT, ulen);
663 if(eii->is_central) goto done;
665 if(pos+2>endpos) goto done;
666 cmpr_meth = (int)de_getu16le_p(&pos);
667 de_dbg(c, "ext attr cmpr method: %d", cmpr_meth);
669 if(pos+4>endpos) goto done;
670 crc_reported = (u32)de_getu32le_p(&pos);
671 de_dbg(c, "ext attr crc (reported): 0x%08x", (unsigned int)crc_reported);
673 cmpr_attr_size = endpos-pos;
674 de_dbg(c, "cmpr ext attr data at %"I64_FMT", len=%"I64_FMT, pos, cmpr_attr_size);
675 if(pos + cmpr_attr_size > endpos) goto done;
677 cmi = get_cmpr_meth_info(cmpr_meth);
678 if(cmpr_meth==6 || !is_compression_method_supported(d, cmi)) {
679 de_warn(c, "%s: Unsupported compression method: %d (%s)",
680 name, cmpr_meth, (cmi ? cmi->name : "?"));
681 goto done;
684 attr_data = dbuf_create_membuf(c, ulen, 0x1);
685 ret = do_decompress_attrib_data(c, d, pos, cmpr_attr_size,
686 attr_data, ulen, crc_reported, cmpr_meth, cmi, 0x1, name);
687 if(!ret) {
688 de_warn(c, "Failed to decompress %s", name);
689 goto done;
692 // attr_data contains an OS/2 extended attribute structure (FEA2LIST)
693 mparams = de_malloc(c, sizeof(de_module_params));
694 mparams->in_params.codes = "L";
695 de_dbg(c, "decoding OS/2 ext. attribs., unc. len=%"I64_FMT, attr_data->len);
696 de_dbg_indent(c, 1);
697 de_run_module_by_id_on_slice(c, "ea_data", mparams, attr_data, 0, attr_data->len);
698 de_dbg_indent(c, -1);
700 done:
701 dbuf_close(attr_data);
702 de_free(c, mparams);
705 // Extra field 0x2705 (ZipIt Macintosh 1.3.5+)
706 static void ef_zipitmac_2705(deark *c, lctx *d, struct extra_item_info_struct *eii)
708 struct de_fourcc sig;
709 struct de_fourcc filetype;
710 struct de_fourcc creator;
712 if(eii->dlen<4) goto done;
713 dbuf_read_fourcc(c->infile, eii->dpos, &sig, 4, 0x0);
714 de_dbg(c, "signature: '%s'", sig.id_dbgstr);
715 if(sig.id!=0x5a504954U) goto done; // expecting 'ZPIT'
716 if(eii->dlen<12) goto done;
717 dbuf_read_fourcc(c->infile, eii->dpos+4, &filetype, 4, 0x0);
718 de_dbg(c, "filetype: '%s'", filetype.id_dbgstr);
719 dbuf_read_fourcc(c->infile, eii->dpos+8, &creator, 4, 0x0);
720 de_dbg(c, "creator: '%s'", creator.id_dbgstr);
722 done:
726 // The time will be returned in the caller-supplied 'ts'
727 static void handle_mac_time(deark *c, lctx *d,
728 i64 mt_raw, i64 mt_offset,
729 struct de_timestamp *ts, const char *name)
731 char timestamp_buf[64];
732 de_mac_time_to_timestamp(mt_raw - mt_offset, ts);
733 ts->tzcode = DE_TZCODE_UTC;
734 de_dbg_timestamp_to_string(c, ts, timestamp_buf, sizeof(timestamp_buf), 0);
735 de_dbg(c, "%s: %"I64_FMT" %+"I64_FMT" (%s)", name,
736 mt_raw, -mt_offset, timestamp_buf);
739 // Extra field 0x334d (Info-ZIP Macintosh)
740 static void ef_infozipmac(deark *c, lctx *d, struct extra_item_info_struct *eii)
742 i64 pos = eii->dpos;
743 i64 dpos;
744 i64 ulen;
745 i64 cmpr_attr_size;
746 unsigned int flags;
747 int cmpr_meth;
748 const struct cmpr_meth_info *cmi = NULL;
749 struct de_fourcc filetype;
750 struct de_fourcc creator;
751 de_ucstring *flags_str = NULL;
752 dbuf *attr_data = NULL;
753 int ret;
754 i64 create_time_raw;
755 i64 create_time_offset;
756 i64 mod_time_raw;
757 i64 mod_time_offset;
758 i64 backup_time_raw;
759 i64 backup_time_offset;
760 struct de_timestamp tmp_timestamp;
761 int charset;
762 u32 crc_reported = 0;
763 UI dcflags = 0;
764 struct de_stringreaderdata *srd;
766 if(eii->dlen<14) goto done;
768 ulen = de_getu32le_p(&pos);
769 de_dbg(c, "uncmpr. finder attr. size: %d", (int)ulen);
771 flags = (unsigned int)de_getu16le_p(&pos);
772 flags_str = ucstring_create(c);
773 if(flags&0x0001) ucstring_append_flags_item(flags_str, "data_fork");
774 if(flags&0x0002) ucstring_append_flags_item(flags_str, "0x0002"); // something about the filename
775 ucstring_append_flags_item(flags_str,
776 (flags&0x0004)?"uncmpressed_attribute_data":"compressed_attribute_data");
777 if(flags&0x0008) ucstring_append_flags_item(flags_str, "64-bit_times");
778 if(flags&0x0010) ucstring_append_flags_item(flags_str, "no_timezone_offsets");
779 de_dbg(c, "flags: 0x%04x (%s)", flags, ucstring_getpsz(flags_str));
781 dbuf_read_fourcc(c->infile, pos, &filetype, 4, 0x0);
782 de_dbg(c, "filetype: '%s'", filetype.id_dbgstr);
783 pos += 4;
784 dbuf_read_fourcc(c->infile, pos, &creator, 4, 0x0);
785 de_dbg(c, "creator: '%s'", creator.id_dbgstr);
786 pos += 4;
788 if(eii->is_central) goto done;
790 if(flags&0x0004) { // Uncompressed attribute data
791 cmpr_meth = 0;
793 else {
794 dcflags |= 0x1; // CRC is known
795 cmpr_meth = (int)de_getu16le_p(&pos);
796 cmi = get_cmpr_meth_info(cmpr_meth);
797 de_dbg(c, "finder attr. cmpr. method: %d (%s)", cmpr_meth, (cmi ? cmi->name : "?"));
799 crc_reported = (u32)de_getu32le_p(&pos);
800 de_dbg(c, "finder attr. data crc (reported): 0x%08x", (UI)crc_reported);
803 // The rest of the data is Finder attribute data
804 cmpr_attr_size = eii->dpos+eii->dlen - pos;
805 de_dbg(c, "cmpr. finder attr. size: %d", (int)cmpr_attr_size);
806 if(ulen<1 || ulen>1000000) goto done;
808 // Type 6 (implode) compression won't work here, because it needs
809 // additional parameters seemingly not provided by the Finder attr data.
810 if(cmpr_meth==6 || !is_compression_method_supported(d, cmi)) {
811 de_warn(c, "Finder attribute data: Unsupported compression method: %d (%s)",
812 cmpr_meth, (cmi ? cmi->name : "?"));
813 goto done;
816 // Decompress and decode the Finder attribute data
817 attr_data = dbuf_create_membuf(c, ulen, 0x1);
818 ret = do_decompress_attrib_data(c, d, pos, cmpr_attr_size,
819 attr_data, ulen, crc_reported, cmpr_meth, cmi, dcflags, "finder attr. data");
820 if(!ret) {
821 de_warn(c, "Failed to decompress finder attribute data");
822 goto done;
825 dpos = 0;
826 dpos += 2; // Finder flags
827 dpos += 4; // Icon location
828 dpos += 2; // Folder
829 dpos += 16; // FXInfo
830 dpos += 1; // file version number
831 dpos += 1; // dir access rights
833 if(flags&0x0008) goto done; // We don't support 64-bit times
834 if(flags&0x0010) goto done; // We want timezone offsets
835 if(attr_data->len - dpos < 6*4) goto done;
837 create_time_raw = dbuf_getu32le_p(attr_data, &dpos);
838 mod_time_raw = dbuf_getu32le_p(attr_data, &dpos);
839 backup_time_raw = dbuf_getu32le_p(attr_data, &dpos);
840 create_time_offset = dbuf_geti32le(attr_data, dpos); dpos += 4;
841 mod_time_offset = dbuf_geti32le(attr_data, dpos); dpos += 4;
842 backup_time_offset = dbuf_geti32le(attr_data, dpos); dpos += 4;
844 handle_mac_time(c, d, create_time_raw, create_time_offset, &tmp_timestamp, "create time");
845 if(create_time_raw>0) {
846 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_CREATE, &tmp_timestamp, 40);
848 handle_mac_time(c, d, mod_time_raw, mod_time_offset, &tmp_timestamp, "mod time ");
849 if(mod_time_raw>0) {
850 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_MODIFY, &tmp_timestamp, 40);
852 handle_mac_time(c, d, backup_time_raw, backup_time_offset, &tmp_timestamp, "backup time");
853 if(backup_time_raw>0) {
854 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_BACKUP, &tmp_timestamp, 40);
857 // Expecting 2 bytes for charset, and at least 2 more for the 2 NUL-terminated
858 // strings that follow.
859 if(attr_data->len - dpos < 4) goto done;
861 charset = (int)dbuf_getu16le_p(attr_data, &dpos);
862 de_dbg(c, "charset for fullpath/comment: %d", charset);
864 // TODO: Can we use the correct encoding?
865 srd = dbuf_read_string(attr_data, dpos, attr_data->len-dpos, DE_DBG_MAX_STRLEN,
866 DE_CONVFLAG_STOP_AT_NUL, DE_ENCODING_ASCII);
867 de_dbg(c, "fullpath: \"%s\"", ucstring_getpsz(srd->str));
868 dpos += srd->bytes_consumed;
869 de_destroy_stringreaderdata(c, srd);
871 srd = dbuf_read_string(attr_data, dpos, attr_data->len-dpos, DE_DBG_MAX_STRLEN,
872 DE_CONVFLAG_STOP_AT_NUL, DE_ENCODING_ASCII);
873 de_dbg(c, "comment: \"%s\"", ucstring_getpsz(srd->str));
874 dpos += srd->bytes_consumed;
875 de_destroy_stringreaderdata(c, srd);
877 done:
878 ucstring_destroy(flags_str);
879 dbuf_close(attr_data);
882 // Acorn / SparkFS / RISC OS
883 static void ef_acorn(deark *c, lctx *d, struct extra_item_info_struct *eii)
885 i64 pos = eii->dpos;
886 struct de_riscos_file_attrs rfa;
888 if(eii->dlen<16) return;
889 if(dbuf_memcmp(c->infile, eii->dpos, "ARC0", 4)) {
890 de_dbg(c, "[unsupported Acorn extra-field type]");
891 return;
893 pos += 4;
895 de_zeromem(&rfa, sizeof(struct de_riscos_file_attrs));
896 fmtutil_riscos_read_load_exec(c, c->infile, &rfa, pos);
897 pos += 8;
898 if(rfa.mod_time.is_valid) {
899 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_MODIFY, &rfa.mod_time, 70);
902 fmtutil_riscos_read_attribs_field(c, c->infile, &rfa, pos, 0);
904 if(!eii->is_central && !eii->md->has_riscos_data) {
905 eii->md->has_riscos_data = 1;
906 eii->md->rfa = rfa;
910 struct extra_item_type_info_struct {
911 u16 id;
912 const char *name;
913 extrafield_decoder_fn fn;
915 static const struct extra_item_type_info_struct extra_item_type_info_arr[] = {
916 { 0x0001 /* */, "Zip64 extended information", ef_zip64extinfo },
917 { 0x0007 /* */, "AV Info", NULL },
918 { 0x0008 /* */, "extended language encoding data", NULL },
919 { 0x0009 /* */, "OS/2", ef_os2 },
920 { 0x000a /* */, "NTFS", ef_ntfs },
921 { 0x000c /* */, "OpenVMS", NULL },
922 { 0x000d /* */, "Unix", NULL },
923 { 0x000e /* */, "file stream and fork descriptors", NULL },
924 { 0x000f /* */, "Patch Descriptor", NULL },
925 { 0x0014 /* */, "PKCS#7 Store for X.509 Certificates", NULL },
926 { 0x0015 /* */, "X.509 Certificate ID and Signature for individual file", NULL },
927 { 0x0016 /* */, "X.509 Certificate ID for Central Directory", NULL },
928 { 0x0017 /* */, "Strong Encryption Header", NULL },
929 { 0x0018 /* */, "Record Management Controls", NULL },
930 { 0x0019 /* */, "PKCS#7 Encryption Recipient Certificate List", NULL },
931 { 0x0021 /* */, "Policy Decryption Key", NULL },
932 { 0x0022 /* */, "Smartcrypt Key Provider", NULL },
933 { 0x0023 /* */, "Smartcrypt Policy Key Data", NULL },
934 { 0x0065 /* */, "IBM S/390 (Z390), AS/400 (I400) attributes", NULL },
935 { 0x0066 /* */, "IBM S/390 (Z390), AS/400 (I400) attributes - compressed", NULL },
936 { 0x07c8 /* */, "Macintosh", NULL },
937 { 0x2605 /* */, "ZipIt Macintosh", NULL },
938 { 0x2705 /* */, "ZipIt Macintosh 1.3.5+", ef_zipitmac_2705 },
939 { 0x2805 /* */, "ZipIt Macintosh 1.3.5+", NULL },
940 { 0x334d /* M3 */, "Info-ZIP Macintosh", ef_infozipmac },
941 { 0x4154 /* TA */, "Tandem NSK", NULL },
942 { 0x4341 /* AC */, "Acorn/SparkFS", ef_acorn },
943 { 0x4453 /* SE */, "Windows NT security descriptor (binary ACL)", NULL },
944 { 0x4690 /* */, "POSZIP 4690", NULL },
945 { 0x4704 /* */, "VM/CMS", NULL },
946 { 0x470f /* */, "MVS", NULL },
947 { 0x4854 /* TH */, "Theos, old unofficial port", NULL }, // unzip:extrafld.txt says "inofficial"
948 { 0x4b46 /* FK */, "FWKCS MD5", NULL },
949 { 0x4c41 /* AL */, "OS/2 access control list (text ACL)", NULL },
950 { 0x4d49 /* IM */, "Info-ZIP OpenVMS", NULL },
951 { 0x4d63 /* cM */, "Macintosh SmartZIP", NULL },
952 { 0x4f4c /* LO */, "Xceed original location", NULL },
953 { 0x5350 /* PS */, "Psion?", NULL }, // observed in some Psion files
954 { 0x5356 /* VS */, "AOS/VS (ACL)", NULL },
955 { 0x5455 /* UT */, "extended timestamp", ef_extended_timestamp },
956 { 0x554e /* NU */, "Xceed unicode", NULL },
957 { 0x5855 /* UX */, "Info-ZIP Unix, first version", ef_infozip1 },
958 { 0x6375 /* uc */, "Info-ZIP Unicode Comment", NULL },
959 { 0x6542 /* Be */, "BeOS/BeBox", NULL },
960 { 0x6854 /* Th */, "Theos", NULL },
961 { 0x7075 /* up */, "Info-ZIP Unicode Path", ef_unicodepath },
962 { 0x7441 /* At */, "AtheOS", NULL },
963 { 0x756e /* nu */, "ASi Unix", NULL },
964 { 0x7855 /* Ux */, "Info-ZIP Unix, second version", ef_infozip2 },
965 { 0x7875 /* ux */, "Info-ZIP Unix, third version", ef_infozip3 },
966 { 0xa220 /* */, "Microsoft Open Packaging Growth Hint", NULL },
967 { 0xfb4a /* */, "SMS/QDOS", NULL }, // according to Info-ZIP zip 3.0
968 { 0xfd4a /* */, "SMS/QDOS", NULL } // according to ZIP v6.3.4 APPNOTE
971 static const struct extra_item_type_info_struct *get_extra_item_type_info(i64 id)
973 static const struct extra_item_type_info_struct default_ei =
974 { 0, "?", NULL };
975 size_t i;
977 for(i=0; i<DE_ARRAYCOUNT(extra_item_type_info_arr); i++) {
978 if(id == (i64)extra_item_type_info_arr[i].id) {
979 return &extra_item_type_info_arr[i];
982 return &default_ei;
985 static void do_extra_data(deark *c, lctx *d,
986 struct member_data *md, struct dir_entry_data *dd,
987 i64 pos1, i64 len, int is_central)
989 i64 pos;
991 de_dbg(c, "extra data at %"I64_FMT", len=%d", pos1, (int)len);
992 de_dbg_indent(c, 1);
994 pos = pos1;
995 while(1) {
996 struct extra_item_info_struct eii;
998 if(pos+4 >= pos1+len) break;
999 de_zeromem(&eii, sizeof(struct extra_item_info_struct));
1000 eii.md = md;
1001 eii.dd = dd;
1002 eii.is_central = is_central;
1003 eii.dpos = pos+4;
1005 eii.id = (u32)de_getu16le(pos);
1006 eii.dlen = de_getu16le(pos+2);
1008 eii.eiti = get_extra_item_type_info(eii.id);
1010 de_dbg(c, "item id=0x%04x (%s), dlen=%d", (unsigned int)eii.id, eii.eiti->name,
1011 (int)eii.dlen);
1012 if(pos+4+eii.dlen > pos1+len) break;
1014 if(eii.eiti->fn) {
1015 de_dbg_indent(c, 1);
1016 eii.eiti->fn(c, d, &eii);
1017 de_dbg_indent(c, -1);
1020 pos += 4+eii.dlen;
1023 de_dbg_indent(c, -1);
1026 static void do_extract_file(deark *c, lctx *d, struct member_data *md)
1028 dbuf *outf = NULL;
1029 de_finfo *fi = NULL;
1030 struct dir_entry_data *ldd = &md->local_dir_entry_data;
1031 int tsidx;
1032 int saved_indent_level;
1034 de_dbg_indent_save(c, &saved_indent_level);
1035 de_dbg(c, "file data at %"I64_FMT", len=%"I64_FMT, md->file_data_pos,
1036 md->cmpr_size);
1037 de_dbg_indent(c, 1);
1039 if(ldd->bit_flags & 0x1) {
1040 de_err(c, "%s: Encryption is not supported", ucstring_getpsz_d(ldd->fname));
1041 goto done;
1044 if(!is_compression_method_supported(d, ldd->cmi)) {
1045 de_err(c, "%s: Unsupported compression method: %d (%s)",
1046 ucstring_getpsz_d(ldd->fname),
1047 ldd->cmpr_meth, (ldd->cmi ? ldd->cmi->name : "?"));
1048 goto done;
1051 if(md->file_data_pos+md->cmpr_size > c->infile->len) {
1052 de_err(c, "%s: Data goes beyond end of file", ucstring_getpsz_d(ldd->fname));
1053 goto done;
1056 if(md->is_symlink) {
1057 de_warn(c, "\"%s\" is a symbolic link. It will not be extracted as a link.",
1058 ucstring_getpsz_d(ldd->fname));
1061 fi = de_finfo_create(c);
1062 fi->detect_root_dot_dir = 1;
1064 if(ucstring_isnonempty(ldd->fname)) {
1065 unsigned int snflags = DE_SNFLAG_FULLPATH;
1067 if(md->has_riscos_data) {
1068 fmtutil_riscos_append_type_to_filename(c, fi, ldd->fname, &md->rfa, md->is_dir, 0);
1070 if(md->is_dir) snflags |= DE_SNFLAG_STRIPTRAILINGSLASH;
1071 de_finfo_set_name_from_ucstring(c, fi, ldd->fname, snflags);
1072 fi->original_filename_flag = 1;
1075 for(tsidx=0; tsidx<DE_TIMESTAMPIDX_COUNT; tsidx++) {
1076 if(md->tsdata[tsidx].ts.is_valid) {
1077 fi->timestamp[tsidx] = md->tsdata[tsidx].ts;
1081 if(md->has_riscos_data) {
1082 fi->has_riscos_data = 1;
1083 fi->riscos_attribs = md->rfa.attribs;
1084 fi->load_addr = md->rfa.load_addr;
1085 fi->exec_addr = md->rfa.exec_addr;
1088 if(md->is_dir) {
1089 fi->is_directory = 1;
1091 else if(md->is_executable) {
1092 fi->mode_flags |= DE_MODEFLAG_EXE;
1094 else if(md->is_nonexecutable) {
1095 fi->mode_flags |= DE_MODEFLAG_NONEXE;
1098 outf = dbuf_create_output_file(c, NULL, fi, 0);
1099 if(md->is_dir) {
1100 goto done;
1103 (void)do_decompress_member(c, d, md, outf);
1105 done:
1106 dbuf_close(outf);
1107 de_finfo_destroy(c, fi);
1108 de_dbg_indent_restore(c, saved_indent_level);
// Translate the "platform" byte (high byte) of a ZIP version field into a
// printable name. Codes 0-19 come from a table, 30 is special-cased, and
// anything else yields "?". The returned string is statically allocated.
static const char *get_platform_name(unsigned int ver_hi)
{
	static const char *names_by_code[20] = {
		"MS-DOS, etc.", "Amiga", "OpenVMS", "Unix",
		"VM/CMS", "Atari ST", "HPFS", "Macintosh",
		"Z-System", "CP/M", "NTFS or TOPS-20", "MVS or NTFS",
		"VSE or SMS/QDOS", "Acorn RISC OS", "VFAT", "MVS",
		"BeOS", "Tandem", "OS/400", "OS X" };
	const unsigned int num_names =
		(unsigned int)(sizeof(names_by_code)/sizeof(names_by_code[0]));

	if(ver_hi==30) {
		return "AtheOS/Syllable";
	}
	return (ver_hi<num_names) ? names_by_code[ver_hi] : "?";
}
1126 // Look at the attributes, and set some other fields based on them.
1127 static void process_ext_attr(deark *c, lctx *d, struct member_data *md)
1129 if(d->using_scanmode) {
1130 // In this mode, there is no 'external attribs' field.
1131 return;
1134 if(md->ver_made_by_hi==3) { // Unix
1135 unsigned int unix_filetype;
1136 unix_filetype = (md->attr_e>>16)&0170000;
1137 if(unix_filetype == 0040000) {
1138 md->is_dir = 1;
1140 else if(unix_filetype == 0120000) {
1141 md->is_symlink = 1;
1144 if((md->attr_e>>16)&0111) {
1145 md->is_executable = 1;
1147 else {
1148 md->is_nonexecutable = 1;
1152 // MS-DOS-style attributes.
1153 // Technically, we should only do this if
1154 // md->central_dir_entry_data.ver_made_by_hi==0.
1155 // However, most(?) zip programs set the low byte of the external attribs
1156 // to the equivalent MS-DOS attribs, at least in cases where it matters.
1157 if(md->attr_e & 0x10) {
1158 md->is_dir = 1;
1161 // TODO: Support more platforms.
1162 // TODO: The 0x756e (ASi Unix) extra field might be important, as it contains
1163 // file permissions.
1165 if(md->is_dir && md->uncmpr_size!=0) {
1166 // I'd expect a subdirectory entry to have zero size. If it doesn't,
1167 // let's just assume we misidentified it as a subdirectory, and
1168 // extract its data.
1169 md->is_dir = 0;
1173 static void describe_internal_attr(deark *c, struct member_data *md,
1174 de_ucstring *s)
1176 unsigned int bf = md->attr_i;
1178 if(bf & 0x0001) {
1179 ucstring_append_flags_item(s, "text file");
1180 bf -= 0x0001;
1183 if(bf!=0) { // Report any unrecognized flags
1184 ucstring_append_flags_itemf(s, "0x%04x", bf);
1188 // Uses dd->bit_flags, dd->cmpr_method
1189 static void describe_general_purpose_bit_flags(deark *c, struct dir_entry_data *dd,
1190 de_ucstring *s)
1192 const char *name;
1193 unsigned int bf = dd->bit_flags;
1195 if(bf & 0x0001) {
1196 ucstring_append_flags_item(s, "encrypted");
1197 bf -= 0x0001;
1200 if(dd->cmpr_meth==6) { // implode
1201 if(bf & 0x0002) {
1202 name = "8K";
1203 bf -= 0x0002;
1205 else {
1206 name = "4K";
1208 ucstring_append_flags_itemf(s, "%s sliding dictionary", name);
1210 if(bf & 0x0004) {
1211 name = "3";
1212 bf -= 0x0004;
1214 else {
1215 name = "2";
1217 ucstring_append_flags_itemf(s, "%s trees", name);
1220 if(dd->cmpr_meth==8 || dd->cmpr_meth==9) { // deflate flags
1221 unsigned int code;
1223 code = (bf & 0x0006)>>1;
1224 switch(code) {
1225 case 1: name="max"; break;
1226 case 2: name="fast"; break;
1227 case 3: name="super_fast"; break;
1228 default: name="normal";
1230 ucstring_append_flags_itemf(s, "cmprlevel=%s", name);
1231 bf -= (bf & 0x0006);
1234 if(bf & 0x0008) {
1235 ucstring_append_flags_item(s, "uses data descriptor");
1236 bf -= 0x0008;
1239 if(bf & 0x0800) {
1240 ucstring_append_flags_item(s, "UTF-8");
1241 bf -= 0x0800;
1244 if(bf!=0) { // Report any unrecognized flags
1245 ucstring_append_flags_itemf(s, "0x%04x", bf);
1249 // Read either a central directory entry (a.k.a. central directory file header),
1250 // or a local file header.
1251 static int do_file_header(deark *c, lctx *d, struct member_data *md,
1252 int is_central, i64 pos1, i64 *p_entry_size)
1254 i64 pos;
1255 u32 sig;
1256 i64 fn_len, extra_len, comment_len;
1257 int utf8_flag;
1258 int retval = 0;
1259 i64 fixed_header_size;
1260 i64 mod_time_raw, mod_date_raw;
1261 struct dir_entry_data *dd; // Points to either md->central or md->local
1262 de_ucstring *descr = NULL;
1263 struct de_timestamp dos_timestamp;
1264 char timestamp_buf[64];
1266 pos = pos1;
1267 descr = ucstring_create(c);
1268 if(is_central) {
1269 dd = &md->central_dir_entry_data;
1270 fixed_header_size = 46;
1271 de_dbg(c, "central dir entry at %"I64_FMT, pos);
1273 else {
1274 dd = &md->local_dir_entry_data;
1275 fixed_header_size = 30;
1276 if(md->disk_number_start!=d->this_disk_num) {
1277 de_err(c, "Member file not in this ZIP file");
1278 return 0;
1280 de_dbg(c, "local file header at %"I64_FMT, pos);
1282 de_dbg_indent(c, 1);
1284 sig = (u32)de_getu32le_p(&pos);
1285 if(is_central && sig!=CODE_PK12) {
1286 de_err(c, "Central dir file header not found at %"I64_FMT, pos1);
1287 goto done;
1289 else if(!is_central && sig!=CODE_PK34) {
1290 de_err(c, "Local file header not found at %"I64_FMT, pos1);
1291 goto done;
1294 if(is_central) {
1295 md->ver_made_by = (unsigned int)de_getu16le_p(&pos);
1296 md->ver_made_by_hi = (unsigned int)((md->ver_made_by&0xff00)>>8);
1297 md->ver_made_by_lo = (unsigned int)(md->ver_made_by&0x00ff);
1298 de_dbg(c, "version made by: platform=%u (%s), ZIP spec=%u.%u",
1299 md->ver_made_by_hi, get_platform_name(md->ver_made_by_hi),
1300 (unsigned int)(md->ver_made_by_lo/10), (unsigned int)(md->ver_made_by_lo%10));
1303 dd->ver_needed = (unsigned int)de_getu16le_p(&pos);
1304 dd->ver_needed_hi = (unsigned int)((dd->ver_needed&0xff00)>>8);
1305 dd->ver_needed_lo = (unsigned int)(dd->ver_needed&0x00ff);
1306 de_dbg(c, "version needed to extract: platform=%u (%s), ZIP spec=%u.%u",
1307 dd->ver_needed_hi, get_platform_name(dd->ver_needed_hi),
1308 (unsigned int)(dd->ver_needed_lo/10), (unsigned int)(dd->ver_needed_lo%10));
1310 dd->bit_flags = (unsigned int)de_getu16le_p(&pos);
1311 dd->cmpr_meth = (int)de_getu16le_p(&pos);
1312 dd->cmi = get_cmpr_meth_info(dd->cmpr_meth);
1314 utf8_flag = (dd->bit_flags & 0x800)?1:0;
1315 ucstring_empty(descr);
1316 describe_general_purpose_bit_flags(c, dd, descr);
1317 de_dbg(c, "flags: 0x%04x (%s)", dd->bit_flags, ucstring_getpsz(descr));
1319 de_dbg(c, "cmpr method: %d (%s)", dd->cmpr_meth,
1320 (dd->cmi ? dd->cmi->name : "?"));
1322 mod_time_raw = de_getu16le_p(&pos);
1323 mod_date_raw = de_getu16le_p(&pos);
1324 de_dos_datetime_to_timestamp(&dos_timestamp, mod_date_raw, mod_time_raw);
1325 dos_timestamp.tzcode = DE_TZCODE_LOCAL;
1326 de_dbg_timestamp_to_string(c, &dos_timestamp, timestamp_buf, sizeof(timestamp_buf), 0);
1327 de_dbg(c, "mod time: %s", timestamp_buf);
1328 apply_timestamp(c, d, md, DE_TIMESTAMPIDX_MODIFY, &dos_timestamp, 10);
1330 dd->crc_reported = (u32)de_getu32le_p(&pos);
1331 de_dbg(c, "crc (reported): 0x%08x", (unsigned int)dd->crc_reported);
1333 dd->cmpr_size = de_getu32le_p(&pos);
1334 dd->uncmpr_size = de_getu32le_p(&pos);
1335 de_dbg(c, "cmpr size: %" I64_FMT ", uncmpr size: %" I64_FMT, dd->cmpr_size, dd->uncmpr_size);
1337 fn_len = de_getu16le_p(&pos);
1339 extra_len = de_getu16le_p(&pos);
1341 if(is_central) {
1342 comment_len = de_getu16le_p(&pos);
1344 else {
1345 comment_len = 0;
1348 if(!is_central) {
1349 md->file_data_pos = pos + fn_len + extra_len;
1352 if(is_central) {
1353 md->disk_number_start = de_getu16le_p(&pos);
1355 md->attr_i = (unsigned int)de_getu16le_p(&pos);
1356 ucstring_empty(descr);
1357 describe_internal_attr(c, md, descr);
1358 de_dbg(c, "internal file attributes: 0x%04x (%s)", md->attr_i,
1359 ucstring_getpsz(descr));
1361 md->attr_e = (unsigned int)de_getu32le_p(&pos);
1362 de_dbg(c, "external file attributes: 0x%08x", md->attr_e);
1363 de_dbg_indent(c, 1);
1366 // The low byte is, AFAIK, *almost* universally used for MS-DOS-style
1367 // attributes.
1368 unsigned int dos_attrs = (md->attr_e & 0xff);
1369 ucstring_empty(descr);
1370 de_describe_dos_attribs(c, dos_attrs, descr, 0);
1371 de_dbg(c, "%sMS-DOS attribs: 0x%02x (%s)",
1372 (md->ver_made_by_hi==0)?"":"(hypothetical) ",
1373 dos_attrs, ucstring_getpsz(descr));
1376 if((md->attr_e>>16) != 0) {
1377 // A number of platforms put Unix-style file attributes here, so
1378 // decode them as such whenever they are nonzero.
1379 de_dbg(c, "%sUnix attribs: octal(%06o)",
1380 (md->ver_made_by_hi==3)?"":"(hypothetical) ",
1381 (unsigned int)(md->attr_e>>16));
1384 de_dbg_indent(c, -1);
1386 md->offset_of_local_header = de_getu32le_p(&pos);
1387 de_dbg(c, "offset of local header: %"I64_FMT", disk: %d", md->offset_of_local_header,
1388 (int)md->disk_number_start);
1391 if(is_central) {
1392 de_dbg(c, "filename_len: %d, extra_len: %d, comment_len: %d", (int)fn_len,
1393 (int)extra_len, (int)comment_len);
1395 else {
1396 de_dbg(c, "filename_len: %d, extra_len: %d", (int)fn_len,
1397 (int)extra_len);
1400 *p_entry_size = fixed_header_size + fn_len + extra_len + comment_len;
1402 dd->main_fname_pos = pos1+fixed_header_size;
1403 dd->main_fname_len = fn_len;
1404 do_read_filename(c, d, md, dd, pos1+fixed_header_size, fn_len, utf8_flag);
1406 if(extra_len>0) {
1407 do_extra_data(c, d, md, dd, pos1+fixed_header_size+fn_len, extra_len, is_central);
1410 if(comment_len>0) {
1411 do_comment(c, d, pos1+fixed_header_size+fn_len+extra_len, comment_len, utf8_flag,
1412 "member file comment", "fcomment.txt");
1415 if(is_central) {
1416 if(d->used_offset_discrepancy) {
1417 md->offset_of_local_header += d->offset_discrepancy;
1418 de_dbg(c, "assuming local header is really at %"I64_FMT, md->offset_of_local_header);
1420 else if(d->offset_discrepancy!=0) {
1421 u32 sig1, sig2;
1422 i64 alt_pos;
1424 sig1 = (u32)de_getu32le(md->offset_of_local_header);
1425 if(sig1!=CODE_PK34) {
1426 alt_pos = md->offset_of_local_header + d->offset_discrepancy;
1427 sig2 = (u32)de_getu32le(alt_pos);
1428 if(sig2==CODE_PK34) {
1429 de_warn(c, "Local file header found at %"I64_FMT" instead of %"I64_FMT". "
1430 "Assuming offsets are wrong by %"I64_FMT" bytes.",
1431 alt_pos, md->offset_of_local_header, d->offset_discrepancy);
1432 md->offset_of_local_header += d->offset_discrepancy;
1433 d->used_offset_discrepancy = 1;
1439 retval = 1;
1441 done:
1442 de_dbg_indent(c, -1);
1443 ucstring_destroy(descr);
1444 return retval;
1447 static struct member_data *create_member_data(deark *c, lctx *d)
1449 struct member_data *md;
1451 md = de_malloc(c, sizeof(struct member_data));
1452 md->local_dir_entry_data.fname = ucstring_create(c);
1453 md->central_dir_entry_data.fname = ucstring_create(c);
1454 return md;
1457 static void destroy_member_data(deark *c, struct member_data *md)
1459 if(!md) return;
1460 ucstring_destroy(md->central_dir_entry_data.fname);
1461 ucstring_destroy(md->local_dir_entry_data.fname);
1462 de_free(c, md);
1465 static i32 ucstring_lastchar(de_ucstring *s)
1467 if(!s || s->len<1) return 0;
1468 return s->str[s->len-1];
1471 // Things to do after both the central and local headers have been read.
1472 // E.g., extract the file.
1473 static int do_process_member(deark *c, lctx *d, struct member_data *md)
1475 int retval = 0;
1477 // If for some reason we have a central-dir filename but not a local-dir
1478 // filename, use the central-dir filename.
1479 if(ucstring_isempty(md->local_dir_entry_data.fname) &&
1480 ucstring_isnonempty(md->central_dir_entry_data.fname))
1482 ucstring_append_ucstring(md->local_dir_entry_data.fname,
1483 md->central_dir_entry_data.fname);
1486 // Set the final file size and crc fields.
1487 if(md->local_dir_entry_data.bit_flags & 0x0008) {
1488 if(d->using_scanmode) {
1489 de_err(c, "File is incompatible with scan mode");
1490 goto done;
1493 // Indicates that certain fields are not present in the local file header,
1494 // and are instead in a "data descriptor" after the file data.
1495 // Let's hope they are also in the central file header.
1496 md->cmpr_size = md->central_dir_entry_data.cmpr_size;
1497 md->uncmpr_size = md->central_dir_entry_data.uncmpr_size;
1498 md->crc_reported = md->central_dir_entry_data.crc_reported;
1500 else {
1501 md->cmpr_size = md->local_dir_entry_data.cmpr_size;
1502 md->uncmpr_size = md->local_dir_entry_data.uncmpr_size;
1503 md->crc_reported = md->local_dir_entry_data.crc_reported;
1506 process_ext_attr(c, d, md);
1508 // In some cases, detect directories by checking whether the filename ends
1509 // with a slash.
1510 if(!md->is_dir && md->uncmpr_size==0 &&
1511 (d->using_scanmode || (md->ver_made_by_lo<20)))
1513 if(ucstring_lastchar(md->local_dir_entry_data.fname) == '/') {
1514 de_dbg(c, "[assuming this is a subdirectory]");
1515 md->is_dir = 1;
1519 do_extract_file(c, d, md);
1520 retval = 1;
1522 done:
1523 return retval;
1526 // In *entry_size, returns the size of the central dir entry.
1527 // Returns 0 if the central dir entry could not even be parsed.
1528 static int do_member_from_central_dir_entry(deark *c, lctx *d,
1529 struct member_data *md, i64 central_index, i64 pos, i64 *entry_size)
1531 i64 tmp_entry_size;
1532 int retval = 0;
1533 int saved_indent_level;
1535 de_dbg_indent_save(c, &saved_indent_level);
1537 *entry_size = 0;
1539 if(pos >= d->central_dir_offset+d->central_dir_byte_size) {
1540 goto done;
1543 de_dbg(c, "central dir entry #%d", (int)central_index);
1544 de_dbg_indent(c, 1);
1546 // Read the central dir file header
1547 if(!do_file_header(c, d, md, 1, pos, entry_size)) {
1548 goto done;
1551 // If we were able to read the central dir file header, we might be able
1552 // to continue and read more files, even if the local file header fails.
1553 retval = 1;
1555 // Read the local file header
1556 if(!do_file_header(c, d, md, 0, md->offset_of_local_header, &tmp_entry_size)) {
1557 goto done;
1560 do_process_member(c, d, md);
1562 done:
1563 de_dbg_indent_restore(c, saved_indent_level);
1564 return retval;
1567 static int do_central_dir_entry(deark *c, lctx *d,
1568 i64 central_index, i64 pos, i64 *entry_size)
1570 struct member_data *md = NULL;
1571 int ret;
1573 md = create_member_data(c, d);
1574 ret = do_member_from_central_dir_entry(c, d, md, central_index, pos, entry_size);
1575 destroy_member_data(c, md);
1576 return ret;
1579 static int do_local_dir_only(deark *c, lctx *d, i64 pos1, i64 *pmember_size)
1581 struct member_data *md = NULL;
1582 i64 tmp_entry_size;
1583 int retval = 0;
1585 md = create_member_data(c, d);
1587 md->offset_of_local_header = pos1;
1589 // Read the local file header
1590 if(!do_file_header(c, d, md, 0, md->offset_of_local_header, &tmp_entry_size)) {
1591 goto done;
1594 if(!do_process_member(c, d, md)) goto done;
1596 *pmember_size = md->file_data_pos + md->cmpr_size - pos1;
1597 retval = 1;
1599 done:
1600 destroy_member_data(c, md);
1601 return retval;
1604 static void de_run_zip_scanmode(deark *c, lctx *d)
1606 i64 pos = 0;
1608 d->using_scanmode = 1;
1610 while(1) {
1611 int ret;
1612 i64 foundpos = 0;
1613 i64 member_size = 0;
1615 if(pos > c->infile->len-4) break;
1616 ret = dbuf_search(c->infile, g_zipsig34, 4, pos, c->infile->len-pos, &foundpos);
1617 if(!ret) break;
1618 pos = foundpos;
1619 de_dbg(c, "zip member at %"I64_FMT, pos);
1620 de_dbg_indent(c, 1);
1621 ret = do_local_dir_only(c, d, pos, &member_size);
1622 de_dbg_indent(c, -1);
1623 if(!ret) break;
1624 if(member_size<1) break;
1625 pos += member_size;
1629 static int do_central_dir(deark *c, lctx *d)
1631 i64 i;
1632 i64 pos;
1633 i64 entry_size;
1634 int retval = 0;
1636 pos = d->central_dir_offset;
1637 de_dbg(c, "central dir at %"I64_FMT, pos);
1638 de_dbg_indent(c, 1);
1640 for(i=0; i<d->central_dir_num_entries; i++) {
1641 if(!do_central_dir_entry(c, d, i, pos, &entry_size)) {
1642 // TODO: Decide exactly what to do if something fails.
1643 goto done;
1645 pos += entry_size;
1647 retval = 1;
1649 done:
1650 de_dbg_indent(c, -1);
1651 return retval;
1654 static int do_zip64_eocd(deark *c, lctx *d)
1656 i64 pos;
1657 i64 n;
1658 int retval = 0;
1659 int saved_indent_level;
1660 UI ver, ver_hi, ver_lo;
1662 de_dbg_indent_save(c, &saved_indent_level);
1664 if(d->zip64_eocd_disknum!=0) {
1665 de_warn(c, "This might be a multi-disk Zip64 archive, which is not supported");
1666 retval = 1;
1667 d->is_zip64 = 0;
1668 goto done;
1671 pos = d->zip64_eocd_pos;
1672 if(dbuf_memcmp(c->infile, pos, g_zipsig66, 4)) {
1673 de_warn(c, "Expected Zip64 end-of-central-directory record not found at %"I64_FMT, pos);
1674 retval = 1; // Maybe the eocd locator sig was a false positive?
1675 d->is_zip64 = 0;
1676 goto done;
1679 de_dbg(c, "zip64 end-of-central-dir record at %"I64_FMT, pos);
1680 pos += 4;
1681 de_dbg_indent(c, 1);
1683 n = de_geti64le(pos); pos += 8;
1684 de_dbg(c, "size of zip64 eocd record: (12+)%"I64_FMT, n);
1686 ver = (UI)de_getu16le_p(&pos);
1687 ver_hi = (ver&0xff00)>>8;
1688 ver_lo = ver&0x00ff;
1689 de_dbg(c, "version made by: platform=%u (%s), ZIP spec=%u.%u",
1690 ver_hi, get_platform_name(ver_hi), (UI)(ver_lo/10), (UI)(ver_lo%10));
1692 ver = (UI)de_getu16le_p(&pos);
1693 ver_hi = (ver&0xff00)>>8;
1694 ver_lo = ver&0x00ff;
1695 de_dbg(c, "version needed: platform=%u (%s), ZIP spec=%u.%u",
1696 ver_hi, get_platform_name(ver_hi), (UI)(ver_lo/10), (UI)(ver_lo%10));
1698 n = de_getu32le_p(&pos);
1699 de_dbg(c, "this disk num: %"I64_FMT, n);
1701 d->zip64_cd_disknum = (unsigned int)de_getu32le_p(&pos);
1702 d->zip64_num_centr_dir_entries_this_disk = de_geti64le(pos); pos += 8;
1703 de_dbg(c, "central dir num entries on this disk: %"I64_FMT, d->zip64_num_centr_dir_entries_this_disk);
1704 d->zip64_num_centr_dir_entries_total = de_geti64le(pos); pos += 8;
1705 de_dbg(c, "central dir num entries: %"I64_FMT, d->zip64_num_centr_dir_entries_total);
1706 d->zip64_centr_dir_byte_size = de_geti64le(pos); pos += 8;
1707 de_dbg(c, "central dir size: %"I64_FMT, d->zip64_centr_dir_byte_size);
1708 d->zip64_cd_pos = de_geti64le(pos); pos += 8;
1709 de_dbg(c, "central dir offset: %"I64_FMT", disk: %u",
1710 d->zip64_cd_pos, d->zip64_cd_disknum);
1712 retval = 1;
1713 done:
1714 de_dbg_indent_restore(c, saved_indent_level);
1715 return retval;
1718 static void do_zip64_eocd_locator(deark *c, lctx *d)
1720 i64 n;
1721 i64 pos = d->end_of_central_dir_pos - 20;
1723 if(dbuf_memcmp(c->infile, pos, g_zipsig67, 4)) {
1724 return;
1726 de_dbg(c, "zip64 eocd locator found at %"I64_FMT, pos);
1727 pos += 4;
1728 d->is_zip64 = 1;
1729 de_dbg_indent(c, 1);
1730 d->zip64_eocd_disknum = (unsigned int)de_getu32le_p(&pos);
1731 d->zip64_eocd_pos = de_geti64le(pos); pos += 8;
1732 de_dbg(c, "offset of zip64 eocd: %"I64_FMT", disk: %u",
1733 d->zip64_eocd_pos, d->zip64_eocd_disknum);
1734 n = de_getu32le_p(&pos);
1735 de_dbg(c, "total number of disks: %u", (unsigned int)n);
1736 de_dbg_indent(c, -1);
1739 static int do_end_of_central_dir(deark *c, lctx *d)
1741 i64 pos;
1742 i64 num_entries_this_disk;
1743 i64 disk_num_with_central_dir_start;
1744 i64 comment_length;
1745 i64 alt_central_dir_offset;
1746 int retval = 0;
1748 pos = d->end_of_central_dir_pos;
1749 de_dbg(c, "end-of-central-dir record at %"I64_FMT, pos);
1750 de_dbg_indent(c, 1);
1752 d->this_disk_num = de_getu16le(pos+4);
1753 de_dbg(c, "this disk num: %"I64_FMT, d->this_disk_num);
1754 disk_num_with_central_dir_start = de_getu16le(pos+6);
1756 num_entries_this_disk = de_getu16le(pos+8);
1757 de_dbg(c, "central dir num entries on this disk: %"I64_FMT, num_entries_this_disk);
1758 if(d->is_zip64 && (num_entries_this_disk==0xffff)) {
1759 num_entries_this_disk = d->zip64_num_centr_dir_entries_this_disk;
1762 d->central_dir_num_entries = de_getu16le(pos+10);
1763 d->central_dir_byte_size = de_getu32le(pos+12);
1764 d->central_dir_offset = de_getu32le(pos+16);
1765 de_dbg(c, "central dir num entries: %"I64_FMT, d->central_dir_num_entries);
1766 if(d->is_zip64 && (d->central_dir_num_entries==0xffff)) {
1767 d->central_dir_num_entries = d->zip64_num_centr_dir_entries_total;
1770 de_dbg(c, "central dir size: %"I64_FMT, d->central_dir_byte_size);
1771 if(d->is_zip64 && (d->central_dir_byte_size==0xffffffffLL)) {
1772 d->central_dir_byte_size = d->zip64_centr_dir_byte_size;
1775 de_dbg(c, "central dir offset: %"I64_FMT", disk: %"I64_FMT, d->central_dir_offset,
1776 disk_num_with_central_dir_start);
1777 if(d->is_zip64 && (d->central_dir_offset==0xffffffffLL)) {
1778 d->central_dir_offset = d->zip64_cd_pos;
1781 comment_length = de_getu16le(pos+20);
1782 de_dbg(c, "comment length: %d", (int)comment_length);
1783 if(comment_length>0) {
1784 // The comment for the whole .ZIP file presumably has to use
1785 // cp437 encoding. There's no flag that could indicate otherwise.
1786 do_comment(c, d, pos+22, comment_length, 0,
1787 "ZIP file comment", "comment.txt");
1790 // TODO: Figure out exactly how to detect disk spanning.
1791 if(disk_num_with_central_dir_start!=d->this_disk_num ||
1792 (d->is_zip64 && d->zip64_eocd_disknum!=d->this_disk_num))
1794 de_err(c, "Disk spanning not supported");
1795 goto done;
1798 if(d->this_disk_num!=0) {
1799 de_warn(c, "This ZIP file might be part of a multi-part archive, and "
1800 "might not be supported correctly");
1803 if(num_entries_this_disk!=d->central_dir_num_entries) {
1804 de_warn(c, "This ZIP file might not be supported correctly "
1805 "(number-of-entries-this-disk=%d, number-of-entries-total=%d)",
1806 (int)num_entries_this_disk, (int)d->central_dir_num_entries);
1809 alt_central_dir_offset =
1810 (d->is_zip64 ? d->zip64_eocd_pos : d->end_of_central_dir_pos) -
1811 d->central_dir_byte_size;
1813 if(alt_central_dir_offset != d->central_dir_offset) {
1814 u32 sig;
1816 de_warn(c, "Inconsistent central directory offset. Reported to be %"I64_FMT", "
1817 "but based on its reported size, it should be %"I64_FMT".",
1818 d->central_dir_offset, alt_central_dir_offset);
1820 sig = (u32)de_getu32le(alt_central_dir_offset);
1821 if(sig==CODE_PK12) {
1822 d->offset_discrepancy = alt_central_dir_offset - d->central_dir_offset;
1823 de_dbg(c, "likely central dir found at %"I64_FMT, alt_central_dir_offset);
1824 d->central_dir_offset = alt_central_dir_offset;
1828 retval = 1;
1830 done:
1831 de_dbg_indent(c, -1);
1832 return retval;
1835 static void de_run_zip_normally(deark *c, lctx *d)
1837 int eocd_found;
1839 if(c->detection_data && c->detection_data->zip_eocd_looked_for) {
1840 eocd_found = (int)c->detection_data->zip_eocd_found;
1841 d->end_of_central_dir_pos = c->detection_data->zip_eocd_pos;
1843 else {
1844 eocd_found = fmtutil_find_zip_eocd(c, c->infile, &d->end_of_central_dir_pos);
1846 if(!eocd_found) {
1847 if(c->module_disposition==DE_MODDISP_AUTODETECT ||
1848 c->module_disposition==DE_MODDISP_EXPLICIT)
1850 if(de_getu32le(0)==CODE_PK34) {
1851 de_err(c, "ZIP central directory not found. "
1852 "You could try \"-opt zip:scanmode\".");
1853 goto done;
1856 de_err(c, "Not a valid ZIP file");
1857 goto done;
1860 de_dbg(c, "end-of-central-dir record found at %"I64_FMT,
1861 d->end_of_central_dir_pos);
1863 do_zip64_eocd_locator(c, d);
1865 if(d->is_zip64) {
1866 if(!do_zip64_eocd(c, d)) goto done;
1869 if(d->is_zip64)
1870 de_declare_fmt(c, "ZIP-Zip64");
1871 else
1872 de_declare_fmt(c, "ZIP");
1874 if(!do_end_of_central_dir(c, d)) {
1875 goto done;
1878 if(!do_central_dir(c, d)) {
1879 goto done;
1882 done:
1886 static void de_run_zip(deark *c, de_module_params *mparams)
1888 lctx *d = NULL;
1889 de_encoding enc;
1891 d = de_malloc(c, sizeof(lctx));
1893 enc = de_get_input_encoding(c, NULL, DE_ENCODING_CP437);
1894 d->default_enc_for_filenames = enc;
1895 d->default_enc_for_comments = enc;
1897 d->crco = de_crcobj_create(c, DE_CRCOBJ_CRC32_IEEE);
1899 if(de_get_ext_option(c, "zip:scanmode")) {
1900 de_run_zip_scanmode(c, d);
1902 else {
1903 de_run_zip_normally(c, d);
1906 if(d) {
1907 de_crcobj_destroy(d->crco);
1908 de_free(c, d);
1912 static int de_identify_zip(deark *c)
1914 u8 b[4];
1915 int has_zip_ext;
1916 int has_mz_sig = 0;
1918 has_zip_ext = de_input_file_has_ext(c, "zip");
1920 // Fast tests:
1922 de_read(b, 0, 4);
1923 if(!de_memcmp(b, g_zipsig34, 4)) {
1924 return has_zip_ext ? 100 : 90;
1926 if(b[0]=='M' && b[1]=='Z') has_mz_sig = 1;
1928 if(c->infile->len >= 22) {
1929 de_read(b, c->infile->len - 22, 4);
1930 if(!de_memcmp(b, g_zipsig56, 4)) {
1931 return has_zip_ext ? 100 : 19;
1935 // Things to consider:
1936 // * We want de_fmtutil_find_zip_eocd() to be called no more than once, and
1937 // only on files that for some reason we suspect could be ZIP files.
1938 // * If the user disables exe format detection (e.g. with "-onlydetect zip"),
1939 // we want self-extracting-ZIP .exe files to be detected as ZIP instead.
1940 // * And we want the above to work even if the file has a ZIP file comment,
1941 // making it expensive to detect as ZIP.
1943 // Tests below can't return a confidence higher than this.
1944 if(c->detection_data->best_confidence_so_far >= 19) return 0;
1946 // Slow tests:
1948 if(has_mz_sig || has_zip_ext) {
1949 i64 eocd_pos = 0;
1951 c->detection_data->zip_eocd_looked_for = 1;
1952 if(fmtutil_find_zip_eocd(c, c->infile, &eocd_pos)) {
1953 c->detection_data->zip_eocd_found = 1;
1954 c->detection_data->zip_eocd_pos = eocd_pos;
1955 return 19;
1959 return 0;
1962 static void de_help_zip(deark *c)
1964 de_msg(c, "-opt zip:scanmode : Do not use the \"central directory\"");
1965 de_msg(c, "-opt zip:implodebug : Behave like PKZIP 1.01/1.02");
1968 void de_module_zip(deark *c, struct deark_module_info *mi)
1970 mi->id = "zip";
1971 mi->desc = "ZIP archive";
1972 mi->run_fn = de_run_zip;
1973 mi->identify_fn = de_identify_zip;
1974 mi->help_fn = de_help_zip;