lzhuf: Refactored to avoid direct array access
[deark.git] / modules / zip.c
blob14e0f6ebabcf109e9819a7dec4813704fa22f017
1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
5 // ZIP format
7 #include <deark-config.h>
8 #include <deark-private.h>
9 #include <deark-fmtutil.h>
11 DE_DECLARE_MODULE(de_module_zip);
13 struct localctx_struct;
14 typedef struct localctx_struct lctx;
16 #define CODE_PK12 0x02014b50U
17 #define CODE_PK34 0x04034b50U
18 static const u8 g_zipsig34[4] = {'P', 'K', 0x03, 0x04};
19 static const u8 g_zipsig56[4] = {'P', 'K', 0x05, 0x06};
20 static const u8 g_zipsig66[4] = {'P', 'K', 0x06, 0x06};
21 static const u8 g_zipsig67[4] = {'P', 'K', 0x06, 0x07};
// ZIP-specific parameters (not in de_dfilter_*_params) that may be needed
// to decompress something.
struct compression_params {
	// ZIP compression method number. Read by the "reduce" and "deflate"
	// decompressor callbacks.
	int cmpr_meth;
	// Bit flags of the member. Read by the "implode" decompressor callback.
	unsigned bit_flags;
};
30 typedef void (*decompressor_fn)(deark *c, lctx *d, struct compression_params *cparams,
31 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
32 struct de_dfilter_results *dres);
34 struct cmpr_meth_info {
35 int cmpr_meth;
36 unsigned int flags;
37 const char *name;
38 decompressor_fn decompressor;
41 struct dir_entry_data {
42 unsigned int ver_needed;
43 unsigned int ver_needed_hi, ver_needed_lo;
44 i64 cmpr_size, uncmpr_size;
45 int cmpr_meth;
46 const struct cmpr_meth_info *cmi;
47 unsigned int bit_flags;
48 u32 crc_reported;
49 i64 main_fname_pos;
50 i64 main_fname_len;
51 de_ucstring *fname;
54 struct timestamp_data {
55 struct de_timestamp ts; // The best timestamp of this type found so far
56 int quality;
59 struct member_data {
60 unsigned int ver_made_by;
61 unsigned int ver_made_by_hi, ver_made_by_lo;
62 unsigned int attr_i, attr_e;
63 i64 offset_of_local_header;
64 i64 disk_number_start;
65 i64 file_data_pos;
66 int is_nonexecutable;
67 int is_executable;
68 int is_dir;
69 int is_symlink;
70 struct timestamp_data tsdata[DE_TIMESTAMPIDX_COUNT];
72 struct dir_entry_data central_dir_entry_data;
73 struct dir_entry_data local_dir_entry_data;
75 i64 cmpr_size, uncmpr_size;
76 u32 crc_reported;
79 struct extra_item_type_info_struct;
81 struct extra_item_info_struct {
82 u32 id;
83 i64 dpos;
84 i64 dlen;
85 const struct extra_item_type_info_struct *eiti;
86 struct member_data *md;
87 struct dir_entry_data *dd;
88 int is_central;
91 struct localctx_struct {
92 de_encoding default_enc_for_filenames;
93 de_encoding default_enc_for_comments;
94 i64 end_of_central_dir_pos;
95 i64 central_dir_num_entries;
96 i64 central_dir_byte_size;
97 i64 central_dir_offset;
98 i64 this_disk_num;
99 i64 zip64_eocd_pos;
100 i64 zip64_cd_pos;
101 i64 zip64_num_centr_dir_entries_this_disk;
102 i64 zip64_num_centr_dir_entries_total;
103 i64 zip64_centr_dir_byte_size;
104 unsigned int zip64_eocd_disknum;
105 unsigned int zip64_cd_disknum;
106 i64 offset_discrepancy;
107 int used_offset_discrepancy;
108 int is_zip64;
109 int using_scanmode;
110 struct de_crcobj *crco;
113 typedef void (*extrafield_decoder_fn)(deark *c, lctx *d,
114 struct extra_item_info_struct *eii);
116 static int is_compression_method_supported(lctx *d, const struct cmpr_meth_info *cmi)
118 if(cmi && cmi->decompressor) return 1;
119 return 0;
122 static void do_decompress_shrink(deark *c, lctx *d, struct compression_params *cparams,
123 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
124 struct de_dfilter_results *dres)
126 fmtutil_decompress_zip_shrink(c, dcmpri, dcmpro, dres, NULL);
129 static void do_decompress_reduce(deark *c, lctx *d, struct compression_params *cparams,
130 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
131 struct de_dfilter_results *dres)
133 struct de_zipreduce_params params;
135 de_zeromem(&params, sizeof(struct de_zipreduce_params));
136 params.cmpr_factor = (unsigned int)(cparams->cmpr_meth-1);
137 fmtutil_decompress_zip_reduce(c, dcmpri, dcmpro, dres, &params);
140 static void do_decompress_implode(deark *c, lctx *d, struct compression_params *cparams,
141 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
142 struct de_dfilter_results *dres)
144 struct de_zipimplode_params params;
146 de_zeromem(&params, sizeof(struct de_zipimplode_params));
147 params.bit_flags = cparams->bit_flags;
148 params.mml_bug = (u8)de_get_ext_option_bool(c, "zip:implodebug", 0);
149 fmtutil_decompress_zip_implode(c, dcmpri, dcmpro, dres, &params);
152 static void do_decompress_deflate(deark *c, lctx *d, struct compression_params *cparams,
153 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
154 struct de_dfilter_results *dres)
156 struct de_deflate_params inflparams;
158 de_zeromem(&inflparams, sizeof(struct de_deflate_params));
159 if(cparams->cmpr_meth==9) {
160 inflparams.flags |= DE_DEFLATEFLAG_DEFLATE64;
162 fmtutil_decompress_deflate_ex(c, dcmpri, dcmpro, dres, &inflparams);
165 static void do_decompress_dclimplode(deark *c, lctx *d, struct compression_params *cparams,
166 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
167 struct de_dfilter_results *dres)
169 fmtutil_dclimplode_codectype1(c, dcmpri, dcmpro, dres, NULL);
172 static void do_decompress_stored(deark *c, lctx *d, struct compression_params *cparams,
173 struct de_dfilter_in_params *dcmpri, struct de_dfilter_out_params *dcmpro,
174 struct de_dfilter_results *dres)
176 fmtutil_decompress_uncompressed(c, dcmpri, dcmpro, dres, 0);
179 static const struct cmpr_meth_info cmpr_meth_info_arr[] = {
180 { 0, 0x00, "stored", do_decompress_stored },
181 { 1, 0x00, "shrink", do_decompress_shrink },
182 { 2, 0x00, "reduce, CF=1", do_decompress_reduce },
183 { 3, 0x00, "reduce, CF=2", do_decompress_reduce },
184 { 4, 0x00, "reduce, CF=3", do_decompress_reduce },
185 { 5, 0x00, "reduce, CF=4", do_decompress_reduce },
186 { 6, 0x00, "implode", do_decompress_implode },
187 { 8, 0x00, "deflate", do_decompress_deflate },
188 { 9, 0x00, "deflate64", do_decompress_deflate },
189 { 10, 0x00, "PKWARE DCL implode", do_decompress_dclimplode },
190 { 12, 0x00, "bzip2", NULL },
191 { 14, 0x00, "LZMA", NULL },
192 { 16, 0x00, "IBM z/OS CMPSC", NULL },
193 { 18, 0x00, "IBM TERSE (new)", NULL },
194 { 19, 0x00, "IBM LZ77 z Architecture", NULL },
195 { 94, 0x00, "MP3", NULL },
196 { 95, 0x00, "XZ", NULL },
197 { 96, 0x00, "JPEG", NULL },
198 { 97, 0x00, "WavPack", NULL },
199 { 98, 0x00, "PPMd", NULL },
200 { 99, 0x00, "AES", NULL }
203 static const struct cmpr_meth_info *get_cmpr_meth_info(int cmpr_meth)
205 size_t k;
207 for(k=0; k<DE_ARRAYCOUNT(cmpr_meth_info_arr); k++) {
208 if(cmpr_meth_info_arr[k].cmpr_meth == cmpr_meth) {
209 return &cmpr_meth_info_arr[k];
212 return NULL;
215 // Decompress some data, using the given ZIP compression method.
216 // On failure, dres->errcode will be set.
217 static void do_decompress_lowlevel(deark *c, lctx *d, struct de_dfilter_in_params *dcmpri,
218 struct de_dfilter_out_params *dcmpro, struct de_dfilter_results *dres,
219 int cmpr_meth, const struct cmpr_meth_info *cmi, unsigned int bit_flags)
221 struct compression_params cparams;
223 de_zeromem(&cparams, sizeof(struct compression_params));
224 cparams.cmpr_meth = cmpr_meth;
225 cparams.bit_flags = bit_flags;
227 if(cmi && cmi->decompressor) {
228 cmi->decompressor(c, d, &cparams, dcmpri, dcmpro, dres);
230 else {
231 de_internal_err_nonfatal(c, "Unsupported compression method (%d)", cmpr_meth);
232 de_dfilter_set_generic_error(c, dres, NULL);
236 static void our_writelistener_cb(dbuf *f, void *userdata, const u8 *buf, i64 buf_len)
238 struct de_crcobj *crco = (struct de_crcobj *)userdata;
239 de_crcobj_addbuf(crco, buf, buf_len);
242 // Decompress a Zip member file, writing to outf.
243 // Does CRC calculation.
244 // Reports errors to the user.
245 // Only call this if the compression method is supported -- Call
246 // is_compression_method_supported() first.
247 // Assumes ldd->cmi has been set, by calling get_cmpr_meth_info().
248 static int do_decompress_member(deark *c, lctx *d, struct member_data *md, dbuf *outf)
250 struct dir_entry_data *ldd = &md->local_dir_entry_data;
251 struct de_dfilter_in_params dcmpri;
252 struct de_dfilter_out_params dcmpro;
253 struct de_dfilter_results dres;
254 u32 crc_calculated;
255 int retval = 0;
257 de_dfilter_init_objects(c, &dcmpri, &dcmpro, &dres);
258 dcmpri.f = c->infile;
259 dcmpri.pos = md->file_data_pos;
260 dcmpri.len = md->cmpr_size;
261 dcmpro.f = outf;
262 dcmpro.expected_len = md->uncmpr_size;
263 dcmpro.len_known = 1;
265 dbuf_set_writelistener(outf, our_writelistener_cb, (void*)d->crco);
266 de_crcobj_reset(d->crco);
268 do_decompress_lowlevel(c, d, &dcmpri, &dcmpro, &dres, ldd->cmpr_meth,
269 ldd->cmi, ldd->bit_flags);
271 if(dres.errcode) {
272 de_err(c, "%s: %s", ucstring_getpsz_d(ldd->fname),
273 de_dfilter_get_errmsg(c, &dres));
274 goto done;
277 crc_calculated = de_crcobj_getval(d->crco);
278 de_dbg(c, "crc (calculated): 0x%08x", (unsigned int)crc_calculated);
280 if(crc_calculated != md->crc_reported) {
281 de_err(c, "%s: CRC check failed: Expected 0x%08x, got 0x%08x",
282 ucstring_getpsz_d(ldd->fname),
283 (unsigned int)md->crc_reported, (unsigned int)crc_calculated);
284 if(dres.bytes_consumed_valid && (dres.bytes_consumed < dcmpri.len)) {
285 de_info(c, "Note: Only used %"I64_FMT" of %"I64_FMT" compressed bytes.",
286 dres.bytes_consumed, dcmpri.len);
288 goto done;
291 retval = 1;
292 done:
293 return retval;
296 // A variation of do_decompress_member() -
297 // works for Finder attribute data, and OS/2 extended attributes.
298 // Only call this if the compression method is supported -- Call
299 // is_compression_method_supported() first.
300 // outf is assumed to be a membuf.
301 // dcflags: 0x1 = Validate the crc_reported param.
302 static int do_decompress_attrib_data(deark *c, lctx *d,
303 i64 dpos, i64 dlen, dbuf *outf, i64 uncmprsize, u32 crc_reported,
304 int cmpr_meth, const struct cmpr_meth_info *cmi, UI flags, const char *name)
306 struct de_dfilter_in_params dcmpri;
307 struct de_dfilter_out_params dcmpro;
308 struct de_dfilter_results dres;
309 u32 crc_calculated;
310 int retval = 0;
312 de_dfilter_init_objects(c, &dcmpri, &dcmpro, &dres);
313 dcmpri.f = c->infile;
314 dcmpri.pos = dpos;
315 dcmpri.len = dlen;
316 dcmpro.f = outf;
317 dcmpro.expected_len = uncmprsize;
318 dcmpro.len_known = 1;
320 do_decompress_lowlevel(c, d, &dcmpri, &dcmpro, &dres, cmpr_meth, cmi, 0);
321 if(dres.errcode) {
322 goto done; // Could report the error, but this isn't critical data
325 if(flags & 0x1) {
326 de_crcobj_reset(d->crco);
327 de_crcobj_addslice(d->crco, outf, 0, outf->len);
328 crc_calculated = de_crcobj_getval(d->crco);
329 de_dbg(c, "%s crc (calculated): 0x%08x", name, (UI)crc_calculated);
330 if(crc_calculated != crc_reported) goto done;
333 retval = 1;
334 done:
335 return retval;
338 // As we read a member file's attributes, we may encounter multiple timestamps,
339 // which can differ in their precision, and whether they use UTC.
340 // This function is called to remember the "best" file modification time
341 // encountered so far.
342 static void apply_timestamp(deark *c, lctx *d, struct member_data *md, int tstype,
343 const struct de_timestamp *ts, int quality)
345 if(!ts->is_valid) return;
347 // In case of a tie, we prefer the later timestamp that we encountered.
348 // This makes local headers have priority over central headers, for
349 // example.
350 if(quality >= md->tsdata[tstype].quality) {
351 md->tsdata[tstype].ts = *ts;
352 md->tsdata[tstype].quality = quality;
356 static void do_read_filename(deark *c, lctx *d,
357 struct member_data *md, struct dir_entry_data *dd,
358 i64 pos, i64 len, int utf8_flag)
360 de_encoding from_encoding;
362 ucstring_empty(dd->fname);
363 from_encoding = utf8_flag ? DE_ENCODING_UTF8 : d->default_enc_for_filenames;
364 dbuf_read_to_ucstring(c->infile, pos, len, dd->fname, 0, from_encoding);
365 de_dbg(c, "filename: \"%s\"", ucstring_getpsz_d(dd->fname));
368 static void do_comment_display(deark *c, lctx *d, i64 pos, i64 len, de_ext_encoding ee,
369 const char *name)
371 de_ucstring *s = NULL;
373 s = ucstring_create(c);
374 dbuf_read_to_ucstring(c->infile, pos, len, s, 0, ee);
375 de_dbg(c, "%s: \"%s\"", name, ucstring_getpsz_d(s));
376 ucstring_destroy(s);
379 static void do_comment_extract(deark *c, lctx *d, i64 pos, i64 len, de_ext_encoding ee,
380 const char *ext)
382 dbuf *f = NULL;
383 de_ucstring *s = NULL;
385 f = dbuf_create_output_file(c, ext, NULL, DE_CREATEFLAG_IS_AUX);
386 s = ucstring_create(c);
387 dbuf_read_to_ucstring(c->infile, pos, len, s, 0, ee);
388 ucstring_write_as_utf8(c, s, f, 1);
389 ucstring_destroy(s);
390 dbuf_close(f);
393 static void do_comment(deark *c, lctx *d, i64 pos, i64 len, int utf8_flag,
394 const char *name, const char *ext)
396 de_ext_encoding ee;
398 if(len<1) return;
399 ee = utf8_flag ? DE_ENCODING_UTF8 : d->default_enc_for_comments;
400 ee = DE_EXTENC_MAKE(ee, DE_ENCSUBTYPE_HYBRID);
401 if(c->extract_level>=2) {
402 do_comment_extract(c, d, pos, len, ee, ext);
404 else {
405 do_comment_display(c, d, pos, len, ee, name);
409 static void read_unix_timestamp(deark *c, lctx *d, i64 pos,
410 struct de_timestamp *timestamp, const char *name)
412 i64 t;
413 char timestamp_buf[64];
415 t = de_geti32le(pos);
416 de_unix_time_to_timestamp(t, timestamp, 0x1);
417 de_dbg_timestamp_to_string(c, timestamp, timestamp_buf, sizeof(timestamp_buf), 0);
418 de_dbg(c, "%s: %"I64_FMT" (%s)", name, t, timestamp_buf);
421 static void read_FILETIME(deark *c, lctx *d, i64 pos,
422 struct de_timestamp *timestamp, const char *name)
424 i64 t_FILETIME;
425 char timestamp_buf[64];
427 t_FILETIME = de_geti64le(pos);
428 de_FILETIME_to_timestamp(t_FILETIME, timestamp, 0x1);
429 de_dbg_timestamp_to_string(c, timestamp, timestamp_buf, sizeof(timestamp_buf), 0);
430 de_dbg(c, "%s: %s", name, timestamp_buf);
433 static void ef_zip64extinfo(deark *c, lctx *d, struct extra_item_info_struct *eii)
435 i64 n;
436 i64 pos = eii->dpos;
438 if(pos+8 > eii->dpos+eii->dlen) goto done;
439 n = de_geti64le(pos); pos += 8;
440 de_dbg(c, "orig uncmpr file size: %"I64_FMT, n);
441 if(eii->dd->uncmpr_size==0xffffffffLL) {
442 eii->dd->uncmpr_size = n;
445 if(pos+8 > eii->dpos+eii->dlen) goto done;
446 n = de_geti64le(pos); pos += 8;
447 de_dbg(c, "cmpr data size: %"I64_FMT, n);
448 if(eii->dd->cmpr_size==0xffffffffLL) {
449 eii->dd->cmpr_size = n;
452 if(pos+8 > eii->dpos+eii->dlen) goto done;
453 n = de_geti64le(pos); pos += 8;
454 de_dbg(c, "offset of local header record: %"I64_FMT, n);
456 if(pos+4 > eii->dpos+eii->dlen) goto done;
457 n = de_getu32le_p(&pos);
458 de_dbg(c, "disk start number: %"I64_FMT, n);
459 done:
463 // Extra field 0x5455
464 static void ef_extended_timestamp(deark *c, lctx *d, struct extra_item_info_struct *eii)
466 i64 pos = eii->dpos;
467 u8 flags;
468 i64 endpos;
469 int has_mtime, has_atime, has_ctime;
470 struct de_timestamp timestamp_tmp;
472 endpos = pos + eii->dlen;
473 if(pos+1>endpos) return;
474 flags = de_getbyte_p(&pos);
475 if(eii->is_central) {
476 has_mtime = (eii->dlen>=5);
477 has_atime = 0;
478 has_ctime = 0;
480 else {
481 has_mtime = (flags & 0x01)?1:0;
482 has_atime = (flags & 0x02)?1:0;
483 has_ctime = (flags & 0x04)?1:0;
485 if(has_mtime) {
486 if(pos+4>endpos) return;
487 read_unix_timestamp(c, d, pos, &timestamp_tmp, "mtime");
488 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_MODIFY, &timestamp_tmp, 50);
489 pos+=4;
491 if(has_atime) {
492 if(pos+4>endpos) return;
493 read_unix_timestamp(c, d, pos, &timestamp_tmp, "atime");
494 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_ACCESS, &timestamp_tmp, 50);
495 pos+=4;
497 if(has_ctime) {
498 if(pos+4>endpos) return;
499 read_unix_timestamp(c, d, pos, &timestamp_tmp, "creation time");
500 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_CREATE, &timestamp_tmp, 50);
501 pos+=4;
505 // Extra field 0x5855
506 static void ef_infozip1(deark *c, lctx *d, struct extra_item_info_struct *eii)
508 i64 uidnum, gidnum;
509 struct de_timestamp timestamp_tmp;
511 if(eii->is_central && eii->dlen<8) return;
512 if(!eii->is_central && eii->dlen<12) return;
513 read_unix_timestamp(c, d, eii->dpos, &timestamp_tmp, "atime");
514 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_ACCESS, &timestamp_tmp, 45);
515 read_unix_timestamp(c, d, eii->dpos+4, &timestamp_tmp, "mtime");
516 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_MODIFY, &timestamp_tmp, 45);
517 if(!eii->is_central) {
518 uidnum = de_getu16le(eii->dpos+8);
519 gidnum = de_getu16le(eii->dpos+10);
520 de_dbg(c, "uid: %d, gid: %d", (int)uidnum, (int)gidnum);
524 // Extra field 0x7075 - Info-ZIP Unicode Path
525 static void ef_unicodepath(deark *c, lctx *d, struct extra_item_info_struct *eii)
527 u8 ver;
528 de_ucstring *fn = NULL;
529 i64 fnlen;
530 u32 crc_reported, crc_calculated;
532 if(eii->dlen<1) goto done;
533 ver = de_getbyte(eii->dpos);
534 de_dbg(c, "version: %u", (unsigned int)ver);
535 if(ver!=1) goto done;
536 if(eii->dlen<6) goto done;
537 crc_reported = (u32)de_getu32le(eii->dpos+1);
538 de_dbg(c, "name-crc (reported): 0x%08x", (unsigned int)crc_reported);
539 fn = ucstring_create(c);
540 fnlen = eii->dlen - 5;
541 dbuf_read_to_ucstring(c->infile, eii->dpos+5, fnlen, fn, 0, DE_ENCODING_UTF8);
542 de_dbg(c, "unicode name: \"%s\"", ucstring_getpsz_d(fn));
544 // Need to go back and calculate a CRC of the main filename. This is
545 // protection against the case where a ZIP editor may have changed the
546 // original filename, but retained a now-orphaned Unicode Path field.
547 de_crcobj_reset(d->crco);
548 de_crcobj_addslice(d->crco, c->infile, eii->dd->main_fname_pos, eii->dd->main_fname_len);
549 crc_calculated = de_crcobj_getval(d->crco);
550 de_dbg(c, "name-crc (calculated): 0x%08x", (unsigned int)crc_calculated);
552 if(crc_calculated == crc_reported) {
553 ucstring_empty(eii->dd->fname);
554 ucstring_append_ucstring(eii->dd->fname, fn);
557 done:
558 ucstring_destroy(fn);
561 // Extra field 0x7855
562 static void ef_infozip2(deark *c, lctx *d, struct extra_item_info_struct *eii)
564 i64 uidnum, gidnum;
566 if(eii->is_central) return;
567 if(eii->dlen<4) return;
568 uidnum = de_getu16le(eii->dpos);
569 gidnum = de_getu16le(eii->dpos+2);
570 de_dbg(c, "uid: %d, gid: %d", (int)uidnum, (int)gidnum);
573 // Extra field 0x7875
574 static void ef_infozip3(deark *c, lctx *d, struct extra_item_info_struct *eii)
576 i64 pos = eii->dpos;
577 i64 uidnum, gidnum;
578 u8 ver;
579 i64 endpos;
580 i64 sz;
582 endpos = pos+eii->dlen;
584 if(pos+1>endpos) return;
585 ver = de_getbyte_p(&pos);
586 de_dbg(c, "version: %d", (int)ver);
587 if(ver!=1) return;
589 if(pos+1>endpos) return;
590 sz = (i64)de_getbyte_p(&pos);
591 if(pos+sz>endpos) return;
592 uidnum = dbuf_getint_ext(c->infile, pos, (unsigned int)sz, 1, 0);
593 pos += sz;
595 if(pos+1>endpos) return;
596 sz = (i64)de_getbyte_p(&pos);
597 if(pos+sz>endpos) return;
598 gidnum = dbuf_getint_ext(c->infile, pos, (unsigned int)sz, 1, 0);
599 pos += sz;
601 de_dbg(c, "uid: %d, gid: %d", (int)uidnum, (int)gidnum);
604 // Extra field 0x000a
605 static void ef_ntfs(deark *c, lctx *d, struct extra_item_info_struct *eii)
607 i64 pos = eii->dpos;
608 i64 endpos;
609 i64 attr_tag;
610 i64 attr_size;
611 const char *name;
612 struct de_timestamp timestamp_tmp;
614 endpos = pos+eii->dlen;
615 pos += 4; // skip reserved field
617 while(1) {
618 if(pos+4>endpos) break;
619 attr_tag = de_getu16le_p(&pos);
620 attr_size = de_getu16le_p(&pos);
621 if(attr_tag==0x0001) name="NTFS filetimes";
622 else name="?";
623 de_dbg(c, "tag: 0x%04x (%s), dlen: %d", (unsigned int)attr_tag, name,
624 (int)attr_size);
625 if(pos+attr_size>endpos) break;
627 de_dbg_indent(c, 1);
628 if(attr_tag==0x0001 && attr_size>=24) {
629 read_FILETIME(c, d, pos, &timestamp_tmp, "mtime");
630 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_MODIFY, &timestamp_tmp, 90);
631 read_FILETIME(c, d, pos+8, &timestamp_tmp, "atime");
632 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_ACCESS, &timestamp_tmp, 90);
633 read_FILETIME(c, d, pos+16, &timestamp_tmp, "creation time");
634 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_CREATE, &timestamp_tmp, 90);
636 de_dbg_indent(c, -1);
638 pos += attr_size;
642 // Extra field 0x0009
643 static void ef_os2(deark *c, lctx *d, struct extra_item_info_struct *eii)
645 i64 pos = eii->dpos;
646 i64 endpos;
647 i64 ulen;
648 i64 cmpr_attr_size;
649 int cmpr_meth;
650 u32 crc_reported;
651 const struct cmpr_meth_info *cmi = NULL;
652 const char *name = "OS/2 ext. attr. data";
653 dbuf *attr_data = NULL;
654 de_module_params *mparams = NULL;
655 int ret;
657 endpos = pos+eii->dlen;
658 if(pos+4>endpos) goto done;
659 ulen = de_getu32le_p(&pos);
660 de_dbg(c, "uncmpr ext attr data size: %"I64_FMT, ulen);
661 if(eii->is_central) goto done;
663 if(pos+2>endpos) goto done;
664 cmpr_meth = (int)de_getu16le_p(&pos);
665 de_dbg(c, "ext attr cmpr method: %d", cmpr_meth);
667 if(pos+4>endpos) goto done;
668 crc_reported = (u32)de_getu32le_p(&pos);
669 de_dbg(c, "ext attr crc (reported): 0x%08x", (unsigned int)crc_reported);
671 cmpr_attr_size = endpos-pos;
672 de_dbg(c, "cmpr ext attr data at %"I64_FMT", len=%"I64_FMT, pos, cmpr_attr_size);
673 if(pos + cmpr_attr_size > endpos) goto done;
675 cmi = get_cmpr_meth_info(cmpr_meth);
676 if(cmpr_meth==6 || !is_compression_method_supported(d, cmi)) {
677 de_warn(c, "%s: Unsupported compression method: %d (%s)",
678 name, cmpr_meth, (cmi ? cmi->name : "?"));
679 goto done;
682 attr_data = dbuf_create_membuf(c, ulen, 0x1);
683 ret = do_decompress_attrib_data(c, d, pos, cmpr_attr_size,
684 attr_data, ulen, crc_reported, cmpr_meth, cmi, 0x1, name);
685 if(!ret) {
686 de_warn(c, "Failed to decompress %s", name);
687 goto done;
690 // attr_data contains an OS/2 extended attribute structure (FEA2LIST)
691 mparams = de_malloc(c, sizeof(de_module_params));
692 mparams->in_params.codes = "L";
693 de_dbg(c, "decoding OS/2 ext. attribs., unc. len=%"I64_FMT, attr_data->len);
694 de_dbg_indent(c, 1);
695 de_run_module_by_id_on_slice(c, "ea_data", mparams, attr_data, 0, attr_data->len);
696 de_dbg_indent(c, -1);
698 done:
699 dbuf_close(attr_data);
700 de_free(c, mparams);
703 // Extra field 0x2705 (ZipIt Macintosh 1.3.5+)
704 static void ef_zipitmac_2705(deark *c, lctx *d, struct extra_item_info_struct *eii)
706 struct de_fourcc sig;
707 struct de_fourcc filetype;
708 struct de_fourcc creator;
710 if(eii->dlen<4) goto done;
711 dbuf_read_fourcc(c->infile, eii->dpos, &sig, 4, 0x0);
712 de_dbg(c, "signature: '%s'", sig.id_dbgstr);
713 if(sig.id!=0x5a504954U) goto done; // expecting 'ZPIT'
714 if(eii->dlen<12) goto done;
715 dbuf_read_fourcc(c->infile, eii->dpos+4, &filetype, 4, 0x0);
716 de_dbg(c, "filetype: '%s'", filetype.id_dbgstr);
717 dbuf_read_fourcc(c->infile, eii->dpos+8, &creator, 4, 0x0);
718 de_dbg(c, "creator: '%s'", creator.id_dbgstr);
720 done:
724 // The time will be returned in the caller-supplied 'ts'
725 static void handle_mac_time(deark *c, lctx *d,
726 i64 mt_raw, i64 mt_offset,
727 struct de_timestamp *ts, const char *name)
729 char timestamp_buf[64];
730 de_mac_time_to_timestamp(mt_raw - mt_offset, ts);
731 ts->tzcode = DE_TZCODE_UTC;
732 de_dbg_timestamp_to_string(c, ts, timestamp_buf, sizeof(timestamp_buf), 0);
733 de_dbg(c, "%s: %"I64_FMT" %+"I64_FMT" (%s)", name,
734 mt_raw, -mt_offset, timestamp_buf);
737 // Extra field 0x334d (Info-ZIP Macintosh)
738 static void ef_infozipmac(deark *c, lctx *d, struct extra_item_info_struct *eii)
740 i64 pos = eii->dpos;
741 i64 dpos;
742 i64 ulen;
743 i64 cmpr_attr_size;
744 unsigned int flags;
745 int cmpr_meth;
746 const struct cmpr_meth_info *cmi = NULL;
747 struct de_fourcc filetype;
748 struct de_fourcc creator;
749 de_ucstring *flags_str = NULL;
750 dbuf *attr_data = NULL;
751 int ret;
752 i64 create_time_raw;
753 i64 create_time_offset;
754 i64 mod_time_raw;
755 i64 mod_time_offset;
756 i64 backup_time_raw;
757 i64 backup_time_offset;
758 struct de_timestamp tmp_timestamp;
759 int charset;
760 u32 crc_reported = 0;
761 UI dcflags = 0;
762 struct de_stringreaderdata *srd;
764 if(eii->dlen<14) goto done;
766 ulen = de_getu32le_p(&pos);
767 de_dbg(c, "uncmpr. finder attr. size: %d", (int)ulen);
769 flags = (unsigned int)de_getu16le_p(&pos);
770 flags_str = ucstring_create(c);
771 if(flags&0x0001) ucstring_append_flags_item(flags_str, "data_fork");
772 if(flags&0x0002) ucstring_append_flags_item(flags_str, "0x0002"); // something about the filename
773 ucstring_append_flags_item(flags_str,
774 (flags&0x0004)?"uncmpressed_attribute_data":"compressed_attribute_data");
775 if(flags&0x0008) ucstring_append_flags_item(flags_str, "64-bit_times");
776 if(flags&0x0010) ucstring_append_flags_item(flags_str, "no_timezone_offsets");
777 de_dbg(c, "flags: 0x%04x (%s)", flags, ucstring_getpsz(flags_str));
779 dbuf_read_fourcc(c->infile, pos, &filetype, 4, 0x0);
780 de_dbg(c, "filetype: '%s'", filetype.id_dbgstr);
781 pos += 4;
782 dbuf_read_fourcc(c->infile, pos, &creator, 4, 0x0);
783 de_dbg(c, "creator: '%s'", creator.id_dbgstr);
784 pos += 4;
786 if(eii->is_central) goto done;
788 if(flags&0x0004) { // Uncompressed attribute data
789 cmpr_meth = 0;
791 else {
792 dcflags |= 0x1; // CRC is known
793 cmpr_meth = (int)de_getu16le_p(&pos);
794 cmi = get_cmpr_meth_info(cmpr_meth);
795 de_dbg(c, "finder attr. cmpr. method: %d (%s)", cmpr_meth, (cmi ? cmi->name : "?"));
797 crc_reported = (u32)de_getu32le_p(&pos);
798 de_dbg(c, "finder attr. data crc (reported): 0x%08x", (UI)crc_reported);
801 // The rest of the data is Finder attribute data
802 cmpr_attr_size = eii->dpos+eii->dlen - pos;
803 de_dbg(c, "cmpr. finder attr. size: %d", (int)cmpr_attr_size);
804 if(ulen<1 || ulen>1000000) goto done;
806 // Type 6 (implode) compression won't work here, because it needs
807 // additional parameters seemingly not provided by the Finder attr data.
808 if(cmpr_meth==6 || !is_compression_method_supported(d, cmi)) {
809 de_warn(c, "Finder attribute data: Unsupported compression method: %d (%s)",
810 cmpr_meth, (cmi ? cmi->name : "?"));
811 goto done;
814 // Decompress and decode the Finder attribute data
815 attr_data = dbuf_create_membuf(c, ulen, 0x1);
816 ret = do_decompress_attrib_data(c, d, pos, cmpr_attr_size,
817 attr_data, ulen, crc_reported, cmpr_meth, cmi, dcflags, "finder attr. data");
818 if(!ret) {
819 de_warn(c, "Failed to decompress finder attribute data");
820 goto done;
823 dpos = 0;
824 dpos += 2; // Finder flags
825 dpos += 4; // Icon location
826 dpos += 2; // Folder
827 dpos += 16; // FXInfo
828 dpos += 1; // file version number
829 dpos += 1; // dir access rights
831 if(flags&0x0008) goto done; // We don't support 64-bit times
832 if(flags&0x0010) goto done; // We want timezone offsets
833 if(attr_data->len - dpos < 6*4) goto done;
835 create_time_raw = dbuf_getu32le_p(attr_data, &dpos);
836 mod_time_raw = dbuf_getu32le_p(attr_data, &dpos);
837 backup_time_raw = dbuf_getu32le_p(attr_data, &dpos);
838 create_time_offset = dbuf_geti32le(attr_data, dpos); dpos += 4;
839 mod_time_offset = dbuf_geti32le(attr_data, dpos); dpos += 4;
840 backup_time_offset = dbuf_geti32le(attr_data, dpos); dpos += 4;
842 handle_mac_time(c, d, create_time_raw, create_time_offset, &tmp_timestamp, "create time");
843 if(create_time_raw>0) {
844 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_CREATE, &tmp_timestamp, 40);
846 handle_mac_time(c, d, mod_time_raw, mod_time_offset, &tmp_timestamp, "mod time ");
847 if(mod_time_raw>0) {
848 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_MODIFY, &tmp_timestamp, 40);
850 handle_mac_time(c, d, backup_time_raw, backup_time_offset, &tmp_timestamp, "backup time");
851 if(backup_time_raw>0) {
852 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_BACKUP, &tmp_timestamp, 40);
855 // Expecting 2 bytes for charset, and at least 2 more for the 2 NUL-terminated
856 // strings that follow.
857 if(attr_data->len - dpos < 4) goto done;
859 charset = (int)dbuf_getu16le_p(attr_data, &dpos);
860 de_dbg(c, "charset for fullpath/comment: %d", charset);
862 // TODO: Can we use the correct encoding?
863 srd = dbuf_read_string(attr_data, dpos, attr_data->len-dpos, DE_DBG_MAX_STRLEN,
864 DE_CONVFLAG_STOP_AT_NUL, DE_ENCODING_ASCII);
865 de_dbg(c, "fullpath: \"%s\"", ucstring_getpsz(srd->str));
866 dpos += srd->bytes_consumed;
867 de_destroy_stringreaderdata(c, srd);
869 srd = dbuf_read_string(attr_data, dpos, attr_data->len-dpos, DE_DBG_MAX_STRLEN,
870 DE_CONVFLAG_STOP_AT_NUL, DE_ENCODING_ASCII);
871 de_dbg(c, "comment: \"%s\"", ucstring_getpsz(srd->str));
872 dpos += srd->bytes_consumed;
873 de_destroy_stringreaderdata(c, srd);
875 done:
876 ucstring_destroy(flags_str);
877 dbuf_close(attr_data);
880 // Acorn / SparkFS / RISC OS
881 static void ef_acorn(deark *c, lctx *d, struct extra_item_info_struct *eii)
883 i64 pos = eii->dpos;
884 struct de_riscos_file_attrs rfa;
886 if(eii->dlen<16) return;
887 if(dbuf_memcmp(c->infile, eii->dpos, "ARC0", 4)) {
888 de_dbg(c, "[unsupported Acorn extra-field type]");
889 return;
891 pos += 4;
893 de_zeromem(&rfa, sizeof(struct de_riscos_file_attrs));
894 fmtutil_riscos_read_load_exec(c, c->infile, &rfa, pos);
895 pos += 8;
896 if(rfa.mod_time.is_valid) {
897 apply_timestamp(c, d, eii->md, DE_TIMESTAMPIDX_MODIFY, &rfa.mod_time, 70);
900 fmtutil_riscos_read_attribs_field(c, c->infile, &rfa, pos, 0);
901 // Note: attribs does not have any information that we care about (no
902 // 'executable' or 'is-directory' flag).
905 struct extra_item_type_info_struct {
906 u16 id;
907 const char *name;
908 extrafield_decoder_fn fn;
910 static const struct extra_item_type_info_struct extra_item_type_info_arr[] = {
911 { 0x0001 /* */, "Zip64 extended information", ef_zip64extinfo },
912 { 0x0007 /* */, "AV Info", NULL },
913 { 0x0008 /* */, "extended language encoding data", NULL },
914 { 0x0009 /* */, "OS/2", ef_os2 },
915 { 0x000a /* */, "NTFS", ef_ntfs },
916 { 0x000c /* */, "OpenVMS", NULL },
917 { 0x000d /* */, "Unix", NULL },
918 { 0x000e /* */, "file stream and fork descriptors", NULL },
919 { 0x000f /* */, "Patch Descriptor", NULL },
920 { 0x0014 /* */, "PKCS#7 Store for X.509 Certificates", NULL },
921 { 0x0015 /* */, "X.509 Certificate ID and Signature for individual file", NULL },
922 { 0x0016 /* */, "X.509 Certificate ID for Central Directory", NULL },
923 { 0x0017 /* */, "Strong Encryption Header", NULL },
924 { 0x0018 /* */, "Record Management Controls", NULL },
925 { 0x0019 /* */, "PKCS#7 Encryption Recipient Certificate List", NULL },
926 { 0x0021 /* */, "Policy Decryption Key", NULL },
927 { 0x0022 /* */, "Smartcrypt Key Provider", NULL },
928 { 0x0023 /* */, "Smartcrypt Policy Key Data", NULL },
929 { 0x0065 /* */, "IBM S/390 (Z390), AS/400 (I400) attributes", NULL },
930 { 0x0066 /* */, "IBM S/390 (Z390), AS/400 (I400) attributes - compressed", NULL },
931 { 0x07c8 /* */, "Macintosh", NULL },
932 { 0x2605 /* */, "ZipIt Macintosh", NULL },
933 { 0x2705 /* */, "ZipIt Macintosh 1.3.5+", ef_zipitmac_2705 },
934 { 0x2805 /* */, "ZipIt Macintosh 1.3.5+", NULL },
935 { 0x334d /* M3 */, "Info-ZIP Macintosh", ef_infozipmac },
936 { 0x4154 /* TA */, "Tandem NSK", NULL },
937 { 0x4341 /* AC */, "Acorn/SparkFS", ef_acorn },
938 { 0x4453 /* SE */, "Windows NT security descriptor (binary ACL)", NULL },
939 { 0x4690 /* */, "POSZIP 4690", NULL },
940 { 0x4704 /* */, "VM/CMS", NULL },
941 { 0x470f /* */, "MVS", NULL },
942 { 0x4854 /* TH */, "Theos, old unofficial port", NULL }, // unzip:extrafld.txt says "inofficial"
943 { 0x4b46 /* FK */, "FWKCS MD5", NULL },
944 { 0x4c41 /* AL */, "OS/2 access control list (text ACL)", NULL },
945 { 0x4d49 /* IM */, "Info-ZIP OpenVMS", NULL },
946 { 0x4d63 /* cM */, "Macintosh SmartZIP", NULL },
947 { 0x4f4c /* LO */, "Xceed original location", NULL },
948 { 0x5350 /* PS */, "Psion?", NULL }, // observed in some Psion files
949 { 0x5356 /* VS */, "AOS/VS (ACL)", NULL },
950 { 0x5455 /* UT */, "extended timestamp", ef_extended_timestamp },
951 { 0x554e /* NU */, "Xceed unicode", NULL },
952 { 0x5855 /* UX */, "Info-ZIP Unix, first version", ef_infozip1 },
953 { 0x6375 /* uc */, "Info-ZIP Unicode Comment", NULL },
954 { 0x6542 /* Be */, "BeOS/BeBox", NULL },
955 { 0x6854 /* Th */, "Theos", NULL },
956 { 0x7075 /* up */, "Info-ZIP Unicode Path", ef_unicodepath },
957 { 0x7441 /* At */, "AtheOS", NULL },
958 { 0x756e /* nu */, "ASi Unix", NULL },
959 { 0x7855 /* Ux */, "Info-ZIP Unix, second version", ef_infozip2 },
960 { 0x7875 /* ux */, "Info-ZIP Unix, third version", ef_infozip3 },
961 { 0xa220 /* */, "Microsoft Open Packaging Growth Hint", NULL },
962 { 0xfb4a /* */, "SMS/QDOS", NULL }, // according to Info-ZIP zip 3.0
963 { 0xfd4a /* */, "SMS/QDOS", NULL } // according to ZIP v6.3.4 APPNOTE
966 static const struct extra_item_type_info_struct *get_extra_item_type_info(i64 id)
968 static const struct extra_item_type_info_struct default_ei =
969 { 0, "?", NULL };
970 size_t i;
972 for(i=0; i<DE_ARRAYCOUNT(extra_item_type_info_arr); i++) {
973 if(id == (i64)extra_item_type_info_arr[i].id) {
974 return &extra_item_type_info_arr[i];
977 return &default_ei;
980 static void do_extra_data(deark *c, lctx *d,
981 struct member_data *md, struct dir_entry_data *dd,
982 i64 pos1, i64 len, int is_central)
984 i64 pos;
986 de_dbg(c, "extra data at %"I64_FMT", len=%d", pos1, (int)len);
987 de_dbg_indent(c, 1);
989 pos = pos1;
990 while(1) {
991 struct extra_item_info_struct eii;
993 if(pos+4 >= pos1+len) break;
994 de_zeromem(&eii, sizeof(struct extra_item_info_struct));
995 eii.md = md;
996 eii.dd = dd;
997 eii.is_central = is_central;
998 eii.dpos = pos+4;
1000 eii.id = (u32)de_getu16le(pos);
1001 eii.dlen = de_getu16le(pos+2);
1003 eii.eiti = get_extra_item_type_info(eii.id);
1005 de_dbg(c, "item id=0x%04x (%s), dlen=%d", (unsigned int)eii.id, eii.eiti->name,
1006 (int)eii.dlen);
1007 if(pos+4+eii.dlen > pos1+len) break;
1009 if(eii.eiti->fn) {
1010 de_dbg_indent(c, 1);
1011 eii.eiti->fn(c, d, &eii);
1012 de_dbg_indent(c, -1);
1015 pos += 4+eii.dlen;
1018 de_dbg_indent(c, -1);
1021 static void do_extract_file(deark *c, lctx *d, struct member_data *md)
1023 dbuf *outf = NULL;
1024 de_finfo *fi = NULL;
1025 struct dir_entry_data *ldd = &md->local_dir_entry_data;
1026 int tsidx;
1027 int saved_indent_level;
1029 de_dbg_indent_save(c, &saved_indent_level);
1030 de_dbg(c, "file data at %"I64_FMT", len=%"I64_FMT, md->file_data_pos,
1031 md->cmpr_size);
1032 de_dbg_indent(c, 1);
1034 if(ldd->bit_flags & 0x1) {
1035 de_err(c, "%s: Encryption is not supported", ucstring_getpsz_d(ldd->fname));
1036 goto done;
1039 if(!is_compression_method_supported(d, ldd->cmi)) {
1040 de_err(c, "%s: Unsupported compression method: %d (%s)",
1041 ucstring_getpsz_d(ldd->fname),
1042 ldd->cmpr_meth, (ldd->cmi ? ldd->cmi->name : "?"));
1043 goto done;
1046 if(md->file_data_pos+md->cmpr_size > c->infile->len) {
1047 de_err(c, "%s: Data goes beyond end of file", ucstring_getpsz_d(ldd->fname));
1048 goto done;
1051 if(md->is_symlink) {
1052 de_warn(c, "\"%s\" is a symbolic link. It will not be extracted as a link.",
1053 ucstring_getpsz_d(ldd->fname));
1056 fi = de_finfo_create(c);
1057 fi->detect_root_dot_dir = 1;
1059 if(ucstring_isnonempty(ldd->fname)) {
1060 unsigned int snflags = DE_SNFLAG_FULLPATH;
1061 if(md->is_dir) snflags |= DE_SNFLAG_STRIPTRAILINGSLASH;
1062 de_finfo_set_name_from_ucstring(c, fi, ldd->fname, snflags);
1063 fi->original_filename_flag = 1;
1066 for(tsidx=0; tsidx<DE_TIMESTAMPIDX_COUNT; tsidx++) {
1067 if(md->tsdata[tsidx].ts.is_valid) {
1068 fi->timestamp[tsidx] = md->tsdata[tsidx].ts;
1072 if(md->is_dir) {
1073 fi->is_directory = 1;
1075 else if(md->is_executable) {
1076 fi->mode_flags |= DE_MODEFLAG_EXE;
1078 else if(md->is_nonexecutable) {
1079 fi->mode_flags |= DE_MODEFLAG_NONEXE;
1082 outf = dbuf_create_output_file(c, NULL, fi, 0);
1083 if(md->is_dir) {
1084 goto done;
1087 (void)do_decompress_member(c, d, md, outf);
1089 done:
1090 dbuf_close(outf);
1091 de_finfo_destroy(c, fi);
1092 de_dbg_indent_restore(c, saved_indent_level);
// Translate the "platform" byte of a version field (the high byte of
// "version made by" or "version needed") to a printable name.
// Returns "?" for codes that are not recognized. Never returns NULL.
static const char *get_platform_name(unsigned int ver_hi)
{
	// Indexed by platform code. Codes 0 through 19 are contiguous.
	static const char *const known_names[] = {
		"MS-DOS, etc.", "Amiga", "OpenVMS", "Unix",
		"VM/CMS", "Atari ST", "HPFS", "Macintosh",
		"Z-System", "CP/M", "NTFS or TOPS-20", "MVS or NTFS",
		"VSE or SMS/QDOS", "Acorn RISC OS", "VFAT", "MVS",
		"BeOS", "Tandem", "OS/400", "OS X" };
	const unsigned int num_known =
		(unsigned int)(sizeof(known_names)/sizeof(known_names[0]));

	// The one recognized code that lies outside the table
	if(ver_hi == 30) {
		return "AtheOS/Syllable";
	}
	if(ver_hi >= num_known) {
		return "?";
	}
	return known_names[ver_hi];
}
1110 // Look at the attributes, and set some other fields based on them.
1111 static void process_ext_attr(deark *c, lctx *d, struct member_data *md)
1113 if(d->using_scanmode) {
1114 // In this mode, there is no 'external attribs' field.
1115 return;
1118 if(md->ver_made_by_hi==3) { // Unix
1119 unsigned int unix_filetype;
1120 unix_filetype = (md->attr_e>>16)&0170000;
1121 if(unix_filetype == 0040000) {
1122 md->is_dir = 1;
1124 else if(unix_filetype == 0120000) {
1125 md->is_symlink = 1;
1128 if((md->attr_e>>16)&0111) {
1129 md->is_executable = 1;
1131 else {
1132 md->is_nonexecutable = 1;
1136 // MS-DOS-style attributes.
1137 // Technically, we should only do this if
1138 // md->central_dir_entry_data.ver_made_by_hi==0.
1139 // However, most(?) zip programs set the low byte of the external attribs
1140 // to the equivalent MS-DOS attribs, at least in cases where it matters.
1141 if(md->attr_e & 0x10) {
1142 md->is_dir = 1;
1145 // TODO: Support more platforms.
1146 // TODO: The 0x756e (ASi Unix) extra field might be important, as it contains
1147 // file permissions.
1149 if(md->is_dir && md->uncmpr_size!=0) {
1150 // I'd expect a subdirectory entry to have zero size. If it doesn't,
1151 // let's just assume we misidentified it as a subdirectory, and
1152 // extract its data.
1153 md->is_dir = 0;
1157 static void describe_internal_attr(deark *c, struct member_data *md,
1158 de_ucstring *s)
1160 unsigned int bf = md->attr_i;
1162 if(bf & 0x0001) {
1163 ucstring_append_flags_item(s, "text file");
1164 bf -= 0x0001;
1167 if(bf!=0) { // Report any unrecognized flags
1168 ucstring_append_flags_itemf(s, "0x%04x", bf);
1172 // Uses dd->bit_flags, dd->cmpr_method
1173 static void describe_general_purpose_bit_flags(deark *c, struct dir_entry_data *dd,
1174 de_ucstring *s)
1176 const char *name;
1177 unsigned int bf = dd->bit_flags;
1179 if(bf & 0x0001) {
1180 ucstring_append_flags_item(s, "encrypted");
1181 bf -= 0x0001;
1184 if(dd->cmpr_meth==6) { // implode
1185 if(bf & 0x0002) {
1186 name = "8K";
1187 bf -= 0x0002;
1189 else {
1190 name = "4K";
1192 ucstring_append_flags_itemf(s, "%s sliding dictionary", name);
1194 if(bf & 0x0004) {
1195 name = "3";
1196 bf -= 0x0004;
1198 else {
1199 name = "2";
1201 ucstring_append_flags_itemf(s, "%s trees", name);
1204 if(dd->cmpr_meth==8 || dd->cmpr_meth==9) { // deflate flags
1205 unsigned int code;
1207 code = (bf & 0x0006)>>1;
1208 switch(code) {
1209 case 1: name="max"; break;
1210 case 2: name="fast"; break;
1211 case 3: name="super_fast"; break;
1212 default: name="normal";
1214 ucstring_append_flags_itemf(s, "cmprlevel=%s", name);
1215 bf -= (bf & 0x0006);
1218 if(bf & 0x0008) {
1219 ucstring_append_flags_item(s, "uses data descriptor");
1220 bf -= 0x0008;
1223 if(bf & 0x0800) {
1224 ucstring_append_flags_item(s, "UTF-8");
1225 bf -= 0x0800;
1228 if(bf!=0) { // Report any unrecognized flags
1229 ucstring_append_flags_itemf(s, "0x%04x", bf);
1233 // Read either a central directory entry (a.k.a. central directory file header),
1234 // or a local file header.
1235 static int do_file_header(deark *c, lctx *d, struct member_data *md,
1236 int is_central, i64 pos1, i64 *p_entry_size)
1238 i64 pos;
1239 u32 sig;
1240 i64 fn_len, extra_len, comment_len;
1241 int utf8_flag;
1242 int retval = 0;
1243 i64 fixed_header_size;
1244 i64 mod_time_raw, mod_date_raw;
1245 struct dir_entry_data *dd; // Points to either md->central or md->local
1246 de_ucstring *descr = NULL;
1247 struct de_timestamp dos_timestamp;
1248 char timestamp_buf[64];
1250 pos = pos1;
1251 descr = ucstring_create(c);
1252 if(is_central) {
1253 dd = &md->central_dir_entry_data;
1254 fixed_header_size = 46;
1255 de_dbg(c, "central dir entry at %"I64_FMT, pos);
1257 else {
1258 dd = &md->local_dir_entry_data;
1259 fixed_header_size = 30;
1260 if(md->disk_number_start!=d->this_disk_num) {
1261 de_err(c, "Member file not in this ZIP file");
1262 return 0;
1264 de_dbg(c, "local file header at %"I64_FMT, pos);
1266 de_dbg_indent(c, 1);
1268 sig = (u32)de_getu32le_p(&pos);
1269 if(is_central && sig!=CODE_PK12) {
1270 de_err(c, "Central dir file header not found at %"I64_FMT, pos1);
1271 goto done;
1273 else if(!is_central && sig!=CODE_PK34) {
1274 de_err(c, "Local file header not found at %"I64_FMT, pos1);
1275 goto done;
1278 if(is_central) {
1279 md->ver_made_by = (unsigned int)de_getu16le_p(&pos);
1280 md->ver_made_by_hi = (unsigned int)((md->ver_made_by&0xff00)>>8);
1281 md->ver_made_by_lo = (unsigned int)(md->ver_made_by&0x00ff);
1282 de_dbg(c, "version made by: platform=%u (%s), ZIP spec=%u.%u",
1283 md->ver_made_by_hi, get_platform_name(md->ver_made_by_hi),
1284 (unsigned int)(md->ver_made_by_lo/10), (unsigned int)(md->ver_made_by_lo%10));
1287 dd->ver_needed = (unsigned int)de_getu16le_p(&pos);
1288 dd->ver_needed_hi = (unsigned int)((dd->ver_needed&0xff00)>>8);
1289 dd->ver_needed_lo = (unsigned int)(dd->ver_needed&0x00ff);
1290 de_dbg(c, "version needed to extract: platform=%u (%s), ZIP spec=%u.%u",
1291 dd->ver_needed_hi, get_platform_name(dd->ver_needed_hi),
1292 (unsigned int)(dd->ver_needed_lo/10), (unsigned int)(dd->ver_needed_lo%10));
1294 dd->bit_flags = (unsigned int)de_getu16le_p(&pos);
1295 dd->cmpr_meth = (int)de_getu16le_p(&pos);
1296 dd->cmi = get_cmpr_meth_info(dd->cmpr_meth);
1298 utf8_flag = (dd->bit_flags & 0x800)?1:0;
1299 ucstring_empty(descr);
1300 describe_general_purpose_bit_flags(c, dd, descr);
1301 de_dbg(c, "flags: 0x%04x (%s)", dd->bit_flags, ucstring_getpsz(descr));
1303 de_dbg(c, "cmpr method: %d (%s)", dd->cmpr_meth,
1304 (dd->cmi ? dd->cmi->name : "?"));
1306 mod_time_raw = de_getu16le_p(&pos);
1307 mod_date_raw = de_getu16le_p(&pos);
1308 de_dos_datetime_to_timestamp(&dos_timestamp, mod_date_raw, mod_time_raw);
1309 dos_timestamp.tzcode = DE_TZCODE_LOCAL;
1310 de_dbg_timestamp_to_string(c, &dos_timestamp, timestamp_buf, sizeof(timestamp_buf), 0);
1311 de_dbg(c, "mod time: %s", timestamp_buf);
1312 apply_timestamp(c, d, md, DE_TIMESTAMPIDX_MODIFY, &dos_timestamp, 10);
1314 dd->crc_reported = (u32)de_getu32le_p(&pos);
1315 de_dbg(c, "crc (reported): 0x%08x", (unsigned int)dd->crc_reported);
1317 dd->cmpr_size = de_getu32le_p(&pos);
1318 dd->uncmpr_size = de_getu32le_p(&pos);
1319 de_dbg(c, "cmpr size: %" I64_FMT ", uncmpr size: %" I64_FMT, dd->cmpr_size, dd->uncmpr_size);
1321 fn_len = de_getu16le_p(&pos);
1323 extra_len = de_getu16le_p(&pos);
1325 if(is_central) {
1326 comment_len = de_getu16le_p(&pos);
1328 else {
1329 comment_len = 0;
1332 if(!is_central) {
1333 md->file_data_pos = pos + fn_len + extra_len;
1336 if(is_central) {
1337 md->disk_number_start = de_getu16le_p(&pos);
1339 md->attr_i = (unsigned int)de_getu16le_p(&pos);
1340 ucstring_empty(descr);
1341 describe_internal_attr(c, md, descr);
1342 de_dbg(c, "internal file attributes: 0x%04x (%s)", md->attr_i,
1343 ucstring_getpsz(descr));
1345 md->attr_e = (unsigned int)de_getu32le_p(&pos);
1346 de_dbg(c, "external file attributes: 0x%08x", md->attr_e);
1347 de_dbg_indent(c, 1);
1350 // The low byte is, AFAIK, *almost* universally used for MS-DOS-style
1351 // attributes.
1352 unsigned int dos_attrs = (md->attr_e & 0xff);
1353 ucstring_empty(descr);
1354 de_describe_dos_attribs(c, dos_attrs, descr, 0);
1355 de_dbg(c, "%sMS-DOS attribs: 0x%02x (%s)",
1356 (md->ver_made_by_hi==0)?"":"(hypothetical) ",
1357 dos_attrs, ucstring_getpsz(descr));
1360 if((md->attr_e>>16) != 0) {
1361 // A number of platforms put Unix-style file attributes here, so
1362 // decode them as such whenever they are nonzero.
1363 de_dbg(c, "%sUnix attribs: octal(%06o)",
1364 (md->ver_made_by_hi==3)?"":"(hypothetical) ",
1365 (unsigned int)(md->attr_e>>16));
1368 de_dbg_indent(c, -1);
1370 md->offset_of_local_header = de_getu32le_p(&pos);
1371 de_dbg(c, "offset of local header: %"I64_FMT", disk: %d", md->offset_of_local_header,
1372 (int)md->disk_number_start);
1375 if(is_central) {
1376 de_dbg(c, "filename_len: %d, extra_len: %d, comment_len: %d", (int)fn_len,
1377 (int)extra_len, (int)comment_len);
1379 else {
1380 de_dbg(c, "filename_len: %d, extra_len: %d", (int)fn_len,
1381 (int)extra_len);
1384 *p_entry_size = fixed_header_size + fn_len + extra_len + comment_len;
1386 dd->main_fname_pos = pos1+fixed_header_size;
1387 dd->main_fname_len = fn_len;
1388 do_read_filename(c, d, md, dd, pos1+fixed_header_size, fn_len, utf8_flag);
1390 if(extra_len>0) {
1391 do_extra_data(c, d, md, dd, pos1+fixed_header_size+fn_len, extra_len, is_central);
1394 if(comment_len>0) {
1395 do_comment(c, d, pos1+fixed_header_size+fn_len+extra_len, comment_len, utf8_flag,
1396 "member file comment", "fcomment.txt");
1399 if(is_central) {
1400 if(d->used_offset_discrepancy) {
1401 md->offset_of_local_header += d->offset_discrepancy;
1402 de_dbg(c, "assuming local header is really at %"I64_FMT, md->offset_of_local_header);
1404 else if(d->offset_discrepancy!=0) {
1405 u32 sig1, sig2;
1406 i64 alt_pos;
1408 sig1 = (u32)de_getu32le(md->offset_of_local_header);
1409 if(sig1!=CODE_PK34) {
1410 alt_pos = md->offset_of_local_header + d->offset_discrepancy;
1411 sig2 = (u32)de_getu32le(alt_pos);
1412 if(sig2==CODE_PK34) {
1413 de_warn(c, "Local file header found at %"I64_FMT" instead of %"I64_FMT". "
1414 "Assuming offsets are wrong by %"I64_FMT" bytes.",
1415 alt_pos, md->offset_of_local_header, d->offset_discrepancy);
1416 md->offset_of_local_header += d->offset_discrepancy;
1417 d->used_offset_discrepancy = 1;
1423 retval = 1;
1425 done:
1426 de_dbg_indent(c, -1);
1427 ucstring_destroy(descr);
1428 return retval;
1431 static struct member_data *create_member_data(deark *c, lctx *d)
1433 struct member_data *md;
1435 md = de_malloc(c, sizeof(struct member_data));
1436 md->local_dir_entry_data.fname = ucstring_create(c);
1437 md->central_dir_entry_data.fname = ucstring_create(c);
1438 return md;
1441 static void destroy_member_data(deark *c, struct member_data *md)
1443 if(!md) return;
1444 ucstring_destroy(md->central_dir_entry_data.fname);
1445 ucstring_destroy(md->local_dir_entry_data.fname);
1446 de_free(c, md);
1449 static i32 ucstring_lastchar(de_ucstring *s)
1451 if(!s || s->len<1) return 0;
1452 return s->str[s->len-1];
1455 // Things to do after both the central and local headers have been read.
1456 // E.g., extract the file.
1457 static int do_process_member(deark *c, lctx *d, struct member_data *md)
1459 int retval = 0;
1461 // If for some reason we have a central-dir filename but not a local-dir
1462 // filename, use the central-dir filename.
1463 if(ucstring_isempty(md->local_dir_entry_data.fname) &&
1464 ucstring_isnonempty(md->central_dir_entry_data.fname))
1466 ucstring_append_ucstring(md->local_dir_entry_data.fname,
1467 md->central_dir_entry_data.fname);
1470 // Set the final file size and crc fields.
1471 if(md->local_dir_entry_data.bit_flags & 0x0008) {
1472 if(d->using_scanmode) {
1473 de_err(c, "File is incompatible with scan mode");
1474 goto done;
1477 // Indicates that certain fields are not present in the local file header,
1478 // and are instead in a "data descriptor" after the file data.
1479 // Let's hope they are also in the central file header.
1480 md->cmpr_size = md->central_dir_entry_data.cmpr_size;
1481 md->uncmpr_size = md->central_dir_entry_data.uncmpr_size;
1482 md->crc_reported = md->central_dir_entry_data.crc_reported;
1484 else {
1485 md->cmpr_size = md->local_dir_entry_data.cmpr_size;
1486 md->uncmpr_size = md->local_dir_entry_data.uncmpr_size;
1487 md->crc_reported = md->local_dir_entry_data.crc_reported;
1490 process_ext_attr(c, d, md);
1492 // In some cases, detect directories by checking whether the filename ends
1493 // with a slash.
1494 if(!md->is_dir && md->uncmpr_size==0 &&
1495 (d->using_scanmode || (md->ver_made_by_lo<20)))
1497 if(ucstring_lastchar(md->local_dir_entry_data.fname) == '/') {
1498 de_dbg(c, "[assuming this is a subdirectory]");
1499 md->is_dir = 1;
1503 do_extract_file(c, d, md);
1504 retval = 1;
1506 done:
1507 return retval;
1510 // In *entry_size, returns the size of the central dir entry.
1511 // Returns 0 if the central dir entry could not even be parsed.
1512 static int do_member_from_central_dir_entry(deark *c, lctx *d,
1513 struct member_data *md, i64 central_index, i64 pos, i64 *entry_size)
1515 i64 tmp_entry_size;
1516 int retval = 0;
1517 int saved_indent_level;
1519 de_dbg_indent_save(c, &saved_indent_level);
1521 *entry_size = 0;
1523 if(pos >= d->central_dir_offset+d->central_dir_byte_size) {
1524 goto done;
1527 de_dbg(c, "central dir entry #%d", (int)central_index);
1528 de_dbg_indent(c, 1);
1530 // Read the central dir file header
1531 if(!do_file_header(c, d, md, 1, pos, entry_size)) {
1532 goto done;
1535 // If we were able to read the central dir file header, we might be able
1536 // to continue and read more files, even if the local file header fails.
1537 retval = 1;
1539 // Read the local file header
1540 if(!do_file_header(c, d, md, 0, md->offset_of_local_header, &tmp_entry_size)) {
1541 goto done;
1544 do_process_member(c, d, md);
1546 done:
1547 de_dbg_indent_restore(c, saved_indent_level);
1548 return retval;
1551 static int do_central_dir_entry(deark *c, lctx *d,
1552 i64 central_index, i64 pos, i64 *entry_size)
1554 struct member_data *md = NULL;
1555 int ret;
1557 md = create_member_data(c, d);
1558 ret = do_member_from_central_dir_entry(c, d, md, central_index, pos, entry_size);
1559 destroy_member_data(c, md);
1560 return ret;
1563 static int do_local_dir_only(deark *c, lctx *d, i64 pos1, i64 *pmember_size)
1565 struct member_data *md = NULL;
1566 i64 tmp_entry_size;
1567 int retval = 0;
1569 md = create_member_data(c, d);
1571 md->offset_of_local_header = pos1;
1573 // Read the local file header
1574 if(!do_file_header(c, d, md, 0, md->offset_of_local_header, &tmp_entry_size)) {
1575 goto done;
1578 if(!do_process_member(c, d, md)) goto done;
1580 *pmember_size = md->file_data_pos + md->cmpr_size - pos1;
1581 retval = 1;
1583 done:
1584 destroy_member_data(c, md);
1585 return retval;
1588 static void de_run_zip_scanmode(deark *c, lctx *d)
1590 i64 pos = 0;
1592 d->using_scanmode = 1;
1594 while(1) {
1595 int ret;
1596 i64 foundpos = 0;
1597 i64 member_size = 0;
1599 if(pos > c->infile->len-4) break;
1600 ret = dbuf_search(c->infile, g_zipsig34, 4, pos, c->infile->len-pos, &foundpos);
1601 if(!ret) break;
1602 pos = foundpos;
1603 de_dbg(c, "zip member at %"I64_FMT, pos);
1604 de_dbg_indent(c, 1);
1605 ret = do_local_dir_only(c, d, pos, &member_size);
1606 de_dbg_indent(c, -1);
1607 if(!ret) break;
1608 if(member_size<1) break;
1609 pos += member_size;
1613 static int do_central_dir(deark *c, lctx *d)
1615 i64 i;
1616 i64 pos;
1617 i64 entry_size;
1618 int retval = 0;
1620 pos = d->central_dir_offset;
1621 de_dbg(c, "central dir at %"I64_FMT, pos);
1622 de_dbg_indent(c, 1);
1624 for(i=0; i<d->central_dir_num_entries; i++) {
1625 if(!do_central_dir_entry(c, d, i, pos, &entry_size)) {
1626 // TODO: Decide exactly what to do if something fails.
1627 goto done;
1629 pos += entry_size;
1631 retval = 1;
1633 done:
1634 de_dbg_indent(c, -1);
1635 return retval;
1638 static int do_zip64_eocd(deark *c, lctx *d)
1640 i64 pos;
1641 i64 n;
1642 int retval = 0;
1643 int saved_indent_level;
1644 UI ver, ver_hi, ver_lo;
1646 de_dbg_indent_save(c, &saved_indent_level);
1648 if(d->zip64_eocd_disknum!=0) {
1649 de_warn(c, "This might be a multi-disk Zip64 archive, which is not supported");
1650 retval = 1;
1651 d->is_zip64 = 0;
1652 goto done;
1655 pos = d->zip64_eocd_pos;
1656 if(dbuf_memcmp(c->infile, pos, g_zipsig66, 4)) {
1657 de_warn(c, "Expected Zip64 end-of-central-directory record not found at %"I64_FMT, pos);
1658 retval = 1; // Maybe the eocd locator sig was a false positive?
1659 d->is_zip64 = 0;
1660 goto done;
1663 de_dbg(c, "zip64 end-of-central-dir record at %"I64_FMT, pos);
1664 pos += 4;
1665 de_dbg_indent(c, 1);
1667 n = de_geti64le(pos); pos += 8;
1668 de_dbg(c, "size of zip64 eocd record: (12+)%"I64_FMT, n);
1670 ver = (UI)de_getu16le_p(&pos);
1671 ver_hi = (ver&0xff00)>>8;
1672 ver_lo = ver&0x00ff;
1673 de_dbg(c, "version made by: platform=%u (%s), ZIP spec=%u.%u",
1674 ver_hi, get_platform_name(ver_hi), (UI)(ver_lo/10), (UI)(ver_lo%10));
1676 ver = (UI)de_getu16le_p(&pos);
1677 ver_hi = (ver&0xff00)>>8;
1678 ver_lo = ver&0x00ff;
1679 de_dbg(c, "version needed: platform=%u (%s), ZIP spec=%u.%u",
1680 ver_hi, get_platform_name(ver_hi), (UI)(ver_lo/10), (UI)(ver_lo%10));
1682 n = de_getu32le_p(&pos);
1683 de_dbg(c, "this disk num: %"I64_FMT, n);
1685 d->zip64_cd_disknum = (unsigned int)de_getu32le_p(&pos);
1686 d->zip64_num_centr_dir_entries_this_disk = de_geti64le(pos); pos += 8;
1687 de_dbg(c, "central dir num entries on this disk: %"I64_FMT, d->zip64_num_centr_dir_entries_this_disk);
1688 d->zip64_num_centr_dir_entries_total = de_geti64le(pos); pos += 8;
1689 de_dbg(c, "central dir num entries: %"I64_FMT, d->zip64_num_centr_dir_entries_total);
1690 d->zip64_centr_dir_byte_size = de_geti64le(pos); pos += 8;
1691 de_dbg(c, "central dir size: %"I64_FMT, d->zip64_centr_dir_byte_size);
1692 d->zip64_cd_pos = de_geti64le(pos); pos += 8;
1693 de_dbg(c, "central dir offset: %"I64_FMT", disk: %u",
1694 d->zip64_cd_pos, d->zip64_cd_disknum);
1696 retval = 1;
1697 done:
1698 de_dbg_indent_restore(c, saved_indent_level);
1699 return retval;
1702 static void do_zip64_eocd_locator(deark *c, lctx *d)
1704 i64 n;
1705 i64 pos = d->end_of_central_dir_pos - 20;
1707 if(dbuf_memcmp(c->infile, pos, g_zipsig67, 4)) {
1708 return;
1710 de_dbg(c, "zip64 eocd locator found at %"I64_FMT, pos);
1711 pos += 4;
1712 d->is_zip64 = 1;
1713 de_dbg_indent(c, 1);
1714 d->zip64_eocd_disknum = (unsigned int)de_getu32le_p(&pos);
1715 d->zip64_eocd_pos = de_geti64le(pos); pos += 8;
1716 de_dbg(c, "offset of zip64 eocd: %"I64_FMT", disk: %u",
1717 d->zip64_eocd_pos, d->zip64_eocd_disknum);
1718 n = de_getu32le_p(&pos);
1719 de_dbg(c, "total number of disks: %u", (unsigned int)n);
1720 de_dbg_indent(c, -1);
1723 static int do_end_of_central_dir(deark *c, lctx *d)
1725 i64 pos;
1726 i64 num_entries_this_disk;
1727 i64 disk_num_with_central_dir_start;
1728 i64 comment_length;
1729 i64 alt_central_dir_offset;
1730 int retval = 0;
1732 pos = d->end_of_central_dir_pos;
1733 de_dbg(c, "end-of-central-dir record at %"I64_FMT, pos);
1734 de_dbg_indent(c, 1);
1736 d->this_disk_num = de_getu16le(pos+4);
1737 de_dbg(c, "this disk num: %"I64_FMT, d->this_disk_num);
1738 disk_num_with_central_dir_start = de_getu16le(pos+6);
1740 num_entries_this_disk = de_getu16le(pos+8);
1741 de_dbg(c, "central dir num entries on this disk: %"I64_FMT, num_entries_this_disk);
1742 if(d->is_zip64 && (num_entries_this_disk==0xffff)) {
1743 num_entries_this_disk = d->zip64_num_centr_dir_entries_this_disk;
1746 d->central_dir_num_entries = de_getu16le(pos+10);
1747 d->central_dir_byte_size = de_getu32le(pos+12);
1748 d->central_dir_offset = de_getu32le(pos+16);
1749 de_dbg(c, "central dir num entries: %"I64_FMT, d->central_dir_num_entries);
1750 if(d->is_zip64 && (d->central_dir_num_entries==0xffff)) {
1751 d->central_dir_num_entries = d->zip64_num_centr_dir_entries_total;
1754 de_dbg(c, "central dir size: %"I64_FMT, d->central_dir_byte_size);
1755 if(d->is_zip64 && (d->central_dir_byte_size==0xffffffffLL)) {
1756 d->central_dir_byte_size = d->zip64_centr_dir_byte_size;
1759 de_dbg(c, "central dir offset: %"I64_FMT", disk: %"I64_FMT, d->central_dir_offset,
1760 disk_num_with_central_dir_start);
1761 if(d->is_zip64 && (d->central_dir_offset==0xffffffffLL)) {
1762 d->central_dir_offset = d->zip64_cd_pos;
1765 comment_length = de_getu16le(pos+20);
1766 de_dbg(c, "comment length: %d", (int)comment_length);
1767 if(comment_length>0) {
1768 // The comment for the whole .ZIP file presumably has to use
1769 // cp437 encoding. There's no flag that could indicate otherwise.
1770 do_comment(c, d, pos+22, comment_length, 0,
1771 "ZIP file comment", "comment.txt");
1774 // TODO: Figure out exactly how to detect disk spanning.
1775 if(disk_num_with_central_dir_start!=d->this_disk_num ||
1776 (d->is_zip64 && d->zip64_eocd_disknum!=d->this_disk_num))
1778 de_err(c, "Disk spanning not supported");
1779 goto done;
1782 if(d->this_disk_num!=0) {
1783 de_warn(c, "This ZIP file might be part of a multi-part archive, and "
1784 "might not be supported correctly");
1787 if(num_entries_this_disk!=d->central_dir_num_entries) {
1788 de_warn(c, "This ZIP file might not be supported correctly "
1789 "(number-of-entries-this-disk=%d, number-of-entries-total=%d)",
1790 (int)num_entries_this_disk, (int)d->central_dir_num_entries);
1793 alt_central_dir_offset =
1794 (d->is_zip64 ? d->zip64_eocd_pos : d->end_of_central_dir_pos) -
1795 d->central_dir_byte_size;
1797 if(alt_central_dir_offset != d->central_dir_offset) {
1798 u32 sig;
1800 de_warn(c, "Inconsistent central directory offset. Reported to be %"I64_FMT", "
1801 "but based on its reported size, it should be %"I64_FMT".",
1802 d->central_dir_offset, alt_central_dir_offset);
1804 sig = (u32)de_getu32le(alt_central_dir_offset);
1805 if(sig==CODE_PK12) {
1806 d->offset_discrepancy = alt_central_dir_offset - d->central_dir_offset;
1807 de_dbg(c, "likely central dir found at %"I64_FMT, alt_central_dir_offset);
1808 d->central_dir_offset = alt_central_dir_offset;
1812 retval = 1;
1814 done:
1815 de_dbg_indent(c, -1);
1816 return retval;
1819 static void de_run_zip_normally(deark *c, lctx *d)
1821 int eocd_found;
1823 if(c->detection_data && c->detection_data->zip_eocd_looked_for) {
1824 eocd_found = (int)c->detection_data->zip_eocd_found;
1825 d->end_of_central_dir_pos = c->detection_data->zip_eocd_pos;
1827 else {
1828 eocd_found = fmtutil_find_zip_eocd(c, c->infile, &d->end_of_central_dir_pos);
1830 if(!eocd_found) {
1831 if(c->module_disposition==DE_MODDISP_AUTODETECT ||
1832 c->module_disposition==DE_MODDISP_EXPLICIT)
1834 if(de_getu32le(0)==CODE_PK34) {
1835 de_err(c, "ZIP central directory not found. "
1836 "You could try \"-opt zip:scanmode\".");
1837 goto done;
1840 de_err(c, "Not a valid ZIP file");
1841 goto done;
1844 de_dbg(c, "end-of-central-dir record found at %"I64_FMT,
1845 d->end_of_central_dir_pos);
1847 do_zip64_eocd_locator(c, d);
1849 if(d->is_zip64) {
1850 if(!do_zip64_eocd(c, d)) goto done;
1853 if(d->is_zip64)
1854 de_declare_fmt(c, "ZIP-Zip64");
1855 else
1856 de_declare_fmt(c, "ZIP");
1858 if(!do_end_of_central_dir(c, d)) {
1859 goto done;
1862 if(!do_central_dir(c, d)) {
1863 goto done;
1866 done:
1870 static void de_run_zip(deark *c, de_module_params *mparams)
1872 lctx *d = NULL;
1873 de_encoding enc;
1875 d = de_malloc(c, sizeof(lctx));
1877 enc = de_get_input_encoding(c, NULL, DE_ENCODING_CP437);
1878 d->default_enc_for_filenames = enc;
1879 d->default_enc_for_comments = enc;
1881 d->crco = de_crcobj_create(c, DE_CRCOBJ_CRC32_IEEE);
1883 if(de_get_ext_option(c, "zip:scanmode")) {
1884 de_run_zip_scanmode(c, d);
1886 else {
1887 de_run_zip_normally(c, d);
1890 if(d) {
1891 de_crcobj_destroy(d->crco);
1892 de_free(c, d);
1896 static int de_identify_zip(deark *c)
1898 u8 b[4];
1899 int has_zip_ext;
1900 int has_mz_sig = 0;
1902 has_zip_ext = de_input_file_has_ext(c, "zip");
1904 // Fast tests:
1906 de_read(b, 0, 4);
1907 if(!de_memcmp(b, g_zipsig34, 4)) {
1908 return has_zip_ext ? 100 : 90;
1910 if(b[0]=='M' && b[1]=='Z') has_mz_sig = 1;
1912 if(c->infile->len >= 22) {
1913 de_read(b, c->infile->len - 22, 4);
1914 if(!de_memcmp(b, g_zipsig56, 4)) {
1915 return has_zip_ext ? 100 : 19;
1919 // Things to consider:
1920 // * We want de_fmtutil_find_zip_eocd() to be called no more than once, and
1921 // only on files that for some reason we suspect could be ZIP files.
1922 // * If the user disables exe format detection (e.g. with "-onlydetect zip"),
1923 // we want self-extracting-ZIP .exe files to be detected as ZIP instead.
1924 // * And we want the above to work even if the file has a ZIP file comment,
1925 // making it expensive to detect as ZIP.
1927 // Tests below can't return a confidence higher than this.
1928 if(c->detection_data->best_confidence_so_far >= 19) return 0;
1930 // Slow tests:
1932 if(has_mz_sig || has_zip_ext) {
1933 i64 eocd_pos = 0;
1935 c->detection_data->zip_eocd_looked_for = 1;
1936 if(fmtutil_find_zip_eocd(c, c->infile, &eocd_pos)) {
1937 c->detection_data->zip_eocd_found = 1;
1938 c->detection_data->zip_eocd_pos = eocd_pos;
1939 return 19;
1943 return 0;
1946 static void de_help_zip(deark *c)
1948 de_msg(c, "-opt zip:scanmode : Do not use the \"central directory\"");
1949 de_msg(c, "-opt zip:implodebug : Behave like PKZIP 1.01/1.02");
1952 void de_module_zip(deark *c, struct deark_module_info *mi)
1954 mi->id = "zip";
1955 mi->desc = "ZIP archive";
1956 mi->run_fn = de_run_zip;
1957 mi->identify_fn = de_identify_zip;
1958 mi->help_fn = de_help_zip;