1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
7 #include <deark-config.h>
8 #include <deark-private.h>
9 #include <deark-fmtutil.h>
11 DE_DECLARE_MODULE(de_module_zip
);
13 struct localctx_struct
;
14 typedef struct localctx_struct lctx
;
16 #define CODE_PK12 0x02014b50U
17 #define CODE_PK34 0x04034b50U
18 static const u8 g_zipsig34
[4] = {'P', 'K', 0x03, 0x04};
19 static const u8 g_zipsig56
[4] = {'P', 'K', 0x05, 0x06};
20 static const u8 g_zipsig66
[4] = {'P', 'K', 0x06, 0x06};
21 static const u8 g_zipsig67
[4] = {'P', 'K', 0x06, 0x07};
// ZIP-specific parameters (not part of the de_dfilter_*_params structs) that
// a decompressor may need in order to decompress something.
struct compression_params {
	// ZIP compression method number. The "reduce" decompressor derives its
	// compression factor from this.
	int cmpr_meth;
	// Bit flags handed to do_decompress_data(). The "implode" decompressor
	// forwards them to the implode codec.
	unsigned int bit_flags;
};
30 typedef void (*decompressor_fn
)(deark
*c
, lctx
*d
, struct compression_params
*cparams
,
31 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
32 struct de_dfilter_results
*dres
);
34 struct cmpr_meth_info
{
38 decompressor_fn decompressor
;
41 struct dir_entry_data
{
42 unsigned int ver_needed
;
43 unsigned int ver_needed_hi
, ver_needed_lo
;
44 i64 cmpr_size
, uncmpr_size
;
46 const struct cmpr_meth_info
*cmi
;
47 unsigned int bit_flags
;
54 struct timestamp_data
{
55 struct de_timestamp ts
; // The best timestamp of this type found so far
60 unsigned int ver_made_by
;
61 unsigned int ver_made_by_hi
, ver_made_by_lo
;
62 unsigned int attr_i
, attr_e
;
63 i64 offset_of_local_header
;
64 i64 disk_number_start
;
70 struct de_crcobj
*crco
; // copy of lctx::crco
71 struct timestamp_data tsdata
[DE_TIMESTAMPIDX_COUNT
];
73 struct dir_entry_data central_dir_entry_data
;
74 struct dir_entry_data local_dir_entry_data
;
76 i64 cmpr_size
, uncmpr_size
;
80 struct extra_item_type_info_struct
;
82 struct extra_item_info_struct
{
86 const struct extra_item_type_info_struct
*eiti
;
87 struct member_data
*md
;
88 struct dir_entry_data
*dd
;
92 struct localctx_struct
{
93 i64 end_of_central_dir_pos
;
94 i64 central_dir_num_entries
;
95 i64 central_dir_byte_size
;
96 i64 central_dir_offset
;
100 i64 zip64_num_centr_dir_entries_this_disk
;
101 i64 zip64_num_centr_dir_entries_total
;
102 i64 zip64_centr_dir_byte_size
;
103 unsigned int zip64_eocd_disknum
;
104 unsigned int zip64_cd_disknum
;
105 i64 offset_discrepancy
;
106 int used_offset_discrepancy
;
109 struct de_crcobj
*crco
;
112 typedef void (*extrafield_decoder_fn
)(deark
*c
, lctx
*d
,
113 struct extra_item_info_struct
*eii
);
115 static int is_compression_method_supported(lctx
*d
, const struct cmpr_meth_info
*cmi
)
117 if(cmi
&& cmi
->decompressor
) return 1;
121 static void do_decompress_shrink(deark
*c
, lctx
*d
, struct compression_params
*cparams
,
122 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
123 struct de_dfilter_results
*dres
)
125 fmtutil_decompress_zip_shrink(c
, dcmpri
, dcmpro
, dres
, NULL
);
128 static void do_decompress_reduce(deark
*c
, lctx
*d
, struct compression_params
*cparams
,
129 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
130 struct de_dfilter_results
*dres
)
132 struct de_zipreduce_params params
;
134 de_zeromem(¶ms
, sizeof(struct de_zipreduce_params
));
135 params
.cmpr_factor
= (unsigned int)(cparams
->cmpr_meth
-1);
136 fmtutil_decompress_zip_reduce(c
, dcmpri
, dcmpro
, dres
, ¶ms
);
139 static void do_decompress_implode(deark
*c
, lctx
*d
, struct compression_params
*cparams
,
140 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
141 struct de_dfilter_results
*dres
)
143 struct de_zipimplode_params params
;
145 de_zeromem(¶ms
, sizeof(struct de_zipimplode_params
));
146 params
.bit_flags
= cparams
->bit_flags
;
147 params
.dump_trees
= (u8
)de_get_ext_option_bool(c
, "zip:dumptrees", 0);
148 params
.mml_bug
= (u8
)de_get_ext_option_bool(c
, "zip:implodebug", 0);
149 fmtutil_decompress_zip_implode(c
, dcmpri
, dcmpro
, dres
, ¶ms
);
152 static void do_decompress_deflate(deark
*c
, lctx
*d
, struct compression_params
*cparams
,
153 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
154 struct de_dfilter_results
*dres
)
156 struct de_inflate_params inflparams
;
158 de_zeromem(&inflparams
, sizeof(struct de_inflate_params
));
159 fmtutil_decompress_deflate_ex(c
, dcmpri
, dcmpro
, dres
, &inflparams
);
162 static void do_decompress_stored(deark
*c
, lctx
*d
, struct compression_params
*cparams
,
163 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
164 struct de_dfilter_results
*dres
)
166 fmtutil_decompress_uncompressed(c
, dcmpri
, dcmpro
, dres
, 0);
169 static const struct cmpr_meth_info cmpr_meth_info_arr
[] = {
170 { 0, 0x00, "stored", do_decompress_stored
},
171 { 1, 0x00, "shrink", do_decompress_shrink
},
172 { 2, 0x00, "reduce, CF=1", do_decompress_reduce
},
173 { 3, 0x00, "reduce, CF=2", do_decompress_reduce
},
174 { 4, 0x00, "reduce, CF=3", do_decompress_reduce
},
175 { 5, 0x00, "reduce, CF=4", do_decompress_reduce
},
176 { 6, 0x00, "implode", do_decompress_implode
},
177 { 8, 0x00, "deflate", do_decompress_deflate
},
178 { 9, 0x00, "deflate64", NULL
},
179 { 10, 0x00, "PKWARE DCL implode", NULL
},
180 { 12, 0x00, "bzip2", NULL
},
181 { 14, 0x00, "LZMA", NULL
},
182 { 16, 0x00, "IBM z/OS CMPSC", NULL
},
183 { 18, 0x00, "IBM TERSE (new)", NULL
},
184 { 19, 0x00, "IBM LZ77 z Architecture", NULL
},
185 { 94, 0x00, "MP3", NULL
},
186 { 95, 0x00, "XZ", NULL
},
187 { 96, 0x00, "JPEG", NULL
},
188 { 97, 0x00, "WavPack", NULL
},
189 { 98, 0x00, "PPMd", NULL
},
190 { 99, 0x00, "AES", NULL
}
193 static const struct cmpr_meth_info
*get_cmpr_meth_info(int cmpr_meth
)
197 for(k
=0; k
<DE_ARRAYCOUNT(cmpr_meth_info_arr
); k
++) {
198 if(cmpr_meth_info_arr
[k
].cmpr_meth
== cmpr_meth
) {
199 return &cmpr_meth_info_arr
[k
];
205 // Decompress some data from inf, using the given ZIP compression method,
206 // and append it to outf.
207 // On failure, prints an error and returns 0.
208 // Returns 1 on apparent success.
209 // TODO: How should this low-level function report errors and warnings?
210 static int do_decompress_data(deark
*c
, lctx
*d
,
211 dbuf
*inf
, i64 inf_pos
, i64 inf_size
,
212 dbuf
*outf
, i64 maxuncmprsize
,
213 int cmpr_meth
, const struct cmpr_meth_info
*cmi
, unsigned int bit_flags
)
216 struct de_dfilter_in_params dcmpri
;
217 struct de_dfilter_out_params dcmpro
;
218 struct de_dfilter_results dres
;
219 struct compression_params cparams
;
221 de_zeromem(&cparams
, sizeof(struct compression_params
));
222 de_dfilter_init_objects(c
, &dcmpri
, &dcmpro
, &dres
);
223 cparams
.cmpr_meth
= cmpr_meth
;
224 cparams
.bit_flags
= bit_flags
;
226 dcmpri
.pos
= inf_pos
;
227 dcmpri
.len
= inf_size
;
229 dcmpro
.expected_len
= maxuncmprsize
;
230 dcmpro
.len_known
= 1;
232 if(cmi
&& cmi
->decompressor
) {
233 cmi
->decompressor(c
, d
, &cparams
, &dcmpri
, &dcmpro
, &dres
);
235 de_err(c
, "%s", de_dfilter_get_errmsg(c
, &dres
));
238 if(dres
.bytes_consumed_valid
&& (dres
.bytes_consumed
< inf_size
)) {
239 de_warn(c
, "Decompression may have failed (used only "
240 "%"I64_FMT
" of %"I64_FMT
" compressed bytes)",
241 dres
.bytes_consumed
, inf_size
);
248 de_err(c
, "Unsupported compression method: %d (%s)", cmpr_meth
,
249 (cmi
? cmi
->name
: "?"));
255 // As we read a member file's attributes, we may encounter multiple timestamps,
256 // which can differ in their precision, and whether they use UTC.
257 // This function is called to remember the "best" file modification time
258 // encountered so far.
259 static void apply_timestamp(deark
*c
, lctx
*d
, struct member_data
*md
, int tstype
,
260 const struct de_timestamp
*ts
, int quality
)
262 if(!ts
->is_valid
) return;
264 // In case of a tie, we prefer the later timestamp that we encountered.
265 // This makes local headers have priority over central headers, for
267 if(quality
>= md
->tsdata
[tstype
].quality
) {
268 md
->tsdata
[tstype
].ts
= *ts
;
269 md
->tsdata
[tstype
].quality
= quality
;
273 static void do_read_filename(deark
*c
, lctx
*d
,
274 struct member_data
*md
, struct dir_entry_data
*dd
,
275 i64 pos
, i64 len
, int utf8_flag
)
277 de_encoding from_encoding
;
279 ucstring_empty(dd
->fname
);
280 from_encoding
= utf8_flag
? DE_ENCODING_UTF8
: DE_ENCODING_CP437
;
281 dbuf_read_to_ucstring(c
->infile
, pos
, len
, dd
->fname
, 0, from_encoding
);
282 de_dbg(c
, "filename: \"%s\"", ucstring_getpsz_d(dd
->fname
));
285 static void do_comment_display(deark
*c
, lctx
*d
, i64 pos
, i64 len
, de_ext_encoding ee
,
288 de_ucstring
*s
= NULL
;
290 s
= ucstring_create(c
);
291 dbuf_read_to_ucstring(c
->infile
, pos
, len
, s
, 0, ee
);
292 de_dbg(c
, "%s: \"%s\"", name
, ucstring_getpsz_d(s
));
296 static void do_comment_extract(deark
*c
, lctx
*d
, i64 pos
, i64 len
, de_ext_encoding ee
,
300 de_ucstring
*s
= NULL
;
302 f
= dbuf_create_output_file(c
, ext
, NULL
, DE_CREATEFLAG_IS_AUX
);
303 s
= ucstring_create(c
);
304 dbuf_read_to_ucstring(c
->infile
, pos
, len
, s
, 0, ee
);
305 ucstring_write_as_utf8(c
, s
, f
, 1);
310 static void do_comment(deark
*c
, lctx
*d
, i64 pos
, i64 len
, int utf8_flag
,
311 const char *name
, const char *ext
)
316 ee
= utf8_flag
? DE_ENCODING_UTF8
: DE_ENCODING_CP437
;
317 ee
= DE_EXTENC_MAKE(ee
, DE_ENCSUBTYPE_HYBRID
);
318 if(c
->extract_level
>=2) {
319 do_comment_extract(c
, d
, pos
, len
, ee
, ext
);
322 do_comment_display(c
, d
, pos
, len
, ee
, name
);
326 static void read_unix_timestamp(deark
*c
, lctx
*d
, i64 pos
,
327 struct de_timestamp
*timestamp
, const char *name
)
330 char timestamp_buf
[64];
332 t
= de_geti32le(pos
);
333 de_unix_time_to_timestamp(t
, timestamp
, 0x1);
334 de_dbg_timestamp_to_string(c
, timestamp
, timestamp_buf
, sizeof(timestamp_buf
), 0);
335 de_dbg(c
, "%s: %"I64_FMT
" (%s)", name
, t
, timestamp_buf
);
338 static void read_FILETIME(deark
*c
, lctx
*d
, i64 pos
,
339 struct de_timestamp
*timestamp
, const char *name
)
342 char timestamp_buf
[64];
344 t_FILETIME
= de_geti64le(pos
);
345 de_FILETIME_to_timestamp(t_FILETIME
, timestamp
, 0x1);
346 de_dbg_timestamp_to_string(c
, timestamp
, timestamp_buf
, sizeof(timestamp_buf
), 0);
347 de_dbg(c
, "%s: %s", name
, timestamp_buf
);
350 static void ef_zip64extinfo(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
355 if(pos
+8 > eii
->dpos
+eii
->dlen
) goto done
;
356 n
= de_geti64le(pos
); pos
+= 8;
357 de_dbg(c
, "orig uncmpr file size: %"I64_FMT
, n
);
358 if(eii
->dd
->uncmpr_size
==0xffffffffLL
) {
359 eii
->dd
->uncmpr_size
= n
;
362 if(pos
+8 > eii
->dpos
+eii
->dlen
) goto done
;
363 n
= de_geti64le(pos
); pos
+= 8;
364 de_dbg(c
, "cmpr data size: %"I64_FMT
, n
);
365 if(eii
->dd
->cmpr_size
==0xffffffffLL
) {
366 eii
->dd
->cmpr_size
= n
;
369 if(pos
+8 > eii
->dpos
+eii
->dlen
) goto done
;
370 n
= de_geti64le(pos
); pos
+= 8;
371 de_dbg(c
, "offset of local header record: %"I64_FMT
, n
);
373 if(pos
+4 > eii
->dpos
+eii
->dlen
) goto done
;
374 n
= de_getu32le_p(&pos
);
375 de_dbg(c
, "disk start number: %"I64_FMT
, n
);
380 // Extra field 0x5455
381 static void ef_extended_timestamp(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
386 int has_mtime
, has_atime
, has_ctime
;
387 struct de_timestamp timestamp_tmp
;
389 endpos
= pos
+ eii
->dlen
;
390 if(pos
+1>endpos
) return;
391 flags
= de_getbyte_p(&pos
);
392 if(eii
->is_central
) {
393 has_mtime
= (eii
->dlen
>=5);
398 has_mtime
= (flags
& 0x01)?1:0;
399 has_atime
= (flags
& 0x02)?1:0;
400 has_ctime
= (flags
& 0x04)?1:0;
403 if(pos
+4>endpos
) return;
404 read_unix_timestamp(c
, d
, pos
, ×tamp_tmp
, "mtime");
405 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_MODIFY
, ×tamp_tmp
, 50);
409 if(pos
+4>endpos
) return;
410 read_unix_timestamp(c
, d
, pos
, ×tamp_tmp
, "atime");
411 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_ACCESS
, ×tamp_tmp
, 50);
415 if(pos
+4>endpos
) return;
416 read_unix_timestamp(c
, d
, pos
, ×tamp_tmp
, "creation time");
417 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_CREATE
, ×tamp_tmp
, 50);
422 // Extra field 0x5855
423 static void ef_infozip1(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
426 struct de_timestamp timestamp_tmp
;
428 if(eii
->is_central
&& eii
->dlen
<8) return;
429 if(!eii
->is_central
&& eii
->dlen
<12) return;
430 read_unix_timestamp(c
, d
, eii
->dpos
, ×tamp_tmp
, "atime");
431 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_ACCESS
, ×tamp_tmp
, 45);
432 read_unix_timestamp(c
, d
, eii
->dpos
+4, ×tamp_tmp
, "mtime");
433 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_MODIFY
, ×tamp_tmp
, 45);
434 if(!eii
->is_central
) {
435 uidnum
= de_getu16le(eii
->dpos
+8);
436 gidnum
= de_getu16le(eii
->dpos
+10);
437 de_dbg(c
, "uid: %d, gid: %d", (int)uidnum
, (int)gidnum
);
441 // Extra field 0x7075 - Info-ZIP Unicode Path
442 static void ef_unicodepath(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
445 de_ucstring
*fn
= NULL
;
447 u32 crc_reported
, crc_calculated
;
448 struct de_crcobj
*fncrco
= NULL
;
450 if(eii
->dlen
<1) goto done
;
451 ver
= de_getbyte(eii
->dpos
);
452 de_dbg(c
, "version: %u", (unsigned int)ver
);
453 if(ver
!=1) goto done
;
454 if(eii
->dlen
<6) goto done
;
455 crc_reported
= (u32
)de_getu32le(eii
->dpos
+1);
456 de_dbg(c
, "name-crc (reported): 0x%08x", (unsigned int)crc_reported
);
457 fn
= ucstring_create(c
);
458 fnlen
= eii
->dlen
- 5;
459 dbuf_read_to_ucstring(c
->infile
, eii
->dpos
+5, fnlen
, fn
, 0, DE_ENCODING_UTF8
);
460 de_dbg(c
, "unicode name: \"%s\"", ucstring_getpsz_d(fn
));
462 // Need to go back and calculate a CRC of the main filename. This is
463 // protection against the case where a ZIP editor may have changed the
464 // original filename, but retained a now-orphaned Unicode Path field.
465 fncrco
= de_crcobj_create(c
, DE_CRCOBJ_CRC32_IEEE
);
466 de_crcobj_addslice(fncrco
, c
->infile
, eii
->dd
->main_fname_pos
, eii
->dd
->main_fname_len
);
467 crc_calculated
= de_crcobj_getval(fncrco
);
468 de_dbg(c
, "name-crc (calculated): 0x%08x", (unsigned int)crc_calculated
);
470 if(crc_calculated
== crc_reported
) {
471 ucstring_empty(eii
->dd
->fname
);
472 ucstring_append_ucstring(eii
->dd
->fname
, fn
);
476 ucstring_destroy(fn
);
477 de_crcobj_destroy(fncrco
);
480 // Extra field 0x7855
481 static void ef_infozip2(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
485 if(eii
->is_central
) return;
486 if(eii
->dlen
<4) return;
487 uidnum
= de_getu16le(eii
->dpos
);
488 gidnum
= de_getu16le(eii
->dpos
+2);
489 de_dbg(c
, "uid: %d, gid: %d", (int)uidnum
, (int)gidnum
);
492 // Extra field 0x7875
493 static void ef_infozip3(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
501 endpos
= pos
+eii
->dlen
;
503 if(pos
+1>endpos
) return;
504 ver
= de_getbyte_p(&pos
);
505 de_dbg(c
, "version: %d", (int)ver
);
508 if(pos
+1>endpos
) return;
509 sz
= (i64
)de_getbyte_p(&pos
);
510 if(pos
+sz
>endpos
) return;
511 uidnum
= dbuf_getint_ext(c
->infile
, pos
, (unsigned int)sz
, 1, 0);
514 if(pos
+1>endpos
) return;
515 sz
= (i64
)de_getbyte_p(&pos
);
516 if(pos
+sz
>endpos
) return;
517 gidnum
= dbuf_getint_ext(c
->infile
, pos
, (unsigned int)sz
, 1, 0);
520 de_dbg(c
, "uid: %d, gid: %d", (int)uidnum
, (int)gidnum
);
523 // Extra field 0x000a
524 static void ef_ntfs(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
531 struct de_timestamp timestamp_tmp
;
533 endpos
= pos
+eii
->dlen
;
534 pos
+= 4; // skip reserved field
537 if(pos
+4>endpos
) break;
538 attr_tag
= de_getu16le_p(&pos
);
539 attr_size
= de_getu16le_p(&pos
);
540 if(attr_tag
==0x0001) name
="NTFS filetimes";
542 de_dbg(c
, "tag: 0x%04x (%s), dlen: %d", (unsigned int)attr_tag
, name
,
544 if(pos
+attr_size
>endpos
) break;
547 if(attr_tag
==0x0001 && attr_size
>=24) {
548 read_FILETIME(c
, d
, pos
, ×tamp_tmp
, "mtime");
549 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_MODIFY
, ×tamp_tmp
, 90);
550 read_FILETIME(c
, d
, pos
+8, ×tamp_tmp
, "atime");
551 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_ACCESS
, ×tamp_tmp
, 90);
552 read_FILETIME(c
, d
, pos
+16, ×tamp_tmp
, "creation time");
553 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_CREATE
, ×tamp_tmp
, 90);
555 de_dbg_indent(c
, -1);
561 // Extra field 0x0009
562 static void ef_os2(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
570 endpos
= pos
+eii
->dlen
;
571 if(pos
+4>endpos
) return;
572 unc_size
= de_getu32le_p(&pos
);
573 de_dbg(c
, "uncmpr ext attr data size: %d", (int)unc_size
);
574 if(eii
->is_central
) return;
576 if(pos
+2>endpos
) return;
577 cmpr_type
= de_getu16le_p(&pos
);
578 de_dbg(c
, "ext attr cmpr method: %d", (int)cmpr_type
);
580 if(pos
+4>endpos
) return;
581 crc
= de_getu32le_p(&pos
);
582 de_dbg(c
, "ext attr crc: 0x%08x", (unsigned int)crc
);
584 de_dbg(c
, "cmpr ext attr data at %"I64_FMT
", len=%d", pos
, (int)(endpos
-pos
));
585 // TODO: Uncompress and decode OS/2 extended attribute structure (FEA2LIST)
588 // Extra field 0x2705 (ZipIt Macintosh 1.3.5+)
589 static void ef_zipitmac_2705(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
591 struct de_fourcc sig
;
592 struct de_fourcc filetype
;
593 struct de_fourcc creator
;
595 if(eii
->dlen
<4) goto done
;
596 dbuf_read_fourcc(c
->infile
, eii
->dpos
, &sig
, 4, 0x0);
597 de_dbg(c
, "signature: '%s'", sig
.id_dbgstr
);
598 if(sig
.id
!=0x5a504954U
) goto done
; // expecting 'ZPIT'
599 if(eii
->dlen
<12) goto done
;
600 dbuf_read_fourcc(c
->infile
, eii
->dpos
+4, &filetype
, 4, 0x0);
601 de_dbg(c
, "filetype: '%s'", filetype
.id_dbgstr
);
602 dbuf_read_fourcc(c
->infile
, eii
->dpos
+8, &creator
, 4, 0x0);
603 de_dbg(c
, "creator: '%s'", creator
.id_dbgstr
);
609 // The time will be returned in the caller-supplied 'ts'
610 static void handle_mac_time(deark
*c
, lctx
*d
,
611 i64 mt_raw
, i64 mt_offset
,
612 struct de_timestamp
*ts
, const char *name
)
614 char timestamp_buf
[64];
615 de_mac_time_to_timestamp(mt_raw
- mt_offset
, ts
);
616 ts
->tzcode
= DE_TZCODE_UTC
;
617 de_dbg_timestamp_to_string(c
, ts
, timestamp_buf
, sizeof(timestamp_buf
), 0);
618 de_dbg(c
, "%s: %"I64_FMT
" %+"I64_FMT
" (%s)", name
,
619 mt_raw
, -mt_offset
, timestamp_buf
);
622 // Extra field 0x334d (Info-ZIP Macintosh)
623 static void ef_infozipmac(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
631 const struct cmpr_meth_info
*cmi
= NULL
;
632 struct de_fourcc filetype
;
633 struct de_fourcc creator
;
634 de_ucstring
*flags_str
= NULL
;
635 dbuf
*attr_data
= NULL
;
638 i64 create_time_offset
;
642 i64 backup_time_offset
;
643 struct de_timestamp tmp_timestamp
;
645 struct de_stringreaderdata
*srd
;
647 if(eii
->dlen
<14) goto done
;
649 ulen
= de_getu32le_p(&pos
);
650 de_dbg(c
, "uncmpr. finder attr. size: %d", (int)ulen
);
652 flags
= (unsigned int)de_getu16le_p(&pos
);
653 flags_str
= ucstring_create(c
);
654 if(flags
&0x0001) ucstring_append_flags_item(flags_str
, "data_fork");
655 if(flags
&0x0002) ucstring_append_flags_item(flags_str
, "0x0002"); // something about the filename
656 ucstring_append_flags_item(flags_str
,
657 (flags
&0x0004)?"uncmpressed_attribute_data":"compressed_attribute_data");
658 if(flags
&0x0008) ucstring_append_flags_item(flags_str
, "64-bit_times");
659 if(flags
&0x0010) ucstring_append_flags_item(flags_str
, "no_timezone_offsets");
660 de_dbg(c
, "flags: 0x%04x (%s)", flags
, ucstring_getpsz(flags_str
));
662 dbuf_read_fourcc(c
->infile
, pos
, &filetype
, 4, 0x0);
663 de_dbg(c
, "filetype: '%s'", filetype
.id_dbgstr
);
665 dbuf_read_fourcc(c
->infile
, pos
, &creator
, 4, 0x0);
666 de_dbg(c
, "creator: '%s'", creator
.id_dbgstr
);
669 if(eii
->is_central
) goto done
;
671 if(flags
&0x0004) { // Uncompressed attribute data
675 unsigned int crc_reported
;
677 cmpr_meth
= (int)de_getu16le_p(&pos
);
678 cmi
= get_cmpr_meth_info(cmpr_meth
);
679 de_dbg(c
, "finder attr. cmpr. method: %d (%s)", cmpr_meth
, (cmi
? cmi
->name
: "?"));
681 crc_reported
= (unsigned int)de_getu32le_p(&pos
);
682 de_dbg(c
, "finder attr. data crc (reported): 0x%08x", crc_reported
);
685 // The rest of the data is Finder attribute data
686 cmpr_attr_size
= eii
->dpos
+eii
->dlen
- pos
;
687 de_dbg(c
, "cmpr. finder attr. size: %d", (int)cmpr_attr_size
);
688 if(ulen
<1 || ulen
>1000000) goto done
;
690 // Type 6 (implode) compression won't work here, because it needs
691 // additional parameters seemingly not provided by the Finder attr data.
692 if(cmpr_meth
==6 || !is_compression_method_supported(d
, cmi
)) {
693 de_warn(c
, "Finder attribute data: Unsupported compression method: %d (%s)",
694 cmpr_meth
, (cmi
? cmi
->name
: "?"));
697 // Decompress and decode the Finder attribute data
698 attr_data
= dbuf_create_membuf(c
, ulen
, 0x1);
699 ret
= do_decompress_data(c
, d
, c
->infile
, pos
, cmpr_attr_size
,
700 attr_data
, 65536, cmpr_meth
, cmi
, 0);
702 de_warn(c
, "Failed to decompress finder attribute data");
707 dpos
+= 2; // Finder flags
708 dpos
+= 4; // Icon location
710 dpos
+= 16; // FXInfo
711 dpos
+= 1; // file version number
712 dpos
+= 1; // dir access rights
714 if(flags
&0x0008) goto done
; // We don't support 64-bit times
715 if(flags
&0x0010) goto done
; // We want timezone offsets
716 if(attr_data
->len
- dpos
< 6*4) goto done
;
718 create_time_raw
= dbuf_getu32le_p(attr_data
, &dpos
);
719 mod_time_raw
= dbuf_getu32le_p(attr_data
, &dpos
);
720 backup_time_raw
= dbuf_getu32le_p(attr_data
, &dpos
);
721 create_time_offset
= dbuf_geti32le(attr_data
, dpos
); dpos
+= 4;
722 mod_time_offset
= dbuf_geti32le(attr_data
, dpos
); dpos
+= 4;
723 backup_time_offset
= dbuf_geti32le(attr_data
, dpos
); dpos
+= 4;
725 handle_mac_time(c
, d
, create_time_raw
, create_time_offset
, &tmp_timestamp
, "create time");
726 if(create_time_raw
>0) {
727 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_CREATE
, &tmp_timestamp
, 40);
729 handle_mac_time(c
, d
, mod_time_raw
, mod_time_offset
, &tmp_timestamp
, "mod time ");
731 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_MODIFY
, &tmp_timestamp
, 40);
733 handle_mac_time(c
, d
, backup_time_raw
, backup_time_offset
, &tmp_timestamp
, "backup time");
734 if(backup_time_raw
>0) {
735 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_BACKUP
, &tmp_timestamp
, 40);
738 // Expecting 2 bytes for charset, and at least 2 more for the 2 NUL-terminated
739 // strings that follow.
740 if(attr_data
->len
- dpos
< 4) goto done
;
742 charset
= (int)dbuf_getu16le_p(attr_data
, &dpos
);
743 de_dbg(c
, "charset for fullpath/comment: %d", charset
);
745 // TODO: Can we use the correct encoding?
746 srd
= dbuf_read_string(attr_data
, dpos
, attr_data
->len
-dpos
, DE_DBG_MAX_STRLEN
,
747 DE_CONVFLAG_STOP_AT_NUL
, DE_ENCODING_ASCII
);
748 de_dbg(c
, "fullpath: \"%s\"", ucstring_getpsz(srd
->str
));
749 dpos
+= srd
->bytes_consumed
;
750 de_destroy_stringreaderdata(c
, srd
);
752 srd
= dbuf_read_string(attr_data
, dpos
, attr_data
->len
-dpos
, DE_DBG_MAX_STRLEN
,
753 DE_CONVFLAG_STOP_AT_NUL
, DE_ENCODING_ASCII
);
754 de_dbg(c
, "comment: \"%s\"", ucstring_getpsz(srd
->str
));
755 dpos
+= srd
->bytes_consumed
;
756 de_destroy_stringreaderdata(c
, srd
);
759 ucstring_destroy(flags_str
);
760 dbuf_close(attr_data
);
763 // Acorn / SparkFS / RISC OS
764 static void ef_acorn(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
767 struct de_riscos_file_attrs rfa
;
769 if(eii
->dlen
<16) return;
770 if(dbuf_memcmp(c
->infile
, eii
->dpos
, "ARC0", 4)) {
771 de_dbg(c
, "[unsupported Acorn extra-field type]");
776 de_zeromem(&rfa
, sizeof(struct de_riscos_file_attrs
));
777 fmtutil_riscos_read_load_exec(c
, c
->infile
, &rfa
, pos
);
779 if(rfa
.mod_time
.is_valid
) {
780 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_MODIFY
, &rfa
.mod_time
, 70);
783 fmtutil_riscos_read_attribs_field(c
, c
->infile
, &rfa
, pos
, 0);
784 // Note: attribs does not have any information that we care about (no
785 // 'executable' or 'is-directory' flag).
788 struct extra_item_type_info_struct
{
791 extrafield_decoder_fn fn
;
793 static const struct extra_item_type_info_struct extra_item_type_info_arr
[] = {
794 { 0x0001 /* */, "Zip64 extended information", ef_zip64extinfo
},
795 { 0x0007 /* */, "AV Info", NULL
},
796 { 0x0008 /* */, "extended language encoding data", NULL
},
797 { 0x0009 /* */, "OS/2", ef_os2
},
798 { 0x000a /* */, "NTFS", ef_ntfs
},
799 { 0x000c /* */, "OpenVMS", NULL
},
800 { 0x000d /* */, "Unix", NULL
},
801 { 0x000e /* */, "file stream and fork descriptors", NULL
},
802 { 0x000f /* */, "Patch Descriptor", NULL
},
803 { 0x0014 /* */, "PKCS#7 Store for X.509 Certificates", NULL
},
804 { 0x0015 /* */, "X.509 Certificate ID and Signature for individual file", NULL
},
805 { 0x0016 /* */, "X.509 Certificate ID for Central Directory", NULL
},
806 { 0x0017 /* */, "Strong Encryption Header", NULL
},
807 { 0x0018 /* */, "Record Management Controls", NULL
},
808 { 0x0019 /* */, "PKCS#7 Encryption Recipient Certificate List", NULL
},
809 { 0x0021 /* */, "Policy Decryption Key", NULL
},
810 { 0x0022 /* */, "Smartcrypt Key Provider", NULL
},
811 { 0x0023 /* */, "Smartcrypt Policy Key Data", NULL
},
812 { 0x0065 /* */, "IBM S/390 (Z390), AS/400 (I400) attributes", NULL
},
813 { 0x0066 /* */, "IBM S/390 (Z390), AS/400 (I400) attributes - compressed", NULL
},
814 { 0x07c8 /* */, "Macintosh", NULL
},
815 { 0x2605 /* */, "ZipIt Macintosh", NULL
},
816 { 0x2705 /* */, "ZipIt Macintosh 1.3.5+", ef_zipitmac_2705
},
817 { 0x2805 /* */, "ZipIt Macintosh 1.3.5+", NULL
},
818 { 0x334d /* M3 */, "Info-ZIP Macintosh", ef_infozipmac
},
819 { 0x4154 /* TA */, "Tandem NSK", NULL
},
820 { 0x4341 /* AC */, "Acorn/SparkFS", ef_acorn
},
821 { 0x4453 /* SE */, "Windows NT security descriptor (binary ACL)", NULL
},
822 { 0x4690 /* */, "POSZIP 4690", NULL
},
823 { 0x4704 /* */, "VM/CMS", NULL
},
824 { 0x470f /* */, "MVS", NULL
},
825 { 0x4854 /* TH */, "Theos, old unofficial port", NULL
}, // unzip:extrafld.txt says "inofficial"
826 { 0x4b46 /* FK */, "FWKCS MD5", NULL
},
827 { 0x4c41 /* AL */, "OS/2 access control list (text ACL)", NULL
},
828 { 0x4d49 /* IM */, "Info-ZIP OpenVMS", NULL
},
829 { 0x4d63 /* cM */, "Macintosh SmartZIP", NULL
},
830 { 0x4f4c /* LO */, "Xceed original location", NULL
},
831 { 0x5350 /* PS */, "Psion?", NULL
}, // observed in some Psion files
832 { 0x5356 /* VS */, "AOS/VS (ACL)", NULL
},
833 { 0x5455 /* UT */, "extended timestamp", ef_extended_timestamp
},
834 { 0x554e /* NU */, "Xceed unicode", NULL
},
835 { 0x5855 /* UX */, "Info-ZIP Unix, first version", ef_infozip1
},
836 { 0x6375 /* uc */, "Info-ZIP Unicode Comment", NULL
},
837 { 0x6542 /* Be */, "BeOS/BeBox", NULL
},
838 { 0x6854 /* Th */, "Theos", NULL
},
839 { 0x7075 /* up */, "Info-ZIP Unicode Path", ef_unicodepath
},
840 { 0x7441 /* At */, "AtheOS", NULL
},
841 { 0x756e /* nu */, "ASi Unix", NULL
},
842 { 0x7855 /* Ux */, "Info-ZIP Unix, second version", ef_infozip2
},
843 { 0x7875 /* ux */, "Info-ZIP Unix, third version", ef_infozip3
},
844 { 0xa220 /* */, "Microsoft Open Packaging Growth Hint", NULL
},
845 { 0xfb4a /* */, "SMS/QDOS", NULL
}, // according to Info-ZIP zip 3.0
846 { 0xfd4a /* */, "SMS/QDOS", NULL
} // according to ZIP v6.3.4 APPNOTE
849 static const struct extra_item_type_info_struct
*get_extra_item_type_info(i64 id
)
851 static const struct extra_item_type_info_struct default_ei
=
855 for(i
=0; i
<DE_ARRAYCOUNT(extra_item_type_info_arr
); i
++) {
856 if(id
== (i64
)extra_item_type_info_arr
[i
].id
) {
857 return &extra_item_type_info_arr
[i
];
863 static void do_extra_data(deark
*c
, lctx
*d
,
864 struct member_data
*md
, struct dir_entry_data
*dd
,
865 i64 pos1
, i64 len
, int is_central
)
869 de_dbg(c
, "extra data at %"I64_FMT
", len=%d", pos1
, (int)len
);
874 struct extra_item_info_struct eii
;
876 if(pos
+4 >= pos1
+len
) break;
877 de_zeromem(&eii
, sizeof(struct extra_item_info_struct
));
880 eii
.is_central
= is_central
;
883 eii
.id
= (u32
)de_getu16le(pos
);
884 eii
.dlen
= de_getu16le(pos
+2);
886 eii
.eiti
= get_extra_item_type_info(eii
.id
);
888 de_dbg(c
, "item id=0x%04x (%s), dlen=%d", (unsigned int)eii
.id
, eii
.eiti
->name
,
890 if(pos
+4+eii
.dlen
> pos1
+len
) break;
894 eii
.eiti
->fn(c
, d
, &eii
);
895 de_dbg_indent(c
, -1);
901 de_dbg_indent(c
, -1);
904 static void our_writelistener_cb(dbuf
*f
, void *userdata
, const u8
*buf
, i64 buf_len
)
906 struct member_data
*md
= (struct member_data
*)userdata
;
907 de_crcobj_addbuf(md
->crco
, buf
, buf_len
);
910 static void do_extract_file(deark
*c
, lctx
*d
, struct member_data
*md
)
914 struct dir_entry_data
*ldd
= &md
->local_dir_entry_data
;
919 de_dbg(c
, "file data at %"I64_FMT
", len=%"I64_FMT
, md
->file_data_pos
,
922 if(ldd
->bit_flags
& 0x1) {
923 de_err(c
, "%s: Encryption is not supported", ucstring_getpsz_d(ldd
->fname
));
927 if(!is_compression_method_supported(d
, ldd
->cmi
)) {
928 de_err(c
, "%s: Unsupported compression method: %d (%s)",
929 ucstring_getpsz_d(ldd
->fname
),
930 ldd
->cmpr_meth
, (ldd
->cmi
? ldd
->cmi
->name
: "?"));
934 if(md
->file_data_pos
+md
->cmpr_size
> c
->infile
->len
) {
935 de_err(c
, "Member data goes beyond end of file");
940 de_warn(c
, "\"%s\" is a symbolic link. It will not be extracted as a link.",
941 ucstring_getpsz_d(ldd
->fname
));
944 fi
= de_finfo_create(c
);
945 fi
->detect_root_dot_dir
= 1;
947 if(ucstring_isnonempty(ldd
->fname
)) {
948 unsigned int snflags
= DE_SNFLAG_FULLPATH
;
949 if(md
->is_dir
) snflags
|= DE_SNFLAG_STRIPTRAILINGSLASH
;
950 de_finfo_set_name_from_ucstring(c
, fi
, ldd
->fname
, snflags
);
951 fi
->original_filename_flag
= 1;
954 for(tsidx
=0; tsidx
<DE_TIMESTAMPIDX_COUNT
; tsidx
++) {
955 if(md
->tsdata
[tsidx
].ts
.is_valid
) {
956 fi
->timestamp
[tsidx
] = md
->tsdata
[tsidx
].ts
;
961 fi
->is_directory
= 1;
963 else if(md
->is_executable
) {
964 fi
->mode_flags
|= DE_MODEFLAG_EXE
;
966 else if(md
->is_nonexecutable
) {
967 fi
->mode_flags
|= DE_MODEFLAG_NONEXE
;
970 outf
= dbuf_create_output_file(c
, NULL
, fi
, 0);
975 dbuf_set_writelistener(outf
, our_writelistener_cb
, (void*)md
);
977 de_crcobj_reset(md
->crco
);
980 ret
= do_decompress_data(c
, d
, c
->infile
, md
->file_data_pos
, md
->cmpr_size
,
981 outf
, md
->uncmpr_size
, ldd
->cmpr_meth
, ldd
->cmi
, ldd
->bit_flags
);
982 de_dbg_indent(c
, -1);
985 crc_calculated
= de_crcobj_getval(md
->crco
);
986 de_dbg(c
, "crc (calculated): 0x%08x", (unsigned int)crc_calculated
);
988 if(crc_calculated
!= md
->crc_reported
) {
989 de_err(c
, "%s: CRC check failed: Expected 0x%08x, got 0x%08x",
990 ucstring_getpsz_d(ldd
->fname
),
991 (unsigned int)md
->crc_reported
, (unsigned int)crc_calculated
);
996 de_finfo_destroy(c
, fi
);
// Maps the platform code (the high byte of a "version made by" or "version
// needed" field) to a printable name. Codes 0..19 come from the table,
// 30 is special-cased, and anything else yields "?".
static const char *get_platform_name(unsigned int ver_hi)
{
	static const char *pltf_names[20] = {
		"MS-DOS, etc.", "Amiga", "OpenVMS", "Unix",
		"VM/CMS", "Atari ST", "HPFS", "Macintosh",
		"Z-System", "CP/M", "NTFS or TOPS-20", "MVS or NTFS",
		"VSE or SMS/QDOS", "Acorn RISC OS", "VFAT", "MVS",
		"BeOS", "Tandem", "OS/400", "OS X" };

	if(ver_hi>=20) {
		return (ver_hi==30) ? "AtheOS/Syllable" : "?";
	}
	return pltf_names[ver_hi];
}
// Interprets md->attr_e (the "external attributes" field).
// For members made on Unix (md->ver_made_by_hi==3), the high 16 bits are
// treated as a Unix mode: the file-type bits are examined, and the execute
// permission bits decide between md->is_executable and md->is_nonexecutable.
// Bit 0x10 of the low byte is tested as an MS-DOS-style attribute.
1014 // Look at the attributes, and set some other fields based on them.
1015 static void process_ext_attr(deark
*c
, lctx
*d
, struct member_data
*md
)
1017 if(d
->using_scanmode
) {
1018 // In this mode, there is no 'external attribs' field.
1022 if(md
->ver_made_by_hi
==3) { // Unix
1023 unsigned int unix_filetype
;
// Isolate the file-type bits (mask 0170000) of the Unix mode.
1024 unix_filetype
= (md
->attr_e
>>16)&0170000;
// 0040000 and 0120000 are the conventional directory and symlink type values.
1025 if(unix_filetype
== 0040000) {
1028 else if(unix_filetype
== 0120000) {
// Is any of the execute permission bits (owner/group/other) set?
1032 if((md
->attr_e
>>16)&0111) {
1033 md
->is_executable
= 1;
1036 md
->is_nonexecutable
= 1;
1040 // MS-DOS-style attributes.
1041 // Technically, we should only do this if
1042 // md->central_dir_entry_data.ver_made_by_hi==0.
1043 // However, most(?) zip programs set the low byte of the external attribs
1044 // to the equivalent MS-DOS attribs, at least in cases where it matters.
1045 if(md
->attr_e
& 0x10) {
1049 // TODO: Support more platforms.
1050 // TODO: The 0x756e (ASi Unix) extra field might be important, as it contains
1051 // file permissions.
1053 if(md
->is_dir
&& md
->uncmpr_size
!=0) {
1054 // I'd expect a subdirectory entry to have zero size. If it doesn't,
1055 // let's just assume we misidentified it as a subdirectory, and
1056 // extract its data.
// Appends a textual description of the internal file attributes (md->attr_i)
// to the string 's'. Bits still set in 'bf' at the end were not recognized
// and are appended as a hexadecimal value.
1061 static void describe_internal_attr(deark
*c
, struct member_data
*md
,
1064 unsigned int bf
= md
->attr_i
;
1067 ucstring_append_flags_item(s
, "text file");
1071 if(bf
!=0) { // Report any unrecognized flags
1072 ucstring_append_flags_itemf(s
, "0x%04x", bf
);
// Appends a textual description of the general purpose bit flags to 's'.
// Some bits are interpreted according to the compression method: method 6
// (implode) yields sliding-dictionary and trees items, and methods 8 and 9
// yield a compression level taken from bits 1-2. Bits still set in 'bf' at
// the end are reported as a hexadecimal value.
1076 // Uses dd->bit_flags, dd->cmpr_method
1077 static void describe_general_purpose_bit_flags(deark
*c
, struct dir_entry_data
*dd
,
1081 unsigned int bf
= dd
->bit_flags
;
1084 ucstring_append_flags_item(s
, "encrypted");
1088 if(dd
->cmpr_meth
==6) { // implode
1096 ucstring_append_flags_itemf(s
, "%s sliding dictionary", name
);
1105 ucstring_append_flags_itemf(s
, "%s trees", name
);
1108 if(dd
->cmpr_meth
==8 || dd
->cmpr_meth
==9) { // deflate flags
// Bits 1-2 hold a two-bit compression level code.
1111 code
= (bf
& 0x0006)>>1;
1113 case 1: name
="max"; break;
1114 case 2: name
="fast"; break;
1115 case 3: name
="super_fast"; break;
1116 default: name
="normal";
1118 ucstring_append_flags_itemf(s
, "cmprlevel=%s", name
);
// Clear the bits just described so they are not reported as unrecognized.
1119 bf
-= (bf
& 0x0006);
1123 ucstring_append_flags_item(s
, "uses data descriptor");
1128 ucstring_append_flags_item(s
, "UTF-8");
1132 if(bf
!=0) { // Report any unrecognized flags
1133 ucstring_append_flags_itemf(s
, "0x%04x", bf
);
// Parameters:
//  is_central: nonzero to parse a central directory entry (fixed part is 46
//    bytes, results go to md->central_dir_entry_data); zero to parse a local
//    file header (fixed part is 30 bytes, results go to
//    md->local_dir_entry_data).
//  pos1: file offset of the header.
//  p_entry_size: receives fixed_header_size + fn_len + extra_len + comment_len.
// Callers in this file treat a zero return value as failure.
1137 // Read either a central directory entry (a.k.a. central directory file header),
1138 // or a local file header.
1139 static int do_file_header(deark
*c
, lctx
*d
, struct member_data
*md
,
1140 int is_central
, i64 pos1
, i64
*p_entry_size
)
1144 i64 fn_len
, extra_len
, comment_len
;
1147 i64 fixed_header_size
;
1148 i64 mod_time_raw
, mod_date_raw
;
1149 struct dir_entry_data
*dd
; // Points to either md->central or md->local
1150 de_ucstring
*descr
= NULL
;
1151 struct de_timestamp dos_timestamp
;
1152 char timestamp_buf
[64];
1155 descr
= ucstring_create(c
);
1157 dd
= &md
->central_dir_entry_data
;
1158 fixed_header_size
= 46;
1159 de_dbg(c
, "central dir entry at %"I64_FMT
, pos
);
1162 dd
= &md
->local_dir_entry_data
;
1163 fixed_header_size
= 30;
1164 if(md
->disk_number_start
!=d
->this_disk_num
) {
1165 de_err(c
, "Member file not in this ZIP file");
1168 de_dbg(c
, "local file header at %"I64_FMT
, pos
);
1170 de_dbg_indent(c
, 1);
// Verify the 4-byte signature: CODE_PK12 for a central directory entry,
// CODE_PK34 for a local file header.
1172 sig
= (u32
)de_getu32le_p(&pos
);
1173 if(is_central
&& sig
!=CODE_PK12
) {
1174 de_err(c
, "Central dir file header not found at %"I64_FMT
, pos1
);
1177 else if(!is_central
&& sig
!=CODE_PK34
) {
1178 de_err(c
, "Local file header not found at %"I64_FMT
, pos1
);
// "Version made by": high byte is the platform code, low byte is the ZIP
// spec version, printed as major.minor using /10 and %10.
1183 md
->ver_made_by
= (unsigned int)de_getu16le_p(&pos
);
1184 md
->ver_made_by_hi
= (unsigned int)((md
->ver_made_by
&0xff00)>>8);
1185 md
->ver_made_by_lo
= (unsigned int)(md
->ver_made_by
&0x00ff);
1186 de_dbg(c
, "version made by: platform=%u (%s), ZIP spec=%u.%u",
1187 md
->ver_made_by_hi
, get_platform_name(md
->ver_made_by_hi
),
1188 (unsigned int)(md
->ver_made_by_lo
/10), (unsigned int)(md
->ver_made_by_lo
%10));
// "Version needed to extract" is split the same way.
1191 dd
->ver_needed
= (unsigned int)de_getu16le_p(&pos
);
1192 dd
->ver_needed_hi
= (unsigned int)((dd
->ver_needed
&0xff00)>>8);
1193 dd
->ver_needed_lo
= (unsigned int)(dd
->ver_needed
&0x00ff);
1194 de_dbg(c
, "version needed to extract: platform=%u (%s), ZIP spec=%u.%u",
1195 dd
->ver_needed_hi
, get_platform_name(dd
->ver_needed_hi
),
1196 (unsigned int)(dd
->ver_needed_lo
/10), (unsigned int)(dd
->ver_needed_lo
%10));
1198 dd
->bit_flags
= (unsigned int)de_getu16le_p(&pos
);
1199 dd
->cmpr_meth
= (int)de_getu16le_p(&pos
);
1200 dd
->cmi
= get_cmpr_meth_info(dd
->cmpr_meth
);
// Bit 0x800 of the flags is passed on as the UTF-8 flag for the filename and
// the member comment.
1202 utf8_flag
= (dd
->bit_flags
& 0x800)?1:0;
1203 ucstring_empty(descr
);
1204 describe_general_purpose_bit_flags(c
, dd
, descr
);
1205 de_dbg(c
, "flags: 0x%04x (%s)", dd
->bit_flags
, ucstring_getpsz(descr
));
// dd->cmi may be NULL (no info for this method), hence the "?" fallback.
1207 de_dbg(c
, "cmpr method: %d (%s)", dd
->cmpr_meth
,
1208 (dd
->cmi
? dd
->cmi
->name
: "?"));
// Modification time in DOS date/time format; it is marked as local time.
1210 mod_time_raw
= de_getu16le_p(&pos
);
1211 mod_date_raw
= de_getu16le_p(&pos
);
1212 de_dos_datetime_to_timestamp(&dos_timestamp
, mod_date_raw
, mod_time_raw
);
1213 dos_timestamp
.tzcode
= DE_TZCODE_LOCAL
;
1214 de_dbg_timestamp_to_string(c
, &dos_timestamp
, timestamp_buf
, sizeof(timestamp_buf
), 0);
1215 de_dbg(c
, "mod time: %s", timestamp_buf
);
1216 apply_timestamp(c
, d
, md
, DE_TIMESTAMPIDX_MODIFY
, &dos_timestamp
, 10);
1218 dd
->crc_reported
= (u32
)de_getu32le_p(&pos
);
1219 de_dbg(c
, "crc (reported): 0x%08x", (unsigned int)dd
->crc_reported
);
1221 dd
->cmpr_size
= de_getu32le_p(&pos
);
1222 dd
->uncmpr_size
= de_getu32le_p(&pos
);
1223 de_dbg(c
, "cmpr size: %" I64_FMT
", uncmpr size: %" I64_FMT
, dd
->cmpr_size
, dd
->uncmpr_size
);
1225 fn_len
= de_getu16le_p(&pos
);
1227 extra_len
= de_getu16le_p(&pos
);
1230 comment_len
= de_getu16le_p(&pos
);
// Position of the member's data: current position plus the filename and
// extra field lengths.
1237 md
->file_data_pos
= pos
+ fn_len
+ extra_len
;
1241 md
->disk_number_start
= de_getu16le_p(&pos
);
1243 md
->attr_i
= (unsigned int)de_getu16le_p(&pos
);
1244 ucstring_empty(descr
);
1245 describe_internal_attr(c
, md
, descr
);
1246 de_dbg(c
, "internal file attributes: 0x%04x (%s)", md
->attr_i
,
1247 ucstring_getpsz(descr
));
1249 md
->attr_e
= (unsigned int)de_getu32le_p(&pos
);
1250 de_dbg(c
, "external file attributes: 0x%08x", md
->attr_e
);
1251 de_dbg_indent(c
, 1);
1254 // The low byte is, AFAIK, *almost* universally used for MS-DOS-style
1256 unsigned int dos_attrs
= (md
->attr_e
& 0xff);
1257 ucstring_empty(descr
);
1258 de_describe_dos_attribs(c
, dos_attrs
, descr
, 0);
1259 de_dbg(c
, "%sMS-DOS attribs: 0x%02x (%s)",
1260 (md
->ver_made_by_hi
==0)?"":"(hypothetical) ",
1261 dos_attrs
, ucstring_getpsz(descr
));
1264 if((md
->attr_e
>>16) != 0) {
1265 // A number of platforms put Unix-style file attributes here, so
1266 // decode them as such whenever they are nonzero.
1267 de_dbg(c
, "%sUnix attribs: octal(%06o)",
1268 (md
->ver_made_by_hi
==3)?"":"(hypothetical) ",
1269 (unsigned int)(md
->attr_e
>>16));
1272 de_dbg_indent(c
, -1);
1274 md
->offset_of_local_header
= de_getu32le_p(&pos
);
1275 de_dbg(c
, "offset of local header: %"I64_FMT
", disk: %d", md
->offset_of_local_header
,
1276 (int)md
->disk_number_start
);
1280 de_dbg(c
, "filename_len: %d, extra_len: %d, comment_len: %d", (int)fn_len
,
1281 (int)extra_len
, (int)comment_len
);
1284 de_dbg(c
, "filename_len: %d, extra_len: %d", (int)fn_len
,
// Total size of this header: fixed part plus the three variable-length fields.
1288 *p_entry_size
= fixed_header_size
+ fn_len
+ extra_len
+ comment_len
;
// The filename, extra data and comment follow the fixed part, in that order.
1290 dd
->main_fname_pos
= pos1
+fixed_header_size
;
1291 dd
->main_fname_len
= fn_len
;
1292 do_read_filename(c
, d
, md
, dd
, pos1
+fixed_header_size
, fn_len
, utf8_flag
);
1295 do_extra_data(c
, d
, md
, dd
, pos1
+fixed_header_size
+fn_len
, extra_len
, is_central
);
1299 do_comment(c
, d
, pos1
+fixed_header_size
+fn_len
+extra_len
, comment_len
, utf8_flag
,
1300 "member file comment", "fcomment.txt");
// If an offset discrepancy has already been put into use, apply it to the
// local header offset.
1304 if(d
->used_offset_discrepancy
) {
1305 md
->offset_of_local_header
+= d
->offset_discrepancy
;
1306 de_dbg(c
, "assuming local header is really at %"I64_FMT
, md
->offset_of_local_header
);
// Otherwise probe: if there is no local header signature at the reported
// offset but there is one at the adjusted offset, start using the discrepancy.
1308 else if(d
->offset_discrepancy
!=0) {
1312 sig1
= (u32
)de_getu32le(md
->offset_of_local_header
);
1313 if(sig1
!=CODE_PK34
) {
1314 alt_pos
= md
->offset_of_local_header
+ d
->offset_discrepancy
;
1315 sig2
= (u32
)de_getu32le(alt_pos
);
1316 if(sig2
==CODE_PK34
) {
1317 de_warn(c
, "Local file header found at %"I64_FMT
" instead of %"I64_FMT
". "
1318 "Assuming offsets are wrong by %"I64_FMT
" bytes.",
1319 alt_pos
, md
->offset_of_local_header
, d
->offset_discrepancy
);
1320 md
->offset_of_local_header
+= d
->offset_discrepancy
;
1321 d
->used_offset_discrepancy
= 1;
1330 de_dbg_indent(c
, -1);
1331 ucstring_destroy(descr
);
// Allocates a member_data object with de_malloc() and creates the fname
// strings of both its local and its central directory entry data.
// NOTE(review): the return statement is not visible in this listing; callers
// assign the result of this function to their 'md' pointer.
1335 static struct member_data
*create_member_data(deark
*c
, lctx
*d
)
1337 struct member_data
*md
;
1339 md
= de_malloc(c
, sizeof(struct member_data
));
1340 md
->local_dir_entry_data
.fname
= ucstring_create(c
);
1341 md
->central_dir_entry_data
.fname
= ucstring_create(c
);
// Destroys the two fname strings owned by md (counterpart of
// create_member_data()).
// NOTE(review): a NULL check and the release of md itself are not visible in
// this listing -- confirm against the full source.
1345 static void destroy_member_data(deark
*c
, struct member_data
*md
)
1348 ucstring_destroy(md
->central_dir_entry_data
.fname
);
1349 ucstring_destroy(md
->local_dir_entry_data
.fname
);
// Returns the last character of s, or 0 if s is NULL or has a length below 1.
1353 static i32
ucstring_lastchar(de_ucstring
*s
)
1355 if(!s
|| s
->len
<1) return 0;
1356 return s
->str
[s
->len
-1];
// Settles the final values of md->cmpr_size, md->uncmpr_size and
// md->crc_reported (from the central or the local header, depending on bit
// 0x0008 of the local header flags), evaluates the external attributes with
// process_ext_attr(), and calls do_extract_file().
// do_local_dir_only() treats a zero return value as failure.
1359 // Things to do after both the central and local headers have been read.
1360 // E.g., extract the file.
1361 static int do_process_member(deark
*c
, lctx
*d
, struct member_data
*md
)
1365 // If for some reason we have a central-dir filename but not a local-dir
1366 // filename, use the central-dir filename.
1367 if(ucstring_isempty(md
->local_dir_entry_data
.fname
) &&
1368 ucstring_isnonempty(md
->central_dir_entry_data
.fname
))
1370 ucstring_append_ucstring(md
->local_dir_entry_data
.fname
,
1371 md
->central_dir_entry_data
.fname
);
1374 // Set the final file size and crc fields.
1375 if(md
->local_dir_entry_data
.bit_flags
& 0x0008) {
// Scan mode has no central directory to fall back on.
1376 if(d
->using_scanmode
) {
1377 de_err(c
, "File is incompatible with scan mode");
1381 // Indicates that certain fields are not present in the local file header,
1382 // and are instead in a "data descriptor" after the file data.
1383 // Let's hope they are also in the central file header.
1384 md
->cmpr_size
= md
->central_dir_entry_data
.cmpr_size
;
1385 md
->uncmpr_size
= md
->central_dir_entry_data
.uncmpr_size
;
1386 md
->crc_reported
= md
->central_dir_entry_data
.crc_reported
;
// Otherwise the local file header values are used.
1389 md
->cmpr_size
= md
->local_dir_entry_data
.cmpr_size
;
1390 md
->uncmpr_size
= md
->local_dir_entry_data
.uncmpr_size
;
1391 md
->crc_reported
= md
->local_dir_entry_data
.crc_reported
;
1394 process_ext_attr(c
, d
, md
);
1396 // In some cases, detect directories by checking whether the filename ends
// This check applies only to zero-size members not already flagged as
// directories, and only in scan mode or when md->ver_made_by_lo is below 20.
1398 if(!md
->is_dir
&& md
->uncmpr_size
==0 &&
1399 (d
->using_scanmode
|| (md
->ver_made_by_lo
<20)))
1401 if(ucstring_lastchar(md
->local_dir_entry_data
.fname
) == '/') {
1402 de_dbg(c
, "[assuming this is a subdirectory]");
1407 do_extract_file(c
, d
, md
);
// Processes one member, starting from its central directory entry at 'pos':
// reads the central dir file header, then the local file header at
// md->offset_of_local_header, then calls do_process_member().
// The debug indentation level is saved on entry and restored before leaving.
1414 // In *entry_size, returns the size of the central dir entry.
1415 // Returns 0 if the central dir entry could not even be parsed.
1416 static int do_member_from_central_dir_entry(deark
*c
, lctx
*d
,
1417 struct member_data
*md
, i64 central_index
, i64 pos
, i64
*entry_size
)
1421 int saved_indent_level
;
1423 de_dbg_indent_save(c
, &saved_indent_level
);
// Compare the entry position with the end of the central directory region
// (offset + byte size).
1427 if(pos
>= d
->central_dir_offset
+d
->central_dir_byte_size
) {
1431 de_dbg(c
, "central dir entry #%d", (int)central_index
);
1432 de_dbg_indent(c
, 1);
1434 // Read the central dir file header
1435 if(!do_file_header(c
, d
, md
, 1, pos
, entry_size
)) {
1439 // If we were able to read the central dir file header, we might be able
1440 // to continue and read more files, even if the local file header fails.
1443 // Read the local file header
1444 if(!do_file_header(c
, d
, md
, 0, md
->offset_of_local_header
, &tmp_entry_size
)) {
1448 do_process_member(c
, d
, md
);
1451 de_dbg_indent_restore(c
, saved_indent_level
);
// Wraps do_member_from_central_dir_entry() with the creation and destruction
// of a temporary member_data object. The wrapped call's result is stored in
// 'ret'; do_central_dir() tests this function's return value for zero.
1455 static int do_central_dir_entry(deark
*c
, lctx
*d
,
1456 i64 central_index
, i64 pos
, i64
*entry_size
)
1458 struct member_data
*md
= NULL
;
1461 md
= create_member_data(c
, d
);
1462 ret
= do_member_from_central_dir_entry(c
, d
, md
, central_index
, pos
, entry_size
);
1463 destroy_member_data(c
, md
);
// Processes one member using only its local file header at pos1 (used by
// scan mode; no central directory entry is involved).
// After do_process_member() succeeds, *pmember_size is set to the distance
// from pos1 to the end of the member's compressed data.
1467 static int do_local_dir_only(deark
*c
, lctx
*d
, i64 pos1
, i64
*pmember_size
)
1469 struct member_data
*md
= NULL
;
1473 md
= create_member_data(c
, d
);
1475 md
->offset_of_local_header
= pos1
;
1477 // Read the local file header
1478 if(!do_file_header(c
, d
, md
, 0, md
->offset_of_local_header
, &tmp_entry_size
)) {
1482 if(!do_process_member(c
, d
, md
)) goto done
;
// Header and data together: end of compressed data minus start of header.
1484 *pmember_size
= md
->file_data_pos
+ md
->cmpr_size
- pos1
;
1488 destroy_member_data(c
, md
);
// Scan mode: searches the input file for local file header signatures
// (g_zipsig34) and processes each hit with do_local_dir_only(), instead of
// using the central directory.
1492 static void de_run_zip_scanmode(deark
*c
, lctx
*d
)
1496 d
->using_scanmode
= 1;
1501 i64 member_size
= 0;
// Stop when fewer than 4 bytes (one signature) remain.
1503 if(pos
> c
->infile
->len
-4) break;
1504 ret
= dbuf_search(c
->infile
, g_zipsig34
, 4, pos
, c
->infile
->len
-pos
, &foundpos
);
1507 de_dbg(c
, "zip member at %"I64_FMT
, pos
);
1508 de_dbg_indent(c
, 1);
1509 ret
= do_local_dir_only(c
, d
, pos
, &member_size
);
1510 de_dbg_indent(c
, -1);
// Stop when the reported member size is not positive.
1512 if(member_size
<1) break;
// Walks the central directory: starting at d->central_dir_offset, calls
// do_central_dir_entry() once for each of the d->central_dir_num_entries
// entries. de_run_zip_normally() tests the return value for zero.
1517 static int do_central_dir(deark
*c
, lctx
*d
)
1524 pos
= d
->central_dir_offset
;
1525 de_dbg(c
, "central dir at %"I64_FMT
, pos
);
1526 de_dbg_indent(c
, 1);
1528 for(i
=0; i
<d
->central_dir_num_entries
; i
++) {
1529 if(!do_central_dir_entry(c
, d
, i
, pos
, &entry_size
)) {
1530 // TODO: Decide exactly what to do if something fails.
1538 de_dbg_indent(c
, -1);
// Reads the Zip64 end-of-central-directory record at d->zip64_eocd_pos and
// stores its central directory fields (disk number, entry counts, byte size
// and offset) in the d->zip64_* members.
// de_run_zip_normally() treats a zero return value as failure.
1542 static int do_zip64_eocd(deark
*c
, lctx
*d
)
1547 int saved_indent_level
;
1548 UI ver
, ver_hi
, ver_lo
;
1550 de_dbg_indent_save(c
, &saved_indent_level
);
1552 if(d
->zip64_eocd_disknum
!=0) {
1553 de_warn(c
, "This might be a multi-disk Zip64 archive, which is not supported");
1559 pos
= d
->zip64_eocd_pos
;
// A missing record signature only produces a warning; retval is set to 1.
1560 if(dbuf_memcmp(c
->infile
, pos
, g_zipsig66
, 4)) {
1561 de_warn(c
, "Expected Zip64 end-of-central-directory record not found at %"I64_FMT
, pos
);
1562 retval
= 1; // Maybe the eocd locator sig was a false positive?
1567 de_dbg(c
, "zip64 end-of-central-dir record at %"I64_FMT
, pos
);
1569 de_dbg_indent(c
, 1);
1571 n
= de_geti64le(pos
); pos
+= 8;
1572 de_dbg(c
, "size of zip64 eocd record: (12+)%"I64_FMT
, n
);
// Version fields: high byte is the platform code, low byte is the ZIP spec
// version, printed as major.minor using /10 and %10.
1574 ver
= (UI
)de_getu16le_p(&pos
);
1575 ver_hi
= (ver
&0xff00)>>8;
1576 ver_lo
= ver
&0x00ff;
1577 de_dbg(c
, "version made by: platform=%u (%s), ZIP spec=%u.%u",
1578 ver_hi
, get_platform_name(ver_hi
), (UI
)(ver_lo
/10), (UI
)(ver_lo
%10));
1580 ver
= (UI
)de_getu16le_p(&pos
);
1581 ver_hi
= (ver
&0xff00)>>8;
1582 ver_lo
= ver
&0x00ff;
1583 de_dbg(c
, "version needed: platform=%u (%s), ZIP spec=%u.%u",
1584 ver_hi
, get_platform_name(ver_hi
), (UI
)(ver_lo
/10), (UI
)(ver_lo
%10));
1586 n
= de_getu32le_p(&pos
);
1587 de_dbg(c
, "this disk num: %"I64_FMT
, n
);
// The remaining fields are stored in d for use by do_end_of_central_dir().
1589 d
->zip64_cd_disknum
= (unsigned int)de_getu32le_p(&pos
);
1590 d
->zip64_num_centr_dir_entries_this_disk
= de_geti64le(pos
); pos
+= 8;
1591 de_dbg(c
, "central dir num entries on this disk: %"I64_FMT
, d
->zip64_num_centr_dir_entries_this_disk
);
1592 d
->zip64_num_centr_dir_entries_total
= de_geti64le(pos
); pos
+= 8;
1593 de_dbg(c
, "central dir num entries: %"I64_FMT
, d
->zip64_num_centr_dir_entries_total
);
1594 d
->zip64_centr_dir_byte_size
= de_geti64le(pos
); pos
+= 8;
1595 de_dbg(c
, "central dir size: %"I64_FMT
, d
->zip64_centr_dir_byte_size
);
1596 d
->zip64_cd_pos
= de_geti64le(pos
); pos
+= 8;
1597 de_dbg(c
, "central dir offset: %"I64_FMT
", disk: %u",
1598 d
->zip64_cd_pos
, d
->zip64_cd_disknum
);
1602 de_dbg_indent_restore(c
, saved_indent_level
);
// Checks for the Zip64 EOCD locator signature (g_zipsig67) 20 bytes before
// the end-of-central-dir record. From the locator it reads the disk number
// and file offset of the Zip64 EOCD record into d, and logs the total number
// of disks.
1606 static void do_zip64_eocd_locator(deark
*c
, lctx
*d
)
1609 i64 pos
= d
->end_of_central_dir_pos
- 20;
1611 if(dbuf_memcmp(c
->infile
, pos
, g_zipsig67
, 4)) {
1614 de_dbg(c
, "zip64 eocd locator found at %"I64_FMT
, pos
);
1617 de_dbg_indent(c
, 1);
1618 d
->zip64_eocd_disknum
= (unsigned int)de_getu32le_p(&pos
);
1619 d
->zip64_eocd_pos
= de_geti64le(pos
); pos
+= 8;
1620 de_dbg(c
, "offset of zip64 eocd: %"I64_FMT
", disk: %u",
1621 d
->zip64_eocd_pos
, d
->zip64_eocd_disknum
);
1622 n
= de_getu32le_p(&pos
);
1623 de_dbg(c
, "total number of disks: %u", (unsigned int)n
);
1624 de_dbg_indent(c
, -1);
// Parses the end-of-central-directory record at d->end_of_central_dir_pos.
// When d->is_zip64 is set, 16-bit and 32-bit fields holding the values 0xffff
// and 0xffffffff are replaced by the values read from the Zip64 EOCD record.
// Also reads the ZIP file comment, checks for disk spanning, and checks
// whether the reported central directory offset agrees with its reported size.
// de_run_zip_normally() treats a zero return value as failure.
1627 static int do_end_of_central_dir(deark
*c
, lctx
*d
)
1630 i64 num_entries_this_disk
;
1631 i64 disk_num_with_central_dir_start
;
1633 i64 alt_central_dir_offset
;
1636 pos
= d
->end_of_central_dir_pos
;
1637 de_dbg(c
, "end-of-central-dir record at %"I64_FMT
, pos
);
1638 de_dbg_indent(c
, 1);
// Fields are read at fixed offsets from the start of the record.
1640 d
->this_disk_num
= de_getu16le(pos
+4);
1641 de_dbg(c
, "this disk num: %"I64_FMT
, d
->this_disk_num
);
1642 disk_num_with_central_dir_start
= de_getu16le(pos
+6);
1644 num_entries_this_disk
= de_getu16le(pos
+8);
1645 de_dbg(c
, "central dir num entries on this disk: %"I64_FMT
, num_entries_this_disk
);
1646 if(d
->is_zip64
&& (num_entries_this_disk
==0xffff)) {
1647 num_entries_this_disk
= d
->zip64_num_centr_dir_entries_this_disk
;
1650 d
->central_dir_num_entries
= de_getu16le(pos
+10);
1651 d
->central_dir_byte_size
= de_getu32le(pos
+12);
1652 d
->central_dir_offset
= de_getu32le(pos
+16);
1653 de_dbg(c
, "central dir num entries: %"I64_FMT
, d
->central_dir_num_entries
);
1654 if(d
->is_zip64
&& (d
->central_dir_num_entries
==0xffff)) {
1655 d
->central_dir_num_entries
= d
->zip64_num_centr_dir_entries_total
;
1658 de_dbg(c
, "central dir size: %"I64_FMT
, d
->central_dir_byte_size
);
1659 if(d
->is_zip64
&& (d
->central_dir_byte_size
==0xffffffffLL
)) {
1660 d
->central_dir_byte_size
= d
->zip64_centr_dir_byte_size
;
1663 de_dbg(c
, "central dir offset: %"I64_FMT
", disk: %"I64_FMT
, d
->central_dir_offset
,
1664 disk_num_with_central_dir_start
);
1665 if(d
->is_zip64
&& (d
->central_dir_offset
==0xffffffffLL
)) {
1666 d
->central_dir_offset
= d
->zip64_cd_pos
;
1669 comment_length
= de_getu16le(pos
+20);
1670 de_dbg(c
, "comment length: %d", (int)comment_length
);
1671 if(comment_length
>0) {
1672 // The comment for the whole .ZIP file presumably has to use
1673 // cp437 encoding. There's no flag that could indicate otherwise.
1674 do_comment(c
, d
, pos
+22, comment_length
, 0,
1675 "ZIP file comment", "comment.txt");
1678 // TODO: Figure out exactly how to detect disk spanning.
1679 if(disk_num_with_central_dir_start
!=d
->this_disk_num
||
1680 (d
->is_zip64
&& d
->zip64_eocd_disknum
!=d
->this_disk_num
))
1682 de_err(c
, "Disk spanning not supported");
1686 if(d
->this_disk_num
!=0) {
1687 de_warn(c
, "This ZIP file might be part of a multi-part archive, and "
1688 "might not be supported correctly");
// NOTE(review): both counts are i64 but are printed through (int) casts with
// %d; a very large Zip64 entry count could be truncated in this message.
1691 if(num_entries_this_disk
!=d
->central_dir_num_entries
) {
1692 de_warn(c
, "This ZIP file might not be supported correctly "
1693 "(number-of-entries-this-disk=%d, number-of-entries-total=%d)",
1694 (int)num_entries_this_disk
, (int)d
->central_dir_num_entries
);
// Where the central directory would start if it ended exactly at the EOCD
// record (or at the Zip64 EOCD record, for Zip64 files).
1697 alt_central_dir_offset
=
1698 (d
->is_zip64
? d
->zip64_eocd_pos
: d
->end_of_central_dir_pos
) -
1699 d
->central_dir_byte_size
;
1701 if(alt_central_dir_offset
!= d
->central_dir_offset
) {
1704 de_warn(c
, "Inconsistent central directory offset. Reported to be %"I64_FMT
", "
1705 "but based on its reported size, it should be %"I64_FMT
".",
1706 d
->central_dir_offset
, alt_central_dir_offset
);
// Switch to the computed offset only if a central dir entry signature is
// found there; the difference is kept in d->offset_discrepancy.
1708 sig
= (u32
)de_getu32le(alt_central_dir_offset
);
1709 if(sig
==CODE_PK12
) {
1710 d
->offset_discrepancy
= alt_central_dir_offset
- d
->central_dir_offset
;
1711 de_dbg(c
, "likely central dir found at %"I64_FMT
, alt_central_dir_offset
);
1712 d
->central_dir_offset
= alt_central_dir_offset
;
1719 de_dbg_indent(c
, -1);
// Normal (central-directory-based) processing. Locates the end-of-central-dir
// record, reusing the result cached in c->detection_data when format
// detection has already looked for it. Then handles the Zip64 locator and
// record, the EOCD record, and finally the central directory.
1723 static void de_run_zip_normally(deark
*c
, lctx
*d
)
1727 if(c
->detection_data
&& c
->detection_data
->zip_eocd_looked_for
) {
1728 eocd_found
= (int)c
->detection_data
->zip_eocd_found
;
1729 d
->end_of_central_dir_pos
= c
->detection_data
->zip_eocd_pos
;
// No cached result: search for the EOCD record now.
1732 eocd_found
= fmtutil_find_zip_eocd(c
, c
->infile
, &d
->end_of_central_dir_pos
);
1735 de_err(c
, "Not a ZIP file");
1739 de_dbg(c
, "end-of-central-dir record found at %"I64_FMT
,
1740 d
->end_of_central_dir_pos
);
1742 do_zip64_eocd_locator(c
, d
);
1745 if(!do_zip64_eocd(c
, d
)) goto done
;
1749 de_declare_fmt(c
, "ZIP-Zip64");
1751 de_declare_fmt(c
, "ZIP");
1753 if(!do_end_of_central_dir(c
, d
)) {
1757 if(!do_central_dir(c
, d
)) {
// Module entry point (registered as mi->run_fn). Allocates the local context
// and a CRC-32 object, then runs in scan mode if the "zip:scanmode" option is
// set, or normally otherwise. The CRC object is destroyed before returning.
1765 static void de_run_zip(deark
*c
, de_module_params
*mparams
)
1769 d
= de_malloc(c
, sizeof(lctx
));
1771 d
->crco
= de_crcobj_create(c
, DE_CRCOBJ_CRC32_IEEE
);
1773 if(de_get_ext_option(c
, "zip:scanmode")) {
1774 de_run_zip_scanmode(c
, d
);
1777 de_run_zip_normally(c
, d
);
1781 de_crcobj_destroy(d
->crco
);
// Format detection (registered as mi->identify_fn). Returns a confidence
// value:
//  - local file header signature in b: 100 with a ".zip" extension, else 90;
//  - EOCD signature exactly 22 bytes before the end of the file (i.e. no ZIP
//    file comment): 100 with a ".zip" extension, else 19.
// Otherwise, if the file has an "MZ" signature or a ".zip" extension, a full
// EOCD search is done and its result is cached in c->detection_data.
1786 static int de_identify_zip(deark
*c
)
1792 has_zip_ext
= de_input_file_has_ext(c
, "zip");
1797 if(!de_memcmp(b
, g_zipsig34
, 4)) {
1798 return has_zip_ext
? 100 : 90;
1800 if(b
[0]=='M' && b
[1]=='Z') has_mz_sig
= 1;
// 22 is the offset used here for an EOCD record that ends the file.
1802 if(c
->infile
->len
>= 22) {
1803 de_read(b
, c
->infile
->len
- 22, 4);
1804 if(!de_memcmp(b
, g_zipsig56
, 4)) {
1805 return has_zip_ext
? 100 : 19;
1809 // Things to consider:
1810 // * We want de_fmtutil_find_zip_eocd() to be called no more than once, and
1811 // only on files that for some reason we suspect could be ZIP files.
1812 // * If the user disables exe format detection (e.g. with "-onlydetect zip"),
1813 // we want self-extracting-ZIP .exe files to be detected as ZIP instead.
1814 // * And we want the above to work even if the file has a ZIP file comment,
1815 // making it expensive to detect as ZIP.
1817 // Tests below can't return a confidence higher than this.
1818 if(c
->detection_data
->best_confidence_so_far
>= 19) return 0;
1822 if(has_mz_sig
|| has_zip_ext
) {
// Record that the search was done, so de_run_zip_normally() can reuse it.
1825 c
->detection_data
->zip_eocd_looked_for
= 1;
1826 if(fmtutil_find_zip_eocd(c
, c
->infile
, &eocd_pos
)) {
1827 c
->detection_data
->zip_eocd_found
= 1;
1828 c
->detection_data
->zip_eocd_pos
= eocd_pos
;
// Prints the module-specific options (registered as mi->help_fn).
1836 static void de_help_zip(deark
*c
)
1838 de_msg(c
, "-opt zip:scanmode : Do not use the \"central directory\"");
1839 de_msg(c
, "-opt zip:implodebug : Behave like PKZIP 1.01/1.02");
// Module registration: fills in the module description and the run, identify
// and help callbacks.
1842 void de_module_zip(deark
*c
, struct deark_module_info
*mi
)
1845 mi
->desc
= "ZIP archive";
1846 mi
->run_fn
= de_run_zip
;
1847 mi
->identify_fn
= de_identify_zip
;
1848 mi
->help_fn
= de_help_zip
;