1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
7 #include <deark-config.h>
8 #include <deark-private.h>
9 #include <deark-fmtutil.h>
11 DE_DECLARE_MODULE(de_module_zip
);
13 struct localctx_struct
;
14 typedef struct localctx_struct lctx
;
16 #define CODE_PK12 0x02014b50U
17 #define CODE_PK34 0x04034b50U
18 static const u8 g_zipsig34
[4] = {'P', 'K', 0x03, 0x04};
19 static const u8 g_zipsig56
[4] = {'P', 'K', 0x05, 0x06};
20 static const u8 g_zipsig66
[4] = {'P', 'K', 0x06, 0x06};
21 static const u8 g_zipsig67
[4] = {'P', 'K', 0x06, 0x07};
23 struct compression_params
{
	// ZIP-specific params (not in de_dfilter_*_params) that may be needed
	// to decompress something.
27 unsigned int bit_flags
;
30 typedef void (*decompressor_fn
)(deark
*c
, lctx
*d
, struct compression_params
*cparams
,
31 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
32 struct de_dfilter_results
*dres
);
34 struct cmpr_meth_info
{
38 decompressor_fn decompressor
;
41 struct dir_entry_data
{
42 unsigned int ver_needed
;
43 unsigned int ver_needed_hi
, ver_needed_lo
;
44 i64 cmpr_size
, uncmpr_size
;
46 const struct cmpr_meth_info
*cmi
;
47 unsigned int bit_flags
;
54 struct timestamp_data
{
55 struct de_timestamp ts
; // The best timestamp of this type found so far
60 unsigned int ver_made_by
;
61 unsigned int ver_made_by_hi
, ver_made_by_lo
;
62 unsigned int attr_i
, attr_e
;
63 i64 offset_of_local_header
;
64 i64 disk_number_start
;
70 struct timestamp_data tsdata
[DE_TIMESTAMPIDX_COUNT
];
72 struct de_riscos_file_attrs rfa
;
74 struct dir_entry_data central_dir_entry_data
;
75 struct dir_entry_data local_dir_entry_data
;
77 i64 cmpr_size
, uncmpr_size
;
81 struct extra_item_type_info_struct
;
83 struct extra_item_info_struct
{
87 const struct extra_item_type_info_struct
*eiti
;
88 struct member_data
*md
;
89 struct dir_entry_data
*dd
;
93 struct localctx_struct
{
94 de_encoding default_enc_for_filenames
;
95 de_encoding default_enc_for_comments
;
96 i64 end_of_central_dir_pos
;
97 i64 central_dir_num_entries
;
98 i64 central_dir_byte_size
;
99 i64 central_dir_offset
;
103 i64 zip64_num_centr_dir_entries_this_disk
;
104 i64 zip64_num_centr_dir_entries_total
;
105 i64 zip64_centr_dir_byte_size
;
106 unsigned int zip64_eocd_disknum
;
107 unsigned int zip64_cd_disknum
;
108 i64 offset_discrepancy
;
109 int used_offset_discrepancy
;
112 struct de_crcobj
*crco
;
115 typedef void (*extrafield_decoder_fn
)(deark
*c
, lctx
*d
,
116 struct extra_item_info_struct
*eii
);
118 static int is_compression_method_supported(lctx
*d
, const struct cmpr_meth_info
*cmi
)
120 if(cmi
&& cmi
->decompressor
) return 1;
124 static void do_decompress_shrink(deark
*c
, lctx
*d
, struct compression_params
*cparams
,
125 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
126 struct de_dfilter_results
*dres
)
128 fmtutil_decompress_zip_shrink(c
, dcmpri
, dcmpro
, dres
, NULL
);
131 static void do_decompress_reduce(deark
*c
, lctx
*d
, struct compression_params
*cparams
,
132 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
133 struct de_dfilter_results
*dres
)
135 struct de_zipreduce_params params
;
137 de_zeromem(¶ms
, sizeof(struct de_zipreduce_params
));
138 params
.cmpr_factor
= (unsigned int)(cparams
->cmpr_meth
-1);
139 fmtutil_decompress_zip_reduce(c
, dcmpri
, dcmpro
, dres
, ¶ms
);
142 static void do_decompress_implode(deark
*c
, lctx
*d
, struct compression_params
*cparams
,
143 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
144 struct de_dfilter_results
*dres
)
146 struct de_zipimplode_params params
;
148 de_zeromem(¶ms
, sizeof(struct de_zipimplode_params
));
149 params
.bit_flags
= cparams
->bit_flags
;
150 params
.mml_bug
= (u8
)de_get_ext_option_bool(c
, "zip:implodebug", 0);
151 fmtutil_decompress_zip_implode(c
, dcmpri
, dcmpro
, dres
, ¶ms
);
154 static void do_decompress_deflate(deark
*c
, lctx
*d
, struct compression_params
*cparams
,
155 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
156 struct de_dfilter_results
*dres
)
158 struct de_deflate_params inflparams
;
160 de_zeromem(&inflparams
, sizeof(struct de_deflate_params
));
161 if(cparams
->cmpr_meth
==9) {
162 inflparams
.flags
|= DE_DEFLATEFLAG_DEFLATE64
;
164 fmtutil_decompress_deflate_ex(c
, dcmpri
, dcmpro
, dres
, &inflparams
);
167 static void do_decompress_dclimplode(deark
*c
, lctx
*d
, struct compression_params
*cparams
,
168 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
169 struct de_dfilter_results
*dres
)
171 fmtutil_dclimplode_codectype1(c
, dcmpri
, dcmpro
, dres
, NULL
);
174 static void do_decompress_stored(deark
*c
, lctx
*d
, struct compression_params
*cparams
,
175 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
176 struct de_dfilter_results
*dres
)
178 fmtutil_decompress_uncompressed(c
, dcmpri
, dcmpro
, dres
, 0);
181 static const struct cmpr_meth_info cmpr_meth_info_arr
[] = {
182 { 0, 0x00, "stored", do_decompress_stored
},
183 { 1, 0x00, "shrink", do_decompress_shrink
},
184 { 2, 0x00, "reduce, CF=1", do_decompress_reduce
},
185 { 3, 0x00, "reduce, CF=2", do_decompress_reduce
},
186 { 4, 0x00, "reduce, CF=3", do_decompress_reduce
},
187 { 5, 0x00, "reduce, CF=4", do_decompress_reduce
},
188 { 6, 0x00, "implode", do_decompress_implode
},
189 { 8, 0x00, "deflate", do_decompress_deflate
},
190 { 9, 0x00, "deflate64", do_decompress_deflate
},
191 { 10, 0x00, "PKWARE DCL implode", do_decompress_dclimplode
},
192 { 12, 0x00, "bzip2", NULL
},
193 { 14, 0x00, "LZMA", NULL
},
194 { 16, 0x00, "IBM z/OS CMPSC", NULL
},
195 { 18, 0x00, "IBM TERSE (new)", NULL
},
196 { 19, 0x00, "IBM LZ77 z Architecture", NULL
},
197 { 94, 0x00, "MP3", NULL
},
198 { 95, 0x00, "XZ", NULL
},
199 { 96, 0x00, "JPEG", NULL
},
200 { 97, 0x00, "WavPack", NULL
},
201 { 98, 0x00, "PPMd", NULL
},
202 { 99, 0x00, "AES", NULL
}
205 static const struct cmpr_meth_info
*get_cmpr_meth_info(int cmpr_meth
)
209 for(k
=0; k
<DE_ARRAYCOUNT(cmpr_meth_info_arr
); k
++) {
210 if(cmpr_meth_info_arr
[k
].cmpr_meth
== cmpr_meth
) {
211 return &cmpr_meth_info_arr
[k
];
217 // Decompress some data, using the given ZIP compression method.
218 // On failure, dres->errcode will be set.
219 static void do_decompress_lowlevel(deark
*c
, lctx
*d
, struct de_dfilter_in_params
*dcmpri
,
220 struct de_dfilter_out_params
*dcmpro
, struct de_dfilter_results
*dres
,
221 int cmpr_meth
, const struct cmpr_meth_info
*cmi
, unsigned int bit_flags
)
223 struct compression_params cparams
;
225 de_zeromem(&cparams
, sizeof(struct compression_params
));
226 cparams
.cmpr_meth
= cmpr_meth
;
227 cparams
.bit_flags
= bit_flags
;
229 if(cmi
&& cmi
->decompressor
) {
230 cmi
->decompressor(c
, d
, &cparams
, dcmpri
, dcmpro
, dres
);
233 de_internal_err_nonfatal(c
, "Unsupported compression method (%d)", cmpr_meth
);
234 de_dfilter_set_generic_error(c
, dres
, NULL
);
238 static void our_writelistener_cb(dbuf
*f
, void *userdata
, const u8
*buf
, i64 buf_len
)
240 struct de_crcobj
*crco
= (struct de_crcobj
*)userdata
;
241 de_crcobj_addbuf(crco
, buf
, buf_len
);
244 // Decompress a Zip member file, writing to outf.
245 // Does CRC calculation.
246 // Reports errors to the user.
247 // Only call this if the compression method is supported -- Call
248 // is_compression_method_supported() first.
249 // Assumes ldd->cmi has been set, by calling get_cmpr_meth_info().
250 static int do_decompress_member(deark
*c
, lctx
*d
, struct member_data
*md
, dbuf
*outf
)
252 struct dir_entry_data
*ldd
= &md
->local_dir_entry_data
;
253 struct de_dfilter_in_params dcmpri
;
254 struct de_dfilter_out_params dcmpro
;
255 struct de_dfilter_results dres
;
259 de_dfilter_init_objects(c
, &dcmpri
, &dcmpro
, &dres
);
260 dcmpri
.f
= c
->infile
;
261 dcmpri
.pos
= md
->file_data_pos
;
262 dcmpri
.len
= md
->cmpr_size
;
264 dcmpro
.expected_len
= md
->uncmpr_size
;
265 dcmpro
.len_known
= 1;
267 dbuf_set_writelistener(outf
, our_writelistener_cb
, (void*)d
->crco
);
268 de_crcobj_reset(d
->crco
);
270 do_decompress_lowlevel(c
, d
, &dcmpri
, &dcmpro
, &dres
, ldd
->cmpr_meth
,
271 ldd
->cmi
, ldd
->bit_flags
);
274 de_err(c
, "%s: %s", ucstring_getpsz_d(ldd
->fname
),
275 de_dfilter_get_errmsg(c
, &dres
));
279 crc_calculated
= de_crcobj_getval(d
->crco
);
280 de_dbg(c
, "crc (calculated): 0x%08x", (unsigned int)crc_calculated
);
282 if(crc_calculated
!= md
->crc_reported
) {
283 de_err(c
, "%s: CRC check failed: Expected 0x%08x, got 0x%08x",
284 ucstring_getpsz_d(ldd
->fname
),
285 (unsigned int)md
->crc_reported
, (unsigned int)crc_calculated
);
286 if(dres
.bytes_consumed_valid
&& (dres
.bytes_consumed
< dcmpri
.len
)) {
287 de_info(c
, "Note: Only used %"I64_FMT
" of %"I64_FMT
" compressed bytes.",
288 dres
.bytes_consumed
, dcmpri
.len
);
298 // A variation of do_decompress_member() -
299 // works for Finder attribute data, and OS/2 extended attributes.
300 // Only call this if the compression method is supported -- Call
301 // is_compression_method_supported() first.
302 // outf is assumed to be a membuf.
303 // dcflags: 0x1 = Validate the crc_reported param.
304 static int do_decompress_attrib_data(deark
*c
, lctx
*d
,
305 i64 dpos
, i64 dlen
, dbuf
*outf
, i64 uncmprsize
, u32 crc_reported
,
306 int cmpr_meth
, const struct cmpr_meth_info
*cmi
, UI flags
, const char *name
)
308 struct de_dfilter_in_params dcmpri
;
309 struct de_dfilter_out_params dcmpro
;
310 struct de_dfilter_results dres
;
314 de_dfilter_init_objects(c
, &dcmpri
, &dcmpro
, &dres
);
315 dcmpri
.f
= c
->infile
;
319 dcmpro
.expected_len
= uncmprsize
;
320 dcmpro
.len_known
= 1;
322 do_decompress_lowlevel(c
, d
, &dcmpri
, &dcmpro
, &dres
, cmpr_meth
, cmi
, 0);
324 goto done
; // Could report the error, but this isn't critical data
328 de_crcobj_reset(d
->crco
);
329 de_crcobj_addslice(d
->crco
, outf
, 0, outf
->len
);
330 crc_calculated
= de_crcobj_getval(d
->crco
);
331 de_dbg(c
, "%s crc (calculated): 0x%08x", name
, (UI
)crc_calculated
);
332 if(crc_calculated
!= crc_reported
) goto done
;
340 // As we read a member file's attributes, we may encounter multiple timestamps,
341 // which can differ in their precision, and whether they use UTC.
342 // This function is called to remember the "best" file modification time
343 // encountered so far.
344 static void apply_timestamp(deark
*c
, lctx
*d
, struct member_data
*md
, int tstype
,
345 const struct de_timestamp
*ts
, int quality
)
347 if(!ts
->is_valid
) return;
349 // In case of a tie, we prefer the later timestamp that we encountered.
350 // This makes local headers have priority over central headers, for
352 if(quality
>= md
->tsdata
[tstype
].quality
) {
353 md
->tsdata
[tstype
].ts
= *ts
;
354 md
->tsdata
[tstype
].quality
= quality
;
358 static void do_read_filename(deark
*c
, lctx
*d
,
359 struct member_data
*md
, struct dir_entry_data
*dd
,
360 i64 pos
, i64 len
, int utf8_flag
)
362 de_encoding from_encoding
;
364 ucstring_empty(dd
->fname
);
365 from_encoding
= utf8_flag
? DE_ENCODING_UTF8
: d
->default_enc_for_filenames
;
366 dbuf_read_to_ucstring(c
->infile
, pos
, len
, dd
->fname
, 0, from_encoding
);
367 de_dbg(c
, "filename: \"%s\"", ucstring_getpsz_d(dd
->fname
));
370 static void do_comment_display(deark
*c
, lctx
*d
, i64 pos
, i64 len
, de_ext_encoding ee
,
373 de_ucstring
*s
= NULL
;
375 s
= ucstring_create(c
);
376 dbuf_read_to_ucstring(c
->infile
, pos
, len
, s
, 0, ee
);
377 de_dbg(c
, "%s: \"%s\"", name
, ucstring_getpsz_d(s
));
381 static void do_comment_extract(deark
*c
, lctx
*d
, i64 pos
, i64 len
, de_ext_encoding ee
,
385 de_ucstring
*s
= NULL
;
387 f
= dbuf_create_output_file(c
, ext
, NULL
, DE_CREATEFLAG_IS_AUX
);
388 s
= ucstring_create(c
);
389 dbuf_read_to_ucstring(c
->infile
, pos
, len
, s
, 0, ee
);
390 ucstring_write_as_utf8(c
, s
, f
, 1);
395 static void do_comment(deark
*c
, lctx
*d
, i64 pos
, i64 len
, int utf8_flag
,
396 const char *name
, const char *ext
)
401 ee
= utf8_flag
? DE_ENCODING_UTF8
: d
->default_enc_for_comments
;
402 ee
= DE_EXTENC_MAKE(ee
, DE_ENCSUBTYPE_HYBRID
);
403 if(c
->extract_level
>=2) {
404 do_comment_extract(c
, d
, pos
, len
, ee
, ext
);
407 do_comment_display(c
, d
, pos
, len
, ee
, name
);
411 static void read_unix_timestamp(deark
*c
, lctx
*d
, i64 pos
,
412 struct de_timestamp
*timestamp
, const char *name
)
415 char timestamp_buf
[64];
417 t
= de_geti32le(pos
);
418 de_unix_time_to_timestamp(t
, timestamp
, 0x1);
419 de_dbg_timestamp_to_string(c
, timestamp
, timestamp_buf
, sizeof(timestamp_buf
), 0);
420 de_dbg(c
, "%s: %"I64_FMT
" (%s)", name
, t
, timestamp_buf
);
423 static void read_FILETIME(deark
*c
, lctx
*d
, i64 pos
,
424 struct de_timestamp
*timestamp
, const char *name
)
427 char timestamp_buf
[64];
429 t_FILETIME
= de_geti64le(pos
);
430 de_FILETIME_to_timestamp(t_FILETIME
, timestamp
, 0x1);
431 de_dbg_timestamp_to_string(c
, timestamp
, timestamp_buf
, sizeof(timestamp_buf
), 0);
432 de_dbg(c
, "%s: %s", name
, timestamp_buf
);
435 static void ef_zip64extinfo(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
440 if(pos
+8 > eii
->dpos
+eii
->dlen
) goto done
;
441 n
= de_geti64le(pos
); pos
+= 8;
442 de_dbg(c
, "orig uncmpr file size: %"I64_FMT
, n
);
443 if(eii
->dd
->uncmpr_size
==0xffffffffLL
) {
444 eii
->dd
->uncmpr_size
= n
;
447 if(pos
+8 > eii
->dpos
+eii
->dlen
) goto done
;
448 n
= de_geti64le(pos
); pos
+= 8;
449 de_dbg(c
, "cmpr data size: %"I64_FMT
, n
);
450 if(eii
->dd
->cmpr_size
==0xffffffffLL
) {
451 eii
->dd
->cmpr_size
= n
;
454 if(pos
+8 > eii
->dpos
+eii
->dlen
) goto done
;
455 n
= de_geti64le(pos
); pos
+= 8;
456 de_dbg(c
, "offset of local header record: %"I64_FMT
, n
);
458 if(pos
+4 > eii
->dpos
+eii
->dlen
) goto done
;
459 n
= de_getu32le_p(&pos
);
460 de_dbg(c
, "disk start number: %"I64_FMT
, n
);
465 // Extra field 0x5455
466 static void ef_extended_timestamp(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
471 int has_mtime
, has_atime
, has_ctime
;
472 struct de_timestamp timestamp_tmp
;
474 endpos
= pos
+ eii
->dlen
;
475 if(pos
+1>endpos
) return;
476 flags
= de_getbyte_p(&pos
);
477 de_dbg2(c
, "flags: 0x%02x", (UI
)flags
);
478 if(eii
->is_central
) {
479 has_mtime
= (eii
->dlen
>=5);
484 has_mtime
= (flags
& 0x01)?1:0;
485 has_atime
= (flags
& 0x02)?1:0;
486 has_ctime
= (flags
& 0x04)?1:0;
489 if(pos
+4>endpos
) return;
490 read_unix_timestamp(c
, d
, pos
, ×tamp_tmp
, "mtime");
491 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_MODIFY
, ×tamp_tmp
, 50);
495 if(pos
+4>endpos
) return;
496 read_unix_timestamp(c
, d
, pos
, ×tamp_tmp
, "atime");
497 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_ACCESS
, ×tamp_tmp
, 50);
501 if(pos
+4>endpos
) return;
502 read_unix_timestamp(c
, d
, pos
, ×tamp_tmp
, "creation time");
503 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_CREATE
, ×tamp_tmp
, 50);
508 // Extra field 0x5855
509 static void ef_infozip1(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
512 struct de_timestamp timestamp_tmp
;
514 if(eii
->dlen
<8) return;
515 read_unix_timestamp(c
, d
, eii
->dpos
, ×tamp_tmp
, "atime");
516 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_ACCESS
, ×tamp_tmp
, 45);
517 read_unix_timestamp(c
, d
, eii
->dpos
+4, ×tamp_tmp
, "mtime");
518 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_MODIFY
, ×tamp_tmp
, 45);
519 if(!eii
->is_central
&& eii
->dlen
>=12) {
520 uidnum
= de_getu16le(eii
->dpos
+8);
521 gidnum
= de_getu16le(eii
->dpos
+10);
522 de_dbg(c
, "uid: %d, gid: %d", (int)uidnum
, (int)gidnum
);
526 // Extra field 0x7075 - Info-ZIP Unicode Path
527 static void ef_unicodepath(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
530 de_ucstring
*fn
= NULL
;
532 u32 crc_reported
, crc_calculated
;
534 if(eii
->dlen
<1) goto done
;
535 ver
= de_getbyte(eii
->dpos
);
536 de_dbg(c
, "version: %u", (unsigned int)ver
);
537 if(ver
!=1) goto done
;
538 if(eii
->dlen
<6) goto done
;
539 crc_reported
= (u32
)de_getu32le(eii
->dpos
+1);
540 de_dbg(c
, "name-crc (reported): 0x%08x", (unsigned int)crc_reported
);
541 fn
= ucstring_create(c
);
542 fnlen
= eii
->dlen
- 5;
543 dbuf_read_to_ucstring(c
->infile
, eii
->dpos
+5, fnlen
, fn
, 0, DE_ENCODING_UTF8
);
544 de_dbg(c
, "unicode name: \"%s\"", ucstring_getpsz_d(fn
));
546 // Need to go back and calculate a CRC of the main filename. This is
547 // protection against the case where a ZIP editor may have changed the
548 // original filename, but retained a now-orphaned Unicode Path field.
549 de_crcobj_reset(d
->crco
);
550 de_crcobj_addslice(d
->crco
, c
->infile
, eii
->dd
->main_fname_pos
, eii
->dd
->main_fname_len
);
551 crc_calculated
= de_crcobj_getval(d
->crco
);
552 de_dbg(c
, "name-crc (calculated): 0x%08x", (unsigned int)crc_calculated
);
554 if(crc_calculated
== crc_reported
) {
555 ucstring_empty(eii
->dd
->fname
);
556 ucstring_append_ucstring(eii
->dd
->fname
, fn
);
560 ucstring_destroy(fn
);
563 // Extra field 0x7855
564 static void ef_infozip2(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
568 if(eii
->is_central
) return;
569 if(eii
->dlen
<4) return;
570 uidnum
= de_getu16le(eii
->dpos
);
571 gidnum
= de_getu16le(eii
->dpos
+2);
572 de_dbg(c
, "uid: %d, gid: %d", (int)uidnum
, (int)gidnum
);
575 // Extra field 0x7875
576 static void ef_infozip3(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
584 endpos
= pos
+eii
->dlen
;
586 if(pos
+1>endpos
) return;
587 ver
= de_getbyte_p(&pos
);
588 de_dbg(c
, "version: %d", (int)ver
);
591 if(pos
+1>endpos
) return;
592 sz
= (i64
)de_getbyte_p(&pos
);
593 if(pos
+sz
>endpos
) return;
594 uidnum
= dbuf_getint_ext(c
->infile
, pos
, (unsigned int)sz
, 1, 0);
597 if(pos
+1>endpos
) return;
598 sz
= (i64
)de_getbyte_p(&pos
);
599 if(pos
+sz
>endpos
) return;
600 gidnum
= dbuf_getint_ext(c
->infile
, pos
, (unsigned int)sz
, 1, 0);
603 de_dbg(c
, "uid: %d, gid: %d", (int)uidnum
, (int)gidnum
);
606 // Extra field 0x000a
607 static void ef_ntfs(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
614 struct de_timestamp timestamp_tmp
;
616 endpos
= pos
+eii
->dlen
;
617 pos
+= 4; // skip reserved field
620 if(pos
+4>endpos
) break;
621 attr_tag
= de_getu16le_p(&pos
);
622 attr_size
= de_getu16le_p(&pos
);
623 if(attr_tag
==0x0001) name
="NTFS filetimes";
625 de_dbg(c
, "tag: 0x%04x (%s), dlen: %d", (unsigned int)attr_tag
, name
,
627 if(pos
+attr_size
>endpos
) break;
630 if(attr_tag
==0x0001 && attr_size
>=24) {
631 read_FILETIME(c
, d
, pos
, ×tamp_tmp
, "mtime");
632 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_MODIFY
, ×tamp_tmp
, 90);
633 read_FILETIME(c
, d
, pos
+8, ×tamp_tmp
, "atime");
634 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_ACCESS
, ×tamp_tmp
, 90);
635 read_FILETIME(c
, d
, pos
+16, ×tamp_tmp
, "creation time");
636 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_CREATE
, ×tamp_tmp
, 90);
638 de_dbg_indent(c
, -1);
644 // Extra field 0x0009
645 static void ef_os2(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
653 const struct cmpr_meth_info
*cmi
= NULL
;
654 const char *name
= "OS/2 ext. attr. data";
655 dbuf
*attr_data
= NULL
;
656 de_module_params
*mparams
= NULL
;
659 endpos
= pos
+eii
->dlen
;
660 if(pos
+4>endpos
) goto done
;
661 ulen
= de_getu32le_p(&pos
);
662 de_dbg(c
, "uncmpr ext attr data size: %"I64_FMT
, ulen
);
663 if(eii
->is_central
) goto done
;
665 if(pos
+2>endpos
) goto done
;
666 cmpr_meth
= (int)de_getu16le_p(&pos
);
667 de_dbg(c
, "ext attr cmpr method: %d", cmpr_meth
);
669 if(pos
+4>endpos
) goto done
;
670 crc_reported
= (u32
)de_getu32le_p(&pos
);
671 de_dbg(c
, "ext attr crc (reported): 0x%08x", (unsigned int)crc_reported
);
673 cmpr_attr_size
= endpos
-pos
;
674 de_dbg(c
, "cmpr ext attr data at %"I64_FMT
", len=%"I64_FMT
, pos
, cmpr_attr_size
);
675 if(pos
+ cmpr_attr_size
> endpos
) goto done
;
677 cmi
= get_cmpr_meth_info(cmpr_meth
);
678 if(cmpr_meth
==6 || !is_compression_method_supported(d
, cmi
)) {
679 de_warn(c
, "%s: Unsupported compression method: %d (%s)",
680 name
, cmpr_meth
, (cmi
? cmi
->name
: "?"));
684 attr_data
= dbuf_create_membuf(c
, ulen
, 0x1);
685 ret
= do_decompress_attrib_data(c
, d
, pos
, cmpr_attr_size
,
686 attr_data
, ulen
, crc_reported
, cmpr_meth
, cmi
, 0x1, name
);
688 de_warn(c
, "Failed to decompress %s", name
);
692 // attr_data contains an OS/2 extended attribute structure (FEA2LIST)
693 mparams
= de_malloc(c
, sizeof(de_module_params
));
694 mparams
->in_params
.codes
= "L";
695 de_dbg(c
, "decoding OS/2 ext. attribs., unc. len=%"I64_FMT
, attr_data
->len
);
697 de_run_module_by_id_on_slice(c
, "ea_data", mparams
, attr_data
, 0, attr_data
->len
);
698 de_dbg_indent(c
, -1);
701 dbuf_close(attr_data
);
705 // Extra field 0x2705 (ZipIt Macintosh 1.3.5+)
706 static void ef_zipitmac_2705(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
708 struct de_fourcc sig
;
709 struct de_fourcc filetype
;
710 struct de_fourcc creator
;
712 if(eii
->dlen
<4) goto done
;
713 dbuf_read_fourcc(c
->infile
, eii
->dpos
, &sig
, 4, 0x0);
714 de_dbg(c
, "signature: '%s'", sig
.id_dbgstr
);
715 if(sig
.id
!=0x5a504954U
) goto done
; // expecting 'ZPIT'
716 if(eii
->dlen
<12) goto done
;
717 dbuf_read_fourcc(c
->infile
, eii
->dpos
+4, &filetype
, 4, 0x0);
718 de_dbg(c
, "filetype: '%s'", filetype
.id_dbgstr
);
719 dbuf_read_fourcc(c
->infile
, eii
->dpos
+8, &creator
, 4, 0x0);
720 de_dbg(c
, "creator: '%s'", creator
.id_dbgstr
);
726 // The time will be returned in the caller-supplied 'ts'
727 static void handle_mac_time(deark
*c
, lctx
*d
,
728 i64 mt_raw
, i64 mt_offset
,
729 struct de_timestamp
*ts
, const char *name
)
731 char timestamp_buf
[64];
732 de_mac_time_to_timestamp(mt_raw
- mt_offset
, ts
);
733 ts
->tzcode
= DE_TZCODE_UTC
;
734 de_dbg_timestamp_to_string(c
, ts
, timestamp_buf
, sizeof(timestamp_buf
), 0);
735 de_dbg(c
, "%s: %"I64_FMT
" %+"I64_FMT
" (%s)", name
,
736 mt_raw
, -mt_offset
, timestamp_buf
);
739 // Extra field 0x334d (Info-ZIP Macintosh)
740 static void ef_infozipmac(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
748 const struct cmpr_meth_info
*cmi
= NULL
;
749 struct de_fourcc filetype
;
750 struct de_fourcc creator
;
751 de_ucstring
*flags_str
= NULL
;
752 dbuf
*attr_data
= NULL
;
755 i64 create_time_offset
;
759 i64 backup_time_offset
;
760 struct de_timestamp tmp_timestamp
;
762 u32 crc_reported
= 0;
764 struct de_stringreaderdata
*srd
;
766 if(eii
->dlen
<14) goto done
;
768 ulen
= de_getu32le_p(&pos
);
769 de_dbg(c
, "uncmpr. finder attr. size: %d", (int)ulen
);
771 flags
= (unsigned int)de_getu16le_p(&pos
);
772 flags_str
= ucstring_create(c
);
773 if(flags
&0x0001) ucstring_append_flags_item(flags_str
, "data_fork");
774 if(flags
&0x0002) ucstring_append_flags_item(flags_str
, "0x0002"); // something about the filename
775 ucstring_append_flags_item(flags_str
,
776 (flags
&0x0004)?"uncmpressed_attribute_data":"compressed_attribute_data");
777 if(flags
&0x0008) ucstring_append_flags_item(flags_str
, "64-bit_times");
778 if(flags
&0x0010) ucstring_append_flags_item(flags_str
, "no_timezone_offsets");
779 de_dbg(c
, "flags: 0x%04x (%s)", flags
, ucstring_getpsz(flags_str
));
781 dbuf_read_fourcc(c
->infile
, pos
, &filetype
, 4, 0x0);
782 de_dbg(c
, "filetype: '%s'", filetype
.id_dbgstr
);
784 dbuf_read_fourcc(c
->infile
, pos
, &creator
, 4, 0x0);
785 de_dbg(c
, "creator: '%s'", creator
.id_dbgstr
);
788 if(eii
->is_central
) goto done
;
790 if(flags
&0x0004) { // Uncompressed attribute data
794 dcflags
|= 0x1; // CRC is known
795 cmpr_meth
= (int)de_getu16le_p(&pos
);
796 cmi
= get_cmpr_meth_info(cmpr_meth
);
797 de_dbg(c
, "finder attr. cmpr. method: %d (%s)", cmpr_meth
, (cmi
? cmi
->name
: "?"));
799 crc_reported
= (u32
)de_getu32le_p(&pos
);
800 de_dbg(c
, "finder attr. data crc (reported): 0x%08x", (UI
)crc_reported
);
803 // The rest of the data is Finder attribute data
804 cmpr_attr_size
= eii
->dpos
+eii
->dlen
- pos
;
805 de_dbg(c
, "cmpr. finder attr. size: %d", (int)cmpr_attr_size
);
806 if(ulen
<1 || ulen
>1000000) goto done
;
808 // Type 6 (implode) compression won't work here, because it needs
809 // additional parameters seemingly not provided by the Finder attr data.
810 if(cmpr_meth
==6 || !is_compression_method_supported(d
, cmi
)) {
811 de_warn(c
, "Finder attribute data: Unsupported compression method: %d (%s)",
812 cmpr_meth
, (cmi
? cmi
->name
: "?"));
816 // Decompress and decode the Finder attribute data
817 attr_data
= dbuf_create_membuf(c
, ulen
, 0x1);
818 ret
= do_decompress_attrib_data(c
, d
, pos
, cmpr_attr_size
,
819 attr_data
, ulen
, crc_reported
, cmpr_meth
, cmi
, dcflags
, "finder attr. data");
821 de_warn(c
, "Failed to decompress finder attribute data");
826 dpos
+= 2; // Finder flags
827 dpos
+= 4; // Icon location
829 dpos
+= 16; // FXInfo
830 dpos
+= 1; // file version number
831 dpos
+= 1; // dir access rights
833 if(flags
&0x0008) goto done
; // We don't support 64-bit times
834 if(flags
&0x0010) goto done
; // We want timezone offsets
835 if(attr_data
->len
- dpos
< 6*4) goto done
;
837 create_time_raw
= dbuf_getu32le_p(attr_data
, &dpos
);
838 mod_time_raw
= dbuf_getu32le_p(attr_data
, &dpos
);
839 backup_time_raw
= dbuf_getu32le_p(attr_data
, &dpos
);
840 create_time_offset
= dbuf_geti32le(attr_data
, dpos
); dpos
+= 4;
841 mod_time_offset
= dbuf_geti32le(attr_data
, dpos
); dpos
+= 4;
842 backup_time_offset
= dbuf_geti32le(attr_data
, dpos
); dpos
+= 4;
844 handle_mac_time(c
, d
, create_time_raw
, create_time_offset
, &tmp_timestamp
, "create time");
845 if(create_time_raw
>0) {
846 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_CREATE
, &tmp_timestamp
, 40);
848 handle_mac_time(c
, d
, mod_time_raw
, mod_time_offset
, &tmp_timestamp
, "mod time ");
850 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_MODIFY
, &tmp_timestamp
, 40);
852 handle_mac_time(c
, d
, backup_time_raw
, backup_time_offset
, &tmp_timestamp
, "backup time");
853 if(backup_time_raw
>0) {
854 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_BACKUP
, &tmp_timestamp
, 40);
857 // Expecting 2 bytes for charset, and at least 2 more for the 2 NUL-terminated
858 // strings that follow.
859 if(attr_data
->len
- dpos
< 4) goto done
;
861 charset
= (int)dbuf_getu16le_p(attr_data
, &dpos
);
862 de_dbg(c
, "charset for fullpath/comment: %d", charset
);
864 // TODO: Can we use the correct encoding?
865 srd
= dbuf_read_string(attr_data
, dpos
, attr_data
->len
-dpos
, DE_DBG_MAX_STRLEN
,
866 DE_CONVFLAG_STOP_AT_NUL
, DE_ENCODING_ASCII
);
867 de_dbg(c
, "fullpath: \"%s\"", ucstring_getpsz(srd
->str
));
868 dpos
+= srd
->bytes_consumed
;
869 de_destroy_stringreaderdata(c
, srd
);
871 srd
= dbuf_read_string(attr_data
, dpos
, attr_data
->len
-dpos
, DE_DBG_MAX_STRLEN
,
872 DE_CONVFLAG_STOP_AT_NUL
, DE_ENCODING_ASCII
);
873 de_dbg(c
, "comment: \"%s\"", ucstring_getpsz(srd
->str
));
874 dpos
+= srd
->bytes_consumed
;
875 de_destroy_stringreaderdata(c
, srd
);
878 ucstring_destroy(flags_str
);
879 dbuf_close(attr_data
);
882 // Acorn / SparkFS / RISC OS
883 static void ef_acorn(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
886 struct de_riscos_file_attrs rfa
;
888 if(eii
->dlen
<16) return;
889 if(dbuf_memcmp(c
->infile
, eii
->dpos
, "ARC0", 4)) {
890 de_dbg(c
, "[unsupported Acorn extra-field type]");
895 de_zeromem(&rfa
, sizeof(struct de_riscos_file_attrs
));
896 fmtutil_riscos_read_load_exec(c
, c
->infile
, &rfa
, pos
);
898 if(rfa
.mod_time
.is_valid
) {
899 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_MODIFY
, &rfa
.mod_time
, 70);
902 fmtutil_riscos_read_attribs_field(c
, c
->infile
, &rfa
, pos
, 0);
904 if(!eii
->is_central
&& !eii
->md
->has_riscos_data
) {
905 eii
->md
->has_riscos_data
= 1;
910 struct extra_item_type_info_struct
{
913 extrafield_decoder_fn fn
;
915 static const struct extra_item_type_info_struct extra_item_type_info_arr
[] = {
916 { 0x0001 /* */, "Zip64 extended information", ef_zip64extinfo
},
917 { 0x0007 /* */, "AV Info", NULL
},
918 { 0x0008 /* */, "extended language encoding data", NULL
},
919 { 0x0009 /* */, "OS/2", ef_os2
},
920 { 0x000a /* */, "NTFS", ef_ntfs
},
921 { 0x000c /* */, "OpenVMS", NULL
},
922 { 0x000d /* */, "Unix", NULL
},
923 { 0x000e /* */, "file stream and fork descriptors", NULL
},
924 { 0x000f /* */, "Patch Descriptor", NULL
},
925 { 0x0014 /* */, "PKCS#7 Store for X.509 Certificates", NULL
},
926 { 0x0015 /* */, "X.509 Certificate ID and Signature for individual file", NULL
},
927 { 0x0016 /* */, "X.509 Certificate ID for Central Directory", NULL
},
928 { 0x0017 /* */, "Strong Encryption Header", NULL
},
929 { 0x0018 /* */, "Record Management Controls", NULL
},
930 { 0x0019 /* */, "PKCS#7 Encryption Recipient Certificate List", NULL
},
931 { 0x0021 /* */, "Policy Decryption Key", NULL
},
932 { 0x0022 /* */, "Smartcrypt Key Provider", NULL
},
933 { 0x0023 /* */, "Smartcrypt Policy Key Data", NULL
},
934 { 0x0065 /* */, "IBM S/390 (Z390), AS/400 (I400) attributes", NULL
},
935 { 0x0066 /* */, "IBM S/390 (Z390), AS/400 (I400) attributes - compressed", NULL
},
936 { 0x07c8 /* */, "Macintosh", NULL
},
937 { 0x2605 /* */, "ZipIt Macintosh", NULL
},
938 { 0x2705 /* */, "ZipIt Macintosh 1.3.5+", ef_zipitmac_2705
},
939 { 0x2805 /* */, "ZipIt Macintosh 1.3.5+", NULL
},
940 { 0x334d /* M3 */, "Info-ZIP Macintosh", ef_infozipmac
},
941 { 0x4154 /* TA */, "Tandem NSK", NULL
},
942 { 0x4341 /* AC */, "Acorn/SparkFS", ef_acorn
},
943 { 0x4453 /* SE */, "Windows NT security descriptor (binary ACL)", NULL
},
944 { 0x4690 /* */, "POSZIP 4690", NULL
},
945 { 0x4704 /* */, "VM/CMS", NULL
},
946 { 0x470f /* */, "MVS", NULL
},
947 { 0x4854 /* TH */, "Theos, old unofficial port", NULL
}, // unzip:extrafld.txt says "inofficial"
948 { 0x4b46 /* FK */, "FWKCS MD5", NULL
},
949 { 0x4c41 /* AL */, "OS/2 access control list (text ACL)", NULL
},
950 { 0x4d49 /* IM */, "Info-ZIP OpenVMS", NULL
},
951 { 0x4d63 /* cM */, "Macintosh SmartZIP", NULL
},
952 { 0x4f4c /* LO */, "Xceed original location", NULL
},
953 { 0x5350 /* PS */, "Psion?", NULL
}, // observed in some Psion files
954 { 0x5356 /* VS */, "AOS/VS (ACL)", NULL
},
955 { 0x5455 /* UT */, "extended timestamp", ef_extended_timestamp
},
956 { 0x554e /* NU */, "Xceed unicode", NULL
},
957 { 0x5855 /* UX */, "Info-ZIP Unix, first version", ef_infozip1
},
958 { 0x6375 /* uc */, "Info-ZIP Unicode Comment", NULL
},
959 { 0x6542 /* Be */, "BeOS/BeBox", NULL
},
960 { 0x6854 /* Th */, "Theos", NULL
},
961 { 0x7075 /* up */, "Info-ZIP Unicode Path", ef_unicodepath
},
962 { 0x7441 /* At */, "AtheOS", NULL
},
963 { 0x756e /* nu */, "ASi Unix", NULL
},
964 { 0x7855 /* Ux */, "Info-ZIP Unix, second version", ef_infozip2
},
965 { 0x7875 /* ux */, "Info-ZIP Unix, third version", ef_infozip3
},
966 { 0xa220 /* */, "Microsoft Open Packaging Growth Hint", NULL
},
967 { 0xfb4a /* */, "SMS/QDOS", NULL
}, // according to Info-ZIP zip 3.0
968 { 0xfd4a /* */, "SMS/QDOS", NULL
} // according to ZIP v6.3.4 APPNOTE
971 static const struct extra_item_type_info_struct
*get_extra_item_type_info(i64 id
)
973 static const struct extra_item_type_info_struct default_ei
=
977 for(i
=0; i
<DE_ARRAYCOUNT(extra_item_type_info_arr
); i
++) {
978 if(id
== (i64
)extra_item_type_info_arr
[i
].id
) {
979 return &extra_item_type_info_arr
[i
];
// Walk the "extra data" area of a file header: 'len' bytes starting at pos1.
// Each item is read as a 16-bit little-endian id at pos, a 16-bit
// little-endian data length at pos+2, and then that many bytes of data.
// For every item the type info is looked up and logged, and the type's
// handler (eii.eiti->fn) is invoked with the item description.
// is_central is passed through to the handler via eii.is_central.
985 static void do_extra_data(deark
*c
, lctx
*d
,
986 struct member_data
*md
, struct dir_entry_data
*dd
,
987 i64 pos1
, i64 len
, int is_central
)
991 de_dbg(c
, "extra data at %"I64_FMT
", len=%d", pos1
, (int)len
);
996 struct extra_item_info_struct eii
;
// Stop once the remaining space cannot hold the 4-byte item header plus data.
998 if(pos
+4 >= pos1
+len
) break;
999 de_zeromem(&eii
, sizeof(struct extra_item_info_struct
));
1002 eii
.is_central
= is_central
;
1005 eii
.id
= (u32
)de_getu16le(pos
);
1006 eii
.dlen
= de_getu16le(pos
+2);
1008 eii
.eiti
= get_extra_item_type_info(eii
.id
);
1010 de_dbg(c
, "item id=0x%04x (%s), dlen=%d", (unsigned int)eii
.id
, eii
.eiti
->name
,
// Stop if the item's declared data would run past the end of the extra data area.
1012 if(pos
+4+eii
.dlen
> pos1
+len
) break;
1015 de_dbg_indent(c
, 1);
1016 eii
.eiti
->fn(c
, d
, &eii
);
1017 de_dbg_indent(c
, -1);
1023 de_dbg_indent(c
, -1);
// Extract one member file, using the local header data (ldd) for the name,
// flags and compression method, and md for sizes and attributes.
// An error is reported if the member is encrypted (bit flag 0x1), uses an
// unsupported compression method, or has data extending past the end of the
// input file. A warning is issued for symbolic links.
// Otherwise a de_finfo is populated (name, timestamps, RISC OS attributes,
// directory/executable mode flags), an output file is created from it, and
// do_decompress_member() writes the data. The de_finfo is destroyed and the
// debug indentation level restored before returning.
1026 static void do_extract_file(deark
*c
, lctx
*d
, struct member_data
*md
)
1029 de_finfo
*fi
= NULL
;
1030 struct dir_entry_data
*ldd
= &md
->local_dir_entry_data
;
1032 int saved_indent_level
;
1034 de_dbg_indent_save(c
, &saved_indent_level
);
1035 de_dbg(c
, "file data at %"I64_FMT
", len=%"I64_FMT
, md
->file_data_pos
,
1037 de_dbg_indent(c
, 1);
// Bit 0 of the general purpose flags marks an encrypted member.
1039 if(ldd
->bit_flags
& 0x1) {
1040 de_err(c
, "%s: Encryption is not supported", ucstring_getpsz_d(ldd
->fname
));
1044 if(!is_compression_method_supported(d
, ldd
->cmi
)) {
1045 de_err(c
, "%s: Unsupported compression method: %d (%s)",
1046 ucstring_getpsz_d(ldd
->fname
),
1047 ldd
->cmpr_meth
, (ldd
->cmi
? ldd
->cmi
->name
: "?"));
// The compressed data must lie entirely within the input file.
1051 if(md
->file_data_pos
+md
->cmpr_size
> c
->infile
->len
) {
1052 de_err(c
, "%s: Data goes beyond end of file", ucstring_getpsz_d(ldd
->fname
));
1056 if(md
->is_symlink
) {
1057 de_warn(c
, "\"%s\" is a symbolic link. It will not be extracted as a link.",
1058 ucstring_getpsz_d(ldd
->fname
));
1061 fi
= de_finfo_create(c
);
1062 fi
->detect_root_dot_dir
= 1;
1064 if(ucstring_isnonempty(ldd
->fname
)) {
1065 unsigned int snflags
= DE_SNFLAG_FULLPATH
;
1067 if(md
->has_riscos_data
) {
1068 fmtutil_riscos_append_type_to_filename(c
, fi
, ldd
->fname
, &md
->rfa
, md
->is_dir
, 0);
1070 if(md
->is_dir
) snflags
|= DE_SNFLAG_STRIPTRAILINGSLASH
;
1071 de_finfo_set_name_from_ucstring(c
, fi
, ldd
->fname
, snflags
);
1072 fi
->original_filename_flag
= 1;
// Copy over every timestamp type for which a valid value was found.
1075 for(tsidx
=0; tsidx
<DE_TIMESTAMPIDX_COUNT
; tsidx
++) {
1076 if(md
->tsdata
[tsidx
].ts
.is_valid
) {
1077 fi
->timestamp
[tsidx
] = md
->tsdata
[tsidx
].ts
;
1081 if(md
->has_riscos_data
) {
1082 fi
->has_riscos_data
= 1;
1083 fi
->riscos_attribs
= md
->rfa
.attribs
;
1084 fi
->load_addr
= md
->rfa
.load_addr
;
1085 fi
->exec_addr
= md
->rfa
.exec_addr
;
1089 fi
->is_directory
= 1;
1091 else if(md
->is_executable
) {
1092 fi
->mode_flags
|= DE_MODEFLAG_EXE
;
1094 else if(md
->is_nonexecutable
) {
1095 fi
->mode_flags
|= DE_MODEFLAG_NONEXE
;
1098 outf
= dbuf_create_output_file(c
, NULL
, fi
, 0);
// The decompressor's return value is deliberately ignored here.
1103 (void)do_decompress_member(c
, d
, md
, outf
);
1107 de_finfo_destroy(c
, fi
);
1108 de_dbg_indent_restore(c
, saved_indent_level
);
// Returns a printable name for the "platform" code stored in the high byte
// of a ZIP "version made by" / "version needed" field.
//
// ver_hi: the platform code (0..255 when taken from a header byte).
//
// Codes 0..19 are looked up in a table, and code 30 is special-cased.
// Every other code yields "?", so the result is never NULL and is always
// safe to pass to a "%s" format.
static const char *get_platform_name(unsigned int ver_hi)
{
	static const char *const pltf_names[] = {
		"MS-DOS, etc.", "Amiga", "OpenVMS", "Unix",
		"VM/CMS", "Atari ST", "HPFS", "Macintosh",
		"Z-System", "CP/M", "NTFS or TOPS-20", "MVS or NTFS",
		"VSE or SMS/QDOS", "Acorn RISC OS", "VFAT", "MVS",
		"BeOS", "Tandem", "OS/400", "OS X" };
	// Derive the bound from the table itself, so the guard cannot drift out
	// of sync with the number of names.
	const unsigned int num_names =
		(unsigned int)(sizeof(pltf_names)/sizeof(pltf_names[0]));

	// The value comes from untrusted file data; never index past the table.
	if(ver_hi<num_names) {
		return pltf_names[ver_hi];
	}
	if(ver_hi==30) return "AtheOS/Syllable";
	return "?";
}
// Interprets md->attr_e (the "external file attributes") together with
// md->ver_made_by_hi, and sets md->is_executable / md->is_nonexecutable
// (and presumably md->is_dir / md->is_symlink -- those assignments should be
// confirmed) accordingly.
1126 // Look at the attributes, and set some other fields based on them.
1127 static void process_ext_attr(deark
*c
, lctx
*d
, struct member_data
*md
)
1129 if(d
->using_scanmode
) {
1130 // In this mode, there is no 'external attribs' field.
1134 if(md
->ver_made_by_hi
==3) { // Unix
// The upper 16 bits of attr_e are treated as a Unix mode word; the bits
// selected by 0170000 are its file-type field.
1135 unsigned int unix_filetype
;
1136 unix_filetype
= (md
->attr_e
>>16)&0170000;
// 0040000 is the conventional Unix "directory" file type.
1137 if(unix_filetype
== 0040000) {
// 0120000 is the conventional Unix "symbolic link" file type.
1140 else if(unix_filetype
== 0120000) {
// Any of the user/group/other execute permission bits.
1144 if((md
->attr_e
>>16)&0111) {
1145 md
->is_executable
= 1;
1148 md
->is_nonexecutable
= 1;
1152 // MS-DOS-style attributes.
1153 // Technically, we should only do this if
1154 // md->central_dir_entry_data.ver_made_by_hi==0.
1155 // However, most(?) zip programs set the low byte of the external attribs
1156 // to the equivalent MS-DOS attribs, at least in cases where it matters.
// 0x10 is the MS-DOS "directory" attribute bit.
1157 if(md
->attr_e
& 0x10) {
1161 // TODO: Support more platforms.
1162 // TODO: The 0x756e (ASi Unix) extra field might be important, as it contains
1163 // file permissions.
1165 if(md
->is_dir
&& md
->uncmpr_size
!=0) {
1166 // I'd expect a subdirectory entry to have zero size. If it doesn't,
1167 // let's just assume we misidentified it as a subdirectory, and
1168 // extract its data.
// Appends to 's' a description of md->attr_i (the "internal file
// attributes"): a "text file" item, followed by a hex dump of whatever bits
// remain in 'bf' as unrecognized.
1173 static void describe_internal_attr(deark
*c
, struct member_data
*md
,
1176 unsigned int bf
= md
->attr_i
;
1179 ucstring_append_flags_item(s
, "text file");
1183 if(bf
!=0) { // Report any unrecognized flags
1184 ucstring_append_flags_itemf(s
, "0x%04x", bf
);
// Appends to 's' a description of the general purpose bit flags.
// Some bits are interpreted according to the compression method: for
// method 6 (implode) the sliding dictionary and tree settings are named,
// and for methods 8 and 9 (deflate) bits 1-2 are decoded as a compression
// level. Bits still set in 'bf' at the end are reported in hex.
1188 // Uses dd->bit_flags, dd->cmpr_method
1189 static void describe_general_purpose_bit_flags(deark
*c
, struct dir_entry_data
*dd
,
1193 unsigned int bf
= dd
->bit_flags
;
1196 ucstring_append_flags_item(s
, "encrypted");
1200 if(dd
->cmpr_meth
==6) { // implode
1208 ucstring_append_flags_itemf(s
, "%s sliding dictionary", name
);
1217 ucstring_append_flags_itemf(s
, "%s trees", name
);
1220 if(dd
->cmpr_meth
==8 || dd
->cmpr_meth
==9) { // deflate flags
// Bits 1 and 2 together form a 2-bit compression level code.
1223 code
= (bf
& 0x0006)>>1;
1225 case 1: name
="max"; break;
1226 case 2: name
="fast"; break;
1227 case 3: name
="super_fast"; break;
1228 default: name
="normal";
1230 ucstring_append_flags_itemf(s
, "cmprlevel=%s", name
);
// Clear the bits just described, so they are not reported as unrecognized.
1231 bf
-= (bf
& 0x0006);
1235 ucstring_append_flags_item(s
, "uses data descriptor");
1240 ucstring_append_flags_item(s
, "UTF-8");
1244 if(bf
!=0) { // Report any unrecognized flags
1245 ucstring_append_flags_itemf(s
, "0x%04x", bf
);
// Parameters:
//   md           - member record to fill in; dd is pointed at either its
//                  central_dir_entry_data or its local_dir_entry_data.
//   is_central   - nonzero for a central dir entry (46-byte fixed header),
//                  zero for a local file header (30-byte fixed header).
//   pos1         - file offset of the header's signature.
//   p_entry_size - receives fixed header size + filename, extra data and
//                  comment lengths.
// All multi-byte fields are read as little-endian.
// Returns an int status; presumably nonzero on success, judging by how the
// callers test it (TODO confirm against the return statements).
1249 // Read either a central directory entry (a.k.a. central directory file header),
1250 // or a local file header.
1251 static int do_file_header(deark
*c
, lctx
*d
, struct member_data
*md
,
1252 int is_central
, i64 pos1
, i64
*p_entry_size
)
1256 i64 fn_len
, extra_len
, comment_len
;
1259 i64 fixed_header_size
;
1260 i64 mod_time_raw
, mod_date_raw
;
1261 struct dir_entry_data
*dd
; // Points to either md->central or md->local
1262 de_ucstring
*descr
= NULL
;
1263 struct de_timestamp dos_timestamp
;
1264 char timestamp_buf
[64];
1267 descr
= ucstring_create(c
);
1269 dd
= &md
->central_dir_entry_data
;
1270 fixed_header_size
= 46;
1271 de_dbg(c
, "central dir entry at %"I64_FMT
, pos
);
1274 dd
= &md
->local_dir_entry_data
;
1275 fixed_header_size
= 30;
// A local header can only be read if it is stored in this file ("disk").
1276 if(md
->disk_number_start
!=d
->this_disk_num
) {
1277 de_err(c
, "Member file not in this ZIP file");
1280 de_dbg(c
, "local file header at %"I64_FMT
, pos
);
1282 de_dbg_indent(c
, 1);
// Verify the 4-byte signature that matches the kind of header expected.
1284 sig
= (u32
)de_getu32le_p(&pos
);
1285 if(is_central
&& sig
!=CODE_PK12
) {
1286 de_err(c
, "Central dir file header not found at %"I64_FMT
, pos1
);
1289 else if(!is_central
&& sig
!=CODE_PK34
) {
1290 de_err(c
, "Local file header not found at %"I64_FMT
, pos1
);
// "Version made by": high byte is the platform, low byte the ZIP spec
// version (printed as major.minor = low/10 . low%10).
1295 md
->ver_made_by
= (unsigned int)de_getu16le_p(&pos
);
1296 md
->ver_made_by_hi
= (unsigned int)((md
->ver_made_by
&0xff00)>>8);
1297 md
->ver_made_by_lo
= (unsigned int)(md
->ver_made_by
&0x00ff);
1298 de_dbg(c
, "version made by: platform=%u (%s), ZIP spec=%u.%u",
1299 md
->ver_made_by_hi
, get_platform_name(md
->ver_made_by_hi
),
1300 (unsigned int)(md
->ver_made_by_lo
/10), (unsigned int)(md
->ver_made_by_lo
%10));
1303 dd
->ver_needed
= (unsigned int)de_getu16le_p(&pos
);
1304 dd
->ver_needed_hi
= (unsigned int)((dd
->ver_needed
&0xff00)>>8);
1305 dd
->ver_needed_lo
= (unsigned int)(dd
->ver_needed
&0x00ff);
1306 de_dbg(c
, "version needed to extract: platform=%u (%s), ZIP spec=%u.%u",
1307 dd
->ver_needed_hi
, get_platform_name(dd
->ver_needed_hi
),
1308 (unsigned int)(dd
->ver_needed_lo
/10), (unsigned int)(dd
->ver_needed_lo
%10));
1310 dd
->bit_flags
= (unsigned int)de_getu16le_p(&pos
);
1311 dd
->cmpr_meth
= (int)de_getu16le_p(&pos
);
1312 dd
->cmi
= get_cmpr_meth_info(dd
->cmpr_meth
);
// Flag bit 0x800 is later passed on to the filename and comment readers.
1314 utf8_flag
= (dd
->bit_flags
& 0x800)?1:0;
1315 ucstring_empty(descr
);
1316 describe_general_purpose_bit_flags(c
, dd
, descr
);
1317 de_dbg(c
, "flags: 0x%04x (%s)", dd
->bit_flags
, ucstring_getpsz(descr
));
1319 de_dbg(c
, "cmpr method: %d (%s)", dd
->cmpr_meth
,
1320 (dd
->cmi
? dd
->cmi
->name
: "?"));
// The DOS-format modification time is tagged as local time, then offered
// to apply_timestamp().
1322 mod_time_raw
= de_getu16le_p(&pos
);
1323 mod_date_raw
= de_getu16le_p(&pos
);
1324 de_dos_datetime_to_timestamp(&dos_timestamp
, mod_date_raw
, mod_time_raw
);
1325 dos_timestamp
.tzcode
= DE_TZCODE_LOCAL
;
1326 de_dbg_timestamp_to_string(c
, &dos_timestamp
, timestamp_buf
, sizeof(timestamp_buf
), 0);
1327 de_dbg(c
, "mod time: %s", timestamp_buf
);
1328 apply_timestamp(c
, d
, md
, DE_TIMESTAMPIDX_MODIFY
, &dos_timestamp
, 10);
1330 dd
->crc_reported
= (u32
)de_getu32le_p(&pos
);
1331 de_dbg(c
, "crc (reported): 0x%08x", (unsigned int)dd
->crc_reported
);
1333 dd
->cmpr_size
= de_getu32le_p(&pos
);
1334 dd
->uncmpr_size
= de_getu32le_p(&pos
);
1335 de_dbg(c
, "cmpr size: %" I64_FMT
", uncmpr size: %" I64_FMT
, dd
->cmpr_size
, dd
->uncmpr_size
);
1337 fn_len
= de_getu16le_p(&pos
);
1339 extra_len
= de_getu16le_p(&pos
);
1342 comment_len
= de_getu16le_p(&pos
);
// The member's data begins right after the filename and extra data.
1349 md
->file_data_pos
= pos
+ fn_len
+ extra_len
;
1353 md
->disk_number_start
= de_getu16le_p(&pos
);
1355 md
->attr_i
= (unsigned int)de_getu16le_p(&pos
);
1356 ucstring_empty(descr
);
1357 describe_internal_attr(c
, md
, descr
);
1358 de_dbg(c
, "internal file attributes: 0x%04x (%s)", md
->attr_i
,
1359 ucstring_getpsz(descr
));
1361 md
->attr_e
= (unsigned int)de_getu32le_p(&pos
);
1362 de_dbg(c
, "external file attributes: 0x%08x", md
->attr_e
);
1363 de_dbg_indent(c
, 1);
1366 // The low byte is, AFAIK, *almost* universally used for MS-DOS-style
1368 unsigned int dos_attrs
= (md
->attr_e
& 0xff);
1369 ucstring_empty(descr
);
1370 de_describe_dos_attribs(c
, dos_attrs
, descr
, 0);
1371 de_dbg(c
, "%sMS-DOS attribs: 0x%02x (%s)",
1372 (md
->ver_made_by_hi
==0)?"":"(hypothetical) ",
1373 dos_attrs
, ucstring_getpsz(descr
));
1376 if((md
->attr_e
>>16) != 0) {
1377 // A number of platforms put Unix-style file attributes here, so
1378 // decode them as such whenever they are nonzero.
1379 de_dbg(c
, "%sUnix attribs: octal(%06o)",
1380 (md
->ver_made_by_hi
==3)?"":"(hypothetical) ",
1381 (unsigned int)(md
->attr_e
>>16));
1384 de_dbg_indent(c
, -1);
1386 md
->offset_of_local_header
= de_getu32le_p(&pos
);
1387 de_dbg(c
, "offset of local header: %"I64_FMT
", disk: %d", md
->offset_of_local_header
,
1388 (int)md
->disk_number_start
);
1392 de_dbg(c
, "filename_len: %d, extra_len: %d, comment_len: %d", (int)fn_len
,
1393 (int)extra_len
, (int)comment_len
);
1396 de_dbg(c
, "filename_len: %d, extra_len: %d", (int)fn_len
,
1400 *p_entry_size
= fixed_header_size
+ fn_len
+ extra_len
+ comment_len
;
// The variable-length parts follow the fixed header in this order:
// filename, extra data, comment.
1402 dd
->main_fname_pos
= pos1
+fixed_header_size
;
1403 dd
->main_fname_len
= fn_len
;
1404 do_read_filename(c
, d
, md
, dd
, pos1
+fixed_header_size
, fn_len
, utf8_flag
);
1407 do_extra_data(c
, d
, md
, dd
, pos1
+fixed_header_size
+fn_len
, extra_len
, is_central
);
1411 do_comment(c
, d
, pos1
+fixed_header_size
+fn_len
+extra_len
, comment_len
, utf8_flag
,
1412 "member file comment", "fcomment.txt");
// Offset correction: once a discrepancy has been confirmed for one member,
// it is applied to every later local header offset.
1416 if(d
->used_offset_discrepancy
) {
1417 md
->offset_of_local_header
+= d
->offset_discrepancy
;
1418 de_dbg(c
, "assuming local header is really at %"I64_FMT
, md
->offset_of_local_header
);
// Otherwise, if a possible discrepancy is known, adopt it only when the
// reported offset has no local header signature but the adjusted one does.
1420 else if(d
->offset_discrepancy
!=0) {
1424 sig1
= (u32
)de_getu32le(md
->offset_of_local_header
);
1425 if(sig1
!=CODE_PK34
) {
1426 alt_pos
= md
->offset_of_local_header
+ d
->offset_discrepancy
;
1427 sig2
= (u32
)de_getu32le(alt_pos
);
1428 if(sig2
==CODE_PK34
) {
1429 de_warn(c
, "Local file header found at %"I64_FMT
" instead of %"I64_FMT
". "
1430 "Assuming offsets are wrong by %"I64_FMT
" bytes.",
1431 alt_pos
, md
->offset_of_local_header
, d
->offset_discrepancy
);
1432 md
->offset_of_local_header
+= d
->offset_discrepancy
;
1433 d
->used_offset_discrepancy
= 1;
1442 de_dbg_indent(c
, -1);
1443 ucstring_destroy(descr
);
// Allocates a member_data record with de_malloc() and creates the two
// ucstrings that hold the member's local and central filenames.
// The result should be released with destroy_member_data().
1447 static struct member_data
*create_member_data(deark
*c
, lctx
*d
)
1449 struct member_data
*md
;
1451 md
= de_malloc(c
, sizeof(struct member_data
));
1452 md
->local_dir_entry_data
.fname
= ucstring_create(c
);
1453 md
->central_dir_entry_data
.fname
= ucstring_create(c
);
// Releases a member_data record: destroys the central and local filename
// ucstrings that create_member_data() set up.
1457 static void destroy_member_data(deark
*c
, struct member_data
*md
)
1460 ucstring_destroy(md
->central_dir_entry_data
.fname
);
1461 ucstring_destroy(md
->local_dir_entry_data
.fname
);
1465 static i32
ucstring_lastchar(de_ucstring
*s
)
1467 if(!s
|| s
->len
<1) return 0;
1468 return s
->str
[s
->len
-1];
// Steps visible here: fall back to the central-dir filename if the local
// one is empty; choose the final size/CRC fields; interpret the external
// attributes; apply a trailing-slash directory heuristic; then call
// do_extract_file().
1471 // Things to do after both the central and local headers have been read.
1472 // E.g., extract the file.
1473 static int do_process_member(deark
*c
, lctx
*d
, struct member_data
*md
)
1477 // If for some reason we have a central-dir filename but not a local-dir
1478 // filename, use the central-dir filename.
1479 if(ucstring_isempty(md
->local_dir_entry_data
.fname
) &&
1480 ucstring_isnonempty(md
->central_dir_entry_data
.fname
))
1482 ucstring_append_ucstring(md
->local_dir_entry_data
.fname
,
1483 md
->central_dir_entry_data
.fname
);
1486 // Set the final file size and crc fields.
// Flag 0x0008 is the "uses data descriptor" bit.
1487 if(md
->local_dir_entry_data
.bit_flags
& 0x0008) {
// Scan mode does not read the central directory, so it cannot supply
// the values taken from it below.
1488 if(d
->using_scanmode
) {
1489 de_err(c
, "File is incompatible with scan mode");
1493 // Indicates that certain fields are not present in the local file header,
1494 // and are instead in a "data descriptor" after the file data.
1495 // Let's hope they are also in the central file header.
1496 md
->cmpr_size
= md
->central_dir_entry_data
.cmpr_size
;
1497 md
->uncmpr_size
= md
->central_dir_entry_data
.uncmpr_size
;
1498 md
->crc_reported
= md
->central_dir_entry_data
.crc_reported
;
// Otherwise the local header's own values are used.
1501 md
->cmpr_size
= md
->local_dir_entry_data
.cmpr_size
;
1502 md
->uncmpr_size
= md
->local_dir_entry_data
.uncmpr_size
;
1503 md
->crc_reported
= md
->local_dir_entry_data
.crc_reported
;
1506 process_ext_attr(c
, d
, md
);
1508 // In some cases, detect directories by checking whether the filename ends
// The heuristic applies only to zero-length members, and only in scan mode
// or when the low byte of "version made by" is below 20.
1510 if(!md
->is_dir
&& md
->uncmpr_size
==0 &&
1511 (d
->using_scanmode
|| (md
->ver_made_by_lo
<20)))
1513 if(ucstring_lastchar(md
->local_dir_entry_data
.fname
) == '/') {
1514 de_dbg(c
, "[assuming this is a subdirectory]");
1519 do_extract_file(c
, d
, md
);
// Processes one member, starting from its central directory entry at 'pos':
// reads the central header, then the local header at
// md->offset_of_local_header, then hands the member to do_process_member().
// central_index is used only for the debug message.
1526 // In *entry_size, returns the size of the central dir entry.
1527 // Returns 0 if the central dir entry could not even be parsed.
1528 static int do_member_from_central_dir_entry(deark
*c
, lctx
*d
,
1529 struct member_data
*md
, i64 central_index
, i64 pos
, i64
*entry_size
)
1533 int saved_indent_level
;
1535 de_dbg_indent_save(c
, &saved_indent_level
);
// The entry must start inside the central directory's reported extent.
1539 if(pos
>= d
->central_dir_offset
+d
->central_dir_byte_size
) {
1543 de_dbg(c
, "central dir entry #%d", (int)central_index
);
1544 de_dbg_indent(c
, 1);
1546 // Read the central dir file header
1547 if(!do_file_header(c
, d
, md
, 1, pos
, entry_size
)) {
1551 // If we were able to read the central dir file header, we might be able
1552 // to continue and read more files, even if the local file header fails.
1555 // Read the local file header
1556 if(!do_file_header(c
, d
, md
, 0, md
->offset_of_local_header
, &tmp_entry_size
)) {
1560 do_process_member(c
, d
, md
);
1563 de_dbg_indent_restore(c
, saved_indent_level
);
// Processes the central directory entry at 'pos' using a temporary
// member_data record, which is created here and destroyed once
// do_member_from_central_dir_entry() has finished with it.
// *entry_size is filled in by that call.
1567 static int do_central_dir_entry(deark
*c
, lctx
*d
,
1568 i64 central_index
, i64 pos
, i64
*entry_size
)
1570 struct member_data
*md
= NULL
;
1573 md
= create_member_data(c
, d
);
1574 ret
= do_member_from_central_dir_entry(c
, d
, md
, central_index
, pos
, entry_size
);
1575 destroy_member_data(c
, md
);
// Processes a member using only its local file header at pos1 (no central
// directory entry); used by scan mode.
// If the member is processed successfully, *pmember_size is set to the
// distance from pos1 to the end of the member's compressed data.
// The temporary member_data record is destroyed before returning.
1579 static int do_local_dir_only(deark
*c
, lctx
*d
, i64 pos1
, i64
*pmember_size
)
1581 struct member_data
*md
= NULL
;
1585 md
= create_member_data(c
, d
);
1587 md
->offset_of_local_header
= pos1
;
1589 // Read the local file header
1590 if(!do_file_header(c
, d
, md
, 0, md
->offset_of_local_header
, &tmp_entry_size
)) {
1594 if(!do_process_member(c
, d
, md
)) goto done
;
1596 *pmember_size
= md
->file_data_pos
+ md
->cmpr_size
- pos1
;
1600 destroy_member_data(c
, md
);
// Scan mode: processes the file without using the central directory.
// Searches forward for the local file header signature (g_zipsig34) and
// processes the member found there with do_local_dir_only().
// Stops when fewer than 4 bytes remain, or when a member's reported size
// is less than 1 (which would prevent forward progress).
1604 static void de_run_zip_scanmode(deark
*c
, lctx
*d
)
1608 d
->using_scanmode
= 1;
1613 i64 member_size
= 0;
1615 if(pos
> c
->infile
->len
-4) break;
1616 ret
= dbuf_search(c
->infile
, g_zipsig34
, 4, pos
, c
->infile
->len
-pos
, &foundpos
);
1619 de_dbg(c
, "zip member at %"I64_FMT
, pos
);
1620 de_dbg_indent(c
, 1);
1621 ret
= do_local_dir_only(c
, d
, pos
, &member_size
);
1622 de_dbg_indent(c
, -1);
1624 if(member_size
<1) break;
// Processes the central directory: starting at d->central_dir_offset,
// handles d->central_dir_num_entries entries one after another with
// do_central_dir_entry().
1629 static int do_central_dir(deark
*c
, lctx
*d
)
1636 pos
= d
->central_dir_offset
;
1637 de_dbg(c
, "central dir at %"I64_FMT
, pos
);
1638 de_dbg_indent(c
, 1);
1640 for(i
=0; i
<d
->central_dir_num_entries
; i
++) {
1641 if(!do_central_dir_entry(c
, d
, i
, pos
, &entry_size
)) {
1642 // TODO: Decide exactly what to do if something fails.
1650 de_dbg_indent(c
, -1);
// Reads the Zip64 end-of-central-directory record at d->zip64_eocd_pos.
// Warns if the record is said to be on a disk other than 0, or if the
// expected signature (g_zipsig66) is not present at that position.
// Otherwise the record's fields are logged, and the central directory's
// disk number, entry counts, byte size and offset are stored in the
// d->zip64_* fields for later use.
1654 static int do_zip64_eocd(deark
*c
, lctx
*d
)
1659 int saved_indent_level
;
1660 UI ver
, ver_hi
, ver_lo
;
1662 de_dbg_indent_save(c
, &saved_indent_level
);
1664 if(d
->zip64_eocd_disknum
!=0) {
1665 de_warn(c
, "This might be a multi-disk Zip64 archive, which is not supported");
1671 pos
= d
->zip64_eocd_pos
;
1672 if(dbuf_memcmp(c
->infile
, pos
, g_zipsig66
, 4)) {
1673 de_warn(c
, "Expected Zip64 end-of-central-directory record not found at %"I64_FMT
, pos
);
1674 retval
= 1; // Maybe the eocd locator sig was a false positive?
1679 de_dbg(c
, "zip64 end-of-central-dir record at %"I64_FMT
, pos
);
1681 de_dbg_indent(c
, 1);
// 8-byte record size; the debug message shows it as being in addition to
// 12 bytes that the field does not count.
1683 n
= de_geti64le(pos
); pos
+= 8;
1684 de_dbg(c
, "size of zip64 eocd record: (12+)%"I64_FMT
, n
);
// Version fields: high byte is the platform, low byte the ZIP spec version.
1686 ver
= (UI
)de_getu16le_p(&pos
);
1687 ver_hi
= (ver
&0xff00)>>8;
1688 ver_lo
= ver
&0x00ff;
1689 de_dbg(c
, "version made by: platform=%u (%s), ZIP spec=%u.%u",
1690 ver_hi
, get_platform_name(ver_hi
), (UI
)(ver_lo
/10), (UI
)(ver_lo
%10));
1692 ver
= (UI
)de_getu16le_p(&pos
);
1693 ver_hi
= (ver
&0xff00)>>8;
1694 ver_lo
= ver
&0x00ff;
1695 de_dbg(c
, "version needed: platform=%u (%s), ZIP spec=%u.%u",
1696 ver_hi
, get_platform_name(ver_hi
), (UI
)(ver_lo
/10), (UI
)(ver_lo
%10));
1698 n
= de_getu32le_p(&pos
);
1699 de_dbg(c
, "this disk num: %"I64_FMT
, n
);
1701 d
->zip64_cd_disknum
= (unsigned int)de_getu32le_p(&pos
);
1702 d
->zip64_num_centr_dir_entries_this_disk
= de_geti64le(pos
); pos
+= 8;
1703 de_dbg(c
, "central dir num entries on this disk: %"I64_FMT
, d
->zip64_num_centr_dir_entries_this_disk
);
1704 d
->zip64_num_centr_dir_entries_total
= de_geti64le(pos
); pos
+= 8;
1705 de_dbg(c
, "central dir num entries: %"I64_FMT
, d
->zip64_num_centr_dir_entries_total
);
1706 d
->zip64_centr_dir_byte_size
= de_geti64le(pos
); pos
+= 8;
1707 de_dbg(c
, "central dir size: %"I64_FMT
, d
->zip64_centr_dir_byte_size
);
1708 d
->zip64_cd_pos
= de_geti64le(pos
); pos
+= 8;
1709 de_dbg(c
, "central dir offset: %"I64_FMT
", disk: %u",
1710 d
->zip64_cd_pos
, d
->zip64_cd_disknum
);
1714 de_dbg_indent_restore(c
, saved_indent_level
);
// Looks for the Zip64 end-of-central-directory locator, which is expected
// 20 bytes before the ordinary end-of-central-directory record, and checks
// it against the g_zipsig67 signature.
// From the locator it reads the disk number and file offset of the Zip64
// eocd record (stored in d->zip64_eocd_disknum and d->zip64_eocd_pos) and
// the total number of disks (only logged).
1718 static void do_zip64_eocd_locator(deark
*c
, lctx
*d
)
1721 i64 pos
= d
->end_of_central_dir_pos
- 20;
1723 if(dbuf_memcmp(c
->infile
, pos
, g_zipsig67
, 4)) {
1726 de_dbg(c
, "zip64 eocd locator found at %"I64_FMT
, pos
);
1729 de_dbg_indent(c
, 1);
1730 d
->zip64_eocd_disknum
= (unsigned int)de_getu32le_p(&pos
);
1731 d
->zip64_eocd_pos
= de_geti64le(pos
); pos
+= 8;
1732 de_dbg(c
, "offset of zip64 eocd: %"I64_FMT
", disk: %u",
1733 d
->zip64_eocd_pos
, d
->zip64_eocd_disknum
);
1734 n
= de_getu32le_p(&pos
);
1735 de_dbg(c
, "total number of disks: %u", (unsigned int)n
);
1736 de_dbg_indent(c
, -1);
// Reads the end-of-central-directory record at d->end_of_central_dir_pos.
// Fields are read at fixed offsets from the start of the record: disk
// numbers at +4 and +6, entry counts at +8 and +10, central dir size at +12,
// central dir offset at +16, comment length at +20, comment text at +22.
// For Zip64 files, a field holding its all-ones placeholder (0xffff or
// 0xffffffff) is replaced by the value from the Zip64 eocd record.
// Also reports disk spanning problems, and checks whether the reported
// central directory offset is consistent with its reported size.
1739 static int do_end_of_central_dir(deark
*c
, lctx
*d
)
1742 i64 num_entries_this_disk
;
1743 i64 disk_num_with_central_dir_start
;
1745 i64 alt_central_dir_offset
;
1748 pos
= d
->end_of_central_dir_pos
;
1749 de_dbg(c
, "end-of-central-dir record at %"I64_FMT
, pos
);
1750 de_dbg_indent(c
, 1);
1752 d
->this_disk_num
= de_getu16le(pos
+4);
1753 de_dbg(c
, "this disk num: %"I64_FMT
, d
->this_disk_num
);
1754 disk_num_with_central_dir_start
= de_getu16le(pos
+6);
1756 num_entries_this_disk
= de_getu16le(pos
+8);
1757 de_dbg(c
, "central dir num entries on this disk: %"I64_FMT
, num_entries_this_disk
);
1758 if(d
->is_zip64
&& (num_entries_this_disk
==0xffff)) {
1759 num_entries_this_disk
= d
->zip64_num_centr_dir_entries_this_disk
;
1762 d
->central_dir_num_entries
= de_getu16le(pos
+10);
1763 d
->central_dir_byte_size
= de_getu32le(pos
+12);
1764 d
->central_dir_offset
= de_getu32le(pos
+16);
1765 de_dbg(c
, "central dir num entries: %"I64_FMT
, d
->central_dir_num_entries
);
1766 if(d
->is_zip64
&& (d
->central_dir_num_entries
==0xffff)) {
1767 d
->central_dir_num_entries
= d
->zip64_num_centr_dir_entries_total
;
1770 de_dbg(c
, "central dir size: %"I64_FMT
, d
->central_dir_byte_size
);
1771 if(d
->is_zip64
&& (d
->central_dir_byte_size
==0xffffffffLL
)) {
1772 d
->central_dir_byte_size
= d
->zip64_centr_dir_byte_size
;
1775 de_dbg(c
, "central dir offset: %"I64_FMT
", disk: %"I64_FMT
, d
->central_dir_offset
,
1776 disk_num_with_central_dir_start
);
1777 if(d
->is_zip64
&& (d
->central_dir_offset
==0xffffffffLL
)) {
1778 d
->central_dir_offset
= d
->zip64_cd_pos
;
1781 comment_length
= de_getu16le(pos
+20);
1782 de_dbg(c
, "comment length: %d", (int)comment_length
);
1783 if(comment_length
>0) {
1784 // The comment for the whole .ZIP file presumably has to use
1785 // cp437 encoding. There's no flag that could indicate otherwise.
1786 do_comment(c
, d
, pos
+22, comment_length
, 0,
1787 "ZIP file comment", "comment.txt");
1790 // TODO: Figure out exactly how to detect disk spanning.
1791 if(disk_num_with_central_dir_start
!=d
->this_disk_num
||
1792 (d
->is_zip64
&& d
->zip64_eocd_disknum
!=d
->this_disk_num
))
1794 de_err(c
, "Disk spanning not supported");
1798 if(d
->this_disk_num
!=0) {
1799 de_warn(c
, "This ZIP file might be part of a multi-part archive, and "
1800 "might not be supported correctly");
1803 if(num_entries_this_disk
!=d
->central_dir_num_entries
) {
1804 de_warn(c
, "This ZIP file might not be supported correctly "
1805 "(number-of-entries-this-disk=%d, number-of-entries-total=%d)",
1806 (int)num_entries_this_disk
, (int)d
->central_dir_num_entries
);
// Where the central directory would start if it ended exactly where the
// (Zip64 or ordinary) end-of-central-dir record begins.
1809 alt_central_dir_offset
=
1810 (d
->is_zip64
? d
->zip64_eocd_pos
: d
->end_of_central_dir_pos
) -
1811 d
->central_dir_byte_size
;
1813 if(alt_central_dir_offset
!= d
->central_dir_offset
) {
1816 de_warn(c
, "Inconsistent central directory offset. Reported to be %"I64_FMT
", "
1817 "but based on its reported size, it should be %"I64_FMT
".",
1818 d
->central_dir_offset
, alt_central_dir_offset
);
// Trust the computed offset only if a central dir file header signature
// is actually present there; remember the difference so that local header
// offsets can be corrected later.
1820 sig
= (u32
)de_getu32le(alt_central_dir_offset
);
1821 if(sig
==CODE_PK12
) {
1822 d
->offset_discrepancy
= alt_central_dir_offset
- d
->central_dir_offset
;
1823 de_dbg(c
, "likely central dir found at %"I64_FMT
, alt_central_dir_offset
);
1824 d
->central_dir_offset
= alt_central_dir_offset
;
1831 de_dbg_indent(c
, -1);
// Normal (central-directory-based) processing of a ZIP file.
// Locates the end-of-central-directory record, reusing the result from
// format detection if the search was already done there. If it cannot be
// found, an error is reported, with a hint about scan mode when the file
// starts with a local file header signature.
// Then reads the Zip64 locator and record (if any), declares the format,
// and processes the end-of-central-directory record and the central
// directory.
1835 static void de_run_zip_normally(deark
*c
, lctx
*d
)
1839 if(c
->detection_data
&& c
->detection_data
->zip_eocd_looked_for
) {
1840 eocd_found
= (int)c
->detection_data
->zip_eocd_found
;
1841 d
->end_of_central_dir_pos
= c
->detection_data
->zip_eocd_pos
;
1844 eocd_found
= fmtutil_find_zip_eocd(c
, c
->infile
, &d
->end_of_central_dir_pos
);
1847 if(c
->module_disposition
==DE_MODDISP_AUTODETECT
||
1848 c
->module_disposition
==DE_MODDISP_EXPLICIT
)
1850 if(de_getu32le(0)==CODE_PK34
) {
1851 de_err(c
, "ZIP central directory not found. "
1852 "You could try \"-opt zip:scanmode\".");
1856 de_err(c
, "Not a valid ZIP file");
1860 de_dbg(c
, "end-of-central-dir record found at %"I64_FMT
,
1861 d
->end_of_central_dir_pos
);
1863 do_zip64_eocd_locator(c
, d
);
1866 if(!do_zip64_eocd(c
, d
)) goto done
;
1870 de_declare_fmt(c
, "ZIP-Zip64");
1872 de_declare_fmt(c
, "ZIP");
1874 if(!do_end_of_central_dir(c
, d
)) {
1878 if(!do_central_dir(c
, d
)) {
// Module entry point (installed as mi->run_fn).
// Allocates the local context, picks the default encoding for filenames and
// comments (the input encoding, with CP437 passed as the default), and
// creates a CRC-32 object. Then runs scan mode if the "zip:scanmode" option
// is set, or normal processing otherwise, and destroys the CRC object
// afterward.
1886 static void de_run_zip(deark
*c
, de_module_params
*mparams
)
1891 d
= de_malloc(c
, sizeof(lctx
));
1893 enc
= de_get_input_encoding(c
, NULL
, DE_ENCODING_CP437
);
1894 d
->default_enc_for_filenames
= enc
;
1895 d
->default_enc_for_comments
= enc
;
1897 d
->crco
= de_crcobj_create(c
, DE_CRCOBJ_CRC32_IEEE
);
1899 if(de_get_ext_option(c
, "zip:scanmode")) {
1900 de_run_zip_scanmode(c
, d
);
1903 de_run_zip_normally(c
, d
);
1907 de_crcobj_destroy(d
->crco
);
// Format identification (installed as mi->identify_fn). Returns a
// confidence level:
//  - 100 or 90 if the buffer 'b' holds a local file header signature
//    (100 if the file also has a ".zip" extension);
//  - 100 or 19 if an end-of-central-directory signature sits exactly 22
//    bytes before the end of the file, i.e. where it is when there is no
//    ZIP file comment;
//  - otherwise, only if no other format has reached confidence 19 and the
//    file has an "MZ" signature or ".zip" extension, a full search for the
//    end-of-central-directory record is made, and its result is saved in
//    c->detection_data for reuse.
1912 static int de_identify_zip(deark
*c
)
1918 has_zip_ext
= de_input_file_has_ext(c
, "zip");
1923 if(!de_memcmp(b
, g_zipsig34
, 4)) {
1924 return has_zip_ext
? 100 : 90;
1926 if(b
[0]=='M' && b
[1]=='Z') has_mz_sig
= 1;
1928 if(c
->infile
->len
>= 22) {
1929 de_read(b
, c
->infile
->len
- 22, 4);
1930 if(!de_memcmp(b
, g_zipsig56
, 4)) {
1931 return has_zip_ext
? 100 : 19;
1935 // Things to consider:
1936 // * We want fmtutil_find_zip_eocd() to be called no more than once, and
1937 // only on files that for some reason we suspect could be ZIP files.
1938 // * If the user disables exe format detection (e.g. with "-onlydetect zip"),
1939 // we want self-extracting-ZIP .exe files to be detected as ZIP instead.
1940 // * And we want the above to work even if the file has a ZIP file comment,
1941 // making it expensive to detect as ZIP.
1943 // Tests below can't return a confidence higher than this.
1944 if(c
->detection_data
->best_confidence_so_far
>= 19) return 0;
1948 if(has_mz_sig
|| has_zip_ext
) {
// Record that the search was done, so de_run_zip_normally() can reuse
// the result instead of searching again.
1951 c
->detection_data
->zip_eocd_looked_for
= 1;
1952 if(fmtutil_find_zip_eocd(c
, c
->infile
, &eocd_pos
)) {
1953 c
->detection_data
->zip_eocd_found
= 1;
1954 c
->detection_data
->zip_eocd_pos
= eocd_pos
;
// Help callback (installed as mi->help_fn): prints the module-specific
// "-opt" options, one per line.
1962 static void de_help_zip(deark
*c
)
1964 de_msg(c
, "-opt zip:scanmode : Do not use the \"central directory\"");
1965 de_msg(c
, "-opt zip:implodebug : Behave like PKZIP 1.01/1.02");
// Module registration: fills in the deark_module_info record for the ZIP
// module with its description and its run, identify and help callbacks.
1968 void de_module_zip(deark
*c
, struct deark_module_info
*mi
)
1971 mi
->desc
= "ZIP archive";
1972 mi
->run_fn
= de_run_zip
;
1973 mi
->identify_fn
= de_identify_zip
;
1974 mi
->help_fn
= de_help_zip
;