1 // This file is part of Deark.
2 // Copyright (C) 2016 Jason Summers
3 // See the file COPYING for terms of use.
7 #include <deark-config.h>
8 #include <deark-private.h>
9 #include <deark-fmtutil.h>
DE_DECLARE_MODULE(de_module_zip);

// Forward declaration of this module's context struct (defined further down
// in this file), plus the short alias used in function signatures.
struct localctx_struct;
typedef struct localctx_struct lctx;
16 #define CODE_PK12 0x02014b50U
17 #define CODE_PK34 0x04034b50U
18 static const u8 g_zipsig34
[4] = {'P', 'K', 0x03, 0x04};
19 static const u8 g_zipsig56
[4] = {'P', 'K', 0x05, 0x06};
20 static const u8 g_zipsig66
[4] = {'P', 'K', 0x06, 0x06};
21 static const u8 g_zipsig67
[4] = {'P', 'K', 0x06, 0x07};
23 struct compression_params
{
// ZIP-specific params (not in de_dfilter_*_params) that may be needed
// to decompress something.
27 unsigned int bit_flags
;
30 typedef void (*decompressor_fn
)(deark
*c
, lctx
*d
, struct compression_params
*cparams
,
31 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
32 struct de_dfilter_results
*dres
);
34 struct cmpr_meth_info
{
38 decompressor_fn decompressor
;
41 struct dir_entry_data
{
42 unsigned int ver_needed
;
43 unsigned int ver_needed_hi
, ver_needed_lo
;
44 i64 cmpr_size
, uncmpr_size
;
46 const struct cmpr_meth_info
*cmi
;
47 unsigned int bit_flags
;
54 struct timestamp_data
{
55 struct de_timestamp ts
; // The best timestamp of this type found so far
60 unsigned int ver_made_by
;
61 unsigned int ver_made_by_hi
, ver_made_by_lo
;
62 unsigned int attr_i
, attr_e
;
63 i64 offset_of_local_header
;
64 i64 disk_number_start
;
70 struct timestamp_data tsdata
[DE_TIMESTAMPIDX_COUNT
];
72 struct de_riscos_file_attrs rfa
;
74 struct dir_entry_data central_dir_entry_data
;
75 struct dir_entry_data local_dir_entry_data
;
77 i64 cmpr_size
, uncmpr_size
;
79 u8 has_extts
, has_extts_atime
, has_extts_crtime
;
80 u8 questionable_atime
, questionable_crtime
;
83 struct extra_item_type_info_struct
;
85 struct extra_item_info_struct
{
89 const struct extra_item_type_info_struct
*eiti
;
90 struct member_data
*md
;
91 struct dir_entry_data
*dd
;
95 struct localctx_struct
{
96 de_encoding default_enc_for_filenames
;
97 de_encoding default_enc_for_comments
;
98 i64 end_of_central_dir_pos
;
99 i64 central_dir_num_entries
;
100 i64 central_dir_byte_size
;
101 i64 central_dir_offset
;
105 i64 zip64_num_centr_dir_entries_this_disk
;
106 i64 zip64_num_centr_dir_entries_total
;
107 i64 zip64_centr_dir_byte_size
;
108 unsigned int zip64_eocd_disknum
;
109 unsigned int zip64_cd_disknum
;
110 i64 offset_discrepancy
;
111 int used_offset_discrepancy
;
114 struct de_crcobj
*crco
;
117 typedef void (*extrafield_decoder_fn
)(deark
*c
, lctx
*d
,
118 struct extra_item_info_struct
*eii
);
120 // (Timezone info and precision are ignored.)
121 static int timestamps_are_valid_and_equal(const struct de_timestamp
*ts1
,
122 const struct de_timestamp
*ts2
)
124 if(!ts1
->is_valid
|| !ts2
->is_valid
) return 0;
125 return (ts1
->ts_FILETIME
== ts2
->ts_FILETIME
);
128 static int is_compression_method_supported(lctx
*d
, const struct cmpr_meth_info
*cmi
)
130 if(cmi
&& cmi
->decompressor
) return 1;
134 static void do_decompress_shrink(deark
*c
, lctx
*d
, struct compression_params
*cparams
,
135 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
136 struct de_dfilter_results
*dres
)
138 fmtutil_decompress_zip_shrink(c
, dcmpri
, dcmpro
, dres
, NULL
);
141 static void do_decompress_reduce(deark
*c
, lctx
*d
, struct compression_params
*cparams
,
142 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
143 struct de_dfilter_results
*dres
)
145 struct de_zipreduce_params params
;
147 de_zeromem(¶ms
, sizeof(struct de_zipreduce_params
));
148 params
.cmpr_factor
= (unsigned int)(cparams
->cmpr_meth
-1);
149 fmtutil_decompress_zip_reduce(c
, dcmpri
, dcmpro
, dres
, ¶ms
);
152 static void do_decompress_implode(deark
*c
, lctx
*d
, struct compression_params
*cparams
,
153 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
154 struct de_dfilter_results
*dres
)
156 struct de_zipimplode_params params
;
158 de_zeromem(¶ms
, sizeof(struct de_zipimplode_params
));
159 params
.bit_flags
= cparams
->bit_flags
;
160 params
.mml_bug
= (u8
)de_get_ext_option_bool(c
, "zip:implodebug", 0);
161 fmtutil_decompress_zip_implode(c
, dcmpri
, dcmpro
, dres
, ¶ms
);
164 static void do_decompress_deflate(deark
*c
, lctx
*d
, struct compression_params
*cparams
,
165 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
166 struct de_dfilter_results
*dres
)
168 struct de_deflate_params inflparams
;
170 de_zeromem(&inflparams
, sizeof(struct de_deflate_params
));
171 if(cparams
->cmpr_meth
==9) {
172 inflparams
.flags
|= DE_DEFLATEFLAG_DEFLATE64
;
174 fmtutil_decompress_deflate_ex(c
, dcmpri
, dcmpro
, dres
, &inflparams
);
177 static void do_decompress_dclimplode(deark
*c
, lctx
*d
, struct compression_params
*cparams
,
178 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
179 struct de_dfilter_results
*dres
)
181 fmtutil_dclimplode_codectype1(c
, dcmpri
, dcmpro
, dres
, NULL
);
184 static void do_decompress_stored(deark
*c
, lctx
*d
, struct compression_params
*cparams
,
185 struct de_dfilter_in_params
*dcmpri
, struct de_dfilter_out_params
*dcmpro
,
186 struct de_dfilter_results
*dres
)
188 fmtutil_decompress_uncompressed(c
, dcmpri
, dcmpro
, dres
, 0);
191 static const struct cmpr_meth_info cmpr_meth_info_arr
[] = {
192 { 0, 0x00, "stored", do_decompress_stored
},
193 { 1, 0x00, "shrink", do_decompress_shrink
},
194 { 2, 0x00, "reduce, CF=1", do_decompress_reduce
},
195 { 3, 0x00, "reduce, CF=2", do_decompress_reduce
},
196 { 4, 0x00, "reduce, CF=3", do_decompress_reduce
},
197 { 5, 0x00, "reduce, CF=4", do_decompress_reduce
},
198 { 6, 0x00, "implode", do_decompress_implode
},
199 { 8, 0x00, "deflate", do_decompress_deflate
},
200 { 9, 0x00, "deflate64", do_decompress_deflate
},
201 { 10, 0x00, "PKWARE DCL implode", do_decompress_dclimplode
},
202 { 12, 0x00, "bzip2", NULL
},
203 { 14, 0x00, "LZMA", NULL
},
204 { 16, 0x00, "IBM z/OS CMPSC", NULL
},
205 { 18, 0x00, "IBM TERSE (new)", NULL
},
206 { 19, 0x00, "IBM LZ77 z Architecture", NULL
},
207 { 94, 0x00, "MP3", NULL
},
208 { 95, 0x00, "XZ", NULL
},
209 { 96, 0x00, "JPEG", NULL
},
210 { 97, 0x00, "WavPack", NULL
},
211 { 98, 0x00, "PPMd", NULL
},
212 { 99, 0x00, "AES", NULL
}
215 static const struct cmpr_meth_info
*get_cmpr_meth_info(int cmpr_meth
)
219 for(k
=0; k
<DE_ARRAYCOUNT(cmpr_meth_info_arr
); k
++) {
220 if(cmpr_meth_info_arr
[k
].cmpr_meth
== cmpr_meth
) {
221 return &cmpr_meth_info_arr
[k
];
227 // Decompress some data, using the given ZIP compression method.
228 // On failure, dres->errcode will be set.
229 static void do_decompress_lowlevel(deark
*c
, lctx
*d
, struct de_dfilter_in_params
*dcmpri
,
230 struct de_dfilter_out_params
*dcmpro
, struct de_dfilter_results
*dres
,
231 int cmpr_meth
, const struct cmpr_meth_info
*cmi
, unsigned int bit_flags
)
233 struct compression_params cparams
;
235 de_zeromem(&cparams
, sizeof(struct compression_params
));
236 cparams
.cmpr_meth
= cmpr_meth
;
237 cparams
.bit_flags
= bit_flags
;
239 if(cmi
&& cmi
->decompressor
) {
240 cmi
->decompressor(c
, d
, &cparams
, dcmpri
, dcmpro
, dres
);
243 de_internal_err_nonfatal(c
, "Unsupported compression method (%d)", cmpr_meth
);
244 de_dfilter_set_generic_error(c
, dres
, NULL
);
248 static void our_writelistener_cb(dbuf
*f
, void *userdata
, const u8
*buf
, i64 buf_len
)
250 struct de_crcobj
*crco
= (struct de_crcobj
*)userdata
;
251 de_crcobj_addbuf(crco
, buf
, buf_len
);
254 // Decompress a Zip member file, writing to outf.
255 // Does CRC calculation.
256 // Reports errors to the user.
257 // Only call this if the compression method is supported -- Call
258 // is_compression_method_supported() first.
259 // Assumes ldd->cmi has been set, by calling get_cmpr_meth_info().
260 static int do_decompress_member(deark
*c
, lctx
*d
, struct member_data
*md
, dbuf
*outf
)
262 struct dir_entry_data
*ldd
= &md
->local_dir_entry_data
;
263 struct de_dfilter_in_params dcmpri
;
264 struct de_dfilter_out_params dcmpro
;
265 struct de_dfilter_results dres
;
269 de_dfilter_init_objects(c
, &dcmpri
, &dcmpro
, &dres
);
270 dcmpri
.f
= c
->infile
;
271 dcmpri
.pos
= md
->file_data_pos
;
272 dcmpri
.len
= md
->cmpr_size
;
274 dcmpro
.expected_len
= md
->uncmpr_size
;
275 dcmpro
.len_known
= 1;
277 dbuf_set_writelistener(outf
, our_writelistener_cb
, (void*)d
->crco
);
278 de_crcobj_reset(d
->crco
);
280 do_decompress_lowlevel(c
, d
, &dcmpri
, &dcmpro
, &dres
, ldd
->cmpr_meth
,
281 ldd
->cmi
, ldd
->bit_flags
);
284 de_err(c
, "%s: %s", ucstring_getpsz_d(ldd
->fname
),
285 de_dfilter_get_errmsg(c
, &dres
));
289 crc_calculated
= de_crcobj_getval(d
->crco
);
290 de_dbg(c
, "crc (calculated): 0x%08x", (unsigned int)crc_calculated
);
292 if(crc_calculated
!= md
->crc_reported
) {
293 de_err(c
, "%s: CRC check failed: Expected 0x%08x, got 0x%08x",
294 ucstring_getpsz_d(ldd
->fname
),
295 (unsigned int)md
->crc_reported
, (unsigned int)crc_calculated
);
296 if(dres
.bytes_consumed_valid
&& (dres
.bytes_consumed
< dcmpri
.len
)) {
297 de_info(c
, "Note: Only used %"I64_FMT
" of %"I64_FMT
" compressed bytes.",
298 dres
.bytes_consumed
, dcmpri
.len
);
308 // A variation of do_decompress_member() -
309 // works for Finder attribute data, and OS/2 extended attributes.
310 // Only call this if the compression method is supported -- Call
311 // is_compression_method_supported() first.
312 // outf is assumed to be a membuf.
313 // dcflags: 0x1 = Validate the crc_reported param.
314 static int do_decompress_attrib_data(deark
*c
, lctx
*d
,
315 i64 dpos
, i64 dlen
, dbuf
*outf
, i64 uncmprsize
, u32 crc_reported
,
316 int cmpr_meth
, const struct cmpr_meth_info
*cmi
, UI flags
, const char *name
)
318 struct de_dfilter_in_params dcmpri
;
319 struct de_dfilter_out_params dcmpro
;
320 struct de_dfilter_results dres
;
324 de_dfilter_init_objects(c
, &dcmpri
, &dcmpro
, &dres
);
325 dcmpri
.f
= c
->infile
;
329 dcmpro
.expected_len
= uncmprsize
;
330 dcmpro
.len_known
= 1;
332 do_decompress_lowlevel(c
, d
, &dcmpri
, &dcmpro
, &dres
, cmpr_meth
, cmi
, 0);
334 goto done
; // Could report the error, but this isn't critical data
338 de_crcobj_reset(d
->crco
);
339 de_crcobj_addslice(d
->crco
, outf
, 0, outf
->len
);
340 crc_calculated
= de_crcobj_getval(d
->crco
);
341 de_dbg(c
, "%s crc (calculated): 0x%08x", name
, (UI
)crc_calculated
);
342 if(crc_calculated
!= crc_reported
) goto done
;
350 // As we read a member file's attributes, we may encounter multiple timestamps,
351 // which can differ in their precision, and whether they use UTC.
352 // This function is called to remember the "best" file modification time
353 // encountered so far.
354 static void apply_timestamp(deark
*c
, lctx
*d
, struct member_data
*md
, int tstype
,
355 const struct de_timestamp
*ts
, int quality
)
357 if(!ts
->is_valid
) return;
359 // In case of a tie, we prefer the later timestamp that we encountered.
360 // This makes local headers have priority over central headers, for
362 if(quality
>= md
->tsdata
[tstype
].quality
) {
363 md
->tsdata
[tstype
].ts
= *ts
;
364 md
->tsdata
[tstype
].quality
= quality
;
368 static void do_read_filename(deark
*c
, lctx
*d
,
369 struct member_data
*md
, struct dir_entry_data
*dd
,
370 i64 pos
, i64 len
, int utf8_flag
)
372 de_encoding from_encoding
;
374 ucstring_empty(dd
->fname
);
375 from_encoding
= utf8_flag
? DE_ENCODING_UTF8
: d
->default_enc_for_filenames
;
376 dbuf_read_to_ucstring(c
->infile
, pos
, len
, dd
->fname
, 0, from_encoding
);
377 de_dbg(c
, "filename: \"%s\"", ucstring_getpsz_d(dd
->fname
));
380 static void do_comment_display(deark
*c
, lctx
*d
, i64 pos
, i64 len
, de_ext_encoding ee
,
383 de_ucstring
*s
= NULL
;
385 s
= ucstring_create(c
);
386 dbuf_read_to_ucstring(c
->infile
, pos
, len
, s
, 0, ee
);
387 de_dbg(c
, "%s: \"%s\"", name
, ucstring_getpsz_d(s
));
391 static void do_comment_extract(deark
*c
, lctx
*d
, i64 pos
, i64 len
, de_ext_encoding ee
,
395 de_ucstring
*s
= NULL
;
397 f
= dbuf_create_output_file(c
, ext
, NULL
, DE_CREATEFLAG_IS_AUX
);
398 s
= ucstring_create(c
);
399 dbuf_read_to_ucstring(c
->infile
, pos
, len
, s
, 0, ee
);
400 ucstring_write_as_utf8(c
, s
, f
, 1);
405 static void do_comment(deark
*c
, lctx
*d
, i64 pos
, i64 len
, int utf8_flag
,
406 const char *name
, const char *ext
)
411 ee
= utf8_flag
? DE_ENCODING_UTF8
: d
->default_enc_for_comments
;
412 ee
= DE_EXTENC_MAKE(ee
, DE_ENCSUBTYPE_HYBRID
);
413 if(c
->extract_level
>=2) {
414 do_comment_extract(c
, d
, pos
, len
, ee
, ext
);
417 do_comment_display(c
, d
, pos
, len
, ee
, name
);
421 static void read_unix_timestamp(deark
*c
, lctx
*d
, i64 pos
,
422 struct de_timestamp
*timestamp
, const char *name
)
425 char timestamp_buf
[64];
427 t
= de_geti32le(pos
);
428 de_unix_time_to_timestamp(t
, timestamp
, 0x1);
429 de_dbg_timestamp_to_string(c
, timestamp
, timestamp_buf
, sizeof(timestamp_buf
), 0);
430 de_dbg(c
, "%s: %"I64_FMT
" (%s)", name
, t
, timestamp_buf
);
433 static void read_FILETIME(deark
*c
, lctx
*d
, i64 pos
,
434 struct de_timestamp
*timestamp
, const char *name
)
437 char timestamp_buf
[64];
439 t_FILETIME
= de_geti64le(pos
);
440 de_FILETIME_to_timestamp(t_FILETIME
, timestamp
, 0x1);
441 de_dbg_timestamp_to_string(c
, timestamp
, timestamp_buf
, sizeof(timestamp_buf
), 0);
442 de_dbg(c
, "%s: %s", name
, timestamp_buf
);
445 static void ef_zip64extinfo(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
450 if(pos
+8 > eii
->dpos
+eii
->dlen
) goto done
;
451 n
= de_geti64le(pos
); pos
+= 8;
452 de_dbg(c
, "orig uncmpr file size: %"I64_FMT
, n
);
453 if(eii
->dd
->uncmpr_size
==0xffffffffLL
) {
454 eii
->dd
->uncmpr_size
= n
;
457 if(pos
+8 > eii
->dpos
+eii
->dlen
) goto done
;
458 n
= de_geti64le(pos
); pos
+= 8;
459 de_dbg(c
, "cmpr data size: %"I64_FMT
, n
);
460 if(eii
->dd
->cmpr_size
==0xffffffffLL
) {
461 eii
->dd
->cmpr_size
= n
;
464 if(pos
+8 > eii
->dpos
+eii
->dlen
) goto done
;
465 n
= de_geti64le(pos
); pos
+= 8;
466 de_dbg(c
, "offset of local header record: %"I64_FMT
, n
);
468 if(pos
+4 > eii
->dpos
+eii
->dlen
) goto done
;
469 n
= de_getu32le_p(&pos
);
470 de_dbg(c
, "disk start number: %"I64_FMT
, n
);
475 // Extra field 0x5455
476 static void ef_extended_timestamp(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
481 int has_mtime
, has_atime
, has_ctime
;
482 struct de_timestamp timestamp_tmp
;
484 endpos
= pos
+ eii
->dlen
;
485 if(pos
+1>endpos
) return;
486 flags
= de_getbyte_p(&pos
);
487 de_dbg2(c
, "flags: 0x%02x", (UI
)flags
);
488 if(eii
->is_central
) {
489 has_mtime
= (eii
->dlen
>=5);
494 eii
->md
->has_extts
= 1;
495 has_mtime
= (flags
& 0x01)?1:0;
496 has_atime
= (flags
& 0x02)?1:0;
497 has_ctime
= (flags
& 0x04)?1:0;
500 if(pos
+4>endpos
) return;
501 read_unix_timestamp(c
, d
, pos
, ×tamp_tmp
, "mtime");
502 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_MODIFY
, ×tamp_tmp
, 50);
506 if(pos
+4>endpos
) return;
507 read_unix_timestamp(c
, d
, pos
, ×tamp_tmp
, "atime");
508 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_ACCESS
, ×tamp_tmp
, 50);
509 eii
->md
->has_extts_atime
= 1;
513 if(pos
+4>endpos
) return;
514 read_unix_timestamp(c
, d
, pos
, ×tamp_tmp
, "creation time");
515 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_CREATE
, ×tamp_tmp
, 50);
516 eii
->md
->has_extts_crtime
= 1;
521 // Extra field 0x5855
522 static void ef_infozip1(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
525 struct de_timestamp timestamp_tmp
;
527 if(eii
->dlen
<8) return;
528 read_unix_timestamp(c
, d
, eii
->dpos
, ×tamp_tmp
, "atime");
529 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_ACCESS
, ×tamp_tmp
, 45);
530 read_unix_timestamp(c
, d
, eii
->dpos
+4, ×tamp_tmp
, "mtime");
531 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_MODIFY
, ×tamp_tmp
, 45);
532 if(!eii
->is_central
&& eii
->dlen
>=12) {
533 uidnum
= de_getu16le(eii
->dpos
+8);
534 gidnum
= de_getu16le(eii
->dpos
+10);
535 de_dbg(c
, "uid: %d, gid: %d", (int)uidnum
, (int)gidnum
);
539 // Extra field 0x7075 - Info-ZIP Unicode Path
540 static void ef_unicodepath(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
543 de_ucstring
*fn
= NULL
;
545 u32 crc_reported
, crc_calculated
;
547 if(eii
->dlen
<1) goto done
;
548 ver
= de_getbyte(eii
->dpos
);
549 de_dbg(c
, "version: %u", (unsigned int)ver
);
550 if(ver
!=1) goto done
;
551 if(eii
->dlen
<6) goto done
;
552 crc_reported
= (u32
)de_getu32le(eii
->dpos
+1);
553 de_dbg(c
, "name-crc (reported): 0x%08x", (unsigned int)crc_reported
);
554 fn
= ucstring_create(c
);
555 fnlen
= eii
->dlen
- 5;
556 dbuf_read_to_ucstring(c
->infile
, eii
->dpos
+5, fnlen
, fn
, 0, DE_ENCODING_UTF8
);
557 de_dbg(c
, "unicode name: \"%s\"", ucstring_getpsz_d(fn
));
559 // Need to go back and calculate a CRC of the main filename. This is
560 // protection against the case where a ZIP editor may have changed the
561 // original filename, but retained a now-orphaned Unicode Path field.
562 de_crcobj_reset(d
->crco
);
563 de_crcobj_addslice(d
->crco
, c
->infile
, eii
->dd
->main_fname_pos
, eii
->dd
->main_fname_len
);
564 crc_calculated
= de_crcobj_getval(d
->crco
);
565 de_dbg(c
, "name-crc (calculated): 0x%08x", (unsigned int)crc_calculated
);
567 if(crc_calculated
== crc_reported
) {
568 ucstring_empty(eii
->dd
->fname
);
569 ucstring_append_ucstring(eii
->dd
->fname
, fn
);
573 ucstring_destroy(fn
);
576 // Extra field 0x7855
577 static void ef_infozip2(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
581 if(eii
->is_central
) return;
582 if(eii
->dlen
<4) return;
583 uidnum
= de_getu16le(eii
->dpos
);
584 gidnum
= de_getu16le(eii
->dpos
+2);
585 de_dbg(c
, "uid: %d, gid: %d", (int)uidnum
, (int)gidnum
);
588 // Extra field 0x7875
589 static void ef_infozip3(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
597 endpos
= pos
+eii
->dlen
;
599 if(pos
+1>endpos
) return;
600 ver
= de_getbyte_p(&pos
);
601 de_dbg(c
, "version: %d", (int)ver
);
604 if(pos
+1>endpos
) return;
605 sz
= (i64
)de_getbyte_p(&pos
);
606 if(pos
+sz
>endpos
) return;
607 uidnum
= dbuf_getint_ext(c
->infile
, pos
, (unsigned int)sz
, 1, 0);
610 if(pos
+1>endpos
) return;
611 sz
= (i64
)de_getbyte_p(&pos
);
612 if(pos
+sz
>endpos
) return;
613 gidnum
= dbuf_getint_ext(c
->infile
, pos
, (unsigned int)sz
, 1, 0);
616 de_dbg(c
, "uid: %d, gid: %d", (int)uidnum
, (int)gidnum
);
619 // Extra field 0x000a
620 static void ef_ntfs(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
627 struct de_timestamp timestamp_tmp_m
;
628 struct de_timestamp timestamp_tmp
;
630 endpos
= pos
+eii
->dlen
;
631 pos
+= 4; // skip reserved field
634 if(pos
+4>endpos
) break;
635 attr_tag
= de_getu16le_p(&pos
);
636 attr_size
= de_getu16le_p(&pos
);
637 if(attr_tag
==0x0001) name
="NTFS filetimes";
639 de_dbg(c
, "tag: 0x%04x (%s), dlen: %d", (unsigned int)attr_tag
, name
,
641 if(pos
+attr_size
>endpos
) break;
644 if(attr_tag
==0x0001 && attr_size
>=24) {
645 read_FILETIME(c
, d
, pos
, ×tamp_tmp_m
, "mtime");
646 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_MODIFY
, ×tamp_tmp_m
, 90);
648 read_FILETIME(c
, d
, pos
+8, ×tamp_tmp
, "atime");
649 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_ACCESS
, ×tamp_tmp
, 90);
650 if(timestamps_are_valid_and_equal(×tamp_tmp
, ×tamp_tmp_m
)) {
651 eii
->md
->questionable_atime
= 1;
654 read_FILETIME(c
, d
, pos
+16, ×tamp_tmp
, "creation time");
655 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_CREATE
, ×tamp_tmp
, 90);
656 if(timestamps_are_valid_and_equal(×tamp_tmp
, ×tamp_tmp_m
)) {
657 eii
->md
->questionable_crtime
= 1;
660 de_dbg_indent(c
, -1);
666 // Extra field 0x0009
667 static void ef_os2(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
675 const struct cmpr_meth_info
*cmi
= NULL
;
676 const char *name
= "OS/2 ext. attr. data";
677 dbuf
*attr_data
= NULL
;
678 de_module_params
*mparams
= NULL
;
681 endpos
= pos
+eii
->dlen
;
682 if(pos
+4>endpos
) goto done
;
683 ulen
= de_getu32le_p(&pos
);
684 de_dbg(c
, "uncmpr ext attr data size: %"I64_FMT
, ulen
);
685 if(eii
->is_central
) goto done
;
687 if(pos
+2>endpos
) goto done
;
688 cmpr_meth
= (int)de_getu16le_p(&pos
);
689 de_dbg(c
, "ext attr cmpr method: %d", cmpr_meth
);
691 if(pos
+4>endpos
) goto done
;
692 crc_reported
= (u32
)de_getu32le_p(&pos
);
693 de_dbg(c
, "ext attr crc (reported): 0x%08x", (unsigned int)crc_reported
);
695 cmpr_attr_size
= endpos
-pos
;
696 de_dbg(c
, "cmpr ext attr data at %"I64_FMT
", len=%"I64_FMT
, pos
, cmpr_attr_size
);
697 if(pos
+ cmpr_attr_size
> endpos
) goto done
;
699 cmi
= get_cmpr_meth_info(cmpr_meth
);
700 if(cmpr_meth
==6 || !is_compression_method_supported(d
, cmi
)) {
701 de_warn(c
, "%s: Unsupported compression method: %d (%s)",
702 name
, cmpr_meth
, (cmi
? cmi
->name
: "?"));
706 attr_data
= dbuf_create_membuf(c
, ulen
, 0x1);
707 ret
= do_decompress_attrib_data(c
, d
, pos
, cmpr_attr_size
,
708 attr_data
, ulen
, crc_reported
, cmpr_meth
, cmi
, 0x1, name
);
710 de_warn(c
, "Failed to decompress %s", name
);
714 // attr_data contains an OS/2 extended attribute structure (FEA2LIST)
715 mparams
= de_malloc(c
, sizeof(de_module_params
));
716 mparams
->in_params
.codes
= "L";
717 de_dbg(c
, "decoding OS/2 ext. attribs., unc. len=%"I64_FMT
, attr_data
->len
);
719 de_run_module_by_id_on_slice(c
, "ea_data", mparams
, attr_data
, 0, attr_data
->len
);
720 de_dbg_indent(c
, -1);
723 dbuf_close(attr_data
);
727 // Extra field 0x2705 (ZipIt Macintosh 1.3.5+)
728 static void ef_zipitmac_2705(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
730 struct de_fourcc sig
;
731 struct de_fourcc filetype
;
732 struct de_fourcc creator
;
734 if(eii
->dlen
<4) goto done
;
735 dbuf_read_fourcc(c
->infile
, eii
->dpos
, &sig
, 4, 0x0);
736 de_dbg(c
, "signature: '%s'", sig
.id_dbgstr
);
737 if(sig
.id
!=0x5a504954U
) goto done
; // expecting 'ZPIT'
738 if(eii
->dlen
<12) goto done
;
739 dbuf_read_fourcc(c
->infile
, eii
->dpos
+4, &filetype
, 4, 0x0);
740 de_dbg(c
, "filetype: '%s'", filetype
.id_dbgstr
);
741 dbuf_read_fourcc(c
->infile
, eii
->dpos
+8, &creator
, 4, 0x0);
742 de_dbg(c
, "creator: '%s'", creator
.id_dbgstr
);
748 // The time will be returned in the caller-supplied 'ts'
749 static void handle_mac_time(deark
*c
, lctx
*d
,
750 i64 mt_raw
, i64 mt_offset
,
751 struct de_timestamp
*ts
, const char *name
)
753 char timestamp_buf
[64];
754 de_mac_time_to_timestamp(mt_raw
- mt_offset
, ts
);
755 ts
->tzcode
= DE_TZCODE_UTC
;
756 de_dbg_timestamp_to_string(c
, ts
, timestamp_buf
, sizeof(timestamp_buf
), 0);
757 de_dbg(c
, "%s: %"I64_FMT
" %+"I64_FMT
" (%s)", name
,
758 mt_raw
, -mt_offset
, timestamp_buf
);
761 // Extra field 0x334d (Info-ZIP Macintosh)
762 static void ef_infozipmac(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
770 const struct cmpr_meth_info
*cmi
= NULL
;
771 struct de_fourcc filetype
;
772 struct de_fourcc creator
;
773 de_ucstring
*flags_str
= NULL
;
774 dbuf
*attr_data
= NULL
;
777 i64 create_time_offset
;
781 i64 backup_time_offset
;
782 struct de_timestamp tmp_timestamp
;
784 u32 crc_reported
= 0;
786 struct de_stringreaderdata
*srd
;
788 if(eii
->dlen
<14) goto done
;
790 ulen
= de_getu32le_p(&pos
);
791 de_dbg(c
, "uncmpr. finder attr. size: %d", (int)ulen
);
793 flags
= (unsigned int)de_getu16le_p(&pos
);
794 flags_str
= ucstring_create(c
);
795 if(flags
&0x0001) ucstring_append_flags_item(flags_str
, "data_fork");
796 if(flags
&0x0002) ucstring_append_flags_item(flags_str
, "0x0002"); // something about the filename
797 ucstring_append_flags_item(flags_str
,
798 (flags
&0x0004)?"uncmpressed_attribute_data":"compressed_attribute_data");
799 if(flags
&0x0008) ucstring_append_flags_item(flags_str
, "64-bit_times");
800 if(flags
&0x0010) ucstring_append_flags_item(flags_str
, "no_timezone_offsets");
801 de_dbg(c
, "flags: 0x%04x (%s)", flags
, ucstring_getpsz(flags_str
));
803 dbuf_read_fourcc(c
->infile
, pos
, &filetype
, 4, 0x0);
804 de_dbg(c
, "filetype: '%s'", filetype
.id_dbgstr
);
806 dbuf_read_fourcc(c
->infile
, pos
, &creator
, 4, 0x0);
807 de_dbg(c
, "creator: '%s'", creator
.id_dbgstr
);
810 if(eii
->is_central
) goto done
;
812 if(flags
&0x0004) { // Uncompressed attribute data
816 dcflags
|= 0x1; // CRC is known
817 cmpr_meth
= (int)de_getu16le_p(&pos
);
818 cmi
= get_cmpr_meth_info(cmpr_meth
);
819 de_dbg(c
, "finder attr. cmpr. method: %d (%s)", cmpr_meth
, (cmi
? cmi
->name
: "?"));
821 crc_reported
= (u32
)de_getu32le_p(&pos
);
822 de_dbg(c
, "finder attr. data crc (reported): 0x%08x", (UI
)crc_reported
);
825 // The rest of the data is Finder attribute data
826 cmpr_attr_size
= eii
->dpos
+eii
->dlen
- pos
;
827 de_dbg(c
, "cmpr. finder attr. size: %d", (int)cmpr_attr_size
);
828 if(ulen
<1 || ulen
>1000000) goto done
;
830 // Type 6 (implode) compression won't work here, because it needs
831 // additional parameters seemingly not provided by the Finder attr data.
832 if(cmpr_meth
==6 || !is_compression_method_supported(d
, cmi
)) {
833 de_warn(c
, "Finder attribute data: Unsupported compression method: %d (%s)",
834 cmpr_meth
, (cmi
? cmi
->name
: "?"));
838 // Decompress and decode the Finder attribute data
839 attr_data
= dbuf_create_membuf(c
, ulen
, 0x1);
840 ret
= do_decompress_attrib_data(c
, d
, pos
, cmpr_attr_size
,
841 attr_data
, ulen
, crc_reported
, cmpr_meth
, cmi
, dcflags
, "finder attr. data");
843 de_warn(c
, "Failed to decompress finder attribute data");
848 dpos
+= 2; // Finder flags
849 dpos
+= 4; // Icon location
851 dpos
+= 16; // FXInfo
852 dpos
+= 1; // file version number
853 dpos
+= 1; // dir access rights
855 if(flags
&0x0008) goto done
; // We don't support 64-bit times
856 if(flags
&0x0010) goto done
; // We want timezone offsets
857 if(attr_data
->len
- dpos
< 6*4) goto done
;
859 create_time_raw
= dbuf_getu32le_p(attr_data
, &dpos
);
860 mod_time_raw
= dbuf_getu32le_p(attr_data
, &dpos
);
861 backup_time_raw
= dbuf_getu32le_p(attr_data
, &dpos
);
862 create_time_offset
= dbuf_geti32le(attr_data
, dpos
); dpos
+= 4;
863 mod_time_offset
= dbuf_geti32le(attr_data
, dpos
); dpos
+= 4;
864 backup_time_offset
= dbuf_geti32le(attr_data
, dpos
); dpos
+= 4;
866 handle_mac_time(c
, d
, create_time_raw
, create_time_offset
, &tmp_timestamp
, "create time");
867 if(create_time_raw
>0) {
868 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_CREATE
, &tmp_timestamp
, 40);
870 handle_mac_time(c
, d
, mod_time_raw
, mod_time_offset
, &tmp_timestamp
, "mod time ");
872 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_MODIFY
, &tmp_timestamp
, 40);
874 handle_mac_time(c
, d
, backup_time_raw
, backup_time_offset
, &tmp_timestamp
, "backup time");
875 if(backup_time_raw
>0) {
876 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_BACKUP
, &tmp_timestamp
, 40);
879 // Expecting 2 bytes for charset, and at least 2 more for the 2 NUL-terminated
880 // strings that follow.
881 if(attr_data
->len
- dpos
< 4) goto done
;
883 charset
= (int)dbuf_getu16le_p(attr_data
, &dpos
);
884 de_dbg(c
, "charset for fullpath/comment: %d", charset
);
886 // TODO: Can we use the correct encoding?
887 srd
= dbuf_read_string(attr_data
, dpos
, attr_data
->len
-dpos
, DE_DBG_MAX_STRLEN
,
888 DE_CONVFLAG_STOP_AT_NUL
, DE_ENCODING_ASCII
);
889 de_dbg(c
, "fullpath: \"%s\"", ucstring_getpsz(srd
->str
));
890 dpos
+= srd
->bytes_consumed
;
891 de_destroy_stringreaderdata(c
, srd
);
893 srd
= dbuf_read_string(attr_data
, dpos
, attr_data
->len
-dpos
, DE_DBG_MAX_STRLEN
,
894 DE_CONVFLAG_STOP_AT_NUL
, DE_ENCODING_ASCII
);
895 de_dbg(c
, "comment: \"%s\"", ucstring_getpsz(srd
->str
));
896 dpos
+= srd
->bytes_consumed
;
897 de_destroy_stringreaderdata(c
, srd
);
900 ucstring_destroy(flags_str
);
901 dbuf_close(attr_data
);
904 // Acorn / SparkFS / RISC OS
905 static void ef_acorn(deark
*c
, lctx
*d
, struct extra_item_info_struct
*eii
)
908 struct de_riscos_file_attrs rfa
;
910 if(eii
->dlen
<16) return;
911 if(dbuf_memcmp(c
->infile
, eii
->dpos
, "ARC0", 4)) {
912 de_dbg(c
, "[unsupported Acorn extra-field type]");
917 de_zeromem(&rfa
, sizeof(struct de_riscos_file_attrs
));
918 fmtutil_riscos_read_load_exec(c
, c
->infile
, &rfa
, pos
);
920 if(rfa
.mod_time
.is_valid
) {
921 apply_timestamp(c
, d
, eii
->md
, DE_TIMESTAMPIDX_MODIFY
, &rfa
.mod_time
, 70);
924 fmtutil_riscos_read_attribs_field(c
, c
->infile
, &rfa
, pos
, 0);
926 if(!eii
->is_central
&& !eii
->md
->has_riscos_data
) {
927 eii
->md
->has_riscos_data
= 1;
932 struct extra_item_type_info_struct
{
935 extrafield_decoder_fn fn
;
937 static const struct extra_item_type_info_struct extra_item_type_info_arr
[] = {
938 { 0x0001 /* */, "Zip64 extended information", ef_zip64extinfo
},
939 { 0x0007 /* */, "AV Info", NULL
},
940 { 0x0008 /* */, "extended language encoding data", NULL
},
941 { 0x0009 /* */, "OS/2", ef_os2
},
942 { 0x000a /* */, "NTFS", ef_ntfs
},
943 { 0x000c /* */, "OpenVMS", NULL
},
944 { 0x000d /* */, "Unix", NULL
},
945 { 0x000e /* */, "file stream and fork descriptors", NULL
},
946 { 0x000f /* */, "Patch Descriptor", NULL
},
947 { 0x0014 /* */, "PKCS#7 Store for X.509 Certificates", NULL
},
948 { 0x0015 /* */, "X.509 Certificate ID and Signature for individual file", NULL
},
949 { 0x0016 /* */, "X.509 Certificate ID for Central Directory", NULL
},
950 { 0x0017 /* */, "Strong Encryption Header", NULL
},
951 { 0x0018 /* */, "Record Management Controls", NULL
},
952 { 0x0019 /* */, "PKCS#7 Encryption Recipient Certificate List", NULL
},
953 { 0x0021 /* */, "Policy Decryption Key", NULL
},
954 { 0x0022 /* */, "Smartcrypt Key Provider", NULL
},
955 { 0x0023 /* */, "Smartcrypt Policy Key Data", NULL
},
956 { 0x0065 /* */, "IBM S/390 (Z390), AS/400 (I400) attributes", NULL
},
957 { 0x0066 /* */, "IBM S/390 (Z390), AS/400 (I400) attributes - compressed", NULL
},
958 { 0x07c8 /* */, "Macintosh", NULL
},
959 { 0x2605 /* */, "ZipIt Macintosh", NULL
},
960 { 0x2705 /* */, "ZipIt Macintosh 1.3.5+", ef_zipitmac_2705
},
961 { 0x2805 /* */, "ZipIt Macintosh 1.3.5+", NULL
},
962 { 0x334d /* M3 */, "Info-ZIP Macintosh", ef_infozipmac
},
963 { 0x4154 /* TA */, "Tandem NSK", NULL
},
964 { 0x4341 /* AC */, "Acorn/SparkFS", ef_acorn
},
965 { 0x4453 /* SE */, "Windows NT security descriptor (binary ACL)", NULL
},
966 { 0x4690 /* */, "POSZIP 4690", NULL
},
967 { 0x4704 /* */, "VM/CMS", NULL
},
968 { 0x470f /* */, "MVS", NULL
},
969 { 0x4854 /* TH */, "Theos, old unofficial port", NULL
}, // unzip:extrafld.txt says "inofficial"
970 { 0x4b46 /* FK */, "FWKCS MD5", NULL
},
971 { 0x4c41 /* AL */, "OS/2 access control list (text ACL)", NULL
},
972 { 0x4d49 /* IM */, "Info-ZIP OpenVMS", NULL
},
973 { 0x4d63 /* cM */, "Macintosh SmartZIP", NULL
},
974 { 0x4f4c /* LO */, "Xceed original location", NULL
},
975 { 0x5350 /* PS */, "Psion?", NULL
}, // observed in some Psion files
976 { 0x5356 /* VS */, "AOS/VS (ACL)", NULL
},
977 { 0x5455 /* UT */, "extended timestamp", ef_extended_timestamp
},
978 { 0x554e /* NU */, "Xceed unicode", NULL
},
979 { 0x5855 /* UX */, "Info-ZIP Unix, first version", ef_infozip1
},
980 { 0x6375 /* uc */, "Info-ZIP Unicode Comment", NULL
},
981 { 0x6542 /* Be */, "BeOS/BeBox", NULL
},
982 { 0x6854 /* Th */, "Theos", NULL
},
983 { 0x7075 /* up */, "Info-ZIP Unicode Path", ef_unicodepath
},
984 { 0x7441 /* At */, "AtheOS", NULL
},
985 { 0x756e /* nu */, "ASi Unix", NULL
},
986 { 0x7855 /* Ux */, "Info-ZIP Unix, second version", ef_infozip2
},
987 { 0x7875 /* ux */, "Info-ZIP Unix, third version", ef_infozip3
},
988 { 0xa220 /* */, "Microsoft Open Packaging Growth Hint", NULL
},
989 { 0xfb4a /* */, "SMS/QDOS", NULL
}, // according to Info-ZIP zip 3.0
990 { 0xfd4a /* */, "SMS/QDOS", NULL
} // according to ZIP v6.3.4 APPNOTE
// Look up the descriptor for an extra-field type.
// Scans extra_item_type_info_arr from the start and returns a pointer to the
// first entry whose .id equals 'id'.
// A function-local static fallback record (default_ei) is declared for the
// not-found case; its initializer and the fallback return are not visible in
// this excerpt.
993 static const struct extra_item_type_info_struct
*get_extra_item_type_info(i64 id
)
995 static const struct extra_item_type_info_struct default_ei
=
// Linear search over the static table.
999 for(i
=0; i
<DE_ARRAYCOUNT(extra_item_type_info_arr
); i
++) {
// The table's id is widened to i64 so the comparison is done in one type.
1000 if(id
== (i64
)extra_item_type_info_arr
[i
].id
) {
1001 return &extra_item_type_info_arr
[i
];
// Walk the "extra data" area of a central or local header.
// pos1/len delimit the area. Each item starts with a 4-byte header: a 16-bit
// little-endian id followed by a 16-bit little-endian data length (dlen).
// For each item the type descriptor is looked up, a debug line is printed,
// and the descriptor's handler (eiti->fn) is invoked.
// is_central is copied into each item's info struct.
1007 static void do_extra_data(deark
*c
, lctx
*d
,
1008 struct member_data
*md
, struct dir_entry_data
*dd
,
1009 i64 pos1
, i64 len
, int is_central
)
1013 de_dbg(c
, "extra data at %"I64_FMT
", len=%d", pos1
, (int)len
);
1014 de_dbg_indent(c
, 1);
1018 struct extra_item_info_struct eii
;
// Stop when there is no room left for another 4-byte item header.
// NOTE(review): with '>=', an item whose header ends exactly at the end of
// the area (i.e. dlen==0 as the last item) is not processed -- confirm
// that this is intended.
1020 if(pos
+4 >= pos1
+len
) break;
1021 de_zeromem(&eii
, sizeof(struct extra_item_info_struct
));
1024 eii
.is_central
= is_central
;
1027 eii
.id
= (u32
)de_getu16le(pos
);
1028 eii
.dlen
= de_getu16le(pos
+2);
1030 eii
.eiti
= get_extra_item_type_info(eii
.id
);
1032 de_dbg(c
, "item id=0x%04x (%s), dlen=%d", (unsigned int)eii
.id
, eii
.eiti
->name
,
// Stop if the item's declared payload would run past the end of the area.
1034 if(pos
+4+eii
.dlen
> pos1
+len
) break;
1037 de_dbg_indent(c
, 1);
// NOTE(review): many table entries have a NULL handler; presumably a guard
// on a line not shown here prevents calling a NULL fn -- confirm.
1038 eii
.eiti
->fn(c
, d
, &eii
);
1039 de_dbg_indent(c
, -1);
1045 de_dbg_indent(c
, -1);
// Extract one member file.
// Steps visible here:
//  - report errors for encrypted members (flag bit 0x1), unsupported
//    compression methods, and data extending beyond the end of the input file;
//  - warn when the member is a symbolic link;
//  - build a de_finfo: name (from the local-header filename), timestamps,
//    RISC OS attributes, directory / executable / non-executable flags;
//  - create the output file and decompress the member into it.
// The debug indentation level is saved on entry and restored on exit.
// NOTE(review): the statements that leave the function after each de_err()
// are not visible in this excerpt.
1048 static void do_extract_file(deark
*c
, lctx
*d
, struct member_data
*md
)
1051 de_finfo
*fi
= NULL
;
// All filename/flag/method decisions below use the local header's data.
1052 struct dir_entry_data
*ldd
= &md
->local_dir_entry_data
;
1054 int saved_indent_level
;
1056 de_dbg_indent_save(c
, &saved_indent_level
);
1057 de_dbg(c
, "file data at %"I64_FMT
", len=%"I64_FMT
, md
->file_data_pos
,
1059 de_dbg_indent(c
, 1);
// Bit 0 of the general-purpose flags is reported as "encrypted".
1061 if(ldd
->bit_flags
& 0x1) {
1062 de_err(c
, "%s: Encryption is not supported", ucstring_getpsz_d(ldd
->fname
));
1066 if(!is_compression_method_supported(d
, ldd
->cmi
)) {
1067 de_err(c
, "%s: Unsupported compression method: %d (%s)",
1068 ucstring_getpsz_d(ldd
->fname
),
1069 ldd
->cmpr_meth
, (ldd
->cmi
? ldd
->cmi
->name
: "?"));
// The compressed data must lie entirely inside the input file.
1073 if(md
->file_data_pos
+md
->cmpr_size
> c
->infile
->len
) {
1074 de_err(c
, "%s: Data goes beyond end of file", ucstring_getpsz_d(ldd
->fname
));
1078 if(md
->is_symlink
) {
1079 de_warn(c
, "\"%s\" is a symbolic link. It will not be extracted as a link.",
1080 ucstring_getpsz_d(ldd
->fname
));
1083 fi
= de_finfo_create(c
);
1084 fi
->detect_root_dot_dir
= 1;
// Only set an output name when the member actually has a filename.
1086 if(ucstring_isnonempty(ldd
->fname
)) {
1087 unsigned int snflags
= DE_SNFLAG_FULLPATH
;
1089 if(md
->has_riscos_data
) {
1090 fmtutil_riscos_append_type_to_filename(c
, fi
, ldd
->fname
, &md
->rfa
, md
->is_dir
, 0);
1092 if(md
->is_dir
) snflags
|= DE_SNFLAG_STRIPTRAILINGSLASH
;
1093 de_finfo_set_name_from_ucstring(c
, fi
, ldd
->fname
, snflags
);
1094 fi
->original_filename_flag
= 1;
1097 // This is basically a hack to better deal with Deark's ZIP writer's habit of
1098 // using the NTFS field to store high resolution timestamps. The problem is
1099 // that there seems to be no standard way to indicate the lack of a particular
1101 // We disregard the NTFS Access or Creation timestamp in some cases, to make it
1102 // more likely that a ZIP file can be round-tripped through Deark, without
1103 // spurious timestamps appearing in the 0x5455 (extended timestamp) field.
1104 if(md
->questionable_atime
&& md
->has_extts
&& !md
->has_extts_atime
) {
1105 md
->tsdata
[DE_TIMESTAMPIDX_ACCESS
].ts
.is_valid
= 0;
1107 if(md
->questionable_crtime
&& md
->has_extts
&& !md
->has_extts_crtime
) {
1108 md
->tsdata
[DE_TIMESTAMPIDX_CREATE
].ts
.is_valid
= 0;
// Copy every timestamp that is still marked valid into the finfo.
1111 for(tsidx
=0; tsidx
<DE_TIMESTAMPIDX_COUNT
; tsidx
++) {
1112 if(md
->tsdata
[tsidx
].ts
.is_valid
) {
1113 fi
->timestamp
[tsidx
] = md
->tsdata
[tsidx
].ts
;
1117 if(md
->has_riscos_data
) {
1118 fi
->has_riscos_data
= 1;
1119 fi
->riscos_attribs
= md
->rfa
.attribs
;
1120 fi
->load_addr
= md
->rfa
.load_addr
;
1121 fi
->exec_addr
= md
->rfa
.exec_addr
;
// Directory / executable / non-executable are handled as mutually
// exclusive cases (else-if chain). The condition guarding the directory
// case is not visible here; presumably md->is_dir.
1125 fi
->is_directory
= 1;
1127 else if(md
->is_executable
) {
1128 fi
->mode_flags
|= DE_MODEFLAG_EXE
;
1130 else if(md
->is_nonexecutable
) {
1131 fi
->mode_flags
|= DE_MODEFLAG_NONEXE
;
1134 outf
= dbuf_create_output_file(c
, NULL
, fi
, 0);
// The result of decompression is deliberately ignored here.
1139 (void)do_decompress_member(c
, d
, md
, outf
);
1143 de_finfo_destroy(c
, fi
);
1144 de_dbg_indent_restore(c
, saved_indent_level
);
// Return a printable name for the "platform" code stored in the high byte
// of a ZIP version field ("version made by" / "version needed").
//
// ver_hi: platform code. It is derived from a byte read from the file, so
//   any value 0..255 must be handled safely.
// Returns a pointer to a static string: the table entry for codes 0..19,
//   "AtheOS/Syllable" for code 30, and "?" for everything else.
static const char *get_platform_name(unsigned int ver_hi)
{
	static const char *const pltf_names[20] = {
		"MS-DOS, etc.", "Amiga", "OpenVMS", "Unix",
		"VM/CMS", "Atari ST", "HPFS", "Macintosh",
		"Z-System", "CP/M", "NTFS or TOPS-20", "MVS or NTFS",
		"VSE or SMS/QDOS", "Acorn RISC OS", "VFAT", "MVS",
		"BeOS", "Tandem", "OS/400", "OS X" };

	// Never index the table with an out-of-range code; that would read
	// past the end of the array for untrusted input.
	if(ver_hi < (unsigned int)(sizeof(pltf_names)/sizeof(pltf_names[0]))) {
		return pltf_names[ver_hi];
	}
	// Code 30 is outside the contiguous range, so it is handled separately.
	if(ver_hi==30) {
		return "AtheOS/Syllable";
	}
	// Unknown platform: same placeholder this file uses for unknown
	// compression method names.
	return "?";
}
1162 // Look at the attributes, and set some other fields based on them.
// Inputs read: d->using_scanmode, md->ver_made_by_hi, md->attr_e,
// md->is_dir, md->uncmpr_size.
// Fields visibly set: md->is_executable, md->is_nonexecutable.
// NOTE(review): the assignments made in the directory / symlink / MS-DOS
// branches are on lines not visible in this excerpt (presumably md->is_dir
// and md->is_symlink) -- confirm against the full file.
1163 static void process_ext_attr(deark
*c
, lctx
*d
, struct member_data
*md
)
1165 if(d
->using_scanmode
) {
1166 // In this mode, there is no 'external attribs' field.
1170 if(md
->ver_made_by_hi
==3) { // Unix
1171 unsigned int unix_filetype
;
// The high 16 bits of attr_e are treated as a Unix mode; mask 0170000
// isolates the file-type bits.
1172 unix_filetype
= (md
->attr_e
>>16)&0170000;
// 0040000 is the directory file type.
1173 if(unix_filetype
== 0040000) {
// 0120000 is the symbolic link file type.
1176 else if(unix_filetype
== 0120000) {
// Any of the execute permission bits (owner/group/other).
1180 if((md
->attr_e
>>16)&0111) {
1181 md
->is_executable
= 1;
1184 md
->is_nonexecutable
= 1;
1188 // MS-DOS-style attributes.
1189 // Technically, we should only do this if
1190 // md->central_dir_entry_data.ver_made_by_hi==0.
1191 // However, most(?) zip programs set the low byte of the external attribs
1192 // to the equivalent MS-DOS attribs, at least in cases where it matters.
// 0x10 is the MS-DOS "directory" attribute bit.
1193 if(md
->attr_e
& 0x10) {
1197 // TODO: Support more platforms.
1198 // TODO: The 0x756e (ASi Unix) extra field might be important, as it contains
1199 // file permissions.
1201 if(md
->is_dir
&& md
->uncmpr_size
!=0) {
1202 // I'd expect a subdirectory entry to have zero size. If it doesn't,
1203 // let's just assume we misidentified it as a subdirectory, and
1204 // extract its data.
// Append a human-readable description of the "internal file attributes"
// (md->attr_i) to the string s.
// Recognized flags are appended by name ("text file"); whatever bits remain
// in bf afterwards are appended as a hexadecimal value.
// NOTE(review): the remaining parameter(s), the test selecting "text file",
// and the clearing of recognized bits are on lines not visible here.
1209 static void describe_internal_attr(deark
*c
, struct member_data
*md
,
// Working copy of the flags.
1212 unsigned int bf
= md
->attr_i
;
1215 ucstring_append_flags_item(s
, "text file");
1219 if(bf
!=0) { // Report any unrecognized flags
1220 ucstring_append_flags_itemf(s
, "0x%04x", bf
);
1224 // Uses dd->bit_flags, dd->cmpr_method
// Append a human-readable description of the general-purpose bit flags to
// the string s. Some bits are interpreted according to the compression
// method: method 6 (implode) describes its sliding dictionary and trees;
// methods 8 and 9 (deflate) describe a compression level taken from bits 1-2.
// Any bits left in bf at the end are appended as a hexadecimal value.
// NOTE(review): the tests for the individual flag bits, and the code that
// picks 'name' in the implode branch, are on lines not visible here.
1225 static void describe_general_purpose_bit_flags(deark
*c
, struct dir_entry_data
*dd
,
// Working copy of the flags.
1229 unsigned int bf
= dd
->bit_flags
;
1232 ucstring_append_flags_item(s
, "encrypted");
1236 if(dd
->cmpr_meth
==6) { // implode
1244 ucstring_append_flags_itemf(s
, "%s sliding dictionary", name
);
1253 ucstring_append_flags_itemf(s
, "%s trees", name
);
1256 if(dd
->cmpr_meth
==8 || dd
->cmpr_meth
==9) { // deflate flags
// Bits 1-2 (mask 0x0006) form a 2-bit compression-level code.
1259 code
= (bf
& 0x0006)>>1;
1261 case 1: name
="max"; break;
1262 case 2: name
="fast"; break;
1263 case 3: name
="super_fast"; break;
1264 default: name
="normal";
1266 ucstring_append_flags_itemf(s
, "cmprlevel=%s", name
);
// These bits have been described; remove them from the working copy.
1267 bf
-= (bf
& 0x0006);
1271 ucstring_append_flags_item(s
, "uses data descriptor");
1276 ucstring_append_flags_item(s
, "UTF-8");
1280 if(bf
!=0) { // Report any unrecognized flags
1281 ucstring_append_flags_itemf(s
, "0x%04x", bf
);
1285 // Read either a central directory entry (a.k.a. central directory file header),
1286 // or a local file header.
//
// md:           member being described; the parsed fields are stored either in
//               md->central_dir_entry_data or md->local_dir_entry_data (via dd),
//               or directly in md.
// is_central:   nonzero to parse a central directory entry (46-byte fixed
//               part), zero to parse a local file header (30-byte fixed part).
// pos1:         file offset of the header's signature.
// p_entry_size: receives fixed_header_size + fn_len + extra_len + comment_len.
// Callers treat a zero return value as failure.
// NOTE(review): several if/else lines that select central-only or local-only
// fields are not visible in this excerpt; the comments below describe only
// the statements that are shown.
1287 static int do_file_header(deark
*c
, lctx
*d
, struct member_data
*md
,
1288 int is_central
, i64 pos1
, i64
*p_entry_size
)
1292 i64 fn_len
, extra_len
, comment_len
;
1295 i64 fixed_header_size
;
1296 i64 mod_time_raw
, mod_date_raw
;
1297 struct dir_entry_data
*dd
; // Points to either md->central or md->local
1298 de_ucstring
*descr
= NULL
;
1299 struct de_timestamp dos_timestamp
;
1300 char timestamp_buf
[64];
// Scratch string reused for each flags/attributes description below.
1303 descr
= ucstring_create(c
);
1305 dd
= &md
->central_dir_entry_data
;
1306 fixed_header_size
= 46;
1307 de_dbg(c
, "central dir entry at %"I64_FMT
, pos
);
1310 dd
= &md
->local_dir_entry_data
;
1311 fixed_header_size
= 30;
// The local header can only be read if the member starts on this disk.
1312 if(md
->disk_number_start
!=d
->this_disk_num
) {
1313 de_err(c
, "Member file not in this ZIP file");
1316 de_dbg(c
, "local file header at %"I64_FMT
, pos
);
1318 de_dbg_indent(c
, 1);
// Validate the 4-byte signature for the kind of header being read.
1320 sig
= (u32
)de_getu32le_p(&pos
);
1321 if(is_central
&& sig
!=CODE_PK12
) {
1322 de_err(c
, "Central dir file header not found at %"I64_FMT
, pos1
);
1325 else if(!is_central
&& sig
!=CODE_PK34
) {
1326 de_err(c
, "Local file header not found at %"I64_FMT
, pos1
);
// "Version made by": high byte = platform code, low byte = ZIP spec version
// (printed as major.minor = lo/10 . lo%10).
1331 md
->ver_made_by
= (unsigned int)de_getu16le_p(&pos
);
1332 md
->ver_made_by_hi
= (unsigned int)((md
->ver_made_by
&0xff00)>>8);
1333 md
->ver_made_by_lo
= (unsigned int)(md
->ver_made_by
&0x00ff);
1334 de_dbg(c
, "version made by: platform=%u (%s), ZIP spec=%u.%u",
1335 md
->ver_made_by_hi
, get_platform_name(md
->ver_made_by_hi
),
1336 (unsigned int)(md
->ver_made_by_lo
/10), (unsigned int)(md
->ver_made_by_lo
%10));
// "Version needed to extract", split the same way.
1339 dd
->ver_needed
= (unsigned int)de_getu16le_p(&pos
);
1340 dd
->ver_needed_hi
= (unsigned int)((dd
->ver_needed
&0xff00)>>8);
1341 dd
->ver_needed_lo
= (unsigned int)(dd
->ver_needed
&0x00ff);
1342 de_dbg(c
, "version needed to extract: platform=%u (%s), ZIP spec=%u.%u",
1343 dd
->ver_needed_hi
, get_platform_name(dd
->ver_needed_hi
),
1344 (unsigned int)(dd
->ver_needed_lo
/10), (unsigned int)(dd
->ver_needed_lo
%10));
1346 dd
->bit_flags
= (unsigned int)de_getu16le_p(&pos
);
1347 dd
->cmpr_meth
= (int)de_getu16le_p(&pos
);
1348 dd
->cmi
= get_cmpr_meth_info(dd
->cmpr_meth
);
// Flag bit 0x800 selects UTF-8; it is passed on to the filename and comment
// readers below.
1350 utf8_flag
= (dd
->bit_flags
& 0x800)?1:0;
1351 ucstring_empty(descr
);
1352 describe_general_purpose_bit_flags(c
, dd
, descr
);
1353 de_dbg(c
, "flags: 0x%04x (%s)", dd
->bit_flags
, ucstring_getpsz(descr
));
1355 de_dbg(c
, "cmpr method: %d (%s)", dd
->cmpr_meth
,
1356 (dd
->cmi
? dd
->cmi
->name
: "?"));
// Modification time and date are stored as two 16-bit DOS-format fields,
// time first.
1358 mod_time_raw
= de_getu16le_p(&pos
);
1359 mod_date_raw
= de_getu16le_p(&pos
);
1360 de_dos_datetime_to_timestamp(&dos_timestamp
, mod_date_raw
, mod_time_raw
);
// The DOS timestamp is marked as being in local time.
1361 dos_timestamp
.tzcode
= DE_TZCODE_LOCAL
;
1362 de_dbg_timestamp_to_string(c
, &dos_timestamp
, timestamp_buf
, sizeof(timestamp_buf
), 0);
1363 de_dbg(c
, "mod time: %s", timestamp_buf
);
1364 apply_timestamp(c
, d
, md
, DE_TIMESTAMPIDX_MODIFY
, &dos_timestamp
, 10);
1366 dd
->crc_reported
= (u32
)de_getu32le_p(&pos
);
1367 de_dbg(c
, "crc (reported): 0x%08x", (unsigned int)dd
->crc_reported
);
1369 dd
->cmpr_size
= de_getu32le_p(&pos
);
1370 dd
->uncmpr_size
= de_getu32le_p(&pos
);
1371 de_dbg(c
, "cmpr size: %" I64_FMT
", uncmpr size: %" I64_FMT
, dd
->cmpr_size
, dd
->uncmpr_size
);
1373 fn_len
= de_getu16le_p(&pos
);
1375 extra_len
= de_getu16le_p(&pos
);
1378 comment_len
= de_getu16le_p(&pos
);
// The member's data begins after the filename and the extra field.
1385 md
->file_data_pos
= pos
+ fn_len
+ extra_len
;
1389 md
->disk_number_start
= de_getu16le_p(&pos
);
1391 md
->attr_i
= (unsigned int)de_getu16le_p(&pos
);
1392 ucstring_empty(descr
);
1393 describe_internal_attr(c
, md
, descr
);
1394 de_dbg(c
, "internal file attributes: 0x%04x (%s)", md
->attr_i
,
1395 ucstring_getpsz(descr
));
1397 md
->attr_e
= (unsigned int)de_getu32le_p(&pos
);
1398 de_dbg(c
, "external file attributes: 0x%08x", md
->attr_e
);
1399 de_dbg_indent(c
, 1);
1402 // The low byte is, AFAIK, *almost* universally used for MS-DOS-style
1404 unsigned int dos_attrs
= (md
->attr_e
& 0xff);
1405 ucstring_empty(descr
);
1406 de_describe_dos_attribs(c
, dos_attrs
, descr
, 0);
// Labelled "(hypothetical)" unless the creating platform code is 0.
1407 de_dbg(c
, "%sMS-DOS attribs: 0x%02x (%s)",
1408 (md
->ver_made_by_hi
==0)?"":"(hypothetical) ",
1409 dos_attrs
, ucstring_getpsz(descr
));
1412 if((md
->attr_e
>>16) != 0) {
1413 // A number of platforms put Unix-style file attributes here, so
1414 // decode them as such whenever they are nonzero.
1415 de_dbg(c
, "%sUnix attribs: octal(%06o)",
1416 (md
->ver_made_by_hi
==3)?"":"(hypothetical) ",
1417 (unsigned int)(md
->attr_e
>>16));
1420 de_dbg_indent(c
, -1);
1422 md
->offset_of_local_header
= de_getu32le_p(&pos
);
1423 de_dbg(c
, "offset of local header: %"I64_FMT
", disk: %d", md
->offset_of_local_header
,
1424 (int)md
->disk_number_start
);
1428 de_dbg(c
, "filename_len: %d, extra_len: %d, comment_len: %d", (int)fn_len
,
1429 (int)extra_len
, (int)comment_len
);
1432 de_dbg(c
, "filename_len: %d, extra_len: %d", (int)fn_len
,
// Total size of this header, including its variable-length parts.
1436 *p_entry_size
= fixed_header_size
+ fn_len
+ extra_len
+ comment_len
;
// Variable-length parts follow the fixed part in this order:
// filename, extra data, comment.
1438 dd
->main_fname_pos
= pos1
+fixed_header_size
;
1439 dd
->main_fname_len
= fn_len
;
1440 do_read_filename(c
, d
, md
, dd
, pos1
+fixed_header_size
, fn_len
, utf8_flag
);
1443 do_extra_data(c
, d
, md
, dd
, pos1
+fixed_header_size
+fn_len
, extra_len
, is_central
);
1447 do_comment(c
, d
, pos1
+fixed_header_size
+fn_len
+extra_len
, comment_len
, utf8_flag
,
1448 "member file comment", "fcomment.txt");
// Offset-discrepancy handling: once a discrepancy has been used for one
// member it is applied unconditionally; otherwise, when a nonzero
// discrepancy is known, it is only adopted if the local header signature is
// missing at the reported offset but present at the adjusted offset.
1452 if(d
->used_offset_discrepancy
) {
1453 md
->offset_of_local_header
+= d
->offset_discrepancy
;
1454 de_dbg(c
, "assuming local header is really at %"I64_FMT
, md
->offset_of_local_header
);
1456 else if(d
->offset_discrepancy
!=0) {
1460 sig1
= (u32
)de_getu32le(md
->offset_of_local_header
);
1461 if(sig1
!=CODE_PK34
) {
1462 alt_pos
= md
->offset_of_local_header
+ d
->offset_discrepancy
;
1463 sig2
= (u32
)de_getu32le(alt_pos
);
1464 if(sig2
==CODE_PK34
) {
1465 de_warn(c
, "Local file header found at %"I64_FMT
" instead of %"I64_FMT
". "
1466 "Assuming offsets are wrong by %"I64_FMT
" bytes.",
1467 alt_pos
, md
->offset_of_local_header
, d
->offset_discrepancy
);
1468 md
->offset_of_local_header
+= d
->offset_discrepancy
;
1469 d
->used_offset_discrepancy
= 1;
1478 de_dbg_indent(c
, -1);
1479 ucstring_destroy(descr
);
1483 static struct member_data
*create_member_data(deark
*c
, lctx
*d
)
1485 struct member_data
*md
;
1487 md
= de_malloc(c
, sizeof(struct member_data
));
1488 md
->local_dir_entry_data
.fname
= ucstring_create(c
);
1489 md
->central_dir_entry_data
.fname
= ucstring_create(c
);
// Release the resources owned by a member_data record created by
// create_member_data(): the two filename strings are destroyed here.
// NOTE(review): the release of 'md' itself (and any NULL check) is on lines
// not visible in this excerpt.
1493 static void destroy_member_data(deark
*c
, struct member_data
*md
)
1496 ucstring_destroy(md
->central_dir_entry_data
.fname
);
1497 ucstring_destroy(md
->local_dir_entry_data
.fname
);
1501 static i32
ucstring_lastchar(de_ucstring
*s
)
1503 if(!s
|| s
->len
<1) return 0;
1504 return s
->str
[s
->len
-1];
1507 // Things to do after both the central and local headers have been read.
1508 // E.g., extract the file.
//
// Steps visible here:
//  1. fall back to the central-dir filename if the local one is empty;
//  2. choose the authoritative sizes/CRC (central header when the local
//     header's flag bit 0x0008 is set, local header otherwise);
//  3. derive is_dir / is_executable / etc. from the external attributes;
//  4. apply a trailing-'/' heuristic to detect subdirectories;
//  5. extract the file.
// One caller treats a zero return value as failure.
1509 static int do_process_member(deark
*c
, lctx
*d
, struct member_data
*md
)
1513 // If for some reason we have a central-dir filename but not a local-dir
1514 // filename, use the central-dir filename.
1515 if(ucstring_isempty(md
->local_dir_entry_data
.fname
) &&
1516 ucstring_isnonempty(md
->central_dir_entry_data
.fname
))
1518 ucstring_append_ucstring(md
->local_dir_entry_data
.fname
,
1519 md
->central_dir_entry_data
.fname
);
1522 // Set the final file size and crc fields.
1523 if(md
->local_dir_entry_data
.bit_flags
& 0x0008) {
// In scan mode no central directory is read, so there is nothing to fall
// back on.
1524 if(d
->using_scanmode
) {
1525 de_err(c
, "File is incompatible with scan mode");
1529 // Indicates that certain fields are not present in the local file header,
1530 // and are instead in a "data descriptor" after the file data.
1531 // Let's hope they are also in the central file header.
1532 md
->cmpr_size
= md
->central_dir_entry_data
.cmpr_size
;
1533 md
->uncmpr_size
= md
->central_dir_entry_data
.uncmpr_size
;
1534 md
->crc_reported
= md
->central_dir_entry_data
.crc_reported
;
1537 md
->cmpr_size
= md
->local_dir_entry_data
.cmpr_size
;
1538 md
->uncmpr_size
= md
->local_dir_entry_data
.uncmpr_size
;
1539 md
->crc_reported
= md
->local_dir_entry_data
.crc_reported
;
1542 process_ext_attr(c
, d
, md
);
1544 // In some cases, detect directories by checking whether the filename ends
// The heuristic is only applied to zero-length members, and only in scan
// mode or when the low byte of "version made by" is below 20.
1546 if(!md
->is_dir
&& md
->uncmpr_size
==0 &&
1547 (d
->using_scanmode
|| (md
->ver_made_by_lo
<20)))
1549 if(ucstring_lastchar(md
->local_dir_entry_data
.fname
) == '/') {
1550 de_dbg(c
, "[assuming this is a subdirectory]");
1555 do_extract_file(c
, d
, md
);
1562 // In *entry_size, returns the size of the central dir entry.
1563 // Returns 0 if the central dir entry could not even be parsed.
//
// Processes one member starting from its central directory entry at 'pos':
// reads the central header, then the local header at the offset the central
// header reported, then hands the member to do_process_member().
// central_index is used only for the debug message.
// The debug indentation level is saved on entry and restored on exit.
1564 static int do_member_from_central_dir_entry(deark
*c
, lctx
*d
,
1565 struct member_data
*md
, i64 central_index
, i64 pos
, i64
*entry_size
)
1569 int saved_indent_level
;
1571 de_dbg_indent_save(c
, &saved_indent_level
);
// The entry must start inside the central directory's reported extent.
1575 if(pos
>= d
->central_dir_offset
+d
->central_dir_byte_size
) {
1579 de_dbg(c
, "central dir entry #%d", (int)central_index
);
1580 de_dbg_indent(c
, 1);
1582 // Read the central dir file header
1583 if(!do_file_header(c
, d
, md
, 1, pos
, entry_size
)) {
1587 // If we were able to read the central dir file header, we might be able
1588 // to continue and read more files, even if the local file header fails.
1591 // Read the local file header
// The local header's entry size goes to a scratch variable; it is not
// reported to the caller.
1592 if(!do_file_header(c
, d
, md
, 0, md
->offset_of_local_header
, &tmp_entry_size
)) {
1596 do_process_member(c
, d
, md
);
1599 de_dbg_indent_restore(c
, saved_indent_level
);
1603 static int do_central_dir_entry(deark
*c
, lctx
*d
,
1604 i64 central_index
, i64 pos
, i64
*entry_size
)
1606 struct member_data
*md
= NULL
;
1609 md
= create_member_data(c
, d
);
1610 ret
= do_member_from_central_dir_entry(c
, d
, md
, central_index
, pos
, entry_size
);
1611 destroy_member_data(c
, md
);
// Process one member using only its local file header at pos1 (no central
// directory entry). Used by scan mode.
// On success, *pmember_size receives the distance from pos1 to the end of
// the member's compressed data (header + filename + extra data + data).
// A temporary member_data record is created and destroyed here.
// NOTE(review): the return value and the 'done' label are on lines not
// visible in this excerpt.
1615 static int do_local_dir_only(deark
*c
, lctx
*d
, i64 pos1
, i64
*pmember_size
)
1617 struct member_data
*md
= NULL
;
1621 md
= create_member_data(c
, d
);
1623 md
->offset_of_local_header
= pos1
;
1625 // Read the local file header
1626 if(!do_file_header(c
, d
, md
, 0, md
->offset_of_local_header
, &tmp_entry_size
)) {
1630 if(!do_process_member(c
, d
, md
)) goto done
;
1632 *pmember_size
= md
->file_data_pos
+ md
->cmpr_size
- pos1
;
1636 destroy_member_data(c
, md
);
// Scan-mode driver: instead of using the central directory, search the file
// for local file header signatures ("PK\x03\x04") and process each member
// found from its local header alone.
// NOTE(review): the loop header, the handling of a failed search, and the
// advancing of 'pos' are on lines not visible in this excerpt.
1640 static void de_run_zip_scanmode(deark
*c
, lctx
*d
)
1644 d
->using_scanmode
= 1;
1649 i64 member_size
= 0;
// Stop when fewer than 4 bytes (one signature) remain.
1651 if(pos
> c
->infile
->len
-4) break;
1652 ret
= dbuf_search(c
->infile
, g_zipsig34
, 4, pos
, c
->infile
->len
-pos
, &foundpos
);
1655 de_dbg(c
, "zip member at %"I64_FMT
, pos
);
1656 de_dbg_indent(c
, 1);
1657 ret
= do_local_dir_only(c
, d
, pos
, &member_size
);
1658 de_dbg_indent(c
, -1);
// A member size below 1 ends the scan.
1660 if(member_size
<1) break;
// Iterate over the central directory, processing
// d->central_dir_num_entries entries starting at d->central_dir_offset.
// The caller treats a zero return value as failure.
// NOTE(review): the advancing of 'pos' by entry_size and the return value
// are on lines not visible in this excerpt.
1665 static int do_central_dir(deark
*c
, lctx
*d
)
1672 pos
= d
->central_dir_offset
;
1673 de_dbg(c
, "central dir at %"I64_FMT
, pos
);
1674 de_dbg_indent(c
, 1);
1676 for(i
=0; i
<d
->central_dir_num_entries
; i
++) {
1677 if(!do_central_dir_entry(c
, d
, i
, pos
, &entry_size
)) {
1678 // TODO: Decide exactly what to do if something fails.
1686 de_dbg_indent(c
, -1);
// Read the Zip64 end-of-central-directory record at d->zip64_eocd_pos and
// store its values in the d->zip64_* fields:
//   zip64_cd_disknum, zip64_num_centr_dir_entries_this_disk,
//   zip64_num_centr_dir_entries_total, zip64_centr_dir_byte_size,
//   zip64_cd_pos.
// The caller treats a zero return value as failure. When the expected
// signature is absent, a warning is issued and retval is still set to 1.
// The debug indentation level is saved on entry and restored on exit.
1690 static int do_zip64_eocd(deark
*c
, lctx
*d
)
1695 int saved_indent_level
;
1696 UI ver
, ver_hi
, ver_lo
;
1698 de_dbg_indent_save(c
, &saved_indent_level
);
1700 if(d
->zip64_eocd_disknum
!=0) {
1701 de_warn(c
, "This might be a multi-disk Zip64 archive, which is not supported");
1707 pos
= d
->zip64_eocd_pos
;
// dbuf_memcmp() returning nonzero means the signature did not match.
1708 if(dbuf_memcmp(c
->infile
, pos
, g_zipsig66
, 4)) {
1709 de_warn(c
, "Expected Zip64 end-of-central-directory record not found at %"I64_FMT
, pos
);
1710 retval
= 1; // Maybe the eocd locator sig was a false positive?
1715 de_dbg(c
, "zip64 end-of-central-dir record at %"I64_FMT
, pos
);
1717 de_dbg_indent(c
, 1);
// 64-bit record size field; printed with a "(12+)" prefix.
1719 n
= de_geti64le(pos
); pos
+= 8;
1720 de_dbg(c
, "size of zip64 eocd record: (12+)%"I64_FMT
, n
);
// Version fields: high byte = platform code, low byte = spec version.
1722 ver
= (UI
)de_getu16le_p(&pos
);
1723 ver_hi
= (ver
&0xff00)>>8;
1724 ver_lo
= ver
&0x00ff;
1725 de_dbg(c
, "version made by: platform=%u (%s), ZIP spec=%u.%u",
1726 ver_hi
, get_platform_name(ver_hi
), (UI
)(ver_lo
/10), (UI
)(ver_lo
%10));
1728 ver
= (UI
)de_getu16le_p(&pos
);
1729 ver_hi
= (ver
&0xff00)>>8;
1730 ver_lo
= ver
&0x00ff;
1731 de_dbg(c
, "version needed: platform=%u (%s), ZIP spec=%u.%u",
1732 ver_hi
, get_platform_name(ver_hi
), (UI
)(ver_lo
/10), (UI
)(ver_lo
%10));
1734 n
= de_getu32le_p(&pos
);
1735 de_dbg(c
, "this disk num: %"I64_FMT
, n
);
1737 d
->zip64_cd_disknum
= (unsigned int)de_getu32le_p(&pos
);
1738 d
->zip64_num_centr_dir_entries_this_disk
= de_geti64le(pos
); pos
+= 8;
1739 de_dbg(c
, "central dir num entries on this disk: %"I64_FMT
, d
->zip64_num_centr_dir_entries_this_disk
);
1740 d
->zip64_num_centr_dir_entries_total
= de_geti64le(pos
); pos
+= 8;
1741 de_dbg(c
, "central dir num entries: %"I64_FMT
, d
->zip64_num_centr_dir_entries_total
);
1742 d
->zip64_centr_dir_byte_size
= de_geti64le(pos
); pos
+= 8;
1743 de_dbg(c
, "central dir size: %"I64_FMT
, d
->zip64_centr_dir_byte_size
);
1744 d
->zip64_cd_pos
= de_geti64le(pos
); pos
+= 8;
1745 de_dbg(c
, "central dir offset: %"I64_FMT
", disk: %u",
1746 d
->zip64_cd_pos
, d
->zip64_cd_disknum
);
1750 de_dbg_indent_restore(c
, saved_indent_level
);
// Look for a Zip64 end-of-central-directory locator in the 20 bytes
// immediately before the regular end-of-central-directory record. If its
// signature ("PK\x06\x07") is present, record the disk number and file
// offset of the Zip64 EOCD record in d->zip64_eocd_disknum and
// d->zip64_eocd_pos.
// NOTE(review): what happens when the signature is absent, and any other
// state set on success, are on lines not visible in this excerpt.
1754 static void do_zip64_eocd_locator(deark
*c
, lctx
*d
)
1757 i64 pos
= d
->end_of_central_dir_pos
- 20;
// dbuf_memcmp() returning nonzero means the signature did not match.
1759 if(dbuf_memcmp(c
->infile
, pos
, g_zipsig67
, 4)) {
1762 de_dbg(c
, "zip64 eocd locator found at %"I64_FMT
, pos
);
1765 de_dbg_indent(c
, 1);
1766 d
->zip64_eocd_disknum
= (unsigned int)de_getu32le_p(&pos
);
1767 d
->zip64_eocd_pos
= de_geti64le(pos
); pos
+= 8;
1768 de_dbg(c
, "offset of zip64 eocd: %"I64_FMT
", disk: %u",
1769 d
->zip64_eocd_pos
, d
->zip64_eocd_disknum
);
1770 n
= de_getu32le_p(&pos
);
1771 de_dbg(c
, "total number of disks: %u", (unsigned int)n
);
1772 de_dbg_indent(c
, -1);
// Parse the end-of-central-directory record at d->end_of_central_dir_pos.
// Fields are read at fixed offsets from the start of the record:
//   +4  this disk number              (16 bit)
//   +6  disk where central dir starts (16 bit)
//   +8  entries on this disk          (16 bit)
//   +10 total entries                 (16 bit)
//   +12 central dir size in bytes     (32 bit)
//   +16 central dir offset            (32 bit)
//   +20 comment length                (16 bit), comment text at +22
// For Zip64 files, a field holding the all-ones value (0xffff / 0xffffffff)
// is replaced by the corresponding value from the Zip64 EOCD record.
// Also reports disk spanning as an error, and cross-checks the reported
// central dir offset against (end position - reported size), recording
// d->offset_discrepancy when a central dir signature is found at the
// computed position instead.
// The caller treats a zero return value as failure.
1775 static int do_end_of_central_dir(deark
*c
, lctx
*d
)
1778 i64 num_entries_this_disk
;
1779 i64 disk_num_with_central_dir_start
;
1781 i64 alt_central_dir_offset
;
1784 pos
= d
->end_of_central_dir_pos
;
1785 de_dbg(c
, "end-of-central-dir record at %"I64_FMT
, pos
);
1786 de_dbg_indent(c
, 1);
1788 d
->this_disk_num
= de_getu16le(pos
+4);
1789 de_dbg(c
, "this disk num: %"I64_FMT
, d
->this_disk_num
);
1790 disk_num_with_central_dir_start
= de_getu16le(pos
+6);
1792 num_entries_this_disk
= de_getu16le(pos
+8);
1793 de_dbg(c
, "central dir num entries on this disk: %"I64_FMT
, num_entries_this_disk
);
1794 if(d
->is_zip64
&& (num_entries_this_disk
==0xffff)) {
1795 num_entries_this_disk
= d
->zip64_num_centr_dir_entries_this_disk
;
1798 d
->central_dir_num_entries
= de_getu16le(pos
+10);
1799 d
->central_dir_byte_size
= de_getu32le(pos
+12);
1800 d
->central_dir_offset
= de_getu32le(pos
+16);
1801 de_dbg(c
, "central dir num entries: %"I64_FMT
, d
->central_dir_num_entries
);
1802 if(d
->is_zip64
&& (d
->central_dir_num_entries
==0xffff)) {
1803 d
->central_dir_num_entries
= d
->zip64_num_centr_dir_entries_total
;
1806 de_dbg(c
, "central dir size: %"I64_FMT
, d
->central_dir_byte_size
);
1807 if(d
->is_zip64
&& (d
->central_dir_byte_size
==0xffffffffLL
)) {
1808 d
->central_dir_byte_size
= d
->zip64_centr_dir_byte_size
;
1811 de_dbg(c
, "central dir offset: %"I64_FMT
", disk: %"I64_FMT
, d
->central_dir_offset
,
1812 disk_num_with_central_dir_start
);
1813 if(d
->is_zip64
&& (d
->central_dir_offset
==0xffffffffLL
)) {
1814 d
->central_dir_offset
= d
->zip64_cd_pos
;
1817 comment_length
= de_getu16le(pos
+20);
1818 de_dbg(c
, "comment length: %d", (int)comment_length
);
1819 if(comment_length
>0) {
1820 // The comment for the whole .ZIP file presumably has to use
1821 // cp437 encoding. There's no flag that could indicate otherwise.
1822 do_comment(c
, d
, pos
+22, comment_length
, 0,
1823 "ZIP file comment", "comment.txt");
1826 // TODO: Figure out exactly how to detect disk spanning.
1827 if(disk_num_with_central_dir_start
!=d
->this_disk_num
||
1828 (d
->is_zip64
&& d
->zip64_eocd_disknum
!=d
->this_disk_num
))
1830 de_err(c
, "Disk spanning not supported");
1834 if(d
->this_disk_num
!=0) {
1835 de_warn(c
, "This ZIP file might be part of a multi-part archive, and "
1836 "might not be supported correctly");
1839 if(num_entries_this_disk
!=d
->central_dir_num_entries
) {
1840 de_warn(c
, "This ZIP file might not be supported correctly "
1841 "(number-of-entries-this-disk=%d, number-of-entries-total=%d)",
1842 (int)num_entries_this_disk
, (int)d
->central_dir_num_entries
);
// Where the central dir would start if it ended immediately before the
// (Zip64) end-of-central-directory record.
1845 alt_central_dir_offset
=
1846 (d
->is_zip64
? d
->zip64_eocd_pos
: d
->end_of_central_dir_pos
) -
1847 d
->central_dir_byte_size
;
1849 if(alt_central_dir_offset
!= d
->central_dir_offset
) {
1852 de_warn(c
, "Inconsistent central directory offset. Reported to be %"I64_FMT
", "
1853 "but based on its reported size, it should be %"I64_FMT
".",
1854 d
->central_dir_offset
, alt_central_dir_offset
);
// Only trust the computed position if a central dir signature is there.
1856 sig
= (u32
)de_getu32le(alt_central_dir_offset
);
1857 if(sig
==CODE_PK12
) {
1858 d
->offset_discrepancy
= alt_central_dir_offset
- d
->central_dir_offset
;
1859 de_dbg(c
, "likely central dir found at %"I64_FMT
, alt_central_dir_offset
);
1860 d
->central_dir_offset
= alt_central_dir_offset
;
1867 de_dbg_indent(c
, -1);
// Normal (central-directory-based) processing:
//  1. locate the end-of-central-directory record, reusing the result cached
//     in c->detection_data by format detection when available;
//  2. if it was not found, report an error (suggesting scan mode when the
//     file starts with a local file header signature);
//  3. read the Zip64 locator and, if applicable, the Zip64 EOCD record;
//  4. declare the format ("ZIP-Zip64" or "ZIP");
//  5. read the end-of-central-directory record, then the central directory.
// NOTE(review): the conditions selecting between some of these branches, and
// the 'done' label, are on lines not visible in this excerpt.
1871 static void de_run_zip_normally(deark
*c
, lctx
*d
)
// Avoid searching for the EOCD twice if detection already looked for it.
1875 if(c
->detection_data
&& c
->detection_data
->zip_eocd_looked_for
) {
1876 eocd_found
= (int)c
->detection_data
->zip_eocd_found
;
1877 d
->end_of_central_dir_pos
= c
->detection_data
->zip_eocd_pos
;
1880 eocd_found
= fmtutil_find_zip_eocd(c
, c
->infile
, &d
->end_of_central_dir_pos
);
1883 if(c
->module_disposition
==DE_MODDISP_AUTODETECT
||
1884 c
->module_disposition
==DE_MODDISP_EXPLICIT
)
1886 if(de_getu32le(0)==CODE_PK34
) {
1887 de_err(c
, "ZIP central directory not found. "
1888 "You could try \"-opt zip:scanmode\".");
1892 de_err(c
, "Not a valid ZIP file");
1896 de_dbg(c
, "end-of-central-dir record found at %"I64_FMT
,
1897 d
->end_of_central_dir_pos
);
1899 do_zip64_eocd_locator(c
, d
);
1902 if(!do_zip64_eocd(c
, d
)) goto done
;
1906 de_declare_fmt(c
, "ZIP-Zip64");
1908 de_declare_fmt(c
, "ZIP");
1910 if(!do_end_of_central_dir(c
, d
)) {
1914 if(!do_central_dir(c
, d
)) {
// Module entry point (installed as mi->run_fn).
// Allocates the module context, selects the default text encoding for
// filenames and comments (the input encoding, defaulting to CP437), creates
// the CRC-32 object, and then runs either scan mode (when the "zip:scanmode"
// option is given) or normal central-directory-based processing.
// NOTE(review): the release of the context 'd' is on lines not visible in
// this excerpt.
1922 static void de_run_zip(deark
*c
, de_module_params
*mparams
)
1927 d
= de_malloc(c
, sizeof(lctx
));
1929 enc
= de_get_input_encoding(c
, NULL
, DE_ENCODING_CP437
);
1930 d
->default_enc_for_filenames
= enc
;
1931 d
->default_enc_for_comments
= enc
;
1933 d
->crco
= de_crcobj_create(c
, DE_CRCOBJ_CRC32_IEEE
);
1935 if(de_get_ext_option(c
, "zip:scanmode")) {
1936 de_run_zip_scanmode(c
, d
);
1939 de_run_zip_normally(c
, d
);
1943 de_crcobj_destroy(d
->crco
);
// Format detection (installed as mi->identify_fn). Returns a confidence
// value:
//  - file begins with a local file header signature: 100 with a ".zip"
//    extension, otherwise 90;
//  - an end-of-central-directory signature sits exactly 22 bytes before the
//    end of the file (i.e. an EOCD record with no comment): 100 with a
//    ".zip" extension, otherwise 19;
//  - otherwise, if another format has already reached confidence >= 19,
//    returns 0 without searching further;
//  - otherwise, for files with an "MZ" signature or a ".zip" extension, the
//    EOCD record is searched for and the result is cached in
//    c->detection_data for later use.
// NOTE(review): the values returned after the EOCD search are on lines not
// visible in this excerpt.
1948 static int de_identify_zip(deark
*c
)
1954 has_zip_ext
= de_input_file_has_ext(c
, "zip");
1959 if(!de_memcmp(b
, g_zipsig34
, 4)) {
1960 return has_zip_ext
? 100 : 90;
1962 if(b
[0]=='M' && b
[1]=='Z') has_mz_sig
= 1;
// 22 bytes is the size of an EOCD record with an empty comment.
1964 if(c
->infile
->len
>= 22) {
1965 de_read(b
, c
->infile
->len
- 22, 4);
1966 if(!de_memcmp(b
, g_zipsig56
, 4)) {
1967 return has_zip_ext
? 100 : 19;
1971 // Things to consider:
1972 // * We want de_fmtutil_find_zip_eocd() to be called no more than once, and
1973 // only on files that for some reason we suspect could be ZIP files.
1974 // * If the user disables exe format detection (e.g. with "-onlydetect zip"),
1975 // we want self-extracting-ZIP .exe files to be detected as ZIP instead.
1976 // * And we want the above to work even if the file has a ZIP file comment,
1977 // making it expensive to detect as ZIP.
1979 // Tests below can't return a confidence higher than this.
1980 if(c
->detection_data
->best_confidence_so_far
>= 19) return 0;
1984 if(has_mz_sig
|| has_zip_ext
) {
// Record that the search was done, so that the run function can reuse the
// result instead of searching again.
1987 c
->detection_data
->zip_eocd_looked_for
= 1;
1988 if(fmtutil_find_zip_eocd(c
, c
->infile
, &eocd_pos
)) {
1989 c
->detection_data
->zip_eocd_found
= 1;
1990 c
->detection_data
->zip_eocd_pos
= eocd_pos
;
// Print the module-specific options (installed as mi->help_fn).
1998 static void de_help_zip(deark
*c
)
2000 de_msg(c
, "-opt zip:scanmode : Do not use the \"central directory\"");
2001 de_msg(c
, "-opt zip:implodebug : Behave like PKZIP 1.01/1.02");
// Module registration: fills in the module-info record with the ZIP
// module's description and its run / identify / help callbacks.
// NOTE(review): any further assignments (e.g. the module id) are on lines
// not visible in this excerpt.
2004 void de_module_zip(deark
*c
, struct deark_module_info
*mi
)
2007 mi
->desc
= "ZIP archive";
2008 mi
->run_fn
= de_run_zip
;
2009 mi
->identify_fn
= de_identify_zip
;
2010 mi
->help_fn
= de_help_zip
;