Minor refactoring, related to lzah
[deark.git] / src / deark-zip.c
blob4990ae1a425a1c291b2a6d8ee92c890e86e790a6
1 // This file is part of Deark.
2 // Copyright (C) 2016-2019 Jason Summers
3 // See the file COPYING for terms of use.
5 // ZIP encoding
7 #define DE_NOT_IN_MODULE
8 #include "deark-config.h"
9 #include "deark-private.h"
10 #include "deark-fmtutil.h"
// TODO: Finish removing the "mz" symbols, and other miniz things.
// Compression level / strategy values used by this file.
#define MZ_NO_COMPRESSION   0  // used for directory entries
#define MZ_BEST_COMPRESSION 9  // default level when no option is given
#define MZ_DEFAULT_LEVEL    6  // fallback when a negative level is requested
#define MZ_DEFAULT_STRATEGY 0

// "Version made by" field value.
// High byte: 03 = Unix.
// Low byte: 63 decimal = ZIP spec v6.3 (first version to document the
// UTF-8 flag).
#define ZIPENC_VER_MADE_BY ((3<<8) | 63)

// Record signatures, as written (little-endian) by this file.
#define CODE_PK12 0x02014b50U  // central directory entry
#define CODE_PK34 0x04034b50U  // local file header
#define CODE_PK56 0x06054b50U  // end-of-central-directory (EOCD) record
#define CODE_PK66 0x06064b50U  // Zip64 EOCD record
#define CODE_PK67 0x07064b50U  // Zip64 EOCD locator
28 struct zipw_md {
29 struct de_timestamp modtime;
30 struct de_timestamp actime;
31 struct de_timestamp crtime;
32 i64 modtime_unix;
33 unsigned int modtime_dosdate;
34 unsigned int modtime_dostime;
35 i64 modtime_as_FILETIME; // valid if nonzero
36 i64 actime_as_FILETIME;
37 i64 crtime_as_FILETIME;
38 u8 is_executable;
39 u8 is_directory;
40 dbuf *eflocal;
41 dbuf *efcentral;
44 struct zipw_ctx {
45 deark *c;
46 const char *pFilename;
47 unsigned int cmprlevel;
48 i64 membercount;
49 dbuf *outf;
50 dbuf *cdir; // central directory
51 struct de_crcobj *crc32o;
54 static int is_valid_32bit_unix_time(i64 ut)
56 return (ut >= -0x80000000LL) && (ut <= 0x7fffffffLL);
59 // Create and initialize the main ZIP archive
60 int de_zip_create_file(deark *c)
62 struct zipw_ctx *zzz;
63 const char *opt_level;
65 if(c->zip_data) return 1; // Already created. Shouldn't happen.
67 zzz = de_malloc(c, sizeof(struct zipw_ctx));
68 zzz->c = c;
69 c->zip_data = (void*)zzz;
70 zzz->crc32o = de_crcobj_create(c, DE_CRCOBJ_CRC32_IEEE);
72 zzz->cmprlevel = MZ_BEST_COMPRESSION; // default
73 opt_level = de_get_ext_option(c, "archive:zipcmprlevel");
74 if(opt_level) {
75 i64 opt_level_n = de_atoi64(opt_level);
76 if(opt_level_n>9) {
77 zzz->cmprlevel = 9;
79 else if(opt_level_n<0) {
80 zzz->cmprlevel = MZ_DEFAULT_LEVEL;
82 else {
83 zzz->cmprlevel = (unsigned int)opt_level_n;
87 if(c->archive_to_stdout) {
88 zzz->pFilename = "[stdout]";
90 else {
91 if(c->output_archive_filename) {
92 zzz->pFilename = c->output_archive_filename;
94 else {
95 zzz->pFilename = "output.zip";
99 if(c->archive_to_stdout) {
100 zzz->outf = dbuf_create_unmanaged_file_stdout(c, "[ZIP stdout stream]");
102 else {
103 de_info(c, "Creating %s", zzz->pFilename);
104 zzz->outf = dbuf_create_unmanaged_file(c, zzz->pFilename, c->overwrite_mode, 0);
107 zzz->cdir = dbuf_create_membuf(c, 1024, 0);
109 if(zzz->outf->btype==DBUF_TYPE_NULL) {
110 de_err(c, "Failed to create ZIP file");
111 dbuf_close(zzz->outf);
112 zzz->outf = NULL;
113 return 0;
116 return 1;
119 static void set_dos_modtime(struct zipw_md *md)
121 struct de_timestamp tmpts;
122 struct de_struct_tm tm2;
124 // Clamp to the range of times supported
125 if(md->modtime_unix < 315532800) { // 1 Jan 1980 00:00:00
126 de_unix_time_to_timestamp(315532800, &tmpts, 0x0);
127 de_gmtime(&tmpts, &tm2);
129 else if(md->modtime_unix > 4354819198LL) { // 31 Dec 2107 23:59:58
130 de_unix_time_to_timestamp(4354819198LL, &tmpts, 0x0);
131 de_gmtime(&tmpts, &tm2);
133 else {
134 de_gmtime(&md->modtime, &tm2);
137 md->modtime_dostime = (unsigned int)(((tm2.tm_hour) << 11) +
138 ((tm2.tm_min) << 5) + ((tm2.tm_sec) >> 1));
139 md->modtime_dosdate = (unsigned int)(((tm2.tm_fullyear - 1980) << 9) +
140 ((tm2.tm_mon + 1) << 5) + tm2.tm_mday);
144 static void do_UT_times(deark *c, struct zipw_md *md,
145 dbuf *ef, int is_central)
147 int write_crtime = 0;
148 int write_actime = 0;
149 i64 num_timestamps = 0;
150 i64 actime_unix = 0;
151 i64 crtime_unix = 0;
152 u8 flags = 0;
153 // Note: Although our 0x5455 central and local extra data fields happen to
154 // be identical, that is not generally the case.
156 if(!is_central) {
157 if(md->actime.is_valid) {
158 actime_unix = de_timestamp_to_unix_time(&md->actime);
159 if(is_valid_32bit_unix_time(actime_unix)) {
160 write_actime = 1;
164 if(md->crtime.is_valid) {
165 crtime_unix = de_timestamp_to_unix_time(&md->crtime);
166 if(is_valid_32bit_unix_time(crtime_unix)) {
167 write_crtime = 1;
172 // Always write mod time
173 num_timestamps++;
174 flags |= 0x01;
176 if(write_actime) {
177 num_timestamps++;
178 flags |= 0x02;
181 if(write_crtime) {
182 num_timestamps++;
183 flags |= 0x04;
186 dbuf_writeu16le(ef, 0x5455);
187 dbuf_writeu16le(ef, (i64)(1+4*num_timestamps));
188 dbuf_writebyte(ef, flags); // tells which fields are present
189 dbuf_writei32le(ef, md->modtime_unix);
190 if(write_actime) {
191 dbuf_writei32le(ef, actime_unix);
193 if(write_crtime) {
194 dbuf_writei32le(ef, crtime_unix);
198 static void do_ntfs_times(deark *c, struct zipw_md *md,
199 dbuf *ef, int is_central)
201 u64 modtm, actm, crtm;
203 dbuf_writeu16le(ef, 0x000a); // = NTFS
204 dbuf_writeu16le(ef, 32); // data size
205 dbuf_write_zeroes(ef, 4);
206 dbuf_writeu16le(ef, 0x0001); // file times element
207 dbuf_writeu16le(ef, 24); // element data size
208 // We only necessarily know the mod time, but we have to write something for
209 // the others.
210 modtm = (u64)md->modtime_as_FILETIME;
211 actm = (md->actime_as_FILETIME>0) ? (u64)md->actime_as_FILETIME : modtm;
212 crtm = (md->crtime_as_FILETIME>0) ? (u64)md->crtime_as_FILETIME : modtm;
213 dbuf_writeu64le(ef, modtm);
214 dbuf_writeu64le(ef, actm);
215 dbuf_writeu64le(ef, crtm);
218 // uncmpr_data must be a membuf
219 static int zipw_deflate(deark *c, struct zipw_ctx *zzz, dbuf *uncmpr_data,
220 dbuf *cmpr_data, unsigned int level)
222 int retval = 0;
223 int ret;
224 const u8 *uncmpr_mem;
225 struct fmtutil_tdefl_ctx *tdctx = NULL;
227 tdctx = fmtutil_tdefl_create(c, cmpr_data,
228 fmtutil_tdefl_create_comp_flags_from_zip_params(level, -15, MZ_DEFAULT_STRATEGY));
230 uncmpr_mem = dbuf_get_membuf_direct_ptr(uncmpr_data);
231 if(uncmpr_mem==NULL && uncmpr_data->len!=0) goto done;
233 ret = fmtutil_tdefl_compress_buffer(tdctx, uncmpr_mem, (size_t)uncmpr_data->len,
234 FMTUTIL_TDEFL_FINISH);
235 if(ret != FMTUTIL_TDEFL_STATUS_DONE) goto done;
236 retval = 1;
238 done:
239 if(retval==0) {
240 de_err(c, "Deflate compression error");
242 fmtutil_tdefl_destroy(tdctx);
243 return retval;
246 static void zipw_add_memberfile(deark *c, struct zipw_ctx *zzz, struct zipw_md *md,
247 dbuf *f, const char *name, unsigned int level_and_flags)
249 i64 ldir_offset;
250 i64 fnlen;
251 u32 crc;
252 int try_compression = 0;
253 int using_compression = 0;
254 dbuf *cmpr_data = NULL;
255 i64 cmpr_len;
256 unsigned int bit_flags = 0;
257 unsigned int ext_attributes;
258 unsigned int ver_needed;
260 // Just a sanity check; we'll run into some other limit long before this
261 if(zzz->membercount >= 0x7fffffff) {
262 de_err(c, "Maximum number of ZIP member files exceeded");
263 goto done;
266 de_crcobj_reset(zzz->crc32o);
267 de_crcobj_addslice(zzz->crc32o, f, 0, f->len);
268 crc = de_crcobj_getval(zzz->crc32o);
270 ldir_offset = zzz->outf->len;
271 if(ldir_offset > 0xffffffffLL) {
272 de_err(c, "Maximum ZIP file size exceeded");
273 goto done;
275 if(f->len > 0xffffffffLL) {
276 de_err(c, "Maximum ZIP member file size exceeded");
277 goto done;
279 cmpr_len = f->len; // default
281 if(f->len>5 && !md->is_directory) {
282 try_compression = 1;
285 if(try_compression) {
286 unsigned int level;
288 cmpr_data = dbuf_create_membuf(c, 0, 0);
290 if ((int)level_and_flags < 0)
291 level_and_flags = MZ_DEFAULT_LEVEL;
292 level = level_and_flags & 0xF;
294 zipw_deflate(c, zzz, f, cmpr_data, level);
296 if(cmpr_data->len < f->len) {
297 using_compression = 1;
298 cmpr_len = cmpr_data->len;
300 // This is the logic used by Info-Zip
301 if(level<=2) bit_flags |= 4;
302 else if(level>=8) bit_flags |= 2;
304 else { // No savings - Discard compressed data
305 dbuf_close(cmpr_data);
306 cmpr_data = NULL;
310 bit_flags |= 0x0800; // Use UTF-8 filenames
312 dbuf_writeu32le(zzz->cdir, CODE_PK12);
313 dbuf_writeu32le(zzz->outf, CODE_PK34);
314 dbuf_writeu16le(zzz->cdir, ZIPENC_VER_MADE_BY);
316 if(using_compression) ver_needed = 20;
317 else if(md->is_directory) ver_needed = 20;
318 else ver_needed = 10;
320 dbuf_writeu16le(zzz->cdir, ver_needed);
321 dbuf_writeu16le(zzz->outf, ver_needed);
323 dbuf_writeu16le(zzz->cdir, bit_flags);
324 dbuf_writeu16le(zzz->outf, bit_flags);
326 dbuf_writeu16le(zzz->cdir, using_compression?8:0); // cmpr method
327 dbuf_writeu16le(zzz->outf, using_compression?8:0);
329 dbuf_writeu16le(zzz->cdir, md->modtime_dostime);
330 dbuf_writeu16le(zzz->outf, md->modtime_dostime);
331 dbuf_writeu16le(zzz->cdir, md->modtime_dosdate);
332 dbuf_writeu16le(zzz->outf, md->modtime_dosdate);
334 dbuf_writeu32le(zzz->cdir, crc); // crc
335 dbuf_writeu32le(zzz->outf, crc);
337 dbuf_writeu32le(zzz->cdir, cmpr_len); // cmpr size
338 dbuf_writeu32le(zzz->outf, cmpr_len);
339 dbuf_writeu32le(zzz->cdir, f->len); // uncmpr size
340 dbuf_writeu32le(zzz->outf, f->len);
342 fnlen = de_strlen(name);
343 dbuf_writeu16le(zzz->cdir, fnlen);
344 dbuf_writeu16le(zzz->outf, fnlen);
346 dbuf_writeu16le(zzz->cdir, md->efcentral->len); // eflen
347 dbuf_writeu16le(zzz->outf, md->eflocal->len);
349 dbuf_writeu16le(zzz->cdir, 0); // file comment len
350 dbuf_writeu16le(zzz->cdir, 0); // disk number start
352 dbuf_writeu16le(zzz->cdir, 0); // int attrib
354 // Set the Unix (etc.) file attributes to "-rw-r--r--" or
355 // "-rwxr-xr-x", etc.
356 if(md->is_directory)
357 ext_attributes = (0040755U << 16) | 0x10;
358 else if(md->is_executable)
359 ext_attributes = (0100755U << 16);
360 else
361 ext_attributes = (0100644U << 16);
363 dbuf_writeu32le(zzz->cdir, (i64)ext_attributes); // ext attrib
365 dbuf_writeu32le(zzz->cdir, ldir_offset);
367 dbuf_write(zzz->cdir, (const u8*)name, fnlen);
368 dbuf_write(zzz->outf, (const u8*)name, fnlen);
370 dbuf_copy(md->efcentral, 0, md->efcentral->len, zzz->cdir);
371 dbuf_copy(md->eflocal, 0, md->eflocal->len, zzz->outf);
373 if(using_compression) {
374 if(cmpr_data) {
375 dbuf_copy(cmpr_data, 0, cmpr_data->len, zzz->outf);
378 else {
379 dbuf_copy(f, 0, f->len, zzz->outf);
382 zzz->membercount++;
384 done:
385 if(cmpr_data) dbuf_close(cmpr_data);
388 void de_zip_add_file_to_archive(deark *c, dbuf *f)
390 struct zipw_ctx *zzz;
391 struct zipw_md *md = NULL;
392 int write_ntfs_times = 0;
393 int write_UT_time = 0;
395 md = de_malloc(c, sizeof(struct zipw_md));
397 if(!c->zip_data) {
398 // ZIP file hasn't been created yet
399 if(!de_zip_create_file(c)) {
400 de_fatalerror(c);
401 goto done;
405 zzz = (struct zipw_ctx*)c->zip_data;
407 de_dbg(c, "adding to zip: name=%s len=%"I64_FMT, f->name, f->len);
409 if(f->fi_copy && f->fi_copy->is_directory) {
410 md->is_directory = 1;
413 if(f->fi_copy && (f->fi_copy->mode_flags&DE_MODEFLAG_EXE)) {
414 md->is_executable = 1;
417 if(c->preserve_file_times_archives && f->fi_copy && f->fi_copy->timestamp[DE_TIMESTAMPIDX_MODIFY].is_valid) {
418 md->modtime = f->fi_copy->timestamp[DE_TIMESTAMPIDX_MODIFY];
419 if(md->modtime.precision>DE_TSPREC_1SEC) {
420 write_ntfs_times = 1;
423 else if(c->reproducible_output) {
424 de_get_reproducible_timestamp(c, &md->modtime);
426 else {
427 de_cached_current_time_to_timestamp(c, &md->modtime);
429 // We only write the current time because ZIP format leaves us little
430 // choice.
431 // Note that although c->current_time is probably high precision,
432 // we don't consider that good enough reason to force NTFS timestamps
433 // to be written.
436 // Note: Timestamps other than the modification time are a low priority.
437 // We'll write them in some cases, when it is easy to do so.
438 if(c->preserve_file_times_archives && f->fi_copy) {
439 md->actime = f->fi_copy->timestamp[DE_TIMESTAMPIDX_ACCESS];
440 md->crtime = f->fi_copy->timestamp[DE_TIMESTAMPIDX_CREATE];
443 md->modtime_unix = de_timestamp_to_unix_time(&md->modtime);
444 set_dos_modtime(md);
446 if(is_valid_32bit_unix_time(md->modtime_unix)) {
447 // Always write a Unix timestamp if we can.
448 write_UT_time = 1;
450 if(md->modtime_unix < 0) {
451 // This negative Unix time is in range, but problematic,
452 // so write NTFS times as well.
453 write_ntfs_times = 1;
456 else { // Out of range of ZIP's (signed int32) Unix style timestamps
457 write_ntfs_times = 1;
460 if(write_ntfs_times) {
461 md->modtime_as_FILETIME = de_timestamp_to_FILETIME(&md->modtime);
462 if(md->modtime_as_FILETIME == 0) {
463 write_ntfs_times = 0;
465 else {
466 md->actime_as_FILETIME = de_timestamp_to_FILETIME(&md->actime);
467 md->crtime_as_FILETIME = de_timestamp_to_FILETIME(&md->crtime);
471 // Create ZIP "extra data" "Extended Timestamp" and "NTFS" fields,
472 // containing the UTC timestamp.
474 // Use temporary dbufs to help construct the extra field data.
475 md->eflocal = dbuf_create_membuf(c, 256, 0);
476 md->efcentral = dbuf_create_membuf(c, 256, 0);
478 if(write_UT_time) {
479 do_UT_times(c, md, md->eflocal, 0);
480 do_UT_times(c, md, md->efcentral, 1);
483 if(write_ntfs_times) {
484 // Note: Info-ZIP says: "In the current implementations, this field [...]
485 // is only stored as local extra field.
486 // But 7-Zip supports it *only* as a central extra field.
487 // So we'll write both.
488 do_ntfs_times(c, md, md->eflocal, 0);
489 do_ntfs_times(c, md, md->efcentral, 1);
492 if(md->is_directory) {
493 size_t nlen;
494 char *name2;
496 // Append a "/" to the name
497 nlen = de_strlen(f->name);
498 name2 = de_malloc(c, (i64)nlen+2);
499 de_snprintf(name2, nlen+2, "%s/", f->name);
501 zipw_add_memberfile(c, zzz, md, f, name2, MZ_NO_COMPRESSION);
503 de_free(c, name2);
505 else {
506 zipw_add_memberfile(c, zzz, md, f, f->name, zzz->cmprlevel);
509 done:
510 if(md) {
511 dbuf_close(md->eflocal);
512 dbuf_close(md->efcentral);
513 de_free(c, md);
517 static int copy_to_FILE_cbfn(struct de_bufferedreadctx *brctx, const u8 *buf,
518 i64 buf_len)
520 size_t ret;
521 ret = fwrite(buf, 1, (size_t)buf_len, (FILE*)brctx->userdata);
522 return (ret==(size_t)buf_len);
525 static void dbuf_copy_to_FILE(dbuf *inf, i64 input_offset, i64 input_len, FILE *outfile)
527 if(input_len<1) return;
528 dbuf_buffered_read(inf, input_offset, input_len, copy_to_FILE_cbfn, (void*)outfile);
531 static void zipw_finalize(deark *c, struct zipw_ctx *zzz)
533 i64 cdir_start;
534 i64 zip64_eocd_pos;
535 int opt_zip64;
536 int need_zip64 = 0;
537 int use_zip64 = 0;
539 cdir_start = zzz->outf->len;
541 if((zzz->membercount > 0xffff) || (cdir_start > 0xffffffffLL) ||
542 (zzz->cdir->len > 0xffffffffLL))
544 need_zip64 = 1;
547 opt_zip64 = de_get_ext_option_bool(c, "archive:zip64", 0);
548 if(opt_zip64>0) {
549 use_zip64 = 1; // Zip64 always
551 else if(need_zip64) {
552 use_zip64 = 1; // Zip64 auto
553 de_info(c, "Note: Writing a ZIP file that uses Zip64 extensions. Not all unzip "
554 "programs will correctly support it.");
557 // Write the central directory
558 dbuf_copy(zzz->cdir, 0, zzz->cdir->len, zzz->outf);
560 zip64_eocd_pos = zzz->outf->len;
562 if(use_zip64) {
563 // Write 56-byte zip64 EOCD record
564 dbuf_writeu32le(zzz->outf, CODE_PK66);
565 dbuf_writeu64le(zzz->outf, 56-12); // recsize
566 dbuf_writeu16le(zzz->outf, ZIPENC_VER_MADE_BY);
568 // version-needed: 4.5 = minimum for Zip64. This is a formality, because
569 // anything that doesn't support Zip64 won't ever see this field.
570 // Unfortunately, the original EOCD record does not have a version-needed
571 // field, so (I guess) there is no good way to tell old unzip programs that
572 // they cannot fully support this ZIP file.
573 dbuf_writeu16le(zzz->outf, 45);
575 dbuf_writeu32le(zzz->outf, 0); // this disk num
576 dbuf_writeu32le(zzz->outf, 0); // central dir disk
577 dbuf_writeu64le(zzz->outf, (u64)zzz->membercount); // num files this disk
578 dbuf_writeu64le(zzz->outf, (u64)zzz->membercount); // num files total
579 dbuf_writeu64le(zzz->outf, (u64)zzz->cdir->len); // central dir size
580 dbuf_writeu64le(zzz->outf, (u64)cdir_start); // central dir offset
582 // Write 20-byte EOCD locator
583 dbuf_writeu32le(zzz->outf, CODE_PK67);
584 dbuf_writeu32le(zzz->outf, 0); // central dir disk
585 dbuf_writeu64le(zzz->outf, (u64)zip64_eocd_pos);
586 dbuf_writeu32le(zzz->outf, 1); // number of disks
589 // Write 22-byte EOCD record
590 dbuf_writeu32le(zzz->outf, CODE_PK56);
591 dbuf_writeu16le(zzz->outf, 0); // this disk num
592 dbuf_writeu16le(zzz->outf, 0); // central dir disk
594 if(zzz->membercount > 0xffff) {
595 dbuf_writeu16le(zzz->outf, 0xffff);
596 dbuf_writeu16le(zzz->outf, 0xffff);
598 else {
599 dbuf_writeu16le(zzz->outf, zzz->membercount); // num files this disk
600 dbuf_writeu16le(zzz->outf, zzz->membercount); // num files total
603 if(zzz->cdir->len > 0xffffffffLL) {
604 dbuf_writeu32le(zzz->outf, 0xffffffffLL);
606 else {
607 dbuf_writeu32le(zzz->outf, zzz->cdir->len);
610 if(cdir_start > 0xffffffffLL) {
611 dbuf_writeu32le(zzz->outf, 0xffffffffLL);
613 else {
614 dbuf_writeu32le(zzz->outf, cdir_start);
617 dbuf_writeu16le(zzz->outf, 0); // ZIP comment length
620 void de_zip_close_file(deark *c)
622 struct zipw_ctx *zzz;
624 if(!c->zip_data) return;
625 de_dbg(c, "closing zip file");
627 zzz = (struct zipw_ctx*)c->zip_data;
629 zipw_finalize(c, zzz);
631 if(c->archive_to_stdout && zzz->outf && zzz->outf->btype==DBUF_TYPE_MEMBUF) {
632 dbuf_copy_to_FILE(zzz->outf, 0, zzz->outf->len, stdout);
635 dbuf_close(zzz->cdir);
636 dbuf_close(zzz->outf);
637 de_crcobj_destroy(zzz->crc32o);
639 de_free(c, zzz);
640 c->zip_data = NULL;