1 // This file is part of Deark.
2 // Copyright (C) 2016-2019 Jason Summers
3 // See the file COPYING for terms of use.
7 #define DE_NOT_IN_MODULE
8 #include "deark-config.h"
9 #include "deark-private.h"
10 #include "deark-fmtutil.h"
12 // TODO: Finish removing the "mz" symbols, and other miniz things.
13 #define MZ_NO_COMPRESSION 0
14 #define MZ_BEST_COMPRESSION 9
15 #define MZ_DEFAULT_LEVEL 6
16 #define MZ_DEFAULT_STRATEGY 0
19 // 63 decimal = ZIP spec v6.3 (first version to document the UTF-8 flag)
20 #define ZIPENC_VER_MADE_BY ((3<<8) | 63)
22 #define CODE_PK12 0x02014b50U
23 #define CODE_PK34 0x04034b50U
24 #define CODE_PK56 0x06054b50U
25 #define CODE_PK66 0x06064b50U
26 #define CODE_PK67 0x07064b50U
29 struct de_timestamp modtime
;
30 struct de_timestamp actime
;
31 struct de_timestamp crtime
;
33 unsigned int modtime_dosdate
;
34 unsigned int modtime_dostime
;
35 i64 modtime_as_FILETIME
; // valid if nonzero
36 i64 actime_as_FILETIME
;
37 i64 crtime_as_FILETIME
;
46 const char *pFilename
;
47 unsigned int cmprlevel
;
50 dbuf
*cdir
; // central directory
51 struct de_crcobj
*crc32o
;
54 static int is_valid_32bit_unix_time(i64 ut
)
56 return (ut
>= -0x80000000LL
) && (ut
<= 0x7fffffffLL
);
59 // Create and initialize the main ZIP archive
60 int de_zip_create_file(deark
*c
)
63 const char *opt_level
;
65 if(c
->zip_data
) return 1; // Already created. Shouldn't happen.
67 zzz
= de_malloc(c
, sizeof(struct zipw_ctx
));
69 c
->zip_data
= (void*)zzz
;
70 zzz
->crc32o
= de_crcobj_create(c
, DE_CRCOBJ_CRC32_IEEE
);
72 zzz
->cmprlevel
= MZ_BEST_COMPRESSION
; // default
73 opt_level
= de_get_ext_option(c
, "archive:zipcmprlevel");
75 i64 opt_level_n
= de_atoi64(opt_level
);
79 else if(opt_level_n
<0) {
80 zzz
->cmprlevel
= MZ_DEFAULT_LEVEL
;
83 zzz
->cmprlevel
= (unsigned int)opt_level_n
;
87 if(c
->archive_to_stdout
) {
88 zzz
->pFilename
= "[stdout]";
91 if(c
->output_archive_filename
) {
92 zzz
->pFilename
= c
->output_archive_filename
;
95 zzz
->pFilename
= "output.zip";
99 if(c
->archive_to_stdout
) {
100 zzz
->outf
= dbuf_create_unmanaged_file_stdout(c
, "[ZIP stdout stream]");
103 de_info(c
, "Creating %s", zzz
->pFilename
);
104 zzz
->outf
= dbuf_create_unmanaged_file(c
, zzz
->pFilename
, c
->overwrite_mode
, 0);
107 zzz
->cdir
= dbuf_create_membuf(c
, 1024, 0);
109 if(zzz
->outf
->btype
==DBUF_TYPE_NULL
) {
110 de_err(c
, "Failed to create ZIP file");
111 dbuf_close(zzz
->outf
);
119 static void set_dos_modtime(struct zipw_md
*md
)
121 struct de_timestamp tmpts
;
122 struct de_struct_tm tm2
;
124 // Clamp to the range of times supported
125 if(md
->modtime_unix
< 315532800) { // 1 Jan 1980 00:00:00
126 de_unix_time_to_timestamp(315532800, &tmpts
, 0x0);
127 de_gmtime(&tmpts
, &tm2
);
129 else if(md
->modtime_unix
> 4354819198LL) { // 31 Dec 2107 23:59:58
130 de_unix_time_to_timestamp(4354819198LL, &tmpts
, 0x0);
131 de_gmtime(&tmpts
, &tm2
);
134 de_gmtime(&md
->modtime
, &tm2
);
137 md
->modtime_dostime
= (unsigned int)(((tm2
.tm_hour
) << 11) +
138 ((tm2
.tm_min
) << 5) + ((tm2
.tm_sec
) >> 1));
139 md
->modtime_dosdate
= (unsigned int)(((tm2
.tm_fullyear
- 1980) << 9) +
140 ((tm2
.tm_mon
+ 1) << 5) + tm2
.tm_mday
);
144 static void do_UT_times(deark
*c
, struct zipw_md
*md
,
145 dbuf
*ef
, int is_central
)
147 int write_crtime
= 0;
148 int write_actime
= 0;
149 i64 num_timestamps
= 0;
153 // Note: Although our 0x5455 central and local extra data fields happen to
154 // be identical, that is not generally the case.
157 if(md
->actime
.is_valid
) {
158 actime_unix
= de_timestamp_to_unix_time(&md
->actime
);
159 if(is_valid_32bit_unix_time(actime_unix
)) {
164 if(md
->crtime
.is_valid
) {
165 crtime_unix
= de_timestamp_to_unix_time(&md
->crtime
);
166 if(is_valid_32bit_unix_time(crtime_unix
)) {
172 // Always write mod time
186 dbuf_writeu16le(ef
, 0x5455);
187 dbuf_writeu16le(ef
, (i64
)(1+4*num_timestamps
));
188 dbuf_writebyte(ef
, flags
); // tells which fields are present
189 dbuf_writei32le(ef
, md
->modtime_unix
);
191 dbuf_writei32le(ef
, actime_unix
);
194 dbuf_writei32le(ef
, crtime_unix
);
198 static void do_ntfs_times(deark
*c
, struct zipw_md
*md
,
199 dbuf
*ef
, int is_central
)
201 u64 modtm
, actm
, crtm
;
203 dbuf_writeu16le(ef
, 0x000a); // = NTFS
204 dbuf_writeu16le(ef
, 32); // data size
205 dbuf_write_zeroes(ef
, 4);
206 dbuf_writeu16le(ef
, 0x0001); // file times element
207 dbuf_writeu16le(ef
, 24); // element data size
208 // We only necessarily know the mod time, but we have to write something for
210 modtm
= (u64
)md
->modtime_as_FILETIME
;
211 actm
= (md
->actime_as_FILETIME
>0) ? (u64
)md
->actime_as_FILETIME
: modtm
;
212 crtm
= (md
->crtime_as_FILETIME
>0) ? (u64
)md
->crtime_as_FILETIME
: modtm
;
213 dbuf_writeu64le(ef
, modtm
);
214 dbuf_writeu64le(ef
, actm
);
215 dbuf_writeu64le(ef
, crtm
);
218 // uncmpr_data must be a membuf
219 static int zipw_deflate(deark
*c
, struct zipw_ctx
*zzz
, dbuf
*uncmpr_data
,
220 dbuf
*cmpr_data
, unsigned int level
)
224 const u8
*uncmpr_mem
;
225 struct fmtutil_tdefl_ctx
*tdctx
= NULL
;
227 tdctx
= fmtutil_tdefl_create(c
, cmpr_data
,
228 fmtutil_tdefl_create_comp_flags_from_zip_params(level
, -15, MZ_DEFAULT_STRATEGY
));
230 uncmpr_mem
= dbuf_get_membuf_direct_ptr(uncmpr_data
);
231 if(uncmpr_mem
==NULL
&& uncmpr_data
->len
!=0) goto done
;
233 ret
= fmtutil_tdefl_compress_buffer(tdctx
, uncmpr_mem
, (size_t)uncmpr_data
->len
,
234 FMTUTIL_TDEFL_FINISH
);
235 if(ret
!= FMTUTIL_TDEFL_STATUS_DONE
) goto done
;
240 de_err(c
, "Deflate compression error");
242 fmtutil_tdefl_destroy(tdctx
);
246 static void zipw_add_memberfile(deark
*c
, struct zipw_ctx
*zzz
, struct zipw_md
*md
,
247 dbuf
*f
, const char *name
, unsigned int level_and_flags
)
252 int try_compression
= 0;
253 int using_compression
= 0;
254 dbuf
*cmpr_data
= NULL
;
256 unsigned int bit_flags
= 0;
257 unsigned int ext_attributes
;
258 unsigned int ver_needed
;
260 // Just a sanity check; we'll run into some other limit long before this
261 if(zzz
->membercount
>= 0x7fffffff) {
262 de_err(c
, "Maximum number of ZIP member files exceeded");
266 de_crcobj_reset(zzz
->crc32o
);
267 de_crcobj_addslice(zzz
->crc32o
, f
, 0, f
->len
);
268 crc
= de_crcobj_getval(zzz
->crc32o
);
270 ldir_offset
= zzz
->outf
->len
;
271 if(ldir_offset
> 0xffffffffLL
) {
272 de_err(c
, "Maximum ZIP file size exceeded");
275 if(f
->len
> 0xffffffffLL
) {
276 de_err(c
, "Maximum ZIP member file size exceeded");
279 cmpr_len
= f
->len
; // default
281 if(f
->len
>5 && !md
->is_directory
) {
285 if(try_compression
) {
288 cmpr_data
= dbuf_create_membuf(c
, 0, 0);
290 if ((int)level_and_flags
< 0)
291 level_and_flags
= MZ_DEFAULT_LEVEL
;
292 level
= level_and_flags
& 0xF;
294 zipw_deflate(c
, zzz
, f
, cmpr_data
, level
);
296 if(cmpr_data
->len
< f
->len
) {
297 using_compression
= 1;
298 cmpr_len
= cmpr_data
->len
;
300 // This is the logic used by Info-Zip
301 if(level
<=2) bit_flags
|= 4;
302 else if(level
>=8) bit_flags
|= 2;
304 else { // No savings - Discard compressed data
305 dbuf_close(cmpr_data
);
310 bit_flags
|= 0x0800; // Use UTF-8 filenames
312 dbuf_writeu32le(zzz
->cdir
, CODE_PK12
);
313 dbuf_writeu32le(zzz
->outf
, CODE_PK34
);
314 dbuf_writeu16le(zzz
->cdir
, ZIPENC_VER_MADE_BY
);
316 if(using_compression
) ver_needed
= 20;
317 else if(md
->is_directory
) ver_needed
= 20;
318 else ver_needed
= 10;
320 dbuf_writeu16le(zzz
->cdir
, ver_needed
);
321 dbuf_writeu16le(zzz
->outf
, ver_needed
);
323 dbuf_writeu16le(zzz
->cdir
, bit_flags
);
324 dbuf_writeu16le(zzz
->outf
, bit_flags
);
326 dbuf_writeu16le(zzz
->cdir
, using_compression
?8:0); // cmpr method
327 dbuf_writeu16le(zzz
->outf
, using_compression
?8:0);
329 dbuf_writeu16le(zzz
->cdir
, md
->modtime_dostime
);
330 dbuf_writeu16le(zzz
->outf
, md
->modtime_dostime
);
331 dbuf_writeu16le(zzz
->cdir
, md
->modtime_dosdate
);
332 dbuf_writeu16le(zzz
->outf
, md
->modtime_dosdate
);
334 dbuf_writeu32le(zzz
->cdir
, crc
); // crc
335 dbuf_writeu32le(zzz
->outf
, crc
);
337 dbuf_writeu32le(zzz
->cdir
, cmpr_len
); // cmpr size
338 dbuf_writeu32le(zzz
->outf
, cmpr_len
);
339 dbuf_writeu32le(zzz
->cdir
, f
->len
); // uncmpr size
340 dbuf_writeu32le(zzz
->outf
, f
->len
);
342 fnlen
= de_strlen(name
);
343 dbuf_writeu16le(zzz
->cdir
, fnlen
);
344 dbuf_writeu16le(zzz
->outf
, fnlen
);
346 dbuf_writeu16le(zzz
->cdir
, md
->efcentral
->len
); // eflen
347 dbuf_writeu16le(zzz
->outf
, md
->eflocal
->len
);
349 dbuf_writeu16le(zzz
->cdir
, 0); // file comment len
350 dbuf_writeu16le(zzz
->cdir
, 0); // disk number start
352 dbuf_writeu16le(zzz
->cdir
, 0); // int attrib
354 // Set the Unix (etc.) file attributes to "-rw-r--r--" or
355 // "-rwxr-xr-x", etc.
357 ext_attributes
= (0040755U << 16) | 0x10;
358 else if(md
->is_executable
)
359 ext_attributes
= (0100755U << 16);
361 ext_attributes
= (0100644U << 16);
363 dbuf_writeu32le(zzz
->cdir
, (i64
)ext_attributes
); // ext attrib
365 dbuf_writeu32le(zzz
->cdir
, ldir_offset
);
367 dbuf_write(zzz
->cdir
, (const u8
*)name
, fnlen
);
368 dbuf_write(zzz
->outf
, (const u8
*)name
, fnlen
);
370 dbuf_copy(md
->efcentral
, 0, md
->efcentral
->len
, zzz
->cdir
);
371 dbuf_copy(md
->eflocal
, 0, md
->eflocal
->len
, zzz
->outf
);
373 if(using_compression
) {
375 dbuf_copy(cmpr_data
, 0, cmpr_data
->len
, zzz
->outf
);
379 dbuf_copy(f
, 0, f
->len
, zzz
->outf
);
385 if(cmpr_data
) dbuf_close(cmpr_data
);
388 void de_zip_add_file_to_archive(deark
*c
, dbuf
*f
)
390 struct zipw_ctx
*zzz
;
391 struct zipw_md
*md
= NULL
;
392 int write_ntfs_times
= 0;
393 int write_UT_time
= 0;
395 md
= de_malloc(c
, sizeof(struct zipw_md
));
398 // ZIP file hasn't been created yet
399 if(!de_zip_create_file(c
)) {
405 zzz
= (struct zipw_ctx
*)c
->zip_data
;
407 de_dbg(c
, "adding to zip: name=%s len=%"I64_FMT
, f
->name
, f
->len
);
409 if(f
->fi_copy
&& f
->fi_copy
->is_directory
) {
410 md
->is_directory
= 1;
413 if(f
->fi_copy
&& (f
->fi_copy
->mode_flags
&DE_MODEFLAG_EXE
)) {
414 md
->is_executable
= 1;
417 if(c
->preserve_file_times_archives
&& f
->fi_copy
&& f
->fi_copy
->timestamp
[DE_TIMESTAMPIDX_MODIFY
].is_valid
) {
418 md
->modtime
= f
->fi_copy
->timestamp
[DE_TIMESTAMPIDX_MODIFY
];
419 if(md
->modtime
.precision
>DE_TSPREC_1SEC
) {
420 write_ntfs_times
= 1;
423 else if(c
->reproducible_output
) {
424 de_get_reproducible_timestamp(c
, &md
->modtime
);
427 de_cached_current_time_to_timestamp(c
, &md
->modtime
);
429 // We only write the current time because ZIP format leaves us little
431 // Note that although c->current_time is probably high precision,
432 // we don't consider that good enough reason to force NTFS timestamps
436 // Note: Timestamps other than the modification time are a low priority.
437 // We'll write them in some cases, when it is easy to do so.
438 if(c
->preserve_file_times_archives
&& f
->fi_copy
) {
439 md
->actime
= f
->fi_copy
->timestamp
[DE_TIMESTAMPIDX_ACCESS
];
440 md
->crtime
= f
->fi_copy
->timestamp
[DE_TIMESTAMPIDX_CREATE
];
443 md
->modtime_unix
= de_timestamp_to_unix_time(&md
->modtime
);
446 if(is_valid_32bit_unix_time(md
->modtime_unix
)) {
447 // Always write a Unix timestamp if we can.
450 if(md
->modtime_unix
< 0) {
451 // This negative Unix time is in range, but problematic,
452 // so write NTFS times as well.
453 write_ntfs_times
= 1;
456 else { // Out of range of ZIP's (signed int32) Unix style timestamps
457 write_ntfs_times
= 1;
460 if(write_ntfs_times
) {
461 md
->modtime_as_FILETIME
= de_timestamp_to_FILETIME(&md
->modtime
);
462 if(md
->modtime_as_FILETIME
== 0) {
463 write_ntfs_times
= 0;
466 md
->actime_as_FILETIME
= de_timestamp_to_FILETIME(&md
->actime
);
467 md
->crtime_as_FILETIME
= de_timestamp_to_FILETIME(&md
->crtime
);
471 // Create ZIP "extra data" "Extended Timestamp" and "NTFS" fields,
472 // containing the UTC timestamp.
474 // Use temporary dbufs to help construct the extra field data.
475 md
->eflocal
= dbuf_create_membuf(c
, 256, 0);
476 md
->efcentral
= dbuf_create_membuf(c
, 256, 0);
479 do_UT_times(c
, md
, md
->eflocal
, 0);
480 do_UT_times(c
, md
, md
->efcentral
, 1);
483 if(write_ntfs_times
) {
484 // Note: Info-ZIP says: "In the current implementations, this field [...]
485 // is only stored as local extra field.
486 // But 7-Zip supports it *only* as a central extra field.
487 // So we'll write both.
488 do_ntfs_times(c
, md
, md
->eflocal
, 0);
489 do_ntfs_times(c
, md
, md
->efcentral
, 1);
492 if(md
->is_directory
) {
496 // Append a "/" to the name
497 nlen
= de_strlen(f
->name
);
498 name2
= de_malloc(c
, (i64
)nlen
+2);
499 de_snprintf(name2
, nlen
+2, "%s/", f
->name
);
501 zipw_add_memberfile(c
, zzz
, md
, f
, name2
, MZ_NO_COMPRESSION
);
506 zipw_add_memberfile(c
, zzz
, md
, f
, f
->name
, zzz
->cmprlevel
);
511 dbuf_close(md
->eflocal
);
512 dbuf_close(md
->efcentral
);
517 static int copy_to_FILE_cbfn(struct de_bufferedreadctx
*brctx
, const u8
*buf
,
521 ret
= fwrite(buf
, 1, (size_t)buf_len
, (FILE*)brctx
->userdata
);
522 return (ret
==(size_t)buf_len
);
525 static void dbuf_copy_to_FILE(dbuf
*inf
, i64 input_offset
, i64 input_len
, FILE *outfile
)
527 if(input_len
<1) return;
528 dbuf_buffered_read(inf
, input_offset
, input_len
, copy_to_FILE_cbfn
, (void*)outfile
);
531 static void zipw_finalize(deark
*c
, struct zipw_ctx
*zzz
)
539 cdir_start
= zzz
->outf
->len
;
541 if((zzz
->membercount
> 0xffff) || (cdir_start
> 0xffffffffLL
) ||
542 (zzz
->cdir
->len
> 0xffffffffLL
))
547 opt_zip64
= de_get_ext_option_bool(c
, "archive:zip64", 0);
549 use_zip64
= 1; // Zip64 always
551 else if(need_zip64
) {
552 use_zip64
= 1; // Zip64 auto
553 de_info(c
, "Note: Writing a ZIP file that uses Zip64 extensions. Not all unzip "
554 "programs will correctly support it.");
557 // Write the central directory
558 dbuf_copy(zzz
->cdir
, 0, zzz
->cdir
->len
, zzz
->outf
);
560 zip64_eocd_pos
= zzz
->outf
->len
;
563 // Write 56-byte zip64 EOCD record
564 dbuf_writeu32le(zzz
->outf
, CODE_PK66
);
565 dbuf_writeu64le(zzz
->outf
, 56-12); // recsize
566 dbuf_writeu16le(zzz
->outf
, ZIPENC_VER_MADE_BY
);
568 // version-needed: 4.5 = minimum for Zip64. This is a formality, because
569 // anything that doesn't support Zip64 won't ever see this field.
570 // Unfortunately, the original EOCD record does not have a version-needed
571 // field, so (I guess) there is no good way to tell old unzip programs that
572 // they cannot fully support this ZIP file.
573 dbuf_writeu16le(zzz
->outf
, 45);
575 dbuf_writeu32le(zzz
->outf
, 0); // this disk num
576 dbuf_writeu32le(zzz
->outf
, 0); // central dir disk
577 dbuf_writeu64le(zzz
->outf
, (u64
)zzz
->membercount
); // num files this disk
578 dbuf_writeu64le(zzz
->outf
, (u64
)zzz
->membercount
); // num files total
579 dbuf_writeu64le(zzz
->outf
, (u64
)zzz
->cdir
->len
); // central dir size
580 dbuf_writeu64le(zzz
->outf
, (u64
)cdir_start
); // central dir offset
582 // Write 20-byte EOCD locator
583 dbuf_writeu32le(zzz
->outf
, CODE_PK67
);
584 dbuf_writeu32le(zzz
->outf
, 0); // central dir disk
585 dbuf_writeu64le(zzz
->outf
, (u64
)zip64_eocd_pos
);
586 dbuf_writeu32le(zzz
->outf
, 1); // number of disks
589 // Write 22-byte EOCD record
590 dbuf_writeu32le(zzz
->outf
, CODE_PK56
);
591 dbuf_writeu16le(zzz
->outf
, 0); // this disk num
592 dbuf_writeu16le(zzz
->outf
, 0); // central dir disk
594 if(zzz
->membercount
> 0xffff) {
595 dbuf_writeu16le(zzz
->outf
, 0xffff);
596 dbuf_writeu16le(zzz
->outf
, 0xffff);
599 dbuf_writeu16le(zzz
->outf
, zzz
->membercount
); // num files this disk
600 dbuf_writeu16le(zzz
->outf
, zzz
->membercount
); // num files total
603 if(zzz
->cdir
->len
> 0xffffffffLL
) {
604 dbuf_writeu32le(zzz
->outf
, 0xffffffffLL
);
607 dbuf_writeu32le(zzz
->outf
, zzz
->cdir
->len
);
610 if(cdir_start
> 0xffffffffLL
) {
611 dbuf_writeu32le(zzz
->outf
, 0xffffffffLL
);
614 dbuf_writeu32le(zzz
->outf
, cdir_start
);
617 dbuf_writeu16le(zzz
->outf
, 0); // ZIP comment length
620 void de_zip_close_file(deark
*c
)
622 struct zipw_ctx
*zzz
;
624 if(!c
->zip_data
) return;
625 de_dbg(c
, "closing zip file");
627 zzz
= (struct zipw_ctx
*)c
->zip_data
;
629 zipw_finalize(c
, zzz
);
631 if(c
->archive_to_stdout
&& zzz
->outf
&& zzz
->outf
->btype
==DBUF_TYPE_MEMBUF
) {
632 dbuf_copy_to_FILE(zzz
->outf
, 0, zzz
->outf
->len
, stdout
);
635 dbuf_close(zzz
->cdir
);
636 dbuf_close(zzz
->outf
);
637 de_crcobj_destroy(zzz
->crc32o
);