1 // Copyright 2007 Alan Donovan. All rights reserved.
3 // Author: Alan Donovan <adonovan@google.com>
5 // Licensed under the Apache License, Version 2.0 (the "License");
6 // you may not use this file except in compliance with the License.
7 // You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
17 // zip.cc -- .zip (.jar) file reading/writing routines.
20 // See README.txt for details.
22 // See http://www.pkware.com/documents/casestudies/APPNOTE.TXT
23 // for definition of PKZIP file format.
25 #define _FILE_OFFSET_BITS 64 // Support zip files larger than 2GB
41 #include "third_party/ijar/zip.h"
44 #define LOCAL_FILE_HEADER_SIGNATURE 0x04034b50
45 #define CENTRAL_FILE_HEADER_SIGNATURE 0x02014b50
46 #define END_OF_CENTRAL_DIR_SIGNATURE 0x06054b50
47 #define DATA_DESCRIPTOR_SIGNATURE 0x08074b50
49 // version to extract: 1.0 - default value from APPNOTE.TXT.
50 // Output JAR files contain no extra ZIP features, so this is enough.
51 #define ZIP_VERSION_TO_EXTRACT 10
52 #define COMPRESSION_METHOD_STORED 0 // no compression
53 #define COMPRESSION_METHOD_DEFLATED 8
55 #define GENERAL_PURPOSE_BIT_FLAG_COMPRESSED (1 << 3)
56 #define GENERAL_PURPOSE_BIT_FLAG_UTF8_ENCODED (1 << 11)
57 #define GENERAL_PURPOSE_BIT_FLAG_COMPRESSION_SPEED ((1 << 2) | (1 << 1))
58 #define GENERAL_PURPOSE_BIT_FLAG_SUPPORTED \
59 (GENERAL_PURPOSE_BIT_FLAG_COMPRESSED \
60 | GENERAL_PURPOSE_BIT_FLAG_UTF8_ENCODED \
61 | GENERAL_PURPOSE_BIT_FLAG_COMPRESSION_SPEED)
63 namespace devtools_ijar
{
64 // In the absence of ZIP64 support, zip files are limited to 4GB.
65 // http://www.info-zip.org/FAQ.html#limits
66 static const u8 kMaximumOutputSize
= std::numeric_limits
<uint32_t>::max();
68 static bool ProcessCentralDirEntry(const u1
*&p
,
69 size_t *compressed_size
,
70 size_t *uncompressed_size
,
77 // A class representing a ZipFile for reading. Its public API is exposed
78 // using the ZipExtractor abstract class.
80 class InputZipFile
: public ZipExtractor
{
82 InputZipFile(ZipExtractorProcessor
*processor
, int fd
, off_t in_length
,
83 off_t in_offset
, const u1
* zipdata_in
, const u1
* central_dir
);
84 virtual ~InputZipFile();
86 virtual const char* GetError() {
93 virtual bool ProcessNext();
95 virtual size_t GetSize() {
99 virtual u8
CalculateOutputLength();
102 ZipExtractorProcessor
*processor
;
104 int fd_in
; // Input file descripor
106 // InputZipFile is responsible for maintaining the following
107 // pointers. They are allocated by the Create() method before
108 // the object is actually created using mmap.
109 const u1
* const zipdata_in_
; // start of input file mmap
110 const u1
* zipdata_in_mapped_
; // start of still mapped region
111 const u1
* const central_dir_
; // central directory in input file
113 size_t in_length_
; // size of the input file
114 size_t in_offset_
; // offset the input file
116 const u1
*p
; // input cursor
118 const u1
* central_dir_current_
; // central dir input cursor
120 // Buffer size is initially INITIAL_BUFFER_SIZE. It doubles in size every
121 // time it is found too small, until it reaches MAX_BUFFER_SIZE. If that is
122 // not enough, we bail out. We only decompress class files, so they should
123 // be smaller than 64K anyway, but we give a little leeway.
124 // MAX_BUFFER_SIZE must be bigger than the size of the biggest file in the
125 // ZIP. It is set to 128M here so we can uncompress the Bazel server with
127 static const size_t INITIAL_BUFFER_SIZE
= 256 * 1024; // 256K
128 static const size_t MAX_BUFFER_SIZE
= 128 * 1024 * 1024;
129 static const size_t MAX_MAPPED_REGION
= 32 * 1024 * 1024;
131 // These metadata fields are the fields of the ZIP header of the file being
134 u2 general_purpose_bit_flag_
;
135 u2 compression_method_
;
136 u4 uncompressed_size_
;
138 u2 file_name_length_
;
139 u2 extra_field_length_
;
140 const u1
*file_name_
;
141 const u1
*extra_field_
;
143 // Administration of memory reserved for decompressed data. We use the same
144 // buffer for each file to avoid some malloc()/free() calls and free the
145 // memory only in the dtor. C-style memory management is used so that we
147 u1
*uncompressed_data_
;
148 size_t uncompressed_data_allocated_
;
150 // Copy of the last filename entry - Null-terminated.
151 char filename
[PATH_MAX
];
152 // The external file attribute field
156 char errmsg
[4*PATH_MAX
];
158 int error(const char *fmt
, ...) {
161 vsnprintf(errmsg
, 4*PATH_MAX
, fmt
, ap
);
166 // Check that at least n bytes remain in the input file, otherwise
167 // abort with an error message. "state" is the name of the field
168 // we're about to read, for diagnostics.
169 int EnsureRemaining(size_t n
, const char *state
) {
170 size_t in_offset
= p
- zipdata_in_
;
171 size_t remaining
= in_length_
- in_offset
;
173 return error("Premature end of file (at offset %zd, state=%s); "
174 "expected %zd more bytes but found %zd.\n",
175 in_offset
, state
, n
, remaining
);
180 // Read one entry from input zip file
181 int ProcessLocalFileEntry(size_t compressed_size
, size_t uncompressed_size
);
183 // Uncompress a file from the archive using zlib. The pointer returned
184 // is owned by InputZipFile, so it must not be freed. Advances the input
185 // cursor to the first byte after the compressed data.
186 u1
* UncompressFile();
189 int SkipFile(const bool compressed
);
192 int ProcessFile(const bool compressed
);
196 // A class implementing ZipBuilder that represent an open zip file for writing.
198 class OutputZipFile
: public ZipBuilder
{
200 OutputZipFile(int fd
, u1
* const zipdata_out
) :
202 zipdata_out_(zipdata_out
),
207 virtual const char* GetError() {
208 if (errmsg
[0] == 0) {
214 virtual ~OutputZipFile() { Finish(); }
215 virtual u1
* NewFile(const char* filename
, const u4 attr
);
216 virtual int FinishFile(size_t filelength
, bool compress
= false);
217 virtual int WriteEmptyFile(const char *filename
);
218 virtual size_t GetSize() {
221 virtual int GetNumberFiles() {
222 return entries_
.size();
224 virtual int Finish();
227 struct LocalFileEntry
{
228 // Start of the local header (in the output buffer).
229 size_t local_header_offset
;
231 // Sizes of the file entry
232 size_t uncompressed_length
;
233 size_t compressed_length
;
235 // Compression method
236 u2 compression_method
;
238 // external attributes field
241 // Start/length of the file_name in the local header.
245 // Start/length of the extra_field in the local header.
246 const u1
*extra_field
;
247 u2 extra_field_length
;
250 int fd_out
; // file descriptor for the output file
252 // OutputZipFile is responsible for maintaining the following
253 // pointers. They are allocated by the Create() method before
254 // the object is actually created using mmap.
255 u1
* const zipdata_out_
; // start of output file mmap
256 u1
*q
; // output cursor
258 u1
*header_ptr
; // Current pointer to "compression method" entry.
260 // List of entries to write the central directory
261 std::vector
<LocalFileEntry
*> entries_
;
264 char errmsg
[4*PATH_MAX
];
266 int error(const char *fmt
, ...) {
269 vsnprintf(errmsg
, 4*PATH_MAX
, fmt
, ap
);
274 // Write the ZIP central directory structure for each local file
275 // entry in "entries".
276 void WriteCentralDirectory();
278 // Returns the offset of the pointer relative to the start of the
280 size_t Offset(const u1
*const x
) {
281 return x
- zipdata_out_
;
284 // Write ZIP file header in the output. Since the compressed size is not
285 // known in advance, it must be recorded later. This method returns a pointer
286 // to "compressed size" in the file header that should be passed to
287 // WriteFileSizeInLocalFileHeader() later.
288 u1
* WriteLocalFileHeader(const char *filename
, const u4 attr
);
290 // Fill in the "compressed size" and "uncompressed size" fields in a local
291 // file header previously written by WriteLocalFileHeader().
292 size_t WriteFileSizeInLocalFileHeader(u1
*header_ptr
, size_t out_length
,
293 bool compress
= false);
297 // Implementation of InputZipFile
299 bool InputZipFile::ProcessNext() {
300 // Process the next entry in the central directory. Also make sure that the
301 // content pointer is in sync.
302 size_t compressed
, uncompressed
;
304 if (!ProcessCentralDirEntry(central_dir_current_
, &compressed
, &uncompressed
,
305 filename
, PATH_MAX
, &attr
, &offset
)) {
309 // There might be an offset specified in the central directory that does
310 // not match the file offset, if so, correct the pointer.
311 if (offset
!= 0 && (p
!= (zipdata_in_
+ in_offset_
+ offset
))) {
312 p
= zipdata_in_
+ offset
;
315 if (EnsureRemaining(4, "signature") < 0) {
318 u4 signature
= get_u4le(p
);
319 if (signature
== LOCAL_FILE_HEADER_SIGNATURE
) {
320 if (ProcessLocalFileEntry(compressed
, uncompressed
) < 0) {
324 error("local file header signature for file %s not found\n", filename
);
331 int InputZipFile::ProcessLocalFileEntry(
332 size_t compressed_size
, size_t uncompressed_size
) {
333 if (EnsureRemaining(26, "extract_version") < 0) {
336 extract_version_
= get_u2le(p
);
337 general_purpose_bit_flag_
= get_u2le(p
);
339 if ((general_purpose_bit_flag_
& ~GENERAL_PURPOSE_BIT_FLAG_SUPPORTED
) != 0) {
340 return error("Unsupported value (0x%04x) in general purpose bit flag.\n",
341 general_purpose_bit_flag_
);
344 compression_method_
= get_u2le(p
);
346 if (compression_method_
!= COMPRESSION_METHOD_DEFLATED
&&
347 compression_method_
!= COMPRESSION_METHOD_STORED
) {
348 return error("Unsupported compression method (%d).\n",
349 compression_method_
);
352 // skip over: last_mod_file_time, last_mod_file_date, crc32
354 compressed_size_
= get_u4le(p
);
355 uncompressed_size_
= get_u4le(p
);
356 file_name_length_
= get_u2le(p
);
357 extra_field_length_
= get_u2le(p
);
359 if (EnsureRemaining(file_name_length_
, "file_name") < 0) {
363 p
+= file_name_length_
;
365 if (EnsureRemaining(extra_field_length_
, "extra_field") < 0) {
369 p
+= extra_field_length_
;
371 bool is_compressed
= compression_method_
== COMPRESSION_METHOD_DEFLATED
;
373 // If the zip is compressed, compressed and uncompressed size members are
374 // zero in the local file header. If not, check that they are the same as the
375 // lengths from the central directory, otherwise, just believe the central
377 if (compressed_size_
== 0) {
378 compressed_size_
= compressed_size
;
380 if (compressed_size_
!= compressed_size
) {
381 return error("central directory and file header inconsistent\n");
385 if (uncompressed_size_
== 0) {
386 uncompressed_size_
= uncompressed_size
;
388 if (uncompressed_size_
!= uncompressed_size
) {
389 return error("central directory and file header inconsistent\n");
393 if (processor
->Accept(filename
, attr
)) {
394 if (ProcessFile(is_compressed
) < 0) {
398 if (SkipFile(is_compressed
) < 0) {
403 if (general_purpose_bit_flag_
& GENERAL_PURPOSE_BIT_FLAG_COMPRESSED
) {
404 // Skip the data descriptor. Some implementations do not put the signature
405 // here, so check if the next 4 bytes are a signature, and if so, skip the
406 // next 12 bytes (for CRC, compressed/uncompressed size), otherwise skip
407 // the next 8 bytes (because the value just read was the CRC).
408 u4 signature
= get_u4le(p
);
409 if (signature
== DATA_DESCRIPTOR_SIGNATURE
) {
416 if (p
> zipdata_in_mapped_
+ MAX_MAPPED_REGION
) {
417 munmap(const_cast<u1
*>(zipdata_in_mapped_
), MAX_MAPPED_REGION
);
418 zipdata_in_mapped_
+= MAX_MAPPED_REGION
;
424 int InputZipFile::SkipFile(const bool compressed
) {
426 // In this case, compressed_size_ == uncompressed_size_ (since the file is
427 // uncompressed), so we can use either.
428 if (compressed_size_
!= uncompressed_size_
) {
429 return error("compressed size != uncompressed size, although the file "
430 "is uncompressed.\n");
434 if (EnsureRemaining(compressed_size_
, "file_data") < 0) {
437 p
+= compressed_size_
;
441 u1
* InputZipFile::UncompressFile() {
442 size_t in_offset
= p
- zipdata_in_
;
443 size_t remaining
= in_length_
- in_offset
;
446 stream
.zalloc
= Z_NULL
;
447 stream
.zfree
= Z_NULL
;
448 stream
.opaque
= Z_NULL
;
449 stream
.avail_in
= remaining
;
450 stream
.next_in
= (Bytef
*) p
;
452 int ret
= inflateInit2(&stream
, -MAX_WBITS
);
454 error("inflateInit: %d\n", ret
);
458 int uncompressed_until_now
= 0;
461 stream
.avail_out
= uncompressed_data_allocated_
- uncompressed_until_now
;
462 stream
.next_out
= uncompressed_data_
+ uncompressed_until_now
;
463 int old_avail_out
= stream
.avail_out
;
465 ret
= inflate(&stream
, Z_SYNC_FLUSH
);
466 int uncompressed_now
= old_avail_out
- stream
.avail_out
;
467 uncompressed_until_now
+= uncompressed_now
;
471 // zlib said that there is no more data to decompress.
473 u1
*new_p
= reinterpret_cast<u1
*>(stream
.next_in
);
474 compressed_size_
= new_p
- p
;
475 uncompressed_size_
= uncompressed_until_now
;
478 return uncompressed_data_
;
482 // zlib said that there is no more room in the buffer allocated for
483 // the decompressed data. Enlarge that buffer and try again.
485 if (uncompressed_data_allocated_
== MAX_BUFFER_SIZE
) {
486 error("ijar does not support decompressing files "
487 "larger than %dMB.\n",
488 (int) (MAX_BUFFER_SIZE
/(1024*1024)));
492 uncompressed_data_allocated_
*= 2;
493 if (uncompressed_data_allocated_
> MAX_BUFFER_SIZE
) {
494 uncompressed_data_allocated_
= MAX_BUFFER_SIZE
;
497 uncompressed_data_
= reinterpret_cast<u1
*>(
498 realloc(uncompressed_data_
, uncompressed_data_allocated_
));
507 error("zlib returned error code %d during inflate.\n", ret
);
514 int InputZipFile::ProcessFile(const bool compressed
) {
517 file_data
= UncompressFile();
518 if (file_data
== NULL
) {
522 // In this case, compressed_size_ == uncompressed_size_ (since the file is
523 // uncompressed), so we can use either.
524 if (compressed_size_
!= uncompressed_size_
) {
525 return error("compressed size != uncompressed size, although the file "
526 "is uncompressed.\n");
529 if (EnsureRemaining(compressed_size_
, "file_data") < 0) {
533 p
+= compressed_size_
;
535 processor
->Process(filename
, attr
, file_data
, uncompressed_size_
);
540 // Reads and returns some metadata of the next file from the central directory:
542 // - uncompressed size
543 // - whether the entry is a class file (to be included in the output).
544 // Precondition: p points to the beginning of an entry in the central dir
545 // Postcondition: p points to the beginning of the next entry in the central dir
546 // Returns true if the central directory contains another file and false if not.
547 // Of course, in the latter case, the size output variables are not changed.
548 // Note that the central directory is always followed by another data structure
549 // that has a signature, so parsing it this way is safe.
550 static bool ProcessCentralDirEntry(
551 const u1
*&p
, size_t *compressed_size
, size_t *uncompressed_size
,
552 char *filename
, size_t filename_size
, u4
*attr
, u4
*offset
) {
553 u4 signature
= get_u4le(p
);
554 if (signature
!= CENTRAL_FILE_HEADER_SIGNATURE
) {
558 p
+= 16; // skip to 'compressed size' field
559 *compressed_size
= get_u4le(p
);
560 *uncompressed_size
= get_u4le(p
);
561 u2 file_name_length
= get_u2le(p
);
562 u2 extra_field_length
= get_u2le(p
);
563 u2 file_comment_length
= get_u2le(p
);
564 p
+= 4; // skip to external file attributes field
566 *offset
= get_u4le(p
);
568 size_t len
= (file_name_length
< filename_size
)
570 : (filename_size
- 1);
571 memcpy(reinterpret_cast<void*>(filename
), p
, len
);
574 p
+= file_name_length
;
575 p
+= extra_field_length
;
576 p
+= file_comment_length
;
580 // Gives a maximum bound on the size of the interface JAR. Basically, adds
581 // the difference between the compressed and uncompressed sizes to the size
582 // of the input file.
583 u8
InputZipFile::CalculateOutputLength() {
584 const u1
* current
= central_dir_
;
586 u8 compressed_size
= 0;
587 u8 uncompressed_size
= 0;
588 u8 skipped_compressed_size
= 0;
591 char filename
[PATH_MAX
];
594 size_t file_compressed
, file_uncompressed
;
595 if (!ProcessCentralDirEntry(current
,
596 &file_compressed
, &file_uncompressed
,
597 filename
, PATH_MAX
, &attr
, &offset
)) {
601 if (processor
->Accept(filename
, attr
)) {
602 compressed_size
+= (u8
) file_compressed
;
603 uncompressed_size
+= (u8
) file_uncompressed
;
605 skipped_compressed_size
+= file_compressed
;
609 // The worst case is when the output is simply the input uncompressed. The
610 // metadata in the zip file will stay the same, so the file will grow by the
611 // difference between the compressed and uncompressed sizes.
612 return (u8
) in_length_
- skipped_compressed_size
613 + (uncompressed_size
- compressed_size
);
616 // Given the data in the zip file, returns the offset of the central directory
617 // and the number of files contained in it.
618 bool FindZipCentralDirectory(const u1
* bytes
, size_t in_length
,
619 u4
* offset
, const u1
** central_dir
) {
620 static const int MAX_COMMENT_LENGTH
= 0xffff;
621 static const int CENTRAL_DIR_LOCATOR_SIZE
= 22;
622 // Maximum distance of start of central dir locator from end of file
623 static const int MAX_DELTA
= MAX_COMMENT_LENGTH
+ CENTRAL_DIR_LOCATOR_SIZE
;
624 const u1
* last_pos_to_check
= in_length
< MAX_DELTA
626 : bytes
+ (in_length
- MAX_DELTA
);
630 for (current
= bytes
+ in_length
- CENTRAL_DIR_LOCATOR_SIZE
;
631 current
>= last_pos_to_check
;
633 const u1
* p
= current
;
634 if (get_u4le(p
) != END_OF_CENTRAL_DIR_SIGNATURE
) {
638 p
+= 16; // skip to comment length field
639 u2 comment_length
= get_u2le(p
);
641 // Does the comment go exactly till the end of the file?
642 if (current
+ comment_length
+ CENTRAL_DIR_LOCATOR_SIZE
643 != bytes
+ in_length
) {
647 // Hooray, we found it!
653 fprintf(stderr
, "file is invalid or corrupted (missing end of central "
654 "directory record)\n");
658 const u1
* end_of_central_dir
= current
;
659 get_u4le(current
); // central directory locator signature, already checked
660 u2 number_of_this_disk
= get_u2le(current
);
661 u2 disk_with_central_dir
= get_u2le(current
);
662 u2 central_dir_entries_on_this_disk
= get_u2le(current
);
663 u2 central_dir_entries
= get_u2le(current
);
664 u4 central_dir_size
= get_u4le(current
);
665 u4 central_dir_offset
= get_u4le(current
);
666 u2 file_comment_length
= get_u2le(current
);
667 current
+= file_comment_length
; // set current to the end of the central dir
669 if (number_of_this_disk
!= 0
670 || disk_with_central_dir
!= 0
671 || central_dir_entries_on_this_disk
!= central_dir_entries
) {
672 fprintf(stderr
, "multi-disk JAR files are not supported\n");
676 // Do not change output values before determining that they are OK.
677 *offset
= central_dir_offset
;
678 // Central directory start can then be used to determine the actual
679 // starts of the zip file (which can be different in case of a non-zip
680 // header like for auto-extractable binaries).
681 *central_dir
= end_of_central_dir
- central_dir_size
;
685 void InputZipFile::Reset() {
686 central_dir_current_
= central_dir_
;
687 zipdata_in_mapped_
= zipdata_in_
;
688 p
= zipdata_in_
+ in_offset_
;
691 int ZipExtractor::ProcessAll() {
692 while (ProcessNext()) {}
693 if (GetError() != NULL
) {
699 ZipExtractor
* ZipExtractor::Create(const char* filename
,
700 ZipExtractorProcessor
*processor
) {
701 int fd_in
= open(filename
, O_RDONLY
);
706 off_t length
= lseek(fd_in
, 0, SEEK_END
);
711 void *zipdata_in
= mmap(NULL
, length
, PROT_READ
, MAP_PRIVATE
, fd_in
, 0);
712 if (zipdata_in
== MAP_FAILED
) {
716 u4 central_dir_offset
;
717 const u1
*central_dir
= NULL
;
719 if (!devtools_ijar::FindZipCentralDirectory(
720 static_cast<const u1
*>(zipdata_in
), length
,
721 ¢ral_dir_offset
, ¢ral_dir
)) {
722 errno
= EIO
; // we don't really have a good error number
725 const u1
*zipdata_start
= static_cast<const u1
*>(zipdata_in
);
726 off_t offset
= - static_cast<off_t
>(zipdata_start
730 return new InputZipFile(processor
, fd_in
, length
, offset
,
731 zipdata_start
, central_dir
);
734 InputZipFile::InputZipFile(ZipExtractorProcessor
*processor
, int fd
,
735 off_t in_length
, off_t in_offset
,
736 const u1
* zipdata_in
, const u1
* central_dir
)
737 : processor(processor
), fd_in(fd
),
738 zipdata_in_(zipdata_in
), zipdata_in_mapped_(zipdata_in
),
739 central_dir_(central_dir
), in_length_(in_length
), in_offset_(in_offset
),
740 p(zipdata_in
+ in_offset
), central_dir_current_(central_dir
) {
741 uncompressed_data_allocated_
= INITIAL_BUFFER_SIZE
;
743 reinterpret_cast<u1
*>(malloc(uncompressed_data_allocated_
));
747 InputZipFile::~InputZipFile() {
748 free(uncompressed_data_
);
754 // Implementation of OutputZipFile
756 int OutputZipFile::WriteEmptyFile(const char *filename
) {
757 const u1
* file_name
= (const u1
*) filename
;
758 size_t file_name_length
= strlen(filename
);
760 LocalFileEntry
*entry
= new LocalFileEntry
;
761 entry
->local_header_offset
= Offset(q
);
762 entry
->external_attr
= 0;
764 // Output the ZIP local_file_header:
765 put_u4le(q
, LOCAL_FILE_HEADER_SIGNATURE
);
766 put_u2le(q
, 10); // extract_version
767 put_u2le(q
, 0); // general_purpose_bit_flag
768 put_u2le(q
, 0); // compression_method
769 put_u2le(q
, 0); // last_mod_file_time
770 put_u2le(q
, 0); // last_mod_file_date
771 put_u4le(q
, 0); // crc32
772 put_u4le(q
, 0); // compressed_size
773 put_u4le(q
, 0); // uncompressed_size
774 put_u2le(q
, file_name_length
);
775 put_u2le(q
, 0); // extra_field_length
776 put_n(q
, file_name
, file_name_length
);
778 entry
->file_name_length
= file_name_length
;
779 entry
->extra_field_length
= 0;
780 entry
->compressed_length
= 0;
781 entry
->uncompressed_length
= 0;
782 entry
->compression_method
= 0;
783 entry
->extra_field
= (const u1
*)"";
784 entry
->file_name
= (u1
*) strdup((const char *) file_name
);
785 entries_
.push_back(entry
);
790 void OutputZipFile::WriteCentralDirectory() {
791 // central directory:
792 const u1
*central_directory_start
= q
;
793 for (size_t ii
= 0; ii
< entries_
.size(); ++ii
) {
794 LocalFileEntry
*entry
= entries_
[ii
];
795 put_u4le(q
, CENTRAL_FILE_HEADER_SIGNATURE
);
796 put_u2le(q
, 0); // version made by
798 put_u2le(q
, ZIP_VERSION_TO_EXTRACT
); // version to extract
799 put_u2le(q
, 0); // general purpose bit flag
800 put_u2le(q
, entry
->compression_method
); // compression method:
801 put_u2le(q
, 0); // last_mod_file_time
802 put_u2le(q
, 0); // last_mod_file_date
803 put_u4le(q
, 0); // crc32 (jar/javac tools don't care)
804 put_u4le(q
, entry
->compressed_length
); // compressed_size
805 put_u4le(q
, entry
->uncompressed_length
); // uncompressed_size
806 put_u2le(q
, entry
->file_name_length
);
807 put_u2le(q
, entry
->extra_field_length
);
809 put_u2le(q
, 0); // file comment length
810 put_u2le(q
, 0); // disk number start
811 put_u2le(q
, 0); // internal file attributes
812 put_u4le(q
, entry
->external_attr
); // external file attributes
813 // relative offset of local header:
814 put_u4le(q
, entry
->local_header_offset
);
816 put_n(q
, entry
->file_name
, entry
->file_name_length
);
817 put_n(q
, entry
->extra_field
, entry
->extra_field_length
);
819 u4 central_directory_size
= q
- central_directory_start
;
821 put_u4le(q
, END_OF_CENTRAL_DIR_SIGNATURE
);
822 put_u2le(q
, 0); // number of this disk
823 put_u2le(q
, 0); // number of the disk with the start of the central directory
824 put_u2le(q
, entries_
.size()); // # central dir entries on this disk
825 put_u2le(q
, entries_
.size()); // total # entries in the central directory
826 put_u4le(q
, central_directory_size
); // size of the central directory
827 put_u4le(q
, Offset(central_directory_start
)); // offset of start of central
828 // directory wrt starting disk
829 put_u2le(q
, 0); // .ZIP file comment length
832 u1
* OutputZipFile::WriteLocalFileHeader(const char* filename
, const u4 attr
) {
833 off_t file_name_length_
= strlen(filename
);
834 LocalFileEntry
*entry
= new LocalFileEntry
;
835 entry
->local_header_offset
= Offset(q
);
836 entry
->file_name_length
= file_name_length_
;
837 entry
->file_name
= new u1
[file_name_length_
];
838 entry
->external_attr
= attr
;
839 memcpy(entry
->file_name
, filename
, file_name_length_
);
840 entry
->extra_field_length
= 0;
841 entry
->extra_field
= (const u1
*)"";
843 // Output the ZIP local_file_header:
844 put_u4le(q
, LOCAL_FILE_HEADER_SIGNATURE
);
845 put_u2le(q
, ZIP_VERSION_TO_EXTRACT
); // version to extract
846 put_u2le(q
, 0); // general purpose bit flag
848 put_u2le(q
, COMPRESSION_METHOD_STORED
); // compression method = placeholder
849 put_u2le(q
, 0); // last_mod_file_time
850 put_u2le(q
, 0); // last_mod_file_date
851 put_u4le(q
, 0); // crc32 (jar/javac tools don't care)
852 put_u4le(q
, 0); // compressed_size = placeholder
853 put_u4le(q
, 0); // uncompressed_size = placeholder
854 put_u2le(q
, entry
->file_name_length
);
855 put_u2le(q
, entry
->extra_field_length
);
857 put_n(q
, entry
->file_name
, entry
->file_name_length
);
858 put_n(q
, entry
->extra_field
, entry
->extra_field_length
);
859 entries_
.push_back(entry
);
864 // Try to compress a file entry in memory using the deflate algorithm.
865 // It will compress buf (of size length) unless the compressed size is bigger
866 // than the input size. The result will overwrite the content of buf and the
867 // final size is returned.
868 size_t TryDeflate(u1
*buf
, size_t length
) {
869 u1
*outbuf
= reinterpret_cast<u1
*>(malloc(length
));
872 // Initialize the z_stream strcut for reading from buf and wrinting in outbuf.
873 stream
.zalloc
= Z_NULL
;
874 stream
.zfree
= Z_NULL
;
875 stream
.opaque
= Z_NULL
;
876 stream
.total_in
= length
;
877 stream
.avail_in
= length
;
878 stream
.total_out
= length
;
879 stream
.avail_out
= length
;
880 stream
.next_in
= buf
;
881 stream
.next_out
= outbuf
;
883 if (deflateInit(&stream
, Z_DEFAULT_COMPRESSION
) != Z_OK
) {
884 // Failure to compress => return the buffer uncompressed
889 if (deflate(&stream
, Z_FINISH
) == Z_STREAM_END
) {
890 // Compression successful and fits in outbuf, let's copy the result in buf.
891 length
= stream
.total_out
;
892 memcpy(buf
, outbuf
, length
);
898 // Return the length of the resulting buffer
902 size_t OutputZipFile::WriteFileSizeInLocalFileHeader(u1
*header_ptr
,
905 size_t compressed_size
= out_length
;
907 compressed_size
= TryDeflate(q
, out_length
);
909 // compression method
910 if (compressed_size
< out_length
) {
911 put_u2le(header_ptr
, COMPRESSION_METHOD_DEFLATED
);
913 put_u2le(header_ptr
, COMPRESSION_METHOD_STORED
);
916 put_u4le(header_ptr
, compressed_size
); // compressed_size
917 put_u4le(header_ptr
, out_length
); // uncompressed_size
918 return compressed_size
;
921 int OutputZipFile::Finish() {
923 WriteCentralDirectory();
924 if (ftruncate(fd_out
, GetSize()) < 0) {
925 return error("ftruncate(fd_out, GetSize()): %s", strerror(errno
));
927 if (close(fd_out
) < 0) {
928 return error("close(fd_out): %s", strerror(errno
));
935 u1
* OutputZipFile::NewFile(const char* filename
, const u4 attr
) {
936 header_ptr
= WriteLocalFileHeader(filename
, attr
);
940 int OutputZipFile::FinishFile(size_t filelength
, bool compress
) {
941 size_t compressed_size
=
942 WriteFileSizeInLocalFileHeader(header_ptr
, filelength
, compress
);
943 entries_
.back()->compressed_length
= compressed_size
;
944 entries_
.back()->uncompressed_length
= filelength
;
945 if (compressed_size
< filelength
) {
946 entries_
.back()->compression_method
= COMPRESSION_METHOD_DEFLATED
;
948 entries_
.back()->compression_method
= COMPRESSION_METHOD_STORED
;
950 q
+= compressed_size
;
954 ZipBuilder
* ZipBuilder::Create(const char* zip_file
, u8 estimated_size
) {
955 if (estimated_size
> kMaximumOutputSize
) {
957 "Uncompressed input jar has size %llu, "
958 "which exceeds the maximum supported output size %llu.\n"
959 "Assuming that ijar will be smaller and hoping for the best.\n",
960 estimated_size
, kMaximumOutputSize
);
961 estimated_size
= kMaximumOutputSize
;
964 int fd_out
= open(zip_file
, O_CREAT
|O_RDWR
|O_TRUNC
, 0644);
969 // Create mmap-able sparse file
970 if (ftruncate(fd_out
, estimated_size
) < 0) {
974 // Ensure that any buffer overflow in JarStripper will result in
975 // SIGSEGV or SIGBUS by over-allocating beyond the end of the file.
976 size_t mmap_length
= std::min(estimated_size
+ sysconf(_SC_PAGESIZE
),
977 (u8
) std::numeric_limits
<size_t>::max());
979 void *zipdata_out
= mmap(NULL
, mmap_length
, PROT_WRITE
,
980 MAP_SHARED
, fd_out
, 0);
981 if (zipdata_out
== MAP_FAILED
) {
982 fprintf(stderr
, "output_length=%llu\n", estimated_size
);
986 return new OutputZipFile(fd_out
, (u1
*) zipdata_out
);
989 u8
ZipBuilder::EstimateSize(char **files
) {
991 // Digital signature field size = 6, End of central directory = 22, Total = 28
993 // Count the size of all the files in the input to estimate the size of the
995 for (int i
= 0; files
[i
] != NULL
; i
++) {
996 if (stat(files
[i
], &statst
) != 0) {
997 fprintf(stderr
, "File %s does not seem to exist.", files
[i
]);
1000 size
+= statst
.st_size
;
1001 // Add sizes of Zip meta data
1002 // local file header = 30 bytes
1003 // data descriptor = 12 bytes
1004 // central directory descriptor = 46 bytes
1007 // The filename is stored twice (once in the central directory
1008 // and once in the local file header).
1009 size
+= strlen(files
[i
]) * 2;
1014 } // namespace devtools_ijar