Roll src/third_party/skia ef6c50a:d0fff5b
[chromium-blink-merge.git] / third_party / ijar / zip.cc
blobcb9d1cc9d776e6b311e983fdbaec579faaf7730e
1 // Copyright 2007 Alan Donovan. All rights reserved.
2 //
3 // Author: Alan Donovan <adonovan@google.com>
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License");
6 // you may not use this file except in compliance with the License.
7 // You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
17 // zip.cc -- .zip (.jar) file reading/writing routines.
20 // See README.txt for details.
22 // See http://www.pkware.com/documents/casestudies/APPNOTE.TXT
23 // for definition of PKZIP file format.
25 #define _FILE_OFFSET_BITS 64 // Support zip files larger than 2GB
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

#include <algorithm>
#include <limits>
#include <vector>

#include "third_party/ijar/zip.h"
#include <zlib.h>
// Record signatures used by the PKZIP format (see APPNOTE.TXT).
#define LOCAL_FILE_HEADER_SIGNATURE   0x04034b50
#define CENTRAL_FILE_HEADER_SIGNATURE 0x02014b50
#define END_OF_CENTRAL_DIR_SIGNATURE  0x06054b50
#define DATA_DESCRIPTOR_SIGNATURE     0x08074b50

// version to extract: 1.0 - default value from APPNOTE.TXT.
// Output JAR files contain no extra ZIP features, so this is enough.
#define ZIP_VERSION_TO_EXTRACT 10

// Values of the "compression method" header field handled by this file.
#define COMPRESSION_METHOD_STORED   0  // no compression
#define COMPRESSION_METHOD_DEFLATED 8

// Bits of the "general purpose bit flag" header field. In this file, bit 3
// (GENERAL_PURPOSE_BIT_FLAG_COMPRESSED) is the bit that is tested to decide
// whether a data descriptor follows the entry's data.
#define GENERAL_PURPOSE_BIT_FLAG_COMPRESSED (1 << 3)
#define GENERAL_PURPOSE_BIT_FLAG_UTF8_ENCODED (1 << 11)
#define GENERAL_PURPOSE_BIT_FLAG_COMPRESSION_SPEED ((1 << 2) | (1 << 1))

// Every flag bit the reader accepts; any other bit makes it reject the entry.
#define GENERAL_PURPOSE_BIT_FLAG_SUPPORTED        \
  (GENERAL_PURPOSE_BIT_FLAG_COMPRESSED            \
   | GENERAL_PURPOSE_BIT_FLAG_UTF8_ENCODED        \
   | GENERAL_PURPOSE_BIT_FLAG_COMPRESSION_SPEED)
63 namespace devtools_ijar {
64 // In the absence of ZIP64 support, zip files are limited to 4GB.
65 // http://www.info-zip.org/FAQ.html#limits
66 static const u8 kMaximumOutputSize = std::numeric_limits<uint32_t>::max();
68 static bool ProcessCentralDirEntry(const u1 *&p,
69 size_t *compressed_size,
70 size_t *uncompressed_size,
71 char *filename,
72 size_t filename_size,
73 u4 *attr,
74 u4 *offset);
77 // A class representing a ZipFile for reading. Its public API is exposed
78 // using the ZipExtractor abstract class.
80 class InputZipFile : public ZipExtractor {
81 public:
82 InputZipFile(ZipExtractorProcessor *processor, int fd, off_t in_length,
83 off_t in_offset, const u1* zipdata_in, const u1* central_dir);
84 virtual ~InputZipFile();
86 virtual const char* GetError() {
87 if (errmsg[0] == 0) {
88 return NULL;
90 return errmsg;
93 virtual bool ProcessNext();
94 virtual void Reset();
95 virtual size_t GetSize() {
96 return in_length_;
99 virtual u8 CalculateOutputLength();
101 private:
102 ZipExtractorProcessor *processor;
104 int fd_in; // Input file descripor
106 // InputZipFile is responsible for maintaining the following
107 // pointers. They are allocated by the Create() method before
108 // the object is actually created using mmap.
109 const u1 * const zipdata_in_; // start of input file mmap
110 const u1 * zipdata_in_mapped_; // start of still mapped region
111 const u1 * const central_dir_; // central directory in input file
113 size_t in_length_; // size of the input file
114 size_t in_offset_; // offset the input file
116 const u1 *p; // input cursor
118 const u1* central_dir_current_; // central dir input cursor
120 // Buffer size is initially INITIAL_BUFFER_SIZE. It doubles in size every
121 // time it is found too small, until it reaches MAX_BUFFER_SIZE. If that is
122 // not enough, we bail out. We only decompress class files, so they should
123 // be smaller than 64K anyway, but we give a little leeway.
124 // MAX_BUFFER_SIZE must be bigger than the size of the biggest file in the
125 // ZIP. It is set to 128M here so we can uncompress the Bazel server with
126 // this library.
127 static const size_t INITIAL_BUFFER_SIZE = 256 * 1024; // 256K
128 static const size_t MAX_BUFFER_SIZE = 128 * 1024 * 1024;
129 static const size_t MAX_MAPPED_REGION = 32 * 1024 * 1024;
131 // These metadata fields are the fields of the ZIP header of the file being
132 // processed.
133 u2 extract_version_;
134 u2 general_purpose_bit_flag_;
135 u2 compression_method_;
136 u4 uncompressed_size_;
137 u4 compressed_size_;
138 u2 file_name_length_;
139 u2 extra_field_length_;
140 const u1 *file_name_;
141 const u1 *extra_field_;
143 // Administration of memory reserved for decompressed data. We use the same
144 // buffer for each file to avoid some malloc()/free() calls and free the
145 // memory only in the dtor. C-style memory management is used so that we
146 // can call realloc.
147 u1 *uncompressed_data_;
148 size_t uncompressed_data_allocated_;
150 // Copy of the last filename entry - Null-terminated.
151 char filename[PATH_MAX];
152 // The external file attribute field
153 u4 attr;
155 // last error
156 char errmsg[4*PATH_MAX];
158 int error(const char *fmt, ...) {
159 va_list ap;
160 va_start(ap, fmt);
161 vsnprintf(errmsg, 4*PATH_MAX, fmt, ap);
162 va_end(ap);
163 return -1;
166 // Check that at least n bytes remain in the input file, otherwise
167 // abort with an error message. "state" is the name of the field
168 // we're about to read, for diagnostics.
169 int EnsureRemaining(size_t n, const char *state) {
170 size_t in_offset = p - zipdata_in_;
171 size_t remaining = in_length_ - in_offset;
172 if (n > remaining) {
173 return error("Premature end of file (at offset %zd, state=%s); "
174 "expected %zd more bytes but found %zd.\n",
175 in_offset, state, n, remaining);
177 return 0;
180 // Read one entry from input zip file
181 int ProcessLocalFileEntry(size_t compressed_size, size_t uncompressed_size);
183 // Uncompress a file from the archive using zlib. The pointer returned
184 // is owned by InputZipFile, so it must not be freed. Advances the input
185 // cursor to the first byte after the compressed data.
186 u1* UncompressFile();
188 // Skip a file
189 int SkipFile(const bool compressed);
191 // Process a file
192 int ProcessFile(const bool compressed);
196 // A class implementing ZipBuilder that represent an open zip file for writing.
198 class OutputZipFile : public ZipBuilder {
199 public:
200 OutputZipFile(int fd, u1 * const zipdata_out) :
201 fd_out(fd),
202 zipdata_out_(zipdata_out),
203 q(zipdata_out) {
204 errmsg[0] = 0;
207 virtual const char* GetError() {
208 if (errmsg[0] == 0) {
209 return NULL;
211 return errmsg;
214 virtual ~OutputZipFile() { Finish(); }
215 virtual u1* NewFile(const char* filename, const u4 attr);
216 virtual int FinishFile(size_t filelength, bool compress = false);
217 virtual int WriteEmptyFile(const char *filename);
218 virtual size_t GetSize() {
219 return Offset(q);
221 virtual int GetNumberFiles() {
222 return entries_.size();
224 virtual int Finish();
226 private:
227 struct LocalFileEntry {
228 // Start of the local header (in the output buffer).
229 size_t local_header_offset;
231 // Sizes of the file entry
232 size_t uncompressed_length;
233 size_t compressed_length;
235 // Compression method
236 u2 compression_method;
238 // external attributes field
239 u4 external_attr;
241 // Start/length of the file_name in the local header.
242 u1 *file_name;
243 u2 file_name_length;
245 // Start/length of the extra_field in the local header.
246 const u1 *extra_field;
247 u2 extra_field_length;
250 int fd_out; // file descriptor for the output file
252 // OutputZipFile is responsible for maintaining the following
253 // pointers. They are allocated by the Create() method before
254 // the object is actually created using mmap.
255 u1 * const zipdata_out_; // start of output file mmap
256 u1 *q; // output cursor
258 u1 *header_ptr; // Current pointer to "compression method" entry.
260 // List of entries to write the central directory
261 std::vector<LocalFileEntry*> entries_;
263 // last error
264 char errmsg[4*PATH_MAX];
266 int error(const char *fmt, ...) {
267 va_list ap;
268 va_start(ap, fmt);
269 vsnprintf(errmsg, 4*PATH_MAX, fmt, ap);
270 va_end(ap);
271 return -1;
274 // Write the ZIP central directory structure for each local file
275 // entry in "entries".
276 void WriteCentralDirectory();
278 // Returns the offset of the pointer relative to the start of the
279 // output zip file.
280 size_t Offset(const u1 *const x) {
281 return x - zipdata_out_;
284 // Write ZIP file header in the output. Since the compressed size is not
285 // known in advance, it must be recorded later. This method returns a pointer
286 // to "compressed size" in the file header that should be passed to
287 // WriteFileSizeInLocalFileHeader() later.
288 u1* WriteLocalFileHeader(const char *filename, const u4 attr);
290 // Fill in the "compressed size" and "uncompressed size" fields in a local
291 // file header previously written by WriteLocalFileHeader().
292 size_t WriteFileSizeInLocalFileHeader(u1 *header_ptr, size_t out_length,
293 bool compress = false);
297 // Implementation of InputZipFile
299 bool InputZipFile::ProcessNext() {
300 // Process the next entry in the central directory. Also make sure that the
301 // content pointer is in sync.
302 size_t compressed, uncompressed;
303 u4 offset;
304 if (!ProcessCentralDirEntry(central_dir_current_, &compressed, &uncompressed,
305 filename, PATH_MAX, &attr, &offset)) {
306 return false;
309 // There might be an offset specified in the central directory that does
310 // not match the file offset, if so, correct the pointer.
311 if (offset != 0 && (p != (zipdata_in_ + in_offset_ + offset))) {
312 p = zipdata_in_ + offset;
315 if (EnsureRemaining(4, "signature") < 0) {
316 return false;
318 u4 signature = get_u4le(p);
319 if (signature == LOCAL_FILE_HEADER_SIGNATURE) {
320 if (ProcessLocalFileEntry(compressed, uncompressed) < 0) {
321 return false;
323 } else {
324 error("local file header signature for file %s not found\n", filename);
325 return false;
328 return true;
331 int InputZipFile::ProcessLocalFileEntry(
332 size_t compressed_size, size_t uncompressed_size) {
333 if (EnsureRemaining(26, "extract_version") < 0) {
334 return -1;
336 extract_version_ = get_u2le(p);
337 general_purpose_bit_flag_ = get_u2le(p);
339 if ((general_purpose_bit_flag_ & ~GENERAL_PURPOSE_BIT_FLAG_SUPPORTED) != 0) {
340 return error("Unsupported value (0x%04x) in general purpose bit flag.\n",
341 general_purpose_bit_flag_);
344 compression_method_ = get_u2le(p);
346 if (compression_method_ != COMPRESSION_METHOD_DEFLATED &&
347 compression_method_ != COMPRESSION_METHOD_STORED) {
348 return error("Unsupported compression method (%d).\n",
349 compression_method_);
352 // skip over: last_mod_file_time, last_mod_file_date, crc32
353 p += 2 + 2 + 4;
354 compressed_size_ = get_u4le(p);
355 uncompressed_size_ = get_u4le(p);
356 file_name_length_ = get_u2le(p);
357 extra_field_length_ = get_u2le(p);
359 if (EnsureRemaining(file_name_length_, "file_name") < 0) {
360 return -1;
362 file_name_ = p;
363 p += file_name_length_;
365 if (EnsureRemaining(extra_field_length_, "extra_field") < 0) {
366 return -1;
368 extra_field_ = p;
369 p += extra_field_length_;
371 bool is_compressed = compression_method_ == COMPRESSION_METHOD_DEFLATED;
373 // If the zip is compressed, compressed and uncompressed size members are
374 // zero in the local file header. If not, check that they are the same as the
375 // lengths from the central directory, otherwise, just believe the central
376 // directory
377 if (compressed_size_ == 0) {
378 compressed_size_ = compressed_size;
379 } else {
380 if (compressed_size_ != compressed_size) {
381 return error("central directory and file header inconsistent\n");
385 if (uncompressed_size_ == 0) {
386 uncompressed_size_ = uncompressed_size;
387 } else {
388 if (uncompressed_size_ != uncompressed_size) {
389 return error("central directory and file header inconsistent\n");
393 if (processor->Accept(filename, attr)) {
394 if (ProcessFile(is_compressed) < 0) {
395 return -1;
397 } else {
398 if (SkipFile(is_compressed) < 0) {
399 return -1;
403 if (general_purpose_bit_flag_ & GENERAL_PURPOSE_BIT_FLAG_COMPRESSED) {
404 // Skip the data descriptor. Some implementations do not put the signature
405 // here, so check if the next 4 bytes are a signature, and if so, skip the
406 // next 12 bytes (for CRC, compressed/uncompressed size), otherwise skip
407 // the next 8 bytes (because the value just read was the CRC).
408 u4 signature = get_u4le(p);
409 if (signature == DATA_DESCRIPTOR_SIGNATURE) {
410 p += 4 * 3;
411 } else {
412 p += 4 * 2;
416 if (p > zipdata_in_mapped_ + MAX_MAPPED_REGION) {
417 munmap(const_cast<u1 *>(zipdata_in_mapped_), MAX_MAPPED_REGION);
418 zipdata_in_mapped_ += MAX_MAPPED_REGION;
421 return 0;
424 int InputZipFile::SkipFile(const bool compressed) {
425 if (!compressed) {
426 // In this case, compressed_size_ == uncompressed_size_ (since the file is
427 // uncompressed), so we can use either.
428 if (compressed_size_ != uncompressed_size_) {
429 return error("compressed size != uncompressed size, although the file "
430 "is uncompressed.\n");
434 if (EnsureRemaining(compressed_size_, "file_data") < 0) {
435 return -1;
437 p += compressed_size_;
438 return 0;
441 u1* InputZipFile::UncompressFile() {
442 size_t in_offset = p - zipdata_in_;
443 size_t remaining = in_length_ - in_offset;
444 z_stream stream;
446 stream.zalloc = Z_NULL;
447 stream.zfree = Z_NULL;
448 stream.opaque = Z_NULL;
449 stream.avail_in = remaining;
450 stream.next_in = (Bytef *) p;
452 int ret = inflateInit2(&stream, -MAX_WBITS);
453 if (ret != Z_OK) {
454 error("inflateInit: %d\n", ret);
455 return NULL;
458 int uncompressed_until_now = 0;
460 while (true) {
461 stream.avail_out = uncompressed_data_allocated_ - uncompressed_until_now;
462 stream.next_out = uncompressed_data_ + uncompressed_until_now;
463 int old_avail_out = stream.avail_out;
465 ret = inflate(&stream, Z_SYNC_FLUSH);
466 int uncompressed_now = old_avail_out - stream.avail_out;
467 uncompressed_until_now += uncompressed_now;
469 switch (ret) {
470 case Z_STREAM_END: {
471 // zlib said that there is no more data to decompress.
473 u1 *new_p = reinterpret_cast<u1*>(stream.next_in);
474 compressed_size_ = new_p - p;
475 uncompressed_size_ = uncompressed_until_now;
476 p = new_p;
477 inflateEnd(&stream);
478 return uncompressed_data_;
481 case Z_OK: {
482 // zlib said that there is no more room in the buffer allocated for
483 // the decompressed data. Enlarge that buffer and try again.
485 if (uncompressed_data_allocated_ == MAX_BUFFER_SIZE) {
486 error("ijar does not support decompressing files "
487 "larger than %dMB.\n",
488 (int) (MAX_BUFFER_SIZE/(1024*1024)));
489 return NULL;
492 uncompressed_data_allocated_ *= 2;
493 if (uncompressed_data_allocated_ > MAX_BUFFER_SIZE) {
494 uncompressed_data_allocated_ = MAX_BUFFER_SIZE;
497 uncompressed_data_ = reinterpret_cast<u1*>(
498 realloc(uncompressed_data_, uncompressed_data_allocated_));
499 break;
502 case Z_DATA_ERROR:
503 case Z_BUF_ERROR:
504 case Z_STREAM_ERROR:
505 case Z_NEED_DICT:
506 default: {
507 error("zlib returned error code %d during inflate.\n", ret);
508 return NULL;
514 int InputZipFile::ProcessFile(const bool compressed) {
515 const u1 *file_data;
516 if (compressed) {
517 file_data = UncompressFile();
518 if (file_data == NULL) {
519 return -1;
521 } else {
522 // In this case, compressed_size_ == uncompressed_size_ (since the file is
523 // uncompressed), so we can use either.
524 if (compressed_size_ != uncompressed_size_) {
525 return error("compressed size != uncompressed size, although the file "
526 "is uncompressed.\n");
529 if (EnsureRemaining(compressed_size_, "file_data") < 0) {
530 return -1;
532 file_data = p;
533 p += compressed_size_;
535 processor->Process(filename, attr, file_data, uncompressed_size_);
536 return 0;
540 // Reads and returns some metadata of the next file from the central directory:
541 // - compressed size
542 // - uncompressed size
543 // - whether the entry is a class file (to be included in the output).
544 // Precondition: p points to the beginning of an entry in the central dir
545 // Postcondition: p points to the beginning of the next entry in the central dir
546 // Returns true if the central directory contains another file and false if not.
547 // Of course, in the latter case, the size output variables are not changed.
548 // Note that the central directory is always followed by another data structure
549 // that has a signature, so parsing it this way is safe.
550 static bool ProcessCentralDirEntry(
551 const u1 *&p, size_t *compressed_size, size_t *uncompressed_size,
552 char *filename, size_t filename_size, u4 *attr, u4 *offset) {
553 u4 signature = get_u4le(p);
554 if (signature != CENTRAL_FILE_HEADER_SIGNATURE) {
555 return false;
558 p += 16; // skip to 'compressed size' field
559 *compressed_size = get_u4le(p);
560 *uncompressed_size = get_u4le(p);
561 u2 file_name_length = get_u2le(p);
562 u2 extra_field_length = get_u2le(p);
563 u2 file_comment_length = get_u2le(p);
564 p += 4; // skip to external file attributes field
565 *attr = get_u4le(p);
566 *offset = get_u4le(p);
568 size_t len = (file_name_length < filename_size)
569 ? file_name_length
570 : (filename_size - 1);
571 memcpy(reinterpret_cast<void*>(filename), p, len);
572 filename[len] = 0;
574 p += file_name_length;
575 p += extra_field_length;
576 p += file_comment_length;
577 return true;
580 // Gives a maximum bound on the size of the interface JAR. Basically, adds
581 // the difference between the compressed and uncompressed sizes to the size
582 // of the input file.
583 u8 InputZipFile::CalculateOutputLength() {
584 const u1* current = central_dir_;
586 u8 compressed_size = 0;
587 u8 uncompressed_size = 0;
588 u8 skipped_compressed_size = 0;
589 u4 attr;
590 u4 offset;
591 char filename[PATH_MAX];
593 while (true) {
594 size_t file_compressed, file_uncompressed;
595 if (!ProcessCentralDirEntry(current,
596 &file_compressed, &file_uncompressed,
597 filename, PATH_MAX, &attr, &offset)) {
598 break;
601 if (processor->Accept(filename, attr)) {
602 compressed_size += (u8) file_compressed;
603 uncompressed_size += (u8) file_uncompressed;
604 } else {
605 skipped_compressed_size += file_compressed;
609 // The worst case is when the output is simply the input uncompressed. The
610 // metadata in the zip file will stay the same, so the file will grow by the
611 // difference between the compressed and uncompressed sizes.
612 return (u8) in_length_ - skipped_compressed_size
613 + (uncompressed_size - compressed_size);
616 // Given the data in the zip file, returns the offset of the central directory
617 // and the number of files contained in it.
618 bool FindZipCentralDirectory(const u1* bytes, size_t in_length,
619 u4* offset, const u1** central_dir) {
620 static const int MAX_COMMENT_LENGTH = 0xffff;
621 static const int CENTRAL_DIR_LOCATOR_SIZE = 22;
622 // Maximum distance of start of central dir locator from end of file
623 static const int MAX_DELTA = MAX_COMMENT_LENGTH + CENTRAL_DIR_LOCATOR_SIZE;
624 const u1* last_pos_to_check = in_length < MAX_DELTA
625 ? bytes
626 : bytes + (in_length - MAX_DELTA);
627 const u1* current;
628 bool found = false;
630 for (current = bytes + in_length - CENTRAL_DIR_LOCATOR_SIZE;
631 current >= last_pos_to_check;
632 current-- ) {
633 const u1* p = current;
634 if (get_u4le(p) != END_OF_CENTRAL_DIR_SIGNATURE) {
635 continue;
638 p += 16; // skip to comment length field
639 u2 comment_length = get_u2le(p);
641 // Does the comment go exactly till the end of the file?
642 if (current + comment_length + CENTRAL_DIR_LOCATOR_SIZE
643 != bytes + in_length) {
644 continue;
647 // Hooray, we found it!
648 found = true;
649 break;
652 if (!found) {
653 fprintf(stderr, "file is invalid or corrupted (missing end of central "
654 "directory record)\n");
655 return false;
658 const u1* end_of_central_dir = current;
659 get_u4le(current); // central directory locator signature, already checked
660 u2 number_of_this_disk = get_u2le(current);
661 u2 disk_with_central_dir = get_u2le(current);
662 u2 central_dir_entries_on_this_disk = get_u2le(current);
663 u2 central_dir_entries = get_u2le(current);
664 u4 central_dir_size = get_u4le(current);
665 u4 central_dir_offset = get_u4le(current);
666 u2 file_comment_length = get_u2le(current);
667 current += file_comment_length; // set current to the end of the central dir
669 if (number_of_this_disk != 0
670 || disk_with_central_dir != 0
671 || central_dir_entries_on_this_disk != central_dir_entries) {
672 fprintf(stderr, "multi-disk JAR files are not supported\n");
673 return false;
676 // Do not change output values before determining that they are OK.
677 *offset = central_dir_offset;
678 // Central directory start can then be used to determine the actual
679 // starts of the zip file (which can be different in case of a non-zip
680 // header like for auto-extractable binaries).
681 *central_dir = end_of_central_dir - central_dir_size;
682 return true;
685 void InputZipFile::Reset() {
686 central_dir_current_ = central_dir_;
687 zipdata_in_mapped_ = zipdata_in_;
688 p = zipdata_in_ + in_offset_;
691 int ZipExtractor::ProcessAll() {
692 while (ProcessNext()) {}
693 if (GetError() != NULL) {
694 return -1;
696 return 0;
699 ZipExtractor* ZipExtractor::Create(const char* filename,
700 ZipExtractorProcessor *processor) {
701 int fd_in = open(filename, O_RDONLY);
702 if (fd_in < 0) {
703 return NULL;
706 off_t length = lseek(fd_in, 0, SEEK_END);
707 if (length < 0) {
708 return NULL;
711 void *zipdata_in = mmap(NULL, length, PROT_READ, MAP_PRIVATE, fd_in, 0);
712 if (zipdata_in == MAP_FAILED) {
713 return NULL;
716 u4 central_dir_offset;
717 const u1 *central_dir = NULL;
719 if (!devtools_ijar::FindZipCentralDirectory(
720 static_cast<const u1*>(zipdata_in), length,
721 &central_dir_offset, &central_dir)) {
722 errno = EIO; // we don't really have a good error number
723 return NULL;
725 const u1 *zipdata_start = static_cast<const u1*>(zipdata_in);
726 off_t offset = - static_cast<off_t>(zipdata_start
727 + central_dir_offset
728 - central_dir);
730 return new InputZipFile(processor, fd_in, length, offset,
731 zipdata_start, central_dir);
734 InputZipFile::InputZipFile(ZipExtractorProcessor *processor, int fd,
735 off_t in_length, off_t in_offset,
736 const u1* zipdata_in, const u1* central_dir)
737 : processor(processor), fd_in(fd),
738 zipdata_in_(zipdata_in), zipdata_in_mapped_(zipdata_in),
739 central_dir_(central_dir), in_length_(in_length), in_offset_(in_offset),
740 p(zipdata_in + in_offset), central_dir_current_(central_dir) {
741 uncompressed_data_allocated_ = INITIAL_BUFFER_SIZE;
742 uncompressed_data_ =
743 reinterpret_cast<u1*>(malloc(uncompressed_data_allocated_));
744 errmsg[0] = 0;
747 InputZipFile::~InputZipFile() {
748 free(uncompressed_data_);
749 close(fd_in);
754 // Implementation of OutputZipFile
756 int OutputZipFile::WriteEmptyFile(const char *filename) {
757 const u1* file_name = (const u1*) filename;
758 size_t file_name_length = strlen(filename);
760 LocalFileEntry *entry = new LocalFileEntry;
761 entry->local_header_offset = Offset(q);
762 entry->external_attr = 0;
764 // Output the ZIP local_file_header:
765 put_u4le(q, LOCAL_FILE_HEADER_SIGNATURE);
766 put_u2le(q, 10); // extract_version
767 put_u2le(q, 0); // general_purpose_bit_flag
768 put_u2le(q, 0); // compression_method
769 put_u2le(q, 0); // last_mod_file_time
770 put_u2le(q, 0); // last_mod_file_date
771 put_u4le(q, 0); // crc32
772 put_u4le(q, 0); // compressed_size
773 put_u4le(q, 0); // uncompressed_size
774 put_u2le(q, file_name_length);
775 put_u2le(q, 0); // extra_field_length
776 put_n(q, file_name, file_name_length);
778 entry->file_name_length = file_name_length;
779 entry->extra_field_length = 0;
780 entry->compressed_length = 0;
781 entry->uncompressed_length = 0;
782 entry->compression_method = 0;
783 entry->extra_field = (const u1 *)"";
784 entry->file_name = (u1*) strdup((const char *) file_name);
785 entries_.push_back(entry);
787 return 0;
790 void OutputZipFile::WriteCentralDirectory() {
791 // central directory:
792 const u1 *central_directory_start = q;
793 for (size_t ii = 0; ii < entries_.size(); ++ii) {
794 LocalFileEntry *entry = entries_[ii];
795 put_u4le(q, CENTRAL_FILE_HEADER_SIGNATURE);
796 put_u2le(q, 0); // version made by
798 put_u2le(q, ZIP_VERSION_TO_EXTRACT); // version to extract
799 put_u2le(q, 0); // general purpose bit flag
800 put_u2le(q, entry->compression_method); // compression method:
801 put_u2le(q, 0); // last_mod_file_time
802 put_u2le(q, 0); // last_mod_file_date
803 put_u4le(q, 0); // crc32 (jar/javac tools don't care)
804 put_u4le(q, entry->compressed_length); // compressed_size
805 put_u4le(q, entry->uncompressed_length); // uncompressed_size
806 put_u2le(q, entry->file_name_length);
807 put_u2le(q, entry->extra_field_length);
809 put_u2le(q, 0); // file comment length
810 put_u2le(q, 0); // disk number start
811 put_u2le(q, 0); // internal file attributes
812 put_u4le(q, entry->external_attr); // external file attributes
813 // relative offset of local header:
814 put_u4le(q, entry->local_header_offset);
816 put_n(q, entry->file_name, entry->file_name_length);
817 put_n(q, entry->extra_field, entry->extra_field_length);
819 u4 central_directory_size = q - central_directory_start;
821 put_u4le(q, END_OF_CENTRAL_DIR_SIGNATURE);
822 put_u2le(q, 0); // number of this disk
823 put_u2le(q, 0); // number of the disk with the start of the central directory
824 put_u2le(q, entries_.size()); // # central dir entries on this disk
825 put_u2le(q, entries_.size()); // total # entries in the central directory
826 put_u4le(q, central_directory_size); // size of the central directory
827 put_u4le(q, Offset(central_directory_start)); // offset of start of central
828 // directory wrt starting disk
829 put_u2le(q, 0); // .ZIP file comment length
832 u1* OutputZipFile::WriteLocalFileHeader(const char* filename, const u4 attr) {
833 off_t file_name_length_ = strlen(filename);
834 LocalFileEntry *entry = new LocalFileEntry;
835 entry->local_header_offset = Offset(q);
836 entry->file_name_length = file_name_length_;
837 entry->file_name = new u1[file_name_length_];
838 entry->external_attr = attr;
839 memcpy(entry->file_name, filename, file_name_length_);
840 entry->extra_field_length = 0;
841 entry->extra_field = (const u1 *)"";
843 // Output the ZIP local_file_header:
844 put_u4le(q, LOCAL_FILE_HEADER_SIGNATURE);
845 put_u2le(q, ZIP_VERSION_TO_EXTRACT); // version to extract
846 put_u2le(q, 0); // general purpose bit flag
847 u1 *header_ptr = q;
848 put_u2le(q, COMPRESSION_METHOD_STORED); // compression method = placeholder
849 put_u2le(q, 0); // last_mod_file_time
850 put_u2le(q, 0); // last_mod_file_date
851 put_u4le(q, 0); // crc32 (jar/javac tools don't care)
852 put_u4le(q, 0); // compressed_size = placeholder
853 put_u4le(q, 0); // uncompressed_size = placeholder
854 put_u2le(q, entry->file_name_length);
855 put_u2le(q, entry->extra_field_length);
857 put_n(q, entry->file_name, entry->file_name_length);
858 put_n(q, entry->extra_field, entry->extra_field_length);
859 entries_.push_back(entry);
861 return header_ptr;
864 // Try to compress a file entry in memory using the deflate algorithm.
865 // It will compress buf (of size length) unless the compressed size is bigger
866 // than the input size. The result will overwrite the content of buf and the
867 // final size is returned.
868 size_t TryDeflate(u1 *buf, size_t length) {
869 u1 *outbuf = reinterpret_cast<u1 *>(malloc(length));
870 z_stream stream;
872 // Initialize the z_stream strcut for reading from buf and wrinting in outbuf.
873 stream.zalloc = Z_NULL;
874 stream.zfree = Z_NULL;
875 stream.opaque = Z_NULL;
876 stream.total_in = length;
877 stream.avail_in = length;
878 stream.total_out = length;
879 stream.avail_out = length;
880 stream.next_in = buf;
881 stream.next_out = outbuf;
883 if (deflateInit(&stream, Z_DEFAULT_COMPRESSION) != Z_OK) {
884 // Failure to compress => return the buffer uncompressed
885 free(outbuf);
886 return length;
889 if (deflate(&stream, Z_FINISH) == Z_STREAM_END) {
890 // Compression successful and fits in outbuf, let's copy the result in buf.
891 length = stream.total_out;
892 memcpy(buf, outbuf, length);
895 deflateEnd(&stream);
896 free(outbuf);
898 // Return the length of the resulting buffer
899 return length;
902 size_t OutputZipFile::WriteFileSizeInLocalFileHeader(u1 *header_ptr,
903 size_t out_length,
904 bool compress) {
905 size_t compressed_size = out_length;
906 if (compress) {
907 compressed_size = TryDeflate(q, out_length);
909 // compression method
910 if (compressed_size < out_length) {
911 put_u2le(header_ptr, COMPRESSION_METHOD_DEFLATED);
912 } else {
913 put_u2le(header_ptr, COMPRESSION_METHOD_STORED);
915 header_ptr += 8;
916 put_u4le(header_ptr, compressed_size); // compressed_size
917 put_u4le(header_ptr, out_length); // uncompressed_size
918 return compressed_size;
921 int OutputZipFile::Finish() {
922 if (fd_out > 0) {
923 WriteCentralDirectory();
924 if (ftruncate(fd_out, GetSize()) < 0) {
925 return error("ftruncate(fd_out, GetSize()): %s", strerror(errno));
927 if (close(fd_out) < 0) {
928 return error("close(fd_out): %s", strerror(errno));
930 fd_out = -1;
932 return 0;
935 u1* OutputZipFile::NewFile(const char* filename, const u4 attr) {
936 header_ptr = WriteLocalFileHeader(filename, attr);
937 return q;
940 int OutputZipFile::FinishFile(size_t filelength, bool compress) {
941 size_t compressed_size =
942 WriteFileSizeInLocalFileHeader(header_ptr, filelength, compress);
943 entries_.back()->compressed_length = compressed_size;
944 entries_.back()->uncompressed_length = filelength;
945 if (compressed_size < filelength) {
946 entries_.back()->compression_method = COMPRESSION_METHOD_DEFLATED;
947 } else {
948 entries_.back()->compression_method = COMPRESSION_METHOD_STORED;
950 q += compressed_size;
951 return 0;
954 ZipBuilder* ZipBuilder::Create(const char* zip_file, u8 estimated_size) {
955 if (estimated_size > kMaximumOutputSize) {
956 fprintf(stderr,
957 "Uncompressed input jar has size %llu, "
958 "which exceeds the maximum supported output size %llu.\n"
959 "Assuming that ijar will be smaller and hoping for the best.\n",
960 estimated_size, kMaximumOutputSize);
961 estimated_size = kMaximumOutputSize;
964 int fd_out = open(zip_file, O_CREAT|O_RDWR|O_TRUNC, 0644);
965 if (fd_out < 0) {
966 return NULL;
969 // Create mmap-able sparse file
970 if (ftruncate(fd_out, estimated_size) < 0) {
971 return NULL;
974 // Ensure that any buffer overflow in JarStripper will result in
975 // SIGSEGV or SIGBUS by over-allocating beyond the end of the file.
976 size_t mmap_length = std::min(estimated_size + sysconf(_SC_PAGESIZE),
977 (u8) std::numeric_limits<size_t>::max());
979 void *zipdata_out = mmap(NULL, mmap_length, PROT_WRITE,
980 MAP_SHARED, fd_out, 0);
981 if (zipdata_out == MAP_FAILED) {
982 fprintf(stderr, "output_length=%llu\n", estimated_size);
983 return NULL;
986 return new OutputZipFile(fd_out, (u1*) zipdata_out);
989 u8 ZipBuilder::EstimateSize(char **files) {
990 struct stat statst;
991 // Digital signature field size = 6, End of central directory = 22, Total = 28
992 u8 size = 28;
993 // Count the size of all the files in the input to estimate the size of the
994 // output.
995 for (int i = 0; files[i] != NULL; i++) {
996 if (stat(files[i], &statst) != 0) {
997 fprintf(stderr, "File %s does not seem to exist.", files[i]);
998 return 0;
1000 size += statst.st_size;
1001 // Add sizes of Zip meta data
1002 // local file header = 30 bytes
1003 // data descriptor = 12 bytes
1004 // central directory descriptor = 46 bytes
1005 // Total: 88bytes
1006 size += 88;
1007 // The filename is stored twice (once in the central directory
1008 // and once in the local file header).
1009 size += strlen(files[i]) * 2;
1011 return size;
1014 } // namespace devtools_ijar