1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 #ifndef THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_
5 #define THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_
9 #include "base/basictypes.h"
10 #include "base/callback.h"
11 #include "base/files/file.h"
12 #include "base/files/file_path.h"
13 #include "base/files/file_util.h"
14 #include "base/memory/scoped_ptr.h"
15 #include "base/memory/weak_ptr.h"
16 #include "base/time/time.h"
18 #if defined(USE_SYSTEM_MINIZIP)
19 #include <minizip/unzip.h>
21 #include "third_party/zlib/contrib/minizip/unzip.h"
26 // This class is used for reading zip files. A typical use case of this
27 // class is to scan entries in a zip file and extract them. The code will
// look like:
31 // reader.Open(zip_file_path);
32 // while (reader.HasMore()) {
33 // reader.OpenCurrentEntryInZip();
34 // reader.ExtractCurrentEntryIntoDirectory(output_directory_path);
35 // reader.AdvanceToNextEntry();
38 // For simplicity, error checking is omitted in the example code above. The
39 // production code should check return values from all of these functions.
41 // This class can also be used for random access of contents in a zip file
42 // using LocateAndOpenEntry().
46 // A callback that is called when the operation is successful, e.g. the
// |success_callback| argument of ExtractCurrentEntryToFilePathAsync().
47 typedef base::Closure SuccessCallback
;
48 // A callback that is called when the operation fails, e.g. the
// |failure_callback| argument of ExtractCurrentEntryToFilePathAsync().
49 typedef base::Closure FailureCallback
;
50 // A callback that is called periodically during the operation with the number
51 // of bytes that have been processed so far.
// It is the |progress_callback| argument of
// ExtractCurrentEntryToFilePathAsync(), which documents that it is called at
// least once.
52 typedef base::Callback
<void(int64
)> ProgressCallback
;
54 // This class represents information of an entry (file or directory) in
// a zip file.
// Constructs the entry information from |filename_in_zip| and
// |raw_file_info|.
// NOTE(review): |filename_in_zip| is presumably the entry name exactly as
// stored in the archive and |raw_file_info| the matching unz_file_info
// record for that entry -- confirm against the implementation.
58 EntryInfo(const std::string
& filename_in_zip
,
59 const unz_file_info
& raw_file_info
);
61 // Returns the file path. The path is usually relative like
62 // "foo/bar.txt", but if it's absolute, is_unsafe() returns true.
// The result is a reference to a member of this object, so it must not be
// used after this EntryInfo has been destroyed.
63 const base::FilePath
& file_path() const { return file_path_
; }
65 // Returns the size of the original file (i.e. its uncompressed size).
66 // Returns 0 if the entry is a directory.
67 // Note: this value should not be trusted, because it is stored as metadata
68 // in the zip archive and can be different from the real uncompressed size.
69 int64
original_size() const { return original_size_
; }
71 // Returns the last modified time. If the time stored in the zip file was
72 // not valid, the unix epoch will be returned.
//
74 // The time stored in the zip archive uses the MS-DOS date and time format.
75 // http://msdn.microsoft.com/en-us/library/ms724247(v=vs.85).aspx
76 // As such the following limitations apply:
77 // * only years from 1980 to 2107 can be represented.
78 // * the timestamp has a 2 second resolution.
79 // * there's no timezone information, so the time is interpreted as local.
80 base::Time
last_modified() const { return last_modified_
; }
82 // Returns true if the entry is a directory. For such entries
// original_size() is documented to return 0.
83 bool is_directory() const { return is_directory_
; }
85 // Returns true if the entry is unsafe, e.g. its file name contains ".." or
86 // invalid UTF-8 characters, or its file path is absolute.
87 bool is_unsafe() const { return is_unsafe_
; }
90 const base::FilePath file_path_
;
92 base::Time last_modified_
;
95 DISALLOW_COPY_AND_ASSIGN(EntryInfo
);
101 // Opens the zip file specified by |zip_file_path|. Returns true on
// success.
103 bool Open(const base::FilePath
& zip_file_path
);
105 // Opens the zip file referred to by the platform file |zip_fd|, without
106 // taking ownership of |zip_fd|. Returns true on success.
107 bool OpenFromPlatformFile(base::PlatformFile zip_fd
);
109 // Opens the zip data stored in |data|. This class uses a weak reference to
110 // the given string while extracting files, i.e. the caller should keep the
111 // string alive until it finishes extracting files.
112 bool OpenFromString(const std::string
& data
);
114 // Closes the currently opened zip file. This function is called in the
115 // destructor of the class, so you usually don't need to call this.
118 // Returns true if there is at least one entry to read. This function is
119 // used to scan entries with AdvanceToNextEntry(), like:
121 // while (reader.HasMore()) {
122 // // Do something with the current file here.
123 // reader.AdvanceToNextEntry();
127 // Advances to the next entry. Returns true on success.
128 bool AdvanceToNextEntry();
130 // Opens the current entry in the zip file. On success, returns true and
131 // updates the current entry state (i.e. current_entry_info() is
132 // updated). This function should be called before operations over the
133 // current entry like ExtractCurrentEntryToFilePath().
//
135 // Note that there is no CloseCurrentEntryInZip(). The current entry
136 // state is reset automatically as needed.
137 bool OpenCurrentEntryInZip();
139 // Locates an entry in the zip file and opens it. Returns true on
140 // success. This function internally calls OpenCurrentEntryInZip() on
141 // success. On failure, current_entry_info() becomes NULL.
142 bool LocateAndOpenEntry(const base::FilePath
& path_in_zip
);
144 // Extracts the current entry to the given output file path. If the
145 // current entry is a directory, just creates a directory
146 // instead. Returns true on success. OpenCurrentEntryInZip() must be
147 // called beforehand.
//
149 // This function preserves the timestamp of the original entry. If that
150 // timestamp is not valid, the timestamp will be set to the current time.
151 bool ExtractCurrentEntryToFilePath(const base::FilePath
& output_file_path
);
153 // Asynchronously extracts the current entry to |output_file_path|.
154 // If the current entry is a directory it just creates the directory
155 // synchronously instead. OpenCurrentEntryInZip() must be called beforehand.
156 // |success_callback| will be called on success and |failure_callback| will be
157 // called on failure. |progress_callback| will be called at least once.
158 // Callbacks will be posted to the current MessageLoop in-order.
159 void ExtractCurrentEntryToFilePathAsync(
160 const base::FilePath
& output_file_path
,
161 const SuccessCallback
& success_callback
,
162 const FailureCallback
& failure_callback
,
163 const ProgressCallback
& progress_callback
);
165 // Extracts the current entry to the given output directory path using
166 // ExtractCurrentEntryToFilePath(). Sub directories are created as needed
167 // based on the file path of the current entry. For example, if the file
168 // path in zip is "foo/bar.txt", and the output directory is "output",
169 // "output/foo/bar.txt" will be created.
//
171 // Returns true on success. OpenCurrentEntryInZip() must be called
// beforehand.
//
174 // This function preserves the timestamp of the original entry. If that
175 // timestamp is not valid, the timestamp will be set to the current time.
176 bool ExtractCurrentEntryIntoDirectory(
177 const base::FilePath
& output_directory_path
);
179 #if defined(OS_POSIX)
180 // Extracts the current entry by writing directly to a file descriptor.
181 // Does not close the file descriptor. Returns true on success.
182 bool ExtractCurrentEntryToFd(int fd
);
185 // Extracts the current entry into memory. If the current entry is a directory
186 // the |output| parameter is set to the empty string. If the current entry is
187 // a file, the |output| parameter is filled with its contents. Returns true on
188 // success. OpenCurrentEntryInZip() must be called beforehand.
189 // Note: the |output| parameter can be filled with a big amount of data; avoid
190 // passing it around by value, and pass it by reference or pointer instead.
191 // Note: the value returned by EntryInfo::original_size() cannot be
192 // trusted, so the real size of the uncompressed contents can be different.
193 // Use |max_read_bytes| to limit the amount of memory used to carry the entry.
194 // If the real size of the uncompressed data is bigger than |max_read_bytes|
195 // then false is returned. |max_read_bytes| must be non-zero.
196 bool ExtractCurrentEntryToString(
197 size_t max_read_bytes
,
198 std::string
* output
) const;
200 // Returns the current entry info. Returns NULL if the current entry is
201 // not yet opened. OpenCurrentEntryInZip() must be called beforehand.
202 EntryInfo
* current_entry_info() const {
203 return current_entry_info_
.get();
206 // Returns the number of entries in the zip file.
207 // Open() must be called beforehand.
208 int num_entries() const { return num_entries_
; }
211 // Common code used both in Open and OpenFromPlatformFile.
214 // Resets the internal state.
217 // Extracts a chunk of the file to the target. Will post a task for the next
218 // chunk and success/failure/progress callbacks as necessary.
219 void ExtractChunk(base::File target_file
,
220 const SuccessCallback
& success_callback
,
221 const FailureCallback
& failure_callback
,
222 const ProgressCallback
& progress_callback
,
228 scoped_ptr
<EntryInfo
> current_entry_info_
;
230 base::WeakPtrFactory
<ZipReader
> weak_ptr_factory_
;
232 DISALLOW_COPY_AND_ASSIGN(ZipReader
);
237 #endif // THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_