1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "third_party/zlib/google/zip_reader.h"
8 #include "base/files/file.h"
9 #include "base/logging.h"
10 #include "base/message_loop/message_loop.h"
11 #include "base/strings/string_util.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "third_party/zlib/google/zip_internal.h"
15 #if defined(USE_SYSTEM_MINIZIP)
16 #include <minizip/unzip.h>
18 #include "third_party/zlib/contrib/minizip/unzip.h"
20 #include "third_party/zlib/contrib/minizip/iowin32.h"
21 #endif // defined(OS_WIN)
22 #endif // defined(USE_SYSTEM_MINIZIP)
26 // TODO(satorux): The implementation assumes that file names in zip files
27 // are encoded in UTF-8. This is true for zip files created by Zip()
28 // function in zip.h, but not true for user-supplied random zip files.
// Builds the metadata for a single zip entry from the name stored in the
// archive (|file_name_in_zip|) and minizip's raw header record
// (|raw_file_info|). It fills in the entry's path, uncompressed size,
// directory flag, unsafe-name flag and last-modified time.
29 ZipReader::EntryInfo::EntryInfo(const std::string
& file_name_in_zip
,
30 const unz_file_info
& raw_file_info
)
31 : file_path_(base::FilePath::FromUTF8Unsafe(file_name_in_zip
)),
32 is_directory_(false) {
// Uncompressed size as reported by the entry's header record.
33 original_size_
= raw_file_info
.uncompressed_size
;
35 // Directory entries in zip files end with "/".
36 is_directory_
= EndsWith(file_name_in_zip
, "/", false);
38 // Check the file name here for directory traversal issues.
39 is_unsafe_
= file_path_
.ReferencesParent();
41 // We also consider that the file name is unsafe, if it's invalid UTF-8.
// The UTF-16 result is only a scratch value; the conversion is run for its
// success/failure status.
42 base::string16 file_name_utf16
;
43 if (!base::UTF8ToUTF16(file_name_in_zip
.data(), file_name_in_zip
.size(),
48 // We also consider that the file name is unsafe, if it's absolute.
49 // On Windows, IsAbsolute() returns false for paths starting with "/".
50 if (file_path_
.IsAbsolute() || StartsWithASCII(file_name_in_zip
, "/", false))
53 // Construct the last modified time. The timezone info is not present in
54 // zip files, so we construct the time as local time.
55 base::Time::Exploded exploded_time
= {}; // Zero-clear.
56 exploded_time
.year
= raw_file_info
.tmu_date
.tm_year
;
57 // The month in zip file is 0-based, whereas ours is 1-based.
58 exploded_time
.month
= raw_file_info
.tmu_date
.tm_mon
+ 1;
59 exploded_time
.day_of_month
= raw_file_info
.tmu_date
.tm_mday
;
60 exploded_time
.hour
= raw_file_info
.tmu_date
.tm_hour
;
61 exploded_time
.minute
= raw_file_info
.tmu_date
.tm_min
;
62 exploded_time
.second
= raw_file_info
.tmu_date
.tm_sec
;
// The zip timestamp copied above has no sub-second field.
63 exploded_time
.millisecond
= 0;
64 if (exploded_time
.HasValidValues()) {
65 last_modified_
= base::Time::FromLocalExploded(exploded_time
);
67 // Use Unix time epoch if the time stamp data is invalid.
// The extraction code compares last_modified() against UnixEpoch() to decide
// whether to touch the output file, so the epoch doubles as a "no timestamp"
// marker.
68 last_modified_
= base::Time::UnixEpoch();
// Constructor. Binds |weak_ptr_factory_| to this instance; the weak pointers
// it vends are used when posting ExtractChunk() tasks for asynchronous
// extraction.
72 ZipReader::ZipReader()
73 : weak_ptr_factory_(this) {
// Destructor.
// NOTE(review): presumably releases any zip handle that is still open (e.g.
// by calling Close()) -- confirm.
77 ZipReader::~ZipReader() {
// Opens the zip archive located at |zip_file_path|. The handle obtained from
// internal::OpenForUnzipping() is stored in |zip_file_|, and the result of
// OpenInternal() is returned.
81 bool ZipReader::Open(const base::FilePath
& zip_file_path
) {
84 // Use of "Unsafe" function does not look good, but there is no way to do
85 // this safely on Linux. See file_util.h for details.
86 zip_file_
= internal::OpenForUnzipping(zip_file_path
.AsUTF8Unsafe());
91 return OpenInternal();
// Opens a zip archive from an already-opened platform file |zip_fd| and
// returns the result of OpenInternal().
// NOTE(review): two helpers are referenced below (OpenFdForUnzipping and
// OpenHandleForUnzipping); presumably exactly one is compiled per platform
// (POSIX file descriptor vs. Windows HANDLE) -- confirm.
94 bool ZipReader::OpenFromPlatformFile(base::PlatformFile zip_fd
) {
98 zip_file_
= internal::OpenFdForUnzipping(zip_fd
);
100 zip_file_
= internal::OpenHandleForUnzipping(zip_fd
);
106 return OpenInternal();
// Opens a zip archive whose bytes are held in memory in |data|, using
// internal::PrepareMemoryForUnzipping(), and returns the result of
// OpenInternal().
// NOTE(review): |data| is passed by reference; presumably it must outlive the
// reader's use of the archive -- confirm.
109 bool ZipReader::OpenFromString(const std::string
& data
) {
110 zip_file_
= internal::PrepareMemoryForUnzipping(data
);
113 return OpenInternal();
// NOTE(review): presumably closes |zip_file_| if one is open and resets the
// reader's state -- confirm.
116 void ZipReader::Close() {
// Returns true while iteration has not reached the end of the archive, i.e.
// while |reached_end_| is false.
123 bool ZipReader::HasMore() {
124 return !reached_end_
;
// Moves the reader to the next entry of the archive. The current entry index
// is obtained with unzGetFilePos(); when the current entry is the last one,
// the end of the archive is recorded, otherwise minizip is advanced with
// unzGoToNextFile(). The cached |current_entry_info_| is cleared.
// NOTE(review): presumably returns false when a minizip call fails -- confirm.
127 bool ZipReader::AdvanceToNextEntry() {
130 // Should not go further if we already reached the end.
134 unz_file_pos position
= {};
135 if (unzGetFilePos(zip_file_
, &position
) != UNZ_OK
)
137 const int current_entry_index
= position
.num_of_file
;
138 // If we are currently at the last entry, then the next position is the
139 // end of the zip file, so mark that we reached the end.
140 if (current_entry_index
+ 1 == num_entries_
) {
143 DCHECK_LT(current_entry_index
+ 1, num_entries_
);
144 if (unzGoToNextFile(zip_file_
) != UNZ_OK
) {
// The cached info describes the entry we just moved away from; drop it.
148 current_entry_info_
.reset();
// Reads the header of the entry at the current minizip position with
// unzGetCurrentFileInfo() and caches it in |current_entry_info_| as a new
// EntryInfo. The extra field and the comment are not requested (both buffer
// sizes are 0).
152 bool ZipReader::OpenCurrentEntryInZip() {
155 unz_file_info raw_file_info
= {};
// The name buffer is zero-initialized and minizip is offered one byte less
// than its size, so the last byte always stays '\0'.
156 char raw_file_name_in_zip
[internal::kZipMaxPath
] = {};
157 const int result
= unzGetCurrentFileInfo(zip_file_
,
159 raw_file_name_in_zip
,
160 sizeof(raw_file_name_in_zip
) - 1,
162 0, // extraFieldBufferSize.
164 0); // commentBufferSize.
165 if (result
!= UNZ_OK
)
// NOTE(review): entries with an empty name are special-cased here; presumably
// they are rejected -- confirm.
167 if (raw_file_name_in_zip
[0] == '\0')
169 current_entry_info_
.reset(
170 new EntryInfo(raw_file_name_in_zip
, raw_file_info
));
// Positions the reader on the entry named |path_in_zip| using unzLocateFile()
// and then opens it via OpenCurrentEntryInZip(), whose result is returned.
// Any cached entry info and the end-of-archive flag are cleared first.
174 bool ZipReader::LocateAndOpenEntry(const base::FilePath
& path_in_zip
) {
177 current_entry_info_
.reset();
178 reached_end_
= false;
// Passed to unzLocateFile() as its case-sensitivity argument; per the
// constant's name, 0 selects the OS default.
179 const int kDefaultCaseSensivityOfOS
= 0;
180 const int result
= unzLocateFile(zip_file_
,
181 path_in_zip
.AsUTF8Unsafe().c_str(),
182 kDefaultCaseSensivityOfOS
);
183 if (result
!= UNZ_OK
)
186 // Then Open the entry.
187 return OpenCurrentEntryInZip();
// Extracts the current entry to |output_file_path|.
//
// A directory entry is handled by creating the directory. For a file entry,
// the entry is opened with unzOpenCurrentFile(), the parent directory of the
// output path is created, and the output file is opened with
// FLAG_CREATE_ALWAYS | FLAG_WRITE. Data is read with unzReadCurrentFile()
// through a buffer of internal::kZipBufSize bytes and written with
// WriteAtCurrentPos(). The entry is then closed with unzCloseCurrentFile().
// If the entry's last-modified time is not the Unix epoch, it is applied to
// the output file with base::TouchFile().
190 bool ZipReader::ExtractCurrentEntryToFilePath(
191 const base::FilePath
& output_file_path
) {
194 // If this is a directory, just create it and return.
195 if (current_entry_info()->is_directory())
196 return base::CreateDirectory(output_file_path
);
198 const int open_result
= unzOpenCurrentFile(zip_file_
);
199 if (open_result
!= UNZ_OK
)
202 // We can't rely on parent directory entries being specified in the
203 // zip, so we make sure they are created.
204 base::FilePath output_dir_path
= output_file_path
.DirName();
205 if (!base::CreateDirectory(output_dir_path
))
208 base::File
file(output_file_path
,
209 base::File::FLAG_CREATE_ALWAYS
| base::File::FLAG_WRITE
);
213 bool success
= true; // This becomes false when something bad happens.
215 char buf
[internal::kZipBufSize
];
216 const int num_bytes_read
= unzReadCurrentFile(zip_file_
, buf
,
217 internal::kZipBufSize
);
218 if (num_bytes_read
== 0) {
219 // Reached the end of the file.
221 } else if (num_bytes_read
< 0) {
222 // If num_bytes_read < 0, then it's a specific UNZ_* error code.
225 } else if (num_bytes_read
> 0) {
226 // Some data is read. Write it to the output file.
// Detects a short or failed write: the number of bytes written must equal
// the number of bytes read.
227 if (num_bytes_read
!= file
.WriteAtCurrentPos(buf
, num_bytes_read
)) {
235 unzCloseCurrentFile(zip_file_
);
// EntryInfo stores the Unix epoch when the zip timestamp was invalid, so the
// epoch means "no usable timestamp" and the file times are left alone.
237 if (current_entry_info()->last_modified() != base::Time::UnixEpoch())
238 base::TouchFile(output_file_path
,
240 current_entry_info()->last_modified());
// Asynchronous counterpart of ExtractCurrentEntryToFilePath().
//
// Results are reported by posting callbacks rather than by return value. A
// directory entry posts |success_callback| once the directory has been
// created. Each visible failure (directory creation, opening the zip entry,
// creating the containing directory, creating the output file) logs with
// DVLOG(1) and posts |failure_callback|. For a file entry the output file is
// created, and a task bound to ExtractChunk() with initial offset 0 is posted
// to the current message loop.
// Requires a current entry (DCHECK on |current_entry_info_|).
245 void ZipReader::ExtractCurrentEntryToFilePathAsync(
246 const base::FilePath
& output_file_path
,
247 const SuccessCallback
& success_callback
,
248 const FailureCallback
& failure_callback
,
249 const ProgressCallback
& progress_callback
) {
251 DCHECK(current_entry_info_
.get());
253 // If this is a directory, just create it and return.
254 if (current_entry_info()->is_directory()) {
255 if (base::CreateDirectory(output_file_path
)) {
256 base::MessageLoopProxy::current()->PostTask(FROM_HERE
, success_callback
);
258 DVLOG(1) << "Unzip failed: unable to create directory.";
259 base::MessageLoopProxy::current()->PostTask(FROM_HERE
, failure_callback
);
264 if (unzOpenCurrentFile(zip_file_
) != UNZ_OK
) {
265 DVLOG(1) << "Unzip failed: unable to open current zip entry.";
266 base::MessageLoopProxy::current()->PostTask(FROM_HERE
, failure_callback
);
// As in the synchronous version, make sure the containing directory exists
// before creating the output file.
270 base::FilePath output_dir_path
= output_file_path
.DirName();
271 if (!base::CreateDirectory(output_dir_path
)) {
272 DVLOG(1) << "Unzip failed: unable to create containing directory.";
273 base::MessageLoopProxy::current()->PostTask(FROM_HERE
, failure_callback
);
277 const int flags
= base::File::FLAG_CREATE_ALWAYS
| base::File::FLAG_WRITE
;
278 base::File
output_file(output_file_path
, flags
);
280 if (!output_file
.IsValid()) {
281 DVLOG(1) << "Unzip failed: unable to create platform file at "
282 << output_file_path
.value();
283 base::MessageLoopProxy::current()->PostTask(FROM_HERE
, failure_callback
);
// Ownership of |output_file| is passed into the bound task.
// NOTE(review): binding through a weak pointer presumably lets a pending task
// be dropped if this ZipReader is destroyed first -- confirm.
287 base::MessageLoop::current()->PostTask(
289 base::Bind(&ZipReader::ExtractChunk
,
290 weak_ptr_factory_
.GetWeakPtr(),
291 Passed(output_file
.Pass()),
295 0 /* initial offset */));
// Extracts the current entry beneath |output_directory_path|. The entry's
// path inside the archive is appended to the directory to form the output
// path, and the work is delegated to ExtractCurrentEntryToFilePath(), whose
// result is returned. Requires a current entry (DCHECK).
// NOTE(review): this function does not itself consult the entry's is_unsafe
// flag; presumably callers are expected to check it before extracting --
// confirm.
298 bool ZipReader::ExtractCurrentEntryIntoDirectory(
299 const base::FilePath
& output_directory_path
) {
300 DCHECK(current_entry_info_
.get());
302 base::FilePath output_file_path
= output_directory_path
.Append(
303 current_entry_info()->file_path());
304 return ExtractCurrentEntryToFilePath(output_file_path
);
307 #if defined(OS_POSIX)
// POSIX only. Extracts the current entry to the already-open file descriptor
// |fd|. The entry is opened with unzOpenCurrentFile(), read with
// unzReadCurrentFile() through a buffer of internal::kZipBufSize bytes,
// written with base::WriteFileDescriptor(), and finally closed with
// unzCloseCurrentFile().
// NOTE(review): directory entries are special-cased; presumably they fail
// because there is nothing to write -- confirm.
308 bool ZipReader::ExtractCurrentEntryToFd(const int fd
) {
311 // If this is a directory, there's nothing to extract to the file descriptor,
313 if (current_entry_info()->is_directory())
316 const int open_result
= unzOpenCurrentFile(zip_file_
);
317 if (open_result
!= UNZ_OK
)
320 bool success
= true; // This becomes false when something bad happens.
322 char buf
[internal::kZipBufSize
];
323 const int num_bytes_read
= unzReadCurrentFile(zip_file_
, buf
,
324 internal::kZipBufSize
);
325 if (num_bytes_read
== 0) {
326 // Reached the end of the file.
328 } else if (num_bytes_read
< 0) {
329 // If num_bytes_read < 0, then it's a specific UNZ_* error code.
332 } else if (num_bytes_read
> 0) {
333 // Some data is read. Write it to the output file descriptor.
// Detects a short or failed write: the number of bytes written must equal
// the number of bytes read.
334 if (num_bytes_read
!=
335 base::WriteFileDescriptor(fd
, buf
, num_bytes_read
)) {
342 unzCloseCurrentFile(zip_file_
);
345 #endif // defined(OS_POSIX)
// Extracts the current entry into |output|, bounded by |max_read_bytes|,
// which must be non-zero (DCHECK).
//
// Data is accumulated in a local string whose capacity is reserved up front
// as min(|max_read_bytes|, original_size()), and which is swapped into
// |output| at the end. The entry is read with unzReadCurrentFile() through a
// buffer of internal::kZipBufSize bytes and closed with
// unzCloseCurrentFile().
// NOTE(review): presumably fails when the data would exceed
// |max_read_bytes|, and special-cases directory entries -- confirm both.
347 bool ZipReader::ExtractCurrentEntryToString(
348 size_t max_read_bytes
,
349 std::string
* output
) const {
352 DCHECK(max_read_bytes
!= 0);
354 if (current_entry_info()->is_directory()) {
359 const int open_result
= unzOpenCurrentFile(zip_file_
);
360 if (open_result
!= UNZ_OK
)
363 // The original_size() is the best hint for the real size, so it saves
364 // doing reallocations for the common case when the uncompressed size is
365 // correct. However, we need to assume that the uncompressed size could be
366 // incorrect therefore this function needs to read as much data as possible.
367 std::string contents
;
// The reservation is capped at |max_read_bytes|, so the size claimed by the
// zip header alone cannot make it larger than the caller's limit.
368 contents
.reserve(std::min
<size_t>(
369 max_read_bytes
, current_entry_info()->original_size()));
371 bool success
= true; // This becomes false when something bad happens.
372 char buf
[internal::kZipBufSize
];
374 const int num_bytes_read
= unzReadCurrentFile(zip_file_
, buf
,
375 internal::kZipBufSize
);
376 if (num_bytes_read
== 0) {
377 // Reached the end of the file.
379 } else if (num_bytes_read
< 0) {
380 // If num_bytes_read < 0, then it's a specific UNZ_* error code.
383 } else if (num_bytes_read
> 0) {
// Check the limit before appending the chunk.
384 if (contents
.size() + num_bytes_read
> max_read_bytes
) {
388 contents
.append(buf
, num_bytes_read
);
392 unzCloseCurrentFile(zip_file_
);
// Hand the accumulated data to the caller with a swap instead of a copy.
394 output
->swap(contents
);
// Common tail of Open(), OpenFromPlatformFile() and OpenFromString(). It
// queries minizip for the archive's global info, stores the entry count in
// |num_entries_| and initializes |reached_end_|.
399 bool ZipReader::OpenInternal() {
402 unz_global_info zip_info
= {}; // Zero-clear.
403 if (unzGetGlobalInfo(zip_file_
, &zip_info
) != UNZ_OK
) {
406 num_entries_
= zip_info
.number_entry
;
// NOTE(review): a negative count presumably indicates an entry count that
// did not fit into |num_entries_| and is rejected -- confirm.
407 if (num_entries_
< 0)
410 // We are already at the end if the zip file is empty.
411 reached_end_
= (num_entries_
== 0);
// Resets iteration state: clears the end-of-archive flag and drops the cached
// info for the current entry.
415 void ZipReader::Reset() {
418 reached_end_
= false;
419 current_entry_info_
.reset();
422 void ZipReader::ExtractChunk(base::File output_file
,
423 const SuccessCallback
& success_callback
,
424 const FailureCallback
& failure_callback
,
425 const ProgressCallback
& progress_callback
,
426 const int64 offset
) {
427 char buffer
[internal::kZipBufSize
];
429 const int num_bytes_read
= unzReadCurrentFile(zip_file_
,
431 internal::kZipBufSize
);
433 if (num_bytes_read
== 0) {
434 unzCloseCurrentFile(zip_file_
);
435 success_callback
.Run();
436 } else if (num_bytes_read
< 0) {
437 DVLOG(1) << "Unzip failed: error while reading zipfile "
438 << "(" << num_bytes_read
<< ")";
439 failure_callback
.Run();
441 if (num_bytes_read
!= output_file
.Write(offset
, buffer
, num_bytes_read
)) {
442 DVLOG(1) << "Unzip failed: unable to write all bytes to target.";
443 failure_callback
.Run();
447 int64 current_progress
= offset
+ num_bytes_read
;
449 progress_callback
.Run(current_progress
);
451 base::MessageLoop::current()->PostTask(
453 base::Bind(&ZipReader::ExtractChunk
,
454 weak_ptr_factory_
.GetWeakPtr(),
455 Passed(output_file
.Pass()),