2 * Copyright (C) 2005-2018 Team Kodi
3 * This file is part of Kodi - https://kodi.tv
5 * SPDX-License-Identifier: GPL-2.0-or-later
6 * See LICENSES/README.md for more information.
9 #include "ZipManager.h"
16 #if defined(TARGET_POSIX)
17 #include "PlatformDefs.h"
19 #include "utils/CharsetConverter.h"
20 #include "utils/EndianSwap.h"
21 #include "utils/log.h"
22 #include "utils/RegExp.h"
23 #include "utils/URIUtils.h"
25 using namespace XFILE
;
// General purpose bit 11 of a zip entry's flags: when set, the zip holds UTF-8 filenames.
static constexpr size_t ZC_FLAG_EFS = size_t{1} << 11;
29 CZipManager::CZipManager() = default;
31 CZipManager::~CZipManager() = default;
33 bool CZipManager::GetZipList(const CURL
& url
, std::vector
<SZipEntry
>& items
)
35 struct __stat64 m_StatData
= {};
37 std::string strFile
= url
.GetHostName();
39 if (CFile::Stat(strFile
,&m_StatData
))
41 CLog::Log(LOGDEBUG
, "CZipManager::GetZipList: failed to stat file {}", url
.GetRedacted());
45 std::map
<std::string
, std::vector
<SZipEntry
> >::iterator it
= mZipMap
.find(strFile
);
46 if (it
!= mZipMap
.end()) // already listed, just return it if not changed, else release and reread
48 std::map
<std::string
,int64_t>::iterator it2
=mZipDate
.find(strFile
);
50 if (m_StatData
.st_mtime
== it2
->second
)
60 if (!mFile
.Open(strFile
))
62 CLog::Log(LOGDEBUG
, "ZipManager: unable to open file {}!", strFile
);
67 if (mFile
.Read(&hdr
, 4)!=4 || (Endian_SwapLE32(hdr
) != ZIP_LOCAL_HEADER
&&
68 Endian_SwapLE32(hdr
) != ZIP_DATA_RECORD_HEADER
&&
69 Endian_SwapLE32(hdr
) != ZIP_SPLIT_ARCHIVE_HEADER
))
71 CLog::Log(LOGDEBUG
,"ZipManager: not a zip file!");
76 if (Endian_SwapLE32(hdr
) == ZIP_SPLIT_ARCHIVE_HEADER
)
77 CLog::LogF(LOGWARNING
, "ZIP split archive header found. Trying to process as a single archive..");
79 // push date for update detection
80 mZipDate
.insert(make_pair(strFile
,m_StatData
.st_mtime
));
83 // Look for end of central directory record
84 // Zipfile comment may be up to 65535 bytes
85 // End of central directory record is 22 bytes (ECDREC_SIZE)
86 // -> need to check the last 65557 bytes
87 int64_t fileSize
= mFile
.GetLength();
88 // Don't need to look in the last 18 bytes (ECDREC_SIZE-4)
89 // But as we need to do overlapping between blocks (3 bytes),
90 // we start the search at ECDREC_SIZE-1 from the end of file
91 if (fileSize
< ECDREC_SIZE
- 1)
93 CLog::Log(LOGERROR
, "ZipManager: Invalid zip file length: {}", fileSize
);
96 int searchSize
= (int) std::min(static_cast<int64_t>(65557), fileSize
-ECDREC_SIZE
+1);
97 int blockSize
= (int) std::min(1024, searchSize
);
98 int nbBlock
= searchSize
/ blockSize
;
99 int extraBlockSize
= searchSize
% blockSize
;
100 // Signature is on 4 bytes
101 // It could be between 2 blocks, so we need to read 3 extra bytes
102 std::vector
<char> buffer(blockSize
+ 3);
105 // Loop through blocks starting at the end of the file (minus ECDREC_SIZE-1)
106 for (int nb
=1; !found
&& (nb
<= nbBlock
); nb
++)
108 mFile
.Seek(fileSize
-ECDREC_SIZE
+1-(blockSize
*nb
),SEEK_SET
);
109 if (mFile
.Read(buffer
.data(), blockSize
+ 3) != blockSize
+ 3)
111 for (int i
=blockSize
-1; !found
&& (i
>= 0); i
--)
113 if (Endian_SwapLE32(ReadUnaligned
<uint32_t>(buffer
.data() + i
)) == ZIP_END_CENTRAL_HEADER
)
115 // Set current position to start of end of central directory
116 mFile
.Seek(fileSize
-ECDREC_SIZE
+1-(blockSize
*nb
)+i
,SEEK_SET
);
122 // If not found, look in the last block left...
123 if ( !found
&& (extraBlockSize
> 0) )
125 mFile
.Seek(fileSize
-ECDREC_SIZE
+1-searchSize
,SEEK_SET
);
126 if (mFile
.Read(buffer
.data(), extraBlockSize
+ 3) != extraBlockSize
+ 3)
128 for (int i
=extraBlockSize
-1; !found
&& (i
>= 0); i
--)
130 if (Endian_SwapLE32(ReadUnaligned
<uint32_t>(buffer
.data() + i
)) == ZIP_END_CENTRAL_HEADER
)
132 // Set current position to start of end of central directory
133 mFile
.Seek(fileSize
-ECDREC_SIZE
+1-searchSize
+i
,SEEK_SET
);
143 CLog::Log(LOGDEBUG
, "ZipManager: broken file {}!", strFile
);
148 unsigned int cdirOffset
, cdirSize
;
149 // Get size of the central directory
150 mFile
.Seek(12,SEEK_CUR
);
151 if (mFile
.Read(&cdirSize
, 4) != 4)
153 cdirSize
= Endian_SwapLE32(cdirSize
);
154 // Get Offset of start of central directory with respect to the starting disk number
155 if (mFile
.Read(&cdirOffset
, 4) != 4)
157 cdirOffset
= Endian_SwapLE32(cdirOffset
);
159 // Go to the start of central directory
160 mFile
.Seek(cdirOffset
,SEEK_SET
);
162 CRegExp pathTraversal
;
163 pathTraversal
.RegComp(PATH_TRAVERSAL
);
165 char temp
[CHDR_SIZE
];
166 while (mFile
.GetPosition() < cdirOffset
+ cdirSize
)
169 if (mFile
.Read(temp
, CHDR_SIZE
) != CHDR_SIZE
)
171 readCHeader(temp
, ze
);
172 if (ze
.header
!= ZIP_CENTRAL_HEADER
)
174 CLog::Log(LOGDEBUG
, "ZipManager: broken file {}!", strFile
);
179 // Get the filename just after the central file header
180 std::vector
<char> bufName(ze
.flength
);
181 if (mFile
.Read(bufName
.data(), ze
.flength
) != ze
.flength
)
183 std::string
strName(bufName
.data(), bufName
.size());
185 if ((ze
.flags
& ZC_FLAG_EFS
) == 0)
187 std::string
tmp(strName
);
188 g_charsetConverter
.ToUtf8("CP437", tmp
, strName
);
190 memset(ze
.name
, 0, 255);
191 strncpy(ze
.name
, strName
.c_str(), strName
.size() > 254 ? 254 : strName
.size());
193 // Jump after central file header extra field and file comment
194 mFile
.Seek(ze
.eclength
+ ze
.clength
,SEEK_CUR
);
196 if (pathTraversal
.RegFind(strName
) < 0)
200 /* go through list and figure out file header lengths */
201 for (auto& ze
: items
)
203 // Go to the local file header to get the extra field length
204 // !! local header extra field length != central file header extra field length !!
205 mFile
.Seek(ze
.lhdrOffset
+28,SEEK_SET
);
206 if (mFile
.Read(&(ze
.elength
), 2) != 2)
208 ze
.elength
= Endian_SwapLE16(ze
.elength
);
210 // Compressed data offset = local header offset + size of local header + filename length + local file header extra field length
211 ze
.offset
= ze
.lhdrOffset
+ LHDR_SIZE
+ ze
.flength
+ ze
.elength
;
215 mZipMap
.insert(make_pair(strFile
,items
));
220 bool CZipManager::GetZipEntry(const CURL
& url
, SZipEntry
& item
)
222 const std::string
& strFile
= url
.GetHostName();
224 std::map
<std::string
, std::vector
<SZipEntry
> >::iterator it
= mZipMap
.find(strFile
);
225 std::vector
<SZipEntry
> items
;
226 if (it
== mZipMap
.end()) // we need to list the zip
228 GetZipList(url
,items
);
235 const std::string
& strFileName
= url
.GetFileName();
236 for (const auto& it2
: items
)
238 if (std::string(it2
.name
) == strFileName
)
247 bool CZipManager::ExtractArchive(const std::string
& strArchive
, const std::string
& strPath
)
249 const CURL
pathToUrl(strArchive
);
250 return ExtractArchive(pathToUrl
, strPath
);
253 bool CZipManager::ExtractArchive(const CURL
& archive
, const std::string
& strPath
)
255 std::vector
<SZipEntry
> entry
;
256 CURL url
= URIUtils::CreateArchivePath("zip", archive
);
257 GetZipList(url
, entry
);
258 for (const auto& it
: entry
)
260 if (it
.name
[strlen(it
.name
) - 1] == '/') // skip dirs
262 std::string
strFilePath(it
.name
);
264 CURL zipPath
= URIUtils::CreateArchivePath("zip", archive
, strFilePath
);
265 const CURL
pathToUrl(strPath
+ strFilePath
);
266 if (!CFile::Copy(zipPath
, pathToUrl
))
272 // Read local file header
273 void CZipManager::readHeader(const char* buffer
, SZipEntry
& info
)
275 info
.header
= Endian_SwapLE32(ReadUnaligned
<uint32_t>(buffer
));
276 info
.version
= Endian_SwapLE16(ReadUnaligned
<uint16_t>(buffer
+ 4));
277 info
.flags
= Endian_SwapLE16(ReadUnaligned
<uint16_t>(buffer
+ 6));
278 info
.method
= Endian_SwapLE16(ReadUnaligned
<uint16_t>(buffer
+ 8));
279 info
.mod_time
= Endian_SwapLE16(ReadUnaligned
<uint16_t>(buffer
+ 10));
280 info
.mod_date
= Endian_SwapLE16(ReadUnaligned
<uint16_t>(buffer
+ 12));
281 info
.crc32
= Endian_SwapLE32(ReadUnaligned
<uint32_t>(buffer
+ 14));
282 info
.csize
= Endian_SwapLE32(ReadUnaligned
<uint32_t>(buffer
+ 18));
283 info
.usize
= Endian_SwapLE32(ReadUnaligned
<uint32_t>(buffer
+ 22));
284 info
.flength
= Endian_SwapLE16(ReadUnaligned
<uint16_t>(buffer
+ 26));
285 info
.elength
= Endian_SwapLE16(ReadUnaligned
<uint16_t>(buffer
+ 28));
288 // Read central file header (from central directory)
289 void CZipManager::readCHeader(const char* buffer
, SZipEntry
& info
)
291 info
.header
= Endian_SwapLE32(ReadUnaligned
<uint32_t>(buffer
));
292 // Skip version made by
293 info
.version
= Endian_SwapLE16(ReadUnaligned
<uint16_t>(buffer
+ 6));
294 info
.flags
= Endian_SwapLE16(ReadUnaligned
<uint16_t>(buffer
+ 8));
295 info
.method
= Endian_SwapLE16(ReadUnaligned
<uint16_t>(buffer
+ 10));
296 info
.mod_time
= Endian_SwapLE16(ReadUnaligned
<uint16_t>(buffer
+ 12));
297 info
.mod_date
= Endian_SwapLE16(ReadUnaligned
<uint16_t>(buffer
+ 14));
298 info
.crc32
= Endian_SwapLE32(ReadUnaligned
<uint32_t>(buffer
+ 16));
299 info
.csize
= Endian_SwapLE32(ReadUnaligned
<uint32_t>(buffer
+ 20));
300 info
.usize
= Endian_SwapLE32(ReadUnaligned
<uint32_t>(buffer
+ 24));
301 info
.flength
= Endian_SwapLE16(ReadUnaligned
<uint16_t>(buffer
+ 28));
302 info
.eclength
= Endian_SwapLE16(ReadUnaligned
<uint16_t>(buffer
+ 30));
303 info
.clength
= Endian_SwapLE16(ReadUnaligned
<uint16_t>(buffer
+ 32));
304 // Skip disk number start, internal/external file attributes
305 info
.lhdrOffset
= Endian_SwapLE32(ReadUnaligned
<uint32_t>(buffer
+ 42));
308 void CZipManager::release(const std::string
& strPath
)
311 std::map
<std::string
, std::vector
<SZipEntry
> >::iterator it
= mZipMap
.find(url
.GetHostName());
312 if (it
!= mZipMap
.end())
314 std::map
<std::string
,int64_t>::iterator it2
=mZipDate
.find(url
.GetHostName());