2 * Copyright (C) 2005-2018 Team Kodi
3 * This file is part of Kodi - https://kodi.tv
5 * SPDX-License-Identifier: GPL-2.0-or-later
6 * See LICENSES/README.md for more information.
9 #include "HTTPDirectory.h"
13 #include "FileItemList.h"
14 #include "ServiceBroker.h"
16 #include "settings/AdvancedSettings.h"
17 #include "settings/SettingsComponent.h"
18 #include "utils/CharsetConverter.h"
19 #include "utils/HTMLUtil.h"
20 #include "utils/RegExp.h"
21 #include "utils/StringUtils.h"
22 #include "utils/URIUtils.h"
23 #include "utils/log.h"
27 using namespace XFILE
;
29 CHTTPDirectory::CHTTPDirectory(void) = default;
30 CHTTPDirectory::~CHTTPDirectory(void) = default;
// Lists the entries of an HTTP directory index page.
//
// The server response is read as HTML and scanned for anchor elements. An
// anchor becomes a CFileItem when its decoded display text matches its decoded
// href (see NameMatchesLink below). Modification time and size are taken from
// the markup that follows the anchor when one of the known listing formats
// matches.
//
// url   - location of the directory index; its file name is the base path that
//         every entry link is appended to.
// items - list that is tagged with the IsHTTPDirectory property at the end.
//
// NOTE(review): this listing appears to be missing lines. The declarations of
// http, reSize, url2, monthNum and file, the bodies of several branches, the
// call that adds pItem to items, and every return statement are not visible
// here. Confirm the return contract and those pieces against the full file.
32 bool CHTTPDirectory::GetDirectory(const CURL
& url
, CFileItemList
&items
)
// Base path (file-name part of the requested URL) used to build entry URLs.
36 const std::string
& strBasePath
= url
.GetFileName();
// Error report for a directory that could not be fetched; the URL is logged
// through GetRedacted() (presumably with credentials removed - confirm).
40 CLog::Log(LOGERROR
, "{} - Unable to get http directory ({})", __FUNCTION__
, url
.GetRedacted());
// Anchor pattern. Groups: (1) href value, (2) link text without surrounding
// whitespace, (3) everything after the anchor up to the next anchor, the end
// of the table row, or the end of input. Group 3 is the metadata searched for
// date and size further down.
44 CRegExp
reItem(true); // HTML is case-insensitive
45 reItem
.RegComp("<a href=\"([^\"]*)\"[^>]*>\\s*(.*?)\\s*</a>(.+?)(?=<a|</tr|$)");
// Date pattern: dd-Mon-yyyy hh:mm inside a right-aligned table cell.
47 CRegExp
reDateTimeHtml(true);
48 reDateTimeHtml
.RegComp(
49 "<td align=\"right\">([0-9]{2})-([A-Z]{3})-([0-9]{4}) ([0-9]{2}):([0-9]{2}) +</td>");
// Date pattern: yyyy-Mon-dd hh:mm:ss inside a table cell with class m.
51 CRegExp
reDateTimeLighttp(true);
52 reDateTimeLighttp
.RegComp(
53 "<td class=\"m\">([0-9]{4})-([A-Z]{3})-([0-9]{2}) ([0-9]{2}):([0-9]{2}):([0-9]{2})</td>");
// Date pattern: bare dd-Mon-yyyy hh:mm with no surrounding markup.
55 CRegExp
reDateTimeNginx(true);
56 reDateTimeNginx
.RegComp("([0-9]{2})-([A-Z]{3})-([0-9]{4}) ([0-9]{2}):([0-9]{2})");
// Date pattern: yyyy-Mon-dd hh:mm inside a table cell with class date.
58 CRegExp
reDateTimeNginxFancy(true);
59 reDateTimeNginxFancy
.RegComp(
60 "<td class=\"date\">([0-9]{4})-([A-Z]{3})-([0-9]{2}) ([0-9]{2}):([0-9]{2})</td>");
// Date pattern: yyyy-mm-dd hh:mm (numeric month) inside a right-aligned cell.
62 CRegExp
reDateTimeApacheNewFormat(true);
63 reDateTimeApacheNewFormat
.RegComp(
64 "<td align=\"right\">([0-9]{4})-([0-9]{2})-([0-9]{2}) ([0-9]{2}):([0-9]{2}) +</td>");
// Date pattern: bare yyyy-mm-dd hh:mm (numeric month), the most generic form.
66 CRegExp
reDateTime(true);
67 reDateTime
.RegComp("([0-9]{4})-([0-9]{2})-([0-9]{2}) ([0-9]{2}):([0-9]{2})");
// Size pattern for table listings: a number that may contain dots, an optional
// unit letter (B, K, M, G or a blank) and an optional iB suffix, closing a cell.
69 CRegExp
reSizeHtml(true);
70 reSizeHtml
.RegComp("> *([0-9.]+) *(B|K|M|G| )(iB)?</td>");
// Size pattern for plain listings: an integer preceded by spaces, with an
// optional unit letter, followed by whitespace, a tag start or end of input.
// NOTE(review): the declaration of reSize is not visible in this listing.
73 reSize
.RegComp(" +([0-9]+)(B|K|M|G)?(?=\\s|<|$)");
75 /* read response from server into string buffer */
76 std::string strBuffer
;
// Parsing only happens when the read succeeds and yields a non-empty body.
77 if (http
.ReadData(strBuffer
) && strBuffer
.length() > 0)
79 /* if Content-Length is found and it is not text/html, URL is pointing to file so don't treat URL as HTTPDirectory */
80 if (!http
.GetHttpHeader().GetValue("Content-Length").empty() &&
81 !StringUtils::StartsWithNoCase(http
.GetHttpHeader().GetValue("Content-type"), "text/html"))
// Charset reported for the response. When it is present and not UTF-8 the
// buffer is converted; the original buffer is kept if the conversion fails or
// produces an empty string.
86 std::string
fileCharset(http
.GetProperty(XFILE::FILE_PROPERTY_CONTENT_CHARSET
));
87 if (!fileCharset
.empty() && fileCharset
!= "UTF-8")
89 std::string converted
;
90 if (g_charsetConverter
.ToUtf8(fileCharset
, strBuffer
, converted
) && !converted
.empty())
91 strBuffer
= converted
;
// Walk the buffer anchor by anchor; bufferOffset always points just past the
// previous match.
94 unsigned int bufferOffset
= 0;
95 while (bufferOffset
< strBuffer
.length())
// NOTE(review): the handling of a negative matchOffset (no further anchor) is
// not visible in this listing - confirm the loop exits in that case.
97 int matchOffset
= reItem
.RegFind(strBuffer
.c_str(), bufferOffset
);
101 bufferOffset
= matchOffset
+ reItem
.GetSubLength(0);
// Pull the three capture groups of the anchor pattern.
103 std::string strLink
= reItem
.GetMatch(1);
104 std::string strName
= reItem
.GetMatch(2);
105 std::string strMetadata
= reItem
.GetMatch(3);
106 StringUtils::Trim(strMetadata
);
// Drop one leading slash so the link can be appended to strBasePath.
108 if(strLink
[0] == '/')
109 strLink
= strLink
.substr(1);
// Normalise the display name: trim, bring to UTF-8 when the server gave no
// charset, decode HTML entities via a wide-string round trip, and remove a
// trailing slash.
111 std::string strNameTemp
= StringUtils::Trim(strName
);
113 std::wstring wName
, wLink
, wConverted
;
114 if (fileCharset
.empty())
115 g_charsetConverter
.unknownToUTF8(strNameTemp
);
116 g_charsetConverter
.utf8ToW(strNameTemp
, wName
, false);
117 HTML::CHTMLUtil::ConvertHTMLToW(wName
, wConverted
);
118 g_charsetConverter
.wToUTF8(wConverted
, strNameTemp
);
119 URIUtils::RemoveSlashAtEnd(strNameTemp
);
// strLinkBase holds the path part of the link, strLinkOptions the query part
// (including the leading question mark).
121 std::string strLinkBase
= strLink
;
122 std::string strLinkOptions
;
124 // split link with url options
125 size_t pos
= strLinkBase
.find('?');
126 if (pos
!= std::string::npos
)
128 strLinkOptions
= strLinkBase
.substr(pos
);
129 strLinkBase
.erase(pos
);
132 // strip url fragment from the link
133 pos
= strLinkBase
.find('#');
134 if (pos
!= std::string::npos
)
136 strLinkBase
.erase(pos
);
139 // Convert any HTTP character entities (e.g.: "&amp;") to percentage encoding
140 // (e.g.: "%xx") as some web servers (Apache) put these in HTTP Directory Indexes
141 // this is also needed as CURL objects interpret them incorrectly due to the ;
142 // also being allowed as URL option separator
143 if (fileCharset
.empty())
144 g_charsetConverter
.unknownToUTF8(strLinkBase
);
145 g_charsetConverter
.utf8ToW(strLinkBase
, wLink
, false);
146 HTML::CHTMLUtil::ConvertHTMLToW(wLink
, wConverted
);
147 g_charsetConverter
.wToUTF8(wConverted
, strLinkBase
);
149 // encoding + and ; to URL encode if it is not already encoded by http server used on the remote server (example: Apache)
150 // more characters may be added here when required by certain http servers
151 pos
= strLinkBase
.find_first_of("+;");
// Each hit is replaced by a percent sign followed by the character code in
// hexadecimal; the search then restarts from the beginning of the string.
152 while (pos
!= std::string::npos
)
154 std::stringstream convert
;
155 convert
<< '%' << std::hex
<< int(strLinkBase
.at(pos
));
156 strLinkBase
.replace(pos
, 1, convert
.str());
157 pos
= strLinkBase
.find_first_of("+;");
// strLinkTemp is the comparison form of the link: no trailing slash and
// URL-decoded. strLinkBase itself stays encoded for building the item URL.
160 std::string strLinkTemp
= strLinkBase
;
162 URIUtils::RemoveSlashAtEnd(strLinkTemp
);
163 strLinkTemp
= CURL::Decode(strLinkTemp
);
// A display name ending in ..> is treated as a shortened name: when the link
// starts with the part before that marker, the decoded link replaces the name.
165 if (StringUtils::EndsWith(strNameTemp
, "..>") &&
166 StringUtils::StartsWith(strLinkTemp
, strNameTemp
.substr(0, strNameTemp
.length() - 3)))
167 strName
= strNameTemp
= strLinkTemp
;
// NOTE(review): the section sign in the RFC reference below looks mis-encoded
// (it should read RFC 1808 section 5.3), and the terminator of the block
// comment is not visible in this listing.
169 /* Per RFC 1808 ยง 5.3, relative paths containing a colon ":" should be either prefixed with
170 * "./" or escaped (as "%3A"). This handles the prefix case, the escaping should be handled by
171 * the CURL::Decode above
172 * - https://tools.ietf.org/html/rfc1808#section-5.3
// NameMatchesLink: true when name equals link, or when name contains a colon
// and link equals name prefixed with a dot and a slash.
174 auto NameMatchesLink([](const std::string
& name
, const std::string
& link
) -> bool
176 return (name
== link
) ||
177 ((std::string::npos
!= name
.find(':')) && (std::string
{"./"}.append(name
) == link
));
180 // we detect http directory items by its display name and its stripped link
181 // if same, we consider it as a valid item.
// The parent entry (..) and empty links are always skipped.
182 if (strLinkTemp
!= ".." && strLinkTemp
!= "" && NameMatchesLink(strNameTemp
, strLinkTemp
))
184 CFileItemPtr
pItem(new CFileItem(strNameTemp
));
185 pItem
->SetProperty("IsHTTPDirectory", true);
// Entry URL: base path plus the encoded link, with the query part restored.
// NOTE(review): the declaration of url2 and the call that assigns it to pItem
// are not visible in this listing.
188 url2
.SetFileName(strBasePath
+ strLinkBase
);
189 url2
.SetOptions(strLinkOptions
);
// A path ending in a slash marks the entry as a folder.
192 if(URIUtils::HasSlashAtEnd(pItem
->GetPath(), true))
193 pItem
->m_bIsFolder
= true;
// Date parts as text. The patterns are tried in order and the first one that
// matches the metadata wins; the capture group order differs per format.
// NOTE(review): the declaration of monthNum is not visible in this listing.
195 std::string day
, month
, year
, hour
, minute
;
// dd-Mon-yyyy hh:mm in a right-aligned cell.
198 if (reDateTimeHtml
.RegFind(strMetadata
.c_str()) >= 0)
200 day
= reDateTimeHtml
.GetMatch(1);
201 month
= reDateTimeHtml
.GetMatch(2);
202 year
= reDateTimeHtml
.GetMatch(3);
203 hour
= reDateTimeHtml
.GetMatch(4);
204 minute
= reDateTimeHtml
.GetMatch(5);
// yyyy-Mon-dd hh:mm in a cell with class date.
206 else if (reDateTimeNginxFancy
.RegFind(strMetadata
.c_str()) >= 0)
208 day
= reDateTimeNginxFancy
.GetMatch(3);
209 month
= reDateTimeNginxFancy
.GetMatch(2);
210 year
= reDateTimeNginxFancy
.GetMatch(1);
211 hour
= reDateTimeNginxFancy
.GetMatch(4);
212 minute
= reDateTimeNginxFancy
.GetMatch(5);
// Bare dd-Mon-yyyy hh:mm.
214 else if (reDateTimeNginx
.RegFind(strMetadata
.c_str()) >= 0)
216 day
= reDateTimeNginx
.GetMatch(1);
217 month
= reDateTimeNginx
.GetMatch(2);
218 year
= reDateTimeNginx
.GetMatch(3);
219 hour
= reDateTimeNginx
.GetMatch(4);
220 minute
= reDateTimeNginx
.GetMatch(5);
// yyyy-Mon-dd hh:mm:ss in a cell with class m; the seconds group is not used.
222 else if (reDateTimeLighttp
.RegFind(strMetadata
.c_str()) >= 0)
224 day
= reDateTimeLighttp
.GetMatch(3);
225 month
= reDateTimeLighttp
.GetMatch(2);
226 year
= reDateTimeLighttp
.GetMatch(1);
227 hour
= reDateTimeLighttp
.GetMatch(4);
228 minute
= reDateTimeLighttp
.GetMatch(5);
// yyyy-mm-dd hh:mm in a right-aligned cell; the month is already numeric and
// goes straight into monthNum.
230 else if (reDateTimeApacheNewFormat
.RegFind(strMetadata
.c_str()) >= 0)
232 day
= reDateTimeApacheNewFormat
.GetMatch(3);
233 monthNum
= atoi(reDateTimeApacheNewFormat
.GetMatch(2).c_str());
234 year
= reDateTimeApacheNewFormat
.GetMatch(1);
235 hour
= reDateTimeApacheNewFormat
.GetMatch(4);
236 minute
= reDateTimeApacheNewFormat
.GetMatch(5);
// Bare yyyy-mm-dd hh:mm with a numeric month.
238 else if (reDateTime
.RegFind(strMetadata
.c_str()) >= 0)
240 day
= reDateTime
.GetMatch(3);
241 monthNum
= atoi(reDateTime
.GetMatch(2).c_str());
242 year
= reDateTime
.GetMatch(1);
243 hour
= reDateTime
.GetMatch(4);
244 minute
= reDateTime
.GetMatch(5);
// Formats with a month name leave month set; translate it to a month number.
247 if (month
.length() > 0)
248 monthNum
= CDateTime::MonthStringToMonthNum(month
);
// The timestamp is only stored when day, month number and year are all
// available; seconds are always set to zero.
250 if (day
.length() > 0 && monthNum
> 0 && year
.length() > 0)
252 pItem
->m_dateTime
= CDateTime(atoi(year
.c_str()), monthNum
, atoi(day
.c_str()), atoi(hour
.c_str()), atoi(minute
.c_str()), 0);
// Sizes are only looked up for entries that are not folders.
255 if (!pItem
->m_bIsFolder
)
// Table-style size: parse the number as a double and scale it by the unit
// using multiples of 1024.
// NOTE(review): the first branch of the unit chain (before the else if for M)
// is not visible in this listing.
257 if (reSizeHtml
.RegFind(strMetadata
.c_str()) >= 0)
259 double Size
= atof(reSizeHtml
.GetMatch(1).c_str());
260 std::string
strUnit(reSizeHtml
.GetMatch(2));
264 else if (strUnit
== "M")
265 Size
= Size
* 1024 * 1024;
266 else if (strUnit
== "G")
267 Size
= Size
* 1024 * 1024 * 1024;
269 pItem
->m_dwSize
= (int64_t)Size
;
// Plain-style size: same scaling, applied to the reSize match.
271 else if (reSize
.RegFind(strMetadata
.c_str()) >= 0)
273 double Size
= atof(reSize
.GetMatch(1).c_str());
274 std::string
strUnit(reSize
.GetMatch(2));
278 else if (strUnit
== "M")
279 Size
= Size
* 1024 * 1024;
280 else if (strUnit
== "G")
281 Size
= Size
* 1024 * 1024 * 1024;
283 pItem
->m_dwSize
= (int64_t)Size
;
// Optional fallback controlled by the advanced setting
// m_bHTTPDirectoryStatFilesize: the size is taken from file.GetLength().
// NOTE(review): the declaration of file and the URL it is opened with are not
// visible in this listing - verify it targets the entry, not the directory.
286 if (CServiceBroker::GetSettingsComponent()->GetAdvancedSettings()->m_bHTTPDirectoryStatFilesize
) // As a fallback get the size by stat-ing the file (slow)
290 pItem
->m_dwSize
=file
.GetLength();
// Tag the resulting list itself as an HTTP directory listing.
300 items
.SetProperty("IsHTTPDirectory", true);
305 bool CHTTPDirectory::Exists(const CURL
&url
)
308 struct __stat64 buffer
;
310 if( http
.Stat(url
, &buffer
) != 0 )
315 if (buffer
.st_mode
== _S_IFDIR
)