2 * Copyright (C) 2005-2018 Team Kodi
3 * This file is part of Kodi - https://kodi.tv
5 * SPDX-License-Identifier: GPL-2.0-or-later
6 * See LICENSES/README.md for more information.
9 #include "HTTPDirectory.h"
13 #include "ServiceBroker.h"
15 #include "settings/AdvancedSettings.h"
16 #include "settings/SettingsComponent.h"
17 #include "utils/CharsetConverter.h"
18 #include "utils/HTMLUtil.h"
19 #include "utils/RegExp.h"
20 #include "utils/StringUtils.h"
21 #include "utils/URIUtils.h"
22 #include "utils/log.h"
26 using namespace XFILE
;
// Default construction: the compiler-generated constructor is used as-is.
28 CHTTPDirectory::CHTTPDirectory() = default;
// Destruction: the compiler-generated destructor is used as-is.
29 CHTTPDirectory::~CHTTPDirectory() = default;
// Builds a directory listing from an HTML index page served over HTTP.
//
// The response body for `url` is scanned for <a href="..."> anchors. For each
// anchor, the link and the display name are normalised (charset conversion,
// HTML entity decoding, option/fragment stripping) and compared; an anchor is
// only treated as a directory entry when its name matches its decoded link.
// For an accepted entry a CFileItem is created, flagged as a folder when its
// path ends with a slash, and given a date and (for files) a size when the
// text following the anchor matches one of the patterns compiled below.
//
// url:   location of the index page; its file name is used as the base path
//        of every entry.
// items: list that is tagged with the "IsHTTPDirectory" property.
31 bool CHTTPDirectory::GetDirectory(const CURL
& url
, CFileItemList
&items
)
// Base path of the listing; each entry's link is appended to it further down.
35 const std::string
& strBasePath
= url
.GetFileName();
// The failure is logged with url.GetRedacted() instead of the raw URL
// (presumably so credentials do not end up in the log).
39 CLog::Log(LOGERROR
, "{} - Unable to get http directory ({})", __FUNCTION__
, url
.GetRedacted());
// Anchor pattern. Group 1 = href value, group 2 = link text with surrounding
// whitespace removed, group 3 = whatever follows the closing </a> up to the
// next "<a", "</tr" or the end of input (the entry's date/size metadata).
43 CRegExp
reItem(true); // HTML is case-insensitive
44 reItem
.RegComp("<a href=\"([^\"]*)\"[^>]*>\\s*(.*?)\\s*</a>(.+?)(?=<a|</tr|$)");
// Date layout: <td align="right">DD-MMM-YYYY HH:MM </td> (month as three letters).
46 CRegExp
reDateTimeHtml(true);
47 reDateTimeHtml
.RegComp(
48 "<td align=\"right\">([0-9]{2})-([A-Z]{3})-([0-9]{4}) ([0-9]{2}):([0-9]{2}) +</td>");
// Date layout: <td class="m">YYYY-MMM-DD HH:MM:SS</td>.
50 CRegExp
reDateTimeLighttp(true);
51 reDateTimeLighttp
.RegComp(
52 "<td class=\"m\">([0-9]{4})-([A-Z]{3})-([0-9]{2}) ([0-9]{2}):([0-9]{2}):([0-9]{2})</td>");
// Date layout: bare DD-MMM-YYYY HH:MM with no surrounding markup.
54 CRegExp
reDateTimeNginx(true);
55 reDateTimeNginx
.RegComp("([0-9]{2})-([A-Z]{3})-([0-9]{4}) ([0-9]{2}):([0-9]{2})");
// Date layout: <td class="date">YYYY-MMM-DD HH:MM</td>.
57 CRegExp
reDateTimeNginxFancy(true);
58 reDateTimeNginxFancy
.RegComp(
59 "<td class=\"date\">([0-9]{4})-([A-Z]{3})-([0-9]{2}) ([0-9]{2}):([0-9]{2})</td>");
// Date layout: <td align="right">YYYY-MM-DD HH:MM </td> (numeric month).
61 CRegExp
reDateTimeApacheNewFormat(true);
62 reDateTimeApacheNewFormat
.RegComp(
63 "<td align=\"right\">([0-9]{4})-([0-9]{2})-([0-9]{2}) ([0-9]{2}):([0-9]{2}) +</td>");
// Date layout: bare YYYY-MM-DD HH:MM (numeric month); the least specific
// pattern, so it is tried last in the chain below.
65 CRegExp
reDateTime(true);
66 reDateTime
.RegComp("([0-9]{4})-([0-9]{2})-([0-9]{2}) ([0-9]{2}):([0-9]{2})");
// Size inside a table cell: a number (digits and dots), then a unit of
// B, K, M, G or a space, optionally followed by "iB", closed by </td>.
68 CRegExp
reSizeHtml(true);
69 reSizeHtml
.RegComp("> *([0-9.]+) *(B|K|M|G| )(iB)?</td>");
// Size in plain text: at least one space, an integer, an optional unit of
// B, K, M or G, followed by whitespace, "<" or the end of input.
72 reSize
.RegComp(" +([0-9]+)(B|K|M|G)?(?=\\s|<|$)");
74 /* read response from server into string buffer */
75 std::string strBuffer
;
76 if (http
.ReadData(strBuffer
) && strBuffer
.length() > 0)
78 /* if Content-Length is present and Content-Type is not text/html, the URL points to a file, so don't treat it as an HTTP directory */
79 if (!http
.GetHttpHeader().GetValue("Content-Length").empty() &&
80 !StringUtils::StartsWithNoCase(http
.GetHttpHeader().GetValue("Content-type"), "text/html"))
// Charset reported for the response. When one is given and it is not the
// literal "UTF-8", the buffer is converted to UTF-8; the converted text only
// replaces the buffer if the conversion succeeds and yields non-empty output.
85 std::string
fileCharset(http
.GetProperty(XFILE::FILE_PROPERTY_CONTENT_CHARSET
));
86 if (!fileCharset
.empty() && fileCharset
!= "UTF-8")
88 std::string converted
;
89 if (g_charsetConverter
.ToUtf8(fileCharset
, strBuffer
, converted
) && !converted
.empty())
90 strBuffer
= converted
;
// Walk the buffer anchor by anchor; bufferOffset is where the next search starts.
93 unsigned int bufferOffset
= 0;
94 while (bufferOffset
< strBuffer
.length())
96 int matchOffset
= reItem
.RegFind(strBuffer
.c_str(), bufferOffset
);
// Continue after the end of the whole match (sub-match 0).
100 bufferOffset
= matchOffset
+ reItem
.GetSubLength(0);
// Raw pieces of the anchor: href, display text and trailing metadata.
102 std::string strLink
= reItem
.GetMatch(1);
103 std::string strName
= reItem
.GetMatch(2);
104 std::string strMetadata
= reItem
.GetMatch(3);
105 StringUtils::Trim(strMetadata
);
// Drop a single leading slash from the link.
107 if(strLink
[0] == '/')
108 strLink
= strLink
.substr(1);
// Normalise the display name: trim it, guess the encoding only when the
// server reported no charset, decode HTML entities via a wide-string round
// trip, and remove a trailing slash.
110 std::string strNameTemp
= StringUtils::Trim(strName
);
112 std::wstring wName
, wLink
, wConverted
;
113 if (fileCharset
.empty())
114 g_charsetConverter
.unknownToUTF8(strNameTemp
);
115 g_charsetConverter
.utf8ToW(strNameTemp
, wName
, false);
116 HTML::CHTMLUtil::ConvertHTMLToW(wName
, wConverted
);
117 g_charsetConverter
.wToUTF8(wConverted
, strNameTemp
);
118 URIUtils::RemoveSlashAtEnd(strNameTemp
);
// strLinkBase ends up holding the link without options or fragment;
// strLinkOptions keeps everything from the first '?' onwards.
120 std::string strLinkBase
= strLink
;
121 std::string strLinkOptions
;
123 // split link with url options
124 size_t pos
= strLinkBase
.find('?');
125 if (pos
!= std::string::npos
)
127 strLinkOptions
= strLinkBase
.substr(pos
);
128 strLinkBase
.erase(pos
);
131 // strip url fragment from the link
132 pos
= strLinkBase
.find('#');
133 if (pos
!= std::string::npos
)
135 strLinkBase
.erase(pos
);
138 // Convert any HTML character entities (e.g.: "&amp;") to percentage encoding
139 // (e.g.: "%xx") as some web servers (Apache) put these in HTTP Directory Indexes
140 // this is also needed as CURL objects interpret them incorrectly due to the ;
141 // also being allowed as URL option separator
142 if (fileCharset
.empty())
143 g_charsetConverter
.unknownToUTF8(strLinkBase
);
144 g_charsetConverter
.utf8ToW(strLinkBase
, wLink
, false);
145 HTML::CHTMLUtil::ConvertHTMLToW(wLink
, wConverted
);
146 g_charsetConverter
.wToUTF8(wConverted
, strLinkBase
);
148 // percent-encode + and ; if the http server on the remote side has not already encoded them (example: Apache)
149 // more characters may be added here when required by certain http servers
// Each hit is replaced by '%' plus the character code in lowercase hex
// ("%2b" for '+', "%3b" for ';'); the replacement contains neither character,
// so searching again from the start terminates.
150 pos
= strLinkBase
.find_first_of("+;");
151 while (pos
!= std::string::npos
)
153 std::stringstream convert
;
154 convert
<< '%' << std::hex
<< int(strLinkBase
.at(pos
));
155 strLinkBase
.replace(pos
, 1, convert
.str());
156 pos
= strLinkBase
.find_first_of("+;");
// strLinkTemp is a comparison-only copy of the link: no trailing slash and
// URL-decoded. strLinkBase itself stays encoded.
159 std::string strLinkTemp
= strLinkBase
;
161 URIUtils::RemoveSlashAtEnd(strLinkTemp
);
162 strLinkTemp
= CURL::Decode(strLinkTemp
);
// A display name ending in "..>" is treated as a truncated name: when the
// decoded link starts with the part before those three characters, the link
// replaces both the name and its normalised copy.
164 if (StringUtils::EndsWith(strNameTemp
, "..>") &&
165 StringUtils::StartsWith(strLinkTemp
, strNameTemp
.substr(0, strNameTemp
.length() - 3)))
166 strName
= strNameTemp
= strLinkTemp
;
// NOTE(review): the section sign in the RFC reference below is mis-encoded ("ยง" should read "§").
168 /* Per RFC 1808 ยง 5.3, relative paths containing a colon ":" should be either prefixed with
169 * "./" or escaped (as "%3A"). This handles the prefix case, the escaping should be handled by
170 * the CURL::Decode above
171 * - https://tools.ietf.org/html/rfc1808#section-5.3
// Returns true when name equals link, or when name contains ':' and
// "./" + name equals link.
173 auto NameMatchesLink([](const std::string
& name
, const std::string
& link
) -> bool
175 return (name
== link
) ||
176 ((std::string::npos
!= name
.find(':')) && (std::string
{"./"}.append(name
) == link
));
179 // we detect http directory items by its display name and its stripped link
180 // if same, we consider it as a valid item.
// Empty links and ".." are never accepted as entries.
181 if (strLinkTemp
!= ".." && strLinkTemp
!= "" && NameMatchesLink(strNameTemp
, strLinkTemp
))
// The item is labelled with the normalised display name.
183 CFileItemPtr
pItem(new CFileItem(strNameTemp
));
184 pItem
->SetProperty("IsHTTPDirectory", true);
// Entry URL: base path plus the still-encoded link, with the link's own
// options re-attached.
187 url2
.SetFileName(strBasePath
+ strLinkBase
);
188 url2
.SetOptions(strLinkOptions
);
// A path ending in a slash marks the entry as a folder.
191 if(URIUtils::HasSlashAtEnd(pItem
->GetPath(), true))
192 pItem
->m_bIsFolder
= true;
// Date components as text. Patterns with a three-letter month fill `month`;
// patterns with a numeric month set monthNum directly.
194 std::string day
, month
, year
, hour
, minute
;
// Groups: day, month name, year, hour, minute.
197 if (reDateTimeHtml
.RegFind(strMetadata
.c_str()) >= 0)
199 day
= reDateTimeHtml
.GetMatch(1);
200 month
= reDateTimeHtml
.GetMatch(2);
201 year
= reDateTimeHtml
.GetMatch(3);
202 hour
= reDateTimeHtml
.GetMatch(4);
203 minute
= reDateTimeHtml
.GetMatch(5);
// Groups: year, month name, day, hour, minute.
205 else if (reDateTimeNginxFancy
.RegFind(strMetadata
.c_str()) >= 0)
207 day
= reDateTimeNginxFancy
.GetMatch(3);
208 month
= reDateTimeNginxFancy
.GetMatch(2);
209 year
= reDateTimeNginxFancy
.GetMatch(1);
210 hour
= reDateTimeNginxFancy
.GetMatch(4);
211 minute
= reDateTimeNginxFancy
.GetMatch(5);
// Groups: day, month name, year, hour, minute.
213 else if (reDateTimeNginx
.RegFind(strMetadata
.c_str()) >= 0)
215 day
= reDateTimeNginx
.GetMatch(1);
216 month
= reDateTimeNginx
.GetMatch(2);
217 year
= reDateTimeNginx
.GetMatch(3);
218 hour
= reDateTimeNginx
.GetMatch(4);
219 minute
= reDateTimeNginx
.GetMatch(5);
// Groups: year, month name, day, hour, minute (the seconds group is not read).
221 else if (reDateTimeLighttp
.RegFind(strMetadata
.c_str()) >= 0)
223 day
= reDateTimeLighttp
.GetMatch(3);
224 month
= reDateTimeLighttp
.GetMatch(2);
225 year
= reDateTimeLighttp
.GetMatch(1);
226 hour
= reDateTimeLighttp
.GetMatch(4);
227 minute
= reDateTimeLighttp
.GetMatch(5);
// Groups: year, numeric month, day, hour, minute.
229 else if (reDateTimeApacheNewFormat
.RegFind(strMetadata
.c_str()) >= 0)
231 day
= reDateTimeApacheNewFormat
.GetMatch(3);
232 monthNum
= atoi(reDateTimeApacheNewFormat
.GetMatch(2).c_str());
233 year
= reDateTimeApacheNewFormat
.GetMatch(1);
234 hour
= reDateTimeApacheNewFormat
.GetMatch(4);
235 minute
= reDateTimeApacheNewFormat
.GetMatch(5);
// Groups: year, numeric month, day, hour, minute.
237 else if (reDateTime
.RegFind(strMetadata
.c_str()) >= 0)
239 day
= reDateTime
.GetMatch(3);
240 monthNum
= atoi(reDateTime
.GetMatch(2).c_str());
241 year
= reDateTime
.GetMatch(1);
242 hour
= reDateTime
.GetMatch(4);
243 minute
= reDateTime
.GetMatch(5);
// A textual month, when one was captured, is translated to its number.
246 if (month
.length() > 0)
247 monthNum
= CDateTime::MonthStringToMonthNum(month
);
// The timestamp is only set when day, month and year are all known;
// seconds are always stored as 0.
249 if (day
.length() > 0 && monthNum
> 0 && year
.length() > 0)
251 pItem
->m_dateTime
= CDateTime(atoi(year
.c_str()), monthNum
, atoi(day
.c_str()), atoi(hour
.c_str()), atoi(minute
.c_str()), 0);
// Sizes are only looked up for files, not for folders.
254 if (!pItem
->m_bIsFolder
)
// Table-cell size: the number is scaled by the captured unit
// (M = 1024 * 1024, G = 1024 * 1024 * 1024) and truncated to int64_t.
256 if (reSizeHtml
.RegFind(strMetadata
.c_str()) >= 0)
258 double Size
= atof(reSizeHtml
.GetMatch(1).c_str());
259 std::string
strUnit(reSizeHtml
.GetMatch(2));
263 else if (strUnit
== "M")
264 Size
= Size
* 1024 * 1024;
265 else if (strUnit
== "G")
266 Size
= Size
* 1024 * 1024 * 1024;
268 pItem
->m_dwSize
= (int64_t)Size
;
// Plain-text size: same scaling rules as the table-cell case.
270 else if (reSize
.RegFind(strMetadata
.c_str()) >= 0)
272 double Size
= atof(reSize
.GetMatch(1).c_str());
273 std::string
strUnit(reSize
.GetMatch(2));
277 else if (strUnit
== "M")
278 Size
= Size
* 1024 * 1024;
279 else if (strUnit
== "G")
280 Size
= Size
* 1024 * 1024 * 1024;
282 pItem
->m_dwSize
= (int64_t)Size
;
// Controlled by the advanced setting m_bHTTPDirectoryStatFilesize.
285 if (CServiceBroker::GetSettingsComponent()->GetAdvancedSettings()->m_bHTTPDirectoryStatFilesize
) // As a fallback get the size by stat-ing the file (slow)
289 pItem
->m_dwSize
=file
.GetLength();
// Tag the list itself so callers can tell it came from an HTTP directory index.
299 items
.SetProperty("IsHTTPDirectory", true);
304 bool CHTTPDirectory::Exists(const CURL
&url
)
307 struct __stat64 buffer
;
309 if( http
.Stat(url
, &buffer
) != 0 )
314 if (buffer
.st_mode
== _S_IFDIR
)