[windows] Fix MAC Address Discovery
[xbmc.git] / xbmc / filesystem / HTTPDirectory.cpp
blob1db70696d1d2ae861395ba28d71e326b3047c70d
1 /*
2 * Copyright (C) 2005-2018 Team Kodi
3 * This file is part of Kodi - https://kodi.tv
5 * SPDX-License-Identifier: GPL-2.0-or-later
6 * See LICENSES/README.md for more information.
7 */
9 #include "HTTPDirectory.h"
11 #include "CurlFile.h"
12 #include "FileItem.h"
13 #include "FileItemList.h"
14 #include "ServiceBroker.h"
15 #include "URL.h"
16 #include "settings/AdvancedSettings.h"
17 #include "settings/SettingsComponent.h"
18 #include "utils/CharsetConverter.h"
19 #include "utils/HTMLUtil.h"
20 #include "utils/RegExp.h"
21 #include "utils/StringUtils.h"
22 #include "utils/URIUtils.h"
23 #include "utils/log.h"
25 #include <climits>
27 using namespace XFILE;
29 CHTTPDirectory::CHTTPDirectory(void) = default;
30 CHTTPDirectory::~CHTTPDirectory(void) = default;
32 bool CHTTPDirectory::GetDirectory(const CURL& url, CFileItemList &items)
34 CCurlFile http;
36 const std::string& strBasePath = url.GetFileName();
38 if(!http.Open(url))
40 CLog::Log(LOGERROR, "{} - Unable to get http directory ({})", __FUNCTION__, url.GetRedacted());
41 return false;
44 CRegExp reItem(true); // HTML is case-insensitive
45 reItem.RegComp("<a href=\"([^\"]*)\"[^>]*>\\s*(.*?)\\s*</a>(.+?)(?=<a|</tr|$)");
47 CRegExp reDateTimeHtml(true);
48 reDateTimeHtml.RegComp(
49 "<td align=\"right\">([0-9]{2})-([A-Z]{3})-([0-9]{4}) ([0-9]{2}):([0-9]{2}) +</td>");
51 CRegExp reDateTimeLighttp(true);
52 reDateTimeLighttp.RegComp(
53 "<td class=\"m\">([0-9]{4})-([A-Z]{3})-([0-9]{2}) ([0-9]{2}):([0-9]{2}):([0-9]{2})</td>");
55 CRegExp reDateTimeNginx(true);
56 reDateTimeNginx.RegComp("([0-9]{2})-([A-Z]{3})-([0-9]{4}) ([0-9]{2}):([0-9]{2})");
58 CRegExp reDateTimeNginxFancy(true);
59 reDateTimeNginxFancy.RegComp(
60 "<td class=\"date\">([0-9]{4})-([A-Z]{3})-([0-9]{2}) ([0-9]{2}):([0-9]{2})</td>");
62 CRegExp reDateTimeApacheNewFormat(true);
63 reDateTimeApacheNewFormat.RegComp(
64 "<td align=\"right\">([0-9]{4})-([0-9]{2})-([0-9]{2}) ([0-9]{2}):([0-9]{2}) +</td>");
66 CRegExp reDateTime(true);
67 reDateTime.RegComp("([0-9]{4})-([0-9]{2})-([0-9]{2}) ([0-9]{2}):([0-9]{2})");
69 CRegExp reSizeHtml(true);
70 reSizeHtml.RegComp("> *([0-9.]+) *(B|K|M|G| )(iB)?</td>");
72 CRegExp reSize(true);
73 reSize.RegComp(" +([0-9]+)(B|K|M|G)?(?=\\s|<|$)");
75 /* read response from server into string buffer */
76 std::string strBuffer;
77 if (http.ReadData(strBuffer) && strBuffer.length() > 0)
79 /* if Content-Length is found and its not text/html, URL is pointing to file so don't treat URL as HTTPDirectory */
80 if (!http.GetHttpHeader().GetValue("Content-Length").empty() &&
81 !StringUtils::StartsWithNoCase(http.GetHttpHeader().GetValue("Content-type"), "text/html"))
83 return false;
86 std::string fileCharset(http.GetProperty(XFILE::FILE_PROPERTY_CONTENT_CHARSET));
87 if (!fileCharset.empty() && fileCharset != "UTF-8")
89 std::string converted;
90 if (g_charsetConverter.ToUtf8(fileCharset, strBuffer, converted) && !converted.empty())
91 strBuffer = converted;
94 unsigned int bufferOffset = 0;
95 while (bufferOffset < strBuffer.length())
97 int matchOffset = reItem.RegFind(strBuffer.c_str(), bufferOffset);
98 if (matchOffset < 0)
99 break;
101 bufferOffset = matchOffset + reItem.GetSubLength(0);
103 std::string strLink = reItem.GetMatch(1);
104 std::string strName = reItem.GetMatch(2);
105 std::string strMetadata = reItem.GetMatch(3);
106 StringUtils::Trim(strMetadata);
108 if(strLink[0] == '/')
109 strLink = strLink.substr(1);
111 std::string strNameTemp = StringUtils::Trim(strName);
113 std::wstring wName, wLink, wConverted;
114 if (fileCharset.empty())
115 g_charsetConverter.unknownToUTF8(strNameTemp);
116 g_charsetConverter.utf8ToW(strNameTemp, wName, false);
117 HTML::CHTMLUtil::ConvertHTMLToW(wName, wConverted);
118 g_charsetConverter.wToUTF8(wConverted, strNameTemp);
119 URIUtils::RemoveSlashAtEnd(strNameTemp);
121 std::string strLinkBase = strLink;
122 std::string strLinkOptions;
124 // split link with url options
125 size_t pos = strLinkBase.find('?');
126 if (pos != std::string::npos)
128 strLinkOptions = strLinkBase.substr(pos);
129 strLinkBase.erase(pos);
132 // strip url fragment from the link
133 pos = strLinkBase.find('#');
134 if (pos != std::string::npos)
136 strLinkBase.erase(pos);
139 // Convert any HTTP character entities (e.g.: "&amp;") to percentage encoding
140 // (e.g.: "%xx") as some web servers (Apache) put these in HTTP Directory Indexes
141 // this is also needed as CURL objects interpret them incorrectly due to the ;
142 // also being allowed as URL option separator
143 if (fileCharset.empty())
144 g_charsetConverter.unknownToUTF8(strLinkBase);
145 g_charsetConverter.utf8ToW(strLinkBase, wLink, false);
146 HTML::CHTMLUtil::ConvertHTMLToW(wLink, wConverted);
147 g_charsetConverter.wToUTF8(wConverted, strLinkBase);
149 // encoding + and ; to URL encode if it is not already encoded by http server used on the remote server (example: Apache)
150 // more characters may be added here when required when required by certain http servers
151 pos = strLinkBase.find_first_of("+;");
152 while (pos != std::string::npos)
154 std::stringstream convert;
155 convert << '%' << std::hex << int(strLinkBase.at(pos));
156 strLinkBase.replace(pos, 1, convert.str());
157 pos = strLinkBase.find_first_of("+;");
160 std::string strLinkTemp = strLinkBase;
162 URIUtils::RemoveSlashAtEnd(strLinkTemp);
163 strLinkTemp = CURL::Decode(strLinkTemp);
165 if (StringUtils::EndsWith(strNameTemp, "..>") &&
166 StringUtils::StartsWith(strLinkTemp, strNameTemp.substr(0, strNameTemp.length() - 3)))
167 strName = strNameTemp = strLinkTemp;
169 /* Per RFC 1808 ยง 5.3, relative paths containing a colon ":" should be either prefixed with
170 * "./" or escaped (as "%3A"). This handles the prefix case, the escaping should be handled by
171 * the CURL::Decode above
172 * - https://tools.ietf.org/html/rfc1808#section-5.3
174 auto NameMatchesLink([](const std::string& name, const std::string& link) -> bool
176 return (name == link) ||
177 ((std::string::npos != name.find(':')) && (std::string{"./"}.append(name) == link));
180 // we detect http directory items by its display name and its stripped link
181 // if same, we consider it as a valid item.
182 if (strLinkTemp != ".." && strLinkTemp != "" && NameMatchesLink(strNameTemp, strLinkTemp))
184 CFileItemPtr pItem(new CFileItem(strNameTemp));
185 pItem->SetProperty("IsHTTPDirectory", true);
186 CURL url2(url);
188 url2.SetFileName(strBasePath + strLinkBase);
189 url2.SetOptions(strLinkOptions);
190 pItem->SetURL(url2);
192 if(URIUtils::HasSlashAtEnd(pItem->GetPath(), true))
193 pItem->m_bIsFolder = true;
195 std::string day, month, year, hour, minute;
196 int monthNum = 0;
198 if (reDateTimeHtml.RegFind(strMetadata.c_str()) >= 0)
200 day = reDateTimeHtml.GetMatch(1);
201 month = reDateTimeHtml.GetMatch(2);
202 year = reDateTimeHtml.GetMatch(3);
203 hour = reDateTimeHtml.GetMatch(4);
204 minute = reDateTimeHtml.GetMatch(5);
206 else if (reDateTimeNginxFancy.RegFind(strMetadata.c_str()) >= 0)
208 day = reDateTimeNginxFancy.GetMatch(3);
209 month = reDateTimeNginxFancy.GetMatch(2);
210 year = reDateTimeNginxFancy.GetMatch(1);
211 hour = reDateTimeNginxFancy.GetMatch(4);
212 minute = reDateTimeNginxFancy.GetMatch(5);
214 else if (reDateTimeNginx.RegFind(strMetadata.c_str()) >= 0)
216 day = reDateTimeNginx.GetMatch(1);
217 month = reDateTimeNginx.GetMatch(2);
218 year = reDateTimeNginx.GetMatch(3);
219 hour = reDateTimeNginx.GetMatch(4);
220 minute = reDateTimeNginx.GetMatch(5);
222 else if (reDateTimeLighttp.RegFind(strMetadata.c_str()) >= 0)
224 day = reDateTimeLighttp.GetMatch(3);
225 month = reDateTimeLighttp.GetMatch(2);
226 year = reDateTimeLighttp.GetMatch(1);
227 hour = reDateTimeLighttp.GetMatch(4);
228 minute = reDateTimeLighttp.GetMatch(5);
230 else if (reDateTimeApacheNewFormat.RegFind(strMetadata.c_str()) >= 0)
232 day = reDateTimeApacheNewFormat.GetMatch(3);
233 monthNum = atoi(reDateTimeApacheNewFormat.GetMatch(2).c_str());
234 year = reDateTimeApacheNewFormat.GetMatch(1);
235 hour = reDateTimeApacheNewFormat.GetMatch(4);
236 minute = reDateTimeApacheNewFormat.GetMatch(5);
238 else if (reDateTime.RegFind(strMetadata.c_str()) >= 0)
240 day = reDateTime.GetMatch(3);
241 monthNum = atoi(reDateTime.GetMatch(2).c_str());
242 year = reDateTime.GetMatch(1);
243 hour = reDateTime.GetMatch(4);
244 minute = reDateTime.GetMatch(5);
247 if (month.length() > 0)
248 monthNum = CDateTime::MonthStringToMonthNum(month);
250 if (day.length() > 0 && monthNum > 0 && year.length() > 0)
252 pItem->m_dateTime = CDateTime(atoi(year.c_str()), monthNum, atoi(day.c_str()), atoi(hour.c_str()), atoi(minute.c_str()), 0);
255 if (!pItem->m_bIsFolder)
257 if (reSizeHtml.RegFind(strMetadata.c_str()) >= 0)
259 double Size = atof(reSizeHtml.GetMatch(1).c_str());
260 std::string strUnit(reSizeHtml.GetMatch(2));
262 if (strUnit == "K")
263 Size = Size * 1024;
264 else if (strUnit == "M")
265 Size = Size * 1024 * 1024;
266 else if (strUnit == "G")
267 Size = Size * 1024 * 1024 * 1024;
269 pItem->m_dwSize = (int64_t)Size;
271 else if (reSize.RegFind(strMetadata.c_str()) >= 0)
273 double Size = atof(reSize.GetMatch(1).c_str());
274 std::string strUnit(reSize.GetMatch(2));
276 if (strUnit == "K")
277 Size = Size * 1024;
278 else if (strUnit == "M")
279 Size = Size * 1024 * 1024;
280 else if (strUnit == "G")
281 Size = Size * 1024 * 1024 * 1024;
283 pItem->m_dwSize = (int64_t)Size;
285 else
286 if (CServiceBroker::GetSettingsComponent()->GetAdvancedSettings()->m_bHTTPDirectoryStatFilesize) // As a fallback get the size by stat-ing the file (slow)
288 CCurlFile file;
289 file.Open(url);
290 pItem->m_dwSize=file.GetLength();
291 file.Close();
294 items.Add(pItem);
298 http.Close();
300 items.SetProperty("IsHTTPDirectory", true);
302 return true;
305 bool CHTTPDirectory::Exists(const CURL &url)
307 CCurlFile http;
308 struct __stat64 buffer;
310 if( http.Stat(url, &buffer) != 0 )
312 return false;
315 if (buffer.st_mode == _S_IFDIR)
316 return true;
318 return false;