[PVR][Estuary] Timer settings dialog: Show client name in timer type selection dialog...
[xbmc.git] / xbmc / filesystem / HTTPDirectory.cpp
blob0097b7f15ca8157a1acbaeb158824c1e8cc0ec02
1 /*
2 * Copyright (C) 2005-2018 Team Kodi
3 * This file is part of Kodi - https://kodi.tv
5 * SPDX-License-Identifier: GPL-2.0-or-later
6 * See LICENSES/README.md for more information.
7 */
9 #include "HTTPDirectory.h"
11 #include "CurlFile.h"
12 #include "FileItem.h"
13 #include "ServiceBroker.h"
14 #include "URL.h"
15 #include "settings/AdvancedSettings.h"
16 #include "settings/SettingsComponent.h"
17 #include "utils/CharsetConverter.h"
18 #include "utils/HTMLUtil.h"
19 #include "utils/RegExp.h"
20 #include "utils/StringUtils.h"
21 #include "utils/URIUtils.h"
22 #include "utils/log.h"
24 #include <climits>
26 using namespace XFILE;
28 CHTTPDirectory::CHTTPDirectory(void) = default;
29 CHTTPDirectory::~CHTTPDirectory(void) = default;
31 bool CHTTPDirectory::GetDirectory(const CURL& url, CFileItemList &items)
33 CCurlFile http;
35 const std::string& strBasePath = url.GetFileName();
37 if(!http.Open(url))
39 CLog::Log(LOGERROR, "{} - Unable to get http directory ({})", __FUNCTION__, url.GetRedacted());
40 return false;
43 CRegExp reItem(true); // HTML is case-insensitive
44 reItem.RegComp("<a href=\"([^\"]*)\"[^>]*>\\s*(.*?)\\s*</a>(.+?)(?=<a|</tr|$)");
46 CRegExp reDateTimeHtml(true);
47 reDateTimeHtml.RegComp(
48 "<td align=\"right\">([0-9]{2})-([A-Z]{3})-([0-9]{4}) ([0-9]{2}):([0-9]{2}) +</td>");
50 CRegExp reDateTimeLighttp(true);
51 reDateTimeLighttp.RegComp(
52 "<td class=\"m\">([0-9]{4})-([A-Z]{3})-([0-9]{2}) ([0-9]{2}):([0-9]{2}):([0-9]{2})</td>");
54 CRegExp reDateTimeNginx(true);
55 reDateTimeNginx.RegComp("([0-9]{2})-([A-Z]{3})-([0-9]{4}) ([0-9]{2}):([0-9]{2})");
57 CRegExp reDateTimeNginxFancy(true);
58 reDateTimeNginxFancy.RegComp(
59 "<td class=\"date\">([0-9]{4})-([A-Z]{3})-([0-9]{2}) ([0-9]{2}):([0-9]{2})</td>");
61 CRegExp reDateTimeApacheNewFormat(true);
62 reDateTimeApacheNewFormat.RegComp(
63 "<td align=\"right\">([0-9]{4})-([0-9]{2})-([0-9]{2}) ([0-9]{2}):([0-9]{2}) +</td>");
65 CRegExp reDateTime(true);
66 reDateTime.RegComp("([0-9]{4})-([0-9]{2})-([0-9]{2}) ([0-9]{2}):([0-9]{2})");
68 CRegExp reSizeHtml(true);
69 reSizeHtml.RegComp("> *([0-9.]+) *(B|K|M|G| )(iB)?</td>");
71 CRegExp reSize(true);
72 reSize.RegComp(" +([0-9]+)(B|K|M|G)?(?=\\s|<|$)");
74 /* read response from server into string buffer */
75 std::string strBuffer;
76 if (http.ReadData(strBuffer) && strBuffer.length() > 0)
78 /* if Content-Length is found and its not text/html, URL is pointing to file so don't treat URL as HTTPDirectory */
79 if (!http.GetHttpHeader().GetValue("Content-Length").empty() &&
80 !StringUtils::StartsWithNoCase(http.GetHttpHeader().GetValue("Content-type"), "text/html"))
82 return false;
85 std::string fileCharset(http.GetProperty(XFILE::FILE_PROPERTY_CONTENT_CHARSET));
86 if (!fileCharset.empty() && fileCharset != "UTF-8")
88 std::string converted;
89 if (g_charsetConverter.ToUtf8(fileCharset, strBuffer, converted) && !converted.empty())
90 strBuffer = converted;
93 unsigned int bufferOffset = 0;
94 while (bufferOffset < strBuffer.length())
96 int matchOffset = reItem.RegFind(strBuffer.c_str(), bufferOffset);
97 if (matchOffset < 0)
98 break;
100 bufferOffset = matchOffset + reItem.GetSubLength(0);
102 std::string strLink = reItem.GetMatch(1);
103 std::string strName = reItem.GetMatch(2);
104 std::string strMetadata = reItem.GetMatch(3);
105 StringUtils::Trim(strMetadata);
107 if(strLink[0] == '/')
108 strLink = strLink.substr(1);
110 std::string strNameTemp = StringUtils::Trim(strName);
112 std::wstring wName, wLink, wConverted;
113 if (fileCharset.empty())
114 g_charsetConverter.unknownToUTF8(strNameTemp);
115 g_charsetConverter.utf8ToW(strNameTemp, wName, false);
116 HTML::CHTMLUtil::ConvertHTMLToW(wName, wConverted);
117 g_charsetConverter.wToUTF8(wConverted, strNameTemp);
118 URIUtils::RemoveSlashAtEnd(strNameTemp);
120 std::string strLinkBase = strLink;
121 std::string strLinkOptions;
123 // split link with url options
124 size_t pos = strLinkBase.find('?');
125 if (pos != std::string::npos)
127 strLinkOptions = strLinkBase.substr(pos);
128 strLinkBase.erase(pos);
131 // strip url fragment from the link
132 pos = strLinkBase.find('#');
133 if (pos != std::string::npos)
135 strLinkBase.erase(pos);
138 // Convert any HTTP character entities (e.g.: "&amp;") to percentage encoding
139 // (e.g.: "%xx") as some web servers (Apache) put these in HTTP Directory Indexes
140 // this is also needed as CURL objects interpret them incorrectly due to the ;
141 // also being allowed as URL option separator
142 if (fileCharset.empty())
143 g_charsetConverter.unknownToUTF8(strLinkBase);
144 g_charsetConverter.utf8ToW(strLinkBase, wLink, false);
145 HTML::CHTMLUtil::ConvertHTMLToW(wLink, wConverted);
146 g_charsetConverter.wToUTF8(wConverted, strLinkBase);
148 // encoding + and ; to URL encode if it is not already encoded by http server used on the remote server (example: Apache)
149 // more characters may be added here when required when required by certain http servers
150 pos = strLinkBase.find_first_of("+;");
151 while (pos != std::string::npos)
153 std::stringstream convert;
154 convert << '%' << std::hex << int(strLinkBase.at(pos));
155 strLinkBase.replace(pos, 1, convert.str());
156 pos = strLinkBase.find_first_of("+;");
159 std::string strLinkTemp = strLinkBase;
161 URIUtils::RemoveSlashAtEnd(strLinkTemp);
162 strLinkTemp = CURL::Decode(strLinkTemp);
164 if (StringUtils::EndsWith(strNameTemp, "..>") &&
165 StringUtils::StartsWith(strLinkTemp, strNameTemp.substr(0, strNameTemp.length() - 3)))
166 strName = strNameTemp = strLinkTemp;
168 /* Per RFC 1808 ยง 5.3, relative paths containing a colon ":" should be either prefixed with
169 * "./" or escaped (as "%3A"). This handles the prefix case, the escaping should be handled by
170 * the CURL::Decode above
171 * - https://tools.ietf.org/html/rfc1808#section-5.3
173 auto NameMatchesLink([](const std::string& name, const std::string& link) -> bool
175 return (name == link) ||
176 ((std::string::npos != name.find(':')) && (std::string{"./"}.append(name) == link));
179 // we detect http directory items by its display name and its stripped link
180 // if same, we consider it as a valid item.
181 if (strLinkTemp != ".." && strLinkTemp != "" && NameMatchesLink(strNameTemp, strLinkTemp))
183 CFileItemPtr pItem(new CFileItem(strNameTemp));
184 pItem->SetProperty("IsHTTPDirectory", true);
185 CURL url2(url);
187 url2.SetFileName(strBasePath + strLinkBase);
188 url2.SetOptions(strLinkOptions);
189 pItem->SetURL(url2);
191 if(URIUtils::HasSlashAtEnd(pItem->GetPath(), true))
192 pItem->m_bIsFolder = true;
194 std::string day, month, year, hour, minute;
195 int monthNum = 0;
197 if (reDateTimeHtml.RegFind(strMetadata.c_str()) >= 0)
199 day = reDateTimeHtml.GetMatch(1);
200 month = reDateTimeHtml.GetMatch(2);
201 year = reDateTimeHtml.GetMatch(3);
202 hour = reDateTimeHtml.GetMatch(4);
203 minute = reDateTimeHtml.GetMatch(5);
205 else if (reDateTimeNginxFancy.RegFind(strMetadata.c_str()) >= 0)
207 day = reDateTimeNginxFancy.GetMatch(3);
208 month = reDateTimeNginxFancy.GetMatch(2);
209 year = reDateTimeNginxFancy.GetMatch(1);
210 hour = reDateTimeNginxFancy.GetMatch(4);
211 minute = reDateTimeNginxFancy.GetMatch(5);
213 else if (reDateTimeNginx.RegFind(strMetadata.c_str()) >= 0)
215 day = reDateTimeNginx.GetMatch(1);
216 month = reDateTimeNginx.GetMatch(2);
217 year = reDateTimeNginx.GetMatch(3);
218 hour = reDateTimeNginx.GetMatch(4);
219 minute = reDateTimeNginx.GetMatch(5);
221 else if (reDateTimeLighttp.RegFind(strMetadata.c_str()) >= 0)
223 day = reDateTimeLighttp.GetMatch(3);
224 month = reDateTimeLighttp.GetMatch(2);
225 year = reDateTimeLighttp.GetMatch(1);
226 hour = reDateTimeLighttp.GetMatch(4);
227 minute = reDateTimeLighttp.GetMatch(5);
229 else if (reDateTimeApacheNewFormat.RegFind(strMetadata.c_str()) >= 0)
231 day = reDateTimeApacheNewFormat.GetMatch(3);
232 monthNum = atoi(reDateTimeApacheNewFormat.GetMatch(2).c_str());
233 year = reDateTimeApacheNewFormat.GetMatch(1);
234 hour = reDateTimeApacheNewFormat.GetMatch(4);
235 minute = reDateTimeApacheNewFormat.GetMatch(5);
237 else if (reDateTime.RegFind(strMetadata.c_str()) >= 0)
239 day = reDateTime.GetMatch(3);
240 monthNum = atoi(reDateTime.GetMatch(2).c_str());
241 year = reDateTime.GetMatch(1);
242 hour = reDateTime.GetMatch(4);
243 minute = reDateTime.GetMatch(5);
246 if (month.length() > 0)
247 monthNum = CDateTime::MonthStringToMonthNum(month);
249 if (day.length() > 0 && monthNum > 0 && year.length() > 0)
251 pItem->m_dateTime = CDateTime(atoi(year.c_str()), monthNum, atoi(day.c_str()), atoi(hour.c_str()), atoi(minute.c_str()), 0);
254 if (!pItem->m_bIsFolder)
256 if (reSizeHtml.RegFind(strMetadata.c_str()) >= 0)
258 double Size = atof(reSizeHtml.GetMatch(1).c_str());
259 std::string strUnit(reSizeHtml.GetMatch(2));
261 if (strUnit == "K")
262 Size = Size * 1024;
263 else if (strUnit == "M")
264 Size = Size * 1024 * 1024;
265 else if (strUnit == "G")
266 Size = Size * 1024 * 1024 * 1024;
268 pItem->m_dwSize = (int64_t)Size;
270 else if (reSize.RegFind(strMetadata.c_str()) >= 0)
272 double Size = atof(reSize.GetMatch(1).c_str());
273 std::string strUnit(reSize.GetMatch(2));
275 if (strUnit == "K")
276 Size = Size * 1024;
277 else if (strUnit == "M")
278 Size = Size * 1024 * 1024;
279 else if (strUnit == "G")
280 Size = Size * 1024 * 1024 * 1024;
282 pItem->m_dwSize = (int64_t)Size;
284 else
285 if (CServiceBroker::GetSettingsComponent()->GetAdvancedSettings()->m_bHTTPDirectoryStatFilesize) // As a fallback get the size by stat-ing the file (slow)
287 CCurlFile file;
288 file.Open(url);
289 pItem->m_dwSize=file.GetLength();
290 file.Close();
293 items.Add(pItem);
297 http.Close();
299 items.SetProperty("IsHTTPDirectory", true);
301 return true;
304 bool CHTTPDirectory::Exists(const CURL &url)
306 CCurlFile http;
307 struct __stat64 buffer;
309 if( http.Stat(url, &buffer) != 0 )
311 return false;
314 if (buffer.st_mode == _S_IFDIR)
315 return true;
317 return false;