[PVR][Estuary] Timer settings dialog: Show client name in timer type selection dialog...
[xbmc.git] / xbmc / utils / XBMCTinyXML.cpp
blob612ddf21118809fb1ad78ea9942c54cc73a4c451
1 /*
2 * Copyright (C) 2005-2018 Team Kodi
3 * This file is part of Kodi - https://kodi.tv
5 * SPDX-License-Identifier: GPL-2.0-or-later
6 * See LICENSES/README.md for more information.
7 */
9 #include "XBMCTinyXML.h"
11 #include "LangInfo.h"
12 #include "RegExp.h"
13 #include "filesystem/File.h"
14 #include "utils/CharsetConverter.h"
15 #include "utils/CharsetDetection.h"
16 #include "utils/StringUtils.h"
17 #include "utils/Utf8Utils.h"
18 #include "utils/log.h"
// Number of characters handed to the entity regex when checking whether an '&'
// starts a recognised character entity (see CXBMCTinyXML::InternalParse).
20 #define MAX_ENTITY_LENGTH 8 // size of largest entity "&#xNNNN;"
// Size, in bytes, of the stack buffer used to read a C FILE* stream in chunks
// (see CXBMCTinyXML::LoadFile(FILE*, TiXmlEncoding)).
21 #define BUFFER_SIZE 4096
23 CXBMCTinyXML::CXBMCTinyXML()
24 : TiXmlDocument()
28 CXBMCTinyXML::CXBMCTinyXML(const char *documentName)
29 : TiXmlDocument(documentName)
33 CXBMCTinyXML::CXBMCTinyXML(const std::string& documentName)
34 : TiXmlDocument(documentName)
38 CXBMCTinyXML::CXBMCTinyXML(const std::string& documentName, const std::string& documentCharset)
39 : TiXmlDocument(documentName), m_SuggestedCharset(documentCharset)
41 StringUtils::ToUpper(m_SuggestedCharset);
44 bool CXBMCTinyXML::LoadFile(TiXmlEncoding encoding)
46 return LoadFile(value, encoding);
49 bool CXBMCTinyXML::LoadFile(const char *_filename, TiXmlEncoding encoding)
51 return LoadFile(std::string(_filename), encoding);
54 bool CXBMCTinyXML::LoadFile(const std::string& _filename, TiXmlEncoding encoding)
56 value = _filename.c_str();
58 XFILE::CFile file;
59 std::vector<uint8_t> buffer;
61 if (file.LoadFile(value, buffer) <= 0)
63 SetError(TIXML_ERROR_OPENING_FILE, NULL, NULL, TIXML_ENCODING_UNKNOWN);
64 return false;
67 // Delete the existing data:
68 Clear();
69 location.Clear();
71 std::string data(reinterpret_cast<char*>(buffer.data()), buffer.size());
72 buffer.clear(); // free memory early
74 if (encoding == TIXML_ENCODING_UNKNOWN)
75 Parse(data, file.GetProperty(XFILE::FILE_PROPERTY_CONTENT_CHARSET));
76 else
77 Parse(data, encoding);
79 if (Error())
80 return false;
81 return true;
84 bool CXBMCTinyXML::LoadFile(const std::string& _filename, const std::string& documentCharset)
86 m_SuggestedCharset = documentCharset;
87 StringUtils::ToUpper(m_SuggestedCharset);
88 return LoadFile(_filename, TIXML_ENCODING_UNKNOWN);
91 bool CXBMCTinyXML::LoadFile(FILE *f, TiXmlEncoding encoding)
93 std::string data;
94 char buf[BUFFER_SIZE] = {};
95 int result;
96 while ((result = fread(buf, 1, BUFFER_SIZE, f)) > 0)
97 data.append(buf, result);
98 return Parse(data, encoding);
101 bool CXBMCTinyXML::SaveFile(const char *_filename) const
103 return SaveFile(std::string(_filename));
106 bool CXBMCTinyXML::SaveFile(const std::string& filename) const
108 XFILE::CFile file;
109 if (file.OpenForWrite(filename, true))
111 TiXmlPrinter printer;
112 Accept(&printer);
113 bool suc = file.Write(printer.CStr(), printer.Size()) == static_cast<ssize_t>(printer.Size());
114 if (suc)
115 file.Flush();
117 return suc;
119 return false;
122 bool CXBMCTinyXML::Parse(const std::string& data, const std::string& dataCharset)
124 m_SuggestedCharset = dataCharset;
125 StringUtils::ToUpper(m_SuggestedCharset);
126 return Parse(data, TIXML_ENCODING_UNKNOWN);
129 bool CXBMCTinyXML::Parse(const std::string& data, TiXmlEncoding encoding /*= TIXML_DEFAULT_ENCODING */)
131 m_UsedCharset.clear();
132 if (encoding != TIXML_ENCODING_UNKNOWN)
133 { // encoding != TIXML_ENCODING_UNKNOWN means "do not use m_SuggestedCharset and charset detection"
134 m_SuggestedCharset.clear();
135 if (encoding == TIXML_ENCODING_UTF8)
136 m_UsedCharset = "UTF-8";
138 return InternalParse(data, encoding);
141 if (!m_SuggestedCharset.empty() && TryParse(data, m_SuggestedCharset))
142 return true;
144 std::string detectedCharset;
145 if (CCharsetDetection::DetectXmlEncoding(data, detectedCharset) && TryParse(data, detectedCharset))
147 if (!m_SuggestedCharset.empty())
148 CLog::Log(LOGWARNING,
149 "{}: \"{}\" charset was used instead of suggested charset \"{}\" for {}",
150 __FUNCTION__, m_UsedCharset, m_SuggestedCharset,
151 (value.empty() ? "XML data" : ("file \"" + value + "\"")));
153 return true;
156 // check for valid UTF-8
157 if (m_SuggestedCharset != "UTF-8" && detectedCharset != "UTF-8" && CUtf8Utils::isValidUtf8(data) &&
158 TryParse(data, "UTF-8"))
160 if (!m_SuggestedCharset.empty())
161 CLog::Log(LOGWARNING,
162 "{}: \"{}\" charset was used instead of suggested charset \"{}\" for {}",
163 __FUNCTION__, m_UsedCharset, m_SuggestedCharset,
164 (value.empty() ? "XML data" : ("file \"" + value + "\"")));
165 else if (!detectedCharset.empty())
166 CLog::Log(LOGWARNING, "{}: \"{}\" charset was used instead of detected charset \"{}\" for {}",
167 __FUNCTION__, m_UsedCharset, detectedCharset,
168 (value.empty() ? "XML data" : ("file \"" + value + "\"")));
169 return true;
172 // fallback: try user GUI charset
173 if (TryParse(data, g_langInfo.GetGuiCharSet()))
175 if (!m_SuggestedCharset.empty())
176 CLog::Log(LOGWARNING,
177 "{}: \"{}\" charset was used instead of suggested charset \"{}\" for {}",
178 __FUNCTION__, m_UsedCharset, m_SuggestedCharset,
179 (value.empty() ? "XML data" : ("file \"" + value + "\"")));
180 else if (!detectedCharset.empty())
181 CLog::Log(LOGWARNING, "{}: \"{}\" charset was used instead of detected charset \"{}\" for {}",
182 __FUNCTION__, m_UsedCharset, detectedCharset,
183 (value.empty() ? "XML data" : ("file \"" + value + "\"")));
184 return true;
187 // can't detect correct data charset, try to process data as is
188 if (InternalParse(data, TIXML_ENCODING_UNKNOWN))
190 if (!m_SuggestedCharset.empty())
191 CLog::Log(LOGWARNING, "{}: Processed {} as unknown encoding instead of suggested \"{}\"",
192 __FUNCTION__, (value.empty() ? "XML data" : ("file \"" + value + "\"")),
193 m_SuggestedCharset);
194 else if (!detectedCharset.empty())
195 CLog::Log(LOGWARNING, "{}: Processed {} as unknown encoding instead of detected \"{}\"",
196 __FUNCTION__, (value.empty() ? "XML data" : ("file \"" + value + "\"")),
197 detectedCharset);
198 return true;
201 return false;
204 bool CXBMCTinyXML::TryParse(const std::string& data, const std::string& tryDataCharset)
206 if (tryDataCharset == "UTF-8")
207 InternalParse(data, TIXML_ENCODING_UTF8); // process data without conversion
208 else if (!tryDataCharset.empty())
210 std::string converted;
211 /* some wrong conversions can leave US-ASCII XML header and structure untouched but break non-English data
212 * so conversion must fail on wrong character and then other encodings will be tried */
213 if (!g_charsetConverter.ToUtf8(tryDataCharset, data, converted, true) || converted.empty())
214 return false; // can't convert data
216 InternalParse(converted, TIXML_ENCODING_UTF8);
218 else
219 InternalParse(data, TIXML_ENCODING_LEGACY);
221 // 'Error()' contains result of last run of 'TiXmlDocument::Parse()'
222 if (Error())
224 Clear();
225 location.Clear();
227 return false;
230 m_UsedCharset = tryDataCharset;
231 return true;
234 bool CXBMCTinyXML::InternalParse(const std::string& rawdata, TiXmlEncoding encoding /*= TIXML_DEFAULT_ENCODING */)
236 // Preprocess string, replacing '&' with '&amp; for invalid XML entities
237 size_t pos = rawdata.find('&');
238 if (pos == std::string::npos)
239 return (TiXmlDocument::Parse(rawdata.c_str(), NULL, encoding) != NULL); // nothing to fix, process data directly
241 std::string data(rawdata);
242 CRegExp re(false, CRegExp::asciiOnly, "^&(amp|lt|gt|quot|apos|#x[a-fA-F0-9]{1,4}|#[0-9]{1,5});.*");
245 if (re.RegFind(data, pos, MAX_ENTITY_LENGTH) < 0)
246 data.insert(pos + 1, "amp;");
247 pos = data.find('&', pos + 1);
248 } while (pos != std::string::npos);
250 return (TiXmlDocument::Parse(data.c_str(), NULL, encoding) != NULL);
253 bool CXBMCTinyXML::Test()
255 // scraper results with unescaped &
256 CXBMCTinyXML doc;
257 std::string data("<details><url function=\"ParseTMDBRating\" "
258 "cache=\"tmdb-en-12244.json\">"
259 "http://api.themoviedb.org/3/movie/12244"
260 "?api_key=57983e31fb435df4df77afb854740ea9"
261 "&language=en&#x3f;&#x003F;&#0063;</url></details>");
262 doc.Parse(data, TIXML_DEFAULT_ENCODING);
263 TiXmlNode *root = doc.RootElement();
264 if (root && root->ValueStr() == "details")
266 TiXmlElement *url = root->FirstChildElement("url");
267 if (url && url->FirstChild())
269 return (url->FirstChild()->ValueStr() == "http://api.themoviedb.org/3/movie/12244?api_key=57983e31fb435df4df77afb854740ea9&language=en???");
272 return false;