changed: update version strings for beta4
[xbmc.git] / xbmc / utils / HTMLUtil.cpp
blobfe11f4174bfc16d7b752a2a84c958204a899b2ee
1 /*
2 * Copyright (C) 2005-2008 Team XBMC
3 * http://www.xbmc.org
5 * This Program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2, or (at your option)
8 * any later version.
10 * This Program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with XBMC; see the file COPYING. If not, write to
17 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
18 * http://www.gnu.org/copyleft/gpl.html
22 #include "CharsetConverter.h"
23 #include "HTMLUtil.h"
25 using namespace std;
26 using namespace HTML;
29 CHTMLUtil::CHTMLUtil(void)
32 CHTMLUtil::~CHTMLUtil(void)
35 int CHTMLUtil::FindTag(const CStdString& strHTML, const CStdString& strTag, CStdString& strtagFound, int iPos) const
37 CStdString strHTMLLow = strHTML;
38 CStdString strTagLow = strTag;
39 strHTMLLow.ToLower();
40 strTagLow.ToLower();
41 strtagFound = "";
42 int iStart = strHTMLLow.Find(strTag, iPos);
43 if (iStart < 0) return -1;
44 int iEnd = strHTMLLow.Find(">", iStart);
45 if (iEnd < 0) iEnd = (int)strHTMLLow.size();
46 strtagFound = strHTMLLow.Mid(iStart, (iEnd + 1) - iStart);
47 return iStart;
50 int CHTMLUtil::FindClosingTag(const CStdString& strHTML, const CStdString& strTag, CStdString& strtagFound, int iPos) const
52 CStdString strHTMLLow = strHTML;
53 CStdString strTagLow = strTag;
54 strHTMLLow.ToLower();
55 strTagLow.ToLower();
56 strtagFound = "";
57 int iStart = strHTMLLow.Find("</" + strTag, iPos);
58 if (iStart < 0) return -1;
59 int iOpenStart = strHTMLLow.Find("<" + strTag, iPos);
60 while (iOpenStart < iStart && iOpenStart != -1)
62 iStart = strHTMLLow.Find("</" + strTag, iStart + 1);
63 iOpenStart = strHTMLLow.Find("<" + strTag, iOpenStart + 1);
66 int iEnd = strHTMLLow.Find(">", iStart);
67 if (iEnd < 0) iEnd = (int)strHTMLLow.size();
68 strtagFound = strHTMLLow.Mid(iStart, (iEnd + 1) - iStart);
69 return iStart;
72 void CHTMLUtil::getValueOfTag(const CStdString& strTagAndValue, CStdString& strValue)
74 // strTagAndValue contains:
75 // like <a href=blablabla.....>value</a>
76 strValue = strTagAndValue;
77 int iStart = strTagAndValue.Find(">");
78 int iEnd = strTagAndValue.Find("<", iStart + 1);
79 if (iStart >= 0 && iEnd >= 0)
81 iStart++;
82 strValue = strTagAndValue.Mid(iStart, iEnd - iStart);
86 void CHTMLUtil::getAttributeOfTag(const CStdString& strTagAndValue, const CStdString& strTag, CStdString& strValue)
88 // strTagAndValue contains:
89 // like <a href=""value".....
90 strValue = strTagAndValue;
91 int iStart = strTagAndValue.Find(strTag);
92 if (iStart < 0) return ;
93 iStart += (int)strTag.size();
94 while (strTagAndValue[iStart + 1] == 0x20 || strTagAndValue[iStart + 1] == 0x27 || strTagAndValue[iStart + 1] == 34) iStart++;
95 int iEnd = iStart + 1;
96 while (strTagAndValue[iEnd] != 0x27 && strTagAndValue[iEnd] != 0x20 && strTagAndValue[iEnd] != 34 && strTagAndValue[iEnd] != '>') iEnd++;
97 if (iStart >= 0 && iEnd >= 0)
99 strValue = strTagAndValue.Mid(iStart, iEnd - iStart);
103 void CHTMLUtil::RemoveTags(CStdString& strHTML)
105 int iNested = 0;
106 CStdString strReturn = "";
107 for (int i = 0; i < (int) strHTML.size(); ++i)
109 if (strHTML[i] == '<') iNested++;
110 else if (strHTML[i] == '>') iNested--;
111 else
113 if (!iNested)
115 strReturn += strHTML[i];
120 strHTML = strReturn;
// One row of the entity table: the textual entity and the wide character
// that replaces it.
struct HTMLMapping
{
  const wchar_t* html; // entity text including '&' and ';', NULL in the terminator row
  const wchar_t w;     // replacement character
};
129 static const HTMLMapping mappings[] =
130 {{L"&amp;", 0x0026},
131 {L"&apos;", 0x0027},
132 {L"&acute;", 0x00B4},
133 {L"&agrave;", 0x00E0},
134 {L"&aacute;", 0x00E1},
135 {L"&acirc;", 0x00E2},
136 {L"&atilde;", 0x00E3},
137 {L"&auml;", 0x00E4},
138 {L"&aring;", 0x00E5},
139 {L"&aelig;", 0x00E6},
140 {L"&Agrave;", 0x00C0},
141 {L"&Aacute;", 0x00C1},
142 {L"&Acirc;", 0x00C2},
143 {L"&Atilde;", 0x00C3},
144 {L"&Auml;", 0x00C4},
145 {L"&Aring;", 0x00C5},
146 {L"&AElig;", 0x00C6},
147 {L"&bdquo;", 0x201E},
148 {L"&brvbar;", 0x00A6},
149 {L"&bull;", 0x2022},
150 {L"&bullet;", 0x2022},
151 {L"&cent;", 0x00A2},
152 {L"&circ;", 0x02C6},
153 {L"&curren;", 0x00A4},
154 {L"&copy;", 0x00A9},
155 {L"&cedil;", 0x00B8},
156 {L"&Ccedil;", 0x00C7},
157 {L"&ccedil;", 0x00E7},
158 {L"&dagger;", 0x2020},
159 {L"&deg;", 0x00B0},
160 {L"&divide;", 0x00F7},
161 {L"&Dagger;", 0x2021},
162 {L"&egrave;", 0x00E8},
163 {L"&eacute;", 0x00E9},
164 {L"&ecirc;", 0x00EA},
165 {L"&emsp;", 0x2003},
166 {L"&ensp;", 0x2002},
167 {L"&euml;", 0x00EB},
168 {L"&eth;", 0x00F0},
169 {L"&euro;", 0x20AC},
170 {L"&Egrave;", 0x00C8},
171 {L"&Eacute;", 0x00C9},
172 {L"&Ecirc;", 0x00CA},
173 {L"&Euml;", 0x00CB},
174 {L"&ETH;", 0x00D0},
175 {L"&quot;", 0x0022},
176 {L"&frasl;", 0x2044},
177 {L"&frac14;", 0x00BC},
178 {L"&frac12;", 0x00BD},
179 {L"&frac34;", 0x00BE},
180 {L"&gt;", 0x003E},
181 {L"&hellip;", 0x2026},
182 {L"&iexcl;", 0x00A1},
183 {L"&iquest;", 0x00BF},
184 {L"&igrave;", 0x00EC},
185 {L"&iacute;", 0x00ED},
186 {L"&icirc;", 0x00EE},
187 {L"&iuml;", 0x00EF},
188 {L"&Igrave;", 0x00CC},
189 {L"&Iacute;", 0x00CD},
190 {L"&Icirc;", 0x00CE},
191 {L"&Iuml;", 0x00CF},
192 {L"&lrm;", 0x200E},
193 {L"&lt;", 0x003C},
194 {L"&laquo;", 0x00AB},
195 {L"&ldquo;", 0x201C},
196 {L"&lsaquo;", 0x2039},
197 {L"&lsquo;", 0x2018},
198 {L"&macr;", 0x00AF},
199 {L"&micro;", 0x00B5},
200 {L"&middot;", 0x00B7},
201 {L"&mdash;", 0x2014},
202 {L"&nbsp;", 0x00A0},
203 {L"&ndash;", 0x2013},
204 {L"&ntilde;", 0x00F1},
205 {L"&not;", 0x00AC},
206 {L"&Ntilde;", 0x00D1},
207 {L"&ordf;", 0x00AA},
208 {L"&ordm;", 0x00BA},
209 {L"&oelig;", 0x0153},
210 {L"&ograve;", 0x00F2},
211 {L"&oacute;", 0x00F3},
212 {L"&ocirc;", 0x00F4},
213 {L"&otilde;", 0x00F5},
214 {L"&ouml;", 0x00F6},
215 {L"&oslash;", 0x00F8},
216 {L"&OElig;", 0x0152},
217 {L"&Ograve;", 0x00D2},
218 {L"&Oacute;", 0x00D3},
219 {L"&Ocirc;", 0x00D4},
220 {L"&Otilde;", 0x00D5},
221 {L"&Ouml;", 0x00D6},
222 {L"&Oslash;", 0x00D8},
223 {L"&para;", 0x00B6},
224 {L"&permil;", 0x2030},
225 {L"&plusmn;", 0x00B1},
226 {L"&pound;", 0x00A3},
227 {L"&raquo;", 0x00BB},
228 {L"&rdquo;", 0x201D},
229 {L"&reg;", 0x00AE},
230 {L"&rlm;", 0x200F},
231 {L"&rsaquo;", 0x203A},
232 {L"&rsquo;", 0x2019},
233 {L"&sbquo;", 0x201A},
234 {L"&scaron;", 0x0161},
235 {L"&sect;", 0x00A7},
236 {L"&shy;", 0x00AD},
237 {L"&sup1;", 0x00B9},
238 {L"&sup2;", 0x00B2},
239 {L"&sup3;", 0x00B3},
240 {L"&szlig;", 0x00DF},
241 {L"&Scaron;", 0x0160},
242 {L"&thinsp;", 0x2009},
243 {L"&thorn;", 0x00FE},
244 {L"&tilde;", 0x02DC},
245 {L"&times;", 0x00D7},
246 {L"&trade;", 0x2122},
247 {L"&THORN;", 0x00DE},
248 {L"&uml;", 0x00A8},
249 {L"&ugrave;", 0x00F9},
250 {L"&uacute;", 0x00FA},
251 {L"&ucirc;", 0x00FB},
252 {L"&uuml;", 0x00FC},
253 {L"&Ugrave;", 0x00D9},
254 {L"&Uacute;", 0x00DA},
255 {L"&Ucirc;", 0x00DB},
256 {L"&Uuml;", 0x00DC},
257 {L"&yen;", 0x00A5},
258 {L"&yuml;", 0x00FF},
259 {L"&yacute;", 0x00FD},
260 {L"&Yacute;", 0x00DD},
261 {L"&Yuml;", 0x0178},
262 {L"&zwj;", 0x200D},
263 {L"&zwnj;", 0x200C},
264 {NULL, L'\0'}};
266 void CHTMLUtil::ConvertHTMLToW(const CStdStringW& strHTML, CStdStringW& strStripped)
268 if (strHTML.size() == 0)
270 strStripped.Empty();
271 return ;
273 int iPos = 0;
274 strStripped = strHTML;
275 while (mappings[iPos].html)
277 strStripped.Replace(mappings[iPos].html,CStdStringW(1, mappings[iPos].w));
278 iPos++;
281 iPos = strStripped.Find(L"&#");
282 while (iPos > 0 && iPos < (int)strStripped.size()-4)
284 int iStart = iPos + 1;
285 iPos += 2;
286 CStdStringW num;
287 int base = 10;
288 if (strStripped[iPos+1] == L'x')
290 base = 16;
291 iPos++;
294 int i=iPos;
295 while ( iPos < (int)strStripped.size() &&
296 (base==16?iswxdigit(strStripped[iPos]):iswdigit(strStripped[iPos])))
297 iPos++;
299 num = strStripped.Mid(i,iPos-i);
300 wchar_t val = (wchar_t)wcstol(num.c_str(),NULL,base);
301 if (base == 10)
302 num.Format(L"&#%ls;",num.c_str());
303 else
304 num.Format(L"&#x%ls;",num.c_str());
306 strStripped.Replace(num,CStdStringW(1,val));
307 iPos = strStripped.Find(L"&#", iStart);