2 * Copyright (C) 2005-2008 Team XBMC
5 * This Program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2, or (at your option)
10 * This Program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with XBMC; see the file COPYING. If not, write to
17 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
18 * http://www.gnu.org/copyleft/gpl.html
22 #include "CharsetConverter.h"
// Constructor; takes no arguments.
29 CHTMLUtil::CHTMLUtil(void)
// Destructor; takes no arguments.
32 CHTMLUtil::~CHTMLUtil(void)
// Looks for strTag in strHTML, starting the search at offset iPos, and stores
// the matched text (from the start of the match through the next '>') in
// strtagFound.
//
// strHTML      text to search
// strTag       text to look for
// strtagFound  receives the matched span
// iPos         offset at which the search starts
//
// Returns -1 when strTag is not found.
// NOTE(review): the statement returning the success value is not visible in
// this view - confirm what callers receive on a match.
35 int CHTMLUtil::FindTag(const CStdString
& strHTML
, const CStdString
& strTag
, CStdString
& strtagFound
, int iPos
) const
// Working copies of both inputs.
37 CStdString strHTMLLow
= strHTML
;
38 CStdString strTagLow
= strTag
;
// NOTE(review): the lookup below passes strTag, not the strTagLow copy made
// above - confirm whether strTagLow was meant to be used here.
42 int iStart
= strHTMLLow
.Find(strTag
, iPos
);
43 if (iStart
< 0) return -1;
// Find the '>' that ends the tag, searching from the start of the match.
44 int iEnd
= strHTMLLow
.Find(">", iStart
);
// No '>' after the match: use the string length as the end position.
45 if (iEnd
< 0) iEnd
= (int)strHTMLLow
.size();
// The span is taken from the working copy strHTMLLow, not from strHTML.
46 strtagFound
= strHTMLLow
.Mid(iStart
, (iEnd
+ 1) - iStart
);
// Looks for the closing tag "</" + strTag in strHTML, starting at offset iPos,
// and stores the matched text (through the next '>') in strtagFound.
// While an opening "<" + strTag occurs before the current closing-tag
// candidate, both searches advance to their next occurrence, so closers that
// belong to nested openers are stepped over.
//
// strHTML      text to search
// strTag       tag name without the leading '<' (it is prefixed with "</" and "<")
// strtagFound  receives the matched closing-tag span
// iPos         offset at which both searches start
//
// Returns -1 when no "</" + strTag is found at or after iPos.
// NOTE(review): the statement returning the success value is not visible in
// this view - confirm what callers receive on a match.
50 int CHTMLUtil::FindClosingTag(const CStdString
& strHTML
, const CStdString
& strTag
, CStdString
& strtagFound
, int iPos
) const
// Working copies of both inputs.
// NOTE(review): strTagLow is not referenced by any statement visible below;
// every search uses strTag.
52 CStdString strHTMLLow
= strHTML
;
53 CStdString strTagLow
= strTag
;
// First closing-tag candidate.
57 int iStart
= strHTMLLow
.Find("</" + strTag
, iPos
);
58 if (iStart
< 0) return -1;
// First opening tag at or after iPos.
59 int iOpenStart
= strHTMLLow
.Find("<" + strTag
, iPos
);
// An opener that precedes the current closer means that closer is already
// paired; move on to the next closer and the next opener.
60 while (iOpenStart
< iStart
&& iOpenStart
!= -1)
// NOTE(review): this Find may yield a negative iStart when no further closing
// tag exists; no check is visible before iStart is used in Find(">", iStart)
// below - confirm.
62 iStart
= strHTMLLow
.Find("</" + strTag
, iStart
+ 1);
63 iOpenStart
= strHTMLLow
.Find("<" + strTag
, iOpenStart
+ 1);
// Find the '>' that ends the closing tag.
66 int iEnd
= strHTMLLow
.Find(">", iStart
);
// No '>' after the match: use the string length as the end position.
67 if (iEnd
< 0) iEnd
= (int)strHTMLLow
.size();
68 strtagFound
= strHTMLLow
.Mid(iStart
, (iEnd
+ 1) - iStart
);
// Extracts the text that sits between the first '>' in strTagAndValue and the
// next '<' after it, and stores it in strValue.
//
// strTagAndValue  a tag together with its content
// strValue        receives the extracted text; it is first set to the whole of
//                 strTagAndValue and only overwritten when both delimiters
//                 are found
72 void CHTMLUtil::getValueOfTag(const CStdString
& strTagAndValue
, CStdString
& strValue
)
74 // strTagAndValue contains:
75 // like <a href=blablabla.....>value</a>
// Default result: the unmodified input.
76 strValue
= strTagAndValue
;
// Position of the '>' that ends the opening tag.
77 int iStart
= strTagAndValue
.Find(">");
// Position of the next '<', searched from just past iStart.
// NOTE(review): when no '>' exists iStart is -1, so this search starts at
// offset 0 and may still succeed; the check below then rejects the result.
78 int iEnd
= strTagAndValue
.Find("<", iStart
+ 1);
79 if (iStart
>= 0 && iEnd
>= 0)
// NOTE(review): as visible here the span starts at iStart, the index of the
// '>' itself; any adjustment of iStart before this call is not visible -
// confirm.
82 strValue
= strTagAndValue
.Mid(iStart
, iEnd
- iStart
);
// Extracts the value that follows strTag inside strTagAndValue and stores it
// in strValue.
//
// strTagAndValue  tag text to inspect
// strTag          text to locate (the value is read from just after it)
// strValue        receives the extracted value; it is first set to the whole
//                 of strTagAndValue and keeps that content when strTag is
//                 not found
86 void CHTMLUtil::getAttributeOfTag(const CStdString
& strTagAndValue
, const CStdString
& strTag
, CStdString
& strValue
)
88 // strTagAndValue contains:
89 // like <a href=""value".....
// Default result: the unmodified input.
90 strValue
= strTagAndValue
;
91 int iStart
= strTagAndValue
.Find(strTag
);
92 if (iStart
< 0) return ;
// Step past the located text.
93 iStart
+= (int)strTag
.size();
// Advance while the NEXT character is a space (0x20), an apostrophe (0x27) or
// a double quote (34).
// NOTE(review): no bound check against the string length is visible in this
// loop - confirm indexing at iStart + 1 is safe when the match ends the string.
94 while (strTagAndValue
[iStart
+ 1] == 0x20 || strTagAndValue
[iStart
+ 1] == 0x27 || strTagAndValue
[iStart
+ 1] == 34) iStart
++;
95 int iEnd
= iStart
+ 1;
// Advance iEnd until an apostrophe, space, double quote or '>' is reached.
// NOTE(review): no bound check is visible here either; if none of these
// characters follows, iEnd keeps growing past the end of the string.
96 while (strTagAndValue
[iEnd
] != 0x27 && strTagAndValue
[iEnd
] != 0x20 && strTagAndValue
[iEnd
] != 34 && strTagAndValue
[iEnd
] != '>') iEnd
++;
// Both indices are non-negative by construction at this point (iStart was
// checked above and only incremented since).
97 if (iStart
>= 0 && iEnd
>= 0)
99 strValue
= strTagAndValue
.Mid(iStart
, iEnd
- iStart
);
// Walks strHTML one character at a time, counting '<' and '>' in iNested and
// appending characters to strReturn.
//
// strHTML  text to process; taken by non-const reference
//
// NOTE(review): the declaration of iNested, the condition guarding the append,
// and the statement that hands strReturn back to the caller are not visible in
// this view - presumably only characters outside any tag (iNested == 0) are
// kept and the result is written back into strHTML; confirm.
103 void CHTMLUtil::RemoveTags(CStdString
& strHTML
)
106 CStdString strReturn
= "";
107 for (int i
= 0; i
< (int) strHTML
.size(); ++i
)
// '<' increments the counter, '>' decrements it.
109 if (strHTML
[i
] == '<') iNested
++;
110 else if (strHTML
[i
] == '>') iNested
--;
115 strReturn
+= strHTML
[i
];
// Table used by CHTMLUtil::ConvertHTMLToW. Each entry pairs a wide string with
// a numeric code point; the converter reads the entries through the members
// .html and .w and replaces every occurrence of .html with the single
// character .w (presumably the string is .html and the number is .w - the
// HTMLMapping declaration is not visible here).
// The converter's loop stops at the first entry whose .html is null, so the
// table is expected to end with such an entry; it is not visible in this view.
// NOTE(review): every string below is the literal character that equals its own
// code point, which would make each replacement a no-op. Presumably the
// strings are meant to be HTML entity names (for example "&eacute;" for
// 0x00E9) and were decoded somewhere along the way - confirm against the
// upstream file before relying on this table.
129 static const HTMLMapping mappings
[] =
132 {L
"´", 0x00B4},
133 {L
"à", 0x00E0},
134 {L
"á", 0x00E1},
135 {L
"â", 0x00E2},
136 {L
"ã", 0x00E3},
138 {L
"å", 0x00E5},
139 {L
"æ", 0x00E6},
140 {L
"À", 0x00C0},
141 {L
"Á", 0x00C1},
142 {L
"Â", 0x00C2},
143 {L
"Ã", 0x00C3},
145 {L
"Å", 0x00C5},
146 {L
"Æ", 0x00C6},
147 {L
"„", 0x201E},
148 {L
"¦", 0x00A6},
150 {L
"•", 0x2022},
153 {L
"¤", 0x00A4},
155 {L
"¸", 0x00B8},
156 {L
"Ç", 0x00C7},
157 {L
"ç", 0x00E7},
158 {L
"†", 0x2020},
160 {L
"÷", 0x00F7},
161 {L
"‡", 0x2021},
162 {L
"è", 0x00E8},
163 {L
"é", 0x00E9},
164 {L
"ê", 0x00EA},
170 {L
"È", 0x00C8},
171 {L
"É", 0x00C9},
172 {L
"Ê", 0x00CA},
176 {L
"⁄", 0x2044},
177 {L
"¼", 0x00BC},
178 {L
"½", 0x00BD},
179 {L
"¾", 0x00BE},
181 {L
"…", 0x2026},
182 {L
"¡", 0x00A1},
183 {L
"¿", 0x00BF},
184 {L
"ì", 0x00EC},
185 {L
"í", 0x00ED},
186 {L
"î", 0x00EE},
188 {L
"Ì", 0x00CC},
189 {L
"Í", 0x00CD},
190 {L
"Î", 0x00CE},
194 {L
"«", 0x00AB},
195 {L
"“", 0x201C},
196 {L
"‹", 0x2039},
197 {L
"‘", 0x2018},
199 {L
"µ", 0x00B5},
200 {L
"·", 0x00B7},
201 {L
"—", 0x2014},
203 {L
"–", 0x2013},
204 {L
"ñ", 0x00F1},
206 {L
"Ñ", 0x00D1},
209 {L
"œ", 0x0153},
210 {L
"ò", 0x00F2},
211 {L
"ó", 0x00F3},
212 {L
"ô", 0x00F4},
213 {L
"õ", 0x00F5},
215 {L
"ø", 0x00F8},
216 {L
"Œ", 0x0152},
217 {L
"Ò", 0x00D2},
218 {L
"Ó", 0x00D3},
219 {L
"Ô", 0x00D4},
220 {L
"Õ", 0x00D5},
222 {L
"Ø", 0x00D8},
224 {L
"‰", 0x2030},
225 {L
"±", 0x00B1},
226 {L
"£", 0x00A3},
227 {L
"»", 0x00BB},
228 {L
"”", 0x201D},
231 {L
"›", 0x203A},
232 {L
"’", 0x2019},
233 {L
"‚", 0x201A},
234 {L
"š", 0x0161},
240 {L
"ß", 0x00DF},
241 {L
"Š", 0x0160},
242 {L
" ", 0x2009},
243 {L
"þ", 0x00FE},
244 {L
"˜", 0x02DC},
245 {L
"×", 0x00D7},
246 {L
"™", 0x2122},
247 {L
"Þ", 0x00DE},
249 {L
"ù", 0x00F9},
250 {L
"ú", 0x00FA},
251 {L
"û", 0x00FB},
253 {L
"Ù", 0x00D9},
254 {L
"Ú", 0x00DA},
255 {L
"Û", 0x00DB},
259 {L
"ý", 0x00FD},
260 {L
"Ý", 0x00DD},
// Converts HTML character references in strHTML to wide characters and writes
// the result to strStripped. Two passes are visible:
//   1. every mappings[] entry's .html string is replaced by the single
//      character .w;
//   2. numeric references starting with "&#" (decimal, or hexadecimal when
//      followed by 'x') are replaced by the character they encode.
//
// strHTML      input text
// strStripped  receives the converted text
//
// NOTE(review): several statements of this function (declarations of iPos,
// base, i and num, the index increments, and the end of the body) are not
// visible in this view; the comments below describe only what is shown.
266 void CHTMLUtil::ConvertHTMLToW(const CStdStringW
& strHTML
, CStdStringW
& strStripped
)
// Empty-input check; what happens in that case is not visible here.
268 if (strHTML
.size() == 0)
274 strStripped
= strHTML
;
// Pass 1: iterate over the table until an entry whose .html is null.
275 while (mappings
[iPos
].html
)
277 strStripped
.Replace(mappings
[iPos
].html
,CStdStringW(1, mappings
[iPos
].w
));
// Pass 2: numeric character references.
281 iPos
= strStripped
.Find(L
"&#");
// NOTE(review): "iPos > 0" excludes a reference located at index 0 of the
// string - confirm whether ">= 0" was intended. The upper bound requires more
// than four characters to remain after the match position.
282 while (iPos
> 0 && iPos
< (int)strStripped
.size()-4)
// Resume point for the next search: one past the '&' of this reference.
284 int iStart
= iPos
+ 1;
// An 'x' marks a hexadecimal reference.
288 if (strStripped
[iPos
+1] == L
'x')
// Scan the digits that are valid for the selected base.
295 while ( iPos
< (int)strStripped
.size() &&
296 (base
==16?iswxdigit(strStripped
[iPos
]):iswdigit(strStripped
[iPos
])))
// num holds the digit text between i and the end of the scan.
299 num
= strStripped
.Mid(i
,iPos
-i
);
// The parsed value is cast to wchar_t; values outside wchar_t's range do not
// survive the cast unchanged.
300 wchar_t val
= (wchar_t)wcstol(num
.c_str(),NULL
,base
);
// Rebuild the reference text from the digits, in decimal ("&#...;") or
// hexadecimal ("&#x...;") form; the branch choosing between the two is not
// visible here.
302 num
.Format(L
"&#%ls;",num
.c_str());
304 num
.Format(L
"&#x%ls;",num
.c_str());
// The replacement is keyed on the rebuilt text rather than on a position, so
// presumably every identical reference in the string is replaced at once.
306 strStripped
.Replace(num
,CStdStringW(1,val
));
// Look for the next reference, starting from iStart.
307 iPos
= strStripped
.Find(L
"&#", iStart
);