Handle %-escaped UTF-8 sequences in ed2k:// URIs
[amule.git] / src / libs / common / StringFunctions.cpp
blob77a18bdd9d5ec656068ec694e44ea3f284d67b19
1 //
2 // This file is part of the aMule Project.
3 //
4 // Copyright (c) 2004-2008 Angel Vidal ( kry@amule.org )
5 // Copyright (c) 2003-2008 aMule Team ( admin@amule.org / http://www.amule.org )
6 //
7 // Any parts of this program derived from the xMule, lMule or eMule project,
8 // or contributed by third-party developers are copyrighted by their
9 // respective authors.
11 // This program is free software; you can redistribute it and/or modify
12 // it under the terms of the GNU General Public License as published by
13 // the Free Software Foundation; either version 2 of the License, or
14 // (at your option) any later version.
16 // This program is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 // GNU General Public License for more details.
20 //
21 // You should have received a copy of the GNU General Public License
22 // along with this program; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
26 #include "StringFunctions.h"
28 #include <wx/filename.h> // Needed for wxFileName
29 #include <wx/uri.h> // Needed for wxURI
31 // Implementation of the non-inlines
34 // Conversion of wxString so it can be used by printf() in a console
35 // On some platforms (Windows) the console allows only "plain" characters,
36 // so try to convert as much as possible and replace the others with '?'.
37 // On other platforms (some Linux) wxConvLocal silently converts to UTF8
38 // so the console can show even Chinese chars.
40 Unicode2CharBuf unicode2char(const wxChar* s)
42 // First try the straight way.
43 Unicode2CharBuf buf1(wxConvLocal.cWX2MB(s));
44 if ((const char *) buf1) {
45 return buf1;
47 // Failed. Try to convert as much as possible.
48 size_t len = wxStrlen(s);
49 size_t maxlen = len * 4; // Allow for an encoding of up to 4 byte per char.
50 wxCharBuffer buf(maxlen + 1); // This is wasteful, but the string is used temporary anyway.
51 char * data = buf.data();
52 for (size_t i = 0, pos = 0; i < len; i++) {
53 size_t len_char = wxConvLocal.FromWChar(data + pos, maxlen - pos, s + i, 1);
54 if (len_char != wxCONV_FAILED) {
55 pos += len_char - 1;
56 } else if (pos < maxlen) {
57 data[pos++] = '?';
58 data[pos] = 0;
61 return buf;
65 static byte base16Chars[17] = "0123456789ABCDEF";
67 wxString URLEncode(const wxString& sIn)
69 wxString sOut;
70 unsigned char curChar;
72 for ( unsigned int i = 0; i < sIn.Length(); ++i ) {
73 curChar = sIn.GetChar( i );
75 if ( isalnum( curChar ) ) {
76 sOut += curChar;
77 } else if( isspace ( curChar ) ) {
78 sOut += wxT("+");
79 } else {
80 sOut += wxT("%");
81 sOut += base16Chars[ curChar >> 4];
82 sOut += base16Chars[ curChar & 0xf];
87 return sOut;
91 wxChar HexToDec( const wxString& hex )
93 wxChar result = 0;
94 wxString str = hex.Upper();
96 for ( size_t i = 0; i < str.Len(); ++i ) {
97 result *= 16;
98 wxChar cur = str.GetChar(i);
100 if ( isdigit( cur ) ) {
101 result += cur - wxT('0');
102 } else if ( cur >= wxT('A') && cur <= wxT('F') ) {
103 result += cur - wxT('A') + 10;
104 } else {
105 return wxT('\0');
109 return result;
113 wxString UnescapeHTML(const wxString& str)
115 size_t len = str.length();
116 wxWritableCharBuffer buf = str.char_str(wxConvUTF8);
118 // Work around wxWritableCharBuffer's operator[] not being writable
119 char *buffer = (char *)buf;
121 size_t j = 0;
122 for (size_t i = 0; i < len; ++i, ++j) {
123 if (buffer[i] == '%' && (len > i + 2)) {
124 wxChar unesc = HexToDec(str.Mid(i + 1, 2));
125 if (unesc) {
126 i += 2;
127 buffer[j] = (char)unesc;
128 } else {
129 // If conversion failed, then we just add the escape-code
130 // and continue past it like nothing happened.
131 buffer[j] = buffer[i];
133 } else {
134 buffer[j] = buffer[i];
137 buffer[j] = '\0';
139 // Try to interpret the result as UTF-8
140 wxString result(buffer, wxConvUTF8);
141 if (len > 0 && result.length() == 0) {
142 // Fall back to ISO-8859-1
143 result = wxString(buffer, wxConvISO8859_1);
146 return result;
150 wxString validateURI(const wxString& url)
152 wxURI uri(url);
154 return uri.BuildURI();
158 enum ECharType {
159 ECTInteger,
160 ECTText,
161 ECTNone
164 inline wxString GetNextField(const wxString& str, size_t& cookie)
166 // These are taken to seperate "fields"
167 static const wxChar* s_delims = wxT("\t\n\x0b\x0c\r !\"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~");
169 wxString field;
170 ECharType curType = ECTNone;
171 for (; cookie < str.Length(); ++cookie) {
172 wxChar c = str[cookie];
174 if ((c >= wxT('0')) && (c <= wxT('9'))) {
175 if (curType == ECTText) {
176 break;
179 curType = ECTInteger;
180 field += c;
181 } else if (wxStrchr(s_delims, c)) {
182 if (curType == ECTNone) {
183 continue;
184 } else {
185 break;
187 } else {
188 if (curType == ECTInteger) {
189 break;
192 curType = ECTText;
193 field += c;
197 return field;
201 int FuzzyStrCmp(const wxString& a, const wxString& b)
203 size_t aCookie = 0, bCookie = 0;
204 wxString aField, bField;
206 do {
207 aField = GetNextField(a, aCookie);
208 bField = GetNextField(b, bCookie);
210 if (aField.IsNumber() && bField.IsNumber()) {
211 unsigned long aInteger = StrToULong(aField);
212 unsigned long bInteger = StrToULong(bField);
214 if (aInteger < bInteger) {
215 return -1;
216 } else if (aInteger > bInteger) {
217 return 1;
219 } else if (aField < bField) {
220 return -1;
221 } else if (aField > bField) {
222 return 1;
224 } while (!aField.IsEmpty() && !bField.IsEmpty());
226 return 0;
230 int FuzzyStrCaseCmp(const wxString& a, const wxString& b)
232 return FuzzyStrCmp(a.Lower(), b.Lower());
237 CSimpleTokenizer::CSimpleTokenizer(const wxString& str, wxChar token)
238 : m_string(str),
239 m_delim(token),
240 m_ptr(m_string.c_str()),
241 m_count(0)
246 wxString CSimpleTokenizer::next()
248 const wxChar* start = m_ptr;
249 const wxChar* end = m_string.c_str() + m_string.Len() + 1;
251 for (; m_ptr < end; ++m_ptr) {
252 if (*m_ptr == m_delim) {
253 m_count++;
254 break;
258 // Return the token
259 return m_string.Mid(start - m_string.c_str(), m_ptr++ - start);
263 wxString CSimpleTokenizer::remaining() const
265 return m_string.Mid(m_ptr - m_string.c_str());
269 size_t CSimpleTokenizer::tokenCount() const
271 return m_count;