src/libs/common/StringFunctions.cpp

   1 //
   2 // This file is part of the aMule Project.
   3 //
   4 // Copyright (c) 2004-2008 Angel Vidal ( kry@amule.org )
   5 // Copyright (c) 2003-2008 aMule Team ( admin@amule.org / http://www.amule.org )
   6 //
   7 // Any parts of this program derived from the xMule, lMule or eMule project,
   8 // or contributed by third-party developers are copyrighted by their
   9 // respective authors.
  10 //
  11 // This program is free software; you can redistribute it and/or modify
  12 // it under the terms of the GNU General Public License as published by
  13 // the Free Software Foundation; either version 2 of the License, or
  14 // (at your option) any later version.
  15 //
  16 // This program is distributed in the hope that it will be useful,
  17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 // GNU General Public License for more details.
  20 //
  21 // You should have received a copy of the GNU General Public License
  22 // along with this program; if not, write to the Free Software
  23 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA
  24 //
  25
  26 #include "StringFunctions.h"
  27
  28 #include <wx/filename.h>        // Needed for wxFileName
  29 #include <wx/uri.h>             // Needed for wxURI
  30
  31 // Implementation of the non-inlines
  32
  33 //
  34 // Conversion of wxString so it can be used by printf() in a console
  35 // On some platforms (Windows) the console allows only "plain" characters,
  36 // so try to convert as much as possible and replace the others with '?'.
  37 // On other platforms (some Linux) wxConvLocal silently converts to UTF8
  38 // so the console can show even Chinese chars.
  39 //
  40 Unicode2CharBuf unicode2char(const wxChar* s)
  41 {
  42         // First try the straight way.
  43         Unicode2CharBuf buf1(wxConvLocal.cWX2MB(s));
  44         if ((const char *) buf1) {
  45                 return buf1;
  46         }
  47         // Failed. Try to convert as much as possible.
  48         size_t len = wxStrlen(s);
  49         size_t maxlen = len * 4;                // Allow for an encoding of up to 4 byte per char.
  50         wxCharBuffer buf(maxlen + 1);   // This is wasteful, but the string is used temporary anyway.
  51         char * data = buf.data();
  52         for (size_t i = 0, pos = 0; i < len; i++) {
  53                 size_t len_char = wxConvLocal.FromWChar(data + pos, maxlen - pos, s + i, 1);
  54                 if (len_char != wxCONV_FAILED) {
  55                         pos += len_char - 1;
  56                 } else if (pos < maxlen) {
  57                         data[pos++] = '?';
  58                         data[pos] = 0;
  59                 }
  60         }
  61         return buf;
  62 }
  63
  64
  65 static byte base16Chars[17] = "0123456789ABCDEF";
  66
  67 wxString URLEncode(const wxString& sIn)
  68 {
  69         wxString sOut;
  70         unsigned char curChar;
  71
  72         for ( unsigned int i = 0; i < sIn.Length(); ++i ) {
  73                 curChar = sIn.GetChar( i );
  74
  75                 if ( isalnum( curChar ) ) {
  76                 sOut += curChar;
  77             } else if( isspace ( curChar ) ) {
  78                     sOut += wxT("+");
  79                 } else {
  80                         sOut += wxT("%");
  81                         sOut += base16Chars[ curChar >> 4];
  82                         sOut += base16Chars[ curChar & 0xf];
  83                 }
  84
  85         }
  86
  87         return sOut;
  88 }
  89
  90
  91 wxChar HexToDec( const wxString& hex )
  92 {
  93         wxChar result = 0;
  94         wxString str = hex.Upper();
  95
  96         for ( size_t i = 0; i < str.Len(); ++i ) {
  97                 result *= 16;
  98                 wxChar cur = str.GetChar(i);
  99
 100                 if ( isdigit( cur ) ) {
 101                         result += cur - wxT('0');
 102                 } else if ( cur >= wxT('A') && cur <= wxT('F') ) {
 103                         result += cur - wxT('A') + 10;
 104                 } else {
 105                         return wxT('\0');
 106                 }
 107         }
 108
 109         return result;
 110 }
 111
 112
 113 wxString UnescapeHTML(const wxString& str)
 114 {
 115         size_t len = str.length();
 116         wxWritableCharBuffer buf = str.char_str(wxConvUTF8);
 117
 118         // Work around wxWritableCharBuffer's operator[] not being writable
 119         char *buffer = (char *)buf;
 120
 121         size_t j = 0;
 122         for (size_t i = 0; i < len; ++i, ++j) {
 123                 if (buffer[i] == '%' && (len > i + 2)) {
 124                         wxChar unesc = HexToDec(str.Mid(i + 1, 2));
 125                         if (unesc) {
 126                                 i += 2;
 127                                 buffer[j] = (char)unesc;
 128                         } else {
 129                                 // If conversion failed, then we just add the escape-code
 130                                 // and continue past it like nothing happened.
 131                                 buffer[j] = buffer[i];
 132                         }
 133                 } else {
 134                         buffer[j] = buffer[i];
 135                 }
 136         }
 137         buffer[j] = '\0';
 138
 139         // Try to interpret the result as UTF-8
 140         wxString result(buffer, wxConvUTF8);
 141         if (len > 0 && result.length() == 0) {
 142                 // Fall back to ISO-8859-1
 143                 result = wxString(buffer, wxConvISO8859_1);
 144         }
 145
 146         return result;
 147 }
 148
 149
 150 wxString validateURI(const wxString& url)
 151 {
 152         wxURI uri(url);
 153
 154         return uri.BuildURI();
 155 }
 156
 157
 158 enum ECharType {
 159         ECTInteger,
 160         ECTText,
 161         ECTNone
 162 };
 163
 164 inline wxString GetNextField(const wxString& str, size_t& cookie)
 165 {
 166         // These are taken to seperate "fields"
 167         static const wxChar* s_delims = wxT("\t\n\x0b\x0c\r !\"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~");
 168
 169         wxString field;
 170         ECharType curType = ECTNone;
 171         for (; cookie < str.Length(); ++cookie) {
 172                 wxChar c = str[cookie];
 173
 174                 if ((c >= wxT('0')) && (c <= wxT('9'))) {
 175                         if (curType == ECTText) {
 176                                 break;
 177                         }
 178
 179                         curType = ECTInteger;
 180                         field += c;
 181                 } else if (wxStrchr(s_delims, c)) {
 182                         if (curType == ECTNone) {
 183                                 continue;
 184                         } else {
 185                                 break;
 186                         }
 187                 } else {
 188                         if (curType == ECTInteger) {
 189                                 break;
 190                         }
 191
 192                         curType = ECTText;
 193                         field += c;
 194                 }
 195         }
 196
 197         return field;
 198 }
 199
 200
 201 int FuzzyStrCmp(const wxString& a, const wxString& b)
 202 {
 203         size_t aCookie = 0, bCookie = 0;
 204         wxString aField, bField;
 205
 206         do {
 207                 aField = GetNextField(a, aCookie);
 208                 bField = GetNextField(b, bCookie);
 209
 210                 if (aField.IsNumber() && bField.IsNumber()) {
 211                         unsigned long aInteger = StrToULong(aField);
 212                         unsigned long bInteger = StrToULong(bField);
 213
 214                         if (aInteger < bInteger) {
 215                                 return -1;
 216                         } else if (aInteger > bInteger) {
 217                                 return  1;
 218                         }
 219                 } else if (aField < bField) {
 220                         return -1;
 221                 } else if (aField > bField) {
 222                         return  1;
 223                 }
 224         } while (!aField.IsEmpty() && !bField.IsEmpty());
 225
 226         return 0;
 227 }
 228
 229
 230 int FuzzyStrCaseCmp(const wxString& a, const wxString& b)
 231 {
 232         return FuzzyStrCmp(a.Lower(), b.Lower());
 233 }
 234
 235
 236
 237 CSimpleTokenizer::CSimpleTokenizer(const wxString& str, wxChar token)
 238         : m_string(str),
 239           m_delim(token),
 240           m_ptr(m_string.c_str()),
 241           m_count(0)
 242 {
 243 }
 244
 245
 246 wxString CSimpleTokenizer::next()
 247 {
 248         const wxChar* start = m_ptr;
 249         const wxChar* end   = m_string.c_str() + m_string.Len() + 1;
 250
 251         for (; m_ptr < end; ++m_ptr) {
 252                 if (*m_ptr == m_delim) {
 253                         m_count++;
 254                         break;
 255                 }
 256         }
 257
 258         // Return the token
 259         return m_string.Mid(start - m_string.c_str(), m_ptr++ - start);
 260 }
 261
 262
 263 wxString CSimpleTokenizer::remaining() const
 264 {
 265         return m_string.Mid(m_ptr - m_string.c_str());
 266 }
 267
 268
 269 size_t CSimpleTokenizer::tokenCount() const
 270 {
 271         return m_count;
 272 }