src/libs/common/StringFunctions.cpp

   1 //
   2 // This file is part of the aMule Project.
   3 //
   4 // Copyright (c) 2004-2008 Angel Vidal ( kry@amule.org )
   5 // Copyright (c) 2003-2008 aMule Team ( admin@amule.org / http://www.amule.org )
   6 //
   7 // Any parts of this program derived from the xMule, lMule or eMule project,
   8 // or contributed by third-party developers are copyrighted by their
   9 // respective authors.
  10 //
  11 // This program is free software; you can redistribute it and/or modify
  12 // it under the terms of the GNU General Public License as published by
  13 // the Free Software Foundation; either version 2 of the License, or
  14 // (at your option) any later version.
  15 //
  16 // This program is distributed in the hope that it will be useful,
  17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 // GNU General Public License for more details.
  20 //
  21 // You should have received a copy of the GNU General Public License
  22 // along with this program; if not, write to the Free Software
  23 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA
  24 //
  25
  26 #include "StringFunctions.h"
  27
  28 #include <wx/filename.h>        // Needed for wxFileName
  29 #include <wx/uri.h>             // Needed for wxURI
  30
  31 // Implementation of the non-inlines
  32
  33 //
  34 // Conversion of wxString so it can be used by printf() in a console
  35 // On some platforms (Windows) the console allows only "plain" characters,
  36 // so try to convert as much as possible and replace the others with '?'.
  37 // On other platforms (some Linux) wxConvLocal silently converts to UTF8
  38 // so the console can show even Chinese chars.
  39 //
  40 Unicode2CharBuf unicode2char(const wxChar* s)
  41 {
  42         // First try the straight way.
  43         Unicode2CharBuf buf1(wxConvLocal.cWX2MB(s));
  44         if ((const char *) buf1) {
  45                 return buf1;
  46         }
  47         // Failed. Try to convert as much as possible.
  48         size_t len = wxStrlen(s);
  49         size_t maxlen = len * 4;                // Allow for an encoding of up to 4 byte per char.
  50         wxCharBuffer buf(maxlen + 1);   // This is wasteful, but the string is used temporary anyway.
  51         char * data = buf.data();
  52         for (size_t i = 0, pos = 0; i < len; i++) {
  53                 size_t len_char = wxConvLocal.FromWChar(data + pos, maxlen - pos, s + i, 1);
  54                 if (len_char != wxCONV_FAILED) {
  55                         pos += len_char - 1;
  56                 } else if (pos < maxlen) {
  57                         data[pos++] = '?';
  58                         data[pos] = 0;
  59                 }
  60         }
  61         return buf;
  62 }
  63
  64
  65 static byte base16Chars[17] = "0123456789ABCDEF";
  66
  67 wxString URLEncode(const wxString& sIn)
  68 {
  69         wxString sOut;
  70         unsigned char curChar;
  71
  72         for ( unsigned int i = 0; i < sIn.Length(); ++i ) {
  73                 curChar = sIn.GetChar( i );
  74
  75                 if ( isalnum( curChar ) ) {
  76                 sOut += curChar;
  77             } else if( isspace ( curChar ) ) {
  78                     sOut += wxT("+");
  79                 } else {
  80                         sOut += wxT("%");
  81                         sOut += base16Chars[ curChar >> 4];
  82                         sOut += base16Chars[ curChar & 0xf];
  83                 }
  84
  85         }
  86
  87         return sOut;
  88 }
  89
  90
  91 wxChar HexToDec( const wxString& hex )
  92 {
  93         wxChar result = 0;
  94         wxString str = hex.Upper();
  95
  96         for ( size_t i = 0; i < str.Len(); ++i ) {
  97                 result *= 16;
  98                 wxChar cur = str.GetChar(i);
  99
 100                 if ( isdigit( cur ) ) {
 101                         result += cur - wxT('0');
 102                 } else if ( cur >= wxT('A') && cur <= wxT('F') ) {
 103                         result += cur - wxT('A') + 10;
 104                 } else {
 105                         return wxT('\0');
 106                 }
 107         }
 108
 109         return result;
 110 }
 111
 112
 113 wxString UnescapeHTML( const wxString& str )
 114 {
 115         wxString result;
 116         result.Alloc( str.Len() );
 117
 118         for ( size_t i = 0; i < str.Len(); ++i ) {
 119                 if ( str.GetChar(i) == wxT('%') && ( i + 2 < str.Len() ) ) {
 120                         wxChar unesc = HexToDec( str.Mid( i + 1, 2 ) );
 121
 122                         if ( unesc ) {
 123                                 i += 2;
 124
 125                                 result += unesc;
 126                         } else {
 127                                 // If conversion failed, then we just add the escape-code
 128                                 // and continue past it like nothing happened.
 129                                 result += str.at(i);
 130                         }
 131                 } else {
 132                         result += str.at(i);
 133                 }
 134         }
 135
 136         return result;
 137 }
 138
 139
 140 wxString validateURI(const wxString& url)
 141 {
 142         wxURI uri(url);
 143
 144         return uri.BuildURI();
 145 }
 146
 147
 148 enum ECharType {
 149         ECTInteger,
 150         ECTText,
 151         ECTNone
 152 };
 153
 154 inline wxString GetNextField(const wxString& str, size_t& cookie)
 155 {
 156         // These are taken to seperate "fields"
 157         static const wxChar* s_delims = wxT("\t\n\x0b\x0c\r !\"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~");
 158
 159         wxString field;
 160         ECharType curType = ECTNone;
 161         for (; cookie < str.Length(); ++cookie) {
 162                 wxChar c = str[cookie];
 163
 164                 if ((c >= wxT('0')) && (c <= wxT('9'))) {
 165                         if (curType == ECTText) {
 166                                 break;
 167                         }
 168
 169                         curType = ECTInteger;
 170                         field += c;
 171                 } else if (wxStrchr(s_delims, c)) {
 172                         if (curType == ECTNone) {
 173                                 continue;
 174                         } else {
 175                                 break;
 176                         }
 177                 } else {
 178                         if (curType == ECTInteger) {
 179                                 break;
 180                         }
 181
 182                         curType = ECTText;
 183                         field += c;
 184                 }
 185         }
 186
 187         return field;
 188 }
 189
 190
 191 int FuzzyStrCmp(const wxString& a, const wxString& b)
 192 {
 193         size_t aCookie = 0, bCookie = 0;
 194         wxString aField, bField;
 195
 196         do {
 197                 aField = GetNextField(a, aCookie);
 198                 bField = GetNextField(b, bCookie);
 199
 200                 if (aField.IsNumber() && bField.IsNumber()) {
 201                         unsigned long aInteger = StrToULong(aField);
 202                         unsigned long bInteger = StrToULong(bField);
 203
 204                         if (aInteger < bInteger) {
 205                                 return -1;
 206                         } else if (aInteger > bInteger) {
 207                                 return  1;
 208                         }
 209                 } else if (aField < bField) {
 210                         return -1;
 211                 } else if (aField > bField) {
 212                         return  1;
 213                 }
 214         } while (!aField.IsEmpty() && !bField.IsEmpty());
 215
 216         return 0;
 217 }
 218
 219
 220 int FuzzyStrCaseCmp(const wxString& a, const wxString& b)
 221 {
 222         return FuzzyStrCmp(a.Lower(), b.Lower());
 223 }
 224
 225
 226
// Creates a tokenizer for 'str' that splits on the character 'token'.
//
// m_string is initialised from 'str', and the read position m_ptr starts at
// the beginning of m_string's character data. The delimiter counter m_count
// starts at zero.
// NOTE(review): m_ptr is initialised from m_string, so m_string presumably
// has to be declared before m_ptr in the class - confirm in the header.
CSimpleTokenizer::CSimpleTokenizer(const wxString& str, wxChar token)
        : m_string(str),
          m_delim(token),
          m_ptr(m_string.c_str()),
          m_count(0)
{
}
 234
 235
 236 wxString CSimpleTokenizer::next()
 237 {
 238         const wxChar* start = m_ptr;
 239         const wxChar* end   = m_string.c_str() + m_string.Len() + 1;
 240
 241         for (; m_ptr < end; ++m_ptr) {
 242                 if (*m_ptr == m_delim) {
 243                         m_count++;
 244                         break;
 245                 }
 246         }
 247
 248         // Return the token
 249         return m_string.Mid(start - m_string.c_str(), m_ptr++ - start);
 250 }
 251
 252
 253 wxString CSimpleTokenizer::remaining() const
 254 {
 255         return m_string.Mid(m_ptr - m_string.c_str());
 256 }
 257
 258
// Returns m_count, which next() increments each time it finds a delimiter.
size_t CSimpleTokenizer::tokenCount() const
{
        return m_count;
}