src/libs/common/StringFunctions.h

   1 //
   2 // This file is part of the aMule Project.
   3 //
   4 // Copyright (c) 2004-2011 Angel Vidal ( kry@amule.org )
   5 // Copyright (c) 2003-2011 aMule Team ( admin@amule.org / http://www.amule.org )
   6 //
   7 // Any parts of this program derived from the xMule, lMule or eMule project,
   8 // or contributed by third-party developers are copyrighted by their
   9 // respective authors.
  10 //
  11 // This program is free software; you can redistribute it and/or modify
  12 // it under the terms of the GNU General Public License as published by
  13 // the Free Software Foundation; either version 2 of the License, or
  14 // (at your option) any later version.
  15 //
  16 // This program is distributed in the hope that it will be useful,
  17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 // GNU General Public License for more details.
  20 //
  21 // You should have received a copy of the GNU General Public License
  22 // along with this program; if not, write to the Free Software
  23 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA
  24 //
  25
  26
  27 #ifndef STRING_FUNCTIONS_H
  28 #define STRING_FUNCTIONS_H
  29
  30 #include "../../Types.h"                // Needed for uint16 and uint32
  31
  32
// UTF8 types: No UTF8, BOM prefix, or Raw UTF8.
// Selects which byte encoding is assumed for a string, e.g. by GetRawSize().
enum EUtf8Str
{
        // Not UTF-8: GetRawSize() measures the unicode2char() conversion.
        utf8strNone,
        // UTF-8 with a BOM prefix: GetRawSize() counts 3 extra bytes for it.
        utf8strOptBOM,
        // UTF-8 without any prefix.
        utf8strRaw
};
  40
  41 /****************************************************/
  42 /******************* Inlines ************************/
  43 /****************************************************/
  44
  45 /**
  46  * Functions to perform Unicode <-> (char *) and UTF-8 conversion
  47  *
 * Please DO NOT store pointers returned by unicode2char(), because they
 * get freed as soon as the return value of cWX2MB goes out of scope.
 * If you need to keep the converted text around, store it in a buffer of
 * type wxCharBuffer and cast that to a char pointer where needed, e.g.:
  52  *
  53  * const wxCharBuffer buf(unicode2char(aWxString));
  54  *
  55  * --- Now you can freely use buf as if it were a (const char *) ---
  56  *
  57  * puts(buf);
  58  * printf("%s", (const char *)buf);
  59  *
 * The cast in printf is necessary because variadic functions have no
 * declared type for these parameters, so the automatic conversion of
 * wxCharBuffer to (const char *) is not performed.
  63  *
  64  * --- don't worry about memory allocation, memory will be       ---
 * --- freed when buf goes out of scope, i.e., upon return       ---
  66  *
  67  * wxWCharBuffer, wxCharBuffer are always the appropriate return type,
  68  * either (wxChar *) or (wxWCharBuffer)
  69  *
  70  * Use the simplified names Unicode2CharBuf and Char2UnicodeBuf, and
  71  * do not declare these names const or the compiler will complain about
  72  * a double const.
  73  */
// Buffer types returned by the conversion helpers below. Both typedefs are
// already const-qualified, so do not add another const when using them.
typedef const wxCharBuffer Unicode2CharBuf;
typedef const wxWCharBuffer Char2UnicodeBuf;

// Unicode -> (char *) conversions. The first two overloads are only declared
// here; their definitions live outside this header.
Unicode2CharBuf unicode2char(const wxChar* x);
Unicode2CharBuf unicode2char(const Char2UnicodeBuf& x);
// Convenience overload: forwards x.wc_str() to one of the overloads above.
inline Unicode2CharBuf unicode2char(const wxString& x)          { return unicode2char(x.wc_str()); }
// (char *) -> Unicode, converted with wxConvLocal.
inline Char2UnicodeBuf char2unicode(const char* x)      { return wxConvLocal.cMB2WX(x); }

// Unicode <-> UTF-8 conversions, performed with wxConvUTF8 (or
// wxString::utf8_str() for the wxString overload).
inline Unicode2CharBuf unicode2UTF8(const wxChar* x)    { return wxConvUTF8.cWX2MB(x); }
inline Unicode2CharBuf unicode2UTF8(const Char2UnicodeBuf& x)   { return wxConvUTF8.cWX2MB(x); }
inline Unicode2CharBuf unicode2UTF8(const wxString& x)  { return x.utf8_str(); }
inline Char2UnicodeBuf UTF82unicode(const char* x)      { return wxConvUTF8.cMB2WX(x); }

// (char *) <-> UTF-8: chains two of the conversions above, going through an
// intermediate Unicode buffer. The return type is the same type as
// Unicode2CharBuf, just spelled out.
inline const wxCharBuffer char2UTF8(const char *x)      { return unicode2UTF8(char2unicode(x)); }
inline const wxCharBuffer UTF82char(const char *x)      { return unicode2char(UTF82unicode(x)); }

// Filename conversions, performed with wxConvFile.
inline Unicode2CharBuf filename2char(const wxChar* x)   { return wxConvFile.cWC2MB(x); }
inline Unicode2CharBuf filename2char(const wxString& x) { return x.mb_str(wxConvFile); }
inline Char2UnicodeBuf char2filename(const char* x)     { return wxConvFile.cMB2WC(x); }
  93
  94
  95 //
  96 // Replaces "&" with "&&" in 'in' for use with text-labels
  97 //
  98 inline wxString MakeStringEscaped(wxString in) {
  99         in.Replace(wxT("&"),wxT("&&"));
 100         return in;
 101 }
 102
 103 // Make a string be a folder
 104 inline wxString MakeFoldername(wxString path) {
 105
 106         if ( !path.IsEmpty() && ( path.Right(1) == wxT('/' )) ) {
 107                 path.RemoveLast();
 108         }
 109
 110         return path;
 111 }
 112
 113 // Duplicates a string
 114 inline char* nstrdup(const char* src)
 115 {
 116         size_t len = (src ? strlen(src) : 0) + 1;
 117         char *res = new char[len];
 118         if ( src ) strcpy(res, src);
 119         res[len-1] = 0;
 120         return res;
 121 }
 122
 123
 124 // Replacements for atoi and atol that removes the need for converting
 125 // a string to normal chars with unicode2char. The value returned is the
 126 // value represented in the string or 0 if the conversion failed.
 127 inline long StrToLong(const wxString& str)
 128 {
 129         long value = 0;
 130         if (!str.ToLong(&value)) {      // value may be changed even if it failes according to wx docu
 131                 value = 0;
 132         }
 133         return value;
 134 }
 135
 136 inline unsigned long StrToULong(const wxString& str)
 137 {
 138         unsigned long value = 0;
 139         if (!str.ToULong(&value)) {
 140                 value = 0;
 141         }
 142         return value;
 143 }
 144
 145 inline unsigned long long StrToULongLong(const wxString& str)
 146 {
 147 #if wxCHECK_VERSION(2, 9, 0)
 148         unsigned long long value = 0;
 149         if (!str.ToULongLong(&value)) {
 150                 value = 0;
 151         }
 152         return value;
 153
 154 #else   // wx 2.8
 155
 156         Unicode2CharBuf buf = unicode2char(str);
 157         if (!buf) {             // something went wrong
 158                 return 0;
 159         }
 160 #ifdef _MSC_VER
 161         return _atoi64(buf);
 162 #else
 163         return atoll(buf);
 164 #endif
 165 #endif  // wx 2.8
 166 }
 167
 168 inline size_t GetRawSize(const wxString& rstr, EUtf8Str eEncode)
 169 {
 170         size_t RealLen = 0;
 171         switch (eEncode) {
 172                 case utf8strOptBOM:
 173                         RealLen = 3;
 174                 /* fall through */
 175                 case utf8strRaw: {
 176                         Unicode2CharBuf s(unicode2UTF8(rstr));
 177                         if (s) {
 178                                 RealLen += strlen(s);
 179                                 break;
 180                         } else {
 181                                 RealLen = 0;
 182                         }
 183                 }
 184                 /* fall through */
 185                 default: {
 186                         Unicode2CharBuf s(unicode2char(rstr));
 187                         if (s) {
 188                                 RealLen = strlen(s);
 189                         }
 190                 }
 191         }
 192
 193         return RealLen;
 194 }
 195
 196
 197 /****************************************************/
 198 /***************** Non-inlines **********************/
 199 /****************************************************/
 200
 201
/**
 * Makes sIn suitable for inclusion in a URL, by escaping all chars that
 * could cause trouble.
 *
 * @param sIn The string to encode.
 * @return The escaped string.
 */
wxString URLEncode(const wxString& sIn);
 204
 205
/**
 * Converts a hexadecimal number to a char.
 *
 * Note that, despite the name, the result is returned as a wxChar and not
 * as a decimal number or string.
 *
 * @param hex The hex-number, must be at most 2 digits long.
 * @return The resulting char or \0 if conversion failed.
 */
wxChar HexToDec( const wxString& hex );
 213
 214
/**
 * This function converts all valid HTML escape-codes to their corresponding chars.
 *
 * NOTE(review): which escape-codes count as valid is decided by the
 * implementation, which is not part of this header.
 *
 * @param str The string to unescape.
 * @return The unescaped version of the input string.
 */
wxString UnescapeHTML( const wxString& str );
 222
 223
/**
 * Ensures that the passed URL is valid by escaping various chars.
 *
 * @param url The URL to validate.
 * @return Presumably the escaped URL; confirm against the implementation,
 *         which is not part of this header.
 */
wxString validateURI(const wxString& url);
 228
 229
/**
 * Compares two strings, while taking numerals into consideration.
 *
 * @return Returns -1 if a < b, 1 if a > b and 0 if a = b
 *
 * This function basically splits the two strings into a number of
 * fields, delineated by whitespace and non-alphanumerical chars. The
 * numerals are then converted to integers, and the fields are
 * compared. This allows strings such as "a (2)" and "a (10)" to
 * be properly sorted for displaying.
 *
 * Currently does not handle floats (they are treated as two separate
 * fields), nor negative numbers.
 */
int FuzzyStrCmp(const wxString& a, const wxString& b);
 245
/**
 * As with FuzzyStrCmp, but case insensitive.
 *
 * @param a The first string to compare.
 * @param b The second string to compare.
 * @return Presumably -1, 0 or 1 as for FuzzyStrCmp; confirm against the
 *         implementation, which is not part of this header.
 */
int FuzzyStrCaseCmp(const wxString& a, const wxString& b);
 250
 251
/**
 * This class provides a simple and fast tokenizer.
 *
 * The tokenizer keeps its own copy of the string (m_string is held by
 * value) and splits it on a single delimiter character.
 */
class CSimpleTokenizer
{
public:
        /**
         * @param str The string to tokenize.
         * @param delim The delimiter used to split the string.
         */
        CSimpleTokenizer(const wxString& str, wxChar delim);

        /**
         * Returns the next part of the string separated by the
         * given delimiter. When the entire string has been
         * tokenized, an empty string is returned. Note that
         * empty tokens are also returned, so an empty result alone
         * does not tell the two cases apart.
         */
        wxString next();

        /**
         * Returns the remaining part of the string.
         *
         * The remaining part is defined as being the part after
         * the last encountered token, or an empty string if the
         * entire string has been tokenized.
         *
         * If next() has yet to be called, the entire string will
         * be returned.
         */
        wxString remaining() const;

        /**
         * Returns the number of tokens encountered so far.
         */
        size_t tokenCount() const;

private:
        //! The string being tokenized (a private copy of the constructor argument).
        wxString m_string;

        //! The delimiter used to split the string.
        wxChar m_delim;

        //! A pointer to the current position in the string.
        //! NOTE(review): presumably points into m_string's buffer; if so, a
        //! compiler-generated copy of this object would share that pointer.
        //! Confirm against the implementation.
        const wxChar* m_ptr;

        //! The number of tokens encountered.
        size_t m_count;
};
 302
 303
 304 #endif // STRING_FUNCTIONS_H
 305 // File_checked_for_headers