src/libs/common/StringFunctions.h

   1 //
   2 // This file is part of the aMule Project.
   3 //
   4 // Copyright (c) 2004-2008 Angel Vidal ( kry@amule.org )
   5 // Copyright (c) 2003-2008 aMule Team ( admin@amule.org / http://www.amule.org )
   6 //
   7 // Any parts of this program derived from the xMule, lMule or eMule project,
   8 // or contributed by third-party developers are copyrighted by their
   9 // respective authors.
  10 //
  11 // This program is free software; you can redistribute it and/or modify
  12 // it under the terms of the GNU General Public License as published by
  13 // the Free Software Foundation; either version 2 of the License, or
  14 // (at your option) any later version.
  15 //
  16 // This program is distributed in the hope that it will be useful,
  17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 // GNU General Public License for more details.
  20 //
  21 // You should have received a copy of the GNU General Public License
  22 // along with this program; if not, write to the Free Software
  23 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA
  24 //
  25
  26
  27 #ifndef STRING_FUNCTIONS_H
  28 #define STRING_FUNCTIONS_H
  29
  30 #include "../../Types.h"                // Needed for uint16 and uint32
  31
  32
// UTF8 types: No UTF8, BOM prefix, or Raw UTF8
//
// Selects the encoding that GetRawSize() measures a string in.
enum EUtf8Str
{
        utf8strNone,    // No UTF-8: GetRawSize() measures the unicode2char() conversion.
        utf8strOptBOM,  // UTF-8 with a BOM prefix: GetRawSize() adds 3 bytes to the UTF-8 length.
        utf8strRaw      // Raw UTF-8: GetRawSize() measures the UTF-8 conversion with no prefix.
};
  40
  41 /****************************************************/
  42 /******************* Inlines ************************/
  43 /****************************************************/
  44
  45 /**
  46  * Functions to perform Unicode <-> (char *) and UTF-8 conversion
  47  *
 * Please, DO NOT store pointers returned by unicode2char(), because they
 * are freed as soon as the buffer returned by cWX2MB goes out of scope.
 * If you need to keep the converted string around, store it in a buffer
 * of type wxWX2MBbuf and use that wherever a char pointer is needed, e.g.:
  52  *
  53  * const wxWX2MBbuf buf(unicode2char(aWxString));
  54  *
  55  * --- Now you can freely use buf as if it were a (const char *) ---
  56  *
  57  * puts(buf);
  58  * printf("%s", (const char *)buf);
  59  *
  60  * The cast in printf is necessary because variable number of parameter
  61  * functions have no type for these parameters, so the automatic casting
  62  * of wxWX2MBbuf to (const char *) is not performed.
  63  *
  64  * --- don't worry about memory allocation, memory will be       ---
  65  * --- free'ed when buf gets out of scope, i.e., upon return     ---
  66  *
  67  * wxMB2WXbuf, wxWX2MBbuf are always the appropriate return type,
  68  * either (wxChar *) or (wxWCharBuffer)
  69  *
  70  * Use the simplified names Unicode2CharBuf and Char2UnicodeBuf, and
  71  * do not declare these names const or the compiler will complain about
  72  * a double const.
  73  */
// Buffer type returned by the wxChar -> char conversions declared below
// (unicode2char, unicode2UTF8, filename2char). Already const-qualified.
typedef const wxWX2MBbuf Unicode2CharBuf;
// Buffer type returned by the char -> wxChar conversions declared below
// (char2unicode, UTF82unicode, char2filename). Already const-qualified.
typedef const wxMB2WXbuf Char2UnicodeBuf;
  76
  77 Unicode2CharBuf unicode2char(const wxChar* x);
  78 inline Char2UnicodeBuf char2unicode(const char* x)      { return wxConvLocal.cMB2WX(x); }
  79
  80 inline Unicode2CharBuf unicode2UTF8(const wxChar* x)    { return wxConvUTF8.cWX2MB(x); }
  81 inline Char2UnicodeBuf UTF82unicode(const char* x)      { return wxConvUTF8.cMB2WX(x); }
  82
  83 inline const wxCharBuffer char2UTF8(const char *x)      { return unicode2UTF8(char2unicode(x)); }
  84 inline const wxCharBuffer UTF82char(const char *x)      { return unicode2char(UTF82unicode(x)); }
  85
  86 inline Unicode2CharBuf filename2char(const wxChar* x)   { return wxConvFile.cWC2MB(x); }
  87 inline Char2UnicodeBuf char2filename(const char* x)     { return wxConvFile.cMB2WC(x); }
  88
  89
  90 //
  91 // Replaces "&" with "&&" in 'in' for use with text-labels
  92 //
  93 inline wxString MakeStringEscaped(wxString in) {
  94         in.Replace(wxT("&"),wxT("&&"));
  95         return in;
  96 }
  97
  98 // Make a string be a folder
  99 inline wxString MakeFoldername(wxString path) {
 100
 101         if ( !path.IsEmpty() && ( path.Right(1) == wxT('/' )) ) {
 102                 path.RemoveLast();
 103         }
 104
 105         return path;
 106 }
 107
 108 // Duplicates a string
 109 inline char* nstrdup(const char* src)
 110 {
 111         size_t len = (src ? strlen(src) : 0) + 1;
 112         char *res = new char[len];
 113         if ( src ) strcpy(res, src);
 114         res[len-1] = 0;
 115         return res;
 116 }
 117
 118
// Replacements for atoi and atol that remove the need for converting
// a string to normal chars with unicode2char. The value returned is the
// value represented in the string, or 0 if the conversion failed.
 122 inline long StrToLong(const wxString& str)
 123 {
 124         long value = 0;
 125         if (!str.ToLong(&value)) {      // value may be changed even if it failes according to wx docu
 126                 value = 0;
 127         }
 128         return value;
 129 }
 130
 131 inline unsigned long StrToULong(const wxString& str)
 132 {
 133         unsigned long value = 0;
 134         if (!str.ToULong(&value)) {
 135                 value = 0;
 136         }
 137         return value;
 138 }
 139
 140 inline unsigned long long StrToULongLong(const wxString& str)
 141 {
 142 #if wxCHECK_VERSION(2, 9, 0)
 143         unsigned long long value = 0;
 144         if (!str.ToULongLong(&value)) {
 145                 value = 0;
 146         }
 147         return value;
 148
 149 #else   // wx 2.8
 150
 151         Unicode2CharBuf buf = unicode2char(str);
 152         if (!buf) {             // something went wrong
 153                 return 0;
 154         }
 155 #ifdef _MSC_VER
 156         return _atoi64(buf);
 157 #else
 158         return atoll(buf);
 159 #endif
 160 #endif  // wx 2.8
 161 }
 162
 163 inline size_t GetRawSize(const wxString& rstr, EUtf8Str eEncode)
 164 {
 165         size_t RealLen = 0;
 166         switch (eEncode) {
 167                 case utf8strOptBOM:
 168                         RealLen = 3;
 169                 case utf8strRaw: {
 170                         Unicode2CharBuf s(unicode2UTF8(rstr));
 171                         if (s) {
 172                                 RealLen += strlen(s);
 173                                 break;
 174                         } else {
 175                                 RealLen = 0;
 176                         }
 177                 }
 178                 default: {
 179                         Unicode2CharBuf s(unicode2char(rstr));
 180                         if (s) {
 181                                 RealLen = strlen(s);
 182                         }
 183                 }
 184         }
 185
 186         return RealLen;
 187 }
 188
 189
 190 /****************************************************/
 191 /***************** Non-inlines **********************/
 192 /****************************************************/
 193
 194
 195 // Makes sIn suitable for inclusion in an URL, by escaping all chars that could cause trouble.
 196 wxString URLEncode(const wxString& sIn);
 197
 198
 199 /**
 200  * Converts a hexadecimal number to a char.
 201  *
 202  * @param hex The hex-number, must be at most 2 digits long.
 203  * @return The resulting char or \0 if conversion failed.
 204  */
 205 wxChar HexToDec( const wxString& hex );
 206
 207
 208 /**
 209  * This function converts all valid HTML escape-codes to their corresponding chars.
 210  *
 211  * @param str The string to unescape.
 212  * @return The unescaped version of the input string.
 213  */
 214 wxString UnescapeHTML( const wxString& str );
 215
 216
 217 /**
 * Ensures that the passed URL is valid by escaping various chars.
 219  */
 220 wxString validateURI(const wxString& url);
 221
 222
 223 /**
 224  * Compares two strings, while taking numerals into consideration.
 225  *
 226  * @return Returns -1 if a < b, 1 if a > b and 0 if a = b
 227  *
 * This function basically splits the two strings into a number of
 * fields, delineated by whitespace or non-alphanumerical chars. The
 * numerals are then converted to integers, and the fields are
 * compared. This allows strings such as "a (2)" and "a (10)" to
 * be properly sorted for displaying.
 *
 * Currently does not handle floats (they are treated as two separate
 * fields), nor negative numbers.
 236  */
 237 int FuzzyStrCmp(const wxString& a, const wxString& b);
 238
 239 /**
 240  * As with FuzzyStrCmp, but case insensitive.
 241  */
 242 int FuzzyStrCaseCmp(const wxString& a, const wxString& b);
 243
 244
/**
 * This class provides a simple and fast tokenizer.
 *
 * The tokenizer keeps its own wxString copy of the input and splits it
 * on a single delimiter character, handing out one token per call to
 * next().
 *
 * NOTE(review): m_ptr presumably points into m_string; if so, the
 * implicitly generated copy operations would leave a copy's pointer
 * referring to the source object's buffer -- confirm before copying
 * instances.
 */
class CSimpleTokenizer
{
public:
        /**
         * @param str The string to tokenize.
         * @param delim The delimiter used to split the string.
         */
        CSimpleTokenizer(const wxString& str, wxChar delim);

        /**
         * Returns the next part of the string separated by the
         * given delimiter. When the entire string has been
         * tokenized, an empty string is returned. Note that
         * empty tokens are also returned.
         *
         * NOTE(review): since empty tokens are returned as well, an
         * empty result alone does not seem to distinguish "empty token"
         * from "end of input" -- verify against the implementation.
         *
         * @return The next token, or an empty string when done.
         */
        wxString next();

        /**
         * Returns the remaining part of the string.
         *
         * The remaining part is defined as being the part after
         * the last encountered token, or an empty string if the
         * entire string has been tokenized.
         *
         * If next() has yet to be called, the entire string will
         * be returned.
         *
         * @return The part of the string that has not been tokenized yet.
         */
        wxString remaining() const;

        /**
         * Returns the number of tokens encountered so far.
         *
         * @return The token count.
         */
        size_t tokenCount() const;

private:
        //! The string being tokenized (held by value).
        wxString m_string;

        //! The delimiter used to split the string.
        wxChar m_delim;

        //! A pointer to the current position in the string.
        const wxChar* m_ptr;

        //! The number of tokens encountered.
        size_t m_count;
};
 295
 296
 297 #endif // STRING_FUNCTIONS_H
 298 // File_checked_for_headers