src/libs/common/StringFunctions.h

   1 //
   2 // This file is part of the aMule Project.
   3 //
   4 // Copyright (c) 2004-2008 Angel Vidal ( kry@amule.org )
   5 // Copyright (c) 2003-2008 aMule Team ( admin@amule.org / http://www.amule.org )
   6 //
   7 // Any parts of this program derived from the xMule, lMule or eMule project,
   8 // or contributed by third-party developers are copyrighted by their
   9 // respective authors.
  10 //
  11 // This program is free software; you can redistribute it and/or modify
  12 // it under the terms of the GNU General Public License as published by
  13 // the Free Software Foundation; either version 2 of the License, or
  14 // (at your option) any later version.
  15 //
  16 // This program is distributed in the hope that it will be useful,
  17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 // GNU General Public License for more details.
  20 //
  21 // You should have received a copy of the GNU General Public License
  22 // along with this program; if not, write to the Free Software
  23 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA
  24 //
  25
  26
  27 #ifndef STRING_FUNCTIONS_H
  28 #define STRING_FUNCTIONS_H
  29
  30 #include "../../Types.h"                // Needed for uint16 and uint32
  31
  32 class CPath;
  33
  34
  35 // UTF8 types: No UTF8, BOM prefix, or Raw UTF8
  36 enum EUtf8Str
  37 {
  38         utf8strNone,
  39         utf8strOptBOM,
  40         utf8strRaw
  41 };
  42
  43 /****************************************************/
  44 /******************* Inlines ************************/
  45 /****************************************************/
  46
  47 /**
  48  * Functions to perform Unicode <-> (char *) and UTF-8 conversion
  49  *
  50  * Please, DO NOT store pointers returned by unicode2char(), because they
  51  * are freed as soon as the return value of cWX2MB goes out of scope.
  52  * If you need to store a pointer, keep the result in a buffer of type
  53  * wxWX2MBbuf and then cast it to a char pointer, e.g.:
  54  *
  55  * const wxWX2MBbuf buf(unicode2char(aWxString));
  56  *
  57  * --- Now you can freely use buf as if it were a (const char *) ---
  58  *
  59  * puts(buf);
  60  * printf("%s", (const char *)buf);
  61  *
  62  * The cast in printf is necessary because functions taking a variable
  63  * number of parameters have no type information for those parameters, so
  64  * the automatic conversion of wxWX2MBbuf to (const char *) is not performed.
  65  *
  66  * --- don't worry about memory allocation, memory will be       ---
  67  * --- free'ed when buf gets out of scope, i.e., upon return     ---
  68  *
  69  * wxMB2WXbuf, wxWX2MBbuf are always the appropriate return type,
  70  * either (wxChar *) or (wxWCharBuffer)
  71  *
  72  * Use the simplified names Unicode2CharBuf and Char2UnicodeBuf, and
  73  * do not declare these names const or the compiler will complain about
  74  * a double const.
  75  */
  76 typedef const wxWX2MBbuf Unicode2CharBuf;
  77 typedef const wxMB2WXbuf Char2UnicodeBuf;
  78
  79 inline Unicode2CharBuf unicode2char(const wxChar* x)    { return wxConvLocal.cWX2MB(x); }
  80 inline Char2UnicodeBuf char2unicode(const char* x)      { return wxConvLocal.cMB2WX(x); }
  81
  82 inline Unicode2CharBuf unicode2UTF8(const wxChar* x)    { return wxConvUTF8.cWX2MB(x); }
  83 inline Char2UnicodeBuf UTF82unicode(const char* x)      { return wxConvUTF8.cMB2WX(x); }
  84
  85 inline const wxCharBuffer char2UTF8(const char *x)      { return unicode2UTF8(char2unicode(x)); }
  86 inline const wxCharBuffer UTF82char(const char *x)      { return unicode2char(UTF82unicode(x)); }
  87
  88 inline Unicode2CharBuf filename2char(const wxChar* x)   { return wxConvFile.cWC2MB(x); }
  89 inline Char2UnicodeBuf char2filename(const char* x)     { return wxConvFile.cMB2WC(x); }
  90
  91
  92 //
  93 // Replaces "&" with "&&" in 'in' for use with text-labels
  94 //
  95 inline wxString MakeStringEscaped(wxString in) {
  96         in.Replace(wxT("&"),wxT("&&"));
  97         return in;
  98 }
  99
 100 // Make a string be a folder
 101 inline wxString MakeFoldername(wxString path) {
 102
 103         if ( !path.IsEmpty() && ( path.Right(1) == wxT('/' )) ) {
 104                 path.RemoveLast();
 105         }
 106
 107         return path;
 108 }
 109
 110 // Duplicates a string
 111 inline char* nstrdup(const char* src)
 112 {
 113         size_t len = (src ? strlen(src) : 0) + 1;
 114         char *res = new char[len];
 115         if ( src ) strcpy(res, src);
 116         res[len-1] = 0;
 117         return res;
 118 }
 119
 120
 121 // Replacements for atoi and atol that removes the need for converting
 122 // a string to normal chars with unicode2char. The value returned is the
 123 // value represented in the string or 0 if the conversion failed.
 124 inline long StrToLong( const wxString& str ) {
 125         long value = 0;
 126         str.ToLong( &value );
 127         return value;
 128 }
 129
 130 inline unsigned long StrToULong( const wxString& str ) {
 131         unsigned long value = 0;
 132         str.ToULong( &value );
 133         return value;
 134 }
 135
 136 inline unsigned long long StrToULongLong( const wxString& str ) {
 137 #ifdef _MSC_VER
 138         return _atoi64(unicode2char(str));
 139 #else
 140         return atoll(unicode2char(str));
 141 #endif
 142 }
 143
 144 inline size_t GetRawSize(const wxString& rstr, EUtf8Str eEncode)
 145 {
 146         size_t RealLen = 0;
 147         switch (eEncode) {
 148                 case utf8strOptBOM:
 149                         RealLen = 3;
 150                 case utf8strRaw: {
 151                         Unicode2CharBuf s(unicode2UTF8(rstr));
 152                         if (s) {
 153                                 RealLen += strlen(s);
 154                                 break;
 155                         } else {
 156                                 RealLen = 0;
 157                         }
 158                 }
 159                 default: {
 160                         Unicode2CharBuf s(unicode2char(rstr));
 161                         if (s) {
 162                                 RealLen = strlen(s);
 163                         }
 164                 }
 165         }
 166
 167         return RealLen;
 168 }
 169
 170
 171 /****************************************************/
 172 /***************** Non-inlines **********************/
 173 /****************************************************/
 174
 175 /**
 176  * Truncates a filename to the specified length.
 177  *
 178  * @param filename The original filename.
 179  * @param length The maximum length of the resulting filename.
 180  * @param isFilePath If true, the path is truncated rather than the filename, if possible.
 181  * @return The truncated filename.
 182  */
 183 wxString TruncateFilename(const CPath& filename, size_t length, bool isFilePath = false);
 184
 185 /**
 186  * Strips all path separators from the specified end of a path.
 187  *
 188  * Note: type must be either wxString::leading or wxString::trailing.
 189  */
 190 wxString StripSeparators(wxString path, wxString::stripType type);
 191
 192
 193 /**
 194  * Joins two paths with the operating-system-specific path separator.
 195  *
 196  * If either of the parameters is empty, the other parameter is
 197  * returned unchanged.
 198  */
 199 wxString JoinPaths(const wxString& path, const wxString& file);
 200
 201 // Makes sIn suitable for inclusion in a URL by escaping all chars that could cause trouble.
 202 wxString URLEncode(const wxString& sIn);
 203
 204
 205 /**
 206  * Converts a hexadecimal number, given as a string, to a char.
 207  *
 208  * @param hex The hex-number, must be at most 2 digits long.
 209  * @return The resulting char or \0 if conversion failed.
 210  */
 211 wxChar HexToDec( const wxString& hex );
 212
 213
 214 /**
 215  * Converts all valid HTML escape-codes in a string to their corresponding chars.
 216  *
 217  * @param str The string to unescape.
 218  * @return The unescaped version of the input string.
 219  */
 220 wxString UnescapeHTML( const wxString& str );
 221
 222
 223 /**
 224  * Ensures that the URL passed in is valid by escaping various chars.
 225  */
 226 wxString validateURI(const wxString& url);
 227
 228
 229 /**
 230  * Compares two strings, while taking numerals into consideration.
 231  *
 232  * @return Returns -1 if a < b, 1 if a > b and 0 if a = b
 233  *
 234  * This function basically splits the two strings into a number of
 235  * fields, delineated by whitespace and non-alphanumerical chars. The
 236  * numerals are then converted to integers, and the fields are
 237  * compared. This allows strings such as "a (2)" and "a (10)" to
 238  * be properly sorted for displaying.
 239  *
 240  * Currently does not handle floats (they are treated as two separate
 241  * fields), nor negative numbers.
 242  */
 243 int FuzzyStrCmp(const wxString& a, const wxString& b);
 244
 245 /**
 246  * Same as FuzzyStrCmp, but the comparison is case-insensitive.
 247  */
 248 int FuzzyStrCaseCmp(const wxString& a, const wxString& b);
 249
 250
 251 /**
 252  * This class provides a simple and fast tokenizer that splits a string on a single delimiter char.
 253  */
 254 class CSimpleTokenizer
 255 {
 256 public:
 257         /**
 258          * @param str The string to tokenize.
 259          * @param delim The delimiter used to split the string.
 260          */
 261         CSimpleTokenizer(const wxString& str, wxChar delim);
 262
 263         /**
 264          * Returns the next part of the string separated by the
 265          * given delimiter. When the entire string has been
 266          * tokenized, an empty string is returned. Note that empty tokens are
 267          * also returned, so an empty result alone does not signal the end.
 268          */
 269         wxString next();
 270
 271         /**
 272          * Returns the remaining part of the string.
 273          *
 274          * The remaining part is defined as being the part after
 275          * the last encountered token, or an empty string if the
 276          * entire string has been tokenized.
 277          *
 278          * If next() has not been called yet, the entire string will
 279          * be returned.
 280          */
 281         wxString remaining() const;
 282
 283         /**
 284          * Returns the number of tokens encountered so far.
 285          */
 286         size_t tokenCount() const;
 287
 288 private:
 289         //! The string being tokenized.
 290         wxString m_string;
 291
 292         //! The delimiter used to split the string.
 293         wxChar m_delim;
 294
 295         //! A pointer to the current position in the string. NOTE(review): presumably points into m_string; confirm copying a tokenizer is safe.
 296         const wxChar* m_ptr;
 297
 298         //! The number of tokens encountered.
 299         size_t m_count;
 300 };
 301
 302
 303 #endif // STRING_FUNCTIONS_H
 304 // File_checked_for_headers