comphelper/source/misc/string.cxx

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*
   3  * This file is part of the LibreOffice project.
   4  *
   5  * This Source Code Form is subject to the terms of the Mozilla Public
   6  * License, v. 2.0. If a copy of the MPL was not distributed with this
   7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
   8  *
   9  * This file incorporates work covered by the following license notice:
  10  *
  11  *   Licensed to the Apache Software Foundation (ASF) under one or more
  12  *   contributor license agreements. See the NOTICE file distributed
  13  *   with this work for additional information regarding copyright
  14  *   ownership. The ASF licenses this file to you under the Apache
  15  *   License, Version 2.0 (the "License"); you may not use this file
  16  *   except in compliance with the License. You may obtain a copy of
  17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
  18  */
  19
  20 #include <sal/config.h>
  21
  22 #include <cstddef>
  23 #include <string.h>
  24 #include <vector>
  25 #include <algorithm>
  26
  27 #include <rtl/ustring.hxx>
  28 #include <rtl/ustrbuf.hxx>
  29 #include <rtl/string.hxx>
  30 #include <rtl/strbuf.hxx>
  31 #include <sal/types.h>
  32
  33 #include <comphelper/string.hxx>
  34 #include <comphelper/stl_types.hxx>
  35
  36 #include <com/sun/star/i18n/BreakIterator.hpp>
  37 #include <com/sun/star/i18n/CharType.hpp>
  38 #include <com/sun/star/i18n/Collator.hpp>
  39
  40
  41 namespace comphelper { namespace string {
  42
  43 namespace
  44 {
  45     template <typename T, typename C> T tmpl_stripStart(const T &rIn,
  46         const C cRemove)
  47     {
  48         if (rIn.isEmpty())
  49             return rIn;
  50
  51         sal_Int32 i = 0;
  52
  53         while (i < rIn.getLength())
  54         {
  55             if (rIn[i] != cRemove)
  56                 break;
  57             ++i;
  58         }
  59
  60         return rIn.copy(i);
  61     }
  62 }
  63
  64 OString stripStart(const OString &rIn, sal_Char c)
  65 {
  66     return tmpl_stripStart<OString, sal_Char>(rIn, c);
  67 }
  68
  69 OUString stripStart(const OUString &rIn, sal_Unicode c)
  70 {
  71     return tmpl_stripStart<OUString, sal_Unicode>(rIn, c);
  72 }
  73
  74 namespace
  75 {
  76     template <typename T, typename C> T tmpl_stripEnd(const T &rIn,
  77         const C cRemove)
  78     {
  79         if (rIn.isEmpty())
  80             return rIn;
  81
  82         sal_Int32 i = rIn.getLength();
  83
  84         while (i > 0)
  85         {
  86             if (rIn[i-1] != cRemove)
  87                 break;
  88             --i;
  89         }
  90
  91         return rIn.copy(0, i);
  92     }
  93 }
  94
  95 OString stripEnd(const OString &rIn, sal_Char c)
  96 {
  97     return tmpl_stripEnd<OString, sal_Char>(rIn, c);
  98 }
  99
 100 OUString stripEnd(const OUString &rIn, sal_Unicode c)
 101 {
 102     return tmpl_stripEnd<OUString, sal_Unicode>(rIn, c);
 103 }
 104
 105 OString strip(const OString &rIn, sal_Char c)
 106 {
 107     return stripEnd(stripStart(rIn, c), c);
 108 }
 109
 110 OUString strip(const OUString &rIn, sal_Unicode c)
 111 {
 112     return stripEnd(stripStart(rIn, c), c);
 113 }
 114
 115 namespace
 116 {
 117     template <typename T, typename C> sal_Int32 tmpl_getTokenCount(const T &rIn,
 118         C cTok)
 119     {
 120         // Empty String: TokenCount by Definition is 0
 121         if (rIn.isEmpty())
 122             return 0;
 123
 124         sal_Int32 nTokCount = 1;
 125         for (sal_Int32 i = 0; i < rIn.getLength(); ++i)
 126         {
 127             if (rIn[i] == cTok)
 128                 ++nTokCount;
 129         }
 130         return nTokCount;
 131     }
 132 }
 133
 134 sal_Int32 getTokenCount(const OString &rIn, sal_Char cTok)
 135 {
 136     return tmpl_getTokenCount<OString, sal_Char>(rIn, cTok);
 137 }
 138
 139 sal_Int32 getTokenCount(const OUString &rIn, sal_Unicode cTok)
 140 {
 141     return tmpl_getTokenCount<OUString, sal_Unicode>(rIn, cTok);
 142 }
 143
 144 sal_uInt32 decimalStringToNumber(
 145     OUString const & str )
 146 {
 147     sal_uInt32 result = 0;
 148     for( sal_Int32 i = 0 ; i < str.getLength() ; )
 149     {
 150         sal_uInt32 c = str.iterateCodePoints(&i);
 151         sal_uInt32 value = 0;
 152         if( c <= 0x0039)    // ASCII decimal digits, most common
 153             value = c - 0x0030;
 154         else if( c >= 0x1D7F6 )    // mathematical monospace digits
 155             value = c - 0x1D7F6;
 156         else if( c >= 0x1D7EC ) // mathematical sans-serif bold digits
 157             value = c - 0x1D7EC;
 158         else if( c >= 0x1D7E2 ) // mathematical sans-serif digits
 159             value = c - 0x1D7E2;
 160         else if( c >= 0x1D7D8 ) // mathematical double-struck digits
 161             value = c - 0x1D7D8;
 162         else if( c >= 0x1D7CE ) // mathematical bold digits
 163             value = c - 0x1D7CE;
 164         else if( c >= 0x11066 ) // brahmi digits
 165             value = c - 0x11066;
 166         else if( c >= 0x104A0 ) // osmanya digits
 167             value = c - 0x104A0;
 168         else if( c >= 0xFF10 ) // fullwidth digits
 169             value = c - 0xFF10;
 170         else if( c >= 0xABF0 ) // meetei mayek digits
 171             value = c - 0xABF0;
 172         else if( c >= 0xAA50 ) // cham digits
 173             value = c - 0xAA50;
 174         else if( c >= 0xA9D0 ) // javanese digits
 175             value = c - 0xA9D0;
 176         else if( c >= 0xA900 ) // kayah li digits
 177             value = c - 0xA900;
 178         else if( c >= 0xA8D0 ) // saurashtra digits
 179             value = c - 0xA8D0;
 180         else if( c >= 0xA620 ) // vai digits
 181             value = c - 0xA620;
 182         else if( c >= 0x1C50 ) // ol chiki digits
 183             value = c - 0x1C50;
 184         else if( c >= 0x1C40 ) // lepcha digits
 185             value = c - 0x1C40;
 186         else if( c >= 0x1BB0 ) // sundanese digits
 187             value = c - 0x1BB0;
 188         else if( c >= 0x1B50 ) // balinese digits
 189             value = c - 0x1B50;
 190         else if( c >= 0x1A90 ) // tai tham tham digits
 191             value = c - 0x1A90;
 192         else if( c >= 0x1A80 ) // tai tham hora digits
 193             value = c - 0x1A80;
 194         else if( c >= 0x19D0 ) // new tai lue digits
 195             value = c - 0x19D0;
 196         else if( c >= 0x1946 ) // limbu digits
 197             value = c - 0x1946;
 198         else if( c >= 0x1810 ) // mongolian digits
 199             value = c - 0x1810;
 200         else if( c >= 0x17E0 ) // khmer digits
 201             value = c - 0x17E0;
 202         else if( c >= 0x1090 ) // myanmar shan digits
 203             value = c - 0x1090;
 204         else if( c >= 0x1040 ) // myanmar digits
 205             value = c - 0x1040;
 206         else if( c >= 0x0F20 ) // tibetan digits
 207             value = c - 0x0F20;
 208         else if( c >= 0x0ED0 ) // lao digits
 209             value = c - 0x0ED0;
 210         else if( c >= 0x0E50 ) // thai digits
 211             value = c - 0x0E50;
 212         else if( c >= 0x0D66 ) // malayalam digits
 213             value = c - 0x0D66;
 214         else if( c >= 0x0CE6 ) // kannada digits
 215             value = c - 0x0CE6;
 216         else if( c >= 0x0C66 ) // telugu digits
 217             value = c - 0x0C66;
 218         else if( c >= 0x0BE6 ) // tamil digits
 219             value = c - 0x0BE6;
 220         else if( c >= 0x0B66 ) // odia digits
 221             value = c - 0x0B66;
 222         else if( c >= 0x0AE6 ) // gujarati digits
 223             value = c - 0x0AE6;
 224         else if( c >= 0x0A66 ) // gurmukhi digits
 225             value = c - 0x0A66;
 226         else if( c >= 0x09E6 ) // bengali digits
 227             value = c - 0x09E6;
 228         else if( c >= 0x0966 ) // devanagari digit
 229             value = c - 0x0966;
 230         else if( c >= 0x07C0 ) // nko digits
 231             value = c - 0x07C0;
 232         else if( c >= 0x06F0 ) // extended arabic-indic digits
 233             value = c - 0x06F0;
 234         else if( c >= 0x0660 ) // arabic-indic digits
 235             value = c - 0x0660;
 236         result = result * 10 + value;
 237     }
 238     return result;
 239 }
 240
 241 using namespace ::com::sun::star;
 242
 243 // convert between sequence of string and comma separated string
 244
 245 OUString convertCommaSeparated(
 246     uno::Sequence< OUString > const& i_rSeq)
 247 {
 248     OUStringBuffer buf;
 249     ::comphelper::intersperse(
 250         i_rSeq.begin(), i_rSeq.end(), ::comphelper::OUStringBufferAppender(buf), OUString( ", " ));
 251     return buf.makeStringAndClear();
 252 }
 253
 254 uno::Sequence< OUString >
 255     convertCommaSeparated( OUString const& i_rString )
 256 {
 257     std::vector< OUString > vec;
 258     sal_Int32 idx = 0;
 259     do {
 260       OUString kw =
 261         i_rString.getToken(0, static_cast<sal_Unicode> (','), idx);
 262       kw = kw.trim();
 263       if (!kw.isEmpty()) {
 264           vec.push_back(kw);
 265       }
 266     } while (idx >= 0);
 267     uno::Sequence< OUString > kws(vec.size());
 268     std::copy(vec.begin(), vec.end(), kws.begin());
 269     return kws;
 270 }
 271
 272
 273 sal_Int32 compareNatural( const OUString & rLHS, const OUString & rRHS,
 274     const uno::Reference< i18n::XCollator > &rCollator,
 275     const uno::Reference< i18n::XBreakIterator > &rBI,
 276     const lang::Locale &rLocale )
 277 {
 278     sal_Int32 nRet = 0;
 279
 280     sal_Int32 nLHSLastNonDigitPos = 0;
 281     sal_Int32 nRHSLastNonDigitPos = 0;
 282     sal_Int32 nLHSFirstDigitPos = 0;
 283     sal_Int32 nRHSFirstDigitPos = 0;
 284
 285     while (nLHSFirstDigitPos < rLHS.getLength() || nRHSFirstDigitPos < rRHS.getLength())
 286     {
 287         sal_Int32 nLHSChunkLen;
 288         sal_Int32 nRHSChunkLen;
 289
 290         //Compare non digit block as normal strings
 291         nLHSFirstDigitPos = rBI->nextCharBlock(rLHS, nLHSLastNonDigitPos,
 292             rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);
 293         nRHSFirstDigitPos = rBI->nextCharBlock(rRHS, nRHSLastNonDigitPos,
 294             rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);
 295         if (nLHSFirstDigitPos == -1)
 296             nLHSFirstDigitPos = rLHS.getLength();
 297         if (nRHSFirstDigitPos == -1)
 298             nRHSFirstDigitPos = rRHS.getLength();
 299         nLHSChunkLen = nLHSFirstDigitPos - nLHSLastNonDigitPos;
 300         nRHSChunkLen = nRHSFirstDigitPos - nRHSLastNonDigitPos;
 301
 302         nRet = rCollator->compareSubstring(rLHS, nLHSLastNonDigitPos,
 303             nLHSChunkLen, rRHS, nRHSLastNonDigitPos, nRHSChunkLen);
 304         if (nRet != 0)
 305             break;
 306
 307         //Compare digit block as one number vs another
 308         nLHSLastNonDigitPos = rBI->endOfCharBlock(rLHS, nLHSFirstDigitPos,
 309             rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);
 310         nRHSLastNonDigitPos = rBI->endOfCharBlock(rRHS, nRHSFirstDigitPos,
 311             rLocale, i18n::CharType::DECIMAL_DIGIT_NUMBER);
 312         if (nLHSLastNonDigitPos == -1)
 313             nLHSLastNonDigitPos = rLHS.getLength();
 314         if (nRHSLastNonDigitPos == -1)
 315             nRHSLastNonDigitPos = rRHS.getLength();
 316         nLHSChunkLen = nLHSLastNonDigitPos - nLHSFirstDigitPos;
 317         nRHSChunkLen = nRHSLastNonDigitPos - nRHSFirstDigitPos;
 318
 319         //To-Do: Possibly scale down those unicode codepoints that relate to
 320         //numbers outside of the normal 0-9 range, e.g. see GetLocalizedChar in
 321         //vcl
 322
 323         sal_uInt32 nLHS = comphelper::string::decimalStringToNumber(rLHS.copy(nLHSFirstDigitPos, nLHSChunkLen));
 324         sal_uInt32 nRHS = comphelper::string::decimalStringToNumber(rRHS.copy(nRHSFirstDigitPos, nRHSChunkLen));
 325
 326         if (nLHS != nRHS)
 327         {
 328             nRet = (nLHS < nRHS) ? -1 : 1;
 329             break;
 330         }
 331     }
 332
 333     return nRet;
 334 }
 335
 336 NaturalStringSorter::NaturalStringSorter(
 337     const uno::Reference< uno::XComponentContext > &rContext,
 338     const lang::Locale &rLocale) : m_aLocale(rLocale)
 339 {
 340     m_xCollator = i18n::Collator::create( rContext );
 341     m_xCollator->loadDefaultCollator(m_aLocale, 0);
 342     m_xBI = i18n::BreakIterator::create( rContext );
 343 }
 344
 345 namespace
 346 {
 347     //do OPER on each element of the string, return false
 348     //if any OPER is false, true otherwise
 349     template <bool (*OPER)(sal_Unicode), typename T>
 350     bool tmpl_is_OPER_AsciiString(const T &rString)
 351     {
 352         for (sal_Int32 i = 0; i < rString.getLength(); ++i)
 353         {
 354             if (!OPER(rString[i]))
 355                 return false;
 356         }
 357         return true;
 358     }
 359 }
 360
 361 bool isdigitAsciiString(const OString &rString)
 362 {
 363     return tmpl_is_OPER_AsciiString<isdigitAscii>(rString);
 364 }
 365
 366 bool isdigitAsciiString(const OUString &rString)
 367 {
 368     return tmpl_is_OPER_AsciiString<isdigitAscii>(rString);
 369 }
 370
 371 namespace
 372 {
 373     template <typename T, typename O> T tmpl_reverseString(const T &rIn)
 374     {
 375         if (rIn.isEmpty())
 376             return rIn;
 377
 378         sal_Int32 i = rIn.getLength();
 379         O sBuf(i);
 380         while (i)
 381             sBuf.append(rIn[--i]);
 382         return sBuf.makeStringAndClear();
 383     }
 384 }
 385
 386 OUString reverseString(const OUString &rStr)
 387 {
 388     return tmpl_reverseString<OUString, OUStringBuffer>(rStr);
 389 }
 390
 391 OString reverseString(const OString &rStr)
 392 {
 393     return tmpl_reverseString<OString, OStringBuffer>(rStr);
 394 }
 395
 396 sal_Int32 indexOfAny(OUString const& rIn,
 397         sal_Unicode const*const pChars, sal_Int32 const nPos)
 398 {
 399     for (sal_Int32 i = nPos; i < rIn.getLength(); ++i)
 400     {
 401         sal_Unicode const c = rIn[i];
 402         for (sal_Unicode const* pChar = pChars; *pChar; ++pChar)
 403         {
 404             if (c == *pChar)
 405             {
 406                 return i;
 407             }
 408         }
 409     }
 410     return -1;
 411 }
 412
 413 OUString setToken(const OUString& rIn, sal_Int32 nToken, sal_Unicode cTok,
 414     const OUString& rNewToken)
 415 {
 416     const sal_Unicode* pStr = rIn.getStr();
 417     sal_Int32 nLen = rIn.getLength();
 418     sal_Int32 nTok = 0;
 419     sal_Int32 nFirstChar = 0;
 420     sal_Int32 i = 0;
 421
 422     // Determine token position and length
 423     while ( i < nLen )
 424     {
 425         // Increase token count if match
 426         if (*pStr == cTok)
 427         {
 428             ++nTok;
 429
 430             if (nTok == nToken)
 431                 nFirstChar = i+1;
 432             else if (nTok > nToken)
 433                 break;
 434         }
 435
 436         ++pStr,
 437         ++i;
 438     }
 439
 440     if (nTok >= nToken)
 441         return rIn.replaceAt(nFirstChar, i-nFirstChar, rNewToken);
 442     return rIn;
 443 }
 444
 445 } }
 446
 447 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */