sc/source/core/tool/stringutil.cxx

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*
   3  * This file is part of the LibreOffice project.
   4  *
   5  * This Source Code Form is subject to the terms of the Mozilla Public
   6  * License, v. 2.0. If a copy of the MPL was not distributed with this
   7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
   8  *
   9  * This file incorporates work covered by the following license notice:
  10  *
  11  *   Licensed to the Apache Software Foundation (ASF) under one or more
  12  *   contributor license agreements. See the NOTICE file distributed
  13  *   with this work for additional information regarding copyright
  14  *   ownership. The ASF licenses this file to you under the Apache
  15  *   License, Version 2.0 (the "License"); you may not use this file
  16  *   except in compliance with the License. You may obtain a copy of
  17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
  18  */
  19
  20 #include "stringutil.hxx"
  21 #include "global.hxx"
  22 #include "svl/zforlist.hxx"
  23
  24 #include <rtl/ustrbuf.hxx>
  25 #include <rtl/strbuf.hxx>
  26 #include <rtl/math.hxx>
  27
  28 ScSetStringParam::ScSetStringParam() :
  29     mpNumFormatter(NULL),
  30     mbDetectNumberFormat(true),
  31     meSetTextNumFormat(Never),
  32     mbHandleApostrophe(true)
  33 {
  34 }
  35
  36 void ScSetStringParam::setTextInput()
  37 {
  38     mbDetectNumberFormat = false;
  39     mbHandleApostrophe = false;
  40     meSetTextNumFormat = Always;
  41 }
  42
  43 void ScSetStringParam::setNumericInput()
  44 {
  45     mbDetectNumberFormat = true;
  46     mbHandleApostrophe = true;
  47     meSetTextNumFormat = Never;
  48 }
  49
  50 bool ScStringUtil::parseSimpleNumber(
  51     const OUString& rStr, sal_Unicode dsep, sal_Unicode gsep, double& rVal)
  52 {
  53     // Actually almost the entire pre-check is unnecessary and we could call
  54     // rtl::math::stringToDouble() just after having exchanged ascii space with
  55     // non-breaking space, if it wasn't for check of grouped digits. The NaN
  56     // and Inf cases that are accepted by stringToDouble() could be detected
  57     // using rtl::math::isFinite() on the result.
  58
  59     /* TODO: The grouped digits check isn't even valid for locales that do not
  60      * group in thousands ... e.g. Indian locales. But that's something also
  61      * the number scanner doesn't implement yet, only the formatter. */
  62
  63     OUStringBuffer aBuf;
  64
  65     sal_Int32 i = 0;
  66     sal_Int32 n = rStr.getLength();
  67     const sal_Unicode* p = rStr.getStr();
  68     const sal_Unicode* pLast = p + (n-1);
  69     sal_Int32 nPosDSep = -1, nPosGSep = -1;
  70     sal_uInt32 nDigitCount = 0;
  71     sal_Int32 nPosExponent = -1;
  72
  73     // Skip preceding spaces.
  74     for (i = 0; i < n; ++i, ++p)
  75     {
  76         sal_Unicode c = *p;
  77         if (c != 0x0020 && c != 0x00A0)
  78             // first non-space character.  Exit.
  79             break;
  80     }
  81
  82     if (i == n)
  83         // the whole string is space.  Fail.
  84         return false;
  85
  86     n -= i; // Subtract the length of the preceding spaces.
  87
  88     // Determine the last non-space character.
  89     for (; p != pLast; --pLast, --n)
  90     {
  91         sal_Unicode c = *pLast;
  92         if (c != 0x0020 && c != 0x00A0)
  93             // Non space character. Exit.
  94             break;
  95     }
  96
  97     for (i = 0; i < n; ++i, ++p)
  98     {
  99         sal_Unicode c = *p;
 100         if (c == 0x0020 && gsep == 0x00A0)
 101             // ascii space to unicode space if that is group separator
 102             c = 0x00A0;
 103
 104         if ('0' <= c && c <= '9')
 105         {
 106             // this is a digit.
 107             aBuf.append(c);
 108             ++nDigitCount;
 109         }
 110         else if (c == dsep)
 111         {
 112             // this is a decimal separator.
 113
 114             if (nPosDSep >= 0)
 115                 // a second decimal separator -> not a valid number.
 116                 return false;
 117
 118             if (nPosGSep >= 0 && i - nPosGSep != 4)
 119                 // the number has a group separator and the decimal sep is not
 120                 // positioned correctly.
 121                 return false;
 122
 123             nPosDSep = i;
 124             nPosGSep = -1;
 125             aBuf.append(c);
 126             nDigitCount = 0;
 127         }
 128         else if (c == gsep)
 129         {
 130             // this is a group (thousand) separator.
 131
 132             if (i == 0)
 133                 // not allowed as the first character.
 134                 return false;
 135
 136             if (nPosDSep >= 0)
 137                 // not allowed after the decimal separator.
 138                 return false;
 139
 140             if (nPosGSep >= 0 && nDigitCount != 3)
 141                 // must be exactly 3 digits since the last group separator.
 142                 return false;
 143
 144             if (nPosExponent >= 0)
 145                 // not allowed in exponent.
 146                 return false;
 147
 148             nPosGSep = i;
 149             nDigitCount = 0;
 150         }
 151         else if (c == '-' || c == '+')
 152         {
 153             // A sign must be the first character if it's given, or immediately
 154             // follow the exponent character if present.
 155             if (i == 0 || (nPosExponent >= 0 && i == nPosExponent + 1))
 156                 aBuf.append(c);
 157             else
 158                 return false;
 159         }
 160         else if (c == 'E' || c == 'e')
 161         {
 162             // this is an exponent designator.
 163
 164             if (nPosExponent >= 0)
 165                 // Only one exponent allowed.
 166                 return false;
 167
 168             if (nPosGSep >= 0 && nDigitCount != 3)
 169                 // must be exactly 3 digits since the last group separator.
 170                 return false;
 171
 172             aBuf.append(c);
 173             nPosExponent = i;
 174             nPosDSep = -1;
 175             nPosGSep = -1;
 176             nDigitCount = 0;
 177         }
 178         else
 179             return false;
 180     }
 181
 182     // finished parsing the number.
 183
 184     if (nPosGSep >= 0 && nDigitCount != 3)
 185         // must be exactly 3 digits since the last group separator.
 186         return false;
 187
 188     rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
 189     sal_Int32 nParseEnd = 0;
 190     OUString aString( aBuf.makeStringAndClear());
 191     rVal = ::rtl::math::stringToDouble( aString, dsep, gsep, &eStatus, &nParseEnd);
 192     if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aString.getLength())
 193         // Not a valid number or not entire string consumed.
 194         return false;
 195
 196     return true;
 197 }
 198
 199 bool ScStringUtil::parseSimpleNumber(
 200     const char* p, size_t n, char dsep, char gsep, double& rVal)
 201 {
 202     // Actually almost the entire pre-check is unnecessary and we could call
 203     // rtl::math::stringToDouble() just after having exchanged ascii space with
 204     // non-breaking space, if it wasn't for check of grouped digits. The NaN
 205     // and Inf cases that are accepted by stringToDouble() could be detected
 206     // using rtl::math::isFinite() on the result.
 207
 208     /* TODO: The grouped digits check isn't even valid for locales that do not
 209      * group in thousands ... e.g. Indian locales. But that's something also
 210      * the number scanner doesn't implement yet, only the formatter. */
 211
 212     OStringBuffer aBuf;
 213
 214     size_t i = 0;
 215     const char* pLast = p + (n-1);
 216     sal_Int32 nPosDSep = -1, nPosGSep = -1;
 217     sal_uInt32 nDigitCount = 0;
 218     sal_Int32 nPosExponent = -1;
 219
 220     // Skip preceding spaces.
 221     for (i = 0; i < n; ++i, ++p)
 222     {
 223         char c = *p;
 224         if (c != ' ')
 225             // first non-space character.  Exit.
 226             break;
 227     }
 228
 229     if (i == n)
 230         // the whole string is space.  Fail.
 231         return false;
 232
 233     n -= i; // Subtract the length of the preceding spaces.
 234
 235     // Determine the last non-space character.
 236     for (; p != pLast; --pLast, --n)
 237     {
 238         char c = *pLast;
 239         if (c != ' ')
 240             // Non space character. Exit.
 241             break;
 242     }
 243
 244     for (i = 0; i < n; ++i, ++p)
 245     {
 246         char c = *p;
 247
 248         if ('0' <= c && c <= '9')
 249         {
 250             // this is a digit.
 251             aBuf.append(c);
 252             ++nDigitCount;
 253         }
 254         else if (c == dsep)
 255         {
 256             // this is a decimal separator.
 257
 258             if (nPosDSep >= 0)
 259                 // a second decimal separator -> not a valid number.
 260                 return false;
 261
 262             if (nPosGSep >= 0 && i - nPosGSep != 4)
 263                 // the number has a group separator and the decimal sep is not
 264                 // positioned correctly.
 265                 return false;
 266
 267             nPosDSep = i;
 268             nPosGSep = -1;
 269             aBuf.append(c);
 270             nDigitCount = 0;
 271         }
 272         else if (c == gsep)
 273         {
 274             // this is a group (thousand) separator.
 275
 276             if (i == 0)
 277                 // not allowed as the first character.
 278                 return false;
 279
 280             if (nPosDSep >= 0)
 281                 // not allowed after the decimal separator.
 282                 return false;
 283
 284             if (nPosGSep >= 0 && nDigitCount != 3)
 285                 // must be exactly 3 digits since the last group separator.
 286                 return false;
 287
 288             if (nPosExponent >= 0)
 289                 // not allowed in exponent.
 290                 return false;
 291
 292             nPosGSep = i;
 293             nDigitCount = 0;
 294         }
 295         else if (c == '-' || c == '+')
 296         {
 297             // A sign must be the first character if it's given, or immediately
 298             // follow the exponent character if present.
 299             if (i == 0 || (nPosExponent >= 0 && i == static_cast<size_t>(nPosExponent+1)))
 300                 aBuf.append(c);
 301             else
 302                 return false;
 303         }
 304         else if (c == 'E' || c == 'e')
 305         {
 306             // this is an exponent designator.
 307
 308             if (nPosExponent >= 0)
 309                 // Only one exponent allowed.
 310                 return false;
 311
 312             if (nPosGSep >= 0 && nDigitCount != 3)
 313                 // must be exactly 3 digits since the last group separator.
 314                 return false;
 315
 316             aBuf.append(c);
 317             nPosExponent = i;
 318             nPosDSep = -1;
 319             nPosGSep = -1;
 320             nDigitCount = 0;
 321         }
 322         else
 323             return false;
 324     }
 325
 326     // finished parsing the number.
 327
 328     if (nPosGSep >= 0 && nDigitCount != 3)
 329         // must be exactly 3 digits since the last group separator.
 330         return false;
 331
 332     rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
 333     sal_Int32 nParseEnd = 0;
 334     OString aString( aBuf.makeStringAndClear());
 335     rVal = ::rtl::math::stringToDouble( aString, dsep, gsep, &eStatus, &nParseEnd);
 336     if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aString.getLength())
 337         // Not a valid number or not entire string consumed.
 338         return false;
 339
 340     return true;
 341 }
 342
 343 sal_Int32 ScStringUtil::GetQuotedTokenCount(const OUString &rIn, const OUString& rQuotedPairs, sal_Unicode cTok )
 344 {
 345     assert( !(rQuotedPairs.getLength()%2) );
 346     assert( rQuotedPairs.indexOf(cTok) );
 347
 348     // empty string: TokenCount is 0 per definition
 349     if ( rIn.isEmpty() )
 350         return 0;
 351
 352     sal_Int32      nTokCount       = 1;
 353     sal_Int32      nLen            = rIn.getLength();
 354     sal_Int32      nQuotedLen      = rQuotedPairs.getLength();
 355     sal_Unicode         cQuotedEndChar  = 0;
 356     const sal_Unicode*  pQuotedStr      = rQuotedPairs.getStr();
 357     const sal_Unicode*  pStr            = rIn.getStr();
 358     sal_Int32       nIndex         = 0;
 359     while ( nIndex < nLen )
 360     {
 361         sal_Unicode c = *pStr;
 362         if ( cQuotedEndChar )
 363         {
 364             // reached end of the quote ?
 365             if ( c == cQuotedEndChar )
 366                 cQuotedEndChar = 0;
 367         }
 368         else
 369         {
 370             // Is the char a quote-beginn char ?
 371             sal_Int32 nQuoteIndex = 0;
 372             while ( nQuoteIndex < nQuotedLen )
 373             {
 374                 if ( pQuotedStr[nQuoteIndex] == c )
 375                 {
 376                     cQuotedEndChar = pQuotedStr[nQuoteIndex+1];
 377                     break;
 378                 }
 379                 else
 380                     nQuoteIndex += 2;
 381             }
 382
 383             // If the token-char matches then increase TokCount
 384             if ( c == cTok )
 385                 ++nTokCount;
 386         }
 387
 388         ++pStr,
 389         ++nIndex;
 390     }
 391
 392     return nTokCount;
 393 }
 394
 395 OUString ScStringUtil::GetQuotedToken(const OUString &rIn, sal_Int32 nToken, const OUString& rQuotedPairs,
 396                                sal_Unicode cTok, sal_Int32& rIndex )
 397 {
 398     assert( !(rQuotedPairs.getLength()%2) );
 399     assert( rQuotedPairs.indexOf(cTok) == -1 );
 400
 401     const sal_Unicode*  pStr            = rIn.getStr();
 402     const sal_Unicode*  pQuotedStr      = rQuotedPairs.getStr();
 403     sal_Unicode         cQuotedEndChar  = 0;
 404     sal_Int32      nQuotedLen      = rQuotedPairs.getLength();
 405     sal_Int32      nLen            = rIn.getLength();
 406     sal_Int32      nTok            = 0;
 407     sal_Int32      nFirstChar      = rIndex;
 408     sal_Int32      i               = nFirstChar;
 409
 410     // detect token position and length
 411     pStr += i;
 412     while ( i < nLen )
 413     {
 414         sal_Unicode c = *pStr;
 415         if ( cQuotedEndChar )
 416         {
 417             // end of the quote reached ?
 418             if ( c == cQuotedEndChar )
 419                 cQuotedEndChar = 0;
 420         }
 421         else
 422         {
 423             // Is the char a quote-begin char ?
 424             sal_Int32 nQuoteIndex = 0;
 425             while ( nQuoteIndex < nQuotedLen )
 426             {
 427                 if ( pQuotedStr[nQuoteIndex] == c )
 428                 {
 429                     cQuotedEndChar = pQuotedStr[nQuoteIndex+1];
 430                     break;
 431                 }
 432                 else
 433                     nQuoteIndex += 2;
 434             }
 435
 436             // If the token-char matches then increase TokCount
 437             if ( c == cTok )
 438             {
 439                 ++nTok;
 440
 441                 if ( nTok == nToken )
 442                     nFirstChar = i+1;
 443                 else
 444                 {
 445                     if ( nTok > nToken )
 446                         break;
 447                 }
 448             }
 449         }
 450
 451         ++pStr,
 452         ++i;
 453     }
 454
 455     if ( nTok >= nToken )
 456     {
 457         if ( i < nLen )
 458             rIndex = i+1;
 459         else
 460             rIndex = -1;
 461         return rIn.copy( nFirstChar, i-nFirstChar );
 462     }
 463     else
 464     {
 465         rIndex = -1;
 466         return OUString();
 467     }
 468 }
 469
 470 bool ScStringUtil::isMultiline( const OUString& rStr )
 471 {
 472     if (rStr.indexOf('\n') != -1)
 473         return true;
 474
 475     if (rStr.indexOf(CHAR_CR) != -1)
 476         return true;
 477
 478     return false;
 479 }
 480
 481 ScInputStringType ScStringUtil::parseInputString(
 482     SvNumberFormatter& rFormatter, const OUString& rStr, LanguageType eLang )
 483 {
 484     ScInputStringType aRet;
 485     aRet.mnFormatType = 0;
 486     aRet.meType = ScInputStringType::Unknown;
 487     aRet.maText = rStr;
 488     aRet.mfValue = 0.0;
 489
 490     if (rStr.getLength() > 1 && rStr[0] == '=')
 491     {
 492         aRet.meType = ScInputStringType::Formula;
 493     }
 494     else if (rStr.getLength() > 1 && rStr[0] == '\'')
 495     {
 496         //  for bEnglish, "'" at the beginning is always interpreted as text
 497         //  marker and stripped
 498         aRet.maText = rStr.copy(1);
 499         aRet.meType = ScInputStringType::Text;
 500     }
 501     else        // (nur) auf englisches Zahlformat testen
 502     {
 503         sal_uInt32 nNumFormat = rFormatter.GetStandardIndex(eLang);
 504
 505         if (rFormatter.IsNumberFormat(rStr, nNumFormat, aRet.mfValue))
 506         {
 507             aRet.meType = ScInputStringType::Number;
 508             aRet.mnFormatType = rFormatter.GetType(nNumFormat);
 509         }
 510         else if (!rStr.isEmpty())
 511             aRet.meType = ScInputStringType::Text;
 512
 513         //  das (englische) Zahlformat wird nicht gesetzt
 514         //! passendes lokales Format suchen und setzen???
 515     }
 516
 517     return aRet;
 518 }
 519
 520 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */