sc/source/core/tool/stringutil.cxx

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*
   3  * This file is part of the LibreOffice project.
   4  *
   5  * This Source Code Form is subject to the terms of the Mozilla Public
   6  * License, v. 2.0. If a copy of the MPL was not distributed with this
   7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
   8  *
   9  * This file incorporates work covered by the following license notice:
  10  *
  11  *   Licensed to the Apache Software Foundation (ASF) under one or more
  12  *   contributor license agreements. See the NOTICE file distributed
  13  *   with this work for additional information regarding copyright
  14  *   ownership. The ASF licenses this file to you under the Apache
  15  *   License, Version 2.0 (the "License"); you may not use this file
  16  *   except in compliance with the License. You may obtain a copy of
  17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
  18  */
  19
  20 #include "stringutil.hxx"
  21 #include "global.hxx"
  22 #include <svl/zforlist.hxx>
  23
  24 #include <rtl/ustrbuf.hxx>
  25 #include <rtl/strbuf.hxx>
  26 #include <rtl/math.hxx>
  27
  28 ScSetStringParam::ScSetStringParam() :
  29     mpNumFormatter(NULL),
  30     mbDetectNumberFormat(true),
  31     meSetTextNumFormat(Never),
  32     mbHandleApostrophe(true),
  33     meStartListening(sc::SingleCellListening)
  34 {
  35 }
  36
  37 void ScSetStringParam::setTextInput()
  38 {
  39     mbDetectNumberFormat = false;
  40     mbHandleApostrophe = false;
  41     meSetTextNumFormat = Always;
  42 }
  43
  44 void ScSetStringParam::setNumericInput()
  45 {
  46     mbDetectNumberFormat = true;
  47     mbHandleApostrophe = true;
  48     meSetTextNumFormat = Never;
  49 }
  50
  51 bool ScStringUtil::parseSimpleNumber(
  52     const OUString& rStr, sal_Unicode dsep, sal_Unicode gsep, double& rVal)
  53 {
  54     // Actually almost the entire pre-check is unnecessary and we could call
  55     // rtl::math::stringToDouble() just after having exchanged ascii space with
  56     // non-breaking space, if it wasn't for check of grouped digits. The NaN
  57     // and Inf cases that are accepted by stringToDouble() could be detected
  58     // using rtl::math::isFinite() on the result.
  59
  60     /* TODO: The grouped digits check isn't even valid for locales that do not
  61      * group in thousands ... e.g. Indian locales. But that's something also
  62      * the number scanner doesn't implement yet, only the formatter. */
  63
  64     OUStringBuffer aBuf;
  65
  66     sal_Int32 i = 0;
  67     sal_Int32 n = rStr.getLength();
  68     const sal_Unicode* p = rStr.getStr();
  69     const sal_Unicode* pLast = p + (n-1);
  70     sal_Int32 nPosDSep = -1, nPosGSep = -1;
  71     sal_uInt32 nDigitCount = 0;
  72     sal_Int32 nPosExponent = -1;
  73
  74     // Skip preceding spaces.
  75     for (i = 0; i < n; ++i, ++p)
  76     {
  77         sal_Unicode c = *p;
  78         if (c != 0x0020 && c != 0x00A0)
  79             // first non-space character.  Exit.
  80             break;
  81     }
  82
  83     if (i == n)
  84         // the whole string is space.  Fail.
  85         return false;
  86
  87     n -= i; // Subtract the length of the preceding spaces.
  88
  89     // Determine the last non-space character.
  90     for (; p != pLast; --pLast, --n)
  91     {
  92         sal_Unicode c = *pLast;
  93         if (c != 0x0020 && c != 0x00A0)
  94             // Non space character. Exit.
  95             break;
  96     }
  97
  98     for (i = 0; i < n; ++i, ++p)
  99     {
 100         sal_Unicode c = *p;
 101         if (c == 0x0020 && gsep == 0x00A0)
 102             // ascii space to unicode space if that is group separator
 103             c = 0x00A0;
 104
 105         if ('0' <= c && c <= '9')
 106         {
 107             // this is a digit.
 108             aBuf.append(c);
 109             ++nDigitCount;
 110         }
 111         else if (c == dsep)
 112         {
 113             // this is a decimal separator.
 114
 115             if (nPosDSep >= 0)
 116                 // a second decimal separator -> not a valid number.
 117                 return false;
 118
 119             if (nPosGSep >= 0 && i - nPosGSep != 4)
 120                 // the number has a group separator and the decimal sep is not
 121                 // positioned correctly.
 122                 return false;
 123
 124             nPosDSep = i;
 125             nPosGSep = -1;
 126             aBuf.append(c);
 127             nDigitCount = 0;
 128         }
 129         else if (c == gsep)
 130         {
 131             // this is a group (thousand) separator.
 132
 133             if (i == 0)
 134                 // not allowed as the first character.
 135                 return false;
 136
 137             if (nPosDSep >= 0)
 138                 // not allowed after the decimal separator.
 139                 return false;
 140
 141             if (nPosGSep >= 0 && nDigitCount != 3)
 142                 // must be exactly 3 digits since the last group separator.
 143                 return false;
 144
 145             if (nPosExponent >= 0)
 146                 // not allowed in exponent.
 147                 return false;
 148
 149             nPosGSep = i;
 150             nDigitCount = 0;
 151         }
 152         else if (c == '-' || c == '+')
 153         {
 154             // A sign must be the first character if it's given, or immediately
 155             // follow the exponent character if present.
 156             if (i == 0 || (nPosExponent >= 0 && i == nPosExponent + 1))
 157                 aBuf.append(c);
 158             else
 159                 return false;
 160         }
 161         else if (c == 'E' || c == 'e')
 162         {
 163             // this is an exponent designator.
 164
 165             if (nPosExponent >= 0)
 166                 // Only one exponent allowed.
 167                 return false;
 168
 169             if (nPosGSep >= 0 && nDigitCount != 3)
 170                 // must be exactly 3 digits since the last group separator.
 171                 return false;
 172
 173             aBuf.append(c);
 174             nPosExponent = i;
 175             nPosDSep = -1;
 176             nPosGSep = -1;
 177             nDigitCount = 0;
 178         }
 179         else
 180             return false;
 181     }
 182
 183     // finished parsing the number.
 184
 185     if (nPosGSep >= 0 && nDigitCount != 3)
 186         // must be exactly 3 digits since the last group separator.
 187         return false;
 188
 189     rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
 190     sal_Int32 nParseEnd = 0;
 191     OUString aString( aBuf.makeStringAndClear());
 192     rVal = ::rtl::math::stringToDouble( aString, dsep, gsep, &eStatus, &nParseEnd);
 193     if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aString.getLength())
 194         // Not a valid number or not entire string consumed.
 195         return false;
 196
 197     return true;
 198 }
 199
 200 bool ScStringUtil::parseSimpleNumber(
 201     const char* p, size_t n, char dsep, char gsep, double& rVal)
 202 {
 203     // Actually almost the entire pre-check is unnecessary and we could call
 204     // rtl::math::stringToDouble() just after having exchanged ascii space with
 205     // non-breaking space, if it wasn't for check of grouped digits. The NaN
 206     // and Inf cases that are accepted by stringToDouble() could be detected
 207     // using rtl::math::isFinite() on the result.
 208
 209     /* TODO: The grouped digits check isn't even valid for locales that do not
 210      * group in thousands ... e.g. Indian locales. But that's something also
 211      * the number scanner doesn't implement yet, only the formatter. */
 212
 213     OStringBuffer aBuf;
 214
 215     size_t i = 0;
 216     const char* pLast = p + (n-1);
 217     sal_Int32 nPosDSep = -1, nPosGSep = -1;
 218     sal_uInt32 nDigitCount = 0;
 219     sal_Int32 nPosExponent = -1;
 220
 221     // Skip preceding spaces.
 222     for (i = 0; i < n; ++i, ++p)
 223     {
 224         char c = *p;
 225         if (c != ' ')
 226             // first non-space character.  Exit.
 227             break;
 228     }
 229
 230     if (i == n)
 231         // the whole string is space.  Fail.
 232         return false;
 233
 234     n -= i; // Subtract the length of the preceding spaces.
 235
 236     // Determine the last non-space character.
 237     for (; p != pLast; --pLast, --n)
 238     {
 239         char c = *pLast;
 240         if (c != ' ')
 241             // Non space character. Exit.
 242             break;
 243     }
 244
 245     for (i = 0; i < n; ++i, ++p)
 246     {
 247         char c = *p;
 248
 249         if ('0' <= c && c <= '9')
 250         {
 251             // this is a digit.
 252             aBuf.append(c);
 253             ++nDigitCount;
 254         }
 255         else if (c == dsep)
 256         {
 257             // this is a decimal separator.
 258
 259             if (nPosDSep >= 0)
 260                 // a second decimal separator -> not a valid number.
 261                 return false;
 262
 263             if (nPosGSep >= 0 && i - nPosGSep != 4)
 264                 // the number has a group separator and the decimal sep is not
 265                 // positioned correctly.
 266                 return false;
 267
 268             nPosDSep = i;
 269             nPosGSep = -1;
 270             aBuf.append(c);
 271             nDigitCount = 0;
 272         }
 273         else if (c == gsep)
 274         {
 275             // this is a group (thousand) separator.
 276
 277             if (i == 0)
 278                 // not allowed as the first character.
 279                 return false;
 280
 281             if (nPosDSep >= 0)
 282                 // not allowed after the decimal separator.
 283                 return false;
 284
 285             if (nPosGSep >= 0 && nDigitCount != 3)
 286                 // must be exactly 3 digits since the last group separator.
 287                 return false;
 288
 289             if (nPosExponent >= 0)
 290                 // not allowed in exponent.
 291                 return false;
 292
 293             nPosGSep = i;
 294             nDigitCount = 0;
 295         }
 296         else if (c == '-' || c == '+')
 297         {
 298             // A sign must be the first character if it's given, or immediately
 299             // follow the exponent character if present.
 300             if (i == 0 || (nPosExponent >= 0 && i == static_cast<size_t>(nPosExponent+1)))
 301                 aBuf.append(c);
 302             else
 303                 return false;
 304         }
 305         else if (c == 'E' || c == 'e')
 306         {
 307             // this is an exponent designator.
 308
 309             if (nPosExponent >= 0)
 310                 // Only one exponent allowed.
 311                 return false;
 312
 313             if (nPosGSep >= 0 && nDigitCount != 3)
 314                 // must be exactly 3 digits since the last group separator.
 315                 return false;
 316
 317             aBuf.append(c);
 318             nPosExponent = i;
 319             nPosDSep = -1;
 320             nPosGSep = -1;
 321             nDigitCount = 0;
 322         }
 323         else
 324             return false;
 325     }
 326
 327     // finished parsing the number.
 328
 329     if (nPosGSep >= 0 && nDigitCount != 3)
 330         // must be exactly 3 digits since the last group separator.
 331         return false;
 332
 333     rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
 334     sal_Int32 nParseEnd = 0;
 335     OString aString( aBuf.makeStringAndClear());
 336     rVal = ::rtl::math::stringToDouble( aString, dsep, gsep, &eStatus, &nParseEnd);
 337     if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aString.getLength())
 338         // Not a valid number or not entire string consumed.
 339         return false;
 340
 341     return true;
 342 }
 343
 344 sal_Int32 ScStringUtil::GetQuotedTokenCount(const OUString &rIn, const OUString& rQuotedPairs, sal_Unicode cTok )
 345 {
 346     assert( !(rQuotedPairs.getLength()%2) );
 347     assert( rQuotedPairs.indexOf(cTok) );
 348
 349     // empty string: TokenCount is 0 per definition
 350     if ( rIn.isEmpty() )
 351         return 0;
 352
 353     sal_Int32      nTokCount       = 1;
 354     sal_Int32      nLen            = rIn.getLength();
 355     sal_Int32      nQuotedLen      = rQuotedPairs.getLength();
 356     sal_Unicode         cQuotedEndChar  = 0;
 357     const sal_Unicode*  pQuotedStr      = rQuotedPairs.getStr();
 358     const sal_Unicode*  pStr            = rIn.getStr();
 359     sal_Int32       nIndex         = 0;
 360     while ( nIndex < nLen )
 361     {
 362         sal_Unicode c = *pStr;
 363         if ( cQuotedEndChar )
 364         {
 365             // reached end of the quote ?
 366             if ( c == cQuotedEndChar )
 367                 cQuotedEndChar = 0;
 368         }
 369         else
 370         {
 371             // Is the char a quote-beginn char ?
 372             sal_Int32 nQuoteIndex = 0;
 373             while ( nQuoteIndex < nQuotedLen )
 374             {
 375                 if ( pQuotedStr[nQuoteIndex] == c )
 376                 {
 377                     cQuotedEndChar = pQuotedStr[nQuoteIndex+1];
 378                     break;
 379                 }
 380                 else
 381                     nQuoteIndex += 2;
 382             }
 383
 384             // If the token-char matches then increase TokCount
 385             if ( c == cTok )
 386                 ++nTokCount;
 387         }
 388
 389         ++pStr,
 390         ++nIndex;
 391     }
 392
 393     return nTokCount;
 394 }
 395
 396 OUString ScStringUtil::GetQuotedToken(const OUString &rIn, sal_Int32 nToken, const OUString& rQuotedPairs,
 397                                sal_Unicode cTok, sal_Int32& rIndex )
 398 {
 399     assert( !(rQuotedPairs.getLength()%2) );
 400     assert( rQuotedPairs.indexOf(cTok) == -1 );
 401
 402     const sal_Unicode*  pStr            = rIn.getStr();
 403     const sal_Unicode*  pQuotedStr      = rQuotedPairs.getStr();
 404     sal_Unicode         cQuotedEndChar  = 0;
 405     sal_Int32      nQuotedLen      = rQuotedPairs.getLength();
 406     sal_Int32      nLen            = rIn.getLength();
 407     sal_Int32      nTok            = 0;
 408     sal_Int32      nFirstChar      = rIndex;
 409     sal_Int32      i               = nFirstChar;
 410
 411     // detect token position and length
 412     pStr += i;
 413     while ( i < nLen )
 414     {
 415         sal_Unicode c = *pStr;
 416         if ( cQuotedEndChar )
 417         {
 418             // end of the quote reached ?
 419             if ( c == cQuotedEndChar )
 420                 cQuotedEndChar = 0;
 421         }
 422         else
 423         {
 424             // Is the char a quote-begin char ?
 425             sal_Int32 nQuoteIndex = 0;
 426             while ( nQuoteIndex < nQuotedLen )
 427             {
 428                 if ( pQuotedStr[nQuoteIndex] == c )
 429                 {
 430                     cQuotedEndChar = pQuotedStr[nQuoteIndex+1];
 431                     break;
 432                 }
 433                 else
 434                     nQuoteIndex += 2;
 435             }
 436
 437             // If the token-char matches then increase TokCount
 438             if ( c == cTok )
 439             {
 440                 ++nTok;
 441
 442                 if ( nTok == nToken )
 443                     nFirstChar = i+1;
 444                 else
 445                 {
 446                     if ( nTok > nToken )
 447                         break;
 448                 }
 449             }
 450         }
 451
 452         ++pStr,
 453         ++i;
 454     }
 455
 456     if ( nTok >= nToken )
 457     {
 458         if ( i < nLen )
 459             rIndex = i+1;
 460         else
 461             rIndex = -1;
 462         return rIn.copy( nFirstChar, i-nFirstChar );
 463     }
 464     else
 465     {
 466         rIndex = -1;
 467         return OUString();
 468     }
 469 }
 470
 471 bool ScStringUtil::isMultiline( const OUString& rStr )
 472 {
 473     if (rStr.indexOf('\n') != -1)
 474         return true;
 475
 476     if (rStr.indexOf(CHAR_CR) != -1)
 477         return true;
 478
 479     return false;
 480 }
 481
 482 ScInputStringType ScStringUtil::parseInputString(
 483     SvNumberFormatter& rFormatter, const OUString& rStr, LanguageType eLang )
 484 {
 485     ScInputStringType aRet;
 486     aRet.mnFormatType = 0;
 487     aRet.meType = ScInputStringType::Unknown;
 488     aRet.maText = rStr;
 489     aRet.mfValue = 0.0;
 490
 491     if (rStr.getLength() > 1 && rStr[0] == '=')
 492     {
 493         aRet.meType = ScInputStringType::Formula;
 494     }
 495     else if (rStr.getLength() > 1 && rStr[0] == '\'')
 496     {
 497         //  for bEnglish, "'" at the beginning is always interpreted as text
 498         //  marker and stripped
 499         aRet.maText = rStr.copy(1);
 500         aRet.meType = ScInputStringType::Text;
 501     }
 502     else        // (nur) auf englisches Zahlformat testen
 503     {
 504         sal_uInt32 nNumFormat = rFormatter.GetStandardIndex(eLang);
 505
 506         if (rFormatter.IsNumberFormat(rStr, nNumFormat, aRet.mfValue))
 507         {
 508             aRet.meType = ScInputStringType::Number;
 509             aRet.mnFormatType = rFormatter.GetType(nNumFormat);
 510         }
 511         else if (!rStr.isEmpty())
 512             aRet.meType = ScInputStringType::Text;
 513
 514         // the (English) number format is not set
 515         //TODO: find and replace with matching local format???
 516     }
 517
 518     return aRet;
 519 }
 520
 521 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */