sc/source/core/tool/stringutil.cxx

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*
   3  * This file is part of the LibreOffice project.
   4  *
   5  * This Source Code Form is subject to the terms of the Mozilla Public
   6  * License, v. 2.0. If a copy of the MPL was not distributed with this
   7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
   8  *
   9  * This file incorporates work covered by the following license notice:
  10  *
  11  *   Licensed to the Apache Software Foundation (ASF) under one or more
  12  *   contributor license agreements. See the NOTICE file distributed
  13  *   with this work for additional information regarding copyright
  14  *   ownership. The ASF licenses this file to you under the Apache
  15  *   License, Version 2.0 (the "License"); you may not use this file
  16  *   except in compliance with the License. You may obtain a copy of
  17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
  18  */
  19
  20 #include <interpretercontext.hxx>
  21 #include <stringutil.hxx>
  22 #include <svl/numformat.hxx>
  23 #include <svl/zforlist.hxx>
  24
  25 #include <rtl/ustrbuf.hxx>
  26 #include <rtl/strbuf.hxx>
  27 #include <rtl/math.hxx>
  28
  29 ScSetStringParam::ScSetStringParam() :
  30     mpNumFormatter(nullptr),
  31     mbDetectNumberFormat(true),
  32     mbDetectScientificNumberFormat(true),
  33     meSetTextNumFormat(Never),
  34     mbHandleApostrophe(true),
  35     meStartListening(sc::SingleCellListening),
  36     mbCheckLinkFormula(false)
  37 {
  38 }
  39
  40 void ScSetStringParam::setTextInput()
  41 {
  42     mbDetectNumberFormat = false;
  43     mbDetectScientificNumberFormat = false;
  44     mbHandleApostrophe = false;
  45     meSetTextNumFormat = Always;
  46 }
  47
  48 void ScSetStringParam::setNumericInput()
  49 {
  50     mbDetectNumberFormat = true;
  51     mbDetectScientificNumberFormat = true;
  52     mbHandleApostrophe = true;
  53     meSetTextNumFormat = Never;
  54 }
  55
  56 bool ScStringUtil::parseSimpleNumber(
  57     const OUString& rStr, sal_Unicode dsep, sal_Unicode gsep, sal_Unicode dsepa, double& rVal, bool bDetectScientificNumber)
  58 {
  59     // Actually almost the entire pre-check is unnecessary and we could call
  60     // rtl::math::stringToDouble() just after having exchanged ascii space with
  61     // non-breaking space, if it wasn't for check of grouped digits. The NaN
  62     // and Inf cases that are accepted by stringToDouble() could be detected
  63     // using std::isfinite() on the result.
  64
  65     /* TODO: The grouped digits check isn't even valid for locales that do not
  66      * group in thousands ... e.g. Indian locales. But that's something also
  67      * the number scanner doesn't implement yet, only the formatter. */
  68
  69     OUStringBuffer aBuf;
  70
  71     sal_Int32 i = 0;
  72     sal_Int32 n = rStr.getLength();
  73     const sal_Unicode* p = rStr.getStr();
  74     const sal_Unicode* pLast = p + (n-1);
  75     sal_Int32 nPosDSep = -1, nPosGSep = -1;
  76     sal_uInt32 nDigitCount = 0;
  77     bool haveSeenDigit = false;
  78     sal_Int32 nPosExponent = -1;
  79
  80     // Skip preceding spaces.
  81     for (i = 0; i < n; ++i, ++p)
  82     {
  83         sal_Unicode c = *p;
  84         if (c != 0x0020 && c != 0x00A0)
  85             // first non-space character.  Exit.
  86             break;
  87     }
  88
  89     if (i == n)
  90         // the whole string is space.  Fail.
  91         return false;
  92
  93     n -= i; // Subtract the length of the preceding spaces.
  94
  95     // Determine the last non-space character.
  96     for (; p != pLast; --pLast, --n)
  97     {
  98         sal_Unicode c = *pLast;
  99         if (c != 0x0020 && c != 0x00A0)
 100             // Non space character. Exit.
 101             break;
 102     }
 103
 104     for (i = 0; i < n; ++i, ++p)
 105     {
 106         sal_Unicode c = *p;
 107         if (c == 0x0020 && gsep == 0x00A0)
 108             // ascii space to unicode space if that is group separator
 109             c = 0x00A0;
 110
 111         if ('0' <= c && c <= '9')
 112         {
 113             // this is a digit.
 114             aBuf.append(c);
 115             haveSeenDigit = true;
 116             ++nDigitCount;
 117         }
 118         else if (c == dsep || (dsepa && c == dsepa))
 119         {
 120             // this is a decimal separator.
 121
 122             if (nPosDSep >= 0)
 123                 // a second decimal separator -> not a valid number.
 124                 return false;
 125
 126             if (nPosGSep >= 0 && i - nPosGSep != 4)
 127                 // the number has a group separator and the decimal sep is not
 128                 // positioned correctly.
 129                 return false;
 130
 131             nPosDSep = i;
 132             nPosGSep = -1;
 133             aBuf.append(dsep);  // append the separator that is parsed in stringToDouble() below
 134             nDigitCount = 0;
 135         }
 136         else if (c == gsep)
 137         {
 138             // this is a group (thousand) separator.
 139
 140             if (!haveSeenDigit)
 141                 // not allowed before digits.
 142                 return false;
 143
 144             if (nPosDSep >= 0)
 145                 // not allowed after the decimal separator.
 146                 return false;
 147
 148             if (nPosGSep >= 0 && nDigitCount != 3)
 149                 // must be exactly 3 digits since the last group separator.
 150                 return false;
 151
 152             if (nPosExponent >= 0)
 153                 // not allowed in exponent.
 154                 return false;
 155
 156             nPosGSep = i;
 157             nDigitCount = 0;
 158         }
 159         else if (c == '-' || c == '+')
 160         {
 161             // A sign must be the first character if it's given, or immediately
 162             // follow the exponent character if present.
 163             if (i == 0 || (nPosExponent >= 0 && i == nPosExponent + 1))
 164                 aBuf.append(c);
 165             else
 166                 return false;
 167         }
 168         else if (c == 'E' || c == 'e')
 169         {
 170             // this is an exponent designator.
 171
 172             if (nPosExponent >= 0 || !bDetectScientificNumber)
 173                 // Only one exponent allowed.
 174                 return false;
 175
 176             if (nPosGSep >= 0 && nDigitCount != 3)
 177                 // must be exactly 3 digits since the last group separator.
 178                 return false;
 179
 180             aBuf.append(c);
 181             nPosExponent = i;
 182             nPosDSep = -1;
 183             nPosGSep = -1;
 184             nDigitCount = 0;
 185         }
 186         else
 187             return false;
 188     }
 189
 190     // finished parsing the number.
 191
 192     if (nPosGSep >= 0 && nDigitCount != 3)
 193         // must be exactly 3 digits since the last group separator.
 194         return false;
 195
 196     rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
 197     sal_Int32 nParseEnd = 0;
 198     rVal = ::rtl::math::stringToDouble( aBuf, dsep, gsep, &eStatus, &nParseEnd);
 199     if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aBuf.getLength())
 200         // Not a valid number or not entire string consumed.
 201         return false;
 202
 203     return true;
 204 }
 205
 206 bool ScStringUtil::parseSimpleNumber(
 207     const char* p, size_t n, char dsep, char gsep, double& rVal)
 208 {
 209     // Actually almost the entire pre-check is unnecessary and we could call
 210     // rtl::math::stringToDouble() just after having exchanged ascii space with
 211     // non-breaking space, if it wasn't for check of grouped digits. The NaN
 212     // and Inf cases that are accepted by stringToDouble() could be detected
 213     // using std::isfinite() on the result.
 214
 215     /* TODO: The grouped digits check isn't even valid for locales that do not
 216      * group in thousands ... e.g. Indian locales. But that's something also
 217      * the number scanner doesn't implement yet, only the formatter. */
 218
 219     OStringBuffer aBuf;
 220
 221     size_t i = 0;
 222     const char* pLast = p + (n-1);
 223     sal_Int32 nPosDSep = -1, nPosGSep = -1;
 224     sal_uInt32 nDigitCount = 0;
 225     bool haveSeenDigit = false;
 226     sal_Int32 nPosExponent = -1;
 227
 228     // Skip preceding spaces.
 229     for (i = 0; i < n; ++i, ++p)
 230     {
 231         char c = *p;
 232         if (c != ' ')
 233             // first non-space character.  Exit.
 234             break;
 235     }
 236
 237     if (i == n)
 238         // the whole string is space.  Fail.
 239         return false;
 240
 241     n -= i; // Subtract the length of the preceding spaces.
 242
 243     // Determine the last non-space character.
 244     for (; p != pLast; --pLast, --n)
 245     {
 246         char c = *pLast;
 247         if (c != ' ')
 248             // Non space character. Exit.
 249             break;
 250     }
 251
 252     for (i = 0; i < n; ++i, ++p)
 253     {
 254         char c = *p;
 255
 256         if ('0' <= c && c <= '9')
 257         {
 258             // this is a digit.
 259             aBuf.append(c);
 260             haveSeenDigit = true;
 261             ++nDigitCount;
 262         }
 263         else if (c == dsep)
 264         {
 265             // this is a decimal separator.
 266
 267             if (nPosDSep >= 0)
 268                 // a second decimal separator -> not a valid number.
 269                 return false;
 270
 271             if (nPosGSep >= 0 && i - nPosGSep != 4)
 272                 // the number has a group separator and the decimal sep is not
 273                 // positioned correctly.
 274                 return false;
 275
 276             nPosDSep = i;
 277             nPosGSep = -1;
 278             aBuf.append(c);
 279             nDigitCount = 0;
 280         }
 281         else if (c == gsep)
 282         {
 283             // this is a group (thousand) separator.
 284
 285             if (!haveSeenDigit)
 286                 // not allowed before digits.
 287                 return false;
 288
 289             if (nPosDSep >= 0)
 290                 // not allowed after the decimal separator.
 291                 return false;
 292
 293             if (nPosGSep >= 0 && nDigitCount != 3)
 294                 // must be exactly 3 digits since the last group separator.
 295                 return false;
 296
 297             if (nPosExponent >= 0)
 298                 // not allowed in exponent.
 299                 return false;
 300
 301             nPosGSep = i;
 302             nDigitCount = 0;
 303         }
 304         else if (c == '-' || c == '+')
 305         {
 306             // A sign must be the first character if it's given, or immediately
 307             // follow the exponent character if present.
 308             if (i == 0 || (nPosExponent >= 0 && i == static_cast<size_t>(nPosExponent+1)))
 309                 aBuf.append(c);
 310             else
 311                 return false;
 312         }
 313         else if (c == 'E' || c == 'e')
 314         {
 315             // this is an exponent designator.
 316
 317             if (nPosExponent >= 0)
 318                 // Only one exponent allowed.
 319                 return false;
 320
 321             if (nPosGSep >= 0 && nDigitCount != 3)
 322                 // must be exactly 3 digits since the last group separator.
 323                 return false;
 324
 325             aBuf.append(c);
 326             nPosExponent = i;
 327             nPosDSep = -1;
 328             nPosGSep = -1;
 329             nDigitCount = 0;
 330         }
 331         else
 332             return false;
 333     }
 334
 335     // finished parsing the number.
 336
 337     if (nPosGSep >= 0 && nDigitCount != 3)
 338         // must be exactly 3 digits since the last group separator.
 339         return false;
 340
 341     rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
 342     sal_Int32 nParseEnd = 0;
 343     rVal = ::rtl::math::stringToDouble( aBuf, dsep, gsep, &eStatus, &nParseEnd);
 344     if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aBuf.getLength())
 345         // Not a valid number or not entire string consumed.
 346         return false;
 347
 348     return true;
 349 }
 350
 351 OUString ScStringUtil::GetQuotedToken(const OUString &rIn, sal_Int32 nToken, const OUString& rQuotedPairs,
 352                                sal_Unicode cTok, sal_Int32& rIndex )
 353 {
 354     assert( !(rQuotedPairs.getLength()%2) );
 355     assert( rQuotedPairs.indexOf(cTok) == -1 );
 356
 357     const sal_Unicode*  pStr            = rIn.getStr();
 358     const sal_Unicode*  pQuotedStr      = rQuotedPairs.getStr();
 359     sal_Unicode         cQuotedEndChar  = 0;
 360     sal_Int32      nQuotedLen      = rQuotedPairs.getLength();
 361     sal_Int32      nLen            = rIn.getLength();
 362     sal_Int32      nTok            = 0;
 363     sal_Int32      nFirstChar      = rIndex;
 364     sal_Int32      i               = nFirstChar;
 365
 366     // detect token position and length
 367     pStr += i;
 368     while ( i < nLen )
 369     {
 370         sal_Unicode c = *pStr;
 371         if ( cQuotedEndChar )
 372         {
 373             // end of the quote reached ?
 374             if ( c == cQuotedEndChar )
 375                 cQuotedEndChar = 0;
 376         }
 377         else
 378         {
 379             // Is the char a quote-begin char ?
 380             sal_Int32 nQuoteIndex = 0;
 381             while ( nQuoteIndex < nQuotedLen )
 382             {
 383                 if ( pQuotedStr[nQuoteIndex] == c )
 384                 {
 385                     cQuotedEndChar = pQuotedStr[nQuoteIndex+1];
 386                     break;
 387                 }
 388                 else
 389                     nQuoteIndex += 2;
 390             }
 391
 392             // If the token-char matches then increase TokCount
 393             if ( c == cTok )
 394             {
 395                 ++nTok;
 396
 397                 if ( nTok == nToken )
 398                     nFirstChar = i+1;
 399                 else
 400                 {
 401                     if ( nTok > nToken )
 402                         break;
 403                 }
 404             }
 405         }
 406
 407         ++pStr;
 408         ++i;
 409     }
 410
 411     if ( nTok >= nToken )
 412     {
 413         if ( i < nLen )
 414             rIndex = i+1;
 415         else
 416             rIndex = -1;
 417         return rIn.copy( nFirstChar, i-nFirstChar );
 418     }
 419     else
 420     {
 421         rIndex = -1;
 422         return OUString();
 423     }
 424 }
 425
 426 bool ScStringUtil::isMultiline( std::u16string_view rStr )
 427 {
 428     return rStr.find_first_of(u"\n\r") != std::u16string_view::npos;
 429 }
 430
 431 ScInputStringType ScStringUtil::parseInputString(
 432     ScInterpreterContext& rContext, const OUString& rStr, LanguageType eLang )
 433 {
 434     ScInputStringType aRet;
 435     aRet.mnFormatType = SvNumFormatType::ALL;
 436     aRet.meType = ScInputStringType::Unknown;
 437     aRet.maText = rStr;
 438     aRet.mfValue = 0.0;
 439
 440     if (rStr.getLength() > 1 && rStr[0] == '=')
 441     {
 442         aRet.meType = ScInputStringType::Formula;
 443     }
 444     else if (rStr.getLength() > 1 && rStr[0] == '\'')
 445     {
 446         //  for bEnglish, "'" at the beginning is always interpreted as text
 447         //  marker and stripped
 448         aRet.maText = rStr.copy(1);
 449         aRet.meType = ScInputStringType::Text;
 450     }
 451     else        // test for English number format (only)
 452     {
 453         sal_uInt32 nNumFormat = rContext.NFGetStandardIndex(eLang);
 454
 455         if (rContext.NFIsNumberFormat(rStr, nNumFormat, aRet.mfValue))
 456         {
 457             aRet.meType = ScInputStringType::Number;
 458             aRet.mnFormatType = rContext.NFGetType(nNumFormat);
 459         }
 460         else if (!rStr.isEmpty())
 461             aRet.meType = ScInputStringType::Text;
 462
 463         // the (English) number format is not set
 464         //TODO: find and replace with matching local format???
 465     }
 466
 467     return aRet;
 468 }
 469
 470 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */