pyuno: call target only with internal python
[LibreOffice.git] / sc / source / core / tool / stringutil.cxx
blob384a4ca5fd65bbbe48773f41e55c3c39236fcc5f
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <interpretercontext.hxx>
21 #include <stringutil.hxx>
22 #include <svl/numformat.hxx>
23 #include <svl/zforlist.hxx>
25 #include <rtl/ustrbuf.hxx>
26 #include <rtl/strbuf.hxx>
27 #include <rtl/math.hxx>
29 ScSetStringParam::ScSetStringParam() :
30 mpNumFormatter(nullptr),
31 mbDetectNumberFormat(true),
32 mbDetectScientificNumberFormat(true),
33 meSetTextNumFormat(Never),
34 mbHandleApostrophe(true),
35 meStartListening(sc::SingleCellListening),
36 mbCheckLinkFormula(false)
40 void ScSetStringParam::setTextInput()
42 mbDetectNumberFormat = false;
43 mbDetectScientificNumberFormat = false;
44 mbHandleApostrophe = false;
45 meSetTextNumFormat = Always;
48 void ScSetStringParam::setNumericInput()
50 mbDetectNumberFormat = true;
51 mbDetectScientificNumberFormat = true;
52 mbHandleApostrophe = true;
53 meSetTextNumFormat = Never;
56 bool ScStringUtil::parseSimpleNumber(
57 const OUString& rStr, sal_Unicode dsep, sal_Unicode gsep, sal_Unicode dsepa, double& rVal, bool bDetectScientificNumber)
59 // Actually almost the entire pre-check is unnecessary and we could call
60 // rtl::math::stringToDouble() just after having exchanged ascii space with
61 // non-breaking space, if it wasn't for check of grouped digits. The NaN
62 // and Inf cases that are accepted by stringToDouble() could be detected
63 // using std::isfinite() on the result.
65 /* TODO: The grouped digits check isn't even valid for locales that do not
66 * group in thousands ... e.g. Indian locales. But that's something also
67 * the number scanner doesn't implement yet, only the formatter. */
69 OUStringBuffer aBuf;
71 sal_Int32 i = 0;
72 sal_Int32 n = rStr.getLength();
73 const sal_Unicode* p = rStr.getStr();
74 const sal_Unicode* pLast = p + (n-1);
75 sal_Int32 nPosDSep = -1, nPosGSep = -1;
76 sal_uInt32 nDigitCount = 0;
77 bool haveSeenDigit = false;
78 sal_Int32 nPosExponent = -1;
80 // Skip preceding spaces.
81 for (i = 0; i < n; ++i, ++p)
83 sal_Unicode c = *p;
84 if (c != 0x0020 && c != 0x00A0)
85 // first non-space character. Exit.
86 break;
89 if (i == n)
90 // the whole string is space. Fail.
91 return false;
93 n -= i; // Subtract the length of the preceding spaces.
95 // Determine the last non-space character.
96 for (; p != pLast; --pLast, --n)
98 sal_Unicode c = *pLast;
99 if (c != 0x0020 && c != 0x00A0)
100 // Non space character. Exit.
101 break;
104 for (i = 0; i < n; ++i, ++p)
106 sal_Unicode c = *p;
107 if (c == 0x0020 && gsep == 0x00A0)
108 // ascii space to unicode space if that is group separator
109 c = 0x00A0;
111 if ('0' <= c && c <= '9')
113 // this is a digit.
114 aBuf.append(c);
115 haveSeenDigit = true;
116 ++nDigitCount;
118 else if (c == dsep || (dsepa && c == dsepa))
120 // this is a decimal separator.
122 if (nPosDSep >= 0)
123 // a second decimal separator -> not a valid number.
124 return false;
126 if (nPosGSep >= 0 && i - nPosGSep != 4)
127 // the number has a group separator and the decimal sep is not
128 // positioned correctly.
129 return false;
131 nPosDSep = i;
132 nPosGSep = -1;
133 aBuf.append(dsep); // append the separator that is parsed in stringToDouble() below
134 nDigitCount = 0;
136 else if (c == gsep)
138 // this is a group (thousand) separator.
140 if (!haveSeenDigit)
141 // not allowed before digits.
142 return false;
144 if (nPosDSep >= 0)
145 // not allowed after the decimal separator.
146 return false;
148 if (nPosGSep >= 0 && nDigitCount != 3)
149 // must be exactly 3 digits since the last group separator.
150 return false;
152 if (nPosExponent >= 0)
153 // not allowed in exponent.
154 return false;
156 nPosGSep = i;
157 nDigitCount = 0;
159 else if (c == '-' || c == '+')
161 // A sign must be the first character if it's given, or immediately
162 // follow the exponent character if present.
163 if (i == 0 || (nPosExponent >= 0 && i == nPosExponent + 1))
164 aBuf.append(c);
165 else
166 return false;
168 else if (c == 'E' || c == 'e')
170 // this is an exponent designator.
172 if (nPosExponent >= 0 || !bDetectScientificNumber)
173 // Only one exponent allowed.
174 return false;
176 if (nPosGSep >= 0 && nDigitCount != 3)
177 // must be exactly 3 digits since the last group separator.
178 return false;
180 aBuf.append(c);
181 nPosExponent = i;
182 nPosDSep = -1;
183 nPosGSep = -1;
184 nDigitCount = 0;
186 else
187 return false;
190 // finished parsing the number.
192 if (nPosGSep >= 0 && nDigitCount != 3)
193 // must be exactly 3 digits since the last group separator.
194 return false;
196 rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
197 sal_Int32 nParseEnd = 0;
198 rVal = ::rtl::math::stringToDouble( aBuf, dsep, gsep, &eStatus, &nParseEnd);
199 if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aBuf.getLength())
200 // Not a valid number or not entire string consumed.
201 return false;
203 return true;
206 bool ScStringUtil::parseSimpleNumber(
207 const char* p, size_t n, char dsep, char gsep, double& rVal)
209 // Actually almost the entire pre-check is unnecessary and we could call
210 // rtl::math::stringToDouble() just after having exchanged ascii space with
211 // non-breaking space, if it wasn't for check of grouped digits. The NaN
212 // and Inf cases that are accepted by stringToDouble() could be detected
213 // using std::isfinite() on the result.
215 /* TODO: The grouped digits check isn't even valid for locales that do not
216 * group in thousands ... e.g. Indian locales. But that's something also
217 * the number scanner doesn't implement yet, only the formatter. */
219 OStringBuffer aBuf;
221 size_t i = 0;
222 const char* pLast = p + (n-1);
223 sal_Int32 nPosDSep = -1, nPosGSep = -1;
224 sal_uInt32 nDigitCount = 0;
225 bool haveSeenDigit = false;
226 sal_Int32 nPosExponent = -1;
228 // Skip preceding spaces.
229 for (i = 0; i < n; ++i, ++p)
231 char c = *p;
232 if (c != ' ')
233 // first non-space character. Exit.
234 break;
237 if (i == n)
238 // the whole string is space. Fail.
239 return false;
241 n -= i; // Subtract the length of the preceding spaces.
243 // Determine the last non-space character.
244 for (; p != pLast; --pLast, --n)
246 char c = *pLast;
247 if (c != ' ')
248 // Non space character. Exit.
249 break;
252 for (i = 0; i < n; ++i, ++p)
254 char c = *p;
256 if ('0' <= c && c <= '9')
258 // this is a digit.
259 aBuf.append(c);
260 haveSeenDigit = true;
261 ++nDigitCount;
263 else if (c == dsep)
265 // this is a decimal separator.
267 if (nPosDSep >= 0)
268 // a second decimal separator -> not a valid number.
269 return false;
271 if (nPosGSep >= 0 && i - nPosGSep != 4)
272 // the number has a group separator and the decimal sep is not
273 // positioned correctly.
274 return false;
276 nPosDSep = i;
277 nPosGSep = -1;
278 aBuf.append(c);
279 nDigitCount = 0;
281 else if (c == gsep)
283 // this is a group (thousand) separator.
285 if (!haveSeenDigit)
286 // not allowed before digits.
287 return false;
289 if (nPosDSep >= 0)
290 // not allowed after the decimal separator.
291 return false;
293 if (nPosGSep >= 0 && nDigitCount != 3)
294 // must be exactly 3 digits since the last group separator.
295 return false;
297 if (nPosExponent >= 0)
298 // not allowed in exponent.
299 return false;
301 nPosGSep = i;
302 nDigitCount = 0;
304 else if (c == '-' || c == '+')
306 // A sign must be the first character if it's given, or immediately
307 // follow the exponent character if present.
308 if (i == 0 || (nPosExponent >= 0 && i == static_cast<size_t>(nPosExponent+1)))
309 aBuf.append(c);
310 else
311 return false;
313 else if (c == 'E' || c == 'e')
315 // this is an exponent designator.
317 if (nPosExponent >= 0)
318 // Only one exponent allowed.
319 return false;
321 if (nPosGSep >= 0 && nDigitCount != 3)
322 // must be exactly 3 digits since the last group separator.
323 return false;
325 aBuf.append(c);
326 nPosExponent = i;
327 nPosDSep = -1;
328 nPosGSep = -1;
329 nDigitCount = 0;
331 else
332 return false;
335 // finished parsing the number.
337 if (nPosGSep >= 0 && nDigitCount != 3)
338 // must be exactly 3 digits since the last group separator.
339 return false;
341 rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
342 sal_Int32 nParseEnd = 0;
343 rVal = ::rtl::math::stringToDouble( aBuf, dsep, gsep, &eStatus, &nParseEnd);
344 if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aBuf.getLength())
345 // Not a valid number or not entire string consumed.
346 return false;
348 return true;
351 OUString ScStringUtil::GetQuotedToken(const OUString &rIn, sal_Int32 nToken, const OUString& rQuotedPairs,
352 sal_Unicode cTok, sal_Int32& rIndex )
354 assert( !(rQuotedPairs.getLength()%2) );
355 assert( rQuotedPairs.indexOf(cTok) == -1 );
357 const sal_Unicode* pStr = rIn.getStr();
358 const sal_Unicode* pQuotedStr = rQuotedPairs.getStr();
359 sal_Unicode cQuotedEndChar = 0;
360 sal_Int32 nQuotedLen = rQuotedPairs.getLength();
361 sal_Int32 nLen = rIn.getLength();
362 sal_Int32 nTok = 0;
363 sal_Int32 nFirstChar = rIndex;
364 sal_Int32 i = nFirstChar;
366 // detect token position and length
367 pStr += i;
368 while ( i < nLen )
370 sal_Unicode c = *pStr;
371 if ( cQuotedEndChar )
373 // end of the quote reached ?
374 if ( c == cQuotedEndChar )
375 cQuotedEndChar = 0;
377 else
379 // Is the char a quote-begin char ?
380 sal_Int32 nQuoteIndex = 0;
381 while ( nQuoteIndex < nQuotedLen )
383 if ( pQuotedStr[nQuoteIndex] == c )
385 cQuotedEndChar = pQuotedStr[nQuoteIndex+1];
386 break;
388 else
389 nQuoteIndex += 2;
392 // If the token-char matches then increase TokCount
393 if ( c == cTok )
395 ++nTok;
397 if ( nTok == nToken )
398 nFirstChar = i+1;
399 else
401 if ( nTok > nToken )
402 break;
407 ++pStr;
408 ++i;
411 if ( nTok >= nToken )
413 if ( i < nLen )
414 rIndex = i+1;
415 else
416 rIndex = -1;
417 return rIn.copy( nFirstChar, i-nFirstChar );
419 else
421 rIndex = -1;
422 return OUString();
426 bool ScStringUtil::isMultiline( std::u16string_view rStr )
428 return rStr.find_first_of(u"\n\r") != std::u16string_view::npos;
431 ScInputStringType ScStringUtil::parseInputString(
432 ScInterpreterContext& rContext, const OUString& rStr, LanguageType eLang )
434 ScInputStringType aRet;
435 aRet.mnFormatType = SvNumFormatType::ALL;
436 aRet.meType = ScInputStringType::Unknown;
437 aRet.maText = rStr;
438 aRet.mfValue = 0.0;
440 if (rStr.getLength() > 1 && rStr[0] == '=')
442 aRet.meType = ScInputStringType::Formula;
444 else if (rStr.getLength() > 1 && rStr[0] == '\'')
446 // for bEnglish, "'" at the beginning is always interpreted as text
447 // marker and stripped
448 aRet.maText = rStr.copy(1);
449 aRet.meType = ScInputStringType::Text;
451 else // test for English number format (only)
453 sal_uInt32 nNumFormat = rContext.NFGetStandardIndex(eLang);
455 if (rContext.NFIsNumberFormat(rStr, nNumFormat, aRet.mfValue))
457 aRet.meType = ScInputStringType::Number;
458 aRet.mnFormatType = rContext.NFGetType(nNumFormat);
460 else if (!rStr.isEmpty())
461 aRet.meType = ScInputStringType::Text;
463 // the (English) number format is not set
464 //TODO: find and replace with matching local format???
467 return aRet;
470 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */