Stop leaking all ScPostIt instances.
[LibreOffice.git] / sc / source / core / tool / stringutil.cxx
blob5bdc2c2fe7521aa7731c212be56426b1b275806d
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #include "stringutil.hxx"
21 #include "global.hxx"
22 #include "svl/zforlist.hxx"
24 #include <rtl/ustrbuf.hxx>
25 #include <rtl/strbuf.hxx>
26 #include <rtl/math.hxx>
28 ScSetStringParam::ScSetStringParam() :
29 mpNumFormatter(NULL),
30 mbDetectNumberFormat(true),
31 meSetTextNumFormat(Never),
32 mbHandleApostrophe(true)
36 void ScSetStringParam::setTextInput()
38 mbDetectNumberFormat = false;
39 mbHandleApostrophe = false;
40 meSetTextNumFormat = Always;
43 void ScSetStringParam::setNumericInput()
45 mbDetectNumberFormat = true;
46 mbHandleApostrophe = true;
47 meSetTextNumFormat = Never;
50 bool ScStringUtil::parseSimpleNumber(
51 const OUString& rStr, sal_Unicode dsep, sal_Unicode gsep, double& rVal)
53 // Actually almost the entire pre-check is unnecessary and we could call
54 // rtl::math::stringToDouble() just after having exchanged ascii space with
55 // non-breaking space, if it wasn't for check of grouped digits. The NaN
56 // and Inf cases that are accepted by stringToDouble() could be detected
57 // using rtl::math::isFinite() on the result.
59 /* TODO: The grouped digits check isn't even valid for locales that do not
60 * group in thousands ... e.g. Indian locales. But that's something also
61 * the number scanner doesn't implement yet, only the formatter. */
63 OUStringBuffer aBuf;
65 sal_Int32 i = 0;
66 sal_Int32 n = rStr.getLength();
67 const sal_Unicode* p = rStr.getStr();
68 const sal_Unicode* pLast = p + (n-1);
69 sal_Int32 nPosDSep = -1, nPosGSep = -1;
70 sal_uInt32 nDigitCount = 0;
71 sal_Int32 nPosExponent = -1;
73 // Skip preceding spaces.
74 for (i = 0; i < n; ++i, ++p)
76 sal_Unicode c = *p;
77 if (c != 0x0020 && c != 0x00A0)
78 // first non-space character. Exit.
79 break;
82 if (i == n)
83 // the whole string is space. Fail.
84 return false;
86 n -= i; // Subtract the length of the preceding spaces.
88 // Determine the last non-space character.
89 for (; p != pLast; --pLast, --n)
91 sal_Unicode c = *pLast;
92 if (c != 0x0020 && c != 0x00A0)
93 // Non space character. Exit.
94 break;
97 for (i = 0; i < n; ++i, ++p)
99 sal_Unicode c = *p;
100 if (c == 0x0020 && gsep == 0x00A0)
101 // ascii space to unicode space if that is group separator
102 c = 0x00A0;
104 if ('0' <= c && c <= '9')
106 // this is a digit.
107 aBuf.append(c);
108 ++nDigitCount;
110 else if (c == dsep)
112 // this is a decimal separator.
114 if (nPosDSep >= 0)
115 // a second decimal separator -> not a valid number.
116 return false;
118 if (nPosGSep >= 0 && i - nPosGSep != 4)
119 // the number has a group separator and the decimal sep is not
120 // positioned correctly.
121 return false;
123 nPosDSep = i;
124 nPosGSep = -1;
125 aBuf.append(c);
126 nDigitCount = 0;
128 else if (c == gsep)
130 // this is a group (thousand) separator.
132 if (i == 0)
133 // not allowed as the first character.
134 return false;
136 if (nPosDSep >= 0)
137 // not allowed after the decimal separator.
138 return false;
140 if (nPosGSep >= 0 && nDigitCount != 3)
141 // must be exactly 3 digits since the last group separator.
142 return false;
144 if (nPosExponent >= 0)
145 // not allowed in exponent.
146 return false;
148 nPosGSep = i;
149 nDigitCount = 0;
151 else if (c == '-' || c == '+')
153 // A sign must be the first character if it's given, or immediately
154 // follow the exponent character if present.
155 if (i == 0 || (nPosExponent >= 0 && i == nPosExponent + 1))
156 aBuf.append(c);
157 else
158 return false;
160 else if (c == 'E' || c == 'e')
162 // this is an exponent designator.
164 if (nPosExponent >= 0)
165 // Only one exponent allowed.
166 return false;
168 if (nPosGSep >= 0 && nDigitCount != 3)
169 // must be exactly 3 digits since the last group separator.
170 return false;
172 aBuf.append(c);
173 nPosExponent = i;
174 nPosDSep = -1;
175 nPosGSep = -1;
176 nDigitCount = 0;
178 else
179 return false;
182 // finished parsing the number.
184 if (nPosGSep >= 0 && nDigitCount != 3)
185 // must be exactly 3 digits since the last group separator.
186 return false;
188 rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
189 sal_Int32 nParseEnd = 0;
190 OUString aString( aBuf.makeStringAndClear());
191 rVal = ::rtl::math::stringToDouble( aString, dsep, gsep, &eStatus, &nParseEnd);
192 if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aString.getLength())
193 // Not a valid number or not entire string consumed.
194 return false;
196 return true;
199 bool ScStringUtil::parseSimpleNumber(
200 const char* p, size_t n, char dsep, char gsep, double& rVal)
202 // Actually almost the entire pre-check is unnecessary and we could call
203 // rtl::math::stringToDouble() just after having exchanged ascii space with
204 // non-breaking space, if it wasn't for check of grouped digits. The NaN
205 // and Inf cases that are accepted by stringToDouble() could be detected
206 // using rtl::math::isFinite() on the result.
208 /* TODO: The grouped digits check isn't even valid for locales that do not
209 * group in thousands ... e.g. Indian locales. But that's something also
210 * the number scanner doesn't implement yet, only the formatter. */
212 OStringBuffer aBuf;
214 size_t i = 0;
215 const char* pLast = p + (n-1);
216 sal_Int32 nPosDSep = -1, nPosGSep = -1;
217 sal_uInt32 nDigitCount = 0;
218 sal_Int32 nPosExponent = -1;
220 // Skip preceding spaces.
221 for (i = 0; i < n; ++i, ++p)
223 char c = *p;
224 if (c != ' ')
225 // first non-space character. Exit.
226 break;
229 if (i == n)
230 // the whole string is space. Fail.
231 return false;
233 n -= i; // Subtract the length of the preceding spaces.
235 // Determine the last non-space character.
236 for (; p != pLast; --pLast, --n)
238 char c = *pLast;
239 if (c != ' ')
240 // Non space character. Exit.
241 break;
244 for (i = 0; i < n; ++i, ++p)
246 char c = *p;
248 if ('0' <= c && c <= '9')
250 // this is a digit.
251 aBuf.append(c);
252 ++nDigitCount;
254 else if (c == dsep)
256 // this is a decimal separator.
258 if (nPosDSep >= 0)
259 // a second decimal separator -> not a valid number.
260 return false;
262 if (nPosGSep >= 0 && i - nPosGSep != 4)
263 // the number has a group separator and the decimal sep is not
264 // positioned correctly.
265 return false;
267 nPosDSep = i;
268 nPosGSep = -1;
269 aBuf.append(c);
270 nDigitCount = 0;
272 else if (c == gsep)
274 // this is a group (thousand) separator.
276 if (i == 0)
277 // not allowed as the first character.
278 return false;
280 if (nPosDSep >= 0)
281 // not allowed after the decimal separator.
282 return false;
284 if (nPosGSep >= 0 && nDigitCount != 3)
285 // must be exactly 3 digits since the last group separator.
286 return false;
288 if (nPosExponent >= 0)
289 // not allowed in exponent.
290 return false;
292 nPosGSep = i;
293 nDigitCount = 0;
295 else if (c == '-' || c == '+')
297 // A sign must be the first character if it's given, or immediately
298 // follow the exponent character if present.
299 if (i == 0 || (nPosExponent >= 0 && i == static_cast<size_t>(nPosExponent+1)))
300 aBuf.append(c);
301 else
302 return false;
304 else if (c == 'E' || c == 'e')
306 // this is an exponent designator.
308 if (nPosExponent >= 0)
309 // Only one exponent allowed.
310 return false;
312 if (nPosGSep >= 0 && nDigitCount != 3)
313 // must be exactly 3 digits since the last group separator.
314 return false;
316 aBuf.append(c);
317 nPosExponent = i;
318 nPosDSep = -1;
319 nPosGSep = -1;
320 nDigitCount = 0;
322 else
323 return false;
326 // finished parsing the number.
328 if (nPosGSep >= 0 && nDigitCount != 3)
329 // must be exactly 3 digits since the last group separator.
330 return false;
332 rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
333 sal_Int32 nParseEnd = 0;
334 OString aString( aBuf.makeStringAndClear());
335 rVal = ::rtl::math::stringToDouble( aString, dsep, gsep, &eStatus, &nParseEnd);
336 if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aString.getLength())
337 // Not a valid number or not entire string consumed.
338 return false;
340 return true;
343 sal_Int32 ScStringUtil::GetQuotedTokenCount(const OUString &rIn, const OUString& rQuotedPairs, sal_Unicode cTok )
345 assert( !(rQuotedPairs.getLength()%2) );
346 assert( rQuotedPairs.indexOf(cTok) );
348 // empty string: TokenCount is 0 per definition
349 if ( rIn.isEmpty() )
350 return 0;
352 sal_Int32 nTokCount = 1;
353 sal_Int32 nLen = rIn.getLength();
354 sal_Int32 nQuotedLen = rQuotedPairs.getLength();
355 sal_Unicode cQuotedEndChar = 0;
356 const sal_Unicode* pQuotedStr = rQuotedPairs.getStr();
357 const sal_Unicode* pStr = rIn.getStr();
358 sal_Int32 nIndex = 0;
359 while ( nIndex < nLen )
361 sal_Unicode c = *pStr;
362 if ( cQuotedEndChar )
364 // reached end of the quote ?
365 if ( c == cQuotedEndChar )
366 cQuotedEndChar = 0;
368 else
370 // Is the char a quote-beginn char ?
371 sal_Int32 nQuoteIndex = 0;
372 while ( nQuoteIndex < nQuotedLen )
374 if ( pQuotedStr[nQuoteIndex] == c )
376 cQuotedEndChar = pQuotedStr[nQuoteIndex+1];
377 break;
379 else
380 nQuoteIndex += 2;
383 // If the token-char matches then increase TokCount
384 if ( c == cTok )
385 ++nTokCount;
388 ++pStr,
389 ++nIndex;
392 return nTokCount;
395 OUString ScStringUtil::GetQuotedToken(const OUString &rIn, sal_Int32 nToken, const OUString& rQuotedPairs,
396 sal_Unicode cTok, sal_Int32& rIndex )
398 assert( !(rQuotedPairs.getLength()%2) );
399 assert( rQuotedPairs.indexOf(cTok) == -1 );
401 const sal_Unicode* pStr = rIn.getStr();
402 const sal_Unicode* pQuotedStr = rQuotedPairs.getStr();
403 sal_Unicode cQuotedEndChar = 0;
404 sal_Int32 nQuotedLen = rQuotedPairs.getLength();
405 sal_Int32 nLen = rIn.getLength();
406 sal_Int32 nTok = 0;
407 sal_Int32 nFirstChar = rIndex;
408 sal_Int32 i = nFirstChar;
410 // detect token position and length
411 pStr += i;
412 while ( i < nLen )
414 sal_Unicode c = *pStr;
415 if ( cQuotedEndChar )
417 // end of the quote reached ?
418 if ( c == cQuotedEndChar )
419 cQuotedEndChar = 0;
421 else
423 // Is the char a quote-begin char ?
424 sal_Int32 nQuoteIndex = 0;
425 while ( nQuoteIndex < nQuotedLen )
427 if ( pQuotedStr[nQuoteIndex] == c )
429 cQuotedEndChar = pQuotedStr[nQuoteIndex+1];
430 break;
432 else
433 nQuoteIndex += 2;
436 // If the token-char matches then increase TokCount
437 if ( c == cTok )
439 ++nTok;
441 if ( nTok == nToken )
442 nFirstChar = i+1;
443 else
445 if ( nTok > nToken )
446 break;
451 ++pStr,
452 ++i;
455 if ( nTok >= nToken )
457 if ( i < nLen )
458 rIndex = i+1;
459 else
460 rIndex = -1;
461 return rIn.copy( nFirstChar, i-nFirstChar );
463 else
465 rIndex = -1;
466 return OUString();
470 bool ScStringUtil::isMultiline( const OUString& rStr )
472 if (rStr.indexOf('\n') != -1)
473 return true;
475 if (rStr.indexOf(CHAR_CR) != -1)
476 return true;
478 return false;
481 ScInputStringType ScStringUtil::parseInputString(
482 SvNumberFormatter& rFormatter, const OUString& rStr, LanguageType eLang )
484 ScInputStringType aRet;
485 aRet.mnFormatType = 0;
486 aRet.meType = ScInputStringType::Unknown;
487 aRet.maText = rStr;
488 aRet.mfValue = 0.0;
490 if (rStr.getLength() > 1 && rStr[0] == '=')
492 aRet.meType = ScInputStringType::Formula;
494 else if (rStr.getLength() > 1 && rStr[0] == '\'')
496 // for bEnglish, "'" at the beginning is always interpreted as text
497 // marker and stripped
498 aRet.maText = rStr.copy(1);
499 aRet.meType = ScInputStringType::Text;
501 else // (nur) auf englisches Zahlformat testen
503 sal_uInt32 nNumFormat = rFormatter.GetStandardIndex(eLang);
505 if (rFormatter.IsNumberFormat(rStr, nNumFormat, aRet.mfValue))
507 aRet.meType = ScInputStringType::Number;
508 aRet.mnFormatType = rFormatter.GetType(nNumFormat);
510 else if (!rStr.isEmpty())
511 aRet.meType = ScInputStringType::Text;
513 // das (englische) Zahlformat wird nicht gesetzt
514 //! passendes lokales Format suchen und setzen???
517 return aRet;
520 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */