update emoji autocorrect entries from po-files
[LibreOffice.git] / sc / source / core / tool / stringutil.cxx
blob4b454809d1c21a48a68986f508e7f186004aa38e
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #include "stringutil.hxx"
21 #include "global.hxx"
22 #include <svl/zforlist.hxx>
24 #include <rtl/ustrbuf.hxx>
25 #include <rtl/strbuf.hxx>
26 #include <rtl/math.hxx>
28 ScSetStringParam::ScSetStringParam() :
29 mpNumFormatter(NULL),
30 mbDetectNumberFormat(true),
31 meSetTextNumFormat(Never),
32 mbHandleApostrophe(true),
33 meStartListening(sc::SingleCellListening)
37 void ScSetStringParam::setTextInput()
39 mbDetectNumberFormat = false;
40 mbHandleApostrophe = false;
41 meSetTextNumFormat = Always;
44 void ScSetStringParam::setNumericInput()
46 mbDetectNumberFormat = true;
47 mbHandleApostrophe = true;
48 meSetTextNumFormat = Never;
51 bool ScStringUtil::parseSimpleNumber(
52 const OUString& rStr, sal_Unicode dsep, sal_Unicode gsep, double& rVal)
54 // Actually almost the entire pre-check is unnecessary and we could call
55 // rtl::math::stringToDouble() just after having exchanged ascii space with
56 // non-breaking space, if it wasn't for check of grouped digits. The NaN
57 // and Inf cases that are accepted by stringToDouble() could be detected
58 // using rtl::math::isFinite() on the result.
60 /* TODO: The grouped digits check isn't even valid for locales that do not
61 * group in thousands ... e.g. Indian locales. But that's something also
62 * the number scanner doesn't implement yet, only the formatter. */
64 OUStringBuffer aBuf;
66 sal_Int32 i = 0;
67 sal_Int32 n = rStr.getLength();
68 const sal_Unicode* p = rStr.getStr();
69 const sal_Unicode* pLast = p + (n-1);
70 sal_Int32 nPosDSep = -1, nPosGSep = -1;
71 sal_uInt32 nDigitCount = 0;
72 sal_Int32 nPosExponent = -1;
74 // Skip preceding spaces.
75 for (i = 0; i < n; ++i, ++p)
77 sal_Unicode c = *p;
78 if (c != 0x0020 && c != 0x00A0)
79 // first non-space character. Exit.
80 break;
83 if (i == n)
84 // the whole string is space. Fail.
85 return false;
87 n -= i; // Subtract the length of the preceding spaces.
89 // Determine the last non-space character.
90 for (; p != pLast; --pLast, --n)
92 sal_Unicode c = *pLast;
93 if (c != 0x0020 && c != 0x00A0)
94 // Non space character. Exit.
95 break;
98 for (i = 0; i < n; ++i, ++p)
100 sal_Unicode c = *p;
101 if (c == 0x0020 && gsep == 0x00A0)
102 // ascii space to unicode space if that is group separator
103 c = 0x00A0;
105 if ('0' <= c && c <= '9')
107 // this is a digit.
108 aBuf.append(c);
109 ++nDigitCount;
111 else if (c == dsep)
113 // this is a decimal separator.
115 if (nPosDSep >= 0)
116 // a second decimal separator -> not a valid number.
117 return false;
119 if (nPosGSep >= 0 && i - nPosGSep != 4)
120 // the number has a group separator and the decimal sep is not
121 // positioned correctly.
122 return false;
124 nPosDSep = i;
125 nPosGSep = -1;
126 aBuf.append(c);
127 nDigitCount = 0;
129 else if (c == gsep)
131 // this is a group (thousand) separator.
133 if (i == 0)
134 // not allowed as the first character.
135 return false;
137 if (nPosDSep >= 0)
138 // not allowed after the decimal separator.
139 return false;
141 if (nPosGSep >= 0 && nDigitCount != 3)
142 // must be exactly 3 digits since the last group separator.
143 return false;
145 if (nPosExponent >= 0)
146 // not allowed in exponent.
147 return false;
149 nPosGSep = i;
150 nDigitCount = 0;
152 else if (c == '-' || c == '+')
154 // A sign must be the first character if it's given, or immediately
155 // follow the exponent character if present.
156 if (i == 0 || (nPosExponent >= 0 && i == nPosExponent + 1))
157 aBuf.append(c);
158 else
159 return false;
161 else if (c == 'E' || c == 'e')
163 // this is an exponent designator.
165 if (nPosExponent >= 0)
166 // Only one exponent allowed.
167 return false;
169 if (nPosGSep >= 0 && nDigitCount != 3)
170 // must be exactly 3 digits since the last group separator.
171 return false;
173 aBuf.append(c);
174 nPosExponent = i;
175 nPosDSep = -1;
176 nPosGSep = -1;
177 nDigitCount = 0;
179 else
180 return false;
183 // finished parsing the number.
185 if (nPosGSep >= 0 && nDigitCount != 3)
186 // must be exactly 3 digits since the last group separator.
187 return false;
189 rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
190 sal_Int32 nParseEnd = 0;
191 OUString aString( aBuf.makeStringAndClear());
192 rVal = ::rtl::math::stringToDouble( aString, dsep, gsep, &eStatus, &nParseEnd);
193 if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aString.getLength())
194 // Not a valid number or not entire string consumed.
195 return false;
197 return true;
200 bool ScStringUtil::parseSimpleNumber(
201 const char* p, size_t n, char dsep, char gsep, double& rVal)
203 // Actually almost the entire pre-check is unnecessary and we could call
204 // rtl::math::stringToDouble() just after having exchanged ascii space with
205 // non-breaking space, if it wasn't for check of grouped digits. The NaN
206 // and Inf cases that are accepted by stringToDouble() could be detected
207 // using rtl::math::isFinite() on the result.
209 /* TODO: The grouped digits check isn't even valid for locales that do not
210 * group in thousands ... e.g. Indian locales. But that's something also
211 * the number scanner doesn't implement yet, only the formatter. */
213 OStringBuffer aBuf;
215 size_t i = 0;
216 const char* pLast = p + (n-1);
217 sal_Int32 nPosDSep = -1, nPosGSep = -1;
218 sal_uInt32 nDigitCount = 0;
219 sal_Int32 nPosExponent = -1;
221 // Skip preceding spaces.
222 for (i = 0; i < n; ++i, ++p)
224 char c = *p;
225 if (c != ' ')
226 // first non-space character. Exit.
227 break;
230 if (i == n)
231 // the whole string is space. Fail.
232 return false;
234 n -= i; // Subtract the length of the preceding spaces.
236 // Determine the last non-space character.
237 for (; p != pLast; --pLast, --n)
239 char c = *pLast;
240 if (c != ' ')
241 // Non space character. Exit.
242 break;
245 for (i = 0; i < n; ++i, ++p)
247 char c = *p;
249 if ('0' <= c && c <= '9')
251 // this is a digit.
252 aBuf.append(c);
253 ++nDigitCount;
255 else if (c == dsep)
257 // this is a decimal separator.
259 if (nPosDSep >= 0)
260 // a second decimal separator -> not a valid number.
261 return false;
263 if (nPosGSep >= 0 && i - nPosGSep != 4)
264 // the number has a group separator and the decimal sep is not
265 // positioned correctly.
266 return false;
268 nPosDSep = i;
269 nPosGSep = -1;
270 aBuf.append(c);
271 nDigitCount = 0;
273 else if (c == gsep)
275 // this is a group (thousand) separator.
277 if (i == 0)
278 // not allowed as the first character.
279 return false;
281 if (nPosDSep >= 0)
282 // not allowed after the decimal separator.
283 return false;
285 if (nPosGSep >= 0 && nDigitCount != 3)
286 // must be exactly 3 digits since the last group separator.
287 return false;
289 if (nPosExponent >= 0)
290 // not allowed in exponent.
291 return false;
293 nPosGSep = i;
294 nDigitCount = 0;
296 else if (c == '-' || c == '+')
298 // A sign must be the first character if it's given, or immediately
299 // follow the exponent character if present.
300 if (i == 0 || (nPosExponent >= 0 && i == static_cast<size_t>(nPosExponent+1)))
301 aBuf.append(c);
302 else
303 return false;
305 else if (c == 'E' || c == 'e')
307 // this is an exponent designator.
309 if (nPosExponent >= 0)
310 // Only one exponent allowed.
311 return false;
313 if (nPosGSep >= 0 && nDigitCount != 3)
314 // must be exactly 3 digits since the last group separator.
315 return false;
317 aBuf.append(c);
318 nPosExponent = i;
319 nPosDSep = -1;
320 nPosGSep = -1;
321 nDigitCount = 0;
323 else
324 return false;
327 // finished parsing the number.
329 if (nPosGSep >= 0 && nDigitCount != 3)
330 // must be exactly 3 digits since the last group separator.
331 return false;
333 rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
334 sal_Int32 nParseEnd = 0;
335 OString aString( aBuf.makeStringAndClear());
336 rVal = ::rtl::math::stringToDouble( aString, dsep, gsep, &eStatus, &nParseEnd);
337 if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aString.getLength())
338 // Not a valid number or not entire string consumed.
339 return false;
341 return true;
344 sal_Int32 ScStringUtil::GetQuotedTokenCount(const OUString &rIn, const OUString& rQuotedPairs, sal_Unicode cTok )
346 assert( !(rQuotedPairs.getLength()%2) );
347 assert( rQuotedPairs.indexOf(cTok) );
349 // empty string: TokenCount is 0 per definition
350 if ( rIn.isEmpty() )
351 return 0;
353 sal_Int32 nTokCount = 1;
354 sal_Int32 nLen = rIn.getLength();
355 sal_Int32 nQuotedLen = rQuotedPairs.getLength();
356 sal_Unicode cQuotedEndChar = 0;
357 const sal_Unicode* pQuotedStr = rQuotedPairs.getStr();
358 const sal_Unicode* pStr = rIn.getStr();
359 sal_Int32 nIndex = 0;
360 while ( nIndex < nLen )
362 sal_Unicode c = *pStr;
363 if ( cQuotedEndChar )
365 // reached end of the quote ?
366 if ( c == cQuotedEndChar )
367 cQuotedEndChar = 0;
369 else
371 // Is the char a quote-beginn char ?
372 sal_Int32 nQuoteIndex = 0;
373 while ( nQuoteIndex < nQuotedLen )
375 if ( pQuotedStr[nQuoteIndex] == c )
377 cQuotedEndChar = pQuotedStr[nQuoteIndex+1];
378 break;
380 else
381 nQuoteIndex += 2;
384 // If the token-char matches then increase TokCount
385 if ( c == cTok )
386 ++nTokCount;
389 ++pStr,
390 ++nIndex;
393 return nTokCount;
396 OUString ScStringUtil::GetQuotedToken(const OUString &rIn, sal_Int32 nToken, const OUString& rQuotedPairs,
397 sal_Unicode cTok, sal_Int32& rIndex )
399 assert( !(rQuotedPairs.getLength()%2) );
400 assert( rQuotedPairs.indexOf(cTok) == -1 );
402 const sal_Unicode* pStr = rIn.getStr();
403 const sal_Unicode* pQuotedStr = rQuotedPairs.getStr();
404 sal_Unicode cQuotedEndChar = 0;
405 sal_Int32 nQuotedLen = rQuotedPairs.getLength();
406 sal_Int32 nLen = rIn.getLength();
407 sal_Int32 nTok = 0;
408 sal_Int32 nFirstChar = rIndex;
409 sal_Int32 i = nFirstChar;
411 // detect token position and length
412 pStr += i;
413 while ( i < nLen )
415 sal_Unicode c = *pStr;
416 if ( cQuotedEndChar )
418 // end of the quote reached ?
419 if ( c == cQuotedEndChar )
420 cQuotedEndChar = 0;
422 else
424 // Is the char a quote-begin char ?
425 sal_Int32 nQuoteIndex = 0;
426 while ( nQuoteIndex < nQuotedLen )
428 if ( pQuotedStr[nQuoteIndex] == c )
430 cQuotedEndChar = pQuotedStr[nQuoteIndex+1];
431 break;
433 else
434 nQuoteIndex += 2;
437 // If the token-char matches then increase TokCount
438 if ( c == cTok )
440 ++nTok;
442 if ( nTok == nToken )
443 nFirstChar = i+1;
444 else
446 if ( nTok > nToken )
447 break;
452 ++pStr,
453 ++i;
456 if ( nTok >= nToken )
458 if ( i < nLen )
459 rIndex = i+1;
460 else
461 rIndex = -1;
462 return rIn.copy( nFirstChar, i-nFirstChar );
464 else
466 rIndex = -1;
467 return OUString();
471 bool ScStringUtil::isMultiline( const OUString& rStr )
473 if (rStr.indexOf('\n') != -1)
474 return true;
476 if (rStr.indexOf(CHAR_CR) != -1)
477 return true;
479 return false;
482 ScInputStringType ScStringUtil::parseInputString(
483 SvNumberFormatter& rFormatter, const OUString& rStr, LanguageType eLang )
485 ScInputStringType aRet;
486 aRet.mnFormatType = 0;
487 aRet.meType = ScInputStringType::Unknown;
488 aRet.maText = rStr;
489 aRet.mfValue = 0.0;
491 if (rStr.getLength() > 1 && rStr[0] == '=')
493 aRet.meType = ScInputStringType::Formula;
495 else if (rStr.getLength() > 1 && rStr[0] == '\'')
497 // for bEnglish, "'" at the beginning is always interpreted as text
498 // marker and stripped
499 aRet.maText = rStr.copy(1);
500 aRet.meType = ScInputStringType::Text;
502 else // (nur) auf englisches Zahlformat testen
504 sal_uInt32 nNumFormat = rFormatter.GetStandardIndex(eLang);
506 if (rFormatter.IsNumberFormat(rStr, nNumFormat, aRet.mfValue))
508 aRet.meType = ScInputStringType::Number;
509 aRet.mnFormatType = rFormatter.GetType(nNumFormat);
511 else if (!rStr.isEmpty())
512 aRet.meType = ScInputStringType::Text;
514 // the (English) number format is not set
515 //TODO: find and replace with matching local format???
518 return aRet;
521 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */