Version 6.1.4.1, tag libreoffice-6.1.4.1
[LibreOffice.git] / i18npool / source / characterclassification / cclass_unicode_parser.cxx
blobb767f09cb3112dbdf94b73f905473138bee8621e
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
21 #include <cclass_unicode.hxx>
22 #include <unicode/uchar.h>
23 #include <rtl/character.hxx>
24 #include <rtl/math.hxx>
25 #include <rtl/ustring.hxx>
26 #include <com/sun/star/i18n/KParseTokens.hpp>
27 #include <com/sun/star/i18n/KParseType.hpp>
28 #include <com/sun/star/i18n/UnicodeType.hpp>
29 #include <com/sun/star/i18n/LocaleData2.hpp>
30 #include <com/sun/star/i18n/NativeNumberMode.hpp>
31 #include <com/sun/star/i18n/NativeNumberSupplier.hpp>
33 #include <string.h>
35 using namespace ::com::sun::star::uno;
36 using namespace ::com::sun::star::i18n;
37 using namespace ::com::sun::star::lang;
// Combined flag set shared by all digit characters: it is assigned to the
// ASCII digits '0'..'9' in pDefaultParserTable and OR-ed onto the flags
// returned for the Unicode number categories in getFlagsExtended().
39 #define TOKEN_DIGIT_FLAGS (ParserFlags::CHAR_VALUE | ParserFlags::VALUE | ParserFlags::VALUE_EXP | ParserFlags::VALUE_EXP_VALUE | ParserFlags::VALUE_DIGIT)
41 namespace i18npool {
43 // Default identifier/name specification is [A-Za-z_][A-Za-z0-9_]*
// Number of entries in the per-character default tables: one entry for each
// 7-bit ASCII code point (0..127). Code points >= nDefCnt are classified at
// run time instead (see getFlagsExtended() and getParseTokensType()).
45 const sal_uInt8 cclass_Unicode::nDefCnt = 128;
// Default parser flags, indexed by ASCII code point. initParserTable() copies
// this table into pTable and then adjusts the copy for the locale separators,
// the requested KParseTokens start/continuation types and the user-defined
// characters; this table itself is never modified.
// The leading /* nn c */ comment of an entry gives the code point and its
// character; entries without such a comment are the remaining control
// characters.
46 const ParserFlags cclass_Unicode::pDefaultParserTable[ nDefCnt ] =
// A trailing parenthesised comment on an entry shows the value the Calc
// formula compiler used for that character, kept for reference only.
48 // (...) == Calc formula compiler specific, commented out and modified
50 /* \0 */ ParserFlags::EXCLUDED,
51 ParserFlags::ILLEGAL,
52 ParserFlags::ILLEGAL,
53 ParserFlags::ILLEGAL,
54 ParserFlags::ILLEGAL,
55 ParserFlags::ILLEGAL,
56 ParserFlags::ILLEGAL,
57 ParserFlags::ILLEGAL,
58 ParserFlags::ILLEGAL,
59 /* 9 \t */ ParserFlags::CHAR_DONTCARE | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::ILLEGAL)
60 ParserFlags::ILLEGAL,
61 /* 11 \v */ ParserFlags::CHAR_DONTCARE | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::ILLEGAL)
62 ParserFlags::ILLEGAL,
63 ParserFlags::ILLEGAL,
64 ParserFlags::ILLEGAL,
65 ParserFlags::ILLEGAL,
66 ParserFlags::ILLEGAL,
67 ParserFlags::ILLEGAL,
68 ParserFlags::ILLEGAL,
69 ParserFlags::ILLEGAL,
70 ParserFlags::ILLEGAL,
71 ParserFlags::ILLEGAL,
72 ParserFlags::ILLEGAL,
73 ParserFlags::ILLEGAL,
74 ParserFlags::ILLEGAL,
75 ParserFlags::ILLEGAL,
76 ParserFlags::ILLEGAL,
77 ParserFlags::ILLEGAL,
78 ParserFlags::ILLEGAL,
79 ParserFlags::ILLEGAL,
80 ParserFlags::ILLEGAL,
81 ParserFlags::ILLEGAL,
82 /* 32 */ ParserFlags::CHAR_DONTCARE | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
83 /* 33 ! */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
84 /* 34 " */ ParserFlags::CHAR_STRING | ParserFlags::STRING_SEP,
85 /* 35 # */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::WORD_SEP)
86 /* 36 $ */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::CHAR_WORD | ParserFlags::WORD)
87 /* 37 % */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::VALUE)
88 /* 38 & */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
89 /* 39 ' */ ParserFlags::NAME_SEP,
90 /* 40 ( */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
91 /* 41 ) */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
92 /* 42 * */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
93 /* 43 + */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP | ParserFlags::VALUE_EXP | ParserFlags::VALUE_SIGN,
94 /* 44 , */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::CHAR_VALUE | ParserFlags::VALUE)
95 /* 45 - */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP | ParserFlags::VALUE_EXP | ParserFlags::VALUE_SIGN,
96 /* 46 . */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::WORD | ParserFlags::CHAR_VALUE | ParserFlags::VALUE)
97 /* 47 / */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
98 //for ( i = 48; i < 58; i++ )
99 /* 48 0 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
100 /* 49 1 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
101 /* 50 2 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
102 /* 51 3 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
103 /* 52 4 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
104 /* 53 5 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
105 /* 54 6 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
106 /* 55 7 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
107 /* 56 8 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
108 /* 57 9 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
109 /* 58 : */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::WORD)
110 /* 59 ; */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
111 /* 60 < */ ParserFlags::CHAR_BOOL | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
112 /* 61 = */ ParserFlags::CHAR | ParserFlags::BOOL | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
113 /* 62 > */ ParserFlags::CHAR_BOOL | ParserFlags::BOOL | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
114 /* 63 ? */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::CHAR_WORD | ParserFlags::WORD)
115 /* 64 @ */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::ILLEGAL // UNUSED)
116 //for ( i = 65; i < 91; i++ )
117 /* 65 A */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
118 /* 66 B */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
119 /* 67 C */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
120 /* 68 D */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
121 /* 69 E */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
122 /* 70 F */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
123 /* 71 G */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
124 /* 72 H */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
125 /* 73 I */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
126 /* 74 J */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
127 /* 75 K */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
128 /* 76 L */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
129 /* 77 M */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
130 /* 78 N */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
131 /* 79 O */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
132 /* 80 P */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
133 /* 81 Q */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
134 /* 82 R */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
135 /* 83 S */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
136 /* 84 T */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
137 /* 85 U */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
138 /* 86 V */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
139 /* 87 W */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
140 /* 88 X */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
141 /* 89 Y */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
142 /* 90 Z */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
143 /* 91 [ */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::ILLEGAL // UNUSED)
144 /* 92 \ */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::ILLEGAL // UNUSED)
145 /* 93 ] */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::ILLEGAL // UNUSED)
146 /* 94 ^ */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
147 /* 95 _ */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
148 /* 96 ` */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::ILLEGAL // UNUSED)
149 //for ( i = 97; i < 123; i++ )
150 /* 97 a */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
151 /* 98 b */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
152 /* 99 c */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
153 /* 100 d */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
154 /* 101 e */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
155 /* 102 f */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
156 /* 103 g */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
157 /* 104 h */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
158 /* 105 i */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
159 /* 106 j */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
160 /* 107 k */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
161 /* 108 l */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
162 /* 109 m */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
163 /* 110 n */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
164 /* 111 o */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
165 /* 112 p */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
166 /* 113 q */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
167 /* 114 r */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
168 /* 115 s */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
169 /* 116 t */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
170 /* 117 u */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
171 /* 118 v */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
172 /* 119 w */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
173 /* 120 x */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
174 /* 121 y */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
175 /* 122 z */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
176 /* 123 { */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::ILLEGAL // UNUSED)
177 /* 124 | */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::ILLEGAL // UNUSED)
178 /* 125 } */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::ILLEGAL // UNUSED)
179 /* 126 ~ */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::ILLEGAL // UNUSED)
180 /* 127 */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP // (ParserFlags::ILLEGAL // UNUSED)
// KParseTokens::ASC_* classification of each ASCII code point, indexed by
// code point. getParseTokensType() returns the entry directly for every
// character below nDefCnt; characters at or above nDefCnt are classified
// from their Unicode general category instead.
// Code points 1..31 are ASC_CONTROL; '$', '.', ':' and '_' have dedicated
// ASC_DOLLAR / ASC_DOT / ASC_COLON / ASC_UNDERSCORE values; digits and
// upper/lower case letters map to ASC_DIGIT / ASC_UPALPHA / ASC_LOALPHA;
// everything else (including \0 and DEL) is ASC_OTHER.
184 const sal_Int32 cclass_Unicode::pParseTokensType[ nDefCnt ] =
186 /* \0 */ KParseTokens::ASC_OTHER,
187 KParseTokens::ASC_CONTROL,
188 KParseTokens::ASC_CONTROL,
189 KParseTokens::ASC_CONTROL,
190 KParseTokens::ASC_CONTROL,
191 KParseTokens::ASC_CONTROL,
192 KParseTokens::ASC_CONTROL,
193 KParseTokens::ASC_CONTROL,
194 KParseTokens::ASC_CONTROL,
195 /* 9 \t */ KParseTokens::ASC_CONTROL,
196 KParseTokens::ASC_CONTROL,
197 /* 11 \v */ KParseTokens::ASC_CONTROL,
198 KParseTokens::ASC_CONTROL,
199 KParseTokens::ASC_CONTROL,
200 KParseTokens::ASC_CONTROL,
201 KParseTokens::ASC_CONTROL,
202 KParseTokens::ASC_CONTROL,
203 KParseTokens::ASC_CONTROL,
204 KParseTokens::ASC_CONTROL,
205 KParseTokens::ASC_CONTROL,
206 KParseTokens::ASC_CONTROL,
207 KParseTokens::ASC_CONTROL,
208 KParseTokens::ASC_CONTROL,
209 KParseTokens::ASC_CONTROL,
210 KParseTokens::ASC_CONTROL,
211 KParseTokens::ASC_CONTROL,
212 KParseTokens::ASC_CONTROL,
213 KParseTokens::ASC_CONTROL,
214 KParseTokens::ASC_CONTROL,
215 KParseTokens::ASC_CONTROL,
216 KParseTokens::ASC_CONTROL,
217 KParseTokens::ASC_CONTROL,
218 /* 32 */ KParseTokens::ASC_OTHER,
219 /* 33 ! */ KParseTokens::ASC_OTHER,
220 /* 34 " */ KParseTokens::ASC_OTHER,
221 /* 35 # */ KParseTokens::ASC_OTHER,
222 /* 36 $ */ KParseTokens::ASC_DOLLAR,
223 /* 37 % */ KParseTokens::ASC_OTHER,
224 /* 38 & */ KParseTokens::ASC_OTHER,
225 /* 39 ' */ KParseTokens::ASC_OTHER,
226 /* 40 ( */ KParseTokens::ASC_OTHER,
227 /* 41 ) */ KParseTokens::ASC_OTHER,
228 /* 42 * */ KParseTokens::ASC_OTHER,
229 /* 43 + */ KParseTokens::ASC_OTHER,
230 /* 44 , */ KParseTokens::ASC_OTHER,
231 /* 45 - */ KParseTokens::ASC_OTHER,
232 /* 46 . */ KParseTokens::ASC_DOT,
233 /* 47 / */ KParseTokens::ASC_OTHER,
234 //for ( i = 48; i < 58; i++ )
235 /* 48 0 */ KParseTokens::ASC_DIGIT,
236 /* 49 1 */ KParseTokens::ASC_DIGIT,
237 /* 50 2 */ KParseTokens::ASC_DIGIT,
238 /* 51 3 */ KParseTokens::ASC_DIGIT,
239 /* 52 4 */ KParseTokens::ASC_DIGIT,
240 /* 53 5 */ KParseTokens::ASC_DIGIT,
241 /* 54 6 */ KParseTokens::ASC_DIGIT,
242 /* 55 7 */ KParseTokens::ASC_DIGIT,
243 /* 56 8 */ KParseTokens::ASC_DIGIT,
244 /* 57 9 */ KParseTokens::ASC_DIGIT,
245 /* 58 : */ KParseTokens::ASC_COLON,
246 /* 59 ; */ KParseTokens::ASC_OTHER,
247 /* 60 < */ KParseTokens::ASC_OTHER,
248 /* 61 = */ KParseTokens::ASC_OTHER,
249 /* 62 > */ KParseTokens::ASC_OTHER,
250 /* 63 ? */ KParseTokens::ASC_OTHER,
251 /* 64 @ */ KParseTokens::ASC_OTHER,
252 //for ( i = 65; i < 91; i++ )
253 /* 65 A */ KParseTokens::ASC_UPALPHA,
254 /* 66 B */ KParseTokens::ASC_UPALPHA,
255 /* 67 C */ KParseTokens::ASC_UPALPHA,
256 /* 68 D */ KParseTokens::ASC_UPALPHA,
257 /* 69 E */ KParseTokens::ASC_UPALPHA,
258 /* 70 F */ KParseTokens::ASC_UPALPHA,
259 /* 71 G */ KParseTokens::ASC_UPALPHA,
260 /* 72 H */ KParseTokens::ASC_UPALPHA,
261 /* 73 I */ KParseTokens::ASC_UPALPHA,
262 /* 74 J */ KParseTokens::ASC_UPALPHA,
263 /* 75 K */ KParseTokens::ASC_UPALPHA,
264 /* 76 L */ KParseTokens::ASC_UPALPHA,
265 /* 77 M */ KParseTokens::ASC_UPALPHA,
266 /* 78 N */ KParseTokens::ASC_UPALPHA,
267 /* 79 O */ KParseTokens::ASC_UPALPHA,
268 /* 80 P */ KParseTokens::ASC_UPALPHA,
269 /* 81 Q */ KParseTokens::ASC_UPALPHA,
270 /* 82 R */ KParseTokens::ASC_UPALPHA,
271 /* 83 S */ KParseTokens::ASC_UPALPHA,
272 /* 84 T */ KParseTokens::ASC_UPALPHA,
273 /* 85 U */ KParseTokens::ASC_UPALPHA,
274 /* 86 V */ KParseTokens::ASC_UPALPHA,
275 /* 87 W */ KParseTokens::ASC_UPALPHA,
276 /* 88 X */ KParseTokens::ASC_UPALPHA,
277 /* 89 Y */ KParseTokens::ASC_UPALPHA,
278 /* 90 Z */ KParseTokens::ASC_UPALPHA,
279 /* 91 [ */ KParseTokens::ASC_OTHER,
280 /* 92 \ */ KParseTokens::ASC_OTHER,
281 /* 93 ] */ KParseTokens::ASC_OTHER,
282 /* 94 ^ */ KParseTokens::ASC_OTHER,
283 /* 95 _ */ KParseTokens::ASC_UNDERSCORE,
284 /* 96 ` */ KParseTokens::ASC_OTHER,
285 //for ( i = 97; i < 123; i++ )
286 /* 97 a */ KParseTokens::ASC_LOALPHA,
287 /* 98 b */ KParseTokens::ASC_LOALPHA,
288 /* 99 c */ KParseTokens::ASC_LOALPHA,
289 /* 100 d */ KParseTokens::ASC_LOALPHA,
290 /* 101 e */ KParseTokens::ASC_LOALPHA,
291 /* 102 f */ KParseTokens::ASC_LOALPHA,
292 /* 103 g */ KParseTokens::ASC_LOALPHA,
293 /* 104 h */ KParseTokens::ASC_LOALPHA,
294 /* 105 i */ KParseTokens::ASC_LOALPHA,
295 /* 106 j */ KParseTokens::ASC_LOALPHA,
296 /* 107 k */ KParseTokens::ASC_LOALPHA,
297 /* 108 l */ KParseTokens::ASC_LOALPHA,
298 /* 109 m */ KParseTokens::ASC_LOALPHA,
299 /* 110 n */ KParseTokens::ASC_LOALPHA,
300 /* 111 o */ KParseTokens::ASC_LOALPHA,
301 /* 112 p */ KParseTokens::ASC_LOALPHA,
302 /* 113 q */ KParseTokens::ASC_LOALPHA,
303 /* 114 r */ KParseTokens::ASC_LOALPHA,
304 /* 115 s */ KParseTokens::ASC_LOALPHA,
305 /* 116 t */ KParseTokens::ASC_LOALPHA,
306 /* 117 u */ KParseTokens::ASC_LOALPHA,
307 /* 118 v */ KParseTokens::ASC_LOALPHA,
308 /* 119 w */ KParseTokens::ASC_LOALPHA,
309 /* 120 x */ KParseTokens::ASC_LOALPHA,
310 /* 121 y */ KParseTokens::ASC_LOALPHA,
311 /* 122 z */ KParseTokens::ASC_LOALPHA,
312 /* 123 { */ KParseTokens::ASC_OTHER,
313 /* 124 | */ KParseTokens::ASC_OTHER,
314 /* 125 } */ KParseTokens::ASC_OTHER,
315 /* 126 ~ */ KParseTokens::ASC_OTHER,
316 /* 127 */ KParseTokens::ASC_OTHER
320 // static
321 const sal_Unicode* cclass_Unicode::StrChr( const sal_Unicode* pStr, sal_uInt32 c )
323 if ( !pStr )
324 return nullptr;
325 sal_Unicode cs[2];
326 auto const n = rtl::splitSurrogates(c, cs);
327 while ( *pStr )
329 if ( *pStr == cs[0] && (n == 1 || pStr[1] == cs[1]) )
330 return pStr;
331 pStr++;
333 return nullptr;
337 sal_Int32 cclass_Unicode::getParseTokensType(sal_uInt32 const c, bool const isFirst)
339 if ( c < nDefCnt )
340 return pParseTokensType[ sal_uInt8(c) ];
341 else
344 //! all KParseTokens::UNI_... must be matched
345 switch (u_charType(c))
347 case U_UPPERCASE_LETTER :
348 return KParseTokens::UNI_UPALPHA;
349 case U_LOWERCASE_LETTER :
350 return KParseTokens::UNI_LOALPHA;
351 case U_TITLECASE_LETTER :
352 return KParseTokens::UNI_TITLE_ALPHA;
353 case U_MODIFIER_LETTER :
354 return KParseTokens::UNI_MODIFIER_LETTER;
355 case U_OTHER_LETTER :
356 // Non_Spacing_Mark could not be as leading character
357 if (isFirst) break;
358 SAL_FALLTHROUGH; // treat it as Other_Letter.
359 case U_NON_SPACING_MARK :
360 return KParseTokens::UNI_OTHER_LETTER;
361 case U_DECIMAL_DIGIT_NUMBER :
362 return KParseTokens::UNI_DIGIT;
363 case U_LETTER_NUMBER :
364 return KParseTokens::UNI_LETTER_NUMBER;
365 case U_OTHER_NUMBER :
366 return KParseTokens::UNI_OTHER_NUMBER;
369 return KParseTokens::UNI_OTHER;
373 void cclass_Unicode::setupInternational( const Locale& rLocale )
375 bool bChanged = (aParserLocale.Language != rLocale.Language
376 || aParserLocale.Country != rLocale.Country
377 || aParserLocale.Variant != rLocale.Variant);
378 if ( bChanged )
380 aParserLocale.Language = rLocale.Language;
381 aParserLocale.Country = rLocale.Country;
382 aParserLocale.Variant = rLocale.Variant;
384 if ( !mxLocaleData.is() )
386 mxLocaleData.set( LocaleData2::create(m_xContext) );
391 void cclass_Unicode::setupParserTable( const Locale& rLocale, sal_Int32 startCharTokenType,
392 const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
393 const OUString& userDefinedCharactersCont )
395 bool bIntlEqual = (rLocale.Language == aParserLocale.Language &&
396 rLocale.Country == aParserLocale.Country &&
397 rLocale.Variant == aParserLocale.Variant);
398 if ( !pTable || !bIntlEqual ||
399 startCharTokenType != nStartTypes ||
400 contCharTokenType != nContTypes ||
401 userDefinedCharactersStart != aStartChars ||
402 userDefinedCharactersCont != aContChars )
403 initParserTable( rLocale, startCharTokenType, userDefinedCharactersStart,
404 contCharTokenType, userDefinedCharactersCont );
408 void cclass_Unicode::initParserTable( const Locale& rLocale, sal_Int32 startCharTokenType,
409 const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
410 const OUString& userDefinedCharactersCont )
412 // (Re)Init
413 setupInternational( rLocale );
414 // Memory of pTable is reused.
415 if ( !pTable )
416 pTable.reset(new ParserFlags[nDefCnt]);
417 memcpy( pTable.get(), pDefaultParserTable, sizeof(ParserFlags) * nDefCnt );
418 // Start and cont tables only need reallocation if different length.
419 if ( pStart && userDefinedCharactersStart.getLength() != aStartChars.getLength() )
421 pStart.reset();
423 if ( pCont && userDefinedCharactersCont.getLength() != aContChars.getLength() )
425 pCont.reset();
427 nStartTypes = startCharTokenType;
428 nContTypes = contCharTokenType;
429 aStartChars = userDefinedCharactersStart;
430 aContChars = userDefinedCharactersCont;
432 // specials
433 if( mxLocaleData.is() )
435 LocaleDataItem2 aItem =
436 mxLocaleData->getLocaleItem2( aParserLocale );
437 //!TODO: theoretically separators may be a string, adjustment would have to be
438 //! done here and in parsing and in ::rtl::math::stringToDouble()
439 cGroupSep = aItem.thousandSeparator[0];
440 cDecimalSep = aItem.decimalSeparator[0];
441 cDecimalSepAlt = aItem.decimalSeparatorAlternative.toChar();
444 if ( cGroupSep < nDefCnt )
445 pTable[cGroupSep] |= ParserFlags::VALUE;
446 if ( cDecimalSep < nDefCnt )
447 pTable[cDecimalSep] |= ParserFlags::CHAR_VALUE | ParserFlags::VALUE;
448 if ( cDecimalSepAlt && cDecimalSepAlt < nDefCnt )
449 pTable[cDecimalSepAlt] |= ParserFlags::CHAR_VALUE | ParserFlags::VALUE;
451 // Modify characters according to KParseTokens definitions.
453 using namespace KParseTokens;
454 sal_uInt8 i;
456 if ( !(nStartTypes & ASC_UPALPHA) )
457 for ( i = 65; i < 91; i++ )
458 pTable[i] &= ~ParserFlags::CHAR_WORD; // not allowed as start character
459 if ( !(nContTypes & ASC_UPALPHA) )
460 for ( i = 65; i < 91; i++ )
461 pTable[i] &= ~ParserFlags::WORD; // not allowed as cont character
463 if ( !(nStartTypes & ASC_LOALPHA) )
464 for ( i = 97; i < 123; i++ )
465 pTable[i] &= ~ParserFlags::CHAR_WORD; // not allowed as start character
466 if ( !(nContTypes & ASC_LOALPHA) )
467 for ( i = 97; i < 123; i++ )
468 pTable[i] &= ~ParserFlags::WORD; // not allowed as cont character
470 if ( nStartTypes & ASC_DIGIT )
471 for ( i = 48; i < 58; i++ )
472 pTable[i] |= ParserFlags::CHAR_WORD; // allowed as start character
473 if ( !(nContTypes & ASC_DIGIT) )
474 for ( i = 48; i < 58; i++ )
475 pTable[i] &= ~ParserFlags::WORD; // not allowed as cont character
477 if ( !(nStartTypes & ASC_UNDERSCORE) )
478 pTable[95] &= ~ParserFlags::CHAR_WORD; // not allowed as start character
479 if ( !(nContTypes & ASC_UNDERSCORE) )
480 pTable[95] &= ~ParserFlags::WORD; // not allowed as cont character
482 if ( nStartTypes & ASC_DOLLAR )
483 pTable[36] |= ParserFlags::CHAR_WORD; // allowed as start character
484 if ( nContTypes & ASC_DOLLAR )
485 pTable[36] |= ParserFlags::WORD; // allowed as cont character
487 if ( nStartTypes & ASC_DOT )
488 pTable[46] |= ParserFlags::CHAR_WORD; // allowed as start character
489 if ( nContTypes & ASC_DOT )
490 pTable[46] |= ParserFlags::WORD; // allowed as cont character
492 if ( nStartTypes & ASC_COLON )
493 pTable[58] |= ParserFlags::CHAR_WORD; // allowed as start character
494 if ( nContTypes & ASC_COLON )
495 pTable[58] |= ParserFlags::WORD; // allowed as cont character
497 if ( nStartTypes & ASC_CONTROL )
498 for ( i = 1; i < 32; i++ )
499 pTable[i] |= ParserFlags::CHAR_WORD; // allowed as start character
500 if ( nContTypes & ASC_CONTROL )
501 for ( i = 1; i < 32; i++ )
502 pTable[i] |= ParserFlags::WORD; // allowed as cont character
504 if ( nStartTypes & ASC_ANY_BUT_CONTROL )
505 for ( i = 32; i < nDefCnt; i++ )
506 pTable[i] |= ParserFlags::CHAR_WORD; // allowed as start character
507 if ( nContTypes & ASC_ANY_BUT_CONTROL )
508 for ( i = 32; i < nDefCnt; i++ )
509 pTable[i] |= ParserFlags::WORD; // allowed as cont character
513 // Merge in (positively override with) user defined characters.
514 // StartChars
515 sal_Int32 nLen = aStartChars.getLength();
516 if ( nLen )
518 if ( !pStart )
519 pStart.reset(new ParserFlags[ nLen ]);
520 const sal_Unicode* p = aStartChars.getStr();
521 for ( sal_Int32 j=0; j<nLen; j++, p++ )
523 pStart[j] = ParserFlags::CHAR_WORD;
524 if ( *p < nDefCnt )
525 pTable[*p] |= ParserFlags::CHAR_WORD;
528 // ContChars
529 nLen = aContChars.getLength();
530 if ( nLen )
532 if ( !pCont )
533 pCont.reset(new ParserFlags[ nLen ]);
534 const sal_Unicode* p = aContChars.getStr();
535 for ( sal_Int32 j=0; j<nLen; j++ )
537 pCont[j] = ParserFlags::WORD;
538 if ( *p < nDefCnt )
539 pTable[*p] |= ParserFlags::WORD;
545 void cclass_Unicode::destroyParserTable()
547 pCont.reset();
548 pStart.reset();
549 pTable.reset();
553 ParserFlags cclass_Unicode::getFlags(sal_uInt32 const c)
555 ParserFlags nMask;
556 if ( c < nDefCnt )
557 nMask = pTable[ sal_uInt8(c) ];
558 else
559 nMask = getFlagsExtended(c);
560 switch ( eState )
562 case ssGetChar :
563 case ssRewindFromValue :
564 case ssIgnoreLeadingInRewind :
565 case ssGetWordFirstChar :
566 if ( !(nMask & ParserFlags::CHAR_WORD) )
568 nMask |= getStartCharsFlags( c );
569 if ( nMask & ParserFlags::CHAR_WORD )
570 nMask &= ~ParserFlags::EXCLUDED;
572 break;
573 case ssGetValue :
574 case ssGetWord :
575 if ( !(nMask & ParserFlags::WORD) )
577 nMask |= getContCharsFlags( c );
578 if ( nMask & ParserFlags::WORD )
579 nMask &= ~ParserFlags::EXCLUDED;
581 break;
582 default:
583 ; // other cases aren't needed, no compiler warning
585 return nMask;
589 ParserFlags cclass_Unicode::getFlagsExtended(sal_uInt32 const c)
591 if ( c == cGroupSep )
592 return ParserFlags::VALUE;
593 else if ( c == cDecimalSep )
594 return ParserFlags::CHAR_VALUE | ParserFlags::VALUE;
595 else if ( cDecimalSepAlt && c == cDecimalSepAlt )
596 return ParserFlags::CHAR_VALUE | ParserFlags::VALUE;
597 bool bStart = (eState == ssGetChar || eState == ssGetWordFirstChar ||
598 eState == ssRewindFromValue || eState == ssIgnoreLeadingInRewind);
599 sal_Int32 nTypes = (bStart ? nStartTypes : nContTypes);
601 //! all KParseTokens::UNI_... must be matched
602 switch (u_charType(c))
604 case U_UPPERCASE_LETTER :
605 return (nTypes & KParseTokens::UNI_UPALPHA) ?
606 (bStart ? ParserFlags::CHAR_WORD : ParserFlags::WORD) :
607 ParserFlags::ILLEGAL;
608 case U_LOWERCASE_LETTER :
609 return (nTypes & KParseTokens::UNI_LOALPHA) ?
610 (bStart ? ParserFlags::CHAR_WORD : ParserFlags::WORD) :
611 ParserFlags::ILLEGAL;
612 case U_TITLECASE_LETTER :
613 return (nTypes & KParseTokens::UNI_TITLE_ALPHA) ?
614 (bStart ? ParserFlags::CHAR_WORD : ParserFlags::WORD) :
615 ParserFlags::ILLEGAL;
616 case U_MODIFIER_LETTER :
617 return (nTypes & KParseTokens::UNI_MODIFIER_LETTER) ?
618 (bStart ? ParserFlags::CHAR_WORD : ParserFlags::WORD) :
619 ParserFlags::ILLEGAL;
620 case U_NON_SPACING_MARK :
621 case U_COMBINING_SPACING_MARK :
622 // Non_Spacing_Mark can't be a leading character,
623 // nor can a spacing combining mark.
624 if (bStart)
625 return ParserFlags::ILLEGAL;
626 SAL_FALLTHROUGH; // treat it as Other_Letter.
627 case U_OTHER_LETTER :
628 return (nTypes & KParseTokens::UNI_OTHER_LETTER) ?
629 (bStart ? ParserFlags::CHAR_WORD : ParserFlags::WORD) :
630 ParserFlags::ILLEGAL;
631 case U_DECIMAL_DIGIT_NUMBER :
632 return ((nTypes & KParseTokens::UNI_DIGIT) ?
633 (bStart ? ParserFlags::CHAR_WORD : ParserFlags::WORD) :
634 ParserFlags::ILLEGAL) | TOKEN_DIGIT_FLAGS;
635 case U_LETTER_NUMBER :
636 return ((nTypes & KParseTokens::UNI_LETTER_NUMBER) ?
637 (bStart ? ParserFlags::CHAR_WORD : ParserFlags::WORD) :
638 ParserFlags::ILLEGAL) | TOKEN_DIGIT_FLAGS;
639 case U_OTHER_NUMBER :
640 return ((nTypes & KParseTokens::UNI_OTHER_NUMBER) ?
641 (bStart ? ParserFlags::CHAR_WORD : ParserFlags::WORD) :
642 ParserFlags::ILLEGAL) | TOKEN_DIGIT_FLAGS;
643 case U_SPACE_SEPARATOR :
644 return ((nTypes & KParseTokens::IGNORE_LEADING_WS) ?
645 ParserFlags::CHAR_DONTCARE : (bStart ? ParserFlags::CHAR_WORD : (ParserFlags::CHAR_DONTCARE | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP) ));
646 case U_OTHER_PUNCTUATION:
647 // fdo#61754 Lets see (if we not at the start) if this is midletter
648 // punctuation and allow it in a word if it is similarly to
649 // U_NON_SPACING_MARK
650 if (bStart || U_WB_MIDLETTER != u_getIntPropertyValue(c, UCHAR_WORD_BREAK))
651 return ParserFlags::ILLEGAL;
652 else
654 //allowing it to continue the word
655 return (nTypes & KParseTokens::UNI_OTHER_LETTER) ?
656 ParserFlags::WORD : ParserFlags::ILLEGAL;
658 break;
661 return ParserFlags::ILLEGAL;
665 ParserFlags cclass_Unicode::getStartCharsFlags( sal_uInt32 c )
667 if ( pStart )
669 const sal_Unicode* pStr = aStartChars.getStr();
670 const sal_Unicode* p = StrChr( pStr, c );
671 if ( p )
672 return pStart[ p - pStr ];
674 return ParserFlags::ILLEGAL;
678 ParserFlags cclass_Unicode::getContCharsFlags( sal_Unicode c )
680 if ( pCont )
682 const sal_Unicode* pStr = aContChars.getStr();
683 const sal_Unicode* p = StrChr( pStr, c );
684 if ( p )
685 return pCont[ p - pStr ];
687 return ParserFlags::ILLEGAL;
691 void cclass_Unicode::parseText( ParseResult& r, const OUString& rText, sal_Int32 nPos, sal_Int32 nTokenType )
693 assert(r.LeadingWhiteSpace == 0);
694 eState = ssGetChar;
696 //! All the variables below (plus ParseResult) have to be resetted on ssRewindFromValue!
697 OUString aSymbol;
698 bool isFirst(true);
699 sal_Int32 index(nPos); // index of next code point after current
700 sal_Int32 postSymbolIndex(index); // index of code point following last quote
701 sal_uInt32 current((index < rText.getLength()) ? rText.iterateCodePoints(&index) : 0);
702 sal_uInt32 cLast = 0;
703 sal_Int32 nCodePoints(0);
704 int nDecSeps = 0;
705 bool bQuote = false;
706 bool bMightBeWord = true;
707 bool bMightBeWordLast = true;
708 bool bDecSepAltUsed = false;
709 //! All the variables above (plus ParseResult) have to be resetted on ssRewindFromValue!
710 sal_Int32 nextCharIndex(nPos); // == index of nextChar
712 while ((current != 0) && (eState != ssStop))
714 ++nCodePoints;
715 ParserFlags nMask = getFlags(current);
716 if ( nMask & ParserFlags::EXCLUDED )
717 eState = ssBounce;
718 if ( bMightBeWord )
719 { // only relevant for ssGetValue fall back
720 if ( eState == ssGetChar || eState == ssRewindFromValue ||
721 eState == ssIgnoreLeadingInRewind )
722 bMightBeWord = bool(nMask & ParserFlags::CHAR_WORD);
723 else
724 bMightBeWord = bool(nMask & ParserFlags::WORD);
726 sal_Int32 nParseTokensType = getParseTokensType(current, isFirst);
727 isFirst = false;
728 sal_Int32 const nextIndex(nextCharIndex); // == index of char following current
729 nextCharIndex = index; // == index of nextChar
730 sal_uInt32 nextChar((index < rText.getLength()) ? rText.iterateCodePoints(&index) : 0);
731 switch (eState)
733 case ssGetChar :
734 case ssRewindFromValue :
735 case ssIgnoreLeadingInRewind :
737 if ( (nMask & ParserFlags::CHAR_VALUE) && eState != ssRewindFromValue
738 && eState != ssIgnoreLeadingInRewind )
739 { //! must be first, may fall back to ssGetWord via bMightBeWord
740 eState = ssGetValue;
741 if ( nMask & ParserFlags::VALUE_DIGIT )
743 if (128 <= current)
744 r.TokenType = KParseType::UNI_NUMBER;
745 else
746 r.TokenType = KParseType::ASC_NUMBER;
748 else if (current == cDecimalSep || (bDecSepAltUsed = (cDecimalSepAlt && current == cDecimalSep)))
750 if (nextChar)
751 ++nDecSeps;
752 else
753 eState = ssRewindFromValue;
754 // retry for ONE_SINGLE_CHAR or others
757 else if ( nMask & ParserFlags::CHAR_WORD )
759 eState = ssGetWord;
760 r.TokenType = KParseType::IDENTNAME;
762 else if ( nMask & ParserFlags::NAME_SEP )
764 eState = ssGetWordFirstChar;
765 bQuote = true;
766 postSymbolIndex = nextCharIndex;
767 nParseTokensType = 0; // will be taken of first real character
768 r.TokenType = KParseType::SINGLE_QUOTE_NAME;
770 else if ( nMask & ParserFlags::CHAR_STRING )
772 eState = ssGetString;
773 postSymbolIndex = nextCharIndex;
774 nParseTokensType = 0; // will be taken of first real character
775 r.TokenType = KParseType::DOUBLE_QUOTE_STRING;
777 else if ( nMask & ParserFlags::CHAR_DONTCARE )
779 if ( nStartTypes & KParseTokens::IGNORE_LEADING_WS )
781 if (eState == ssRewindFromValue)
782 eState = ssIgnoreLeadingInRewind;
783 r.LeadingWhiteSpace = nextCharIndex - nPos;
784 nCodePoints--; // exclude leading whitespace
785 postSymbolIndex = nextCharIndex;
786 nParseTokensType = 0; // wait until real character
787 bMightBeWord = true;
789 else
790 eState = ssBounce;
792 else if ( nMask & ParserFlags::CHAR_BOOL )
794 eState = ssGetBool;
795 r.TokenType = KParseType::BOOLEAN;
797 else if ( nMask & ParserFlags::CHAR )
798 { //! must be last
799 eState = ssStop;
800 r.TokenType = KParseType::ONE_SINGLE_CHAR;
802 else
803 eState = ssBounce; // not known
805 break;
806 case ssGetValue :
808 if ( nMask & ParserFlags::VALUE_DIGIT )
810 if (128 <= current)
811 r.TokenType = KParseType::UNI_NUMBER;
812 else if ( r.TokenType != KParseType::UNI_NUMBER )
813 r.TokenType = KParseType::ASC_NUMBER;
815 if ( nMask & ParserFlags::VALUE )
817 if ((current == cDecimalSep || (bDecSepAltUsed = (cDecimalSepAlt && current == cDecimalSepAlt))) &&
818 ++nDecSeps > 1)
820 if (nCodePoints == 2)
821 eState = ssRewindFromValue;
822 // consecutive separators
823 else
824 eState = ssStopBack;
826 // else keep it going
828 else if (current == 'E' || current == 'e')
830 ParserFlags nNext = getFlags(nextChar);
831 if ( nNext & ParserFlags::VALUE_EXP )
832 ; // keep it going
833 else if (bMightBeWord && ((nNext & ParserFlags::WORD) || !nextChar))
834 { // might be a numerical name (1.2efg)
835 eState = ssGetWord;
836 r.TokenType = KParseType::IDENTNAME;
838 else
839 eState = ssStopBack;
841 else if ( nMask & ParserFlags::VALUE_SIGN )
843 if ( (cLast == 'E') || (cLast == 'e') )
845 ParserFlags nNext = getFlags(nextChar);
846 if ( nNext & ParserFlags::VALUE_EXP_VALUE )
847 ; // keep it going
848 else if (bMightBeWord && ((nNext & ParserFlags::WORD) || !nextChar))
849 { // might be a numerical name (1.2e+fg)
850 eState = ssGetWord;
851 r.TokenType = KParseType::IDENTNAME;
853 else
854 eState = ssStopBack;
856 else if ( bMightBeWord )
857 { // might be a numerical name (1.2+fg)
858 eState = ssGetWord;
859 r.TokenType = KParseType::IDENTNAME;
861 else
862 eState = ssStopBack;
864 else if ( bMightBeWord && (nMask & ParserFlags::WORD) )
865 { // might be a numerical name (1995.A1)
866 eState = ssGetWord;
867 r.TokenType = KParseType::IDENTNAME;
869 else
870 eState = ssStopBack;
872 break;
873 case ssGetWordFirstChar :
874 eState = ssGetWord;
875 SAL_FALLTHROUGH;
876 case ssGetWord :
878 if ( nMask & ParserFlags::WORD )
879 ; // keep it going
880 else if ( nMask & ParserFlags::NAME_SEP )
882 if ( bQuote )
884 if ( cLast == '\\' )
885 { // escaped
886 aSymbol += rText.copy(postSymbolIndex, nextCharIndex - postSymbolIndex - 2);
887 aSymbol += OUString(&current, 1);
889 else
891 eState = ssStop;
892 aSymbol += rText.copy(postSymbolIndex, nextCharIndex - postSymbolIndex - 1);
894 postSymbolIndex = nextCharIndex;
896 else
897 eState = ssStopBack;
899 else if ( bQuote )
900 ; // keep it going
901 else
902 eState = ssStopBack;
904 break;
905 case ssGetString :
907 if ( nMask & ParserFlags::STRING_SEP )
909 if ( cLast == '\\' )
910 { // escaped
911 aSymbol += rText.copy(postSymbolIndex, nextCharIndex - postSymbolIndex - 2);
912 aSymbol += OUString(&current, 1);
914 else if (current == nextChar &&
915 !(nContTypes & KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING) )
916 { // "" => literal " escaped
917 aSymbol += rText.copy(postSymbolIndex, nextCharIndex - postSymbolIndex);
918 nextCharIndex = index;
919 if (index < rText.getLength()) { ++nCodePoints; }
920 nextChar = (index < rText.getLength()) ? rText.iterateCodePoints(&index) : 0;
922 else
924 eState = ssStop;
925 aSymbol += rText.copy(postSymbolIndex, nextCharIndex - postSymbolIndex - 1);
927 postSymbolIndex = nextCharIndex;
930 break;
931 case ssGetBool :
933 if ( nMask & ParserFlags::BOOL )
934 eState = ssStop; // maximum 2: <, >, <>, <=, >=
935 else
936 eState = ssStopBack;
938 break;
939 case ssStopBack :
940 case ssBounce :
941 case ssStop :
942 ; // nothing, no compiler warning
943 break;
945 if ( eState == ssRewindFromValue )
947 r = ParseResult();
948 index = nPos;
949 postSymbolIndex = nPos;
950 nextCharIndex = nPos;
951 aSymbol.clear();
952 current = (index < rText.getLength()) ? rText.iterateCodePoints(&index) : 0;
953 nCodePoints = (nPos < rText.getLength()) ? 1 : 0;
954 isFirst = true;
955 cLast = 0;
956 nDecSeps = 0;
957 bQuote = false;
958 bMightBeWord = true;
959 bMightBeWordLast = true;
960 bDecSepAltUsed = false;
962 else
964 if ( !(r.TokenType & nTokenType) )
966 if ( (r.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER))
967 && (nTokenType & KParseType::IDENTNAME) && bMightBeWord )
968 ; // keep a number that might be a word
969 else if (r.LeadingWhiteSpace == (nextCharIndex - nPos))
970 ; // keep ignored white space
971 else if ( !r.TokenType && eState == ssGetValue && (nMask & ParserFlags::VALUE_SEP) )
972 ; // keep uncertain value
973 else
974 eState = ssBounce;
976 if ( eState == ssBounce )
978 r.TokenType = 0;
979 eState = ssStopBack;
981 if ( eState == ssStopBack )
982 { // put back
983 nextChar = rText.iterateCodePoints(&index, -1);
984 nextCharIndex = nextIndex;
985 --nCodePoints;
986 bMightBeWord = bMightBeWordLast;
987 eState = ssStop;
989 if ( eState != ssStop )
991 if ( !r.StartFlags )
992 r.StartFlags |= nParseTokensType;
993 else
994 r.ContFlags |= nParseTokensType;
996 bMightBeWordLast = bMightBeWord;
997 cLast = current;
998 current = nextChar;
1001 // r.CharLen is the length in characters (not code units) of the parsed
1002 // token not including any leading white space.
1003 r.CharLen = nCodePoints;
1004 r.EndPos = nextCharIndex;
1005 if ( r.TokenType & KParseType::ASC_NUMBER )
1007 r.Value = rtl_math_uStringToDouble(rText.getStr() + nPos + r.LeadingWhiteSpace,
1008 rText.getStr() + r.EndPos, (bDecSepAltUsed ? cDecimalSepAlt : cDecimalSep), cGroupSep, nullptr, nullptr);
1009 if ( bMightBeWord )
1010 r.TokenType |= KParseType::IDENTNAME;
1012 else if ( r.TokenType & KParseType::UNI_NUMBER )
1014 if ( !xNatNumSup.is() )
1016 if ( m_xContext.is() )
1018 xNatNumSup = NativeNumberSupplier::create( m_xContext );
1021 OUString aTmp(rText.getStr() + nPos + r.LeadingWhiteSpace,
1022 r.EndPos - nPos - r.LeadingWhiteSpace);
1023 // transliterate to ASCII
1024 aTmp = xNatNumSup->getNativeNumberString( aTmp, aParserLocale,
1025 NativeNumberMode::NATNUM0 );
1026 r.Value = ::rtl::math::stringToDouble( aTmp, cDecimalSep, cGroupSep );
1027 if ( bMightBeWord )
1028 r.TokenType |= KParseType::IDENTNAME;
1030 else if ( r.TokenType & (KParseType::SINGLE_QUOTE_NAME | KParseType::DOUBLE_QUOTE_STRING) )
1032 if (postSymbolIndex < nextCharIndex)
1033 { //! open quote
1034 aSymbol += rText.copy(postSymbolIndex, nextCharIndex - postSymbolIndex - 1);
1035 r.TokenType |= KParseType::MISSING_QUOTE;
1037 r.DequotedNameOrString = aSymbol;
1043 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */