1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
21 #include <cclass_unicode.hxx>
22 #include <unicode/uchar.h>
23 #include <rtl/character.hxx>
24 #include <rtl/math.hxx>
25 #include <rtl/ustring.hxx>
26 #include <com/sun/star/i18n/KParseTokens.hpp>
27 #include <com/sun/star/i18n/KParseType.hpp>
28 #include <com/sun/star/i18n/LocaleData2.hpp>
29 #include <com/sun/star/i18n/NativeNumberMode.hpp>
30 #include <com/sun/star/i18n/NativeNumberSupplier.hpp>
33 #include <string_view>
35 using namespace ::com::sun::star::uno
;
36 using namespace ::com::sun::star::i18n
;
37 using namespace ::com::sun::star::lang
;
// Combined flag set for digit characters: the OR of CHAR_VALUE, VALUE,
// VALUE_EXP, VALUE_EXP_VALUE and VALUE_DIGIT. It is used for the '0'..'9'
// entries of pDefaultParserTable and is OR-ed into the results for the Unicode
// number categories in getFlagsExtended().
39 #define TOKEN_DIGIT_FLAGS (ParserFlags::CHAR_VALUE | ParserFlags::VALUE | ParserFlags::VALUE_EXP | ParserFlags::VALUE_EXP_VALUE | ParserFlags::VALUE_DIGIT)
43 // Default identifier/name specification is [A-Za-z_][A-Za-z0-9_]*
// Number of entries in the per-character tables pDefaultParserTable and
// pParseTokensType; the index comments on their entries run from \0 to 127.
// Code points below this bound are also the ones checked with "< nDefCnt"
// before indexing pTable in initParserTable().
45 const sal_uInt8
cclass_Unicode::nDefCnt
= 128;
// Default parser flags per character, indexed by character code (see the
// "/* code char */" comment in front of each entry). initParserTable() copies
// this table into pTable and then adjusts the copy.
// Trailing comments in parentheses record alternative flag settings; per the
// note below they are specific to the Calc formula compiler and not active.
// NOTE(review): in this listing the entries for codes 1-8, 10 and 12-31 as
// well as the initializer braces are not visible.
46 const ParserFlags
cclass_Unicode::pDefaultParserTable
[ nDefCnt
] =
48 // (...) == Calc formula compiler specific, commented out and modified
50 /* \0 */ ParserFlags::EXCLUDED
,
59 /* 9 \t */ ParserFlags::CHAR_DONTCARE
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
, // (ParserFlags::ILLEGAL)
61 /* 11 \v */ ParserFlags::CHAR_DONTCARE
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
, // (ParserFlags::ILLEGAL)
82 /* 32 */ ParserFlags::CHAR_DONTCARE
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
,
83 /* 33 ! */ ParserFlags::CHAR
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
,
// '"' is the only entry carrying the string flags (CHAR_STRING / STRING_SEP).
84 /* 34 " */ ParserFlags::CHAR_STRING
| ParserFlags::STRING_SEP
,
85 /* 35 # */ ParserFlags::CHAR
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
, // (ParserFlags::WORD_SEP)
86 /* 36 $ */ ParserFlags::CHAR
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
, // (ParserFlags::CHAR_WORD | ParserFlags::WORD)
87 /* 37 % */ ParserFlags::CHAR
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
, // (ParserFlags::VALUE)
88 /* 38 & */ ParserFlags::CHAR
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
,
// The apostrophe is the only entry carrying NAME_SEP.
89 /* 39 ' */ ParserFlags::NAME_SEP
,
90 /* 40 ( */ ParserFlags::CHAR
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
,
91 /* 41 ) */ ParserFlags::CHAR
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
,
92 /* 42 * */ ParserFlags::CHAR
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
,
// '+' and '-' are the only visible entries that carry VALUE_SIGN; they also
// carry VALUE_EXP in addition to the usual separator flags.
93 /* 43 + */ ParserFlags::CHAR
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
| ParserFlags::VALUE_EXP
| ParserFlags::VALUE_SIGN
,
94 /* 44 , */ ParserFlags::CHAR
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
, // (ParserFlags::CHAR_VALUE | ParserFlags::VALUE)
95 /* 45 - */ ParserFlags::CHAR
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
| ParserFlags::VALUE_EXP
| ParserFlags::VALUE_SIGN
,
96 /* 46 . */ ParserFlags::CHAR
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
, // (ParserFlags::WORD | ParserFlags::CHAR_VALUE | ParserFlags::VALUE)
97 /* 47 / */ ParserFlags::CHAR
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
,
// Digits: numeric flags plus WORD, but not CHAR_WORD, i.e. by default a digit
// may continue a word but not start one (matches the [A-Za-z_][A-Za-z0-9_]*
// default named at the top of the file).
98 //for ( i = 48; i < 58; i++ )
99 /* 48 0 */ TOKEN_DIGIT_FLAGS
| ParserFlags::WORD
,
100 /* 49 1 */ TOKEN_DIGIT_FLAGS
| ParserFlags::WORD
,
101 /* 50 2 */ TOKEN_DIGIT_FLAGS
| ParserFlags::WORD
,
102 /* 51 3 */ TOKEN_DIGIT_FLAGS
| ParserFlags::WORD
,
103 /* 52 4 */ TOKEN_DIGIT_FLAGS
| ParserFlags::WORD
,
104 /* 53 5 */ TOKEN_DIGIT_FLAGS
| ParserFlags::WORD
,
105 /* 54 6 */ TOKEN_DIGIT_FLAGS
| ParserFlags::WORD
,
106 /* 55 7 */ TOKEN_DIGIT_FLAGS
| ParserFlags::WORD
,
107 /* 56 8 */ TOKEN_DIGIT_FLAGS
| ParserFlags::WORD
,
108 /* 57 9 */ TOKEN_DIGIT_FLAGS
| ParserFlags::WORD
,
109 /* 58 : */ ParserFlags::CHAR
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
, // (ParserFlags::WORD)
110 /* 59 ; */ ParserFlags::CHAR
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
,
// Comparison characters: '<' carries CHAR_BOOL only, '=' carries BOOL only
// (besides CHAR), '>' carries both. parseText() tests CHAR_BOOL on the first
// character of a token and BOOL on a following one.
111 /* 60 < */ ParserFlags::CHAR_BOOL
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
,
112 /* 61 = */ ParserFlags::CHAR
| ParserFlags::BOOL
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
,
113 /* 62 > */ ParserFlags::CHAR_BOOL
| ParserFlags::BOOL
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
,
114 /* 63 ? */ ParserFlags::CHAR
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
, // (ParserFlags::CHAR_WORD | ParserFlags::WORD)
115 /* 64 @ */ ParserFlags::CHAR
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
, // (ParserFlags::ILLEGAL // UNUSED)
// Upper-case ASCII letters: may start (CHAR_WORD) and continue (WORD) a word.
116 //for ( i = 65; i < 91; i++ )
117 /* 65 A */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
118 /* 66 B */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
119 /* 67 C */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
120 /* 68 D */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
121 /* 69 E */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
122 /* 70 F */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
123 /* 71 G */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
124 /* 72 H */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
125 /* 73 I */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
126 /* 74 J */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
127 /* 75 K */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
128 /* 76 L */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
129 /* 77 M */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
130 /* 78 N */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
131 /* 79 O */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
132 /* 80 P */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
133 /* 81 Q */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
134 /* 82 R */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
135 /* 83 S */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
136 /* 84 T */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
137 /* 85 U */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
138 /* 86 V */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
139 /* 87 W */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
140 /* 88 X */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
141 /* 89 Y */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
142 /* 90 Z */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
143 /* 91 [ */ ParserFlags::CHAR
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
, // (ParserFlags::ILLEGAL // UNUSED)
144 /* 92 \ */ ParserFlags::CHAR
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
, // (ParserFlags::ILLEGAL // UNUSED)
145 /* 93 ] */ ParserFlags::CHAR
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
, // (ParserFlags::ILLEGAL // UNUSED)
146 /* 94 ^ */ ParserFlags::CHAR
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
,
// Underscore is treated like a letter: it may start and continue a word.
147 /* 95 _ */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
148 /* 96 ` */ ParserFlags::CHAR
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
, // (ParserFlags::ILLEGAL // UNUSED)
// Lower-case ASCII letters: may start (CHAR_WORD) and continue (WORD) a word.
149 //for ( i = 97; i < 123; i++ )
150 /* 97 a */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
151 /* 98 b */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
152 /* 99 c */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
153 /* 100 d */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
154 /* 101 e */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
155 /* 102 f */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
156 /* 103 g */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
157 /* 104 h */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
158 /* 105 i */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
159 /* 106 j */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
160 /* 107 k */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
161 /* 108 l */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
162 /* 109 m */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
163 /* 110 n */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
164 /* 111 o */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
165 /* 112 p */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
166 /* 113 q */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
167 /* 114 r */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
168 /* 115 s */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
169 /* 116 t */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
170 /* 117 u */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
171 /* 118 v */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
172 /* 119 w */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
173 /* 120 x */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
174 /* 121 y */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
175 /* 122 z */ ParserFlags::CHAR_WORD
| ParserFlags::WORD
,
176 /* 123 { */ ParserFlags::CHAR
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
, // (ParserFlags::ILLEGAL // UNUSED)
177 /* 124 | */ ParserFlags::CHAR
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
, // (ParserFlags::ILLEGAL // UNUSED)
178 /* 125 } */ ParserFlags::CHAR
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
, // (ParserFlags::ILLEGAL // UNUSED)
179 /* 126 ~ */ ParserFlags::CHAR
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
, // (ParserFlags::ILLEGAL // UNUSED)
180 /* 127 */ ParserFlags::CHAR
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
// (ParserFlags::ILLEGAL // UNUSED)
// KParseTokens classification per character, indexed by character code in the
// same order as pDefaultParserTable. getParseTokensType() returns an entry of
// this table for its table-lookup path.
// Layout as visible below: \0 is ASC_OTHER, codes 1-31 are ASC_CONTROL, digits
// are ASC_DIGIT, letters ASC_UPALPHA / ASC_LOALPHA, and '$', '.', ':' and '_'
// have dedicated constants; everything else is ASC_OTHER.
// NOTE(review): the initializer braces are not visible in this listing.
184 const sal_Int32
cclass_Unicode::pParseTokensType
[ nDefCnt
] =
186 /* \0 */ KParseTokens::ASC_OTHER
,
// Codes 1-31 (31 entries).
187 KParseTokens::ASC_CONTROL
,
188 KParseTokens::ASC_CONTROL
,
189 KParseTokens::ASC_CONTROL
,
190 KParseTokens::ASC_CONTROL
,
191 KParseTokens::ASC_CONTROL
,
192 KParseTokens::ASC_CONTROL
,
193 KParseTokens::ASC_CONTROL
,
194 KParseTokens::ASC_CONTROL
,
195 /* 9 \t */ KParseTokens::ASC_CONTROL
,
196 KParseTokens::ASC_CONTROL
,
197 /* 11 \v */ KParseTokens::ASC_CONTROL
,
198 KParseTokens::ASC_CONTROL
,
199 KParseTokens::ASC_CONTROL
,
200 KParseTokens::ASC_CONTROL
,
201 KParseTokens::ASC_CONTROL
,
202 KParseTokens::ASC_CONTROL
,
203 KParseTokens::ASC_CONTROL
,
204 KParseTokens::ASC_CONTROL
,
205 KParseTokens::ASC_CONTROL
,
206 KParseTokens::ASC_CONTROL
,
207 KParseTokens::ASC_CONTROL
,
208 KParseTokens::ASC_CONTROL
,
209 KParseTokens::ASC_CONTROL
,
210 KParseTokens::ASC_CONTROL
,
211 KParseTokens::ASC_CONTROL
,
212 KParseTokens::ASC_CONTROL
,
213 KParseTokens::ASC_CONTROL
,
214 KParseTokens::ASC_CONTROL
,
215 KParseTokens::ASC_CONTROL
,
216 KParseTokens::ASC_CONTROL
,
217 KParseTokens::ASC_CONTROL
,
218 /* 32 */ KParseTokens::ASC_OTHER
,
219 /* 33 ! */ KParseTokens::ASC_OTHER
,
220 /* 34 " */ KParseTokens::ASC_OTHER
,
221 /* 35 # */ KParseTokens::ASC_OTHER
,
222 /* 36 $ */ KParseTokens::ASC_DOLLAR
,
223 /* 37 % */ KParseTokens::ASC_OTHER
,
224 /* 38 & */ KParseTokens::ASC_OTHER
,
225 /* 39 ' */ KParseTokens::ASC_OTHER
,
226 /* 40 ( */ KParseTokens::ASC_OTHER
,
227 /* 41 ) */ KParseTokens::ASC_OTHER
,
228 /* 42 * */ KParseTokens::ASC_OTHER
,
229 /* 43 + */ KParseTokens::ASC_OTHER
,
230 /* 44 , */ KParseTokens::ASC_OTHER
,
231 /* 45 - */ KParseTokens::ASC_OTHER
,
232 /* 46 . */ KParseTokens::ASC_DOT
,
233 /* 47 / */ KParseTokens::ASC_OTHER
,
234 //for ( i = 48; i < 58; i++ )
235 /* 48 0 */ KParseTokens::ASC_DIGIT
,
236 /* 49 1 */ KParseTokens::ASC_DIGIT
,
237 /* 50 2 */ KParseTokens::ASC_DIGIT
,
238 /* 51 3 */ KParseTokens::ASC_DIGIT
,
239 /* 52 4 */ KParseTokens::ASC_DIGIT
,
240 /* 53 5 */ KParseTokens::ASC_DIGIT
,
241 /* 54 6 */ KParseTokens::ASC_DIGIT
,
242 /* 55 7 */ KParseTokens::ASC_DIGIT
,
243 /* 56 8 */ KParseTokens::ASC_DIGIT
,
244 /* 57 9 */ KParseTokens::ASC_DIGIT
,
245 /* 58 : */ KParseTokens::ASC_COLON
,
246 /* 59 ; */ KParseTokens::ASC_OTHER
,
247 /* 60 < */ KParseTokens::ASC_OTHER
,
248 /* 61 = */ KParseTokens::ASC_OTHER
,
249 /* 62 > */ KParseTokens::ASC_OTHER
,
250 /* 63 ? */ KParseTokens::ASC_OTHER
,
251 /* 64 @ */ KParseTokens::ASC_OTHER
,
252 //for ( i = 65; i < 91; i++ )
253 /* 65 A */ KParseTokens::ASC_UPALPHA
,
254 /* 66 B */ KParseTokens::ASC_UPALPHA
,
255 /* 67 C */ KParseTokens::ASC_UPALPHA
,
256 /* 68 D */ KParseTokens::ASC_UPALPHA
,
257 /* 69 E */ KParseTokens::ASC_UPALPHA
,
258 /* 70 F */ KParseTokens::ASC_UPALPHA
,
259 /* 71 G */ KParseTokens::ASC_UPALPHA
,
260 /* 72 H */ KParseTokens::ASC_UPALPHA
,
261 /* 73 I */ KParseTokens::ASC_UPALPHA
,
262 /* 74 J */ KParseTokens::ASC_UPALPHA
,
263 /* 75 K */ KParseTokens::ASC_UPALPHA
,
264 /* 76 L */ KParseTokens::ASC_UPALPHA
,
265 /* 77 M */ KParseTokens::ASC_UPALPHA
,
266 /* 78 N */ KParseTokens::ASC_UPALPHA
,
267 /* 79 O */ KParseTokens::ASC_UPALPHA
,
268 /* 80 P */ KParseTokens::ASC_UPALPHA
,
269 /* 81 Q */ KParseTokens::ASC_UPALPHA
,
270 /* 82 R */ KParseTokens::ASC_UPALPHA
,
271 /* 83 S */ KParseTokens::ASC_UPALPHA
,
272 /* 84 T */ KParseTokens::ASC_UPALPHA
,
273 /* 85 U */ KParseTokens::ASC_UPALPHA
,
274 /* 86 V */ KParseTokens::ASC_UPALPHA
,
275 /* 87 W */ KParseTokens::ASC_UPALPHA
,
276 /* 88 X */ KParseTokens::ASC_UPALPHA
,
277 /* 89 Y */ KParseTokens::ASC_UPALPHA
,
278 /* 90 Z */ KParseTokens::ASC_UPALPHA
,
279 /* 91 [ */ KParseTokens::ASC_OTHER
,
280 /* 92 \ */ KParseTokens::ASC_OTHER
,
281 /* 93 ] */ KParseTokens::ASC_OTHER
,
282 /* 94 ^ */ KParseTokens::ASC_OTHER
,
283 /* 95 _ */ KParseTokens::ASC_UNDERSCORE
,
284 /* 96 ` */ KParseTokens::ASC_OTHER
,
285 //for ( i = 97; i < 123; i++ )
286 /* 97 a */ KParseTokens::ASC_LOALPHA
,
287 /* 98 b */ KParseTokens::ASC_LOALPHA
,
288 /* 99 c */ KParseTokens::ASC_LOALPHA
,
289 /* 100 d */ KParseTokens::ASC_LOALPHA
,
290 /* 101 e */ KParseTokens::ASC_LOALPHA
,
291 /* 102 f */ KParseTokens::ASC_LOALPHA
,
292 /* 103 g */ KParseTokens::ASC_LOALPHA
,
293 /* 104 h */ KParseTokens::ASC_LOALPHA
,
294 /* 105 i */ KParseTokens::ASC_LOALPHA
,
295 /* 106 j */ KParseTokens::ASC_LOALPHA
,
296 /* 107 k */ KParseTokens::ASC_LOALPHA
,
297 /* 108 l */ KParseTokens::ASC_LOALPHA
,
298 /* 109 m */ KParseTokens::ASC_LOALPHA
,
299 /* 110 n */ KParseTokens::ASC_LOALPHA
,
300 /* 111 o */ KParseTokens::ASC_LOALPHA
,
301 /* 112 p */ KParseTokens::ASC_LOALPHA
,
302 /* 113 q */ KParseTokens::ASC_LOALPHA
,
303 /* 114 r */ KParseTokens::ASC_LOALPHA
,
304 /* 115 s */ KParseTokens::ASC_LOALPHA
,
305 /* 116 t */ KParseTokens::ASC_LOALPHA
,
306 /* 117 u */ KParseTokens::ASC_LOALPHA
,
307 /* 118 v */ KParseTokens::ASC_LOALPHA
,
308 /* 119 w */ KParseTokens::ASC_LOALPHA
,
309 /* 120 x */ KParseTokens::ASC_LOALPHA
,
310 /* 121 y */ KParseTokens::ASC_LOALPHA
,
311 /* 122 z */ KParseTokens::ASC_LOALPHA
,
312 /* 123 { */ KParseTokens::ASC_OTHER
,
313 /* 124 | */ KParseTokens::ASC_OTHER
,
314 /* 125 } */ KParseTokens::ASC_OTHER
,
315 /* 126 ~ */ KParseTokens::ASC_OTHER
,
316 /* 127 */ KParseTokens::ASC_OTHER
// Searches the UTF-16 string pStr for the code point c (given as sal_uInt32)
// and returns a pointer into pStr.
// The code point is first split into UTF-16 code units with
// rtl::splitSurrogates(), which stores them in cs and whose result is kept in n.
// NOTE(review): the declaration of cs, the loop over pStr, the return
// statements and any null check are not visible in this listing.
321 const sal_Unicode
* cclass_Unicode::StrChr( const sal_Unicode
* pStr
, sal_uInt32 c
)
326 auto const n
= rtl::splitSurrogates(c
, cs
);
// Match on the first code unit; when n is not 1 the following unit must equal
// cs[1] as well (presumably the surrogate-pair case, n == 2).
329 if ( *pStr
== cs
[0] && (n
== 1 || pStr
[1] == cs
[1]) )
// Maps code point c to a KParseTokens classification constant.
// Two paths are visible: a lookup in pParseTokensType (index truncated to
// sal_uInt8) and a switch on the ICU general category u_charType(c) that
// yields the KParseTokens::UNI_* constants, with UNI_OTHER as the final result.
// NOTE(review): the condition selecting the table path (presumably
// c < nDefCnt), the use of isFirst and the braces/default label are not
// visible in this listing.
337 sal_Int32
cclass_Unicode::getParseTokensType(sal_uInt32
const c
, bool const isFirst
)
340 return pParseTokensType
[ sal_uInt8(c
) ];
344 //! all KParseTokens::UNI_... must be matched
345 switch (u_charType(c
))
347 case U_UPPERCASE_LETTER
:
348 return KParseTokens::UNI_UPALPHA
;
349 case U_LOWERCASE_LETTER
:
350 return KParseTokens::UNI_LOALPHA
;
351 case U_TITLECASE_LETTER
:
352 return KParseTokens::UNI_TITLE_ALPHA
;
353 case U_MODIFIER_LETTER
:
354 return KParseTokens::UNI_MODIFIER_LETTER
;
// NOTE(review): the comment below talks about Non_Spacing_Mark, yet it sits
// under the U_OTHER_LETTER label, which then falls through to the
// U_NON_SPACING_MARK label. getFlagsExtended() orders the two labels the other
// way round; confirm which order is intended here.
355 case U_OTHER_LETTER
:
356 // Non_Spacing_Mark cannot be a leading character
358 [[fallthrough
]]; // treat it as Other_Letter.
359 case U_NON_SPACING_MARK
:
360 return KParseTokens::UNI_OTHER_LETTER
;
361 case U_DECIMAL_DIGIT_NUMBER
:
362 return KParseTokens::UNI_DIGIT
;
363 case U_LETTER_NUMBER
:
364 return KParseTokens::UNI_LETTER_NUMBER
;
365 case U_OTHER_NUMBER
:
366 return KParseTokens::UNI_OTHER_NUMBER
;
// Result for every category not returned above.
369 return KParseTokens::UNI_OTHER
;
// Makes rLocale the parser locale and ensures the locale data service exists.
// bChanged is true when Language, Country or Variant of rLocale differs from
// the stored aParserLocale; the three fields are then copied over.
// mxLocaleData is created through LocaleData2::create(m_xContext) when it is
// not set yet.
// NOTE(review): the condition guarding the field assignments (presumably
// bChanged) is not visible in this listing.
373 void cclass_Unicode::setupInternational( const Locale
& rLocale
)
375 bool bChanged
= (aParserLocale
.Language
!= rLocale
.Language
376 || aParserLocale
.Country
!= rLocale
.Country
377 || aParserLocale
.Variant
!= rLocale
.Variant
);
380 aParserLocale
.Language
= rLocale
.Language
;
381 aParserLocale
.Country
= rLocale
.Country
;
382 aParserLocale
.Variant
= rLocale
.Variant
;
// Create the locale data service on first use only.
384 if ( !mxLocaleData
.is() )
386 mxLocaleData
.set( LocaleData2::create(m_xContext
) );
// Rebuilds the parser table only when needed.
// initParserTable() is called with the same arguments when no table exists
// yet (pTable is null), when rLocale differs from aParserLocale in Language,
// Country or Variant, or when any of the token type masks or user-defined
// character strings differs from the cached nStartTypes, nContTypes,
// aStartChars and aContChars.
391 void cclass_Unicode::setupParserTable( const Locale
& rLocale
, sal_Int32 startCharTokenType
,
392 const OUString
& userDefinedCharactersStart
, sal_Int32 contCharTokenType
,
393 const OUString
& userDefinedCharactersCont
)
395 bool bIntlEqual
= (rLocale
.Language
== aParserLocale
.Language
&&
396 rLocale
.Country
== aParserLocale
.Country
&&
397 rLocale
.Variant
== aParserLocale
.Variant
);
398 if ( !pTable
|| !bIntlEqual
||
399 startCharTokenType
!= nStartTypes
||
400 contCharTokenType
!= nContTypes
||
401 userDefinedCharactersStart
!= aStartChars
||
402 userDefinedCharactersCont
!= aContChars
)
403 initParserTable( rLocale
, startCharTokenType
, userDefinedCharactersStart
,
404 contCharTokenType
, userDefinedCharactersCont
);
// Builds pTable (and the pStart / pCont side tables) for the given locale,
// token type masks and user-defined start/continuation characters.
// Steps visible below:
//   1. setupInternational(rLocale), then pTable is filled with a copy of
//      pDefaultParserTable.
//   2. The arguments are cached in nStartTypes, nContTypes, aStartChars and
//      aContChars.
//   3. Group and decimal separators are read from the locale data and flagged
//      in pTable when they are below nDefCnt.
//   4. Flags of ASCII characters are adjusted according to the KParseTokens
//      ASC_* bits of nStartTypes (CHAR_WORD) and nContTypes (WORD).
//   5. User-defined characters are merged in.
// NOTE(review): several lines (braces, else branches, the bodies of the
// pStart/pCont reallocation checks, the declaration of i) are not visible in
// this listing.
408 void cclass_Unicode::initParserTable( const Locale
& rLocale
, sal_Int32 startCharTokenType
,
409 const OUString
& userDefinedCharactersStart
, sal_Int32 contCharTokenType
,
410 const OUString
& userDefinedCharactersCont
)
413 setupInternational( rLocale
);
414 // Memory of pTable is reused.
416 pTable
.reset(new ParserFlags
[nDefCnt
]);
417 memcpy( pTable
.get(), pDefaultParserTable
, sizeof(ParserFlags
) * nDefCnt
);
418 // Start and cont tables only need reallocation if different length.
419 if ( pStart
&& userDefinedCharactersStart
.getLength() != aStartChars
.getLength() )
423 if ( pCont
&& userDefinedCharactersCont
.getLength() != aContChars
.getLength() )
// Cache the arguments; setupParserTable() compares against these values.
427 nStartTypes
= startCharTokenType
;
428 nContTypes
= contCharTokenType
;
429 aStartChars
= userDefinedCharactersStart
;
430 aContChars
= userDefinedCharactersCont
;
// Locale dependent separators. Only the first UTF-16 unit of the thousand and
// decimal separator strings is used (see the TODO below).
433 if( mxLocaleData
.is() )
435 LocaleDataItem2 aItem
=
436 mxLocaleData
->getLocaleItem2( aParserLocale
);
437 //!TODO: theoretically separators may be a string, adjustment would have to be
438 //! done here and in parsing and in ::rtl::math::stringToDouble()
439 cGroupSep
= aItem
.thousandSeparator
[0];
440 cDecimalSep
= aItem
.decimalSeparator
[0];
441 cDecimalSepAlt
= aItem
.decimalSeparatorAlternative
.toChar();
// The group separator is flagged as VALUE only when the continuation types
// request GROUP_SEPARATOR_IN_NUMBER.
444 if (nContTypes
& KParseTokens::GROUP_SEPARATOR_IN_NUMBER
)
446 if ( cGroupSep
< nDefCnt
)
447 pTable
[cGroupSep
] |= ParserFlags::VALUE
;
453 if ( cDecimalSep
< nDefCnt
)
454 pTable
[cDecimalSep
] |= ParserFlags::CHAR_VALUE
| ParserFlags::VALUE
;
// A zero cDecimalSepAlt means there is no alternative decimal separator.
455 if ( cDecimalSepAlt
&& cDecimalSepAlt
< nDefCnt
)
456 pTable
[cDecimalSepAlt
] |= ParserFlags::CHAR_VALUE
| ParserFlags::VALUE
;
458 // Modify characters according to KParseTokens definitions.
// The numeric indices below are ASCII codes (cf. the index comments in
// pDefaultParserTable): 65-90 'A'-'Z', 97-122 'a'-'z', 48-57 '0'-'9',
// 95 '_', 36 '$', 46 '.', 58 ':'. Letters and '_' are word characters by
// default and get the flag removed when the type bit is absent; digits (as
// start characters), '$', '.', ':' and control characters get it added when
// the type bit is present.
460 using namespace KParseTokens
;
463 if ( !(nStartTypes
& ASC_UPALPHA
) )
464 for ( i
= 65; i
< 91; i
++ )
465 pTable
[i
] &= ~ParserFlags::CHAR_WORD
; // not allowed as start character
466 if ( !(nContTypes
& ASC_UPALPHA
) )
467 for ( i
= 65; i
< 91; i
++ )
468 pTable
[i
] &= ~ParserFlags::WORD
; // not allowed as cont character
470 if ( !(nStartTypes
& ASC_LOALPHA
) )
471 for ( i
= 97; i
< 123; i
++ )
472 pTable
[i
] &= ~ParserFlags::CHAR_WORD
; // not allowed as start character
473 if ( !(nContTypes
& ASC_LOALPHA
) )
474 for ( i
= 97; i
< 123; i
++ )
475 pTable
[i
] &= ~ParserFlags::WORD
; // not allowed as cont character
477 if ( nStartTypes
& ASC_DIGIT
)
478 for ( i
= 48; i
< 58; i
++ )
479 pTable
[i
] |= ParserFlags::CHAR_WORD
; // allowed as start character
480 if ( !(nContTypes
& ASC_DIGIT
) )
481 for ( i
= 48; i
< 58; i
++ )
482 pTable
[i
] &= ~ParserFlags::WORD
; // not allowed as cont character
484 if ( !(nStartTypes
& ASC_UNDERSCORE
) )
485 pTable
[95] &= ~ParserFlags::CHAR_WORD
; // not allowed as start character
486 if ( !(nContTypes
& ASC_UNDERSCORE
) )
487 pTable
[95] &= ~ParserFlags::WORD
; // not allowed as cont character
489 if ( nStartTypes
& ASC_DOLLAR
)
490 pTable
[36] |= ParserFlags::CHAR_WORD
; // allowed as start character
491 if ( nContTypes
& ASC_DOLLAR
)
492 pTable
[36] |= ParserFlags::WORD
; // allowed as cont character
494 if ( nStartTypes
& ASC_DOT
)
495 pTable
[46] |= ParserFlags::CHAR_WORD
; // allowed as start character
496 if ( nContTypes
& ASC_DOT
)
497 pTable
[46] |= ParserFlags::WORD
; // allowed as cont character
499 if ( nStartTypes
& ASC_COLON
)
500 pTable
[58] |= ParserFlags::CHAR_WORD
; // allowed as start character
501 if ( nContTypes
& ASC_COLON
)
502 pTable
[58] |= ParserFlags::WORD
; // allowed as cont character
// Control characters: the loops start at 1, so entry 0 is left untouched.
504 if ( nStartTypes
& ASC_CONTROL
)
505 for ( i
= 1; i
< 32; i
++ )
506 pTable
[i
] |= ParserFlags::CHAR_WORD
; // allowed as start character
507 if ( nContTypes
& ASC_CONTROL
)
508 for ( i
= 1; i
< 32; i
++ )
509 pTable
[i
] |= ParserFlags::WORD
; // allowed as cont character
511 if ( nStartTypes
& ASC_ANY_BUT_CONTROL
)
512 for ( i
= 32; i
< nDefCnt
; i
++ )
513 pTable
[i
] |= ParserFlags::CHAR_WORD
; // allowed as start character
514 if ( nContTypes
& ASC_ANY_BUT_CONTROL
)
515 for ( i
= 32; i
< nDefCnt
; i
++ )
516 pTable
[i
] |= ParserFlags::WORD
; // allowed as cont character
520 // Merge in (positively override with) user defined characters.
// pStart gets one CHAR_WORD entry per UTF-16 unit of aStartChars; the same
// flag is OR-ed into pTable for the unit p points at.
522 sal_Int32 nLen
= aStartChars
.getLength();
526 pStart
.reset(new ParserFlags
[ nLen
]);
527 const sal_Unicode
* p
= aStartChars
.getStr();
528 for ( sal_Int32 j
=0; j
<nLen
; j
++, p
++ )
530 pStart
[j
] = ParserFlags::CHAR_WORD
;
532 pTable
[*p
] |= ParserFlags::CHAR_WORD
;
// Same for the continuation characters, using WORD and pCont.
536 nLen
= aContChars
.getLength();
540 pCont
.reset(new ParserFlags
[ nLen
]);
541 const sal_Unicode
* p
= aContChars
.getStr();
// NOTE(review): unlike the start-characters loop above ("j++, p++"), the
// increment clause here advances only j. Unless p is advanced on a line not
// visible in this listing, *p below always refers to the first unit of
// aContChars, so only that character gets WORD in pTable. Confirm and fix.
542 for ( sal_Int32 j
=0; j
<nLen
; j
++ )
544 pCont
[j
] = ParserFlags::WORD
;
546 pTable
[*p
] |= ParserFlags::WORD
;
// Takes no arguments and returns nothing.
// NOTE(review): the body of this function is not visible in this listing;
// judging by the name it presumably releases pTable, pStart and pCont.
// Verify against the complete file.
552 void cclass_Unicode::destroyParserTable()
// Returns the parser flags of code point c for the scan state eState.
// The base mask comes either from pTable (index truncated to sal_uInt8) or
// from getFlagsExtended(). Depending on eState the user-defined start or
// continuation characters are then merged in; when that yields the word flag
// of interest, EXCLUDED is cleared from the mask.
// NOTE(review): the condition choosing between the two base lookups
// (presumably c < nDefCnt), the switch header, some case labels and the
// return statement are not visible in this listing.
560 ParserFlags
cclass_Unicode::getFlags(sal_uInt32
const c
, const cclass_Unicode::ScanState eState
)
564 nMask
= pTable
[ sal_uInt8(c
) ];
566 nMask
= getFlagsExtended(c
, eState
);
// States in which c is looked at as the first character of a word: consult
// the user-defined start characters only when CHAR_WORD is not already set.
570 case ssRewindFromValue
:
571 case ssIgnoreLeadingInRewind
:
572 case ssGetWordFirstChar
:
573 if ( !(nMask
& ParserFlags::CHAR_WORD
) )
575 nMask
|= getStartCharsFlags( c
);
576 if ( nMask
& ParserFlags::CHAR_WORD
)
577 nMask
&= ~ParserFlags::EXCLUDED
;
// Counterpart for continuation characters (WORD); the case labels of this
// branch are not visible in this listing.
582 if ( !(nMask
& ParserFlags::WORD
) )
584 nMask
|= getContCharsFlags( c
);
585 if ( nMask
& ParserFlags::WORD
)
586 nMask
&= ~ParserFlags::EXCLUDED
;
590 ; // other cases aren't needed, no compiler warning
// Computes parser flags for code point c without using pTable (called from
// getFlags() as the alternative to the table lookup).
// Order of checks visible below:
//   1. c equals the group separator            -> VALUE
//   2. c equals the decimal separator or its
//      non-zero alternative                    -> CHAR_VALUE | VALUE
//   3. otherwise a switch on the ICU general category u_charType(c).
// bStart tells whether eState is one of the "first character" states; it
// selects nStartTypes vs. nContTypes as the type mask and CHAR_WORD vs. WORD
// as the flag returned for an accepted character. Characters of a category
// whose KParseTokens::UNI_* bit is not set in the mask yield ILLEGAL.
// NOTE(review): braces, the default label and some conditions are not visible
// in this listing.
596 ParserFlags
cclass_Unicode::getFlagsExtended(sal_uInt32
const c
, const cclass_Unicode::ScanState eState
) const
598 if ( c
== cGroupSep
)
599 return ParserFlags::VALUE
;
600 else if ( c
== cDecimalSep
)
601 return ParserFlags::CHAR_VALUE
| ParserFlags::VALUE
;
602 else if ( cDecimalSepAlt
&& c
== cDecimalSepAlt
)
603 return ParserFlags::CHAR_VALUE
| ParserFlags::VALUE
;
604 bool bStart
= (eState
== ssGetChar
|| eState
== ssGetWordFirstChar
||
605 eState
== ssRewindFromValue
|| eState
== ssIgnoreLeadingInRewind
);
606 sal_Int32 nTypes
= (bStart
? nStartTypes
: nContTypes
);
608 //! all KParseTokens::UNI_... must be matched
609 switch (u_charType(c
))
611 case U_UPPERCASE_LETTER
:
612 return (nTypes
& KParseTokens::UNI_UPALPHA
) ?
613 (bStart
? ParserFlags::CHAR_WORD
: ParserFlags::WORD
) :
614 ParserFlags::ILLEGAL
;
615 case U_LOWERCASE_LETTER
:
616 return (nTypes
& KParseTokens::UNI_LOALPHA
) ?
617 (bStart
? ParserFlags::CHAR_WORD
: ParserFlags::WORD
) :
618 ParserFlags::ILLEGAL
;
619 case U_TITLECASE_LETTER
:
620 return (nTypes
& KParseTokens::UNI_TITLE_ALPHA
) ?
621 (bStart
? ParserFlags::CHAR_WORD
: ParserFlags::WORD
) :
622 ParserFlags::ILLEGAL
;
623 case U_MODIFIER_LETTER
:
624 return (nTypes
& KParseTokens::UNI_MODIFIER_LETTER
) ?
625 (bStart
? ParserFlags::CHAR_WORD
: ParserFlags::WORD
) :
626 ParserFlags::ILLEGAL
;
// NOTE(review): the condition guarding the "return ILLEGAL" below (presumably
// bStart, per the comment) is not visible in this listing; without it the
// [[fallthrough]] would be unreachable.
627 case U_NON_SPACING_MARK
:
628 case U_COMBINING_SPACING_MARK
:
629 // Non_Spacing_Mark can't be a leading character,
630 // nor can a spacing combining mark.
632 return ParserFlags::ILLEGAL
;
633 [[fallthrough
]]; // treat it as Other_Letter.
634 case U_OTHER_LETTER
:
635 return (nTypes
& KParseTokens::UNI_OTHER_LETTER
) ?
636 (bStart
? ParserFlags::CHAR_WORD
: ParserFlags::WORD
) :
637 ParserFlags::ILLEGAL
;
// The three number categories always get TOKEN_DIGIT_FLAGS OR-ed in, whether
// or not the category is accepted as a word character.
638 case U_DECIMAL_DIGIT_NUMBER
:
639 return ((nTypes
& KParseTokens::UNI_DIGIT
) ?
640 (bStart
? ParserFlags::CHAR_WORD
: ParserFlags::WORD
) :
641 ParserFlags::ILLEGAL
) | TOKEN_DIGIT_FLAGS
;
642 case U_LETTER_NUMBER
:
643 return ((nTypes
& KParseTokens::UNI_LETTER_NUMBER
) ?
644 (bStart
? ParserFlags::CHAR_WORD
: ParserFlags::WORD
) :
645 ParserFlags::ILLEGAL
) | TOKEN_DIGIT_FLAGS
;
646 case U_OTHER_NUMBER
:
647 return ((nTypes
& KParseTokens::UNI_OTHER_NUMBER
) ?
648 (bStart
? ParserFlags::CHAR_WORD
: ParserFlags::WORD
) :
649 ParserFlags::ILLEGAL
) | TOKEN_DIGIT_FLAGS
;
// Space separators: CHAR_DONTCARE when IGNORE_LEADING_WS is set in the type
// mask; otherwise CHAR_WORD at the start, or a don't-care word/value
// separator when continuing.
650 case U_SPACE_SEPARATOR
:
651 return ((nTypes
& KParseTokens::IGNORE_LEADING_WS
) ?
652 ParserFlags::CHAR_DONTCARE
: (bStart
? ParserFlags::CHAR_WORD
: (ParserFlags::CHAR_DONTCARE
| ParserFlags::WORD_SEP
| ParserFlags::VALUE_SEP
) ));
653 case U_OTHER_PUNCTUATION
:
654 // fdo#61754 Let's see (if we are not at the start) if this is midletter
655 // punctuation and allow it in a word if it is similarly to
656 // U_NON_SPACING_MARK, for example U+00B7 MIDDLE DOT.
657 // tdf#123575 for U+30FB KATAKANA MIDDLE DOT property is not
658 // U_WB_MIDLETTER but U_WB_KATAKANA instead, explicitly test that
659 // and U+FF65 HALFWIDTH KATAKANA MIDDLE DOT.
660 if (bStart
|| (U_WB_MIDLETTER
!= u_getIntPropertyValue(c
, UCHAR_WORD_BREAK
)
661 && c
!= 0x30FB && c
!= 0xFF65))
662 return ParserFlags::ILLEGAL
;
665 //allowing it to continue the word
666 return (nTypes
& KParseTokens::UNI_OTHER_LETTER
) ?
667 ParserFlags::WORD
: ParserFlags::ILLEGAL
;
// Result for all categories not handled above.
672 return ParserFlags::ILLEGAL
;
// Looks up code point c among the user-defined start characters.
// c is searched in aStartChars with StrChr(); on a hit the pStart entry at the
// match offset (p - pStr) is returned, otherwise ParserFlags::ILLEGAL.
// NOTE(review): the guarding conditions (e.g. whether pStart exists and
// whether p is non-null) are not visible in this listing.
676 ParserFlags
cclass_Unicode::getStartCharsFlags( sal_uInt32 c
)
680 const sal_Unicode
* pStr
= aStartChars
.getStr();
681 const sal_Unicode
* p
= StrChr( pStr
, c
);
683 return pStart
[ p
- pStr
];
685 return ParserFlags::ILLEGAL
;
// Looks up character c among the user-defined continuation characters.
// c is searched in aContChars with StrChr(); on a hit the pCont entry at the
// match offset (p - pStr) is returned, otherwise ParserFlags::ILLEGAL.
// NOTE(review): the parameter is a sal_Unicode, whereas getStartCharsFlags()
// and StrChr() take a sal_uInt32 and getFlags() passes its sal_uInt32 code
// point here. Confirm whether the narrower type is intended.
// NOTE(review): the guarding conditions (e.g. whether pCont exists and whether
// p is non-null) are not visible in this listing.
689 ParserFlags
cclass_Unicode::getContCharsFlags( sal_Unicode c
)
693 const sal_Unicode
* pStr
= aContChars
.getStr();
694 const sal_Unicode
* p
= StrChr( pStr
, c
);
696 return pCont
[ p
- pStr
];
698 return ParserFlags::ILLEGAL
;
// Scans one token from rText starting at index nPos and stores the outcome in
// r: TokenType, LeadingWhiteSpace, StartFlags, ContFlags, CharLen, EndPos,
// Value (for numbers) and DequotedNameOrString (for quoted names/strings).
// nTokenType is a mask of KParseType values; r.TokenType is tested against it
// further down. r.LeadingWhiteSpace is asserted to be 0 on entry.
// The scan is a state machine over ScanState (eState) that reads rText one
// code point at a time via iterateCodePoints().
// NOTE(review): many lines of this function (braces, else branches, the
// switch header, several case labels, break statements and some declarations
// such as isFirst) are not visible in this listing; the section comments
// added below describe only what the visible lines show.
702 void cclass_Unicode::parseText( ParseResult
& r
, const OUString
& rText
, sal_Int32 nPos
, sal_Int32 nTokenType
)
704 assert(r
.LeadingWhiteSpace
== 0);
705 ScanState eState
= ssGetChar
;
707 //! All the variables below (plus ParseResult) have to be reset on ssRewindFromValue!
708 OUStringBuffer aSymbol
;
710 sal_Int32
index(nPos
); // index of next code point after current
711 sal_Int32
postSymbolIndex(index
); // index of code point following last quote
// current is 0 when nPos is at or past the end of rText; 0 also ends the loop.
712 sal_uInt32
current((index
< rText
.getLength()) ? rText
.iterateCodePoints(&index
) : 0);
713 sal_uInt32 cLast
= 0;
714 sal_Int32
nCodePoints(0);
717 bool bMightBeWord
= true;
718 bool bMightBeWordLast
= true;
719 bool bDecSepAltUsed
= false;
720 //! All the variables above (plus ParseResult) have to be reset on ssRewindFromValue!
721 sal_Int32
nextCharIndex(nPos
); // == index of nextChar
// Main scan loop: runs until the end of the text (current == 0) or until the
// state machine reaches ssStop.
723 while ((current
!= 0) && (eState
!= ssStop
))
726 ParserFlags nMask
= getFlags(current
, eState
);
727 if ( nMask
& ParserFlags::EXCLUDED
)
730 { // only relevant for ssGetValue fall back
731 if ( eState
== ssGetChar
|| eState
== ssRewindFromValue
||
732 eState
== ssIgnoreLeadingInRewind
)
733 bMightBeWord
= bool(nMask
& ParserFlags::CHAR_WORD
);
735 bMightBeWord
= bool(nMask
& ParserFlags::WORD
);
737 sal_Int32 nParseTokensType
= getParseTokensType(current
, isFirst
);
// Look ahead one code point: nextChar is 0 at the end of the text.
739 sal_Int32
const nextIndex(nextCharIndex
); // == index of char following current
740 nextCharIndex
= index
; // == index of nextChar
741 sal_uInt32
nextChar((index
< rText
.getLength()) ? rText
.iterateCodePoints(&index
) : 0);
// Handling of the first character of a token. The flag tests below are
// ordered by priority: value, word, quoted name, string, ignorable
// whitespace, boolean operator, single character.
745 case ssRewindFromValue
:
746 case ssIgnoreLeadingInRewind
:
748 if ( (nMask
& ParserFlags::CHAR_VALUE
) && eState
!= ssRewindFromValue
749 && eState
!= ssIgnoreLeadingInRewind
)
750 { //! must be first, may fall back to ssGetWord via bMightBeWord
752 if ( nMask
& ParserFlags::VALUE_DIGIT
)
755 r
.TokenType
= KParseType::UNI_NUMBER
;
757 r
.TokenType
= KParseType::ASC_NUMBER
;
// The assignment inside the condition records whether the alternative decimal
// separator was the one that matched.
759 else if (current
== cDecimalSep
|| (bDecSepAltUsed
= (cDecimalSepAlt
&& current
== cDecimalSepAlt
)))
764 eState
= ssRewindFromValue
;
765 // retry for ONE_SINGLE_CHAR or others
768 else if ( nMask
& ParserFlags::CHAR_WORD
)
771 r
.TokenType
= KParseType::IDENTNAME
;
773 else if ( nMask
& ParserFlags::NAME_SEP
)
775 eState
= ssGetWordFirstChar
;
777 postSymbolIndex
= nextCharIndex
;
778 nParseTokensType
= 0; // will be taken from the first real character
779 r
.TokenType
= KParseType::SINGLE_QUOTE_NAME
;
781 else if ( nMask
& ParserFlags::CHAR_STRING
)
783 eState
= ssGetString
;
784 postSymbolIndex
= nextCharIndex
;
785 nParseTokensType
= 0; // will be taken from the first real character
786 r
.TokenType
= KParseType::DOUBLE_QUOTE_STRING
;
788 else if ( nMask
& ParserFlags::CHAR_DONTCARE
)
790 if ( nStartTypes
& KParseTokens::IGNORE_LEADING_WS
)
792 if (eState
== ssRewindFromValue
)
793 eState
= ssIgnoreLeadingInRewind
;
794 r
.LeadingWhiteSpace
= nextCharIndex
- nPos
;
795 nCodePoints
--; // exclude leading whitespace
796 postSymbolIndex
= nextCharIndex
;
797 nParseTokensType
= 0; // wait until real character
803 else if ( nMask
& ParserFlags::CHAR_BOOL
)
806 r
.TokenType
= KParseType::BOOLEAN
;
808 else if ( nMask
& ParserFlags::CHAR
)
811 r
.TokenType
= KParseType::ONE_SINGLE_CHAR
;
814 eState
= ssBounce
; // not known
// Continuation of a number (presumably the ssGetValue case; its label is not
// visible in this listing). Once a VALUE_DIGIT character has set UNI_NUMBER
// the token type is not changed back to ASC_NUMBER.
819 if ( nMask
& ParserFlags::VALUE_DIGIT
)
822 r
.TokenType
= KParseType::UNI_NUMBER
;
823 else if ( r
.TokenType
!= KParseType::UNI_NUMBER
)
824 r
.TokenType
= KParseType::ASC_NUMBER
;
826 if ( nMask
& ParserFlags::VALUE
)
828 if (current
== cGroupSep
)
830 // accept only if it is followed by 3 digits
// nextChar2 / nextChar3 are read through a copy of index so the real scan
// position is not advanced by this look-ahead.
831 sal_Int32
tempIndex(index
);
832 sal_uInt32
const nextChar2((tempIndex
< rText
.getLength()) ? rText
.iterateCodePoints(&tempIndex
) : 0);
833 sal_uInt32
const nextChar3((tempIndex
< rText
.getLength()) ? rText
.iterateCodePoints(&tempIndex
) : 0);
834 if (getFlags(nextChar
, eState
) & ParserFlags::VALUE_DIGIT
835 && getFlags(nextChar2
, eState
) & ParserFlags::VALUE_DIGIT
836 && getFlags(nextChar3
, eState
) & ParserFlags::VALUE_DIGIT
)
838 nParseTokensType
|= KParseTokens::GROUP_SEPARATOR_IN_NUMBER
;
842 // Trailing group separator character is not a
847 else if ((current
== cDecimalSep
||
848 (bDecSepAltUsed
= (cDecimalSepAlt
&& current
== cDecimalSepAlt
))) &&
851 if (nCodePoints
== 2)
852 eState
= ssRewindFromValue
;
853 // consecutive separators
857 // else keep it going
// Exponent marker: what follows decides whether this stays a number or might
// become an identifier.
859 else if (current
== 'E' || current
== 'e')
861 ParserFlags nNext
= getFlags(nextChar
, eState
);
862 if ( nNext
& ParserFlags::VALUE_EXP
)
864 else if (bMightBeWord
&& ((nNext
& ParserFlags::WORD
) || !nextChar
))
865 { // might be a numerical name (1.2efg)
867 r
.TokenType
= KParseType::IDENTNAME
;
// A sign character is examined with respect to a directly preceding exponent
// marker (cLast).
872 else if ( nMask
& ParserFlags::VALUE_SIGN
)
874 if ( (cLast
== 'E') || (cLast
== 'e') )
876 ParserFlags nNext
= getFlags(nextChar
, eState
);
877 if ( nNext
& ParserFlags::VALUE_EXP_VALUE
)
879 else if (bMightBeWord
&& ((nNext
& ParserFlags::WORD
) || !nextChar
))
880 { // might be a numerical name (1.2e+fg)
882 r
.TokenType
= KParseType::IDENTNAME
;
887 else if ( bMightBeWord
)
888 { // might be a numerical name (1.2+fg)
890 r
.TokenType
= KParseType::IDENTNAME
;
895 else if ( bMightBeWord
&& (nMask
& ParserFlags::WORD
) )
896 { // might be a numerical name (1995.A1)
898 r
.TokenType
= KParseType::IDENTNAME
;
// Continuation of a word or single-quoted name.
904 case ssGetWordFirstChar
:
909 if ( nMask
& ParserFlags::WORD
)
911 else if ( nMask
& ParserFlags::NAME_SEP
)
// NOTE(review): "¤t" two lines below looks like a mis-encoded "&current"
// (an HTML entity artifact); verify against the complete file.
918 OUString::Concat(rText
.subView(postSymbolIndex
, nextCharIndex
- postSymbolIndex
- 2))
919 + OUString(¤t
, 1));
924 aSymbol
.append(rText
.subView(postSymbolIndex
, nextCharIndex
- postSymbolIndex
- 1));
926 postSymbolIndex
= nextCharIndex
;
// Continuation of a double-quoted string (presumably the ssGetString case; its
// label is not visible in this listing).
939 if ( nMask
& ParserFlags::STRING_SEP
)
// NOTE(review): same mis-encoded "¤t" token as above.
944 rText
.subView(postSymbolIndex
, nextCharIndex
- postSymbolIndex
- 2)
945 + OUString(¤t
, 1));
947 else if (current
== nextChar
&&
948 !(nContTypes
& KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING
) )
949 { // "" => literal " escaped
950 aSymbol
.append(rText
.subView(postSymbolIndex
, nextCharIndex
- postSymbolIndex
));
951 nextCharIndex
= index
;
952 if (index
< rText
.getLength()) { ++nCodePoints
; }
953 nextChar
= (index
< rText
.getLength()) ? rText
.iterateCodePoints(&index
) : 0;
958 aSymbol
.append(rText
.subView(postSymbolIndex
, nextCharIndex
- postSymbolIndex
- 1));
960 postSymbolIndex
= nextCharIndex
;
// Second character of a boolean operator.
966 if ( nMask
& ParserFlags::BOOL
)
967 eState
= ssStop
; // maximum 2: <, >, <>, <=, >=
975 ; // nothing, no compiler warning
// Rewind: restart the scan from nPos with the per-token state reset (see the
// "have to be reset on ssRewindFromValue" remarks at the top).
978 if ( eState
== ssRewindFromValue
)
982 postSymbolIndex
= nPos
;
983 nextCharIndex
= nPos
;
984 aSymbol
.setLength(0);
985 current
= (index
< rText
.getLength()) ? rText
.iterateCodePoints(&index
) : 0;
986 nCodePoints
= (nPos
< rText
.getLength()) ? 1 : 0;
992 bMightBeWordLast
= true;
993 bDecSepAltUsed
= false;
// Token type filter: the type found so far is not among those requested in
// nTokenType. The three empty branches list the situations that are kept
// anyway.
997 if ( !(r
.TokenType
& nTokenType
) )
999 if ( (r
.TokenType
& (KParseType::ASC_NUMBER
| KParseType::UNI_NUMBER
))
1000 && (nTokenType
& KParseType::IDENTNAME
) && bMightBeWord
)
1001 ; // keep a number that might be a word
1002 else if (r
.LeadingWhiteSpace
== (nextCharIndex
- nPos
))
1003 ; // keep ignored white space
1004 else if ( !r
.TokenType
&& eState
== ssGetValue
&& (nMask
& ParserFlags::VALUE_SEP
) )
1005 ; // keep uncertain value
1009 if ( eState
== ssBounce
)
1012 eState
= ssStopBack
;
// ssStopBack: step back one code point so the current character is not part
// of the token.
1014 if ( eState
== ssStopBack
)
1016 nextChar
= rText
.iterateCodePoints(&index
, -1);
1017 nextCharIndex
= nextIndex
;
1019 bMightBeWord
= bMightBeWordLast
;
// Accumulate the KParseTokens classification of the accepted character into
// StartFlags while that is still empty, and into ContFlags (the else between
// the two assignments is not visible in this listing).
1022 if ( eState
!= ssStop
)
1024 if ( !r
.StartFlags
)
1025 r
.StartFlags
|= nParseTokensType
;
1027 r
.ContFlags
|= nParseTokensType
;
1029 bMightBeWordLast
= bMightBeWord
;
1034 // r.CharLen is the length in characters (not code units) of the parsed
1035 // token not including any leading white space.
1036 r
.CharLen
= nCodePoints
;
1037 r
.EndPos
= nextCharIndex
;
// Numeric value conversion. ASCII numbers are converted directly from the
// text range after the leading white space, using the decimal separator that
// was actually seen (alternative or regular).
1038 if ( r
.TokenType
& KParseType::ASC_NUMBER
)
1040 r
.Value
= rtl_math_uStringToDouble(rText
.getStr() + nPos
+ r
.LeadingWhiteSpace
,
1041 rText
.getStr() + r
.EndPos
, (bDecSepAltUsed
? cDecimalSepAlt
: cDecimalSep
), cGroupSep
, nullptr, nullptr);
1043 r
.TokenType
|= KParseType::IDENTNAME
;
// Unicode numbers are first transliterated with the native number supplier
// (created on demand from m_xContext) and then converted.
1045 else if ( r
.TokenType
& KParseType::UNI_NUMBER
)
1047 if ( !xNatNumSup
.is() )
1049 if ( m_xContext
.is() )
1051 xNatNumSup
= NativeNumberSupplier::create( m_xContext
);
1054 OUString
aTmp(rText
.getStr() + nPos
+ r
.LeadingWhiteSpace
,
1055 r
.EndPos
- nPos
- r
.LeadingWhiteSpace
);
1056 // transliterate to ASCII
1057 aTmp
= xNatNumSup
->getNativeNumberString( aTmp
, aParserLocale
,
1058 NativeNumberMode::NATNUM0
);
1059 r
.Value
= ::rtl::math::stringToDouble( aTmp
, cDecimalSep
, cGroupSep
);
1061 r
.TokenType
|= KParseType::IDENTNAME
;
// Quoted name or string: text left between the last quote position and the
// end of the scan means the closing quote was never seen; the remainder is
// appended and MISSING_QUOTE is flagged.
1063 else if ( r
.TokenType
& (KParseType::SINGLE_QUOTE_NAME
| KParseType::DOUBLE_QUOTE_STRING
) )
1065 if (postSymbolIndex
< nextCharIndex
)
1067 aSymbol
.append(rText
.subView(postSymbolIndex
, nextCharIndex
- postSymbolIndex
- 1));
1068 r
.TokenType
|= KParseType::MISSING_QUOTE
;
1070 r
.DequotedNameOrString
= aSymbol
.makeStringAndClear();
1076 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */