Update ooo320-m1
[ooovba.git] / i18npool / source / characterclassification / cclass_unicode_parser.cxx
blobf6a288786a2c77d229b89707ee234527004c7e59
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: cclass_unicode_parser.cxx,v $
10 * $Revision: 1.15 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_i18npool.hxx"
34 #include <cclass_unicode.hxx>
35 #include <unicode/uchar.h>
36 #include <rtl/math.hxx>
37 #include <rtl/ustring.hxx>
38 #include <com/sun/star/i18n/KParseTokens.hpp>
39 #include <com/sun/star/i18n/KParseType.hpp>
40 #include <com/sun/star/i18n/UnicodeType.hpp>
41 #include <com/sun/star/i18n/XLocaleData.hpp>
42 #include <com/sun/star/i18n/NativeNumberMode.hpp>
44 #include <string.h> // memcpy()
46 using namespace ::com::sun::star::uno;
47 using namespace ::com::sun::star::lang;
48 using namespace ::rtl;
50 namespace com { namespace sun { namespace star { namespace i18n {
52 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_ILLEGAL = 0x00000000;
53 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR = 0x00000001;
54 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_BOOL = 0x00000002;
55 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_WORD = 0x00000004;
56 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_VALUE = 0x00000008;
57 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_STRING = 0x00000010;
58 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_DONTCARE= 0x00000020;
59 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_BOOL = 0x00000040;
60 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD = 0x00000080;
61 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD_SEP = 0x00000100;
62 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE = 0x00000200;
63 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SEP = 0x00000400;
64 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP = 0x00000800;
65 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SIGN = 0x00001000;
66 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP_VALUE = 0x00002000;
67 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_DIGIT = 0x00004000;
68 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_NAME_SEP = 0x20000000;
69 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_STRING_SEP = 0x40000000;
70 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_EXCLUDED = 0x80000000;
72 #define TOKEN_DIGIT_FLAGS (TOKEN_CHAR_VALUE | TOKEN_VALUE | TOKEN_VALUE_EXP | TOKEN_VALUE_EXP_VALUE | TOKEN_VALUE_DIGIT)
74 // Default identifier/name specification is [A-Za-z_][A-Za-z0-9_]*
76 const sal_uInt8 cclass_Unicode::nDefCnt = 128;
77 const UPT_FLAG_TYPE cclass_Unicode::pDefaultParserTable[ nDefCnt ] =
79 // (...) == Calc formula compiler specific, commented out and modified
81 /* \0 */ TOKEN_EXCLUDED,
82 TOKEN_ILLEGAL,
83 TOKEN_ILLEGAL,
84 TOKEN_ILLEGAL,
85 TOKEN_ILLEGAL,
86 TOKEN_ILLEGAL,
87 TOKEN_ILLEGAL,
88 TOKEN_ILLEGAL,
89 TOKEN_ILLEGAL,
90 /* 9 \t */ TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL)
91 TOKEN_ILLEGAL,
92 /* 11 \v */ TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL)
93 TOKEN_ILLEGAL,
94 TOKEN_ILLEGAL,
95 TOKEN_ILLEGAL,
96 TOKEN_ILLEGAL,
97 TOKEN_ILLEGAL,
98 TOKEN_ILLEGAL,
99 TOKEN_ILLEGAL,
100 TOKEN_ILLEGAL,
101 TOKEN_ILLEGAL,
102 TOKEN_ILLEGAL,
103 TOKEN_ILLEGAL,
104 TOKEN_ILLEGAL,
105 TOKEN_ILLEGAL,
106 TOKEN_ILLEGAL,
107 TOKEN_ILLEGAL,
108 TOKEN_ILLEGAL,
109 TOKEN_ILLEGAL,
110 TOKEN_ILLEGAL,
111 TOKEN_ILLEGAL,
112 TOKEN_ILLEGAL,
113 /* 32 */ TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
114 /* 33 ! */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
115 /* 34 " */ TOKEN_CHAR_STRING | TOKEN_STRING_SEP,
116 /* 35 # */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_WORD_SEP)
117 /* 36 $ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_CHAR_WORD | TOKEN_WORD)
118 /* 37 % */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_VALUE)
119 /* 38 & */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
120 /* 39 ' */ TOKEN_NAME_SEP,
121 /* 40 ( */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
122 /* 41 ) */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
123 /* 42 * */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
124 /* 43 + */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN,
125 /* 44 , */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_CHAR_VALUE | TOKEN_VALUE)
126 /* 45 - */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN,
127 /* 46 . */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_WORD | TOKEN_CHAR_VALUE | TOKEN_VALUE)
128 /* 47 / */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
129 //for ( i = 48; i < 58; i++ )
130 /* 48 0 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
131 /* 49 1 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
132 /* 50 2 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
133 /* 51 3 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
134 /* 52 4 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
135 /* 53 5 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
136 /* 54 6 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
137 /* 55 7 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
138 /* 56 8 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
139 /* 57 9 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
140 /* 58 : */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_WORD)
141 /* 59 ; */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
142 /* 60 < */ TOKEN_CHAR_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
143 /* 61 = */ TOKEN_CHAR | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
144 /* 62 > */ TOKEN_CHAR_BOOL | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
145 /* 63 ? */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_CHAR_WORD | TOKEN_WORD)
146 /* 64 @ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
147 //for ( i = 65; i < 91; i++ )
148 /* 65 A */ TOKEN_CHAR_WORD | TOKEN_WORD,
149 /* 66 B */ TOKEN_CHAR_WORD | TOKEN_WORD,
150 /* 67 C */ TOKEN_CHAR_WORD | TOKEN_WORD,
151 /* 68 D */ TOKEN_CHAR_WORD | TOKEN_WORD,
152 /* 69 E */ TOKEN_CHAR_WORD | TOKEN_WORD,
153 /* 70 F */ TOKEN_CHAR_WORD | TOKEN_WORD,
154 /* 71 G */ TOKEN_CHAR_WORD | TOKEN_WORD,
155 /* 72 H */ TOKEN_CHAR_WORD | TOKEN_WORD,
156 /* 73 I */ TOKEN_CHAR_WORD | TOKEN_WORD,
157 /* 74 J */ TOKEN_CHAR_WORD | TOKEN_WORD,
158 /* 75 K */ TOKEN_CHAR_WORD | TOKEN_WORD,
159 /* 76 L */ TOKEN_CHAR_WORD | TOKEN_WORD,
160 /* 77 M */ TOKEN_CHAR_WORD | TOKEN_WORD,
161 /* 78 N */ TOKEN_CHAR_WORD | TOKEN_WORD,
162 /* 79 O */ TOKEN_CHAR_WORD | TOKEN_WORD,
163 /* 80 P */ TOKEN_CHAR_WORD | TOKEN_WORD,
164 /* 81 Q */ TOKEN_CHAR_WORD | TOKEN_WORD,
165 /* 82 R */ TOKEN_CHAR_WORD | TOKEN_WORD,
166 /* 83 S */ TOKEN_CHAR_WORD | TOKEN_WORD,
167 /* 84 T */ TOKEN_CHAR_WORD | TOKEN_WORD,
168 /* 85 U */ TOKEN_CHAR_WORD | TOKEN_WORD,
169 /* 86 V */ TOKEN_CHAR_WORD | TOKEN_WORD,
170 /* 87 W */ TOKEN_CHAR_WORD | TOKEN_WORD,
171 /* 88 X */ TOKEN_CHAR_WORD | TOKEN_WORD,
172 /* 89 Y */ TOKEN_CHAR_WORD | TOKEN_WORD,
173 /* 90 Z */ TOKEN_CHAR_WORD | TOKEN_WORD,
174 /* 91 [ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
175 /* 92 \ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
176 /* 93 ] */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
177 /* 94 ^ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
178 /* 95 _ */ TOKEN_CHAR_WORD | TOKEN_WORD,
179 /* 96 ` */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
180 //for ( i = 97; i < 123; i++ )
181 /* 97 a */ TOKEN_CHAR_WORD | TOKEN_WORD,
182 /* 98 b */ TOKEN_CHAR_WORD | TOKEN_WORD,
183 /* 99 c */ TOKEN_CHAR_WORD | TOKEN_WORD,
184 /* 100 d */ TOKEN_CHAR_WORD | TOKEN_WORD,
185 /* 101 e */ TOKEN_CHAR_WORD | TOKEN_WORD,
186 /* 102 f */ TOKEN_CHAR_WORD | TOKEN_WORD,
187 /* 103 g */ TOKEN_CHAR_WORD | TOKEN_WORD,
188 /* 104 h */ TOKEN_CHAR_WORD | TOKEN_WORD,
189 /* 105 i */ TOKEN_CHAR_WORD | TOKEN_WORD,
190 /* 106 j */ TOKEN_CHAR_WORD | TOKEN_WORD,
191 /* 107 k */ TOKEN_CHAR_WORD | TOKEN_WORD,
192 /* 108 l */ TOKEN_CHAR_WORD | TOKEN_WORD,
193 /* 109 m */ TOKEN_CHAR_WORD | TOKEN_WORD,
194 /* 110 n */ TOKEN_CHAR_WORD | TOKEN_WORD,
195 /* 111 o */ TOKEN_CHAR_WORD | TOKEN_WORD,
196 /* 112 p */ TOKEN_CHAR_WORD | TOKEN_WORD,
197 /* 113 q */ TOKEN_CHAR_WORD | TOKEN_WORD,
198 /* 114 r */ TOKEN_CHAR_WORD | TOKEN_WORD,
199 /* 115 s */ TOKEN_CHAR_WORD | TOKEN_WORD,
200 /* 116 t */ TOKEN_CHAR_WORD | TOKEN_WORD,
201 /* 117 u */ TOKEN_CHAR_WORD | TOKEN_WORD,
202 /* 118 v */ TOKEN_CHAR_WORD | TOKEN_WORD,
203 /* 119 w */ TOKEN_CHAR_WORD | TOKEN_WORD,
204 /* 120 x */ TOKEN_CHAR_WORD | TOKEN_WORD,
205 /* 121 y */ TOKEN_CHAR_WORD | TOKEN_WORD,
206 /* 122 z */ TOKEN_CHAR_WORD | TOKEN_WORD,
207 /* 123 { */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
208 /* 124 | */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
209 /* 125 } */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
210 /* 126 ~ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
211 /* 127 */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP // (TOKEN_ILLEGAL // UNUSED)
215 const sal_Int32 cclass_Unicode::pParseTokensType[ nDefCnt ] =
217 /* \0 */ KParseTokens::ASC_OTHER,
218 KParseTokens::ASC_CONTROL,
219 KParseTokens::ASC_CONTROL,
220 KParseTokens::ASC_CONTROL,
221 KParseTokens::ASC_CONTROL,
222 KParseTokens::ASC_CONTROL,
223 KParseTokens::ASC_CONTROL,
224 KParseTokens::ASC_CONTROL,
225 KParseTokens::ASC_CONTROL,
226 /* 9 \t */ KParseTokens::ASC_CONTROL,
227 KParseTokens::ASC_CONTROL,
228 /* 11 \v */ KParseTokens::ASC_CONTROL,
229 KParseTokens::ASC_CONTROL,
230 KParseTokens::ASC_CONTROL,
231 KParseTokens::ASC_CONTROL,
232 KParseTokens::ASC_CONTROL,
233 KParseTokens::ASC_CONTROL,
234 KParseTokens::ASC_CONTROL,
235 KParseTokens::ASC_CONTROL,
236 KParseTokens::ASC_CONTROL,
237 KParseTokens::ASC_CONTROL,
238 KParseTokens::ASC_CONTROL,
239 KParseTokens::ASC_CONTROL,
240 KParseTokens::ASC_CONTROL,
241 KParseTokens::ASC_CONTROL,
242 KParseTokens::ASC_CONTROL,
243 KParseTokens::ASC_CONTROL,
244 KParseTokens::ASC_CONTROL,
245 KParseTokens::ASC_CONTROL,
246 KParseTokens::ASC_CONTROL,
247 KParseTokens::ASC_CONTROL,
248 KParseTokens::ASC_CONTROL,
249 /* 32 */ KParseTokens::ASC_OTHER,
250 /* 33 ! */ KParseTokens::ASC_OTHER,
251 /* 34 " */ KParseTokens::ASC_OTHER,
252 /* 35 # */ KParseTokens::ASC_OTHER,
253 /* 36 $ */ KParseTokens::ASC_DOLLAR,
254 /* 37 % */ KParseTokens::ASC_OTHER,
255 /* 38 & */ KParseTokens::ASC_OTHER,
256 /* 39 ' */ KParseTokens::ASC_OTHER,
257 /* 40 ( */ KParseTokens::ASC_OTHER,
258 /* 41 ) */ KParseTokens::ASC_OTHER,
259 /* 42 * */ KParseTokens::ASC_OTHER,
260 /* 43 + */ KParseTokens::ASC_OTHER,
261 /* 44 , */ KParseTokens::ASC_OTHER,
262 /* 45 - */ KParseTokens::ASC_OTHER,
263 /* 46 . */ KParseTokens::ASC_DOT,
264 /* 47 / */ KParseTokens::ASC_OTHER,
265 //for ( i = 48; i < 58; i++ )
266 /* 48 0 */ KParseTokens::ASC_DIGIT,
267 /* 49 1 */ KParseTokens::ASC_DIGIT,
268 /* 50 2 */ KParseTokens::ASC_DIGIT,
269 /* 51 3 */ KParseTokens::ASC_DIGIT,
270 /* 52 4 */ KParseTokens::ASC_DIGIT,
271 /* 53 5 */ KParseTokens::ASC_DIGIT,
272 /* 54 6 */ KParseTokens::ASC_DIGIT,
273 /* 55 7 */ KParseTokens::ASC_DIGIT,
274 /* 56 8 */ KParseTokens::ASC_DIGIT,
275 /* 57 9 */ KParseTokens::ASC_DIGIT,
276 /* 58 : */ KParseTokens::ASC_COLON,
277 /* 59 ; */ KParseTokens::ASC_OTHER,
278 /* 60 < */ KParseTokens::ASC_OTHER,
279 /* 61 = */ KParseTokens::ASC_OTHER,
280 /* 62 > */ KParseTokens::ASC_OTHER,
281 /* 63 ? */ KParseTokens::ASC_OTHER,
282 /* 64 @ */ KParseTokens::ASC_OTHER,
283 //for ( i = 65; i < 91; i++ )
284 /* 65 A */ KParseTokens::ASC_UPALPHA,
285 /* 66 B */ KParseTokens::ASC_UPALPHA,
286 /* 67 C */ KParseTokens::ASC_UPALPHA,
287 /* 68 D */ KParseTokens::ASC_UPALPHA,
288 /* 69 E */ KParseTokens::ASC_UPALPHA,
289 /* 70 F */ KParseTokens::ASC_UPALPHA,
290 /* 71 G */ KParseTokens::ASC_UPALPHA,
291 /* 72 H */ KParseTokens::ASC_UPALPHA,
292 /* 73 I */ KParseTokens::ASC_UPALPHA,
293 /* 74 J */ KParseTokens::ASC_UPALPHA,
294 /* 75 K */ KParseTokens::ASC_UPALPHA,
295 /* 76 L */ KParseTokens::ASC_UPALPHA,
296 /* 77 M */ KParseTokens::ASC_UPALPHA,
297 /* 78 N */ KParseTokens::ASC_UPALPHA,
298 /* 79 O */ KParseTokens::ASC_UPALPHA,
299 /* 80 P */ KParseTokens::ASC_UPALPHA,
300 /* 81 Q */ KParseTokens::ASC_UPALPHA,
301 /* 82 R */ KParseTokens::ASC_UPALPHA,
302 /* 83 S */ KParseTokens::ASC_UPALPHA,
303 /* 84 T */ KParseTokens::ASC_UPALPHA,
304 /* 85 U */ KParseTokens::ASC_UPALPHA,
305 /* 86 V */ KParseTokens::ASC_UPALPHA,
306 /* 87 W */ KParseTokens::ASC_UPALPHA,
307 /* 88 X */ KParseTokens::ASC_UPALPHA,
308 /* 89 Y */ KParseTokens::ASC_UPALPHA,
309 /* 90 Z */ KParseTokens::ASC_UPALPHA,
310 /* 91 [ */ KParseTokens::ASC_OTHER,
311 /* 92 \ */ KParseTokens::ASC_OTHER,
312 /* 93 ] */ KParseTokens::ASC_OTHER,
313 /* 94 ^ */ KParseTokens::ASC_OTHER,
314 /* 95 _ */ KParseTokens::ASC_UNDERSCORE,
315 /* 96 ` */ KParseTokens::ASC_OTHER,
316 //for ( i = 97; i < 123; i++ )
317 /* 97 a */ KParseTokens::ASC_LOALPHA,
318 /* 98 b */ KParseTokens::ASC_LOALPHA,
319 /* 99 c */ KParseTokens::ASC_LOALPHA,
320 /* 100 d */ KParseTokens::ASC_LOALPHA,
321 /* 101 e */ KParseTokens::ASC_LOALPHA,
322 /* 102 f */ KParseTokens::ASC_LOALPHA,
323 /* 103 g */ KParseTokens::ASC_LOALPHA,
324 /* 104 h */ KParseTokens::ASC_LOALPHA,
325 /* 105 i */ KParseTokens::ASC_LOALPHA,
326 /* 106 j */ KParseTokens::ASC_LOALPHA,
327 /* 107 k */ KParseTokens::ASC_LOALPHA,
328 /* 108 l */ KParseTokens::ASC_LOALPHA,
329 /* 109 m */ KParseTokens::ASC_LOALPHA,
330 /* 110 n */ KParseTokens::ASC_LOALPHA,
331 /* 111 o */ KParseTokens::ASC_LOALPHA,
332 /* 112 p */ KParseTokens::ASC_LOALPHA,
333 /* 113 q */ KParseTokens::ASC_LOALPHA,
334 /* 114 r */ KParseTokens::ASC_LOALPHA,
335 /* 115 s */ KParseTokens::ASC_LOALPHA,
336 /* 116 t */ KParseTokens::ASC_LOALPHA,
337 /* 117 u */ KParseTokens::ASC_LOALPHA,
338 /* 118 v */ KParseTokens::ASC_LOALPHA,
339 /* 119 w */ KParseTokens::ASC_LOALPHA,
340 /* 120 x */ KParseTokens::ASC_LOALPHA,
341 /* 121 y */ KParseTokens::ASC_LOALPHA,
342 /* 122 z */ KParseTokens::ASC_LOALPHA,
343 /* 123 { */ KParseTokens::ASC_OTHER,
344 /* 124 | */ KParseTokens::ASC_OTHER,
345 /* 125 } */ KParseTokens::ASC_OTHER,
346 /* 126 ~ */ KParseTokens::ASC_OTHER,
347 /* 127 */ KParseTokens::ASC_OTHER
351 // static
352 const sal_Unicode* cclass_Unicode::StrChr( const sal_Unicode* pStr, sal_Unicode c )
354 if ( !pStr )
355 return NULL;
356 while ( *pStr )
358 if ( *pStr == c )
359 return pStr;
360 pStr++;
362 return NULL;
366 sal_Int32 cclass_Unicode::getParseTokensType( const sal_Unicode* aStr, sal_Int32 nPos )
368 sal_Unicode c = aStr[nPos];
369 if ( c < nDefCnt )
370 return pParseTokensType[ sal_uInt8(c) ];
371 else
374 //! all KParseTokens::UNI_... must be matched
375 switch ( u_charType( (sal_uInt32) c ) )
377 case U_UPPERCASE_LETTER :
378 return KParseTokens::UNI_UPALPHA;
379 case U_LOWERCASE_LETTER :
380 return KParseTokens::UNI_LOALPHA;
381 case U_TITLECASE_LETTER :
382 return KParseTokens::UNI_TITLE_ALPHA;
383 case U_MODIFIER_LETTER :
384 return KParseTokens::UNI_MODIFIER_LETTER;
385 case U_OTHER_LETTER :
386 // Non_Spacing_Mark could not be as leading character
387 if (nPos == 0) break;
388 // fall through, treat it as Other_Letter.
389 case U_NON_SPACING_MARK :
390 return KParseTokens::UNI_OTHER_LETTER;
391 case U_DECIMAL_DIGIT_NUMBER :
392 return KParseTokens::UNI_DIGIT;
393 case U_LETTER_NUMBER :
394 return KParseTokens::UNI_LETTER_NUMBER;
395 case U_OTHER_NUMBER :
396 return KParseTokens::UNI_OTHER_NUMBER;
399 return KParseTokens::UNI_OTHER;
403 sal_Bool cclass_Unicode::setupInternational( const Locale& rLocale )
405 sal_Bool bChanged = (aParserLocale.Language != rLocale.Language
406 || aParserLocale.Country != rLocale.Country
407 || aParserLocale.Variant != rLocale.Variant);
408 if ( bChanged )
410 aParserLocale.Language = rLocale.Language;
411 aParserLocale.Country = rLocale.Country;
412 aParserLocale.Variant = rLocale.Variant;
414 if ( !xLocaleData.is() && xMSF.is() )
416 Reference <
417 XInterface > xI =
418 xMSF->createInstance( OUString(
419 RTL_CONSTASCII_USTRINGPARAM( "com.sun.star.i18n.LocaleData" ) ) );
420 if ( xI.is() )
422 Any x = xI->queryInterface( getCppuType((const Reference< XLocaleData>*)0) );
423 x >>= xLocaleData;
426 return bChanged;
430 void cclass_Unicode::setupParserTable( const Locale& rLocale, sal_Int32 startCharTokenType,
431 const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
432 const OUString& userDefinedCharactersCont )
434 bool bIntlEqual = (rLocale.Language == aParserLocale.Language &&
435 rLocale.Country == aParserLocale.Country &&
436 rLocale.Variant == aParserLocale.Variant);
437 if ( !pTable || !bIntlEqual ||
438 startCharTokenType != nStartTypes ||
439 contCharTokenType != nContTypes ||
440 userDefinedCharactersStart != aStartChars ||
441 userDefinedCharactersCont != aContChars )
442 initParserTable( rLocale, startCharTokenType, userDefinedCharactersStart,
443 contCharTokenType, userDefinedCharactersCont );
447 void cclass_Unicode::initParserTable( const Locale& rLocale, sal_Int32 startCharTokenType,
448 const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
449 const OUString& userDefinedCharactersCont )
451 // (Re)Init
452 setupInternational( rLocale );
453 // Memory of pTable is reused.
454 if ( !pTable )
455 pTable = new UPT_FLAG_TYPE[nDefCnt];
456 memcpy( pTable, pDefaultParserTable, sizeof(UPT_FLAG_TYPE) * nDefCnt );
457 // Start and cont tables only need reallocation if different length.
458 if ( pStart && userDefinedCharactersStart.getLength() != aStartChars.getLength() )
460 delete [] pStart;
461 pStart = NULL;
463 if ( pCont && userDefinedCharactersCont.getLength() != aContChars.getLength() )
465 delete [] pCont;
466 pCont = NULL;
468 nStartTypes = startCharTokenType;
469 nContTypes = contCharTokenType;
470 aStartChars = userDefinedCharactersStart;
471 aContChars = userDefinedCharactersCont;
473 // specials
474 if( xLocaleData.is() )
476 LocaleDataItem aItem =
477 xLocaleData->getLocaleItem( aParserLocale );
478 //!TODO: theoretically separators may be a string, adjustment would have to be
479 //! done here and in parsing and in ::rtl::math::stringToDouble()
480 cGroupSep = aItem.thousandSeparator.getStr()[0];
481 cDecimalSep = aItem.decimalSeparator.getStr()[0];
484 if ( cGroupSep < nDefCnt )
485 pTable[cGroupSep] |= TOKEN_VALUE;
486 if ( cDecimalSep < nDefCnt )
487 pTable[cDecimalSep] |= TOKEN_CHAR_VALUE | TOKEN_VALUE;
489 // Modify characters according to KParseTokens definitions.
491 using namespace KParseTokens;
492 sal_uInt8 i;
494 if ( !(nStartTypes & ASC_UPALPHA) )
495 for ( i = 65; i < 91; i++ )
496 pTable[i] &= ~TOKEN_CHAR_WORD; // not allowed as start character
497 if ( !(nContTypes & ASC_UPALPHA) )
498 for ( i = 65; i < 91; i++ )
499 pTable[i] &= ~TOKEN_WORD; // not allowed as cont character
501 if ( !(nStartTypes & ASC_LOALPHA) )
502 for ( i = 97; i < 123; i++ )
503 pTable[i] &= ~TOKEN_CHAR_WORD; // not allowed as start character
504 if ( !(nContTypes & ASC_LOALPHA) )
505 for ( i = 97; i < 123; i++ )
506 pTable[i] &= ~TOKEN_WORD; // not allowed as cont character
508 if ( nStartTypes & ASC_DIGIT )
509 for ( i = 48; i < 58; i++ )
510 pTable[i] |= TOKEN_CHAR_WORD; // allowed as start character
511 if ( !(nContTypes & ASC_DIGIT) )
512 for ( i = 48; i < 58; i++ )
513 pTable[i] &= ~TOKEN_WORD; // not allowed as cont character
515 if ( !(nStartTypes & ASC_UNDERSCORE) )
516 pTable[95] &= ~TOKEN_CHAR_WORD; // not allowed as start character
517 if ( !(nContTypes & ASC_UNDERSCORE) )
518 pTable[95] &= ~TOKEN_WORD; // not allowed as cont character
520 if ( nStartTypes & ASC_DOLLAR )
521 pTable[36] |= TOKEN_CHAR_WORD; // allowed as start character
522 if ( nContTypes & ASC_DOLLAR )
523 pTable[36] |= TOKEN_WORD; // allowed as cont character
525 if ( nStartTypes & ASC_DOT )
526 pTable[46] |= TOKEN_CHAR_WORD; // allowed as start character
527 if ( nContTypes & ASC_DOT )
528 pTable[46] |= TOKEN_WORD; // allowed as cont character
530 if ( nStartTypes & ASC_COLON )
531 pTable[58] |= TOKEN_CHAR_WORD; // allowed as start character
532 if ( nContTypes & ASC_COLON )
533 pTable[58] |= TOKEN_WORD; // allowed as cont character
535 if ( nStartTypes & ASC_CONTROL )
536 for ( i = 1; i < 32; i++ )
537 pTable[i] |= TOKEN_CHAR_WORD; // allowed as start character
538 if ( nContTypes & ASC_CONTROL )
539 for ( i = 1; i < 32; i++ )
540 pTable[i] |= TOKEN_WORD; // allowed as cont character
542 if ( nStartTypes & ASC_ANY_BUT_CONTROL )
543 for ( i = 32; i < nDefCnt; i++ )
544 pTable[i] |= TOKEN_CHAR_WORD; // allowed as start character
545 if ( nContTypes & ASC_ANY_BUT_CONTROL )
546 for ( i = 32; i < nDefCnt; i++ )
547 pTable[i] |= TOKEN_WORD; // allowed as cont character
551 // Merge in (positively override with) user defined characters.
552 // StartChars
553 sal_Int32 nLen = aStartChars.getLength();
554 if ( nLen )
556 if ( !pStart )
557 pStart = new UPT_FLAG_TYPE[ nLen ];
558 const sal_Unicode* p = aStartChars.getStr();
559 for ( sal_Int32 j=0; j<nLen; j++, p++ )
561 pStart[j] = TOKEN_CHAR_WORD;
562 if ( *p < nDefCnt )
563 pTable[*p] |= TOKEN_CHAR_WORD;
566 // ContChars
567 nLen = aContChars.getLength();
568 if ( nLen )
570 if ( !pCont )
571 pCont = new UPT_FLAG_TYPE[ nLen ];
572 const sal_Unicode* p = aContChars.getStr();
573 for ( sal_Int32 j=0; j<nLen; j++ )
575 pCont[j] = TOKEN_WORD;
576 if ( *p < nDefCnt )
577 pTable[*p] |= TOKEN_WORD;
583 void cclass_Unicode::destroyParserTable()
585 if ( pCont )
586 delete [] pCont;
587 if ( pStart )
588 delete [] pStart;
589 if ( pTable )
590 delete [] pTable;
594 UPT_FLAG_TYPE cclass_Unicode::getFlags( const sal_Unicode* aStr, sal_Int32 nPos )
596 UPT_FLAG_TYPE nMask;
597 sal_Unicode c = aStr[nPos];
598 if ( c < nDefCnt )
599 nMask = pTable[ sal_uInt8(c) ];
600 else
601 nMask = getFlagsExtended( aStr, nPos );
602 switch ( eState )
604 case ssGetChar :
605 case ssRewindFromValue :
606 case ssIgnoreLeadingInRewind :
607 case ssGetWordFirstChar :
608 if ( !(nMask & TOKEN_CHAR_WORD) )
610 nMask |= getStartCharsFlags( c );
611 if ( nMask & TOKEN_CHAR_WORD )
612 nMask &= ~TOKEN_EXCLUDED;
614 break;
615 case ssGetValue :
616 case ssGetWord :
617 if ( !(nMask & TOKEN_WORD) )
619 nMask |= getContCharsFlags( c );
620 if ( nMask & TOKEN_WORD )
621 nMask &= ~TOKEN_EXCLUDED;
623 break;
624 default:
625 ; // other cases aren't needed, no compiler warning
627 return nMask;
631 UPT_FLAG_TYPE cclass_Unicode::getFlagsExtended( const sal_Unicode* aStr, sal_Int32 nPos )
633 sal_Unicode c = aStr[nPos];
634 if ( c == cGroupSep )
635 return TOKEN_VALUE;
636 else if ( c == cDecimalSep )
637 return TOKEN_CHAR_VALUE | TOKEN_VALUE;
638 using namespace i18n;
639 bool bStart = (eState == ssGetChar || eState == ssGetWordFirstChar ||
640 eState == ssRewindFromValue || eState == ssIgnoreLeadingInRewind);
641 sal_Int32 nTypes = (bStart ? nStartTypes : nContTypes);
643 //! all KParseTokens::UNI_... must be matched
644 switch ( u_charType( (sal_uInt32) c ) )
646 case U_UPPERCASE_LETTER :
647 return (nTypes & KParseTokens::UNI_UPALPHA) ?
648 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
649 TOKEN_ILLEGAL;
650 case U_LOWERCASE_LETTER :
651 return (nTypes & KParseTokens::UNI_LOALPHA) ?
652 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
653 TOKEN_ILLEGAL;
654 case U_TITLECASE_LETTER :
655 return (nTypes & KParseTokens::UNI_TITLE_ALPHA) ?
656 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
657 TOKEN_ILLEGAL;
658 case U_MODIFIER_LETTER :
659 return (nTypes & KParseTokens::UNI_MODIFIER_LETTER) ?
660 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
661 TOKEN_ILLEGAL;
662 case U_NON_SPACING_MARK :
663 case U_COMBINING_SPACING_MARK :
664 // Non_Spacing_Mark can't be a leading character,
665 // nor can a spacing combining mark.
666 if (bStart)
667 return TOKEN_ILLEGAL;
668 // fall through, treat it as Other_Letter.
669 case U_OTHER_LETTER :
670 return (nTypes & KParseTokens::UNI_OTHER_LETTER) ?
671 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
672 TOKEN_ILLEGAL;
673 case U_DECIMAL_DIGIT_NUMBER :
674 return ((nTypes & KParseTokens::UNI_DIGIT) ?
675 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
676 TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS;
677 case U_LETTER_NUMBER :
678 return ((nTypes & KParseTokens::UNI_LETTER_NUMBER) ?
679 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
680 TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS;
681 case U_OTHER_NUMBER :
682 return ((nTypes & KParseTokens::UNI_OTHER_NUMBER) ?
683 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
684 TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS;
685 case U_SPACE_SEPARATOR :
686 return ((nTypes & KParseTokens::IGNORE_LEADING_WS) ?
687 TOKEN_CHAR_DONTCARE : (bStart ? TOKEN_CHAR_WORD : (TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP) ));
690 return TOKEN_ILLEGAL;
694 UPT_FLAG_TYPE cclass_Unicode::getStartCharsFlags( sal_Unicode c )
696 if ( pStart )
698 const sal_Unicode* pStr = aStartChars.getStr();
699 const sal_Unicode* p = StrChr( pStr, c );
700 if ( p )
701 return pStart[ p - pStr ];
703 return TOKEN_ILLEGAL;
707 UPT_FLAG_TYPE cclass_Unicode::getContCharsFlags( sal_Unicode c )
709 if ( pCont )
711 const sal_Unicode* pStr = aContChars.getStr();
712 const sal_Unicode* p = StrChr( pStr, c );
713 if ( p )
714 return pCont[ p - pStr ];
716 return TOKEN_ILLEGAL;
720 void cclass_Unicode::parseText( ParseResult& r, const OUString& rText, sal_Int32 nPos, sal_Int32 nTokenType )
722 using namespace i18n;
723 const sal_Unicode* const pTextStart = rText.getStr() + nPos;
724 eState = ssGetChar;
726 //! All the variables below (plus ParseResult) have to be resetted on ssRewindFromValue!
727 const sal_Unicode* pSym = pTextStart;
728 const sal_Unicode* pSrc = pSym;
729 OUString aSymbol;
730 sal_Unicode c = *pSrc;
731 sal_Unicode cLast = 0;
732 int nDecSeps = 0;
733 bool bQuote = false;
734 bool bMightBeWord = true;
735 bool bMightBeWordLast = true;
736 //! All the variables above (plus ParseResult) have to be resetted on ssRewindFromValue!
738 while ( (c != 0) && (eState != ssStop) )
740 UPT_FLAG_TYPE nMask = getFlags( pTextStart, pSrc - pTextStart );
741 if ( nMask & TOKEN_EXCLUDED )
742 eState = ssBounce;
743 if ( bMightBeWord )
744 { // only relevant for ssGetValue fall back
745 if ( eState == ssGetChar || eState == ssRewindFromValue ||
746 eState == ssIgnoreLeadingInRewind )
747 bMightBeWord = ((nMask & TOKEN_CHAR_WORD) != 0);
748 else
749 bMightBeWord = ((nMask & TOKEN_WORD) != 0);
751 sal_Int32 nParseTokensType = getParseTokensType( pTextStart, pSrc - pTextStart );
752 pSrc++;
753 switch (eState)
755 case ssGetChar :
756 case ssRewindFromValue :
757 case ssIgnoreLeadingInRewind :
759 if ( (nMask & TOKEN_CHAR_VALUE) && eState != ssRewindFromValue
760 && eState != ssIgnoreLeadingInRewind )
761 { //! must be first, may fall back to ssGetWord via bMightBeWord
762 eState = ssGetValue;
763 if ( nMask & TOKEN_VALUE_DIGIT )
765 if ( 128 <= c )
766 r.TokenType = KParseType::UNI_NUMBER;
767 else
768 r.TokenType = KParseType::ASC_NUMBER;
770 else if ( c == cDecimalSep )
772 if ( *pSrc )
773 ++nDecSeps;
774 else
775 eState = ssRewindFromValue;
776 // retry for ONE_SINGLE_CHAR or others
779 else if ( nMask & TOKEN_CHAR_WORD )
781 eState = ssGetWord;
782 r.TokenType = KParseType::IDENTNAME;
784 else if ( nMask & TOKEN_NAME_SEP )
786 eState = ssGetWordFirstChar;
787 bQuote = true;
788 pSym++;
789 nParseTokensType = 0; // will be taken of first real character
790 r.TokenType = KParseType::SINGLE_QUOTE_NAME;
792 else if ( nMask & TOKEN_CHAR_STRING )
794 eState = ssGetString;
795 pSym++;
796 nParseTokensType = 0; // will be taken of first real character
797 r.TokenType = KParseType::DOUBLE_QUOTE_STRING;
799 else if ( nMask & TOKEN_CHAR_DONTCARE )
801 if ( nStartTypes & KParseTokens::IGNORE_LEADING_WS )
803 if (eState == ssRewindFromValue)
804 eState = ssIgnoreLeadingInRewind;
805 r.LeadingWhiteSpace++;
806 pSym++;
807 nParseTokensType = 0; // wait until real character
808 bMightBeWord = true;
810 else
811 eState = ssBounce;
813 else if ( nMask & TOKEN_CHAR_BOOL )
815 eState = ssGetBool;
816 r.TokenType = KParseType::BOOLEAN;
818 else if ( nMask & TOKEN_CHAR )
819 { //! must be last
820 eState = ssStop;
821 r.TokenType = KParseType::ONE_SINGLE_CHAR;
823 else
824 eState = ssBounce; // not known
826 break;
827 case ssGetValue :
829 if ( nMask & TOKEN_VALUE_DIGIT )
831 if ( 128 <= c )
832 r.TokenType = KParseType::UNI_NUMBER;
833 else if ( r.TokenType != KParseType::UNI_NUMBER )
834 r.TokenType = KParseType::ASC_NUMBER;
836 if ( nMask & TOKEN_VALUE )
838 if ( c == cDecimalSep && ++nDecSeps > 1 )
840 if ( pSrc - pTextStart == 2 )
841 eState = ssRewindFromValue;
842 // consecutive separators
843 else
844 eState = ssStopBack;
846 // else keep it going
848 else if ( c == 'E' || c == 'e' )
850 UPT_FLAG_TYPE nNext = getFlags( pTextStart, pSrc - pTextStart );
851 if ( nNext & TOKEN_VALUE_EXP )
852 ; // keep it going
853 else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) )
854 { // might be a numerical name (1.2efg)
855 eState = ssGetWord;
856 r.TokenType = KParseType::IDENTNAME;
858 else
859 eState = ssStopBack;
861 else if ( nMask & TOKEN_VALUE_SIGN )
863 if ( (cLast == 'E') || (cLast == 'e') )
865 UPT_FLAG_TYPE nNext = getFlags( pTextStart, pSrc - pTextStart );
866 if ( nNext & TOKEN_VALUE_EXP_VALUE )
867 ; // keep it going
868 else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) )
869 { // might be a numerical name (1.2e+fg)
870 eState = ssGetWord;
871 r.TokenType = KParseType::IDENTNAME;
873 else
874 eState = ssStopBack;
876 else if ( bMightBeWord )
877 { // might be a numerical name (1.2+fg)
878 eState = ssGetWord;
879 r.TokenType = KParseType::IDENTNAME;
881 else
882 eState = ssStopBack;
884 else if ( bMightBeWord && (nMask & TOKEN_WORD) )
885 { // might be a numerical name (1995.A1)
886 eState = ssGetWord;
887 r.TokenType = KParseType::IDENTNAME;
889 else
890 eState = ssStopBack;
892 break;
893 case ssGetWordFirstChar :
894 eState = ssGetWord;
895 // fall thru
896 case ssGetWord :
898 if ( nMask & TOKEN_WORD )
899 ; // keep it going
900 else if ( nMask & TOKEN_NAME_SEP )
902 if ( bQuote )
904 if ( cLast == '\\' )
905 { // escaped
906 aSymbol += OUString( pSym, pSrc - pSym - 2 );
907 aSymbol += OUString( &c, 1);
909 else
911 eState = ssStop;
912 aSymbol += OUString( pSym, pSrc - pSym - 1 );
914 pSym = pSrc;
916 else
917 eState = ssStopBack;
919 else if ( bQuote )
920 ; // keep it going
921 else
922 eState = ssStopBack;
924 break;
925 case ssGetString :
927 if ( nMask & TOKEN_STRING_SEP )
929 if ( cLast == '\\' )
930 { // escaped
931 aSymbol += OUString( pSym, pSrc - pSym - 2 );
932 aSymbol += OUString( &c, 1);
934 else if ( c == *pSrc &&
935 !(nContTypes & KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING) )
936 { // "" => literal " escaped
937 aSymbol += OUString( pSym, pSrc - pSym );
938 pSrc++;
940 else
942 eState = ssStop;
943 aSymbol += OUString( pSym, pSrc - pSym - 1 );
945 pSym = pSrc;
948 break;
949 case ssGetBool :
951 if ( (nMask & TOKEN_BOOL) )
952 eState = ssStop; // maximum 2: <, >, <>, <=, >=
953 else
954 eState = ssStopBack;
956 break;
957 case ssStopBack :
958 case ssBounce :
959 case ssStop :
960 ; // nothing, no compiler warning
961 break;
963 if ( eState == ssRewindFromValue )
965 r = ParseResult();
966 pSym = pTextStart;
967 pSrc = pSym;
968 aSymbol = OUString();
969 c = *pSrc;
970 cLast = 0;
971 nDecSeps = 0;
972 bQuote = false;
973 bMightBeWord = true;
974 bMightBeWordLast = true;
976 else
978 if ( !(r.TokenType & nTokenType) )
980 if ( (r.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER))
981 && (nTokenType & KParseType::IDENTNAME) && bMightBeWord )
982 ; // keep a number that might be a word
983 else if ( r.LeadingWhiteSpace == (pSrc - pTextStart) )
984 ; // keep ignored white space
985 else if ( !r.TokenType && eState == ssGetValue && (nMask & TOKEN_VALUE_SEP) )
986 ; // keep uncertain value
987 else
988 eState = ssBounce;
990 if ( eState == ssBounce )
992 r.TokenType = 0;
993 eState = ssStopBack;
995 if ( eState == ssStopBack )
996 { // put back
997 pSrc--;
998 bMightBeWord = bMightBeWordLast;
999 eState = ssStop;
1001 if ( eState != ssStop )
1003 if ( !r.StartFlags )
1004 r.StartFlags |= nParseTokensType;
1005 else
1006 r.ContFlags |= nParseTokensType;
1008 bMightBeWordLast = bMightBeWord;
1009 cLast = c;
1010 c = *pSrc;
1013 // r.CharLen is the length in characters (not code points) of the parsed
1014 // token not including any leading white space, change this calculation if
1015 // multi-code-point Unicode characters are to be supported.
1016 r.CharLen = pSrc - pTextStart - r.LeadingWhiteSpace;
1017 r.EndPos = nPos + (pSrc - pTextStart);
1018 if ( r.TokenType & KParseType::ASC_NUMBER )
1020 r.Value = rtl_math_uStringToDouble( pTextStart + r.LeadingWhiteSpace,
1021 pTextStart + r.EndPos, cDecimalSep, cGroupSep, NULL, NULL );
1022 if ( bMightBeWord )
1023 r.TokenType |= KParseType::IDENTNAME;
1025 else if ( r.TokenType & KParseType::UNI_NUMBER )
1027 if ( !xNatNumSup.is() )
1029 #define NATIVENUMBERSUPPLIER_SERVICENAME "com.sun.star.i18n.NativeNumberSupplier"
1030 if ( xMSF.is() )
1032 xNatNumSup = Reference< XNativeNumberSupplier > (
1033 xMSF->createInstance( OUString(
1034 RTL_CONSTASCII_USTRINGPARAM(
1035 NATIVENUMBERSUPPLIER_SERVICENAME ) ) ),
1036 UNO_QUERY );
1038 if ( !xNatNumSup.is() )
1040 throw RuntimeException( OUString(
1041 #ifndef PRODUCT
1042 RTL_CONSTASCII_USTRINGPARAM(
1043 "cclass_Unicode::parseText: can't instanciate "
1044 NATIVENUMBERSUPPLIER_SERVICENAME )
1045 #endif
1046 ), *this );
1048 #undef NATIVENUMBERSUPPLIER_SERVICENAME
1050 OUString aTmp( pTextStart + r.LeadingWhiteSpace, r.EndPos - nPos +
1051 r.LeadingWhiteSpace );
1052 // transliterate to ASCII
1053 aTmp = xNatNumSup->getNativeNumberString( aTmp, aParserLocale,
1054 NativeNumberMode::NATNUM0 );
1055 r.Value = ::rtl::math::stringToDouble( aTmp, cDecimalSep, cGroupSep, NULL, NULL );
1056 if ( bMightBeWord )
1057 r.TokenType |= KParseType::IDENTNAME;
1059 else if ( r.TokenType & (KParseType::SINGLE_QUOTE_NAME | KParseType::DOUBLE_QUOTE_STRING) )
1061 if ( pSym < pSrc )
1062 { //! open quote
1063 aSymbol += OUString( pSym, pSrc - pSym );
1064 r.TokenType |= KParseType::MISSING_QUOTE;
1066 r.DequotedNameOrString = aSymbol;
1070 } } } }