/*
 * Source: LibreOffice.git, branch libreoffice-5-0-4,
 * i18npool/source/characterclassification/cclass_unicode_parser.cxx
 * (blob 08c127e4c940c60ca441611d93a8430a547856a7)
 */
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
21 #include <cclass_unicode.hxx>
22 #include <unicode/uchar.h>
23 #include <rtl/math.hxx>
24 #include <rtl/ustring.hxx>
25 #include <com/sun/star/i18n/KParseTokens.hpp>
26 #include <com/sun/star/i18n/KParseType.hpp>
27 #include <com/sun/star/i18n/UnicodeType.hpp>
28 #include <com/sun/star/i18n/LocaleData.hpp>
29 #include <com/sun/star/i18n/NativeNumberMode.hpp>
30 #include <com/sun/star/i18n/NativeNumberSupplier.hpp>
31 #include <comphelper/processfactory.hxx>
33 #include <string.h>
35 using namespace ::com::sun::star::uno;
36 using namespace ::com::sun::star::lang;
38 namespace com { namespace sun { namespace star { namespace i18n {
40 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_ILLEGAL = 0x00000000;
41 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR = 0x00000001;
42 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_BOOL = 0x00000002;
43 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_WORD = 0x00000004;
44 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_VALUE = 0x00000008;
45 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_STRING = 0x00000010;
46 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_DONTCARE= 0x00000020;
47 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_BOOL = 0x00000040;
48 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD = 0x00000080;
49 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD_SEP = 0x00000100;
50 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE = 0x00000200;
51 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SEP = 0x00000400;
52 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP = 0x00000800;
53 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SIGN = 0x00001000;
54 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP_VALUE = 0x00002000;
55 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_DIGIT = 0x00004000;
56 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_NAME_SEP = 0x20000000;
57 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_STRING_SEP = 0x40000000;
58 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_EXCLUDED = 0x80000000;
60 #define TOKEN_DIGIT_FLAGS (TOKEN_CHAR_VALUE | TOKEN_VALUE | TOKEN_VALUE_EXP | TOKEN_VALUE_EXP_VALUE | TOKEN_VALUE_DIGIT)
62 // Default identifier/name specification is [A-Za-z_][A-Za-z0-9_]*
64 const sal_uInt8 cclass_Unicode::nDefCnt = 128;
65 const UPT_FLAG_TYPE cclass_Unicode::pDefaultParserTable[ nDefCnt ] =
67 // (...) == Calc formula compiler specific, commented out and modified
69 /* \0 */ TOKEN_EXCLUDED,
70 TOKEN_ILLEGAL,
71 TOKEN_ILLEGAL,
72 TOKEN_ILLEGAL,
73 TOKEN_ILLEGAL,
74 TOKEN_ILLEGAL,
75 TOKEN_ILLEGAL,
76 TOKEN_ILLEGAL,
77 TOKEN_ILLEGAL,
78 /* 9 \t */ TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL)
79 TOKEN_ILLEGAL,
80 /* 11 \v */ TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL)
81 TOKEN_ILLEGAL,
82 TOKEN_ILLEGAL,
83 TOKEN_ILLEGAL,
84 TOKEN_ILLEGAL,
85 TOKEN_ILLEGAL,
86 TOKEN_ILLEGAL,
87 TOKEN_ILLEGAL,
88 TOKEN_ILLEGAL,
89 TOKEN_ILLEGAL,
90 TOKEN_ILLEGAL,
91 TOKEN_ILLEGAL,
92 TOKEN_ILLEGAL,
93 TOKEN_ILLEGAL,
94 TOKEN_ILLEGAL,
95 TOKEN_ILLEGAL,
96 TOKEN_ILLEGAL,
97 TOKEN_ILLEGAL,
98 TOKEN_ILLEGAL,
99 TOKEN_ILLEGAL,
100 TOKEN_ILLEGAL,
101 /* 32 */ TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
102 /* 33 ! */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
103 /* 34 " */ TOKEN_CHAR_STRING | TOKEN_STRING_SEP,
104 /* 35 # */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_WORD_SEP)
105 /* 36 $ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_CHAR_WORD | TOKEN_WORD)
106 /* 37 % */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_VALUE)
107 /* 38 & */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
108 /* 39 ' */ TOKEN_NAME_SEP,
109 /* 40 ( */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
110 /* 41 ) */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
111 /* 42 * */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
112 /* 43 + */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN,
113 /* 44 , */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_CHAR_VALUE | TOKEN_VALUE)
114 /* 45 - */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN,
115 /* 46 . */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_WORD | TOKEN_CHAR_VALUE | TOKEN_VALUE)
116 /* 47 / */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
117 //for ( i = 48; i < 58; i++ )
118 /* 48 0 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
119 /* 49 1 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
120 /* 50 2 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
121 /* 51 3 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
122 /* 52 4 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
123 /* 53 5 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
124 /* 54 6 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
125 /* 55 7 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
126 /* 56 8 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
127 /* 57 9 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
128 /* 58 : */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_WORD)
129 /* 59 ; */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
130 /* 60 < */ TOKEN_CHAR_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
131 /* 61 = */ TOKEN_CHAR | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
132 /* 62 > */ TOKEN_CHAR_BOOL | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
133 /* 63 ? */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_CHAR_WORD | TOKEN_WORD)
134 /* 64 @ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
135 //for ( i = 65; i < 91; i++ )
136 /* 65 A */ TOKEN_CHAR_WORD | TOKEN_WORD,
137 /* 66 B */ TOKEN_CHAR_WORD | TOKEN_WORD,
138 /* 67 C */ TOKEN_CHAR_WORD | TOKEN_WORD,
139 /* 68 D */ TOKEN_CHAR_WORD | TOKEN_WORD,
140 /* 69 E */ TOKEN_CHAR_WORD | TOKEN_WORD,
141 /* 70 F */ TOKEN_CHAR_WORD | TOKEN_WORD,
142 /* 71 G */ TOKEN_CHAR_WORD | TOKEN_WORD,
143 /* 72 H */ TOKEN_CHAR_WORD | TOKEN_WORD,
144 /* 73 I */ TOKEN_CHAR_WORD | TOKEN_WORD,
145 /* 74 J */ TOKEN_CHAR_WORD | TOKEN_WORD,
146 /* 75 K */ TOKEN_CHAR_WORD | TOKEN_WORD,
147 /* 76 L */ TOKEN_CHAR_WORD | TOKEN_WORD,
148 /* 77 M */ TOKEN_CHAR_WORD | TOKEN_WORD,
149 /* 78 N */ TOKEN_CHAR_WORD | TOKEN_WORD,
150 /* 79 O */ TOKEN_CHAR_WORD | TOKEN_WORD,
151 /* 80 P */ TOKEN_CHAR_WORD | TOKEN_WORD,
152 /* 81 Q */ TOKEN_CHAR_WORD | TOKEN_WORD,
153 /* 82 R */ TOKEN_CHAR_WORD | TOKEN_WORD,
154 /* 83 S */ TOKEN_CHAR_WORD | TOKEN_WORD,
155 /* 84 T */ TOKEN_CHAR_WORD | TOKEN_WORD,
156 /* 85 U */ TOKEN_CHAR_WORD | TOKEN_WORD,
157 /* 86 V */ TOKEN_CHAR_WORD | TOKEN_WORD,
158 /* 87 W */ TOKEN_CHAR_WORD | TOKEN_WORD,
159 /* 88 X */ TOKEN_CHAR_WORD | TOKEN_WORD,
160 /* 89 Y */ TOKEN_CHAR_WORD | TOKEN_WORD,
161 /* 90 Z */ TOKEN_CHAR_WORD | TOKEN_WORD,
162 /* 91 [ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
163 /* 92 \ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
164 /* 93 ] */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
165 /* 94 ^ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
166 /* 95 _ */ TOKEN_CHAR_WORD | TOKEN_WORD,
167 /* 96 ` */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
168 //for ( i = 97; i < 123; i++ )
169 /* 97 a */ TOKEN_CHAR_WORD | TOKEN_WORD,
170 /* 98 b */ TOKEN_CHAR_WORD | TOKEN_WORD,
171 /* 99 c */ TOKEN_CHAR_WORD | TOKEN_WORD,
172 /* 100 d */ TOKEN_CHAR_WORD | TOKEN_WORD,
173 /* 101 e */ TOKEN_CHAR_WORD | TOKEN_WORD,
174 /* 102 f */ TOKEN_CHAR_WORD | TOKEN_WORD,
175 /* 103 g */ TOKEN_CHAR_WORD | TOKEN_WORD,
176 /* 104 h */ TOKEN_CHAR_WORD | TOKEN_WORD,
177 /* 105 i */ TOKEN_CHAR_WORD | TOKEN_WORD,
178 /* 106 j */ TOKEN_CHAR_WORD | TOKEN_WORD,
179 /* 107 k */ TOKEN_CHAR_WORD | TOKEN_WORD,
180 /* 108 l */ TOKEN_CHAR_WORD | TOKEN_WORD,
181 /* 109 m */ TOKEN_CHAR_WORD | TOKEN_WORD,
182 /* 110 n */ TOKEN_CHAR_WORD | TOKEN_WORD,
183 /* 111 o */ TOKEN_CHAR_WORD | TOKEN_WORD,
184 /* 112 p */ TOKEN_CHAR_WORD | TOKEN_WORD,
185 /* 113 q */ TOKEN_CHAR_WORD | TOKEN_WORD,
186 /* 114 r */ TOKEN_CHAR_WORD | TOKEN_WORD,
187 /* 115 s */ TOKEN_CHAR_WORD | TOKEN_WORD,
188 /* 116 t */ TOKEN_CHAR_WORD | TOKEN_WORD,
189 /* 117 u */ TOKEN_CHAR_WORD | TOKEN_WORD,
190 /* 118 v */ TOKEN_CHAR_WORD | TOKEN_WORD,
191 /* 119 w */ TOKEN_CHAR_WORD | TOKEN_WORD,
192 /* 120 x */ TOKEN_CHAR_WORD | TOKEN_WORD,
193 /* 121 y */ TOKEN_CHAR_WORD | TOKEN_WORD,
194 /* 122 z */ TOKEN_CHAR_WORD | TOKEN_WORD,
195 /* 123 { */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
196 /* 124 | */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
197 /* 125 } */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
198 /* 126 ~ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
199 /* 127 */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP // (TOKEN_ILLEGAL // UNUSED)
203 const sal_Int32 cclass_Unicode::pParseTokensType[ nDefCnt ] =
205 /* \0 */ KParseTokens::ASC_OTHER,
206 KParseTokens::ASC_CONTROL,
207 KParseTokens::ASC_CONTROL,
208 KParseTokens::ASC_CONTROL,
209 KParseTokens::ASC_CONTROL,
210 KParseTokens::ASC_CONTROL,
211 KParseTokens::ASC_CONTROL,
212 KParseTokens::ASC_CONTROL,
213 KParseTokens::ASC_CONTROL,
214 /* 9 \t */ KParseTokens::ASC_CONTROL,
215 KParseTokens::ASC_CONTROL,
216 /* 11 \v */ KParseTokens::ASC_CONTROL,
217 KParseTokens::ASC_CONTROL,
218 KParseTokens::ASC_CONTROL,
219 KParseTokens::ASC_CONTROL,
220 KParseTokens::ASC_CONTROL,
221 KParseTokens::ASC_CONTROL,
222 KParseTokens::ASC_CONTROL,
223 KParseTokens::ASC_CONTROL,
224 KParseTokens::ASC_CONTROL,
225 KParseTokens::ASC_CONTROL,
226 KParseTokens::ASC_CONTROL,
227 KParseTokens::ASC_CONTROL,
228 KParseTokens::ASC_CONTROL,
229 KParseTokens::ASC_CONTROL,
230 KParseTokens::ASC_CONTROL,
231 KParseTokens::ASC_CONTROL,
232 KParseTokens::ASC_CONTROL,
233 KParseTokens::ASC_CONTROL,
234 KParseTokens::ASC_CONTROL,
235 KParseTokens::ASC_CONTROL,
236 KParseTokens::ASC_CONTROL,
237 /* 32 */ KParseTokens::ASC_OTHER,
238 /* 33 ! */ KParseTokens::ASC_OTHER,
239 /* 34 " */ KParseTokens::ASC_OTHER,
240 /* 35 # */ KParseTokens::ASC_OTHER,
241 /* 36 $ */ KParseTokens::ASC_DOLLAR,
242 /* 37 % */ KParseTokens::ASC_OTHER,
243 /* 38 & */ KParseTokens::ASC_OTHER,
244 /* 39 ' */ KParseTokens::ASC_OTHER,
245 /* 40 ( */ KParseTokens::ASC_OTHER,
246 /* 41 ) */ KParseTokens::ASC_OTHER,
247 /* 42 * */ KParseTokens::ASC_OTHER,
248 /* 43 + */ KParseTokens::ASC_OTHER,
249 /* 44 , */ KParseTokens::ASC_OTHER,
250 /* 45 - */ KParseTokens::ASC_OTHER,
251 /* 46 . */ KParseTokens::ASC_DOT,
252 /* 47 / */ KParseTokens::ASC_OTHER,
253 //for ( i = 48; i < 58; i++ )
254 /* 48 0 */ KParseTokens::ASC_DIGIT,
255 /* 49 1 */ KParseTokens::ASC_DIGIT,
256 /* 50 2 */ KParseTokens::ASC_DIGIT,
257 /* 51 3 */ KParseTokens::ASC_DIGIT,
258 /* 52 4 */ KParseTokens::ASC_DIGIT,
259 /* 53 5 */ KParseTokens::ASC_DIGIT,
260 /* 54 6 */ KParseTokens::ASC_DIGIT,
261 /* 55 7 */ KParseTokens::ASC_DIGIT,
262 /* 56 8 */ KParseTokens::ASC_DIGIT,
263 /* 57 9 */ KParseTokens::ASC_DIGIT,
264 /* 58 : */ KParseTokens::ASC_COLON,
265 /* 59 ; */ KParseTokens::ASC_OTHER,
266 /* 60 < */ KParseTokens::ASC_OTHER,
267 /* 61 = */ KParseTokens::ASC_OTHER,
268 /* 62 > */ KParseTokens::ASC_OTHER,
269 /* 63 ? */ KParseTokens::ASC_OTHER,
270 /* 64 @ */ KParseTokens::ASC_OTHER,
271 //for ( i = 65; i < 91; i++ )
272 /* 65 A */ KParseTokens::ASC_UPALPHA,
273 /* 66 B */ KParseTokens::ASC_UPALPHA,
274 /* 67 C */ KParseTokens::ASC_UPALPHA,
275 /* 68 D */ KParseTokens::ASC_UPALPHA,
276 /* 69 E */ KParseTokens::ASC_UPALPHA,
277 /* 70 F */ KParseTokens::ASC_UPALPHA,
278 /* 71 G */ KParseTokens::ASC_UPALPHA,
279 /* 72 H */ KParseTokens::ASC_UPALPHA,
280 /* 73 I */ KParseTokens::ASC_UPALPHA,
281 /* 74 J */ KParseTokens::ASC_UPALPHA,
282 /* 75 K */ KParseTokens::ASC_UPALPHA,
283 /* 76 L */ KParseTokens::ASC_UPALPHA,
284 /* 77 M */ KParseTokens::ASC_UPALPHA,
285 /* 78 N */ KParseTokens::ASC_UPALPHA,
286 /* 79 O */ KParseTokens::ASC_UPALPHA,
287 /* 80 P */ KParseTokens::ASC_UPALPHA,
288 /* 81 Q */ KParseTokens::ASC_UPALPHA,
289 /* 82 R */ KParseTokens::ASC_UPALPHA,
290 /* 83 S */ KParseTokens::ASC_UPALPHA,
291 /* 84 T */ KParseTokens::ASC_UPALPHA,
292 /* 85 U */ KParseTokens::ASC_UPALPHA,
293 /* 86 V */ KParseTokens::ASC_UPALPHA,
294 /* 87 W */ KParseTokens::ASC_UPALPHA,
295 /* 88 X */ KParseTokens::ASC_UPALPHA,
296 /* 89 Y */ KParseTokens::ASC_UPALPHA,
297 /* 90 Z */ KParseTokens::ASC_UPALPHA,
298 /* 91 [ */ KParseTokens::ASC_OTHER,
299 /* 92 \ */ KParseTokens::ASC_OTHER,
300 /* 93 ] */ KParseTokens::ASC_OTHER,
301 /* 94 ^ */ KParseTokens::ASC_OTHER,
302 /* 95 _ */ KParseTokens::ASC_UNDERSCORE,
303 /* 96 ` */ KParseTokens::ASC_OTHER,
304 //for ( i = 97; i < 123; i++ )
305 /* 97 a */ KParseTokens::ASC_LOALPHA,
306 /* 98 b */ KParseTokens::ASC_LOALPHA,
307 /* 99 c */ KParseTokens::ASC_LOALPHA,
308 /* 100 d */ KParseTokens::ASC_LOALPHA,
309 /* 101 e */ KParseTokens::ASC_LOALPHA,
310 /* 102 f */ KParseTokens::ASC_LOALPHA,
311 /* 103 g */ KParseTokens::ASC_LOALPHA,
312 /* 104 h */ KParseTokens::ASC_LOALPHA,
313 /* 105 i */ KParseTokens::ASC_LOALPHA,
314 /* 106 j */ KParseTokens::ASC_LOALPHA,
315 /* 107 k */ KParseTokens::ASC_LOALPHA,
316 /* 108 l */ KParseTokens::ASC_LOALPHA,
317 /* 109 m */ KParseTokens::ASC_LOALPHA,
318 /* 110 n */ KParseTokens::ASC_LOALPHA,
319 /* 111 o */ KParseTokens::ASC_LOALPHA,
320 /* 112 p */ KParseTokens::ASC_LOALPHA,
321 /* 113 q */ KParseTokens::ASC_LOALPHA,
322 /* 114 r */ KParseTokens::ASC_LOALPHA,
323 /* 115 s */ KParseTokens::ASC_LOALPHA,
324 /* 116 t */ KParseTokens::ASC_LOALPHA,
325 /* 117 u */ KParseTokens::ASC_LOALPHA,
326 /* 118 v */ KParseTokens::ASC_LOALPHA,
327 /* 119 w */ KParseTokens::ASC_LOALPHA,
328 /* 120 x */ KParseTokens::ASC_LOALPHA,
329 /* 121 y */ KParseTokens::ASC_LOALPHA,
330 /* 122 z */ KParseTokens::ASC_LOALPHA,
331 /* 123 { */ KParseTokens::ASC_OTHER,
332 /* 124 | */ KParseTokens::ASC_OTHER,
333 /* 125 } */ KParseTokens::ASC_OTHER,
334 /* 126 ~ */ KParseTokens::ASC_OTHER,
335 /* 127 */ KParseTokens::ASC_OTHER
339 // static
340 const sal_Unicode* cclass_Unicode::StrChr( const sal_Unicode* pStr, sal_Unicode c )
342 if ( !pStr )
343 return NULL;
344 while ( *pStr )
346 if ( *pStr == c )
347 return pStr;
348 pStr++;
350 return NULL;
354 sal_Int32 cclass_Unicode::getParseTokensType( const sal_Unicode* aStr, sal_Int32 nPos )
356 sal_Unicode c = aStr[nPos];
357 if ( c < nDefCnt )
358 return pParseTokensType[ sal_uInt8(c) ];
359 else
362 //! all KParseTokens::UNI_... must be matched
363 switch ( u_charType( (sal_uInt32) c ) )
365 case U_UPPERCASE_LETTER :
366 return KParseTokens::UNI_UPALPHA;
367 case U_LOWERCASE_LETTER :
368 return KParseTokens::UNI_LOALPHA;
369 case U_TITLECASE_LETTER :
370 return KParseTokens::UNI_TITLE_ALPHA;
371 case U_MODIFIER_LETTER :
372 return KParseTokens::UNI_MODIFIER_LETTER;
373 case U_OTHER_LETTER :
374 // Non_Spacing_Mark could not be as leading character
375 if (nPos == 0) break;
376 // fall through, treat it as Other_Letter.
377 case U_NON_SPACING_MARK :
378 return KParseTokens::UNI_OTHER_LETTER;
379 case U_DECIMAL_DIGIT_NUMBER :
380 return KParseTokens::UNI_DIGIT;
381 case U_LETTER_NUMBER :
382 return KParseTokens::UNI_LETTER_NUMBER;
383 case U_OTHER_NUMBER :
384 return KParseTokens::UNI_OTHER_NUMBER;
387 return KParseTokens::UNI_OTHER;
391 bool cclass_Unicode::setupInternational( const Locale& rLocale )
393 bool bChanged = (aParserLocale.Language != rLocale.Language
394 || aParserLocale.Country != rLocale.Country
395 || aParserLocale.Variant != rLocale.Variant);
396 if ( bChanged )
398 aParserLocale.Language = rLocale.Language;
399 aParserLocale.Country = rLocale.Country;
400 aParserLocale.Variant = rLocale.Variant;
402 if ( !mxLocaleData.is() )
404 mxLocaleData.set( LocaleData::create(m_xContext) );
406 return bChanged;
410 void cclass_Unicode::setupParserTable( const Locale& rLocale, sal_Int32 startCharTokenType,
411 const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
412 const OUString& userDefinedCharactersCont )
414 bool bIntlEqual = (rLocale.Language == aParserLocale.Language &&
415 rLocale.Country == aParserLocale.Country &&
416 rLocale.Variant == aParserLocale.Variant);
417 if ( !pTable || !bIntlEqual ||
418 startCharTokenType != nStartTypes ||
419 contCharTokenType != nContTypes ||
420 userDefinedCharactersStart != aStartChars ||
421 userDefinedCharactersCont != aContChars )
422 initParserTable( rLocale, startCharTokenType, userDefinedCharactersStart,
423 contCharTokenType, userDefinedCharactersCont );
427 void cclass_Unicode::initParserTable( const Locale& rLocale, sal_Int32 startCharTokenType,
428 const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
429 const OUString& userDefinedCharactersCont )
431 // (Re)Init
432 setupInternational( rLocale );
433 // Memory of pTable is reused.
434 if ( !pTable )
435 pTable = new UPT_FLAG_TYPE[nDefCnt];
436 memcpy( pTable, pDefaultParserTable, sizeof(UPT_FLAG_TYPE) * nDefCnt );
437 // Start and cont tables only need reallocation if different length.
438 if ( pStart && userDefinedCharactersStart.getLength() != aStartChars.getLength() )
440 delete [] pStart;
441 pStart = NULL;
443 if ( pCont && userDefinedCharactersCont.getLength() != aContChars.getLength() )
445 delete [] pCont;
446 pCont = NULL;
448 nStartTypes = startCharTokenType;
449 nContTypes = contCharTokenType;
450 aStartChars = userDefinedCharactersStart;
451 aContChars = userDefinedCharactersCont;
453 // specials
454 if( mxLocaleData.is() )
456 LocaleDataItem aItem =
457 mxLocaleData->getLocaleItem( aParserLocale );
458 //!TODO: theoretically separators may be a string, adjustment would have to be
459 //! done here and in parsing and in ::rtl::math::stringToDouble()
460 cGroupSep = aItem.thousandSeparator[0];
461 cDecimalSep = aItem.decimalSeparator[0];
464 if ( cGroupSep < nDefCnt )
465 pTable[cGroupSep] |= TOKEN_VALUE;
466 if ( cDecimalSep < nDefCnt )
467 pTable[cDecimalSep] |= TOKEN_CHAR_VALUE | TOKEN_VALUE;
469 // Modify characters according to KParseTokens definitions.
471 using namespace KParseTokens;
472 sal_uInt8 i;
474 if ( !(nStartTypes & ASC_UPALPHA) )
475 for ( i = 65; i < 91; i++ )
476 pTable[i] &= ~TOKEN_CHAR_WORD; // not allowed as start character
477 if ( !(nContTypes & ASC_UPALPHA) )
478 for ( i = 65; i < 91; i++ )
479 pTable[i] &= ~TOKEN_WORD; // not allowed as cont character
481 if ( !(nStartTypes & ASC_LOALPHA) )
482 for ( i = 97; i < 123; i++ )
483 pTable[i] &= ~TOKEN_CHAR_WORD; // not allowed as start character
484 if ( !(nContTypes & ASC_LOALPHA) )
485 for ( i = 97; i < 123; i++ )
486 pTable[i] &= ~TOKEN_WORD; // not allowed as cont character
488 if ( nStartTypes & ASC_DIGIT )
489 for ( i = 48; i < 58; i++ )
490 pTable[i] |= TOKEN_CHAR_WORD; // allowed as start character
491 if ( !(nContTypes & ASC_DIGIT) )
492 for ( i = 48; i < 58; i++ )
493 pTable[i] &= ~TOKEN_WORD; // not allowed as cont character
495 if ( !(nStartTypes & ASC_UNDERSCORE) )
496 pTable[95] &= ~TOKEN_CHAR_WORD; // not allowed as start character
497 if ( !(nContTypes & ASC_UNDERSCORE) )
498 pTable[95] &= ~TOKEN_WORD; // not allowed as cont character
500 if ( nStartTypes & ASC_DOLLAR )
501 pTable[36] |= TOKEN_CHAR_WORD; // allowed as start character
502 if ( nContTypes & ASC_DOLLAR )
503 pTable[36] |= TOKEN_WORD; // allowed as cont character
505 if ( nStartTypes & ASC_DOT )
506 pTable[46] |= TOKEN_CHAR_WORD; // allowed as start character
507 if ( nContTypes & ASC_DOT )
508 pTable[46] |= TOKEN_WORD; // allowed as cont character
510 if ( nStartTypes & ASC_COLON )
511 pTable[58] |= TOKEN_CHAR_WORD; // allowed as start character
512 if ( nContTypes & ASC_COLON )
513 pTable[58] |= TOKEN_WORD; // allowed as cont character
515 if ( nStartTypes & ASC_CONTROL )
516 for ( i = 1; i < 32; i++ )
517 pTable[i] |= TOKEN_CHAR_WORD; // allowed as start character
518 if ( nContTypes & ASC_CONTROL )
519 for ( i = 1; i < 32; i++ )
520 pTable[i] |= TOKEN_WORD; // allowed as cont character
522 if ( nStartTypes & ASC_ANY_BUT_CONTROL )
523 for ( i = 32; i < nDefCnt; i++ )
524 pTable[i] |= TOKEN_CHAR_WORD; // allowed as start character
525 if ( nContTypes & ASC_ANY_BUT_CONTROL )
526 for ( i = 32; i < nDefCnt; i++ )
527 pTable[i] |= TOKEN_WORD; // allowed as cont character
531 // Merge in (positively override with) user defined characters.
532 // StartChars
533 sal_Int32 nLen = aStartChars.getLength();
534 if ( nLen )
536 if ( !pStart )
537 pStart = new UPT_FLAG_TYPE[ nLen ];
538 const sal_Unicode* p = aStartChars.getStr();
539 for ( sal_Int32 j=0; j<nLen; j++, p++ )
541 pStart[j] = TOKEN_CHAR_WORD;
542 if ( *p < nDefCnt )
543 pTable[*p] |= TOKEN_CHAR_WORD;
546 // ContChars
547 nLen = aContChars.getLength();
548 if ( nLen )
550 if ( !pCont )
551 pCont = new UPT_FLAG_TYPE[ nLen ];
552 const sal_Unicode* p = aContChars.getStr();
553 for ( sal_Int32 j=0; j<nLen; j++ )
555 pCont[j] = TOKEN_WORD;
556 if ( *p < nDefCnt )
557 pTable[*p] |= TOKEN_WORD;
563 void cclass_Unicode::destroyParserTable()
565 if ( pCont )
566 delete [] pCont;
567 if ( pStart )
568 delete [] pStart;
569 if ( pTable )
570 delete [] pTable;
574 UPT_FLAG_TYPE cclass_Unicode::getFlags( const sal_Unicode* aStr, sal_Int32 nPos )
576 UPT_FLAG_TYPE nMask;
577 sal_Unicode c = aStr[nPos];
578 if ( c < nDefCnt )
579 nMask = pTable[ sal_uInt8(c) ];
580 else
581 nMask = getFlagsExtended( aStr, nPos );
582 switch ( eState )
584 case ssGetChar :
585 case ssRewindFromValue :
586 case ssIgnoreLeadingInRewind :
587 case ssGetWordFirstChar :
588 if ( !(nMask & TOKEN_CHAR_WORD) )
590 nMask |= getStartCharsFlags( c );
591 if ( nMask & TOKEN_CHAR_WORD )
592 nMask &= ~TOKEN_EXCLUDED;
594 break;
595 case ssGetValue :
596 case ssGetWord :
597 if ( !(nMask & TOKEN_WORD) )
599 nMask |= getContCharsFlags( c );
600 if ( nMask & TOKEN_WORD )
601 nMask &= ~TOKEN_EXCLUDED;
603 break;
604 default:
605 ; // other cases aren't needed, no compiler warning
607 return nMask;
611 UPT_FLAG_TYPE cclass_Unicode::getFlagsExtended( const sal_Unicode* aStr, sal_Int32 nPos )
613 sal_Unicode c = aStr[nPos];
614 if ( c == cGroupSep )
615 return TOKEN_VALUE;
616 else if ( c == cDecimalSep )
617 return TOKEN_CHAR_VALUE | TOKEN_VALUE;
618 using namespace i18n;
619 bool bStart = (eState == ssGetChar || eState == ssGetWordFirstChar ||
620 eState == ssRewindFromValue || eState == ssIgnoreLeadingInRewind);
621 sal_Int32 nTypes = (bStart ? nStartTypes : nContTypes);
623 //! all KParseTokens::UNI_... must be matched
624 switch ( u_charType( (sal_uInt32) c ) )
626 case U_UPPERCASE_LETTER :
627 return (nTypes & KParseTokens::UNI_UPALPHA) ?
628 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
629 TOKEN_ILLEGAL;
630 case U_LOWERCASE_LETTER :
631 return (nTypes & KParseTokens::UNI_LOALPHA) ?
632 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
633 TOKEN_ILLEGAL;
634 case U_TITLECASE_LETTER :
635 return (nTypes & KParseTokens::UNI_TITLE_ALPHA) ?
636 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
637 TOKEN_ILLEGAL;
638 case U_MODIFIER_LETTER :
639 return (nTypes & KParseTokens::UNI_MODIFIER_LETTER) ?
640 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
641 TOKEN_ILLEGAL;
642 case U_NON_SPACING_MARK :
643 case U_COMBINING_SPACING_MARK :
644 // Non_Spacing_Mark can't be a leading character,
645 // nor can a spacing combining mark.
646 if (bStart)
647 return TOKEN_ILLEGAL;
648 // fall through, treat it as Other_Letter.
649 case U_OTHER_LETTER :
650 return (nTypes & KParseTokens::UNI_OTHER_LETTER) ?
651 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
652 TOKEN_ILLEGAL;
653 case U_DECIMAL_DIGIT_NUMBER :
654 return ((nTypes & KParseTokens::UNI_DIGIT) ?
655 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
656 TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS;
657 case U_LETTER_NUMBER :
658 return ((nTypes & KParseTokens::UNI_LETTER_NUMBER) ?
659 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
660 TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS;
661 case U_OTHER_NUMBER :
662 return ((nTypes & KParseTokens::UNI_OTHER_NUMBER) ?
663 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
664 TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS;
665 case U_SPACE_SEPARATOR :
666 return ((nTypes & KParseTokens::IGNORE_LEADING_WS) ?
667 TOKEN_CHAR_DONTCARE : (bStart ? TOKEN_CHAR_WORD : (TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP) ));
668 case U_OTHER_PUNCTUATION:
669 // fdo#61754 Lets see (if we not at the start) if this is midletter
670 // punctuation and allow it in a word if it is similarly to
671 // U_NON_SPACING_MARK
672 if (bStart || U_WB_MIDLETTER != u_getIntPropertyValue(c, UCHAR_WORD_BREAK))
673 return TOKEN_ILLEGAL;
674 else
676 //allowing it to continue the word
677 return (nTypes & KParseTokens::UNI_OTHER_LETTER) ?
678 TOKEN_WORD : TOKEN_ILLEGAL;
680 break;
683 return TOKEN_ILLEGAL;
687 UPT_FLAG_TYPE cclass_Unicode::getStartCharsFlags( sal_Unicode c )
689 if ( pStart )
691 const sal_Unicode* pStr = aStartChars.getStr();
692 const sal_Unicode* p = StrChr( pStr, c );
693 if ( p )
694 return pStart[ p - pStr ];
696 return TOKEN_ILLEGAL;
700 UPT_FLAG_TYPE cclass_Unicode::getContCharsFlags( sal_Unicode c )
702 if ( pCont )
704 const sal_Unicode* pStr = aContChars.getStr();
705 const sal_Unicode* p = StrChr( pStr, c );
706 if ( p )
707 return pCont[ p - pStr ];
709 return TOKEN_ILLEGAL;
713 void cclass_Unicode::parseText( ParseResult& r, const OUString& rText, sal_Int32 nPos, sal_Int32 nTokenType )
715 using namespace i18n;
716 const sal_Unicode* const pTextStart = rText.getStr() + nPos;
717 eState = ssGetChar;
719 //! All the variables below (plus ParseResult) have to be resetted on ssRewindFromValue!
720 const sal_Unicode* pSym = pTextStart;
721 const sal_Unicode* pSrc = pSym;
722 OUString aSymbol;
723 sal_Unicode c = *pSrc;
724 sal_Unicode cLast = 0;
725 int nDecSeps = 0;
726 bool bQuote = false;
727 bool bMightBeWord = true;
728 bool bMightBeWordLast = true;
729 //! All the variables above (plus ParseResult) have to be resetted on ssRewindFromValue!
731 while ( (c != 0) && (eState != ssStop) )
733 UPT_FLAG_TYPE nMask = getFlags( pTextStart, pSrc - pTextStart );
734 if ( nMask & TOKEN_EXCLUDED )
735 eState = ssBounce;
736 if ( bMightBeWord )
737 { // only relevant for ssGetValue fall back
738 if ( eState == ssGetChar || eState == ssRewindFromValue ||
739 eState == ssIgnoreLeadingInRewind )
740 bMightBeWord = ((nMask & TOKEN_CHAR_WORD) != 0);
741 else
742 bMightBeWord = ((nMask & TOKEN_WORD) != 0);
744 sal_Int32 nParseTokensType = getParseTokensType( pTextStart, pSrc - pTextStart );
745 pSrc++;
746 switch (eState)
748 case ssGetChar :
749 case ssRewindFromValue :
750 case ssIgnoreLeadingInRewind :
752 if ( (nMask & TOKEN_CHAR_VALUE) && eState != ssRewindFromValue
753 && eState != ssIgnoreLeadingInRewind )
754 { //! must be first, may fall back to ssGetWord via bMightBeWord
755 eState = ssGetValue;
756 if ( nMask & TOKEN_VALUE_DIGIT )
758 if ( 128 <= c )
759 r.TokenType = KParseType::UNI_NUMBER;
760 else
761 r.TokenType = KParseType::ASC_NUMBER;
763 else if ( c == cDecimalSep )
765 if ( *pSrc )
766 ++nDecSeps;
767 else
768 eState = ssRewindFromValue;
769 // retry for ONE_SINGLE_CHAR or others
772 else if ( nMask & TOKEN_CHAR_WORD )
774 eState = ssGetWord;
775 r.TokenType = KParseType::IDENTNAME;
777 else if ( nMask & TOKEN_NAME_SEP )
779 eState = ssGetWordFirstChar;
780 bQuote = true;
781 pSym++;
782 nParseTokensType = 0; // will be taken of first real character
783 r.TokenType = KParseType::SINGLE_QUOTE_NAME;
785 else if ( nMask & TOKEN_CHAR_STRING )
787 eState = ssGetString;
788 pSym++;
789 nParseTokensType = 0; // will be taken of first real character
790 r.TokenType = KParseType::DOUBLE_QUOTE_STRING;
792 else if ( nMask & TOKEN_CHAR_DONTCARE )
794 if ( nStartTypes & KParseTokens::IGNORE_LEADING_WS )
796 if (eState == ssRewindFromValue)
797 eState = ssIgnoreLeadingInRewind;
798 r.LeadingWhiteSpace++;
799 pSym++;
800 nParseTokensType = 0; // wait until real character
801 bMightBeWord = true;
803 else
804 eState = ssBounce;
806 else if ( nMask & TOKEN_CHAR_BOOL )
808 eState = ssGetBool;
809 r.TokenType = KParseType::BOOLEAN;
811 else if ( nMask & TOKEN_CHAR )
812 { //! must be last
813 eState = ssStop;
814 r.TokenType = KParseType::ONE_SINGLE_CHAR;
816 else
817 eState = ssBounce; // not known
819 break;
820 case ssGetValue :
822 if ( nMask & TOKEN_VALUE_DIGIT )
824 if ( 128 <= c )
825 r.TokenType = KParseType::UNI_NUMBER;
826 else if ( r.TokenType != KParseType::UNI_NUMBER )
827 r.TokenType = KParseType::ASC_NUMBER;
829 if ( nMask & TOKEN_VALUE )
831 if ( c == cDecimalSep && ++nDecSeps > 1 )
833 if ( pSrc - pTextStart == 2 )
834 eState = ssRewindFromValue;
835 // consecutive separators
836 else
837 eState = ssStopBack;
839 // else keep it going
841 else if ( c == 'E' || c == 'e' )
843 UPT_FLAG_TYPE nNext = getFlags( pTextStart, pSrc - pTextStart );
844 if ( nNext & TOKEN_VALUE_EXP )
845 ; // keep it going
846 else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) )
847 { // might be a numerical name (1.2efg)
848 eState = ssGetWord;
849 r.TokenType = KParseType::IDENTNAME;
851 else
852 eState = ssStopBack;
854 else if ( nMask & TOKEN_VALUE_SIGN )
856 if ( (cLast == 'E') || (cLast == 'e') )
858 UPT_FLAG_TYPE nNext = getFlags( pTextStart, pSrc - pTextStart );
859 if ( nNext & TOKEN_VALUE_EXP_VALUE )
860 ; // keep it going
861 else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) )
862 { // might be a numerical name (1.2e+fg)
863 eState = ssGetWord;
864 r.TokenType = KParseType::IDENTNAME;
866 else
867 eState = ssStopBack;
869 else if ( bMightBeWord )
870 { // might be a numerical name (1.2+fg)
871 eState = ssGetWord;
872 r.TokenType = KParseType::IDENTNAME;
874 else
875 eState = ssStopBack;
877 else if ( bMightBeWord && (nMask & TOKEN_WORD) )
878 { // might be a numerical name (1995.A1)
879 eState = ssGetWord;
880 r.TokenType = KParseType::IDENTNAME;
882 else
883 eState = ssStopBack;
885 break;
886 case ssGetWordFirstChar :
887 eState = ssGetWord;
888 // fall through
889 case ssGetWord :
891 if ( nMask & TOKEN_WORD )
892 ; // keep it going
893 else if ( nMask & TOKEN_NAME_SEP )
895 if ( bQuote )
897 if ( cLast == '\\' )
898 { // escaped
899 aSymbol += OUString( pSym, pSrc - pSym - 2 );
900 aSymbol += OUString( &c, 1);
902 else
904 eState = ssStop;
905 aSymbol += OUString( pSym, pSrc - pSym - 1 );
907 pSym = pSrc;
909 else
910 eState = ssStopBack;
912 else if ( bQuote )
913 ; // keep it going
914 else
915 eState = ssStopBack;
917 break;
918 case ssGetString :
920 if ( nMask & TOKEN_STRING_SEP )
922 if ( cLast == '\\' )
923 { // escaped
924 aSymbol += OUString( pSym, pSrc - pSym - 2 );
925 aSymbol += OUString( &c, 1);
927 else if ( c == *pSrc &&
928 !(nContTypes & KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING) )
929 { // "" => literal " escaped
930 aSymbol += OUString( pSym, pSrc - pSym );
931 pSrc++;
933 else
935 eState = ssStop;
936 aSymbol += OUString( pSym, pSrc - pSym - 1 );
938 pSym = pSrc;
941 break;
942 case ssGetBool :
944 if ( (nMask & TOKEN_BOOL) )
945 eState = ssStop; // maximum 2: <, >, <>, <=, >=
946 else
947 eState = ssStopBack;
949 break;
950 case ssStopBack :
951 case ssBounce :
952 case ssStop :
953 ; // nothing, no compiler warning
954 break;
956 if ( eState == ssRewindFromValue )
958 r = ParseResult();
959 pSym = pTextStart;
960 pSrc = pSym;
961 aSymbol.clear();
962 c = *pSrc;
963 cLast = 0;
964 nDecSeps = 0;
965 bQuote = false;
966 bMightBeWord = true;
967 bMightBeWordLast = true;
969 else
971 if ( !(r.TokenType & nTokenType) )
973 if ( (r.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER))
974 && (nTokenType & KParseType::IDENTNAME) && bMightBeWord )
975 ; // keep a number that might be a word
976 else if ( r.LeadingWhiteSpace == (pSrc - pTextStart) )
977 ; // keep ignored white space
978 else if ( !r.TokenType && eState == ssGetValue && (nMask & TOKEN_VALUE_SEP) )
979 ; // keep uncertain value
980 else
981 eState = ssBounce;
983 if ( eState == ssBounce )
985 r.TokenType = 0;
986 eState = ssStopBack;
988 if ( eState == ssStopBack )
989 { // put back
990 pSrc--;
991 bMightBeWord = bMightBeWordLast;
992 eState = ssStop;
994 if ( eState != ssStop )
996 if ( !r.StartFlags )
997 r.StartFlags |= nParseTokensType;
998 else
999 r.ContFlags |= nParseTokensType;
1001 bMightBeWordLast = bMightBeWord;
1002 cLast = c;
1003 c = *pSrc;
1006 // r.CharLen is the length in characters (not code points) of the parsed
1007 // token not including any leading white space, change this calculation if
1008 // multi-code-point Unicode characters are to be supported.
1009 r.CharLen = pSrc - pTextStart - r.LeadingWhiteSpace;
1010 r.EndPos = nPos + (pSrc - pTextStart);
1011 if ( r.TokenType & KParseType::ASC_NUMBER )
1013 r.Value = rtl_math_uStringToDouble( pTextStart + r.LeadingWhiteSpace,
1014 pTextStart + r.EndPos, cDecimalSep, cGroupSep, NULL, NULL );
1015 if ( bMightBeWord )
1016 r.TokenType |= KParseType::IDENTNAME;
1018 else if ( r.TokenType & KParseType::UNI_NUMBER )
1020 if ( !xNatNumSup.is() )
1022 if ( m_xContext.is() )
1024 xNatNumSup = NativeNumberSupplier::create( m_xContext );
1027 OUString aTmp( pTextStart + r.LeadingWhiteSpace, r.EndPos - nPos +
1028 r.LeadingWhiteSpace );
1029 // transliterate to ASCII
1030 aTmp = xNatNumSup->getNativeNumberString( aTmp, aParserLocale,
1031 NativeNumberMode::NATNUM0 );
1032 r.Value = ::rtl::math::stringToDouble( aTmp, cDecimalSep, cGroupSep, NULL, NULL );
1033 if ( bMightBeWord )
1034 r.TokenType |= KParseType::IDENTNAME;
1036 else if ( r.TokenType & (KParseType::SINGLE_QUOTE_NAME | KParseType::DOUBLE_QUOTE_STRING) )
1038 if ( pSym < pSrc )
1039 { //! open quote
1040 aSymbol += OUString( pSym, pSrc - pSym );
1041 r.TokenType |= KParseType::MISSING_QUOTE;
1043 r.DequotedNameOrString = aSymbol;
} } } }

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */