Version 4.0.0.1, tag libreoffice-4.0.0.1
[LibreOffice.git] / i18npool / source / characterclassification / cclass_unicode_parser.cxx
blob3f3de5b8f0028b0db1ab53982af5668ddce3de8a
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
21 #include <cclass_unicode.hxx>
22 #include <unicode/uchar.h>
23 #include <rtl/math.hxx>
24 #include <rtl/ustring.hxx>
25 #include <com/sun/star/i18n/KParseTokens.hpp>
26 #include <com/sun/star/i18n/KParseType.hpp>
27 #include <com/sun/star/i18n/UnicodeType.hpp>
28 #include <com/sun/star/i18n/LocaleData.hpp>
29 #include <com/sun/star/i18n/NativeNumberMode.hpp>
30 #include <com/sun/star/i18n/NativeNumberSupplier.hpp>
31 #include <comphelper/processfactory.hxx>
33 #include <string.h> // memcpy()
35 using namespace ::com::sun::star::uno;
36 using namespace ::com::sun::star::lang;
37 using namespace ::rtl;
39 namespace com { namespace sun { namespace star { namespace i18n {
// Parser flag bits. Each constant occupies its own bit so that several of
// them can be OR-ed together into one UPT_FLAG_TYPE entry of the parser
// table (see pDefaultParserTable below, which combines them with '|').
// TOKEN_ILLEGAL is the all-zero value, i.e. "no flag set".
41 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_ILLEGAL = 0x00000000;
42 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR = 0x00000001;
43 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_BOOL = 0x00000002;
44 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_WORD = 0x00000004;
45 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_VALUE = 0x00000008;
46 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_STRING = 0x00000010;
47 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_DONTCARE= 0x00000020;
48 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_BOOL = 0x00000040;
49 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD = 0x00000080;
50 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD_SEP = 0x00000100;
51 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE = 0x00000200;
52 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SEP = 0x00000400;
53 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP = 0x00000800;
54 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SIGN = 0x00001000;
55 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP_VALUE = 0x00002000;
56 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_DIGIT = 0x00004000;
57 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_NAME_SEP = 0x20000000;
58 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_STRING_SEP = 0x40000000;
59 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_EXCLUDED = 0x80000000;
// Flag combination shared by every digit entry of the parser table and by
// the Unicode number categories handled in getFlagsExtended().
61 #define TOKEN_DIGIT_FLAGS (TOKEN_CHAR_VALUE | TOKEN_VALUE | TOKEN_VALUE_EXP | TOKEN_VALUE_EXP_VALUE | TOKEN_VALUE_DIGIT)
// Default parser flags for the code units 0..127, one entry per code unit
// (nDefCnt entries in total). initParserTable() copies this table into
// pTable and then adjusts the copy according to the locale separators, the
// requested start/continuation token types and the user defined characters.
// getFlags() indexes the copy directly with the code unit for c < nDefCnt.
63 // Default identifier/name specification is [A-Za-z_][A-Za-z0-9_]*
65 const sal_uInt8 cclass_Unicode::nDefCnt = 128;
66 const UPT_FLAG_TYPE cclass_Unicode::pDefaultParserTable[ nDefCnt ] =
68 // (...) == Calc formula compiler specific, commented out and modified
70 /* \0 */ TOKEN_EXCLUDED,
71 TOKEN_ILLEGAL,
72 TOKEN_ILLEGAL,
73 TOKEN_ILLEGAL,
74 TOKEN_ILLEGAL,
75 TOKEN_ILLEGAL,
76 TOKEN_ILLEGAL,
77 TOKEN_ILLEGAL,
78 TOKEN_ILLEGAL,
79 /* 9 \t */ TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL)
80 TOKEN_ILLEGAL,
81 /* 11 \v */ TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL)
82 TOKEN_ILLEGAL,
83 TOKEN_ILLEGAL,
84 TOKEN_ILLEGAL,
85 TOKEN_ILLEGAL,
86 TOKEN_ILLEGAL,
87 TOKEN_ILLEGAL,
88 TOKEN_ILLEGAL,
89 TOKEN_ILLEGAL,
90 TOKEN_ILLEGAL,
91 TOKEN_ILLEGAL,
92 TOKEN_ILLEGAL,
93 TOKEN_ILLEGAL,
94 TOKEN_ILLEGAL,
95 TOKEN_ILLEGAL,
96 TOKEN_ILLEGAL,
97 TOKEN_ILLEGAL,
98 TOKEN_ILLEGAL,
99 TOKEN_ILLEGAL,
100 TOKEN_ILLEGAL,
101 TOKEN_ILLEGAL,
102 /* 32 */ TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
103 /* 33 ! */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
104 /* 34 " */ TOKEN_CHAR_STRING | TOKEN_STRING_SEP,
105 /* 35 # */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_WORD_SEP)
106 /* 36 $ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_CHAR_WORD | TOKEN_WORD)
107 /* 37 % */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_VALUE)
108 /* 38 & */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
109 /* 39 ' */ TOKEN_NAME_SEP,
110 /* 40 ( */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
111 /* 41 ) */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
112 /* 42 * */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
113 /* 43 + */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN,
114 /* 44 , */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_CHAR_VALUE | TOKEN_VALUE)
115 /* 45 - */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN,
116 /* 46 . */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_WORD | TOKEN_CHAR_VALUE | TOKEN_VALUE)
117 /* 47 / */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
118 //for ( i = 48; i < 58; i++ )
119 /* 48 0 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
120 /* 49 1 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
121 /* 50 2 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
122 /* 51 3 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
123 /* 52 4 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
124 /* 53 5 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
125 /* 54 6 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
126 /* 55 7 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
127 /* 56 8 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
128 /* 57 9 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
129 /* 58 : */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_WORD)
130 /* 59 ; */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
131 /* 60 < */ TOKEN_CHAR_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
132 /* 61 = */ TOKEN_CHAR | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
133 /* 62 > */ TOKEN_CHAR_BOOL | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
134 /* 63 ? */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_CHAR_WORD | TOKEN_WORD)
135 /* 64 @ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
136 //for ( i = 65; i < 91; i++ )
137 /* 65 A */ TOKEN_CHAR_WORD | TOKEN_WORD,
138 /* 66 B */ TOKEN_CHAR_WORD | TOKEN_WORD,
139 /* 67 C */ TOKEN_CHAR_WORD | TOKEN_WORD,
140 /* 68 D */ TOKEN_CHAR_WORD | TOKEN_WORD,
141 /* 69 E */ TOKEN_CHAR_WORD | TOKEN_WORD,
142 /* 70 F */ TOKEN_CHAR_WORD | TOKEN_WORD,
143 /* 71 G */ TOKEN_CHAR_WORD | TOKEN_WORD,
144 /* 72 H */ TOKEN_CHAR_WORD | TOKEN_WORD,
145 /* 73 I */ TOKEN_CHAR_WORD | TOKEN_WORD,
146 /* 74 J */ TOKEN_CHAR_WORD | TOKEN_WORD,
147 /* 75 K */ TOKEN_CHAR_WORD | TOKEN_WORD,
148 /* 76 L */ TOKEN_CHAR_WORD | TOKEN_WORD,
149 /* 77 M */ TOKEN_CHAR_WORD | TOKEN_WORD,
150 /* 78 N */ TOKEN_CHAR_WORD | TOKEN_WORD,
151 /* 79 O */ TOKEN_CHAR_WORD | TOKEN_WORD,
152 /* 80 P */ TOKEN_CHAR_WORD | TOKEN_WORD,
153 /* 81 Q */ TOKEN_CHAR_WORD | TOKEN_WORD,
154 /* 82 R */ TOKEN_CHAR_WORD | TOKEN_WORD,
155 /* 83 S */ TOKEN_CHAR_WORD | TOKEN_WORD,
156 /* 84 T */ TOKEN_CHAR_WORD | TOKEN_WORD,
157 /* 85 U */ TOKEN_CHAR_WORD | TOKEN_WORD,
158 /* 86 V */ TOKEN_CHAR_WORD | TOKEN_WORD,
159 /* 87 W */ TOKEN_CHAR_WORD | TOKEN_WORD,
160 /* 88 X */ TOKEN_CHAR_WORD | TOKEN_WORD,
161 /* 89 Y */ TOKEN_CHAR_WORD | TOKEN_WORD,
162 /* 90 Z */ TOKEN_CHAR_WORD | TOKEN_WORD,
163 /* 91 [ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
164 /* 92 \ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
165 /* 93 ] */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
166 /* 94 ^ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
167 /* 95 _ */ TOKEN_CHAR_WORD | TOKEN_WORD,
168 /* 96 ` */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
169 //for ( i = 97; i < 123; i++ )
170 /* 97 a */ TOKEN_CHAR_WORD | TOKEN_WORD,
171 /* 98 b */ TOKEN_CHAR_WORD | TOKEN_WORD,
172 /* 99 c */ TOKEN_CHAR_WORD | TOKEN_WORD,
173 /* 100 d */ TOKEN_CHAR_WORD | TOKEN_WORD,
174 /* 101 e */ TOKEN_CHAR_WORD | TOKEN_WORD,
175 /* 102 f */ TOKEN_CHAR_WORD | TOKEN_WORD,
176 /* 103 g */ TOKEN_CHAR_WORD | TOKEN_WORD,
177 /* 104 h */ TOKEN_CHAR_WORD | TOKEN_WORD,
178 /* 105 i */ TOKEN_CHAR_WORD | TOKEN_WORD,
179 /* 106 j */ TOKEN_CHAR_WORD | TOKEN_WORD,
180 /* 107 k */ TOKEN_CHAR_WORD | TOKEN_WORD,
181 /* 108 l */ TOKEN_CHAR_WORD | TOKEN_WORD,
182 /* 109 m */ TOKEN_CHAR_WORD | TOKEN_WORD,
183 /* 110 n */ TOKEN_CHAR_WORD | TOKEN_WORD,
184 /* 111 o */ TOKEN_CHAR_WORD | TOKEN_WORD,
185 /* 112 p */ TOKEN_CHAR_WORD | TOKEN_WORD,
186 /* 113 q */ TOKEN_CHAR_WORD | TOKEN_WORD,
187 /* 114 r */ TOKEN_CHAR_WORD | TOKEN_WORD,
188 /* 115 s */ TOKEN_CHAR_WORD | TOKEN_WORD,
189 /* 116 t */ TOKEN_CHAR_WORD | TOKEN_WORD,
190 /* 117 u */ TOKEN_CHAR_WORD | TOKEN_WORD,
191 /* 118 v */ TOKEN_CHAR_WORD | TOKEN_WORD,
192 /* 119 w */ TOKEN_CHAR_WORD | TOKEN_WORD,
193 /* 120 x */ TOKEN_CHAR_WORD | TOKEN_WORD,
194 /* 121 y */ TOKEN_CHAR_WORD | TOKEN_WORD,
195 /* 122 z */ TOKEN_CHAR_WORD | TOKEN_WORD,
196 /* 123 { */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
197 /* 124 | */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
198 /* 125 } */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
199 /* 126 ~ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP, // (TOKEN_ILLEGAL // UNUSED)
200 /* 127 */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP // (TOKEN_ILLEGAL // UNUSED)
// KParseTokens::ASC_* classification for the code units 0..127, one entry
// per code unit. getParseTokensType() returns the entry for c < nDefCnt;
// code units from nDefCnt upwards are classified via u_charType() instead.
204 const sal_Int32 cclass_Unicode::pParseTokensType[ nDefCnt ] =
206 /* \0 */ KParseTokens::ASC_OTHER,
207 KParseTokens::ASC_CONTROL,
208 KParseTokens::ASC_CONTROL,
209 KParseTokens::ASC_CONTROL,
210 KParseTokens::ASC_CONTROL,
211 KParseTokens::ASC_CONTROL,
212 KParseTokens::ASC_CONTROL,
213 KParseTokens::ASC_CONTROL,
214 KParseTokens::ASC_CONTROL,
215 /* 9 \t */ KParseTokens::ASC_CONTROL,
216 KParseTokens::ASC_CONTROL,
217 /* 11 \v */ KParseTokens::ASC_CONTROL,
218 KParseTokens::ASC_CONTROL,
219 KParseTokens::ASC_CONTROL,
220 KParseTokens::ASC_CONTROL,
221 KParseTokens::ASC_CONTROL,
222 KParseTokens::ASC_CONTROL,
223 KParseTokens::ASC_CONTROL,
224 KParseTokens::ASC_CONTROL,
225 KParseTokens::ASC_CONTROL,
226 KParseTokens::ASC_CONTROL,
227 KParseTokens::ASC_CONTROL,
228 KParseTokens::ASC_CONTROL,
229 KParseTokens::ASC_CONTROL,
230 KParseTokens::ASC_CONTROL,
231 KParseTokens::ASC_CONTROL,
232 KParseTokens::ASC_CONTROL,
233 KParseTokens::ASC_CONTROL,
234 KParseTokens::ASC_CONTROL,
235 KParseTokens::ASC_CONTROL,
236 KParseTokens::ASC_CONTROL,
237 KParseTokens::ASC_CONTROL,
238 /* 32 */ KParseTokens::ASC_OTHER,
239 /* 33 ! */ KParseTokens::ASC_OTHER,
240 /* 34 " */ KParseTokens::ASC_OTHER,
241 /* 35 # */ KParseTokens::ASC_OTHER,
242 /* 36 $ */ KParseTokens::ASC_DOLLAR,
243 /* 37 % */ KParseTokens::ASC_OTHER,
244 /* 38 & */ KParseTokens::ASC_OTHER,
245 /* 39 ' */ KParseTokens::ASC_OTHER,
246 /* 40 ( */ KParseTokens::ASC_OTHER,
247 /* 41 ) */ KParseTokens::ASC_OTHER,
248 /* 42 * */ KParseTokens::ASC_OTHER,
249 /* 43 + */ KParseTokens::ASC_OTHER,
250 /* 44 , */ KParseTokens::ASC_OTHER,
251 /* 45 - */ KParseTokens::ASC_OTHER,
252 /* 46 . */ KParseTokens::ASC_DOT,
253 /* 47 / */ KParseTokens::ASC_OTHER,
254 //for ( i = 48; i < 58; i++ )
255 /* 48 0 */ KParseTokens::ASC_DIGIT,
256 /* 49 1 */ KParseTokens::ASC_DIGIT,
257 /* 50 2 */ KParseTokens::ASC_DIGIT,
258 /* 51 3 */ KParseTokens::ASC_DIGIT,
259 /* 52 4 */ KParseTokens::ASC_DIGIT,
260 /* 53 5 */ KParseTokens::ASC_DIGIT,
261 /* 54 6 */ KParseTokens::ASC_DIGIT,
262 /* 55 7 */ KParseTokens::ASC_DIGIT,
263 /* 56 8 */ KParseTokens::ASC_DIGIT,
264 /* 57 9 */ KParseTokens::ASC_DIGIT,
265 /* 58 : */ KParseTokens::ASC_COLON,
266 /* 59 ; */ KParseTokens::ASC_OTHER,
267 /* 60 < */ KParseTokens::ASC_OTHER,
268 /* 61 = */ KParseTokens::ASC_OTHER,
269 /* 62 > */ KParseTokens::ASC_OTHER,
270 /* 63 ? */ KParseTokens::ASC_OTHER,
271 /* 64 @ */ KParseTokens::ASC_OTHER,
272 //for ( i = 65; i < 91; i++ )
273 /* 65 A */ KParseTokens::ASC_UPALPHA,
274 /* 66 B */ KParseTokens::ASC_UPALPHA,
275 /* 67 C */ KParseTokens::ASC_UPALPHA,
276 /* 68 D */ KParseTokens::ASC_UPALPHA,
277 /* 69 E */ KParseTokens::ASC_UPALPHA,
278 /* 70 F */ KParseTokens::ASC_UPALPHA,
279 /* 71 G */ KParseTokens::ASC_UPALPHA,
280 /* 72 H */ KParseTokens::ASC_UPALPHA,
281 /* 73 I */ KParseTokens::ASC_UPALPHA,
282 /* 74 J */ KParseTokens::ASC_UPALPHA,
283 /* 75 K */ KParseTokens::ASC_UPALPHA,
284 /* 76 L */ KParseTokens::ASC_UPALPHA,
285 /* 77 M */ KParseTokens::ASC_UPALPHA,
286 /* 78 N */ KParseTokens::ASC_UPALPHA,
287 /* 79 O */ KParseTokens::ASC_UPALPHA,
288 /* 80 P */ KParseTokens::ASC_UPALPHA,
289 /* 81 Q */ KParseTokens::ASC_UPALPHA,
290 /* 82 R */ KParseTokens::ASC_UPALPHA,
291 /* 83 S */ KParseTokens::ASC_UPALPHA,
292 /* 84 T */ KParseTokens::ASC_UPALPHA,
293 /* 85 U */ KParseTokens::ASC_UPALPHA,
294 /* 86 V */ KParseTokens::ASC_UPALPHA,
295 /* 87 W */ KParseTokens::ASC_UPALPHA,
296 /* 88 X */ KParseTokens::ASC_UPALPHA,
297 /* 89 Y */ KParseTokens::ASC_UPALPHA,
298 /* 90 Z */ KParseTokens::ASC_UPALPHA,
299 /* 91 [ */ KParseTokens::ASC_OTHER,
300 /* 92 \ */ KParseTokens::ASC_OTHER,
301 /* 93 ] */ KParseTokens::ASC_OTHER,
302 /* 94 ^ */ KParseTokens::ASC_OTHER,
303 /* 95 _ */ KParseTokens::ASC_UNDERSCORE,
304 /* 96 ` */ KParseTokens::ASC_OTHER,
305 //for ( i = 97; i < 123; i++ )
306 /* 97 a */ KParseTokens::ASC_LOALPHA,
307 /* 98 b */ KParseTokens::ASC_LOALPHA,
308 /* 99 c */ KParseTokens::ASC_LOALPHA,
309 /* 100 d */ KParseTokens::ASC_LOALPHA,
310 /* 101 e */ KParseTokens::ASC_LOALPHA,
311 /* 102 f */ KParseTokens::ASC_LOALPHA,
312 /* 103 g */ KParseTokens::ASC_LOALPHA,
313 /* 104 h */ KParseTokens::ASC_LOALPHA,
314 /* 105 i */ KParseTokens::ASC_LOALPHA,
315 /* 106 j */ KParseTokens::ASC_LOALPHA,
316 /* 107 k */ KParseTokens::ASC_LOALPHA,
317 /* 108 l */ KParseTokens::ASC_LOALPHA,
318 /* 109 m */ KParseTokens::ASC_LOALPHA,
319 /* 110 n */ KParseTokens::ASC_LOALPHA,
320 /* 111 o */ KParseTokens::ASC_LOALPHA,
321 /* 112 p */ KParseTokens::ASC_LOALPHA,
322 /* 113 q */ KParseTokens::ASC_LOALPHA,
323 /* 114 r */ KParseTokens::ASC_LOALPHA,
324 /* 115 s */ KParseTokens::ASC_LOALPHA,
325 /* 116 t */ KParseTokens::ASC_LOALPHA,
326 /* 117 u */ KParseTokens::ASC_LOALPHA,
327 /* 118 v */ KParseTokens::ASC_LOALPHA,
328 /* 119 w */ KParseTokens::ASC_LOALPHA,
329 /* 120 x */ KParseTokens::ASC_LOALPHA,
330 /* 121 y */ KParseTokens::ASC_LOALPHA,
331 /* 122 z */ KParseTokens::ASC_LOALPHA,
332 /* 123 { */ KParseTokens::ASC_OTHER,
333 /* 124 | */ KParseTokens::ASC_OTHER,
334 /* 125 } */ KParseTokens::ASC_OTHER,
335 /* 126 ~ */ KParseTokens::ASC_OTHER,
336 /* 127 */ KParseTokens::ASC_OTHER
340 // static
341 const sal_Unicode* cclass_Unicode::StrChr( const sal_Unicode* pStr, sal_Unicode c )
343 if ( !pStr )
344 return NULL;
345 while ( *pStr )
347 if ( *pStr == c )
348 return pStr;
349 pStr++;
351 return NULL;
355 sal_Int32 cclass_Unicode::getParseTokensType( const sal_Unicode* aStr, sal_Int32 nPos )
357 sal_Unicode c = aStr[nPos];
358 if ( c < nDefCnt )
359 return pParseTokensType[ sal_uInt8(c) ];
360 else
363 //! all KParseTokens::UNI_... must be matched
364 switch ( u_charType( (sal_uInt32) c ) )
366 case U_UPPERCASE_LETTER :
367 return KParseTokens::UNI_UPALPHA;
368 case U_LOWERCASE_LETTER :
369 return KParseTokens::UNI_LOALPHA;
370 case U_TITLECASE_LETTER :
371 return KParseTokens::UNI_TITLE_ALPHA;
372 case U_MODIFIER_LETTER :
373 return KParseTokens::UNI_MODIFIER_LETTER;
374 case U_OTHER_LETTER :
375 // Non_Spacing_Mark could not be as leading character
376 if (nPos == 0) break;
377 // fall through, treat it as Other_Letter.
378 case U_NON_SPACING_MARK :
379 return KParseTokens::UNI_OTHER_LETTER;
380 case U_DECIMAL_DIGIT_NUMBER :
381 return KParseTokens::UNI_DIGIT;
382 case U_LETTER_NUMBER :
383 return KParseTokens::UNI_LETTER_NUMBER;
384 case U_OTHER_NUMBER :
385 return KParseTokens::UNI_OTHER_NUMBER;
388 return KParseTokens::UNI_OTHER;
392 sal_Bool cclass_Unicode::setupInternational( const Locale& rLocale )
394 sal_Bool bChanged = (aParserLocale.Language != rLocale.Language
395 || aParserLocale.Country != rLocale.Country
396 || aParserLocale.Variant != rLocale.Variant);
397 if ( bChanged )
399 aParserLocale.Language = rLocale.Language;
400 aParserLocale.Country = rLocale.Country;
401 aParserLocale.Variant = rLocale.Variant;
403 if ( !mxLocaleData.is() )
405 mxLocaleData.set( LocaleData::create(m_xContext) );
407 return bChanged;
411 void cclass_Unicode::setupParserTable( const Locale& rLocale, sal_Int32 startCharTokenType,
412 const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
413 const OUString& userDefinedCharactersCont )
415 bool bIntlEqual = (rLocale.Language == aParserLocale.Language &&
416 rLocale.Country == aParserLocale.Country &&
417 rLocale.Variant == aParserLocale.Variant);
418 if ( !pTable || !bIntlEqual ||
419 startCharTokenType != nStartTypes ||
420 contCharTokenType != nContTypes ||
421 userDefinedCharactersStart != aStartChars ||
422 userDefinedCharactersCont != aContChars )
423 initParserTable( rLocale, startCharTokenType, userDefinedCharactersStart,
424 contCharTokenType, userDefinedCharactersCont );
428 void cclass_Unicode::initParserTable( const Locale& rLocale, sal_Int32 startCharTokenType,
429 const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
430 const OUString& userDefinedCharactersCont )
432 // (Re)Init
433 setupInternational( rLocale );
434 // Memory of pTable is reused.
435 if ( !pTable )
436 pTable = new UPT_FLAG_TYPE[nDefCnt];
437 memcpy( pTable, pDefaultParserTable, sizeof(UPT_FLAG_TYPE) * nDefCnt );
438 // Start and cont tables only need reallocation if different length.
439 if ( pStart && userDefinedCharactersStart.getLength() != aStartChars.getLength() )
441 delete [] pStart;
442 pStart = NULL;
444 if ( pCont && userDefinedCharactersCont.getLength() != aContChars.getLength() )
446 delete [] pCont;
447 pCont = NULL;
449 nStartTypes = startCharTokenType;
450 nContTypes = contCharTokenType;
451 aStartChars = userDefinedCharactersStart;
452 aContChars = userDefinedCharactersCont;
454 // specials
455 if( mxLocaleData.is() )
457 LocaleDataItem aItem =
458 mxLocaleData->getLocaleItem( aParserLocale );
459 //!TODO: theoretically separators may be a string, adjustment would have to be
460 //! done here and in parsing and in ::rtl::math::stringToDouble()
461 cGroupSep = aItem.thousandSeparator.getStr()[0];
462 cDecimalSep = aItem.decimalSeparator.getStr()[0];
465 if ( cGroupSep < nDefCnt )
466 pTable[cGroupSep] |= TOKEN_VALUE;
467 if ( cDecimalSep < nDefCnt )
468 pTable[cDecimalSep] |= TOKEN_CHAR_VALUE | TOKEN_VALUE;
470 // Modify characters according to KParseTokens definitions.
472 using namespace KParseTokens;
473 sal_uInt8 i;
475 if ( !(nStartTypes & ASC_UPALPHA) )
476 for ( i = 65; i < 91; i++ )
477 pTable[i] &= ~TOKEN_CHAR_WORD; // not allowed as start character
478 if ( !(nContTypes & ASC_UPALPHA) )
479 for ( i = 65; i < 91; i++ )
480 pTable[i] &= ~TOKEN_WORD; // not allowed as cont character
482 if ( !(nStartTypes & ASC_LOALPHA) )
483 for ( i = 97; i < 123; i++ )
484 pTable[i] &= ~TOKEN_CHAR_WORD; // not allowed as start character
485 if ( !(nContTypes & ASC_LOALPHA) )
486 for ( i = 97; i < 123; i++ )
487 pTable[i] &= ~TOKEN_WORD; // not allowed as cont character
489 if ( nStartTypes & ASC_DIGIT )
490 for ( i = 48; i < 58; i++ )
491 pTable[i] |= TOKEN_CHAR_WORD; // allowed as start character
492 if ( !(nContTypes & ASC_DIGIT) )
493 for ( i = 48; i < 58; i++ )
494 pTable[i] &= ~TOKEN_WORD; // not allowed as cont character
496 if ( !(nStartTypes & ASC_UNDERSCORE) )
497 pTable[95] &= ~TOKEN_CHAR_WORD; // not allowed as start character
498 if ( !(nContTypes & ASC_UNDERSCORE) )
499 pTable[95] &= ~TOKEN_WORD; // not allowed as cont character
501 if ( nStartTypes & ASC_DOLLAR )
502 pTable[36] |= TOKEN_CHAR_WORD; // allowed as start character
503 if ( nContTypes & ASC_DOLLAR )
504 pTable[36] |= TOKEN_WORD; // allowed as cont character
506 if ( nStartTypes & ASC_DOT )
507 pTable[46] |= TOKEN_CHAR_WORD; // allowed as start character
508 if ( nContTypes & ASC_DOT )
509 pTable[46] |= TOKEN_WORD; // allowed as cont character
511 if ( nStartTypes & ASC_COLON )
512 pTable[58] |= TOKEN_CHAR_WORD; // allowed as start character
513 if ( nContTypes & ASC_COLON )
514 pTable[58] |= TOKEN_WORD; // allowed as cont character
516 if ( nStartTypes & ASC_CONTROL )
517 for ( i = 1; i < 32; i++ )
518 pTable[i] |= TOKEN_CHAR_WORD; // allowed as start character
519 if ( nContTypes & ASC_CONTROL )
520 for ( i = 1; i < 32; i++ )
521 pTable[i] |= TOKEN_WORD; // allowed as cont character
523 if ( nStartTypes & ASC_ANY_BUT_CONTROL )
524 for ( i = 32; i < nDefCnt; i++ )
525 pTable[i] |= TOKEN_CHAR_WORD; // allowed as start character
526 if ( nContTypes & ASC_ANY_BUT_CONTROL )
527 for ( i = 32; i < nDefCnt; i++ )
528 pTable[i] |= TOKEN_WORD; // allowed as cont character
532 // Merge in (positively override with) user defined characters.
533 // StartChars
534 sal_Int32 nLen = aStartChars.getLength();
535 if ( nLen )
537 if ( !pStart )
538 pStart = new UPT_FLAG_TYPE[ nLen ];
539 const sal_Unicode* p = aStartChars.getStr();
540 for ( sal_Int32 j=0; j<nLen; j++, p++ )
542 pStart[j] = TOKEN_CHAR_WORD;
543 if ( *p < nDefCnt )
544 pTable[*p] |= TOKEN_CHAR_WORD;
547 // ContChars
548 nLen = aContChars.getLength();
549 if ( nLen )
551 if ( !pCont )
552 pCont = new UPT_FLAG_TYPE[ nLen ];
553 const sal_Unicode* p = aContChars.getStr();
554 for ( sal_Int32 j=0; j<nLen; j++ )
556 pCont[j] = TOKEN_WORD;
557 if ( *p < nDefCnt )
558 pTable[*p] |= TOKEN_WORD;
564 void cclass_Unicode::destroyParserTable()
566 if ( pCont )
567 delete [] pCont;
568 if ( pStart )
569 delete [] pStart;
570 if ( pTable )
571 delete [] pTable;
575 UPT_FLAG_TYPE cclass_Unicode::getFlags( const sal_Unicode* aStr, sal_Int32 nPos )
577 UPT_FLAG_TYPE nMask;
578 sal_Unicode c = aStr[nPos];
579 if ( c < nDefCnt )
580 nMask = pTable[ sal_uInt8(c) ];
581 else
582 nMask = getFlagsExtended( aStr, nPos );
583 switch ( eState )
585 case ssGetChar :
586 case ssRewindFromValue :
587 case ssIgnoreLeadingInRewind :
588 case ssGetWordFirstChar :
589 if ( !(nMask & TOKEN_CHAR_WORD) )
591 nMask |= getStartCharsFlags( c );
592 if ( nMask & TOKEN_CHAR_WORD )
593 nMask &= ~TOKEN_EXCLUDED;
595 break;
596 case ssGetValue :
597 case ssGetWord :
598 if ( !(nMask & TOKEN_WORD) )
600 nMask |= getContCharsFlags( c );
601 if ( nMask & TOKEN_WORD )
602 nMask &= ~TOKEN_EXCLUDED;
604 break;
605 default:
606 ; // other cases aren't needed, no compiler warning
608 return nMask;
612 UPT_FLAG_TYPE cclass_Unicode::getFlagsExtended( const sal_Unicode* aStr, sal_Int32 nPos )
614 sal_Unicode c = aStr[nPos];
615 if ( c == cGroupSep )
616 return TOKEN_VALUE;
617 else if ( c == cDecimalSep )
618 return TOKEN_CHAR_VALUE | TOKEN_VALUE;
619 using namespace i18n;
620 bool bStart = (eState == ssGetChar || eState == ssGetWordFirstChar ||
621 eState == ssRewindFromValue || eState == ssIgnoreLeadingInRewind);
622 sal_Int32 nTypes = (bStart ? nStartTypes : nContTypes);
624 //! all KParseTokens::UNI_... must be matched
625 switch ( u_charType( (sal_uInt32) c ) )
627 case U_UPPERCASE_LETTER :
628 return (nTypes & KParseTokens::UNI_UPALPHA) ?
629 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
630 TOKEN_ILLEGAL;
631 case U_LOWERCASE_LETTER :
632 return (nTypes & KParseTokens::UNI_LOALPHA) ?
633 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
634 TOKEN_ILLEGAL;
635 case U_TITLECASE_LETTER :
636 return (nTypes & KParseTokens::UNI_TITLE_ALPHA) ?
637 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
638 TOKEN_ILLEGAL;
639 case U_MODIFIER_LETTER :
640 return (nTypes & KParseTokens::UNI_MODIFIER_LETTER) ?
641 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
642 TOKEN_ILLEGAL;
643 case U_NON_SPACING_MARK :
644 case U_COMBINING_SPACING_MARK :
645 // Non_Spacing_Mark can't be a leading character,
646 // nor can a spacing combining mark.
647 if (bStart)
648 return TOKEN_ILLEGAL;
649 // fall through, treat it as Other_Letter.
650 case U_OTHER_LETTER :
651 return (nTypes & KParseTokens::UNI_OTHER_LETTER) ?
652 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
653 TOKEN_ILLEGAL;
654 case U_DECIMAL_DIGIT_NUMBER :
655 return ((nTypes & KParseTokens::UNI_DIGIT) ?
656 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
657 TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS;
658 case U_LETTER_NUMBER :
659 return ((nTypes & KParseTokens::UNI_LETTER_NUMBER) ?
660 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
661 TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS;
662 case U_OTHER_NUMBER :
663 return ((nTypes & KParseTokens::UNI_OTHER_NUMBER) ?
664 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
665 TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS;
666 case U_SPACE_SEPARATOR :
667 return ((nTypes & KParseTokens::IGNORE_LEADING_WS) ?
668 TOKEN_CHAR_DONTCARE : (bStart ? TOKEN_CHAR_WORD : (TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP) ));
671 return TOKEN_ILLEGAL;
675 UPT_FLAG_TYPE cclass_Unicode::getStartCharsFlags( sal_Unicode c )
677 if ( pStart )
679 const sal_Unicode* pStr = aStartChars.getStr();
680 const sal_Unicode* p = StrChr( pStr, c );
681 if ( p )
682 return pStart[ p - pStr ];
684 return TOKEN_ILLEGAL;
688 UPT_FLAG_TYPE cclass_Unicode::getContCharsFlags( sal_Unicode c )
690 if ( pCont )
692 const sal_Unicode* pStr = aContChars.getStr();
693 const sal_Unicode* p = StrChr( pStr, c );
694 if ( p )
695 return pCont[ p - pStr ];
697 return TOKEN_ILLEGAL;
701 void cclass_Unicode::parseText( ParseResult& r, const OUString& rText, sal_Int32 nPos, sal_Int32 nTokenType )
703 using namespace i18n;
704 const sal_Unicode* const pTextStart = rText.getStr() + nPos;
705 eState = ssGetChar;
707 //! All the variables below (plus ParseResult) have to be resetted on ssRewindFromValue!
708 const sal_Unicode* pSym = pTextStart;
709 const sal_Unicode* pSrc = pSym;
710 OUString aSymbol;
711 sal_Unicode c = *pSrc;
712 sal_Unicode cLast = 0;
713 int nDecSeps = 0;
714 bool bQuote = false;
715 bool bMightBeWord = true;
716 bool bMightBeWordLast = true;
717 //! All the variables above (plus ParseResult) have to be resetted on ssRewindFromValue!
719 while ( (c != 0) && (eState != ssStop) )
721 UPT_FLAG_TYPE nMask = getFlags( pTextStart, pSrc - pTextStart );
722 if ( nMask & TOKEN_EXCLUDED )
723 eState = ssBounce;
724 if ( bMightBeWord )
725 { // only relevant for ssGetValue fall back
726 if ( eState == ssGetChar || eState == ssRewindFromValue ||
727 eState == ssIgnoreLeadingInRewind )
728 bMightBeWord = ((nMask & TOKEN_CHAR_WORD) != 0);
729 else
730 bMightBeWord = ((nMask & TOKEN_WORD) != 0);
732 sal_Int32 nParseTokensType = getParseTokensType( pTextStart, pSrc - pTextStart );
733 pSrc++;
734 switch (eState)
736 case ssGetChar :
737 case ssRewindFromValue :
738 case ssIgnoreLeadingInRewind :
740 if ( (nMask & TOKEN_CHAR_VALUE) && eState != ssRewindFromValue
741 && eState != ssIgnoreLeadingInRewind )
742 { //! must be first, may fall back to ssGetWord via bMightBeWord
743 eState = ssGetValue;
744 if ( nMask & TOKEN_VALUE_DIGIT )
746 if ( 128 <= c )
747 r.TokenType = KParseType::UNI_NUMBER;
748 else
749 r.TokenType = KParseType::ASC_NUMBER;
751 else if ( c == cDecimalSep )
753 if ( *pSrc )
754 ++nDecSeps;
755 else
756 eState = ssRewindFromValue;
757 // retry for ONE_SINGLE_CHAR or others
760 else if ( nMask & TOKEN_CHAR_WORD )
762 eState = ssGetWord;
763 r.TokenType = KParseType::IDENTNAME;
765 else if ( nMask & TOKEN_NAME_SEP )
767 eState = ssGetWordFirstChar;
768 bQuote = true;
769 pSym++;
770 nParseTokensType = 0; // will be taken of first real character
771 r.TokenType = KParseType::SINGLE_QUOTE_NAME;
773 else if ( nMask & TOKEN_CHAR_STRING )
775 eState = ssGetString;
776 pSym++;
777 nParseTokensType = 0; // will be taken of first real character
778 r.TokenType = KParseType::DOUBLE_QUOTE_STRING;
780 else if ( nMask & TOKEN_CHAR_DONTCARE )
782 if ( nStartTypes & KParseTokens::IGNORE_LEADING_WS )
784 if (eState == ssRewindFromValue)
785 eState = ssIgnoreLeadingInRewind;
786 r.LeadingWhiteSpace++;
787 pSym++;
788 nParseTokensType = 0; // wait until real character
789 bMightBeWord = true;
791 else
792 eState = ssBounce;
794 else if ( nMask & TOKEN_CHAR_BOOL )
796 eState = ssGetBool;
797 r.TokenType = KParseType::BOOLEAN;
799 else if ( nMask & TOKEN_CHAR )
800 { //! must be last
801 eState = ssStop;
802 r.TokenType = KParseType::ONE_SINGLE_CHAR;
804 else
805 eState = ssBounce; // not known
807 break;
808 case ssGetValue :
810 if ( nMask & TOKEN_VALUE_DIGIT )
812 if ( 128 <= c )
813 r.TokenType = KParseType::UNI_NUMBER;
814 else if ( r.TokenType != KParseType::UNI_NUMBER )
815 r.TokenType = KParseType::ASC_NUMBER;
817 if ( nMask & TOKEN_VALUE )
819 if ( c == cDecimalSep && ++nDecSeps > 1 )
821 if ( pSrc - pTextStart == 2 )
822 eState = ssRewindFromValue;
823 // consecutive separators
824 else
825 eState = ssStopBack;
827 // else keep it going
829 else if ( c == 'E' || c == 'e' )
831 UPT_FLAG_TYPE nNext = getFlags( pTextStart, pSrc - pTextStart );
832 if ( nNext & TOKEN_VALUE_EXP )
833 ; // keep it going
834 else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) )
835 { // might be a numerical name (1.2efg)
836 eState = ssGetWord;
837 r.TokenType = KParseType::IDENTNAME;
839 else
840 eState = ssStopBack;
842 else if ( nMask & TOKEN_VALUE_SIGN )
844 if ( (cLast == 'E') || (cLast == 'e') )
846 UPT_FLAG_TYPE nNext = getFlags( pTextStart, pSrc - pTextStart );
847 if ( nNext & TOKEN_VALUE_EXP_VALUE )
848 ; // keep it going
849 else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) )
850 { // might be a numerical name (1.2e+fg)
851 eState = ssGetWord;
852 r.TokenType = KParseType::IDENTNAME;
854 else
855 eState = ssStopBack;
857 else if ( bMightBeWord )
858 { // might be a numerical name (1.2+fg)
859 eState = ssGetWord;
860 r.TokenType = KParseType::IDENTNAME;
862 else
863 eState = ssStopBack;
865 else if ( bMightBeWord && (nMask & TOKEN_WORD) )
866 { // might be a numerical name (1995.A1)
867 eState = ssGetWord;
868 r.TokenType = KParseType::IDENTNAME;
870 else
871 eState = ssStopBack;
873 break;
874 case ssGetWordFirstChar :
875 eState = ssGetWord;
876 // fall thru
877 case ssGetWord :
879 if ( nMask & TOKEN_WORD )
880 ; // keep it going
881 else if ( nMask & TOKEN_NAME_SEP )
883 if ( bQuote )
885 if ( cLast == '\\' )
886 { // escaped
887 aSymbol += OUString( pSym, pSrc - pSym - 2 );
888 aSymbol += OUString( &c, 1);
890 else
892 eState = ssStop;
893 aSymbol += OUString( pSym, pSrc - pSym - 1 );
895 pSym = pSrc;
897 else
898 eState = ssStopBack;
900 else if ( bQuote )
901 ; // keep it going
902 else
903 eState = ssStopBack;
905 break;
906 case ssGetString :
908 if ( nMask & TOKEN_STRING_SEP )
910 if ( cLast == '\\' )
911 { // escaped
912 aSymbol += OUString( pSym, pSrc - pSym - 2 );
913 aSymbol += OUString( &c, 1);
915 else if ( c == *pSrc &&
916 !(nContTypes & KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING) )
917 { // "" => literal " escaped
918 aSymbol += OUString( pSym, pSrc - pSym );
919 pSrc++;
921 else
923 eState = ssStop;
924 aSymbol += OUString( pSym, pSrc - pSym - 1 );
926 pSym = pSrc;
929 break;
930 case ssGetBool :
932 if ( (nMask & TOKEN_BOOL) )
933 eState = ssStop; // maximum 2: <, >, <>, <=, >=
934 else
935 eState = ssStopBack;
937 break;
938 case ssStopBack :
939 case ssBounce :
940 case ssStop :
941 ; // nothing, no compiler warning
942 break;
944 if ( eState == ssRewindFromValue )
946 r = ParseResult();
947 pSym = pTextStart;
948 pSrc = pSym;
949 aSymbol = OUString();
950 c = *pSrc;
951 cLast = 0;
952 nDecSeps = 0;
953 bQuote = false;
954 bMightBeWord = true;
955 bMightBeWordLast = true;
957 else
959 if ( !(r.TokenType & nTokenType) )
961 if ( (r.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER))
962 && (nTokenType & KParseType::IDENTNAME) && bMightBeWord )
963 ; // keep a number that might be a word
964 else if ( r.LeadingWhiteSpace == (pSrc - pTextStart) )
965 ; // keep ignored white space
966 else if ( !r.TokenType && eState == ssGetValue && (nMask & TOKEN_VALUE_SEP) )
967 ; // keep uncertain value
968 else
969 eState = ssBounce;
971 if ( eState == ssBounce )
973 r.TokenType = 0;
974 eState = ssStopBack;
976 if ( eState == ssStopBack )
977 { // put back
978 pSrc--;
979 bMightBeWord = bMightBeWordLast;
980 eState = ssStop;
982 if ( eState != ssStop )
984 if ( !r.StartFlags )
985 r.StartFlags |= nParseTokensType;
986 else
987 r.ContFlags |= nParseTokensType;
989 bMightBeWordLast = bMightBeWord;
990 cLast = c;
991 c = *pSrc;
994 // r.CharLen is the length in characters (not code points) of the parsed
995 // token not including any leading white space, change this calculation if
996 // multi-code-point Unicode characters are to be supported.
997 r.CharLen = pSrc - pTextStart - r.LeadingWhiteSpace;
998 r.EndPos = nPos + (pSrc - pTextStart);
999 if ( r.TokenType & KParseType::ASC_NUMBER )
1001 r.Value = rtl_math_uStringToDouble( pTextStart + r.LeadingWhiteSpace,
1002 pTextStart + r.EndPos, cDecimalSep, cGroupSep, NULL, NULL );
1003 if ( bMightBeWord )
1004 r.TokenType |= KParseType::IDENTNAME;
1006 else if ( r.TokenType & KParseType::UNI_NUMBER )
1008 if ( !xNatNumSup.is() )
1010 if ( m_xContext.is() )
1012 xNatNumSup = NativeNumberSupplier::create( m_xContext );
1015 OUString aTmp( pTextStart + r.LeadingWhiteSpace, r.EndPos - nPos +
1016 r.LeadingWhiteSpace );
1017 // transliterate to ASCII
1018 aTmp = xNatNumSup->getNativeNumberString( aTmp, aParserLocale,
1019 NativeNumberMode::NATNUM0 );
1020 r.Value = ::rtl::math::stringToDouble( aTmp, cDecimalSep, cGroupSep, NULL, NULL );
1021 if ( bMightBeWord )
1022 r.TokenType |= KParseType::IDENTNAME;
1024 else if ( r.TokenType & (KParseType::SINGLE_QUOTE_NAME | KParseType::DOUBLE_QUOTE_STRING) )
1026 if ( pSym < pSrc )
1027 { //! open quote
1028 aSymbol += OUString( pSym, pSrc - pSym );
1029 r.TokenType |= KParseType::MISSING_QUOTE;
1031 r.DequotedNameOrString = aSymbol;
1035 } } } }
1037 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */