i18npool/source/characterclassification/cclass_unicode_parser.cxx

   1 /*************************************************************************
   2  *
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * Copyright 2008 by Sun Microsystems, Inc.
   6  *
   7  * OpenOffice.org - a multi-platform office productivity suite
   8  *
   9  * $RCSfile: cclass_unicode_parser.cxx,v $
  10  * $Revision: 1.15 $
  11  *
  12  * This file is part of OpenOffice.org.
  13  *
  14  * OpenOffice.org is free software: you can redistribute it and/or modify
  15  * it under the terms of the GNU Lesser General Public License version 3
  16  * only, as published by the Free Software Foundation.
  17  *
  18  * OpenOffice.org is distributed in the hope that it will be useful,
  19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  21  * GNU Lesser General Public License version 3 for more details
  22  * (a copy is included in the LICENSE file that accompanied this code).
  23  *
  24  * You should have received a copy of the GNU Lesser General Public License
  25  * version 3 along with OpenOffice.org.  If not, see
  26  * <http://www.openoffice.org/license.html>
  27  * for a copy of the LGPLv3 License.
  28  *
  29  ************************************************************************/
  30
  31 // MARKER(update_precomp.py): autogen include statement, do not remove
  32 #include "precompiled_i18npool.hxx"
  33
  34 #include <cclass_unicode.hxx>
  35 #include <unicode/uchar.h>
  36 #include <rtl/math.hxx>
  37 #include <rtl/ustring.hxx>
  38 #include <com/sun/star/i18n/KParseTokens.hpp>
  39 #include <com/sun/star/i18n/KParseType.hpp>
  40 #include <com/sun/star/i18n/UnicodeType.hpp>
  41 #include <com/sun/star/i18n/XLocaleData.hpp>
  42 #include <com/sun/star/i18n/NativeNumberMode.hpp>
  43
  44 #include <string.h>             // memcpy()
  45
  46 using namespace ::com::sun::star::uno;
  47 using namespace ::com::sun::star::lang;
  48 using namespace ::rtl;
  49
  50 namespace com { namespace sun { namespace star { namespace i18n {
  51
  // Parser flag bits. Each character is described by an OR-combination of
  // these flags (see pDefaultParserTable), and the parser tests them with '&'.
  // TOKEN_ILLEGAL is the empty set (no bit set).
  // TOKEN_CHAR_WORD marks a character allowed as the start character of a
  // word, TOKEN_WORD one allowed as a continuation character (see the
  // adjustments made in initParserTable()).
  // parseText() switches to its bounce state on characters that carry
  // TOKEN_EXCLUDED.
  52 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_ILLEGAL               = 0x00000000;
  53 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR                  = 0x00000001;
  54 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_BOOL     = 0x00000002;
  55 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_WORD     = 0x00000004;
  56 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_VALUE    = 0x00000008;
  57 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_STRING   = 0x00000010;
  58 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_CHAR_DONTCARE= 0x00000020;
  59 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_BOOL                  = 0x00000040;
  60 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD                  = 0x00000080;
  61 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_WORD_SEP              = 0x00000100;
  62 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE         = 0x00000200;
  63 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SEP     = 0x00000400;
  64 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP     = 0x00000800;
  65 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_SIGN    = 0x00001000;
  66 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_EXP_VALUE       = 0x00002000;
  67 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_VALUE_DIGIT   = 0x00004000;
  68 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_NAME_SEP              = 0x20000000;
  69 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_STRING_SEP    = 0x40000000;
  70 const UPT_FLAG_TYPE cclass_Unicode::TOKEN_EXCLUDED              = 0x80000000;
  71
  // Value related flags shared by all digit characters: used for the ASCII
  // digits '0'-'9' in pDefaultParserTable and for Unicode digit characters
  // in getFlagsExtended().
  72 #define TOKEN_DIGIT_FLAGS (TOKEN_CHAR_VALUE | TOKEN_VALUE | TOKEN_VALUE_EXP | TOKEN_VALUE_EXP_VALUE | TOKEN_VALUE_DIGIT)
  73
  74 // Default identifier/name specification is [A-Za-z_][A-Za-z0-9_]*
  75
  // nDefCnt is the number of characters covered by the static tables below;
  // characters with a code below nDefCnt are looked up directly by their code.
  //
  // pDefaultParserTable holds the default parser flags for each of these
  // characters (array index == character code). initParserTable() copies it
  // into pTable and then adjusts the copy according to locale separators,
  // KParseTokens settings and user defined characters.
  76 const sal_uInt8 cclass_Unicode::nDefCnt = 128;
  77 const UPT_FLAG_TYPE cclass_Unicode::pDefaultParserTable[ nDefCnt ] =
  78 {
  79 // (...) == Calc formula compiler specific, commented out and modified
  80
  81     /* \0 */    TOKEN_EXCLUDED,
  82                 TOKEN_ILLEGAL,
  83                 TOKEN_ILLEGAL,
  84                 TOKEN_ILLEGAL,
  85                 TOKEN_ILLEGAL,
  86                 TOKEN_ILLEGAL,
  87                 TOKEN_ILLEGAL,
  88                 TOKEN_ILLEGAL,
  89                 TOKEN_ILLEGAL,
  90     /*  9 \t */ TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,         // (TOKEN_ILLEGAL)
  91                 TOKEN_ILLEGAL,
  92     /* 11 \v */ TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,         // (TOKEN_ILLEGAL)
  93                 TOKEN_ILLEGAL,
  94                 TOKEN_ILLEGAL,
  95                 TOKEN_ILLEGAL,
  96                 TOKEN_ILLEGAL,
  97                 TOKEN_ILLEGAL,
  98                 TOKEN_ILLEGAL,
  99                 TOKEN_ILLEGAL,
 100                 TOKEN_ILLEGAL,
 101                 TOKEN_ILLEGAL,
 102                 TOKEN_ILLEGAL,
 103                 TOKEN_ILLEGAL,
 104                 TOKEN_ILLEGAL,
 105                 TOKEN_ILLEGAL,
 106                 TOKEN_ILLEGAL,
 107                 TOKEN_ILLEGAL,
 108                 TOKEN_ILLEGAL,
 109                 TOKEN_ILLEGAL,
 110                 TOKEN_ILLEGAL,
 111                 TOKEN_ILLEGAL,
 112                 TOKEN_ILLEGAL,
 113     /*  32   */ TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
 114     /*  33 ! */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
 115     /*  34 " */ TOKEN_CHAR_STRING | TOKEN_STRING_SEP,
 116     /*  35 # */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,  // (TOKEN_WORD_SEP)
 117     /*  36 $ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,  // (TOKEN_CHAR_WORD | TOKEN_WORD)
 118     /*  37 % */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,  // (TOKEN_VALUE)
 119     /*  38 & */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
 120     /*  39 ' */ TOKEN_NAME_SEP,
 121     /*  40 ( */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
 122     /*  41 ) */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
 123     /*  42 * */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
 124     /*  43 + */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN,
 125     /*  44 , */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,  // (TOKEN_CHAR_VALUE | TOKEN_VALUE)
 126     /*  45 - */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP | TOKEN_VALUE_EXP | TOKEN_VALUE_SIGN,
 127     /*  46 . */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,  // (TOKEN_WORD | TOKEN_CHAR_VALUE | TOKEN_VALUE)
 128     /*  47 / */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
 129     //for ( i = 48; i < 58; i++ )
 130     /*  48 0 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
 131     /*  49 1 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
 132     /*  50 2 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
 133     /*  51 3 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
 134     /*  52 4 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
 135     /*  53 5 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
 136     /*  54 6 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
 137     /*  55 7 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
 138     /*  56 8 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
 139     /*  57 9 */ TOKEN_DIGIT_FLAGS | TOKEN_WORD,
 140     /*  58 : */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,  // (TOKEN_WORD)
 141     /*  59 ; */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
 142     /*  60 < */ TOKEN_CHAR_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
 143     /*  61 = */ TOKEN_CHAR | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
 144     /*  62 > */ TOKEN_CHAR_BOOL | TOKEN_BOOL | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
 145     /*  63 ? */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,  // (TOKEN_CHAR_WORD | TOKEN_WORD)
 146     /*  64 @ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,  // (TOKEN_ILLEGAL // UNUSED)
 147     //for ( i = 65; i < 91; i++ )
 148     /*  65 A */ TOKEN_CHAR_WORD | TOKEN_WORD,
 149     /*  66 B */ TOKEN_CHAR_WORD | TOKEN_WORD,
 150     /*  67 C */ TOKEN_CHAR_WORD | TOKEN_WORD,
 151     /*  68 D */ TOKEN_CHAR_WORD | TOKEN_WORD,
 152     /*  69 E */ TOKEN_CHAR_WORD | TOKEN_WORD,
 153     /*  70 F */ TOKEN_CHAR_WORD | TOKEN_WORD,
 154     /*  71 G */ TOKEN_CHAR_WORD | TOKEN_WORD,
 155     /*  72 H */ TOKEN_CHAR_WORD | TOKEN_WORD,
 156     /*  73 I */ TOKEN_CHAR_WORD | TOKEN_WORD,
 157     /*  74 J */ TOKEN_CHAR_WORD | TOKEN_WORD,
 158     /*  75 K */ TOKEN_CHAR_WORD | TOKEN_WORD,
 159     /*  76 L */ TOKEN_CHAR_WORD | TOKEN_WORD,
 160     /*  77 M */ TOKEN_CHAR_WORD | TOKEN_WORD,
 161     /*  78 N */ TOKEN_CHAR_WORD | TOKEN_WORD,
 162     /*  79 O */ TOKEN_CHAR_WORD | TOKEN_WORD,
 163     /*  80 P */ TOKEN_CHAR_WORD | TOKEN_WORD,
 164     /*  81 Q */ TOKEN_CHAR_WORD | TOKEN_WORD,
 165     /*  82 R */ TOKEN_CHAR_WORD | TOKEN_WORD,
 166     /*  83 S */ TOKEN_CHAR_WORD | TOKEN_WORD,
 167     /*  84 T */ TOKEN_CHAR_WORD | TOKEN_WORD,
 168     /*  85 U */ TOKEN_CHAR_WORD | TOKEN_WORD,
 169     /*  86 V */ TOKEN_CHAR_WORD | TOKEN_WORD,
 170     /*  87 W */ TOKEN_CHAR_WORD | TOKEN_WORD,
 171     /*  88 X */ TOKEN_CHAR_WORD | TOKEN_WORD,
 172     /*  89 Y */ TOKEN_CHAR_WORD | TOKEN_WORD,
 173     /*  90 Z */ TOKEN_CHAR_WORD | TOKEN_WORD,
 174     /*  91 [ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,  // (TOKEN_ILLEGAL // UNUSED)
 175     /*  92 \ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,  // (TOKEN_ILLEGAL // UNUSED)
 176     /*  93 ] */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,  // (TOKEN_ILLEGAL // UNUSED)
 177     /*  94 ^ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,
 178     /*  95 _ */ TOKEN_CHAR_WORD | TOKEN_WORD,
 179     /*  96 ` */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,  // (TOKEN_ILLEGAL // UNUSED)
 180     //for ( i = 97; i < 123; i++ )
 181     /*  97 a */ TOKEN_CHAR_WORD | TOKEN_WORD,
 182     /*  98 b */ TOKEN_CHAR_WORD | TOKEN_WORD,
 183     /*  99 c */ TOKEN_CHAR_WORD | TOKEN_WORD,
 184     /* 100 d */ TOKEN_CHAR_WORD | TOKEN_WORD,
 185     /* 101 e */ TOKEN_CHAR_WORD | TOKEN_WORD,
 186     /* 102 f */ TOKEN_CHAR_WORD | TOKEN_WORD,
 187     /* 103 g */ TOKEN_CHAR_WORD | TOKEN_WORD,
 188     /* 104 h */ TOKEN_CHAR_WORD | TOKEN_WORD,
 189     /* 105 i */ TOKEN_CHAR_WORD | TOKEN_WORD,
 190     /* 106 j */ TOKEN_CHAR_WORD | TOKEN_WORD,
 191     /* 107 k */ TOKEN_CHAR_WORD | TOKEN_WORD,
 192     /* 108 l */ TOKEN_CHAR_WORD | TOKEN_WORD,
 193     /* 109 m */ TOKEN_CHAR_WORD | TOKEN_WORD,
 194     /* 110 n */ TOKEN_CHAR_WORD | TOKEN_WORD,
 195     /* 111 o */ TOKEN_CHAR_WORD | TOKEN_WORD,
 196     /* 112 p */ TOKEN_CHAR_WORD | TOKEN_WORD,
 197     /* 113 q */ TOKEN_CHAR_WORD | TOKEN_WORD,
 198     /* 114 r */ TOKEN_CHAR_WORD | TOKEN_WORD,
 199     /* 115 s */ TOKEN_CHAR_WORD | TOKEN_WORD,
 200     /* 116 t */ TOKEN_CHAR_WORD | TOKEN_WORD,
 201     /* 117 u */ TOKEN_CHAR_WORD | TOKEN_WORD,
 202     /* 118 v */ TOKEN_CHAR_WORD | TOKEN_WORD,
 203     /* 119 w */ TOKEN_CHAR_WORD | TOKEN_WORD,
 204     /* 120 x */ TOKEN_CHAR_WORD | TOKEN_WORD,
 205     /* 121 y */ TOKEN_CHAR_WORD | TOKEN_WORD,
 206     /* 122 z */ TOKEN_CHAR_WORD | TOKEN_WORD,
 207     /* 123 { */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,  // (TOKEN_ILLEGAL // UNUSED)
 208     /* 124 | */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,  // (TOKEN_ILLEGAL // UNUSED)
 209     /* 125 } */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,  // (TOKEN_ILLEGAL // UNUSED)
 210     /* 126 ~ */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP,  // (TOKEN_ILLEGAL // UNUSED)
 211     /* 127   */ TOKEN_CHAR | TOKEN_WORD_SEP | TOKEN_VALUE_SEP   // (TOKEN_ILLEGAL // UNUSED)
 212 };
 213
 214
 // KParseTokens::ASC_... classification of each character below nDefCnt
 // (array index == character code). getParseTokensType() returns the entry
 // for such characters directly.
 215 const sal_Int32 cclass_Unicode::pParseTokensType[ nDefCnt ] =
 216 {
 217     /* \0 */    KParseTokens::ASC_OTHER,
 218                 KParseTokens::ASC_CONTROL,
 219                 KParseTokens::ASC_CONTROL,
 220                 KParseTokens::ASC_CONTROL,
 221                 KParseTokens::ASC_CONTROL,
 222                 KParseTokens::ASC_CONTROL,
 223                 KParseTokens::ASC_CONTROL,
 224                 KParseTokens::ASC_CONTROL,
 225                 KParseTokens::ASC_CONTROL,
 226     /*  9 \t */ KParseTokens::ASC_CONTROL,
 227                 KParseTokens::ASC_CONTROL,
 228     /* 11 \v */ KParseTokens::ASC_CONTROL,
 229                 KParseTokens::ASC_CONTROL,
 230                 KParseTokens::ASC_CONTROL,
 231                 KParseTokens::ASC_CONTROL,
 232                 KParseTokens::ASC_CONTROL,
 233                 KParseTokens::ASC_CONTROL,
 234                 KParseTokens::ASC_CONTROL,
 235                 KParseTokens::ASC_CONTROL,
 236                 KParseTokens::ASC_CONTROL,
 237                 KParseTokens::ASC_CONTROL,
 238                 KParseTokens::ASC_CONTROL,
 239                 KParseTokens::ASC_CONTROL,
 240                 KParseTokens::ASC_CONTROL,
 241                 KParseTokens::ASC_CONTROL,
 242                 KParseTokens::ASC_CONTROL,
 243                 KParseTokens::ASC_CONTROL,
 244                 KParseTokens::ASC_CONTROL,
 245                 KParseTokens::ASC_CONTROL,
 246                 KParseTokens::ASC_CONTROL,
 247                 KParseTokens::ASC_CONTROL,
 248                 KParseTokens::ASC_CONTROL,
 249     /*  32   */ KParseTokens::ASC_OTHER,
 250     /*  33 ! */ KParseTokens::ASC_OTHER,
 251     /*  34 " */ KParseTokens::ASC_OTHER,
 252     /*  35 # */ KParseTokens::ASC_OTHER,
 253     /*  36 $ */ KParseTokens::ASC_DOLLAR,
 254     /*  37 % */ KParseTokens::ASC_OTHER,
 255     /*  38 & */ KParseTokens::ASC_OTHER,
 256     /*  39 ' */ KParseTokens::ASC_OTHER,
 257     /*  40 ( */ KParseTokens::ASC_OTHER,
 258     /*  41 ) */ KParseTokens::ASC_OTHER,
 259     /*  42 * */ KParseTokens::ASC_OTHER,
 260     /*  43 + */ KParseTokens::ASC_OTHER,
 261     /*  44 , */ KParseTokens::ASC_OTHER,
 262     /*  45 - */ KParseTokens::ASC_OTHER,
 263     /*  46 . */ KParseTokens::ASC_DOT,
 264     /*  47 / */ KParseTokens::ASC_OTHER,
 265     //for ( i = 48; i < 58; i++ )
 266     /*  48 0 */ KParseTokens::ASC_DIGIT,
 267     /*  49 1 */ KParseTokens::ASC_DIGIT,
 268     /*  50 2 */ KParseTokens::ASC_DIGIT,
 269     /*  51 3 */ KParseTokens::ASC_DIGIT,
 270     /*  52 4 */ KParseTokens::ASC_DIGIT,
 271     /*  53 5 */ KParseTokens::ASC_DIGIT,
 272     /*  54 6 */ KParseTokens::ASC_DIGIT,
 273     /*  55 7 */ KParseTokens::ASC_DIGIT,
 274     /*  56 8 */ KParseTokens::ASC_DIGIT,
 275     /*  57 9 */ KParseTokens::ASC_DIGIT,
 276     /*  58 : */ KParseTokens::ASC_COLON,
 277     /*  59 ; */ KParseTokens::ASC_OTHER,
 278     /*  60 < */ KParseTokens::ASC_OTHER,
 279     /*  61 = */ KParseTokens::ASC_OTHER,
 280     /*  62 > */ KParseTokens::ASC_OTHER,
 281     /*  63 ? */ KParseTokens::ASC_OTHER,
 282     /*  64 @ */ KParseTokens::ASC_OTHER,
 283     //for ( i = 65; i < 91; i++ )
 284     /*  65 A */ KParseTokens::ASC_UPALPHA,
 285     /*  66 B */ KParseTokens::ASC_UPALPHA,
 286     /*  67 C */ KParseTokens::ASC_UPALPHA,
 287     /*  68 D */ KParseTokens::ASC_UPALPHA,
 288     /*  69 E */ KParseTokens::ASC_UPALPHA,
 289     /*  70 F */ KParseTokens::ASC_UPALPHA,
 290     /*  71 G */ KParseTokens::ASC_UPALPHA,
 291     /*  72 H */ KParseTokens::ASC_UPALPHA,
 292     /*  73 I */ KParseTokens::ASC_UPALPHA,
 293     /*  74 J */ KParseTokens::ASC_UPALPHA,
 294     /*  75 K */ KParseTokens::ASC_UPALPHA,
 295     /*  76 L */ KParseTokens::ASC_UPALPHA,
 296     /*  77 M */ KParseTokens::ASC_UPALPHA,
 297     /*  78 N */ KParseTokens::ASC_UPALPHA,
 298     /*  79 O */ KParseTokens::ASC_UPALPHA,
 299     /*  80 P */ KParseTokens::ASC_UPALPHA,
 300     /*  81 Q */ KParseTokens::ASC_UPALPHA,
 301     /*  82 R */ KParseTokens::ASC_UPALPHA,
 302     /*  83 S */ KParseTokens::ASC_UPALPHA,
 303     /*  84 T */ KParseTokens::ASC_UPALPHA,
 304     /*  85 U */ KParseTokens::ASC_UPALPHA,
 305     /*  86 V */ KParseTokens::ASC_UPALPHA,
 306     /*  87 W */ KParseTokens::ASC_UPALPHA,
 307     /*  88 X */ KParseTokens::ASC_UPALPHA,
 308     /*  89 Y */ KParseTokens::ASC_UPALPHA,
 309     /*  90 Z */ KParseTokens::ASC_UPALPHA,
 310     /*  91 [ */ KParseTokens::ASC_OTHER,
 311     /*  92 \ */ KParseTokens::ASC_OTHER,
 312     /*  93 ] */ KParseTokens::ASC_OTHER,
 313     /*  94 ^ */ KParseTokens::ASC_OTHER,
 314     /*  95 _ */ KParseTokens::ASC_UNDERSCORE,
 315     /*  96 ` */ KParseTokens::ASC_OTHER,
 316     //for ( i = 97; i < 123; i++ )
 317     /*  97 a */ KParseTokens::ASC_LOALPHA,
 318     /*  98 b */ KParseTokens::ASC_LOALPHA,
 319     /*  99 c */ KParseTokens::ASC_LOALPHA,
 320     /* 100 d */ KParseTokens::ASC_LOALPHA,
 321     /* 101 e */ KParseTokens::ASC_LOALPHA,
 322     /* 102 f */ KParseTokens::ASC_LOALPHA,
 323     /* 103 g */ KParseTokens::ASC_LOALPHA,
 324     /* 104 h */ KParseTokens::ASC_LOALPHA,
 325     /* 105 i */ KParseTokens::ASC_LOALPHA,
 326     /* 106 j */ KParseTokens::ASC_LOALPHA,
 327     /* 107 k */ KParseTokens::ASC_LOALPHA,
 328     /* 108 l */ KParseTokens::ASC_LOALPHA,
 329     /* 109 m */ KParseTokens::ASC_LOALPHA,
 330     /* 110 n */ KParseTokens::ASC_LOALPHA,
 331     /* 111 o */ KParseTokens::ASC_LOALPHA,
 332     /* 112 p */ KParseTokens::ASC_LOALPHA,
 333     /* 113 q */ KParseTokens::ASC_LOALPHA,
 334     /* 114 r */ KParseTokens::ASC_LOALPHA,
 335     /* 115 s */ KParseTokens::ASC_LOALPHA,
 336     /* 116 t */ KParseTokens::ASC_LOALPHA,
 337     /* 117 u */ KParseTokens::ASC_LOALPHA,
 338     /* 118 v */ KParseTokens::ASC_LOALPHA,
 339     /* 119 w */ KParseTokens::ASC_LOALPHA,
 340     /* 120 x */ KParseTokens::ASC_LOALPHA,
 341     /* 121 y */ KParseTokens::ASC_LOALPHA,
 342     /* 122 z */ KParseTokens::ASC_LOALPHA,
 343     /* 123 { */ KParseTokens::ASC_OTHER,
 344     /* 124 | */ KParseTokens::ASC_OTHER,
 345     /* 125 } */ KParseTokens::ASC_OTHER,
 346     /* 126 ~ */ KParseTokens::ASC_OTHER,
 347     /* 127   */ KParseTokens::ASC_OTHER
 348 };
 349
 350
 351 // static
 352 const sal_Unicode* cclass_Unicode::StrChr( const sal_Unicode* pStr, sal_Unicode c )
 353 {
 354     if ( !pStr )
 355         return NULL;
 356     while ( *pStr )
 357     {
 358         if ( *pStr == c )
 359             return pStr;
 360         pStr++;
 361     }
 362     return NULL;
 363 }
 364
 365
 366 sal_Int32 cclass_Unicode::getParseTokensType( const sal_Unicode* aStr, sal_Int32 nPos )
 367 {
 368     sal_Unicode c = aStr[nPos];
 369     if ( c < nDefCnt )
 370         return pParseTokensType[ sal_uInt8(c) ];
 371     else
 372     {
 373
 374         //! all KParseTokens::UNI_... must be matched
 375         switch ( u_charType( (sal_uInt32) c ) )
 376         {
 377             case U_UPPERCASE_LETTER :
 378                 return KParseTokens::UNI_UPALPHA;
 379             case U_LOWERCASE_LETTER :
 380                 return KParseTokens::UNI_LOALPHA;
 381             case U_TITLECASE_LETTER :
 382                 return KParseTokens::UNI_TITLE_ALPHA;
 383             case U_MODIFIER_LETTER :
 384                 return KParseTokens::UNI_MODIFIER_LETTER;
 385             case U_OTHER_LETTER :
 386                 // Non_Spacing_Mark could not be as leading character
 387                 if (nPos == 0) break;
 388                 // fall through, treat it as Other_Letter.
 389             case U_NON_SPACING_MARK :
 390                 return KParseTokens::UNI_OTHER_LETTER;
 391             case U_DECIMAL_DIGIT_NUMBER :
 392                 return KParseTokens::UNI_DIGIT;
 393             case U_LETTER_NUMBER :
 394                 return KParseTokens::UNI_LETTER_NUMBER;
 395             case U_OTHER_NUMBER :
 396                 return KParseTokens::UNI_OTHER_NUMBER;
 397         }
 398
 399         return KParseTokens::UNI_OTHER;
 400     }
 401 }
 402
 403 sal_Bool cclass_Unicode::setupInternational( const Locale& rLocale )
 404 {
 405     sal_Bool bChanged = (aParserLocale.Language != rLocale.Language
 406         || aParserLocale.Country != rLocale.Country
 407         || aParserLocale.Variant != rLocale.Variant);
 408     if ( bChanged )
 409     {
 410         aParserLocale.Language = rLocale.Language;
 411         aParserLocale.Country = rLocale.Country;
 412         aParserLocale.Variant = rLocale.Variant;
 413     }
 414     if ( !xLocaleData.is() && xMSF.is() )
 415     {
 416         Reference <
 417             XInterface > xI =
 418             xMSF->createInstance( OUString(
 419             RTL_CONSTASCII_USTRINGPARAM( "com.sun.star.i18n.LocaleData" ) ) );
 420         if ( xI.is() )
 421         {
 422             Any x = xI->queryInterface( getCppuType((const Reference< XLocaleData>*)0) );
 423             x >>= xLocaleData;
 424         }
 425     }
 426     return bChanged;
 427 }
 428
 429
 430 void cclass_Unicode::setupParserTable( const Locale& rLocale, sal_Int32 startCharTokenType,
 431             const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
 432             const OUString& userDefinedCharactersCont )
 433 {
 434     bool bIntlEqual = (rLocale.Language == aParserLocale.Language &&
 435         rLocale.Country == aParserLocale.Country &&
 436         rLocale.Variant == aParserLocale.Variant);
 437     if ( !pTable || !bIntlEqual ||
 438             startCharTokenType != nStartTypes ||
 439             contCharTokenType != nContTypes ||
 440             userDefinedCharactersStart != aStartChars ||
 441             userDefinedCharactersCont != aContChars )
 442         initParserTable( rLocale, startCharTokenType, userDefinedCharactersStart,
 443             contCharTokenType, userDefinedCharactersCont );
 444 }
 445
 446
 447 void cclass_Unicode::initParserTable( const Locale& rLocale, sal_Int32 startCharTokenType,
 448             const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
 449             const OUString& userDefinedCharactersCont )
 450 {
 451     // (Re)Init
 452     setupInternational( rLocale );
 453     // Memory of pTable is reused.
 454     if ( !pTable )
 455         pTable = new UPT_FLAG_TYPE[nDefCnt];
 456     memcpy( pTable, pDefaultParserTable, sizeof(UPT_FLAG_TYPE) * nDefCnt );
 457     // Start and cont tables only need reallocation if different length.
 458     if ( pStart && userDefinedCharactersStart.getLength() != aStartChars.getLength() )
 459     {
 460         delete [] pStart;
 461         pStart = NULL;
 462     }
 463     if ( pCont && userDefinedCharactersCont.getLength() != aContChars.getLength() )
 464     {
 465         delete [] pCont;
 466         pCont = NULL;
 467     }
 468     nStartTypes = startCharTokenType;
 469     nContTypes = contCharTokenType;
 470     aStartChars = userDefinedCharactersStart;
 471     aContChars = userDefinedCharactersCont;
 472
 473     // specials
 474     if( xLocaleData.is() )
 475     {
 476         LocaleDataItem aItem =
 477             xLocaleData->getLocaleItem( aParserLocale );
 478 //!TODO: theoretically separators may be a string, adjustment would have to be
 479 //! done here and in parsing and in ::rtl::math::stringToDouble()
 480         cGroupSep = aItem.thousandSeparator.getStr()[0];
 481         cDecimalSep = aItem.decimalSeparator.getStr()[0];
 482     }
 483
 484     if ( cGroupSep < nDefCnt )
 485         pTable[cGroupSep] |= TOKEN_VALUE;
 486     if ( cDecimalSep < nDefCnt )
 487         pTable[cDecimalSep] |= TOKEN_CHAR_VALUE | TOKEN_VALUE;
 488
 489     // Modify characters according to KParseTokens definitions.
 490     {
 491         using namespace KParseTokens;
 492         sal_uInt8 i;
 493
 494         if ( !(nStartTypes & ASC_UPALPHA) )
 495             for ( i = 65; i < 91; i++ )
 496                 pTable[i] &= ~TOKEN_CHAR_WORD;  // not allowed as start character
 497         if ( !(nContTypes & ASC_UPALPHA) )
 498             for ( i = 65; i < 91; i++ )
 499                 pTable[i] &= ~TOKEN_WORD;               // not allowed as cont character
 500
 501         if ( !(nStartTypes & ASC_LOALPHA) )
 502             for ( i = 97; i < 123; i++ )
 503                 pTable[i] &= ~TOKEN_CHAR_WORD;  // not allowed as start character
 504         if ( !(nContTypes & ASC_LOALPHA) )
 505             for ( i = 97; i < 123; i++ )
 506                 pTable[i] &= ~TOKEN_WORD;               // not allowed as cont character
 507
 508         if ( nStartTypes & ASC_DIGIT )
 509             for ( i = 48; i < 58; i++ )
 510                 pTable[i] |= TOKEN_CHAR_WORD;   // allowed as start character
 511         if ( !(nContTypes & ASC_DIGIT) )
 512             for ( i = 48; i < 58; i++ )
 513                 pTable[i] &= ~TOKEN_WORD;               // not allowed as cont character
 514
 515         if ( !(nStartTypes & ASC_UNDERSCORE) )
 516             pTable[95] &= ~TOKEN_CHAR_WORD;             // not allowed as start character
 517         if ( !(nContTypes & ASC_UNDERSCORE) )
 518             pTable[95] &= ~TOKEN_WORD;                  // not allowed as cont character
 519
 520         if ( nStartTypes & ASC_DOLLAR )
 521             pTable[36] |= TOKEN_CHAR_WORD;              // allowed as start character
 522         if ( nContTypes & ASC_DOLLAR )
 523             pTable[36] |= TOKEN_WORD;                   // allowed as cont character
 524
 525         if ( nStartTypes & ASC_DOT )
 526             pTable[46] |= TOKEN_CHAR_WORD;              // allowed as start character
 527         if ( nContTypes & ASC_DOT )
 528             pTable[46] |= TOKEN_WORD;                   // allowed as cont character
 529
 530         if ( nStartTypes & ASC_COLON )
 531             pTable[58] |= TOKEN_CHAR_WORD;              // allowed as start character
 532         if ( nContTypes & ASC_COLON )
 533             pTable[58] |= TOKEN_WORD;                   // allowed as cont character
 534
 535         if ( nStartTypes & ASC_CONTROL )
 536             for ( i = 1; i < 32; i++ )
 537                 pTable[i] |= TOKEN_CHAR_WORD;   // allowed as start character
 538         if ( nContTypes & ASC_CONTROL )
 539             for ( i = 1; i < 32; i++ )
 540                 pTable[i] |= TOKEN_WORD;                // allowed as cont character
 541
 542         if ( nStartTypes & ASC_ANY_BUT_CONTROL )
 543             for ( i = 32; i < nDefCnt; i++ )
 544                 pTable[i] |= TOKEN_CHAR_WORD;   // allowed as start character
 545         if ( nContTypes & ASC_ANY_BUT_CONTROL )
 546             for ( i = 32; i < nDefCnt; i++ )
 547                 pTable[i] |= TOKEN_WORD;                // allowed as cont character
 548
 549     }
 550
 551     // Merge in (positively override with) user defined characters.
 552     // StartChars
 553     sal_Int32 nLen = aStartChars.getLength();
 554     if ( nLen )
 555     {
 556         if ( !pStart )
 557             pStart = new UPT_FLAG_TYPE[ nLen ];
 558         const sal_Unicode* p = aStartChars.getStr();
 559         for ( sal_Int32 j=0; j<nLen; j++, p++ )
 560         {
 561             pStart[j] = TOKEN_CHAR_WORD;
 562             if ( *p < nDefCnt )
 563                 pTable[*p] |= TOKEN_CHAR_WORD;
 564         }
 565     }
 566     // ContChars
 567     nLen = aContChars.getLength();
 568     if ( nLen )
 569     {
 570         if ( !pCont )
 571             pCont = new UPT_FLAG_TYPE[ nLen ];
 572         const sal_Unicode* p = aContChars.getStr();
 573         for ( sal_Int32 j=0; j<nLen; j++ )
 574         {
 575             pCont[j] = TOKEN_WORD;
 576             if ( *p < nDefCnt )
 577                 pTable[*p] |= TOKEN_WORD;
 578         }
 579     }
 580 }
 581
 582
 583 void cclass_Unicode::destroyParserTable()
 584 {
 585     if ( pCont )
 586         delete [] pCont;
 587     if ( pStart )
 588         delete [] pStart;
 589     if ( pTable )
 590         delete [] pTable;
 591 }
 592
 593
 594 UPT_FLAG_TYPE cclass_Unicode::getFlags( const sal_Unicode* aStr, sal_Int32 nPos )
 595 {
 596     UPT_FLAG_TYPE nMask;
 597     sal_Unicode c = aStr[nPos];
 598     if ( c < nDefCnt )
 599         nMask = pTable[ sal_uInt8(c) ];
 600     else
 601         nMask = getFlagsExtended( aStr, nPos );
 602     switch ( eState )
 603     {
 604         case ssGetChar :
 605         case ssRewindFromValue :
 606         case ssIgnoreLeadingInRewind :
 607         case ssGetWordFirstChar :
 608             if ( !(nMask & TOKEN_CHAR_WORD) )
 609             {
 610                 nMask |= getStartCharsFlags( c );
 611                 if ( nMask & TOKEN_CHAR_WORD )
 612                     nMask &= ~TOKEN_EXCLUDED;
 613             }
 614         break;
 615         case ssGetValue :
 616         case ssGetWord :
 617             if ( !(nMask & TOKEN_WORD) )
 618             {
 619                 nMask |= getContCharsFlags( c );
 620                 if ( nMask & TOKEN_WORD )
 621                     nMask &= ~TOKEN_EXCLUDED;
 622             }
 623         break;
 624         default:
 625             ;   // other cases aren't needed, no compiler warning
 626     }
 627     return nMask;
 628 }
 629
 630
 631 UPT_FLAG_TYPE cclass_Unicode::getFlagsExtended( const sal_Unicode* aStr, sal_Int32 nPos )
 632 {
 633     sal_Unicode c = aStr[nPos];
 634     if ( c == cGroupSep )
 635         return TOKEN_VALUE;
 636     else if ( c == cDecimalSep )
 637         return TOKEN_CHAR_VALUE | TOKEN_VALUE;
 638     using namespace i18n;
 639     bool bStart = (eState == ssGetChar || eState == ssGetWordFirstChar ||
 640             eState == ssRewindFromValue || eState == ssIgnoreLeadingInRewind);
 641     sal_Int32 nTypes = (bStart ? nStartTypes : nContTypes);
 642
 643     //! all KParseTokens::UNI_... must be matched
 644     switch ( u_charType( (sal_uInt32) c ) )
 645     {
 646         case U_UPPERCASE_LETTER :
 647             return (nTypes & KParseTokens::UNI_UPALPHA) ?
 648                 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
 649                 TOKEN_ILLEGAL;
 650         case U_LOWERCASE_LETTER :
 651             return (nTypes & KParseTokens::UNI_LOALPHA) ?
 652                 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
 653                 TOKEN_ILLEGAL;
 654         case U_TITLECASE_LETTER :
 655             return (nTypes & KParseTokens::UNI_TITLE_ALPHA) ?
 656                 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
 657                 TOKEN_ILLEGAL;
 658         case U_MODIFIER_LETTER :
 659             return (nTypes & KParseTokens::UNI_MODIFIER_LETTER) ?
 660                 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
 661                 TOKEN_ILLEGAL;
 662         case U_NON_SPACING_MARK :
 663         case U_COMBINING_SPACING_MARK :
 664             // Non_Spacing_Mark can't be a leading character,
 665             // nor can a spacing combining mark.
 666             if (bStart)
 667                 return TOKEN_ILLEGAL;
 668             // fall through, treat it as Other_Letter.
 669         case U_OTHER_LETTER :
 670             return (nTypes & KParseTokens::UNI_OTHER_LETTER) ?
 671                 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
 672                 TOKEN_ILLEGAL;
 673         case U_DECIMAL_DIGIT_NUMBER :
 674             return ((nTypes & KParseTokens::UNI_DIGIT) ?
 675                 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
 676                 TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS;
 677         case U_LETTER_NUMBER :
 678             return ((nTypes & KParseTokens::UNI_LETTER_NUMBER) ?
 679                 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
 680                 TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS;
 681         case U_OTHER_NUMBER :
 682             return ((nTypes & KParseTokens::UNI_OTHER_NUMBER) ?
 683                 (bStart ? TOKEN_CHAR_WORD : TOKEN_WORD) :
 684                 TOKEN_ILLEGAL) | TOKEN_DIGIT_FLAGS;
 685         case U_SPACE_SEPARATOR :
 686             return ((nTypes & KParseTokens::IGNORE_LEADING_WS) ?
 687                 TOKEN_CHAR_DONTCARE : (bStart ? TOKEN_CHAR_WORD : (TOKEN_CHAR_DONTCARE | TOKEN_WORD_SEP | TOKEN_VALUE_SEP) ));
 688     }
 689
 690     return TOKEN_ILLEGAL;
 691 }
 692
 693
 694 UPT_FLAG_TYPE cclass_Unicode::getStartCharsFlags( sal_Unicode c )
 695 {
 696     if ( pStart )
 697     {
 698         const sal_Unicode* pStr = aStartChars.getStr();
 699         const sal_Unicode* p = StrChr( pStr, c );
 700         if ( p )
 701             return pStart[ p - pStr ];
 702     }
 703     return TOKEN_ILLEGAL;
 704 }
 705
 706
 707 UPT_FLAG_TYPE cclass_Unicode::getContCharsFlags( sal_Unicode c )
 708 {
 709     if ( pCont )
 710     {
 711         const sal_Unicode* pStr = aContChars.getStr();
 712         const sal_Unicode* p = StrChr( pStr, c );
 713         if ( p )
 714             return pCont[ p - pStr ];
 715     }
 716     return TOKEN_ILLEGAL;
 717 }
 718
 719
 720 void cclass_Unicode::parseText( ParseResult& r, const OUString& rText, sal_Int32 nPos, sal_Int32 nTokenType )
 721 {
 722     using namespace i18n;
 723     const sal_Unicode* const pTextStart = rText.getStr() + nPos;
 724     eState = ssGetChar;
 725
 726     //! All the variables below (plus ParseResult) have to be resetted on ssRewindFromValue!
 727     const sal_Unicode* pSym = pTextStart;
 728     const sal_Unicode* pSrc = pSym;
 729     OUString aSymbol;
 730     sal_Unicode c = *pSrc;
 731     sal_Unicode cLast = 0;
 732     int nDecSeps = 0;
 733     bool bQuote = false;
 734     bool bMightBeWord = true;
 735     bool bMightBeWordLast = true;
 736     //! All the variables above (plus ParseResult) have to be resetted on ssRewindFromValue!
 737
 738     while ( (c != 0) && (eState != ssStop) )
 739     {
 740         UPT_FLAG_TYPE nMask = getFlags( pTextStart, pSrc - pTextStart );
 741         if ( nMask & TOKEN_EXCLUDED )
 742             eState = ssBounce;
 743         if ( bMightBeWord )
 744         {       // only relevant for ssGetValue fall back
 745             if ( eState == ssGetChar || eState == ssRewindFromValue ||
 746                     eState == ssIgnoreLeadingInRewind )
 747                 bMightBeWord = ((nMask & TOKEN_CHAR_WORD) != 0);
 748             else
 749                 bMightBeWord = ((nMask & TOKEN_WORD) != 0);
 750         }
 751         sal_Int32 nParseTokensType = getParseTokensType( pTextStart, pSrc - pTextStart );
 752         pSrc++;
 753         switch (eState)
 754         {
 755             case ssGetChar :
 756             case ssRewindFromValue :
 757             case ssIgnoreLeadingInRewind :
 758             {
 759                 if ( (nMask & TOKEN_CHAR_VALUE) && eState != ssRewindFromValue
 760                         && eState != ssIgnoreLeadingInRewind )
 761                 {       //! must be first, may fall back to ssGetWord via bMightBeWord
 762                     eState = ssGetValue;
 763                     if ( nMask & TOKEN_VALUE_DIGIT )
 764                     {
 765                         if ( 128 <= c )
 766                             r.TokenType = KParseType::UNI_NUMBER;
 767                         else
 768                             r.TokenType = KParseType::ASC_NUMBER;
 769                     }
 770                     else if ( c == cDecimalSep )
 771                     {
 772                         if ( *pSrc )
 773                             ++nDecSeps;
 774                         else
 775                             eState = ssRewindFromValue;
 776                             // retry for ONE_SINGLE_CHAR or others
 777                     }
 778                 }
 779                 else if ( nMask & TOKEN_CHAR_WORD )
 780                 {
 781                     eState = ssGetWord;
 782                     r.TokenType = KParseType::IDENTNAME;
 783                 }
 784                 else if ( nMask & TOKEN_NAME_SEP )
 785                 {
 786                     eState = ssGetWordFirstChar;
 787                     bQuote = true;
 788                     pSym++;
 789                     nParseTokensType = 0;       // will be taken of first real character
 790                     r.TokenType = KParseType::SINGLE_QUOTE_NAME;
 791                 }
 792                 else if ( nMask & TOKEN_CHAR_STRING )
 793                 {
 794                     eState = ssGetString;
 795                     pSym++;
 796                     nParseTokensType = 0;       // will be taken of first real character
 797                     r.TokenType = KParseType::DOUBLE_QUOTE_STRING;
 798                 }
 799                 else if ( nMask & TOKEN_CHAR_DONTCARE )
 800                 {
 801                     if ( nStartTypes & KParseTokens::IGNORE_LEADING_WS )
 802                     {
 803                         if (eState == ssRewindFromValue)
 804                             eState = ssIgnoreLeadingInRewind;
 805                         r.LeadingWhiteSpace++;
 806                         pSym++;
 807                         nParseTokensType = 0;   // wait until real character
 808                         bMightBeWord = true;
 809                     }
 810                     else
 811                         eState = ssBounce;
 812                 }
 813                 else if ( nMask & TOKEN_CHAR_BOOL )
 814                 {
 815                     eState = ssGetBool;
 816                     r.TokenType = KParseType::BOOLEAN;
 817                 }
 818                 else if ( nMask & TOKEN_CHAR )
 819                 {       //! must be last
 820                     eState = ssStop;
 821                     r.TokenType = KParseType::ONE_SINGLE_CHAR;
 822                 }
 823                 else
 824                     eState = ssBounce;          // not known
 825             }
 826             break;
 827             case ssGetValue :
 828             {
 829                 if ( nMask & TOKEN_VALUE_DIGIT )
 830                 {
 831                     if ( 128 <= c )
 832                         r.TokenType = KParseType::UNI_NUMBER;
 833                     else if ( r.TokenType != KParseType::UNI_NUMBER )
 834                         r.TokenType = KParseType::ASC_NUMBER;
 835                 }
 836                 if ( nMask & TOKEN_VALUE )
 837                 {
 838                     if ( c == cDecimalSep && ++nDecSeps > 1 )
 839                     {
 840                         if ( pSrc - pTextStart == 2 )
 841                             eState = ssRewindFromValue;
 842                             // consecutive separators
 843                         else
 844                             eState = ssStopBack;
 845                     }
 846                     // else keep it going
 847                 }
 848                 else if ( c == 'E' || c == 'e' )
 849                 {
 850                     UPT_FLAG_TYPE nNext = getFlags( pTextStart, pSrc - pTextStart );
 851                     if ( nNext & TOKEN_VALUE_EXP )
 852                         ;       // keep it going
 853                     else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) )
 854                     {   // might be a numerical name (1.2efg)
 855                         eState = ssGetWord;
 856                         r.TokenType = KParseType::IDENTNAME;
 857                     }
 858                     else
 859                         eState = ssStopBack;
 860                 }
 861                 else if ( nMask & TOKEN_VALUE_SIGN )
 862                 {
 863                     if ( (cLast == 'E') || (cLast == 'e') )
 864                     {
 865                         UPT_FLAG_TYPE nNext = getFlags( pTextStart, pSrc - pTextStart );
 866                         if ( nNext & TOKEN_VALUE_EXP_VALUE )
 867                             ;   // keep it going
 868                         else if ( bMightBeWord && ((nNext & TOKEN_WORD) || !*pSrc) )
 869                         {       // might be a numerical name (1.2e+fg)
 870                             eState = ssGetWord;
 871                             r.TokenType = KParseType::IDENTNAME;
 872                         }
 873                         else
 874                             eState = ssStopBack;
 875                     }
 876                     else if ( bMightBeWord )
 877                     {   // might be a numerical name (1.2+fg)
 878                         eState = ssGetWord;
 879                         r.TokenType = KParseType::IDENTNAME;
 880                     }
 881                     else
 882                         eState = ssStopBack;
 883                 }
 884                 else if ( bMightBeWord && (nMask & TOKEN_WORD) )
 885                 {       // might be a numerical name (1995.A1)
 886                     eState = ssGetWord;
 887                     r.TokenType = KParseType::IDENTNAME;
 888                 }
 889                 else
 890                     eState = ssStopBack;
 891             }
 892             break;
 893             case ssGetWordFirstChar :
 894                 eState = ssGetWord;
 895                 // fall thru
 896             case ssGetWord :
 897             {
 898                 if ( nMask & TOKEN_WORD )
 899                     ;   // keep it going
 900                 else if ( nMask & TOKEN_NAME_SEP )
 901                 {
 902                     if ( bQuote )
 903                     {
 904                         if ( cLast == '\\' )
 905                         {       // escaped
 906                             aSymbol += OUString( pSym, pSrc - pSym - 2 );
 907                             aSymbol += OUString( &c, 1);
 908                         }
 909                         else
 910                         {
 911                             eState = ssStop;
 912                             aSymbol += OUString( pSym, pSrc - pSym - 1 );
 913                         }
 914                         pSym = pSrc;
 915                     }
 916                     else
 917                         eState = ssStopBack;
 918                 }
 919                 else if ( bQuote )
 920                     ;   // keep it going
 921                 else
 922                     eState = ssStopBack;
 923             }
 924             break;
 925             case ssGetString :
 926             {
 927                 if ( nMask & TOKEN_STRING_SEP )
 928                 {
 929                     if ( cLast == '\\' )
 930                     {   // escaped
 931                         aSymbol += OUString( pSym, pSrc - pSym - 2 );
 932                         aSymbol += OUString( &c, 1);
 933                     }
 934                     else if ( c == *pSrc &&
 935                             !(nContTypes & KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING) )
 936                     {   // "" => literal " escaped
 937                         aSymbol += OUString( pSym, pSrc - pSym );
 938                         pSrc++;
 939                     }
 940                     else
 941                     {
 942                         eState = ssStop;
 943                         aSymbol += OUString( pSym, pSrc - pSym - 1 );
 944                     }
 945                     pSym = pSrc;
 946                 }
 947             }
 948             break;
 949             case ssGetBool :
 950             {
 951                 if ( (nMask & TOKEN_BOOL) )
 952                     eState = ssStop;    // maximum 2: <, >, <>, <=, >=
 953                 else
 954                     eState = ssStopBack;
 955             }
 956             break;
 957             case ssStopBack :
 958             case ssBounce :
 959             case ssStop :
 960                 ;   // nothing, no compiler warning
 961             break;
 962         }
 963         if ( eState == ssRewindFromValue )
 964         {
 965             r = ParseResult();
 966             pSym = pTextStart;
 967             pSrc = pSym;
 968             aSymbol = OUString();
 969             c = *pSrc;
 970             cLast = 0;
 971             nDecSeps = 0;
 972             bQuote = false;
 973             bMightBeWord = true;
 974             bMightBeWordLast = true;
 975         }
 976         else
 977         {
 978             if ( !(r.TokenType & nTokenType) )
 979             {
 980                 if ( (r.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER))
 981                         && (nTokenType & KParseType::IDENTNAME) && bMightBeWord )
 982                     ;   // keep a number that might be a word
 983                 else if ( r.LeadingWhiteSpace == (pSrc - pTextStart) )
 984                     ;   // keep ignored white space
 985                 else if ( !r.TokenType && eState == ssGetValue && (nMask & TOKEN_VALUE_SEP) )
 986                     ;   // keep uncertain value
 987                 else
 988                     eState = ssBounce;
 989             }
 990             if ( eState == ssBounce )
 991             {
 992                 r.TokenType = 0;
 993                 eState = ssStopBack;
 994             }
 995             if ( eState == ssStopBack )
 996             {   // put back
 997                 pSrc--;
 998                 bMightBeWord = bMightBeWordLast;
 999                 eState = ssStop;
1000             }
1001             if ( eState != ssStop )
1002             {
1003                 if ( !r.StartFlags )
1004                     r.StartFlags |= nParseTokensType;
1005                 else
1006                     r.ContFlags |= nParseTokensType;
1007             }
1008             bMightBeWordLast = bMightBeWord;
1009             cLast = c;
1010             c = *pSrc;
1011         }
1012     }
1013     // r.CharLen is the length in characters (not code points) of the parsed
1014     // token not including any leading white space, change this calculation if
1015     // multi-code-point Unicode characters are to be supported.
1016     r.CharLen = pSrc - pTextStart - r.LeadingWhiteSpace;
1017     r.EndPos = nPos + (pSrc - pTextStart);
1018     if ( r.TokenType & KParseType::ASC_NUMBER )
1019     {
1020         r.Value = rtl_math_uStringToDouble( pTextStart + r.LeadingWhiteSpace,
1021                 pTextStart + r.EndPos, cDecimalSep, cGroupSep, NULL, NULL );
1022         if ( bMightBeWord )
1023             r.TokenType |= KParseType::IDENTNAME;
1024     }
1025     else if ( r.TokenType & KParseType::UNI_NUMBER )
1026     {
1027         if ( !xNatNumSup.is() )
1028         {
1029 #define NATIVENUMBERSUPPLIER_SERVICENAME "com.sun.star.i18n.NativeNumberSupplier"
1030             if ( xMSF.is() )
1031             {
1032                 xNatNumSup = Reference< XNativeNumberSupplier > (
1033                         xMSF->createInstance( OUString(
1034                                 RTL_CONSTASCII_USTRINGPARAM(
1035                                     NATIVENUMBERSUPPLIER_SERVICENAME ) ) ),
1036                         UNO_QUERY );
1037             }
1038             if ( !xNatNumSup.is() )
1039             {
1040                 throw RuntimeException( OUString(
1041 #ifndef PRODUCT
1042                     RTL_CONSTASCII_USTRINGPARAM(
1043                         "cclass_Unicode::parseText: can't instanciate "
1044                         NATIVENUMBERSUPPLIER_SERVICENAME )
1045 #endif
1046                     ), *this );
1047             }
1048 #undef NATIVENUMBERSUPPLIER_SERVICENAME
1049         }
1050         OUString aTmp( pTextStart + r.LeadingWhiteSpace, r.EndPos - nPos +
1051                 r.LeadingWhiteSpace );
1052         // transliterate to ASCII
1053         aTmp = xNatNumSup->getNativeNumberString( aTmp, aParserLocale,
1054                 NativeNumberMode::NATNUM0 );
1055         r.Value = ::rtl::math::stringToDouble( aTmp, cDecimalSep, cGroupSep, NULL, NULL );
1056         if ( bMightBeWord )
1057             r.TokenType |= KParseType::IDENTNAME;
1058     }
1059     else if ( r.TokenType & (KParseType::SINGLE_QUOTE_NAME | KParseType::DOUBLE_QUOTE_STRING) )
1060     {
1061         if ( pSym < pSrc )
1062         {       //! open quote
1063             aSymbol += OUString( pSym, pSrc - pSym );
1064             r.TokenType |= KParseType::MISSING_QUOTE;
1065         }
1066         r.DequotedNameOrString = aSymbol;
1067     }
1068 }
1069
1070 } } } }