1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <com/sun/star/i18n/UnicodeType.hpp>
21 #include <com/sun/star/i18n/KCharacterType.hpp>
22 #include <com/sun/star/i18n/ScriptType.hpp>
23 #include <i18nutil/unicode.hxx>
24 #include "unicode_data.h"
26 // Workaround for glibc braindamage:
27 // glibc 2.4's langinfo.h does "#define CURRENCY_SYMBOL __CURRENCY_SYMBOL"
28 // which (obviously) breaks UnicodeType::CURRENCY_SYMBOL
29 #undef CURRENCY_SYMBOL
31 using namespace ::com::sun::star::i18n
;
33 static ScriptTypeList defaultTypeList
[] = {
34 { UnicodeScript_kBasicLatin
,
35 UnicodeScript_kBasicLatin
,
36 UnicodeScript_kBasicLatin
}, // 0,
37 { UnicodeScript_kLatin1Supplement
,
38 UnicodeScript_kLatin1Supplement
,
39 UnicodeScript_kLatin1Supplement
},// 1,
40 { UnicodeScript_kLatinExtendedA
,
41 UnicodeScript_kLatinExtendedA
,
42 UnicodeScript_kLatinExtendedA
}, // 2,
43 { UnicodeScript_kLatinExtendedB
,
44 UnicodeScript_kLatinExtendedB
,
45 UnicodeScript_kLatinExtendedB
}, // 3,
46 { UnicodeScript_kIPAExtension
,
47 UnicodeScript_kIPAExtension
,
48 UnicodeScript_kIPAExtension
}, // 4,
49 { UnicodeScript_kSpacingModifier
,
50 UnicodeScript_kSpacingModifier
,
51 UnicodeScript_kSpacingModifier
}, // 5,
52 { UnicodeScript_kCombiningDiacritical
,
53 UnicodeScript_kCombiningDiacritical
,
54 UnicodeScript_kCombiningDiacritical
}, // 6,
55 { UnicodeScript_kGreek
,
57 UnicodeScript_kGreek
}, // 7,
58 { UnicodeScript_kCyrillic
,
59 UnicodeScript_kCyrillic
,
60 UnicodeScript_kCyrillic
}, // 8,
61 { UnicodeScript_kArmenian
,
62 UnicodeScript_kArmenian
,
63 UnicodeScript_kArmenian
}, // 9,
64 { UnicodeScript_kHebrew
,
65 UnicodeScript_kHebrew
,
66 UnicodeScript_kHebrew
}, // 10,
67 { UnicodeScript_kArabic
,
68 UnicodeScript_kArabic
,
69 UnicodeScript_kArabic
}, // 11,
70 { UnicodeScript_kSyriac
,
71 UnicodeScript_kSyriac
,
72 UnicodeScript_kSyriac
}, // 12,
73 { UnicodeScript_kThaana
,
74 UnicodeScript_kThaana
,
75 UnicodeScript_kThaana
}, // 13,
76 { UnicodeScript_kDevanagari
,
77 UnicodeScript_kDevanagari
,
78 UnicodeScript_kDevanagari
}, // 14,
79 { UnicodeScript_kBengali
,
80 UnicodeScript_kBengali
,
81 UnicodeScript_kBengali
}, // 15,
82 { UnicodeScript_kGurmukhi
,
83 UnicodeScript_kGurmukhi
,
84 UnicodeScript_kGurmukhi
}, // 16,
85 { UnicodeScript_kGujarati
,
86 UnicodeScript_kGujarati
,
87 UnicodeScript_kGujarati
}, // 17,
88 { UnicodeScript_kOriya
,
90 UnicodeScript_kOriya
}, // 18,
91 { UnicodeScript_kTamil
,
93 UnicodeScript_kTamil
}, // 19,
94 { UnicodeScript_kTelugu
,
95 UnicodeScript_kTelugu
,
96 UnicodeScript_kTelugu
}, // 20,
97 { UnicodeScript_kKannada
,
98 UnicodeScript_kKannada
,
99 UnicodeScript_kKannada
}, // 21,
100 { UnicodeScript_kMalayalam
,
101 UnicodeScript_kMalayalam
,
102 UnicodeScript_kMalayalam
}, // 22,
103 { UnicodeScript_kSinhala
,
104 UnicodeScript_kSinhala
,
105 UnicodeScript_kSinhala
}, // 23,
106 { UnicodeScript_kThai
,
108 UnicodeScript_kThai
}, // 24,
109 { UnicodeScript_kLao
,
111 UnicodeScript_kLao
}, // 25,
112 { UnicodeScript_kTibetan
,
113 UnicodeScript_kTibetan
,
114 UnicodeScript_kTibetan
}, // 26,
115 { UnicodeScript_kMyanmar
,
116 UnicodeScript_kMyanmar
,
117 UnicodeScript_kMyanmar
}, // 27,
118 { UnicodeScript_kGeorgian
,
119 UnicodeScript_kGeorgian
,
120 UnicodeScript_kGeorgian
}, // 28,
121 { UnicodeScript_kHangulJamo
,
122 UnicodeScript_kHangulJamo
,
123 UnicodeScript_kHangulJamo
}, // 29,
124 { UnicodeScript_kEthiopic
,
125 UnicodeScript_kEthiopic
,
126 UnicodeScript_kEthiopic
}, // 30,
127 { UnicodeScript_kCherokee
,
128 UnicodeScript_kCherokee
,
129 UnicodeScript_kCherokee
}, // 31,
130 { UnicodeScript_kUnifiedCanadianAboriginalSyllabics
,
131 UnicodeScript_kUnifiedCanadianAboriginalSyllabics
,
132 UnicodeScript_kUnifiedCanadianAboriginalSyllabics
}, // 32,
133 { UnicodeScript_kOgham
,
134 UnicodeScript_kOgham
,
135 UnicodeScript_kOgham
}, // 33,
136 { UnicodeScript_kRunic
,
137 UnicodeScript_kRunic
,
138 UnicodeScript_kRunic
}, // 34,
139 { UnicodeScript_kKhmer
,
140 UnicodeScript_kKhmer
,
141 UnicodeScript_kKhmer
}, // 35,
142 { UnicodeScript_kMongolian
,
143 UnicodeScript_kMongolian
,
144 UnicodeScript_kMongolian
}, // 36,
145 { UnicodeScript_kLatinExtendedAdditional
,
146 UnicodeScript_kLatinExtendedAdditional
,
147 UnicodeScript_kLatinExtendedAdditional
}, // 37,
148 { UnicodeScript_kGreekExtended
,
149 UnicodeScript_kGreekExtended
,
150 UnicodeScript_kGreekExtended
}, // 38,
151 { UnicodeScript_kGeneralPunctuation
,
152 UnicodeScript_kGeneralPunctuation
,
153 UnicodeScript_kGeneralPunctuation
}, // 39,
154 { UnicodeScript_kSuperSubScript
,
155 UnicodeScript_kSuperSubScript
,
156 UnicodeScript_kSuperSubScript
}, // 40,
157 { UnicodeScript_kCurrencySymbolScript
,
158 UnicodeScript_kCurrencySymbolScript
,
159 UnicodeScript_kCurrencySymbolScript
}, // 41,
160 { UnicodeScript_kSymbolCombiningMark
,
161 UnicodeScript_kSymbolCombiningMark
,
162 UnicodeScript_kSymbolCombiningMark
}, // 42,
163 { UnicodeScript_kLetterlikeSymbol
,
164 UnicodeScript_kLetterlikeSymbol
,
165 UnicodeScript_kLetterlikeSymbol
}, // 43,
166 { UnicodeScript_kNumberForm
,
167 UnicodeScript_kNumberForm
,
168 UnicodeScript_kNumberForm
}, // 44,
169 { UnicodeScript_kArrow
,
170 UnicodeScript_kArrow
,
171 UnicodeScript_kArrow
}, // 45,
172 { UnicodeScript_kMathOperator
,
173 UnicodeScript_kMathOperator
,
174 UnicodeScript_kMathOperator
}, // 46,
175 { UnicodeScript_kMiscTechnical
,
176 UnicodeScript_kMiscTechnical
,
177 UnicodeScript_kMiscTechnical
}, // 47,
178 { UnicodeScript_kControlPicture
,
179 UnicodeScript_kControlPicture
,
180 UnicodeScript_kControlPicture
}, // 48,
181 { UnicodeScript_kOpticalCharacter
,
182 UnicodeScript_kOpticalCharacter
,
183 UnicodeScript_kOpticalCharacter
}, // 49,
184 { UnicodeScript_kEnclosedAlphanumeric
,
185 UnicodeScript_kEnclosedAlphanumeric
,
186 UnicodeScript_kEnclosedAlphanumeric
}, // 50,
187 { UnicodeScript_kBoxDrawing
,
188 UnicodeScript_kBoxDrawing
,
189 UnicodeScript_kBoxDrawing
}, // 51,
190 { UnicodeScript_kBlockElement
,
191 UnicodeScript_kBlockElement
,
192 UnicodeScript_kBlockElement
}, // 52,
193 { UnicodeScript_kGeometricShape
,
194 UnicodeScript_kGeometricShape
,
195 UnicodeScript_kGeometricShape
}, // 53,
196 { UnicodeScript_kMiscSymbol
,
197 UnicodeScript_kMiscSymbol
,
198 UnicodeScript_kMiscSymbol
}, // 54,
199 { UnicodeScript_kDingbat
,
200 UnicodeScript_kDingbat
,
201 UnicodeScript_kDingbat
}, // 55,
202 { UnicodeScript_kBraillePatterns
,
203 UnicodeScript_kBraillePatterns
,
204 UnicodeScript_kBraillePatterns
}, // 56,
205 { UnicodeScript_kCJKRadicalsSupplement
,
206 UnicodeScript_kCJKRadicalsSupplement
,
207 UnicodeScript_kCJKRadicalsSupplement
}, // 57,
208 { UnicodeScript_kKangxiRadicals
,
209 UnicodeScript_kKangxiRadicals
,
210 UnicodeScript_kKangxiRadicals
}, // 58,
211 { UnicodeScript_kIdeographicDescriptionCharacters
,
212 UnicodeScript_kIdeographicDescriptionCharacters
,
213 UnicodeScript_kIdeographicDescriptionCharacters
}, // 59,
214 { UnicodeScript_kCJKSymbolPunctuation
,
215 UnicodeScript_kCJKSymbolPunctuation
,
216 UnicodeScript_kCJKSymbolPunctuation
}, // 60,
217 { UnicodeScript_kHiragana
,
218 UnicodeScript_kHiragana
,
219 UnicodeScript_kHiragana
}, // 61,
220 { UnicodeScript_kKatakana
,
221 UnicodeScript_kKatakana
,
222 UnicodeScript_kKatakana
}, // 62,
223 { UnicodeScript_kBopomofo
,
224 UnicodeScript_kBopomofo
,
225 UnicodeScript_kBopomofo
}, // 63,
226 { UnicodeScript_kHangulCompatibilityJamo
,
227 UnicodeScript_kHangulCompatibilityJamo
,
228 UnicodeScript_kHangulCompatibilityJamo
}, // 64,
229 { UnicodeScript_kKanbun
,
230 UnicodeScript_kKanbun
,
231 UnicodeScript_kKanbun
}, // 65,
232 { UnicodeScript_kBopomofoExtended
,
233 UnicodeScript_kBopomofoExtended
,
234 UnicodeScript_kBopomofoExtended
}, // 66,
235 { UnicodeScript_kEnclosedCJKLetterMonth
,
236 UnicodeScript_kEnclosedCJKLetterMonth
,
237 UnicodeScript_kEnclosedCJKLetterMonth
}, // 67,
238 { UnicodeScript_kCJKCompatibility
,
239 UnicodeScript_kCJKCompatibility
,
240 UnicodeScript_kCJKCompatibility
}, // 68,
241 { UnicodeScript_k_CJKUnifiedIdeographsExtensionA
,
242 UnicodeScript_k_CJKUnifiedIdeographsExtensionA
,
243 UnicodeScript_k_CJKUnifiedIdeographsExtensionA
}, // 69,
244 { UnicodeScript_kCJKUnifiedIdeograph
,
245 UnicodeScript_kCJKUnifiedIdeograph
,
246 UnicodeScript_kCJKUnifiedIdeograph
}, // 70,
247 { UnicodeScript_kYiSyllables
,
248 UnicodeScript_kYiSyllables
,
249 UnicodeScript_kYiSyllables
}, // 71,
250 { UnicodeScript_kYiRadicals
,
251 UnicodeScript_kYiRadicals
,
252 UnicodeScript_kYiRadicals
}, // 72,
253 { UnicodeScript_kHangulSyllable
,
254 UnicodeScript_kHangulSyllable
,
255 UnicodeScript_kHangulSyllable
}, // 73,
256 { UnicodeScript_kHighSurrogate
,
257 UnicodeScript_kHighSurrogate
,
258 UnicodeScript_kHighSurrogate
}, // 74,
259 { UnicodeScript_kHighPrivateUseSurrogate
,
260 UnicodeScript_kHighPrivateUseSurrogate
,
261 UnicodeScript_kHighPrivateUseSurrogate
}, // 75,
262 { UnicodeScript_kLowSurrogate
,
263 UnicodeScript_kLowSurrogate
,
264 UnicodeScript_kLowSurrogate
}, // 76,
265 { UnicodeScript_kPrivateUse
,
266 UnicodeScript_kPrivateUse
,
267 UnicodeScript_kPrivateUse
}, // 77,
268 { UnicodeScript_kCJKCompatibilityIdeograph
,
269 UnicodeScript_kCJKCompatibilityIdeograph
,
270 UnicodeScript_kCJKCompatibilityIdeograph
}, // 78,
271 { UnicodeScript_kAlphabeticPresentation
,
272 UnicodeScript_kAlphabeticPresentation
,
273 UnicodeScript_kAlphabeticPresentation
}, // 79,
274 { UnicodeScript_kArabicPresentationA
,
275 UnicodeScript_kArabicPresentationA
,
276 UnicodeScript_kArabicPresentationA
}, // 80,
277 { UnicodeScript_kCombiningHalfMark
,
278 UnicodeScript_kCombiningHalfMark
,
279 UnicodeScript_kCombiningHalfMark
}, // 81,
280 { UnicodeScript_kCJKCompatibilityForm
,
281 UnicodeScript_kCJKCompatibilityForm
,
282 UnicodeScript_kCJKCompatibilityForm
}, // 82,
283 { UnicodeScript_kSmallFormVariant
,
284 UnicodeScript_kSmallFormVariant
,
285 UnicodeScript_kSmallFormVariant
}, // 83,
286 { UnicodeScript_kArabicPresentationB
,
287 UnicodeScript_kArabicPresentationB
,
288 UnicodeScript_kArabicPresentationB
}, // 84,
289 { UnicodeScript_kNoScript
,
290 UnicodeScript_kNoScript
,
291 UnicodeScript_kNoScript
}, // 85,
292 { UnicodeScript_kHalfwidthFullwidthForm
,
293 UnicodeScript_kHalfwidthFullwidthForm
,
294 UnicodeScript_kHalfwidthFullwidthForm
}, // 86,
295 { UnicodeScript_kScriptCount
,
296 UnicodeScript_kScriptCount
,
297 UnicodeScript_kNoScript
} // 87,
301 unicode::getUnicodeScriptType( const sal_Unicode ch
, ScriptTypeList
* typeList
, sal_Int16 unknownType
) {
304 typeList
= defaultTypeList
;
305 unknownType
= UnicodeScript_kNoScript
;
308 sal_Int16 i
= 0, type
= typeList
[0].to
;
309 while (type
< UnicodeScript_kScriptCount
&& ch
> UnicodeScriptType
[type
][UnicodeScriptTypeTo
]) {
310 type
= typeList
[++i
].to
;
313 return (type
< UnicodeScript_kScriptCount
&&
314 ch
>= UnicodeScriptType
[typeList
[i
].from
][UnicodeScriptTypeFrom
]) ?
315 typeList
[i
].value
: unknownType
;
// Return the UnicodeScriptTypeFrom column of the UnicodeScriptType table for
// the given block -- the same bound getUnicodeScriptType() compares against as
// a block's lower limit.  `type` indexes the table directly; no range check is
// performed here.
319 unicode::getUnicodeScriptStart( UnicodeScript type
) {
320 return UnicodeScriptType
[type
][UnicodeScriptTypeFrom
];
// Return the UnicodeScriptTypeTo column of the UnicodeScriptType table for
// the given block -- the same bound getUnicodeScriptType() compares against as
// a block's upper limit.  `type` indexes the table directly; no range check is
// performed here.
324 unicode::getUnicodeScriptEnd( UnicodeScript type
) {
325 return UnicodeScriptType
[type
][UnicodeScriptTypeTo
];
329 unicode::getUnicodeType( const sal_Unicode ch
) {
330 static sal_Unicode c
= 0x00;
331 static sal_Int16 r
= 0x00;
333 if (ch
== c
) return r
;
336 sal_Int16 address
= UnicodeTypeIndex
[ch
>> 8];
337 return r
= (sal_Int16
)((address
< UnicodeTypeNumberBlock
) ? UnicodeTypeBlockValue
[address
] :
338 UnicodeTypeValue
[((address
- UnicodeTypeNumberBlock
) << 8) + (ch
& 0xff)]);
342 unicode::getUnicodeDirection( const sal_Unicode ch
) {
343 static sal_Unicode c
= 0x00;
344 static sal_uInt8 r
= 0x00;
346 if (ch
== c
) return r
;
349 sal_Int16 address
= UnicodeDirectionIndex
[ch
>> 8];
350 return r
= ((address
< UnicodeDirectionNumberBlock
) ? UnicodeDirectionBlockValue
[address
] :
351 UnicodeDirectionValue
[((address
- UnicodeDirectionNumberBlock
) << 8) + (ch
& 0xff)]);
// Bit masks over UnicodeType category numbers.  Every macro body (and the
// argument of bit()) is parenthesised so that a mask can be combined with
// `&`, `~`, `==` and friends without operator-precedence surprises.

// Single-bit mask for category number `name`.
#define bit(name) (1 << (name))

#define UPPERMASK   bit(UnicodeType::UPPERCASE_LETTER)

#define LOWERMASK   bit(UnicodeType::LOWERCASE_LETTER)

#define TITLEMASK   bit(UnicodeType::TITLECASE_LETTER)

// Decimal digits, letter numbers and other numbers.
#define DIGITMASK   (bit(UnicodeType::DECIMAL_DIGIT_NUMBER)|\
                     bit(UnicodeType::LETTER_NUMBER)|\
                     bit(UnicodeType::OTHER_NUMBER))

// Cased letters plus modifier and other letters.
#define ALPHAMASK   (UPPERMASK|LOWERMASK|TITLEMASK|\
                     bit(UnicodeType::MODIFIER_LETTER)|\
                     bit(UnicodeType::OTHER_LETTER))

// Digits, letters and the three mark categories.
#define BASEMASK    (DIGITMASK|ALPHAMASK|\
                     bit(UnicodeType::NON_SPACING_MARK)|\
                     bit(UnicodeType::ENCLOSING_MARK)|\
                     bit(UnicodeType::COMBINING_SPACING_MARK))

#define SPACEMASK   (bit(UnicodeType::SPACE_SEPARATOR)|\
                     bit(UnicodeType::LINE_SEPARATOR)|\
                     bit(UnicodeType::PARAGRAPH_SEPARATOR))

#define PUNCTUATIONMASK (bit(UnicodeType::DASH_PUNCTUATION)|\
                     bit(UnicodeType::INITIAL_PUNCTUATION)|\
                     bit(UnicodeType::FINAL_PUNCTUATION)|\
                     bit(UnicodeType::CONNECTOR_PUNCTUATION)|\
                     bit(UnicodeType::OTHER_PUNCTUATION))

#define SYMBOLMASK  (bit(UnicodeType::MATH_SYMBOL)|\
                     bit(UnicodeType::CURRENCY_SYMBOL)|\
                     bit(UnicodeType::MODIFIER_SYMBOL)|\
                     bit(UnicodeType::OTHER_SYMBOL))

#define PRINTMASK   (BASEMASK|SPACEMASK|PUNCTUATIONMASK|SYMBOLMASK)

// Note: line and paragraph separators count as control characters here as
// well as being part of SPACEMASK.
#define CONTROLMASK (bit(UnicodeType::CONTROL)|\
                     bit(UnicodeType::FORMAT)|\
                     bit(UnicodeType::LINE_SEPARATOR)|\
                     bit(UnicodeType::PARAGRAPH_SEPARATOR))
398 #define IsType(func, mask) \
399 sal_Bool SAL_CALL func( const sal_Unicode ch) {\
400 return (bit(getUnicodeType(ch)) & (mask)) != 0;\
403 IsType(unicode::isUpper
, UPPERMASK
)
404 IsType(unicode::isLower
, LOWERMASK
)
405 IsType(unicode::isControl
, CONTROLMASK
)
406 IsType(unicode::isPrint
, PRINTMASK
)
407 IsType(unicode::isAlpha
, ALPHAMASK
)
408 IsType(unicode::isDigit
, DIGITMASK
)
409 IsType(unicode::isAlphaDigit
, ALPHAMASK
|DIGITMASK
)
410 IsType(unicode::isSpace
, SPACEMASK
)
412 #define CONTROLSPACE bit(0x09)|bit(0x0a)|bit(0x0b)|bit(0x0c)|bit(0x0d)|\
413 bit(0x1c)|bit(0x1d)|bit(0x1e)|bit(0x1f)
415 sal_Bool SAL_CALL
unicode::isWhiteSpace( const sal_Unicode ch
) {
416 return (ch
!= 0xa0 && isSpace(ch
)) || (ch
<= 0x1F && (bit(ch
) & (CONTROLSPACE
)));
419 sal_Int16 SAL_CALL
unicode::getScriptClassFromUScriptCode(UScriptCode eScript
)
421 //See unicode/uscript.h
422 static sal_Int16 scriptTypes
[] =
424 ScriptType::WEAK
, ScriptType::WEAK
, ScriptType::COMPLEX
, ScriptType::LATIN
, ScriptType::COMPLEX
,
425 ScriptType::ASIAN
, ScriptType::LATIN
, ScriptType::LATIN
, ScriptType::LATIN
, ScriptType::COMPLEX
,
426 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::LATIN
, ScriptType::LATIN
, ScriptType::LATIN
,
428 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::ASIAN
, ScriptType::ASIAN
, ScriptType::COMPLEX
,
429 ScriptType::ASIAN
, ScriptType::COMPLEX
, ScriptType::ASIAN
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
430 ScriptType::LATIN
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::LATIN
,
432 ScriptType::LATIN
, ScriptType::COMPLEX
, ScriptType::LATIN
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
433 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
434 ScriptType::LATIN
, ScriptType::ASIAN
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
436 ScriptType::COMPLEX
, ScriptType::LATIN
, ScriptType::LATIN
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
437 ScriptType::LATIN
, ScriptType::LATIN
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::LATIN
,
438 ScriptType::COMPLEX
, ScriptType::LATIN
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
440 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
441 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::LATIN
, ScriptType::LATIN
, ScriptType::COMPLEX
,
442 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::ASIAN
, ScriptType::ASIAN
,
444 ScriptType::COMPLEX
, ScriptType::LATIN
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
445 ScriptType::LATIN
, ScriptType::LATIN
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
446 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
448 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
449 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
450 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::WEAK
, ScriptType::WEAK
, ScriptType::COMPLEX
,
452 ScriptType::ASIAN
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
453 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
454 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::ASIAN
,
456 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
457 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::WEAK
, ScriptType::WEAK
,
458 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
460 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
461 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
467 if (eScript
< USCRIPT_COMMON
)
468 nRet
= ScriptType::WEAK
;
469 else if (static_cast<size_t>(eScript
) >= SAL_N_ELEMENTS(scriptTypes
))
470 nRet
= ScriptType::COMPLEX
; // anything new is going to be pretty wild
472 nRet
= scriptTypes
[eScript
];
// Script-code -> OString mapping, selected by case labels over the ICU
// UScriptCode value passed in as eScript.  Labels that directly follow one
// another (for example the three Egyptian scripts, or the Pahlavi/Parthian
// group) appear to share a single branch.
// NOTE(review): only the case labels are present in this span; the string each
// branch yields is not shown, so confirm per-script results against the full
// switch before relying on them.
// NOTE(review): "Exempler" in the function name looks like a misspelling of
// "Exemplar"; it is part of the public name and is therefore left untouched.
476 OString SAL_CALL
unicode::getExemplerLanguageForUScriptCode(UScriptCode eScript
)
481 case USCRIPT_CODE_LIMIT
:
482 case USCRIPT_INVALID_CODE
:
486 case USCRIPT_INHERITED
:
489 case USCRIPT_MATHEMATICAL_NOTATION
:
490 case USCRIPT_SYMBOLS
:
493 case USCRIPT_UNWRITTEN_LANGUAGES
:
494 case USCRIPT_UNKNOWN
:
497 case USCRIPT_NABATAEAN
: //no language with an assigned code yet
500 case USCRIPT_PALMYRENE
: //no language with an assigned code yet
506 case USCRIPT_ARMENIAN
:
509 case USCRIPT_BENGALI
:
512 case USCRIPT_BOPOMOFO
:
515 case USCRIPT_CHEROKEE
:
521 case USCRIPT_CYRILLIC
:
524 case USCRIPT_DESERET
:
527 case USCRIPT_DEVANAGARI
:
530 case USCRIPT_ETHIOPIC
:
533 case USCRIPT_GEORGIAN
:
542 case USCRIPT_GUJARATI
:
545 case USCRIPT_GURMUKHI
:
557 case USCRIPT_HIRAGANA
:
560 case USCRIPT_KANNADA
:
563 case USCRIPT_KATAKANA
:
575 case USCRIPT_MALAYALAM
:
578 case USCRIPT_MONGOLIAN
:
581 case USCRIPT_MYANMAR
:
587 case USCRIPT_OLD_ITALIC
:
596 case USCRIPT_SINHALA
:
614 case USCRIPT_TIBETAN
:
617 case USCRIPT_CANADIAN_ABORIGINAL
:
623 case USCRIPT_TAGALOG
:
626 case USCRIPT_HANUNOO
:
632 case USCRIPT_TAGBANWA
:
635 case USCRIPT_BRAILLE
:
638 case USCRIPT_CYPRIOT
:
644 case USCRIPT_LINEAR_B
:
647 case USCRIPT_OSMANYA
:
650 case USCRIPT_SHAVIAN
:
656 case USCRIPT_UGARITIC
:
659 case USCRIPT_KATAKANA_OR_HIRAGANA
:
662 case USCRIPT_BUGINESE
:
665 case USCRIPT_GLAGOLITIC
:
668 case USCRIPT_KHAROSHTHI
:
671 case USCRIPT_SYLOTI_NAGRI
:
674 case USCRIPT_NEW_TAI_LUE
:
677 case USCRIPT_TIFINAGH
:
680 case USCRIPT_OLD_PERSIAN
:
683 case USCRIPT_BALINESE
:
689 case USCRIPT_BLISSYMBOLS
:
701 case USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC
:
704 case USCRIPT_DEMOTIC_EGYPTIAN
:
705 case USCRIPT_HIERATIC_EGYPTIAN
:
706 case USCRIPT_EGYPTIAN_HIEROGLYPHS
:
709 case USCRIPT_KHUTSURI
:
712 case USCRIPT_SIMPLIFIED_HAN
:
715 case USCRIPT_TRADITIONAL_HAN
:
718 case USCRIPT_PAHAWH_HMONG
:
721 case USCRIPT_OLD_HUNGARIAN
:
724 case USCRIPT_HARAPPAN_INDUS
:
727 case USCRIPT_JAVANESE
:
730 case USCRIPT_KAYAH_LI
:
733 case USCRIPT_LATIN_FRAKTUR
:
736 case USCRIPT_LATIN_GAELIC
:
742 case USCRIPT_LINEAR_A
:
745 case USCRIPT_MANDAIC
:
748 case USCRIPT_MAYAN_HIEROGLYPHS
:
751 case USCRIPT_MEROITIC
:
760 case USCRIPT_OLD_PERMIC
:
763 case USCRIPT_PHAGS_PA
:
766 case USCRIPT_PHOENICIAN
:
769 case USCRIPT_PHONETIC_POLLARD
:
772 case USCRIPT_RONGORONGO
:
778 case USCRIPT_ESTRANGELO_SYRIAC
:
781 case USCRIPT_WESTERN_SYRIAC
:
784 case USCRIPT_EASTERN_SYRIAC
:
787 case USCRIPT_TENGWAR
:
793 case USCRIPT_VISIBLE_SPEECH
:
796 case USCRIPT_CUNEIFORM
:
802 case USCRIPT_JAPANESE
:
814 case USCRIPT_OL_CHIKI
:
820 case USCRIPT_SAURASHTRA
:
823 case USCRIPT_SIGN_WRITING
:
826 case USCRIPT_SUNDANESE
:
832 case USCRIPT_MEITEI_MAYEK
:
835 case USCRIPT_IMPERIAL_ARAMAIC
:
838 case USCRIPT_AVESTAN
:
850 case USCRIPT_MANICHAEAN
:
853 case USCRIPT_INSCRIPTIONAL_PAHLAVI
:
854 case USCRIPT_PSALTER_PAHLAVI
:
855 case USCRIPT_BOOK_PAHLAVI
:
856 case USCRIPT_INSCRIPTIONAL_PARTHIAN
:
859 case USCRIPT_SAMARITAN
:
862 case USCRIPT_TAI_VIET
:
871 case USCRIPT_NAKHI_GEBA
:
874 case USCRIPT_OLD_SOUTH_ARABIAN
:
877 case USCRIPT_BASSA_VAH
:
880 case USCRIPT_DUPLOYAN_SHORTAND
:
883 case USCRIPT_ELBASAN
:
886 case USCRIPT_GRANTHA
:
898 case USCRIPT_MEROITIC_CURSIVE
:
901 case USCRIPT_OLD_NORTH_ARABIAN
:
907 case USCRIPT_WARANG_CITI
:
// The labels from here on are compiled only when the ICU headers are version
// 4.8 or newer, per the guard below.
910 #if (U_ICU_VERSION_MAJOR_NUM > 4) || (U_ICU_VERSION_MAJOR_NUM == 4 && U_ICU_VERSION_MINOR_NUM >= 8)
914 case USCRIPT_JURCHEN
:
920 case USCRIPT_NUSHU
: //no language with an assigned code yet
923 case USCRIPT_SHARADA
:
926 case USCRIPT_SORA_SOMPENG
:
// The labels from here on additionally require an ICU major version above 4.
939 #if (U_ICU_VERSION_MAJOR_NUM > 4)
940 case USCRIPT_ANATOLIAN_HIEROGLYPHS
:
946 case USCRIPT_TIRHUTA
:
954 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */