1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #include <boost/scoped_ptr.hpp>
21 #include <com/sun/star/i18n/UnicodeType.hpp>
22 #include <com/sun/star/i18n/KCharacterType.hpp>
23 #include <com/sun/star/i18n/ScriptType.hpp>
24 #include <i18nlangtag/languagetag.hxx>
25 #include <i18nlangtag/languagetagicu.hxx>
26 #include <i18nutil/unicode.hxx>
27 #include <sal/log.hxx>
28 #include <unicode/numfmt.h>
29 #include "unicode_data.h"
31 // Workaround for glibc braindamage:
32 // glibc 2.4's langinfo.h does "#define CURRENCY_SYMBOL __CURRENCY_SYMBOL"
33 // which (obviously) breaks UnicodeType::CURRENCY_SYMBOL
34 #undef CURRENCY_SYMBOL
36 using namespace ::com::sun::star::i18n
;
38 static const ScriptTypeList defaultTypeList
[] = {
39 { UnicodeScript_kBasicLatin
,
40 UnicodeScript_kBasicLatin
,
41 UnicodeScript_kBasicLatin
}, // 0,
42 { UnicodeScript_kLatin1Supplement
,
43 UnicodeScript_kLatin1Supplement
,
44 UnicodeScript_kLatin1Supplement
},// 1,
45 { UnicodeScript_kLatinExtendedA
,
46 UnicodeScript_kLatinExtendedA
,
47 UnicodeScript_kLatinExtendedA
}, // 2,
48 { UnicodeScript_kLatinExtendedB
,
49 UnicodeScript_kLatinExtendedB
,
50 UnicodeScript_kLatinExtendedB
}, // 3,
51 { UnicodeScript_kIPAExtension
,
52 UnicodeScript_kIPAExtension
,
53 UnicodeScript_kIPAExtension
}, // 4,
54 { UnicodeScript_kSpacingModifier
,
55 UnicodeScript_kSpacingModifier
,
56 UnicodeScript_kSpacingModifier
}, // 5,
57 { UnicodeScript_kCombiningDiacritical
,
58 UnicodeScript_kCombiningDiacritical
,
59 UnicodeScript_kCombiningDiacritical
}, // 6,
60 { UnicodeScript_kGreek
,
62 UnicodeScript_kGreek
}, // 7,
63 { UnicodeScript_kCyrillic
,
64 UnicodeScript_kCyrillic
,
65 UnicodeScript_kCyrillic
}, // 8,
66 { UnicodeScript_kArmenian
,
67 UnicodeScript_kArmenian
,
68 UnicodeScript_kArmenian
}, // 9,
69 { UnicodeScript_kHebrew
,
70 UnicodeScript_kHebrew
,
71 UnicodeScript_kHebrew
}, // 10,
72 { UnicodeScript_kArabic
,
73 UnicodeScript_kArabic
,
74 UnicodeScript_kArabic
}, // 11,
75 { UnicodeScript_kSyriac
,
76 UnicodeScript_kSyriac
,
77 UnicodeScript_kSyriac
}, // 12,
78 { UnicodeScript_kThaana
,
79 UnicodeScript_kThaana
,
80 UnicodeScript_kThaana
}, // 13,
81 { UnicodeScript_kDevanagari
,
82 UnicodeScript_kDevanagari
,
83 UnicodeScript_kDevanagari
}, // 14,
84 { UnicodeScript_kBengali
,
85 UnicodeScript_kBengali
,
86 UnicodeScript_kBengali
}, // 15,
87 { UnicodeScript_kGurmukhi
,
88 UnicodeScript_kGurmukhi
,
89 UnicodeScript_kGurmukhi
}, // 16,
90 { UnicodeScript_kGujarati
,
91 UnicodeScript_kGujarati
,
92 UnicodeScript_kGujarati
}, // 17,
93 { UnicodeScript_kOriya
,
95 UnicodeScript_kOriya
}, // 18,
96 { UnicodeScript_kTamil
,
98 UnicodeScript_kTamil
}, // 19,
99 { UnicodeScript_kTelugu
,
100 UnicodeScript_kTelugu
,
101 UnicodeScript_kTelugu
}, // 20,
102 { UnicodeScript_kKannada
,
103 UnicodeScript_kKannada
,
104 UnicodeScript_kKannada
}, // 21,
105 { UnicodeScript_kMalayalam
,
106 UnicodeScript_kMalayalam
,
107 UnicodeScript_kMalayalam
}, // 22,
108 { UnicodeScript_kSinhala
,
109 UnicodeScript_kSinhala
,
110 UnicodeScript_kSinhala
}, // 23,
111 { UnicodeScript_kThai
,
113 UnicodeScript_kThai
}, // 24,
114 { UnicodeScript_kLao
,
116 UnicodeScript_kLao
}, // 25,
117 { UnicodeScript_kTibetan
,
118 UnicodeScript_kTibetan
,
119 UnicodeScript_kTibetan
}, // 26,
120 { UnicodeScript_kMyanmar
,
121 UnicodeScript_kMyanmar
,
122 UnicodeScript_kMyanmar
}, // 27,
123 { UnicodeScript_kGeorgian
,
124 UnicodeScript_kGeorgian
,
125 UnicodeScript_kGeorgian
}, // 28,
126 { UnicodeScript_kHangulJamo
,
127 UnicodeScript_kHangulJamo
,
128 UnicodeScript_kHangulJamo
}, // 29,
129 { UnicodeScript_kEthiopic
,
130 UnicodeScript_kEthiopic
,
131 UnicodeScript_kEthiopic
}, // 30,
132 { UnicodeScript_kCherokee
,
133 UnicodeScript_kCherokee
,
134 UnicodeScript_kCherokee
}, // 31,
135 { UnicodeScript_kUnifiedCanadianAboriginalSyllabics
,
136 UnicodeScript_kUnifiedCanadianAboriginalSyllabics
,
137 UnicodeScript_kUnifiedCanadianAboriginalSyllabics
}, // 32,
138 { UnicodeScript_kOgham
,
139 UnicodeScript_kOgham
,
140 UnicodeScript_kOgham
}, // 33,
141 { UnicodeScript_kRunic
,
142 UnicodeScript_kRunic
,
143 UnicodeScript_kRunic
}, // 34,
144 { UnicodeScript_kKhmer
,
145 UnicodeScript_kKhmer
,
146 UnicodeScript_kKhmer
}, // 35,
147 { UnicodeScript_kMongolian
,
148 UnicodeScript_kMongolian
,
149 UnicodeScript_kMongolian
}, // 36,
150 { UnicodeScript_kLatinExtendedAdditional
,
151 UnicodeScript_kLatinExtendedAdditional
,
152 UnicodeScript_kLatinExtendedAdditional
}, // 37,
153 { UnicodeScript_kGreekExtended
,
154 UnicodeScript_kGreekExtended
,
155 UnicodeScript_kGreekExtended
}, // 38,
156 { UnicodeScript_kGeneralPunctuation
,
157 UnicodeScript_kGeneralPunctuation
,
158 UnicodeScript_kGeneralPunctuation
}, // 39,
159 { UnicodeScript_kSuperSubScript
,
160 UnicodeScript_kSuperSubScript
,
161 UnicodeScript_kSuperSubScript
}, // 40,
162 { UnicodeScript_kCurrencySymbolScript
,
163 UnicodeScript_kCurrencySymbolScript
,
164 UnicodeScript_kCurrencySymbolScript
}, // 41,
165 { UnicodeScript_kSymbolCombiningMark
,
166 UnicodeScript_kSymbolCombiningMark
,
167 UnicodeScript_kSymbolCombiningMark
}, // 42,
168 { UnicodeScript_kLetterlikeSymbol
,
169 UnicodeScript_kLetterlikeSymbol
,
170 UnicodeScript_kLetterlikeSymbol
}, // 43,
171 { UnicodeScript_kNumberForm
,
172 UnicodeScript_kNumberForm
,
173 UnicodeScript_kNumberForm
}, // 44,
174 { UnicodeScript_kArrow
,
175 UnicodeScript_kArrow
,
176 UnicodeScript_kArrow
}, // 45,
177 { UnicodeScript_kMathOperator
,
178 UnicodeScript_kMathOperator
,
179 UnicodeScript_kMathOperator
}, // 46,
180 { UnicodeScript_kMiscTechnical
,
181 UnicodeScript_kMiscTechnical
,
182 UnicodeScript_kMiscTechnical
}, // 47,
183 { UnicodeScript_kControlPicture
,
184 UnicodeScript_kControlPicture
,
185 UnicodeScript_kControlPicture
}, // 48,
186 { UnicodeScript_kOpticalCharacter
,
187 UnicodeScript_kOpticalCharacter
,
188 UnicodeScript_kOpticalCharacter
}, // 49,
189 { UnicodeScript_kEnclosedAlphanumeric
,
190 UnicodeScript_kEnclosedAlphanumeric
,
191 UnicodeScript_kEnclosedAlphanumeric
}, // 50,
192 { UnicodeScript_kBoxDrawing
,
193 UnicodeScript_kBoxDrawing
,
194 UnicodeScript_kBoxDrawing
}, // 51,
195 { UnicodeScript_kBlockElement
,
196 UnicodeScript_kBlockElement
,
197 UnicodeScript_kBlockElement
}, // 52,
198 { UnicodeScript_kGeometricShape
,
199 UnicodeScript_kGeometricShape
,
200 UnicodeScript_kGeometricShape
}, // 53,
201 { UnicodeScript_kMiscSymbol
,
202 UnicodeScript_kMiscSymbol
,
203 UnicodeScript_kMiscSymbol
}, // 54,
204 { UnicodeScript_kDingbat
,
205 UnicodeScript_kDingbat
,
206 UnicodeScript_kDingbat
}, // 55,
207 { UnicodeScript_kBraillePatterns
,
208 UnicodeScript_kBraillePatterns
,
209 UnicodeScript_kBraillePatterns
}, // 56,
210 { UnicodeScript_kCJKRadicalsSupplement
,
211 UnicodeScript_kCJKRadicalsSupplement
,
212 UnicodeScript_kCJKRadicalsSupplement
}, // 57,
213 { UnicodeScript_kKangxiRadicals
,
214 UnicodeScript_kKangxiRadicals
,
215 UnicodeScript_kKangxiRadicals
}, // 58,
216 { UnicodeScript_kIdeographicDescriptionCharacters
,
217 UnicodeScript_kIdeographicDescriptionCharacters
,
218 UnicodeScript_kIdeographicDescriptionCharacters
}, // 59,
219 { UnicodeScript_kCJKSymbolPunctuation
,
220 UnicodeScript_kCJKSymbolPunctuation
,
221 UnicodeScript_kCJKSymbolPunctuation
}, // 60,
222 { UnicodeScript_kHiragana
,
223 UnicodeScript_kHiragana
,
224 UnicodeScript_kHiragana
}, // 61,
225 { UnicodeScript_kKatakana
,
226 UnicodeScript_kKatakana
,
227 UnicodeScript_kKatakana
}, // 62,
228 { UnicodeScript_kBopomofo
,
229 UnicodeScript_kBopomofo
,
230 UnicodeScript_kBopomofo
}, // 63,
231 { UnicodeScript_kHangulCompatibilityJamo
,
232 UnicodeScript_kHangulCompatibilityJamo
,
233 UnicodeScript_kHangulCompatibilityJamo
}, // 64,
234 { UnicodeScript_kKanbun
,
235 UnicodeScript_kKanbun
,
236 UnicodeScript_kKanbun
}, // 65,
237 { UnicodeScript_kBopomofoExtended
,
238 UnicodeScript_kBopomofoExtended
,
239 UnicodeScript_kBopomofoExtended
}, // 66,
240 { UnicodeScript_kEnclosedCJKLetterMonth
,
241 UnicodeScript_kEnclosedCJKLetterMonth
,
242 UnicodeScript_kEnclosedCJKLetterMonth
}, // 67,
243 { UnicodeScript_kCJKCompatibility
,
244 UnicodeScript_kCJKCompatibility
,
245 UnicodeScript_kCJKCompatibility
}, // 68,
246 { UnicodeScript_k_CJKUnifiedIdeographsExtensionA
,
247 UnicodeScript_k_CJKUnifiedIdeographsExtensionA
,
248 UnicodeScript_k_CJKUnifiedIdeographsExtensionA
}, // 69,
249 { UnicodeScript_kCJKUnifiedIdeograph
,
250 UnicodeScript_kCJKUnifiedIdeograph
,
251 UnicodeScript_kCJKUnifiedIdeograph
}, // 70,
252 { UnicodeScript_kYiSyllables
,
253 UnicodeScript_kYiSyllables
,
254 UnicodeScript_kYiSyllables
}, // 71,
255 { UnicodeScript_kYiRadicals
,
256 UnicodeScript_kYiRadicals
,
257 UnicodeScript_kYiRadicals
}, // 72,
258 { UnicodeScript_kHangulSyllable
,
259 UnicodeScript_kHangulSyllable
,
260 UnicodeScript_kHangulSyllable
}, // 73,
261 { UnicodeScript_kHighSurrogate
,
262 UnicodeScript_kHighSurrogate
,
263 UnicodeScript_kHighSurrogate
}, // 74,
264 { UnicodeScript_kHighPrivateUseSurrogate
,
265 UnicodeScript_kHighPrivateUseSurrogate
,
266 UnicodeScript_kHighPrivateUseSurrogate
}, // 75,
267 { UnicodeScript_kLowSurrogate
,
268 UnicodeScript_kLowSurrogate
,
269 UnicodeScript_kLowSurrogate
}, // 76,
270 { UnicodeScript_kPrivateUse
,
271 UnicodeScript_kPrivateUse
,
272 UnicodeScript_kPrivateUse
}, // 77,
273 { UnicodeScript_kCJKCompatibilityIdeograph
,
274 UnicodeScript_kCJKCompatibilityIdeograph
,
275 UnicodeScript_kCJKCompatibilityIdeograph
}, // 78,
276 { UnicodeScript_kAlphabeticPresentation
,
277 UnicodeScript_kAlphabeticPresentation
,
278 UnicodeScript_kAlphabeticPresentation
}, // 79,
279 { UnicodeScript_kArabicPresentationA
,
280 UnicodeScript_kArabicPresentationA
,
281 UnicodeScript_kArabicPresentationA
}, // 80,
282 { UnicodeScript_kCombiningHalfMark
,
283 UnicodeScript_kCombiningHalfMark
,
284 UnicodeScript_kCombiningHalfMark
}, // 81,
285 { UnicodeScript_kCJKCompatibilityForm
,
286 UnicodeScript_kCJKCompatibilityForm
,
287 UnicodeScript_kCJKCompatibilityForm
}, // 82,
288 { UnicodeScript_kSmallFormVariant
,
289 UnicodeScript_kSmallFormVariant
,
290 UnicodeScript_kSmallFormVariant
}, // 83,
291 { UnicodeScript_kArabicPresentationB
,
292 UnicodeScript_kArabicPresentationB
,
293 UnicodeScript_kArabicPresentationB
}, // 84,
294 { UnicodeScript_kNoScript
,
295 UnicodeScript_kNoScript
,
296 UnicodeScript_kNoScript
}, // 85,
297 { UnicodeScript_kHalfwidthFullwidthForm
,
298 UnicodeScript_kHalfwidthFullwidthForm
,
299 UnicodeScript_kHalfwidthFullwidthForm
}, // 86,
300 { UnicodeScript_kScriptCount
,
301 UnicodeScript_kScriptCount
,
302 UnicodeScript_kNoScript
} // 87,
306 unicode::getUnicodeScriptType( const sal_Unicode ch
, const ScriptTypeList
* typeList
, sal_Int16 unknownType
) {
309 typeList
= defaultTypeList
;
310 unknownType
= UnicodeScript_kNoScript
;
313 sal_Int16 i
= 0, type
= typeList
[0].to
;
314 while (type
< UnicodeScript_kScriptCount
&& ch
> UnicodeScriptType
[type
][UnicodeScriptTypeTo
]) {
315 type
= typeList
[++i
].to
;
318 return (type
< UnicodeScript_kScriptCount
&&
319 ch
>= UnicodeScriptType
[typeList
[i
].from
][UnicodeScriptTypeFrom
]) ?
320 typeList
[i
].value
: unknownType
;
324 unicode::getUnicodeScriptStart( UnicodeScript type
) {
325 return UnicodeScriptType
[type
][UnicodeScriptTypeFrom
];
329 unicode::getUnicodeScriptEnd( UnicodeScript type
) {
330 return UnicodeScriptType
[type
][UnicodeScriptTypeTo
];
334 unicode::getUnicodeType( const sal_Unicode ch
) {
335 static sal_Unicode c
= 0x00;
336 static sal_Int16 r
= 0x00;
338 if (ch
== c
) return r
;
341 sal_Int16 address
= UnicodeTypeIndex
[ch
>> 8];
342 return r
= (sal_Int16
)((address
< UnicodeTypeNumberBlock
) ? UnicodeTypeBlockValue
[address
] :
343 UnicodeTypeValue
[((address
- UnicodeTypeNumberBlock
) << 8) + (ch
& 0xff)]);
347 unicode::getUnicodeDirection( const sal_Unicode ch
) {
348 static sal_Unicode c
= 0x00;
349 static sal_uInt8 r
= 0x00;
351 if (ch
== c
) return r
;
354 sal_Int16 address
= UnicodeDirectionIndex
[ch
>> 8];
355 return r
= ((address
< UnicodeDirectionNumberBlock
) ? UnicodeDirectionBlockValue
[address
] :
356 UnicodeDirectionValue
[((address
- UnicodeDirectionNumberBlock
) << 8) + (ch
& 0xff)]);
// Bit masks over UnicodeType values, used by the IsType() predicates below.
//
// bit(n) is the mask with only bit n set.  The argument and every multi-term
// expansion are parenthesised so the macros stay correct inside larger
// expressions (e.g. `ALPHAMASK & x` or `bit(a | b)`).
#define bit(name)   (1U << (name))

#define UPPERMASK   bit(UnicodeType::UPPERCASE_LETTER)

#define LOWERMASK   bit(UnicodeType::LOWERCASE_LETTER)

#define TITLEMASK   bit(UnicodeType::TITLECASE_LETTER)

// Any letter: upper, lower, title case, modifier or other.
#define ALPHAMASK   (UPPERMASK|LOWERMASK|TITLEMASK|\
                    bit(UnicodeType::MODIFIER_LETTER)|\
                    bit(UnicodeType::OTHER_LETTER))

// Space, line and paragraph separators.
#define SPACEMASK   (bit(UnicodeType::SPACE_SEPARATOR)|\
                    bit(UnicodeType::LINE_SEPARATOR)|\
                    bit(UnicodeType::PARAGRAPH_SEPARATOR))

// Control and format characters plus the line/paragraph separators.
#define CONTROLMASK (bit(UnicodeType::CONTROL)|\
                    bit(UnicodeType::FORMAT)|\
                    bit(UnicodeType::LINE_SEPARATOR)|\
                    bit(UnicodeType::PARAGRAPH_SEPARATOR))
381 #define IsType(func, mask) \
382 bool SAL_CALL func( const sal_Unicode ch) {\
383 return (bit(getUnicodeType(ch)) & (mask)) != 0;\
386 IsType(unicode::isControl
, CONTROLMASK
)
387 IsType(unicode::isAlpha
, ALPHAMASK
)
388 IsType(unicode::isSpace
, SPACEMASK
)
390 #define CONTROLSPACE bit(0x09)|bit(0x0a)|bit(0x0b)|bit(0x0c)|bit(0x0d)|\
391 bit(0x1c)|bit(0x1d)|bit(0x1e)|bit(0x1f)
393 bool SAL_CALL
unicode::isWhiteSpace( const sal_Unicode ch
) {
394 return (ch
!= 0xa0 && isSpace(ch
)) || (ch
<= 0x1F && (bit(ch
) & (CONTROLSPACE
)));
397 sal_Int16 SAL_CALL
unicode::getScriptClassFromUScriptCode(UScriptCode eScript
)
399 //See unicode/uscript.h
400 static const sal_Int16 scriptTypes
[] =
402 ScriptType::WEAK
, ScriptType::WEAK
, ScriptType::COMPLEX
, ScriptType::LATIN
, ScriptType::COMPLEX
,
403 ScriptType::ASIAN
, ScriptType::LATIN
, ScriptType::LATIN
, ScriptType::LATIN
, ScriptType::COMPLEX
,
404 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::LATIN
, ScriptType::LATIN
, ScriptType::LATIN
,
406 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::ASIAN
, ScriptType::ASIAN
, ScriptType::COMPLEX
,
407 ScriptType::ASIAN
, ScriptType::COMPLEX
, ScriptType::ASIAN
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
408 ScriptType::LATIN
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::LATIN
,
410 ScriptType::LATIN
, ScriptType::COMPLEX
, ScriptType::LATIN
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
411 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
412 ScriptType::LATIN
, ScriptType::ASIAN
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
414 ScriptType::COMPLEX
, ScriptType::LATIN
, ScriptType::LATIN
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
415 ScriptType::LATIN
, ScriptType::LATIN
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::LATIN
,
416 ScriptType::COMPLEX
, ScriptType::LATIN
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
418 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
419 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::LATIN
, ScriptType::LATIN
, ScriptType::COMPLEX
,
420 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::ASIAN
, ScriptType::ASIAN
,
422 ScriptType::COMPLEX
, ScriptType::LATIN
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
423 ScriptType::LATIN
, ScriptType::LATIN
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
424 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
426 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
427 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
428 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::WEAK
, ScriptType::WEAK
, ScriptType::COMPLEX
,
430 ScriptType::ASIAN
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
431 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
432 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::ASIAN
,
434 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
435 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::WEAK
, ScriptType::WEAK
,
436 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
438 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
439 ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
, ScriptType::COMPLEX
,
445 if (eScript
< USCRIPT_COMMON
)
446 nRet
= ScriptType::WEAK
;
447 else if (static_cast<size_t>(eScript
) >= SAL_N_ELEMENTS(scriptTypes
))
448 nRet
= ScriptType::COMPLEX
; // anything new is going to be pretty wild
450 nRet
= scriptTypes
[eScript
];
// Takes an ICU script code (UScriptCode) and returns an OString.  Judging by
// the name, the result is an exemplar language for that script (presumably a
// language tag -- TODO confirm against the declaring header).
//
// NOTE(review): in this copy only the `case` labels of the dispatch survive.
// The statement attached to each label, the enclosing switch and braces, the
// #endif lines matching the #if guards below and the final return are not
// visible here, so the per-script results cannot be checked from this text.
454 OString SAL_CALL
unicode::getExemplarLanguageForUScriptCode(UScriptCode eScript
)
459 case USCRIPT_CODE_LIMIT
:
460 case USCRIPT_INVALID_CODE
:
464 case USCRIPT_INHERITED
:
467 case USCRIPT_MATHEMATICAL_NOTATION
:
468 case USCRIPT_SYMBOLS
:
471 case USCRIPT_UNWRITTEN_LANGUAGES
:
472 case USCRIPT_UNKNOWN
:
478 case USCRIPT_ARMENIAN
:
481 case USCRIPT_BENGALI
:
484 case USCRIPT_BOPOMOFO
:
487 case USCRIPT_CHEROKEE
:
493 case USCRIPT_CYRILLIC
:
496 case USCRIPT_DESERET
:
499 case USCRIPT_DEVANAGARI
:
502 case USCRIPT_ETHIOPIC
:
505 case USCRIPT_GEORGIAN
:
514 case USCRIPT_GUJARATI
:
517 case USCRIPT_GURMUKHI
:
529 case USCRIPT_HIRAGANA
:
532 case USCRIPT_KANNADA
:
535 case USCRIPT_KATAKANA
:
547 case USCRIPT_MALAYALAM
:
550 case USCRIPT_MONGOLIAN
:
553 case USCRIPT_MYANMAR
:
559 case USCRIPT_OLD_ITALIC
:
568 case USCRIPT_SINHALA
:
586 case USCRIPT_TIBETAN
:
589 case USCRIPT_CANADIAN_ABORIGINAL
:
595 case USCRIPT_TAGALOG
:
598 case USCRIPT_HANUNOO
:
604 case USCRIPT_TAGBANWA
:
607 case USCRIPT_BRAILLE
:
610 case USCRIPT_CYPRIOT
:
616 case USCRIPT_LINEAR_B
:
619 case USCRIPT_OSMANYA
:
622 case USCRIPT_SHAVIAN
:
628 case USCRIPT_UGARITIC
:
631 case USCRIPT_KATAKANA_OR_HIRAGANA
:
634 case USCRIPT_BUGINESE
:
637 case USCRIPT_GLAGOLITIC
:
640 case USCRIPT_KHAROSHTHI
:
643 case USCRIPT_SYLOTI_NAGRI
:
646 case USCRIPT_NEW_TAI_LUE
:
649 case USCRIPT_TIFINAGH
:
652 case USCRIPT_OLD_PERSIAN
:
655 case USCRIPT_BALINESE
:
661 case USCRIPT_BLISSYMBOLS
:
673 case USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC
:
676 case USCRIPT_DEMOTIC_EGYPTIAN
:
677 case USCRIPT_HIERATIC_EGYPTIAN
:
678 case USCRIPT_EGYPTIAN_HIEROGLYPHS
:
681 case USCRIPT_KHUTSURI
:
684 case USCRIPT_SIMPLIFIED_HAN
:
687 case USCRIPT_TRADITIONAL_HAN
:
690 case USCRIPT_PAHAWH_HMONG
:
693 case USCRIPT_OLD_HUNGARIAN
:
696 case USCRIPT_HARAPPAN_INDUS
:
699 case USCRIPT_JAVANESE
:
702 case USCRIPT_KAYAH_LI
:
705 case USCRIPT_LATIN_FRAKTUR
:
708 case USCRIPT_LATIN_GAELIC
:
714 case USCRIPT_LINEAR_A
:
717 case USCRIPT_MAYAN_HIEROGLYPHS
:
720 case USCRIPT_MEROITIC
:
729 case USCRIPT_OLD_PERMIC
:
732 case USCRIPT_PHAGS_PA
:
735 case USCRIPT_PHOENICIAN
:
738 case USCRIPT_PHONETIC_POLLARD
:
741 case USCRIPT_RONGORONGO
:
747 case USCRIPT_ESTRANGELO_SYRIAC
:
750 case USCRIPT_WESTERN_SYRIAC
:
753 case USCRIPT_EASTERN_SYRIAC
:
756 case USCRIPT_TENGWAR
:
762 case USCRIPT_VISIBLE_SPEECH
:
765 case USCRIPT_CUNEIFORM
:
771 case USCRIPT_JAPANESE
:
783 case USCRIPT_OL_CHIKI
:
789 case USCRIPT_SAURASHTRA
:
792 case USCRIPT_SIGN_WRITING
:
795 case USCRIPT_SUNDANESE
:
801 case USCRIPT_MEITEI_MAYEK
:
804 case USCRIPT_IMPERIAL_ARAMAIC
:
807 case USCRIPT_AVESTAN
:
819 case USCRIPT_MANICHAEAN
:
822 case USCRIPT_INSCRIPTIONAL_PAHLAVI
:
823 case USCRIPT_PSALTER_PAHLAVI
:
824 case USCRIPT_BOOK_PAHLAVI
:
825 case USCRIPT_INSCRIPTIONAL_PARTHIAN
:
828 case USCRIPT_SAMARITAN
:
831 case USCRIPT_TAI_VIET
:
834 case USCRIPT_MANDAEAN
: /* Aliased to USCRIPT_MANDAIC in icu 4.6. */
// Guard: the labels that follow are compiled only with ICU 4.4 or newer.
837 #if (U_ICU_VERSION_MAJOR_NUM > 4) || (U_ICU_VERSION_MAJOR_NUM == 4 && U_ICU_VERSION_MINOR_NUM >= 4)
838 case USCRIPT_NABATAEAN
: //no language with an assigned code yet
841 case USCRIPT_PALMYRENE
: //no language with an assigned code yet
850 case USCRIPT_NAKHI_GEBA
:
853 case USCRIPT_OLD_SOUTH_ARABIAN
:
856 case USCRIPT_BASSA_VAH
:
859 case USCRIPT_DUPLOYAN_SHORTAND
:
862 case USCRIPT_ELBASAN
:
865 case USCRIPT_GRANTHA
:
877 case USCRIPT_MEROITIC_CURSIVE
:
880 case USCRIPT_OLD_NORTH_ARABIAN
:
886 case USCRIPT_WARANG_CITI
:
// Guard: the labels that follow are compiled only with ICU 4.8 or newer.
890 #if (U_ICU_VERSION_MAJOR_NUM > 4) || (U_ICU_VERSION_MAJOR_NUM == 4 && U_ICU_VERSION_MINOR_NUM >= 8)
894 case USCRIPT_JURCHEN
:
900 case USCRIPT_NUSHU
: //no language with an assigned code yet
903 case USCRIPT_SHARADA
:
906 case USCRIPT_SORA_SOMPENG
:
// Guard: the labels that follow are compiled only with ICU major version 49 or newer.
919 #if (U_ICU_VERSION_MAJOR_NUM >= 49)
920 case USCRIPT_ANATOLIAN_HIEROGLYPHS
:
926 case USCRIPT_TIRHUTA
:
// Guard: the labels that follow are compiled only with ICU major version 52 or newer.
930 #if (U_ICU_VERSION_MAJOR_NUM >= 52)
931 case USCRIPT_CAUCASIAN_ALBANIAN
:
934 case USCRIPT_MAHAJANI
:
// Guard: the labels that follow are compiled only with ICU major version 54 or newer.
938 #if (U_ICU_VERSION_MAJOR_NUM >= 54)
948 case USCRIPT_MULTANI
:
951 case USCRIPT_PAU_CIN_HAU
:
954 case USCRIPT_SIDDHAM
:
962 //Format a number as a percentage according to the rules of the given
963 //language, e.g. 100 -> "100%" for en-US vs "100 %" for de-DE
964 OUString SAL_CALL
unicode::formatPercent(double dNumber
,
965 const LanguageTag
&rLangTag
)
967 // get a currency formatter for this locale ID
968 UErrorCode errorCode
=U_ZERO_ERROR
;
970 LanguageTag
aLangTag(rLangTag
);
972 // As of CLDR Version 24 these languages were not listed as using spacing
973 // between number and % but are reported as such by our l10n groups
974 // http://www.unicode.org/cldr/charts/24/by_type/numbers.number_formatting_patterns.html
975 // so format using French which has the desired rules
976 if (aLangTag
.getLanguage() == "es" || aLangTag
.getLanguage() == "sl")
977 aLangTag
= LanguageTag("fr-FR");
979 icu::Locale aLocale
= LanguageTagIcu::getIcuLocale(aLangTag
);
981 boost::scoped_ptr
<NumberFormat
> xF(
982 NumberFormat::createPercentInstance(aLocale
, errorCode
));
983 if(U_FAILURE(errorCode
))
985 SAL_WARN("i18n", "NumberFormat::createPercentInstance failed");
986 return OUString::number(dNumber
) + "%";
989 UnicodeString output
;
990 xF
->format(dNumber
/100, output
);
991 OUString
aRet(reinterpret_cast<const sal_Unicode
*>(output
.getBuffer()),
993 if (rLangTag
.getLanguage() == "de")
995 //narrow no-break space instead of (normal) no-break space
996 return aRet
.replace(0x00A0, 0x202F);
1001 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */