// Provenance: LibreOffice.git, version 6.4.0.0.beta1 (tag libreoffice-6.4.0.0.beta1),
// i18nlangtag/source/isolang/mslangid.cxx,
// blob 0d76ba3b8288c07f91591f5192b249f10776a559
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #include <sal/config.h>
21 #include <rtl/ustring.hxx>
22 #include <com/sun/star/i18n/ScriptType.hpp>
24 #include <i18nlangtag/mslangid.hxx>
26 // Only very limited few functions that are guaranteed to not be called from
27 // LanguageTag may use LanguageTag ...
28 #include <i18nlangtag/languagetag.hxx>
31 LanguageType MsLangId::nConfiguredSystemLanguage = LANGUAGE_SYSTEM;
32 LanguageType MsLangId::nConfiguredSystemUILanguage = LANGUAGE_SYSTEM;
34 LanguageType MsLangId::nConfiguredWesternFallback = LANGUAGE_SYSTEM;
35 LanguageType MsLangId::nConfiguredAsianFallback = LANGUAGE_SYSTEM;
36 LanguageType MsLangId::nConfiguredComplexFallback = LANGUAGE_SYSTEM;
38 // static
39 void MsLangId::LanguageTagAccess::setConfiguredSystemLanguage( LanguageType nLang )
41 nConfiguredSystemLanguage = nLang;
45 // static
46 void MsLangId::setConfiguredSystemUILanguage( LanguageType nLang )
48 nConfiguredSystemUILanguage = nLang;
51 // static
52 void MsLangId::setConfiguredWesternFallback( LanguageType nLang )
54 nConfiguredWesternFallback = nLang;
57 // static
58 void MsLangId::setConfiguredAsianFallback( LanguageType nLang )
60 nConfiguredAsianFallback = nLang;
63 // static
64 void MsLangId::setConfiguredComplexFallback( LanguageType nLang )
66 nConfiguredComplexFallback = nLang;
69 // static
70 inline LanguageType MsLangId::simplifySystemLanguages( LanguageType nLang )
72 if (nLang.anyOf( LANGUAGE_PROCESS_OR_USER_DEFAULT,
73 LANGUAGE_SYSTEM_DEFAULT,
74 LANGUAGE_SYSTEM))
75 nLang = LANGUAGE_SYSTEM;
76 return nLang;
79 // static
80 LanguageType MsLangId::getRealLanguage( LanguageType nLang )
82 LanguageType simplifyLang = simplifySystemLanguages( nLang);
83 if (simplifyLang == LANGUAGE_SYSTEM )
85 if (nConfiguredSystemLanguage == LANGUAGE_SYSTEM)
86 nLang = getSystemLanguage();
87 else
88 nLang = nConfiguredSystemLanguage;
90 else if (simplifyLang == LANGUAGE_HID_HUMAN_INTERFACE_DEVICE)
92 if (nConfiguredSystemUILanguage == LANGUAGE_SYSTEM)
93 nLang = getSystemUILanguage();
94 else
95 nLang = nConfiguredSystemUILanguage;
97 else
99 /* TODO: would this be useful here? */
100 //nLang = MsLangId::getReplacementForObsoleteLanguage( nLang);
101 ; // nothing
103 if (nLang == LANGUAGE_DONTKNOW)
104 nLang = LANGUAGE_ENGLISH_US;
105 return nLang;
109 // static
110 LanguageType MsLangId::resolveSystemLanguageByScriptType( LanguageType nLang, sal_Int16 nType )
112 if (nLang == LANGUAGE_NONE)
113 return nLang;
115 nLang = getRealLanguage(nLang);
116 if (nType != css::i18n::ScriptType::WEAK && getScriptType(nLang) != nType)
118 switch(nType)
120 case css::i18n::ScriptType::ASIAN:
121 if (nConfiguredAsianFallback == LANGUAGE_SYSTEM)
122 nLang = LANGUAGE_CHINESE_SIMPLIFIED;
123 else
124 nLang = nConfiguredAsianFallback;
125 break;
126 case css::i18n::ScriptType::COMPLEX:
127 if (nConfiguredComplexFallback == LANGUAGE_SYSTEM)
128 nLang = LANGUAGE_HINDI;
129 else
130 nLang = nConfiguredComplexFallback;
131 break;
132 default:
133 if (nConfiguredWesternFallback == LANGUAGE_SYSTEM)
134 nLang = LANGUAGE_ENGLISH_US;
135 else
136 nLang = nConfiguredWesternFallback;
137 break;
140 return nLang;
144 // static
145 css::lang::Locale MsLangId::Conversion::convertLanguageToLocale(
146 LanguageType nLang )
148 css::lang::Locale aLocale;
149 // Still resolve LANGUAGE_DONTKNOW if resolving is not requested,
150 // but not LANGUAGE_SYSTEM or others.
151 LanguageType nOrigLang = nLang;
152 nLang = MsLangId::getRealLanguage(nLang);
153 convertLanguageToLocaleImpl( nLang, aLocale, true );
154 if (aLocale.Language.isEmpty() && simplifySystemLanguages(nOrigLang) == LANGUAGE_SYSTEM)
156 // None found but resolve requested, last resort is "en-US".
157 aLocale.Language = "en";
158 aLocale.Country = "US";
159 aLocale.Variant.clear();
161 return aLocale;
165 // static
166 LanguageType MsLangId::Conversion::convertLocaleToLanguage(
167 const css::lang::Locale& rLocale )
169 // empty language => LANGUAGE_SYSTEM
170 if (rLocale.Language.isEmpty())
171 return LANGUAGE_SYSTEM;
173 return convertLocaleToLanguageImpl( rLocale);
177 // static
178 css::lang::Locale MsLangId::getFallbackLocale(
179 const css::lang::Locale & rLocale )
181 // empty language => LANGUAGE_SYSTEM
182 if (rLocale.Language.isEmpty())
183 return Conversion::lookupFallbackLocale( Conversion::convertLanguageToLocale( LANGUAGE_SYSTEM ));
184 else
185 return Conversion::lookupFallbackLocale( rLocale);
188 static constexpr bool equalsPrimary(LanguageType lhs, LanguageType rhs)
190 return (sal_uInt16(lhs) & LANGUAGE_MASK_PRIMARY )
191 == (sal_uInt16(rhs) & LANGUAGE_MASK_PRIMARY );
194 // static
195 bool MsLangId::isRightToLeft( LanguageType nLang )
197 if( equalsPrimary(nLang, LANGUAGE_ARABIC_SAUDI_ARABIA)
198 || equalsPrimary(nLang, LANGUAGE_HEBREW)
199 || equalsPrimary(nLang, LANGUAGE_YIDDISH)
200 || equalsPrimary(nLang, LANGUAGE_URDU_PAKISTAN)
201 || equalsPrimary(nLang, LANGUAGE_FARSI)
202 || equalsPrimary(nLang, LANGUAGE_KASHMIRI)
203 || equalsPrimary(nLang, LANGUAGE_SINDHI)
204 || equalsPrimary(nLang, LANGUAGE_UIGHUR_CHINA)
205 || equalsPrimary(nLang, LANGUAGE_USER_KYRGYZ_CHINA)
206 || equalsPrimary(nLang, LANGUAGE_USER_NKO) )
208 return true;
210 if (nLang.anyOf(
211 LANGUAGE_USER_KURDISH_IRAN,
212 LANGUAGE_OBSOLETE_USER_KURDISH_IRAQ,
213 LANGUAGE_KURDISH_ARABIC_IRAQ,
214 LANGUAGE_KURDISH_ARABIC_LSO,
215 LANGUAGE_USER_KURDISH_SOUTHERN_IRAN,
216 LANGUAGE_USER_KURDISH_SOUTHERN_IRAQ,
217 LANGUAGE_USER_HUNGARIAN_ROVAS,
218 LANGUAGE_USER_MALAY_ARABIC_MALAYSIA,
219 LANGUAGE_USER_MALAY_ARABIC_BRUNEI))
221 return true;
223 if (LanguageTag::isOnTheFlyID(nLang))
224 return LanguageTag::getOnTheFlyScriptType(nLang) == LanguageTag::ScriptType::RTL;
225 return false;
228 // static
229 bool MsLangId::isRightToLeftMath( LanguageType nLang )
231 //http://www.w3.org/TR/arabic-math/
232 if (nLang == LANGUAGE_FARSI || nLang == LANGUAGE_ARABIC_MOROCCO)
233 return false;
234 return isRightToLeft(nLang);
237 // static
238 bool MsLangId::isSimplifiedChinese( LanguageType nLang )
240 return isChinese(nLang) && !isTraditionalChinese(nLang);
243 // static
244 bool MsLangId::isSimplifiedChinese( const css::lang::Locale & rLocale )
246 return rLocale.Language == "zh" && !isTraditionalChinese(rLocale);
249 // static
250 bool MsLangId::isTraditionalChinese( LanguageType nLang )
252 return nLang.anyOf(
253 LANGUAGE_CHINESE_TRADITIONAL,
254 LANGUAGE_CHINESE_HONGKONG,
255 LANGUAGE_CHINESE_MACAU);
258 // static
259 bool MsLangId::isTraditionalChinese( const css::lang::Locale & rLocale )
261 return rLocale.Language == "zh" && (rLocale.Country == "TW" || rLocale.Country == "HK" || rLocale.Country == "MO");
264 //static
265 bool MsLangId::isChinese( LanguageType nLang )
267 return MsLangId::getPrimaryLanguage(nLang) == MsLangId::getPrimaryLanguage(LANGUAGE_CHINESE) ||
268 MsLangId::getPrimaryLanguage(nLang) == MsLangId::getPrimaryLanguage(LANGUAGE_YUE_CHINESE_HONGKONG);
271 //static
272 bool MsLangId::isKorean( LanguageType nLang )
274 return MsLangId::getPrimaryLanguage(nLang) == MsLangId::getPrimaryLanguage(LANGUAGE_KOREAN);
277 // static
278 bool MsLangId::isCJK( LanguageType nLang )
280 if (primary(nLang).anyOf(
281 primary(LANGUAGE_CHINESE),
282 primary(LANGUAGE_YUE_CHINESE_HONGKONG),
283 primary(LANGUAGE_JAPANESE),
284 primary(LANGUAGE_KOREAN)))
286 return true;
288 if (LanguageTag::isOnTheFlyID(nLang))
289 return LanguageTag::getOnTheFlyScriptType(nLang) == LanguageTag::ScriptType::CJK;
290 return false;
293 // static
294 bool MsLangId::isFamilyNameFirst( LanguageType nLang )
296 return isCJK(nLang) || nLang == LANGUAGE_HUNGARIAN;
299 // static
300 bool MsLangId::hasForbiddenCharacters( LanguageType nLang )
302 return isCJK(nLang);
306 // static
307 bool MsLangId::needsSequenceChecking( LanguageType nLang )
309 return primary(nLang).anyOf(
310 primary(LANGUAGE_BURMESE),
311 primary(LANGUAGE_KHMER),
312 primary(LANGUAGE_LAO),
313 primary(LANGUAGE_THAI));
317 // static
318 sal_Int16 MsLangId::getScriptType( LanguageType nLang )
320 sal_Int16 nScript;
322 // CTL
323 if( nLang.anyOf(
324 LANGUAGE_MONGOLIAN_MONGOLIAN_MONGOLIA,
325 LANGUAGE_MONGOLIAN_MONGOLIAN_CHINA,
326 LANGUAGE_MONGOLIAN_MONGOLIAN_LSO,
327 LANGUAGE_USER_KURDISH_IRAN,
328 LANGUAGE_OBSOLETE_USER_KURDISH_IRAQ,
329 LANGUAGE_KURDISH_ARABIC_IRAQ,
330 LANGUAGE_KURDISH_ARABIC_LSO,
331 LANGUAGE_USER_KURDISH_SOUTHERN_IRAN,
332 LANGUAGE_USER_KURDISH_SOUTHERN_IRAQ,
333 LANGUAGE_USER_KYRGYZ_CHINA,
334 LANGUAGE_USER_HUNGARIAN_ROVAS,
335 LANGUAGE_USER_MANCHU,
336 LANGUAGE_USER_XIBE,
337 LANGUAGE_USER_MALAY_ARABIC_MALAYSIA,
338 LANGUAGE_USER_MALAY_ARABIC_BRUNEI))
340 nScript = css::i18n::ScriptType::COMPLEX;
342 // "Western"
343 else if (nLang.anyOf(
344 LANGUAGE_MONGOLIAN_CYRILLIC_MONGOLIA,
345 LANGUAGE_MONGOLIAN_CYRILLIC_LSO,
346 LANGUAGE_USER_KURDISH_SYRIA,
347 LANGUAGE_USER_KURDISH_TURKEY))
349 nScript = css::i18n::ScriptType::LATIN;
351 // currently not knowing scripttype - defaulted to LATIN:
353 #define LANGUAGE_ARMENIAN 0x042B
354 #define LANGUAGE_INDONESIAN 0x0421
355 #define LANGUAGE_KAZAKH 0x043F
356 #define LANGUAGE_KONKANI 0x0457
357 #define LANGUAGE_MACEDONIAN 0x042F
358 #define LANGUAGE_TATAR 0x0444
360 // CJK catcher
361 else if ( primary(nLang).anyOf(
362 primary(LANGUAGE_CHINESE ),
363 primary(LANGUAGE_YUE_CHINESE_HONGKONG ),
364 primary(LANGUAGE_JAPANESE ),
365 primary(LANGUAGE_KOREAN )
368 nScript = css::i18n::ScriptType::ASIAN;
370 // CTL catcher
371 else if (primary(nLang).anyOf(
372 primary(LANGUAGE_AMHARIC_ETHIOPIA ),
373 primary(LANGUAGE_ARABIC_SAUDI_ARABIA ),
374 primary(LANGUAGE_ASSAMESE ),
375 primary(LANGUAGE_BENGALI ),
376 primary(LANGUAGE_BURMESE ),
377 primary(LANGUAGE_DHIVEHI ),
378 primary(LANGUAGE_FARSI ),
379 primary(LANGUAGE_GUJARATI ),
380 primary(LANGUAGE_HEBREW ),
381 primary(LANGUAGE_HINDI ),
382 primary(LANGUAGE_KANNADA ),
383 primary(LANGUAGE_KASHMIRI ),
384 primary(LANGUAGE_KHMER ),
385 primary(LANGUAGE_LAO ),
386 primary(LANGUAGE_MALAYALAM ),
387 primary(LANGUAGE_MANIPURI ),
388 primary(LANGUAGE_MARATHI ),
389 primary(LANGUAGE_NEPALI ),
390 primary(LANGUAGE_ODIA ),
391 primary(LANGUAGE_PUNJABI ),
392 primary(LANGUAGE_SANSKRIT ),
393 primary(LANGUAGE_SINDHI ),
394 primary(LANGUAGE_SINHALESE_SRI_LANKA ),
395 primary(LANGUAGE_SYRIAC ),
396 primary(LANGUAGE_TAMIL ),
397 primary(LANGUAGE_TELUGU ),
398 primary(LANGUAGE_THAI ),
399 primary(LANGUAGE_TIBETAN ), // also LANGUAGE_DZONGKHA
400 primary(LANGUAGE_UIGHUR_CHINA ),
401 primary(LANGUAGE_URDU_INDIA ),
402 primary(LANGUAGE_USER_BODO_INDIA ),
403 primary(LANGUAGE_USER_DOGRI_INDIA ),
404 primary(LANGUAGE_USER_LIMBU ),
405 primary(LANGUAGE_USER_MAITHILI_INDIA ),
406 primary(LANGUAGE_USER_NKO ),
407 primary(LANGUAGE_YIDDISH )))
409 nScript = css::i18n::ScriptType::COMPLEX;
411 // Western (actually not necessarily Latin but also Cyrillic,
412 // for example)
413 else if (LanguageTag::isOnTheFlyID(nLang))
415 switch (LanguageTag::getOnTheFlyScriptType(nLang))
417 case LanguageTag::ScriptType::CJK :
418 nScript = css::i18n::ScriptType::ASIAN;
419 break;
420 case LanguageTag::ScriptType::CTL :
421 case LanguageTag::ScriptType::RTL :
422 nScript = css::i18n::ScriptType::COMPLEX;
423 break;
424 case LanguageTag::ScriptType::WESTERN :
425 case LanguageTag::ScriptType::UNKNOWN :
426 default:
427 nScript = css::i18n::ScriptType::LATIN;
428 break;
431 else
433 nScript = css::i18n::ScriptType::LATIN;
435 return nScript;
439 // static
440 bool MsLangId::isNonLatinWestern( LanguageType nLang )
442 if (nLang.anyOf(
443 LANGUAGE_AZERI_CYRILLIC,
444 LANGUAGE_AZERI_CYRILLIC_LSO,
445 LANGUAGE_BELARUSIAN,
446 LANGUAGE_BOSNIAN_CYRILLIC_BOSNIA_HERZEGOVINA,
447 LANGUAGE_BOSNIAN_CYRILLIC_LSO,
448 LANGUAGE_BULGARIAN,
449 LANGUAGE_GREEK,
450 LANGUAGE_MONGOLIAN_CYRILLIC_LSO,
451 LANGUAGE_MONGOLIAN_CYRILLIC_MONGOLIA,
452 LANGUAGE_RUSSIAN,
453 LANGUAGE_RUSSIAN_MOLDOVA,
454 LANGUAGE_SERBIAN_CYRILLIC_BOSNIA_HERZEGOVINA,
455 LANGUAGE_SERBIAN_CYRILLIC_LSO,
456 LANGUAGE_SERBIAN_CYRILLIC_MONTENEGRO,
457 LANGUAGE_SERBIAN_CYRILLIC_SAM,
458 LANGUAGE_SERBIAN_CYRILLIC_SERBIA,
459 LANGUAGE_UKRAINIAN,
460 LANGUAGE_UZBEK_CYRILLIC,
461 LANGUAGE_UZBEK_CYRILLIC_LSO))
463 return true;
465 if (getScriptType( nLang) != css::i18n::ScriptType::LATIN)
466 return false;
467 LanguageTag aLanguageTag( nLang);
468 if (aLanguageTag.hasScript())
469 return aLanguageTag.getScript() != "Latn";
470 return false;
474 // static
475 bool MsLangId::isLegacy( LanguageType nLang )
477 return nLang.anyOf(
478 LANGUAGE_SERBIAN_CYRILLIC_SAM,
479 LANGUAGE_SERBIAN_LATIN_SAM);
480 /* TODO: activate once dictionary was renamed from pap-AN to
481 * pap-CW, or the pap-CW one supports also pap-AN, see fdo#44112 */
482 //case LANGUAGE_PAPIAMENTU:
486 // static
487 LanguageType MsLangId::getReplacementForObsoleteLanguage( LanguageType nLang )
489 if (nLang == LANGUAGE_OBSOLETE_USER_LATIN)
490 nLang = LANGUAGE_USER_LATIN_VATICAN;
491 else if (nLang == LANGUAGE_OBSOLETE_USER_MAORI)
492 nLang = LANGUAGE_MAORI_NEW_ZEALAND;
493 else if (nLang == LANGUAGE_OBSOLETE_USER_KINYARWANDA)
494 nLang = LANGUAGE_KINYARWANDA_RWANDA;
495 else if (nLang == LANGUAGE_OBSOLETE_USER_UPPER_SORBIAN)
496 nLang = LANGUAGE_UPPER_SORBIAN_GERMANY;
497 else if (nLang == LANGUAGE_OBSOLETE_USER_LOWER_SORBIAN)
498 nLang = LANGUAGE_LOWER_SORBIAN_GERMANY;
499 else if (nLang == LANGUAGE_OBSOLETE_USER_OCCITAN)
500 nLang = LANGUAGE_OCCITAN_FRANCE;
501 else if (nLang == LANGUAGE_OBSOLETE_USER_BRETON)
502 nLang = LANGUAGE_BRETON_FRANCE;
503 else if (nLang == LANGUAGE_OBSOLETE_USER_KALAALLISUT)
504 nLang = LANGUAGE_KALAALLISUT_GREENLAND;
505 else if (nLang == LANGUAGE_OBSOLETE_USER_LUXEMBOURGISH)
506 nLang = LANGUAGE_LUXEMBOURGISH_LUXEMBOURG;
507 else if (nLang == LANGUAGE_OBSOLETE_USER_KABYLE)
508 nLang = LANGUAGE_TAMAZIGHT_LATIN_ALGERIA;
509 else if (nLang == LANGUAGE_OBSOLETE_USER_CATALAN_VALENCIAN)
510 nLang = LANGUAGE_CATALAN_VALENCIAN;
511 else if (nLang == LANGUAGE_OBSOLETE_USER_MALAGASY_PLATEAU)
512 nLang = LANGUAGE_MALAGASY_PLATEAU;
513 else if (nLang == LANGUAGE_GAELIC_SCOTLAND_LEGACY)
514 nLang = LANGUAGE_GAELIC_SCOTLAND;
515 else if (nLang == LANGUAGE_OBSOLETE_USER_TSWANA_BOTSWANA)
516 nLang = LANGUAGE_TSWANA_BOTSWANA;
517 else if (nLang == LANGUAGE_OBSOLETE_USER_SERBIAN_LATIN_SERBIA)
518 nLang = LANGUAGE_SERBIAN_LATIN_SERBIA;
519 else if (nLang == LANGUAGE_OBSOLETE_USER_SERBIAN_LATIN_MONTENEGRO)
520 nLang = LANGUAGE_SERBIAN_LATIN_MONTENEGRO;
521 else if (nLang == LANGUAGE_OBSOLETE_USER_SERBIAN_CYRILLIC_SERBIA)
522 nLang = LANGUAGE_SERBIAN_CYRILLIC_SERBIA;
523 else if (nLang == LANGUAGE_OBSOLETE_USER_SERBIAN_CYRILLIC_MONTENEGRO)
524 nLang = LANGUAGE_SERBIAN_CYRILLIC_MONTENEGRO;
525 else if (nLang == LANGUAGE_OBSOLETE_USER_KURDISH_IRAQ)
526 nLang = LANGUAGE_KURDISH_ARABIC_IRAQ;
527 else if (nLang == LANGUAGE_OBSOLETE_USER_SPANISH_CUBA)
528 nLang = LANGUAGE_SPANISH_CUBA;
530 // The following are not strictly obsolete but should be mapped to a
531 // replacement locale when encountered.
533 // no_NO is an alias for nb_NO
534 else if (nLang == LANGUAGE_NORWEGIAN)
535 nLang = LANGUAGE_NORWEGIAN_BOKMAL;
537 // The erroneous Tibetan vs. Dzongkha case, #i53497#
538 // We (and MS) have stored LANGUAGE_TIBETAN_BHUTAN. This will need
539 // special attention if MS one day decides to actually use
540 // LANGUAGE_TIBETAN_BHUTAN for bo-BT instead of having it reserved;
541 // then remove the mapping and hope every dz-BT user used ODF to store
542 // documents ;-)
543 else if (nLang == LANGUAGE_TIBETAN_BHUTAN)
544 nLang = LANGUAGE_DZONGKHA_BHUTAN;
546 // en-GB-oed is deprecated, use en-GB-oxendict instead.
547 else if (nLang == LANGUAGE_USER_ENGLISH_UK_OED)
548 nLang = LANGUAGE_USER_ENGLISH_UK_OXENDICT;
549 return nLang;
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */