1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <sal/config.h>
21 #include <rtl/ustring.hxx>
22 #include <com/sun/star/i18n/ScriptType.hpp>
24 #include <i18nlangtag/mslangid.hxx>
26 // Only very limited few functions that are guaranteed to not be called from
27 // LanguageTag may use LanguageTag ...
28 #include <i18nlangtag/languagetag.hxx>
31 LanguageType
MsLangId::nConfiguredSystemLanguage
= LANGUAGE_SYSTEM
;
32 LanguageType
MsLangId::nConfiguredSystemUILanguage
= LANGUAGE_SYSTEM
;
34 LanguageType
MsLangId::nConfiguredWesternFallback
= LANGUAGE_SYSTEM
;
35 LanguageType
MsLangId::nConfiguredAsianFallback
= LANGUAGE_SYSTEM
;
36 LanguageType
MsLangId::nConfiguredComplexFallback
= LANGUAGE_SYSTEM
;
39 void MsLangId::LanguageTagAccess::setConfiguredSystemLanguage( LanguageType nLang
)
41 nConfiguredSystemLanguage
= nLang
;
46 void MsLangId::setConfiguredSystemUILanguage( LanguageType nLang
)
48 nConfiguredSystemUILanguage
= nLang
;
52 void MsLangId::setConfiguredWesternFallback( LanguageType nLang
)
54 nConfiguredWesternFallback
= nLang
;
58 void MsLangId::setConfiguredAsianFallback( LanguageType nLang
)
60 nConfiguredAsianFallback
= nLang
;
64 void MsLangId::setConfiguredComplexFallback( LanguageType nLang
)
66 nConfiguredComplexFallback
= nLang
;
70 inline LanguageType
MsLangId::simplifySystemLanguages( LanguageType nLang
)
72 if (nLang
.anyOf( LANGUAGE_PROCESS_OR_USER_DEFAULT
,
73 LANGUAGE_SYSTEM_DEFAULT
,
75 nLang
= LANGUAGE_SYSTEM
;
80 LanguageType
MsLangId::getRealLanguage( LanguageType nLang
)
82 LanguageType simplifyLang
= simplifySystemLanguages( nLang
);
83 if (simplifyLang
== LANGUAGE_SYSTEM
)
85 if (nConfiguredSystemLanguage
== LANGUAGE_SYSTEM
)
86 nLang
= getSystemLanguage();
88 nLang
= nConfiguredSystemLanguage
;
90 else if (simplifyLang
== LANGUAGE_HID_HUMAN_INTERFACE_DEVICE
)
92 if (nConfiguredSystemUILanguage
== LANGUAGE_SYSTEM
)
93 nLang
= getSystemUILanguage();
95 nLang
= nConfiguredSystemUILanguage
;
99 /* TODO: would this be useful here? */
100 //nLang = MsLangId::getReplacementForObsoleteLanguage( nLang);
103 if (nLang
== LANGUAGE_DONTKNOW
)
104 nLang
= LANGUAGE_ENGLISH_US
;
110 LanguageType
MsLangId::resolveSystemLanguageByScriptType( LanguageType nLang
, sal_Int16 nType
)
112 if (nLang
== LANGUAGE_NONE
)
115 nLang
= getRealLanguage(nLang
);
116 if (nType
!= css::i18n::ScriptType::WEAK
&& getScriptType(nLang
) != nType
)
120 case css::i18n::ScriptType::ASIAN
:
121 if (nConfiguredAsianFallback
== LANGUAGE_SYSTEM
)
122 nLang
= LANGUAGE_CHINESE_SIMPLIFIED
;
124 nLang
= nConfiguredAsianFallback
;
126 case css::i18n::ScriptType::COMPLEX
:
127 if (nConfiguredComplexFallback
== LANGUAGE_SYSTEM
)
128 nLang
= LANGUAGE_HINDI
;
130 nLang
= nConfiguredComplexFallback
;
133 if (nConfiguredWesternFallback
== LANGUAGE_SYSTEM
)
134 nLang
= LANGUAGE_ENGLISH_US
;
136 nLang
= nConfiguredWesternFallback
;
145 css::lang::Locale
MsLangId::Conversion::convertLanguageToLocale(
148 css::lang::Locale aLocale
;
149 // Still resolve LANGUAGE_DONTKNOW if resolving is not requested,
150 // but not LANGUAGE_SYSTEM or others.
151 LanguageType nOrigLang
= nLang
;
152 nLang
= MsLangId::getRealLanguage(nLang
);
153 convertLanguageToLocaleImpl( nLang
, aLocale
, true );
154 if (aLocale
.Language
.isEmpty() && simplifySystemLanguages(nOrigLang
) == LANGUAGE_SYSTEM
)
156 // None found but resolve requested, last resort is "en-US".
157 aLocale
.Language
= "en";
158 aLocale
.Country
= "US";
159 aLocale
.Variant
.clear();
166 LanguageType
MsLangId::Conversion::convertLocaleToLanguage(
167 const css::lang::Locale
& rLocale
)
169 // empty language => LANGUAGE_SYSTEM
170 if (rLocale
.Language
.isEmpty())
171 return LANGUAGE_SYSTEM
;
173 return convertLocaleToLanguageImpl( rLocale
);
178 css::lang::Locale
MsLangId::getFallbackLocale(
179 const css::lang::Locale
& rLocale
)
181 // empty language => LANGUAGE_SYSTEM
182 if (rLocale
.Language
.isEmpty())
183 return Conversion::lookupFallbackLocale( Conversion::convertLanguageToLocale( LANGUAGE_SYSTEM
));
185 return Conversion::lookupFallbackLocale( rLocale
);
188 static constexpr bool equalsPrimary(LanguageType lhs
, LanguageType rhs
)
190 return (sal_uInt16(lhs
) & LANGUAGE_MASK_PRIMARY
)
191 == (sal_uInt16(rhs
) & LANGUAGE_MASK_PRIMARY
);
195 bool MsLangId::isRightToLeft( LanguageType nLang
)
197 if( equalsPrimary(nLang
, LANGUAGE_ARABIC_SAUDI_ARABIA
)
198 || equalsPrimary(nLang
, LANGUAGE_HEBREW
)
199 || equalsPrimary(nLang
, LANGUAGE_YIDDISH
)
200 || equalsPrimary(nLang
, LANGUAGE_URDU_PAKISTAN
)
201 || equalsPrimary(nLang
, LANGUAGE_FARSI
)
202 || equalsPrimary(nLang
, LANGUAGE_KASHMIRI
)
203 || equalsPrimary(nLang
, LANGUAGE_SINDHI
)
204 || equalsPrimary(nLang
, LANGUAGE_UIGHUR_CHINA
)
205 || equalsPrimary(nLang
, LANGUAGE_USER_KYRGYZ_CHINA
)
206 || equalsPrimary(nLang
, LANGUAGE_USER_NKO
) )
211 LANGUAGE_USER_KURDISH_IRAN
,
212 LANGUAGE_OBSOLETE_USER_KURDISH_IRAQ
,
213 LANGUAGE_KURDISH_ARABIC_IRAQ
,
214 LANGUAGE_KURDISH_ARABIC_LSO
,
215 LANGUAGE_USER_KURDISH_SOUTHERN_IRAN
,
216 LANGUAGE_USER_KURDISH_SOUTHERN_IRAQ
,
217 LANGUAGE_USER_HUNGARIAN_ROVAS
,
218 LANGUAGE_USER_MALAY_ARABIC_MALAYSIA
,
219 LANGUAGE_USER_MALAY_ARABIC_BRUNEI
))
223 if (LanguageTag::isOnTheFlyID(nLang
))
224 return LanguageTag::getOnTheFlyScriptType(nLang
) == LanguageTag::ScriptType::RTL
;
229 bool MsLangId::isRightToLeftMath( LanguageType nLang
)
231 //http://www.w3.org/TR/arabic-math/
232 if (nLang
== LANGUAGE_FARSI
|| nLang
== LANGUAGE_ARABIC_MOROCCO
)
234 return isRightToLeft(nLang
);
238 bool MsLangId::isSimplifiedChinese( LanguageType nLang
)
240 return isChinese(nLang
) && !isTraditionalChinese(nLang
);
244 bool MsLangId::isSimplifiedChinese( const css::lang::Locale
& rLocale
)
246 return rLocale
.Language
== "zh" && !isTraditionalChinese(rLocale
);
250 bool MsLangId::isTraditionalChinese( LanguageType nLang
)
253 LANGUAGE_CHINESE_TRADITIONAL
,
254 LANGUAGE_CHINESE_HONGKONG
,
255 LANGUAGE_CHINESE_MACAU
);
259 bool MsLangId::isTraditionalChinese( const css::lang::Locale
& rLocale
)
261 return rLocale
.Language
== "zh" && (rLocale
.Country
== "TW" || rLocale
.Country
== "HK" || rLocale
.Country
== "MO");
265 bool MsLangId::isChinese( LanguageType nLang
)
267 return MsLangId::getPrimaryLanguage(nLang
) == MsLangId::getPrimaryLanguage(LANGUAGE_CHINESE
) ||
268 MsLangId::getPrimaryLanguage(nLang
) == MsLangId::getPrimaryLanguage(LANGUAGE_YUE_CHINESE_HONGKONG
);
272 bool MsLangId::isKorean( LanguageType nLang
)
274 return MsLangId::getPrimaryLanguage(nLang
) == MsLangId::getPrimaryLanguage(LANGUAGE_KOREAN
);
278 bool MsLangId::isCJK( LanguageType nLang
)
280 if (primary(nLang
).anyOf(
281 primary(LANGUAGE_CHINESE
),
282 primary(LANGUAGE_YUE_CHINESE_HONGKONG
),
283 primary(LANGUAGE_JAPANESE
),
284 primary(LANGUAGE_KOREAN
)))
288 if (LanguageTag::isOnTheFlyID(nLang
))
289 return LanguageTag::getOnTheFlyScriptType(nLang
) == LanguageTag::ScriptType::CJK
;
294 bool MsLangId::isFamilyNameFirst( LanguageType nLang
)
296 return isCJK(nLang
) || nLang
== LANGUAGE_HUNGARIAN
;
300 bool MsLangId::hasForbiddenCharacters( LanguageType nLang
)
307 bool MsLangId::needsSequenceChecking( LanguageType nLang
)
309 return primary(nLang
).anyOf(
310 primary(LANGUAGE_BURMESE
),
311 primary(LANGUAGE_KHMER
),
312 primary(LANGUAGE_LAO
),
313 primary(LANGUAGE_THAI
));
318 sal_Int16
MsLangId::getScriptType( LanguageType nLang
)
324 LANGUAGE_MONGOLIAN_MONGOLIAN_MONGOLIA
,
325 LANGUAGE_MONGOLIAN_MONGOLIAN_CHINA
,
326 LANGUAGE_MONGOLIAN_MONGOLIAN_LSO
,
327 LANGUAGE_USER_KURDISH_IRAN
,
328 LANGUAGE_OBSOLETE_USER_KURDISH_IRAQ
,
329 LANGUAGE_KURDISH_ARABIC_IRAQ
,
330 LANGUAGE_KURDISH_ARABIC_LSO
,
331 LANGUAGE_USER_KURDISH_SOUTHERN_IRAN
,
332 LANGUAGE_USER_KURDISH_SOUTHERN_IRAQ
,
333 LANGUAGE_USER_KYRGYZ_CHINA
,
334 LANGUAGE_USER_HUNGARIAN_ROVAS
,
335 LANGUAGE_USER_MANCHU
,
337 LANGUAGE_USER_MALAY_ARABIC_MALAYSIA
,
338 LANGUAGE_USER_MALAY_ARABIC_BRUNEI
))
340 nScript
= css::i18n::ScriptType::COMPLEX
;
343 else if (nLang
.anyOf(
344 LANGUAGE_MONGOLIAN_CYRILLIC_MONGOLIA
,
345 LANGUAGE_MONGOLIAN_CYRILLIC_LSO
,
346 LANGUAGE_USER_KURDISH_SYRIA
,
347 LANGUAGE_USER_KURDISH_TURKEY
))
349 nScript
= css::i18n::ScriptType::LATIN
;
351 // currently not knowing scripttype - defaulted to LATIN:
353 #define LANGUAGE_ARMENIAN 0x042B
354 #define LANGUAGE_INDONESIAN 0x0421
355 #define LANGUAGE_KAZAKH 0x043F
356 #define LANGUAGE_KONKANI 0x0457
357 #define LANGUAGE_MACEDONIAN 0x042F
358 #define LANGUAGE_TATAR 0x0444
361 else if ( primary(nLang
).anyOf(
362 primary(LANGUAGE_CHINESE
),
363 primary(LANGUAGE_YUE_CHINESE_HONGKONG
),
364 primary(LANGUAGE_JAPANESE
),
365 primary(LANGUAGE_KOREAN
)
368 nScript
= css::i18n::ScriptType::ASIAN
;
371 else if (primary(nLang
).anyOf(
372 primary(LANGUAGE_AMHARIC_ETHIOPIA
),
373 primary(LANGUAGE_ARABIC_SAUDI_ARABIA
),
374 primary(LANGUAGE_ASSAMESE
),
375 primary(LANGUAGE_BENGALI
),
376 primary(LANGUAGE_BURMESE
),
377 primary(LANGUAGE_DHIVEHI
),
378 primary(LANGUAGE_FARSI
),
379 primary(LANGUAGE_GUJARATI
),
380 primary(LANGUAGE_HEBREW
),
381 primary(LANGUAGE_HINDI
),
382 primary(LANGUAGE_KANNADA
),
383 primary(LANGUAGE_KASHMIRI
),
384 primary(LANGUAGE_KHMER
),
385 primary(LANGUAGE_LAO
),
386 primary(LANGUAGE_MALAYALAM
),
387 primary(LANGUAGE_MANIPURI
),
388 primary(LANGUAGE_MARATHI
),
389 primary(LANGUAGE_NEPALI
),
390 primary(LANGUAGE_ODIA
),
391 primary(LANGUAGE_PUNJABI
),
392 primary(LANGUAGE_SANSKRIT
),
393 primary(LANGUAGE_SINDHI
),
394 primary(LANGUAGE_SINHALESE_SRI_LANKA
),
395 primary(LANGUAGE_SYRIAC
),
396 primary(LANGUAGE_TAMIL
),
397 primary(LANGUAGE_TELUGU
),
398 primary(LANGUAGE_THAI
),
399 primary(LANGUAGE_TIBETAN
), // also LANGUAGE_DZONGKHA
400 primary(LANGUAGE_UIGHUR_CHINA
),
401 primary(LANGUAGE_URDU_INDIA
),
402 primary(LANGUAGE_USER_BODO_INDIA
),
403 primary(LANGUAGE_USER_DOGRI_INDIA
),
404 primary(LANGUAGE_USER_LIMBU
),
405 primary(LANGUAGE_USER_MAITHILI_INDIA
),
406 primary(LANGUAGE_USER_NKO
),
407 primary(LANGUAGE_YIDDISH
)))
409 nScript
= css::i18n::ScriptType::COMPLEX
;
411 // Western (actually not necessarily Latin but also Cyrillic,
413 else if (LanguageTag::isOnTheFlyID(nLang
))
415 switch (LanguageTag::getOnTheFlyScriptType(nLang
))
417 case LanguageTag::ScriptType::CJK
:
418 nScript
= css::i18n::ScriptType::ASIAN
;
420 case LanguageTag::ScriptType::CTL
:
421 case LanguageTag::ScriptType::RTL
:
422 nScript
= css::i18n::ScriptType::COMPLEX
;
424 case LanguageTag::ScriptType::WESTERN
:
425 case LanguageTag::ScriptType::UNKNOWN
:
427 nScript
= css::i18n::ScriptType::LATIN
;
433 nScript
= css::i18n::ScriptType::LATIN
;
440 bool MsLangId::isNonLatinWestern( LanguageType nLang
)
443 LANGUAGE_AZERI_CYRILLIC
,
444 LANGUAGE_AZERI_CYRILLIC_LSO
,
446 LANGUAGE_BOSNIAN_CYRILLIC_BOSNIA_HERZEGOVINA
,
447 LANGUAGE_BOSNIAN_CYRILLIC_LSO
,
450 LANGUAGE_MONGOLIAN_CYRILLIC_LSO
,
451 LANGUAGE_MONGOLIAN_CYRILLIC_MONGOLIA
,
453 LANGUAGE_RUSSIAN_MOLDOVA
,
454 LANGUAGE_SERBIAN_CYRILLIC_BOSNIA_HERZEGOVINA
,
455 LANGUAGE_SERBIAN_CYRILLIC_LSO
,
456 LANGUAGE_SERBIAN_CYRILLIC_MONTENEGRO
,
457 LANGUAGE_SERBIAN_CYRILLIC_SAM
,
458 LANGUAGE_SERBIAN_CYRILLIC_SERBIA
,
460 LANGUAGE_UZBEK_CYRILLIC
,
461 LANGUAGE_UZBEK_CYRILLIC_LSO
))
465 if (getScriptType( nLang
) != css::i18n::ScriptType::LATIN
)
467 LanguageTag
aLanguageTag( nLang
);
468 if (aLanguageTag
.hasScript())
469 return aLanguageTag
.getScript() != "Latn";
475 bool MsLangId::isLegacy( LanguageType nLang
)
478 LANGUAGE_SERBIAN_CYRILLIC_SAM
,
479 LANGUAGE_SERBIAN_LATIN_SAM
);
480 /* TODO: activate once dictionary was renamed from pap-AN to
481 * pap-CW, or the pap-CW one supports also pap-AN, see fdo#44112 */
482 //case LANGUAGE_PAPIAMENTU:
487 LanguageType
MsLangId::getReplacementForObsoleteLanguage( LanguageType nLang
)
489 if (nLang
== LANGUAGE_OBSOLETE_USER_LATIN
)
490 nLang
= LANGUAGE_USER_LATIN_VATICAN
;
491 else if (nLang
== LANGUAGE_OBSOLETE_USER_MAORI
)
492 nLang
= LANGUAGE_MAORI_NEW_ZEALAND
;
493 else if (nLang
== LANGUAGE_OBSOLETE_USER_KINYARWANDA
)
494 nLang
= LANGUAGE_KINYARWANDA_RWANDA
;
495 else if (nLang
== LANGUAGE_OBSOLETE_USER_UPPER_SORBIAN
)
496 nLang
= LANGUAGE_UPPER_SORBIAN_GERMANY
;
497 else if (nLang
== LANGUAGE_OBSOLETE_USER_LOWER_SORBIAN
)
498 nLang
= LANGUAGE_LOWER_SORBIAN_GERMANY
;
499 else if (nLang
== LANGUAGE_OBSOLETE_USER_OCCITAN
)
500 nLang
= LANGUAGE_OCCITAN_FRANCE
;
501 else if (nLang
== LANGUAGE_OBSOLETE_USER_BRETON
)
502 nLang
= LANGUAGE_BRETON_FRANCE
;
503 else if (nLang
== LANGUAGE_OBSOLETE_USER_KALAALLISUT
)
504 nLang
= LANGUAGE_KALAALLISUT_GREENLAND
;
505 else if (nLang
== LANGUAGE_OBSOLETE_USER_LUXEMBOURGISH
)
506 nLang
= LANGUAGE_LUXEMBOURGISH_LUXEMBOURG
;
507 else if (nLang
== LANGUAGE_OBSOLETE_USER_KABYLE
)
508 nLang
= LANGUAGE_TAMAZIGHT_LATIN_ALGERIA
;
509 else if (nLang
== LANGUAGE_OBSOLETE_USER_CATALAN_VALENCIAN
)
510 nLang
= LANGUAGE_CATALAN_VALENCIAN
;
511 else if (nLang
== LANGUAGE_OBSOLETE_USER_MALAGASY_PLATEAU
)
512 nLang
= LANGUAGE_MALAGASY_PLATEAU
;
513 else if (nLang
== LANGUAGE_GAELIC_SCOTLAND_LEGACY
)
514 nLang
= LANGUAGE_GAELIC_SCOTLAND
;
515 else if (nLang
== LANGUAGE_OBSOLETE_USER_TSWANA_BOTSWANA
)
516 nLang
= LANGUAGE_TSWANA_BOTSWANA
;
517 else if (nLang
== LANGUAGE_OBSOLETE_USER_SERBIAN_LATIN_SERBIA
)
518 nLang
= LANGUAGE_SERBIAN_LATIN_SERBIA
;
519 else if (nLang
== LANGUAGE_OBSOLETE_USER_SERBIAN_LATIN_MONTENEGRO
)
520 nLang
= LANGUAGE_SERBIAN_LATIN_MONTENEGRO
;
521 else if (nLang
== LANGUAGE_OBSOLETE_USER_SERBIAN_CYRILLIC_SERBIA
)
522 nLang
= LANGUAGE_SERBIAN_CYRILLIC_SERBIA
;
523 else if (nLang
== LANGUAGE_OBSOLETE_USER_SERBIAN_CYRILLIC_MONTENEGRO
)
524 nLang
= LANGUAGE_SERBIAN_CYRILLIC_MONTENEGRO
;
525 else if (nLang
== LANGUAGE_OBSOLETE_USER_KURDISH_IRAQ
)
526 nLang
= LANGUAGE_KURDISH_ARABIC_IRAQ
;
527 else if (nLang
== LANGUAGE_OBSOLETE_USER_SPANISH_CUBA
)
528 nLang
= LANGUAGE_SPANISH_CUBA
;
530 // The following are not strictly obsolete but should be mapped to a
531 // replacement locale when encountered.
533 // no_NO is an alias for nb_NO
534 else if (nLang
== LANGUAGE_NORWEGIAN
)
535 nLang
= LANGUAGE_NORWEGIAN_BOKMAL
;
537 // The erroneous Tibetan vs. Dzongkha case, #i53497#
538 // We (and MS) have stored LANGUAGE_TIBETAN_BHUTAN. This will need
539 // special attention if MS one day decides to actually use
540 // LANGUAGE_TIBETAN_BHUTAN for bo-BT instead of having it reserved;
541 // then remove the mapping and hope every dz-BT user used ODF to store
543 else if (nLang
== LANGUAGE_TIBETAN_BHUTAN
)
544 nLang
= LANGUAGE_DZONGKHA_BHUTAN
;
546 // en-GB-oed is deprecated, use en-GB-oxendict instead.
547 else if (nLang
== LANGUAGE_USER_ENGLISH_UK_OED
)
548 nLang
= LANGUAGE_USER_ENGLISH_UK_OXENDICT
;
552 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */