1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 #include <config_folders.h>
11 #include <config_liblangtag.h>
13 #include <i18nlangtag/languagetag.hxx>
14 #include <i18nlangtag/applelangid.hxx>
15 #include <i18nlangtag/mslangid.hxx>
16 #include <rtl/ustrbuf.hxx>
17 #include <rtl/bootstrap.hxx>
18 #include <sal/log.hxx>
19 #include <osl/file.hxx>
20 #include <rtl/locale.h>
21 #include <o3tl/string_view.hxx>
27 #include <string_view>
28 #include <unordered_set>
32 #if LIBLANGTAG_INLINE_FIX
33 #define LT_HAVE_INLINE
35 #include <liblangtag/langtag.h>
38 #include <osl/detail/android-bootstrap.h>
42 #include <osl/detail/emscripten-bootstrap.h>
45 using namespace com::sun::star
;
49 // Helper to ensure lt_error_t is free'd
// Only the constructor and destructor of this helper are visible in this
// excerpt; the member `p` is declared on a line that is not shown.
// Constructor: start out holding no error object.
53 myLtError() : p(nullptr) {}
// Destructor: if an error object was stored in `p`, release it with
// lt_error_unref().
54 ~myLtError() { if (p
) lt_error_unref( p
); }
// Accessor for the file-wide recursive mutex, which is held as a
// function-local static. The statement handing it back to the caller is not
// visible in this excerpt.
// In this file it is locked through std::unique_lock by
// getNextOnTheFlyLanguage(), LanguageTagImpl::registerOnTheFly() and
// LanguageTag::registerImpl().
60 std::recursive_mutex
& theMutex()
62 static std::recursive_mutex SINGLETON
;
// Set type used to hold language tag strings that are considered known.
67 typedef std::unordered_set
< OUString
> KnownTagSet
;
// Returns the set of known tag strings. The set is a function-local static
// that is filled once by an immediately defined lambda.
// NOTE(review): the declaration of tmpSet and the return statements are on
// lines not shown in this excerpt.
68 static const KnownTagSet
& getKnowns()
70 static KnownTagSet theKnowns
= []()
// Walk all language tag mappings that MsLangId reports as defined.
73 ::std::vector
< MsLangId::LanguagetagMapping
> aDefined( MsLangId::getDefinedLanguagetags());
74 for (auto const& elemDefined
: aDefined
)
76 // Do not use the BCP47 string here to initialize the
77 // LanguageTag because then canonicalize() would call this
78 // getKnowns() again...
// Construct the LanguageTag from the LangID (mnLang) and collect its
// fallback strings.
79 ::std::vector
< OUString
> aFallbacks( LanguageTag( elemDefined
.mnLang
).getFallbackStrings( true));
// Every fallback string is added to the set being built.
80 for (auto const& fallback
: aFallbacks
)
82 tmpSet
.insert(fallback
);
// Strict-weak-ordering functor for string views that ignores ASCII case.
// It is the comparator of the MapBcp47 map type declared right after it.
92 struct compareIgnoreAsciiCaseLess
// True if r1 sorts before r2, i.e. o3tl::compareToIgnoreAsciiCase() yields
// a negative value.
94 bool operator()( std::u16string_view r1
, std::u16string_view r2
) const
96 return o3tl::compareToIgnoreAsciiCase(r1
, r2
) < 0;
// Map from BCP 47 string to shared impl; keys compare ASCII
// case-insensitively through compareIgnoreAsciiCaseLess.
99 typedef ::std::map
< OUString
, LanguageTag::ImplPtr
, compareIgnoreAsciiCaseLess
> MapBcp47
;
// Map from LanguageType (LangID) to shared impl.
100 typedef ::std::map
< LanguageType
, LanguageTag::ImplPtr
> MapLangID
;
// The three accessors below each own one function-local static instance.
// Their return statements are on lines not shown in this excerpt.
// Registry of impls keyed by BCP 47 string.
101 MapBcp47
& theMapBcp47()
103 static MapBcp47 SINGLETON
;
// Registry of impls keyed by LangID.
106 MapLangID
& theMapLangID()
108 static MapLangID SINGLETON
;
// Impl of the system locale; it is reset by
// LanguageTag::setConfiguredSystemLanguage() and assigned in
// LanguageTag::registerImpl().
111 LanguageTag::ImplPtr
& theSystemLocale()
113 static LanguageTag::ImplPtr SINGLETON
;
// Hands out the next LangID from the on-the-fly range.
// The last ID handed out is kept in a function-local static (0 means none
// yet) and access is serialized with theMutex().
// Returns LanguageType(0) after warning when the range is exhausted,
// otherwise the value of nOnTheFlyLanguage.
// NOTE(review): several branch lines (else parts, the plain increment) are
// not shown in this excerpt, so only the visible conditions are described.
119 static LanguageType
getNextOnTheFlyLanguage()
121 static LanguageType
nOnTheFlyLanguage(0);
122 std::unique_lock
aGuard( theMutex());
// First call: start at the first sub-language of the first primary ID.
123 if (!nOnTheFlyLanguage
)
124 nOnTheFlyLanguage
= MsLangId::makeLangID( LANGUAGE_ON_THE_FLY_SUB_START
, LANGUAGE_ON_THE_FLY_START
);
// Later calls: distinguish whether the primary language part has reached
// LANGUAGE_ON_THE_FLY_END.
127 if (MsLangId::getPrimaryLanguage( nOnTheFlyLanguage
) != LANGUAGE_ON_THE_FLY_END
)
// If the sub-language part can still grow, continue with the next
// sub-language and the first primary ID.
131 LanguageType nSub
= MsLangId::getSubLanguage( nOnTheFlyLanguage
);
132 if (nSub
!= LANGUAGE_ON_THE_FLY_SUB_END
)
133 nOnTheFlyLanguage
= MsLangId::makeLangID( ++nSub
, LANGUAGE_ON_THE_FLY_START
);
// Range exhausted: the warning prints (number of primary IDs) times
// (number of sub-language IDs).
136 SAL_WARN( "i18nlangtag", "getNextOnTheFlyLanguage: none left! ("
137 << ((sal_uInt16(LANGUAGE_ON_THE_FLY_END
) - sal_uInt16(LANGUAGE_ON_THE_FLY_START
) + 1)
138 * (sal_uInt16(LANGUAGE_ON_THE_FLY_SUB_END
) - sal_uInt16(LANGUAGE_ON_THE_FLY_SUB_START
) + 1))
140 return LanguageType(0);
// Debug builds only: log a counter of on-the-fly IDs.
144 #if OSL_DEBUG_LEVEL > 0
145 static size_t nOnTheFlies
= 0;
147 SAL_INFO( "i18nlangtag", "getNextOnTheFlyLanguage: number " << nOnTheFlies
);
149 return nOnTheFlyLanguage
;
// Tests whether nLang lies in the on-the-fly LangID range.
// The ID is split into its primary and sub-language parts; the expression
// below holds when the primary part is within
// [LANGUAGE_ON_THE_FLY_START, LANGUAGE_ON_THE_FLY_END] and the sub part is
// within [LANGUAGE_ON_THE_FLY_SUB_START, LANGUAGE_ON_THE_FLY_SUB_END],
// both bounds inclusive.
// NOTE(review): the `return` keyword line is not shown in this excerpt.
154 bool LanguageTag::isOnTheFlyID( LanguageType nLang
)
156 LanguageType nPri
= MsLangId::getPrimaryLanguage( nLang
);
157 LanguageType nSub
= MsLangId::getSubLanguage( nLang
);
159 LANGUAGE_ON_THE_FLY_START
<= nPri
&& nPri
<= LANGUAGE_ON_THE_FLY_END
&&
160 LANGUAGE_ON_THE_FLY_SUB_START
<= nSub
&& nSub
<= LANGUAGE_ON_THE_FLY_SUB_END
;
165 /** A reference holder for liblangtag data de/initialization, one static
166 instance. Currently implemented such that the first "ref" inits and dtor
167 (our library deinitialized) tears down.
// Only part of the class declaration is visible in this excerpt: the
// destructor, the data path member and two private helpers.
169 class LiblangtagDataRef
173 ~LiblangtagDataRef();
180 OString maDataPath
; // path to liblangtag data, "|" if system
// Determines maDataPath, see the definition of setupDataPath().
183 void setupDataPath();
// Static helper; its definition logs that the database is finalized.
185 static void teardown();
// Accessor for the single static LiblangtagDataRef instance. The return
// statement is on a line not shown in this excerpt.
188 LiblangtagDataRef
& theDataRef()
190 static LiblangtagDataRef SINGLETON
;
// Constructor and destructor: only the signature lines are visible in this
// excerpt, their bodies are not shown.
195 LiblangtagDataRef::LiblangtagDataRef()
201 LiblangtagDataRef::~LiblangtagDataRef()
// setup(): logs the initialization, inspects whether maDataPath is still
// empty and finally marks the instance as initialized.
// NOTE(review): the statements executed when maDataPath is empty are not
// shown here; presumably setupDataPath() is called - confirm.
207 void LiblangtagDataRef::setup()
209 SAL_INFO( "i18nlangtag", "LiblangtagDataRef::setup: initializing database");
210 if (maDataPath
.isEmpty())
213 mbInitialized
= true;
// teardown(): only the log statement is visible in this excerpt.
216 void LiblangtagDataRef::teardown()
218 SAL_INFO( "i18nlangtag", "LiblangtagDataRef::teardown: finalizing database");
// Determines the directory with the liblangtag data and stores it in
// maDataPath.
// NOTE(review): the preprocessor else/endif lines and some braces are not
// shown in this excerpt, so the exact nesting of the steps below cannot be
// confirmed from here.
222 void LiblangtagDataRef::setupDataPath()
// Android and Emscripten: data lives below the application data directory.
224 #if defined(ANDROID) || defined(EMSCRIPTEN)
225 maDataPath
= OString(lo_get_app_data_dir()) + "/share/liblangtag";
227 // maDataPath is assumed to be empty here.
// Build the URL of the liblangtag folder inside the installation and expand
// the bootstrap macro in it.
228 OUString
aURL(u
"$BRAND_BASE_DIR/" LIBO_SHARE_FOLDER
"/liblangtag"_ustr
);
229 rtl::Bootstrap::expandMacros(aURL
); //TODO: detect failure
231 // Check if data is in our own installation, else assume system
// The probe is the registry file language-subtag-registry.xml.
233 OUString aData
= aURL
+ "/language-subtag-registry.xml";
234 osl::DirectoryItem aDirItem
;
235 if (osl::DirectoryItem::get( aData
, aDirItem
) == osl::DirectoryItem::E_None
)
// Found: convert the folder URL to a system path and keep it UTF-8 encoded.
238 if (osl::FileBase::getSystemPathFromFileURL( aURL
, aPath
) == osl::FileBase::E_None
)
239 maDataPath
= OUStringToOString( aPath
, RTL_TEXTENCODING_UTF8
);
// Nothing found: "|" is the marker for system provided data.
242 if (maDataPath
.isEmpty())
243 maDataPath
= "|"_ostr
; // assume system
// Hand the data directory to liblangtag.
245 lt_db_set_datadir( maDataPath
.getStr());
249 /* TODO: we could transform known vendor and browser-specific variants to known
250 * BCP 47 if available. For now just remove them to not confuse any later
251 * treatments that check for empty variants. This vendor stuff was never
252 * supported anyway. */
// Clears rLocale.Variant in place when it is non-empty, unless the Language
// field equals I18NLANGTAG_QLT; in that case the Variant is left untouched.
253 static void handleVendorVariant( css::lang::Locale
& rLocale
)
255 if (!rLocale
.Variant
.isEmpty() && rLocale
.Language
!= I18NLANGTAG_QLT
)
256 rLocale
.Variant
.clear();
// Implementation object behind LanguageTag. It holds the Locale, the BCP 47
// string and the LangID of a tag, cached sub-strings, the liblangtag handle
// and flags recording which of the representations are initialized.
// Instances are created with std::make_shared and stored as
// LanguageTag::ImplPtr in the registries of this file.
// NOTE(review): access specifiers, the destructor declaration and some
// enum declarations are on lines not shown in this excerpt.
260 class LanguageTagImpl
// Construct from the raw member values of a LanguageTag.
264 explicit LanguageTagImpl( const LanguageTag
& rLanguageTag
);
// Copy construction and assignment duplicate the liblangtag handle, see the
// definitions.
265 explicit LanguageTagImpl( const LanguageTagImpl
& rLanguageTagImpl
);
267 LanguageTagImpl
& operator=( const LanguageTagImpl
& rLanguageTagImpl
);
271 friend class LanguageTag
;
// The three representations of the tag.
280 mutable css::lang::Locale maLocale
;
281 mutable OUString maBcp47
;
282 mutable OUString maCachedLanguage
; ///< cache getLanguage()
283 mutable OUString maCachedScript
; ///< cache getScript()
284 mutable OUString maCachedCountry
; ///< cache getCountry()
285 mutable OUString maCachedVariants
; ///< cache getVariants()
286 mutable OUString maCachedGlibcString
; ///< cache getGlibcLocaleString()
287 mutable lt_tag_t
* mpImplLangtag
; ///< liblangtag pointer
288 mutable LanguageType mnLangID
;
289 mutable LanguageTag::ScriptType meScriptType
;
// Tri-state results; the constructors start them as DECISION_DONTKNOW.
290 mutable Decision meIsValid
;
291 mutable Decision meIsIsoLocale
;
292 mutable Decision meIsIsoODF
;
293 mutable Decision meIsLiblangtagNeeded
; ///< whether processing with liblangtag needed
// One-bit flags: which representation is initialized and which cache
// string is filled.
294 bool mbSystemLocale
: 1;
295 mutable bool mbInitializedBcp47
: 1;
296 mutable bool mbInitializedLocale
: 1;
297 mutable bool mbInitializedLangID
: 1;
298 mutable bool mbCachedLanguage
: 1;
299 mutable bool mbCachedScript
: 1;
300 mutable bool mbCachedCountry
: 1;
301 mutable bool mbCachedVariants
: 1;
302 mutable bool mbCachedGlibcString
: 1;
// Accessors for the string parts of the tag.
304 OUString
const & getBcp47() const;
305 OUString
const & getLanguage() const;
306 OUString
const & getScript() const;
307 OUString
const & getCountry() const;
308 OUString
getRegion() const;
309 OUString
const & getVariants() const;
310 bool hasScript() const;
311 OUString
const & getGlibcLocaleString() const;
313 void setScriptType(LanguageTag::ScriptType st
);
314 LanguageTag::ScriptType
getScriptType() const;
316 bool isIsoLocale() const;
317 bool isIsoODF() const;
318 bool isValidBcp47() const;
// Conversions between the Locale, LangID and BCP 47 representations.
320 void convertLocaleToBcp47() const;
321 bool convertLocaleToLang( bool bAllowOnTheFlyID
);
322 void convertBcp47ToLocale();
323 void convertBcp47ToLang();
324 void convertLangToLocale() const;
325 void convertLangToBcp47() const;
327 /** @return whether BCP 47 language tag string was changed. */
330 /** Canonicalize if not yet done and synchronize initialized conversions.
332 @return whether BCP 47 language tag string was changed.
334 bool synCanonicalize();
336 OUString
getLanguageFromLangtag();
337 OUString
getScriptFromLangtag();
338 OUString
getRegionFromLangtag();
339 OUString
getVariantsFromLangtag();
341 /** Generates on-the-fly LangID and registers the maBcp47,mnLangID pair.
344 If not 0 and not LANGUAGE_DONTKNOW, suggest (!) to use that ID
345 instead of generating an on-the-fly ID. Implementation may
346 still generate an ID if the suggested ID is already used for
347 another language tag.
349 @return NULL if no ID could be obtained or registration failed.
351 LanguageTag::ImplPtr
registerOnTheFly( LanguageType nRegisterID
);
353 /** Obtain Language, Script, Country and Variants via simpleExtract() and
354 assign them to the cached variables if successful.
356 @return simpleExtract() successfully extracted and cached.
358 bool cacheSimpleLSCV();
372 /** Of a language tag of the form lll[-Ssss][-CC][-vvvvvvvv] extract the
375 Does not check case or content!
377 @return EXTRACTED_LSC if simple tag was detected (i.e. one that
378 would fulfill the isIsoODF() condition),
379 EXTRACTED_LV if a tag with variant was detected,
380 EXTRACTED_LR if a tag with 3-digit UN M.49 region code was detected
381 EXTRACTED_C_LOCALE if a 'C' locale was detected,
382 EXTRACTED_X if x-... privateuse tag was detected,
383 EXTRACTED_X_JOKER if "*" joker was detected,
384 EXTRACTED_KNOWN_BAD if a bad but known (to be remapped) tag was detected
387 static Extraction
simpleExtract( const OUString
& rBcp47
,
392 OUString
& rVariants
);
394 /** Convert Locale to BCP 47 string without resolving system and creating
395 temporary LanguageTag instances. */
396 static OUString
convertToBcp47( const css::lang::Locale
& rLocale
);
// Builds an impl from the raw member values of a LanguageTag.
// Locale, BCP 47 string, LangID, the system-locale flag and the three
// "initialized" flags are taken over from rLanguageTag. Everything derived
// starts empty: no liblangtag handle, unknown script type, all decisions
// DECISION_DONTKNOW and all cache flags false.
// NOTE(review): the constructor body is on lines not shown in this excerpt.
401 LanguageTagImpl::LanguageTagImpl( const LanguageTag
& rLanguageTag
)
403 maLocale( rLanguageTag
.maLocale
),
404 maBcp47( rLanguageTag
.maBcp47
),
405 mpImplLangtag( nullptr),
406 mnLangID( rLanguageTag
.mnLangID
),
407 meScriptType( LanguageTag::ScriptType::UNKNOWN
),
408 meIsValid( DECISION_DONTKNOW
),
409 meIsIsoLocale( DECISION_DONTKNOW
),
410 meIsIsoODF( DECISION_DONTKNOW
),
411 meIsLiblangtagNeeded( DECISION_DONTKNOW
),
412 mbSystemLocale( rLanguageTag
.mbSystemLocale
),
413 mbInitializedBcp47( rLanguageTag
.mbInitializedBcp47
),
414 mbInitializedLocale( rLanguageTag
.mbInitializedLocale
),
415 mbInitializedLangID( rLanguageTag
.mbInitializedLangID
),
416 mbCachedLanguage( false),
417 mbCachedScript( false),
418 mbCachedCountry( false),
419 mbCachedVariants( false),
420 mbCachedGlibcString( false)
// Copy constructor: takes over every member of rLanguageTagImpl including
// the cached strings, decisions and flags.
// The liblangtag handle is not shared: a non-null source handle is
// duplicated with lt_tag_copy(), a null one stays null.
// NOTE(review): the constructor body is on lines not shown in this excerpt.
425 LanguageTagImpl::LanguageTagImpl( const LanguageTagImpl
& rLanguageTagImpl
)
427 maLocale( rLanguageTagImpl
.maLocale
),
428 maBcp47( rLanguageTagImpl
.maBcp47
),
429 maCachedLanguage( rLanguageTagImpl
.maCachedLanguage
),
430 maCachedScript( rLanguageTagImpl
.maCachedScript
),
431 maCachedCountry( rLanguageTagImpl
.maCachedCountry
),
432 maCachedVariants( rLanguageTagImpl
.maCachedVariants
),
433 maCachedGlibcString( rLanguageTagImpl
.maCachedGlibcString
),
434 mpImplLangtag( rLanguageTagImpl
.mpImplLangtag
?
435 lt_tag_copy( rLanguageTagImpl
.mpImplLangtag
) : nullptr),
436 mnLangID( rLanguageTagImpl
.mnLangID
),
437 meScriptType( rLanguageTagImpl
.meScriptType
),
438 meIsValid( rLanguageTagImpl
.meIsValid
),
439 meIsIsoLocale( rLanguageTagImpl
.meIsIsoLocale
),
440 meIsIsoODF( rLanguageTagImpl
.meIsIsoODF
),
441 meIsLiblangtagNeeded( rLanguageTagImpl
.meIsLiblangtagNeeded
),
442 mbSystemLocale( rLanguageTagImpl
.mbSystemLocale
),
443 mbInitializedBcp47( rLanguageTagImpl
.mbInitializedBcp47
),
444 mbInitializedLocale( rLanguageTagImpl
.mbInitializedLocale
),
445 mbInitializedLangID( rLanguageTagImpl
.mbInitializedLangID
),
446 mbCachedLanguage( rLanguageTagImpl
.mbCachedLanguage
),
447 mbCachedScript( rLanguageTagImpl
.mbCachedScript
),
448 mbCachedCountry( rLanguageTagImpl
.mbCachedCountry
),
449 mbCachedVariants( rLanguageTagImpl
.mbCachedVariants
),
450 mbCachedGlibcString( rLanguageTagImpl
.mbCachedGlibcString
)
// Copy assignment: member-wise copy of rLanguageTagImpl into this object.
// NOTE(review): the statement executed on self-assignment, the body of the
// final condition and the return statement are on lines not shown in this
// excerpt.
457 LanguageTagImpl
& operator=( const LanguageTagImpl
& rLanguageTagImpl
)
// Self-assignment is detected first.
459 if (&rLanguageTagImpl
== this)
462 maLocale
= rLanguageTagImpl
.maLocale
;
463 maBcp47
= rLanguageTagImpl
.maBcp47
;
464 maCachedLanguage
= rLanguageTagImpl
.maCachedLanguage
;
465 maCachedScript
= rLanguageTagImpl
.maCachedScript
;
466 maCachedCountry
= rLanguageTagImpl
.maCachedCountry
;
467 maCachedVariants
= rLanguageTagImpl
.maCachedVariants
;
468 maCachedGlibcString
= rLanguageTagImpl
.maCachedGlibcString
;
// Replace the liblangtag handle: remember the old one, duplicate the
// source handle with lt_tag_copy() (or take null), and only then release
// the old handle.
469 lt_tag_t
* oldTag
= mpImplLangtag
;
470 mpImplLangtag
= rLanguageTagImpl
.mpImplLangtag
?
471 lt_tag_copy( rLanguageTagImpl
.mpImplLangtag
) : nullptr;
472 lt_tag_unref(oldTag
);
473 mnLangID
= rLanguageTagImpl
.mnLangID
;
474 meScriptType
= rLanguageTagImpl
.meScriptType
;
475 meIsValid
= rLanguageTagImpl
.meIsValid
;
476 meIsIsoLocale
= rLanguageTagImpl
.meIsIsoLocale
;
477 meIsIsoODF
= rLanguageTagImpl
.meIsIsoODF
;
478 meIsLiblangtagNeeded
= rLanguageTagImpl
.meIsLiblangtagNeeded
;
479 mbSystemLocale
= rLanguageTagImpl
.mbSystemLocale
;
480 mbInitializedBcp47
= rLanguageTagImpl
.mbInitializedBcp47
;
481 mbInitializedLocale
= rLanguageTagImpl
.mbInitializedLocale
;
482 mbInitializedLangID
= rLanguageTagImpl
.mbInitializedLangID
;
483 mbCachedLanguage
= rLanguageTagImpl
.mbCachedLanguage
;
484 mbCachedScript
= rLanguageTagImpl
.mbCachedScript
;
485 mbCachedCountry
= rLanguageTagImpl
.mbCachedCountry
;
486 mbCachedVariants
= rLanguageTagImpl
.mbCachedVariants
;
487 mbCachedGlibcString
= rLanguageTagImpl
.mbCachedGlibcString
;
// Special case: this object had no liblangtag handle before but has one
// now. What is done in that case is not visible in this excerpt.
488 if (mpImplLangtag
&& !oldTag
)
// Destructor: releases the liblangtag handle with lt_tag_unref().
// NOTE(review): lines between the signature and this call are not shown in
// this excerpt, so a guarding condition may exist.
494 LanguageTagImpl::~LanguageTagImpl()
498 lt_tag_unref( mpImplLangtag
);
// Constructs a tag from a BCP 47 string.
// An empty string denotes the system locale; otherwise the BCP 47
// representation counts as initialized. LangID starts as LANGUAGE_DONTKNOW.
// NOTE(review): the condition around the canonicalize() call (presumably
// testing bCanonicalize) and the statements after the trailing comment are
// on lines not shown in this excerpt.
503 LanguageTag::LanguageTag( const OUString
& rBcp47LanguageTag
, bool bCanonicalize
)
505 maBcp47( rBcp47LanguageTag
),
506 mnLangID( LANGUAGE_DONTKNOW
),
507 mbSystemLocale( rBcp47LanguageTag
.isEmpty()),
508 mbInitializedBcp47( !mbSystemLocale
),
509 mbInitializedLocale( false),
510 mbInitializedLangID( false),
515 getImpl()->canonicalize();
516 // Registration itself may already have canonicalized, so do an
517 // unconditional sync.
// Constructs a tag from a css::lang::Locale.
// An empty Language field denotes the system locale. None of the
// representations is marked initialized here; the Locale flag stays false
// on purpose, see the inline remark. Afterwards vendor variants are removed
// from the stored locale by handleVendorVariant().
// NOTE(review): the initializer of maLocale is on a line not shown in this
// excerpt.
524 LanguageTag::LanguageTag( const css::lang::Locale
& rLocale
)
527 mnLangID( LANGUAGE_DONTKNOW
),
528 mbSystemLocale( rLocale
.Language
.isEmpty()),
529 mbInitializedBcp47( false),
530 mbInitializedLocale( false), // we do not know which mess we got passed in
531 mbInitializedLangID( false),
534 handleVendorVariant( maLocale
);
// Constructs a tag from a LangID.
// LANGUAGE_SYSTEM denotes the system locale; for every other value the
// LangID representation counts as initialized.
538 LanguageTag::LanguageTag( LanguageType nLanguage
)
540 mnLangID( nLanguage
),
541 mbSystemLocale( nLanguage
== LANGUAGE_SYSTEM
),
542 mbInitializedBcp47( false),
543 mbInitializedLocale( false),
544 mbInitializedLangID( !mbSystemLocale
),
// Constructs a tag from an optional BCP 47 string plus separate language,
// script and country parts.
// The tag is the system locale when both rBcp47 and rLanguage are empty;
// a non-empty rBcp47 marks the BCP 47 representation as initialized.
// NOTE(review): the initializer of maBcp47, the statement following the
// first condition and the conditions selecting between the two assembling
// variants below are on lines not shown in this excerpt.
550 LanguageTag::LanguageTag( const OUString
& rBcp47
, const OUString
& rLanguage
,
551 std::u16string_view rScript
, const OUString
& rCountry
)
554 mnLangID( LANGUAGE_DONTKNOW
),
555 mbSystemLocale( rBcp47
.isEmpty() && rLanguage
.isEmpty()),
556 mbInitializedBcp47( !rBcp47
.isEmpty()),
557 mbInitializedLocale( false),
558 mbInitializedLangID( false),
// System locale or BCP 47 string given: handled separately from the
// assembling of the parts below.
561 if (mbSystemLocale
|| mbInitializedBcp47
)
// Variant without script: BCP 47 string is language-country and the locale
// carries Language and Country directly.
566 maBcp47
= rLanguage
+ "-" + rCountry
;
567 mbInitializedBcp47
= true;
568 maLocale
.Language
= rLanguage
;
569 maLocale
.Country
= rCountry
;
570 mbInitializedLocale
= true;
// Variant with script: BCP 47 string is language-script, followed by
// -country unless rCountry is empty.
574 if (rCountry
.isEmpty())
575 maBcp47
= rLanguage
+ "-" + rScript
;
577 maBcp47
= rLanguage
+ "-" + rScript
+ "-" + rCountry
;
578 mbInitializedBcp47
= true;
// The locale then uses I18NLANGTAG_QLT as Language and carries the full
// BCP 47 string in Variant.
579 maLocale
.Language
= I18NLANGTAG_QLT
;
580 maLocale
.Country
= rCountry
;
581 maLocale
.Variant
= maBcp47
;
582 mbInitializedLocale
= true;
// Constructs a tag from an rtl_Locale by copying its Language, Country and
// Variant into maLocale.
// An empty Language denotes the system locale; otherwise the Locale
// representation counts as initialized. convertFromRtlLocale() is called
// afterwards.
587 LanguageTag::LanguageTag( const rtl_Locale
& rLocale
)
589 maLocale( rLocale
.Language
, rLocale
.Country
, rLocale
.Variant
),
590 mnLangID( LANGUAGE_DONTKNOW
),
591 mbSystemLocale( maLocale
.Language
.isEmpty()),
592 mbInitializedBcp47( false),
593 mbInitializedLocale( !mbSystemLocale
),
594 mbInitializedLangID( false),
597 convertFromRtlLocale();
// Destructor with an empty body; members clean up after themselves.
600 LanguageTag::~LanguageTag() {}
// Registers the pair (maBcp47, LangID) in the two registries of this file.
// nRegisterID is a suggested LangID; 0 or LANGUAGE_DONTKNOW requests a
// generated on-the-fly ID.
// Visible steps: make sure a BCP 47 string exists, look up or create the
// impl in theMapBcp47() under theMutex(), decide on the LangID to use,
// store it in the impl (and in this object if it is a different impl) and
// cross-insert the impl into theMapLangID().
// NOTE(review): return statements, else lines and a few assignments (for
// example of bOtherImpl) are on lines not shown in this excerpt.
602 LanguageTag::ImplPtr
LanguageTagImpl::registerOnTheFly( LanguageType nRegisterID
)
604 LanguageTag::ImplPtr pImpl
;
// Without a BCP 47 string try to derive one from the locale.
606 if (!mbInitializedBcp47
)
608 if (mbInitializedLocale
)
610 maBcp47
= LanguageTagImpl::convertToBcp47( maLocale
);
611 mbInitializedBcp47
= !maBcp47
.isEmpty();
// Still no string: nothing can be registered.
614 if (maBcp47
.isEmpty())
616 SAL_WARN( "i18nlangtag", "LanguageTagImpl::registerOnTheFly: no Bcp47 string, no registering");
620 std::unique_lock
aGuard( theMutex());
// Look for an impl already registered for this BCP 47 string.
622 MapBcp47
& rMapBcp47
= theMapBcp47();
623 MapBcp47::const_iterator
it( rMapBcp47
.find( maBcp47
));
624 bool bOtherImpl
= false;
625 if (it
!= rMapBcp47
.end())
627 SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: found impl for '" << maBcp47
<< "'");
628 pImpl
= (*it
).second
;
629 if (pImpl
.get() != this)
631 // Could happen for example if during registerImpl() the tag was
632 // changed via canonicalize() and the result was already present in
633 // the map before, for example 'bn-Beng' => 'bn'. This specific
634 // case is now taken care of in registerImpl() and doesn't reach
635 // here. However, use the already existing impl if it matches.
636 SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: using other impl for this '" << maBcp47
<< "'");
637 *this = *pImpl
; // ensure consistency
// Not registered yet: create a new impl as a copy of this object and
// insert it under the BCP 47 string.
643 SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: new impl for '" << maBcp47
<< "'");
644 pImpl
= std::make_shared
<LanguageTagImpl
>( *this);
645 rMapBcp47
.insert( ::std::make_pair( maBcp47
, pImpl
));
// A LangID is only determined if the impl is new or has none yet.
648 if (!bOtherImpl
|| !pImpl
->mbInitializedLangID
)
650 if (nRegisterID
== LanguageType(0) || nRegisterID
== LANGUAGE_DONTKNOW
)
651 nRegisterID
= getNextOnTheFlyLanguage();
654 // Accept a suggested ID only if it is not mapped yet to something
655 // different, otherwise we would end up with ambiguous assignments
656 // of different language tags, for example for the same primary
657 // LangID with "no", "nb" and "nn".
658 const MapLangID
& rMapLangID
= theMapLangID();
659 MapLangID::const_iterator
itID( rMapLangID
.find( nRegisterID
));
660 if (itID
!= rMapLangID
.end())
// The suggested ID belongs to a different tag: fall back to a generated ID.
662 if ((*itID
).second
->maBcp47
!= maBcp47
)
664 SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: not using suggested 0x"
665 << ::std::hex
<< nRegisterID
<< " for '" << maBcp47
<< "' have '"
666 << (*itID
).second
->maBcp47
<< "'");
667 nRegisterID
= getNextOnTheFlyLanguage();
671 SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: suggested 0x"
672 << ::std::hex
<< nRegisterID
<< " for '" << maBcp47
<< "' already registered");
678 // out of IDs, nothing to register
// Store the chosen ID in the registered impl and, if that is another
// object, in this one as well.
681 pImpl
->mnLangID
= nRegisterID
;
682 pImpl
->mbInitializedLangID
= true;
683 if (pImpl
.get() != this)
685 mnLangID
= nRegisterID
;
686 mbInitializedLangID
= true;
// Cross-insert into the LangID registry; res.second tells whether the
// insertion took place.
690 ::std::pair
< MapLangID::const_iterator
, bool > res(
691 theMapLangID().insert( ::std::make_pair( pImpl
->mnLangID
, pImpl
)));
694 SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: cross-inserted 0x"
695 << ::std::hex
<< pImpl
->mnLangID
<< " for '" << maBcp47
<< "'");
699 SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: not cross-inserted 0x"
700 << ::std::hex
<< pImpl
->mnLangID
<< " for '" << maBcp47
<< "' have '"
701 << (*res
.first
).second
->maBcp47
<< "'");
// Looks up nRegisterID in the LangID registry and returns the script type
// of the impl registered for it, or ScriptType::UNKNOWN if the ID is not
// registered.
// NOTE(review): theMutex() is not locked in the visible lines of this
// function, unlike in the registering functions - confirm this is intended.
708 LanguageTag::ScriptType
LanguageTag::getOnTheFlyScriptType( LanguageType nRegisterID
)
710 const MapLangID
& rMapLangID
= theMapLangID();
711 MapLangID::const_iterator
itID( rMapLangID
.find( nRegisterID
));
712 if (itID
!= rMapLangID
.end())
713 return (*itID
).second
->getScriptType();
715 return ScriptType::UNKNOWN
;
// Sets the configured system language and re-registers the system locale.
// LANGUAGE_DONTKNOW and LANGUAGE_SYSTEM are rejected with a warning.
// NOTE(review): the statement ending the rejected case (presumably a
// return) is on a line not shown in this excerpt.
720 void LanguageTag::setConfiguredSystemLanguage( LanguageType nLang
)
722 if (nLang
== LANGUAGE_DONTKNOW
|| nLang
== LANGUAGE_SYSTEM
)
724 SAL_WARN( "i18nlangtag",
725 "LanguageTag::setConfiguredSystemLanguage: refusing to set unresolved system locale 0x" <<
726 ::std::hex
<< nLang
);
729 SAL_INFO( "i18nlangtag", "LanguageTag::setConfiguredSystemLanguage: setting to 0x" << ::std::hex
<< nLang
);
// Pass the value on to MsLangId.
730 MsLangId::LanguageTagAccess::setConfiguredSystemLanguage( nLang
);
731 // Reset system locale to none and let registerImpl() do the rest to
732 // initialize a new one.
733 theSystemLocale().reset();
// Registering a LANGUAGE_SYSTEM tag creates the new system locale impl.
734 LanguageTag
aLanguageTag( LANGUAGE_SYSTEM
);
735 aLanguageTag
.registerImpl();
// File-local switch, initially false. Its readers are not within this
// excerpt; by its name it presumably disables parsing through liblangtag
// - confirm at the use sites.
738 static bool lt_tag_parse_disabled
= false;
// Turns the switch on. No way to turn it off again is visible here.
741 void LanguageTag::disable_lt_tag_parse()
743 lt_tag_parse_disabled
= true;
// True if nLang is neither LANGUAGE_DONTKNOW nor LANGUAGE_SYSTEM and is
// either an on-the-fly ID or identical to its own primary language part.
// LanguageTag::registerImpl() uses the result as the initial decision
// whether to cross-insert an impl into the LangID registry.
746 static bool lcl_isKnownOnTheFlyID( LanguageType nLang
)
748 return nLang
!= LANGUAGE_DONTKNOW
&& nLang
!= LANGUAGE_SYSTEM
&&
749 (LanguageTag::isOnTheFlyID( nLang
) || (nLang
== MsLangId::getPrimaryLanguage( nLang
)));
// Finds or creates the shared LanguageTagImpl that belongs to this tag.
// Visible steps, in order:
//  - shortcut through theSystemLocale() for the system locale, resolving
//    the LangID via MsLangId::getRealLanguage() if needed;
//  - one shared impl for LANGUAGE_DONTKNOW;
//  - shortcut when LangID or BCP 47 string equal those of the system
//    locale impl;
//  - the BCP 47 string is derived from maLocale when neither LangID nor
//    BCP 47 string is initialized;
//  - under theMutex(): lookup or insert in theMapLangID() (LangID known) or
//    in theMapBcp47() (BCP 47 string known), cross-inserting into the other
//    registry when the round-trip conversion yields the same value;
//  - otherwise an unregistered impl is created after a warning;
//  - the impl becomes theSystemLocale() for a resolved system locale.
// NOTE(review): return statements, else lines, counter increments and some
// conditions are on lines not shown in this excerpt.
753 LanguageTag::ImplPtr
LanguageTag::registerImpl() const
755 // XXX NOTE: Do not use non-static LanguageTag::convert...() member methods
756 // here as they access getImpl() and syncFromImpl() and would lead to
757 // recursion. Also do not use the static LanguageTag::convertTo...()
758 // methods as they may create temporary LanguageTag instances. Only
759 // LanguageTagImpl::convertToBcp47(Locale) is ok.
763 #if OSL_DEBUG_LEVEL > 0
764 static std::atomic_int nCalls
= 0;
766 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCalls
<< " calls");
769 // Do not register unresolved system locale, also force LangID if system
770 // and take the system locale shortcut if possible.
773 pImpl
= theSystemLocale();
776 #if OSL_DEBUG_LEVEL > 0
777 static size_t nCallsSystem
= 0;
779 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystem
<< " system calls");
// Resolve the LangID of the system locale; it only counts as initialized
// if the result is something other than LANGUAGE_SYSTEM.
783 if (!mbInitializedLangID
)
785 mnLangID
= MsLangId::getRealLanguage( LANGUAGE_SYSTEM
);
786 mbInitializedLangID
= (mnLangID
!= LANGUAGE_SYSTEM
);
787 SAL_WARN_IF( !mbInitializedLangID
, "i18nlangtag", "LanguageTag::registerImpl: can't resolve system!");
791 if (mbInitializedLangID
)
793 if (mnLangID
== LANGUAGE_DONTKNOW
)
795 static LanguageTag::ImplPtr theDontKnow
;
796 // Heavy usage of LANGUAGE_DONTKNOW, make it an own Impl for all the
797 // conversion attempts. At the same time provide a central breakpoint
798 // to inspect such places.
800 theDontKnow
= std::make_shared
<LanguageTagImpl
>( *this);
802 #if OSL_DEBUG_LEVEL > 0
803 static size_t nCallsDontKnow
= 0;
805 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsDontKnow
<< " DontKnow calls");
811 // A great share are calls for a system equal locale.
812 pImpl
= theSystemLocale();
813 if (pImpl
&& pImpl
->mnLangID
== mnLangID
)
815 #if OSL_DEBUG_LEVEL > 0
816 static size_t nCallsSystemEqual
= 0;
818 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystemEqual
819 << " system equal LangID calls");
826 // Force Bcp47 if not LangID.
827 if (!mbInitializedLangID
&& !mbInitializedBcp47
)
829 // The one central point to set mbInitializedLocale=true if a
830 // LanguageTag was initialized with a Locale. We will now convert and
831 // possibly later resolve it.
832 if (!mbInitializedLocale
&& (mbSystemLocale
|| !maLocale
.Language
.isEmpty()))
833 mbInitializedLocale
= true;
834 SAL_WARN_IF( !mbInitializedLocale
, "i18nlangtag", "LanguageTag::registerImpl: still not mbInitializedLocale");
836 maBcp47
= LanguageTagImpl::convertToBcp47( maLocale
);
837 mbInitializedBcp47
= !maBcp47
.isEmpty();
840 if (mbInitializedBcp47
)
842 // A great share are calls for a system equal locale.
843 pImpl
= theSystemLocale();
844 if (pImpl
&& pImpl
->maBcp47
== maBcp47
)
846 #if OSL_DEBUG_LEVEL > 0
847 static size_t nCallsSystemEqual
= 0;
849 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystemEqual
<< " system equal BCP47 calls");
855 #if OSL_DEBUG_LEVEL > 0
856 static size_t nCallsNonSystem
= 0;
858 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsNonSystem
<< " non-system calls");
// From here on the registries are accessed under the mutex.
861 std::unique_lock
aGuard( theMutex());
863 #if OSL_DEBUG_LEVEL > 0
864 static long nRunning
= 0;
865 // Entering twice here is ok, which is needed for fallback init in
866 // getKnowns() in canonicalize() via pImpl->convertBcp47ToLocale() below,
867 // everything else is suspicious.
868 SAL_WARN_IF( nRunning
> 1, "i18nlangtag", "LanguageTag::registerImpl: re-entered for '"
869 << maBcp47
<< "' 0x" << ::std::hex
<< mnLangID
);
870 struct Runner
{ Runner() { ++nRunning
; } ~Runner() { --nRunning
; } } aRunner
;
873 // Prefer LangID map as find+insert needs less comparison work.
874 if (mbInitializedLangID
)
876 MapLangID
& rMap
= theMapLangID();
877 MapLangID::const_iterator
it( rMap
.find( mnLangID
));
878 if (it
!= rMap
.end())
880 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: found impl for 0x" << ::std::hex
<< mnLangID
);
881 pImpl
= (*it
).second
;
// Not found: create the impl from this tag and register it by LangID.
885 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: new impl for 0x" << ::std::hex
<< mnLangID
);
886 pImpl
= std::make_shared
<LanguageTagImpl
>( *this);
887 rMap
.insert( ::std::make_pair( mnLangID
, pImpl
));
// Round-trip check LangID -> Locale -> LangID.
889 if (!pImpl
->mbInitializedLocale
)
890 pImpl
->convertLangToLocale();
891 LanguageType nLang
= MsLangId::Conversion::convertLocaleToLanguage( pImpl
->maLocale
);
892 // If round-trip is identical cross-insert to Bcp47 map.
893 if (nLang
== pImpl
->mnLangID
)
895 if (!pImpl
->mbInitializedBcp47
)
896 pImpl
->convertLocaleToBcp47();
897 ::std::pair
< MapBcp47::const_iterator
, bool > res(
898 theMapBcp47().insert( ::std::make_pair( pImpl
->maBcp47
, pImpl
)));
901 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: cross-inserted '" << pImpl
->maBcp47
<< "' for 0x" << ::std::hex
<< mnLangID
);
905 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted '" << pImpl
->maBcp47
<< "' for 0x" << ::std::hex
<< mnLangID
<< " have 0x"
906 << ::std::hex
<< (*res
.first
).second
->mnLangID
);
// Round-trip differs: the BCP 47 string is still produced for the log
// message, but no cross-insertion is visible for this case.
911 if (!pImpl
->mbInitializedBcp47
)
912 pImpl
->convertLocaleToBcp47();
913 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted '" << pImpl
->maBcp47
<< "' for 0x" << ::std::hex
<< mnLangID
<< " round-trip to 0x" << ::std::hex
<< nLang
);
// No LangID but a BCP 47 string: use the BCP 47 registry.
917 else if (!maBcp47
.isEmpty())
919 MapBcp47
& rMap
= theMapBcp47();
920 MapBcp47::const_iterator
it( rMap
.find( maBcp47
));
921 if (it
!= rMap
.end())
923 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: found impl for '" << maBcp47
<< "'");
924 pImpl
= (*it
).second
;
// Not found: create the impl and register it under the original string.
928 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: new impl for '" << maBcp47
<< "'");
929 pImpl
= std::make_shared
<LanguageTagImpl
>( *this);
930 ::std::pair
< MapBcp47::iterator
, bool > insOrig( rMap
.insert( ::std::make_pair( maBcp47
, pImpl
)));
931 // If changed after canonicalize() also add the resulting tag to
933 if (pImpl
->synCanonicalize())
935 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: canonicalized to '" << pImpl
->maBcp47
<< "'");
936 ::std::pair
< MapBcp47::const_iterator
, bool > insCanon(
937 rMap
.insert( ::std::make_pair( pImpl
->maBcp47
, pImpl
)));
938 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << (insCanon
.second
? "" : "not ")
939 << "inserted '" << pImpl
->maBcp47
<< "'");
940 // If the canonicalized tag already existed (was not inserted)
941 // and impls are different, make this impl that impl and skip
942 // the rest if that LangID is present as well. The existing
943 // entry may or may not be different, it may even be strictly
944 // identical to this if it differs only in case (e.g. ko-kr =>
945 // ko-KR) which was corrected in canonicalize() hence also in
946 // the map entry but comparison is case insensitive and found
948 if (!insCanon
.second
&& (*insCanon
.first
).second
!= pImpl
)
// Both the entry of the original string and pImpl now refer to the
// impl that was already registered for the canonicalized string.
950 (*insOrig
.first
).second
= pImpl
= (*insCanon
.first
).second
;
951 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: share impl with 0x"
952 << ::std::hex
<< pImpl
->mnLangID
);
955 if (!pImpl
->mbInitializedLangID
)
957 // Try round-trip Bcp47->Locale->LangID->Locale->Bcp47.
958 if (!pImpl
->mbInitializedLocale
)
959 pImpl
->convertBcp47ToLocale();
960 if (!pImpl
->mbInitializedLangID
)
961 pImpl
->convertLocaleToLang( true);
962 // Unconditionally insert (round-trip is possible) for
963 // on-the-fly IDs and (generated or not) suggested IDs.
964 bool bInsert
= lcl_isKnownOnTheFlyID( pImpl
->mnLangID
);
968 if (pImpl
->mnLangID
!= LANGUAGE_DONTKNOW
)
970 // May have involved canonicalize(), so compare with
971 // pImpl->maBcp47 instead of maBcp47!
972 aBcp47
= LanguageTagImpl::convertToBcp47(
973 MsLangId::Conversion::convertLanguageToLocale( pImpl
->mnLangID
, true));
974 bInsert
= (aBcp47
== pImpl
->maBcp47
);
977 // If round-trip is identical cross-insert to Bcp47 map.
980 ::std::pair
< MapLangID::const_iterator
, bool > res(
981 theMapLangID().insert( ::std::make_pair( pImpl
->mnLangID
, pImpl
)));
984 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: cross-inserted 0x"
985 << ::std::hex
<< pImpl
->mnLangID
<< " for '" << maBcp47
<< "'");
989 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted 0x"
990 << ::std::hex
<< pImpl
->mnLangID
<< " for '" << maBcp47
<< "' have '"
991 << (*res
.first
).second
->maBcp47
<< "'");
996 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted 0x"
997 << ::std::hex
<< pImpl
->mnLangID
<< " for '" << maBcp47
<< "' round-trip to '"
// Neither LangID nor BCP 47 string available: hand out an impl that is not
// entered into any registry.
1005 SAL_WARN( "i18nlangtag", "LanguageTag::registerImpl: can't register for 0x" << ::std::hex
<< mnLangID
);
1006 pImpl
= std::make_shared
<LanguageTagImpl
>( *this);
1009 // If we reach here for mbSystemLocale we didn't have theSystemLocale
1010 // above, so add it.
1011 if (mbSystemLocale
&& mbInitializedLangID
)
1013 theSystemLocale() = pImpl
;
1014 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: added system locale 0x"
1015 << ::std::hex
<< pImpl
->mnLangID
<< " '" << pImpl
->maBcp47
<< "'");
// Const accessor for the implementation object. The visible statements assign
// mpImpl from registerImpl(), refresh this instance's variables through
// syncVarsFromRawImpl() and return the raw pointer held by mpImpl.
// NOTE(review): the condition guarding the registration is not visible in this
// extract; presumably it only runs while mpImpl is unset - confirm.
1022 LanguageTagImpl
const * LanguageTag::getImpl() const
1026 mpImpl
= registerImpl();
1027 syncVarsFromRawImpl();
1029 return mpImpl
.get();
// Non-const accessor for the implementation object; performs the same visible
// steps as the const overload: mpImpl = registerImpl(), syncVarsFromRawImpl(),
// then returns mpImpl.get().
1033 LanguageTagImpl
* LanguageTag::getImpl()
1037 mpImpl
= registerImpl();
1038 syncVarsFromRawImpl();
1040 return mpImpl
.get();
// Puts the members visible here back into the "system locale, nothing
// initialized" state: empty Locale, LANGUAGE_SYSTEM, mbSystemLocale set and all
// mbInitialized* flags as well as mbIsFallback cleared.
1044 void LanguageTag::resetVars()
1047 maLocale
= lang::Locale();
1049 mnLangID
= LANGUAGE_SYSTEM
;
1050 mbSystemLocale
= true;
1051 mbInitializedBcp47
= false;
1052 mbInitializedLocale
= false;
1053 mbInitializedLangID
= false;
1054 mbIsFallback
= false;
// Re-initializes this tag from a BCP 47 string. An empty string denotes the
// system locale, in which case mbInitializedBcp47 stays false.
1058 LanguageTag
& LanguageTag::reset( const OUString
& rBcp47LanguageTag
)
1061 maBcp47
= rBcp47LanguageTag
;
1062 mbSystemLocale
= rBcp47LanguageTag
.isEmpty();
1063 mbInitializedBcp47
= !mbSystemLocale
;
// Re-initializes this tag from a Locale. A Locale with an empty Language
// denotes the system locale, in which case mbInitializedLocale stays false.
// handleVendorVariant() is then applied to maLocale.
1069 LanguageTag
& LanguageTag::reset( const css::lang::Locale
& rLocale
)
1073 mbSystemLocale
= rLocale
.Language
.isEmpty();
1074 mbInitializedLocale
= !mbSystemLocale
;
1075 handleVendorVariant( maLocale
);
// Re-initializes this tag from an MS-LangID. LANGUAGE_SYSTEM denotes the
// system locale, in which case mbInitializedLangID stays false.
1080 LanguageTag
& LanguageTag::reset( LanguageType nLanguage
)
1083 mnLangID
= nLanguage
;
1084 mbSystemLocale
= nLanguage
== LANGUAGE_SYSTEM
;
1085 mbInitializedLangID
= !mbSystemLocale
;
// Canonicalizes maBcp47 and decides whether liblangtag is needed for this tag.
//
// While meIsLiblangtagNeeded is still DECISION_DONTKNOW the code first tries to
// classify the tag without liblangtag: simpleExtract() on the BCP 47 string,
// the Locale <-> MS-LangID conversions, the getKnowns() set of known fallback
// strings and MsLangId::Conversion::getOverride(). If that settles on
// DECISION_NO, meIsValid becomes DECISION_YES and bChanged is returned.
// Otherwise meIsLiblangtagNeeded becomes DECISION_YES and the tag is parsed
// and canonicalized with lt_tag_parse() / lt_tag_canonicalize(); meIsValid is
// set according to the outcome.
1090 bool LanguageTagImpl::canonicalize()
// Local helper: remembers the tag pointer only if no lt_tag_t existed at
// construction, and its destructor then passes a tag that exists by that time
// to lt_tag_dump().
1097 explicit dumper( lt_tag_t
** pp
) : mpp( *pp
? NULL
: pp
) {}
1098 ~dumper() { if (mpp
&& *mpp
) lt_tag_dump( *mpp
); }
1100 dumper
aDumper( &mpImplLangtag
);
1103 bool bChanged
= false;
1105 // Side effect: have maBcp47 in any case, resolved system.
1106 // Some methods calling canonicalize() (or not calling it due to
1107 // meIsLiblangtagNeeded==DECISION_NO) rely on this! Hence do not set
1108 // meIsLiblangtagNeeded anywhere else than hereafter.
1111 // The simple cases and known locales don't need liblangtag processing,
1112 // which also avoids loading liblangtag data on startup.
1113 if (meIsLiblangtagNeeded
== DECISION_DONTKNOW
)
// bTemporary* record values that are only set up for this check and are
// reset again further below.
1115 bool bTemporaryLocale
= false;
1116 bool bTemporaryLangID
= false;
1117 if (!mbInitializedLocale
&& !mbInitializedLangID
)
1121 mnLangID
= MsLangId::getRealLanguage( LANGUAGE_SYSTEM
);
1122 mbInitializedLangID
= true;
1126 // Now this is getting funny... we only have some BCP47 string
1127 // and want to determine if parsing it would be possible
1128 // without using liblangtag just to see if it is a simple known
1129 // locale or could fall back to one.
1130 OUString aLanguage
, aScript
, aCountry
, aRegion
, aVariants
;
1131 Extraction eExt
= simpleExtract( maBcp47
, aLanguage
, aScript
, aCountry
, aRegion
, aVariants
);
1132 if (eExt
!= EXTRACTED_NONE
)
1134 if (eExt
== EXTRACTED_LSC
|| eExt
== EXTRACTED_LV
|| eExt
== EXTRACTED_LR
)
1136 // Rebuild bcp47 with proper casing of tags.
1137 OUStringBuffer
aBuf( aLanguage
.getLength() + 1 + aScript
.getLength() +
1138 1 + aCountry
.getLength() + 1 + aRegion
.getLength() + 1 + aVariants
.getLength());
1139 aBuf
.append( aLanguage
);
1140 if (!aScript
.isEmpty())
1141 aBuf
.append("-" + aScript
);
1142 if (!aCountry
.isEmpty())
1143 aBuf
.append("-" + aCountry
);
1144 if (!aRegion
.isEmpty())
1145 aBuf
.append("-" + aRegion
);
1146 if (!aVariants
.isEmpty())
1147 aBuf
.append("-" + aVariants
);
1148 OUString
aStr( aBuf
.makeStringAndClear());
1150 if (maBcp47
!= aStr
)
1156 if (eExt
== EXTRACTED_LSC
&& aScript
.isEmpty())
1158 maLocale
.Language
= aLanguage
;
1159 maLocale
.Country
= aCountry
;
1161 else if (eExt
== EXTRACTED_C_LOCALE
)
1163 maLocale
.Language
= aLanguage
;
1164 maLocale
.Country
= aCountry
;
// Remaining case: I18NLANGTAG_QLT as Language with the full BCP 47 string
// stored in Variant.
1168 maLocale
.Language
= I18NLANGTAG_QLT
;
1169 maLocale
.Country
= aCountry
;
1170 maLocale
.Variant
= maBcp47
;
1172 bTemporaryLocale
= mbInitializedLocale
= true;
1176 if (mbInitializedLangID
&& !mbInitializedLocale
)
1178 // Do not call getLocale() here because that prefers
1179 // convertBcp47ToLocale() which would end up in recursion via
1182 // Prepare to verify that we have a known locale, not just an
1183 // arbitrary MS-LangID.
1184 convertLangToLocale();
1186 if (mbInitializedLocale
)
1188 if (!mbInitializedLangID
)
1190 if (convertLocaleToLang( false))
1192 if (bTemporaryLocale
|| mnLangID
== LANGUAGE_DONTKNOW
)
1193 bTemporaryLangID
= true;
1195 if (mnLangID
!= LANGUAGE_DONTKNOW
&& mnLangID
!= LANGUAGE_SYSTEM
)
1196 meIsLiblangtagNeeded
= DECISION_NO
; // known locale
1199 const KnownTagSet
& rKnowns
= getKnowns();
1200 if (rKnowns
.find( maBcp47
) != rKnowns
.end())
1201 meIsLiblangtagNeeded
= DECISION_NO
; // known fallback
1203 // We may have an internal override "canonicalization".
1204 lang::Locale
aNew( MsLangId::Conversion::getOverride( maLocale
));
1205 if (!aNew
.Language
.isEmpty() &&
1206 (aNew
.Language
!= maLocale
.Language
||
1207 aNew
.Country
!= maLocale
.Country
||
1208 aNew
.Variant
!= maLocale
.Variant
))
1210 maBcp47
= LanguageTagImpl::convertToBcp47( aNew
);
1212 meIsIsoLocale
= DECISION_DONTKNOW
;
1213 meIsIsoODF
= DECISION_DONTKNOW
;
1214 meIsLiblangtagNeeded
= DECISION_NO
; // known locale
// Undo the temporary Locale / LangID set up above.
1217 if (bTemporaryLocale
)
1219 mbInitializedLocale
= false;
1220 maLocale
= lang::Locale();
1222 if (bTemporaryLangID
)
1224 mbInitializedLangID
= false;
1225 mnLangID
= LANGUAGE_DONTKNOW
;
1228 if (meIsLiblangtagNeeded
== DECISION_NO
)
1230 meIsValid
= DECISION_YES
; // really, known must be valid ...
1231 return bChanged
; // that's it
// From here on liblangtag is used to parse and canonicalize the tag.
1234 meIsLiblangtagNeeded
= DECISION_YES
;
1235 SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: using liblangtag for '" << maBcp47
<< "'");
1239 theDataRef().init();
1240 mpImplLangtag
= lt_tag_new();
1245 if (!lt_tag_parse_disabled
&& lt_tag_parse(mpImplLangtag
, OUStringToOString(maBcp47
, RTL_TEXTENCODING_UTF8
).getStr(), &aError
.p
))
1249 SAL_WARN("i18nlangtag", "LanguageTagImpl::canonicalize: could not parse '" << maBcp47
<< "'");
1253 char* pTag
= lt_tag_canonicalize(mpImplLangtag
, &aError
.p
);
1254 SAL_WARN_IF(!pTag
, "i18nlangtag", "LanguageTagImpl::canonicalize: could not canonicalize '" << maBcp47
<< "'");
1257 OUString
aNew(OUString::createFromAscii(pTag
));
1258 // Make the lt_tag_t follow the new string if different, which
1259 // removes default script and such.
1260 if (maBcp47
!= aNew
)
1264 meIsIsoLocale
= DECISION_DONTKNOW
;
1265 meIsIsoODF
= DECISION_DONTKNOW
;
1266 if (!lt_tag_parse(mpImplLangtag
, pTag
, &aError
.p
))
1268 SAL_WARN("i18nlangtag", "LanguageTagImpl::canonicalize: could not reparse '"
1271 meIsValid
= DECISION_NO
;
1276 meIsValid
= DECISION_YES
;
1283 SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: could not parse '" << maBcp47
<< "'");
1285 meIsValid
= DECISION_NO
;
// Runs canonicalize() when liblangtag has not been ruled out
// (meIsLiblangtagNeeded != DECISION_NO) and no lt_tag_t exists yet, storing its
// result in bChanged. The visible follow-up statements re-derive an already
// initialized Locale and LangID from the BCP 47 string.
// NOTE(review): whether the re-derivation is guarded by bChanged is not visible
// in this extract - confirm.
1290 bool LanguageTagImpl::synCanonicalize()
1292 bool bChanged
= false;
1293 if (meIsLiblangtagNeeded
!= DECISION_NO
&& !mpImplLangtag
)
1295 bChanged
= canonicalize();
1298 if (mbInitializedLocale
)
1299 convertBcp47ToLocale();
1300 if (mbInitializedLangID
)
1301 convertBcp47ToLang();
// Synchronizes this instance with its implementation object. bRegister is true
// when an initialized BCP 47 string or LangID of this instance differs from
// the implementation's value; that case is logged. The visible statements then
// call syncVarsFromRawImpl() and assign mpImpl from registerImpl().
// NOTE(review): the guard around the re-registration is not visible in this
// extract; presumably it depends on bRegister - confirm.
1308 void LanguageTag::syncFromImpl() const
1310 const LanguageTagImpl
* pImpl
= getImpl();
1311 bool bRegister
= ((mbInitializedBcp47
&& maBcp47
!= pImpl
->maBcp47
) ||
1312 (mbInitializedLangID
&& mnLangID
!= pImpl
->mnLangID
));
1313 SAL_INFO_IF( bRegister
, "i18nlangtag",
1314 "LanguageTag::syncFromImpl: re-registering, '" << pImpl
->maBcp47
<< "' vs '" << maBcp47
<<
1315 " and 0x" << ::std::hex
<< pImpl
->mnLangID
<< " vs 0x" << ::std::hex
<< mnLangID
);
1316 syncVarsFromRawImpl();
1318 mpImpl
= registerImpl();
// Copies the implementation's state into this instance, either as a side
// effect of getImpl() or by calling syncVarsFromRawImpl() directly.
// NOTE(review): the condition selecting between the two calls is not visible in
// this extract - confirm.
1322 void LanguageTag::syncVarsFromImpl() const
1325 getImpl(); // with side effect syncVarsFromRawImpl()
1327 syncVarsFromRawImpl();
// Copies the BCP 47 string, Locale and LangID together with their
// mbInitialized* flags from the implementation object into this instance.
// Reads mpImpl directly instead of going through getImpl().
1331 void LanguageTag::syncVarsFromRawImpl() const
1333 // Do not use getImpl() here.
1334 LanguageTagImpl
* pImpl
= mpImpl
.get();
1338 // Obviously only mutable variables.
1339 mbInitializedBcp47
= pImpl
->mbInitializedBcp47
;
1340 maBcp47
= pImpl
->maBcp47
;
1341 mbInitializedLocale
= pImpl
->mbInitializedLocale
;
1342 maLocale
= pImpl
->maLocale
;
1343 mbInitializedLangID
= pImpl
->mbInitializedLangID
;
1344 mnLangID
= pImpl
->mnLangID
;
// Forwards to LanguageTagImpl::synCanonicalize() on the implementation object
// and keeps its result in bChanged.
1348 bool LanguageTag::synCanonicalize()
1350 bool bChanged
= getImpl()->synCanonicalize();
// Derives maBcp47 from maLocale and marks the BCP 47 string as initialized.
// For a system locale without initialized Locale the Locale is first obtained
// from the LangID. A Locale whose Language is I18NLANGTAG_QLT carries the full
// tag in Variant, which is then used verbatim.
1357 void LanguageTagImpl::convertLocaleToBcp47() const
1359 if (mbSystemLocale
&& !mbInitializedLocale
)
1360 convertLangToLocale();
1362 if (maLocale
.Language
.isEmpty())
1364 // Do not call LanguageTag::convertToBcp47(Locale) that for an empty
1365 // locale via LanguageTag::convertToBcp47(LanguageType) and
1366 // LanguageTag::convertToLocale(LanguageType) would instantiate another
1368 maLocale
= MsLangId::Conversion::convertLanguageToLocale( LANGUAGE_SYSTEM
, false);
1370 if (maLocale
.Language
.isEmpty())
1372 maBcp47
.clear(); // bad luck
1374 else if (maLocale
.Language
== I18NLANGTAG_QLT
)
1376 maBcp47
= maLocale
.Variant
;
1377 meIsIsoLocale
= DECISION_NO
;
1381 maBcp47
= LanguageTag::convertToBcp47( maLocale
);
1383 mbInitializedBcp47
= true;
// Derives mnLangID from maLocale and marks the LangID as initialized.
//
// bAllowOnTheFlyID: when the locale is still unknown after the table lookups,
//   permits falling back to the primary language ID of a known locale of the
//   same language and calling registerOnTheFly().
// bRemapped records whether resolving a non-standard name changed maBcp47.
1387 bool LanguageTagImpl::convertLocaleToLang( bool bAllowOnTheFlyID
)
1389 bool bRemapped
= false;
1392 mnLangID
= MsLangId::getRealLanguage( LANGUAGE_SYSTEM
);
1396 mnLangID
= MsLangId::Conversion::convertLocaleToLanguage( maLocale
);
1397 if (mnLangID
== LANGUAGE_DONTKNOW
)
1399 // convertLocaleToLanguage() only searches in ISO and private
1400 // definitions, search in remaining definitions, i.e. for the "C"
1401 // locale and non-standard things like "sr-latin" or "german" to
1402 // resolve to a known locale, skipping ISO lll-CC that were already
1404 mnLangID
= MsLangId::Conversion::convertIsoNamesToLanguage( maLocale
.Language
, maLocale
.Country
, true);
1405 if (mnLangID
!= LANGUAGE_DONTKNOW
)
1407 // If one found, convert back and adapt Locale and Bcp47
1408 // strings so we have a matching entry.
1409 OUString
aOrgBcp47( maBcp47
);
1410 convertLangToLocale();
1411 convertLocaleToBcp47();
1412 bRemapped
= (maBcp47
!= aOrgBcp47
);
1415 if (mnLangID
== LANGUAGE_DONTKNOW
&& bAllowOnTheFlyID
)
1419 // For language-only (including script) look if we know some
1420 // locale of that language and if so try to use the primary
1421 // language ID of that instead of generating an on-the-fly ID.
1422 if (getCountry().isEmpty() && isIsoODF())
1424 lang::Locale
aLoc( MsLangId::Conversion::lookupFallbackLocale( maLocale
));
1425 // 'en-US' is last resort, do not use except when looking
1427 if (aLoc
.Language
!= "en" || getLanguage() == "en")
1429 mnLangID
= MsLangId::Conversion::convertLocaleToLanguage( aLoc
);
1430 if (mnLangID
!= LANGUAGE_DONTKNOW
)
1431 mnLangID
= MsLangId::getPrimaryLanguage( mnLangID
);
1434 registerOnTheFly( mnLangID
);
1438 SAL_WARN( "i18nlangtag", "LanguageTagImpl::convertLocaleToLang: with bAllowOnTheFlyID invalid '"
1443 mbInitializedLangID
= true;
// Forwards to LanguageTagImpl::convertLocaleToLang() with on-the-fly IDs
// allowed (argument true).
1448 void LanguageTag::convertLocaleToLang()
1450 getImpl()->convertLocaleToLang( true);
// Derives maLocale from the BCP 47 string and marks the Locale as initialized.
// Two forms are visible: Language/Country taken from the langtag with an empty
// Variant, or I18NLANGTAG_QLT as Language with the full tag stored in Variant.
// NOTE(review): the branch on bIso is not visible in this extract; presumably
// the first form is used for ISO locales - confirm.
1455 void LanguageTagImpl::convertBcp47ToLocale()
1457 bool bIso
= isIsoLocale();
1460 maLocale
.Language
= getLanguageFromLangtag();
1461 maLocale
.Country
= getRegionFromLangtag();
1462 maLocale
.Variant
.clear();
1466 maLocale
.Language
= I18NLANGTAG_QLT
;
1467 maLocale
.Country
= getCountry();
1468 maLocale
.Variant
= maBcp47
;
1470 mbInitializedLocale
= true;
// Forwards to LanguageTagImpl::convertBcp47ToLocale() on the implementation
// object returned by getImpl().
1474 void LanguageTag::convertBcp47ToLocale()
1476 getImpl()->convertBcp47ToLocale();
// Derives mnLangID from the BCP 47 string and marks the LangID as initialized.
// Visible are two ways of obtaining it: the real system language, or going via
// the Locale (converting BCP 47 to Locale first if necessary) and
// convertLocaleToLang() with on-the-fly IDs allowed.
1481 void LanguageTagImpl::convertBcp47ToLang()
1485 mnLangID
= MsLangId::getRealLanguage( LANGUAGE_SYSTEM
);
1489 if (!mbInitializedLocale
)
1490 convertBcp47ToLocale();
1491 convertLocaleToLang( true);
1493 mbInitializedLangID
= true;
// Forwards to LanguageTagImpl::convertBcp47ToLang() on the implementation
// object returned by getImpl().
1497 void LanguageTag::convertBcp47ToLang()
1499 getImpl()->convertBcp47ToLang();
1504 void LanguageTagImpl::convertLangToLocale() const
1506 if (mbSystemLocale
&& !mbInitializedLangID
)
1508 mnLangID
= MsLangId::getRealLanguage( LANGUAGE_SYSTEM
);
1509 mbInitializedLangID
= true;
1511 // Resolve system here! The original is remembered as mbSystemLocale.
1512 maLocale
= MsLangId::Conversion::convertLanguageToLocale( mnLangID
, false);
1513 mbInitializedLocale
= true;
// Forwards to LanguageTagImpl::convertLangToLocale() on the implementation
// object returned by getImpl().
1517 void LanguageTag::convertLangToLocale() const
1519 getImpl()->convertLangToLocale();
1524 void LanguageTagImpl::convertLangToBcp47() const
1526 if (!mbInitializedLocale
)
1527 convertLangToLocale();
1528 convertLocaleToBcp47();
1529 mbInitializedBcp47
= true;
// Converts an rtl_Locale style maLocale (Language, Country, and Variant holding
// whatever followed the territory) into a LangID. The pieces are joined into a
// "language_territory..." byte string and handed to
// MsLangId::convertUnxByteStringToLanguage(); an unknown result falls back to
// LANGUAGE_ENGLISH_US. Afterwards the Locale is reset and marked uninitialized.
// NOTE(review): this extract also shows a liblangtag based conversion
// (lt_tag_convert_from_locale); whether that code is compiled in cannot be
// told from here - confirm.
1533 void LanguageTag::convertFromRtlLocale()
1535 // The rtl_Locale follows the Open Group Base Specification,
1536 // 8.2 Internationalization Variables
1537 // language[_territory][.codeset][@modifier]
1538 // On GNU/Linux systems usually being glibc locales.
1539 // sal/osl/unx/nlsupport.c _parse_locale() parses them into
1540 // Language: language 2 or 3 alpha code
1541 // Country: [territory] 2 alpha code
1542 // Variant: [.codeset][@modifier]
1543 // Variant effectively contains anything that follows the territory, not
1544 // looking for '.' dot delimiter or '@' modifier content.
1545 if (maLocale
.Variant
.isEmpty())
1548 OString aStr
= OUStringToOString(maLocale
.Language
, RTL_TEXTENCODING_UTF8
) + "_" + OUStringToOString(Concat2View(maLocale
.Country
+ maLocale
.Variant
),
1549 RTL_TEXTENCODING_UTF8
);
1550 /* FIXME: let liblangtag parse this entirely with
1551 * lt_tag_convert_from_locale() but that needs a patch to pass the
1555 theDataRef::get().init();
1556 mpImplLangtag
= lt_tag_convert_from_locale( aStr
.getStr(), &aError
.p
);
1557 maBcp47
= OStringToOUString( lt_tag_get_string( mpImplLangtag
), RTL_TEXTENCODING_UTF8
);
1558 mbInitializedBcp47
= true;
1560 mnLangID
= MsLangId::convertUnxByteStringToLanguage( aStr
);
1561 if (mnLangID
== LANGUAGE_DONTKNOW
)
1563 SAL_WARN( "i18nlangtag", "LanguageTag(rtl_Locale) - unknown: " << aStr
);
1564 mnLangID
= LANGUAGE_ENGLISH_US
; // we need _something_ here
1566 mbInitializedLangID
= true;
1568 maLocale
= lang::Locale();
1569 mbInitializedLocale
= false;
// Returns the BCP 47 string by reference. If it is not initialized yet it is
// derived from the Locale when that is initialized; the other visible
// conversion starts from the LangID.
1573 const OUString
& LanguageTagImpl::getBcp47() const
1575 if (!mbInitializedBcp47
)
1577 if (mbInitializedLocale
)
1578 convertLocaleToBcp47();
1580 convertLangToBcp47();
// Returns the BCP 47 string of this tag by reference.
//
// bResolveSystem: if false and this tag denotes the system locale, a static
//   empty string is returned instead of the resolved system locale's tag.
// mbInitializedBcp47 is tested twice; the statement between the two tests is
// not visible in this extract. If still uninitialized the implementation's
// getBcp47() is invoked.
1586 const OUString
& LanguageTag::getBcp47( bool bResolveSystem
) const
1588 static constexpr OUString theEmptyBcp47
= u
""_ustr
;
1590 if (!bResolveSystem
&& mbSystemLocale
)
1591 return theEmptyBcp47
;
1592 if (!mbInitializedBcp47
)
1594 if (!mbInitializedBcp47
)
1596 getImpl()->getBcp47();
// Obtains the language subtag. Visible are two sources: liblangtag
// (lt_tag_get_language() / lt_lang_get_tag(), warning on NULL results) and the
// cached value maCachedLanguage, filled by cacheSimpleLSCV() if necessary.
1603 OUString
LanguageTagImpl::getLanguageFromLangtag()
1607 if (maBcp47
.isEmpty())
1611 const lt_lang_t
* pLangT
= lt_tag_get_language( mpImplLangtag
);
1612 SAL_WARN_IF( !pLangT
, "i18nlangtag",
1613 "LanguageTag::getLanguageFromLangtag: pLangT==NULL for '" << maBcp47
<< "'");
1616 const char* pLang
= lt_lang_get_tag( pLangT
);
1617 SAL_WARN_IF( !pLang
, "i18nlangtag",
1618 "LanguageTag::getLanguageFromLangtag: pLang==NULL for '" << maBcp47
<< "'");
1620 aLanguage
= OUString::createFromAscii( pLang
);
1624 if (mbCachedLanguage
|| cacheSimpleLSCV())
1625 aLanguage
= maCachedLanguage
;
// Obtains the script subtag. Visible are two sources: liblangtag
// (lt_tag_get_script() / lt_script_get_tag()) and the cached value
// maCachedScript, filled by cacheSimpleLSCV() if necessary.
1631 OUString
LanguageTagImpl::getScriptFromLangtag()
1635 if (maBcp47
.isEmpty())
1639 const lt_script_t
* pScriptT
= lt_tag_get_script( mpImplLangtag
);
1640 // pScriptT==NULL is valid for default scripts
1643 const char* pScript
= lt_script_get_tag( pScriptT
);
1644 SAL_WARN_IF( !pScript
, "i18nlangtag", "LanguageTag::getScriptFromLangtag: pScript==NULL");
1646 aScript
= OUString::createFromAscii( pScript
);
1650 if (mbCachedScript
|| cacheSimpleLSCV())
1651 aScript
= maCachedScript
;
// Obtains the region subtag. Visible are two sources: liblangtag
// (lt_tag_get_region() / lt_region_get_tag()) and the cached value
// maCachedCountry, filled by cacheSimpleLSCV() if necessary.
1657 OUString
LanguageTagImpl::getRegionFromLangtag()
1661 if (maBcp47
.isEmpty())
1665 const lt_region_t
* pRegionT
= lt_tag_get_region( mpImplLangtag
);
1666 // pRegionT==NULL is valid for language only tags, rough check here
1667 // that does not take sophisticated tags into account that actually
1668 // should have a region, check for ll, lll, ll-Ssss and lll-Ssss so
1669 // that ll-CC and lll-CC actually fail.
1670 SAL_WARN_IF( !pRegionT
&&
1671 maBcp47
.getLength() != 2 && maBcp47
.getLength() != 3 &&
1672 maBcp47
.getLength() != 7 && maBcp47
.getLength() != 8,
1673 "i18nlangtag", "LanguageTag::getRegionFromLangtag: pRegionT==NULL for '" << maBcp47
<< "'");
1676 const char* pRegion
= lt_region_get_tag( pRegionT
);
// NOTE(review): unlike the pRegionT warning above, this message has no space
// between "for" and the opening quote.
1677 SAL_WARN_IF( !pRegion
, "i18nlangtag",
1678 "LanguageTag::getRegionFromLangtag: pRegion==NULL for'" << maBcp47
<< "'");
1680 aRegion
= OUString::createFromAscii( pRegion
);
1684 if (mbCachedCountry
|| cacheSimpleLSCV())
1685 aRegion
= maCachedCountry
;
// Obtains the variant subtags as one string. With liblangtag the list from
// lt_tag_get_variants() is walked and the individual tags are joined with '-';
// the other visible source is the cached value maCachedVariants, filled by
// cacheSimpleLSCV() if necessary.
1691 OUString
LanguageTagImpl::getVariantsFromLangtag()
1693 OUStringBuffer aVariants
;
1695 if (maBcp47
.isEmpty())
1699 const lt_list_t
* pVariantsT
= lt_tag_get_variants( mpImplLangtag
);
1700 for (const lt_list_t
* pE
= pVariantsT
; pE
; pE
= lt_list_next( pE
))
1702 const lt_variant_t
* pVariantT
= static_cast<const lt_variant_t
*>(lt_list_value( pE
));
1705 const char* p
= lt_variant_get_tag( pVariantT
);
// Separate consecutive variants with a hyphen.
1708 if (!aVariants
.isEmpty())
1709 aVariants
.append("-");
1710 aVariants
.appendAscii(p
);
1717 if (mbCachedVariants
|| cacheSimpleLSCV())
1718 aVariants
= maCachedVariants
;
1720 return aVariants
.makeStringAndClear();
// Returns the Locale of this tag by reference.
//
// bResolveSystem: if false and this tag denotes the system locale, a static
//   empty Locale is returned instead of the resolved system locale.
// mbInitializedLocale is tested twice; the statement between the two tests is
// not visible in this extract. If still uninitialized the Locale is derived
// from the BCP 47 string when that is initialized; the other visible
// conversion starts from the LangID.
1724 const css::lang::Locale
& LanguageTag::getLocale( bool bResolveSystem
) const
1726 // "static" to be returned as const reference to an empty locale.
1727 static lang::Locale theEmptyLocale
;
1729 if (!bResolveSystem
&& mbSystemLocale
)
1730 return theEmptyLocale
;
1731 if (!mbInitializedLocale
)
1733 if (!mbInitializedLocale
)
1735 if (mbInitializedBcp47
)
1736 const_cast<LanguageTag
*>(this)->convertBcp47ToLocale();
1738 convertLangToLocale();
// Returns the MS-LangID of this tag.
//
// bResolveSystem: if false and this tag denotes the system locale,
//   LANGUAGE_SYSTEM is returned instead of the resolved system language.
// If the LangID is not initialized it is derived from the BCP 47 string when
// that is initialized; the other visible conversion starts from the Locale.
// A result that is still LANGUAGE_DONTKNOW, or LANGUAGE_SYSTEM for a
// non-system tag, triggers synCanonicalize().
1744 LanguageType
LanguageTag::getLanguageType( bool bResolveSystem
) const
1746 if (!bResolveSystem
&& mbSystemLocale
)
1747 return LANGUAGE_SYSTEM
;
1748 if (!mbInitializedLangID
)
1750 if (!mbInitializedLangID
)
1752 if (mbInitializedBcp47
)
1753 const_cast<LanguageTag
*>(this)->convertBcp47ToLang();
1756 const_cast<LanguageTag
*>(this)->convertLocaleToLang();
1758 /* Resolve a locale only unknown due to some redundant information,
1759 * like 'de-Latn-DE' with script tag. Never call canonicalize()
1760 * from within convert...() methods due to possible recursion, so
1762 if ((!mbSystemLocale
&& mnLangID
== LANGUAGE_SYSTEM
) || mnLangID
== LANGUAGE_DONTKNOW
)
1763 const_cast<LanguageTag
*>(this)->synCanonicalize();
// Fills rLanguage, rScript and rCountry from this tag. Two forms are visible:
// the subtags are copied as they are, or each subtag is copied only if it
// passes isIsoLanguage() / isIsoScript() / isIsoCountry() and is otherwise
// replaced by an empty string.
// NOTE(review): the condition selecting between the two forms is not visible
// in this extract; per the comment below presumably isIsoODF() - confirm.
1770 void LanguageTag::getIsoLanguageScriptCountry( OUString
& rLanguage
, OUString
& rScript
, OUString
& rCountry
) const
1772 // Calling isIsoODF() first is a predicate for getLanguage(), getScript()
1773 // and getCountry() to work correctly in this context.
1776 rLanguage
= getLanguage();
1777 rScript
= getScript();
1778 rCountry
= getCountry();
1782 rLanguage
= (LanguageTag::isIsoLanguage( getLanguage()) ? getLanguage() : OUString());
1783 rScript
= (LanguageTag::isIsoScript( getScript()) ? getScript() : OUString());
1784 rCountry
= (LanguageTag::isIsoCountry( getCountry()) ? getCountry() : OUString());
1792 bool isLowerAscii( sal_Unicode c
)
1794 return 'a' <= c
&& c
<= 'z';
1797 bool isUpperAscii( sal_Unicode c
)
1799 return 'A' <= c
&& c
<= 'Z';
// Checks whether rLanguage has the shape of an ISO 639 language code: two or
// three characters, all of them lower case ASCII letters. A two or three
// character string containing upper case ASCII letters is reported through
// SAL_WARN_IF as being rejected.
1806 bool LanguageTag::isIsoLanguage( const OUString
& rLanguage
)
1808 /* TODO: ignore case? For now let's see where rubbish is used. */
1809 bool b2chars
= rLanguage
.getLength() == 2;
1810 if ((b2chars
|| rLanguage
.getLength() == 3) &&
1811 isLowerAscii( rLanguage
[0]) && isLowerAscii( rLanguage
[1]) &&
1812 (b2chars
|| isLowerAscii( rLanguage
[2])))
1814 SAL_WARN_IF( ((rLanguage
.getLength() == 2 || rLanguage
.getLength() == 3) &&
1815 (isUpperAscii( rLanguage
[0]) || isUpperAscii( rLanguage
[1]))) ||
1816 (rLanguage
.getLength() == 3 && isUpperAscii( rLanguage
[2])), "i18nlangtag",
1817 "LanguageTag::isIsoLanguage: rejecting upper case " << rLanguage
);
// Checks whether rRegion has the shape of an ISO 3166 country code: either
// empty or exactly two upper case ASCII letters. A two character string
// containing lower case ASCII letters is reported through SAL_WARN_IF as being
// rejected.
1823 bool LanguageTag::isIsoCountry( const OUString
& rRegion
)
1825 /* TODO: ignore case? For now let's see where rubbish is used. */
1826 if (rRegion
.isEmpty() ||
1827 (rRegion
.getLength() == 2 && isUpperAscii( rRegion
[0]) && isUpperAscii( rRegion
[1])))
1829 SAL_WARN_IF( rRegion
.getLength() == 2 && (isLowerAscii( rRegion
[0]) || isLowerAscii( rRegion
[1])),
1830 "i18nlangtag", "LanguageTag::isIsoCountry: rejecting lower case " << rRegion
);
// Checks whether rScript has the shape of an ISO 15924 script code: either
// empty or exactly four ASCII letters, the first upper case and the remaining
// three lower case. A four character string with different casing is reported
// through SAL_WARN_IF as being rejected.
1836 bool LanguageTag::isIsoScript( const OUString
& rScript
)
1838 /* TODO: ignore case? For now let's see where rubbish is used. */
1839 if (rScript
.isEmpty() ||
1840 (rScript
.getLength() == 4 &&
1841 isUpperAscii( rScript
[0]) && isLowerAscii( rScript
[1]) &&
1842 isLowerAscii( rScript
[2]) && isLowerAscii( rScript
[3])))
1844 SAL_WARN_IF( rScript
.getLength() == 4 &&
1845 (isLowerAscii( rScript
[0]) || isUpperAscii( rScript
[1]) ||
1846 isUpperAscii( rScript
[2]) || isUpperAscii( rScript
[3])),
1847 "i18nlangtag", "LanguageTag::isIsoScript: rejecting case mismatch " << rScript
);
1852 OUString
const & LanguageTagImpl::getLanguage() const
1854 if (!mbCachedLanguage
)
1856 maCachedLanguage
= const_cast<LanguageTagImpl
*>(this)->getLanguageFromLangtag();
1857 mbCachedLanguage
= true;
1859 return maCachedLanguage
;
// Returns the language subtag. The implementation's cached value is returned
// directly when available; otherwise LanguageTagImpl::getLanguage() is asked.
1863 OUString
LanguageTag::getLanguage() const
1865 LanguageTagImpl
const* pImpl
= getImpl();
1866 if (pImpl
->mbCachedLanguage
)
1867 return pImpl
->maCachedLanguage
;
1868 OUString
aRet( pImpl
->getLanguage());
1874 OUString
const & LanguageTagImpl::getScript() const
1876 if (!mbCachedScript
)
1878 maCachedScript
= const_cast<LanguageTagImpl
*>(this)->getScriptFromLangtag();
1879 mbCachedScript
= true;
1881 return maCachedScript
;
// Returns the script subtag. The implementation's cached value is returned
// directly when available; otherwise LanguageTagImpl::getScript() is asked.
1885 OUString
LanguageTag::getScript() const
1887 LanguageTagImpl
const* pImpl
= getImpl();
1888 if (pImpl
->mbCachedScript
)
1889 return pImpl
->maCachedScript
;
1890 OUString
aRet( pImpl
->getScript());
1896 OUString
LanguageTag::getLanguageAndScript() const
1898 OUString
aLanguageScript( getLanguage());
1899 OUString
aScript( getScript());
1900 if (!aScript
.isEmpty())
1902 aLanguageScript
+= "-" + aScript
;
1904 return aLanguageScript
;
1908 OUString
const & LanguageTagImpl::getCountry() const
1910 if (!mbCachedCountry
)
1912 maCachedCountry
= const_cast<LanguageTagImpl
*>(this)->getRegionFromLangtag();
1913 if (!LanguageTag::isIsoCountry( maCachedCountry
))
1914 maCachedCountry
.clear();
1915 mbCachedCountry
= true;
1917 return maCachedCountry
;
// Returns the country. The implementation's cached value is returned directly
// when available; otherwise LanguageTagImpl::getCountry() is asked.
1921 OUString
LanguageTag::getCountry() const
1923 LanguageTagImpl
const* pImpl
= getImpl();
1924 if (pImpl
->mbCachedCountry
)
1925 return pImpl
->maCachedCountry
;
1926 OUString
aRet( pImpl
->getCountry());
1932 OUString
LanguageTagImpl::getRegion() const
1934 return const_cast<LanguageTagImpl
*>(this)->getRegionFromLangtag();
1938 OUString
const & LanguageTagImpl::getVariants() const
1940 if (!mbCachedVariants
)
1942 maCachedVariants
= const_cast<LanguageTagImpl
*>(this)->getVariantsFromLangtag();
1943 mbCachedVariants
= true;
1945 return maCachedVariants
;
// Returns the variant subtags. The implementation's cached value is returned
// directly when available; otherwise LanguageTagImpl::getVariants() is asked.
1949 OUString
LanguageTag::getVariants() const
1951 LanguageTagImpl
const * pImpl
= getImpl();
1952 if (pImpl
->mbCachedVariants
)
1953 return pImpl
->maCachedVariants
;
1954 OUString
aRet( pImpl
->getVariants());
// Returns the glibc style locale string for this tag, cached in
// maCachedGlibcString. On a cache miss liblangtag is forced
// (meIsLiblangtagNeeded = DECISION_YES), the tag is canonicalized and
// lt_tag_convert_to_locale() supplies the string.
1959 OUString
const & LanguageTagImpl::getGlibcLocaleString() const
1961 if (mbCachedGlibcString
)
1962 return maCachedGlibcString
;
1966 meIsLiblangtagNeeded
= DECISION_YES
;
1967 const_cast<LanguageTagImpl
*>(this)->synCanonicalize();
1971 char* pLang
= lt_tag_convert_to_locale(mpImplLangtag
, nullptr);
1974 maCachedGlibcString
= OUString::createFromAscii( pLang
);
1975 mbCachedGlibcString
= true;
1979 return maCachedGlibcString
;
// Builds a glibc style locale string including rEncoding.
//
// rEncoding: text that is appended after the language (and country, if any),
//   or inserted before an '@' modifier of the implementation's locale string.
// Visible are a simple form, "language[_COUNTRY]" + rEncoding, and a form based
// on LanguageTagImpl::getGlibcLocaleString().
1982 OUString
LanguageTag::getGlibcLocaleString( std::u16string_view rEncoding
) const
1987 OUString
aCountry( getCountry());
1988 if (aCountry
.isEmpty())
1989 aRet
= getLanguage() + rEncoding
;
1991 aRet
= getLanguage() + "_" + aCountry
+ rEncoding
;
1995 aRet
= getImpl()->getGlibcLocaleString();
1996 sal_Int32 nAt
= aRet
.indexOf('@');
// Insert the encoding in front of the '@' modifier.
1998 aRet
= OUString::Concat(aRet
.subView(0, nAt
)) + rEncoding
+ aRet
.subView(nAt
);
// Tells whether the cached script subtag is non-empty.
// NOTE(review): the statement executed while mbCachedScript is false is not
// visible in this extract; presumably it fills the cache - confirm.
2005 bool LanguageTagImpl::hasScript() const
2007 if (!mbCachedScript
)
2009 return !maCachedScript
.isEmpty();
// Forwards to LanguageTagImpl::hasScript() on the implementation object and
// keeps its result in bRet.
2013 bool LanguageTag::hasScript() const
2015 bool bRet
= getImpl()->hasScript();
2021 LanguageTag::ScriptType
LanguageTagImpl::getScriptType() const
2023 return meScriptType
;
2027 LanguageTag::ScriptType
LanguageTag::getScriptType() const
2029 return getImpl()->getScriptType();
// Stores a script type for this implementation object; the visible guard only
// acts while meScriptType is still LanguageTag::ScriptType::UNKNOWN.
// NOTE(review): the guarded statement itself is not visible in this extract;
// presumably it assigns st to meScriptType - confirm.
2033 void LanguageTagImpl::setScriptType(LanguageTag::ScriptType st
)
2035 if (meScriptType
== LanguageTag::ScriptType::UNKNOWN
) // poor man's clash resolution
2040 void LanguageTag::setScriptType(LanguageTag::ScriptType st
)
2042 getImpl()->setScriptType(st
);
// Tries to split maBcp47 with simpleExtract() and to fill the language,
// script, country and variants caches from the result. bRet is true for the
// extraction kinds EXTRACTED_LSC, EXTRACTED_LV and EXTRACTED_LR.
// NOTE(review): the guard around the cache assignments is not visible in this
// extract; presumably they only happen when bRet is true - confirm.
2046 bool LanguageTagImpl::cacheSimpleLSCV()
2048 OUString aLanguage
, aScript
, aCountry
, aRegion
, aVariants
;
2049 Extraction eExt
= simpleExtract( maBcp47
, aLanguage
, aScript
, aCountry
, aRegion
, aVariants
);
2050 bool bRet
= (eExt
== EXTRACTED_LSC
|| eExt
== EXTRACTED_LV
|| eExt
== EXTRACTED_LR
);
2053 maCachedLanguage
= aLanguage
;
2054 maCachedScript
= aScript
;
2055 maCachedCountry
= aCountry
;
2056 maCachedVariants
= aVariants
;
2057 mbCachedLanguage
= mbCachedScript
= mbCachedCountry
= mbCachedVariants
= true;
2063 bool LanguageTagImpl::isIsoLocale() const
2065 if (meIsIsoLocale
== DECISION_DONTKNOW
)
2067 const_cast<LanguageTagImpl
*>(this)->synCanonicalize();
2068 // It must be at most ll-CC or lll-CC
2069 // Do not use getCountry() here, use getRegion() instead.
2070 meIsIsoLocale
= ((maBcp47
.isEmpty() ||
2071 (maBcp47
.getLength() <= 6 && LanguageTag::isIsoLanguage( getLanguage()) &&
2072 LanguageTag::isIsoCountry( getRegion()))) ? DECISION_YES
: DECISION_NO
);
2074 return meIsIsoLocale
== DECISION_YES
;
// Forwards to LanguageTagImpl::isIsoLocale() on the implementation object and
// keeps its result in bRet.
2078 bool LanguageTag::isIsoLocale() const
2080 bool bRet
= getImpl()->isIsoLocale();
// Tells whether this tag can be expressed with ISO language, script and
// country only. The decision is computed after canonicalization and kept in
// meIsIsoODF: a non-ISO script yields DECISION_NO, and the general check
// requires a tag of at most 11 characters with ISO language, country and
// script and without variants.
2086 bool LanguageTagImpl::isIsoODF() const
2088 if (meIsIsoODF
== DECISION_DONTKNOW
)
2090 const_cast<LanguageTagImpl
*>(this)->synCanonicalize();
2091 if (!LanguageTag::isIsoScript( getScript()))
2093 meIsIsoODF
= DECISION_NO
;
2096 // The usual case is lll-CC so simply check that first.
2099 meIsIsoODF
= DECISION_YES
;
2102 // If this is not ISO locale for which script must not exist it can
2103 // still be ISO locale plus ISO script lll-Ssss-CC, but not ll-vvvv ...
2105 meIsIsoODF
= ((maBcp47
.getLength() <= 11 && LanguageTag::isIsoLanguage( getLanguage()) &&
2106 LanguageTag::isIsoCountry( getRegion()) && LanguageTag::isIsoScript( getScript()) &&
2107 getVariants().isEmpty()) ? DECISION_YES
: DECISION_NO
);
2109 return meIsIsoODF
== DECISION_YES
;
// Forwards to LanguageTagImpl::isIsoODF() on the implementation object and
// keeps its result in bRet.
2113 bool LanguageTag::isIsoODF() const
2115 bool bRet
= getImpl()->isIsoODF();
2121 bool LanguageTagImpl::isValidBcp47() const
2123 if (meIsValid
== DECISION_DONTKNOW
)
2125 const_cast<LanguageTagImpl
*>(this)->synCanonicalize();
2126 SAL_WARN_IF( meIsValid
== DECISION_DONTKNOW
, "i18nlangtag",
2127 "LanguageTag::isValidBcp47: canonicalize() didn't set meIsValid");
2129 return meIsValid
== DECISION_YES
;
// Forwards to LanguageTagImpl::isValidBcp47() on the implementation object and
// keeps its result in bRet.
2133 bool LanguageTag::isValidBcp47() const
2135 bool bRet
= getImpl()->isValidBcp47();
// Turns this tag into its fallback. The current Locale is looked up with
// MsLangId::Conversion::lookupFallbackLocale(); if the result differs and is
// the last resort "en-US" for a non-"en" locale, the entries of
// getFallbackStrings( false) are tried in order until one yields a fallback
// other than "en-US". The transition is logged and mbIsFallback is set.
2141 LanguageTag
& LanguageTag::makeFallback()
2145 const lang::Locale
& rLocale1
= getLocale();
2146 lang::Locale
aLocale2( MsLangId::Conversion::lookupFallbackLocale( rLocale1
));
2147 if ( rLocale1
.Language
!= aLocale2
.Language
||
2148 rLocale1
.Country
!= aLocale2
.Country
||
2149 rLocale1
.Variant
!= aLocale2
.Variant
)
2151 if (rLocale1
.Language
!= "en" && aLocale2
.Language
== "en" && aLocale2
.Country
== "US")
2153 // "en-US" is the last resort fallback, try if we get a better
2154 // one for the fallback hierarchy of a non-"en" locale.
2155 ::std::vector
< OUString
> aFallbacks( getFallbackStrings( false));
2156 for (auto const& fallback
: aFallbacks
)
2158 lang::Locale
aLocale3( LanguageTag(fallback
).getLocale());
2159 aLocale2
= MsLangId::Conversion::lookupFallbackLocale( aLocale3
);
2160 if (aLocale2
.Language
!= "en" || aLocale2
.Country
!= "US")
2161 break; // for, success
2164 SAL_INFO( "i18nlangtag", "LanguageTag::makeFallback - for (" <<
2165 rLocale1
.Language
<< "," << rLocale1
.Country
<< "," << rLocale1
.Variant
<< ") to (" <<
2166 aLocale2
.Language
<< "," << aLocale2
.Country
<< "," << aLocale2
.Variant
<< ")");
2169 mbIsFallback
= true;
2175 /* TODO: maybe this now could take advantage of the mnOverride field in
2176 * isolang.cxx entries and search for kSAME instead of hardcoded special
2177 * fallbacks. Though iterating through those tables would be slower and even
2178 * then there would be some special cases, but we wouldn't lack entries that
2179 * were missed out. */
2180 ::std::vector
< OUString
> LanguageTag::getFallbackStrings( bool bIncludeFullBcp47
) const
2182 ::std::vector
< OUString
> aVec
;
2183 OUString
aLanguage( getLanguage());
2184 OUString
aCountry( getCountry());
2187 if (!aCountry
.isEmpty())
2189 if (bIncludeFullBcp47
)
2190 aVec
.emplace_back(aLanguage
+ "-" + aCountry
);
2191 if (aLanguage
== "zh")
2193 // For zh-HK or zh-MO also list zh-TW to get zh-Hant, for all
2194 // other zh-XX also list zh-CN to get zh-Hans; both of which we
2195 // use the legacy forms instead of the more correct script
2196 // tags that unfortunately most pieces don't understand.
2197 if (aCountry
== "HK" || aCountry
== "MO")
2198 aVec
.emplace_back(aLanguage
+ "-TW");
2199 else if (aCountry
!= "CN")
2200 aVec
.emplace_back(aLanguage
+ "-CN");
2201 aVec
.push_back( aLanguage
);
2203 else if (aLanguage
== "sh")
2205 // Manual list instead of calling
2206 // LanguageTag( "sr-Latn-" + aCountry).getFallbackStrings( true)
2207 // that would also include "sh-*" again.
2208 aVec
.emplace_back("sr-Latn-" + aCountry
);
2209 aVec
.emplace_back("sr-Latn");
2210 aVec
.emplace_back("sh"); // legacy with script, before default script with country
2211 aVec
.emplace_back("sr-" + aCountry
);
2212 aVec
.emplace_back("sr");
2214 else if (aLanguage
== "ca" && aCountry
== "XV")
2216 ::std::vector
< OUString
> aRep( LanguageTag( u
"ca-ES-valencia"_ustr
).getFallbackStrings( true));
2217 aVec
.insert( aVec
.end(), aRep
.begin(), aRep
.end());
2218 // Already includes 'ca' language fallback.
2220 else if (aLanguage
== "ku")
2222 if (aCountry
== "TR" || aCountry
== "SY")
2224 aVec
.emplace_back("kmr-Latn-" + aCountry
);
2225 aVec
.emplace_back("kmr-" + aCountry
);
2226 aVec
.emplace_back("kmr-Latn");
2227 aVec
.emplace_back("kmr");
2228 aVec
.push_back( aLanguage
);
2230 else if (aCountry
== "IQ" || aCountry
== "IR")
2232 aVec
.emplace_back("ckb-" + aCountry
);
2233 aVec
.emplace_back("ckb");
2236 else if (aLanguage
== "kmr" && (aCountry
== "TR" || aCountry
== "SY"))
2238 aVec
.emplace_back("ku-Latn-" + aCountry
);
2239 aVec
.emplace_back("ku-" + aCountry
);
2240 aVec
.push_back( aLanguage
);
2241 aVec
.emplace_back("ku");
2243 else if (aLanguage
== "ckb" && (aCountry
== "IQ" || aCountry
== "IR"))
2245 aVec
.emplace_back("ku-Arab-" + aCountry
);
2246 aVec
.emplace_back("ku-" + aCountry
);
2247 aVec
.push_back( aLanguage
);
2248 // not 'ku' only, that was used for Latin script
2251 aVec
.push_back( aLanguage
);
2255 if (bIncludeFullBcp47
)
2256 aVec
.push_back( aLanguage
);
2257 if (aLanguage
== "sh")
2259 aVec
.emplace_back("sr-Latn");
2260 aVec
.emplace_back("sr");
2262 else if (aLanguage
== "pli")
2264 // a special case for Pali dictionary, see fdo#41599
2265 aVec
.emplace_back("pi-Latn");
2266 aVec
.emplace_back("pi");
2272 getBcp47(); // have maBcp47 now
2273 if (bIncludeFullBcp47
)
2274 aVec
.push_back( maBcp47
);
2276 // Special cases for deprecated tags and their replacements, include both
2277 // in fallbacks in a sensible order.
2278 /* TODO: could such things be generalized and automated with liblangtag? */
2279 if (maBcp47
== "en-GB-oed")
2280 aVec
.emplace_back("en-GB-oxendict");
2281 else if (maBcp47
== "en-GB-oxendict")
2282 aVec
.emplace_back("en-GB-oed");
2284 OUString
aVariants( getVariants());
2288 OUString aScript
= getScript();
2289 bool bHaveLanguageScriptVariant
= false;
2290 if (!aCountry
.isEmpty())
2292 if (!aVariants
.isEmpty())
2294 aTmp
= aLanguage
+ "-" + aScript
+ "-" + aCountry
+ "-" + aVariants
;
2295 if (aTmp
!= maBcp47
)
2296 aVec
.push_back( aTmp
);
2297 // Language with variant but without country before language
2298 // without variant but with country.
2299 aTmp
= aLanguage
+ "-" + aScript
+ "-" + aVariants
;
2300 if (aTmp
!= maBcp47
)
2301 aVec
.push_back( aTmp
);
2302 bHaveLanguageScriptVariant
= true;
2304 aTmp
= aLanguage
+ "-" + aScript
+ "-" + aCountry
;
2305 if (aTmp
!= maBcp47
)
2306 aVec
.push_back( aTmp
);
2307 if (aLanguage
== "sr" && aScript
== "Latn")
2309 // sr-Latn-CS => sr-Latn-YU, sh-CS, sh-YU
2310 if (aCountry
== "CS")
2312 aVec
.emplace_back("sr-Latn-YU");
2313 aVec
.emplace_back("sh-CS");
2314 aVec
.emplace_back("sh-YU");
2317 aVec
.emplace_back("sh-" + aCountry
);
2319 else if (aLanguage
== "pi" && aScript
== "Latn")
2320 aVec
.emplace_back("pli"); // a special case for Pali dictionary, see fdo#41599
2321 else if (aLanguage
== "krm" && aScript
== "Latn" && (aCountry
== "TR" || aCountry
== "SY"))
2322 aVec
.emplace_back("ku-" + aCountry
);
2324 if (!aVariants
.isEmpty() && !bHaveLanguageScriptVariant
)
2326 aTmp
= aLanguage
+ "-" + aScript
+ "-" + aVariants
;
2327 if (aTmp
!= maBcp47
)
2328 aVec
.push_back( aTmp
);
2330 aTmp
= aLanguage
+ "-" + aScript
;
2331 if (aTmp
!= maBcp47
)
2332 aVec
.push_back( aTmp
);
2334 // 'sh' actually denoted a script, so have it here instead of appended
2335 // at the end as language-only.
2336 if (aLanguage
== "sr" && aScript
== "Latn")
2337 aVec
.emplace_back("sh");
2338 else if (aLanguage
== "ku" && aScript
== "Arab")
2339 aVec
.emplace_back("ckb");
2340 // 'ku' only denoted Latin script
2341 else if (aLanguage
== "krm" && aScript
== "Latn" && aCountry
.isEmpty())
2342 aVec
.emplace_back("ku");
2344 bool bHaveLanguageVariant
= false;
2345 if (!aCountry
.isEmpty())
2347 if (!aVariants
.isEmpty())
2349 aTmp
= aLanguage
+ "-" + aCountry
+ "-" + aVariants
;
2350 if (aTmp
!= maBcp47
)
2351 aVec
.push_back( aTmp
);
2352 if (maBcp47
== "ca-ES-valencia")
2353 aVec
.emplace_back("ca-XV");
2354 // Language with variant but without country before language
2355 // without variant but with country.
2356 // But only if variant is not from a grandfathered tag that
2357 // wouldn't match the rules, i.e. "de-1901" is fine but "en-oed" is not.
2359 if (aVariants
.getLength() >= 5 ||
2360 (aVariants
.getLength() == 4 && '0' <= aVariants
[0] && aVariants
[0] <= '9'))
2362 aTmp
= aLanguage
+ "-" + aVariants
;
2363 if (aTmp
!= maBcp47
)
2364 aVec
.push_back( aTmp
);
2365 bHaveLanguageVariant
= true;
2368 aTmp
= aLanguage
+ "-" + aCountry
;
2369 if (aTmp
!= maBcp47
)
2370 aVec
.push_back( aTmp
);
2372 if (!aVariants
.isEmpty() && !bHaveLanguageVariant
)
2374 // Only if variant is not from a grandfathered tag that wouldn't match
2375 // the rules, i.e. "de-1901" is fine but "en-oed" is not.
2376 if (aVariants
.getLength() >= 5 ||
2377 (aVariants
.getLength() == 4 && '0' <= aVariants
[0] && aVariants
[0] <= '9'))
2379 aTmp
= aLanguage
+ "-" + aVariants
;
2380 if (aTmp
!= maBcp47
)
2381 aVec
.push_back( aTmp
);
2385 // Insert legacy fallbacks with country before language-only, but only
2386 // default script, script was handled already above.
2387 if (!aCountry
.isEmpty())
2389 if (aLanguage
== "sr" && aCountry
== "CS")
2390 aVec
.emplace_back("sr-YU");
2393 // Original language-only.
2394 if (!aLanguage
.isEmpty() && aLanguage
!= maBcp47
)
2395 aVec
.push_back( aLanguage
);
2401 OUString
LanguageTag::getBcp47MS() const
2403 if (getLanguageType() == LANGUAGE_SPANISH_DATED
)
2404 return u
"es-ES_tradnl"_ustr
;
2409 bool LanguageTag::equals( const LanguageTag
& rLanguageTag
) const
2411 // If SYSTEM is not to be resolved or either both are SYSTEM or none, we
2412 // can use the operator==() optimization.
2413 if (isSystemLocale() == rLanguageTag
.isSystemLocale())
2414 return operator==( rLanguageTag
);
2416 // Compare full language tag strings.
2417 return getBcp47() == rLanguageTag
.getBcp47();
2421 bool LanguageTag::operator==( const LanguageTag
& rLanguageTag
) const
2423 if (isSystemLocale() && rLanguageTag
.isSystemLocale())
2424 return true; // both SYSTEM
2426 // No need to convert to BCP47 if both Lang-IDs are available.
2427 if (mbInitializedLangID
&& rLanguageTag
.mbInitializedLangID
)
2429 // Equal if same ID and no SYSTEM is involved or both are SYSTEM.
2430 return mnLangID
== rLanguageTag
.mnLangID
&& isSystemLocale() == rLanguageTag
.isSystemLocale();
2433 // Compare full language tag strings but SYSTEM unresolved.
2434 return getBcp47( false) == rLanguageTag
.getBcp47( false);
2438 bool LanguageTag::operator!=( const LanguageTag
& rLanguageTag
) const
2440 return !operator==( rLanguageTag
);
2444 bool LanguageTag::operator<( const LanguageTag
& rLanguageTag
) const
2446 return getBcp47( false).compareToIgnoreAsciiCase( rLanguageTag
.getBcp47( false)) < 0;
2451 LanguageTagImpl::Extraction
LanguageTagImpl::simpleExtract( const OUString
& rBcp47
,
2452 OUString
& rLanguage
, OUString
& rScript
, OUString
& rCountry
, OUString
& rRegion
, OUString
& rVariants
)
2454 Extraction eRet
= EXTRACTED_NONE
;
2455 const sal_Int32 nLen
= rBcp47
.getLength();
2456 const sal_Int32 nHyph1
= rBcp47
.indexOf( '-');
2457 sal_Int32 nHyph2
= (nHyph1
< 0 ? -1 : rBcp47
.indexOf( '-', nHyph1
+ 1));
2458 sal_Int32 nHyph3
= (nHyph2
< 0 ? -1 : rBcp47
.indexOf( '-', nHyph2
+ 1));
2459 sal_Int32 nHyph4
= (nHyph3
< 0 ? -1 : rBcp47
.indexOf( '-', nHyph3
+ 1));
2460 if (nLen
== 1 && rBcp47
[0] == '*') // * the dreaded jolly joker
2462 // It's f*d up but we need to recognize this.
2463 eRet
= EXTRACTED_X_JOKER
;
2465 else if (nHyph1
== 1 && rBcp47
[0] == 'x') // x-... privateuse
2467 // x-... privateuse tags MUST be known to us by definition.
2470 else if (nLen
== 1 && rBcp47
[0] == 'C') // the 'C' locale
2472 eRet
= EXTRACTED_C_LOCALE
;
2479 else if (nLen
== 2 || nLen
== 3) // ll or lll
2483 rLanguage
= rBcp47
.toAsciiLowerCase();
2488 eRet
= EXTRACTED_LSC
;
2491 else if ( (nHyph1
== 2 && nLen
== 5) // ll-CC
2492 || (nHyph1
== 3 && nLen
== 6)) // lll-CC
2496 rLanguage
= rBcp47
.copy( 0, nHyph1
).toAsciiLowerCase();
2497 rCountry
= rBcp47
.copy( nHyph1
+ 1, 2).toAsciiUpperCase();
2501 eRet
= EXTRACTED_LSC
;
2504 else if ( (nHyph1
== 2 && nLen
== 6) // ll-rrr
2505 || (nHyph1
== 3 && nLen
== 7)) // lll-rrr
2509 rLanguage
= rBcp47
.copy( 0, nHyph1
).toAsciiLowerCase();
2511 rRegion
= rBcp47
.copy( nHyph1
+ 1, 3);
2514 eRet
= EXTRACTED_LR
;
2517 else if ( (nHyph1
== 2 && nLen
== 7) // ll-Ssss or ll-vvvv
2518 || (nHyph1
== 3 && nLen
== 8)) // lll-Ssss or lll-vvvv
2522 sal_Unicode c
= rBcp47
[nHyph1
+1];
2523 if ('0' <= c
&& c
<= '9')
2525 // (DIGIT 3ALNUM) vvvv variant instead of Ssss script
2526 rLanguage
= rBcp47
.copy( 0, nHyph1
).toAsciiLowerCase();
2530 rVariants
= rBcp47
.copy( nHyph1
+ 1);
2531 eRet
= EXTRACTED_LV
;
2535 rLanguage
= rBcp47
.copy( 0, nHyph1
).toAsciiLowerCase();
2536 rScript
= rBcp47
.copy( nHyph1
+ 1, 1).toAsciiUpperCase() +
2537 rBcp47
.copy( nHyph1
+ 2, 3).toAsciiLowerCase();
2541 eRet
= EXTRACTED_LSC
;
2545 else if ( (nHyph1
== 2 && nHyph2
== 7 && nLen
== 10) // ll-Ssss-CC
2546 || (nHyph1
== 3 && nHyph2
== 8 && nLen
== 11)) // lll-Ssss-CC
2550 rLanguage
= rBcp47
.copy( 0, nHyph1
).toAsciiLowerCase();
2551 rScript
= rBcp47
.copy( nHyph1
+ 1, 1).toAsciiUpperCase() + rBcp47
.copy( nHyph1
+ 2, 3).toAsciiLowerCase();
2552 rCountry
= rBcp47
.copy( nHyph2
+ 1, 2).toAsciiUpperCase();
2555 eRet
= EXTRACTED_LSC
;
2558 else if ( (nHyph1
== 2 && nHyph2
== 7 && nLen
== 11) // ll-Ssss-rrr
2559 || (nHyph1
== 3 && nHyph2
== 8 && nLen
== 12)) // lll-Ssss-rrr
2563 rLanguage
= rBcp47
.copy( 0, nHyph1
).toAsciiLowerCase();
2564 rScript
= rBcp47
.copy( nHyph1
+ 1, 1).toAsciiUpperCase() + rBcp47
.copy( nHyph1
+ 2, 3).toAsciiLowerCase();
2566 rRegion
= rBcp47
.copy( nHyph2
+ 1, 3);
2568 eRet
= EXTRACTED_LR
;
2571 else if ( (nHyph1
== 2 && nHyph2
== 7 && nHyph3
== 10 && nLen
>= 15) // ll-Ssss-CC-vvvv[vvvv][-...]
2572 || (nHyph1
== 3 && nHyph2
== 8 && nHyph3
== 11 && nLen
>= 16)) // lll-Ssss-CC-vvvv[vvvv][-...]
2575 nHyph4
= rBcp47
.getLength();
2576 if (nHyph4
- nHyph3
> 4 && nHyph4
- nHyph3
<= 9)
2578 rLanguage
= rBcp47
.copy( 0, nHyph1
).toAsciiLowerCase();
2579 rScript
= rBcp47
.copy( nHyph1
+ 1, 1).toAsciiUpperCase() + rBcp47
.copy( nHyph1
+ 2, 3).toAsciiLowerCase();
2580 rCountry
= rBcp47
.copy( nHyph2
+ 1, 2).toAsciiUpperCase();
2582 rVariants
= rBcp47
.copy( nHyph3
+ 1);
2583 eRet
= EXTRACTED_LV
;
2586 else if ( (nHyph1
== 2 && nHyph2
== 7 && nHyph3
== 11 && nLen
>= 16) // ll-Ssss-rrr-vvvv[vvvv][-...]
2587 || (nHyph1
== 3 && nHyph2
== 8 && nHyph3
== 12 && nLen
>= 17)) // lll-Ssss-rrr-vvvv[vvvv][-...]
2590 nHyph4
= rBcp47
.getLength();
2591 if (nHyph4
- nHyph3
> 4 && nHyph4
- nHyph3
<= 9)
2593 rLanguage
= rBcp47
.copy( 0, nHyph1
).toAsciiLowerCase();
2594 rScript
= rBcp47
.copy( nHyph1
+ 1, 1).toAsciiUpperCase() + rBcp47
.copy( nHyph1
+ 2, 3).toAsciiLowerCase();
2596 rRegion
= rBcp47
.copy( nHyph2
+ 1, 3);
2597 rVariants
= rBcp47
.copy( nHyph3
+ 1);
2598 eRet
= EXTRACTED_LR
;
2601 else if ( (nHyph1
== 2 && nHyph2
== 5 && nHyph3
== 7) // ll-CC-u-...
2602 || (nHyph1
== 3 && nHyph2
== 6 && nHyph3
== 8)) // lll-CC-u-...
2604 if (rBcp47
[nHyph3
-1] == 'u')
2606 // Need to recognize as known, otherwise getLanguage() and
2607 // getCountry() return empty string because mpImplLangtag is not
2608 // used with a known mapping.
2609 /* TODO: if there were more this would get ugly and needed some
2610 * table driven approach via isolang.cxx instead. */
2611 if (rBcp47
.equalsIgnoreAsciiCase( "es-ES-u-co-trad"))
2617 rVariants
= "u-co-trad"; // not strictly a variant, but used to reconstruct the tag.
2618 eRet
= EXTRACTED_LV
;
2622 else if ( (nHyph1
== 2 && nHyph2
== 5 && nLen
>= 10) // ll-CC-vvvv[vvvv][-...]
2623 || (nHyph1
== 3 && nHyph2
== 6 && nLen
>= 11)) // lll-CC-vvvv[vvvv][-...]
2626 nHyph3
= rBcp47
.getLength();
2627 if (nHyph3
- nHyph2
> 4 && nHyph3
- nHyph2
<= 9)
2629 rLanguage
= rBcp47
.copy( 0, nHyph1
).toAsciiLowerCase();
2631 rCountry
= rBcp47
.copy( nHyph1
+ 1, 2).toAsciiUpperCase();
2633 rVariants
= rBcp47
.copy( nHyph2
+ 1);
2634 eRet
= EXTRACTED_LV
;
2637 else if ( (nHyph1
== 2 && nHyph2
== 6 && nLen
>= 11) // ll-rrr-vvvv[vvvv][-...]
2638 || (nHyph1
== 3 && nHyph2
== 7 && nLen
>= 12)) // lll-rrr-vvvv[vvvv][-...]
2641 nHyph3
= rBcp47
.getLength();
2642 if (nHyph3
- nHyph2
> 4 && nHyph3
- nHyph2
<= 9)
2644 rLanguage
= rBcp47
.copy( 0, nHyph1
).toAsciiLowerCase();
2647 rRegion
= rBcp47
.copy( nHyph1
+ 1, 3);
2648 rVariants
= rBcp47
.copy( nHyph2
+ 1);
2649 eRet
= EXTRACTED_LR
;
2652 else if ( (nHyph1
== 2 && nLen
>= 8) // ll-vvvvv[vvv][-...]
2653 || (nHyph1
== 3 && nLen
>= 9)) // lll-vvvvv[vvv][-...]
2656 nHyph2
= rBcp47
.getLength();
2657 if (nHyph2
- nHyph1
> 5 && nHyph2
- nHyph1
<= 9)
2659 rLanguage
= rBcp47
.copy( 0, nHyph1
).toAsciiLowerCase();
2663 rVariants
= rBcp47
.copy( nHyph1
+ 1);
2664 eRet
= EXTRACTED_LV
;
2668 // Known and handled grandfathered; ugly but effective ...
2669 // Note that nLen must have matched above.
2670 // Strictly not a variant, but so far we treat it as such.
2671 if (rBcp47
.equalsIgnoreAsciiCase( "en-GB-oed"))
2678 eRet
= EXTRACTED_LV
;
2680 // Other known and handled odd cases.
2681 else if (rBcp47
.equalsIgnoreAsciiCase( "es-ES_tradnl"))
2683 // Will get overridden, but needs to be recognized as known.
2688 rVariants
= "tradnl"; // this is nonsense, but... ignored.
2689 eRet
= EXTRACTED_KNOWN_BAD
;
2693 if (eRet
== EXTRACTED_NONE
)
2695 SAL_INFO( "i18nlangtag", "LanguageTagImpl::simpleExtract: did not extract '" << rBcp47
<< "'");
2704 assert(rLanguage
.getLength() == 2 || rLanguage
.getLength() == 3
2705 || eRet
== EXTRACTED_X_JOKER
|| eRet
== EXTRACTED_X
|| eRet
== EXTRACTED_C_LOCALE
);
2706 assert(rScript
.isEmpty() || rScript
.getLength() == 4);
2707 assert(rCountry
.isEmpty() || rRegion
.isEmpty()); // [2ALPHA / 3DIGIT]
2708 assert(rCountry
.isEmpty() || rCountry
.getLength() == 2);
2709 assert(rRegion
.isEmpty() || rRegion
.getLength() == 3);
2710 assert(rVariants
.isEmpty() || rVariants
.getLength() >= 4 || rVariants
== "oed");
2717 ::std::vector
< OUString
>::const_iterator
LanguageTag::getFallback(
2718 const ::std::vector
< OUString
> & rList
, const OUString
& rReference
)
2723 // Try the simple case first without constructing fallbacks.
2724 ::std::vector
< OUString
>::const_iterator it
= std::find(rList
.begin(), rList
.end(), rReference
);
2725 if (it
!= rList
.end())
2726 return it
; // exact match
2728 ::std::vector
< OUString
> aFallbacks( LanguageTag( rReference
).getFallbackStrings( false));
2729 if (rReference
!= "en-US")
2731 aFallbacks
.emplace_back("en-US");
2732 if (rReference
!= "en")
2733 aFallbacks
.emplace_back("en");
2735 if (rReference
!= "x-default")
2736 aFallbacks
.emplace_back("x-default");
2737 if (rReference
!= "x-no-translate")
2738 aFallbacks
.emplace_back("x-no-translate");
2739 /* TODO: the original comphelper::Locale::getFallback() code had
2740 * "x-notranslate" instead of "x-no-translate", but all .xcu files use
2741 * "x-no-translate" and "x-notranslate" apparently was never used anywhere.
2742 * Did that ever work? Was it supposed to work at all like this? */
2744 for (const auto& fb
: aFallbacks
)
2746 it
= std::find(rList
.begin(), rList
.end(), fb
);
2747 if (it
!= rList
.end())
2748 return it
; // fallback found
2751 // Did not find anything so return something of the list, the first value
2752 // will do as well as any other as none did match any of the possible
2754 return rList
.begin();
2759 ::std::vector
< css::lang::Locale
>::const_iterator
LanguageTag::getMatchingFallback(
2760 const ::std::vector
< css::lang::Locale
> & rList
,
2761 const css::lang::Locale
& rReference
)
2766 // Try the simple case first without constructing fallbacks.
2767 ::std::vector
< lang::Locale
>::const_iterator it
= std::find_if(rList
.begin(), rList
.end(),
2768 [&rReference
](const lang::Locale
& rLocale
) {
2769 return rLocale
.Language
== rReference
.Language
2770 && rLocale
.Country
== rReference
.Country
2771 && rLocale
.Variant
== rReference
.Variant
; });
2772 if (it
!= rList
.end())
2773 return it
; // exact match
2775 // Now for each reference fallback test the fallbacks of the list in order.
2776 ::std::vector
< OUString
> aFallbacks( LanguageTag( rReference
).getFallbackStrings( false));
2777 ::std::vector
< ::std::vector
< OUString
> > aListFallbacks( rList
.size());
2779 for (auto const& elem
: rList
)
2780 aListFallbacks
[i
++] = LanguageTag(elem
).getFallbackStrings(true);
2782 for (auto const& rfb
: aFallbacks
)
2785 for (auto const& lfb
: aListFallbacks
)
2787 for (auto const& fb
: lfb
)
2790 return rList
.begin() + nPosFb
;
2801 static bool lcl_isSystem( LanguageType nLangID
)
2803 if (nLangID
== LANGUAGE_SYSTEM
)
2805 // There are some special values that simplify to SYSTEM,
2806 // getRealLanguage() catches and resolves them.
2807 LanguageType nNewLangID
= MsLangId::getRealLanguage( nLangID
);
2808 return nNewLangID
!= nLangID
;
2813 css::lang::Locale
LanguageTag::convertToLocale( LanguageType nLangID
, bool bResolveSystem
)
2815 if (!bResolveSystem
&& lcl_isSystem( nLangID
))
2816 return lang::Locale();
2818 return LanguageTag( nLangID
).getLocale( bResolveSystem
);
2823 LanguageType
LanguageTag::convertToLanguageType( const css::lang::Locale
& rLocale
, bool bResolveSystem
)
2825 if (rLocale
.Language
.isEmpty() && !bResolveSystem
)
2826 return LANGUAGE_SYSTEM
;
2828 if (!bResolveSystem
)
2830 // single-item cache
2831 static std::mutex gMutex
;
2832 static std::optional
<lang::Locale
> moCacheKey
;
2833 static std::optional
<LanguageType
> moCacheValue
;
2834 std::unique_lock
l(gMutex
);
2835 if (!moCacheKey
|| *moCacheKey
!= rLocale
)
2837 moCacheValue
= LanguageTag(rLocale
).getLanguageType(false);
2838 moCacheKey
= rLocale
;
2840 return *moCacheValue
;
2843 return LanguageTag( rLocale
).getLanguageType( bResolveSystem
);
2848 OUString
LanguageTagImpl::convertToBcp47( const css::lang::Locale
& rLocale
)
2851 if (rLocale
.Language
.isEmpty())
2853 // aBcp47 stays empty
2855 else if (rLocale
.Language
== I18NLANGTAG_QLT
)
2857 aBcp47
= rLocale
.Variant
;
2861 /* XXX NOTE: most legacy code never evaluated the Variant field, so for
2862 * now just concatenate language and country. In case we stumbled over
2863 * variant aware code we'd have to take care of that. */
2864 if (rLocale
.Country
.isEmpty())
2865 aBcp47
= rLocale
.Language
;
2868 aBcp47
= rLocale
.Language
+ "-" + rLocale
.Country
;
2876 OUString
LanguageTag::convertToBcp47( const css::lang::Locale
& rLocale
, bool bResolveSystem
)
2879 if (rLocale
.Language
.isEmpty())
2882 aBcp47
= LanguageTag::convertToBcp47( LANGUAGE_SYSTEM
);
2883 // else aBcp47 stays empty
2887 aBcp47
= LanguageTagImpl::convertToBcp47( rLocale
);
2894 OUString
LanguageTag::convertToBcp47( LanguageType nLangID
)
2896 lang::Locale
aLocale( LanguageTag::convertToLocale( nLangID
));
2897 // If system for some reason (should not happen... haha) could not be
2898 // resolved DO NOT CALL LanguageTag::convertToBcp47(Locale) because that
2899 // would recurse into this method here!
2900 if (aLocale
.Language
.isEmpty())
2901 return OUString(); // bad luck, bail out
2902 return LanguageTagImpl::convertToBcp47( aLocale
);
2907 css::lang::Locale
LanguageTag::convertToLocale( const OUString
& rBcp47
, bool bResolveSystem
)
2909 if (rBcp47
.isEmpty() && !bResolveSystem
)
2910 return lang::Locale();
2912 return LanguageTag( rBcp47
).getLocale( bResolveSystem
);
2917 LanguageType
LanguageTag::convertToLanguageType( const OUString
& rBcp47
)
2919 return LanguageTag( rBcp47
).getLanguageType();
2924 LanguageType
LanguageTag::convertToLanguageTypeWithFallback( const OUString
& rBcp47
)
2926 return LanguageTag( rBcp47
).makeFallback().getLanguageType();
2931 css::lang::Locale
LanguageTag::convertToLocaleWithFallback( const OUString
& rBcp47
)
2933 return LanguageTag( rBcp47
).makeFallback().getLocale();
2938 LanguageType
LanguageTag::convertToLanguageTypeWithFallback( const css::lang::Locale
& rLocale
)
2940 if (rLocale
.Language
.isEmpty())
2941 return LANGUAGE_SYSTEM
;
2943 return LanguageTag( rLocale
).makeFallback().getLanguageType();
2948 bool LanguageTag::isValidBcp47( const OUString
& rString
, OUString
* o_pCanonicalized
,
2949 LanguageTag::PrivateUse ePrivateUse
)
2951 bool bValid
= false;
2955 lt_tag_t
* mpLangtag
;
2958 theDataRef().init();
2959 mpLangtag
= lt_tag_new();
2963 lt_tag_unref( mpLangtag
);
2969 if (!lt_tag_parse_disabled
&& lt_tag_parse(aVar
.mpLangtag
, OUStringToOString(rString
, RTL_TEXTENCODING_UTF8
).getStr(), &aError
.p
))
2971 char* pTag
= lt_tag_canonicalize( aVar
.mpLangtag
, &aError
.p
);
2972 SAL_WARN_IF( !pTag
, "i18nlangtag", "LanguageTag:isValidBcp47: could not canonicalize '" << rString
<< "'");
2976 if (ePrivateUse
!= PrivateUse::ALLOW
)
2980 const char* pLang
= nullptr;
2981 const lt_lang_t
* pLangT
= lt_tag_get_language( aVar
.mpLangtag
);
2984 pLang
= lt_lang_get_tag( pLangT
);
2985 if (pLang
&& strcmp( pLang
, I18NLANGTAG_QLT_ASCII
) == 0)
2987 // Disallow 'qlt' localuse code to prevent
2988 // confusion with our internal usage.
2993 if (ePrivateUse
== PrivateUse::ALLOW_ART_X
&& pLang
&& strcmp( pLang
, "art") == 0)
2995 // Allow anything 'art' which includes 'art-x-...' and 'art-Latn-x-...'.
2998 const lt_string_t
* pPrivate
= lt_tag_get_privateuse( aVar
.mpLangtag
);
2999 if (pPrivate
&& lt_string_length( pPrivate
) > 0)
3004 if (o_pCanonicalized
)
3005 *o_pCanonicalized
= OUString::createFromAscii( pTag
);
3011 SAL_INFO( "i18nlangtag", "LanguageTag:isValidBcp47: could not parse '" << rString
<< "'");
3016 LanguageTag
makeLanguageTagFromAppleLanguageId(AppleLanguageId nLanguage
)
3018 //map the simple ones via LanguageTypes, and the hard ones explicitly
3019 LanguageType
nLang(LANGUAGE_DONTKNOW
);
3023 case AppleLanguageId::ENGLISH
:
3024 nLang
= LANGUAGE_ENGLISH_US
;
3026 case AppleLanguageId::FRENCH
:
3027 nLang
= LANGUAGE_FRENCH
;
3029 case AppleLanguageId::GERMAN
:
3030 nLang
= LANGUAGE_GERMAN
;
3032 case AppleLanguageId::ITALIAN
:
3033 nLang
= LANGUAGE_ITALIAN
;
3035 case AppleLanguageId::DUTCH
:
3036 nLang
= LANGUAGE_DUTCH
;
3038 case AppleLanguageId::SWEDISH
:
3039 nLang
= LANGUAGE_SWEDISH
;
3041 case AppleLanguageId::SPANISH
:
3042 nLang
= LANGUAGE_SPANISH
;
3044 case AppleLanguageId::DANISH
:
3045 nLang
= LANGUAGE_DANISH
;
3047 case AppleLanguageId::PORTUGUESE
:
3048 nLang
= LANGUAGE_PORTUGUESE
;
3050 case AppleLanguageId::NORWEGIAN
:
3051 nLang
= LANGUAGE_NORWEGIAN
;
3053 case AppleLanguageId::HEBREW
:
3054 nLang
= LANGUAGE_HEBREW
;
3056 case AppleLanguageId::JAPANESE
:
3057 nLang
= LANGUAGE_JAPANESE
;
3059 case AppleLanguageId::ARABIC
:
3060 nLang
= LANGUAGE_ARABIC_PRIMARY_ONLY
;
3062 case AppleLanguageId::FINNISH
:
3063 nLang
= LANGUAGE_FINNISH
;
3065 case AppleLanguageId::GREEK
:
3066 nLang
= LANGUAGE_GREEK
;
3068 case AppleLanguageId::ICELANDIC
:
3069 nLang
= LANGUAGE_ICELANDIC
;
3071 case AppleLanguageId::MALTESE
:
3072 nLang
= LANGUAGE_MALTESE
;
3074 case AppleLanguageId::TURKISH
:
3075 nLang
= LANGUAGE_TURKISH
;
3077 case AppleLanguageId::CROATIAN
:
3078 nLang
= LANGUAGE_CROATIAN
;
3080 case AppleLanguageId::CHINESE_TRADITIONAL
:
3081 nLang
= LANGUAGE_CHINESE_TRADITIONAL
;
3083 case AppleLanguageId::URDU
:
3084 nLang
= LANGUAGE_URDU_PAKISTAN
; //probably, otherwise we need a LANGUAGE_URDU_PRIMARY_ONLY
3086 case AppleLanguageId::HINDI
:
3087 nLang
= LANGUAGE_HINDI
;
3089 case AppleLanguageId::THAI
:
3090 nLang
= LANGUAGE_THAI
;
3092 case AppleLanguageId::KOREAN
:
3093 nLang
= LANGUAGE_KOREAN
;
3095 case AppleLanguageId::LITHUANIAN
:
3096 nLang
= LANGUAGE_LITHUANIAN
;
3098 case AppleLanguageId::POLISH
:
3099 nLang
= LANGUAGE_POLISH
;
3101 case AppleLanguageId::HUNGARIAN
:
3102 nLang
= LANGUAGE_HUNGARIAN
;
3104 case AppleLanguageId::ESTONIAN
:
3105 nLang
= LANGUAGE_ESTONIAN
;
3107 case AppleLanguageId::LATVIAN
:
3108 nLang
= LANGUAGE_LATVIAN
;
3110 case AppleLanguageId::SAMI
:
3111 nLang
= LANGUAGE_SAMI_NORTHERN_NORWAY
; //maybe
3113 case AppleLanguageId::FAROESE
:
3114 nLang
= LANGUAGE_FAEROESE
;
3116 case AppleLanguageId::FARSI
:
3117 nLang
= LANGUAGE_FARSI
;
3119 case AppleLanguageId::RUSSIAN
:
3120 nLang
= LANGUAGE_RUSSIAN
;
3122 case AppleLanguageId::CHINESE_SIMPLIFIED
:
3123 nLang
= LANGUAGE_CHINESE_SIMPLIFIED
;
3125 case AppleLanguageId::FLEMISH
:
3126 nLang
= LANGUAGE_DUTCH_BELGIAN
;
3128 case AppleLanguageId::IRISH_GAELIC
:
3129 nLang
= LANGUAGE_GAELIC_IRELAND
;
3131 case AppleLanguageId::ALBANIAN
:
3132 nLang
= LANGUAGE_ALBANIAN
;
3134 case AppleLanguageId::ROMANIAN
:
3135 nLang
= LANGUAGE_ROMANIAN
;
3137 case AppleLanguageId::CZECH
:
3138 nLang
= LANGUAGE_CZECH
;
3140 case AppleLanguageId::SLOVAK
:
3141 nLang
= LANGUAGE_SLOVAK
;
3143 case AppleLanguageId::SLOVENIAN
:
3144 nLang
= LANGUAGE_SLOVENIAN
;
3146 case AppleLanguageId::YIDDISH
:
3147 nLang
= LANGUAGE_YIDDISH
;
3149 case AppleLanguageId::SERBIAN
:
3150 nLang
= LANGUAGE_SERBIAN_CYRILLIC_SERBIA
; //maybe
3152 case AppleLanguageId::MACEDONIAN
:
3153 nLang
= LANGUAGE_MACEDONIAN
;
3155 case AppleLanguageId::BULGARIAN
:
3156 nLang
= LANGUAGE_BULGARIAN
;
3158 case AppleLanguageId::UKRAINIAN
:
3159 nLang
= LANGUAGE_UKRAINIAN
;
3161 case AppleLanguageId::BYELORUSSIAN
:
3162 nLang
= LANGUAGE_BELARUSIAN
;
3164 case AppleLanguageId::UZBEK
:
3165 nLang
= LANGUAGE_UZBEK_CYRILLIC
; //maybe
3167 case AppleLanguageId::KAZAKH
:
3168 nLang
= LANGUAGE_KAZAKH
;
3170 case AppleLanguageId::AZERI_CYRILLIC
:
3171 nLang
= LANGUAGE_AZERI_CYRILLIC
;
3173 case AppleLanguageId::AZERI_ARABIC
:
3174 return LanguageTag(u
"az-Arab"_ustr
);
3175 case AppleLanguageId::ARMENIAN
:
3176 nLang
= LANGUAGE_ARMENIAN
;
3178 case AppleLanguageId::GEORGIAN
:
3179 nLang
= LANGUAGE_GEORGIAN
;
3181 case AppleLanguageId::MOLDAVIAN
:
3182 nLang
= LANGUAGE_ROMANIAN_MOLDOVA
;
3184 case AppleLanguageId::KIRGHIZ
:
3185 nLang
= LANGUAGE_KIRGHIZ
;
3187 case AppleLanguageId::TAJIKI
:
3188 nLang
= LANGUAGE_TAJIK
;
3190 case AppleLanguageId::TURKMEN
:
3191 nLang
= LANGUAGE_TURKMEN
;
3193 case AppleLanguageId::MONGOLIAN_MONGOLIAN
:
3194 nLang
= LANGUAGE_MONGOLIAN_MONGOLIAN_MONGOLIA
;
3196 case AppleLanguageId::MONGOLIAN_CYRILLIC
:
3197 nLang
= LANGUAGE_MONGOLIAN_CYRILLIC_MONGOLIA
;
3199 case AppleLanguageId::PASHTO
:
3200 nLang
= LANGUAGE_PASHTO
;
3202 case AppleLanguageId::KURDISH
:
3203 nLang
= LANGUAGE_USER_KURDISH_TURKEY
; //maybe
3205 case AppleLanguageId::KASHMIRI
:
3206 nLang
= LANGUAGE_KASHMIRI
;
3208 case AppleLanguageId::SINDHI
:
3209 nLang
= LANGUAGE_SINDHI
;
3211 case AppleLanguageId::TIBETAN
:
3212 nLang
= LANGUAGE_TIBETAN
;
3214 case AppleLanguageId::NEPALI
:
3215 nLang
= LANGUAGE_NEPALI
;
3217 case AppleLanguageId::SANSKRIT
:
3218 nLang
= LANGUAGE_SANSKRIT
;
3220 case AppleLanguageId::MARATHI
:
3221 nLang
= LANGUAGE_MARATHI
;
3223 case AppleLanguageId::BENGALI
:
3224 nLang
= LANGUAGE_BENGALI
;
3226 case AppleLanguageId::ASSAMESE
:
3227 nLang
= LANGUAGE_ASSAMESE
;
3229 case AppleLanguageId::GUJARATI
:
3230 nLang
= LANGUAGE_GUJARATI
;
3232 case AppleLanguageId::PUNJABI
:
3233 nLang
= LANGUAGE_PUNJABI
;
3235 case AppleLanguageId::ORIYA
:
3236 nLang
= LANGUAGE_ODIA
;
3238 case AppleLanguageId::MALAYALAM
:
3239 nLang
= LANGUAGE_MALAYALAM
;
3241 case AppleLanguageId::KANNADA
:
3242 nLang
= LANGUAGE_KANNADA
;
3244 case AppleLanguageId::TAMIL
:
3245 nLang
= LANGUAGE_TAMIL
;
3247 case AppleLanguageId::TELUGU
:
3248 nLang
= LANGUAGE_TELUGU
;
3250 case AppleLanguageId::SINHALESE
:
3251 nLang
= LANGUAGE_SINHALESE_SRI_LANKA
;
3253 case AppleLanguageId::BURMESE
:
3254 nLang
= LANGUAGE_BURMESE
;
3256 case AppleLanguageId::KHMER
:
3257 nLang
= LANGUAGE_KHMER
;
3259 case AppleLanguageId::LAO
:
3260 nLang
= LANGUAGE_LAO
;
3262 case AppleLanguageId::VIETNAMESE
:
3263 nLang
= LANGUAGE_VIETNAMESE
;
3265 case AppleLanguageId::INDONESIAN
:
3266 nLang
= LANGUAGE_INDONESIAN
;
3268 case AppleLanguageId::TAGALONG
:
3269 nLang
= LANGUAGE_USER_TAGALOG
;
3271 case AppleLanguageId::MALAY_LATIN
:
3272 nLang
= LANGUAGE_MALAY_MALAYSIA
;
3274 case AppleLanguageId::MALAY_ARABIC
:
3275 nLang
= LANGUAGE_USER_MALAY_ARABIC_MALAYSIA
;
3277 case AppleLanguageId::AMHARIC
:
3278 nLang
= LANGUAGE_AMHARIC_ETHIOPIA
;
3280 case AppleLanguageId::TIGRINYA
:
3281 nLang
= LANGUAGE_TIGRIGNA_ETHIOPIA
;
3283 case AppleLanguageId::GALLA
:
3284 nLang
= LANGUAGE_OROMO
;
3286 case AppleLanguageId::SOMALI
:
3287 nLang
= LANGUAGE_SOMALI
;
3289 case AppleLanguageId::SWAHILI
:
3290 nLang
= LANGUAGE_SWAHILI
;
3292 case AppleLanguageId::KINYARWANDA
:
3293 nLang
= LANGUAGE_KINYARWANDA_RWANDA
;
3295 case AppleLanguageId::RUNDI
:
3296 return LanguageTag(u
"rn"_ustr
);
3297 case AppleLanguageId::NYANJA
:
3298 nLang
= LANGUAGE_USER_NYANJA
;
3300 case AppleLanguageId::MALAGASY
:
3301 nLang
= LANGUAGE_MALAGASY_PLATEAU
;
3303 case AppleLanguageId::ESPERANTO
:
3304 nLang
= LANGUAGE_USER_ESPERANTO
;
3306 case AppleLanguageId::WELSH
:
3307 nLang
= LANGUAGE_WELSH
;
3309 case AppleLanguageId::BASQUE
:
3310 nLang
= LANGUAGE_BASQUE
;
3312 case AppleLanguageId::CATALAN
:
3313 nLang
= LANGUAGE_CATALAN
;
3315 case AppleLanguageId::LATIN
:
3316 nLang
= LANGUAGE_LATIN
;
3318 case AppleLanguageId::QUENCHUA
:
3319 nLang
= LANGUAGE_QUECHUA_BOLIVIA
; //maybe
3321 case AppleLanguageId::GUARANI
:
3322 nLang
= LANGUAGE_GUARANI_PARAGUAY
;
3324 case AppleLanguageId::AYMARA
:
3325 return LanguageTag(u
"ay"_ustr
);
3326 case AppleLanguageId::TATAR
:
3327 nLang
= LANGUAGE_TATAR
;
3329 case AppleLanguageId::UIGHUR
:
3330 nLang
= LANGUAGE_UIGHUR_CHINA
;
3332 case AppleLanguageId::DZONGKHA
:
3333 nLang
= LANGUAGE_DZONGKHA_BHUTAN
;
3335 case AppleLanguageId::JAVANESE_LATIN
:
3336 return LanguageTag(u
"jv-Latn"_ustr
);
3337 case AppleLanguageId::SUNDANESE_LATIN
:
3338 return LanguageTag(u
"su-Latn"_ustr
);
3339 case AppleLanguageId::GALICIAN
:
3340 nLang
= LANGUAGE_GALICIAN
;
3342 case AppleLanguageId::AFRIKAANS
:
3343 nLang
= LANGUAGE_AFRIKAANS
;
3345 case AppleLanguageId::BRETON
:
3346 nLang
= LANGUAGE_BRETON_FRANCE
;
3348 case AppleLanguageId::INUKTITUT
:
3349 nLang
= LANGUAGE_INUKTITUT_LATIN_CANADA
; //probably
3351 case AppleLanguageId::SCOTTISH_GAELIC
:
3352 nLang
= LANGUAGE_GAELIC_SCOTLAND
;
3354 case AppleLanguageId::MANX_GAELIC
:
3355 nLang
= LANGUAGE_USER_MANX
;
3357 case AppleLanguageId::IRISH_GAELIC_WITH_DOT_ABOVE
:
3358 return LanguageTag(u
"ga-Latg"_ustr
);
3359 case AppleLanguageId::TONGAN
:
3360 return LanguageTag(u
"to"_ustr
);
3361 case AppleLanguageId::GREEK_POLYTONIC
:
3362 nLang
= LANGUAGE_USER_ANCIENT_GREEK
;
3364 case AppleLanguageId::GREENLANDIC
:
3365 nLang
= LANGUAGE_KALAALLISUT_GREENLAND
;
3367 case AppleLanguageId::AZERI_LATIN
:
3368 nLang
= LANGUAGE_AZERI_LATIN
;
3372 return LanguageTag(nLang
);
3375 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */