Move setting of LD_LIBRARY_PATH closer to invocation of cppunittester
[LibreOffice.git] / i18nlangtag / source / languagetag / languagetag.cxx
blob2b301efdb4e1d3824a086ea2d599db5b28810956
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
10 #include <config_folders.h>
11 #include <config_liblangtag.h>
13 #include <i18nlangtag/languagetag.hxx>
14 #include <i18nlangtag/applelangid.hxx>
15 #include <i18nlangtag/mslangid.hxx>
16 #include <rtl/ustrbuf.hxx>
17 #include <rtl/bootstrap.hxx>
18 #include <sal/log.hxx>
19 #include <osl/file.hxx>
20 #include <rtl/locale.h>
21 #include <o3tl/string_view.hxx>
22 #include <algorithm>
23 #include <atomic>
24 #include <map>
25 #include <mutex>
26 #include <optional>
27 #include <string_view>
28 #include <unordered_set>
30 //#define erDEBUG
32 #if LIBLANGTAG_INLINE_FIX
33 #define LT_HAVE_INLINE
34 #endif
35 #include <liblangtag/langtag.h>
37 #ifdef ANDROID
38 #include <osl/detail/android-bootstrap.h>
39 #endif
41 #ifdef EMSCRIPTEN
42 #include <osl/detail/emscripten-bootstrap.h>
43 #endif
45 using namespace com::sun::star;
47 namespace {
49 // Helper to ensure lt_error_t is free'd
50 struct myLtError
52 lt_error_t* p;
53 myLtError() : p(nullptr) {}
54 ~myLtError() { if (p) lt_error_unref( p); }
59 namespace {
/** The one recursive mutex of this file, created on first use. */
std::recursive_mutex& theMutex()
{
    static std::recursive_mutex aTheOne;
    return aTheOne;
}
67 typedef std::unordered_set< OUString > KnownTagSet;
68 static const KnownTagSet & getKnowns()
70 static KnownTagSet theKnowns = []()
72 KnownTagSet tmpSet;
73 ::std::vector< MsLangId::LanguagetagMapping > aDefined( MsLangId::getDefinedLanguagetags());
74 for (auto const& elemDefined : aDefined)
76 // Do not use the BCP47 string here to initialize the
77 // LanguageTag because then canonicalize() would call this
78 // getKnowns() again...
79 ::std::vector< OUString > aFallbacks( LanguageTag( elemDefined.mnLang).getFallbackStrings( true));
80 for (auto const& fallback : aFallbacks)
82 tmpSet.insert(fallback);
85 return tmpSet;
86 }();
87 return theKnowns;
91 namespace {
92 struct compareIgnoreAsciiCaseLess
94 bool operator()( std::u16string_view r1, std::u16string_view r2 ) const
96 return o3tl::compareToIgnoreAsciiCase(r1, r2) < 0;
99 typedef ::std::map< OUString, LanguageTag::ImplPtr, compareIgnoreAsciiCaseLess > MapBcp47;
100 typedef ::std::map< LanguageType, LanguageTag::ImplPtr > MapLangID;
101 MapBcp47& theMapBcp47()
103 static MapBcp47 SINGLETON;
104 return SINGLETON;
106 MapLangID& theMapLangID()
108 static MapLangID SINGLETON;
109 return SINGLETON;
111 LanguageTag::ImplPtr& theSystemLocale()
113 static LanguageTag::ImplPtr SINGLETON;
114 return SINGLETON;
119 static LanguageType getNextOnTheFlyLanguage()
121 static LanguageType nOnTheFlyLanguage(0);
122 std::unique_lock aGuard( theMutex());
123 if (!nOnTheFlyLanguage)
124 nOnTheFlyLanguage = MsLangId::makeLangID( LANGUAGE_ON_THE_FLY_SUB_START, LANGUAGE_ON_THE_FLY_START);
125 else
127 if (MsLangId::getPrimaryLanguage( nOnTheFlyLanguage) != LANGUAGE_ON_THE_FLY_END)
128 ++nOnTheFlyLanguage;
129 else
131 LanguageType nSub = MsLangId::getSubLanguage( nOnTheFlyLanguage);
132 if (nSub != LANGUAGE_ON_THE_FLY_SUB_END)
133 nOnTheFlyLanguage = MsLangId::makeLangID( ++nSub, LANGUAGE_ON_THE_FLY_START);
134 else
136 SAL_WARN( "i18nlangtag", "getNextOnTheFlyLanguage: none left! ("
137 << ((sal_uInt16(LANGUAGE_ON_THE_FLY_END) - sal_uInt16(LANGUAGE_ON_THE_FLY_START) + 1)
138 * (sal_uInt16(LANGUAGE_ON_THE_FLY_SUB_END) - sal_uInt16(LANGUAGE_ON_THE_FLY_SUB_START) + 1))
139 << " consumed?!?)");
140 return LanguageType(0);
144 #if OSL_DEBUG_LEVEL > 0
145 static size_t nOnTheFlies = 0;
146 ++nOnTheFlies;
147 SAL_INFO( "i18nlangtag", "getNextOnTheFlyLanguage: number " << nOnTheFlies);
148 #endif
149 return nOnTheFlyLanguage;
153 // static
154 bool LanguageTag::isOnTheFlyID( LanguageType nLang )
156 LanguageType nPri = MsLangId::getPrimaryLanguage( nLang);
157 LanguageType nSub = MsLangId::getSubLanguage( nLang);
158 return
159 LANGUAGE_ON_THE_FLY_START <= nPri && nPri <= LANGUAGE_ON_THE_FLY_END &&
160 LANGUAGE_ON_THE_FLY_SUB_START <= nSub && nSub <= LANGUAGE_ON_THE_FLY_SUB_END;
163 namespace {
165 /** A reference holder for liblangtag data de/initialization, one static
166 instance. Currently implemented such that the first "ref" inits and dtor
167 (our library deinitialized) tears down.
169 class LiblangtagDataRef
171 public:
172 LiblangtagDataRef();
173 ~LiblangtagDataRef();
174 void init()
176 if (!mbInitialized)
177 setup();
179 private:
180 OString maDataPath; // path to liblangtag data, "|" if system
181 bool mbInitialized;
183 void setupDataPath();
184 void setup();
185 static void teardown();
188 LiblangtagDataRef& theDataRef()
190 static LiblangtagDataRef SINGLETON;
191 return SINGLETON;
195 LiblangtagDataRef::LiblangtagDataRef()
197 mbInitialized(false)
201 LiblangtagDataRef::~LiblangtagDataRef()
203 if (mbInitialized)
204 teardown();
207 void LiblangtagDataRef::setup()
209 SAL_INFO( "i18nlangtag", "LiblangtagDataRef::setup: initializing database");
210 if (maDataPath.isEmpty())
211 setupDataPath();
212 lt_db_initialize();
213 mbInitialized = true;
216 void LiblangtagDataRef::teardown()
218 SAL_INFO( "i18nlangtag", "LiblangtagDataRef::teardown: finalizing database");
219 lt_db_finalize();
222 void LiblangtagDataRef::setupDataPath()
224 #if defined(ANDROID) || defined(EMSCRIPTEN)
225 maDataPath = OString(lo_get_app_data_dir()) + "/share/liblangtag";
226 #else
227 // maDataPath is assumed to be empty here.
228 OUString aURL(u"$BRAND_BASE_DIR/" LIBO_SHARE_FOLDER "/liblangtag"_ustr);
229 rtl::Bootstrap::expandMacros(aURL); //TODO: detect failure
231 // Check if data is in our own installation, else assume system
232 // installation.
233 OUString aData = aURL + "/language-subtag-registry.xml";
234 osl::DirectoryItem aDirItem;
235 if (osl::DirectoryItem::get( aData, aDirItem) == osl::DirectoryItem::E_None)
237 OUString aPath;
238 if (osl::FileBase::getSystemPathFromFileURL( aURL, aPath) == osl::FileBase::E_None)
239 maDataPath = OUStringToOString( aPath, RTL_TEXTENCODING_UTF8);
241 #endif
242 if (maDataPath.isEmpty())
243 maDataPath = "|"_ostr; // assume system
244 else
245 lt_db_set_datadir( maDataPath.getStr());
249 /* TODO: we could transform known vendor and browser-specific variants to known
250 * BCP 47 if available. For now just remove them to not confuse any later
251 * treatments that check for empty variants. This vendor stuff was never
252 * supported anyway. */
253 static void handleVendorVariant( css::lang::Locale & rLocale )
255 if (!rLocale.Variant.isEmpty() && rLocale.Language != I18NLANGTAG_QLT)
256 rLocale.Variant.clear();
// Implementation object behind LanguageTag. Instances are held via
// LanguageTag::ImplPtr in the maps of this file. It keeps the Locale, BCP 47
// string and LangID representations of one tag together with flags telling
// which of them are initialized, cached sub-parts of the tag, and the
// liblangtag handle.
// All data members except mbSystemLocale are declared mutable.
// NOTE(review): the declaration order of the data members is also the order
// the constructors initialize them in; keep both in sync when adding members.
260 class LanguageTagImpl
262 public:
264 explicit LanguageTagImpl( const LanguageTag & rLanguageTag );
265 explicit LanguageTagImpl( const LanguageTagImpl & rLanguageTagImpl );
266 ~LanguageTagImpl();
267 LanguageTagImpl& operator=( const LanguageTagImpl & rLanguageTagImpl );
269 private:
// LanguageTag accesses the private members directly.
271 friend class LanguageTag;
// Tri-state used for lazily evaluated properties; the constructor taking a
// LanguageTag starts all of them at DECISION_DONTKNOW.
273 enum Decision
275 DECISION_DONTKNOW,
276 DECISION_NO,
277 DECISION_YES
280 mutable css::lang::Locale maLocale;
281 mutable OUString maBcp47;
282 mutable OUString maCachedLanguage; ///< cache getLanguage()
283 mutable OUString maCachedScript; ///< cache getScript()
284 mutable OUString maCachedCountry; ///< cache getCountry()
285 mutable OUString maCachedVariants; ///< cache getVariants()
286 mutable OUString maCachedGlibcString; ///< cache getGlibcLocaleString()
287 mutable lt_tag_t* mpImplLangtag; ///< liblangtag pointer
288 mutable LanguageType mnLangID;
289 mutable LanguageTag::ScriptType meScriptType;
290 mutable Decision meIsValid;
291 mutable Decision meIsIsoLocale;
292 mutable Decision meIsIsoODF;
293 mutable Decision meIsLiblangtagNeeded; ///< whether processing with liblangtag needed
// One-bit flags: which representations are initialized and which cached
// strings above are filled.
294 bool mbSystemLocale : 1;
295 mutable bool mbInitializedBcp47 : 1;
296 mutable bool mbInitializedLocale : 1;
297 mutable bool mbInitializedLangID : 1;
298 mutable bool mbCachedLanguage : 1;
299 mutable bool mbCachedScript : 1;
300 mutable bool mbCachedCountry : 1;
301 mutable bool mbCachedVariants : 1;
302 mutable bool mbCachedGlibcString : 1;
304 OUString const & getBcp47() const;
305 OUString const & getLanguage() const;
306 OUString const & getScript() const;
307 OUString const & getCountry() const;
308 OUString getRegion() const;
309 OUString const & getVariants() const;
310 bool hasScript() const;
311 OUString const & getGlibcLocaleString() const;
313 void setScriptType(LanguageTag::ScriptType st);
314 LanguageTag::ScriptType getScriptType() const;
316 bool isIsoLocale() const;
317 bool isIsoODF() const;
318 bool isValidBcp47() const;
// Conversions between the three representations (Locale, BCP 47, LangID).
320 void convertLocaleToBcp47() const;
321 bool convertLocaleToLang( bool bAllowOnTheFlyID );
322 void convertBcp47ToLocale();
323 void convertBcp47ToLang();
324 void convertLangToLocale() const;
325 void convertLangToBcp47() const;
327 /** @return whether BCP 47 language tag string was changed. */
328 bool canonicalize();
330 /** Canonicalize if not yet done and synchronize initialized conversions.
332 @return whether BCP 47 language tag string was changed.
334 bool synCanonicalize();
336 OUString getLanguageFromLangtag();
337 OUString getScriptFromLangtag();
338 OUString getRegionFromLangtag();
339 OUString getVariantsFromLangtag();
341 /** Generates on-the-fly LangID and registers the maBcp47,mnLangID pair.
343 @param nRegisterID
344 If not 0 and not LANGUAGE_DONTKNOW, suggest (!) to use that ID
345 instead of generating an on-the-fly ID. Implementation may
346 still generate an ID if the suggested ID is already used for
347 another language tag.
349 @return NULL if no ID could be obtained or registration failed.
351 LanguageTag::ImplPtr registerOnTheFly( LanguageType nRegisterID );
353 /** Obtain Language, Script, Country and Variants via simpleExtract() and
354 assign them to the cached variables if successful.
356 @return simpleExtract() successfully extracted and cached.
358 bool cacheSimpleLSCV();
360 enum Extraction
362 EXTRACTED_NONE,
363 EXTRACTED_LSC,
364 EXTRACTED_LV,
365 EXTRACTED_LR,
366 EXTRACTED_C_LOCALE,
367 EXTRACTED_X,
368 EXTRACTED_X_JOKER,
369 EXTRACTED_KNOWN_BAD
372 /** Of a language tag of the form lll[-Ssss][-CC][-vvvvvvvv] extract the
373 portions.
375 Does not check case or content!
377 @return EXTRACTED_LSC if simple tag was detected (i.e. one that
378 would fulfill the isIsoODF() condition),
379 EXTRACTED_LV if a tag with variant was detected,
380 EXTRACTED_LR if a tag with 3-digit UN M.49 region code was detected
381 EXTRACTED_C_LOCALE if a 'C' locale was detected,
382 EXTRACTED_X if x-... privateuse tag was detected,
383 EXTRACTED_X_JOKER if "*" joker was detected,
384 EXTRACTED_KNOWN_BAD if a bad but known (to be remapped) tag was detected
385 EXTRACTED_NONE else.
387 static Extraction simpleExtract( const OUString& rBcp47,
388 OUString& rLanguage,
389 OUString& rScript,
390 OUString& rCountry,
391 OUString& rRegion,
392 OUString& rVariants );
394 /** Convert Locale to BCP 47 string without resolving system and creating
395 temporary LanguageTag instances. */
396 static OUString convertToBcp47( const css::lang::Locale& rLocale );
401 LanguageTagImpl::LanguageTagImpl( const LanguageTag & rLanguageTag )
403 maLocale( rLanguageTag.maLocale),
404 maBcp47( rLanguageTag.maBcp47),
405 mpImplLangtag( nullptr),
406 mnLangID( rLanguageTag.mnLangID),
407 meScriptType( LanguageTag::ScriptType::UNKNOWN),
408 meIsValid( DECISION_DONTKNOW),
409 meIsIsoLocale( DECISION_DONTKNOW),
410 meIsIsoODF( DECISION_DONTKNOW),
411 meIsLiblangtagNeeded( DECISION_DONTKNOW),
412 mbSystemLocale( rLanguageTag.mbSystemLocale),
413 mbInitializedBcp47( rLanguageTag.mbInitializedBcp47),
414 mbInitializedLocale( rLanguageTag.mbInitializedLocale),
415 mbInitializedLangID( rLanguageTag.mbInitializedLangID),
416 mbCachedLanguage( false),
417 mbCachedScript( false),
418 mbCachedCountry( false),
419 mbCachedVariants( false),
420 mbCachedGlibcString( false)
425 LanguageTagImpl::LanguageTagImpl( const LanguageTagImpl & rLanguageTagImpl )
427 maLocale( rLanguageTagImpl.maLocale),
428 maBcp47( rLanguageTagImpl.maBcp47),
429 maCachedLanguage( rLanguageTagImpl.maCachedLanguage),
430 maCachedScript( rLanguageTagImpl.maCachedScript),
431 maCachedCountry( rLanguageTagImpl.maCachedCountry),
432 maCachedVariants( rLanguageTagImpl.maCachedVariants),
433 maCachedGlibcString( rLanguageTagImpl.maCachedGlibcString),
434 mpImplLangtag( rLanguageTagImpl.mpImplLangtag ?
435 lt_tag_copy( rLanguageTagImpl.mpImplLangtag) : nullptr),
436 mnLangID( rLanguageTagImpl.mnLangID),
437 meScriptType( rLanguageTagImpl.meScriptType),
438 meIsValid( rLanguageTagImpl.meIsValid),
439 meIsIsoLocale( rLanguageTagImpl.meIsIsoLocale),
440 meIsIsoODF( rLanguageTagImpl.meIsIsoODF),
441 meIsLiblangtagNeeded( rLanguageTagImpl.meIsLiblangtagNeeded),
442 mbSystemLocale( rLanguageTagImpl.mbSystemLocale),
443 mbInitializedBcp47( rLanguageTagImpl.mbInitializedBcp47),
444 mbInitializedLocale( rLanguageTagImpl.mbInitializedLocale),
445 mbInitializedLangID( rLanguageTagImpl.mbInitializedLangID),
446 mbCachedLanguage( rLanguageTagImpl.mbCachedLanguage),
447 mbCachedScript( rLanguageTagImpl.mbCachedScript),
448 mbCachedCountry( rLanguageTagImpl.mbCachedCountry),
449 mbCachedVariants( rLanguageTagImpl.mbCachedVariants),
450 mbCachedGlibcString( rLanguageTagImpl.mbCachedGlibcString)
452 if (mpImplLangtag)
453 theDataRef().init();
457 LanguageTagImpl& LanguageTagImpl::operator=( const LanguageTagImpl & rLanguageTagImpl )
459 if (&rLanguageTagImpl == this)
460 return *this;
462 maLocale = rLanguageTagImpl.maLocale;
463 maBcp47 = rLanguageTagImpl.maBcp47;
464 maCachedLanguage = rLanguageTagImpl.maCachedLanguage;
465 maCachedScript = rLanguageTagImpl.maCachedScript;
466 maCachedCountry = rLanguageTagImpl.maCachedCountry;
467 maCachedVariants = rLanguageTagImpl.maCachedVariants;
468 maCachedGlibcString = rLanguageTagImpl.maCachedGlibcString;
469 lt_tag_t * oldTag = mpImplLangtag;
470 mpImplLangtag = rLanguageTagImpl.mpImplLangtag ?
471 lt_tag_copy( rLanguageTagImpl.mpImplLangtag) : nullptr;
472 lt_tag_unref(oldTag);
473 mnLangID = rLanguageTagImpl.mnLangID;
474 meScriptType = rLanguageTagImpl.meScriptType;
475 meIsValid = rLanguageTagImpl.meIsValid;
476 meIsIsoLocale = rLanguageTagImpl.meIsIsoLocale;
477 meIsIsoODF = rLanguageTagImpl.meIsIsoODF;
478 meIsLiblangtagNeeded= rLanguageTagImpl.meIsLiblangtagNeeded;
479 mbSystemLocale = rLanguageTagImpl.mbSystemLocale;
480 mbInitializedBcp47 = rLanguageTagImpl.mbInitializedBcp47;
481 mbInitializedLocale = rLanguageTagImpl.mbInitializedLocale;
482 mbInitializedLangID = rLanguageTagImpl.mbInitializedLangID;
483 mbCachedLanguage = rLanguageTagImpl.mbCachedLanguage;
484 mbCachedScript = rLanguageTagImpl.mbCachedScript;
485 mbCachedCountry = rLanguageTagImpl.mbCachedCountry;
486 mbCachedVariants = rLanguageTagImpl.mbCachedVariants;
487 mbCachedGlibcString = rLanguageTagImpl.mbCachedGlibcString;
488 if (mpImplLangtag && !oldTag)
489 theDataRef().init();
490 return *this;
494 LanguageTagImpl::~LanguageTagImpl()
496 if (mpImplLangtag)
498 lt_tag_unref( mpImplLangtag);
503 LanguageTag::LanguageTag( const OUString & rBcp47LanguageTag, bool bCanonicalize )
505 maBcp47( rBcp47LanguageTag),
506 mnLangID( LANGUAGE_DONTKNOW),
507 mbSystemLocale( rBcp47LanguageTag.isEmpty()),
508 mbInitializedBcp47( !mbSystemLocale),
509 mbInitializedLocale( false),
510 mbInitializedLangID( false),
511 mbIsFallback( false)
513 if (bCanonicalize)
515 getImpl()->canonicalize();
516 // Registration itself may already have canonicalized, so do an
517 // unconditional sync.
518 syncFromImpl();
524 LanguageTag::LanguageTag( const css::lang::Locale & rLocale )
526 maLocale( rLocale),
527 mnLangID( LANGUAGE_DONTKNOW),
528 mbSystemLocale( rLocale.Language.isEmpty()),
529 mbInitializedBcp47( false),
530 mbInitializedLocale( false), // we do not know which mess we got passed in
531 mbInitializedLangID( false),
532 mbIsFallback( false)
534 handleVendorVariant( maLocale);
538 LanguageTag::LanguageTag( LanguageType nLanguage )
540 mnLangID( nLanguage),
541 mbSystemLocale( nLanguage == LANGUAGE_SYSTEM),
542 mbInitializedBcp47( false),
543 mbInitializedLocale( false),
544 mbInitializedLangID( !mbSystemLocale),
545 mbIsFallback( false)
550 LanguageTag::LanguageTag( const OUString& rBcp47, const OUString& rLanguage,
551 std::u16string_view rScript, const OUString& rCountry )
553 maBcp47( rBcp47),
554 mnLangID( LANGUAGE_DONTKNOW),
555 mbSystemLocale( rBcp47.isEmpty() && rLanguage.isEmpty()),
556 mbInitializedBcp47( !rBcp47.isEmpty()),
557 mbInitializedLocale( false),
558 mbInitializedLangID( false),
559 mbIsFallback( false)
561 if (mbSystemLocale || mbInitializedBcp47)
562 return;
564 if (rScript.empty())
566 maBcp47 = rLanguage + "-" + rCountry;
567 mbInitializedBcp47 = true;
568 maLocale.Language = rLanguage;
569 maLocale.Country = rCountry;
570 mbInitializedLocale = true;
572 else
574 if (rCountry.isEmpty())
575 maBcp47 = rLanguage + "-" + rScript;
576 else
577 maBcp47 = rLanguage + "-" + rScript + "-" + rCountry;
578 mbInitializedBcp47 = true;
579 maLocale.Language = I18NLANGTAG_QLT;
580 maLocale.Country = rCountry;
581 maLocale.Variant = maBcp47;
582 mbInitializedLocale = true;
587 LanguageTag::LanguageTag( const rtl_Locale & rLocale )
589 maLocale( rLocale.Language, rLocale.Country, rLocale.Variant),
590 mnLangID( LANGUAGE_DONTKNOW),
591 mbSystemLocale( maLocale.Language.isEmpty()),
592 mbInitializedBcp47( false),
593 mbInitializedLocale( !mbSystemLocale),
594 mbInitializedLangID( false),
595 mbIsFallback( false)
597 convertFromRtlLocale();
600 LanguageTag::~LanguageTag() {}
// Register this impl's BCP 47 string in theMapBcp47() and assign it a LangID,
// which is also cross-inserted into theMapLangID().
// nRegisterID is a suggested ID; if it is 0 or LANGUAGE_DONTKNOW, or already
// mapped to a different BCP 47 string, an ID is obtained from
// getNextOnTheFlyLanguage() instead.
// Returns the impl registered for the BCP 47 string (which may be an already
// existing other impl), or an empty pointer if no BCP 47 string is available.
// If no ID is left the impl is returned without a LangID being assigned.
602 LanguageTag::ImplPtr LanguageTagImpl::registerOnTheFly( LanguageType nRegisterID )
604 LanguageTag::ImplPtr pImpl;
// Make sure there is a BCP 47 string, deriving it from the Locale if needed.
606 if (!mbInitializedBcp47)
608 if (mbInitializedLocale)
610 maBcp47 = LanguageTagImpl::convertToBcp47( maLocale);
611 mbInitializedBcp47 = !maBcp47.isEmpty();
614 if (maBcp47.isEmpty())
616 SAL_WARN( "i18nlangtag", "LanguageTagImpl::registerOnTheFly: no Bcp47 string, no registering");
617 return pImpl;
// All map access below happens under the file's recursive mutex.
620 std::unique_lock aGuard( theMutex());
// Step 1: find or create the impl registered for maBcp47.
622 MapBcp47& rMapBcp47 = theMapBcp47();
623 MapBcp47::const_iterator it( rMapBcp47.find( maBcp47));
624 bool bOtherImpl = false;
625 if (it != rMapBcp47.end())
627 SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: found impl for '" << maBcp47 << "'");
628 pImpl = (*it).second;
629 if (pImpl.get() != this)
631 // Could happen for example if during registerImpl() the tag was
632 // changed via canonicalize() and the result was already present in
633 // the map before, for example 'bn-Beng' => 'bn'. This specific
634 // case is now taken care of in registerImpl() and doesn't reach
635 // here. However, use the already existing impl if it matches.
636 SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: using other impl for this '" << maBcp47 << "'");
637 *this = *pImpl; // ensure consistency
638 bOtherImpl = true;
641 else
643 SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: new impl for '" << maBcp47 << "'");
// The map holds a copy of this impl, not this instance itself.
644 pImpl = std::make_shared<LanguageTagImpl>( *this);
645 rMapBcp47.insert( ::std::make_pair( maBcp47, pImpl));
// Step 2: assign a LangID unless another impl was found that already has one.
648 if (!bOtherImpl || !pImpl->mbInitializedLangID)
650 if (nRegisterID == LanguageType(0) || nRegisterID == LANGUAGE_DONTKNOW)
651 nRegisterID = getNextOnTheFlyLanguage();
652 else
654 // Accept a suggested ID only if it is not mapped yet to something
655 // different, otherwise we would end up with ambiguous assignments
656 // of different language tags, for example for the same primary
657 // LangID with "no", "nb" and "nn".
658 const MapLangID& rMapLangID = theMapLangID();
659 MapLangID::const_iterator itID( rMapLangID.find( nRegisterID));
660 if (itID != rMapLangID.end())
662 if ((*itID).second->maBcp47 != maBcp47)
664 SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: not using suggested 0x"
665 << ::std::hex << nRegisterID << " for '" << maBcp47 << "' have '"
666 << (*itID).second->maBcp47 << "'");
667 nRegisterID = getNextOnTheFlyLanguage();
669 else
671 SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: suggested 0x"
672 << ::std::hex << nRegisterID << " for '" << maBcp47 << "' already registered");
// getNextOnTheFlyLanguage() returns 0 when its ID range is exhausted.
676 if (!nRegisterID)
678 // out of IDs, nothing to register
679 return pImpl;
681 pImpl->mnLangID = nRegisterID;
682 pImpl->mbInitializedLangID = true;
// Keep this instance in sync with the registered impl if they differ.
683 if (pImpl.get() != this)
685 mnLangID = nRegisterID;
686 mbInitializedLangID = true;
// Step 3: cross-insert LangID => impl; an existing entry is kept as is.
690 ::std::pair< MapLangID::const_iterator, bool > res(
691 theMapLangID().insert( ::std::make_pair( pImpl->mnLangID, pImpl)));
692 if (res.second)
694 SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: cross-inserted 0x"
695 << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "'");
697 else
699 SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: not cross-inserted 0x"
700 << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "' have '"
701 << (*res.first).second->maBcp47 << "'");
704 return pImpl;
708 LanguageTag::ScriptType LanguageTag::getOnTheFlyScriptType( LanguageType nRegisterID )
710 const MapLangID& rMapLangID = theMapLangID();
711 MapLangID::const_iterator itID( rMapLangID.find( nRegisterID));
712 if (itID != rMapLangID.end())
713 return (*itID).second->getScriptType();
714 else
715 return ScriptType::UNKNOWN;
719 // static
720 void LanguageTag::setConfiguredSystemLanguage( LanguageType nLang )
722 if (nLang == LANGUAGE_DONTKNOW || nLang == LANGUAGE_SYSTEM)
724 SAL_WARN( "i18nlangtag",
725 "LanguageTag::setConfiguredSystemLanguage: refusing to set unresolved system locale 0x" <<
726 ::std::hex << nLang);
727 return;
729 SAL_INFO( "i18nlangtag", "LanguageTag::setConfiguredSystemLanguage: setting to 0x" << ::std::hex << nLang);
730 MsLangId::LanguageTagAccess::setConfiguredSystemLanguage( nLang);
731 // Reset system locale to none and let registerImpl() do the rest to
732 // initialize a new one.
733 theSystemLocale().reset();
734 LanguageTag aLanguageTag( LANGUAGE_SYSTEM);
735 aLanguageTag.registerImpl();
738 static bool lt_tag_parse_disabled = false;
740 // static
741 void LanguageTag::disable_lt_tag_parse()
743 lt_tag_parse_disabled = true;
746 static bool lcl_isKnownOnTheFlyID( LanguageType nLang )
748 return nLang != LANGUAGE_DONTKNOW && nLang != LANGUAGE_SYSTEM &&
749 (LanguageTag::isOnTheFlyID( nLang) || (nLang == MsLangId::getPrimaryLanguage( nLang)));
// Find or create the LanguageTagImpl for this LanguageTag and return it.
// Order of attempts, as implemented below:
//  1. shortcuts without taking the mutex: the system locale impl, the shared
//     impl for LANGUAGE_DONTKNOW, and the system locale impl again if its
//     LangID or BCP 47 string equals this tag's;
//  2. under theMutex(): lookup/insert in the LangID map if a LangID is
//     initialized, else lookup/insert in the BCP 47 map if a BCP 47 string is
//     available, each with an attempt to cross-insert into the other map;
//  3. otherwise an unregistered impl is created.
// Being a const method it still updates members of this instance (mnLangID,
// maBcp47, the mbInitialized... flags) along the way.
// NOTE(review): theSystemLocale() and the function-local theDontKnow are read
// and written before theMutex() is taken - confirm that this is safe for the
// threads calling in here.
753 LanguageTag::ImplPtr LanguageTag::registerImpl() const
755 // XXX NOTE: Do not use non-static LanguageTag::convert...() member methods
756 // here as they access getImpl() and syncFromImpl() and would lead to
757 // recursion. Also do not use the static LanguageTag::convertTo...()
758 // methods as they may create temporary LanguageTag instances. Only
759 // LanguageTagImpl::convertToBcp47(Locale) is ok.
761 ImplPtr pImpl;
763 #if OSL_DEBUG_LEVEL > 0
764 static std::atomic_int nCalls = 0;
765 ++nCalls;
766 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCalls << " calls");
767 #endif
769 // Do not register unresolved system locale, also force LangID if system
770 // and take the system locale shortcut if possible.
771 if (mbSystemLocale)
773 pImpl = theSystemLocale();
774 if (pImpl)
776 #if OSL_DEBUG_LEVEL > 0
777 static size_t nCallsSystem = 0;
778 ++nCallsSystem;
779 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystem << " system calls");
780 #endif
781 return pImpl;
// No system locale impl yet: resolve LANGUAGE_SYSTEM to a real LangID.
783 if (!mbInitializedLangID)
785 mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
786 mbInitializedLangID = (mnLangID != LANGUAGE_SYSTEM);
787 SAL_WARN_IF( !mbInitializedLangID, "i18nlangtag", "LanguageTag::registerImpl: can't resolve system!");
791 if (mbInitializedLangID)
793 if (mnLangID == LANGUAGE_DONTKNOW)
795 static LanguageTag::ImplPtr theDontKnow;
796 // Heavy usage of LANGUAGE_DONTKNOW, make it an own Impl for all the
797 // conversion attempts. At the same time provide a central breakpoint
798 // to inspect such places.
799 if (!theDontKnow)
800 theDontKnow = std::make_shared<LanguageTagImpl>( *this);
801 pImpl = theDontKnow;
802 #if OSL_DEBUG_LEVEL > 0
803 static size_t nCallsDontKnow = 0;
804 ++nCallsDontKnow;
805 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsDontKnow << " DontKnow calls");
806 #endif
807 return pImpl;
809 else
811 // A great share are calls for a system equal locale.
812 pImpl = theSystemLocale();
813 if (pImpl && pImpl->mnLangID == mnLangID)
815 #if OSL_DEBUG_LEVEL > 0
816 static size_t nCallsSystemEqual = 0;
817 ++nCallsSystemEqual;
818 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystemEqual
819 << " system equal LangID calls");
820 #endif
821 return pImpl;
826 // Force Bcp47 if not LangID.
827 if (!mbInitializedLangID && !mbInitializedBcp47)
829 // The one central point to set mbInitializedLocale=true if a
830 // LanguageTag was initialized with a Locale. We will now convert and
831 // possibly later resolve it.
832 if (!mbInitializedLocale && (mbSystemLocale || !maLocale.Language.isEmpty()))
833 mbInitializedLocale = true;
834 SAL_WARN_IF( !mbInitializedLocale, "i18nlangtag", "LanguageTag::registerImpl: still not mbInitializedLocale");
836 maBcp47 = LanguageTagImpl::convertToBcp47( maLocale);
837 mbInitializedBcp47 = !maBcp47.isEmpty();
840 if (mbInitializedBcp47)
842 // A great share are calls for a system equal locale.
843 pImpl = theSystemLocale();
844 if (pImpl && pImpl->maBcp47 == maBcp47)
846 #if OSL_DEBUG_LEVEL > 0
847 static size_t nCallsSystemEqual = 0;
848 ++nCallsSystemEqual;
849 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystemEqual << " system equal BCP47 calls");
850 #endif
851 return pImpl;
855 #if OSL_DEBUG_LEVEL > 0
856 static size_t nCallsNonSystem = 0;
857 ++nCallsNonSystem;
858 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsNonSystem << " non-system calls");
859 #endif
// From here on the maps are accessed, guarded by the recursive mutex.
861 std::unique_lock aGuard( theMutex());
863 #if OSL_DEBUG_LEVEL > 0
864 static long nRunning = 0;
865 // Entering twice here is ok, which is needed for fallback init in
866 // getKnowns() in canonicalize() via pImpl->convertBcp47ToLocale() below,
867 // everything else is suspicious.
868 SAL_WARN_IF( nRunning > 1, "i18nlangtag", "LanguageTag::registerImpl: re-entered for '"
869 << maBcp47 << "' 0x" << ::std::hex << mnLangID );
870 struct Runner { Runner() { ++nRunning; } ~Runner() { --nRunning; } } aRunner;
871 #endif
873 // Prefer LangID map as find+insert needs less comparison work.
874 if (mbInitializedLangID)
876 MapLangID& rMap = theMapLangID();
877 MapLangID::const_iterator it( rMap.find( mnLangID));
878 if (it != rMap.end())
880 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: found impl for 0x" << ::std::hex << mnLangID);
881 pImpl = (*it).second;
883 else
885 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: new impl for 0x" << ::std::hex << mnLangID);
886 pImpl = std::make_shared<LanguageTagImpl>( *this);
887 rMap.insert( ::std::make_pair( mnLangID, pImpl));
888 // Try round-trip.
889 if (!pImpl->mbInitializedLocale)
890 pImpl->convertLangToLocale();
891 LanguageType nLang = MsLangId::Conversion::convertLocaleToLanguage( pImpl->maLocale);
892 // If round-trip is identical cross-insert to Bcp47 map.
893 if (nLang == pImpl->mnLangID)
895 if (!pImpl->mbInitializedBcp47)
896 pImpl->convertLocaleToBcp47();
897 ::std::pair< MapBcp47::const_iterator, bool > res(
898 theMapBcp47().insert( ::std::make_pair( pImpl->maBcp47, pImpl)));
899 if (res.second)
901 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: cross-inserted '" << pImpl->maBcp47 << "' for 0x" << ::std::hex << mnLangID);
903 else
905 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted '" << pImpl->maBcp47 << "' for 0x" << ::std::hex << mnLangID << " have 0x"
906 << ::std::hex << (*res.first).second->mnLangID);
909 else
// Round-trip differs: the BCP 47 string is still generated (it is logged
// below) but not inserted into the BCP 47 map.
911 if (!pImpl->mbInitializedBcp47)
912 pImpl->convertLocaleToBcp47();
913 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted '" << pImpl->maBcp47 << "' for 0x" << ::std::hex << mnLangID << " round-trip to 0x" << ::std::hex << nLang);
// No LangID, but a BCP 47 string: use the BCP 47 map.
917 else if (!maBcp47.isEmpty())
919 MapBcp47& rMap = theMapBcp47();
920 MapBcp47::const_iterator it( rMap.find( maBcp47));
921 if (it != rMap.end())
923 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: found impl for '" << maBcp47 << "'");
924 pImpl = (*it).second;
926 else
928 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: new impl for '" << maBcp47 << "'");
929 pImpl = std::make_shared<LanguageTagImpl>( *this);
930 ::std::pair< MapBcp47::iterator, bool > insOrig( rMap.insert( ::std::make_pair( maBcp47, pImpl)));
931 // If changed after canonicalize() also add the resulting tag to
932 // the map.
933 if (pImpl->synCanonicalize())
935 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: canonicalized to '" << pImpl->maBcp47 << "'");
936 ::std::pair< MapBcp47::const_iterator, bool > insCanon(
937 rMap.insert( ::std::make_pair( pImpl->maBcp47, pImpl)));
938 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << (insCanon.second ? "" : "not ")
939 << "inserted '" << pImpl->maBcp47 << "'");
940 // If the canonicalized tag already existed (was not inserted)
941 // and impls are different, make this impl that impl and skip
942 // the rest if that LangID is present as well. The existing
943 // entry may or may not be different, it may even be strictly
944 // identical to this if it differs only in case (e.g. ko-kr =>
945 // ko-KR) which was corrected in canonicalize() hence also in
946 // the map entry but comparison is case insensitive and found
947 // it again.
948 if (!insCanon.second && (*insCanon.first).second != pImpl)
// Both the original-string entry and pImpl now refer to the existing impl.
950 (*insOrig.first).second = pImpl = (*insCanon.first).second;
951 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: share impl with 0x"
952 << ::std::hex << pImpl->mnLangID);
955 if (!pImpl->mbInitializedLangID)
957 // Try round-trip Bcp47->Locale->LangID->Locale->Bcp47.
958 if (!pImpl->mbInitializedLocale)
959 pImpl->convertBcp47ToLocale();
960 if (!pImpl->mbInitializedLangID)
961 pImpl->convertLocaleToLang( true);
962 // Unconditionally insert (round-trip is possible) for
963 // on-the-fly IDs and (generated or not) suggested IDs.
964 bool bInsert = lcl_isKnownOnTheFlyID( pImpl->mnLangID);
965 OUString aBcp47;
966 if (!bInsert)
968 if (pImpl->mnLangID != LANGUAGE_DONTKNOW)
970 // May have involved canonicalize(), so compare with
971 // pImpl->maBcp47 instead of maBcp47!
972 aBcp47 = LanguageTagImpl::convertToBcp47(
973 MsLangId::Conversion::convertLanguageToLocale( pImpl->mnLangID, true));
974 bInsert = (aBcp47 == pImpl->maBcp47);
977 // If round-trip is identical cross-insert to Bcp47 map.
// NOTE(review): the insert below goes into the LangID map (theMapLangID()),
// not the Bcp47 map the comment above mentions.
978 if (bInsert)
980 ::std::pair< MapLangID::const_iterator, bool > res(
981 theMapLangID().insert( ::std::make_pair( pImpl->mnLangID, pImpl)));
982 if (res.second)
984 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: cross-inserted 0x"
985 << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "'");
987 else
989 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted 0x"
990 << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "' have '"
991 << (*res.first).second->maBcp47 << "'");
994 else
996 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted 0x"
997 << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "' round-trip to '"
998 << aBcp47 << "'");
// Neither LangID nor BCP 47 string: hand out an impl that is in no map.
1003 else
1005 SAL_WARN( "i18nlangtag", "LanguageTag::registerImpl: can't register for 0x" << ::std::hex << mnLangID );
1006 pImpl = std::make_shared<LanguageTagImpl>( *this);
1009 // If we reach here for mbSystemLocale we didn't have theSystemLocale
1010 // above, so add it.
1011 if (mbSystemLocale && mbInitializedLangID)
1013 theSystemLocale() = pImpl;
1014 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: added system locale 0x"
1015 << ::std::hex << pImpl->mnLangID << " '" << pImpl->maBcp47 << "'");
1018 return pImpl;
1022 LanguageTagImpl const * LanguageTag::getImpl() const
1024 if (!mpImpl)
1026 mpImpl = registerImpl();
1027 syncVarsFromRawImpl();
1029 return mpImpl.get();
1033 LanguageTagImpl * LanguageTag::getImpl()
1035 if (!mpImpl)
1037 mpImpl = registerImpl();
1038 syncVarsFromRawImpl();
1040 return mpImpl.get();
1044 void LanguageTag::resetVars()
1046 mpImpl.reset();
1047 maLocale = lang::Locale();
1048 maBcp47.clear();
1049 mnLangID = LANGUAGE_SYSTEM;
1050 mbSystemLocale = true;
1051 mbInitializedBcp47 = false;
1052 mbInitializedLocale = false;
1053 mbInitializedLangID = false;
1054 mbIsFallback = false;
1058 LanguageTag & LanguageTag::reset( const OUString & rBcp47LanguageTag )
1060 resetVars();
1061 maBcp47 = rBcp47LanguageTag;
1062 mbSystemLocale = rBcp47LanguageTag.isEmpty();
1063 mbInitializedBcp47 = !mbSystemLocale;
1065 return *this;
1069 LanguageTag & LanguageTag::reset( const css::lang::Locale & rLocale )
1071 resetVars();
1072 maLocale = rLocale;
1073 mbSystemLocale = rLocale.Language.isEmpty();
1074 mbInitializedLocale = !mbSystemLocale;
1075 handleVendorVariant( maLocale);
1076 return *this;
1080 LanguageTag & LanguageTag::reset( LanguageType nLanguage )
1082 resetVars();
1083 mnLangID = nLanguage;
1084 mbSystemLocale = nLanguage == LANGUAGE_SYSTEM;
1085 mbInitializedLangID = !mbSystemLocale;
1086 return *this;
1090 bool LanguageTagImpl::canonicalize()
1092 #ifdef erDEBUG
1093 // dump once
1094 struct dumper
1096 lt_tag_t** mpp;
1097 explicit dumper( lt_tag_t** pp ) : mpp( *pp ? NULL : pp) {}
1098 ~dumper() { if (mpp && *mpp) lt_tag_dump( *mpp); }
1100 dumper aDumper( &mpImplLangtag);
1101 #endif
1103 bool bChanged = false;
1105 // Side effect: have maBcp47 in any case, resolved system.
1106 // Some methods calling canonicalize() (or not calling it due to
1107 // meIsLiblangtagNeeded==DECISION_NO) rely on this! Hence do not set
1108 // meIsLiblangtagNeeded anywhere else than hereafter.
1109 getBcp47();
1111 // The simple cases and known locales don't need liblangtag processing,
1112 // which also avoids loading liblangtag data on startup.
1113 if (meIsLiblangtagNeeded == DECISION_DONTKNOW)
1115 bool bTemporaryLocale = false;
1116 bool bTemporaryLangID = false;
1117 if (!mbInitializedLocale && !mbInitializedLangID)
1119 if (mbSystemLocale)
1121 mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
1122 mbInitializedLangID = true;
1124 else
1126 // Now this is getting funny... we only have some BCP47 string
1127 // and want to determine if parsing it would be possible
1128 // without using liblangtag just to see if it is a simple known
1129 // locale or could fall back to one.
1130 OUString aLanguage, aScript, aCountry, aRegion, aVariants;
1131 Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry, aRegion, aVariants);
1132 if (eExt != EXTRACTED_NONE)
1134 if (eExt == EXTRACTED_LSC || eExt == EXTRACTED_LV || eExt == EXTRACTED_LR)
1136 // Rebuild bcp47 with proper casing of tags.
1137 OUStringBuffer aBuf( aLanguage.getLength() + 1 + aScript.getLength() +
1138 1 + aCountry.getLength() + 1 + aRegion.getLength() + 1 + aVariants.getLength());
1139 aBuf.append( aLanguage);
1140 if (!aScript.isEmpty())
1141 aBuf.append("-" + aScript);
1142 if (!aCountry.isEmpty())
1143 aBuf.append("-" + aCountry);
1144 if (!aRegion.isEmpty())
1145 aBuf.append("-" + aRegion);
1146 if (!aVariants.isEmpty())
1147 aBuf.append("-" + aVariants);
1148 OUString aStr( aBuf.makeStringAndClear());
1150 if (maBcp47 != aStr)
1152 maBcp47 = aStr;
1153 bChanged = true;
1156 if (eExt == EXTRACTED_LSC && aScript.isEmpty())
1158 maLocale.Language = aLanguage;
1159 maLocale.Country = aCountry;
1161 else if (eExt == EXTRACTED_C_LOCALE)
1163 maLocale.Language = aLanguage;
1164 maLocale.Country = aCountry;
1166 else
1168 maLocale.Language = I18NLANGTAG_QLT;
1169 maLocale.Country = aCountry;
1170 maLocale.Variant = maBcp47;
1172 bTemporaryLocale = mbInitializedLocale = true;
1176 if (mbInitializedLangID && !mbInitializedLocale)
1178 // Do not call getLocale() here because that prefers
1179 // convertBcp47ToLocale() which would end up in recursion via
1180 // isIsoLocale()!
1182 // Prepare to verify that we have a known locale, not just an
1183 // arbitrary MS-LangID.
1184 convertLangToLocale();
1186 if (mbInitializedLocale)
1188 if (!mbInitializedLangID)
1190 if (convertLocaleToLang( false))
1191 bChanged = true;
1192 if (bTemporaryLocale || mnLangID == LANGUAGE_DONTKNOW)
1193 bTemporaryLangID = true;
1195 if (mnLangID != LANGUAGE_DONTKNOW && mnLangID != LANGUAGE_SYSTEM)
1196 meIsLiblangtagNeeded = DECISION_NO; // known locale
1197 else
1199 const KnownTagSet& rKnowns = getKnowns();
1200 if (rKnowns.find( maBcp47) != rKnowns.end())
1201 meIsLiblangtagNeeded = DECISION_NO; // known fallback
1203 // We may have an internal override "canonicalization".
1204 lang::Locale aNew( MsLangId::Conversion::getOverride( maLocale));
1205 if (!aNew.Language.isEmpty() &&
1206 (aNew.Language != maLocale.Language ||
1207 aNew.Country != maLocale.Country ||
1208 aNew.Variant != maLocale.Variant))
1210 maBcp47 = LanguageTagImpl::convertToBcp47( aNew);
1211 bChanged = true;
1212 meIsIsoLocale = DECISION_DONTKNOW;
1213 meIsIsoODF = DECISION_DONTKNOW;
1214 meIsLiblangtagNeeded = DECISION_NO; // known locale
1217 if (bTemporaryLocale)
1219 mbInitializedLocale = false;
1220 maLocale = lang::Locale();
1222 if (bTemporaryLangID)
1224 mbInitializedLangID = false;
1225 mnLangID = LANGUAGE_DONTKNOW;
1228 if (meIsLiblangtagNeeded == DECISION_NO)
1230 meIsValid = DECISION_YES; // really, known must be valid ...
1231 return bChanged; // that's it
1234 meIsLiblangtagNeeded = DECISION_YES;
1235 SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: using liblangtag for '" << maBcp47 << "'");
1237 if (!mpImplLangtag)
1239 theDataRef().init();
1240 mpImplLangtag = lt_tag_new();
1243 myLtError aError;
1245 if (!lt_tag_parse_disabled && lt_tag_parse(mpImplLangtag, OUStringToOString(maBcp47, RTL_TEXTENCODING_UTF8).getStr(), &aError.p))
1247 if (aError.p)
1249 SAL_WARN("i18nlangtag", "LanguageTagImpl::canonicalize: could not parse '" << maBcp47 << "'");
1251 else
1253 char* pTag = lt_tag_canonicalize(mpImplLangtag, &aError.p);
1254 SAL_WARN_IF(!pTag, "i18nlangtag", "LanguageTagImpl::canonicalize: could not canonicalize '" << maBcp47 << "'");
1255 if (pTag)
1257 OUString aNew(OUString::createFromAscii(pTag));
1258 // Make the lt_tag_t follow the new string if different, which
1259 // removes default script and such.
1260 if (maBcp47 != aNew)
1262 maBcp47 = aNew;
1263 bChanged = true;
1264 meIsIsoLocale = DECISION_DONTKNOW;
1265 meIsIsoODF = DECISION_DONTKNOW;
1266 if (!lt_tag_parse(mpImplLangtag, pTag, &aError.p))
1268 SAL_WARN("i18nlangtag", "LanguageTagImpl::canonicalize: could not reparse '"
1269 << maBcp47 << "'");
1270 free(pTag);
1271 meIsValid = DECISION_NO;
1272 return bChanged;
1275 free(pTag);
1276 meIsValid = DECISION_YES;
1277 return bChanged;
1281 else
1283 SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: could not parse '" << maBcp47 << "'");
1285 meIsValid = DECISION_NO;
1286 return bChanged;
1290 bool LanguageTagImpl::synCanonicalize()
1292 bool bChanged = false;
1293 if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
1295 bChanged = canonicalize();
1296 if (bChanged)
1298 if (mbInitializedLocale)
1299 convertBcp47ToLocale();
1300 if (mbInitializedLangID)
1301 convertBcp47ToLang();
1304 return bChanged;
1308 void LanguageTag::syncFromImpl() const
1310 const LanguageTagImpl* pImpl = getImpl();
1311 bool bRegister = ((mbInitializedBcp47 && maBcp47 != pImpl->maBcp47) ||
1312 (mbInitializedLangID && mnLangID != pImpl->mnLangID));
1313 SAL_INFO_IF( bRegister, "i18nlangtag",
1314 "LanguageTag::syncFromImpl: re-registering, '" << pImpl->maBcp47 << "' vs '" << maBcp47 <<
1315 " and 0x" << ::std::hex << pImpl->mnLangID << " vs 0x" << ::std::hex << mnLangID);
1316 syncVarsFromRawImpl();
1317 if (bRegister)
1318 mpImpl = registerImpl();
1322 void LanguageTag::syncVarsFromImpl() const
1324 if (!mpImpl)
1325 getImpl(); // with side effect syncVarsFromRawImpl()
1326 else
1327 syncVarsFromRawImpl();
1331 void LanguageTag::syncVarsFromRawImpl() const
1333 // Do not use getImpl() here.
1334 LanguageTagImpl* pImpl = mpImpl.get();
1335 if (!pImpl)
1336 return;
1338 // Obviously only mutable variables.
1339 mbInitializedBcp47 = pImpl->mbInitializedBcp47;
1340 maBcp47 = pImpl->maBcp47;
1341 mbInitializedLocale = pImpl->mbInitializedLocale;
1342 maLocale = pImpl->maLocale;
1343 mbInitializedLangID = pImpl->mbInitializedLangID;
1344 mnLangID = pImpl->mnLangID;
1348 bool LanguageTag::synCanonicalize()
1350 bool bChanged = getImpl()->synCanonicalize();
1351 if (bChanged)
1352 syncFromImpl();
1353 return bChanged;
1357 void LanguageTagImpl::convertLocaleToBcp47() const
1359 if (mbSystemLocale && !mbInitializedLocale)
1360 convertLangToLocale();
1362 if (maLocale.Language.isEmpty())
1364 // Do not call LanguageTag::convertToBcp47(Locale) that for an empty
1365 // locale via LanguageTag::convertToBcp47(LanguageType) and
1366 // LanguageTag::convertToLocale(LanguageType) would instantiate another
1367 // LanguageTag.
1368 maLocale = MsLangId::Conversion::convertLanguageToLocale( LANGUAGE_SYSTEM, false);
1370 if (maLocale.Language.isEmpty())
1372 maBcp47.clear(); // bad luck
1374 else if (maLocale.Language == I18NLANGTAG_QLT)
1376 maBcp47 = maLocale.Variant;
1377 meIsIsoLocale = DECISION_NO;
1379 else
1381 maBcp47 = LanguageTag::convertToBcp47( maLocale );
1383 mbInitializedBcp47 = true;
1387 bool LanguageTagImpl::convertLocaleToLang( bool bAllowOnTheFlyID )
1389 bool bRemapped = false;
1390 if (mbSystemLocale)
1392 mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
1394 else
1396 mnLangID = MsLangId::Conversion::convertLocaleToLanguage( maLocale);
1397 if (mnLangID == LANGUAGE_DONTKNOW)
1399 // convertLocaleToLanguage() only searches in ISO and private
1400 // definitions, search in remaining definitions, i.e. for the "C"
1401 // locale and non-standard things like "sr-latin" or "german" to
1402 // resolve to a known locale, skipping ISO lll-CC that were already
1403 // searched.
1404 mnLangID = MsLangId::Conversion::convertIsoNamesToLanguage( maLocale.Language, maLocale.Country, true);
1405 if (mnLangID != LANGUAGE_DONTKNOW)
1407 // If one found, convert back and adapt Locale and Bcp47
1408 // strings so we have a matching entry.
1409 OUString aOrgBcp47( maBcp47);
1410 convertLangToLocale();
1411 convertLocaleToBcp47();
1412 bRemapped = (maBcp47 != aOrgBcp47);
1415 if (mnLangID == LANGUAGE_DONTKNOW && bAllowOnTheFlyID)
1417 if (isValidBcp47())
1419 // For language-only (including script) look if we know some
1420 // locale of that language and if so try to use the primary
1421 // language ID of that instead of generating an on-the-fly ID.
1422 if (getCountry().isEmpty() && isIsoODF())
1424 lang::Locale aLoc( MsLangId::Conversion::lookupFallbackLocale( maLocale));
1425 // 'en-US' is last resort, do not use except when looking
1426 // for 'en'.
1427 if (aLoc.Language != "en" || getLanguage() == "en")
1429 mnLangID = MsLangId::Conversion::convertLocaleToLanguage( aLoc);
1430 if (mnLangID != LANGUAGE_DONTKNOW)
1431 mnLangID = MsLangId::getPrimaryLanguage( mnLangID);
1434 registerOnTheFly( mnLangID);
1436 else
1438 SAL_WARN( "i18nlangtag", "LanguageTagImpl::convertLocaleToLang: with bAllowOnTheFlyID invalid '"
1439 << maBcp47 << "'");
1443 mbInitializedLangID = true;
1444 return bRemapped;
1448 void LanguageTag::convertLocaleToLang()
1450 getImpl()->convertLocaleToLang( true);
1451 syncFromImpl();
1455 void LanguageTagImpl::convertBcp47ToLocale()
1457 bool bIso = isIsoLocale();
1458 if (bIso)
1460 maLocale.Language = getLanguageFromLangtag();
1461 maLocale.Country = getRegionFromLangtag();
1462 maLocale.Variant.clear();
1464 else
1466 maLocale.Language = I18NLANGTAG_QLT;
1467 maLocale.Country = getCountry();
1468 maLocale.Variant = maBcp47;
1470 mbInitializedLocale = true;
1474 void LanguageTag::convertBcp47ToLocale()
1476 getImpl()->convertBcp47ToLocale();
1477 syncFromImpl();
1481 void LanguageTagImpl::convertBcp47ToLang()
1483 if (mbSystemLocale)
1485 mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
1487 else
1489 if (!mbInitializedLocale)
1490 convertBcp47ToLocale();
1491 convertLocaleToLang( true);
1493 mbInitializedLangID = true;
1497 void LanguageTag::convertBcp47ToLang()
1499 getImpl()->convertBcp47ToLang();
1500 syncFromImpl();
1504 void LanguageTagImpl::convertLangToLocale() const
1506 if (mbSystemLocale && !mbInitializedLangID)
1508 mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
1509 mbInitializedLangID = true;
1511 // Resolve system here! The original is remembered as mbSystemLocale.
1512 maLocale = MsLangId::Conversion::convertLanguageToLocale( mnLangID, false);
1513 mbInitializedLocale = true;
1517 void LanguageTag::convertLangToLocale() const
1519 getImpl()->convertLangToLocale();
1520 syncFromImpl();
1524 void LanguageTagImpl::convertLangToBcp47() const
1526 if (!mbInitializedLocale)
1527 convertLangToLocale();
1528 convertLocaleToBcp47();
1529 mbInitializedBcp47 = true;
1533 void LanguageTag::convertFromRtlLocale()
1535 // The rtl_Locale follows the Open Group Base Specification,
1536 // 8.2 Internationalization Variables
1537 // language[_territory][.codeset][@modifier]
1538 // On GNU/Linux systems usually being glibc locales.
1539 // sal/osl/unx/nlsupport.c _parse_locale() parses them into
1540 // Language: language 2 or 3 alpha code
1541 // Country: [territory] 2 alpha code
1542 // Variant: [.codeset][@modifier]
1543 // Variant effectively contains anything that follows the territory, not
1544 // looking for '.' dot delimiter or '@' modifier content.
1545 if (maLocale.Variant.isEmpty())
1546 return;
1548 OString aStr = OUStringToOString(maLocale.Language, RTL_TEXTENCODING_UTF8) + "_" + OUStringToOString(Concat2View(maLocale.Country + maLocale.Variant),
1549 RTL_TEXTENCODING_UTF8);
1550 /* FIXME: let liblangtag parse this entirely with
1551 * lt_tag_convert_from_locale() but that needs a patch to pass the
1552 * string. */
1553 #if 0
1554 myLtError aError;
1555 theDataRef::get().init();
1556 mpImplLangtag = lt_tag_convert_from_locale( aStr.getStr(), &aError.p);
1557 maBcp47 = OStringToOUString( lt_tag_get_string( mpImplLangtag), RTL_TEXTENCODING_UTF8);
1558 mbInitializedBcp47 = true;
1559 #else
1560 mnLangID = MsLangId::convertUnxByteStringToLanguage( aStr);
1561 if (mnLangID == LANGUAGE_DONTKNOW)
1563 SAL_WARN( "i18nlangtag", "LanguageTag(rtl_Locale) - unknown: " << aStr);
1564 mnLangID = LANGUAGE_ENGLISH_US; // we need _something_ here
1566 mbInitializedLangID = true;
1567 #endif
1568 maLocale = lang::Locale();
1569 mbInitializedLocale = false;
1573 const OUString & LanguageTagImpl::getBcp47() const
1575 if (!mbInitializedBcp47)
1577 if (mbInitializedLocale)
1578 convertLocaleToBcp47();
1579 else
1580 convertLangToBcp47();
1582 return maBcp47;
1586 const OUString & LanguageTag::getBcp47( bool bResolveSystem ) const
1588 static constexpr OUString theEmptyBcp47 = u""_ustr;
1590 if (!bResolveSystem && mbSystemLocale)
1591 return theEmptyBcp47;
1592 if (!mbInitializedBcp47)
1593 syncVarsFromImpl();
1594 if (!mbInitializedBcp47)
1596 getImpl()->getBcp47();
1597 syncFromImpl();
1599 return maBcp47;
1603 OUString LanguageTagImpl::getLanguageFromLangtag()
1605 OUString aLanguage;
1606 synCanonicalize();
1607 if (maBcp47.isEmpty())
1608 return aLanguage;
1609 if (mpImplLangtag)
1611 const lt_lang_t* pLangT = lt_tag_get_language( mpImplLangtag);
1612 SAL_WARN_IF( !pLangT, "i18nlangtag",
1613 "LanguageTag::getLanguageFromLangtag: pLangT==NULL for '" << maBcp47 << "'");
1614 if (!pLangT)
1615 return aLanguage;
1616 const char* pLang = lt_lang_get_tag( pLangT);
1617 SAL_WARN_IF( !pLang, "i18nlangtag",
1618 "LanguageTag::getLanguageFromLangtag: pLang==NULL for '" << maBcp47 << "'");
1619 if (pLang)
1620 aLanguage = OUString::createFromAscii( pLang);
1622 else
1624 if (mbCachedLanguage || cacheSimpleLSCV())
1625 aLanguage = maCachedLanguage;
1627 return aLanguage;
1631 OUString LanguageTagImpl::getScriptFromLangtag()
1633 OUString aScript;
1634 synCanonicalize();
1635 if (maBcp47.isEmpty())
1636 return aScript;
1637 if (mpImplLangtag)
1639 const lt_script_t* pScriptT = lt_tag_get_script( mpImplLangtag);
1640 // pScriptT==NULL is valid for default scripts
1641 if (!pScriptT)
1642 return aScript;
1643 const char* pScript = lt_script_get_tag( pScriptT);
1644 SAL_WARN_IF( !pScript, "i18nlangtag", "LanguageTag::getScriptFromLangtag: pScript==NULL");
1645 if (pScript)
1646 aScript = OUString::createFromAscii( pScript);
1648 else
1650 if (mbCachedScript || cacheSimpleLSCV())
1651 aScript = maCachedScript;
1653 return aScript;
1657 OUString LanguageTagImpl::getRegionFromLangtag()
1659 OUString aRegion;
1660 synCanonicalize();
1661 if (maBcp47.isEmpty())
1662 return aRegion;
1663 if (mpImplLangtag)
1665 const lt_region_t* pRegionT = lt_tag_get_region( mpImplLangtag);
1666 // pRegionT==NULL is valid for language only tags, rough check here
1667 // that does not take sophisticated tags into account that actually
1668 // should have a region, check for ll, lll, ll-Ssss and lll-Ssss so
1669 // that ll-CC and lll-CC actually fail.
1670 SAL_WARN_IF( !pRegionT &&
1671 maBcp47.getLength() != 2 && maBcp47.getLength() != 3 &&
1672 maBcp47.getLength() != 7 && maBcp47.getLength() != 8,
1673 "i18nlangtag", "LanguageTag::getRegionFromLangtag: pRegionT==NULL for '" << maBcp47 << "'");
1674 if (!pRegionT)
1675 return aRegion;
1676 const char* pRegion = lt_region_get_tag( pRegionT);
1677 SAL_WARN_IF( !pRegion, "i18nlangtag",
1678 "LanguageTag::getRegionFromLangtag: pRegion==NULL for'" << maBcp47 << "'");
1679 if (pRegion)
1680 aRegion = OUString::createFromAscii( pRegion);
1682 else
1684 if (mbCachedCountry || cacheSimpleLSCV())
1685 aRegion = maCachedCountry;
1687 return aRegion;
1691 OUString LanguageTagImpl::getVariantsFromLangtag()
1693 OUStringBuffer aVariants;
1694 synCanonicalize();
1695 if (maBcp47.isEmpty())
1696 return OUString();
1697 if (mpImplLangtag)
1699 const lt_list_t* pVariantsT = lt_tag_get_variants( mpImplLangtag);
1700 for (const lt_list_t* pE = pVariantsT; pE; pE = lt_list_next( pE))
1702 const lt_variant_t* pVariantT = static_cast<const lt_variant_t*>(lt_list_value( pE));
1703 if (pVariantT)
1705 const char* p = lt_variant_get_tag( pVariantT);
1706 if (p)
1708 if (!aVariants.isEmpty())
1709 aVariants.append("-");
1710 aVariants.appendAscii(p);
1715 else
1717 if (mbCachedVariants || cacheSimpleLSCV())
1718 aVariants = maCachedVariants;
1720 return aVariants.makeStringAndClear();
1724 const css::lang::Locale & LanguageTag::getLocale( bool bResolveSystem ) const
1726 // "static" to be returned as const reference to an empty locale.
1727 static lang::Locale theEmptyLocale;
1729 if (!bResolveSystem && mbSystemLocale)
1730 return theEmptyLocale;
1731 if (!mbInitializedLocale)
1732 syncVarsFromImpl();
1733 if (!mbInitializedLocale)
1735 if (mbInitializedBcp47)
1736 const_cast<LanguageTag*>(this)->convertBcp47ToLocale();
1737 else
1738 convertLangToLocale();
1740 return maLocale;
1744 LanguageType LanguageTag::getLanguageType( bool bResolveSystem ) const
1746 if (!bResolveSystem && mbSystemLocale)
1747 return LANGUAGE_SYSTEM;
1748 if (!mbInitializedLangID)
1749 syncVarsFromImpl();
1750 if (!mbInitializedLangID)
1752 if (mbInitializedBcp47)
1753 const_cast<LanguageTag*>(this)->convertBcp47ToLang();
1754 else
1756 const_cast<LanguageTag*>(this)->convertLocaleToLang();
1758 /* Resolve a locale only unknown due to some redundant information,
1759 * like 'de-Latn-DE' with script tag. Never call canonicalize()
1760 * from within convert...() methods due to possible recursion, so
1761 * do it here. */
1762 if ((!mbSystemLocale && mnLangID == LANGUAGE_SYSTEM) || mnLangID == LANGUAGE_DONTKNOW)
1763 const_cast<LanguageTag*>(this)->synCanonicalize();
1766 return mnLangID;
1770 void LanguageTag::getIsoLanguageScriptCountry( OUString& rLanguage, OUString& rScript, OUString& rCountry ) const
1772 // Calling isIsoODF() first is a predicate for getLanguage(), getScript()
1773 // and getCountry() to work correctly in this context.
1774 if (isIsoODF())
1776 rLanguage = getLanguage();
1777 rScript = getScript();
1778 rCountry = getCountry();
1780 else
1782 rLanguage = (LanguageTag::isIsoLanguage( getLanguage()) ? getLanguage() : OUString());
1783 rScript = (LanguageTag::isIsoScript( getScript()) ? getScript() : OUString());
1784 rCountry = (LanguageTag::isIsoCountry( getCountry()) ? getCountry() : OUString());
1789 namespace
1792 bool isLowerAscii( sal_Unicode c )
1794 return 'a' <= c && c <= 'z';
1797 bool isUpperAscii( sal_Unicode c )
1799 return 'A' <= c && c <= 'Z';
1805 // static
1806 bool LanguageTag::isIsoLanguage( const OUString& rLanguage )
1808 /* TODO: ignore case? For now let's see where rubbish is used. */
1809 bool b2chars = rLanguage.getLength() == 2;
1810 if ((b2chars || rLanguage.getLength() == 3) &&
1811 isLowerAscii( rLanguage[0]) && isLowerAscii( rLanguage[1]) &&
1812 (b2chars || isLowerAscii( rLanguage[2])))
1813 return true;
1814 SAL_WARN_IF( ((rLanguage.getLength() == 2 || rLanguage.getLength() == 3) &&
1815 (isUpperAscii( rLanguage[0]) || isUpperAscii( rLanguage[1]))) ||
1816 (rLanguage.getLength() == 3 && isUpperAscii( rLanguage[2])), "i18nlangtag",
1817 "LanguageTag::isIsoLanguage: rejecting upper case " << rLanguage);
1818 return false;
1822 // static
1823 bool LanguageTag::isIsoCountry( const OUString& rRegion )
1825 /* TODO: ignore case? For now let's see where rubbish is used. */
1826 if (rRegion.isEmpty() ||
1827 (rRegion.getLength() == 2 && isUpperAscii( rRegion[0]) && isUpperAscii( rRegion[1])))
1828 return true;
1829 SAL_WARN_IF( rRegion.getLength() == 2 && (isLowerAscii( rRegion[0]) || isLowerAscii( rRegion[1])),
1830 "i18nlangtag", "LanguageTag::isIsoCountry: rejecting lower case " << rRegion);
1831 return false;
1835 // static
1836 bool LanguageTag::isIsoScript( const OUString& rScript )
1838 /* TODO: ignore case? For now let's see where rubbish is used. */
1839 if (rScript.isEmpty() ||
1840 (rScript.getLength() == 4 &&
1841 isUpperAscii( rScript[0]) && isLowerAscii( rScript[1]) &&
1842 isLowerAscii( rScript[2]) && isLowerAscii( rScript[3])))
1843 return true;
1844 SAL_WARN_IF( rScript.getLength() == 4 &&
1845 (isLowerAscii( rScript[0]) || isUpperAscii( rScript[1]) ||
1846 isUpperAscii( rScript[2]) || isUpperAscii( rScript[3])),
1847 "i18nlangtag", "LanguageTag::isIsoScript: rejecting case mismatch " << rScript);
1848 return false;
1852 OUString const & LanguageTagImpl::getLanguage() const
1854 if (!mbCachedLanguage)
1856 maCachedLanguage = const_cast<LanguageTagImpl*>(this)->getLanguageFromLangtag();
1857 mbCachedLanguage = true;
1859 return maCachedLanguage;
1863 OUString LanguageTag::getLanguage() const
1865 LanguageTagImpl const* pImpl = getImpl();
1866 if (pImpl->mbCachedLanguage)
1867 return pImpl->maCachedLanguage;
1868 OUString aRet( pImpl->getLanguage());
1869 syncFromImpl();
1870 return aRet;
1874 OUString const & LanguageTagImpl::getScript() const
1876 if (!mbCachedScript)
1878 maCachedScript = const_cast<LanguageTagImpl*>(this)->getScriptFromLangtag();
1879 mbCachedScript = true;
1881 return maCachedScript;
1885 OUString LanguageTag::getScript() const
1887 LanguageTagImpl const* pImpl = getImpl();
1888 if (pImpl->mbCachedScript)
1889 return pImpl->maCachedScript;
1890 OUString aRet( pImpl->getScript());
1891 syncFromImpl();
1892 return aRet;
1896 OUString LanguageTag::getLanguageAndScript() const
1898 OUString aLanguageScript( getLanguage());
1899 OUString aScript( getScript());
1900 if (!aScript.isEmpty())
1902 aLanguageScript += "-" + aScript;
1904 return aLanguageScript;
1908 OUString const & LanguageTagImpl::getCountry() const
1910 if (!mbCachedCountry)
1912 maCachedCountry = const_cast<LanguageTagImpl*>(this)->getRegionFromLangtag();
1913 if (!LanguageTag::isIsoCountry( maCachedCountry))
1914 maCachedCountry.clear();
1915 mbCachedCountry = true;
1917 return maCachedCountry;
1921 OUString LanguageTag::getCountry() const
1923 LanguageTagImpl const* pImpl = getImpl();
1924 if (pImpl->mbCachedCountry)
1925 return pImpl->maCachedCountry;
1926 OUString aRet( pImpl->getCountry());
1927 syncFromImpl();
1928 return aRet;
1932 OUString LanguageTagImpl::getRegion() const
1934 return const_cast<LanguageTagImpl*>(this)->getRegionFromLangtag();
1938 OUString const & LanguageTagImpl::getVariants() const
1940 if (!mbCachedVariants)
1942 maCachedVariants = const_cast<LanguageTagImpl*>(this)->getVariantsFromLangtag();
1943 mbCachedVariants = true;
1945 return maCachedVariants;
1949 OUString LanguageTag::getVariants() const
1951 LanguageTagImpl const * pImpl = getImpl();
1952 if (pImpl->mbCachedVariants)
1953 return pImpl->maCachedVariants;
1954 OUString aRet( pImpl->getVariants());
1955 syncFromImpl();
1956 return aRet;
1959 OUString const & LanguageTagImpl::getGlibcLocaleString() const
1961 if (mbCachedGlibcString)
1962 return maCachedGlibcString;
1964 if (!mpImplLangtag)
1966 meIsLiblangtagNeeded = DECISION_YES;
1967 const_cast<LanguageTagImpl*>(this)->synCanonicalize();
1969 if (mpImplLangtag)
1971 char* pLang = lt_tag_convert_to_locale(mpImplLangtag, nullptr);
1972 if (pLang)
1974 maCachedGlibcString = OUString::createFromAscii( pLang);
1975 mbCachedGlibcString = true;
1976 free(pLang);
1979 return maCachedGlibcString;
1982 OUString LanguageTag::getGlibcLocaleString( std::u16string_view rEncoding ) const
1984 OUString aRet;
1985 if (isIsoLocale())
1987 OUString aCountry( getCountry());
1988 if (aCountry.isEmpty())
1989 aRet = getLanguage() + rEncoding;
1990 else
1991 aRet = getLanguage() + "_" + aCountry + rEncoding;
1993 else
1995 aRet = getImpl()->getGlibcLocaleString();
1996 sal_Int32 nAt = aRet.indexOf('@');
1997 if (nAt != -1)
1998 aRet = OUString::Concat(aRet.subView(0, nAt)) + rEncoding + aRet.subView(nAt);
1999 else
2000 aRet += rEncoding;
2002 return aRet;
2005 bool LanguageTagImpl::hasScript() const
2007 if (!mbCachedScript)
2008 getScript();
2009 return !maCachedScript.isEmpty();
2013 bool LanguageTag::hasScript() const
2015 bool bRet = getImpl()->hasScript();
2016 syncFromImpl();
2017 return bRet;
2021 LanguageTag::ScriptType LanguageTagImpl::getScriptType() const
2023 return meScriptType;
2027 LanguageTag::ScriptType LanguageTag::getScriptType() const
2029 return getImpl()->getScriptType();
2033 void LanguageTagImpl::setScriptType(LanguageTag::ScriptType st)
2035 if (meScriptType == LanguageTag::ScriptType::UNKNOWN) // poor man's clash resolution
2036 meScriptType = st;
2040 void LanguageTag::setScriptType(LanguageTag::ScriptType st)
2042 getImpl()->setScriptType(st);
2046 bool LanguageTagImpl::cacheSimpleLSCV()
2048 OUString aLanguage, aScript, aCountry, aRegion, aVariants;
2049 Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry, aRegion, aVariants);
2050 bool bRet = (eExt == EXTRACTED_LSC || eExt == EXTRACTED_LV || eExt == EXTRACTED_LR);
2051 if (bRet)
2053 maCachedLanguage = aLanguage;
2054 maCachedScript = aScript;
2055 maCachedCountry = aCountry;
2056 maCachedVariants = aVariants;
2057 mbCachedLanguage = mbCachedScript = mbCachedCountry = mbCachedVariants = true;
2059 return bRet;
2063 bool LanguageTagImpl::isIsoLocale() const
2065 if (meIsIsoLocale == DECISION_DONTKNOW)
2067 const_cast<LanguageTagImpl*>(this)->synCanonicalize();
2068 // It must be at most ll-CC or lll-CC
2069 // Do not use getCountry() here, use getRegion() instead.
2070 meIsIsoLocale = ((maBcp47.isEmpty() ||
2071 (maBcp47.getLength() <= 6 && LanguageTag::isIsoLanguage( getLanguage()) &&
2072 LanguageTag::isIsoCountry( getRegion()))) ? DECISION_YES : DECISION_NO);
2074 return meIsIsoLocale == DECISION_YES;
2078 bool LanguageTag::isIsoLocale() const
2080 bool bRet = getImpl()->isIsoLocale();
2081 syncFromImpl();
2082 return bRet;
2086 bool LanguageTagImpl::isIsoODF() const
2088 if (meIsIsoODF == DECISION_DONTKNOW)
2090 const_cast<LanguageTagImpl*>(this)->synCanonicalize();
2091 if (!LanguageTag::isIsoScript( getScript()))
2093 meIsIsoODF = DECISION_NO;
2094 return false;
2096 // The usual case is lll-CC so simply check that first.
2097 if (isIsoLocale())
2099 meIsIsoODF = DECISION_YES;
2100 return true;
2102 // If this is not ISO locale for which script must not exist it can
2103 // still be ISO locale plus ISO script lll-Ssss-CC, but not ll-vvvv ...
2104 // ll-vvvvvvvv
2105 meIsIsoODF = ((maBcp47.getLength() <= 11 && LanguageTag::isIsoLanguage( getLanguage()) &&
2106 LanguageTag::isIsoCountry( getRegion()) && LanguageTag::isIsoScript( getScript()) &&
2107 getVariants().isEmpty()) ? DECISION_YES : DECISION_NO);
2109 return meIsIsoODF == DECISION_YES;
2113 bool LanguageTag::isIsoODF() const
2115 bool bRet = getImpl()->isIsoODF();
2116 syncFromImpl();
2117 return bRet;
2121 bool LanguageTagImpl::isValidBcp47() const
2123 if (meIsValid == DECISION_DONTKNOW)
2125 const_cast<LanguageTagImpl*>(this)->synCanonicalize();
2126 SAL_WARN_IF( meIsValid == DECISION_DONTKNOW, "i18nlangtag",
2127 "LanguageTag::isValidBcp47: canonicalize() didn't set meIsValid");
2129 return meIsValid == DECISION_YES;
2133 bool LanguageTag::isValidBcp47() const
2135 bool bRet = getImpl()->isValidBcp47();
2136 syncFromImpl();
2137 return bRet;
2141 LanguageTag & LanguageTag::makeFallback()
2143 if (!mbIsFallback)
2145 const lang::Locale& rLocale1 = getLocale();
2146 lang::Locale aLocale2( MsLangId::Conversion::lookupFallbackLocale( rLocale1));
2147 if ( rLocale1.Language != aLocale2.Language ||
2148 rLocale1.Country != aLocale2.Country ||
2149 rLocale1.Variant != aLocale2.Variant)
2151 if (rLocale1.Language != "en" && aLocale2.Language == "en" && aLocale2.Country == "US")
2153 // "en-US" is the last resort fallback, try if we get a better
2154 // one for the fallback hierarchy of a non-"en" locale.
2155 ::std::vector< OUString > aFallbacks( getFallbackStrings( false));
2156 for (auto const& fallback : aFallbacks)
2158 lang::Locale aLocale3( LanguageTag(fallback).getLocale());
2159 aLocale2 = MsLangId::Conversion::lookupFallbackLocale( aLocale3);
2160 if (aLocale2.Language != "en" || aLocale2.Country != "US")
2161 break; // for, success
2164 SAL_INFO( "i18nlangtag", "LanguageTag::makeFallback - for (" <<
2165 rLocale1.Language << "," << rLocale1.Country << "," << rLocale1.Variant << ") to (" <<
2166 aLocale2.Language << "," << aLocale2.Country << "," << aLocale2.Variant << ")");
2167 reset( aLocale2);
2169 mbIsFallback = true;
2171 return *this;
2175 /* TODO: maybe this now could take advantage of the mnOverride field in
2176 * isolang.cxx entries and search for kSAME instead of hardcoded special
2177 * fallbacks. Though iterating through those tables would be slower and even
2178 * then there would be some special cases, but we wouldn't lack entries that
2179 * were missed out. */
2180 ::std::vector< OUString > LanguageTag::getFallbackStrings( bool bIncludeFullBcp47 ) const
2182 ::std::vector< OUString > aVec;
2183 OUString aLanguage( getLanguage());
2184 OUString aCountry( getCountry());
2185 if (isIsoLocale())
2187 if (!aCountry.isEmpty())
2189 if (bIncludeFullBcp47)
2190 aVec.emplace_back(aLanguage + "-" + aCountry);
2191 if (aLanguage == "zh")
2193 // For zh-HK or zh-MO also list zh-TW to get zh-Hant, for all
2194 // other zh-XX also list zh-CN to get zh-Hans; both of which we
2195 // use the legacy forms instead of the more correct script
2196 // tags that unfortunately most pieces don't understand.
2197 if (aCountry == "HK" || aCountry == "MO")
2198 aVec.emplace_back(aLanguage + "-TW");
2199 else if (aCountry != "CN")
2200 aVec.emplace_back(aLanguage + "-CN");
2201 aVec.push_back( aLanguage);
2203 else if (aLanguage == "sh")
2205 // Manual list instead of calling
2206 // LanguageTag( "sr-Latn-" + aCountry).getFallbackStrings( true)
2207 // that would also include "sh-*" again.
2208 aVec.emplace_back("sr-Latn-" + aCountry);
2209 aVec.emplace_back("sr-Latn");
2210 aVec.emplace_back("sh"); // legacy with script, before default script with country
2211 aVec.emplace_back("sr-" + aCountry);
2212 aVec.emplace_back("sr");
2214 else if (aLanguage == "ca" && aCountry == "XV")
2216 ::std::vector< OUString > aRep( LanguageTag( u"ca-ES-valencia"_ustr).getFallbackStrings( true));
2217 aVec.insert( aVec.end(), aRep.begin(), aRep.end());
2218 // Already includes 'ca' language fallback.
2220 else if (aLanguage == "ku")
2222 if (aCountry == "TR" || aCountry == "SY")
2224 aVec.emplace_back("kmr-Latn-" + aCountry);
2225 aVec.emplace_back("kmr-" + aCountry);
2226 aVec.emplace_back("kmr-Latn");
2227 aVec.emplace_back("kmr");
2228 aVec.push_back( aLanguage);
2230 else if (aCountry == "IQ" || aCountry == "IR")
2232 aVec.emplace_back("ckb-" + aCountry);
2233 aVec.emplace_back("ckb");
2236 else if (aLanguage == "kmr" && (aCountry == "TR" || aCountry == "SY"))
2238 aVec.emplace_back("ku-Latn-" + aCountry);
2239 aVec.emplace_back("ku-" + aCountry);
2240 aVec.push_back( aLanguage);
2241 aVec.emplace_back("ku");
2243 else if (aLanguage == "ckb" && (aCountry == "IQ" || aCountry == "IR"))
2245 aVec.emplace_back("ku-Arab-" + aCountry);
2246 aVec.emplace_back("ku-" + aCountry);
2247 aVec.push_back( aLanguage);
2248 // not 'ku' only, that was used for Latin script
2250 else
2251 aVec.push_back( aLanguage);
2253 else
2255 if (bIncludeFullBcp47)
2256 aVec.push_back( aLanguage);
2257 if (aLanguage == "sh")
2259 aVec.emplace_back("sr-Latn");
2260 aVec.emplace_back("sr");
2262 else if (aLanguage == "pli")
2264 // a special case for Pali dictionary, see fdo#41599
2265 aVec.emplace_back("pi-Latn");
2266 aVec.emplace_back("pi");
2269 return aVec;
2272 getBcp47(); // have maBcp47 now
2273 if (bIncludeFullBcp47)
2274 aVec.push_back( maBcp47);
2276 // Special cases for deprecated tags and their replacements, include both
2277 // in fallbacks in a sensible order.
2278 /* TODO: could such things be generalized and automated with liblangtag? */
2279 if (maBcp47 == "en-GB-oed")
2280 aVec.emplace_back("en-GB-oxendict");
2281 else if (maBcp47 == "en-GB-oxendict")
2282 aVec.emplace_back("en-GB-oed");
2284 OUString aVariants( getVariants());
2285 OUString aTmp;
2286 if (hasScript())
2288 OUString aScript = getScript();
2289 bool bHaveLanguageScriptVariant = false;
2290 if (!aCountry.isEmpty())
2292 if (!aVariants.isEmpty())
2294 aTmp = aLanguage + "-" + aScript + "-" + aCountry + "-" + aVariants;
2295 if (aTmp != maBcp47)
2296 aVec.push_back( aTmp);
2297 // Language with variant but without country before language
2298 // without variant but with country.
2299 aTmp = aLanguage + "-" + aScript + "-" + aVariants;
2300 if (aTmp != maBcp47)
2301 aVec.push_back( aTmp);
2302 bHaveLanguageScriptVariant = true;
2304 aTmp = aLanguage + "-" + aScript + "-" + aCountry;
2305 if (aTmp != maBcp47)
2306 aVec.push_back( aTmp);
2307 if (aLanguage == "sr" && aScript == "Latn")
2309 // sr-Latn-CS => sr-Latn-YU, sh-CS, sh-YU
2310 if (aCountry == "CS")
2312 aVec.emplace_back("sr-Latn-YU");
2313 aVec.emplace_back("sh-CS");
2314 aVec.emplace_back("sh-YU");
2316 else
2317 aVec.emplace_back("sh-" + aCountry);
2319 else if (aLanguage == "pi" && aScript == "Latn")
2320 aVec.emplace_back("pli"); // a special case for Pali dictionary, see fdo#41599
2321 else if (aLanguage == "krm" && aScript == "Latn" && (aCountry == "TR" || aCountry == "SY"))
2322 aVec.emplace_back("ku-" + aCountry);
2324 if (!aVariants.isEmpty() && !bHaveLanguageScriptVariant)
2326 aTmp = aLanguage + "-" + aScript + "-" + aVariants;
2327 if (aTmp != maBcp47)
2328 aVec.push_back( aTmp);
2330 aTmp = aLanguage + "-" + aScript;
2331 if (aTmp != maBcp47)
2332 aVec.push_back( aTmp);
2334 // 'sh' actually denoted a script, so have it here instead of appended
2335 // at the end as language-only.
2336 if (aLanguage == "sr" && aScript == "Latn")
2337 aVec.emplace_back("sh");
2338 else if (aLanguage == "ku" && aScript == "Arab")
2339 aVec.emplace_back("ckb");
2340 // 'ku' only denoted Latin script
2341 else if (aLanguage == "krm" && aScript == "Latn" && aCountry.isEmpty())
2342 aVec.emplace_back("ku");
2344 bool bHaveLanguageVariant = false;
2345 if (!aCountry.isEmpty())
2347 if (!aVariants.isEmpty())
2349 aTmp = aLanguage + "-" + aCountry + "-" + aVariants;
2350 if (aTmp != maBcp47)
2351 aVec.push_back( aTmp);
2352 if (maBcp47 == "ca-ES-valencia")
2353 aVec.emplace_back("ca-XV");
2354 // Language with variant but without country before language
2355 // without variant but with country.
2356 // But only if variant is not from a grandfathered tag that
2357 // wouldn't match the rules, i.e. "de-1901" is fine but "en-oed" is
2358 // not.
2359 if (aVariants.getLength() >= 5 ||
2360 (aVariants.getLength() == 4 && '0' <= aVariants[0] && aVariants[0] <= '9'))
2362 aTmp = aLanguage + "-" + aVariants;
2363 if (aTmp != maBcp47)
2364 aVec.push_back( aTmp);
2365 bHaveLanguageVariant = true;
2368 aTmp = aLanguage + "-" + aCountry;
2369 if (aTmp != maBcp47)
2370 aVec.push_back( aTmp);
2372 if (!aVariants.isEmpty() && !bHaveLanguageVariant)
2374 // Only if variant is not from a grandfathered tag that wouldn't match
2375 // the rules, i.e. "de-1901" is fine but "en-oed" is not.
2376 if (aVariants.getLength() >= 5 ||
2377 (aVariants.getLength() == 4 && '0' <= aVariants[0] && aVariants[0] <= '9'))
2379 aTmp = aLanguage + "-" + aVariants;
2380 if (aTmp != maBcp47)
2381 aVec.push_back( aTmp);
2385 // Insert legacy fallbacks with country before language-only, but only
2386 // default script, script was handled already above.
2387 if (!aCountry.isEmpty())
2389 if (aLanguage == "sr" && aCountry == "CS")
2390 aVec.emplace_back("sr-YU");
2393 // Original language-only.
2394 if (!aLanguage.isEmpty() && aLanguage != maBcp47)
2395 aVec.push_back( aLanguage);
2397 return aVec;
2401 OUString LanguageTag::getBcp47MS() const
2403 if (getLanguageType() == LANGUAGE_SPANISH_DATED)
2404 return u"es-ES_tradnl"_ustr;
2405 return getBcp47();
2409 bool LanguageTag::equals( const LanguageTag & rLanguageTag ) const
2411 // If SYSTEM is not to be resolved or either both are SYSTEM or none, we
2412 // can use the operator==() optimization.
2413 if (isSystemLocale() == rLanguageTag.isSystemLocale())
2414 return operator==( rLanguageTag);
2416 // Compare full language tag strings.
2417 return getBcp47() == rLanguageTag.getBcp47();
2421 bool LanguageTag::operator==( const LanguageTag & rLanguageTag ) const
2423 if (isSystemLocale() && rLanguageTag.isSystemLocale())
2424 return true; // both SYSTEM
2426 // No need to convert to BCP47 if both Lang-IDs are available.
2427 if (mbInitializedLangID && rLanguageTag.mbInitializedLangID)
2429 // Equal if same ID and no SYSTEM is involved or both are SYSTEM.
2430 return mnLangID == rLanguageTag.mnLangID && isSystemLocale() == rLanguageTag.isSystemLocale();
2433 // Compare full language tag strings but SYSTEM unresolved.
2434 return getBcp47( false) == rLanguageTag.getBcp47( false);
2438 bool LanguageTag::operator!=( const LanguageTag & rLanguageTag ) const
2440 return !operator==( rLanguageTag);
2444 bool LanguageTag::operator<( const LanguageTag & rLanguageTag ) const
2446 return getBcp47( false).compareToIgnoreAsciiCase( rLanguageTag.getBcp47( false)) < 0;
2450 // static
2451 LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp47,
2452 OUString& rLanguage, OUString& rScript, OUString& rCountry, OUString& rRegion, OUString& rVariants )
2454 Extraction eRet = EXTRACTED_NONE;
2455 const sal_Int32 nLen = rBcp47.getLength();
2456 const sal_Int32 nHyph1 = rBcp47.indexOf( '-');
2457 sal_Int32 nHyph2 = (nHyph1 < 0 ? -1 : rBcp47.indexOf( '-', nHyph1 + 1));
2458 sal_Int32 nHyph3 = (nHyph2 < 0 ? -1 : rBcp47.indexOf( '-', nHyph2 + 1));
2459 sal_Int32 nHyph4 = (nHyph3 < 0 ? -1 : rBcp47.indexOf( '-', nHyph3 + 1));
2460 if (nLen == 1 && rBcp47[0] == '*') // * the dreaded jolly joker
2462 // It's f*d up but we need to recognize this.
2463 eRet = EXTRACTED_X_JOKER;
2465 else if (nHyph1 == 1 && rBcp47[0] == 'x') // x-... privateuse
2467 // x-... privateuse tags MUST be known to us by definition.
2468 eRet = EXTRACTED_X;
2470 else if (nLen == 1 && rBcp47[0] == 'C') // the 'C' locale
2472 eRet = EXTRACTED_C_LOCALE;
2473 rLanguage = "C";
2474 rScript.clear();
2475 rCountry.clear();
2476 rRegion.clear();
2477 rVariants.clear();
2479 else if (nLen == 2 || nLen == 3) // ll or lll
2481 if (nHyph1 < 0)
2483 rLanguage = rBcp47.toAsciiLowerCase();
2484 rScript.clear();
2485 rCountry.clear();
2486 rRegion.clear();
2487 rVariants.clear();
2488 eRet = EXTRACTED_LSC;
2491 else if ( (nHyph1 == 2 && nLen == 5) // ll-CC
2492 || (nHyph1 == 3 && nLen == 6)) // lll-CC
2494 if (nHyph2 < 0)
2496 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2497 rCountry = rBcp47.copy( nHyph1 + 1, 2).toAsciiUpperCase();
2498 rRegion.clear();
2499 rScript.clear();
2500 rVariants.clear();
2501 eRet = EXTRACTED_LSC;
2504 else if ( (nHyph1 == 2 && nLen == 6) // ll-rrr
2505 || (nHyph1 == 3 && nLen == 7)) // lll-rrr
2507 if (nHyph2 < 0)
2509 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2510 rCountry.clear();
2511 rRegion = rBcp47.copy( nHyph1 + 1, 3);
2512 rScript.clear();
2513 rVariants.clear();
2514 eRet = EXTRACTED_LR;
2517 else if ( (nHyph1 == 2 && nLen == 7) // ll-Ssss or ll-vvvv
2518 || (nHyph1 == 3 && nLen == 8)) // lll-Ssss or lll-vvvv
2520 if (nHyph2 < 0)
2522 sal_Unicode c = rBcp47[nHyph1+1];
2523 if ('0' <= c && c <= '9')
2525 // (DIGIT 3ALNUM) vvvv variant instead of Ssss script
2526 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2527 rScript.clear();
2528 rCountry.clear();
2529 rRegion.clear();
2530 rVariants = rBcp47.copy( nHyph1 + 1);
2531 eRet = EXTRACTED_LV;
2533 else
2535 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2536 rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() +
2537 rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
2538 rCountry.clear();
2539 rRegion.clear();
2540 rVariants.clear();
2541 eRet = EXTRACTED_LSC;
2545 else if ( (nHyph1 == 2 && nHyph2 == 7 && nLen == 10) // ll-Ssss-CC
2546 || (nHyph1 == 3 && nHyph2 == 8 && nLen == 11)) // lll-Ssss-CC
2548 if (nHyph3 < 0)
2550 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2551 rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
2552 rCountry = rBcp47.copy( nHyph2 + 1, 2).toAsciiUpperCase();
2553 rRegion.clear();
2554 rVariants.clear();
2555 eRet = EXTRACTED_LSC;
2558 else if ( (nHyph1 == 2 && nHyph2 == 7 && nLen == 11) // ll-Ssss-rrr
2559 || (nHyph1 == 3 && nHyph2 == 8 && nLen == 12)) // lll-Ssss-rrr
2561 if (nHyph3 < 0)
2563 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2564 rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
2565 rCountry.clear();
2566 rRegion = rBcp47.copy( nHyph2 + 1, 3);
2567 rVariants.clear();
2568 eRet = EXTRACTED_LR;
2571 else if ( (nHyph1 == 2 && nHyph2 == 7 && nHyph3 == 10 && nLen >= 15) // ll-Ssss-CC-vvvv[vvvv][-...]
2572 || (nHyph1 == 3 && nHyph2 == 8 && nHyph3 == 11 && nLen >= 16)) // lll-Ssss-CC-vvvv[vvvv][-...]
2574 if (nHyph4 < 0)
2575 nHyph4 = rBcp47.getLength();
2576 if (nHyph4 - nHyph3 > 4 && nHyph4 - nHyph3 <= 9)
2578 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2579 rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
2580 rCountry = rBcp47.copy( nHyph2 + 1, 2).toAsciiUpperCase();
2581 rRegion.clear();
2582 rVariants = rBcp47.copy( nHyph3 + 1);
2583 eRet = EXTRACTED_LV;
2586 else if ( (nHyph1 == 2 && nHyph2 == 7 && nHyph3 == 11 && nLen >= 16) // ll-Ssss-rrr-vvvv[vvvv][-...]
2587 || (nHyph1 == 3 && nHyph2 == 8 && nHyph3 == 12 && nLen >= 17)) // lll-Ssss-rrr-vvvv[vvvv][-...]
2589 if (nHyph4 < 0)
2590 nHyph4 = rBcp47.getLength();
2591 if (nHyph4 - nHyph3 > 4 && nHyph4 - nHyph3 <= 9)
2593 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2594 rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
2595 rCountry.clear();
2596 rRegion = rBcp47.copy( nHyph2 + 1, 3);
2597 rVariants = rBcp47.copy( nHyph3 + 1);
2598 eRet = EXTRACTED_LR;
2601 else if ( (nHyph1 == 2 && nHyph2 == 5 && nHyph3 == 7) // ll-CC-u-...
2602 || (nHyph1 == 3 && nHyph2 == 6 && nHyph3 == 8)) // lll-CC-u-...
2604 if (rBcp47[nHyph3-1] == 'u')
2606 // Need to recognize as known, otherwise getLanguage() and
2607 // getCountry() return empty string because mpImplLangtag is not
2608 // used with a known mapping.
2609 /* TODO: if there were more this would get ugly and needed some
2610 * table driven approach via isolang.cxx instead. */
2611 if (rBcp47.equalsIgnoreAsciiCase( "es-ES-u-co-trad"))
2613 rLanguage = "es";
2614 rScript.clear();
2615 rCountry = "ES";
2616 rRegion.clear();
2617 rVariants = "u-co-trad"; // not strictly a variant, but used to reconstruct the tag.
2618 eRet = EXTRACTED_LV;
2622 else if ( (nHyph1 == 2 && nHyph2 == 5 && nLen >= 10) // ll-CC-vvvv[vvvv][-...]
2623 || (nHyph1 == 3 && nHyph2 == 6 && nLen >= 11)) // lll-CC-vvvv[vvvv][-...]
2625 if (nHyph3 < 0)
2626 nHyph3 = rBcp47.getLength();
2627 if (nHyph3 - nHyph2 > 4 && nHyph3 - nHyph2 <= 9)
2629 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2630 rScript.clear();
2631 rCountry = rBcp47.copy( nHyph1 + 1, 2).toAsciiUpperCase();
2632 rRegion.clear();
2633 rVariants = rBcp47.copy( nHyph2 + 1);
2634 eRet = EXTRACTED_LV;
2637 else if ( (nHyph1 == 2 && nHyph2 == 6 && nLen >= 11) // ll-rrr-vvvv[vvvv][-...]
2638 || (nHyph1 == 3 && nHyph2 == 7 && nLen >= 12)) // lll-rrr-vvvv[vvvv][-...]
2640 if (nHyph3 < 0)
2641 nHyph3 = rBcp47.getLength();
2642 if (nHyph3 - nHyph2 > 4 && nHyph3 - nHyph2 <= 9)
2644 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2645 rScript.clear();
2646 rCountry.clear();
2647 rRegion = rBcp47.copy( nHyph1 + 1, 3);
2648 rVariants = rBcp47.copy( nHyph2 + 1);
2649 eRet = EXTRACTED_LR;
2652 else if ( (nHyph1 == 2 && nLen >= 8) // ll-vvvvv[vvv][-...]
2653 || (nHyph1 == 3 && nLen >= 9)) // lll-vvvvv[vvv][-...]
2655 if (nHyph2 < 0)
2656 nHyph2 = rBcp47.getLength();
2657 if (nHyph2 - nHyph1 > 5 && nHyph2 - nHyph1 <= 9)
2659 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2660 rScript.clear();
2661 rCountry.clear();
2662 rRegion.clear();
2663 rVariants = rBcp47.copy( nHyph1 + 1);
2664 eRet = EXTRACTED_LV;
2666 else
2668 // Known and handled grandfathered; ugly but effective ...
2669 // Note that nLen must have matched above.
2670 // Strictly not a variant, but so far we treat it as such.
2671 if (rBcp47.equalsIgnoreAsciiCase( "en-GB-oed"))
2673 rLanguage = "en";
2674 rScript.clear();
2675 rCountry = "GB";
2676 rRegion.clear();
2677 rVariants = "oed";
2678 eRet = EXTRACTED_LV;
2680 // Other known and handled odd cases.
2681 else if (rBcp47.equalsIgnoreAsciiCase( "es-ES_tradnl"))
2683 // Will get overridden, but needs to be recognized as known.
2684 rLanguage = "es";
2685 rScript.clear();
2686 rCountry = "ES";
2687 rRegion.clear();
2688 rVariants = "tradnl"; // this is nonsense, but... ignored.
2689 eRet = EXTRACTED_KNOWN_BAD;
2693 if (eRet == EXTRACTED_NONE)
2695 SAL_INFO( "i18nlangtag", "LanguageTagImpl::simpleExtract: did not extract '" << rBcp47 << "'");
2696 rLanguage.clear();
2697 rScript.clear();
2698 rCountry.clear();
2699 rRegion.clear();
2700 rVariants.clear();
2702 else
2704 assert(rLanguage.getLength() == 2 || rLanguage.getLength() == 3
2705 || eRet == EXTRACTED_X_JOKER || eRet == EXTRACTED_X || eRet == EXTRACTED_C_LOCALE);
2706 assert(rScript.isEmpty() || rScript.getLength() == 4);
2707 assert(rCountry.isEmpty() || rRegion.isEmpty()); // [2ALPHA / 3DIGIT]
2708 assert(rCountry.isEmpty() || rCountry.getLength() == 2);
2709 assert(rRegion.isEmpty() || rRegion.getLength() == 3);
2710 assert(rVariants.isEmpty() || rVariants.getLength() >= 4 || rVariants == "oed");
2712 return eRet;
2716 // static
2717 ::std::vector< OUString >::const_iterator LanguageTag::getFallback(
2718 const ::std::vector< OUString > & rList, const OUString & rReference )
2720 if (rList.empty())
2721 return rList.end();
2723 // Try the simple case first without constructing fallbacks.
2724 ::std::vector< OUString >::const_iterator it = std::find(rList.begin(), rList.end(), rReference);
2725 if (it != rList.end())
2726 return it; // exact match
2728 ::std::vector< OUString > aFallbacks( LanguageTag( rReference).getFallbackStrings( false));
2729 if (rReference != "en-US")
2731 aFallbacks.emplace_back("en-US");
2732 if (rReference != "en")
2733 aFallbacks.emplace_back("en");
2735 if (rReference != "x-default")
2736 aFallbacks.emplace_back("x-default");
2737 if (rReference != "x-no-translate")
2738 aFallbacks.emplace_back("x-no-translate");
2739 /* TODO: the original comphelper::Locale::getFallback() code had
2740 * "x-notranslate" instead of "x-no-translate", but all .xcu files use
2741 * "x-no-translate" and "x-notranslate" apparently was never used anywhere.
2742 * Did that ever work? Was it supposed to work at all like this? */
2744 for (const auto& fb : aFallbacks)
2746 it = std::find(rList.begin(), rList.end(), fb);
2747 if (it != rList.end())
2748 return it; // fallback found
2751 // Did not find anything so return something of the list, the first value
2752 // will do as well as any other as none did match any of the possible
2753 // fallbacks.
2754 return rList.begin();
2758 // static
2759 ::std::vector< css::lang::Locale >::const_iterator LanguageTag::getMatchingFallback(
2760 const ::std::vector< css::lang::Locale > & rList,
2761 const css::lang::Locale & rReference )
2763 if (rList.empty())
2764 return rList.end();
2766 // Try the simple case first without constructing fallbacks.
2767 ::std::vector< lang::Locale >::const_iterator it = std::find_if(rList.begin(), rList.end(),
2768 [&rReference](const lang::Locale& rLocale) {
2769 return rLocale.Language == rReference.Language
2770 && rLocale.Country == rReference.Country
2771 && rLocale.Variant == rReference.Variant; });
2772 if (it != rList.end())
2773 return it; // exact match
2775 // Now for each reference fallback test the fallbacks of the list in order.
2776 ::std::vector< OUString > aFallbacks( LanguageTag( rReference).getFallbackStrings( false));
2777 ::std::vector< ::std::vector< OUString > > aListFallbacks( rList.size());
2778 size_t i = 0;
2779 for (auto const& elem : rList)
2780 aListFallbacks[i++] = LanguageTag(elem).getFallbackStrings(true);
2782 for (auto const& rfb : aFallbacks)
2784 size_t nPosFb = 0;
2785 for (auto const& lfb : aListFallbacks)
2787 for (auto const& fb : lfb)
2789 if (rfb == fb)
2790 return rList.begin() + nPosFb;
2792 ++nPosFb;
2796 // No match found.
2797 return rList.end();
2801 static bool lcl_isSystem( LanguageType nLangID )
2803 if (nLangID == LANGUAGE_SYSTEM)
2804 return true;
2805 // There are some special values that simplify to SYSTEM,
2806 // getRealLanguage() catches and resolves them.
2807 LanguageType nNewLangID = MsLangId::getRealLanguage( nLangID);
2808 return nNewLangID != nLangID;
2812 // static
2813 css::lang::Locale LanguageTag::convertToLocale( LanguageType nLangID, bool bResolveSystem )
2815 if (!bResolveSystem && lcl_isSystem( nLangID))
2816 return lang::Locale();
2818 return LanguageTag( nLangID).getLocale( bResolveSystem);
2822 // static
2823 LanguageType LanguageTag::convertToLanguageType( const css::lang::Locale& rLocale, bool bResolveSystem )
2825 if (rLocale.Language.isEmpty() && !bResolveSystem)
2826 return LANGUAGE_SYSTEM;
2828 if (!bResolveSystem)
2830 // single-item cache
2831 static std::mutex gMutex;
2832 static std::optional<lang::Locale> moCacheKey;
2833 static std::optional<LanguageType> moCacheValue;
2834 std::unique_lock l(gMutex);
2835 if (!moCacheKey || *moCacheKey != rLocale)
2837 moCacheValue = LanguageTag(rLocale).getLanguageType(false);
2838 moCacheKey = rLocale;
2840 return *moCacheValue;
2842 else
2843 return LanguageTag( rLocale).getLanguageType( bResolveSystem);
2847 // static
2848 OUString LanguageTagImpl::convertToBcp47( const css::lang::Locale& rLocale )
2850 OUString aBcp47;
2851 if (rLocale.Language.isEmpty())
2853 // aBcp47 stays empty
2855 else if (rLocale.Language == I18NLANGTAG_QLT)
2857 aBcp47 = rLocale.Variant;
2859 else
2861 /* XXX NOTE: most legacy code never evaluated the Variant field, so for
2862 * now just concatenate language and country. In case we stumbled over
2863 * variant aware code we'd have to take care of that. */
2864 if (rLocale.Country.isEmpty())
2865 aBcp47 = rLocale.Language;
2866 else
2868 aBcp47 = rLocale.Language + "-" + rLocale.Country;
2871 return aBcp47;
2875 // static
2876 OUString LanguageTag::convertToBcp47( const css::lang::Locale& rLocale, bool bResolveSystem )
2878 OUString aBcp47;
2879 if (rLocale.Language.isEmpty())
2881 if (bResolveSystem)
2882 aBcp47 = LanguageTag::convertToBcp47( LANGUAGE_SYSTEM );
2883 // else aBcp47 stays empty
2885 else
2887 aBcp47 = LanguageTagImpl::convertToBcp47( rLocale);
2889 return aBcp47;
2893 // static
2894 OUString LanguageTag::convertToBcp47( LanguageType nLangID )
2896 lang::Locale aLocale( LanguageTag::convertToLocale( nLangID ));
2897 // If system for some reason (should not happen... haha) could not be
2898 // resolved DO NOT CALL LanguageTag::convertToBcp47(Locale) because that
2899 // would recurse into this method here!
2900 if (aLocale.Language.isEmpty())
2901 return OUString(); // bad luck, bail out
2902 return LanguageTagImpl::convertToBcp47( aLocale);
2906 // static
2907 css::lang::Locale LanguageTag::convertToLocale( const OUString& rBcp47, bool bResolveSystem )
2909 if (rBcp47.isEmpty() && !bResolveSystem)
2910 return lang::Locale();
2912 return LanguageTag( rBcp47).getLocale( bResolveSystem);
2916 // static
2917 LanguageType LanguageTag::convertToLanguageType( const OUString& rBcp47 )
2919 return LanguageTag( rBcp47).getLanguageType();
2923 // static
2924 LanguageType LanguageTag::convertToLanguageTypeWithFallback( const OUString& rBcp47 )
2926 return LanguageTag( rBcp47).makeFallback().getLanguageType();
2930 // static
2931 css::lang::Locale LanguageTag::convertToLocaleWithFallback( const OUString& rBcp47 )
2933 return LanguageTag( rBcp47).makeFallback().getLocale();
2937 // static
2938 LanguageType LanguageTag::convertToLanguageTypeWithFallback( const css::lang::Locale& rLocale )
2940 if (rLocale.Language.isEmpty())
2941 return LANGUAGE_SYSTEM;
2943 return LanguageTag( rLocale).makeFallback().getLanguageType();
2947 // static
2948 bool LanguageTag::isValidBcp47( const OUString& rString, OUString* o_pCanonicalized,
2949 LanguageTag::PrivateUse ePrivateUse )
2951 bool bValid = false;
2953 struct guard
2955 lt_tag_t* mpLangtag;
2956 guard()
2958 theDataRef().init();
2959 mpLangtag = lt_tag_new();
2961 ~guard()
2963 lt_tag_unref( mpLangtag);
2965 } aVar;
2967 myLtError aError;
2969 if (!lt_tag_parse_disabled && lt_tag_parse(aVar.mpLangtag, OUStringToOString(rString, RTL_TEXTENCODING_UTF8).getStr(), &aError.p))
2971 char* pTag = lt_tag_canonicalize( aVar.mpLangtag, &aError.p);
2972 SAL_WARN_IF( !pTag, "i18nlangtag", "LanguageTag:isValidBcp47: could not canonicalize '" << rString << "'");
2973 if (pTag)
2975 bValid = true;
2976 if (ePrivateUse != PrivateUse::ALLOW)
2980 const char* pLang = nullptr;
2981 const lt_lang_t* pLangT = lt_tag_get_language( aVar.mpLangtag);
2982 if (pLangT)
2984 pLang = lt_lang_get_tag( pLangT);
2985 if (pLang && strcmp( pLang, I18NLANGTAG_QLT_ASCII) == 0)
2987 // Disallow 'qlt' localuse code to prevent
2988 // confusion with our internal usage.
2989 bValid = false;
2990 break;
2993 if (ePrivateUse == PrivateUse::ALLOW_ART_X && pLang && strcmp( pLang, "art") == 0)
2995 // Allow anything 'art' which includes 'art-x-...' and 'art-Latn-x-...'.
2996 break;
2998 const lt_string_t* pPrivate = lt_tag_get_privateuse( aVar.mpLangtag);
2999 if (pPrivate && lt_string_length( pPrivate) > 0)
3000 bValid = false;
3002 while (false);
3004 if (o_pCanonicalized)
3005 *o_pCanonicalized = OUString::createFromAscii( pTag);
3006 free( pTag);
3009 else
3011 SAL_INFO( "i18nlangtag", "LanguageTag:isValidBcp47: could not parse '" << rString << "'");
3013 return bValid;
3016 LanguageTag makeLanguageTagFromAppleLanguageId(AppleLanguageId nLanguage)
3018 //map the simple ones via LanguageTypes, and the hard ones explicitly
3019 LanguageType nLang(LANGUAGE_DONTKNOW);
3021 switch (nLanguage)
3023 case AppleLanguageId::ENGLISH:
3024 nLang = LANGUAGE_ENGLISH_US;
3025 break;
3026 case AppleLanguageId::FRENCH:
3027 nLang = LANGUAGE_FRENCH;
3028 break;
3029 case AppleLanguageId::GERMAN:
3030 nLang = LANGUAGE_GERMAN;
3031 break;
3032 case AppleLanguageId::ITALIAN:
3033 nLang = LANGUAGE_ITALIAN;
3034 break;
3035 case AppleLanguageId::DUTCH:
3036 nLang = LANGUAGE_DUTCH;
3037 break;
3038 case AppleLanguageId::SWEDISH:
3039 nLang = LANGUAGE_SWEDISH;
3040 break;
3041 case AppleLanguageId::SPANISH:
3042 nLang = LANGUAGE_SPANISH;
3043 break;
3044 case AppleLanguageId::DANISH:
3045 nLang = LANGUAGE_DANISH;
3046 break;
3047 case AppleLanguageId::PORTUGUESE:
3048 nLang = LANGUAGE_PORTUGUESE;
3049 break;
3050 case AppleLanguageId::NORWEGIAN:
3051 nLang = LANGUAGE_NORWEGIAN;
3052 break;
3053 case AppleLanguageId::HEBREW:
3054 nLang = LANGUAGE_HEBREW;
3055 break;
3056 case AppleLanguageId::JAPANESE:
3057 nLang = LANGUAGE_JAPANESE;
3058 break;
3059 case AppleLanguageId::ARABIC:
3060 nLang = LANGUAGE_ARABIC_PRIMARY_ONLY;
3061 break;
3062 case AppleLanguageId::FINNISH:
3063 nLang = LANGUAGE_FINNISH;
3064 break;
3065 case AppleLanguageId::GREEK:
3066 nLang = LANGUAGE_GREEK;
3067 break;
3068 case AppleLanguageId::ICELANDIC:
3069 nLang = LANGUAGE_ICELANDIC;
3070 break;
3071 case AppleLanguageId::MALTESE:
3072 nLang = LANGUAGE_MALTESE;
3073 break;
3074 case AppleLanguageId::TURKISH:
3075 nLang = LANGUAGE_TURKISH;
3076 break;
3077 case AppleLanguageId::CROATIAN:
3078 nLang = LANGUAGE_CROATIAN;
3079 break;
3080 case AppleLanguageId::CHINESE_TRADITIONAL:
3081 nLang = LANGUAGE_CHINESE_TRADITIONAL;
3082 break;
3083 case AppleLanguageId::URDU:
3084 nLang = LANGUAGE_URDU_PAKISTAN; //probably, otherwise we need a LANGUAGE_URDU_PRIMARY_ONLY
3085 break;
3086 case AppleLanguageId::HINDI:
3087 nLang = LANGUAGE_HINDI;
3088 break;
3089 case AppleLanguageId::THAI:
3090 nLang = LANGUAGE_THAI;
3091 break;
3092 case AppleLanguageId::KOREAN:
3093 nLang = LANGUAGE_KOREAN;
3094 break;
3095 case AppleLanguageId::LITHUANIAN:
3096 nLang = LANGUAGE_LITHUANIAN;
3097 break;
3098 case AppleLanguageId::POLISH:
3099 nLang = LANGUAGE_POLISH;
3100 break;
3101 case AppleLanguageId::HUNGARIAN:
3102 nLang = LANGUAGE_HUNGARIAN;
3103 break;
3104 case AppleLanguageId::ESTONIAN:
3105 nLang = LANGUAGE_ESTONIAN;
3106 break;
3107 case AppleLanguageId::LATVIAN:
3108 nLang = LANGUAGE_LATVIAN;
3109 break;
3110 case AppleLanguageId::SAMI:
3111 nLang = LANGUAGE_SAMI_NORTHERN_NORWAY; //maybe
3112 break;
3113 case AppleLanguageId::FAROESE:
3114 nLang = LANGUAGE_FAEROESE;
3115 break;
3116 case AppleLanguageId::FARSI:
3117 nLang = LANGUAGE_FARSI;
3118 break;
3119 case AppleLanguageId::RUSSIAN:
3120 nLang = LANGUAGE_RUSSIAN;
3121 break;
3122 case AppleLanguageId::CHINESE_SIMPLIFIED:
3123 nLang = LANGUAGE_CHINESE_SIMPLIFIED;
3124 break;
3125 case AppleLanguageId::FLEMISH:
3126 nLang = LANGUAGE_DUTCH_BELGIAN;
3127 break;
3128 case AppleLanguageId::IRISH_GAELIC:
3129 nLang = LANGUAGE_GAELIC_IRELAND;
3130 break;
3131 case AppleLanguageId::ALBANIAN:
3132 nLang = LANGUAGE_ALBANIAN;
3133 break;
3134 case AppleLanguageId::ROMANIAN:
3135 nLang = LANGUAGE_ROMANIAN;
3136 break;
3137 case AppleLanguageId::CZECH:
3138 nLang = LANGUAGE_CZECH;
3139 break;
3140 case AppleLanguageId::SLOVAK:
3141 nLang = LANGUAGE_SLOVAK;
3142 break;
3143 case AppleLanguageId::SLOVENIAN:
3144 nLang = LANGUAGE_SLOVENIAN;
3145 break;
3146 case AppleLanguageId::YIDDISH:
3147 nLang = LANGUAGE_YIDDISH;
3148 break;
3149 case AppleLanguageId::SERBIAN:
3150 nLang = LANGUAGE_SERBIAN_CYRILLIC_SERBIA; //maybe
3151 break;
3152 case AppleLanguageId::MACEDONIAN:
3153 nLang = LANGUAGE_MACEDONIAN;
3154 break;
3155 case AppleLanguageId::BULGARIAN:
3156 nLang = LANGUAGE_BULGARIAN;
3157 break;
3158 case AppleLanguageId::UKRAINIAN:
3159 nLang = LANGUAGE_UKRAINIAN;
3160 break;
3161 case AppleLanguageId::BYELORUSSIAN:
3162 nLang = LANGUAGE_BELARUSIAN;
3163 break;
3164 case AppleLanguageId::UZBEK:
3165 nLang = LANGUAGE_UZBEK_CYRILLIC; //maybe
3166 break;
3167 case AppleLanguageId::KAZAKH:
3168 nLang = LANGUAGE_KAZAKH;
3169 break;
3170 case AppleLanguageId::AZERI_CYRILLIC:
3171 nLang = LANGUAGE_AZERI_CYRILLIC;
3172 break;
3173 case AppleLanguageId::AZERI_ARABIC:
3174 return LanguageTag(u"az-Arab"_ustr);
3175 case AppleLanguageId::ARMENIAN:
3176 nLang = LANGUAGE_ARMENIAN;
3177 break;
3178 case AppleLanguageId::GEORGIAN:
3179 nLang = LANGUAGE_GEORGIAN;
3180 break;
3181 case AppleLanguageId::MOLDAVIAN:
3182 nLang = LANGUAGE_ROMANIAN_MOLDOVA;
3183 break;
3184 case AppleLanguageId::KIRGHIZ:
3185 nLang = LANGUAGE_KIRGHIZ;
3186 break;
3187 case AppleLanguageId::TAJIKI:
3188 nLang = LANGUAGE_TAJIK;
3189 break;
3190 case AppleLanguageId::TURKMEN:
3191 nLang = LANGUAGE_TURKMEN;
3192 break;
3193 case AppleLanguageId::MONGOLIAN_MONGOLIAN:
3194 nLang = LANGUAGE_MONGOLIAN_MONGOLIAN_MONGOLIA;
3195 break;
3196 case AppleLanguageId::MONGOLIAN_CYRILLIC:
3197 nLang = LANGUAGE_MONGOLIAN_CYRILLIC_MONGOLIA;
3198 break;
3199 case AppleLanguageId::PASHTO:
3200 nLang = LANGUAGE_PASHTO;
3201 break;
3202 case AppleLanguageId::KURDISH:
3203 nLang = LANGUAGE_USER_KURDISH_TURKEY; //maybe
3204 break;
3205 case AppleLanguageId::KASHMIRI:
3206 nLang = LANGUAGE_KASHMIRI;
3207 break;
3208 case AppleLanguageId::SINDHI:
3209 nLang = LANGUAGE_SINDHI;
3210 break;
3211 case AppleLanguageId::TIBETAN:
3212 nLang = LANGUAGE_TIBETAN;
3213 break;
3214 case AppleLanguageId::NEPALI:
3215 nLang = LANGUAGE_NEPALI;
3216 break;
3217 case AppleLanguageId::SANSKRIT:
3218 nLang = LANGUAGE_SANSKRIT;
3219 break;
3220 case AppleLanguageId::MARATHI:
3221 nLang = LANGUAGE_MARATHI;
3222 break;
3223 case AppleLanguageId::BENGALI:
3224 nLang = LANGUAGE_BENGALI;
3225 break;
3226 case AppleLanguageId::ASSAMESE:
3227 nLang = LANGUAGE_ASSAMESE;
3228 break;
3229 case AppleLanguageId::GUJARATI:
3230 nLang = LANGUAGE_GUJARATI;
3231 break;
3232 case AppleLanguageId::PUNJABI:
3233 nLang = LANGUAGE_PUNJABI;
3234 break;
3235 case AppleLanguageId::ORIYA:
3236 nLang = LANGUAGE_ODIA;
3237 break;
3238 case AppleLanguageId::MALAYALAM:
3239 nLang = LANGUAGE_MALAYALAM;
3240 break;
3241 case AppleLanguageId::KANNADA:
3242 nLang = LANGUAGE_KANNADA;
3243 break;
3244 case AppleLanguageId::TAMIL:
3245 nLang = LANGUAGE_TAMIL;
3246 break;
3247 case AppleLanguageId::TELUGU:
3248 nLang = LANGUAGE_TELUGU;
3249 break;
3250 case AppleLanguageId::SINHALESE:
3251 nLang = LANGUAGE_SINHALESE_SRI_LANKA;
3252 break;
3253 case AppleLanguageId::BURMESE:
3254 nLang = LANGUAGE_BURMESE;
3255 break;
3256 case AppleLanguageId::KHMER:
3257 nLang = LANGUAGE_KHMER;
3258 break;
3259 case AppleLanguageId::LAO:
3260 nLang = LANGUAGE_LAO;
3261 break;
3262 case AppleLanguageId::VIETNAMESE:
3263 nLang = LANGUAGE_VIETNAMESE;
3264 break;
3265 case AppleLanguageId::INDONESIAN:
3266 nLang = LANGUAGE_INDONESIAN;
3267 break;
3268 case AppleLanguageId::TAGALONG:
3269 nLang = LANGUAGE_USER_TAGALOG;
3270 break;
3271 case AppleLanguageId::MALAY_LATIN:
3272 nLang = LANGUAGE_MALAY_MALAYSIA;
3273 break;
3274 case AppleLanguageId::MALAY_ARABIC:
3275 nLang = LANGUAGE_USER_MALAY_ARABIC_MALAYSIA;
3276 break;
3277 case AppleLanguageId::AMHARIC:
3278 nLang = LANGUAGE_AMHARIC_ETHIOPIA;
3279 break;
3280 case AppleLanguageId::TIGRINYA:
3281 nLang = LANGUAGE_TIGRIGNA_ETHIOPIA;
3282 break;
3283 case AppleLanguageId::GALLA:
3284 nLang = LANGUAGE_OROMO;
3285 break;
3286 case AppleLanguageId::SOMALI:
3287 nLang = LANGUAGE_SOMALI;
3288 break;
3289 case AppleLanguageId::SWAHILI:
3290 nLang = LANGUAGE_SWAHILI;
3291 break;
3292 case AppleLanguageId::KINYARWANDA:
3293 nLang = LANGUAGE_KINYARWANDA_RWANDA;
3294 break;
3295 case AppleLanguageId::RUNDI:
3296 return LanguageTag(u"rn"_ustr);
3297 case AppleLanguageId::NYANJA:
3298 nLang = LANGUAGE_USER_NYANJA;
3299 break;
3300 case AppleLanguageId::MALAGASY:
3301 nLang = LANGUAGE_MALAGASY_PLATEAU;
3302 break;
3303 case AppleLanguageId::ESPERANTO:
3304 nLang = LANGUAGE_USER_ESPERANTO;
3305 break;
3306 case AppleLanguageId::WELSH:
3307 nLang = LANGUAGE_WELSH;
3308 break;
3309 case AppleLanguageId::BASQUE:
3310 nLang = LANGUAGE_BASQUE;
3311 break;
3312 case AppleLanguageId::CATALAN:
3313 nLang = LANGUAGE_CATALAN;
3314 break;
3315 case AppleLanguageId::LATIN:
3316 nLang = LANGUAGE_LATIN;
3317 break;
3318 case AppleLanguageId::QUENCHUA:
3319 nLang = LANGUAGE_QUECHUA_BOLIVIA; //maybe
3320 break;
3321 case AppleLanguageId::GUARANI:
3322 nLang = LANGUAGE_GUARANI_PARAGUAY;
3323 break;
3324 case AppleLanguageId::AYMARA:
3325 return LanguageTag(u"ay"_ustr);
3326 case AppleLanguageId::TATAR:
3327 nLang = LANGUAGE_TATAR;
3328 break;
3329 case AppleLanguageId::UIGHUR:
3330 nLang = LANGUAGE_UIGHUR_CHINA;
3331 break;
3332 case AppleLanguageId::DZONGKHA:
3333 nLang = LANGUAGE_DZONGKHA_BHUTAN;
3334 break;
3335 case AppleLanguageId::JAVANESE_LATIN:
3336 return LanguageTag(u"jv-Latn"_ustr);
3337 case AppleLanguageId::SUNDANESE_LATIN:
3338 return LanguageTag(u"su-Latn"_ustr);
3339 case AppleLanguageId::GALICIAN:
3340 nLang = LANGUAGE_GALICIAN;
3341 break;
3342 case AppleLanguageId::AFRIKAANS:
3343 nLang = LANGUAGE_AFRIKAANS;
3344 break;
3345 case AppleLanguageId::BRETON:
3346 nLang = LANGUAGE_BRETON_FRANCE;
3347 break;
3348 case AppleLanguageId::INUKTITUT:
3349 nLang = LANGUAGE_INUKTITUT_LATIN_CANADA; //probably
3350 break;
3351 case AppleLanguageId::SCOTTISH_GAELIC:
3352 nLang = LANGUAGE_GAELIC_SCOTLAND;
3353 break;
3354 case AppleLanguageId::MANX_GAELIC:
3355 nLang = LANGUAGE_USER_MANX;
3356 break;
3357 case AppleLanguageId::IRISH_GAELIC_WITH_DOT_ABOVE:
3358 return LanguageTag(u"ga-Latg"_ustr);
3359 case AppleLanguageId::TONGAN:
3360 return LanguageTag(u"to"_ustr);
3361 case AppleLanguageId::GREEK_POLYTONIC:
3362 nLang = LANGUAGE_USER_ANCIENT_GREEK;
3363 break;
3364 case AppleLanguageId::GREENLANDIC:
3365 nLang = LANGUAGE_KALAALLISUT_GREENLAND;
3366 break;
3367 case AppleLanguageId::AZERI_LATIN:
3368 nLang = LANGUAGE_AZERI_LATIN;
3369 break;
3372 return LanguageTag(nLang);
3375 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */