bump product version to 7.6.3.2-android
[LibreOffice.git] / i18nlangtag / source / languagetag / languagetag.cxx
blob63462e3e6a3ae10a64b2b61aa365150d9301b420
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
10 #include <config_folders.h>
11 #include <config_liblangtag.h>
13 #include <i18nlangtag/languagetag.hxx>
14 #include <i18nlangtag/applelangid.hxx>
15 #include <i18nlangtag/mslangid.hxx>
16 #include <rtl/ustrbuf.hxx>
17 #include <rtl/bootstrap.hxx>
18 #include <sal/log.hxx>
19 #include <osl/file.hxx>
20 #include <rtl/locale.h>
21 #include <o3tl/string_view.hxx>
22 #include <algorithm>
23 #include <map>
24 #include <mutex>
25 #include <string_view>
26 #include <unordered_set>
28 //#define erDEBUG
30 #if LIBLANGTAG_INLINE_FIX
31 #define LT_HAVE_INLINE
32 #endif
33 #include <liblangtag/langtag.h>
35 #ifdef ANDROID
36 #include <osl/detail/android-bootstrap.h>
37 #endif
39 #ifdef EMSCRIPTEN
40 #include <osl/detail/emscripten-bootstrap.h>
41 #endif
43 using namespace com::sun::star;
45 namespace {
47 // Helper to ensure lt_error_t is free'd
48 struct myLtError
50 lt_error_t* p;
51 myLtError() : p(nullptr) {}
52 ~myLtError() { if (p) lt_error_unref( p); }
namespace {

/// Accessor for the one recursive mutex of this file; the mutex object is
/// a function-local static and therefore created on first call.
std::recursive_mutex& theMutex()
{
    static std::recursive_mutex aInstance;
    return aInstance;
}

}
65 typedef std::unordered_set< OUString > KnownTagSet;
66 static const KnownTagSet & getKnowns()
68 static KnownTagSet theKnowns = []()
70 KnownTagSet tmpSet;
71 ::std::vector< MsLangId::LanguagetagMapping > aDefined( MsLangId::getDefinedLanguagetags());
72 for (auto const& elemDefined : aDefined)
74 // Do not use the BCP47 string here to initialize the
75 // LanguageTag because then canonicalize() would call this
76 // getKnowns() again...
77 ::std::vector< OUString > aFallbacks( LanguageTag( elemDefined.mnLang).getFallbackStrings( true));
78 for (auto const& fallback : aFallbacks)
80 tmpSet.insert(fallback);
83 return tmpSet;
84 }();
85 return theKnowns;
89 namespace {
90 struct compareIgnoreAsciiCaseLess
92 bool operator()( std::u16string_view r1, std::u16string_view r2 ) const
94 return o3tl::compareToIgnoreAsciiCase(r1, r2) < 0;
97 typedef ::std::map< OUString, LanguageTag::ImplPtr, compareIgnoreAsciiCaseLess > MapBcp47;
98 typedef ::std::map< LanguageType, LanguageTag::ImplPtr > MapLangID;
99 MapBcp47& theMapBcp47()
101 static MapBcp47 SINGLETON;
102 return SINGLETON;
104 MapLangID& theMapLangID()
106 static MapLangID SINGLETON;
107 return SINGLETON;
109 LanguageTag::ImplPtr& theSystemLocale()
111 static LanguageTag::ImplPtr SINGLETON;
112 return SINGLETON;
117 static LanguageType getNextOnTheFlyLanguage()
119 static LanguageType nOnTheFlyLanguage(0);
120 std::unique_lock aGuard( theMutex());
121 if (!nOnTheFlyLanguage)
122 nOnTheFlyLanguage = MsLangId::makeLangID( LANGUAGE_ON_THE_FLY_SUB_START, LANGUAGE_ON_THE_FLY_START);
123 else
125 if (MsLangId::getPrimaryLanguage( nOnTheFlyLanguage) != LANGUAGE_ON_THE_FLY_END)
126 ++nOnTheFlyLanguage;
127 else
129 LanguageType nSub = MsLangId::getSubLanguage( nOnTheFlyLanguage);
130 if (nSub != LANGUAGE_ON_THE_FLY_SUB_END)
131 nOnTheFlyLanguage = MsLangId::makeLangID( ++nSub, LANGUAGE_ON_THE_FLY_START);
132 else
134 SAL_WARN( "i18nlangtag", "getNextOnTheFlyLanguage: none left! ("
135 << ((sal_uInt16(LANGUAGE_ON_THE_FLY_END) - sal_uInt16(LANGUAGE_ON_THE_FLY_START) + 1)
136 * (sal_uInt16(LANGUAGE_ON_THE_FLY_SUB_END) - sal_uInt16(LANGUAGE_ON_THE_FLY_SUB_START) + 1))
137 << " consumed?!?)");
138 return LanguageType(0);
142 #if OSL_DEBUG_LEVEL > 0
143 static size_t nOnTheFlies = 0;
144 ++nOnTheFlies;
145 SAL_INFO( "i18nlangtag", "getNextOnTheFlyLanguage: number " << nOnTheFlies);
146 #endif
147 return nOnTheFlyLanguage;
151 // static
152 bool LanguageTag::isOnTheFlyID( LanguageType nLang )
154 LanguageType nPri = MsLangId::getPrimaryLanguage( nLang);
155 LanguageType nSub = MsLangId::getSubLanguage( nLang);
156 return
157 LANGUAGE_ON_THE_FLY_START <= nPri && nPri <= LANGUAGE_ON_THE_FLY_END &&
158 LANGUAGE_ON_THE_FLY_SUB_START <= nSub && nSub <= LANGUAGE_ON_THE_FLY_SUB_END;
161 namespace {
163 /** A reference holder for liblangtag data de/initialization, one static
164 instance. Currently implemented such that the first "ref" inits and dtor
165 (our library deinitialized) tears down.
167 class LiblangtagDataRef
169 public:
170 LiblangtagDataRef();
171 ~LiblangtagDataRef();
172 void init()
174 if (!mbInitialized)
175 setup();
177 private:
178 OString maDataPath; // path to liblangtag data, "|" if system
179 bool mbInitialized;
181 void setupDataPath();
182 void setup();
183 static void teardown();
186 LiblangtagDataRef& theDataRef()
188 static LiblangtagDataRef SINGLETON;
189 return SINGLETON;
193 LiblangtagDataRef::LiblangtagDataRef()
195 mbInitialized(false)
199 LiblangtagDataRef::~LiblangtagDataRef()
201 if (mbInitialized)
202 teardown();
205 void LiblangtagDataRef::setup()
207 SAL_INFO( "i18nlangtag", "LiblangtagDataRef::setup: initializing database");
208 if (maDataPath.isEmpty())
209 setupDataPath();
210 lt_db_initialize();
211 mbInitialized = true;
214 void LiblangtagDataRef::teardown()
216 SAL_INFO( "i18nlangtag", "LiblangtagDataRef::teardown: finalizing database");
217 lt_db_finalize();
220 void LiblangtagDataRef::setupDataPath()
222 #if defined(ANDROID) || defined(EMSCRIPTEN)
223 maDataPath = OString(lo_get_app_data_dir()) + "/share/liblangtag";
224 #else
225 // maDataPath is assumed to be empty here.
226 OUString aURL("$BRAND_BASE_DIR/" LIBO_SHARE_FOLDER "/liblangtag");
227 rtl::Bootstrap::expandMacros(aURL); //TODO: detect failure
229 // Check if data is in our own installation, else assume system
230 // installation.
231 OUString aData = aURL + "/language-subtag-registry.xml";
232 osl::DirectoryItem aDirItem;
233 if (osl::DirectoryItem::get( aData, aDirItem) == osl::DirectoryItem::E_None)
235 OUString aPath;
236 if (osl::FileBase::getSystemPathFromFileURL( aURL, aPath) == osl::FileBase::E_None)
237 maDataPath = OUStringToOString( aPath, RTL_TEXTENCODING_UTF8);
239 #endif
240 if (maDataPath.isEmpty())
241 maDataPath = "|"; // assume system
242 else
243 lt_db_set_datadir( maDataPath.getStr());
247 /* TODO: we could transform known vendor and browser-specific variants to known
248 * BCP 47 if available. For now just remove them to not confuse any later
249 * treatments that check for empty variants. This vendor stuff was never
250 * supported anyway. */
251 static void handleVendorVariant( css::lang::Locale & rLocale )
253 if (!rLocale.Variant.isEmpty() && rLocale.Language != I18NLANGTAG_QLT)
254 rLocale.Variant.clear();
// Implementation object behind LanguageTag. LanguageTag is a friend and is
// the only user of the private part; instances are created with
// std::make_shared<LanguageTagImpl>() and held via LanguageTag::ImplPtr.
// It stores the three representations of a tag (Locale, BCP 47 string,
// LangID), flags telling which of them are initialized, the liblangtag tag
// pointer, and caches for values derived from the tag.
258 class LanguageTagImpl
260 public:
262 explicit LanguageTagImpl( const LanguageTag & rLanguageTag );
263 explicit LanguageTagImpl( const LanguageTagImpl & rLanguageTagImpl );
264 ~LanguageTagImpl();
265 LanguageTagImpl& operator=( const LanguageTagImpl & rLanguageTagImpl );
267 private:
269 friend class LanguageTag;
// Tri-state used for the meIs... members below; the constructor taking a
// LanguageTag starts all of them at DECISION_DONTKNOW.
271 enum Decision
273 DECISION_DONTKNOW,
274 DECISION_NO,
275 DECISION_YES
// Data members. Constructors initialize them in this declaration order.
278 mutable css::lang::Locale maLocale;
279 mutable OUString maBcp47;
280 mutable OUString maCachedLanguage; ///< cache getLanguage()
281 mutable OUString maCachedScript; ///< cache getScript()
282 mutable OUString maCachedCountry; ///< cache getCountry()
283 mutable OUString maCachedVariants; ///< cache getVariants()
284 mutable OUString maCachedGlibcString; ///< cache getGlibcLocaleString()
285 mutable lt_tag_t* mpImplLangtag; ///< liblangtag pointer
286 mutable LanguageType mnLangID;
287 mutable LanguageTag::ScriptType meScriptType;
288 mutable Decision meIsValid;
289 mutable Decision meIsIsoLocale;
290 mutable Decision meIsIsoODF;
291 mutable Decision meIsLiblangtagNeeded; ///< whether processing with liblangtag needed
// One-bit flags; mbSystemLocale is the only one not declared mutable.
292 bool mbSystemLocale : 1;
293 mutable bool mbInitializedBcp47 : 1;
294 mutable bool mbInitializedLocale : 1;
295 mutable bool mbInitializedLangID : 1;
296 mutable bool mbCachedLanguage : 1;
297 mutable bool mbCachedScript : 1;
298 mutable bool mbCachedCountry : 1;
299 mutable bool mbCachedVariants : 1;
300 mutable bool mbCachedGlibcString : 1;
302 OUString const & getBcp47() const;
303 OUString const & getLanguage() const;
304 OUString const & getScript() const;
305 OUString const & getCountry() const;
306 OUString getRegion() const;
307 OUString const & getVariants() const;
308 bool hasScript() const;
309 OUString const & getGlibcLocaleString() const;
311 void setScriptType(LanguageTag::ScriptType st);
312 LanguageTag::ScriptType getScriptType() const;
314 bool isIsoLocale() const;
315 bool isIsoODF() const;
316 bool isValidBcp47() const;
// Conversions between the three representations.
318 void convertLocaleToBcp47();
319 bool convertLocaleToLang( bool bAllowOnTheFlyID );
320 void convertBcp47ToLocale();
321 void convertBcp47ToLang();
322 void convertLangToLocale();
323 void convertLangToBcp47();
325 /** @return whether BCP 47 language tag string was changed. */
326 bool canonicalize();
328 /** Canonicalize if not yet done and synchronize initialized conversions.
330 @return whether BCP 47 language tag string was changed.
332 bool synCanonicalize();
334 OUString getLanguageFromLangtag();
335 OUString getScriptFromLangtag();
336 OUString getRegionFromLangtag();
337 OUString getVariantsFromLangtag();
339 /** Generates on-the-fly LangID and registers the maBcp47,mnLangID pair.
341 @param nRegisterID
342 If not 0 and not LANGUAGE_DONTKNOW, suggest (!) to use that ID
343 instead of generating an on-the-fly ID. Implementation may
344 still generate an ID if the suggested ID is already used for
345 another language tag.
347 @return NULL if no ID could be obtained or registration failed.
349 LanguageTag::ImplPtr registerOnTheFly( LanguageType nRegisterID );
351 /** Obtain Language, Script, Country and Variants via simpleExtract() and
352 assign them to the cached variables if successful.
354 @return simpleExtract() successfully extracted and cached.
356 bool cacheSimpleLSCV();
358 enum Extraction
360 EXTRACTED_NONE,
361 EXTRACTED_LSC,
362 EXTRACTED_LV,
363 EXTRACTED_LR,
364 EXTRACTED_C_LOCALE,
365 EXTRACTED_X,
366 EXTRACTED_X_JOKER,
367 EXTRACTED_KNOWN_BAD
370 /** Of a language tag of the form lll[-Ssss][-CC][-vvvvvvvv] extract the
371 portions.
373 Does not check case or content!
375 @return EXTRACTED_LSC if simple tag was detected (i.e. one that
376 would fulfill the isIsoODF() condition),
377 EXTRACTED_LV if a tag with variant was detected,
378 EXTRACTED_LR if a tag with 3-digit UN M.49 region code was detected
379 EXTRACTED_C_LOCALE if a 'C' locale was detected,
380 EXTRACTED_X if x-... privateuse tag was detected,
381 EXTRACTED_X_JOKER if "*" joker was detected,
382 EXTRACTED_KNOWN_BAD if a bad but known (to be remapped) tag was detected
383 EXTRACTED_NONE else.
385 static Extraction simpleExtract( const OUString& rBcp47,
386 OUString& rLanguage,
387 OUString& rScript,
388 OUString& rCountry,
389 OUString& rRegion,
390 OUString& rVariants );
392 /** Convert Locale to BCP 47 string without resolving system and creating
393 temporary LanguageTag instances. */
394 static OUString convertToBcp47( const css::lang::Locale& rLocale );
399 LanguageTagImpl::LanguageTagImpl( const LanguageTag & rLanguageTag )
401 maLocale( rLanguageTag.maLocale),
402 maBcp47( rLanguageTag.maBcp47),
403 mpImplLangtag( nullptr),
404 mnLangID( rLanguageTag.mnLangID),
405 meScriptType( LanguageTag::ScriptType::UNKNOWN),
406 meIsValid( DECISION_DONTKNOW),
407 meIsIsoLocale( DECISION_DONTKNOW),
408 meIsIsoODF( DECISION_DONTKNOW),
409 meIsLiblangtagNeeded( DECISION_DONTKNOW),
410 mbSystemLocale( rLanguageTag.mbSystemLocale),
411 mbInitializedBcp47( rLanguageTag.mbInitializedBcp47),
412 mbInitializedLocale( rLanguageTag.mbInitializedLocale),
413 mbInitializedLangID( rLanguageTag.mbInitializedLangID),
414 mbCachedLanguage( false),
415 mbCachedScript( false),
416 mbCachedCountry( false),
417 mbCachedVariants( false),
418 mbCachedGlibcString( false)
423 LanguageTagImpl::LanguageTagImpl( const LanguageTagImpl & rLanguageTagImpl )
425 maLocale( rLanguageTagImpl.maLocale),
426 maBcp47( rLanguageTagImpl.maBcp47),
427 maCachedLanguage( rLanguageTagImpl.maCachedLanguage),
428 maCachedScript( rLanguageTagImpl.maCachedScript),
429 maCachedCountry( rLanguageTagImpl.maCachedCountry),
430 maCachedVariants( rLanguageTagImpl.maCachedVariants),
431 maCachedGlibcString( rLanguageTagImpl.maCachedGlibcString),
432 mpImplLangtag( rLanguageTagImpl.mpImplLangtag ?
433 lt_tag_copy( rLanguageTagImpl.mpImplLangtag) : nullptr),
434 mnLangID( rLanguageTagImpl.mnLangID),
435 meScriptType( rLanguageTagImpl.meScriptType),
436 meIsValid( rLanguageTagImpl.meIsValid),
437 meIsIsoLocale( rLanguageTagImpl.meIsIsoLocale),
438 meIsIsoODF( rLanguageTagImpl.meIsIsoODF),
439 meIsLiblangtagNeeded( rLanguageTagImpl.meIsLiblangtagNeeded),
440 mbSystemLocale( rLanguageTagImpl.mbSystemLocale),
441 mbInitializedBcp47( rLanguageTagImpl.mbInitializedBcp47),
442 mbInitializedLocale( rLanguageTagImpl.mbInitializedLocale),
443 mbInitializedLangID( rLanguageTagImpl.mbInitializedLangID),
444 mbCachedLanguage( rLanguageTagImpl.mbCachedLanguage),
445 mbCachedScript( rLanguageTagImpl.mbCachedScript),
446 mbCachedCountry( rLanguageTagImpl.mbCachedCountry),
447 mbCachedVariants( rLanguageTagImpl.mbCachedVariants),
448 mbCachedGlibcString( rLanguageTagImpl.mbCachedGlibcString)
450 if (mpImplLangtag)
451 theDataRef().init();
455 LanguageTagImpl& LanguageTagImpl::operator=( const LanguageTagImpl & rLanguageTagImpl )
457 if (&rLanguageTagImpl == this)
458 return *this;
460 maLocale = rLanguageTagImpl.maLocale;
461 maBcp47 = rLanguageTagImpl.maBcp47;
462 maCachedLanguage = rLanguageTagImpl.maCachedLanguage;
463 maCachedScript = rLanguageTagImpl.maCachedScript;
464 maCachedCountry = rLanguageTagImpl.maCachedCountry;
465 maCachedVariants = rLanguageTagImpl.maCachedVariants;
466 maCachedGlibcString = rLanguageTagImpl.maCachedGlibcString;
467 lt_tag_t * oldTag = mpImplLangtag;
468 mpImplLangtag = rLanguageTagImpl.mpImplLangtag ?
469 lt_tag_copy( rLanguageTagImpl.mpImplLangtag) : nullptr;
470 lt_tag_unref(oldTag);
471 mnLangID = rLanguageTagImpl.mnLangID;
472 meScriptType = rLanguageTagImpl.meScriptType;
473 meIsValid = rLanguageTagImpl.meIsValid;
474 meIsIsoLocale = rLanguageTagImpl.meIsIsoLocale;
475 meIsIsoODF = rLanguageTagImpl.meIsIsoODF;
476 meIsLiblangtagNeeded= rLanguageTagImpl.meIsLiblangtagNeeded;
477 mbSystemLocale = rLanguageTagImpl.mbSystemLocale;
478 mbInitializedBcp47 = rLanguageTagImpl.mbInitializedBcp47;
479 mbInitializedLocale = rLanguageTagImpl.mbInitializedLocale;
480 mbInitializedLangID = rLanguageTagImpl.mbInitializedLangID;
481 mbCachedLanguage = rLanguageTagImpl.mbCachedLanguage;
482 mbCachedScript = rLanguageTagImpl.mbCachedScript;
483 mbCachedCountry = rLanguageTagImpl.mbCachedCountry;
484 mbCachedVariants = rLanguageTagImpl.mbCachedVariants;
485 mbCachedGlibcString = rLanguageTagImpl.mbCachedGlibcString;
486 if (mpImplLangtag && !oldTag)
487 theDataRef().init();
488 return *this;
492 LanguageTagImpl::~LanguageTagImpl()
494 if (mpImplLangtag)
496 lt_tag_unref( mpImplLangtag);
501 LanguageTag::LanguageTag( const OUString & rBcp47LanguageTag, bool bCanonicalize )
503 maBcp47( rBcp47LanguageTag),
504 mnLangID( LANGUAGE_DONTKNOW),
505 mbSystemLocale( rBcp47LanguageTag.isEmpty()),
506 mbInitializedBcp47( !mbSystemLocale),
507 mbInitializedLocale( false),
508 mbInitializedLangID( false),
509 mbIsFallback( false)
511 if (bCanonicalize)
513 getImpl()->canonicalize();
514 // Registration itself may already have canonicalized, so do an
515 // unconditional sync.
516 syncFromImpl();
522 LanguageTag::LanguageTag( const css::lang::Locale & rLocale )
524 maLocale( rLocale),
525 mnLangID( LANGUAGE_DONTKNOW),
526 mbSystemLocale( rLocale.Language.isEmpty()),
527 mbInitializedBcp47( false),
528 mbInitializedLocale( false), // we do not know which mess we got passed in
529 mbInitializedLangID( false),
530 mbIsFallback( false)
532 handleVendorVariant( maLocale);
536 LanguageTag::LanguageTag( LanguageType nLanguage )
538 mnLangID( nLanguage),
539 mbSystemLocale( nLanguage == LANGUAGE_SYSTEM),
540 mbInitializedBcp47( false),
541 mbInitializedLocale( false),
542 mbInitializedLangID( !mbSystemLocale),
543 mbIsFallback( false)
548 LanguageTag::LanguageTag( const OUString& rBcp47, const OUString& rLanguage,
549 std::u16string_view rScript, const OUString& rCountry )
551 maBcp47( rBcp47),
552 mnLangID( LANGUAGE_DONTKNOW),
553 mbSystemLocale( rBcp47.isEmpty() && rLanguage.isEmpty()),
554 mbInitializedBcp47( !rBcp47.isEmpty()),
555 mbInitializedLocale( false),
556 mbInitializedLangID( false),
557 mbIsFallback( false)
559 if (mbSystemLocale || mbInitializedBcp47)
560 return;
562 if (rScript.empty())
564 maBcp47 = rLanguage + "-" + rCountry;
565 mbInitializedBcp47 = true;
566 maLocale.Language = rLanguage;
567 maLocale.Country = rCountry;
568 mbInitializedLocale = true;
570 else
572 if (rCountry.isEmpty())
573 maBcp47 = rLanguage + "-" + rScript;
574 else
575 maBcp47 = rLanguage + "-" + rScript + "-" + rCountry;
576 mbInitializedBcp47 = true;
577 maLocale.Language = I18NLANGTAG_QLT;
578 maLocale.Country = rCountry;
579 maLocale.Variant = maBcp47;
580 mbInitializedLocale = true;
585 LanguageTag::LanguageTag( const rtl_Locale & rLocale )
587 maLocale( rLocale.Language, rLocale.Country, rLocale.Variant),
588 mnLangID( LANGUAGE_DONTKNOW),
589 mbSystemLocale( maLocale.Language.isEmpty()),
590 mbInitializedBcp47( false),
591 mbInitializedLocale( !mbSystemLocale),
592 mbInitializedLangID( false),
593 mbIsFallback( false)
595 convertFromRtlLocale();
598 LanguageTag::~LanguageTag() {}
// Register this impl's BCP 47 string in theMapBcp47(), assign a LangID and
// cross-insert the impl into theMapLangID().
// - If no BCP 47 string is available (after trying to derive one from an
//   initialized Locale), an empty ImplPtr is returned.
// - If the BCP 47 map already holds a different impl for the string, this
//   object is overwritten with that impl's state and that impl is used.
// - The LangID is nRegisterID if that is usable, else the next on-the-fly
//   ID. If no ID is left, the impl is returned without LangID registration.
600 LanguageTag::ImplPtr LanguageTagImpl::registerOnTheFly( LanguageType nRegisterID )
602 LanguageTag::ImplPtr pImpl;
604 if (!mbInitializedBcp47)
606 if (mbInitializedLocale)
608 maBcp47 = LanguageTagImpl::convertToBcp47( maLocale);
609 mbInitializedBcp47 = !maBcp47.isEmpty();
612 if (maBcp47.isEmpty())
614 SAL_WARN( "i18nlangtag", "LanguageTagImpl::registerOnTheFly: no Bcp47 string, no registering");
615 return pImpl;
// Both maps are only touched while theMutex() is held.
618 std::unique_lock aGuard( theMutex());
620 MapBcp47& rMapBcp47 = theMapBcp47();
621 MapBcp47::const_iterator it( rMapBcp47.find( maBcp47));
622 bool bOtherImpl = false;
623 if (it != rMapBcp47.end())
625 SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: found impl for '" << maBcp47 << "'");
626 pImpl = (*it).second;
627 if (pImpl.get() != this)
629 // Could happen for example if during registerImpl() the tag was
630 // changed via canonicalize() and the result was already present in
631 // the map before, for example 'bn-Beng' => 'bn'. This specific
632 // case is now taken care of in registerImpl() and doesn't reach
633 // here. However, use the already existing impl if it matches.
634 SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: using other impl for this '" << maBcp47 << "'");
635 *this = *pImpl; // ensure consistency
636 bOtherImpl = true;
639 else
641 SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: new impl for '" << maBcp47 << "'");
// The map stores a copy of this object, not this object itself.
642 pImpl = std::make_shared<LanguageTagImpl>( *this);
643 rMapBcp47.insert( ::std::make_pair( maBcp47, pImpl));
// An ID is assigned unless an already existing other impl has one.
646 if (!bOtherImpl || !pImpl->mbInitializedLangID)
648 if (nRegisterID == LanguageType(0) || nRegisterID == LANGUAGE_DONTKNOW)
649 nRegisterID = getNextOnTheFlyLanguage();
650 else
652 // Accept a suggested ID only if it is not mapped yet to something
653 // different, otherwise we would end up with ambiguous assignments
654 // of different language tags, for example for the same primary
655 // LangID with "no", "nb" and "nn".
656 const MapLangID& rMapLangID = theMapLangID();
657 MapLangID::const_iterator itID( rMapLangID.find( nRegisterID));
658 if (itID != rMapLangID.end())
660 if ((*itID).second->maBcp47 != maBcp47)
662 SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: not using suggested 0x"
663 << ::std::hex << nRegisterID << " for '" << maBcp47 << "' have '"
664 << (*itID).second->maBcp47 << "'");
665 nRegisterID = getNextOnTheFlyLanguage();
667 else
669 SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: suggested 0x"
670 << ::std::hex << nRegisterID << " for '" << maBcp47 << "' already registered");
// getNextOnTheFlyLanguage() returns LanguageType(0) when exhausted.
674 if (!nRegisterID)
676 // out of IDs, nothing to register
677 return pImpl;
679 pImpl->mnLangID = nRegisterID;
680 pImpl->mbInitializedLangID = true;
// Keep this object in step with the registered impl.
681 if (pImpl.get() != this)
683 mnLangID = nRegisterID;
684 mbInitializedLangID = true;
// insert() leaves an existing entry for the same LangID untouched; that
// case is only logged.
688 ::std::pair< MapLangID::const_iterator, bool > res(
689 theMapLangID().insert( ::std::make_pair( pImpl->mnLangID, pImpl)));
690 if (res.second)
692 SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: cross-inserted 0x"
693 << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "'");
695 else
697 SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: not cross-inserted 0x"
698 << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "' have '"
699 << (*res.first).second->maBcp47 << "'");
702 return pImpl;
706 LanguageTag::ScriptType LanguageTag::getOnTheFlyScriptType( LanguageType nRegisterID )
708 const MapLangID& rMapLangID = theMapLangID();
709 MapLangID::const_iterator itID( rMapLangID.find( nRegisterID));
710 if (itID != rMapLangID.end())
711 return (*itID).second->getScriptType();
712 else
713 return ScriptType::UNKNOWN;
717 // static
718 void LanguageTag::setConfiguredSystemLanguage( LanguageType nLang )
720 if (nLang == LANGUAGE_DONTKNOW || nLang == LANGUAGE_SYSTEM)
722 SAL_WARN( "i18nlangtag",
723 "LanguageTag::setConfiguredSystemLanguage: refusing to set unresolved system locale 0x" <<
724 ::std::hex << nLang);
725 return;
727 SAL_INFO( "i18nlangtag", "LanguageTag::setConfiguredSystemLanguage: setting to 0x" << ::std::hex << nLang);
728 MsLangId::LanguageTagAccess::setConfiguredSystemLanguage( nLang);
729 // Reset system locale to none and let registerImpl() do the rest to
730 // initialize a new one.
731 theSystemLocale().reset();
732 LanguageTag aLanguageTag( LANGUAGE_SYSTEM);
733 aLanguageTag.registerImpl();
736 static bool lt_tag_parse_disabled = false;
738 // static
739 void LanguageTag::disable_lt_tag_parse()
741 lt_tag_parse_disabled = true;
744 static bool lcl_isKnownOnTheFlyID( LanguageType nLang )
746 return nLang != LANGUAGE_DONTKNOW && nLang != LANGUAGE_SYSTEM &&
747 (LanguageTag::isOnTheFlyID( nLang) || (nLang == MsLangId::getPrimaryLanguage( nLang)));
// Find or create the shared LanguageTagImpl for this tag and return it.
// Shortcuts taken before theMutex() is locked:
// - system locale tag with an already registered system impl;
// - LANGUAGE_DONTKNOW, which gets one function-static impl of its own;
// - a tag whose LangID or BCP 47 string equals that of the system impl.
// Otherwise, with theMutex() held, the impl is looked up in or added to
// theMapLangID() (LangID initialized) or theMapBcp47() (BCP 47 string
// present). It is cross-inserted into the other map when the conversion
// round-trips. If neither is available, an unregistered impl is returned.
// NOTE(review): theSystemLocale() and theDontKnow are read and written
// before the lock is taken - confirm that callers serialize access.
751 LanguageTag::ImplPtr LanguageTag::registerImpl() const
753 // XXX NOTE: Do not use non-static LanguageTag::convert...() member methods
754 // here as they access getImpl() and syncFromImpl() and would lead to
755 // recursion. Also do not use the static LanguageTag::convertTo...()
756 // methods as they may create temporary LanguageTag instances. Only
757 // LanguageTagImpl::convertToBcp47(Locale) is ok.
759 ImplPtr pImpl;
761 #if OSL_DEBUG_LEVEL > 0
762 static size_t nCalls = 0;
763 ++nCalls;
764 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCalls << " calls");
765 #endif
767 // Do not register unresolved system locale, also force LangID if system
768 // and take the system locale shortcut if possible.
769 if (mbSystemLocale)
771 pImpl = theSystemLocale();
772 if (pImpl)
774 #if OSL_DEBUG_LEVEL > 0
775 static size_t nCallsSystem = 0;
776 ++nCallsSystem;
777 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystem << " system calls");
778 #endif
779 return pImpl;
781 if (!mbInitializedLangID)
783 mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
784 mbInitializedLangID = (mnLangID != LANGUAGE_SYSTEM);
785 SAL_WARN_IF( !mbInitializedLangID, "i18nlangtag", "LanguageTag::registerImpl: can't resolve system!");
789 if (mbInitializedLangID)
791 if (mnLangID == LANGUAGE_DONTKNOW)
793 static LanguageTag::ImplPtr theDontKnow;
794 // Heavy usage of LANGUAGE_DONTKNOW, make it an own Impl for all the
795 // conversion attempts. At the same time provide a central breakpoint
796 // to inspect such places.
797 if (!theDontKnow)
798 theDontKnow = std::make_shared<LanguageTagImpl>( *this);
799 pImpl = theDontKnow;
800 #if OSL_DEBUG_LEVEL > 0
801 static size_t nCallsDontKnow = 0;
802 ++nCallsDontKnow;
803 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsDontKnow << " DontKnow calls");
804 #endif
805 return pImpl;
807 else
809 // A great share are calls for a system equal locale.
810 pImpl = theSystemLocale();
811 if (pImpl && pImpl->mnLangID == mnLangID)
813 #if OSL_DEBUG_LEVEL > 0
814 static size_t nCallsSystemEqual = 0;
815 ++nCallsSystemEqual;
816 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystemEqual
817 << " system equal LangID calls");
818 #endif
819 return pImpl;
824 // Force Bcp47 if not LangID.
825 if (!mbInitializedLangID && !mbInitializedBcp47)
827 // The one central point to set mbInitializedLocale=true if a
828 // LanguageTag was initialized with a Locale. We will now convert and
829 // possibly later resolve it.
830 if (!mbInitializedLocale && (mbSystemLocale || !maLocale.Language.isEmpty()))
831 mbInitializedLocale = true;
832 SAL_WARN_IF( !mbInitializedLocale, "i18nlangtag", "LanguageTag::registerImpl: still not mbInitializedLocale");
834 maBcp47 = LanguageTagImpl::convertToBcp47( maLocale);
835 mbInitializedBcp47 = !maBcp47.isEmpty();
838 if (mbInitializedBcp47)
840 // A great share are calls for a system equal locale.
841 pImpl = theSystemLocale();
842 if (pImpl && pImpl->maBcp47 == maBcp47)
844 #if OSL_DEBUG_LEVEL > 0
845 static size_t nCallsSystemEqual = 0;
846 ++nCallsSystemEqual;
847 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystemEqual << " system equal BCP47 calls");
848 #endif
849 return pImpl;
853 #if OSL_DEBUG_LEVEL > 0
854 static size_t nCallsNonSystem = 0;
855 ++nCallsNonSystem;
856 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsNonSystem << " non-system calls");
857 #endif
// From here on the maps are accessed; the mutex is recursive, so the
// re-entry described below does not block.
859 std::unique_lock aGuard( theMutex());
861 #if OSL_DEBUG_LEVEL > 0
862 static long nRunning = 0;
863 // Entering twice here is ok, which is needed for fallback init in
864 // getKnowns() in canonicalize() via pImpl->convertBcp47ToLocale() below,
865 // everything else is suspicious.
866 SAL_WARN_IF( nRunning > 1, "i18nlangtag", "LanguageTag::registerImpl: re-entered for '"
867 << maBcp47 << "' 0x" << ::std::hex << mnLangID );
868 struct Runner { Runner() { ++nRunning; } ~Runner() { --nRunning; } } aRunner;
869 #endif
871 // Prefer LangID map as find+insert needs less comparison work.
872 if (mbInitializedLangID)
874 MapLangID& rMap = theMapLangID();
875 MapLangID::const_iterator it( rMap.find( mnLangID));
876 if (it != rMap.end())
878 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: found impl for 0x" << ::std::hex << mnLangID);
879 pImpl = (*it).second;
881 else
883 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: new impl for 0x" << ::std::hex << mnLangID);
884 pImpl = std::make_shared<LanguageTagImpl>( *this);
885 rMap.insert( ::std::make_pair( mnLangID, pImpl));
886 // Try round-trip.
887 if (!pImpl->mbInitializedLocale)
888 pImpl->convertLangToLocale();
889 LanguageType nLang = MsLangId::Conversion::convertLocaleToLanguage( pImpl->maLocale);
890 // If round-trip is identical cross-insert to Bcp47 map.
891 if (nLang == pImpl->mnLangID)
893 if (!pImpl->mbInitializedBcp47)
894 pImpl->convertLocaleToBcp47();
895 ::std::pair< MapBcp47::const_iterator, bool > res(
896 theMapBcp47().insert( ::std::make_pair( pImpl->maBcp47, pImpl)));
897 if (res.second)
899 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: cross-inserted '" << pImpl->maBcp47 << "' for 0x" << ::std::hex << mnLangID);
901 else
903 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted '" << pImpl->maBcp47 << "' for 0x" << ::std::hex << mnLangID << " have 0x"
904 << ::std::hex << (*res.first).second->mnLangID);
907 else
909 if (!pImpl->mbInitializedBcp47)
910 pImpl->convertLocaleToBcp47();
911 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted '" << pImpl->maBcp47 << "' for 0x" << ::std::hex << mnLangID << " round-trip to 0x" << ::std::hex << nLang);
// No LangID: register by BCP 47 string (the map compares keys ignoring
// ASCII case).
915 else if (!maBcp47.isEmpty())
917 MapBcp47& rMap = theMapBcp47();
918 MapBcp47::const_iterator it( rMap.find( maBcp47));
919 if (it != rMap.end())
921 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: found impl for '" << maBcp47 << "'");
922 pImpl = (*it).second;
924 else
926 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: new impl for '" << maBcp47 << "'");
927 pImpl = std::make_shared<LanguageTagImpl>( *this);
928 ::std::pair< MapBcp47::iterator, bool > insOrig( rMap.insert( ::std::make_pair( maBcp47, pImpl)));
929 // If changed after canonicalize() also add the resulting tag to
930 // the map.
931 if (pImpl->synCanonicalize())
933 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: canonicalized to '" << pImpl->maBcp47 << "'");
934 ::std::pair< MapBcp47::const_iterator, bool > insCanon(
935 rMap.insert( ::std::make_pair( pImpl->maBcp47, pImpl)));
936 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << (insCanon.second ? "" : "not ")
937 << "inserted '" << pImpl->maBcp47 << "'");
938 // If the canonicalized tag already existed (was not inserted)
939 // and impls are different, make this impl that impl and skip
940 // the rest if that LangID is present as well. The existing
941 // entry may or may not be different, it may even be strictly
942 // identical to this if it differs only in case (e.g. ko-kr =>
943 // ko-KR) which was corrected in canonicalize() hence also in
944 // the map entry but comparison is case insensitive and found
945 // it again.
946 if (!insCanon.second && (*insCanon.first).second != pImpl)
// Repoint the entry of the original string to the existing impl as well.
948 (*insOrig.first).second = pImpl = (*insCanon.first).second;
949 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: share impl with 0x"
950 << ::std::hex << pImpl->mnLangID);
953 if (!pImpl->mbInitializedLangID)
955 // Try round-trip Bcp47->Locale->LangID->Locale->Bcp47.
956 if (!pImpl->mbInitializedLocale)
957 pImpl->convertBcp47ToLocale();
958 if (!pImpl->mbInitializedLangID)
959 pImpl->convertLocaleToLang( true);
960 // Unconditionally insert (round-trip is possible) for
961 // on-the-fly IDs and (generated or not) suggested IDs.
962 bool bInsert = lcl_isKnownOnTheFlyID( pImpl->mnLangID);
963 OUString aBcp47;
964 if (!bInsert)
966 if (pImpl->mnLangID != LANGUAGE_DONTKNOW)
968 // May have involved canonicalize(), so compare with
969 // pImpl->maBcp47 instead of maBcp47!
970 aBcp47 = LanguageTagImpl::convertToBcp47(
971 MsLangId::Conversion::convertLanguageToLocale( pImpl->mnLangID, true));
972 bInsert = (aBcp47 == pImpl->maBcp47);
975 // If round-trip is identical cross-insert to Bcp47 map.
976 if (bInsert)
978 ::std::pair< MapLangID::const_iterator, bool > res(
979 theMapLangID().insert( ::std::make_pair( pImpl->mnLangID, pImpl)));
980 if (res.second)
982 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: cross-inserted 0x"
983 << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "'");
985 else
987 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted 0x"
988 << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "' have '"
989 << (*res.first).second->maBcp47 << "'");
992 else
994 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted 0x"
995 << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "' round-trip to '"
996 << aBcp47 << "'");
// Neither LangID nor BCP 47 string: the impl is created but put in no map.
1001 else
1003 SAL_WARN( "i18nlangtag", "LanguageTag::registerImpl: can't register for 0x" << ::std::hex << mnLangID );
1004 pImpl = std::make_shared<LanguageTagImpl>( *this);
1007 // If we reach here for mbSystemLocale we didn't have theSystemLocale
1008 // above, so add it.
1009 if (mbSystemLocale && mbInitializedLangID)
1011 theSystemLocale() = pImpl;
1012 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: added system locale 0x"
1013 << ::std::hex << pImpl->mnLangID << " '" << pImpl->maBcp47 << "'");
1016 return pImpl;
1020 LanguageTagImpl const * LanguageTag::getImpl() const
1022 if (!mpImpl)
1024 mpImpl = registerImpl();
1025 syncVarsFromRawImpl();
1027 return mpImpl.get();
1030 LanguageTagImpl * LanguageTag::getImpl()
1032 if (!mpImpl)
1034 mpImpl = registerImpl();
1035 syncVarsFromRawImpl();
1037 return mpImpl.get();
1040 void LanguageTag::resetVars()
1042 mpImpl.reset();
1043 maLocale = lang::Locale();
1044 maBcp47.clear();
1045 mnLangID = LANGUAGE_SYSTEM;
1046 mbSystemLocale = true;
1047 mbInitializedBcp47 = false;
1048 mbInitializedLocale = false;
1049 mbInitializedLangID = false;
1050 mbIsFallback = false;
1054 LanguageTag & LanguageTag::reset( const OUString & rBcp47LanguageTag )
1056 resetVars();
1057 maBcp47 = rBcp47LanguageTag;
1058 mbSystemLocale = rBcp47LanguageTag.isEmpty();
1059 mbInitializedBcp47 = !mbSystemLocale;
1061 return *this;
1065 LanguageTag & LanguageTag::reset( const css::lang::Locale & rLocale )
1067 resetVars();
1068 maLocale = rLocale;
1069 mbSystemLocale = rLocale.Language.isEmpty();
1070 mbInitializedLocale = !mbSystemLocale;
1071 handleVendorVariant( maLocale);
1072 return *this;
1076 LanguageTag & LanguageTag::reset( LanguageType nLanguage )
1078 resetVars();
1079 mnLangID = nLanguage;
1080 mbSystemLocale = nLanguage == LANGUAGE_SYSTEM;
1081 mbInitializedLangID = !mbSystemLocale;
1082 return *this;
// Canonicalizes maBcp47 and decides whether liblangtag is needed for this tag.
//
// Returns true if maBcp47 was changed. Every return path sets meIsValid to
// either DECISION_YES or DECISION_NO. If meIsLiblangtagNeeded ends up as
// DECISION_NO the function returns before liblangtag is touched; otherwise
// meIsLiblangtagNeeded is set to DECISION_YES and the tag is parsed and
// canonicalized through liblangtag.
1086 bool LanguageTagImpl::canonicalize()
1088 #ifdef erDEBUG
1089 // dump once
1090 struct dumper
1092 lt_tag_t** mpp;
1093 explicit dumper( lt_tag_t** pp ) : mpp( *pp ? NULL : pp) {}
1094 ~dumper() { if (mpp && *mpp) lt_tag_dump( *mpp); }
1096 dumper aDumper( &mpImplLangtag);
1097 #endif
1099 bool bChanged = false;
1101 // Side effect: have maBcp47 in any case, resolved system.
1102 // Some methods calling canonicalize() (or not calling it due to
1103 // meIsLiblangtagNeeded==DECISION_NO) rely on this! Hence do not set
1104 // meIsLiblangtagNeeded anywhere else than hereafter.
1105 getBcp47();
1107 // The simple cases and known locales don't need liblangtag processing,
1108 // which also avoids loading liblangtag data on startup.
1109 if (meIsLiblangtagNeeded == DECISION_DONTKNOW)
// These flags record that maLocale / mnLangID were only filled in for the
// lookup in this block; both are reverted before the block is left.
1111 bool bTemporaryLocale = false;
1112 bool bTemporaryLangID = false;
1113 if (!mbInitializedLocale && !mbInitializedLangID)
1115 if (mbSystemLocale)
1117 mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
1118 mbInitializedLangID = true;
1120 else
1122 // Now this is getting funny... we only have some BCP47 string
1123 // and want to determine if parsing it would be possible
1124 // without using liblangtag just to see if it is a simple known
1125 // locale or could fall back to one.
1126 OUString aLanguage, aScript, aCountry, aRegion, aVariants;
1127 Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry, aRegion, aVariants);
1128 if (eExt != EXTRACTED_NONE)
1130 if (eExt == EXTRACTED_LSC || eExt == EXTRACTED_LV || eExt == EXTRACTED_LR)
1132 // Rebuild bcp47 with proper casing of tags.
1133 OUStringBuffer aBuf( aLanguage.getLength() + 1 + aScript.getLength() +
1134 1 + aCountry.getLength() + 1 + aRegion.getLength() + 1 + aVariants.getLength());
1135 aBuf.append( aLanguage);
1136 if (!aScript.isEmpty())
1137 aBuf.append("-" + aScript);
1138 if (!aCountry.isEmpty())
1139 aBuf.append("-" + aCountry);
1140 if (!aRegion.isEmpty())
1141 aBuf.append("-" + aRegion);
1142 if (!aVariants.isEmpty())
1143 aBuf.append("-" + aVariants);
1144 OUString aStr( aBuf.makeStringAndClear());
1146 if (maBcp47 != aStr)
1148 maBcp47 = aStr;
1149 bChanged = true;
// Fill maLocale from the extracted parts: plain language/country where
// possible, otherwise the I18NLANGTAG_QLT form carrying maBcp47 as Variant.
1152 if (eExt == EXTRACTED_LSC && aScript.isEmpty())
1154 maLocale.Language = aLanguage;
1155 maLocale.Country = aCountry;
1157 else if (eExt == EXTRACTED_C_LOCALE)
1159 maLocale.Language = aLanguage;
1160 maLocale.Country = aCountry;
1162 else
1164 maLocale.Language = I18NLANGTAG_QLT;
1165 maLocale.Country = aCountry;
1166 maLocale.Variant = maBcp47;
1168 bTemporaryLocale = mbInitializedLocale = true;
1172 if (mbInitializedLangID && !mbInitializedLocale)
1174 // Do not call getLocale() here because that prefers
1175 // convertBcp47ToLocale() which would end up in recursion via
1176 // isIsoLocale()!
1178 // Prepare to verify that we have a known locale, not just an
1179 // arbitrary MS-LangID.
1180 convertLangToLocale();
1182 if (mbInitializedLocale)
1184 if (!mbInitializedLangID)
1186 if (convertLocaleToLang( false))
1187 bChanged = true;
1188 if (bTemporaryLocale || mnLangID == LANGUAGE_DONTKNOW)
1189 bTemporaryLangID = true;
1191 if (mnLangID != LANGUAGE_DONTKNOW && mnLangID != LANGUAGE_SYSTEM)
1192 meIsLiblangtagNeeded = DECISION_NO; // known locale
1193 else
1195 const KnownTagSet& rKnowns = getKnowns();
1196 if (rKnowns.find( maBcp47) != rKnowns.end())
1197 meIsLiblangtagNeeded = DECISION_NO; // known fallback
1199 // We may have an internal override "canonicalization".
1200 lang::Locale aNew( MsLangId::Conversion::getOverride( maLocale));
1201 if (!aNew.Language.isEmpty() &&
1202 (aNew.Language != maLocale.Language ||
1203 aNew.Country != maLocale.Country ||
1204 aNew.Variant != maLocale.Variant))
1206 maBcp47 = LanguageTagImpl::convertToBcp47( aNew);
1207 bChanged = true;
1208 meIsIsoLocale = DECISION_DONTKNOW;
1209 meIsIsoODF = DECISION_DONTKNOW;
1210 meIsLiblangtagNeeded = DECISION_NO; // known locale
// Revert the locale and LangID that were set only temporarily above.
1213 if (bTemporaryLocale)
1215 mbInitializedLocale = false;
1216 maLocale = lang::Locale();
1218 if (bTemporaryLangID)
1220 mbInitializedLangID = false;
1221 mnLangID = LANGUAGE_DONTKNOW;
1224 if (meIsLiblangtagNeeded == DECISION_NO)
1226 meIsValid = DECISION_YES; // really, known must be valid ...
1227 return bChanged; // that's it
1230 meIsLiblangtagNeeded = DECISION_YES;
1231 SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: using liblangtag for '" << maBcp47 << "'");
// Create the liblangtag tag object on first use.
1233 if (!mpImplLangtag)
1235 theDataRef().init();
1236 mpImplLangtag = lt_tag_new();
1239 myLtError aError;
1241 if (!lt_tag_parse_disabled && lt_tag_parse(mpImplLangtag, OUStringToOString(maBcp47, RTL_TEXTENCODING_UTF8).getStr(), &aError.p))
1243 if (aError.p)
1245 SAL_WARN("i18nlangtag", "LanguageTagImpl::canonicalize: could not parse '" << maBcp47 << "'");
1247 else
// pTag is released with free() on every path that leaves the if (pTag) branch.
1249 char* pTag = lt_tag_canonicalize(mpImplLangtag, &aError.p);
1250 SAL_WARN_IF(!pTag, "i18nlangtag", "LanguageTagImpl::canonicalize: could not canonicalize '" << maBcp47 << "'");
1251 if (pTag)
1253 OUString aNew(OUString::createFromAscii(pTag));
1254 // Make the lt_tag_t follow the new string if different, which
1255 // removes default script and such.
1256 if (maBcp47 != aNew)
1258 maBcp47 = aNew;
1259 bChanged = true;
1260 meIsIsoLocale = DECISION_DONTKNOW;
1261 meIsIsoODF = DECISION_DONTKNOW;
1262 if (!lt_tag_parse(mpImplLangtag, pTag, &aError.p))
1264 SAL_WARN("i18nlangtag", "LanguageTagImpl::canonicalize: could not reparse '"
1265 << maBcp47 << "'");
1266 free(pTag);
1267 meIsValid = DECISION_NO;
1268 return bChanged;
1271 free(pTag);
1272 meIsValid = DECISION_YES;
1273 return bChanged;
1277 else
1279 SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: could not parse '" << maBcp47 << "'");
// Reached when parsing or canonicalizing did not succeed.
1281 meIsValid = DECISION_NO;
1282 return bChanged;
1286 bool LanguageTagImpl::synCanonicalize()
1288 bool bChanged = false;
1289 if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
1291 bChanged = canonicalize();
1292 if (bChanged)
1294 if (mbInitializedLocale)
1295 convertBcp47ToLocale();
1296 if (mbInitializedLangID)
1297 convertBcp47ToLang();
1300 return bChanged;
1304 void LanguageTag::syncFromImpl()
1306 LanguageTagImpl* pImpl = getImpl();
1307 bool bRegister = ((mbInitializedBcp47 && maBcp47 != pImpl->maBcp47) ||
1308 (mbInitializedLangID && mnLangID != pImpl->mnLangID));
1309 SAL_INFO_IF( bRegister, "i18nlangtag",
1310 "LanguageTag::syncFromImpl: re-registering, '" << pImpl->maBcp47 << "' vs '" << maBcp47 <<
1311 " and 0x" << ::std::hex << pImpl->mnLangID << " vs 0x" << ::std::hex << mnLangID);
1312 syncVarsFromRawImpl();
1313 if (bRegister)
1314 mpImpl = registerImpl();
1318 void LanguageTag::syncVarsFromImpl() const
1320 if (!mpImpl)
1321 getImpl(); // with side effect syncVarsFromRawImpl()
1322 else
1323 syncVarsFromRawImpl();
1327 void LanguageTag::syncVarsFromRawImpl() const
1329 // Do not use getImpl() here.
1330 LanguageTagImpl* pImpl = mpImpl.get();
1331 if (!pImpl)
1332 return;
1334 // Obviously only mutable variables.
1335 mbInitializedBcp47 = pImpl->mbInitializedBcp47;
1336 maBcp47 = pImpl->maBcp47;
1337 mbInitializedLocale = pImpl->mbInitializedLocale;
1338 maLocale = pImpl->maLocale;
1339 mbInitializedLangID = pImpl->mbInitializedLangID;
1340 mnLangID = pImpl->mnLangID;
1344 bool LanguageTag::synCanonicalize()
1346 bool bChanged = getImpl()->synCanonicalize();
1347 if (bChanged)
1348 syncFromImpl();
1349 return bChanged;
1353 void LanguageTagImpl::convertLocaleToBcp47()
1355 if (mbSystemLocale && !mbInitializedLocale)
1356 convertLangToLocale();
1358 if (maLocale.Language.isEmpty())
1360 // Do not call LanguageTag::convertToBcp47(Locale) that for an empty
1361 // locale via LanguageTag::convertToBcp47(LanguageType) and
1362 // LanguageTag::convertToLocale(LanguageType) would instantiate another
1363 // LanguageTag.
1364 maLocale = MsLangId::Conversion::convertLanguageToLocale( LANGUAGE_SYSTEM, false);
1366 if (maLocale.Language.isEmpty())
1368 maBcp47.clear(); // bad luck
1370 else if (maLocale.Language == I18NLANGTAG_QLT)
1372 maBcp47 = maLocale.Variant;
1373 meIsIsoLocale = DECISION_NO;
1375 else
1377 maBcp47 = LanguageTag::convertToBcp47( maLocale );
1379 mbInitializedBcp47 = true;
// Derives mnLangID from maLocale, or from the real system language if
// mbSystemLocale is set, and always flags the LangID as initialized.
//
// bAllowOnTheFlyID: if true and no known LangID was found, registerOnTheFly()
// is called for a tag that isValidBcp47(); otherwise a warning is logged.
//
// Returns true if the lookup through convertIsoNamesToLanguage() succeeded
// and converting back changed maBcp47.
1383 bool LanguageTagImpl::convertLocaleToLang( bool bAllowOnTheFlyID )
1385 bool bRemapped = false;
1386 if (mbSystemLocale)
1388 mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
1390 else
1392 mnLangID = MsLangId::Conversion::convertLocaleToLanguage( maLocale);
1393 if (mnLangID == LANGUAGE_DONTKNOW)
1395 // convertLocaleToLanguage() only searches in ISO and private
1396 // definitions, search in remaining definitions, i.e. for the "C"
1397 // locale and non-standard things like "sr-latin" or "german" to
1398 // resolve to a known locale, skipping ISO lll-CC that were already
1399 // searched.
1400 mnLangID = MsLangId::Conversion::convertIsoNamesToLanguage( maLocale.Language, maLocale.Country, true);
1401 if (mnLangID != LANGUAGE_DONTKNOW)
1403 // If one found, convert back and adapt Locale and Bcp47
1404 // strings so we have a matching entry.
1405 OUString aOrgBcp47( maBcp47);
1406 convertLangToLocale();
1407 convertLocaleToBcp47();
1408 bRemapped = (maBcp47 != aOrgBcp47);
1411 if (mnLangID == LANGUAGE_DONTKNOW && bAllowOnTheFlyID)
1413 if (isValidBcp47())
1415 // For language-only (including script) look if we know some
1416 // locale of that language and if so try to use the primary
1417 // language ID of that instead of generating an on-the-fly ID.
1418 if (getCountry().isEmpty() && isIsoODF())
1420 lang::Locale aLoc( MsLangId::Conversion::lookupFallbackLocale( maLocale));
1421 // 'en-US' is last resort, do not use except when looking
1422 // for 'en'.
1423 if (aLoc.Language != "en" || getLanguage() == "en")
1425 mnLangID = MsLangId::Conversion::convertLocaleToLanguage( aLoc);
1426 if (mnLangID != LANGUAGE_DONTKNOW)
1427 mnLangID = MsLangId::getPrimaryLanguage( mnLangID);
// mnLangID is passed as it stands here, i.e. possibly still LANGUAGE_DONTKNOW.
1430 registerOnTheFly( mnLangID);
1432 else
1434 SAL_WARN( "i18nlangtag", "LanguageTagImpl::convertLocaleToLang: with bAllowOnTheFlyID invalid '"
1435 << maBcp47 << "'");
1439 mbInitializedLangID = true;
1440 return bRemapped;
1444 void LanguageTag::convertLocaleToLang()
1446 getImpl()->convertLocaleToLang( true);
1447 syncFromImpl();
1451 void LanguageTagImpl::convertBcp47ToLocale()
1453 bool bIso = isIsoLocale();
1454 if (bIso)
1456 maLocale.Language = getLanguageFromLangtag();
1457 maLocale.Country = getRegionFromLangtag();
1458 maLocale.Variant.clear();
1460 else
1462 maLocale.Language = I18NLANGTAG_QLT;
1463 maLocale.Country = getCountry();
1464 maLocale.Variant = maBcp47;
1466 mbInitializedLocale = true;
1470 void LanguageTag::convertBcp47ToLocale()
1472 getImpl()->convertBcp47ToLocale();
1473 syncFromImpl();
1477 void LanguageTagImpl::convertBcp47ToLang()
1479 if (mbSystemLocale)
1481 mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
1483 else
1485 if (!mbInitializedLocale)
1486 convertBcp47ToLocale();
1487 convertLocaleToLang( true);
1489 mbInitializedLangID = true;
1493 void LanguageTag::convertBcp47ToLang()
1495 getImpl()->convertBcp47ToLang();
1496 syncFromImpl();
1500 void LanguageTagImpl::convertLangToLocale()
1502 if (mbSystemLocale && !mbInitializedLangID)
1504 mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
1505 mbInitializedLangID = true;
1507 // Resolve system here! The original is remembered as mbSystemLocale.
1508 maLocale = MsLangId::Conversion::convertLanguageToLocale( mnLangID, false);
1509 mbInitializedLocale = true;
1513 void LanguageTag::convertLangToLocale()
1515 getImpl()->convertLangToLocale();
1516 syncFromImpl();
1520 void LanguageTagImpl::convertLangToBcp47()
1522 if (!mbInitializedLocale)
1523 convertLangToLocale();
1524 convertLocaleToBcp47();
1525 mbInitializedBcp47 = true;
1529 void LanguageTag::convertFromRtlLocale()
1531 // The rtl_Locale follows the Open Group Base Specification,
1532 // 8.2 Internationalization Variables
1533 // language[_territory][.codeset][@modifier]
1534 // On GNU/Linux systems usually being glibc locales.
1535 // sal/osl/unx/nlsupport.c _parse_locale() parses them into
1536 // Language: language 2 or 3 alpha code
1537 // Country: [territory] 2 alpha code
1538 // Variant: [.codeset][@modifier]
1539 // Variant effectively contains anything that follows the territory, not
1540 // looking for '.' dot delimiter or '@' modifier content.
1541 if (maLocale.Variant.isEmpty())
1542 return;
1544 OString aStr = OUStringToOString(maLocale.Language, RTL_TEXTENCODING_UTF8) + "_" + OUStringToOString(Concat2View(maLocale.Country + maLocale.Variant),
1545 RTL_TEXTENCODING_UTF8);
1546 /* FIXME: let liblangtag parse this entirely with
1547 * lt_tag_convert_from_locale() but that needs a patch to pass the
1548 * string. */
1549 #if 0
1550 myLtError aError;
1551 theDataRef::get().init();
1552 mpImplLangtag = lt_tag_convert_from_locale( aStr.getStr(), &aError.p);
1553 maBcp47 = OStringToOUString( lt_tag_get_string( mpImplLangtag), RTL_TEXTENCODING_UTF8);
1554 mbInitializedBcp47 = true;
1555 #else
1556 mnLangID = MsLangId::convertUnxByteStringToLanguage( aStr);
1557 if (mnLangID == LANGUAGE_DONTKNOW)
1559 SAL_WARN( "i18nlangtag", "LanguageTag(rtl_Locale) - unknown: " << aStr);
1560 mnLangID = LANGUAGE_ENGLISH_US; // we need _something_ here
1562 mbInitializedLangID = true;
1563 #endif
1564 maLocale = lang::Locale();
1565 mbInitializedLocale = false;
1569 const OUString & LanguageTagImpl::getBcp47() const
1571 if (!mbInitializedBcp47)
1573 if (mbInitializedLocale)
1574 const_cast<LanguageTagImpl*>(this)->convertLocaleToBcp47();
1575 else
1576 const_cast<LanguageTagImpl*>(this)->convertLangToBcp47();
1578 return maBcp47;
1582 const OUString & LanguageTag::getBcp47( bool bResolveSystem ) const
1584 static const OUString theEmptyBcp47 = u"";
1586 if (!bResolveSystem && mbSystemLocale)
1587 return theEmptyBcp47;
1588 if (!mbInitializedBcp47)
1589 syncVarsFromImpl();
1590 if (!mbInitializedBcp47)
1592 getImpl()->getBcp47();
1593 const_cast<LanguageTag*>(this)->syncFromImpl();
1595 return maBcp47;
1599 OUString LanguageTagImpl::getLanguageFromLangtag()
1601 OUString aLanguage;
1602 synCanonicalize();
1603 if (maBcp47.isEmpty())
1604 return aLanguage;
1605 if (mpImplLangtag)
1607 const lt_lang_t* pLangT = lt_tag_get_language( mpImplLangtag);
1608 SAL_WARN_IF( !pLangT, "i18nlangtag",
1609 "LanguageTag::getLanguageFromLangtag: pLangT==NULL for '" << maBcp47 << "'");
1610 if (!pLangT)
1611 return aLanguage;
1612 const char* pLang = lt_lang_get_tag( pLangT);
1613 SAL_WARN_IF( !pLang, "i18nlangtag",
1614 "LanguageTag::getLanguageFromLangtag: pLang==NULL for '" << maBcp47 << "'");
1615 if (pLang)
1616 aLanguage = OUString::createFromAscii( pLang);
1618 else
1620 if (mbCachedLanguage || cacheSimpleLSCV())
1621 aLanguage = maCachedLanguage;
1623 return aLanguage;
1627 OUString LanguageTagImpl::getScriptFromLangtag()
1629 OUString aScript;
1630 synCanonicalize();
1631 if (maBcp47.isEmpty())
1632 return aScript;
1633 if (mpImplLangtag)
1635 const lt_script_t* pScriptT = lt_tag_get_script( mpImplLangtag);
1636 // pScriptT==NULL is valid for default scripts
1637 if (!pScriptT)
1638 return aScript;
1639 const char* pScript = lt_script_get_tag( pScriptT);
1640 SAL_WARN_IF( !pScript, "i18nlangtag", "LanguageTag::getScriptFromLangtag: pScript==NULL");
1641 if (pScript)
1642 aScript = OUString::createFromAscii( pScript);
1644 else
1646 if (mbCachedScript || cacheSimpleLSCV())
1647 aScript = maCachedScript;
1649 return aScript;
1653 OUString LanguageTagImpl::getRegionFromLangtag()
1655 OUString aRegion;
1656 synCanonicalize();
1657 if (maBcp47.isEmpty())
1658 return aRegion;
1659 if (mpImplLangtag)
1661 const lt_region_t* pRegionT = lt_tag_get_region( mpImplLangtag);
1662 // pRegionT==NULL is valid for language only tags, rough check here
1663 // that does not take sophisticated tags into account that actually
1664 // should have a region, check for ll, lll, ll-Ssss and lll-Ssss so
1665 // that ll-CC and lll-CC actually fail.
1666 SAL_WARN_IF( !pRegionT &&
1667 maBcp47.getLength() != 2 && maBcp47.getLength() != 3 &&
1668 maBcp47.getLength() != 7 && maBcp47.getLength() != 8,
1669 "i18nlangtag", "LanguageTag::getRegionFromLangtag: pRegionT==NULL for '" << maBcp47 << "'");
1670 if (!pRegionT)
1671 return aRegion;
1672 const char* pRegion = lt_region_get_tag( pRegionT);
1673 SAL_WARN_IF( !pRegion, "i18nlangtag",
1674 "LanguageTag::getRegionFromLangtag: pRegion==NULL for'" << maBcp47 << "'");
1675 if (pRegion)
1676 aRegion = OUString::createFromAscii( pRegion);
1678 else
1680 if (mbCachedCountry || cacheSimpleLSCV())
1681 aRegion = maCachedCountry;
1683 return aRegion;
1687 OUString LanguageTagImpl::getVariantsFromLangtag()
1689 OUStringBuffer aVariants;
1690 synCanonicalize();
1691 if (maBcp47.isEmpty())
1692 return OUString();
1693 if (mpImplLangtag)
1695 const lt_list_t* pVariantsT = lt_tag_get_variants( mpImplLangtag);
1696 for (const lt_list_t* pE = pVariantsT; pE; pE = lt_list_next( pE))
1698 const lt_variant_t* pVariantT = static_cast<const lt_variant_t*>(lt_list_value( pE));
1699 if (pVariantT)
1701 const char* p = lt_variant_get_tag( pVariantT);
1702 if (p)
1704 if (!aVariants.isEmpty())
1705 aVariants.append("-");
1706 aVariants.appendAscii(p);
1711 else
1713 if (mbCachedVariants || cacheSimpleLSCV())
1714 aVariants = maCachedVariants;
1716 return aVariants.makeStringAndClear();
1720 const css::lang::Locale & LanguageTag::getLocale( bool bResolveSystem ) const
1722 // "static" to be returned as const reference to an empty locale.
1723 static lang::Locale theEmptyLocale;
1725 if (!bResolveSystem && mbSystemLocale)
1726 return theEmptyLocale;
1727 if (!mbInitializedLocale)
1728 syncVarsFromImpl();
1729 if (!mbInitializedLocale)
1731 if (mbInitializedBcp47)
1732 const_cast<LanguageTag*>(this)->convertBcp47ToLocale();
1733 else
1734 const_cast<LanguageTag*>(this)->convertLangToLocale();
1736 return maLocale;
1740 LanguageType LanguageTag::getLanguageType( bool bResolveSystem ) const
1742 if (!bResolveSystem && mbSystemLocale)
1743 return LANGUAGE_SYSTEM;
1744 if (!mbInitializedLangID)
1745 syncVarsFromImpl();
1746 if (!mbInitializedLangID)
1748 if (mbInitializedBcp47)
1749 const_cast<LanguageTag*>(this)->convertBcp47ToLang();
1750 else
1752 const_cast<LanguageTag*>(this)->convertLocaleToLang();
1754 /* Resolve a locale only unknown due to some redundant information,
1755 * like 'de-Latn-DE' with script tag. Never call canonicalize()
1756 * from within convert...() methods due to possible recursion, so
1757 * do it here. */
1758 if ((!mbSystemLocale && mnLangID == LANGUAGE_SYSTEM) || mnLangID == LANGUAGE_DONTKNOW)
1759 const_cast<LanguageTag*>(this)->synCanonicalize();
1762 return mnLangID;
1766 void LanguageTag::getIsoLanguageScriptCountry( OUString& rLanguage, OUString& rScript, OUString& rCountry ) const
1768 // Calling isIsoODF() first is a predicate for getLanguage(), getScript()
1769 // and getCountry() to work correctly in this context.
1770 if (isIsoODF())
1772 rLanguage = getLanguage();
1773 rScript = getScript();
1774 rCountry = getCountry();
1776 else
1778 rLanguage = (LanguageTag::isIsoLanguage( getLanguage()) ? getLanguage() : OUString());
1779 rScript = (LanguageTag::isIsoScript( getScript()) ? getScript() : OUString());
1780 rCountry = (LanguageTag::isIsoCountry( getCountry()) ? getCountry() : OUString());
1785 namespace
1788 bool isLowerAscii( sal_Unicode c )
1790 return 'a' <= c && c <= 'z';
1793 bool isUpperAscii( sal_Unicode c )
1795 return 'A' <= c && c <= 'Z';
1801 // static
1802 bool LanguageTag::isIsoLanguage( const OUString& rLanguage )
1804 /* TODO: ignore case? For now let's see where rubbish is used. */
1805 bool b2chars = rLanguage.getLength() == 2;
1806 if ((b2chars || rLanguage.getLength() == 3) &&
1807 isLowerAscii( rLanguage[0]) && isLowerAscii( rLanguage[1]) &&
1808 (b2chars || isLowerAscii( rLanguage[2])))
1809 return true;
1810 SAL_WARN_IF( ((rLanguage.getLength() == 2 || rLanguage.getLength() == 3) &&
1811 (isUpperAscii( rLanguage[0]) || isUpperAscii( rLanguage[1]))) ||
1812 (rLanguage.getLength() == 3 && isUpperAscii( rLanguage[2])), "i18nlangtag",
1813 "LanguageTag::isIsoLanguage: rejecting upper case " << rLanguage);
1814 return false;
1818 // static
1819 bool LanguageTag::isIsoCountry( const OUString& rRegion )
1821 /* TODO: ignore case? For now let's see where rubbish is used. */
1822 if (rRegion.isEmpty() ||
1823 (rRegion.getLength() == 2 && isUpperAscii( rRegion[0]) && isUpperAscii( rRegion[1])))
1824 return true;
1825 SAL_WARN_IF( rRegion.getLength() == 2 && (isLowerAscii( rRegion[0]) || isLowerAscii( rRegion[1])),
1826 "i18nlangtag", "LanguageTag::isIsoCountry: rejecting lower case " << rRegion);
1827 return false;
1831 // static
1832 bool LanguageTag::isIsoScript( const OUString& rScript )
1834 /* TODO: ignore case? For now let's see where rubbish is used. */
1835 if (rScript.isEmpty() ||
1836 (rScript.getLength() == 4 &&
1837 isUpperAscii( rScript[0]) && isLowerAscii( rScript[1]) &&
1838 isLowerAscii( rScript[2]) && isLowerAscii( rScript[3])))
1839 return true;
1840 SAL_WARN_IF( rScript.getLength() == 4 &&
1841 (isLowerAscii( rScript[0]) || isUpperAscii( rScript[1]) ||
1842 isUpperAscii( rScript[2]) || isUpperAscii( rScript[3])),
1843 "i18nlangtag", "LanguageTag::isIsoScript: rejecting case mismatch " << rScript);
1844 return false;
1848 OUString const & LanguageTagImpl::getLanguage() const
1850 if (!mbCachedLanguage)
1852 maCachedLanguage = const_cast<LanguageTagImpl*>(this)->getLanguageFromLangtag();
1853 mbCachedLanguage = true;
1855 return maCachedLanguage;
1859 OUString LanguageTag::getLanguage() const
1861 LanguageTagImpl const* pImpl = getImpl();
1862 if (pImpl->mbCachedLanguage)
1863 return pImpl->maCachedLanguage;
1864 OUString aRet( pImpl->getLanguage());
1865 const_cast<LanguageTag*>(this)->syncFromImpl();
1866 return aRet;
1870 OUString const & LanguageTagImpl::getScript() const
1872 if (!mbCachedScript)
1874 maCachedScript = const_cast<LanguageTagImpl*>(this)->getScriptFromLangtag();
1875 mbCachedScript = true;
1877 return maCachedScript;
1881 OUString LanguageTag::getScript() const
1883 LanguageTagImpl const* pImpl = getImpl();
1884 if (pImpl->mbCachedScript)
1885 return pImpl->maCachedScript;
1886 OUString aRet( pImpl->getScript());
1887 const_cast<LanguageTag*>(this)->syncFromImpl();
1888 return aRet;
1892 OUString LanguageTag::getLanguageAndScript() const
1894 OUString aLanguageScript( getLanguage());
1895 OUString aScript( getScript());
1896 if (!aScript.isEmpty())
1898 aLanguageScript += "-" + aScript;
1900 return aLanguageScript;
1904 OUString const & LanguageTagImpl::getCountry() const
1906 if (!mbCachedCountry)
1908 maCachedCountry = const_cast<LanguageTagImpl*>(this)->getRegionFromLangtag();
1909 if (!LanguageTag::isIsoCountry( maCachedCountry))
1910 maCachedCountry.clear();
1911 mbCachedCountry = true;
1913 return maCachedCountry;
1917 OUString LanguageTag::getCountry() const
1919 LanguageTagImpl const* pImpl = getImpl();
1920 if (pImpl->mbCachedCountry)
1921 return pImpl->maCachedCountry;
1922 OUString aRet( pImpl->getCountry());
1923 const_cast<LanguageTag*>(this)->syncFromImpl();
1924 return aRet;
1928 OUString LanguageTagImpl::getRegion() const
1930 return const_cast<LanguageTagImpl*>(this)->getRegionFromLangtag();
1934 OUString const & LanguageTagImpl::getVariants() const
1936 if (!mbCachedVariants)
1938 maCachedVariants = const_cast<LanguageTagImpl*>(this)->getVariantsFromLangtag();
1939 mbCachedVariants = true;
1941 return maCachedVariants;
1945 OUString LanguageTag::getVariants() const
1947 LanguageTagImpl const * pImpl = getImpl();
1948 if (pImpl->mbCachedVariants)
1949 return pImpl->maCachedVariants;
1950 OUString aRet( pImpl->getVariants());
1951 const_cast<LanguageTag*>(this)->syncFromImpl();
1952 return aRet;
1955 OUString const & LanguageTagImpl::getGlibcLocaleString() const
1957 if (mbCachedGlibcString)
1958 return maCachedGlibcString;
1960 if (!mpImplLangtag)
1962 meIsLiblangtagNeeded = DECISION_YES;
1963 const_cast<LanguageTagImpl*>(this)->synCanonicalize();
1965 if (mpImplLangtag)
1967 char* pLang = lt_tag_convert_to_locale(mpImplLangtag, nullptr);
1968 if (pLang)
1970 maCachedGlibcString = OUString::createFromAscii( pLang);
1971 mbCachedGlibcString = true;
1972 free(pLang);
1975 return maCachedGlibcString;
1978 OUString LanguageTag::getGlibcLocaleString( std::u16string_view rEncoding ) const
1980 OUString aRet;
1981 if (isIsoLocale())
1983 OUString aCountry( getCountry());
1984 if (aCountry.isEmpty())
1985 aRet = getLanguage() + rEncoding;
1986 else
1987 aRet = getLanguage() + "_" + aCountry + rEncoding;
1989 else
1991 aRet = getImpl()->getGlibcLocaleString();
1992 sal_Int32 nAt = aRet.indexOf('@');
1993 if (nAt != -1)
1994 aRet = OUString::Concat(aRet.subView(0, nAt)) + rEncoding + aRet.subView(nAt);
1995 else
1996 aRet += rEncoding;
1998 return aRet;
2001 bool LanguageTagImpl::hasScript() const
2003 if (!mbCachedScript)
2004 getScript();
2005 return !maCachedScript.isEmpty();
2009 bool LanguageTag::hasScript() const
2011 bool bRet = getImpl()->hasScript();
2012 const_cast<LanguageTag*>(this)->syncFromImpl();
2013 return bRet;
2017 LanguageTag::ScriptType LanguageTagImpl::getScriptType() const
2019 return meScriptType;
2023 LanguageTag::ScriptType LanguageTag::getScriptType() const
2025 return getImpl()->getScriptType();
2029 void LanguageTagImpl::setScriptType(LanguageTag::ScriptType st)
2031 if (meScriptType == LanguageTag::ScriptType::UNKNOWN) // poor man's clash resolution
2032 meScriptType = st;
2036 void LanguageTag::setScriptType(LanguageTag::ScriptType st)
2038 getImpl()->setScriptType(st);
2042 bool LanguageTagImpl::cacheSimpleLSCV()
2044 OUString aLanguage, aScript, aCountry, aRegion, aVariants;
2045 Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry, aRegion, aVariants);
2046 bool bRet = (eExt == EXTRACTED_LSC || eExt == EXTRACTED_LV || eExt == EXTRACTED_LR);
2047 if (bRet)
2049 maCachedLanguage = aLanguage;
2050 maCachedScript = aScript;
2051 maCachedCountry = aCountry;
2052 maCachedVariants = aVariants;
2053 mbCachedLanguage = mbCachedScript = mbCachedCountry = mbCachedVariants = true;
2055 return bRet;
2059 bool LanguageTagImpl::isIsoLocale() const
2061 if (meIsIsoLocale == DECISION_DONTKNOW)
2063 const_cast<LanguageTagImpl*>(this)->synCanonicalize();
2064 // It must be at most ll-CC or lll-CC
2065 // Do not use getCountry() here, use getRegion() instead.
2066 meIsIsoLocale = ((maBcp47.isEmpty() ||
2067 (maBcp47.getLength() <= 6 && LanguageTag::isIsoLanguage( getLanguage()) &&
2068 LanguageTag::isIsoCountry( getRegion()))) ? DECISION_YES : DECISION_NO);
2070 return meIsIsoLocale == DECISION_YES;
2074 bool LanguageTag::isIsoLocale() const
2076 bool bRet = getImpl()->isIsoLocale();
2077 const_cast<LanguageTag*>(this)->syncFromImpl();
2078 return bRet;
2082 bool LanguageTagImpl::isIsoODF() const
2084 if (meIsIsoODF == DECISION_DONTKNOW)
2086 const_cast<LanguageTagImpl*>(this)->synCanonicalize();
2087 if (!LanguageTag::isIsoScript( getScript()))
2089 meIsIsoODF = DECISION_NO;
2090 return false;
2092 // The usual case is lll-CC so simply check that first.
2093 if (isIsoLocale())
2095 meIsIsoODF = DECISION_YES;
2096 return true;
2098 // If this is not ISO locale for which script must not exist it can
2099 // still be ISO locale plus ISO script lll-Ssss-CC, but not ll-vvvv ...
2100 // ll-vvvvvvvv
2101 meIsIsoODF = ((maBcp47.getLength() <= 11 && LanguageTag::isIsoLanguage( getLanguage()) &&
2102 LanguageTag::isIsoCountry( getRegion()) && LanguageTag::isIsoScript( getScript()) &&
2103 getVariants().isEmpty()) ? DECISION_YES : DECISION_NO);
2105 return meIsIsoODF == DECISION_YES;
2109 bool LanguageTag::isIsoODF() const
2111 bool bRet = getImpl()->isIsoODF();
2112 const_cast<LanguageTag*>(this)->syncFromImpl();
2113 return bRet;
2117 bool LanguageTagImpl::isValidBcp47() const
2119 if (meIsValid == DECISION_DONTKNOW)
2121 const_cast<LanguageTagImpl*>(this)->synCanonicalize();
2122 SAL_WARN_IF( meIsValid == DECISION_DONTKNOW, "i18nlangtag",
2123 "LanguageTag::isValidBcp47: canonicalize() didn't set meIsValid");
2125 return meIsValid == DECISION_YES;
2129 bool LanguageTag::isValidBcp47() const
2131 bool bRet = getImpl()->isValidBcp47();
2132 const_cast<LanguageTag*>(this)->syncFromImpl();
2133 return bRet;
2137 LanguageTag & LanguageTag::makeFallback()
2139 if (!mbIsFallback)
2141 const lang::Locale& rLocale1 = getLocale();
2142 lang::Locale aLocale2( MsLangId::Conversion::lookupFallbackLocale( rLocale1));
2143 if ( rLocale1.Language != aLocale2.Language ||
2144 rLocale1.Country != aLocale2.Country ||
2145 rLocale1.Variant != aLocale2.Variant)
2147 if (rLocale1.Language != "en" && aLocale2.Language == "en" && aLocale2.Country == "US")
2149 // "en-US" is the last resort fallback, try if we get a better
2150 // one for the fallback hierarchy of a non-"en" locale.
2151 ::std::vector< OUString > aFallbacks( getFallbackStrings( false));
2152 for (auto const& fallback : aFallbacks)
2154 lang::Locale aLocale3( LanguageTag(fallback).getLocale());
2155 aLocale2 = MsLangId::Conversion::lookupFallbackLocale( aLocale3);
2156 if (aLocale2.Language != "en" || aLocale2.Country != "US")
2157 break; // for, success
2160 SAL_INFO( "i18nlangtag", "LanguageTag::makeFallback - for (" <<
2161 rLocale1.Language << "," << rLocale1.Country << "," << rLocale1.Variant << ") to (" <<
2162 aLocale2.Language << "," << aLocale2.Country << "," << aLocale2.Variant << ")");
2163 reset( aLocale2);
2165 mbIsFallback = true;
2167 return *this;
2171 /* TODO: maybe this now could take advantage of the mnOverride field in
2172 * isolang.cxx entries and search for kSAME instead of hardcoded special
2173 * fallbacks. Though iterating through those tables would be slower and even
2174 * then there would be some special cases, but we wouldn't lack entries that
2175 * were missed out. */
2176 ::std::vector< OUString > LanguageTag::getFallbackStrings( bool bIncludeFullBcp47 ) const
2178 ::std::vector< OUString > aVec;
2179 OUString aLanguage( getLanguage());
2180 OUString aCountry( getCountry());
2181 if (isIsoLocale())
2183 if (!aCountry.isEmpty())
2185 if (bIncludeFullBcp47)
2186 aVec.emplace_back(aLanguage + "-" + aCountry);
2187 if (aLanguage == "zh")
2189 // For zh-HK or zh-MO also list zh-TW to get zh-Hant, for all
2190 // other zh-XX also list zh-CN to get zh-Hans; both of which we
2191 // use the legacy forms instead of the more correct script
2192 // tags that unfortunately most pieces don't understand.
2193 if (aCountry == "HK" || aCountry == "MO")
2194 aVec.emplace_back(aLanguage + "-TW");
2195 else if (aCountry != "CN")
2196 aVec.emplace_back(aLanguage + "-CN");
2197 aVec.push_back( aLanguage);
2199 else if (aLanguage == "sh")
2201 // Manual list instead of calling
2202 // LanguageTag( "sr-Latn-" + aCountry).getFallbackStrings( true)
2203 // that would also include "sh-*" again.
2204 aVec.emplace_back("sr-Latn-" + aCountry);
2205 aVec.emplace_back("sr-Latn");
2206 aVec.emplace_back("sh"); // legacy with script, before default script with country
2207 aVec.emplace_back("sr-" + aCountry);
2208 aVec.emplace_back("sr");
2210 else if (aLanguage == "ca" && aCountry == "XV")
2212 ::std::vector< OUString > aRep( LanguageTag( "ca-ES-valencia").getFallbackStrings( true));
2213 aVec.insert( aVec.end(), aRep.begin(), aRep.end());
2214 // Already includes 'ca' language fallback.
2216 else if (aLanguage == "ku")
2218 if (aCountry == "TR" || aCountry == "SY")
2220 aVec.emplace_back("kmr-Latn-" + aCountry);
2221 aVec.emplace_back("kmr-" + aCountry);
2222 aVec.emplace_back("kmr-Latn");
2223 aVec.emplace_back("kmr");
2224 aVec.push_back( aLanguage);
2226 else if (aCountry == "IQ" || aCountry == "IR")
2228 aVec.emplace_back("ckb-" + aCountry);
2229 aVec.emplace_back("ckb");
2232 else if (aLanguage == "kmr" && (aCountry == "TR" || aCountry == "SY"))
2234 aVec.emplace_back("ku-Latn-" + aCountry);
2235 aVec.emplace_back("ku-" + aCountry);
2236 aVec.push_back( aLanguage);
2237 aVec.emplace_back("ku");
2239 else if (aLanguage == "ckb" && (aCountry == "IQ" || aCountry == "IR"))
2241 aVec.emplace_back("ku-Arab-" + aCountry);
2242 aVec.emplace_back("ku-" + aCountry);
2243 aVec.push_back( aLanguage);
2244 // not 'ku' only, that was used for Latin script
2246 else
2247 aVec.push_back( aLanguage);
2249 else
2251 if (bIncludeFullBcp47)
2252 aVec.push_back( aLanguage);
2253 if (aLanguage == "sh")
2255 aVec.emplace_back("sr-Latn");
2256 aVec.emplace_back("sr");
2258 else if (aLanguage == "pli")
2260 // a special case for Pali dictionary, see fdo#41599
2261 aVec.emplace_back("pi-Latn");
2262 aVec.emplace_back("pi");
2265 return aVec;
2268 getBcp47(); // have maBcp47 now
2269 if (bIncludeFullBcp47)
2270 aVec.push_back( maBcp47);
2272 // Special cases for deprecated tags and their replacements, include both
2273 // in fallbacks in a sensible order.
2274 /* TODO: could such things be generalized and automated with liblangtag? */
2275 if (maBcp47 == "en-GB-oed")
2276 aVec.emplace_back("en-GB-oxendict");
2277 else if (maBcp47 == "en-GB-oxendict")
2278 aVec.emplace_back("en-GB-oed");
2280 OUString aVariants( getVariants());
2281 OUString aTmp;
2282 if (hasScript())
2284 OUString aScript = getScript();
2285 bool bHaveLanguageScriptVariant = false;
2286 if (!aCountry.isEmpty())
2288 if (!aVariants.isEmpty())
2290 aTmp = aLanguage + "-" + aScript + "-" + aCountry + "-" + aVariants;
2291 if (aTmp != maBcp47)
2292 aVec.push_back( aTmp);
2293 // Language with variant but without country before language
2294 // without variant but with country.
2295 aTmp = aLanguage + "-" + aScript + "-" + aVariants;
2296 if (aTmp != maBcp47)
2297 aVec.push_back( aTmp);
2298 bHaveLanguageScriptVariant = true;
2300 aTmp = aLanguage + "-" + aScript + "-" + aCountry;
2301 if (aTmp != maBcp47)
2302 aVec.push_back( aTmp);
2303 if (aLanguage == "sr" && aScript == "Latn")
2305 // sr-Latn-CS => sr-Latn-YU, sh-CS, sh-YU
2306 if (aCountry == "CS")
2308 aVec.emplace_back("sr-Latn-YU");
2309 aVec.emplace_back("sh-CS");
2310 aVec.emplace_back("sh-YU");
2312 else
2313 aVec.emplace_back("sh-" + aCountry);
2315 else if (aLanguage == "pi" && aScript == "Latn")
2316 aVec.emplace_back("pli"); // a special case for Pali dictionary, see fdo#41599
2317 else if (aLanguage == "krm" && aScript == "Latn" && (aCountry == "TR" || aCountry == "SY"))
2318 aVec.emplace_back("ku-" + aCountry);
2320 if (!aVariants.isEmpty() && !bHaveLanguageScriptVariant)
2322 aTmp = aLanguage + "-" + aScript + "-" + aVariants;
2323 if (aTmp != maBcp47)
2324 aVec.push_back( aTmp);
2326 aTmp = aLanguage + "-" + aScript;
2327 if (aTmp != maBcp47)
2328 aVec.push_back( aTmp);
2330 // 'sh' actually denoted a script, so have it here instead of appended
2331 // at the end as language-only.
2332 if (aLanguage == "sr" && aScript == "Latn")
2333 aVec.emplace_back("sh");
2334 else if (aLanguage == "ku" && aScript == "Arab")
2335 aVec.emplace_back("ckb");
2336 // 'ku' only denoted Latin script
2337 else if (aLanguage == "krm" && aScript == "Latn" && aCountry.isEmpty())
2338 aVec.emplace_back("ku");
2340 bool bHaveLanguageVariant = false;
2341 if (!aCountry.isEmpty())
2343 if (!aVariants.isEmpty())
2345 aTmp = aLanguage + "-" + aCountry + "-" + aVariants;
2346 if (aTmp != maBcp47)
2347 aVec.push_back( aTmp);
2348 if (maBcp47 == "ca-ES-valencia")
2349 aVec.emplace_back("ca-XV");
2350 // Language with variant but without country before language
2351 // without variant but with country.
2352 // But only if variant is not from a grandfathered tag that
2353 // wouldn't match the rules, i.e. "de-1901" is fine but "en-oed" is
2354 // not.
2355 if (aVariants.getLength() >= 5 ||
2356 (aVariants.getLength() == 4 && '0' <= aVariants[0] && aVariants[0] <= '9'))
2358 aTmp = aLanguage + "-" + aVariants;
2359 if (aTmp != maBcp47)
2360 aVec.push_back( aTmp);
2361 bHaveLanguageVariant = true;
2364 aTmp = aLanguage + "-" + aCountry;
2365 if (aTmp != maBcp47)
2366 aVec.push_back( aTmp);
2368 if (!aVariants.isEmpty() && !bHaveLanguageVariant)
2370 // Only if variant is not from a grandfathered tag that wouldn't match
2371 // the rules, i.e. "de-1901" is fine but "en-oed" is not.
2372 if (aVariants.getLength() >= 5 ||
2373 (aVariants.getLength() == 4 && '0' <= aVariants[0] && aVariants[0] <= '9'))
2375 aTmp = aLanguage + "-" + aVariants;
2376 if (aTmp != maBcp47)
2377 aVec.push_back( aTmp);
2381 // Insert legacy fallbacks with country before language-only, but only
2382 // default script, script was handled already above.
2383 if (!aCountry.isEmpty())
2385 if (aLanguage == "sr" && aCountry == "CS")
2386 aVec.emplace_back("sr-YU");
2389 // Original language-only.
2390 if (!aLanguage.isEmpty() && aLanguage != maBcp47)
2391 aVec.push_back( aLanguage);
2393 return aVec;
2397 OUString LanguageTag::getBcp47MS() const
2399 if (getLanguageType() == LANGUAGE_SPANISH_DATED)
2400 return "es-ES_tradnl";
2401 return getBcp47();
2405 bool LanguageTag::equals( const LanguageTag & rLanguageTag ) const
2407 // If SYSTEM is not to be resolved or either both are SYSTEM or none, we
2408 // can use the operator==() optimization.
2409 if (isSystemLocale() == rLanguageTag.isSystemLocale())
2410 return operator==( rLanguageTag);
2412 // Compare full language tag strings.
2413 return getBcp47() == rLanguageTag.getBcp47();
2417 bool LanguageTag::operator==( const LanguageTag & rLanguageTag ) const
2419 if (isSystemLocale() && rLanguageTag.isSystemLocale())
2420 return true; // both SYSTEM
2422 // No need to convert to BCP47 if both Lang-IDs are available.
2423 if (mbInitializedLangID && rLanguageTag.mbInitializedLangID)
2425 // Equal if same ID and no SYSTEM is involved or both are SYSTEM.
2426 return mnLangID == rLanguageTag.mnLangID && isSystemLocale() == rLanguageTag.isSystemLocale();
2429 // Compare full language tag strings but SYSTEM unresolved.
2430 return getBcp47( false) == rLanguageTag.getBcp47( false);
2434 bool LanguageTag::operator!=( const LanguageTag & rLanguageTag ) const
2436 return !operator==( rLanguageTag);
2440 bool LanguageTag::operator<( const LanguageTag & rLanguageTag ) const
2442 return getBcp47( false).compareToIgnoreAsciiCase( rLanguageTag.getBcp47( false)) < 0;
2446 // static
2447 LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp47,
2448 OUString& rLanguage, OUString& rScript, OUString& rCountry, OUString& rRegion, OUString& rVariants )
2450 Extraction eRet = EXTRACTED_NONE;
2451 const sal_Int32 nLen = rBcp47.getLength();
2452 const sal_Int32 nHyph1 = rBcp47.indexOf( '-');
2453 sal_Int32 nHyph2 = (nHyph1 < 0 ? -1 : rBcp47.indexOf( '-', nHyph1 + 1));
2454 sal_Int32 nHyph3 = (nHyph2 < 0 ? -1 : rBcp47.indexOf( '-', nHyph2 + 1));
2455 sal_Int32 nHyph4 = (nHyph3 < 0 ? -1 : rBcp47.indexOf( '-', nHyph3 + 1));
2456 if (nLen == 1 && rBcp47[0] == '*') // * the dreaded jolly joker
2458 // It's f*d up but we need to recognize this.
2459 eRet = EXTRACTED_X_JOKER;
2461 else if (nHyph1 == 1 && rBcp47[0] == 'x') // x-... privateuse
2463 // x-... privateuse tags MUST be known to us by definition.
2464 eRet = EXTRACTED_X;
2466 else if (nLen == 1 && rBcp47[0] == 'C') // the 'C' locale
2468 eRet = EXTRACTED_C_LOCALE;
2469 rLanguage = "C";
2470 rScript.clear();
2471 rCountry.clear();
2472 rRegion.clear();
2473 rVariants.clear();
2475 else if (nLen == 2 || nLen == 3) // ll or lll
2477 if (nHyph1 < 0)
2479 rLanguage = rBcp47.toAsciiLowerCase();
2480 rScript.clear();
2481 rCountry.clear();
2482 rRegion.clear();
2483 rVariants.clear();
2484 eRet = EXTRACTED_LSC;
2487 else if ( (nHyph1 == 2 && nLen == 5) // ll-CC
2488 || (nHyph1 == 3 && nLen == 6)) // lll-CC
2490 if (nHyph2 < 0)
2492 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2493 rCountry = rBcp47.copy( nHyph1 + 1, 2).toAsciiUpperCase();
2494 rRegion.clear();
2495 rScript.clear();
2496 rVariants.clear();
2497 eRet = EXTRACTED_LSC;
2500 else if ( (nHyph1 == 2 && nLen == 6) // ll-rrr
2501 || (nHyph1 == 3 && nLen == 7)) // lll-rrr
2503 if (nHyph2 < 0)
2505 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2506 rCountry.clear();
2507 rRegion = rBcp47.copy( nHyph1 + 1, 3);
2508 rScript.clear();
2509 rVariants.clear();
2510 eRet = EXTRACTED_LR;
2513 else if ( (nHyph1 == 2 && nLen == 7) // ll-Ssss or ll-vvvv
2514 || (nHyph1 == 3 && nLen == 8)) // lll-Ssss or lll-vvvv
2516 if (nHyph2 < 0)
2518 sal_Unicode c = rBcp47[nHyph1+1];
2519 if ('0' <= c && c <= '9')
2521 // (DIGIT 3ALNUM) vvvv variant instead of Ssss script
2522 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2523 rScript.clear();
2524 rCountry.clear();
2525 rRegion.clear();
2526 rVariants = rBcp47.copy( nHyph1 + 1);
2527 eRet = EXTRACTED_LV;
2529 else
2531 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2532 rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() +
2533 rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
2534 rCountry.clear();
2535 rRegion.clear();
2536 rVariants.clear();
2537 eRet = EXTRACTED_LSC;
2541 else if ( (nHyph1 == 2 && nHyph2 == 7 && nLen == 10) // ll-Ssss-CC
2542 || (nHyph1 == 3 && nHyph2 == 8 && nLen == 11)) // lll-Ssss-CC
2544 if (nHyph3 < 0)
2546 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2547 rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
2548 rCountry = rBcp47.copy( nHyph2 + 1, 2).toAsciiUpperCase();
2549 rRegion.clear();
2550 rVariants.clear();
2551 eRet = EXTRACTED_LSC;
2554 else if ( (nHyph1 == 2 && nHyph2 == 7 && nLen == 11) // ll-Ssss-rrr
2555 || (nHyph1 == 3 && nHyph2 == 8 && nLen == 12)) // lll-Ssss-rrr
2557 if (nHyph3 < 0)
2559 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2560 rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
2561 rCountry.clear();
2562 rRegion = rBcp47.copy( nHyph2 + 1, 3);
2563 rVariants.clear();
2564 eRet = EXTRACTED_LR;
2567 else if ( (nHyph1 == 2 && nHyph2 == 7 && nHyph3 == 10 && nLen >= 15) // ll-Ssss-CC-vvvv[vvvv][-...]
2568 || (nHyph1 == 3 && nHyph2 == 8 && nHyph3 == 11 && nLen >= 16)) // lll-Ssss-CC-vvvv[vvvv][-...]
2570 if (nHyph4 < 0)
2571 nHyph4 = rBcp47.getLength();
2572 if (nHyph4 - nHyph3 > 4 && nHyph4 - nHyph3 <= 9)
2574 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2575 rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
2576 rCountry = rBcp47.copy( nHyph2 + 1, 2).toAsciiUpperCase();
2577 rRegion.clear();
2578 rVariants = rBcp47.copy( nHyph3 + 1);
2579 eRet = EXTRACTED_LV;
2582 else if ( (nHyph1 == 2 && nHyph2 == 7 && nHyph3 == 11 && nLen >= 16) // ll-Ssss-rrr-vvvv[vvvv][-...]
2583 || (nHyph1 == 3 && nHyph2 == 8 && nHyph3 == 12 && nLen >= 17)) // lll-Ssss-rrr-vvvv[vvvv][-...]
2585 if (nHyph4 < 0)
2586 nHyph4 = rBcp47.getLength();
2587 if (nHyph4 - nHyph3 > 4 && nHyph4 - nHyph3 <= 9)
2589 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2590 rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
2591 rCountry.clear();
2592 rRegion = rBcp47.copy( nHyph2 + 1, 3);
2593 rVariants = rBcp47.copy( nHyph3 + 1);
2594 eRet = EXTRACTED_LR;
2597 else if ( (nHyph1 == 2 && nHyph2 == 5 && nHyph3 == 7) // ll-CC-u-...
2598 || (nHyph1 == 3 && nHyph2 == 6 && nHyph3 == 8)) // lll-CC-u-...
2600 if (rBcp47[nHyph3-1] == 'u')
2602 // Need to recognize as known, otherwise getLanguage() and
2603 // getCountry() return empty string because mpImplLangtag is not
2604 // used with a known mapping.
2605 /* TODO: if there were more this would get ugly and needed some
2606 * table driven approach via isolang.cxx instead. */
2607 if (rBcp47.equalsIgnoreAsciiCase( "es-ES-u-co-trad"))
2609 rLanguage = "es";
2610 rScript.clear();
2611 rCountry = "ES";
2612 rRegion.clear();
2613 rVariants = "u-co-trad"; // not strictly a variant, but used to reconstruct the tag.
2614 eRet = EXTRACTED_LV;
2618 else if ( (nHyph1 == 2 && nHyph2 == 5 && nLen >= 10) // ll-CC-vvvv[vvvv][-...]
2619 || (nHyph1 == 3 && nHyph2 == 6 && nLen >= 11)) // lll-CC-vvvv[vvvv][-...]
2621 if (nHyph3 < 0)
2622 nHyph3 = rBcp47.getLength();
2623 if (nHyph3 - nHyph2 > 4 && nHyph3 - nHyph2 <= 9)
2625 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2626 rScript.clear();
2627 rCountry = rBcp47.copy( nHyph1 + 1, 2).toAsciiUpperCase();
2628 rRegion.clear();
2629 rVariants = rBcp47.copy( nHyph2 + 1);
2630 eRet = EXTRACTED_LV;
2633 else if ( (nHyph1 == 2 && nHyph2 == 6 && nLen >= 11) // ll-rrr-vvvv[vvvv][-...]
2634 || (nHyph1 == 3 && nHyph2 == 7 && nLen >= 12)) // lll-rrr-vvvv[vvvv][-...]
2636 if (nHyph3 < 0)
2637 nHyph3 = rBcp47.getLength();
2638 if (nHyph3 - nHyph2 > 4 && nHyph3 - nHyph2 <= 9)
2640 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2641 rScript.clear();
2642 rCountry.clear();
2643 rRegion = rBcp47.copy( nHyph1 + 1, 3);
2644 rVariants = rBcp47.copy( nHyph2 + 1);
2645 eRet = EXTRACTED_LR;
2648 else if ( (nHyph1 == 2 && nLen >= 8) // ll-vvvvv[vvv][-...]
2649 || (nHyph1 == 3 && nLen >= 9)) // lll-vvvvv[vvv][-...]
2651 if (nHyph2 < 0)
2652 nHyph2 = rBcp47.getLength();
2653 if (nHyph2 - nHyph1 > 5 && nHyph2 - nHyph1 <= 9)
2655 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2656 rScript.clear();
2657 rCountry.clear();
2658 rRegion.clear();
2659 rVariants = rBcp47.copy( nHyph1 + 1);
2660 eRet = EXTRACTED_LV;
2662 else
2664 // Known and handled grandfathered; ugly but effective ...
2665 // Note that nLen must have matched above.
2666 // Strictly not a variant, but so far we treat it as such.
2667 if (rBcp47.equalsIgnoreAsciiCase( "en-GB-oed"))
2669 rLanguage = "en";
2670 rScript.clear();
2671 rCountry = "GB";
2672 rRegion.clear();
2673 rVariants = "oed";
2674 eRet = EXTRACTED_LV;
2676 // Other known and handled odd cases.
2677 else if (rBcp47.equalsIgnoreAsciiCase( "es-ES_tradnl"))
2679 // Will get overridden, but needs to be recognized as known.
2680 rLanguage = "es";
2681 rScript.clear();
2682 rCountry = "ES";
2683 rRegion.clear();
2684 rVariants = "tradnl"; // this is nonsense, but... ignored.
2685 eRet = EXTRACTED_KNOWN_BAD;
2689 if (eRet == EXTRACTED_NONE)
2691 SAL_INFO( "i18nlangtag", "LanguageTagImpl::simpleExtract: did not extract '" << rBcp47 << "'");
2692 rLanguage.clear();
2693 rScript.clear();
2694 rCountry.clear();
2695 rRegion.clear();
2696 rVariants.clear();
2698 else
2700 assert(rLanguage.getLength() == 2 || rLanguage.getLength() == 3
2701 || eRet == EXTRACTED_X_JOKER || eRet == EXTRACTED_X || eRet == EXTRACTED_C_LOCALE);
2702 assert(rScript.isEmpty() || rScript.getLength() == 4);
2703 assert(rCountry.isEmpty() || rRegion.isEmpty()); // [2ALPHA / 3DIGIT]
2704 assert(rCountry.isEmpty() || rCountry.getLength() == 2);
2705 assert(rRegion.isEmpty() || rRegion.getLength() == 3);
2706 assert(rVariants.isEmpty() || rVariants.getLength() >= 4 || rVariants == "oed");
2708 return eRet;
2712 // static
2713 ::std::vector< OUString >::const_iterator LanguageTag::getFallback(
2714 const ::std::vector< OUString > & rList, const OUString & rReference )
2716 if (rList.empty())
2717 return rList.end();
2719 // Try the simple case first without constructing fallbacks.
2720 ::std::vector< OUString >::const_iterator it = std::find(rList.begin(), rList.end(), rReference);
2721 if (it != rList.end())
2722 return it; // exact match
2724 ::std::vector< OUString > aFallbacks( LanguageTag( rReference).getFallbackStrings( false));
2725 if (rReference != "en-US")
2727 aFallbacks.emplace_back("en-US");
2728 if (rReference != "en")
2729 aFallbacks.emplace_back("en");
2731 if (rReference != "x-default")
2732 aFallbacks.emplace_back("x-default");
2733 if (rReference != "x-no-translate")
2734 aFallbacks.emplace_back("x-no-translate");
2735 /* TODO: the original comphelper::Locale::getFallback() code had
2736 * "x-notranslate" instead of "x-no-translate", but all .xcu files use
2737 * "x-no-translate" and "x-notranslate" apparently was never used anywhere.
2738 * Did that ever work? Was it supposed to work at all like this? */
2740 for (const auto& fb : aFallbacks)
2742 it = std::find(rList.begin(), rList.end(), fb);
2743 if (it != rList.end())
2744 return it; // fallback found
2747 // Did not find anything so return something of the list, the first value
2748 // will do as well as any other as none did match any of the possible
2749 // fallbacks.
2750 return rList.begin();
2754 // static
2755 ::std::vector< css::lang::Locale >::const_iterator LanguageTag::getMatchingFallback(
2756 const ::std::vector< css::lang::Locale > & rList,
2757 const css::lang::Locale & rReference )
2759 if (rList.empty())
2760 return rList.end();
2762 // Try the simple case first without constructing fallbacks.
2763 ::std::vector< lang::Locale >::const_iterator it = std::find_if(rList.begin(), rList.end(),
2764 [&rReference](const lang::Locale& rLocale) {
2765 return rLocale.Language == rReference.Language
2766 && rLocale.Country == rReference.Country
2767 && rLocale.Variant == rReference.Variant; });
2768 if (it != rList.end())
2769 return it; // exact match
2771 // Now for each reference fallback test the fallbacks of the list in order.
2772 ::std::vector< OUString > aFallbacks( LanguageTag( rReference).getFallbackStrings( false));
2773 ::std::vector< ::std::vector< OUString > > aListFallbacks( rList.size());
2774 size_t i = 0;
2775 for (auto const& elem : rList)
2777 ::std::vector< OUString > aTmp( LanguageTag(elem).getFallbackStrings( true));
2778 aListFallbacks[i++] = aTmp;
2780 for (auto const& rfb : aFallbacks)
2782 size_t nPosFb = 0;
2783 for (auto const& lfb : aListFallbacks)
2785 for (auto const& fb : lfb)
2787 if (rfb == fb)
2788 return rList.begin() + nPosFb;
2790 ++nPosFb;
2794 // No match found.
2795 return rList.end();
2799 static bool lcl_isSystem( LanguageType nLangID )
2801 if (nLangID == LANGUAGE_SYSTEM)
2802 return true;
2803 // There are some special values that simplify to SYSTEM,
2804 // getRealLanguage() catches and resolves them.
2805 LanguageType nNewLangID = MsLangId::getRealLanguage( nLangID);
2806 return nNewLangID != nLangID;
2810 // static
2811 css::lang::Locale LanguageTag::convertToLocale( LanguageType nLangID, bool bResolveSystem )
2813 if (!bResolveSystem && lcl_isSystem( nLangID))
2814 return lang::Locale();
2816 return LanguageTag( nLangID).getLocale( bResolveSystem);
2820 // static
2821 LanguageType LanguageTag::convertToLanguageType( const css::lang::Locale& rLocale, bool bResolveSystem )
2823 if (rLocale.Language.isEmpty() && !bResolveSystem)
2824 return LANGUAGE_SYSTEM;
2826 return LanguageTag( rLocale).getLanguageType( bResolveSystem);
2830 // static
2831 OUString LanguageTagImpl::convertToBcp47( const css::lang::Locale& rLocale )
2833 OUString aBcp47;
2834 if (rLocale.Language.isEmpty())
2836 // aBcp47 stays empty
2838 else if (rLocale.Language == I18NLANGTAG_QLT)
2840 aBcp47 = rLocale.Variant;
2842 else
2844 /* XXX NOTE: most legacy code never evaluated the Variant field, so for
2845 * now just concatenate language and country. In case we stumbled over
2846 * variant aware code we'd have to take care of that. */
2847 if (rLocale.Country.isEmpty())
2848 aBcp47 = rLocale.Language;
2849 else
2851 aBcp47 = rLocale.Language + "-" + rLocale.Country;
2854 return aBcp47;
2858 // static
2859 OUString LanguageTag::convertToBcp47( const css::lang::Locale& rLocale, bool bResolveSystem )
2861 OUString aBcp47;
2862 if (rLocale.Language.isEmpty())
2864 if (bResolveSystem)
2865 aBcp47 = LanguageTag::convertToBcp47( LANGUAGE_SYSTEM );
2866 // else aBcp47 stays empty
2868 else
2870 aBcp47 = LanguageTagImpl::convertToBcp47( rLocale);
2872 return aBcp47;
2876 // static
2877 OUString LanguageTag::convertToBcp47( LanguageType nLangID )
2879 lang::Locale aLocale( LanguageTag::convertToLocale( nLangID ));
2880 // If system for some reason (should not happen... haha) could not be
2881 // resolved DO NOT CALL LanguageTag::convertToBcp47(Locale) because that
2882 // would recurse into this method here!
2883 if (aLocale.Language.isEmpty())
2884 return OUString(); // bad luck, bail out
2885 return LanguageTagImpl::convertToBcp47( aLocale);
2889 // static
2890 css::lang::Locale LanguageTag::convertToLocale( const OUString& rBcp47, bool bResolveSystem )
2892 if (rBcp47.isEmpty() && !bResolveSystem)
2893 return lang::Locale();
2895 return LanguageTag( rBcp47).getLocale( bResolveSystem);
2899 // static
2900 LanguageType LanguageTag::convertToLanguageType( const OUString& rBcp47 )
2902 return LanguageTag( rBcp47).getLanguageType();
2906 // static
2907 LanguageType LanguageTag::convertToLanguageTypeWithFallback( const OUString& rBcp47 )
2909 return LanguageTag( rBcp47).makeFallback().getLanguageType();
2913 // static
2914 css::lang::Locale LanguageTag::convertToLocaleWithFallback( const OUString& rBcp47 )
2916 return LanguageTag( rBcp47).makeFallback().getLocale();
2920 // static
2921 LanguageType LanguageTag::convertToLanguageTypeWithFallback( const css::lang::Locale& rLocale )
2923 if (rLocale.Language.isEmpty())
2924 return LANGUAGE_SYSTEM;
2926 return LanguageTag( rLocale).makeFallback().getLanguageType();
2930 // static
2931 bool LanguageTag::isValidBcp47( const OUString& rString, OUString* o_pCanonicalized,
2932 LanguageTag::PrivateUse ePrivateUse )
2934 bool bValid = false;
2936 struct guard
2938 lt_tag_t* mpLangtag;
2939 guard()
2941 theDataRef().init();
2942 mpLangtag = lt_tag_new();
2944 ~guard()
2946 lt_tag_unref( mpLangtag);
2948 } aVar;
2950 myLtError aError;
2952 if (!lt_tag_parse_disabled && lt_tag_parse(aVar.mpLangtag, OUStringToOString(rString, RTL_TEXTENCODING_UTF8).getStr(), &aError.p))
2954 char* pTag = lt_tag_canonicalize( aVar.mpLangtag, &aError.p);
2955 SAL_WARN_IF( !pTag, "i18nlangtag", "LanguageTag:isValidBcp47: could not canonicalize '" << rString << "'");
2956 if (pTag)
2958 bValid = true;
2959 if (ePrivateUse != PrivateUse::ALLOW)
2963 const char* pLang = nullptr;
2964 const lt_lang_t* pLangT = lt_tag_get_language( aVar.mpLangtag);
2965 if (pLangT)
2967 pLang = lt_lang_get_tag( pLangT);
2968 if (pLang && strcmp( pLang, I18NLANGTAG_QLT_ASCII) == 0)
2970 // Disallow 'qlt' localuse code to prevent
2971 // confusion with our internal usage.
2972 bValid = false;
2973 break;
2976 if (ePrivateUse == PrivateUse::ALLOW_ART_X && pLang && strcmp( pLang, "art") == 0)
2978 // Allow anything 'art' which includes 'art-x-...' and 'art-Latn-x-...'.
2979 break;
2981 const lt_string_t* pPrivate = lt_tag_get_privateuse( aVar.mpLangtag);
2982 if (pPrivate && lt_string_length( pPrivate) > 0)
2983 bValid = false;
2985 while (false);
2987 if (o_pCanonicalized)
2988 *o_pCanonicalized = OUString::createFromAscii( pTag);
2989 free( pTag);
2992 else
2994 SAL_INFO( "i18nlangtag", "LanguageTag:isValidBcp47: could not parse '" << rString << "'");
2996 return bValid;
2999 LanguageTag makeLanguageTagFromAppleLanguageId(AppleLanguageId nLanguage)
3001 //map the simple ones via LanguageTypes, and the hard ones explicitly
3002 LanguageType nLang(LANGUAGE_DONTKNOW);
3004 switch (nLanguage)
3006 case AppleLanguageId::ENGLISH:
3007 nLang = LANGUAGE_ENGLISH_US;
3008 break;
3009 case AppleLanguageId::FRENCH:
3010 nLang = LANGUAGE_FRENCH;
3011 break;
3012 case AppleLanguageId::GERMAN:
3013 nLang = LANGUAGE_GERMAN;
3014 break;
3015 case AppleLanguageId::ITALIAN:
3016 nLang = LANGUAGE_ITALIAN;
3017 break;
3018 case AppleLanguageId::DUTCH:
3019 nLang = LANGUAGE_DUTCH;
3020 break;
3021 case AppleLanguageId::SWEDISH:
3022 nLang = LANGUAGE_SWEDISH;
3023 break;
3024 case AppleLanguageId::SPANISH:
3025 nLang = LANGUAGE_SPANISH;
3026 break;
3027 case AppleLanguageId::DANISH:
3028 nLang = LANGUAGE_DANISH;
3029 break;
3030 case AppleLanguageId::PORTUGUESE:
3031 nLang = LANGUAGE_PORTUGUESE;
3032 break;
3033 case AppleLanguageId::NORWEGIAN:
3034 nLang = LANGUAGE_NORWEGIAN;
3035 break;
3036 case AppleLanguageId::HEBREW:
3037 nLang = LANGUAGE_HEBREW;
3038 break;
3039 case AppleLanguageId::JAPANESE:
3040 nLang = LANGUAGE_JAPANESE;
3041 break;
3042 case AppleLanguageId::ARABIC:
3043 nLang = LANGUAGE_ARABIC_PRIMARY_ONLY;
3044 break;
3045 case AppleLanguageId::FINNISH:
3046 nLang = LANGUAGE_FINNISH;
3047 break;
3048 case AppleLanguageId::GREEK:
3049 nLang = LANGUAGE_GREEK;
3050 break;
3051 case AppleLanguageId::ICELANDIC:
3052 nLang = LANGUAGE_ICELANDIC;
3053 break;
3054 case AppleLanguageId::MALTESE:
3055 nLang = LANGUAGE_MALTESE;
3056 break;
3057 case AppleLanguageId::TURKISH:
3058 nLang = LANGUAGE_TURKISH;
3059 break;
3060 case AppleLanguageId::CROATIAN:
3061 nLang = LANGUAGE_CROATIAN;
3062 break;
3063 case AppleLanguageId::CHINESE_TRADITIONAL:
3064 nLang = LANGUAGE_CHINESE_TRADITIONAL;
3065 break;
3066 case AppleLanguageId::URDU:
3067 nLang = LANGUAGE_URDU_PAKISTAN; //probably, otherwise we need a LANGUAGE_URDU_PRIMARY_ONLY
3068 break;
3069 case AppleLanguageId::HINDI:
3070 nLang = LANGUAGE_HINDI;
3071 break;
3072 case AppleLanguageId::THAI:
3073 nLang = LANGUAGE_THAI;
3074 break;
3075 case AppleLanguageId::KOREAN:
3076 nLang = LANGUAGE_KOREAN;
3077 break;
3078 case AppleLanguageId::LITHUANIAN:
3079 nLang = LANGUAGE_LITHUANIAN;
3080 break;
3081 case AppleLanguageId::POLISH:
3082 nLang = LANGUAGE_POLISH;
3083 break;
3084 case AppleLanguageId::HUNGARIAN:
3085 nLang = LANGUAGE_HUNGARIAN;
3086 break;
3087 case AppleLanguageId::ESTONIAN:
3088 nLang = LANGUAGE_ESTONIAN;
3089 break;
3090 case AppleLanguageId::LATVIAN:
3091 nLang = LANGUAGE_LATVIAN;
3092 break;
3093 case AppleLanguageId::SAMI:
3094 nLang = LANGUAGE_SAMI_NORTHERN_NORWAY; //maybe
3095 break;
3096 case AppleLanguageId::FAROESE:
3097 nLang = LANGUAGE_FAEROESE;
3098 break;
3099 case AppleLanguageId::FARSI:
3100 nLang = LANGUAGE_FARSI;
3101 break;
3102 case AppleLanguageId::RUSSIAN:
3103 nLang = LANGUAGE_RUSSIAN;
3104 break;
3105 case AppleLanguageId::CHINESE_SIMPLIFIED:
3106 nLang = LANGUAGE_CHINESE_SIMPLIFIED;
3107 break;
3108 case AppleLanguageId::FLEMISH:
3109 nLang = LANGUAGE_DUTCH_BELGIAN;
3110 break;
3111 case AppleLanguageId::IRISH_GAELIC:
3112 nLang = LANGUAGE_GAELIC_IRELAND;
3113 break;
3114 case AppleLanguageId::ALBANIAN:
3115 nLang = LANGUAGE_ALBANIAN;
3116 break;
3117 case AppleLanguageId::ROMANIAN:
3118 nLang = LANGUAGE_ROMANIAN;
3119 break;
3120 case AppleLanguageId::CZECH:
3121 nLang = LANGUAGE_CZECH;
3122 break;
3123 case AppleLanguageId::SLOVAK:
3124 nLang = LANGUAGE_SLOVAK;
3125 break;
3126 case AppleLanguageId::SLOVENIAN:
3127 nLang = LANGUAGE_SLOVENIAN;
3128 break;
3129 case AppleLanguageId::YIDDISH:
3130 nLang = LANGUAGE_YIDDISH;
3131 break;
3132 case AppleLanguageId::SERBIAN:
3133 nLang = LANGUAGE_SERBIAN_CYRILLIC_SERBIA; //maybe
3134 break;
3135 case AppleLanguageId::MACEDONIAN:
3136 nLang = LANGUAGE_MACEDONIAN;
3137 break;
3138 case AppleLanguageId::BULGARIAN:
3139 nLang = LANGUAGE_BULGARIAN;
3140 break;
3141 case AppleLanguageId::UKRAINIAN:
3142 nLang = LANGUAGE_UKRAINIAN;
3143 break;
3144 case AppleLanguageId::BYELORUSSIAN:
3145 nLang = LANGUAGE_BELARUSIAN;
3146 break;
3147 case AppleLanguageId::UZBEK:
3148 nLang = LANGUAGE_UZBEK_CYRILLIC; //maybe
3149 break;
3150 case AppleLanguageId::KAZAKH:
3151 nLang = LANGUAGE_KAZAKH;
3152 break;
3153 case AppleLanguageId::AZERI_CYRILLIC:
3154 nLang = LANGUAGE_AZERI_CYRILLIC;
3155 break;
3156 case AppleLanguageId::AZERI_ARABIC:
3157 return LanguageTag("az-Arab");
3158 case AppleLanguageId::ARMENIAN:
3159 nLang = LANGUAGE_ARMENIAN;
3160 break;
3161 case AppleLanguageId::GEORGIAN:
3162 nLang = LANGUAGE_GEORGIAN;
3163 break;
3164 case AppleLanguageId::MOLDAVIAN:
3165 nLang = LANGUAGE_ROMANIAN_MOLDOVA;
3166 break;
3167 case AppleLanguageId::KIRGHIZ:
3168 nLang = LANGUAGE_KIRGHIZ;
3169 break;
3170 case AppleLanguageId::TAJIKI:
3171 nLang = LANGUAGE_TAJIK;
3172 break;
3173 case AppleLanguageId::TURKMEN:
3174 nLang = LANGUAGE_TURKMEN;
3175 break;
3176 case AppleLanguageId::MONGOLIAN_MONGOLIAN:
3177 nLang = LANGUAGE_MONGOLIAN_MONGOLIAN_MONGOLIA;
3178 break;
3179 case AppleLanguageId::MONGOLIAN_CYRILLIC:
3180 nLang = LANGUAGE_MONGOLIAN_CYRILLIC_MONGOLIA;
3181 break;
3182 case AppleLanguageId::PASHTO:
3183 nLang = LANGUAGE_PASHTO;
3184 break;
3185 case AppleLanguageId::KURDISH:
3186 nLang = LANGUAGE_USER_KURDISH_TURKEY; //maybe
3187 break;
3188 case AppleLanguageId::KASHMIRI:
3189 nLang = LANGUAGE_KASHMIRI;
3190 break;
3191 case AppleLanguageId::SINDHI:
3192 nLang = LANGUAGE_SINDHI;
3193 break;
3194 case AppleLanguageId::TIBETAN:
3195 nLang = LANGUAGE_TIBETAN;
3196 break;
3197 case AppleLanguageId::NEPALI:
3198 nLang = LANGUAGE_NEPALI;
3199 break;
3200 case AppleLanguageId::SANSKRIT:
3201 nLang = LANGUAGE_SANSKRIT;
3202 break;
3203 case AppleLanguageId::MARATHI:
3204 nLang = LANGUAGE_MARATHI;
3205 break;
3206 case AppleLanguageId::BENGALI:
3207 nLang = LANGUAGE_BENGALI;
3208 break;
3209 case AppleLanguageId::ASSAMESE:
3210 nLang = LANGUAGE_ASSAMESE;
3211 break;
3212 case AppleLanguageId::GUJARATI:
3213 nLang = LANGUAGE_GUJARATI;
3214 break;
3215 case AppleLanguageId::PUNJABI:
3216 nLang = LANGUAGE_PUNJABI;
3217 break;
3218 case AppleLanguageId::ORIYA:
3219 nLang = LANGUAGE_ODIA;
3220 break;
3221 case AppleLanguageId::MALAYALAM:
3222 nLang = LANGUAGE_MALAYALAM;
3223 break;
3224 case AppleLanguageId::KANNADA:
3225 nLang = LANGUAGE_KANNADA;
3226 break;
3227 case AppleLanguageId::TAMIL:
3228 nLang = LANGUAGE_TAMIL;
3229 break;
3230 case AppleLanguageId::TELUGU:
3231 nLang = LANGUAGE_TELUGU;
3232 break;
3233 case AppleLanguageId::SINHALESE:
3234 nLang = LANGUAGE_SINHALESE_SRI_LANKA;
3235 break;
3236 case AppleLanguageId::BURMESE:
3237 nLang = LANGUAGE_BURMESE;
3238 break;
3239 case AppleLanguageId::KHMER:
3240 nLang = LANGUAGE_KHMER;
3241 break;
3242 case AppleLanguageId::LAO:
3243 nLang = LANGUAGE_LAO;
3244 break;
3245 case AppleLanguageId::VIETNAMESE:
3246 nLang = LANGUAGE_VIETNAMESE;
3247 break;
3248 case AppleLanguageId::INDONESIAN:
3249 nLang = LANGUAGE_INDONESIAN;
3250 break;
3251 case AppleLanguageId::TAGALONG:
3252 nLang = LANGUAGE_USER_TAGALOG;
3253 break;
3254 case AppleLanguageId::MALAY_LATIN:
3255 nLang = LANGUAGE_MALAY_MALAYSIA;
3256 break;
3257 case AppleLanguageId::MALAY_ARABIC:
3258 nLang = LANGUAGE_USER_MALAY_ARABIC_MALAYSIA;
3259 break;
3260 case AppleLanguageId::AMHARIC:
3261 nLang = LANGUAGE_AMHARIC_ETHIOPIA;
3262 break;
3263 case AppleLanguageId::TIGRINYA:
3264 nLang = LANGUAGE_TIGRIGNA_ETHIOPIA;
3265 break;
3266 case AppleLanguageId::GALLA:
3267 nLang = LANGUAGE_OROMO;
3268 break;
3269 case AppleLanguageId::SOMALI:
3270 nLang = LANGUAGE_SOMALI;
3271 break;
3272 case AppleLanguageId::SWAHILI:
3273 nLang = LANGUAGE_SWAHILI;
3274 break;
3275 case AppleLanguageId::KINYARWANDA:
3276 nLang = LANGUAGE_KINYARWANDA_RWANDA;
3277 break;
3278 case AppleLanguageId::RUNDI:
3279 return LanguageTag("rn");
3280 case AppleLanguageId::NYANJA:
3281 nLang = LANGUAGE_USER_NYANJA;
3282 break;
3283 case AppleLanguageId::MALAGASY:
3284 nLang = LANGUAGE_MALAGASY_PLATEAU;
3285 break;
3286 case AppleLanguageId::ESPERANTO:
3287 nLang = LANGUAGE_USER_ESPERANTO;
3288 break;
3289 case AppleLanguageId::WELSH:
3290 nLang = LANGUAGE_WELSH;
3291 break;
3292 case AppleLanguageId::BASQUE:
3293 nLang = LANGUAGE_BASQUE;
3294 break;
3295 case AppleLanguageId::CATALAN:
3296 nLang = LANGUAGE_CATALAN;
3297 break;
3298 case AppleLanguageId::LATIN:
3299 nLang = LANGUAGE_LATIN;
3300 break;
3301 case AppleLanguageId::QUENCHUA:
3302 nLang = LANGUAGE_QUECHUA_BOLIVIA; //maybe
3303 break;
3304 case AppleLanguageId::GUARANI:
3305 nLang = LANGUAGE_GUARANI_PARAGUAY;
3306 break;
3307 case AppleLanguageId::AYMARA:
3308 return LanguageTag("ay");
3309 case AppleLanguageId::TATAR:
3310 nLang = LANGUAGE_TATAR;
3311 break;
3312 case AppleLanguageId::UIGHUR:
3313 nLang = LANGUAGE_UIGHUR_CHINA;
3314 break;
3315 case AppleLanguageId::DZONGKHA:
3316 nLang = LANGUAGE_DZONGKHA_BHUTAN;
3317 break;
3318 case AppleLanguageId::JAVANESE_LATIN:
3319 return LanguageTag("jv-Latn");
3320 case AppleLanguageId::SUNDANESE_LATIN:
3321 return LanguageTag("su-Latn");
3322 case AppleLanguageId::GALICIAN:
3323 nLang = LANGUAGE_GALICIAN;
3324 break;
3325 case AppleLanguageId::AFRIKAANS:
3326 nLang = LANGUAGE_AFRIKAANS;
3327 break;
3328 case AppleLanguageId::BRETON:
3329 nLang = LANGUAGE_BRETON_FRANCE;
3330 break;
3331 case AppleLanguageId::INUKTITUT:
3332 nLang = LANGUAGE_INUKTITUT_LATIN_CANADA; //probably
3333 break;
3334 case AppleLanguageId::SCOTTISH_GAELIC:
3335 nLang = LANGUAGE_GAELIC_SCOTLAND;
3336 break;
3337 case AppleLanguageId::MANX_GAELIC:
3338 nLang = LANGUAGE_USER_MANX;
3339 break;
3340 case AppleLanguageId::IRISH_GAELIC_WITH_DOT_ABOVE:
3341 return LanguageTag("ga-Latg");
3342 case AppleLanguageId::TONGAN:
3343 return LanguageTag("to");
3344 case AppleLanguageId::GREEK_POLYTONIC:
3345 nLang = LANGUAGE_USER_ANCIENT_GREEK;
3346 break;
3347 case AppleLanguageId::GREENLANDIC:
3348 nLang = LANGUAGE_KALAALLISUT_GREENLAND;
3349 break;
3350 case AppleLanguageId::AZERI_LATIN:
3351 nLang = LANGUAGE_AZERI_LATIN;
3352 break;
3355 return LanguageTag(nLang);
3358 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */