bump product version to 6.4.0.3
[LibreOffice.git] / i18nlangtag / source / languagetag / languagetag.cxx
blob7f580dfc9dd33a7f17e55a2ea55d1138b3cb4bd3
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
10 #include <config_folders.h>
11 #include <config_liblangtag.h>
13 #include <i18nlangtag/languagetag.hxx>
14 #include <i18nlangtag/applelangid.hxx>
15 #include <i18nlangtag/mslangid.hxx>
16 #include <rtl/ustrbuf.hxx>
17 #include <rtl/bootstrap.hxx>
18 #include <sal/log.hxx>
19 #include <osl/file.hxx>
20 #include <osl/mutex.hxx>
21 #include <rtl/instance.hxx>
22 #include <rtl/locale.h>
23 #include <algorithm>
24 #include <map>
25 #include <unordered_set>
27 //#define erDEBUG
29 #if LIBLANGTAG_INLINE_FIX
30 #define LT_HAVE_INLINE
31 #endif
32 #include <liblangtag/langtag.h>
34 using namespace com::sun::star;
37 // Helper to ensure lt_error_t is free'd
38 struct myLtError
40 lt_error_t* p;
41 myLtError() : p(nullptr) {}
42 ~myLtError() { if (p) lt_error_unref( p); }
45 // "statics" to be returned as const reference to an empty locale and string.
46 namespace {
47 struct theEmptyLocale : public rtl::Static< lang::Locale, theEmptyLocale > {};
48 struct theEmptyBcp47 : public rtl::Static< OUString, theEmptyBcp47 > {};
51 typedef std::unordered_set< OUString > KnownTagSet;
52 namespace {
53 struct theKnowns : public rtl::Static< KnownTagSet, theKnowns > {};
54 struct theMutex : public rtl::Static< osl::Mutex, theMutex > {};
57 static const KnownTagSet & getKnowns()
59 KnownTagSet & rKnowns = theKnowns::get();
60 if (rKnowns.empty())
62 osl::MutexGuard aGuard( theMutex::get());
63 if (rKnowns.empty())
65 ::std::vector< MsLangId::LanguagetagMapping > aDefined( MsLangId::getDefinedLanguagetags());
66 for (auto const& elemDefined : aDefined)
68 // Do not use the BCP47 string here to initialize the
69 // LanguageTag because then canonicalize() would call this
70 // getKnowns() again...
71 ::std::vector< OUString > aFallbacks( LanguageTag( elemDefined.mnLang).getFallbackStrings( true));
72 for (auto const& fallback : aFallbacks)
74 rKnowns.insert(fallback);
79 return rKnowns;
83 namespace {
84 struct compareIgnoreAsciiCaseLess
86 bool operator()( const OUString& r1, const OUString& r2 ) const
88 return r1.compareToIgnoreAsciiCase( r2) < 0;
91 typedef ::std::map< OUString, LanguageTag::ImplPtr, compareIgnoreAsciiCaseLess > MapBcp47;
92 typedef ::std::map< LanguageType, LanguageTag::ImplPtr > MapLangID;
93 struct theMapBcp47 : public rtl::Static< MapBcp47, theMapBcp47 > {};
94 struct theMapLangID : public rtl::Static< MapLangID, theMapLangID > {};
95 struct theDontKnow : public rtl::Static< LanguageTag::ImplPtr, theDontKnow > {};
96 struct theSystemLocale : public rtl::Static< LanguageTag::ImplPtr, theSystemLocale > {};
100 static LanguageType getNextOnTheFlyLanguage()
102 static LanguageType nOnTheFlyLanguage(0);
103 osl::MutexGuard aGuard( theMutex::get());
104 if (!nOnTheFlyLanguage)
105 nOnTheFlyLanguage = MsLangId::makeLangID( LANGUAGE_ON_THE_FLY_SUB_START, LANGUAGE_ON_THE_FLY_START);
106 else
108 if (MsLangId::getPrimaryLanguage( nOnTheFlyLanguage) != LANGUAGE_ON_THE_FLY_END)
109 ++nOnTheFlyLanguage;
110 else
112 LanguageType nSub = MsLangId::getSubLanguage( nOnTheFlyLanguage);
113 if (nSub != LANGUAGE_ON_THE_FLY_SUB_END)
114 nOnTheFlyLanguage = MsLangId::makeLangID( ++nSub, LANGUAGE_ON_THE_FLY_START);
115 else
117 SAL_WARN( "i18nlangtag", "getNextOnTheFlyLanguage: none left! ("
118 << ((sal_uInt16(LANGUAGE_ON_THE_FLY_END) - sal_uInt16(LANGUAGE_ON_THE_FLY_START) + 1)
119 * (sal_uInt16(LANGUAGE_ON_THE_FLY_SUB_END) - sal_uInt16(LANGUAGE_ON_THE_FLY_SUB_START) + 1))
120 << " consumed?!?)");
121 return LanguageType(0);
125 #if OSL_DEBUG_LEVEL > 0
126 static size_t nOnTheFlies = 0;
127 ++nOnTheFlies;
128 SAL_INFO( "i18nlangtag", "getNextOnTheFlyLanguage: number " << nOnTheFlies);
129 #endif
130 return nOnTheFlyLanguage;
134 // static
135 bool LanguageTag::isOnTheFlyID( LanguageType nLang )
137 LanguageType nPri = MsLangId::getPrimaryLanguage( nLang);
138 LanguageType nSub = MsLangId::getSubLanguage( nLang);
139 return
140 LANGUAGE_ON_THE_FLY_START <= nPri && nPri <= LANGUAGE_ON_THE_FLY_END &&
141 LANGUAGE_ON_THE_FLY_SUB_START <= nSub && nSub <= LANGUAGE_ON_THE_FLY_SUB_END;
145 /** A reference holder for liblangtag data de/initialization, one static
146 instance. Currently implemented such that the first "ref" inits and dtor
147 (our library deinitialized) tears down.
149 class LiblangtagDataRef
151 public:
152 LiblangtagDataRef();
153 ~LiblangtagDataRef();
154 void init()
156 if (!mbInitialized)
157 setup();
159 private:
160 OString maDataPath; // path to liblangtag data, "|" if system
161 bool mbInitialized;
163 void setupDataPath();
164 void setup();
165 static void teardown();
168 namespace {
169 struct theDataRef : public rtl::Static< LiblangtagDataRef, theDataRef > {};
172 LiblangtagDataRef::LiblangtagDataRef()
174 mbInitialized(false)
178 LiblangtagDataRef::~LiblangtagDataRef()
180 if (mbInitialized)
181 teardown();
184 void LiblangtagDataRef::setup()
186 SAL_INFO( "i18nlangtag", "LiblangtagDataRef::setup: initializing database");
187 if (maDataPath.isEmpty())
188 setupDataPath();
189 lt_db_initialize();
190 mbInitialized = true;
193 void LiblangtagDataRef::teardown()
195 SAL_INFO( "i18nlangtag", "LiblangtagDataRef::teardown: finalizing database");
196 lt_db_finalize();
199 void LiblangtagDataRef::setupDataPath()
201 // maDataPath is assumed to be empty here.
202 OUString aURL("$BRAND_BASE_DIR/" LIBO_SHARE_FOLDER "/liblangtag");
203 rtl::Bootstrap::expandMacros(aURL); //TODO: detect failure
205 // Check if data is in our own installation, else assume system
206 // installation.
207 OUString aData = aURL + "/language-subtag-registry.xml";
208 osl::DirectoryItem aDirItem;
209 if (osl::DirectoryItem::get( aData, aDirItem) == osl::DirectoryItem::E_None)
211 OUString aPath;
212 if (osl::FileBase::getSystemPathFromFileURL( aURL, aPath) == osl::FileBase::E_None)
213 maDataPath = OUStringToOString( aPath, RTL_TEXTENCODING_UTF8);
215 if (maDataPath.isEmpty())
216 maDataPath = "|"; // assume system
217 else
218 lt_db_set_datadir( maDataPath.getStr());
222 /* TODO: we could transform known vendor and browser-specific variants to known
223 * BCP 47 if available. For now just remove them to not confuse any later
224 * treatments that check for empty variants. This vendor stuff was never
225 * supported anyway. */
226 static void handleVendorVariant( css::lang::Locale & rLocale )
228 if (!rLocale.Variant.isEmpty() && rLocale.Language != I18NLANGTAG_QLT)
229 rLocale.Variant.clear();
233 class LanguageTagImpl
235 public:
237 explicit LanguageTagImpl( const LanguageTag & rLanguageTag );
238 explicit LanguageTagImpl( const LanguageTagImpl & rLanguageTagImpl );
239 ~LanguageTagImpl();
240 LanguageTagImpl& operator=( const LanguageTagImpl & rLanguageTagImpl );
242 private:
244 friend class LanguageTag;
246 enum Decision
248 DECISION_DONTKNOW,
249 DECISION_NO,
250 DECISION_YES
253 mutable css::lang::Locale maLocale;
254 mutable OUString maBcp47;
255 mutable OUString maCachedLanguage; ///< cache getLanguage()
256 mutable OUString maCachedScript; ///< cache getScript()
257 mutable OUString maCachedCountry; ///< cache getCountry()
258 mutable OUString maCachedVariants; ///< cache getVariants()
259 mutable OUString maCachedGlibcString; ///< cache getGlibcLocaleString()
260 mutable lt_tag_t* mpImplLangtag; ///< liblangtag pointer
261 mutable LanguageType mnLangID;
262 mutable LanguageTag::ScriptType meScriptType;
263 mutable Decision meIsValid;
264 mutable Decision meIsIsoLocale;
265 mutable Decision meIsIsoODF;
266 mutable Decision meIsLiblangtagNeeded; ///< whether processing with liblangtag needed
267 bool mbSystemLocale : 1;
268 mutable bool mbInitializedBcp47 : 1;
269 mutable bool mbInitializedLocale : 1;
270 mutable bool mbInitializedLangID : 1;
271 mutable bool mbCachedLanguage : 1;
272 mutable bool mbCachedScript : 1;
273 mutable bool mbCachedCountry : 1;
274 mutable bool mbCachedVariants : 1;
275 mutable bool mbCachedGlibcString : 1;
277 OUString const & getBcp47() const;
278 OUString const & getLanguage() const;
279 OUString const & getScript() const;
280 OUString const & getCountry() const;
281 OUString getRegion() const;
282 OUString const & getVariants() const;
283 bool hasScript() const;
284 OUString getGlibcLocaleString() const;
286 void setScriptType(LanguageTag::ScriptType st);
287 LanguageTag::ScriptType getScriptType() const;
289 bool isIsoLocale() const;
290 bool isIsoODF() const;
291 bool isValidBcp47() const;
293 void convertLocaleToBcp47();
294 bool convertLocaleToLang( bool bAllowOnTheFlyID );
295 void convertBcp47ToLocale();
296 void convertBcp47ToLang();
297 void convertLangToLocale();
298 void convertLangToBcp47();
300 /** @return whether BCP 47 language tag string was changed. */
301 bool canonicalize();
303 /** Canonicalize if not yet done and synchronize initialized conversions.
305 @return whether BCP 47 language tag string was changed.
307 bool synCanonicalize();
309 OUString getLanguageFromLangtag();
310 OUString getScriptFromLangtag();
311 OUString getRegionFromLangtag();
312 OUString getVariantsFromLangtag();
314 /** Generates on-the-fly LangID and registers the maBcp47,mnLangID pair.
316 @param nRegisterID
317 If not 0 and not LANGUAGE_DONTKNOW, suggest (!) to use that ID
318 instead of generating an on-the-fly ID. Implementation may
319 still generate an ID if the suggested ID is already used for
320 another language tag.
322 @return NULL if no ID could be obtained or registration failed.
324 LanguageTag::ImplPtr registerOnTheFly( LanguageType nRegisterID );
326 /** Obtain Language, Script, Country and Variants via simpleExtract() and
327 assign them to the cached variables if successful.
329 @return simpleExtract() successfully extracted and cached.
331 bool cacheSimpleLSCV();
333 enum Extraction
335 EXTRACTED_NONE,
336 EXTRACTED_LSC,
337 EXTRACTED_LV,
338 EXTRACTED_C_LOCALE,
339 EXTRACTED_X,
340 EXTRACTED_X_JOKER,
341 EXTRACTED_KNOWN_BAD
344 /** Of a language tag of the form lll[-Ssss][-CC][-vvvvvvvv] extract the
345 portions.
347 Does not check case or content!
349 @return EXTRACTED_LSC if simple tag was detected (i.e. one that
350 would fulfill the isIsoODF() condition),
351 EXTRACTED_LV if a tag with variant was detected,
352 EXTRACTED_C_LOCALE if a 'C' locale was detected,
353 EXTRACTED_X if x-... privateuse tag was detected,
354 EXTRACTED_X_JOKER if "*" joker was detected,
355 EXTRACTED_KNOWN_BAD if a bad but known (to be remapped) tag was detected
356 EXTRACTED_NONE else.
358 static Extraction simpleExtract( const OUString& rBcp47,
359 OUString& rLanguage,
360 OUString& rScript,
361 OUString& rCountry,
362 OUString& rVariants );
364 /** Convert Locale to BCP 47 string without resolving system and creating
365 temporary LanguageTag instances. */
366 static OUString convertToBcp47( const css::lang::Locale& rLocale );
371 LanguageTagImpl::LanguageTagImpl( const LanguageTag & rLanguageTag )
373 maLocale( rLanguageTag.maLocale),
374 maBcp47( rLanguageTag.maBcp47),
375 mpImplLangtag( nullptr),
376 mnLangID( rLanguageTag.mnLangID),
377 meScriptType( LanguageTag::ScriptType::UNKNOWN),
378 meIsValid( DECISION_DONTKNOW),
379 meIsIsoLocale( DECISION_DONTKNOW),
380 meIsIsoODF( DECISION_DONTKNOW),
381 meIsLiblangtagNeeded( DECISION_DONTKNOW),
382 mbSystemLocale( rLanguageTag.mbSystemLocale),
383 mbInitializedBcp47( rLanguageTag.mbInitializedBcp47),
384 mbInitializedLocale( rLanguageTag.mbInitializedLocale),
385 mbInitializedLangID( rLanguageTag.mbInitializedLangID),
386 mbCachedLanguage( false),
387 mbCachedScript( false),
388 mbCachedCountry( false),
389 mbCachedVariants( false),
390 mbCachedGlibcString( false)
395 LanguageTagImpl::LanguageTagImpl( const LanguageTagImpl & rLanguageTagImpl )
397 maLocale( rLanguageTagImpl.maLocale),
398 maBcp47( rLanguageTagImpl.maBcp47),
399 maCachedLanguage( rLanguageTagImpl.maCachedLanguage),
400 maCachedScript( rLanguageTagImpl.maCachedScript),
401 maCachedCountry( rLanguageTagImpl.maCachedCountry),
402 maCachedVariants( rLanguageTagImpl.maCachedVariants),
403 maCachedGlibcString( rLanguageTagImpl.maCachedGlibcString),
404 mpImplLangtag( rLanguageTagImpl.mpImplLangtag ?
405 lt_tag_copy( rLanguageTagImpl.mpImplLangtag) : nullptr),
406 mnLangID( rLanguageTagImpl.mnLangID),
407 meScriptType( rLanguageTagImpl.meScriptType),
408 meIsValid( rLanguageTagImpl.meIsValid),
409 meIsIsoLocale( rLanguageTagImpl.meIsIsoLocale),
410 meIsIsoODF( rLanguageTagImpl.meIsIsoODF),
411 meIsLiblangtagNeeded( rLanguageTagImpl.meIsLiblangtagNeeded),
412 mbSystemLocale( rLanguageTagImpl.mbSystemLocale),
413 mbInitializedBcp47( rLanguageTagImpl.mbInitializedBcp47),
414 mbInitializedLocale( rLanguageTagImpl.mbInitializedLocale),
415 mbInitializedLangID( rLanguageTagImpl.mbInitializedLangID),
416 mbCachedLanguage( rLanguageTagImpl.mbCachedLanguage),
417 mbCachedScript( rLanguageTagImpl.mbCachedScript),
418 mbCachedCountry( rLanguageTagImpl.mbCachedCountry),
419 mbCachedVariants( rLanguageTagImpl.mbCachedVariants),
420 mbCachedGlibcString( rLanguageTagImpl.mbCachedGlibcString)
422 if (mpImplLangtag)
423 theDataRef::get().init();
427 LanguageTagImpl& LanguageTagImpl::operator=( const LanguageTagImpl & rLanguageTagImpl )
429 if (&rLanguageTagImpl == this)
430 return *this;
432 maLocale = rLanguageTagImpl.maLocale;
433 maBcp47 = rLanguageTagImpl.maBcp47;
434 maCachedLanguage = rLanguageTagImpl.maCachedLanguage;
435 maCachedScript = rLanguageTagImpl.maCachedScript;
436 maCachedCountry = rLanguageTagImpl.maCachedCountry;
437 maCachedVariants = rLanguageTagImpl.maCachedVariants;
438 maCachedGlibcString = rLanguageTagImpl.maCachedGlibcString;
439 lt_tag_t * oldTag = mpImplLangtag;
440 mpImplLangtag = rLanguageTagImpl.mpImplLangtag ?
441 lt_tag_copy( rLanguageTagImpl.mpImplLangtag) : nullptr;
442 lt_tag_unref(oldTag);
443 mnLangID = rLanguageTagImpl.mnLangID;
444 meScriptType = rLanguageTagImpl.meScriptType;
445 meIsValid = rLanguageTagImpl.meIsValid;
446 meIsIsoLocale = rLanguageTagImpl.meIsIsoLocale;
447 meIsIsoODF = rLanguageTagImpl.meIsIsoODF;
448 meIsLiblangtagNeeded= rLanguageTagImpl.meIsLiblangtagNeeded;
449 mbSystemLocale = rLanguageTagImpl.mbSystemLocale;
450 mbInitializedBcp47 = rLanguageTagImpl.mbInitializedBcp47;
451 mbInitializedLocale = rLanguageTagImpl.mbInitializedLocale;
452 mbInitializedLangID = rLanguageTagImpl.mbInitializedLangID;
453 mbCachedLanguage = rLanguageTagImpl.mbCachedLanguage;
454 mbCachedScript = rLanguageTagImpl.mbCachedScript;
455 mbCachedCountry = rLanguageTagImpl.mbCachedCountry;
456 mbCachedVariants = rLanguageTagImpl.mbCachedVariants;
457 mbCachedGlibcString = rLanguageTagImpl.mbCachedGlibcString;
458 if (mpImplLangtag && !oldTag)
459 theDataRef::get().init();
460 return *this;
464 LanguageTagImpl::~LanguageTagImpl()
466 if (mpImplLangtag)
468 lt_tag_unref( mpImplLangtag);
473 LanguageTag::LanguageTag( const OUString & rBcp47LanguageTag, bool bCanonicalize )
475 maBcp47( rBcp47LanguageTag),
476 mnLangID( LANGUAGE_DONTKNOW),
477 mbSystemLocale( rBcp47LanguageTag.isEmpty()),
478 mbInitializedBcp47( !mbSystemLocale),
479 mbInitializedLocale( false),
480 mbInitializedLangID( false),
481 mbIsFallback( false)
483 if (bCanonicalize)
485 getImpl()->canonicalize();
486 // Registration itself may already have canonicalized, so do an
487 // unconditional sync.
488 syncFromImpl();
494 LanguageTag::LanguageTag( const css::lang::Locale & rLocale )
496 maLocale( rLocale),
497 mnLangID( LANGUAGE_DONTKNOW),
498 mbSystemLocale( rLocale.Language.isEmpty()),
499 mbInitializedBcp47( false),
500 mbInitializedLocale( false), // we do not know which mess we got passed in
501 mbInitializedLangID( false),
502 mbIsFallback( false)
504 handleVendorVariant( maLocale);
508 LanguageTag::LanguageTag( LanguageType nLanguage )
510 mnLangID( nLanguage),
511 mbSystemLocale( nLanguage == LANGUAGE_SYSTEM),
512 mbInitializedBcp47( false),
513 mbInitializedLocale( false),
514 mbInitializedLangID( !mbSystemLocale),
515 mbIsFallback( false)
520 LanguageTag::LanguageTag( const OUString& rBcp47, const OUString& rLanguage,
521 const OUString& rScript, const OUString& rCountry )
523 maBcp47( rBcp47),
524 mnLangID( LANGUAGE_DONTKNOW),
525 mbSystemLocale( rBcp47.isEmpty() && rLanguage.isEmpty()),
526 mbInitializedBcp47( !rBcp47.isEmpty()),
527 mbInitializedLocale( false),
528 mbInitializedLangID( false),
529 mbIsFallback( false)
531 if (!mbSystemLocale && !mbInitializedBcp47)
533 if (rScript.isEmpty())
535 maBcp47 = rLanguage + "-" + rCountry;
536 mbInitializedBcp47 = true;
537 maLocale.Language = rLanguage;
538 maLocale.Country = rCountry;
539 mbInitializedLocale = true;
541 else
543 if (rCountry.isEmpty())
544 maBcp47 = rLanguage + "-" + rScript;
545 else
546 maBcp47 = rLanguage + "-" + rScript + "-" + rCountry;
547 mbInitializedBcp47 = true;
548 maLocale.Language = I18NLANGTAG_QLT;
549 maLocale.Country = rCountry;
550 maLocale.Variant = maBcp47;
551 mbInitializedLocale = true;
557 LanguageTag::LanguageTag( const rtl_Locale & rLocale )
559 maLocale( rLocale.Language, rLocale.Country, rLocale.Variant),
560 mnLangID( LANGUAGE_DONTKNOW),
561 mbSystemLocale( maLocale.Language.isEmpty()),
562 mbInitializedBcp47( false),
563 mbInitializedLocale( !mbSystemLocale),
564 mbInitializedLangID( false),
565 mbIsFallback( false)
567 convertFromRtlLocale();
570 LanguageTag::~LanguageTag() {}
572 LanguageTag::ImplPtr LanguageTagImpl::registerOnTheFly( LanguageType nRegisterID )
574 LanguageTag::ImplPtr pImpl;
576 if (!mbInitializedBcp47)
578 if (mbInitializedLocale)
580 maBcp47 = LanguageTagImpl::convertToBcp47( maLocale);
581 mbInitializedBcp47 = !maBcp47.isEmpty();
584 if (maBcp47.isEmpty())
586 SAL_WARN( "i18nlangtag", "LanguageTagImpl::registerOnTheFly: no Bcp47 string, no registering");
587 return pImpl;
590 osl::MutexGuard aGuard( theMutex::get());
592 MapBcp47& rMapBcp47 = theMapBcp47::get();
593 MapBcp47::const_iterator it( rMapBcp47.find( maBcp47));
594 bool bOtherImpl = false;
595 if (it != rMapBcp47.end())
597 SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: found impl for '" << maBcp47 << "'");
598 pImpl = (*it).second;
599 if (pImpl.get() != this)
601 // Could happen for example if during registerImpl() the tag was
602 // changed via canonicalize() and the result was already present in
603 // the map before, for example 'bn-Beng' => 'bn'. This specific
604 // case is now taken care of in registerImpl() and doesn't reach
605 // here. However, use the already existing impl if it matches.
606 SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: using other impl for this '" << maBcp47 << "'");
607 *this = *pImpl; // ensure consistency
608 bOtherImpl = true;
611 else
613 SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: new impl for '" << maBcp47 << "'");
614 pImpl.reset( new LanguageTagImpl( *this));
615 rMapBcp47.insert( ::std::make_pair( maBcp47, pImpl));
618 if (!bOtherImpl || !pImpl->mbInitializedLangID)
620 if (nRegisterID == LanguageType(0) || nRegisterID == LANGUAGE_DONTKNOW)
621 nRegisterID = getNextOnTheFlyLanguage();
622 else
624 // Accept a suggested ID only if it is not mapped yet to something
625 // different, otherwise we would end up with ambiguous assignments
626 // of different language tags, for example for the same primary
627 // LangID with "no", "nb" and "nn".
628 const MapLangID& rMapLangID = theMapLangID::get();
629 MapLangID::const_iterator itID( rMapLangID.find( nRegisterID));
630 if (itID != rMapLangID.end())
632 if ((*itID).second->maBcp47 != maBcp47)
634 SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: not using suggested 0x"
635 << ::std::hex << nRegisterID << " for '" << maBcp47 << "' have '"
636 << (*itID).second->maBcp47 << "'");
637 nRegisterID = getNextOnTheFlyLanguage();
639 else
641 SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: suggested 0x"
642 << ::std::hex << nRegisterID << " for '" << maBcp47 << "' already registered");
646 if (!nRegisterID)
648 // out of IDs, nothing to register
649 return pImpl;
651 pImpl->mnLangID = nRegisterID;
652 pImpl->mbInitializedLangID = true;
653 if (pImpl.get() != this)
655 mnLangID = nRegisterID;
656 mbInitializedLangID = true;
660 ::std::pair< MapLangID::const_iterator, bool > res(
661 theMapLangID::get().insert( ::std::make_pair( pImpl->mnLangID, pImpl)));
662 if (res.second)
664 SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: cross-inserted 0x"
665 << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "'");
667 else
669 SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: not cross-inserted 0x"
670 << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "' have '"
671 << (*res.first).second->maBcp47 << "'");
674 return pImpl;
678 LanguageTag::ScriptType LanguageTag::getOnTheFlyScriptType( LanguageType nRegisterID )
680 const MapLangID& rMapLangID = theMapLangID::get();
681 MapLangID::const_iterator itID( rMapLangID.find( nRegisterID));
682 if (itID != rMapLangID.end())
683 return (*itID).second->getScriptType();
684 else
685 return ScriptType::UNKNOWN;
689 // static
690 void LanguageTag::setConfiguredSystemLanguage( LanguageType nLang )
692 if (nLang == LANGUAGE_DONTKNOW || nLang == LANGUAGE_SYSTEM)
694 SAL_WARN( "i18nlangtag",
695 "LanguageTag::setConfiguredSystemLanguage: refusing to set unresolved system locale 0x" <<
696 ::std::hex << nLang);
697 return;
699 SAL_INFO( "i18nlangtag", "LanguageTag::setConfiguredSystemLanguage: setting to 0x" << ::std::hex << nLang);
700 MsLangId::LanguageTagAccess::setConfiguredSystemLanguage( nLang);
701 // Reset system locale to none and let registerImpl() do the rest to
702 // initialize a new one.
703 theSystemLocale::get().reset();
704 LanguageTag aLanguageTag( LANGUAGE_SYSTEM);
705 aLanguageTag.registerImpl();
708 static bool lt_tag_parse_disabled = false;
710 // static
711 void LanguageTag::disable_lt_tag_parse()
713 lt_tag_parse_disabled = true;
716 static bool lcl_isKnownOnTheFlyID( LanguageType nLang )
718 return nLang != LANGUAGE_DONTKNOW && nLang != LANGUAGE_SYSTEM &&
719 (LanguageTag::isOnTheFlyID( nLang) || (nLang == MsLangId::getPrimaryLanguage( nLang)));
723 LanguageTag::ImplPtr LanguageTag::registerImpl() const
725 // XXX NOTE: Do not use non-static LanguageTag::convert...() member methods
726 // here as they access getImpl() and syncFromImpl() and would lead to
727 // recursion. Also do not use the static LanguageTag::convertTo...()
728 // methods as they may create temporary LanguageTag instances. Only
729 // LanguageTagImpl::convertToBcp47(Locale) is ok.
731 ImplPtr pImpl;
733 #if OSL_DEBUG_LEVEL > 0
734 static size_t nCalls = 0;
735 ++nCalls;
736 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCalls << " calls");
737 #endif
739 // Do not register unresolved system locale, also force LangID if system
740 // and take the system locale shortcut if possible.
741 if (mbSystemLocale)
743 pImpl = theSystemLocale::get();
744 if (pImpl)
746 #if OSL_DEBUG_LEVEL > 0
747 static size_t nCallsSystem = 0;
748 ++nCallsSystem;
749 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystem << " system calls");
750 #endif
751 return pImpl;
753 if (!mbInitializedLangID)
755 mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
756 mbInitializedLangID = (mnLangID != LANGUAGE_SYSTEM);
757 SAL_WARN_IF( !mbInitializedLangID, "i18nlangtag", "LanguageTag::registerImpl: can't resolve system!");
761 if (mbInitializedLangID)
763 if (mnLangID == LANGUAGE_DONTKNOW)
765 // Heavy usage of LANGUAGE_DONTKNOW, make it an own Impl for all the
766 // conversion attempts. At the same time provide a central breakpoint
767 // to inspect such places.
768 LanguageTag::ImplPtr& rDontKnow = theDontKnow::get();
769 if (!rDontKnow)
770 rDontKnow.reset( new LanguageTagImpl( *this));
771 pImpl = rDontKnow;
772 #if OSL_DEBUG_LEVEL > 0
773 static size_t nCallsDontKnow = 0;
774 ++nCallsDontKnow;
775 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsDontKnow << " DontKnow calls");
776 #endif
777 return pImpl;
779 else
781 // A great share are calls for a system equal locale.
782 pImpl = theSystemLocale::get();
783 if (pImpl && pImpl->mnLangID == mnLangID)
785 #if OSL_DEBUG_LEVEL > 0
786 static size_t nCallsSystemEqual = 0;
787 ++nCallsSystemEqual;
788 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystemEqual
789 << " system equal LangID calls");
790 #endif
791 return pImpl;
796 // Force Bcp47 if not LangID.
797 if (!mbInitializedLangID && !mbInitializedBcp47)
799 // The one central point to set mbInitializedLocale=true if a
800 // LanguageTag was initialized with a Locale. We will now convert and
801 // possibly later resolve it.
802 if (!mbInitializedLocale && (mbSystemLocale || !maLocale.Language.isEmpty()))
803 mbInitializedLocale = true;
804 SAL_WARN_IF( !mbInitializedLocale, "i18nlangtag", "LanguageTag::registerImpl: still not mbInitializedLocale");
806 maBcp47 = LanguageTagImpl::convertToBcp47( maLocale);
807 mbInitializedBcp47 = !maBcp47.isEmpty();
810 if (mbInitializedBcp47)
812 // A great share are calls for a system equal locale.
813 pImpl = theSystemLocale::get();
814 if (pImpl && pImpl->maBcp47 == maBcp47)
816 #if OSL_DEBUG_LEVEL > 0
817 static size_t nCallsSystemEqual = 0;
818 ++nCallsSystemEqual;
819 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystemEqual << " system equal BCP47 calls");
820 #endif
821 return pImpl;
825 #if OSL_DEBUG_LEVEL > 0
826 static size_t nCallsNonSystem = 0;
827 ++nCallsNonSystem;
828 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsNonSystem << " non-system calls");
829 #endif
831 osl::MutexGuard aGuard( theMutex::get());
833 #if OSL_DEBUG_LEVEL > 0
834 static long nRunning = 0;
835 // Entering twice here is ok, which is needed for fallback init in
836 // getKnowns() in canonicalize() via pImpl->convertBcp47ToLocale() below,
837 // everything else is suspicious.
838 SAL_WARN_IF( nRunning > 1, "i18nlangtag", "LanguageTag::registerImpl: re-entered for '"
839 << maBcp47 << "' 0x" << ::std::hex << mnLangID );
840 struct Runner { Runner() { ++nRunning; } ~Runner() { --nRunning; } } aRunner;
841 #endif
843 // Prefer LangID map as find+insert needs less comparison work.
844 if (mbInitializedLangID)
846 MapLangID& rMap = theMapLangID::get();
847 MapLangID::const_iterator it( rMap.find( mnLangID));
848 if (it != rMap.end())
850 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: found impl for 0x" << ::std::hex << mnLangID);
851 pImpl = (*it).second;
853 else
855 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: new impl for 0x" << ::std::hex << mnLangID);
856 pImpl.reset( new LanguageTagImpl( *this));
857 rMap.insert( ::std::make_pair( mnLangID, pImpl));
858 // Try round-trip.
859 if (!pImpl->mbInitializedLocale)
860 pImpl->convertLangToLocale();
861 LanguageType nLang = MsLangId::Conversion::convertLocaleToLanguage( pImpl->maLocale);
862 // If round-trip is identical cross-insert to Bcp47 map.
863 if (nLang == pImpl->mnLangID)
865 if (!pImpl->mbInitializedBcp47)
866 pImpl->convertLocaleToBcp47();
867 ::std::pair< MapBcp47::const_iterator, bool > res(
868 theMapBcp47::get().insert( ::std::make_pair( pImpl->maBcp47, pImpl)));
869 if (res.second)
871 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: cross-inserted '" << pImpl->maBcp47 << "' for 0x" << ::std::hex << mnLangID);
873 else
875 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted '" << pImpl->maBcp47 << "' for 0x" << ::std::hex << mnLangID << " have 0x"
876 << ::std::hex << (*res.first).second->mnLangID);
879 else
881 if (!pImpl->mbInitializedBcp47)
882 pImpl->convertLocaleToBcp47();
883 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted '" << pImpl->maBcp47 << "' for 0x" << ::std::hex << mnLangID << " round-trip to 0x" << ::std::hex << nLang);
887 else if (!maBcp47.isEmpty())
889 MapBcp47& rMap = theMapBcp47::get();
890 MapBcp47::const_iterator it( rMap.find( maBcp47));
891 if (it != rMap.end())
893 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: found impl for '" << maBcp47 << "'");
894 pImpl = (*it).second;
896 else
898 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: new impl for '" << maBcp47 << "'");
899 pImpl.reset( new LanguageTagImpl( *this));
900 ::std::pair< MapBcp47::iterator, bool > insOrig( rMap.insert( ::std::make_pair( maBcp47, pImpl)));
901 // If changed after canonicalize() also add the resulting tag to
902 // the map.
903 if (pImpl->synCanonicalize())
905 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: canonicalized to '" << pImpl->maBcp47 << "'");
906 ::std::pair< MapBcp47::const_iterator, bool > insCanon(
907 rMap.insert( ::std::make_pair( pImpl->maBcp47, pImpl)));
908 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << (insCanon.second ? "" : "not ")
909 << "inserted '" << pImpl->maBcp47 << "'");
910 // If the canonicalized tag already existed (was not inserted)
911 // and impls are different, make this impl that impl and skip
912 // the rest if that LangID is present as well. The existing
913 // entry may or may not be different, it may even be strictly
914 // identical to this if it differs only in case (e.g. ko-kr =>
915 // ko-KR) which was corrected in canonicalize() hence also in
916 // the map entry but comparison is case insensitive and found
917 // it again.
918 if (!insCanon.second && (*insCanon.first).second != pImpl)
920 (*insOrig.first).second = pImpl = (*insCanon.first).second;
921 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: share impl with 0x"
922 << ::std::hex << pImpl->mnLangID);
925 if (!pImpl->mbInitializedLangID)
927 // Try round-trip Bcp47->Locale->LangID->Locale->Bcp47.
928 if (!pImpl->mbInitializedLocale)
929 pImpl->convertBcp47ToLocale();
930 if (!pImpl->mbInitializedLangID)
931 pImpl->convertLocaleToLang( true);
932 // Unconditionally insert (round-trip is possible) for
933 // on-the-fly IDs and (generated or not) suggested IDs.
934 bool bInsert = lcl_isKnownOnTheFlyID( pImpl->mnLangID);
935 OUString aBcp47;
936 if (!bInsert)
938 if (pImpl->mnLangID != LANGUAGE_DONTKNOW)
940 // May have involved canonicalize(), so compare with
941 // pImpl->maBcp47 instead of maBcp47!
942 aBcp47 = LanguageTagImpl::convertToBcp47(
943 MsLangId::Conversion::convertLanguageToLocale( pImpl->mnLangID ));
944 bInsert = (aBcp47 == pImpl->maBcp47);
947 // If round-trip is identical cross-insert to Bcp47 map.
948 if (bInsert)
950 ::std::pair< MapLangID::const_iterator, bool > res(
951 theMapLangID::get().insert( ::std::make_pair( pImpl->mnLangID, pImpl)));
952 if (res.second)
954 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: cross-inserted 0x"
955 << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "'");
957 else
959 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted 0x"
960 << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "' have '"
961 << (*res.first).second->maBcp47 << "'");
964 else
966 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted 0x"
967 << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "' round-trip to '"
968 << aBcp47 << "'");
973 else
975 SAL_WARN( "i18nlangtag", "LanguageTag::registerImpl: can't register for 0x" << ::std::hex << mnLangID );
976 pImpl.reset( new LanguageTagImpl( *this));
979 // If we reach here for mbSystemLocale we didn't have theSystemLocale
980 // above, so add it.
981 if (mbSystemLocale && mbInitializedLangID)
983 theSystemLocale::get() = pImpl;
984 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: added system locale 0x"
985 << ::std::hex << pImpl->mnLangID << " '" << pImpl->maBcp47 << "'");
988 return pImpl;
992 LanguageTagImpl const * LanguageTag::getImpl() const
994 if (!mpImpl)
996 mpImpl = registerImpl();
997 syncVarsFromRawImpl();
999 return mpImpl.get();
1002 LanguageTagImpl * LanguageTag::getImpl()
1004 if (!mpImpl)
1006 mpImpl = registerImpl();
1007 syncVarsFromRawImpl();
1009 return mpImpl.get();
1012 void LanguageTag::resetVars()
1014 mpImpl.reset();
1015 maLocale = lang::Locale();
1016 maBcp47.clear();
1017 mnLangID = LANGUAGE_SYSTEM;
1018 mbSystemLocale = true;
1019 mbInitializedBcp47 = false;
1020 mbInitializedLocale = false;
1021 mbInitializedLangID = false;
1022 mbIsFallback = false;
1026 LanguageTag & LanguageTag::reset( const OUString & rBcp47LanguageTag )
1028 resetVars();
1029 maBcp47 = rBcp47LanguageTag;
1030 mbSystemLocale = rBcp47LanguageTag.isEmpty();
1031 mbInitializedBcp47 = !mbSystemLocale;
1033 return *this;
1037 LanguageTag & LanguageTag::reset( const css::lang::Locale & rLocale )
1039 resetVars();
1040 maLocale = rLocale;
1041 mbSystemLocale = rLocale.Language.isEmpty();
1042 mbInitializedLocale = !mbSystemLocale;
1043 handleVendorVariant( maLocale);
1044 return *this;
1048 LanguageTag & LanguageTag::reset( LanguageType nLanguage )
1050 resetVars();
1051 mnLangID = nLanguage;
1052 mbSystemLocale = nLanguage == LANGUAGE_SYSTEM;
1053 mbInitializedLangID = !mbSystemLocale;
1054 return *this;
// Canonicalizes maBcp47 and decides whether liblangtag is needed for this tag.
//
// First tries to classify the tag as a simple / known locale without touching
// liblangtag; only if that fails is the tag parsed and canonicalized by
// liblangtag. meIsValid is set on every return path.
//
// Returns true if maBcp47 was rewritten here or if convertLocaleToLang( false)
// reported a remap, false otherwise.
1058 bool LanguageTagImpl::canonicalize()
1060 #ifdef erDEBUG
1061 // dump once
1062 struct dumper
1064 lt_tag_t** mpp;
1065 explicit dumper( lt_tag_t** pp ) : mpp( *pp ? NULL : pp) {}
1066 ~dumper() { if (mpp && *mpp) lt_tag_dump( *mpp); }
1068 dumper aDumper( &mpImplLangtag);
1069 #endif
1071 bool bChanged = false;
1073 // Side effect: have maBcp47 in any case, resolved system.
1074 // Some methods calling canonicalize() (or not calling it due to
1075 // meIsLiblangtagNeeded==DECISION_NO) rely on this! Hence do not set
1076 // meIsLiblangtagNeeded anywhere else than hereafter.
1077 getBcp47();
1079 // The simple cases and known locales don't need liblangtag processing,
1080 // which also avoids loading liblangtag data on startup.
1081 if (meIsLiblangtagNeeded == DECISION_DONTKNOW)
// The two flags below record that maLocale / mnLangID were filled in only
// for this check; they are reset again before leaving this branch.
1083 bool bTemporaryLocale = false;
1084 bool bTemporaryLangID = false;
1085 if (!mbInitializedLocale && !mbInitializedLangID)
1087 if (mbSystemLocale)
1089 mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
1090 mbInitializedLangID = true;
1092 else
1094 // Now this is getting funny... we only have some BCP47 string
1095 // and want to determine if parsing it would be possible
1096 // without using liblangtag just to see if it is a simple known
1097 // locale or could fall back to one.
1098 OUString aLanguage, aScript, aCountry, aVariants;
1099 Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry, aVariants);
1100 if (eExt != EXTRACTED_NONE)
1102 if (eExt == EXTRACTED_LSC || eExt == EXTRACTED_LV)
1104 // Rebuild bcp47 with proper casing of tags.
1105 OUStringBuffer aBuf( aLanguage.getLength() + 1 + aScript.getLength() +
1106 1 + aCountry.getLength() + 1 + aVariants.getLength());
1107 aBuf.append( aLanguage);
1108 if (!aScript.isEmpty())
1109 aBuf.append("-").append(aScript);
1110 if (!aCountry.isEmpty())
1111 aBuf.append("-").append(aCountry);
1112 if (!aVariants.isEmpty())
1113 aBuf.append("-").append(aVariants);
1114 OUString aStr( aBuf.makeStringAndClear());
1116 if (maBcp47 != aStr)
1118 maBcp47 = aStr;
1119 bChanged = true;
// Build a provisional locale: plain language/country where no script is
// involved (or for the "C" locale), otherwise the I18NLANGTAG_QLT form that
// carries the full BCP47 string in Variant.
1122 if (eExt == EXTRACTED_LSC && aScript.isEmpty())
1124 maLocale.Language = aLanguage;
1125 maLocale.Country = aCountry;
1127 else if (eExt == EXTRACTED_C_LOCALE)
1129 maLocale.Language = aLanguage;
1130 maLocale.Country = aCountry;
1132 else
1134 maLocale.Language = I18NLANGTAG_QLT;
1135 maLocale.Country = aCountry;
1136 maLocale.Variant = maBcp47;
1138 bTemporaryLocale = mbInitializedLocale = true;
1142 if (mbInitializedLangID && !mbInitializedLocale)
1144 // Do not call getLocale() here because that prefers
1145 // convertBcp47ToLocale() which would end up in recursion via
1146 // isIsoLocale()!
1148 // Prepare to verify that we have a known locale, not just an
1149 // arbitrary MS-LangID.
1150 convertLangToLocale();
1152 if (mbInitializedLocale)
1154 if (!mbInitializedLangID)
// On-the-fly IDs are not allowed here (argument false), so an unknown
// locale yields LANGUAGE_DONTKNOW instead of registering a new ID.
1156 if (convertLocaleToLang( false))
1157 bChanged = true;
1158 if (bTemporaryLocale || mnLangID == LANGUAGE_DONTKNOW)
1159 bTemporaryLangID = true;
1161 if (mnLangID != LANGUAGE_DONTKNOW && mnLangID != LANGUAGE_SYSTEM)
1162 meIsLiblangtagNeeded = DECISION_NO; // known locale
1163 else
1165 const KnownTagSet& rKnowns = getKnowns();
1166 if (rKnowns.find( maBcp47) != rKnowns.end())
1167 meIsLiblangtagNeeded = DECISION_NO; // known fallback
1169 // We may have an internal override "canonicalization".
1170 lang::Locale aNew( MsLangId::Conversion::getOverride( maLocale));
1171 if (!aNew.Language.isEmpty() &&
1172 (aNew.Language != maLocale.Language ||
1173 aNew.Country != maLocale.Country ||
1174 aNew.Variant != maLocale.Variant))
1176 maBcp47 = LanguageTagImpl::convertToBcp47( aNew);
1177 bChanged = true;
1178 meIsIsoLocale = DECISION_DONTKNOW;
1179 meIsIsoODF = DECISION_DONTKNOW;
1180 meIsLiblangtagNeeded = DECISION_NO; // known locale
// Discard the values that were only set up for the checks above.
1183 if (bTemporaryLocale)
1185 mbInitializedLocale = false;
1186 maLocale = lang::Locale();
1188 if (bTemporaryLangID)
1190 mbInitializedLangID = false;
1191 mnLangID = LANGUAGE_DONTKNOW;
1194 if (meIsLiblangtagNeeded == DECISION_NO)
1196 meIsValid = DECISION_YES; // really, known must be valid ...
1197 return bChanged; // that's it
// From here on liblangtag does the work.
1200 meIsLiblangtagNeeded = DECISION_YES;
1201 SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: using liblangtag for '" << maBcp47 << "'");
// Create the liblangtag tag object on first use.
1203 if (!mpImplLangtag)
1205 theDataRef::get().init();
1206 mpImplLangtag = lt_tag_new();
1209 myLtError aError;
1211 if (!lt_tag_parse_disabled && lt_tag_parse(mpImplLangtag, OUStringToOString(maBcp47, RTL_TEXTENCODING_UTF8).getStr(), &aError.p))
1213 char* pTag = lt_tag_canonicalize( mpImplLangtag, &aError.p);
1214 SAL_WARN_IF( !pTag, "i18nlangtag", "LanguageTagImpl::canonicalize: could not canonicalize '" << maBcp47 << "'");
1215 if (pTag)
1217 OUString aNew( OUString::createFromAscii( pTag));
1218 // Make the lt_tag_t follow the new string if different, which
1219 // removes default script and such.
1220 if (maBcp47 != aNew)
1222 maBcp47 = aNew;
1223 bChanged = true;
1224 meIsIsoLocale = DECISION_DONTKNOW;
1225 meIsIsoODF = DECISION_DONTKNOW;
1226 if (!lt_tag_parse( mpImplLangtag, pTag, &aError.p))
1228 SAL_WARN( "i18nlangtag", "LanguageTagImpl::canonicalize: could not reparse '" << maBcp47 << "'");
// pTag is released with free() on both exits that follow.
1229 free( pTag);
1230 meIsValid = DECISION_NO;
1231 return bChanged;
1234 free( pTag);
1235 meIsValid = DECISION_YES;
1236 return bChanged;
1239 else
1241 SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: could not parse '" << maBcp47 << "'");
// Reached if parsing was disabled or failed, or if liblangtag returned no
// canonical string.
1243 meIsValid = DECISION_NO;
1244 return bChanged;
1248 bool LanguageTagImpl::synCanonicalize()
1250 bool bChanged = false;
1251 if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
1253 bChanged = canonicalize();
1254 if (bChanged)
1256 if (mbInitializedLocale)
1257 convertBcp47ToLocale();
1258 if (mbInitializedLangID)
1259 convertBcp47ToLang();
1262 return bChanged;
1266 void LanguageTag::syncFromImpl()
1268 LanguageTagImpl* pImpl = getImpl();
1269 bool bRegister = ((mbInitializedBcp47 && maBcp47 != pImpl->maBcp47) ||
1270 (mbInitializedLangID && mnLangID != pImpl->mnLangID));
1271 SAL_INFO_IF( bRegister, "i18nlangtag",
1272 "LanguageTag::syncFromImpl: re-registering, '" << pImpl->maBcp47 << "' vs '" << maBcp47 <<
1273 " and 0x" << ::std::hex << pImpl->mnLangID << " vs 0x" << ::std::hex << mnLangID);
1274 syncVarsFromRawImpl();
1275 if (bRegister)
1276 mpImpl = registerImpl();
1280 void LanguageTag::syncVarsFromImpl() const
1282 if (!mpImpl)
1283 getImpl(); // with side effect syncVarsFromRawImpl()
1284 else
1285 syncVarsFromRawImpl();
1289 void LanguageTag::syncVarsFromRawImpl() const
1291 // Do not use getImpl() here.
1292 LanguageTagImpl* pImpl = mpImpl.get();
1293 if (!pImpl)
1294 return;
1296 // Obviously only mutable variables.
1297 mbInitializedBcp47 = pImpl->mbInitializedBcp47;
1298 maBcp47 = pImpl->maBcp47;
1299 mbInitializedLocale = pImpl->mbInitializedLocale;
1300 maLocale = pImpl->maLocale;
1301 mbInitializedLangID = pImpl->mbInitializedLangID;
1302 mnLangID = pImpl->mnLangID;
1306 bool LanguageTag::synCanonicalize()
1308 bool bChanged = getImpl()->synCanonicalize();
1309 if (bChanged)
1310 syncFromImpl();
1311 return bChanged;
1315 void LanguageTagImpl::convertLocaleToBcp47()
1317 if (mbSystemLocale && !mbInitializedLocale)
1318 convertLangToLocale();
1320 if (maLocale.Language.isEmpty())
1322 // Do not call LanguageTag::convertToBcp47(Locale) that for an empty
1323 // locale via LanguageTag::convertToBcp47(LanguageType) and
1324 // LanguageTag::convertToLocale(LanguageType) would instantiate another
1325 // LanguageTag.
1326 maLocale = MsLangId::Conversion::convertLanguageToLocale( LANGUAGE_SYSTEM );
1328 if (maLocale.Language.isEmpty())
1330 maBcp47.clear(); // bad luck
1332 else if (maLocale.Language == I18NLANGTAG_QLT)
1334 maBcp47 = maLocale.Variant;
1335 meIsIsoLocale = DECISION_NO;
1337 else
1339 maBcp47 = LanguageTag::convertToBcp47( maLocale );
1341 mbInitializedBcp47 = true;
1345 bool LanguageTagImpl::convertLocaleToLang( bool bAllowOnTheFlyID )
1347 bool bRemapped = false;
1348 if (mbSystemLocale)
1350 mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
1352 else
1354 mnLangID = MsLangId::Conversion::convertLocaleToLanguage( maLocale);
1355 if (mnLangID == LANGUAGE_DONTKNOW)
1357 // convertLocaleToLanguage() only searches in ISO and private
1358 // definitions, search in remaining definitions, i.e. for the "C"
1359 // locale and non-standard things like "sr-latin" or "german" to
1360 // resolve to a known locale, skipping ISO lll-CC that were already
1361 // searched.
1362 mnLangID = MsLangId::Conversion::convertIsoNamesToLanguage( maLocale.Language, maLocale.Country, true);
1363 if (mnLangID != LANGUAGE_DONTKNOW)
1365 // If one found, convert back and adapt Locale and Bcp47
1366 // strings so we have a matching entry.
1367 OUString aOrgBcp47( maBcp47);
1368 convertLangToLocale();
1369 convertLocaleToBcp47();
1370 bRemapped = (maBcp47 != aOrgBcp47);
1373 if (mnLangID == LANGUAGE_DONTKNOW && bAllowOnTheFlyID)
1375 if (isValidBcp47())
1377 // For language-only (including script) look if we know some
1378 // locale of that language and if so try to use the primary
1379 // language ID of that instead of generating an on-the-fly ID.
1380 if (getCountry().isEmpty() && isIsoODF())
1382 lang::Locale aLoc( MsLangId::Conversion::lookupFallbackLocale( maLocale));
1383 // 'en-US' is last resort, do not use except when looking
1384 // for 'en'.
1385 if (aLoc.Language != "en" || getLanguage() == "en")
1387 mnLangID = MsLangId::Conversion::convertLocaleToLanguage( aLoc);
1388 if (mnLangID != LANGUAGE_DONTKNOW)
1389 mnLangID = MsLangId::getPrimaryLanguage( mnLangID);
1392 registerOnTheFly( mnLangID);
1394 else
1396 SAL_WARN( "i18nlangtag", "LanguageTagImpl::convertLocaleToLang: with bAllowOnTheFlyID invalid '"
1397 << maBcp47 << "'");
1401 mbInitializedLangID = true;
1402 return bRemapped;
1406 void LanguageTag::convertLocaleToLang()
1408 getImpl()->convertLocaleToLang( true);
1409 syncFromImpl();
1413 void LanguageTagImpl::convertBcp47ToLocale()
1415 bool bIso = isIsoLocale();
1416 if (bIso)
1418 maLocale.Language = getLanguageFromLangtag();
1419 maLocale.Country = getRegionFromLangtag();
1420 maLocale.Variant.clear();
1422 else
1424 maLocale.Language = I18NLANGTAG_QLT;
1425 maLocale.Country = getCountry();
1426 maLocale.Variant = maBcp47;
1428 mbInitializedLocale = true;
1432 void LanguageTag::convertBcp47ToLocale()
1434 getImpl()->convertBcp47ToLocale();
1435 syncFromImpl();
1439 void LanguageTagImpl::convertBcp47ToLang()
1441 if (mbSystemLocale)
1443 mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
1445 else
1447 if (!mbInitializedLocale)
1448 convertBcp47ToLocale();
1449 convertLocaleToLang( true);
1451 mbInitializedLangID = true;
1455 void LanguageTag::convertBcp47ToLang()
1457 getImpl()->convertBcp47ToLang();
1458 syncFromImpl();
1462 void LanguageTagImpl::convertLangToLocale()
1464 if (mbSystemLocale && !mbInitializedLangID)
1466 mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
1467 mbInitializedLangID = true;
1469 // Resolve system here! The original is remembered as mbSystemLocale.
1470 maLocale = MsLangId::Conversion::convertLanguageToLocale( mnLangID );
1471 mbInitializedLocale = true;
1475 void LanguageTag::convertLangToLocale()
1477 getImpl()->convertLangToLocale();
1478 syncFromImpl();
1482 void LanguageTagImpl::convertLangToBcp47()
1484 if (!mbInitializedLocale)
1485 convertLangToLocale();
1486 convertLocaleToBcp47();
1487 mbInitializedBcp47 = true;
1491 void LanguageTag::convertFromRtlLocale()
1493 // The rtl_Locale follows the Open Group Base Specification,
1494 // 8.2 Internationalization Variables
1495 // language[_territory][.codeset][@modifier]
1496 // On GNU/Linux systems usually being glibc locales.
1497 // sal/osl/unx/nlsupport.c _parse_locale() parses them into
1498 // Language: language 2 or 3 alpha code
1499 // Country: [territory] 2 alpha code
1500 // Variant: [.codeset][@modifier]
1501 // Variant effectively contains anything that follows the territory, not
1502 // looking for '.' dot delimiter or '@' modifier content.
1503 if (!maLocale.Variant.isEmpty())
1505 OString aStr = OUStringToOString( maLocale.Language + "_" + maLocale.Country + maLocale.Variant,
1506 RTL_TEXTENCODING_UTF8);
1507 /* FIXME: let liblangtag parse this entirely with
1508 * lt_tag_convert_from_locale() but that needs a patch to pass the
1509 * string. */
1510 #if 0
1511 myLtError aError;
1512 theDataRef::get().init();
1513 mpImplLangtag = lt_tag_convert_from_locale( aStr.getStr(), &aError.p);
1514 maBcp47 = OStringToOUString( lt_tag_get_string( mpImplLangtag), RTL_TEXTENCODING_UTF8);
1515 mbInitializedBcp47 = true;
1516 #else
1517 mnLangID = MsLangId::convertUnxByteStringToLanguage( aStr);
1518 if (mnLangID == LANGUAGE_DONTKNOW)
1520 SAL_WARN( "i18nlangtag", "LanguageTag(rtl_Locale) - unknown: " << aStr);
1521 mnLangID = LANGUAGE_ENGLISH_US; // we need _something_ here
1523 mbInitializedLangID = true;
1524 #endif
1525 maLocale = lang::Locale();
1526 mbInitializedLocale = false;
1531 const OUString & LanguageTagImpl::getBcp47() const
1533 if (!mbInitializedBcp47)
1535 if (mbInitializedLocale)
1536 const_cast<LanguageTagImpl*>(this)->convertLocaleToBcp47();
1537 else
1538 const_cast<LanguageTagImpl*>(this)->convertLangToBcp47();
1540 return maBcp47;
1544 const OUString & LanguageTag::getBcp47( bool bResolveSystem ) const
1546 if (!bResolveSystem && mbSystemLocale)
1547 return theEmptyBcp47::get();
1548 if (!mbInitializedBcp47)
1549 syncVarsFromImpl();
1550 if (!mbInitializedBcp47)
1552 getImpl()->getBcp47();
1553 const_cast<LanguageTag*>(this)->syncFromImpl();
1555 return maBcp47;
1559 OUString LanguageTagImpl::getLanguageFromLangtag()
1561 OUString aLanguage;
1562 synCanonicalize();
1563 if (maBcp47.isEmpty())
1564 return aLanguage;
1565 if (mpImplLangtag)
1567 const lt_lang_t* pLangT = lt_tag_get_language( mpImplLangtag);
1568 SAL_WARN_IF( !pLangT, "i18nlangtag",
1569 "LanguageTag::getLanguageFromLangtag: pLangT==NULL for '" << maBcp47 << "'");
1570 if (!pLangT)
1571 return aLanguage;
1572 const char* pLang = lt_lang_get_tag( pLangT);
1573 SAL_WARN_IF( !pLang, "i18nlangtag",
1574 "LanguageTag::getLanguageFromLangtag: pLang==NULL for '" << maBcp47 << "'");
1575 if (pLang)
1576 aLanguage = OUString::createFromAscii( pLang);
1578 else
1580 if (mbCachedLanguage || cacheSimpleLSCV())
1581 aLanguage = maCachedLanguage;
1583 return aLanguage;
1587 OUString LanguageTagImpl::getScriptFromLangtag()
1589 OUString aScript;
1590 synCanonicalize();
1591 if (maBcp47.isEmpty())
1592 return aScript;
1593 if (mpImplLangtag)
1595 const lt_script_t* pScriptT = lt_tag_get_script( mpImplLangtag);
1596 // pScriptT==NULL is valid for default scripts
1597 if (!pScriptT)
1598 return aScript;
1599 const char* pScript = lt_script_get_tag( pScriptT);
1600 SAL_WARN_IF( !pScript, "i18nlangtag", "LanguageTag::getScriptFromLangtag: pScript==NULL");
1601 if (pScript)
1602 aScript = OUString::createFromAscii( pScript);
1604 else
1606 if (mbCachedScript || cacheSimpleLSCV())
1607 aScript = maCachedScript;
1609 return aScript;
1613 OUString LanguageTagImpl::getRegionFromLangtag()
1615 OUString aRegion;
1616 synCanonicalize();
1617 if (maBcp47.isEmpty())
1618 return aRegion;
1619 if (mpImplLangtag)
1621 const lt_region_t* pRegionT = lt_tag_get_region( mpImplLangtag);
1622 // pRegionT==NULL is valid for language only tags, rough check here
1623 // that does not take sophisticated tags into account that actually
1624 // should have a region, check for ll, lll, ll-Ssss and lll-Ssss so
1625 // that ll-CC and lll-CC actually fail.
1626 SAL_WARN_IF( !pRegionT &&
1627 maBcp47.getLength() != 2 && maBcp47.getLength() != 3 &&
1628 maBcp47.getLength() != 7 && maBcp47.getLength() != 8,
1629 "i18nlangtag", "LanguageTag::getRegionFromLangtag: pRegionT==NULL for '" << maBcp47 << "'");
1630 if (!pRegionT)
1631 return aRegion;
1632 const char* pRegion = lt_region_get_tag( pRegionT);
1633 SAL_WARN_IF( !pRegion, "i18nlangtag",
1634 "LanguageTag::getRegionFromLangtag: pRegion==NULL for'" << maBcp47 << "'");
1635 if (pRegion)
1636 aRegion = OUString::createFromAscii( pRegion);
1638 else
1640 if (mbCachedCountry || cacheSimpleLSCV())
1641 aRegion = maCachedCountry;
1643 return aRegion;
1647 OUString LanguageTagImpl::getVariantsFromLangtag()
1649 OUStringBuffer aVariants;
1650 synCanonicalize();
1651 if (maBcp47.isEmpty())
1652 return OUString();
1653 if (mpImplLangtag)
1655 const lt_list_t* pVariantsT = lt_tag_get_variants( mpImplLangtag);
1656 for (const lt_list_t* pE = pVariantsT; pE; pE = lt_list_next( pE))
1658 const lt_variant_t* pVariantT = static_cast<const lt_variant_t*>(lt_list_value( pE));
1659 if (pVariantT)
1661 const char* p = lt_variant_get_tag( pVariantT);
1662 if (p)
1664 if (!aVariants.isEmpty())
1665 aVariants.append("-");
1666 aVariants.appendAscii(p);
1671 else
1673 if (mbCachedVariants || cacheSimpleLSCV())
1674 aVariants = maCachedVariants;
1676 return aVariants.makeStringAndClear();
1680 const css::lang::Locale & LanguageTag::getLocale( bool bResolveSystem ) const
1682 if (!bResolveSystem && mbSystemLocale)
1683 return theEmptyLocale::get();
1684 if (!mbInitializedLocale)
1685 syncVarsFromImpl();
1686 if (!mbInitializedLocale)
1688 if (mbInitializedBcp47)
1689 const_cast<LanguageTag*>(this)->convertBcp47ToLocale();
1690 else
1691 const_cast<LanguageTag*>(this)->convertLangToLocale();
1693 return maLocale;
1697 LanguageType LanguageTag::getLanguageType( bool bResolveSystem ) const
1699 if (!bResolveSystem && mbSystemLocale)
1700 return LANGUAGE_SYSTEM;
1701 if (!mbInitializedLangID)
1702 syncVarsFromImpl();
1703 if (!mbInitializedLangID)
1705 if (mbInitializedBcp47)
1706 const_cast<LanguageTag*>(this)->convertBcp47ToLang();
1707 else
1709 const_cast<LanguageTag*>(this)->convertLocaleToLang();
1711 /* Resolve a locale only unknown due to some redundant information,
1712 * like 'de-Latn-DE' with script tag. Never call canonicalize()
1713 * from within convert...() methods due to possible recursion, so
1714 * do it here. */
1715 if ((!mbSystemLocale && mnLangID == LANGUAGE_SYSTEM) || mnLangID == LANGUAGE_DONTKNOW)
1716 const_cast<LanguageTag*>(this)->synCanonicalize();
1719 return mnLangID;
1723 void LanguageTag::getIsoLanguageScriptCountry( OUString& rLanguage, OUString& rScript, OUString& rCountry ) const
1725 // Calling isIsoODF() first is a predicate for getLanguage(), getScript()
1726 // and getCountry() to work correctly in this context.
1727 if (isIsoODF())
1729 rLanguage = getLanguage();
1730 rScript = getScript();
1731 rCountry = getCountry();
1733 else
1735 rLanguage = (LanguageTag::isIsoLanguage( getLanguage()) ? getLanguage() : OUString());
1736 rScript = (LanguageTag::isIsoScript( getScript()) ? getScript() : OUString());
1737 rCountry = (LanguageTag::isIsoCountry( getCountry()) ? getCountry() : OUString());
1742 namespace
1745 bool isLowerAscii( sal_Unicode c )
1747 return 'a' <= c && c <= 'z';
1750 bool isUpperAscii( sal_Unicode c )
1752 return 'A' <= c && c <= 'Z';
1758 // static
1759 bool LanguageTag::isIsoLanguage( const OUString& rLanguage )
1761 /* TODO: ignore case? For now let's see where rubbish is used. */
1762 bool b2chars = rLanguage.getLength() == 2;
1763 if ((b2chars || rLanguage.getLength() == 3) &&
1764 isLowerAscii( rLanguage[0]) && isLowerAscii( rLanguage[1]) &&
1765 (b2chars || isLowerAscii( rLanguage[2])))
1766 return true;
1767 SAL_WARN_IF( ((rLanguage.getLength() == 2 || rLanguage.getLength() == 3) &&
1768 (isUpperAscii( rLanguage[0]) || isUpperAscii( rLanguage[1]))) ||
1769 (rLanguage.getLength() == 3 && isUpperAscii( rLanguage[2])), "i18nlangtag",
1770 "LanguageTag::isIsoLanguage: rejecting upper case " << rLanguage);
1771 return false;
1775 // static
1776 bool LanguageTag::isIsoCountry( const OUString& rRegion )
1778 /* TODO: ignore case? For now let's see where rubbish is used. */
1779 if (rRegion.isEmpty() ||
1780 (rRegion.getLength() == 2 && isUpperAscii( rRegion[0]) && isUpperAscii( rRegion[1])))
1781 return true;
1782 SAL_WARN_IF( rRegion.getLength() == 2 && (isLowerAscii( rRegion[0]) || isLowerAscii( rRegion[1])),
1783 "i18nlangtag", "LanguageTag::isIsoCountry: rejecting lower case " << rRegion);
1784 return false;
1788 // static
1789 bool LanguageTag::isIsoScript( const OUString& rScript )
1791 /* TODO: ignore case? For now let's see where rubbish is used. */
1792 if (rScript.isEmpty() ||
1793 (rScript.getLength() == 4 &&
1794 isUpperAscii( rScript[0]) && isLowerAscii( rScript[1]) &&
1795 isLowerAscii( rScript[2]) && isLowerAscii( rScript[3])))
1796 return true;
1797 SAL_WARN_IF( rScript.getLength() == 4 &&
1798 (isLowerAscii( rScript[0]) || isUpperAscii( rScript[1]) ||
1799 isUpperAscii( rScript[2]) || isUpperAscii( rScript[3])),
1800 "i18nlangtag", "LanguageTag::isIsoScript: rejecting case mismatch " << rScript);
1801 return false;
1805 OUString const & LanguageTagImpl::getLanguage() const
1807 if (!mbCachedLanguage)
1809 maCachedLanguage = const_cast<LanguageTagImpl*>(this)->getLanguageFromLangtag();
1810 mbCachedLanguage = true;
1812 return maCachedLanguage;
1816 OUString LanguageTag::getLanguage() const
1818 LanguageTagImpl const* pImpl = getImpl();
1819 if (pImpl->mbCachedLanguage)
1820 return pImpl->maCachedLanguage;
1821 OUString aRet( pImpl->getLanguage());
1822 const_cast<LanguageTag*>(this)->syncFromImpl();
1823 return aRet;
1827 OUString const & LanguageTagImpl::getScript() const
1829 if (!mbCachedScript)
1831 maCachedScript = const_cast<LanguageTagImpl*>(this)->getScriptFromLangtag();
1832 mbCachedScript = true;
1834 return maCachedScript;
1838 OUString LanguageTag::getScript() const
1840 LanguageTagImpl const* pImpl = getImpl();
1841 if (pImpl->mbCachedScript)
1842 return pImpl->maCachedScript;
1843 OUString aRet( pImpl->getScript());
1844 const_cast<LanguageTag*>(this)->syncFromImpl();
1845 return aRet;
1849 OUString LanguageTag::getLanguageAndScript() const
1851 OUString aLanguageScript( getLanguage());
1852 OUString aScript( getScript());
1853 if (!aScript.isEmpty())
1855 aLanguageScript += "-" + aScript;
1857 return aLanguageScript;
1861 OUString const & LanguageTagImpl::getCountry() const
1863 if (!mbCachedCountry)
1865 maCachedCountry = const_cast<LanguageTagImpl*>(this)->getRegionFromLangtag();
1866 if (!LanguageTag::isIsoCountry( maCachedCountry))
1867 maCachedCountry.clear();
1868 mbCachedCountry = true;
1870 return maCachedCountry;
1874 OUString LanguageTag::getCountry() const
1876 LanguageTagImpl const* pImpl = getImpl();
1877 if (pImpl->mbCachedCountry)
1878 return pImpl->maCachedCountry;
1879 OUString aRet( pImpl->getCountry());
1880 const_cast<LanguageTag*>(this)->syncFromImpl();
1881 return aRet;
1885 OUString LanguageTagImpl::getRegion() const
1887 return const_cast<LanguageTagImpl*>(this)->getRegionFromLangtag();
1891 OUString const & LanguageTagImpl::getVariants() const
1893 if (!mbCachedVariants)
1895 maCachedVariants = const_cast<LanguageTagImpl*>(this)->getVariantsFromLangtag();
1896 mbCachedVariants = true;
1898 return maCachedVariants;
1902 OUString LanguageTag::getVariants() const
1904 LanguageTagImpl const * pImpl = getImpl();
1905 if (pImpl->mbCachedVariants)
1906 return pImpl->maCachedVariants;
1907 OUString aRet( pImpl->getVariants());
1908 const_cast<LanguageTag*>(this)->syncFromImpl();
1909 return aRet;
1912 OUString LanguageTagImpl::getGlibcLocaleString() const
1914 if (mbCachedGlibcString)
1915 return maCachedGlibcString;
1917 if (!mpImplLangtag)
1919 meIsLiblangtagNeeded = DECISION_YES;
1920 const_cast<LanguageTagImpl*>(this)->synCanonicalize();
1922 if (mpImplLangtag)
1924 char* pLang = lt_tag_convert_to_locale(mpImplLangtag, nullptr);
1925 if (pLang)
1927 maCachedGlibcString = OUString::createFromAscii( pLang);
1928 mbCachedGlibcString = true;
1929 free(pLang);
1932 return maCachedGlibcString;
1935 OUString LanguageTag::getGlibcLocaleString( const OUString & rEncoding ) const
1937 OUString aRet;
1938 if (isIsoLocale())
1940 OUString aCountry( getCountry());
1941 if (aCountry.isEmpty())
1942 aRet = getLanguage() + rEncoding;
1943 else
1944 aRet = getLanguage() + "_" + aCountry + rEncoding;
1946 else
1948 aRet = getImpl()->getGlibcLocaleString();
1949 sal_Int32 nAt = aRet.indexOf('@');
1950 if (nAt != -1)
1951 aRet = aRet.copy(0, nAt) + rEncoding + aRet.copy(nAt);
1952 else
1953 aRet += rEncoding;
1955 return aRet;
1958 bool LanguageTagImpl::hasScript() const
1960 if (!mbCachedScript)
1961 getScript();
1962 return !maCachedScript.isEmpty();
1966 bool LanguageTag::hasScript() const
1968 bool bRet = getImpl()->hasScript();
1969 const_cast<LanguageTag*>(this)->syncFromImpl();
1970 return bRet;
1974 LanguageTag::ScriptType LanguageTagImpl::getScriptType() const
1976 return meScriptType;
1980 LanguageTag::ScriptType LanguageTag::getScriptType() const
1982 return getImpl()->getScriptType();
1986 void LanguageTagImpl::setScriptType(LanguageTag::ScriptType st)
1988 if (meScriptType == LanguageTag::ScriptType::UNKNOWN) // poor man's clash resolution
1989 meScriptType = st;
1993 void LanguageTag::setScriptType(LanguageTag::ScriptType st)
1995 getImpl()->setScriptType(st);
1999 bool LanguageTagImpl::cacheSimpleLSCV()
2001 OUString aLanguage, aScript, aCountry, aVariants;
2002 Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry, aVariants);
2003 bool bRet = (eExt == EXTRACTED_LSC || eExt == EXTRACTED_LV);
2004 if (bRet)
2006 maCachedLanguage = aLanguage;
2007 maCachedScript = aScript;
2008 maCachedCountry = aCountry;
2009 maCachedVariants = aVariants;
2010 mbCachedLanguage = mbCachedScript = mbCachedCountry = mbCachedVariants = true;
2012 return bRet;
2016 bool LanguageTagImpl::isIsoLocale() const
2018 if (meIsIsoLocale == DECISION_DONTKNOW)
2020 const_cast<LanguageTagImpl*>(this)->synCanonicalize();
2021 // It must be at most ll-CC or lll-CC
2022 // Do not use getCountry() here, use getRegion() instead.
2023 meIsIsoLocale = ((maBcp47.isEmpty() ||
2024 (maBcp47.getLength() <= 6 && LanguageTag::isIsoLanguage( getLanguage()) &&
2025 LanguageTag::isIsoCountry( getRegion()))) ? DECISION_YES : DECISION_NO);
2027 return meIsIsoLocale == DECISION_YES;
2031 bool LanguageTag::isIsoLocale() const
2033 bool bRet = getImpl()->isIsoLocale();
2034 const_cast<LanguageTag*>(this)->syncFromImpl();
2035 return bRet;
2039 bool LanguageTagImpl::isIsoODF() const
2041 if (meIsIsoODF == DECISION_DONTKNOW)
2043 const_cast<LanguageTagImpl*>(this)->synCanonicalize();
2044 if (!LanguageTag::isIsoScript( getScript()))
2046 meIsIsoODF = DECISION_NO;
2047 return false;
2049 // The usual case is lll-CC so simply check that first.
2050 if (isIsoLocale())
2052 meIsIsoODF = DECISION_YES;
2053 return true;
2055 // If this is not ISO locale for which script must not exist it can
2056 // still be ISO locale plus ISO script lll-Ssss-CC, but not ll-vvvv ...
2057 // ll-vvvvvvvv
2058 meIsIsoODF = ((maBcp47.getLength() <= 11 && LanguageTag::isIsoLanguage( getLanguage()) &&
2059 LanguageTag::isIsoCountry( getRegion()) && LanguageTag::isIsoScript( getScript()) &&
2060 getVariants().isEmpty()) ? DECISION_YES : DECISION_NO);
2062 return meIsIsoODF == DECISION_YES;
2066 bool LanguageTag::isIsoODF() const
2068 bool bRet = getImpl()->isIsoODF();
2069 const_cast<LanguageTag*>(this)->syncFromImpl();
2070 return bRet;
2074 bool LanguageTagImpl::isValidBcp47() const
2076 if (meIsValid == DECISION_DONTKNOW)
2078 const_cast<LanguageTagImpl*>(this)->synCanonicalize();
2079 SAL_WARN_IF( meIsValid == DECISION_DONTKNOW, "i18nlangtag",
2080 "LanguageTag::isValidBcp47: canonicalize() didn't set meIsValid");
2082 return meIsValid == DECISION_YES;
2086 bool LanguageTag::isValidBcp47() const
2088 bool bRet = getImpl()->isValidBcp47();
2089 const_cast<LanguageTag*>(this)->syncFromImpl();
2090 return bRet;
2094 LanguageTag & LanguageTag::makeFallback()
2096 if (!mbIsFallback)
2098 const lang::Locale& rLocale1 = getLocale();
2099 lang::Locale aLocale2( MsLangId::Conversion::lookupFallbackLocale( rLocale1));
2100 if ( rLocale1.Language != aLocale2.Language ||
2101 rLocale1.Country != aLocale2.Country ||
2102 rLocale1.Variant != aLocale2.Variant)
2104 if (rLocale1.Language != "en" && aLocale2.Language == "en" && aLocale2.Country == "US")
2106 // "en-US" is the last resort fallback, try if we get a better
2107 // one for the fallback hierarchy of a non-"en" locale.
2108 ::std::vector< OUString > aFallbacks( getFallbackStrings( false));
2109 for (auto const& fallback : aFallbacks)
2111 lang::Locale aLocale3( LanguageTag(fallback).getLocale());
2112 aLocale2 = MsLangId::Conversion::lookupFallbackLocale( aLocale3);
2113 if (aLocale2.Language != "en" || aLocale2.Country != "US")
2114 break; // for, success
2117 SAL_INFO( "i18nlangtag", "LanguageTag::makeFallback - for (" <<
2118 rLocale1.Language << "," << rLocale1.Country << "," << rLocale1.Variant << ") to (" <<
2119 aLocale2.Language << "," << aLocale2.Country << "," << aLocale2.Variant << ")");
2120 reset( aLocale2);
2122 mbIsFallback = true;
2124 return *this;
2128 /* TODO: maybe this now could take advantage of the mnOverride field in
2129 * isolang.cxx entries and search for kSAME instead of hardcoded special
2130 * fallbacks. Though iterating through those tables would be slower and even
2131 * then there would be some special cases, but we wouldn't lack entries that
2132 * were missed out. */
2133 ::std::vector< OUString > LanguageTag::getFallbackStrings( bool bIncludeFullBcp47 ) const
2135 ::std::vector< OUString > aVec;
2136 OUString aLanguage( getLanguage());
2137 OUString aCountry( getCountry());
2138 if (isIsoLocale())
2140 if (!aCountry.isEmpty())
2142 if (bIncludeFullBcp47)
2143 aVec.emplace_back(aLanguage + "-" + aCountry);
2144 if (aLanguage == "zh")
2146 // For zh-HK or zh-MO also list zh-TW, for all other zh-XX also
2147 // list zh-CN.
2148 if (aCountry == "HK" || aCountry == "MO")
2149 aVec.emplace_back(aLanguage + "-TW");
2150 else if (aCountry != "CN")
2151 aVec.emplace_back(aLanguage + "-CN");
2152 aVec.push_back( aLanguage);
2154 else if (aLanguage == "sh")
2156 // Manual list instead of calling
2157 // LanguageTag( "sr-Latn-" + aCountry).getFallbackStrings( true)
2158 // that would also include "sh-*" again.
2159 aVec.emplace_back("sr-Latn-" + aCountry);
2160 aVec.emplace_back("sr-Latn");
2161 aVec.emplace_back("sh"); // legacy with script, before default script with country
2162 aVec.emplace_back("sr-" + aCountry);
2163 aVec.emplace_back("sr");
2165 else if (aLanguage == "ca" && aCountry == "XV")
2167 ::std::vector< OUString > aRep( LanguageTag( "ca-ES-valencia").getFallbackStrings( true));
2168 aVec.insert( aVec.end(), aRep.begin(), aRep.end());
2169 // Already includes 'ca' language fallback.
2171 else if (aLanguage == "ku")
2173 if (aCountry == "TR" || aCountry == "SY")
2175 aVec.emplace_back("kmr-Latn-" + aCountry);
2176 aVec.emplace_back("kmr-" + aCountry);
2177 aVec.emplace_back("kmr-Latn");
2178 aVec.emplace_back("kmr");
2179 aVec.push_back( aLanguage);
2181 else if (aCountry == "IQ" || aCountry == "IR")
2183 aVec.emplace_back("ckb-" + aCountry);
2184 aVec.emplace_back("ckb");
2187 else if (aLanguage == "kmr" && (aCountry == "TR" || aCountry == "SY"))
2189 aVec.emplace_back("ku-Latn-" + aCountry);
2190 aVec.emplace_back("ku-" + aCountry);
2191 aVec.push_back( aLanguage);
2192 aVec.emplace_back("ku");
2194 else if (aLanguage == "ckb" && (aCountry == "IQ" || aCountry == "IR"))
2196 aVec.emplace_back("ku-Arab-" + aCountry);
2197 aVec.emplace_back("ku-" + aCountry);
2198 aVec.push_back( aLanguage);
2199 // not 'ku' only, that was used for Latin script
2201 else
2202 aVec.push_back( aLanguage);
2204 else
2206 if (bIncludeFullBcp47)
2207 aVec.push_back( aLanguage);
2208 if (aLanguage == "sh")
2210 aVec.emplace_back("sr-Latn");
2211 aVec.emplace_back("sr");
2213 else if (aLanguage == "pli")
2215 // a special case for Pali dictionary, see fdo#41599
2216 aVec.emplace_back("pi-Latn");
2217 aVec.emplace_back("pi");
2220 return aVec;
2223 getBcp47(); // have maBcp47 now
2224 if (bIncludeFullBcp47)
2225 aVec.push_back( maBcp47);
2227 // Special cases for deprecated tags and their replacements, include both
2228 // in fallbacks in a sensible order.
2229 /* TODO: could such things be generalized and automated with liblangtag? */
2230 if (maBcp47 == "en-GB-oed")
2231 aVec.emplace_back("en-GB-oxendict");
2232 else if (maBcp47 == "en-GB-oxendict")
2233 aVec.emplace_back("en-GB-oed");
2235 OUString aScript;
2236 OUString aVariants( getVariants());
2237 OUString aTmp;
2238 if (hasScript())
2240 aScript = getScript();
2241 bool bHaveLanguageScriptVariant = false;
2242 if (!aCountry.isEmpty())
2244 if (!aVariants.isEmpty())
2246 aTmp = aLanguage + "-" + aScript + "-" + aCountry + "-" + aVariants;
2247 if (aTmp != maBcp47)
2248 aVec.push_back( aTmp);
2249 // Language with variant but without country before language
2250 // without variant but with country.
2251 aTmp = aLanguage + "-" + aScript + "-" + aVariants;
2252 if (aTmp != maBcp47)
2253 aVec.push_back( aTmp);
2254 bHaveLanguageScriptVariant = true;
2256 aTmp = aLanguage + "-" + aScript + "-" + aCountry;
2257 if (aTmp != maBcp47)
2258 aVec.push_back( aTmp);
2259 if (aLanguage == "sr" && aScript == "Latn")
2261 // sr-Latn-CS => sr-Latn-YU, sh-CS, sh-YU
2262 if (aCountry == "CS")
2264 aVec.emplace_back("sr-Latn-YU");
2265 aVec.emplace_back("sh-CS");
2266 aVec.emplace_back("sh-YU");
2268 else
2269 aVec.emplace_back("sh-" + aCountry);
2271 else if (aLanguage == "pi" && aScript == "Latn")
2272 aVec.emplace_back("pli"); // a special case for Pali dictionary, see fdo#41599
2273 else if (aLanguage == "krm" && aScript == "Latn" && (aCountry == "TR" || aCountry == "SY"))
2274 aVec.emplace_back("ku-" + aCountry);
2276 if (!aVariants.isEmpty() && !bHaveLanguageScriptVariant)
2278 aTmp = aLanguage + "-" + aScript + "-" + aVariants;
2279 if (aTmp != maBcp47)
2280 aVec.push_back( aTmp);
2282 aTmp = aLanguage + "-" + aScript;
2283 if (aTmp != maBcp47)
2284 aVec.push_back( aTmp);
2286 // 'sh' actually denoted a script, so have it here instead of appended
2287 // at the end as language-only.
2288 if (aLanguage == "sr" && aScript == "Latn")
2289 aVec.emplace_back("sh");
2290 else if (aLanguage == "ku" && aScript == "Arab")
2291 aVec.emplace_back("ckb");
2292 // 'ku' only denoted Latin script
2293 else if (aLanguage == "krm" && aScript == "Latn" && aCountry.isEmpty())
2294 aVec.emplace_back("ku");
2296 bool bHaveLanguageVariant = false;
2297 if (!aCountry.isEmpty())
2299 if (!aVariants.isEmpty())
2301 aTmp = aLanguage + "-" + aCountry + "-" + aVariants;
2302 if (aTmp != maBcp47)
2303 aVec.push_back( aTmp);
2304 if (maBcp47 == "ca-ES-valencia")
2305 aVec.emplace_back("ca-XV");
2306 // Language with variant but without country before language
2307 // without variant but with country.
2308 // But only if variant is not from a grandfathered tag that
2309 // wouldn't match the rules, i.e. "de-1901" is fine but "en-oed" is
2310 // not.
2311 if (aVariants.getLength() >= 5 ||
2312 (aVariants.getLength() == 4 && '0' <= aVariants[0] && aVariants[0] <= '9'))
2314 aTmp = aLanguage + "-" + aVariants;
2315 if (aTmp != maBcp47)
2316 aVec.push_back( aTmp);
2317 bHaveLanguageVariant = true;
2320 aTmp = aLanguage + "-" + aCountry;
2321 if (aTmp != maBcp47)
2322 aVec.push_back( aTmp);
2324 if (!aVariants.isEmpty() && !bHaveLanguageVariant)
2326 // Only if variant is not from a grandfathered tag that wouldn't match
2327 // the rules, i.e. "de-1901" is fine but "en-oed" is not.
2328 if (aVariants.getLength() >= 5 ||
2329 (aVariants.getLength() == 4 && '0' <= aVariants[0] && aVariants[0] <= '9'))
2331 aTmp = aLanguage + "-" + aVariants;
2332 if (aTmp != maBcp47)
2333 aVec.push_back( aTmp);
2337 // Insert legacy fallbacks with country before language-only, but only
2338 // default script, script was handled already above.
2339 if (!aCountry.isEmpty())
2341 if (aLanguage == "sr" && aCountry == "CS")
2342 aVec.emplace_back("sr-YU");
2345 // Original language-only.
2346 if (aLanguage != maBcp47)
2347 aVec.push_back( aLanguage);
2349 return aVec;
2353 OUString LanguageTag::getBcp47MS() const
2355 if (getLanguageType() == LANGUAGE_SPANISH_DATED)
2356 return "es-ES_tradnl";
2357 return getBcp47();
2361 bool LanguageTag::equals( const LanguageTag & rLanguageTag ) const
2363 // If SYSTEM is not to be resolved or either both are SYSTEM or none, we
2364 // can use the operator==() optimization.
2365 if (isSystemLocale() == rLanguageTag.isSystemLocale())
2366 return operator==( rLanguageTag);
2368 // Compare full language tag strings.
2369 return getBcp47() == rLanguageTag.getBcp47();
2373 bool LanguageTag::operator==( const LanguageTag & rLanguageTag ) const
2375 if (isSystemLocale() && rLanguageTag.isSystemLocale())
2376 return true; // both SYSTEM
2378 // No need to convert to BCP47 if both Lang-IDs are available.
2379 if (mbInitializedLangID && rLanguageTag.mbInitializedLangID)
2381 // Equal if same ID and no SYSTEM is involved or both are SYSTEM.
2382 return mnLangID == rLanguageTag.mnLangID && isSystemLocale() == rLanguageTag.isSystemLocale();
2385 // Compare full language tag strings but SYSTEM unresolved.
2386 return getBcp47( false) == rLanguageTag.getBcp47( false);
2390 bool LanguageTag::operator!=( const LanguageTag & rLanguageTag ) const
2392 return !operator==( rLanguageTag);
2396 bool LanguageTag::operator<( const LanguageTag & rLanguageTag ) const
2398 return getBcp47( false).compareToIgnoreAsciiCase( rLanguageTag.getBcp47( false)) < 0;
2402 // static
2403 LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp47,
2404 OUString& rLanguage, OUString& rScript, OUString& rCountry, OUString& rVariants )
2406 Extraction eRet = EXTRACTED_NONE;
2407 const sal_Int32 nLen = rBcp47.getLength();
2408 const sal_Int32 nHyph1 = rBcp47.indexOf( '-');
2409 sal_Int32 nHyph2 = (nHyph1 < 0 ? -1 : rBcp47.indexOf( '-', nHyph1 + 1));
2410 sal_Int32 nHyph3 = (nHyph2 < 0 ? -1 : rBcp47.indexOf( '-', nHyph2 + 1));
2411 sal_Int32 nHyph4 = (nHyph3 < 0 ? -1 : rBcp47.indexOf( '-', nHyph3 + 1));
2412 if (nLen == 1 && rBcp47[0] == '*') // * the dreaded jolly joker
2414 // It's f*d up but we need to recognize this.
2415 eRet = EXTRACTED_X_JOKER;
2417 else if (nHyph1 == 1 && rBcp47[0] == 'x') // x-... privateuse
2419 // x-... privateuse tags MUST be known to us by definition.
2420 eRet = EXTRACTED_X;
2422 else if (nLen == 1 && rBcp47[0] == 'C') // the 'C' locale
2424 eRet = EXTRACTED_C_LOCALE;
2425 rLanguage = "C";
2426 rScript.clear();
2427 rCountry.clear();
2428 rVariants.clear();
2430 else if (nLen == 2 || nLen == 3) // ll or lll
2432 if (nHyph1 < 0)
2434 rLanguage = rBcp47.toAsciiLowerCase();
2435 rScript.clear();
2436 rCountry.clear();
2437 rVariants.clear();
2438 eRet = EXTRACTED_LSC;
2441 else if ( (nHyph1 == 2 && nLen == 5) // ll-CC
2442 || (nHyph1 == 3 && nLen == 6)) // lll-CC
2444 if (nHyph2 < 0)
2446 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2447 rCountry = rBcp47.copy( nHyph1 + 1, 2).toAsciiUpperCase();
2448 rScript.clear();
2449 rVariants.clear();
2450 eRet = EXTRACTED_LSC;
2453 else if ( (nHyph1 == 2 && nLen == 7) // ll-Ssss or ll-vvvv
2454 || (nHyph1 == 3 && nLen == 8)) // lll-Ssss or lll-vvvv
2456 if (nHyph2 < 0)
2458 sal_Unicode c = rBcp47[nHyph1+1];
2459 if ('0' <= c && c <= '9')
2461 // (DIGIT 3ALNUM) vvvv variant instead of Ssss script
2462 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2463 rScript.clear();
2464 rCountry.clear();
2465 rVariants = rBcp47.copy( nHyph1 + 1);
2466 eRet = EXTRACTED_LV;
2468 else
2470 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2471 rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() +
2472 rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
2473 rCountry.clear();
2474 rVariants.clear();
2475 eRet = EXTRACTED_LSC;
2479 else if ( (nHyph1 == 2 && nHyph2 == 7 && nLen == 10) // ll-Ssss-CC
2480 || (nHyph1 == 3 && nHyph2 == 8 && nLen == 11)) // lll-Ssss-CC
2482 if (nHyph3 < 0)
2484 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2485 rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
2486 rCountry = rBcp47.copy( nHyph2 + 1, 2).toAsciiUpperCase();
2487 rVariants.clear();
2488 eRet = EXTRACTED_LSC;
2491 else if ( (nHyph1 == 2 && nHyph2 == 7 && nHyph3 == 10 && nLen >= 15) // ll-Ssss-CC-vvvv[vvvv][-...]
2492 || (nHyph1 == 3 && nHyph2 == 8 && nHyph3 == 11 && nLen >= 16)) // lll-Ssss-CC-vvvv[vvvv][-...]
2494 if (nHyph4 < 0)
2495 nHyph4 = rBcp47.getLength();
2496 if (nHyph4 - nHyph3 > 4 && nHyph4 - nHyph3 <= 9)
2498 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2499 rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
2500 rCountry = rBcp47.copy( nHyph2 + 1, 2).toAsciiUpperCase();
2501 rVariants = rBcp47.copy( nHyph3 + 1);
2502 eRet = EXTRACTED_LV;
2505 else if ( (nHyph1 == 2 && nHyph2 == 5 && nHyph3 == 7) // ll-CC-u-...
2506 || (nHyph1 == 3 && nHyph2 == 6 && nHyph3 == 8)) // lll-CC-u-...
2508 if (rBcp47[nHyph3-1] == 'u')
2510 // Need to recognize as known, otherwise getLanguage() and
2511 // getCountry() return empty string because mpImplLangtag is not
2512 // used with a known mapping.
2513 /* TODO: if there were more this would get ugly and needed some
2514 * table driven approach via isolang.cxx instead. */
2515 if (rBcp47.equalsIgnoreAsciiCase( "es-ES-u-co-trad"))
2517 rLanguage = "es";
2518 rScript.clear();
2519 rCountry = "ES";
2520 rVariants = "u-co-trad"; // not strictly a variant, but used to reconstruct the tag.
2521 eRet = EXTRACTED_LV;
2525 else if ( (nHyph1 == 2 && nHyph2 == 5 && nLen >= 10) // ll-CC-vvvv[vvvv][-...]
2526 || (nHyph1 == 3 && nHyph2 == 6 && nLen >= 11)) // lll-CC-vvvv[vvvv][-...]
2528 if (nHyph3 < 0)
2529 nHyph3 = rBcp47.getLength();
2530 if (nHyph3 - nHyph2 > 4 && nHyph3 - nHyph2 <= 9)
2532 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2533 rScript.clear();
2534 rCountry = rBcp47.copy( nHyph1 + 1, 2).toAsciiUpperCase();
2535 rVariants = rBcp47.copy( nHyph2 + 1);
2536 eRet = EXTRACTED_LV;
2539 else if ( (nHyph1 == 2 && nLen >= 8) // ll-vvvvv[vvv][-...]
2540 || (nHyph1 == 3 && nLen >= 9)) // lll-vvvvv[vvv][-...]
2542 if (nHyph2 < 0)
2543 nHyph2 = rBcp47.getLength();
2544 if (nHyph2 - nHyph1 > 5 && nHyph2 - nHyph1 <= 9)
2546 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2547 rScript.clear();
2548 rCountry.clear();
2549 rVariants = rBcp47.copy( nHyph1 + 1);
2550 eRet = EXTRACTED_LV;
2552 else
2554 // Known and handled grandfathered; ugly but effective ...
2555 // Note that nLen must have matched above.
2556 // Strictly not a variant, but so far we treat it as such.
2557 if (rBcp47.equalsIgnoreAsciiCase( "en-GB-oed"))
2559 rLanguage = "en";
2560 rScript.clear();
2561 rCountry = "GB";
2562 rVariants = "oed";
2563 eRet = EXTRACTED_LV;
2565 // Other known and handled odd cases.
2566 else if (rBcp47.equalsIgnoreAsciiCase( "es-ES_tradnl"))
2568 // Will get overridden, but needs to be recognized as known.
2569 rLanguage = "es";
2570 rScript.clear();
2571 rCountry = "ES";
2572 rVariants = "tradnl"; // this is nonsense, but... ignored.
2573 eRet = EXTRACTED_KNOWN_BAD;
2577 if (eRet == EXTRACTED_NONE)
2579 SAL_INFO( "i18nlangtag", "LanguageTagImpl::simpleExtract: did not extract '" << rBcp47 << "'");
2580 rLanguage.clear();
2581 rScript.clear();
2582 rCountry.clear();
2583 rVariants.clear();
2585 return eRet;
2589 // static
2590 ::std::vector< OUString >::const_iterator LanguageTag::getFallback(
2591 const ::std::vector< OUString > & rList, const OUString & rReference )
2593 if (rList.empty())
2594 return rList.end();
2596 // Try the simple case first without constructing fallbacks.
2597 ::std::vector< OUString >::const_iterator it = std::find(rList.begin(), rList.end(), rReference);
2598 if (it != rList.end())
2599 return it; // exact match
2601 ::std::vector< OUString > aFallbacks( LanguageTag( rReference).getFallbackStrings( false));
2602 if (rReference != "en-US")
2604 aFallbacks.emplace_back("en-US");
2605 if (rReference != "en")
2606 aFallbacks.emplace_back("en");
2608 if (rReference != "x-default")
2609 aFallbacks.emplace_back("x-default");
2610 if (rReference != "x-no-translate")
2611 aFallbacks.emplace_back("x-no-translate");
2612 /* TODO: the original comphelper::Locale::getFallback() code had
2613 * "x-notranslate" instead of "x-no-translate", but all .xcu files use
2614 * "x-no-translate" and "x-notranslate" apparently was never used anywhere.
2615 * Did that ever work? Was it supposed to work at all like this? */
2617 for (const auto& fb : aFallbacks)
2619 it = std::find(rList.begin(), rList.end(), fb);
2620 if (it != rList.end())
2621 return it; // fallback found
2624 // Did not find anything so return something of the list, the first value
2625 // will do as well as any other as none did match any of the possible
2626 // fallbacks.
2627 return rList.begin();
2631 // static
2632 ::std::vector< css::lang::Locale >::const_iterator LanguageTag::getMatchingFallback(
2633 const ::std::vector< css::lang::Locale > & rList,
2634 const css::lang::Locale & rReference )
2636 if (rList.empty())
2637 return rList.end();
2639 // Try the simple case first without constructing fallbacks.
2640 ::std::vector< lang::Locale >::const_iterator it = std::find_if(rList.begin(), rList.end(),
2641 [&rReference](const lang::Locale& rLocale) {
2642 return rLocale.Language == rReference.Language
2643 && rLocale.Country == rReference.Country
2644 && rLocale.Variant == rReference.Variant; });
2645 if (it != rList.end())
2646 return it; // exact match
2648 // Now for each reference fallback test the fallbacks of the list in order.
2649 ::std::vector< OUString > aFallbacks( LanguageTag( rReference).getFallbackStrings( false));
2650 ::std::vector< ::std::vector< OUString > > aListFallbacks( rList.size());
2651 size_t i = 0;
2652 for (auto const& elem : rList)
2654 ::std::vector< OUString > aTmp( LanguageTag(elem).getFallbackStrings( true));
2655 aListFallbacks[i++] = aTmp;
2657 for (auto const& rfb : aFallbacks)
2659 size_t nPosFb = 0;
2660 for (auto const& lfb : aListFallbacks)
2662 for (auto const& fb : lfb)
2664 if (rfb == fb)
2665 return rList.begin() + nPosFb;
2667 ++nPosFb;
2671 // No match found.
2672 return rList.end();
2676 static bool lcl_isSystem( LanguageType nLangID )
2678 if (nLangID == LANGUAGE_SYSTEM)
2679 return true;
2680 // There are some special values that simplify to SYSTEM,
2681 // getRealLanguage() catches and resolves them.
2682 LanguageType nNewLangID = MsLangId::getRealLanguage( nLangID);
2683 return nNewLangID != nLangID;
2687 // static
2688 css::lang::Locale LanguageTag::convertToLocale( LanguageType nLangID, bool bResolveSystem )
2690 if (!bResolveSystem && lcl_isSystem( nLangID))
2691 return lang::Locale();
2693 return LanguageTag( nLangID).getLocale( bResolveSystem);
2697 // static
2698 LanguageType LanguageTag::convertToLanguageType( const css::lang::Locale& rLocale, bool bResolveSystem )
2700 if (rLocale.Language.isEmpty() && !bResolveSystem)
2701 return LANGUAGE_SYSTEM;
2703 return LanguageTag( rLocale).getLanguageType( bResolveSystem);
2707 // static
2708 OUString LanguageTagImpl::convertToBcp47( const css::lang::Locale& rLocale )
2710 OUString aBcp47;
2711 if (rLocale.Language.isEmpty())
2713 // aBcp47 stays empty
2715 else if (rLocale.Language == I18NLANGTAG_QLT)
2717 aBcp47 = rLocale.Variant;
2719 else
2721 /* XXX NOTE: most legacy code never evaluated the Variant field, so for
2722 * now just concatenate language and country. In case we stumbled over
2723 * variant aware code we'd have to take care of that. */
2724 if (rLocale.Country.isEmpty())
2725 aBcp47 = rLocale.Language;
2726 else
2728 aBcp47 = rLocale.Language + "-" + rLocale.Country;
2731 return aBcp47;
2735 // static
2736 OUString LanguageTag::convertToBcp47( const css::lang::Locale& rLocale, bool bResolveSystem )
2738 OUString aBcp47;
2739 if (rLocale.Language.isEmpty())
2741 if (bResolveSystem)
2742 aBcp47 = LanguageTag::convertToBcp47( LANGUAGE_SYSTEM );
2743 // else aBcp47 stays empty
2745 else
2747 aBcp47 = LanguageTagImpl::convertToBcp47( rLocale);
2749 return aBcp47;
2753 // static
2754 OUString LanguageTag::convertToBcp47( LanguageType nLangID )
2756 lang::Locale aLocale( LanguageTag::convertToLocale( nLangID ));
2757 // If system for some reason (should not happen... haha) could not be
2758 // resolved DO NOT CALL LanguageTag::convertToBcp47(Locale) because that
2759 // would recurse into this method here!
2760 if (aLocale.Language.isEmpty())
2761 return OUString(); // bad luck, bail out
2762 return LanguageTagImpl::convertToBcp47( aLocale);
2766 // static
2767 css::lang::Locale LanguageTag::convertToLocale( const OUString& rBcp47, bool bResolveSystem )
2769 if (rBcp47.isEmpty() && !bResolveSystem)
2770 return lang::Locale();
2772 return LanguageTag( rBcp47).getLocale( bResolveSystem);
2776 // static
2777 LanguageType LanguageTag::convertToLanguageType( const OUString& rBcp47 )
2779 return LanguageTag( rBcp47).getLanguageType();
2783 // static
2784 LanguageType LanguageTag::convertToLanguageTypeWithFallback( const OUString& rBcp47 )
2786 return LanguageTag( rBcp47).makeFallback().getLanguageType();
2790 // static
2791 css::lang::Locale LanguageTag::convertToLocaleWithFallback( const OUString& rBcp47 )
2793 return LanguageTag( rBcp47).makeFallback().getLocale();
2797 // static
2798 bool LanguageTag::isValidBcp47( const OUString& rString, OUString* o_pCanonicalized, bool bDisallowPrivate )
2800 bool bValid = false;
2802 struct guard
2804 lt_tag_t* mpLangtag;
2805 guard()
2807 theDataRef::get().init();
2808 mpLangtag = lt_tag_new();
2810 ~guard()
2812 lt_tag_unref( mpLangtag);
2814 } aVar;
2816 myLtError aError;
2818 if (!lt_tag_parse_disabled && lt_tag_parse(aVar.mpLangtag, OUStringToOString(rString, RTL_TEXTENCODING_UTF8).getStr(), &aError.p))
2820 char* pTag = lt_tag_canonicalize( aVar.mpLangtag, &aError.p);
2821 SAL_WARN_IF( !pTag, "i18nlangtag", "LanguageTag:isValidBcp47: could not canonicalize '" << rString << "'");
2822 if (pTag)
2824 bValid = true;
2825 if (bDisallowPrivate)
2827 const lt_string_t* pPrivate = lt_tag_get_privateuse( aVar.mpLangtag);
2828 if (pPrivate && lt_string_length( pPrivate) > 0)
2829 bValid = false;
2830 else
2832 const lt_lang_t* pLangT = lt_tag_get_language( aVar.mpLangtag);
2833 if (pLangT)
2835 const char* pLang = lt_lang_get_tag( pLangT);
2836 if (pLang && strcmp( pLang, I18NLANGTAG_QLT) == 0)
2838 // Disallow 'qlt' privateuse code to prevent
2839 // confusion with our internal usage.
2840 bValid = false;
2845 if (o_pCanonicalized)
2846 *o_pCanonicalized = OUString::createFromAscii( pTag);
2847 free( pTag);
2848 return bValid;
2851 else
2853 SAL_INFO( "i18nlangtag", "LanguageTag:isValidBcp47: could not parse '" << rString << "'");
2855 return bValid;
2858 LanguageTag makeLanguageTagFromAppleLanguageId(AppleLanguageId nLanguage)
2860 //map the simple ones via LanguageTypes, and the hard ones explicitly
2861 LanguageType nLang(LANGUAGE_DONTKNOW);
2863 switch (nLanguage)
2865 case AppleLanguageId::ENGLISH:
2866 nLang = LANGUAGE_ENGLISH;
2867 break;
2868 case AppleLanguageId::FRENCH:
2869 nLang = LANGUAGE_FRENCH;
2870 break;
2871 case AppleLanguageId::GERMAN:
2872 nLang = LANGUAGE_GERMAN;
2873 break;
2874 case AppleLanguageId::ITALIAN:
2875 nLang = LANGUAGE_ITALIAN;
2876 break;
2877 case AppleLanguageId::DUTCH:
2878 nLang = LANGUAGE_DUTCH;
2879 break;
2880 case AppleLanguageId::SWEDISH:
2881 nLang = LANGUAGE_SWEDISH;
2882 break;
2883 case AppleLanguageId::SPANISH:
2884 nLang = LANGUAGE_SPANISH;
2885 break;
2886 case AppleLanguageId::DANISH:
2887 nLang = LANGUAGE_DANISH;
2888 break;
2889 case AppleLanguageId::PORTUGUESE:
2890 nLang = LANGUAGE_PORTUGUESE;
2891 break;
2892 case AppleLanguageId::NORWEGIAN:
2893 nLang = LANGUAGE_NORWEGIAN;
2894 break;
2895 case AppleLanguageId::HEBREW:
2896 nLang = LANGUAGE_HEBREW;
2897 break;
2898 case AppleLanguageId::JAPANESE:
2899 nLang = LANGUAGE_JAPANESE;
2900 break;
2901 case AppleLanguageId::ARABIC:
2902 nLang = LANGUAGE_ARABIC_PRIMARY_ONLY;
2903 break;
2904 case AppleLanguageId::FINNISH:
2905 nLang = LANGUAGE_FINNISH;
2906 break;
2907 case AppleLanguageId::GREEK:
2908 nLang = LANGUAGE_GREEK;
2909 break;
2910 case AppleLanguageId::ICELANDIC:
2911 nLang = LANGUAGE_ICELANDIC;
2912 break;
2913 case AppleLanguageId::MALTESE:
2914 nLang = LANGUAGE_MALTESE;
2915 break;
2916 case AppleLanguageId::TURKISH:
2917 nLang = LANGUAGE_TURKISH;
2918 break;
2919 case AppleLanguageId::CROATIAN:
2920 nLang = LANGUAGE_CROATIAN;
2921 break;
2922 case AppleLanguageId::CHINESE_TRADITIONAL:
2923 nLang = LANGUAGE_CHINESE_TRADITIONAL;
2924 break;
2925 case AppleLanguageId::URDU:
2926 nLang = LANGUAGE_URDU_PAKISTAN; //probably, otherwise we need a LANGUAGE_URDU_PRIMARY_ONLY
2927 break;
2928 case AppleLanguageId::HINDI:
2929 nLang = LANGUAGE_HINDI;
2930 break;
2931 case AppleLanguageId::THAI:
2932 nLang = LANGUAGE_THAI;
2933 break;
2934 case AppleLanguageId::KOREAN:
2935 nLang = LANGUAGE_KOREAN;
2936 break;
2937 case AppleLanguageId::LITHUANIAN:
2938 nLang = LANGUAGE_LITHUANIAN;
2939 break;
2940 case AppleLanguageId::POLISH:
2941 nLang = LANGUAGE_POLISH;
2942 break;
2943 case AppleLanguageId::HUNGARIAN:
2944 nLang = LANGUAGE_HUNGARIAN;
2945 break;
2946 case AppleLanguageId::ESTONIAN:
2947 nLang = LANGUAGE_ESTONIAN;
2948 break;
2949 case AppleLanguageId::LATVIAN:
2950 nLang = LANGUAGE_LATVIAN;
2951 break;
2952 case AppleLanguageId::SAMI:
2953 nLang = LANGUAGE_SAMI_NORTHERN_NORWAY; //maybe
2954 break;
2955 case AppleLanguageId::FAROESE:
2956 nLang = LANGUAGE_FAEROESE;
2957 break;
2958 case AppleLanguageId::FARSI:
2959 nLang = LANGUAGE_FARSI;
2960 break;
2961 case AppleLanguageId::RUSSIAN:
2962 nLang = LANGUAGE_RUSSIAN;
2963 break;
2964 case AppleLanguageId::CHINESE_SIMPLIFIED:
2965 nLang = LANGUAGE_CHINESE_SIMPLIFIED;
2966 break;
2967 case AppleLanguageId::FLEMISH:
2968 nLang = LANGUAGE_DUTCH_BELGIAN;
2969 break;
2970 case AppleLanguageId::IRISH_GAELIC:
2971 nLang = LANGUAGE_GAELIC_IRELAND;
2972 break;
2973 case AppleLanguageId::ALBANIAN:
2974 nLang = LANGUAGE_ALBANIAN;
2975 break;
2976 case AppleLanguageId::ROMANIAN:
2977 nLang = LANGUAGE_ROMANIAN;
2978 break;
2979 case AppleLanguageId::CZECH:
2980 nLang = LANGUAGE_CZECH;
2981 break;
2982 case AppleLanguageId::SLOVAK:
2983 nLang = LANGUAGE_SLOVAK;
2984 break;
2985 case AppleLanguageId::SLOVENIAN:
2986 nLang = LANGUAGE_SLOVENIAN;
2987 break;
2988 case AppleLanguageId::YIDDISH:
2989 nLang = LANGUAGE_YIDDISH;
2990 break;
2991 case AppleLanguageId::SERBIAN:
2992 nLang = LANGUAGE_SERBIAN_CYRILLIC_SERBIA; //maybe
2993 break;
2994 case AppleLanguageId::MACEDONIAN:
2995 nLang = LANGUAGE_MACEDONIAN;
2996 break;
2997 case AppleLanguageId::BULGARIAN:
2998 nLang = LANGUAGE_BULGARIAN;
2999 break;
3000 case AppleLanguageId::UKRAINIAN:
3001 nLang = LANGUAGE_UKRAINIAN;
3002 break;
3003 case AppleLanguageId::BYELORUSSIAN:
3004 nLang = LANGUAGE_BELARUSIAN;
3005 break;
3006 case AppleLanguageId::UZBEK:
3007 nLang = LANGUAGE_UZBEK_CYRILLIC; //maybe
3008 break;
3009 case AppleLanguageId::KAZAKH:
3010 nLang = LANGUAGE_KAZAKH;
3011 break;
3012 case AppleLanguageId::AZERI_CYRILLIC:
3013 nLang = LANGUAGE_AZERI_CYRILLIC;
3014 break;
3015 case AppleLanguageId::AZERI_ARABIC:
3016 return LanguageTag("az-Arab");
3017 break;
3018 case AppleLanguageId::ARMENIAN:
3019 nLang = LANGUAGE_ARMENIAN;
3020 break;
3021 case AppleLanguageId::GEORGIAN:
3022 nLang = LANGUAGE_GEORGIAN;
3023 break;
3024 case AppleLanguageId::MOLDAVIAN:
3025 nLang = LANGUAGE_ROMANIAN_MOLDOVA;
3026 break;
3027 case AppleLanguageId::KIRGHIZ:
3028 nLang = LANGUAGE_KIRGHIZ;
3029 break;
3030 case AppleLanguageId::TAJIKI:
3031 nLang = LANGUAGE_TAJIK;
3032 break;
3033 case AppleLanguageId::TURKMEN:
3034 nLang = LANGUAGE_TURKMEN;
3035 break;
3036 case AppleLanguageId::MONGOLIAN_MONGOLIAN:
3037 nLang = LANGUAGE_MONGOLIAN_MONGOLIAN_MONGOLIA;
3038 break;
3039 case AppleLanguageId::MONGOLIAN_CYRILLIC:
3040 nLang = LANGUAGE_MONGOLIAN_CYRILLIC_MONGOLIA;
3041 break;
3042 case AppleLanguageId::PASHTO:
3043 nLang = LANGUAGE_PASHTO;
3044 break;
3045 case AppleLanguageId::KURDISH:
3046 nLang = LANGUAGE_USER_KURDISH_TURKEY; //maybe
3047 break;
3048 case AppleLanguageId::KASHMIRI:
3049 nLang = LANGUAGE_KASHMIRI;
3050 break;
3051 case AppleLanguageId::SINDHI:
3052 nLang = LANGUAGE_SINDHI;
3053 break;
3054 case AppleLanguageId::TIBETAN:
3055 nLang = LANGUAGE_TIBETAN;
3056 break;
3057 case AppleLanguageId::NEPALI:
3058 nLang = LANGUAGE_NEPALI;
3059 break;
3060 case AppleLanguageId::SANSKRIT:
3061 nLang = LANGUAGE_SANSKRIT;
3062 break;
3063 case AppleLanguageId::MARATHI:
3064 nLang = LANGUAGE_MARATHI;
3065 break;
3066 case AppleLanguageId::BENGALI:
3067 nLang = LANGUAGE_BENGALI;
3068 break;
3069 case AppleLanguageId::ASSAMESE:
3070 nLang = LANGUAGE_ASSAMESE;
3071 break;
3072 case AppleLanguageId::GUJARATI:
3073 nLang = LANGUAGE_GUJARATI;
3074 break;
3075 case AppleLanguageId::PUNJABI:
3076 nLang = LANGUAGE_PUNJABI;
3077 break;
3078 case AppleLanguageId::ORIYA:
3079 nLang = LANGUAGE_ODIA;
3080 break;
3081 case AppleLanguageId::MALAYALAM:
3082 nLang = LANGUAGE_MALAYALAM;
3083 break;
3084 case AppleLanguageId::KANNADA:
3085 nLang = LANGUAGE_KANNADA;
3086 break;
3087 case AppleLanguageId::TAMIL:
3088 nLang = LANGUAGE_TAMIL;
3089 break;
3090 case AppleLanguageId::TELUGU:
3091 nLang = LANGUAGE_TELUGU;
3092 break;
3093 case AppleLanguageId::SINHALESE:
3094 nLang = LANGUAGE_SINHALESE_SRI_LANKA;
3095 break;
3096 case AppleLanguageId::BURMESE:
3097 nLang = LANGUAGE_BURMESE;
3098 break;
3099 case AppleLanguageId::KHMER:
3100 nLang = LANGUAGE_KHMER;
3101 break;
3102 case AppleLanguageId::LAO:
3103 nLang = LANGUAGE_LAO;
3104 break;
3105 case AppleLanguageId::VIETNAMESE:
3106 nLang = LANGUAGE_VIETNAMESE;
3107 break;
3108 case AppleLanguageId::INDONESIAN:
3109 nLang = LANGUAGE_INDONESIAN;
3110 break;
3111 case AppleLanguageId::TAGALONG:
3112 nLang = LANGUAGE_USER_TAGALOG;
3113 break;
3114 case AppleLanguageId::MALAY_LATIN:
3115 nLang = LANGUAGE_MALAY_MALAYSIA;
3116 break;
3117 case AppleLanguageId::MALAY_ARABIC:
3118 nLang = LANGUAGE_USER_MALAY_ARABIC_MALAYSIA;
3119 break;
3120 case AppleLanguageId::AMHARIC:
3121 nLang = LANGUAGE_AMHARIC_ETHIOPIA;
3122 break;
3123 case AppleLanguageId::TIGRINYA:
3124 nLang = LANGUAGE_TIGRIGNA_ETHIOPIA;
3125 break;
3126 case AppleLanguageId::GALLA:
3127 nLang = LANGUAGE_OROMO;
3128 break;
3129 case AppleLanguageId::SOMALI:
3130 nLang = LANGUAGE_SOMALI;
3131 break;
3132 case AppleLanguageId::SWAHILI:
3133 nLang = LANGUAGE_SWAHILI;
3134 break;
3135 case AppleLanguageId::KINYARWANDA:
3136 nLang = LANGUAGE_KINYARWANDA_RWANDA;
3137 break;
3138 case AppleLanguageId::RUNDI:
3139 return LanguageTag("rn");
3140 break;
3141 case AppleLanguageId::NYANJA:
3142 nLang = LANGUAGE_USER_NYANJA;
3143 break;
3144 case AppleLanguageId::MALAGASY:
3145 nLang = LANGUAGE_MALAGASY_PLATEAU;
3146 break;
3147 case AppleLanguageId::ESPERANTO:
3148 nLang = LANGUAGE_USER_ESPERANTO;
3149 break;
3150 case AppleLanguageId::WELSH:
3151 nLang = LANGUAGE_WELSH;
3152 break;
3153 case AppleLanguageId::BASQUE:
3154 nLang = LANGUAGE_BASQUE;
3155 break;
3156 case AppleLanguageId::CATALAN:
3157 nLang = LANGUAGE_CATALAN;
3158 break;
3159 case AppleLanguageId::LATIN:
3160 nLang = LANGUAGE_USER_LATIN;
3161 break;
3162 case AppleLanguageId::QUENCHUA:
3163 nLang = LANGUAGE_QUECHUA_BOLIVIA; //maybe
3164 break;
3165 case AppleLanguageId::GUARANI:
3166 nLang = LANGUAGE_GUARANI_PARAGUAY;
3167 break;
3168 case AppleLanguageId::AYMARA:
3169 return LanguageTag("ay");
3170 break;
3171 case AppleLanguageId::TATAR:
3172 nLang = LANGUAGE_TATAR;
3173 break;
3174 case AppleLanguageId::UIGHUR:
3175 nLang = LANGUAGE_UIGHUR_CHINA;
3176 break;
3177 case AppleLanguageId::DZONGKHA:
3178 nLang = LANGUAGE_DZONGKHA_BHUTAN;
3179 break;
3180 case AppleLanguageId::JAVANESE_LATIN:
3181 return LanguageTag("jv-Latn");
3182 break;
3183 case AppleLanguageId::SUNDANESE_LATIN:
3184 return LanguageTag("su-Latn");
3185 break;
3186 case AppleLanguageId::GALICIAN:
3187 nLang = LANGUAGE_GALICIAN;
3188 break;
3189 case AppleLanguageId::AFRIKAANS:
3190 nLang = LANGUAGE_AFRIKAANS;
3191 break;
3192 case AppleLanguageId::BRETON:
3193 nLang = LANGUAGE_BRETON_FRANCE;
3194 break;
3195 case AppleLanguageId::INUKTITUT:
3196 nLang = LANGUAGE_INUKTITUT_LATIN_CANADA; //probably
3197 break;
3198 case AppleLanguageId::SCOTTISH_GAELIC:
3199 nLang = LANGUAGE_GAELIC_SCOTLAND;
3200 break;
3201 case AppleLanguageId::MANX_GAELIC:
3202 nLang = LANGUAGE_USER_MANX;
3203 break;
3204 case AppleLanguageId::IRISH_GAELIC_WITH_DOT_ABOVE:
3205 return LanguageTag("ga-Latg");
3206 break;
3207 case AppleLanguageId::TONGAN:
3208 return LanguageTag("to");
3209 break;
3210 case AppleLanguageId::GREEK_POLYTONIC:
3211 nLang = LANGUAGE_USER_ANCIENT_GREEK;
3212 break;
3213 case AppleLanguageId::GREENLANDIC:
3214 nLang = LANGUAGE_KALAALLISUT_GREENLAND;
3215 break;
3216 case AppleLanguageId::AZERI_LATIN:
3217 nLang = LANGUAGE_AZERI_LATIN;
3218 break;
3221 return LanguageTag(nLang);
3224 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */