1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */
10 #include <config_folders.h>
11 #include <config_liblangtag.h>
13 #include <i18nlangtag/languagetag.hxx>
14 #include <i18nlangtag/applelangid.hxx>
15 #include <i18nlangtag/mslangid.hxx>
16 #include <rtl/ustrbuf.hxx>
17 #include <rtl/bootstrap.hxx>
18 #include <sal/log.hxx>
19 #include <osl/file.hxx>
20 #include <rtl/locale.h>
21 #include <o3tl/string_view.hxx>
25 #include <string_view>
26 #include <unordered_set>
30 #if LIBLANGTAG_INLINE_FIX
31 #define LT_HAVE_INLINE
33 #include <liblangtag/langtag.h>
36 #include <osl/detail/android-bootstrap.h>
40 #include <osl/detail/emscripten-bootstrap.h>
43 using namespace com::sun::star
;
47 // Helper to ensure lt_error_t is free'd
51 myLtError() : p(nullptr) {}
52 ~myLtError() { if (p
) lt_error_unref( p
); }
/** @return the one recursive mutex used by this file; the same object is
    returned on every call.
 */
std::recursive_mutex& theMutex()
{
    static std::recursive_mutex aTheMutex;
    return aTheMutex;
}
65 typedef std::unordered_set
< OUString
> KnownTagSet
;
66 static const KnownTagSet
& getKnowns()
68 static KnownTagSet theKnowns
= []()
71 ::std::vector
< MsLangId::LanguagetagMapping
> aDefined( MsLangId::getDefinedLanguagetags());
72 for (auto const& elemDefined
: aDefined
)
74 // Do not use the BCP47 string here to initialize the
75 // LanguageTag because then canonicalize() would call this
76 // getKnowns() again...
77 ::std::vector
< OUString
> aFallbacks( LanguageTag( elemDefined
.mnLang
).getFallbackStrings( true));
78 for (auto const& fallback
: aFallbacks
)
80 tmpSet
.insert(fallback
);
90 struct compareIgnoreAsciiCaseLess
92 bool operator()( std::u16string_view r1
, std::u16string_view r2
) const
94 return o3tl::compareToIgnoreAsciiCase(r1
, r2
) < 0;
97 typedef ::std::map
< OUString
, LanguageTag::ImplPtr
, compareIgnoreAsciiCaseLess
> MapBcp47
;
98 typedef ::std::map
< LanguageType
, LanguageTag::ImplPtr
> MapLangID
;
99 MapBcp47
& theMapBcp47()
101 static MapBcp47 SINGLETON
;
104 MapLangID
& theMapLangID()
106 static MapLangID SINGLETON
;
109 LanguageTag::ImplPtr
& theSystemLocale()
111 static LanguageTag::ImplPtr SINGLETON
;
117 static LanguageType
getNextOnTheFlyLanguage()
119 static LanguageType
nOnTheFlyLanguage(0);
120 std::unique_lock
aGuard( theMutex());
121 if (!nOnTheFlyLanguage
)
122 nOnTheFlyLanguage
= MsLangId::makeLangID( LANGUAGE_ON_THE_FLY_SUB_START
, LANGUAGE_ON_THE_FLY_START
);
125 if (MsLangId::getPrimaryLanguage( nOnTheFlyLanguage
) != LANGUAGE_ON_THE_FLY_END
)
129 LanguageType nSub
= MsLangId::getSubLanguage( nOnTheFlyLanguage
);
130 if (nSub
!= LANGUAGE_ON_THE_FLY_SUB_END
)
131 nOnTheFlyLanguage
= MsLangId::makeLangID( ++nSub
, LANGUAGE_ON_THE_FLY_START
);
134 SAL_WARN( "i18nlangtag", "getNextOnTheFlyLanguage: none left! ("
135 << ((sal_uInt16(LANGUAGE_ON_THE_FLY_END
) - sal_uInt16(LANGUAGE_ON_THE_FLY_START
) + 1)
136 * (sal_uInt16(LANGUAGE_ON_THE_FLY_SUB_END
) - sal_uInt16(LANGUAGE_ON_THE_FLY_SUB_START
) + 1))
138 return LanguageType(0);
142 #if OSL_DEBUG_LEVEL > 0
143 static size_t nOnTheFlies
= 0;
145 SAL_INFO( "i18nlangtag", "getNextOnTheFlyLanguage: number " << nOnTheFlies
);
147 return nOnTheFlyLanguage
;
152 bool LanguageTag::isOnTheFlyID( LanguageType nLang
)
154 LanguageType nPri
= MsLangId::getPrimaryLanguage( nLang
);
155 LanguageType nSub
= MsLangId::getSubLanguage( nLang
);
157 LANGUAGE_ON_THE_FLY_START
<= nPri
&& nPri
<= LANGUAGE_ON_THE_FLY_END
&&
158 LANGUAGE_ON_THE_FLY_SUB_START
<= nSub
&& nSub
<= LANGUAGE_ON_THE_FLY_SUB_END
;
/** A reference holder for liblangtag data de/initialization, one static
    instance. Currently implemented such that the first "ref" initializes the
    data and the destructor (run when our library is deinitialized) tears it
    down.
 */
167 class LiblangtagDataRef
171 ~LiblangtagDataRef();
178 OString maDataPath
; // path to liblangtag data, "|" if system
181 void setupDataPath();
183 static void teardown();
186 LiblangtagDataRef
& theDataRef()
188 static LiblangtagDataRef SINGLETON
;
193 LiblangtagDataRef::LiblangtagDataRef()
199 LiblangtagDataRef::~LiblangtagDataRef()
205 void LiblangtagDataRef::setup()
207 SAL_INFO( "i18nlangtag", "LiblangtagDataRef::setup: initializing database");
208 if (maDataPath
.isEmpty())
211 mbInitialized
= true;
214 void LiblangtagDataRef::teardown()
216 SAL_INFO( "i18nlangtag", "LiblangtagDataRef::teardown: finalizing database");
220 void LiblangtagDataRef::setupDataPath()
222 #if defined(ANDROID) || defined(EMSCRIPTEN)
223 maDataPath
= OString(lo_get_app_data_dir()) + "/share/liblangtag";
225 // maDataPath is assumed to be empty here.
226 OUString
aURL("$BRAND_BASE_DIR/" LIBO_SHARE_FOLDER
"/liblangtag");
227 rtl::Bootstrap::expandMacros(aURL
); //TODO: detect failure
229 // Check if data is in our own installation, else assume system
231 OUString aData
= aURL
+ "/language-subtag-registry.xml";
232 osl::DirectoryItem aDirItem
;
233 if (osl::DirectoryItem::get( aData
, aDirItem
) == osl::DirectoryItem::E_None
)
236 if (osl::FileBase::getSystemPathFromFileURL( aURL
, aPath
) == osl::FileBase::E_None
)
237 maDataPath
= OUStringToOString( aPath
, RTL_TEXTENCODING_UTF8
);
240 if (maDataPath
.isEmpty())
241 maDataPath
= "|"; // assume system
243 lt_db_set_datadir( maDataPath
.getStr());
247 /* TODO: we could transform known vendor and browser-specific variants to known
248 * BCP 47 if available. For now just remove them to not confuse any later
249 * treatments that check for empty variants. This vendor stuff was never
250 * supported anyway. */
251 static void handleVendorVariant( css::lang::Locale
& rLocale
)
253 if (!rLocale
.Variant
.isEmpty() && rLocale
.Language
!= I18NLANGTAG_QLT
)
254 rLocale
.Variant
.clear();
258 class LanguageTagImpl
262 explicit LanguageTagImpl( const LanguageTag
& rLanguageTag
);
263 explicit LanguageTagImpl( const LanguageTagImpl
& rLanguageTagImpl
);
265 LanguageTagImpl
& operator=( const LanguageTagImpl
& rLanguageTagImpl
);
269 friend class LanguageTag
;
278 mutable css::lang::Locale maLocale
;
279 mutable OUString maBcp47
;
280 mutable OUString maCachedLanguage
; ///< cache getLanguage()
281 mutable OUString maCachedScript
; ///< cache getScript()
282 mutable OUString maCachedCountry
; ///< cache getCountry()
283 mutable OUString maCachedVariants
; ///< cache getVariants()
284 mutable OUString maCachedGlibcString
; ///< cache getGlibcLocaleString()
285 mutable lt_tag_t
* mpImplLangtag
; ///< liblangtag pointer
286 mutable LanguageType mnLangID
;
287 mutable LanguageTag::ScriptType meScriptType
;
288 mutable Decision meIsValid
;
289 mutable Decision meIsIsoLocale
;
290 mutable Decision meIsIsoODF
;
291 mutable Decision meIsLiblangtagNeeded
; ///< whether processing with liblangtag needed
292 bool mbSystemLocale
: 1;
293 mutable bool mbInitializedBcp47
: 1;
294 mutable bool mbInitializedLocale
: 1;
295 mutable bool mbInitializedLangID
: 1;
296 mutable bool mbCachedLanguage
: 1;
297 mutable bool mbCachedScript
: 1;
298 mutable bool mbCachedCountry
: 1;
299 mutable bool mbCachedVariants
: 1;
300 mutable bool mbCachedGlibcString
: 1;
302 OUString
const & getBcp47() const;
303 OUString
const & getLanguage() const;
304 OUString
const & getScript() const;
305 OUString
const & getCountry() const;
306 OUString
getRegion() const;
307 OUString
const & getVariants() const;
308 bool hasScript() const;
309 OUString
const & getGlibcLocaleString() const;
311 void setScriptType(LanguageTag::ScriptType st
);
312 LanguageTag::ScriptType
getScriptType() const;
314 bool isIsoLocale() const;
315 bool isIsoODF() const;
316 bool isValidBcp47() const;
318 void convertLocaleToBcp47();
319 bool convertLocaleToLang( bool bAllowOnTheFlyID
);
320 void convertBcp47ToLocale();
321 void convertBcp47ToLang();
322 void convertLangToLocale();
323 void convertLangToBcp47();
325 /** @return whether BCP 47 language tag string was changed. */
328 /** Canonicalize if not yet done and synchronize initialized conversions.
330 @return whether BCP 47 language tag string was changed.
332 bool synCanonicalize();
334 OUString
getLanguageFromLangtag();
335 OUString
getScriptFromLangtag();
336 OUString
getRegionFromLangtag();
337 OUString
getVariantsFromLangtag();
339 /** Generates on-the-fly LangID and registers the maBcp47,mnLangID pair.
342 If not 0 and not LANGUAGE_DONTKNOW, suggest (!) to use that ID
343 instead of generating an on-the-fly ID. Implementation may
344 still generate an ID if the suggested ID is already used for
345 another language tag.
347 @return NULL if no ID could be obtained or registration failed.
349 LanguageTag::ImplPtr
registerOnTheFly( LanguageType nRegisterID
);
351 /** Obtain Language, Script, Country and Variants via simpleExtract() and
352 assign them to the cached variables if successful.
354 @return simpleExtract() successfully extracted and cached.
356 bool cacheSimpleLSCV();
370 /** Of a language tag of the form lll[-Ssss][-CC][-vvvvvvvv] extract the
373 Does not check case or content!
375 @return EXTRACTED_LSC if simple tag was detected (i.e. one that
376 would fulfill the isIsoODF() condition),
377 EXTRACTED_LV if a tag with variant was detected,
378 EXTRACTED_LR if a tag with 3-digit UN M.49 region code was detected
379 EXTRACTED_C_LOCALE if a 'C' locale was detected,
380 EXTRACTED_X if x-... privateuse tag was detected,
381 EXTRACTED_X_JOKER if "*" joker was detected,
382 EXTRACTED_KNOWN_BAD if a bad but known (to be remapped) tag was detected
385 static Extraction
simpleExtract( const OUString
& rBcp47
,
390 OUString
& rVariants
);
392 /** Convert Locale to BCP 47 string without resolving system and creating
393 temporary LanguageTag instances. */
394 static OUString
convertToBcp47( const css::lang::Locale
& rLocale
);
399 LanguageTagImpl::LanguageTagImpl( const LanguageTag
& rLanguageTag
)
401 maLocale( rLanguageTag
.maLocale
),
402 maBcp47( rLanguageTag
.maBcp47
),
403 mpImplLangtag( nullptr),
404 mnLangID( rLanguageTag
.mnLangID
),
405 meScriptType( LanguageTag::ScriptType::UNKNOWN
),
406 meIsValid( DECISION_DONTKNOW
),
407 meIsIsoLocale( DECISION_DONTKNOW
),
408 meIsIsoODF( DECISION_DONTKNOW
),
409 meIsLiblangtagNeeded( DECISION_DONTKNOW
),
410 mbSystemLocale( rLanguageTag
.mbSystemLocale
),
411 mbInitializedBcp47( rLanguageTag
.mbInitializedBcp47
),
412 mbInitializedLocale( rLanguageTag
.mbInitializedLocale
),
413 mbInitializedLangID( rLanguageTag
.mbInitializedLangID
),
414 mbCachedLanguage( false),
415 mbCachedScript( false),
416 mbCachedCountry( false),
417 mbCachedVariants( false),
418 mbCachedGlibcString( false)
423 LanguageTagImpl::LanguageTagImpl( const LanguageTagImpl
& rLanguageTagImpl
)
425 maLocale( rLanguageTagImpl
.maLocale
),
426 maBcp47( rLanguageTagImpl
.maBcp47
),
427 maCachedLanguage( rLanguageTagImpl
.maCachedLanguage
),
428 maCachedScript( rLanguageTagImpl
.maCachedScript
),
429 maCachedCountry( rLanguageTagImpl
.maCachedCountry
),
430 maCachedVariants( rLanguageTagImpl
.maCachedVariants
),
431 maCachedGlibcString( rLanguageTagImpl
.maCachedGlibcString
),
432 mpImplLangtag( rLanguageTagImpl
.mpImplLangtag
?
433 lt_tag_copy( rLanguageTagImpl
.mpImplLangtag
) : nullptr),
434 mnLangID( rLanguageTagImpl
.mnLangID
),
435 meScriptType( rLanguageTagImpl
.meScriptType
),
436 meIsValid( rLanguageTagImpl
.meIsValid
),
437 meIsIsoLocale( rLanguageTagImpl
.meIsIsoLocale
),
438 meIsIsoODF( rLanguageTagImpl
.meIsIsoODF
),
439 meIsLiblangtagNeeded( rLanguageTagImpl
.meIsLiblangtagNeeded
),
440 mbSystemLocale( rLanguageTagImpl
.mbSystemLocale
),
441 mbInitializedBcp47( rLanguageTagImpl
.mbInitializedBcp47
),
442 mbInitializedLocale( rLanguageTagImpl
.mbInitializedLocale
),
443 mbInitializedLangID( rLanguageTagImpl
.mbInitializedLangID
),
444 mbCachedLanguage( rLanguageTagImpl
.mbCachedLanguage
),
445 mbCachedScript( rLanguageTagImpl
.mbCachedScript
),
446 mbCachedCountry( rLanguageTagImpl
.mbCachedCountry
),
447 mbCachedVariants( rLanguageTagImpl
.mbCachedVariants
),
448 mbCachedGlibcString( rLanguageTagImpl
.mbCachedGlibcString
)
455 LanguageTagImpl
& LanguageTagImpl::operator=( const LanguageTagImpl
& rLanguageTagImpl
)
457 if (&rLanguageTagImpl
== this)
460 maLocale
= rLanguageTagImpl
.maLocale
;
461 maBcp47
= rLanguageTagImpl
.maBcp47
;
462 maCachedLanguage
= rLanguageTagImpl
.maCachedLanguage
;
463 maCachedScript
= rLanguageTagImpl
.maCachedScript
;
464 maCachedCountry
= rLanguageTagImpl
.maCachedCountry
;
465 maCachedVariants
= rLanguageTagImpl
.maCachedVariants
;
466 maCachedGlibcString
= rLanguageTagImpl
.maCachedGlibcString
;
467 lt_tag_t
* oldTag
= mpImplLangtag
;
468 mpImplLangtag
= rLanguageTagImpl
.mpImplLangtag
?
469 lt_tag_copy( rLanguageTagImpl
.mpImplLangtag
) : nullptr;
470 lt_tag_unref(oldTag
);
471 mnLangID
= rLanguageTagImpl
.mnLangID
;
472 meScriptType
= rLanguageTagImpl
.meScriptType
;
473 meIsValid
= rLanguageTagImpl
.meIsValid
;
474 meIsIsoLocale
= rLanguageTagImpl
.meIsIsoLocale
;
475 meIsIsoODF
= rLanguageTagImpl
.meIsIsoODF
;
476 meIsLiblangtagNeeded
= rLanguageTagImpl
.meIsLiblangtagNeeded
;
477 mbSystemLocale
= rLanguageTagImpl
.mbSystemLocale
;
478 mbInitializedBcp47
= rLanguageTagImpl
.mbInitializedBcp47
;
479 mbInitializedLocale
= rLanguageTagImpl
.mbInitializedLocale
;
480 mbInitializedLangID
= rLanguageTagImpl
.mbInitializedLangID
;
481 mbCachedLanguage
= rLanguageTagImpl
.mbCachedLanguage
;
482 mbCachedScript
= rLanguageTagImpl
.mbCachedScript
;
483 mbCachedCountry
= rLanguageTagImpl
.mbCachedCountry
;
484 mbCachedVariants
= rLanguageTagImpl
.mbCachedVariants
;
485 mbCachedGlibcString
= rLanguageTagImpl
.mbCachedGlibcString
;
486 if (mpImplLangtag
&& !oldTag
)
492 LanguageTagImpl::~LanguageTagImpl()
496 lt_tag_unref( mpImplLangtag
);
501 LanguageTag::LanguageTag( const OUString
& rBcp47LanguageTag
, bool bCanonicalize
)
503 maBcp47( rBcp47LanguageTag
),
504 mnLangID( LANGUAGE_DONTKNOW
),
505 mbSystemLocale( rBcp47LanguageTag
.isEmpty()),
506 mbInitializedBcp47( !mbSystemLocale
),
507 mbInitializedLocale( false),
508 mbInitializedLangID( false),
513 getImpl()->canonicalize();
514 // Registration itself may already have canonicalized, so do an
515 // unconditional sync.
522 LanguageTag::LanguageTag( const css::lang::Locale
& rLocale
)
525 mnLangID( LANGUAGE_DONTKNOW
),
526 mbSystemLocale( rLocale
.Language
.isEmpty()),
527 mbInitializedBcp47( false),
528 mbInitializedLocale( false), // we do not know which mess we got passed in
529 mbInitializedLangID( false),
532 handleVendorVariant( maLocale
);
536 LanguageTag::LanguageTag( LanguageType nLanguage
)
538 mnLangID( nLanguage
),
539 mbSystemLocale( nLanguage
== LANGUAGE_SYSTEM
),
540 mbInitializedBcp47( false),
541 mbInitializedLocale( false),
542 mbInitializedLangID( !mbSystemLocale
),
548 LanguageTag::LanguageTag( const OUString
& rBcp47
, const OUString
& rLanguage
,
549 std::u16string_view rScript
, const OUString
& rCountry
)
552 mnLangID( LANGUAGE_DONTKNOW
),
553 mbSystemLocale( rBcp47
.isEmpty() && rLanguage
.isEmpty()),
554 mbInitializedBcp47( !rBcp47
.isEmpty()),
555 mbInitializedLocale( false),
556 mbInitializedLangID( false),
559 if (mbSystemLocale
|| mbInitializedBcp47
)
564 maBcp47
= rLanguage
+ "-" + rCountry
;
565 mbInitializedBcp47
= true;
566 maLocale
.Language
= rLanguage
;
567 maLocale
.Country
= rCountry
;
568 mbInitializedLocale
= true;
572 if (rCountry
.isEmpty())
573 maBcp47
= rLanguage
+ "-" + rScript
;
575 maBcp47
= rLanguage
+ "-" + rScript
+ "-" + rCountry
;
576 mbInitializedBcp47
= true;
577 maLocale
.Language
= I18NLANGTAG_QLT
;
578 maLocale
.Country
= rCountry
;
579 maLocale
.Variant
= maBcp47
;
580 mbInitializedLocale
= true;
585 LanguageTag::LanguageTag( const rtl_Locale
& rLocale
)
587 maLocale( rLocale
.Language
, rLocale
.Country
, rLocale
.Variant
),
588 mnLangID( LANGUAGE_DONTKNOW
),
589 mbSystemLocale( maLocale
.Language
.isEmpty()),
590 mbInitializedBcp47( false),
591 mbInitializedLocale( !mbSystemLocale
),
592 mbInitializedLangID( false),
595 convertFromRtlLocale();
598 LanguageTag::~LanguageTag() {}
600 LanguageTag::ImplPtr
LanguageTagImpl::registerOnTheFly( LanguageType nRegisterID
)
602 LanguageTag::ImplPtr pImpl
;
604 if (!mbInitializedBcp47
)
606 if (mbInitializedLocale
)
608 maBcp47
= LanguageTagImpl::convertToBcp47( maLocale
);
609 mbInitializedBcp47
= !maBcp47
.isEmpty();
612 if (maBcp47
.isEmpty())
614 SAL_WARN( "i18nlangtag", "LanguageTagImpl::registerOnTheFly: no Bcp47 string, no registering");
618 std::unique_lock
aGuard( theMutex());
620 MapBcp47
& rMapBcp47
= theMapBcp47();
621 MapBcp47::const_iterator
it( rMapBcp47
.find( maBcp47
));
622 bool bOtherImpl
= false;
623 if (it
!= rMapBcp47
.end())
625 SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: found impl for '" << maBcp47
<< "'");
626 pImpl
= (*it
).second
;
627 if (pImpl
.get() != this)
629 // Could happen for example if during registerImpl() the tag was
630 // changed via canonicalize() and the result was already present in
631 // the map before, for example 'bn-Beng' => 'bn'. This specific
632 // case is now taken care of in registerImpl() and doesn't reach
633 // here. However, use the already existing impl if it matches.
634 SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: using other impl for this '" << maBcp47
<< "'");
635 *this = *pImpl
; // ensure consistency
641 SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: new impl for '" << maBcp47
<< "'");
642 pImpl
= std::make_shared
<LanguageTagImpl
>( *this);
643 rMapBcp47
.insert( ::std::make_pair( maBcp47
, pImpl
));
646 if (!bOtherImpl
|| !pImpl
->mbInitializedLangID
)
648 if (nRegisterID
== LanguageType(0) || nRegisterID
== LANGUAGE_DONTKNOW
)
649 nRegisterID
= getNextOnTheFlyLanguage();
652 // Accept a suggested ID only if it is not mapped yet to something
653 // different, otherwise we would end up with ambiguous assignments
654 // of different language tags, for example for the same primary
655 // LangID with "no", "nb" and "nn".
656 const MapLangID
& rMapLangID
= theMapLangID();
657 MapLangID::const_iterator
itID( rMapLangID
.find( nRegisterID
));
658 if (itID
!= rMapLangID
.end())
660 if ((*itID
).second
->maBcp47
!= maBcp47
)
662 SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: not using suggested 0x"
663 << ::std::hex
<< nRegisterID
<< " for '" << maBcp47
<< "' have '"
664 << (*itID
).second
->maBcp47
<< "'");
665 nRegisterID
= getNextOnTheFlyLanguage();
669 SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: suggested 0x"
670 << ::std::hex
<< nRegisterID
<< " for '" << maBcp47
<< "' already registered");
676 // out of IDs, nothing to register
679 pImpl
->mnLangID
= nRegisterID
;
680 pImpl
->mbInitializedLangID
= true;
681 if (pImpl
.get() != this)
683 mnLangID
= nRegisterID
;
684 mbInitializedLangID
= true;
688 ::std::pair
< MapLangID::const_iterator
, bool > res(
689 theMapLangID().insert( ::std::make_pair( pImpl
->mnLangID
, pImpl
)));
692 SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: cross-inserted 0x"
693 << ::std::hex
<< pImpl
->mnLangID
<< " for '" << maBcp47
<< "'");
697 SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: not cross-inserted 0x"
698 << ::std::hex
<< pImpl
->mnLangID
<< " for '" << maBcp47
<< "' have '"
699 << (*res
.first
).second
->maBcp47
<< "'");
706 LanguageTag::ScriptType
LanguageTag::getOnTheFlyScriptType( LanguageType nRegisterID
)
708 const MapLangID
& rMapLangID
= theMapLangID();
709 MapLangID::const_iterator
itID( rMapLangID
.find( nRegisterID
));
710 if (itID
!= rMapLangID
.end())
711 return (*itID
).second
->getScriptType();
713 return ScriptType::UNKNOWN
;
718 void LanguageTag::setConfiguredSystemLanguage( LanguageType nLang
)
720 if (nLang
== LANGUAGE_DONTKNOW
|| nLang
== LANGUAGE_SYSTEM
)
722 SAL_WARN( "i18nlangtag",
723 "LanguageTag::setConfiguredSystemLanguage: refusing to set unresolved system locale 0x" <<
724 ::std::hex
<< nLang
);
727 SAL_INFO( "i18nlangtag", "LanguageTag::setConfiguredSystemLanguage: setting to 0x" << ::std::hex
<< nLang
);
728 MsLangId::LanguageTagAccess::setConfiguredSystemLanguage( nLang
);
729 // Reset system locale to none and let registerImpl() do the rest to
730 // initialize a new one.
731 theSystemLocale().reset();
732 LanguageTag
aLanguageTag( LANGUAGE_SYSTEM
);
733 aLanguageTag
.registerImpl();
736 static bool lt_tag_parse_disabled
= false;
739 void LanguageTag::disable_lt_tag_parse()
741 lt_tag_parse_disabled
= true;
744 static bool lcl_isKnownOnTheFlyID( LanguageType nLang
)
746 return nLang
!= LANGUAGE_DONTKNOW
&& nLang
!= LANGUAGE_SYSTEM
&&
747 (LanguageTag::isOnTheFlyID( nLang
) || (nLang
== MsLangId::getPrimaryLanguage( nLang
)));
751 LanguageTag::ImplPtr
LanguageTag::registerImpl() const
753 // XXX NOTE: Do not use non-static LanguageTag::convert...() member methods
754 // here as they access getImpl() and syncFromImpl() and would lead to
755 // recursion. Also do not use the static LanguageTag::convertTo...()
756 // methods as they may create temporary LanguageTag instances. Only
757 // LanguageTagImpl::convertToBcp47(Locale) is ok.
761 #if OSL_DEBUG_LEVEL > 0
762 static size_t nCalls
= 0;
764 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCalls
<< " calls");
767 // Do not register unresolved system locale, also force LangID if system
768 // and take the system locale shortcut if possible.
771 pImpl
= theSystemLocale();
774 #if OSL_DEBUG_LEVEL > 0
775 static size_t nCallsSystem
= 0;
777 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystem
<< " system calls");
781 if (!mbInitializedLangID
)
783 mnLangID
= MsLangId::getRealLanguage( LANGUAGE_SYSTEM
);
784 mbInitializedLangID
= (mnLangID
!= LANGUAGE_SYSTEM
);
785 SAL_WARN_IF( !mbInitializedLangID
, "i18nlangtag", "LanguageTag::registerImpl: can't resolve system!");
789 if (mbInitializedLangID
)
791 if (mnLangID
== LANGUAGE_DONTKNOW
)
793 static LanguageTag::ImplPtr theDontKnow
;
794 // Heavy usage of LANGUAGE_DONTKNOW, make it an own Impl for all the
795 // conversion attempts. At the same time provide a central breakpoint
796 // to inspect such places.
798 theDontKnow
= std::make_shared
<LanguageTagImpl
>( *this);
800 #if OSL_DEBUG_LEVEL > 0
801 static size_t nCallsDontKnow
= 0;
803 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsDontKnow
<< " DontKnow calls");
809 // A great share are calls for a system equal locale.
810 pImpl
= theSystemLocale();
811 if (pImpl
&& pImpl
->mnLangID
== mnLangID
)
813 #if OSL_DEBUG_LEVEL > 0
814 static size_t nCallsSystemEqual
= 0;
816 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystemEqual
817 << " system equal LangID calls");
824 // Force Bcp47 if not LangID.
825 if (!mbInitializedLangID
&& !mbInitializedBcp47
)
827 // The one central point to set mbInitializedLocale=true if a
828 // LanguageTag was initialized with a Locale. We will now convert and
829 // possibly later resolve it.
830 if (!mbInitializedLocale
&& (mbSystemLocale
|| !maLocale
.Language
.isEmpty()))
831 mbInitializedLocale
= true;
832 SAL_WARN_IF( !mbInitializedLocale
, "i18nlangtag", "LanguageTag::registerImpl: still not mbInitializedLocale");
834 maBcp47
= LanguageTagImpl::convertToBcp47( maLocale
);
835 mbInitializedBcp47
= !maBcp47
.isEmpty();
838 if (mbInitializedBcp47
)
840 // A great share are calls for a system equal locale.
841 pImpl
= theSystemLocale();
842 if (pImpl
&& pImpl
->maBcp47
== maBcp47
)
844 #if OSL_DEBUG_LEVEL > 0
845 static size_t nCallsSystemEqual
= 0;
847 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystemEqual
<< " system equal BCP47 calls");
853 #if OSL_DEBUG_LEVEL > 0
854 static size_t nCallsNonSystem
= 0;
856 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsNonSystem
<< " non-system calls");
859 std::unique_lock
aGuard( theMutex());
861 #if OSL_DEBUG_LEVEL > 0
862 static long nRunning
= 0;
863 // Entering twice here is ok, which is needed for fallback init in
864 // getKnowns() in canonicalize() via pImpl->convertBcp47ToLocale() below,
865 // everything else is suspicious.
866 SAL_WARN_IF( nRunning
> 1, "i18nlangtag", "LanguageTag::registerImpl: re-entered for '"
867 << maBcp47
<< "' 0x" << ::std::hex
<< mnLangID
);
868 struct Runner
{ Runner() { ++nRunning
; } ~Runner() { --nRunning
; } } aRunner
;
871 // Prefer LangID map as find+insert needs less comparison work.
872 if (mbInitializedLangID
)
874 MapLangID
& rMap
= theMapLangID();
875 MapLangID::const_iterator
it( rMap
.find( mnLangID
));
876 if (it
!= rMap
.end())
878 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: found impl for 0x" << ::std::hex
<< mnLangID
);
879 pImpl
= (*it
).second
;
883 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: new impl for 0x" << ::std::hex
<< mnLangID
);
884 pImpl
= std::make_shared
<LanguageTagImpl
>( *this);
885 rMap
.insert( ::std::make_pair( mnLangID
, pImpl
));
887 if (!pImpl
->mbInitializedLocale
)
888 pImpl
->convertLangToLocale();
889 LanguageType nLang
= MsLangId::Conversion::convertLocaleToLanguage( pImpl
->maLocale
);
890 // If round-trip is identical cross-insert to Bcp47 map.
891 if (nLang
== pImpl
->mnLangID
)
893 if (!pImpl
->mbInitializedBcp47
)
894 pImpl
->convertLocaleToBcp47();
895 ::std::pair
< MapBcp47::const_iterator
, bool > res(
896 theMapBcp47().insert( ::std::make_pair( pImpl
->maBcp47
, pImpl
)));
899 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: cross-inserted '" << pImpl
->maBcp47
<< "' for 0x" << ::std::hex
<< mnLangID
);
903 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted '" << pImpl
->maBcp47
<< "' for 0x" << ::std::hex
<< mnLangID
<< " have 0x"
904 << ::std::hex
<< (*res
.first
).second
->mnLangID
);
909 if (!pImpl
->mbInitializedBcp47
)
910 pImpl
->convertLocaleToBcp47();
911 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted '" << pImpl
->maBcp47
<< "' for 0x" << ::std::hex
<< mnLangID
<< " round-trip to 0x" << ::std::hex
<< nLang
);
915 else if (!maBcp47
.isEmpty())
917 MapBcp47
& rMap
= theMapBcp47();
918 MapBcp47::const_iterator
it( rMap
.find( maBcp47
));
919 if (it
!= rMap
.end())
921 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: found impl for '" << maBcp47
<< "'");
922 pImpl
= (*it
).second
;
926 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: new impl for '" << maBcp47
<< "'");
927 pImpl
= std::make_shared
<LanguageTagImpl
>( *this);
928 ::std::pair
< MapBcp47::iterator
, bool > insOrig( rMap
.insert( ::std::make_pair( maBcp47
, pImpl
)));
929 // If changed after canonicalize() also add the resulting tag to
931 if (pImpl
->synCanonicalize())
933 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: canonicalized to '" << pImpl
->maBcp47
<< "'");
934 ::std::pair
< MapBcp47::const_iterator
, bool > insCanon(
935 rMap
.insert( ::std::make_pair( pImpl
->maBcp47
, pImpl
)));
936 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << (insCanon
.second
? "" : "not ")
937 << "inserted '" << pImpl
->maBcp47
<< "'");
938 // If the canonicalized tag already existed (was not inserted)
939 // and impls are different, make this impl that impl and skip
940 // the rest if that LangID is present as well. The existing
941 // entry may or may not be different, it may even be strictly
942 // identical to this if it differs only in case (e.g. ko-kr =>
943 // ko-KR) which was corrected in canonicalize() hence also in
944 // the map entry but comparison is case insensitive and found
946 if (!insCanon
.second
&& (*insCanon
.first
).second
!= pImpl
)
948 (*insOrig
.first
).second
= pImpl
= (*insCanon
.first
).second
;
949 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: share impl with 0x"
950 << ::std::hex
<< pImpl
->mnLangID
);
953 if (!pImpl
->mbInitializedLangID
)
955 // Try round-trip Bcp47->Locale->LangID->Locale->Bcp47.
956 if (!pImpl
->mbInitializedLocale
)
957 pImpl
->convertBcp47ToLocale();
958 if (!pImpl
->mbInitializedLangID
)
959 pImpl
->convertLocaleToLang( true);
960 // Unconditionally insert (round-trip is possible) for
961 // on-the-fly IDs and (generated or not) suggested IDs.
962 bool bInsert
= lcl_isKnownOnTheFlyID( pImpl
->mnLangID
);
966 if (pImpl
->mnLangID
!= LANGUAGE_DONTKNOW
)
968 // May have involved canonicalize(), so compare with
969 // pImpl->maBcp47 instead of maBcp47!
970 aBcp47
= LanguageTagImpl::convertToBcp47(
971 MsLangId::Conversion::convertLanguageToLocale( pImpl
->mnLangID
, true));
972 bInsert
= (aBcp47
== pImpl
->maBcp47
);
975 // If round-trip is identical cross-insert to Bcp47 map.
978 ::std::pair
< MapLangID::const_iterator
, bool > res(
979 theMapLangID().insert( ::std::make_pair( pImpl
->mnLangID
, pImpl
)));
982 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: cross-inserted 0x"
983 << ::std::hex
<< pImpl
->mnLangID
<< " for '" << maBcp47
<< "'");
987 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted 0x"
988 << ::std::hex
<< pImpl
->mnLangID
<< " for '" << maBcp47
<< "' have '"
989 << (*res
.first
).second
->maBcp47
<< "'");
994 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted 0x"
995 << ::std::hex
<< pImpl
->mnLangID
<< " for '" << maBcp47
<< "' round-trip to '"
1003 SAL_WARN( "i18nlangtag", "LanguageTag::registerImpl: can't register for 0x" << ::std::hex
<< mnLangID
);
1004 pImpl
= std::make_shared
<LanguageTagImpl
>( *this);
1007 // If we reach here for mbSystemLocale we didn't have theSystemLocale
1008 // above, so add it.
1009 if (mbSystemLocale
&& mbInitializedLangID
)
1011 theSystemLocale() = pImpl
;
1012 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: added system locale 0x"
1013 << ::std::hex
<< pImpl
->mnLangID
<< " '" << pImpl
->maBcp47
<< "'");
1020 LanguageTagImpl
const * LanguageTag::getImpl() const
1024 mpImpl
= registerImpl();
1025 syncVarsFromRawImpl();
1027 return mpImpl
.get();
1030 LanguageTagImpl
* LanguageTag::getImpl()
1034 mpImpl
= registerImpl();
1035 syncVarsFromRawImpl();
1037 return mpImpl
.get();
1040 void LanguageTag::resetVars()
1043 maLocale
= lang::Locale();
1045 mnLangID
= LANGUAGE_SYSTEM
;
1046 mbSystemLocale
= true;
1047 mbInitializedBcp47
= false;
1048 mbInitializedLocale
= false;
1049 mbInitializedLangID
= false;
1050 mbIsFallback
= false;
1054 LanguageTag
& LanguageTag::reset( const OUString
& rBcp47LanguageTag
)
1057 maBcp47
= rBcp47LanguageTag
;
1058 mbSystemLocale
= rBcp47LanguageTag
.isEmpty();
1059 mbInitializedBcp47
= !mbSystemLocale
;
1065 LanguageTag
& LanguageTag::reset( const css::lang::Locale
& rLocale
)
1069 mbSystemLocale
= rLocale
.Language
.isEmpty();
1070 mbInitializedLocale
= !mbSystemLocale
;
1071 handleVendorVariant( maLocale
);
1076 LanguageTag
& LanguageTag::reset( LanguageType nLanguage
)
1079 mnLangID
= nLanguage
;
1080 mbSystemLocale
= nLanguage
== LANGUAGE_SYSTEM
;
1081 mbInitializedLangID
= !mbSystemLocale
;
// Canonicalize maBcp47 and determine meIsValid / meIsLiblangtagNeeded.
//
// First tries to decide without liblangtag whether the tag is a simple
// known locale (simpleExtract(), MS-LangID lookup, getKnowns(), internal
// override); if so meIsLiblangtagNeeded becomes DECISION_NO and meIsValid
// DECISION_YES. Otherwise the tag is parsed and canonicalized through
// lt_tag_parse() / lt_tag_canonicalize().
//
// The visible return statement yields bChanged.
// NOTE(review): several short lines of this function (braces, else
// branches, some assignments and returns) are not present in this
// extract; confirm details against the complete source.
1086 bool LanguageTagImpl::canonicalize()
1093 explicit dumper( lt_tag_t
** pp
) : mpp( *pp
? NULL
: pp
) {}
1094 ~dumper() { if (mpp
&& *mpp
) lt_tag_dump( *mpp
); }
1096 dumper
aDumper( &mpImplLangtag
);
1099 bool bChanged
= false;
1101 // Side effect: have maBcp47 in any case, resolved system.
1102 // Some methods calling canonicalize() (or not calling it due to
1103 // meIsLiblangtagNeeded==DECISION_NO) rely on this! Hence do not set
1104 // meIsLiblangtagNeeded anywhere else than hereafter.
1107 // The simple cases and known locales don't need liblangtag processing,
1108 // which also avoids loading liblangtag data on startup.
1109 if (meIsLiblangtagNeeded
== DECISION_DONTKNOW
)
1111 bool bTemporaryLocale
= false;
1112 bool bTemporaryLangID
= false;
1113 if (!mbInitializedLocale
&& !mbInitializedLangID
)
1117 mnLangID
= MsLangId::getRealLanguage( LANGUAGE_SYSTEM
);
1118 mbInitializedLangID
= true;
1122 // Now this is getting funny... we only have some BCP47 string
1123 // and want to determine if parsing it would be possible
1124 // without using liblangtag just to see if it is a simple known
1125 // locale or could fall back to one.
1126 OUString aLanguage
, aScript
, aCountry
, aRegion
, aVariants
;
1127 Extraction eExt
= simpleExtract( maBcp47
, aLanguage
, aScript
, aCountry
, aRegion
, aVariants
);
1128 if (eExt
!= EXTRACTED_NONE
)
1130 if (eExt
== EXTRACTED_LSC
|| eExt
== EXTRACTED_LV
|| eExt
== EXTRACTED_LR
)
1132 // Rebuild bcp47 with proper casing of tags.
1133 OUStringBuffer
aBuf( aLanguage
.getLength() + 1 + aScript
.getLength() +
1134 1 + aCountry
.getLength() + 1 + aRegion
.getLength() + 1 + aVariants
.getLength());
1135 aBuf
.append( aLanguage
);
1136 if (!aScript
.isEmpty())
1137 aBuf
.append("-" + aScript
);
1138 if (!aCountry
.isEmpty())
1139 aBuf
.append("-" + aCountry
);
1140 if (!aRegion
.isEmpty())
1141 aBuf
.append("-" + aRegion
);
1142 if (!aVariants
.isEmpty())
1143 aBuf
.append("-" + aVariants
);
1144 OUString
aStr( aBuf
.makeStringAndClear());
1146 if (maBcp47
!= aStr
)
// Fill a locale from the extracted parts for the known-locale check
// below; it is flagged as temporary via bTemporaryLocale.
1152 if (eExt
== EXTRACTED_LSC
&& aScript
.isEmpty())
1154 maLocale
.Language
= aLanguage
;
1155 maLocale
.Country
= aCountry
;
1157 else if (eExt
== EXTRACTED_C_LOCALE
)
1159 maLocale
.Language
= aLanguage
;
1160 maLocale
.Country
= aCountry
;
1164 maLocale
.Language
= I18NLANGTAG_QLT
;
1165 maLocale
.Country
= aCountry
;
1166 maLocale
.Variant
= maBcp47
;
1168 bTemporaryLocale
= mbInitializedLocale
= true;
1172 if (mbInitializedLangID
&& !mbInitializedLocale
)
1174 // Do not call getLocale() here because that prefers
1175 // convertBcp47ToLocale() which would end up in recursion via
1178 // Prepare to verify that we have a known locale, not just an
1179 // arbitrary MS-LangID.
1180 convertLangToLocale();
1182 if (mbInitializedLocale
)
1184 if (!mbInitializedLangID
)
// Passing false: no on-the-fly ID is allowed for this lookup.
1186 if (convertLocaleToLang( false))
1188 if (bTemporaryLocale
|| mnLangID
== LANGUAGE_DONTKNOW
)
1189 bTemporaryLangID
= true;
1191 if (mnLangID
!= LANGUAGE_DONTKNOW
&& mnLangID
!= LANGUAGE_SYSTEM
)
1192 meIsLiblangtagNeeded
= DECISION_NO
; // known locale
1195 const KnownTagSet
& rKnowns
= getKnowns();
1196 if (rKnowns
.find( maBcp47
) != rKnowns
.end())
1197 meIsLiblangtagNeeded
= DECISION_NO
; // known fallback
1199 // We may have an internal override "canonicalization".
1200 lang::Locale
aNew( MsLangId::Conversion::getOverride( maLocale
));
1201 if (!aNew
.Language
.isEmpty() &&
1202 (aNew
.Language
!= maLocale
.Language
||
1203 aNew
.Country
!= maLocale
.Country
||
1204 aNew
.Variant
!= maLocale
.Variant
))
1206 maBcp47
= LanguageTagImpl::convertToBcp47( aNew
);
1208 meIsIsoLocale
= DECISION_DONTKNOW
;
1209 meIsIsoODF
= DECISION_DONTKNOW
;
1210 meIsLiblangtagNeeded
= DECISION_NO
; // known locale
// Discard the locale again if it was only set up for the check above.
1213 if (bTemporaryLocale
)
1215 mbInitializedLocale
= false;
1216 maLocale
= lang::Locale();
// Likewise discard a language ID flagged as temporary.
1218 if (bTemporaryLangID
)
1220 mbInitializedLangID
= false;
1221 mnLangID
= LANGUAGE_DONTKNOW
;
1224 if (meIsLiblangtagNeeded
== DECISION_NO
)
1226 meIsValid
= DECISION_YES
; // really, known must be valid ...
1227 return bChanged
; // that's it
// From here on the tag is processed through liblangtag.
1230 meIsLiblangtagNeeded
= DECISION_YES
;
1231 SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: using liblangtag for '" << maBcp47
<< "'");
1235 theDataRef().init();
1236 mpImplLangtag
= lt_tag_new();
1241 if (!lt_tag_parse_disabled
&& lt_tag_parse(mpImplLangtag
, OUStringToOString(maBcp47
, RTL_TEXTENCODING_UTF8
).getStr(), &aError
.p
))
1245 SAL_WARN("i18nlangtag", "LanguageTagImpl::canonicalize: could not parse '" << maBcp47
<< "'");
1249 char* pTag
= lt_tag_canonicalize(mpImplLangtag
, &aError
.p
);
1250 SAL_WARN_IF(!pTag
, "i18nlangtag", "LanguageTagImpl::canonicalize: could not canonicalize '" << maBcp47
<< "'");
1253 OUString
aNew(OUString::createFromAscii(pTag
));
1254 // Make the lt_tag_t follow the new string if different, which
1255 // removes default script and such.
1256 if (maBcp47
!= aNew
)
1260 meIsIsoLocale
= DECISION_DONTKNOW
;
1261 meIsIsoODF
= DECISION_DONTKNOW
;
1262 if (!lt_tag_parse(mpImplLangtag
, pTag
, &aError
.p
))
1264 SAL_WARN("i18nlangtag", "LanguageTagImpl::canonicalize: could not reparse '"
1267 meIsValid
= DECISION_NO
;
1272 meIsValid
= DECISION_YES
;
1279 SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: could not parse '" << maBcp47
<< "'");
1281 meIsValid
= DECISION_NO
;
1286 bool LanguageTagImpl::synCanonicalize()
1288 bool bChanged
= false;
1289 if (meIsLiblangtagNeeded
!= DECISION_NO
&& !mpImplLangtag
)
1291 bChanged
= canonicalize();
1294 if (mbInitializedLocale
)
1295 convertBcp47ToLocale();
1296 if (mbInitializedLangID
)
1297 convertBcp47ToLang();
1304 void LanguageTag::syncFromImpl()
1306 LanguageTagImpl
* pImpl
= getImpl();
1307 bool bRegister
= ((mbInitializedBcp47
&& maBcp47
!= pImpl
->maBcp47
) ||
1308 (mbInitializedLangID
&& mnLangID
!= pImpl
->mnLangID
));
1309 SAL_INFO_IF( bRegister
, "i18nlangtag",
1310 "LanguageTag::syncFromImpl: re-registering, '" << pImpl
->maBcp47
<< "' vs '" << maBcp47
<<
1311 " and 0x" << ::std::hex
<< pImpl
->mnLangID
<< " vs 0x" << ::std::hex
<< mnLangID
);
1312 syncVarsFromRawImpl();
1314 mpImpl
= registerImpl();
1318 void LanguageTag::syncVarsFromImpl() const
1321 getImpl(); // with side effect syncVarsFromRawImpl()
1323 syncVarsFromRawImpl();
1327 void LanguageTag::syncVarsFromRawImpl() const
1329 // Do not use getImpl() here.
1330 LanguageTagImpl
* pImpl
= mpImpl
.get();
1334 // Obviously only mutable variables.
1335 mbInitializedBcp47
= pImpl
->mbInitializedBcp47
;
1336 maBcp47
= pImpl
->maBcp47
;
1337 mbInitializedLocale
= pImpl
->mbInitializedLocale
;
1338 maLocale
= pImpl
->maLocale
;
1339 mbInitializedLangID
= pImpl
->mbInitializedLangID
;
1340 mnLangID
= pImpl
->mnLangID
;
1344 bool LanguageTag::synCanonicalize()
1346 bool bChanged
= getImpl()->synCanonicalize();
1353 void LanguageTagImpl::convertLocaleToBcp47()
1355 if (mbSystemLocale
&& !mbInitializedLocale
)
1356 convertLangToLocale();
1358 if (maLocale
.Language
.isEmpty())
1360 // Do not call LanguageTag::convertToBcp47(Locale) that for an empty
1361 // locale via LanguageTag::convertToBcp47(LanguageType) and
1362 // LanguageTag::convertToLocale(LanguageType) would instantiate another
1364 maLocale
= MsLangId::Conversion::convertLanguageToLocale( LANGUAGE_SYSTEM
, false);
1366 if (maLocale
.Language
.isEmpty())
1368 maBcp47
.clear(); // bad luck
1370 else if (maLocale
.Language
== I18NLANGTAG_QLT
)
1372 maBcp47
= maLocale
.Variant
;
1373 meIsIsoLocale
= DECISION_NO
;
1377 maBcp47
= LanguageTag::convertToBcp47( maLocale
);
1379 mbInitializedBcp47
= true;
1383 bool LanguageTagImpl::convertLocaleToLang( bool bAllowOnTheFlyID
)
1385 bool bRemapped
= false;
1388 mnLangID
= MsLangId::getRealLanguage( LANGUAGE_SYSTEM
);
1392 mnLangID
= MsLangId::Conversion::convertLocaleToLanguage( maLocale
);
1393 if (mnLangID
== LANGUAGE_DONTKNOW
)
1395 // convertLocaleToLanguage() only searches in ISO and private
1396 // definitions, search in remaining definitions, i.e. for the "C"
1397 // locale and non-standard things like "sr-latin" or "german" to
1398 // resolve to a known locale, skipping ISO lll-CC that were already
1400 mnLangID
= MsLangId::Conversion::convertIsoNamesToLanguage( maLocale
.Language
, maLocale
.Country
, true);
1401 if (mnLangID
!= LANGUAGE_DONTKNOW
)
1403 // If one found, convert back and adapt Locale and Bcp47
1404 // strings so we have a matching entry.
1405 OUString
aOrgBcp47( maBcp47
);
1406 convertLangToLocale();
1407 convertLocaleToBcp47();
1408 bRemapped
= (maBcp47
!= aOrgBcp47
);
1411 if (mnLangID
== LANGUAGE_DONTKNOW
&& bAllowOnTheFlyID
)
1415 // For language-only (including script) look if we know some
1416 // locale of that language and if so try to use the primary
1417 // language ID of that instead of generating an on-the-fly ID.
1418 if (getCountry().isEmpty() && isIsoODF())
1420 lang::Locale
aLoc( MsLangId::Conversion::lookupFallbackLocale( maLocale
));
1421 // 'en-US' is last resort, do not use except when looking
1423 if (aLoc
.Language
!= "en" || getLanguage() == "en")
1425 mnLangID
= MsLangId::Conversion::convertLocaleToLanguage( aLoc
);
1426 if (mnLangID
!= LANGUAGE_DONTKNOW
)
1427 mnLangID
= MsLangId::getPrimaryLanguage( mnLangID
);
1430 registerOnTheFly( mnLangID
);
1434 SAL_WARN( "i18nlangtag", "LanguageTagImpl::convertLocaleToLang: with bAllowOnTheFlyID invalid '"
1439 mbInitializedLangID
= true;
1444 void LanguageTag::convertLocaleToLang()
1446 getImpl()->convertLocaleToLang( true);
1451 void LanguageTagImpl::convertBcp47ToLocale()
1453 bool bIso
= isIsoLocale();
1456 maLocale
.Language
= getLanguageFromLangtag();
1457 maLocale
.Country
= getRegionFromLangtag();
1458 maLocale
.Variant
.clear();
1462 maLocale
.Language
= I18NLANGTAG_QLT
;
1463 maLocale
.Country
= getCountry();
1464 maLocale
.Variant
= maBcp47
;
1466 mbInitializedLocale
= true;
1470 void LanguageTag::convertBcp47ToLocale()
1472 getImpl()->convertBcp47ToLocale();
1477 void LanguageTagImpl::convertBcp47ToLang()
1481 mnLangID
= MsLangId::getRealLanguage( LANGUAGE_SYSTEM
);
1485 if (!mbInitializedLocale
)
1486 convertBcp47ToLocale();
1487 convertLocaleToLang( true);
1489 mbInitializedLangID
= true;
1493 void LanguageTag::convertBcp47ToLang()
1495 getImpl()->convertBcp47ToLang();
1500 void LanguageTagImpl::convertLangToLocale()
1502 if (mbSystemLocale
&& !mbInitializedLangID
)
1504 mnLangID
= MsLangId::getRealLanguage( LANGUAGE_SYSTEM
);
1505 mbInitializedLangID
= true;
1507 // Resolve system here! The original is remembered as mbSystemLocale.
1508 maLocale
= MsLangId::Conversion::convertLanguageToLocale( mnLangID
, false);
1509 mbInitializedLocale
= true;
1513 void LanguageTag::convertLangToLocale()
1515 getImpl()->convertLangToLocale();
1520 void LanguageTagImpl::convertLangToBcp47()
1522 if (!mbInitializedLocale
)
1523 convertLangToLocale();
1524 convertLocaleToBcp47();
1525 mbInitializedBcp47
= true;
1529 void LanguageTag::convertFromRtlLocale()
1531 // The rtl_Locale follows the Open Group Base Specification,
1532 // 8.2 Internationalization Variables
1533 // language[_territory][.codeset][@modifier]
1534 // On GNU/Linux systems usually being glibc locales.
1535 // sal/osl/unx/nlsupport.c _parse_locale() parses them into
1536 // Language: language 2 or 3 alpha code
1537 // Country: [territory] 2 alpha code
1538 // Variant: [.codeset][@modifier]
1539 // Variant effectively contains anything that follows the territory, not
1540 // looking for '.' dot delimiter or '@' modifier content.
1541 if (maLocale
.Variant
.isEmpty())
1544 OString aStr
= OUStringToOString(maLocale
.Language
, RTL_TEXTENCODING_UTF8
) + "_" + OUStringToOString(Concat2View(maLocale
.Country
+ maLocale
.Variant
),
1545 RTL_TEXTENCODING_UTF8
);
1546 /* FIXME: let liblangtag parse this entirely with
1547 * lt_tag_convert_from_locale() but that needs a patch to pass the
1551 theDataRef::get().init();
1552 mpImplLangtag
= lt_tag_convert_from_locale( aStr
.getStr(), &aError
.p
);
1553 maBcp47
= OStringToOUString( lt_tag_get_string( mpImplLangtag
), RTL_TEXTENCODING_UTF8
);
1554 mbInitializedBcp47
= true;
1556 mnLangID
= MsLangId::convertUnxByteStringToLanguage( aStr
);
1557 if (mnLangID
== LANGUAGE_DONTKNOW
)
1559 SAL_WARN( "i18nlangtag", "LanguageTag(rtl_Locale) - unknown: " << aStr
);
1560 mnLangID
= LANGUAGE_ENGLISH_US
; // we need _something_ here
1562 mbInitializedLangID
= true;
1564 maLocale
= lang::Locale();
1565 mbInitializedLocale
= false;
1569 const OUString
& LanguageTagImpl::getBcp47() const
1571 if (!mbInitializedBcp47
)
1573 if (mbInitializedLocale
)
1574 const_cast<LanguageTagImpl
*>(this)->convertLocaleToBcp47();
1576 const_cast<LanguageTagImpl
*>(this)->convertLangToBcp47();
1582 const OUString
& LanguageTag::getBcp47( bool bResolveSystem
) const
1584 static const OUString theEmptyBcp47
= u
"";
1586 if (!bResolveSystem
&& mbSystemLocale
)
1587 return theEmptyBcp47
;
1588 if (!mbInitializedBcp47
)
1590 if (!mbInitializedBcp47
)
1592 getImpl()->getBcp47();
1593 const_cast<LanguageTag
*>(this)->syncFromImpl();
1599 OUString
LanguageTagImpl::getLanguageFromLangtag()
1603 if (maBcp47
.isEmpty())
1607 const lt_lang_t
* pLangT
= lt_tag_get_language( mpImplLangtag
);
1608 SAL_WARN_IF( !pLangT
, "i18nlangtag",
1609 "LanguageTag::getLanguageFromLangtag: pLangT==NULL for '" << maBcp47
<< "'");
1612 const char* pLang
= lt_lang_get_tag( pLangT
);
1613 SAL_WARN_IF( !pLang
, "i18nlangtag",
1614 "LanguageTag::getLanguageFromLangtag: pLang==NULL for '" << maBcp47
<< "'");
1616 aLanguage
= OUString::createFromAscii( pLang
);
1620 if (mbCachedLanguage
|| cacheSimpleLSCV())
1621 aLanguage
= maCachedLanguage
;
1627 OUString
LanguageTagImpl::getScriptFromLangtag()
1631 if (maBcp47
.isEmpty())
1635 const lt_script_t
* pScriptT
= lt_tag_get_script( mpImplLangtag
);
1636 // pScriptT==NULL is valid for default scripts
1639 const char* pScript
= lt_script_get_tag( pScriptT
);
1640 SAL_WARN_IF( !pScript
, "i18nlangtag", "LanguageTag::getScriptFromLangtag: pScript==NULL");
1642 aScript
= OUString::createFromAscii( pScript
);
1646 if (mbCachedScript
|| cacheSimpleLSCV())
1647 aScript
= maCachedScript
;
1653 OUString
LanguageTagImpl::getRegionFromLangtag()
1657 if (maBcp47
.isEmpty())
1661 const lt_region_t
* pRegionT
= lt_tag_get_region( mpImplLangtag
);
1662 // pRegionT==NULL is valid for language only tags, rough check here
1663 // that does not take sophisticated tags into account that actually
1664 // should have a region, check for ll, lll, ll-Ssss and lll-Ssss so
1665 // that ll-CC and lll-CC actually fail.
1666 SAL_WARN_IF( !pRegionT
&&
1667 maBcp47
.getLength() != 2 && maBcp47
.getLength() != 3 &&
1668 maBcp47
.getLength() != 7 && maBcp47
.getLength() != 8,
1669 "i18nlangtag", "LanguageTag::getRegionFromLangtag: pRegionT==NULL for '" << maBcp47
<< "'");
1672 const char* pRegion
= lt_region_get_tag( pRegionT
);
1673 SAL_WARN_IF( !pRegion
, "i18nlangtag",
1674 "LanguageTag::getRegionFromLangtag: pRegion==NULL for'" << maBcp47
<< "'");
1676 aRegion
= OUString::createFromAscii( pRegion
);
1680 if (mbCachedCountry
|| cacheSimpleLSCV())
1681 aRegion
= maCachedCountry
;
1687 OUString
LanguageTagImpl::getVariantsFromLangtag()
1689 OUStringBuffer aVariants
;
1691 if (maBcp47
.isEmpty())
1695 const lt_list_t
* pVariantsT
= lt_tag_get_variants( mpImplLangtag
);
1696 for (const lt_list_t
* pE
= pVariantsT
; pE
; pE
= lt_list_next( pE
))
1698 const lt_variant_t
* pVariantT
= static_cast<const lt_variant_t
*>(lt_list_value( pE
));
1701 const char* p
= lt_variant_get_tag( pVariantT
);
1704 if (!aVariants
.isEmpty())
1705 aVariants
.append("-");
1706 aVariants
.appendAscii(p
);
1713 if (mbCachedVariants
|| cacheSimpleLSCV())
1714 aVariants
= maCachedVariants
;
1716 return aVariants
.makeStringAndClear();
1720 const css::lang::Locale
& LanguageTag::getLocale( bool bResolveSystem
) const
1722 // "static" to be returned as const reference to an empty locale.
1723 static lang::Locale theEmptyLocale
;
1725 if (!bResolveSystem
&& mbSystemLocale
)
1726 return theEmptyLocale
;
1727 if (!mbInitializedLocale
)
1729 if (!mbInitializedLocale
)
1731 if (mbInitializedBcp47
)
1732 const_cast<LanguageTag
*>(this)->convertBcp47ToLocale();
1734 const_cast<LanguageTag
*>(this)->convertLangToLocale();
1740 LanguageType
LanguageTag::getLanguageType( bool bResolveSystem
) const
1742 if (!bResolveSystem
&& mbSystemLocale
)
1743 return LANGUAGE_SYSTEM
;
1744 if (!mbInitializedLangID
)
1746 if (!mbInitializedLangID
)
1748 if (mbInitializedBcp47
)
1749 const_cast<LanguageTag
*>(this)->convertBcp47ToLang();
1752 const_cast<LanguageTag
*>(this)->convertLocaleToLang();
1754 /* Resolve a locale only unknown due to some redundant information,
1755 * like 'de-Latn-DE' with script tag. Never call canonicalize()
1756 * from within convert...() methods due to possible recursion, so
1758 if ((!mbSystemLocale
&& mnLangID
== LANGUAGE_SYSTEM
) || mnLangID
== LANGUAGE_DONTKNOW
)
1759 const_cast<LanguageTag
*>(this)->synCanonicalize();
1766 void LanguageTag::getIsoLanguageScriptCountry( OUString
& rLanguage
, OUString
& rScript
, OUString
& rCountry
) const
1768 // Calling isIsoODF() first is a predicate for getLanguage(), getScript()
1769 // and getCountry() to work correctly in this context.
1772 rLanguage
= getLanguage();
1773 rScript
= getScript();
1774 rCountry
= getCountry();
1778 rLanguage
= (LanguageTag::isIsoLanguage( getLanguage()) ? getLanguage() : OUString());
1779 rScript
= (LanguageTag::isIsoScript( getScript()) ? getScript() : OUString());
1780 rCountry
= (LanguageTag::isIsoCountry( getCountry()) ? getCountry() : OUString());
1788 bool isLowerAscii( sal_Unicode c
)
1790 return 'a' <= c
&& c
<= 'z';
1793 bool isUpperAscii( sal_Unicode c
)
1795 return 'A' <= c
&& c
<= 'Z';
1802 bool LanguageTag::isIsoLanguage( const OUString
& rLanguage
)
1804 /* TODO: ignore case? For now let's see where rubbish is used. */
1805 bool b2chars
= rLanguage
.getLength() == 2;
1806 if ((b2chars
|| rLanguage
.getLength() == 3) &&
1807 isLowerAscii( rLanguage
[0]) && isLowerAscii( rLanguage
[1]) &&
1808 (b2chars
|| isLowerAscii( rLanguage
[2])))
1810 SAL_WARN_IF( ((rLanguage
.getLength() == 2 || rLanguage
.getLength() == 3) &&
1811 (isUpperAscii( rLanguage
[0]) || isUpperAscii( rLanguage
[1]))) ||
1812 (rLanguage
.getLength() == 3 && isUpperAscii( rLanguage
[2])), "i18nlangtag",
1813 "LanguageTag::isIsoLanguage: rejecting upper case " << rLanguage
);
1819 bool LanguageTag::isIsoCountry( const OUString
& rRegion
)
1821 /* TODO: ignore case? For now let's see where rubbish is used. */
1822 if (rRegion
.isEmpty() ||
1823 (rRegion
.getLength() == 2 && isUpperAscii( rRegion
[0]) && isUpperAscii( rRegion
[1])))
1825 SAL_WARN_IF( rRegion
.getLength() == 2 && (isLowerAscii( rRegion
[0]) || isLowerAscii( rRegion
[1])),
1826 "i18nlangtag", "LanguageTag::isIsoCountry: rejecting lower case " << rRegion
);
1832 bool LanguageTag::isIsoScript( const OUString
& rScript
)
1834 /* TODO: ignore case? For now let's see where rubbish is used. */
1835 if (rScript
.isEmpty() ||
1836 (rScript
.getLength() == 4 &&
1837 isUpperAscii( rScript
[0]) && isLowerAscii( rScript
[1]) &&
1838 isLowerAscii( rScript
[2]) && isLowerAscii( rScript
[3])))
1840 SAL_WARN_IF( rScript
.getLength() == 4 &&
1841 (isLowerAscii( rScript
[0]) || isUpperAscii( rScript
[1]) ||
1842 isUpperAscii( rScript
[2]) || isUpperAscii( rScript
[3])),
1843 "i18nlangtag", "LanguageTag::isIsoScript: rejecting case mismatch " << rScript
);
1848 OUString
const & LanguageTagImpl::getLanguage() const
1850 if (!mbCachedLanguage
)
1852 maCachedLanguage
= const_cast<LanguageTagImpl
*>(this)->getLanguageFromLangtag();
1853 mbCachedLanguage
= true;
1855 return maCachedLanguage
;
1859 OUString
LanguageTag::getLanguage() const
1861 LanguageTagImpl
const* pImpl
= getImpl();
1862 if (pImpl
->mbCachedLanguage
)
1863 return pImpl
->maCachedLanguage
;
1864 OUString
aRet( pImpl
->getLanguage());
1865 const_cast<LanguageTag
*>(this)->syncFromImpl();
1870 OUString
const & LanguageTagImpl::getScript() const
1872 if (!mbCachedScript
)
1874 maCachedScript
= const_cast<LanguageTagImpl
*>(this)->getScriptFromLangtag();
1875 mbCachedScript
= true;
1877 return maCachedScript
;
1881 OUString
LanguageTag::getScript() const
1883 LanguageTagImpl
const* pImpl
= getImpl();
1884 if (pImpl
->mbCachedScript
)
1885 return pImpl
->maCachedScript
;
1886 OUString
aRet( pImpl
->getScript());
1887 const_cast<LanguageTag
*>(this)->syncFromImpl();
1892 OUString
LanguageTag::getLanguageAndScript() const
1894 OUString
aLanguageScript( getLanguage());
1895 OUString
aScript( getScript());
1896 if (!aScript
.isEmpty())
1898 aLanguageScript
+= "-" + aScript
;
1900 return aLanguageScript
;
1904 OUString
const & LanguageTagImpl::getCountry() const
1906 if (!mbCachedCountry
)
1908 maCachedCountry
= const_cast<LanguageTagImpl
*>(this)->getRegionFromLangtag();
1909 if (!LanguageTag::isIsoCountry( maCachedCountry
))
1910 maCachedCountry
.clear();
1911 mbCachedCountry
= true;
1913 return maCachedCountry
;
1917 OUString
LanguageTag::getCountry() const
1919 LanguageTagImpl
const* pImpl
= getImpl();
1920 if (pImpl
->mbCachedCountry
)
1921 return pImpl
->maCachedCountry
;
1922 OUString
aRet( pImpl
->getCountry());
1923 const_cast<LanguageTag
*>(this)->syncFromImpl();
1928 OUString
LanguageTagImpl::getRegion() const
1930 return const_cast<LanguageTagImpl
*>(this)->getRegionFromLangtag();
1934 OUString
const & LanguageTagImpl::getVariants() const
1936 if (!mbCachedVariants
)
1938 maCachedVariants
= const_cast<LanguageTagImpl
*>(this)->getVariantsFromLangtag();
1939 mbCachedVariants
= true;
1941 return maCachedVariants
;
1945 OUString
LanguageTag::getVariants() const
1947 LanguageTagImpl
const * pImpl
= getImpl();
1948 if (pImpl
->mbCachedVariants
)
1949 return pImpl
->maCachedVariants
;
1950 OUString
aRet( pImpl
->getVariants());
1951 const_cast<LanguageTag
*>(this)->syncFromImpl();
1955 OUString
const & LanguageTagImpl::getGlibcLocaleString() const
1957 if (mbCachedGlibcString
)
1958 return maCachedGlibcString
;
1962 meIsLiblangtagNeeded
= DECISION_YES
;
1963 const_cast<LanguageTagImpl
*>(this)->synCanonicalize();
1967 char* pLang
= lt_tag_convert_to_locale(mpImplLangtag
, nullptr);
1970 maCachedGlibcString
= OUString::createFromAscii( pLang
);
1971 mbCachedGlibcString
= true;
1975 return maCachedGlibcString
;
1978 OUString
LanguageTag::getGlibcLocaleString( std::u16string_view rEncoding
) const
1983 OUString
aCountry( getCountry());
1984 if (aCountry
.isEmpty())
1985 aRet
= getLanguage() + rEncoding
;
1987 aRet
= getLanguage() + "_" + aCountry
+ rEncoding
;
1991 aRet
= getImpl()->getGlibcLocaleString();
1992 sal_Int32 nAt
= aRet
.indexOf('@');
1994 aRet
= OUString::Concat(aRet
.subView(0, nAt
)) + rEncoding
+ aRet
.subView(nAt
);
2001 bool LanguageTagImpl::hasScript() const
2003 if (!mbCachedScript
)
2005 return !maCachedScript
.isEmpty();
2009 bool LanguageTag::hasScript() const
2011 bool bRet
= getImpl()->hasScript();
2012 const_cast<LanguageTag
*>(this)->syncFromImpl();
2017 LanguageTag::ScriptType
LanguageTagImpl::getScriptType() const
2019 return meScriptType
;
2023 LanguageTag::ScriptType
LanguageTag::getScriptType() const
2025 return getImpl()->getScriptType();
2029 void LanguageTagImpl::setScriptType(LanguageTag::ScriptType st
)
2031 if (meScriptType
== LanguageTag::ScriptType::UNKNOWN
) // poor man's clash resolution
2036 void LanguageTag::setScriptType(LanguageTag::ScriptType st
)
2038 getImpl()->setScriptType(st
);
2042 bool LanguageTagImpl::cacheSimpleLSCV()
2044 OUString aLanguage
, aScript
, aCountry
, aRegion
, aVariants
;
2045 Extraction eExt
= simpleExtract( maBcp47
, aLanguage
, aScript
, aCountry
, aRegion
, aVariants
);
2046 bool bRet
= (eExt
== EXTRACTED_LSC
|| eExt
== EXTRACTED_LV
|| eExt
== EXTRACTED_LR
);
2049 maCachedLanguage
= aLanguage
;
2050 maCachedScript
= aScript
;
2051 maCachedCountry
= aCountry
;
2052 maCachedVariants
= aVariants
;
2053 mbCachedLanguage
= mbCachedScript
= mbCachedCountry
= mbCachedVariants
= true;
2059 bool LanguageTagImpl::isIsoLocale() const
2061 if (meIsIsoLocale
== DECISION_DONTKNOW
)
2063 const_cast<LanguageTagImpl
*>(this)->synCanonicalize();
2064 // It must be at most ll-CC or lll-CC
2065 // Do not use getCountry() here, use getRegion() instead.
2066 meIsIsoLocale
= ((maBcp47
.isEmpty() ||
2067 (maBcp47
.getLength() <= 6 && LanguageTag::isIsoLanguage( getLanguage()) &&
2068 LanguageTag::isIsoCountry( getRegion()))) ? DECISION_YES
: DECISION_NO
);
2070 return meIsIsoLocale
== DECISION_YES
;
2074 bool LanguageTag::isIsoLocale() const
2076 bool bRet
= getImpl()->isIsoLocale();
2077 const_cast<LanguageTag
*>(this)->syncFromImpl();
2082 bool LanguageTagImpl::isIsoODF() const
2084 if (meIsIsoODF
== DECISION_DONTKNOW
)
2086 const_cast<LanguageTagImpl
*>(this)->synCanonicalize();
2087 if (!LanguageTag::isIsoScript( getScript()))
2089 meIsIsoODF
= DECISION_NO
;
2092 // The usual case is lll-CC so simply check that first.
2095 meIsIsoODF
= DECISION_YES
;
2098 // If this is not ISO locale for which script must not exist it can
2099 // still be ISO locale plus ISO script lll-Ssss-CC, but not ll-vvvv ...
2101 meIsIsoODF
= ((maBcp47
.getLength() <= 11 && LanguageTag::isIsoLanguage( getLanguage()) &&
2102 LanguageTag::isIsoCountry( getRegion()) && LanguageTag::isIsoScript( getScript()) &&
2103 getVariants().isEmpty()) ? DECISION_YES
: DECISION_NO
);
2105 return meIsIsoODF
== DECISION_YES
;
2109 bool LanguageTag::isIsoODF() const
2111 bool bRet
= getImpl()->isIsoODF();
2112 const_cast<LanguageTag
*>(this)->syncFromImpl();
2117 bool LanguageTagImpl::isValidBcp47() const
2119 if (meIsValid
== DECISION_DONTKNOW
)
2121 const_cast<LanguageTagImpl
*>(this)->synCanonicalize();
2122 SAL_WARN_IF( meIsValid
== DECISION_DONTKNOW
, "i18nlangtag",
2123 "LanguageTag::isValidBcp47: canonicalize() didn't set meIsValid");
2125 return meIsValid
== DECISION_YES
;
2129 bool LanguageTag::isValidBcp47() const
2131 bool bRet
= getImpl()->isValidBcp47();
2132 const_cast<LanguageTag
*>(this)->syncFromImpl();
2137 LanguageTag
& LanguageTag::makeFallback()
2141 const lang::Locale
& rLocale1
= getLocale();
2142 lang::Locale
aLocale2( MsLangId::Conversion::lookupFallbackLocale( rLocale1
));
2143 if ( rLocale1
.Language
!= aLocale2
.Language
||
2144 rLocale1
.Country
!= aLocale2
.Country
||
2145 rLocale1
.Variant
!= aLocale2
.Variant
)
2147 if (rLocale1
.Language
!= "en" && aLocale2
.Language
== "en" && aLocale2
.Country
== "US")
2149 // "en-US" is the last resort fallback, try if we get a better
2150 // one for the fallback hierarchy of a non-"en" locale.
2151 ::std::vector
< OUString
> aFallbacks( getFallbackStrings( false));
2152 for (auto const& fallback
: aFallbacks
)
2154 lang::Locale
aLocale3( LanguageTag(fallback
).getLocale());
2155 aLocale2
= MsLangId::Conversion::lookupFallbackLocale( aLocale3
);
2156 if (aLocale2
.Language
!= "en" || aLocale2
.Country
!= "US")
2157 break; // for, success
2160 SAL_INFO( "i18nlangtag", "LanguageTag::makeFallback - for (" <<
2161 rLocale1
.Language
<< "," << rLocale1
.Country
<< "," << rLocale1
.Variant
<< ") to (" <<
2162 aLocale2
.Language
<< "," << aLocale2
.Country
<< "," << aLocale2
.Variant
<< ")");
2165 mbIsFallback
= true;
2171 /* TODO: maybe this now could take advantage of the mnOverride field in
2172 * isolang.cxx entries and search for kSAME instead of hardcoded special
2173 * fallbacks. Though iterating through those tables would be slower and even
2174 * then there would be some special cases, but we wouldn't lack entries that
2175 * were missed out. */
2176 ::std::vector
< OUString
> LanguageTag::getFallbackStrings( bool bIncludeFullBcp47
) const
2178 ::std::vector
< OUString
> aVec
;
2179 OUString
aLanguage( getLanguage());
2180 OUString
aCountry( getCountry());
2183 if (!aCountry
.isEmpty())
2185 if (bIncludeFullBcp47
)
2186 aVec
.emplace_back(aLanguage
+ "-" + aCountry
);
2187 if (aLanguage
== "zh")
2189 // For zh-HK or zh-MO also list zh-TW to get zh-Hant, for all
2190 // other zh-XX also list zh-CN to get zh-Hans; both of which we
2191 // use the legacy forms instead of the more correct script
2192 // tags that unfortunately most pieces don't understand.
2193 if (aCountry
== "HK" || aCountry
== "MO")
2194 aVec
.emplace_back(aLanguage
+ "-TW");
2195 else if (aCountry
!= "CN")
2196 aVec
.emplace_back(aLanguage
+ "-CN");
2197 aVec
.push_back( aLanguage
);
2199 else if (aLanguage
== "sh")
2201 // Manual list instead of calling
2202 // LanguageTag( "sr-Latn-" + aCountry).getFallbackStrings( true)
2203 // that would also include "sh-*" again.
2204 aVec
.emplace_back("sr-Latn-" + aCountry
);
2205 aVec
.emplace_back("sr-Latn");
2206 aVec
.emplace_back("sh"); // legacy with script, before default script with country
2207 aVec
.emplace_back("sr-" + aCountry
);
2208 aVec
.emplace_back("sr");
2210 else if (aLanguage
== "ca" && aCountry
== "XV")
2212 ::std::vector
< OUString
> aRep( LanguageTag( "ca-ES-valencia").getFallbackStrings( true));
2213 aVec
.insert( aVec
.end(), aRep
.begin(), aRep
.end());
2214 // Already includes 'ca' language fallback.
2216 else if (aLanguage
== "ku")
2218 if (aCountry
== "TR" || aCountry
== "SY")
2220 aVec
.emplace_back("kmr-Latn-" + aCountry
);
2221 aVec
.emplace_back("kmr-" + aCountry
);
2222 aVec
.emplace_back("kmr-Latn");
2223 aVec
.emplace_back("kmr");
2224 aVec
.push_back( aLanguage
);
2226 else if (aCountry
== "IQ" || aCountry
== "IR")
2228 aVec
.emplace_back("ckb-" + aCountry
);
2229 aVec
.emplace_back("ckb");
2232 else if (aLanguage
== "kmr" && (aCountry
== "TR" || aCountry
== "SY"))
2234 aVec
.emplace_back("ku-Latn-" + aCountry
);
2235 aVec
.emplace_back("ku-" + aCountry
);
2236 aVec
.push_back( aLanguage
);
2237 aVec
.emplace_back("ku");
2239 else if (aLanguage
== "ckb" && (aCountry
== "IQ" || aCountry
== "IR"))
2241 aVec
.emplace_back("ku-Arab-" + aCountry
);
2242 aVec
.emplace_back("ku-" + aCountry
);
2243 aVec
.push_back( aLanguage
);
2244 // not 'ku' only, that was used for Latin script
2247 aVec
.push_back( aLanguage
);
2251 if (bIncludeFullBcp47
)
2252 aVec
.push_back( aLanguage
);
2253 if (aLanguage
== "sh")
2255 aVec
.emplace_back("sr-Latn");
2256 aVec
.emplace_back("sr");
2258 else if (aLanguage
== "pli")
2260 // a special case for Pali dictionary, see fdo#41599
2261 aVec
.emplace_back("pi-Latn");
2262 aVec
.emplace_back("pi");
2268 getBcp47(); // have maBcp47 now
2269 if (bIncludeFullBcp47
)
2270 aVec
.push_back( maBcp47
);
2272 // Special cases for deprecated tags and their replacements, include both
2273 // in fallbacks in a sensible order.
2274 /* TODO: could such things be generalized and automated with liblangtag? */
2275 if (maBcp47
== "en-GB-oed")
2276 aVec
.emplace_back("en-GB-oxendict");
2277 else if (maBcp47
== "en-GB-oxendict")
2278 aVec
.emplace_back("en-GB-oed");
2280 OUString
aVariants( getVariants());
2284 OUString aScript
= getScript();
2285 bool bHaveLanguageScriptVariant
= false;
2286 if (!aCountry
.isEmpty())
2288 if (!aVariants
.isEmpty())
2290 aTmp
= aLanguage
+ "-" + aScript
+ "-" + aCountry
+ "-" + aVariants
;
2291 if (aTmp
!= maBcp47
)
2292 aVec
.push_back( aTmp
);
2293 // Language with variant but without country before language
2294 // without variant but with country.
2295 aTmp
= aLanguage
+ "-" + aScript
+ "-" + aVariants
;
2296 if (aTmp
!= maBcp47
)
2297 aVec
.push_back( aTmp
);
2298 bHaveLanguageScriptVariant
= true;
2300 aTmp
= aLanguage
+ "-" + aScript
+ "-" + aCountry
;
2301 if (aTmp
!= maBcp47
)
2302 aVec
.push_back( aTmp
);
2303 if (aLanguage
== "sr" && aScript
== "Latn")
2305 // sr-Latn-CS => sr-Latn-YU, sh-CS, sh-YU
2306 if (aCountry
== "CS")
2308 aVec
.emplace_back("sr-Latn-YU");
2309 aVec
.emplace_back("sh-CS");
2310 aVec
.emplace_back("sh-YU");
2313 aVec
.emplace_back("sh-" + aCountry
);
2315 else if (aLanguage
== "pi" && aScript
== "Latn")
2316 aVec
.emplace_back("pli"); // a special case for Pali dictionary, see fdo#41599
2317 else if (aLanguage
== "krm" && aScript
== "Latn" && (aCountry
== "TR" || aCountry
== "SY"))
2318 aVec
.emplace_back("ku-" + aCountry
);
2320 if (!aVariants
.isEmpty() && !bHaveLanguageScriptVariant
)
2322 aTmp
= aLanguage
+ "-" + aScript
+ "-" + aVariants
;
2323 if (aTmp
!= maBcp47
)
2324 aVec
.push_back( aTmp
);
2326 aTmp
= aLanguage
+ "-" + aScript
;
2327 if (aTmp
!= maBcp47
)
2328 aVec
.push_back( aTmp
);
2330 // 'sh' actually denoted a script, so have it here instead of appended
2331 // at the end as language-only.
2332 if (aLanguage
== "sr" && aScript
== "Latn")
2333 aVec
.emplace_back("sh");
2334 else if (aLanguage
== "ku" && aScript
== "Arab")
2335 aVec
.emplace_back("ckb");
2336 // 'ku' only denoted Latin script
2337 else if (aLanguage
== "krm" && aScript
== "Latn" && aCountry
.isEmpty())
2338 aVec
.emplace_back("ku");
2340 bool bHaveLanguageVariant
= false;
2341 if (!aCountry
.isEmpty())
2343 if (!aVariants
.isEmpty())
2345 aTmp
= aLanguage
+ "-" + aCountry
+ "-" + aVariants
;
2346 if (aTmp
!= maBcp47
)
2347 aVec
.push_back( aTmp
);
2348 if (maBcp47
== "ca-ES-valencia")
2349 aVec
.emplace_back("ca-XV");
2350 // Language with variant but without country before language
2351 // without variant but with country.
2352 // But only if variant is not from a grandfathered tag that
2353 // wouldn't match the rules, i.e. "de-1901" is fine but "en-oed" is
2355 if (aVariants
.getLength() >= 5 ||
2356 (aVariants
.getLength() == 4 && '0' <= aVariants
[0] && aVariants
[0] <= '9'))
2358 aTmp
= aLanguage
+ "-" + aVariants
;
2359 if (aTmp
!= maBcp47
)
2360 aVec
.push_back( aTmp
);
2361 bHaveLanguageVariant
= true;
2364 aTmp
= aLanguage
+ "-" + aCountry
;
2365 if (aTmp
!= maBcp47
)
2366 aVec
.push_back( aTmp
);
2368 if (!aVariants
.isEmpty() && !bHaveLanguageVariant
)
2370 // Only if variant is not from a grandfathered tag that wouldn't match
2371 // the rules, i.e. "de-1901" is fine but "en-oed" is not.
2372 if (aVariants
.getLength() >= 5 ||
2373 (aVariants
.getLength() == 4 && '0' <= aVariants
[0] && aVariants
[0] <= '9'))
2375 aTmp
= aLanguage
+ "-" + aVariants
;
2376 if (aTmp
!= maBcp47
)
2377 aVec
.push_back( aTmp
);
2381 // Insert legacy fallbacks with country before language-only, but only
2382 // default script, script was handled already above.
2383 if (!aCountry
.isEmpty())
2385 if (aLanguage
== "sr" && aCountry
== "CS")
2386 aVec
.emplace_back("sr-YU");
2389 // Original language-only.
2390 if (!aLanguage
.isEmpty() && aLanguage
!= maBcp47
)
2391 aVec
.push_back( aLanguage
);
2397 OUString
LanguageTag::getBcp47MS() const
2399 if (getLanguageType() == LANGUAGE_SPANISH_DATED
)
2400 return "es-ES_tradnl";
2405 bool LanguageTag::equals( const LanguageTag
& rLanguageTag
) const
2407 // If SYSTEM is not to be resolved or either both are SYSTEM or none, we
2408 // can use the operator==() optimization.
2409 if (isSystemLocale() == rLanguageTag
.isSystemLocale())
2410 return operator==( rLanguageTag
);
2412 // Compare full language tag strings.
2413 return getBcp47() == rLanguageTag
.getBcp47();
2417 bool LanguageTag::operator==( const LanguageTag
& rLanguageTag
) const
2419 if (isSystemLocale() && rLanguageTag
.isSystemLocale())
2420 return true; // both SYSTEM
2422 // No need to convert to BCP47 if both Lang-IDs are available.
2423 if (mbInitializedLangID
&& rLanguageTag
.mbInitializedLangID
)
2425 // Equal if same ID and no SYSTEM is involved or both are SYSTEM.
2426 return mnLangID
== rLanguageTag
.mnLangID
&& isSystemLocale() == rLanguageTag
.isSystemLocale();
2429 // Compare full language tag strings but SYSTEM unresolved.
2430 return getBcp47( false) == rLanguageTag
.getBcp47( false);
2434 bool LanguageTag::operator!=( const LanguageTag
& rLanguageTag
) const
2436 return !operator==( rLanguageTag
);
2440 bool LanguageTag::operator<( const LanguageTag
& rLanguageTag
) const
2442 return getBcp47( false).compareToIgnoreAsciiCase( rLanguageTag
.getBcp47( false)) < 0;
2447 LanguageTagImpl::Extraction
LanguageTagImpl::simpleExtract( const OUString
& rBcp47
,
2448 OUString
& rLanguage
, OUString
& rScript
, OUString
& rCountry
, OUString
& rRegion
, OUString
& rVariants
)
2450 Extraction eRet
= EXTRACTED_NONE
;
2451 const sal_Int32 nLen
= rBcp47
.getLength();
2452 const sal_Int32 nHyph1
= rBcp47
.indexOf( '-');
2453 sal_Int32 nHyph2
= (nHyph1
< 0 ? -1 : rBcp47
.indexOf( '-', nHyph1
+ 1));
2454 sal_Int32 nHyph3
= (nHyph2
< 0 ? -1 : rBcp47
.indexOf( '-', nHyph2
+ 1));
2455 sal_Int32 nHyph4
= (nHyph3
< 0 ? -1 : rBcp47
.indexOf( '-', nHyph3
+ 1));
2456 if (nLen
== 1 && rBcp47
[0] == '*') // * the dreaded jolly joker
2458 // It's f*d up but we need to recognize this.
2459 eRet
= EXTRACTED_X_JOKER
;
2461 else if (nHyph1
== 1 && rBcp47
[0] == 'x') // x-... privateuse
2463 // x-... privateuse tags MUST be known to us by definition.
2466 else if (nLen
== 1 && rBcp47
[0] == 'C') // the 'C' locale
2468 eRet
= EXTRACTED_C_LOCALE
;
2475 else if (nLen
== 2 || nLen
== 3) // ll or lll
2479 rLanguage
= rBcp47
.toAsciiLowerCase();
2484 eRet
= EXTRACTED_LSC
;
2487 else if ( (nHyph1
== 2 && nLen
== 5) // ll-CC
2488 || (nHyph1
== 3 && nLen
== 6)) // lll-CC
2492 rLanguage
= rBcp47
.copy( 0, nHyph1
).toAsciiLowerCase();
2493 rCountry
= rBcp47
.copy( nHyph1
+ 1, 2).toAsciiUpperCase();
2497 eRet
= EXTRACTED_LSC
;
2500 else if ( (nHyph1
== 2 && nLen
== 6) // ll-rrr
2501 || (nHyph1
== 3 && nLen
== 7)) // lll-rrr
2505 rLanguage
= rBcp47
.copy( 0, nHyph1
).toAsciiLowerCase();
2507 rRegion
= rBcp47
.copy( nHyph1
+ 1, 3);
2510 eRet
= EXTRACTED_LR
;
2513 else if ( (nHyph1
== 2 && nLen
== 7) // ll-Ssss or ll-vvvv
2514 || (nHyph1
== 3 && nLen
== 8)) // lll-Ssss or lll-vvvv
2518 sal_Unicode c
= rBcp47
[nHyph1
+1];
2519 if ('0' <= c
&& c
<= '9')
2521 // (DIGIT 3ALNUM) vvvv variant instead of Ssss script
2522 rLanguage
= rBcp47
.copy( 0, nHyph1
).toAsciiLowerCase();
2526 rVariants
= rBcp47
.copy( nHyph1
+ 1);
2527 eRet
= EXTRACTED_LV
;
2531 rLanguage
= rBcp47
.copy( 0, nHyph1
).toAsciiLowerCase();
2532 rScript
= rBcp47
.copy( nHyph1
+ 1, 1).toAsciiUpperCase() +
2533 rBcp47
.copy( nHyph1
+ 2, 3).toAsciiLowerCase();
2537 eRet
= EXTRACTED_LSC
;
2541 else if ( (nHyph1
== 2 && nHyph2
== 7 && nLen
== 10) // ll-Ssss-CC
2542 || (nHyph1
== 3 && nHyph2
== 8 && nLen
== 11)) // lll-Ssss-CC
2546 rLanguage
= rBcp47
.copy( 0, nHyph1
).toAsciiLowerCase();
2547 rScript
= rBcp47
.copy( nHyph1
+ 1, 1).toAsciiUpperCase() + rBcp47
.copy( nHyph1
+ 2, 3).toAsciiLowerCase();
2548 rCountry
= rBcp47
.copy( nHyph2
+ 1, 2).toAsciiUpperCase();
2551 eRet
= EXTRACTED_LSC
;
2554 else if ( (nHyph1
== 2 && nHyph2
== 7 && nLen
== 11) // ll-Ssss-rrr
2555 || (nHyph1
== 3 && nHyph2
== 8 && nLen
== 12)) // lll-Ssss-rrr
2559 rLanguage
= rBcp47
.copy( 0, nHyph1
).toAsciiLowerCase();
2560 rScript
= rBcp47
.copy( nHyph1
+ 1, 1).toAsciiUpperCase() + rBcp47
.copy( nHyph1
+ 2, 3).toAsciiLowerCase();
2562 rRegion
= rBcp47
.copy( nHyph2
+ 1, 3);
2564 eRet
= EXTRACTED_LR
;
2567 else if ( (nHyph1
== 2 && nHyph2
== 7 && nHyph3
== 10 && nLen
>= 15) // ll-Ssss-CC-vvvv[vvvv][-...]
2568 || (nHyph1
== 3 && nHyph2
== 8 && nHyph3
== 11 && nLen
>= 16)) // lll-Ssss-CC-vvvv[vvvv][-...]
2571 nHyph4
= rBcp47
.getLength();
2572 if (nHyph4
- nHyph3
> 4 && nHyph4
- nHyph3
<= 9)
2574 rLanguage
= rBcp47
.copy( 0, nHyph1
).toAsciiLowerCase();
2575 rScript
= rBcp47
.copy( nHyph1
+ 1, 1).toAsciiUpperCase() + rBcp47
.copy( nHyph1
+ 2, 3).toAsciiLowerCase();
2576 rCountry
= rBcp47
.copy( nHyph2
+ 1, 2).toAsciiUpperCase();
2578 rVariants
= rBcp47
.copy( nHyph3
+ 1);
2579 eRet
= EXTRACTED_LV
;
2582 else if ( (nHyph1
== 2 && nHyph2
== 7 && nHyph3
== 11 && nLen
>= 16) // ll-Ssss-rrr-vvvv[vvvv][-...]
2583 || (nHyph1
== 3 && nHyph2
== 8 && nHyph3
== 12 && nLen
>= 17)) // lll-Ssss-rrr-vvvv[vvvv][-...]
2586 nHyph4
= rBcp47
.getLength();
2587 if (nHyph4
- nHyph3
> 4 && nHyph4
- nHyph3
<= 9)
2589 rLanguage
= rBcp47
.copy( 0, nHyph1
).toAsciiLowerCase();
2590 rScript
= rBcp47
.copy( nHyph1
+ 1, 1).toAsciiUpperCase() + rBcp47
.copy( nHyph1
+ 2, 3).toAsciiLowerCase();
2592 rRegion
= rBcp47
.copy( nHyph2
+ 1, 3);
2593 rVariants
= rBcp47
.copy( nHyph3
+ 1);
2594 eRet
= EXTRACTED_LR
;
2597 else if ( (nHyph1
== 2 && nHyph2
== 5 && nHyph3
== 7) // ll-CC-u-...
2598 || (nHyph1
== 3 && nHyph2
== 6 && nHyph3
== 8)) // lll-CC-u-...
2600 if (rBcp47
[nHyph3
-1] == 'u')
2602 // Need to recognize as known, otherwise getLanguage() and
2603 // getCountry() return empty string because mpImplLangtag is not
2604 // used with a known mapping.
2605 /* TODO: if there were more this would get ugly and needed some
2606 * table driven approach via isolang.cxx instead. */
2607 if (rBcp47
.equalsIgnoreAsciiCase( "es-ES-u-co-trad"))
2613 rVariants
= "u-co-trad"; // not strictly a variant, but used to reconstruct the tag.
2614 eRet
= EXTRACTED_LV
;
2618 else if ( (nHyph1
== 2 && nHyph2
== 5 && nLen
>= 10) // ll-CC-vvvv[vvvv][-...]
2619 || (nHyph1
== 3 && nHyph2
== 6 && nLen
>= 11)) // lll-CC-vvvv[vvvv][-...]
2622 nHyph3
= rBcp47
.getLength();
2623 if (nHyph3
- nHyph2
> 4 && nHyph3
- nHyph2
<= 9)
2625 rLanguage
= rBcp47
.copy( 0, nHyph1
).toAsciiLowerCase();
2627 rCountry
= rBcp47
.copy( nHyph1
+ 1, 2).toAsciiUpperCase();
2629 rVariants
= rBcp47
.copy( nHyph2
+ 1);
2630 eRet
= EXTRACTED_LV
;
2633 else if ( (nHyph1
== 2 && nHyph2
== 6 && nLen
>= 11) // ll-rrr-vvvv[vvvv][-...]
2634 || (nHyph1
== 3 && nHyph2
== 7 && nLen
>= 12)) // lll-rrr-vvvv[vvvv][-...]
2637 nHyph3
= rBcp47
.getLength();
2638 if (nHyph3
- nHyph2
> 4 && nHyph3
- nHyph2
<= 9)
2640 rLanguage
= rBcp47
.copy( 0, nHyph1
).toAsciiLowerCase();
2643 rRegion
= rBcp47
.copy( nHyph1
+ 1, 3);
2644 rVariants
= rBcp47
.copy( nHyph2
+ 1);
2645 eRet
= EXTRACTED_LR
;
2648 else if ( (nHyph1
== 2 && nLen
>= 8) // ll-vvvvv[vvv][-...]
2649 || (nHyph1
== 3 && nLen
>= 9)) // lll-vvvvv[vvv][-...]
2652 nHyph2
= rBcp47
.getLength();
2653 if (nHyph2
- nHyph1
> 5 && nHyph2
- nHyph1
<= 9)
2655 rLanguage
= rBcp47
.copy( 0, nHyph1
).toAsciiLowerCase();
2659 rVariants
= rBcp47
.copy( nHyph1
+ 1);
2660 eRet
= EXTRACTED_LV
;
2664 // Known and handled grandfathered; ugly but effective ...
2665 // Note that nLen must have matched above.
2666 // Strictly not a variant, but so far we treat it as such.
2667 if (rBcp47
.equalsIgnoreAsciiCase( "en-GB-oed"))
2674 eRet
= EXTRACTED_LV
;
2676 // Other known and handled odd cases.
2677 else if (rBcp47
.equalsIgnoreAsciiCase( "es-ES_tradnl"))
2679 // Will get overridden, but needs to be recognized as known.
2684 rVariants
= "tradnl"; // this is nonsense, but... ignored.
2685 eRet
= EXTRACTED_KNOWN_BAD
;
2689 if (eRet
== EXTRACTED_NONE
)
2691 SAL_INFO( "i18nlangtag", "LanguageTagImpl::simpleExtract: did not extract '" << rBcp47
<< "'");
2700 assert(rLanguage
.getLength() == 2 || rLanguage
.getLength() == 3
2701 || eRet
== EXTRACTED_X_JOKER
|| eRet
== EXTRACTED_X
|| eRet
== EXTRACTED_C_LOCALE
);
2702 assert(rScript
.isEmpty() || rScript
.getLength() == 4);
2703 assert(rCountry
.isEmpty() || rRegion
.isEmpty()); // [2ALPHA / 3DIGIT]
2704 assert(rCountry
.isEmpty() || rCountry
.getLength() == 2);
2705 assert(rRegion
.isEmpty() || rRegion
.getLength() == 3);
2706 assert(rVariants
.isEmpty() || rVariants
.getLength() >= 4 || rVariants
== "oed");
2713 ::std::vector
< OUString
>::const_iterator
LanguageTag::getFallback(
2714 const ::std::vector
< OUString
> & rList
, const OUString
& rReference
)
2719 // Try the simple case first without constructing fallbacks.
2720 ::std::vector
< OUString
>::const_iterator it
= std::find(rList
.begin(), rList
.end(), rReference
);
2721 if (it
!= rList
.end())
2722 return it
; // exact match
2724 ::std::vector
< OUString
> aFallbacks( LanguageTag( rReference
).getFallbackStrings( false));
2725 if (rReference
!= "en-US")
2727 aFallbacks
.emplace_back("en-US");
2728 if (rReference
!= "en")
2729 aFallbacks
.emplace_back("en");
2731 if (rReference
!= "x-default")
2732 aFallbacks
.emplace_back("x-default");
2733 if (rReference
!= "x-no-translate")
2734 aFallbacks
.emplace_back("x-no-translate");
2735 /* TODO: the original comphelper::Locale::getFallback() code had
2736 * "x-notranslate" instead of "x-no-translate", but all .xcu files use
2737 * "x-no-translate" and "x-notranslate" apparently was never used anywhere.
2738 * Did that ever work? Was it supposed to work at all like this? */
2740 for (const auto& fb
: aFallbacks
)
2742 it
= std::find(rList
.begin(), rList
.end(), fb
);
2743 if (it
!= rList
.end())
2744 return it
; // fallback found
2747 // Did not find anything so return something of the list, the first value
2748 // will do as well as any other as none did match any of the possible
2750 return rList
.begin();
2755 ::std::vector
< css::lang::Locale
>::const_iterator
LanguageTag::getMatchingFallback(
2756 const ::std::vector
< css::lang::Locale
> & rList
,
2757 const css::lang::Locale
& rReference
)
2762 // Try the simple case first without constructing fallbacks.
2763 ::std::vector
< lang::Locale
>::const_iterator it
= std::find_if(rList
.begin(), rList
.end(),
2764 [&rReference
](const lang::Locale
& rLocale
) {
2765 return rLocale
.Language
== rReference
.Language
2766 && rLocale
.Country
== rReference
.Country
2767 && rLocale
.Variant
== rReference
.Variant
; });
2768 if (it
!= rList
.end())
2769 return it
; // exact match
2771 // Now for each reference fallback test the fallbacks of the list in order.
2772 ::std::vector
< OUString
> aFallbacks( LanguageTag( rReference
).getFallbackStrings( false));
2773 ::std::vector
< ::std::vector
< OUString
> > aListFallbacks( rList
.size());
2775 for (auto const& elem
: rList
)
2777 ::std::vector
< OUString
> aTmp( LanguageTag(elem
).getFallbackStrings( true));
2778 aListFallbacks
[i
++] = aTmp
;
2780 for (auto const& rfb
: aFallbacks
)
2783 for (auto const& lfb
: aListFallbacks
)
2785 for (auto const& fb
: lfb
)
2788 return rList
.begin() + nPosFb
;
2799 static bool lcl_isSystem( LanguageType nLangID
)
2801 if (nLangID
== LANGUAGE_SYSTEM
)
2803 // There are some special values that simplify to SYSTEM,
2804 // getRealLanguage() catches and resolves them.
2805 LanguageType nNewLangID
= MsLangId::getRealLanguage( nLangID
);
2806 return nNewLangID
!= nLangID
;
2811 css::lang::Locale
LanguageTag::convertToLocale( LanguageType nLangID
, bool bResolveSystem
)
2813 if (!bResolveSystem
&& lcl_isSystem( nLangID
))
2814 return lang::Locale();
2816 return LanguageTag( nLangID
).getLocale( bResolveSystem
);
2821 LanguageType
LanguageTag::convertToLanguageType( const css::lang::Locale
& rLocale
, bool bResolveSystem
)
2823 if (rLocale
.Language
.isEmpty() && !bResolveSystem
)
2824 return LANGUAGE_SYSTEM
;
2826 return LanguageTag( rLocale
).getLanguageType( bResolveSystem
);
2831 OUString
LanguageTagImpl::convertToBcp47( const css::lang::Locale
& rLocale
)
2834 if (rLocale
.Language
.isEmpty())
2836 // aBcp47 stays empty
2838 else if (rLocale
.Language
== I18NLANGTAG_QLT
)
2840 aBcp47
= rLocale
.Variant
;
2844 /* XXX NOTE: most legacy code never evaluated the Variant field, so for
2845 * now just concatenate language and country. In case we stumbled over
2846 * variant aware code we'd have to take care of that. */
2847 if (rLocale
.Country
.isEmpty())
2848 aBcp47
= rLocale
.Language
;
2851 aBcp47
= rLocale
.Language
+ "-" + rLocale
.Country
;
2859 OUString
LanguageTag::convertToBcp47( const css::lang::Locale
& rLocale
, bool bResolveSystem
)
2862 if (rLocale
.Language
.isEmpty())
2865 aBcp47
= LanguageTag::convertToBcp47( LANGUAGE_SYSTEM
);
2866 // else aBcp47 stays empty
2870 aBcp47
= LanguageTagImpl::convertToBcp47( rLocale
);
2877 OUString
LanguageTag::convertToBcp47( LanguageType nLangID
)
2879 lang::Locale
aLocale( LanguageTag::convertToLocale( nLangID
));
2880 // If system for some reason (should not happen... haha) could not be
2881 // resolved DO NOT CALL LanguageTag::convertToBcp47(Locale) because that
2882 // would recurse into this method here!
2883 if (aLocale
.Language
.isEmpty())
2884 return OUString(); // bad luck, bail out
2885 return LanguageTagImpl::convertToBcp47( aLocale
);
2890 css::lang::Locale
LanguageTag::convertToLocale( const OUString
& rBcp47
, bool bResolveSystem
)
2892 if (rBcp47
.isEmpty() && !bResolveSystem
)
2893 return lang::Locale();
2895 return LanguageTag( rBcp47
).getLocale( bResolveSystem
);
2900 LanguageType
LanguageTag::convertToLanguageType( const OUString
& rBcp47
)
2902 return LanguageTag( rBcp47
).getLanguageType();
2907 LanguageType
LanguageTag::convertToLanguageTypeWithFallback( const OUString
& rBcp47
)
2909 return LanguageTag( rBcp47
).makeFallback().getLanguageType();
2914 css::lang::Locale
LanguageTag::convertToLocaleWithFallback( const OUString
& rBcp47
)
2916 return LanguageTag( rBcp47
).makeFallback().getLocale();
2921 LanguageType
LanguageTag::convertToLanguageTypeWithFallback( const css::lang::Locale
& rLocale
)
2923 if (rLocale
.Language
.isEmpty())
2924 return LANGUAGE_SYSTEM
;
2926 return LanguageTag( rLocale
).makeFallback().getLanguageType();
2931 bool LanguageTag::isValidBcp47( const OUString
& rString
, OUString
* o_pCanonicalized
,
2932 LanguageTag::PrivateUse ePrivateUse
)
2934 bool bValid
= false;
2938 lt_tag_t
* mpLangtag
;
2941 theDataRef().init();
2942 mpLangtag
= lt_tag_new();
2946 lt_tag_unref( mpLangtag
);
2952 if (!lt_tag_parse_disabled
&& lt_tag_parse(aVar
.mpLangtag
, OUStringToOString(rString
, RTL_TEXTENCODING_UTF8
).getStr(), &aError
.p
))
2954 char* pTag
= lt_tag_canonicalize( aVar
.mpLangtag
, &aError
.p
);
2955 SAL_WARN_IF( !pTag
, "i18nlangtag", "LanguageTag:isValidBcp47: could not canonicalize '" << rString
<< "'");
2959 if (ePrivateUse
!= PrivateUse::ALLOW
)
2963 const char* pLang
= nullptr;
2964 const lt_lang_t
* pLangT
= lt_tag_get_language( aVar
.mpLangtag
);
2967 pLang
= lt_lang_get_tag( pLangT
);
2968 if (pLang
&& strcmp( pLang
, I18NLANGTAG_QLT_ASCII
) == 0)
2970 // Disallow 'qlt' localuse code to prevent
2971 // confusion with our internal usage.
2976 if (ePrivateUse
== PrivateUse::ALLOW_ART_X
&& pLang
&& strcmp( pLang
, "art") == 0)
2978 // Allow anything 'art' which includes 'art-x-...' and 'art-Latn-x-...'.
2981 const lt_string_t
* pPrivate
= lt_tag_get_privateuse( aVar
.mpLangtag
);
2982 if (pPrivate
&& lt_string_length( pPrivate
) > 0)
2987 if (o_pCanonicalized
)
2988 *o_pCanonicalized
= OUString::createFromAscii( pTag
);
2994 SAL_INFO( "i18nlangtag", "LanguageTag:isValidBcp47: could not parse '" << rString
<< "'");
2999 LanguageTag
makeLanguageTagFromAppleLanguageId(AppleLanguageId nLanguage
)
3001 //map the simple ones via LanguageTypes, and the hard ones explicitly
3002 LanguageType
nLang(LANGUAGE_DONTKNOW
);
3006 case AppleLanguageId::ENGLISH
:
3007 nLang
= LANGUAGE_ENGLISH_US
;
3009 case AppleLanguageId::FRENCH
:
3010 nLang
= LANGUAGE_FRENCH
;
3012 case AppleLanguageId::GERMAN
:
3013 nLang
= LANGUAGE_GERMAN
;
3015 case AppleLanguageId::ITALIAN
:
3016 nLang
= LANGUAGE_ITALIAN
;
3018 case AppleLanguageId::DUTCH
:
3019 nLang
= LANGUAGE_DUTCH
;
3021 case AppleLanguageId::SWEDISH
:
3022 nLang
= LANGUAGE_SWEDISH
;
3024 case AppleLanguageId::SPANISH
:
3025 nLang
= LANGUAGE_SPANISH
;
3027 case AppleLanguageId::DANISH
:
3028 nLang
= LANGUAGE_DANISH
;
3030 case AppleLanguageId::PORTUGUESE
:
3031 nLang
= LANGUAGE_PORTUGUESE
;
3033 case AppleLanguageId::NORWEGIAN
:
3034 nLang
= LANGUAGE_NORWEGIAN
;
3036 case AppleLanguageId::HEBREW
:
3037 nLang
= LANGUAGE_HEBREW
;
3039 case AppleLanguageId::JAPANESE
:
3040 nLang
= LANGUAGE_JAPANESE
;
3042 case AppleLanguageId::ARABIC
:
3043 nLang
= LANGUAGE_ARABIC_PRIMARY_ONLY
;
3045 case AppleLanguageId::FINNISH
:
3046 nLang
= LANGUAGE_FINNISH
;
3048 case AppleLanguageId::GREEK
:
3049 nLang
= LANGUAGE_GREEK
;
3051 case AppleLanguageId::ICELANDIC
:
3052 nLang
= LANGUAGE_ICELANDIC
;
3054 case AppleLanguageId::MALTESE
:
3055 nLang
= LANGUAGE_MALTESE
;
3057 case AppleLanguageId::TURKISH
:
3058 nLang
= LANGUAGE_TURKISH
;
3060 case AppleLanguageId::CROATIAN
:
3061 nLang
= LANGUAGE_CROATIAN
;
3063 case AppleLanguageId::CHINESE_TRADITIONAL
:
3064 nLang
= LANGUAGE_CHINESE_TRADITIONAL
;
3066 case AppleLanguageId::URDU
:
3067 nLang
= LANGUAGE_URDU_PAKISTAN
; //probably, otherwise we need a LANGUAGE_URDU_PRIMARY_ONLY
3069 case AppleLanguageId::HINDI
:
3070 nLang
= LANGUAGE_HINDI
;
3072 case AppleLanguageId::THAI
:
3073 nLang
= LANGUAGE_THAI
;
3075 case AppleLanguageId::KOREAN
:
3076 nLang
= LANGUAGE_KOREAN
;
3078 case AppleLanguageId::LITHUANIAN
:
3079 nLang
= LANGUAGE_LITHUANIAN
;
3081 case AppleLanguageId::POLISH
:
3082 nLang
= LANGUAGE_POLISH
;
3084 case AppleLanguageId::HUNGARIAN
:
3085 nLang
= LANGUAGE_HUNGARIAN
;
3087 case AppleLanguageId::ESTONIAN
:
3088 nLang
= LANGUAGE_ESTONIAN
;
3090 case AppleLanguageId::LATVIAN
:
3091 nLang
= LANGUAGE_LATVIAN
;
3093 case AppleLanguageId::SAMI
:
3094 nLang
= LANGUAGE_SAMI_NORTHERN_NORWAY
; //maybe
3096 case AppleLanguageId::FAROESE
:
3097 nLang
= LANGUAGE_FAEROESE
;
3099 case AppleLanguageId::FARSI
:
3100 nLang
= LANGUAGE_FARSI
;
3102 case AppleLanguageId::RUSSIAN
:
3103 nLang
= LANGUAGE_RUSSIAN
;
3105 case AppleLanguageId::CHINESE_SIMPLIFIED
:
3106 nLang
= LANGUAGE_CHINESE_SIMPLIFIED
;
3108 case AppleLanguageId::FLEMISH
:
3109 nLang
= LANGUAGE_DUTCH_BELGIAN
;
3111 case AppleLanguageId::IRISH_GAELIC
:
3112 nLang
= LANGUAGE_GAELIC_IRELAND
;
3114 case AppleLanguageId::ALBANIAN
:
3115 nLang
= LANGUAGE_ALBANIAN
;
3117 case AppleLanguageId::ROMANIAN
:
3118 nLang
= LANGUAGE_ROMANIAN
;
3120 case AppleLanguageId::CZECH
:
3121 nLang
= LANGUAGE_CZECH
;
3123 case AppleLanguageId::SLOVAK
:
3124 nLang
= LANGUAGE_SLOVAK
;
3126 case AppleLanguageId::SLOVENIAN
:
3127 nLang
= LANGUAGE_SLOVENIAN
;
3129 case AppleLanguageId::YIDDISH
:
3130 nLang
= LANGUAGE_YIDDISH
;
3132 case AppleLanguageId::SERBIAN
:
3133 nLang
= LANGUAGE_SERBIAN_CYRILLIC_SERBIA
; //maybe
3135 case AppleLanguageId::MACEDONIAN
:
3136 nLang
= LANGUAGE_MACEDONIAN
;
3138 case AppleLanguageId::BULGARIAN
:
3139 nLang
= LANGUAGE_BULGARIAN
;
3141 case AppleLanguageId::UKRAINIAN
:
3142 nLang
= LANGUAGE_UKRAINIAN
;
3144 case AppleLanguageId::BYELORUSSIAN
:
3145 nLang
= LANGUAGE_BELARUSIAN
;
3147 case AppleLanguageId::UZBEK
:
3148 nLang
= LANGUAGE_UZBEK_CYRILLIC
; //maybe
3150 case AppleLanguageId::KAZAKH
:
3151 nLang
= LANGUAGE_KAZAKH
;
3153 case AppleLanguageId::AZERI_CYRILLIC
:
3154 nLang
= LANGUAGE_AZERI_CYRILLIC
;
3156 case AppleLanguageId::AZERI_ARABIC
:
3157 return LanguageTag("az-Arab");
3158 case AppleLanguageId::ARMENIAN
:
3159 nLang
= LANGUAGE_ARMENIAN
;
3161 case AppleLanguageId::GEORGIAN
:
3162 nLang
= LANGUAGE_GEORGIAN
;
3164 case AppleLanguageId::MOLDAVIAN
:
3165 nLang
= LANGUAGE_ROMANIAN_MOLDOVA
;
3167 case AppleLanguageId::KIRGHIZ
:
3168 nLang
= LANGUAGE_KIRGHIZ
;
3170 case AppleLanguageId::TAJIKI
:
3171 nLang
= LANGUAGE_TAJIK
;
3173 case AppleLanguageId::TURKMEN
:
3174 nLang
= LANGUAGE_TURKMEN
;
3176 case AppleLanguageId::MONGOLIAN_MONGOLIAN
:
3177 nLang
= LANGUAGE_MONGOLIAN_MONGOLIAN_MONGOLIA
;
3179 case AppleLanguageId::MONGOLIAN_CYRILLIC
:
3180 nLang
= LANGUAGE_MONGOLIAN_CYRILLIC_MONGOLIA
;
3182 case AppleLanguageId::PASHTO
:
3183 nLang
= LANGUAGE_PASHTO
;
3185 case AppleLanguageId::KURDISH
:
3186 nLang
= LANGUAGE_USER_KURDISH_TURKEY
; //maybe
3188 case AppleLanguageId::KASHMIRI
:
3189 nLang
= LANGUAGE_KASHMIRI
;
3191 case AppleLanguageId::SINDHI
:
3192 nLang
= LANGUAGE_SINDHI
;
3194 case AppleLanguageId::TIBETAN
:
3195 nLang
= LANGUAGE_TIBETAN
;
3197 case AppleLanguageId::NEPALI
:
3198 nLang
= LANGUAGE_NEPALI
;
3200 case AppleLanguageId::SANSKRIT
:
3201 nLang
= LANGUAGE_SANSKRIT
;
3203 case AppleLanguageId::MARATHI
:
3204 nLang
= LANGUAGE_MARATHI
;
3206 case AppleLanguageId::BENGALI
:
3207 nLang
= LANGUAGE_BENGALI
;
3209 case AppleLanguageId::ASSAMESE
:
3210 nLang
= LANGUAGE_ASSAMESE
;
3212 case AppleLanguageId::GUJARATI
:
3213 nLang
= LANGUAGE_GUJARATI
;
3215 case AppleLanguageId::PUNJABI
:
3216 nLang
= LANGUAGE_PUNJABI
;
3218 case AppleLanguageId::ORIYA
:
3219 nLang
= LANGUAGE_ODIA
;
3221 case AppleLanguageId::MALAYALAM
:
3222 nLang
= LANGUAGE_MALAYALAM
;
3224 case AppleLanguageId::KANNADA
:
3225 nLang
= LANGUAGE_KANNADA
;
3227 case AppleLanguageId::TAMIL
:
3228 nLang
= LANGUAGE_TAMIL
;
3230 case AppleLanguageId::TELUGU
:
3231 nLang
= LANGUAGE_TELUGU
;
3233 case AppleLanguageId::SINHALESE
:
3234 nLang
= LANGUAGE_SINHALESE_SRI_LANKA
;
3236 case AppleLanguageId::BURMESE
:
3237 nLang
= LANGUAGE_BURMESE
;
3239 case AppleLanguageId::KHMER
:
3240 nLang
= LANGUAGE_KHMER
;
3242 case AppleLanguageId::LAO
:
3243 nLang
= LANGUAGE_LAO
;
3245 case AppleLanguageId::VIETNAMESE
:
3246 nLang
= LANGUAGE_VIETNAMESE
;
3248 case AppleLanguageId::INDONESIAN
:
3249 nLang
= LANGUAGE_INDONESIAN
;
3251 case AppleLanguageId::TAGALONG
:
3252 nLang
= LANGUAGE_USER_TAGALOG
;
3254 case AppleLanguageId::MALAY_LATIN
:
3255 nLang
= LANGUAGE_MALAY_MALAYSIA
;
3257 case AppleLanguageId::MALAY_ARABIC
:
3258 nLang
= LANGUAGE_USER_MALAY_ARABIC_MALAYSIA
;
3260 case AppleLanguageId::AMHARIC
:
3261 nLang
= LANGUAGE_AMHARIC_ETHIOPIA
;
3263 case AppleLanguageId::TIGRINYA
:
3264 nLang
= LANGUAGE_TIGRIGNA_ETHIOPIA
;
3266 case AppleLanguageId::GALLA
:
3267 nLang
= LANGUAGE_OROMO
;
3269 case AppleLanguageId::SOMALI
:
3270 nLang
= LANGUAGE_SOMALI
;
3272 case AppleLanguageId::SWAHILI
:
3273 nLang
= LANGUAGE_SWAHILI
;
3275 case AppleLanguageId::KINYARWANDA
:
3276 nLang
= LANGUAGE_KINYARWANDA_RWANDA
;
3278 case AppleLanguageId::RUNDI
:
3279 return LanguageTag("rn");
3280 case AppleLanguageId::NYANJA
:
3281 nLang
= LANGUAGE_USER_NYANJA
;
3283 case AppleLanguageId::MALAGASY
:
3284 nLang
= LANGUAGE_MALAGASY_PLATEAU
;
3286 case AppleLanguageId::ESPERANTO
:
3287 nLang
= LANGUAGE_USER_ESPERANTO
;
3289 case AppleLanguageId::WELSH
:
3290 nLang
= LANGUAGE_WELSH
;
3292 case AppleLanguageId::BASQUE
:
3293 nLang
= LANGUAGE_BASQUE
;
3295 case AppleLanguageId::CATALAN
:
3296 nLang
= LANGUAGE_CATALAN
;
3298 case AppleLanguageId::LATIN
:
3299 nLang
= LANGUAGE_LATIN
;
3301 case AppleLanguageId::QUENCHUA
:
3302 nLang
= LANGUAGE_QUECHUA_BOLIVIA
; //maybe
3304 case AppleLanguageId::GUARANI
:
3305 nLang
= LANGUAGE_GUARANI_PARAGUAY
;
3307 case AppleLanguageId::AYMARA
:
3308 return LanguageTag("ay");
3309 case AppleLanguageId::TATAR
:
3310 nLang
= LANGUAGE_TATAR
;
3312 case AppleLanguageId::UIGHUR
:
3313 nLang
= LANGUAGE_UIGHUR_CHINA
;
3315 case AppleLanguageId::DZONGKHA
:
3316 nLang
= LANGUAGE_DZONGKHA_BHUTAN
;
3318 case AppleLanguageId::JAVANESE_LATIN
:
3319 return LanguageTag("jv-Latn");
3320 case AppleLanguageId::SUNDANESE_LATIN
:
3321 return LanguageTag("su-Latn");
3322 case AppleLanguageId::GALICIAN
:
3323 nLang
= LANGUAGE_GALICIAN
;
3325 case AppleLanguageId::AFRIKAANS
:
3326 nLang
= LANGUAGE_AFRIKAANS
;
3328 case AppleLanguageId::BRETON
:
3329 nLang
= LANGUAGE_BRETON_FRANCE
;
3331 case AppleLanguageId::INUKTITUT
:
3332 nLang
= LANGUAGE_INUKTITUT_LATIN_CANADA
; //probably
3334 case AppleLanguageId::SCOTTISH_GAELIC
:
3335 nLang
= LANGUAGE_GAELIC_SCOTLAND
;
3337 case AppleLanguageId::MANX_GAELIC
:
3338 nLang
= LANGUAGE_USER_MANX
;
3340 case AppleLanguageId::IRISH_GAELIC_WITH_DOT_ABOVE
:
3341 return LanguageTag("ga-Latg");
3342 case AppleLanguageId::TONGAN
:
3343 return LanguageTag("to");
3344 case AppleLanguageId::GREEK_POLYTONIC
:
3345 nLang
= LANGUAGE_USER_ANCIENT_GREEK
;
3347 case AppleLanguageId::GREENLANDIC
:
3348 nLang
= LANGUAGE_KALAALLISUT_GREENLAND
;
3350 case AppleLanguageId::AZERI_LATIN
:
3351 nLang
= LANGUAGE_AZERI_LATIN
;
3355 return LanguageTag(nLang
);
3358 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */