1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 #include <config_folders.h>
11 #include <config_liblangtag.h>
13 #include <i18nlangtag/languagetag.hxx>
14 #include <i18nlangtag/applelangid.hxx>
15 #include <i18nlangtag/mslangid.hxx>
16 #include <rtl/ustrbuf.hxx>
17 #include <rtl/bootstrap.hxx>
18 #include <sal/log.hxx>
19 #include <osl/file.hxx>
20 #include <osl/mutex.hxx>
21 #include <rtl/instance.hxx>
22 #include <rtl/locale.h>
25 #include <unordered_set>
29 #if LIBLANGTAG_INLINE_FIX
30 #define LT_HAVE_INLINE
32 #include <liblangtag/langtag.h>
34 using namespace com::sun::star
;
37 // Helper to ensure lt_error_t is freed
41 myLtError() : p(nullptr) {}
42 ~myLtError() { if (p
) lt_error_unref( p
); }
45 // "statics" to be returned as const reference to an empty locale and string.
47 struct theEmptyLocale
: public rtl::Static
< lang::Locale
, theEmptyLocale
> {};
48 struct theEmptyBcp47
: public rtl::Static
< OUString
, theEmptyBcp47
> {};
51 typedef std::unordered_set
< OUString
> KnownTagSet
;
53 struct theKnowns
: public rtl::Static
< KnownTagSet
, theKnowns
> {};
54 struct theMutex
: public rtl::Static
< osl::Mutex
, theMutex
> {};
57 static const KnownTagSet
& getKnowns()
59 KnownTagSet
& rKnowns
= theKnowns::get();
62 osl::MutexGuard
aGuard( theMutex::get());
65 ::std::vector
< MsLangId::LanguagetagMapping
> aDefined( MsLangId::getDefinedLanguagetags());
66 for (auto const& elemDefined
: aDefined
)
68 // Do not use the BCP47 string here to initialize the
69 // LanguageTag because then canonicalize() would call this
70 // getKnowns() again...
71 ::std::vector
< OUString
> aFallbacks( LanguageTag( elemDefined
.mnLang
).getFallbackStrings( true));
72 for (auto const& fallback
: aFallbacks
)
74 rKnowns
.insert(fallback
);
84 struct compareIgnoreAsciiCaseLess
86 bool operator()( const OUString
& r1
, const OUString
& r2
) const
88 return r1
.compareToIgnoreAsciiCase( r2
) < 0;
91 typedef ::std::map
< OUString
, LanguageTag::ImplPtr
, compareIgnoreAsciiCaseLess
> MapBcp47
;
92 typedef ::std::map
< LanguageType
, LanguageTag::ImplPtr
> MapLangID
;
93 struct theMapBcp47
: public rtl::Static
< MapBcp47
, theMapBcp47
> {};
94 struct theMapLangID
: public rtl::Static
< MapLangID
, theMapLangID
> {};
95 struct theDontKnow
: public rtl::Static
< LanguageTag::ImplPtr
, theDontKnow
> {};
96 struct theSystemLocale
: public rtl::Static
< LanguageTag::ImplPtr
, theSystemLocale
> {};
100 static LanguageType
getNextOnTheFlyLanguage()
102 static LanguageType
nOnTheFlyLanguage(0);
103 osl::MutexGuard
aGuard( theMutex::get());
104 if (!nOnTheFlyLanguage
)
105 nOnTheFlyLanguage
= MsLangId::makeLangID( LANGUAGE_ON_THE_FLY_SUB_START
, LANGUAGE_ON_THE_FLY_START
);
108 if (MsLangId::getPrimaryLanguage( nOnTheFlyLanguage
) != LANGUAGE_ON_THE_FLY_END
)
112 LanguageType nSub
= MsLangId::getSubLanguage( nOnTheFlyLanguage
);
113 if (nSub
!= LANGUAGE_ON_THE_FLY_SUB_END
)
114 nOnTheFlyLanguage
= MsLangId::makeLangID( ++nSub
, LANGUAGE_ON_THE_FLY_START
);
117 SAL_WARN( "i18nlangtag", "getNextOnTheFlyLanguage: none left! ("
118 << ((sal_uInt16(LANGUAGE_ON_THE_FLY_END
) - sal_uInt16(LANGUAGE_ON_THE_FLY_START
) + 1)
119 * (sal_uInt16(LANGUAGE_ON_THE_FLY_SUB_END
) - sal_uInt16(LANGUAGE_ON_THE_FLY_SUB_START
) + 1))
121 return LanguageType(0);
125 #if OSL_DEBUG_LEVEL > 0
126 static size_t nOnTheFlies
= 0;
128 SAL_INFO( "i18nlangtag", "getNextOnTheFlyLanguage: number " << nOnTheFlies
);
130 return nOnTheFlyLanguage
;
135 bool LanguageTag::isOnTheFlyID( LanguageType nLang
)
137 LanguageType nPri
= MsLangId::getPrimaryLanguage( nLang
);
138 LanguageType nSub
= MsLangId::getSubLanguage( nLang
);
140 LANGUAGE_ON_THE_FLY_START
<= nPri
&& nPri
<= LANGUAGE_ON_THE_FLY_END
&&
141 LANGUAGE_ON_THE_FLY_SUB_START
<= nSub
&& nSub
<= LANGUAGE_ON_THE_FLY_SUB_END
;
145 /** A reference holder for liblangtag data de/initialization, one static
146 instance. Currently implemented such that the first "ref" inits and dtor
147 (our library deinitialized) tears down.
149 class LiblangtagDataRef
153 ~LiblangtagDataRef();
160 OString maDataPath
; // path to liblangtag data, "|" if system
163 void setupDataPath();
165 static void teardown();
169 struct theDataRef
: public rtl::Static
< LiblangtagDataRef
, theDataRef
> {};
172 LiblangtagDataRef::LiblangtagDataRef()
178 LiblangtagDataRef::~LiblangtagDataRef()
184 void LiblangtagDataRef::setup()
186 SAL_INFO( "i18nlangtag", "LiblangtagDataRef::setup: initializing database");
187 if (maDataPath
.isEmpty())
190 mbInitialized
= true;
193 void LiblangtagDataRef::teardown()
195 SAL_INFO( "i18nlangtag", "LiblangtagDataRef::teardown: finalizing database");
199 void LiblangtagDataRef::setupDataPath()
201 // maDataPath is assumed to be empty here.
202 OUString
aURL("$BRAND_BASE_DIR/" LIBO_SHARE_FOLDER
"/liblangtag");
203 rtl::Bootstrap::expandMacros(aURL
); //TODO: detect failure
205 // Check if data is in our own installation, else assume system
207 OUString aData
= aURL
+ "/language-subtag-registry.xml";
208 osl::DirectoryItem aDirItem
;
209 if (osl::DirectoryItem::get( aData
, aDirItem
) == osl::DirectoryItem::E_None
)
212 if (osl::FileBase::getSystemPathFromFileURL( aURL
, aPath
) == osl::FileBase::E_None
)
213 maDataPath
= OUStringToOString( aPath
, RTL_TEXTENCODING_UTF8
);
215 if (maDataPath
.isEmpty())
216 maDataPath
= "|"; // assume system
218 lt_db_set_datadir( maDataPath
.getStr());
222 /* TODO: we could transform known vendor and browser-specific variants to known
223 * BCP 47 if available. For now just remove them to not confuse any later
224 * treatments that check for empty variants. This vendor stuff was never
225 * supported anyway. */
226 static void handleVendorVariant( css::lang::Locale
& rLocale
)
228 if (!rLocale
.Variant
.isEmpty() && rLocale
.Language
!= I18NLANGTAG_QLT
)
229 rLocale
.Variant
.clear();
233 class LanguageTagImpl
237 explicit LanguageTagImpl( const LanguageTag
& rLanguageTag
);
238 explicit LanguageTagImpl( const LanguageTagImpl
& rLanguageTagImpl
);
240 LanguageTagImpl
& operator=( const LanguageTagImpl
& rLanguageTagImpl
);
244 friend class LanguageTag
;
253 mutable css::lang::Locale maLocale
;
254 mutable OUString maBcp47
;
255 mutable OUString maCachedLanguage
; ///< cache getLanguage()
256 mutable OUString maCachedScript
; ///< cache getScript()
257 mutable OUString maCachedCountry
; ///< cache getCountry()
258 mutable OUString maCachedVariants
; ///< cache getVariants()
259 mutable OUString maCachedGlibcString
; ///< cache getGlibcLocaleString()
260 mutable lt_tag_t
* mpImplLangtag
; ///< liblangtag pointer
261 mutable LanguageType mnLangID
;
262 mutable LanguageTag::ScriptType meScriptType
;
263 mutable Decision meIsValid
;
264 mutable Decision meIsIsoLocale
;
265 mutable Decision meIsIsoODF
;
266 mutable Decision meIsLiblangtagNeeded
; ///< whether processing with liblangtag needed
267 bool mbSystemLocale
: 1;
268 mutable bool mbInitializedBcp47
: 1;
269 mutable bool mbInitializedLocale
: 1;
270 mutable bool mbInitializedLangID
: 1;
271 mutable bool mbCachedLanguage
: 1;
272 mutable bool mbCachedScript
: 1;
273 mutable bool mbCachedCountry
: 1;
274 mutable bool mbCachedVariants
: 1;
275 mutable bool mbCachedGlibcString
: 1;
277 OUString
const & getBcp47() const;
278 OUString
const & getLanguage() const;
279 OUString
const & getScript() const;
280 OUString
const & getCountry() const;
281 OUString
getRegion() const;
282 OUString
const & getVariants() const;
283 bool hasScript() const;
284 OUString
getGlibcLocaleString() const;
286 void setScriptType(LanguageTag::ScriptType st
);
287 LanguageTag::ScriptType
getScriptType() const;
289 bool isIsoLocale() const;
290 bool isIsoODF() const;
291 bool isValidBcp47() const;
293 void convertLocaleToBcp47();
294 bool convertLocaleToLang( bool bAllowOnTheFlyID
);
295 void convertBcp47ToLocale();
296 void convertBcp47ToLang();
297 void convertLangToLocale();
298 void convertLangToBcp47();
300 /** @return whether BCP 47 language tag string was changed. */
303 /** Canonicalize if not yet done and synchronize initialized conversions.
305 @return whether BCP 47 language tag string was changed.
307 bool synCanonicalize();
309 OUString
getLanguageFromLangtag();
310 OUString
getScriptFromLangtag();
311 OUString
getRegionFromLangtag();
312 OUString
getVariantsFromLangtag();
314 /** Generates on-the-fly LangID and registers the maBcp47,mnLangID pair.
317 If not 0 and not LANGUAGE_DONTKNOW, suggest (!) to use that ID
318 instead of generating an on-the-fly ID. Implementation may
319 still generate an ID if the suggested ID is already used for
320 another language tag.
322 @return NULL if no ID could be obtained or registration failed.
324 LanguageTag::ImplPtr
registerOnTheFly( LanguageType nRegisterID
);
326 /** Obtain Language, Script, Country and Variants via simpleExtract() and
327 assign them to the cached variables if successful.
329 @return simpleExtract() successfully extracted and cached.
331 bool cacheSimpleLSCV();
344 /** Of a language tag of the form lll[-Ssss][-CC][-vvvvvvvv] extract the
347 Does not check case or content!
349 @return EXTRACTED_LSC if simple tag was detected (i.e. one that
350 would fulfill the isIsoODF() condition),
351 EXTRACTED_LV if a tag with variant was detected,
352 EXTRACTED_C_LOCALE if a 'C' locale was detected,
353 EXTRACTED_X if x-... privateuse tag was detected,
354 EXTRACTED_X_JOKER if "*" joker was detected,
355 EXTRACTED_KNOWN_BAD if a bad but known (to be remapped) tag was detected
358 static Extraction
simpleExtract( const OUString
& rBcp47
,
362 OUString
& rVariants
);
364 /** Convert Locale to BCP 47 string without resolving system and creating
365 temporary LanguageTag instances. */
366 static OUString
convertToBcp47( const css::lang::Locale
& rLocale
);
371 LanguageTagImpl::LanguageTagImpl( const LanguageTag
& rLanguageTag
)
373 maLocale( rLanguageTag
.maLocale
),
374 maBcp47( rLanguageTag
.maBcp47
),
375 mpImplLangtag( nullptr),
376 mnLangID( rLanguageTag
.mnLangID
),
377 meScriptType( LanguageTag::ScriptType::UNKNOWN
),
378 meIsValid( DECISION_DONTKNOW
),
379 meIsIsoLocale( DECISION_DONTKNOW
),
380 meIsIsoODF( DECISION_DONTKNOW
),
381 meIsLiblangtagNeeded( DECISION_DONTKNOW
),
382 mbSystemLocale( rLanguageTag
.mbSystemLocale
),
383 mbInitializedBcp47( rLanguageTag
.mbInitializedBcp47
),
384 mbInitializedLocale( rLanguageTag
.mbInitializedLocale
),
385 mbInitializedLangID( rLanguageTag
.mbInitializedLangID
),
386 mbCachedLanguage( false),
387 mbCachedScript( false),
388 mbCachedCountry( false),
389 mbCachedVariants( false),
390 mbCachedGlibcString( false)
395 LanguageTagImpl::LanguageTagImpl( const LanguageTagImpl
& rLanguageTagImpl
)
397 maLocale( rLanguageTagImpl
.maLocale
),
398 maBcp47( rLanguageTagImpl
.maBcp47
),
399 maCachedLanguage( rLanguageTagImpl
.maCachedLanguage
),
400 maCachedScript( rLanguageTagImpl
.maCachedScript
),
401 maCachedCountry( rLanguageTagImpl
.maCachedCountry
),
402 maCachedVariants( rLanguageTagImpl
.maCachedVariants
),
403 maCachedGlibcString( rLanguageTagImpl
.maCachedGlibcString
),
404 mpImplLangtag( rLanguageTagImpl
.mpImplLangtag
?
405 lt_tag_copy( rLanguageTagImpl
.mpImplLangtag
) : nullptr),
406 mnLangID( rLanguageTagImpl
.mnLangID
),
407 meScriptType( rLanguageTagImpl
.meScriptType
),
408 meIsValid( rLanguageTagImpl
.meIsValid
),
409 meIsIsoLocale( rLanguageTagImpl
.meIsIsoLocale
),
410 meIsIsoODF( rLanguageTagImpl
.meIsIsoODF
),
411 meIsLiblangtagNeeded( rLanguageTagImpl
.meIsLiblangtagNeeded
),
412 mbSystemLocale( rLanguageTagImpl
.mbSystemLocale
),
413 mbInitializedBcp47( rLanguageTagImpl
.mbInitializedBcp47
),
414 mbInitializedLocale( rLanguageTagImpl
.mbInitializedLocale
),
415 mbInitializedLangID( rLanguageTagImpl
.mbInitializedLangID
),
416 mbCachedLanguage( rLanguageTagImpl
.mbCachedLanguage
),
417 mbCachedScript( rLanguageTagImpl
.mbCachedScript
),
418 mbCachedCountry( rLanguageTagImpl
.mbCachedCountry
),
419 mbCachedVariants( rLanguageTagImpl
.mbCachedVariants
),
420 mbCachedGlibcString( rLanguageTagImpl
.mbCachedGlibcString
)
423 theDataRef::get().init();
427 LanguageTagImpl
& LanguageTagImpl::operator=( const LanguageTagImpl
& rLanguageTagImpl
)
429 if (&rLanguageTagImpl
== this)
432 maLocale
= rLanguageTagImpl
.maLocale
;
433 maBcp47
= rLanguageTagImpl
.maBcp47
;
434 maCachedLanguage
= rLanguageTagImpl
.maCachedLanguage
;
435 maCachedScript
= rLanguageTagImpl
.maCachedScript
;
436 maCachedCountry
= rLanguageTagImpl
.maCachedCountry
;
437 maCachedVariants
= rLanguageTagImpl
.maCachedVariants
;
438 maCachedGlibcString
= rLanguageTagImpl
.maCachedGlibcString
;
439 lt_tag_t
* oldTag
= mpImplLangtag
;
440 mpImplLangtag
= rLanguageTagImpl
.mpImplLangtag
?
441 lt_tag_copy( rLanguageTagImpl
.mpImplLangtag
) : nullptr;
442 lt_tag_unref(oldTag
);
443 mnLangID
= rLanguageTagImpl
.mnLangID
;
444 meScriptType
= rLanguageTagImpl
.meScriptType
;
445 meIsValid
= rLanguageTagImpl
.meIsValid
;
446 meIsIsoLocale
= rLanguageTagImpl
.meIsIsoLocale
;
447 meIsIsoODF
= rLanguageTagImpl
.meIsIsoODF
;
448 meIsLiblangtagNeeded
= rLanguageTagImpl
.meIsLiblangtagNeeded
;
449 mbSystemLocale
= rLanguageTagImpl
.mbSystemLocale
;
450 mbInitializedBcp47
= rLanguageTagImpl
.mbInitializedBcp47
;
451 mbInitializedLocale
= rLanguageTagImpl
.mbInitializedLocale
;
452 mbInitializedLangID
= rLanguageTagImpl
.mbInitializedLangID
;
453 mbCachedLanguage
= rLanguageTagImpl
.mbCachedLanguage
;
454 mbCachedScript
= rLanguageTagImpl
.mbCachedScript
;
455 mbCachedCountry
= rLanguageTagImpl
.mbCachedCountry
;
456 mbCachedVariants
= rLanguageTagImpl
.mbCachedVariants
;
457 mbCachedGlibcString
= rLanguageTagImpl
.mbCachedGlibcString
;
458 if (mpImplLangtag
&& !oldTag
)
459 theDataRef::get().init();
464 LanguageTagImpl::~LanguageTagImpl()
468 lt_tag_unref( mpImplLangtag
);
473 LanguageTag::LanguageTag( const OUString
& rBcp47LanguageTag
, bool bCanonicalize
)
475 maBcp47( rBcp47LanguageTag
),
476 mnLangID( LANGUAGE_DONTKNOW
),
477 mbSystemLocale( rBcp47LanguageTag
.isEmpty()),
478 mbInitializedBcp47( !mbSystemLocale
),
479 mbInitializedLocale( false),
480 mbInitializedLangID( false),
485 getImpl()->canonicalize();
486 // Registration itself may already have canonicalized, so do an
487 // unconditional sync.
494 LanguageTag::LanguageTag( const css::lang::Locale
& rLocale
)
497 mnLangID( LANGUAGE_DONTKNOW
),
498 mbSystemLocale( rLocale
.Language
.isEmpty()),
499 mbInitializedBcp47( false),
500 mbInitializedLocale( false), // we do not know which mess we got passed in
501 mbInitializedLangID( false),
504 handleVendorVariant( maLocale
);
508 LanguageTag::LanguageTag( LanguageType nLanguage
)
510 mnLangID( nLanguage
),
511 mbSystemLocale( nLanguage
== LANGUAGE_SYSTEM
),
512 mbInitializedBcp47( false),
513 mbInitializedLocale( false),
514 mbInitializedLangID( !mbSystemLocale
),
520 LanguageTag::LanguageTag( const OUString
& rBcp47
, const OUString
& rLanguage
,
521 const OUString
& rScript
, const OUString
& rCountry
)
524 mnLangID( LANGUAGE_DONTKNOW
),
525 mbSystemLocale( rBcp47
.isEmpty() && rLanguage
.isEmpty()),
526 mbInitializedBcp47( !rBcp47
.isEmpty()),
527 mbInitializedLocale( false),
528 mbInitializedLangID( false),
531 if (!mbSystemLocale
&& !mbInitializedBcp47
)
533 if (rScript
.isEmpty())
535 maBcp47
= rLanguage
+ "-" + rCountry
;
536 mbInitializedBcp47
= true;
537 maLocale
.Language
= rLanguage
;
538 maLocale
.Country
= rCountry
;
539 mbInitializedLocale
= true;
543 if (rCountry
.isEmpty())
544 maBcp47
= rLanguage
+ "-" + rScript
;
546 maBcp47
= rLanguage
+ "-" + rScript
+ "-" + rCountry
;
547 mbInitializedBcp47
= true;
548 maLocale
.Language
= I18NLANGTAG_QLT
;
549 maLocale
.Country
= rCountry
;
550 maLocale
.Variant
= maBcp47
;
551 mbInitializedLocale
= true;
557 LanguageTag::LanguageTag( const rtl_Locale
& rLocale
)
559 maLocale( rLocale
.Language
, rLocale
.Country
, rLocale
.Variant
),
560 mnLangID( LANGUAGE_DONTKNOW
),
561 mbSystemLocale( maLocale
.Language
.isEmpty()),
562 mbInitializedBcp47( false),
563 mbInitializedLocale( !mbSystemLocale
),
564 mbInitializedLangID( false),
567 convertFromRtlLocale();
570 LanguageTag::~LanguageTag() {}
572 LanguageTag::ImplPtr
LanguageTagImpl::registerOnTheFly( LanguageType nRegisterID
)
574 LanguageTag::ImplPtr pImpl
;
576 if (!mbInitializedBcp47
)
578 if (mbInitializedLocale
)
580 maBcp47
= LanguageTagImpl::convertToBcp47( maLocale
);
581 mbInitializedBcp47
= !maBcp47
.isEmpty();
584 if (maBcp47
.isEmpty())
586 SAL_WARN( "i18nlangtag", "LanguageTagImpl::registerOnTheFly: no Bcp47 string, no registering");
590 osl::MutexGuard
aGuard( theMutex::get());
592 MapBcp47
& rMapBcp47
= theMapBcp47::get();
593 MapBcp47::const_iterator
it( rMapBcp47
.find( maBcp47
));
594 bool bOtherImpl
= false;
595 if (it
!= rMapBcp47
.end())
597 SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: found impl for '" << maBcp47
<< "'");
598 pImpl
= (*it
).second
;
599 if (pImpl
.get() != this)
601 // Could happen for example if during registerImpl() the tag was
602 // changed via canonicalize() and the result was already present in
603 // the map before, for example 'bn-Beng' => 'bn'. This specific
604 // case is now taken care of in registerImpl() and doesn't reach
605 // here. However, use the already existing impl if it matches.
606 SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: using other impl for this '" << maBcp47
<< "'");
607 *this = *pImpl
; // ensure consistency
613 SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: new impl for '" << maBcp47
<< "'");
614 pImpl
.reset( new LanguageTagImpl( *this));
615 rMapBcp47
.insert( ::std::make_pair( maBcp47
, pImpl
));
618 if (!bOtherImpl
|| !pImpl
->mbInitializedLangID
)
620 if (nRegisterID
== LanguageType(0) || nRegisterID
== LANGUAGE_DONTKNOW
)
621 nRegisterID
= getNextOnTheFlyLanguage();
624 // Accept a suggested ID only if it is not mapped yet to something
625 // different, otherwise we would end up with ambiguous assignments
626 // of different language tags, for example for the same primary
627 // LangID with "no", "nb" and "nn".
628 const MapLangID
& rMapLangID
= theMapLangID::get();
629 MapLangID::const_iterator
itID( rMapLangID
.find( nRegisterID
));
630 if (itID
!= rMapLangID
.end())
632 if ((*itID
).second
->maBcp47
!= maBcp47
)
634 SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: not using suggested 0x"
635 << ::std::hex
<< nRegisterID
<< " for '" << maBcp47
<< "' have '"
636 << (*itID
).second
->maBcp47
<< "'");
637 nRegisterID
= getNextOnTheFlyLanguage();
641 SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: suggested 0x"
642 << ::std::hex
<< nRegisterID
<< " for '" << maBcp47
<< "' already registered");
648 // out of IDs, nothing to register
651 pImpl
->mnLangID
= nRegisterID
;
652 pImpl
->mbInitializedLangID
= true;
653 if (pImpl
.get() != this)
655 mnLangID
= nRegisterID
;
656 mbInitializedLangID
= true;
660 ::std::pair
< MapLangID::const_iterator
, bool > res(
661 theMapLangID::get().insert( ::std::make_pair( pImpl
->mnLangID
, pImpl
)));
664 SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: cross-inserted 0x"
665 << ::std::hex
<< pImpl
->mnLangID
<< " for '" << maBcp47
<< "'");
669 SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: not cross-inserted 0x"
670 << ::std::hex
<< pImpl
->mnLangID
<< " for '" << maBcp47
<< "' have '"
671 << (*res
.first
).second
->maBcp47
<< "'");
678 LanguageTag::ScriptType
LanguageTag::getOnTheFlyScriptType( LanguageType nRegisterID
)
680 const MapLangID
& rMapLangID
= theMapLangID::get();
681 MapLangID::const_iterator
itID( rMapLangID
.find( nRegisterID
));
682 if (itID
!= rMapLangID
.end())
683 return (*itID
).second
->getScriptType();
685 return ScriptType::UNKNOWN
;
690 void LanguageTag::setConfiguredSystemLanguage( LanguageType nLang
)
692 if (nLang
== LANGUAGE_DONTKNOW
|| nLang
== LANGUAGE_SYSTEM
)
694 SAL_WARN( "i18nlangtag",
695 "LanguageTag::setConfiguredSystemLanguage: refusing to set unresolved system locale 0x" <<
696 ::std::hex
<< nLang
);
699 SAL_INFO( "i18nlangtag", "LanguageTag::setConfiguredSystemLanguage: setting to 0x" << ::std::hex
<< nLang
);
700 MsLangId::LanguageTagAccess::setConfiguredSystemLanguage( nLang
);
701 // Reset system locale to none and let registerImpl() do the rest to
702 // initialize a new one.
703 theSystemLocale::get().reset();
704 LanguageTag
aLanguageTag( LANGUAGE_SYSTEM
);
705 aLanguageTag
.registerImpl();
708 static bool lt_tag_parse_disabled
= false;
711 void LanguageTag::disable_lt_tag_parse()
713 lt_tag_parse_disabled
= true;
716 static bool lcl_isKnownOnTheFlyID( LanguageType nLang
)
718 return nLang
!= LANGUAGE_DONTKNOW
&& nLang
!= LANGUAGE_SYSTEM
&&
719 (LanguageTag::isOnTheFlyID( nLang
) || (nLang
== MsLangId::getPrimaryLanguage( nLang
)));
723 LanguageTag::ImplPtr
LanguageTag::registerImpl() const
725 // XXX NOTE: Do not use non-static LanguageTag::convert...() member methods
726 // here as they access getImpl() and syncFromImpl() and would lead to
727 // recursion. Also do not use the static LanguageTag::convertTo...()
728 // methods as they may create temporary LanguageTag instances. Only
729 // LanguageTagImpl::convertToBcp47(Locale) is ok.
733 #if OSL_DEBUG_LEVEL > 0
734 static size_t nCalls
= 0;
736 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCalls
<< " calls");
739 // Do not register unresolved system locale, also force LangID if system
740 // and take the system locale shortcut if possible.
743 pImpl
= theSystemLocale::get();
746 #if OSL_DEBUG_LEVEL > 0
747 static size_t nCallsSystem
= 0;
749 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystem
<< " system calls");
753 if (!mbInitializedLangID
)
755 mnLangID
= MsLangId::getRealLanguage( LANGUAGE_SYSTEM
);
756 mbInitializedLangID
= (mnLangID
!= LANGUAGE_SYSTEM
);
757 SAL_WARN_IF( !mbInitializedLangID
, "i18nlangtag", "LanguageTag::registerImpl: can't resolve system!");
761 if (mbInitializedLangID
)
763 if (mnLangID
== LANGUAGE_DONTKNOW
)
765 // Heavy usage of LANGUAGE_DONTKNOW, make it an own Impl for all the
766 // conversion attempts. At the same time provide a central breakpoint
767 // to inspect such places.
768 LanguageTag::ImplPtr
& rDontKnow
= theDontKnow::get();
770 rDontKnow
.reset( new LanguageTagImpl( *this));
772 #if OSL_DEBUG_LEVEL > 0
773 static size_t nCallsDontKnow
= 0;
775 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsDontKnow
<< " DontKnow calls");
781 // A great share are calls for a system equal locale.
782 pImpl
= theSystemLocale::get();
783 if (pImpl
&& pImpl
->mnLangID
== mnLangID
)
785 #if OSL_DEBUG_LEVEL > 0
786 static size_t nCallsSystemEqual
= 0;
788 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystemEqual
789 << " system equal LangID calls");
796 // Force Bcp47 if not LangID.
797 if (!mbInitializedLangID
&& !mbInitializedBcp47
)
799 // The one central point to set mbInitializedLocale=true if a
800 // LanguageTag was initialized with a Locale. We will now convert and
801 // possibly later resolve it.
802 if (!mbInitializedLocale
&& (mbSystemLocale
|| !maLocale
.Language
.isEmpty()))
803 mbInitializedLocale
= true;
804 SAL_WARN_IF( !mbInitializedLocale
, "i18nlangtag", "LanguageTag::registerImpl: still not mbInitializedLocale");
806 maBcp47
= LanguageTagImpl::convertToBcp47( maLocale
);
807 mbInitializedBcp47
= !maBcp47
.isEmpty();
810 if (mbInitializedBcp47
)
812 // A great share are calls for a system equal locale.
813 pImpl
= theSystemLocale::get();
814 if (pImpl
&& pImpl
->maBcp47
== maBcp47
)
816 #if OSL_DEBUG_LEVEL > 0
817 static size_t nCallsSystemEqual
= 0;
819 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystemEqual
<< " system equal BCP47 calls");
825 #if OSL_DEBUG_LEVEL > 0
826 static size_t nCallsNonSystem
= 0;
828 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsNonSystem
<< " non-system calls");
831 osl::MutexGuard
aGuard( theMutex::get());
833 #if OSL_DEBUG_LEVEL > 0
834 static long nRunning
= 0;
835 // Entering twice here is ok, which is needed for fallback init in
836 // getKnowns() in canonicalize() via pImpl->convertBcp47ToLocale() below,
837 // everything else is suspicious.
838 SAL_WARN_IF( nRunning
> 1, "i18nlangtag", "LanguageTag::registerImpl: re-entered for '"
839 << maBcp47
<< "' 0x" << ::std::hex
<< mnLangID
);
840 struct Runner
{ Runner() { ++nRunning
; } ~Runner() { --nRunning
; } } aRunner
;
843 // Prefer LangID map as find+insert needs less comparison work.
844 if (mbInitializedLangID
)
846 MapLangID
& rMap
= theMapLangID::get();
847 MapLangID::const_iterator
it( rMap
.find( mnLangID
));
848 if (it
!= rMap
.end())
850 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: found impl for 0x" << ::std::hex
<< mnLangID
);
851 pImpl
= (*it
).second
;
855 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: new impl for 0x" << ::std::hex
<< mnLangID
);
856 pImpl
.reset( new LanguageTagImpl( *this));
857 rMap
.insert( ::std::make_pair( mnLangID
, pImpl
));
859 if (!pImpl
->mbInitializedLocale
)
860 pImpl
->convertLangToLocale();
861 LanguageType nLang
= MsLangId::Conversion::convertLocaleToLanguage( pImpl
->maLocale
);
862 // If round-trip is identical cross-insert to Bcp47 map.
863 if (nLang
== pImpl
->mnLangID
)
865 if (!pImpl
->mbInitializedBcp47
)
866 pImpl
->convertLocaleToBcp47();
867 ::std::pair
< MapBcp47::const_iterator
, bool > res(
868 theMapBcp47::get().insert( ::std::make_pair( pImpl
->maBcp47
, pImpl
)));
871 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: cross-inserted '" << pImpl
->maBcp47
<< "' for 0x" << ::std::hex
<< mnLangID
);
875 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted '" << pImpl
->maBcp47
<< "' for 0x" << ::std::hex
<< mnLangID
<< " have 0x"
876 << ::std::hex
<< (*res
.first
).second
->mnLangID
);
881 if (!pImpl
->mbInitializedBcp47
)
882 pImpl
->convertLocaleToBcp47();
883 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted '" << pImpl
->maBcp47
<< "' for 0x" << ::std::hex
<< mnLangID
<< " round-trip to 0x" << ::std::hex
<< nLang
);
887 else if (!maBcp47
.isEmpty())
889 MapBcp47
& rMap
= theMapBcp47::get();
890 MapBcp47::const_iterator
it( rMap
.find( maBcp47
));
891 if (it
!= rMap
.end())
893 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: found impl for '" << maBcp47
<< "'");
894 pImpl
= (*it
).second
;
898 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: new impl for '" << maBcp47
<< "'");
899 pImpl
.reset( new LanguageTagImpl( *this));
900 ::std::pair
< MapBcp47::iterator
, bool > insOrig( rMap
.insert( ::std::make_pair( maBcp47
, pImpl
)));
901 // If changed after canonicalize() also add the resulting tag to
903 if (pImpl
->synCanonicalize())
905 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: canonicalized to '" << pImpl
->maBcp47
<< "'");
906 ::std::pair
< MapBcp47::const_iterator
, bool > insCanon(
907 rMap
.insert( ::std::make_pair( pImpl
->maBcp47
, pImpl
)));
908 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << (insCanon
.second
? "" : "not ")
909 << "inserted '" << pImpl
->maBcp47
<< "'");
910 // If the canonicalized tag already existed (was not inserted)
911 // and impls are different, make this impl that impl and skip
912 // the rest if that LangID is present as well. The existing
913 // entry may or may not be different, it may even be strictly
914 // identical to this if it differs only in case (e.g. ko-kr =>
915 // ko-KR) which was corrected in canonicalize() hence also in
916 // the map entry but comparison is case insensitive and found
918 if (!insCanon
.second
&& (*insCanon
.first
).second
!= pImpl
)
920 (*insOrig
.first
).second
= pImpl
= (*insCanon
.first
).second
;
921 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: share impl with 0x"
922 << ::std::hex
<< pImpl
->mnLangID
);
925 if (!pImpl
->mbInitializedLangID
)
927 // Try round-trip Bcp47->Locale->LangID->Locale->Bcp47.
928 if (!pImpl
->mbInitializedLocale
)
929 pImpl
->convertBcp47ToLocale();
930 if (!pImpl
->mbInitializedLangID
)
931 pImpl
->convertLocaleToLang( true);
932 // Unconditionally insert (round-trip is possible) for
933 // on-the-fly IDs and (generated or not) suggested IDs.
934 bool bInsert
= lcl_isKnownOnTheFlyID( pImpl
->mnLangID
);
938 if (pImpl
->mnLangID
!= LANGUAGE_DONTKNOW
)
940 // May have involved canonicalize(), so compare with
941 // pImpl->maBcp47 instead of maBcp47!
942 aBcp47
= LanguageTagImpl::convertToBcp47(
943 MsLangId::Conversion::convertLanguageToLocale( pImpl
->mnLangID
));
944 bInsert
= (aBcp47
== pImpl
->maBcp47
);
947 // If round-trip is identical cross-insert to LangID map.
950 ::std::pair
< MapLangID::const_iterator
, bool > res(
951 theMapLangID::get().insert( ::std::make_pair( pImpl
->mnLangID
, pImpl
)));
954 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: cross-inserted 0x"
955 << ::std::hex
<< pImpl
->mnLangID
<< " for '" << maBcp47
<< "'");
959 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted 0x"
960 << ::std::hex
<< pImpl
->mnLangID
<< " for '" << maBcp47
<< "' have '"
961 << (*res
.first
).second
->maBcp47
<< "'");
966 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted 0x"
967 << ::std::hex
<< pImpl
->mnLangID
<< " for '" << maBcp47
<< "' round-trip to '"
975 SAL_WARN( "i18nlangtag", "LanguageTag::registerImpl: can't register for 0x" << ::std::hex
<< mnLangID
);
976 pImpl
.reset( new LanguageTagImpl( *this));
979 // If we reach here for mbSystemLocale we didn't have theSystemLocale
981 if (mbSystemLocale
&& mbInitializedLangID
)
983 theSystemLocale::get() = pImpl
;
984 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: added system locale 0x"
985 << ::std::hex
<< pImpl
->mnLangID
<< " '" << pImpl
->maBcp47
<< "'");
// Const accessor for the implementation object behind this tag. The
// statements visible here obtain an impl through registerImpl(), store it in
// mpImpl and then copy its state into this instance via syncVarsFromRawImpl().
992 LanguageTagImpl
const * LanguageTag::getImpl() const
996 mpImpl
= registerImpl();
997 syncVarsFromRawImpl();
// Non-const accessor for the implementation object behind this tag. The
// statements visible here obtain an impl through registerImpl(), sync this
// instance's variables from it and return the raw pointer held by mpImpl.
1002 LanguageTagImpl
* LanguageTag::getImpl()
1006 mpImpl
= registerImpl();
1007 syncVarsFromRawImpl();
1009 return mpImpl
.get();
// Puts the per-instance state back to defaults: an empty Locale,
// LANGUAGE_SYSTEM as language ID, the system-locale flag set, and all
// "initialized" flags as well as the fallback flag cleared.
1012 void LanguageTag::resetVars()
1015 maLocale
= lang::Locale();
1017 mnLangID
= LANGUAGE_SYSTEM
;
1018 mbSystemLocale
= true;
1019 mbInitializedBcp47
= false;
1020 mbInitializedLocale
= false;
1021 mbInitializedLangID
= false;
1022 mbIsFallback
= false;
// Re-targets this tag at a BCP 47 string.
// The string is stored in maBcp47; an empty string means "system locale",
// and the BCP 47 value only counts as initialized when it is non-empty.
1026 LanguageTag
& LanguageTag::reset( const OUString
& rBcp47LanguageTag
)
1029 maBcp47
= rBcp47LanguageTag
;
1030 mbSystemLocale
= rBcp47LanguageTag
.isEmpty();
1031 mbInitializedBcp47
= !mbSystemLocale
;
// Re-targets this tag at a Locale.
// A Locale with an empty Language member means "system locale"; the locale
// only counts as initialized otherwise. Afterwards handleVendorVariant() is
// applied to maLocale.
1037 LanguageTag
& LanguageTag::reset( const css::lang::Locale
& rLocale
)
1041 mbSystemLocale
= rLocale
.Language
.isEmpty();
1042 mbInitializedLocale
= !mbSystemLocale
;
1043 handleVendorVariant( maLocale
);
// Re-targets this tag at an MS language ID.
// LANGUAGE_SYSTEM means "system locale"; the language ID only counts as
// initialized for any other value.
1048 LanguageTag
& LanguageTag::reset( LanguageType nLanguage
)
1051 mnLangID
= nLanguage
;
1052 mbSystemLocale
= nLanguage
== LANGUAGE_SYSTEM
;
1053 mbInitializedLangID
= !mbSystemLocale
;
// Canonicalizes maBcp47 and decides whether liblangtag is needed for this tag.
//
// Phase 1 (meIsLiblangtagNeeded == DECISION_DONTKNOW): try to recognize the
// tag without liblangtag, either as a known locale (a language ID other than
// LANGUAGE_DONTKNOW / LANGUAGE_SYSTEM), as a member of getKnowns(), or via an
// MsLangId override. Locale and language ID values that were only filled in
// to make that decision are flagged as temporary and reset again afterwards.
//
// Phase 2: otherwise the tag is parsed and canonicalized with liblangtag
// (lt_tag_parse() / lt_tag_canonicalize()).
//
// The outcome is recorded in meIsValid. The bool result is bChanged, which
// starts out false; the visible early return hands it back once the tag is
// found not to need liblangtag.
1058 bool LanguageTagImpl::canonicalize()
// Local helper whose destructor calls lt_tag_dump() on the tag, but only if
// the tag pointer was still null when the helper was constructed.
1065 explicit dumper( lt_tag_t
** pp
) : mpp( *pp
? NULL
: pp
) {}
1066 ~dumper() { if (mpp
&& *mpp
) lt_tag_dump( *mpp
); }
1068 dumper
aDumper( &mpImplLangtag
);
1071 bool bChanged
= false;
1073 // Side effect: have maBcp47 in any case, resolved system.
1074 // Some methods calling canonicalize() (or not calling it due to
1075 // meIsLiblangtagNeeded==DECISION_NO) rely on this! Hence do not set
1076 // meIsLiblangtagNeeded anywhere else than hereafter.
1079 // The simple cases and known locales don't need liblangtag processing,
1080 // which also avoids loading liblangtag data on startup.
1081 if (meIsLiblangtagNeeded
== DECISION_DONTKNOW
)
// These flags remember that maLocale / mnLangID were only filled in to make
// the decision and have to be reset further down.
1083 bool bTemporaryLocale
= false;
1084 bool bTemporaryLangID
= false;
1085 if (!mbInitializedLocale
&& !mbInitializedLangID
)
1089 mnLangID
= MsLangId::getRealLanguage( LANGUAGE_SYSTEM
);
1090 mbInitializedLangID
= true;
1094 // Now this is getting funny... we only have some BCP47 string
1095 // and want to determine if parsing it would be possible
1096 // without using liblangtag just to see if it is a simple known
1097 // locale or could fall back to one.
1098 OUString aLanguage
, aScript
, aCountry
, aVariants
;
1099 Extraction eExt
= simpleExtract( maBcp47
, aLanguage
, aScript
, aCountry
, aVariants
);
1100 if (eExt
!= EXTRACTED_NONE
)
1102 if (eExt
== EXTRACTED_LSC
|| eExt
== EXTRACTED_LV
)
1104 // Rebuild bcp47 with proper casing of tags.
1105 OUStringBuffer
aBuf( aLanguage
.getLength() + 1 + aScript
.getLength() +
1106 1 + aCountry
.getLength() + 1 + aVariants
.getLength());
1107 aBuf
.append( aLanguage
);
1108 if (!aScript
.isEmpty())
1109 aBuf
.append("-").append(aScript
);
1110 if (!aCountry
.isEmpty())
1111 aBuf
.append("-").append(aCountry
);
1112 if (!aVariants
.isEmpty())
1113 aBuf
.append("-").append(aVariants
);
1114 OUString
aStr( aBuf
.makeStringAndClear());
1116 if (maBcp47
!= aStr
)
1122 if (eExt
== EXTRACTED_LSC
&& aScript
.isEmpty())
1124 maLocale
.Language
= aLanguage
;
1125 maLocale
.Country
= aCountry
;
1127 else if (eExt
== EXTRACTED_C_LOCALE
)
1129 maLocale
.Language
= aLanguage
;
1130 maLocale
.Country
= aCountry
;
1134 maLocale
.Language
= I18NLANGTAG_QLT
;
1135 maLocale
.Country
= aCountry
;
1136 maLocale
.Variant
= maBcp47
;
1138 bTemporaryLocale
= mbInitializedLocale
= true;
1142 if (mbInitializedLangID
&& !mbInitializedLocale
)
1144 // Do not call getLocale() here because that prefers
1145 // convertBcp47ToLocale() which would end up in recursion via
1148 // Prepare to verify that we have a known locale, not just an
1149 // arbitrary MS-LangID.
1150 convertLangToLocale();
1152 if (mbInitializedLocale
)
1154 if (!mbInitializedLangID
)
1156 if (convertLocaleToLang( false))
1158 if (bTemporaryLocale
|| mnLangID
== LANGUAGE_DONTKNOW
)
1159 bTemporaryLangID
= true;
1161 if (mnLangID
!= LANGUAGE_DONTKNOW
&& mnLangID
!= LANGUAGE_SYSTEM
)
1162 meIsLiblangtagNeeded
= DECISION_NO
; // known locale
1165 const KnownTagSet
& rKnowns
= getKnowns();
1166 if (rKnowns
.find( maBcp47
) != rKnowns
.end())
1167 meIsLiblangtagNeeded
= DECISION_NO
; // known fallback
1169 // We may have an internal override "canonicalization".
1170 lang::Locale
aNew( MsLangId::Conversion::getOverride( maLocale
));
1171 if (!aNew
.Language
.isEmpty() &&
1172 (aNew
.Language
!= maLocale
.Language
||
1173 aNew
.Country
!= maLocale
.Country
||
1174 aNew
.Variant
!= maLocale
.Variant
))
1176 maBcp47
= LanguageTagImpl::convertToBcp47( aNew
);
1178 meIsIsoLocale
= DECISION_DONTKNOW
;
1179 meIsIsoODF
= DECISION_DONTKNOW
;
1180 meIsLiblangtagNeeded
= DECISION_NO
; // known locale
// Undo the values that were only set up to make the decision above.
1183 if (bTemporaryLocale
)
1185 mbInitializedLocale
= false;
1186 maLocale
= lang::Locale();
1188 if (bTemporaryLangID
)
1190 mbInitializedLangID
= false;
1191 mnLangID
= LANGUAGE_DONTKNOW
;
1194 if (meIsLiblangtagNeeded
== DECISION_NO
)
1196 meIsValid
= DECISION_YES
; // really, known must be valid ...
1197 return bChanged
; // that's it
// From here on the tag is handled by liblangtag.
1200 meIsLiblangtagNeeded
= DECISION_YES
;
1201 SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: using liblangtag for '" << maBcp47
<< "'");
1205 theDataRef::get().init();
1206 mpImplLangtag
= lt_tag_new();
1211 if (!lt_tag_parse_disabled
&& lt_tag_parse(mpImplLangtag
, OUStringToOString(maBcp47
, RTL_TEXTENCODING_UTF8
).getStr(), &aError
.p
))
1213 char* pTag
= lt_tag_canonicalize( mpImplLangtag
, &aError
.p
);
1214 SAL_WARN_IF( !pTag
, "i18nlangtag", "LanguageTagImpl::canonicalize: could not canonicalize '" << maBcp47
<< "'");
1217 OUString
aNew( OUString::createFromAscii( pTag
));
1218 // Make the lt_tag_t follow the new string if different, which
1219 // removes default script and such.
1220 if (maBcp47
!= aNew
)
1224 meIsIsoLocale
= DECISION_DONTKNOW
;
1225 meIsIsoODF
= DECISION_DONTKNOW
;
1226 if (!lt_tag_parse( mpImplLangtag
, pTag
, &aError
.p
))
1228 SAL_WARN( "i18nlangtag", "LanguageTagImpl::canonicalize: could not reparse '" << maBcp47
<< "'");
1230 meIsValid
= DECISION_NO
;
1235 meIsValid
= DECISION_YES
;
1241 SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: could not parse '" << maBcp47
<< "'");
1243 meIsValid
= DECISION_NO
;
// Runs canonicalize() unless liblangtag was already decided to be unnecessary
// or a liblangtag tag object already exists. The visible follow-up statements
// re-derive the Locale and/or language ID from the BCP 47 string for whichever
// of them is marked as initialized.
1248 bool LanguageTagImpl::synCanonicalize()
1250 bool bChanged
= false;
1251 if (meIsLiblangtagNeeded
!= DECISION_NO
&& !mpImplLangtag
)
1253 bChanged
= canonicalize();
1256 if (mbInitializedLocale
)
1257 convertBcp47ToLocale();
1258 if (mbInitializedLangID
)
1259 convertBcp47ToLang();
// Synchronizes this instance with its impl after the impl may have changed.
// bRegister is true when this instance's initialized BCP 47 string or
// initialized language ID differs from the impl's; that case is logged as
// "re-registering". The instance variables are then copied from the impl, and
// the visible last statement assigns a fresh impl from registerImpl().
1266 void LanguageTag::syncFromImpl()
1268 LanguageTagImpl
* pImpl
= getImpl();
1269 bool bRegister
= ((mbInitializedBcp47
&& maBcp47
!= pImpl
->maBcp47
) ||
1270 (mbInitializedLangID
&& mnLangID
!= pImpl
->mnLangID
));
1271 SAL_INFO_IF( bRegister
, "i18nlangtag",
1272 "LanguageTag::syncFromImpl: re-registering, '" << pImpl
->maBcp47
<< "' vs '" << maBcp47
<<
1273 " and 0x" << ::std::hex
<< pImpl
->mnLangID
<< " vs 0x" << ::std::hex
<< mnLangID
);
1274 syncVarsFromRawImpl();
1276 mpImpl
= registerImpl();
// Copies the impl's state into this instance's variables, either indirectly
// through getImpl() (which syncs as a side effect) or directly through
// syncVarsFromRawImpl().
1280 void LanguageTag::syncVarsFromImpl() const
1283 getImpl(); // with side effect syncVarsFromRawImpl()
1285 syncVarsFromRawImpl();
// Copies the BCP 47 string, Locale and language ID together with their
// "initialized" flags from the current impl into this instance. It reads
// mpImpl directly instead of going through getImpl(), as the existing comment
// demands. Being a const method, it can only write the mutable members.
1289 void LanguageTag::syncVarsFromRawImpl() const
1291 // Do not use getImpl() here.
1292 LanguageTagImpl
* pImpl
= mpImpl
.get();
1296 // Obviously only mutable variables.
1297 mbInitializedBcp47
= pImpl
->mbInitializedBcp47
;
1298 maBcp47
= pImpl
->maBcp47
;
1299 mbInitializedLocale
= pImpl
->mbInitializedLocale
;
1300 maLocale
= pImpl
->maLocale
;
1301 mbInitializedLangID
= pImpl
->mbInitializedLangID
;
1302 mnLangID
= pImpl
->mnLangID
;
// Forwards to the impl's synCanonicalize() and keeps its result in bChanged.
1306 bool LanguageTag::synCanonicalize()
1308 bool bChanged
= getImpl()->synCanonicalize();
1315 void LanguageTagImpl::convertLocaleToBcp47()
1317 if (mbSystemLocale
&& !mbInitializedLocale
)
1318 convertLangToLocale();
1320 if (maLocale
.Language
.isEmpty())
1322 // Do not call LanguageTag::convertToBcp47(Locale) that for an empty
1323 // locale via LanguageTag::convertToBcp47(LanguageType) and
1324 // LanguageTag::convertToLocale(LanguageType) would instantiate another
1326 maLocale
= MsLangId::Conversion::convertLanguageToLocale( LANGUAGE_SYSTEM
);
1328 if (maLocale
.Language
.isEmpty())
1330 maBcp47
.clear(); // bad luck
1332 else if (maLocale
.Language
== I18NLANGTAG_QLT
)
1334 maBcp47
= maLocale
.Variant
;
1335 meIsIsoLocale
= DECISION_NO
;
1339 maBcp47
= LanguageTag::convertToBcp47( maLocale
);
1341 mbInitializedBcp47
= true;
// Derives mnLangID from maLocale and marks the language ID as initialized.
//
// bAllowOnTheFlyID: when the locale maps to no known language ID
//     (LANGUAGE_DONTKNOW), allows the on-the-fly path, which ends in
//     registerOnTheFly( mnLangID ).
//
// bRemapped is set when a lookup through convertIsoNamesToLanguage() found a
// language ID whose round-trip back to Locale and BCP 47 produced a string
// different from the original maBcp47.
1345 bool LanguageTagImpl::convertLocaleToLang( bool bAllowOnTheFlyID
)
1347 bool bRemapped
= false;
1350 mnLangID
= MsLangId::getRealLanguage( LANGUAGE_SYSTEM
);
1354 mnLangID
= MsLangId::Conversion::convertLocaleToLanguage( maLocale
);
1355 if (mnLangID
== LANGUAGE_DONTKNOW
)
1357 // convertLocaleToLanguage() only searches in ISO and private
1358 // definitions, search in remaining definitions, i.e. for the "C"
1359 // locale and non-standard things like "sr-latin" or "german" to
1360 // resolve to a known locale, skipping ISO lll-CC that were already
1362 mnLangID
= MsLangId::Conversion::convertIsoNamesToLanguage( maLocale
.Language
, maLocale
.Country
, true);
1363 if (mnLangID
!= LANGUAGE_DONTKNOW
)
1365 // If one found, convert back and adapt Locale and Bcp47
1366 // strings so we have a matching entry.
1367 OUString
aOrgBcp47( maBcp47
);
1368 convertLangToLocale();
1369 convertLocaleToBcp47();
1370 bRemapped
= (maBcp47
!= aOrgBcp47
);
1373 if (mnLangID
== LANGUAGE_DONTKNOW
&& bAllowOnTheFlyID
)
1377 // For language-only (including script) look if we know some
1378 // locale of that language and if so try to use the primary
1379 // language ID of that instead of generating an on-the-fly ID.
1380 if (getCountry().isEmpty() && isIsoODF())
1382 lang::Locale
aLoc( MsLangId::Conversion::lookupFallbackLocale( maLocale
));
1383 // 'en-US' is last resort, do not use except when looking
1385 if (aLoc
.Language
!= "en" || getLanguage() == "en")
1387 mnLangID
= MsLangId::Conversion::convertLocaleToLanguage( aLoc
);
1388 if (mnLangID
!= LANGUAGE_DONTKNOW
)
1389 mnLangID
= MsLangId::getPrimaryLanguage( mnLangID
);
1392 registerOnTheFly( mnLangID
);
1396 SAL_WARN( "i18nlangtag", "LanguageTagImpl::convertLocaleToLang: with bAllowOnTheFlyID invalid '"
1401 mbInitializedLangID
= true;
// Forwards to the impl's convertLocaleToLang() with on-the-fly IDs allowed.
1406 void LanguageTag::convertLocaleToLang()
1408 getImpl()->convertLocaleToLang( true);
// Derives maLocale from the BCP 47 string and marks the Locale as initialized.
// Two fillings are visible. One takes Language and Country from the parsed
// tag and leaves Variant empty. The other stores I18NLANGTAG_QLT as Language,
// getCountry() as Country and the complete BCP 47 string as Variant.
// bIso holds the result of isIsoLocale().
1413 void LanguageTagImpl::convertBcp47ToLocale()
1415 bool bIso
= isIsoLocale();
1418 maLocale
.Language
= getLanguageFromLangtag();
1419 maLocale
.Country
= getRegionFromLangtag();
1420 maLocale
.Variant
.clear();
1424 maLocale
.Language
= I18NLANGTAG_QLT
;
1425 maLocale
.Country
= getCountry();
1426 maLocale
.Variant
= maBcp47
;
1428 mbInitializedLocale
= true;
// Forwards to the impl's convertBcp47ToLocale().
1432 void LanguageTag::convertBcp47ToLocale()
1434 getImpl()->convertBcp47ToLocale();
// Derives mnLangID from the BCP 47 string and marks the language ID as
// initialized. Visible paths: the real system language from MsLangId, or a
// conversion via the Locale (created from the BCP 47 string first if it is
// not initialized yet) with on-the-fly IDs allowed.
1439 void LanguageTagImpl::convertBcp47ToLang()
1443 mnLangID
= MsLangId::getRealLanguage( LANGUAGE_SYSTEM
);
1447 if (!mbInitializedLocale
)
1448 convertBcp47ToLocale();
1449 convertLocaleToLang( true);
1451 mbInitializedLangID
= true;
// Forwards to the impl's convertBcp47ToLang().
1455 void LanguageTag::convertBcp47ToLang()
1457 getImpl()->convertBcp47ToLang();
1462 void LanguageTagImpl::convertLangToLocale()
1464 if (mbSystemLocale
&& !mbInitializedLangID
)
1466 mnLangID
= MsLangId::getRealLanguage( LANGUAGE_SYSTEM
);
1467 mbInitializedLangID
= true;
1469 // Resolve system here! The original is remembered as mbSystemLocale.
1470 maLocale
= MsLangId::Conversion::convertLanguageToLocale( mnLangID
);
1471 mbInitializedLocale
= true;
// Forwards to the impl's convertLangToLocale().
1475 void LanguageTag::convertLangToLocale()
1477 getImpl()->convertLangToLocale();
1482 void LanguageTagImpl::convertLangToBcp47()
1484 if (!mbInitializedLocale
)
1485 convertLangToLocale();
1486 convertLocaleToBcp47();
1487 mbInitializedBcp47
= true;
// Handles a Locale that was taken over from an rtl_Locale and carries a
// non-empty Variant. Language, "_", Country and Variant are joined into one
// UTF-8 byte string for conversion. Two conversions are visible:
//  - via liblangtag's lt_tag_convert_from_locale(), yielding maBcp47;
//  - via MsLangId::convertUnxByteStringToLanguage(), yielding mnLangID, with
//    LANGUAGE_ENGLISH_US substituted (and a warning logged) when unknown.
// The visible tail resets maLocale and marks the Locale as not initialized.
// NOTE(review): the FIXME below suggests the liblangtag path may be compiled
// out; which of the two conversions is active cannot be told from the text
// shown here - confirm against the preprocessor conditionals.
1491 void LanguageTag::convertFromRtlLocale()
1493 // The rtl_Locale follows the Open Group Base Specification,
1494 // 8.2 Internationalization Variables
1495 // language[_territory][.codeset][@modifier]
1496 // On GNU/Linux systems usually being glibc locales.
1497 // sal/osl/unx/nlsupport.c _parse_locale() parses them into
1498 // Language: language 2 or 3 alpha code
1499 // Country: [territory] 2 alpha code
1500 // Variant: [.codeset][@modifier]
1501 // Variant effectively contains anything that follows the territory, not
1502 // looking for '.' dot delimiter or '@' modifier content.
1503 if (!maLocale
.Variant
.isEmpty())
1505 OString aStr
= OUStringToOString( maLocale
.Language
+ "_" + maLocale
.Country
+ maLocale
.Variant
,
1506 RTL_TEXTENCODING_UTF8
);
1507 /* FIXME: let liblangtag parse this entirely with
1508 * lt_tag_convert_from_locale() but that needs a patch to pass the
1512 theDataRef::get().init();
1513 mpImplLangtag
= lt_tag_convert_from_locale( aStr
.getStr(), &aError
.p
);
1514 maBcp47
= OStringToOUString( lt_tag_get_string( mpImplLangtag
), RTL_TEXTENCODING_UTF8
);
1515 mbInitializedBcp47
= true;
1517 mnLangID
= MsLangId::convertUnxByteStringToLanguage( aStr
);
1518 if (mnLangID
== LANGUAGE_DONTKNOW
)
1520 SAL_WARN( "i18nlangtag", "LanguageTag(rtl_Locale) - unknown: " << aStr
);
1521 mnLangID
= LANGUAGE_ENGLISH_US
; // we need _something_ here
1523 mbInitializedLangID
= true;
1525 maLocale
= lang::Locale();
1526 mbInitializedLocale
= false;
// Returns the BCP 47 string by reference, creating it on demand: if it is
// not initialized yet it is derived from the Locale when that is initialized;
// the other visible conversion derives it from the language ID.
// The method is const, so the conversions are reached through const_cast.
1531 const OUString
& LanguageTagImpl::getBcp47() const
1533 if (!mbInitializedBcp47
)
1535 if (mbInitializedLocale
)
1536 const_cast<LanguageTagImpl
*>(this)->convertLocaleToBcp47();
1538 const_cast<LanguageTagImpl
*>(this)->convertLangToBcp47();
// Returns the BCP 47 string of this tag.
// bResolveSystem: if false and this tag denotes the system locale, the shared
//     empty string is returned instead of a resolved value.
// If the string is not initialized, the impl is asked to create it and this
// instance is synchronized from the impl afterwards.
1544 const OUString
& LanguageTag::getBcp47( bool bResolveSystem
) const
1546 if (!bResolveSystem
&& mbSystemLocale
)
1547 return theEmptyBcp47::get();
1548 if (!mbInitializedBcp47
)
1550 if (!mbInitializedBcp47
)
1552 getImpl()->getBcp47();
1553 const_cast<LanguageTag
*>(this)->syncFromImpl();
// Obtains the language subtag of this tag.
// With liblangtag the value comes from lt_tag_get_language() /
// lt_lang_get_tag(); a null result from either is logged as a warning. The
// other visible source is the simple-extraction cache (maCachedLanguage),
// filled by cacheSimpleLSCV() if it was not cached before.
1559 OUString
LanguageTagImpl::getLanguageFromLangtag()
1563 if (maBcp47
.isEmpty())
1567 const lt_lang_t
* pLangT
= lt_tag_get_language( mpImplLangtag
);
1568 SAL_WARN_IF( !pLangT
, "i18nlangtag",
1569 "LanguageTag::getLanguageFromLangtag: pLangT==NULL for '" << maBcp47
<< "'");
1572 const char* pLang
= lt_lang_get_tag( pLangT
);
1573 SAL_WARN_IF( !pLang
, "i18nlangtag",
1574 "LanguageTag::getLanguageFromLangtag: pLang==NULL for '" << maBcp47
<< "'");
1576 aLanguage
= OUString::createFromAscii( pLang
);
1580 if (mbCachedLanguage
|| cacheSimpleLSCV())
1581 aLanguage
= maCachedLanguage
;
// Obtains the script subtag of this tag.
// With liblangtag the value comes from lt_tag_get_script() /
// lt_script_get_tag(); a null script object is legitimate (default script),
// a null tag string is logged as a warning. The other visible source is the
// simple-extraction cache (maCachedScript), filled by cacheSimpleLSCV() if it
// was not cached before.
1587 OUString
LanguageTagImpl::getScriptFromLangtag()
1591 if (maBcp47
.isEmpty())
1595 const lt_script_t
* pScriptT
= lt_tag_get_script( mpImplLangtag
);
1596 // pScriptT==NULL is valid for default scripts
1599 const char* pScript
= lt_script_get_tag( pScriptT
);
1600 SAL_WARN_IF( !pScript
, "i18nlangtag", "LanguageTag::getScriptFromLangtag: pScript==NULL");
1602 aScript
= OUString::createFromAscii( pScript
);
1606 if (mbCachedScript
|| cacheSimpleLSCV())
1607 aScript
= maCachedScript
;
// Obtains the region subtag of this tag.
// With liblangtag the value comes from lt_tag_get_region() /
// lt_region_get_tag(). A missing region object is only warned about when the
// length of maBcp47 is none of 2, 3, 7 or 8, i.e. when the tag does not look
// like a language-only or language-script tag. The other visible source is
// the simple-extraction cache (maCachedCountry), filled by cacheSimpleLSCV()
// if it was not cached before.
1613 OUString
LanguageTagImpl::getRegionFromLangtag()
1617 if (maBcp47
.isEmpty())
1621 const lt_region_t
* pRegionT
= lt_tag_get_region( mpImplLangtag
);
1622 // pRegionT==NULL is valid for language only tags, rough check here
1623 // that does not take sophisticated tags into account that actually
1624 // should have a region, check for ll, lll, ll-Ssss and lll-Ssss so
1625 // that ll-CC and lll-CC actually fail.
1626 SAL_WARN_IF( !pRegionT
&&
1627 maBcp47
.getLength() != 2 && maBcp47
.getLength() != 3 &&
1628 maBcp47
.getLength() != 7 && maBcp47
.getLength() != 8,
1629 "i18nlangtag", "LanguageTag::getRegionFromLangtag: pRegionT==NULL for '" << maBcp47
<< "'");
1632 const char* pRegion
= lt_region_get_tag( pRegionT
);
1633 SAL_WARN_IF( !pRegion
, "i18nlangtag",
1634 "LanguageTag::getRegionFromLangtag: pRegion==NULL for'" << maBcp47
<< "'");
1636 aRegion
= OUString::createFromAscii( pRegion
);
1640 if (mbCachedCountry
|| cacheSimpleLSCV())
1641 aRegion
= maCachedCountry
;
// Obtains all variant subtags of this tag as one string.
// With liblangtag the list from lt_tag_get_variants() is walked and the
// variant tags are joined with "-". The other visible source is the
// simple-extraction cache (maCachedVariants), filled by cacheSimpleLSCV() if
// it was not cached before.
1647 OUString
LanguageTagImpl::getVariantsFromLangtag()
1649 OUStringBuffer aVariants
;
1651 if (maBcp47
.isEmpty())
1655 const lt_list_t
* pVariantsT
= lt_tag_get_variants( mpImplLangtag
);
1656 for (const lt_list_t
* pE
= pVariantsT
; pE
; pE
= lt_list_next( pE
))
1658 const lt_variant_t
* pVariantT
= static_cast<const lt_variant_t
*>(lt_list_value( pE
));
1661 const char* p
= lt_variant_get_tag( pVariantT
);
// Separate consecutive variants with a hyphen.
1664 if (!aVariants
.isEmpty())
1665 aVariants
.append("-");
1666 aVariants
.appendAscii(p
);
1673 if (mbCachedVariants
|| cacheSimpleLSCV())
1674 aVariants
= maCachedVariants
;
1676 return aVariants
.makeStringAndClear();
// Returns the Locale of this tag.
// bResolveSystem: if false and this tag denotes the system locale, the shared
//     empty Locale is returned instead of a resolved value.
// If the Locale is not initialized it is derived from the BCP 47 string when
// that is initialized; the other visible conversion derives it from the
// language ID.
1680 const css::lang::Locale
& LanguageTag::getLocale( bool bResolveSystem
) const
1682 if (!bResolveSystem
&& mbSystemLocale
)
1683 return theEmptyLocale::get();
1684 if (!mbInitializedLocale
)
1686 if (!mbInitializedLocale
)
1688 if (mbInitializedBcp47
)
1689 const_cast<LanguageTag
*>(this)->convertBcp47ToLocale();
1691 const_cast<LanguageTag
*>(this)->convertLangToLocale();
// Returns the MS language ID of this tag.
// bResolveSystem: if false and this tag denotes the system locale,
//     LANGUAGE_SYSTEM is returned instead of a resolved value.
// If the language ID is not initialized it is derived from the BCP 47 string
// when that is initialized; the other visible conversion derives it from the
// Locale. If the ID then is LANGUAGE_DONTKNOW, or LANGUAGE_SYSTEM although
// this is not a system locale tag, synCanonicalize() is run to try resolving
// the tag.
1697 LanguageType
LanguageTag::getLanguageType( bool bResolveSystem
) const
1699 if (!bResolveSystem
&& mbSystemLocale
)
1700 return LANGUAGE_SYSTEM
;
1701 if (!mbInitializedLangID
)
1703 if (!mbInitializedLangID
)
1705 if (mbInitializedBcp47
)
1706 const_cast<LanguageTag
*>(this)->convertBcp47ToLang();
1709 const_cast<LanguageTag
*>(this)->convertLocaleToLang();
1711 /* Resolve a locale only unknown due to some redundant information,
1712 * like 'de-Latn-DE' with script tag. Never call canonicalize()
1713 * from within convert...() methods due to possible recursion, so
1715 if ((!mbSystemLocale
&& mnLangID
== LANGUAGE_SYSTEM
) || mnLangID
== LANGUAGE_DONTKNOW
)
1716 const_cast<LanguageTag
*>(this)->synCanonicalize();
// Writes language, script and country of this tag to the three out
// parameters. Two fillings are visible: one takes the values from
// getLanguage() / getScript() / getCountry() as they are; the other passes
// each value on only if it satisfies the matching isIso...() check and
// stores an empty string otherwise.
1723 void LanguageTag::getIsoLanguageScriptCountry( OUString
& rLanguage
, OUString
& rScript
, OUString
& rCountry
) const
1725 // Calling isIsoODF() first is a predicate for getLanguage(), getScript()
1726 // and getCountry() to work correctly in this context.
1729 rLanguage
= getLanguage();
1730 rScript
= getScript();
1731 rCountry
= getCountry();
1735 rLanguage
= (LanguageTag::isIsoLanguage( getLanguage()) ? getLanguage() : OUString());
1736 rScript
= (LanguageTag::isIsoScript( getScript()) ? getScript() : OUString());
1737 rCountry
= (LanguageTag::isIsoCountry( getCountry()) ? getCountry() : OUString());
1745 bool isLowerAscii( sal_Unicode c
)
1747 return 'a' <= c
&& c
<= 'z';
1750 bool isUpperAscii( sal_Unicode c
)
1752 return 'A' <= c
&& c
<= 'Z';
1759 bool LanguageTag::isIsoLanguage( const OUString
& rLanguage
)
1761 /* TODO: ignore case? For now let's see where rubbish is used. */
1762 bool b2chars
= rLanguage
.getLength() == 2;
1763 if ((b2chars
|| rLanguage
.getLength() == 3) &&
1764 isLowerAscii( rLanguage
[0]) && isLowerAscii( rLanguage
[1]) &&
1765 (b2chars
|| isLowerAscii( rLanguage
[2])))
1767 SAL_WARN_IF( ((rLanguage
.getLength() == 2 || rLanguage
.getLength() == 3) &&
1768 (isUpperAscii( rLanguage
[0]) || isUpperAscii( rLanguage
[1]))) ||
1769 (rLanguage
.getLength() == 3 && isUpperAscii( rLanguage
[2])), "i18nlangtag",
1770 "LanguageTag::isIsoLanguage: rejecting upper case " << rLanguage
);
1776 bool LanguageTag::isIsoCountry( const OUString
& rRegion
)
1778 /* TODO: ignore case? For now let's see where rubbish is used. */
1779 if (rRegion
.isEmpty() ||
1780 (rRegion
.getLength() == 2 && isUpperAscii( rRegion
[0]) && isUpperAscii( rRegion
[1])))
1782 SAL_WARN_IF( rRegion
.getLength() == 2 && (isLowerAscii( rRegion
[0]) || isLowerAscii( rRegion
[1])),
1783 "i18nlangtag", "LanguageTag::isIsoCountry: rejecting lower case " << rRegion
);
1789 bool LanguageTag::isIsoScript( const OUString
& rScript
)
1791 /* TODO: ignore case? For now let's see where rubbish is used. */
1792 if (rScript
.isEmpty() ||
1793 (rScript
.getLength() == 4 &&
1794 isUpperAscii( rScript
[0]) && isLowerAscii( rScript
[1]) &&
1795 isLowerAscii( rScript
[2]) && isLowerAscii( rScript
[3])))
1797 SAL_WARN_IF( rScript
.getLength() == 4 &&
1798 (isLowerAscii( rScript
[0]) || isUpperAscii( rScript
[1]) ||
1799 isUpperAscii( rScript
[2]) || isUpperAscii( rScript
[3])),
1800 "i18nlangtag", "LanguageTag::isIsoScript: rejecting case mismatch " << rScript
);
1805 OUString
const & LanguageTagImpl::getLanguage() const
1807 if (!mbCachedLanguage
)
1809 maCachedLanguage
= const_cast<LanguageTagImpl
*>(this)->getLanguageFromLangtag();
1810 mbCachedLanguage
= true;
1812 return maCachedLanguage
;
1816 OUString
LanguageTag::getLanguage() const
1818 LanguageTagImpl
const* pImpl
= getImpl();
1819 if (pImpl
->mbCachedLanguage
)
1820 return pImpl
->maCachedLanguage
;
1821 OUString
aRet( pImpl
->getLanguage());
1822 const_cast<LanguageTag
*>(this)->syncFromImpl();
1827 OUString
const & LanguageTagImpl::getScript() const
1829 if (!mbCachedScript
)
1831 maCachedScript
= const_cast<LanguageTagImpl
*>(this)->getScriptFromLangtag();
1832 mbCachedScript
= true;
1834 return maCachedScript
;
1838 OUString
LanguageTag::getScript() const
1840 LanguageTagImpl
const* pImpl
= getImpl();
1841 if (pImpl
->mbCachedScript
)
1842 return pImpl
->maCachedScript
;
1843 OUString
aRet( pImpl
->getScript());
1844 const_cast<LanguageTag
*>(this)->syncFromImpl();
1849 OUString
LanguageTag::getLanguageAndScript() const
1851 OUString
aLanguageScript( getLanguage());
1852 OUString
aScript( getScript());
1853 if (!aScript
.isEmpty())
1855 aLanguageScript
+= "-" + aScript
;
1857 return aLanguageScript
;
1861 OUString
const & LanguageTagImpl::getCountry() const
1863 if (!mbCachedCountry
)
1865 maCachedCountry
= const_cast<LanguageTagImpl
*>(this)->getRegionFromLangtag();
1866 if (!LanguageTag::isIsoCountry( maCachedCountry
))
1867 maCachedCountry
.clear();
1868 mbCachedCountry
= true;
1870 return maCachedCountry
;
1874 OUString
LanguageTag::getCountry() const
1876 LanguageTagImpl
const* pImpl
= getImpl();
1877 if (pImpl
->mbCachedCountry
)
1878 return pImpl
->maCachedCountry
;
1879 OUString
aRet( pImpl
->getCountry());
1880 const_cast<LanguageTag
*>(this)->syncFromImpl();
1885 OUString
LanguageTagImpl::getRegion() const
1887 return const_cast<LanguageTagImpl
*>(this)->getRegionFromLangtag();
1891 OUString
const & LanguageTagImpl::getVariants() const
1893 if (!mbCachedVariants
)
1895 maCachedVariants
= const_cast<LanguageTagImpl
*>(this)->getVariantsFromLangtag();
1896 mbCachedVariants
= true;
1898 return maCachedVariants
;
1902 OUString
LanguageTag::getVariants() const
1904 LanguageTagImpl
const * pImpl
= getImpl();
1905 if (pImpl
->mbCachedVariants
)
1906 return pImpl
->maCachedVariants
;
1907 OUString
aRet( pImpl
->getVariants());
1908 const_cast<LanguageTag
*>(this)->syncFromImpl();
// Returns the glibc style locale string for this tag, cached after the first
// successful conversion. The string is produced by liblangtag's
// lt_tag_convert_to_locale(); to have a liblangtag tag object available the
// method sets meIsLiblangtagNeeded to DECISION_YES and runs synCanonicalize().
1912 OUString
LanguageTagImpl::getGlibcLocaleString() const
1914 if (mbCachedGlibcString
)
1915 return maCachedGlibcString
;
1919 meIsLiblangtagNeeded
= DECISION_YES
;
1920 const_cast<LanguageTagImpl
*>(this)->synCanonicalize();
1924 char* pLang
= lt_tag_convert_to_locale(mpImplLangtag
, nullptr);
1927 maCachedGlibcString
= OUString::createFromAscii( pLang
);
1928 mbCachedGlibcString
= true;
1932 return maCachedGlibcString
;
// Builds a glibc style locale string that includes rEncoding.
// Visible constructions:
//  - language + rEncoding when the country is empty;
//  - language + "_" + country + rEncoding;
//  - the impl's glibc string with rEncoding spliced in at the position of the
//    '@' found by indexOf().
// rEncoding is used as passed in; this function adds no separator of its own.
1935 OUString
LanguageTag::getGlibcLocaleString( const OUString
& rEncoding
) const
1940 OUString
aCountry( getCountry());
1941 if (aCountry
.isEmpty())
1942 aRet
= getLanguage() + rEncoding
;
1944 aRet
= getLanguage() + "_" + aCountry
+ rEncoding
;
1948 aRet
= getImpl()->getGlibcLocaleString();
1949 sal_Int32 nAt
= aRet
.indexOf('@');
1951 aRet
= aRet
.copy(0, nAt
) + rEncoding
+ aRet
.copy(nAt
);
// Tells whether this tag has a script subtag, i.e. whether the cached script
// string is non-empty. The case of a script that is not cached yet is checked
// first.
1958 bool LanguageTagImpl::hasScript() const
1960 if (!mbCachedScript
)
1962 return !maCachedScript
.isEmpty();
1966 bool LanguageTag::hasScript() const
1968 bool bRet
= getImpl()->hasScript();
1969 const_cast<LanguageTag
*>(this)->syncFromImpl();
1974 LanguageTag::ScriptType
LanguageTagImpl::getScriptType() const
1976 return meScriptType
;
1980 LanguageTag::ScriptType
LanguageTag::getScriptType() const
1982 return getImpl()->getScriptType();
// Sets the script type of this tag. The only visible condition tests whether
// the current type still is ScriptType::UNKNOWN, which the existing comment
// calls a poor man's clash resolution.
1986 void LanguageTagImpl::setScriptType(LanguageTag::ScriptType st
)
1988 if (meScriptType
== LanguageTag::ScriptType::UNKNOWN
) // poor man's clash resolution
1993 void LanguageTag::setScriptType(LanguageTag::ScriptType st
)
1995 getImpl()->setScriptType(st
);
// Tries to split maBcp47 into language, script, country and variants with
// simpleExtract(), i.e. without liblangtag. bRet is true when the extraction
// result is EXTRACTED_LSC or EXTRACTED_LV. The visible assignments store the
// four parts in the maCached... members and set all four mbCached... flags.
1999 bool LanguageTagImpl::cacheSimpleLSCV()
2001 OUString aLanguage
, aScript
, aCountry
, aVariants
;
2002 Extraction eExt
= simpleExtract( maBcp47
, aLanguage
, aScript
, aCountry
, aVariants
);
2003 bool bRet
= (eExt
== EXTRACTED_LSC
|| eExt
== EXTRACTED_LV
);
2006 maCachedLanguage
= aLanguage
;
2007 maCachedScript
= aScript
;
2008 maCachedCountry
= aCountry
;
2009 maCachedVariants
= aVariants
;
2010 mbCachedLanguage
= mbCachedScript
= mbCachedCountry
= mbCachedVariants
= true;
2016 bool LanguageTagImpl::isIsoLocale() const
2018 if (meIsIsoLocale
== DECISION_DONTKNOW
)
2020 const_cast<LanguageTagImpl
*>(this)->synCanonicalize();
2021 // It must be at most ll-CC or lll-CC
2022 // Do not use getCountry() here, use getRegion() instead.
2023 meIsIsoLocale
= ((maBcp47
.isEmpty() ||
2024 (maBcp47
.getLength() <= 6 && LanguageTag::isIsoLanguage( getLanguage()) &&
2025 LanguageTag::isIsoCountry( getRegion()))) ? DECISION_YES
: DECISION_NO
);
2027 return meIsIsoLocale
== DECISION_YES
;
2031 bool LanguageTag::isIsoLocale() const
2033 bool bRet
= getImpl()->isIsoLocale();
2034 const_cast<LanguageTag
*>(this)->syncFromImpl();
// Tells whether this tag can be expressed with ISO language, script and
// country codes only; the decision is remembered in meIsIsoODF.
// Visible outcomes: DECISION_NO when the script is not an ISO script,
// DECISION_YES for the usual lll-CC case, and otherwise DECISION_YES only if
// the tag is at most 11 characters long, language, region and script all pass
// their isIso...() checks and there are no variants.
2039 bool LanguageTagImpl::isIsoODF() const
2041 if (meIsIsoODF
== DECISION_DONTKNOW
)
2043 const_cast<LanguageTagImpl
*>(this)->synCanonicalize();
2044 if (!LanguageTag::isIsoScript( getScript()))
2046 meIsIsoODF
= DECISION_NO
;
2049 // The usual case is lll-CC so simply check that first.
2052 meIsIsoODF
= DECISION_YES
;
2055 // If this is not ISO locale for which script must not exist it can
2056 // still be ISO locale plus ISO script lll-Ssss-CC, but not ll-vvvv ...
2058 meIsIsoODF
= ((maBcp47
.getLength() <= 11 && LanguageTag::isIsoLanguage( getLanguage()) &&
2059 LanguageTag::isIsoCountry( getRegion()) && LanguageTag::isIsoScript( getScript()) &&
2060 getVariants().isEmpty()) ? DECISION_YES
: DECISION_NO
);
2062 return meIsIsoODF
== DECISION_YES
;
2066 bool LanguageTag::isIsoODF() const
2068 bool bRet
= getImpl()->isIsoODF();
2069 const_cast<LanguageTag
*>(this)->syncFromImpl();
2074 bool LanguageTagImpl::isValidBcp47() const
2076 if (meIsValid
== DECISION_DONTKNOW
)
2078 const_cast<LanguageTagImpl
*>(this)->synCanonicalize();
2079 SAL_WARN_IF( meIsValid
== DECISION_DONTKNOW
, "i18nlangtag",
2080 "LanguageTag::isValidBcp47: canonicalize() didn't set meIsValid");
2082 return meIsValid
== DECISION_YES
;
2086 bool LanguageTag::isValidBcp47() const
2088 bool bRet
= getImpl()->isValidBcp47();
2089 const_cast<LanguageTag
*>(this)->syncFromImpl();
// Turns this tag into a fallback locale and sets mbIsFallback.
// The fallback is looked up with MsLangId::Conversion::lookupFallbackLocale()
// and compared with the current Locale member by member. If the lookup gave
// only the last resort "en-US" for a non-"en" locale, the entries of
// getFallbackStrings( false) are tried in order until one yields something
// other than "en-US". The source and target locale are logged.
2094 LanguageTag
& LanguageTag::makeFallback()
2098 const lang::Locale
& rLocale1
= getLocale();
2099 lang::Locale
aLocale2( MsLangId::Conversion::lookupFallbackLocale( rLocale1
));
2100 if ( rLocale1
.Language
!= aLocale2
.Language
||
2101 rLocale1
.Country
!= aLocale2
.Country
||
2102 rLocale1
.Variant
!= aLocale2
.Variant
)
2104 if (rLocale1
.Language
!= "en" && aLocale2
.Language
== "en" && aLocale2
.Country
== "US")
2106 // "en-US" is the last resort fallback, try if we get a better
2107 // one for the fallback hierarchy of a non-"en" locale.
2108 ::std::vector
< OUString
> aFallbacks( getFallbackStrings( false));
2109 for (auto const& fallback
: aFallbacks
)
2111 lang::Locale
aLocale3( LanguageTag(fallback
).getLocale());
2112 aLocale2
= MsLangId::Conversion::lookupFallbackLocale( aLocale3
);
2113 if (aLocale2
.Language
!= "en" || aLocale2
.Country
!= "US")
2114 break; // for, success
2117 SAL_INFO( "i18nlangtag", "LanguageTag::makeFallback - for (" <<
2118 rLocale1
.Language
<< "," << rLocale1
.Country
<< "," << rLocale1
.Variant
<< ") to (" <<
2119 aLocale2
.Language
<< "," << aLocale2
.Country
<< "," << aLocale2
.Variant
<< ")");
2122 mbIsFallback
= true;
2128 /* TODO: maybe this now could take advantage of the mnOverride field in
2129 * isolang.cxx entries and search for kSAME instead of hardcoded special
2130 * fallbacks. Though iterating through those tables would be slower and even
2131 * then there would be some special cases, but we wouldn't lack entries that
2132 * were missed out. */
2133 ::std::vector
< OUString
> LanguageTag::getFallbackStrings( bool bIncludeFullBcp47
) const
2135 ::std::vector
< OUString
> aVec
;
2136 OUString
aLanguage( getLanguage());
2137 OUString
aCountry( getCountry());
2140 if (!aCountry
.isEmpty())
2142 if (bIncludeFullBcp47
)
2143 aVec
.emplace_back(aLanguage
+ "-" + aCountry
);
2144 if (aLanguage
== "zh")
2146 // For zh-HK or zh-MO also list zh-TW, for all other zh-XX also
2148 if (aCountry
== "HK" || aCountry
== "MO")
2149 aVec
.emplace_back(aLanguage
+ "-TW");
2150 else if (aCountry
!= "CN")
2151 aVec
.emplace_back(aLanguage
+ "-CN");
2152 aVec
.push_back( aLanguage
);
2154 else if (aLanguage
== "sh")
2156 // Manual list instead of calling
2157 // LanguageTag( "sr-Latn-" + aCountry).getFallbackStrings( true)
2158 // that would also include "sh-*" again.
2159 aVec
.emplace_back("sr-Latn-" + aCountry
);
2160 aVec
.emplace_back("sr-Latn");
2161 aVec
.emplace_back("sh"); // legacy with script, before default script with country
2162 aVec
.emplace_back("sr-" + aCountry
);
2163 aVec
.emplace_back("sr");
2165 else if (aLanguage
== "ca" && aCountry
== "XV")
2167 ::std::vector
< OUString
> aRep( LanguageTag( "ca-ES-valencia").getFallbackStrings( true));
2168 aVec
.insert( aVec
.end(), aRep
.begin(), aRep
.end());
2169 // Already includes 'ca' language fallback.
2171 else if (aLanguage
== "ku")
2173 if (aCountry
== "TR" || aCountry
== "SY")
2175 aVec
.emplace_back("kmr-Latn-" + aCountry
);
2176 aVec
.emplace_back("kmr-" + aCountry
);
2177 aVec
.emplace_back("kmr-Latn");
2178 aVec
.emplace_back("kmr");
2179 aVec
.push_back( aLanguage
);
2181 else if (aCountry
== "IQ" || aCountry
== "IR")
2183 aVec
.emplace_back("ckb-" + aCountry
);
2184 aVec
.emplace_back("ckb");
2187 else if (aLanguage
== "kmr" && (aCountry
== "TR" || aCountry
== "SY"))
2189 aVec
.emplace_back("ku-Latn-" + aCountry
);
2190 aVec
.emplace_back("ku-" + aCountry
);
2191 aVec
.push_back( aLanguage
);
2192 aVec
.emplace_back("ku");
2194 else if (aLanguage
== "ckb" && (aCountry
== "IQ" || aCountry
== "IR"))
2196 aVec
.emplace_back("ku-Arab-" + aCountry
);
2197 aVec
.emplace_back("ku-" + aCountry
);
2198 aVec
.push_back( aLanguage
);
2199 // not 'ku' only, that was used for Latin script
2202 aVec
.push_back( aLanguage
);
2206 if (bIncludeFullBcp47
)
2207 aVec
.push_back( aLanguage
);
2208 if (aLanguage
== "sh")
2210 aVec
.emplace_back("sr-Latn");
2211 aVec
.emplace_back("sr");
2213 else if (aLanguage
== "pli")
2215 // a special case for Pali dictionary, see fdo#41599
2216 aVec
.emplace_back("pi-Latn");
2217 aVec
.emplace_back("pi");
2223 getBcp47(); // have maBcp47 now
2224 if (bIncludeFullBcp47
)
2225 aVec
.push_back( maBcp47
);
2227 // Special cases for deprecated tags and their replacements, include both
2228 // in fallbacks in a sensible order.
2229 /* TODO: could such things be generalized and automated with liblangtag? */
2230 if (maBcp47
== "en-GB-oed")
2231 aVec
.emplace_back("en-GB-oxendict");
2232 else if (maBcp47
== "en-GB-oxendict")
2233 aVec
.emplace_back("en-GB-oed");
2236 OUString
aVariants( getVariants());
2240 aScript
= getScript();
2241 bool bHaveLanguageScriptVariant
= false;
2242 if (!aCountry
.isEmpty())
2244 if (!aVariants
.isEmpty())
2246 aTmp
= aLanguage
+ "-" + aScript
+ "-" + aCountry
+ "-" + aVariants
;
2247 if (aTmp
!= maBcp47
)
2248 aVec
.push_back( aTmp
);
2249 // Language with variant but without country before language
2250 // without variant but with country.
2251 aTmp
= aLanguage
+ "-" + aScript
+ "-" + aVariants
;
2252 if (aTmp
!= maBcp47
)
2253 aVec
.push_back( aTmp
);
2254 bHaveLanguageScriptVariant
= true;
2256 aTmp
= aLanguage
+ "-" + aScript
+ "-" + aCountry
;
2257 if (aTmp
!= maBcp47
)
2258 aVec
.push_back( aTmp
);
2259 if (aLanguage
== "sr" && aScript
== "Latn")
2261 // sr-Latn-CS => sr-Latn-YU, sh-CS, sh-YU
2262 if (aCountry
== "CS")
2264 aVec
.emplace_back("sr-Latn-YU");
2265 aVec
.emplace_back("sh-CS");
2266 aVec
.emplace_back("sh-YU");
2269 aVec
.emplace_back("sh-" + aCountry
);
2271 else if (aLanguage
== "pi" && aScript
== "Latn")
2272 aVec
.emplace_back("pli"); // a special case for Pali dictionary, see fdo#41599
2273 else if (aLanguage
== "krm" && aScript
== "Latn" && (aCountry
== "TR" || aCountry
== "SY"))
2274 aVec
.emplace_back("ku-" + aCountry
);
2276 if (!aVariants
.isEmpty() && !bHaveLanguageScriptVariant
)
2278 aTmp
= aLanguage
+ "-" + aScript
+ "-" + aVariants
;
2279 if (aTmp
!= maBcp47
)
2280 aVec
.push_back( aTmp
);
2282 aTmp
= aLanguage
+ "-" + aScript
;
2283 if (aTmp
!= maBcp47
)
2284 aVec
.push_back( aTmp
);
2286 // 'sh' actually denoted a script, so have it here instead of appended
2287 // at the end as language-only.
2288 if (aLanguage
== "sr" && aScript
== "Latn")
2289 aVec
.emplace_back("sh");
2290 else if (aLanguage
== "ku" && aScript
== "Arab")
2291 aVec
.emplace_back("ckb");
2292 // 'ku' only denoted Latin script
2293 else if (aLanguage
== "krm" && aScript
== "Latn" && aCountry
.isEmpty())
2294 aVec
.emplace_back("ku");
2296 bool bHaveLanguageVariant
= false;
2297 if (!aCountry
.isEmpty())
2299 if (!aVariants
.isEmpty())
2301 aTmp
= aLanguage
+ "-" + aCountry
+ "-" + aVariants
;
2302 if (aTmp
!= maBcp47
)
2303 aVec
.push_back( aTmp
);
2304 if (maBcp47
== "ca-ES-valencia")
2305 aVec
.emplace_back("ca-XV");
2306 // Language with variant but without country before language
2307 // without variant but with country.
2308 // But only if variant is not from a grandfathered tag that
2309 // wouldn't match the rules, i.e. "de-1901" is fine but "en-oed" is
2311 if (aVariants
.getLength() >= 5 ||
2312 (aVariants
.getLength() == 4 && '0' <= aVariants
[0] && aVariants
[0] <= '9'))
2314 aTmp
= aLanguage
+ "-" + aVariants
;
2315 if (aTmp
!= maBcp47
)
2316 aVec
.push_back( aTmp
);
2317 bHaveLanguageVariant
= true;
2320 aTmp
= aLanguage
+ "-" + aCountry
;
2321 if (aTmp
!= maBcp47
)
2322 aVec
.push_back( aTmp
);
2324 if (!aVariants
.isEmpty() && !bHaveLanguageVariant
)
2326 // Only if variant is not from a grandfathered tag that wouldn't match
2327 // the rules, i.e. "de-1901" is fine but "en-oed" is not.
2328 if (aVariants
.getLength() >= 5 ||
2329 (aVariants
.getLength() == 4 && '0' <= aVariants
[0] && aVariants
[0] <= '9'))
2331 aTmp
= aLanguage
+ "-" + aVariants
;
2332 if (aTmp
!= maBcp47
)
2333 aVec
.push_back( aTmp
);
2337 // Insert legacy fallbacks with country before language-only, but only
2338 // default script, script was handled already above.
2339 if (!aCountry
.isEmpty())
2341 if (aLanguage
== "sr" && aCountry
== "CS")
2342 aVec
.emplace_back("sr-YU");
2345 // Original language-only.
2346 if (aLanguage
!= maBcp47
)
2347 aVec
.push_back( aLanguage
);
// Returns a BCP 47 style string for this tag with one special case: when
// getLanguageType() yields LANGUAGE_SPANISH_DATED the literal
// "es-ES_tradnl" is returned.
// NOTE(review): the return for all other language types is not visible in
// this excerpt -- confirm against the full source.
2353 OUString
LanguageTag::getBcp47MS() const
2355 if (getLanguageType() == LANGUAGE_SPANISH_DATED
)
2356 return "es-ES_tradnl";
// Equality test between this tag and rLanguageTag.
// When both tags agree on isSystemLocale() (both SYSTEM or both not) the
// result is that of operator==(); otherwise the two getBcp47() strings are
// compared.
2361 bool LanguageTag::equals( const LanguageTag
& rLanguageTag
) const
2363 // If SYSTEM is not to be resolved or either both are SYSTEM or none, we
2364 // can use the operator==() optimization.
2365 if (isSystemLocale() == rLanguageTag
.isSystemLocale())
2366 return operator==( rLanguageTag
);
2368 // Compare full language tag strings.
2369 return getBcp47() == rLanguageTag
.getBcp47();
// Equality operator.
// Checks, in this order:
//  1. both tags are the SYSTEM locale -> true;
//  2. both tags have mbInitializedLangID set -> compare mnLangID and require
//     that both agree on isSystemLocale();
//  3. otherwise compare the strings returned by getBcp47( false).
2373 bool LanguageTag::operator==( const LanguageTag
& rLanguageTag
) const
2375 if (isSystemLocale() && rLanguageTag
.isSystemLocale())
2376 return true; // both SYSTEM
2378 // No need to convert to BCP47 if both Lang-IDs are available.
2379 if (mbInitializedLangID
&& rLanguageTag
.mbInitializedLangID
)
2381 // Equal if same ID and no SYSTEM is involved or both are SYSTEM.
2382 return mnLangID
== rLanguageTag
.mnLangID
&& isSystemLocale() == rLanguageTag
.isSystemLocale();
2385 // Compare full language tag strings but SYSTEM unresolved.
2386 return getBcp47( false) == rLanguageTag
.getBcp47( false);
// Inequality operator: the logical negation of operator==().
2390 bool LanguageTag::operator!=( const LanguageTag
& rLanguageTag
) const
2392 return !operator==( rLanguageTag
);
// Ordering operator: true if this tag's getBcp47( false) string compares
// less than rLanguageTag's, using compareToIgnoreAsciiCase().
// NOTE(review): the ordering ignores ASCII case while the string comparison
// in operator==() uses ==; presumably tags are canonicalized so this does
// not matter -- confirm.
2396 bool LanguageTag::operator<( const LanguageTag
& rLanguageTag
) const
2398 return getBcp47( false).compareToIgnoreAsciiCase( rLanguageTag
.getBcp47( false)) < 0;
// Positional parser for common BCP 47 tag shapes. It decides purely from the
// length of rBcp47 and the offsets of its first hyphens which shape the tag
// has, and copies the matching pieces into the output parameters.
//
// rBcp47     the tag to inspect.
// rLanguage, rScript, rCountry, rVariants
//            outputs. In the visible assignments the language is stored
//            lower-cased, the script as one upper-case character followed by
//            three lower-case characters, the country upper-cased, and the
//            variants either verbatim from the input or as a fixed literal.
//
// Returns an Extraction value. It starts as EXTRACTED_NONE; the visible
// branches set EXTRACTED_X_JOKER, EXTRACTED_C_LOCALE, EXTRACTED_LSC,
// EXTRACTED_LV or EXTRACTED_KNOWN_BAD. If it is still EXTRACTED_NONE at the
// end, an informational message is logged via SAL_INFO.
//
// NOTE(review): this excerpt omits some lines of the function (braces and
// several statements), so not every branch is shown in full.
2403 LanguageTagImpl::Extraction
LanguageTagImpl::simpleExtract( const OUString
& rBcp47
,
2404 OUString
& rLanguage
, OUString
& rScript
, OUString
& rCountry
, OUString
& rVariants
)
2406 Extraction eRet
= EXTRACTED_NONE
;
2407 const sal_Int32 nLen
= rBcp47
.getLength();
// Offsets of the first four hyphens; each is -1 when the previous one was
// not found (or when indexOf() finds no further hyphen).
2408 const sal_Int32 nHyph1
= rBcp47
.indexOf( '-');
2409 sal_Int32 nHyph2
= (nHyph1
< 0 ? -1 : rBcp47
.indexOf( '-', nHyph1
+ 1));
2410 sal_Int32 nHyph3
= (nHyph2
< 0 ? -1 : rBcp47
.indexOf( '-', nHyph2
+ 1));
2411 sal_Int32 nHyph4
= (nHyph3
< 0 ? -1 : rBcp47
.indexOf( '-', nHyph3
+ 1));
2412 if (nLen
== 1 && rBcp47
[0] == '*') // * the dreaded jolly joker
2414 // It's f*d up but we need to recognize this.
2415 eRet
= EXTRACTED_X_JOKER
;
2417 else if (nHyph1
== 1 && rBcp47
[0] == 'x') // x-... privateuse
2419 // x-... privateuse tags MUST be known to us by definition.
2422 else if (nLen
== 1 && rBcp47
[0] == 'C') // the 'C' locale
2424 eRet
= EXTRACTED_C_LOCALE
;
// Language only: the whole string is the language code.
2430 else if (nLen
== 2 || nLen
== 3) // ll or lll
2434 rLanguage
= rBcp47
.toAsciiLowerCase();
2438 eRet
= EXTRACTED_LSC
;
2441 else if ( (nHyph1
== 2 && nLen
== 5) // ll-CC
2442 || (nHyph1
== 3 && nLen
== 6)) // lll-CC
2446 rLanguage
= rBcp47
.copy( 0, nHyph1
).toAsciiLowerCase();
2447 rCountry
= rBcp47
.copy( nHyph1
+ 1, 2).toAsciiUpperCase();
2450 eRet
= EXTRACTED_LSC
;
// A four character second subtag is either a script or a variant; the
// first character after the hyphen decides (a digit means variant).
2453 else if ( (nHyph1
== 2 && nLen
== 7) // ll-Ssss or ll-vvvv
2454 || (nHyph1
== 3 && nLen
== 8)) // lll-Ssss or lll-vvvv
2458 sal_Unicode c
= rBcp47
[nHyph1
+1];
2459 if ('0' <= c
&& c
<= '9')
2461 // (DIGIT 3ALNUM) vvvv variant instead of Ssss script
2462 rLanguage
= rBcp47
.copy( 0, nHyph1
).toAsciiLowerCase();
2465 rVariants
= rBcp47
.copy( nHyph1
+ 1);
2466 eRet
= EXTRACTED_LV
;
2470 rLanguage
= rBcp47
.copy( 0, nHyph1
).toAsciiLowerCase();
// Script: first character upper-cased, remaining three lower-cased.
2471 rScript
= rBcp47
.copy( nHyph1
+ 1, 1).toAsciiUpperCase() +
2472 rBcp47
.copy( nHyph1
+ 2, 3).toAsciiLowerCase();
2475 eRet
= EXTRACTED_LSC
;
2479 else if ( (nHyph1
== 2 && nHyph2
== 7 && nLen
== 10) // ll-Ssss-CC
2480 || (nHyph1
== 3 && nHyph2
== 8 && nLen
== 11)) // lll-Ssss-CC
2484 rLanguage
= rBcp47
.copy( 0, nHyph1
).toAsciiLowerCase();
2485 rScript
= rBcp47
.copy( nHyph1
+ 1, 1).toAsciiUpperCase() + rBcp47
.copy( nHyph1
+ 2, 3).toAsciiLowerCase();
2486 rCountry
= rBcp47
.copy( nHyph2
+ 1, 2).toAsciiUpperCase();
2488 eRet
= EXTRACTED_LSC
;
2491 else if ( (nHyph1
== 2 && nHyph2
== 7 && nHyph3
== 10 && nLen
>= 15) // ll-Ssss-CC-vvvv[vvvv][-...]
2492 || (nHyph1
== 3 && nHyph2
== 8 && nHyph3
== 11 && nLen
>= 16)) // lll-Ssss-CC-vvvv[vvvv][-...]
// NOTE(review): the condition guarding this assignment, if any, is not
// visible in this excerpt.
2495 nHyph4
= rBcp47
.getLength();
// nHyph4 - nHyph3 counts the subtag plus its leading hyphen, so this
// accepts subtags of 4 to 8 characters.
2496 if (nHyph4
- nHyph3
> 4 && nHyph4
- nHyph3
<= 9)
2498 rLanguage
= rBcp47
.copy( 0, nHyph1
).toAsciiLowerCase();
2499 rScript
= rBcp47
.copy( nHyph1
+ 1, 1).toAsciiUpperCase() + rBcp47
.copy( nHyph1
+ 2, 3).toAsciiLowerCase();
2500 rCountry
= rBcp47
.copy( nHyph2
+ 1, 2).toAsciiUpperCase();
2501 rVariants
= rBcp47
.copy( nHyph3
+ 1);
2502 eRet
= EXTRACTED_LV
;
// A single-character subtag after the country; only 'u' is examined here.
2505 else if ( (nHyph1
== 2 && nHyph2
== 5 && nHyph3
== 7) // ll-CC-u-...
2506 || (nHyph1
== 3 && nHyph2
== 6 && nHyph3
== 8)) // lll-CC-u-...
2508 if (rBcp47
[nHyph3
-1] == 'u')
2510 // Need to recognize as known, otherwise getLanguage() and
2511 // getCountry() return empty string because mpImplLangtag is not
2512 // used with a known mapping.
2513 /* TODO: if there were more this would get ugly and needed some
2514 * table driven approach via isolang.cxx instead. */
2515 if (rBcp47
.equalsIgnoreAsciiCase( "es-ES-u-co-trad"))
2520 rVariants
= "u-co-trad"; // not strictly a variant, but used to reconstruct the tag.
2521 eRet
= EXTRACTED_LV
;
2525 else if ( (nHyph1
== 2 && nHyph2
== 5 && nLen
>= 10) // ll-CC-vvvv[vvvv][-...]
2526 || (nHyph1
== 3 && nHyph2
== 6 && nLen
>= 11)) // lll-CC-vvvv[vvvv][-...]
// NOTE(review): the condition guarding this assignment, if any, is not
// visible in this excerpt.
2529 nHyph3
= rBcp47
.getLength();
// Same length rule as above: subtag of 4 to 8 characters.
2530 if (nHyph3
- nHyph2
> 4 && nHyph3
- nHyph2
<= 9)
2532 rLanguage
= rBcp47
.copy( 0, nHyph1
).toAsciiLowerCase();
2534 rCountry
= rBcp47
.copy( nHyph1
+ 1, 2).toAsciiUpperCase();
2535 rVariants
= rBcp47
.copy( nHyph2
+ 1);
2536 eRet
= EXTRACTED_LV
;
2539 else if ( (nHyph1
== 2 && nLen
>= 8) // ll-vvvvv[vvv][-...]
2540 || (nHyph1
== 3 && nLen
>= 9)) // lll-vvvvv[vvv][-...]
// NOTE(review): the condition guarding this assignment, if any, is not
// visible in this excerpt.
2543 nHyph2
= rBcp47
.getLength();
// Here the subtag must be 5 to 8 characters long (difference includes the
// hyphen).
2544 if (nHyph2
- nHyph1
> 5 && nHyph2
- nHyph1
<= 9)
2546 rLanguage
= rBcp47
.copy( 0, nHyph1
).toAsciiLowerCase();
2549 rVariants
= rBcp47
.copy( nHyph1
+ 1);
2550 eRet
= EXTRACTED_LV
;
2554 // Known and handled grandfathered; ugly but effective ...
2555 // Note that nLen must have matched above.
2556 // Strictly not a variant, but so far we treat it as such.
2557 if (rBcp47
.equalsIgnoreAsciiCase( "en-GB-oed"))
2563 eRet
= EXTRACTED_LV
;
2565 // Other known and handled odd cases.
2566 else if (rBcp47
.equalsIgnoreAsciiCase( "es-ES_tradnl"))
2568 // Will get overridden, but needs to be recognized as known.
2572 rVariants
= "tradnl"; // this is nonsense, but... ignored.
2573 eRet
= EXTRACTED_KNOWN_BAD
;
// Nothing matched: leave a trace in the log.
2577 if (eRet
== EXTRACTED_NONE
)
2579 SAL_INFO( "i18nlangtag", "LanguageTagImpl::simpleExtract: did not extract '" << rBcp47
<< "'");
// Searches rList for the entry that best matches rReference and returns an
// iterator to it.
//
// Search order in the visible code:
//  1. an entry equal to rReference (exact match);
//  2. the first candidate found in rList, trying in order the strings from
//     LanguageTag( rReference).getFallbackStrings( false), followed by
//     "en-US", "en", "x-default" and "x-no-translate" (each of these is
//     guarded by a comparison against rReference);
//  3. if none of the candidates is in rList, rList.begin().
//
// NOTE(review): lines at the start of the function body are not visible in
// this excerpt, so any handling of an empty rList cannot be confirmed here.
2590 ::std::vector
< OUString
>::const_iterator
LanguageTag::getFallback(
2591 const ::std::vector
< OUString
> & rList
, const OUString
& rReference
)
2596 // Try the simple case first without constructing fallbacks.
2597 ::std::vector
< OUString
>::const_iterator it
= std::find(rList
.begin(), rList
.end(), rReference
);
2598 if (it
!= rList
.end())
2599 return it
; // exact match
// Build the ordered candidate list: the tag's own fallbacks first, then the
// generic last-resort entries.
2601 ::std::vector
< OUString
> aFallbacks( LanguageTag( rReference
).getFallbackStrings( false));
2602 if (rReference
!= "en-US")
2604 aFallbacks
.emplace_back("en-US");
2605 if (rReference
!= "en")
2606 aFallbacks
.emplace_back("en");
2608 if (rReference
!= "x-default")
2609 aFallbacks
.emplace_back("x-default");
2610 if (rReference
!= "x-no-translate")
2611 aFallbacks
.emplace_back("x-no-translate");
2612 /* TODO: the original comphelper::Locale::getFallback() code had
2613 * "x-notranslate" instead of "x-no-translate", but all .xcu files use
2614 * "x-no-translate" and "x-notranslate" apparently was never used anywhere.
2615 * Did that ever work? Was it supposed to work at all like this? */
// First candidate present in rList wins.
2617 for (const auto& fb
: aFallbacks
)
2619 it
= std::find(rList
.begin(), rList
.end(), fb
);
2620 if (it
!= rList
.end())
2621 return it
; // fallback found
2624 // Did not find anything so return something of the list, the first value
2625 // will do as well as any other as none did match any of the possible
2627 return rList
.begin();
// Searches a list of locales for the one that best matches rReference and
// returns an iterator into rList.
//
// Visible steps:
//  1. look for an entry whose Language, Country and Variant all equal those
//     of rReference and return it if found;
//  2. build the fallback strings of rReference (getFallbackStrings( false))
//     and, for every element of rList, its own fallback strings
//     (getFallbackStrings( true));
//  3. iterate over the reference fallbacks and, for each, over the per-entry
//     fallback lists; a hit is returned as rList.begin() + nPosFb.
//
// NOTE(review): the comparison inside the innermost loop, the bookkeeping of
// i and nPosFb, and the return taken when nothing matches are not visible in
// this excerpt -- confirm against the full source.
2632 ::std::vector
< css::lang::Locale
>::const_iterator
LanguageTag::getMatchingFallback(
2633 const ::std::vector
< css::lang::Locale
> & rList
,
2634 const css::lang::Locale
& rReference
)
2639 // Try the simple case first without constructing fallbacks.
2640 ::std::vector
< lang::Locale
>::const_iterator it
= std::find_if(rList
.begin(), rList
.end(),
2641 [&rReference
](const lang::Locale
& rLocale
) {
2642 return rLocale
.Language
== rReference
.Language
2643 && rLocale
.Country
== rReference
.Country
2644 && rLocale
.Variant
== rReference
.Variant
; });
2645 if (it
!= rList
.end())
2646 return it
; // exact match
2648 // Now for each reference fallback test the fallbacks of the list in order.
2649 ::std::vector
< OUString
> aFallbacks( LanguageTag( rReference
).getFallbackStrings( false));
// One fallback-string vector per element of rList, stored at the same index.
2650 ::std::vector
< ::std::vector
< OUString
> > aListFallbacks( rList
.size());
2652 for (auto const& elem
: rList
)
2654 ::std::vector
< OUString
> aTmp( LanguageTag(elem
).getFallbackStrings( true));
2655 aListFallbacks
[i
++] = aTmp
;
2657 for (auto const& rfb
: aFallbacks
)
2660 for (auto const& lfb
: aListFallbacks
)
2662 for (auto const& fb
: lfb
)
2665 return rList
.begin() + nPosFb
;
// File-local helper that tells whether nLangID stands for the SYSTEM
// language.
// Besides LANGUAGE_SYSTEM itself, an ID counts as SYSTEM when
// MsLangId::getRealLanguage() maps it to a different ID.
// NOTE(review): the statement executed when nLangID == LANGUAGE_SYSTEM is
// not visible in this excerpt.
2676 static bool lcl_isSystem( LanguageType nLangID
)
2678 if (nLangID
== LANGUAGE_SYSTEM
)
2680 // There are some special values that simplify to SYSTEM,
2681 // getRealLanguage() catches and resolves them.
2682 LanguageType nNewLangID
= MsLangId::getRealLanguage( nLangID
);
2683 return nNewLangID
!= nLangID
;
// Converts a LanguageType to a css::lang::Locale.
// If bResolveSystem is false and lcl_isSystem( nLangID) is true, a
// default-constructed Locale is returned; otherwise the result is
// LanguageTag( nLangID).getLocale( bResolveSystem).
2688 css::lang::Locale
LanguageTag::convertToLocale( LanguageType nLangID
, bool bResolveSystem
)
2690 if (!bResolveSystem
&& lcl_isSystem( nLangID
))
2691 return lang::Locale();
2693 return LanguageTag( nLangID
).getLocale( bResolveSystem
);
// Converts a css::lang::Locale to a LanguageType.
// A locale with an empty Language field yields LANGUAGE_SYSTEM when
// bResolveSystem is false; in every other case the result is
// LanguageTag( rLocale).getLanguageType( bResolveSystem).
2698 LanguageType
LanguageTag::convertToLanguageType( const css::lang::Locale
& rLocale
, bool bResolveSystem
)
2700 if (rLocale
.Language
.isEmpty() && !bResolveSystem
)
2701 return LANGUAGE_SYSTEM
;
2703 return LanguageTag( rLocale
).getLanguageType( bResolveSystem
);
// Builds a BCP 47 string from the fields of a css::lang::Locale.
//  - empty Language: the result stays empty;
//  - Language equal to I18NLANGTAG_QLT: the Variant field is used as the
//    result;
//  - otherwise: Language alone when Country is empty, else
//    Language + "-" + Country (the Variant field is not evaluated here).
// NOTE(review): the declaration of aBcp47 and the final return are not
// visible in this excerpt.
2708 OUString
LanguageTagImpl::convertToBcp47( const css::lang::Locale
& rLocale
)
2711 if (rLocale
.Language
.isEmpty())
2713 // aBcp47 stays empty
2715 else if (rLocale
.Language
== I18NLANGTAG_QLT
)
2717 aBcp47
= rLocale
.Variant
;
2721 /* XXX NOTE: most legacy code never evaluated the Variant field, so for
2722 * now just concatenate language and country. In case we stumbled over
2723 * variant aware code we'd have to take care of that. */
2724 if (rLocale
.Country
.isEmpty())
2725 aBcp47
= rLocale
.Language
;
2728 aBcp47
= rLocale
.Language
+ "-" + rLocale
.Country
;
// Converts a css::lang::Locale to a BCP 47 string.
// For a locale with an empty Language field the visible code assigns
// LanguageTag::convertToBcp47( LANGUAGE_SYSTEM), with a note that the result
// otherwise stays empty; for any other locale the conversion is delegated to
// LanguageTagImpl::convertToBcp47().
// NOTE(review): the condition selecting the LANGUAGE_SYSTEM path (presumably
// bResolveSystem), the declaration of aBcp47 and the final return are not
// visible in this excerpt.
2736 OUString
LanguageTag::convertToBcp47( const css::lang::Locale
& rLocale
, bool bResolveSystem
)
2739 if (rLocale
.Language
.isEmpty())
2742 aBcp47
= LanguageTag::convertToBcp47( LANGUAGE_SYSTEM
);
2743 // else aBcp47 stays empty
2747 aBcp47
= LanguageTagImpl::convertToBcp47( rLocale
);
// Converts a LanguageType to a BCP 47 string.
// The ID is first turned into a Locale via convertToLocale(); if that locale
// has an empty Language an empty string is returned, otherwise the string is
// built by LanguageTagImpl::convertToBcp47(). The Impl variant is called
// directly because the LanguageTag::convertToBcp47( Locale) overload would
// call back into this function.
2754 OUString
LanguageTag::convertToBcp47( LanguageType nLangID
)
2756 lang::Locale
aLocale( LanguageTag::convertToLocale( nLangID
));
2757 // If system for some reason (should not happen... haha) could not be
2758 // resolved DO NOT CALL LanguageTag::convertToBcp47(Locale) because that
2759 // would recurse into this method here!
2760 if (aLocale
.Language
.isEmpty())
2761 return OUString(); // bad luck, bail out
2762 return LanguageTagImpl::convertToBcp47( aLocale
);
// Converts a BCP 47 string to a css::lang::Locale.
// An empty string with bResolveSystem == false yields a default-constructed
// Locale; otherwise the result is
// LanguageTag( rBcp47).getLocale( bResolveSystem).
2767 css::lang::Locale
LanguageTag::convertToLocale( const OUString
& rBcp47
, bool bResolveSystem
)
2769 if (rBcp47
.isEmpty() && !bResolveSystem
)
2770 return lang::Locale();
2772 return LanguageTag( rBcp47
).getLocale( bResolveSystem
);
// Converts a BCP 47 string to a LanguageType by constructing a temporary
// LanguageTag and calling getLanguageType() on it.
2777 LanguageType
LanguageTag::convertToLanguageType( const OUString
& rBcp47
)
2779 return LanguageTag( rBcp47
).getLanguageType();
// Like convertToLanguageType( rBcp47), but calls makeFallback() on the
// temporary LanguageTag before asking for its language type.
2784 LanguageType
LanguageTag::convertToLanguageTypeWithFallback( const OUString
& rBcp47
)
2786 return LanguageTag( rBcp47
).makeFallback().getLanguageType();
// Converts a BCP 47 string to a css::lang::Locale, calling makeFallback() on
// the temporary LanguageTag before asking for its locale.
2791 css::lang::Locale
LanguageTag::convertToLocaleWithFallback( const OUString
& rBcp47
)
2793 return LanguageTag( rBcp47
).makeFallback().getLocale();
// Checks whether rString is a valid BCP 47 language tag, using liblangtag.
//
// rString           the string to validate; it is converted to UTF-8 before
//                   being handed to lt_tag_parse().
// o_pCanonicalized  optional output; when non-null it receives the string
//                   returned by lt_tag_canonicalize().
// bDisallowPrivate  when true the parsed tag's privateuse part and its
//                   language code are inspected; the language code is
//                   compared against I18NLANGTAG_QLT, which is reserved for
//                   internal use.
//
// The result is held in bValid, which starts out false. A string that cannot
// be parsed is reported via SAL_INFO; a failed canonicalization is reported
// via SAL_WARN_IF.
//
// NOTE(review): the statements that set bValid, the exact effect of the
// bDisallowPrivate checks, the release of pTag and the final return are not
// visible in this excerpt -- confirm against the full source.
2798 bool LanguageTag::isValidBcp47( const OUString
& rString
, OUString
* o_pCanonicalized
, bool bDisallowPrivate
)
2800 bool bValid
= false;
// Small local holder for the lt_tag_t; the lines below create the tag with
// lt_tag_new() and release it with lt_tag_unref().
2804 lt_tag_t
* mpLangtag
;
2807 theDataRef::get().init();
2808 mpLangtag
= lt_tag_new();
2812 lt_tag_unref( mpLangtag
);
2818 if (!lt_tag_parse_disabled
&& lt_tag_parse(aVar
.mpLangtag
, OUStringToOString(rString
, RTL_TEXTENCODING_UTF8
).getStr(), &aError
.p
))
2820 char* pTag
= lt_tag_canonicalize( aVar
.mpLangtag
, &aError
.p
);
2821 SAL_WARN_IF( !pTag
, "i18nlangtag", "LanguageTag:isValidBcp47: could not canonicalize '" << rString
<< "'");
2825 if (bDisallowPrivate
)
2827 const lt_string_t
* pPrivate
= lt_tag_get_privateuse( aVar
.mpLangtag
);
2828 if (pPrivate
&& lt_string_length( pPrivate
) > 0)
2832 const lt_lang_t
* pLangT
= lt_tag_get_language( aVar
.mpLangtag
);
2835 const char* pLang
= lt_lang_get_tag( pLangT
);
2836 if (pLang
&& strcmp( pLang
, I18NLANGTAG_QLT
) == 0)
2838 // Disallow 'qlt' privateuse code to prevent
2839 // confusion with our internal usage.
2845 if (o_pCanonicalized
)
2846 *o_pCanonicalized
= OUString::createFromAscii( pTag
);
2853 SAL_INFO( "i18nlangtag", "LanguageTag:isValidBcp47: could not parse '" << rString
<< "'");
// Maps an AppleLanguageId onto a LanguageTag.
//
// nLang starts as LANGUAGE_DONTKNOW. Each visible case label either assigns
// the corresponding LANGUAGE_* constant to nLang or directly returns a
// LanguageTag built from a BCP 47 string ("az-Arab", "rn", "ay", "jv-Latn",
// "su-Latn", "ga-Latg", "to"). The function ends by returning
// LanguageTag( nLang).
//
// Mappings whose trailing comment says "maybe" or "probably" were marked as
// uncertain by the original author.
//
// NOTE(review): the switch header and whatever terminates each case are not
// visible in this excerpt.
2858 LanguageTag
makeLanguageTagFromAppleLanguageId(AppleLanguageId nLanguage
)
2860 //map the simple ones via LanguageTypes, and the hard ones explicitly
2861 LanguageType
nLang(LANGUAGE_DONTKNOW
);
2865 case AppleLanguageId::ENGLISH
:
2866 nLang
= LANGUAGE_ENGLISH
;
2868 case AppleLanguageId::FRENCH
:
2869 nLang
= LANGUAGE_FRENCH
;
2871 case AppleLanguageId::GERMAN
:
2872 nLang
= LANGUAGE_GERMAN
;
2874 case AppleLanguageId::ITALIAN
:
2875 nLang
= LANGUAGE_ITALIAN
;
2877 case AppleLanguageId::DUTCH
:
2878 nLang
= LANGUAGE_DUTCH
;
2880 case AppleLanguageId::SWEDISH
:
2881 nLang
= LANGUAGE_SWEDISH
;
2883 case AppleLanguageId::SPANISH
:
2884 nLang
= LANGUAGE_SPANISH
;
2886 case AppleLanguageId::DANISH
:
2887 nLang
= LANGUAGE_DANISH
;
2889 case AppleLanguageId::PORTUGUESE
:
2890 nLang
= LANGUAGE_PORTUGUESE
;
2892 case AppleLanguageId::NORWEGIAN
:
2893 nLang
= LANGUAGE_NORWEGIAN
;
2895 case AppleLanguageId::HEBREW
:
2896 nLang
= LANGUAGE_HEBREW
;
2898 case AppleLanguageId::JAPANESE
:
2899 nLang
= LANGUAGE_JAPANESE
;
2901 case AppleLanguageId::ARABIC
:
2902 nLang
= LANGUAGE_ARABIC_PRIMARY_ONLY
;
2904 case AppleLanguageId::FINNISH
:
2905 nLang
= LANGUAGE_FINNISH
;
2907 case AppleLanguageId::GREEK
:
2908 nLang
= LANGUAGE_GREEK
;
2910 case AppleLanguageId::ICELANDIC
:
2911 nLang
= LANGUAGE_ICELANDIC
;
2913 case AppleLanguageId::MALTESE
:
2914 nLang
= LANGUAGE_MALTESE
;
2916 case AppleLanguageId::TURKISH
:
2917 nLang
= LANGUAGE_TURKISH
;
2919 case AppleLanguageId::CROATIAN
:
2920 nLang
= LANGUAGE_CROATIAN
;
2922 case AppleLanguageId::CHINESE_TRADITIONAL
:
2923 nLang
= LANGUAGE_CHINESE_TRADITIONAL
;
2925 case AppleLanguageId::URDU
:
2926 nLang
= LANGUAGE_URDU_PAKISTAN
; //probably, otherwise we need a LANGUAGE_URDU_PRIMARY_ONLY
2928 case AppleLanguageId::HINDI
:
2929 nLang
= LANGUAGE_HINDI
;
2931 case AppleLanguageId::THAI
:
2932 nLang
= LANGUAGE_THAI
;
2934 case AppleLanguageId::KOREAN
:
2935 nLang
= LANGUAGE_KOREAN
;
2937 case AppleLanguageId::LITHUANIAN
:
2938 nLang
= LANGUAGE_LITHUANIAN
;
2940 case AppleLanguageId::POLISH
:
2941 nLang
= LANGUAGE_POLISH
;
2943 case AppleLanguageId::HUNGARIAN
:
2944 nLang
= LANGUAGE_HUNGARIAN
;
2946 case AppleLanguageId::ESTONIAN
:
2947 nLang
= LANGUAGE_ESTONIAN
;
2949 case AppleLanguageId::LATVIAN
:
2950 nLang
= LANGUAGE_LATVIAN
;
2952 case AppleLanguageId::SAMI
:
2953 nLang
= LANGUAGE_SAMI_NORTHERN_NORWAY
; //maybe
2955 case AppleLanguageId::FAROESE
:
2956 nLang
= LANGUAGE_FAEROESE
;
2958 case AppleLanguageId::FARSI
:
2959 nLang
= LANGUAGE_FARSI
;
2961 case AppleLanguageId::RUSSIAN
:
2962 nLang
= LANGUAGE_RUSSIAN
;
2964 case AppleLanguageId::CHINESE_SIMPLIFIED
:
2965 nLang
= LANGUAGE_CHINESE_SIMPLIFIED
;
2967 case AppleLanguageId::FLEMISH
:
2968 nLang
= LANGUAGE_DUTCH_BELGIAN
;
2970 case AppleLanguageId::IRISH_GAELIC
:
2971 nLang
= LANGUAGE_GAELIC_IRELAND
;
2973 case AppleLanguageId::ALBANIAN
:
2974 nLang
= LANGUAGE_ALBANIAN
;
2976 case AppleLanguageId::ROMANIAN
:
2977 nLang
= LANGUAGE_ROMANIAN
;
2979 case AppleLanguageId::CZECH
:
2980 nLang
= LANGUAGE_CZECH
;
2982 case AppleLanguageId::SLOVAK
:
2983 nLang
= LANGUAGE_SLOVAK
;
2985 case AppleLanguageId::SLOVENIAN
:
2986 nLang
= LANGUAGE_SLOVENIAN
;
2988 case AppleLanguageId::YIDDISH
:
2989 nLang
= LANGUAGE_YIDDISH
;
2991 case AppleLanguageId::SERBIAN
:
2992 nLang
= LANGUAGE_SERBIAN_CYRILLIC_SERBIA
; //maybe
2994 case AppleLanguageId::MACEDONIAN
:
2995 nLang
= LANGUAGE_MACEDONIAN
;
2997 case AppleLanguageId::BULGARIAN
:
2998 nLang
= LANGUAGE_BULGARIAN
;
3000 case AppleLanguageId::UKRAINIAN
:
3001 nLang
= LANGUAGE_UKRAINIAN
;
3003 case AppleLanguageId::BYELORUSSIAN
:
3004 nLang
= LANGUAGE_BELARUSIAN
;
3006 case AppleLanguageId::UZBEK
:
3007 nLang
= LANGUAGE_UZBEK_CYRILLIC
; //maybe
3009 case AppleLanguageId::KAZAKH
:
3010 nLang
= LANGUAGE_KAZAKH
;
3012 case AppleLanguageId::AZERI_CYRILLIC
:
3013 nLang
= LANGUAGE_AZERI_CYRILLIC
;
3015 case AppleLanguageId::AZERI_ARABIC
:
3016 return LanguageTag("az-Arab");
3018 case AppleLanguageId::ARMENIAN
:
3019 nLang
= LANGUAGE_ARMENIAN
;
3021 case AppleLanguageId::GEORGIAN
:
3022 nLang
= LANGUAGE_GEORGIAN
;
3024 case AppleLanguageId::MOLDAVIAN
:
3025 nLang
= LANGUAGE_ROMANIAN_MOLDOVA
;
3027 case AppleLanguageId::KIRGHIZ
:
3028 nLang
= LANGUAGE_KIRGHIZ
;
3030 case AppleLanguageId::TAJIKI
:
3031 nLang
= LANGUAGE_TAJIK
;
3033 case AppleLanguageId::TURKMEN
:
3034 nLang
= LANGUAGE_TURKMEN
;
3036 case AppleLanguageId::MONGOLIAN_MONGOLIAN
:
3037 nLang
= LANGUAGE_MONGOLIAN_MONGOLIAN_MONGOLIA
;
3039 case AppleLanguageId::MONGOLIAN_CYRILLIC
:
3040 nLang
= LANGUAGE_MONGOLIAN_CYRILLIC_MONGOLIA
;
3042 case AppleLanguageId::PASHTO
:
3043 nLang
= LANGUAGE_PASHTO
;
3045 case AppleLanguageId::KURDISH
:
3046 nLang
= LANGUAGE_USER_KURDISH_TURKEY
; //maybe
3048 case AppleLanguageId::KASHMIRI
:
3049 nLang
= LANGUAGE_KASHMIRI
;
3051 case AppleLanguageId::SINDHI
:
3052 nLang
= LANGUAGE_SINDHI
;
3054 case AppleLanguageId::TIBETAN
:
3055 nLang
= LANGUAGE_TIBETAN
;
3057 case AppleLanguageId::NEPALI
:
3058 nLang
= LANGUAGE_NEPALI
;
3060 case AppleLanguageId::SANSKRIT
:
3061 nLang
= LANGUAGE_SANSKRIT
;
3063 case AppleLanguageId::MARATHI
:
3064 nLang
= LANGUAGE_MARATHI
;
3066 case AppleLanguageId::BENGALI
:
3067 nLang
= LANGUAGE_BENGALI
;
3069 case AppleLanguageId::ASSAMESE
:
3070 nLang
= LANGUAGE_ASSAMESE
;
3072 case AppleLanguageId::GUJARATI
:
3073 nLang
= LANGUAGE_GUJARATI
;
3075 case AppleLanguageId::PUNJABI
:
3076 nLang
= LANGUAGE_PUNJABI
;
3078 case AppleLanguageId::ORIYA
:
3079 nLang
= LANGUAGE_ODIA
;
3081 case AppleLanguageId::MALAYALAM
:
3082 nLang
= LANGUAGE_MALAYALAM
;
3084 case AppleLanguageId::KANNADA
:
3085 nLang
= LANGUAGE_KANNADA
;
3087 case AppleLanguageId::TAMIL
:
3088 nLang
= LANGUAGE_TAMIL
;
3090 case AppleLanguageId::TELUGU
:
3091 nLang
= LANGUAGE_TELUGU
;
3093 case AppleLanguageId::SINHALESE
:
3094 nLang
= LANGUAGE_SINHALESE_SRI_LANKA
;
3096 case AppleLanguageId::BURMESE
:
3097 nLang
= LANGUAGE_BURMESE
;
3099 case AppleLanguageId::KHMER
:
3100 nLang
= LANGUAGE_KHMER
;
3102 case AppleLanguageId::LAO
:
3103 nLang
= LANGUAGE_LAO
;
3105 case AppleLanguageId::VIETNAMESE
:
3106 nLang
= LANGUAGE_VIETNAMESE
;
3108 case AppleLanguageId::INDONESIAN
:
3109 nLang
= LANGUAGE_INDONESIAN
;
3111 case AppleLanguageId::TAGALONG
:
3112 nLang
= LANGUAGE_USER_TAGALOG
;
3114 case AppleLanguageId::MALAY_LATIN
:
3115 nLang
= LANGUAGE_MALAY_MALAYSIA
;
3117 case AppleLanguageId::MALAY_ARABIC
:
3118 nLang
= LANGUAGE_USER_MALAY_ARABIC_MALAYSIA
;
3120 case AppleLanguageId::AMHARIC
:
3121 nLang
= LANGUAGE_AMHARIC_ETHIOPIA
;
3123 case AppleLanguageId::TIGRINYA
:
3124 nLang
= LANGUAGE_TIGRIGNA_ETHIOPIA
;
3126 case AppleLanguageId::GALLA
:
3127 nLang
= LANGUAGE_OROMO
;
3129 case AppleLanguageId::SOMALI
:
3130 nLang
= LANGUAGE_SOMALI
;
3132 case AppleLanguageId::SWAHILI
:
3133 nLang
= LANGUAGE_SWAHILI
;
3135 case AppleLanguageId::KINYARWANDA
:
3136 nLang
= LANGUAGE_KINYARWANDA_RWANDA
;
3138 case AppleLanguageId::RUNDI
:
3139 return LanguageTag("rn");
3141 case AppleLanguageId::NYANJA
:
3142 nLang
= LANGUAGE_USER_NYANJA
;
3144 case AppleLanguageId::MALAGASY
:
3145 nLang
= LANGUAGE_MALAGASY_PLATEAU
;
3147 case AppleLanguageId::ESPERANTO
:
3148 nLang
= LANGUAGE_USER_ESPERANTO
;
3150 case AppleLanguageId::WELSH
:
3151 nLang
= LANGUAGE_WELSH
;
3153 case AppleLanguageId::BASQUE
:
3154 nLang
= LANGUAGE_BASQUE
;
3156 case AppleLanguageId::CATALAN
:
3157 nLang
= LANGUAGE_CATALAN
;
3159 case AppleLanguageId::LATIN
:
3160 nLang
= LANGUAGE_USER_LATIN
;
3162 case AppleLanguageId::QUENCHUA
:
3163 nLang
= LANGUAGE_QUECHUA_BOLIVIA
; //maybe
3165 case AppleLanguageId::GUARANI
:
3166 nLang
= LANGUAGE_GUARANI_PARAGUAY
;
3168 case AppleLanguageId::AYMARA
:
3169 return LanguageTag("ay");
3171 case AppleLanguageId::TATAR
:
3172 nLang
= LANGUAGE_TATAR
;
3174 case AppleLanguageId::UIGHUR
:
3175 nLang
= LANGUAGE_UIGHUR_CHINA
;
3177 case AppleLanguageId::DZONGKHA
:
3178 nLang
= LANGUAGE_DZONGKHA_BHUTAN
;
3180 case AppleLanguageId::JAVANESE_LATIN
:
3181 return LanguageTag("jv-Latn");
3183 case AppleLanguageId::SUNDANESE_LATIN
:
3184 return LanguageTag("su-Latn");
3186 case AppleLanguageId::GALICIAN
:
3187 nLang
= LANGUAGE_GALICIAN
;
3189 case AppleLanguageId::AFRIKAANS
:
3190 nLang
= LANGUAGE_AFRIKAANS
;
3192 case AppleLanguageId::BRETON
:
3193 nLang
= LANGUAGE_BRETON_FRANCE
;
3195 case AppleLanguageId::INUKTITUT
:
3196 nLang
= LANGUAGE_INUKTITUT_LATIN_CANADA
; //probably
3198 case AppleLanguageId::SCOTTISH_GAELIC
:
3199 nLang
= LANGUAGE_GAELIC_SCOTLAND
;
3201 case AppleLanguageId::MANX_GAELIC
:
3202 nLang
= LANGUAGE_USER_MANX
;
3204 case AppleLanguageId::IRISH_GAELIC_WITH_DOT_ABOVE
:
3205 return LanguageTag("ga-Latg");
3207 case AppleLanguageId::TONGAN
:
3208 return LanguageTag("to");
3210 case AppleLanguageId::GREEK_POLYTONIC
:
3211 nLang
= LANGUAGE_USER_ANCIENT_GREEK
;
3213 case AppleLanguageId::GREENLANDIC
:
3214 nLang
= LANGUAGE_KALAALLISUT_GREENLAND
;
3216 case AppleLanguageId::AZERI_LATIN
:
3217 nLang
= LANGUAGE_AZERI_LATIN
;
3221 return LanguageTag(nLang
);
3224 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */