1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 #include "i18npool/languagetag.hxx"
11 #include "i18npool/mslangid.hxx"
12 #include <rtl/ustrbuf.hxx>
13 #include <rtl/bootstrap.hxx>
14 #include <osl/file.hxx>
15 #include <rtl/instance.hxx>
16 #include <rtl/locale.h>
20 #if defined(ENABLE_LIBLANGTAG)
21 #include <liblangtag/langtag.h>
23 /* Replacement code for LGPL phobic and Android systems.
24 * For iOS we could probably use NSLocale instead, that should have more or
25 * less required functionality. If it is good enough, it could be used for Mac
28 #include "simple-langtag.cxx"
33 using rtl::OUStringBuffer
;
34 using namespace com::sun::star
;
36 // The actual pointer type of mpImplLangtag that is declared void* to not
37 // pollute the entire code base with liblangtag.
38 #define LANGTAGCAST(p) (reinterpret_cast<lt_tag_t*>(p))
39 #define MPLANGTAG LANGTAGCAST(mpImplLangtag)
41 /** Convention to signal presence of BCP 47 language tag in a Locale's Variant
42 field. The Locale's Language field then will contain this ISO 639-2
43 reserved for local use code. */
44 #define ISO639_LANGUAGE_TAG "qlt"
47 // "statics" to be returned as const reference to an empty locale and string.
49 struct theEmptyLocale
: public rtl::Static
< lang::Locale
, theEmptyLocale
> {};
50 struct theEmptyBcp47
: public rtl::Static
< OUString
, theEmptyBcp47
> {};
54 /** A reference holder for liblangtag data de/initialization, one static
55 instance. Currently implemented such that the first "ref" inits and dtor
56 (our library deinitialized) tears down.
58 class LiblantagDataRef
65 if (mnRef
!= SAL_MAX_UINT32
&& !mnRef
++)
70 if (mnRef
!= SAL_MAX_UINT32
&& mnRef
&& !--mnRef
)
74 rtl::OString maDataPath
; // path to liblangtag data, "|" if system
83 struct theDataRef
: public rtl::Static
< LiblantagDataRef
, theDataRef
> {};
86 LiblantagDataRef::LiblantagDataRef()
92 LiblantagDataRef::~LiblantagDataRef()
94 // When destructed we're tearing down unconditionally.
100 void LiblantagDataRef::setup()
102 SAL_INFO( "i18npool.langtag", "LiblantagDataRef::setup: initializing database");
103 if (maDataPath
.isEmpty())
106 // Hold ref eternally.
107 mnRef
= SAL_MAX_UINT32
;
110 void LiblantagDataRef::teardown()
112 SAL_INFO( "i18npool.langtag", "LiblantagDataRef::teardown: finalizing database");
116 void LiblantagDataRef::setupDataPath()
118 // maDataPath is assumed to be empty here.
119 OUString
aURL("$BRAND_BASE_DIR/share/liblangtag");
120 rtl::Bootstrap::expandMacros(aURL
); //TODO: detect failure
122 // Check if data is in our own installation, else assume system
124 OUString
aData( aURL
);
125 aData
+= "/language-subtag-registry.xml";
126 osl::DirectoryItem aDirItem
;
127 if (osl::DirectoryItem::get( aData
, aDirItem
) == osl::DirectoryItem::E_None
)
130 if (osl::FileBase::getSystemPathFromFileURL( aURL
, aPath
) == osl::FileBase::E_None
)
131 maDataPath
= OUStringToOString( aPath
, RTL_TEXTENCODING_UTF8
);
133 if (maDataPath
.isEmpty())
134 maDataPath
= "|"; // assume system
136 lt_db_set_datadir( maDataPath
.getStr());
139 LanguageTag::LanguageTag( const rtl::OUString
& rBcp47LanguageTag
, bool bCanonicalize
)
141 maBcp47( rBcp47LanguageTag
),
142 mpImplLangtag( NULL
),
143 mnLangID( LANGUAGE_DONTKNOW
),
144 meIsValid( DECISION_DONTKNOW
),
145 meIsIsoLocale( DECISION_DONTKNOW
),
146 meIsIsoODF( DECISION_DONTKNOW
),
147 meIsLiblangtagNeeded( DECISION_DONTKNOW
),
148 mbSystemLocale( rBcp47LanguageTag
.isEmpty()),
149 mbInitializedBcp47( !mbSystemLocale
),
150 mbInitializedLocale( false),
151 mbInitializedLangID( false),
152 mbCachedLanguage( false),
153 mbCachedScript( false),
154 mbCachedCountry( false),
162 LanguageTag::LanguageTag( const com::sun::star::lang::Locale
& rLocale
)
165 mpImplLangtag( NULL
),
166 mnLangID( LANGUAGE_DONTKNOW
),
167 meIsValid( DECISION_DONTKNOW
),
168 meIsIsoLocale( DECISION_DONTKNOW
),
169 meIsIsoODF( DECISION_DONTKNOW
),
170 meIsLiblangtagNeeded( DECISION_DONTKNOW
),
171 mbSystemLocale( rLocale
.Language
.isEmpty()),
172 mbInitializedBcp47( false),
173 mbInitializedLocale( !mbSystemLocale
),
174 mbInitializedLangID( false),
175 mbCachedLanguage( false),
176 mbCachedScript( false),
177 mbCachedCountry( false),
183 LanguageTag::LanguageTag( LanguageType nLanguage
)
185 mpImplLangtag( NULL
),
186 mnLangID( nLanguage
),
187 meIsValid( DECISION_DONTKNOW
),
188 meIsIsoLocale( DECISION_DONTKNOW
),
189 meIsIsoODF( DECISION_DONTKNOW
),
190 meIsLiblangtagNeeded( DECISION_DONTKNOW
),
191 mbSystemLocale( nLanguage
== LANGUAGE_SYSTEM
),
192 mbInitializedBcp47( false),
193 mbInitializedLocale( false),
194 mbInitializedLangID( !mbSystemLocale
),
195 mbCachedLanguage( false),
196 mbCachedScript( false),
197 mbCachedCountry( false),
203 LanguageTag::LanguageTag( const rtl::OUString
& rLanguage
, const rtl::OUString
& rCountry
)
205 maLocale( rLanguage
, rCountry
, ""),
206 mpImplLangtag( NULL
),
207 mnLangID( LANGUAGE_DONTKNOW
),
208 meIsValid( DECISION_DONTKNOW
),
209 meIsIsoLocale( DECISION_DONTKNOW
),
210 meIsIsoODF( DECISION_DONTKNOW
),
211 meIsLiblangtagNeeded( DECISION_DONTKNOW
),
212 mbSystemLocale( rLanguage
.isEmpty()),
213 mbInitializedBcp47( false),
214 mbInitializedLocale( !mbSystemLocale
),
215 mbInitializedLangID( false),
216 mbCachedLanguage( false),
217 mbCachedScript( false),
218 mbCachedCountry( false),
224 LanguageTag::LanguageTag( const rtl_Locale
& rLocale
)
226 maLocale( rLocale
.Language
, rLocale
.Country
, rLocale
.Variant
),
227 mpImplLangtag( NULL
),
228 mnLangID( LANGUAGE_DONTKNOW
),
229 meIsValid( DECISION_DONTKNOW
),
230 meIsIsoLocale( DECISION_DONTKNOW
),
231 meIsIsoODF( DECISION_DONTKNOW
),
232 meIsLiblangtagNeeded( DECISION_DONTKNOW
),
233 mbSystemLocale( maLocale
.Language
.isEmpty()),
234 mbInitializedBcp47( false),
235 mbInitializedLocale( !mbSystemLocale
),
236 mbInitializedLangID( false),
237 mbCachedLanguage( false),
238 mbCachedScript( false),
239 mbCachedCountry( false),
245 LanguageTag::LanguageTag( const LanguageTag
& rLanguageTag
)
247 maLocale( rLanguageTag
.maLocale
),
248 maBcp47( rLanguageTag
.maBcp47
),
249 maCachedLanguage( rLanguageTag
.maCachedLanguage
),
250 maCachedScript( rLanguageTag
.maCachedScript
),
251 maCachedCountry( rLanguageTag
.maCachedCountry
),
252 mpImplLangtag( rLanguageTag
.mpImplLangtag
?
253 lt_tag_copy( LANGTAGCAST( rLanguageTag
.mpImplLangtag
)) : NULL
),
254 mnLangID( rLanguageTag
.mnLangID
),
255 meIsValid( rLanguageTag
.meIsValid
),
256 meIsIsoLocale( rLanguageTag
.meIsIsoLocale
),
257 meIsIsoODF( rLanguageTag
.meIsIsoODF
),
258 meIsLiblangtagNeeded( rLanguageTag
.meIsLiblangtagNeeded
),
259 mbSystemLocale( rLanguageTag
.mbSystemLocale
),
260 mbInitializedBcp47( rLanguageTag
.mbInitializedBcp47
),
261 mbInitializedLocale( rLanguageTag
.mbInitializedLocale
),
262 mbInitializedLangID( rLanguageTag
.mbInitializedLangID
),
263 mbCachedLanguage( rLanguageTag
.mbCachedLanguage
),
264 mbCachedScript( rLanguageTag
.mbCachedScript
),
265 mbCachedCountry( rLanguageTag
.mbCachedCountry
),
266 mbIsFallback( rLanguageTag
.mbIsFallback
)
269 theDataRef::get().incRef();
273 LanguageTag
& LanguageTag::operator=( const LanguageTag
& rLanguageTag
)
275 maLocale
= rLanguageTag
.maLocale
;
276 maBcp47
= rLanguageTag
.maBcp47
;
277 maCachedLanguage
= rLanguageTag
.maCachedLanguage
;
278 maCachedScript
= rLanguageTag
.maCachedScript
;
279 maCachedCountry
= rLanguageTag
.maCachedCountry
;
280 mpImplLangtag
= rLanguageTag
.mpImplLangtag
;
281 mpImplLangtag
= rLanguageTag
.mpImplLangtag
?
282 lt_tag_copy( LANGTAGCAST( rLanguageTag
.mpImplLangtag
)) : NULL
;
283 mnLangID
= rLanguageTag
.mnLangID
;
284 meIsValid
= rLanguageTag
.meIsValid
;
285 meIsIsoLocale
= rLanguageTag
.meIsIsoLocale
;
286 meIsIsoODF
= rLanguageTag
.meIsIsoODF
;
287 meIsLiblangtagNeeded
= rLanguageTag
.meIsLiblangtagNeeded
;
288 mbSystemLocale
= rLanguageTag
.mbSystemLocale
;
289 mbInitializedBcp47
= rLanguageTag
.mbInitializedBcp47
;
290 mbInitializedLocale
= rLanguageTag
.mbInitializedLocale
;
291 mbInitializedLangID
= rLanguageTag
.mbInitializedLangID
;
292 mbCachedLanguage
= rLanguageTag
.mbCachedLanguage
;
293 mbCachedScript
= rLanguageTag
.mbCachedScript
;
294 mbCachedCountry
= rLanguageTag
.mbCachedCountry
;
295 mbIsFallback
= rLanguageTag
.mbIsFallback
;
297 theDataRef::get().incRef();
302 LanguageTag::~LanguageTag()
306 lt_tag_unref( MPLANGTAG
);
307 theDataRef::get().decRef();
312 void LanguageTag::resetVars()
316 lt_tag_unref( MPLANGTAG
);
317 mpImplLangtag
= NULL
;
318 theDataRef::get().decRef();
321 maLocale
= lang::Locale();
322 if (!maBcp47
.isEmpty())
323 maBcp47
= OUString();
324 if (!maCachedLanguage
.isEmpty())
325 maCachedLanguage
= OUString();
326 if (!maCachedScript
.isEmpty())
327 maCachedScript
= OUString();
328 if (!maCachedCountry
.isEmpty())
329 maCachedCountry
= OUString();
330 mnLangID
= LANGUAGE_DONTKNOW
;
331 meIsValid
= DECISION_DONTKNOW
;
332 meIsIsoLocale
= DECISION_DONTKNOW
;
333 meIsIsoODF
= DECISION_DONTKNOW
;
334 meIsLiblangtagNeeded
= DECISION_DONTKNOW
;
335 mbSystemLocale
= true;
336 mbInitializedBcp47
= false;
337 mbInitializedLocale
= false;
338 mbInitializedLangID
= false;
339 mbCachedLanguage
= false;
340 mbCachedScript
= false;
341 mbCachedCountry
= false;
342 mbIsFallback
= false;
346 void LanguageTag::reset( const rtl::OUString
& rBcp47LanguageTag
, bool bCanonicalize
)
349 maBcp47
= rBcp47LanguageTag
;
350 mbSystemLocale
= rBcp47LanguageTag
.isEmpty();
351 mbInitializedBcp47
= !mbSystemLocale
;
358 void LanguageTag::reset( const com::sun::star::lang::Locale
& rLocale
)
362 mbSystemLocale
= rLocale
.Language
.isEmpty();
363 mbInitializedLocale
= !mbSystemLocale
;
367 void LanguageTag::reset( LanguageType nLanguage
)
370 mnLangID
= nLanguage
;
371 mbSystemLocale
= nLanguage
== LANGUAGE_SYSTEM
;
372 mbInitializedLangID
= !mbSystemLocale
;
376 bool LanguageTag::canonicalize()
383 dumper( void** pp
) : mpp( *pp
? NULL
: pp
) {}
384 ~dumper() { if (mpp
&& *mpp
) lt_tag_dump( LANGTAGCAST( *mpp
)); }
386 dumper
aDumper( &mpImplLangtag
);
389 // Side effect: have maBcp47 in any case, resolved system.
390 // Some methods calling canonicalize() (or not calling it due to
391 // meIsLiblangtagNeeded==DECISION_NO) rely on this! Hence do not set
392 // meIsLiblangtagNeeded anywhere else than hereafter.
395 // The simple cases and known locales don't need liblangtag processing,
396 // which also avoids loading liblangtag data on startup.
397 if (meIsLiblangtagNeeded
== DECISION_DONTKNOW
)
399 bool bTemporaryLocale
= false;
400 bool bTemporaryLangID
= false;
401 if (!mbInitializedLocale
&& !mbInitializedLangID
)
405 mnLangID
= MsLangId::getRealLanguage( LANGUAGE_SYSTEM
);
406 mbInitializedLangID
= true;
410 // Now this is getting funny.. we only have some BCP47 string
411 // and want to determine if parsing it would be possible
412 // without using liblangtag just to see if it is a simple known
414 OUString aLanguage
, aScript
, aCountry
;
415 if (simpleExtract( maBcp47
, aLanguage
, aScript
, aCountry
))
417 if (aScript
.isEmpty())
419 maLocale
.Language
= aLanguage
;
420 maLocale
.Country
= aCountry
;
424 maLocale
.Language
= ISO639_LANGUAGE_TAG
;
425 maLocale
.Country
= aCountry
;
426 maLocale
.Variant
= maBcp47
;
428 bTemporaryLocale
= mbInitializedLocale
= true;
432 if (mbInitializedLangID
&& !mbInitializedLocale
)
434 // Do not call getLocale() here because that prefers
435 // convertBcp47ToLocale() which would end up in recursion via
438 // Prepare to verify that we have a known locale, not just an
439 // arbitrary MS-LangID.
440 convertLangToLocale();
442 if (mbInitializedLocale
)
444 if (maLocale
.Variant
.isEmpty())
445 meIsLiblangtagNeeded
= DECISION_NO
; // per definition ll[l][-CC]
448 if (!mbInitializedLangID
)
450 convertLocaleToLang();
451 if (bTemporaryLocale
)
452 bTemporaryLangID
= true;
454 if (mnLangID
!= LANGUAGE_DONTKNOW
&& mnLangID
!= LANGUAGE_SYSTEM
)
455 meIsLiblangtagNeeded
= DECISION_NO
; // known locale
458 if (bTemporaryLocale
)
460 mbInitializedLocale
= false;
461 maLocale
= lang::Locale();
463 if (bTemporaryLangID
)
465 mbInitializedLangID
= false;
466 mnLangID
= LANGUAGE_DONTKNOW
;
469 if (meIsLiblangtagNeeded
== DECISION_NO
)
471 meIsValid
= DECISION_YES
; // really, known must be valid ...
472 return true; // that's it
474 meIsLiblangtagNeeded
= DECISION_YES
;
475 SAL_INFO( "i18npool.langtag", "LanguageTag::canonicalize: using liblangtag for " << maBcp47
);
479 theDataRef::get().incRef();
480 mpImplLangtag
= lt_tag_new();
483 // ensure error is free'd
487 myerror() : p(NULL
) {}
488 ~myerror() { if (p
) lt_error_unref( p
); }
491 if (lt_tag_parse( MPLANGTAG
, OUStringToOString( maBcp47
, RTL_TEXTENCODING_UTF8
).getStr(), &aError
.p
))
493 char* pTag
= lt_tag_canonicalize( MPLANGTAG
, &aError
.p
);
494 SAL_WARN_IF( !pTag
, "i18npool.langtag", "LanguageTag::canonicalize: could not canonicalize " << maBcp47
);
497 OUString
aOld( maBcp47
);
498 maBcp47
= OUString::createFromAscii( pTag
);
499 // Make the lt_tag_t follow the new string if different, which
500 // removes default script and such.
503 if (!lt_tag_parse( MPLANGTAG
, pTag
, &aError
.p
))
505 SAL_WARN( "i18npool.langtag", "LanguageTag::canonicalize: could not reparse " << maBcp47
);
507 meIsValid
= DECISION_NO
;
512 meIsValid
= DECISION_YES
;
518 SAL_INFO( "i18npool.langtag", "LanguageTag::canonicalize: could not parse " << maBcp47
);
520 meIsValid
= DECISION_NO
;
525 void LanguageTag::convertLocaleToBcp47()
527 if (mbSystemLocale
&& !mbInitializedLocale
)
528 convertLangToLocale();
530 if (maLocale
.Language
== ISO639_LANGUAGE_TAG
)
532 maBcp47
= maLocale
.Variant
;
533 meIsIsoLocale
= DECISION_NO
;
537 /* XXX NOTE: most legacy code never evaluated the Variant field, so for
538 * now just concatenate language and country. In case we stumbled over
539 * variant aware code we'd have to take care of that. */
540 if (maLocale
.Country
.isEmpty())
541 maBcp47
= maLocale
.Language
;
544 OUStringBuffer
aBuf( maLocale
.Language
.getLength() + 1 + maLocale
.Country
.getLength());
545 aBuf
.append( maLocale
.Language
).append( '-').append( maLocale
.Country
);
546 maBcp47
= aBuf
.makeStringAndClear();
549 mbInitializedBcp47
= true;
553 void LanguageTag::convertLocaleToLang()
557 mnLangID
= MsLangId::getRealLanguage( LANGUAGE_SYSTEM
);
561 /* FIXME: this is temporary until code base is converted to not use
562 * MsLangId::convert...() anymore. After that, proper new method has to
563 * be implemented to allow ISO639_LANGUAGE_TAG and script tag and such. */
564 mnLangID
= MsLangId::Conversion::convertLocaleToLanguage( maLocale
);
566 mbInitializedLangID
= true;
570 void LanguageTag::convertBcp47ToLocale()
572 bool bIso
= isIsoLocale();
575 maLocale
.Language
= getLanguageFromLangtag();
576 maLocale
.Country
= getRegionFromLangtag();
577 maLocale
.Variant
= OUString();
581 maLocale
.Language
= ISO639_LANGUAGE_TAG
;
582 maLocale
.Country
= getCountry();
583 maLocale
.Variant
= maBcp47
;
585 mbInitializedLocale
= true;
589 void LanguageTag::convertBcp47ToLang()
593 mnLangID
= MsLangId::getRealLanguage( LANGUAGE_SYSTEM
);
597 /* FIXME: this is temporary. If we support locales that consist not
598 * only of language and country, e.g. added script, this probably needs
600 if (!mbInitializedLocale
)
601 convertBcp47ToLocale();
602 convertLocaleToLang();
604 mbInitializedLangID
= true;
608 void LanguageTag::convertLangToLocale()
610 if (mbSystemLocale
&& !mbInitializedLangID
)
612 mnLangID
= MsLangId::getRealLanguage( LANGUAGE_SYSTEM
);
613 mbInitializedLangID
= true;
615 /* FIXME: this is temporary until code base is converted to not use
616 * MsLangId::convert...() anymore. After that, proper new method has to be
617 * implemented to allow ISO639_LANGUAGE_TAG and script tag and such. */
618 // Resolve system here!
619 maLocale
= MsLangId::Conversion::convertLanguageToLocale( mnLangID
, true);
620 mbInitializedLocale
= true;
624 void LanguageTag::convertLangToBcp47()
626 /* FIXME: this is temporary. If we support locales that consist not only of
627 * language and country, e.g. added script, this probably needs to be
629 if (!mbInitializedLocale
)
630 convertLangToLocale();
631 convertLocaleToBcp47();
632 mbInitializedBcp47
= true;
636 const rtl::OUString
& LanguageTag::getBcp47( bool bResolveSystem
) const
638 if (!bResolveSystem
&& mbSystemLocale
)
639 return theEmptyBcp47::get();
640 if (!mbInitializedBcp47
)
642 if (mbInitializedLocale
)
643 const_cast<LanguageTag
*>(this)->convertLocaleToBcp47();
645 const_cast<LanguageTag
*>(this)->convertLangToBcp47();
651 rtl::OUString
LanguageTag::getLanguageFromLangtag()
654 if (meIsLiblangtagNeeded
!= DECISION_NO
&& !mpImplLangtag
)
656 if (maBcp47
.isEmpty())
660 const lt_lang_t
* pLangT
= lt_tag_get_language( MPLANGTAG
);
661 SAL_WARN_IF( !pLangT
, "i18npool.langtag", "LanguageTag::getLanguageFromLangtag: pLangT==NULL");
664 const char* pLang
= lt_lang_get_tag( pLangT
);
665 SAL_WARN_IF( !pLang
, "i18npool.langtag", "LanguageTag::getLanguageFromLangtag: pLang==NULL");
667 aLanguage
= OUString::createFromAscii( pLang
);
671 if (mbCachedLanguage
|| cacheSimpleLSC())
672 aLanguage
= maCachedLanguage
;
678 rtl::OUString
LanguageTag::getScriptFromLangtag()
681 if (meIsLiblangtagNeeded
!= DECISION_NO
&& !mpImplLangtag
)
683 if (maBcp47
.isEmpty())
687 const lt_script_t
* pScriptT
= lt_tag_get_script( MPLANGTAG
);
688 // pScriptT==NULL is valid for default scripts
691 const char* pScript
= lt_script_get_tag( pScriptT
);
692 SAL_WARN_IF( !pScript
, "i18npool.langtag", "LanguageTag::getScriptFromLangtag: pScript==NULL");
694 aScript
= OUString::createFromAscii( pScript
);
698 if (mbCachedScript
|| cacheSimpleLSC())
699 aScript
= maCachedScript
;
705 rtl::OUString
LanguageTag::getRegionFromLangtag()
708 if (meIsLiblangtagNeeded
!= DECISION_NO
&& !mpImplLangtag
)
710 if (maBcp47
.isEmpty())
714 const lt_region_t
* pRegionT
= lt_tag_get_region( MPLANGTAG
);
715 // pRegionT==NULL is valid for language only tags, rough check here
716 // that does not take sophisticated tags into account that actually
717 // should have a region, check for ll, lll, ll-Ssss and lll-Ssss so
718 // that ll-CC and lll-CC actually fail.
719 SAL_WARN_IF( !pRegionT
&&
720 maBcp47
.getLength() != 2 && maBcp47
.getLength() != 3 &&
721 maBcp47
.getLength() != 7 && maBcp47
.getLength() != 8,
722 "i18npool.langtag", "LanguageTag::getRegionFromLangtag: pRegionT==NULL");
725 const char* pRegion
= lt_region_get_tag( pRegionT
);
726 SAL_WARN_IF( !pRegion
, "i18npool.langtag", "LanguageTag::getRegionFromLangtag: pRegion==NULL");
728 aRegion
= OUString::createFromAscii( pRegion
);
732 if (mbCachedCountry
|| cacheSimpleLSC())
733 aRegion
= maCachedCountry
;
739 const com::sun::star::lang::Locale
& LanguageTag::getLocale( bool bResolveSystem
) const
741 if (!bResolveSystem
&& mbSystemLocale
)
742 return theEmptyLocale::get();
743 if (!mbInitializedLocale
)
745 if (mbInitializedBcp47
)
746 const_cast<LanguageTag
*>(this)->convertBcp47ToLocale();
748 const_cast<LanguageTag
*>(this)->convertLangToLocale();
754 LanguageType
LanguageTag::getLanguageType( bool bResolveSystem
) const
756 if (!bResolveSystem
&& mbSystemLocale
)
757 return LANGUAGE_SYSTEM
;
758 if (!mbInitializedLangID
)
760 if (mbInitializedBcp47
)
761 const_cast<LanguageTag
*>(this)->convertBcp47ToLang();
763 const_cast<LanguageTag
*>(this)->convertLocaleToLang();
769 void LanguageTag::getIsoLanguageCountry( rtl::OUString
& rLanguage
, rtl::OUString
& rCountry
) const
773 rLanguage
= OUString();
774 rCountry
= OUString();
777 // After isIsoLocale() it's safe to call getLanguage() for ISO code.
778 rLanguage
= getLanguage();
779 rCountry
= getCountry();
786 bool isLowerAscii( sal_Unicode c
)
788 return 'a' <= c
&& c
<= 'z';
791 bool isUpperAscii( sal_Unicode c
)
793 return 'A' <= c
&& c
<= 'Z';
800 bool LanguageTag::isIsoLanguage( const rtl::OUString
& rLanguage
)
802 /* TODO: ignore case? For now let's see where rubbish is used. */
804 if (((b2chars
= (rLanguage
.getLength() == 2)) || rLanguage
.getLength() == 3) &&
805 isLowerAscii( rLanguage
[0]) && isLowerAscii( rLanguage
[1]) &&
806 (b2chars
|| isLowerAscii( rLanguage
[2])))
808 SAL_WARN_IF( ((rLanguage
.getLength() == 2 || rLanguage
.getLength() == 3) &&
809 (isUpperAscii( rLanguage
[0]) || isUpperAscii( rLanguage
[1]))) ||
810 (rLanguage
.getLength() == 3 && isUpperAscii( rLanguage
[2])), "i18npool.langtag",
811 "LanguageTag::isIsoLanguage: rejecting upper case " << rLanguage
);
817 bool LanguageTag::isIsoCountry( const rtl::OUString
& rRegion
)
819 /* TODO: ignore case? For now let's see where rubbish is used. */
820 if (rRegion
.isEmpty() ||
821 (rRegion
.getLength() == 2 && isUpperAscii( rRegion
[0]) && isUpperAscii( rRegion
[1])))
823 SAL_WARN_IF( rRegion
.getLength() == 2 && (isLowerAscii( rRegion
[0]) || isLowerAscii( rRegion
[1])),
824 "i18npool.langtag", "LanguageTag::isIsoCountry: rejecting lower case " << rRegion
);
830 bool LanguageTag::isIsoScript( const rtl::OUString
& rScript
)
832 /* TODO: ignore case? For now let's see where rubbish is used. */
833 if (rScript
.isEmpty() ||
834 (rScript
.getLength() == 4 &&
835 isUpperAscii( rScript
[0]) && isLowerAscii( rScript
[1]) &&
836 isLowerAscii( rScript
[2]) && isLowerAscii( rScript
[3])))
838 SAL_WARN_IF( rScript
.getLength() == 4 &&
839 (isLowerAscii( rScript
[0]) || isUpperAscii( rScript
[1]) ||
840 isUpperAscii( rScript
[2]) || isUpperAscii( rScript
[3])),
841 "i18npool.langtag", "LanguageTag::isIsoScript: rejecting case mismatch " << rScript
);
846 rtl::OUString
LanguageTag::getLanguage() const
848 if (!mbCachedLanguage
)
850 maCachedLanguage
= const_cast<LanguageTag
*>(this)->getLanguageFromLangtag();
851 mbCachedLanguage
= true;
853 return maCachedLanguage
;
857 rtl::OUString
LanguageTag::getScript() const
861 maCachedScript
= const_cast<LanguageTag
*>(this)->getScriptFromLangtag();
862 mbCachedScript
= true;
864 return maCachedScript
;
868 rtl::OUString
LanguageTag::getLanguageAndScript() const
870 OUString
aLanguageScript( getLanguage());
871 OUString
aScript( getScript());
872 if (!aScript
.isEmpty())
874 OUStringBuffer
aBuf( aLanguageScript
.getLength() + 1 + aScript
.getLength());
875 aBuf
.append( aLanguageScript
).append( '-').append( aScript
);
876 aLanguageScript
= aBuf
.makeStringAndClear();
878 return aLanguageScript
;
882 rtl::OUString
LanguageTag::getCountry() const
884 if (!mbCachedCountry
)
886 maCachedCountry
= const_cast<LanguageTag
*>(this)->getRegionFromLangtag();
887 if (!isIsoCountry( maCachedCountry
))
888 maCachedCountry
= OUString();
889 mbCachedCountry
= true;
891 return maCachedCountry
;
895 rtl::OUString
LanguageTag::getRegion() const
897 return const_cast<LanguageTag
*>(this)->getRegionFromLangtag();
901 bool LanguageTag::cacheSimpleLSC()
903 OUString aLanguage
, aScript
, aCountry
;
904 bool bRet
= simpleExtract( maBcp47
, aLanguage
, aScript
, aCountry
);
907 maCachedLanguage
= aLanguage
;
908 maCachedScript
= aScript
;
909 maCachedCountry
= aCountry
;
910 mbCachedLanguage
= mbCachedScript
= mbCachedCountry
= true;
916 bool LanguageTag::isIsoLocale() const
918 if (meIsIsoLocale
== DECISION_DONTKNOW
)
920 if (meIsLiblangtagNeeded
!= DECISION_NO
&& !mpImplLangtag
)
921 const_cast<LanguageTag
*>(this)->canonicalize();
922 // It must be at most ll-CC or lll-CC
923 // Do not use getCountry() here, use getRegion() instead.
924 meIsIsoLocale
= ((maBcp47
.isEmpty() ||
925 (maBcp47
.getLength() <= 6 && isIsoLanguage( getLanguage()) && isIsoCountry( getRegion()))) ?
926 DECISION_YES
: DECISION_NO
);
928 return meIsIsoLocale
== DECISION_YES
;
932 bool LanguageTag::isIsoODF() const
934 if (meIsIsoODF
== DECISION_DONTKNOW
)
936 if (meIsLiblangtagNeeded
!= DECISION_NO
&& !mpImplLangtag
)
937 const_cast<LanguageTag
*>(this)->canonicalize();
938 if (!isIsoScript( getScript()))
939 return ((meIsIsoODF
= DECISION_NO
) == DECISION_YES
);
940 // The usual case is lll-CC so simply check that first.
942 return ((meIsIsoODF
= DECISION_YES
) == DECISION_YES
);
943 // If this is not ISO locale for which script must not exist it can
944 // still be ISO locale plus ISO script lll-Ssss-CC
945 meIsIsoODF
= ((maBcp47
.getLength() <= 11 &&
946 isIsoLanguage( getLanguage()) && isIsoCountry( getRegion()) && isIsoScript( getScript())) ?
947 DECISION_YES
: DECISION_NO
);
949 return meIsIsoODF
== DECISION_YES
;
953 bool LanguageTag::isValidBcp47() const
955 if (meIsValid
== DECISION_DONTKNOW
)
957 if (meIsLiblangtagNeeded
!= DECISION_NO
&& !mpImplLangtag
)
958 const_cast<LanguageTag
*>(this)->canonicalize();
959 SAL_WARN_IF( meIsValid
== DECISION_DONTKNOW
, "i18npool.langtag",
960 "LanguageTag::isValidBcp47: canonicalize() didn't set meIsValid");
962 return meIsValid
== DECISION_YES
;
966 bool LanguageTag::isSystemLocale() const
968 return mbSystemLocale
;
972 LanguageTag
& LanguageTag::makeFallback()
976 if (mbInitializedLangID
)
978 LanguageType nLang1
= getLanguageType();
979 LanguageType nLang2
= MsLangId::Conversion::lookupFallbackLanguage( nLang1
);
980 if (nLang1
!= nLang2
)
985 const lang::Locale
& rLocale1
= getLocale();
986 lang::Locale
aLocale2( MsLangId::Conversion::lookupFallbackLocale( rLocale1
));
987 if ( rLocale1
.Language
!= aLocale2
.Language
||
988 rLocale1
.Country
!= aLocale2
.Country
||
989 rLocale1
.Variant
!= aLocale2
.Variant
)
998 bool LanguageTag::operator==( const LanguageTag
& rLanguageTag
) const
1000 // Compare full language tag strings but SYSTEM unresolved.
1001 return getBcp47( false) == rLanguageTag
.getBcp47( false);
1005 bool LanguageTag::operator!=( const LanguageTag
& rLanguageTag
) const
1007 return !operator==( rLanguageTag
);
1012 bool LanguageTag::simpleExtract( const rtl::OUString
& rBcp47
,
1013 rtl::OUString
& rLanguage
,
1014 rtl::OUString
& rScript
,
1015 rtl::OUString
& rCountry
)
1018 const sal_Int32 nLen
= rBcp47
.getLength();
1019 const sal_Int32 nHyph1
= rBcp47
.indexOf( '-');
1020 if ((nLen
== 2 || nLen
== 3) && nHyph1
< 0) // ll or lll
1023 rScript
= rCountry
= OUString();
1026 else if ( (nLen
== 5 && nHyph1
== 2) // ll-CC
1027 || (nLen
== 6 && nHyph1
== 3)) // lll-CC
1029 rLanguage
= rBcp47
.copy( 0, nHyph1
);
1030 rCountry
= rBcp47
.copy( nHyph1
+ 1, 2);
1031 rScript
= OUString();
1034 else if ( (nHyph1
== 2 && nLen
== 10) // ll-Ssss-CC check
1035 || (nHyph1
== 3 && nLen
== 11)) // lll-Ssss-CC check
1037 const sal_Int32 nHyph2
= rBcp47
.indexOf( '-', nHyph1
+ 1);
1038 if (nHyph2
== nHyph1
+ 5)
1040 rLanguage
= rBcp47
.copy( 0, nHyph1
);
1041 rScript
= rBcp47
.copy( nHyph1
+ 1, 4);
1042 rCountry
= rBcp47
.copy( nHyph2
+ 1, 2);
1047 rLanguage
= rScript
= rCountry
= OUString();
1052 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */