Version 4.0.0.1, tag libreoffice-4.0.0.1
[LibreOffice.git] / i18npool / source / languagetag / languagetag.cxx
blobbab2443710630cef929262409da7d4aba76f87b2
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
10 #include "i18npool/languagetag.hxx"
11 #include "i18npool/mslangid.hxx"
12 #include <rtl/ustrbuf.hxx>
13 #include <rtl/bootstrap.hxx>
14 #include <osl/file.hxx>
15 #include <rtl/instance.hxx>
16 #include <rtl/locale.h>
18 //#define erDEBUG
20 #if defined(ENABLE_LIBLANGTAG)
21 #include <liblangtag/langtag.h>
22 #else
/* Replacement code for LGPL phobic and Android systems.
 * For iOS we could probably use NSLocale instead, which should provide more
 * or less the required functionality. If it is good enough, it could be used
 * for Mac OS X, too.
 */
28 #include "simple-langtag.cxx"
29 #endif
31 using rtl::OUString;
32 using rtl::OString;
33 using rtl::OUStringBuffer;
34 using namespace com::sun::star;
36 // The actual pointer type of mpImplLangtag that is declared void* to not
37 // pollute the entire code base with liblangtag.
38 #define LANGTAGCAST(p) (reinterpret_cast<lt_tag_t*>(p))
39 #define MPLANGTAG LANGTAGCAST(mpImplLangtag)
41 /** Convention to signal presence of BCP 47 language tag in a Locale's Variant
42 field. The Locale's Language field then will contain this ISO 639-2
43 reserved for local use code. */
44 #define ISO639_LANGUAGE_TAG "qlt"
47 // "statics" to be returned as const reference to an empty locale and string.
48 namespace {
49 struct theEmptyLocale : public rtl::Static< lang::Locale, theEmptyLocale > {};
50 struct theEmptyBcp47 : public rtl::Static< OUString, theEmptyBcp47 > {};
54 /** A reference holder for liblangtag data de/initialization, one static
55 instance. Currently implemented such that the first "ref" inits and dtor
56 (our library deinitialized) tears down.
58 class LiblantagDataRef
60 public:
61 LiblantagDataRef();
62 ~LiblantagDataRef();
63 inline void incRef()
65 if (mnRef != SAL_MAX_UINT32 && !mnRef++)
66 setup();
68 inline void decRef()
70 if (mnRef != SAL_MAX_UINT32 && mnRef && !--mnRef)
71 teardown();
73 private:
74 rtl::OString maDataPath; // path to liblangtag data, "|" if system
75 sal_uInt32 mnRef;
77 void setupDataPath();
78 void setup();
79 void teardown();
82 namespace {
83 struct theDataRef : public rtl::Static< LiblantagDataRef, theDataRef > {};
86 LiblantagDataRef::LiblantagDataRef()
88 mnRef(0)
92 LiblantagDataRef::~LiblantagDataRef()
94 // When destructed we're tearing down unconditionally.
95 if (mnRef)
96 mnRef = 1;
97 decRef();
100 void LiblantagDataRef::setup()
102 SAL_INFO( "i18npool.langtag", "LiblantagDataRef::setup: initializing database");
103 if (maDataPath.isEmpty())
104 setupDataPath();
105 lt_db_initialize();
106 // Hold ref eternally.
107 mnRef = SAL_MAX_UINT32;
110 void LiblantagDataRef::teardown()
112 SAL_INFO( "i18npool.langtag", "LiblantagDataRef::teardown: finalizing database");
113 lt_db_finalize();
116 void LiblantagDataRef::setupDataPath()
118 // maDataPath is assumed to be empty here.
119 OUString aURL("$BRAND_BASE_DIR/share/liblangtag");
120 rtl::Bootstrap::expandMacros(aURL); //TODO: detect failure
122 // Check if data is in our own installation, else assume system
123 // installation.
124 OUString aData( aURL);
125 aData += "/language-subtag-registry.xml";
126 osl::DirectoryItem aDirItem;
127 if (osl::DirectoryItem::get( aData, aDirItem) == osl::DirectoryItem::E_None)
129 OUString aPath;
130 if (osl::FileBase::getSystemPathFromFileURL( aURL, aPath) == osl::FileBase::E_None)
131 maDataPath = OUStringToOString( aPath, RTL_TEXTENCODING_UTF8);
133 if (maDataPath.isEmpty())
134 maDataPath = "|"; // assume system
135 else
136 lt_db_set_datadir( maDataPath.getStr());
139 LanguageTag::LanguageTag( const rtl::OUString & rBcp47LanguageTag, bool bCanonicalize )
141 maBcp47( rBcp47LanguageTag),
142 mpImplLangtag( NULL),
143 mnLangID( LANGUAGE_DONTKNOW),
144 meIsValid( DECISION_DONTKNOW),
145 meIsIsoLocale( DECISION_DONTKNOW),
146 meIsIsoODF( DECISION_DONTKNOW),
147 meIsLiblangtagNeeded( DECISION_DONTKNOW),
148 mbSystemLocale( rBcp47LanguageTag.isEmpty()),
149 mbInitializedBcp47( !mbSystemLocale),
150 mbInitializedLocale( false),
151 mbInitializedLangID( false),
152 mbCachedLanguage( false),
153 mbCachedScript( false),
154 mbCachedCountry( false),
155 mbIsFallback( false)
157 if (bCanonicalize)
158 canonicalize();
162 LanguageTag::LanguageTag( const com::sun::star::lang::Locale & rLocale )
164 maLocale( rLocale),
165 mpImplLangtag( NULL),
166 mnLangID( LANGUAGE_DONTKNOW),
167 meIsValid( DECISION_DONTKNOW),
168 meIsIsoLocale( DECISION_DONTKNOW),
169 meIsIsoODF( DECISION_DONTKNOW),
170 meIsLiblangtagNeeded( DECISION_DONTKNOW),
171 mbSystemLocale( rLocale.Language.isEmpty()),
172 mbInitializedBcp47( false),
173 mbInitializedLocale( !mbSystemLocale),
174 mbInitializedLangID( false),
175 mbCachedLanguage( false),
176 mbCachedScript( false),
177 mbCachedCountry( false),
178 mbIsFallback( false)
183 LanguageTag::LanguageTag( LanguageType nLanguage )
185 mpImplLangtag( NULL),
186 mnLangID( nLanguage),
187 meIsValid( DECISION_DONTKNOW),
188 meIsIsoLocale( DECISION_DONTKNOW),
189 meIsIsoODF( DECISION_DONTKNOW),
190 meIsLiblangtagNeeded( DECISION_DONTKNOW),
191 mbSystemLocale( nLanguage == LANGUAGE_SYSTEM),
192 mbInitializedBcp47( false),
193 mbInitializedLocale( false),
194 mbInitializedLangID( !mbSystemLocale),
195 mbCachedLanguage( false),
196 mbCachedScript( false),
197 mbCachedCountry( false),
198 mbIsFallback( false)
203 LanguageTag::LanguageTag( const rtl::OUString& rLanguage, const rtl::OUString& rCountry )
205 maLocale( rLanguage, rCountry, ""),
206 mpImplLangtag( NULL),
207 mnLangID( LANGUAGE_DONTKNOW),
208 meIsValid( DECISION_DONTKNOW),
209 meIsIsoLocale( DECISION_DONTKNOW),
210 meIsIsoODF( DECISION_DONTKNOW),
211 meIsLiblangtagNeeded( DECISION_DONTKNOW),
212 mbSystemLocale( rLanguage.isEmpty()),
213 mbInitializedBcp47( false),
214 mbInitializedLocale( !mbSystemLocale),
215 mbInitializedLangID( false),
216 mbCachedLanguage( false),
217 mbCachedScript( false),
218 mbCachedCountry( false),
219 mbIsFallback( false)
224 LanguageTag::LanguageTag( const rtl_Locale & rLocale )
226 maLocale( rLocale.Language, rLocale.Country, rLocale.Variant),
227 mpImplLangtag( NULL),
228 mnLangID( LANGUAGE_DONTKNOW),
229 meIsValid( DECISION_DONTKNOW),
230 meIsIsoLocale( DECISION_DONTKNOW),
231 meIsIsoODF( DECISION_DONTKNOW),
232 meIsLiblangtagNeeded( DECISION_DONTKNOW),
233 mbSystemLocale( maLocale.Language.isEmpty()),
234 mbInitializedBcp47( false),
235 mbInitializedLocale( !mbSystemLocale),
236 mbInitializedLangID( false),
237 mbCachedLanguage( false),
238 mbCachedScript( false),
239 mbCachedCountry( false),
240 mbIsFallback( false)
245 LanguageTag::LanguageTag( const LanguageTag & rLanguageTag )
247 maLocale( rLanguageTag.maLocale),
248 maBcp47( rLanguageTag.maBcp47),
249 maCachedLanguage( rLanguageTag.maCachedLanguage),
250 maCachedScript( rLanguageTag.maCachedScript),
251 maCachedCountry( rLanguageTag.maCachedCountry),
252 mpImplLangtag( rLanguageTag.mpImplLangtag ?
253 lt_tag_copy( LANGTAGCAST( rLanguageTag.mpImplLangtag)) : NULL),
254 mnLangID( rLanguageTag.mnLangID),
255 meIsValid( rLanguageTag.meIsValid),
256 meIsIsoLocale( rLanguageTag.meIsIsoLocale),
257 meIsIsoODF( rLanguageTag.meIsIsoODF),
258 meIsLiblangtagNeeded( rLanguageTag.meIsLiblangtagNeeded),
259 mbSystemLocale( rLanguageTag.mbSystemLocale),
260 mbInitializedBcp47( rLanguageTag.mbInitializedBcp47),
261 mbInitializedLocale( rLanguageTag.mbInitializedLocale),
262 mbInitializedLangID( rLanguageTag.mbInitializedLangID),
263 mbCachedLanguage( rLanguageTag.mbCachedLanguage),
264 mbCachedScript( rLanguageTag.mbCachedScript),
265 mbCachedCountry( rLanguageTag.mbCachedCountry),
266 mbIsFallback( rLanguageTag.mbIsFallback)
268 if (mpImplLangtag)
269 theDataRef::get().incRef();
273 LanguageTag& LanguageTag::operator=( const LanguageTag & rLanguageTag )
275 maLocale = rLanguageTag.maLocale;
276 maBcp47 = rLanguageTag.maBcp47;
277 maCachedLanguage = rLanguageTag.maCachedLanguage;
278 maCachedScript = rLanguageTag.maCachedScript;
279 maCachedCountry = rLanguageTag.maCachedCountry;
280 mpImplLangtag = rLanguageTag.mpImplLangtag;
281 mpImplLangtag = rLanguageTag.mpImplLangtag ?
282 lt_tag_copy( LANGTAGCAST( rLanguageTag.mpImplLangtag)) : NULL;
283 mnLangID = rLanguageTag.mnLangID;
284 meIsValid = rLanguageTag.meIsValid;
285 meIsIsoLocale = rLanguageTag.meIsIsoLocale;
286 meIsIsoODF = rLanguageTag.meIsIsoODF;
287 meIsLiblangtagNeeded= rLanguageTag.meIsLiblangtagNeeded;
288 mbSystemLocale = rLanguageTag.mbSystemLocale;
289 mbInitializedBcp47 = rLanguageTag.mbInitializedBcp47;
290 mbInitializedLocale = rLanguageTag.mbInitializedLocale;
291 mbInitializedLangID = rLanguageTag.mbInitializedLangID;
292 mbCachedLanguage = rLanguageTag.mbCachedLanguage;
293 mbCachedScript = rLanguageTag.mbCachedScript;
294 mbCachedCountry = rLanguageTag.mbCachedCountry;
295 mbIsFallback = rLanguageTag.mbIsFallback;
296 if (mpImplLangtag)
297 theDataRef::get().incRef();
298 return *this;
302 LanguageTag::~LanguageTag()
304 if (mpImplLangtag)
306 lt_tag_unref( MPLANGTAG);
307 theDataRef::get().decRef();
312 void LanguageTag::resetVars()
314 if (mpImplLangtag)
316 lt_tag_unref( MPLANGTAG);
317 mpImplLangtag = NULL;
318 theDataRef::get().decRef();
321 maLocale = lang::Locale();
322 if (!maBcp47.isEmpty())
323 maBcp47 = OUString();
324 if (!maCachedLanguage.isEmpty())
325 maCachedLanguage= OUString();
326 if (!maCachedScript.isEmpty())
327 maCachedScript = OUString();
328 if (!maCachedCountry.isEmpty())
329 maCachedCountry = OUString();
330 mnLangID = LANGUAGE_DONTKNOW;
331 meIsValid = DECISION_DONTKNOW;
332 meIsIsoLocale = DECISION_DONTKNOW;
333 meIsIsoODF = DECISION_DONTKNOW;
334 meIsLiblangtagNeeded= DECISION_DONTKNOW;
335 mbSystemLocale = true;
336 mbInitializedBcp47 = false;
337 mbInitializedLocale = false;
338 mbInitializedLangID = false;
339 mbCachedLanguage = false;
340 mbCachedScript = false;
341 mbCachedCountry = false;
342 mbIsFallback = false;
346 void LanguageTag::reset( const rtl::OUString & rBcp47LanguageTag, bool bCanonicalize )
348 resetVars();
349 maBcp47 = rBcp47LanguageTag;
350 mbSystemLocale = rBcp47LanguageTag.isEmpty();
351 mbInitializedBcp47 = !mbSystemLocale;
353 if (bCanonicalize)
354 canonicalize();
358 void LanguageTag::reset( const com::sun::star::lang::Locale & rLocale )
360 resetVars();
361 maLocale = rLocale;
362 mbSystemLocale = rLocale.Language.isEmpty();
363 mbInitializedLocale = !mbSystemLocale;
367 void LanguageTag::reset( LanguageType nLanguage )
369 resetVars();
370 mnLangID = nLanguage;
371 mbSystemLocale = nLanguage == LANGUAGE_SYSTEM;
372 mbInitializedLangID = !mbSystemLocale;
376 bool LanguageTag::canonicalize()
378 #ifdef erDEBUG
379 // dump once
380 struct dumper
382 void** mpp;
383 dumper( void** pp ) : mpp( *pp ? NULL : pp) {}
384 ~dumper() { if (mpp && *mpp) lt_tag_dump( LANGTAGCAST( *mpp)); }
386 dumper aDumper( &mpImplLangtag);
387 #endif
389 // Side effect: have maBcp47 in any case, resolved system.
390 // Some methods calling canonicalize() (or not calling it due to
391 // meIsLiblangtagNeeded==DECISION_NO) rely on this! Hence do not set
392 // meIsLiblangtagNeeded anywhere else than hereafter.
393 getBcp47( true );
395 // The simple cases and known locales don't need liblangtag processing,
396 // which also avoids loading liblangtag data on startup.
397 if (meIsLiblangtagNeeded == DECISION_DONTKNOW)
399 bool bTemporaryLocale = false;
400 bool bTemporaryLangID = false;
401 if (!mbInitializedLocale && !mbInitializedLangID)
403 if (mbSystemLocale)
405 mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
406 mbInitializedLangID = true;
408 else
410 // Now this is getting funny.. we only have some BCP47 string
411 // and want to determine if parsing it would be possible
412 // without using liblangtag just to see if it is a simple known
413 // locale.
414 OUString aLanguage, aScript, aCountry;
415 if (simpleExtract( maBcp47, aLanguage, aScript, aCountry))
417 if (aScript.isEmpty())
419 maLocale.Language = aLanguage;
420 maLocale.Country = aCountry;
422 else
424 maLocale.Language = ISO639_LANGUAGE_TAG;
425 maLocale.Country = aCountry;
426 maLocale.Variant = maBcp47;
428 bTemporaryLocale = mbInitializedLocale = true;
432 if (mbInitializedLangID && !mbInitializedLocale)
434 // Do not call getLocale() here because that prefers
435 // convertBcp47ToLocale() which would end up in recursion via
436 // isIsoLocale()!
438 // Prepare to verify that we have a known locale, not just an
439 // arbitrary MS-LangID.
440 convertLangToLocale();
442 if (mbInitializedLocale)
444 if (maLocale.Variant.isEmpty())
445 meIsLiblangtagNeeded = DECISION_NO; // per definition ll[l][-CC]
446 else
448 if (!mbInitializedLangID)
450 convertLocaleToLang();
451 if (bTemporaryLocale)
452 bTemporaryLangID = true;
454 if (mnLangID != LANGUAGE_DONTKNOW && mnLangID != LANGUAGE_SYSTEM)
455 meIsLiblangtagNeeded = DECISION_NO; // known locale
458 if (bTemporaryLocale)
460 mbInitializedLocale = false;
461 maLocale = lang::Locale();
463 if (bTemporaryLangID)
465 mbInitializedLangID = false;
466 mnLangID = LANGUAGE_DONTKNOW;
469 if (meIsLiblangtagNeeded == DECISION_NO)
471 meIsValid = DECISION_YES; // really, known must be valid ...
472 return true; // that's it
474 meIsLiblangtagNeeded = DECISION_YES;
475 SAL_INFO( "i18npool.langtag", "LanguageTag::canonicalize: using liblangtag for " << maBcp47);
477 if (!mpImplLangtag)
479 theDataRef::get().incRef();
480 mpImplLangtag = lt_tag_new();
483 // ensure error is free'd
484 struct myerror
486 lt_error_t* p;
487 myerror() : p(NULL) {}
488 ~myerror() { if (p) lt_error_unref( p); }
489 } aError;
491 if (lt_tag_parse( MPLANGTAG, OUStringToOString( maBcp47, RTL_TEXTENCODING_UTF8).getStr(), &aError.p))
493 char* pTag = lt_tag_canonicalize( MPLANGTAG, &aError.p);
494 SAL_WARN_IF( !pTag, "i18npool.langtag", "LanguageTag::canonicalize: could not canonicalize " << maBcp47);
495 if (pTag)
497 OUString aOld( maBcp47);
498 maBcp47 = OUString::createFromAscii( pTag);
499 // Make the lt_tag_t follow the new string if different, which
500 // removes default script and such.
501 if (maBcp47 != aOld)
503 if (!lt_tag_parse( MPLANGTAG, pTag, &aError.p))
505 SAL_WARN( "i18npool.langtag", "LanguageTag::canonicalize: could not reparse " << maBcp47);
506 free( pTag);
507 meIsValid = DECISION_NO;
508 return false;
511 free( pTag);
512 meIsValid = DECISION_YES;
513 return true;
516 else
518 SAL_INFO( "i18npool.langtag", "LanguageTag::canonicalize: could not parse " << maBcp47);
520 meIsValid = DECISION_NO;
521 return false;
525 void LanguageTag::convertLocaleToBcp47()
527 if (mbSystemLocale && !mbInitializedLocale)
528 convertLangToLocale();
530 if (maLocale.Language == ISO639_LANGUAGE_TAG)
532 maBcp47 = maLocale.Variant;
533 meIsIsoLocale = DECISION_NO;
535 else
537 /* XXX NOTE: most legacy code never evaluated the Variant field, so for
538 * now just concatenate language and country. In case we stumbled over
539 * variant aware code we'd have to take care of that. */
540 if (maLocale.Country.isEmpty())
541 maBcp47 = maLocale.Language;
542 else
544 OUStringBuffer aBuf( maLocale.Language.getLength() + 1 + maLocale.Country.getLength());
545 aBuf.append( maLocale.Language).append( '-').append( maLocale.Country);
546 maBcp47 = aBuf.makeStringAndClear();
549 mbInitializedBcp47 = true;
553 void LanguageTag::convertLocaleToLang()
555 if (mbSystemLocale)
557 mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
559 else
561 /* FIXME: this is temporary until code base is converted to not use
562 * MsLangId::convert...() anymore. After that, proper new method has to
563 * be implemented to allow ISO639_LANGUAGE_TAG and sript tag and such. */
564 mnLangID = MsLangId::Conversion::convertLocaleToLanguage( maLocale);
566 mbInitializedLangID = true;
570 void LanguageTag::convertBcp47ToLocale()
572 bool bIso = isIsoLocale();
573 if (bIso)
575 maLocale.Language = getLanguageFromLangtag();
576 maLocale.Country = getRegionFromLangtag();
577 maLocale.Variant = OUString();
579 else
581 maLocale.Language = ISO639_LANGUAGE_TAG;
582 maLocale.Country = getCountry();
583 maLocale.Variant = maBcp47;
585 mbInitializedLocale = true;
589 void LanguageTag::convertBcp47ToLang()
591 if (mbSystemLocale)
593 mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
595 else
597 /* FIXME: this is temporary. If we support locales that consist not
598 * only of language and country, e.g. added script, this probably needs
599 * to be adapted. */
600 if (!mbInitializedLocale)
601 convertBcp47ToLocale();
602 convertLocaleToLang();
604 mbInitializedLangID = true;
608 void LanguageTag::convertLangToLocale()
610 if (mbSystemLocale && !mbInitializedLangID)
612 mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
613 mbInitializedLangID = true;
615 /* FIXME: this is temporary until code base is converted to not use
616 * MsLangId::convert...() anymore. After that, proper new method has to be
617 * implemented to allow ISO639_LANGUAGE_TAG and script tag and such. */
618 // Resolve system here!
619 maLocale = MsLangId::Conversion::convertLanguageToLocale( mnLangID, true);
620 mbInitializedLocale = true;
624 void LanguageTag::convertLangToBcp47()
626 /* FIXME: this is temporary. If we support locales that consist not only of
627 * language and country, e.g. added script, this probably needs to be
628 * adapted. */
629 if (!mbInitializedLocale)
630 convertLangToLocale();
631 convertLocaleToBcp47();
632 mbInitializedBcp47 = true;
636 const rtl::OUString & LanguageTag::getBcp47( bool bResolveSystem ) const
638 if (!bResolveSystem && mbSystemLocale)
639 return theEmptyBcp47::get();
640 if (!mbInitializedBcp47)
642 if (mbInitializedLocale)
643 const_cast<LanguageTag*>(this)->convertLocaleToBcp47();
644 else
645 const_cast<LanguageTag*>(this)->convertLangToBcp47();
647 return maBcp47;
651 rtl::OUString LanguageTag::getLanguageFromLangtag()
653 OUString aLanguage;
654 if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
655 canonicalize();
656 if (maBcp47.isEmpty())
657 return aLanguage;
658 if (mpImplLangtag)
660 const lt_lang_t* pLangT = lt_tag_get_language( MPLANGTAG);
661 SAL_WARN_IF( !pLangT, "i18npool.langtag", "LanguageTag::getLanguageFromLangtag: pLangT==NULL");
662 if (!pLangT)
663 return aLanguage;
664 const char* pLang = lt_lang_get_tag( pLangT);
665 SAL_WARN_IF( !pLang, "i18npool.langtag", "LanguageTag::getLanguageFromLangtag: pLang==NULL");
666 if (pLang)
667 aLanguage = OUString::createFromAscii( pLang);
669 else
671 if (mbCachedLanguage || cacheSimpleLSC())
672 aLanguage = maCachedLanguage;
674 return aLanguage;
678 rtl::OUString LanguageTag::getScriptFromLangtag()
680 OUString aScript;
681 if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
682 canonicalize();
683 if (maBcp47.isEmpty())
684 return aScript;
685 if (mpImplLangtag)
687 const lt_script_t* pScriptT = lt_tag_get_script( MPLANGTAG);
688 // pScriptT==NULL is valid for default scripts
689 if (!pScriptT)
690 return aScript;
691 const char* pScript = lt_script_get_tag( pScriptT);
692 SAL_WARN_IF( !pScript, "i18npool.langtag", "LanguageTag::getScriptFromLangtag: pScript==NULL");
693 if (pScript)
694 aScript = OUString::createFromAscii( pScript);
696 else
698 if (mbCachedScript || cacheSimpleLSC())
699 aScript = maCachedScript;
701 return aScript;
705 rtl::OUString LanguageTag::getRegionFromLangtag()
707 OUString aRegion;
708 if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
709 canonicalize();
710 if (maBcp47.isEmpty())
711 return aRegion;
712 if (mpImplLangtag)
714 const lt_region_t* pRegionT = lt_tag_get_region( MPLANGTAG);
715 // pRegionT==NULL is valid for language only tags, rough check here
716 // that does not take sophisticated tags into account that actually
717 // should have a region, check for ll, lll, ll-Ssss and lll-Ssss so
718 // that ll-CC and lll-CC actually fail.
719 SAL_WARN_IF( !pRegionT &&
720 maBcp47.getLength() != 2 && maBcp47.getLength() != 3 &&
721 maBcp47.getLength() != 7 && maBcp47.getLength() != 8,
722 "i18npool.langtag", "LanguageTag::getRegionFromLangtag: pRegionT==NULL");
723 if (!pRegionT)
724 return aRegion;
725 const char* pRegion = lt_region_get_tag( pRegionT);
726 SAL_WARN_IF( !pRegion, "i18npool.langtag", "LanguageTag::getRegionFromLangtag: pRegion==NULL");
727 if (pRegion)
728 aRegion = OUString::createFromAscii( pRegion);
730 else
732 if (mbCachedCountry || cacheSimpleLSC())
733 aRegion = maCachedCountry;
735 return aRegion;
739 const com::sun::star::lang::Locale & LanguageTag::getLocale( bool bResolveSystem ) const
741 if (!bResolveSystem && mbSystemLocale)
742 return theEmptyLocale::get();
743 if (!mbInitializedLocale)
745 if (mbInitializedBcp47)
746 const_cast<LanguageTag*>(this)->convertBcp47ToLocale();
747 else
748 const_cast<LanguageTag*>(this)->convertLangToLocale();
750 return maLocale;
754 LanguageType LanguageTag::getLanguageType( bool bResolveSystem ) const
756 if (!bResolveSystem && mbSystemLocale)
757 return LANGUAGE_SYSTEM;
758 if (!mbInitializedLangID)
760 if (mbInitializedBcp47)
761 const_cast<LanguageTag*>(this)->convertBcp47ToLang();
762 else
763 const_cast<LanguageTag*>(this)->convertLocaleToLang();
765 return mnLangID;
769 void LanguageTag::getIsoLanguageCountry( rtl::OUString& rLanguage, rtl::OUString& rCountry ) const
771 if (!isIsoLocale())
773 rLanguage = OUString();
774 rCountry = OUString();
775 return;
777 // After isIsoLocale() it's safe to call getLanguage() for ISO code.
778 rLanguage = getLanguage();
779 rCountry = getCountry();
783 namespace
786 bool isLowerAscii( sal_Unicode c )
788 return 'a' <= c && c <= 'z';
791 bool isUpperAscii( sal_Unicode c )
793 return 'A' <= c && c <= 'Z';
799 // static
800 bool LanguageTag::isIsoLanguage( const rtl::OUString& rLanguage )
802 /* TODO: ignore case? For now let's see where rubbish is used. */
803 bool b2chars;
804 if (((b2chars = (rLanguage.getLength() == 2)) || rLanguage.getLength() == 3) &&
805 isLowerAscii( rLanguage[0]) && isLowerAscii( rLanguage[1]) &&
806 (b2chars || isLowerAscii( rLanguage[2])))
807 return true;
808 SAL_WARN_IF( ((rLanguage.getLength() == 2 || rLanguage.getLength() == 3) &&
809 (isUpperAscii( rLanguage[0]) || isUpperAscii( rLanguage[1]))) ||
810 (rLanguage.getLength() == 3 && isUpperAscii( rLanguage[2])), "i18npool.langtag",
811 "LanguageTag::isIsoLanguage: rejecting upper case " << rLanguage);
812 return false;
816 // static
817 bool LanguageTag::isIsoCountry( const rtl::OUString& rRegion )
819 /* TODO: ignore case? For now let's see where rubbish is used. */
820 if (rRegion.isEmpty() ||
821 (rRegion.getLength() == 2 && isUpperAscii( rRegion[0]) && isUpperAscii( rRegion[1])))
822 return true;
823 SAL_WARN_IF( rRegion.getLength() == 2 && (isLowerAscii( rRegion[0]) || isLowerAscii( rRegion[1])),
824 "i18npool.langtag", "LanguageTag::isIsoCountry: rejecting lower case " << rRegion);
825 return false;
829 // static
830 bool LanguageTag::isIsoScript( const rtl::OUString& rScript )
832 /* TODO: ignore case? For now let's see where rubbish is used. */
833 if (rScript.isEmpty() ||
834 (rScript.getLength() == 4 &&
835 isUpperAscii( rScript[0]) && isLowerAscii( rScript[1]) &&
836 isLowerAscii( rScript[2]) && isLowerAscii( rScript[3])))
837 return true;
838 SAL_WARN_IF( rScript.getLength() == 4 &&
839 (isLowerAscii( rScript[0]) || isUpperAscii( rScript[1]) ||
840 isUpperAscii( rScript[2]) || isUpperAscii( rScript[3])),
841 "i18npool.langtag", "LanguageTag::isIsoScript: rejecting case mismatch " << rScript);
842 return false;
846 rtl::OUString LanguageTag::getLanguage() const
848 if (!mbCachedLanguage)
850 maCachedLanguage = const_cast<LanguageTag*>(this)->getLanguageFromLangtag();
851 mbCachedLanguage = true;
853 return maCachedLanguage;
857 rtl::OUString LanguageTag::getScript() const
859 if (!mbCachedScript)
861 maCachedScript = const_cast<LanguageTag*>(this)->getScriptFromLangtag();
862 mbCachedScript = true;
864 return maCachedScript;
868 rtl::OUString LanguageTag::getLanguageAndScript() const
870 OUString aLanguageScript( getLanguage());
871 OUString aScript( getScript());
872 if (!aScript.isEmpty())
874 OUStringBuffer aBuf( aLanguageScript.getLength() + 1 + aScript.getLength());
875 aBuf.append( aLanguageScript).append( '-').append( aScript);
876 aLanguageScript = aBuf.makeStringAndClear();
878 return aLanguageScript;
882 rtl::OUString LanguageTag::getCountry() const
884 if (!mbCachedCountry)
886 maCachedCountry = const_cast<LanguageTag*>(this)->getRegionFromLangtag();
887 if (!isIsoCountry( maCachedCountry))
888 maCachedCountry = OUString();
889 mbCachedCountry = true;
891 return maCachedCountry;
895 rtl::OUString LanguageTag::getRegion() const
897 return const_cast<LanguageTag*>(this)->getRegionFromLangtag();
901 bool LanguageTag::cacheSimpleLSC()
903 OUString aLanguage, aScript, aCountry;
904 bool bRet = simpleExtract( maBcp47, aLanguage, aScript, aCountry);
905 if (bRet)
907 maCachedLanguage = aLanguage;
908 maCachedScript = aScript;
909 maCachedCountry = aCountry;
910 mbCachedLanguage = mbCachedScript = mbCachedCountry = true;
912 return bRet;
916 bool LanguageTag::isIsoLocale() const
918 if (meIsIsoLocale == DECISION_DONTKNOW)
920 if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
921 const_cast<LanguageTag*>(this)->canonicalize();
922 // It must be at most ll-CC or lll-CC
923 // Do not use getCountry() here, use getRegion() instead.
924 meIsIsoLocale = ((maBcp47.isEmpty() ||
925 (maBcp47.getLength() <= 6 && isIsoLanguage( getLanguage()) && isIsoCountry( getRegion()))) ?
926 DECISION_YES : DECISION_NO);
928 return meIsIsoLocale == DECISION_YES;
932 bool LanguageTag::isIsoODF() const
934 if (meIsIsoODF == DECISION_DONTKNOW)
936 if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
937 const_cast<LanguageTag*>(this)->canonicalize();
938 if (!isIsoScript( getScript()))
939 return ((meIsIsoODF = DECISION_NO) == DECISION_YES);
940 // The usual case is lll-CC so simply check that first.
941 if (isIsoLocale())
942 return ((meIsIsoODF = DECISION_YES) == DECISION_YES);
943 // If this is not ISO locale for which script must not exist it can
944 // still be ISO locale plus ISO script lll-Ssss-CC
945 meIsIsoODF = ((maBcp47.getLength() <= 11 &&
946 isIsoLanguage( getLanguage()) && isIsoCountry( getRegion()) && isIsoScript( getScript())) ?
947 DECISION_YES : DECISION_NO);
949 return meIsIsoODF == DECISION_YES;
953 bool LanguageTag::isValidBcp47() const
955 if (meIsValid == DECISION_DONTKNOW)
957 if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
958 const_cast<LanguageTag*>(this)->canonicalize();
959 SAL_WARN_IF( meIsValid == DECISION_DONTKNOW, "i18npool.langtag",
960 "LanguageTag::isValidBcp47: canonicalize() didn't set meIsValid");
962 return meIsValid == DECISION_YES;
966 bool LanguageTag::isSystemLocale() const
968 return mbSystemLocale;
972 LanguageTag & LanguageTag::makeFallback()
974 if (!mbIsFallback)
976 if (mbInitializedLangID)
978 LanguageType nLang1 = getLanguageType();
979 LanguageType nLang2 = MsLangId::Conversion::lookupFallbackLanguage( nLang1);
980 if (nLang1 != nLang2)
981 reset( nLang2);
983 else
985 const lang::Locale& rLocale1 = getLocale();
986 lang::Locale aLocale2( MsLangId::Conversion::lookupFallbackLocale( rLocale1));
987 if ( rLocale1.Language != aLocale2.Language ||
988 rLocale1.Country != aLocale2.Country ||
989 rLocale1.Variant != aLocale2.Variant)
990 reset( aLocale2);
992 mbIsFallback = true;
994 return *this;
998 bool LanguageTag::operator==( const LanguageTag & rLanguageTag ) const
1000 // Compare full language tag strings but SYSTEM unresolved.
1001 return getBcp47( false) == rLanguageTag.getBcp47( false);
1005 bool LanguageTag::operator!=( const LanguageTag & rLanguageTag ) const
1007 return !operator==( rLanguageTag);
1011 // static
1012 bool LanguageTag::simpleExtract( const rtl::OUString& rBcp47,
1013 rtl::OUString& rLanguage,
1014 rtl::OUString& rScript,
1015 rtl::OUString& rCountry )
1017 bool bRet = false;
1018 const sal_Int32 nLen = rBcp47.getLength();
1019 const sal_Int32 nHyph1 = rBcp47.indexOf( '-');
1020 if ((nLen == 2 || nLen == 3) && nHyph1 < 0) // ll or lll
1022 rLanguage = rBcp47;
1023 rScript = rCountry = OUString();
1024 bRet = true;
1026 else if ( (nLen == 5 && nHyph1 == 2) // ll-CC
1027 || (nLen == 6 && nHyph1 == 3)) // lll-CC
1029 rLanguage = rBcp47.copy( 0, nHyph1);
1030 rCountry = rBcp47.copy( nHyph1 + 1, 2);
1031 rScript = OUString();
1032 bRet = true;
1034 else if ( (nHyph1 == 2 && nLen == 10) // ll-Ssss-CC check
1035 || (nHyph1 == 3 && nLen == 11)) // lll-Ssss-CC check
1037 const sal_Int32 nHyph2 = rBcp47.indexOf( '-', nHyph1 + 1);
1038 if (nHyph2 == nHyph1 + 5)
1040 rLanguage = rBcp47.copy( 0, nHyph1);
1041 rScript = rBcp47.copy( nHyph1 + 1, 4);
1042 rCountry = rBcp47.copy( nHyph2 + 1, 2);
1043 bRet = true;
1046 if (!bRet)
1047 rLanguage = rScript = rCountry = OUString();
1048 return bRet;
1052 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */