1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 #ifndef INCLUDED_I18NLANGTAG_LANGUAGETAG_HXX
11 #define INCLUDED_I18NLANGTAG_LANGUAGETAG_HXX
13 #include <sal/config.h>
14 #include <rtl/locale.h>
15 #include <rtl/ustring.hxx>
16 #include <com/sun/star/lang/Locale.hpp>
17 #include <i18nlangtag/i18nlangtagdllapi.h>
18 #include <i18nlangtag/lang.h>
23 /** The ISO 639-2 code reserved for local use used to indicate that a
24 css::Locale contains a BCP 47 string in its Variant field. The
25 Locale's Language field then will contain this language code.
27 @see LanguageTag::getLocale()
29 Avoid use, only needed internally or if conversion from Locale to
30 LanguageTag is not wanted, i.e. during ODF import. To check whether a
31 LanguageTag contains a plain language/country combination or a more
32 detailed BCP 47 language tag use LanguageTag::isIsoLocale() instead.
34 #define I18NLANGTAG_QLT "qlt"
// Forward declaration of the implementation class. In this header it is only
// named as a friend of LanguageTag and as the pointee of LanguageTag::ImplPtr.
37 class LanguageTagImpl
;
40 /** Wrapper for liblangtag BCP 47 language tags, MS-LangIDs, locales and
41 conversions in between.
43 Note that member variables are mutable and may change their values even in
44 const methods. Getter methods return either the original value or matching
47 For standalone conversions if no LanguageTag instance is at hand, static
48 convertTo...() methods exist.
50 class SAL_WARN_UNUSED I18NLANGTAG_DLLPUBLIC LanguageTag
52 friend class LanguageTagImpl
;
56 /** ScriptType for a language.
58 Used only in onTheFly languages as a way of marking key script behaviours
59 for the script of the language without having to store and analyse the
60 script each time. Used primarily from msLangId.
62 These need to correspond to the ExtraLanguages.ScriptType template
63 property in officecfg/registry/schema/org/openoffice/VCL.xcs
68 WESTERN
= 1, // Copies css::i18n::ScriptType for strong types
71 RTL
= 4 // implies CTL
74 /** Init LanguageTag with existing BCP 47 language tag string.
77 If TRUE, canonicalize tag and reparse, the resulting tag string may
79 If FALSE, the tag is simply stored and can be retrieved with
82 Note that conversions to ISO codes, locales or LanguageType or
83 obtaining language or script will canonicalize the tag string anyway,
84 so specifying bCanonicalize=false is not a guarantee that the tag will
85 stay identical to what was passed.
87 explicit LanguageTag( const OUString
& rBcp47LanguageTag
, bool bCanonicalize
= false );
89 /** Init LanguageTag with Locale. */
90 explicit LanguageTag( const css::lang::Locale
& rLocale
);
92 /** Init LanguageTag with LanguageType MS-LangID. */
93 explicit LanguageTag( LanguageType nLanguage
);
95 /** Init LanguageTag with either BCP 47 language tag (precedence if not
96 empty), or a combination of language, script and country.
98 This is a convenience ctor to be used in ODF import where these are
101 explicit LanguageTag( const OUString
& rBcp47
, const OUString
& rLanguage
,
102 const OUString
& rScript
, const OUString
& rCountry
);
104 /** Init LanguageTag with rtl_Locale.
106 This is a convenience ctor.
108 explicit LanguageTag( const rtl_Locale
& rLocale
);
// Copy constructor: initializes this tag from rLanguageTag.
110 LanguageTag( const LanguageTag
& rLanguageTag
);
// Copy assignment from rLanguageTag. Returns a LanguageTag reference
// (presumably *this; the definition is not part of this header).
112 LanguageTag
& operator=( const LanguageTag
& rLanguageTag
);
114 /** Obtain BCP 47 language tag.
116 @param bResolveSystem
117 If TRUE, resolve an empty language tag denoting the system
118 locale to the real locale used.
119 If FALSE, return an empty OUString for such a tag.
121 const OUString
& getBcp47( bool bResolveSystem
= true ) const;
123 /** Obtain language tag as Locale.
125 As a convention, language tags that can not be expressed as "pure"
126 css::lang::Locale content using Language and Country fields
127 store "qlt" (ISO 639 reserved for local use) in the Language field and
128 the entire BCP 47 language tag in the Variant field. The Country field
129 contains the corresponding ISO 3166 country code _if_ there is one, or
132 @param bResolveSystem
133 If TRUE, resolve an empty language tag denoting the system
134 locale to the real locale used.
135 If FALSE, return an empty Locale for such a tag.
137 const css::lang::Locale
& getLocale( bool bResolveSystem
= true ) const;
139 /** Obtain mapping to MS-LangID.
141 @param bResolveSystem
142 If TRUE, resolve an empty language tag denoting the system
143 locale to the real locale used.
144 If FALSE, return LANGUAGE_SYSTEM for such a tag.
146 LanguageType
getLanguageType( bool bResolveSystem
= true ) const;
148 /** Obtain ISO strings for language, script and country.
150 This is a convenience method for ODF export places only. Avoid use in
153 ATTENTION! May return empty strings if the language tag is not
154 expressible in valid ISO codes!
158 Always resolves an empty tag to the system locale.
160 void getIsoLanguageScriptCountry( OUString
& rLanguage
,
161 OUString
& rScript
, OUString
& rCountry
) const;
163 /** Get ISO 639 language code, or BCP 47 language.
165 Always resolves an empty tag to the system locale.
167 OUString
getLanguage() const;
169 /** Get ISO 15924 script code, if not the default script according to
170 BCP 47. For default script an empty string is returned.
174 Always resolves an empty tag to the system locale.
176 OUString
getScript() const;
178 /** Get combined language and script code, separated by '-' if
179 non-default script; for the default script only the language.
183 Always resolves an empty tag to the system locale.
185 OUString
getLanguageAndScript() const;
187 /** Get ISO 3166 country alpha code. Empty if the BCP 47 tags denote a
188 region not expressible as a 2-character country code.
190 Always resolves an empty tag to the system locale.
192 OUString
getCountry() const;
194 /** Get BCP 47 variant subtags, of the IANA Language Subtag Registry.
196 If there are multiple variant subtags they are separated by '-'.
198 This is NOT related to Locale.Variant!
200 Always resolves an empty tag to the system locale.
202 OUString
getVariants() const;
204 /** Get a GLIBC locale string.
206 Always resolves an empty tag to the system locale.
209 An encoding to be appended to language_country, for example
210 ".UTF-8" including the dot.
212 @return The resulting GLIBC locale string if it could be constructed,
213 if not an empty string is returned.
215 OUString
getGlibcLocaleString( const OUString
& rEncoding
) const;
217 /** If language tag has a non-default script specified.
219 bool hasScript() const;
221 /** If language tag is a locale that can be expressed using only ISO 639
222 language codes and ISO 3166 country codes, thus is convertible to a
223 conforming Locale struct without using extension mechanisms.
225 Note that an empty language tag or empty Locale::Language field or
226 LanguageType LANGUAGE_SYSTEM could be treated as a valid ISO locale in
227 some context, but here is not. If you want that ask for
228 aTag.isSystemLocale() || aTag.isIsoLocale()
230 Always resolves an empty tag to the system locale.
232 bool isIsoLocale() const;
234 /** If language tag is a locale that can be expressed using only ISO 639
235 language codes and ISO 15924 script codes and ISO 3166 country codes,
236 thus can be stored in an ODF document using only fo:language, fo:script
237 and fo:country attributes. If this is FALSE, the locale must be stored
238 as a <*:rfc-language-tag> element.
240 Always resolves an empty tag to the system locale.
242 bool isIsoODF() const;
244 /** If this is a valid BCP 47 language tag.
246 Always resolves an empty tag to the system locale.
248 @see static bool isValidBcp47( const OUString&, OUString*, bool )
250 bool isValidBcp47() const;
252 /** If this tag was constructed as an empty tag denoting the system locale.
// Inline accessor: returns the mbSystemLocale member unchanged.
254 bool isSystemLocale() const { return mbSystemLocale
;}
256 /** Returns the script type for this language, UNKNOWN if not set */
257 ScriptType
getScriptType() const;
259 /** Sets the script type for this language */
260 void setScriptType(ScriptType st
);
262 /** Reset with existing BCP 47 language tag string. See ctor. */
263 LanguageTag
& reset( const OUString
& rBcp47LanguageTag
);
265 /** Reset with Locale. */
266 LanguageTag
& reset( const css::lang::Locale
& rLocale
);
268 /** Reset with LanguageType MS-LangID. */
269 LanguageTag
& reset( LanguageType nLanguage
);
272 /** Fall back to a known locale.
274 If the current tag does not represent a known (by us) locale, fall back
275 to the most likely locale possible known.
276 If the current tag is known, no change occurs.
278 LanguageTag
& makeFallback();
280 /** Return a vector of fall-back strings.
283 full BCP 47 tag, same as getBcp47()
289 If the tag includes variants the order is:
290 full BCP 47 tag, same as getBcp47()
300 Only strings that differ from a higher order are included, for example
301 if there is no script the elements will be bcp47, lll-CC, lll; if the
302 bcp47 string is identical to lll-CC then only lll-CC, lll.
304 Note that lll is only ISO 639-1/2 alpha code and CC is only ISO 3166
305 alpha code. If the region can not be expressed as ISO 3166 then no -CC
308 @param bIncludeFullBcp47
309 If TRUE, the full BCP 47 tag is included as first element.
310 If FALSE, the full tag is not included; used if the caller
311 obtains the fallbacks only if the full tag did not lead to a
312 match, so subsequent tries need not include it again.
314 ::std::vector
< OUString
> getFallbackStrings( bool bIncludeFullBcp47
) const;
317 /** @short Search for an equal or at least for a similar locale in a list
320 @descr First search for a locale that is equal to the reference
321 locale. (means: same BCP47 string)
323 If the reference locale could not be located, check for
324 "similar" locales, in the same order as obtained by
325 getFallbackStrings().
327 If no similar locale could be located, we search for a locale
328 "en-US" inside the given locale list.
330 If "en-US" could not be located, we search for a locale "en"
331 inside the given list.
333 If no "same" nor any "similar" locale could be found, we try
334 "x-default" and "x-no-translate" explicitly. Sometimes
335 variables don't use real localization. For example, in case the
336 localized value is a fixed product name.
338 If no locale matched until then, we use any other locale that
339 exists inside the set of given ones, namely the first
343 the vector of possible locales as BCP47 strings.
346 the reference locale, BCP47 string.
348 @return An iterator that points to the found element inside the given
349 locale list. If no matching locale could be found it points to
350 the beginning of the list.
352 static ::std::vector
< OUString
>::const_iterator
getFallback( const ::std::vector
< OUString
> & rList
,
353 const OUString
& rReference
);
356 /** @short Search for an equal or for a similar locale in a list
357 of possible ones where at least the language matches.
359 @descr First search for a locale that is equal to the reference
362 If the reference locale could not be located, check for
363 "similar" locales, in the same order as obtained by
364 getFallbackStrings().
366 If no locale matches, rList.end() is returned.
369 the vector of possible locales.
372 the reference locale.
374 @return An iterator that points to the found element inside the given
375 locale list. If no matching locale could be found it points to
378 static ::std::vector
< css::lang::Locale
>::const_iterator
getMatchingFallback(
379 const ::std::vector
< css::lang::Locale
> & rList
,
380 const css::lang::Locale
& rReference
);
383 /** Test equality of two LanguageTag, possibly resolving system locale.
385 Resolve empty language tags denoting the system
386 locale to the real locale used before comparing.
388 bool equals( const LanguageTag
& rLanguageTag
) const;
390 /** Test equality of two LanguageTag.
392 Does NOT resolve system, i.e. if the system locale is en-US
393 LanguageTag("")==LanguageTag("en-US") returns false! Use
394 equals(...) instead if system locales shall be resolved.
396 bool operator==( const LanguageTag
& rLanguageTag
) const;
398 /** Test inequality of two LanguageTag.
400 Does NOT resolve system, i.e. if the system locale is en-US
401 LanguageTag("")!=LanguageTag("en-US") returns true! Use
402 !equals(...) instead if system locales shall be resolved.
404 bool operator!=( const LanguageTag
& rLanguageTag
) const;
406 /** Test this LanguageTag less than that LanguageTag.
408 For sorted containers. Does NOT resolve system.
410 bool operator<( const LanguageTag
& rLanguageTag
) const;
412 /** Convert MS-LangID to Locale.
414 @param bResolveSystem
415 If TRUE, resolve an empty language tag denoting the system
416 locale to the real locale used.
417 If FALSE, return an empty Locale for such a tag.
419 static css::lang::Locale
convertToLocale( LanguageType nLangID
, bool bResolveSystem
= true );
421 /** Convert Locale to MS-LangID.
423 @param bResolveSystem
424 If TRUE, resolve an empty language tag denoting the system
425 locale to the real locale used.
426 If FALSE, return LANGUAGE_SYSTEM for such a tag.
428 static LanguageType
convertToLanguageType( const css::lang::Locale
& rLocale
, bool bResolveSystem
= true );
430 /** Convert MS-LangID to BCP 47 string.
432 Resolve an empty language tag denoting the system
433 locale to the real locale used.
435 static OUString
convertToBcp47( LanguageType nLangID
);
437 /** Convert Locale to BCP 47 string.
439 @param bResolveSystem
440 If TRUE, resolve an empty language tag denoting the system
441 locale to the real locale used.
442 If FALSE, return an empty OUString for such a tag.
444 static OUString
convertToBcp47( const css::lang::Locale
& rLocale
, bool bResolveSystem
= true );
446 /** Convert BCP 47 string to Locale, convenience method.
448 NOTE: exists only for consistency with the other convertTo...()
449 methods, internally uses a temporary LanguageTag instance for
450 conversion so does not save anything compared to
451 LanguageTag(rBcp47).getLocale(bResolveSystem).
453 @param bResolveSystem
454 If TRUE, resolve an empty language tag denoting the system
455 locale to the real locale used.
456 If FALSE, return an empty Locale for such a tag.
458 static css::lang::Locale
convertToLocale( const OUString
& rBcp47
, bool bResolveSystem
= true );
460 /** Convert BCP 47 string to MS-LangID, convenience method.
462 NOTE: exists only for consistency with the other convertTo...()
463 methods, internally uses a temporary LanguageTag instance for
464 conversion so does not save anything compared to
465 LanguageTag(rBcp47).getLanguageType(true).
467 Resolve an empty language tag denoting the system
468 locale to the real locale used.
470 static LanguageType
convertToLanguageType( const OUString
& rBcp47
);
472 /** Convert BCP 47 string to MS-LangID with fallback, convenience method.
474 NOTE: exists only for consistency with the other convertTo...()
475 methods, internally uses a temporary LanguageTag instance for
476 conversion so does not save anything compared to
477 LanguageTag(rBcp47).makeFallback().getLanguageType(true).
481 Always resolves an empty tag to the system locale.
483 static LanguageType
convertToLanguageTypeWithFallback( const OUString
& rBcp47
);
485 /** Convert BCP 47 string to Locale with fallback, convenience method.
487 NOTE: exists only for consistency with the other convertTo...()
488 methods, internally uses a temporary LanguageTag instance for
489 conversion so does not save anything compared to
490 LanguageTag(rBcp47).makeFallback().getLocale(true).
494 Always resolves an empty tag to the system locale.
496 static css::lang::Locale
convertToLocaleWithFallback( const OUString
& rBcp47
);
498 /** If rString represents a valid BCP 47 language tag.
500 Never resolves an empty tag to the system locale, in fact an empty
501 string is invalid here. Does not create an instance to be registered
502 with a conversion to Locale or LanguageType.
504 @param o_pCanonicalized
505 If given and rString is a valid BCP 47 language tag, the
506 canonicalized form is assigned, which may differ from the
507 original string even if that was a valid tag. If rString is not
508 a valid tag, nothing is assigned.
510 @param bDisallowPrivate
511 If TRUE, valid tags according to BCP 47 but reserved for
512 private use, like 'x-...', are not allowed and FALSE is
513 returned in this case.
515 static bool isValidBcp47( const OUString
& rString
, OUString
* o_pCanonicalized
,
516 bool bDisallowPrivate
= false );
518 /** If nLang is a generated on-the-fly LangID */
519 static bool isOnTheFlyID( LanguageType nLang
);
// Obtain the ScriptType associated with the LangID nLang.
// NOTE(review): presumably only meaningful when isOnTheFlyID( nLang ) is true;
// the definition is not part of this header, confirm there.
520 static ScriptType
getOnTheFlyScriptType( LanguageType nLang
);
522 /** @ATTENTION: _ONLY_ to be called by the application's configuration! */
523 static void setConfiguredSystemLanguage( LanguageType nLang
);
// Shared-ownership pointer (std::shared_ptr) to a LanguageTagImpl.
525 typedef std::shared_ptr
< LanguageTagImpl
> ImplPtr
;
// Locale form of this tag; mutable, so const methods may update it.
// NOTE(review): presumably the object getLocale() refers to - confirm.
529 mutable css::lang::Locale maLocale
;
// BCP 47 string form of this tag; mutable.
// NOTE(review): presumably the string getBcp47() refers to - confirm.
530 mutable OUString maBcp47
;
// MS-LangID form of this tag; mutable.
531 mutable LanguageType mnLangID
;
// Shared handle to the LanguageTagImpl; mutable.
// NOTE(review): presumably obtained via getImpl() / registerImpl() - confirm.
532 mutable ImplPtr mpImpl
;
// Value returned by isSystemLocale(); one-bit field, not mutable.
533 bool mbSystemLocale
: 1;
// NOTE(review): the three mbInitialized* one-bit flags presumably record
// whether maBcp47 / maLocale / mnLangID currently hold a valid value - confirm.
534 mutable bool mbInitializedBcp47
: 1;
535 mutable bool mbInitializedLocale
: 1;
536 mutable bool mbInitializedLangID
: 1;
// NOTE(review): presumably set once makeFallback() has been applied - confirm.
537 bool mbIsFallback
: 1;
// Returns the implementation handle by const reference.
// NOTE(review): presumably obtains it through registerImpl() on first use - confirm.
539 ImplPtr
const & getImpl() const;
// Returns an implementation handle by value.
// NOTE(review): presumably looks up or creates the LanguageTagImpl matching
// this tag's current content - confirm in the implementation.
540 ImplPtr
registerImpl() const;
// Both are const and can therefore only modify the mutable members.
// NOTE(review): presumably they copy cached values from the LanguageTagImpl
// into this instance; how the "Raw" variant differs is not visible in
// this header - confirm in the implementation.
542 void syncVarsFromRawImpl() const;
543 void syncVarsFromImpl() const;
// Non-const conversion helpers between the Locale, BCP 47 string and
// LanguageType representations; each name states source and target.
// NOTE(review): which members are read and written is defined in the
// implementation file - confirm there.
545 void convertLocaleToLang();
546 void convertBcp47ToLocale();
547 void convertBcp47ToLang();
548 void convertLangToLocale();
// NOTE(review): presumably the counterpart used for the rtl_Locale ctor - confirm.
550 void convertFromRtlLocale();
552 /** Canonicalize if not yet done and synchronize initialized conversions.
554 @return whether BCP 47 language tag string was changed.
556 bool synCanonicalize();
// Static predicates on a single subtag string (language, script, region).
// NOTE(review): the accepted forms are defined in the implementation;
// presumably ISO 639 / ISO 15924 / ISO 3166 alpha codes, matching what
// isIsoLocale() and isIsoODF() describe - confirm.
560 static bool isIsoLanguage( const OUString
& rLanguage
);
561 static bool isIsoScript( const OUString
& rScript
);
562 static bool isIsoCountry( const OUString
& rRegion
);
566 #endif // INCLUDED_I18NLANGTAG_LANGUAGETAG_HXX
568 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */