Bump version to 6.4-15
[LibreOffice.git] / include / i18nlangtag / languagetag.hxx
blob2d102a500c27ea2eb9fe2f74fcc21513b95e0cd1
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
10 #ifndef INCLUDED_I18NLANGTAG_LANGUAGETAG_HXX
11 #define INCLUDED_I18NLANGTAG_LANGUAGETAG_HXX
13 #include <sal/config.h>
14 #include <rtl/locale.h>
15 #include <rtl/ustring.hxx>
16 #include <com/sun/star/lang/Locale.hpp>
17 #include <i18nlangtag/i18nlangtagdllapi.h>
18 #include <i18nlangtag/lang.h>
20 #include <memory>
21 #include <vector>
/** The ISO 639-2 code reserved for local use used to indicate that a
    css::Locale contains a BCP 47 string in its Variant field. The
    Locale's Language field then will contain this language code.

    @see LanguageTag::getLocale()

    Avoid use, only needed internally or if conversion from Locale to
    LanguageTag is not wanted, i.e. during ODF import. To check whether a
    LanguageTag contains a plain language/country combination or a more
    detailed BCP 47 language tag use LanguageTag::isIsoLocale() instead.
 */
#define I18NLANGTAG_QLT "qlt"


/* Forward declaration only; LanguageTag befriends this class and holds it
   through a shared pointer (LanguageTag::ImplPtr). */
class LanguageTagImpl;
40 /** Wrapper for liblangtag BCP 47 language tags, MS-LangIDs, locales and
41 conversions in between.
43 Note that member variables are mutable and may change their values even in
44 const methods. Getter methods return either the original value or matching
45 converted values.
47 For standalone conversions if no LanguageTag instance is at hand, static
48 convertTo...() methods exist.
50 class SAL_WARN_UNUSED I18NLANGTAG_DLLPUBLIC LanguageTag
52 friend class LanguageTagImpl;
54 public:
56 /** ScriptType for a language.
58 Used only in onTheFly languages as a way of marking key script behaviours
59 for the script of the language without having to store and analyse the
60 script each time. Used primarily from msLangId.
62 These need to correspond to the ExtraLanguages.ScriptType template
63 property in officecfg/registry/schema/org/openoffice/VCL.xcs
65 enum class ScriptType
67 UNKNOWN = 0,
68 WESTERN = 1, // Copies css::i18n::ScriptType for strong types
69 CJK = 2,
70 CTL = 3,
71 RTL = 4 // implies CTL
74 /** Init LanguageTag with existing BCP 47 language tag string.
76 @param bCanonicalize
77 If TRUE, canonicalize tag and reparse, the resulting tag string may
78 be different.
79 IF FALSE, the tag is simply stored and can be retrieved with
80 getBcp47().
82 Note that conversions to ISO codes, locales or LanguageType or
83 obtaining language or script will canonicalize the tag string anyway,
84 so specifying bCanonicalize=false is not a guarantee that the tag will
85 stay identical to what was passed.
87 explicit LanguageTag( const OUString & rBcp47LanguageTag, bool bCanonicalize = false );
89 /** Init LanguageTag with Locale. */
90 explicit LanguageTag( const css::lang::Locale & rLocale );
92 /** Init LanguageTag with LanguageType MS-LangID. */
93 explicit LanguageTag( LanguageType nLanguage );
95 /** Init LanguageTag with either BCP 47 language tag (precedence if not
96 empty), or a combination of language, script and country.
98 This is a convenience ctor to be used in ODF import where these are
99 distinct attributes.
101 explicit LanguageTag( const OUString& rBcp47, const OUString& rLanguage,
102 const OUString& rScript, const OUString& rCountry );
104 /** Init LanguageTag with rtl_Locale.
106 This is a convenience ctor.
108 explicit LanguageTag( const rtl_Locale & rLocale );
110 ~LanguageTag();
112 /** Obtain BCP 47 language tag.
114 @param bResolveSystem
115 If TRUE, resolve an empty language tag denoting the system
116 locale to the real locale used.
117 If FALSE, return an empty OUString for such a tag.
119 const OUString & getBcp47( bool bResolveSystem = true ) const;
121 /** Obtain BCP 47 language tag, but with MS malformed exceptions.
123 To be used *only* in OOXML filter context.
124 For example, es-ES-u-co-trad is stored as es-ES_tradnl which is not a
125 valid BCP 47 language tag.
127 OUString getBcp47MS() const;
129 /** Obtain language tag as Locale.
131 As a convention, language tags that can not be expressed as "pure"
132 css::lang::Locale content using Language and Country fields
133 store "qlt" (ISO 639 reserved for local use) in the Language field and
134 the entire BCP 47 language tag in the Variant field. The Country field
135 contains the corresponding ISO 3166 country code _if_ there is one, or
136 otherwise is empty.
138 @param bResolveSystem
139 If TRUE, resolve an empty language tag denoting the system
140 locale to the real locale used.
141 If FALSE, return an empty Locale for such a tag.
143 const css::lang::Locale & getLocale( bool bResolveSystem = true ) const;
145 /** Obtain mapping to MS-LangID.
147 @param bResolveSystem
148 If TRUE, resolve an empty language tag denoting the system
149 locale to the real locale used.
150 If FALSE, return LANGUAGE_SYSTEM for such a tag.
152 LanguageType getLanguageType( bool bResolveSystem = true ) const;
154 /** Obtain ISO strings for language, script and country.
156 This is a convenience method for ODF export places only. Avoid use in
157 other code.
159 ATTENTION! May return empty strings if the language tag is not
160 expressible in valid ISO codes!
162 @see isIsoODF()
164 Always resolves an empty tag to the system locale.
166 void getIsoLanguageScriptCountry( OUString& rLanguage,
167 OUString& rScript, OUString& rCountry ) const;
169 /** Get ISO 639 language code, or BCP 47 language.
171 Always resolves an empty tag to the system locale.
173 OUString getLanguage() const;
175 /** Get ISO 15924 script code, if not the default script according to
176 BCP 47. For default script an empty string is returned.
178 @see hasScript()
180 Always resolves an empty tag to the system locale.
182 OUString getScript() const;
184 /** Get combined language and script code, separated by '-' if
185 non-default script, if default script only language.
187 @see hasScript()
189 Always resolves an empty tag to the system locale.
191 OUString getLanguageAndScript() const;
193 /** Get ISO 3166 country alpha code. Empty if the BCP 47 tags denote a
194 region not expressible as 2 character country code.
196 Always resolves an empty tag to the system locale.
198 OUString getCountry() const;
200 /** Get BCP 47 variant subtags, of the IANA Language Subtag Registry.
202 If there are multiple variant subtags they are separated by '-'.
204 This is NOT related to Locale.Variant!
206 Always resolves an empty tag to the system locale.
208 OUString getVariants() const;
210 /** Get a GLIBC locale string.
212 Always resolves an empty tag to the system locale.
214 @param rEncoding
215 An encoding to be appended to language_country, for example
216 ".UTF-8" including the dot.
218 @return The resulting GLIBC locale string if it could be constructed,
219 if not an empty string is returned.
221 OUString getGlibcLocaleString( const OUString & rEncoding ) const;
223 /** If language tag has a non-default script specified.
225 bool hasScript() const;
227 /** If language tag is a locale that can be expressed using only ISO 639
228 language codes and ISO 3166 country codes, thus is convertible to a
229 conforming Locale struct without using extension mechanisms.
231 Note that an empty language tag or empty Locale::Language field or
232 LanguageType LANGUAGE_SYSTEM could be treated as a valid ISO locale in
233 some context, but here is not. If you want that ask for
234 aTag.isSystemLocale() || aTag.isIsoLocale()
236 Always resolves an empty tag to the system locale.
238 bool isIsoLocale() const;
240 /** If language tag is a locale that can be expressed using only ISO 639
241 language codes and ISO 15924 script codes and ISO 3166 country codes,
242 thus can be stored in an ODF document using only fo:language, fo:script
243 and fo:country attributes. If this is FALSE, the locale must be stored
244 as a <*:rfc-language-tag> element.
246 Always resolves an empty tag to the system locale.
248 bool isIsoODF() const;
250 /** If this is a valid BCP 47 language tag.
252 Always resolves an empty tag to the system locale.
254 @seealso static bool isValidBcp47(const OUString&)
256 bool isValidBcp47() const;
258 /** If this tag was constructed as an empty tag denoting the system locale.
260 bool isSystemLocale() const { return mbSystemLocale;}
262 /** Returns the script type for this language, UNKNOWN if not set */
263 ScriptType getScriptType() const;
265 /** Sets the script type for this language */
266 void setScriptType(ScriptType st);
268 /** Reset with existing BCP 47 language tag string. See ctor. */
269 LanguageTag & reset( const OUString & rBcp47LanguageTag );
271 /** Reset with Locale. */
272 LanguageTag & reset( const css::lang::Locale & rLocale );
274 /** Reset with LanguageType MS-LangID. */
275 LanguageTag & reset( LanguageType nLanguage );
278 /** Fall back to a known locale.
280 If the current tag does not represent a known (by us) locale, fall back
281 to the most likely locale possible known.
282 If the current tag is known, no change occurs.
284 LanguageTag & makeFallback();
286 /** Return a vector of fall-back strings.
288 In order:
289 full BCP 47 tag, same as getBcp47()
290 lll-Ssss-CC
291 lll-Ssss
292 lll-CC
295 If the tag includes variants the order is:
296 full BCP 47 tag, same as getBcp47()
297 lll-Ssss-CC-vvvvvvvv
298 lll-Ssss-vvvvvvvv
299 lll-Ssss-CC
300 lll-Ssss
301 lll-CC-vvvvvvvv
302 lll-vvvvvvvv
303 lll-CC
306 Only strings that differ from a higher order are included, for example
307 if there is no script the elements will be bcp47, lll-CC, lll; if the
308 bcp47 string is identical to lll-CC then only lll-CC, lll.
310 Note that lll is only ISO 639-1/2 alpha code and CC is only ISO 3166
311 alpha code. If the region can not be expressed as ISO 3166 then no -CC
312 tags are included.
314 @param bIncludeFullBcp47
315 If TRUE, the full BCP 47 tag is included as first element.
316 If FALSE, the full tag is not included; used if the caller
317 obtains the fallbacks only if the full tag did not lead to a
318 match, so subsequent tries need not to include it again.
320 ::std::vector< OUString > getFallbackStrings( bool bIncludeFullBcp47 ) const;
323 /** @short Search for an equal or at least for a similar locale in a list
324 of possible ones.
326 @descr First search for a locale that is equal to the reference
327 locale. (means: same BCP47 string)
329 If the reference locale could not be located, check for
330 "similar" locales, in the same order as obtained by
331 getFallbackStrings().
333 If no similar locale could be located, we search for a locale
334 "en-US" inside the given locale list.
336 If "en-US" could not be located, we search for a locale "en"
337 inside the given list.
339 If no "same" nor any "similar" locale could be found, we try
340 "x-default" and "x-no-translate" explicitly. Sometimes
341 variables don't use real localization. For example, in case the
342 localized value is a fix product name.
344 If no locale matched until then, we use any other locale that
345 exists inside the set of given ones, namely the first
346 encountered!
348 @param rList
349 the vector of possible locales as BCP47 strings.
351 @param rReference
352 the reference locale, BCP47 string.
354 @return An iterator that points to the found element inside the given
355 locale list. If no matching locale could be found it points to
356 the beginning of the list.
358 static ::std::vector< OUString >::const_iterator getFallback( const ::std::vector< OUString > & rList,
359 const OUString & rReference );
362 /** @short Search for an equal or for a similar locale in a list
363 of possible ones where at least the language matches.
365 @descr First search for a locale that is equal to the reference
366 locale.
368 If the reference locale could not be located, check for
369 "similar" locales, in the same order as obtained by
370 getFallbackStrings().
372 If no locale matches, rList.end() is returned.
374 @param rList
375 the vector of possible locales.
377 @param rReference
378 the reference locale.
380 @return An iterator that points to the found element inside the given
381 locale list. If no matching locale could be found it points to
382 the end of the list.
384 static ::std::vector< css::lang::Locale >::const_iterator getMatchingFallback(
385 const ::std::vector< css::lang::Locale > & rList,
386 const css::lang::Locale & rReference );
389 /** Test equality of two LanguageTag, possibly resolving system locale.
391 Resolve empty language tags denoting the system
392 locale to the real locale used before comparing.
394 bool equals( const LanguageTag & rLanguageTag ) const;
396 /** Test equality of two LanguageTag.
398 Does NOT resolve system, i.e. if the system locale is en-US
399 LanguageTag("")==LanguageTag("en-US") returns false! Use
400 equals(...) instead if system locales shall be resolved.
402 bool operator==( const LanguageTag & rLanguageTag ) const;
404 /** Test inequality of two LanguageTag.
406 Does NOT resolve system, i.e. if the system locale is en-US
407 LanguageTag("")!=LanguageTag("en-US") returns true! Use
408 !equals(,..) instead if system locales shall be resolved.
410 bool operator!=( const LanguageTag & rLanguageTag ) const;
412 /** Test this LanguageTag less than that LanguageTag.
414 For sorted containers. Does NOT resolve system.
416 bool operator<( const LanguageTag & rLanguageTag ) const;
418 /** Convert MS-LangID to Locale.
420 @param bResolveSystem
421 If TRUE, resolve an empty language tag denoting the system
422 locale to the real locale used.
423 If FALSE, return an empty Locale for such a tag.
425 static css::lang::Locale convertToLocale( LanguageType nLangID, bool bResolveSystem = true );
427 /** Convert Locale to MS-LangID.
429 @param bResolveSystem
430 If TRUE, resolve an empty language tag denoting the system
431 locale to the real locale used.
432 If FALSE, return LANGUAGE_SYSTEM for such a tag.
434 static LanguageType convertToLanguageType( const css::lang::Locale& rLocale, bool bResolveSystem = true );
436 /** Convert MS-LangID to BCP 47 string.
438 Resolve an empty language tag denoting the system
439 locale to the real locale used.
441 static OUString convertToBcp47( LanguageType nLangID );
443 /** Convert Locale to BCP 47 string.
445 @param bResolveSystem
446 If TRUE, resolve an empty language tag denoting the system
447 locale to the real locale used.
448 If FALSE, return an empty OUString for such a tag.
450 static OUString convertToBcp47( const css::lang::Locale& rLocale, bool bResolveSystem = true );
452 /** Convert BCP 47 string to Locale, convenience method.
454 NOTE: exists only for consistency with the other convertTo...()
455 methods, internally uses a temporary LanguageTag instance for
456 conversion so does not save anything compared to
457 LanguageTag(rBcp47).getLocale(bResolveSystem).
459 @param bResolveSystem
460 If TRUE, resolve an empty language tag denoting the system
461 locale to the real locale used.
462 If FALSE, return an empty Locale for such a tag.
464 static css::lang::Locale convertToLocale( const OUString& rBcp47, bool bResolveSystem = true );
466 /** Convert BCP 47 string to MS-LangID, convenience method.
468 NOTE: exists only for consistency with the other convertTo...()
469 methods, internally uses a temporary LanguageTag instance for
470 conversion so does not save anything compared to
471 LanguageTag(rBcp47).getLanguageType(bResolveSystem).
473 Resolve an empty language tag denoting the system
474 locale to the real locale used.
476 static LanguageType convertToLanguageType( const OUString& rBcp47 );
478 /** Convert BCP 47 string to MS-LangID with fallback, convenience method.
480 NOTE: exists only for consistency with the other convertTo...()
481 methods, internally uses a temporary LanguageTag instance for
482 conversion so does not save anything compared to
483 LanguageTag(rBcp47).makeFallback().getLanguageType(bResolveSystem).
485 @see makeFallback()
487 Always resolves an empty tag to the system locale.
489 static LanguageType convertToLanguageTypeWithFallback( const OUString& rBcp47 );
491 /** Convert BCP 47 string to Locale with fallback, convenience method.
493 NOTE: exists only for consistency with the other convertTo...()
494 methods, internally uses a temporary LanguageTag instance for
495 conversion so does not save anything compared to
496 LanguageTag(rBcp47).makeFallback().getLocale(bResolveSystem).
498 @see makeFallback()
500 Always resolves an empty tag to the system locale.
502 static css::lang::Locale convertToLocaleWithFallback( const OUString& rBcp47 );
504 /** If rString represents a valid BCP 47 language tag.
506 Never resolves an empty tag to the system locale, in fact an empty
507 string is invalid here. Does not create an instance to be registered
508 with a conversion to Locale or LanguageType.
510 @param o_pCanonicalized
511 If given and rString is a valid BCP 47 language tag, the
512 canonicalized form is assigned, which may differ from the
513 original string even if that was a valid tag. If rString is not
514 a valid tag, nothing is assigned.
516 @param bDisallowPrivate
517 If TRUE, valid tags according to BCP 47 but reserved for
518 private use, like 'x-...', are not allowed and FALSE is
519 returned in this case.
521 static bool isValidBcp47( const OUString& rString, OUString* o_pCanonicalized,
522 bool bDisallowPrivate = false );
524 /** If nLang is a generated on-the-fly LangID */
525 static bool isOnTheFlyID( LanguageType nLang );
526 static ScriptType getOnTheFlyScriptType( LanguageType nLang );
528 /** @ATTENTION: _ONLY_ to be called by the application's configuration! */
529 static void setConfiguredSystemLanguage( LanguageType nLang );
531 /** @ATTENTION: _ONLY_ to be called by fuzzing setup */
532 static void disable_lt_tag_parse();
534 typedef std::shared_ptr< LanguageTagImpl > ImplPtr;
536 private:
538 mutable css::lang::Locale maLocale;
539 mutable OUString maBcp47;
540 mutable LanguageType mnLangID;
541 mutable ImplPtr mpImpl;
542 bool mbSystemLocale : 1;
543 mutable bool mbInitializedBcp47 : 1;
544 mutable bool mbInitializedLocale : 1;
545 mutable bool mbInitializedLangID : 1;
546 bool mbIsFallback : 1;
548 LanguageTagImpl* getImpl();
549 LanguageTagImpl const* getImpl() const;
550 ImplPtr registerImpl() const;
551 void syncFromImpl();
552 void syncVarsFromRawImpl() const;
553 void syncVarsFromImpl() const;
555 void convertLocaleToLang();
556 void convertBcp47ToLocale();
557 void convertBcp47ToLang();
558 void convertLangToLocale();
560 void convertFromRtlLocale();
562 /** Canonicalize if not yet done and synchronize initialized conversions.
564 @return whether BCP 47 language tag string was changed.
566 bool synCanonicalize();
568 void resetVars();
570 static bool isIsoLanguage( const OUString& rLanguage );
571 static bool isIsoScript( const OUString& rScript );
572 static bool isIsoCountry( const OUString& rRegion );
576 #endif // INCLUDED_I18NLANGTAG_LANGUAGETAG_HXX
578 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */