1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 #ifndef INCLUDED_I18NLANGTAG_LANGUAGETAG_HXX
11 #define INCLUDED_I18NLANGTAG_LANGUAGETAG_HXX
13 #include <sal/config.h>
14 #include <rtl/locale.h>
15 #include <rtl/ustring.hxx>
16 #include <com/sun/star/lang/Locale.hpp>
17 #include <i18nlangtag/i18nlangtagdllapi.h>
18 #include <i18nlangtag/lang.h>
21 #include <string_view>
24 /** The ISO 639-2 code reserved for local use that is used to indicate that a
25 css::Locale contains a BCP 47 string in its Variant field. The
26 Locale's Language field then will contain this language code.
28 @see LanguageTag::getLocale()
30 Avoid use, only needed internally or if conversion from Locale to
31 LanguageTag is not wanted, i.e. during ODF import. To check whether a
32 LanguageTag contains a plain language/country combination or a more
33 detailed BCP 47 language tag use LanguageTag::isIsoLocale() instead.
// Preprocessor macro expanding to the narrow string literal "qlt".
35 #define I18NLANGTAG_QLT "qlt"
// Forward declaration only: LanguageTag befriends this class and refers to it
// through pointers (ImplPtr, getImpl()), so no complete type is needed here.
38 class LanguageTagImpl
;
41 /** Wrapper for liblangtag BCP 47 language tags, MS-LangIDs, locales and
42 conversions in between.
44 Note that member variables are mutable and may change their values even in
45 const methods. Getter methods return either the original value or matching
48 For standalone conversions if no LanguageTag instance is at hand, static
49 convertTo...() methods exist.
51 class SAL_WARN_UNUSED I18NLANGTAG_DLLPUBLIC LanguageTag
// Grants LanguageTagImpl access to LanguageTag's non-public members.
53 friend class LanguageTagImpl
;
57 /** ScriptType for a language.
59 Used only in onTheFly languages as a way of marking key script behaviours
60 for the script of the language without having to store and analyse the
61 script each time. Used primarily from msLangId.
63 These need to correspond to the ExtraLanguages.ScriptType template
64 property in officecfg/registry/schema/org/openoffice/VCL.xcs
// The numeric values are part of the contract: per the note above they have to
// correspond to the ExtraLanguages.ScriptType property in VCL.xcs.
69 WESTERN
= 1, // Copies css::i18n::ScriptType for strong types
72 RTL
= 4 // implies CTL
75 /** Init LanguageTag with existing BCP 47 language tag string.
78 If TRUE, canonicalize tag and reparse, the resulting tag string may
80 If FALSE, the tag is simply stored and can be retrieved with
83 Note that conversions to ISO codes, locales or LanguageType or
84 obtaining language or script will canonicalize the tag string anyway,
85 so specifying bCanonicalize=false is not a guarantee that the tag will
86 stay identical to what was passed.
// explicit: an OUString is never converted to a LanguageTag implicitly.
// bCanonicalize defaults to false; see the note above on why the stored tag
// may still change later.
88 explicit LanguageTag( const OUString
& rBcp47LanguageTag
, bool bCanonicalize
= false );
90 /** Init LanguageTag with Locale. */
// explicit: a css::lang::Locale is never converted to a LanguageTag implicitly.
// The Locale is taken by const reference.
91 explicit LanguageTag( const css::lang::Locale
& rLocale
);
93 /** Init LanguageTag with LanguageType MS-LangID. */
// explicit: a LanguageType is never converted to a LanguageTag implicitly.
// The LanguageType is taken by value.
94 explicit LanguageTag( LanguageType nLanguage
);
96 /** Init LanguageTag with either BCP 47 language tag (precedence if not
97 empty), or a combination of language, script and country.
99 This is a convenience ctor to be used in ODF import where these are
// rBcp47 wins when it is not empty; otherwise rLanguage, rScript and rCountry
// are combined (see the comment above).
// rScript is passed by value as std::u16string_view, the other three strings
// by const reference.
102 explicit LanguageTag( const OUString
& rBcp47
, const OUString
& rLanguage
,
103 std::u16string_view rScript
, const OUString
& rCountry
);
105 /** Init LanguageTag with rtl_Locale.
107 This is a convenience ctor.
// explicit: an rtl_Locale is never converted to a LanguageTag implicitly.
// The rtl_Locale is taken by const reference.
109 explicit LanguageTag( const rtl_Locale
& rLocale
);
// Copy constructor, move constructor, copy assignment and move assignment are
// all explicitly defaulted, i.e. the compiler-generated member-wise versions
// are requested.
113 LanguageTag(LanguageTag
const &) = default;
114 LanguageTag(LanguageTag
&&) = default;
115 LanguageTag
& operator =(LanguageTag
const &) = default;
116 LanguageTag
& operator =(LanguageTag
&&) = default;
118 /** Obtain BCP 47 language tag.
120 @param bResolveSystem
121 If TRUE, resolve an empty language tag denoting the system
122 locale to the real locale used.
123 If FALSE, return an empty OUString for such a tag.
// Returns a const reference, not a copy. bResolveSystem defaults to true.
125 const OUString
& getBcp47( bool bResolveSystem
= true ) const;
127 /** Obtain BCP 47 language tag, but with MS malformed exceptions.
129 To be used *only* in OOXML filter context.
130 For example, es-ES-u-co-trad is stored as es-ES_tradnl which is not a
131 valid BCP 47 language tag.
// Returns the string by value; unlike getBcp47() there is no bResolveSystem
// parameter.
133 OUString
getBcp47MS() const;
135 /** Obtain language tag as Locale.
137 As a convention, language tags that can not be expressed as "pure"
138 css::lang::Locale content using Language and Country fields
139 store "qlt" (ISO 639 reserved for local use) in the Language field and
140 the entire BCP 47 language tag in the Variant field. The Country field
141 contains the corresponding ISO 3166 country code _if_ there is one, or
144 @param bResolveSystem
145 If TRUE, resolve an empty language tag denoting the system
146 locale to the real locale used.
147 If FALSE, return an empty Locale for such a tag.
// Returns a const reference, not a copy. bResolveSystem defaults to true.
149 const css::lang::Locale
& getLocale( bool bResolveSystem
= true ) const;
151 /** Obtain mapping to MS-LangID.
153 @param bResolveSystem
154 If TRUE, resolve an empty language tag denoting the system
155 locale to the real locale used.
156 If FALSE, return LANGUAGE_SYSTEM for such a tag.
// Returns the LanguageType by value. bResolveSystem defaults to true.
158 LanguageType
getLanguageType( bool bResolveSystem
= true ) const;
160 /** Obtain ISO strings for language, script and country.
162 This is a convenience method for ODF export places only. Avoid use in
165 ATTENTION! May return empty strings if the language tag is not
166 expressible in valid ISO codes!
170 Always resolves an empty tag to the system locale.
// Returns void: all three results are delivered through the non-const
// reference parameters rLanguage, rScript and rCountry.
172 void getIsoLanguageScriptCountry( OUString
& rLanguage
,
173 OUString
& rScript
, OUString
& rCountry
) const;
175 /** Get ISO 639 language code, or BCP 47 language.
177 Always resolves an empty tag to the system locale.
// const getter without parameters; the code is returned by value.
179 OUString
getLanguage() const;
181 /** Get ISO 15924 script code, if not the default script according to
182 BCP 47. For default script an empty string is returned.
186 Always resolves an empty tag to the system locale.
// const getter returning by value; per the comment above the result is empty
// for the default script.
188 OUString
getScript() const;
190 /** Get combined language and script code, separated by '-', if the
191 script is non-default; for the default script only the language code.
195 Always resolves an empty tag to the system locale.
// const getter returning by value; whether a '-' and script part is present
// depends on the script being non-default (see the comment above).
197 OUString
getLanguageAndScript() const;
199 /** Get ISO 3166 country alpha code. Empty if the BCP 47 tags denote a
200 region not expressible as 2 character country code.
202 Always resolves an empty tag to the system locale.
// const getter returning by value; the result may be empty (see the comment
// above), so callers should not assume a two-character code.
204 OUString
getCountry() const;
206 /** Get BCP 47 variant subtags, of the IANA Language Subtag Registry.
208 If there are multiple variant subtags they are separated by '-'.
210 This is NOT related to Locale.Variant!
212 Always resolves an empty tag to the system locale.
// const getter returning by value; several variant subtags come back as one
// '-' separated string (see the comment above).
214 OUString
getVariants() const;
216 /** Get a GLIBC locale string.
218 Always resolves an empty tag to the system locale.
221 An encoding to be appended to language_country, for example
222 ".UTF-8" including the dot.
224 @return The resulting GLIBC locale string if it could be constructed,
225 if not an empty string is returned.
// rEncoding is passed by value as std::u16string_view. An empty result means
// no locale string could be constructed (see @return above).
227 OUString
getGlibcLocaleString( std::u16string_view rEncoding
) const;
229 /** If language tag has a non-default script specified.
// const query without parameters; true for a non-default script (see the
// comment above).
231 bool hasScript() const;
233 /** If language tag is a locale that can be expressed using only ISO 639
234 language codes and ISO 3166 country codes, thus is convertible to a
235 conforming Locale struct without using extension mechanisms.
237 Note that an empty language tag or empty Locale::Language field or
238 LanguageType LANGUAGE_SYSTEM could be treated as a valid ISO locale in
239 some context, but here it is not. If you want that ask for
240 aTag.isSystemLocale() || aTag.isIsoLocale()
242 Always resolves an empty tag to the system locale.
// const query without parameters. Combine with isSystemLocale() as shown in
// the comment above if system-locale tags should be accepted as well.
244 bool isIsoLocale() const;
246 /** If language tag is a locale that can be expressed using only ISO 639
247 language codes and ISO 15924 script codes and ISO 3166 country codes,
248 thus can be stored in an ODF document using only fo:language, fo:script
249 and fo:country attributes. If this is FALSE, the locale must be stored
250 as a <*:rfc-language-tag> element.
252 Always resolves an empty tag to the system locale.
// const query without parameters; FALSE means the locale has to be written as
// a <*:rfc-language-tag> element (see the comment above).
254 bool isIsoODF() const;
256 /** If this is a valid BCP 47 language tag.
258 Always resolves an empty tag to the system locale.
260 @see static bool isValidBcp47(const OUString&)
// Instance overload without parameters; a static overload that validates an
// arbitrary OUString is declared further down in this class.
262 bool isValidBcp47() const;
264 /** If this tag was constructed as an empty tag denoting the system locale.
// Defined inline: returns the mbSystemLocale flag member unchanged.
266 bool isSystemLocale() const { return mbSystemLocale
;}
268 /** Returns the script type for this language, UNKNOWN if not set */
// const getter without parameters; the ScriptType is returned by value.
269 ScriptType
getScriptType() const;
271 /** Sets the script type for this language */
// Non-const setter; takes the ScriptType by value and returns nothing.
272 void setScriptType(ScriptType st
);
274 /** Reset with existing BCP 47 language tag string. See ctor. */
// Non-const. Unlike the matching constructor there is no bCanonicalize
// parameter. Returns a LanguageTag reference (presumably *this -- confirm in
// the implementation).
275 LanguageTag
& reset( const OUString
& rBcp47LanguageTag
);
277 /** Reset with Locale. */
// Non-const; takes the Locale by const reference. Returns a LanguageTag
// reference (presumably *this -- confirm in the implementation).
278 LanguageTag
& reset( const css::lang::Locale
& rLocale
);
280 /** Reset with LanguageType MS-LangID. */
// Non-const; takes the LanguageType by value. Returns a LanguageTag reference
// (presumably *this -- confirm in the implementation).
281 LanguageTag
& reset( LanguageType nLanguage
);
284 /** Fall back to a known locale.
286 If the current tag does not represent a locale known to us, fall back
287 to the most likely known locale.
288 If the current tag is known, no change occurs.
// Non-const: may modify this tag. The returned LanguageTag reference allows
// chaining, as in the makeFallback().getLocale(...) expressions quoted in the
// convertTo...WithFallback() comments of this class.
290 LanguageTag
& makeFallback();
292 /** Return a vector of fall-back strings.
295 full BCP 47 tag, same as getBcp47()
301 If the tag includes variants the order is:
302 full BCP 47 tag, same as getBcp47()
312 Only strings that differ from a higher order are included, for example
313 if there is no script the elements will be bcp47, lll-CC, lll; if the
314 bcp47 string is identical to lll-CC then only lll-CC, lll.
316 Note that lll is only ISO 639-1/2 alpha code and CC is only ISO 3166
317 alpha code. If the region can not be expressed as ISO 3166 then no -CC
320 @param bIncludeFullBcp47
321 If TRUE, the full BCP 47 tag is included as first element.
322 If FALSE, the full tag is not included; used if the caller
323 obtains the fallbacks only if the full tag did not lead to a
324 match, so subsequent tries need not include it again.
// Returns the vector by value. bIncludeFullBcp47 has no default value and must
// always be passed explicitly.
326 ::std::vector
< OUString
> getFallbackStrings( bool bIncludeFullBcp47
) const;
329 /** @short Search for an equal or at least for a similar locale in a list
332 @descr First search for a locale that is equal to the reference
333 locale. (means: same BCP47 string)
335 If the reference locale could not be located, check for
336 "similar" locales, in the same order as obtained by
337 getFallbackStrings().
339 If no similar locale could be located, we search for a locale
340 "en-US" inside the given locale list.
342 If "en-US" could not be located, we search for a locale "en"
343 inside the given list.
345 If neither a "same" nor any "similar" locale could be found, we try
346 "x-default" and "x-no-translate" explicitly. Sometimes
347 variables don't use real localization. For example, in case the
348 localized value is a fixed product name.
350 If no locale matched until then, we use any other locale that
351 exists inside the set of given ones, namely the first
355 the vector of possible locales as BCP47 strings.
358 the reference locale, BCP47 string.
360 @return An iterator that points to the found element inside the given
361 locale list. If no matching locale could be found it points to
362 the beginning of the list.
// static: needs no LanguageTag instance. The result is a const_iterator into
// rList, so it is only usable while rList is alive and unmodified.
364 static ::std::vector
< OUString
>::const_iterator
getFallback( const ::std::vector
< OUString
> & rList
,
365 const OUString
& rReference
);
368 /** @short Search for an equal or for a similar locale in a list
369 of possible ones where at least the language matches.
371 @descr First search for a locale that is equal to the reference
374 If the reference locale could not be located, check for
375 "similar" locales, in the same order as obtained by
376 getFallbackStrings().
378 If no locale matches, rList.end() is returned.
381 the vector of possible locales.
384 the reference locale.
386 @return An iterator that points to the found element inside the given
387 locale list. If no matching locale could be found it points to
// static; works on css::lang::Locale entries rather than BCP 47 strings.
// Per the comment above, rList.end() is the "no match" result, so callers
// must compare against it before dereferencing.
390 static ::std::vector
< css::lang::Locale
>::const_iterator
getMatchingFallback(
391 const ::std::vector
< css::lang::Locale
> & rList
,
392 const css::lang::Locale
& rReference
);
395 /** Test equality of two LanguageTag, possibly resolving system locale.
397 Resolve empty language tags denoting the system
398 locale to the real locale used before comparing.
// const comparison taking the other tag by const reference. operator== is the
// variant that does not resolve system locales.
400 bool equals( const LanguageTag
& rLanguageTag
) const;
402 /** Test equality of two LanguageTag.
404 Does NOT resolve system, i.e. if the system locale is en-US
405 LanguageTag("")==LanguageTag("en-US") returns false! Use
406 equals(...) instead if system locales shall be resolved.
// const member operator; the right-hand side is taken by const reference.
408 bool operator==( const LanguageTag
& rLanguageTag
) const;
410 /** Test inequality of two LanguageTag.
412 Does NOT resolve system, i.e. if the system locale is en-US
413 LanguageTag("")!=LanguageTag("en-US") returns true! Use
414 !equals(...) instead if system locales shall be resolved.
// Counterpart of operator==: const member operator, right-hand side by const
// reference.
416 bool operator!=( const LanguageTag
& rLanguageTag
) const;
418 /** Test this LanguageTag less than that LanguageTag.
420 For sorted containers. Does NOT resolve system.
// const member operator; per the comment above it exists to serve as the
// ordering for sorted containers.
422 bool operator<( const LanguageTag
& rLanguageTag
) const;
424 /** Convert MS-LangID to Locale.
426 @param bResolveSystem
427 If TRUE, resolve an empty language tag denoting the system
428 locale to the real locale used.
429 If FALSE, return an empty Locale for such a tag.
// static overload for LanguageType input (an overload taking a BCP 47 OUString
// is declared further down). bResolveSystem defaults to true.
431 static css::lang::Locale
convertToLocale( LanguageType nLangID
, bool bResolveSystem
= true );
433 /** Convert Locale to MS-LangID.
435 @param bResolveSystem
436 If TRUE, resolve an empty language tag denoting the system
437 locale to the real locale used.
438 If FALSE, return LANGUAGE_SYSTEM for such a tag.
// static overload for Locale input (an overload taking a BCP 47 OUString is
// declared further down). bResolveSystem defaults to true.
440 static LanguageType
convertToLanguageType( const css::lang::Locale
& rLocale
, bool bResolveSystem
= true );
442 /** Convert MS-LangID to BCP 47 string.
444 Resolve an empty language tag denoting the system
445 locale to the real locale used.
// static overload for LanguageType input; it has no bResolveSystem parameter.
447 static OUString
convertToBcp47( LanguageType nLangID
);
449 /** Convert Locale to BCP 47 string.
451 @param bResolveSystem
452 If TRUE, resolve an empty language tag denoting the system
453 locale to the real locale used.
454 If FALSE, return an empty OUString for such a tag.
// static overload for Locale input; bResolveSystem defaults to true.
456 static OUString
convertToBcp47( const css::lang::Locale
& rLocale
, bool bResolveSystem
= true );
458 /** Convert BCP 47 string to Locale, convenience method.
460 NOTE: exists only for consistency with the other convertTo...()
461 methods, internally uses a temporary LanguageTag instance for
462 conversion so does not save anything compared to
463 LanguageTag(rBcp47).getLocale(bResolveSystem).
465 @param bResolveSystem
466 If TRUE, resolve an empty language tag denoting the system
467 locale to the real locale used.
468 If FALSE, return an empty Locale for such a tag.
// static overload for BCP 47 string input; bResolveSystem defaults to true.
470 static css::lang::Locale
convertToLocale( const OUString
& rBcp47
, bool bResolveSystem
= true );
472 /** Convert BCP 47 string to MS-LangID, convenience method.
474 NOTE: exists only for consistency with the other convertTo...()
475 methods, internally uses a temporary LanguageTag instance for
476 conversion so does not save anything compared to
477 LanguageTag(rBcp47).getLanguageType().
479 Resolve an empty language tag denoting the system
480 locale to the real locale used.
// static overload for BCP 47 string input; it has no bResolveSystem parameter.
482 static LanguageType
convertToLanguageType( const OUString
& rBcp47
);
484 /** Convert BCP 47 string to MS-LangID with fallback, convenience method.
486 NOTE: exists only for consistency with the other convertTo...()
487 methods, internally uses a temporary LanguageTag instance for
488 conversion so does not save anything compared to
489 LanguageTag(rBcp47).makeFallback().getLanguageType().
493 Always resolves an empty tag to the system locale.
// static overload for BCP 47 string input; it has no bResolveSystem parameter
// (the Locale overload declared further down does).
495 static LanguageType
convertToLanguageTypeWithFallback( const OUString
& rBcp47
);
497 /** Convert BCP 47 string to Locale with fallback, convenience method.
499 NOTE: exists only for consistency with the other convertTo...()
500 methods, internally uses a temporary LanguageTag instance for
501 conversion so does not save anything compared to
502 LanguageTag(rBcp47).makeFallback().getLocale().
506 Always resolves an empty tag to the system locale.
// static; takes only the BCP 47 string by const reference and returns the
// Locale by value. There is no bResolveSystem parameter.
508 static css::lang::Locale
convertToLocaleWithFallback( const OUString
& rBcp47
);
510 /** Convert Locale to MS-LangID with fallback.
512 @param bResolveSystem
513 If TRUE, resolve an empty language tag denoting the system
514 locale to the real locale used and fallback.
515 If FALSE, return LANGUAGE_SYSTEM for such a tag and do not fallback.
// static overload for Locale input; bResolveSystem defaults to true.
517 static LanguageType
convertToLanguageTypeWithFallback( const css::lang::Locale
& rLocale
,
518 bool bResolveSystem
= true );
520 /** If rString represents a valid BCP 47 language tag.
522 Never resolves an empty tag to the system locale, in fact an empty
523 string is invalid here. Does not create an instance to be registered
524 with a conversion to Locale or LanguageType.
526 @param o_pCanonicalized
527 If given and rString is a valid BCP 47 language tag, the
528 canonicalized form is assigned, which may differ from the
529 original string even if that was a valid tag. If rString is not
530 a valid tag, nothing is assigned.
532 @param bDisallowPrivate
533 If TRUE, valid tags according to BCP 47 but reserved for
534 private use, like 'x-...', are not allowed and FALSE is
535 returned in this case.
// static overload. o_pCanonicalized is an optional out-parameter ("If given");
// it has no default value, so callers must pass it explicitly -- presumably
// nullptr when the canonical form is not wanted (confirm in the
// implementation). bDisallowPrivate defaults to false.
537 static bool isValidBcp47( const OUString
& rString
, OUString
* o_pCanonicalized
,
538 bool bDisallowPrivate
= false );
540 /** If nLang is a generated on-the-fly LangID */
// static; takes the LanguageType by value and returns bool.
541 static bool isOnTheFlyID( LanguageType nLang
);
// static; carries no doc comment of its own in this header.
// NOTE(review): presumably yields the ScriptType associated with an on-the-fly
// LangID -- confirm in the implementation.
542 static ScriptType
getOnTheFlyScriptType( LanguageType nLang
);
544 /** @attention _ONLY_ to be called by the application's configuration! */
// static; takes the LanguageType by value and returns nothing.
545 static void setConfiguredSystemLanguage( LanguageType nLang
);
547 /** @attention _ONLY_ to be called by fuzzing setup */
// static; takes no arguments and returns nothing.
548 static void disable_lt_tag_parse();
// Alias for a std::shared_ptr to the forward-declared LanguageTagImpl; used as
// the type of mpImpl and as the return type of registerImpl().
550 typedef std::shared_ptr
< LanguageTagImpl
> ImplPtr
;
// Locale, BCP 47 string, MS-LangID and implementation handle held by this tag.
// All four are mutable, which is what allows const methods to change them
// (see the class-level note on mutable members).
554 mutable css::lang::Locale maLocale
;
555 mutable OUString maBcp47
;
556 mutable LanguageType mnLangID
;
557 mutable ImplPtr mpImpl
;
// One-bit bit-field flags follow. mbSystemLocale is the value returned by
// isSystemLocale(); it is not mutable.
558 bool mbSystemLocale
: 1;
// NOTE(review): presumably these three record which of maBcp47 / maLocale /
// mnLangID currently holds a valid value -- confirm in the implementation.
559 mutable bool mbInitializedBcp47
: 1;
560 mutable bool mbInitializedLocale
: 1;
561 mutable bool mbInitializedLangID
: 1;
// Not mutable, so only non-const members can change it.
562 bool mbIsFallback
: 1;
// Non-const and const overloads yielding a raw (non-owning by type)
// LanguageTagImpl pointer; the const overload returns a pointer-to-const.
// NOTE(review): presumably backed by mpImpl -- confirm in the implementation.
564 LanguageTagImpl
* getImpl();
565 LanguageTagImpl
const* getImpl() const;
// const method returning an ImplPtr (std::shared_ptr<LanguageTagImpl>) by
// value.
566 ImplPtr
registerImpl() const;
// Both helpers are const and return nothing, so any state they update has to
// live in the mutable members of this class.
568 void syncVarsFromRawImpl() const;
569 void syncVarsFromImpl() const;
// Non-const helpers without parameters or return value; they operate purely
// on this object's own state.
// NOTE(review): the names suggest "convert <source> to <target>
// representation" -- confirm in the implementation.
571 void convertLocaleToLang();
572 void convertBcp47ToLocale();
573 void convertBcp47ToLang();
574 void convertLangToLocale();
576 void convertFromRtlLocale();
578 /** Canonicalize if not yet done and synchronize initialized conversions.
580 @return whether BCP 47 language tag string was changed.
// Non-const; the bool result reports whether the BCP 47 string was changed
// (see @return above).
582 bool synCanonicalize();
// Three static predicates, each testing a single code string passed by const
// reference. Note that the parameter of isIsoCountry is named rRegion.
586 static bool isIsoLanguage( const OUString
& rLanguage
);
587 static bool isIsoScript( const OUString
& rScript
);
588 static bool isIsoCountry( const OUString
& rRegion
);
592 #endif // INCLUDED_I18NLANGTAG_LANGUAGETAG_HXX
594 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */