1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 #ifndef INCLUDED_I18NLANGTAG_LANGUAGETAG_HXX
11 #define INCLUDED_I18NLANGTAG_LANGUAGETAG_HXX
13 #include <sal/config.h>
14 #include <rtl/ustring.hxx>
15 #include <com/sun/star/lang/Locale.hpp>
16 #include <i18nlangtag/i18nlangtagdllapi.h>
17 #include <i18nlangtag/lang.h>
21 typedef struct _rtl_Locale rtl_Locale
; // as in rtl/locale.h
24 /** Wrapper for liblangtag BCP 47 language tags, MS-LangIDs, locales and
25 conversions in between.
27 Note that member variables are mutable and may change their values even in
28 const methods. Getter methods return either the original value or matching
31 class I18NLANGTAG_DLLPUBLIC LanguageTag
35 /** Init LanguageTag with existing BCP 47 language tag string.
38 If TRUE, canonicalize tag and reparse, the resulting tag string may
40 If FALSE, the tag is simply stored and can be retrieved with
43 Note that conversions to ISO codes, locales or LanguageType or
44 obtaining language or script will canonicalize the tag string anyway,
45 so specifying bCanonicalize=false is not a guarantee that the tag will
46 stay identical to what was passed.
48 explicit LanguageTag( const OUString
& rBcp47LanguageTag
, bool bCanonicalize
= false );
50 /** Init LanguageTag with Locale. */
51 explicit LanguageTag( const com::sun::star::lang::Locale
& rLocale
);
53 /** Init LanguageTag with LanguageType MS-LangID. */
54 explicit LanguageTag( LanguageType nLanguage
);
56 /** Init LanguageTag with language and country strings.
58 This is a convenience ctor for places that so far use only language and
59 country to replace the MsLangId::convert...IsoNames...() calls. Avoid
62 explicit LanguageTag( const OUString
& rLanguage
, const OUString
& rCountry
);
64 /** Init LanguageTag with rtl_Locale.
66 This is a convenience ctor.
68 explicit LanguageTag( const rtl_Locale
& rLocale
);
/// Copy constructor: initializes this LanguageTag from rLanguageTag.
70 LanguageTag( const LanguageTag
& rLanguageTag
);
/// Copy assignment from rLanguageTag; returns a LanguageTag reference
/// (presumably *this -- confirm against the implementation).
72 LanguageTag
& operator=( const LanguageTag
& rLanguageTag
);
74 /** Obtain BCP 47 language tag.
77 If TRUE, resolve an empty language tag denoting the system
78 locale to the real locale used.
79 If FALSE, return an empty OUString for such a tag.
81 const OUString
& getBcp47( bool bResolveSystem
= true ) const;
83 /** Obtain language tag as Locale.
85 As a convention, language tags that can not be expressed as "pure"
86 com::sun::star::lang::Locale content using Language and Country fields
87 store "qlt" (ISO 639 reserved for local use) in the Language field and
88 the entire BCP 47 language tag in the Variant field. The Country field
89 contains the corresponding ISO 3166 country code _if_ there is one, or
93 If TRUE, resolve an empty language tag denoting the system
94 locale to the real locale used.
95 If FALSE, return an empty Locale for such a tag.
97 const com::sun::star::lang::Locale
& getLocale( bool bResolveSystem
= true ) const;
99 /** Obtain mapping to MS-LangID.
101 @param bResolveSystem
102 If TRUE, resolve an empty language tag denoting the system
103 locale to the real locale used.
104 If FALSE, return LANGUAGE_SYSTEM for such a tag.
106 LanguageType
getLanguageType( bool bResolveSystem
= true ) const;
108 /** Obtain ISO strings for language and country.
110 This is a convenience method for places that so far use only language and
111 country to replace the MsLangId::convert...IsoNames...() calls. Avoid
114 ATTENTION! May return empty strings if the language tag is not
115 expressible in valid ISO codes!
119 Always resolves an empty tag to the system locale.
121 void getIsoLanguageCountry( OUString
& rLanguage
, OUString
& rCountry
) const;
123 /** Get ISO 639 language code, or BCP 47 language.
125 Always resolves an empty tag to the system locale.
127 OUString
getLanguage() const;
129 /** Get ISO 15924 script code, if not the default script according to
130 BCP 47. For default script an empty string is returned.
134 Always resolves an empty tag to the system locale.
136 OUString
getScript() const;
138 /** Get combined language and script code, separated by '-' if the script
139 is non-default; for the default script only the language is returned.
143 Always resolves an empty tag to the system locale.
145 OUString
getLanguageAndScript() const;
147 /** Get ISO 3166 country alpha code. Empty if the BCP 47 tags denote a
148 region not expressible as a 2-character country code.
150 Always resolves an empty tag to the system locale.
152 OUString
getCountry() const;
154 /** Get BCP 47 region tag, which may be an ISO 3166 country alpha code or
155 any other BCP 47 region tag.
157 Always resolves an empty tag to the system locale.
159 OUString
getRegion() const;
161 /** Get a GLIBC locale string.
163 Always resolves an empty tag to the system locale.
166 An encoding to be appended to language_country, for example
167 ".UTF-8" including the dot.
169 @return The resulting GLIBC locale string if it could be constructed,
170 if not an empty string is returned.
172 OUString
getGlibcLocaleString( const OUString
& rEncoding
) const;
174 /** If language tag has a non-default script specified.
176 bool hasScript() const;
178 /** If language tag is a locale that can be expressed using only ISO 639
179 language codes and ISO 3166 country codes, thus is convertible to a
180 conforming Locale struct without using extension mechanisms.
182 Note that an empty language tag or empty Locale::Language field or
183 LanguageType LANGUAGE_SYSTEM could be treated as a valid ISO locale in
184 some context, but here is not. If you want that ask for
185 aTag.isSystemLocale() || aTag.isIsoLocale()
187 Always resolves an empty tag to the system locale.
189 bool isIsoLocale() const;
191 /** If language tag is a locale that can be expressed using only ISO 639
192 language codes and ISO 15924 script codes and ISO 3166 country codes,
193 thus can be stored in an ODF document using only fo:language, fo:script
194 and fo:country attributes. If this is FALSE, the locale must be stored
195 as a <*:rfc-language-tag> element.
197 Always resolves an empty tag to the system locale.
199 bool isIsoODF() const;
201 /** If this is a valid BCP 47 language tag.
203 Always resolves an empty tag to the system locale.
205 bool isValidBcp47() const;
207 /** If this tag was constructed as an empty tag denoting the system locale.
209 bool isSystemLocale() const;
212 /** Reset with existing BCP 47 language tag string. See ctor. */
213 void reset( const OUString
& rBcp47LanguageTag
, bool bCanonicalize
= false );
215 /** Reset with Locale. */
216 void reset( const com::sun::star::lang::Locale
& rLocale
);
218 /** Reset with LanguageType MS-LangID. */
219 void reset( LanguageType nLanguage
);
221 /** Reset with rtl_Locale. */
222 void reset( const rtl_Locale
& rLocale
);
225 /** Fall back to a known locale.
227 If the current tag does not represent a known (by us) locale, fall back
228 to the most likely known locale.
229 If the current tag is known, no change occurs.
231 LanguageTag
& makeFallback();
233 /** Return a vector of fall-back strings.
236 full BCP 47 tag, same as getBcp47()
242 Only strings that differ from a higher order are included, for example
243 if there is no script the elements will be bcp47, lll-CC, lll; if the
244 bcp47 string is identical to lll-CC then only lll-CC, lll.
246 Note that lll is only ISO 639-1/2 alpha code and CC is only ISO 3166
247 alpha code. If the region can not be expressed as ISO 3166 then no -CC
250 ::std::vector
< OUString
> getFallbackStrings() const;
253 /** @short search for an equal or at least for a similar locale in a list
256 @descr First search for a locale that is equal to the reference
257 locale. (means: same BCP47 string)
259 If the reference locale could not be located, check for
260 "similar" locales, in the same order as obtained by
261 getFallbackStrings().
263 If no similar locale could be located, we search for a locale
264 "en-US" inside the given locale list.
266 If "en-US" could not be located, we search for a locale "en"
267 inside the given list.
269 If no "same" nor any "similar" locale could be found, we try
270 "x-default" and "x-no-translate" explicitly. Sometimes
271 variables don't use real localization. For example, in case the
272 localized value is a fixed product name.
274 If no locale matched until then, we use any other locale that
275 exists inside the set of given ones, namely the first
279 the vector of possible locales as BCP47 strings.
282 the reference locale, BCP47 string.
284 @return An iterator that points to the found element inside the given
285 locale list. If no matching locale could be found it points to
288 static ::std::vector
< OUString
>::const_iterator
getFallback( const ::std::vector
< OUString
> & rList
,
289 const OUString
& rReference
);
292 /** Test equality of two LanguageTag, possibly resolving system locale.
294 @param bResolveSystem
295 If TRUE, resolve empty language tags denoting the system
296 locale to the real locale used before comparing.
297 If FALSE, the behavior is identical to operator==(), system
298 locales are not resolved first.
300 bool equals( const LanguageTag
& rLanguageTag
, bool bResolveSystem
= false ) const;
302 /** Test equality of two LanguageTag.
304 Does NOT resolve system, i.e. if the system locale is en-US
305 LanguageTag("")==LanguageTag("en-US") returns false! Use
306 equals(...,true) instead if system locales shall be resolved.
308 bool operator==( const LanguageTag
& rLanguageTag
) const;
310 /** Test inequality of two LanguageTag.
312 Does NOT resolve system, i.e. if the system locale is en-US
313 LanguageTag("")!=LanguageTag("en-US") returns true! Use
314 !equals(...,true) instead if system locales shall be resolved.
316 bool operator!=( const LanguageTag
& rLanguageTag
) const;
/// Data members. Most are declared mutable; per the class description they
/// may change their values even in const methods.
/// NOTE(review): the Decision type used below is declared outside the lines
/// visible here.

/// The tag held as a com::sun::star::lang::Locale.
/// NOTE(review): presumably only meaningful once mbInitializedLocale is set -- confirm.
327 mutable com::sun::star::lang::Locale maLocale
;
/// The tag held as a BCP 47 string.
/// NOTE(review): presumably only meaningful once mbInitializedBcp47 is set -- confirm.
328 mutable OUString maBcp47
;
329 mutable OUString maCachedLanguage
; ///< cache getLanguage()
330 mutable OUString maCachedScript
; ///< cache getScript()
331 mutable OUString maCachedCountry
; ///< cache getCountry()
332 mutable void* mpImplLangtag
; ///< actually lt_tag_t pointer, encapsulated
/// The tag held as a LanguageType (MS-LangID).
/// NOTE(review): presumably only meaningful once mbInitializedLangID is set -- confirm.
333 mutable LanguageType mnLangID
;
/// NOTE(review): the next three presumably cache the answers of isValidBcp47(),
/// isIsoLocale() and isIsoODF() respectively -- confirm.
334 mutable Decision meIsValid
;
335 mutable Decision meIsIsoLocale
;
336 mutable Decision meIsIsoODF
;
337 mutable Decision meIsLiblangtagNeeded
; ///< whether processing with liblangtag needed
/// One-bit flags. mbSystemLocale and mbIsFallback are NOT mutable, so const
/// methods cannot change them.
/// NOTE(review): mbSystemLocale presumably backs isSystemLocale() -- confirm.
338 bool mbSystemLocale
: 1;
339 mutable bool mbInitializedBcp47
: 1;
340 mutable bool mbInitializedLocale
: 1;
341 mutable bool mbInitializedLangID
: 1;
/// NOTE(review): the mbCached* flags presumably mark the corresponding
/// maCached* string as filled -- confirm.
342 mutable bool mbCachedLanguage
: 1;
343 mutable bool mbCachedScript
: 1;
344 mutable bool mbCachedCountry
: 1;
/// NOTE(review): presumably set by makeFallback() -- confirm.
345 bool mbIsFallback
: 1;
/// Conversion helpers, named after the three representations this class holds:
/// Locale, BCP 47 string and LanguageType ("Lang").
/// NOTE(review): implementations are not visible here; presumably convertXToY()
/// fills representation Y from representation X -- confirm against the .cxx.
347 void convertLocaleToBcp47();
348 void convertLocaleToLang();
349 void convertBcp47ToLocale();
350 void convertBcp47ToLang();
351 void convertLangToLocale();
352 void convertLangToBcp47();
/// NOTE(review): presumably shared by the rtl_Locale constructor and
/// reset( const rtl_Locale& ) -- confirm.
354 void convertFromRtlLocale();
/// Helpers returning language, script and region as OUString. Unlike the
/// public getLanguage()/getScript()/getRegion() these are declared non-const.
/// NOTE(review): presumably they query the liblangtag object behind
/// mpImplLangtag -- confirm against the implementation.
358 OUString
getLanguageFromLangtag();
359 OUString
getScriptFromLangtag();
360 OUString
getRegionFromLangtag();
364 /** Obtain Language, Script and Country via simpleExtract() and assign them
365 to the cached variables if successful.
367 @return return of simpleExtract()
369 bool cacheSimpleLSC();
/// Static predicates on a single language, script or country/region string.
/// They need no LanguageTag instance.
/// NOTE(review): the exact rules live in the implementation, not visible here;
/// presumably they test for ISO 639 language, ISO 15924 script and ISO 3166
/// country code form respectively -- confirm. Note that isIsoCountry() names
/// its parameter rRegion.
371 static bool isIsoLanguage( const OUString
& rLanguage
);
372 static bool isIsoScript( const OUString
& rScript
);
373 static bool isIsoCountry( const OUString
& rRegion
);
383 /** Of a simple language tag of the form lll[-Ssss][-CC] (i.e. one that
384 would fulfill the isIsoODF() condition) extract the portions.
386 Does not check case or content!
388 @return EXTRACTED_LSC if simple tag was detected, EXTRACTED_X if x-...
389 privateuse tag was detected, EXTRACTED_X_JOKER if "*" joker was
390 detected, else EXTRACTED_NONE.
392 static Extraction
simpleExtract( const OUString
& rBcp47
,
395 OUString
& rCountry
);
399 #endif // INCLUDED_I18NLANGTAG_LANGUAGETAG_HXX
401 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */