1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "nsLanguageAtomService.h"
7 #include "nsUConvPropertySearch.h"
8 #include "nsUnicharUtils.h"
10 #include "nsGkAtoms.h"
11 #include "mozilla/ArrayUtils.h"
12 #include "mozilla/ClearOnShutdown.h"
13 #include "mozilla/Encoding.h"
14 #include "mozilla/intl/Locale.h"
15 #include "mozilla/intl/OSPreferences.h"
16 #include "mozilla/ServoBindings.h"
17 #include "mozilla/ServoUtils.h"
18 #include "mozilla/StaticPtr.h"
20 using namespace mozilla
;
21 using mozilla::intl::OSPreferences
;
// Property table whose entries are pulled in from
// encodingsgroups.properties.h. LookupCharSet() searches it with an encoding
// name as the key and atomizes the value it finds.
// NOTE(review): the closing of this initializer is not visible in this view.
23 static constexpr nsUConvProp encodingsGroups
[] = {
24 #include "encodingsgroups.properties.h"
// GetUncachedLanguageGroup() walks this array when the language tag starts
// with "x-", comparing each entry against the requested atom.
27 // List of mozilla internal x-* tags that map to themselves (see bug 256257)
28 static constexpr nsStaticAtom
* kLangGroups
[] = {
29 // This list must be sorted!
30 nsGkAtoms::x_armn
, nsGkAtoms::x_cyrillic
, nsGkAtoms::x_devanagari
,
31 nsGkAtoms::x_geor
, nsGkAtoms::x_math
, nsGkAtoms::x_tamil
,
32 nsGkAtoms::Unicode
, nsGkAtoms::x_western
33 // These self-mappings are not necessary unless somebody uses them to specify
34 // lang in (X)HTML/XML documents, which they shouldn't. (see bug 256257)
// Each entry pairs a four-letter script tag with a langGroup atom;
// GetUncachedLanguageGroup() reads them as entry.mTag and entry.mAtom.
// NOTE(review): the struct's member declarations are not visible in this
// view.
44 // Map ISO 15924 script codes from BCP47 lang tag to mozilla's langGroups.
45 static constexpr struct {
48 } kScriptLangGroup
[] = {
49 // This list must be sorted by script code!
50 {"Arab", nsGkAtoms::ar
},
51 {"Armn", nsGkAtoms::x_armn
},
52 {"Beng", nsGkAtoms::x_beng
},
53 {"Cans", nsGkAtoms::x_cans
},
54 {"Cyrl", nsGkAtoms::x_cyrillic
},
55 {"Deva", nsGkAtoms::x_devanagari
},
56 {"Ethi", nsGkAtoms::x_ethi
},
57 {"Geok", nsGkAtoms::x_geor
},
58 {"Geor", nsGkAtoms::x_geor
},
59 {"Grek", nsGkAtoms::el
},
60 {"Gujr", nsGkAtoms::x_gujr
},
61 {"Guru", nsGkAtoms::x_guru
},
62 {"Hang", nsGkAtoms::ko
},
63 // Hani is not mapped to a specific langGroup, we prefer to look at the
64 // primary language subtag in this case
65 {"Hans", nsGkAtoms::Chinese
},
66 // Hant is special-cased in code
69 {"Hebr", nsGkAtoms::he
},
70 {"Hira", nsGkAtoms::Japanese
},
71 {"Jpan", nsGkAtoms::Japanese
},
72 {"Kana", nsGkAtoms::Japanese
},
73 {"Khmr", nsGkAtoms::x_khmr
},
74 {"Knda", nsGkAtoms::x_knda
},
75 {"Kore", nsGkAtoms::ko
},
76 {"Latn", nsGkAtoms::x_western
},
77 {"Mlym", nsGkAtoms::x_mlym
},
78 {"Orya", nsGkAtoms::x_orya
},
79 {"Sinh", nsGkAtoms::x_sinh
},
80 {"Taml", nsGkAtoms::x_tamil
},
81 {"Telu", nsGkAtoms::x_telu
},
82 {"Thai", nsGkAtoms::th
},
83 {"Tibt", nsGkAtoms::x_tibt
}};
85 static StaticAutoPtr
<nsLanguageAtomService
> gLangAtomService
;
88 nsLanguageAtomService
* nsLanguageAtomService::GetService() {
89 if (!gLangAtomService
) {
90 gLangAtomService
= new nsLanguageAtomService();
92 return gLangAtomService
.get();
96 void nsLanguageAtomService::Shutdown() { gLangAtomService
= nullptr; }
98 nsStaticAtom
* nsLanguageAtomService::LookupLanguage(
99 const nsACString
& aLanguage
) {
100 nsAutoCString
lowered(aLanguage
);
101 ToLowerCase(lowered
);
103 RefPtr
<nsAtom
> lang
= NS_Atomize(lowered
);
104 return GetLanguageGroup(lang
);
// Maps a character encoding to an atom.
// The encoding's name is written into `charset` and looked up in the
// encodingsGroups table. If the lookup fails, nsGkAtoms::Unicode is returned;
// otherwise the result is an atom made from `group`.
// NOTE(review): the declaration of `group` is not visible in this view;
// presumably it is a string local filled in by SearchPropertyValue — confirm.
107 already_AddRefed
<nsAtom
> nsLanguageAtomService::LookupCharSet(
108 NotNull
<const Encoding
*> aEncoding
) {
109 nsAutoCString charset
;
110 aEncoding
->Name(charset
);
112 if (NS_FAILED(nsUConvPropertySearch::SearchPropertyValue(
113 encodingsGroups
, std::size(encodingsGroups
), charset
, group
))) {
// Lookup failed: fall back to the Unicode atom.
114 return RefPtr
<nsAtom
>(nsGkAtoms::Unicode
).forget();
116 return NS_Atomize(group
);
// Returns mLocaleLanguage, filling it in first if it is not yet set.
// When OSPreferences::GetRegionalPrefsLocales succeeds, the first returned
// locale is lower-cased and atomized. The system locale from
// OSPreferences::GetSystemLocale is also lower-cased and atomized.
// NOTE(review): the system-locale statements are presumably the `else` branch
// of the success check, but the lines that would show that are not visible in
// this view — confirm.
119 nsAtom
* nsLanguageAtomService::GetLocaleLanguage() {
121 if (!mLocaleLanguage
) {
122 AutoTArray
<nsCString
, 10> regionalPrefsLocales
;
123 if (NS_SUCCEEDED(OSPreferences::GetInstance()->GetRegionalPrefsLocales(
124 regionalPrefsLocales
))) {
// NOTE(review): element 0 is read with no visible emptiness check; assumes a
// successful call always yields at least one locale — confirm.
125 // use lowercase for all language atoms
126 ToLowerCase(regionalPrefsLocales
[0]);
127 mLocaleLanguage
= NS_Atomize(regionalPrefsLocales
[0]);
129 nsAutoCString locale
;
130 OSPreferences::GetInstance()->GetSystemLocale(locale
);
132 ToLowerCase(locale
); // use lowercase for all language atoms
133 mLocaleLanguage
= NS_Atomize(locale
);
138 return mLocaleLanguage
;
// Returns the language-group atom for aLanguage, using mLangToGroup as a
// cache.
// The map is probed with Get() first. The final statement uses
// LookupOrInsertWith(), whose callback computes the value with
// GetUncachedLanguageGroup() after asserting the expected thread/lock state.
// aNeedsToCache is an optional-looking out flag that one path sets to true.
// NOTE(review): the statements surrounding the Get() hit and the
// aNeedsToCache assignment are not visible in this view, so the exact
// conditions under which the flag is set (and what is returned then) cannot
// be confirmed from here.
141 nsStaticAtom
* nsLanguageAtomService::GetLanguageGroup(nsAtom
* aLanguage
,
142 bool* aNeedsToCache
) {
144 if (nsStaticAtom
* atom
= mLangToGroup
.Get(aLanguage
)) {
147 *aNeedsToCache
= true;
151 return mLangToGroup
.LookupOrInsertWith(aLanguage
, [&] {
152 AssertIsMainThreadOrServoFontMetricsLocked();
153 return GetUncachedLanguageGroup(aLanguage
);
// Computes the language-group atom for aLanguage without consulting the
// cache. Visible steps, in order:
//   1. Take a lower-cased UTF-8 copy of the atom (langStr).
//   2. If it starts with "x-", compare it against the kLangGroups entries.
//   3. Cut langStr at the first "-x-" so private-use subtags do not make the
//      locale parser reject the tag.
//   4. Parse langStr as a locale; on failure, replace '_' with '-' and parse
//      once more.
//   5. If parsing and canonicalization succeed: add likely subtags when the
//      script is missing, special-case "Hant" by region, then search
//      kScriptLangGroup by script tag.
//   6. Otherwise fall back on the language subtag for zh / ja / ko.
//   7. Return nsGkAtoms::Unicode when nothing matched.
// NOTE(review): several lines are not visible in this view, including the
// declarations of `loc` and `foundIndex`, the name of the search routine
// applied to kScriptLangGroup, and the return statements inside the x-* loop.
157 nsStaticAtom
* nsLanguageAtomService::GetUncachedLanguageGroup(
158 nsAtom
* aLanguage
) const {
159 nsAutoCString langStr
;
160 aLanguage
->ToUTF8String(langStr
);
161 ToLowerCase(langStr
);
// NOTE(review): langStr[0] and langStr[1] are read with no visible length
// check — confirm that empty and one-character tags are safe here.
163 if (langStr
[0] == 'x' && langStr
[1] == '-') {
164 // Internal x-* langGroup codes map to themselves (see bug 256257)
165 for (nsStaticAtom
* langGroup
: kLangGroups
) {
166 if (langGroup
== aLanguage
) {
169 if (aLanguage
->IsAsciiLowercase()) {
172 // Do the slow ascii-case-insensitive comparison only if needed.
173 nsDependentAtomString
string(langGroup
);
174 if (string
.EqualsASCII(langStr
.get(), langStr
.Length())) {
179 // If the lang code can be parsed as BCP47, look up its (likely) script.
181 // https://bugzilla.mozilla.org/show_bug.cgi?id=1618034:
182 // First strip any private subtags that would cause Locale to reject the
183 // tag as non-wellformed.
184 nsACString::const_iterator start
, end
;
185 langStr
.BeginReading(start
);
186 langStr
.EndReading(end
);
187 if (FindInReadable("-x-"_ns
, start
, end
)) {
188 // The substring we want ends at the beginning of the "-x-" subtag.
189 langStr
.Truncate(start
.get() - langStr
.BeginReading());
193 auto result
= intl::LocaleParser::TryParse(langStr
, loc
);
194 if (!result
.isOk()) {
195 // Did the author (wrongly) use '_' instead of '-' to separate subtags?
196 // If so, fix it up and re-try parsing.
197 if (langStr
.Contains('_')) {
198 langStr
.ReplaceChar('_', '-');
200 // Throw away the partially parsed locale and re-start parsing.
202 result
= intl::LocaleParser::TryParse(langStr
, loc
);
205 if (result
.isOk() && loc
.Canonicalize().isOk()) {
206 // Fill in script subtag if not present.
207 if (loc
.Script().Missing()) {
208 if (loc
.AddLikelySubtags().isErr()) {
209 // Fall back to x-unicode if no match was found
210 return nsGkAtoms::Unicode
;
213 // Traditional Chinese has separate prefs for Hong Kong / Taiwan;
214 // check the region subtag.
215 if (loc
.Script().EqualTo("Hant")) {
216 if (loc
.Region().EqualTo("HK")) {
217 return nsGkAtoms::HongKongChinese
;
219 return nsGkAtoms::Taiwanese
;
221 // Search list of known script subtags that map to langGroup codes.
223 Span
<const char> scriptAsSpan
= loc
.Script().Span();
224 nsDependentCSubstring
script(scriptAsSpan
.data(), scriptAsSpan
.size());
// The comparator orders the wanted script against each entry's mTag; on a
// match the entry's mAtom is returned.
226 kScriptLangGroup
, 0, std::size(kScriptLangGroup
),
227 [script
](const auto& entry
) -> int {
228 return Compare(script
, nsDependentCString(entry
.mTag
));
231 return kScriptLangGroup
[foundIndex
].mAtom
;
233 // Script subtag was not recognized (includes "Hani"); check the language
234 // subtag for CJK possibilities so that we'll prefer the appropriate font
235 // rather than falling back to the browser's hardcoded preference.
236 if (loc
.Language().EqualTo("zh")) {
237 if (loc
.Region().EqualTo("HK")) {
238 return nsGkAtoms::HongKongChinese
;
240 if (loc
.Region().EqualTo("TW")) {
241 return nsGkAtoms::Taiwanese
;
243 return nsGkAtoms::Chinese
;
245 if (loc
.Language().EqualTo("ja")) {
246 return nsGkAtoms::Japanese
;
248 if (loc
.Language().EqualTo("ko")) {
249 return nsGkAtoms::ko
;
254 // Fall back to x-unicode if no match was found
255 return nsGkAtoms::Unicode
;