/*
 * Copyright (C) 2011 Google Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
32 #include "platform/text/LocaleToScriptMapping.h"
34 #include "wtf/HashMap.h"
35 #include "wtf/HashSet.h"
36 #include "wtf/text/StringHash.h"
40 UScriptCode
scriptNameToCode(const String
& scriptName
)
42 struct ScriptNameCode
{
47 // This generally maps an ISO 15924 script code to its UScriptCode, but certain families of script codes are
48 // treated as a single script for assigning a per-script font in Settings. For example, "hira" is mapped to
49 // USCRIPT_KATAKANA_OR_HIRAGANA instead of USCRIPT_HIRAGANA, since we want all Japanese scripts to be rendered
50 // using the same font setting.
51 static const ScriptNameCode scriptNameCodeList
[] = {
52 { "zyyy", USCRIPT_COMMON
},
53 { "qaai", USCRIPT_INHERITED
},
54 { "arab", USCRIPT_ARABIC
},
55 { "armn", USCRIPT_ARMENIAN
},
56 { "beng", USCRIPT_BENGALI
},
57 { "bopo", USCRIPT_BOPOMOFO
},
58 { "cher", USCRIPT_CHEROKEE
},
59 { "copt", USCRIPT_COPTIC
},
60 { "cyrl", USCRIPT_CYRILLIC
},
61 { "dsrt", USCRIPT_DESERET
},
62 { "deva", USCRIPT_DEVANAGARI
},
63 { "ethi", USCRIPT_ETHIOPIC
},
64 { "geor", USCRIPT_GEORGIAN
},
65 { "goth", USCRIPT_GOTHIC
},
66 { "grek", USCRIPT_GREEK
},
67 { "gujr", USCRIPT_GUJARATI
},
68 { "guru", USCRIPT_GURMUKHI
},
69 { "hani", USCRIPT_HAN
},
70 { "hang", USCRIPT_HANGUL
},
71 { "hebr", USCRIPT_HEBREW
},
72 { "hira", USCRIPT_KATAKANA_OR_HIRAGANA
},
73 { "knda", USCRIPT_KANNADA
},
74 { "kana", USCRIPT_KATAKANA_OR_HIRAGANA
},
75 { "khmr", USCRIPT_KHMER
},
76 { "laoo", USCRIPT_LAO
},
77 { "latn", USCRIPT_LATIN
},
78 { "mlym", USCRIPT_MALAYALAM
},
79 { "mong", USCRIPT_MONGOLIAN
},
80 { "mymr", USCRIPT_MYANMAR
},
81 { "ogam", USCRIPT_OGHAM
},
82 { "ital", USCRIPT_OLD_ITALIC
},
83 { "orya", USCRIPT_ORIYA
},
84 { "runr", USCRIPT_RUNIC
},
85 { "sinh", USCRIPT_SINHALA
},
86 { "syrc", USCRIPT_SYRIAC
},
87 { "taml", USCRIPT_TAMIL
},
88 { "telu", USCRIPT_TELUGU
},
89 { "thaa", USCRIPT_THAANA
},
90 { "thai", USCRIPT_THAI
},
91 { "tibt", USCRIPT_TIBETAN
},
92 { "cans", USCRIPT_CANADIAN_ABORIGINAL
},
93 { "yiii", USCRIPT_YI
},
94 { "tglg", USCRIPT_TAGALOG
},
95 { "hano", USCRIPT_HANUNOO
},
96 { "buhd", USCRIPT_BUHID
},
97 { "tagb", USCRIPT_TAGBANWA
},
98 { "brai", USCRIPT_BRAILLE
},
99 { "cprt", USCRIPT_CYPRIOT
},
100 { "limb", USCRIPT_LIMBU
},
101 { "linb", USCRIPT_LINEAR_B
},
102 { "osma", USCRIPT_OSMANYA
},
103 { "shaw", USCRIPT_SHAVIAN
},
104 { "tale", USCRIPT_TAI_LE
},
105 { "ugar", USCRIPT_UGARITIC
},
106 { "hrkt", USCRIPT_KATAKANA_OR_HIRAGANA
},
107 { "bugi", USCRIPT_BUGINESE
},
108 { "glag", USCRIPT_GLAGOLITIC
},
109 { "khar", USCRIPT_KHAROSHTHI
},
110 { "sylo", USCRIPT_SYLOTI_NAGRI
},
111 { "talu", USCRIPT_NEW_TAI_LUE
},
112 { "tfng", USCRIPT_TIFINAGH
},
113 { "xpeo", USCRIPT_OLD_PERSIAN
},
114 { "bali", USCRIPT_BALINESE
},
115 { "batk", USCRIPT_BATAK
},
116 { "blis", USCRIPT_BLISSYMBOLS
},
117 { "brah", USCRIPT_BRAHMI
},
118 { "cham", USCRIPT_CHAM
},
119 { "cirt", USCRIPT_CIRTH
},
120 { "cyrs", USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC
},
121 { "egyd", USCRIPT_DEMOTIC_EGYPTIAN
},
122 { "egyh", USCRIPT_HIERATIC_EGYPTIAN
},
123 { "egyp", USCRIPT_EGYPTIAN_HIEROGLYPHS
},
124 { "geok", USCRIPT_KHUTSURI
},
125 { "hans", USCRIPT_SIMPLIFIED_HAN
},
126 { "hant", USCRIPT_TRADITIONAL_HAN
},
127 { "hmng", USCRIPT_PAHAWH_HMONG
},
128 { "hung", USCRIPT_OLD_HUNGARIAN
},
129 { "inds", USCRIPT_HARAPPAN_INDUS
},
130 { "java", USCRIPT_JAVANESE
},
131 { "kali", USCRIPT_KAYAH_LI
},
132 { "latf", USCRIPT_LATIN_FRAKTUR
},
133 { "latg", USCRIPT_LATIN_GAELIC
},
134 { "lepc", USCRIPT_LEPCHA
},
135 { "lina", USCRIPT_LINEAR_A
},
136 { "mand", USCRIPT_MANDAEAN
},
137 { "maya", USCRIPT_MAYAN_HIEROGLYPHS
},
138 { "mero", USCRIPT_MEROITIC
},
139 { "nkoo", USCRIPT_NKO
},
140 { "orkh", USCRIPT_ORKHON
},
141 { "perm", USCRIPT_OLD_PERMIC
},
142 { "phag", USCRIPT_PHAGS_PA
},
143 { "phnx", USCRIPT_PHOENICIAN
},
144 { "plrd", USCRIPT_PHONETIC_POLLARD
},
145 { "roro", USCRIPT_RONGORONGO
},
146 { "sara", USCRIPT_SARATI
},
147 { "syre", USCRIPT_ESTRANGELO_SYRIAC
},
148 { "syrj", USCRIPT_WESTERN_SYRIAC
},
149 { "syrn", USCRIPT_EASTERN_SYRIAC
},
150 { "teng", USCRIPT_TENGWAR
},
151 { "vaii", USCRIPT_VAI
},
152 { "visp", USCRIPT_VISIBLE_SPEECH
},
153 { "xsux", USCRIPT_CUNEIFORM
},
154 { "jpan", USCRIPT_KATAKANA_OR_HIRAGANA
},
155 { "kore", USCRIPT_HANGUL
},
156 { "zxxx", USCRIPT_UNWRITTEN_LANGUAGES
},
157 { "zzzz", USCRIPT_UNKNOWN
}
160 typedef HashMap
<String
, UScriptCode
> ScriptNameCodeMap
;
161 DEFINE_STATIC_LOCAL(ScriptNameCodeMap
, scriptNameCodeMap
, ());
162 if (scriptNameCodeMap
.isEmpty()) {
163 for (size_t i
= 0; i
< sizeof(scriptNameCodeList
) / sizeof(scriptNameCodeList
[0]); ++i
)
164 scriptNameCodeMap
.set(scriptNameCodeList
[i
].name
, scriptNameCodeList
[i
].code
);
167 HashMap
<String
, UScriptCode
>::iterator it
= scriptNameCodeMap
.find(scriptName
.lower());
168 if (it
!= scriptNameCodeMap
.end())
170 return USCRIPT_INVALID_CODE
;
173 UScriptCode
localeToScriptCodeForFontSelection(const String
& locale
)
175 struct LocaleScript
{
180 static const LocaleScript localeScriptList
[] = {
181 { "aa", USCRIPT_LATIN
},
182 { "ab", USCRIPT_CYRILLIC
},
183 { "ady", USCRIPT_CYRILLIC
},
184 { "aeb", USCRIPT_ARABIC
},
185 { "af", USCRIPT_LATIN
},
186 { "ak", USCRIPT_LATIN
},
187 { "am", USCRIPT_ETHIOPIC
},
188 { "ar", USCRIPT_ARABIC
},
189 { "arq", USCRIPT_ARABIC
},
190 { "ary", USCRIPT_ARABIC
},
191 { "arz", USCRIPT_ARABIC
},
192 { "as", USCRIPT_BENGALI
},
193 { "ast", USCRIPT_LATIN
},
194 { "av", USCRIPT_CYRILLIC
},
195 { "ay", USCRIPT_LATIN
},
196 { "az", USCRIPT_LATIN
},
197 { "azb", USCRIPT_ARABIC
},
198 { "ba", USCRIPT_CYRILLIC
},
199 { "bal", USCRIPT_ARABIC
},
200 { "be", USCRIPT_CYRILLIC
},
201 { "bej", USCRIPT_ARABIC
},
202 { "bg", USCRIPT_CYRILLIC
},
203 { "bi", USCRIPT_LATIN
},
204 { "bn", USCRIPT_BENGALI
},
205 { "bo", USCRIPT_TIBETAN
},
206 { "bqi", USCRIPT_ARABIC
},
207 { "brh", USCRIPT_ARABIC
},
208 { "bs", USCRIPT_LATIN
},
209 { "ca", USCRIPT_LATIN
},
210 { "ce", USCRIPT_CYRILLIC
},
211 { "ceb", USCRIPT_LATIN
},
212 { "ch", USCRIPT_LATIN
},
213 { "chk", USCRIPT_LATIN
},
214 { "cja", USCRIPT_ARABIC
},
215 { "cjm", USCRIPT_ARABIC
},
216 { "ckb", USCRIPT_ARABIC
},
217 { "cs", USCRIPT_LATIN
},
218 { "cy", USCRIPT_LATIN
},
219 { "da", USCRIPT_LATIN
},
220 { "dcc", USCRIPT_ARABIC
},
221 { "de", USCRIPT_LATIN
},
222 { "doi", USCRIPT_ARABIC
},
223 { "dv", USCRIPT_THAANA
},
224 { "dyo", USCRIPT_ARABIC
},
225 { "dz", USCRIPT_TIBETAN
},
226 { "ee", USCRIPT_LATIN
},
227 { "efi", USCRIPT_LATIN
},
228 { "el", USCRIPT_GREEK
},
229 { "en", USCRIPT_LATIN
},
230 { "es", USCRIPT_LATIN
},
231 { "et", USCRIPT_LATIN
},
232 { "eu", USCRIPT_LATIN
},
233 { "fa", USCRIPT_ARABIC
},
234 { "fi", USCRIPT_LATIN
},
235 { "fil", USCRIPT_LATIN
},
236 { "fj", USCRIPT_LATIN
},
237 { "fo", USCRIPT_LATIN
},
238 { "fr", USCRIPT_LATIN
},
239 { "fur", USCRIPT_LATIN
},
240 { "fy", USCRIPT_LATIN
},
241 { "ga", USCRIPT_LATIN
},
242 { "gaa", USCRIPT_LATIN
},
243 { "gba", USCRIPT_ARABIC
},
244 { "gbz", USCRIPT_ARABIC
},
245 { "gd", USCRIPT_LATIN
},
246 { "gil", USCRIPT_LATIN
},
247 { "gl", USCRIPT_LATIN
},
248 { "gjk", USCRIPT_ARABIC
},
249 { "gju", USCRIPT_ARABIC
},
250 { "glk", USCRIPT_ARABIC
},
251 { "gn", USCRIPT_LATIN
},
252 { "gsw", USCRIPT_LATIN
},
253 { "gu", USCRIPT_GUJARATI
},
254 { "ha", USCRIPT_LATIN
},
255 { "haw", USCRIPT_LATIN
},
256 { "haz", USCRIPT_ARABIC
},
257 { "he", USCRIPT_HEBREW
},
258 { "hi", USCRIPT_DEVANAGARI
},
259 { "hil", USCRIPT_LATIN
},
260 { "hnd", USCRIPT_ARABIC
},
261 { "hno", USCRIPT_ARABIC
},
262 { "ho", USCRIPT_LATIN
},
263 { "hr", USCRIPT_LATIN
},
264 { "ht", USCRIPT_LATIN
},
265 { "hu", USCRIPT_LATIN
},
266 { "hy", USCRIPT_ARMENIAN
},
267 { "id", USCRIPT_LATIN
},
268 { "ig", USCRIPT_LATIN
},
269 { "ii", USCRIPT_YI
},
270 { "ilo", USCRIPT_LATIN
},
271 { "inh", USCRIPT_CYRILLIC
},
272 { "is", USCRIPT_LATIN
},
273 { "it", USCRIPT_LATIN
},
274 { "iu", USCRIPT_CANADIAN_ABORIGINAL
},
275 { "ja", USCRIPT_KATAKANA_OR_HIRAGANA
},
276 { "jv", USCRIPT_LATIN
},
277 { "ka", USCRIPT_GEORGIAN
},
278 { "kaj", USCRIPT_LATIN
},
279 { "kam", USCRIPT_LATIN
},
280 { "kbd", USCRIPT_CYRILLIC
},
281 { "kha", USCRIPT_LATIN
},
282 { "khw", USCRIPT_ARABIC
},
283 { "kk", USCRIPT_CYRILLIC
},
284 { "kl", USCRIPT_LATIN
},
285 { "km", USCRIPT_KHMER
},
286 { "kn", USCRIPT_KANNADA
},
287 { "ko", USCRIPT_HANGUL
},
288 { "kok", USCRIPT_DEVANAGARI
},
289 { "kos", USCRIPT_LATIN
},
290 { "kpe", USCRIPT_LATIN
},
291 { "krc", USCRIPT_CYRILLIC
},
292 { "ks", USCRIPT_ARABIC
},
293 { "ku", USCRIPT_ARABIC
},
294 { "kum", USCRIPT_CYRILLIC
},
295 { "kvx", USCRIPT_ARABIC
},
296 { "kxp", USCRIPT_ARABIC
},
297 { "ky", USCRIPT_CYRILLIC
},
298 { "la", USCRIPT_LATIN
},
299 { "lah", USCRIPT_ARABIC
},
300 { "lb", USCRIPT_LATIN
},
301 { "lez", USCRIPT_CYRILLIC
},
302 { "lki", USCRIPT_ARABIC
},
303 { "ln", USCRIPT_LATIN
},
304 { "lo", USCRIPT_LAO
},
305 { "lrc", USCRIPT_ARABIC
},
306 { "lt", USCRIPT_LATIN
},
307 { "luz", USCRIPT_ARABIC
},
308 { "lv", USCRIPT_LATIN
},
309 { "mai", USCRIPT_DEVANAGARI
},
310 { "mdf", USCRIPT_CYRILLIC
},
311 { "mfa", USCRIPT_ARABIC
},
312 { "mg", USCRIPT_LATIN
},
313 { "mh", USCRIPT_LATIN
},
314 { "mi", USCRIPT_LATIN
},
315 { "mk", USCRIPT_CYRILLIC
},
316 { "ml", USCRIPT_MALAYALAM
},
317 { "mn", USCRIPT_CYRILLIC
},
318 { "mr", USCRIPT_DEVANAGARI
},
319 { "ms", USCRIPT_LATIN
},
320 { "mt", USCRIPT_LATIN
},
321 { "mvy", USCRIPT_ARABIC
},
322 { "my", USCRIPT_MYANMAR
},
323 { "myv", USCRIPT_CYRILLIC
},
324 { "mzn", USCRIPT_ARABIC
},
325 { "na", USCRIPT_LATIN
},
326 { "nb", USCRIPT_LATIN
},
327 { "ne", USCRIPT_DEVANAGARI
},
328 { "niu", USCRIPT_LATIN
},
329 { "nl", USCRIPT_LATIN
},
330 { "nn", USCRIPT_LATIN
},
331 { "nr", USCRIPT_LATIN
},
332 { "nso", USCRIPT_LATIN
},
333 { "ny", USCRIPT_LATIN
},
334 { "oc", USCRIPT_LATIN
},
335 { "om", USCRIPT_LATIN
},
336 { "or", USCRIPT_ORIYA
},
337 { "os", USCRIPT_CYRILLIC
},
338 { "pa", USCRIPT_GURMUKHI
},
339 { "pag", USCRIPT_LATIN
},
340 { "pap", USCRIPT_LATIN
},
341 { "pau", USCRIPT_LATIN
},
342 { "pl", USCRIPT_LATIN
},
343 { "pon", USCRIPT_LATIN
},
344 { "prd", USCRIPT_ARABIC
},
345 { "prs", USCRIPT_ARABIC
},
346 { "ps", USCRIPT_ARABIC
},
347 { "pt", USCRIPT_LATIN
},
348 { "qu", USCRIPT_LATIN
},
349 { "rm", USCRIPT_LATIN
},
350 { "rmt", USCRIPT_ARABIC
},
351 { "rn", USCRIPT_LATIN
},
352 { "ro", USCRIPT_LATIN
},
353 { "ru", USCRIPT_CYRILLIC
},
354 { "rw", USCRIPT_LATIN
},
355 { "sa", USCRIPT_DEVANAGARI
},
356 { "sah", USCRIPT_CYRILLIC
},
357 { "sat", USCRIPT_LATIN
},
358 { "sd", USCRIPT_ARABIC
},
359 { "sdh", USCRIPT_ARABIC
},
360 { "se", USCRIPT_LATIN
},
361 { "sg", USCRIPT_LATIN
},
362 { "shi", USCRIPT_ARABIC
},
363 { "si", USCRIPT_SINHALA
},
364 { "sid", USCRIPT_LATIN
},
365 { "sk", USCRIPT_LATIN
},
366 { "skr", USCRIPT_ARABIC
},
367 { "sl", USCRIPT_LATIN
},
368 { "sm", USCRIPT_LATIN
},
369 { "so", USCRIPT_LATIN
},
370 { "sq", USCRIPT_LATIN
},
371 { "sr", USCRIPT_CYRILLIC
},
372 { "ss", USCRIPT_LATIN
},
373 { "st", USCRIPT_LATIN
},
374 { "su", USCRIPT_LATIN
},
375 { "sus", USCRIPT_ARABIC
},
376 { "sv", USCRIPT_LATIN
},
377 { "sw", USCRIPT_LATIN
},
378 { "swb", USCRIPT_ARABIC
},
379 { "syr", USCRIPT_ARABIC
},
380 { "ta", USCRIPT_TAMIL
},
381 { "te", USCRIPT_TELUGU
},
382 { "tet", USCRIPT_LATIN
},
383 { "tg", USCRIPT_CYRILLIC
},
384 { "th", USCRIPT_THAI
},
385 { "ti", USCRIPT_ETHIOPIC
},
386 { "tig", USCRIPT_ETHIOPIC
},
387 { "tk", USCRIPT_LATIN
},
388 { "tkl", USCRIPT_LATIN
},
389 { "tl", USCRIPT_LATIN
},
390 { "tn", USCRIPT_LATIN
},
391 { "to", USCRIPT_LATIN
},
392 { "tpi", USCRIPT_LATIN
},
393 { "tr", USCRIPT_LATIN
},
394 { "trv", USCRIPT_LATIN
},
395 { "ts", USCRIPT_LATIN
},
396 { "tt", USCRIPT_CYRILLIC
},
397 { "ttt", USCRIPT_ARABIC
},
398 { "tvl", USCRIPT_LATIN
},
399 { "tw", USCRIPT_LATIN
},
400 { "ty", USCRIPT_LATIN
},
401 { "tyv", USCRIPT_CYRILLIC
},
402 { "udm", USCRIPT_CYRILLIC
},
403 { "ug", USCRIPT_ARABIC
},
404 { "uk", USCRIPT_CYRILLIC
},
405 { "und", USCRIPT_LATIN
},
406 { "ur", USCRIPT_ARABIC
},
407 { "uz", USCRIPT_CYRILLIC
},
408 { "ve", USCRIPT_LATIN
},
409 { "vi", USCRIPT_LATIN
},
410 { "wal", USCRIPT_ETHIOPIC
},
411 { "war", USCRIPT_LATIN
},
412 { "wo", USCRIPT_LATIN
},
413 { "xh", USCRIPT_LATIN
},
414 { "yap", USCRIPT_LATIN
},
415 { "yo", USCRIPT_LATIN
},
416 { "za", USCRIPT_LATIN
},
417 { "zdj", USCRIPT_ARABIC
},
418 { "zh", USCRIPT_SIMPLIFIED_HAN
},
419 { "zh_hk", USCRIPT_TRADITIONAL_HAN
},
420 { "zh_tw", USCRIPT_TRADITIONAL_HAN
},
421 { "zu", USCRIPT_LATIN
}
424 typedef HashMap
<String
, UScriptCode
> LocaleScriptMap
;
425 DEFINE_STATIC_LOCAL(LocaleScriptMap
, localeScriptMap
, ());
426 if (localeScriptMap
.isEmpty()) {
427 for (size_t i
= 0; i
< sizeof(localeScriptList
) / sizeof(localeScriptList
[0]); ++i
)
428 localeScriptMap
.set(localeScriptList
[i
].locale
, localeScriptList
[i
].script
);
431 String canonicalLocale
= locale
.lower().replace('-', '_');
432 while (!canonicalLocale
.isEmpty()) {
433 HashMap
<String
, UScriptCode
>::iterator it
= localeScriptMap
.find(canonicalLocale
);
434 if (it
!= localeScriptMap
.end())
436 size_t pos
= canonicalLocale
.reverseFind('_');
437 if (pos
== kNotFound
)
439 UScriptCode code
= scriptNameToCode(canonicalLocale
.substring(pos
+ 1));
440 if (code
!= USCRIPT_INVALID_CODE
&& code
!= USCRIPT_UNKNOWN
)
442 canonicalLocale
= canonicalLocale
.substring(0, pos
);
444 return USCRIPT_COMMON
;