Move parseFontFaceDescriptor to CSSPropertyParser.cpp
[chromium-blink-merge.git] / third_party / WebKit / Source / platform / text / LocaleToScriptMapping.cpp
blob48f44a24b46f15ae9681ff630edfddb21b64a7ae
/*
 * Copyright (C) 2011 Google Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
31 #include "config.h"
32 #include "platform/text/LocaleToScriptMapping.h"
34 #include "wtf/HashMap.h"
35 #include "wtf/HashSet.h"
36 #include "wtf/text/StringHash.h"
38 namespace blink {
40 UScriptCode scriptNameToCode(const String& scriptName)
42 struct ScriptNameCode {
43 const char* name;
44 UScriptCode code;
47 // This generally maps an ISO 15924 script code to its UScriptCode, but certain families of script codes are
48 // treated as a single script for assigning a per-script font in Settings. For example, "hira" is mapped to
49 // USCRIPT_KATAKANA_OR_HIRAGANA instead of USCRIPT_HIRAGANA, since we want all Japanese scripts to be rendered
50 // using the same font setting.
51 static const ScriptNameCode scriptNameCodeList[] = {
52 { "zyyy", USCRIPT_COMMON },
53 { "qaai", USCRIPT_INHERITED },
54 { "arab", USCRIPT_ARABIC },
55 { "armn", USCRIPT_ARMENIAN },
56 { "beng", USCRIPT_BENGALI },
57 { "bopo", USCRIPT_BOPOMOFO },
58 { "cher", USCRIPT_CHEROKEE },
59 { "copt", USCRIPT_COPTIC },
60 { "cyrl", USCRIPT_CYRILLIC },
61 { "dsrt", USCRIPT_DESERET },
62 { "deva", USCRIPT_DEVANAGARI },
63 { "ethi", USCRIPT_ETHIOPIC },
64 { "geor", USCRIPT_GEORGIAN },
65 { "goth", USCRIPT_GOTHIC },
66 { "grek", USCRIPT_GREEK },
67 { "gujr", USCRIPT_GUJARATI },
68 { "guru", USCRIPT_GURMUKHI },
69 { "hani", USCRIPT_HAN },
70 { "hang", USCRIPT_HANGUL },
71 { "hebr", USCRIPT_HEBREW },
72 { "hira", USCRIPT_KATAKANA_OR_HIRAGANA },
73 { "knda", USCRIPT_KANNADA },
74 { "kana", USCRIPT_KATAKANA_OR_HIRAGANA },
75 { "khmr", USCRIPT_KHMER },
76 { "laoo", USCRIPT_LAO },
77 { "latn", USCRIPT_LATIN },
78 { "mlym", USCRIPT_MALAYALAM },
79 { "mong", USCRIPT_MONGOLIAN },
80 { "mymr", USCRIPT_MYANMAR },
81 { "ogam", USCRIPT_OGHAM },
82 { "ital", USCRIPT_OLD_ITALIC },
83 { "orya", USCRIPT_ORIYA },
84 { "runr", USCRIPT_RUNIC },
85 { "sinh", USCRIPT_SINHALA },
86 { "syrc", USCRIPT_SYRIAC },
87 { "taml", USCRIPT_TAMIL },
88 { "telu", USCRIPT_TELUGU },
89 { "thaa", USCRIPT_THAANA },
90 { "thai", USCRIPT_THAI },
91 { "tibt", USCRIPT_TIBETAN },
92 { "cans", USCRIPT_CANADIAN_ABORIGINAL },
93 { "yiii", USCRIPT_YI },
94 { "tglg", USCRIPT_TAGALOG },
95 { "hano", USCRIPT_HANUNOO },
96 { "buhd", USCRIPT_BUHID },
97 { "tagb", USCRIPT_TAGBANWA },
98 { "brai", USCRIPT_BRAILLE },
99 { "cprt", USCRIPT_CYPRIOT },
100 { "limb", USCRIPT_LIMBU },
101 { "linb", USCRIPT_LINEAR_B },
102 { "osma", USCRIPT_OSMANYA },
103 { "shaw", USCRIPT_SHAVIAN },
104 { "tale", USCRIPT_TAI_LE },
105 { "ugar", USCRIPT_UGARITIC },
106 { "hrkt", USCRIPT_KATAKANA_OR_HIRAGANA },
107 { "bugi", USCRIPT_BUGINESE },
108 { "glag", USCRIPT_GLAGOLITIC },
109 { "khar", USCRIPT_KHAROSHTHI },
110 { "sylo", USCRIPT_SYLOTI_NAGRI },
111 { "talu", USCRIPT_NEW_TAI_LUE },
112 { "tfng", USCRIPT_TIFINAGH },
113 { "xpeo", USCRIPT_OLD_PERSIAN },
114 { "bali", USCRIPT_BALINESE },
115 { "batk", USCRIPT_BATAK },
116 { "blis", USCRIPT_BLISSYMBOLS },
117 { "brah", USCRIPT_BRAHMI },
118 { "cham", USCRIPT_CHAM },
119 { "cirt", USCRIPT_CIRTH },
120 { "cyrs", USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC },
121 { "egyd", USCRIPT_DEMOTIC_EGYPTIAN },
122 { "egyh", USCRIPT_HIERATIC_EGYPTIAN },
123 { "egyp", USCRIPT_EGYPTIAN_HIEROGLYPHS },
124 { "geok", USCRIPT_KHUTSURI },
125 { "hans", USCRIPT_SIMPLIFIED_HAN },
126 { "hant", USCRIPT_TRADITIONAL_HAN },
127 { "hmng", USCRIPT_PAHAWH_HMONG },
128 { "hung", USCRIPT_OLD_HUNGARIAN },
129 { "inds", USCRIPT_HARAPPAN_INDUS },
130 { "java", USCRIPT_JAVANESE },
131 { "kali", USCRIPT_KAYAH_LI },
132 { "latf", USCRIPT_LATIN_FRAKTUR },
133 { "latg", USCRIPT_LATIN_GAELIC },
134 { "lepc", USCRIPT_LEPCHA },
135 { "lina", USCRIPT_LINEAR_A },
136 { "mand", USCRIPT_MANDAEAN },
137 { "maya", USCRIPT_MAYAN_HIEROGLYPHS },
138 { "mero", USCRIPT_MEROITIC },
139 { "nkoo", USCRIPT_NKO },
140 { "orkh", USCRIPT_ORKHON },
141 { "perm", USCRIPT_OLD_PERMIC },
142 { "phag", USCRIPT_PHAGS_PA },
143 { "phnx", USCRIPT_PHOENICIAN },
144 { "plrd", USCRIPT_PHONETIC_POLLARD },
145 { "roro", USCRIPT_RONGORONGO },
146 { "sara", USCRIPT_SARATI },
147 { "syre", USCRIPT_ESTRANGELO_SYRIAC },
148 { "syrj", USCRIPT_WESTERN_SYRIAC },
149 { "syrn", USCRIPT_EASTERN_SYRIAC },
150 { "teng", USCRIPT_TENGWAR },
151 { "vaii", USCRIPT_VAI },
152 { "visp", USCRIPT_VISIBLE_SPEECH },
153 { "xsux", USCRIPT_CUNEIFORM },
154 { "jpan", USCRIPT_KATAKANA_OR_HIRAGANA },
155 { "kore", USCRIPT_HANGUL },
156 { "zxxx", USCRIPT_UNWRITTEN_LANGUAGES },
157 { "zzzz", USCRIPT_UNKNOWN }
160 typedef HashMap<String, UScriptCode> ScriptNameCodeMap;
161 DEFINE_STATIC_LOCAL(ScriptNameCodeMap, scriptNameCodeMap, ());
162 if (scriptNameCodeMap.isEmpty()) {
163 for (size_t i = 0; i < sizeof(scriptNameCodeList) / sizeof(scriptNameCodeList[0]); ++i)
164 scriptNameCodeMap.set(scriptNameCodeList[i].name, scriptNameCodeList[i].code);
167 HashMap<String, UScriptCode>::iterator it = scriptNameCodeMap.find(scriptName.lower());
168 if (it != scriptNameCodeMap.end())
169 return it->value;
170 return USCRIPT_INVALID_CODE;
173 UScriptCode localeToScriptCodeForFontSelection(const String& locale)
175 struct LocaleScript {
176 const char* locale;
177 UScriptCode script;
180 static const LocaleScript localeScriptList[] = {
181 { "aa", USCRIPT_LATIN },
182 { "ab", USCRIPT_CYRILLIC },
183 { "ady", USCRIPT_CYRILLIC },
184 { "aeb", USCRIPT_ARABIC },
185 { "af", USCRIPT_LATIN },
186 { "ak", USCRIPT_LATIN },
187 { "am", USCRIPT_ETHIOPIC },
188 { "ar", USCRIPT_ARABIC },
189 { "arq", USCRIPT_ARABIC },
190 { "ary", USCRIPT_ARABIC },
191 { "arz", USCRIPT_ARABIC },
192 { "as", USCRIPT_BENGALI },
193 { "ast", USCRIPT_LATIN },
194 { "av", USCRIPT_CYRILLIC },
195 { "ay", USCRIPT_LATIN },
196 { "az", USCRIPT_LATIN },
197 { "azb", USCRIPT_ARABIC },
198 { "ba", USCRIPT_CYRILLIC },
199 { "bal", USCRIPT_ARABIC },
200 { "be", USCRIPT_CYRILLIC },
201 { "bej", USCRIPT_ARABIC },
202 { "bg", USCRIPT_CYRILLIC },
203 { "bi", USCRIPT_LATIN },
204 { "bn", USCRIPT_BENGALI },
205 { "bo", USCRIPT_TIBETAN },
206 { "bqi", USCRIPT_ARABIC },
207 { "brh", USCRIPT_ARABIC },
208 { "bs", USCRIPT_LATIN },
209 { "ca", USCRIPT_LATIN },
210 { "ce", USCRIPT_CYRILLIC },
211 { "ceb", USCRIPT_LATIN },
212 { "ch", USCRIPT_LATIN },
213 { "chk", USCRIPT_LATIN },
214 { "cja", USCRIPT_ARABIC },
215 { "cjm", USCRIPT_ARABIC },
216 { "ckb", USCRIPT_ARABIC },
217 { "cs", USCRIPT_LATIN },
218 { "cy", USCRIPT_LATIN },
219 { "da", USCRIPT_LATIN },
220 { "dcc", USCRIPT_ARABIC },
221 { "de", USCRIPT_LATIN },
222 { "doi", USCRIPT_ARABIC },
223 { "dv", USCRIPT_THAANA },
224 { "dyo", USCRIPT_ARABIC },
225 { "dz", USCRIPT_TIBETAN },
226 { "ee", USCRIPT_LATIN },
227 { "efi", USCRIPT_LATIN },
228 { "el", USCRIPT_GREEK },
229 { "en", USCRIPT_LATIN },
230 { "es", USCRIPT_LATIN },
231 { "et", USCRIPT_LATIN },
232 { "eu", USCRIPT_LATIN },
233 { "fa", USCRIPT_ARABIC },
234 { "fi", USCRIPT_LATIN },
235 { "fil", USCRIPT_LATIN },
236 { "fj", USCRIPT_LATIN },
237 { "fo", USCRIPT_LATIN },
238 { "fr", USCRIPT_LATIN },
239 { "fur", USCRIPT_LATIN },
240 { "fy", USCRIPT_LATIN },
241 { "ga", USCRIPT_LATIN },
242 { "gaa", USCRIPT_LATIN },
243 { "gba", USCRIPT_ARABIC },
244 { "gbz", USCRIPT_ARABIC },
245 { "gd", USCRIPT_LATIN },
246 { "gil", USCRIPT_LATIN },
247 { "gl", USCRIPT_LATIN },
248 { "gjk", USCRIPT_ARABIC },
249 { "gju", USCRIPT_ARABIC },
250 { "glk", USCRIPT_ARABIC },
251 { "gn", USCRIPT_LATIN },
252 { "gsw", USCRIPT_LATIN },
253 { "gu", USCRIPT_GUJARATI },
254 { "ha", USCRIPT_LATIN },
255 { "haw", USCRIPT_LATIN },
256 { "haz", USCRIPT_ARABIC },
257 { "he", USCRIPT_HEBREW },
258 { "hi", USCRIPT_DEVANAGARI },
259 { "hil", USCRIPT_LATIN },
260 { "hnd", USCRIPT_ARABIC },
261 { "hno", USCRIPT_ARABIC },
262 { "ho", USCRIPT_LATIN },
263 { "hr", USCRIPT_LATIN },
264 { "ht", USCRIPT_LATIN },
265 { "hu", USCRIPT_LATIN },
266 { "hy", USCRIPT_ARMENIAN },
267 { "id", USCRIPT_LATIN },
268 { "ig", USCRIPT_LATIN },
269 { "ii", USCRIPT_YI },
270 { "ilo", USCRIPT_LATIN },
271 { "inh", USCRIPT_CYRILLIC },
272 { "is", USCRIPT_LATIN },
273 { "it", USCRIPT_LATIN },
274 { "iu", USCRIPT_CANADIAN_ABORIGINAL },
275 { "ja", USCRIPT_KATAKANA_OR_HIRAGANA },
276 { "jv", USCRIPT_LATIN },
277 { "ka", USCRIPT_GEORGIAN },
278 { "kaj", USCRIPT_LATIN },
279 { "kam", USCRIPT_LATIN },
280 { "kbd", USCRIPT_CYRILLIC },
281 { "kha", USCRIPT_LATIN },
282 { "khw", USCRIPT_ARABIC },
283 { "kk", USCRIPT_CYRILLIC },
284 { "kl", USCRIPT_LATIN },
285 { "km", USCRIPT_KHMER },
286 { "kn", USCRIPT_KANNADA },
287 { "ko", USCRIPT_HANGUL },
288 { "kok", USCRIPT_DEVANAGARI },
289 { "kos", USCRIPT_LATIN },
290 { "kpe", USCRIPT_LATIN },
291 { "krc", USCRIPT_CYRILLIC },
292 { "ks", USCRIPT_ARABIC },
293 { "ku", USCRIPT_ARABIC },
294 { "kum", USCRIPT_CYRILLIC },
295 { "kvx", USCRIPT_ARABIC },
296 { "kxp", USCRIPT_ARABIC },
297 { "ky", USCRIPT_CYRILLIC },
298 { "la", USCRIPT_LATIN },
299 { "lah", USCRIPT_ARABIC },
300 { "lb", USCRIPT_LATIN },
301 { "lez", USCRIPT_CYRILLIC },
302 { "lki", USCRIPT_ARABIC },
303 { "ln", USCRIPT_LATIN },
304 { "lo", USCRIPT_LAO },
305 { "lrc", USCRIPT_ARABIC },
306 { "lt", USCRIPT_LATIN },
307 { "luz", USCRIPT_ARABIC },
308 { "lv", USCRIPT_LATIN },
309 { "mai", USCRIPT_DEVANAGARI },
310 { "mdf", USCRIPT_CYRILLIC },
311 { "mfa", USCRIPT_ARABIC },
312 { "mg", USCRIPT_LATIN },
313 { "mh", USCRIPT_LATIN },
314 { "mi", USCRIPT_LATIN },
315 { "mk", USCRIPT_CYRILLIC },
316 { "ml", USCRIPT_MALAYALAM },
317 { "mn", USCRIPT_CYRILLIC },
318 { "mr", USCRIPT_DEVANAGARI },
319 { "ms", USCRIPT_LATIN },
320 { "mt", USCRIPT_LATIN },
321 { "mvy", USCRIPT_ARABIC },
322 { "my", USCRIPT_MYANMAR },
323 { "myv", USCRIPT_CYRILLIC },
324 { "mzn", USCRIPT_ARABIC },
325 { "na", USCRIPT_LATIN },
326 { "nb", USCRIPT_LATIN },
327 { "ne", USCRIPT_DEVANAGARI },
328 { "niu", USCRIPT_LATIN },
329 { "nl", USCRIPT_LATIN },
330 { "nn", USCRIPT_LATIN },
331 { "nr", USCRIPT_LATIN },
332 { "nso", USCRIPT_LATIN },
333 { "ny", USCRIPT_LATIN },
334 { "oc", USCRIPT_LATIN },
335 { "om", USCRIPT_LATIN },
336 { "or", USCRIPT_ORIYA },
337 { "os", USCRIPT_CYRILLIC },
338 { "pa", USCRIPT_GURMUKHI },
339 { "pag", USCRIPT_LATIN },
340 { "pap", USCRIPT_LATIN },
341 { "pau", USCRIPT_LATIN },
342 { "pl", USCRIPT_LATIN },
343 { "pon", USCRIPT_LATIN },
344 { "prd", USCRIPT_ARABIC },
345 { "prs", USCRIPT_ARABIC },
346 { "ps", USCRIPT_ARABIC },
347 { "pt", USCRIPT_LATIN },
348 { "qu", USCRIPT_LATIN },
349 { "rm", USCRIPT_LATIN },
350 { "rmt", USCRIPT_ARABIC },
351 { "rn", USCRIPT_LATIN },
352 { "ro", USCRIPT_LATIN },
353 { "ru", USCRIPT_CYRILLIC },
354 { "rw", USCRIPT_LATIN },
355 { "sa", USCRIPT_DEVANAGARI },
356 { "sah", USCRIPT_CYRILLIC },
357 { "sat", USCRIPT_LATIN },
358 { "sd", USCRIPT_ARABIC },
359 { "sdh", USCRIPT_ARABIC },
360 { "se", USCRIPT_LATIN },
361 { "sg", USCRIPT_LATIN },
362 { "shi", USCRIPT_ARABIC },
363 { "si", USCRIPT_SINHALA },
364 { "sid", USCRIPT_LATIN },
365 { "sk", USCRIPT_LATIN },
366 { "skr", USCRIPT_ARABIC },
367 { "sl", USCRIPT_LATIN },
368 { "sm", USCRIPT_LATIN },
369 { "so", USCRIPT_LATIN },
370 { "sq", USCRIPT_LATIN },
371 { "sr", USCRIPT_CYRILLIC },
372 { "ss", USCRIPT_LATIN },
373 { "st", USCRIPT_LATIN },
374 { "su", USCRIPT_LATIN },
375 { "sus", USCRIPT_ARABIC },
376 { "sv", USCRIPT_LATIN },
377 { "sw", USCRIPT_LATIN },
378 { "swb", USCRIPT_ARABIC },
379 { "syr", USCRIPT_ARABIC },
380 { "ta", USCRIPT_TAMIL },
381 { "te", USCRIPT_TELUGU },
382 { "tet", USCRIPT_LATIN },
383 { "tg", USCRIPT_CYRILLIC },
384 { "th", USCRIPT_THAI },
385 { "ti", USCRIPT_ETHIOPIC },
386 { "tig", USCRIPT_ETHIOPIC },
387 { "tk", USCRIPT_LATIN },
388 { "tkl", USCRIPT_LATIN },
389 { "tl", USCRIPT_LATIN },
390 { "tn", USCRIPT_LATIN },
391 { "to", USCRIPT_LATIN },
392 { "tpi", USCRIPT_LATIN },
393 { "tr", USCRIPT_LATIN },
394 { "trv", USCRIPT_LATIN },
395 { "ts", USCRIPT_LATIN },
396 { "tt", USCRIPT_CYRILLIC },
397 { "ttt", USCRIPT_ARABIC },
398 { "tvl", USCRIPT_LATIN },
399 { "tw", USCRIPT_LATIN },
400 { "ty", USCRIPT_LATIN },
401 { "tyv", USCRIPT_CYRILLIC },
402 { "udm", USCRIPT_CYRILLIC },
403 { "ug", USCRIPT_ARABIC },
404 { "uk", USCRIPT_CYRILLIC },
405 { "und", USCRIPT_LATIN },
406 { "ur", USCRIPT_ARABIC },
407 { "uz", USCRIPT_CYRILLIC },
408 { "ve", USCRIPT_LATIN },
409 { "vi", USCRIPT_LATIN },
410 { "wal", USCRIPT_ETHIOPIC },
411 { "war", USCRIPT_LATIN },
412 { "wo", USCRIPT_LATIN },
413 { "xh", USCRIPT_LATIN },
414 { "yap", USCRIPT_LATIN },
415 { "yo", USCRIPT_LATIN },
416 { "za", USCRIPT_LATIN },
417 { "zdj", USCRIPT_ARABIC },
418 { "zh", USCRIPT_SIMPLIFIED_HAN },
419 { "zh_hk", USCRIPT_TRADITIONAL_HAN },
420 { "zh_tw", USCRIPT_TRADITIONAL_HAN },
421 { "zu", USCRIPT_LATIN }
424 typedef HashMap<String, UScriptCode> LocaleScriptMap;
425 DEFINE_STATIC_LOCAL(LocaleScriptMap, localeScriptMap, ());
426 if (localeScriptMap.isEmpty()) {
427 for (size_t i = 0; i < sizeof(localeScriptList) / sizeof(localeScriptList[0]); ++i)
428 localeScriptMap.set(localeScriptList[i].locale, localeScriptList[i].script);
431 String canonicalLocale = locale.lower().replace('-', '_');
432 while (!canonicalLocale.isEmpty()) {
433 HashMap<String, UScriptCode>::iterator it = localeScriptMap.find(canonicalLocale);
434 if (it != localeScriptMap.end())
435 return it->value;
436 size_t pos = canonicalLocale.reverseFind('_');
437 if (pos == kNotFound)
438 break;
439 UScriptCode code = scriptNameToCode(canonicalLocale.substring(pos + 1));
440 if (code != USCRIPT_INVALID_CODE && code != USCRIPT_UNKNOWN)
441 return code;
442 canonicalLocale = canonicalLocale.substring(0, pos);
444 return USCRIPT_COMMON;
447 } // namespace blink