1 // Copyright 2013 Google Inc. All Rights Reserved.
4 * @fileoverview This file defines the input tool, included IME and virtual
7 * @author wuyingbing@google.com (Yingbing Wu)
10 goog
.provide('i18n.input.lang.InputTool');
12 goog
.require('goog.array');
13 goog
.require('goog.object');
14 goog
.require('goog.string');
15 goog
.require('i18n.input.common.GlobalSettings');
16 goog
.require('i18n.input.lang.InputToolCode');
17 goog
.require('i18n.input.lang.InputToolType');
19 goog
.scope(function() {
// Short local aliases for the fully-qualified names used in this file.
// Each alias stays a plain `var name = qualified.name;` statement.
var GlobalSettings = i18n.input.common.GlobalSettings;
var InputToolCode = i18n.input.lang.InputToolCode;
var InputToolType = i18n.input.lang.InputToolType;
27 * The input tool class is used to define Input Tool. Don't call the method
28 * directly, use InputTool.get instead.
30 * @param {!InputToolCode} inputToolCode The input tool code
34 i18n
.input
.lang
.InputTool = function(inputToolCode
) {
36 * The unique code of input tools.
38 * @type {!InputToolCode}
40 this.code
= inputToolCode
;
43 * The input tools type value.
45 * @type {?InputToolType}
50 * The target language code.
54 this.languageCode
= 'en';
57 * The source language code.
61 this.sourceLanguageCode
= 'en';
64 * Keyboard layout code. Only valid if type is KBD.
69 // Parses input tool code.
70 this.parseInputToolCode_();
// Local alias for the class defined above, so that the static and prototype
// members below can be declared as `InputTool.*`.
var InputTool = i18n.input.lang.InputTool;
/**
 * Input tool codes of the virtual keyboards that are laid out right-to-left.
 * InputTool.prototype.isRightToLeft checks an instance's code against this
 * list.
 *
 * @type {!Array.<string>}
 */
InputTool.RtlKeyboards = [
  InputToolCode.KEYBOARD_ARABIC,
  InputToolCode.KEYBOARD_DARI,
  InputToolCode.KEYBOARD_HEBREW,
  InputToolCode.KEYBOARD_PASHTO,
  InputToolCode.KEYBOARD_PERSIAN,
  InputToolCode.KEYBOARD_SOUTHERN_UZBEK,
  InputToolCode.KEYBOARD_UIGHUR,
  InputToolCode.KEYBOARD_URDU,
  InputToolCode.KEYBOARD_YIDDISH
];
93 * The array of RTL IMEs' input tool codes.
95 * @type {!Array.<string>}
98 InputToolCode
.INPUTMETHOD_TRANSLITERATION_ARABIC
,
99 InputToolCode
.INPUTMETHOD_TRANSLITERATION_HEBREW
,
100 InputToolCode
.INPUTMETHOD_TRANSLITERATION_PERSIAN
,
101 InputToolCode
.INPUTMETHOD_TRANSLITERATION_URDU
];
105 * The mapping from 3-letter language codes to 2-letter language codes.
107 * @type {!Object.<string, string>}
109 InputTool
.LanguageCodeThreeTwoMap
= goog
.object
.create(
135 // The new specification is "nb", but NACL uses "no".
152 * The special XKB id to language code mapping.
154 * @private {!Object.<string, string>}
156 InputTool
.XkbId2Language_
= {
157 // NACL treats "pt-BR" and "pt-PT" the same as "pt".
158 'xkb:us:intl:por': 'pt',
/**
 * Cache of the input tool instances created so far, keyed by input tool
 * code. InputTool.get reads from and writes to this map.
 *
 * @type {!Object.<string, InputTool>}
 * @private
 */
InputTool.instances_ = {};
174 * Gets an input tool by code.
176 * @param {!string} inputToolCode The input tool code value.
177 * @return {InputTool} The input tool.
179 InputTool
.get = function(inputToolCode
) {
180 if (!inputToolCode
) {
184 // The code isn't BCP47 pattern, transfers it from old pattern.
185 if (!goog
.object
.contains(InputToolCode
, inputToolCode
)) {
186 inputToolCode
= InputTool
.parseToBCP47_(inputToolCode
);
189 // Allow BCP47 code 'fa_t_k0_und' to 'fa-t-k0-und'.
190 inputToolCode
= inputToolCode
.replace(/_
/g
, '-');
192 // Adds '-und' to keep compatible with previous codes.
193 if (!goog
.object
.contains(InputToolCode
, inputToolCode
)) {
194 inputToolCode
= InputTool
.parseToBCP47_(
195 inputToolCode
+ '-und');
198 if (InputTool
.instances_
[inputToolCode
]) {
199 return InputTool
.instances_
[inputToolCode
];
202 // If the input tool code is valid.
203 if (goog
.object
.contains(InputToolCode
, inputToolCode
)) {
204 InputTool
.instances_
[inputToolCode
] =
206 /** @type {InputToolCode} */ (inputToolCode
));
207 return InputTool
.instances_
[inputToolCode
];
/**
 * Language codes whose keyboards follow the phonetic/inscript rule: a BCP47
 * code containing 'und-latn' corresponds to the '<lang>_phone' layout, and a
 * code without it corresponds to the '<lang>_inscript' layout.
 *
 * @type {!Array.<string>}
 * @private
 */
InputTool.PHONETIC_INSCRIPT_LANGS_ = [
  'bn', 'gu', 'pa', 'kn', 'ml',
  'or', 'sa', 'ta', 'te', 'ne'
];
/**
 * Previous (old-format) input tool codes that cannot be converted to BCP47
 * by the generic rules, mapped to their BCP47 input tool codes.
 * InputTool.parseToBCP47_ consults this map before anything else.
 *
 * @type {!Object.<string, string>}
 * @private
 */
InputTool.BCP47_SPECIAL_ = {
  // Input methods.
  'im_pinyin_zh_hans': InputToolCode.INPUTMETHOD_PINYIN_CHINESE_SIMPLIFIED,
  'im_pinyin_zh_hant': InputToolCode.INPUTMETHOD_PINYIN_CHINESE_TRADITIONAL,
  'im_t13n_ja': InputToolCode.INPUTMETHOD_TRANSLITERATION_JAPANESE,
  'im_t13n_ja-Hira': InputToolCode.INPUTMETHOD_TRANSLITERATION_HIRAGANA,
  'im_wubi_zh_hans': InputToolCode.INPUTMETHOD_WUBI_CHINESE_SIMPLIFIED,
  'im_zhuyin_zh_hant': InputToolCode.INPUTMETHOD_ZHUYIN_CHINESE_TRADITIONAL,

  // Virtual keyboards.
  'vkd_bg_phone': InputToolCode.KEYBOARD_BULGARIAN_PHONETIC,
  'vkd_chr_phone': InputToolCode.KEYBOARD_CHEROKEE_PHONETIC,
  'vkd_cs_qwertz': InputToolCode.KEYBOARD_CZECH_QWERTZ,
  'vkd_deva_phone': InputToolCode.KEYBOARD_DEVANAGARI_PHONETIC,
  'vkd_en_dvorak': InputToolCode.KEYBOARD_ENGLISH_DVORAK,
  'vkd_es_es': InputToolCode.KEYBOARD_SPANISH,
  'vkd_ethi': InputToolCode.KEYBOARD_ETHIOPIC,
  'vkd_gu_phone': InputToolCode.KEYBOARD_GUJARATI_PHONETIC,
  'vkd_guru_inscript': InputToolCode.KEYBOARD_GURMUKHI_INSCRIPT,
  'vkd_guru_phone': InputToolCode.KEYBOARD_GURMUKHI_PHONETIC,
  'vkd_hu_101': InputToolCode.KEYBOARD_HUNGARIAN_101,
  'vkd_hy_east': InputToolCode.KEYBOARD_ARMENIAN_EASTERN,
  'vkd_hy_west': InputToolCode.KEYBOARD_ARMENIAN_WESTERN,
  'vkd_ka_qwerty': InputToolCode.KEYBOARD_GEORGIAN_QWERTY,
  'vkd_ka_typewriter': InputToolCode.KEYBOARD_GEORGIAN_TYPEWRITER,
  'vkd_ro_sr13392_primary': InputToolCode.KEYBOARD_ROMANIAN_SR13392_PRIMARY,
  'vkd_ro_sr13392_secondary':
      InputToolCode.KEYBOARD_ROMANIAN_SR13392_SECONDARY,
  'vkd_ru_phone': InputToolCode.KEYBOARD_RUSSIAN_PHONETIC,
  'vkd_ru_phone_aatseel': InputToolCode.KEYBOARD_RUSSIAN_PHONETIC_AATSEEL,
  'vkd_ru_phone_yazhert': InputToolCode.KEYBOARD_RUSSIAN_PHONETIC_YAZHERT,
  'vkd_sk_qwerty': InputToolCode.KEYBOARD_SLOVAK_QWERTY,
  'vkd_ta_itrans': InputToolCode.KEYBOARD_TAMIL_ITRANS,
  'vkd_ta_tamil99': InputToolCode.KEYBOARD_TAMIL_99,
  'vkd_ta_typewriter': InputToolCode.KEYBOARD_TAMIL_TYPEWRITER,
  'vkd_th_pattajoti': InputToolCode.KEYBOARD_THAI_PATTAJOTI,
  'vkd_th_tis': InputToolCode.KEYBOARD_THAI_TIS,
  'vkd_tr_f': InputToolCode.KEYBOARD_TURKISH_F,
  'vkd_tr_q': InputToolCode.KEYBOARD_TURKISH_Q,
  'vkd_uk_101': InputToolCode.KEYBOARD_UKRAINIAN_101,
  // NOTE(review): 'vkd_us_intl' resolving to the French international
  // keyboard looks surprising -- confirm this is the intended target.
  'vkd_us_intl': InputToolCode.KEYBOARD_FRENCH_INTL,
  'vkd_uz_cyrl_phone': InputToolCode.KEYBOARD_UZBEK_CYRILLIC_PHONETIC,
  'vkd_uz_cyrl_type': InputToolCode.KEYBOARD_UZBEK_CYRILLIC_TYPEWRITTER,
  'vkd_vi_tcvn': InputToolCode.KEYBOARD_VIETNAMESE_TCVN,
  'vkd_vi_telex': InputToolCode.KEYBOARD_VIETNAMESE_TELEX
};
/**
 * The transpose of InputTool.BCP47_SPECIAL_: maps a BCP47 input tool code
 * back to its previous (old-format) code.
 *
 * @type {!Object.<string, string>}
 * @private
 */
InputTool.BCP47_SPECIAL_REVERSE_ =
    goog.object.transpose(InputTool.BCP47_SPECIAL_);
/**
 * Keyboard layout codes for input tools whose layout is not derived from
 * their own code. Several input tools share one layout (for example, all of
 * the '*_INTL' keyboards below use 'us_intl').
 *
 * key: Input Tool code.
 * value: layout code.
 *
 * @private {!Object.<string, string>}
 */
InputTool.SpecialLayoutCodes_ = goog.object.create(
    InputToolCode.KEYBOARD_DUTCH_INTL, 'us_intl',
    InputToolCode.KEYBOARD_FRENCH_INTL, 'us_intl',
    InputToolCode.KEYBOARD_GERMAN_INTL, 'us_intl',
    InputToolCode.KEYBOARD_HAITIAN, 'fr',
    InputToolCode.KEYBOARD_INDONESIAN, 'latn_002',
    InputToolCode.KEYBOARD_IRISH, 'latn_002',
    InputToolCode.KEYBOARD_ITALIAN_INTL, 'us_intl',
    InputToolCode.KEYBOARD_JAVANESE, 'latn_002',
    InputToolCode.KEYBOARD_MARATHI, 'deva_phone',
    InputToolCode.KEYBOARD_MALAY, 'latn_002',
    InputToolCode.KEYBOARD_PORTUGUESE_BRAZIL_INTL, 'us_intl',
    InputToolCode.KEYBOARD_PORTUGUESE_PORTUGAL_INTL, 'us_intl',
    InputToolCode.KEYBOARD_SPANISH_INTL, 'us_intl',
    InputToolCode.KEYBOARD_SWAHILI, 'latn_002',
    InputToolCode.KEYBOARD_TAGALOG, 'latn_002',
    InputToolCode.KEYBOARD_TIGRINYA, 'ethi',
    InputToolCode.KEYBOARD_WELSH, 'latn_002');
317 * Parses previous old code to BCP 47 code.
319 * @param {string} itCode Previous old input tool code format.
320 * @return {string} BCP 47 code.
323 InputTool
.parseToBCP47_ = function(itCode
) {
324 if (InputTool
.BCP47_SPECIAL_
[itCode
]) {
325 return InputTool
.BCP47_SPECIAL_
[itCode
];
328 if (itCode
== 'vkd_iw') {
329 return InputToolCode
.KEYBOARD_HEBREW
;
332 if (itCode
== 'im_t13n_iw') {
333 return InputToolCode
.INPUTMETHOD_TRANSLITERATION_HEBREW
;
336 // 'legacy' was mistyped as 'lagacy' in an old code; map that code here.
337 // It can't be put into the BCP47_SPECIAL_ map, because we have to write
338 // 'tr-t-k0-lagacy' split as 'tr' + '-t-k0-lagacy', and JS grammar doesn't
339 // allow an expression like 'tr' + '-t-k0-lagacy' as an object-literal key.
340 if (itCode
== 'tr' + '-t-k0-lagacy') {
341 return InputToolCode
.KEYBOARD_TURKISH_F
;
344 var parts
= itCode
.split('_');
346 if (goog
.string
.startsWith(itCode
, 'im_t13n')) {
347 // Example: 'im_t13n_hi'.
348 code
= parts
[2] + '-t-i0-und';
349 } else if (goog
.string
.startsWith(itCode
, 'vkd_')) {
350 // Special codes for keyboard.
351 if (parts
.length
== 2) {
353 code
= parts
[1] + '-t-k0-und';
355 if (goog
.array
.contains(
356 InputTool
.PHONETIC_INSCRIPT_LANGS_
, parts
[1])) {
357 if (parts
[2] == 'inscript') {
358 code
= parts
[1] + '-t-k0-und';
360 code
= parts
[1] + '-t-und-latn-k0-und';
363 code
= parts
[1] + '-t-k0-' + parts
[2];
364 if (!goog
.object
.contains(InputToolCode
, code
)) {
365 code
= parts
[1] + '-' + parts
[2] + '-t-k0-und';
370 return goog
.object
.contains(InputToolCode
, code
) ? code
: itCode
;
375 * Gets an input tool by type and code. Kept compatible with the previous
376 * language code pair. Getting an input tool by keyboard layout is not supported.
378 * @param {!InputToolType} type The input tool type.
379 * @param {!string} code It's the target language code if type is input method.
380 * @return {InputTool} The input tool.
382 InputTool
.getInputTool = function(type
, code
) {
383 // Makes compatible input tool code with previous language code version.
384 if (type
== InputToolType
.IME
) {
385 if (code
== 'zh' || code
== 'zh-Hans') {
386 return InputTool
.get(
387 InputToolCode
.INPUTMETHOD_PINYIN_CHINESE_SIMPLIFIED
);
388 } else if (code
== 'zh-Hant') {
389 return InputTool
.get(
390 InputToolCode
.INPUTMETHOD_ZHUYIN_CHINESE_TRADITIONAL
);
391 } else if (code
== 'ja') {
392 return InputTool
.get(
393 InputToolCode
.INPUTMETHOD_TRANSLITERATION_JAPANESE
);
395 return InputTool
.get(code
+ '-t-i0-und');
397 } else if (type
== InputToolType
.KBD
) {
398 return InputTool
.get('vkd_' + code
);
405 * Parses BCP47 codes to the virtual keyboard layout.
409 InputTool
.prototype.parseLayoutCode_ = function() {
410 if (InputTool
.SpecialLayoutCodes_
[this.code
]) {
411 this.layoutCode
= InputTool
.SpecialLayoutCodes_
[this.code
];
412 } else if (InputTool
.BCP47_SPECIAL_REVERSE_
[this.code
]) {
413 // Removes prefix 'vkd_';
414 this.layoutCode
= InputTool
.
415 BCP47_SPECIAL_REVERSE_
[this.code
].slice(4);
417 var parts
= this.code
.split('-t-');
418 var countryCode
= parts
[0];
419 var inputToolType
= parts
[1];
420 countryCode
= countryCode
.replace(/-/g
, '_');
421 if (countryCode
== 'en_us') {
425 if (goog
.array
.contains(
426 InputTool
.PHONETIC_INSCRIPT_LANGS_
, countryCode
) &&
427 (inputToolType
== 'und-latn-k0-und' || inputToolType
== 'k0-und')) {
428 // If it's virtual keyboard having the inscript/phonetic rule.
429 this.layoutCode
= countryCode
+
430 (inputToolType
== 'k0-und' ? '_inscript' : '_phone');
431 } else if (inputToolType
== 'k0-und') {
432 this.layoutCode
= countryCode
;
434 var matches
= inputToolType
.match(/k0-(.*)/);
436 this.layoutCode
= countryCode
+ '_' + matches
[1].replace(
445 * Parses the input tool code.
446 * TODO(wuyingbing): We will introduce new code pattern, and then write a new
451 InputTool
.prototype.parseInputToolCode_ = function() {
452 // Sets the input tool type.
453 if (this.code
.indexOf('-i0') >= 0) {
454 this.type
= InputToolType
.IME
;
455 if (goog
.string
.endsWith(this.code
, '-handwrit')) {
456 this.type
= InputToolType
.HWT
;
457 } else if (goog
.string
.endsWith(this.code
, '-voice')) {
458 this.type
= InputToolType
.VOICE
;
460 } else if (this.code
.indexOf('-k0') >= 0) {
461 this.type
= InputToolType
.KBD
;
462 } else if (goog
.string
.startsWith(this.code
, 'xkb')) {
463 this.type
= InputToolType
.XKB
;
466 // Sets target language code.
467 var codes
= this.code
.split(/-t|-i0|-k0|:/);
469 if (codes
[0] == 'yue-hant') {
470 codes
[0] = 'zh-Hant';
473 // Currently most systems don't support 'yue-hant', so hack it to
475 case InputToolCode
.INPUTMETHOD_CANTONESE_TRADITIONAL
:
476 codes
[0] = 'zh-Hant';
478 case InputToolCode
.INPUTMETHOD_PINYIN_CHINESE_SIMPLIFIED
:
479 case InputToolCode
.INPUTMETHOD_WUBI_CHINESE_SIMPLIFIED
:
480 codes
[0] = 'zh-Hans';
483 if (this.type
== InputToolType
.XKB
) {
484 if (InputTool
.XkbId2Language_
[this.code
]) {
485 this.languageCode
= InputTool
.XkbId2Language_
[this.code
];
487 this.languageCode
= this.formatLanguageCode_(codes
[codes
.length
- 1]);
490 this.languageCode
= this.formatLanguageCode_(codes
[0]);
491 // Sets the source language code.
493 this.sourceLanguageCode
= this.formatLanguageCode_(codes
[1]);
497 if (this.type
== InputToolType
.KBD
) {
498 this.parseLayoutCode_();
504 InputTool
.prototype.toString = function() {
/**
 * Gets the text direction of this input tool.
 *
 * @return {string} 'rtl' when isRightToLeft() returns true, otherwise 'ltr'.
 */
InputTool.prototype.getDirection = function() {
  if (this.isRightToLeft()) {
    return 'rtl';
  }
  return 'ltr';
};
/**
 * Gets whether this input tool is right-to-left, i.e. whether its code is
 * listed in InputTool.RtlIMEs or InputTool.RtlKeyboards.
 *
 * @return {boolean} Whether the input tool's direction is rtl.
 */
InputTool.prototype.isRightToLeft = function() {
  var isRtlIme = goog.array.contains(InputTool.RtlIMEs, this.code);
  // The keyboard list is only consulted when the IME list had no match.
  return isRtlIme || goog.array.contains(InputTool.RtlKeyboards, this.code);
};
531 * Gets whether has status bar.
533 * @return {boolean} Whether has status bar.
535 InputTool
.prototype.hasStatusBar = function() {
536 // Don't show the status bar on mobile devices.
537 if (!GlobalSettings
.mobile
&& this.type
== InputToolType
.IME
) {
538 return /^(zh|yue)/.test(this.code
);
545 * Format language to standard language code.
547 * @param {string} code The language code.
548 * @return {string} The standard language code.
551 InputTool
.prototype.formatLanguageCode_ = function(code
) {
552 // Hack 'und-ethi' to 'et'. The major population use 'ethi' script in
553 // Ethiopia country. So we set 'et' as language code.
554 if (code
== 'und-ethi') {
558 var parts
= code
.split('-');
560 if (parts
.length
== 2) {
561 if (parts
[1].length
== 2) {
562 retCode
= parts
[0] + '-' + parts
[1].toUpperCase();
564 retCode
= parts
[0] + '-' + parts
[1].charAt(0).toUpperCase() +
568 if (goog
.object
.containsKey(InputTool
.LanguageCodeThreeTwoMap
, parts
[0])) {
569 retCode
= InputTool
.LanguageCodeThreeTwoMap
[parts
[0]];
/**
 * Returns whether the input tool is a transliteration input tool: its type
 * is IME and its code starts with one of the language prefixes listed below.
 *
 * @return {boolean} True for a transliteration IME, false otherwise.
 */
InputTool.prototype.isTransliteration = function() {
  // Only IMEs can be transliteration tools.
  if (this.type != InputToolType.IME) {
    return false;
  }
  var languagePrefix = new RegExp(
      '^(am|ar|bn|el|gu|he|hi|kn|ml|mr|ne|or|fa|pa|ru|sa|' +
      'sr|si|ta|te|ti|ur|uk|be|bg)');
  return languagePrefix.test(this.code);
};
591 * Returns whether the input tool is Latin suggestion or not.
593 * @return {boolean} .
595 InputTool
.prototype.isLatin = function() {
596 return this.type
== InputToolType
.IME
&&
597 /^(en|fr|de|it|es|nl|pt|tr|sv|da|fi|no)/.test(this.code
);