1 // Copyright 2013 The ChromeOS IME Authors. All Rights Reserved.
2 // limitations under the License.
3 // See the License for the specific language governing permissions and
4 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
5 // distributed under the License is distributed on an "AS-IS" BASIS,
6 // Unless required by applicable law or agreed to in writing, software
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // You may obtain a copy of the License at
11 // you may not use this file except in compliance with the License.
12 // Licensed under the Apache License, Version 2.0 (the "License");
16 * @fileoverview Defines the parsed layout object which will do layout parsing
17 * and expose the keymappings and the transforms to Model.
20 goog
.provide('i18n.input.chrome.vk.ParsedLayout');
22 goog
.require('goog.object');
23 goog
.require('i18n.input.chrome.vk.KeyCode');
28 * Creates the parsed layout object per the raw layout info.
30 * @param {!Object} layout The raw layout object defined in the
// Constructor: stores the layout id, starts building the view object from the
// raw layout, resets the transform/ambiguity state to null and then runs the
// two parse passes (key mappings first, transforms second).
34 i18n
.input
.chrome
.vk
.ParsedLayout = function(layout
) {
36 * The layout code (a.k.a. id).
40 this.id
= layout
['id'];
43 * The view object needed by UI rendering, including the key
44 * mappings. Some extra keys are not appear in following, which are
45 * '', 's', 'l', 'sl', 'cl', 'sc', 'scl'. They define the key mappings
46 * for each keyboard mode:
51 * Those modes will be filled when parsing the raw layout.
52 * If certain modes are not defined by the raw layout, this.view.<mode>
54 * The mode format is: {
55 * '<keyChar>': ['<disp type(S|P)>', '<disp chars>', '<commit chars>']
62 'title': layout
['title'],
// RTL only when the raw direction compares equal to 'rtl'; anything else
// (including a missing direction) yields false.
63 'isRTL': layout
['direction'] == 'rtl',
// !! coerces a missing/undefined is102Keyboard to a strict boolean false.
64 'is102': !!layout
['is102Keyboard'],
65 'mappings': goog
.object
.create([
78 * The parsed layout transforms. There are only 3 elements of this array.
79 * !st is the long exgexp to match, 2nd is the map of:
80 * <match location>: [<regexp>, <replacement>].
81 * 3rd/4th are the regexp for prefix matches.
83 * @type {Array.<!Object>}
// parseTransforms_ assigns a four-element array here: the combined ("long")
// regexp, the match-location map, the prefix regexp and the reverse-ordered
// prefix regexp.
85 this.transforms
= null;
88 * The parsed layout ambiguous chars.
// parseTransforms_ builds this regexp from layout['historyPruneRegex'].
93 this.ambiRegex_
= null;
95 // Parses the key mapping & transforms of the layout.
96 this.parseKeyMappings_(layout
);
97 this.parseTransforms_(layout
);
102 * Parses the key mappings of the given layout.
104 * @param {!Object} layout The raw layout object. Its format is:
105 * id: <layout id> in {string}
106 * title: <layout title> in {string}
107 * direction: 'rtl' or 'ltr'
108 * is102Keyboard: True if vk is 102, False/undefined for 101
109 * mappings: key map in {Object.<string,string>}
110 * '': keycodes (each char's charCode represents keycode) in normal state
111 * s: keycodes in SHIFT state
112 * c: keycodes in ALTGR state
113 * l: keycodes in CAPSLOCK state
114 * <the states could be combined, e.g. ',s,sc,sl,scl'>
115 * transform: in {Object.<string,string>}
116 * <regexp>: <replacement>
117 * historyPruneRegex: <regexp string to represent the ambiguities>.
// Walks layout['mappings'] and stores one parsed key map per keyboard mode
// into this.view['mappings'].
120 i18n
.input
.chrome
.vk
.ParsedLayout
.prototype.parseKeyMappings_ = function(
// The key code table is chosen by the view's is102 flag: CODES102 for a
// 102-key keyboard, CODES101 otherwise.
122 var codes
= this.view
['is102'] ? i18n
.input
.chrome
.vk
.KeyCode
.CODES102
:
123 i18n
.input
.chrome
.vk
.KeyCode
.CODES101
;
125 var mappings
= layout
['mappings'];
// Each property name of mappings is a comma-separated list of modes that
// share the same map (e.g. ',s,sc,sl,scl').
126 for (var m
in mappings
) {
127 var map
= mappings
[m
];
128 var modes
= m
.split(/,/);
// If re-joining the pieces does not reproduce the original key, the split
// dropped a trailing empty mode, so it is appended back explicitly.
129 if (modes
.join(',') != m
) {
130 modes
.push(''); // IE splits 'a,b,' into ['a','b']
133 // Example for map is like:
134 // 1) {'': '\u00c0123456...', ...}
135 // 2) {'QWERT': 'QWERT', ...}
136 // 3) {'A': 'aa', ...}
137 // 4) {'BCD': '{{bb}}cd', ...}
138 // 5) {'EFG': '{{S||e||ee}}FG', ...}
139 // 6) {'HI': '{{P||12||H}}i', ...}
140 for (var from in map
) {
141 // In case #1, from is '', to is '\u00c0123456...'.
142 // In case #3, from is 'A', to is 'aa'.
146 // If is 102 keyboard, modify 'to' to be compatible with the old vk.
147 if (this.view
['is102']) {
148 // Moves the 26th char {\} to be the 38th char (after {'}).
149 var normalizedTo
= to
.slice(0, 25);
150 normalizedTo
+= to
.slice(26, 37);
151 normalizedTo
+= to
.charAt(25);
152 normalizedTo
+= to
.slice(37);
// Net effect of the four pieces above: the char at index 25 of 'to' ends
// up at index 36 of normalizedTo; every other char keeps its relative
// order.
156 // Replaces some chars for backward compatibility to old layout
// Each replace() below takes a string pattern, so only the first
// occurrence in 'from' is substituted.
158 from = from.replace('m', '\u00bd');
159 from = from.replace('=', '\u00bb');
160 from = from.replace(';', '\u00ba');
161 if (from.length
== 1) {
162 // Case #3: single char map to chars.
163 parsed
[from] = ['S', to
, to
];
// Walks 'from' one char at a time; the loop ends when charAt(i) returns ''
// (falsy) past the end of the string. j is the read cursor into 'to'.
166 for (var i
= 0, c
; c
= from.charAt(i
); ++i
) {
167 var t
= to
.charAt(j
++);
// Two consecutive '{' chars open a {{...}} group.
168 if (t
== to
.charAt(j
) && t
== '{') {
169 // Case #4/5/6: {{}} to define single char map to chars.
170 var k
= to
.indexOf('}}', j
);
// s is the text between the '{{' and the closing '}}'.
172 var s
= to
.slice(j
+ 1, k
);
173 var parts
= s
.split('||');
174 if (parts
.length
== 3) {
175 // Case #5/6: button/commit chars separation.
177 } else if (parts
.length
== 1) {
// Case #4: no '||' separator, so the same text is used for both the
// display chars and the commit chars.
179 parsed
[c
] = ['S', s
, s
];
183 // Normal case: single char map to according single char.
184 parsed
[c
] = ['S', t
, t
];
// The comma expression makes the loop stop only on undefined, so the
// empty-string mode '' is still processed. Every listed mode receives the
// same parsed object (shared reference, not a copy).
189 for (var i
= 0, mode
; mode
= modes
[i
], mode
!= undefined; ++i
) {
190 this.view
['mappings'][mode
] = parsed
;
197 * Prefixalizes the regexp string.
199 * @param {string} re_str The original regexp string.
200 * @return {string} The prefixalized regexp string.
// Rewrites a regexp source string so that each matched piece is wrapped as
// (?:piece|$), i.e. each piece may alternatively match the end of input.
203 i18n
.input
.chrome
.vk
.ParsedLayout
.prototype.prefixalizeRegexString_ = function(
205 // Makes sure [...\[\]...] won't impact the later replaces.
// First pass: visits every backslash-escaped char and tests whether it is an
// escaped '[' or an escaped ']'. Presumably these are swapped for the
// placeholder chars \u0001 / \u0002 that are restored at the end of this
// function -- TODO confirm.
206 re_str
= re_str
.replace(/\\./g, function(m
) {
207 if (/^\\\[/.test(m
)) {
210 if (/^\\\]/.test(m
)) {
// Second pass: the alternation matches, in order, an escaped char, a [...]
// character class without nested brackets, a {...} span, or any single char
// that is not one of the regexp operators | \ ( ) [ ] { } * + ?
216 re_str
= re_str
.replace(/\\.|\[[^\[\]]*\]|\{.*\}|[^\|\\\(\)\[\]\{\}\*\+\?]/g,
221 return '(?:' + m
+ '|$)';
223 // Restores the \[\].
224 re_str
= re_str
.replace(/\u0001/g, '\\[');
225 re_str
= re_str
.replace(/\u0002/g, '\\]');
231 * Parses the transforms of the given layout.
233 * @param {!Object} layout The raw layout object. Its format is:
234 * id: <layout id> in {string}
235 * title: <layout title> in {string}
236 * direction: 'rtl' or 'ltr'
237 * is102Keyboard: True if vk is 102, False/undefined for 101
238 * mappings: key map in {Object.<string,string>}
239 * '': keycodes (each char's charCode represents keycode) in normal state
240 * s: keycodes in SHIFT state
241 * c: keycodes in ALTGR state
242 * l: keycodes in CAPSLOCK state
243 * <the states could be combined, e.g. ',s,sc,sl,scl'>
244 * transform: in {Object.<string,string>}
245 * <regexp>: <replacement>
246 * historyPruneRegex: <regexp string to represent the ambiguities>.
// Compiles layout['transform'] into the four-element this.transforms array
// and builds this.ambiRegex_ from layout['historyPruneRegex'].
249 i18n
.input
.chrome
.vk
.ParsedLayout
.prototype.parseTransforms_ = function(
251 var transforms
= layout
['transform'];
253 // regobjs is RegExp objects of the regexp string.
254 // regexesalone will be used to get the long regexp which concats all the
255 // transform regexp as (...$)|(...$)|...
256 // The long regexp is needed because it is inefficient to match each regexp
257 // one by one. Instead, we match the long regexp only once. But we need to
258 // know where the match happens and which replacement we need to use.
259 // So regobjs will hold the map between the match location and the
260 // regexp/replacement.
261 var regobjs
= [], regexesalone
= [], partialRegexs
= [];
262 // sum_numgrps is the index of current reg group for future matching.
263 // Don't care about the whole string in array index 0.
265 for (var regex
in transforms
) {
// Every rule is anchored to the end of the input with '$'.
266 var regobj
= new RegExp(regex
+ '$');
267 var repl
= transforms
[regex
];
// regobjs is filled sparsely: the slot used is the running group index, so
// it can be looked up by the capture-group number that matched in the long
// regexp.
268 regobjs
[sum_numgrps
] = [regobj
, repl
];
269 regexesalone
.push('(' + regex
+ '$)');
270 partialRegexs
.push('^(' + this.prefixalizeRegexString_(regex
) + ')');
271 // The match should happen to count braces.
// The '|.*' alternative guarantees that exec('') succeeds, so the result
// array length is 1 + the number of capture groups in regex.
272 var grpCountRegexp
= new RegExp(regex
+ '|.*');
273 // The length attribute would count whole string as well.
274 // However, that extra count 1 is compensated by
275 // extra braces added.
276 var numgrps
= grpCountRegexp
.exec('').length
;
277 sum_numgrps
+= numgrps
;
279 var longregobj
= new RegExp(regexesalone
.join('|'));
280 // Saves 2 long regexp objects for later prefix matching.
281 // The reason to save a regexp with '\u0001' is to make sure the whole
282 // string won't match as a prefix for the whole pattern. For example,
283 // 'abc' shouldn't match /abc/.
284 // In above case, /abc/ is prefixalized as re = /(a|$)(b|$)(c|$)/.
285 // 'a', 'ab' & 'abc' can all match re.
286 // So make another re2 = /(a|$)(b|$)(c|$)\u0001/, therefore, 'abc' will
287 // fail to match. Finally, we can use this check to make sure the prefix
288 // match: "s matches re but it doesn't match re2".
289 var prefixregobj
= new RegExp(partialRegexs
.join('|'));
290 // Uses reverse-ordered regexp for prefix matching. Details are explained
291 // in predictTransform().
// reverse() reorders partialRegexs in place; prefixregobj was already
// compiled above, so it keeps the original order.
292 var prefixregobj2
= new RegExp(partialRegexs
.reverse().join('|'));
293 this.transforms
= [longregobj
, regobjs
, prefixregobj
, prefixregobj2
];
296 var hisPruReg
= layout
['historyPruneRegex'];
// Anchored with ^(...)$ so only a whole-string match counts as ambiguous.
298 this.ambiRegex_
= new RegExp('^(' + hisPruReg
+ ')$');
304 * Predicts whether there would be future transforms for the given string.
306 * @param {string} text The given string.
307 * @return {number} The matched position in the string. Returns -1 for no match.
// Scans the suffixes of text, longest first, and tests each against the
// prefix regexps to decide whether a future transform could still apply.
309 i18n
.input
.chrome
.vk
.ParsedLayout
.prototype.predictTransform = function(text
) {
// Nothing to predict without parsed transforms or with empty text.
310 if (!this.transforms
|| !text
) {
313 for (var i
= 0; i
< text
.length
; i
++) {
// i - text.length is negative, so slice() returns the suffix of text that
// starts at index i (the whole text when i is 0).
314 var s
= text
.slice(i
- text
.length
);
315 // Uses multiple matches to make sure the prefix match.
316 // Refers to comments in parseTransforms_() method.
317 var matches
= s
.match(this.transforms
[2]);
318 if (matches
&& matches
[0]) {
// Empty-bodied loop: advances j to the first non-empty capture group (or
// to matches.length when there is none).
319 for (var j
= 1; j
< matches
.length
&& !matches
[j
]; j
++) {}
320 var matchedIndex
= j
;
321 // Tries to match the reversed regexp and see whether the matched indexes
322 // are pointed to the same rule.
323 matches
= s
.match(this.transforms
[3]);
324 if (matches
&& matches
[0]) { // This should always match!
325 for (var j
= 1; j
< matches
.length
&& !matches
[j
]; j
++) {}
// matches.length - j presumably converts the group index found in the
// reverse-ordered regexp back to the forward ordering -- TODO confirm.
326 if (matchedIndex
!= matches
.length
- j
) {
327 // If the matched and reverse-matched index are not the same, it
328 // means the string must be a prefix, because the layout transforms
329 // shouldn't have duplicated transforms.
332 // Gets the matched rule regexp, and revise it to add a never-matched
333 // char X in the end. And tries to match it with s+X.
334 // If matched, it means the s is a full match instead of a prefix
336 var re
= this.transforms
[1][matchedIndex
][0];
// toString() yields '/source/flags'; the inner match extracts the source
// between the first and last '/' before \u0001 is appended.
337 re
= new RegExp(re
.toString().match(/\/(.*)\//)[1] + '\u0001');
338 if (!(s
+ '\u0001').match(re
)) {
350 * Applies the layout transform and gets the result.
352 * @param {string} prevstr The previous text.
353 * @param {number} transat The position of previous transform. If it's -1,
354 * it means no transform happened.
355 * @param {string} ch The new chars currently added to prevstr.
356 * @return {Object} The transform result. Its format is:
357 * {back: <the number of chars to be deleted in the end of the prevstr>,
358 * chars: <the chars to add at the tail after the deletion>}.
359 * If no transform applies, returns null.
// Applies the first matching transform rule to prevstr + ch and reports the
// edit as a backspace count plus the replacement chars.
361 i18n
.input
.chrome
.vk
.ParsedLayout
.prototype.transform = function(
362 prevstr
, transat
, ch
) {
// No parsed transforms means nothing can apply.
363 if (!this.transforms
) return null;
// Builds the string to match: prevstr with the \u001d separator inserted at
// the previous transform position, followed by the new chars.
367 str
= prevstr
.slice(0, transat
) + '\u001d' +
368 prevstr
.slice(transat
) + ch
;
372 var longr
= this.transforms
[0];
373 var matchArr
= longr
.exec(str
);
375 var rs
= this.transforms
[1];
// Empty-bodied loop: advances i to the first non-empty capture group of the
// long regexp match.
377 for (var i
= 1; i
< matchArr
.length
&& !matchArr
[i
]; i
++) {}
// rs maps a group index to its [regexp, replacement] pair; matchGroup is
// presumably derived from i above -- TODO confirm.
380 var regobj
= rs
[matchGroup
][0];
381 var repl
= rs
[matchGroup
][1];
// Re-runs the individual rule to learn where in str the match starts.
382 var m
= regobj
.exec(str
);
384 // String visible to user does not have LOOK_BEHIND_SEP_ and chars.
385 // So need to discount them in backspace count.
386 var rmstr
= str
.slice(m
.index
);
// At most one separator is discounted, whether or not more are present.
387 var numseps
= rmstr
.search('\u001d') > -1 ? 1 : 0;
388 var backlen
= rmstr
.length
- numseps
- ch
.length
;
390 var newstr
= str
.replace(regobj
, repl
);
391 var replstr
= newstr
.slice(m
.index
);
// String-pattern replace(): removes only the first separator, if any.
392 replstr
= replstr
.replace('\u001d', '');
394 return {back
: backlen
, chars
: replstr
};
402 * Gets whether the given chars are ambiguous chars.
404 * @param {string} chars The chars to be judged.
405 * @return {boolean} True if the given chars are ambiguous chars, false
// Tests chars against this.ambiRegex_. Returns false when no ambiguity regexp
// has been set; otherwise !! coerces the exec() result (match array or null)
// to a boolean.
408 i18n
.input
.chrome
.vk
.ParsedLayout
.prototype.isAmbiChars = function(chars
) {
409 return this.ambiRegex_
? !!this.ambiRegex_
.exec(chars
) : false;