1 // Copyright 2013 The ChromeOS IME Authors. All Rights Reserved.
2 // limitations under the License.
3 // See the License for the specific language governing permissions and
4 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
5 // distributed under the License is distributed on an "AS-IS" BASIS,
6 // Unless required by applicable law or agreed to in writing, software
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // You may obtain a copy of the License at
11 // you may not use this file except in compliance with the License.
12 // Licensed under the Apache License, Version 2.0 (the "License");
16 * @fileoverview Defines the parsed layout object which will do layout parsing
17 * and expose the keymappings and the transforms to Model.
20 goog.provide('i18n.input.chrome.vk.ParsedLayout');
22 goog.require('goog.object');
23 goog.require('i18n.input.chrome.vk.KeyCode');
28 * Creates the parsed layout object per the raw layout info.
30 * @param {!Object} layout The raw layout object defined in the
34 i18n.input.chrome.vk.ParsedLayout = function(layout) {
36 * The layout code (a.k.a. id).
40 this.id = layout['id'];
43 * The view object needed by UI rendering, including the key
44 * mappings. Some extra keys do not appear in the following, which are
45 * '', 's', 'l', 'sl', 'cl', 'sc', 'scl'. They define the key mappings
46 * for each keyboard mode:
51 * Those modes will be filled when parsing the raw layout.
52 * If certain modes are not defined by the raw layout, this.view.<mode>
54 * The mode format is: {
55 * '<keyChar>': ['<disp type(S|P)>', '<disp chars>', '<commit chars>']
62 'title': layout['title'],
63 'isRTL': layout['direction'] == 'rtl',
64 'is102': !!layout['is102Keyboard'],
65 'mappings': goog.object.create([
78 * The parsed layout transforms. There are 4 elements in this array.
79 * 1st is the long regexp to match, 2nd is the map of:
80 * <match location>: [<regexp>, <replacement>].
81 * 3rd/4th are the regexp for prefix matches.
83 * @type {Array.<!Object>}
85 this.transforms = null;
88 * The parsed layout ambiguous chars.
93 this.ambiRegex_ = null;
95 // Parses the key mapping & transforms of the layout.
96 this.parseKeyMappings_(layout);
97 this.parseTransforms_(layout);
102 * Parses the key mappings of the given layout.
104 * @param {!Object} layout The raw layout object. Its format is:
105 * id: <layout id> in {string}
106 * title: <layout title> in {string}
107 * direction: 'rtl' or 'ltr'
108 * is102Keyboard: True if vk is 102, False/undefined for 101
109 * mappings: key map in {Object.<string,string>}
110 * '': keycodes (each char's charCode represents keycode) in normal state
111 * s: keycodes in SHIFT state
112 * c: keycodes in ALTGR state
113 * l: keycodes in CAPSLOCK state
114 * <the states could be combined, e.g. ',s,sc,sl,scl'>
115 * transform: in {Object.<string,string>}
116 * <regexp>: <replacement>
117 * historyPruneRegex: <regexp string to represent the ambiguities>.
120 i18n.input.chrome.vk.ParsedLayout.prototype.parseKeyMappings_ = function(
122 var codes = this.view['is102'] ? i18n.input.chrome.vk.KeyCode.CODES102 :
123 i18n.input.chrome.vk.KeyCode.CODES101;
125 var mappings = layout['mappings'];
126 for (var m in mappings) {
127 var map = mappings[m];
128 var modes = m.split(/,/);
129 if (modes.join(',') != m) {
130 modes.push(''); // IE splits 'a,b,' into ['a','b']
133 // Example for map is like:
134 // 1) {'': '\u00c0123456...', ...}
135 // 2) {'QWERT': 'QWERT', ...}
136 // 3) {'A': 'aa', ...}
137 // 4) {'BCD': '{{bb}}cd', ...}
138 // 5) {'EFG': '{{S||e||ee}}FG', ...}
139 // 6) {'HI': '{{P||12||H}}i', ...}
140 for (var from in map) {
141 // In case #1, from is '', to is '\u00c0123456...'.
142 // In case #3, from is 'A', to is 'aa'.
146 // If is 102 keyboard, modify 'to' to be compatible with the old vk.
147 if (this.view['is102']) {
148 // Moves the 26th char {\} to be the 38th char (after {'}).
149 var normalizedTo = to.slice(0, 25);
150 normalizedTo += to.slice(26, 37);
151 normalizedTo += to.charAt(25);
152 normalizedTo += to.slice(37);
156 // Replaces some chars for backward compatibility to old layout
158 from = from.replace('m', '\u00bd');
159 from = from.replace('=', '\u00bb');
160 from = from.replace(';', '\u00ba');
161 if (from.length == 1) {
162 // Case #3: single char map to chars.
163 parsed[from] = ['S', to, to];
166 for (var i = 0, c; c = from.charAt(i); ++i) {
167 var t = to.charAt(j++);
168 if (t == to.charAt(j) && t == '{') {
169 // Case #4/5/6: {{}} to define single char map to chars.
170 var k = to.indexOf('}}', j);
172 var s = to.slice(j + 1, k);
173 var parts = s.split('||');
174 if (parts.length == 3) {
175 // Case #5/6: button/commit chars separation.
177 } else if (parts.length == 1) {
179 parsed[c] = ['S', s, s];
183 // Normal case: single char map to according single char.
184 parsed[c] = ['S', t, t];
189 for (var i = 0, mode; mode = modes[i], mode != undefined; ++i) {
190 this.view['mappings'][mode] = parsed;
197 * Prefixalizes the regexp string.
199 * @param {string} re_str The original regexp string.
200 * @return {string} The prefixalized regexp string.
203 i18n.input.chrome.vk.ParsedLayout.prototype.prefixalizeRegexString_ = function(
205 // Makes sure [...\[\]...] won't impact the later replaces.
206 re_str = re_str.replace(/\\./g, function(m) {
207 if (/^\\\[/.test(m)) {
210 if (/^\\\]/.test(m)) {
216 re_str = re_str.replace(/\\.|\[[^\[\]]*\]|\{.*\}|[^\|\\\(\)\[\]\{\}\*\+\?]/g,
221 return '(?:' + m + '|$)';
223 // Restores the \[\].
224 re_str = re_str.replace(/\u0001/g, '\\[');
225 re_str = re_str.replace(/\u0002/g, '\\]');
231 * Parses the transforms of the given layout.
233 * @param {!Object} layout The raw layout object. Its format is:
234 * id: <layout id> in {string}
235 * title: <layout title> in {string}
236 * direction: 'rtl' or 'ltr'
237 * is102Keyboard: True if vk is 102, False/undefined for 101
238 * mappings: key map in {Object.<string,string>}
239 * '': keycodes (each char's charCode represents keycode) in normal state
240 * s: keycodes in SHIFT state
241 * c: keycodes in ALTGR state
242 * l: keycodes in CAPSLOCK state
243 * <the states could be combined, e.g. ',s,sc,sl,scl'>
244 * transform: in {Object.<string,string>}
245 * <regexp>: <replacement>
246 * historyPruneRegex: <regexp string to represent the ambiguities>.
249 i18n.input.chrome.vk.ParsedLayout.prototype.parseTransforms_ = function(
251 var transforms = layout['transform'];
253 // regobjs is RegExp objects of the regexp string.
254 // regexesalone will be used to get the long regexp which concats all the
255 // transform regexp as (...$)|(...$)|...
256 // The long regexp is needed because it is inefficient to match each regexp
257 // one by one. Instead, we match the long regexp only once. But we need to
258 // know where the match happens and which replacement we need to use.
259 // So regobjs will hold the map between the match location and the
260 // regexp/replacement.
261 var regobjs = [], regexesalone = [], partialRegexs = [];
262 // sum_numgrps is the index of current reg group for future matching.
263 // Don't care about the whole string in array index 0.
265 for (var regex in transforms) {
266 var regobj = new RegExp(regex + '$');
267 var repl = transforms[regex];
268 regobjs[sum_numgrps] = [regobj, repl];
269 regexesalone.push('(' + regex + '$)');
270 partialRegexs.push('^(' + this.prefixalizeRegexString_(regex) + ')');
271 // The match should happen to count braces.
272 var grpCountRegexp = new RegExp(regex + '|.*');
273 // The length attribute would count whole string as well.
274 // However, that extra count 1 is compensated by
275 // extra braces added.
276 var numgrps = grpCountRegexp.exec('').length;
277 sum_numgrps += numgrps;
279 var longregobj = new RegExp(regexesalone.join('|'));
280 // Saves 2 long regexp objects for later prefix matching.
281 // The reason to save a regexp with '\u0001' is to make sure the whole
282 // string won't match as a prefix for the whole pattern. For example,
283 // 'abc' shouldn't match /abc/.
284 // In above case, /abc/ is prefixalized as re = /(a|$)(b|$)(c|$)/.
285 // 'a', 'ab' & 'abc' can all match re.
286 // So make another re2 = /(a|$)(b|$)(c|$)\u0001/, therefore, 'abc' will
287 // fail to match. Finally, we can use this check to make sure the prefix
288 // match: "s matches re but it doesn't match re2".
289 var prefixregobj = new RegExp(partialRegexs.join('|'));
290 // Uses reverse-ordered regexp for prefix matching. Details are explained
291 // in predictTransform().
292 var prefixregobj2 = new RegExp(partialRegexs.reverse().join('|'));
293 this.transforms = [longregobj, regobjs, prefixregobj, prefixregobj2];
296 var hisPruReg = layout['historyPruneRegex'];
298 this.ambiRegex_ = new RegExp('^(' + hisPruReg + ')$');
304 * Predicts whether there would be future transforms for the given string.
306 * @param {string} text The given string.
307 * @return {number} The matched position in the string. Returns -1 for no match.
309 i18n.input.chrome.vk.ParsedLayout.prototype.predictTransform = function(text) {
310 if (!this.transforms || !text) {
313 for (var i = 0; i < text.length; i++) {
314 var s = text.slice(i - text.length);
315 // Uses multiple matches to make sure the prefix match.
316 // Refers to comments in parseTransforms_() method.
317 var matches = s.match(this.transforms[2]);
318 if (matches && matches[0]) {
319 for (var j = 1; j < matches.length && !matches[j]; j++) {}
320 var matchedIndex = j;
321 // Tries to match the reversed regexp and see whether the matched indexes
322 // are pointed to the same rule.
323 matches = s.match(this.transforms[3]);
324 if (matches && matches[0]) { // This should always match!
325 for (var j = 1; j < matches.length && !matches[j]; j++) {}
326 if (matchedIndex != matches.length - j) {
327 // If the matched and reverse-matched index are not the same, it
328 // means the string must be a prefix, because the layout transforms
329 // shouldn't have duplicated transforms.
332 // Gets the matched rule regexp, and revise it to add a never-matched
333 // char X in the end. And tries to match it with s+X.
334 // If matched, it means the s is a full match instead of a prefix
336 var re = this.transforms[1][matchedIndex][0];
337 re = new RegExp(re.toString().match(/\/(.*)\//)[1] + '\u0001');
338 if (!(s + '\u0001').match(re)) {
350 * Applies the layout transform and gets the result.
352 * @param {string} prevstr The previous text.
353 * @param {number} transat The position of previous transform. If it's -1,
354 * it means no transform happened.
355 * @param {string} ch The new chars currently added to prevstr.
356 * @return {Object} The transform result. Its format is:
357 * {back: <the number of chars to be deleted in the end of the prevstr>,
358 * chars: <the chars to add at the tail after the deletion>}.
359 * If no transform applies, returns null.
361 i18n.input.chrome.vk.ParsedLayout.prototype.transform = function(
362 prevstr, transat, ch) {
363 if (!this.transforms) return null;
367 str = prevstr.slice(0, transat) + '\u001d' +
368 prevstr.slice(transat) + ch;
372 var longr = this.transforms[0];
373 var matchArr = longr.exec(str);
375 var rs = this.transforms[1];
377 for (var i = 1; i < matchArr.length && !matchArr[i]; i++) {}
380 var regobj = rs[matchGroup][0];
381 var repl = rs[matchGroup][1];
382 var m = regobj.exec(str);
384 // String visible to user does not have LOOK_BEHIND_SEP_ and chars.
385 // So need to discount them in backspace count.
386 var rmstr = str.slice(m.index);
387 var numseps = rmstr.search('\u001d') > -1 ? 1 : 0;
388 var backlen = rmstr.length - numseps - ch.length;
390 var newstr = str.replace(regobj, repl);
391 var replstr = newstr.slice(m.index);
392 replstr = replstr.replace('\u001d', '');
394 return {back: backlen, chars: replstr};
402 * Gets whether the given chars are ambiguous chars.
404 * @param {string} chars The chars to be judged.
405 * @return {boolean} True if the given chars are ambiguous chars, false
408 i18n.input.chrome.vk.ParsedLayout.prototype.isAmbiChars = function(chars) {
409 return this.ambiRegex_ ? !!this.ambiRegex_.exec(chars) : false;