third_party/google_input_tools/src/chrome/os/keyboard/parsedlayout.js

   1 // Copyright 2013 The ChromeOS IME Authors. All Rights Reserved.
   2 // limitations under the License.
   3 // See the License for the specific language governing permissions and
   4 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   5 // distributed under the License is distributed on an "AS-IS" BASIS,
   6 // Unless required by applicable law or agreed to in writing, software
   7 //
   8 //      http://www.apache.org/licenses/LICENSE-2.0
   9 //
  10 // You may obtain a copy of the License at
  11 // you may not use this file except in compliance with the License.
  12 // Licensed under the Apache License, Version 2.0 (the "License");
  13 //
  14
  15 /**
  16  * @fileoverview Defines the parsed layout object which will do layout parsing
  17  *     and expose the keymappings and the transforms to Model.
  18  */
  19
  20 goog.provide('i18n.input.chrome.vk.ParsedLayout');
  21
  22 goog.require('goog.object');
  23 goog.require('i18n.input.chrome.vk.KeyCode');
  24
  25
  26
  27 /**
  28  * Creates the parsed layout object per the raw layout info.
  29  *
  30  * @param {!Object} layout The raw layout object defined in the
  31  *     xxx_layout.js.
  32  * @constructor
  33  */
  34 i18n.input.chrome.vk.ParsedLayout = function(layout) {
  35   /**
  36    * The layout code (a.k.a. id).
  37    *
  38    * @type {string}
  39    */
  40   this.id = layout['id'];
  41
  42   /**
  43    * The view object needed by UI rendering, including the key
  44    * mappings. Some extra keys are not appear in following, which are
  45    * '', 's', 'l', 'sl', 'cl', 'sc', 'scl'. They define the key mappings
  46    * for each keyboard mode:
  47    *   '' means normal;
  48    *   's' means SHIFT;
  49    *   'l' means CAPSLOCK;
  50    *   'c' means CTRL+ALT.
  51    * Those modes will be filled when parsing the raw layout.
  52    * If certain modes are not defined by the raw layout, this.view.<mode>
  53    * won't be filled in.
  54    * The mode format is: {
  55    *   '<keyChar>': ['<disp type(S|P)>', '<disp chars>', '<commit chars>']
  56    * }.
  57    *
  58    * @type {!Object}
  59    */
  60   this.view = {
  61     'id': layout['id'],
  62     'title': layout['title'],
  63     'isRTL': layout['direction'] == 'rtl',
  64     'is102': !!layout['is102Keyboard'],
  65     'mappings': goog.object.create([
  66       '', null,
  67       's', null,
  68       'c', null,
  69       'l', null,
  70       'sc', null,
  71       'cl', null,
  72       'sl', null,
  73       'scl', null
  74     ])
  75   };
  76
  77   /**
  78    * The parsed layout transforms. There are only 3 elements of this array.
  79    * !st is the long exgexp to match, 2nd is the map of:
  80    * <match location>: [<regexp>, <replacement>].
  81    * 3rd/4th are the regexp for prefix matches.
  82    *
  83    * @type {Array.<!Object>}
  84    */
  85   this.transforms = null;
  86
  87   /**
  88    * The parsed layout ambiguous chars.
  89    *
  90    * @type {Object}
  91    * @private
  92    */
  93   this.ambiRegex_ = null;
  94
  95   // Parses the key mapping & transforms of the layout.
  96   this.parseKeyMappings_(layout);
  97   this.parseTransforms_(layout);
  98 };
  99
 100
 101 /**
 102  * Parses the key mappings of the given layout.
 103  *
 104  * @param {!Object} layout The raw layout object. It's format is:
 105  *     id: <layout id> in {string}
 106  *     title: <layout title> in {string}
 107  *     direction: 'rtl' or 'ltr'
 108  *     is102Keyboard: True if vk is 102, False/undefined for 101
 109  *     mappings: key map in {Object.<string,string>}
 110  *       '': keycodes (each char's charCode represents keycode) in normal state
 111  *       s: keycodes in SHIFT state
 112  *       c: keycodes in ALTGR state
 113  *       l: keycodes in CAPSLOCK state
 114  *       <the states could be combined, e.g. ',s,sc,sl,scl'>
 115  *     transform: in {Object.<string,string>}
 116  *       <regexp>: <replacement>
 117  *     historyPruneRegex: <regexp string to represent the ambiguities>.
 118  * @private
 119  */
 120 i18n.input.chrome.vk.ParsedLayout.prototype.parseKeyMappings_ = function(
 121     layout) {
 122   var codes = this.view['is102'] ? i18n.input.chrome.vk.KeyCode.CODES102 :
 123       i18n.input.chrome.vk.KeyCode.CODES101;
 124
 125   var mappings = layout['mappings'];
 126   for (var m in mappings) {
 127     var map = mappings[m];
 128     var modes = m.split(/,/);
 129     if (modes.join(',') != m) {
 130       modes.push(''); // IE splits 'a,b,' into ['a','b']
 131     }
 132     var parsed = {};
 133     // Example for map is like:
 134     //   1) {'': '\u00c0123456...', ...}
 135     //   2) {'QWERT': 'QWERT', ...}
 136     //   3) {'A': 'aa', ...}
 137     //   4) {'BCD': '{{bb}}cd', ...}
 138     //   5) {'EFG': '{{S||e||ee}}FG', ...}
 139     //   6) {'HI': '{{P||12||H}}i', ...}
 140     for (var from in map) {
 141       // In case #1, from is '', to is '\u00c0123456...'.
 142       // In case #3, from is 'A', to is 'aa'.
 143       var to = map[from];
 144       if (from == '') {
 145         from = codes;
 146         // If is 102 keyboard, modify 'to' to be compatible with the old vk.
 147         if (this.view['is102']) {
 148           // Moves the 26th char {\} to be the 38th char (after {'}).
 149           var normalizedTo = to.slice(0, 25);
 150           normalizedTo += to.slice(26, 37);
 151           normalizedTo += to.charAt(25);
 152           normalizedTo += to.slice(37);
 153           to = normalizedTo;
 154         }
 155       }
 156       // Replaces some chars for backward compatibility to old layout
 157       // definitions.
 158       from = from.replace('m', '\u00bd');
 159       from = from.replace('=', '\u00bb');
 160       from = from.replace(';', '\u00ba');
 161       if (from.length == 1) {
 162         // Case #3: single char map to chars.
 163         parsed[from] = ['S', to, to];
 164       } else {
 165         var j = 0;
 166         for (var i = 0, c; c = from.charAt(i); ++i) {
 167           var t = to.charAt(j++);
 168           if (t == to.charAt(j) && t == '{') {
 169             // Case #4/5/6: {{}} to define single char map to chars.
 170             var k = to.indexOf('}}', j);
 171             if (k < j) break;
 172             var s = to.slice(j + 1, k);
 173             var parts = s.split('||');
 174             if (parts.length == 3) {
 175               // Case #5/6: button/commit chars seperation.
 176               parsed[c] = parts;
 177             } else if (parts.length == 1) {
 178               // Case #4.
 179               parsed[c] = ['S', s, s];
 180             }
 181             j = k + 2;
 182           } else {
 183             // Normal case: single char map to according single char.
 184             parsed[c] = ['S', t, t];
 185           }
 186         }
 187       }
 188     }
 189     for (var i = 0, mode; mode = modes[i], mode != undefined; ++i) {
 190       this.view['mappings'][mode] = parsed;
 191     }
 192   }
 193 };
 194
 195
 196 /**
 197  * Prefixalizes the regexp string.
 198  *
 199  * @param {string} re_str The original regexp string.
 200  * @return {string} The prefixalized the regexp string.
 201  * @private
 202  */
 203 i18n.input.chrome.vk.ParsedLayout.prototype.prefixalizeRegexString_ = function(
 204     re_str) {
 205   // Makes sure [...\[\]...] won't impact the later replaces.
 206   re_str = re_str.replace(/\\./g, function(m) {
 207     if (/^\\\[/.test(m)) {
 208       return '\u0001';
 209     }
 210     if (/^\\\]/.test(m)) {
 211       return '\u0002';
 212     }
 213     return m;
 214   });
 215   // Prefixalizes.
 216   re_str = re_str.replace(/\\.|\[[^\[\]]*\]|\{.*\}|[^\|\\\(\)\[\]\{\}\*\+\?]/g,
 217       function(m) {
 218         if (/^\{/.test(m)) {
 219           return m;
 220         }
 221         return '(?:' + m + '|$)';
 222       });
 223   // Restores the \[\].
 224   re_str = re_str.replace(/\u0001/g, '\\[');
 225   re_str = re_str.replace(/\u0002/g, '\\]');
 226   return re_str;
 227 };
 228
 229
 230 /**
 231  * Parses the transforms of the given layout.
 232  *
 233  * @param {!Object} layout The raw layout object. It's format is:
 234  *     id: <layout id> in {string}
 235  *     title: <layout title> in {string}
 236  *     direction: 'rtl' or 'ltr'
 237  *     is102Keyboard: True if vk is 102, False/undefined for 101
 238  *     mappings: key map in {Object.<string,string>}
 239  *       '': keycodes (each char's charCode represents keycode) in normal state
 240  *       s: keycodes in SHIFT state
 241  *       c: keycodes in ALTGR state
 242  *       l: keycodes in CAPSLOCK state
 243  *       <the states could be combined, e.g. ',s,sc,sl,scl'>
 244  *     transform: in {Object.<string,string>}
 245  *       <regexp>: <replacement>
 246  *     historyPruneRegex: <regexp string to represent the ambiguities>.
 247  * @private
 248  */
 249 i18n.input.chrome.vk.ParsedLayout.prototype.parseTransforms_ = function(
 250     layout) {
 251   var transforms = layout['transform'];
 252   if (transforms) {
 253     // regobjs is RegExp objects of the regexp string.
 254     // regexsalone will be used to get the long regexp which concats all the
 255     // transform regexp as (...$)|(...$)|...
 256     // The long regexp is needed because it is ineffecient to match each regexp
 257     // one by one. Instead, we match the long regexp only once. But we need to
 258     // know where the match happens and which replacement we need to use.
 259     // So regobjs will hold the map between the match location and the
 260     // regexp/replacement.
 261     var regobjs = [], regexesalone = [], partialRegexs = [];
 262     // sum_numgrps is the index of current reg group for future matching.
 263     // Don't care about the whole string in array index 0.
 264     var sum_numgrps = 1;
 265     for (var regex in transforms) {
 266       var regobj = new RegExp(regex + '$');
 267       var repl = transforms[regex];
 268       regobjs[sum_numgrps] = [regobj, repl];
 269       regexesalone.push('(' + regex + '$)');
 270       partialRegexs.push('^(' + this.prefixalizeRegexString_(regex) + ')');
 271       // The match should happen to count braces.
 272       var grpCountRegexp = new RegExp(regex + '|.*');
 273       // The length attribute would count whole string as well.
 274       // However, that extra count 1 is compensated by
 275       // extra braces added.
 276       var numgrps = grpCountRegexp.exec('').length;
 277       sum_numgrps += numgrps;
 278     }
 279     var longregobj = new RegExp(regexesalone.join('|'));
 280     // Saves 2 long regexp objects for later prefix matching.
 281     // The reason to save a regexp with '\u0001' is to make sure the whole
 282     // string won't match as a prefix for the whole pattern. For example,
 283     // 'abc' shouldn't match /abc/.
 284     // In above case, /abc/ is prefixalized as re = /(a|$)(b|$)(c|$)/.
 285     // 'a', 'ab' & 'abc' can all match re.
 286     // So make another re2 = /(a|$)(b|$)(c|$)\u0001/, therefore, 'abc' will
 287     // fail to match. Finally, we can use this checks to make sure the prefix
 288     // match: "s matches re but it doesn't match re2".
 289     var prefixregobj = new RegExp(partialRegexs.join('|'));
 290     // Uses reverse-ordered regexp for prefix matching. Details are explained
 291     // in predictTransform().
 292     var prefixregobj2 = new RegExp(partialRegexs.reverse().join('|'));
 293     this.transforms = [longregobj, regobjs, prefixregobj, prefixregobj2];
 294   }
 295
 296   var hisPruReg = layout['historyPruneRegex'];
 297   if (hisPruReg) {
 298     this.ambiRegex_ = new RegExp('^(' + hisPruReg + ')$');
 299   }
 300 };
 301
 302
 303 /**
 304  * Predicts whether there would be future transforms for the given string.
 305  *
 306  * @param {string} text The given string.
 307  * @return {number} The matched position in the string. Returns -1 for no match.
 308  */
 309 i18n.input.chrome.vk.ParsedLayout.prototype.predictTransform = function(text) {
 310   if (!this.transforms || !text) {
 311     return -1;
 312   }
 313   for (var i = 0; i < text.length; i++) {
 314     var s = text.slice(i - text.length);
 315     // Uses multiple mathches to make sure the prefix match.
 316     // Refers to comments in parseTransforms_() method.
 317     var matches = s.match(this.transforms[2]);
 318     if (matches && matches[0]) {
 319       for (var j = 1; j < matches.length && !matches[j]; j++) {}
 320       var matchedIndex = j;
 321       // Ties to match the reversed regexp and see whether the matched indexes
 322       // are pointed to the same rule.
 323       matches = s.match(this.transforms[3]);
 324       if (matches && matches[0]) { // This should always match!
 325         for (var j = 1; j < matches.length && !matches[j]; j++) {}
 326         if (matchedIndex != matches.length - j) {
 327           // If the matched and reverse-matched index are not the same, it
 328           // means the string must be a prefix, because the layout transforms
 329           // shouldn't have duplicated transforms.
 330           return i;
 331         } else {
 332           // Gets the matched rule regexp, and revise it to add a never-matched
 333           // char X in the end. And tries to match it with s+X.
 334           // If matched, it means the s is a full match instead of a prefix
 335           // match.
 336           var re = this.transforms[1][matchedIndex][0];
 337           re = new RegExp(re.toString().match(/\/(.*)\//)[1] + '\u0001');
 338           if (!(s + '\u0001').match(re)) {
 339             return i;
 340           }
 341         }
 342       }
 343     }
 344   }
 345   return -1;
 346 };
 347
 348
 349 /**
 350  * Applies the layout transform and gets the result.
 351  *
 352  * @param {string} prevstr The previous text.
 353  * @param {number} transat The position of previous transform. If it's -1,
 354  *     it means no transform happened.
 355  * @param {string} ch The new chars currently added to prevstr.
 356  * @return {Object} The transform result. It's format is:
 357  *     {back: <the number of chars to be deleted in the end of the prevstr>,
 358  *     chars: <the chars to add at the tail after the deletion>}.
 359  *     If there is no transform applies, return null.
 360  */
 361 i18n.input.chrome.vk.ParsedLayout.prototype.transform = function(
 362     prevstr, transat, ch) {
 363   if (!this.transforms) return null;
 364
 365   var str;
 366   if (transat > 0) {
 367     str = prevstr.slice(0, transat) + '\u001d' +
 368           prevstr.slice(transat) + ch;
 369   } else {
 370     str = prevstr + ch;
 371   }
 372   var longr = this.transforms[0];
 373   var matchArr = longr.exec(str);
 374   if (matchArr) {
 375     var rs = this.transforms[1];
 376
 377     for (var i = 1; i < matchArr.length && !matchArr[i]; i++) {}
 378     var matchGroup = i;
 379
 380     var regobj = rs[matchGroup][0];
 381     var repl = rs[matchGroup][1];
 382     var m = regobj.exec(str);
 383
 384     // String visible to user does not have LOOK_BEHIND_SEP_ and chars.
 385     // So need to discount them in backspace count.
 386     var rmstr = str.slice(m.index);
 387     var numseps = rmstr.search('\u001d') > -1 ? 1 : 0;
 388     var backlen = rmstr.length - numseps - ch.length;
 389
 390     var newstr = str.replace(regobj, repl);
 391     var replstr = newstr.slice(m.index);
 392     replstr = replstr.replace('\u001d', '');
 393
 394     return {back: backlen, chars: replstr};
 395   }
 396
 397   return null;
 398 };
 399
 400
 401 /**
 402  * Gets whether the given chars is ambiguious chars.
 403  *
 404  * @param {string} chars The chars to be judged.
 405  * @return {boolean} True if given chars is ambiguious chars, false
 406  *     otherwise.
 407  */
 408 i18n.input.chrome.vk.ParsedLayout.prototype.isAmbiChars = function(chars) {
 409   return this.ambiRegex_ ? !!this.ambiRegex_.exec(chars) : false;
 410 };