content/test/data/tree_parser_util.js

   1 // Copyright 2015 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 /**
   6  * Parser for a simple grammar that describes a tree structure using a function-
   7  * like "a(b(c,d))" syntax. Original intended usage: to have browsertests
   8  * specify an arbitrary tree of iframes, loaded from various sites, without
   9  * having to write a .html page for each level or do crazy feats of data: url
  10  * escaping. But there's nothing really iframe-specific here. See below for some
  11  * examples of the grammar and the parser output.
  12  *
  13  * @example <caption>Basic syntax: an identifier followed by arg list.</caption>
  14  * TreeParserUtil.parse('abc ()');  // returns { value: 'abc', children: [] }
  15  *
  16  * @example <caption>The arg list is optional. Dots are legal in ids.</caption>
  17  * TreeParserUtil.parse('b.com');  // returns { value: 'b.com', children: [] }
  18  *
  19  * @example <caption>Commas separate children in the arg list.</caption>
  20  * // returns { value: 'b', children: [
  21  * //           { value: 'c', children: [] },
  22  * //           { value: 'd', children: [] }
  23  * //         ]}
  24  * TreeParserUtil.parse('b (c, d)';
  25  *
  26  * @example <caption>Children can have children, and so on.</caption>
  27  * // returns { value: 'e', children: [
  28  * //           { value: 'f', children: [
  29  * //             { value: 'g', children: [
  30  * //               { value: 'h', children: [] },
  31  * //               { value: 'i', children: [
  32  * //                 { value: 'j', children: [] }
  33  * //               ]},
  34  * //             ]}
  35  * //           ]}
  36  * //         ]}
  37  * TreeParserUtil.parse('e(f(g(h(),i(j))))';
  38  *
  39  * @example <caption>flatten() converts a [sub]tree back to a string.</caption>
  40  * var tree = TreeParserUtil.parse('b.com (c.com(e.com), d.com)');
  41  * TreeParserUtil.flatten(tree.children[0]);  // returns 'c.com(e.com())'
  42  */
  43 var TreeParserUtil = (function() {
  44   'use strict';
  45
  46   /**
  47    * Parses an input string into a tree. See class comment for examples.
  48    * @returns A tree of the form {value: <string>, children: <Array.<tree>>}.
  49    */
  50   function parse(input) {
  51     var tokenStream = lex(input);
  52
  53     var result = takeIdAndChild(tokenStream);
  54     if (tokenStream.length != 0)
  55       throw new Error('Expected end of stream, but found "' +
  56                       tokenStream[0] + '".')
  57     return result;
  58   }
  59
  60   /**
  61    * Inverse of |parse|. Converts a parsed tree object into a string. Can be
  62    * used to forward a subtree as an argument to a nested document.
  63    */
  64   function flatten(tree) {
  65     return tree.value + '(' + tree.children.map(flatten).join(',') + ')';
  66   }
  67
  68   /**
  69    * Lexer function to convert an input string into a token stream.  Splits the
  70    * input along whitespace, parens and commas. Whitespace is removed, while
  71    * parens and commas are preserved as standalone tokens.
  72    *
  73    * @param {string} input The input string.
  74    * @return {Array.<string>} The resulting token stream.
  75    */
  76   function lex(input) {
  77     return input.split(/(\s+|\(|\)|,)/).reduce(
  78       function (resultArray, token) {
  79         var trimmed = token.trim();
  80         if (trimmed) {
  81           resultArray.push(trimmed);
  82         }
  83         return resultArray;
  84       }, []);
  85   }
  86
  87
  88   /**
  89    * Consumes from the stream an identifier and optional child list, returning
  90    * its parsed representation.
  91    */
  92   function takeIdAndChild(tokenStream) {
  93     return { value: takeIdentifier(tokenStream),
  94              children: takeChildList(tokenStream) };
  95   }
  96
  97   /**
  98    * Consumes from the stream an identifier, returning its value (as a string).
  99    */
 100   function takeIdentifier(tokenStream) {
 101     if (tokenStream.length == 0)
 102       throw new Error('Expected an identifier, but found end-of-stream.');
 103     var token = tokenStream.shift();
 104     if (!token.match(/[a-zA-Z0-9.-]+/))
 105       throw new Error('Expected an identifier, but found "' + token + '".');
 106     return token;
 107   }
 108
 109   /**
 110    * Consumes an optional child list from the token stream, returning a list of
 111    * the parsed children.
 112    */
 113   function takeChildList(tokenStream) {
 114     // Remove the next token from the stream if it matches |token|.
 115     function tryToEatA(token) {
 116       if (tokenStream[0] == token) {
 117         tokenStream.shift();
 118         return true;
 119       }
 120       return false;
 121     }
 122
 123     // Bare identifier case, as in 'b' in the input '(a (b, c))'
 124     if (!tryToEatA('('))
 125       return [];
 126
 127     // Empty list case, as in 'b' in the input 'a (b (), c)'.
 128     if (tryToEatA(')')) {
 129       return [];
 130     }
 131
 132     // List with at least one entry.
 133     var result = [ takeIdAndChild(tokenStream) ];
 134
 135     // Additional entries allowed with commas.
 136     while (tryToEatA(',')) {
 137       result.push(takeIdAndChild(tokenStream));
 138     }
 139
 140     // End of list.
 141     if (tryToEatA(')')) {
 142       return result;
 143     }
 144     if (tokenStream.length == 0)
 145       throw new Error('Expected ")" or ",", but found end-of-stream.');
 146     throw new Error('Expected ")" or ",", but found "' + tokenStream[0] + '".');
 147   }
 148
 149   return {
 150     parse:   parse,
 151     flatten: flatten
 152   };
 153 })();