tools/telemetry/catapult_base/refactor/snippet.py

   1 # Copyright 2015 The Chromium Authors. All rights reserved.
   2 # Use of this source code is governed by a BSD-style license that can be
   3 # found in the LICENSE file.
   4
   5 import parser
   6 import symbol
   7 import sys
   8 import token
   9 import tokenize
  10
  11 from catapult_base.refactor import offset_token
  12
  13
  14 class Snippet(object):
  15   """A node in the Python parse tree.
  16
  17   The Python grammar is defined at:
  18   https://docs.python.org/2/reference/grammar.html
  19
  20   There are two types of Snippets:
  21     TokenSnippets are leaf nodes containing actual text.
  22     Symbols are internal nodes representing higher-level groupings, and are
  23         defined by the left-hand sides of the BNFs in the above link.
  24   """
  25   @property
  26   def type(self):
  27     raise NotImplementedError()
  28
  29   @property
  30   def type_name(self):
  31     raise NotImplementedError()
  32
  33   @property
  34   def children(self):
  35     """Return a list of this node's children."""
  36     raise NotImplementedError()
  37
  38   @property
  39   def tokens(self):
  40     """Return a tuple of the tokens this Snippet contains."""
  41     raise NotImplementedError()
  42
  43   def PrintTree(self, indent=0, stream=sys.stdout):
  44     """Spew a pretty-printed parse tree. Mostly useful for debugging."""
  45     raise NotImplementedError()
  46
  47   def __str__(self):
  48     return offset_token.Untokenize(self.tokens)
  49
  50   def FindAll(self, snippet_type):
  51     if isinstance(snippet_type, int):
  52       if self.type == snippet_type:
  53         yield self
  54     else:
  55       if isinstance(self, snippet_type):
  56         yield self
  57
  58     for child in self.children:
  59       for snippet in child.FindAll(snippet_type):
  60         yield snippet
  61
  62   def FindChild(self, snippet_type, **kwargs):
  63     for child in self.children:
  64       if isinstance(snippet_type, int):
  65         if child.type != snippet_type:
  66           continue
  67       else:
  68         if not isinstance(child, snippet_type):
  69           continue
  70
  71       for attribute, value in kwargs:
  72         if getattr(child, attribute) != value:
  73           break
  74       else:
  75         return child
  76     raise ValueError('%s is not in %s. Children are: %s' %
  77                      (snippet_type, self, self.children))
  78
  79   def FindChildren(self, snippet_type):
  80     if isinstance(snippet_type, int):
  81       for child in self.children:
  82         if child.type == snippet_type:
  83           yield child
  84     else:
  85       for child in self.children:
  86         if isinstance(child, snippet_type):
  87           yield child
  88
  89
  90 class TokenSnippet(Snippet):
  91   """A Snippet containing a list of tokens.
  92
  93   A list of tokens may start with any number of comments and non-terminating
  94   newlines, but must end with a syntactically meaningful token.
  95   """
  96   def __init__(self, token_type, tokens):
  97     # For operators and delimiters, the TokenSnippet's type may be more specific
  98     # than the type of the constituent token. E.g. the TokenSnippet type is
  99     # token.DOT, but the token type is token.OP. This is because the parser
 100     # has more context than the tokenizer.
 101     self._type = token_type
 102     self._tokens = tokens
 103     self._modified = False
 104
 105   @classmethod
 106   def Create(cls, token_type, string, offset=(0, 0)):
 107     return cls(token_type,
 108                [offset_token.OffsetToken(token_type, string, offset)])
 109
 110   @property
 111   def type(self):
 112     return self._type
 113
 114   @property
 115   def type_name(self):
 116     return token.tok_name[self.type]
 117
 118   @property
 119   def value(self):
 120     return self._tokens[-1].string
 121
 122   @value.setter
 123   def value(self, value):
 124     self._tokens[-1].string = value
 125     self._modified = True
 126
 127   @property
 128   def children(self):
 129     return []
 130
 131   @property
 132   def tokens(self):
 133     return tuple(self._tokens)
 134
 135   @property
 136   def modified(self):
 137     return self._modified
 138
 139   def PrintTree(self, indent=0, stream=sys.stdout):
 140     stream.write(' ' * indent)
 141     if not self.tokens:
 142       print >> stream, self.type_name
 143       return
 144
 145     print >> stream, '%-4s' % self.type_name, repr(self.tokens[0].string)
 146     for tok in self.tokens[1:]:
 147       stream.write(' ' * indent)
 148       print >> stream, ' ' * max(len(self.type_name), 4), repr(tok.string)
 149
 150
 151 class Symbol(Snippet):
 152   """A Snippet containing sub-Snippets.
 153
 154   The possible types and type_names are defined in Python's symbol module."""
 155   def __init__(self, symbol_type, children):
 156     self._type = symbol_type
 157     self._children = children
 158
 159   @property
 160   def type(self):
 161     return self._type
 162
 163   @property
 164   def type_name(self):
 165     return symbol.sym_name[self.type]
 166
 167   @property
 168   def children(self):
 169     return self._children
 170
 171   @children.setter
 172   def children(self, value):  # pylint: disable=arguments-differ
 173     self._children = value
 174
 175   @property
 176   def tokens(self):
 177     tokens = []
 178     for child in self.children:
 179       tokens += child.tokens
 180     return tuple(tokens)
 181
 182   @property
 183   def modified(self):
 184     return any(child.modified for child in self.children)
 185
 186   def PrintTree(self, indent=0, stream=sys.stdout):
 187     stream.write(' ' * indent)
 188
 189     # If there's only one child, collapse it onto the same line.
 190     node = self
 191     while len(node.children) == 1 and len(node.children[0].children) == 1:
 192       print >> stream, node.type_name,
 193       node = node.children[0]
 194
 195     print >> stream, node.type_name
 196     for child in node.children:
 197       child.PrintTree(indent+2, stream)
 198
 199
 200 def Snippetize(f):
 201   """Return the syntax tree of the given file."""
 202   f.seek(0)
 203   syntax_tree = parser.st2list(parser.suite(f.read()))
 204   tokens = offset_token.Tokenize(f)
 205
 206   snippet = _SnippetizeNode(syntax_tree, tokens)
 207   assert not tokens
 208   return snippet
 209
 210
 211 def _SnippetizeNode(node, tokens):
 212   # The parser module gives a syntax tree that discards comments,
 213   # non-terminating newlines, and whitespace information. Use the tokens given
 214   # by the tokenize module to annotate the syntax tree with the information
 215   # needed to exactly reproduce the original source code.
 216   node_type = node[0]
 217
 218   if node_type >= token.NT_OFFSET:
 219     # Symbol.
 220     children = tuple(_SnippetizeNode(child, tokens) for child in node[1:])
 221     return Symbol(node_type, children)
 222   else:
 223     # Token.
 224     grabbed_tokens = []
 225     while tokens and (
 226         tokens[0].type == tokenize.COMMENT or tokens[0].type == tokenize.NL):
 227       grabbed_tokens.append(tokens.popleft())
 228
 229     # parser has 2 NEWLINEs right before the end.
 230     # tokenize has 0 or 1 depending on if the file has one.
 231     # Create extra nodes without consuming tokens to account for this.
 232     if node_type == token.NEWLINE:
 233       for tok in tokens:
 234         if tok.type == token.ENDMARKER:
 235           return TokenSnippet(node_type, grabbed_tokens)
 236         if tok.type != token.DEDENT:
 237           break
 238
 239     assert tokens[0].type == token.OP or node_type == tokens[0].type
 240
 241     grabbed_tokens.append(tokens.popleft())
 242     return TokenSnippet(node_type, grabbed_tokens)