Add long running gmail memory benchmark for background tab.
[chromium-blink-merge.git] / tools / telemetry / catapult_base / refactor / snippet.py
blobecb688af9004038866ca44a1335dd003c02356ab
1 # Copyright 2015 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
5 import parser
6 import symbol
7 import sys
8 import token
9 import tokenize
11 from catapult_base.refactor import offset_token
14 class Snippet(object):
15 """A node in the Python parse tree.
17 The Python grammar is defined at:
18 https://docs.python.org/2/reference/grammar.html
20 There are two types of Snippets:
21 TokenSnippets are leaf nodes containing actual text.
22 Symbols are internal nodes representing higher-level groupings, and are
23 defined by the left-hand sides of the BNFs in the above link.
24 """
25 @property
26 def type(self):
27 raise NotImplementedError()
29 @property
30 def type_name(self):
31 raise NotImplementedError()
33 @property
34 def children(self):
35 """Return a list of this node's children."""
36 raise NotImplementedError()
38 @property
39 def tokens(self):
40 """Return a tuple of the tokens this Snippet contains."""
41 raise NotImplementedError()
43 def PrintTree(self, indent=0, stream=sys.stdout):
44 """Spew a pretty-printed parse tree. Mostly useful for debugging."""
45 raise NotImplementedError()
47 def __str__(self):
48 return offset_token.Untokenize(self.tokens)
50 def FindAll(self, snippet_type):
51 if isinstance(snippet_type, int):
52 if self.type == snippet_type:
53 yield self
54 else:
55 if isinstance(self, snippet_type):
56 yield self
58 for child in self.children:
59 for snippet in child.FindAll(snippet_type):
60 yield snippet
62 def FindChild(self, snippet_type, **kwargs):
63 for child in self.children:
64 if isinstance(snippet_type, int):
65 if child.type != snippet_type:
66 continue
67 else:
68 if not isinstance(child, snippet_type):
69 continue
71 for attribute, value in kwargs:
72 if getattr(child, attribute) != value:
73 break
74 else:
75 return child
76 raise ValueError('%s is not in %s. Children are: %s' %
77 (snippet_type, self, self.children))
79 def FindChildren(self, snippet_type):
80 if isinstance(snippet_type, int):
81 for child in self.children:
82 if child.type == snippet_type:
83 yield child
84 else:
85 for child in self.children:
86 if isinstance(child, snippet_type):
87 yield child
class TokenSnippet(Snippet):
  """A Snippet containing a list of tokens.

  A list of tokens may start with any number of comments and non-terminating
  newlines, but must end with a syntactically meaningful token.
  """
  def __init__(self, token_type, tokens):
    # For operators and delimiters, the TokenSnippet's type may be more specific
    # than the type of the constituent token. E.g. the TokenSnippet type is
    # token.DOT, but the token type is token.OP. This is because the parser
    # has more context than the tokenizer.
    self._type = token_type
    # List of offset_token.OffsetToken objects; the last one is the
    # syntactically meaningful token, earlier ones are comments/NLs.
    self._tokens = tokens
    # Tracks whether value was ever reassigned after construction.
    self._modified = False

  @classmethod
  def Create(cls, token_type, string, offset=(0, 0)):
    """Build a TokenSnippet holding a single new token with the given text."""
    return cls(token_type,
               [offset_token.OffsetToken(token_type, string, offset)])

  @property
  def type(self):
    return self._type

  @property
  def type_name(self):
    # Human-readable name of the token type, e.g. 'NAME', 'OP'.
    return token.tok_name[self.type]

  @property
  def value(self):
    """The text of the meaningful (last) token."""
    return self._tokens[-1].string

  @value.setter
  def value(self, value):
    self._tokens[-1].string = value
    self._modified = True

  @property
  def children(self):
    # Leaf node: never has children.
    return []

  @property
  def tokens(self):
    return tuple(self._tokens)

  @property
  def modified(self):
    """True if this snippet's value has been changed since construction."""
    return self._modified

  def PrintTree(self, indent=0, stream=sys.stdout):
    # Python 2 print-to-stream syntax; each token on its own indented line.
    stream.write(' ' * indent)
    if not self.tokens:
      print >> stream, self.type_name
      return

    print >> stream, '%-4s' % self.type_name, repr(self.tokens[0].string)
    for tok in self.tokens[1:]:
      stream.write(' ' * indent)
      # Pad to align with the type name column printed above.
      print >> stream, ' ' * max(len(self.type_name), 4), repr(tok.string)
class Symbol(Snippet):
  """A Snippet containing sub-Snippets.

  The possible types and type_names are defined in Python's symbol module."""
  def __init__(self, symbol_type, children):
    self._type = symbol_type
    self._children = children

  @property
  def type(self):
    return self._type

  @property
  def type_name(self):
    # Human-readable grammar symbol name, e.g. 'expr_stmt'.
    return symbol.sym_name[self.type]

  @property
  def children(self):
    return self._children

  @children.setter
  def children(self, value):  # pylint: disable=arguments-differ
    self._children = value

  @property
  def tokens(self):
    # Flatten the subtree's tokens in document order.
    tokens = []
    for child in self.children:
      tokens += child.tokens
    return tuple(tokens)

  @property
  def modified(self):
    """True if any token anywhere in this subtree has been changed."""
    return any(child.modified for child in self.children)

  def PrintTree(self, indent=0, stream=sys.stdout):
    stream.write(' ' * indent)

    # If there's only one child, collapse it onto the same line.
    # NOTE: the trailing comma relies on Python 2 print softspace semantics
    # to join the chain of type names with single spaces.
    node = self
    while len(node.children) == 1 and len(node.children[0].children) == 1:
      print >> stream, node.type_name,
      node = node.children[0]

    print >> stream, node.type_name
    for child in node.children:
      child.PrintTree(indent+2, stream)
def Snippetize(f):
  """Return the syntax tree of the given file."""
  # Parse the whole file with the stdlib parser module first.
  f.seek(0)
  parse_tree = parser.st2list(parser.suite(f.read()))
  # Re-tokenize the same file to recover comments and whitespace, which the
  # parser module discards.
  token_queue = offset_token.Tokenize(f)

  root = _SnippetizeNode(parse_tree, token_queue)
  # The tree walk must consume every token exactly once.
  assert not token_queue
  return root
def _SnippetizeNode(node, tokens):
  # The parser module gives a syntax tree that discards comments,
  # non-terminating newlines, and whitespace information. Use the tokens given
  # by the tokenize module to annotate the syntax tree with the information
  # needed to exactly reproduce the original source code.
  node_type = node[0]

  if node_type >= token.NT_OFFSET:
    # Non-terminal: recurse into each child, consuming tokens left to right.
    subtrees = tuple(_SnippetizeNode(child, tokens) for child in node[1:])
    return Symbol(node_type, subtrees)

  # Terminal: first absorb any leading comments and non-terminating newlines.
  grabbed = []
  while tokens and tokens[0].type in (tokenize.COMMENT, tokenize.NL):
    grabbed.append(tokens.popleft())

  # parser has 2 NEWLINEs right before the end.
  # tokenize has 0 or 1 depending on if the file has one.
  # Create extra nodes without consuming tokens to account for this.
  if node_type == token.NEWLINE:
    for tok in tokens:
      if tok.type == token.ENDMARKER:
        return TokenSnippet(node_type, grabbed)
      if tok.type != token.DEDENT:
        break

  # The meaningful token must agree with the parse-tree node (the parser may
  # refine an OP token to a more specific type, hence the OP escape hatch).
  assert tokens[0].type == token.OP or node_type == tokens[0].type

  grabbed.append(tokens.popleft())
  return TokenSnippet(node_type, grabbed)