1 # Copyright 2015 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
11 from catapult_base
.refactor
import offset_token
class Snippet(object):
  """A node in the Python parse tree.

  The Python grammar is defined at:
  https://docs.python.org/2/reference/grammar.html

  There are two types of Snippets:
    TokenSnippets are leaf nodes containing actual text.
    Symbols are internal nodes representing higher-level groupings, and are
        defined by the left-hand sides of the BNFs in the above link.
  """

  @property
  def type(self):
    """Return the integer type code of this node."""
    raise NotImplementedError()

  @property
  def type_name(self):
    """Return the name corresponding to this node's type code."""
    raise NotImplementedError()

  @property
  def children(self):
    """Return a list of this node's children."""
    raise NotImplementedError()

  @property
  def tokens(self):
    """Return a tuple of the tokens this Snippet contains."""
    raise NotImplementedError()

  def PrintTree(self, indent=0, stream=sys.stdout):
    """Spew a pretty-printed parse tree. Mostly useful for debugging."""
    raise NotImplementedError()

  def __str__(self):
    """Return the result of untokenizing the tokens of this Snippet."""
    return offset_token.Untokenize(self.tokens)

  @staticmethod
  def _Matches(snippet, snippet_type):
    """Return whether |snippet| is of the requested |snippet_type|.

    An int |snippet_type| is compared against the snippet's type code;
    anything else is treated as a class and checked with isinstance().
    """
    if isinstance(snippet_type, int):
      return snippet.type == snippet_type
    return isinstance(snippet, snippet_type)

  def FindAll(self, snippet_type):
    """Yield this Snippet and every descendant matching |snippet_type|.

    Nodes are visited parent-first, children in order.

    Args:
      snippet_type: Either an int type code or a Snippet class.
    """
    if self._Matches(self, snippet_type):
      yield self

    for child in self.children:
      for snippet in child.FindAll(snippet_type):
        yield snippet

  def FindChild(self, snippet_type, **kwargs):
    """Return the first direct child matching |snippet_type| and |kwargs|.

    Args:
      snippet_type: Either an int type code or a Snippet class.
      **kwargs: Attribute names mapped to the values the child must have.

    Raises:
      ValueError: If no direct child matches.
    """
    for child in self.children:
      if not self._Matches(child, snippet_type):
        continue

      # Iterate over items(), not the dict itself: iterating the dict yields
      # only the attribute names and breaks the (attribute, value) unpacking.
      if any(getattr(child, attribute) != value
             for attribute, value in kwargs.items()):
        continue

      return child

    raise ValueError('%s is not in %s. Children are: %s' %
                     (snippet_type, self, self.children))

  def FindChildren(self, snippet_type):
    """Yield every direct child matching |snippet_type|.

    Args:
      snippet_type: Either an int type code or a Snippet class.
    """
    for child in self.children:
      if self._Matches(child, snippet_type):
        yield child
class TokenSnippet(Snippet):
  """A Snippet containing a list of tokens.

  A list of tokens may start with any number of comments and non-terminating
  newlines, but must end with a syntactically meaningful token.
  """

  def __init__(self, token_type, tokens):
    # For operators and delimiters, the TokenSnippet's type may be more specific
    # than the type of the constituent token. E.g. the TokenSnippet type is
    # token.DOT, but the token type is token.OP. This is because the parser
    # has more context than the tokenizer.
    self._type = token_type
    self._tokens = tokens
    self._modified = False

  @classmethod
  def Create(cls, token_type, string, offset=(0, 0)):
    """Return a new TokenSnippet wrapping a single freshly built OffsetToken."""
    return cls(token_type,
               [offset_token.OffsetToken(token_type, string, offset)])

  @property
  def type(self):
    return self._type

  @property
  def type_name(self):
    return token.tok_name[self.type]

  @property
  def value(self):
    """The string of the last token in this Snippet."""
    return self._tokens[-1].string

  @value.setter
  def value(self, value):
    # Writing the value also flags the Snippet as modified.
    self._tokens[-1].string = value
    self._modified = True

  @property
  def children(self):
    # Leaf node: never has children.
    return []

  @property
  def tokens(self):
    return tuple(self._tokens)

  @property
  def modified(self):
    """Whether the value of this Snippet has been assigned since creation."""
    return self._modified

  def PrintTree(self, indent=0, stream=sys.stdout):
    """Write this node's type name and token strings to |stream|.

    stream.write() is used rather than the Python 2 print statement so the
    same output is produced under both Python 2 and Python 3.

    Args:
      indent: Number of spaces to write before each line.
      stream: File-like object that receives the output.
    """
    padding = ' ' * indent
    stream.write(padding)
    if not self.tokens:
      stream.write('%s\n' % self.type_name)
      return

    stream.write(
        '%s %s\n' % ('%-4s' % self.type_name, repr(self.tokens[0].string)))
    # Remaining tokens are aligned under the first token's string.
    for tok in self.tokens[1:]:
      stream.write(padding)
      stream.write(
          '%s %s\n' % (' ' * max(len(self.type_name), 4), repr(tok.string)))
class Symbol(Snippet):
  """A Snippet containing sub-Snippets.

  The possible types and type_names are defined in Python's symbol module.
  """

  def __init__(self, symbol_type, children):
    self._type = symbol_type
    self._children = children

  @property
  def type(self):
    return self._type

  @property
  def type_name(self):
    return symbol.sym_name[self.type]

  @property
  def children(self):
    return self._children

  @children.setter
  def children(self, value):  # pylint: disable=arguments-differ
    self._children = value

  @property
  def tokens(self):
    """Return a tuple of all the children's tokens, in child order."""
    return tuple(tok for child in self.children for tok in child.tokens)

  @property
  def modified(self):
    """Whether any child reports itself as modified."""
    return any(child.modified for child in self.children)

  def PrintTree(self, indent=0, stream=sys.stdout):
    """Write this subtree to |stream|, one node (or collapsed chain) per line.

    stream.write() is used rather than the Python 2 print statement so the
    same output is produced under both Python 2 and Python 3.

    Args:
      indent: Number of spaces to write before this node's line.
      stream: File-like object that receives the output.
    """
    stream.write(' ' * indent)

    # If there's only one child, collapse it onto the same line.
    node = self
    while len(node.children) == 1 and len(node.children[0].children) == 1:
      stream.write('%s ' % node.type_name)
      node = node.children[0]

    stream.write('%s\n' % node.type_name)
    for child in node.children:
      child.PrintTree(indent + 2, stream)
def Snippetize(f):
  """Return the syntax tree of the given file."""
  parse_tree = parser.st2list(parser.suite(f.read()))
  token_queue = offset_token.Tokenize(f)

  # The tokens are handed to the helper together with the parser's tree.
  snippet = _SnippetizeNode(parse_tree, token_queue)
  return snippet
def _SnippetizeNode(node, tokens):
  """Convert one syntax-tree node into a Snippet, consuming |tokens|.

  The parser module gives a syntax tree that discards comments,
  non-terminating newlines, and whitespace information. Use the tokens given
  by the tokenize module to annotate the syntax tree with the information
  needed to exactly reproduce the original source code.

  Args:
    node: A syntax-tree node whose first item is its type code; for symbols
        the remaining items are the child nodes.
    tokens: A queue of tokens, supporting indexing and popleft(), that is
        consumed from the left as leaf nodes are built.

  Returns:
    A Symbol for type codes at or above token.NT_OFFSET, otherwise a
    TokenSnippet.
  """
  node_type = node[0]

  if node_type >= token.NT_OFFSET:
    # Symbol. Children are converted in order, each consuming its own tokens.
    return Symbol(
        node_type,
        tuple(_SnippetizeNode(child, tokens) for child in node[1:]))

  # Token. Leading comments and non-terminating newlines attach to it.
  leading = []
  while tokens and tokens[0].type in (tokenize.COMMENT, tokenize.NL):
    leading.append(tokens.popleft())

  # parser has 2 NEWLINEs right before the end.
  # tokenize has 0 or 1 depending on if the file has one.
  # Create extra nodes without consuming tokens to account for this.
  if node_type == token.NEWLINE:
    # Look past any DEDENTs at the first other token still pending.
    upcoming = next(
        (tok for tok in tokens if tok.type != token.DEDENT), None)
    if upcoming is not None and upcoming.type == token.ENDMARKER:
      return TokenSnippet(node_type, leading)

  assert tokens[0].type == token.OP or node_type == tokens[0].type

  leading.append(tokens.popleft())
  return TokenSnippet(node_type, leading)