1 # Copyright 2006 Google, Inc. All Rights Reserved.
2 # Licensed to PSF under a Contributor Agreement.
6 The grammer is taken from PatternGrammar.txt.
8 The compiler compiles a pattern to a pytree.*Pattern instance.
11 __author__
= "Guido van Rossum <guido@python.org>"
16 # Fairly local imports
17 from .pgen2
import driver
18 from .pgen2
import literals
19 from .pgen2
import token
20 from .pgen2
import tokenize
22 # Really local imports
26 # The pattern grammar file
27 _PATTERN_GRAMMAR_FILE
= os
.path
.join(os
.path
.dirname(__file__
),
31 def tokenize_wrapper(input):
32 """Tokenizes a string suppressing significant whitespace."""
33 skip
= (token
.NEWLINE
, token
.INDENT
, token
.DEDENT
)
34 tokens
= tokenize
.generate_tokens(driver
.generate_lines(input).__next
__)
35 for quintuple
in tokens
:
36 type, value
, start
, end
, line_text
= quintuple
41 class PatternCompiler(object):
43 def __init__(self
, grammar_file
=_PATTERN_GRAMMAR_FILE
):
46 Takes an optional alternative filename for the pattern grammar.
48 self
.grammar
= driver
.load_grammar(grammar_file
)
49 self
.syms
= pygram
.Symbols(self
.grammar
)
50 self
.pygrammar
= pygram
.python_grammar
51 self
.pysyms
= pygram
.python_symbols
52 self
.driver
= driver
.Driver(self
.grammar
, convert
=pattern_convert
)
54 def compile_pattern(self
, input, debug
=False):
55 """Compiles a pattern string to a nested pytree.*Pattern object."""
56 tokens
= tokenize_wrapper(input)
57 root
= self
.driver
.parse_tokens(tokens
, debug
=debug
)
58 return self
.compile_node(root
)
60 def compile_node(self
, node
):
61 """Compiles a node, recursively.
63 This is one big switch on the node type.
65 # XXX Optimize certain Wildcard-containing-Wildcard patterns
67 if node
.type == self
.syms
.Matcher
:
68 node
= node
.children
[0] # Avoid unneeded recursion
70 if node
.type == self
.syms
.Alternatives
:
71 # Skip the odd children since they are just '|' tokens
72 alts
= [self
.compile_node(ch
) for ch
in node
.children
[::2]]
75 p
= pytree
.WildcardPattern([[a
] for a
in alts
], min=1, max=1)
78 if node
.type == self
.syms
.Alternative
:
79 units
= [self
.compile_node(ch
) for ch
in node
.children
]
82 p
= pytree
.WildcardPattern([units
], min=1, max=1)
85 if node
.type == self
.syms
.NegatedUnit
:
86 pattern
= self
.compile_basic(node
.children
[1:])
87 p
= pytree
.NegatedPattern(pattern
)
90 assert node
.type == self
.syms
.Unit
94 if len(nodes
) >= 3 and nodes
[1].type == token
.EQUAL
:
98 if len(nodes
) >= 2 and nodes
[-1].type == self
.syms
.Repeater
:
102 # Now we've reduced it to: STRING | NAME [Details] | (...) | [...]
103 pattern
= self
.compile_basic(nodes
, repeat
)
105 if repeat
is not None:
106 assert repeat
.type == self
.syms
.Repeater
107 children
= repeat
.children
109 if child
.type == token
.STAR
:
112 elif child
.type == token
.PLUS
:
115 elif child
.type == token
.LBRACE
:
116 assert children
[-1].type == token
.RBRACE
117 assert len(children
) in (3, 5)
118 min = max = self
.get_int(children
[1])
119 if len(children
) == 5:
120 max = self
.get_int(children
[3])
123 if min != 1 or max != 1:
124 pattern
= pattern
.optimize()
125 pattern
= pytree
.WildcardPattern([[pattern
]], min=min, max=max)
129 return pattern
.optimize()
131 def compile_basic(self
, nodes
, repeat
=None):
132 # Compile STRING | NAME [Details] | (...) | [...]
133 assert len(nodes
) >= 1
135 if node
.type == token
.STRING
:
136 value
= literals
.evalString(node
.value
)
137 return pytree
.LeafPattern(content
=value
)
138 elif node
.type == token
.NAME
:
141 if value
not in TOKEN_MAP
:
142 raise SyntaxError("Invalid token: %r" % value
)
143 return pytree
.LeafPattern(TOKEN_MAP
[value
])
147 elif not value
.startswith("_"):
148 type = getattr(self
.pysyms
, value
, None)
150 raise SyntaxError("Invalid symbol: %r" % value
)
151 if nodes
[1:]: # Details present
152 content
= [self
.compile_node(nodes
[1].children
[1])]
155 return pytree
.NodePattern(type, content
)
156 elif node
.value
== "(":
157 return self
.compile_node(nodes
[1])
158 elif node
.value
== "[":
159 assert repeat
is None
160 subpattern
= self
.compile_node(nodes
[1])
161 return pytree
.WildcardPattern([[subpattern
]], min=0, max=1)
164 def get_int(self
, node
):
165 assert node
.type == token
.NUMBER
166 return int(node
.value
)
169 # Map named tokens to the type value for a LeafPattern
170 TOKEN_MAP
= {"NAME": token
.NAME
,
171 "STRING": token
.STRING
,
172 "NUMBER": token
.NUMBER
,
176 def pattern_convert(grammar
, raw_node_info
):
177 """Converts raw node information to a Node or Leaf instance."""
178 type, value
, context
, children
= raw_node_info
179 if children
or type in grammar
.number2symbol
:
180 return pytree
.Node(type, children
, context
=context
)
182 return pytree
.Leaf(type, value
, context
=context
)
185 def compile_pattern(pattern
):
186 return PatternCompiler().compile_pattern(pattern
)