2 # YAML can be parsed by an LL(1) parser!
4 # We use the following production rules:
5 # stream ::= implicit_document? explicit_document* STREAM-END
6 # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END?
7 # implicit_document ::= block_node DOCUMENT-END?
8 # block_node ::= ALIAS | properties? block_content
9 # flow_node ::= ALIAS | properties? flow_content
10 # properties ::= TAG ANCHOR? | ANCHOR TAG?
11 # block_content ::= block_collection | flow_collection | SCALAR
12 # flow_content ::= flow_collection | SCALAR
13 # block_collection ::= block_sequence | block_mapping
14 # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
15 # block_mapping ::= BLOCK-MAPPING_START ((KEY block_node_or_indentless_sequence?)? (VALUE block_node_or_indentless_sequence?)?)* BLOCK-END
16 # block_node_or_indentless_sequence ::= ALIAS | properties? (block_content | indentless_block_sequence)
17 # indentless_block_sequence ::= (BLOCK-ENTRY block_node?)+
18 # flow_collection ::= flow_sequence | flow_mapping
19 # flow_sequence ::= FLOW-SEQUENCE-START (flow_sequence_entry FLOW-ENTRY)* flow_sequence_entry? FLOW-SEQUENCE-END
20 # flow_mapping ::= FLOW-MAPPING-START (flow_mapping_entry FLOW-ENTRY)* flow_mapping_entry? FLOW-MAPPING-END
21 # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
22 # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
24 # TODO: support for BOM within a stream.
25 # stream ::= (BOM? implicit_document)? (BOM? explicit_document)* STREAM-END
27 # Note that there is a slight deviation from the specification. We require a
# non-empty node content if ANCHOR or TAG is specified. This disallows such
31 # key: !!str # empty value
33 # This is done to prevent ambiguity in parsing tags and aliases:
35 # { !!perl/YAML::Parser: value }
37 # What is it? Should it be interpreted as
38 # { ? !<tag:yaml.org,2002:perl/YAML::Parser> '' : value }
40 # { ? !<tag:yaml.org,2002:perl/YAML::Parser:> value : '' }
# Since we disallow empty node content, tags are always followed by spaces
45 # stream: FIRST(block_node) + { DIRECTIVE DOCUMENT-START }
46 # explicit_document: { DIRECTIVE DOCUMENT-START }
47 # implicit_document: FIRST(block_node)
48 # block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START }
49 # flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START }
50 # block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
51 # flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
52 # block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START }
53 # flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
54 # block_sequence: { BLOCK-SEQUENCE-START }
55 # block_mapping: { BLOCK-MAPPING-START }
56 # block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY }
# indentless_sequence: { BLOCK-ENTRY }
58 # flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
59 # flow_sequence: { FLOW-SEQUENCE-START }
60 # flow_mapping: { FLOW-MAPPING-START }
61 # flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
62 # flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
64 __all__
= ['Parser', 'ParserError']
66 from error
import MarkedYAMLError
class ParserError(MarkedYAMLError):
    # Raised when the token stream violates the YAML grammar.
    pass
74 # Since writing an LL(1) parser is a straightforward task, we do not give
76 # Note that we use Python generators. If you rewrite the parser in another
77 # language, you may replace all 'yield'-s with event handler calls.
81 u
'!!': u
'tag:yaml.org,2002:',
84 def __init__(self
, scanner
):
85 self
.scanner
= scanner
86 self
.current_event
= None
87 self
.yaml_version
= None
89 self
.event_generator
= self
.parse_stream()
91 def check(self
, *choices
):
92 # Check the type of the next event.
93 if self
.current_event
is None:
95 self
.current_event
= self
.event_generator
.next()
98 if self
.current_event
is not None:
99 for choice
in choices
:
100 if isinstance(self
.current_event
, choice
):
105 # Get the next event.
106 if self
.current_event
is None:
108 self
.current_event
= self
.event_generator
.next()
109 except StopIteration:
111 return self
.current_event
114 # Get the next event.
115 if self
.current_event
is None:
117 self
.current_event
= self
.event_generator
.next()
118 except StopIteration:
120 value
= self
.current_event
121 self
.current_event
= None
126 return self
.event_generator
128 def parse_stream(self
):
129 # implicit_document? explicit_document* STREAM-END
131 # Parse implicit document.
132 if not self
.scanner
.check(DirectiveToken
, DocumentStartToken
,
134 self
.tag_handles
= self
.DEFAULT_TAGS
135 for event
in self
.parse_block_node():
138 # Parse explicit documents.
139 while not self
.scanner
.check(StreamEndToken
):
140 self
.process_directives()
141 if not self
.scanner
.check(DocumentStartToken
):
142 raise ParserError(None, None,
143 "expected '<document start>', but found %r"
144 % self
.scanner
.peek().id,
145 self
.scanner
.peek().start_marker
)
146 token
= self
.scanner
.get()
147 if self
.scanner
.check(DirectiveToken
,
148 DocumentStartToken
, DocumentEndToken
, StreamEndToken
):
149 yield self
.process_empty_scalar(token
.end_marker
)
151 for event
in self
.parse_block_node():
153 while self
.scanner
.check(DocumentEndToken
):
156 # Parse end of stream.
157 token
= self
.scanner
.get()
158 yield StreamEndEvent(token
.start_marker
, token
.end_marker
)
160 def process_directives(self
):
162 self
.yaml_version
= None
163 self
.tag_handles
= {}
164 while self
.scanner
.check(DirectiveToken
):
165 token
= self
.scanner
.get()
166 if token
.name
== u
'YAML':
167 if self
.yaml_version
is not None:
168 raise ParserError(None, None,
169 "found duplicate YAML directive", token
.start_marker
)
170 major
, minor
= token
.value
172 raise ParserError(None, None,
173 "found incompatible YAML document (version 1.* is required)",
175 self
.yaml_version
= token
.value
176 elif token
.name
== u
'TAG':
177 handle
, prefix
= token
.value
178 if handle
in self
.tag_handles
:
179 raise ParserError(None, None,
180 "duplicate tag handle %r" % handle
.encode('utf-8'),
182 self
.tag_handles
[handle
] = prefix
183 for key
in self
.DEFAULT_TAGS
:
184 if key
not in self
.tag_handles
:
185 self
.tag_handles
[key
] = self
.DEFAULT_TAGS
[key
]
187 def parse_block_node(self
):
188 return self
.parse_node(block
=True)
190 def parse_flow_node(self
):
191 return self
.parse_node()
193 def parse_block_node_or_indentless_sequence(self
):
194 return self
.parse_node(block
=True, indentless_sequence
=True)
196 def parse_node(self
, block
=False, indentless_sequence
=False):
197 # block_node ::= ALIAS | properties? block_content
198 # flow_node ::= ALIAS | properties? flow_content
199 # properties ::= TAG ANCHOR? | ANCHOR TAG?
200 # block_content ::= block_collection | flow_collection | SCALAR
201 # flow_content ::= flow_collection | SCALAR
202 # block_collection ::= block_sequence | block_mapping
203 # block_node_or_indentless_sequence ::= ALIAS | properties?
204 # (block_content | indentless_block_sequence)
205 if self
.scanner
.check(AliasToken
):
206 token
= self
.scanner
.get()
207 yield AliasEvent(token
.value
, token
.start_marker
, token
.end_marker
)
211 start_marker
= end_marker
= tag_marker
= None
212 if self
.scanner
.check(AnchorToken
):
213 token
= self
.scanner
.get()
214 start_marker
= end_marker
= token
.start_marker
216 if self
.scanner
.check(TagToken
):
217 token
= self
.scanner
.get()
218 end_marker
= tag_marker
= token
.start_marker
220 elif self
.scanner
.check(TagToken
):
221 token
= self
.scanner
.get()
222 start_marker
= end_marker
= tag_marker
= token
.start_marker
224 if self
.scanner
.check(AnchorToken
):
225 token
= self
.scanner
.get()
226 end_marker
= token
.start_marker
230 if handle
is not None:
231 if handle
not in self
.tag_handles
:
232 raise ParserError("while parsing a node", start_marker
,
233 "found undefined tag handle %r" % handle
.encode('utf-8'),
235 tag
= self
.tag_handles
[handle
]+suffix
239 if not (self
.scanner
.check(ScalarToken
) and
240 self
.scanner
.peek().plain
):
242 if start_marker
is None:
243 start_marker
= self
.scanner
.peek().start_marker
245 collection_events
= None
246 if indentless_sequence
and self
.scanner
.check(BlockEntryToken
):
247 end_marker
= self
.scanner
.peek().end_marker
248 event
= SequenceEvent(anchor
, tag
, start_marker
, end_marker
)
249 collection_events
= self
.parse_indentless_sequence()
251 if self
.scanner
.check(ScalarToken
):
252 token
= self
.scanner
.get()
253 end_marker
= token
.end_marker
254 event
= ScalarEvent(anchor
, tag
, token
.value
,
255 start_marker
, end_marker
)
256 elif self
.scanner
.check(FlowSequenceStartToken
):
257 end_marker
= self
.scanner
.peek().end_marker
258 event
= SequenceEvent(anchor
, tag
, start_marker
, end_marker
)
259 collection_events
= self
.parse_flow_sequence()
260 elif self
.scanner
.check(FlowMappingStartToken
):
261 end_marker
= self
.scanner
.peek().end_marker
262 event
= MappingEvent(anchor
, tag
, start_marker
, end_marker
)
263 collection_events
= self
.parse_flow_mapping()
264 elif block
and self
.scanner
.check(BlockSequenceStartToken
):
265 end_marker
= self
.scanner
.peek().start_marker
266 event
= SequenceEvent(anchor
, tag
, start_marker
, end_marker
)
267 collection_events
= self
.parse_block_sequence()
268 elif block
and self
.scanner
.check(BlockMappingStartToken
):
269 end_marker
= self
.scanner
.peek().start_marker
270 event
= MappingEvent(anchor
, tag
, start_marker
, end_marker
)
271 collection_events
= self
.parse_block_mapping()
277 token
= self
.scanner
.peek()
278 raise ParserError("while scanning a %s node" % node
, start_marker
,
279 "expected the node content, but found %r" % token
.id,
282 if collection_events
is not None:
283 for event
in collection_events
:
286 def parse_block_sequence(self
):
287 # BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
288 token
= self
.scanner
.get()
289 start_marker
= token
.start_marker
290 while self
.scanner
.check(BlockEntryToken
):
291 token
= self
.scanner
.get()
292 if not self
.scanner
.check(BlockEntryToken
, BlockEndToken
):
293 for event
in self
.parse_block_node():
296 yield self
.process_empty_scalar(token
.end_marker
)
297 if not self
.scanner
.check(BlockEndToken
):
298 token
= self
.scanner
.peek()
299 raise ParserError("while scanning a block collection", start_marker
,
300 "expected <block end>, but found %r" % token
.id, token
.start_marker
)
301 token
= self
.scanner
.get()
302 yield CollectionEndEvent(token
.start_marker
, token
.end_marker
)
304 def parse_indentless_sequence(self
):
305 # (BLOCK-ENTRY block_node?)+
306 while self
.scanner
.check(BlockEntryToken
):
307 token
= self
.scanner
.get()
308 if not self
.scanner
.check(BlockEntryToken
,
309 KeyToken
, ValueToken
, BlockEndToken
):
310 for event
in self
.parse_block_node():
313 yield self
.process_empty_scalar(token
.end_marker
)
314 token
= self
.scanner
.peek()
315 yield CollectionEndEvent(token
.start_marker
, token
.start_marker
)
317 def parse_block_mapping(self
):
318 # BLOCK-MAPPING_START
319 # ((KEY block_node_or_indentless_sequence?)?
320 # (VALUE block_node_or_indentless_sequence?)?)*
322 token
= self
.scanner
.get()
323 start_marker
= token
.start_marker
324 while self
.scanner
.check(KeyToken
, ValueToken
):
325 if self
.scanner
.check(KeyToken
):
326 token
= self
.scanner
.get()
327 if not self
.scanner
.check(KeyToken
, ValueToken
, BlockEndToken
):
328 for event
in self
.parse_block_node_or_indentless_sequence():
331 yield self
.process_empty_scalar(token
.end_marker
)
332 if self
.scanner
.check(ValueToken
):
333 token
= self
.scanner
.get()
334 if not self
.scanner
.check(KeyToken
, ValueToken
, BlockEndToken
):
335 for event
in self
.parse_block_node_or_indentless_sequence():
338 yield self
.process_empty_scalar(token
.end_marker
)
340 token
= self
.scanner
.peek()
341 yield self
.process_empty_scalar(token
.start_marker
)
342 if not self
.scanner
.check(BlockEndToken
):
343 token
= self
.scanner
.peek()
344 raise ParserError("while scanning a block mapping", start_marker
,
345 "expected <block end>, but found %r" % token
.id, token
.start_marker
)
346 token
= self
.scanner
.get()
347 yield CollectionEndEvent(token
.start_marker
, token
.end_marker
)
349 def parse_flow_sequence(self
):
350 # flow_sequence ::= FLOW-SEQUENCE-START
351 # (flow_sequence_entry FLOW-ENTRY)*
352 # flow_sequence_entry?
354 # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
356 # Note that while production rules for both flow_sequence_entry and
357 # flow_mapping_entry are equal, their interpretations are different.
358 # For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?`
359 # generate an inline mapping (set syntax).
360 token
= self
.scanner
.get()
361 start_marker
= token
.start_marker
362 while not self
.scanner
.check(FlowSequenceEndToken
):
363 if self
.scanner
.check(KeyToken
):
364 token
= self
.scanner
.get()
365 yield MappingEvent(None, u
'!',
366 token
.start_marker
, token
.end_marker
)
367 if not self
.scanner
.check(ValueToken
,
368 FlowEntryToken
, FlowSequenceEndToken
):
369 for event
in self
.parse_flow_node():
372 yield self
.process_empty_scalar(token
.end_marker
)
373 if self
.scanner
.check(ValueToken
):
374 token
= self
.scanner
.get()
375 if not self
.scanner
.check(FlowEntryToken
, FlowSequenceEndToken
):
376 for event
in self
.parse_flow_node():
379 yield self
.process_empty_scalar(token
.end_marker
)
381 token
= self
.scanner
.peek()
382 yield self
.process_empty_scalar(token
.start_marker
)
383 token
= self
.scanner
.peek()
384 yield CollectionEndEvent(token
.start_marker
, token
.start_marker
)
386 for event
in self
.parse_flow_node():
388 if not self
.scanner
.check(FlowEntryToken
, FlowSequenceEndToken
):
389 token
= self
.scanner
.peek()
390 raise ParserError("while scanning a flow sequence", start_marker
,
391 "expected ',' or ']', but got %r" % token
.id, token
.start_marker
)
392 if self
.scanner
.check(FlowEntryToken
):
394 token
= self
.scanner
.get()
395 yield CollectionEndEvent(token
.start_marker
, token
.end_marker
)
397 def parse_flow_mapping(self
):
398 # flow_mapping ::= FLOW-MAPPING-START
399 # (flow_mapping_entry FLOW-ENTRY)*
400 # flow_mapping_entry?
402 # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
403 token
= self
.scanner
.get()
404 start_marker
= token
.start_marker
405 while not self
.scanner
.check(FlowMappingEndToken
):
406 if self
.scanner
.check(KeyToken
):
407 token
= self
.scanner
.get()
408 if not self
.scanner
.check(ValueToken
,
409 FlowEntryToken
, FlowMappingEndToken
):
410 for event
in self
.parse_flow_node():
413 yield self
.process_empty_scalar(token
.end_marker
)
414 if self
.scanner
.check(ValueToken
):
415 token
= self
.scanner
.get()
416 if not self
.scanner
.check(FlowEntryToken
, FlowMappingEndToken
):
417 for event
in self
.parse_flow_node():
420 yield self
.process_empty_scalar(token
.end_marker
)
422 token
= self
.scanner
.peek()
423 yield self
.process_empty_scalar(token
.start_marker
)
425 for event
in self
.parse_flow_node():
427 yield self
.process_empty_scalar(self
.scanner
.peek().start_marker
)
428 if not self
.scanner
.check(FlowEntryToken
, FlowMappingEndToken
):
429 token
= self
.scanner
.peek()
430 raise ParserError("while scanning a flow mapping", start_marker
,
431 "expected ',' or '}', but got %r" % token
.id, token
.start_marker
)
432 if self
.scanner
.check(FlowEntryToken
):
434 if not self
.scanner
.check(FlowMappingEndToken
):
435 token
= self
.scanner
.peek()
436 raise ParserError("while scanning a flow mapping", start_marker
,
437 "expected '}', but found %r" % token
.id, token
.start_marker
)
438 token
= self
.scanner
.get()
439 yield CollectionEndEvent(token
.start_marker
, token
.end_marker
)
441 def process_empty_scalar(self
, marker
):
442 return ScalarEvent(None, None, u
'', marker
, marker
)