2 # YAML can be parsed by an LL(1) parser!
4 # We use the following production rules:
5 # stream ::= STREAM-START implicit_document? explicit_document* STREAM-END
6 # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END?
7 # implicit_document ::= block_node DOCUMENT-END?
8 # block_node ::= ALIAS | properties? block_content
9 # flow_node ::= ALIAS | properties? flow_content
10 # properties ::= TAG ANCHOR? | ANCHOR TAG?
11 # block_content ::= block_collection | flow_collection | SCALAR
12 # flow_content ::= flow_collection | SCALAR
13 # block_collection ::= block_sequence | block_mapping
14 # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
15 # block_mapping ::= BLOCK-MAPPING-START ((KEY block_node_or_indentless_sequence?)? (VALUE block_node_or_indentless_sequence?)?)* BLOCK-END
16 # block_node_or_indentless_sequence ::= ALIAS | properties? (block_content | indentless_block_sequence)
17 # indentless_block_sequence ::= (BLOCK-ENTRY block_node?)+
18 # flow_collection ::= flow_sequence | flow_mapping
19 # flow_sequence ::= FLOW-SEQUENCE-START (flow_sequence_entry FLOW-ENTRY)* flow_sequence_entry? FLOW-SEQUENCE-END
20 # flow_mapping ::= FLOW-MAPPING-START (flow_mapping_entry FLOW-ENTRY)* flow_mapping_entry? FLOW-MAPPING-END
21 # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
22 # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
24 # TODO: support for BOM within a stream.
25 # stream ::= (BOM? implicit_document)? (BOM? explicit_document)* STREAM-END
28 # stream: { STREAM-START }
29 # explicit_document: { DIRECTIVE DOCUMENT-START }
30 # implicit_document: FIRST(block_node)
31 # block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START }
32 # flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START }
33 # block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
34 # flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
35 # block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START }
36 # flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
37 # block_sequence: { BLOCK-SEQUENCE-START }
38 # block_mapping: { BLOCK-MAPPING-START }
39 # block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY }
40 # indentless_block_sequence: { BLOCK-ENTRY }
41 # flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
42 # flow_sequence: { FLOW-SEQUENCE-START }
43 # flow_mapping: { FLOW-MAPPING-START }
44 # flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
45 # flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
47 require 'rbyaml/error'
48 require 'rbyaml/tokens'
49 require 'rbyaml/events'
50 require 'rbyaml/scanner'
53 class ParserError < YAMLError
59 '!!' => 'tag:yaml.org,2002:'
62 DOCUMENT_END_TRUE = DocumentEndEvent.new(true)
63 DOCUMENT_END_FALSE = DocumentEndEvent.new(false)
64 MAPPING_END = MappingEndEvent.new
65 SEQUENCE_END = SequenceEndEvent.new
66 STREAM_END = StreamEndEvent.new
67 STREAM_START = StreamStartEvent.new
70 def initialize(scanner)
# Tests the buffered event against +choices+.
# Visible behaviour: returns true when no choices are given, or when some
# choice matches the buffered event via `===`.
# NOTE(review): the loop that binds `choice` and the fall-through return value
# are not visible in this listing.
80 def check_event(*choices)
# Fill the one-event buffer only when it is empty.
82 @current_event = parse_stream_next if @current_event.nil?
84 return true if choices.empty?
# `===` is used so that a choice can act as a matcher (presumably an event class -- confirm).
86 return true if choice === @current_event
94 @current_event = parse_stream_next unless @current_event
100 @current_event = parse_stream_next unless @current_event
101 value = @current_event
108 while @current_event = parse_stream_next
115 @parse_stack = [:stream]
# Advances the parser by one step: when @parse_stack is not empty, the most
# recently pushed entry is popped into `meth`.
# NOTE(review): how `meth` is dispatched and what is returned is not visible in
# this listing; the commented-out `puts` lines below are debugging leftovers.
121 def parse_stream_next
122 if !@parse_stack.empty?
# Array#pop takes the LAST element, so productions run in reverse push order.
124 meth = @parse_stack.pop
125 #puts "our method: :#{meth}"
126 #puts "--- with peeked: :#{@scanner.peek_token.class} #{if @scanner.peek_token.respond_to?(:value): @scanner.peek_token.value.inspect; end}"
129 #puts "returning: #{val}"
140 #TERMINALS, definitions
# Builds the DocumentStartEvent for a document without an explicit start marker.
# Directives are processed first; the resulting version and tags are passed on.
# The first constructor argument is `false` (presumably the "explicit" flag --
# confirm against DocumentStartEvent).
152 def document_start_implicit
# NOTE(review): `token` is assigned but not used in the visible lines.
153 token = @scanner.peek_token
154 version, tags = process_directives
155 DocumentStartEvent.new(false,version,tags)
159 token = @scanner.peek_token
160 version, tags = process_directives
161 raise ParserError.new(nil, "expected '<document start>', but found #{token.tid}") unless @scanner.peek_token.__is_document_start
163 DocumentStartEvent.new(true,version,tags)
167 token = @scanner.peek_token
169 while @scanner.peek_token.__is_document_end
173 explicit ? DOCUMENT_END_TRUE : DOCUMENT_END_FALSE
177 AliasEvent.new(@scanner.get_token.value)
# Builds the SequenceStartEvent for a block sequence from the innermost pending
# anchor and tag (@anchors.last / @tags.last).
# NOTE(review): one line of this method is not visible in this listing.
180 def block_sequence_start
# The event is implicit when there is no pending tag or the tag is exactly "!".
181 implicit = @tags.last.nil? || @tags.last == "!"
# Last argument is false here and true in flow_sequence_start (presumably a flow-style flag -- confirm).
183 SequenceStartEvent.new(@anchors.last, @tags.last, implicit, false)
# Builds the SequenceStartEvent for an indentless block sequence from the
# innermost pending anchor and tag. No scanner call is visible in this method.
186 def block_indentless_sequence_start
# The event is implicit when there is no pending tag or the tag is exactly "!".
187 implicit = @tags.last.nil? || @tags.last == "!"
188 SequenceStartEvent.new(@anchors.last, @tags.last, implicit, false)
191 def block_sequence_end
192 if !@scanner.peek_token.__is_block_end
193 raise ParserError.new("while scanning a block collection","expected <block end>, but found #{token.tid}: #{token.inspect}")
199 def block_indentless_sequence_end
# Builds the MappingStartEvent for a block mapping from the innermost pending
# anchor and tag (@anchors.last / @tags.last).
# NOTE(review): one line of this method is not visible in this listing.
203 def block_mapping_start
# The event is implicit when there is no pending tag or the tag is exactly "!".
204 implicit = @tags.last.nil? || @tags.last == "!"
# Last argument is false here and true in flow_mapping_start (presumably a flow-style flag -- confirm).
206 MappingStartEvent.new(@anchors.last, @tags.last, implicit,false)
209 def block_mapping_end
210 if !@scanner.peek_token.__is_block_end
211 raise ParserError.new("while scanning a block mapping","expected <block end>, but found #{token.tid}")
# Builds the SequenceStartEvent for a flow sequence from the innermost pending
# anchor and tag (@anchors.last / @tags.last).
# NOTE(review): one line of this method is not visible in this listing.
217 def flow_sequence_start
# The event is implicit when there is no pending tag or the tag is exactly "!".
218 implicit = @tags.last.nil? || @tags.last == "!"
# Last argument is true here and false in block_sequence_start (presumably a flow-style flag -- confirm).
220 SequenceStartEvent.new(@anchors.last, @tags.last, implicit,true)
223 def flow_sequence_end
# Builds the MappingStartEvent used when flow_sequence_entry meets a KEY token
# inside a flow sequence. The event carries no anchor and no tag (both nil) and
# passes true for the remaining two arguments.
# NOTE(review): one line of this method is not visible in this listing.
228 def flow_internal_mapping_start
230 MappingStartEvent.new(nil,nil,true,true)
233 def flow_internal_mapping_end
# Builds the MappingStartEvent for a flow mapping from the innermost pending
# anchor and tag (@anchors.last / @tags.last).
# NOTE(review): one line of this method is not visible in this listing.
237 def flow_mapping_start
# The event is implicit when there is no pending tag or the tag is exactly "!".
238 implicit = @tags.last.nil? || @tags.last == "!"
# Last argument is true here and false in block_mapping_start (presumably a flow-style flag -- confirm).
240 MappingStartEvent.new(@anchors.last, @tags.last, implicit,true)
249 token = @scanner.get_token
250 if (token.plain && @tags.last.nil?) || @tags.last == "!"
251 implicit = [true, false]
252 elsif @tags.last.nil?
253 implicit = [false, true]
255 implicit = [false, false]
257 ScalarEvent.new(@anchors.last, @tags.last, implicit, token.value,token.style)
267 @parse_stack += [:stream_end, :explicit_document, :implicit_document]
# implicit_document ::= block_node DOCUMENT-END?
# Starts an implicit document unless the next token is a directive, a document
# start, or the stream end.
# NOTE(review): the return value for the skipped case is not visible here.
271 def implicit_document
272 curr = @scanner.peek_token
273 unless curr.__is_directive || curr.__is_document_start || curr.__is_stream_end
# The stack is popped from the end, so :block_node runs before :document_end.
274 @parse_stack += [:document_end, :block_node]
275 return document_start_implicit
# explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END?
# Parses one explicit document unless the stream has ended.
# NOTE(review): the return value at stream end is not visible here.
280 def explicit_document
281 if !@scanner.peek_token.__is_stream_end
# Re-schedules itself first so further explicit documents are tried after this
# one; the stack is popped from the end, so :block_node runs first, then
# :document_end, then :explicit_document again.
282 @parse_stack += [:explicit_document, :document_end, :block_node]
283 return document_start
289 curr = @scanner.peek_token
290 if curr.__is_directive || curr.__is_document_start || curr.__is_document_end || curr.__is_stream_end
296 @parse_stack << :un_properties
304 if @scanner.peek_token.__is_alias
307 @parse_stack << :un_properties
316 if @scanner.peek_token.__is_anchor
317 anchor = @scanner.get_token.value
318 if @scanner.peek_token.__is_tag
319 tag = @scanner.get_token.value
321 elsif @scanner.peek_token.__is_tag
322 tag = @scanner.get_token.value
323 if @scanner.peek_token.__is_anchor
324 anchor = @scanner.get_token.value
328 if !tag.nil? and tag != "!"
331 raise ParserError.new("while parsing a node","found undefined tag handle #{handle}") if !@tag_handles.include?(handle)
332 tag = @tag_handles[handle]+suffix
349 token = @scanner.peek_token
350 if token.__is_block_sequence_start
351 return block_sequence
352 elsif token.__is_block_mapping_start
354 elsif token.__is_flow_sequence_start
356 elsif token.__is_flow_mapping_start
358 elsif token.__is_scalar
361 raise ParserError.new("while scanning a node",
362 "expected the node content, but found #{token.tid.inspect} at #{@scanner.pointer}")
367 token = @scanner.peek_token
368 if token.__is_flow_sequence_start
370 elsif token.__is_flow_mapping_start
372 elsif token.__is_scalar
375 raise ParserError.new("while scanning a flow node","expected the node content, but found #{token.tid}")
379 def block_sequence_entry
380 if @scanner.peek_token.__is_block_entry
382 if !(@scanner.peek_token.__is_block_entry || @scanner.peek_token.__is_block_end)
383 @parse_stack += [:block_sequence_entry]
386 @parse_steck += [:block_sequence_entry]
# Handles the key half of one block mapping entry. Every visible branch
# schedules the value half (:block_mapping_entry_value, run first because the
# stack is popped from the end) followed by the next :block_mapping_entry.
# NOTE(review): several lines of this method are not visible in this listing.
393 def block_mapping_entry
394 # ((KEY block_node_or_indentless_sequence?)? (VALUE block_node_or_indentless_sequence?)?)*
395 if @scanner.peek_token.__is_key || @scanner.peek_token.__is_value
# NOTE(review): this test uses check_token(KeyToken) while the rest of the
# method uses the __is_key predicate.
396 if @scanner.check_token(KeyToken)
398 curr = @scanner.peek_token
# A key node is parsed only when the next token is not KEY, VALUE or the block end.
399 if !(curr.__is_key || curr.__is_value || curr.__is_block_end)
400 @parse_stack += [:block_mapping_entry,:block_mapping_entry_value]
401 return block_node_or_indentless_sequence
403 @parse_stack += [:block_mapping_entry,:block_mapping_entry_value]
407 @parse_stack += [:block_mapping_entry,:block_mapping_entry_value]
# Handles the value half of one block mapping entry (scheduled by
# block_mapping_entry).
# NOTE(review): several lines of this method are not visible in this listing.
414 def block_mapping_entry_value
415 if @scanner.peek_token.__is_key || @scanner.peek_token.__is_value
416 if @scanner.peek_token.__is_value
418 curr = @scanner.peek_token
# A value node is parsed only when the next token is not KEY, VALUE or the block end.
419 if !(curr.__is_key || curr.__is_value || curr.__is_block_end)
420 return block_node_or_indentless_sequence
432 @parse_stack += [:block_sequence_end,:block_sequence_entry]
437 @parse_stack += [:block_mapping_end,:block_mapping_entry]
# block_node_or_indentless_sequence ::=
#   ALIAS | properties? (block_content | indentless_block_sequence)
# Visible behaviour: checks for an ALIAS token first; a BLOCK-ENTRY token leads
# to indentless_block_sequence.
# NOTE(review): the alias branch and the remaining branches are not visible here.
441 def block_node_or_indentless_sequence
442 if @scanner.peek_token.__is_alias
445 if @scanner.peek_token.__is_block_entry
447 return indentless_block_sequence
# indentless_block_sequence ::= (BLOCK-ENTRY block_node?)+
# Schedules the entries and the closing step, then returns the start event.
455 def indentless_block_sequence
# The stack is popped from the end, so the entry handler runs before the end handler.
456 @parse_stack += [:block_indentless_sequence_end,:indentless_block_sequence_entry]
457 block_indentless_sequence_start
# Handles one `BLOCK-ENTRY block_node?` item of an indentless sequence and
# re-schedules itself in both visible branches.
# NOTE(review): several lines of this method are not visible in this listing.
460 def indentless_block_sequence_entry
461 if @scanner.peek_token.__is_block_entry
463 curr = @scanner.peek_token
# Unlike block_sequence_entry, KEY and VALUE are also tested here, alongside
# BLOCK-ENTRY and the block end.
464 if !(curr.__is_block_entry || curr.__is_key || curr.__is_value || curr.__is_block_end)
465 @parse_stack << :indentless_block_sequence_entry
468 @parse_stack << :indentless_block_sequence_entry
476 @parse_stack += [:flow_sequence_end,:flow_sequence_entry]
481 @parse_stack += [:flow_mapping_end,:flow_mapping_entry]
# flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
# Handles one entry of a flow sequence; nothing is scheduled once the next token
# is FLOW-SEQUENCE-END.
# NOTE(review): some lines of this method are not visible in this listing.
485 def flow_sequence_entry
486 if !@scanner.peek_token.__is_flow_sequence_end
487 if @scanner.peek_token.__is_key
# A KEY inside a flow sequence opens a mapping. The stack is popped from the
# end, so the scheduled order is: content, value, mapping end, entry marker,
# then the next sequence entry.
488 @parse_stack += [:flow_sequence_entry,:flow_entry_marker,:flow_internal_mapping_end,:flow_internal_value,:flow_internal_content]
489 return flow_internal_mapping_start
# Otherwise: handle a possible entry marker now, then a flow node, then the next entry.
491 @parse_stack += [:flow_sequence_entry,:flow_node]
492 return flow_entry_marker
# Handles the key content of a mapping opened inside a flow sequence (scheduled
# by flow_sequence_entry).
# NOTE(review): both branch bodies are not visible in this listing.
498 def flow_internal_content
499 token = @scanner.peek_token
# True when the next token is something other than VALUE, FLOW-ENTRY or FLOW-SEQUENCE-END.
500 if !(token.__is_value || token.__is_flow_entry || token.__is_flow_sequence_end)
# Handles the value part of a mapping opened inside a flow sequence (scheduled
# by flow_sequence_entry).
# NOTE(review): the branch bodies are not visible in this listing.
507 def flow_internal_value
508 if @scanner.peek_token.__is_value
# True when the next token is something other than FLOW-ENTRY or FLOW-SEQUENCE-END.
510 if !(@scanner.peek_token.__is_flow_entry || @scanner.peek_token.__is_flow_sequence_end)
# Handles the FLOW-ENTRY separator between flow collection entries.
# NOTE(review): the body of the check and the return value are not visible in
# this listing.
520 def flow_entry_marker
521 if @scanner.peek_token.__is_flow_entry
# flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
# Handles one entry of a flow mapping; nothing is scheduled once the next token
# is FLOW-MAPPING-END.
# NOTE(review): some lines of this method are not visible in this listing.
527 def flow_mapping_entry
528 if !@scanner.peek_token.__is_flow_mapping_end
529 if @scanner.peek_token.__is_key
# The stack is popped from the end: the value runs next, then the entry marker,
# then the next mapping entry. The key content is handled immediately.
530 @parse_stack += [:flow_mapping_entry,:flow_entry_marker,:flow_mapping_internal_value]
531 return flow_mapping_internal_content
# Otherwise: handle a possible entry marker now, then a flow node, then the next entry.
533 @parse_stack += [:flow_mapping_entry,:flow_node]
534 return flow_entry_marker
# Handles the key content of a flow mapping entry (called from
# flow_mapping_entry when a KEY token is seen).
# NOTE(review): both branch bodies are not visible in this listing.
540 def flow_mapping_internal_content
541 curr = @scanner.peek_token
# True when the next token is something other than VALUE, FLOW-ENTRY or FLOW-MAPPING-END.
542 if !(curr.__is_value || curr.__is_flow_entry || curr.__is_flow_mapping_end)
# Handles the value part of a flow mapping entry (scheduled by
# flow_mapping_entry).
# NOTE(review): the branch bodies are not visible in this listing.
550 def flow_mapping_internal_value
551 if @scanner.peek_token.__is_value
# True when the next token is something other than FLOW-ENTRY or FLOW-MAPPING-END.
553 if !(@scanner.peek_token.__is_flow_entry || @scanner.peek_token.__is_flow_mapping_end)
# Consumes the leading DIRECTIVE tokens and records them in @yaml_version and
# @tag_handles. Callers destructure the result as `version, tags`.
# Raises ParserError on a duplicate YAML directive, a major version other than
# 1, or a duplicate tag handle.
# NOTE(review): some lines (including any state reset at the top and the final
# return) are not visible in this listing.
564 def process_directives
566 while @scanner.peek_token.__is_directive
567 token = @scanner.get_token
568 if token.name == "YAML"
569 raise ParserError.new(nil,"found duplicate YAML directive") if !@yaml_version.nil?
# token.value is indexed as [major, minor]; only the major number is validated.
570 major, minor = token.value[0].to_i, token.value[1].to_i
571 raise ParserError.new(nil,"found incompatible YAML document (version 1.* is required)") if major != 1
572 @yaml_version = [major,minor]
573 elsif token.name == "TAG"
574 handle, prefix = token.value
575 raise ParserError.new(nil,"duplicate tag handle #{handle}") if @tag_handles.member?(handle)
576 @tag_handles[handle] = prefix
# `value` is a two-element array: the version and either a copy of the tag
# handles or nil when no handles were declared.
579 if !@tag_handles.empty?
580 value = @yaml_version, @tag_handles.dup
582 value = @yaml_version, nil
# In the visible order, default handles are filled in after `value` is built,
# and only for handles that are not already present.
584 for key in DEFAULT_TAGS.keys
585 @tag_handles[key] = DEFAULT_TAGS[key] if !@tag_handles.include?(key)
# Builds the ScalarEvent for an omitted node: no anchor, no tag, the implicit
# pair [true, false], and an empty string value.
590 def process_empty_scalar
591 ScalarEvent.new(nil, nil, [true, false], "")