Imported File#ftype spec from rubyspecs.
[rbx.git] / lib / rbyaml / parser.rb
blob4ddff7b04cf09f4810ba4dda1683be24974d664b
2 # YAML can be parsed by an LL(1) parser!
4 # We use the following production rules:
5 # stream            ::= STREAM-START implicit_document? explicit_document* STREAM-END
6 # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END?
7 # implicit_document ::= block_node DOCUMENT-END?
8 # block_node    ::= ALIAS | properties? block_content
9 # flow_node     ::= ALIAS | properties? flow_content
10 # properties    ::= TAG ANCHOR? | ANCHOR TAG?
11 # block_content     ::= block_collection | flow_collection | SCALAR
12 # flow_content      ::= flow_collection | SCALAR
13 # block_collection  ::= block_sequence | block_mapping
14 # block_sequence    ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
# block_mapping     ::= BLOCK-MAPPING-START ((KEY block_node_or_indentless_sequence?)? (VALUE block_node_or_indentless_sequence?)?)* BLOCK-END
16 # block_node_or_indentless_sequence ::= ALIAS | properties? (block_content | indentless_block_sequence)
17 # indentless_block_sequence         ::= (BLOCK-ENTRY block_node?)+
18 # flow_collection   ::= flow_sequence | flow_mapping
19 # flow_sequence     ::= FLOW-SEQUENCE-START (flow_sequence_entry FLOW-ENTRY)* flow_sequence_entry? FLOW-SEQUENCE-END
20 # flow_mapping      ::= FLOW-MAPPING-START (flow_mapping_entry FLOW-ENTRY)* flow_mapping_entry? FLOW-MAPPING-END
21 # flow_sequence_entry   ::= flow_node | KEY flow_node? (VALUE flow_node?)?
22 # flow_mapping_entry    ::= flow_node | KEY flow_node? (VALUE flow_node?)?
24 # TODO: support for BOM within a stream.
25 # stream ::= (BOM? implicit_document)? (BOM? explicit_document)* STREAM-END
27 # FIRST sets:
28 # stream: { STREAM-START }
29 # explicit_document: { DIRECTIVE DOCUMENT-START }
30 # implicit_document: FIRST(block_node)
31 # block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START }
32 # flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START }
33 # block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
34 # flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
35 # block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START }
36 # flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
37 # block_sequence: { BLOCK-SEQUENCE-START }
38 # block_mapping: { BLOCK-MAPPING-START }
39 # block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY }
# indentless_block_sequence: { BLOCK-ENTRY }
41 # flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
42 # flow_sequence: { FLOW-SEQUENCE-START }
43 # flow_mapping: { FLOW-MAPPING-START }
44 # flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
45 # flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
47 require 'rbyaml/error'
48 require 'rbyaml/tokens'
49 require 'rbyaml/events'
50 require 'rbyaml/scanner'
52 module RbYAML
  # Error raised by Parser when the token stream does not match the
  # expected grammar (for example a missing document start or block end).
  class ParserError < YAMLError
  end
56   class Parser
    # Tag handles that process_directives installs for any handle a
    # %TAG directive has not already defined.
    DEFAULT_TAGS = {
      '!' => '!',
      '!!' => 'tag:yaml.org,2002:'
    }
    # Event objects built once here; the terminal methods return these
    # same instances instead of allocating a new event on each call.
    DOCUMENT_END_TRUE = DocumentEndEvent.new(true)
    DOCUMENT_END_FALSE = DocumentEndEvent.new(false)
    MAPPING_END = MappingEndEvent.new
    SEQUENCE_END = SequenceEndEvent.new
    STREAM_END = StreamEndEvent.new
    STREAM_START = StreamStartEvent.new
68     
70     def initialize(scanner)
71       @scanner = scanner
72       @current_event = nil
73       @yaml_version = nil
74       @events = nil
75       @working_events = nil
76       @tag_handles = { }
77       @parse_stack = nil
78     end
80     def check_event(*choices)
81       parse_stream
82       @current_event = parse_stream_next if @current_event.nil?
83       if @current_event
84         return true if choices.empty?
85         for choice in choices
86           return true if choice === @current_event
87         end
88       end
89       false
90     end
92     def peek_event
93       parse_stream
94       @current_event = parse_stream_next unless @current_event
95       @current_event
96     end
98     def get_event
99       parse_stream
100       @current_event = parse_stream_next unless @current_event
101       value = @current_event
102       @current_event = nil
103       value
104     end
106     def each_event
107       parse_stream
108       while @current_event = parse_stream_next
109         yield @current_event
110       end
111     end
113     def parse_stream
114       if !@parse_stack
115         @parse_stack = [:stream]
116         @tags = []
117         @anchors = []
118       end
119     end
121     def parse_stream_next
122       if !@parse_stack.empty?
123         while true
124           meth = @parse_stack.pop
125 #puts "our method: :#{meth}"
126 #puts "--- with peeked: :#{@scanner.peek_token.class} #{if @scanner.peek_token.respond_to?(:value): @scanner.peek_token.value.inspect; end}"
127           val = send(meth)
128           if !val.nil?
129 #puts "returning: #{val}"
130             return val
131           end
132         end
133       else
134         @tags = []
135         @anchors = []
136         return nil
137       end
138     end
140 #TERMINALS, definitions
    # Takes the next token from the scanner, discards it, and returns the
    # shared STREAM_START event.
    def stream_start
      @scanner.get_token
      STREAM_START
    end
    # Takes the next token from the scanner, discards it, and returns the
    # shared STREAM_END event.
    def stream_end
      @scanner.get_token
      STREAM_END
    end
151     
152     def document_start_implicit
153       token = @scanner.peek_token
154       version, tags = process_directives
155       DocumentStartEvent.new(false,version,tags)
156     end
158     def document_start
159       token = @scanner.peek_token
160       version, tags = process_directives
161       raise ParserError.new(nil, "expected '<document start>', but found #{token.tid}") unless @scanner.peek_token.__is_document_start
162       @scanner.get_token
163       DocumentStartEvent.new(true,version,tags)
164     end
166     def document_end
167       token = @scanner.peek_token
168       explicit = false
169       while @scanner.peek_token.__is_document_end
170         @scanner.get_token
171         explicit = true
172       end
173       explicit ? DOCUMENT_END_TRUE : DOCUMENT_END_FALSE
174     end
    # Consumes the next token and wraps its value in an AliasEvent.
    # Callers invoke this only after peeking an alias token.
    def _alias
      AliasEvent.new(@scanner.get_token.value)
    end
180     def block_sequence_start
181       implicit = @tags.last.nil? || @tags.last == "!"
182       @scanner.get_token
183       SequenceStartEvent.new(@anchors.last, @tags.last, implicit, false)
184     end
186     def block_indentless_sequence_start
187       implicit = @tags.last.nil? || @tags.last == "!"
188       SequenceStartEvent.new(@anchors.last, @tags.last, implicit, false)
189     end
191     def block_sequence_end
192       if !@scanner.peek_token.__is_block_end
193         raise ParserError.new("while scanning a block collection","expected <block end>, but found #{token.tid}: #{token.inspect}")
194       end
195       @scanner.get_token
196       SEQUENCE_END
197     end
    # Returns the shared SEQUENCE_END event without consuming a token.
    def block_indentless_sequence_end
      SEQUENCE_END
    end
203     def block_mapping_start
204       implicit = @tags.last.nil? || @tags.last == "!"
205       @scanner.get_token
206       MappingStartEvent.new(@anchors.last, @tags.last, implicit,false)
207     end
209     def block_mapping_end
210       if !@scanner.peek_token.__is_block_end
211         raise ParserError.new("while scanning a block mapping","expected <block end>, but found #{token.tid}")
212       end
213       @scanner.get_token
214       MAPPING_END
215     end
217     def flow_sequence_start
218       implicit = @tags.last.nil? || @tags.last == "!"
219       @scanner.get_token
220       SequenceStartEvent.new(@anchors.last, @tags.last, implicit,true)
221     end
    # Takes the next token from the scanner, discards it, and returns the
    # shared SEQUENCE_END event. The token type is not checked here.
    def flow_sequence_end
      @scanner.get_token
      SEQUENCE_END
    end
    # Opens the single-pair mapping that flow_sequence_entry creates when it
    # sees a key token inside a flow sequence: consumes the next token and
    # returns a MappingStartEvent with no anchor and no tag.
    # NOTE(review): the two trailing `true` arguments appear to be the
    # implicit and flow-style flags, judging by the other *_start methods.
    def flow_internal_mapping_start
      @scanner.get_token
      MappingStartEvent.new(nil,nil,true,true)
    end
    # Returns the shared MAPPING_END event without consuming a token.
    def flow_internal_mapping_end
      MAPPING_END
    end
237     def flow_mapping_start
238       implicit = @tags.last.nil? || @tags.last == "!"
239       @scanner.get_token
240       MappingStartEvent.new(@anchors.last, @tags.last, implicit,true)
241     end
    # Takes the next token from the scanner, discards it, and returns the
    # shared MAPPING_END event. The token type is not checked here.
    def flow_mapping_end
      @scanner.get_token
      MAPPING_END
    end
248     def scalar
249       token = @scanner.get_token
250       if (token.plain && @tags.last.nil?) || @tags.last == "!"
251         implicit = [true, false]
252       elsif @tags.last.nil?
253         implicit = [false, true]
254       else
255         implicit = [false, false]
256       end
257       ScalarEvent.new(@anchors.last, @tags.last, implicit, token.value,token.style)
258     end
    # Returns the event for a node with no content; delegates to
    # process_empty_scalar and consumes no token.
    def empty_scalar
      process_empty_scalar
    end
263     
265 # PRODUCTIONS
266     def stream
267       @parse_stack += [:stream_end, :explicit_document, :implicit_document]
268       stream_start
269     end
271     def implicit_document
272       curr = @scanner.peek_token
273       unless curr.__is_directive || curr.__is_document_start || curr.__is_stream_end
274         @parse_stack += [:document_end, :block_node]
275         return document_start_implicit
276       end
277       nil
278     end
280     def explicit_document
281       if !@scanner.peek_token.__is_stream_end
282         @parse_stack += [:explicit_document, :document_end, :block_node]
283         return document_start
284       end
285       nil
286     end
288     def block_node
289       curr = @scanner.peek_token
290       if curr.__is_directive || curr.__is_document_start || curr.__is_document_end || curr.__is_stream_end
291         return empty_scalar
292       else
293         if curr.__is_alias
294           return _alias
295         else
296           @parse_stack << :un_properties
297           properties
298           return block_content
299         end
300       end
301     end
303     def flow_node
304       if @scanner.peek_token.__is_alias
305         return _alias
306       else
307         @parse_stack << :un_properties
308         properties
309         return flow_content
310       end
311     end
313     def properties
314       anchor = nil
315       tag = nil
316       if @scanner.peek_token.__is_anchor
317         anchor = @scanner.get_token.value
318         if @scanner.peek_token.__is_tag
319           tag = @scanner.get_token.value
320         end
321       elsif @scanner.peek_token.__is_tag
322         tag = @scanner.get_token.value
323         if @scanner.peek_token.__is_anchor
324           anchor = @scanner.get_token.value
325         end
326       end
327       
328       if !tag.nil? and tag != "!"
329         handle, suffix = tag
330         if !handle.nil?
331           raise ParserError.new("while parsing a node","found undefined tag handle #{handle}") if !@tag_handles.include?(handle)
332           tag = @tag_handles[handle]+suffix
333         else
334           tag = suffix
335         end
336       end
337       @anchors << anchor
338       @tags << tag
339       nil
340     end
    # Drops the innermost entry from the anchor and tag stacks, undoing one
    # earlier call to properties. Returns nil so the parse loop continues
    # with the next stack entry.
    def un_properties
      @anchors.pop
      @tags.pop
      nil
    end
348     def block_content
349       token = @scanner.peek_token
350       if token.__is_block_sequence_start
351         return block_sequence
352       elsif token.__is_block_mapping_start
353         return block_mapping
354       elsif token.__is_flow_sequence_start
355         return flow_sequence
356       elsif token.__is_flow_mapping_start
357         return flow_mapping
358       elsif token.__is_scalar
359         return scalar
360       else
361         raise ParserError.new("while scanning a node",
362                               "expected the node content, but found #{token.tid.inspect} at #{@scanner.pointer}")
363       end
364     end
366     def flow_content
367       token = @scanner.peek_token
368       if token.__is_flow_sequence_start
369         return flow_sequence
370       elsif token.__is_flow_mapping_start
371         return flow_mapping
372       elsif token.__is_scalar
373         return scalar
374       else
375         raise ParserError.new("while scanning a flow node","expected the node content, but found #{token.tid}")
376       end
377     end
379     def block_sequence_entry
380       if @scanner.peek_token.__is_block_entry
381         @scanner.get_token
382         if !(@scanner.peek_token.__is_block_entry || @scanner.peek_token.__is_block_end)
383           @parse_stack += [:block_sequence_entry]
384           return block_node
385         else
386           @parse_steck += [:block_sequence_entry]
387           return empty_scalar
388         end
389       end      
390       nil
391     end
393     def block_mapping_entry
394       #   ((KEY block_node_or_indentless_sequence?)? (VALUE block_node_or_indentless_sequence?)?)*
395       if @scanner.peek_token.__is_key || @scanner.peek_token.__is_value
396         if @scanner.check_token(KeyToken)
397           @scanner.get_token
398           curr = @scanner.peek_token
399           if !(curr.__is_key || curr.__is_value || curr.__is_block_end)
400             @parse_stack += [:block_mapping_entry,:block_mapping_entry_value]
401             return block_node_or_indentless_sequence
402           else
403             @parse_stack += [:block_mapping_entry,:block_mapping_entry_value]
404             return empty_scalar
405           end
406         else
407           @parse_stack += [:block_mapping_entry,:block_mapping_entry_value]
408           return empty_scalar
409         end
410       end
411       nil
412     end
414     def block_mapping_entry_value
415       if @scanner.peek_token.__is_key || @scanner.peek_token.__is_value
416         if @scanner.peek_token.__is_value
417           @scanner.get_token
418           curr = @scanner.peek_token
419           if !(curr.__is_key || curr.__is_value || curr.__is_block_end)
420             return block_node_or_indentless_sequence
421           else
422             return empty_scalar
423           end
424         else
425           return empty_scalar
426         end
427       end
428       nil
429     end
431     def block_sequence
432       @parse_stack += [:block_sequence_end,:block_sequence_entry]
433       block_sequence_start
434     end
436     def block_mapping
437       @parse_stack += [:block_mapping_end,:block_mapping_entry]
438       block_mapping_start
439     end
441     def block_node_or_indentless_sequence
442       if @scanner.peek_token.__is_alias
443         return _alias
444       else
445         if @scanner.peek_token.__is_block_entry
446           properties
447           return indentless_block_sequence
448         else
449           properties
450           return block_content
451         end
452       end
453     end
455     def indentless_block_sequence
456       @parse_stack += [:block_indentless_sequence_end,:indentless_block_sequence_entry]
457       block_indentless_sequence_start
458     end
460     def indentless_block_sequence_entry
461       if @scanner.peek_token.__is_block_entry
462         @scanner.get_token
463         curr = @scanner.peek_token
464         if !(curr.__is_block_entry || curr.__is_key || curr.__is_value || curr.__is_block_end)
465           @parse_stack << :indentless_block_sequence_entry
466           return block_node
467         else
468           @parse_stack << :indentless_block_sequence_entry
469           return empty_scalar
470         end
471       end
472       nil
473     end
475     def flow_sequence
476       @parse_stack += [:flow_sequence_end,:flow_sequence_entry]
477       flow_sequence_start
478     end
480     def flow_mapping
481       @parse_stack += [:flow_mapping_end,:flow_mapping_entry]
482       flow_mapping_start
483     end
485     def flow_sequence_entry
486       if !@scanner.peek_token.__is_flow_sequence_end
487         if @scanner.peek_token.__is_key
488           @parse_stack += [:flow_sequence_entry,:flow_entry_marker,:flow_internal_mapping_end,:flow_internal_value,:flow_internal_content]
489           return flow_internal_mapping_start
490         else
491           @parse_stack += [:flow_sequence_entry,:flow_node]
492           return flow_entry_marker
493         end
494       end
495       nil
496     end
498     def flow_internal_content
499       token = @scanner.peek_token
500       if !(token.__is_value || token.__is_flow_entry || token.__is_flow_sequence_end)
501         flow_node
502       else
503         empty_scalar
504       end
505     end
507     def flow_internal_value
508       if @scanner.peek_token.__is_value
509         @scanner.get_token
510         if !(@scanner.peek_token.__is_flow_entry || @scanner.peek_token.__is_flow_sequence_end)
511           flow_node
512         else
513           empty_scalar
514         end
515       else
516         empty_scalar
517       end
518     end
520     def flow_entry_marker
521       if @scanner.peek_token.__is_flow_entry
522         @scanner.get_token
523       end
524       nil
525     end
527     def flow_mapping_entry
528       if !@scanner.peek_token.__is_flow_mapping_end
529         if @scanner.peek_token.__is_key
530           @parse_stack += [:flow_mapping_entry,:flow_entry_marker,:flow_mapping_internal_value]
531           return flow_mapping_internal_content
532         else
533           @parse_stack += [:flow_mapping_entry,:flow_node]
534           return flow_entry_marker
535         end
536       end
537       nil
538     end
540     def flow_mapping_internal_content
541       curr = @scanner.peek_token
542       if !(curr.__is_value || curr.__is_flow_entry || curr.__is_flow_mapping_end)
543         @scanner.get_token
544         flow_node
545       else
546         empty_scalar
547       end
548     end
550     def flow_mapping_internal_value
551       if @scanner.peek_token.__is_value
552         @scanner.get_token
553         if !(@scanner.peek_token.__is_flow_entry || @scanner.peek_token.__is_flow_mapping_end)
554           flow_node
555         else
556           empty_scalar
557         end
558       else
559         empty_scalar
560       end
561     end
562    
564     def process_directives
565       # DIRECTIVE*
566       while @scanner.peek_token.__is_directive
567         token = @scanner.get_token
568         if token.name == "YAML"
569           raise ParserError.new(nil,"found duplicate YAML directive") if !@yaml_version.nil?
570           major, minor = token.value[0].to_i, token.value[1].to_i
571           raise ParserError.new(nil,"found incompatible YAML document (version 1.* is required)") if major != 1
572           @yaml_version = [major,minor]
573         elsif token.name == "TAG"
574           handle, prefix = token.value
575           raise ParserError.new(nil,"duplicate tag handle #{handle}") if @tag_handles.member?(handle)
576           @tag_handles[handle] = prefix
577         end
578       end
579       if !@tag_handles.empty?
580         value = @yaml_version, @tag_handles.dup
581       else
582         value = @yaml_version, nil
583       end
584       for key in DEFAULT_TAGS.keys
585         @tag_handles[key] = DEFAULT_TAGS[key] if !@tag_handles.include?(key)
586       end
587       value
588     end
    # Builds the event for a node with no content: a ScalarEvent with no
    # anchor, no tag, the implicit pair [true, false] and an empty string
    # value. No token is consumed.
    def process_empty_scalar
      ScalarEvent.new(nil, nil, [true, false], "")
    end
593   end