# Scanner produces tokens of the following types:
# DIRECTIVE(name, value)
# SCALAR(value, plain)
# Read comments in the Scanner code for more details.
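#
# As a rough illustration only (this list is hand-written, not produced by
# running the scanner), a document such as
#   foo: bar
# is scanned into a token stream along the lines of
#   STREAM-START, BLOCK-MAPPING-START, KEY, SCALAR("foo", plain),
#   VALUE, SCALAR("bar", plain), BLOCK-END, STREAM-END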
require 'rbyaml/error'
require 'rbyaml/tokens'
require 'rbyaml/constants'
class ScannerError < YAMLError
class ReaderError < YAMLError
def initialize(name, position, character, encoding, reason)
@character = character
if @character.__is_str
"'#{@encoding}' codec can't decode byte #x%02x: #{@reason}\n in \"#{@name}\", position #{@position}" % @character.to_i
"unacceptable character #x%04x: #{@reason}\n in \"#{@name}\", position #{@position}" % @character.to_i
SimpleKey = Struct.new(:token_number, :required, :column)
attr_reader :column, :stream, :stream_pointer, :eof, :buffer, :pointer
def initialize(stream)
# Have we reached the end of the stream?
# The number of unclosed '{' and '['. `flow_level == 0` means block
# List of processed tokens that are not yet emitted.
# Add the STREAM-START token.
# Number of tokens that were emitted through the `get_token` method.
# The current indentation level.
# Past indentation levels.
# Variables related to simple keys treatment.
# A simple key is a key that is not denoted by the '?' indicator.
# Example of simple keys:
# block simple key: value
# : { flow simple key: value }
# We emit the KEY token before all keys, so when we find a potential
# simple key, we try to locate the corresponding ':' indicator.
# Simple keys should be limited to a single line and 1024 characters.
# Can a simple key start at the current position? A simple key may
# - at the beginning of the line, not counting indentation spaces
# - after '{', '[', ',' (in the flow context),
# - after '?', ':', '-' (in the block context).
# In the block context, this flag also signifies if a block collection
# may start at the current position.
@allow_simple_key = true
# Keep track of possible simple keys. This is a dictionary. The key
# is `flow_level`; there can be no more than one possible simple key
# for each level. The value is a SimpleKey record:
# (token_number, required, column)
# A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow),
# '[', or '{' tokens.
@possible_simple_keys = {}
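# Illustrative walkthrough of how the two fields above cooperate (block
# context assumed): while scanning "foo: bar", the plain scalar "foo" is
# queued and save_possible_simple_key records a SimpleKey holding that
# token's number and column. When the following ':' is seen, fetch_value
# looks the record up, inserts a KEY token (plus BLOCK-MAPPING-START if a
# new indentation level opens) in front of the already queued scalar, and
# resets @allow_simple_key.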
@name = stream.respond_to?(:path) ? stream.path : stream.inspect
update(1) unless @pointer1 < @buffer_length
update(2) unless @pointer1+1 < @buffer_length
update(3) unless @pointer1+2 < @buffer_length
update(4) unless @pointer1+3 < @buffer_length
pix = @pointer1+index
unless pix < @buffer_length
pix = @pointer1+index
update(length) unless @pointer+length < @buffer_length
@buffer[@pointer...@pointer+length]
update(2) unless @pointer1+1 < @buffer_length
@buffer[@pointer..@pointer1]
def forward(length=1)
else forwardn(length)
update(1) unless @pointer1 < @buffer_length
update(2) unless @pointer1+1 < @buffer_length
buff = @buffer[@pointer...@pointer1+1]
index = buff.rindex(LINE_BR_REG)
@column = index ? -index : column+1
update(3) unless @pointer1+2 < @buffer_length
buff = @buffer[@pointer...@pointer1+2]
index = buff.rindex(LINE_BR_REG)
@column = index ? 1-index : column+2
update(4) unless @pointer1+3 < @buffer_length
buff = @buffer[@pointer...@pointer1+3]
index = buff.rindex(LINE_BR_REG)
@column = index ? 2-index : column+3
update(5) unless @pointer1+4 < @buffer_length
buff = @buffer[@pointer...@pointer1+4]
index = buff.rindex(LINE_BR_REG)
@column = index ? 3-index : column+4
update(6) unless @pointer1+5 < @buffer_length
buff = @buffer[@pointer...@pointer1+5]
index = buff.rindex(LINE_BR_REG)
@column = index ? 4-index : column+5
update(7) unless @pointer1+6 < @buffer_length
buff = @buffer[@pointer...@pointer1+6]
index = buff.rindex(LINE_BR_REG)
@column = index ? 5-index : column+6
update(length + 1) unless @pointer1+length < @buffer_length
buff = @buffer[@pointer...@pointer+length]
index = buff.rindex(LINE_BR_REG)
@column = index ? (length-index)-1 : column+length
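# Worked example for the column bookkeeping above (illustrative only): with
# the buffer holding "ab\ncd" and @pointer at 0, forwardn(4) consumes
# "ab\nc"; rindex finds the line break at offset 2, so @column becomes
# (4 - 2) - 1 = 1, i.e. one character past the last consumed line break.
# When no line break is consumed, the column simply advances by `length`.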
def check_printable(data)
if NON_PRINTABLE_RE =~ data
position = @buffer.length-@pointer+($~.offset(0)[0])
raise ReaderError.new(@name, position, $&,"unicode","special characters are not allowed")
return if @raw_buffer.nil?
@buffer = @buffer[@pointer..-1]
while @buffer.length < length
data = @stream.read(1024)
if data && !data.empty?
@stream_pointer += data.length
@buffer << @raw_buffer << ?\0
@buffer_length = @buffer.length
check_printable(@buffer[(-length)..-2])
check_printable(@buffer[(-length)..-1])
@pointer1 = @pointer+1
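# Note on the ?\0 sentinel appended above: when the underlying stream is
# exhausted the buffer is terminated with a NUL byte, so peek0 yields ?\0
# at end of input and callers such as fetch_more_tokens can dispatch on it
# directly (see the ?\0 branch that returns fetch_stream_end) instead of
# consulting an explicit EOF flag.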
def check_token(*choices)
# Check if the next token is one of the given types.
fetch_more_tokens while need_more_tokens
unless @tokens.empty?
return true if choices.empty?
for choice in choices
return true if choice === @tokens[0]
# Return the next token, but do not delete it from the queue.
fetch_more_tokens while need_more_tokens
return @tokens[0] unless @tokens.empty?
# Return the next token.
fetch_more_tokens while need_more_tokens
unless @tokens.empty?
fetch_more_tokens while need_more_tokens
while !@tokens.empty?
fetch_more_tokens while need_more_tokens
return false if @done
@tokens.empty? || next_possible_simple_key == @tokens_taken
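# Minimal usage sketch for the token interface above (illustrative only;
# assumes the enclosing scanner class is constructed directly from a
# stream, as in initialize):
#
#   scanner = Scanner.new(io)
#   while scanner.check_token          # any tokens left?
#     p scanner.get_token              # removes the next token from the queue
#   end
#
# The peek variant above returns the next token without removing it, and
# check_token(SomeTokenClass) restricts the check to the given token types
# (SomeTokenClass being a placeholder for any class from rbyaml/tokens).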
def fetch_more_tokens
# Eat whitespaces and comments until we reach the next token.
# Remove obsolete possible simple keys.
# stale_possible_simple_keys
# Compare the current indentation and column. It may add some tokens
# and decrease the current indentation level.
unwind_indent(@column)
# Peek the next character.
when ?\0: return fetch_stream_end
when ?': return fetch_single
when ?": return fetch_double
when ??: if !@flow_zero || NULL_OR_OTHER.include?(peek1): return fetch_key end
when ?:: if !@flow_zero || NULL_OR_OTHER.include?(peek1): return fetch_value end
when ?%: if colz: return fetch_directive end
when ?-: if colz && ENDING =~ prefix(4): return fetch_document_start; elsif NULL_OR_OTHER.include?(peek1): return fetch_block_entry end
when ?.: if colz && START =~ prefix(4): return fetch_document_end end
when ?[: return fetch_flow_sequence_start
when ?{: return fetch_flow_mapping_start
when ?]: return fetch_flow_sequence_end
when ?}: return fetch_flow_mapping_end
when ?,: return fetch_flow_entry
when ?*: return fetch_alias
when ?&: return fetch_anchor
when ?!: return fetch_tag
when ?|: if @flow_zero: return fetch_literal end
when ?>: if @flow_zero: return fetch_folded end
return fetch_plain if BEG =~ prefix(2)
raise ScannerError.new("while scanning for the next token","found character #{ch.chr}(#{ch}) that cannot start any token")
# Simple keys treatment.
def next_possible_simple_key
# Return the number of the nearest possible simple key. Actually we
# don't need to loop through the whole dictionary.
@possible_simple_keys.each_value {|key| return key.token_number if key.token_number}
def save_possible_simple_key
# The next token may start a simple key. We check if it's possible
# and save its position. This function is called for
# ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
# The next token might be a simple key. Let's save its number and position.
@possible_simple_keys[@flow_level] = SimpleKey.new(@tokens_taken+@tokens.length, @flow_zero && @indent == @column,column) if @allow_simple_key
# Indentation functions.
def unwind_indent(col)
## In flow context, tokens should respect indentation.
## Actually the condition should be `@indent >= column` according to
## the spec. But this condition will prohibit intuitively correct
## constructions such as
#if @flow_level and @indent > column
# raise ScannerError(nil, nil,
# "invalid indentation or unclosed '[' or '{'",
# In the flow context, indentation is ignored. We make the scanner less
# restrictive than the specification requires.
return nil if !@flow_zero
# In block context, we may need to issue the BLOCK-END tokens.
@indent = @indents.pop
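# Illustrative example of the unwinding above: for the document
#   a:
#     b: 1
#   c: 2
# the past indentation levels grow to [-1, 0] with the current indent at 2
# while "b: 1" is scanned; when the scanner reaches column 0 at "c: 2",
# this loop emits one BLOCK-END token and pops the indent back to 0.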
# Check if we need to increase indentation.
def fetch_stream_start
# We always add STREAM-START as the first token and STREAM-END as the
@tokens << STREAM_START
# Set the current indentation to -1.
# Reset everything (not really needed).
@allow_simple_key = false
@possible_simple_keys = {}
@tokens << STREAM_END
# The stream is finished.
# Set the current indentation to -1.
@allow_simple_key = false
# Scan and add DIRECTIVE.
@tokens << scan_directive
def fetch_document_start
fetch_document_indicator(DOCUMENT_START)
def fetch_document_end
fetch_document_indicator(DOCUMENT_END)
def fetch_document_indicator(token)
# Set the current indentation to -1.
# Reset simple keys. Note that there could not be a block collection
@allow_simple_key = false
# Add DOCUMENT-START or DOCUMENT-END.
def fetch_flow_sequence_start
fetch_flow_collection_start(FLOW_SEQUENCE_START)
def fetch_flow_mapping_start
fetch_flow_collection_start(FLOW_MAPPING_START)
def fetch_flow_collection_start(token)
# '[' and '{' may start a simple key.
save_possible_simple_key
# Increase the flow level.
# Simple keys are allowed after '[' and '{'.
@allow_simple_key = true
# Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
def fetch_flow_sequence_end
fetch_flow_collection_end(FLOW_SEQUENCE_END)
def fetch_flow_mapping_end
fetch_flow_collection_end(FLOW_MAPPING_END)
def fetch_flow_collection_end(token)
# Decrease the flow level.
# No simple keys after ']' or '}'.
@allow_simple_key = false
# Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
# Simple keys are allowed after ','.
@allow_simple_key = true
@tokens << FLOW_ENTRY
def fetch_block_entry
# Block context needs additional checks.
raise ScannerError.new(nil,"sequence entries are not allowed here") if !@allow_simple_key
# We may need to add BLOCK-SEQUENCE-START.
if add_indent(column)
@tokens << BLOCK_SEQUENCE_START
# It's an error for the block entry to occur in the flow context,
# but we let the parser detect this.
# Simple keys are allowed after '-'.
@allow_simple_key = true
@tokens << BLOCK_ENTRY
# Block context needs additional checks.
# Are we allowed to start a key (not necessarily a simple one)?
raise ScannerError.new(nil,"mapping keys are not allowed here") if !@allow_simple_key
# We may need to add BLOCK-MAPPING-START.
if add_indent(column)
@tokens << BLOCK_MAPPING_START
# Simple keys are allowed after '?' in the block context.
@allow_simple_key = @flow_zero
key = @possible_simple_keys[@flow_level]
# Do we determine a simple key?
# Block context needs additional checks.
# (Do we really need them? They will be caught by the parser
# We are allowed to start a complex value if and only if
# we can start a simple key.
raise ScannerError.new(nil,"mapping values are not allowed here") if !@allow_simple_key
# Simple keys are allowed after ':' in the block context.
@allow_simple_key = true
@possible_simple_keys.delete(@flow_level)
# If this key starts a new block mapping, we need to add
# BLOCK-MAPPING-START.
se = (@flow_zero && add_indent(key.column)) ? [BLOCK_MAPPING_START] : []
@tokens.insert(key.token_number-@tokens_taken,*se)
# There cannot be two simple keys one after another.
@allow_simple_key = false
# It must be a part of a complex key.
# ALIAS could be a simple key.
save_possible_simple_key
# No simple keys after ALIAS.
@allow_simple_key = false
# Scan and add ALIAS.
@tokens << scan_anchor(AliasToken)
# ANCHOR could start a simple key.
save_possible_simple_key
# No simple keys after ANCHOR.
@allow_simple_key = false
# Scan and add ANCHOR.
@tokens << scan_anchor(AnchorToken)
# TAG could start a simple key.
save_possible_simple_key
# No simple keys after TAG.
@allow_simple_key = false
fetch_block_scalar(?|)
fetch_block_scalar(?>)
def fetch_block_scalar(style)
# A simple key may follow a block scalar.
@allow_simple_key = true
# Scan and add SCALAR.
@tokens << scan_block_scalar(style)
fetch_flow_scalar(?')
fetch_flow_scalar(?")
def fetch_flow_scalar(style)
# A flow scalar could be a simple key.
save_possible_simple_key
# No simple keys after flow scalars.
@allow_simple_key = false
# Scan and add SCALAR.
@tokens << scan_flow_scalar(style)
# A plain scalar could be a simple key.
save_possible_simple_key
# No simple keys after plain scalars. But note that `scan_plain` will
# change this flag if the scan is finished at the beginning of the
@allow_simple_key = false
# Scan and add SCALAR. May change `allow_simple_key`.
@tokens << scan_plain
def scan_to_next_token
# We ignore spaces, line breaks and comments.
# If we find a line break in the block context, we set the flag
# `allow_simple_key` on.
# TODO: We need to make tab handling rules more sane. A good rule is
# Tabs cannot precede tokens
# BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
# KEY(block), VALUE(block), BLOCK-ENTRY
# So the checking code is
# @allow_simple_keys = false
# We also need to add the check for `allow_simple_keys == true` to
# `unwind_indent` before issuing BLOCK-END.
# Scanners for block, flow, and plain scalars need to be modified.
while !NULL_OR_LINEBR.include?(peek0)
if !scan_line_break.empty?
@allow_simple_key = true if @flow_zero
# See the specification for details.
name = scan_directive_name
value = scan_yaml_directive_value
value = scan_tag_directive_value
forward1 while !NULL_OR_LINEBR.include?(peek0)
scan_directive_ignored_line
DirectiveToken.new(name, value)
def scan_directive_name
# See the specification for details.
while ALPHA_REG =~ ch.chr
raise ScannerError.new("while scanning a directive","expected alphabetic or numeric character, but found #{ch.to_s}") if zlen
value = prefix(length)
raise ScannerError.new("while scanning a directive","expected alphabetic or numeric character, but found #{ch.to_s}") if !NULL_BL_LINEBR.include?(ch)
def scan_yaml_directive_value
# See the specification for details.
forward1 while peek0 == 32
major = scan_yaml_directive_number
raise ScannerError.new("while scanning a directive","expected a digit or '.', but found #{peek.to_s}") if peek0 != ?.
minor = scan_yaml_directive_number
raise ScannerError.new("while scanning a directive","expected a digit or ' ', but found #{peek.to_s}") if !NULL_BL_LINEBR.include?(peek0)
def scan_yaml_directive_number
# See the specification for details.
raise ScannerError.new("while scanning a directive","expected a digit, but found #{ch.to_s}") if !(ch.__is_ascii_num)
length += 1 while (peek(length).__is_ascii_num)
value = prefix(length)
def scan_tag_directive_value
# See the specification for details.
forward1 while peek0 == 32
handle = scan_tag_directive_handle
forward1 while peek0 == 32
prefix = scan_tag_directive_prefix
def scan_tag_directive_handle
# See the specification for details.
value = scan_tag_handle("directive")
raise ScannerError.new("while scanning a directive","expected ' ', but found #{peek0}") if peek0 != 32
def scan_tag_directive_prefix
# See the specification for details.
value = scan_tag_uri("directive")
raise ScannerError.new("while scanning a directive","expected ' ', but found #{peek0}") if !NULL_BL_LINEBR.include?(peek0)
def scan_directive_ignored_line
# See the specification for details.
forward1 while peek0 == 32
forward1 while !NULL_OR_LINEBR.include?(peek0)
raise ScannerError.new("while scanning a directive","expected a comment or a line break, but found #{peek0.to_s}") if !NULL_OR_LINEBR.include?(peek0)
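# For illustration only (hand-written, and assuming the usual RbYAML token
# values): a "%YAML 1.1" line is scanned into a DIRECTIVE token whose name
# is "YAML" and whose value holds the major/minor pair produced by
# scan_yaml_directive_value, while "%TAG !e! tag:example.com,2000:app/"
# yields a DIRECTIVE whose value is the [handle, prefix] pair returned by
# scan_tag_directive_value.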
def scan_anchor(token)
# The specification does not restrict characters for anchors and
# aliases. This may lead to problems, for instance, the document:
# can be interpreted in two ways, as
# [ *alias , "value" ]
# Therefore we restrict aliases to numbers and ASCII letters.
name = (peek0 == ?*) ? "alias":"anchor"
chunk = prefix(chunk_size)
if length = (NON_ALPHA =~ chunk)
raise ScannerError.new("while scanning an #{name}","expected alphabetic or numeric character, but found something else...") if length==0
value = prefix(length)
if !NON_ALPHA_OR_NUM.include?(peek0)
raise ScannerError.new("while scanning an #{name}","expected alphabetic or numeric character, but found #{peek0}")
# See the specification for details.
suffix = scan_tag_uri("tag")
raise ScannerError.new("while parsing a tag","expected '>', but found #{peek.to_s}") if peek0 != ?>
elsif NULL_T_BL_LINEBR.include?(ch)
while !NULL_T_BL_LINEBR.include?(ch)
handle = scan_tag_handle("tag")
suffix = scan_tag_uri("tag")
raise ScannerError.new("while scanning a tag","expected ' ', but found #{peek0}") if !NULL_BL_LINEBR.include?(peek0)
value = [handle, suffix]
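# The tag forms handled above, for illustration (hedged; the exact handle
# value for verbatim tags depends on branches not shown here):
#   !<tag:yaml.org,2002:str>   verbatim  - suffix is the full URI
#   !!str                      shorthand - handle "!!", suffix "str"
#   !local                     local tag - handle "!", suffix "local"
# In every case the token value is the [handle, suffix] pair built here.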
def scan_block_scalar(style)
# See the specification for details.
chomping, increment = scan_block_scalar_indicators
scan_block_scalar_ignored_line
# Determine the indentation level and go to the first non-empty line.
min_indent = @indent+1
min_indent = 1 if min_indent < 1
breaks, max_indent = scan_block_scalar_indentation
indent = [min_indent, max_indent].max
indent = min_indent+increment-1
breaks = scan_block_scalar_breaks(indent)
# Scan the inner part of the block scalar.
while column == indent and peek0 != ?\0
leading_non_space = !BLANK_T.include?(peek0)
length += 1 while !NULL_OR_LINEBR.include?(peek(length))
chunks << prefix(length)
line_break = scan_line_break
breaks = scan_block_scalar_breaks(indent)
if column == indent && peek0 != 0
# Unfortunately, folding rules are ambiguous.
# This is the folding according to the specification:
if folded && line_break == "\n" && leading_non_space && !BLANK_T.include?(peek0)
chunks << ' ' if breaks.empty?
# This is Clark Evans's interpretation (also in the spec
#if folded and line_break == u'\n':
# if self.peek() not in ' \t':
# chunks.append(u' ')
# chunks.append(line_break)
# chunks.append(line_break)
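# Small folding example for the '>' style (illustrative; default chomping
# and auto-detected indentation):
#   >
#     one
#     two
#
#     three
# The single break between "one" and "two" folds into a space, while the
# blank line before "three" is kept as a line break, so the scalar value
# becomes "one two\nthree\n".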
ScalarToken.new(chunks.to_s, false, style)
def scan_block_scalar_indicators
# See the specification for details.
if PLUS_MIN =~ ch.chr
increment = ch.chr.to_i
raise ScannerError.new("while scanning a block scalar","expected indentation indicator in the range 1-9, but found 0") if increment == 0
elsif ch.__is_ascii_num
increment = ch.chr.to_i
raise ScannerError.new("while scanning a block scalar","expected indentation indicator in the range 1-9, but found 0") if increment == 0
if PLUS_MIN =~ ch.chr
raise ScannerError.new("while scanning a block scalar","expected chomping or indentation indicators, but found #{peek0}") if !NULL_BL_LINEBR.include?(peek0)
[chomping, increment]
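# Header examples for the indicators parsed above (illustrative):
#   |2+   literal style, explicit indentation indicator 2, "keep" chomping
#   >-    folded style, auto-detected indentation, "strip" chomping
# The [chomping, increment] pair is consumed by scan_block_scalar, where a
# non-nil increment pins the content indentation at @indent + increment.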
def scan_block_scalar_ignored_line
# See the specification for details.
forward1 while peek0 == 32
forward1 while !NULL_OR_LINEBR.include?(peek0)
raise ScannerError.new("while scanning a block scalar","expected a comment or a line break, but found #{peek0}") if !NULL_OR_LINEBR.include?(peek0)
def scan_block_scalar_indentation
# See the specification for details.
while BLANK_OR_LINEBR.include?(peek0)
chunks << scan_line_break
max_indent = column if column > max_indent
def scan_block_scalar_breaks(indent)
# See the specification for details.
forward1 while @column < indent && peek0 == 32
while FULL_LINEBR.include?(peek0)
chunks << scan_line_break
forward1 while @column < indent && peek0 == 32
def scan_flow_scalar(style)
# See the specification for details.
# Note that we loosen indentation rules for quoted scalars. Quoted
# scalars don't need to adhere to indentation because " and ' clearly
# mark the beginning and the end of them. Therefore we are less
# restrictive than the specification requires. We only need to check
# that document separators are not included in scalars.
chunks += scan_flow_scalar_non_spaces(double)
chunks += scan_flow_scalar_spaces(double)
chunks += scan_flow_scalar_non_spaces(double)
ScalarToken.new(chunks.to_s, false, style)
def scan_flow_scalar_non_spaces(double)
# See the specification for details.
length += 1 while !SPACES_AND_STUFF.include?(peek(length))
chunks << prefix(length)
if !double && ch == ?' && peek1 == ?'
elsif (double && ch == ?') || (!double && DOUBLE_ESC.include?(ch))
elsif double && ch == ?\\
if UNESCAPES.member?(ch.chr)
chunks << UNESCAPES[ch.chr]
elsif ESCAPE_CODES.member?(ch.chr)
length = ESCAPE_CODES[ch.chr]
if NOT_HEXA =~ prefix(length)
raise ScannerError.new("while scanning a double-quoted scalar","expected escape sequence of #{length} hexadecimal digits, but found something else: #{prefix(length)}")
code = prefix(length).to_i(16).to_s
elsif FULL_LINEBR.include?(ch)
chunks += scan_flow_scalar_breaks(double)
raise ScannerError.new("while scanning a double-quoted scalar","found unknown escape character #{ch}")
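# For reference (assuming the tables in rbyaml/constants follow the YAML
# escape set): UNESCAPES maps single-letter escapes such as "n" => "\n" and
# "t" => "\t" directly to their characters, while ESCAPE_CODES gives the
# number of hex digits expected after "x", "u" and "U" (2, 4 and 8), which
# is the `length` checked against NOT_HEXA above.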
def scan_flow_scalar_spaces(double)
# See the specification for details.
length += 1 while BLANK_T.include?(peek(length))
whitespaces = prefix(length)
raise ScannerError.new("while scanning a quoted scalar","found unexpected end of stream")
elsif FULL_LINEBR.include?(ch)
line_break = scan_line_break
breaks = scan_flow_scalar_breaks(double)
if line_break != "\n"
chunks << line_break
chunks << whitespaces
def scan_flow_scalar_breaks(double)
# See the specification for details.
# Instead of checking indentation, we check for document
if (prefix == "---" || prefix == "...") && NULL_BL_T_LINEBR.include?(peek3)
raise ScannerError.new("while scanning a quoted scalar","found unexpected document separator")
forward1 while BLANK_T.include?(peek0)
if FULL_LINEBR.include?(peek0)
chunks << scan_line_break
# See the specification for details.
# We add an additional restriction for the flow context:
# plain scalars in the flow context cannot contain ',', ':' and '?'.
# We also keep track of the `allow_simple_key` flag here.
# Indentation rules are loosened for the flow context.
# We allow zero indentation for scalars, but then we need to check for
# document separators at the beginning of the line.
f_nzero, r_check = false, R_flowzero
f_nzero, r_check = true, R_flownonzero
chunk_size += 32 until length = (r_check =~ prefix(chunk_size))
if f_nzero && ch == ?: && !S4.include?(peek(length+1))
raise ScannerError.new("while scanning a plain scalar","found unexpected ':'","Please check http://pyyaml.org/wiki/YAMLColonInFlowContext for details.")
break if length == 0
@allow_simple_key = false
chunks << prefix(length)
spaces = scan_plain_spaces(indent)
break if !spaces || (@flow_zero && @column < indent)
return ScalarToken.new(chunks.to_s, true)
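# Illustrative flow-context behaviour of the scan above: in
#   { key: one,two }
# the plain scalar after the ':' stops at the ',', so the scanner emits the
# scalars "key" and "one", a FLOW-ENTRY for the ',', and then "two". In
# block context the same ',' would simply remain part of the plain scalar
# "one,two".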
def scan_plain_spaces(indent)
# See the specification for details.
# The specification is really confusing about tabs in plain scalars.
# We just forbid them completely. Do not use tabs in YAML!
length += 1 while peek(length) == 32
whitespaces = prefix(length)
if FULL_LINEBR.include?(ch)
line_break = scan_line_break
@allow_simple_key = true
return if END_OR_START =~ prefix(4)
while BLANK_OR_LINEBR.include?(peek0)
breaks << scan_line_break
return if END_OR_START =~ prefix(4)
if line_break != "\n"
chunks << line_break
elsif breaks.nil? || breaks.empty?
chunks << whitespaces
def scan_tag_handle(name)
# See the specification for details.
# For some strange reason, the specification does not allow '_' in
# tag handles. I have allowed it anyway.
raise ScannerError.new("while scanning a #{name}","expected '!', but found #{ch}") if ch != ?!
while ALPHA_REG =~ ch.chr
raise ScannerError.new("while scanning a #{name}","expected '!', but found #{ch}")
value = prefix(length)
def scan_tag_uri(name)
# See the specification for details.
# Note: we do not check if the URI is well-formed.
while STRANGE_CHR =~ ch.chr
chunks << prefix(length)
chunks << scan_uri_escapes(name)
chunks << prefix(length)
raise ScannerError.new("while parsing a #{name}","expected URI, but found #{ch}") if chunks.empty?
def scan_uri_escapes(name)
# See the specification for details.
raise ScannerError.new("while scanning a #{name}","expected URI escape sequence of 2 hexadecimal digits, but found #{peek1} and #{peek2}") if HEXA_REG !~ peek1.chr || HEXA_REG !~ peek2.chr
bytes << prefix(2).to_i(16).to_s
if FULL_LINEBR.include?(peek0)