3 # YAML-DIRECTIVE(major_version, minor_version), TAG-DIRECTIVE(handle, prefix)
4 # RESERVED-DIRECTIVE(name)
5 # DOCUMENT-START, DOCUMENT-END
6 # BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END
7 # FLOW-SEQUENCE-START, FLOW-MAPPING-START, FLOW-SEQUENCE-END, FLOW-MAPPING-END
9 # ALIAS(name), ANCHOR(name), TAG(value), SCALAR(value, plain)
12 from marker
import Marker
13 #from error import YAMLError
14 from stream
import Stream
16 #class ScannerError(YAMLError):
class ScannerError(Exception):
    """Error raised by the scanner when the input cannot be tokenized.

    Instances are created with a single human-readable message (see the
    scanner's `fail` method).
    """
class Token:
    """Base class of all scanner tokens.

    Every token records the position where it starts and where it ends.
    """

    def __init__(self, start_marker, end_marker):
        """Store the markers delimiting the token in the input."""
        self.start_marker = start_marker
        self.end_marker = end_marker
class DirectiveToken(Token):
    """Common base class of the directive tokens."""
class YAMLDirectiveToken(DirectiveToken):
    """YAML-DIRECTIVE(major_version, minor_version) token."""

    def __init__(self, major_version, minor_version, start_marker, end_marker):
        """Store the version numbers and the markers delimiting the token."""
        self.major_version = major_version
        self.minor_version = minor_version
        self.start_marker = start_marker
        self.end_marker = end_marker
class TagDirectiveToken(DirectiveToken):
    """TAG-DIRECTIVE token; constructed by the scanner from two markers."""
class ReservedDirectiveToken(DirectiveToken):
    """RESERVED-DIRECTIVE(name) token."""

    def __init__(self, name, start_marker, end_marker):
        """Store the directive name and the markers delimiting the token."""
        self.name = name
        self.start_marker = start_marker
        self.end_marker = end_marker
# The tokens below carry no data besides the start and end markers, which
# they inherit from Token.

class DocumentStartToken(Token):
    """DOCUMENT-START token."""


class DocumentEndToken(Token):
    """DOCUMENT-END token."""


class EndToken(Token):
    """Token appended by the scanner when the end of the stream is reached."""


class BlockSequenceStartToken(Token):
    """BLOCK-SEQUENCE-START token."""


class BlockMappingStartToken(Token):
    """BLOCK-MAPPING-START token."""


class BlockEndToken(Token):
    """BLOCK-END token."""


class FlowSequenceStartToken(Token):
    """FLOW-SEQUENCE-START token."""


class FlowMappingStartToken(Token):
    """FLOW-MAPPING-START token."""


class FlowSequenceEndToken(Token):
    """FLOW-SEQUENCE-END token."""


class FlowMappingEndToken(Token):
    """FLOW-MAPPING-END token."""


class KeyToken(Token):
    """Token emitted before every mapping key."""


class ValueToken(Token):
    """Token emitted for the ':' value indicator."""


class EntryToken(Token):
    """Token emitted for the '-' and ',' entry indicators."""
class AliasToken(Token):
    """ALIAS(name) token."""

    def __init__(self, value, start_marker, end_marker):
        """Store the alias value and the markers delimiting the token."""
        self.value = value
        self.start_marker = start_marker
        self.end_marker = end_marker
class AnchorToken(Token):
    """ANCHOR(name) token."""

    def __init__(self, value, start_marker, end_marker):
        """Store the anchor value and the markers delimiting the token."""
        self.value = value
        self.start_marker = start_marker
        self.end_marker = end_marker
class TagToken(Token):
    """TAG(value) token."""

    def __init__(self, value, start_marker, end_marker):
        """Store the tag value and the markers delimiting the token."""
        self.value = value
        self.start_marker = start_marker
        self.end_marker = end_marker
class ScalarToken(Token):
    """SCALAR(value, plain) token."""

    def __init__(self, value, plain, start_marker, end_marker):
        """Store the scalar value, its `plain` flag and the token markers."""
        self.value = value
        self.plain = plain
        self.start_marker = start_marker
        self.end_marker = end_marker
class SimpleKey:
    """Record describing a position where a simple key may start.

    The fields are (token_number, required, index, line, column, marker);
    the scanner keeps one such record per flow level.
    """

    def __init__(self, token_number, required, index, line, column, marker):
        """Store the token number, the `required` flag and the position."""
        self.token_number = token_number
        self.required = required
        self.index = index
        self.line = line
        self.column = column
        self.marker = marker
def __init__(self, source, data):
    """Initialize the scanner.

    `source` and `data` are passed unchanged to `Stream`.
    """
    # The input stream. The Stream class does the dirty work of checking
    # for BOM and converting the input data to Unicode. It also adds NUL
    # to the data (NOTE(review): presumably at the end; the scanner treats
    # u'\0' as the end of the input).
    #
    # Stream supports the following methods:
    #   self.stream.peek(k=1)     # peek the next k characters
    #   self.stream.forward(k=1)  # read the next k characters and move the
    #                             # pointer
    self.stream = Stream(source, data)

    # Had we reached the end of the stream?
    self.done = False

    # The number of unclosed '{' and '['. `flow_level == 0` means block
    # context.
    self.flow_level = 0

    # List of processed tokens that are not yet emitted.
    self.tokens = []

    # Number of tokens that were emitted through the `get_token` method.
    self.tokens_taken = 0

    # The current indentation level.
    self.indent = -1

    # Past indentation levels.
    self.indents = []

    # Variables related to simple keys treatment.
    #
    # A simple key is a key that is not denoted by the '?' indicator.
    # Example of simple keys:
    #   block simple key: value
    #   ? not a simple key:
    #   : { flow simple key: value }
    # We emit the KEY token before all keys, so when we find a potential
    # simple key, we try to locate the corresponding ':' indicator.
    # Simple keys should be limited to a single line and 1024 characters.

    # Can a simple key start at the current position? A simple key may
    # start:
    # - at the beginning of the line, not counting indentation spaces
    #       (in block context),
    # - after '{', '[', ',' (in the flow context),
    # - after '?', ':', '-' (in the block context).
    # In the block context, this flag also signifies whether a block
    # collection may start at the current position.
    self.allow_simple_key = True

    # Keep track of possible simple keys. This is a dictionary. The key
    # is `flow_level`; there can be no more than one possible simple key
    # for each level. The value is a SimpleKey record:
    #   (token_number, required, index, line, column, marker)
    # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow),
    # '[', or '{' tokens.
    self.possible_simple_keys = {}
180 # Two public methods.
def peek_token(self):
    """Return the current token without removing it from the queue.

    More input is scanned for as long as `need_more_tokens()` asks for it.
    Returns None when no token is pending.
    """
    while self.need_more_tokens():
        self.fetch_more_tokens()
    # The queue may be empty once scanning has finished; do not index into
    # an empty list.
    if self.tokens:
        return self.tokens[0]
    return None
def get_token(self):
    """Get the current token and remove it from the list of pending tokens.

    Every token handed out increments `tokens_taken`. Returns None when no
    token is pending.
    """
    while self.need_more_tokens():
        self.fetch_more_tokens()
    if self.tokens:
        self.tokens_taken += 1
        return self.tokens.pop(0)
    return None
def need_more_tokens(self):
    """Tell whether more input must be scanned before a token is handed out.

    Returns False once the stream is finished, True when the queue is
    empty, and otherwise True only if the first queued token may still turn
    out to be a simple key.
    """
    if self.done:
        return False
    if not self.tokens:
        return True
    # The current token may be a potential simple key, so we
    # need to look further.
    self.stale_possible_simple_keys()
    return self.next_possible_simple_key() == self.tokens_taken
def fetch_more_tokens(self):
    """Scan the next token(s) and append them to the token queue.

    Skips whitespace and comments, updates simple-key and indentation
    state, then dispatches on the next character to the matching
    `fetch_*` method. Calls `invalid_token()` when nothing matches.
    """
    # Eat whitespaces and comments until we reach the next token.
    self.scan_to_next_token()

    # Remove obsolete possible simple keys.
    self.stale_possible_simple_keys()

    # Compare the current indentation and column. It may add some tokens
    # and decrease the current indentation level.
    self.unwind_indent(self.stream.column)

    # Peek the next character.
    ch = self.stream.peek()

    # Is it the end of stream?
    if ch == u'\0':
        return self.fetch_end()

    # Is it a directive?
    if ch == u'%' and self.check_directive():
        return self.fetch_directive()

    # Is it the document start?
    if ch == u'-' and self.check_document_start():
        return self.fetch_document_start()

    # Is it the document end?
    if ch == u'.' and self.check_document_end():
        return self.fetch_document_end()

    # Note: the order of the following checks is NOT significant.

    # Is it the flow sequence start indicator?
    if ch == u'[':
        return self.fetch_flow_sequence_start()

    # Is it the flow mapping start indicator?
    if ch == u'{':
        return self.fetch_flow_mapping_start()

    # Is it the flow sequence end indicator?
    if ch == u']':
        return self.fetch_flow_sequence_end()

    # Is it the flow mapping end indicator?
    if ch == u'}':
        return self.fetch_flow_mapping_end()

    # Is it the entry indicator?
    if ch in u'-,' and self.check_entry():
        return self.fetch_entry()

    # Is it the key indicator?
    if ch == u'?' and self.check_key():
        return self.fetch_key()

    # Is it the value indicator?
    if ch == u':' and self.check_value():
        return self.fetch_value()

    # Is it an alias?
    if ch == u'*':
        return self.fetch_alias()

    # Is it an anchor?
    if ch == u'&':
        return self.fetch_anchor()

    # Is it a tag?
    if ch == u'!':
        return self.fetch_tag()

    # Is it a literal scalar?
    if ch == u'|' and not self.flow_level:
        return self.fetch_literal()

    # Is it a folded scalar?
    if ch == u'>' and not self.flow_level:
        return self.fetch_folded()

    # Is it a single quoted scalar?
    if ch == u'\'':
        return self.fetch_single()

    # Is it a double quoted scalar?
    if ch == u'"':
        return self.fetch_double()

    # It must be a plain scalar then.
    if self.check_plain():
        return self.fetch_plain()

    # No? It's an error. Let's produce a nice error message.
    self.invalid_token()
309 # Simple keys treatment.
def next_possible_simple_key(self):
    """Return the token number of the nearest possible simple key.

    Returns None when no possible simple key is recorded.
    """
    if not self.possible_simple_keys:
        return None
    # The nearest key is the one with the smallest token number, whatever
    # flow level it was saved on.
    return min(key.token_number
               for key in self.possible_simple_keys.values())
def stale_possible_simple_keys(self):
    """Drop saved positions that can no longer be simple keys.

    According to the YAML specification, simple keys
    - should be limited to a single line,
    - should be no longer than 1024 characters.
    Disabling this procedure will allow simple keys of any length and
    height (may cause problems if indentation is broken though).

    Calls `fail` when a dropped key was required.
    """
    # Iterate over a snapshot of the levels: entries are deleted inside the
    # loop, which is not allowed while iterating the dictionary itself.
    for level in list(self.possible_simple_keys):
        key = self.possible_simple_keys[level]
        if key.line != self.stream.line \
                or self.stream.index - key.index > 1024:
            if key.required:
                self.fail("simple key is required")
            del self.possible_simple_keys[level]
def save_possible_simple_key(self):
    """Remember the current position as a possible simple key.

    The next token may start a simple key. We check if it's possible
    and save its position. This function is called for
    ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.

    Calls `fail` when a simple key is required here but not allowed.
    """
    # Check if a simple key is required at the current position.
    required = not self.flow_level and self.indent == self.stream.column

    # The next token might be a simple key. Let's save its number and
    # position.
    if self.allow_simple_key:
        self.remove_possible_simple_key()
        token_number = self.tokens_taken + len(self.tokens)
        index = self.stream.index
        line = self.stream.line
        column = self.stream.column
        marker = self.stream.get_marker()
        key = SimpleKey(token_number, required,
                        index, line, column, marker)
        self.possible_simple_keys[self.flow_level] = key

    # A simple key is required at the current position.
    elif required:
        self.fail("simple key is required")
def remove_possible_simple_key(self):
    """Remove the saved possible key position at the current flow level.

    Calls `fail` when the saved key was required. Does nothing when no key
    is saved at the current level.
    """
    if self.flow_level in self.possible_simple_keys:
        key = self.possible_simple_keys[self.flow_level]
        if key.required:
            self.fail("simple key is required")
        # Actually forget the position; otherwise a later ':' on the same
        # line would be matched against an outdated key.
        del self.possible_simple_keys[self.flow_level]
373 # Indentation functions.
def unwind_indent(self, column):
    """Close the block collections that are indented deeper than `column`.

    For every indentation level popped, a BLOCK-END token is appended at
    the current stream position. In the flow context a deeper indentation
    is reported through `fail` instead.
    """
    # In flow context, tokens should respect indentation.
    if self.flow_level and self.indent > column:
        self.fail("invalid intendation in the flow context")

    # In block context, we may need to issue the BLOCK-END tokens.
    while self.indent > column:
        marker = self.stream.get_marker()
        self.indent = self.indents.pop()
        self.tokens.append(BlockEndToken(marker, marker))
def add_indent(self, column):
    """Increase the indentation level to `column` if it is deeper.

    Returns True when the level was increased (the previous level is
    pushed onto `indents`), False otherwise.
    """
    # Check if we need to increase indentation.
    if self.indent < column:
        self.indents.append(self.indent)
        self.indent = column
        return True
    return False
def fetch_end(self):
    """Close all open blocks, append the END token and finish scanning."""
    # Set the current indentation to -1.
    self.unwind_indent(-1)

    # Reset everything (not really needed).
    self.allow_simple_key = False
    self.possible_simple_keys = {}

    # Add END at the current position.
    marker = self.stream.get_marker()
    self.tokens.append(EndToken(marker, marker))

    # The stream is ended.
    self.done = True
def fetch_directive(self):
    """Close all open blocks and scan a directive."""
    # Set the current indentation to -1.
    self.unwind_indent(-1)

    # Reset simple keys.
    self.remove_possible_simple_key()
    self.allow_simple_key = False

    # Scan and add DIRECTIVE.
    self.scan_directive()
def fetch_document_start(self):
    """Append a DOCUMENT-START token for the '---' indicator."""
    self.fetch_document_indicator(DocumentStartToken)
def fetch_document_end(self):
    """Append a DOCUMENT-END token for the '...' indicator."""
    self.fetch_document_indicator(DocumentEndToken)
def fetch_document_indicator(self, TokenClass):
    """Consume a three-character document indicator and append its token.

    `TokenClass` is called with the start and end markers of the indicator.
    """
    # Set the current indentation to -1.
    self.unwind_indent(-1)

    # Reset simple keys. Note that there could not be a block collection
    # after the indicator.
    self.remove_possible_simple_key()
    self.allow_simple_key = False

    # Add DOCUMENT-START or DOCUMENT-END.
    start_marker = self.stream.get_marker()
    self.stream.forward(3)
    end_marker = self.stream.get_marker()
    self.tokens.append(TokenClass(start_marker, end_marker))
def fetch_flow_sequence_start(self):
    """Append a FLOW-SEQUENCE-START token for the '[' indicator."""
    self.fetch_flow_collection_start(FlowSequenceStartToken)
def fetch_flow_mapping_start(self):
    """Append a FLOW-MAPPING-START token for the '{' indicator."""
    self.fetch_flow_collection_start(FlowMappingStartToken)
def fetch_flow_collection_start(self, TokenClass):
    """Open a flow collection and append its start token.

    `TokenClass` is called with the start and end markers of the indicator.
    """
    # '[' and '{' may start a simple key.
    self.save_possible_simple_key()

    # Increase the flow level.
    self.flow_level += 1

    # Simple keys are allowed after '[' and '{'.
    self.allow_simple_key = True

    # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
    start_marker = self.stream.get_marker()
    self.stream.forward()
    end_marker = self.stream.get_marker()
    self.tokens.append(TokenClass(start_marker, end_marker))
def fetch_flow_sequence_end(self):
    """Append a FLOW-SEQUENCE-END token for the ']' indicator."""
    self.fetch_flow_collection_end(FlowSequenceEndToken)
def fetch_flow_mapping_end(self):
    """Append a FLOW-MAPPING-END token for the '}' indicator."""
    self.fetch_flow_collection_end(FlowMappingEndToken)
def fetch_flow_collection_end(self, TokenClass):
    """Close a flow collection and append its end token.

    `TokenClass` is called with the start and end markers of the indicator.
    """
    # Reset possible simple key on the current level.
    self.remove_possible_simple_key()

    # Decrease the flow level.
    self.flow_level -= 1

    # No simple keys after ']' or '}'.
    self.allow_simple_key = False

    # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
    start_marker = self.stream.get_marker()
    self.stream.forward()
    end_marker = self.stream.get_marker()
    self.tokens.append(TokenClass(start_marker, end_marker))
def fetch_entry(self):
    """Append an ENTRY token for the '-' or ',' indicator.

    In the block context this may first open a new block sequence; `fail`
    is called when an entry is not allowed at the current position.
    """
    # Block context needs additional checks.
    if not self.flow_level:

        # Are we allowed to start a new entry?
        if not self.allow_simple_key:
            self.fail("Cannot start a new entry here")

        # We may need to add BLOCK-SEQUENCE-START.
        if self.add_indent(self.stream.column):
            marker = self.stream.get_marker()
            self.tokens.append(BlockSequenceStartToken(marker, marker))

    # Simple keys are allowed after '-' and ','.
    self.allow_simple_key = True

    # Reset possible simple key on the current level.
    self.remove_possible_simple_key()

    # Add ENTRY.
    start_marker = self.stream.get_marker()
    self.stream.forward()
    end_marker = self.stream.get_marker()
    self.tokens.append(EntryToken(start_marker, end_marker))
def fetch_key(self):
    """Append a KEY token for the '?' indicator.

    In the block context this may first open a new block mapping; `fail`
    is called when a key is not allowed at the current position.
    """
    # Block context needs additional checks.
    if not self.flow_level:

        # Are we allowed to start a key (not necessarily a simple one)?
        if not self.allow_simple_key:
            self.fail("Cannot start a new key here")

        # We may need to add BLOCK-MAPPING-START.
        if self.add_indent(self.stream.column):
            marker = self.stream.get_marker()
            self.tokens.append(BlockMappingStartToken(marker, marker))

    # Simple keys are allowed after '?' in the block context.
    self.allow_simple_key = not self.flow_level

    # Reset possible simple key on the current level.
    self.remove_possible_simple_key()

    # Add KEY.
    start_marker = self.stream.get_marker()
    self.stream.forward()
    end_marker = self.stream.get_marker()
    self.tokens.append(KeyToken(start_marker, end_marker))
def fetch_value(self):
    """Append a VALUE token for the ':' indicator.

    When a possible simple key is saved on the current flow level, a KEY
    token (and, in the block context, possibly BLOCK-MAPPING-START) is
    inserted retroactively at the saved position.
    """
    # Do we determine a simple key?
    if self.flow_level in self.possible_simple_keys:

        # Add KEY at the position where the simple key started.
        key = self.possible_simple_keys[self.flow_level]
        del self.possible_simple_keys[self.flow_level]
        self.tokens.insert(key.token_number - self.tokens_taken,
                           KeyToken(key.marker, key.marker))

        # If this key starts a new block mapping, we need to add
        # BLOCK-MAPPING-START.
        if not self.flow_level:
            if self.add_indent(key.column):
                self.tokens.insert(
                    key.token_number - self.tokens_taken,
                    BlockMappingStartToken(key.marker, key.marker))

        # There cannot be two simple keys one after another.
        self.allow_simple_key = False

    # It must be a part of a complex key.
    else:

        # Simple keys are allowed after ':' in the block context.
        self.allow_simple_key = not self.flow_level

        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()

    # Add VALUE.
    start_marker = self.stream.get_marker()
    self.stream.forward()
    end_marker = self.stream.get_marker()
    self.tokens.append(ValueToken(start_marker, end_marker))
def fetch_alias(self):
    """Scan an alias and append an ALIAS token."""
    # ALIAS could be a simple key.
    self.save_possible_simple_key()

    # No simple keys after ALIAS.
    self.allow_simple_key = False

    # Scan and add ALIAS.
    self.scan_anchor(AliasToken)
def fetch_anchor(self):
    """Scan an anchor and append an ANCHOR token."""
    # ANCHOR could start a simple key.
    self.save_possible_simple_key()

    # No simple keys after ANCHOR.
    self.allow_simple_key = False

    # Scan and add ANCHOR.
    self.scan_anchor(AnchorToken)
def fetch_tag(self):
    """Scan a tag and append a TAG token."""
    # TAG could start a simple key.
    self.save_possible_simple_key()

    # No simple keys after TAG.
    self.allow_simple_key = False

    # Scan and add TAG.
    self.scan_tag()
def fetch_literal(self):
    """Fetch a literal ('|') block scalar."""
    self.fetch_block_scalar(folded=False)
def fetch_folded(self):
    """Fetch a folded ('>') block scalar."""
    self.fetch_block_scalar(folded=True)
def fetch_block_scalar(self, folded):
    """Scan a block scalar and append a SCALAR token.

    `folded` is passed unchanged to `scan_block_scalar`.
    """
    # A simple key may follow a block scalar.
    self.allow_simple_key = True

    # Reset possible simple key on the current level.
    self.remove_possible_simple_key()

    # Scan and add SCALAR.
    self.scan_block_scalar(folded)
def fetch_single(self):
    """Fetch a single-quoted flow scalar."""
    self.fetch_flow_scalar(double=False)
def fetch_double(self):
    """Fetch a double-quoted flow scalar."""
    self.fetch_flow_scalar(double=True)
def fetch_flow_scalar(self, double):
    """Scan a quoted scalar and append a SCALAR token.

    `double` is passed unchanged to `scan_flow_scalar`.
    """
    # A flow scalar could be a simple key.
    self.save_possible_simple_key()

    # No simple keys after flow scalars.
    self.allow_simple_key = False

    # Scan and add SCALAR.
    self.scan_flow_scalar(double)
def fetch_plain(self):
    """Scan a plain scalar and append a SCALAR token."""
    # A plain scalar could be a simple key.
    self.save_possible_simple_key()

    # No simple keys after plain scalars. But note that `scan_plain` will
    # change this flag if the scan is finished at the beginning of the
    # next line.
    self.allow_simple_key = False

    # Scan and add SCALAR. May change `allow_simple_key`.
    self.scan_plain()
def check_directive(self):
    """Tell whether a directive starts at the current position.

    DIRECTIVE: ^ '%' ...
    The '%' indicator is already checked by the caller, so only the
    column has to be verified.
    """
    return self.stream.column == 0
def check_document_start(self):
    """Tell whether a document start indicator is at the current position.

    DOCUMENT-START: ^ '---' (' '|'\\n')
    """
    if self.stream.column == 0:
        prefix = self.stream.peek(4)
        if prefix[:3] == u'---' \
                and prefix[3] in u'\0 \t\r\n\x85\u2028\u2029':
            return True
    return False
def check_document_end(self):
    """Tell whether a document end indicator is at the current position.

    DOCUMENT-END: ^ '...' (' '|'\\n')
    """
    if self.stream.column == 0:
        prefix = self.stream.peek(4)
        if prefix[:3] == u'...' \
                and prefix[3] in u'\0 \t\r\n\x85\u2028\u2029':
            return True
    return False
def check_entry(self):
    """Tell whether an entry indicator is at the current position.

    ENTRY(flow context):  ','
    ENTRY(block context): '-' (' '|'\\n')
    """
    if self.flow_level:
        return self.stream.peek() == u','
    prefix = self.stream.peek(2)
    return prefix[0] == u'-' and prefix[1] in u'\0 \t\r\n\x85\u2028\u2029'
def check_key(self):
    """Tell whether a key indicator is at the current position.

    The '?' character itself is already checked by the caller.

    KEY(flow context):  '?'
    KEY(block context): '?' (' '|'\\n')
    """
    if self.flow_level:
        return True
    prefix = self.stream.peek(2)
    return prefix[1] in u'\0 \t\r\n\x85\u2028\u2029'
def check_value(self):
    """Tell whether a value indicator is at the current position.

    The ':' character itself is already checked by the caller.

    VALUE(flow context):  ':'
    VALUE(block context): ':' (' '|'\\n')
    """
    if self.flow_level:
        return True
    prefix = self.stream.peek(2)
    return prefix[1] in u'\0 \t\r\n\x85\u2028\u2029'
def check_plain(self):
    """Tell whether a plain scalar may start at the current position.

    NOTE(review): the body of this method is not visible in the source;
    it is restored as an unconditional acceptance, which makes
    `fetch_more_tokens` treat every remaining character as the start of a
    plain scalar. Confirm against the original file.
    """
    return True
def scan_to_next_token(self):
    """Skip spaces, comments and line breaks before the next token.

    Every line break crossed in the block context re-enables simple keys.
    The stream is left at the first character that is not a space, a
    comment or a line break.
    """
    found = False
    while not found:
        while self.stream.peek() == u' ':
            self.stream.forward()
        if self.stream.peek() == u'#':
            # A comment runs to the end of the line. Stop at NUL as well,
            # so a comment on the last line does not run past the input.
            while self.stream.peek() not in u'\0\r\n':
                self.stream.forward()
        if self.stream.peek() in u'\r\n':
            self.stream.forward()
            if not self.flow_level:
                self.allow_simple_key = True
        else:
            found = True
def scan_directive(self):
    """Append a directive token and skip the rest of the directive line.

    The directive is classified by its leading characters only: '%YAML '
    gives a YAML-DIRECTIVE with the fixed version 1.1, '%TAG ' gives a
    TAG-DIRECTIVE, anything else a RESERVED-DIRECTIVE with an empty name.
    All tokens use the start position for both markers.
    """
    marker = self.stream.get_marker()
    # peek(k) returns the next k characters, so the length must match the
    # length of the string it is compared with.
    if self.stream.peek(6) == u'%YAML ':
        self.tokens.append(YAMLDirectiveToken(1, 1, marker, marker))
    elif self.stream.peek(5) == u'%TAG ':
        self.tokens.append(TagDirectiveToken(marker, marker))
    else:
        self.tokens.append(ReservedDirectiveToken('', marker, marker))
    while self.stream.peek() not in u'\0\r\n':
        self.stream.forward()
    # Consume the line break, but never step over the terminating NUL.
    if self.stream.peek() != u'\0':
        self.stream.forward()
def scan_anchor(self, TokenClass):
    """Skip an anchor or alias and append a token for it.

    Everything up to the next NUL, space, tab, line break, ',' or ':' is
    consumed, including the leading indicator. `TokenClass` is called with
    an empty value and the start and end markers.
    """
    start_marker = self.stream.get_marker()
    while self.stream.peek() not in u'\0 \t\r\n,:':
        self.stream.forward()
    end_marker = self.stream.get_marker()
    self.tokens.append(TokenClass('', start_marker, end_marker))
def scan_tag(self):
    """Skip a tag and append a TAG token with an empty value.

    Everything up to the next NUL, space, tab or line break is consumed.
    """
    start_marker = self.stream.get_marker()
    while self.stream.peek() not in u'\0 \t\r\n':
        self.stream.forward()
    end_marker = self.stream.get_marker()
    self.tokens.append(TagToken('', start_marker, end_marker))
def scan_block_scalar(self, folded):
    """Skip a block scalar and append a SCALAR token with an empty value.

    Lines are consumed for as long as they are indented by more than the
    current indentation level (at least one space). `folded` is accepted
    for interface symmetry and is not used here. Both markers of the token
    are the start position.
    """
    start_marker = self.stream.get_marker()
    indent = self.indent + 1
    if indent < 1:
        indent = 1
    while True:
        # Skip the rest of the current line.
        while self.stream.peek() not in u'\0\r\n\x85\u2028\u2029':
            self.stream.forward()
        # Consume the line break, but never step over the terminating NUL.
        if self.stream.peek() != u'\0':
            self.stream.forward()
        # Measure the indentation of the next line, up to `indent` spaces.
        count = 0
        while count < indent and self.stream.peek() == u' ':
            self.stream.forward()
            count += 1
        # A less indented line that is not a comment or a line break ends
        # the scalar.
        if count < indent \
                and self.stream.peek() not in u'#\r\n\x85\u2028\u2029':
            break
    self.tokens.append(ScalarToken('', False, start_marker, start_marker))
def scan_flow_scalar(self, double):
    """Skip a quoted scalar and append a SCALAR token with an empty value.

    `double` selects the escaping rule: a backslash escapes the next
    character in double-quoted scalars; in single-quoted scalars a doubled
    quote is skipped together with the character before it. Both markers
    of the token are the start position.
    """
    marker = self.stream.get_marker()
    # The opening quote character is also the closing one.
    quote = self.stream.peek()
    self.stream.forward()
    while self.stream.peek() != quote:
        if double and self.stream.peek() == u'\\':
            self.stream.forward(2)
        elif not double and self.stream.peek(3)[1:] == u'\'\'':
            # Look one character ahead so that the escaped quote pair is
            # never seen by the loop condition.
            self.stream.forward(3)
        else:
            self.stream.forward(1)
    # Consume the closing quote.
    self.stream.forward(1)
    self.tokens.append(ScalarToken('', False, marker, marker))
def scan_plain(self):
    """Skip a plain scalar and append a SCALAR token with an empty value.

    The scalar may continue on following lines that are indented by more
    than the current indentation level. `allow_simple_key` is set when the
    scan crosses a line break in the block context. Both markers of the
    token are the start position.
    """
    indent = self.indent + 1
    if indent < 1:
        indent = 1
    # `space` tells whether the previous character was whitespace; a '#'
    # starts a comment only after whitespace.
    space = False
    marker = self.stream.get_marker()
    while True:
        while self.stream.peek() == u' ':
            self.stream.forward()
            space = True
        while self.stream.peek() not in u'\0\r\n?:,[]{}#' \
                or (not space and self.stream.peek() == '#') \
                or (not self.flow_level
                    and self.stream.peek() in '?,[]{}') \
                or (not self.flow_level and self.stream.peek() == ':'
                    and self.stream.peek(2)[1] not in u' \0\r\n'):
            # Remember whether the character being consumed is whitespace.
            space = self.stream.peek() in u' \t'
            self.stream.forward()
            self.allow_simple_key = False
        if self.stream.peek() not in u'\r\n':
            break
        while self.stream.peek() in u'\r\n':
            self.stream.forward()
            if not self.flow_level:
                self.allow_simple_key = True
        # The scalar continues only on a sufficiently indented line.
        count = 0
        while self.stream.peek() == u' ' and count < indent:
            self.stream.forward()
            count += 1
        if count < indent:
            break
        space = True
    self.tokens.append(ScalarToken('', True, marker, marker))
def invalid_token(self):
    """Report that no token can start at the current position."""
    self.fail("invalid token")
def fail(self, message):
    """Abort scanning by raising ScannerError with `message`."""
    raise ScannerError(message)
839 # psyco.bind(Scanner)