Fix allow_unicode (ticket:3).
[pyyaml/python3.git] / lib / yaml / scanner.py
blob7a1d273c24f04245901de079df62eaea374da9d3
2 # Scanner produces tokens of the following types:
3 # STREAM-START
4 # STREAM-END
5 # DIRECTIVE(name, value)
6 # DOCUMENT-START
7 # DOCUMENT-END
8 # BLOCK-SEQUENCE-START
9 # BLOCK-MAPPING-START
10 # BLOCK-END
11 # FLOW-SEQUENCE-START
12 # FLOW-MAPPING-START
13 # FLOW-SEQUENCE-END
14 # FLOW-MAPPING-END
15 # BLOCK-ENTRY
16 # FLOW-ENTRY
17 # KEY
18 # VALUE
19 # ALIAS(value)
20 # ANCHOR(value)
21 # TAG(value)
22 # SCALAR(value, plain)
24 # Read comments in the Scanner code for more details.
27 __all__ = ['Scanner', 'ScannerError']
29 from error import MarkedYAMLError
30 from tokens import *
class ScannerError(MarkedYAMLError):
    # Raised when the scanner meets ill-formed input.  Context/problem
    # message formatting with marks is inherited from MarkedYAMLError.
    pass
class SimpleKey:
    # A recorded simple-key candidate: where a potential simple key
    # begins and which token number its KEY token would get.  See the
    # simple-keys treatment inside Scanner.

    def __init__(self, token_number, required, index, line, column, mark):
        # Store all coordinates of the candidate in one grouped assignment.
        (self.token_number, self.required, self.index,
                self.line, self.column, self.mark) = \
                (token_number, required, index, line, column, mark)
46 class Scanner:
49 def __init__(self, reader):
50 """Initialize the scanner."""
51 # The input stream. The Reader class do the dirty work of checking for
52 # BOM and converting the input data to Unicode. It also adds NUL to
53 # the end.
55 # Reader supports the following methods
56 # self.reader.peek(i=0) # peek the next i-th character
57 # self.reader.prefix(l=1) # peek the next l characters
58 # self.reader.forward(l=1) # read the next l characters
59 # and move the pointer
60 self.reader = reader
62 # Had we reached the end of the stream?
63 self.done = False
65 # The number of unclosed '{' and '['. `flow_level == 0` means block
66 # context.
67 self.flow_level = 0
69 # List of processed tokens that are not yet emitted.
70 self.tokens = []
72 # Add the STREAM-START token.
73 self.fetch_stream_start()
75 # Number of tokens that were emitted through the `get_token` method.
76 self.tokens_taken = 0
78 # The current indentation level.
79 self.indent = -1
81 # Past indentation levels.
82 self.indents = []
84 # Variables related to simple keys treatment.
86 # A simple key is a key that is not denoted by the '?' indicator.
87 # Example of simple keys:
88 # ---
89 # block simple key: value
90 # ? not a simple key:
91 # : { flow simple key: value }
92 # We emit the KEY token before all keys, so when we find a potential
93 # simple key, we try to locate the corresponding ':' indicator.
94 # Simple keys should be limited to a single line and 1024 characters.
96 # Can a simple key start at the current position? A simple key may
97 # start:
98 # - at the beginning of the line, not counting indentation spaces
99 # (in block context),
100 # - after '{', '[', ',' (in the flow context),
101 # - after '?', ':', '-' (in the block context).
102 # In the block context, this flag also signifies if a block collection
103 # may start at the current position.
104 self.allow_simple_key = True
106 # Keep track of possible simple keys. This is a dictionary. The key
107 # is `flow_level`; there can be no more that one possible simple key
108 # for each level. The value is a SimpleKey record:
109 # (token_number, required, index, line, column, mark)
110 # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow),
111 # '[', or '{' tokens.
112 self.possible_simple_keys = {}
114 # Public methods.
116 def check(self, *choices):
117 # Check if the next token is one of the given types.
118 while self.need_more_tokens():
119 self.fetch_more_tokens()
120 if self.tokens:
121 for choice in choices:
122 if isinstance(self.tokens[0], choice):
123 return True
124 return False
126 def peek(self):
127 # Return the next token, but do not delete if from the queue.
128 while self.need_more_tokens():
129 self.fetch_more_tokens()
130 if self.tokens:
131 return self.tokens[0]
133 def get(self):
134 # Return the next token.
135 while self.need_more_tokens():
136 self.fetch_more_tokens()
137 if self.tokens:
138 self.tokens_taken += 1
139 return self.tokens.pop(0)
    def __iter__(self):
        # Iterator protocol: yield every token up to STREAM-END.
        while self.need_more_tokens():
            self.fetch_more_tokens()
        while self.tokens:
            self.tokens_taken += 1
            yield self.tokens.pop(0)
            # Refill the queue after each yield so iteration keeps going
            # until the true end of the stream, not just the tokens that
            # happened to be buffered.
            while self.need_more_tokens():
                self.fetch_more_tokens()
151 # Private methods.
    def need_more_tokens(self):
        # Decide whether fetch_more_tokens() must run before the head of
        # the queue may be served.  Returns True when more scanning is
        # needed; otherwise False or (implicitly) None, both falsy.
        if self.done:
            return False
        if not self.tokens:
            return True
        # The current token may be a potential simple key, so we
        # need to look further.
        self.stale_possible_simple_keys()
        if self.next_possible_simple_key() == self.tokens_taken:
            return True
    def fetch_more_tokens(self):
        # Dispatch on the next significant character and append the
        # corresponding token(s) to self.tokens.

        # Eat whitespaces and comments until we reach the next token.
        self.scan_to_next_token()

        # Remove obsolete possible simple keys.
        self.stale_possible_simple_keys()

        # Compare the current indentation and column.  It may add some
        # tokens and decrease the current indentation level.
        self.unwind_indent(self.reader.column)

        # Peek the next character.
        ch = self.reader.peek()

        # Is it the end of stream?
        if ch == u'\0':
            return self.fetch_stream_end()

        # Is it a directive?
        if ch == u'%' and self.check_directive():
            return self.fetch_directive()

        # Is it the document start?
        if ch == u'-' and self.check_document_start():
            return self.fetch_document_start()

        # Is it the document end?
        if ch == u'.' and self.check_document_end():
            return self.fetch_document_end()

        # TODO: support for BOM within a stream.
        #if ch == u'\uFEFF':
        #    return self.fetch_bom()    <-- issue BOMToken

        # Note: the order of the following checks is NOT significant.

        # Is it the flow sequence start indicator?
        if ch == u'[':
            return self.fetch_flow_sequence_start()

        # Is it the flow mapping start indicator?
        if ch == u'{':
            return self.fetch_flow_mapping_start()

        # Is it the flow sequence end indicator?
        if ch == u']':
            return self.fetch_flow_sequence_end()

        # Is it the flow mapping end indicator?
        if ch == u'}':
            return self.fetch_flow_mapping_end()

        # Is it the flow entry indicator?
        if ch in u',':
            return self.fetch_flow_entry()

        # Is it the block entry indicator?
        if ch in u'-' and self.check_block_entry():
            return self.fetch_block_entry()

        # Is it the key indicator?
        if ch == u'?' and self.check_key():
            return self.fetch_key()

        # Is it the value indicator?
        if ch == u':' and self.check_value():
            return self.fetch_value()

        # Is it an alias?
        if ch == u'*':
            return self.fetch_alias()

        # Is it an anchor?
        if ch == u'&':
            return self.fetch_anchor()

        # Is it a tag?
        if ch == u'!':
            return self.fetch_tag()

        # Is it a literal scalar?
        if ch == u'|' and not self.flow_level:
            return self.fetch_literal()

        # Is it a folded scalar?
        if ch == u'>' and not self.flow_level:
            return self.fetch_folded()

        # Is it a single quoted scalar?
        if ch == u'\'':
            return self.fetch_single()

        # Is it a double quoted scalar?
        if ch == u'\"':
            return self.fetch_double()

        # It must be a plain scalar then.
        if self.check_plain():
            return self.fetch_plain()

        # No?  It's an error.  Let's produce a nice error message.
        raise ScannerError("while scanning for the next token", None,
                "found character %r that cannot start any token"
                % ch.encode('utf-8'), self.reader.get_mark())
270 # Simple keys treatment.
272 def next_possible_simple_key(self):
273 # Return the number of the nearest possible simple key. Actually we
274 # don't need to loop through the whole dictionary. We may replace it
275 # with the following code:
276 # if not self.possible_simple_keys:
277 # return None
278 # return self.possible_simple_keys[
279 # min(self.possible_simple_keys.keys())].token_number
280 min_token_number = None
281 for level in self.possible_simple_keys:
282 key = self.possible_simple_keys[level]
283 if min_token_number is None or key.token_number < min_token_number:
284 min_token_number = key.token_number
285 return min_token_number
287 def stale_possible_simple_keys(self):
288 # Remove entries that are no longer possible simple keys. According to
289 # the YAML specification, simple keys
290 # - should be limited to a single line,
291 # - should be no longer than 1024 characters.
292 # Disabling this procedure will allow simple keys of any length and
293 # height (may cause problems if indentation is broken though).
294 for level in self.possible_simple_keys.keys():
295 key = self.possible_simple_keys[level]
296 if key.line != self.reader.line \
297 or self.reader.index-key.index > 1024:
298 if key.required:
299 raise ScannerError("while scanning a simple key", key.mark,
300 "could not found expected ':'", self.reader.get_mark())
301 del self.possible_simple_keys[level]
    def save_possible_simple_key(self):
        # The next token may start a simple key.  We check if it's
        # possible and save its position.  This function is called for
        # ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.

        # Check if a simple key is required at the current position: only
        # in block context, at the first token of the line.
        required = not self.flow_level and self.indent == self.reader.column

        # A simple key is required only if it is the first token in the
        # current line.  Therefore it is always allowed.
        assert self.allow_simple_key or not required

        # The next token might be a simple key.  Let's save its number
        # and position.
        if self.allow_simple_key:
            self.remove_possible_simple_key()
            # Token number the KEY token would get if this key confirms.
            token_number = self.tokens_taken+len(self.tokens)
            index = self.reader.index
            line = self.reader.line
            column = self.reader.column
            mark = self.reader.get_mark()
            key = SimpleKey(token_number, required,
                    index, line, column, mark)
            self.possible_simple_keys[self.flow_level] = key
328 def remove_possible_simple_key(self):
329 # Remove the saved possible key position at the current flow level.
330 if self.flow_level in self.possible_simple_keys:
331 key = self.possible_simple_keys[self.flow_level]
333 # I don't think it's possible, but I could be wrong.
334 assert not key.required
335 #if key.required:
336 # raise ScannerError("while scanning a simple key", key.mark,
337 # "could not found expected ':'", self.reader.get_mark())
339 # Indentation functions.
    def unwind_indent(self, column):
        # Pop indentation levels deeper than *column*, issuing a
        # zero-width BLOCK-END token for each popped level.
        #
        # According to the spec, tokens should respect indentation in
        # the flow context as well (`self.indent >= column`), but that
        # would prohibit intuitively correct constructions such as
        #   key : {
        #   }
        # so in the flow context indentation is ignored; we make the
        # scanner less restrictive than the specification requires.
        if self.flow_level:
            return

        # In block context, we may need to issue the BLOCK-END tokens.
        while self.indent > column:
            mark = self.reader.get_mark()
            self.indent = self.indents.pop()
            self.tokens.append(BlockEndToken(mark, mark))
365 def add_indent(self, column):
366 # Check if we need to increase indentation.
367 if self.indent < column:
368 self.indents.append(self.indent)
369 self.indent = column
370 return True
371 return False
373 # Fetchers.
    def fetch_stream_start(self):
        # We always add STREAM-START as the first token and STREAM-END
        # as the last token.

        # Read the token (zero-width: both marks are the same).
        mark = self.reader.get_mark()

        # Add STREAM-START, carrying the detected input encoding.
        self.tokens.append(StreamStartToken(mark, mark,
            encoding=self.reader.encoding))
387 def fetch_stream_end(self):
389 # Set the current intendation to -1.
390 self.unwind_indent(-1)
392 # Reset everything (not really needed).
393 self.allow_simple_key = False
394 self.possible_simple_keys = {}
396 # Read the token.
397 mark = self.reader.get_mark()
399 # Add STREAM-END.
400 self.tokens.append(StreamEndToken(mark, mark))
402 # The reader is ended.
403 self.done = True
405 def fetch_directive(self):
407 # Set the current intendation to -1.
408 self.unwind_indent(-1)
410 # Reset simple keys.
411 self.remove_possible_simple_key()
412 self.allow_simple_key = False
414 # Scan and add DIRECTIVE.
415 self.tokens.append(self.scan_directive())
417 def fetch_document_start(self):
418 self.fetch_document_indicator(DocumentStartToken)
420 def fetch_document_end(self):
421 self.fetch_document_indicator(DocumentEndToken)
423 def fetch_document_indicator(self, TokenClass):
425 # Set the current intendation to -1.
426 self.unwind_indent(-1)
428 # Reset simple keys. Note that there could not be a block collection
429 # after '---'.
430 self.remove_possible_simple_key()
431 self.allow_simple_key = False
433 # Add DOCUMENT-START or DOCUMENT-END.
434 start_mark = self.reader.get_mark()
435 self.reader.forward(3)
436 end_mark = self.reader.get_mark()
437 self.tokens.append(TokenClass(start_mark, end_mark))
439 def fetch_flow_sequence_start(self):
440 self.fetch_flow_collection_start(FlowSequenceStartToken)
442 def fetch_flow_mapping_start(self):
443 self.fetch_flow_collection_start(FlowMappingStartToken)
445 def fetch_flow_collection_start(self, TokenClass):
447 # '[' and '{' may start a simple key.
448 self.save_possible_simple_key()
450 # Increase the flow level.
451 self.flow_level += 1
453 # Simple keys are allowed after '[' and '{'.
454 self.allow_simple_key = True
456 # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
457 start_mark = self.reader.get_mark()
458 self.reader.forward()
459 end_mark = self.reader.get_mark()
460 self.tokens.append(TokenClass(start_mark, end_mark))
462 def fetch_flow_sequence_end(self):
463 self.fetch_flow_collection_end(FlowSequenceEndToken)
465 def fetch_flow_mapping_end(self):
466 self.fetch_flow_collection_end(FlowMappingEndToken)
468 def fetch_flow_collection_end(self, TokenClass):
470 # Reset possible simple key on the current level.
471 self.remove_possible_simple_key()
473 # Decrease the flow level.
474 self.flow_level -= 1
476 # No simple keys after ']' or '}'.
477 self.allow_simple_key = False
479 # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
480 start_mark = self.reader.get_mark()
481 self.reader.forward()
482 end_mark = self.reader.get_mark()
483 self.tokens.append(TokenClass(start_mark, end_mark))
485 def fetch_flow_entry(self):
487 # Simple keys are allowed after ','.
488 self.allow_simple_key = True
490 # Reset possible simple key on the current level.
491 self.remove_possible_simple_key()
493 # Add FLOW-ENTRY.
494 start_mark = self.reader.get_mark()
495 self.reader.forward()
496 end_mark = self.reader.get_mark()
497 self.tokens.append(FlowEntryToken(start_mark, end_mark))
499 def fetch_block_entry(self):
501 # Block context needs additional checks.
502 if not self.flow_level:
504 # Are we allowed to start a new entry?
505 if not self.allow_simple_key:
506 raise ScannerError(None, None,
507 "sequence entries are not allowed here",
508 self.reader.get_mark())
510 # We may need to add BLOCK-SEQUENCE-START.
511 if self.add_indent(self.reader.column):
512 mark = self.reader.get_mark()
513 self.tokens.append(BlockSequenceStartToken(mark, mark))
515 # It's an error for the block entry to occur in the flow context,
516 # but we let the parser detect this.
517 else:
518 pass
520 # Simple keys are allowed after '-'.
521 self.allow_simple_key = True
523 # Reset possible simple key on the current level.
524 self.remove_possible_simple_key()
526 # Add BLOCK-ENTRY.
527 start_mark = self.reader.get_mark()
528 self.reader.forward()
529 end_mark = self.reader.get_mark()
530 self.tokens.append(BlockEntryToken(start_mark, end_mark))
532 def fetch_key(self):
534 # Block context needs additional checks.
535 if not self.flow_level:
537 # Are we allowed to start a key (not nessesary a simple)?
538 if not self.allow_simple_key:
539 raise ScannerError(None, None,
540 "mapping keys are not allowed here",
541 self.reader.get_mark())
543 # We may need to add BLOCK-MAPPING-START.
544 if self.add_indent(self.reader.column):
545 mark = self.reader.get_mark()
546 self.tokens.append(BlockMappingStartToken(mark, mark))
548 # Simple keys are allowed after '?' in the block context.
549 self.allow_simple_key = not self.flow_level
551 # Reset possible simple key on the current level.
552 self.remove_possible_simple_key()
554 # Add KEY.
555 start_mark = self.reader.get_mark()
556 self.reader.forward()
557 end_mark = self.reader.get_mark()
558 self.tokens.append(KeyToken(start_mark, end_mark))
560 def fetch_value(self):
562 # Do we determine a simple key?
563 if self.flow_level in self.possible_simple_keys:
565 # Add KEY.
566 key = self.possible_simple_keys[self.flow_level]
567 del self.possible_simple_keys[self.flow_level]
568 self.tokens.insert(key.token_number-self.tokens_taken,
569 KeyToken(key.mark, key.mark))
571 # If this key starts a new block mapping, we need to add
572 # BLOCK-MAPPING-START.
573 if not self.flow_level:
574 if self.add_indent(key.column):
575 self.tokens.insert(key.token_number-self.tokens_taken,
576 BlockMappingStartToken(key.mark, key.mark))
578 # There cannot be two simple keys one after another.
579 self.allow_simple_key = False
581 # It must be a part of a complex key.
582 else:
584 # Block context needs additional checks.
585 # (Do we really need them? They will be catched by the parser
586 # anyway.)
587 if not self.flow_level:
589 # We are allowed to start a complex value if and only if
590 # we can start a simple key.
591 if not self.allow_simple_key:
592 raise ScannerError(None, None,
593 "mapping values are not allowed here",
594 self.reader.get_mark())
596 # Simple keys are allowed after ':' in the block context.
597 self.allow_simple_key = not self.flow_level
599 # Reset possible simple key on the current level.
600 self.remove_possible_simple_key()
602 # Add VALUE.
603 start_mark = self.reader.get_mark()
604 self.reader.forward()
605 end_mark = self.reader.get_mark()
606 self.tokens.append(ValueToken(start_mark, end_mark))
608 def fetch_alias(self):
610 # ALIAS could be a simple key.
611 self.save_possible_simple_key()
613 # No simple keys after ALIAS.
614 self.allow_simple_key = False
616 # Scan and add ALIAS.
617 self.tokens.append(self.scan_anchor(AliasToken))
619 def fetch_anchor(self):
621 # ANCHOR could start a simple key.
622 self.save_possible_simple_key()
624 # No simple keys after ANCHOR.
625 self.allow_simple_key = False
627 # Scan and add ANCHOR.
628 self.tokens.append(self.scan_anchor(AnchorToken))
630 def fetch_tag(self):
632 # TAG could start a simple key.
633 self.save_possible_simple_key()
635 # No simple keys after TAG.
636 self.allow_simple_key = False
638 # Scan and add TAG.
639 self.tokens.append(self.scan_tag())
641 def fetch_literal(self):
642 self.fetch_block_scalar(style='|')
644 def fetch_folded(self):
645 self.fetch_block_scalar(style='>')
647 def fetch_block_scalar(self, style):
649 # A simple key may follow a block scalar.
650 self.allow_simple_key = True
652 # Reset possible simple key on the current level.
653 self.remove_possible_simple_key()
655 # Scan and add SCALAR.
656 self.tokens.append(self.scan_block_scalar(style))
658 def fetch_single(self):
659 self.fetch_flow_scalar(style='\'')
661 def fetch_double(self):
662 self.fetch_flow_scalar(style='"')
664 def fetch_flow_scalar(self, style):
666 # A flow scalar could be a simple key.
667 self.save_possible_simple_key()
669 # No simple keys after flow scalars.
670 self.allow_simple_key = False
672 # Scan and add SCALAR.
673 self.tokens.append(self.scan_flow_scalar(style))
675 def fetch_plain(self):
677 # A plain scalar could be a simple key.
678 self.save_possible_simple_key()
680 # No simple keys after plain scalars. But note that `scan_plain` will
681 # change this flag if the scan is finished at the beginning of the
682 # line.
683 self.allow_simple_key = False
685 # Scan and add SCALAR. May change `allow_simple_key`.
686 self.tokens.append(self.scan_plain())
688 # Checkers.
690 def check_directive(self):
692 # DIRECTIVE: ^ '%' ...
693 # The '%' indicator is already checked.
694 if self.reader.column == 0:
695 return True
697 def check_document_start(self):
699 # DOCUMENT-START: ^ '---' (' '|'\n')
700 if self.reader.column == 0:
701 if self.reader.prefix(3) == u'---' \
702 and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
703 return True
705 def check_document_end(self):
707 # DOCUMENT-END: ^ '...' (' '|'\n')
708 if self.reader.column == 0:
709 prefix = self.reader.peek(4)
710 if self.reader.prefix(3) == u'...' \
711 and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
712 return True
714 def check_block_entry(self):
716 # BLOCK-ENTRY: '-' (' '|'\n')
717 return self.reader.peek(1) in u'\0 \t\r\n\x85\u2028\u2029'
719 def check_key(self):
721 # KEY(flow context): '?'
722 if self.flow_level:
723 return True
725 # KEY(block context): '?' (' '|'\n')
726 else:
727 return self.reader.peek(1) in u'\0 \t\r\n\x85\u2028\u2029'
729 def check_value(self):
731 # VALUE(flow context): ':'
732 if self.flow_level:
733 return True
735 # VALUE(block context): ':' (' '|'\n')
736 else:
737 return self.reader.peek(1) in u'\0 \t\r\n\x85\u2028\u2029'
739 def check_plain(self):
741 # A plain scalar may start with any non-space character except:
742 # '-', '?', ':', ',', '[', ']', '{', '}',
743 # '#', '&', '*', '!', '|', '>', '\'', '\"',
744 # '%', '@', '`'.
746 # It may also start with
747 # '-', '?', ':'
748 # if it is followed by a non-space character.
750 # Note that we limit the last rule to the block context (except the
751 # '-' character) because we want the flow context to be space
752 # independent.
753 ch = self.reader.peek()
754 return ch not in u'\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`' \
755 or (self.reader.peek(1) not in u'\0 \t\r\n\x85\u2028\u2029'
756 and (ch == u'-' or (not self.flow_level and ch in u'?:')))
758 # Scanners.
760 def scan_to_next_token(self):
761 # We ignore spaces, line breaks and comments.
762 # If we find a line break in the block context, we set the flag
763 # `allow_simple_key` on.
764 # The byte order mark is stripped if it's the first character in the
765 # stream. We do not yet support BOM inside the stream as the
766 # specification requires. Any such mark will be considered as a part
767 # of the document.
769 # TODO: We need to make tab handling rules more sane. A good rule is
770 # Tabs cannot precede tokens
771 # BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
772 # KEY(block), VALUE(block), BLOCK-ENTRY
773 # So the checking code is
774 # if <TAB>:
775 # self.allow_simple_keys = False
776 # We also need to add the check for `allow_simple_keys == True` to
777 # `unwind_indent` before issuing BLOCK-END.
778 # Scanners for block, flow, and plain scalars need to be modified.
780 if self.reader.index == 0 and self.reader.peek() == u'\uFEFF':
781 self.reader.forward()
782 found = False
783 while not found:
784 while self.reader.peek() == u' ':
785 self.reader.forward()
786 if self.reader.peek() == u'#':
787 while self.reader.peek() not in u'\0\r\n\x85\u2028\u2029':
788 self.reader.forward()
789 if self.scan_line_break():
790 if not self.flow_level:
791 self.allow_simple_key = True
792 else:
793 found = True
795 def scan_directive(self):
796 # See the specification for details.
797 start_mark = self.reader.get_mark()
798 self.reader.forward()
799 name = self.scan_directive_name(start_mark)
800 value = None
801 if name == u'YAML':
802 value = self.scan_yaml_directive_value(start_mark)
803 end_mark = self.reader.get_mark()
804 elif name == u'TAG':
805 value = self.scan_tag_directive_value(start_mark)
806 end_mark = self.reader.get_mark()
807 else:
808 end_mark = self.reader.get_mark()
809 while self.reader.peek() not in u'\0\r\n\x85\u2028\u2029':
810 self.reader.forward()
811 self.scan_directive_ignored_line(start_mark)
812 return DirectiveToken(name, value, start_mark, end_mark)
814 def scan_directive_name(self, start_mark):
815 # See the specification for details.
816 length = 0
817 ch = self.reader.peek(length)
818 while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \
819 or ch in u'-_':
820 length += 1
821 ch = self.reader.peek(length)
822 if not length:
823 raise ScannerError("while scanning a directive", start_mark,
824 "expected alphabetic or numeric character, but found %r"
825 % ch.encode('utf-8'), self.reader.get_mark())
826 value = self.reader.prefix(length)
827 self.reader.forward(length)
828 ch = self.reader.peek()
829 if ch not in u'\0 \r\n\x85\u2028\u2029':
830 raise ScannerError("while scanning a directive", start_mark,
831 "expected alphabetic or numeric character, but found %r"
832 % ch.encode('utf-8'), self.reader.get_mark())
833 return value
835 def scan_yaml_directive_value(self, start_mark):
836 # See the specification for details.
837 while self.reader.peek() == u' ':
838 self.reader.forward()
839 major = self.scan_yaml_directive_number(start_mark)
840 if self.reader.peek() != '.':
841 raise ScannerError("while scanning a directive", start_mark,
842 "expected a digit or '.', but found %r"
843 % self.reader.peek().encode('utf-8'),
844 self.reader.get_mark())
845 self.reader.forward()
846 minor = self.scan_yaml_directive_number(start_mark)
847 if self.reader.peek() not in u'\0 \r\n\x85\u2028\u2029':
848 raise ScannerError("while scanning a directive", start_mark,
849 "expected a digit or ' ', but found %r"
850 % self.reader.peek().encode('utf-8'),
851 self.reader.get_mark())
852 return (major, minor)
854 def scan_yaml_directive_number(self, start_mark):
855 # See the specification for details.
856 ch = self.reader.peek()
857 if not (u'0' <= ch <= '9'):
858 raise ScannerError("while scanning a directive", start_mark,
859 "expected a digit, but found %r" % ch.encode('utf-8'),
860 self.reader.get_mark())
861 length = 0
862 while u'0' <= self.reader.peek(length) <= u'9':
863 length += 1
864 value = int(self.reader.prefix(length))
865 self.reader.forward(length)
866 return value
868 def scan_tag_directive_value(self, start_mark):
869 # See the specification for details.
870 while self.reader.peek() == u' ':
871 self.reader.forward()
872 handle = self.scan_tag_directive_handle(start_mark)
873 while self.reader.peek() == u' ':
874 self.reader.forward()
875 prefix = self.scan_tag_directive_prefix(start_mark)
876 return (handle, prefix)
878 def scan_tag_directive_handle(self, start_mark):
879 # See the specification for details.
880 value = self.scan_tag_handle('directive', start_mark)
881 ch = self.reader.peek()
882 if ch != u' ':
883 raise ScannerError("while scanning a directive", start_mark,
884 "expected ' ', but found %r" % ch.encode('utf-8'),
885 self.reader.get_mark())
886 return value
888 def scan_tag_directive_prefix(self, start_mark):
889 # See the specification for details.
890 value = self.scan_tag_uri('directive', start_mark)
891 ch = self.reader.peek()
892 if ch not in u'\0 \r\n\x85\u2028\u2029':
893 raise ScannerError("while scanning a directive", start_mark,
894 "expected ' ', but found %r" % ch.encode('utf-8'),
895 self.reader.get_mark())
896 return value
898 def scan_directive_ignored_line(self, start_mark):
899 # See the specification for details.
900 while self.reader.peek() == u' ':
901 self.reader.forward()
902 if self.reader.peek() == u'#':
903 while self.reader.peek() not in u'\0\r\n\x85\u2028\u2029':
904 self.reader.forward()
905 ch = self.reader.peek()
906 if ch not in u'\0\r\n\x85\u2028\u2029':
907 raise ScannerError("while scanning a directive", start_mark,
908 "expected a comment or a line break, but found %r"
909 % ch.encode('utf-8'), self.reader.get_mark())
910 self.scan_line_break()
912 def scan_anchor(self, TokenClass):
913 # The specification does not restrict characters for anchors and
914 # aliases. This may lead to problems, for instance, the document:
915 # [ *alias, value ]
916 # can be interpteted in two ways, as
917 # [ "value" ]
918 # and
919 # [ *alias , "value" ]
920 # Therefore we restrict aliases to numbers and ASCII letters.
921 start_mark = self.reader.get_mark()
922 indicator = self.reader.peek()
923 if indicator == '*':
924 name = 'alias'
925 else:
926 name = 'anchor'
927 self.reader.forward()
928 length = 0
929 ch = self.reader.peek(length)
930 while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \
931 or ch in u'-_':
932 length += 1
933 ch = self.reader.peek(length)
934 if not length:
935 raise ScannerError("while scanning an %s" % name, start_mark,
936 "expected alphabetic or numeric character, but found %r"
937 % ch.encode('utf-8'), self.reader.get_mark())
938 value = self.reader.prefix(length)
939 self.reader.forward(length)
940 ch = self.reader.peek()
941 if ch not in u'\0 \t\r\n\x85\u2028\u2029?:,]}%@`':
942 raise ScannerError("while scanning an %s" % name, start_mark,
943 "expected alphabetic or numeric character, but found %r"
944 % ch.encode('utf-8'), self.reader.get_mark())
945 end_mark = self.reader.get_mark()
946 return TokenClass(value, start_mark, end_mark)
948 def scan_tag(self):
949 # See the specification for details.
950 start_mark = self.reader.get_mark()
951 ch = self.reader.peek(1)
952 if ch == u'<':
953 handle = None
954 self.reader.forward(2)
955 suffix = self.scan_tag_uri('tag', start_mark)
956 if self.reader.peek() != u'>':
957 raise ScannerError("while parsing a tag", start_mark,
958 "expected '>', but found %r" % self.reader.peek().encode('utf-8'),
959 self.reader.get_mark())
960 self.reader.forward()
961 elif ch in u'\0 \t\r\n\x85\u2028\u2029':
962 handle = None
963 suffix = u'!'
964 self.reader.forward()
965 else:
966 length = 1
967 use_handle = False
968 while ch not in u'\0 \r\n\x85\u2028\u2029':
969 if ch == u'!':
970 use_handle = True
971 break
972 length += 1
973 ch = self.reader.peek(length)
974 handle = u'!'
975 if use_handle:
976 handle = self.scan_tag_handle('tag', start_mark)
977 else:
978 handle = u'!'
979 self.reader.forward()
980 suffix = self.scan_tag_uri('tag', start_mark)
981 ch = self.reader.peek()
982 if ch not in u'\0 \r\n\x85\u2028\u2029':
983 raise ScannerError("while scanning a tag", start_mark,
984 "expected ' ', but found %r" % ch.encode('utf-8'),
985 self.reader.get_mark())
986 value = (handle, suffix)
987 end_mark = self.reader.get_mark()
988 return TagToken(value, start_mark, end_mark)
990 def scan_block_scalar(self, style):
991 # See the specification for details.
993 if style == '>':
994 folded = True
995 else:
996 folded = False
998 chunks = []
999 start_mark = self.reader.get_mark()
1001 # Scan the header.
1002 self.reader.forward()
1003 chomping, increment = self.scan_block_scalar_indicators(start_mark)
1004 self.scan_block_scalar_ignored_line(start_mark)
1006 # Determine the indentation level and go to the first non-empty line.
1007 min_indent = self.indent+1
1008 if min_indent < 1:
1009 min_indent = 1
1010 if increment is None:
1011 breaks, max_indent, end_mark = self.scan_block_scalar_indentation()
1012 indent = max(min_indent, max_indent)
1013 else:
1014 indent = min_indent+increment-1
1015 breaks, end_mark = self.scan_block_scalar_breaks(indent)
1016 line_break = u''
1018 # Scan the inner part of the block scalar.
1019 while self.reader.column == indent and self.reader.peek() != u'\0':
1020 chunks.extend(breaks)
1021 leading_non_space = self.reader.peek() not in u' \t'
1022 length = 0
1023 while self.reader.peek(length) not in u'\0\r\n\x85\u2028\u2029':
1024 length += 1
1025 chunks.append(self.reader.prefix(length))
1026 self.reader.forward(length)
1027 line_break = self.scan_line_break()
1028 breaks, end_mark = self.scan_block_scalar_breaks(indent)
1029 if self.reader.column == indent and self.reader.peek() != u'\0':
1031 # Unfortunately, folding rules are ambiguous.
1033 # This is the folding according to the specification:
1035 if folded and line_break == u'\n' \
1036 and leading_non_space and self.reader.peek() not in u' \t':
1037 if not breaks:
1038 chunks.append(u' ')
1039 else:
1040 chunks.append(line_break)
1042 # This is Clark Evans's interpretation (also in the spec
1043 # examples):
1045 #if folded and line_break == u'\n':
1046 # if not breaks:
1047 # if self.reader.peek() not in ' \t':
1048 # chunks.append(u' ')
1049 # else:
1050 # chunks.append(line_break)
1051 #else:
1052 # chunks.append(line_break)
1053 else:
1054 break
1056 # Chomp the tail.
1057 if chomping is not False:
1058 chunks.append(line_break)
1059 if chomping is True:
1060 chunks.extend(breaks)
1062 # We are done.
1063 return ScalarToken(u''.join(chunks), False, start_mark, end_mark,
1064 style)
1066 def scan_block_scalar_indicators(self, start_mark):
1067 # See the specification for details.
1068 chomping = None
1069 increment = None
1070 ch = self.reader.peek()
1071 if ch in u'+-':
1072 if ch == '+':
1073 chomping = True
1074 else:
1075 chomping = False
1076 self.reader.forward()
1077 ch = self.reader.peek()
1078 if ch in u'0123456789':
1079 increment = int(ch)
1080 if increment == 0:
1081 raise ScannerError("while scanning a block scalar", start_mark,
1082 "expected indentation indicator in the range 1-9, but found 0",
1083 self.reader.get_mark())
1084 self.reader.forward()
1085 elif ch in u'0123456789':
1086 increment = int(ch)
1087 if increment == 0:
1088 raise ScannerError("while scanning a block scalar", start_mark,
1089 "expected indentation indicator in the range 1-9, but found 0",
1090 self.reader.get_mark())
1091 self.reader.forward()
1092 ch = self.reader.peek()
1093 if ch in u'+-':
1094 if ch == '+':
1095 chomping = True
1096 else:
1097 chomping = False
1098 self.reader.forward()
1099 ch = self.reader.peek()
1100 if ch not in u'\0 \r\n\x85\u2028\u2029':
1101 raise ScannerError("while scanning a block scalar", start_mark,
1102 "expected chomping or indentation indicators, but found %r"
1103 % ch.encode('utf-8'), self.reader.get_mark())
1104 return chomping, increment
1106 def scan_block_scalar_ignored_line(self, start_mark):
1107 # See the specification for details.
1108 while self.reader.peek() == u' ':
1109 self.reader.forward()
1110 if self.reader.peek() == u'#':
1111 while self.reader.peek() not in u'\0\r\n\x85\u2028\u2029':
1112 self.reader.forward()
1113 ch = self.reader.peek()
1114 if ch not in u'\0\r\n\x85\u2028\u2029':
1115 raise ScannerError("while scanning a block scalar", start_mark,
1116 "expected a comment or a line break, but found %r"
1117 % ch.encode('utf-8'), self.reader.get_mark())
1118 self.scan_line_break()
1120 def scan_block_scalar_indentation(self):
1121 # See the specification for details.
1122 chunks = []
1123 max_indent = 0
1124 end_mark = self.reader.get_mark()
1125 while self.reader.peek() in u' \r\n\x85\u2028\u2029':
1126 if self.reader.peek() != u' ':
1127 chunks.append(self.scan_line_break())
1128 end_mark = self.reader.get_mark()
1129 else:
1130 self.reader.forward()
1131 if self.reader.column > max_indent:
1132 max_indent = self.reader.column
1133 return chunks, max_indent, end_mark
1135 def scan_block_scalar_breaks(self, indent):
1136 # See the specification for details.
1137 chunks = []
1138 end_mark = self.reader.get_mark()
1139 while self.reader.column < indent and self.reader.peek() == u' ':
1140 self.reader.forward()
1141 while self.reader.peek() in u'\r\n\x85\u2028\u2029':
1142 chunks.append(self.scan_line_break())
1143 end_mark = self.reader.get_mark()
1144 while self.reader.column < indent and self.reader.peek() == u' ':
1145 self.reader.forward()
1146 return chunks, end_mark
1148 def scan_flow_scalar(self, style):
1149 # See the specification for details.
1150 # Note that we loose indentation rules for quoted scalars. Quoted
1151 # scalars don't need to adhere indentation because " and ' clearly
1152 # mark the beginning and the end of them. Therefore we are less
1153 # restrictive then the specification requires. We only need to check
1154 # that document separators are not included in scalars.
1155 if style == '"':
1156 double = True
1157 else:
1158 double = False
1159 chunks = []
1160 start_mark = self.reader.get_mark()
1161 quote = self.reader.peek()
1162 self.reader.forward()
1163 chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
1164 while self.reader.peek() != quote:
1165 chunks.extend(self.scan_flow_scalar_spaces(double, start_mark))
1166 chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
1167 self.reader.forward()
1168 end_mark = self.reader.get_mark()
1169 return ScalarToken(u''.join(chunks), False, start_mark, end_mark,
1170 style)
    # Map of single-character escape codes (the character after '\' in a
    # double-quoted scalar) to the characters they denote.
    ESCAPE_REPLACEMENTS = {
        u'0':   u'\0',
        u'a':   u'\x07',
        u'b':   u'\x08',
        u't':   u'\x09',
        u'\t':  u'\x09',
        u'n':   u'\x0A',
        u'v':   u'\x0B',
        u'f':   u'\x0C',
        u'r':   u'\x0D',
        u'e':   u'\x1B',
        u' ':   u'\x20',
        u'\"':  u'\"',
        u'\\':  u'\\',
        u'N':   u'\x85',
        u'_':   u'\xA0',
        u'L':   u'\u2028',
        u'P':   u'\u2029',
    # Map of numeric escape introducers to the number of hexadecimal digits
    # that must follow: \xXX, \uXXXX, \UXXXXXXXX.
    ESCAPE_CODES = {
        u'x':   2,
        u'u':   4,
        u'U':   8,
1198 def scan_flow_scalar_non_spaces(self, double, start_mark):
1199 # See the specification for details.
1200 chunks = []
1201 while True:
1202 length = 0
1203 while self.reader.peek(length) not in u'\'\"\\\0 \t\r\n\x85\u2028\u2029':
1204 length += 1
1205 if length:
1206 chunks.append(self.reader.prefix(length))
1207 self.reader.forward(length)
1208 ch = self.reader.peek()
1209 if not double and ch == u'\'' and self.reader.peek(1) == u'\'':
1210 chunks.append(u'\'')
1211 self.reader.forward(2)
1212 elif (double and ch == u'\'') or (not double and ch in u'\"\\'):
1213 chunks.append(ch)
1214 self.reader.forward()
1215 elif double and ch == u'\\':
1216 self.reader.forward()
1217 ch = self.reader.peek()
1218 if ch in self.ESCAPE_REPLACEMENTS:
1219 chunks.append(self.ESCAPE_REPLACEMENTS[ch])
1220 self.reader.forward()
1221 elif ch in self.ESCAPE_CODES:
1222 length = self.ESCAPE_CODES[ch]
1223 self.reader.forward()
1224 for k in range(length):
1225 if self.reader.peek(k) not in u'0123456789ABCDEFabcdef':
1226 raise ScannerError("while scanning a double-quoted scalar", start_mark,
1227 "expected escape sequence of %d hexdecimal numbers, but found %r" %
1228 (length, self.reader.peek(k).encode('utf-8')), self.reader.get_mark())
1229 code = int(self.reader.prefix(length), 16)
1230 chunks.append(unichr(code))
1231 self.reader.forward(length)
1232 elif ch in u'\r\n\x85\u2028\u2029':
1233 self.scan_line_break()
1234 chunks.extend(self.scan_flow_scalar_breaks(double, start_mark))
1235 else:
1236 raise ScannerError("while scanning a double-quoted scalar", start_mark,
1237 "found unknown escape character %r" % ch.encode('utf-8'), self.reader.get_mark())
1238 else:
1239 return chunks
1241 def scan_flow_scalar_spaces(self, double, start_mark):
1242 # See the specification for details.
1243 chunks = []
1244 length = 0
1245 while self.reader.peek(length) in u' \t':
1246 length += 1
1247 whitespaces = self.reader.prefix(length)
1248 self.reader.forward(length)
1249 ch = self.reader.peek()
1250 if ch == u'\0':
1251 raise ScannerError("while scanning a quoted scalar", start_mark,
1252 "found unexpected end of stream", self.reader.get_mark())
1253 elif ch in u'\r\n\x85\u2028\u2029':
1254 line_break = self.scan_line_break()
1255 breaks = self.scan_flow_scalar_breaks(double, start_mark)
1256 if line_break != u'\n':
1257 chunks.append(line_break)
1258 elif not breaks:
1259 chunks.append(u' ')
1260 chunks.extend(breaks)
1261 else:
1262 chunks.append(whitespaces)
1263 return chunks
1265 def scan_flow_scalar_breaks(self, double, start_mark):
1266 # See the specification for details.
1267 chunks = []
1268 while True:
1269 # Instead of checking indentation, we check for document
1270 # separators.
1271 prefix = self.reader.prefix(3)
1272 if (prefix == u'---' or prefix == u'...') \
1273 and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
1274 raise ScannerError("while scanning a quoted scalar", start_mark,
1275 "found unexpected document separator", self.reader.get_mark())
1276 while self.reader.peek() in u' \t':
1277 self.reader.forward()
1278 if self.reader.peek() in u'\r\n\x85\u2028\u2029':
1279 chunks.append(self.scan_line_break())
1280 else:
1281 return chunks
1283 def scan_plain(self):
1284 # See the specification for details.
1285 # We add an additional restriction for the flow context:
1286 # plain scalars in the flow context cannot contain ',', ':' and '?'.
1287 # We also keep track of the `allow_simple_key` flag here.
1288 # Indentation rules are loosed for the flow context.
1289 chunks = []
1290 start_mark = self.reader.get_mark()
1291 end_mark = start_mark
1292 indent = self.indent+1
1293 # We allow zero indentation for scalars, but then we need to check for
1294 # document separators at the beginning of the line.
1295 #if indent == 0:
1296 # indent = 1
1297 spaces = []
1298 while True:
1299 length = 0
1300 if self.reader.peek() == u'#':
1301 break
1302 while True:
1303 ch = self.reader.peek(length)
1304 if ch in u'\0 \t\r\n\x85\u2028\u2029' \
1305 or (not self.flow_level and ch == u':' and
1306 self.reader.peek(length+1) in u'\0 \t\r\n\x28\u2028\u2029') \
1307 or (self.flow_level and ch in u',:?[]{}'):
1308 break
1309 length += 1
1310 if length == 0:
1311 break
1312 self.allow_simple_key = False
1313 chunks.extend(spaces)
1314 chunks.append(self.reader.prefix(length))
1315 self.reader.forward(length)
1316 end_mark = self.reader.get_mark()
1317 spaces = self.scan_plain_spaces(indent, start_mark)
1318 if not spaces or self.reader.peek() == u'#' \
1319 or (not self.flow_level and self.reader.column < indent):
1320 break
1321 return ScalarToken(u''.join(chunks), True, start_mark, end_mark)
1323 def scan_plain_spaces(self, indent, start_mark):
1324 # See the specification for details.
1325 # The specification is really confusing about tabs in plain scalars.
1326 # We just forbid them completely. Do not use tabs in YAML!
1327 chunks = []
1328 length = 0
1329 while self.reader.peek(length) in u' ':
1330 length += 1
1331 whitespaces = self.reader.prefix(length)
1332 self.reader.forward(length)
1333 ch = self.reader.peek()
1334 if ch in u'\r\n\x85\u2028\u2029':
1335 line_break = self.scan_line_break()
1336 self.allow_simple_key = True
1337 prefix = self.reader.prefix(3)
1338 if (prefix == u'---' or prefix == u'...') \
1339 and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
1340 return
1341 breaks = []
1342 while self.reader.peek() in u' \r\n\x85\u2028\u2029':
1343 if self.reader.peek() == ' ':
1344 self.reader.forward()
1345 else:
1346 breaks.append(self.scan_line_break())
1347 prefix = self.reader.prefix(3)
1348 if (prefix == u'---' or prefix == u'...') \
1349 and self.reader.peek(3) in u'\0 \t\r\n\x85\u2028\u2029':
1350 return
1351 if line_break != u'\n':
1352 chunks.append(line_break)
1353 elif not breaks:
1354 chunks.append(u' ')
1355 chunks.extend(breaks)
1356 elif whitespaces:
1357 chunks.append(whitespaces)
1358 return chunks
1360 def scan_tag_handle(self, name, start_mark):
1361 # See the specification for details.
1362 # For some strange reasons, the specification does not allow '_' in
1363 # tag handles. I have allowed it anyway.
1364 ch = self.reader.peek()
1365 if ch != u'!':
1366 raise ScannerError("while scanning a %s" % name, start_mark,
1367 "expected '!', but found %r" % ch.encode('utf-8'),
1368 self.reader.get_mark())
1369 length = 1
1370 ch = self.reader.peek(length)
1371 if ch != u' ':
1372 while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \
1373 or ch in u'-_':
1374 length += 1
1375 ch = self.reader.peek(length)
1376 if ch != u'!':
1377 self.reader.forward(length)
1378 raise ScannerError("while scanning a %s" % name, start_mark,
1379 "expected '!', but found %r" % ch.encode('utf-8'),
1380 self.reader.get_mark())
1381 length += 1
1382 value = self.reader.prefix(length)
1383 self.reader.forward(length)
1384 return value
1386 def scan_tag_uri(self, name, start_mark):
1387 # See the specification for details.
1388 # Note: we do not check if URI is well-formed.
1389 chunks = []
1390 length = 0
1391 ch = self.reader.peek(length)
1392 while u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \
1393 or ch in u'-;/?:@&=+$,_.!~*\'()[]%':
1394 if ch == u'%':
1395 chunks.append(self.reader.prefix(length))
1396 self.reader.forward(length)
1397 length = 0
1398 chunks.append(self.scan_uri_escapes(name, start_mark))
1399 else:
1400 length += 1
1401 ch = self.reader.peek(length)
1402 if length:
1403 chunks.append(self.reader.prefix(length))
1404 self.reader.forward(length)
1405 length = 0
1406 if not chunks:
1407 raise ScannerError("while parsing a %s" % name, start_mark,
1408 "expected URI, but found %r" % ch.encode('utf-8'),
1409 self.reader.get_mark())
1410 return u''.join(chunks)
1412 def scan_uri_escapes(self, name, start_mark):
1413 # See the specification for details.
1414 bytes = []
1415 mark = self.reader.get_mark()
1416 while self.reader.peek() == u'%':
1417 self.reader.forward()
1418 for k in range(2):
1419 if self.reader.peek(k) not in u'0123456789ABCDEFabcdef':
1420 raise ScannerError("while scanning a %s" % name, start_mark,
1421 "expected URI escape sequence of 2 hexdecimal numbers, but found %r" %
1422 (self.reader.peek(k).encode('utf-8')), self.reader.get_mark())
1423 bytes.append(chr(int(self.reader.prefix(2), 16)))
1424 self.reader.forward(2)
1425 try:
1426 value = unicode(''.join(bytes), 'utf-8')
1427 except UnicodeDecodeError, exc:
1428 raise ScannerError("while scanning a %s" % name, start_mark, str(exc), mark)
1429 return value
1431 def scan_line_break(self):
1432 # Transforms:
1433 # '\r\n' : '\n'
1434 # '\r' : '\n'
1435 # '\n' : '\n'
1436 # '\x85' : '\n'
1437 # '\u2028' : '\u2028'
1438 # '\u2029 : '\u2029'
1439 # default : ''
1440 ch = self.reader.peek()
1441 if ch in u'\r\n\x85':
1442 if self.reader.prefix(2) == u'\r\n':
1443 self.reader.forward(2)
1444 else:
1445 self.reader.forward()
1446 return u'\n'
1447 elif ch in u'\u2028\u2029':
1448 self.reader.forward()
1449 return ch
1450 return u''
1452 #try:
1453 # import psyco
1454 # psyco.bind(Scanner)
1455 #except ImportError:
1456 # pass