Working on the scanner.
[pyyaml/python3.git] / lib / yaml / scanner.py
bloba1785efa9a59051d8cc71905f47b9a6502242093
# Tokens:
# YAML-DIRECTIVE(major_version, minor_version), TAG-DIRECTIVE(handle, prefix)
# RESERVED-DIRECTIVE(name)
# DOCUMENT-START, DOCUMENT-END
# BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END
# FLOW-SEQUENCE-START, FLOW-MAPPING-START, FLOW-SEQUENCE-END, FLOW-MAPPING-END
# ENTRY, KEY, VALUE
# ALIAS(name), ANCHOR(name), TAG(value), SCALAR(value, plain)
# Names exported by `from <this module> import *`: the scanner class and the
# exception it raises.
__all__ = ['Scanner', 'ScannerError']
13 from error import YAMLError
14 from tokens import *
class ScannerError(YAMLError):
    """Error raised by the scanner.

    An instance carries up to two (text, marker) pairs -- a `context` and a
    `problem` -- plus an optional free-form `description`.  Every field may
    be None, in which case it is left out of the rendered message.
    """
    # TODO:
    # ScannerError: while reading a quoted string
    #         in '...', line 5, column 10:
    # key: "valu\?e"
    #      ^
    # got unknown quote character '?'
    #         in '...', line 5, column 15:
    # key: "valu\?e"
    #            ^

    def __init__(self, context=None, context_marker=None,
            problem=None, problem_marker=None, description=None):
        # What the scanner was doing, and where that activity started.
        self.context, self.context_marker = context, context_marker
        # What went wrong, and where it was detected.
        self.problem, self.problem_marker = problem, problem_marker
        # Optional extra text appended after everything else.
        self.description = description

    def __str__(self):
        """Render the non-None fields, one per line, context first."""
        pairs = [(self.context, self.context_marker),
                (self.problem, self.problem_marker)]
        chunks = []
        for text, where in pairs:
            if text is not None:
                chunks.append(text)
            if where is not None:
                # Markers are converted with str(); texts are used as given.
                chunks.append(str(where))
        if self.description is not None:
            chunks.append(self.description)
        return '\n'.join(chunks)
class SimpleKey:
    """Record describing a position where a simple key may have started.

    It only stores the six values handed to the constructor:
    `token_number`, `required`, `index`, `line`, `column` and `marker`.
    """

    def __init__(self, token_number, required, index, line, column, marker):
        # Number of the token the key would start at, and whether a key is
        # mandatory at that position.
        self.token_number, self.required = token_number, required
        # Position of the candidate key in the input.
        self.index, self.line, self.column = index, line, column
        # Marker used when reporting errors about this key.
        self.marker = marker
class Scanner:
    """Split a YAML character stream into tokens.

    Characters are pulled from `reader`, which must provide `peek(k)`,
    `forward(k)` and `get_marker()` as well as the `index`, `line` and
    `column` attributes.  Tokens are produced on demand through the two
    public methods, `peek_token()` and `get_token()`.
    """

    def __init__(self, reader):
        """Initialize the scanner."""
        # The input stream. The Reader class does the dirty work of checking
        # for BOM and converting the input data to Unicode. It also adds NUL
        # to the end.
        #
        # Reader supports the following methods
        #   self.reader.peek(k=1)     # peek the next k characters
        #   self.reader.forward(k=1)  # read the next k characters and move
        #                             # the pointer
        self.reader = reader

        # Had we reached the end of the stream?
        self.done = False

        # The number of unclosed '{' and '['. `flow_level == 0` means block
        # context.
        self.flow_level = 0

        # List of processed tokens that are not yet emitted.
        self.tokens = []

        # Number of tokens that were emitted through the `get_token` method.
        self.tokens_taken = 0

        # The current indentation level.
        self.indent = -1

        # Past indentation levels.
        self.indents = []

        # Variables related to simple keys treatment.

        # A simple key is a key that is not denoted by the '?' indicator.
        # Example of simple keys:
        #   ---
        #   block simple key: value
        #   ? not a simple key:
        #   : { flow simple key: value }
        # We emit the KEY token before all keys, so when we find a potential
        # simple key, we try to locate the corresponding ':' indicator.
        # Simple keys should be limited to a single line and 1024 characters.

        # Can a simple key start at the current position? A simple key may
        # start:
        # - at the beginning of the line, not counting indentation spaces
        #       (in block context),
        # - after '{', '[', ',' (in the flow context),
        # - after '?', ':', '-' (in the block context).
        # In the block context, this flag also signifies whether a block
        # collection may start at the current position.
        self.allow_simple_key = True

        # Keep track of possible simple keys. This is a dictionary. The key
        # is `flow_level`; there can be no more than one possible simple key
        # for each level. The value is a SimpleKey record:
        #   (token_number, required, index, line, column, marker)
        # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow),
        # '[', or '{' tokens.
        self.possible_simple_keys = {}

    # Two public methods.

    def peek_token(self):
        """Return the current token without consuming it.

        Returns None when no token is pending.
        """
        while self.need_more_tokens():
            self.fetch_more_tokens()
        if self.tokens:
            return self.tokens[0]
        return None

    def get_token(self):
        """Return the current token and remove it from the pending list.

        Returns None when no token is pending.
        """
        while self.need_more_tokens():
            self.fetch_more_tokens()
        if self.tokens:
            self.tokens_taken += 1
            return self.tokens.pop(0)
        return None

    # Private methods.

    def need_more_tokens(self):
        """Tell whether more input must be scanned before a token is emitted."""
        if self.done:
            return False
        if not self.tokens:
            return True
        # The current token may be a potential simple key, so we
        # need to look further.
        self.stale_possible_simple_keys()
        return self.next_possible_simple_key() == self.tokens_taken

    def fetch_more_tokens(self):
        """Scan the next token(s) and append them to `self.tokens`."""

        # Eat whitespaces and comments until we reach the next token.
        self.scan_to_next_token()

        # Remove obsolete possible simple keys.
        self.stale_possible_simple_keys()

        # Compare the current indentation and column. It may add some tokens
        # and decrease the current indentation level.
        self.unwind_indent(self.reader.column)

        # Peek the next character.
        ch = self.reader.peek()

        # Is it the end of reader?
        if ch == u'\0':
            return self.fetch_end()

        # Is it a directive?
        if ch == u'%' and self.check_directive():
            return self.fetch_directive()

        # Is it the document start?
        if ch == u'-' and self.check_document_start():
            return self.fetch_document_start()

        # Is it the document end?
        if ch == u'.' and self.check_document_end():
            return self.fetch_document_end()

        # Note: the order of the following checks is NOT significant.

        # Is it the flow sequence start indicator?
        if ch == u'[':
            return self.fetch_flow_sequence_start()

        # Is it the flow mapping start indicator?
        if ch == u'{':
            return self.fetch_flow_mapping_start()

        # Is it the flow sequence end indicator?
        if ch == u']':
            return self.fetch_flow_sequence_end()

        # Is it the flow mapping end indicator?
        if ch == u'}':
            return self.fetch_flow_mapping_end()

        # Is it the entry indicator?
        if ch in u'-,' and self.check_entry():
            return self.fetch_entry()

        # Is it the key indicator?
        if ch == u'?' and self.check_key():
            return self.fetch_key()

        # Is it the value indicator?
        if ch == u':' and self.check_value():
            return self.fetch_value()

        # Is it an alias?
        if ch == u'*':
            return self.fetch_alias()

        # Is it an anchor?
        if ch == u'&':
            return self.fetch_anchor()

        # Is it a tag?
        if ch == u'!':
            return self.fetch_tag()

        # Is it a literal scalar?
        if ch == u'|' and not self.flow_level:
            return self.fetch_literal()

        # Is it a folded scalar?
        if ch == u'>' and not self.flow_level:
            return self.fetch_folded()

        # Is it a single quoted scalar?
        if ch == u'\'':
            return self.fetch_single()

        # Is it a double quoted scalar?
        if ch == u'\"':
            return self.fetch_double()

        # It must be a plain scalar then.
        if self.check_plain():
            return self.fetch_plain()

        # No? It's an error. Let's produce a nice error message.
        self.invalid_token()

    # Simple keys treatment.

    def next_possible_simple_key(self):
        """Return the smallest token number among the possible simple keys.

        Returns None when no possible simple key is recorded.
        """
        if not self.possible_simple_keys:
            return None
        return min([key.token_number
                for key in self.possible_simple_keys.values()])

    def stale_possible_simple_keys(self):
        """Drop recorded keys that can no longer be simple keys.

        Raises ScannerError if a dropped key was required.
        """
        # According to the YAML specification, simple keys
        # - should be limited to a single line,
        # - should be no longer than 1024 characters.
        # Disabling this procedure will allow simple keys of any length and
        # height (may cause problems if indentation is broken though).
        #
        # Iterate over a snapshot of the levels: entries are deleted inside
        # the loop, which is not allowed while iterating the dict itself.
        for level in list(self.possible_simple_keys):
            key = self.possible_simple_keys[level]
            if key.line != self.reader.line  \
                    or self.reader.index-key.index > 1024:
                if key.required:
                    raise ScannerError("while scanning a simple key", key.marker,
                            "could not found expected ':'", self.reader.get_marker())
                del self.possible_simple_keys[level]

    def save_possible_simple_key(self):
        """Record the current position as a possible simple key start.

        Called for ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.  Nothing
        is recorded unless `allow_simple_key` is set.
        """

        # Check if a simple key is required at the current position.
        required = not self.flow_level and self.indent == self.reader.column

        # A simple key is required only if it is the first token in the current
        # line. Therefore it is always allowed.
        assert self.allow_simple_key or not required

        # The next token might be a simple key. Let's save its number and
        # position.
        if self.allow_simple_key:
            self.remove_possible_simple_key()
            token_number = self.tokens_taken+len(self.tokens)
            index = self.reader.index
            line = self.reader.line
            column = self.reader.column
            marker = self.reader.get_marker()
            key = SimpleKey(token_number, required,
                    index, line, column, marker)
            self.possible_simple_keys[self.flow_level] = key

    def remove_possible_simple_key(self):
        """Remove the saved possible key position at the current flow level."""
        if self.flow_level in self.possible_simple_keys:
            key = self.possible_simple_keys[self.flow_level]

            # I don't think it's possible, but I could be wrong.
            assert not key.required
            #if key.required:
            #    raise ScannerError("while scanning a simple key", key.marker,
            #            "could not found expected ':'", self.reader.get_marker())

            # Actually forget the key; otherwise a later ':' on the same
            # level would still be matched against it.
            del self.possible_simple_keys[self.flow_level]

    # Indentation functions.

    def unwind_indent(self, column):
        """Close every block collection indented deeper than `column`.

        Appends one BLOCK-END token per closed level.  Raises ScannerError
        when this would be needed inside a flow collection.
        """

        # In flow context, tokens should respect indentation.
        # Actually the condition should be `self.indent >= column` according to
        # the spec. But this condition will prohibit intuitively correct
        # constructions such as
        # key : {
        # }
        if self.flow_level and self.indent > column:
            raise ScannerError(None, None,
                    "invalid intendation or unclosed '[' or '{'",
                    self.reader.get_marker())

        # In block context, we may need to issue the BLOCK-END tokens.
        while self.indent > column:
            marker = self.reader.get_marker()
            self.indent = self.indents.pop()
            self.tokens.append(BlockEndToken(marker, marker))

    def add_indent(self, column):
        """Push a new indentation level if `column` is deeper than the current.

        Returns True when a level was pushed, False otherwise.
        """
        if self.indent < column:
            self.indents.append(self.indent)
            self.indent = column
            return True
        return False

    # Fetchers.

    def fetch_end(self):
        """Emit the END token and mark the scanner as done."""

        # Set the current indentation to -1.
        self.unwind_indent(-1)

        # Reset everything (not really needed).
        self.allow_simple_key = False
        self.possible_simple_keys = {}

        # Read the token.
        marker = self.reader.get_marker()

        # Add END.
        self.tokens.append(StreamEndToken(marker, marker))

        # The reader is ended.
        self.done = True

    def fetch_directive(self):
        """Emit a directive token."""

        # Set the current indentation to -1.
        self.unwind_indent(-1)

        # Reset simple keys.
        self.remove_possible_simple_key()
        self.allow_simple_key = False

        # Scan and add DIRECTIVE.
        self.tokens.append(self.scan_directive())

    def fetch_document_start(self):
        """Emit DOCUMENT-START."""
        self.fetch_document_indicator(DocumentStartToken)

    def fetch_document_end(self):
        """Emit DOCUMENT-END."""
        self.fetch_document_indicator(DocumentEndToken)

    def fetch_document_indicator(self, TokenClass):
        """Consume a three-character document indicator and emit `TokenClass`."""

        # Set the current indentation to -1.
        self.unwind_indent(-1)

        # Reset simple keys. Note that there could not be a block collection
        # after '---'.
        self.remove_possible_simple_key()
        self.allow_simple_key = False

        # Add DOCUMENT-START or DOCUMENT-END.
        start_marker = self.reader.get_marker()
        self.reader.forward(3)
        end_marker = self.reader.get_marker()
        self.tokens.append(TokenClass(start_marker, end_marker))

    def fetch_flow_sequence_start(self):
        """Emit FLOW-SEQUENCE-START."""
        self.fetch_flow_collection_start(FlowSequenceStartToken)

    def fetch_flow_mapping_start(self):
        """Emit FLOW-MAPPING-START."""
        self.fetch_flow_collection_start(FlowMappingStartToken)

    def fetch_flow_collection_start(self, TokenClass):
        """Consume '[' or '{', enter a deeper flow level, emit `TokenClass`."""

        # '[' and '{' may start a simple key.
        self.save_possible_simple_key()

        # Increase the flow level.
        self.flow_level += 1

        # Simple keys are allowed after '[' and '{'.
        self.allow_simple_key = True

        # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
        start_marker = self.reader.get_marker()
        self.reader.forward()
        end_marker = self.reader.get_marker()
        self.tokens.append(TokenClass(start_marker, end_marker))

    def fetch_flow_sequence_end(self):
        """Emit FLOW-SEQUENCE-END."""
        self.fetch_flow_collection_end(FlowSequenceEndToken)

    def fetch_flow_mapping_end(self):
        """Emit FLOW-MAPPING-END."""
        self.fetch_flow_collection_end(FlowMappingEndToken)

    def fetch_flow_collection_end(self, TokenClass):
        """Consume ']' or '}', leave the flow level, emit `TokenClass`."""

        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()

        # Decrease the flow level.
        self.flow_level -= 1

        # No simple keys after ']' or '}'.
        self.allow_simple_key = False

        # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
        start_marker = self.reader.get_marker()
        self.reader.forward()
        end_marker = self.reader.get_marker()
        self.tokens.append(TokenClass(start_marker, end_marker))

    def fetch_entry(self):
        """Consume '-' or ',' and emit ENTRY.

        In block context this may first emit BLOCK-SEQUENCE-START, and it
        raises ScannerError when an entry is not allowed here.
        """

        # Block context needs additional checks.
        if not self.flow_level:

            # Are we allowed to start a new entry?
            if not self.allow_simple_key:
                raise ScannerError(None, None,
                        "sequence entries are not allowed here",
                        self.reader.get_marker())

            # We may need to add BLOCK-SEQUENCE-START.
            if self.add_indent(self.reader.column):
                marker = self.reader.get_marker()
                self.tokens.append(BlockSequenceStartToken(marker, marker))

        # Simple keys are allowed after '-' and ','.
        self.allow_simple_key = True

        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()

        # Add ENTRY.
        start_marker = self.reader.get_marker()
        self.reader.forward()
        end_marker = self.reader.get_marker()
        self.tokens.append(EntryToken(start_marker, end_marker))

    def fetch_key(self):
        """Consume '?' and emit KEY.

        In block context this may first emit BLOCK-MAPPING-START, and it
        raises ScannerError when a key is not allowed here.
        """

        # Block context needs additional checks.
        if not self.flow_level:

            # Are we allowed to start a key (not necessarily a simple one)?
            if not self.allow_simple_key:
                raise ScannerError(None, None,
                        "mapping keys are not allowed here",
                        self.reader.get_marker())

            # We may need to add BLOCK-MAPPING-START.
            if self.add_indent(self.reader.column):
                marker = self.reader.get_marker()
                self.tokens.append(BlockMappingStartToken(marker, marker))

        # Simple keys are allowed after '?' in the block context.
        self.allow_simple_key = not self.flow_level

        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()

        # Add KEY.
        start_marker = self.reader.get_marker()
        self.reader.forward()
        end_marker = self.reader.get_marker()
        self.tokens.append(KeyToken(start_marker, end_marker))

    def fetch_value(self):
        """Consume ':' and emit VALUE.

        If a possible simple key is recorded for the current flow level, a
        KEY token (and, in block context, possibly BLOCK-MAPPING-START) is
        inserted before the token that started the key.
        """

        # Do we determine a simple key?
        if self.flow_level in self.possible_simple_keys:

            # Add KEY.
            key = self.possible_simple_keys[self.flow_level]
            del self.possible_simple_keys[self.flow_level]
            self.tokens.insert(key.token_number-self.tokens_taken,
                    KeyToken(key.marker, key.marker))

            # If this key starts a new block mapping, we need to add
            # BLOCK-MAPPING-START.
            if not self.flow_level:
                if self.add_indent(key.column):
                    self.tokens.insert(key.token_number-self.tokens_taken,
                            BlockMappingStartToken(key.marker, key.marker))

            # There cannot be two simple keys one after another.
            self.allow_simple_key = False

        # It must be a part of a complex key.
        else:

            # Block context needs additional checks.
            # (Do we really need them? They will be caught by the parser
            # anyway.)
            if not self.flow_level:

                # We are allowed to start a complex value if and only if
                # we can start a simple key.
                if not self.allow_simple_key:
                    raise ScannerError(None, None,
                            "mapping values are not allowed here",
                            self.reader.get_marker())

            # Simple keys are allowed after ':' in the block context.
            self.allow_simple_key = not self.flow_level

            # Reset possible simple key on the current level.
            self.remove_possible_simple_key()

        # Add VALUE.
        start_marker = self.reader.get_marker()
        self.reader.forward()
        end_marker = self.reader.get_marker()
        self.tokens.append(ValueToken(start_marker, end_marker))

    def fetch_alias(self):
        """Emit ALIAS."""

        # ALIAS could be a simple key.
        self.save_possible_simple_key()

        # No simple keys after ALIAS.
        self.allow_simple_key = False

        # Scan and add ALIAS.
        self.tokens.append(self.scan_anchor(AliasToken))

    def fetch_anchor(self):
        """Emit ANCHOR."""

        # ANCHOR could start a simple key.
        self.save_possible_simple_key()

        # No simple keys after ANCHOR.
        self.allow_simple_key = False

        # Scan and add ANCHOR.
        self.tokens.append(self.scan_anchor(AnchorToken))

    def fetch_tag(self):
        """Emit TAG."""

        # TAG could start a simple key.
        self.save_possible_simple_key()

        # No simple keys after TAG.
        self.allow_simple_key = False

        # Scan and add TAG.
        self.tokens.append(self.scan_tag())

    def fetch_literal(self):
        """Emit the SCALAR for a literal ('|') block scalar."""
        self.fetch_block_scalar(folded=False)

    def fetch_folded(self):
        """Emit the SCALAR for a folded ('>') block scalar."""
        self.fetch_block_scalar(folded=True)

    def fetch_block_scalar(self, folded):
        """Emit the SCALAR for a block scalar."""

        # A simple key may follow a block scalar.
        self.allow_simple_key = True

        # Reset possible simple key on the current level.
        self.remove_possible_simple_key()

        # Scan and add SCALAR.
        self.tokens.append(self.scan_block_scalar(folded))

    def fetch_single(self):
        """Emit the SCALAR for a single quoted scalar."""
        self.fetch_flow_scalar(double=False)

    def fetch_double(self):
        """Emit the SCALAR for a double quoted scalar."""
        self.fetch_flow_scalar(double=True)

    def fetch_flow_scalar(self, double):
        """Emit the SCALAR for a quoted scalar."""

        # A flow scalar could be a simple key.
        self.save_possible_simple_key()

        # No simple keys after flow scalars.
        self.allow_simple_key = False

        # Scan and add SCALAR.
        self.tokens.append(self.scan_flow_scalar(double))

    def fetch_plain(self):
        """Emit the SCALAR for a plain scalar."""

        # A plain scalar could be a simple key.
        self.save_possible_simple_key()

        # No simple keys after plain scalars. But note that `scan_plain` will
        # change this flag if the scan is finished at the beginning of the
        # line.
        self.allow_simple_key = False

        # Scan and add SCALAR. May change `allow_simple_key`.
        self.tokens.append(self.scan_plain())

    # Checkers.

    def check_directive(self):
        """Tell whether the '%' at the current position starts a directive."""

        # DIRECTIVE:        ^ '%' ...
        # The '%' indicator is already checked.
        return self.reader.column == 0

    def check_document_start(self):
        """Tell whether the current position holds a document start marker."""

        # DOCUMENT-START:   ^ '---' (' '|'\n')
        if self.reader.column == 0:
            prefix = self.reader.peek(4)
            if prefix[:3] == u'---' and prefix[3] in u'\0 \t\r\n\x85\u2028\u2029':
                return True
        return False

    def check_document_end(self):
        """Tell whether the current position holds a document end marker."""

        # DOCUMENT-END:     ^ '...' (' '|'\n')
        if self.reader.column == 0:
            prefix = self.reader.peek(4)
            if prefix[:3] == u'...' and prefix[3] in u'\0 \t\r\n\x85\u2028\u2029':
                return True
        return False

    def check_entry(self):
        """Tell whether the current character is an entry indicator."""

        # ENTRY(flow context):      ','
        if self.flow_level:
            return self.reader.peek() == u','

        # ENTRY(block context):     '-' (' '|'\n')
        prefix = self.reader.peek(2)
        return prefix[0] == u'-' and prefix[1] in u'\0 \t\r\n\x85\u2028\u2029'

    def check_key(self):
        """Tell whether the '?' at the current position is a key indicator."""

        # KEY(flow context):    '?'
        if self.flow_level:
            return True

        # KEY(block context):   '?' (' '|'\n')
        prefix = self.reader.peek(2)
        return prefix[1] in u'\0 \t\r\n\x85\u2028\u2029'

    def check_value(self):
        """Tell whether the ':' at the current position is a value indicator."""

        # VALUE(flow context):  ':'
        if self.flow_level:
            return True

        # VALUE(block context): ':' (' '|'\n')
        prefix = self.reader.peek(2)
        return prefix[1] in u'\0 \t\r\n\x85\u2028\u2029'

    def check_plain(self):
        """Tell whether a plain scalar may start here (currently always)."""
        return True

    # Scanners.

    def scan_to_next_token(self):
        """Skip spaces, comments and line breaks up to the next token."""
        # We ignore spaces, line breaks and comments.
        # If we find a line break in the block context, we set the flag
        # `allow_simple_key` on.
        found = False
        while not found:
            while self.reader.peek() == u' ':
                self.reader.forward()
            if self.reader.peek() == u'#':
                while self.reader.peek() not in u'\0\r\n\x85\u2028\u2029':
                    self.reader.forward()
            if self.scan_line_break():
                if not self.flow_level:
                    self.allow_simple_key = True
            else:
                found = True

    def scan_directive(self):
        """Consume a directive line and return the matching directive token.

        Only the directive name is examined; the rest of the line is
        skipped without being parsed.
        """
        marker = self.reader.get_marker()
        # `peek(k)` returns k characters, so the length requested has to
        # match the length of the literal being compared.
        if self.reader.peek(6) == u'%YAML ':
            token = YAMLDirectiveToken(1, 1, marker, marker)
        elif self.reader.peek(5) == u'%TAG ':
            token = TagDirectiveToken(marker, marker)
        else:
            token = ReservedDirectiveToken('', marker, marker)
        while self.reader.peek() not in u'\0\r\n':
            self.reader.forward()
        # Step over the line break, but never past the terminating NUL so
        # that the end of the stream is still seen by the caller.
        if self.reader.peek() != u'\0':
            self.reader.forward()
        return token

    def scan_anchor(self, TokenClass):
        """Consume an anchor or alias and return a `TokenClass` instance."""
        start_marker = self.reader.get_marker()
        while self.reader.peek() not in u'\0 \t\r\n,:':
            self.reader.forward()
        end_marker = self.reader.get_marker()
        return TokenClass('', start_marker, end_marker)

    def scan_tag(self):
        """Consume a tag and return a TagToken."""
        start_marker = self.reader.get_marker()
        while self.reader.peek() not in u'\0 \t\r\n':
            self.reader.forward()
        end_marker = self.reader.get_marker()
        return TagToken('', start_marker, end_marker)

    def scan_block_scalar(self, folded):
        """Consume a block scalar and return a ScalarToken.

        The scalar content is skipped, not collected; `folded` is not used
        by this implementation.
        """
        start_marker = self.reader.get_marker()
        indent = self.indent+1
        if indent < 1:
            indent = 1
        while True:
            # Skip the rest of the current line.
            while self.reader.peek() not in u'\0\r\n\x85\u2028\u2029':
                self.reader.forward()
            # Step over the line break unless we are at the end of input.
            if self.reader.peek() != u'\0':
                self.reader.forward()
            # Consume up to `indent` leading spaces of the next line.
            count = 0
            while count < indent and self.reader.peek() == u' ':
                self.reader.forward()
                count += 1
            # A less indented line ends the scalar, unless it is empty or
            # starts with '#'.
            if count < indent and self.reader.peek() not in u'#\r\n\x85\u2028\u2029':
                break
        return ScalarToken('', False, start_marker, start_marker)

    def scan_flow_scalar(self, double):
        """Consume a quoted scalar and return a ScalarToken.

        The scalar content is skipped, not collected.
        """
        marker = self.reader.get_marker()
        quote = self.reader.peek()
        self.reader.forward()
        while self.reader.peek() != quote:
            if double and self.reader.peek() == u'\\':
                # Skip the backslash together with the escaped character.
                self.reader.forward(2)
            elif not double and self.reader.peek(3)[1:] == u'\'\'':
                # Skip a character followed by a doubled single quote.
                self.reader.forward(3)
            else:
                self.reader.forward(1)
        # Step over the closing quote.
        self.reader.forward(1)
        return ScalarToken('', False, marker, marker)

    def scan_plain(self):
        """Consume a plain scalar and return a ScalarToken.

        The scalar content is skipped, not collected.  `allow_simple_key`
        is updated while scanning.
        """
        indent = self.indent+1
        if indent < 1:
            indent = 1
        # `space` tracks whether the previously consumed character was
        # whitespace; a '#' only ends the scalar when it follows whitespace.
        space = False
        marker = self.reader.get_marker()
        while True:
            while self.reader.peek() == u' ':
                self.reader.forward()
                space = True
            while self.reader.peek() not in u'\0\r\n?:,[]{}#'   \
                    or (not space and self.reader.peek() == '#')    \
                    or (not self.flow_level and self.reader.peek() in '?,[]{}') \
                    or (not self.flow_level and self.reader.peek() == ':' and self.reader.peek(2)[1] not in u' \0\r\n'):
                space = self.reader.peek() not in u' \t'
                self.reader.forward()
                self.allow_simple_key = False
            if self.reader.peek() not in u'\r\n':
                break
            while self.reader.peek() in u'\r\n':
                self.reader.forward()
                if not self.flow_level:
                    self.allow_simple_key = True
            count = 0
            while self.reader.peek() == u' ' and count < indent:
                self.reader.forward()
                count += 1
            if count < indent:
                break
            space = True
        return ScalarToken('', True, marker, marker)

    def scan_line_break(self):
        """Consume one line break, if present, and return its normalized form.

        Returns u'' (and consumes nothing) when the current character is not
        a line break.
        """
        # Transforms:
        #   '\r\n'      :   '\n'
        #   '\r'        :   '\n'
        #   '\n'        :   '\n'
        #   '\x85'      :   '\n'
        #   '\u2028'    :   '\u2028'
        #   '\u2029'    :   '\u2029'
        #   default     :   ''
        ch = self.reader.peek()
        if ch in u'\r\n\x85':
            if self.reader.peek(2) == u'\r\n':
                self.reader.forward(2)
            else:
                self.reader.forward()
            return u'\n'
        elif ch in u'\u2028\u2029':
            self.reader.forward()
            return ch
        return u''

    def invalid_token(self):
        """Raise ScannerError for input that starts no known token."""
        raise ScannerError(None, None,
                "invalid token", self.reader.get_marker())
822 #try:
823 # import psyco
824 # psyco.bind(Scanner)
825 #except ImportError:
826 # pass