Add ICU message format support
[chromium-blink-merge.git] / third_party / jinja2 / lexer.py
bloba50128507bb98ac6bc57a76afe8a0776a2df2c49
1 # -*- coding: utf-8 -*-
2 """
3 jinja2.lexer
4 ~~~~~~~~~~~~
6 This module implements a Jinja / Python combination lexer. The
7 `Lexer` class provided by this module is used to do some preprocessing
8 for Jinja.
10 On the one hand it filters out invalid operators like the bitshift
11 operators we don't allow in templates. On the other hand it separates
12 template code and python code in expressions.
14 :copyright: (c) 2010 by the Jinja Team.
15 :license: BSD, see LICENSE for more details.
16 """
17 import re
19 from operator import itemgetter
20 from collections import deque
21 from jinja2.exceptions import TemplateSyntaxError
22 from jinja2.utils import LRUCache
23 from jinja2._compat import next, iteritems, implements_iterator, text_type, \
24 intern
# Lexers are cached by their configuration so that several environments
# with identical lexer settings can share a single instance.
_lexer_cache = LRUCache(50)

# Static regular expressions that do not depend on the interpreter.
whitespace_re = re.compile(r'\s+', re.U)
integer_re = re.compile(r'\d+')
float_re = re.compile(r'(?<!\.)\d+\.\d+')
newline_re = re.compile(r'(\r\n|\r|\n)')
string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                       r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S)

# Probe whether this interpreter accepts non-ASCII identifiers by compiling
# one.  If it does, names follow the unicode identifier rule; if it does not
# (a SyntaxError is raised), fall back to the plain ASCII rule.
try:
    compile('föö', '<unknown>', 'eval')
except SyntaxError:
    name_re = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b')
else:
    # Imported only on this branch, where the character classes are needed.
    from jinja2 import _stringdefs
    name_re = re.compile(r'[%s][%s]*' % (_stringdefs.xid_start,
                                         _stringdefs.xid_continue))
# Intern the token type names and keep references to them, so that the
# rest of the module can compare token types by identity (``is``).

# operator tokens
TOKEN_ADD = intern('add')
TOKEN_ASSIGN = intern('assign')
TOKEN_COLON = intern('colon')
TOKEN_COMMA = intern('comma')
TOKEN_DIV = intern('div')
TOKEN_DOT = intern('dot')
TOKEN_EQ = intern('eq')
TOKEN_FLOORDIV = intern('floordiv')
TOKEN_GT = intern('gt')
TOKEN_GTEQ = intern('gteq')
TOKEN_LBRACE = intern('lbrace')
TOKEN_LBRACKET = intern('lbracket')
TOKEN_LPAREN = intern('lparen')
TOKEN_LT = intern('lt')
TOKEN_LTEQ = intern('lteq')
TOKEN_MOD = intern('mod')
TOKEN_MUL = intern('mul')
TOKEN_NE = intern('ne')
TOKEN_PIPE = intern('pipe')
TOKEN_POW = intern('pow')
TOKEN_RBRACE = intern('rbrace')
TOKEN_RBRACKET = intern('rbracket')
TOKEN_RPAREN = intern('rparen')
TOKEN_SEMICOLON = intern('semicolon')
TOKEN_SUB = intern('sub')
TOKEN_TILDE = intern('tilde')

# tokens produced inside tags
TOKEN_WHITESPACE = intern('whitespace')
TOKEN_FLOAT = intern('float')
TOKEN_INTEGER = intern('integer')
TOKEN_NAME = intern('name')
TOKEN_STRING = intern('string')
TOKEN_OPERATOR = intern('operator')

# tag delimiters
TOKEN_BLOCK_BEGIN = intern('block_begin')
TOKEN_BLOCK_END = intern('block_end')
TOKEN_VARIABLE_BEGIN = intern('variable_begin')
TOKEN_VARIABLE_END = intern('variable_end')
TOKEN_RAW_BEGIN = intern('raw_begin')
TOKEN_RAW_END = intern('raw_end')
TOKEN_COMMENT_BEGIN = intern('comment_begin')
TOKEN_COMMENT_END = intern('comment_end')
TOKEN_COMMENT = intern('comment')
TOKEN_LINESTATEMENT_BEGIN = intern('linestatement_begin')
TOKEN_LINESTATEMENT_END = intern('linestatement_end')
TOKEN_LINECOMMENT_BEGIN = intern('linecomment_begin')
TOKEN_LINECOMMENT_END = intern('linecomment_end')
TOKEN_LINECOMMENT = intern('linecomment')

# template data and stream markers
TOKEN_DATA = intern('data')
TOKEN_INITIAL = intern('initial')
TOKEN_EOF = intern('eof')
# Map every operator symbol to its token type.
operators = {
    '+':            TOKEN_ADD,
    '-':            TOKEN_SUB,
    '/':            TOKEN_DIV,
    '//':           TOKEN_FLOORDIV,
    '*':            TOKEN_MUL,
    '%':            TOKEN_MOD,
    '**':           TOKEN_POW,
    '~':            TOKEN_TILDE,
    '[':            TOKEN_LBRACKET,
    ']':            TOKEN_RBRACKET,
    '(':            TOKEN_LPAREN,
    ')':            TOKEN_RPAREN,
    '{':            TOKEN_LBRACE,
    '}':            TOKEN_RBRACE,
    '==':           TOKEN_EQ,
    '!=':           TOKEN_NE,
    '>':            TOKEN_GT,
    '>=':           TOKEN_GTEQ,
    '<':            TOKEN_LT,
    '<=':           TOKEN_LTEQ,
    '=':            TOKEN_ASSIGN,
    '.':            TOKEN_DOT,
    ':':            TOKEN_COLON,
    '|':            TOKEN_PIPE,
    ',':            TOKEN_COMMA,
    ';':            TOKEN_SEMICOLON
}

# Inverse mapping (token type -> symbol).  The assertion guards against two
# symbols accidentally sharing one token type, which would drop an entry.
reverse_operators = dict((token_type, symbol)
                         for symbol, token_type in iteritems(operators))
assert len(operators) == len(reverse_operators), 'operators dropped'

# Longest symbols go first in the alternation so that e.g. '**' is matched
# before '*'.
operator_re = re.compile('(%s)' % '|'.join(
    re.escape(symbol)
    for symbol in sorted(operators, key=len, reverse=True)))

# Token types that `Lexer.wrap` drops before they reach the parser.
ignored_tokens = frozenset([TOKEN_COMMENT_BEGIN, TOKEN_COMMENT,
                            TOKEN_COMMENT_END, TOKEN_WHITESPACE,
                            TOKEN_LINECOMMENT_BEGIN, TOKEN_LINECOMMENT_END,
                            TOKEN_LINECOMMENT])

# Token types that `Lexer.tokeniter` does not yield when their text is empty.
ignore_if_empty = frozenset([TOKEN_WHITESPACE, TOKEN_DATA,
                             TOKEN_COMMENT, TOKEN_LINECOMMENT])
def _describe_token_type(token_type):
    """Return a human readable description for a token type.

    Operator token types are described by their symbol.  A handful of
    structural token types have a fixed English description.  Every other
    token type is returned unchanged.
    """
    symbol = reverse_operators.get(token_type)
    if symbol is not None:
        return symbol

    descriptions = {
        TOKEN_COMMENT_BEGIN:        'begin of comment',
        TOKEN_COMMENT_END:          'end of comment',
        TOKEN_COMMENT:              'comment',
        TOKEN_LINECOMMENT:          'comment',
        TOKEN_BLOCK_BEGIN:          'begin of statement block',
        TOKEN_BLOCK_END:            'end of statement block',
        TOKEN_VARIABLE_BEGIN:       'begin of print statement',
        TOKEN_VARIABLE_END:         'end of print statement',
        TOKEN_LINESTATEMENT_BEGIN:  'begin of line statement',
        TOKEN_LINESTATEMENT_END:    'end of line statement',
        TOKEN_DATA:                 'template data / text',
        TOKEN_EOF:                  'end of template'
    }
    return descriptions.get(token_type, token_type)
def describe_token(token):
    """Return a description of ``token``.

    A name token is described by its value; any other token is described
    by its type via `_describe_token_type`.
    """
    return (token.value if token.type == 'name'
            else _describe_token_type(token.type))
def describe_token_expr(expr):
    """Like `describe_token` but for token expressions.

    ``expr`` is either a bare token type or ``'token_type:token_value'``.
    For ``'name:<value>'`` the value is returned; in every other case the
    token type part is described via `_describe_token_type`.
    """
    # Without a colon, ``partition`` leaves the whole expression in ``kind``
    # and an empty ``colon``.
    kind, colon, payload = expr.partition(':')
    if colon and kind == 'name':
        return payload
    return _describe_token_type(kind)
def count_newlines(value):
    """Count the number of newline characters in the string.  This is
    useful for extensions that filter a stream.

    Each match of ``newline_re`` counts once, so a ``\\r\\n`` pair is a
    single newline.
    """
    return sum(1 for _ in newline_re.finditer(value))
def compile_rules(environment):
    """Compile the tag start delimiters of the environment into rules.

    Returns a list of ``(name, pattern)`` tuples where ``pattern`` is a
    regular expression source string.  The list is ordered by descending
    delimiter length (ties broken by descending name, then pattern), so
    that longer delimiters are tried first.
    """
    escape = re.escape

    # Each candidate is (delimiter length, rule name, regex source); the
    # length comes first so that a plain tuple sort orders by it.
    candidates = []
    for rule_name, delimiter in (
        ('comment', environment.comment_start_string),
        ('block', environment.block_start_string),
        ('variable', environment.variable_start_string),
    ):
        candidates.append((len(delimiter), rule_name, escape(delimiter)))

    statement_prefix = environment.line_statement_prefix
    if statement_prefix is not None:
        candidates.append((len(statement_prefix), 'linestatement',
                           r'^[ \t\v]*' + escape(statement_prefix)))

    comment_prefix = environment.line_comment_prefix
    if comment_prefix is not None:
        candidates.append((len(comment_prefix), 'linecomment',
                           r'(?:^|(?<=\S))[^\S\r\n]*' +
                           escape(comment_prefix)))

    candidates.sort(reverse=True)
    return [(rule_name, pattern) for _, rule_name, pattern in candidates]
class Failure(object):
    """Callable placeholder that raises an error when invoked.

    Used by the `Lexer` to specify known errors: an instance is stored in
    a lexing rule and raises ``cls`` (`TemplateSyntaxError` by default)
    with the configured message once that rule is hit.
    """

    def __init__(self, message, cls=TemplateSyntaxError):
        self.message, self.error_class = message, cls

    def __call__(self, lineno, filename):
        """Raise the configured error for ``lineno`` / ``filename``."""
        error = self.error_class(self.message, lineno, filename)
        raise error
class Token(tuple):
    """Token class: an immutable ``(lineno, type, value)`` triple."""
    __slots__ = ()

    # Read-only accessors for the three tuple slots.
    lineno = property(itemgetter(0))
    type = property(itemgetter(1))
    value = property(itemgetter(2))

    def __new__(cls, lineno, type, value):
        # The type is interned so that it can be compared by identity
        # against the interned token type constants.
        interned_type = intern(str(type))
        return tuple.__new__(cls, (lineno, interned_type, value))

    def __str__(self):
        kind = self.type
        if kind in reverse_operators:
            return reverse_operators[kind]
        if kind == 'name':
            return self.value
        return kind

    def test(self, expr):
        """Test a token against a token expression.  This can either be a
        token type or ``'token_type:token_value'``.  This can only test
        against string values and types.
        """
        # A regular string equality check is used here because test_any is
        # usually passed an iterable of strings that are not interned.
        if self.type == expr:
            return True
        if ':' not in expr:
            return False
        return expr.split(':', 1) == [self.type, self.value]

    def test_any(self, *iterable):
        """Test against multiple token expressions."""
        return any(self.test(expr) for expr in iterable)

    def __repr__(self):
        return 'Token(%r, %r, %r)' % tuple(self)
@implements_iterator
class TokenStreamIterator(object):
    """The iterator for tokenstreams.  Iterate over the stream
    until the eof token is reached.
    """

    def __init__(self, stream):
        self.stream = stream

    def __iter__(self):
        return self

    def __next__(self):
        stream = self.stream
        current = stream.current
        if current.type is TOKEN_EOF:
            # Reaching eof closes the underlying stream and ends iteration.
            stream.close()
            raise StopIteration()
        # Advance the stream and hand out the token that was current.
        next(stream)
        return current
@implements_iterator
class TokenStream(object):
    """A token stream is an iterable that yields :class:`Token`\\s.  The
    parser however does not iterate over it but calls :meth:`next` to go
    one token ahead.  The current active token is stored as :attr:`current`.
    """

    def __init__(self, generator, name, filename):
        self._iter = iter(generator)
        # Tokens pushed back via `push`; they are consumed before `_iter`.
        self._pushed = deque()
        self.name = name
        self.filename = filename
        self.closed = False
        # Placeholder token; the `next` call below replaces it with the
        # first real token (or eof if the generator is empty).
        self.current = Token(1, TOKEN_INITIAL, '')
        next(self)

    def __iter__(self):
        return TokenStreamIterator(self)

    def __bool__(self):
        # The stream is truthy while pushed-back tokens remain or the
        # current token is not eof.
        return bool(self._pushed) or self.current.type is not TOKEN_EOF
    __nonzero__ = __bool__  # py2

    eos = property(lambda x: not x, doc="Are we at the end of the stream?")

    def push(self, token):
        """Push a token back to the stream."""
        self._pushed.append(token)

    def look(self):
        """Look at the next token without consuming the current one."""
        # Advance once, remember the token that became current, push it
        # back and restore the previously current token.
        old_token = next(self)
        result = self.current
        self.push(result)
        self.current = old_token
        return result

    def skip(self, n=1):
        """Go n tokens ahead."""
        for _ in range(n):
            next(self)

    def next_if(self, expr):
        """Perform the token test and return the token if it matched.
        Otherwise the return value is `None`.
        """
        if self.current.test(expr):
            return next(self)

    def skip_if(self, expr):
        """Like :meth:`next_if` but only returns `True` or `False`."""
        return self.next_if(expr) is not None

    def __next__(self):
        """Go one token ahead and return the old one."""
        rv = self.current
        if self._pushed:
            self.current = self._pushed.popleft()
        elif self.current.type is not TOKEN_EOF:
            try:
                self.current = next(self._iter)
            except StopIteration:
                # An exhausted generator turns the current token into eof.
                self.close()
        return rv

    def close(self):
        """Close the stream."""
        self.current = Token(self.current.lineno, TOKEN_EOF, '')
        self._iter = None
        self.closed = True

    def expect(self, expr):
        """Expect a given token type and return it.  This accepts the same
        argument as :meth:`jinja2.lexer.Token.test`.

        Raises `TemplateSyntaxError` if the current token does not match;
        otherwise the matching token is returned and the stream advances
        by one token.
        """
        if not self.current.test(expr):
            expr = describe_token_expr(expr)
            if self.current.type is TOKEN_EOF:
                raise TemplateSyntaxError('unexpected end of template, '
                                          'expected %r.' % expr,
                                          self.current.lineno,
                                          self.name, self.filename)
            raise TemplateSyntaxError("expected token %r, got %r" %
                                      (expr, describe_token(self.current)),
                                      self.current.lineno,
                                      self.name, self.filename)
        # Return the matched token and then step past it.
        try:
            return self.current
        finally:
            next(self)
def get_lexer(environment):
    """Return a lexer for ``environment``, reusing a cached one if a lexer
    with the same configuration was created before.
    """
    # Every environment setting that is part of the cache key; environments
    # that agree on all of them share one lexer.
    cache_key = (
        environment.block_start_string,
        environment.block_end_string,
        environment.variable_start_string,
        environment.variable_end_string,
        environment.comment_start_string,
        environment.comment_end_string,
        environment.line_statement_prefix,
        environment.line_comment_prefix,
        environment.trim_blocks,
        environment.lstrip_blocks,
        environment.newline_sequence,
        environment.keep_trailing_newline,
    )
    cached = _lexer_cache.get(cache_key)
    if cached is not None:
        return cached
    lexer = _lexer_cache[cache_key] = Lexer(environment)
    return lexer
class Lexer(object):
    """Class that implements a lexer for a given environment.  Automatically
    created by the environment class, usually you don't have to do that.

    Note that the lexer is not automatically bound to an environment.
    Multiple environments can share the same lexer.
    """

    def __init__(self, environment):
        """Build the per-state lexing rule tables from the delimiter and
        whitespace settings of ``environment``.
        """
        # shortcuts
        c = lambda x: re.compile(x, re.M | re.S)
        e = re.escape

        # lexing rules for tags
        tag_rules = [
            (whitespace_re, TOKEN_WHITESPACE, None),
            (float_re, TOKEN_FLOAT, None),
            (integer_re, TOKEN_INTEGER, None),
            (name_re, TOKEN_NAME, None),
            (string_re, TOKEN_STRING, None),
            (operator_re, TOKEN_OPERATOR, None)
        ]

        # assemble the root lexing rule. because "|" is ungreedy
        # we have to sort by length so that the lexer continues working
        # as expected when we have parsing rules like <% for block and
        # <%= for variables. (if someone wants asp like syntax)
        # variables are just part of the rules if variable processing
        # is required.
        root_tag_rules = compile_rules(environment)

        # block suffix if trimming is enabled
        block_suffix_re = '\\n?' if environment.trim_blocks else ''

        # strip leading spaces if lstrip_blocks is enabled
        prefix_re = {}
        if environment.lstrip_blocks:
            # use '{%+' to manually disable lstrip_blocks behavior
            no_lstrip_re = e('+')
            # detect overlap between block and variable or comment strings
            block_diff = c(r'^%s(.*)' % e(environment.block_start_string))
            # make sure we don't mistake a block for a variable or a comment
            m = block_diff.match(environment.comment_start_string)
            no_lstrip_re += r'|%s' % e(m.group(1)) if m else ''
            m = block_diff.match(environment.variable_start_string)
            no_lstrip_re += r'|%s' % e(m.group(1)) if m else ''

            # detect overlap between comment and variable strings
            comment_diff = c(r'^%s(.*)' % e(environment.comment_start_string))
            m = comment_diff.match(environment.variable_start_string)
            no_variable_re = r'(?!%s)' % e(m.group(1)) if m else ''

            lstrip_re = r'^[ \t]*'
            block_prefix_re = r'%s%s(?!%s)|%s\+?' % (
                lstrip_re,
                e(environment.block_start_string),
                no_lstrip_re,
                e(environment.block_start_string),
            )
            comment_prefix_re = r'%s%s%s|%s\+?' % (
                lstrip_re,
                e(environment.comment_start_string),
                no_variable_re,
                e(environment.comment_start_string),
            )
            prefix_re['block'] = block_prefix_re
            prefix_re['comment'] = comment_prefix_re
        else:
            block_prefix_re = '%s' % e(environment.block_start_string)

        self.newline_sequence = environment.newline_sequence
        self.keep_trailing_newline = environment.keep_trailing_newline

        # global lexing rules.  Each rule is (regex, tokens, new_state);
        # see `tokeniter` for how the three parts are interpreted.
        # All regex templates are raw strings so that sequences such as
        # ``\-`` and ``\s`` reach the regex engine without relying on
        # Python's handling of unrecognised string escapes.
        self.rules = {
            'root': [
                # directives
                (c('(.*?)(?:%s)' % '|'.join(
                    [r'(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*(?:\-%s\s*|%s))' % (
                        e(environment.block_start_string),
                        block_prefix_re,
                        e(environment.block_end_string),
                        e(environment.block_end_string)
                    )] + [
                        r'(?P<%s_begin>\s*%s\-|%s)' % (n, r, prefix_re.get(n, r))
                        for n, r in root_tag_rules
                    ])), (TOKEN_DATA, '#bygroup'), '#bygroup'),
                # data
                (c('.+'), TOKEN_DATA, None)
            ],
            # comments
            TOKEN_COMMENT_BEGIN: [
                (c(r'(.*?)((?:\-%s\s*|%s)%s)' % (
                    e(environment.comment_end_string),
                    e(environment.comment_end_string),
                    block_suffix_re
                )), (TOKEN_COMMENT, TOKEN_COMMENT_END), '#pop'),
                (c('(.)'), (Failure('Missing end of comment tag'),), None)
            ],
            # blocks
            TOKEN_BLOCK_BEGIN: [
                (c(r'(?:\-%s\s*|%s)%s' % (
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), TOKEN_BLOCK_END, '#pop'),
            ] + tag_rules,
            # variables
            TOKEN_VARIABLE_BEGIN: [
                (c(r'\-%s\s*|%s' % (
                    e(environment.variable_end_string),
                    e(environment.variable_end_string)
                )), TOKEN_VARIABLE_END, '#pop')
            ] + tag_rules,
            # raw block
            TOKEN_RAW_BEGIN: [
                (c(r'(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % (
                    e(environment.block_start_string),
                    block_prefix_re,
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), (TOKEN_DATA, TOKEN_RAW_END), '#pop'),
                (c('(.)'), (Failure('Missing end of raw directive'),), None)
            ],
            # line statements
            TOKEN_LINESTATEMENT_BEGIN: [
                (c(r'\s*(\n|$)'), TOKEN_LINESTATEMENT_END, '#pop')
            ] + tag_rules,
            # line comments
            TOKEN_LINECOMMENT_BEGIN: [
                (c(r'(.*?)()(?=\n|$)'), (TOKEN_LINECOMMENT,
                 TOKEN_LINECOMMENT_END), '#pop')
            ]
        }

    def _normalize_newlines(self, value):
        """Replace every newline (``\\r\\n``, ``\\r`` or ``\\n``) in
        ``value`` with the configured newline sequence.  Called for strings
        and template data.
        """
        return newline_re.sub(self.newline_sequence, value)

    def tokenize(self, source, name=None, filename=None, state=None):
        """Calls `tokeniter` + `wrap` and wraps the result in a
        :class:`TokenStream`.
        """
        stream = self.tokeniter(source, name, filename, state)
        return TokenStream(self.wrap(stream, name, filename), name, filename)

    def wrap(self, stream, name=None, filename=None):
        """This is called with the stream as returned by `tokeniter` and
        wraps every token in a :class:`Token` and converts the value.

        Ignored tokens and the raw begin/end markers are dropped, line
        statements are reported as blocks, and literal values are converted
        (strings unescaped, integers and floats parsed, operators mapped to
        their token type).  Raises `TemplateSyntaxError` if a string
        literal cannot be unescaped.
        """
        for lineno, token, value in stream:
            if token in ignored_tokens:
                continue
            elif token == 'linestatement_begin':
                token = 'block_begin'
            elif token == 'linestatement_end':
                token = 'block_end'
            # we are not interested in those tokens in the parser
            elif token in ('raw_begin', 'raw_end'):
                continue
            elif token == 'data':
                value = self._normalize_newlines(value)
            elif token == 'keyword':
                token = value
            elif token == 'name':
                value = str(value)
            elif token == 'string':
                # try to unescape string (value[1:-1] strips the quotes)
                try:
                    value = self._normalize_newlines(value[1:-1]) \
                        .encode('ascii', 'backslashreplace') \
                        .decode('unicode-escape')
                except Exception as e:
                    msg = str(e).split(':')[-1].strip()
                    raise TemplateSyntaxError(msg, lineno, name, filename)
                # if we can express it as bytestring (ascii only)
                # we do that for support of semi broken APIs
                # as datetime.datetime.strftime.  On python 3 this
                # call becomes a noop thanks to 2to3
                try:
                    value = str(value)
                except UnicodeError:
                    pass
            elif token == 'integer':
                value = int(value)
            elif token == 'float':
                value = float(value)
            elif token == 'operator':
                token = operators[value]
            yield Token(lineno, token, value)

    def tokeniter(self, source, name, filename=None, state=None):
        """This method tokenizes the text and returns the tokens in a
        generator.  Use this method if you just want to tokenize a template.

        Yields ``(lineno, token_type, text)`` tuples.  ``state`` may be
        ``'variable'`` or ``'block'`` to start lexing inside such a tag.
        Raises `TemplateSyntaxError` on unbalanced brackets or on input
        that no rule matches.
        """
        source = text_type(source)
        # Split on real newlines only.  ``str.splitlines`` would also split
        # on form feeds, vertical tabs and unicode line separators and so
        # silently rewrite those characters in the template data to '\n'.
        # ``newline_re`` has one capturing group, therefore ``split`` puts
        # the separators at the odd indices; ``[::2]`` keeps the lines.
        lines = newline_re.split(source)[::2]
        if not self.keep_trailing_newline and lines[-1] == '':
            # A trailing empty entry means the source ended with a newline
            # (or was empty); drop it unless that newline is to be kept.
            del lines[-1]
        source = '\n'.join(lines)
        pos = 0
        lineno = 1
        stack = ['root']
        if state is not None and state != 'root':
            assert state in ('variable', 'block'), 'invalid state'
            stack.append(state + '_begin')
        statetokens = self.rules[stack[-1]]
        source_length = len(source)

        # Closing brackets still expected inside the current tag.
        balancing_stack = []

        while 1:
            # tokenizer loop
            for regex, tokens, new_state in statetokens:
                m = regex.match(source, pos)
                # if no match we try again with the next rule
                if m is None:
                    continue

                # we only match blocks and variables if braces / parentheses
                # are balanced. continue parsing with the lower rule which
                # is the operator rule. do this only if the end tags look
                # like operators
                if balancing_stack and \
                   tokens in ('variable_end', 'block_end',
                              'linestatement_end'):
                    continue

                # tuples support more options
                if isinstance(tokens, tuple):
                    for idx, token in enumerate(tokens):
                        # failure group
                        if token.__class__ is Failure:
                            raise token(lineno, filename)
                        # bygroup is a bit more complex, in that case we
                        # yield for the current token the first named
                        # group that matched
                        elif token == '#bygroup':
                            for key, value in iteritems(m.groupdict()):
                                if value is not None:
                                    yield lineno, key, value
                                    lineno += value.count('\n')
                                    break
                            else:
                                raise RuntimeError('%r wanted to resolve '
                                                   'the token dynamically'
                                                   ' but no group matched'
                                                   % regex)
                        # normal group
                        else:
                            data = m.group(idx + 1)
                            if data or token not in ignore_if_empty:
                                yield lineno, token, data
                            lineno += data.count('\n')

                # strings as token just are yielded as it.
                else:
                    data = m.group()
                    # update brace/parentheses balance
                    if tokens == 'operator':
                        if data == '{':
                            balancing_stack.append('}')
                        elif data == '(':
                            balancing_stack.append(')')
                        elif data == '[':
                            balancing_stack.append(']')
                        elif data in ('}', ')', ']'):
                            if not balancing_stack:
                                raise TemplateSyntaxError('unexpected \'%s\'' %
                                                          data, lineno, name,
                                                          filename)
                            expected_op = balancing_stack.pop()
                            if expected_op != data:
                                raise TemplateSyntaxError('unexpected \'%s\', '
                                                          'expected \'%s\'' %
                                                          (data, expected_op),
                                                          lineno, name,
                                                          filename)
                    # yield items
                    if data or tokens not in ignore_if_empty:
                        yield lineno, tokens, data
                    lineno += data.count('\n')

                # fetch new position into new variable so that we can check
                # if there is a internal parsing error which would result
                # in an infinite loop
                pos2 = m.end()

                # handle state changes
                if new_state is not None:
                    # remove the uppermost state
                    if new_state == '#pop':
                        stack.pop()
                    # resolve the new state by group checking
                    elif new_state == '#bygroup':
                        for key, value in iteritems(m.groupdict()):
                            if value is not None:
                                stack.append(key)
                                break
                        else:
                            raise RuntimeError('%r wanted to resolve the '
                                               'new state dynamically but'
                                               ' no group matched' %
                                               regex)
                    # direct state name given
                    else:
                        stack.append(new_state)
                    statetokens = self.rules[stack[-1]]
                # we are still at the same position and no stack change.
                # this means a loop without break condition, avoid that and
                # raise error
                elif pos2 == pos:
                    raise RuntimeError('%r yielded empty string without '
                                       'stack change' % regex)
                # publish new position and start again
                pos = pos2
                break
            # if loop terminated without break we haven't found a single match
            # either we are at the end of the file or we have a problem
            else:
                # end of text
                if pos >= source_length:
                    return
                # something went wrong
                raise TemplateSyntaxError('unexpected char %r at %d' %
                                          (source[pos], pos), lineno,
                                          name, filename)
733 name, filename)