2 Iterator based sre token scanner
5 from re
import VERBOSE
, MULTILINE
, DOTALL
9 from sre_constants
import BRANCH
, SUBPATTERN
11 __all__
= ['Scanner', 'pattern']
13 FLAGS
= (VERBOSE | MULTILINE | DOTALL
)
15 class Scanner(object):
16 def __init__(self
, lexicon
, flags
=FLAGS
):
18 # Combine phrases into a compound pattern
19 s
= sre_parse
.Pattern()
22 for idx
, token
in enumerate(lexicon
):
23 phrase
= token
.pattern
25 subpattern
= sre_parse
.SubPattern(s
,
26 [(SUBPATTERN
, (idx
+ 1, sre_parse
.parse(phrase
, flags
)))])
27 except sre_constants
.error
:
30 self
.actions
.append(token
)
32 s
.groups
= len(p
) + 1 # NOTE(guido): Added to make SRE validation work
33 p
= sre_parse
.SubPattern(s
, [(BRANCH
, (None, p
))])
34 self
.scanner
= sre_compile
.compile(p
)
36 def iterscan(self
, string
, idx
=0, context
=None):
38 Yield match, end_idx for each match
40 match
= self
.scanner
.scanner(string
, idx
).match
41 actions
= self
.actions
48 matchbegin
, matchend
= m
.span()
49 if lastend
== matchend
:
51 action
= actions
[m
.lastindex
]
52 if action
is not None:
53 rval
, next_pos
= action(m
, context
)
54 if next_pos
is not None and next_pos
!= matchend
:
55 # "fast forward" the scanner
57 match
= self
.scanner
.scanner(string
, matchend
).match
62 def pattern(pattern
, flags
=FLAGS
):
65 fn
.regex
= re
.compile(pattern
, flags
)