# This module compiles a regular expression that recognizes Python tokens.
# It is designed to match the working of the Python tokenizer exactly.
# It takes care of everything except indentation;
# note that un-escaped newlines are tokens, too.
# tokenprog.regs[3] gives the location of the token without whitespace.
# It also defines various subexpressions, but doesn't compile them.
# See the function test() below for an example of how to use it.
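
# Illustrative sketch (not part of the original comments), assuming the old
# 'regex' module semantics that the code below relies on: match() returns the
# length of the match at the given position (or -1 on failure), and regs[3]
# holds the start/end of the token with the leading whitespace stripped off.
# For example, assuming this file is importable as 'tokenize':
#
#       >>> import tokenize
#       >>> tokenize.tokenprog.match('  spam = 1\n')    # 2 spaces + 'spam'
#       6
#       >>> tokenize.tokenprog.regs[3]                  # token without whitespace
#       (2, 6)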

import regex

# Note: to get a quoted backslash in a regexp, it must be quadrupled.

Ignore = '[ \t]*\(\\\\\n[ \t]*\)*\(#.*\)?'
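
# Illustrative note (not in the original): the '\\\\' in Ignore is two
# backslash characters once the Python string literal is parsed, and the
# pattern engine then reads those two as a single escaped backslash; together
# with the real newline that follows ('\n'), this matches a backslash-newline
# line continuation.  Ignore as a whole therefore skips optional blanks, any
# number of such continuations, and an optional '#' comment.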

Name = '[a-zA-Z_][a-zA-Z0-9_]*'

Hexnumber = '0[xX][0-9a-fA-F]*[lL]?'
Octnumber = '0[0-7]*[lL]?'
Decnumber = '[1-9][0-9]*[lL]?'
Intnumber = Hexnumber + '\|' + Octnumber + '\|' + Decnumber
Exponent = '[eE][-+]?[0-9]+'
Pointfloat = '\([0-9]+\.[0-9]*\|\.[0-9]+\)\(' + Exponent + '\)?'
Expfloat = '[0-9]+' + Exponent
Floatnumber = Pointfloat + '\|' + Expfloat
Number = Floatnumber + '\|' + Intnumber
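
# Illustrative examples (not in the original): Number accepts classic literals
# such as 0xFF and 0377L (Intnumber), 42 (Decnumber), 3.14 and .5e-3
# (Pointfloat), and 10e6 (Expfloat); the [lL]? suffix covers old-style long
# integers like 123456789L.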

String = '\'\(\\\\.\|[^\\\n\']\)*\'' + '\|' + '"\(\\\\.\|[^\\\n"]\)*"'
# Note: this module *recognizes* double quotes, but for backward
# compatibility, it doesn't *use* them!
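
# Illustrative examples (not in the original): String matches single- and
# double-quoted literals with backslash escapes inside, e.g. 'it\'s' and
# "a \"quoted\" word"; since neither alternative can cross a newline, a
# string must open and close on the same line to be recognized.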

Operator = '~\|\+\|-\|\*\|/\|%\|\^\|&\||\|<<\|>>\|==\|<=\|<>\|!=\|>=\|=\|<\|>'
Bracket = '[][(){}]'
Special = '[:;.,`\n]'
Funny = Operator + '\|' + Bracket + '\|' + Special

PlainToken = Name + '\|' + Number + '\|' + String + '\|' + Funny

Token = Ignore + '\(' + PlainToken + '\)'
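
# Illustrative note (not in the original): counting the \( ... \) groups in
# the assembled pattern, groups 1 and 2 come from Ignore (the line-continuation
# and comment parts) and group 3 is the wrapper placed around PlainToken here,
# which is why the header comment and test() below read the token's position
# from tokenprog.regs[3].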

try:
        save_syntax = regex.set_syntax(0) # Use default syntax
        tokenprog = regex.compile(Token)
finally:
        if save_syntax != 0:
                dummy = regex.set_syntax(save_syntax) # Restore original syntax
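
# Illustrative note (not in the original): the patterns above are written in
# the 'regex' module's default (Emacs-style) syntax, where \( \) group and \|
# alternates, so the block above temporarily forces that syntax, compiles the
# pattern, and then restores whatever syntax setting the caller had.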


def test(file):
        f = open(file, 'r')
        while 1:
                line = f.readline()
                if not line: break
                i, n = 0, len(line)
                while i < n:
                        j = tokenprog.match(line, i)
                        if j < 0:
                                # No token recognized here; report it and
                                # resync one character further on.
                                print 'No token at', `line[i:i+20]` + '...'
                                i = i+1
                        else:
                                i = i+j
                                # Group 3 spans the token proper, without the
                                # leading whitespace matched by Ignore.
                                a, b = tokenprog.regs[3]
                                if a < b:
                                        print 'Token:', `line[a:b]`
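

# The function below is an illustrative sketch, not part of the original
# module: it applies the same tokenprog/regs[3] convention as test() above to
# a single string and returns the token texts instead of printing them.
# (The name 'tokenize_line' is made up for this example.)

def tokenize_line(line):
        tokens = []
        i, n = 0, len(line)
        while i < n:
                j = tokenprog.match(line, i)
                if j < 0:
                        # Unrecognized character: skip it and keep going.
                        i = i+1
                else:
                        i = i+j
                        a, b = tokenprog.regs[3]
                        if a < b:
                                tokens.append(line[a:b])
        return tokens

# For example, tokenize_line('x = (1 + y) # comment\n') would yield
# ['x', '=', '(', '1', '+', 'y', ')', '\n'] under the old regex semantics.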