changes by Barry, e.g. font lock & email addresses
[python/dscho.git] / Demo / comparisons / regextest.py
blob35ee9eed77d098bcf133c82fd55a600124f42211
1 #! /usr/local/bin/python
3 # 1) Regular Expressions Test
4 #
5 # Read a file of (extended per egrep) regular expressions (one per line),
6 # and apply those to all files whose names are listed on the command line.
7 # Basically, an 'egrep -f' simulator. Test it with 20 "vt100" patterns
8 # against five /etc/termcap files. Tests using more elaborate patterns
9 # would also be interesting. Your code should not break if given hundreds
10 # of regular expressions or binary files to scan.
12 # This implementation:
13 # - combines all patterns into a single one using ( ... | ... | ... )
14 # - reads patterns from stdin, scans files given as command line arguments
15 # - produces output in the format <file>:<lineno>:<line>
16 # - is only about 2.5 times as slow as egrep (though I couldn't run
17 # Tom's test -- this system, a vanilla SGI, only has /etc/terminfo)
19 import string
20 import sys
21 import regex
22 from regex_syntax import *
# Select egrep-style syntax in the regex module before any pattern
# is compiled.
regex.set_syntax(RE_SYNTAX_EGREP)
26 def main():
27 pats = map(chomp, sys.stdin.readlines())
28 bigpat = '(' + string.joinfields(pats, '|') + ')'
29 prog = regex.compile(bigpat)
31 for file in sys.argv[1:]:
32 try:
33 fp = open(file, 'r')
34 except IOError, msg:
35 print "%s: %s" % (file, msg)
36 continue
37 lineno = 0
38 while 1:
39 line = fp.readline()
40 if not line:
41 break
42 lineno = lineno + 1
43 if prog.search(line) >= 0:
44 print "%s:%s:%s" % (file, lineno, line),
def chomp(s):
    """Return s without its final newline character, if it has one.

    At most one trailing '\\n' is removed; any other string is
    returned unchanged.
    """
    if s[-1:] != '\n':
        return s
    return s[:-1]
# Run the scan only when this file is executed as a script, so that
# importing it does not start reading standard input.
if __name__ == '__main__':
    main()