Bump to 2.3.1 to pick up the missing file.
[python/dscho.git] / Tools / bgen / bgen / scantools.py
blob61c2f37581f497173dae5e64e70c25dcffbc7a63
1 """\
3 Tools for scanning header files in search of function prototypes.
5 Often, the function prototypes in header files contain enough information
6 to automatically generate (or reverse-engineer) interface specifications
7 from them. The conventions used are very vendor specific, but once you've
8 figured out what they are they are often a great help, and it sure beats
9 manually entering the interface specifications. (These are needed to generate
10 the glue used to access the functions from Python.)
12 In order to make this class useful, almost every component can be overridden.
13 The defaults are (currently) tuned to scanning Apple Macintosh header files,
14 although most Mac specific details are contained in header-specific subclasses.
15 """
17 import re
18 import sys
19 import os
20 import fnmatch
21 from types import *
22 try:
23 import MacOS
24 except ImportError:
25 MacOS = None
27 try:
28 from bgenlocations import CREATOR, INCLUDEDIR
29 except ImportError:
30 CREATOR = None
31 INCLUDEDIR = os.curdir
class Error(Exception):
    """Raised for scanner usage errors, such as reading with no input set."""


class Scanner:
    """Scan header files for function prototypes and symbol definitions.

    Function prototypes are written to the spec file as generator calls and
    symbol definitions are written to the defs file as assignments.  Almost
    every step lives in its own method so that subclasses can override it.
    """

    # Set to 1 in subclass to debug your scanner patterns.
    debug = 0

    def __init__(self, input = None, output = None, defsoutput = None):
        """Initialise scanner state, then attach the outputs and the input.

        input      -- file name, open file, or list/tuple of those (optional)
        output     -- file name or open file for interface specs (optional)
        defsoutput -- file name or open file for symbol definitions (optional)
        """
        self.initsilent()
        self.initblacklists()
        self.initrepairinstructions()
        self.initpaths()
        self.initfiles()
        self.initpatterns()
        self.compilepatterns()
        self.initosspecifics()
        self.initusedtypes()
        if output:
            self.setoutput(output, defsoutput)
        if input:
            self.setinput(input)

    def initusedtypes(self):
        """Reset the record of types seen while generating specs."""
        self.usedtypes = {}

    def typeused(self, type, mode):
        """Record that `type` was used with `mode` (used as dict-based sets)."""
        if type not in self.usedtypes:
            self.usedtypes[type] = {}
        self.usedtypes[type][mode] = None

    def reportusedtypes(self):
        """Report every used type with its modes, both in sorted order."""
        names = list(self.usedtypes.keys())
        names.sort()
        for name in names:
            modes = list(self.usedtypes[name].keys())
            modes.sort()
            self.report("%s %s", name, " ".join(modes))

    def gentypetest(self, file):
        """Write a script to `file` that evaluates every used type name.

        The generated script prints each name that raises NameError and
        then raises if any were missing.
        """
        fp = open(file, "w")
        try:
            fp.write("types=[\n")
            names = list(self.usedtypes.keys())
            names.sort()
            for name in names:
                fp.write("\t'%s',\n"%name)
            fp.write("]\n")
            # NOTE(review): the indentation inside this template was
            # reconstructed as 4 spaces -- confirm against the upstream file.
            fp.write("""missing=0
for t in types:
    try:
        tt = eval(t)
    except NameError:
        print "** Missing type:", t
        missing = 1
if missing: raise "Missing Types"
""")
        finally:
            # Close the file even when one of the writes fails.
            fp.close()

    def initsilent(self):
        """Set the verbosity: report() prints only when silent is false."""
        self.silent = 1

    def error(self, format, *args):
        """Print `format % args` unless self.silent is negative."""
        if self.silent >= 0:
            print(format % args)

    def report(self, format, *args):
        """Print `format % args` when self.silent is false."""
        if not self.silent:
            print(format % args)

    def writeinitialdefs(self):
        """Hook called by scan() after the defs file header; does nothing here."""
        pass

    def initblacklists(self):
        """Build the name/type blacklists and the greylist lookup table."""
        self.blacklistnames = self.makeblacklistnames()
        self.blacklisttypes = ["unknown", "-"] + self.makeblacklisttypes()
        self.greydictnames = self.greylist2dict(self.makegreylist())

    def greylist2dict(self, list):
        """Turn [(define, [name, ...]), ...] into a {name: define} mapping."""
        rv = {}
        for define, namelist in list:
            for name in namelist:
                rv[name] = define
        return rv

    def makeblacklistnames(self):
        """Return the function names to skip; empty by default."""
        return []

    def makeblacklisttypes(self):
        """Return the type names to skip; empty by default."""
        return []

    def makegreylist(self):
        """Return (define, namelist) pairs for conditional names; empty by default."""
        return []

    def initrepairinstructions(self):
        """Load the repair instructions and the inherent pointer types."""
        self.repairinstructions = self.makerepairinstructions()
        self.inherentpointertypes = self.makeinherentpointertypes()

    def makerepairinstructions(self):
        """Parse the repair file into repair instructions.

        The file format is simple:
        1) use a trailing backslash to split a long logical line in
           multiple physical lines
        2) everything after the first # on a line is ignored (as comment)
        3) empty lines are ignored
        4) remaining lines must have exactly 3 colon-separated fields:
           functionpattern : argumentspattern : argumentsreplacement
        5) all patterns use shell style pattern matching
        6) an empty functionpattern means the same as *
        7) the other two fields are each comma-separated lists of triples
        8) a triple is a space-separated list of 1-3 words
        9) a triple with less than 3 words is padded at the end with "*" words
        10) when used as a pattern, a triple matches the type, name, and mode
            of an argument, respectively
        11) when used as a replacement, the words of a triple specify
            replacements for the corresponding words of the argument,
            with "*" as a word by itself meaning leave the original word
            (no other uses of "*" is allowed)
        12) the replacement need not have the same number of triples
            as the pattern

        Returns a list of (functionpattern, patterns, replacements) tuples,
        or [] when openrepairfile() yields no file.
        """
        f = self.openrepairfile()
        if not f: return []
        print("Reading repair file %r ..." % (f.name,))
        try:
            return self._parserepairfile(f)
        finally:
            # Release the repair file even if parsing fails.
            f.close()

    def _parserepairfile(self, f):
        """Read repair instructions from the open file `f` (see makerepairinstructions)."""
        instructions = []
        lineno = 0
        while 1:
            line = f.readline()
            if not line: break
            lineno = lineno + 1
            startlineno = lineno
            # Join backslash-continued physical lines into one logical line.
            while line[-2:] == '\\\n':
                line = line[:-2] + ' ' + f.readline()
                lineno = lineno + 1
            i = line.find('#')
            if i >= 0: line = line[:i]
            words = [s.strip() for s in line.split(':')]
            if words == ['']: continue
            if len(words) != 3:
                self._repairwarning(startlineno,
                                    ": bad line (not 3 colon-separated fields)",
                                    line)
                continue
            [fpat, pat, rep] = words
            if not fpat: fpat = "*"
            if not pat:
                self._repairwarning(startlineno, "Empty pattern", line)
                continue
            patterns = self._parsetriples(pat, "pattern", startlineno, line)
            replacements = self._parsetriples(rep, "replacement",
                                              startlineno, line)
            instructions.append((fpat, patterns, replacements))
        return instructions

    def _parsetriples(self, field, kind, startlineno, line):
        """Split a comma-separated field into 3-word lists, padding with "*".

        `kind` ("pattern" or "replacement") is only used in diagnostics.
        """
        triples = []
        for part in [s.strip() for s in field.split(',')]:
            if not part:
                self._repairwarning(startlineno, "Empty %s part" % kind, line)
                continue
            words = part.split()
            if len(words) > 3:
                self._repairwarning(startlineno,
                                    "%s part has > 3 words" % kind.capitalize(),
                                    line)
                words = words[:3]
            else:
                while len(words) < 3:
                    words.append("*")
            triples.append(words)
        return triples

    def _repairwarning(self, startlineno, message, line):
        """Print a repair-file diagnostic followed by the offending line."""
        print("Line %s %s" % (startlineno, message))
        print(repr(line))

    def makeinherentpointertypes(self):
        """Return types that modifyarg() treats as output pointers; empty by default."""
        return []

    def openrepairfile(self, filename = "REPAIR"):
        """Open the repair file for reading; print a notice and return None on IOError."""
        try:
            return open(filename, "rU")
        except IOError:
            msg = sys.exc_info()[1]
            print("%r : %s" % (filename, msg))
            print("Cannot open repair file -- assume no repair needed")
            return None

    def initfiles(self):
        """Set default files; the *mine flags mark files this object opened."""
        self.specmine = 0
        self.defsmine = 0
        self.scanmine = 0
        self.specfile = sys.stdout
        self.defsfile = None
        self.scanfile = sys.stdin
        self.lineno = 0
        self.line = ""

    def initpaths(self):
        """Set the directories searched by openinput() for relative names."""
        self.includepath = [os.curdir, INCLUDEDIR]

    def initpatterns(self):
        """Define the *_pat regular expression sources (compiled by compilepatterns)."""
        self.head_pat = r"^EXTERN_API[^_]"
        self.tail_pat = r"[;={}]"
        self.type_pat = r"EXTERN_API" + \
                        r"[ \t\n]*\([ \t\n]*" + \
                        r"(?P<type>[a-zA-Z0-9_* \t]*[a-zA-Z0-9_*])" + \
                        r"[ \t\n]*\)[ \t\n]*"
        self.name_pat = r"(?P<name>[a-zA-Z0-9_]+)[ \t\n]*"
        self.args_pat = r"\((?P<args>([^\(;=\)]+|\([^\(;=\)]*\))*)\)"
        self.whole_pat = self.type_pat + self.name_pat + self.args_pat
        self.sym_pat = r"^[ \t]*(?P<name>[a-zA-Z0-9_]+)[ \t]*=" + \
                       r"[ \t]*(?P<defn>[-0-9_a-zA-Z'\"\(][^\t\n,;}]*),?"
        self.asplit_pat = r"^(?P<type>.*[^a-zA-Z0-9_])(?P<name>[a-zA-Z0-9_]+)(?P<array>\[\])?$"
        self.comment1_pat = r"(?P<rest>.*)//.*"
        # note that the next pattern only removes comments that are wholly within one line
        self.comment2_pat = r"(?P<rest1>.*)/\*.*\*/(?P<rest2>.*)"

    def compilepatterns(self):
        """Compile every attribute named X_pat and store the result as X."""
        for name in dir(self):
            if name[-4:] == "_pat":
                pat = getattr(self, name)
                prog = re.compile(pat)
                setattr(self, name[:-4], prog)

    def initosspecifics(self):
        """Choose the Mac file type/creator, or None when MacOS or CREATOR is unavailable."""
        if MacOS and CREATOR:
            self.filetype = 'TEXT'
            self.filecreator = CREATOR
        else:
            self.filetype = self.filecreator = None

    def setfiletype(self, filename):
        """Apply the configured creator/type to `filename` when MacOS is available."""
        if MacOS and (self.filecreator or self.filetype):
            creator, ftype = MacOS.GetCreatorAndType(filename)
            if self.filecreator: creator = self.filecreator
            if self.filetype: ftype = self.filetype
            MacOS.SetCreatorAndType(filename, creator, ftype)

    def close(self):
        """Close all files owned by the scanner."""
        self.closefiles()

    def closefiles(self):
        """Close the spec, defs and scan files (only those opened here)."""
        self.closespec()
        self.closedefs()
        self.closescan()

    def closespec(self):
        """Forget the spec file, closing it only if this object opened it."""
        tmp = self.specmine and self.specfile
        self.specfile = None
        if tmp: tmp.close()

    def closedefs(self):
        """Forget the defs file, closing it only if this object opened it."""
        tmp = self.defsmine and self.defsfile
        self.defsfile = None
        if tmp: tmp.close()

    def closescan(self):
        """Forget the scan file, closing it only if this object opened it."""
        tmp = self.scanmine and self.scanfile
        self.scanfile = None
        if tmp: tmp.close()

    def _openifname(self, target, opener):
        """Return (file, mine): open `target` with `opener` if it is a file name."""
        if isinstance(target, str):
            return opener(target), 1
        return target, 0

    def setoutput(self, spec, defs = None):
        """Set the spec and defs outputs; each is a file name or an open file."""
        self.closespec()
        self.closedefs()
        if spec:
            self.specfile, self.specmine = self._openifname(spec, self.openoutput)
        if defs:
            self.defsfile, self.defsmine = self._openifname(defs, self.openoutput)

    def openoutput(self, filename):
        """Open `filename` for writing and apply the Mac file type.

        Raises IOError(filename, original_error) when the open fails.
        """
        try:
            file = open(filename, 'w')
        except IOError:
            arg = sys.exc_info()[1]
            raise IOError(filename, arg)
        self.setfiletype(filename)
        return file

    def setinput(self, scan = sys.stdin):
        """Set the input(s) to scan: a file name, an open file, or a list/tuple of them."""
        if not isinstance(scan, (tuple, list)):
            scan = [scan]
        self.allscaninputs = scan
        self._nextinput()

    def _nextinput(self):
        """Advance to the next queued input; return 0 when none are left, else 1."""
        if not self.allscaninputs:
            return 0
        scan = self.allscaninputs[0]
        self.allscaninputs = self.allscaninputs[1:]
        self.closescan()
        if scan:
            self.scanfile, self.scanmine = self._openifname(scan, self.openinput)
        self.lineno = 0
        return 1

    def openinput(self, filename):
        """Open `filename` for reading, trying the include path for relative names.

        Raises IOError(original_error, filename) when every attempt fails.
        """
        if not os.path.isabs(filename):
            for dir in self.includepath:
                fullname = os.path.join(dir, filename)
                try:
                    return open(fullname, 'rU')
                except IOError:
                    pass
        # If not on the path, or absolute, try default open()
        try:
            return open(filename, 'rU')
        except IOError:
            arg = sys.exc_info()[1]
            raise IOError(arg, filename)

    def getline(self):
        """Return the next input line, moving on to the next input at end of file.

        Raises Error when no input is set and EOFError when all inputs
        are exhausted.
        """
        if not self.scanfile:
            raise Error("input file not set")
        self.line = self.scanfile.readline()
        if not self.line:
            if self._nextinput():
                return self.getline()
            raise EOFError
        self.lineno = self.lineno + 1
        return self.line

    def _stripcomments(self, line):
        """Return `line` without a trailing // comment and one-line /* */ comments."""
        match = self.comment1.match(line)
        if match:
            line = match.group('rest')
            if self.debug:
                self.report("\tafter comment1: %s", repr(line))
        match = self.comment2.match(line)
        while match:
            line = match.group('rest1')+match.group('rest2')
            if self.debug:
                self.report("\tafter comment2: %s", repr(line))
            match = self.comment2.match(line)
        return line

    def scan(self):
        """Scan all input, dispatching symbol definitions and function heads."""
        if not self.scanfile:
            self.error("No input file has been specified")
            return
        inputname = self.scanfile.name
        self.report("scanfile = %s", repr(inputname))
        if not self.specfile:
            self.report("(No interface specifications will be written)")
        else:
            self.report("specfile = %s", repr(self.specfile.name))
            self.specfile.write("# Generated from %s\n\n" % repr(inputname))
        if not self.defsfile:
            self.report("(No symbol definitions will be written)")
        else:
            self.report("defsfile = %s", repr(self.defsfile.name))
            self.defsfile.write("# Generated from %s\n\n" % repr(os.path.split(inputname)[1]))
            self.writeinitialdefs()
        self.alreadydone = []
        try:
            while 1:
                try: line = self.getline()
                except EOFError: break
                if self.debug:
                    # Pass the text as an argument so a '%' in the header
                    # line cannot be mistaken for a format directive.
                    self.report("LINE: %s", repr(line))
                line = self._stripcomments(line)
                if self.defsfile:
                    match = self.sym.match(line)
                    if match:
                        if self.debug:
                            self.report("\tmatches sym.")
                        self.dosymdef(match)
                        continue
                match = self.head.match(line)
                if match:
                    if self.debug:
                        self.report("\tmatches head.")
                    self.dofuncspec()
                    continue
        except EOFError:
            # Raised when the input ends in the middle of a function spec.
            self.error("Uncaught EOF error")
        self.reportusedtypes()

    def dosymdef(self, match):
        """Write a symbol definition; blacklisted names are written commented out."""
        name, defn = match.group('name', 'defn')
        defn = escape8bit(defn)
        if self.debug:
            self.report("\tsym: name=%s, defn=%s", repr(name), repr(defn))
        if not name in self.blacklistnames:
            self.defsfile.write("%s = %s\n" % (name, defn))
        else:
            self.defsfile.write("# %s = %s\n" % (name, defn))
        # XXXX No way to handle greylisted names

    def dofuncspec(self):
        """Collect lines from the current one up to a tail match, then process them."""
        raw = self.line
        while not self.tail.search(raw):
            line = self.getline()
            if self.debug:
                self.report("* CONTINUATION LINE: %s", repr(line))
            raw = raw + self._stripcomments(line)
        if self.debug:
            self.report("* WHOLE LINE: %s", repr(raw))
        self.processrawspec(raw)

    def processrawspec(self, raw):
        """Parse one raw prototype and call generate() unless it is filtered out.

        Prototypes are skipped when they do not match, were already done,
        are blacklisted, or have an unmanageable argument.
        """
        match = self.whole.search(raw)
        if not match:
            self.report("Bad raw spec: %s", repr(raw))
            if self.debug:
                if not self.type.search(raw):
                    self.report("(Type already doesn't match)")
                else:
                    self.report("(but type matched)")
            return
        rtype, name, args = match.group('type', 'name', 'args')
        rtype = re.sub(r"\*", " ptr", rtype)
        rtype = re.sub("[ \t]+", "_", rtype)
        if name in self.alreadydone:
            self.report("Name has already been defined: %s", repr(name))
            return
        self.report("==> %s %s <==", rtype, name)
        if self.blacklisted(rtype, name):
            self.report("*** %s %s blacklisted", rtype, name)
            return
        # The return value goes through the repair rules as a one-item list.
        returnlist = [(rtype, name, 'ReturnMode')]
        returnlist = self.repairarglist(name, returnlist)
        [(rtype, name, returnmode)] = returnlist
        arglist = self.extractarglist(args)
        arglist = self.repairarglist(name, arglist)
        if self.unmanageable(rtype, name, arglist):
            self.report("*** %s %s unmanageable", rtype, name)
            return
        self.alreadydone.append(name)
        self.generate(rtype, name, arglist)

    def extractarglist(self, args):
        """Split an argument string on commas into (type, name, mode) tuples."""
        args = args.strip()
        if not args or args == "void":
            return []
        return [self.extractarg(part.strip()) for part in args.split(",")]

    def extractarg(self, part):
        """Parse one argument declaration into a (type, name, mode) tuple.

        Returns ("unknown", part, "InMode") when the text cannot be parsed.
        """
        mode = "InMode"
        part = part.strip()
        match = self.asplit.match(part)
        if not match:
            self.error("Indecipherable argument: %s", repr(part))
            return ("unknown", part, mode)
        atype, name, array = match.group('type', 'name', 'array')
        if array:
            # array matches an optional [] after the argument name
            atype = atype + " ptr "
        atype = re.sub(r"\*", " ptr ", atype)
        atype = atype.strip()
        atype = re.sub("[ \t]+", "_", atype)
        return self.modifyarg(atype, name, mode)

    def modifyarg(self, type, name, mode):
        """Normalise an argument type and derive its mode.

        A "const_" prefix is dropped; otherwise a "_ptr" suffix is dropped
        and the mode becomes "OutMode", as it does for inherent pointer
        types.  A trailing "_far" is removed last.
        """
        if type[:6] == "const_":
            type = type[6:]
        elif type[-4:] == "_ptr":
            type = type[:-4]
            mode = "OutMode"
        elif type in self.inherentpointertypes:
            mode = "OutMode"
        if type[-4:] == "_far":
            type = type[:-4]
        return type, name, mode

    def repairarglist(self, functionname, arglist):
        """Return a copy of `arglist` with the repair instructions applied.

        Each instruction is (pattern, replacement) or (functionpattern,
        pattern, replacement); the first instruction whose pattern matches
        at the current position is applied.
        """
        arglist = arglist[:]
        i = 0
        while i < len(arglist):
            for item in self.repairinstructions:
                if len(item) == 2:
                    pattern, replacement = item
                    functionpat = "*"
                else:
                    functionpat, pattern, replacement = item
                if not fnmatch.fnmatchcase(functionname, functionpat):
                    continue
                n = len(pattern)
                # An empty pattern matches without consuming arguments and,
                # with an empty replacement, would never advance i.
                if n == 0: continue
                if i+n > len(arglist): continue
                current = arglist[i:i+n]
                for j in range(n):
                    if not self.matcharg(pattern[j], current[j]):
                        break
                else: # All items of the pattern match
                    new = self.substituteargs(
                            pattern, replacement, current)
                    if new is not None:
                        arglist[i:i+n] = new
                        i = i+len(new) # No recursive substitutions
                        break
            else: # No patterns match
                i = i+1
        return arglist

    def matcharg(self, patarg, arg):
        """Return true when all three words of `arg` match the shell patterns in `patarg`."""
        hits = [1 for word, pat in zip(arg, patarg)
                if fnmatch.fnmatchcase(word, pat)]
        return len(hits) == 3

    def substituteargs(self, pattern, replacement, old):
        """Build the replacement arguments for the matched arguments `old`.

        In a replacement word, "*" keeps the word of the argument at the
        same position and "$n" takes the word of the n-th matched argument.
        """
        new = []
        for k in range(len(replacement)):
            item = replacement[k]
            newitem = [item[0], item[1], item[2]]
            for i in range(3):
                if item[i] == '*':
                    newitem[i] = old[k][i]
                elif item[i][:1] == '$':
                    index = int(item[i][1:]) - 1
                    newitem[i] = old[index][i]
            new.append(tuple(newitem))
        return new

    def generate(self, type, name, arglist):
        """Record the used types and write the generator call for one function."""
        self.typeused(type, 'return')
        classname, listname = self.destination(type, name, arglist)
        if not self.specfile: return
        self.specfile.write("f = %s(%s, %s,\n" % (classname, type, repr(name)))
        for atype, aname, amode in arglist:
            self.typeused(atype, amode)
            self.specfile.write(" (%s, %s, %s),\n" %
                                (atype, repr(aname), amode))
        if name in self.greydictnames:
            self.specfile.write(" condition=%s,\n"%repr(self.greydictnames[name]))
        self.specfile.write(")\n")
        self.specfile.write("%s.append(f)\n\n" % listname)

    def destination(self, type, name, arglist):
        """Return the (generator class name, list name) used by generate()."""
        return "FunctionGenerator", "functions"

    def blacklisted(self, type, name):
        """Return 1 when the return type or the function name is blacklisted, else 0."""
        if type in self.blacklisttypes:
            return 1
        if name in self.blacklistnames:
            return 1
        return 0

    def unmanageable(self, type, name, arglist):
        """Return 1 when any argument has a blacklisted type, else 0."""
        for atype, aname, amode in arglist:
            if atype in self.blacklisttypes:
                self.report("argument type %s is blacklisted", atype)
                return 1
        return 0
class Scanner_PreUH3(Scanner):
    """Scanner for Universal Headers before release 3.

    Prototypes are recognised by a leading "extern pascal" instead of
    the EXTERN_API macro used by the base class.
    """

    def initpatterns(self):
        """Start from the base patterns, then replace the head, type and symbol ones."""
        Scanner.initpatterns(self)
        self.head_pat = "^extern pascal[ \t]+" # XXX Mac specific!
        self.type_pat = "pascal[ \t\n]+(?P<type>[a-zA-Z0-9_ \t]*[a-zA-Z0-9_])[ \t\n]+"
        # whole_pat is built from type_pat, so it has to be rebuilt here.
        self.whole_pat = "".join([self.type_pat, self.name_pat, self.args_pat])
        self.sym_pat = (
            "^[ \t]*(?P<name>[a-zA-Z0-9_]+)[ \t]*="
            "[ \t]*(?P<defn>[-0-9'\"][^\t\n,;}]*),?"
        )
class Scanner_OSX(Scanner):
    """Scanner for modern (post UH3.3) Universal Headers.

    Accepts both EXTERN_API and EXTERN_API_C prototypes.
    """

    def initpatterns(self):
        """Start from the base patterns, then widen the head and type ones.

        The patterns are raw strings, like those in Scanner.initpatterns,
        so that regex escapes such as "\\(" are not treated as (invalid)
        string escapes.
        """
        Scanner.initpatterns(self)
        self.head_pat = r"^EXTERN_API(_C)?"
        self.type_pat = r"EXTERN_API(_C)?" + \
                        r"[ \t\n]*\([ \t\n]*" + \
                        r"(?P<type>[a-zA-Z0-9_* \t]*[a-zA-Z0-9_*])" + \
                        r"[ \t\n]*\)[ \t\n]*"
        # whole_pat is built from type_pat, so it has to be rebuilt here.
        self.whole_pat = self.type_pat + self.name_pat + self.args_pat
        self.sym_pat = r"^[ \t]*(?P<name>[a-zA-Z0-9_]+)[ \t]*=" + \
                       r"[ \t]*(?P<defn>[-0-9_a-zA-Z'\"\(][^\t\n,;}]*),?"
# Matches any character with an ordinal from 128 through 255.
_8bit = re.compile(r"[\200-\377]")

def escape8bit(s):
    """Return `s` with each character of ordinal >= 128 written as a backslash-x hex escape.

    Strings in which _8bit finds nothing are returned unchanged.
    """
    if _8bit.search(s) is None:
        return s
    pieces = []
    for ch in s:
        code = ord(ch)
        if code < 128:
            pieces.append(ch)
        else:
            pieces.append("\\x%x" % code)
    return "".join(pieces)
def test():
    """Smoke test: scan a hard-coded AppleEvents.h path into @aespecs.py and @aedefs.py."""
    scanner = Scanner(
        "D:Development:THINK C:Mac #includes:Apple #includes:AppleEvents.h",
        "@aespecs.py",
        "@aedefs.py")
    scanner.scan()

# Run the smoke test when this file is executed as a script.
if __name__ == '__main__':
    test()