3 Tools for scanning header files in search of function prototypes.
Often, the function prototypes in header files contain enough information
to automatically generate (or reverse-engineer) interface specifications
from them.  The conventions used are very vendor-specific, but once you've
figured out what they are, they are often a great help, and it sure beats
manually entering the interface specifications.  (These are needed to generate
the glue used to access the functions from Python.)
12 In order to make this class useful, almost every component can be overridden.
13 The defaults are (currently) tuned to scanning Apple Macintosh header files,
14 although most Mac specific details are contained in header-specific subclasses.
28 from bgenlocations
import CREATOR
, INCLUDEDIR
31 INCLUDEDIR
= os
.curdir
33 Error
= "scantools.Error"
37 # Set to 1 in subclass to debug your scanner patterns.
40 def __init__(self
, input = None, output
= None, defsoutput
= None):
43 self
.initrepairinstructions()
47 self
.compilepatterns()
48 self
.initosspecifics()
51 self
.setoutput(output
, defsoutput
)
55 def initusedtypes(self
):
def typeused(self, type, mode):
    """Record that `type` has been seen in use with `mode`.

    self.usedtypes maps each type name to a dictionary whose keys are the
    modes that type was used with.  The values are always None; the inner
    dictionary only serves as a set of modes.
    """
    # dict.has_key() does not exist in Python 3.  setdefault() creates the
    # inner dictionary on first use and works in every Python version.
    self.usedtypes.setdefault(type, {})[mode] = None
63 def reportusedtypes(self
):
64 types
= self
.usedtypes
.keys()
67 modes
= self
.usedtypes
[type].keys()
69 self
.report("%s %s", type, " ".join(modes
))
71 def gentypetest(self
, file):
74 types
= self
.usedtypes
.keys()
77 fp
.write("\t'%s',\n"%type)
84 print "** Missing type:", t
86 if missing: raise "Missing Types"
93 def error(self
, format
, *args
):
97 def report(self
, format
, *args
):
101 def writeinitialdefs(self
):
def initblacklists(self):
    """Fill in the blacklists and the greylist lookup table.

    The data comes from the overridable make*() hooks:
    - blacklistnames is whatever makeblacklistnames() returns;
    - blacklisttypes is makeblacklisttypes() with "unknown" and "-"
      placed in front;
    - greydictnames is the result of makegreylist() run through
      greylist2dict().
    """
    self.blacklistnames = self.makeblacklistnames()

    extra_types = self.makeblacklisttypes()
    self.blacklisttypes = ["unknown", "-"] + extra_types

    greylist = self.makegreylist()
    self.greydictnames = self.greylist2dict(greylist)
109 def greylist2dict(self
, list):
111 for define
, namelist
in list:
112 for name
in namelist
:
116 def makeblacklistnames(self
):
119 def makeblacklisttypes(self
):
122 def makegreylist(self
):
def initrepairinstructions(self):
    """Store the repair instructions and the inherent pointer types.

    Both values come from overridable hooks: makerepairinstructions()
    and makeinherentpointertypes().  The hooks are called in that order.
    """
    instructions = self.makerepairinstructions()
    self.repairinstructions = instructions

    pointertypes = self.makeinherentpointertypes()
    self.inherentpointertypes = pointertypes
129 def makerepairinstructions(self
):
130 """Parse the repair file into repair instructions.
132 The file format is simple:
133 1) use \ to split a long logical line in multiple physical lines
134 2) everything after the first # on a line is ignored (as comment)
135 3) empty lines are ignored
136 4) remaining lines must have exactly 3 colon-separated fields:
137 functionpattern : argumentspattern : argumentsreplacement
138 5) all patterns use shell style pattern matching
139 6) an empty functionpattern means the same as *
140 7) the other two fields are each comma-separated lists of triples
141 8) a triple is a space-separated list of 1-3 words
142 9) a triple with less than 3 words is padded at the end with "*" words
143 10) when used as a pattern, a triple matches the type, name, and mode
144 of an argument, respectively
145 11) when used as a replacement, the words of a triple specify
146 replacements for the corresponding words of the argument,
147 with "*" as a word by itself meaning leave the original word
148 (no other uses of "*" is allowed)
149 12) the replacement need not have the same number of triples
152 f
= self
.openrepairfile()
154 print "Reading repair file", `f
.name`
, "..."
162 while line
[-2:] == '\\\n':
163 line
= line
[:-2] + ' ' + f
.readline()
166 if i
>= 0: line
= line
[:i
]
167 words
= [s
.strip() for s
in line
.split(':')]
168 if words
== ['']: continue
170 print "Line", startlineno
,
171 print ": bad line (not 3 colon-separated fields)"
174 [fpat
, pat
, rep
] = words
175 if not fpat
: fpat
= "*"
177 print "Line", startlineno
,
178 print "Empty pattern"
181 patparts
= [s
.strip() for s
in pat
.split(',')]
182 repparts
= [s
.strip() for s
in rep
.split(',')]
186 print "Line", startlineno
,
187 print "Empty pattern part"
192 print "Line", startlineno
,
193 print "Pattern part has > 3 words"
195 pattern
= pattern
[:3]
197 while len(pattern
) < 3:
199 patterns
.append(pattern
)
203 print "Line", startlineno
,
204 print "Empty replacement part"
207 replacement
= p
.split()
208 if len(replacement
) > 3:
209 print "Line", startlineno
,
210 print "Pattern part has > 3 words"
212 replacement
= replacement
[:3]
214 while len(replacement
) < 3:
215 replacement
.append("*")
216 replacements
.append(replacement
)
217 list.append((fpat
, patterns
, replacements
))
220 def makeinherentpointertypes(self
):
223 def openrepairfile(self
, filename
= "REPAIR"):
225 return open(filename
, "rU")
227 print `filename`
, ":", msg
228 print "Cannot open repair file -- assume no repair needed"
235 self
.specfile
= sys
.stdout
237 self
.scanfile
= sys
.stdin
242 self
.includepath
= [os
.curdir
, INCLUDEDIR
]
def initpatterns(self):
    """Set the regular-expression source strings used by the scanner.

    Every attribute assigned here ends in "_pat" and holds pattern text
    only.  compilepatterns() compiles each one and stores the result
    under the same name without the "_pat" suffix (head_pat -> head).
    Subclasses override this method to replace individual patterns.
    """
    # A line that starts with EXTERN_API not followed by an underscore.
    self.head_pat = r"^EXTERN_API[^_]"
    # Any one of the characters ; = { }
    self.tail_pat = r"[;={}]"
    # "EXTERN_API( <type> )": the parenthesised text is captured as 'type'.
    self.type_pat = r"EXTERN_API" + \
                    r"[ \t\n]*\([ \t\n]*" + \
                    r"(?P<type>[a-zA-Z0-9_* \t]*[a-zA-Z0-9_*])" + \
                    r"[ \t\n]*\)[ \t\n]*"
    # An identifier, captured as 'name', plus trailing whitespace.
    self.name_pat = r"(?P<name>[a-zA-Z0-9_]+)[ \t\n]*"
    # A parenthesised argument list, captured as 'args'; one level of
    # nested parentheses is accepted inside it.
    self.args_pat = r"\((?P<args>([^\(;=\)]+|\([^\(;=\)]*\))*)\)"
    # type, name and argument list in sequence.
    self.whole_pat = self.type_pat + self.name_pat + self.args_pat
    # "name = defn" with an optional trailing comma; captures 'name' and 'defn'.
    self.sym_pat = r"^[ \t]*(?P<name>[a-zA-Z0-9_]+)[ \t]*=" + \
                   r"[ \t]*(?P<defn>[-0-9_a-zA-Z'\"\(][^\t\n,;}]*),?"
    # Splits one argument into 'type', 'name' and an optional "[]" ('array').
    self.asplit_pat = r"^(?P<type>.*[^a-zA-Z0-9_])(?P<name>[a-zA-Z0-9_]+)(?P<array>\[\])?$"
    # Everything before a // comment is captured as 'rest'.
    self.comment1_pat = r"(?P<rest>.*)//.*"
    # note that the next pattern only removes comments that are wholly within one line
    self.comment2_pat = r"(?P<rest1>.*)/\*.*\*/(?P<rest2>.*)"
def compilepatterns(self):
    """Compile every "*_pat" attribute into a regular-expression object.

    For each attribute of self whose name ends in "_pat", the compiled
    pattern is stored on self under the same name with that suffix
    removed (for example head_pat is compiled into head).
    """
    suffix = "_pat"
    for attrname in dir(self):
        if not attrname.endswith(suffix):
            continue
        source = getattr(self, attrname)
        setattr(self, attrname[:-len(suffix)], re.compile(source))
268 def initosspecifics(self):
269 if MacOS and CREATOR:
270 self.filetype = 'TEXT'
271 self.filecreator = CREATOR
273 self.filetype = self.filecreator = None
def setfiletype(self, filename):
    """Apply the configured creator and/or type codes to `filename`.

    Nothing happens when the MacOS module is unavailable or when neither
    self.filecreator nor self.filetype is set.  Otherwise the file's
    current creator and type are read, the configured values replace
    them where set, and the result is written back.
    """
    if not MacOS:
        return
    if not (self.filecreator or self.filetype):
        return
    creator, type = MacOS.GetCreatorAndType(filename)
    # Keep the file's existing value wherever no override is configured.
    creator = self.filecreator or creator
    type = self.filetype or type
    MacOS.SetCreatorAndType(filename, creator, type)
285 def closefiles(self):
291 tmp = self.specmine and self.specfile
296 tmp = self.defsmine and self.defsfile
301 tmp = self.scanmine and self.scanfile
305 def setoutput(self, spec, defs = None):
309 if type(spec) == StringType:
310 file = self.openoutput(spec)
318 if type(defs) == StringType:
319 file = self.openoutput(defs)
327 def openoutput(self, filename):
329 file = open(filename, 'w')
331 raise IOError, (filename, arg)
332 self.setfiletype(filename)
335 def setinput(self, scan = sys.stdin):
336 if not type(scan) in (TupleType, ListType):
338 self.allscaninputs = scan
341 def _nextinput(self):
342 if not self.allscaninputs:
344 scan = self.allscaninputs[0]
345 self.allscaninputs = self.allscaninputs[1:]
348 if type(scan) == StringType:
349 file = self.openinput(scan)
359 def openinput(self, filename):
360 if not os.path.isabs(filename):
361 for dir in self.includepath:
362 fullname = os.path.join(dir, filename)
363 #self.report("trying full name
%s", `fullname`)
365 return open(fullname, 'rU')
368 # If not on the path, or absolute, try default open()
370 return open(filename, 'rU')
372 raise IOError, (arg, filename)
375 if not self.scanfile:
376 raise Error, "input file not set"
377 self.line = self.scanfile.readline()
379 if self._nextinput():
380 return self.getline()
382 self.lineno = self.lineno + 1
386 if not self.scanfile:
387 self.error("No
input file has been specified
")
389 inputname = self.scanfile.name
390 self.report("scanfile
= %s", `inputname`)
391 if not self.specfile:
392 self.report("(No interface specifications will be written
)")
394 self.report("specfile
= %s", `self.specfile.name`)
395 self.specfile.write("# Generated from %s\n\n" % `inputname`)
396 if not self
.defsfile
:
397 self
.report("(No symbol definitions will be written)")
399 self
.report("defsfile = %s", `self
.defsfile
.name`
)
400 self
.defsfile
.write("# Generated from %s\n\n" % `os
.path
.split(inputname
)[1]`
)
401 self
.writeinitialdefs()
402 self
.alreadydone
= []
405 try: line
= self
.getline()
406 except EOFError: break
408 self
.report("LINE: %s" % `line`
)
409 match
= self
.comment1
.match(line
)
411 line
= match
.group('rest')
413 self
.report("\tafter comment1: %s" % `line`
)
414 match
= self
.comment2
.match(line
)
416 line
= match
.group('rest1')+match
.group('rest2')
418 self
.report("\tafter comment2: %s" % `line`
)
419 match
= self
.comment2
.match(line
)
421 match
= self
.sym
.match(line
)
424 self
.report("\tmatches sym.")
427 match
= self
.head
.match(line
)
430 self
.report("\tmatches head.")
434 self
.error("Uncaught EOF error")
435 self
.reportusedtypes()
437 def dosymdef(self
, match
):
438 name
, defn
= match
.group('name', 'defn')
439 defn
= escape8bit(defn
)
441 self
.report("\tsym: name=%s, defn=%s" % (`name`
, `defn`
))
442 if not name
in self
.blacklistnames
:
443 self
.defsfile
.write("%s = %s\n" % (name
, defn
))
445 self
.defsfile
.write("# %s = %s\n" % (name
, defn
))
446 # XXXX No way to handle greylisted names
448 def dofuncspec(self
):
450 while not self
.tail
.search(raw
):
451 line
= self
.getline()
453 self
.report("* CONTINUATION LINE: %s" % `line`
)
454 match
= self
.comment1
.match(line
)
456 line
= match
.group('rest')
458 self
.report("\tafter comment1: %s" % `line`
)
459 match
= self
.comment2
.match(line
)
461 line
= match
.group('rest1')+match
.group('rest2')
463 self
.report("\tafter comment1: %s" % `line`
)
464 match
= self
.comment2
.match(line
)
467 self
.report("* WHOLE LINE: %s" % `raw`
)
468 self
.processrawspec(raw
)
470 def processrawspec(self
, raw
):
471 match
= self
.whole
.search(raw
)
473 self
.report("Bad raw spec: %s", `raw`
)
475 if not self
.type.search(raw
):
476 self
.report("(Type already doesn't match)")
478 self
.report("(but type matched)")
480 type, name
, args
= match
.group('type', 'name', 'args')
481 type = re
.sub("\*", " ptr", type)
482 type = re
.sub("[ \t]+", "_", type)
483 if name
in self
.alreadydone
:
484 self
.report("Name has already been defined: %s", `name`
)
486 self
.report("==> %s %s <==", type, name
)
487 if self
.blacklisted(type, name
):
488 self
.report("*** %s %s blacklisted", type, name
)
490 returnlist
= [(type, name
, 'ReturnMode')]
491 returnlist
= self
.repairarglist(name
, returnlist
)
492 [(type, name
, returnmode
)] = returnlist
493 arglist
= self
.extractarglist(args
)
494 arglist
= self
.repairarglist(name
, arglist
)
495 if self
.unmanageable(type, name
, arglist
):
496 ##for arg in arglist:
497 ## self.report(" %s", `arg`)
498 self
.report("*** %s %s unmanageable", type, name
)
500 self
.alreadydone
.append(name
)
501 self
.generate(type, name
, arglist
)
503 def extractarglist(self
, args
):
505 if not args
or args
== "void":
507 parts
= [s
.strip() for s
in args
.split(",")]
510 arg
= self
.extractarg(part
)
514 def extractarg(self
, part
):
517 match
= self
.asplit
.match(part
)
519 self
.error("Indecipherable argument: %s", `part`
)
520 return ("unknown", part
, mode
)
521 type, name
, array
= match
.group('type', 'name', 'array')
523 # array matches an optional [] after the argument name
524 type = type + " ptr "
525 type = re
.sub("\*", " ptr ", type)
527 type = re
.sub("[ \t]+", "_", type)
528 return self
.modifyarg(type, name
, mode
)
530 def modifyarg(self
, type, name
, mode
):
531 if type[:6] == "const_":
533 elif type[-4:] == "_ptr":
536 elif type in self
.inherentpointertypes
:
538 if type[-4:] == "_far":
540 return type, name
, mode
542 def repairarglist(self
, functionname
, arglist
):
545 while i
< len(arglist
):
546 for item
in self
.repairinstructions
:
548 pattern
, replacement
= item
551 functionpat
, pattern
, replacement
= item
552 if not fnmatch
.fnmatchcase(functionname
, functionpat
):
555 if i
+n
> len(arglist
): continue
556 current
= arglist
[i
:i
+n
]
558 if not self
.matcharg(pattern
[j
], current
[j
]):
560 else: # All items of the pattern match
561 new
= self
.substituteargs(
562 pattern
, replacement
, current
)
565 i
= i
+len(new
) # No recursive substitutions
567 else: # No patterns match
def matcharg(self, patarg, arg):
    """Return true if the argument triple `arg` matches the pattern `patarg`.

    Both are 3-item sequences (type, name, mode).  Each word of `arg` is
    compared with the corresponding word of `patarg` using case-sensitive
    shell-style matching, and all three words must match.
    """
    # The original used len(filter(None, map(...))), which fails in
    # Python 3 because filter() returns an iterator.  zip() also avoids
    # Python 2's map() padding a short sequence with None.
    hits = [1 for word, pat in zip(arg, patarg)
            if fnmatch.fnmatchcase(word, pat)]
    return len(hits) == 3
574 def substituteargs(self
, pattern
, replacement
, old
):
576 for k
in range(len(replacement
)):
577 item
= replacement
[k
]
578 newitem
= [item
[0], item
[1], item
[2]]
581 newitem
[i
] = old
[k
][i
]
582 elif item
[i
][:1] == '$':
583 index
= int(item
[i
][1:]) - 1
584 newitem
[i
] = old
[index
][i
]
585 new
.append(tuple(newitem
))
586 ##self.report("old: %s", `old`)
587 ##self.report("new: %s", `new`)
def generate(self, type, name, arglist):
    """Write the interface specification for one function to the spec file.

    type    -- the function's return type
    name    -- the function's name
    arglist -- sequence of (type, name, mode) argument triples

    The return type is always recorded through typeused().  When no
    spec file is set, nothing else happens.  Otherwise an
    "f = <class>(...)" statement followed by "<list>.append(f)" is
    written, and every argument type is recorded through typeused().
    The class and list names come from destination().  A "condition="
    entry is added when the name appears in self.greydictnames.
    """
    self.typeused(type, 'return')
    classname, listname = self.destination(type, name, arglist)
    if not self.specfile:
        return
    out = self.specfile
    # repr() replaces the Python-2-only backtick syntax; the output is the same.
    out.write("f = %s(%s, %s,\n" % (classname, type, repr(name)))
    for atype, aname, amode in arglist:
        self.typeused(atype, amode)
        out.write(" (%s, %s, %s),\n" %
                  (atype, repr(aname), amode))
    # "in" replaces dict.has_key(), which does not exist in Python 3.
    if name in self.greydictnames:
        out.write(" condition=%s,\n" % repr(self.greydictnames[name]))
    out.write(")\n")
    out.write("%s.append(f)\n\n" % listname)
def destination(self, type, name, arglist):
    """Return (classname, listname) for a generated function.

    generate() uses the first item as the generator class to instantiate
    and the second as the list the result is appended to.  This default
    ignores its arguments and always returns the same pair.
    """
    classname = "FunctionGenerator"
    listname = "functions"
    return classname, listname
607 def blacklisted(self
, type, name
):
608 if type in self
.blacklisttypes
:
609 ##self.report("return type %s is blacklisted", type)
611 if name
in self
.blacklistnames
:
612 ##self.report("function name %s is blacklisted", name)
616 def unmanageable(self
, type, name
, arglist
):
617 for atype
, aname
, amode
in arglist
:
618 if atype
in self
.blacklisttypes
:
619 self
.report("argument type %s is blacklisted", atype
)
class Scanner_PreUH3(Scanner):
    """Scanner for Universal Headers before release 3"""

    def initpatterns(self):
        """Replace the head, type, whole and sym patterns.

        The base class patterns are set first.  The overrides below look
        for "extern pascal" declarations instead of EXTERN_API ones.  The
        literals are raw strings, as in Scanner.initpatterns(); they match
        exactly the same text as the non-raw originals.
        """
        Scanner.initpatterns(self)
        self.head_pat = r"^extern pascal[ \t]+"  # XXX Mac specific!
        self.type_pat = r"pascal[ \t\n]+(?P<type>[a-zA-Z0-9_ \t]*[a-zA-Z0-9_])[ \t\n]+"
        # whole_pat must be rebuilt because it embeds type_pat.
        self.whole_pat = self.type_pat + self.name_pat + self.args_pat
        self.sym_pat = r"^[ \t]*(?P<name>[a-zA-Z0-9_]+)[ \t]*=" + \
                       r"[ \t]*(?P<defn>[-0-9'\"][^\t\n,;}]*),?"
633 class Scanner_OSX(Scanner
):
634 """Scanner for modern (post UH3.3) Universal Headers """
635 def initpatterns(self
):
636 Scanner
.initpatterns(self
)
637 self
.head_pat
= "^EXTERN_API(_C)?"
638 self
.type_pat
= "EXTERN_API(_C)?" + \
639 "[ \t\n]*\([ \t\n]*" + \
640 "(?P<type>[a-zA-Z0-9_* \t]*[a-zA-Z0-9_*])" + \
642 self
.whole_pat
= self
.type_pat
+ self
.name_pat
+ self
.args_pat
643 self
.sym_pat
= "^[ \t]*(?P<name>[a-zA-Z0-9_]+)[ \t]*=" + \
644 "[ \t]*(?P<defn>[-0-9_a-zA-Z'\"\(][^\t\n,;}]*),?"
646 _8bit
= re
.compile(r
"[\200-\377]")
649 if _8bit
.search(s
) is not None:
654 out
.append("\\" + hex(o
)[1:])
661 input = "D:Development:THINK C:Mac #includes:Apple #includes:AppleEvents.h"
662 output
= "@aespecs.py"
663 defsoutput
= "@aedefs.py"
664 s
= Scanner(input, output
, defsoutput
)
667 if __name__
== '__main__':