1 """Parse a Python module and describe its classes and methods.
3 Parse enough of a Python file to recognize imports and class and
4 method definitions, and to find out the superclasses of a class.
6 The interface consists of a single function:
7 readmodule_ex(module [, path])
8 where module is the name of a Python module, and path is an optional
9 list of directories where the module is to be searched. If present,
10 path is prepended to the system search path sys.path. The return
11 value is a dictionary. The keys of the dictionary are the names of
12 the classes defined in the module (including classes that are defined
13 via the from XXX import YYY construct). The values are class
14 instances of the class Class defined here. One special key/value pair
15 is present for packages: the key '__path__' has a list as its value
16 which contains the package search path.
18 A class is described by the class Class in this module. Instances
19 of this class have the following instance variables:
20 module -- the module name
21 name -- the name of the class
22 super -- a list of super classes (Class instances)
23 methods -- a dictionary of methods
24 file -- the file in which the class was defined
25 lineno -- the line in the file on which the class statement occurred
26 The dictionary of methods uses the method names as keys and the line
27 numbers on which the method was defined as values.
28 If the name of a super class is not recognized, the corresponding
29 entry in the list of super classes is not a class instance but a
30 string giving the name of the super class. Since import statements
31 are recognized and imported modules are scanned as well, this
32 shouldn't happen often.
34 A function is described by the class Function in this module.
35 Instances of this class have the following instance variables:
36 module -- the module name
37 name -- the name of the function
38 file -- the file in which the function was defined
39 lineno -- the line in the file on which the def statement occurred
44 import tokenize
# Python tokenizer
45 from token
import NAME
, DEDENT
, NEWLINE
__all__ = [
    "readmodule",
    "readmodule_ex",
    "Class",
    "Function",
]

# Cache of the modules seen so far: maps a module name to the
# dictionary built for that module.
_modules = {}
51 # each Python class is represented by an instance of this class
53 '''Class to represent a Python class.'''
54 def __init__(self
, module
, name
, super, file, lineno
):
64 def _addmethod(self
, name
, lineno
):
65 self
.methods
[name
] = lineno
68 '''Class to represent a top-level Python function'''
69 def __init__(self
, module
, name
, file, lineno
):
def readmodule(module, path=None):
    '''Backwards compatible interface.

    Call _readmodule() and keep only the Class objects from the
    resulting dictionary.

    module -- the name of the module to read
    path -- optional list of directories to search; None (the
            default) means no extra directories

    Return a dictionary mapping names to Class instances.  Every
    entry whose value is not a Class instance is left out.
    '''
    # A None default replaces the former shared mutable [] default;
    # _readmodule() still receives a list.
    res = {}
    for key, value in _readmodule(module, path or []).items():
        if isinstance(value, Class):
            res[key] = value
    return res
def readmodule_ex(module, path=None):
    '''Read a module file and return a dictionary of classes.

    Search for MODULE in PATH and sys.path, read and parse the
    module and return the dictionary built by _readmodule() for it.

    module -- the name of the module to read
    path -- optional list of directories to search; None (the
            default) means no extra directories

    No enclosing package is passed on to _readmodule(), so the
    search is for a top-level module and PATH is combined with
    sys.path.
    '''
    # A None default replaces the former shared mutable [] default;
    # _readmodule() still receives a list.
    return _readmodule(module, path or [])
102 def _readmodule(module
, path
, inpackage
=None):
103 '''Do the hard work for readmodule[_ex].'''
104 # Compute the full module name (prepending inpackage if set)
106 fullmodule
= "%s.%s" % (inpackage
, module
)
111 if fullmodule
in _modules
:
112 return _modules
[fullmodule
]
114 # Initialize the dict for this module's contents
117 # Check if it is a built-in module; we don't do much for these
118 if module
in sys
.builtin_module_names
and not inpackage
:
119 _modules
[module
] = dict
122 # Check for a dotted module name
123 i
= module
.rfind('.')
126 submodule
= module
[i
+1:]
127 parent
= _readmodule(package
, path
, inpackage
)
129 package
= "%s.%s" % (inpackage
, package
)
130 return _readmodule(submodule
, parent
['__path__'], package
)
132 # Search the path for the module
135 f
, file, (suff
, mode
, type) = imp
.find_module(module
, path
)
137 f
, file, (suff
, mode
, type) = imp
.find_module(module
, path
+ sys
.path
)
138 if type == imp
.PKG_DIRECTORY
:
139 dict['__path__'] = [file]
141 f
, file, (suff
, mode
, type) = imp
.find_module('__init__', [file])
142 _modules
[fullmodule
] = dict
143 if type != imp
.PY_SOURCE
:
144 # not Python source, can't do anything with this module
148 stack
= [] # stack of (class, indent) pairs
150 g
= tokenize
.generate_tokens(f
.readline
)
152 for tokentype
, token
, start
, end
, line
in g
:
153 if tokentype
== DEDENT
:
154 lineno
, thisindent
= start
155 # close nested classes and defs
156 while stack
and stack
[-1][1] >= thisindent
:
159 lineno
, thisindent
= start
160 # close previous nested classes and defs
161 while stack
and stack
[-1][1] >= thisindent
:
163 tokentype
, meth_name
, start
, end
, line
= g
.next()
164 if tokentype
!= NAME
:
165 continue # Syntax error
167 cur_class
= stack
[-1][0]
168 if isinstance(cur_class
, Class
):
170 cur_class
._addmethod
(meth_name
, lineno
)
171 # else it's a nested def
174 dict[meth_name
] = Function(module
, meth_name
, file, lineno
)
175 stack
.append((None, thisindent
)) # Marker for nested fns
176 elif token
== 'class':
177 lineno
, thisindent
= start
178 # close previous nested classes and defs
179 while stack
and stack
[-1][1] >= thisindent
:
181 tokentype
, class_name
, start
, end
, line
= g
.next()
182 if tokentype
!= NAME
:
183 continue # Syntax error
184 # parse what follows the class name
185 tokentype
, token
, start
, end
, line
= g
.next()
188 names
= [] # List of superclasses
189 # there's a list of superclasses
191 super = [] # Tokens making up current superclass
193 tokentype
, token
, start
, end
, line
= g
.next()
194 if token
in (')', ',') and level
== 1:
197 # we know this super class
202 # super class is of the form
203 # module.class: look in module for
219 elif token
== ',' and level
== 1:
224 cur_class
= Class(module
, class_name
, inherit
, file, lineno
)
226 dict[class_name
] = cur_class
227 stack
.append((cur_class
, thisindent
))
228 elif token
== 'import' and start
[1] == 0:
229 modules
= _getnamelist(g
)
230 for mod
, mod2
in modules
:
232 # Recursively read the imported module
234 _readmodule(mod
, path
)
237 _readmodule(mod
, path
, inpackage
)
241 # If we can't find or parse the imported module,
242 # too bad -- don't die here.
244 elif token
== 'from' and start
[1] == 0:
245 mod
, token
= _getname(g
)
246 if not mod
or token
!= "import":
248 names
= _getnamelist(g
)
250 # Recursively read the imported module
251 d
= _readmodule(mod
, path
, inpackage
)
253 # If we can't find or parse the imported module,
254 # too bad -- don't die here.
256 # add any classes that were defined in the imported module
257 # to our name space if they were mentioned in the list
262 # don't add names that start with _
266 except StopIteration:
273 # Helper to get a comma-separated list of dotted names plus 'as'
274 # clauses. Return a list of pairs (name, name2) where name2 is
275 # the 'as' name, or None if there is no 'as' clause.
278 name
, token
= _getname(g
)
282 name2
, token
= _getname(g
)
285 names
.append((name
, name2
))
286 while token
!= "," and "\n" not in token
:
287 tokentype
, token
, start
, end
, line
= g
.next()
293 # Helper to get a dotted name, return a pair (name, token) where
294 # name is the dotted name, or None if there was no dotted name,
295 # and token is the next input token.
297 tokentype
, token
, start
, end
, line
= g
.next()
298 if tokentype
!= NAME
and token
!= '*':
302 tokentype
, token
, start
, end
, line
= g
.next()
305 tokentype
, token
, start
, end
, line
= g
.next()
306 if tokentype
!= NAME
:
309 return (".".join(parts
), token
)
312 # Main program for testing.
315 if os
.path
.exists(mod
):
316 path
= [os
.path
.dirname(mod
)]
317 mod
= os
.path
.basename(mod
)
318 if mod
.lower().endswith(".py"):
322 dict = readmodule_ex(mod
, path
)
324 objs
.sort(lambda a
, b
: cmp(getattr(a
, 'lineno', 0),
325 getattr(b
, 'lineno', 0)))
327 if isinstance(obj
, Class
):
328 print "class", obj
.name
, obj
.super, obj
.lineno
329 methods
= obj
.methods
.items()
330 methods
.sort(lambda a
, b
: cmp(a
[1], b
[1]))
331 for name
, lineno
in methods
:
332 if name
!= "__path__":
333 print " def", name
, lineno
334 elif isinstance(obj
, Function
):
335 print "def", obj
.name
, obj
.lineno
337 if __name__
== "__main__":