Merged release21-maint changes.
[python/dscho.git] / Lib / compiler / symbols.py
blob6d834e0c1d1ef75a00f18c8cdecda65041c5edd1
1 """Module symbol-table generator"""
3 from compiler import ast
4 from compiler.consts import SC_LOCAL, SC_GLOBAL, SC_FREE, SC_CELL, SC_UNKNOWN
5 import types
7 import sys
9 MANGLE_LEN = 256
class Scope:
    """Symbol bookkeeping for a single namespace.

    Every category of name (defs, uses, globals, params, frees, cells)
    is kept in its own dictionary whose keys are the names and whose
    values are always 1.  Directly nested scopes are kept in the
    ``children`` list.
    """
    # XXX how much information do I need about each name?

    def __init__(self, name, module, klass=None):
        """Create an empty scope.

        name   -- label for the scope (shown by __repr__ and in errors)
        module -- scope whose add_def() is called for every name that
                  is declared global in this scope
        klass  -- name of the enclosing class, or None; it is stored
                  with its leading underscores removed and used as the
                  prefix when mangling private names
        """
        self.name = name
        self.module = module
        self.defs = {}
        self.uses = {}
        self.globals = {}
        self.params = {}
        self.frees = {}
        self.cells = {}
        self.children = []
        # nested is true if the class could contain free variables,
        # i.e. if it is nested within another function.
        self.nested = None
        # Skip the leading underscores of the class name.  If nothing is
        # left (empty name or underscores only) klass stays None, which
        # turns mangling off for this scope.
        self.klass = None
        if klass is not None:
            start = 0
            while start < len(klass) and klass[start] == '_':
                start += 1
            if start < len(klass):
                self.klass = klass[start:]

    def __repr__(self):
        return "<%s: %s>" % (self.__class__.__name__, self.name)

    def mangle(self, name):
        """Return name with the class prefix applied when appropriate.

        The name is returned unchanged when the scope has no class
        prefix, when it does not start with two underscores, when it
        also ends with two underscores, or when len(name) + 2 reaches
        MANGLE_LEN.
        """
        if self.klass is None or not name.startswith('__'):
            return name
        if len(name) + 2 >= MANGLE_LEN or name.endswith('__'):
            return name
        return "_%s%s" % (self.klass, name)

    def add_def(self, name):
        """Record a (mangled) name as bound in this scope."""
        mangled = self.mangle(name)
        self.defs[mangled] = 1

    def add_use(self, name):
        """Record a (mangled) name as referenced in this scope."""
        mangled = self.mangle(name)
        self.uses[mangled] = 1

    def add_global(self, name):
        """Record a name as declared global and define it in the module.

        Raises SyntaxError when the name is also a parameter of this
        scope.
        """
        name = self.mangle(name)
        if name in self.uses or name in self.defs:
            pass # XXX warn about global following def/use
        if name in self.params:
            raise SyntaxError("%s in %s is global and parameter" %
                              (name, self.name))
        self.globals[name] = 1
        self.module.add_def(name)

    def add_param(self, name):
        """Record a (mangled) name as both a parameter and a definition."""
        name = self.mangle(name)
        self.defs[name] = 1
        self.params[name] = 1

    def get_names(self):
        """Return every name that is defined, used or global here."""
        names = self.defs.copy()
        names.update(self.uses)
        names.update(self.globals)
        return names.keys()

    def add_child(self, child):
        """Append a directly nested scope."""
        self.children.append(child)

    def get_children(self):
        """Return the list of directly nested scopes."""
        return self.children

    def DEBUG(self):
        """Debugging dump of the scope to stderr (currently disabled).

        The unconditional return below makes the print statements
        unreachable; remove it to enable the dump.
        """
        return
        print >> sys.stderr, self.name, self.nested and "nested" or ""
        print >> sys.stderr, "\tglobals: ", self.globals
        print >> sys.stderr, "\tcells: ", self.cells
        print >> sys.stderr, "\tdefs: ", self.defs
        print >> sys.stderr, "\tuses: ", self.uses
        print >> sys.stderr, "\tfrees:", self.frees

    def check_name(self, name):
        """Classify name as seen from this scope.

        Returns SC_GLOBAL, SC_CELL or SC_LOCAL when the name is found
        in the matching dictionary (checked in that order).  Otherwise
        a scope that is not nested reports SC_GLOBAL, and a nested
        scope reports SC_FREE if the name is free or used here and
        SC_UNKNOWN if it is not.
        """
        if name in self.globals:
            return SC_GLOBAL
        if name in self.cells:
            return SC_CELL
        if name in self.defs:
            return SC_LOCAL
        if not self.nested:
            return SC_GLOBAL
        if name in self.frees or name in self.uses:
            return SC_FREE
        return SC_UNKNOWN

    def get_free_vars(self):
        """Return the names that are free in this scope.

        A scope that is not nested has none and returns an empty tuple.
        Otherwise the result holds the recorded free names plus every
        used name that is neither defined nor declared global here.
        """
        if not self.nested:
            return ()
        free = self.frees.copy()
        for name in self.uses.keys():
            if name in self.defs or name in self.globals:
                continue
            free[name] = 1
        return free.keys()

    def handle_children(self):
        """Resolve the free variables of each child against this scope.

        Names that add_frees() reports back as globals are forced to be
        global in the child that referenced them.
        """
        for child in self.children:
            for name in self.add_frees(child.get_free_vars()):
                child.force_global(name)

    def force_global(self, name):
        """Mark name as global here and in children that see it as free.

        A nested scope first labels an unresolved reference as free.
        Once the enclosing scopes have been processed and the name has
        turned out to be a global or builtin, this method relabels it
        as global, walking down through the children.

        The walk only descends into a child while that child still
        classifies the name as free.
        """
        self.globals[name] = 1
        if name in self.frees:
            del self.frees[name]
        for child in self.children:
            if child.check_name(name) == SC_FREE:
                child.force_global(name)

    def add_frees(self, names):
        """Process a list of free variables from a nested scope.

        Each name is recorded as free in this scope, recorded as a cell
        of this scope, or collected for the caller.  The returned list
        holds the names the nested scope should treat as globals:
        those declared global here and those left unresolved in a
        scope that is not nested.
        """
        child_globals = []
        for name in names:
            kind = self.check_name(name)
            if not self.nested:
                # Top-level parent: a local becomes a cell, anything
                # else has to be a global.
                if kind == SC_LOCAL:
                    self.cells[name] = 1
                else:
                    child_globals.append(name)
                continue
            if kind == SC_UNKNOWN or kind == SC_FREE \
               or isinstance(self, ClassScope):
                # Not resolvable here; pass it on to our own parent.
                self.frees[name] = 1
            elif kind == SC_GLOBAL:
                child_globals.append(name)
            elif isinstance(self, FunctionScope) and kind == SC_LOCAL:
                self.cells[name] = 1
            else:
                child_globals.append(name)
        return child_globals

    def get_cell_vars(self):
        """Return the names that are cells in this scope."""
        return self.cells.keys()
class ModuleScope(Scope):
    """Top-level scope of a module.

    The scope is labelled "global" and acts as its own module scope.
    """
    __super_init = Scope.__init__

    def __init__(self):
        # A module scope is its own "module": names declared global
        # here are defined in this very scope.
        self.__super_init("global", module=self)
class FunctionScope(Scope):
    """Scope of a function body.

    Adds nothing to Scope; the class exists so that other code in this
    module can recognise function scopes with isinstance().
    """
class LambdaScope(FunctionScope):
    """Function scope for a lambda expression.

    A lambda has no name of its own, so each scope is labelled
    "lambda.N", where N is taken from a counter shared by all
    instances of the class.
    """
    __super_init = Scope.__init__

    # Number handed to the next lambda scope that is created.
    __counter = 1

    def __init__(self, module, klass=None):
        """Create the scope for one lambda.

        module -- module scope, passed through to Scope.__init__
        klass  -- name of the enclosing class or None, passed through
                  to Scope.__init__
        """
        i = LambdaScope.__counter
        # The counter has to be advanced on the class.  An augmented
        # assignment through self would read the class attribute but
        # store the result on the instance, leaving the shared counter
        # at 1 and naming every lambda scope "lambda.1".
        LambdaScope.__counter = i + 1
        self.__super_init("lambda.%d" % i, module, klass)
class ClassScope(Scope):
    """Scope of a class body.

    The class name serves both as the scope label and as the prefix
    for mangling private names used inside the class.
    """
    __super_init = Scope.__init__

    def __init__(self, name, module):
        self.__super_init(name, module, klass=name)
class SymbolVisitor:
    """AST visitor that builds a Scope for every namespace in a tree.

    ``scopes`` maps each scope-introducing node (module, function,
    lambda, class) to its Scope object.  ``klass`` holds the name of
    the class whose body is currently being visited, or None.

    NOTE(review): self.visit is not defined in this class; it is
    presumably installed by the tree walker (e.g. compiler.walk) --
    confirm.
    """

    def __init__(self):
        self.scopes = {}
        self.klass = None

    # nodes that define new scopes

    def visitModule(self, node):
        """Create the module scope and visit the module body in it."""
        scope = ModuleScope()
        self.module = scope
        self.scopes[node] = scope
        self.visit(node.node, scope)

    def visitFunction(self, node, parent):
        """Define the function in parent and build the function's scope.

        Default values are visited in the parent scope; the parameters
        and the body belong to the new scope.
        """
        parent.add_def(node.name)
        for default in node.defaults:
            self.visit(default, parent)
        scope = FunctionScope(node.name, self.module, self.klass)
        if parent.nested or isinstance(parent, FunctionScope):
            scope.nested = 1
        self.scopes[node] = scope
        self._do_args(scope, node.argnames)
        self.visit(node.code, scope)
        self.handle_free_vars(scope, parent)
        scope.DEBUG()

    def visitLambda(self, node, parent):
        """Build the scope of a lambda; defaults are visited in parent."""
        for default in node.defaults:
            self.visit(default, parent)
        scope = LambdaScope(self.module, self.klass)
        if parent.nested or isinstance(parent, FunctionScope):
            scope.nested = 1
        self.scopes[node] = scope
        self._do_args(scope, node.argnames)
        self.visit(node.code, scope)
        self.handle_free_vars(scope, parent)

    def _do_args(self, scope, args):
        """Register parameter names, recursing into tuple parameters."""
        for arg in args:
            if type(arg) == types.TupleType:
                self._do_args(scope, arg)
                continue
            scope.add_param(arg)

    def handle_free_vars(self, scope, parent):
        """Attach scope to parent and resolve its children's free names."""
        parent.add_child(scope)
        if scope.children:
            scope.DEBUG()
        scope.handle_children()

    def visitClass(self, node, parent):
        """Define the class in parent and build the class body's scope.

        Base-class expressions are visited in the parent scope.  While
        the body is visited, self.klass is set to the class name; the
        previous value is restored afterwards.
        """
        parent.add_def(node.name)
        for base in node.bases:
            self.visit(base, parent)
        scope = ClassScope(node.name, self.module)
        if parent.nested or isinstance(parent, FunctionScope):
            scope.nested = 1
        self.scopes[node] = scope
        enclosing = self.klass
        self.klass = node.name
        self.visit(node.code, scope)
        self.klass = enclosing
        self.handle_free_vars(scope, parent)

    # name can be a def or a use

    # XXX a few calls and nodes expect a third "assign" arg that is
    # true if the name is being used as an assignment.  only
    # expressions contained within statements may have the assign arg.

    def visitName(self, node, scope, assign=0):
        """Record the name as a definition if assign is true, else a use."""
        if assign:
            scope.add_def(node.name)
            return
        scope.add_use(node.name)

    # operations that bind new names

    def visitFor(self, node, scope):
        """Visit a for loop; the loop target is visited as an assignment."""
        self.visit(node.assign, scope, 1)
        self.visit(node.list, scope)
        self.visit(node.body, scope)
        if node.else_:
            self.visit(node.else_, scope)

    def visitFrom(self, node, scope):
        """Define the names bound by a from-import; "*" is skipped."""
        for name, asname in node.names:
            if name != "*":
                scope.add_def(asname or name)

    def visitImport(self, node, scope):
        """Define the names bound by an import statement.

        Without an "as" name, a dotted import defines only the part of
        the module name before the first dot.
        """
        for name, asname in node.names:
            dot = name.find(".")
            if dot > -1:
                name = name[:dot]
            scope.add_def(asname or name)

    def visitAssName(self, node, scope, assign=1):
        """Record the assigned name as a definition."""
        scope.add_def(node.name)

    def visitAugAssign(self, node, scope):
        """Visit an augmented assignment."""
        # If the LHS is a name, then this counts as assignment.
        # Otherwise, it's just use.
        target = node.node
        self.visit(target, scope)
        if isinstance(target, ast.Name):
            self.visit(target, scope, 1) # XXX worry about this
        self.visit(node.expr, scope)

    def visitAssign(self, node, scope):
        """Visit every assignment target, then the assigned expression."""
        for target in node.nodes:
            self.visit(target, scope, 1)
        self.visit(node.expr, scope)

    def visitGlobal(self, node, scope):
        """Declare every name of a global statement in scope."""
        for name in node.names:
            scope.add_global(name)

    # prune if statements if tests are false

    _const_types = types.StringType, types.IntType, types.FloatType

    def visitIf(self, node, scope):
        """Visit an if statement, skipping branches that can never run.

        A branch is skipped when its test is a constant of one of the
        types in _const_types and that constant is false.
        """
        for test, body in node.tests:
            if (isinstance(test, ast.Const)
                    and type(test.value) in self._const_types
                    and not test.value):
                continue
            self.visit(test, scope)
            self.visit(body, scope)
        if node.else_:
            self.visit(node.else_, scope)
def sort(l):
    """Return a sorted copy of l; the argument itself is not modified."""
    ordered = l[:]
    ordered.sort()
    return ordered
def list_eq(l1, l2):
    """Return true if l1 and l2 hold the same items, ignoring order.

    Both lists are compared through sorted copies, so neither argument
    is modified.
    """
    first = l1[:]
    first.sort()
    second = l2[:]
    second.sort()
    return first == second
341 if __name__ == "__main__":
342 import sys
343 from compiler import parseFile, walk
344 import symtable
346 def get_names(syms):
347 return [s for s in [s.get_name() for s in syms.get_symbols()]
348 if not (s.startswith('_[') or s.startswith('.'))]
350 for file in sys.argv[1:]:
351 print file
352 f = open(file)
353 buf = f.read()
354 f.close()
355 syms = symtable.symtable(buf, file, "exec")
356 mod_names = get_names(syms)
357 tree = parseFile(file)
358 s = SymbolVisitor()
359 walk(tree, s)
361 # compare module-level symbols
362 names2 = s.scopes[tree].get_names()
364 if not list_eq(mod_names, names2):
365 print
366 print "oops", file
367 print sort(mod_names)
368 print sort(names2)
369 sys.exit(-1)
371 d = {}
372 d.update(s.scopes)
373 del d[tree]
374 scopes = d.values()
375 del d
377 for s in syms.get_symbols():
378 if s.is_namespace():
379 l = [sc for sc in scopes
380 if sc.name == s.get_name()]
381 if len(l) > 1:
382 print "skipping", s.get_name()
383 else:
384 if not list_eq(get_names(s.get_namespace()),
385 l[0].get_names()):
386 print s.get_name()
387 print sort(get_names(s.get_namespace()))
388 print sort(l[0].get_names())
389 sys.exit(-1)