Fix a bug in the ``compiler`` package that caused invalid code to be
[python/dscho.git] / Lib / compiler / symbols.py
blob8f6298060d825808bb66ef1dcd99485232f3616f
1 """Module symbol-table generator"""
3 from compiler import ast
4 from compiler.consts import SC_LOCAL, SC_GLOBAL, SC_FREE, SC_CELL, SC_UNKNOWN
5 from compiler.misc import mangle
6 import types
9 import sys
11 MANGLE_LEN = 256
13 class Scope:
14 # XXX how much information do I need about each name?
15 def __init__(self, name, module, klass=None):
16 self.name = name
17 self.module = module
18 self.defs = {}
19 self.uses = {}
20 self.globals = {}
21 self.params = {}
22 self.frees = {}
23 self.cells = {}
24 self.children = []
25 # nested is true if the class could contain free variables,
26 # i.e. if it is nested within another function.
27 self.nested = None
28 self.generator = None
29 self.klass = None
30 if klass is not None:
31 for i in range(len(klass)):
32 if klass[i] != '_':
33 self.klass = klass[i:]
34 break
36 def __repr__(self):
37 return "<%s: %s>" % (self.__class__.__name__, self.name)
39 def mangle(self, name):
40 if self.klass is None:
41 return name
42 return mangle(name, self.klass)
44 def add_def(self, name):
45 self.defs[self.mangle(name)] = 1
47 def add_use(self, name):
48 self.uses[self.mangle(name)] = 1
50 def add_global(self, name):
51 name = self.mangle(name)
52 if self.uses.has_key(name) or self.defs.has_key(name):
53 pass # XXX warn about global following def/use
54 if self.params.has_key(name):
55 raise SyntaxError, "%s in %s is global and parameter" % \
56 (name, self.name)
57 self.globals[name] = 1
58 self.module.add_def(name)
60 def add_param(self, name):
61 name = self.mangle(name)
62 self.defs[name] = 1
63 self.params[name] = 1
65 def get_names(self):
66 d = {}
67 d.update(self.defs)
68 d.update(self.uses)
69 d.update(self.globals)
70 return d.keys()
72 def add_child(self, child):
73 self.children.append(child)
75 def get_children(self):
76 return self.children
78 def DEBUG(self):
79 print >> sys.stderr, self.name, self.nested and "nested" or ""
80 print >> sys.stderr, "\tglobals: ", self.globals
81 print >> sys.stderr, "\tcells: ", self.cells
82 print >> sys.stderr, "\tdefs: ", self.defs
83 print >> sys.stderr, "\tuses: ", self.uses
84 print >> sys.stderr, "\tfrees:", self.frees
86 def check_name(self, name):
87 """Return scope of name.
89 The scope of a name could be LOCAL, GLOBAL, FREE, or CELL.
90 """
91 if self.globals.has_key(name):
92 return SC_GLOBAL
93 if self.cells.has_key(name):
94 return SC_CELL
95 if self.defs.has_key(name):
96 return SC_LOCAL
97 if self.nested and (self.frees.has_key(name) or
98 self.uses.has_key(name)):
99 return SC_FREE
100 if self.nested:
101 return SC_UNKNOWN
102 else:
103 return SC_GLOBAL
105 def get_free_vars(self):
106 if not self.nested:
107 return ()
108 free = {}
109 free.update(self.frees)
110 for name in self.uses.keys():
111 if not (self.defs.has_key(name) or
112 self.globals.has_key(name)):
113 free[name] = 1
114 return free.keys()
116 def handle_children(self):
117 for child in self.children:
118 frees = child.get_free_vars()
119 globals = self.add_frees(frees)
120 for name in globals:
121 child.force_global(name)
123 def force_global(self, name):
124 """Force name to be global in scope.
126 Some child of the current node had a free reference to name.
127 When the child was processed, it was labelled a free
128 variable. Now that all its enclosing scope have been
129 processed, the name is known to be a global or builtin. So
130 walk back down the child chain and set the name to be global
131 rather than free.
133 Be careful to stop if a child does not think the name is
134 free.
136 self.globals[name] = 1
137 if self.frees.has_key(name):
138 del self.frees[name]
139 for child in self.children:
140 if child.check_name(name) == SC_FREE:
141 child.force_global(name)
143 def add_frees(self, names):
144 """Process list of free vars from nested scope.
146 Returns a list of names that are either 1) declared global in the
147 parent or 2) undefined in a top-level parent. In either case,
148 the nested scope should treat them as globals.
150 child_globals = []
151 for name in names:
152 sc = self.check_name(name)
153 if self.nested:
154 if sc == SC_UNKNOWN or sc == SC_FREE \
155 or isinstance(self, ClassScope):
156 self.frees[name] = 1
157 elif sc == SC_GLOBAL:
158 child_globals.append(name)
159 elif isinstance(self, FunctionScope) and sc == SC_LOCAL:
160 self.cells[name] = 1
161 elif sc != SC_CELL:
162 child_globals.append(name)
163 else:
164 if sc == SC_LOCAL:
165 self.cells[name] = 1
166 elif sc != SC_CELL:
167 child_globals.append(name)
168 return child_globals
170 def get_cell_vars(self):
171 return self.cells.keys()
173 class ModuleScope(Scope):
174 __super_init = Scope.__init__
176 def __init__(self):
177 self.__super_init("global", self)
179 class FunctionScope(Scope):
180 pass
182 class GenExprScope(Scope):
183 __super_init = Scope.__init__
185 __counter = 1
187 def __init__(self, module, klass=None):
188 i = self.__counter
189 self.__counter += 1
190 self.__super_init("generator expression<%d>"%i, module, klass)
191 self.add_param('.0')
193 def get_names(self):
194 keys = Scope.get_names(self)
195 return keys
197 class LambdaScope(FunctionScope):
198 __super_init = Scope.__init__
200 __counter = 1
202 def __init__(self, module, klass=None):
203 i = self.__counter
204 self.__counter += 1
205 self.__super_init("lambda.%d" % i, module, klass)
207 class ClassScope(Scope):
208 __super_init = Scope.__init__
210 def __init__(self, name, module):
211 self.__super_init(name, module, name)
213 class SymbolVisitor:
214 def __init__(self):
215 self.scopes = {}
216 self.klass = None
218 # node that define new scopes
220 def visitModule(self, node):
221 scope = self.module = self.scopes[node] = ModuleScope()
222 self.visit(node.node, scope)
224 visitExpression = visitModule
226 def visitFunction(self, node, parent):
227 if node.decorators:
228 self.visit(node.decorators, parent)
229 parent.add_def(node.name)
230 for n in node.defaults:
231 self.visit(n, parent)
232 scope = FunctionScope(node.name, self.module, self.klass)
233 if parent.nested or isinstance(parent, FunctionScope):
234 scope.nested = 1
235 self.scopes[node] = scope
236 self._do_args(scope, node.argnames)
237 self.visit(node.code, scope)
238 self.handle_free_vars(scope, parent)
240 def visitGenExpr(self, node, parent):
241 scope = GenExprScope(self.module, self.klass);
242 if parent.nested or isinstance(parent, FunctionScope) \
243 or isinstance(parent, GenExprScope):
244 scope.nested = 1
246 self.scopes[node] = scope
247 self.visit(node.code, scope)
249 self.handle_free_vars(scope, parent)
251 def visitGenExprInner(self, node, scope):
252 for genfor in node.quals:
253 self.visit(genfor, scope)
255 self.visit(node.expr, scope)
257 def visitGenExprFor(self, node, scope):
258 self.visit(node.assign, scope, 1)
259 self.visit(node.iter, scope)
260 for if_ in node.ifs:
261 self.visit(if_, scope)
263 def visitGenExprIf(self, node, scope):
264 self.visit(node.test, scope)
266 def visitLambda(self, node, parent, assign=0):
267 # Lambda is an expression, so it could appear in an expression
268 # context where assign is passed. The transformer should catch
269 # any code that has a lambda on the left-hand side.
270 assert not assign
272 for n in node.defaults:
273 self.visit(n, parent)
274 scope = LambdaScope(self.module, self.klass)
275 if parent.nested or isinstance(parent, FunctionScope):
276 scope.nested = 1
277 self.scopes[node] = scope
278 self._do_args(scope, node.argnames)
279 self.visit(node.code, scope)
280 self.handle_free_vars(scope, parent)
282 def _do_args(self, scope, args):
283 for name in args:
284 if type(name) == types.TupleType:
285 self._do_args(scope, name)
286 else:
287 scope.add_param(name)
289 def handle_free_vars(self, scope, parent):
290 parent.add_child(scope)
291 scope.handle_children()
293 def visitClass(self, node, parent):
294 parent.add_def(node.name)
295 for n in node.bases:
296 self.visit(n, parent)
297 scope = ClassScope(node.name, self.module)
298 if parent.nested or isinstance(parent, FunctionScope):
299 scope.nested = 1
300 if node.doc is not None:
301 scope.add_def('__doc__')
302 scope.add_def('__module__')
303 self.scopes[node] = scope
304 prev = self.klass
305 self.klass = node.name
306 self.visit(node.code, scope)
307 self.klass = prev
308 self.handle_free_vars(scope, parent)
310 # name can be a def or a use
312 # XXX a few calls and nodes expect a third "assign" arg that is
313 # true if the name is being used as an assignment. only
314 # expressions contained within statements may have the assign arg.
316 def visitName(self, node, scope, assign=0):
317 if assign:
318 scope.add_def(node.name)
319 else:
320 scope.add_use(node.name)
322 # operations that bind new names
324 def visitFor(self, node, scope):
325 self.visit(node.assign, scope, 1)
326 self.visit(node.list, scope)
327 self.visit(node.body, scope)
328 if node.else_:
329 self.visit(node.else_, scope)
331 def visitFrom(self, node, scope):
332 for name, asname in node.names:
333 if name == "*":
334 continue
335 scope.add_def(asname or name)
337 def visitImport(self, node, scope):
338 for name, asname in node.names:
339 i = name.find(".")
340 if i > -1:
341 name = name[:i]
342 scope.add_def(asname or name)
344 def visitGlobal(self, node, scope):
345 for name in node.names:
346 scope.add_global(name)
348 def visitAssign(self, node, scope):
349 """Propagate assignment flag down to child nodes.
351 The Assign node doesn't itself contains the variables being
352 assigned to. Instead, the children in node.nodes are visited
353 with the assign flag set to true. When the names occur in
354 those nodes, they are marked as defs.
356 Some names that occur in an assignment target are not bound by
357 the assignment, e.g. a name occurring inside a slice. The
358 visitor handles these nodes specially; they do not propagate
359 the assign flag to their children.
361 for n in node.nodes:
362 self.visit(n, scope, 1)
363 self.visit(node.expr, scope)
365 def visitAssName(self, node, scope, assign=1):
366 scope.add_def(node.name)
368 def visitAssAttr(self, node, scope, assign=0):
369 self.visit(node.expr, scope, 0)
371 def visitSubscript(self, node, scope, assign=0):
372 self.visit(node.expr, scope, 0)
373 for n in node.subs:
374 self.visit(n, scope, 0)
376 def visitSlice(self, node, scope, assign=0):
377 self.visit(node.expr, scope, 0)
378 if node.lower:
379 self.visit(node.lower, scope, 0)
380 if node.upper:
381 self.visit(node.upper, scope, 0)
383 def visitAugAssign(self, node, scope):
384 # If the LHS is a name, then this counts as assignment.
385 # Otherwise, it's just use.
386 self.visit(node.node, scope)
387 if isinstance(node.node, ast.Name):
388 self.visit(node.node, scope, 1) # XXX worry about this
389 self.visit(node.expr, scope)
391 # prune if statements if tests are false
393 _const_types = types.StringType, types.IntType, types.FloatType
395 def visitIf(self, node, scope):
396 for test, body in node.tests:
397 if isinstance(test, ast.Const):
398 if type(test.value) in self._const_types:
399 if not test.value:
400 continue
401 self.visit(test, scope)
402 self.visit(body, scope)
403 if node.else_:
404 self.visit(node.else_, scope)
406 # a yield statement signals a generator
408 def visitYield(self, node, scope):
409 scope.generator = 1
410 self.visit(node.value, scope)
412 def list_eq(l1, l2):
413 return sorted(l1) == sorted(l2)
415 if __name__ == "__main__":
416 import sys
417 from compiler import parseFile, walk
418 import symtable
420 def get_names(syms):
421 return [s for s in [s.get_name() for s in syms.get_symbols()]
422 if not (s.startswith('_[') or s.startswith('.'))]
424 for file in sys.argv[1:]:
425 print file
426 f = open(file)
427 buf = f.read()
428 f.close()
429 syms = symtable.symtable(buf, file, "exec")
430 mod_names = get_names(syms)
431 tree = parseFile(file)
432 s = SymbolVisitor()
433 walk(tree, s)
435 # compare module-level symbols
436 names2 = s.scopes[tree].get_names()
438 if not list_eq(mod_names, names2):
439 print
440 print "oops", file
441 print sorted(mod_names)
442 print sorted(names2)
443 sys.exit(-1)
445 d = {}
446 d.update(s.scopes)
447 del d[tree]
448 scopes = d.values()
449 del d
451 for s in syms.get_symbols():
452 if s.is_namespace():
453 l = [sc for sc in scopes
454 if sc.name == s.get_name()]
455 if len(l) > 1:
456 print "skipping", s.get_name()
457 else:
458 if not list_eq(get_names(s.get_namespace()),
459 l[0].get_names()):
460 print s.get_name()
461 print sorted(get_names(s.get_namespace()))
462 print sorted(l[0].get_names())
463 sys.exit(-1)