Update for release.
[python/dscho.git] / Lib / compiler / symbols.py
blob9f47fa3f264da9f63a1544e94d13fa86afe6843c
1 """Module symbol-table generator"""
3 from compiler import ast
4 from compiler.consts import SC_LOCAL, SC_GLOBAL, SC_FREE, SC_CELL, SC_UNKNOWN
5 from compiler.misc import mangle
6 import types
9 import sys
11 MANGLE_LEN = 256
13 class Scope:
14 # XXX how much information do I need about each name?
15 def __init__(self, name, module, klass=None):
16 self.name = name
17 self.module = module
18 self.defs = {}
19 self.uses = {}
20 self.globals = {}
21 self.params = {}
22 self.frees = {}
23 self.cells = {}
24 self.children = []
25 # nested is true if the class could contain free variables,
26 # i.e. if it is nested within another function.
27 self.nested = None
28 self.generator = None
29 self.klass = None
30 if klass is not None:
31 for i in range(len(klass)):
32 if klass[i] != '_':
33 self.klass = klass[i:]
34 break
36 def __repr__(self):
37 return "<%s: %s>" % (self.__class__.__name__, self.name)
39 def mangle(self, name):
40 if self.klass is None:
41 return name
42 return mangle(name, self.klass)
44 def add_def(self, name):
45 self.defs[self.mangle(name)] = 1
47 def add_use(self, name):
48 self.uses[self.mangle(name)] = 1
50 def add_global(self, name):
51 name = self.mangle(name)
52 if self.uses.has_key(name) or self.defs.has_key(name):
53 pass # XXX warn about global following def/use
54 if self.params.has_key(name):
55 raise SyntaxError, "%s in %s is global and parameter" % \
56 (name, self.name)
57 self.globals[name] = 1
58 self.module.add_def(name)
60 def add_param(self, name):
61 name = self.mangle(name)
62 self.defs[name] = 1
63 self.params[name] = 1
65 def get_names(self):
66 d = {}
67 d.update(self.defs)
68 d.update(self.uses)
69 d.update(self.globals)
70 return d.keys()
72 def add_child(self, child):
73 self.children.append(child)
75 def get_children(self):
76 return self.children
78 def DEBUG(self):
79 print >> sys.stderr, self.name, self.nested and "nested" or ""
80 print >> sys.stderr, "\tglobals: ", self.globals
81 print >> sys.stderr, "\tcells: ", self.cells
82 print >> sys.stderr, "\tdefs: ", self.defs
83 print >> sys.stderr, "\tuses: ", self.uses
84 print >> sys.stderr, "\tfrees:", self.frees
86 def check_name(self, name):
87 """Return scope of name.
89 The scope of a name could be LOCAL, GLOBAL, FREE, or CELL.
90 """
91 if self.globals.has_key(name):
92 return SC_GLOBAL
93 if self.cells.has_key(name):
94 return SC_CELL
95 if self.defs.has_key(name):
96 return SC_LOCAL
97 if self.nested and (self.frees.has_key(name) or
98 self.uses.has_key(name)):
99 return SC_FREE
100 if self.nested:
101 return SC_UNKNOWN
102 else:
103 return SC_GLOBAL
105 def get_free_vars(self):
106 if not self.nested:
107 return ()
108 free = {}
109 free.update(self.frees)
110 for name in self.uses.keys():
111 if not (self.defs.has_key(name) or
112 self.globals.has_key(name)):
113 free[name] = 1
114 return free.keys()
116 def handle_children(self):
117 for child in self.children:
118 frees = child.get_free_vars()
119 globals = self.add_frees(frees)
120 for name in globals:
121 child.force_global(name)
123 def force_global(self, name):
124 """Force name to be global in scope.
126 Some child of the current node had a free reference to name.
127 When the child was processed, it was labelled a free
128 variable. Now that all its enclosing scope have been
129 processed, the name is known to be a global or builtin. So
130 walk back down the child chain and set the name to be global
131 rather than free.
133 Be careful to stop if a child does not think the name is
134 free.
136 self.globals[name] = 1
137 if self.frees.has_key(name):
138 del self.frees[name]
139 for child in self.children:
140 if child.check_name(name) == SC_FREE:
141 child.force_global(name)
143 def add_frees(self, names):
144 """Process list of free vars from nested scope.
146 Returns a list of names that are either 1) declared global in the
147 parent or 2) undefined in a top-level parent. In either case,
148 the nested scope should treat them as globals.
150 child_globals = []
151 for name in names:
152 sc = self.check_name(name)
153 if self.nested:
154 if sc == SC_UNKNOWN or sc == SC_FREE \
155 or isinstance(self, ClassScope):
156 self.frees[name] = 1
157 elif sc == SC_GLOBAL:
158 child_globals.append(name)
159 elif isinstance(self, FunctionScope) and sc == SC_LOCAL:
160 self.cells[name] = 1
161 elif sc != SC_CELL:
162 child_globals.append(name)
163 else:
164 if sc == SC_LOCAL:
165 self.cells[name] = 1
166 elif sc != SC_CELL:
167 child_globals.append(name)
168 return child_globals
170 def get_cell_vars(self):
171 return self.cells.keys()
173 class ModuleScope(Scope):
174 __super_init = Scope.__init__
176 def __init__(self):
177 self.__super_init("global", self)
179 class FunctionScope(Scope):
180 pass
182 class LambdaScope(FunctionScope):
183 __super_init = Scope.__init__
185 __counter = 1
187 def __init__(self, module, klass=None):
188 i = self.__counter
189 self.__counter += 1
190 self.__super_init("lambda.%d" % i, module, klass)
192 class ClassScope(Scope):
193 __super_init = Scope.__init__
195 def __init__(self, name, module):
196 self.__super_init(name, module, name)
198 class SymbolVisitor:
199 def __init__(self):
200 self.scopes = {}
201 self.klass = None
203 # node that define new scopes
205 def visitModule(self, node):
206 scope = self.module = self.scopes[node] = ModuleScope()
207 self.visit(node.node, scope)
209 visitExpression = visitModule
211 def visitFunction(self, node, parent):
212 parent.add_def(node.name)
213 for n in node.defaults:
214 self.visit(n, parent)
215 scope = FunctionScope(node.name, self.module, self.klass)
216 if parent.nested or isinstance(parent, FunctionScope):
217 scope.nested = 1
218 self.scopes[node] = scope
219 self._do_args(scope, node.argnames)
220 self.visit(node.code, scope)
221 self.handle_free_vars(scope, parent)
223 def visitLambda(self, node, parent):
224 for n in node.defaults:
225 self.visit(n, parent)
226 scope = LambdaScope(self.module, self.klass)
227 if parent.nested or isinstance(parent, FunctionScope):
228 scope.nested = 1
229 self.scopes[node] = scope
230 self._do_args(scope, node.argnames)
231 self.visit(node.code, scope)
232 self.handle_free_vars(scope, parent)
234 def _do_args(self, scope, args):
235 for name in args:
236 if type(name) == types.TupleType:
237 self._do_args(scope, name)
238 else:
239 scope.add_param(name)
241 def handle_free_vars(self, scope, parent):
242 parent.add_child(scope)
243 scope.handle_children()
245 def visitClass(self, node, parent):
246 parent.add_def(node.name)
247 for n in node.bases:
248 self.visit(n, parent)
249 scope = ClassScope(node.name, self.module)
250 if parent.nested or isinstance(parent, FunctionScope):
251 scope.nested = 1
252 if node.doc is not None:
253 scope.add_def('__doc__')
254 scope.add_def('__module__')
255 self.scopes[node] = scope
256 prev = self.klass
257 self.klass = node.name
258 self.visit(node.code, scope)
259 self.klass = prev
260 self.handle_free_vars(scope, parent)
262 # name can be a def or a use
264 # XXX a few calls and nodes expect a third "assign" arg that is
265 # true if the name is being used as an assignment. only
266 # expressions contained within statements may have the assign arg.
268 def visitName(self, node, scope, assign=0):
269 if assign:
270 scope.add_def(node.name)
271 else:
272 scope.add_use(node.name)
274 # operations that bind new names
276 def visitFor(self, node, scope):
277 self.visit(node.assign, scope, 1)
278 self.visit(node.list, scope)
279 self.visit(node.body, scope)
280 if node.else_:
281 self.visit(node.else_, scope)
283 def visitFrom(self, node, scope):
284 for name, asname in node.names:
285 if name == "*":
286 continue
287 scope.add_def(asname or name)
289 def visitImport(self, node, scope):
290 for name, asname in node.names:
291 i = name.find(".")
292 if i > -1:
293 name = name[:i]
294 scope.add_def(asname or name)
296 def visitGlobal(self, node, scope):
297 for name in node.names:
298 scope.add_global(name)
300 def visitAssign(self, node, scope):
301 """Propagate assignment flag down to child nodes.
303 The Assign node doesn't itself contains the variables being
304 assigned to. Instead, the children in node.nodes are visited
305 with the assign flag set to true. When the names occur in
306 those nodes, they are marked as defs.
308 Some names that occur in an assignment target are not bound by
309 the assignment, e.g. a name occurring inside a slice. The
310 visitor handles these nodes specially; they do not propagate
311 the assign flag to their children.
313 for n in node.nodes:
314 self.visit(n, scope, 1)
315 self.visit(node.expr, scope)
317 def visitAssName(self, node, scope, assign=1):
318 scope.add_def(node.name)
320 def visitAssAttr(self, node, scope, assign=0):
321 self.visit(node.expr, scope, 0)
323 def visitSubscript(self, node, scope, assign=0):
324 self.visit(node.expr, scope, 0)
325 for n in node.subs:
326 self.visit(n, scope, 0)
328 def visitSlice(self, node, scope, assign=0):
329 self.visit(node.expr, scope, 0)
330 if node.lower:
331 self.visit(node.lower, scope, 0)
332 if node.upper:
333 self.visit(node.upper, scope, 0)
335 def visitAugAssign(self, node, scope):
336 # If the LHS is a name, then this counts as assignment.
337 # Otherwise, it's just use.
338 self.visit(node.node, scope)
339 if isinstance(node.node, ast.Name):
340 self.visit(node.node, scope, 1) # XXX worry about this
341 self.visit(node.expr, scope)
343 # prune if statements if tests are false
345 _const_types = types.StringType, types.IntType, types.FloatType
347 def visitIf(self, node, scope):
348 for test, body in node.tests:
349 if isinstance(test, ast.Const):
350 if type(test.value) in self._const_types:
351 if not test.value:
352 continue
353 self.visit(test, scope)
354 self.visit(body, scope)
355 if node.else_:
356 self.visit(node.else_, scope)
358 # a yield statement signals a generator
360 def visitYield(self, node, scope):
361 scope.generator = 1
362 self.visit(node.value, scope)
364 def sort(l):
365 l = l[:]
366 l.sort()
367 return l
369 def list_eq(l1, l2):
370 return sort(l1) == sort(l2)
372 if __name__ == "__main__":
373 import sys
374 from compiler import parseFile, walk
375 import symtable
377 def get_names(syms):
378 return [s for s in [s.get_name() for s in syms.get_symbols()]
379 if not (s.startswith('_[') or s.startswith('.'))]
381 for file in sys.argv[1:]:
382 print file
383 f = open(file)
384 buf = f.read()
385 f.close()
386 syms = symtable.symtable(buf, file, "exec")
387 mod_names = get_names(syms)
388 tree = parseFile(file)
389 s = SymbolVisitor()
390 walk(tree, s)
392 # compare module-level symbols
393 names2 = s.scopes[tree].get_names()
395 if not list_eq(mod_names, names2):
396 print
397 print "oops", file
398 print sort(mod_names)
399 print sort(names2)
400 sys.exit(-1)
402 d = {}
403 d.update(s.scopes)
404 del d[tree]
405 scopes = d.values()
406 del d
408 for s in syms.get_symbols():
409 if s.is_namespace():
410 l = [sc for sc in scopes
411 if sc.name == s.get_name()]
412 if len(l) > 1:
413 print "skipping", s.get_name()
414 else:
415 if not list_eq(get_names(s.get_namespace()),
416 l[0].get_names()):
417 print s.get_name()
418 print sort(get_names(s.get_namespace()))
419 print sort(l[0].get_names())
420 sys.exit(-1)