Fix the tag.
[python/dscho.git] / Lib / imputil.py
blob4278e31d78de9275bc52b602115a7a0c2e8cdde1
"""
Import utilities

Exported classes:
    ImportManager   Manage the import process

    Importer        Base class for replacing standard import functions
    BuiltinImporter Emulate the import mechanism for builtin and frozen modules

    DynLoadSuffixImporter
                    Import dynamically loaded extension modules by file suffix
"""
13 # note: avoid importing non-builtin modules
14 import imp ### not available in JPython?
15 import sys
16 import builtins
18 # for the DirectoryImporter
19 import struct
20 import marshal
# Public API.  DynLoadSuffixImporter is listed because the module docstring
# documents it as an exported class.
__all__ = ["ImportManager", "Importer", "BuiltinImporter",
           "DynLoadSuffixImporter"]

# The type of module objects, used to tell modules apart from code objects
# and plain namespaces.
_ModuleType = type(sys)         ### doesn't work in JPython...
class ImportManager:
    """Manage the import process.

    An instance replaces the ``__import__`` entry of a namespace (the
    builtins by default) with its own hook.  Top-level modules are located
    by scanning ``sys.path``: string entries are searched through the
    filesystem importer stored in ``self.fs_imp``; any other entry is asked
    directly through its ``import_top()`` method.
    """

    def install(self, namespace=vars(builtins)):
        """Install this ImportManager into the specified namespace.

        namespace may be a mapping or a module (in which case its vars()
        are used).  The hook previously stored under '__import__' is
        remembered so that uninstall() can restore it.
        """

        if isinstance(namespace, _ModuleType):
            namespace = vars(namespace)

        # Note: we have no notion of "chaining"

        # Record the previous import hook, then install our own.
        self.previous_importer = namespace['__import__']
        self.namespace = namespace
        namespace['__import__'] = self._import_hook

    def uninstall(self):
        "Restore the previous import mechanism."
        self.namespace['__import__'] = self.previous_importer

    def add_suffix(self, suffix, importFunc):
        """Register importFunc as the handler for files ending in suffix.

        The registration is forwarded to the filesystem importer.
        """
        assert hasattr(importFunc, '__call__')
        self.fs_imp.add_suffix(suffix, importFunc)

    ######################################################################
    #
    # PRIVATE METHODS
    #

    # Optional override for the class used to build the default filesystem
    # importer; when None, _FilesystemImporter is used.
    clsFilesystemImporter = None

    def __init__(self, fs_imp=None):
        """Set up the filesystem importer and the default suffix handlers.

        fs_imp, when given, is used instead of a freshly created filesystem
        importer.
        """
        # we're definitely going to be importing something in the future,
        # so let's just load the OS-related facilities.
        if not _os_stat:
            _os_bootstrap()

        # This is the Importer that we use for grabbing stuff from the
        # filesystem. It defines one more method (import_from_dir) for our use.
        if fs_imp is None:
            cls = self.clsFilesystemImporter or _FilesystemImporter
            fs_imp = cls()
        self.fs_imp = fs_imp

        # Initialize the set of suffixes that we recognize and import.
        # The default will import dynamic-load modules first, followed by
        # .py files (or a .py file's cached bytecode)
        for desc in imp.get_suffixes():
            if desc[2] == imp.C_EXTENSION:
                self.add_suffix(desc[0],
                                DynLoadSuffixImporter(desc).import_file)
        self.add_suffix('.py', py_suffix_importer)

    def _import_hook(self, fqname, globals=None, locals=None, fromlist=None,
                     level=0):
        """Python calls this hook to locate and import a module.

        The signature mirrors builtins.__import__.  The import statement
        passes ``level`` as a fifth argument, so the hook has to accept it;
        it is not otherwise interpreted here -- the import context is still
        derived from ``globals`` alone, exactly as when it is omitted.

        Returns the top-level module when there is no fromlist, otherwise
        the module the fromlist names are to be read from.  Raises
        ImportError when the module cannot be found.
        """

        parts = fqname.split('.')

        # determine the context of this import
        parent = self._determine_import_context(globals)

        # if there is a parent, then its importer should manage this import
        if parent:
            module = parent.__importer__._do_import(parent, parts, fromlist)
            if module:
                return module

        # has the top module already been imported?
        try:
            top_module = sys.modules[parts[0]]
        except KeyError:

            # look for the topmost module
            top_module = self._import_top_module(parts[0])
            if not top_module:
                # the topmost module wasn't found at all.
                raise ImportError('No module named ' + fqname)

        # fast-path simple imports
        if len(parts) == 1:
            if not fromlist:
                return top_module

            if not top_module.__dict__.get('__ispkg__'):
                # __ispkg__ isn't defined (the module was not imported by us),
                # or it is zero.
                #
                # In the former case, there is no way that we could import
                # sub-modules that occur in the fromlist (but we can't raise an
                # error because it may just be names) because we don't know how
                # to deal with packages that were imported by other systems.
                #
                # In the latter case (__ispkg__ == 0), there can't be any sub-
                # modules present, so we can just return.
                #
                # In both cases, since len(parts) == 1, the top_module is also
                # the "bottom" which is the defined return when a fromlist
                # exists.
                return top_module

        importer = top_module.__dict__.get('__importer__')
        if importer:
            return importer._finish_import(top_module, parts[1:], fromlist)

        # Grrr, some people "import os.path" or do "from os.path import ..."
        if len(parts) == 2 and hasattr(top_module, parts[1]):
            if fromlist:
                return getattr(top_module, parts[1])
            else:
                return top_module

        # If the importer does not exist, then we have to bail. A missing
        # importer means that something else imported the module, and we have
        # no knowledge of how to get sub-modules out of the thing.
        raise ImportError('No module named ' + fqname)

    def _determine_import_context(self, globals):
        """Returns the context in which a module should be imported.

        The context could be a loaded (package) module and the imported module
        will be looked for within that package. The context could also be None,
        meaning there is no context -- the module should be looked for as a
        "top-level" module.
        """

        if not globals or not globals.get('__importer__'):
            # globals does not refer to one of our modules or packages. That
            # implies there is no relative import context (as far as we are
            # concerned), and it should just pick it off the standard path.
            return None

        # The globals refer to a module or package of ours. It will define
        # the context of the new import. Get the module/package fqname.
        parent_fqname = globals['__name__']

        # if a package is performing the import, then return itself (imports
        # refer to pkg contents)
        if globals['__ispkg__']:
            parent = sys.modules[parent_fqname]
            assert globals is parent.__dict__
            return parent

        i = parent_fqname.rfind('.')

        # a module outside of a package has no particular import context
        if i == -1:
            return None

        # if a module in a package is performing the import, then return the
        # package (imports refer to siblings)
        parent_fqname = parent_fqname[:i]
        parent = sys.modules[parent_fqname]
        assert parent.__name__ == parent_fqname
        return parent

    def _import_top_module(self, name):
        """Import the top-level module ``name`` by scanning sys.path.

        Returns the module, or None when no sys.path entry provides it.
        """
        # scan sys.path looking for a location in the filesystem that contains
        # the module, or an Importer object that can import the module.
        for item in sys.path:
            if isinstance(item, str):
                module = self.fs_imp.import_from_dir(item, name)
            else:
                module = item.import_top(name)
            if module:
                return module
        return None
class Importer:
    """Base class for replacing standard import functions.

    Subclasses provide get_code(); the remaining methods turn its result
    into module objects, register them in sys.modules and walk dotted names
    and "from" lists.
    """

    def import_top(self, name):
        """Import a top-level module."""
        return self._import_one(None, name, name)

    ######################################################################
    #
    # PRIVATE METHODS
    #
    def _finish_import(self, top, parts, fromlist):
        """Load the rest of a dotted name and pick the module to return.

        ``top`` is the already imported first component and ``parts`` the
        remaining components.  Without a fromlist ("import a.b.c") the top
        module is returned; with one ("from a.b import c, d") the last
        module of the chain is returned.
        """
        # walk ".b.c" down from the top-level module
        leaf = self._load_tail(top, parts)

        if fromlist:
            # The top module was imported by self, hence so was the leaf
            # (just now, or earlier and fetched from sys.modules).  That
            # means __ispkg__ is reliable here.
            #
            # For a package, the fromlist may name sub-modules: import
            # them and store them in the package namespace.  Python then
            # picks every fromlist name off the returned module; names
            # that are not modules are expected to be present already.
            # For a plain module the fromlist only refers to names inside
            # it, so nothing has to be imported.
            if leaf.__ispkg__:
                self._import_fromlist(leaf, fromlist)
            return leaf

        # no fromlist: return the top of the import tree
        return top

    def _import_one(self, parent, modname, fqname):
        """Import a single module.

        Returns the module registered under ``fqname``, importing it through
        get_code() first when necessary, or None when get_code() cannot
        find it.
        """
        # already imported?  (a sentinel keeps a stored None distinguishable
        # from a missing entry)
        missing = object()
        cached = sys.modules.get(fqname, missing)
        if cached is not missing:
            return cached

        # load the module's code, or fetch the module itself
        found = self.get_code(parent, modname, fqname)
        if found is None:
            return None

        loaded = self._process_result(found, fqname)

        # make the module reachable as an attribute of its parent
        if parent:
            setattr(parent, modname, loaded)
        return loaded

    def _process_result(self, result, fqname):
        """Turn a get_code() result into a module registered in sys.modules."""
        ispkg, code, values = result

        # get_code() may hand back a ready-made module instead of a code
        # object; otherwise a fresh module is created to execute the code in
        is_module = isinstance(code, _ModuleType)
        module = code if is_module else imp.new_module(fqname)

        ### record packages a bit differently??
        module.__importer__ = self
        module.__ispkg__ = ispkg

        # extra values go in before the code is executed
        module.__dict__.update(values)

        # the module is almost ready... make it visible
        sys.modules[fqname] = module

        if not is_module:
            # run the code in the module's namespace; a failed import must
            # not leave a half-initialized module registered
            try:
                exec(code, module.__dict__)
            except BaseException:
                sys.modules.pop(fqname, None)
                raise

        # Re-fetch from sys.modules rather than trusting our local
        # reference, and force __name__ to agree with fqname, in case the
        # executed code played games on us.
        registered = sys.modules[fqname]
        registered.__name__ = fqname
        return registered

    def _load_tail(self, m, parts):
        """Import the rest of the modules, down from the top-level module.

        Returns the last module in the dotted list of modules.
        """
        current = m
        for part in parts:
            fqname = "%s.%s" % (current.__name__, part)
            current = self._import_one(current, part, fqname)
            if not current:
                raise ImportError("No module named " + fqname)
        return current

    def _import_fromlist(self, package, fromlist):
        'Import any sub-modules in the "from" list.'

        # a '*' entry pulls in the names listed in the package's __all__
        if '*' in fromlist:
            exported = package.__dict__.get('__all__', [])
            fromlist = list(fromlist) + list(exported)

        for sub in fromlist:
            # names that already exist on the package are left alone (they
            # might not be modules!)
            if sub == '*' or hasattr(package, sub):
                continue
            subname = "%s.%s" % (package.__name__, sub)
            if not self._import_one(package, sub, subname):
                raise ImportError("cannot import name " + subname)

    def _do_import(self, parent, parts, fromlist):
        """Attempt to import the module relative to parent.

        This method is used when the import context specifies that <self>
        imported the parent module.
        """
        head = parts[0]
        head_fqname = parent.__name__ + '.' + head
        head_module = self._import_one(parent, head, head_fqname)
        if head_module:
            return self._finish_import(head_module, parts[1:], fromlist)

        # this importer and parent could not find the module (relatively)
        return None

    ######################################################################
    #
    # METHODS TO OVERRIDE
    #
    def get_code(self, parent, modname, fqname):
        """Find and retrieve the code for the given module.

        parent specifies a parent module to define a context for importing.
        It may be None, indicating no particular context for the search.

        modname specifies a single module (not dotted) within the parent.

        fqname specifies the fully-qualified module name. This is a
        (potentially) dotted name from the "root" of the module namespace
        down to the modname.
        If there is no parent, then modname==fqname.

        This method should return None, or a 3-tuple.

        * If the module was not found, then None should be returned.

        * The first item of the 3-tuple should be the integer 0 or 1,
          specifying whether the module that was found is a package or not.

        * The second item is the code object for the module (it will be
          executed within the new module's namespace). This item can also
          be a fully-loaded module object (e.g. loaded from a shared lib).

        * The third item is a dictionary of name/value pairs that will be
          inserted into the new module before the code object is executed.
          This is provided in case the module's code expects certain values
          (such as where the module was found). When the second item is a
          module object, then these names/values will be inserted *after*
          the module has been loaded/initialized.
        """
        raise RuntimeError("get_code not implemented")
383 ######################################################################
385 # Some handy stuff for the Importers
# Last character of the byte-compiled file suffix: 'c' for a normal run,
# 'o' when Python runs with optimizations enabled (__debug__ is false).
_suffix_char = 'c' if __debug__ else 'o'

# Full suffix of byte-compiled files ('.pyc' or '.pyo').
_suffix = '.py' + _suffix_char
def _compile(pathname, timestamp):
    """Compile (and cache) a Python source file.

    The file specified by <pathname> is compiled to a code object and
    returned.

    Presuming the appropriate privileges exist, the bytecodes will be
    saved back to the filesystem for future imports. The source file's
    modification timestamp must be provided as an integer.

    The cache file is <pathname> plus the byte-compiled suffix character
    and consists of the 4-byte magic number, the timestamp as a
    little-endian unsigned 32-bit integer, and the marshalled code object.
    Failure to open the cache file for writing is silently ignored.
    """
    # Read the source as bytes and let compile() decode it: that honours
    # the file's own encoding declaration and avoids the 'U' open mode,
    # which newer Python versions reject.
    with open(pathname, 'rb') as source:
        codestring = source.read()
    if codestring and codestring[-1:] != b'\n':
        codestring = codestring + b'\n'
    code = builtins.compile(codestring, pathname, 'exec')

    # try to cache the compiled code
    try:
        f = open(pathname + _suffix_char, 'wb')
    except IOError:
        pass
    else:
        with f:
            # Write a placeholder first and the real magic number last, so
            # that an interrupted write leaves a file that readers reject.
            f.write(b'\0\0\0\0')
            f.write(struct.pack('<I', timestamp))
            marshal.dump(code, f)
            f.flush()
            f.seek(0, 0)
            f.write(imp.get_magic())

    return code
# Filled in by _os_bootstrap(): the platform's stat() function and a
# minimal replacement for os.path.join().
_os_stat = _os_path_join = None


def _os_bootstrap():
    "Set up 'os' module replacement functions for use during import bootstrap."
    global _os_stat, _os_path_join

    available = sys.builtin_module_names

    # Pick stat() from whichever OS-specific builtin module is present.
    # Every platform except 'mac' uses the generic, separator-based join.
    join = None
    if 'posix' in available:
        from posix import stat
        sep = '/'
    elif 'nt' in available:
        from nt import stat
        sep = '\\'
    elif 'dos' in available:
        from dos import stat
        sep = '\\'
    elif 'os2' in available:
        from os2 import stat
        sep = '\\'
    elif 'mac' in available:
        from mac import stat

        def join(a, b):
            # colon-separated paths: make sure the directory part contains
            # a colon and ends with one before appending
            if a == '':
                return b
            if ':' not in a:
                a = ':' + a
            if a[-1:] != ':':
                a = a + ':'
            return a + b
    else:
        raise ImportError('no os specific module found')

    if join is None:
        def join(a, b, sep=sep):
            # generic join: do not double a trailing separator
            if a == '':
                return b
            if a[-1:] in ('/', sep):
                return a + b
            return a + sep + b

    _os_stat = stat
    _os_path_join = join
def _os_path_isdir(pathname):
    """Local replacement for os.path.isdir().

    Returns None (rather than False) when the path cannot be stat'ed.
    """
    try:
        info = _os_stat(pathname)
    except OSError:
        return None
    # mask out the file-type bits of the mode and compare them with the
    # directory type
    file_type = info.st_mode & 0o170000
    return file_type == 0o040000
def _timestamp(pathname):
    """Return the file modification time as an integer.

    Returns None when the file cannot be stat'ed.
    """
    try:
        info = _os_stat(pathname)
    except OSError:
        return None
    else:
        return int(info.st_mtime)
489 ######################################################################
491 # Emulate the import mechanism for builtin and frozen modules
class BuiltinImporter(Importer):
    """Importer for builtin and frozen modules."""

    def get_code(self, parent, modname, fqname):
        """Load ``modname`` if it is a builtin or frozen module.

        Returns ``(0, module, {})`` on success and None when the module is
        neither builtin nor frozen, or when a parent package is given.
        """
        # these modules definitely do not occur within a package context
        if parent:
            return None

        # look for the module: builtin first, then frozen
        kind = None
        if imp.is_builtin(modname):
            kind = imp.C_BUILTIN
        elif imp.is_frozen(modname):
            kind = imp.PY_FROZEN

        if kind is None:
            # not found
            return None

        # got it. now load and return it.
        description = ('', '', kind)
        loaded = imp.load_module(modname, None, modname, description)
        return 0, loaded, {}
513 ######################################################################
515 # Internal importer used for importing from the filesystem
class _FilesystemImporter(Importer):
    """Importer that locates modules and packages in filesystem directories.

    Files are recognized by suffix; each registered suffix has a function
    that turns a matching file into a get_code()-style result.
    """

    def __init__(self):
        # (suffix, importFunc) pairs, tried in registration order
        self.suffixes = []

    def add_suffix(self, suffix, importFunc):
        "Register importFunc as the handler for files ending in suffix."
        assert hasattr(importFunc, '__call__')
        self.suffixes.append((suffix, importFunc))

    def import_from_dir(self, dir, fqname):
        """Import the top-level module ``fqname`` from directory ``dir``.

        Returns the module, or None when the directory does not provide it.
        """
        found = self._import_pathname(_os_path_join(dir, fqname), fqname)
        if not found:
            return None
        return self._process_result(found, fqname)

    def get_code(self, parent, modname, fqname):
        """Look for ``modname`` inside the package ``parent``."""
        # This importer is never used with an empty parent. Its existence is
        # private to the ImportManager. The ImportManager uses the
        # import_from_dir() method to import top-level modules/packages.
        # This method is only used when we look for a module within a package.
        assert parent

        # every directory on the package's __path__ is tried first ...
        for submodule_path in parent.__path__:
            candidate = _os_path_join(submodule_path, modname)
            code = self._import_pathname(candidate, fqname)
            if code is not None:
                return code

        # ... and the package directory itself last
        fallback = _os_path_join(parent.__pkgdir__, modname)
        return self._import_pathname(fallback, fqname)

    def _import_pathname(self, pathname, fqname):
        """Import what lives at ``pathname`` (given without a suffix).

        A directory is treated as a package and imported through its
        '__init__' file; otherwise the registered suffixes are tried in
        order.  Returns a get_code()-style 3-tuple, or None.
        """
        if _os_path_isdir(pathname):
            init_path = _os_path_join(pathname, '__init__')
            result = self._import_pathname(init_path, fqname)
            if not result:
                return None
            # flag the result as a package and record where it was found
            values = result[2]
            values['__pkgdir__'] = pathname
            values['__path__'] = [pathname]
            return 1, result[1], values

        for suffix, importFunc in self.suffixes:
            filename = pathname + suffix
            try:
                finfo = _os_stat(filename)
            except OSError:
                # no such file; try the next suffix
                continue
            return importFunc(filename, finfo, fqname)
        return None
566 ######################################################################
568 # SUFFIX-BASED IMPORTERS
def py_suffix_importer(filename, finfo, fqname):
    """Import a '.py' file, using its cached bytecode when that is valid.

    filename is the path of the source file and finfo its stat result
    (item 8 is read as the modification time).  The byte-compiled
    companion file is used when it is at least as new as the source,
    starts with the current magic number and records exactly the source's
    modification time; otherwise the source is compiled (and cached)
    through _compile().

    Returns a get_code()-style tuple ``(0, code, {'__file__': path})``
    where path names the file the code was actually taken from.
    """
    file = filename[:-3] + _suffix
    t_py = int(finfo[8])
    t_pyc = _timestamp(file)

    code = None
    if t_pyc is not None and t_pyc >= t_py:
        # the context manager closes the cache file even if unmarshalling
        # raises
        with open(file, 'rb') as f:
            if f.read(4) == imp.get_magic():
                stamp = f.read(4)
                # a truncated header means an unusable cache, not an
                # error: fall through and recompile from source
                if len(stamp) == 4 and struct.unpack('<I', stamp)[0] == t_py:
                    code = marshal.load(f)
    if code is None:
        file = filename
        code = _compile(file, t_py)

    return 0, code, {'__file__': file}
class DynLoadSuffixImporter:
    """Suffix importer for dynamically loaded modules.

    desc is a suffix description as produced by imp.get_suffixes(); its
    second item is used as the mode for opening the file and the whole
    description is handed to imp.load_module().
    """

    def __init__(self, desc):
        self.desc = desc

    def import_file(self, filename, finfo, fqname):
        """Load ``filename`` as module ``fqname``.

        Returns a get_code()-style tuple ``(0, module, {})``.  finfo is
        accepted for signature compatibility with the other suffix
        importers and is not used.
        """
        # imp.load_module() leaves closing the file to the caller, even when
        # it raises, so the file is managed with a context manager.
        with open(filename, self.desc[1]) as fp:
            module = imp.load_module(fqname, fp, filename, self.desc)
        module.__file__ = filename
        return 0, module, {}
601 ######################################################################
def _print_importers():
    """Print every entry of sys.modules together with its importer.

    Entries are listed in name order.  Modules without an ``__importer__``
    attribute are shown as '-- no importer'; false entries (such as None)
    as '-- non-existent module'.
    """
    # dict.items() returns a view, which cannot be sorted in place;
    # sorted() also takes a snapshot that stays valid while printing.
    for name, module in sorted(sys.modules.items()):
        if module:
            print(name, module.__dict__.get('__importer__', '-- no importer'))
        else:
            print(name, '-- non-existent module')
def _test_revamp():
    """Switch the interpreter over to the revamped import mechanism.

    Installs a fresh ImportManager as the import hook and puts a
    BuiltinImporter at the front of sys.path.
    """
    manager = ImportManager()
    manager.install()
    builtin_importer = BuiltinImporter()
    sys.path.insert(0, builtin_importer)
616 ######################################################################
619 # TODO
621 # from Finn Bock:
622 # type(sys) is not a module in JPython. what to use instead?
623 # imp.C_EXTENSION is not in JPython. same for get_suffixes and new_module
625 # given foo.py of:
626 # import sys
627 # sys.modules['foo'] = sys
629 # ---- standard import mechanism
630 # >>> import foo
631 # >>> foo
632 # <module 'sys' (built-in)>
634 # ---- revamped import mechanism
635 # >>> import imputil
636 # >>> imputil._test_revamp()
637 # >>> import foo
638 # >>> foo
639 # <module 'foo' from 'foo.py'>
642 # from MAL:
643 # should BuiltinImporter exist in sys.path or hard-wired in ImportManager?
644 # need __path__ processing
645 # performance
646 # move chaining to a subclass [gjs: it's been nuked]
647 # deinstall should be possible
648 # query mechanism needed: is a specific Importer installed?
649 # py/pyc/pyo piping hooks to filter/process these files
650 # wish list:
651 # distutils importer hooked to list of standard Internet repositories
652 # module->file location mapper to speed FS-based imports
653 # relative imports
654 # keep chaining so that it can play nice with other import hooks
656 # from Gordon:
657 # push MAL's mapper into sys.path[0] as a cache (hard-coded for apps)
659 # from Guido:
660 # need hook for MAL's walk-me-up import strategy, or Tim's absolute strategy
661 # watch out for sys.modules[...] is None
662 # flag to force absolute imports? (speeds _determine_import_context and
663 # checking for a relative module)
664 # insert names of archives into sys.path (see quote below)
665 # shift import mechanisms and policies around; provide for hooks, overrides
666 # (see quote below)
667 # add get_source stuff
668 # get_topcode and get_subcode
669 # CRLF handling in _compile
670 # race condition in _compile
671 # refactoring of os.py to deal with _os_bootstrap problem
672 # any special handling to do for importing a module with a SyntaxError?
673 # (e.g. clean up the traceback)
674 # implement "domain" for path-type functionality using pkg namespace
675 # (rather than FS-names like __path__)
676 # don't use the word "private"... maybe "internal"
679 # Guido's comments on sys.path caching:
681 # We could cache this in a dictionary: the ImportManager can have a
682 # cache dict mapping pathnames to importer objects, and a separate
683 # method for coming up with an importer given a pathname that's not yet
684 # in the cache. The method should do a stat and/or look at the
685 # extension to decide which importer class to use; you can register new
686 # importer classes by registering a suffix or a Boolean function, plus a
687 # class. If you register a new importer class, the cache is zapped.
688 # The cache is independent from sys.path (but maintained per
689 # ImportManager instance) so that rearrangements of sys.path do the
690 # right thing. If a path is dropped from sys.path the corresponding
691 # cache entry is simply no longer used.
693 # My/Guido's comments on factoring ImportManager and Importer:
695 # > However, we still have a tension occurring here:
697 # > 1) implementing policy in ImportManager assists in single-point policy
698 # > changes for app situations
699 # > 2) implementing policy in Importer assists in package-private policy
700 # > changes for normal, operating conditions
702 # > I'll see if I can sort out a way to do this. Maybe the Importer class will
703 # > implement the methods (which can be overridden to change policy) by
704 # > delegating to ImportManager.
706 # Maybe also think about what kind of policies an Importer would be
707 # likely to want to change. I have a feeling that a lot of the code
708 # there is actually not so much policy but a *necessity* to get things
709 # working given the calling conventions for the __import__ hook: whether
710 # to return the head or tail of a dotted name, or when to do the "finish
711 # fromlist" stuff.