Lib/encodings/__init__.py

   1 """ Standard "encodings" Package
   2
   3     Standard Python encoding modules are stored in this package
   4     directory.
   5
   6     Codec modules must have names corresponding to normalized encoding
   7     names as defined in the normalize_encoding() function below, e.g.
   8     'utf-8' must be implemented by the module 'utf_8.py'.
   9
  10     Each codec module must export the following interface:
  11
  12     * getregentry() -> (encoder, decoder, stream_reader, stream_writer)
  13     The getregentry() API must return callable objects which adhere to
  14     the Python Codec Interface Standard.
  15
  16     In addition, a module may optionally also define the following
  17     APIs which are then used by the package's codec search function:
  18
  19     * getaliases() -> sequence of encoding name strings to use as aliases
  20
  21     Alias names returned by getaliases() must be normalized encoding
  22     names as defined by normalize_encoding().
  23
  24 Written by Marc-Andre Lemburg (mal@lemburg.com).
  25
  26 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
  27
  28 """#"
  29
  30 import codecs, exceptions, re
  31
  32 _cache = {}
  33 _unknown = '--unknown--'
  34 _import_tail = ['*']
  35 _norm_encoding_RE = re.compile('[^a-zA-Z0-9.]')
  36
  37 class CodecRegistryError(exceptions.LookupError,
  38                          exceptions.SystemError):
  39     pass
  40
  41 def normalize_encoding(encoding):
  42
  43     """ Normalize an encoding name.
  44
  45         Normalization works as follows: all non-alphanumeric
  46         characters except the dot used for Python package names are
  47         collapsed and replaced with a single underscore, e.g. '  -;#'
  48         becomes '_'.
  49
  50     """
  51     return '_'.join(_norm_encoding_RE.split(encoding))
  52
  53 def search_function(encoding):
  54
  55     # Cache lookup
  56     entry = _cache.get(encoding, _unknown)
  57     if entry is not _unknown:
  58         return entry
  59
  60     # Import the module:
  61     #
  62     # First look in the encodings package, then try to lookup the
  63     # encoding in the aliases mapping and retry the import using the
  64     # default import module lookup scheme with the alias name.
  65     #
  66     modname = normalize_encoding(encoding)
  67     try:
  68         mod = __import__('encodings.' + modname,
  69                          globals(), locals(), _import_tail)
  70     except ImportError:
  71         import aliases
  72         modname = (aliases.aliases.get(modname) or
  73                    aliases.aliases.get(modname.replace('.', '_')) or
  74                    modname)
  75         try:
  76             mod = __import__(modname, globals(), locals(), _import_tail)
  77         except ImportError:
  78             mod = None
  79
  80     try:
  81         getregentry = mod.getregentry
  82     except AttributeError:
  83         # Not a codec module
  84         mod = None
  85
  86     if mod is None:
  87         # Cache misses
  88         _cache[encoding] = None
  89         return None
  90
  91     # Now ask the module for the registry entry
  92     entry = tuple(getregentry())
  93     if len(entry) != 4:
  94         raise CodecRegistryError,\
  95               'module "%s" (%s) failed to register' % \
  96               (mod.__name__, mod.__file__)
  97     for obj in entry:
  98         if not callable(obj):
  99             raise CodecRegistryError,\
 100                   'incompatible codecs in module "%s" (%s)' % \
 101                   (mod.__name__, mod.__file__)
 102
 103     # Cache the codec registry entry
 104     _cache[encoding] = entry
 105
 106     # Register its aliases (without overwriting previously registered
 107     # aliases)
 108     try:
 109         codecaliases = mod.getaliases()
 110     except AttributeError:
 111         pass
 112     else:
 113         import aliases
 114         for alias in codecaliases:
 115             if not aliases.aliases.has_key(alias):
 116                 aliases.aliases[alias] = modname
 117
 118     # Return the registry entry
 119     return entry
 120
# Register the search_function in the Python codec registry.  This is
# a module-level statement, so it runs when the package is imported.
codecs.register(search_function)