append(): Fixing the test for convertability after consultation with
[python/dscho.git] / Lib / encodings / __init__.py
blob b2542b8bd745e207783af49ba19545bdbedb0ced
1 """ Standard "encodings" Package
3 Standard Python encoding modules are stored in this package
4 directory.
6 Codec modules must have names corresponding to normalized encoding
7 names as defined in the normalize_encoding() function below, e.g.
8 'utf-8' must be implemented by the module 'utf_8.py'.
10 Each codec module must export the following interface:
12 * getregentry() -> (encoder, decoder, stream_reader, stream_writer)
13 The getregentry() API must return callable objects which adhere to
14 the Python Codec Interface Standard.
16 In addition, a module may optionally also define the following
17 APIs which are then used by the package's codec search function:
19 * getaliases() -> sequence of encoding name strings to use as aliases
21 Alias names returned by getaliases() must be normalized encoding
22 names as defined by normalize_encoding().
24 Written by Marc-Andre Lemburg (mal@lemburg.com).
26 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
28 """#"
30 import codecs, exceptions, re
# Sentinel handed to _cache.get() as the default.  A dedicated marker is
# needed because None is itself a valid cached value (a remembered miss).
_unknown = '--unknown--'

# Maps encoding names, exactly as passed to search_function(), to the
# codec registry entry found for them, or to None when no codec module
# could be found.
_cache = {}

# Passed as the "fromlist" argument of __import__().  A non-empty
# fromlist makes __import__('pkg.mod', ...) return pkg.mod itself rather
# than the top-level package pkg.
_import_tail = ['*']

# Matches one character that is neither an ASCII letter, a digit nor
# a dot.
_norm_encoding_RE = re.compile('[^a-zA-Z0-9.]')
class CodecRegistryError(LookupError, SystemError):

    """ Raised by search_function() when a codec module's getregentry()
        returns something other than four callable objects.

        The class derives from both LookupError and SystemError so that
        handlers written for either of them catch it.  The builtin
        exception names are used directly: they are the same objects the
        Python 2 "exceptions" module exposes, and they also exist on
        Python versions that no longer ship that module.

    """
def normalize_encoding(encoding):

    """ Normalize an encoding name.

        Normalization works as follows: every run of characters that
        are neither ASCII letters, digits nor the dot used for Python
        package names is replaced with a single underscore, e.g. ' -;#'
        becomes '_' and 'iso  8859-1' becomes 'iso_8859_1'.

        Returns the normalized name as a string; an empty name is
        returned unchanged.

    """
    # The trailing '+' makes one match span a whole run of separators.
    # Splitting on the module-level, single-character _norm_encoding_RE
    # and joining with '_' would instead emit one underscore per
    # separator character (' -;#' -> '____'), contradicting the
    # documented behaviour above.
    return re.sub('[^a-zA-Z0-9.]+', '_', encoding)
def search_function(encoding):

    """ Codec search function of the encodings package.

        Looks up the codec module for the given encoding name and
        returns its registry entry, a 4-tuple (encoder, decoder,
        stream_reader, stream_writer), or None if no codec module could
        be found.  Both outcomes are cached under the encoding name
        exactly as it was passed in.

        The module is first looked up inside the encodings package
        under its normalized name.  If that import fails, the name is
        mapped through the aliases table and the import is retried
        using the default module lookup scheme.

        Raises CodecRegistryError if the module's getregentry() does
        not return exactly four callable objects.

    """
    # Cache lookup
    entry = _cache.get(encoding, _unknown)
    if entry is not _unknown:
        return entry

    # Absolute import of the alias table; the implicit relative form
    # ("import aliases") only works on Python 2.
    from encodings import aliases

    # Import the module:
    #
    # First look in the encodings package, then try to lookup the
    # encoding in the aliases mapping and retry the import using the
    # default import module lookup scheme with the alias name.
    #
    modname = normalize_encoding(encoding)
    try:
        mod = __import__('encodings.' + modname,
                         globals(), locals(), _import_tail)
    except ImportError:
        modname = (aliases.aliases.get(modname) or
                   aliases.aliases.get(modname.replace('.', '_')) or
                   modname)
        try:
            mod = __import__(modname, globals(), locals(), _import_tail)
        except ImportError:
            mod = None

    # A module that imports fine but lacks getregentry() is not a codec
    # module; it is handled exactly like a failed import.  The explicit
    # None test avoids relying on None.getregentry raising
    # AttributeError.
    if mod is not None:
        try:
            getregentry = mod.getregentry
        except AttributeError:
            # Not a codec module
            mod = None

    if mod is None:
        # Cache misses
        _cache[encoding] = None
        return None

    # Now ask the module for the registry entry
    entry = tuple(getregentry())
    if len(entry) != 4:
        raise CodecRegistryError(
            'module "%s" (%s) failed to register' %
            (mod.__name__, mod.__file__))
    for obj in entry:
        if not callable(obj):
            raise CodecRegistryError(
                'incompatible codecs in module "%s" (%s)' %
                (mod.__name__, mod.__file__))

    # Cache the codec registry entry
    _cache[encoding] = entry

    # Register its aliases (without overwriting previously registered
    # aliases); getaliases() is an optional part of the module API.
    try:
        codecaliases = mod.getaliases()
    except AttributeError:
        pass
    else:
        for alias in codecaliases:
            # setdefault() only stores the mapping when alias is new.
            aliases.aliases.setdefault(alias, modname)

    # Return the registry entry
    return entry
# Hook this package's search_function into the Python codec registry.
# This runs as a side effect of importing the package.
codecs.register(search_function)