1 """ Standard "encodings" Package
3 Standard Python encoding modules are stored in this package
6 Codec modules must have names corresponding to normalized encoding
7 names as defined in the normalize_encoding() function below, e.g.
8 'utf-8' must be implemented by the module 'utf_8.py'.
10 Each codec module must export the following interface:
12 * getregentry() -> (encoder, decoder, stream_reader, stream_writer)
13 The getregentry() API must return callable objects which adhere to
14 the Python Codec Interface Standard.
16 In addition, a module may optionally also define the following
17 APIs which are then used by the package's codec search function:
19 * getaliases() -> sequence of encoding name strings to use as aliases
21 Alias names returned by getaliases() must be normalized encoding
22 names as defined by normalize_encoding().
24 Written by Marc-Andre Lemburg (mal@lemburg.com).
26 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
30 import codecs
, exceptions
, re
33 _unknown
= '--unknown--'
35 _norm_encoding_RE
= re
.compile('[^a-zA-Z0-9.]')
class CodecRegistryError(exceptions.LookupError,
                         exceptions.SystemError):

    """ Error raised by search_function() when a codec module hands
        back a registry entry that cannot be used.

        The class derives from both LookupError and SystemError, so
        handlers written for either base class will catch it.

    """
    pass
def normalize_encoding(encoding):

    """ Normalize an encoding name.

        Every character matched by _norm_encoding_RE (anything that is
        not an ASCII letter, a digit or a dot; dots are kept because
        they separate Python package names) counts as a separator.
        Each run of consecutive separators is collapsed into a single
        underscore, e.g. 'utf-8' becomes 'utf_8' and 'iso--8859 1'
        becomes 'iso_8859_1'.

        Returns the normalized name as a string.

    """
    normalized = '_'.join(_norm_encoding_RE.split(encoding))
    # The pattern matches one separator at a time, so a run of
    # separators produces empty pieces and therefore repeated
    # underscores after the join.  The underscore is itself a
    # separator character, which means every '_' present here came
    # from the join; squeezing the runs cannot damage any other part
    # of the name.
    while '__' in normalized:
        normalized = normalized.replace('__', '_')
    return normalized
def search_function(encoding):

    """ Codec search function of the encodings package.

        Looks up the codec module for the given encoding name and
        returns its registry entry as a tuple.  Returns None when no
        usable codec module is found.  Both results are stored in
        _cache under the name exactly as it was passed in, so repeated
        lookups skip the import machinery.

        Raises CodecRegistryError when a codec module's getregentry()
        returns something that does not fit the documented
        (encoder, decoder, stream_reader, stream_writer) interface.

        NOTE(review): the text this block was reviewed from had lost
        several statements (try/except headers, early returns and the
        conditions guarding the two raise statements).  They were
        rebuilt from the surviving statements, the comments and the
        module docstring; confirm against the authoritative copy.

    """
    # Cache lookup.  _unknown is the default because None is a
    # legitimate cached value (a remembered miss).
    entry = _cache.get(encoding, _unknown)
    if entry is not _unknown:
        return entry

    # Import the module:
    #
    # First look in the encodings package, then try to lookup the
    # encoding in the aliases mapping and retry the import using the
    # default import module lookup scheme with the alias name.
    #
    modname = normalize_encoding(encoding)
    try:
        mod = __import__('encodings.' + modname,
                         globals(), locals(), _import_tail)
    except ImportError:
        # NOTE(review): 'aliases' is not imported at module level in
        # the visible text; presumably it is the sibling module of
        # this package, imported lazily here -- confirm.
        import aliases
        # NOTE(review): the last operand of this 'or' chain was lost;
        # falling back to the normalized name itself is assumed.
        modname = (aliases.aliases.get(modname) or
                   aliases.aliases.get(modname.replace('.', '_')) or
                   modname)
        try:
            mod = __import__(modname, globals(), locals(), _import_tail)
        except ImportError:
            mod = None

    try:
        getregentry = mod.getregentry
    except AttributeError:
        # Not a codec module (this also covers mod being None)
        mod = None

    if mod is None:
        # Cache misses as well, so the failed import is not retried
        _cache[encoding] = None
        return None

    # Now ask the module for the registry entry
    entry = tuple(getregentry())
    if len(entry) != 4:
        # Call-style raise replaces the Python-2-only
        # 'raise Class, message' statement form; the message text is
        # unchanged.
        raise CodecRegistryError(
            'module "%s" (%s) failed to register' %
            (mod.__name__, mod.__file__))
    for obj in entry:
        if not callable(obj):
            raise CodecRegistryError(
                'incompatible codecs in module "%s" (%s)' %
                (mod.__name__, mod.__file__))

    # Cache the codec registry entry
    _cache[encoding] = entry

    # Register its aliases (without overwriting previously registered
    # aliases); getaliases() is optional for codec modules.
    try:
        codecaliases = mod.getaliases()
    except AttributeError:
        pass
    else:
        import aliases
        for alias in codecaliases:
            # 'in' replaces the deprecated dict.has_key()
            if alias not in aliases.aliases:
                aliases.aliases[alias] = modname

    # Return the registry entry
    return entry
# Register the search_function in the Python codec registry.  This
# happens as a side effect of importing the module.
codecs.register(search_function)