1 /* ------------------------------------------------------------------------
3 Python Codec Registry and support functions
5 Written by Marc-Andre Lemburg (mal@lemburg.com).
7 Copyright (c) Corporation for National Research Initiatives.
9 ------------------------------------------------------------------------ */
17 /* --- Globals ------------------------------------------------------------ */
/* List of registered codec search functions. It is created with
   PyList_New(0) in _PyCodecRegistry_Init(), appended to by
   PyCodec_Register() and scanned in index order by _PyCodec_Lookup(). */
19 static PyObject
*_PyCodec_SearchPath
;
/* Dictionary used by _PyCodec_Lookup() as a cache: it is queried and
   updated with the normalized encoding name as key. Created with
   PyDict_New() in _PyCodecRegistry_Init(). */
20 static PyObject
*_PyCodec_SearchCache
;
22 /* Flag used for lazy import of the standard encodings package.
   import_encodings() sets it to 1 before it attempts the import;
   PyCodec_Register() and _PyCodec_Lookup() test it to decide whether
   import_encodings() still has to be called. */
23 static int import_encodings_called
= 0;
25 /* --- Codec Registry ----------------------------------------------------- */
27 /* Import the standard encodings package which will register the first
28 codec search function.
30 This is done in a lazy way so that the Unicode implementation does
31 not slow down the startup of scripts that do not need it.
33 ImportErrors are silently ignored by this function. Only one try is made. */
/* Lazily import the "encodings" package.

   The import_encodings_called flag is set before PyImport_ImportModule()
   is called, so callers that test the flag will not trigger a second
   attempt even if this one fails.

   NOTE(review): the declaration of mod, the braces and the return
   statements are not visible in this excerpt. Callers only test the
   result for truth; confirm the exact return convention against the
   complete file. */
39 int import_encodings(void)
43 import_encodings_called
= 1;
44 mod
= PyImport_ImportModule("encodings");
46 if (PyErr_ExceptionMatches(PyExc_ImportError
)) {
47 /* Ignore ImportErrors... this is done so that
48 distributions can disable the encodings package. Note
49 that other errors are not masked, e.g. SystemErrors
50 raised to inform the user of an error in the Python
51 configuration are still reported back to the user. */
/* Register search_function as a codec search function.

   If the lazy import of the encodings package has not been attempted
   yet, import_encodings() is called first. A non-callable argument
   sets a TypeError ("argument must be callable"). On the success path
   the function is appended to _PyCodec_SearchPath and the result of
   PyList_Append() is returned.

   NOTE(review): the bodies of the error branches (NULL argument,
   failed import, non-callable argument) are not visible in this
   excerpt; confirm their return values against the complete file. */
61 int PyCodec_Register(PyObject
*search_function
)
/* Make sure the default search function had its chance to register. */
63 if (!import_encodings_called
) {
64 if (import_encodings())
/* Argument validation: NULL first, then callability. */
67 if (search_function
== NULL
) {
71 if (!PyCallable_Check(search_function
)) {
72 PyErr_SetString(PyExc_TypeError
,
73 "argument must be callable");
/* Search functions are kept in registration order. */
76 return PyList_Append(_PyCodec_SearchPath
, search_function
);
82 /* Convert a string to a normalized Python string: all characters are
83 converted to lower case, spaces are replaced with underscores.

   string is a NUL-terminated C string; its length is taken with
   strlen(). The result is a new Python string object of the same
   length.

   NOTE(review): the comment inside _PyCodec_Lookup() says that hyphens
   are replaced as well. The loop body that performs the replacement is
   not visible in this excerpt, so which comment matches the code has
   to be confirmed against the complete file. */
86 PyObject
*normalizestring(const char *string
)
89 size_t len
= strlen(string
);
/* NOTE(review): the condition guarding this error is not visible;
   presumably it rejects lengths that do not fit the (int) cast used
   for the allocation below. */
94 PyErr_SetString(PyExc_OverflowError
, "string is too large");
/* Create the result object without source data (NULL buffer); its
   storage is then accessed directly through p. */
98 v
= PyString_FromStringAndSize(NULL
, (int)len
);
101 p
= PyString_AS_STRING(v
);
/* Walk the input one character at a time. The statements that
   transform ch and store it are not visible in this excerpt. */
102 for (i
= 0; i
< len
; i
++) {
103 register char ch
= string
[i
];
113 /* Look up the given encoding and return the 4-tuple describing its codec.
116 The encoding string is looked up converted to all lower-case
117 characters. This makes encodings looked up through this mechanism
118 effectively case-insensitive.
120 If no codec is found, a LookupError is set and NULL returned.
122 As a side effect, this tries to load the encodings package, if not
123 yet done. This is part of the lazy load strategy for the encodings package. */
/* Resolve encoding to its codec tuple.

   Visible steps: validate the argument and the registry state, trigger
   the lazy import of the encodings package, normalize and intern the
   name, consult _PyCodec_SearchCache, and otherwise call each function
   in _PyCodec_SearchPath with the 1-tuple (name,) until a result is
   accepted, which is then stored in the cache.

   NOTE(review): several declarations (i, len, func), the branch bodies
   and all return statements are not visible in this excerpt. */
128 PyObject
*_PyCodec_Lookup(const char *encoding
)
130 PyObject
*result
, *args
= NULL
, *v
;
133 if (encoding
== NULL
) {
/* Both objects are created by _PyCodecRegistry_Init() and reset to
   NULL by _PyCodecRegistry_Fini(); NULL here means the registry is not
   usable. */
137 if (_PyCodec_SearchCache
== NULL
||
138 _PyCodec_SearchPath
== NULL
) {
139 PyErr_SetString(PyExc_SystemError
,
140 "codec module not properly initialized");
/* Lazy import: gives the encodings package the chance to register its
   search function before the first lookup. */
143 if (!import_encodings_called
) {
144 if (import_encodings())
148 /* Convert the encoding to a normalized Python string: all
149 characters are converted to lower case, spaces and hyphens are
150 replaced with underscores. */
151 v
= normalizestring(encoding
);
/* v is used below both as the cache key and as the argument passed to
   the search functions. */
154 PyString_InternInPlace(&v
);
156 /* First, try to lookup the name in the registry dictionary */
157 result
= PyDict_GetItem(_PyCodec_SearchCache
, v
);
158 if (result
!= NULL
) {
164 /* Next, scan the search functions in order of registration */
165 args
= PyTuple_New(1);
/* args becomes the 1-tuple (v,) that every search function receives. */
168 PyTuple_SET_ITEM(args
,0,v
);
170 len
= PyList_Size(_PyCodec_SearchPath
);
/* NOTE(review): the condition for this error is not visible; going by
   the message it is raised when the search path is empty. */
174 PyErr_SetString(PyExc_LookupError
,
175 "no codec search functions registered: "
176 "can't find encoding");
180 for (i
= 0; i
< len
; i
++) {
183 func
= PyList_GetItem(_PyCodec_SearchPath
, i
);
186 result
= PyEval_CallObject(func
, args
);
/* A Py_None result is tested separately from the 4-tuple check below.
   NOTE(review): presumably None means "not handled, try the next
   search function"; the branch body is not visible. */
189 if (result
== Py_None
) {
/* Anything else must be a tuple with exactly four items. */
193 if (!PyTuple_Check(result
) || PyTuple_GET_SIZE(result
) != 4) {
194 PyErr_SetString(PyExc_TypeError
,
195 "codec search functions must return 4-tuples");
202 /* XXX Perhaps we should cache misses too ? */
203 PyErr_SetString(PyExc_LookupError
,
208 /* Cache and return the result */
/* NOTE(review): the return value of PyDict_SetItem() is not checked on
   this line. */
209 PyDict_SetItem(_PyCodec_SearchCache
, v
, result
);
/* Build the argument tuple used to call a codec function.

   The tuple has one item, plus a second one if errors is non-NULL:
   object is stored at index 0 and a Python string created from errors
   at index 1.

   NOTE(review): the second parameter declaration (errors), the
   reference count handling and the return statements are not visible
   in this excerpt. */
219 PyObject
*args_tuple(PyObject
*object
,
/* (errors != NULL) evaluates to 0 or 1, giving a tuple size of 1 or 2. */
224 args
= PyTuple_New(1 + (errors
!= NULL
));
228 PyTuple_SET_ITEM(args
,0,object
);
232 v
= PyString_FromString(errors
);
237 PyTuple_SET_ITEM(args
, 1, v
);
242 /* Build a codec by calling factory(stream[,errors]) or just
243 factory(errors) depending on whether the given parameters are non-NULL. */
/* Call factory with the argument tuple built by
   args_tuple(stream, errors) and store the result in codec.

   NOTE(review): the remaining parameter declarations (stream, errors),
   the error checks and the return statement are not visible in this
   excerpt. */
247 PyObject
*build_stream_codec(PyObject
*factory
,
251 PyObject
*args
, *codec
;
253 args
= args_tuple(stream
, errors
);
257 codec
= PyEval_CallObject(factory
, args
);
262 /* Convenience APIs to query the Codec registry.
264 All APIs return a codec object with incremented refcount. */
/* Look up encoding in the codec registry and fetch item 0 of the codec
   tuple.
   NOTE(review): the NULL check on the lookup result, the reference
   count handling and the return statement are not visible in this
   excerpt. */
268 PyObject
*PyCodec_Encoder(const char *encoding
)
273 codecs
= _PyCodec_Lookup(encoding
);
276 v
= PyTuple_GET_ITEM(codecs
,0);
/* Look up encoding in the codec registry and fetch item 1 of the codec
   tuple.
   NOTE(review): the NULL check on the lookup result, the reference
   count handling and the return statement are not visible in this
   excerpt. */
284 PyObject
*PyCodec_Decoder(const char *encoding
)
289 codecs
= _PyCodec_Lookup(encoding
);
292 v
= PyTuple_GET_ITEM(codecs
,1);
/* Look up encoding and build a stream codec by passing item 2 of the
   codec tuple, together with stream and errors, to
   build_stream_codec().
   NOTE(review): the stream and errors parameter declarations and the
   check of the lookup result are not visible in this excerpt. */
300 PyObject
*PyCodec_StreamReader(const char *encoding
,
306 codecs
= _PyCodec_Lookup(encoding
);
309 return build_stream_codec(PyTuple_GET_ITEM(codecs
,2),stream
,errors
);
/* Look up encoding and build a stream codec by passing item 3 of the
   codec tuple, together with stream and errors, to
   build_stream_codec().
   NOTE(review): the stream and errors parameter declarations and the
   check of the lookup result are not visible in this excerpt. */
315 PyObject
*PyCodec_StreamWriter(const char *encoding
,
321 codecs
= _PyCodec_Lookup(encoding
);
324 return build_stream_codec(PyTuple_GET_ITEM(codecs
,3),stream
,errors
);
330 /* Encode an object (e.g. a Unicode object) using the given encoding
331 and return the resulting encoded object (usually a Python string).
333 errors is passed to the encoder as second argument if non-NULL.

   The encoder is obtained with PyCodec_Encoder(encoding) and called
   with the tuple built by args_tuple(object, errors). It must return a
   2-tuple; item 0 of that tuple is taken as the encoded object.

   NOTE(review): the errors parameter declaration, the declaration of
   v, the error checks and the cleanup/return code are not visible in
   this excerpt. */
335 PyObject
*PyCodec_Encode(PyObject
*object
,
336 const char *encoding
,
339 PyObject
*encoder
= NULL
;
340 PyObject
*args
= NULL
, *result
;
343 encoder
= PyCodec_Encoder(encoding
);
347 args
= args_tuple(object
, errors
);
351 result
= PyEval_CallObject(encoder
,args
);
/* Validate the shape of the encoder's return value before indexing it. */
355 if (!PyTuple_Check(result
) ||
356 PyTuple_GET_SIZE(result
) != 2) {
357 PyErr_SetString(PyExc_TypeError
,
358 "encoder must return a tuple (object,integer)");
361 v
= PyTuple_GET_ITEM(result
,0);
363 /* We don't check or use the second (integer) entry. */
376 /* Decode an object (usually a Python string) using the given encoding
377 and return an equivalent object (e.g. a Unicode object).
379 errors is passed to the decoder as second argument if non-NULL.

   The decoder is obtained with PyCodec_Decoder(encoding) and called
   with the tuple built by args_tuple(object, errors). It must return a
   2-tuple; item 0 of that tuple is taken as the decoded object.

   NOTE(review): the errors parameter declaration, the declaration of
   v, the error checks and the cleanup/return code are not visible in
   this excerpt. */
381 PyObject
*PyCodec_Decode(PyObject
*object
,
382 const char *encoding
,
385 PyObject
*decoder
= NULL
;
386 PyObject
*args
= NULL
, *result
= NULL
;
389 decoder
= PyCodec_Decoder(encoding
);
393 args
= args_tuple(object
, errors
);
397 result
= PyEval_CallObject(decoder
,args
);
/* Validate the shape of the decoder's return value before indexing it. */
400 if (!PyTuple_Check(result
) ||
401 PyTuple_GET_SIZE(result
) != 2) {
402 PyErr_SetString(PyExc_TypeError
,
403 "decoder must return a tuple (object,integer)");
406 v
= PyTuple_GET_ITEM(result
,0);
408 /* We don't check or use the second (integer) entry. */
/* Create the codec registry objects if they do not exist yet: an empty
   list for the search functions and an empty dict for the lookup
   cache. Objects that are already non-NULL are left untouched. If
   either object is still NULL afterwards, Py_FatalError() is called. */
422 void _PyCodecRegistry_Init(void)
424 if (_PyCodec_SearchPath
== NULL
)
425 _PyCodec_SearchPath
= PyList_New(0);
426 if (_PyCodec_SearchCache
== NULL
)
427 _PyCodec_SearchCache
= PyDict_New();
/* A NULL here means one of the two allocations above failed. */
428 if (_PyCodec_SearchPath
== NULL
||
429 _PyCodec_SearchCache
== NULL
)
430 Py_FatalError("can't initialize codec registry");
/* Release the codec registry objects. Py_XDECREF tolerates pointers
   that are already NULL. Both globals are reset to NULL afterwards,
   which is the state _PyCodecRegistry_Init() and _PyCodec_Lookup()
   test for. */
433 void _PyCodecRegistry_Fini(void)
435 Py_XDECREF(_PyCodec_SearchPath
);
436 _PyCodec_SearchPath
= NULL
;
437 Py_XDECREF(_PyCodec_SearchCache
);
438 _PyCodec_SearchCache
= NULL
;