1 /* ------------------------------------------------------------------------
3 Python Codec Registry and support functions
5 Written by Marc-Andre Lemburg (mal@lemburg.com).
7 Copyright (c) Corporation for National Research Initiatives.
9 ------------------------------------------------------------------------ */
14 /* --- Globals ------------------------------------------------------------ */
16 static PyObject
*_PyCodec_SearchPath
;
17 static PyObject
*_PyCodec_SearchCache
;
19 /* Flag used for lazy import of the standard encodings package */
20 static int import_encodings_called
= 0;
22 /* --- Codec Registry ----------------------------------------------------- */
24 /* Import the standard encodings package which will register the first
25 codec search function.
27 This is done in a lazy way so that the Unicode implementation does
28 not downgrade startup time of scripts not needing it.
30 ImportErrors are silently ignored by this function. Only one try is made. */
/* import_encodings: marks the lazy import as attempted by setting
   import_encodings_called, then imports the "encodings" module with
   PyImport_ImportModule().  The visible branch tests whether the
   pending exception is an ImportError.

   NOTE(review): the declaration of mod, the failure test on it and the
   return statements are not visible in this view.  Callers only test
   the result for non-zero -- confirm the exact return convention. */
36 int import_encodings(void)
40 import_encodings_called
= 1;
41 mod
= PyImport_ImportModule("encodings");
43 if (PyErr_ExceptionMatches(PyExc_ImportError
)) {
44 /* Ignore ImportErrors... this is done so that
45 distributions can disable the encodings package. Note
46 that other errors are not masked, e.g. SystemErrors
47 raised to inform the user of an error in the Python
48 configuration are still reported back to the user. */
/* PyCodec_Register: add search_function to the list of codec search
   functions.

   Visible steps: run the lazy import of the encodings package if it has
   not been attempted yet, test search_function against NULL, require it
   to be callable (TypeError "argument must be callable" otherwise), and
   finally return the result of PyList_Append() on _PyCodec_SearchPath.

   NOTE(review): the error exits taken by the individual checks are not
   visible in this view. */
58 int PyCodec_Register(PyObject
*search_function
)
60 if (!import_encodings_called
) {
61 if (import_encodings())
64 if (search_function
== NULL
) {
68 if (!PyCallable_Check(search_function
)) {
69 PyErr_SetString(PyExc_TypeError
,
70 "argument must be callable");
73 return PyList_Append(_PyCodec_SearchPath
, search_function
);
79 /* Convert a string to a normalized Python string: all characters are
80 converted to lower case, spaces are replaced with underscores. */
/* normalizestring: allocates a new Python string of strlen(string)
   bytes and walks the input one character at a time, writing through
   the buffer obtained with PyString_AS_STRING().

   An OverflowError ("string is too large") is raised on a path whose
   guard is not visible here; presumably it protects the (int)len cast
   passed to PyString_FromStringAndSize() -- TODO confirm.

   NOTE(review): the declarations of v, p and i, the per-character
   conversion and the return statement are not visible in this view. */
83 PyObject
*normalizestring(const char *string
)
86 size_t len
= strlen(string
);
91 PyErr_SetString(PyExc_OverflowError
, "string is too large");
95 v
= PyString_FromStringAndSize(NULL
, (int)len
);
98 p
= PyString_AS_STRING(v
);
99 for (i
= 0; i
< len
; i
++) {
100 register char ch
= string
[i
];
110 /* Lookup the given encoding and return a tuple providing the codec's encoder, decoder, stream reader and stream writer.
113 The encoding string is looked up converted to all lower-case
114 characters. This makes encodings looked up through this mechanism
115 effectively case-insensitive.
117 If no codec is found, a LookupError is set and NULL returned.
119 As side effect, this tries to load the encodings package, if not
120 yet done. This is part of the lazy load strategy for the encodings package. */
/* _PyCodec_Lookup: resolve an encoding name to its codec tuple.

   Steps visible below:
     1. test encoding against NULL;
     2. require both registry containers to exist (SystemError
        "codec module not properly initialized" otherwise);
     3. run the lazy import of the encodings package if not yet tried;
     4. normalize and intern the name, then probe _PyCodec_SearchCache;
     5. on a cache miss, call each function in _PyCodec_SearchPath with
        a 1-tuple holding the normalized name; a result of Py_None is
        tested for (presumably "not found, try the next one" -- TODO
        confirm), any other result must be a 4-tuple (TypeError);
     6. store the result in _PyCodec_SearchCache under the normalized
        name.

   NOTE(review): the declarations of len, i and func, all error exits,
   the reference-count handling and the return statements are not
   visible in this view. */
125 PyObject
*_PyCodec_Lookup(const char *encoding
)
127 PyObject
*result
, *args
= NULL
, *v
;
130 if (encoding
== NULL
) {
/* Both containers are created by _PyCodecRegistry_Init(). */
134 if (_PyCodec_SearchCache
== NULL
||
135 _PyCodec_SearchPath
== NULL
) {
136 PyErr_SetString(PyExc_SystemError
,
137 "codec module not properly initialized");
140 if (!import_encodings_called
) {
141 if (import_encodings())
145 /* Convert the encoding to a normalized Python string: all
146 characters are converted to lower case, spaces and hyphens are
147 replaced with underscores. */
148 v
= normalizestring(encoding
);
151 PyString_InternInPlace(&v
);
153 /* First, try to lookup the name in the registry dictionary */
154 result
= PyDict_GetItem(_PyCodec_SearchCache
, v
);
155 if (result
!= NULL
) {
161 /* Next, scan the search functions in order of registration */
162 args
= PyTuple_New(1);
165 PyTuple_SET_ITEM(args
,0,v
);
167 len
= PyList_Size(_PyCodec_SearchPath
);
171 PyErr_SetString(PyExc_LookupError
,
172 "no codec search functions registered: "
173 "can't find encoding");
177 for (i
= 0; i
< len
; i
++) {
180 func
= PyList_GetItem(_PyCodec_SearchPath
, i
);
183 result
= PyEval_CallObject(func
, args
);
186 if (result
== Py_None
) {
190 if (!PyTuple_Check(result
) || PyTuple_GET_SIZE(result
) != 4) {
191 PyErr_SetString(PyExc_TypeError
,
192 "codec search functions must return 4-tuples");
199 /* XXX Perhaps we should cache misses too ? */
200 PyErr_SetString(PyExc_LookupError
,
205 /* Cache and return the result */
206 PyDict_SetItem(_PyCodec_SearchCache
, v
, result
);
/* args_tuple: build the argument tuple for a codec call.

   The tuple has one slot when errors is NULL and two otherwise: slot 0
   receives object, slot 1 receives errors converted to a Python string
   with PyString_FromString().

   NOTE(review): the remainder of the signature, the allocation failure
   checks, the reference-count handling for object and the return
   statement are not visible in this view. */
216 PyObject
*args_tuple(PyObject
*object
,
221 args
= PyTuple_New(1 + (errors
!= NULL
));
225 PyTuple_SET_ITEM(args
,0,object
);
229 v
= PyString_FromString(errors
);
234 PyTuple_SET_ITEM(args
, 1, v
);
239 /* Build a codec by calling factory(stream[,errors]) or just
240 factory(errors) depending on whether the given parameters are non-NULL. */
/* build_stream_codec: packs stream and errors into an argument tuple
   with args_tuple() and calls factory with that tuple through
   PyEval_CallObject().

   NOTE(review): the remainder of the signature, the failure checks,
   the release of args and the return statement are not visible in
   this view. */
244 PyObject
*build_stream_codec(PyObject
*factory
,
248 PyObject
*args
, *codec
;
250 args
= args_tuple(stream
, errors
);
254 codec
= PyEval_CallObject(factory
, args
);
259 /* Convenience APIs to query the Codec registry.
261 All APIs return a codec object with incremented refcount. */
/* PyCodec_Encoder: looks up encoding with _PyCodec_Lookup() and takes
   item 0 of the resulting codec tuple.

   NOTE(review): the local declarations, the failure check on the
   lookup result, the reference-count handling and the return statement
   are not visible in this view. */
265 PyObject
*PyCodec_Encoder(const char *encoding
)
270 codecs
= _PyCodec_Lookup(encoding
);
273 v
= PyTuple_GET_ITEM(codecs
,0);
/* PyCodec_Decoder: looks up encoding with _PyCodec_Lookup() and takes
   item 1 of the resulting codec tuple.

   NOTE(review): the local declarations, the failure check on the
   lookup result, the reference-count handling and the return statement
   are not visible in this view. */
281 PyObject
*PyCodec_Decoder(const char *encoding
)
286 codecs
= _PyCodec_Lookup(encoding
);
289 v
= PyTuple_GET_ITEM(codecs
,1);
/* PyCodec_StreamReader: looks up encoding with _PyCodec_Lookup() and
   hands item 2 of the codec tuple, together with stream and errors, to
   build_stream_codec(); that call's result is returned.

   NOTE(review): the remainder of the signature, the local declarations
   and the failure check on the lookup result are not visible in this
   view. */
297 PyObject
*PyCodec_StreamReader(const char *encoding
,
303 codecs
= _PyCodec_Lookup(encoding
);
306 return build_stream_codec(PyTuple_GET_ITEM(codecs
,2),stream
,errors
);
/* PyCodec_StreamWriter: looks up encoding with _PyCodec_Lookup() and
   hands item 3 of the codec tuple, together with stream and errors, to
   build_stream_codec(); that call's result is returned.

   NOTE(review): the remainder of the signature, the local declarations
   and the failure check on the lookup result are not visible in this
   view. */
312 PyObject
*PyCodec_StreamWriter(const char *encoding
,
318 codecs
= _PyCodec_Lookup(encoding
);
321 return build_stream_codec(PyTuple_GET_ITEM(codecs
,3),stream
,errors
);
327 /* Encode an object (e.g. an Unicode object) using the given encoding
328 and return the resulting encoded object (usually a Python string).
330 errors is passed to the encoder factory as argument if non-NULL. */
/* PyCodec_Encode: fetches the encoder for encoding with
   PyCodec_Encoder(), builds the call arguments from object and errors
   with args_tuple(), and calls the encoder through PyEval_CallObject().
   The encoder's result must be a 2-tuple (TypeError otherwise); item 0
   of that tuple is picked out as the encoded object.

   NOTE(review): the remainder of the signature, the declaration of v,
   all failure checks and error exits, the reference-count handling and
   the return statement are not visible in this view.
   NOTE(review): result has no initializer here, whereas the matching
   local in PyCodec_Decode starts out as NULL -- confirm that no cleanup
   path reads it before it is assigned. */
332 PyObject
*PyCodec_Encode(PyObject
*object
,
333 const char *encoding
,
336 PyObject
*encoder
= NULL
;
337 PyObject
*args
= NULL
, *result
;
340 encoder
= PyCodec_Encoder(encoding
);
344 args
= args_tuple(object
, errors
);
348 result
= PyEval_CallObject(encoder
,args
);
352 if (!PyTuple_Check(result
) ||
353 PyTuple_GET_SIZE(result
) != 2) {
354 PyErr_SetString(PyExc_TypeError
,
355 "encoder must return a tuple (object,integer)");
358 v
= PyTuple_GET_ITEM(result
,0);
360 /* We don't check or use the second (integer) entry. */
373 /* Decode an object (usually a Python string) using the given encoding
374 and return an equivalent object (e.g. an Unicode object).
376 errors is passed to the decoder factory as argument if non-NULL. */
/* PyCodec_Decode: fetches the decoder for encoding with
   PyCodec_Decoder(), builds the call arguments from object and errors
   with args_tuple(), and calls the decoder through PyEval_CallObject().
   The decoder's result must be a 2-tuple (TypeError otherwise); item 0
   of that tuple is picked out as the decoded object.

   NOTE(review): the remainder of the signature, the declaration of v,
   all failure checks and error exits, the reference-count handling and
   the return statement are not visible in this view. */
378 PyObject
*PyCodec_Decode(PyObject
*object
,
379 const char *encoding
,
382 PyObject
*decoder
= NULL
;
383 PyObject
*args
= NULL
, *result
= NULL
;
386 decoder
= PyCodec_Decoder(encoding
);
390 args
= args_tuple(object
, errors
);
394 result
= PyEval_CallObject(decoder
,args
);
397 if (!PyTuple_Check(result
) ||
398 PyTuple_GET_SIZE(result
) != 2) {
399 PyErr_SetString(PyExc_TypeError
,
400 "decoder must return a tuple (object,integer)");
403 v
= PyTuple_GET_ITEM(result
,0);
405 /* We don't check or use the second (integer) entry. */
419 void _PyCodecRegistry_Init(void)
421 if (_PyCodec_SearchPath
== NULL
)
422 _PyCodec_SearchPath
= PyList_New(0);
423 if (_PyCodec_SearchCache
== NULL
)
424 _PyCodec_SearchCache
= PyDict_New();
425 if (_PyCodec_SearchPath
== NULL
||
426 _PyCodec_SearchCache
== NULL
)
427 Py_FatalError("can't initialize codec registry");
430 void _PyCodecRegistry_Fini(void)
432 Py_XDECREF(_PyCodec_SearchPath
);
433 _PyCodec_SearchPath
= NULL
;
434 Py_XDECREF(_PyCodec_SearchCache
);
435 _PyCodec_SearchCache
= NULL
;