Bump version to 0.9.1.
[python/dscho.git] / Python / codecs.c
blobc3f93ccda952a9ca8825a7afe980961d572ea5b0
1 /* ------------------------------------------------------------------------
3 Python Codec Registry and support functions
5 Written by Marc-Andre Lemburg (mal@lemburg.com).
7 Copyright (c) Corporation for National Research Initiatives.
9 ------------------------------------------------------------------------ */
11 #include "Python.h"
12 #include <ctype.h>
13 #ifdef HAVE_LIMITS_H
14 #include <limits.h>
15 #endif
17 /* --- Globals ------------------------------------------------------------ */
/* List of registered codec search functions. PyCodec_Register() appends
   to it and _PyCodec_Lookup() scans it in registration order. */
19 static PyObject *_PyCodec_SearchPath;
/* Dictionary used by _PyCodec_Lookup() to cache the result of a
   successful search, keyed by the normalized encoding name. */
20 static PyObject *_PyCodec_SearchCache;
22 /* Flag used for lazy import of the standard encodings package; import_encodings() sets it to 1 before attempting the import, so only one attempt is ever made */
23 static int import_encodings_called = 0;
25 /* --- Codec Registry ----------------------------------------------------- */
27 /* Import the standard encodings package which will register the first
28 codec search function.
30 This is done in a lazy way so that the Unicode implementation does
31 not downgrade startup time of scripts not needing it.
33 ImportErrors are silently ignored by this function. Only one try is
34 made.
38 static
39 int import_encodings(void)
41 PyObject *mod;
43 import_encodings_called = 1;
44 mod = PyImport_ImportModule("encodings");
45 if (mod == NULL) {
46 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
47 /* Ignore ImportErrors... this is done so that
48 distributions can disable the encodings package. Note
49 that other errors are not masked, e.g. SystemErrors
50 raised to inform the user of an error in the Python
51 configuration are still reported back to the user. */
52 PyErr_Clear();
53 return 0;
55 return -1;
57 Py_DECREF(mod);
58 return 0;
61 int PyCodec_Register(PyObject *search_function)
63 if (!import_encodings_called) {
64 if (import_encodings())
65 goto onError;
67 if (search_function == NULL) {
68 PyErr_BadArgument();
69 goto onError;
71 if (!PyCallable_Check(search_function)) {
72 PyErr_SetString(PyExc_TypeError,
73 "argument must be callable");
74 goto onError;
76 return PyList_Append(_PyCodec_SearchPath, search_function);
78 onError:
79 return -1;
82 /* Convert a string to a normalized Python string: all characters are
83 converted to lower case, spaces are replaced with underscores. */
85 static
86 PyObject *normalizestring(const char *string)
88 register size_t i;
89 size_t len = strlen(string);
90 char *p;
91 PyObject *v;
93 if (len > INT_MAX) {
94 PyErr_SetString(PyExc_OverflowError, "string is too large");
95 return NULL;
98 v = PyString_FromStringAndSize(NULL, (int)len);
99 if (v == NULL)
100 return NULL;
101 p = PyString_AS_STRING(v);
102 for (i = 0; i < len; i++) {
103 register char ch = string[i];
104 if (ch == ' ')
105 ch = '-';
106 else
107 ch = tolower(ch);
108 p[i] = ch;
110 return v;
113 /* Lookup the given encoding and return a tuple providing the codec
114 facilities.
116 The encoding string is looked up converted to all lower-case
117 characters. This makes encodings looked up through this mechanism
118 effectively case-insensitive.
120 If no codec is found, a LookupError is set and NULL returned.
122 As side effect, this tries to load the encodings package, if not
123 yet done. This is part of the lazy load strategy for the encodings
124 package.
128 PyObject *_PyCodec_Lookup(const char *encoding)
130 PyObject *result, *args = NULL, *v;
131 int i, len;
133 if (encoding == NULL) {
134 PyErr_BadArgument();
135 goto onError;
137 if (_PyCodec_SearchCache == NULL ||
138 _PyCodec_SearchPath == NULL) {
139 PyErr_SetString(PyExc_SystemError,
140 "codec module not properly initialized");
141 goto onError;
143 if (!import_encodings_called) {
144 if (import_encodings())
145 goto onError;
148 /* Convert the encoding to a normalized Python string: all
149 characters are converted to lower case, spaces and hyphens are
150 replaced with underscores. */
151 v = normalizestring(encoding);
152 if (v == NULL)
153 goto onError;
154 PyString_InternInPlace(&v);
156 /* First, try to lookup the name in the registry dictionary */
157 result = PyDict_GetItem(_PyCodec_SearchCache, v);
158 if (result != NULL) {
159 Py_INCREF(result);
160 Py_DECREF(v);
161 return result;
164 /* Next, scan the search functions in order of registration */
165 args = PyTuple_New(1);
166 if (args == NULL)
167 goto onError;
168 PyTuple_SET_ITEM(args,0,v);
170 len = PyList_Size(_PyCodec_SearchPath);
171 if (len < 0)
172 goto onError;
173 if (len == 0) {
174 PyErr_SetString(PyExc_LookupError,
175 "no codec search functions registered: "
176 "can't find encoding");
177 goto onError;
180 for (i = 0; i < len; i++) {
181 PyObject *func;
183 func = PyList_GetItem(_PyCodec_SearchPath, i);
184 if (func == NULL)
185 goto onError;
186 result = PyEval_CallObject(func, args);
187 if (result == NULL)
188 goto onError;
189 if (result == Py_None) {
190 Py_DECREF(result);
191 continue;
193 if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) {
194 PyErr_SetString(PyExc_TypeError,
195 "codec search functions must return 4-tuples");
196 Py_DECREF(result);
197 goto onError;
199 break;
201 if (i == len) {
202 /* XXX Perhaps we should cache misses too ? */
203 PyErr_SetString(PyExc_LookupError,
204 "unknown encoding");
205 goto onError;
208 /* Cache and return the result */
209 PyDict_SetItem(_PyCodec_SearchCache, v, result);
210 Py_DECREF(args);
211 return result;
213 onError:
214 Py_XDECREF(args);
215 return NULL;
218 static
219 PyObject *args_tuple(PyObject *object,
220 const char *errors)
222 PyObject *args;
224 args = PyTuple_New(1 + (errors != NULL));
225 if (args == NULL)
226 return NULL;
227 Py_INCREF(object);
228 PyTuple_SET_ITEM(args,0,object);
229 if (errors) {
230 PyObject *v;
232 v = PyString_FromString(errors);
233 if (v == NULL) {
234 Py_DECREF(args);
235 return NULL;
237 PyTuple_SET_ITEM(args, 1, v);
239 return args;
242 /* Build a codec by calling factory(stream[,errors]) or just
243 factory(errors) depending on whether the given parameters are
244 non-NULL. */
246 static
247 PyObject *build_stream_codec(PyObject *factory,
248 PyObject *stream,
249 const char *errors)
251 PyObject *args, *codec;
253 args = args_tuple(stream, errors);
254 if (args == NULL)
255 return NULL;
257 codec = PyEval_CallObject(factory, args);
258 Py_DECREF(args);
259 return codec;
/* Convenience APIs to query the Codec registry.

   All APIs return a codec object with incremented refcount. */
268 PyObject *PyCodec_Encoder(const char *encoding)
270 PyObject *codecs;
271 PyObject *v;
273 codecs = _PyCodec_Lookup(encoding);
274 if (codecs == NULL)
275 goto onError;
276 v = PyTuple_GET_ITEM(codecs,0);
277 Py_INCREF(v);
278 return v;
280 onError:
281 return NULL;
284 PyObject *PyCodec_Decoder(const char *encoding)
286 PyObject *codecs;
287 PyObject *v;
289 codecs = _PyCodec_Lookup(encoding);
290 if (codecs == NULL)
291 goto onError;
292 v = PyTuple_GET_ITEM(codecs,1);
293 Py_INCREF(v);
294 return v;
296 onError:
297 return NULL;
300 PyObject *PyCodec_StreamReader(const char *encoding,
301 PyObject *stream,
302 const char *errors)
304 PyObject *codecs;
306 codecs = _PyCodec_Lookup(encoding);
307 if (codecs == NULL)
308 goto onError;
309 return build_stream_codec(PyTuple_GET_ITEM(codecs,2),stream,errors);
311 onError:
312 return NULL;
315 PyObject *PyCodec_StreamWriter(const char *encoding,
316 PyObject *stream,
317 const char *errors)
319 PyObject *codecs;
321 codecs = _PyCodec_Lookup(encoding);
322 if (codecs == NULL)
323 goto onError;
324 return build_stream_codec(PyTuple_GET_ITEM(codecs,3),stream,errors);
326 onError:
327 return NULL;
330 /* Encode an object (e.g. an Unicode object) using the given encoding
331 and return the resulting encoded object (usually a Python string).
333 errors is passed to the encoder factory as argument if non-NULL. */
335 PyObject *PyCodec_Encode(PyObject *object,
336 const char *encoding,
337 const char *errors)
339 PyObject *encoder = NULL;
340 PyObject *args = NULL, *result;
341 PyObject *v;
343 encoder = PyCodec_Encoder(encoding);
344 if (encoder == NULL)
345 goto onError;
347 args = args_tuple(object, errors);
348 if (args == NULL)
349 goto onError;
351 result = PyEval_CallObject(encoder,args);
352 if (result == NULL)
353 goto onError;
355 if (!PyTuple_Check(result) ||
356 PyTuple_GET_SIZE(result) != 2) {
357 PyErr_SetString(PyExc_TypeError,
358 "encoder must return a tuple (object,integer)");
359 goto onError;
361 v = PyTuple_GET_ITEM(result,0);
362 Py_INCREF(v);
363 /* We don't check or use the second (integer) entry. */
365 Py_DECREF(args);
366 Py_DECREF(encoder);
367 Py_DECREF(result);
368 return v;
370 onError:
371 Py_XDECREF(args);
372 Py_XDECREF(encoder);
373 return NULL;
376 /* Decode an object (usually a Python string) using the given encoding
377 and return an equivalent object (e.g. an Unicode object).
379 errors is passed to the decoder factory as argument if non-NULL. */
381 PyObject *PyCodec_Decode(PyObject *object,
382 const char *encoding,
383 const char *errors)
385 PyObject *decoder = NULL;
386 PyObject *args = NULL, *result = NULL;
387 PyObject *v;
389 decoder = PyCodec_Decoder(encoding);
390 if (decoder == NULL)
391 goto onError;
393 args = args_tuple(object, errors);
394 if (args == NULL)
395 goto onError;
397 result = PyEval_CallObject(decoder,args);
398 if (result == NULL)
399 goto onError;
400 if (!PyTuple_Check(result) ||
401 PyTuple_GET_SIZE(result) != 2) {
402 PyErr_SetString(PyExc_TypeError,
403 "decoder must return a tuple (object,integer)");
404 goto onError;
406 v = PyTuple_GET_ITEM(result,0);
407 Py_INCREF(v);
408 /* We don't check or use the second (integer) entry. */
410 Py_DECREF(args);
411 Py_DECREF(decoder);
412 Py_DECREF(result);
413 return v;
415 onError:
416 Py_XDECREF(args);
417 Py_XDECREF(decoder);
418 Py_XDECREF(result);
419 return NULL;
422 void _PyCodecRegistry_Init(void)
424 if (_PyCodec_SearchPath == NULL)
425 _PyCodec_SearchPath = PyList_New(0);
426 if (_PyCodec_SearchCache == NULL)
427 _PyCodec_SearchCache = PyDict_New();
428 if (_PyCodec_SearchPath == NULL ||
429 _PyCodec_SearchCache == NULL)
430 Py_FatalError("can't initialize codec registry");
433 void _PyCodecRegistry_Fini(void)
435 Py_XDECREF(_PyCodec_SearchPath);
436 _PyCodec_SearchPath = NULL;
437 Py_XDECREF(_PyCodec_SearchCache);
438 _PyCodec_SearchCache = NULL;