Apparently the code to forestall Tk eating events was too aggressive (Tk user input...
[python/dscho.git] / Modules / _codecsmodule.c
blob37d89e99ee5317513f648353f457dd9ef7b29e1d
1 /* ------------------------------------------------------------------------
3 _codecs -- Provides access to the codec registry and the builtin
4 codecs.
6 This module should never be imported directly. The standard library
7 module "codecs" wraps this builtin module for use within Python.
9 The codec registry is accessible via:
11 register(search_function) -> None
13 lookup(encoding) -> (encoder, decoder, stream_reader, stream_writer)
15 The builtin Unicode codecs use the following interface:
17 <encoding>_encode(Unicode_object[,errors='strict']) ->
18 (string object, bytes consumed)
20 <encoding>_decode(char_buffer_obj[,errors='strict']) ->
21 (Unicode object, bytes consumed)
23 <encoding>_encode() interfaces also accept non-Unicode objects as
24 input. The objects are then converted to Unicode using
25 PyUnicode_FromObject() prior to applying the conversion.
27 These <encoding>s are available: utf_8, utf_16, utf_16_le, utf_16_be,
28 unicode_escape, raw_unicode_escape, unicode_internal, latin_1,
29 ascii (7-bit), charmap, mbcs (on win32).
32 Written by Marc-Andre Lemburg (mal@lemburg.com).
34 Copyright (c) Corporation for National Research Initiatives.
36 ------------------------------------------------------------------------ */
38 #include "Python.h"
40 /* --- Registry ----------------------------------------------------------- */
42 static
43 PyObject *codecregister(PyObject *self, PyObject *args)
45 PyObject *search_function;
47 if (!PyArg_ParseTuple(args, "O:register", &search_function))
48 goto onError;
50 if (PyCodec_Register(search_function))
51 goto onError;
53 Py_INCREF(Py_None);
54 return Py_None;
56 onError:
57 return NULL;
60 static
61 PyObject *codeclookup(PyObject *self, PyObject *args)
63 char *encoding;
65 if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
66 goto onError;
68 return _PyCodec_Lookup(encoding);
70 onError:
71 return NULL;
74 /* --- Helpers ------------------------------------------------------------ */
76 static
77 PyObject *codec_tuple(PyObject *unicode,
78 int len)
80 PyObject *v,*w;
82 if (unicode == NULL)
83 return NULL;
84 v = PyTuple_New(2);
85 if (v == NULL) {
86 Py_DECREF(unicode);
87 return NULL;
89 PyTuple_SET_ITEM(v,0,unicode);
90 w = PyInt_FromLong(len);
91 if (w == NULL) {
92 Py_DECREF(v);
93 return NULL;
95 PyTuple_SET_ITEM(v,1,w);
96 return v;
99 /* --- Decoder ------------------------------------------------------------ */
101 static PyObject *
102 unicode_internal_decode(PyObject *self,
103 PyObject *args)
105 PyObject *obj;
106 const char *errors = NULL;
107 const char *data;
108 int size;
110 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
111 &obj, &errors))
112 return NULL;
114 if (PyUnicode_Check(obj))
115 return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
116 else {
117 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
118 return NULL;
119 return codec_tuple(PyUnicode_FromUnicode((Py_UNICODE *)data,
120 size / sizeof(Py_UNICODE)),
121 size);
125 static PyObject *
126 utf_8_decode(PyObject *self,
127 PyObject *args)
129 const char *data;
130 int size;
131 const char *errors = NULL;
133 if (!PyArg_ParseTuple(args, "t#|z:utf_8_decode",
134 &data, &size, &errors))
135 return NULL;
137 return codec_tuple(PyUnicode_DecodeUTF8(data, size, errors),
138 size);
141 static PyObject *
142 utf_16_decode(PyObject *self,
143 PyObject *args)
145 const char *data;
146 int size;
147 const char *errors = NULL;
148 int byteorder = 0;
150 if (!PyArg_ParseTuple(args, "t#|z:utf_16_decode",
151 &data, &size, &errors))
152 return NULL;
153 return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder),
154 size);
157 static PyObject *
158 utf_16_le_decode(PyObject *self,
159 PyObject *args)
161 const char *data;
162 int size;
163 const char *errors = NULL;
164 int byteorder = -1;
166 if (!PyArg_ParseTuple(args, "t#|z:utf_16_le_decode",
167 &data, &size, &errors))
168 return NULL;
169 return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder),
170 size);
173 static PyObject *
174 utf_16_be_decode(PyObject *self,
175 PyObject *args)
177 const char *data;
178 int size;
179 const char *errors = NULL;
180 int byteorder = 1;
182 if (!PyArg_ParseTuple(args, "t#|z:utf_16_be_decode",
183 &data, &size, &errors))
184 return NULL;
185 return codec_tuple(PyUnicode_DecodeUTF16(data, size, errors, &byteorder),
186 size);
189 /* This non-standard version also provides access to the byteorder
190 parameter of the builtin UTF-16 codec.
192 It returns a tuple (unicode, bytesread, byteorder) with byteorder
193 being the value in effect at the end of data.
197 static PyObject *
198 utf_16_ex_decode(PyObject *self,
199 PyObject *args)
201 const char *data;
202 int size;
203 const char *errors = NULL;
204 int byteorder = 0;
205 PyObject *unicode, *tuple;
207 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_ex_decode",
208 &data, &size, &errors, &byteorder))
209 return NULL;
211 unicode = PyUnicode_DecodeUTF16(data, size, errors, &byteorder);
212 if (unicode == NULL)
213 return NULL;
214 tuple = Py_BuildValue("Oii", unicode, size, byteorder);
215 Py_DECREF(unicode);
216 return tuple;
219 static PyObject *
220 unicode_escape_decode(PyObject *self,
221 PyObject *args)
223 const char *data;
224 int size;
225 const char *errors = NULL;
227 if (!PyArg_ParseTuple(args, "t#|z:unicode_escape_decode",
228 &data, &size, &errors))
229 return NULL;
231 return codec_tuple(PyUnicode_DecodeUnicodeEscape(data, size, errors),
232 size);
235 static PyObject *
236 raw_unicode_escape_decode(PyObject *self,
237 PyObject *args)
239 const char *data;
240 int size;
241 const char *errors = NULL;
243 if (!PyArg_ParseTuple(args, "t#|z:raw_unicode_escape_decode",
244 &data, &size, &errors))
245 return NULL;
247 return codec_tuple(PyUnicode_DecodeRawUnicodeEscape(data, size, errors),
248 size);
251 static PyObject *
252 latin_1_decode(PyObject *self,
253 PyObject *args)
255 const char *data;
256 int size;
257 const char *errors = NULL;
259 if (!PyArg_ParseTuple(args, "t#|z:latin_1_decode",
260 &data, &size, &errors))
261 return NULL;
263 return codec_tuple(PyUnicode_DecodeLatin1(data, size, errors),
264 size);
267 static PyObject *
268 ascii_decode(PyObject *self,
269 PyObject *args)
271 const char *data;
272 int size;
273 const char *errors = NULL;
275 if (!PyArg_ParseTuple(args, "t#|z:ascii_decode",
276 &data, &size, &errors))
277 return NULL;
279 return codec_tuple(PyUnicode_DecodeASCII(data, size, errors),
280 size);
283 static PyObject *
284 charmap_decode(PyObject *self,
285 PyObject *args)
287 const char *data;
288 int size;
289 const char *errors = NULL;
290 PyObject *mapping = NULL;
292 if (!PyArg_ParseTuple(args, "t#|zO:charmap_decode",
293 &data, &size, &errors, &mapping))
294 return NULL;
295 if (mapping == Py_None)
296 mapping = NULL;
298 return codec_tuple(PyUnicode_DecodeCharmap(data, size, mapping, errors),
299 size);
302 #ifdef MS_WIN32
304 static PyObject *
305 mbcs_decode(PyObject *self,
306 PyObject *args)
308 const char *data;
309 int size;
310 const char *errors = NULL;
312 if (!PyArg_ParseTuple(args, "t#|z:mbcs_decode",
313 &data, &size, &errors))
314 return NULL;
316 return codec_tuple(PyUnicode_DecodeMBCS(data, size, errors),
317 size);
320 #endif /* MS_WIN32 */
322 /* --- Encoder ------------------------------------------------------------ */
324 static PyObject *
325 readbuffer_encode(PyObject *self,
326 PyObject *args)
328 const char *data;
329 int size;
330 const char *errors = NULL;
332 if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
333 &data, &size, &errors))
334 return NULL;
336 return codec_tuple(PyString_FromStringAndSize(data, size),
337 size);
340 static PyObject *
341 charbuffer_encode(PyObject *self,
342 PyObject *args)
344 const char *data;
345 int size;
346 const char *errors = NULL;
348 if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
349 &data, &size, &errors))
350 return NULL;
352 return codec_tuple(PyString_FromStringAndSize(data, size),
353 size);
356 static PyObject *
357 unicode_internal_encode(PyObject *self,
358 PyObject *args)
360 PyObject *obj;
361 const char *errors = NULL;
362 const char *data;
363 int size;
365 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
366 &obj, &errors))
367 return NULL;
369 if (PyUnicode_Check(obj)) {
370 data = PyUnicode_AS_DATA(obj);
371 size = PyUnicode_GET_DATA_SIZE(obj);
372 return codec_tuple(PyString_FromStringAndSize(data, size),
373 size);
375 else {
376 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
377 return NULL;
378 return codec_tuple(PyString_FromStringAndSize(data, size),
379 size);
383 static PyObject *
384 utf_8_encode(PyObject *self,
385 PyObject *args)
387 PyObject *str, *v;
388 const char *errors = NULL;
390 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
391 &str, &errors))
392 return NULL;
394 str = PyUnicode_FromObject(str);
395 if (str == NULL)
396 return NULL;
397 v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
398 PyUnicode_GET_SIZE(str),
399 errors),
400 PyUnicode_GET_SIZE(str));
401 Py_DECREF(str);
402 return v;
405 /* This version provides access to the byteorder parameter of the
406 builtin UTF-16 codecs as optional third argument. It defaults to 0
407 which means: use the native byte order and prepend the data with a
408 BOM mark.
412 static PyObject *
413 utf_16_encode(PyObject *self,
414 PyObject *args)
416 PyObject *str, *v;
417 const char *errors = NULL;
418 int byteorder = 0;
420 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
421 &str, &errors, &byteorder))
422 return NULL;
424 str = PyUnicode_FromObject(str);
425 if (str == NULL)
426 return NULL;
427 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
428 PyUnicode_GET_SIZE(str),
429 errors,
430 byteorder),
431 PyUnicode_GET_SIZE(str));
432 Py_DECREF(str);
433 return v;
436 static PyObject *
437 utf_16_le_encode(PyObject *self,
438 PyObject *args)
440 PyObject *str, *v;
441 const char *errors = NULL;
443 if (!PyArg_ParseTuple(args, "O|zi:utf_16_le_encode",
444 &str, &errors))
445 return NULL;
447 str = PyUnicode_FromObject(str);
448 if (str == NULL)
449 return NULL;
450 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
451 PyUnicode_GET_SIZE(str),
452 errors,
453 -1),
454 PyUnicode_GET_SIZE(str));
455 Py_DECREF(str);
456 return v;
459 static PyObject *
460 utf_16_be_encode(PyObject *self,
461 PyObject *args)
463 PyObject *str, *v;
464 const char *errors = NULL;
466 if (!PyArg_ParseTuple(args, "O|zi:utf_16_be_encode",
467 &str, &errors))
468 return NULL;
470 str = PyUnicode_FromObject(str);
471 if (str == NULL)
472 return NULL;
473 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
474 PyUnicode_GET_SIZE(str),
475 errors,
476 +1),
477 PyUnicode_GET_SIZE(str));
478 Py_DECREF(str);
479 return v;
482 static PyObject *
483 unicode_escape_encode(PyObject *self,
484 PyObject *args)
486 PyObject *str, *v;
487 const char *errors = NULL;
489 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
490 &str, &errors))
491 return NULL;
493 str = PyUnicode_FromObject(str);
494 if (str == NULL)
495 return NULL;
496 v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
497 PyUnicode_GET_SIZE(str)),
498 PyUnicode_GET_SIZE(str));
499 Py_DECREF(str);
500 return v;
503 static PyObject *
504 raw_unicode_escape_encode(PyObject *self,
505 PyObject *args)
507 PyObject *str, *v;
508 const char *errors = NULL;
510 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
511 &str, &errors))
512 return NULL;
514 str = PyUnicode_FromObject(str);
515 if (str == NULL)
516 return NULL;
517 v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
518 PyUnicode_AS_UNICODE(str),
519 PyUnicode_GET_SIZE(str)),
520 PyUnicode_GET_SIZE(str));
521 Py_DECREF(str);
522 return v;
525 static PyObject *
526 latin_1_encode(PyObject *self,
527 PyObject *args)
529 PyObject *str, *v;
530 const char *errors = NULL;
532 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
533 &str, &errors))
534 return NULL;
536 str = PyUnicode_FromObject(str);
537 if (str == NULL)
538 return NULL;
539 v = codec_tuple(PyUnicode_EncodeLatin1(
540 PyUnicode_AS_UNICODE(str),
541 PyUnicode_GET_SIZE(str),
542 errors),
543 PyUnicode_GET_SIZE(str));
544 Py_DECREF(str);
545 return v;
548 static PyObject *
549 ascii_encode(PyObject *self,
550 PyObject *args)
552 PyObject *str, *v;
553 const char *errors = NULL;
555 if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
556 &str, &errors))
557 return NULL;
559 str = PyUnicode_FromObject(str);
560 if (str == NULL)
561 return NULL;
562 v = codec_tuple(PyUnicode_EncodeASCII(
563 PyUnicode_AS_UNICODE(str),
564 PyUnicode_GET_SIZE(str),
565 errors),
566 PyUnicode_GET_SIZE(str));
567 Py_DECREF(str);
568 return v;
571 static PyObject *
572 charmap_encode(PyObject *self,
573 PyObject *args)
575 PyObject *str, *v;
576 const char *errors = NULL;
577 PyObject *mapping = NULL;
579 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
580 &str, &errors, &mapping))
581 return NULL;
582 if (mapping == Py_None)
583 mapping = NULL;
585 str = PyUnicode_FromObject(str);
586 if (str == NULL)
587 return NULL;
588 v = codec_tuple(PyUnicode_EncodeCharmap(
589 PyUnicode_AS_UNICODE(str),
590 PyUnicode_GET_SIZE(str),
591 mapping,
592 errors),
593 PyUnicode_GET_SIZE(str));
594 Py_DECREF(str);
595 return v;
598 #ifdef MS_WIN32
600 static PyObject *
601 mbcs_encode(PyObject *self,
602 PyObject *args)
604 PyObject *str, *v;
605 const char *errors = NULL;
607 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
608 &str, &errors))
609 return NULL;
611 str = PyUnicode_FromObject(str);
612 if (str == NULL)
613 return NULL;
614 v = codec_tuple(PyUnicode_EncodeMBCS(
615 PyUnicode_AS_UNICODE(str),
616 PyUnicode_GET_SIZE(str),
617 errors),
618 PyUnicode_GET_SIZE(str));
619 Py_DECREF(str);
620 return v;
623 #endif /* MS_WIN32 */
625 /* --- Module API --------------------------------------------------------- */
627 static PyMethodDef _codecs_functions[] = {
628 {"register", codecregister, 1},
629 {"lookup", codeclookup, 1},
630 {"utf_8_encode", utf_8_encode, 1},
631 {"utf_8_decode", utf_8_decode, 1},
632 {"utf_16_encode", utf_16_encode, 1},
633 {"utf_16_le_encode", utf_16_le_encode, 1},
634 {"utf_16_be_encode", utf_16_be_encode, 1},
635 {"utf_16_decode", utf_16_decode, 1},
636 {"utf_16_le_decode", utf_16_le_decode, 1},
637 {"utf_16_be_decode", utf_16_be_decode, 1},
638 {"utf_16_ex_decode", utf_16_ex_decode, 1},
639 {"unicode_escape_encode", unicode_escape_encode, 1},
640 {"unicode_escape_decode", unicode_escape_decode, 1},
641 {"unicode_internal_encode", unicode_internal_encode, 1},
642 {"unicode_internal_decode", unicode_internal_decode, 1},
643 {"raw_unicode_escape_encode", raw_unicode_escape_encode, 1},
644 {"raw_unicode_escape_decode", raw_unicode_escape_decode, 1},
645 {"latin_1_encode", latin_1_encode, 1},
646 {"latin_1_decode", latin_1_decode, 1},
647 {"ascii_encode", ascii_encode, 1},
648 {"ascii_decode", ascii_decode, 1},
649 {"charmap_encode", charmap_encode, 1},
650 {"charmap_decode", charmap_decode, 1},
651 {"readbuffer_encode", readbuffer_encode, 1},
652 {"charbuffer_encode", charbuffer_encode, 1},
653 #ifdef MS_WIN32
654 {"mbcs_encode", mbcs_encode, 1},
655 {"mbcs_decode", mbcs_decode, 1},
656 #endif
657 {NULL, NULL} /* sentinel */
660 DL_EXPORT(void)
661 init_codecs(void)
663 Py_InitModule("_codecs", _codecs_functions);