Python/codecs.c

   1 /* ------------------------------------------------------------------------
   2
   3    Python Codec Registry and support functions
   4
   5 Written by Marc-Andre Lemburg (mal@lemburg.com).
   6
   7 Copyright (c) Corporation for National Research Initiatives.
   8
   9    ------------------------------------------------------------------------ */
  10
  11 #include "Python.h"
  12 #include <ctype.h>
  13
  14 /* --- Codec Registry ----------------------------------------------------- */
  15
  16 /* Import the standard encodings package which will register the first
  17    codec search function.
  18
  19    This is done in a lazy way so that the Unicode implementation does
  20    not downgrade startup time of scripts not needing it.
  21
  22    ImportErrors are silently ignored by this function. Only one try is
  23    made.
  24
  25 */
  26
  27 static int _PyCodecRegistry_Init(void); /* Forward */
  28
  29 int PyCodec_Register(PyObject *search_function)
  30 {
  31     PyInterpreterState *interp = PyThreadState_Get()->interp;
  32     if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
  33         goto onError;
  34     if (search_function == NULL) {
  35         PyErr_BadArgument();
  36         goto onError;
  37     }
  38     if (!PyCallable_Check(search_function)) {
  39         PyErr_SetString(PyExc_TypeError,
  40                         "argument must be callable");
  41         goto onError;
  42     }
  43     return PyList_Append(interp->codec_search_path, search_function);
  44
  45  onError:
  46     return -1;
  47 }
  48
  49 /* Convert a string to a normalized Python string: all characters are
  50    converted to lower case, spaces are replaced with underscores. */
  51
  52 static
  53 PyObject *normalizestring(const char *string)
  54 {
  55     register size_t i;
  56     size_t len = strlen(string);
  57     char *p;
  58     PyObject *v;
  59
  60         if (len > INT_MAX) {
  61                 PyErr_SetString(PyExc_OverflowError, "string is too large");
  62                 return NULL;
  63         }
  64
  65     v = PyString_FromStringAndSize(NULL, (int)len);
  66     if (v == NULL)
  67         return NULL;
  68     p = PyString_AS_STRING(v);
  69     for (i = 0; i < len; i++) {
  70         register char ch = string[i];
  71         if (ch == ' ')
  72             ch = '-';
  73         else
  74             ch = tolower(ch);
  75         p[i] = ch;
  76     }
  77     return v;
  78 }
  79
  80 /* Lookup the given encoding and return a tuple providing the codec
  81    facilities.
  82
  83    The encoding string is looked up converted to all lower-case
  84    characters. This makes encodings looked up through this mechanism
  85    effectively case-insensitive.
  86
  87    If no codec is found, a LookupError is set and NULL returned.
  88
  89    As side effect, this tries to load the encodings package, if not
  90    yet done. This is part of the lazy load strategy for the encodings
  91    package.
  92
  93 */
  94
  95 PyObject *_PyCodec_Lookup(const char *encoding)
  96 {
  97     PyInterpreterState *interp;
  98     PyObject *result, *args = NULL, *v;
  99     int i, len;
 100
 101     if (encoding == NULL) {
 102         PyErr_BadArgument();
 103         goto onError;
 104     }
 105
 106     interp = PyThreadState_Get()->interp;
 107     if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
 108         goto onError;
 109
 110     /* Convert the encoding to a normalized Python string: all
 111        characters are converted to lower case, spaces and hyphens are
 112        replaced with underscores. */
 113     v = normalizestring(encoding);
 114     if (v == NULL)
 115         goto onError;
 116     PyString_InternInPlace(&v);
 117
 118     /* First, try to lookup the name in the registry dictionary */
 119     result = PyDict_GetItem(interp->codec_search_cache, v);
 120     if (result != NULL) {
 121         Py_INCREF(result);
 122         Py_DECREF(v);
 123         return result;
 124     }
 125
 126     /* Next, scan the search functions in order of registration */
 127     args = PyTuple_New(1);
 128     if (args == NULL)
 129         goto onError;
 130     PyTuple_SET_ITEM(args,0,v);
 131
 132     len = PyList_Size(interp->codec_search_path);
 133     if (len < 0)
 134         goto onError;
 135     if (len == 0) {
 136         PyErr_SetString(PyExc_LookupError,
 137                         "no codec search functions registered: "
 138                         "can't find encoding");
 139         goto onError;
 140     }
 141
 142     for (i = 0; i < len; i++) {
 143         PyObject *func;
 144
 145         func = PyList_GetItem(interp->codec_search_path, i);
 146         if (func == NULL)
 147             goto onError;
 148         result = PyEval_CallObject(func, args);
 149         if (result == NULL)
 150             goto onError;
 151         if (result == Py_None) {
 152             Py_DECREF(result);
 153             continue;
 154         }
 155         if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) {
 156             PyErr_SetString(PyExc_TypeError,
 157                             "codec search functions must return 4-tuples");
 158             Py_DECREF(result);
 159             goto onError;
 160         }
 161         break;
 162     }
 163     if (i == len) {
 164         /* XXX Perhaps we should cache misses too ? */
 165         PyErr_Format(PyExc_LookupError,
 166                      "unknown encoding: %s", encoding);
 167         goto onError;
 168     }
 169
 170     /* Cache and return the result */
 171     PyDict_SetItem(interp->codec_search_cache, v, result);
 172     Py_DECREF(args);
 173     return result;
 174
 175  onError:
 176     Py_XDECREF(args);
 177     return NULL;
 178 }
 179
 180 static
 181 PyObject *args_tuple(PyObject *object,
 182                      const char *errors)
 183 {
 184     PyObject *args;
 185
 186     args = PyTuple_New(1 + (errors != NULL));
 187     if (args == NULL)
 188         return NULL;
 189     Py_INCREF(object);
 190     PyTuple_SET_ITEM(args,0,object);
 191     if (errors) {
 192         PyObject *v;
 193
 194         v = PyString_FromString(errors);
 195         if (v == NULL) {
 196             Py_DECREF(args);
 197             return NULL;
 198         }
 199         PyTuple_SET_ITEM(args, 1, v);
 200     }
 201     return args;
 202 }
 203
 204 /* Build a codec by calling factory(stream[,errors]) or just
 205    factory(errors) depending on whether the given parameters are
 206    non-NULL. */
 207
 208 static
 209 PyObject *build_stream_codec(PyObject *factory,
 210                              PyObject *stream,
 211                              const char *errors)
 212 {
 213     PyObject *args, *codec;
 214
 215     args = args_tuple(stream, errors);
 216     if (args == NULL)
 217         return NULL;
 218
 219     codec = PyEval_CallObject(factory, args);
 220     Py_DECREF(args);
 221     return codec;
 222 }
 223
 224 /* Convenience APIs to query the Codec registry.
 225
 226    All APIs return a codec object with incremented refcount.
 227
 228  */
 229
 230 PyObject *PyCodec_Encoder(const char *encoding)
 231 {
 232     PyObject *codecs;
 233     PyObject *v;
 234
 235     codecs = _PyCodec_Lookup(encoding);
 236     if (codecs == NULL)
 237         goto onError;
 238     v = PyTuple_GET_ITEM(codecs,0);
 239     Py_DECREF(codecs);
 240     Py_INCREF(v);
 241     return v;
 242
 243  onError:
 244     return NULL;
 245 }
 246
 247 PyObject *PyCodec_Decoder(const char *encoding)
 248 {
 249     PyObject *codecs;
 250     PyObject *v;
 251
 252     codecs = _PyCodec_Lookup(encoding);
 253     if (codecs == NULL)
 254         goto onError;
 255     v = PyTuple_GET_ITEM(codecs,1);
 256     Py_DECREF(codecs);
 257     Py_INCREF(v);
 258     return v;
 259
 260  onError:
 261     return NULL;
 262 }
 263
 264 PyObject *PyCodec_StreamReader(const char *encoding,
 265                                PyObject *stream,
 266                                const char *errors)
 267 {
 268     PyObject *codecs, *ret;
 269
 270     codecs = _PyCodec_Lookup(encoding);
 271     if (codecs == NULL)
 272         goto onError;
 273     ret = build_stream_codec(PyTuple_GET_ITEM(codecs,2),stream,errors);
 274     Py_DECREF(codecs);
 275     return ret;
 276
 277  onError:
 278     return NULL;
 279 }
 280
 281 PyObject *PyCodec_StreamWriter(const char *encoding,
 282                                PyObject *stream,
 283                                const char *errors)
 284 {
 285     PyObject *codecs, *ret;
 286
 287     codecs = _PyCodec_Lookup(encoding);
 288     if (codecs == NULL)
 289         goto onError;
 290     ret = build_stream_codec(PyTuple_GET_ITEM(codecs,3),stream,errors);
 291     Py_DECREF(codecs);
 292     return ret;
 293
 294  onError:
 295     return NULL;
 296 }
 297
 298 /* Encode an object (e.g. an Unicode object) using the given encoding
 299    and return the resulting encoded object (usually a Python string).
 300
 301    errors is passed to the encoder factory as argument if non-NULL. */
 302
 303 PyObject *PyCodec_Encode(PyObject *object,
 304                          const char *encoding,
 305                          const char *errors)
 306 {
 307     PyObject *encoder = NULL;
 308     PyObject *args = NULL, *result;
 309     PyObject *v;
 310
 311     encoder = PyCodec_Encoder(encoding);
 312     if (encoder == NULL)
 313         goto onError;
 314
 315     args = args_tuple(object, errors);
 316     if (args == NULL)
 317         goto onError;
 318
 319     result = PyEval_CallObject(encoder,args);
 320     if (result == NULL)
 321         goto onError;
 322
 323     if (!PyTuple_Check(result) ||
 324         PyTuple_GET_SIZE(result) != 2) {
 325         PyErr_SetString(PyExc_TypeError,
 326                         "encoder must return a tuple (object,integer)");
 327         goto onError;
 328     }
 329     v = PyTuple_GET_ITEM(result,0);
 330     Py_INCREF(v);
 331     /* We don't check or use the second (integer) entry. */
 332
 333     Py_DECREF(args);
 334     Py_DECREF(encoder);
 335     Py_DECREF(result);
 336     return v;
 337
 338  onError:
 339     Py_XDECREF(args);
 340     Py_XDECREF(encoder);
 341     return NULL;
 342 }
 343
 344 /* Decode an object (usually a Python string) using the given encoding
 345    and return an equivalent object (e.g. an Unicode object).
 346
 347    errors is passed to the decoder factory as argument if non-NULL. */
 348
 349 PyObject *PyCodec_Decode(PyObject *object,
 350                          const char *encoding,
 351                          const char *errors)
 352 {
 353     PyObject *decoder = NULL;
 354     PyObject *args = NULL, *result = NULL;
 355     PyObject *v;
 356
 357     decoder = PyCodec_Decoder(encoding);
 358     if (decoder == NULL)
 359         goto onError;
 360
 361     args = args_tuple(object, errors);
 362     if (args == NULL)
 363         goto onError;
 364
 365     result = PyEval_CallObject(decoder,args);
 366     if (result == NULL)
 367         goto onError;
 368     if (!PyTuple_Check(result) ||
 369         PyTuple_GET_SIZE(result) != 2) {
 370         PyErr_SetString(PyExc_TypeError,
 371                         "decoder must return a tuple (object,integer)");
 372         goto onError;
 373     }
 374     v = PyTuple_GET_ITEM(result,0);
 375     Py_INCREF(v);
 376     /* We don't check or use the second (integer) entry. */
 377
 378     Py_DECREF(args);
 379     Py_DECREF(decoder);
 380     Py_DECREF(result);
 381     return v;
 382
 383  onError:
 384     Py_XDECREF(args);
 385     Py_XDECREF(decoder);
 386     Py_XDECREF(result);
 387     return NULL;
 388 }
 389
 390 /* Register the error handling callback function error under the name
 391    name. This function will be called by the codec when it encounters
 392    an unencodable characters/undecodable bytes and doesn't know the
 393    callback name, when name is specified as the error parameter
 394    in the call to the encode/decode function.
 395    Return 0 on success, -1 on error */
 396 int PyCodec_RegisterError(const char *name, PyObject *error)
 397 {
 398     PyInterpreterState *interp = PyThreadState_Get()->interp;
 399     if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
 400         return -1;
 401     if (!PyCallable_Check(error)) {
 402         PyErr_SetString(PyExc_TypeError, "handler must be callable");
 403         return -1;
 404     }
 405     return PyDict_SetItemString(interp->codec_error_registry,
 406                                 (char *)name, error);
 407 }
 408
 409 /* Lookup the error handling callback function registered under the
 410    name error. As a special case NULL can be passed, in which case
 411    the error handling callback for strict encoding will be returned. */
 412 PyObject *PyCodec_LookupError(const char *name)
 413 {
 414     PyObject *handler = NULL;
 415
 416     PyInterpreterState *interp = PyThreadState_Get()->interp;
 417     if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
 418         return NULL;
 419
 420     if (name==NULL)
 421         name = "strict";
 422     handler = PyDict_GetItemString(interp->codec_error_registry, (char *)name);
 423     if (!handler)
 424         PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", name);
 425     else
 426         Py_INCREF(handler);
 427     return handler;
 428 }
 429
 430 static void wrong_exception_type(PyObject *exc)
 431 {
 432     PyObject *type = PyObject_GetAttrString(exc, "__class__");
 433     if (type != NULL) {
 434         PyObject *name = PyObject_GetAttrString(type, "__name__");
 435         Py_DECREF(type);
 436         if (name != NULL) {
 437             PyObject *string = PyObject_Str(name);
 438             Py_DECREF(name);
 439             if (string != NULL) {
 440                 PyErr_Format(PyExc_TypeError,
 441                     "don't know how to handle %.400s in error callback",
 442                     PyString_AS_STRING(string));
 443                 Py_DECREF(string);
 444             }
 445         }
 446     }
 447 }
 448
 449 PyObject *PyCodec_StrictErrors(PyObject *exc)
 450 {
 451     if (PyInstance_Check(exc))
 452         PyErr_SetObject((PyObject*)((PyInstanceObject*)exc)->in_class,
 453             exc);
 454     else
 455         PyErr_SetString(PyExc_TypeError, "codec must pass exception instance");
 456     return NULL;
 457 }
 458
 459
 460 #ifdef Py_USING_UNICODE
 461 PyObject *PyCodec_IgnoreErrors(PyObject *exc)
 462 {
 463     int end;
 464     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
 465         if (PyUnicodeEncodeError_GetEnd(exc, &end))
 466             return NULL;
 467     }
 468     else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
 469         if (PyUnicodeDecodeError_GetEnd(exc, &end))
 470             return NULL;
 471     }
 472     else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
 473         if (PyUnicodeTranslateError_GetEnd(exc, &end))
 474             return NULL;
 475     }
 476     else {
 477         wrong_exception_type(exc);
 478         return NULL;
 479     }
 480     /* ouch: passing NULL, 0, pos gives None instead of u'' */
 481     return Py_BuildValue("(u#i)", &end, 0, end);
 482 }
 483
 484
 485 PyObject *PyCodec_ReplaceErrors(PyObject *exc)
 486 {
 487     PyObject *restuple;
 488     int start;
 489     int end;
 490     int i;
 491
 492     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
 493         PyObject *res;
 494         Py_UNICODE *p;
 495         if (PyUnicodeEncodeError_GetStart(exc, &start))
 496             return NULL;
 497         if (PyUnicodeEncodeError_GetEnd(exc, &end))
 498             return NULL;
 499         res = PyUnicode_FromUnicode(NULL, end-start);
 500         if (res == NULL)
 501             return NULL;
 502         for (p = PyUnicode_AS_UNICODE(res), i = start;
 503             i<end; ++p, ++i)
 504             *p = '?';
 505         restuple = Py_BuildValue("(Oi)", res, end);
 506         Py_DECREF(res);
 507         return restuple;
 508     }
 509     else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
 510         Py_UNICODE res = Py_UNICODE_REPLACEMENT_CHARACTER;
 511         if (PyUnicodeDecodeError_GetEnd(exc, &end))
 512             return NULL;
 513         return Py_BuildValue("(u#i)", &res, 1, end);
 514     }
 515     else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
 516         PyObject *res;
 517         Py_UNICODE *p;
 518         if (PyUnicodeTranslateError_GetStart(exc, &start))
 519             return NULL;
 520         if (PyUnicodeTranslateError_GetEnd(exc, &end))
 521             return NULL;
 522         res = PyUnicode_FromUnicode(NULL, end-start);
 523         if (res == NULL)
 524             return NULL;
 525         for (p = PyUnicode_AS_UNICODE(res), i = start;
 526             i<end; ++p, ++i)
 527             *p = Py_UNICODE_REPLACEMENT_CHARACTER;
 528         restuple = Py_BuildValue("(Oi)", res, end);
 529         Py_DECREF(res);
 530         return restuple;
 531     }
 532     else {
 533         wrong_exception_type(exc);
 534         return NULL;
 535     }
 536 }
 537
 538 PyObject *PyCodec_XMLCharRefReplaceErrors(PyObject *exc)
 539 {
 540     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
 541         PyObject *restuple;
 542         PyObject *object;
 543         int start;
 544         int end;
 545         PyObject *res;
 546         Py_UNICODE *p;
 547         Py_UNICODE *startp;
 548         Py_UNICODE *outp;
 549         int ressize;
 550         if (PyUnicodeEncodeError_GetStart(exc, &start))
 551             return NULL;
 552         if (PyUnicodeEncodeError_GetEnd(exc, &end))
 553             return NULL;
 554         if (!(object = PyUnicodeEncodeError_GetObject(exc)))
 555             return NULL;
 556         startp = PyUnicode_AS_UNICODE(object);
 557         for (p = startp+start, ressize = 0; p < startp+end; ++p) {
 558             if (*p<10)
 559                 ressize += 2+1+1;
 560             else if (*p<100)
 561                 ressize += 2+2+1;
 562             else if (*p<1000)
 563                 ressize += 2+3+1;
 564             else if (*p<10000)
 565                 ressize += 2+4+1;
 566             else if (*p<100000)
 567                 ressize += 2+5+1;
 568             else if (*p<1000000)
 569                 ressize += 2+6+1;
 570             else
 571                 ressize += 2+7+1;
 572         }
 573         /* allocate replacement */
 574         res = PyUnicode_FromUnicode(NULL, ressize);
 575         if (res == NULL) {
 576             Py_DECREF(object);
 577             return NULL;
 578         }
 579         /* generate replacement */
 580         for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
 581             p < startp+end; ++p) {
 582             Py_UNICODE c = *p;
 583             int digits;
 584             int base;
 585             *outp++ = '&';
 586             *outp++ = '#';
 587             if (*p<10) {
 588                 digits = 1;
 589                 base = 1;
 590             }
 591             else if (*p<100) {
 592                 digits = 2;
 593                 base = 10;
 594             }
 595             else if (*p<1000) {
 596                 digits = 3;
 597                 base = 100;
 598             }
 599             else if (*p<10000) {
 600                 digits = 4;
 601                 base = 1000;
 602             }
 603             else if (*p<100000) {
 604                 digits = 5;
 605                 base = 10000;
 606             }
 607             else if (*p<1000000) {
 608                 digits = 6;
 609                 base = 100000;
 610             }
 611             else {
 612                 digits = 7;
 613                 base = 1000000;
 614             }
 615             while (digits-->0) {
 616                 *outp++ = '0' + c/base;
 617                 c %= base;
 618                 base /= 10;
 619             }
 620             *outp++ = ';';
 621         }
 622         restuple = Py_BuildValue("(Oi)", res, end);
 623         Py_DECREF(res);
 624         Py_DECREF(object);
 625         return restuple;
 626     }
 627     else {
 628         wrong_exception_type(exc);
 629         return NULL;
 630     }
 631 }
 632
 633 static Py_UNICODE hexdigits[] = {
 634     '0', '1', '2', '3', '4', '5', '6', '7',
 635     '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
 636 };
 637
 638 PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
 639 {
 640     if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
 641         PyObject *restuple;
 642         PyObject *object;
 643         int start;
 644         int end;
 645         PyObject *res;
 646         Py_UNICODE *p;
 647         Py_UNICODE *startp;
 648         Py_UNICODE *outp;
 649         int ressize;
 650         if (PyUnicodeEncodeError_GetStart(exc, &start))
 651             return NULL;
 652         if (PyUnicodeEncodeError_GetEnd(exc, &end))
 653             return NULL;
 654         if (!(object = PyUnicodeEncodeError_GetObject(exc)))
 655             return NULL;
 656         startp = PyUnicode_AS_UNICODE(object);
 657         for (p = startp+start, ressize = 0; p < startp+end; ++p) {
 658             if (*p >= 0x00010000)
 659                 ressize += 1+1+8;
 660             else if (*p >= 0x100) {
 661                 ressize += 1+1+4;
 662             }
 663             else
 664                 ressize += 1+1+2;
 665         }
 666         res = PyUnicode_FromUnicode(NULL, ressize);
 667         if (res==NULL)
 668             return NULL;
 669         for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
 670             p < startp+end; ++p) {
 671             Py_UNICODE c = *p;
 672             *outp++ = '\\';
 673             if (c >= 0x00010000) {
 674                 *outp++ = 'U';
 675                 *outp++ = hexdigits[(c>>28)&0xf];
 676                 *outp++ = hexdigits[(c>>24)&0xf];
 677                 *outp++ = hexdigits[(c>>20)&0xf];
 678                 *outp++ = hexdigits[(c>>16)&0xf];
 679                 *outp++ = hexdigits[(c>>12)&0xf];
 680                 *outp++ = hexdigits[(c>>8)&0xf];
 681             }
 682             else if (c >= 0x100) {
 683                 *outp++ = 'u';
 684                 *outp++ = hexdigits[(c>>12)&0xf];
 685                 *outp++ = hexdigits[(c>>8)&0xf];
 686             }
 687             else
 688                 *outp++ = 'x';
 689             *outp++ = hexdigits[(c>>4)&0xf];
 690             *outp++ = hexdigits[c&0xf];
 691         }
 692
 693         restuple = Py_BuildValue("(Oi)", res, end);
 694         Py_DECREF(res);
 695         Py_DECREF(object);
 696         return restuple;
 697     }
 698     else {
 699         wrong_exception_type(exc);
 700         return NULL;
 701     }
 702 }
 703 #endif
 704
 705 static PyObject *strict_errors(PyObject *self, PyObject *exc)
 706 {
 707     return PyCodec_StrictErrors(exc);
 708 }
 709
 710
 711 #ifdef Py_USING_UNICODE
 712 static PyObject *ignore_errors(PyObject *self, PyObject *exc)
 713 {
 714     return PyCodec_IgnoreErrors(exc);
 715 }
 716
 717
 718 static PyObject *replace_errors(PyObject *self, PyObject *exc)
 719 {
 720     return PyCodec_ReplaceErrors(exc);
 721 }
 722
 723
 724 static PyObject *xmlcharrefreplace_errors(PyObject *self, PyObject *exc)
 725 {
 726     return PyCodec_XMLCharRefReplaceErrors(exc);
 727 }
 728
 729
 730 static PyObject *backslashreplace_errors(PyObject *self, PyObject *exc)
 731 {
 732     return PyCodec_BackslashReplaceErrors(exc);
 733 }
 734 #endif
 735
 736 static int _PyCodecRegistry_Init(void)
 737 {
 738     static struct {
 739         char *name;
 740         PyMethodDef def;
 741     } methods[] =
 742     {
 743         {
 744             "strict",
 745             {
 746                 "strict_errors",
 747                 strict_errors,
 748                 METH_O
 749             }
 750         },
 751 #ifdef Py_USING_UNICODE
 752         {
 753             "ignore",
 754             {
 755                 "ignore_errors",
 756                 ignore_errors,
 757                 METH_O
 758             }
 759         },
 760         {
 761             "replace",
 762             {
 763                 "replace_errors",
 764                 replace_errors,
 765                 METH_O
 766             }
 767         },
 768         {
 769             "xmlcharrefreplace",
 770             {
 771                 "xmlcharrefreplace_errors",
 772                 xmlcharrefreplace_errors,
 773                 METH_O
 774             }
 775         },
 776         {
 777             "backslashreplace",
 778             {
 779                 "backslashreplace_errors",
 780                 backslashreplace_errors,
 781                 METH_O
 782             }
 783         }
 784 #endif
 785     };
 786
 787     PyInterpreterState *interp = PyThreadState_Get()->interp;
 788     PyObject *mod;
 789     int i;
 790
 791     if (interp->codec_search_path != NULL)
 792         return 0;
 793
 794     interp->codec_search_path = PyList_New(0);
 795     interp->codec_search_cache = PyDict_New();
 796     interp->codec_error_registry = PyDict_New();
 797
 798     if (interp->codec_error_registry) {
 799         for (i = 0; i < sizeof(methods)/sizeof(methods[0]); ++i) {
 800             PyObject *func = PyCFunction_New(&methods[i].def, NULL);
 801             int res;
 802             if (!func)
 803                 Py_FatalError("can't initialize codec error registry");
 804             res = PyCodec_RegisterError(methods[i].name, func);
 805             Py_DECREF(func);
 806             if (res)
 807                 Py_FatalError("can't initialize codec error registry");
 808         }
 809     }
 810
 811     if (interp->codec_search_path == NULL ||
 812         interp->codec_search_cache == NULL ||
 813         interp->codec_error_registry == NULL)
 814         Py_FatalError("can't initialize codec registry");
 815
 816     mod = PyImport_ImportModuleEx("encodings", NULL, NULL, NULL);
 817     if (mod == NULL) {
 818         if (PyErr_ExceptionMatches(PyExc_ImportError)) {
 819             /* Ignore ImportErrors... this is done so that
 820                distributions can disable the encodings package. Note
 821                that other errors are not masked, e.g. SystemErrors
 822                raised to inform the user of an error in the Python
 823                configuration are still reported back to the user. */
 824             PyErr_Clear();
 825             return 0;
 826         }
 827         return -1;
 828     }
 829     Py_DECREF(mod);
 830     return 0;
 831 }