Include/unicodeobject.h

   1 #ifndef Py_UNICODEOBJECT_H
   2 #define Py_UNICODEOBJECT_H
   3
   4 /*
   5
   6 Unicode implementation based on original code by Fredrik Lundh,
   7 modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
   8 Unicode Integration Proposal (see file Misc/unicode.txt).
   9
  10 Copyright (c) Corporation for National Research Initiatives.
  11
  12
  13  Original header:
  14  --------------------------------------------------------------------
  15
  16  * Yet another Unicode string type for Python.  This type supports the
  17  * 16-bit Basic Multilingual Plane (BMP) only.
  18  *
  19  * Written by Fredrik Lundh, January 1999.
  20  *
  21  * Copyright (c) 1999 by Secret Labs AB.
  22  * Copyright (c) 1999 by Fredrik Lundh.
  23  *
  24  * fredrik@pythonware.com
  25  * http://www.pythonware.com
  26  *
  27  * --------------------------------------------------------------------
  28  * This Unicode String Type is
  29  *
  30  * Copyright (c) 1999 by Secret Labs AB
  31  * Copyright (c) 1999 by Fredrik Lundh
  32  *
  33  * By obtaining, using, and/or copying this software and/or its
  34  * associated documentation, you agree that you have read, understood,
  35  * and will comply with the following terms and conditions:
  36  *
  37  * Permission to use, copy, modify, and distribute this software and its
  38  * associated documentation for any purpose and without fee is hereby
  39  * granted, provided that the above copyright notice appears in all
  40  * copies, and that both that copyright notice and this permission notice
  41  * appear in supporting documentation, and that the name of Secret Labs
  42  * AB or the author not be used in advertising or publicity pertaining to
  43  * distribution of the software without specific, written prior
  44  * permission.
  45  *
  46  * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
  47  * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
  48  * FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
  49  * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  50  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  51  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
  52  * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  53  * -------------------------------------------------------------------- */
  54
  55 #include <ctype.h>
  56
  57 /* === Internal API ======================================================= */
  58
  59 /* --- Internal Unicode Format -------------------------------------------- */
  60
  61 #ifndef Py_USING_UNICODE
  62
  63 #define PyUnicode_Check(op)                 0
  64 #define PyUnicode_CheckExact(op)            0
  65
  66 #else
  67
  68 /* FIXME: MvL's new implementation assumes that Py_UNICODE_SIZE is
  69    properly set, but the default rules below don't set it.  I'll
  70    sort this out some other day -- fredrik@pythonware.com */
  71
  72 #ifndef Py_UNICODE_SIZE
  73 #error Must define Py_UNICODE_SIZE
  74 #endif
  75
  76 /* Setting Py_UNICODE_WIDE enables UCS-4 storage.  Otherwise, Unicode
  77    strings are stored as UCS-2 (with limited support for UTF-16) */
  78
  79 #if Py_UNICODE_SIZE >= 4
  80 #define Py_UNICODE_WIDE
  81 #endif
  82
  83 /* Set these flags if the platform has "wchar.h", "wctype.h" and the
  84    wchar_t type is a 16-bit unsigned type */
  85 /* #define HAVE_WCHAR_H */
  86 /* #define HAVE_USABLE_WCHAR_T */
  87
  88 /* Defaults for various platforms */
  89 #ifndef PY_UNICODE_TYPE
  90
  91 /* Windows has a usable wchar_t type (unless we're using UCS-4) */
  92 # if defined(MS_WIN32) && Py_UNICODE_SIZE == 2
  93 #  define HAVE_USABLE_WCHAR_T
  94 #  define PY_UNICODE_TYPE wchar_t
  95 # endif
  96
  97 # if defined(Py_UNICODE_WIDE)
  98 #  define PY_UNICODE_TYPE Py_UCS4
  99 # endif
 100
 101 #endif
 102
 103 /* If the compiler provides a wchar_t type we try to support it
 104    through the interface functions PyUnicode_FromWideChar() and
 105    PyUnicode_AsWideChar(). */
 106
 107 #ifdef HAVE_USABLE_WCHAR_T
 108 # ifndef HAVE_WCHAR_H
 109 #  define HAVE_WCHAR_H
 110 # endif
 111 #endif
 112
 113 #ifdef HAVE_WCHAR_H
 114 /* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
 115 # ifdef _HAVE_BSDI
 116 #  include <time.h>
 117 # endif
 118 #  include <wchar.h>
 119 #endif
 120
 121 /*
 122  * Use this typedef when you need to represent a UTF-16 surrogate pair
 123  * as a single unsigned integer.
      *
      * The type is unsigned int when SIZEOF_INT >= 4, otherwise unsigned
      * long when SIZEOF_LONG >= 4.  If neither test holds, no typedef is
      * emitted and any later use of Py_UCS4 fails to compile.
 124  */
 125 #if SIZEOF_INT >= 4
 126 typedef unsigned int Py_UCS4;
 127 #elif SIZEOF_LONG >= 4
 128 typedef unsigned long Py_UCS4;
 129 #endif
 130
     /* Element type of Unicode buffers.  PY_UNICODE_TYPE comes either from
        the platform defaults earlier in this header or from a definition
        supplied before this header is included. */
 131 typedef PY_UNICODE_TYPE Py_UNICODE;
 132
 133 /* --- UCS-2/UCS-4 Name Mangling ------------------------------------------ */
 134
 135 /* Unicode API names are mangled to ensure that UCS-2 and UCS-4 builds
 136    produce different external names, so that import errors result
 137    when a Python interpreter and extensions compiled with different
 138    Unicode width assumptions are combined. */
 139
 140 #ifndef Py_UNICODE_WIDE
 141
 142 # define PyUnicode_AsASCIIString PyUnicodeUCS2_AsASCIIString
 143 # define PyUnicode_AsCharmapString PyUnicodeUCS2_AsCharmapString
 144 # define PyUnicode_AsEncodedString PyUnicodeUCS2_AsEncodedString
 145 # define PyUnicode_AsLatin1String PyUnicodeUCS2_AsLatin1String
 146 # define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS2_AsRawUnicodeEscapeString
 147 # define PyUnicode_AsUTF16String PyUnicodeUCS2_AsUTF16String
 148 # define PyUnicode_AsUTF8String PyUnicodeUCS2_AsUTF8String
 149 # define PyUnicode_AsUnicode PyUnicodeUCS2_AsUnicode
 150 # define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS2_AsUnicodeEscapeString
 151 # define PyUnicode_AsWideChar PyUnicodeUCS2_AsWideChar
 152 # define PyUnicode_Compare PyUnicodeUCS2_Compare
 153 # define PyUnicode_Concat PyUnicodeUCS2_Concat
 154 # define PyUnicode_Contains PyUnicodeUCS2_Contains
 155 # define PyUnicode_Count PyUnicodeUCS2_Count
 156 # define PyUnicode_Decode PyUnicodeUCS2_Decode
 157 # define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII
 158 # define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap
 159 # define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
 160 # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
 161 # define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16
 162 # define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8
 163 # define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS2_DecodeUnicodeEscape
 164 # define PyUnicode_Encode PyUnicodeUCS2_Encode
 165 # define PyUnicode_EncodeASCII PyUnicodeUCS2_EncodeASCII
 166 # define PyUnicode_EncodeCharmap PyUnicodeUCS2_EncodeCharmap
 167 # define PyUnicode_EncodeDecimal PyUnicodeUCS2_EncodeDecimal
 168 # define PyUnicode_EncodeLatin1 PyUnicodeUCS2_EncodeLatin1
 169 # define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS2_EncodeRawUnicodeEscape
 170 # define PyUnicode_EncodeUTF16 PyUnicodeUCS2_EncodeUTF16
 171 # define PyUnicode_EncodeUTF8 PyUnicodeUCS2_EncodeUTF8
 172 # define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS2_EncodeUnicodeEscape
 173 # define PyUnicode_Find PyUnicodeUCS2_Find
 174 # define PyUnicode_Format PyUnicodeUCS2_Format
 175 # define PyUnicode_FromEncodedObject PyUnicodeUCS2_FromEncodedObject
 176 # define PyUnicode_FromObject PyUnicodeUCS2_FromObject
 177 # define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
 178 # define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
 179 # define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
 180 # define PyUnicode_GetMax PyUnicodeUCS2_GetMax
 181 # define PyUnicode_GetSize PyUnicodeUCS2_GetSize
 182 # define PyUnicode_Join PyUnicodeUCS2_Join
 183 # define PyUnicode_Replace PyUnicodeUCS2_Replace
 184 # define PyUnicode_Resize PyUnicodeUCS2_Resize
 185 # define PyUnicode_SetDefaultEncoding PyUnicodeUCS2_SetDefaultEncoding
 186 # define PyUnicode_Split PyUnicodeUCS2_Split
 187 # define PyUnicode_Splitlines PyUnicodeUCS2_Splitlines
 188 # define PyUnicode_Tailmatch PyUnicodeUCS2_Tailmatch
 189 # define PyUnicode_Translate PyUnicodeUCS2_Translate
 190 # define PyUnicode_TranslateCharmap PyUnicodeUCS2_TranslateCharmap
 191 # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString
 192 # define _PyUnicode_Fini _PyUnicodeUCS2_Fini
 193 # define _PyUnicode_Init _PyUnicodeUCS2_Init
 194 # define _PyUnicode_IsAlpha _PyUnicodeUCS2_IsAlpha
 195 # define _PyUnicode_IsDecimalDigit _PyUnicodeUCS2_IsDecimalDigit
 196 # define _PyUnicode_IsDigit _PyUnicodeUCS2_IsDigit
 197 # define _PyUnicode_IsLinebreak _PyUnicodeUCS2_IsLinebreak
 198 # define _PyUnicode_IsLowercase _PyUnicodeUCS2_IsLowercase
 199 # define _PyUnicode_IsNumeric _PyUnicodeUCS2_IsNumeric
 200 # define _PyUnicode_IsTitlecase _PyUnicodeUCS2_IsTitlecase
 201 # define _PyUnicode_IsUppercase _PyUnicodeUCS2_IsUppercase
 202 # define _PyUnicode_IsWhitespace _PyUnicodeUCS2_IsWhitespace
 203 # define _PyUnicode_ToDecimalDigit _PyUnicodeUCS2_ToDecimalDigit
 204 # define _PyUnicode_ToDigit _PyUnicodeUCS2_ToDigit
 205 # define _PyUnicode_ToLowercase _PyUnicodeUCS2_ToLowercase
 206 # define _PyUnicode_ToNumeric _PyUnicodeUCS2_ToNumeric
 207 # define _PyUnicode_ToTitlecase _PyUnicodeUCS2_ToTitlecase
 208 # define _PyUnicode_ToUppercase _PyUnicodeUCS2_ToUppercase
 209
 210 #else
 211
 212 # define PyUnicode_AsASCIIString PyUnicodeUCS4_AsASCIIString
 213 # define PyUnicode_AsCharmapString PyUnicodeUCS4_AsCharmapString
 214 # define PyUnicode_AsEncodedString PyUnicodeUCS4_AsEncodedString
 215 # define PyUnicode_AsLatin1String PyUnicodeUCS4_AsLatin1String
 216 # define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS4_AsRawUnicodeEscapeString
 217 # define PyUnicode_AsUTF16String PyUnicodeUCS4_AsUTF16String
 218 # define PyUnicode_AsUTF8String PyUnicodeUCS4_AsUTF8String
 219 # define PyUnicode_AsUnicode PyUnicodeUCS4_AsUnicode
 220 # define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS4_AsUnicodeEscapeString
 221 # define PyUnicode_AsWideChar PyUnicodeUCS4_AsWideChar
 222 # define PyUnicode_Compare PyUnicodeUCS4_Compare
 223 # define PyUnicode_Concat PyUnicodeUCS4_Concat
 224 # define PyUnicode_Contains PyUnicodeUCS4_Contains
 225 # define PyUnicode_Count PyUnicodeUCS4_Count
 226 # define PyUnicode_Decode PyUnicodeUCS4_Decode
 227 # define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII
 228 # define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap
 229 # define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
 230 # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
 231 # define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16
 232 # define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8
 233 # define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS4_DecodeUnicodeEscape
 234 # define PyUnicode_Encode PyUnicodeUCS4_Encode
 235 # define PyUnicode_EncodeASCII PyUnicodeUCS4_EncodeASCII
 236 # define PyUnicode_EncodeCharmap PyUnicodeUCS4_EncodeCharmap
 237 # define PyUnicode_EncodeDecimal PyUnicodeUCS4_EncodeDecimal
 238 # define PyUnicode_EncodeLatin1 PyUnicodeUCS4_EncodeLatin1
 239 # define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS4_EncodeRawUnicodeEscape
 240 # define PyUnicode_EncodeUTF16 PyUnicodeUCS4_EncodeUTF16
 241 # define PyUnicode_EncodeUTF8 PyUnicodeUCS4_EncodeUTF8
 242 # define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS4_EncodeUnicodeEscape
 243 # define PyUnicode_Find PyUnicodeUCS4_Find
 244 # define PyUnicode_Format PyUnicodeUCS4_Format
 245 # define PyUnicode_FromEncodedObject PyUnicodeUCS4_FromEncodedObject
 246 # define PyUnicode_FromObject PyUnicodeUCS4_FromObject
 247 # define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
 248 # define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
 249 # define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
 250 # define PyUnicode_GetMax PyUnicodeUCS4_GetMax
 251 # define PyUnicode_GetSize PyUnicodeUCS4_GetSize
 252 # define PyUnicode_Join PyUnicodeUCS4_Join
 253 # define PyUnicode_Replace PyUnicodeUCS4_Replace
 254 # define PyUnicode_Resize PyUnicodeUCS4_Resize
 255 # define PyUnicode_SetDefaultEncoding PyUnicodeUCS4_SetDefaultEncoding
 256 # define PyUnicode_Split PyUnicodeUCS4_Split
 257 # define PyUnicode_Splitlines PyUnicodeUCS4_Splitlines
 258 # define PyUnicode_Tailmatch PyUnicodeUCS4_Tailmatch
 259 # define PyUnicode_Translate PyUnicodeUCS4_Translate
 260 # define PyUnicode_TranslateCharmap PyUnicodeUCS4_TranslateCharmap
 261 # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString
 262 # define _PyUnicode_Fini _PyUnicodeUCS4_Fini
 263 # define _PyUnicode_Init _PyUnicodeUCS4_Init
 264 # define _PyUnicode_IsAlpha _PyUnicodeUCS4_IsAlpha
 265 # define _PyUnicode_IsDecimalDigit _PyUnicodeUCS4_IsDecimalDigit
 266 # define _PyUnicode_IsDigit _PyUnicodeUCS4_IsDigit
 267 # define _PyUnicode_IsLinebreak _PyUnicodeUCS4_IsLinebreak
 268 # define _PyUnicode_IsLowercase _PyUnicodeUCS4_IsLowercase
 269 # define _PyUnicode_IsNumeric _PyUnicodeUCS4_IsNumeric
 270 # define _PyUnicode_IsTitlecase _PyUnicodeUCS4_IsTitlecase
 271 # define _PyUnicode_IsUppercase _PyUnicodeUCS4_IsUppercase
 272 # define _PyUnicode_IsWhitespace _PyUnicodeUCS4_IsWhitespace
 273 # define _PyUnicode_ToDecimalDigit _PyUnicodeUCS4_ToDecimalDigit
 274 # define _PyUnicode_ToDigit _PyUnicodeUCS4_ToDigit
 275 # define _PyUnicode_ToLowercase _PyUnicodeUCS4_ToLowercase
 276 # define _PyUnicode_ToNumeric _PyUnicodeUCS4_ToNumeric
 277 # define _PyUnicode_ToTitlecase _PyUnicodeUCS4_ToTitlecase
 278 # define _PyUnicode_ToUppercase _PyUnicodeUCS4_ToUppercase
 279
 280
 281 #endif
 282
 283 /* --- Internal Unicode Operations ---------------------------------------- */
 284
 285 /* If you want Python to use the compiler's wctype.h functions instead
 286    of the ones supplied with Python, define WANT_WCTYPE_FUNCTIONS or
 287    configure Python using --with-ctype-functions.  This reduces the
 288    interpreter's code size.

        The wctype.h variants are only selected when HAVE_USABLE_WCHAR_T is
        defined as well.  Even then only ISSPACE, ISLOWER, ISUPPER, ISALPHA,
        TOLOWER and TOUPPER map to wctype.h; the title case, line break,
        decimal, digit and numeric macros use the _PyUnicode_* helpers in
        both branches. */
 289
 290 #if defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS)
 291
 292 #include <wctype.h>
 293
 294 #define Py_UNICODE_ISSPACE(ch) iswspace(ch)
 295
 296 #define Py_UNICODE_ISLOWER(ch) iswlower(ch)
 297 #define Py_UNICODE_ISUPPER(ch) iswupper(ch)
 298 #define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
 299 #define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
 300
 301 #define Py_UNICODE_TOLOWER(ch) towlower(ch)
 302 #define Py_UNICODE_TOUPPER(ch) towupper(ch)
 303 #define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
 304
 305 #define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
 306 #define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
 307 #define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
 308
 309 #define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
 310 #define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
 311 #define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
 312
 313 #define Py_UNICODE_ISALPHA(ch) iswalpha(ch)
 314
 315 #else
 316
     /* Default branch: every macro maps to a _PyUnicode_* helper. */
 317 #define Py_UNICODE_ISSPACE(ch) _PyUnicode_IsWhitespace(ch)
 318
 319 #define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
 320 #define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
 321 #define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
 322 #define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
 323
 324 #define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
 325 #define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
 326 #define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
 327
 328 #define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
 329 #define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
 330 #define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
 331
 332 #define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
 333 #define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
 334 #define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
 335
 336 #define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
 337
 338 #endif
 339
     /* Nonzero when ch satisfies at least one of Py_UNICODE_ISALPHA,
        Py_UNICODE_ISDECIMAL, Py_UNICODE_ISDIGIT or Py_UNICODE_ISNUMERIC.
        NOTE: ch is substituted into each test, so it may be evaluated up to
        four times; do not pass an argument with side effects (e.g. *p++). */
 340 #define Py_UNICODE_ISALNUM(ch) \
 341        (Py_UNICODE_ISALPHA(ch) || \
 342         Py_UNICODE_ISDECIMAL(ch) || \
 343         Py_UNICODE_ISDIGIT(ch) || \
 344         Py_UNICODE_ISNUMERIC(ch))
 345
     /* Copy length Py_UNICODE elements from source to target.  Implemented
        with memcpy(), so the two buffers must not overlap; the expression
        yields memcpy()'s return value.
        NOTE(review): the visible part of this header does not include
        <string.h>; the includer presumably provides memcpy's declaration. */
 346 #define Py_UNICODE_COPY(target, source, length)\
 347     (memcpy((target), (source), (length)*sizeof(Py_UNICODE)))
 348
 349 #define Py_UNICODE_FILL(target, value, length) do\
 350     {int i; for (i = 0; i < (length); i++) (target)[i] = (value);}\
 351     while (0)
 352
     /* Nonzero when the contents of substring occur in string starting at
        index offset.  Both arguments are pointers to objects with str and
        length members.  The first elements are compared directly as a
        cheap early-out before memcmp() checks all substring->length
        elements.  No bounds check is made: nothing here verifies that
        offset + substring->length stays within string->length, and
        substring->str[0] is read even when substring->length is 0. */
 353 #define Py_UNICODE_MATCH(string, offset, substring)\
 354     ((*((string)->str + (offset)) == *((substring)->str)) &&\
 355      !memcmp((string)->str + (offset), (substring)->str,\
 356              (substring)->length*sizeof(Py_UNICODE)))
 357
 358 #ifdef __cplusplus
 359 extern "C" {
 360 #endif
 361
 362 /* --- Unicode Type ------------------------------------------------------- */
 363
     /* Instance layout of a Unicode object: the common object header
        followed by the element count, a pointer to the Py_UNICODE buffer,
        a cached hash and an optional cached encoded copy.  length counts
        Py_UNICODE elements, not bytes (PyUnicode_GET_DATA_SIZE multiplies
        it by sizeof(Py_UNICODE)). */
 364 typedef struct {
 365     PyObject_HEAD
 366     int length;                 /* Length of raw Unicode data in buffer */
 367     Py_UNICODE *str;            /* Raw Unicode buffer */
 368     long hash;                  /* Hash value; -1 if not set */
 369     PyObject *defenc;           /* (Default) Encoded version as Python
 370                                    string, or NULL; this is used for
 371                                    implementing the buffer protocol */
 372 } PyUnicodeObject;
 373
 374 extern DL_IMPORT(PyTypeObject) PyUnicode_Type;
 375
     /* Type checks for Unicode objects.  PyUnicode_Check() delegates to
        PyObject_TypeCheck() with PyUnicode_Type (presumably that also
        accepts subtypes -- TODO confirm against its definition), while
        PyUnicode_CheckExact() compares op->ob_type with &PyUnicode_Type
        directly and therefore requires op to be a valid object pointer. */
 376 #define PyUnicode_Check(op) PyObject_TypeCheck(op, &PyUnicode_Type)
 377 #define PyUnicode_CheckExact(op) ((op)->ob_type == &PyUnicode_Type)
 378
 379 /* Fast access macros.

        Each macro casts op to PyUnicodeObject * and reads a field directly;
        no type or NULL check is performed, so op must already be known to
        be a Unicode object.

        PyUnicode_GET_SIZE       length in Py_UNICODE elements
        PyUnicode_GET_DATA_SIZE  length in bytes (length * sizeof(Py_UNICODE))
        PyUnicode_AS_UNICODE     the Py_UNICODE * buffer
        PyUnicode_AS_DATA        the same buffer viewed as const char * */
 380 #define PyUnicode_GET_SIZE(op) \
 381         (((PyUnicodeObject *)(op))->length)
 382 #define PyUnicode_GET_DATA_SIZE(op) \
 383         (((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE))
 384 #define PyUnicode_AS_UNICODE(op) \
 385         (((PyUnicodeObject *)(op))->str)
 386 #define PyUnicode_AS_DATA(op) \
 387         ((const char *)((PyUnicodeObject *)(op))->str)
 388
 389 /* --- Constants ---------------------------------------------------------- */
 390
 391 /* This Unicode character will be used as replacement character during
 392    decoding if the errors argument is set to "replace". Note: the
 393    Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
 394    Unicode 3.0. */
 395
 396 #define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UNICODE) 0xFFFD)
 397
 398 /* === Public API ========================================================= */
 399
 400 /* --- Plain Py_UNICODE --------------------------------------------------- */
 401
 402 /* Create a Unicode Object from the Py_UNICODE buffer u of the given
 403    size.
 404
 405    u may be NULL which causes the contents to be undefined. It is the
 406    user's responsibility to fill in the needed data afterwards. Note
 407    that modifying the Unicode object contents after construction is
 408    only allowed if u was set to NULL.
 409
 410    The buffer is copied into the new object. */
 411
 412 extern DL_IMPORT(PyObject*) PyUnicode_FromUnicode(
 413     const Py_UNICODE *u,        /* Unicode buffer */
 414     int size                    /* size of buffer */
 415     );
 416
 417 /* Return a read-only pointer to the Unicode object's internal
 418    Py_UNICODE buffer. */
 419
 420 extern DL_IMPORT(Py_UNICODE *) PyUnicode_AsUnicode(
 421     PyObject *unicode           /* Unicode object */
 422     );
 423
 424 /* Get the length of the Unicode object. */
 425
 426 extern DL_IMPORT(int) PyUnicode_GetSize(
 427     PyObject *unicode           /* Unicode object */
 428     );
 429
 430 /* Get the maximum ordinal for a Unicode character. */
 431 extern DL_IMPORT(Py_UNICODE) PyUnicode_GetMax(void);
 432
 433 /* Resize an already allocated Unicode object to the new size length.
 434
 435    *unicode is modified to point to the new (resized) object and 0
 436    returned on success.
 437
 438    This API may only be called by the function which also called the
 439    Unicode constructor. The refcount on the object must be 1. Otherwise,
 440    an error is returned.
 441
 442    Error handling is implemented as follows: an exception is set, -1
 443    is returned and *unicode left untouched.
 444
 445 */
 446
 447 extern DL_IMPORT(int) PyUnicode_Resize(
 448     PyObject **unicode,         /* Pointer to the Unicode object */
 449     int length                  /* New length */
 450     );
 451
 452 /* Coerce obj to a Unicode object and return a reference with
 453    *incremented* refcount.
 454
 455    Coercion is done in the following way:
 456
 457    1. String and other char buffer compatible objects are decoded
 458       under the assumptions that they contain data using the current
 459       default encoding. Decoding is done in "strict" mode.
 460
 461    2. All other objects (including Unicode objects) raise an
 462       exception.
 463
 464    The API returns NULL in case of an error. The caller is responsible
 465    for decref'ing the returned objects.
 466
 467 */
 468
 469 extern DL_IMPORT(PyObject*) PyUnicode_FromEncodedObject(
 470     register PyObject *obj,     /* Object */
 471     const char *encoding,       /* encoding */
 472     const char *errors          /* error handling */
 473     );
 474
 475 /* Coerce obj to a Unicode object and return a reference with
 476    *incremented* refcount.
 477
 478    Unicode objects are passed back as-is (subclasses are converted to
 479    true Unicode objects), all other objects are delegated to
 480    PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in
 481    using the default encoding as basis for decoding the object.
 482
 483    The API returns NULL in case of an error. The caller is responsible
 484    for decref'ing the returned objects.
 485
 486 */
 487
 488 extern DL_IMPORT(PyObject*) PyUnicode_FromObject(
 489     register PyObject *obj      /* Object */
 490     );
 491
 492 /* --- wchar_t support for platforms which support it --------------------- */
 493
 494 #ifdef HAVE_WCHAR_H
 495
 496 /* Create a Unicode Object from the wchar_t buffer w of the given
 497    size.
 498
 499    The buffer is copied into the new object. */
 500
 501 extern DL_IMPORT(PyObject*) PyUnicode_FromWideChar(
 502     register const wchar_t *w,  /* wchar_t buffer */
 503     int size                    /* size of buffer */
 504     );
 505
 506 /* Copies the Unicode Object contents into the wchar_t buffer w.  At
 507    most size wchar_t characters are copied.
 508
 509    Returns the number of wchar_t characters copied or -1 in case of an
 510    error. */
 511
 512 extern DL_IMPORT(int) PyUnicode_AsWideChar(
 513     PyUnicodeObject *unicode,   /* Unicode object */
 514     register wchar_t *w,        /* wchar_t buffer */
 515     int size                    /* size of buffer */
 516     );
 517
 518 #endif
 519
 520 /* --- Unicode ordinals --------------------------------------------------- */
 521
 522 /* Create a Unicode Object from the given Unicode code point ordinal.
 523
 524    The ordinal must be in range(0x10000) on narrow Python builds
 525    (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is
 526    raised in case it is not.
 527
 528 */
 529
 530 extern DL_IMPORT(PyObject*) PyUnicode_FromOrdinal(int ordinal);
 531
 532 /* === Builtin Codecs =====================================================
 533
 534    Many of these APIs take two arguments encoding and errors. These
 535    parameters encoding and errors have the same semantics as the ones
 536    of the builtin unicode() API.
 537
 538    Setting encoding to NULL causes the default encoding to be used.
 539
 540    Error handling is set by errors which may also be set to NULL
 541    meaning to use the default handling defined for the codec. Default
 542    error handling for all builtin codecs is "strict" (ValueErrors are
 543    raised).
 544
 545    The codecs all use a similar interface. Only deviations from the
 546    generic ones are documented.
 547
 548 */
 549
 550 /* --- Manage the default encoding ---------------------------------------- */
 551
 552 /* Return a Python string holding the default encoded value of the
 553    Unicode object.
 554
 555    The resulting string is cached in the Unicode object for subsequent
 556    usage by this function. The cached version is needed to implement
 557    the character buffer interface and will live (at least) as long as
 558    the Unicode object itself.
 559
 560    The refcount of the string is *not* incremented.
 561
 562    *** Exported for internal use by the interpreter only !!! ***
 563
 564 */
 565
 566 extern DL_IMPORT(PyObject *) _PyUnicode_AsDefaultEncodedString(
 567     PyObject *, const char *);
 568
 569 /* Returns the currently active default encoding.
 570
 571    The default encoding is currently implemented as run-time settable
 572    process global.  This may change in future versions of the
 573    interpreter to become a parameter which is managed on a per-thread
 574    basis.
 575
 576  */
 577
 578 extern DL_IMPORT(const char*) PyUnicode_GetDefaultEncoding(void);
 579
 580 /* Sets the currently active default encoding.
 581
 582    Returns 0 on success, -1 in case of an error.
 583
 584  */
 585
 586 extern DL_IMPORT(int) PyUnicode_SetDefaultEncoding(
 587     const char *encoding        /* Encoding name in standard form */
 588     );
 589
 590 /* --- Generic Codecs ----------------------------------------------------- */
 591
 592 /* Create a Unicode object by decoding the encoded string s of the
 593    given size. */
 594
 595 extern DL_IMPORT(PyObject*) PyUnicode_Decode(
 596     const char *s,              /* encoded string */
 597     int size,                   /* size of buffer */
 598     const char *encoding,       /* encoding */
 599     const char *errors          /* error handling */
 600     );
 601
 602 /* Encodes a Py_UNICODE buffer of the given size and returns a
 603    Python string object. */
 604
 605 extern DL_IMPORT(PyObject*) PyUnicode_Encode(
 606     const Py_UNICODE *s,        /* Unicode char buffer */
 607     int size,                   /* number of Py_UNICODE chars to encode */
 608     const char *encoding,       /* encoding */
 609     const char *errors          /* error handling */
 610     );
 611
 612 /* Encodes a Unicode object and returns the result as Python string
 613    object. */
 614
 615 extern DL_IMPORT(PyObject*) PyUnicode_AsEncodedString(
 616     PyObject *unicode,          /* Unicode object */
 617     const char *encoding,       /* encoding */
 618     const char *errors          /* error handling */
 619     );
 620
 621 /* --- UTF-7 Codecs ------------------------------------------------------- */
 622
 623 extern DL_IMPORT(PyObject*) PyUnicode_DecodeUTF7(
 624     const char *string,         /* UTF-7 encoded string */
 625     int length,                 /* size of string */
 626     const char *errors          /* error handling */
 627     );
 628
 629 extern DL_IMPORT(PyObject*) PyUnicode_EncodeUTF7(
 630     const Py_UNICODE *data,     /* Unicode char buffer */
 631     int length,                 /* number of Py_UNICODE chars to encode */
 632     int encodeSetO,             /* force the encoder to encode characters in
 633                                    Set O, as described in RFC2152 */
 634     int encodeWhiteSpace,       /* force the encoder to encode space, tab,
 635                                    carriage return and linefeed characters */
 636     const char *errors          /* error handling */
 637     );
 638
 639 /* --- UTF-8 Codecs ------------------------------------------------------- */
 640
 641 extern DL_IMPORT(PyObject*) PyUnicode_DecodeUTF8(
 642     const char *string,         /* UTF-8 encoded string */
 643     int length,                 /* size of string */
 644     const char *errors          /* error handling */
 645     );
 646
 647 extern DL_IMPORT(PyObject*) PyUnicode_AsUTF8String(
 648     PyObject *unicode           /* Unicode object */
 649     );
 650
 651 extern DL_IMPORT(PyObject*) PyUnicode_EncodeUTF8(
 652     const Py_UNICODE *data,     /* Unicode char buffer */
 653     int length,                 /* number of Py_UNICODE chars to encode */
 654     const char *errors          /* error handling */
 655     );
 656
 657 /* --- UTF-16 Codecs ------------------------------------------------------ */
 658
 659 /* Decodes length bytes from a UTF-16 encoded buffer string and returns
 660    the corresponding Unicode object.
 661
 662    errors (if non-NULL) defines the error handling. It defaults
 663    to "strict".
 664
 665    If byteorder is non-NULL, the decoder starts decoding using the
 666    given byte order:
 667
 668         *byteorder == -1: little endian
 669         *byteorder == 0:  native order
 670         *byteorder == 1:  big endian
 671
 672    In native mode, the first two bytes of the stream are checked for a
 673    BOM mark. If found, the BOM mark is analysed, the byte order
 674    adjusted and the BOM skipped.  In the other modes, no BOM mark
 675    interpretation is done. After completion, *byteorder is set to the
 676    current byte order at the end of input data.
 677
 678    If byteorder is NULL, the codec starts in native order mode.
 679
 680 */
 681
 682 extern DL_IMPORT(PyObject*) PyUnicode_DecodeUTF16(
 683     const char *string,         /* UTF-16 encoded string */
 684     int length,                 /* size of string */
 685     const char *errors,         /* error handling */
 686     int *byteorder              /* pointer to byteorder to use
 687                                    0=native;-1=LE,1=BE; updated on
 688                                    exit */
 689     );
 690
 691 /* Returns a Python string using the UTF-16 encoding in native byte
 692    order. The string always starts with a BOM mark.  */
 693
 694 extern DL_IMPORT(PyObject*) PyUnicode_AsUTF16String(
 695     PyObject *unicode           /* Unicode object */
 696     );
 697
 698 /* Returns a Python string object holding the UTF-16 encoded value of
 699    the Unicode data.
 700
 701    If byteorder is not 0, output is written according to the following
 702    byte order:
 703
 704    byteorder == -1: little endian
 705    byteorder == 0:  native byte order (writes a BOM mark)
 706    byteorder == 1:  big endian
 707
 708    If byteorder is 0, the output string will always start with the
 709    Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
 710    prepended.
 711
 712    Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
 713    UCS-2. This trick makes it possible to add full UTF-16 capabilities
 714    at a later point without compromising the APIs.
 715
 716 */
 717
 718 extern DL_IMPORT(PyObject*) PyUnicode_EncodeUTF16(
 719     const Py_UNICODE *data,     /* Unicode char buffer */
 720     int length,                 /* number of Py_UNICODE chars to encode */
 721     const char *errors,         /* error handling */
 722     int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
 723     );
 724
 725 /* --- Unicode-Escape Codecs ---------------------------------------------- */
 726
 727 extern DL_IMPORT(PyObject*) PyUnicode_DecodeUnicodeEscape(
 728     const char *string,         /* Unicode-Escape encoded string */
 729     int length,                 /* size of string */
 730     const char *errors          /* error handling */
 731     );
 732
 733 extern DL_IMPORT(PyObject*) PyUnicode_AsUnicodeEscapeString(
 734     PyObject *unicode           /* Unicode object */
 735     );
 736
 737 extern DL_IMPORT(PyObject*) PyUnicode_EncodeUnicodeEscape(
 738     const Py_UNICODE *data,     /* Unicode char buffer */
 739     int length                  /* Number of Py_UNICODE chars to encode */
 740     );
 741
 742 /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
 743
 744 extern DL_IMPORT(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
 745     const char *string,         /* Raw-Unicode-Escape encoded string */
 746     int length,                 /* size of string */
 747     const char *errors          /* error handling */
 748     );
 749
 750 extern DL_IMPORT(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
 751     PyObject *unicode           /* Unicode object */
 752     );
 753
 754 extern DL_IMPORT(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
 755     const Py_UNICODE *data,     /* Unicode char buffer */
 756     int length                  /* Number of Py_UNICODE chars to encode */
 757     );
 758
 759 /* --- Latin-1 Codecs -----------------------------------------------------
 760
 761    Note: Latin-1 corresponds to the first 256 Unicode ordinals.
 762
 763 */
 764
 765 extern DL_IMPORT(PyObject*) PyUnicode_DecodeLatin1(
 766     const char *string,         /* Latin-1 encoded string */
 767     int length,                 /* size of string */
 768     const char *errors          /* error handling */
 769     );
 770
 771 extern DL_IMPORT(PyObject*) PyUnicode_AsLatin1String(
 772     PyObject *unicode           /* Unicode object */
 773     );
 774
 775 extern DL_IMPORT(PyObject*) PyUnicode_EncodeLatin1(
 776     const Py_UNICODE *data,     /* Unicode char buffer */
 777     int length,                 /* Number of Py_UNICODE chars to encode */
 778     const char *errors          /* error handling */
 779     );
 780
 781 /* --- ASCII Codecs -------------------------------------------------------
 782
 783    Only 7-bit ASCII data is accepted. All other codes generate errors.
 784
 785 */
 786
 787 extern DL_IMPORT(PyObject*) PyUnicode_DecodeASCII(
 788     const char *string,         /* ASCII encoded string */
 789     int length,                 /* size of string */
 790     const char *errors          /* error handling */
 791     );
 792
 793 extern DL_IMPORT(PyObject*) PyUnicode_AsASCIIString(
 794     PyObject *unicode           /* Unicode object */
 795     );
 796
 797 extern DL_IMPORT(PyObject*) PyUnicode_EncodeASCII(
 798     const Py_UNICODE *data,     /* Unicode char buffer */
 799     int length,                 /* Number of Py_UNICODE chars to encode */
 800     const char *errors          /* error handling */
 801     );
 802
 803 /* --- Character Map Codecs -----------------------------------------------
 804
 805    This codec uses mappings to encode and decode characters.
 806
 807    Decoding mappings must map single string characters to single
 808    Unicode characters, integers (which are then interpreted as Unicode
 809    ordinals) or None (meaning "undefined mapping" and causing an
 810    error).
 811
 812    Encoding mappings must map single Unicode characters to single
 813    string characters, integers (which are then interpreted as Latin-1
 814    ordinals) or None (meaning "undefined mapping" and causing an
 815    error).
 816
 817    If a character lookup fails with a LookupError, the character is
 818    copied as-is meaning that its ordinal value will be interpreted as
 819    Unicode or Latin-1 ordinal resp. Because of this, mappings only need
 820    to contain those mappings which map characters to different code
 821    points.
 822
 823 */
 824
 825 extern DL_IMPORT(PyObject*) PyUnicode_DecodeCharmap(
 826     const char *string,         /* Encoded string */
 827     int length,                 /* size of string */
 828     PyObject *mapping,          /* character mapping
 829                                    (char ordinal -> unicode ordinal) */
 830     const char *errors          /* error handling */
 831     );
 832
 833 extern DL_IMPORT(PyObject*) PyUnicode_AsCharmapString(
 834     PyObject *unicode,          /* Unicode object */
 835     PyObject *mapping           /* character mapping
 836                                    (unicode ordinal -> char ordinal) */
 837     );
 838
 839 extern DL_IMPORT(PyObject*) PyUnicode_EncodeCharmap(
 840     const Py_UNICODE *data,     /* Unicode char buffer */
 841     int length,                 /* Number of Py_UNICODE chars to encode */
 842     PyObject *mapping,          /* character mapping
 843                                    (unicode ordinal -> char ordinal) */
 844     const char *errors          /* error handling */
 845     );
 846
 847 /* Translate a Py_UNICODE buffer of the given length by applying a
 848    character mapping table to it and return the resulting Unicode
 849    object.
 850
 851    The mapping table must map Unicode ordinal integers to Unicode
 852    ordinal integers or None (causing deletion of the character).
 853
 854    Mapping tables may be dictionaries or sequences. Unmapped character
 855    ordinals (ones which cause a LookupError) are left untouched and
 856    are copied as-is.
 857
 858 */
 859
 860 extern DL_IMPORT(PyObject *) PyUnicode_TranslateCharmap(
 861     const Py_UNICODE *data,     /* Unicode char buffer */
 862     int length,                 /* Number of Py_UNICODE chars to translate */
 863     PyObject *table,            /* Translation table (dictionary or sequence) */
 864     const char *errors          /* error handling */
 865     );
 866
 867 #ifdef MS_WIN32
 868
 869 /* --- MBCS codecs for Windows -------------------------------------------- */
 870
 871 extern DL_IMPORT(PyObject*) PyUnicode_DecodeMBCS(
 872     const char *string,         /* MBCS encoded string */
 873     int length,                 /* size of string */
 874     const char *errors          /* error handling */
 875     );
 876
 877 extern DL_IMPORT(PyObject*) PyUnicode_AsMBCSString(
 878     PyObject *unicode           /* Unicode object */
 879     );
 880
 881 extern DL_IMPORT(PyObject*) PyUnicode_EncodeMBCS(
 882     const Py_UNICODE *data,     /* Unicode char buffer */
 883     int length,                 /* Number of Py_UNICODE chars to encode */
 884     const char *errors          /* error handling */
 885     );
 886
 887 #endif /* MS_WIN32 */
 888
 889 /* --- Decimal Encoder ---------------------------------------------------- */
 890
 891 /* Takes a Unicode string holding a decimal value and writes it into
 892    an output buffer using standard ASCII digit codes.
 893
 894    The output buffer has to provide at least length+1 bytes of storage
 895    area. The output string is 0-terminated.
 896
 897    The encoder converts whitespace to ' ', decimal characters to their
 898    corresponding ASCII digit and all other Latin-1 characters except
 899    \0 as-is. Characters outside this range (Unicode ordinals 1-255)
 900    are treated as errors. This includes embedded NUL characters.
 901
 902    Error handling is defined by the errors argument:
 903
 904       NULL or "strict": raise a ValueError
 905       "ignore": ignore the wrong characters (these are not copied to the
 906                 output buffer)
 907       "replace": replaces illegal characters with '?'
 908
 909    Returns 0 on success, -1 on failure.
 910
 911 */
 912
 913 extern DL_IMPORT(int) PyUnicode_EncodeDecimal(
 914     Py_UNICODE *s,              /* Unicode buffer */
 915     int length,                 /* Number of Py_UNICODE chars to encode */
 916     char *output,               /* Output buffer; must have size >= length+1 */
 917     const char *errors          /* error handling */
 918     );
 919
 920 /* --- Methods & Slots ----------------------------------------------------
 921
 922    These are capable of handling Unicode objects and strings on input
 923    (we refer to them as strings in the descriptions) and return
 924    Unicode objects or integers as appropriate. */
 925
 926 /* Concat two strings giving a new Unicode string. */
 927
 928 extern DL_IMPORT(PyObject*) PyUnicode_Concat(
 929     PyObject *left,             /* Left string */
 930     PyObject *right             /* Right string */
 931     );
 932
 933 /* Split a string giving a list of Unicode strings.
 934
 935    If sep is NULL, splitting will be done at all whitespace
 936    substrings. Otherwise, splits occur at the given separator.
 937
 938    At most maxsplit splits will be done. If negative, no limit is set.
 939
 940    Separators are not included in the resulting list.
 941
 942 */
 943
 944 extern DL_IMPORT(PyObject*) PyUnicode_Split(
 945     PyObject *s,                /* String to split */
 946     PyObject *sep,              /* String separator */
 947     int maxsplit                /* Maxsplit count */
 948     );
 949
 950 /* Split a string at line breaks giving a list of Unicode strings.
 951
 952    CRLF is considered to be one line break. Line breaks are not
 953    included in the resulting list unless keepends is true. */
 954
 955 extern DL_IMPORT(PyObject*) PyUnicode_Splitlines(
 956     PyObject *s,                /* String to split */
 957     int keepends                /* If true, line end markers are included */
 958     );
 959
 960 /* Translate a string by applying a character mapping table to it and
 961    return the resulting Unicode object.
 962
 963    The mapping table must map Unicode ordinal integers to Unicode
 964    ordinal integers or None (causing deletion of the character).
 965
 966    Mapping tables may be dictionaries or sequences. Unmapped character
 967    ordinals (ones which cause a LookupError) are left untouched and
 968    are copied as-is.
 969
 970 */
 971
 972 extern DL_IMPORT(PyObject *) PyUnicode_Translate(
 973     PyObject *str,              /* String */
 974     PyObject *table,            /* Translate table */
 975     const char *errors          /* error handling */
 976     );
 977
 978 /* Join a sequence of strings using the given separator and return
 979    the resulting Unicode string. */
 980
 981 extern DL_IMPORT(PyObject*) PyUnicode_Join(
 982     PyObject *separator,        /* Separator string */
 983     PyObject *seq               /* Sequence object */
 984     );
 985
 986 /* Return 1 if substr matches str[start:end] at the given tail end, 0
 987    otherwise. */
 988
 989 extern DL_IMPORT(int) PyUnicode_Tailmatch(
 990     PyObject *str,              /* String */
 991     PyObject *substr,           /* Prefix or Suffix string */
 992     int start,                  /* Start index */
 993     int end,                    /* Stop index */
 994     int direction               /* Tail end: -1 prefix, +1 suffix */
 995     );
 996
 997 /* Return the first position of substr in str[start:end] using the
 998    given search direction or -1 if not found. */
 999
1000 extern DL_IMPORT(int) PyUnicode_Find(
1001     PyObject *str,              /* String */
1002     PyObject *substr,           /* Substring to find */
1003     int start,                  /* Start index */
1004     int end,                    /* Stop index */
1005     int direction               /* Find direction: +1 forward, -1 backward */
1006     );
1007
1008 /* Count the number of occurrences of substr in str[start:end]. */
1009
1010 extern DL_IMPORT(int) PyUnicode_Count(
1011     PyObject *str,              /* String */
1012     PyObject *substr,           /* Substring to count */
1013     int start,                  /* Start index */
1014     int end                     /* Stop index */
1015     );
1016
1017 /* Replace at most maxcount occurrences of substr in str with replstr
1018    and return the resulting Unicode object. */
1019
1020 extern DL_IMPORT(PyObject *) PyUnicode_Replace(
1021     PyObject *str,              /* String */
1022     PyObject *substr,           /* Substring to find */
1023     PyObject *replstr,          /* Replacement string */
1024     int maxcount                /* Max. number of replacements to apply;
1025                                    -1 = all */
1026     );
1027
1028 /* Compare two strings and return -1, 0, 1 for less than, equal,
1029    greater than resp. */
1030
1031 extern DL_IMPORT(int) PyUnicode_Compare(
1032     PyObject *left,             /* Left string */
1033     PyObject *right             /* Right string */
1034     );
1035
1036 /* Apply an argument tuple or dictionary to a format string and return
1037    the resulting Unicode string. */
1038
1039 extern DL_IMPORT(PyObject *) PyUnicode_Format(
1040     PyObject *format,           /* Format string */
1041     PyObject *args              /* Argument tuple or dictionary */
1042     );
1043
1044 /* Check whether element is contained in container and return 1/0
1045    accordingly.
1046
1047    element has to coerce to a one-element Unicode string. -1 is
1048    returned in case of an error. */
1049
1050 extern DL_IMPORT(int) PyUnicode_Contains(
1051     PyObject *container,        /* Container string */
1052     PyObject *element           /* Element string */
1053     );
1054
1055 /* Externally visible for str.strip(unicode) */
1056 extern DL_IMPORT(PyObject *) _PyUnicode_XStrip(
1057     PyUnicodeObject *self,
1058     int striptype,
1059     PyObject *sepobj
1060     );
1061
1062 /* === Characters Type APIs =============================================== */
1063
1064 /* These should not be used directly. Use the Py_UNICODE_IS* and
1065    Py_UNICODE_TO* macros instead.
1066
1067    These APIs are implemented in Objects/unicodectype.c.
1068
1069 */
1070
1071 extern DL_IMPORT(int) _PyUnicode_IsLowercase(
1072     Py_UNICODE ch       /* Unicode character */
1073     );
1074
1075 extern DL_IMPORT(int) _PyUnicode_IsUppercase(
1076     Py_UNICODE ch       /* Unicode character */
1077     );
1078
1079 extern DL_IMPORT(int) _PyUnicode_IsTitlecase(
1080     Py_UNICODE ch       /* Unicode character */
1081     );
1082
1083 extern DL_IMPORT(int) _PyUnicode_IsWhitespace(
1084     Py_UNICODE ch       /* Unicode character */
1085     );
1086
1087 extern DL_IMPORT(int) _PyUnicode_IsLinebreak(
1088     Py_UNICODE ch       /* Unicode character */
1089     );
1090
1091 extern DL_IMPORT(Py_UNICODE) _PyUnicode_ToLowercase(
1092     Py_UNICODE ch       /* Unicode character */
1093     );
1094
1095 extern DL_IMPORT(Py_UNICODE) _PyUnicode_ToUppercase(
1096     Py_UNICODE ch       /* Unicode character */
1097     );
1098
1099 extern DL_IMPORT(Py_UNICODE) _PyUnicode_ToTitlecase(
1100     Py_UNICODE ch       /* Unicode character */
1101     );
1102
1103 extern DL_IMPORT(int) _PyUnicode_ToDecimalDigit(
1104     Py_UNICODE ch       /* Unicode character */
1105     );
1106
1107 extern DL_IMPORT(int) _PyUnicode_ToDigit(
1108     Py_UNICODE ch       /* Unicode character */
1109     );
1110
1111 extern DL_IMPORT(double) _PyUnicode_ToNumeric(
1112     Py_UNICODE ch       /* Unicode character */
1113     );
1114
1115 extern DL_IMPORT(int) _PyUnicode_IsDecimalDigit(
1116     Py_UNICODE ch       /* Unicode character */
1117     );
1118
1119 extern DL_IMPORT(int) _PyUnicode_IsDigit(
1120     Py_UNICODE ch       /* Unicode character */
1121     );
1122
1123 extern DL_IMPORT(int) _PyUnicode_IsNumeric(
1124     Py_UNICODE ch       /* Unicode character */
1125     );
1126
1127 extern DL_IMPORT(int) _PyUnicode_IsAlpha(
1128     Py_UNICODE ch       /* Unicode character */
1129     );
1130
1131 #ifdef __cplusplus
1132 }
1133 #endif
1134 #endif /* Py_USING_UNICODE */
1135 #endif /* !Py_UNICODEOBJECT_H */