Include/unicodeobject.h

   1 #ifndef Py_UNICODEOBJECT_H
   2 #define Py_UNICODEOBJECT_H
   3
   4 /*
   5
   6 Unicode implementation based on original code by Fredrik Lundh,
   7 modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
   8 Unicode Integration Proposal (see file Misc/unicode.txt).
   9
  10 Copyright (c) Corporation for National Research Initiatives.
  11
  12
  13  Original header:
  14  --------------------------------------------------------------------
  15
  16  * Yet another Unicode string type for Python.  This type supports the
  17  * 16-bit Basic Multilingual Plane (BMP) only.
  18  *
  19  * Written by Fredrik Lundh, January 1999.
  20  *
  21  * Copyright (c) 1999 by Secret Labs AB.
  22  * Copyright (c) 1999 by Fredrik Lundh.
  23  *
  24  * fredrik@pythonware.com
  25  * http://www.pythonware.com
  26  *
  27  * --------------------------------------------------------------------
  28  * This Unicode String Type is
  29  *
  30  * Copyright (c) 1999 by Secret Labs AB
  31  * Copyright (c) 1999 by Fredrik Lundh
  32  *
  33  * By obtaining, using, and/or copying this software and/or its
  34  * associated documentation, you agree that you have read, understood,
  35  * and will comply with the following terms and conditions:
  36  *
  37  * Permission to use, copy, modify, and distribute this software and its
  38  * associated documentation for any purpose and without fee is hereby
  39  * granted, provided that the above copyright notice appears in all
  40  * copies, and that both that copyright notice and this permission notice
  41  * appear in supporting documentation, and that the name of Secret Labs
  42  * AB or the author not be used in advertising or publicity pertaining to
  43  * distribution of the software without specific, written prior
  44  * permission.
  45  *
  46  * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
  47  * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
  48  * FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
  49  * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  50  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  51  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
  52  * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  53  * -------------------------------------------------------------------- */
  54
  55 #include <ctype.h>
  56
  57 /* === Internal API ======================================================= */
  58
  59 /* --- Internal Unicode Format -------------------------------------------- */
  60
  61 #ifndef Py_USING_UNICODE
  62
  63 #define PyUnicode_Check(op)                 0
  64 #define PyUnicode_CheckExact(op)            0
  65
  66 #else
  67
  68 /* FIXME: MvL's new implementation assumes that Py_UNICODE_SIZE is
  69    properly set, but the default rules below don't set it.  I'll
  70    sort this out some other day -- fredrik@pythonware.com */
  71
  72 #ifndef Py_UNICODE_SIZE
  73 #error Must define Py_UNICODE_SIZE
  74 #endif
  75
  76 /* Setting Py_UNICODE_WIDE enables UCS-4 storage.  Otherwise, Unicode
  77    strings are stored as UCS-2 (with limited support for UTF-16) */
  78
  79 #if Py_UNICODE_SIZE >= 4
  80 #define Py_UNICODE_WIDE
  81 #endif
  82
  83 /* Set these flags if the platform has "wchar.h", "wctype.h" and the
  84    wchar_t type is a 16-bit unsigned type */
  85 /* #define HAVE_WCHAR_H */
  86 /* #define HAVE_USABLE_WCHAR_T */
  87
  88 /* Defaults for various platforms */
  89 #ifndef PY_UNICODE_TYPE
  90
  91 /* Windows has a usable wchar_t type (unless we're using UCS-4) */
  92 # if defined(MS_WIN32) && Py_UNICODE_SIZE == 2
  93 #  define HAVE_USABLE_WCHAR_T
  94 #  define PY_UNICODE_TYPE wchar_t
  95 # endif
  96
  97 # if defined(Py_UNICODE_WIDE)
  98 #  define PY_UNICODE_TYPE Py_UCS4
  99 # endif
 100
 101 #endif
 102
 103 /* If the compiler provides a wchar_t type we try to support it
 104    through the interface functions PyUnicode_FromWideChar() and
 105    PyUnicode_AsWideChar(). */
 106
 107 #ifdef HAVE_USABLE_WCHAR_T
 108 # ifndef HAVE_WCHAR_H
 109 #  define HAVE_WCHAR_H
 110 # endif
 111 #endif
 112
 113 #ifdef HAVE_WCHAR_H
 114 /* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
 115 # ifdef _HAVE_BSDI
 116 #  include <time.h>
 117 # endif
 118 #  include <wchar.h>
 119 #endif
 120
 121 /*
 122  * Use this typedef when you need to represent a UTF-16 surrogate pair
 123  * as a single unsigned integer.
      *
      * The first of unsigned int / unsigned long whose SIZEOF_* macro is
      * at least 4 is chosen.  SIZEOF_INT and SIZEOF_LONG are not defined
      * in this header (presumably they come from the build configuration
      * -- confirm).  There is no #else branch: if neither test succeeds,
      * no Py_UCS4 typedef is emitted and the first use of the name fails
      * to compile.
 124  */
 125 #if SIZEOF_INT >= 4
 126 typedef unsigned int Py_UCS4;
 127 #elif SIZEOF_LONG >= 4
 128 typedef unsigned long Py_UCS4;
 129 #endif
 130
     /* Storage unit of a Unicode buffer.  On wide builds PY_UNICODE_TYPE
        defaults to Py_UCS4, which is why the Py_UCS4 typedef has to come
        before this line.  If PY_UNICODE_TYPE was never defined (neither
        supplied externally nor set by the platform defaults above), this
        declaration is where compilation fails. */
 131 typedef PY_UNICODE_TYPE Py_UNICODE;
 132
 133 /* --- UCS-2/UCS-4 Name Mangling ------------------------------------------ */
 134
 135 /* Unicode API names are mangled to ensure that UCS-2 and UCS-4 builds
 136    produce different external names, so that combining an interpreter
 137    and extensions compiled with different Unicode width assumptions
 138    fails with an import error. */
 139
 /* The build's Py_UNICODE width is baked into every exported symbol:
    narrow builds export PyUnicodeUCS2_* / _PyUnicodeUCS2_*, wide builds
    export PyUnicodeUCS4_* / _PyUnicodeUCS4_*.  The width is selected once
    here; the API names are then listed a single time below.

    `name` is a direct operand of ##, so it is pasted verbatim and is
    never macro-expanded on its own. */
 #ifndef Py_UNICODE_WIDE
 # define Py_UNICODE_MANGLE(name)          PyUnicodeUCS2_##name
 # define Py_UNICODE_MANGLE_PRIVATE(name)  _PyUnicodeUCS2_##name
 #else
 # define Py_UNICODE_MANGLE(name)          PyUnicodeUCS4_##name
 # define Py_UNICODE_MANGLE_PRIVATE(name)  _PyUnicodeUCS4_##name
 #endif

 /* Public API.
    NOTE(review): PyUnicode_DecodeUTF7 and PyUnicode_EncodeUTF7 are
    declared further down in this header but have no entry here, so
    they are exported unmangled. */
 #define PyUnicode_AsASCIIString Py_UNICODE_MANGLE(AsASCIIString)
 #define PyUnicode_AsCharmapString Py_UNICODE_MANGLE(AsCharmapString)
 #define PyUnicode_AsEncodedString Py_UNICODE_MANGLE(AsEncodedString)
 #define PyUnicode_AsLatin1String Py_UNICODE_MANGLE(AsLatin1String)
 #define PyUnicode_AsRawUnicodeEscapeString Py_UNICODE_MANGLE(AsRawUnicodeEscapeString)
 #define PyUnicode_AsUTF16String Py_UNICODE_MANGLE(AsUTF16String)
 #define PyUnicode_AsUTF8String Py_UNICODE_MANGLE(AsUTF8String)
 #define PyUnicode_AsUnicode Py_UNICODE_MANGLE(AsUnicode)
 #define PyUnicode_AsUnicodeEscapeString Py_UNICODE_MANGLE(AsUnicodeEscapeString)
 #define PyUnicode_AsWideChar Py_UNICODE_MANGLE(AsWideChar)
 #define PyUnicode_Compare Py_UNICODE_MANGLE(Compare)
 #define PyUnicode_Concat Py_UNICODE_MANGLE(Concat)
 #define PyUnicode_Contains Py_UNICODE_MANGLE(Contains)
 #define PyUnicode_Count Py_UNICODE_MANGLE(Count)
 #define PyUnicode_Decode Py_UNICODE_MANGLE(Decode)
 #define PyUnicode_DecodeASCII Py_UNICODE_MANGLE(DecodeASCII)
 #define PyUnicode_DecodeCharmap Py_UNICODE_MANGLE(DecodeCharmap)
 #define PyUnicode_DecodeLatin1 Py_UNICODE_MANGLE(DecodeLatin1)
 #define PyUnicode_DecodeRawUnicodeEscape Py_UNICODE_MANGLE(DecodeRawUnicodeEscape)
 #define PyUnicode_DecodeUTF16 Py_UNICODE_MANGLE(DecodeUTF16)
 #define PyUnicode_DecodeUTF8 Py_UNICODE_MANGLE(DecodeUTF8)
 #define PyUnicode_DecodeUnicodeEscape Py_UNICODE_MANGLE(DecodeUnicodeEscape)
 #define PyUnicode_Encode Py_UNICODE_MANGLE(Encode)
 #define PyUnicode_EncodeASCII Py_UNICODE_MANGLE(EncodeASCII)
 #define PyUnicode_EncodeCharmap Py_UNICODE_MANGLE(EncodeCharmap)
 #define PyUnicode_EncodeDecimal Py_UNICODE_MANGLE(EncodeDecimal)
 #define PyUnicode_EncodeLatin1 Py_UNICODE_MANGLE(EncodeLatin1)
 #define PyUnicode_EncodeRawUnicodeEscape Py_UNICODE_MANGLE(EncodeRawUnicodeEscape)
 #define PyUnicode_EncodeUTF16 Py_UNICODE_MANGLE(EncodeUTF16)
 #define PyUnicode_EncodeUTF8 Py_UNICODE_MANGLE(EncodeUTF8)
 #define PyUnicode_EncodeUnicodeEscape Py_UNICODE_MANGLE(EncodeUnicodeEscape)
 #define PyUnicode_Find Py_UNICODE_MANGLE(Find)
 #define PyUnicode_Format Py_UNICODE_MANGLE(Format)
 #define PyUnicode_FromEncodedObject Py_UNICODE_MANGLE(FromEncodedObject)
 #define PyUnicode_FromObject Py_UNICODE_MANGLE(FromObject)
 #define PyUnicode_FromOrdinal Py_UNICODE_MANGLE(FromOrdinal)
 #define PyUnicode_FromUnicode Py_UNICODE_MANGLE(FromUnicode)
 #define PyUnicode_FromWideChar Py_UNICODE_MANGLE(FromWideChar)
 #define PyUnicode_GetDefaultEncoding Py_UNICODE_MANGLE(GetDefaultEncoding)
 #define PyUnicode_GetMax Py_UNICODE_MANGLE(GetMax)
 #define PyUnicode_GetSize Py_UNICODE_MANGLE(GetSize)
 #define PyUnicode_Join Py_UNICODE_MANGLE(Join)
 #define PyUnicode_Replace Py_UNICODE_MANGLE(Replace)
 #define PyUnicode_Resize Py_UNICODE_MANGLE(Resize)
 #define PyUnicode_SetDefaultEncoding Py_UNICODE_MANGLE(SetDefaultEncoding)
 #define PyUnicode_Split Py_UNICODE_MANGLE(Split)
 #define PyUnicode_Splitlines Py_UNICODE_MANGLE(Splitlines)
 #define PyUnicode_Tailmatch Py_UNICODE_MANGLE(Tailmatch)
 #define PyUnicode_Translate Py_UNICODE_MANGLE(Translate)
 #define PyUnicode_TranslateCharmap Py_UNICODE_MANGLE(TranslateCharmap)

 /* Interpreter-internal helpers (leading underscore). */
 #define _PyUnicode_AsDefaultEncodedString Py_UNICODE_MANGLE_PRIVATE(AsDefaultEncodedString)
 #define _PyUnicode_Fini Py_UNICODE_MANGLE_PRIVATE(Fini)
 #define _PyUnicode_Init Py_UNICODE_MANGLE_PRIVATE(Init)
 #define _PyUnicode_IsAlpha Py_UNICODE_MANGLE_PRIVATE(IsAlpha)
 #define _PyUnicode_IsDecimalDigit Py_UNICODE_MANGLE_PRIVATE(IsDecimalDigit)
 #define _PyUnicode_IsDigit Py_UNICODE_MANGLE_PRIVATE(IsDigit)
 #define _PyUnicode_IsLinebreak Py_UNICODE_MANGLE_PRIVATE(IsLinebreak)
 #define _PyUnicode_IsLowercase Py_UNICODE_MANGLE_PRIVATE(IsLowercase)
 #define _PyUnicode_IsNumeric Py_UNICODE_MANGLE_PRIVATE(IsNumeric)
 #define _PyUnicode_IsTitlecase Py_UNICODE_MANGLE_PRIVATE(IsTitlecase)
 #define _PyUnicode_IsUppercase Py_UNICODE_MANGLE_PRIVATE(IsUppercase)
 #define _PyUnicode_IsWhitespace Py_UNICODE_MANGLE_PRIVATE(IsWhitespace)
 #define _PyUnicode_ToDecimalDigit Py_UNICODE_MANGLE_PRIVATE(ToDecimalDigit)
 #define _PyUnicode_ToDigit Py_UNICODE_MANGLE_PRIVATE(ToDigit)
 #define _PyUnicode_ToLowercase Py_UNICODE_MANGLE_PRIVATE(ToLowercase)
 #define _PyUnicode_ToNumeric Py_UNICODE_MANGLE_PRIVATE(ToNumeric)
 #define _PyUnicode_ToTitlecase Py_UNICODE_MANGLE_PRIVATE(ToTitlecase)
 #define _PyUnicode_ToUppercase Py_UNICODE_MANGLE_PRIVATE(ToUppercase)
 284
 285 /* --- Internal Unicode Operations ---------------------------------------- */
 286
 287 /* If you want Python to use the compiler's wctype.h functions instead
 288    of the ones supplied with Python, define WANT_WCTYPE_FUNCTIONS or
 289    configure Python using --with-wctype-functions.  This reduces the
 290    interpreter's code size. */
 291
 /* Character classification and case-mapping macros.

    Only whitespace, alphabetic and lower/upper handling depends on the
    build: it goes to the C library's <wctype.h> when both
    HAVE_USABLE_WCHAR_T and WANT_WCTYPE_FUNCTIONS are defined, and to
    Python's own _PyUnicode_* helpers otherwise.  Everything else always
    uses the _PyUnicode_* helpers. */

 #if defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS)

 #include <wctype.h>

 #define Py_UNICODE_ISSPACE(ch) iswspace(ch)
 #define Py_UNICODE_ISALPHA(ch) iswalpha(ch)
 #define Py_UNICODE_ISLOWER(ch) iswlower(ch)
 #define Py_UNICODE_ISUPPER(ch) iswupper(ch)
 #define Py_UNICODE_TOLOWER(ch) towlower(ch)
 #define Py_UNICODE_TOUPPER(ch) towupper(ch)

 #else

 #define Py_UNICODE_ISSPACE(ch) _PyUnicode_IsWhitespace(ch)
 #define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
 #define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
 #define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
 #define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
 #define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)

 #endif

 /* Build-independent: these have no <wctype.h> counterpart in use here. */
 #define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
 #define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
 #define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)

 #define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
 #define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
 #define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)

 #define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
 #define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
 #define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)

 /* True if ch is alphabetic, decimal, digit or numeric, tested in that
    order with short-circuiting.  ch may be evaluated up to four times,
    so do not pass an expression with side effects. */
 #define Py_UNICODE_ISALNUM(ch) \
     (Py_UNICODE_ISALPHA(ch) || Py_UNICODE_ISDECIMAL(ch) || \
      Py_UNICODE_ISDIGIT(ch) || Py_UNICODE_ISNUMERIC(ch))
 347
 /* Copy `length` Py_UNICODE elements (not bytes) from `source` to
    `target` with memcpy, so the two regions must not overlap.  The
    expression has memcpy's value, i.e. `target`. */
 #define Py_UNICODE_COPY(target, source, length) \
     (memcpy((target), (source), sizeof(Py_UNICODE) * (length)))
 350
 /* Store `value` into the first `length` elements of the Py_UNICODE
    buffer `target`.

    `target` and `value` are evaluated exactly once, and every temporary
    carries a trailing underscore.  A caller may therefore pass
    expressions with side effects, or a variable that happens to be
    named `i`: the earlier `int i` local shadowed such an argument and
    the macro silently filled nothing.

    `length` is still re-evaluated on each iteration and is compared
    against an int counter.  Expands to one statement; the caller
    supplies the terminating `;`. */
 #define Py_UNICODE_FILL(target, value, length) do\
     {int fill_i_; Py_UNICODE *fill_t_ = (target); Py_UNICODE fill_v_ = (value);\
      for (fill_i_ = 0; fill_i_ < (length); fill_i_++) fill_t_[fill_i_] = fill_v_;}\
     while (0)
 354
 /* Non-zero if the contents of `substring` occur in `string` starting at
    element index `offset`.  Both arguments are pointers to objects with
    `str` and `length` members.

    The first element is compared inline as a cheap reject before the
    memcmp over substring->length elements.  No bounds are checked:
    string->length is never consulted, and substring->str[0] is read
    even when substring->length is 0, so the caller must guarantee both
    reads are valid. */
 #define Py_UNICODE_MATCH(string, offset, substring) \
     (((string)->str[(offset)] == (substring)->str[0]) && \
      memcmp(&(string)->str[(offset)], (substring)->str, \
             sizeof(Py_UNICODE) * (substring)->length) == 0)
 359
 360 #ifdef __cplusplus
 361 extern "C" {
 362 #endif
 363
 364 /* --- Unicode Type ------------------------------------------------------- */
 365
 366 typedef struct {
         /* Instance layout of a Unicode object.  The PyUnicode_GET_SIZE,
            PyUnicode_GET_DATA_SIZE, PyUnicode_AS_UNICODE and
            PyUnicode_AS_DATA macros read these fields directly, so the
            member names and types are part of the macro-level interface. */
 367     PyObject_HEAD
 368     int length;                 /* Number of Py_UNICODE elements in str
                                        (PyUnicode_GET_DATA_SIZE multiplies it
                                        by sizeof(Py_UNICODE) to get bytes) */
 369     Py_UNICODE *str;            /* Raw Unicode buffer */
 370     long hash;                  /* Hash value; -1 if not set */
 371     PyObject *defenc;           /* (Default) Encoded version as Python
 372                                    string, or NULL; this is used for
 373                                    implementing the buffer protocol */
 374 } PyUnicodeObject;
 375
 376 PyAPI_DATA(PyTypeObject) PyUnicode_Type;
 377
     /* Type tests for Unicode objects.  PyUnicode_Check defers to
        PyObject_TypeCheck (presumably accepting subtypes as well --
        confirm against object.h).  PyUnicode_CheckExact compares the
        object's ob_type pointer with &PyUnicode_Type and so matches only
        the exact type.  `op` must be a PyObject pointer expression for
        CheckExact, because it is dereferenced without a cast.  When
        Py_USING_UNICODE is not defined, both macros are defined as the
        constant 0 at the top of this header instead. */
 378 #define PyUnicode_Check(op) PyObject_TypeCheck(op, &PyUnicode_Type)
 379 #define PyUnicode_CheckExact(op) ((op)->ob_type == &PyUnicode_Type)
 380
 381 /* Fast access macros */
 /* Unchecked field accessors: `op` is cast to PyUnicodeObject * without
    any type test, so the caller must already know it is a Unicode
    object.

    GET_SIZE       number of Py_UNICODE elements (int)
    GET_DATA_SIZE  the same length in bytes; the sizeof factor makes the
                   result an unsigned size_t
    AS_UNICODE     the Py_UNICODE buffer itself
    AS_DATA        the same buffer viewed as const char * */
 #define PyUnicode_GET_SIZE(op) (((PyUnicodeObject *)(op))->length)
 #define PyUnicode_AS_UNICODE(op) (((PyUnicodeObject *)(op))->str)
 #define PyUnicode_GET_DATA_SIZE(op) \
         (PyUnicode_GET_SIZE(op) * sizeof(Py_UNICODE))
 #define PyUnicode_AS_DATA(op) \
         ((const char *)PyUnicode_AS_UNICODE(op))
 390
 391 /* --- Constants ---------------------------------------------------------- */
 392
 393 /* This Unicode character will be used as replacement character during
 394    decoding if the errors argument is set to "replace". Note: the
 395    Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
 396    Unicode 3.0. */
 397
 398 #define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UNICODE) 0xFFFD)
 399
 400 /* === Public API ========================================================= */
 401
 402 /* --- Plain Py_UNICODE --------------------------------------------------- */
 403
 404 /* Create a Unicode Object from the Py_UNICODE buffer u of the given
 405    size.
 406
 407    u may be NULL which causes the contents to be undefined. It is the
 408    user's responsibility to fill in the needed data afterwards. Note
 409    that modifying the Unicode object contents after construction is
 410    only allowed if u was set to NULL.
 411
 412    The buffer is copied into the new object. */
 413
 414 PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
 415     const Py_UNICODE *u,        /* Unicode buffer */
 416     int size                    /* size of buffer */
 417     );
 418
 419 /* Return a read-only pointer to the Unicode object's internal
 420    Py_UNICODE buffer. */
 421
 422 PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
 423     PyObject *unicode           /* Unicode object */
 424     );
 425
 426 /* Get the length of the Unicode object. */
 427
 428 PyAPI_FUNC(int) PyUnicode_GetSize(
 429     PyObject *unicode           /* Unicode object */
 430     );
 431
 432 /* Get the maximum ordinal for a Unicode character. */
 433 PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
 434
 435 /* Resize an already allocated Unicode object to the new size length.
 436
 437    *unicode is modified to point to the new (resized) object and 0
 438    returned on success.
 439
 440    This API may only be called by the function which also called the
 441    Unicode constructor. The refcount on the object must be 1. Otherwise,
 442    an error is returned.
 443
 444    Error handling is implemented as follows: an exception is set, -1
 445    is returned and *unicode left untouched.
 446
 447 */
 448
 449 PyAPI_FUNC(int) PyUnicode_Resize(
 450     PyObject **unicode,         /* Pointer to the Unicode object */
 451     int length                  /* New length */
 452     );
 453
 454 /* Coerce obj to a Unicode object and return a reference with
 455    *incremented* refcount.
 456
 457    Coercion is done in the following way:
 458
 459    1. String and other char buffer compatible objects are decoded
 460       under the assumptions that they contain data using the current
 461       default encoding. Decoding is done in "strict" mode.
 462
 463    2. All other objects (including Unicode objects) raise an
 464       exception.
 465
 466    The API returns NULL in case of an error. The caller is responsible
 467    for decref'ing the returned objects.
 468
 469 */
 470
 471 PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject(
 472     register PyObject *obj,     /* Object */
 473     const char *encoding,       /* encoding */
 474     const char *errors          /* error handling */
 475     );
 476
 477 /* Coerce obj to a Unicode object and return a reference with
 478    *incremented* refcount.
 479
 480    Unicode objects are passed back as-is (subclasses are converted to
 481    true Unicode objects), all other objects are delegated to
 482    PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in
 483    using the default encoding as basis for decoding the object.
 484
 485    The API returns NULL in case of an error. The caller is responsible
 486    for decref'ing the returned objects.
 487
 488 */
 489
 490 PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
 491     register PyObject *obj      /* Object */
 492     );
 493
 494 /* --- wchar_t support for platforms which support it --------------------- */
 495
 496 #ifdef HAVE_WCHAR_H
 497
 498 /* Create a Unicode Object from the wchar_t buffer w of the given
 499    size.
 500
 501    The buffer is copied into the new object. */
 502
 503 PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
 504     register const wchar_t *w,  /* wchar_t buffer */
 505     int size                    /* size of buffer */
 506     );
 507
 508 /* Copies the Unicode Object contents into the wchar_t buffer w.  At
 509    most size wchar_t characters are copied.
 510
 511    Returns the number of wchar_t characters copied or -1 in case of an
 512    error. */
 513
 514 PyAPI_FUNC(int) PyUnicode_AsWideChar(
 515     PyUnicodeObject *unicode,   /* Unicode object */
 516     register wchar_t *w,        /* wchar_t buffer */
 517     int size                    /* size of buffer */
 518     );
 519
 520 #endif
 521
 522 /* --- Unicode ordinals --------------------------------------------------- */
 523
 524 /* Create a Unicode Object from the given Unicode code point ordinal.
 525
 526    The ordinal must be in range(0x10000) on narrow Python builds
 527    (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is
 528    raised in case it is not.
 529
 530 */
 531
 532 PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
 533
 534 /* === Builtin Codecs =====================================================
 535
 536    Many of these APIs take two arguments encoding and errors. These
 537    parameters encoding and errors have the same semantics as the ones
 538    of the builtin unicode() API.
 539
 540    Setting encoding to NULL causes the default encoding to be used.
 541
 542    Error handling is set by errors which may also be set to NULL
 543    meaning to use the default handling defined for the codec. Default
 544    error handling for all builtin codecs is "strict" (ValueErrors are
 545    raised).
 546
 547    The codecs all use a similar interface. Only deviations from the
 548    generic ones are documented.
 549
 550 */
 551
 552 /* --- Manage the default encoding ---------------------------------------- */
 553
 554 /* Return a Python string holding the default encoded value of the
 555    Unicode object.
 556
 557    The resulting string is cached in the Unicode object for subsequent
 558    usage by this function. The cached version is needed to implement
 559    the character buffer interface and will live (at least) as long as
 560    the Unicode object itself.
 561
 562    The refcount of the string is *not* incremented.
 563
 564    *** Exported for internal use by the interpreter only !!! ***
 565
 566 */
 567
 568 PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString(
 569     PyObject *, const char *);
 570
 571 /* Returns the currently active default encoding.
 572
 573    The default encoding is currently implemented as run-time settable
 574    process global.  This may change in future versions of the
 575    interpreter to become a parameter which is managed on a per-thread
 576    basis.
 577
 578  */
 579
 580 PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void);
 581
 582 /* Sets the currently active default encoding.
 583
 584    Returns 0 on success, -1 in case of an error.
 585
 586  */
 587
 588 PyAPI_FUNC(int) PyUnicode_SetDefaultEncoding(
 589     const char *encoding        /* Encoding name in standard form */
 590     );
 591
 592 /* --- Generic Codecs ----------------------------------------------------- */
 593
 594 /* Create a Unicode object by decoding the encoded string s of the
 595    given size. */
 596
 597 PyAPI_FUNC(PyObject*) PyUnicode_Decode(
 598     const char *s,              /* encoded string */
 599     int size,                   /* size of buffer */
 600     const char *encoding,       /* encoding */
 601     const char *errors          /* error handling */
 602     );
 603
 604 /* Encodes a Py_UNICODE buffer of the given size and returns a
 605    Python string object. */
 606
 607 PyAPI_FUNC(PyObject*) PyUnicode_Encode(
 608     const Py_UNICODE *s,        /* Unicode char buffer */
 609     int size,                   /* number of Py_UNICODE chars to encode */
 610     const char *encoding,       /* encoding */
 611     const char *errors          /* error handling */
 612     );
 613
 614 /* Encodes a Unicode object and returns the result as Python string
 615    object. */
 616
 617 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
 618     PyObject *unicode,          /* Unicode object */
 619     const char *encoding,       /* encoding */
 620     const char *errors          /* error handling */
 621     );
 622
 623 /* --- UTF-7 Codecs ------------------------------------------------------- */
 624
     /* Decode `length` bytes of UTF-7 data starting at `string` into a
        Unicode object.  `errors` follows the convention described under
        "Builtin Codecs" above: NULL selects the codec's default handling.
        NOTE(review): failure presumably returns NULL with an exception
        set -- confirm against the implementation.
        NOTE(review): unlike most of this API, neither UTF-7 function has
        an entry in the UCS-2/UCS-4 name-mangling table. */
 625 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
 626     const char *string,         /* UTF-7 encoded string */
 627     int length,                 /* size of string */
 628     const char *errors          /* error handling */
 629     );
 630
     /* Encode `length` Py_UNICODE elements from `data` as UTF-7.  The two
        int flags widen the set of characters the encoder is forced to
        encode (see the parameter comments); they are presumably treated
        as booleans -- confirm. */
 631 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
 632     const Py_UNICODE *data,     /* Unicode char buffer */
 633     int length,                 /* number of Py_UNICODE chars to encode */
 634     int encodeSetO,             /* force the encoder to encode characters in
 635                                    Set O, as described in RFC2152 */
 636     int encodeWhiteSpace,       /* force the encoder to encode space, tab,
 637                                    carriage return and linefeed characters */
 638     const char *errors          /* error handling */
 639     );
 640
 641 /* --- UTF-8 Codecs ------------------------------------------------------- */
 642
     /* Decode `length` bytes of UTF-8 data starting at `string` into a
        Unicode object.  `errors` may be NULL for the codec default (see
        "Builtin Codecs" above). */
 643 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
 644     const char *string,         /* UTF-8 encoded string */
 645     int length,                 /* size of string */
 646     const char *errors          /* error handling */
 647     );
 648
     /* Object-level encoder: takes a Unicode object instead of a raw
        buffer.  There is no `errors` argument; presumably the codec
        default is used -- confirm. */
 649 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
 650     PyObject *unicode           /* Unicode object */
 651     );
 652
     /* Buffer-level encoder: encodes `length` Py_UNICODE elements from
        `data` as UTF-8. */
 653 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
 654     const Py_UNICODE *data,     /* Unicode char buffer */
 655     int length,                 /* number of Py_UNICODE chars to encode */
 656     const char *errors          /* error handling */
 657     );
 658
 659 /* --- UTF-16 Codecs ------------------------------------------------------ */
 660
 661 /* Decodes length bytes from a UTF-16 encoded buffer string and returns
 662    the corresponding Unicode object.
 663
 664    errors (if non-NULL) defines the error handling. It defaults
 665    to "strict".
 666
 667    If byteorder is non-NULL, the decoder starts decoding using the
 668    given byte order:
 669
 670         *byteorder == -1: little endian
 671         *byteorder == 0:  native order
 672         *byteorder == 1:  big endian
 673
 674    In native mode, the first two bytes of the stream are checked for a
 675    BOM mark. If found, the BOM mark is analysed, the byte order
 676    adjusted and the BOM skipped.  In the other modes, no BOM mark
 677    interpretation is done. After completion, *byteorder is set to the
 678    current byte order at the end of input data.
 679
 680    If byteorder is NULL, the codec starts in native order mode.
 681
 682 */
 683
 684 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
 685     const char *string,         /* UTF-16 encoded string */
 686     int length,                 /* size of string */
 687     const char *errors,         /* error handling */
 688     int *byteorder              /* pointer to byteorder to use
 689                                    0=native;-1=LE,1=BE; updated on
 690                                    exit */
 691     );
 692
 693 /* Returns a Python string using the UTF-16 encoding in native byte
 694    order. The string always starts with a BOM mark.  */
 695
 696 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(
 697     PyObject *unicode           /* Unicode object */
 698     );
 699
 700 /* Returns a Python string object holding the UTF-16 encoded value of
 701    the Unicode data.
 702
 703    Output is written according to the following values of
 704    byteorder:
 705
 706    byteorder == -1: little endian
 707    byteorder == 0:  native byte order (writes a BOM mark)
 708    byteorder == 1:  big endian
 709
 710    If byteorder is 0, the output string will always start with the
 711    Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
 712    prepended.
 713
 714    Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
 715    UCS-2. This trick makes it possible to add full UTF-16 capabilities
 716    at a later point without compromising the APIs.
 717
 718 */
 719
 720 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
 721     const Py_UNICODE *data,     /* Unicode char buffer */
 722     int length,                 /* number of Py_UNICODE chars to encode */
 723     const char *errors,         /* error handling */
 724     int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
 725     );
 726
 727 /* --- Unicode-Escape Codecs ---------------------------------------------- */
 728
     /* Decode `length` bytes of Unicode-Escape encoded data starting at
        `string` into a Unicode object.  `errors` may be NULL for the
        codec default (see "Builtin Codecs" above). */
 729 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
 730     const char *string,         /* Unicode-Escape encoded string */
 731     int length,                 /* size of string */
 732     const char *errors          /* error handling */
 733     );
 734
     /* Object-level encoder: takes a Unicode object instead of a raw
        buffer. */
 735 PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
 736     PyObject *unicode           /* Unicode object */
 737     );
 738
     /* Buffer-level encoder for `length` Py_UNICODE elements from `data`.
        Unlike the UTF-7/UTF-8/Latin-1 encoders, it takes no `errors`
        argument. */
 739 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
 740     const Py_UNICODE *data,     /* Unicode char buffer */
 741     int length                  /* Number of Py_UNICODE chars to encode */
 742     );
 743
 744 /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
 745
     /* Decode `length` bytes of Raw-Unicode-Escape encoded data starting
        at `string` into a Unicode object.  `errors` may be NULL for the
        codec default (see "Builtin Codecs" above). */
 746 PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
 747     const char *string,         /* Raw-Unicode-Escape encoded string */
 748     int length,                 /* size of string */
 749     const char *errors          /* error handling */
 750     );
 751
     /* Object-level encoder: takes a Unicode object instead of a raw
        buffer. */
 752 PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
 753     PyObject *unicode           /* Unicode object */
 754     );
 755
     /* Buffer-level encoder for `length` Py_UNICODE elements from `data`.
        It takes no `errors` argument. */
 756 PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
 757     const Py_UNICODE *data,     /* Unicode char buffer */
 758     int length                  /* Number of Py_UNICODE chars to encode */
 759     );
 760
 761 /* --- Latin-1 Codecs -----------------------------------------------------
 762
 763    Note: Latin-1 corresponds to the first 256 Unicode ordinals.
 764
 765 */
 766
 767 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(
 768     const char *string,         /* Latin-1 encoded string */
 769     int length,                 /* size of string */
 770     const char *errors          /* error handling */
 771     );
 772
 773 PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
 774     PyObject *unicode           /* Unicode object */
 775     );
 776
 777 PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
 778     const Py_UNICODE *data,     /* Unicode char buffer */
 779     int length,                 /* Number of Py_UNICODE chars to encode */
 780     const char *errors          /* error handling */
 781     );
 782
 783 /* --- ASCII Codecs -------------------------------------------------------
 784
 785    Only 7-bit ASCII data is accepted. All other codes generate errors.
 786
 787 */
 788
 789 PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII(
 790     const char *string,         /* ASCII encoded string */
 791     int length,                 /* size of string */
 792     const char *errors          /* error handling */
 793     );
 794
 795 PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
 796     PyObject *unicode           /* Unicode object */
 797     );
 798
 799 PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
 800     const Py_UNICODE *data,     /* Unicode char buffer */
 801     int length,                 /* Number of Py_UNICODE chars to encode */
 802     const char *errors          /* error handling */
 803     );
 804
 805 /* --- Character Map Codecs -----------------------------------------------
 806
 807    This codec uses mappings to encode and decode characters.
 808
 809    Decoding mappings must map single string characters to single
 810    Unicode characters, integers (which are then interpreted as Unicode
 811    ordinals) or None (meaning "undefined mapping" and causing an
 812    error).
 813
 814    Encoding mappings must map single Unicode characters to single
 815    string characters, integers (which are then interpreted as Latin-1
 816    ordinals) or None (meaning "undefined mapping" and causing an
 817    error).
 818
 819    If a character lookup fails with a LookupError, the character is
 820    copied as-is meaning that its ordinal value will be interpreted as
 821    Unicode or Latin-1 ordinal resp. Because of this, mappings only need
 822    to contain those mappings which map characters to different code
 823    points.
 824
 825 */
 826
 827 PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap(
 828     const char *string,         /* Encoded string */
 829     int length,                 /* size of string */
 830     PyObject *mapping,          /* character mapping
 831                                    (char ordinal -> unicode ordinal) */
 832     const char *errors          /* error handling */
 833     );
 834
 835 PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(
 836     PyObject *unicode,          /* Unicode object */
 837     PyObject *mapping           /* character mapping
 838                                    (unicode ordinal -> char ordinal) */
 839     );
 840
 841 PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
 842     const Py_UNICODE *data,     /* Unicode char buffer */
 843     int length,                 /* Number of Py_UNICODE chars to encode */
 844     PyObject *mapping,          /* character mapping
 845                                    (unicode ordinal -> char ordinal) */
 846     const char *errors          /* error handling */
 847     );
 848
 849 /* Translate a Py_UNICODE buffer of the given length by applying a
 850    character mapping table to it and return the resulting Unicode
 851    object.
 852
 853    The mapping table must map Unicode ordinal integers to Unicode
 854    ordinal integers or None (causing deletion of the character).
 855
 856    Mapping tables may be dictionaries or sequences. Unmapped character
 857    ordinals (ones which cause a LookupError) are left untouched and
 858    are copied as-is.
 859
 860 */
 861
 862 PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
 863     const Py_UNICODE *data,     /* Unicode char buffer */
 864     int length,                 /* Number of Py_UNICODE chars to translate */
 865     PyObject *table,            /* Translate table */
 866     const char *errors          /* error handling */
 867     );
 868
 869 #ifdef MS_WIN32
 870
 871 /* --- MBCS codecs for Windows -------------------------------------------- */
 872
 873 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
 874     const char *string,         /* MBCS encoded string */
 875     int length,                 /* size of string */
 876     const char *errors          /* error handling */
 877     );
 878
 879 PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
 880     PyObject *unicode           /* Unicode object */
 881     );
 882
 883 PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
 884     const Py_UNICODE *data,     /* Unicode char buffer */
 885     int length,                 /* Number of Py_UNICODE chars to encode */
 886     const char *errors          /* error handling */
 887     );
 888
 889 #endif /* MS_WIN32 */
 890
 891 /* --- Decimal Encoder ---------------------------------------------------- */
 892
 893 /* Takes a Unicode string holding a decimal value and writes it into
 894    an output buffer using standard ASCII digit codes.
 895
 896    The output buffer has to provide at least length+1 bytes of storage
 897    area. The output string is 0-terminated.
 898
 899    The encoder converts whitespace to ' ', decimal characters to their
 900    corresponding ASCII digit and all other Latin-1 characters except
 901    \0 as-is. Characters outside this range (Unicode ordinals 1-255)
 902    are treated as errors. This includes embedded NUL characters.
 903
 904    Error handling is defined by the errors argument:
 905
 906       NULL or "strict": raise a ValueError
 907       "ignore": ignore the wrong characters (these are not copied to the
 908                 output buffer)
 909       "replace": replaces illegal characters with '?'
 910
 911    Returns 0 on success, -1 on failure.
 912
 913 */
 914
 915 PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
 916     Py_UNICODE *s,              /* Unicode buffer */
 917     int length,                 /* Number of Py_UNICODE chars to encode */
 918     char *output,               /* Output buffer of size >= length+1 */
 919     const char *errors          /* error handling */
 920     );
 921
 922 /* --- Methods & Slots ----------------------------------------------------
 923
 924    These are capable of handling Unicode objects and strings on input
 925    (we refer to them as strings in the descriptions) and return
 926    Unicode objects or integers as appropriate. */
 927
 928 /* Concat two strings giving a new Unicode string. */
 929
 930 PyAPI_FUNC(PyObject*) PyUnicode_Concat(
 931     PyObject *left,             /* Left string */
 932     PyObject *right             /* Right string */
 933     );
 934
 935 /* Split a string giving a list of Unicode strings.
 936
 937    If sep is NULL, splitting will be done at all whitespace
 938    substrings. Otherwise, splits occur at the given separator.
 939
 940    At most maxsplit splits will be done. If negative, no limit is set.
 941
 942    Separators are not included in the resulting list.
 943
 944 */
 945
 946 PyAPI_FUNC(PyObject*) PyUnicode_Split(
 947     PyObject *s,                /* String to split */
 948     PyObject *sep,              /* String separator */
 949     int maxsplit                /* Maxsplit count */
 950     );
 951
 952 /* Ditto, but split at line breaks.
 953
 954    CRLF is considered to be one line break. Line breaks are not
 955    included in the resulting list unless keepends is true. */
 956
 957 PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(
 958     PyObject *s,                /* String to split */
 959     int keepends                /* If true, line end markers are included */
 960     );
 961
 962 /* Translate a string by applying a character mapping table to it and
 963    return the resulting Unicode object.
 964
 965    The mapping table must map Unicode ordinal integers to Unicode
 966    ordinal integers or None (causing deletion of the character).
 967
 968    Mapping tables may be dictionaries or sequences. Unmapped character
 969    ordinals (ones which cause a LookupError) are left untouched and
 970    are copied as-is.
 971
 972 */
 973
 974 PyAPI_FUNC(PyObject *) PyUnicode_Translate(
 975     PyObject *str,              /* String */
 976     PyObject *table,            /* Translate table */
 977     const char *errors          /* error handling */
 978     );
 979
 980 /* Join a sequence of strings using the given separator and return
 981    the resulting Unicode string. */
 982
 983 PyAPI_FUNC(PyObject*) PyUnicode_Join(
 984     PyObject *separator,        /* Separator string */
 985     PyObject *seq               /* Sequence object */
 986     );
 987
 988 /* Return 1 if substr matches str[start:end] at the given tail end, 0
 989    otherwise. */
 990
 991 PyAPI_FUNC(int) PyUnicode_Tailmatch(
 992     PyObject *str,              /* String */
 993     PyObject *substr,           /* Prefix or Suffix string */
 994     int start,                  /* Start index */
 995     int end,                    /* Stop index */
 996     int direction               /* Tail end: -1 prefix, +1 suffix */
 997     );
 998
 999 /* Return the first position of substr in str[start:end] using the
1000    given search direction or -1 if not found. -2 is returned in case
1001    an error occurred and an exception is set. */
1002
1003 PyAPI_FUNC(int) PyUnicode_Find(
1004     PyObject *str,              /* String */
1005     PyObject *substr,           /* Substring to find */
1006     int start,                  /* Start index */
1007     int end,                    /* Stop index */
1008     int direction               /* Find direction: +1 forward, -1 backward */
1009     );
1010
1011 /* Count the number of occurrences of substr in str[start:end]. */
1012
1013 PyAPI_FUNC(int) PyUnicode_Count(
1014     PyObject *str,              /* String */
1015     PyObject *substr,           /* Substring to count */
1016     int start,                  /* Start index */
1017     int end                     /* Stop index */
1018     );
1019
1020 /* Replace at most maxcount occurrences of substr in str with replstr
1021    and return the resulting Unicode object. */
1022
1023 PyAPI_FUNC(PyObject *) PyUnicode_Replace(
1024     PyObject *str,              /* String */
1025     PyObject *substr,           /* Substring to find */
1026     PyObject *replstr,          /* Replacement for each occurrence */
1027     int maxcount                /* Max. number of replacements to apply;
1028                                    -1 = all */
1029     );
1030
1031 /* Compare two strings and return -1, 0, 1 for less than, equal,
1032    greater than resp. */
1033
1034 PyAPI_FUNC(int) PyUnicode_Compare(
1035     PyObject *left,             /* Left string */
1036     PyObject *right             /* Right string */
1037     );
1038
1039 /* Apply an argument tuple or dictionary to a format string and return
1040    the resulting Unicode string. */
1041
1042 PyAPI_FUNC(PyObject *) PyUnicode_Format(
1043     PyObject *format,           /* Format string */
1044     PyObject *args              /* Argument tuple or dictionary */
1045     );
1046
1047 /* Checks whether element is contained in container and returns 1/0
1048    accordingly.
1049
1050    element has to coerce to a one-element Unicode string. -1 is
1051    returned in case of an error. */
1052
1053 PyAPI_FUNC(int) PyUnicode_Contains(
1054     PyObject *container,        /* Container string */
1055     PyObject *element           /* Element string */
1056     );
1057
1058 /* Externally visible for str.strip(unicode) */
1059 PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
1060     PyUnicodeObject *self,
1061     int striptype,
1062     PyObject *sepobj
1063     );
1064
1065 /* === Characters Type APIs =============================================== */
1066
1067 /* These should not be used directly. Use the Py_UNICODE_IS* and
1068    Py_UNICODE_TO* macros instead.
1069
1070    These APIs are implemented in Objects/unicodectype.c.
1071
1072 */
1073
1074 PyAPI_FUNC(int) _PyUnicode_IsLowercase(
1075     Py_UNICODE ch       /* Unicode character */
1076     );
1077
1078 PyAPI_FUNC(int) _PyUnicode_IsUppercase(
1079     Py_UNICODE ch       /* Unicode character */
1080     );
1081
1082 PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
1083     Py_UNICODE ch       /* Unicode character */
1084     );
1085
1086 PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
1087     Py_UNICODE ch       /* Unicode character */
1088     );
1089
1090 PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
1091     Py_UNICODE ch       /* Unicode character */
1092     );
1093
1094 PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToLowercase(
1095     Py_UNICODE ch       /* Unicode character */
1096     );
1097
1098 PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToUppercase(
1099     Py_UNICODE ch       /* Unicode character */
1100     );
1101
1102 PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToTitlecase(
1103     Py_UNICODE ch       /* Unicode character */
1104     );
1105
1106 PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
1107     Py_UNICODE ch       /* Unicode character */
1108     );
1109
1110 PyAPI_FUNC(int) _PyUnicode_ToDigit(
1111     Py_UNICODE ch       /* Unicode character */
1112     );
1113
1114 PyAPI_FUNC(double) _PyUnicode_ToNumeric(
1115     Py_UNICODE ch       /* Unicode character */
1116     );
1117
1118 PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
1119     Py_UNICODE ch       /* Unicode character */
1120     );
1121
1122 PyAPI_FUNC(int) _PyUnicode_IsDigit(
1123     Py_UNICODE ch       /* Unicode character */
1124     );
1125
1126 PyAPI_FUNC(int) _PyUnicode_IsNumeric(
1127     Py_UNICODE ch       /* Unicode character */
1128     );
1129
1130 PyAPI_FUNC(int) _PyUnicode_IsAlpha(
1131     Py_UNICODE ch       /* Unicode character */
1132     );
1133
1134 #ifdef __cplusplus
1135 }
1136 #endif
1137 #endif /* Py_USING_UNICODE */
1138 #endif /* !Py_UNICODEOBJECT_H */