third_party/cython/src/Cython/Includes/cpython/unicode.pxd

   1 cdef extern from *:
   2     # Return true if the object o is a Unicode object or an instance
   3     # of a Unicode subtype. Changed in version 2.2: Allowed subtypes
   4     # to be accepted.
   5     bint PyUnicode_Check(object o)
   6
   7     # Return true if the object o is a Unicode object, but not an
   8     # instance of a subtype. New in version 2.2.
   9     bint PyUnicode_CheckExact(object o)
  10
  11     # Return the size of the object. o has to be a PyUnicodeObject
  12     # (not checked).
  13     Py_ssize_t PyUnicode_GET_SIZE(object o)
  14
  15     # Return the size of the object's internal buffer in bytes. o has
  16     # to be a PyUnicodeObject (not checked).
  17     Py_ssize_t PyUnicode_GET_DATA_SIZE(object o)
  18
  19     # Return a pointer to the internal Py_UNICODE buffer of the
  20     # object. o has to be a PyUnicodeObject (not checked).
  21     Py_UNICODE* PyUnicode_AS_UNICODE(object o)
  22
  23     # Return a pointer to the internal buffer of the object. o has to
  24     # be a PyUnicodeObject (not checked).
  25     char* PyUnicode_AS_DATA(object o)
  26
  27     # Return 1 or 0 depending on whether ch is a whitespace character.
  28     bint Py_UNICODE_ISSPACE(Py_UNICODE ch)
  29
  30     # Return 1 or 0 depending on whether ch is a lowercase character.
  31     bint Py_UNICODE_ISLOWER(Py_UNICODE ch)
  32
  33     # Return 1 or 0 depending on whether ch is an uppercase character.
  34     bint Py_UNICODE_ISUPPER(Py_UNICODE ch)
  35
  36     # Return 1 or 0 depending on whether ch is a titlecase character.
  37     bint Py_UNICODE_ISTITLE(Py_UNICODE ch)
  38
  39     # Return 1 or 0 depending on whether ch is a linebreak character.
  40     bint Py_UNICODE_ISLINEBREAK(Py_UNICODE ch)
  41
  42     # Return 1 or 0 depending on whether ch is a decimal character.
  43     bint Py_UNICODE_ISDECIMAL(Py_UNICODE ch)
  44
  45     # Return 1 or 0 depending on whether ch is a digit character.
  46     bint Py_UNICODE_ISDIGIT(Py_UNICODE ch)
  47
  48     # Return 1 or 0 depending on whether ch is a numeric character.
  49     bint Py_UNICODE_ISNUMERIC(Py_UNICODE ch)
  50
  51     # Return 1 or 0 depending on whether ch is an alphabetic character.
  52     bint Py_UNICODE_ISALPHA(Py_UNICODE ch)
  53
  54     # Return 1 or 0 depending on whether ch is an alphanumeric character.
  55     bint Py_UNICODE_ISALNUM(Py_UNICODE ch)
  56
  57     # Return the character ch converted to lower case.
  58     Py_UNICODE Py_UNICODE_TOLOWER(Py_UNICODE ch)
  59
  60     # Return the character ch converted to upper case.
  61     Py_UNICODE Py_UNICODE_TOUPPER(Py_UNICODE ch)
  62
  63     # Return the character ch converted to title case.
  64     Py_UNICODE Py_UNICODE_TOTITLE(Py_UNICODE ch)
  65
  66     # Return the character ch converted to a decimal positive
  67     # integer. Return -1 if this is not possible. This macro does not
  68     # raise exceptions.
  69     int Py_UNICODE_TODECIMAL(Py_UNICODE ch)
  70
  71     # Return the character ch converted to a single digit
  72     # integer. Return -1 if this is not possible. This macro does not
  73     # raise exceptions.
  74     int Py_UNICODE_TODIGIT(Py_UNICODE ch)
  75
  76     # Return the character ch converted to a double. Return -1.0 if
  77     # this is not possible. This macro does not raise exceptions.
  78     double Py_UNICODE_TONUMERIC(Py_UNICODE ch)
  79
  80     # To create Unicode objects and access their basic sequence
  81     # properties, use these APIs:
  82
  83     # Create a Unicode Object from the Py_UNICODE buffer u of the
  84     # given size. u may be NULL which causes the contents to be
  85     # undefined. It is the user's responsibility to fill in the needed
  86     # data. The buffer is copied into the new object. If the buffer is
  87     # not NULL, the return value might be a shared object. Therefore,
  88     # modification of the resulting Unicode object is only allowed
  89     # when u is NULL.
  90     object PyUnicode_FromUnicode(Py_UNICODE *u, Py_ssize_t size)
  91
  92     # Create a Unicode Object from the given Unicode code point ordinal.
  93     #
  94     # The ordinal must be in range(0x10000) on narrow Python builds
  95     # (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError
  96     # is raised in case it is not.
  97     object PyUnicode_FromOrdinal(int ordinal)
  98
  99     # Return a read-only pointer to the Unicode object's internal
  100     # Py_UNICODE buffer, NULL if o is not a Unicode object.
 101     Py_UNICODE* PyUnicode_AsUnicode(object o) except NULL
 102
 103     # Return the length of the Unicode object.
 104     Py_ssize_t PyUnicode_GetSize(object o) except -1
 105
  106     # Coerce an encoded object obj to a Unicode object and return a
 107     # reference with incremented refcount.
 108     # String and other char buffer compatible objects are decoded
 109     # according to the given encoding and using the error handling
 110     # defined by errors. Both can be NULL to have the interface use
 111     # the default values (see the next section for details).
 112     # All other objects, including Unicode objects, cause a TypeError
 113     # to be set.
 114     object PyUnicode_FromEncodedObject(object o, char *encoding, char *errors)
 115
 116     # Shortcut for PyUnicode_FromEncodedObject(obj, NULL, "strict")
 117     # which is used throughout the interpreter whenever coercion to
 118     # Unicode is needed.
 119     object PyUnicode_FromObject(object obj)
 120
 121     # If the platform supports wchar_t and provides a header file
 122     # wchar.h, Python can interface directly to this type using the
 123     # following functions. Support is optimized if Python's own
 124     # Py_UNICODE type is identical to the system's wchar_t.
 125
 126     #ctypedef int wchar_t
 127
 128     # Create a Unicode object from the wchar_t buffer w of the given
 129     # size. Return NULL on failure.
 130     #PyObject* PyUnicode_FromWideChar(wchar_t *w, Py_ssize_t size)
 131
 132     #Py_ssize_t PyUnicode_AsWideChar(object o, wchar_t *w, Py_ssize_t size)
 133
 134 # Codecs
 135
 136     # Create a Unicode object by decoding size bytes of the encoded
 137     # string s. encoding and errors have the same meaning as the
 138     # parameters of the same name in the unicode() builtin
 139     # function. The codec to be used is looked up using the Python
 140     # codec registry. Return NULL if an exception was raised by the
 141     # codec.
 142     object PyUnicode_Decode(char *s, Py_ssize_t size, char *encoding, char *errors)
 143
 144     # Encode the Py_UNICODE buffer of the given size and return a
 145     # Python string object. encoding and errors have the same meaning
 146     # as the parameters of the same name in the Unicode encode()
 147     # method. The codec to be used is looked up using the Python codec
 148     # registry. Return NULL if an exception was raised by the codec.
 149     object PyUnicode_Encode(Py_UNICODE *s, Py_ssize_t size,
 150                             char *encoding, char *errors)
 151
 152     # Encode a Unicode object and return the result as Python string
 153     # object. encoding and errors have the same meaning as the
 154     # parameters of the same name in the Unicode encode() method. The
 155     # codec to be used is looked up using the Python codec
 156     # registry. Return NULL if an exception was raised by the codec.
 157     object PyUnicode_AsEncodedString(object unicode, char *encoding, char *errors)
 158
 159 # These are the UTF-8 codec APIs:
 160
 161     # Create a Unicode object by decoding size bytes of the UTF-8
 162     # encoded string s. Return NULL if an exception was raised by the
 163     # codec.
 164     object PyUnicode_DecodeUTF8(char *s, Py_ssize_t size, char *errors)
 165
 166     # If consumed is NULL, behave like PyUnicode_DecodeUTF8(). If
 167     # consumed is not NULL, trailing incomplete UTF-8 byte sequences
 168     # will not be treated as an error. Those bytes will not be decoded
 169     # and the number of bytes that have been decoded will be stored in
 170     # consumed. New in version 2.4.
 171     object PyUnicode_DecodeUTF8Stateful(char *s, Py_ssize_t size, char *errors, Py_ssize_t *consumed)
 172
 173     # Encode the Py_UNICODE buffer of the given size using UTF-8 and
 174     # return a Python string object. Return NULL if an exception was
 175     # raised by the codec.
 176     object PyUnicode_EncodeUTF8(Py_UNICODE *s, Py_ssize_t size, char *errors)
 177
  178     # Encode a Unicode object using UTF-8 and return the result as a Python string object. Error handling is ``strict''. Return NULL if an exception was raised by the codec.
 179     object PyUnicode_AsUTF8String(object unicode)
 180
 181 # These are the UTF-16 codec APIs:
 182
 183     # Decode length bytes from a UTF-16 encoded buffer string and
 184     # return the corresponding Unicode object. errors (if non-NULL)
 185     # defines the error handling. It defaults to ``strict''.
 186     #
 187     # If byteorder is non-NULL, the decoder starts decoding using the
 188     # given byte order:
 189     #
 190     #   *byteorder == -1: little endian
 191     #   *byteorder == 0:  native order
 192     #   *byteorder == 1:  big endian
 193     #
 194     # and then switches if the first two bytes of the input data are a
 195     # byte order mark (BOM) and the specified byte order is native
 196     # order. This BOM is not copied into the resulting Unicode
  197     # string. After completion, *byteorder is set to the current byte
  198     # order at the end of input data.
 199     #
 200     # If byteorder is NULL, the codec starts in native order mode.
 201     object PyUnicode_DecodeUTF16(char *s, Py_ssize_t size, char *errors, int *byteorder)
 202
 203     # If consumed is NULL, behave like PyUnicode_DecodeUTF16(). If
 204     # consumed is not NULL, PyUnicode_DecodeUTF16Stateful() will not
 205     # treat trailing incomplete UTF-16 byte sequences (such as an odd
 206     # number of bytes or a split surrogate pair) as an error. Those
 207     # bytes will not be decoded and the number of bytes that have been
 208     # decoded will be stored in consumed. New in version 2.4.
 209     object PyUnicode_DecodeUTF16Stateful(char *s, Py_ssize_t size, char *errors, int *byteorder, Py_ssize_t *consumed)
 210
 211     # Return a Python string object holding the UTF-16 encoded value
 212     # of the Unicode data in s. If byteorder is not 0, output is
 213     # written according to the following byte order:
 214     #
 215     #   byteorder == -1: little endian
 216     #   byteorder == 0:  native byte order (writes a BOM mark)
 217     #   byteorder == 1:  big endian
 218     #
 219     # If byteorder is 0, the output string will always start with the
 220     # Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark
 221     # is prepended.
 222     #
 223     # If Py_UNICODE_WIDE is defined, a single Py_UNICODE value may get
 224     # represented as a surrogate pair. If it is not defined, each
  225     # Py_UNICODE value is interpreted as a UCS-2 character.
 226     object PyUnicode_EncodeUTF16(Py_UNICODE *s, Py_ssize_t size, char *errors, int byteorder)
 227
 228     # Return a Python string using the UTF-16 encoding in native byte
 229     # order. The string always starts with a BOM mark. Error handling
 230     # is ``strict''. Return NULL if an exception was raised by the
 231     # codec.
 232     object PyUnicode_AsUTF16String(object unicode)
 233
 234 # These are the ``Unicode Escape'' codec APIs:
 235
 236     # Create a Unicode object by decoding size bytes of the
 237     # Unicode-Escape encoded string s. Return NULL if an exception was
 238     # raised by the codec.
 239     object PyUnicode_DecodeUnicodeEscape(char *s, Py_ssize_t size, char *errors)
 240
 241     # Encode the Py_UNICODE buffer of the given size using
 242     # Unicode-Escape and return a Python string object. Return NULL if
 243     # an exception was raised by the codec.
 244     object PyUnicode_EncodeUnicodeEscape(Py_UNICODE *s, Py_ssize_t size)
 245
  246     # Encode a Unicode object using Unicode-Escape and return the
 247     # result as Python string object. Error handling is
 248     # ``strict''. Return NULL if an exception was raised by the codec.
 249     object PyUnicode_AsUnicodeEscapeString(object unicode)
 250
 251 # These are the ``Raw Unicode Escape'' codec APIs:
 252
 253     # Create a Unicode object by decoding size bytes of the
 254     # Raw-Unicode-Escape encoded string s. Return NULL if an exception
 255     # was raised by the codec.
 256     object PyUnicode_DecodeRawUnicodeEscape(char *s, Py_ssize_t size, char *errors)
 257
 258     # Encode the Py_UNICODE buffer of the given size using
 259     # Raw-Unicode-Escape and return a Python string object. Return
 260     # NULL if an exception was raised by the codec.
 261     object PyUnicode_EncodeRawUnicodeEscape(Py_UNICODE *s, Py_ssize_t size, char *errors)
 262
  263     # Encode a Unicode object using Raw-Unicode-Escape and return the
 264     # result as Python string object. Error handling is
 265     # ``strict''. Return NULL if an exception was raised by the codec.
 266     object PyUnicode_AsRawUnicodeEscapeString(object unicode)
 267
 268 # These are the Latin-1 codec APIs: Latin-1 corresponds to the first 256 Unicode ordinals and only these are accepted by the codecs during encoding.
 269
 270     # Create a Unicode object by decoding size bytes of the Latin-1
 271     # encoded string s. Return NULL if an exception was raised by the
 272     # codec.
 273     object PyUnicode_DecodeLatin1(char *s, Py_ssize_t size, char *errors)
 274
 275     # Encode the Py_UNICODE buffer of the given size using Latin-1 and
 276     # return a Python string object. Return NULL if an exception was
 277     # raised by the codec.
 278     object PyUnicode_EncodeLatin1(Py_UNICODE *s, Py_ssize_t size, char *errors)
 279
  280     # Encode a Unicode object using Latin-1 and return the result as
 281     # Python string object. Error handling is ``strict''. Return NULL
 282     # if an exception was raised by the codec.
 283     object PyUnicode_AsLatin1String(object unicode)
 284
 285 # These are the ASCII codec APIs. Only 7-bit ASCII data is
 286 # accepted. All other codes generate errors.
 287
 288     # Create a Unicode object by decoding size bytes of the ASCII
 289     # encoded string s. Return NULL if an exception was raised by the
 290     # codec.
 291     object PyUnicode_DecodeASCII(char *s, Py_ssize_t size, char *errors)
 292
 293     # Encode the Py_UNICODE buffer of the given size using ASCII and
 294     # return a Python string object. Return NULL if an exception was
 295     # raised by the codec.
 296     object PyUnicode_EncodeASCII(Py_UNICODE *s, Py_ssize_t size, char *errors)
 297
  298     # Encode a Unicode object using ASCII and return the result as
 299     # Python string object. Error handling is ``strict''. Return NULL
 300     # if an exception was raised by the codec.
 301     object PyUnicode_AsASCIIString(object o)
 302
 303 # These are the mapping codec APIs:
 304 #
 305 # This codec is special in that it can be used to implement many
 306 # different codecs (and this is in fact what was done to obtain most
 307 # of the standard codecs included in the encodings package). The codec
 308 # uses mapping to encode and decode characters.
 309 #
 310 # Decoding mappings must map single string characters to single
 311 # Unicode characters, integers (which are then interpreted as Unicode
 312 # ordinals) or None (meaning "undefined mapping" and causing an
 313 # error).
 314 #
 315 # Encoding mappings must map single Unicode characters to single
 316 # string characters, integers (which are then interpreted as Latin-1
 317 # ordinals) or None (meaning "undefined mapping" and causing an
 318 # error).
 319 #
 320 # The mapping objects provided must only support the __getitem__
 321 # mapping interface.
 322 #
 323 # If a character lookup fails with a LookupError, the character is
 324 # copied as-is meaning that its ordinal value will be interpreted as
 325 # Unicode or Latin-1 ordinal resp. Because of this, mappings only need
 326 # to contain those mappings which map characters to different code
 327 # points.
 328
 329     # Create a Unicode object by decoding size bytes of the encoded
 330     # string s using the given mapping object. Return NULL if an
 331     # exception was raised by the codec. If mapping is NULL latin-1
 332     # decoding will be done. Else it can be a dictionary mapping byte
 333     # or a unicode string, which is treated as a lookup table. Byte
  334     # values greater than the length of the string and U+FFFE
 335     # "characters" are treated as "undefined mapping". Changed in
 336     # version 2.4: Allowed unicode string as mapping argument.
 337     object PyUnicode_DecodeCharmap(char *s, Py_ssize_t size, object mapping, char *errors)
 338
 339     # Encode the Py_UNICODE buffer of the given size using the given
 340     # mapping object and return a Python string object. Return NULL if
 341     # an exception was raised by the codec.
 342     object PyUnicode_EncodeCharmap(Py_UNICODE *s, Py_ssize_t size, object mapping, char *errors)
 343
  344     # Encode a Unicode object using the given mapping object and
 345     # return the result as Python string object. Error handling is
 346     # ``strict''. Return NULL if an exception was raised by the codec.
 347     object PyUnicode_AsCharmapString(object o, object mapping)
 348
  349 # The following codec API is special in that it maps Unicode to Unicode.
 350
 351     # Translate a Py_UNICODE buffer of the given length by applying a
 352     # character mapping table to it and return the resulting Unicode
 353     # object. Return NULL when an exception was raised by the codec.
 354     #
 355     # The mapping table must map Unicode ordinal integers to Unicode
 356     # ordinal integers or None (causing deletion of the character).
 357     #
 358     # Mapping tables need only provide the __getitem__() interface;
 359     # dictionaries and sequences work well. Unmapped character
 360     # ordinals (ones which cause a LookupError) are left untouched and
 361     # are copied as-is.
 362     object PyUnicode_TranslateCharmap(Py_UNICODE *s, Py_ssize_t size,
 363                                       object table, char *errors)
 364
 365 # These are the MBCS codec APIs. They are currently only available on
 366 # Windows and use the Win32 MBCS converters to implement the
 367 # conversions. Note that MBCS (or DBCS) is a class of encodings, not
 368 # just one. The target encoding is defined by the user settings on the
 369 # machine running the codec.
 370
 371     # Create a Unicode object by decoding size bytes of the MBCS
 372     # encoded string s. Return NULL if an exception was raised by the
 373     # codec.
 374     object PyUnicode_DecodeMBCS(char *s, Py_ssize_t size, char *errors)
 375
 376     # If consumed is NULL, behave like PyUnicode_DecodeMBCS(). If
 377     # consumed is not NULL, PyUnicode_DecodeMBCSStateful() will not
 378     # decode trailing lead byte and the number of bytes that have been
 379     # decoded will be stored in consumed. New in version 2.5.
 380     # NOTE: Python 2.x uses 'int' values for 'size' and 'consumed' (changed in 3.0)
 381     object PyUnicode_DecodeMBCSStateful(char *s, Py_ssize_t size, char *errors, Py_ssize_t *consumed)
 382
 383     # Encode the Py_UNICODE buffer of the given size using MBCS and
 384     # return a Python string object. Return NULL if an exception was
 385     # raised by the codec.
 386     object PyUnicode_EncodeMBCS(Py_UNICODE *s, Py_ssize_t size, char *errors)
 387
  388     # Encode a Unicode object using MBCS and return the result as
 389     # Python string object. Error handling is ``strict''. Return NULL
 390     # if an exception was raised by the codec.
 391     object PyUnicode_AsMBCSString(object o)