Objects/stringobject.c

   1 /* String (str/bytes) object implementation */
   2
   3 #define PY_SSIZE_T_CLEAN
   4
   5 #include "Python.h"
   6 #include <ctype.h>
   7 #include <stddef.h>
   8
#ifdef COUNT_ALLOCS
/* Allocation statistics: bumped each time the cached empty string
   (null_strings) or a cached one-character string (one_strings) is handed
   out instead of allocating a new object. */
Py_ssize_t null_strings, one_strings;
#endif

/* Cache of shared one-character strings, indexed by the character's byte
   value.  Entries start out NULL and are filled in by the string
   constructors the first time each one-character string is created. */
static PyStringObject *characters[UCHAR_MAX + 1];
/* The shared empty string; NULL until the first empty string is created. */
static PyStringObject *nullstring;
  15
  16 /* This dictionary holds all interned strings.  Note that references to
  17    strings in this dictionary are *not* counted in the string's ob_refcnt.
  18    When the interned string reaches a refcnt of 0 the string deallocation
  19    function will delete the reference from this dictionary.
  20
   21    Another way to look at this is to say that the actual reference
  22    count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
  23 */
  24 static PyObject *interned;
  25
  26 /* PyStringObject_SIZE gives the basic size of a string; any memory allocation
  27    for a string of length n should request PyStringObject_SIZE + n bytes.
  28
  29    Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
  30    3 bytes per string allocation on a typical system.
  31 */
  32 #define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
  33
  34 /*
  35    For both PyString_FromString() and PyString_FromStringAndSize(), the
  36    parameter `size' denotes number of characters to allocate, not counting any
  37    null terminating character.
  38
  39    For PyString_FromString(), the parameter `str' points to a null-terminated
  40    string containing exactly `size' bytes.
  41
   42    For PyString_FromStringAndSize(), the parameter `str' is
  43    either NULL or else points to a string containing at least `size' bytes.
  44    For PyString_FromStringAndSize(), the string in the `str' parameter does
  45    not have to be null-terminated.  (Therefore it is safe to construct a
  46    substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
  47    If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
  48    bytes (setting the last byte to the null terminating character) and you can
  49    fill in the data yourself.  If `str' is non-NULL then the resulting
  50    PyString object must be treated as immutable and you must not fill in nor
  51    alter the data yourself, since the strings may be shared.
  52
  53    The PyObject member `op->ob_size', which denotes the number of "extra
  54    items" in a variable-size object, will contain the number of bytes
  55    allocated for string data, not counting the null terminating character.  It
   56    is therefore equal to the `size' parameter (for
  57    PyString_FromStringAndSize()) or the length of the string in the `str'
  58    parameter (for PyString_FromString()).
  59 */
  60 PyObject *
  61 PyString_FromStringAndSize(const char *str, Py_ssize_t size)
  62 {
  63     register PyStringObject *op;
  64     if (size < 0) {
  65         PyErr_SetString(PyExc_SystemError,
  66             "Negative size passed to PyString_FromStringAndSize");
  67         return NULL;
  68     }
  69     if (size == 0 && (op = nullstring) != NULL) {
  70 #ifdef COUNT_ALLOCS
  71         null_strings++;
  72 #endif
  73         Py_INCREF(op);
  74         return (PyObject *)op;
  75     }
  76     if (size == 1 && str != NULL &&
  77         (op = characters[*str & UCHAR_MAX]) != NULL)
  78     {
  79 #ifdef COUNT_ALLOCS
  80         one_strings++;
  81 #endif
  82         Py_INCREF(op);
  83         return (PyObject *)op;
  84     }
  85
  86     if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
  87         PyErr_SetString(PyExc_OverflowError, "string is too large");
  88         return NULL;
  89     }
  90
  91     /* Inline PyObject_NewVar */
  92     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
  93     if (op == NULL)
  94         return PyErr_NoMemory();
  95     PyObject_INIT_VAR(op, &PyString_Type, size);
  96     op->ob_shash = -1;
  97     op->ob_sstate = SSTATE_NOT_INTERNED;
  98     if (str != NULL)
  99         Py_MEMCPY(op->ob_sval, str, size);
 100     op->ob_sval[size] = '\0';
 101     /* share short strings */
 102     if (size == 0) {
 103         PyObject *t = (PyObject *)op;
 104         PyString_InternInPlace(&t);
 105         op = (PyStringObject *)t;
 106         nullstring = op;
 107         Py_INCREF(op);
 108     } else if (size == 1 && str != NULL) {
 109         PyObject *t = (PyObject *)op;
 110         PyString_InternInPlace(&t);
 111         op = (PyStringObject *)t;
 112         characters[*str & UCHAR_MAX] = op;
 113         Py_INCREF(op);
 114     }
 115     return (PyObject *) op;
 116 }
 117
 118 PyObject *
 119 PyString_FromString(const char *str)
 120 {
 121     register size_t size;
 122     register PyStringObject *op;
 123
 124     assert(str != NULL);
 125     size = strlen(str);
 126     if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
 127         PyErr_SetString(PyExc_OverflowError,
 128             "string is too long for a Python string");
 129         return NULL;
 130     }
 131     if (size == 0 && (op = nullstring) != NULL) {
 132 #ifdef COUNT_ALLOCS
 133         null_strings++;
 134 #endif
 135         Py_INCREF(op);
 136         return (PyObject *)op;
 137     }
 138     if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
 139 #ifdef COUNT_ALLOCS
 140         one_strings++;
 141 #endif
 142         Py_INCREF(op);
 143         return (PyObject *)op;
 144     }
 145
 146     /* Inline PyObject_NewVar */
 147     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
 148     if (op == NULL)
 149         return PyErr_NoMemory();
 150     PyObject_INIT_VAR(op, &PyString_Type, size);
 151     op->ob_shash = -1;
 152     op->ob_sstate = SSTATE_NOT_INTERNED;
 153     Py_MEMCPY(op->ob_sval, str, size+1);
 154     /* share short strings */
 155     if (size == 0) {
 156         PyObject *t = (PyObject *)op;
 157         PyString_InternInPlace(&t);
 158         op = (PyStringObject *)t;
 159         nullstring = op;
 160         Py_INCREF(op);
 161     } else if (size == 1) {
 162         PyObject *t = (PyObject *)op;
 163         PyString_InternInPlace(&t);
 164         op = (PyStringObject *)t;
 165         characters[*str & UCHAR_MAX] = op;
 166         Py_INCREF(op);
 167     }
 168     return (PyObject *) op;
 169 }
 170
 171 PyObject *
 172 PyString_FromFormatV(const char *format, va_list vargs)
 173 {
 174     va_list count;
 175     Py_ssize_t n = 0;
 176     const char* f;
 177     char *s;
 178     PyObject* string;
 179
 180 #ifdef VA_LIST_IS_ARRAY
 181     Py_MEMCPY(count, vargs, sizeof(va_list));
 182 #else
 183 #ifdef  __va_copy
 184     __va_copy(count, vargs);
 185 #else
 186     count = vargs;
 187 #endif
 188 #endif
 189     /* step 1: figure out how large a buffer we need */
 190     for (f = format; *f; f++) {
 191         if (*f == '%') {
 192 #ifdef HAVE_LONG_LONG
 193             int longlongflag = 0;
 194 #endif
 195             const char* p = f;
 196             while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
 197                 ;
 198
 199             /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
 200              * they don't affect the amount of space we reserve.
 201              */
 202             if (*f == 'l') {
 203                 if (f[1] == 'd' || f[1] == 'u') {
 204                     ++f;
 205                 }
 206 #ifdef HAVE_LONG_LONG
 207                 else if (f[1] == 'l' &&
 208                          (f[2] == 'd' || f[2] == 'u')) {
 209                     longlongflag = 1;
 210                     f += 2;
 211                 }
 212 #endif
 213             }
 214             else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
 215                 ++f;
 216             }
 217
 218             switch (*f) {
 219             case 'c':
 220                 (void)va_arg(count, int);
 221                 /* fall through... */
 222             case '%':
 223                 n++;
 224                 break;
 225             case 'd': case 'u': case 'i': case 'x':
 226                 (void) va_arg(count, int);
 227 #ifdef HAVE_LONG_LONG
 228                 /* Need at most
 229                    ceil(log10(256)*SIZEOF_LONG_LONG) digits,
 230                    plus 1 for the sign.  53/22 is an upper
 231                    bound for log10(256). */
 232                 if (longlongflag)
 233                     n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
 234                 else
 235 #endif
 236                     /* 20 bytes is enough to hold a 64-bit
 237                        integer.  Decimal takes the most
 238                        space.  This isn't enough for
 239                        octal. */
 240                     n += 20;
 241
 242                 break;
 243             case 's':
 244                 s = va_arg(count, char*);
 245                 n += strlen(s);
 246                 break;
 247             case 'p':
 248                 (void) va_arg(count, int);
 249                 /* maximum 64-bit pointer representation:
 250                  * 0xffffffffffffffff
 251                  * so 19 characters is enough.
 252                  * XXX I count 18 -- what's the extra for?
 253                  */
 254                 n += 19;
 255                 break;
 256             default:
 257                 /* if we stumble upon an unknown
 258                    formatting code, copy the rest of
 259                    the format string to the output
 260                    string. (we cannot just skip the
 261                    code, since there's no way to know
 262                    what's in the argument list) */
 263                 n += strlen(p);
 264                 goto expand;
 265             }
 266         } else
 267             n++;
 268     }
 269  expand:
 270     /* step 2: fill the buffer */
 271     /* Since we've analyzed how much space we need for the worst case,
 272        use sprintf directly instead of the slower PyOS_snprintf. */
 273     string = PyString_FromStringAndSize(NULL, n);
 274     if (!string)
 275         return NULL;
 276
 277     s = PyString_AsString(string);
 278
 279     for (f = format; *f; f++) {
 280         if (*f == '%') {
 281             const char* p = f++;
 282             Py_ssize_t i;
 283             int longflag = 0;
 284 #ifdef HAVE_LONG_LONG
 285             int longlongflag = 0;
 286 #endif
 287             int size_tflag = 0;
 288             /* parse the width.precision part (we're only
 289                interested in the precision value, if any) */
 290             n = 0;
 291             while (isdigit(Py_CHARMASK(*f)))
 292                 n = (n*10) + *f++ - '0';
 293             if (*f == '.') {
 294                 f++;
 295                 n = 0;
 296                 while (isdigit(Py_CHARMASK(*f)))
 297                     n = (n*10) + *f++ - '0';
 298             }
 299             while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
 300                 f++;
 301             /* Handle %ld, %lu, %lld and %llu. */
 302             if (*f == 'l') {
 303                 if (f[1] == 'd' || f[1] == 'u') {
 304                     longflag = 1;
 305                     ++f;
 306                 }
 307 #ifdef HAVE_LONG_LONG
 308                 else if (f[1] == 'l' &&
 309                          (f[2] == 'd' || f[2] == 'u')) {
 310                     longlongflag = 1;
 311                     f += 2;
 312                 }
 313 #endif
 314             }
 315             /* handle the size_t flag. */
 316             else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
 317                 size_tflag = 1;
 318                 ++f;
 319             }
 320
 321             switch (*f) {
 322             case 'c':
 323                 *s++ = va_arg(vargs, int);
 324                 break;
 325             case 'd':
 326                 if (longflag)
 327                     sprintf(s, "%ld", va_arg(vargs, long));
 328 #ifdef HAVE_LONG_LONG
 329                 else if (longlongflag)
 330                     sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
 331                         va_arg(vargs, PY_LONG_LONG));
 332 #endif
 333                 else if (size_tflag)
 334                     sprintf(s, "%" PY_FORMAT_SIZE_T "d",
 335                         va_arg(vargs, Py_ssize_t));
 336                 else
 337                     sprintf(s, "%d", va_arg(vargs, int));
 338                 s += strlen(s);
 339                 break;
 340             case 'u':
 341                 if (longflag)
 342                     sprintf(s, "%lu",
 343                         va_arg(vargs, unsigned long));
 344 #ifdef HAVE_LONG_LONG
 345                 else if (longlongflag)
 346                     sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
 347                         va_arg(vargs, PY_LONG_LONG));
 348 #endif
 349                 else if (size_tflag)
 350                     sprintf(s, "%" PY_FORMAT_SIZE_T "u",
 351                         va_arg(vargs, size_t));
 352                 else
 353                     sprintf(s, "%u",
 354                         va_arg(vargs, unsigned int));
 355                 s += strlen(s);
 356                 break;
 357             case 'i':
 358                 sprintf(s, "%i", va_arg(vargs, int));
 359                 s += strlen(s);
 360                 break;
 361             case 'x':
 362                 sprintf(s, "%x", va_arg(vargs, int));
 363                 s += strlen(s);
 364                 break;
 365             case 's':
 366                 p = va_arg(vargs, char*);
 367                 i = strlen(p);
 368                 if (n > 0 && i > n)
 369                     i = n;
 370                 Py_MEMCPY(s, p, i);
 371                 s += i;
 372                 break;
 373             case 'p':
 374                 sprintf(s, "%p", va_arg(vargs, void*));
 375                 /* %p is ill-defined:  ensure leading 0x. */
 376                 if (s[1] == 'X')
 377                     s[1] = 'x';
 378                 else if (s[1] != 'x') {
 379                     memmove(s+2, s, strlen(s)+1);
 380                     s[0] = '0';
 381                     s[1] = 'x';
 382                 }
 383                 s += strlen(s);
 384                 break;
 385             case '%':
 386                 *s++ = '%';
 387                 break;
 388             default:
 389                 strcpy(s, p);
 390                 s += strlen(s);
 391                 goto end;
 392             }
 393         } else
 394             *s++ = *f;
 395     }
 396
 397  end:
 398     if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))
 399         return NULL;
 400     return string;
 401 }
 402
 403 PyObject *
 404 PyString_FromFormat(const char *format, ...)
 405 {
 406     PyObject* ret;
 407     va_list vargs;
 408
 409 #ifdef HAVE_STDARG_PROTOTYPES
 410     va_start(vargs, format);
 411 #else
 412     va_start(vargs);
 413 #endif
 414     ret = PyString_FromFormatV(format, vargs);
 415     va_end(vargs);
 416     return ret;
 417 }
 418
 419
 420 PyObject *PyString_Decode(const char *s,
 421                           Py_ssize_t size,
 422                           const char *encoding,
 423                           const char *errors)
 424 {
 425     PyObject *v, *str;
 426
 427     str = PyString_FromStringAndSize(s, size);
 428     if (str == NULL)
 429         return NULL;
 430     v = PyString_AsDecodedString(str, encoding, errors);
 431     Py_DECREF(str);
 432     return v;
 433 }
 434
 435 PyObject *PyString_AsDecodedObject(PyObject *str,
 436                                    const char *encoding,
 437                                    const char *errors)
 438 {
 439     PyObject *v;
 440
 441     if (!PyString_Check(str)) {
 442         PyErr_BadArgument();
 443         goto onError;
 444     }
 445
 446     if (encoding == NULL) {
 447 #ifdef Py_USING_UNICODE
 448         encoding = PyUnicode_GetDefaultEncoding();
 449 #else
 450         PyErr_SetString(PyExc_ValueError, "no encoding specified");
 451         goto onError;
 452 #endif
 453     }
 454
 455     /* Decode via the codec registry */
 456     v = PyCodec_Decode(str, encoding, errors);
 457     if (v == NULL)
 458         goto onError;
 459
 460     return v;
 461
 462  onError:
 463     return NULL;
 464 }
 465
 466 PyObject *PyString_AsDecodedString(PyObject *str,
 467                                    const char *encoding,
 468                                    const char *errors)
 469 {
 470     PyObject *v;
 471
 472     v = PyString_AsDecodedObject(str, encoding, errors);
 473     if (v == NULL)
 474         goto onError;
 475
 476 #ifdef Py_USING_UNICODE
 477     /* Convert Unicode to a string using the default encoding */
 478     if (PyUnicode_Check(v)) {
 479         PyObject *temp = v;
 480         v = PyUnicode_AsEncodedString(v, NULL, NULL);
 481         Py_DECREF(temp);
 482         if (v == NULL)
 483             goto onError;
 484     }
 485 #endif
 486     if (!PyString_Check(v)) {
 487         PyErr_Format(PyExc_TypeError,
 488                      "decoder did not return a string object (type=%.400s)",
 489                      Py_TYPE(v)->tp_name);
 490         Py_DECREF(v);
 491         goto onError;
 492     }
 493
 494     return v;
 495
 496  onError:
 497     return NULL;
 498 }
 499
 500 PyObject *PyString_Encode(const char *s,
 501                           Py_ssize_t size,
 502                           const char *encoding,
 503                           const char *errors)
 504 {
 505     PyObject *v, *str;
 506
 507     str = PyString_FromStringAndSize(s, size);
 508     if (str == NULL)
 509         return NULL;
 510     v = PyString_AsEncodedString(str, encoding, errors);
 511     Py_DECREF(str);
 512     return v;
 513 }
 514
 515 PyObject *PyString_AsEncodedObject(PyObject *str,
 516                                    const char *encoding,
 517                                    const char *errors)
 518 {
 519     PyObject *v;
 520
 521     if (!PyString_Check(str)) {
 522         PyErr_BadArgument();
 523         goto onError;
 524     }
 525
 526     if (encoding == NULL) {
 527 #ifdef Py_USING_UNICODE
 528         encoding = PyUnicode_GetDefaultEncoding();
 529 #else
 530         PyErr_SetString(PyExc_ValueError, "no encoding specified");
 531         goto onError;
 532 #endif
 533     }
 534
 535     /* Encode via the codec registry */
 536     v = PyCodec_Encode(str, encoding, errors);
 537     if (v == NULL)
 538         goto onError;
 539
 540     return v;
 541
 542  onError:
 543     return NULL;
 544 }
 545
 546 PyObject *PyString_AsEncodedString(PyObject *str,
 547                                    const char *encoding,
 548                                    const char *errors)
 549 {
 550     PyObject *v;
 551
 552     v = PyString_AsEncodedObject(str, encoding, errors);
 553     if (v == NULL)
 554         goto onError;
 555
 556 #ifdef Py_USING_UNICODE
 557     /* Convert Unicode to a string using the default encoding */
 558     if (PyUnicode_Check(v)) {
 559         PyObject *temp = v;
 560         v = PyUnicode_AsEncodedString(v, NULL, NULL);
 561         Py_DECREF(temp);
 562         if (v == NULL)
 563             goto onError;
 564     }
 565 #endif
 566     if (!PyString_Check(v)) {
 567         PyErr_Format(PyExc_TypeError,
 568                      "encoder did not return a string object (type=%.400s)",
 569                      Py_TYPE(v)->tp_name);
 570         Py_DECREF(v);
 571         goto onError;
 572     }
 573
 574     return v;
 575
 576  onError:
 577     return NULL;
 578 }
 579
 580 static void
 581 string_dealloc(PyObject *op)
 582 {
 583     switch (PyString_CHECK_INTERNED(op)) {
 584         case SSTATE_NOT_INTERNED:
 585             break;
 586
 587         case SSTATE_INTERNED_MORTAL:
 588             /* revive dead object temporarily for DelItem */
 589             Py_REFCNT(op) = 3;
 590             if (PyDict_DelItem(interned, op) != 0)
 591                 Py_FatalError(
 592                     "deletion of interned string failed");
 593             break;
 594
 595         case SSTATE_INTERNED_IMMORTAL:
 596             Py_FatalError("Immortal interned string died.");
 597
 598         default:
 599             Py_FatalError("Inconsistent interned string state.");
 600     }
 601     Py_TYPE(op)->tp_free(op);
 602 }
 603
 604 /* Unescape a backslash-escaped string. If unicode is non-zero,
 605    the string is a u-literal. If recode_encoding is non-zero,
 606    the string is UTF-8 encoded and should be re-encoded in the
 607    specified encoding.  */
 608
 609 PyObject *PyString_DecodeEscape(const char *s,
 610                                 Py_ssize_t len,
 611                                 const char *errors,
 612                                 Py_ssize_t unicode,
 613                                 const char *recode_encoding)
 614 {
 615     int c;
 616     char *p, *buf;
 617     const char *end;
 618     PyObject *v;
 619     Py_ssize_t newlen = recode_encoding ? 4*len:len;
 620     v = PyString_FromStringAndSize((char *)NULL, newlen);
 621     if (v == NULL)
 622         return NULL;
 623     p = buf = PyString_AsString(v);
 624     end = s + len;
 625     while (s < end) {
 626         if (*s != '\\') {
 627           non_esc:
 628 #ifdef Py_USING_UNICODE
 629             if (recode_encoding && (*s & 0x80)) {
 630                 PyObject *u, *w;
 631                 char *r;
 632                 const char* t;
 633                 Py_ssize_t rn;
 634                 t = s;
 635                 /* Decode non-ASCII bytes as UTF-8. */
 636                 while (t < end && (*t & 0x80)) t++;
 637                 u = PyUnicode_DecodeUTF8(s, t - s, errors);
 638                 if(!u) goto failed;
 639
 640                 /* Recode them in target encoding. */
 641                 w = PyUnicode_AsEncodedString(
 642                     u, recode_encoding, errors);
 643                 Py_DECREF(u);
 644                 if (!w)                 goto failed;
 645
 646                 /* Append bytes to output buffer. */
 647                 assert(PyString_Check(w));
 648                 r = PyString_AS_STRING(w);
 649                 rn = PyString_GET_SIZE(w);
 650                 Py_MEMCPY(p, r, rn);
 651                 p += rn;
 652                 Py_DECREF(w);
 653                 s = t;
 654             } else {
 655                 *p++ = *s++;
 656             }
 657 #else
 658             *p++ = *s++;
 659 #endif
 660             continue;
 661         }
 662         s++;
 663         if (s==end) {
 664             PyErr_SetString(PyExc_ValueError,
 665                             "Trailing \\ in string");
 666             goto failed;
 667         }
 668         switch (*s++) {
 669         /* XXX This assumes ASCII! */
 670         case '\n': break;
 671         case '\\': *p++ = '\\'; break;
 672         case '\'': *p++ = '\''; break;
 673         case '\"': *p++ = '\"'; break;
 674         case 'b': *p++ = '\b'; break;
 675         case 'f': *p++ = '\014'; break; /* FF */
 676         case 't': *p++ = '\t'; break;
 677         case 'n': *p++ = '\n'; break;
 678         case 'r': *p++ = '\r'; break;
 679         case 'v': *p++ = '\013'; break; /* VT */
 680         case 'a': *p++ = '\007'; break; /* BEL, not classic C */
 681         case '0': case '1': case '2': case '3':
 682         case '4': case '5': case '6': case '7':
 683             c = s[-1] - '0';
 684             if (s < end && '0' <= *s && *s <= '7') {
 685                 c = (c<<3) + *s++ - '0';
 686                 if (s < end && '0' <= *s && *s <= '7')
 687                     c = (c<<3) + *s++ - '0';
 688             }
 689             *p++ = c;
 690             break;
 691         case 'x':
 692             if (s+1 < end &&
 693                 isxdigit(Py_CHARMASK(s[0])) &&
 694                 isxdigit(Py_CHARMASK(s[1])))
 695             {
 696                 unsigned int x = 0;
 697                 c = Py_CHARMASK(*s);
 698                 s++;
 699                 if (isdigit(c))
 700                     x = c - '0';
 701                 else if (islower(c))
 702                     x = 10 + c - 'a';
 703                 else
 704                     x = 10 + c - 'A';
 705                 x = x << 4;
 706                 c = Py_CHARMASK(*s);
 707                 s++;
 708                 if (isdigit(c))
 709                     x += c - '0';
 710                 else if (islower(c))
 711                     x += 10 + c - 'a';
 712                 else
 713                     x += 10 + c - 'A';
 714                 *p++ = x;
 715                 break;
 716             }
 717             if (!errors || strcmp(errors, "strict") == 0) {
 718                 PyErr_SetString(PyExc_ValueError,
 719                                 "invalid \\x escape");
 720                 goto failed;
 721             }
 722             if (strcmp(errors, "replace") == 0) {
 723                 *p++ = '?';
 724             } else if (strcmp(errors, "ignore") == 0)
 725                 /* do nothing */;
 726             else {
 727                 PyErr_Format(PyExc_ValueError,
 728                              "decoding error; "
 729                              "unknown error handling code: %.400s",
 730                              errors);
 731                 goto failed;
 732             }
 733 #ifndef Py_USING_UNICODE
 734         case 'u':
 735         case 'U':
 736         case 'N':
 737             if (unicode) {
 738                 PyErr_SetString(PyExc_ValueError,
 739                           "Unicode escapes not legal "
 740                           "when Unicode disabled");
 741                 goto failed;
 742             }
 743 #endif
 744         default:
 745             *p++ = '\\';
 746             s--;
 747             goto non_esc; /* an arbitry number of unescaped
 748                              UTF-8 bytes may follow. */
 749         }
 750     }
 751     if (p-buf < newlen && _PyString_Resize(&v, p - buf))
 752         goto failed;
 753     return v;
 754   failed:
 755     Py_DECREF(v);
 756     return NULL;
 757 }
 758
 759 /* -------------------------------------------------------------------- */
 760 /* object api */
 761
 762 static Py_ssize_t
 763 string_getsize(register PyObject *op)
 764 {
 765     char *s;
 766     Py_ssize_t len;
 767     if (PyString_AsStringAndSize(op, &s, &len))
 768         return -1;
 769     return len;
 770 }
 771
 772 static /*const*/ char *
 773 string_getbuffer(register PyObject *op)
 774 {
 775     char *s;
 776     Py_ssize_t len;
 777     if (PyString_AsStringAndSize(op, &s, &len))
 778         return NULL;
 779     return s;
 780 }
 781
 782 Py_ssize_t
 783 PyString_Size(register PyObject *op)
 784 {
 785     if (!PyString_Check(op))
 786         return string_getsize(op);
 787     return Py_SIZE(op);
 788 }
 789
 790 /*const*/ char *
 791 PyString_AsString(register PyObject *op)
 792 {
 793     if (!PyString_Check(op))
 794         return string_getbuffer(op);
 795     return ((PyStringObject *)op) -> ob_sval;
 796 }
 797
 798 int
 799 PyString_AsStringAndSize(register PyObject *obj,
 800                          register char **s,
 801                          register Py_ssize_t *len)
 802 {
 803     if (s == NULL) {
 804         PyErr_BadInternalCall();
 805         return -1;
 806     }
 807
 808     if (!PyString_Check(obj)) {
 809 #ifdef Py_USING_UNICODE
 810         if (PyUnicode_Check(obj)) {
 811             obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
 812             if (obj == NULL)
 813                 return -1;
 814         }
 815         else
 816 #endif
 817         {
 818             PyErr_Format(PyExc_TypeError,
 819                          "expected string or Unicode object, "
 820                          "%.200s found", Py_TYPE(obj)->tp_name);
 821             return -1;
 822         }
 823     }
 824
 825     *s = PyString_AS_STRING(obj);
 826     if (len != NULL)
 827         *len = PyString_GET_SIZE(obj);
 828     else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
 829         PyErr_SetString(PyExc_TypeError,
 830                         "expected string without null bytes");
 831         return -1;
 832     }
 833     return 0;
 834 }
 835
 836 /* -------------------------------------------------------------------- */
 837 /* Methods */
 838
 839 #include "stringlib/stringdefs.h"
 840 #include "stringlib/fastsearch.h"
 841
 842 #include "stringlib/count.h"
 843 #include "stringlib/find.h"
 844 #include "stringlib/partition.h"
 845 #include "stringlib/split.h"
 846
 847 #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
 848 #include "stringlib/localeutil.h"
 849
 850
 851
 852 static int
 853 string_print(PyStringObject *op, FILE *fp, int flags)
 854 {
 855     Py_ssize_t i, str_len;
 856     char c;
 857     int quote;
 858
 859     /* XXX Ought to check for interrupts when writing long strings */
 860     if (! PyString_CheckExact(op)) {
 861         int ret;
 862         /* A str subclass may have its own __str__ method. */
 863         op = (PyStringObject *) PyObject_Str((PyObject *)op);
 864         if (op == NULL)
 865             return -1;
 866         ret = string_print(op, fp, flags);
 867         Py_DECREF(op);
 868         return ret;
 869     }
 870     if (flags & Py_PRINT_RAW) {
 871         char *data = op->ob_sval;
 872         Py_ssize_t size = Py_SIZE(op);
 873         Py_BEGIN_ALLOW_THREADS
 874         while (size > INT_MAX) {
 875             /* Very long strings cannot be written atomically.
 876              * But don't write exactly INT_MAX bytes at a time
 877              * to avoid memory aligment issues.
 878              */
 879             const int chunk_size = INT_MAX & ~0x3FFF;
 880             fwrite(data, 1, chunk_size, fp);
 881             data += chunk_size;
 882             size -= chunk_size;
 883         }
 884 #ifdef __VMS
 885         if (size) fwrite(data, (int)size, 1, fp);
 886 #else
 887         fwrite(data, 1, (int)size, fp);
 888 #endif
 889         Py_END_ALLOW_THREADS
 890         return 0;
 891     }
 892
 893     /* figure out which quote to use; single is preferred */
 894     quote = '\'';
 895     if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
 896         !memchr(op->ob_sval, '"', Py_SIZE(op)))
 897         quote = '"';
 898
 899     str_len = Py_SIZE(op);
 900     Py_BEGIN_ALLOW_THREADS
 901     fputc(quote, fp);
 902     for (i = 0; i < str_len; i++) {
 903         /* Since strings are immutable and the caller should have a
 904         reference, accessing the interal buffer should not be an issue
 905         with the GIL released. */
 906         c = op->ob_sval[i];
 907         if (c == quote || c == '\\')
 908             fprintf(fp, "\\%c", c);
 909         else if (c == '\t')
 910             fprintf(fp, "\\t");
 911         else if (c == '\n')
 912             fprintf(fp, "\\n");
 913         else if (c == '\r')
 914             fprintf(fp, "\\r");
 915         else if (c < ' ' || c >= 0x7f)
 916             fprintf(fp, "\\x%02x", c & 0xff);
 917         else
 918             fputc(c, fp);
 919     }
 920     fputc(quote, fp);
 921     Py_END_ALLOW_THREADS
 922     return 0;
 923 }
 924
 925 PyObject *
 926 PyString_Repr(PyObject *obj, int smartquotes)
 927 {
 928     register PyStringObject* op = (PyStringObject*) obj;
 929     size_t newsize = 2 + 4 * Py_SIZE(op);
 930     PyObject *v;
 931     if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
 932         PyErr_SetString(PyExc_OverflowError,
 933             "string is too large to make repr");
 934         return NULL;
 935     }
 936     v = PyString_FromStringAndSize((char *)NULL, newsize);
 937     if (v == NULL) {
 938         return NULL;
 939     }
 940     else {
 941         register Py_ssize_t i;
 942         register char c;
 943         register char *p;
 944         int quote;
 945
 946         /* figure out which quote to use; single is preferred */
 947         quote = '\'';
 948         if (smartquotes &&
 949             memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
 950             !memchr(op->ob_sval, '"', Py_SIZE(op)))
 951             quote = '"';
 952
 953         p = PyString_AS_STRING(v);
 954         *p++ = quote;
 955         for (i = 0; i < Py_SIZE(op); i++) {
 956             /* There's at least enough room for a hex escape
 957                and a closing quote. */
 958             assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
 959             c = op->ob_sval[i];
 960             if (c == quote || c == '\\')
 961                 *p++ = '\\', *p++ = c;
 962             else if (c == '\t')
 963                 *p++ = '\\', *p++ = 't';
 964             else if (c == '\n')
 965                 *p++ = '\\', *p++ = 'n';
 966             else if (c == '\r')
 967                 *p++ = '\\', *p++ = 'r';
 968             else if (c < ' ' || c >= 0x7f) {
 969                 /* For performance, we don't want to call
 970                    PyOS_snprintf here (extra layers of
 971                    function call). */
 972                 sprintf(p, "\\x%02x", c & 0xff);
 973                 p += 4;
 974             }
 975             else
 976                 *p++ = c;
 977         }
 978         assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
 979         *p++ = quote;
 980         *p = '\0';
 981         if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))
 982             return NULL;
 983         return v;
 984     }
 985 }
 986
 987 static PyObject *
 988 string_repr(PyObject *op)
 989 {
 990     return PyString_Repr(op, 1);
 991 }
 992
 993 static PyObject *
 994 string_str(PyObject *s)
 995 {
 996     assert(PyString_Check(s));
 997     if (PyString_CheckExact(s)) {
 998         Py_INCREF(s);
 999         return s;
1000     }
1001     else {
1002         /* Subtype -- return genuine string with the same value. */
1003         PyStringObject *t = (PyStringObject *) s;
1004         return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
1005     }
1006 }
1007
1008 static Py_ssize_t
1009 string_length(PyStringObject *a)
1010 {
1011     return Py_SIZE(a);
1012 }
1013
1014 static PyObject *
1015 string_concat(register PyStringObject *a, register PyObject *bb)
1016 {
1017     register Py_ssize_t size;
1018     register PyStringObject *op;
1019     if (!PyString_Check(bb)) {
1020 #ifdef Py_USING_UNICODE
1021         if (PyUnicode_Check(bb))
1022             return PyUnicode_Concat((PyObject *)a, bb);
1023 #endif
1024         if (PyByteArray_Check(bb))
1025             return PyByteArray_Concat((PyObject *)a, bb);
1026         PyErr_Format(PyExc_TypeError,
1027                      "cannot concatenate 'str' and '%.200s' objects",
1028                      Py_TYPE(bb)->tp_name);
1029         return NULL;
1030     }
1031 #define b ((PyStringObject *)bb)
1032     /* Optimize cases with empty left or right operand */
1033     if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
1034         PyString_CheckExact(a) && PyString_CheckExact(b)) {
1035         if (Py_SIZE(a) == 0) {
1036             Py_INCREF(bb);
1037             return bb;
1038         }
1039         Py_INCREF(a);
1040         return (PyObject *)a;
1041     }
1042     size = Py_SIZE(a) + Py_SIZE(b);
1043     /* Check that string sizes are not negative, to prevent an
1044        overflow in cases where we are passed incorrectly-created
1045        strings with negative lengths (due to a bug in other code).
1046     */
1047     if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
1048         Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
1049         PyErr_SetString(PyExc_OverflowError,
1050                         "strings are too large to concat");
1051         return NULL;
1052     }
1053
1054     /* Inline PyObject_NewVar */
1055     if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
1056         PyErr_SetString(PyExc_OverflowError,
1057                         "strings are too large to concat");
1058         return NULL;
1059     }
1060     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
1061     if (op == NULL)
1062         return PyErr_NoMemory();
1063     PyObject_INIT_VAR(op, &PyString_Type, size);
1064     op->ob_shash = -1;
1065     op->ob_sstate = SSTATE_NOT_INTERNED;
1066     Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1067     Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1068     op->ob_sval[size] = '\0';
1069     return (PyObject *) op;
1070 #undef b
1071 }
1072
1073 static PyObject *
1074 string_repeat(register PyStringObject *a, register Py_ssize_t n)
1075 {
1076     register Py_ssize_t i;
1077     register Py_ssize_t j;
1078     register Py_ssize_t size;
1079     register PyStringObject *op;
1080     size_t nbytes;
1081     if (n < 0)
1082         n = 0;
1083     /* watch out for overflows:  the size can overflow int,
1084      * and the # of bytes needed can overflow size_t
1085      */
1086     size = Py_SIZE(a) * n;
1087     if (n && size / n != Py_SIZE(a)) {
1088         PyErr_SetString(PyExc_OverflowError,
1089             "repeated string is too long");
1090         return NULL;
1091     }
1092     if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1093         Py_INCREF(a);
1094         return (PyObject *)a;
1095     }
1096     nbytes = (size_t)size;
1097     if (nbytes + PyStringObject_SIZE <= nbytes) {
1098         PyErr_SetString(PyExc_OverflowError,
1099             "repeated string is too long");
1100         return NULL;
1101     }
1102     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
1103     if (op == NULL)
1104         return PyErr_NoMemory();
1105     PyObject_INIT_VAR(op, &PyString_Type, size);
1106     op->ob_shash = -1;
1107     op->ob_sstate = SSTATE_NOT_INTERNED;
1108     op->ob_sval[size] = '\0';
1109     if (Py_SIZE(a) == 1 && n > 0) {
1110         memset(op->ob_sval, a->ob_sval[0] , n);
1111         return (PyObject *) op;
1112     }
1113     i = 0;
1114     if (i < size) {
1115         Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1116         i = Py_SIZE(a);
1117     }
1118     while (i < size) {
1119         j = (i <= size-i)  ?  i  :  size-i;
1120         Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1121         i += j;
1122     }
1123     return (PyObject *) op;
1124 }
1125
1126 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1127
1128 static PyObject *
1129 string_slice(register PyStringObject *a, register Py_ssize_t i,
1130              register Py_ssize_t j)
1131      /* j -- may be negative! */
1132 {
1133     if (i < 0)
1134         i = 0;
1135     if (j < 0)
1136         j = 0; /* Avoid signed/unsigned bug in next line */
1137     if (j > Py_SIZE(a))
1138         j = Py_SIZE(a);
1139     if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1140         /* It's the same as a */
1141         Py_INCREF(a);
1142         return (PyObject *)a;
1143     }
1144     if (j < i)
1145         j = i;
1146     return PyString_FromStringAndSize(a->ob_sval + i, j-i);
1147 }
1148
1149 static int
1150 string_contains(PyObject *str_obj, PyObject *sub_obj)
1151 {
1152     if (!PyString_CheckExact(sub_obj)) {
1153 #ifdef Py_USING_UNICODE
1154         if (PyUnicode_Check(sub_obj))
1155             return PyUnicode_Contains(str_obj, sub_obj);
1156 #endif
1157         if (!PyString_Check(sub_obj)) {
1158             PyErr_Format(PyExc_TypeError,
1159                 "'in <string>' requires string as left operand, "
1160                 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1161             return -1;
1162         }
1163     }
1164
1165     return stringlib_contains_obj(str_obj, sub_obj);
1166 }
1167
1168 static PyObject *
1169 string_item(PyStringObject *a, register Py_ssize_t i)
1170 {
1171     char pchar;
1172     PyObject *v;
1173     if (i < 0 || i >= Py_SIZE(a)) {
1174         PyErr_SetString(PyExc_IndexError, "string index out of range");
1175         return NULL;
1176     }
1177     pchar = a->ob_sval[i];
1178     v = (PyObject *)characters[pchar & UCHAR_MAX];
1179     if (v == NULL)
1180         v = PyString_FromStringAndSize(&pchar, 1);
1181     else {
1182 #ifdef COUNT_ALLOCS
1183         one_strings++;
1184 #endif
1185         Py_INCREF(v);
1186     }
1187     return v;
1188 }
1189
1190 static PyObject*
1191 string_richcompare(PyStringObject *a, PyStringObject *b, int op)
1192 {
1193     int c;
1194     Py_ssize_t len_a, len_b;
1195     Py_ssize_t min_len;
1196     PyObject *result;
1197
1198     /* Make sure both arguments are strings. */
1199     if (!(PyString_Check(a) && PyString_Check(b))) {
1200         result = Py_NotImplemented;
1201         goto out;
1202     }
1203     if (a == b) {
1204         switch (op) {
1205         case Py_EQ:case Py_LE:case Py_GE:
1206             result = Py_True;
1207             goto out;
1208         case Py_NE:case Py_LT:case Py_GT:
1209             result = Py_False;
1210             goto out;
1211         }
1212     }
1213     if (op == Py_EQ) {
1214         /* Supporting Py_NE here as well does not save
1215            much time, since Py_NE is rarely used.  */
1216         if (Py_SIZE(a) == Py_SIZE(b)
1217             && (a->ob_sval[0] == b->ob_sval[0]
1218             && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1219             result = Py_True;
1220         } else {
1221             result = Py_False;
1222         }
1223         goto out;
1224     }
1225     len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1226     min_len = (len_a < len_b) ? len_a : len_b;
1227     if (min_len > 0) {
1228         c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1229         if (c==0)
1230             c = memcmp(a->ob_sval, b->ob_sval, min_len);
1231     } else
1232         c = 0;
1233     if (c == 0)
1234         c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1235     switch (op) {
1236     case Py_LT: c = c <  0; break;
1237     case Py_LE: c = c <= 0; break;
1238     case Py_EQ: assert(0);  break; /* unreachable */
1239     case Py_NE: c = c != 0; break;
1240     case Py_GT: c = c >  0; break;
1241     case Py_GE: c = c >= 0; break;
1242     default:
1243         result = Py_NotImplemented;
1244         goto out;
1245     }
1246     result = c ? Py_True : Py_False;
1247   out:
1248     Py_INCREF(result);
1249     return result;
1250 }
1251
1252 int
1253 _PyString_Eq(PyObject *o1, PyObject *o2)
1254 {
1255     PyStringObject *a = (PyStringObject*) o1;
1256     PyStringObject *b = (PyStringObject*) o2;
1257     return Py_SIZE(a) == Py_SIZE(b)
1258       && *a->ob_sval == *b->ob_sval
1259       && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1260 }
1261
1262 static long
1263 string_hash(PyStringObject *a)
1264 {
1265     register Py_ssize_t len;
1266     register unsigned char *p;
1267     register long x;
1268
1269     if (a->ob_shash != -1)
1270         return a->ob_shash;
1271     len = Py_SIZE(a);
1272     p = (unsigned char *) a->ob_sval;
1273     x = *p << 7;
1274     while (--len >= 0)
1275         x = (1000003*x) ^ *p++;
1276     x ^= Py_SIZE(a);
1277     if (x == -1)
1278         x = -2;
1279     a->ob_shash = x;
1280     return x;
1281 }
1282
1283 static PyObject*
1284 string_subscript(PyStringObject* self, PyObject* item)
1285 {
1286     if (PyIndex_Check(item)) {
1287         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1288         if (i == -1 && PyErr_Occurred())
1289             return NULL;
1290         if (i < 0)
1291             i += PyString_GET_SIZE(self);
1292         return string_item(self, i);
1293     }
1294     else if (PySlice_Check(item)) {
1295         Py_ssize_t start, stop, step, slicelength, cur, i;
1296         char* source_buf;
1297         char* result_buf;
1298         PyObject* result;
1299
1300         if (PySlice_GetIndicesEx((PySliceObject*)item,
1301                          PyString_GET_SIZE(self),
1302                          &start, &stop, &step, &slicelength) < 0) {
1303             return NULL;
1304         }
1305
1306         if (slicelength <= 0) {
1307             return PyString_FromStringAndSize("", 0);
1308         }
1309         else if (start == 0 && step == 1 &&
1310                  slicelength == PyString_GET_SIZE(self) &&
1311                  PyString_CheckExact(self)) {
1312             Py_INCREF(self);
1313             return (PyObject *)self;
1314         }
1315         else if (step == 1) {
1316             return PyString_FromStringAndSize(
1317                 PyString_AS_STRING(self) + start,
1318                 slicelength);
1319         }
1320         else {
1321             source_buf = PyString_AsString((PyObject*)self);
1322             result_buf = (char *)PyMem_Malloc(slicelength);
1323             if (result_buf == NULL)
1324                 return PyErr_NoMemory();
1325
1326             for (cur = start, i = 0; i < slicelength;
1327                  cur += step, i++) {
1328                 result_buf[i] = source_buf[cur];
1329             }
1330
1331             result = PyString_FromStringAndSize(result_buf,
1332                                                 slicelength);
1333             PyMem_Free(result_buf);
1334             return result;
1335         }
1336     }
1337     else {
1338         PyErr_Format(PyExc_TypeError,
1339                      "string indices must be integers, not %.200s",
1340                      Py_TYPE(item)->tp_name);
1341         return NULL;
1342     }
1343 }
1344
1345 static Py_ssize_t
1346 string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1347 {
1348     if ( index != 0 ) {
1349         PyErr_SetString(PyExc_SystemError,
1350                         "accessing non-existent string segment");
1351         return -1;
1352     }
1353     *ptr = (void *)self->ob_sval;
1354     return Py_SIZE(self);
1355 }
1356
1357 static Py_ssize_t
1358 string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1359 {
1360     PyErr_SetString(PyExc_TypeError,
1361                     "Cannot use string as modifiable buffer");
1362     return -1;
1363 }
1364
1365 static Py_ssize_t
1366 string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
1367 {
1368     if ( lenp )
1369         *lenp = Py_SIZE(self);
1370     return 1;
1371 }
1372
1373 static Py_ssize_t
1374 string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
1375 {
1376     if ( index != 0 ) {
1377         PyErr_SetString(PyExc_SystemError,
1378                         "accessing non-existent string segment");
1379         return -1;
1380     }
1381     *ptr = self->ob_sval;
1382     return Py_SIZE(self);
1383 }
1384
1385 static int
1386 string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
1387 {
1388     return PyBuffer_FillInfo(view, (PyObject*)self,
1389                              (void *)self->ob_sval, Py_SIZE(self),
1390                              1, flags);
1391 }
1392
/* Sequence slot table for strings.  Each entry is cast to the function
   type of the slot named in the trailing comment; the two assignment
   slots are left empty (0). */
static PySequenceMethods string_as_sequence = {
    (lenfunc)string_length, /*sq_length*/
    (binaryfunc)string_concat, /*sq_concat*/
    (ssizeargfunc)string_repeat, /*sq_repeat*/
    (ssizeargfunc)string_item, /*sq_item*/
    (ssizessizeargfunc)string_slice, /*sq_slice*/
    0,                  /*sq_ass_item*/
    0,                  /*sq_ass_slice*/
    (objobjproc)string_contains /*sq_contains*/
};
1403
/* Mapping slot table for strings: length and subscription (self[item],
   which covers both indices and slice objects via string_subscript).
   The third slot is left empty -- presumably the item-assignment slot;
   confirm against the PyMappingMethods declaration. */
static PyMappingMethods string_as_mapping = {
    (lenfunc)string_length,
    (binaryfunc)string_subscript,
    0,
};
1409
/* Buffer slot table for strings, wiring up the read, write, segment-count,
   char-buffer and getbuffer accessors defined above.  The write accessor
   is present but always fails with TypeError. */
static PyBufferProcs string_as_buffer = {
    (readbufferproc)string_buffer_getreadbuf,
    (writebufferproc)string_buffer_getwritebuf,
    (segcountproc)string_buffer_getsegcount,
    (charbufferproc)string_buffer_getcharbuf,
    (getbufferproc)string_buffer_getbuffer,
    0, /* XXX */ /* NOTE(review): presumably the release-buffer slot, left
                    empty -- confirm against the PyBufferProcs declaration */
};
1418
1419
1420
/* Strip direction selectors.  They double as indices into stripformat[]
   below, so their values must stay in step with that array's order. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
/* PyArg_ParseTuple format strings, one per strip direction. */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for a strip direction: skips the 3-character "|O:" prefix
   of the corresponding format string. */
#define STRIPNAME(i) (stripformat[i]+3)
1429
1430 PyDoc_STRVAR(split__doc__,
1431 "S.split([sep [,maxsplit]]) -> list of strings\n\
1432 \n\
1433 Return a list of the words in the string S, using sep as the\n\
1434 delimiter string.  If maxsplit is given, at most maxsplit\n\
1435 splits are done. If sep is not specified or is None, any\n\
1436 whitespace string is a separator and empty strings are removed\n\
1437 from the result.");
1438
1439 static PyObject *
1440 string_split(PyStringObject *self, PyObject *args)
1441 {
1442     Py_ssize_t len = PyString_GET_SIZE(self), n;
1443     Py_ssize_t maxsplit = -1;
1444     const char *s = PyString_AS_STRING(self), *sub;
1445     PyObject *subobj = Py_None;
1446
1447     if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1448         return NULL;
1449     if (maxsplit < 0)
1450         maxsplit = PY_SSIZE_T_MAX;
1451     if (subobj == Py_None)
1452         return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1453     if (PyString_Check(subobj)) {
1454         sub = PyString_AS_STRING(subobj);
1455         n = PyString_GET_SIZE(subobj);
1456     }
1457 #ifdef Py_USING_UNICODE
1458     else if (PyUnicode_Check(subobj))
1459         return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1460 #endif
1461     else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1462         return NULL;
1463
1464     return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1465 }
1466
1467 PyDoc_STRVAR(partition__doc__,
1468 "S.partition(sep) -> (head, sep, tail)\n\
1469 \n\
1470 Search for the separator sep in S, and return the part before it,\n\
1471 the separator itself, and the part after it.  If the separator is not\n\
1472 found, return S and two empty strings.");
1473
1474 static PyObject *
1475 string_partition(PyStringObject *self, PyObject *sep_obj)
1476 {
1477     const char *sep;
1478     Py_ssize_t sep_len;
1479
1480     if (PyString_Check(sep_obj)) {
1481         sep = PyString_AS_STRING(sep_obj);
1482         sep_len = PyString_GET_SIZE(sep_obj);
1483     }
1484 #ifdef Py_USING_UNICODE
1485     else if (PyUnicode_Check(sep_obj))
1486         return PyUnicode_Partition((PyObject *) self, sep_obj);
1487 #endif
1488     else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1489         return NULL;
1490
1491     return stringlib_partition(
1492         (PyObject*) self,
1493         PyString_AS_STRING(self), PyString_GET_SIZE(self),
1494         sep_obj, sep, sep_len
1495         );
1496 }
1497
1498 PyDoc_STRVAR(rpartition__doc__,
1499 "S.rpartition(sep) -> (head, sep, tail)\n\
1500 \n\
1501 Search for the separator sep in S, starting at the end of S, and return\n\
1502 the part before it, the separator itself, and the part after it.  If the\n\
1503 separator is not found, return two empty strings and S.");
1504
1505 static PyObject *
1506 string_rpartition(PyStringObject *self, PyObject *sep_obj)
1507 {
1508     const char *sep;
1509     Py_ssize_t sep_len;
1510
1511     if (PyString_Check(sep_obj)) {
1512         sep = PyString_AS_STRING(sep_obj);
1513         sep_len = PyString_GET_SIZE(sep_obj);
1514     }
1515 #ifdef Py_USING_UNICODE
1516     else if (PyUnicode_Check(sep_obj))
1517         return PyUnicode_RPartition((PyObject *) self, sep_obj);
1518 #endif
1519     else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1520         return NULL;
1521
1522     return stringlib_rpartition(
1523         (PyObject*) self,
1524         PyString_AS_STRING(self), PyString_GET_SIZE(self),
1525         sep_obj, sep, sep_len
1526         );
1527 }
1528
1529 PyDoc_STRVAR(rsplit__doc__,
1530 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1531 \n\
1532 Return a list of the words in the string S, using sep as the\n\
1533 delimiter string, starting at the end of the string and working\n\
1534 to the front.  If maxsplit is given, at most maxsplit splits are\n\
1535 done. If sep is not specified or is None, any whitespace string\n\
1536 is a separator.");
1537
1538 static PyObject *
1539 string_rsplit(PyStringObject *self, PyObject *args)
1540 {
1541     Py_ssize_t len = PyString_GET_SIZE(self), n;
1542     Py_ssize_t maxsplit = -1;
1543     const char *s = PyString_AS_STRING(self), *sub;
1544     PyObject *subobj = Py_None;
1545
1546     if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1547         return NULL;
1548     if (maxsplit < 0)
1549         maxsplit = PY_SSIZE_T_MAX;
1550     if (subobj == Py_None)
1551         return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1552     if (PyString_Check(subobj)) {
1553         sub = PyString_AS_STRING(subobj);
1554         n = PyString_GET_SIZE(subobj);
1555     }
1556 #ifdef Py_USING_UNICODE
1557     else if (PyUnicode_Check(subobj))
1558         return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1559 #endif
1560     else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1561         return NULL;
1562
1563     return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1564 }
1565
1566
1567 PyDoc_STRVAR(join__doc__,
1568 "S.join(iterable) -> string\n\
1569 \n\
1570 Return a string which is the concatenation of the strings in the\n\
1571 iterable.  The separator between elements is S.");
1572
1573 static PyObject *
1574 string_join(PyStringObject *self, PyObject *orig)
1575 {
1576     char *sep = PyString_AS_STRING(self);
1577     const Py_ssize_t seplen = PyString_GET_SIZE(self);
1578     PyObject *res = NULL;
1579     char *p;
1580     Py_ssize_t seqlen = 0;
1581     size_t sz = 0;
1582     Py_ssize_t i;
1583     PyObject *seq, *item;
1584
1585     seq = PySequence_Fast(orig, "");
1586     if (seq == NULL) {
1587         return NULL;
1588     }
1589
1590     seqlen = PySequence_Size(seq);
1591     if (seqlen == 0) {
1592         Py_DECREF(seq);
1593         return PyString_FromString("");
1594     }
1595     if (seqlen == 1) {
1596         item = PySequence_Fast_GET_ITEM(seq, 0);
1597         if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1598             Py_INCREF(item);
1599             Py_DECREF(seq);
1600             return item;
1601         }
1602     }
1603
1604     /* There are at least two things to join, or else we have a subclass
1605      * of the builtin types in the sequence.
1606      * Do a pre-pass to figure out the total amount of space we'll
1607      * need (sz), see whether any argument is absurd, and defer to
1608      * the Unicode join if appropriate.
1609      */
1610     for (i = 0; i < seqlen; i++) {
1611         const size_t old_sz = sz;
1612         item = PySequence_Fast_GET_ITEM(seq, i);
1613         if (!PyString_Check(item)){
1614 #ifdef Py_USING_UNICODE
1615             if (PyUnicode_Check(item)) {
1616                 /* Defer to Unicode join.
1617                  * CAUTION:  There's no gurantee that the
1618                  * original sequence can be iterated over
1619                  * again, so we must pass seq here.
1620                  */
1621                 PyObject *result;
1622                 result = PyUnicode_Join((PyObject *)self, seq);
1623                 Py_DECREF(seq);
1624                 return result;
1625             }
1626 #endif
1627             PyErr_Format(PyExc_TypeError,
1628                          "sequence item %zd: expected string,"
1629                          " %.80s found",
1630                          i, Py_TYPE(item)->tp_name);
1631             Py_DECREF(seq);
1632             return NULL;
1633         }
1634         sz += PyString_GET_SIZE(item);
1635         if (i != 0)
1636             sz += seplen;
1637         if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1638             PyErr_SetString(PyExc_OverflowError,
1639                 "join() result is too long for a Python string");
1640             Py_DECREF(seq);
1641             return NULL;
1642         }
1643     }
1644
1645     /* Allocate result space. */
1646     res = PyString_FromStringAndSize((char*)NULL, sz);
1647     if (res == NULL) {
1648         Py_DECREF(seq);
1649         return NULL;
1650     }
1651
1652     /* Catenate everything. */
1653     p = PyString_AS_STRING(res);
1654     for (i = 0; i < seqlen; ++i) {
1655         size_t n;
1656         item = PySequence_Fast_GET_ITEM(seq, i);
1657         n = PyString_GET_SIZE(item);
1658         Py_MEMCPY(p, PyString_AS_STRING(item), n);
1659         p += n;
1660         if (i < seqlen - 1) {
1661             Py_MEMCPY(p, sep, seplen);
1662             p += seplen;
1663         }
1664     }
1665
1666     Py_DECREF(seq);
1667     return res;
1668 }
1669
1670 PyObject *
1671 _PyString_Join(PyObject *sep, PyObject *x)
1672 {
1673     assert(sep != NULL && PyString_Check(sep));
1674     assert(x != NULL);
1675     return string_join((PyStringObject *)sep, x);
1676 }
1677
/* helper macro to fixup start/end slice values */
/* Clamps `end` to at most `len`; a negative `end` or `start` is offset by
   `len` and then floored at 0.  `start` and `end` are assigned to, so they
   must be lvalues.  `len` may be evaluated several times.  The expansion
   is a bare sequence of if-statements (not wrapped in do { } while (0)),
   so it must not be used as the unbraced body of an if/else. */
#define ADJUST_INDICES(start, end, len)         \
    if (end > len)                          \
        end = len;                          \
    else if (end < 0) {                     \
        end += len;                         \
        if (end < 0)                        \
        end = 0;                        \
    }                                       \
    if (start < 0) {                        \
        start += len;                       \
        if (start < 0)                      \
        start = 0;                      \
    }
1692
1693 Py_LOCAL_INLINE(Py_ssize_t)
1694 string_find_internal(PyStringObject *self, PyObject *args, int dir)
1695 {
1696     PyObject *subobj;
1697     const char *sub;
1698     Py_ssize_t sub_len;
1699     Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1700     PyObject *obj_start=Py_None, *obj_end=Py_None;
1701
1702     if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1703         &obj_start, &obj_end))
1704         return -2;
1705     /* To support None in "start" and "end" arguments, meaning
1706        the same as if they were not passed.
1707     */
1708     if (obj_start != Py_None)
1709         if (!_PyEval_SliceIndex(obj_start, &start))
1710         return -2;
1711     if (obj_end != Py_None)
1712         if (!_PyEval_SliceIndex(obj_end, &end))
1713         return -2;
1714
1715     if (PyString_Check(subobj)) {
1716         sub = PyString_AS_STRING(subobj);
1717         sub_len = PyString_GET_SIZE(subobj);
1718     }
1719 #ifdef Py_USING_UNICODE
1720     else if (PyUnicode_Check(subobj))
1721         return PyUnicode_Find(
1722             (PyObject *)self, subobj, start, end, dir);
1723 #endif
1724     else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1725         /* XXX - the "expected a character buffer object" is pretty
1726            confusing for a non-expert.  remap to something else ? */
1727         return -2;
1728
1729     if (dir > 0)
1730         return stringlib_find_slice(
1731             PyString_AS_STRING(self), PyString_GET_SIZE(self),
1732             sub, sub_len, start, end);
1733     else
1734         return stringlib_rfind_slice(
1735             PyString_AS_STRING(self), PyString_GET_SIZE(self),
1736             sub, sub_len, start, end);
1737 }
1738
1739
1740 PyDoc_STRVAR(find__doc__,
1741 "S.find(sub [,start [,end]]) -> int\n\
1742 \n\
1743 Return the lowest index in S where substring sub is found,\n\
1744 such that sub is contained within s[start:end].  Optional\n\
1745 arguments start and end are interpreted as in slice notation.\n\
1746 \n\
1747 Return -1 on failure.");
1748
1749 static PyObject *
1750 string_find(PyStringObject *self, PyObject *args)
1751 {
1752     Py_ssize_t result = string_find_internal(self, args, +1);
1753     if (result == -2)
1754         return NULL;
1755     return PyInt_FromSsize_t(result);
1756 }
1757
1758
1759 PyDoc_STRVAR(index__doc__,
1760 "S.index(sub [,start [,end]]) -> int\n\
1761 \n\
1762 Like S.find() but raise ValueError when the substring is not found.");
1763
1764 static PyObject *
1765 string_index(PyStringObject *self, PyObject *args)
1766 {
1767     Py_ssize_t result = string_find_internal(self, args, +1);
1768     if (result == -2)
1769         return NULL;
1770     if (result == -1) {
1771         PyErr_SetString(PyExc_ValueError,
1772                         "substring not found");
1773         return NULL;
1774     }
1775     return PyInt_FromSsize_t(result);
1776 }
1777
1778
1779 PyDoc_STRVAR(rfind__doc__,
1780 "S.rfind(sub [,start [,end]]) -> int\n\
1781 \n\
1782 Return the highest index in S where substring sub is found,\n\
1783 such that sub is contained within s[start:end].  Optional\n\
1784 arguments start and end are interpreted as in slice notation.\n\
1785 \n\
1786 Return -1 on failure.");
1787
1788 static PyObject *
1789 string_rfind(PyStringObject *self, PyObject *args)
1790 {
1791     Py_ssize_t result = string_find_internal(self, args, -1);
1792     if (result == -2)
1793         return NULL;
1794     return PyInt_FromSsize_t(result);
1795 }
1796
1797
1798 PyDoc_STRVAR(rindex__doc__,
1799 "S.rindex(sub [,start [,end]]) -> int\n\
1800 \n\
1801 Like S.rfind() but raise ValueError when the substring is not found.");
1802
1803 static PyObject *
1804 string_rindex(PyStringObject *self, PyObject *args)
1805 {
1806     Py_ssize_t result = string_find_internal(self, args, -1);
1807     if (result == -2)
1808         return NULL;
1809     if (result == -1) {
1810         PyErr_SetString(PyExc_ValueError,
1811                         "substring not found");
1812         return NULL;
1813     }
1814     return PyInt_FromSsize_t(result);
1815 }
1816
1817
1818 Py_LOCAL_INLINE(PyObject *)
1819 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1820 {
1821     char *s = PyString_AS_STRING(self);
1822     Py_ssize_t len = PyString_GET_SIZE(self);
1823     char *sep = PyString_AS_STRING(sepobj);
1824     Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1825     Py_ssize_t i, j;
1826
1827     i = 0;
1828     if (striptype != RIGHTSTRIP) {
1829         while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1830             i++;
1831         }
1832     }
1833
1834     j = len;
1835     if (striptype != LEFTSTRIP) {
1836         do {
1837             j--;
1838         } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1839         j++;
1840     }
1841
1842     if (i == 0 && j == len && PyString_CheckExact(self)) {
1843         Py_INCREF(self);
1844         return (PyObject*)self;
1845     }
1846     else
1847         return PyString_FromStringAndSize(s+i, j-i);
1848 }
1849
1850
1851 Py_LOCAL_INLINE(PyObject *)
1852 do_strip(PyStringObject *self, int striptype)
1853 {
1854     char *s = PyString_AS_STRING(self);
1855     Py_ssize_t len = PyString_GET_SIZE(self), i, j;
1856
1857     i = 0;
1858     if (striptype != RIGHTSTRIP) {
1859         while (i < len && isspace(Py_CHARMASK(s[i]))) {
1860             i++;
1861         }
1862     }
1863
1864     j = len;
1865     if (striptype != LEFTSTRIP) {
1866         do {
1867             j--;
1868         } while (j >= i && isspace(Py_CHARMASK(s[j])));
1869         j++;
1870     }
1871
1872     if (i == 0 && j == len && PyString_CheckExact(self)) {
1873         Py_INCREF(self);
1874         return (PyObject*)self;
1875     }
1876     else
1877         return PyString_FromStringAndSize(s+i, j-i);
1878 }
1879
1880
1881 Py_LOCAL_INLINE(PyObject *)
1882 do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1883 {
1884     PyObject *sep = NULL;
1885
1886     if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1887         return NULL;
1888
1889     if (sep != NULL && sep != Py_None) {
1890         if (PyString_Check(sep))
1891             return do_xstrip(self, striptype, sep);
1892 #ifdef Py_USING_UNICODE
1893         else if (PyUnicode_Check(sep)) {
1894             PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1895             PyObject *res;
1896             if (uniself==NULL)
1897                 return NULL;
1898             res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1899                 striptype, sep);
1900             Py_DECREF(uniself);
1901             return res;
1902         }
1903 #endif
1904         PyErr_Format(PyExc_TypeError,
1905 #ifdef Py_USING_UNICODE
1906                      "%s arg must be None, str or unicode",
1907 #else
1908                      "%s arg must be None or str",
1909 #endif
1910                      STRIPNAME(striptype));
1911         return NULL;
1912     }
1913
1914     return do_strip(self, striptype);
1915 }
1916
1917
1918 PyDoc_STRVAR(strip__doc__,
1919 "S.strip([chars]) -> string or unicode\n\
1920 \n\
1921 Return a copy of the string S with leading and trailing\n\
1922 whitespace removed.\n\
1923 If chars is given and not None, remove characters in chars instead.\n\
1924 If chars is unicode, S will be converted to unicode before stripping");
1925
1926 static PyObject *
1927 string_strip(PyStringObject *self, PyObject *args)
1928 {
1929     if (PyTuple_GET_SIZE(args) == 0)
1930         return do_strip(self, BOTHSTRIP); /* Common case */
1931     else
1932         return do_argstrip(self, BOTHSTRIP, args);
1933 }
1934
1935
1936 PyDoc_STRVAR(lstrip__doc__,
1937 "S.lstrip([chars]) -> string or unicode\n\
1938 \n\
1939 Return a copy of the string S with leading whitespace removed.\n\
1940 If chars is given and not None, remove characters in chars instead.\n\
1941 If chars is unicode, S will be converted to unicode before stripping");
1942
1943 static PyObject *
1944 string_lstrip(PyStringObject *self, PyObject *args)
1945 {
1946     if (PyTuple_GET_SIZE(args) == 0)
1947         return do_strip(self, LEFTSTRIP); /* Common case */
1948     else
1949         return do_argstrip(self, LEFTSTRIP, args);
1950 }
1951
1952
1953 PyDoc_STRVAR(rstrip__doc__,
1954 "S.rstrip([chars]) -> string or unicode\n\
1955 \n\
1956 Return a copy of the string S with trailing whitespace removed.\n\
1957 If chars is given and not None, remove characters in chars instead.\n\
1958 If chars is unicode, S will be converted to unicode before stripping");
1959
1960 static PyObject *
1961 string_rstrip(PyStringObject *self, PyObject *args)
1962 {
1963     if (PyTuple_GET_SIZE(args) == 0)
1964         return do_strip(self, RIGHTSTRIP); /* Common case */
1965     else
1966         return do_argstrip(self, RIGHTSTRIP, args);
1967 }
1968
1969
1970 PyDoc_STRVAR(lower__doc__,
1971 "S.lower() -> string\n\
1972 \n\
1973 Return a copy of the string S converted to lowercase.");
1974
1975 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1976 #ifndef _tolower
1977 #define _tolower tolower
1978 #endif
1979
1980 static PyObject *
1981 string_lower(PyStringObject *self)
1982 {
1983     char *s;
1984     Py_ssize_t i, n = PyString_GET_SIZE(self);
1985     PyObject *newobj;
1986
1987     newobj = PyString_FromStringAndSize(NULL, n);
1988     if (!newobj)
1989         return NULL;
1990
1991     s = PyString_AS_STRING(newobj);
1992
1993     Py_MEMCPY(s, PyString_AS_STRING(self), n);
1994
1995     for (i = 0; i < n; i++) {
1996         int c = Py_CHARMASK(s[i]);
1997         if (isupper(c))
1998             s[i] = _tolower(c);
1999     }
2000
2001     return newobj;
2002 }
2003
2004 PyDoc_STRVAR(upper__doc__,
2005 "S.upper() -> string\n\
2006 \n\
2007 Return a copy of the string S converted to uppercase.");
2008
2009 #ifndef _toupper
2010 #define _toupper toupper
2011 #endif
2012
2013 static PyObject *
2014 string_upper(PyStringObject *self)
2015 {
2016     char *s;
2017     Py_ssize_t i, n = PyString_GET_SIZE(self);
2018     PyObject *newobj;
2019
2020     newobj = PyString_FromStringAndSize(NULL, n);
2021     if (!newobj)
2022         return NULL;
2023
2024     s = PyString_AS_STRING(newobj);
2025
2026     Py_MEMCPY(s, PyString_AS_STRING(self), n);
2027
2028     for (i = 0; i < n; i++) {
2029         int c = Py_CHARMASK(s[i]);
2030         if (islower(c))
2031             s[i] = _toupper(c);
2032     }
2033
2034     return newobj;
2035 }
2036
2037 PyDoc_STRVAR(title__doc__,
2038 "S.title() -> string\n\
2039 \n\
2040 Return a titlecased version of S, i.e. words start with uppercase\n\
2041 characters, all remaining cased characters have lowercase.");
2042
2043 static PyObject*
2044 string_title(PyStringObject *self)
2045 {
2046     char *s = PyString_AS_STRING(self), *s_new;
2047     Py_ssize_t i, n = PyString_GET_SIZE(self);
2048     int previous_is_cased = 0;
2049     PyObject *newobj;
2050
2051     newobj = PyString_FromStringAndSize(NULL, n);
2052     if (newobj == NULL)
2053         return NULL;
2054     s_new = PyString_AsString(newobj);
2055     for (i = 0; i < n; i++) {
2056         int c = Py_CHARMASK(*s++);
2057         if (islower(c)) {
2058             if (!previous_is_cased)
2059                 c = toupper(c);
2060             previous_is_cased = 1;
2061         } else if (isupper(c)) {
2062             if (previous_is_cased)
2063                 c = tolower(c);
2064             previous_is_cased = 1;
2065         } else
2066             previous_is_cased = 0;
2067         *s_new++ = c;
2068     }
2069     return newobj;
2070 }
2071
2072 PyDoc_STRVAR(capitalize__doc__,
2073 "S.capitalize() -> string\n\
2074 \n\
2075 Return a copy of the string S with only its first character\n\
2076 capitalized.");
2077
2078 static PyObject *
2079 string_capitalize(PyStringObject *self)
2080 {
2081     char *s = PyString_AS_STRING(self), *s_new;
2082     Py_ssize_t i, n = PyString_GET_SIZE(self);
2083     PyObject *newobj;
2084
2085     newobj = PyString_FromStringAndSize(NULL, n);
2086     if (newobj == NULL)
2087         return NULL;
2088     s_new = PyString_AsString(newobj);
2089     if (0 < n) {
2090         int c = Py_CHARMASK(*s++);
2091         if (islower(c))
2092             *s_new = toupper(c);
2093         else
2094             *s_new = c;
2095         s_new++;
2096     }
2097     for (i = 1; i < n; i++) {
2098         int c = Py_CHARMASK(*s++);
2099         if (isupper(c))
2100             *s_new = tolower(c);
2101         else
2102             *s_new = c;
2103         s_new++;
2104     }
2105     return newobj;
2106 }
2107
2108
2109 PyDoc_STRVAR(count__doc__,
2110 "S.count(sub[, start[, end]]) -> int\n\
2111 \n\
2112 Return the number of non-overlapping occurrences of substring sub in\n\
2113 string S[start:end].  Optional arguments start and end are interpreted\n\
2114 as in slice notation.");
2115
2116 static PyObject *
2117 string_count(PyStringObject *self, PyObject *args)
2118 {
2119     PyObject *sub_obj;
2120     const char *str = PyString_AS_STRING(self), *sub;
2121     Py_ssize_t sub_len;
2122     Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2123
2124     if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2125         _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2126         return NULL;
2127
2128     if (PyString_Check(sub_obj)) {
2129         sub = PyString_AS_STRING(sub_obj);
2130         sub_len = PyString_GET_SIZE(sub_obj);
2131     }
2132 #ifdef Py_USING_UNICODE
2133     else if (PyUnicode_Check(sub_obj)) {
2134         Py_ssize_t count;
2135         count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2136         if (count == -1)
2137             return NULL;
2138         else
2139             return PyInt_FromSsize_t(count);
2140     }
2141 #endif
2142     else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2143         return NULL;
2144
2145     ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
2146
2147     return PyInt_FromSsize_t(
2148         stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2149         );
2150 }
2151
2152 PyDoc_STRVAR(swapcase__doc__,
2153 "S.swapcase() -> string\n\
2154 \n\
2155 Return a copy of the string S with uppercase characters\n\
2156 converted to lowercase and vice versa.");
2157
2158 static PyObject *
2159 string_swapcase(PyStringObject *self)
2160 {
2161     char *s = PyString_AS_STRING(self), *s_new;
2162     Py_ssize_t i, n = PyString_GET_SIZE(self);
2163     PyObject *newobj;
2164
2165     newobj = PyString_FromStringAndSize(NULL, n);
2166     if (newobj == NULL)
2167         return NULL;
2168     s_new = PyString_AsString(newobj);
2169     for (i = 0; i < n; i++) {
2170         int c = Py_CHARMASK(*s++);
2171         if (islower(c)) {
2172             *s_new = toupper(c);
2173         }
2174         else if (isupper(c)) {
2175             *s_new = tolower(c);
2176         }
2177         else
2178             *s_new = c;
2179         s_new++;
2180     }
2181     return newobj;
2182 }
2183
2184
2185 PyDoc_STRVAR(translate__doc__,
2186 "S.translate(table [,deletechars]) -> string\n\
2187 \n\
2188 Return a copy of the string S, where all characters occurring\n\
2189 in the optional argument deletechars are removed, and the\n\
2190 remaining characters have been mapped through the given\n\
2191 translation table, which must be a string of length 256.");
2192
2193 static PyObject *
2194 string_translate(PyStringObject *self, PyObject *args)
2195 {
2196     register char *input, *output;
2197     const char *table;
2198     register Py_ssize_t i, c, changed = 0;
2199     PyObject *input_obj = (PyObject*)self;
2200     const char *output_start, *del_table=NULL;
2201     Py_ssize_t inlen, tablen, dellen = 0;
2202     PyObject *result;
2203     int trans_table[256];
2204     PyObject *tableobj, *delobj = NULL;
2205
2206     if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2207                           &tableobj, &delobj))
2208         return NULL;
2209
2210     if (PyString_Check(tableobj)) {
2211         table = PyString_AS_STRING(tableobj);
2212         tablen = PyString_GET_SIZE(tableobj);
2213     }
2214     else if (tableobj == Py_None) {
2215         table = NULL;
2216         tablen = 256;
2217     }
2218 #ifdef Py_USING_UNICODE
2219     else if (PyUnicode_Check(tableobj)) {
2220         /* Unicode .translate() does not support the deletechars
2221            parameter; instead a mapping to None will cause characters
2222            to be deleted. */
2223         if (delobj != NULL) {
2224             PyErr_SetString(PyExc_TypeError,
2225             "deletions are implemented differently for unicode");
2226             return NULL;
2227         }
2228         return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2229     }
2230 #endif
2231     else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2232         return NULL;
2233
2234     if (tablen != 256) {
2235         PyErr_SetString(PyExc_ValueError,
2236           "translation table must be 256 characters long");
2237         return NULL;
2238     }
2239
2240     if (delobj != NULL) {
2241         if (PyString_Check(delobj)) {
2242             del_table = PyString_AS_STRING(delobj);
2243             dellen = PyString_GET_SIZE(delobj);
2244         }
2245 #ifdef Py_USING_UNICODE
2246         else if (PyUnicode_Check(delobj)) {
2247             PyErr_SetString(PyExc_TypeError,
2248             "deletions are implemented differently for unicode");
2249             return NULL;
2250         }
2251 #endif
2252         else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2253             return NULL;
2254     }
2255     else {
2256         del_table = NULL;
2257         dellen = 0;
2258     }
2259
2260     inlen = PyString_GET_SIZE(input_obj);
2261     result = PyString_FromStringAndSize((char *)NULL, inlen);
2262     if (result == NULL)
2263         return NULL;
2264     output_start = output = PyString_AsString(result);
2265     input = PyString_AS_STRING(input_obj);
2266
2267     if (dellen == 0 && table != NULL) {
2268         /* If no deletions are required, use faster code */
2269         for (i = inlen; --i >= 0; ) {
2270             c = Py_CHARMASK(*input++);
2271             if (Py_CHARMASK((*output++ = table[c])) != c)
2272                 changed = 1;
2273         }
2274         if (changed || !PyString_CheckExact(input_obj))
2275             return result;
2276         Py_DECREF(result);
2277         Py_INCREF(input_obj);
2278         return input_obj;
2279     }
2280
2281     if (table == NULL) {
2282         for (i = 0; i < 256; i++)
2283             trans_table[i] = Py_CHARMASK(i);
2284     } else {
2285         for (i = 0; i < 256; i++)
2286             trans_table[i] = Py_CHARMASK(table[i]);
2287     }
2288
2289     for (i = 0; i < dellen; i++)
2290         trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2291
2292     for (i = inlen; --i >= 0; ) {
2293         c = Py_CHARMASK(*input++);
2294         if (trans_table[c] != -1)
2295             if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2296                 continue;
2297         changed = 1;
2298     }
2299     if (!changed && PyString_CheckExact(input_obj)) {
2300         Py_DECREF(result);
2301         Py_INCREF(input_obj);
2302         return input_obj;
2303     }
2304     /* Fix the size of the resulting string */
2305     if (inlen > 0 && _PyString_Resize(&result, output - output_start))
2306         return NULL;
2307     return result;
2308 }
2309
2310
2311 /* find and count characters and substrings */
2312
2313 #define findchar(target, target_len, c)                         \
2314   ((char *)memchr((const void *)(target), c, target_len))
2315
2316 /* String ops must return a string.  */
2317 /* If the object is subclass of string, create a copy */
2318 Py_LOCAL(PyStringObject *)
2319 return_self(PyStringObject *self)
2320 {
2321     if (PyString_CheckExact(self)) {
2322         Py_INCREF(self);
2323         return self;
2324     }
2325     return (PyStringObject *)PyString_FromStringAndSize(
2326         PyString_AS_STRING(self),
2327         PyString_GET_SIZE(self));
2328 }
2329
2330 Py_LOCAL_INLINE(Py_ssize_t)
2331 countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2332 {
2333     Py_ssize_t count=0;
2334     const char *start=target;
2335     const char *end=target+target_len;
2336
2337     while ( (start=findchar(start, end-start, c)) != NULL ) {
2338         count++;
2339         if (count >= maxcount)
2340             break;
2341         start += 1;
2342     }
2343     return count;
2344 }
2345
2346
2347 /* Algorithms for different cases of string replacement */
2348
2349 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2350 Py_LOCAL(PyStringObject *)
2351 replace_interleave(PyStringObject *self,
2352                    const char *to_s, Py_ssize_t to_len,
2353                    Py_ssize_t maxcount)
2354 {
2355     char *self_s, *result_s;
2356     Py_ssize_t self_len, result_len;
2357     Py_ssize_t count, i, product;
2358     PyStringObject *result;
2359
2360     self_len = PyString_GET_SIZE(self);
2361
2362     /* 1 at the end plus 1 after every character */
2363     count = self_len+1;
2364     if (maxcount < count)
2365         count = maxcount;
2366
2367     /* Check for overflow */
2368     /*   result_len = count * to_len + self_len; */
2369     product = count * to_len;
2370     if (product / to_len != count) {
2371         PyErr_SetString(PyExc_OverflowError,
2372                         "replace string is too long");
2373         return NULL;
2374     }
2375     result_len = product + self_len;
2376     if (result_len < 0) {
2377         PyErr_SetString(PyExc_OverflowError,
2378                         "replace string is too long");
2379         return NULL;
2380     }
2381
2382     if (! (result = (PyStringObject *)
2383                      PyString_FromStringAndSize(NULL, result_len)) )
2384         return NULL;
2385
2386     self_s = PyString_AS_STRING(self);
2387     result_s = PyString_AS_STRING(result);
2388
2389     /* TODO: special case single character, which doesn't need memcpy */
2390
2391     /* Lay the first one down (guaranteed this will occur) */
2392     Py_MEMCPY(result_s, to_s, to_len);
2393     result_s += to_len;
2394     count -= 1;
2395
2396     for (i=0; i<count; i++) {
2397         *result_s++ = *self_s++;
2398         Py_MEMCPY(result_s, to_s, to_len);
2399         result_s += to_len;
2400     }
2401
2402     /* Copy the rest of the original string */
2403     Py_MEMCPY(result_s, self_s, self_len-i);
2404
2405     return result;
2406 }
2407
2408 /* Special case for deleting a single character */
2409 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2410 Py_LOCAL(PyStringObject *)
2411 replace_delete_single_character(PyStringObject *self,
2412                                 char from_c, Py_ssize_t maxcount)
2413 {
2414     char *self_s, *result_s;
2415     char *start, *next, *end;
2416     Py_ssize_t self_len, result_len;
2417     Py_ssize_t count;
2418     PyStringObject *result;
2419
2420     self_len = PyString_GET_SIZE(self);
2421     self_s = PyString_AS_STRING(self);
2422
2423     count = countchar(self_s, self_len, from_c, maxcount);
2424     if (count == 0) {
2425         return return_self(self);
2426     }
2427
2428     result_len = self_len - count;  /* from_len == 1 */
2429     assert(result_len>=0);
2430
2431     if ( (result = (PyStringObject *)
2432                     PyString_FromStringAndSize(NULL, result_len)) == NULL)
2433         return NULL;
2434     result_s = PyString_AS_STRING(result);
2435
2436     start = self_s;
2437     end = self_s + self_len;
2438     while (count-- > 0) {
2439         next = findchar(start, end-start, from_c);
2440         if (next == NULL)
2441             break;
2442         Py_MEMCPY(result_s, start, next-start);
2443         result_s += (next-start);
2444         start = next+1;
2445     }
2446     Py_MEMCPY(result_s, start, end-start);
2447
2448     return result;
2449 }
2450
2451 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2452
2453 Py_LOCAL(PyStringObject *)
2454 replace_delete_substring(PyStringObject *self,
2455                          const char *from_s, Py_ssize_t from_len,
2456                          Py_ssize_t maxcount) {
2457     char *self_s, *result_s;
2458     char *start, *next, *end;
2459     Py_ssize_t self_len, result_len;
2460     Py_ssize_t count, offset;
2461     PyStringObject *result;
2462
2463     self_len = PyString_GET_SIZE(self);
2464     self_s = PyString_AS_STRING(self);
2465
2466     count = stringlib_count(self_s, self_len,
2467                             from_s, from_len,
2468                             maxcount);
2469
2470     if (count == 0) {
2471         /* no matches */
2472         return return_self(self);
2473     }
2474
2475     result_len = self_len - (count * from_len);
2476     assert (result_len>=0);
2477
2478     if ( (result = (PyStringObject *)
2479           PyString_FromStringAndSize(NULL, result_len)) == NULL )
2480         return NULL;
2481
2482     result_s = PyString_AS_STRING(result);
2483
2484     start = self_s;
2485     end = self_s + self_len;
2486     while (count-- > 0) {
2487         offset = stringlib_find(start, end-start,
2488                                 from_s, from_len,
2489                                 0);
2490         if (offset == -1)
2491             break;
2492         next = start + offset;
2493
2494         Py_MEMCPY(result_s, start, next-start);
2495
2496         result_s += (next-start);
2497         start = next+from_len;
2498     }
2499     Py_MEMCPY(result_s, start, end-start);
2500     return result;
2501 }
2502
2503 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2504 Py_LOCAL(PyStringObject *)
2505 replace_single_character_in_place(PyStringObject *self,
2506                                   char from_c, char to_c,
2507                                   Py_ssize_t maxcount)
2508 {
2509     char *self_s, *result_s, *start, *end, *next;
2510     Py_ssize_t self_len;
2511     PyStringObject *result;
2512
2513     /* The result string will be the same size */
2514     self_s = PyString_AS_STRING(self);
2515     self_len = PyString_GET_SIZE(self);
2516
2517     next = findchar(self_s, self_len, from_c);
2518
2519     if (next == NULL) {
2520         /* No matches; return the original string */
2521         return return_self(self);
2522     }
2523
2524     /* Need to make a new string */
2525     result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2526     if (result == NULL)
2527         return NULL;
2528     result_s = PyString_AS_STRING(result);
2529     Py_MEMCPY(result_s, self_s, self_len);
2530
2531     /* change everything in-place, starting with this one */
2532     start =  result_s + (next-self_s);
2533     *start = to_c;
2534     start++;
2535     end = result_s + self_len;
2536
2537     while (--maxcount > 0) {
2538         next = findchar(start, end-start, from_c);
2539         if (next == NULL)
2540             break;
2541         *next = to_c;
2542         start = next+1;
2543     }
2544
2545     return result;
2546 }
2547
2548 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2549 Py_LOCAL(PyStringObject *)
2550 replace_substring_in_place(PyStringObject *self,
2551                            const char *from_s, Py_ssize_t from_len,
2552                            const char *to_s, Py_ssize_t to_len,
2553                            Py_ssize_t maxcount)
2554 {
2555     char *result_s, *start, *end;
2556     char *self_s;
2557     Py_ssize_t self_len, offset;
2558     PyStringObject *result;
2559
2560     /* The result string will be the same size */
2561
2562     self_s = PyString_AS_STRING(self);
2563     self_len = PyString_GET_SIZE(self);
2564
2565     offset = stringlib_find(self_s, self_len,
2566                             from_s, from_len,
2567                             0);
2568     if (offset == -1) {
2569         /* No matches; return the original string */
2570         return return_self(self);
2571     }
2572
2573     /* Need to make a new string */
2574     result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2575     if (result == NULL)
2576         return NULL;
2577     result_s = PyString_AS_STRING(result);
2578     Py_MEMCPY(result_s, self_s, self_len);
2579
2580     /* change everything in-place, starting with this one */
2581     start =  result_s + offset;
2582     Py_MEMCPY(start, to_s, from_len);
2583     start += from_len;
2584     end = result_s + self_len;
2585
2586     while ( --maxcount > 0) {
2587         offset = stringlib_find(start, end-start,
2588                                 from_s, from_len,
2589                                 0);
2590         if (offset==-1)
2591             break;
2592         Py_MEMCPY(start+offset, to_s, from_len);
2593         start += offset+from_len;
2594     }
2595
2596     return result;
2597 }
2598
2599 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2600 Py_LOCAL(PyStringObject *)
2601 replace_single_character(PyStringObject *self,
2602                          char from_c,
2603                          const char *to_s, Py_ssize_t to_len,
2604                          Py_ssize_t maxcount)
2605 {
2606     char *self_s, *result_s;
2607     char *start, *next, *end;
2608     Py_ssize_t self_len, result_len;
2609     Py_ssize_t count, product;
2610     PyStringObject *result;
2611
2612     self_s = PyString_AS_STRING(self);
2613     self_len = PyString_GET_SIZE(self);
2614
2615     count = countchar(self_s, self_len, from_c, maxcount);
2616     if (count == 0) {
2617         /* no matches, return unchanged */
2618         return return_self(self);
2619     }
2620
2621     /* use the difference between current and new, hence the "-1" */
2622     /*   result_len = self_len + count * (to_len-1)  */
2623     product = count * (to_len-1);
2624     if (product / (to_len-1) != count) {
2625         PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2626         return NULL;
2627     }
2628     result_len = self_len + product;
2629     if (result_len < 0) {
2630         PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2631         return NULL;
2632     }
2633
2634     if ( (result = (PyStringObject *)
2635           PyString_FromStringAndSize(NULL, result_len)) == NULL)
2636         return NULL;
2637     result_s = PyString_AS_STRING(result);
2638
2639     start = self_s;
2640     end = self_s + self_len;
2641     while (count-- > 0) {
2642         next = findchar(start, end-start, from_c);
2643         if (next == NULL)
2644             break;
2645
2646         if (next == start) {
2647             /* replace with the 'to' */
2648             Py_MEMCPY(result_s, to_s, to_len);
2649             result_s += to_len;
2650             start += 1;
2651         } else {
2652             /* copy the unchanged old then the 'to' */
2653             Py_MEMCPY(result_s, start, next-start);
2654             result_s += (next-start);
2655             Py_MEMCPY(result_s, to_s, to_len);
2656             result_s += to_len;
2657             start = next+1;
2658         }
2659     }
2660     /* Copy the remainder of the remaining string */
2661     Py_MEMCPY(result_s, start, end-start);
2662
2663     return result;
2664 }
2665
2666 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2667 Py_LOCAL(PyStringObject *)
2668 replace_substring(PyStringObject *self,
2669                   const char *from_s, Py_ssize_t from_len,
2670                   const char *to_s, Py_ssize_t to_len,
2671                   Py_ssize_t maxcount) {
2672     char *self_s, *result_s;
2673     char *start, *next, *end;
2674     Py_ssize_t self_len, result_len;
2675     Py_ssize_t count, offset, product;
2676     PyStringObject *result;
2677
2678     self_s = PyString_AS_STRING(self);
2679     self_len = PyString_GET_SIZE(self);
2680
2681     count = stringlib_count(self_s, self_len,
2682                             from_s, from_len,
2683                             maxcount);
2684
2685     if (count == 0) {
2686         /* no matches, return unchanged */
2687         return return_self(self);
2688     }
2689
2690     /* Check for overflow */
2691     /*    result_len = self_len + count * (to_len-from_len) */
2692     product = count * (to_len-from_len);
2693     if (product / (to_len-from_len) != count) {
2694         PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2695         return NULL;
2696     }
2697     result_len = self_len + product;
2698     if (result_len < 0) {
2699         PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2700         return NULL;
2701     }
2702
2703     if ( (result = (PyStringObject *)
2704           PyString_FromStringAndSize(NULL, result_len)) == NULL)
2705         return NULL;
2706     result_s = PyString_AS_STRING(result);
2707
2708     start = self_s;
2709     end = self_s + self_len;
2710     while (count-- > 0) {
2711         offset = stringlib_find(start, end-start,
2712                                 from_s, from_len,
2713                                 0);
2714         if (offset == -1)
2715             break;
2716         next = start+offset;
2717         if (next == start) {
2718             /* replace with the 'to' */
2719             Py_MEMCPY(result_s, to_s, to_len);
2720             result_s += to_len;
2721             start += from_len;
2722         } else {
2723             /* copy the unchanged old then the 'to' */
2724             Py_MEMCPY(result_s, start, next-start);
2725             result_s += (next-start);
2726             Py_MEMCPY(result_s, to_s, to_len);
2727             result_s += to_len;
2728             start = next+from_len;
2729         }
2730     }
2731     /* Copy the remainder of the remaining string */
2732     Py_MEMCPY(result_s, start, end-start);
2733
2734     return result;
2735 }
2736
2737
2738 Py_LOCAL(PyStringObject *)
2739 replace(PyStringObject *self,
2740     const char *from_s, Py_ssize_t from_len,
2741     const char *to_s, Py_ssize_t to_len,
2742     Py_ssize_t maxcount)
2743 {
2744     if (maxcount < 0) {
2745         maxcount = PY_SSIZE_T_MAX;
2746     } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2747         /* nothing to do; return the original string */
2748         return return_self(self);
2749     }
2750
2751     if (maxcount == 0 ||
2752         (from_len == 0 && to_len == 0)) {
2753         /* nothing to do; return the original string */
2754         return return_self(self);
2755     }
2756
2757     /* Handle zero-length special cases */
2758
2759     if (from_len == 0) {
2760         /* insert the 'to' string everywhere.   */
2761         /*    >>> "Python".replace("", ".")     */
2762         /*    '.P.y.t.h.o.n.'                   */
2763         return replace_interleave(self, to_s, to_len, maxcount);
2764     }
2765
2766     /* Except for "".replace("", "A") == "A" there is no way beyond this */
2767     /* point for an empty self string to generate a non-empty string */
2768     /* Special case so the remaining code always gets a non-empty string */
2769     if (PyString_GET_SIZE(self) == 0) {
2770         return return_self(self);
2771     }
2772
2773     if (to_len == 0) {
2774         /* delete all occurances of 'from' string */
2775         if (from_len == 1) {
2776             return replace_delete_single_character(
2777                 self, from_s[0], maxcount);
2778         } else {
2779             return replace_delete_substring(self, from_s, from_len, maxcount);
2780         }
2781     }
2782
2783     /* Handle special case where both strings have the same length */
2784
2785     if (from_len == to_len) {
2786         if (from_len == 1) {
2787             return replace_single_character_in_place(
2788                 self,
2789                 from_s[0],
2790                 to_s[0],
2791                 maxcount);
2792         } else {
2793             return replace_substring_in_place(
2794                 self, from_s, from_len, to_s, to_len, maxcount);
2795         }
2796     }
2797
2798     /* Otherwise use the more generic algorithms */
2799     if (from_len == 1) {
2800         return replace_single_character(self, from_s[0],
2801                                         to_s, to_len, maxcount);
2802     } else {
2803         /* len('from')>=2, len('to')>=1 */
2804         return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2805     }
2806 }
2807
2808 PyDoc_STRVAR(replace__doc__,
2809 "S.replace(old, new[, count]) -> string\n\
2810 \n\
2811 Return a copy of string S with all occurrences of substring\n\
2812 old replaced by new.  If the optional argument count is\n\
2813 given, only the first count occurrences are replaced.");
2814
2815 static PyObject *
2816 string_replace(PyStringObject *self, PyObject *args)
2817 {
2818     Py_ssize_t count = -1;
2819     PyObject *from, *to;
2820     const char *from_s, *to_s;
2821     Py_ssize_t from_len, to_len;
2822
2823     if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2824         return NULL;
2825
2826     if (PyString_Check(from)) {
2827         from_s = PyString_AS_STRING(from);
2828         from_len = PyString_GET_SIZE(from);
2829     }
2830 #ifdef Py_USING_UNICODE
2831     if (PyUnicode_Check(from))
2832         return PyUnicode_Replace((PyObject *)self,
2833                                  from, to, count);
2834 #endif
2835     else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2836         return NULL;
2837
2838     if (PyString_Check(to)) {
2839         to_s = PyString_AS_STRING(to);
2840         to_len = PyString_GET_SIZE(to);
2841     }
2842 #ifdef Py_USING_UNICODE
2843     else if (PyUnicode_Check(to))
2844         return PyUnicode_Replace((PyObject *)self,
2845                                  from, to, count);
2846 #endif
2847     else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2848         return NULL;
2849
2850     return (PyObject *)replace((PyStringObject *) self,
2851                                from_s, from_len,
2852                                to_s, to_len, count);
2853 }
2854
2855 /** End DALKE **/
2856
2857 /* Matches the end (direction >= 0) or start (direction < 0) of self
2858  * against substr, using the start and end arguments. Returns
2859  * -1 on error, 0 if not found and 1 if found.
2860  */
2861 Py_LOCAL(int)
2862 _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
2863                   Py_ssize_t end, int direction)
2864 {
2865     Py_ssize_t len = PyString_GET_SIZE(self);
2866     Py_ssize_t slen;
2867     const char* sub;
2868     const char* str;
2869
2870     if (PyString_Check(substr)) {
2871         sub = PyString_AS_STRING(substr);
2872         slen = PyString_GET_SIZE(substr);
2873     }
2874 #ifdef Py_USING_UNICODE
2875     else if (PyUnicode_Check(substr))
2876         return PyUnicode_Tailmatch((PyObject *)self,
2877                                    substr, start, end, direction);
2878 #endif
2879     else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2880         return -1;
2881     str = PyString_AS_STRING(self);
2882
2883     ADJUST_INDICES(start, end, len);
2884
2885     if (direction < 0) {
2886         /* startswith */
2887         if (start+slen > len)
2888             return 0;
2889     } else {
2890         /* endswith */
2891         if (end-start < slen || start > len)
2892             return 0;
2893
2894         if (end-slen > start)
2895             start = end - slen;
2896     }
2897     if (end-start >= slen)
2898         return ! memcmp(str+start, sub, slen);
2899     return 0;
2900 }
2901
2902
2903 PyDoc_STRVAR(startswith__doc__,
2904 "S.startswith(prefix[, start[, end]]) -> bool\n\
2905 \n\
2906 Return True if S starts with the specified prefix, False otherwise.\n\
2907 With optional start, test S beginning at that position.\n\
2908 With optional end, stop comparing S at that position.\n\
2909 prefix can also be a tuple of strings to try.");
2910
2911 static PyObject *
2912 string_startswith(PyStringObject *self, PyObject *args)
2913 {
2914     Py_ssize_t start = 0;
2915     Py_ssize_t end = PY_SSIZE_T_MAX;
2916     PyObject *subobj;
2917     int result;
2918
2919     if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2920         _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2921         return NULL;
2922     if (PyTuple_Check(subobj)) {
2923         Py_ssize_t i;
2924         for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2925             result = _string_tailmatch(self,
2926                             PyTuple_GET_ITEM(subobj, i),
2927                             start, end, -1);
2928             if (result == -1)
2929                 return NULL;
2930             else if (result) {
2931                 Py_RETURN_TRUE;
2932             }
2933         }
2934         Py_RETURN_FALSE;
2935     }
2936     result = _string_tailmatch(self, subobj, start, end, -1);
2937     if (result == -1)
2938         return NULL;
2939     else
2940         return PyBool_FromLong(result);
2941 }
2942
2943
2944 PyDoc_STRVAR(endswith__doc__,
2945 "S.endswith(suffix[, start[, end]]) -> bool\n\
2946 \n\
2947 Return True if S ends with the specified suffix, False otherwise.\n\
2948 With optional start, test S beginning at that position.\n\
2949 With optional end, stop comparing S at that position.\n\
2950 suffix can also be a tuple of strings to try.");
2951
2952 static PyObject *
2953 string_endswith(PyStringObject *self, PyObject *args)
2954 {
2955     Py_ssize_t start = 0;
2956     Py_ssize_t end = PY_SSIZE_T_MAX;
2957     PyObject *subobj;
2958     int result;
2959
2960     if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2961         _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2962         return NULL;
2963     if (PyTuple_Check(subobj)) {
2964         Py_ssize_t i;
2965         for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2966             result = _string_tailmatch(self,
2967                             PyTuple_GET_ITEM(subobj, i),
2968                             start, end, +1);
2969             if (result == -1)
2970                 return NULL;
2971             else if (result) {
2972                 Py_RETURN_TRUE;
2973             }
2974         }
2975         Py_RETURN_FALSE;
2976     }
2977     result = _string_tailmatch(self, subobj, start, end, +1);
2978     if (result == -1)
2979         return NULL;
2980     else
2981         return PyBool_FromLong(result);
2982 }
2983
2984
2985 PyDoc_STRVAR(encode__doc__,
2986 "S.encode([encoding[,errors]]) -> object\n\
2987 \n\
2988 Encodes S using the codec registered for encoding. encoding defaults\n\
2989 to the default encoding. errors may be given to set a different error\n\
2990 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2991 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2992 'xmlcharrefreplace' as well as any other name registered with\n\
2993 codecs.register_error that is able to handle UnicodeEncodeErrors.");
2994
2995 static PyObject *
2996 string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
2997 {
2998     static char *kwlist[] = {"encoding", "errors", 0};
2999     char *encoding = NULL;
3000     char *errors = NULL;
3001     PyObject *v;
3002
3003     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
3004                                      kwlist, &encoding, &errors))
3005         return NULL;
3006     v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
3007     if (v == NULL)
3008         goto onError;
3009     if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3010         PyErr_Format(PyExc_TypeError,
3011                      "encoder did not return a string/unicode object "
3012                      "(type=%.400s)",
3013                      Py_TYPE(v)->tp_name);
3014         Py_DECREF(v);
3015         return NULL;
3016     }
3017     return v;
3018
3019  onError:
3020     return NULL;
3021 }
3022
3023
3024 PyDoc_STRVAR(decode__doc__,
3025 "S.decode([encoding[,errors]]) -> object\n\
3026 \n\
3027 Decodes S using the codec registered for encoding. encoding defaults\n\
3028 to the default encoding. errors may be given to set a different error\n\
3029 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3030 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3031 as well as any other name registered with codecs.register_error that is\n\
3032 able to handle UnicodeDecodeErrors.");
3033
3034 static PyObject *
3035 string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
3036 {
3037     static char *kwlist[] = {"encoding", "errors", 0};
3038     char *encoding = NULL;
3039     char *errors = NULL;
3040     PyObject *v;
3041
3042     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
3043                                      kwlist, &encoding, &errors))
3044         return NULL;
3045     v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
3046     if (v == NULL)
3047         goto onError;
3048     if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3049         PyErr_Format(PyExc_TypeError,
3050                      "decoder did not return a string/unicode object "
3051                      "(type=%.400s)",
3052                      Py_TYPE(v)->tp_name);
3053         Py_DECREF(v);
3054         return NULL;
3055     }
3056     return v;
3057
3058  onError:
3059     return NULL;
3060 }
3061
3062
3063 PyDoc_STRVAR(expandtabs__doc__,
3064 "S.expandtabs([tabsize]) -> string\n\
3065 \n\
3066 Return a copy of S where all tab characters are expanded using spaces.\n\
3067 If tabsize is not given, a tab size of 8 characters is assumed.");
3068
3069 static PyObject*
3070 string_expandtabs(PyStringObject *self, PyObject *args)
3071 {
3072     const char *e, *p, *qe;
3073     char *q;
3074     Py_ssize_t i, j, incr;
3075     PyObject *u;
3076     int tabsize = 8;
3077
3078     if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3079         return NULL;
3080
3081     /* First pass: determine size of output string */
3082     i = 0; /* chars up to and including most recent \n or \r */
3083     j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3084     e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3085     for (p = PyString_AS_STRING(self); p < e; p++)
3086     if (*p == '\t') {
3087         if (tabsize > 0) {
3088             incr = tabsize - (j % tabsize);
3089             if (j > PY_SSIZE_T_MAX - incr)
3090                 goto overflow1;
3091             j += incr;
3092         }
3093     }
3094     else {
3095         if (j > PY_SSIZE_T_MAX - 1)
3096             goto overflow1;
3097         j++;
3098         if (*p == '\n' || *p == '\r') {
3099             if (i > PY_SSIZE_T_MAX - j)
3100                 goto overflow1;
3101             i += j;
3102             j = 0;
3103         }
3104     }
3105
3106     if (i > PY_SSIZE_T_MAX - j)
3107         goto overflow1;
3108
3109     /* Second pass: create output string and fill it */
3110     u = PyString_FromStringAndSize(NULL, i + j);
3111     if (!u)
3112         return NULL;
3113
3114     j = 0; /* same as in first pass */
3115     q = PyString_AS_STRING(u); /* next output char */
3116     qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
3117
3118     for (p = PyString_AS_STRING(self); p < e; p++)
3119     if (*p == '\t') {
3120         if (tabsize > 0) {
3121             i = tabsize - (j % tabsize);
3122             j += i;
3123             while (i--) {
3124                 if (q >= qe)
3125                     goto overflow2;
3126                 *q++ = ' ';
3127             }
3128         }
3129     }
3130     else {
3131         if (q >= qe)
3132             goto overflow2;
3133         *q++ = *p;
3134         j++;
3135         if (*p == '\n' || *p == '\r')
3136             j = 0;
3137     }
3138
3139     return u;
3140
3141   overflow2:
3142     Py_DECREF(u);
3143   overflow1:
3144     PyErr_SetString(PyExc_OverflowError, "new string is too long");
3145     return NULL;
3146 }
3147
3148 Py_LOCAL_INLINE(PyObject *)
3149 pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
3150 {
3151     PyObject *u;
3152
3153     if (left < 0)
3154         left = 0;
3155     if (right < 0)
3156         right = 0;
3157
3158     if (left == 0 && right == 0 && PyString_CheckExact(self)) {
3159         Py_INCREF(self);
3160         return (PyObject *)self;
3161     }
3162
3163     u = PyString_FromStringAndSize(NULL,
3164                                    left + PyString_GET_SIZE(self) + right);
3165     if (u) {
3166         if (left)
3167             memset(PyString_AS_STRING(u), fill, left);
3168         Py_MEMCPY(PyString_AS_STRING(u) + left,
3169                PyString_AS_STRING(self),
3170                PyString_GET_SIZE(self));
3171         if (right)
3172             memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3173                fill, right);
3174     }
3175
3176     return u;
3177 }
3178
3179 PyDoc_STRVAR(ljust__doc__,
3180 "S.ljust(width[, fillchar]) -> string\n"
3181 "\n"
3182 "Return S left-justified in a string of length width. Padding is\n"
3183 "done using the specified fill character (default is a space).");
3184
3185 static PyObject *
3186 string_ljust(PyStringObject *self, PyObject *args)
3187 {
3188     Py_ssize_t width;
3189     char fillchar = ' ';
3190
3191     if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3192         return NULL;
3193
3194     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3195         Py_INCREF(self);
3196         return (PyObject*) self;
3197     }
3198
3199     return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
3200 }
3201
3202
3203 PyDoc_STRVAR(rjust__doc__,
3204 "S.rjust(width[, fillchar]) -> string\n"
3205 "\n"
3206 "Return S right-justified in a string of length width. Padding is\n"
3207 "done using the specified fill character (default is a space)");
3208
3209 static PyObject *
3210 string_rjust(PyStringObject *self, PyObject *args)
3211 {
3212     Py_ssize_t width;
3213     char fillchar = ' ';
3214
3215     if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3216         return NULL;
3217
3218     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3219         Py_INCREF(self);
3220         return (PyObject*) self;
3221     }
3222
3223     return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
3224 }
3225
3226
3227 PyDoc_STRVAR(center__doc__,
3228 "S.center(width[, fillchar]) -> string\n"
3229 "\n"
3230 "Return S centered in a string of length width. Padding is\n"
3231 "done using the specified fill character (default is a space)");
3232
3233 static PyObject *
3234 string_center(PyStringObject *self, PyObject *args)
3235 {
3236     Py_ssize_t marg, left;
3237     Py_ssize_t width;
3238     char fillchar = ' ';
3239
3240     if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3241         return NULL;
3242
3243     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3244         Py_INCREF(self);
3245         return (PyObject*) self;
3246     }
3247
3248     marg = width - PyString_GET_SIZE(self);
3249     left = marg / 2 + (marg & width & 1);
3250
3251     return pad(self, left, marg - left, fillchar);
3252 }
3253
3254 PyDoc_STRVAR(zfill__doc__,
3255 "S.zfill(width) -> string\n"
3256 "\n"
3257 "Pad a numeric string S with zeros on the left, to fill a field\n"
3258 "of the specified width.  The string S is never truncated.");
3259
3260 static PyObject *
3261 string_zfill(PyStringObject *self, PyObject *args)
3262 {
3263     Py_ssize_t fill;
3264     PyObject *s;
3265     char *p;
3266     Py_ssize_t width;
3267
3268     if (!PyArg_ParseTuple(args, "n:zfill", &width))
3269         return NULL;
3270
3271     if (PyString_GET_SIZE(self) >= width) {
3272         if (PyString_CheckExact(self)) {
3273             Py_INCREF(self);
3274             return (PyObject*) self;
3275         }
3276         else
3277             return PyString_FromStringAndSize(
3278             PyString_AS_STRING(self),
3279             PyString_GET_SIZE(self)
3280             );
3281     }
3282
3283     fill = width - PyString_GET_SIZE(self);
3284
3285     s = pad(self, fill, 0, '0');
3286
3287     if (s == NULL)
3288         return NULL;
3289
3290     p = PyString_AS_STRING(s);
3291     if (p[fill] == '+' || p[fill] == '-') {
3292         /* move sign to beginning of string */
3293         p[0] = p[fill];
3294         p[fill] = '0';
3295     }
3296
3297     return (PyObject*) s;
3298 }
3299
3300 PyDoc_STRVAR(isspace__doc__,
3301 "S.isspace() -> bool\n\
3302 \n\
3303 Return True if all characters in S are whitespace\n\
3304 and there is at least one character in S, False otherwise.");
3305
3306 static PyObject*
3307 string_isspace(PyStringObject *self)
3308 {
3309     register const unsigned char *p
3310         = (unsigned char *) PyString_AS_STRING(self);
3311     register const unsigned char *e;
3312
3313     /* Shortcut for single character strings */
3314     if (PyString_GET_SIZE(self) == 1 &&
3315         isspace(*p))
3316         return PyBool_FromLong(1);
3317
3318     /* Special case for empty strings */
3319     if (PyString_GET_SIZE(self) == 0)
3320         return PyBool_FromLong(0);
3321
3322     e = p + PyString_GET_SIZE(self);
3323     for (; p < e; p++) {
3324         if (!isspace(*p))
3325             return PyBool_FromLong(0);
3326     }
3327     return PyBool_FromLong(1);
3328 }
3329
3330
3331 PyDoc_STRVAR(isalpha__doc__,
3332 "S.isalpha() -> bool\n\
3333 \n\
3334 Return True if all characters in S are alphabetic\n\
3335 and there is at least one character in S, False otherwise.");
3336
3337 static PyObject*
3338 string_isalpha(PyStringObject *self)
3339 {
3340     register const unsigned char *p
3341         = (unsigned char *) PyString_AS_STRING(self);
3342     register const unsigned char *e;
3343
3344     /* Shortcut for single character strings */
3345     if (PyString_GET_SIZE(self) == 1 &&
3346         isalpha(*p))
3347         return PyBool_FromLong(1);
3348
3349     /* Special case for empty strings */
3350     if (PyString_GET_SIZE(self) == 0)
3351         return PyBool_FromLong(0);
3352
3353     e = p + PyString_GET_SIZE(self);
3354     for (; p < e; p++) {
3355         if (!isalpha(*p))
3356             return PyBool_FromLong(0);
3357     }
3358     return PyBool_FromLong(1);
3359 }
3360
3361
3362 PyDoc_STRVAR(isalnum__doc__,
3363 "S.isalnum() -> bool\n\
3364 \n\
3365 Return True if all characters in S are alphanumeric\n\
3366 and there is at least one character in S, False otherwise.");
3367
3368 static PyObject*
3369 string_isalnum(PyStringObject *self)
3370 {
3371     register const unsigned char *p
3372         = (unsigned char *) PyString_AS_STRING(self);
3373     register const unsigned char *e;
3374
3375     /* Shortcut for single character strings */
3376     if (PyString_GET_SIZE(self) == 1 &&
3377         isalnum(*p))
3378         return PyBool_FromLong(1);
3379
3380     /* Special case for empty strings */
3381     if (PyString_GET_SIZE(self) == 0)
3382         return PyBool_FromLong(0);
3383
3384     e = p + PyString_GET_SIZE(self);
3385     for (; p < e; p++) {
3386         if (!isalnum(*p))
3387             return PyBool_FromLong(0);
3388     }
3389     return PyBool_FromLong(1);
3390 }
3391
3392
3393 PyDoc_STRVAR(isdigit__doc__,
3394 "S.isdigit() -> bool\n\
3395 \n\
3396 Return True if all characters in S are digits\n\
3397 and there is at least one character in S, False otherwise.");
3398
3399 static PyObject*
3400 string_isdigit(PyStringObject *self)
3401 {
3402     register const unsigned char *p
3403         = (unsigned char *) PyString_AS_STRING(self);
3404     register const unsigned char *e;
3405
3406     /* Shortcut for single character strings */
3407     if (PyString_GET_SIZE(self) == 1 &&
3408         isdigit(*p))
3409         return PyBool_FromLong(1);
3410
3411     /* Special case for empty strings */
3412     if (PyString_GET_SIZE(self) == 0)
3413         return PyBool_FromLong(0);
3414
3415     e = p + PyString_GET_SIZE(self);
3416     for (; p < e; p++) {
3417         if (!isdigit(*p))
3418             return PyBool_FromLong(0);
3419     }
3420     return PyBool_FromLong(1);
3421 }
3422
3423
3424 PyDoc_STRVAR(islower__doc__,
3425 "S.islower() -> bool\n\
3426 \n\
3427 Return True if all cased characters in S are lowercase and there is\n\
3428 at least one cased character in S, False otherwise.");
3429
3430 static PyObject*
3431 string_islower(PyStringObject *self)
3432 {
3433     register const unsigned char *p
3434         = (unsigned char *) PyString_AS_STRING(self);
3435     register const unsigned char *e;
3436     int cased;
3437
3438     /* Shortcut for single character strings */
3439     if (PyString_GET_SIZE(self) == 1)
3440         return PyBool_FromLong(islower(*p) != 0);
3441
3442     /* Special case for empty strings */
3443     if (PyString_GET_SIZE(self) == 0)
3444         return PyBool_FromLong(0);
3445
3446     e = p + PyString_GET_SIZE(self);
3447     cased = 0;
3448     for (; p < e; p++) {
3449         if (isupper(*p))
3450             return PyBool_FromLong(0);
3451         else if (!cased && islower(*p))
3452             cased = 1;
3453     }
3454     return PyBool_FromLong(cased);
3455 }
3456
3457
3458 PyDoc_STRVAR(isupper__doc__,
3459 "S.isupper() -> bool\n\
3460 \n\
3461 Return True if all cased characters in S are uppercase and there is\n\
3462 at least one cased character in S, False otherwise.");
3463
3464 static PyObject*
3465 string_isupper(PyStringObject *self)
3466 {
3467     register const unsigned char *p
3468         = (unsigned char *) PyString_AS_STRING(self);
3469     register const unsigned char *e;
3470     int cased;
3471
3472     /* Shortcut for single character strings */
3473     if (PyString_GET_SIZE(self) == 1)
3474         return PyBool_FromLong(isupper(*p) != 0);
3475
3476     /* Special case for empty strings */
3477     if (PyString_GET_SIZE(self) == 0)
3478         return PyBool_FromLong(0);
3479
3480     e = p + PyString_GET_SIZE(self);
3481     cased = 0;
3482     for (; p < e; p++) {
3483         if (islower(*p))
3484             return PyBool_FromLong(0);
3485         else if (!cased && isupper(*p))
3486             cased = 1;
3487     }
3488     return PyBool_FromLong(cased);
3489 }
3490
3491
3492 PyDoc_STRVAR(istitle__doc__,
3493 "S.istitle() -> bool\n\
3494 \n\
3495 Return True if S is a titlecased string and there is at least one\n\
3496 character in S, i.e. uppercase characters may only follow uncased\n\
3497 characters and lowercase characters only cased ones. Return False\n\
3498 otherwise.");
3499
3500 static PyObject*
3501 string_istitle(PyStringObject *self, PyObject *uncased)
3502 {
3503     register const unsigned char *p
3504         = (unsigned char *) PyString_AS_STRING(self);
3505     register const unsigned char *e;
3506     int cased, previous_is_cased;
3507
3508     /* Shortcut for single character strings */
3509     if (PyString_GET_SIZE(self) == 1)
3510         return PyBool_FromLong(isupper(*p) != 0);
3511
3512     /* Special case for empty strings */
3513     if (PyString_GET_SIZE(self) == 0)
3514         return PyBool_FromLong(0);
3515
3516     e = p + PyString_GET_SIZE(self);
3517     cased = 0;
3518     previous_is_cased = 0;
3519     for (; p < e; p++) {
3520         register const unsigned char ch = *p;
3521
3522         if (isupper(ch)) {
3523             if (previous_is_cased)
3524                 return PyBool_FromLong(0);
3525             previous_is_cased = 1;
3526             cased = 1;
3527         }
3528         else if (islower(ch)) {
3529             if (!previous_is_cased)
3530                 return PyBool_FromLong(0);
3531             previous_is_cased = 1;
3532             cased = 1;
3533         }
3534         else
3535             previous_is_cased = 0;
3536     }
3537     return PyBool_FromLong(cased);
3538 }
3539
3540
3541 PyDoc_STRVAR(splitlines__doc__,
3542 "S.splitlines([keepends]) -> list of strings\n\
3543 \n\
3544 Return a list of the lines in S, breaking at line boundaries.\n\
3545 Line breaks are not included in the resulting list unless keepends\n\
3546 is given and true.");
3547
3548 static PyObject*
3549 string_splitlines(PyStringObject *self, PyObject *args)
3550 {
3551     int keepends = 0;
3552
3553     if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3554         return NULL;
3555
3556     return stringlib_splitlines(
3557         (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
3558         keepends
3559     );
3560 }
3561
3562 PyDoc_STRVAR(sizeof__doc__,
3563 "S.__sizeof__() -> size of S in memory, in bytes");
3564
3565 static PyObject *
3566 string_sizeof(PyStringObject *v)
3567 {
3568     Py_ssize_t res;
3569     res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
3570     return PyInt_FromSsize_t(res);
3571 }
3572
3573 static PyObject *
3574 string_getnewargs(PyStringObject *v)
3575 {
3576     return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
3577 }
3578
3579
3580 #include "stringlib/string_format.h"
3581
3582 PyDoc_STRVAR(format__doc__,
3583 "S.format(*args, **kwargs) -> unicode\n\
3584 \n\
3585 ");
3586
3587 static PyObject *
3588 string__format__(PyObject* self, PyObject* args)
3589 {
3590     PyObject *format_spec;
3591     PyObject *result = NULL;
3592     PyObject *tmp = NULL;
3593
3594     /* If 2.x, convert format_spec to the same type as value */
3595     /* This is to allow things like u''.format('') */
3596     if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
3597         goto done;
3598     if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
3599         PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3600                      "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3601         goto done;
3602     }
3603     tmp = PyObject_Str(format_spec);
3604     if (tmp == NULL)
3605         goto done;
3606     format_spec = tmp;
3607
3608     result = _PyBytes_FormatAdvanced(self,
3609                                      PyString_AS_STRING(format_spec),
3610                                      PyString_GET_SIZE(format_spec));
3611 done:
3612     Py_XDECREF(tmp);
3613     return result;
3614 }
3615
3616 PyDoc_STRVAR(p_format__doc__,
3617 "S.__format__(format_spec) -> unicode\n\
3618 \n\
3619 ");
3620
3621
3622 static PyMethodDef
3623 string_methods[] = {
3624     /* Counterparts of the obsolete stropmodule functions; except
3625        string.maketrans(). */
3626     {"join", (PyCFunction)string_join, METH_O, join__doc__},
3627     {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3628     {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3629     {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3630     {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3631     {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3632     {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3633     {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3634     {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3635     {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3636     {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3637     {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3638     {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3639      capitalize__doc__},
3640     {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3641     {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3642      endswith__doc__},
3643     {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3644     {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3645     {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3646     {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3647     {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3648     {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3649     {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3650     {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3651     {"rpartition", (PyCFunction)string_rpartition, METH_O,
3652      rpartition__doc__},
3653     {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3654      startswith__doc__},
3655     {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3656     {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3657      swapcase__doc__},
3658     {"translate", (PyCFunction)string_translate, METH_VARARGS,
3659      translate__doc__},
3660     {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3661     {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3662     {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3663     {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3664     {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3665     {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
3666     {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
3667     {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
3668     {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
3669     {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
3670     {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
3671     {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3672      expandtabs__doc__},
3673     {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3674      splitlines__doc__},
3675     {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
3676      sizeof__doc__},
3677     {"__getnewargs__",          (PyCFunction)string_getnewargs, METH_NOARGS},
3678     {NULL,     NULL}                         /* sentinel */
3679 };
3680
3681 static PyObject *
3682 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3683
3684 static PyObject *
3685 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3686 {
3687     PyObject *x = NULL;
3688     static char *kwlist[] = {"object", 0};
3689
3690     if (type != &PyString_Type)
3691         return str_subtype_new(type, args, kwds);
3692     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3693         return NULL;
3694     if (x == NULL)
3695         return PyString_FromString("");
3696     return PyObject_Str(x);
3697 }
3698
3699 static PyObject *
3700 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3701 {
3702     PyObject *tmp, *pnew;
3703     Py_ssize_t n;
3704
3705     assert(PyType_IsSubtype(type, &PyString_Type));
3706     tmp = string_new(&PyString_Type, args, kwds);
3707     if (tmp == NULL)
3708         return NULL;
3709     assert(PyString_CheckExact(tmp));
3710     n = PyString_GET_SIZE(tmp);
3711     pnew = type->tp_alloc(type, n);
3712     if (pnew != NULL) {
3713         Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3714         ((PyStringObject *)pnew)->ob_shash =
3715             ((PyStringObject *)tmp)->ob_shash;
3716         ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3717     }
3718     Py_DECREF(tmp);
3719     return pnew;
3720 }
3721
3722 static PyObject *
3723 basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3724 {
3725     PyErr_SetString(PyExc_TypeError,
3726                     "The basestring type cannot be instantiated");
3727     return NULL;
3728 }
3729
3730 static PyObject *
3731 string_mod(PyObject *v, PyObject *w)
3732 {
3733     if (!PyString_Check(v)) {
3734         Py_INCREF(Py_NotImplemented);
3735         return Py_NotImplemented;
3736     }
3737     return PyString_Format(v, w);
3738 }
3739
3740 PyDoc_STRVAR(basestring_doc,
3741 "Type basestring cannot be instantiated; it is the base for str and unicode.");
3742
/* Number-protocol table for str.  Only nb_remainder is filled in, which
   routes the "%" operator to string_mod(); the slots after it are not
   listed and are therefore zero-initialized. */
static PyNumberMethods string_as_number = {
    0,                          /*nb_add*/
    0,                          /*nb_subtract*/
    0,                          /*nb_multiply*/
    0,                          /*nb_divide*/
    string_mod,                 /*nb_remainder*/
};
3750
3751
/* Type object for "basestring".  Almost every slot is empty: the type
   carries only a name, a docstring, PyBaseObject_Type as its base, and a
   tp_new (basestring_new) that always raises TypeError.  Py_TPFLAGS_BASETYPE
   allows other types to derive from it. */
PyTypeObject PyBaseString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "basestring",                               /* tp_name */
    0,                                          /* tp_basicsize */
    0,                                          /* tp_itemsize */
    0,                                          /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    basestring_doc,                             /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    basestring_new,                             /* tp_new */
    0,                                          /* tp_free */
};
3793
/* Docstring text stored in the tp_doc slot of PyString_Type. */
PyDoc_STRVAR(string_doc,
"str(object) -> string\n\
\n\
Return a nice string representation of the object.\n\
If the argument is a string, the return value is the same object.");
3799
/* Type object for `str`.
   The initializer is positional, so the order of entries must follow the
   PyTypeObject layout.  The basic size is PyStringObject_SIZE (header plus
   one byte for the trailing NUL) and each item is one char.  The type
   derives from PyBaseString_Type and wires in the number, sequence,
   mapping and buffer protocol tables defined in this file. */
PyTypeObject PyString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "str",                                      /* tp_name */
    PyStringObject_SIZE,                        /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    string_dealloc,                             /* tp_dealloc */
    (printfunc)string_print,                    /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    string_repr,                                /* tp_repr */
    &string_as_number,                          /* tp_as_number */
    &string_as_sequence,                        /* tp_as_sequence */
    &string_as_mapping,                         /* tp_as_mapping */
    (hashfunc)string_hash,                      /* tp_hash */
    0,                                          /* tp_call */
    string_str,                                 /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &string_as_buffer,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
        Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
        Py_TPFLAGS_HAVE_NEWBUFFER,              /* tp_flags */
    string_doc,                                 /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)string_richcompare,            /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    string_methods,                             /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseString_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    string_new,                                 /* tp_new */
    PyObject_Del,                               /* tp_free */
};
3843
3844 void
3845 PyString_Concat(register PyObject **pv, register PyObject *w)
3846 {
3847     register PyObject *v;
3848     if (*pv == NULL)
3849         return;
3850     if (w == NULL || !PyString_Check(*pv)) {
3851         Py_DECREF(*pv);
3852         *pv = NULL;
3853         return;
3854     }
3855     v = string_concat((PyStringObject *) *pv, w);
3856     Py_DECREF(*pv);
3857     *pv = v;
3858 }
3859
3860 void
3861 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
3862 {
3863     PyString_Concat(pv, w);
3864     Py_XDECREF(w);
3865 }
3866
3867
3868 /* The following function breaks the notion that strings are immutable:
3869    it changes the size of a string.  We get away with this only if there
3870    is only one module referencing the object.  You can also think of it
3871    as creating a new string object and destroying the old one, only
3872    more efficiently.  In any case, don't use this if the string may
3873    already be known to some other part of the code...
3874    Note that if there's not enough memory to resize the string, the original
3875    string object at *pv is deallocated, *pv is set to NULL, an "out of
3876    memory" exception is set, and -1 is returned.  Else (on success) 0 is
3877    returned, and the value in *pv may or may not be the same as on input.
3878    As always, an extra byte is allocated for a trailing \0 byte (newsize
3879    does *not* include that), and a trailing \0 byte is stored.
3880 */
3881
3882 int
3883 _PyString_Resize(PyObject **pv, Py_ssize_t newsize)
3884 {
3885     register PyObject *v;
3886     register PyStringObject *sv;
3887     v = *pv;
3888     if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
3889         PyString_CHECK_INTERNED(v)) {
3890         *pv = 0;
3891         Py_DECREF(v);
3892         PyErr_BadInternalCall();
3893         return -1;
3894     }
3895     /* XXX UNREF/NEWREF interface should be more symmetrical */
3896     _Py_DEC_REFTOTAL;
3897     _Py_ForgetReference(v);
3898     *pv = (PyObject *)
3899         PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
3900     if (*pv == NULL) {
3901         PyObject_Del(v);
3902         PyErr_NoMemory();
3903         return -1;
3904     }
3905     _Py_NewReference(*pv);
3906     sv = (PyStringObject *) *pv;
3907     Py_SIZE(sv) = newsize;
3908     sv->ob_sval[newsize] = '\0';
3909     sv->ob_shash = -1;          /* invalidate cached hash value */
3910     return 0;
3911 }
3912
3913 /* Helpers for formatstring */
3914
3915 Py_LOCAL_INLINE(PyObject *)
3916 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
3917 {
3918     Py_ssize_t argidx = *p_argidx;
3919     if (argidx < arglen) {
3920         (*p_argidx)++;
3921         if (arglen < 0)
3922             return args;
3923         else
3924             return PyTuple_GetItem(args, argidx);
3925     }
3926     PyErr_SetString(PyExc_TypeError,
3927                     "not enough arguments for format string");
3928     return NULL;
3929 }
3930
/* Format codes
 *
 * Bit flags describing a conversion specifier; each one corresponds to
 * the flag character shown next to it.  They are OR-ed together into a
 * single int `flags` value.
 *
 * F_LJUST      '-'
 * F_SIGN       '+'
 * F_BLANK      ' '
 * F_ALT        '#'
 * F_ZERO       '0'
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)
3943
3944 /* Returns a new reference to a PyString object, or NULL on failure. */
3945
3946 static PyObject *
3947 formatfloat(PyObject *v, int flags, int prec, int type)
3948 {
3949     char *p;
3950     PyObject *result;
3951     double x;
3952
3953     x = PyFloat_AsDouble(v);
3954     if (x == -1.0 && PyErr_Occurred()) {
3955         PyErr_Format(PyExc_TypeError, "float argument required, "
3956                      "not %.200s", Py_TYPE(v)->tp_name);
3957         return NULL;
3958     }
3959
3960     if (prec < 0)
3961         prec = 6;
3962
3963     p = PyOS_double_to_string(x, type, prec,
3964                               (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
3965
3966     if (p == NULL)
3967         return NULL;
3968     result = PyString_FromStringAndSize(p, strlen(p));
3969     PyMem_Free(p);
3970     return result;
3971 }
3972
3973 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3974  * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
3975  * Python's regular ints.
3976  * Return value:  a new PyString*, or NULL if error.
3977  *  .  *pbuf is set to point into it,
3978  *     *plen set to the # of chars following that.
3979  *     Caller must decref it when done using pbuf.
3980  *     The string starting at *pbuf is of the form
3981  *         "-"? ("0x" | "0X")? digit+
3982  *     "0x"/"0X" are present only for x and X conversions, with F_ALT
3983  *         set in flags.  The case of hex digits will be correct,
3984  *     There will be at least prec digits, zero-filled on the left if
3985  *         necessary to get that many.
3986  * val          object to be converted
3987  * flags        bitmask of format flags; only F_ALT is looked at
3988  * prec         minimum number of digits; 0-fill on left if needed
3989  * type         a character in [duoxX]; u acts the same as d
3990  *
3991  * CAUTION:  o, x and X conversions on regular ints can never
3992  * produce a '-' sign, but can for Python's unbounded ints.
3993  */
3994 PyObject*
3995 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3996                      char **pbuf, int *plen)
3997 {
3998     PyObject *result = NULL;
3999     char *buf;
4000     Py_ssize_t i;
4001     int sign;           /* 1 if '-', else 0 */
4002     int len;            /* number of characters */
4003     Py_ssize_t llen;
4004     int numdigits;      /* len == numnondigits + numdigits */
4005     int numnondigits = 0;
4006
4007     switch (type) {
4008     case 'd':
4009     case 'u':
4010         result = Py_TYPE(val)->tp_str(val);
4011         break;
4012     case 'o':
4013         result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4014         break;
4015     case 'x':
4016     case 'X':
4017         numnondigits = 2;
4018         result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4019         break;
4020     default:
4021         assert(!"'type' not in [duoxX]");
4022     }
4023     if (!result)
4024         return NULL;
4025
4026     buf = PyString_AsString(result);
4027     if (!buf) {
4028         Py_DECREF(result);
4029         return NULL;
4030     }
4031
4032     /* To modify the string in-place, there can only be one reference. */
4033     if (Py_REFCNT(result) != 1) {
4034         PyErr_BadInternalCall();
4035         return NULL;
4036     }
4037     llen = PyString_Size(result);
4038     if (llen > INT_MAX) {
4039         PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4040         return NULL;
4041     }
4042     len = (int)llen;
4043     if (buf[len-1] == 'L') {
4044         --len;
4045         buf[len] = '\0';
4046     }
4047     sign = buf[0] == '-';
4048     numnondigits += sign;
4049     numdigits = len - numnondigits;
4050     assert(numdigits > 0);
4051
4052     /* Get rid of base marker unless F_ALT */
4053     if ((flags & F_ALT) == 0) {
4054         /* Need to skip 0x, 0X or 0. */
4055         int skipped = 0;
4056         switch (type) {
4057         case 'o':
4058             assert(buf[sign] == '0');
4059             /* If 0 is only digit, leave it alone. */
4060             if (numdigits > 1) {
4061                 skipped = 1;
4062                 --numdigits;
4063             }
4064             break;
4065         case 'x':
4066         case 'X':
4067             assert(buf[sign] == '0');
4068             assert(buf[sign + 1] == 'x');
4069             skipped = 2;
4070             numnondigits -= 2;
4071             break;
4072         }
4073         if (skipped) {
4074             buf += skipped;
4075             len -= skipped;
4076             if (sign)
4077                 buf[0] = '-';
4078         }
4079         assert(len == numnondigits + numdigits);
4080         assert(numdigits > 0);
4081     }
4082
4083     /* Fill with leading zeroes to meet minimum width. */
4084     if (prec > numdigits) {
4085         PyObject *r1 = PyString_FromStringAndSize(NULL,
4086                                 numnondigits + prec);
4087         char *b1;
4088         if (!r1) {
4089             Py_DECREF(result);
4090             return NULL;
4091         }
4092         b1 = PyString_AS_STRING(r1);
4093         for (i = 0; i < numnondigits; ++i)
4094             *b1++ = *buf++;
4095         for (i = 0; i < prec - numdigits; i++)
4096             *b1++ = '0';
4097         for (i = 0; i < numdigits; i++)
4098             *b1++ = *buf++;
4099         *b1 = '\0';
4100         Py_DECREF(result);
4101         result = r1;
4102         buf = PyString_AS_STRING(result);
4103         len = numnondigits + prec;
4104     }
4105
4106     /* Fix up case for hex conversions. */
4107     if (type == 'X') {
4108         /* Need to convert all lower case letters to upper case.
4109            and need to convert 0x to 0X (and -0x to -0X). */
4110         for (i = 0; i < len; i++)
4111             if (buf[i] >= 'a' && buf[i] <= 'x')
4112                 buf[i] -= 'a'-'A';
4113     }
4114     *pbuf = buf;
4115     *plen = len;
4116     return result;
4117 }
4118
4119 Py_LOCAL_INLINE(int)
4120 formatint(char *buf, size_t buflen, int flags,
4121           int prec, int type, PyObject *v)
4122 {
4123     /* fmt = '%#.' + `prec` + 'l' + `type`
4124        worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4125        + 1 + 1 = 24 */
4126     char fmt[64];       /* plenty big enough! */
4127     char *sign;
4128     long x;
4129
4130     x = PyInt_AsLong(v);
4131     if (x == -1 && PyErr_Occurred()) {
4132         PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4133                      Py_TYPE(v)->tp_name);
4134         return -1;
4135     }
4136     if (x < 0 && type == 'u') {
4137         type = 'd';
4138     }
4139     if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4140         sign = "-";
4141     else
4142         sign = "";
4143     if (prec < 0)
4144         prec = 1;
4145
4146     if ((flags & F_ALT) &&
4147         (type == 'x' || type == 'X')) {
4148         /* When converting under %#x or %#X, there are a number
4149          * of issues that cause pain:
4150          * - when 0 is being converted, the C standard leaves off
4151          *   the '0x' or '0X', which is inconsistent with other
4152          *   %#x/%#X conversions and inconsistent with Python's
4153          *   hex() function
4154          * - there are platforms that violate the standard and
4155          *   convert 0 with the '0x' or '0X'
4156          *   (Metrowerks, Compaq Tru64)
4157          * - there are platforms that give '0x' when converting
4158          *   under %#X, but convert 0 in accordance with the
4159          *   standard (OS/2 EMX)
4160          *
4161          * We can achieve the desired consistency by inserting our
4162          * own '0x' or '0X' prefix, and substituting %x/%X in place
4163          * of %#x/%#X.
4164          *
4165          * Note that this is the same approach as used in
4166          * formatint() in unicodeobject.c
4167          */
4168         PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4169                       sign, type, prec, type);
4170     }
4171     else {
4172         PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4173                       sign, (flags&F_ALT) ? "#" : "",
4174                       prec, type);
4175     }
4176
4177     /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4178      * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4179      */
4180     if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4181         PyErr_SetString(PyExc_OverflowError,
4182             "formatted integer is too long (precision too large?)");
4183         return -1;
4184     }
4185     if (sign[0])
4186         PyOS_snprintf(buf, buflen, fmt, -x);
4187     else
4188         PyOS_snprintf(buf, buflen, fmt, x);
4189     return (int)strlen(buf);
4190 }
4191
4192 Py_LOCAL_INLINE(int)
4193 formatchar(char *buf, size_t buflen, PyObject *v)
4194 {
4195     /* presume that the buffer is at least 2 characters long */
4196     if (PyString_Check(v)) {
4197         if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4198             return -1;
4199     }
4200     else {
4201         if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4202             return -1;
4203     }
4204     buf[1] = '\0';
4205     return 1;
4206 }
4207
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length, in bytes, of the scratch buffer in which
   ints and chars are formatted (see formatint() and formatchar()).
   XXX This is a magic number.  formatint() checks the requested precision
   against the buffer length and formatchar() writes only two bytes, so
   no overflow occurs, but a better solution may be to malloc a buffer of
   appropriate size for each format.  For now, the current solution is
   sufficient.
*/
#define FORMATBUFLEN (size_t)120
4217
4218 PyObject *
4219 PyString_Format(PyObject *format, PyObject *args)
4220 {
4221     char *fmt, *res;
4222     Py_ssize_t arglen, argidx;
4223     Py_ssize_t reslen, rescnt, fmtcnt;
4224     int args_owned = 0;
4225     PyObject *result, *orig_args;
4226 #ifdef Py_USING_UNICODE
4227     PyObject *v, *w;
4228 #endif
4229     PyObject *dict = NULL;
4230     if (format == NULL || !PyString_Check(format) || args == NULL) {
4231         PyErr_BadInternalCall();
4232         return NULL;
4233     }
4234     orig_args = args;
4235     fmt = PyString_AS_STRING(format);
4236     fmtcnt = PyString_GET_SIZE(format);
4237     reslen = rescnt = fmtcnt + 100;
4238     result = PyString_FromStringAndSize((char *)NULL, reslen);
4239     if (result == NULL)
4240         return NULL;
4241     res = PyString_AsString(result);
4242     if (PyTuple_Check(args)) {
4243         arglen = PyTuple_GET_SIZE(args);
4244         argidx = 0;
4245     }
4246     else {
4247         arglen = -1;
4248         argidx = -2;
4249     }
4250     if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4251         !PyObject_TypeCheck(args, &PyBaseString_Type))
4252         dict = args;
4253     while (--fmtcnt >= 0) {
4254         if (*fmt != '%') {
4255             if (--rescnt < 0) {
4256                 rescnt = fmtcnt + 100;
4257                 reslen += rescnt;
4258                 if (_PyString_Resize(&result, reslen))
4259                     return NULL;
4260                 res = PyString_AS_STRING(result)
4261                     + reslen - rescnt;
4262                 --rescnt;
4263             }
4264             *res++ = *fmt++;
4265         }
4266         else {
4267             /* Got a format specifier */
4268             int flags = 0;
4269             Py_ssize_t width = -1;
4270             int prec = -1;
4271             int c = '\0';
4272             int fill;
4273             int isnumok;
4274             PyObject *v = NULL;
4275             PyObject *temp = NULL;
4276             char *pbuf;
4277             int sign;
4278             Py_ssize_t len;
4279             char formatbuf[FORMATBUFLEN];
4280                  /* For format{int,char}() */
4281 #ifdef Py_USING_UNICODE
4282             char *fmt_start = fmt;
4283             Py_ssize_t argidx_start = argidx;
4284 #endif
4285
4286             fmt++;
4287             if (*fmt == '(') {
4288                 char *keystart;
4289                 Py_ssize_t keylen;
4290                 PyObject *key;
4291                 int pcount = 1;
4292
4293                 if (dict == NULL) {
4294                     PyErr_SetString(PyExc_TypeError,
4295                              "format requires a mapping");
4296                     goto error;
4297                 }
4298                 ++fmt;
4299                 --fmtcnt;
4300                 keystart = fmt;
4301                 /* Skip over balanced parentheses */
4302                 while (pcount > 0 && --fmtcnt >= 0) {
4303                     if (*fmt == ')')
4304                         --pcount;
4305                     else if (*fmt == '(')
4306                         ++pcount;
4307                     fmt++;
4308                 }
4309                 keylen = fmt - keystart - 1;
4310                 if (fmtcnt < 0 || pcount > 0) {
4311                     PyErr_SetString(PyExc_ValueError,
4312                                "incomplete format key");
4313                     goto error;
4314                 }
4315                 key = PyString_FromStringAndSize(keystart,
4316                                                  keylen);
4317                 if (key == NULL)
4318                     goto error;
4319                 if (args_owned) {
4320                     Py_DECREF(args);
4321                     args_owned = 0;
4322                 }
4323                 args = PyObject_GetItem(dict, key);
4324                 Py_DECREF(key);
4325                 if (args == NULL) {
4326                     goto error;
4327                 }
4328                 args_owned = 1;
4329                 arglen = -1;
4330                 argidx = -2;
4331             }
4332             while (--fmtcnt >= 0) {
4333                 switch (c = *fmt++) {
4334                 case '-': flags |= F_LJUST; continue;
4335                 case '+': flags |= F_SIGN; continue;
4336                 case ' ': flags |= F_BLANK; continue;
4337                 case '#': flags |= F_ALT; continue;
4338                 case '0': flags |= F_ZERO; continue;
4339                 }
4340                 break;
4341             }
4342             if (c == '*') {
4343                 v = getnextarg(args, arglen, &argidx);
4344                 if (v == NULL)
4345                     goto error;
4346                 if (!PyInt_Check(v)) {
4347                     PyErr_SetString(PyExc_TypeError,
4348                                     "* wants int");
4349                     goto error;
4350                 }
4351                 width = PyInt_AsLong(v);
4352                 if (width < 0) {
4353                     flags |= F_LJUST;
4354                     width = -width;
4355                 }
4356                 if (--fmtcnt >= 0)
4357                     c = *fmt++;
4358             }
4359             else if (c >= 0 && isdigit(c)) {
4360                 width = c - '0';
4361                 while (--fmtcnt >= 0) {
4362                     c = Py_CHARMASK(*fmt++);
4363                     if (!isdigit(c))
4364                         break;
4365                     if ((width*10) / 10 != width) {
4366                         PyErr_SetString(
4367                             PyExc_ValueError,
4368                             "width too big");
4369                         goto error;
4370                     }
4371                     width = width*10 + (c - '0');
4372                 }
4373             }
4374             if (c == '.') {
4375                 prec = 0;
4376                 if (--fmtcnt >= 0)
4377                     c = *fmt++;
4378                 if (c == '*') {
4379                     v = getnextarg(args, arglen, &argidx);
4380                     if (v == NULL)
4381                         goto error;
4382                     if (!PyInt_Check(v)) {
4383                         PyErr_SetString(
4384                             PyExc_TypeError,
4385                             "* wants int");
4386                         goto error;
4387                     }
4388                     prec = PyInt_AsLong(v);
4389                     if (prec < 0)
4390                         prec = 0;
4391                     if (--fmtcnt >= 0)
4392                         c = *fmt++;
4393                 }
4394                 else if (c >= 0 && isdigit(c)) {
4395                     prec = c - '0';
4396                     while (--fmtcnt >= 0) {
4397                         c = Py_CHARMASK(*fmt++);
4398                         if (!isdigit(c))
4399                             break;
4400                         if ((prec*10) / 10 != prec) {
4401                             PyErr_SetString(
4402                                 PyExc_ValueError,
4403                                 "prec too big");
4404                             goto error;
4405                         }
4406                         prec = prec*10 + (c - '0');
4407                     }
4408                 }
4409             } /* prec */
4410             if (fmtcnt >= 0) {
4411                 if (c == 'h' || c == 'l' || c == 'L') {
4412                     if (--fmtcnt >= 0)
4413                         c = *fmt++;
4414                 }
4415             }
4416             if (fmtcnt < 0) {
4417                 PyErr_SetString(PyExc_ValueError,
4418                                 "incomplete format");
4419                 goto error;
4420             }
4421             if (c != '%') {
4422                 v = getnextarg(args, arglen, &argidx);
4423                 if (v == NULL)
4424                     goto error;
4425             }
4426             sign = 0;
4427             fill = ' ';
4428             switch (c) {
4429             case '%':
4430                 pbuf = "%";
4431                 len = 1;
4432                 break;
4433             case 's':
4434 #ifdef Py_USING_UNICODE
4435                 if (PyUnicode_Check(v)) {
4436                     fmt = fmt_start;
4437                     argidx = argidx_start;
4438                     goto unicode;
4439                 }
4440 #endif
4441                 temp = _PyObject_Str(v);
4442 #ifdef Py_USING_UNICODE
4443                 if (temp != NULL && PyUnicode_Check(temp)) {
4444                     Py_DECREF(temp);
4445                     fmt = fmt_start;
4446                     argidx = argidx_start;
4447                     goto unicode;
4448                 }
4449 #endif
4450                 /* Fall through */
4451             case 'r':
4452                 if (c == 'r')
4453                     temp = PyObject_Repr(v);
4454                 if (temp == NULL)
4455                     goto error;
4456                 if (!PyString_Check(temp)) {
4457                     PyErr_SetString(PyExc_TypeError,
4458                       "%s argument has non-string str()");
4459                     Py_DECREF(temp);
4460                     goto error;
4461                 }
4462                 pbuf = PyString_AS_STRING(temp);
4463                 len = PyString_GET_SIZE(temp);
4464                 if (prec >= 0 && len > prec)
4465                     len = prec;
4466                 break;
4467             case 'i':
4468             case 'd':
4469             case 'u':
4470             case 'o':
4471             case 'x':
4472             case 'X':
4473                 if (c == 'i')
4474                     c = 'd';
4475                 isnumok = 0;
4476                 if (PyNumber_Check(v)) {
4477                     PyObject *iobj=NULL;
4478
4479                     if (PyInt_Check(v) || (PyLong_Check(v))) {
4480                         iobj = v;
4481                         Py_INCREF(iobj);
4482                     }
4483                     else {
4484                         iobj = PyNumber_Int(v);
4485                         if (iobj==NULL) iobj = PyNumber_Long(v);
4486                     }
4487                     if (iobj!=NULL) {
4488                         if (PyInt_Check(iobj)) {
4489                             isnumok = 1;
4490                             pbuf = formatbuf;
4491                             len = formatint(pbuf,
4492                                             sizeof(formatbuf),
4493                                             flags, prec, c, iobj);
4494                             Py_DECREF(iobj);
4495                             if (len < 0)
4496                                 goto error;
4497                             sign = 1;
4498                         }
4499                         else if (PyLong_Check(iobj)) {
4500                             int ilen;
4501
4502                             isnumok = 1;
4503                             temp = _PyString_FormatLong(iobj, flags,
4504                                 prec, c, &pbuf, &ilen);
4505                             Py_DECREF(iobj);
4506                             len = ilen;
4507                             if (!temp)
4508                                 goto error;
4509                             sign = 1;
4510                         }
4511                         else {
4512                             Py_DECREF(iobj);
4513                         }
4514                     }
4515                 }
4516                 if (!isnumok) {
4517                     PyErr_Format(PyExc_TypeError,
4518                         "%%%c format: a number is required, "
4519                         "not %.200s", c, Py_TYPE(v)->tp_name);
4520                     goto error;
4521                 }
4522                 if (flags & F_ZERO)
4523                     fill = '0';
4524                 break;
4525             case 'e':
4526             case 'E':
4527             case 'f':
4528             case 'F':
4529             case 'g':
4530             case 'G':
4531                 temp = formatfloat(v, flags, prec, c);
4532                 if (temp == NULL)
4533                     goto error;
4534                 pbuf = PyString_AS_STRING(temp);
4535                 len = PyString_GET_SIZE(temp);
4536                 sign = 1;
4537                 if (flags & F_ZERO)
4538                     fill = '0';
4539                 break;
4540             case 'c':
4541 #ifdef Py_USING_UNICODE
4542                 if (PyUnicode_Check(v)) {
4543                     fmt = fmt_start;
4544                     argidx = argidx_start;
4545                     goto unicode;
4546                 }
4547 #endif
4548                 pbuf = formatbuf;
4549                 len = formatchar(pbuf, sizeof(formatbuf), v);
4550                 if (len < 0)
4551                     goto error;
4552                 break;
4553             default:
4554                 PyErr_Format(PyExc_ValueError,
4555                   "unsupported format character '%c' (0x%x) "
4556                   "at index %zd",
4557                   c, c,
4558                   (Py_ssize_t)(fmt - 1 -
4559                                PyString_AsString(format)));
4560                 goto error;
4561             }
4562             if (sign) {
4563                 if (*pbuf == '-' || *pbuf == '+') {
4564                     sign = *pbuf++;
4565                     len--;
4566                 }
4567                 else if (flags & F_SIGN)
4568                     sign = '+';
4569                 else if (flags & F_BLANK)
4570                     sign = ' ';
4571                 else
4572                     sign = 0;
4573             }
4574             if (width < len)
4575                 width = len;
4576             if (rescnt - (sign != 0) < width) {
4577                 reslen -= rescnt;
4578                 rescnt = width + fmtcnt + 100;
4579                 reslen += rescnt;
4580                 if (reslen < 0) {
4581                     Py_DECREF(result);
4582                     Py_XDECREF(temp);
4583                     return PyErr_NoMemory();
4584                 }
4585                 if (_PyString_Resize(&result, reslen)) {
4586                     Py_XDECREF(temp);
4587                     return NULL;
4588                 }
4589                 res = PyString_AS_STRING(result)
4590                     + reslen - rescnt;
4591             }
4592             if (sign) {
4593                 if (fill != ' ')
4594                     *res++ = sign;
4595                 rescnt--;
4596                 if (width > len)
4597                     width--;
4598             }
4599             if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4600                 assert(pbuf[0] == '0');
4601                 assert(pbuf[1] == c);
4602                 if (fill != ' ') {
4603                     *res++ = *pbuf++;
4604                     *res++ = *pbuf++;
4605                 }
4606                 rescnt -= 2;
4607                 width -= 2;
4608                 if (width < 0)
4609                     width = 0;
4610                 len -= 2;
4611             }
4612             if (width > len && !(flags & F_LJUST)) {
4613                 do {
4614                     --rescnt;
4615                     *res++ = fill;
4616                 } while (--width > len);
4617             }
4618             if (fill == ' ') {
4619                 if (sign)
4620                     *res++ = sign;
4621                 if ((flags & F_ALT) &&
4622                     (c == 'x' || c == 'X')) {
4623                     assert(pbuf[0] == '0');
4624                     assert(pbuf[1] == c);
4625                     *res++ = *pbuf++;
4626                     *res++ = *pbuf++;
4627                 }
4628             }
4629             Py_MEMCPY(res, pbuf, len);
4630             res += len;
4631             rescnt -= len;
4632             while (--width >= len) {
4633                 --rescnt;
4634                 *res++ = ' ';
4635             }
4636             if (dict && (argidx < arglen) && c != '%') {
4637                 PyErr_SetString(PyExc_TypeError,
4638                            "not all arguments converted during string formatting");
4639                 Py_XDECREF(temp);
4640                 goto error;
4641             }
4642             Py_XDECREF(temp);
4643         } /* '%' */
4644     } /* until end */
4645     if (argidx < arglen && !dict) {
4646         PyErr_SetString(PyExc_TypeError,
4647                         "not all arguments converted during string formatting");
4648         goto error;
4649     }
4650     if (args_owned) {
4651         Py_DECREF(args);
4652     }
4653     if (_PyString_Resize(&result, reslen - rescnt))
4654         return NULL;
4655     return result;
4656
4657 #ifdef Py_USING_UNICODE
4658  unicode:
4659     if (args_owned) {
4660         Py_DECREF(args);
4661         args_owned = 0;
4662     }
4663     /* Fiddle args right (remove the first argidx arguments) */
4664     if (PyTuple_Check(orig_args) && argidx > 0) {
4665         PyObject *v;
4666         Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4667         v = PyTuple_New(n);
4668         if (v == NULL)
4669             goto error;
4670         while (--n >= 0) {
4671             PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4672             Py_INCREF(w);
4673             PyTuple_SET_ITEM(v, n, w);
4674         }
4675         args = v;
4676     } else {
4677         Py_INCREF(orig_args);
4678         args = orig_args;
4679     }
4680     args_owned = 1;
4681     /* Take what we have of the result and let the Unicode formatting
4682        function format the rest of the input. */
4683     rescnt = res - PyString_AS_STRING(result);
4684     if (_PyString_Resize(&result, rescnt))
4685         goto error;
4686     fmtcnt = PyString_GET_SIZE(format) - \
4687              (fmt - PyString_AS_STRING(format));
4688     format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4689     if (format == NULL)
4690         goto error;
4691     v = PyUnicode_Format(format, args);
4692     Py_DECREF(format);
4693     if (v == NULL)
4694         goto error;
4695     /* Paste what we have (result) to what the Unicode formatting
4696        function returned (v) and return the result (or error) */
4697     w = PyUnicode_Concat(result, v);
4698     Py_DECREF(result);
4699     Py_DECREF(v);
4700     Py_DECREF(args);
4701     return w;
4702 #endif /* Py_USING_UNICODE */
4703
4704  error:
4705     Py_DECREF(result);
4706     if (args_owned) {
4707         Py_DECREF(args);
4708     }
4709     return NULL;
4710 }
4711
4712 void
4713 PyString_InternInPlace(PyObject **p)
4714 {
4715     register PyStringObject *s = (PyStringObject *)(*p);
4716     PyObject *t;
4717     if (s == NULL || !PyString_Check(s))
4718         Py_FatalError("PyString_InternInPlace: strings only please!");
4719     /* If it's a string subclass, we don't really know what putting
4720        it in the interned dict might do. */
4721     if (!PyString_CheckExact(s))
4722         return;
4723     if (PyString_CHECK_INTERNED(s))
4724         return;
4725     if (interned == NULL) {
4726         interned = PyDict_New();
4727         if (interned == NULL) {
4728             PyErr_Clear(); /* Don't leave an exception */
4729             return;
4730         }
4731     }
4732     t = PyDict_GetItem(interned, (PyObject *)s);
4733     if (t) {
4734         Py_INCREF(t);
4735         Py_DECREF(*p);
4736         *p = t;
4737         return;
4738     }
4739
4740     if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4741         PyErr_Clear();
4742         return;
4743     }
4744     /* The two references in interned are not counted by refcnt.
4745        The string deallocator will take care of this */
4746     Py_REFCNT(s) -= 2;
4747     PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
4748 }
4749
4750 void
4751 PyString_InternImmortal(PyObject **p)
4752 {
4753     PyString_InternInPlace(p);
4754     if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4755         PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4756         Py_INCREF(*p);
4757     }
4758 }
4759
4760
4761 PyObject *
4762 PyString_InternFromString(const char *cp)
4763 {
4764     PyObject *s = PyString_FromString(cp);
4765     if (s == NULL)
4766         return NULL;
4767     PyString_InternInPlace(&s);
4768     return s;
4769 }
4770
4771 void
4772 PyString_Fini(void)
4773 {
4774     int i;
4775     for (i = 0; i < UCHAR_MAX + 1; i++) {
4776         Py_XDECREF(characters[i]);
4777         characters[i] = NULL;
4778     }
4779     Py_XDECREF(nullstring);
4780     nullstring = NULL;
4781 }
4782
4783 void _Py_ReleaseInternedStrings(void)
4784 {
4785     PyObject *keys;
4786     PyStringObject *s;
4787     Py_ssize_t i, n;
4788     Py_ssize_t immortal_size = 0, mortal_size = 0;
4789
4790     if (interned == NULL || !PyDict_Check(interned))
4791         return;
4792     keys = PyDict_Keys(interned);
4793     if (keys == NULL || !PyList_Check(keys)) {
4794         PyErr_Clear();
4795         return;
4796     }
4797
4798     /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4799        detector, interned strings are not forcibly deallocated; rather, we
4800        give them their stolen references back, and then clear and DECREF
4801        the interned dict. */
4802
4803     n = PyList_GET_SIZE(keys);
4804     fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4805         n);
4806     for (i = 0; i < n; i++) {
4807         s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4808         switch (s->ob_sstate) {
4809         case SSTATE_NOT_INTERNED:
4810             /* XXX Shouldn't happen */
4811             break;
4812         case SSTATE_INTERNED_IMMORTAL:
4813             Py_REFCNT(s) += 1;
4814             immortal_size += Py_SIZE(s);
4815             break;
4816         case SSTATE_INTERNED_MORTAL:
4817             Py_REFCNT(s) += 2;
4818             mortal_size += Py_SIZE(s);
4819             break;
4820         default:
4821             Py_FatalError("Inconsistent interned string state.");
4822         }
4823         s->ob_sstate = SSTATE_NOT_INTERNED;
4824     }
4825     fprintf(stderr, "total size of all interned strings: "
4826                     "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4827                     "mortal/immortal\n", mortal_size, immortal_size);
4828     Py_DECREF(keys);
4829     PyDict_Clear(interned);
4830     Py_DECREF(interned);
4831     interned = NULL;
4832 }