Objects/stringobject.c

   1 /* String object implementation */
   2
   3 #include "Python.h"
   4
   5 #include <ctype.h>
   6
   7 #ifdef COUNT_ALLOCS
   8 int null_strings, one_strings;
   9 #endif
  10
  11 #if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
  12 #define UCHAR_MAX 255
  13 #endif
  14
  15 static PyStringObject *characters[UCHAR_MAX + 1];
  16 #ifndef DONT_SHARE_SHORT_STRINGS
  17 static PyStringObject *nullstring;
  18 #endif
  19
  20 /*
  21    PyString_FromStringAndSize() and PyString_FromString() try in certain cases
  22    to share string objects.  When the size of the string is zero, these
  23    routines always return a pointer to the same string object; when the size
  24    is one, they return a pointer to an already existing object if the contents
  25    of the string is known.  For PyString_FromString() this is always the case,
  26    for PyString_FromStringAndSize() this is the case when the first argument
  27    is not NULL.
  28
  29    A common practice of allocating a string and then filling it in or changing
  30    it must be done carefully.  It is only allowed to change the contents of
  31    the string if the object was gotten from PyString_FromStringAndSize() with
  32    a NULL first argument, because in the future these routines may try to do
  33    even more sharing of objects.
  34
  35    The string in the  `str' parameter does not have to be null-character
  36    terminated.  (Therefore it is safe to construct a substring by using
  37    `PyString_FromStringAndSize(origstring, substrlen)'.)
  38
  39    The parameter `size' denotes number of characters to allocate, not
  40    counting the null terminating character.  If the `str' argument is
  41    not NULL, then it points to a character buffer of length `size'. For
  42    PyString_FromString, this string must be null-terminated.
  43
  44    The member `op->ob_size' denotes the number of bytes of data in the string,
  45    not counting the null terminating character, and is therefore equal to the
  46    `size' parameter.
  47 */
  48 PyObject *
  49 PyString_FromStringAndSize(const char *str, int size)
  50 {
  51         register PyStringObject *op;
  52 #ifndef DONT_SHARE_SHORT_STRINGS
  53         if (size == 0 && (op = nullstring) != NULL) {
  54 #ifdef COUNT_ALLOCS
  55                 null_strings++;
  56 #endif
  57                 Py_INCREF(op);
  58                 return (PyObject *)op;
  59         }
  60         if (size == 1 && str != NULL &&
  61             (op = characters[*str & UCHAR_MAX]) != NULL)
  62         {
  63 #ifdef COUNT_ALLOCS
  64                 one_strings++;
  65 #endif
  66                 Py_INCREF(op);
  67                 return (PyObject *)op;
  68         }
  69 #endif /* DONT_SHARE_SHORT_STRINGS */
  70
  71         /* PyObject_NewVar is inlined */
  72         op = (PyStringObject *)
  73                 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
  74         if (op == NULL)
  75                 return PyErr_NoMemory();
  76         PyObject_INIT_VAR(op, &PyString_Type, size);
  77 #ifdef CACHE_HASH
  78         op->ob_shash = -1;
  79 #endif
  80 #ifdef INTERN_STRINGS
  81         op->ob_sinterned = NULL;
  82 #endif
  83         if (str != NULL)
  84                 memcpy(op->ob_sval, str, size);
  85         op->ob_sval[size] = '\0';
  86 #ifndef DONT_SHARE_SHORT_STRINGS
  87         if (size == 0) {
  88                 PyObject *t = (PyObject *)op;
  89                 PyString_InternInPlace(&t);
  90                 op = (PyStringObject *)t;
  91                 nullstring = op;
  92                 Py_INCREF(op);
  93         } else if (size == 1 && str != NULL) {
  94                 PyObject *t = (PyObject *)op;
  95                 PyString_InternInPlace(&t);
  96                 op = (PyStringObject *)t;
  97                 characters[*str & UCHAR_MAX] = op;
  98                 Py_INCREF(op);
  99         }
 100 #endif
 101         return (PyObject *) op;
 102 }
 103
 104 PyObject *
 105 PyString_FromString(const char *str)
 106 {
 107         register size_t size;
 108         register PyStringObject *op;
 109
 110         assert(str != NULL);
 111         size = strlen(str);
 112         if (size > INT_MAX) {
 113                 PyErr_SetString(PyExc_OverflowError,
 114                         "string is too long for a Python string");
 115                 return NULL;
 116         }
 117 #ifndef DONT_SHARE_SHORT_STRINGS
 118         if (size == 0 && (op = nullstring) != NULL) {
 119 #ifdef COUNT_ALLOCS
 120                 null_strings++;
 121 #endif
 122                 Py_INCREF(op);
 123                 return (PyObject *)op;
 124         }
 125         if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
 126 #ifdef COUNT_ALLOCS
 127                 one_strings++;
 128 #endif
 129                 Py_INCREF(op);
 130                 return (PyObject *)op;
 131         }
 132 #endif /* DONT_SHARE_SHORT_STRINGS */
 133
 134         /* PyObject_NewVar is inlined */
 135         op = (PyStringObject *)
 136                 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
 137         if (op == NULL)
 138                 return PyErr_NoMemory();
 139         PyObject_INIT_VAR(op, &PyString_Type, size);
 140 #ifdef CACHE_HASH
 141         op->ob_shash = -1;
 142 #endif
 143 #ifdef INTERN_STRINGS
 144         op->ob_sinterned = NULL;
 145 #endif
 146         memcpy(op->ob_sval, str, size+1);
 147 #ifndef DONT_SHARE_SHORT_STRINGS
 148         if (size == 0) {
 149                 PyObject *t = (PyObject *)op;
 150                 PyString_InternInPlace(&t);
 151                 op = (PyStringObject *)t;
 152                 nullstring = op;
 153                 Py_INCREF(op);
 154         } else if (size == 1) {
 155                 PyObject *t = (PyObject *)op;
 156                 PyString_InternInPlace(&t);
 157                 op = (PyStringObject *)t;
 158                 characters[*str & UCHAR_MAX] = op;
 159                 Py_INCREF(op);
 160         }
 161 #endif
 162         return (PyObject *) op;
 163 }
 164
 165 PyObject *
 166 PyString_FromFormatV(const char *format, va_list vargs)
 167 {
 168         va_list count;
 169         int n = 0;
 170         const char* f;
 171         char *s;
 172         PyObject* string;
 173
 174 #ifdef VA_LIST_IS_ARRAY
 175         memcpy(count, vargs, sizeof(va_list));
 176 #else
 177 #ifdef  __va_copy
 178         __va_copy(count, vargs);
 179 #else
 180         count = vargs;
 181 #endif
 182 #endif
 183         /* step 1: figure out how large a buffer we need */
 184         for (f = format; *f; f++) {
 185                 if (*f == '%') {
 186                         const char* p = f;
 187                         while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
 188                                 ;
 189
 190                         /* skip the 'l' in %ld, since it doesn't change the
 191                            width.  although only %d is supported (see
 192                            "expand" section below), others can be easily
 193                            added */
 194                         if (*f == 'l' && *(f+1) == 'd')
 195                                 ++f;
 196
 197                         switch (*f) {
 198                         case 'c':
 199                                 (void)va_arg(count, int);
 200                                 /* fall through... */
 201                         case '%':
 202                                 n++;
 203                                 break;
 204                         case 'd': case 'i': case 'x':
 205                                 (void) va_arg(count, int);
 206                                 /* 20 bytes is enough to hold a 64-bit
 207                                    integer.  Decimal takes the most space.
 208                                    This isn't enough for octal. */
 209                                 n += 20;
 210                                 break;
 211                         case 's':
 212                                 s = va_arg(count, char*);
 213                                 n += strlen(s);
 214                                 break;
 215                         case 'p':
 216                                 (void) va_arg(count, int);
 217                                 /* maximum 64-bit pointer representation:
 218                                  * 0xffffffffffffffff
 219                                  * so 19 characters is enough.
 220                                  * XXX I count 18 -- what's the extra for?
 221                                  */
 222                                 n += 19;
 223                                 break;
 224                         default:
 225                                 /* if we stumble upon an unknown
 226                                    formatting code, copy the rest of
 227                                    the format string to the output
 228                                    string. (we cannot just skip the
 229                                    code, since there's no way to know
 230                                    what's in the argument list) */
 231                                 n += strlen(p);
 232                                 goto expand;
 233                         }
 234                 } else
 235                         n++;
 236         }
 237  expand:
 238         /* step 2: fill the buffer */
 239         /* Since we've analyzed how much space we need for the worst case,
 240            use sprintf directly instead of the slower PyOS_snprintf. */
 241         string = PyString_FromStringAndSize(NULL, n);
 242         if (!string)
 243                 return NULL;
 244
 245         s = PyString_AsString(string);
 246
 247         for (f = format; *f; f++) {
 248                 if (*f == '%') {
 249                         const char* p = f++;
 250                         int i, longflag = 0;
 251                         /* parse the width.precision part (we're only
 252                            interested in the precision value, if any) */
 253                         n = 0;
 254                         while (isdigit(Py_CHARMASK(*f)))
 255                                 n = (n*10) + *f++ - '0';
 256                         if (*f == '.') {
 257                                 f++;
 258                                 n = 0;
 259                                 while (isdigit(Py_CHARMASK(*f)))
 260                                         n = (n*10) + *f++ - '0';
 261                         }
 262                         while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
 263                                 f++;
 264                         /* handle the long flag, but only for %ld.  others
 265                            can be added when necessary. */
 266                         if (*f == 'l' && *(f+1) == 'd') {
 267                                 longflag = 1;
 268                                 ++f;
 269                         }
 270
 271                         switch (*f) {
 272                         case 'c':
 273                                 *s++ = va_arg(vargs, int);
 274                                 break;
 275                         case 'd':
 276                                 if (longflag)
 277                                         sprintf(s, "%ld", va_arg(vargs, long));
 278                                 else
 279                                         sprintf(s, "%d", va_arg(vargs, int));
 280                                 s += strlen(s);
 281                                 break;
 282                         case 'i':
 283                                 sprintf(s, "%i", va_arg(vargs, int));
 284                                 s += strlen(s);
 285                                 break;
 286                         case 'x':
 287                                 sprintf(s, "%x", va_arg(vargs, int));
 288                                 s += strlen(s);
 289                                 break;
 290                         case 's':
 291                                 p = va_arg(vargs, char*);
 292                                 i = strlen(p);
 293                                 if (n > 0 && i > n)
 294                                         i = n;
 295                                 memcpy(s, p, i);
 296                                 s += i;
 297                                 break;
 298                         case 'p':
 299                                 sprintf(s, "%p", va_arg(vargs, void*));
 300                                 /* %p is ill-defined:  ensure leading 0x. */
 301                                 if (s[1] == 'X')
 302                                         s[1] = 'x';
 303                                 else if (s[1] != 'x') {
 304                                         memmove(s+2, s, strlen(s)+1);
 305                                         s[0] = '0';
 306                                         s[1] = 'x';
 307                                 }
 308                                 s += strlen(s);
 309                                 break;
 310                         case '%':
 311                                 *s++ = '%';
 312                                 break;
 313                         default:
 314                                 strcpy(s, p);
 315                                 s += strlen(s);
 316                                 goto end;
 317                         }
 318                 } else
 319                         *s++ = *f;
 320         }
 321
 322  end:
 323         _PyString_Resize(&string, s - PyString_AS_STRING(string));
 324         return string;
 325 }
 326
 327 PyObject *
 328 PyString_FromFormat(const char *format, ...)
 329 {
 330         PyObject* ret;
 331         va_list vargs;
 332
 333 #ifdef HAVE_STDARG_PROTOTYPES
 334         va_start(vargs, format);
 335 #else
 336         va_start(vargs);
 337 #endif
 338         ret = PyString_FromFormatV(format, vargs);
 339         va_end(vargs);
 340         return ret;
 341 }
 342
 343
 344 PyObject *PyString_Decode(const char *s,
 345                           int size,
 346                           const char *encoding,
 347                           const char *errors)
 348 {
 349     PyObject *v, *str;
 350
 351     str = PyString_FromStringAndSize(s, size);
 352     if (str == NULL)
 353         return NULL;
 354     v = PyString_AsDecodedString(str, encoding, errors);
 355     Py_DECREF(str);
 356     return v;
 357 }
 358
 359 PyObject *PyString_AsDecodedObject(PyObject *str,
 360                                    const char *encoding,
 361                                    const char *errors)
 362 {
 363     PyObject *v;
 364
 365     if (!PyString_Check(str)) {
 366         PyErr_BadArgument();
 367         goto onError;
 368     }
 369
 370     if (encoding == NULL) {
 371 #ifdef Py_USING_UNICODE
 372         encoding = PyUnicode_GetDefaultEncoding();
 373 #else
 374         PyErr_SetString(PyExc_ValueError, "no encoding specified");
 375         goto onError;
 376 #endif
 377     }
 378
 379     /* Decode via the codec registry */
 380     v = PyCodec_Decode(str, encoding, errors);
 381     if (v == NULL)
 382         goto onError;
 383
 384     return v;
 385
 386  onError:
 387     return NULL;
 388 }
 389
 390 PyObject *PyString_AsDecodedString(PyObject *str,
 391                                    const char *encoding,
 392                                    const char *errors)
 393 {
 394     PyObject *v;
 395
 396     v = PyString_AsDecodedObject(str, encoding, errors);
 397     if (v == NULL)
 398         goto onError;
 399
 400 #ifdef Py_USING_UNICODE
 401     /* Convert Unicode to a string using the default encoding */
 402     if (PyUnicode_Check(v)) {
 403         PyObject *temp = v;
 404         v = PyUnicode_AsEncodedString(v, NULL, NULL);
 405         Py_DECREF(temp);
 406         if (v == NULL)
 407             goto onError;
 408     }
 409 #endif
 410     if (!PyString_Check(v)) {
 411         PyErr_Format(PyExc_TypeError,
 412                      "decoder did not return a string object (type=%.400s)",
 413                      v->ob_type->tp_name);
 414         Py_DECREF(v);
 415         goto onError;
 416     }
 417
 418     return v;
 419
 420  onError:
 421     return NULL;
 422 }
 423
 424 PyObject *PyString_Encode(const char *s,
 425                           int size,
 426                           const char *encoding,
 427                           const char *errors)
 428 {
 429     PyObject *v, *str;
 430
 431     str = PyString_FromStringAndSize(s, size);
 432     if (str == NULL)
 433         return NULL;
 434     v = PyString_AsEncodedString(str, encoding, errors);
 435     Py_DECREF(str);
 436     return v;
 437 }
 438
 439 PyObject *PyString_AsEncodedObject(PyObject *str,
 440                                    const char *encoding,
 441                                    const char *errors)
 442 {
 443     PyObject *v;
 444
 445     if (!PyString_Check(str)) {
 446         PyErr_BadArgument();
 447         goto onError;
 448     }
 449
 450     if (encoding == NULL) {
 451 #ifdef Py_USING_UNICODE
 452         encoding = PyUnicode_GetDefaultEncoding();
 453 #else
 454         PyErr_SetString(PyExc_ValueError, "no encoding specified");
 455         goto onError;
 456 #endif
 457     }
 458
 459     /* Encode via the codec registry */
 460     v = PyCodec_Encode(str, encoding, errors);
 461     if (v == NULL)
 462         goto onError;
 463
 464     return v;
 465
 466  onError:
 467     return NULL;
 468 }
 469
 470 PyObject *PyString_AsEncodedString(PyObject *str,
 471                                    const char *encoding,
 472                                    const char *errors)
 473 {
 474     PyObject *v;
 475
 476     v = PyString_AsEncodedObject(str, encoding, errors);
 477     if (v == NULL)
 478         goto onError;
 479
 480 #ifdef Py_USING_UNICODE
 481     /* Convert Unicode to a string using the default encoding */
 482     if (PyUnicode_Check(v)) {
 483         PyObject *temp = v;
 484         v = PyUnicode_AsEncodedString(v, NULL, NULL);
 485         Py_DECREF(temp);
 486         if (v == NULL)
 487             goto onError;
 488     }
 489 #endif
 490     if (!PyString_Check(v)) {
 491         PyErr_Format(PyExc_TypeError,
 492                      "encoder did not return a string object (type=%.400s)",
 493                      v->ob_type->tp_name);
 494         Py_DECREF(v);
 495         goto onError;
 496     }
 497
 498     return v;
 499
 500  onError:
 501     return NULL;
 502 }
 503
 504 static void
 505 string_dealloc(PyObject *op)
 506 {
 507         op->ob_type->tp_free(op);
 508 }
 509
 510 static int
 511 string_getsize(register PyObject *op)
 512 {
 513         char *s;
 514         int len;
 515         if (PyString_AsStringAndSize(op, &s, &len))
 516                 return -1;
 517         return len;
 518 }
 519
 520 static /*const*/ char *
 521 string_getbuffer(register PyObject *op)
 522 {
 523         char *s;
 524         int len;
 525         if (PyString_AsStringAndSize(op, &s, &len))
 526                 return NULL;
 527         return s;
 528 }
 529
 530 int
 531 PyString_Size(register PyObject *op)
 532 {
 533         if (!PyString_Check(op))
 534                 return string_getsize(op);
 535         return ((PyStringObject *)op) -> ob_size;
 536 }
 537
 538 /*const*/ char *
 539 PyString_AsString(register PyObject *op)
 540 {
 541         if (!PyString_Check(op))
 542                 return string_getbuffer(op);
 543         return ((PyStringObject *)op) -> ob_sval;
 544 }
 545
 546 int
 547 PyString_AsStringAndSize(register PyObject *obj,
 548                          register char **s,
 549                          register int *len)
 550 {
 551         if (s == NULL) {
 552                 PyErr_BadInternalCall();
 553                 return -1;
 554         }
 555
 556         if (!PyString_Check(obj)) {
 557 #ifdef Py_USING_UNICODE
 558                 if (PyUnicode_Check(obj)) {
 559                         obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
 560                         if (obj == NULL)
 561                                 return -1;
 562                 }
 563                 else
 564 #endif
 565                 {
 566                         PyErr_Format(PyExc_TypeError,
 567                                      "expected string or Unicode object, "
 568                                      "%.200s found", obj->ob_type->tp_name);
 569                         return -1;
 570                 }
 571         }
 572
 573         *s = PyString_AS_STRING(obj);
 574         if (len != NULL)
 575                 *len = PyString_GET_SIZE(obj);
 576         else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
 577                 PyErr_SetString(PyExc_TypeError,
 578                                 "expected string without null bytes");
 579                 return -1;
 580         }
 581         return 0;
 582 }
 583
 584 /* Methods */
 585
 586 static int
 587 string_print(PyStringObject *op, FILE *fp, int flags)
 588 {
 589         int i;
 590         char c;
 591         int quote;
 592
 593         /* XXX Ought to check for interrupts when writing long strings */
 594         if (! PyString_CheckExact(op)) {
 595                 int ret;
 596                 /* A str subclass may have its own __str__ method. */
 597                 op = (PyStringObject *) PyObject_Str((PyObject *)op);
 598                 if (op == NULL)
 599                         return -1;
 600                 ret = string_print(op, fp, flags);
 601                 Py_DECREF(op);
 602                 return ret;
 603         }
 604         if (flags & Py_PRINT_RAW) {
 605                 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
 606                 return 0;
 607         }
 608
 609         /* figure out which quote to use; single is preferred */
 610         quote = '\'';
 611         if (strchr(op->ob_sval, '\'') &&
 612             !strchr(op->ob_sval, '"'))
 613                 quote = '"';
 614
 615         fputc(quote, fp);
 616         for (i = 0; i < op->ob_size; i++) {
 617                 c = op->ob_sval[i];
 618                 if (c == quote || c == '\\')
 619                         fprintf(fp, "\\%c", c);
 620                 else if (c == '\t')
 621                         fprintf(fp, "\\t");
 622                 else if (c == '\n')
 623                         fprintf(fp, "\\n");
 624                 else if (c == '\r')
 625                         fprintf(fp, "\\r");
 626                 else if (c < ' ' || c >= 0x7f)
 627                         fprintf(fp, "\\x%02x", c & 0xff);
 628                 else
 629                         fputc(c, fp);
 630         }
 631         fputc(quote, fp);
 632         return 0;
 633 }
 634
 635 static PyObject *
 636 string_repr(register PyStringObject *op)
 637 {
 638         size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
 639         PyObject *v;
 640         if (newsize > INT_MAX) {
 641                 PyErr_SetString(PyExc_OverflowError,
 642                         "string is too large to make repr");
 643         }
 644         v = PyString_FromStringAndSize((char *)NULL, newsize);
 645         if (v == NULL) {
 646                 return NULL;
 647         }
 648         else {
 649                 register int i;
 650                 register char c;
 651                 register char *p;
 652                 int quote;
 653
 654                 /* figure out which quote to use; single is preferred */
 655                 quote = '\'';
 656                 if (strchr(op->ob_sval, '\'') &&
 657                     !strchr(op->ob_sval, '"'))
 658                         quote = '"';
 659
 660                 p = PyString_AS_STRING(v);
 661                 *p++ = quote;
 662                 for (i = 0; i < op->ob_size; i++) {
 663                         /* There's at least enough room for a hex escape
 664                            and a closing quote. */
 665                         assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
 666                         c = op->ob_sval[i];
 667                         if (c == quote || c == '\\')
 668                                 *p++ = '\\', *p++ = c;
 669                         else if (c == '\t')
 670                                 *p++ = '\\', *p++ = 't';
 671                         else if (c == '\n')
 672                                 *p++ = '\\', *p++ = 'n';
 673                         else if (c == '\r')
 674                                 *p++ = '\\', *p++ = 'r';
 675                         else if (c < ' ' || c >= 0x7f) {
 676                                 /* For performance, we don't want to call
 677                                    PyOS_snprintf here (extra layers of
 678                                    function call). */
 679                                 sprintf(p, "\\x%02x", c & 0xff);
 680                                 p += 4;
 681                         }
 682                         else
 683                                 *p++ = c;
 684                 }
 685                 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
 686                 *p++ = quote;
 687                 *p = '\0';
 688                 _PyString_Resize(
 689                         &v, (int) (p - PyString_AS_STRING(v)));
 690                 return v;
 691         }
 692 }
 693
 694 static PyObject *
 695 string_str(PyObject *s)
 696 {
 697         assert(PyString_Check(s));
 698         if (PyString_CheckExact(s)) {
 699                 Py_INCREF(s);
 700                 return s;
 701         }
 702         else {
 703                 /* Subtype -- return genuine string with the same value. */
 704                 PyStringObject *t = (PyStringObject *) s;
 705                 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
 706         }
 707 }
 708
 709 static int
 710 string_length(PyStringObject *a)
 711 {
 712         return a->ob_size;
 713 }
 714
 715 static PyObject *
 716 string_concat(register PyStringObject *a, register PyObject *bb)
 717 {
 718         register unsigned int size;
 719         register PyStringObject *op;
 720         if (!PyString_Check(bb)) {
 721 #ifdef Py_USING_UNICODE
 722                 if (PyUnicode_Check(bb))
 723                     return PyUnicode_Concat((PyObject *)a, bb);
 724 #endif
 725                 PyErr_Format(PyExc_TypeError,
 726                              "cannot concatenate 'str' and '%.200s' objects",
 727                              bb->ob_type->tp_name);
 728                 return NULL;
 729         }
 730 #define b ((PyStringObject *)bb)
 731         /* Optimize cases with empty left or right operand */
 732         if ((a->ob_size == 0 || b->ob_size == 0) &&
 733             PyString_CheckExact(a) && PyString_CheckExact(b)) {
 734                 if (a->ob_size == 0) {
 735                         Py_INCREF(bb);
 736                         return bb;
 737                 }
 738                 Py_INCREF(a);
 739                 return (PyObject *)a;
 740         }
 741         size = a->ob_size + b->ob_size;
 742         /* PyObject_NewVar is inlined */
 743         op = (PyStringObject *)
 744                 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
 745         if (op == NULL)
 746                 return PyErr_NoMemory();
 747         PyObject_INIT_VAR(op, &PyString_Type, size);
 748 #ifdef CACHE_HASH
 749         op->ob_shash = -1;
 750 #endif
 751 #ifdef INTERN_STRINGS
 752         op->ob_sinterned = NULL;
 753 #endif
 754         memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
 755         memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
 756         op->ob_sval[size] = '\0';
 757         return (PyObject *) op;
 758 #undef b
 759 }
 760
 761 static PyObject *
 762 string_repeat(register PyStringObject *a, register int n)
 763 {
 764         register int i;
 765         register int size;
 766         register PyStringObject *op;
 767         size_t nbytes;
 768         if (n < 0)
 769                 n = 0;
 770         /* watch out for overflows:  the size can overflow int,
 771          * and the # of bytes needed can overflow size_t
 772          */
 773         size = a->ob_size * n;
 774         if (n && size / n != a->ob_size) {
 775                 PyErr_SetString(PyExc_OverflowError,
 776                         "repeated string is too long");
 777                 return NULL;
 778         }
 779         if (size == a->ob_size && PyString_CheckExact(a)) {
 780                 Py_INCREF(a);
 781                 return (PyObject *)a;
 782         }
 783         nbytes = size * sizeof(char);
 784         if (nbytes / sizeof(char) != (size_t)size ||
 785             nbytes + sizeof(PyStringObject) <= nbytes) {
 786                 PyErr_SetString(PyExc_OverflowError,
 787                         "repeated string is too long");
 788                 return NULL;
 789         }
 790         op = (PyStringObject *)
 791                 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
 792         if (op == NULL)
 793                 return PyErr_NoMemory();
 794         PyObject_INIT_VAR(op, &PyString_Type, size);
 795 #ifdef CACHE_HASH
 796         op->ob_shash = -1;
 797 #endif
 798 #ifdef INTERN_STRINGS
 799         op->ob_sinterned = NULL;
 800 #endif
 801         for (i = 0; i < size; i += a->ob_size)
 802                 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
 803         op->ob_sval[size] = '\0';
 804         return (PyObject *) op;
 805 }
 806
 807 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
 808
 809 static PyObject *
 810 string_slice(register PyStringObject *a, register int i, register int j)
 811      /* j -- may be negative! */
 812 {
 813         if (i < 0)
 814                 i = 0;
 815         if (j < 0)
 816                 j = 0; /* Avoid signed/unsigned bug in next line */
 817         if (j > a->ob_size)
 818                 j = a->ob_size;
 819         if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
 820                 /* It's the same as a */
 821                 Py_INCREF(a);
 822                 return (PyObject *)a;
 823         }
 824         if (j < i)
 825                 j = i;
 826         return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
 827 }
 828
 829 static int
 830 string_contains(PyObject *a, PyObject *el)
 831 {
 832         register char *s, *end;
 833         register char c;
 834 #ifdef Py_USING_UNICODE
 835         if (PyUnicode_Check(el))
 836                 return PyUnicode_Contains(a, el);
 837 #endif
 838         if (!PyString_Check(el) || PyString_Size(el) != 1) {
 839                 PyErr_SetString(PyExc_TypeError,
 840                     "'in <string>' requires character as left operand");
 841                 return -1;
 842         }
 843         c = PyString_AsString(el)[0];
 844         s = PyString_AsString(a);
 845         end = s + PyString_Size(a);
 846         while (s < end) {
 847                 if (c == *s++)
 848                         return 1;
 849         }
 850         return 0;
 851 }
 852
 853 static PyObject *
 854 string_item(PyStringObject *a, register int i)
 855 {
 856         PyObject *v;
 857         char *pchar;
 858         if (i < 0 || i >= a->ob_size) {
 859                 PyErr_SetString(PyExc_IndexError, "string index out of range");
 860                 return NULL;
 861         }
 862         pchar = a->ob_sval + i;
 863         v = (PyObject *)characters[*pchar & UCHAR_MAX];
 864         if (v == NULL)
 865                 v = PyString_FromStringAndSize(pchar, 1);
 866         else {
 867 #ifdef COUNT_ALLOCS
 868                 one_strings++;
 869 #endif
 870                 Py_INCREF(v);
 871         }
 872         return v;
 873 }
 874
 875 static PyObject*
 876 string_richcompare(PyStringObject *a, PyStringObject *b, int op)
 877 {
 878         int c;
 879         int len_a, len_b;
 880         int min_len;
 881         PyObject *result;
 882
 883         /* Make sure both arguments are strings. */
 884         if (!(PyString_Check(a) && PyString_Check(b))) {
 885                 result = Py_NotImplemented;
 886                 goto out;
 887         }
 888         if (a == b) {
 889                 switch (op) {
 890                 case Py_EQ:case Py_LE:case Py_GE:
 891                         result = Py_True;
 892                         goto out;
 893                 case Py_NE:case Py_LT:case Py_GT:
 894                         result = Py_False;
 895                         goto out;
 896                 }
 897         }
 898         if (op == Py_EQ) {
 899                 /* Supporting Py_NE here as well does not save
 900                    much time, since Py_NE is rarely used.  */
 901                 if (a->ob_size == b->ob_size
 902                     && (a->ob_sval[0] == b->ob_sval[0]
 903                         && memcmp(a->ob_sval, b->ob_sval,
 904                                   a->ob_size) == 0)) {
 905                         result = Py_True;
 906                 } else {
 907                         result = Py_False;
 908                 }
 909                 goto out;
 910         }
 911         len_a = a->ob_size; len_b = b->ob_size;
 912         min_len = (len_a < len_b) ? len_a : len_b;
 913         if (min_len > 0) {
 914                 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
 915                 if (c==0)
 916                         c = memcmp(a->ob_sval, b->ob_sval, min_len);
 917         }else
 918                 c = 0;
 919         if (c == 0)
 920                 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
 921         switch (op) {
 922         case Py_LT: c = c <  0; break;
 923         case Py_LE: c = c <= 0; break;
 924         case Py_EQ: assert(0);  break; /* unreachable */
 925         case Py_NE: c = c != 0; break;
 926         case Py_GT: c = c >  0; break;
 927         case Py_GE: c = c >= 0; break;
 928         default:
 929                 result = Py_NotImplemented;
 930                 goto out;
 931         }
 932         result = c ? Py_True : Py_False;
 933   out:
 934         Py_INCREF(result);
 935         return result;
 936 }
 937
 938 int
 939 _PyString_Eq(PyObject *o1, PyObject *o2)
 940 {
 941         PyStringObject *a, *b;
 942         a = (PyStringObject*)o1;
 943         b = (PyStringObject*)o2;
 944         return a->ob_size == b->ob_size
 945           && *a->ob_sval == *b->ob_sval
 946           && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
 947 }
 948
 949 static long
 950 string_hash(PyStringObject *a)
 951 {
 952         register int len;
 953         register unsigned char *p;
 954         register long x;
 955
 956 #ifdef CACHE_HASH
 957         if (a->ob_shash != -1)
 958                 return a->ob_shash;
 959 #ifdef INTERN_STRINGS
 960         if (a->ob_sinterned != NULL)
 961                 return (a->ob_shash =
 962                         ((PyStringObject *)(a->ob_sinterned))->ob_shash);
 963 #endif
 964 #endif
 965         len = a->ob_size;
 966         p = (unsigned char *) a->ob_sval;
 967         x = *p << 7;
 968         while (--len >= 0)
 969                 x = (1000003*x) ^ *p++;
 970         x ^= a->ob_size;
 971         if (x == -1)
 972                 x = -2;
 973 #ifdef CACHE_HASH
 974         a->ob_shash = x;
 975 #endif
 976         return x;
 977 }
 978
 979 static int
 980 string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
 981 {
 982         if ( index != 0 ) {
 983                 PyErr_SetString(PyExc_SystemError,
 984                                 "accessing non-existent string segment");
 985                 return -1;
 986         }
 987         *ptr = (void *)self->ob_sval;
 988         return self->ob_size;
 989 }
 990
 991 static int
 992 string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
 993 {
 994         PyErr_SetString(PyExc_TypeError,
 995                         "Cannot use string as modifiable buffer");
 996         return -1;
 997 }
 998
 999 static int
1000 string_buffer_getsegcount(PyStringObject *self, int *lenp)
1001 {
1002         if ( lenp )
1003                 *lenp = self->ob_size;
1004         return 1;
1005 }
1006
1007 static int
1008 string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
1009 {
1010         if ( index != 0 ) {
1011                 PyErr_SetString(PyExc_SystemError,
1012                                 "accessing non-existent string segment");
1013                 return -1;
1014         }
1015         *ptr = self->ob_sval;
1016         return self->ob_size;
1017 }
1018
1019 static PySequenceMethods string_as_sequence = {
1020         (inquiry)string_length, /*sq_length*/
1021         (binaryfunc)string_concat, /*sq_concat*/
1022         (intargfunc)string_repeat, /*sq_repeat*/
1023         (intargfunc)string_item, /*sq_item*/
1024         (intintargfunc)string_slice, /*sq_slice*/
1025         0,              /*sq_ass_item*/
1026         0,              /*sq_ass_slice*/
1027         (objobjproc)string_contains /*sq_contains*/
1028 };
1029
1030 static PyBufferProcs string_as_buffer = {
1031         (getreadbufferproc)string_buffer_getreadbuf,
1032         (getwritebufferproc)string_buffer_getwritebuf,
1033         (getsegcountproc)string_buffer_getsegcount,
1034         (getcharbufferproc)string_buffer_getcharbuf,
1035 };
1036
1037
1038 \f
/* Strip modes.  The values double as indices into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* PyArg_ParseTuple format for each strip mode, indexed by the mode. */
static const char *stripformat[] = {
        "|O:lstrip",
        "|O:rstrip",
        "|O:strip"
};

/* Method name for strip mode i: the format string with its leading
   three characters ("|O:") skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
1047
1048
1049 static PyObject *
1050 split_whitespace(const char *s, int len, int maxsplit)
1051 {
1052         int i, j, err;
1053         PyObject* item;
1054         PyObject *list = PyList_New(0);
1055
1056         if (list == NULL)
1057                 return NULL;
1058
1059         for (i = j = 0; i < len; ) {
1060                 while (i < len && isspace(Py_CHARMASK(s[i])))
1061                         i++;
1062                 j = i;
1063                 while (i < len && !isspace(Py_CHARMASK(s[i])))
1064                         i++;
1065                 if (j < i) {
1066                         if (maxsplit-- <= 0)
1067                                 break;
1068                         item = PyString_FromStringAndSize(s+j, (int)(i-j));
1069                         if (item == NULL)
1070                                 goto finally;
1071                         err = PyList_Append(list, item);
1072                         Py_DECREF(item);
1073                         if (err < 0)
1074                                 goto finally;
1075                         while (i < len && isspace(Py_CHARMASK(s[i])))
1076                                 i++;
1077                         j = i;
1078                 }
1079         }
1080         if (j < len) {
1081                 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1082                 if (item == NULL)
1083                         goto finally;
1084                 err = PyList_Append(list, item);
1085                 Py_DECREF(item);
1086                 if (err < 0)
1087                         goto finally;
1088         }
1089         return list;
1090   finally:
1091         Py_DECREF(list);
1092         return NULL;
1093 }
1094
1095
1096 static char split__doc__[] =
1097 "S.split([sep [,maxsplit]]) -> list of strings\n\
1098 \n\
1099 Return a list of the words in the string S, using sep as the\n\
1100 delimiter string.  If maxsplit is given, at most maxsplit\n\
1101 splits are done. If sep is not specified or is None, any\n\
1102 whitespace string is a separator.";
1103
1104 static PyObject *
1105 string_split(PyStringObject *self, PyObject *args)
1106 {
1107         int len = PyString_GET_SIZE(self), n, i, j, err;
1108         int maxsplit = -1;
1109         const char *s = PyString_AS_STRING(self), *sub;
1110         PyObject *list, *item, *subobj = Py_None;
1111
1112         if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
1113                 return NULL;
1114         if (maxsplit < 0)
1115                 maxsplit = INT_MAX;
1116         if (subobj == Py_None)
1117                 return split_whitespace(s, len, maxsplit);
1118         if (PyString_Check(subobj)) {
1119                 sub = PyString_AS_STRING(subobj);
1120                 n = PyString_GET_SIZE(subobj);
1121         }
1122 #ifdef Py_USING_UNICODE
1123         else if (PyUnicode_Check(subobj))
1124                 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1125 #endif
1126         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1127                 return NULL;
1128         if (n == 0) {
1129                 PyErr_SetString(PyExc_ValueError, "empty separator");
1130                 return NULL;
1131         }
1132
1133         list = PyList_New(0);
1134         if (list == NULL)
1135                 return NULL;
1136
1137         i = j = 0;
1138         while (i+n <= len) {
1139                 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1140                         if (maxsplit-- <= 0)
1141                                 break;
1142                         item = PyString_FromStringAndSize(s+j, (int)(i-j));
1143                         if (item == NULL)
1144                                 goto fail;
1145                         err = PyList_Append(list, item);
1146                         Py_DECREF(item);
1147                         if (err < 0)
1148                                 goto fail;
1149                         i = j = i + n;
1150                 }
1151                 else
1152                         i++;
1153         }
1154         item = PyString_FromStringAndSize(s+j, (int)(len-j));
1155         if (item == NULL)
1156                 goto fail;
1157         err = PyList_Append(list, item);
1158         Py_DECREF(item);
1159         if (err < 0)
1160                 goto fail;
1161
1162         return list;
1163
1164  fail:
1165         Py_DECREF(list);
1166         return NULL;
1167 }
1168
1169
1170 static char join__doc__[] =
1171 "S.join(sequence) -> string\n\
1172 \n\
1173 Return a string which is the concatenation of the strings in the\n\
1174 sequence.  The separator between elements is S.";
1175
1176 static PyObject *
1177 string_join(PyStringObject *self, PyObject *orig)
1178 {
1179         char *sep = PyString_AS_STRING(self);
1180         const int seplen = PyString_GET_SIZE(self);
1181         PyObject *res = NULL;
1182         char *p;
1183         int seqlen = 0;
1184         size_t sz = 0;
1185         int i;
1186         PyObject *seq, *item;
1187
1188         seq = PySequence_Fast(orig, "");
1189         if (seq == NULL) {
1190                 if (PyErr_ExceptionMatches(PyExc_TypeError))
1191                         PyErr_Format(PyExc_TypeError,
1192                                      "sequence expected, %.80s found",
1193                                      orig->ob_type->tp_name);
1194                 return NULL;
1195         }
1196
1197         seqlen = PySequence_Size(seq);
1198         if (seqlen == 0) {
1199                 Py_DECREF(seq);
1200                 return PyString_FromString("");
1201         }
1202         if (seqlen == 1) {
1203                 item = PySequence_Fast_GET_ITEM(seq, 0);
1204                 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1205                         PyErr_Format(PyExc_TypeError,
1206                                      "sequence item 0: expected string,"
1207                                      " %.80s found",
1208                                      item->ob_type->tp_name);
1209                         Py_DECREF(seq);
1210                         return NULL;
1211                 }
1212                 Py_INCREF(item);
1213                 Py_DECREF(seq);
1214                 return item;
1215         }
1216
1217         /* There are at least two things to join.  Do a pre-pass to figure out
1218          * the total amount of space we'll need (sz), see whether any argument
1219          * is absurd, and defer to the Unicode join if appropriate.
1220          */
1221         for (i = 0; i < seqlen; i++) {
1222                 const size_t old_sz = sz;
1223                 item = PySequence_Fast_GET_ITEM(seq, i);
1224                 if (!PyString_Check(item)){
1225 #ifdef Py_USING_UNICODE
1226                         if (PyUnicode_Check(item)) {
1227                                 /* Defer to Unicode join.
1228                                  * CAUTION:  There's no gurantee that the
1229                                  * original sequence can be iterated over
1230                                  * again, so we must pass seq here.
1231                                  */
1232                                 PyObject *result;
1233                                 result = PyUnicode_Join((PyObject *)self, seq);
1234                                 Py_DECREF(seq);
1235                                 return result;
1236                         }
1237 #endif
1238                         PyErr_Format(PyExc_TypeError,
1239                                      "sequence item %i: expected string,"
1240                                      " %.80s found",
1241                                      i, item->ob_type->tp_name);
1242                         Py_DECREF(seq);
1243                         return NULL;
1244                 }
1245                 sz += PyString_GET_SIZE(item);
1246                 if (i != 0)
1247                         sz += seplen;
1248                 if (sz < old_sz || sz > INT_MAX) {
1249                         PyErr_SetString(PyExc_OverflowError,
1250                                 "join() is too long for a Python string");
1251                         Py_DECREF(seq);
1252                         return NULL;
1253                 }
1254         }
1255
1256         /* Allocate result space. */
1257         res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1258         if (res == NULL) {
1259                 Py_DECREF(seq);
1260                 return NULL;
1261         }
1262
1263         /* Catenate everything. */
1264         p = PyString_AS_STRING(res);
1265         for (i = 0; i < seqlen; ++i) {
1266                 size_t n;
1267                 item = PySequence_Fast_GET_ITEM(seq, i);
1268                 n = PyString_GET_SIZE(item);
1269                 memcpy(p, PyString_AS_STRING(item), n);
1270                 p += n;
1271                 if (i < seqlen - 1) {
1272                         memcpy(p, sep, seplen);
1273                         p += seplen;
1274                 }
1275         }
1276
1277         Py_DECREF(seq);
1278         return res;
1279 }
1280
1281 PyObject *
1282 _PyString_Join(PyObject *sep, PyObject *x)
1283 {
1284         assert(sep != NULL && PyString_Check(sep));
1285         assert(x != NULL);
1286         return string_join((PyStringObject *)sep, x);
1287 }
1288
1289 static long
1290 string_find_internal(PyStringObject *self, PyObject *args, int dir)
1291 {
1292         const char *s = PyString_AS_STRING(self), *sub;
1293         int len = PyString_GET_SIZE(self);
1294         int n, i = 0, last = INT_MAX;
1295         PyObject *subobj;
1296
1297         if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
1298                 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
1299                 return -2;
1300         if (PyString_Check(subobj)) {
1301                 sub = PyString_AS_STRING(subobj);
1302                 n = PyString_GET_SIZE(subobj);
1303         }
1304 #ifdef Py_USING_UNICODE
1305         else if (PyUnicode_Check(subobj))
1306                 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
1307 #endif
1308         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1309                 return -2;
1310
1311         if (last > len)
1312                 last = len;
1313         if (last < 0)
1314                 last += len;
1315         if (last < 0)
1316                 last = 0;
1317         if (i < 0)
1318                 i += len;
1319         if (i < 0)
1320                 i = 0;
1321
1322         if (dir > 0) {
1323                 if (n == 0 && i <= last)
1324                         return (long)i;
1325                 last -= n;
1326                 for (; i <= last; ++i)
1327                         if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
1328                                 return (long)i;
1329         }
1330         else {
1331                 int j;
1332
1333                 if (n == 0 && i <= last)
1334                         return (long)last;
1335                 for (j = last-n; j >= i; --j)
1336                         if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
1337                                 return (long)j;
1338         }
1339
1340         return -1;
1341 }
1342
1343
1344 static char find__doc__[] =
1345 "S.find(sub [,start [,end]]) -> int\n\
1346 \n\
1347 Return the lowest index in S where substring sub is found,\n\
1348 such that sub is contained within s[start,end].  Optional\n\
1349 arguments start and end are interpreted as in slice notation.\n\
1350 \n\
1351 Return -1 on failure.";
1352
1353 static PyObject *
1354 string_find(PyStringObject *self, PyObject *args)
1355 {
1356         long result = string_find_internal(self, args, +1);
1357         if (result == -2)
1358                 return NULL;
1359         return PyInt_FromLong(result);
1360 }
1361
1362
1363 static char index__doc__[] =
1364 "S.index(sub [,start [,end]]) -> int\n\
1365 \n\
1366 Like S.find() but raise ValueError when the substring is not found.";
1367
1368 static PyObject *
1369 string_index(PyStringObject *self, PyObject *args)
1370 {
1371         long result = string_find_internal(self, args, +1);
1372         if (result == -2)
1373                 return NULL;
1374         if (result == -1) {
1375                 PyErr_SetString(PyExc_ValueError,
1376                                 "substring not found in string.index");
1377                 return NULL;
1378         }
1379         return PyInt_FromLong(result);
1380 }
1381
1382
1383 static char rfind__doc__[] =
1384 "S.rfind(sub [,start [,end]]) -> int\n\
1385 \n\
1386 Return the highest index in S where substring sub is found,\n\
1387 such that sub is contained within s[start,end].  Optional\n\
1388 arguments start and end are interpreted as in slice notation.\n\
1389 \n\
1390 Return -1 on failure.";
1391
1392 static PyObject *
1393 string_rfind(PyStringObject *self, PyObject *args)
1394 {
1395         long result = string_find_internal(self, args, -1);
1396         if (result == -2)
1397                 return NULL;
1398         return PyInt_FromLong(result);
1399 }
1400
1401
1402 static char rindex__doc__[] =
1403 "S.rindex(sub [,start [,end]]) -> int\n\
1404 \n\
1405 Like S.rfind() but raise ValueError when the substring is not found.";
1406
1407 static PyObject *
1408 string_rindex(PyStringObject *self, PyObject *args)
1409 {
1410         long result = string_find_internal(self, args, -1);
1411         if (result == -2)
1412                 return NULL;
1413         if (result == -1) {
1414                 PyErr_SetString(PyExc_ValueError,
1415                                 "substring not found in string.rindex");
1416                 return NULL;
1417         }
1418         return PyInt_FromLong(result);
1419 }
1420
1421
1422 static PyObject *
1423 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1424 {
1425         char *s = PyString_AS_STRING(self);
1426         int len = PyString_GET_SIZE(self);
1427         char *sep = PyString_AS_STRING(sepobj);
1428         int seplen = PyString_GET_SIZE(sepobj);
1429         int i, j;
1430
1431         i = 0;
1432         if (striptype != RIGHTSTRIP) {
1433                 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1434                         i++;
1435                 }
1436         }
1437
1438         j = len;
1439         if (striptype != LEFTSTRIP) {
1440                 do {
1441                         j--;
1442                 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1443                 j++;
1444         }
1445
1446         if (i == 0 && j == len && PyString_CheckExact(self)) {
1447                 Py_INCREF(self);
1448                 return (PyObject*)self;
1449         }
1450         else
1451                 return PyString_FromStringAndSize(s+i, j-i);
1452 }
1453
1454
1455 static PyObject *
1456 do_strip(PyStringObject *self, int striptype)
1457 {
1458         char *s = PyString_AS_STRING(self);
1459         int len = PyString_GET_SIZE(self), i, j;
1460
1461         i = 0;
1462         if (striptype != RIGHTSTRIP) {
1463                 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1464                         i++;
1465                 }
1466         }
1467
1468         j = len;
1469         if (striptype != LEFTSTRIP) {
1470                 do {
1471                         j--;
1472                 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1473                 j++;
1474         }
1475
1476         if (i == 0 && j == len && PyString_CheckExact(self)) {
1477                 Py_INCREF(self);
1478                 return (PyObject*)self;
1479         }
1480         else
1481                 return PyString_FromStringAndSize(s+i, j-i);
1482 }
1483
1484
1485 static PyObject *
1486 do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1487 {
1488         PyObject *sep = NULL;
1489
1490         if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1491                 return NULL;
1492
1493         if (sep != NULL && sep != Py_None) {
1494                 if (PyString_Check(sep))
1495                         return do_xstrip(self, striptype, sep);
1496 #ifdef Py_USING_UNICODE
1497                 else if (PyUnicode_Check(sep)) {
1498                         PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1499                         PyObject *res;
1500                         if (uniself==NULL)
1501                                 return NULL;
1502                         res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1503                                 striptype, sep);
1504                         Py_DECREF(uniself);
1505                         return res;
1506                 }
1507 #endif
1508                 else {
1509                         PyErr_Format(PyExc_TypeError,
1510 #ifdef Py_USING_UNICODE
1511                                      "%s arg must be None, str or unicode",
1512 #else
1513                                      "%s arg must be None or str",
1514 #endif
1515                                      STRIPNAME(striptype));
1516                         return NULL;
1517                 }
1518                 return do_xstrip(self, striptype, sep);
1519         }
1520
1521         return do_strip(self, striptype);
1522 }
1523
1524
1525 static char strip__doc__[] =
1526 "S.strip([sep]) -> string or unicode\n\
1527 \n\
1528 Return a copy of the string S with leading and trailing\n\
1529 whitespace removed.\n\
1530 If sep is given and not None, remove characters in sep instead.\n\
1531 If sep is unicode, S will be converted to unicode before stripping";
1532
1533 static PyObject *
1534 string_strip(PyStringObject *self, PyObject *args)
1535 {
1536         if (PyTuple_GET_SIZE(args) == 0)
1537                 return do_strip(self, BOTHSTRIP); /* Common case */
1538         else
1539                 return do_argstrip(self, BOTHSTRIP, args);
1540 }
1541
1542
1543 static char lstrip__doc__[] =
1544 "S.lstrip([sep]) -> string or unicode\n\
1545 \n\
1546 Return a copy of the string S with leading whitespace removed.\n\
1547 If sep is given and not None, remove characters in sep instead.\n\
1548 If sep is unicode, S will be converted to unicode before stripping";
1549
1550 static PyObject *
1551 string_lstrip(PyStringObject *self, PyObject *args)
1552 {
1553         if (PyTuple_GET_SIZE(args) == 0)
1554                 return do_strip(self, LEFTSTRIP); /* Common case */
1555         else
1556                 return do_argstrip(self, LEFTSTRIP, args);
1557 }
1558
1559
1560 static char rstrip__doc__[] =
1561 "S.rstrip([sep]) -> string or unicode\n\
1562 \n\
1563 Return a copy of the string S with trailing whitespace removed.\n\
1564 If sep is given and not None, remove characters in sep instead.\n\
1565 If sep is unicode, S will be converted to unicode before stripping";
1566
1567 static PyObject *
1568 string_rstrip(PyStringObject *self, PyObject *args)
1569 {
1570         if (PyTuple_GET_SIZE(args) == 0)
1571                 return do_strip(self, RIGHTSTRIP); /* Common case */
1572         else
1573                 return do_argstrip(self, RIGHTSTRIP, args);
1574 }
1575
1576
1577 static char lower__doc__[] =
1578 "S.lower() -> string\n\
1579 \n\
1580 Return a copy of the string S converted to lowercase.";
1581
1582 static PyObject *
1583 string_lower(PyStringObject *self)
1584 {
1585         char *s = PyString_AS_STRING(self), *s_new;
1586         int i, n = PyString_GET_SIZE(self);
1587         PyObject *new;
1588
1589         new = PyString_FromStringAndSize(NULL, n);
1590         if (new == NULL)
1591                 return NULL;
1592         s_new = PyString_AsString(new);
1593         for (i = 0; i < n; i++) {
1594                 int c = Py_CHARMASK(*s++);
1595                 if (isupper(c)) {
1596                         *s_new = tolower(c);
1597                 } else
1598                         *s_new = c;
1599                 s_new++;
1600         }
1601         return new;
1602 }
1603
1604
1605 static char upper__doc__[] =
1606 "S.upper() -> string\n\
1607 \n\
1608 Return a copy of the string S converted to uppercase.";
1609
1610 static PyObject *
1611 string_upper(PyStringObject *self)
1612 {
1613         char *s = PyString_AS_STRING(self), *s_new;
1614         int i, n = PyString_GET_SIZE(self);
1615         PyObject *new;
1616
1617         new = PyString_FromStringAndSize(NULL, n);
1618         if (new == NULL)
1619                 return NULL;
1620         s_new = PyString_AsString(new);
1621         for (i = 0; i < n; i++) {
1622                 int c = Py_CHARMASK(*s++);
1623                 if (islower(c)) {
1624                         *s_new = toupper(c);
1625                 } else
1626                         *s_new = c;
1627                 s_new++;
1628         }
1629         return new;
1630 }
1631
1632
1633 static char title__doc__[] =
1634 "S.title() -> string\n\
1635 \n\
1636 Return a titlecased version of S, i.e. words start with uppercase\n\
1637 characters, all remaining cased characters have lowercase.";
1638
1639 static PyObject*
1640 string_title(PyStringObject *self)
1641 {
1642         char *s = PyString_AS_STRING(self), *s_new;
1643         int i, n = PyString_GET_SIZE(self);
1644         int previous_is_cased = 0;
1645         PyObject *new;
1646
1647         new = PyString_FromStringAndSize(NULL, n);
1648         if (new == NULL)
1649                 return NULL;
1650         s_new = PyString_AsString(new);
1651         for (i = 0; i < n; i++) {
1652                 int c = Py_CHARMASK(*s++);
1653                 if (islower(c)) {
1654                         if (!previous_is_cased)
1655                             c = toupper(c);
1656                         previous_is_cased = 1;
1657                 } else if (isupper(c)) {
1658                         if (previous_is_cased)
1659                             c = tolower(c);
1660                         previous_is_cased = 1;
1661                 } else
1662                         previous_is_cased = 0;
1663                 *s_new++ = c;
1664         }
1665         return new;
1666 }
1667
1668 static char capitalize__doc__[] =
1669 "S.capitalize() -> string\n\
1670 \n\
1671 Return a copy of the string S with only its first character\n\
1672 capitalized.";
1673
1674 static PyObject *
1675 string_capitalize(PyStringObject *self)
1676 {
1677         char *s = PyString_AS_STRING(self), *s_new;
1678         int i, n = PyString_GET_SIZE(self);
1679         PyObject *new;
1680
1681         new = PyString_FromStringAndSize(NULL, n);
1682         if (new == NULL)
1683                 return NULL;
1684         s_new = PyString_AsString(new);
1685         if (0 < n) {
1686                 int c = Py_CHARMASK(*s++);
1687                 if (islower(c))
1688                         *s_new = toupper(c);
1689                 else
1690                         *s_new = c;
1691                 s_new++;
1692         }
1693         for (i = 1; i < n; i++) {
1694                 int c = Py_CHARMASK(*s++);
1695                 if (isupper(c))
1696                         *s_new = tolower(c);
1697                 else
1698                         *s_new = c;
1699                 s_new++;
1700         }
1701         return new;
1702 }
1703
1704
1705 static char count__doc__[] =
1706 "S.count(sub[, start[, end]]) -> int\n\
1707 \n\
1708 Return the number of occurrences of substring sub in string\n\
1709 S[start:end].  Optional arguments start and end are\n\
1710 interpreted as in slice notation.";
1711
1712 static PyObject *
1713 string_count(PyStringObject *self, PyObject *args)
1714 {
1715         const char *s = PyString_AS_STRING(self), *sub;
1716         int len = PyString_GET_SIZE(self), n;
1717         int i = 0, last = INT_MAX;
1718         int m, r;
1719         PyObject *subobj;
1720
1721         if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1722                 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
1723                 return NULL;
1724
1725         if (PyString_Check(subobj)) {
1726                 sub = PyString_AS_STRING(subobj);
1727                 n = PyString_GET_SIZE(subobj);
1728         }
1729 #ifdef Py_USING_UNICODE
1730         else if (PyUnicode_Check(subobj)) {
1731                 int count;
1732                 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1733                 if (count == -1)
1734                         return NULL;
1735                 else
1736                         return PyInt_FromLong((long) count);
1737         }
1738 #endif
1739         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1740                 return NULL;
1741
1742         if (last > len)
1743                 last = len;
1744         if (last < 0)
1745                 last += len;
1746         if (last < 0)
1747                 last = 0;
1748         if (i < 0)
1749                 i += len;
1750         if (i < 0)
1751                 i = 0;
1752         m = last + 1 - n;
1753         if (n == 0)
1754                 return PyInt_FromLong((long) (m-i));
1755
1756         r = 0;
1757         while (i < m) {
1758                 if (!memcmp(s+i, sub, n)) {
1759                         r++;
1760                         i += n;
1761                 } else {
1762                         i++;
1763                 }
1764         }
1765         return PyInt_FromLong((long) r);
1766 }
1767
1768
1769 static char swapcase__doc__[] =
1770 "S.swapcase() -> string\n\
1771 \n\
1772 Return a copy of the string S with uppercase characters\n\
1773 converted to lowercase and vice versa.";
1774
1775 static PyObject *
1776 string_swapcase(PyStringObject *self)
1777 {
1778         char *s = PyString_AS_STRING(self), *s_new;
1779         int i, n = PyString_GET_SIZE(self);
1780         PyObject *new;
1781
1782         new = PyString_FromStringAndSize(NULL, n);
1783         if (new == NULL)
1784                 return NULL;
1785         s_new = PyString_AsString(new);
1786         for (i = 0; i < n; i++) {
1787                 int c = Py_CHARMASK(*s++);
1788                 if (islower(c)) {
1789                         *s_new = toupper(c);
1790                 }
1791                 else if (isupper(c)) {
1792                         *s_new = tolower(c);
1793                 }
1794                 else
1795                         *s_new = c;
1796                 s_new++;
1797         }
1798         return new;
1799 }
1800
1801
1802 static char translate__doc__[] =
1803 "S.translate(table [,deletechars]) -> string\n\
1804 \n\
1805 Return a copy of the string S, where all characters occurring\n\
1806 in the optional argument deletechars are removed, and the\n\
1807 remaining characters have been mapped through the given\n\
1808 translation table, which must be a string of length 256.";
1809
1810 static PyObject *
1811 string_translate(PyStringObject *self, PyObject *args)
1812 {
1813         register char *input, *output;
1814         register const char *table;
1815         register int i, c, changed = 0;
1816         PyObject *input_obj = (PyObject*)self;
1817         const char *table1, *output_start, *del_table=NULL;
1818         int inlen, tablen, dellen = 0;
1819         PyObject *result;
1820         int trans_table[256];
1821         PyObject *tableobj, *delobj = NULL;
1822
1823         if (!PyArg_ParseTuple(args, "O|O:translate",
1824                               &tableobj, &delobj))
1825                 return NULL;
1826
1827         if (PyString_Check(tableobj)) {
1828                 table1 = PyString_AS_STRING(tableobj);
1829                 tablen = PyString_GET_SIZE(tableobj);
1830         }
1831 #ifdef Py_USING_UNICODE
1832         else if (PyUnicode_Check(tableobj)) {
1833                 /* Unicode .translate() does not support the deletechars
1834                    parameter; instead a mapping to None will cause characters
1835                    to be deleted. */
1836                 if (delobj != NULL) {
1837                         PyErr_SetString(PyExc_TypeError,
1838                         "deletions are implemented differently for unicode");
1839                         return NULL;
1840                 }
1841                 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1842         }
1843 #endif
1844         else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
1845                 return NULL;
1846
1847         if (delobj != NULL) {
1848                 if (PyString_Check(delobj)) {
1849                         del_table = PyString_AS_STRING(delobj);
1850                         dellen = PyString_GET_SIZE(delobj);
1851                 }
1852 #ifdef Py_USING_UNICODE
1853                 else if (PyUnicode_Check(delobj)) {
1854                         PyErr_SetString(PyExc_TypeError,
1855                         "deletions are implemented differently for unicode");
1856                         return NULL;
1857                 }
1858 #endif
1859                 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1860                         return NULL;
1861
1862                 if (tablen != 256) {
1863                         PyErr_SetString(PyExc_ValueError,
1864                           "translation table must be 256 characters long");
1865                         return NULL;
1866                 }
1867         }
1868         else {
1869                 del_table = NULL;
1870                 dellen = 0;
1871         }
1872
1873         table = table1;
1874         inlen = PyString_Size(input_obj);
1875         result = PyString_FromStringAndSize((char *)NULL, inlen);
1876         if (result == NULL)
1877                 return NULL;
1878         output_start = output = PyString_AsString(result);
1879         input = PyString_AsString(input_obj);
1880
1881         if (dellen == 0) {
1882                 /* If no deletions are required, use faster code */
1883                 for (i = inlen; --i >= 0; ) {
1884                         c = Py_CHARMASK(*input++);
1885                         if (Py_CHARMASK((*output++ = table[c])) != c)
1886                                 changed = 1;
1887                 }
1888                 if (changed || !PyString_CheckExact(input_obj))
1889                         return result;
1890                 Py_DECREF(result);
1891                 Py_INCREF(input_obj);
1892                 return input_obj;
1893         }
1894
1895         for (i = 0; i < 256; i++)
1896                 trans_table[i] = Py_CHARMASK(table[i]);
1897
1898         for (i = 0; i < dellen; i++)
1899                 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1900
1901         for (i = inlen; --i >= 0; ) {
1902                 c = Py_CHARMASK(*input++);
1903                 if (trans_table[c] != -1)
1904                         if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1905                                 continue;
1906                 changed = 1;
1907         }
1908         if (!changed && PyString_CheckExact(input_obj)) {
1909                 Py_DECREF(result);
1910                 Py_INCREF(input_obj);
1911                 return input_obj;
1912         }
1913         /* Fix the size of the resulting string */
1914         if (inlen > 0)
1915                 _PyString_Resize(&result, output - output_start);
1916         return result;
1917 }
1918
1919
1920 /* What follows is used for implementing replace().  Perry Stoll. */
1921
/*
  mymemfind

  Memory-block analogue of strstr().

  Scans the LEN bytes at MEM for the first position at which the PAT_LEN
  bytes at PAT occur.  Returns that index into MEM, or -1 when there is no
  match -- which is always the case when PAT_LEN is greater than LEN.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
        /* A match cannot begin inside the last pat_len-1 bytes, so this is
           the highest start index worth trying. */
        const int last_start = len - pat_len;
        int pos = 0;

        while (pos <= last_start) {
                /* cheap first-byte test before the full comparison */
                if (mem[pos] == pat[0] && memcmp(mem + pos, pat, pat_len) == 0)
                        return pos;
                pos++;
        }
        return -1;
}
1947
/*
  mymemcnt

   Count the non-overlapping occurrences of PAT in MEM, scanning from the
   left and resuming each search just past the previous match.
   For example mem=1111 with pat=11 yields 2,
   and         mem=11111 with pat=11 also yields 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
        int matches;

        for (matches = 0; len >= 0; matches++) {
                const int hit = mymemfind(mem, len, pat, pat_len);
                int consumed;

                if (hit == -1)
                        break;
                /* skip the unmatched prefix together with the match itself */
                consumed = hit + pat_len;
                mem += consumed;
                len -= consumed;
        }
        return matches;
}
1971
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are replaced
   with SUB.  At most COUNT occurrences are replaced; a negative COUNT
   means "replace all".

   If STR is empty, the length of PAT is greater than the length of STR, or
   no replacement takes place, then the original string is returned.
   Otherwise, a new string is allocated here (with PyMem_MALLOC) and
   returned; the caller must free it.

   PAT_LEN is expected to be greater than zero; the caller rejects empty
   patterns before calling.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result length would
       not fit in an int).

   return value is:
       the new string allocated locally, or
       the input string STR (when out_len is -1), or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,          /* input string */
             const char *pat, int pat_len,      /* pattern string to find */
             const char *sub, int sub_len,      /* substitution string */
             int count,                         /* number of replacements */
             int *out_len)
{
        char *out_s;
        char *new_s;
        int nfound, offset, new_len;

        if (len == 0 || pat_len > len)
                goto return_same;

        /* find length of output string */
        nfound = mymemcnt(str, len, pat, pat_len);
        if (count < 0)
                count = INT_MAX;
        else if (nfound > count)
                nfound = count;
        if (nfound == 0)
                goto return_same;

        /* Only a growing replacement can overflow: when sub_len <= pat_len
           the result is never longer than the input.  Refuse instead of
           computing a wrapped length and under-allocating the buffer. */
        if (sub_len > pat_len &&
            nfound > (INT_MAX - len) / (sub_len - pat_len))
                return NULL;

        new_len = len + nfound*(sub_len - pat_len);
        if (new_len == 0) {
                /* Have to allocate something for the caller to free(). */
                out_s = (char *)PyMem_MALLOC(1);
                if (out_s == NULL)
                        return NULL;
                out_s[0] = '\0';
        }
        else {
                assert(new_len > 0);
                new_s = (char *)PyMem_MALLOC(new_len);
                if (new_s == NULL)
                        return NULL;
                out_s = new_s;

                for (; count > 0 && len > 0; --count) {
                        /* find index of next instance of pattern */
                        offset = mymemfind(str, len, pat, pat_len);
                        if (offset == -1)
                                break;

                        /* copy non matching part of input string */
                        memcpy(new_s, str, offset);
                        str += offset + pat_len;
                        len -= offset + pat_len;

                        /* copy substitute into the output string */
                        new_s += offset;
                        memcpy(new_s, sub, sub_len);
                        new_s += sub_len;
                }
                /* copy any remaining values into output string */
                if (len > 0)
                        memcpy(new_s, str, len);
        }
        *out_len = new_len;
        return out_s;

  return_same:
        *out_len = -1;
        return (char *)str; /* cast away const */
}
2056
2057
2058 static char replace__doc__[] =
2059 "S.replace (old, new[, maxsplit]) -> string\n\
2060 \n\
2061 Return a copy of string S with all occurrences of substring\n\
2062 old replaced by new.  If the optional argument maxsplit is\n\
2063 given, only the first maxsplit occurrences are replaced.";
2064
2065 static PyObject *
2066 string_replace(PyStringObject *self, PyObject *args)
2067 {
2068         const char *str = PyString_AS_STRING(self), *sub, *repl;
2069         char *new_s;
2070         const int len = PyString_GET_SIZE(self);
2071         int sub_len, repl_len, out_len;
2072         int count = -1;
2073         PyObject *new;
2074         PyObject *subobj, *replobj;
2075
2076         if (!PyArg_ParseTuple(args, "OO|i:replace",
2077                               &subobj, &replobj, &count))
2078                 return NULL;
2079
2080         if (PyString_Check(subobj)) {
2081                 sub = PyString_AS_STRING(subobj);
2082                 sub_len = PyString_GET_SIZE(subobj);
2083         }
2084 #ifdef Py_USING_UNICODE
2085         else if (PyUnicode_Check(subobj))
2086                 return PyUnicode_Replace((PyObject *)self,
2087                                          subobj, replobj, count);
2088 #endif
2089         else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2090                 return NULL;
2091
2092         if (PyString_Check(replobj)) {
2093                 repl = PyString_AS_STRING(replobj);
2094                 repl_len = PyString_GET_SIZE(replobj);
2095         }
2096 #ifdef Py_USING_UNICODE
2097         else if (PyUnicode_Check(replobj))
2098                 return PyUnicode_Replace((PyObject *)self,
2099                                          subobj, replobj, count);
2100 #endif
2101         else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2102                 return NULL;
2103
2104         if (sub_len <= 0) {
2105                 PyErr_SetString(PyExc_ValueError, "empty pattern string");
2106                 return NULL;
2107         }
2108         new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
2109         if (new_s == NULL) {
2110                 PyErr_NoMemory();
2111                 return NULL;
2112         }
2113         if (out_len == -1) {
2114                 if (PyString_CheckExact(self)) {
2115                         /* we're returning another reference to self */
2116                         new = (PyObject*)self;
2117                         Py_INCREF(new);
2118                 }
2119                 else {
2120                         new = PyString_FromStringAndSize(str, len);
2121                         if (new == NULL)
2122                                 return NULL;
2123                 }
2124         }
2125         else {
2126                 new = PyString_FromStringAndSize(new_s, out_len);
2127                 PyMem_FREE(new_s);
2128         }
2129         return new;
2130 }
2131
2132
2133 static char startswith__doc__[] =
2134 "S.startswith(prefix[, start[, end]]) -> int\n\
2135 \n\
2136 Return 1 if S starts with the specified prefix, otherwise return 0.  With\n\
2137 optional start, test S beginning at that position.  With optional end, stop\n\
2138 comparing S at that position.";
2139
2140 static PyObject *
2141 string_startswith(PyStringObject *self, PyObject *args)
2142 {
2143         const char* str = PyString_AS_STRING(self);
2144         int len = PyString_GET_SIZE(self);
2145         const char* prefix;
2146         int plen;
2147         int start = 0;
2148         int end = INT_MAX;
2149         PyObject *subobj;
2150
2151         if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2152                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2153                 return NULL;
2154         if (PyString_Check(subobj)) {
2155                 prefix = PyString_AS_STRING(subobj);
2156                 plen = PyString_GET_SIZE(subobj);
2157         }
2158 #ifdef Py_USING_UNICODE
2159         else if (PyUnicode_Check(subobj)) {
2160                 int rc;
2161                 rc = PyUnicode_Tailmatch((PyObject *)self,
2162                                           subobj, start, end, -1);
2163                 if (rc == -1)
2164                         return NULL;
2165                 else
2166                         return PyInt_FromLong((long) rc);
2167         }
2168 #endif
2169         else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
2170                 return NULL;
2171
2172         /* adopt Java semantics for index out of range.  it is legal for
2173          * offset to be == plen, but this only returns true if prefix is
2174          * the empty string.
2175          */
2176         if (start < 0 || start+plen > len)
2177                 return PyInt_FromLong(0);
2178
2179         if (!memcmp(str+start, prefix, plen)) {
2180                 /* did the match end after the specified end? */
2181                 if (end < 0)
2182                         return PyInt_FromLong(1);
2183                 else if (end - start < plen)
2184                         return PyInt_FromLong(0);
2185                 else
2186                         return PyInt_FromLong(1);
2187         }
2188         else return PyInt_FromLong(0);
2189 }
2190
2191
2192 static char endswith__doc__[] =
2193 "S.endswith(suffix[, start[, end]]) -> int\n\
2194 \n\
2195 Return 1 if S ends with the specified suffix, otherwise return 0.  With\n\
2196 optional start, test S beginning at that position.  With optional end, stop\n\
2197 comparing S at that position.";
2198
2199 static PyObject *
2200 string_endswith(PyStringObject *self, PyObject *args)
2201 {
2202         const char* str = PyString_AS_STRING(self);
2203         int len = PyString_GET_SIZE(self);
2204         const char* suffix;
2205         int slen;
2206         int start = 0;
2207         int end = INT_MAX;
2208         int lower, upper;
2209         PyObject *subobj;
2210
2211         if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2212                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2213                 return NULL;
2214         if (PyString_Check(subobj)) {
2215                 suffix = PyString_AS_STRING(subobj);
2216                 slen = PyString_GET_SIZE(subobj);
2217         }
2218 #ifdef Py_USING_UNICODE
2219         else if (PyUnicode_Check(subobj)) {
2220                 int rc;
2221                 rc = PyUnicode_Tailmatch((PyObject *)self,
2222                                           subobj, start, end, +1);
2223                 if (rc == -1)
2224                         return NULL;
2225                 else
2226                         return PyInt_FromLong((long) rc);
2227         }
2228 #endif
2229         else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
2230                 return NULL;
2231
2232         if (start < 0 || start > len || slen > len)
2233                 return PyInt_FromLong(0);
2234
2235         upper = (end >= 0 && end <= len) ? end : len;
2236         lower = (upper - slen) > start ? (upper - slen) : start;
2237
2238         if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
2239                 return PyInt_FromLong(1);
2240         else return PyInt_FromLong(0);
2241 }
2242
2243
2244 static char encode__doc__[] =
2245 "S.encode([encoding[,errors]]) -> object\n\
2246 \n\
2247 Encodes S using the codec registered for encoding. encoding defaults\n\
2248 to the default encoding. errors may be given to set a different error\n\
2249 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2250 a ValueError. Other possible values are 'ignore' and 'replace'.";
2251
2252 static PyObject *
2253 string_encode(PyStringObject *self, PyObject *args)
2254 {
2255     char *encoding = NULL;
2256     char *errors = NULL;
2257     if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2258         return NULL;
2259     return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2260 }
2261
2262
2263 static char decode__doc__[] =
2264 "S.decode([encoding[,errors]]) -> object\n\
2265 \n\
2266 Decodes S using the codec registered for encoding. encoding defaults\n\
2267 to the default encoding. errors may be given to set a different error\n\
2268 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2269 a ValueError. Other possible values are 'ignore' and 'replace'.";
2270
2271 static PyObject *
2272 string_decode(PyStringObject *self, PyObject *args)
2273 {
2274     char *encoding = NULL;
2275     char *errors = NULL;
2276     if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2277         return NULL;
2278     return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
2279 }
2280
2281
2282 static char expandtabs__doc__[] =
2283 "S.expandtabs([tabsize]) -> string\n\
2284 \n\
2285 Return a copy of S where all tab characters are expanded using spaces.\n\
2286 If tabsize is not given, a tab size of 8 characters is assumed.";
2287
2288 static PyObject*
2289 string_expandtabs(PyStringObject *self, PyObject *args)
2290 {
2291     const char *e, *p;
2292     char *q;
2293     int i, j;
2294     PyObject *u;
2295     int tabsize = 8;
2296
2297     if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2298         return NULL;
2299
2300     /* First pass: determine size of output string */
2301     i = j = 0;
2302     e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2303     for (p = PyString_AS_STRING(self); p < e; p++)
2304         if (*p == '\t') {
2305             if (tabsize > 0)
2306                 j += tabsize - (j % tabsize);
2307         }
2308         else {
2309             j++;
2310             if (*p == '\n' || *p == '\r') {
2311                 i += j;
2312                 j = 0;
2313             }
2314         }
2315
2316     /* Second pass: create output string and fill it */
2317     u = PyString_FromStringAndSize(NULL, i + j);
2318     if (!u)
2319         return NULL;
2320
2321     j = 0;
2322     q = PyString_AS_STRING(u);
2323
2324     for (p = PyString_AS_STRING(self); p < e; p++)
2325         if (*p == '\t') {
2326             if (tabsize > 0) {
2327                 i = tabsize - (j % tabsize);
2328                 j += i;
2329                 while (i--)
2330                     *q++ = ' ';
2331             }
2332         }
2333         else {
2334             j++;
2335             *q++ = *p;
2336             if (*p == '\n' || *p == '\r')
2337                 j = 0;
2338         }
2339
2340     return u;
2341 }
2342
2343 static PyObject *
2344 pad(PyStringObject *self, int left, int right, char fill)
2345 {
2346     PyObject *u;
2347
2348     if (left < 0)
2349         left = 0;
2350     if (right < 0)
2351         right = 0;
2352
2353     if (left == 0 && right == 0 && PyString_CheckExact(self)) {
2354         Py_INCREF(self);
2355         return (PyObject *)self;
2356     }
2357
2358     u = PyString_FromStringAndSize(NULL,
2359                                    left + PyString_GET_SIZE(self) + right);
2360     if (u) {
2361         if (left)
2362             memset(PyString_AS_STRING(u), fill, left);
2363         memcpy(PyString_AS_STRING(u) + left,
2364                PyString_AS_STRING(self),
2365                PyString_GET_SIZE(self));
2366         if (right)
2367             memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2368                    fill, right);
2369     }
2370
2371     return u;
2372 }
2373
2374 static char ljust__doc__[] =
2375 "S.ljust(width) -> string\n"
2376 "\n"
2377 "Return S left justified in a string of length width. Padding is\n"
2378 "done using spaces.";
2379
2380 static PyObject *
2381 string_ljust(PyStringObject *self, PyObject *args)
2382 {
2383     int width;
2384     if (!PyArg_ParseTuple(args, "i:ljust", &width))
2385         return NULL;
2386
2387     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
2388         Py_INCREF(self);
2389         return (PyObject*) self;
2390     }
2391
2392     return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2393 }
2394
2395
2396 static char rjust__doc__[] =
2397 "S.rjust(width) -> string\n"
2398 "\n"
2399 "Return S right justified in a string of length width. Padding is\n"
2400 "done using spaces.";
2401
2402 static PyObject *
2403 string_rjust(PyStringObject *self, PyObject *args)
2404 {
2405     int width;
2406     if (!PyArg_ParseTuple(args, "i:rjust", &width))
2407         return NULL;
2408
2409     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
2410         Py_INCREF(self);
2411         return (PyObject*) self;
2412     }
2413
2414     return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2415 }
2416
2417
2418 static char center__doc__[] =
2419 "S.center(width) -> string\n"
2420 "\n"
2421 "Return S centered in a string of length width. Padding is done\n"
2422 "using spaces.";
2423
2424 static PyObject *
2425 string_center(PyStringObject *self, PyObject *args)
2426 {
2427     int marg, left;
2428     int width;
2429
2430     if (!PyArg_ParseTuple(args, "i:center", &width))
2431         return NULL;
2432
2433     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
2434         Py_INCREF(self);
2435         return (PyObject*) self;
2436     }
2437
2438     marg = width - PyString_GET_SIZE(self);
2439     left = marg / 2 + (marg & width & 1);
2440
2441     return pad(self, left, marg - left, ' ');
2442 }
2443
2444 static char zfill__doc__[] =
2445 "S.zfill(width) -> string\n"
2446 "\n"
2447 "Pad a numeric string S with zeros on the left, to fill a field\n"
2448 "of the specified width.  The string S is never truncated.";
2449
2450 static PyObject *
2451 string_zfill(PyStringObject *self, PyObject *args)
2452 {
2453     int fill;
2454     PyObject *s;
2455     char *p;
2456
2457     int width;
2458     if (!PyArg_ParseTuple(args, "i:zfill", &width))
2459         return NULL;
2460
2461     if (PyString_GET_SIZE(self) >= width) {
2462         if (PyString_CheckExact(self)) {
2463             Py_INCREF(self);
2464             return (PyObject*) self;
2465         }
2466         else
2467             return PyString_FromStringAndSize(
2468                 PyString_AS_STRING(self),
2469                 PyString_GET_SIZE(self)
2470             );
2471     }
2472
2473     fill = width - PyString_GET_SIZE(self);
2474
2475     s = pad(self, fill, 0, '0');
2476
2477     if (s == NULL)
2478         return NULL;
2479
2480     p = PyString_AS_STRING(s);
2481     if (p[fill] == '+' || p[fill] == '-') {
2482         /* move sign to beginning of string */
2483         p[0] = p[fill];
2484         p[fill] = '0';
2485     }
2486
2487     return (PyObject*) s;
2488 }
2489
2490 static char isspace__doc__[] =
2491 "S.isspace() -> int\n"
2492 "\n"
2493 "Return 1 if there are only whitespace characters in S,\n"
2494 "0 otherwise.";
2495
2496 static PyObject*
2497 string_isspace(PyStringObject *self)
2498 {
2499     register const unsigned char *p
2500         = (unsigned char *) PyString_AS_STRING(self);
2501     register const unsigned char *e;
2502
2503     /* Shortcut for single character strings */
2504     if (PyString_GET_SIZE(self) == 1 &&
2505         isspace(*p))
2506         return PyInt_FromLong(1);
2507
2508     /* Special case for empty strings */
2509     if (PyString_GET_SIZE(self) == 0)
2510         return PyInt_FromLong(0);
2511
2512     e = p + PyString_GET_SIZE(self);
2513     for (; p < e; p++) {
2514         if (!isspace(*p))
2515             return PyInt_FromLong(0);
2516     }
2517     return PyInt_FromLong(1);
2518 }
2519
2520
2521 static char isalpha__doc__[] =
2522 "S.isalpha() -> int\n\
2523 \n\
2524 Return 1 if  all characters in S are alphabetic\n\
2525 and there is at least one character in S, 0 otherwise.";
2526
2527 static PyObject*
2528 string_isalpha(PyStringObject *self)
2529 {
2530     register const unsigned char *p
2531         = (unsigned char *) PyString_AS_STRING(self);
2532     register const unsigned char *e;
2533
2534     /* Shortcut for single character strings */
2535     if (PyString_GET_SIZE(self) == 1 &&
2536         isalpha(*p))
2537         return PyInt_FromLong(1);
2538
2539     /* Special case for empty strings */
2540     if (PyString_GET_SIZE(self) == 0)
2541         return PyInt_FromLong(0);
2542
2543     e = p + PyString_GET_SIZE(self);
2544     for (; p < e; p++) {
2545         if (!isalpha(*p))
2546             return PyInt_FromLong(0);
2547     }
2548     return PyInt_FromLong(1);
2549 }
2550
2551
2552 static char isalnum__doc__[] =
2553 "S.isalnum() -> int\n\
2554 \n\
2555 Return 1 if  all characters in S are alphanumeric\n\
2556 and there is at least one character in S, 0 otherwise.";
2557
2558 static PyObject*
2559 string_isalnum(PyStringObject *self)
2560 {
2561     register const unsigned char *p
2562         = (unsigned char *) PyString_AS_STRING(self);
2563     register const unsigned char *e;
2564
2565     /* Shortcut for single character strings */
2566     if (PyString_GET_SIZE(self) == 1 &&
2567         isalnum(*p))
2568         return PyInt_FromLong(1);
2569
2570     /* Special case for empty strings */
2571     if (PyString_GET_SIZE(self) == 0)
2572         return PyInt_FromLong(0);
2573
2574     e = p + PyString_GET_SIZE(self);
2575     for (; p < e; p++) {
2576         if (!isalnum(*p))
2577             return PyInt_FromLong(0);
2578     }
2579     return PyInt_FromLong(1);
2580 }
2581
2582
2583 static char isdigit__doc__[] =
2584 "S.isdigit() -> int\n\
2585 \n\
2586 Return 1 if there are only digit characters in S,\n\
2587 0 otherwise.";
2588
2589 static PyObject*
2590 string_isdigit(PyStringObject *self)
2591 {
2592     register const unsigned char *p
2593         = (unsigned char *) PyString_AS_STRING(self);
2594     register const unsigned char *e;
2595
2596     /* Shortcut for single character strings */
2597     if (PyString_GET_SIZE(self) == 1 &&
2598         isdigit(*p))
2599         return PyInt_FromLong(1);
2600
2601     /* Special case for empty strings */
2602     if (PyString_GET_SIZE(self) == 0)
2603         return PyInt_FromLong(0);
2604
2605     e = p + PyString_GET_SIZE(self);
2606     for (; p < e; p++) {
2607         if (!isdigit(*p))
2608             return PyInt_FromLong(0);
2609     }
2610     return PyInt_FromLong(1);
2611 }
2612
2613
2614 static char islower__doc__[] =
2615 "S.islower() -> int\n\
2616 \n\
2617 Return 1 if  all cased characters in S are lowercase and there is\n\
2618 at least one cased character in S, 0 otherwise.";
2619
2620 static PyObject*
2621 string_islower(PyStringObject *self)
2622 {
2623     register const unsigned char *p
2624         = (unsigned char *) PyString_AS_STRING(self);
2625     register const unsigned char *e;
2626     int cased;
2627
2628     /* Shortcut for single character strings */
2629     if (PyString_GET_SIZE(self) == 1)
2630         return PyInt_FromLong(islower(*p) != 0);
2631
2632     /* Special case for empty strings */
2633     if (PyString_GET_SIZE(self) == 0)
2634         return PyInt_FromLong(0);
2635
2636     e = p + PyString_GET_SIZE(self);
2637     cased = 0;
2638     for (; p < e; p++) {
2639         if (isupper(*p))
2640             return PyInt_FromLong(0);
2641         else if (!cased && islower(*p))
2642             cased = 1;
2643     }
2644     return PyInt_FromLong(cased);
2645 }
2646
2647
2648 static char isupper__doc__[] =
2649 "S.isupper() -> int\n\
2650 \n\
2651 Return 1 if  all cased characters in S are uppercase and there is\n\
2652 at least one cased character in S, 0 otherwise.";
2653
2654 static PyObject*
2655 string_isupper(PyStringObject *self)
2656 {
2657     register const unsigned char *p
2658         = (unsigned char *) PyString_AS_STRING(self);
2659     register const unsigned char *e;
2660     int cased;
2661
2662     /* Shortcut for single character strings */
2663     if (PyString_GET_SIZE(self) == 1)
2664         return PyInt_FromLong(isupper(*p) != 0);
2665
2666     /* Special case for empty strings */
2667     if (PyString_GET_SIZE(self) == 0)
2668         return PyInt_FromLong(0);
2669
2670     e = p + PyString_GET_SIZE(self);
2671     cased = 0;
2672     for (; p < e; p++) {
2673         if (islower(*p))
2674             return PyInt_FromLong(0);
2675         else if (!cased && isupper(*p))
2676             cased = 1;
2677     }
2678     return PyInt_FromLong(cased);
2679 }
2680
2681
2682 static char istitle__doc__[] =
2683 "S.istitle() -> int\n\
2684 \n\
2685 Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2686 may only follow uncased characters and lowercase characters only cased\n\
2687 ones. Return 0 otherwise.";
2688
2689 static PyObject*
2690 string_istitle(PyStringObject *self, PyObject *uncased)
2691 {
2692     register const unsigned char *p
2693         = (unsigned char *) PyString_AS_STRING(self);
2694     register const unsigned char *e;
2695     int cased, previous_is_cased;
2696
2697     /* Shortcut for single character strings */
2698     if (PyString_GET_SIZE(self) == 1)
2699         return PyInt_FromLong(isupper(*p) != 0);
2700
2701     /* Special case for empty strings */
2702     if (PyString_GET_SIZE(self) == 0)
2703         return PyInt_FromLong(0);
2704
2705     e = p + PyString_GET_SIZE(self);
2706     cased = 0;
2707     previous_is_cased = 0;
2708     for (; p < e; p++) {
2709         register const unsigned char ch = *p;
2710
2711         if (isupper(ch)) {
2712             if (previous_is_cased)
2713                 return PyInt_FromLong(0);
2714             previous_is_cased = 1;
2715             cased = 1;
2716         }
2717         else if (islower(ch)) {
2718             if (!previous_is_cased)
2719                 return PyInt_FromLong(0);
2720             previous_is_cased = 1;
2721             cased = 1;
2722         }
2723         else
2724             previous_is_cased = 0;
2725     }
2726     return PyInt_FromLong(cased);
2727 }
2728
2729
2730 static char splitlines__doc__[] =
2731 "S.splitlines([keepends]) -> list of strings\n\
2732 \n\
2733 Return a list of the lines in S, breaking at line boundaries.\n\
2734 Line breaks are not included in the resulting list unless keepends\n\
2735 is given and true.";
2736
2737 #define SPLIT_APPEND(data, left, right)                                 \
2738         str = PyString_FromStringAndSize(data + left, right - left);    \
2739         if (!str)                                                       \
2740             goto onError;                                               \
2741         if (PyList_Append(list, str)) {                                 \
2742             Py_DECREF(str);                                             \
2743             goto onError;                                               \
2744         }                                                               \
2745         else                                                            \
2746             Py_DECREF(str);
2747
2748 static PyObject*
2749 string_splitlines(PyStringObject *self, PyObject *args)
2750 {
2751     register int i;
2752     register int j;
2753     int len;
2754     int keepends = 0;
2755     PyObject *list;
2756     PyObject *str;
2757     char *data;
2758
2759     if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
2760         return NULL;
2761
2762     data = PyString_AS_STRING(self);
2763     len = PyString_GET_SIZE(self);
2764
2765     list = PyList_New(0);
2766     if (!list)
2767         goto onError;
2768
2769     for (i = j = 0; i < len; ) {
2770         int eol;
2771
2772         /* Find a line and append it */
2773         while (i < len && data[i] != '\n' && data[i] != '\r')
2774             i++;
2775
2776         /* Skip the line break reading CRLF as one line break */
2777         eol = i;
2778         if (i < len) {
2779             if (data[i] == '\r' && i + 1 < len &&
2780                 data[i+1] == '\n')
2781                 i += 2;
2782             else
2783                 i++;
2784             if (keepends)
2785                 eol = i;
2786         }
2787         SPLIT_APPEND(data, j, eol);
2788         j = i;
2789     }
2790     if (j < len) {
2791         SPLIT_APPEND(data, j, len);
2792     }
2793
2794     return list;
2795
2796  onError:
2797     Py_DECREF(list);
2798     return NULL;
2799 }
2800
2801 #undef SPLIT_APPEND
2802
2803 \f
/* Method table of the string type (referenced from the tp_methods slot
   of PyString_Type).  Each entry gives the Python-level method name, the
   C function implementing it, the calling-convention flag (METH_NOARGS,
   METH_O or METH_VARARGS) and the method's doc string.  The table is
   terminated by an all-NULL sentinel entry. */
static PyMethodDef
string_methods[] = {
        /* Counterparts of the obsolete stropmodule functions; except
           string.maketrans(). */
        {"join",       (PyCFunction)string_join,   METH_O, join__doc__},
        {"split",       (PyCFunction)string_split, METH_VARARGS, split__doc__},
        {"lower",      (PyCFunction)string_lower,  METH_NOARGS, lower__doc__},
        {"upper",       (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
        {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
        {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
        {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
        {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
        {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
        {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
        {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
        {"capitalize", (PyCFunction)string_capitalize,  METH_NOARGS,
         capitalize__doc__},
        {"count",      (PyCFunction)string_count,       METH_VARARGS, count__doc__},
        {"endswith",   (PyCFunction)string_endswith,    METH_VARARGS,
         endswith__doc__},
        {"find",       (PyCFunction)string_find,        METH_VARARGS, find__doc__},
        {"index",      (PyCFunction)string_index,       METH_VARARGS, index__doc__},
        {"lstrip",     (PyCFunction)string_lstrip,      METH_VARARGS, lstrip__doc__},
        {"replace",     (PyCFunction)string_replace,    METH_VARARGS, replace__doc__},
        {"rfind",       (PyCFunction)string_rfind,      METH_VARARGS, rfind__doc__},
        {"rindex",      (PyCFunction)string_rindex,     METH_VARARGS, rindex__doc__},
        {"rstrip",      (PyCFunction)string_rstrip,     METH_VARARGS, rstrip__doc__},
        {"startswith",  (PyCFunction)string_startswith, METH_VARARGS,
         startswith__doc__},
        {"strip",       (PyCFunction)string_strip,      METH_VARARGS, strip__doc__},
        {"swapcase",    (PyCFunction)string_swapcase,   METH_NOARGS,
         swapcase__doc__},
        {"translate",   (PyCFunction)string_translate,  METH_VARARGS,
         translate__doc__},
        {"title",       (PyCFunction)string_title,      METH_NOARGS, title__doc__},
        {"ljust",       (PyCFunction)string_ljust,      METH_VARARGS, ljust__doc__},
        {"rjust",       (PyCFunction)string_rjust,      METH_VARARGS, rjust__doc__},
        {"center",      (PyCFunction)string_center,     METH_VARARGS, center__doc__},
        {"zfill",       (PyCFunction)string_zfill,      METH_VARARGS, zfill__doc__},
        {"encode",      (PyCFunction)string_encode,     METH_VARARGS, encode__doc__},
        {"decode",      (PyCFunction)string_decode,     METH_VARARGS, decode__doc__},
        {"expandtabs",  (PyCFunction)string_expandtabs, METH_VARARGS,
         expandtabs__doc__},
        {"splitlines",  (PyCFunction)string_splitlines, METH_VARARGS,
         splitlines__doc__},
        {NULL,     NULL}                     /* sentinel */
};
2851
2852 staticforward PyObject *
2853 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2854
2855 static PyObject *
2856 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2857 {
2858         PyObject *x = NULL;
2859         static char *kwlist[] = {"object", 0};
2860
2861         if (type != &PyString_Type)
2862                 return str_subtype_new(type, args, kwds);
2863         if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
2864                 return NULL;
2865         if (x == NULL)
2866                 return PyString_FromString("");
2867         return PyObject_Str(x);
2868 }
2869
2870 static PyObject *
2871 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2872 {
2873         PyObject *tmp, *pnew;
2874         int n;
2875
2876         assert(PyType_IsSubtype(type, &PyString_Type));
2877         tmp = string_new(&PyString_Type, args, kwds);
2878         if (tmp == NULL)
2879                 return NULL;
2880         assert(PyString_CheckExact(tmp));
2881         n = PyString_GET_SIZE(tmp);
2882         pnew = type->tp_alloc(type, n);
2883         if (pnew != NULL) {
2884                 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
2885 #ifdef CACHE_HASH
2886                 ((PyStringObject *)pnew)->ob_shash =
2887                         ((PyStringObject *)tmp)->ob_shash;
2888 #endif
2889 #ifdef INTERN_STRINGS
2890                 ((PyStringObject *)pnew)->ob_sinterned =
2891                         ((PyStringObject *)tmp)->ob_sinterned;
2892 #endif
2893         }
2894         Py_DECREF(tmp);
2895         return pnew;
2896 }
2897
/* Doc string of the string type (stored in the tp_doc slot below). */
static char string_doc[] =
"str(object) -> string\n\
\n\
Return a nice string representation of the object.\n\
If the argument is a string, the return value is the same object.";

/* Type object for strings.  The initializer is positional: every entry
   fills the slot named in the comment next to it, and a 0 leaves that
   slot empty.  The type is variable-sized (basic size
   sizeof(PyStringObject), item size sizeof(char)) and may be subclassed
   (Py_TPFLAGS_BASETYPE). */
PyTypeObject PyString_Type = {
        PyObject_HEAD_INIT(&PyType_Type)
        0,
        "str",
        sizeof(PyStringObject),
        sizeof(char),
        (destructor)string_dealloc,             /* tp_dealloc */
        (printfunc)string_print,                /* tp_print */
        0,                                      /* tp_getattr */
        0,                                      /* tp_setattr */
        0,                                      /* tp_compare */
        (reprfunc)string_repr,                  /* tp_repr */
        0,                                      /* tp_as_number */
        &string_as_sequence,                    /* tp_as_sequence */
        0,                                      /* tp_as_mapping */
        (hashfunc)string_hash,                  /* tp_hash */
        0,                                      /* tp_call */
        (reprfunc)string_str,                   /* tp_str */
        PyObject_GenericGetAttr,                /* tp_getattro */
        0,                                      /* tp_setattro */
        &string_as_buffer,                      /* tp_as_buffer */
        Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
        string_doc,                             /* tp_doc */
        0,                                      /* tp_traverse */
        0,                                      /* tp_clear */
        (richcmpfunc)string_richcompare,        /* tp_richcompare */
        0,                                      /* tp_weaklistoffset */
        0,                                      /* tp_iter */
        0,                                      /* tp_iternext */
        string_methods,                         /* tp_methods */
        0,                                      /* tp_members */
        0,                                      /* tp_getset */
        0,                                      /* tp_base */
        0,                                      /* tp_dict */
        0,                                      /* tp_descr_get */
        0,                                      /* tp_descr_set */
        0,                                      /* tp_dictoffset */
        0,                                      /* tp_init */
        0,                                      /* tp_alloc */
        string_new,                             /* tp_new */
        _PyObject_Del,                          /* tp_free */
};
2946
2947 void
2948 PyString_Concat(register PyObject **pv, register PyObject *w)
2949 {
2950         register PyObject *v;
2951         if (*pv == NULL)
2952                 return;
2953         if (w == NULL || !PyString_Check(*pv)) {
2954                 Py_DECREF(*pv);
2955                 *pv = NULL;
2956                 return;
2957         }
2958         v = string_concat((PyStringObject *) *pv, w);
2959         Py_DECREF(*pv);
2960         *pv = v;
2961 }
2962
2963 void
2964 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
2965 {
2966         PyString_Concat(pv, w);
2967         Py_XDECREF(w);
2968 }
2969
2970
2971 /* The following function breaks the notion that strings are immutable:
2972    it changes the size of a string.  We get away with this only if there
2973    is only one module referencing the object.  You can also think of it
2974    as creating a new string object and destroying the old one, only
2975    more efficiently.  In any case, don't use this if the string may
2976    already be known to some other part of the code...
2977    Note that if there's not enough memory to resize the string, the original
2978    string object at *pv is deallocated, *pv is set to NULL, an "out of
2979    memory" exception is set, and -1 is returned.  Else (on success) 0 is
2980    returned, and the value in *pv may or may not be the same as on input.
2981    As always, an extra byte is allocated for a trailing \0 byte (newsize
2982    does *not* include that), and a trailing \0 byte is stored.
2983 */
2984
int
_PyString_Resize(PyObject **pv, int newsize)
{
        register PyObject *v;
        register PyStringObject *sv;
        v = *pv;
        /* Only a string object with exactly one reference may be resized,
           and the new size must not be negative.  Otherwise clear *pv,
           drop the caller's reference and report an internal error. */
        if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0) {
                *pv = 0;
                Py_DECREF(v);
                PyErr_BadInternalCall();
                return -1;
        }
        /* XXX UNREF/NEWREF interface should be more symmetrical */
        /* NOTE(review): the object is taken out of the reference
           bookkeeping here and re-registered with _Py_NewReference()
           after the realloc, presumably because the realloc may move
           it; the decrement of _Py_RefTotal presumably compensates for
           the increment done by _Py_NewReference() -- confirm against
           object.h. */
#ifdef Py_REF_DEBUG
        --_Py_RefTotal;
#endif
        _Py_ForgetReference(v);
        /* sizeof(PyStringObject) is used as-is for the new block size;
           NOTE(review): presumably it already includes the byte for the
           trailing \0 -- confirm against the struct definition. */
        *pv = (PyObject *)
                PyObject_REALLOC((char *)v,
                        sizeof(PyStringObject) + newsize * sizeof(char));
        if (*pv == NULL) {
                /* Out of memory: free the original object; *pv is
                   already NULL for the caller. */
                PyObject_DEL(v);
                PyErr_NoMemory();
                return -1;
        }
        _Py_NewReference(*pv);
        /* Record the new length and store the terminating null byte. */
        sv = (PyStringObject *) *pv;
        sv->ob_size = newsize;
        sv->ob_sval[newsize] = '\0';
        return 0;
}
3016
3017 /* Helpers for formatstring */
3018
3019 static PyObject *
3020 getnextarg(PyObject *args, int arglen, int *p_argidx)
3021 {
3022         int argidx = *p_argidx;
3023         if (argidx < arglen) {
3024                 (*p_argidx)++;
3025                 if (arglen < 0)
3026                         return args;
3027                 else
3028                         return PyTuple_GetItem(args, argidx);
3029         }
3030         PyErr_SetString(PyExc_TypeError,
3031                         "not enough arguments for format string");
3032         return NULL;
3033 }
3034
/* Format codes
 *
 * Bit flags collected in the `flags' word while the flag characters of
 * a format specifier are parsed; each bit corresponds to one flag
 * character:
 *
 * F_LJUST      '-'
 * F_SIGN       '+'
 * F_BLANK      ' '
 * F_ALT        '#'
 * F_ZERO       '0'
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)
3047
3048 static int
3049 formatfloat(char *buf, size_t buflen, int flags,
3050             int prec, int type, PyObject *v)
3051 {
3052         /* fmt = '%#.' + `prec` + `type`
3053            worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
3054         char fmt[20];
3055         double x;
3056         if (!PyArg_Parse(v, "d;float argument required", &x))
3057                 return -1;
3058         if (prec < 0)
3059                 prec = 6;
3060         if (type == 'f' && fabs(x)/1e25 >= 1e25)
3061                 type = 'g';
3062         PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3063                       (flags&F_ALT) ? "#" : "",
3064                       prec, type);
3065         /* worst case length calc to ensure no buffer overrun:
3066              fmt = %#.<prec>g
3067              buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
3068                 for any double rep.)
3069              len = 1 + prec + 1 + 2 + 5 = 9 + prec
3070            If prec=0 the effective precision is 1 (the leading digit is
3071            always given), therefore increase by one to 10+prec. */
3072         if (buflen <= (size_t)10 + (size_t)prec) {
3073                 PyErr_SetString(PyExc_OverflowError,
3074                         "formatted float is too long (precision too large?)");
3075                 return -1;
3076         }
3077         PyOS_snprintf(buf, buflen, fmt, x);
3078         return strlen(buf);
3079 }
3080
3081 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3082  * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
3083  * Python's regular ints.
3084  * Return value:  a new PyString*, or NULL if error.
3085  *  .  *pbuf is set to point into it,
3086  *     *plen set to the # of chars following that.
3087  *     Caller must decref it when done using pbuf.
3088  *     The string starting at *pbuf is of the form
3089  *         "-"? ("0x" | "0X")? digit+
3090  *     "0x"/"0X" are present only for x and X conversions, with F_ALT
3091  *         set in flags.  The case of hex digits will be correct,
3092  *     There will be at least prec digits, zero-filled on the left if
3093  *         necessary to get that many.
3094  * val          object to be converted
3095  * flags        bitmask of format flags; only F_ALT is looked at
3096  * prec         minimum number of digits; 0-fill on left if needed
3097  * type         a character in [duoxX]; u acts the same as d
3098  *
3099  * CAUTION:  o, x and X conversions on regular ints can never
3100  * produce a '-' sign, but can for Python's unbounded ints.
3101  */
3102 PyObject*
3103 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3104                      char **pbuf, int *plen)
3105 {
3106         PyObject *result = NULL;
3107         char *buf;
3108         int i;
3109         int sign;       /* 1 if '-', else 0 */
3110         int len;        /* number of characters */
3111         int numdigits;  /* len == numnondigits + numdigits */
3112         int numnondigits = 0;
3113
3114         switch (type) {
3115         case 'd':
3116         case 'u':
3117                 result = val->ob_type->tp_str(val);
3118                 break;
3119         case 'o':
3120                 result = val->ob_type->tp_as_number->nb_oct(val);
3121                 break;
3122         case 'x':
3123         case 'X':
3124                 numnondigits = 2;
3125                 result = val->ob_type->tp_as_number->nb_hex(val);
3126                 break;
3127         default:
3128                 assert(!"'type' not in [duoxX]");
3129         }
3130         if (!result)
3131                 return NULL;
3132
3133         /* To modify the string in-place, there can only be one reference. */
3134         if (result->ob_refcnt != 1) {
3135                 PyErr_BadInternalCall();
3136                 return NULL;
3137         }
3138         buf = PyString_AsString(result);
3139         len = PyString_Size(result);
3140         if (buf[len-1] == 'L') {
3141                 --len;
3142                 buf[len] = '\0';
3143         }
3144         sign = buf[0] == '-';
3145         numnondigits += sign;
3146         numdigits = len - numnondigits;
3147         assert(numdigits > 0);
3148
3149         /* Get rid of base marker unless F_ALT */
3150         if ((flags & F_ALT) == 0) {
3151                 /* Need to skip 0x, 0X or 0. */
3152                 int skipped = 0;
3153                 switch (type) {
3154                 case 'o':
3155                         assert(buf[sign] == '0');
3156                         /* If 0 is only digit, leave it alone. */
3157                         if (numdigits > 1) {
3158                                 skipped = 1;
3159                                 --numdigits;
3160                         }
3161                         break;
3162                 case 'x':
3163                 case 'X':
3164                         assert(buf[sign] == '0');
3165                         assert(buf[sign + 1] == 'x');
3166                         skipped = 2;
3167                         numnondigits -= 2;
3168                         break;
3169                 }
3170                 if (skipped) {
3171                         buf += skipped;
3172                         len -= skipped;
3173                         if (sign)
3174                                 buf[0] = '-';
3175                 }
3176                 assert(len == numnondigits + numdigits);
3177                 assert(numdigits > 0);
3178         }
3179
3180         /* Fill with leading zeroes to meet minimum width. */
3181         if (prec > numdigits) {
3182                 PyObject *r1 = PyString_FromStringAndSize(NULL,
3183                                         numnondigits + prec);
3184                 char *b1;
3185                 if (!r1) {
3186                         Py_DECREF(result);
3187                         return NULL;
3188                 }
3189                 b1 = PyString_AS_STRING(r1);
3190                 for (i = 0; i < numnondigits; ++i)
3191                         *b1++ = *buf++;
3192                 for (i = 0; i < prec - numdigits; i++)
3193                         *b1++ = '0';
3194                 for (i = 0; i < numdigits; i++)
3195                         *b1++ = *buf++;
3196                 *b1 = '\0';
3197                 Py_DECREF(result);
3198                 result = r1;
3199                 buf = PyString_AS_STRING(result);
3200                 len = numnondigits + prec;
3201         }
3202
3203         /* Fix up case for hex conversions. */
3204         switch (type) {
3205         case 'x':
3206                 /* Need to convert all upper case letters to lower case. */
3207                 for (i = 0; i < len; i++)
3208                         if (buf[i] >= 'A' && buf[i] <= 'F')
3209                                 buf[i] += 'a'-'A';
3210                 break;
3211         case 'X':
3212                 /* Need to convert 0x to 0X (and -0x to -0X). */
3213                 if (buf[sign + 1] == 'x')
3214                         buf[sign + 1] = 'X';
3215                 break;
3216         }
3217         *pbuf = buf;
3218         *plen = len;
3219         return result;
3220 }
3221
3222 static int
3223 formatint(char *buf, size_t buflen, int flags,
3224           int prec, int type, PyObject *v)
3225 {
3226         /* fmt = '%#.' + `prec` + 'l' + `type`
3227            worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3228            + 1 + 1 = 24 */
3229         char fmt[64];   /* plenty big enough! */
3230         long x;
3231         if (!PyArg_Parse(v, "l;int argument required", &x))
3232                 return -1;
3233         if (prec < 0)
3234                 prec = 1;
3235         PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%dl%c",
3236                       (flags&F_ALT) ? "#" : "",
3237                       prec, type);
3238         /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
3239            worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
3240         if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
3241                 PyErr_SetString(PyExc_OverflowError,
3242                         "formatted integer is too long (precision too large?)");
3243                 return -1;
3244         }
3245         PyOS_snprintf(buf, buflen, fmt, x);
3246         /* When converting 0 under %#x or %#X, C leaves off the base marker,
3247          * but we want it (for consistency with other %#x conversions, and
3248          * for consistency with Python's hex() function).
3249          * BUG 28-Apr-2001 tim:  At least two platform Cs (Metrowerks &
3250          * Compaq Tru64) violate the std by converting 0 w/ leading 0x anyway.
3251          * So add it only if the platform didn't already.
3252          */
3253         if (x == 0 &&
3254            (flags & F_ALT) &&
3255            (type == 'x' || type == 'X') &&
3256             buf[1] != (char)type)  /* this last always true under std C */
3257                 {
3258                 memmove(buf+2, buf, strlen(buf) + 1);
3259                 buf[0] = '0';
3260                 buf[1] = (char)type;
3261         }
3262         return strlen(buf);
3263 }
3264
3265 static int
3266 formatchar(char *buf, size_t buflen, PyObject *v)
3267 {
3268         /* presume that the buffer is at least 2 characters long */
3269         if (PyString_Check(v)) {
3270                 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
3271                         return -1;
3272         }
3273         else {
3274                 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
3275                         return -1;
3276         }
3277         buf[1] = '\0';
3278         return 1;
3279 }
3280
3281
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted. XXX This is a magic number. Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format. For now, the current solution is sufficient.

   The value carries a (size_t) cast so that it can be compared
   directly against the size_t buffer lengths taken by the format*()
   helpers.
*/
#define FORMATBUFLEN (size_t)120
3291
3292 PyObject *
3293 PyString_Format(PyObject *format, PyObject *args)
3294 {
3295         char *fmt, *res;
3296         int fmtcnt, rescnt, reslen, arglen, argidx;
3297         int args_owned = 0;
3298         PyObject *result, *orig_args;
3299 #ifdef Py_USING_UNICODE
3300         PyObject *v, *w;
3301 #endif
3302         PyObject *dict = NULL;
3303         if (format == NULL || !PyString_Check(format) || args == NULL) {
3304                 PyErr_BadInternalCall();
3305                 return NULL;
3306         }
3307         orig_args = args;
3308         fmt = PyString_AS_STRING(format);
3309         fmtcnt = PyString_GET_SIZE(format);
3310         reslen = rescnt = fmtcnt + 100;
3311         result = PyString_FromStringAndSize((char *)NULL, reslen);
3312         if (result == NULL)
3313                 return NULL;
3314         res = PyString_AsString(result);
3315         if (PyTuple_Check(args)) {
3316                 arglen = PyTuple_GET_SIZE(args);
3317                 argidx = 0;
3318         }
3319         else {
3320                 arglen = -1;
3321                 argidx = -2;
3322         }
3323         if (args->ob_type->tp_as_mapping)
3324                 dict = args;
3325         while (--fmtcnt >= 0) {
3326                 if (*fmt != '%') {
3327                         if (--rescnt < 0) {
3328                                 rescnt = fmtcnt + 100;
3329                                 reslen += rescnt;
3330                                 if (_PyString_Resize(&result, reslen) < 0)
3331                                         return NULL;
3332                                 res = PyString_AS_STRING(result)
3333                                         + reslen - rescnt;
3334                                 --rescnt;
3335                         }
3336                         *res++ = *fmt++;
3337                 }
3338                 else {
3339                         /* Got a format specifier */
3340                         int flags = 0;
3341                         int width = -1;
3342                         int prec = -1;
3343                         int c = '\0';
3344                         int fill;
3345                         PyObject *v = NULL;
3346                         PyObject *temp = NULL;
3347                         char *pbuf;
3348                         int sign;
3349                         int len;
3350                         char formatbuf[FORMATBUFLEN];
3351                              /* For format{float,int,char}() */
3352 #ifdef Py_USING_UNICODE
3353                         char *fmt_start = fmt;
3354                         int argidx_start = argidx;
3355 #endif
3356
3357                         fmt++;
3358                         if (*fmt == '(') {
3359                                 char *keystart;
3360                                 int keylen;
3361                                 PyObject *key;
3362                                 int pcount = 1;
3363
3364                                 if (dict == NULL) {
3365                                         PyErr_SetString(PyExc_TypeError,
3366                                                  "format requires a mapping");
3367                                         goto error;
3368                                 }
3369                                 ++fmt;
3370                                 --fmtcnt;
3371                                 keystart = fmt;
3372                                 /* Skip over balanced parentheses */
3373                                 while (pcount > 0 && --fmtcnt >= 0) {
3374                                         if (*fmt == ')')
3375                                                 --pcount;
3376                                         else if (*fmt == '(')
3377                                                 ++pcount;
3378                                         fmt++;
3379                                 }
3380                                 keylen = fmt - keystart - 1;
3381                                 if (fmtcnt < 0 || pcount > 0) {
3382                                         PyErr_SetString(PyExc_ValueError,
3383                                                    "incomplete format key");
3384                                         goto error;
3385                                 }
3386                                 key = PyString_FromStringAndSize(keystart,
3387                                                                  keylen);
3388                                 if (key == NULL)
3389                                         goto error;
3390                                 if (args_owned) {
3391                                         Py_DECREF(args);
3392                                         args_owned = 0;
3393                                 }
3394                                 args = PyObject_GetItem(dict, key);
3395                                 Py_DECREF(key);
3396                                 if (args == NULL) {
3397                                         goto error;
3398                                 }
3399                                 args_owned = 1;
3400                                 arglen = -1;
3401                                 argidx = -2;
3402                         }
3403                         while (--fmtcnt >= 0) {
3404                                 switch (c = *fmt++) {
3405                                 case '-': flags |= F_LJUST; continue;
3406                                 case '+': flags |= F_SIGN; continue;
3407                                 case ' ': flags |= F_BLANK; continue;
3408                                 case '#': flags |= F_ALT; continue;
3409                                 case '0': flags |= F_ZERO; continue;
3410                                 }
3411                                 break;
3412                         }
3413                         if (c == '*') {
3414                                 v = getnextarg(args, arglen, &argidx);
3415                                 if (v == NULL)
3416                                         goto error;
3417                                 if (!PyInt_Check(v)) {
3418                                         PyErr_SetString(PyExc_TypeError,
3419                                                         "* wants int");
3420                                         goto error;
3421                                 }
3422                                 width = PyInt_AsLong(v);
3423                                 if (width < 0) {
3424                                         flags |= F_LJUST;
3425                                         width = -width;
3426                                 }
3427                                 if (--fmtcnt >= 0)
3428                                         c = *fmt++;
3429                         }
3430                         else if (c >= 0 && isdigit(c)) {
3431                                 width = c - '0';
3432                                 while (--fmtcnt >= 0) {
3433                                         c = Py_CHARMASK(*fmt++);
3434                                         if (!isdigit(c))
3435                                                 break;
3436                                         if ((width*10) / 10 != width) {
3437                                                 PyErr_SetString(
3438                                                         PyExc_ValueError,
3439                                                         "width too big");
3440                                                 goto error;
3441                                         }
3442                                         width = width*10 + (c - '0');
3443                                 }
3444                         }
3445                         if (c == '.') {
3446                                 prec = 0;
3447                                 if (--fmtcnt >= 0)
3448                                         c = *fmt++;
3449                                 if (c == '*') {
3450                                         v = getnextarg(args, arglen, &argidx);
3451                                         if (v == NULL)
3452                                                 goto error;
3453                                         if (!PyInt_Check(v)) {
3454                                                 PyErr_SetString(
3455                                                         PyExc_TypeError,
3456                                                         "* wants int");
3457                                                 goto error;
3458                                         }
3459                                         prec = PyInt_AsLong(v);
3460                                         if (prec < 0)
3461                                                 prec = 0;
3462                                         if (--fmtcnt >= 0)
3463                                                 c = *fmt++;
3464                                 }
3465                                 else if (c >= 0 && isdigit(c)) {
3466                                         prec = c - '0';
3467                                         while (--fmtcnt >= 0) {
3468                                                 c = Py_CHARMASK(*fmt++);
3469                                                 if (!isdigit(c))
3470                                                         break;
3471                                                 if ((prec*10) / 10 != prec) {
3472                                                         PyErr_SetString(
3473                                                             PyExc_ValueError,
3474                                                             "prec too big");
3475                                                         goto error;
3476                                                 }
3477                                                 prec = prec*10 + (c - '0');
3478                                         }
3479                                 }
3480                         } /* prec */
3481                         if (fmtcnt >= 0) {
3482                                 if (c == 'h' || c == 'l' || c == 'L') {
3483                                         if (--fmtcnt >= 0)
3484                                                 c = *fmt++;
3485                                 }
3486                         }
3487                         if (fmtcnt < 0) {
3488                                 PyErr_SetString(PyExc_ValueError,
3489                                                 "incomplete format");
3490                                 goto error;
3491                         }
3492                         if (c != '%') {
3493                                 v = getnextarg(args, arglen, &argidx);
3494                                 if (v == NULL)
3495                                         goto error;
3496                         }
3497                         sign = 0;
3498                         fill = ' ';
3499                         switch (c) {
3500                         case '%':
3501                                 pbuf = "%";
3502                                 len = 1;
3503                                 break;
3504                         case 's':
3505 #ifdef Py_USING_UNICODE
3506                                 if (PyUnicode_Check(v)) {
3507                                         fmt = fmt_start;
3508                                         argidx = argidx_start;
3509                                         goto unicode;
3510                                 }
3511 #endif
3512                                 /* Fall through */
3513                         case 'r':
3514                                 if (c == 's')
3515                                         temp = PyObject_Str(v);
3516                                 else
3517                                         temp = PyObject_Repr(v);
3518                                 if (temp == NULL)
3519                                         goto error;
3520                                 if (!PyString_Check(temp)) {
3521                                         PyErr_SetString(PyExc_TypeError,
3522                                           "%s argument has non-string str()");
3523                                         Py_DECREF(temp);
3524                                         goto error;
3525                                 }
3526                                 pbuf = PyString_AS_STRING(temp);
3527                                 len = PyString_GET_SIZE(temp);
3528                                 if (prec >= 0 && len > prec)
3529                                         len = prec;
3530                                 break;
3531                         case 'i':
3532                         case 'd':
3533                         case 'u':
3534                         case 'o':
3535                         case 'x':
3536                         case 'X':
3537                                 if (c == 'i')
3538                                         c = 'd';
3539                                 if (PyLong_Check(v)) {
3540                                         temp = _PyString_FormatLong(v, flags,
3541                                                 prec, c, &pbuf, &len);
3542                                         if (!temp)
3543                                                 goto error;
3544                                         /* unbounded ints can always produce
3545                                            a sign character! */
3546                                         sign = 1;
3547                                 }
3548                                 else {
3549                                         pbuf = formatbuf;
3550                                         len = formatint(pbuf,
3551                                                         sizeof(formatbuf),
3552                                                         flags, prec, c, v);
3553                                         if (len < 0)
3554                                                 goto error;
3555                                         /* only d conversion is signed */
3556                                         sign = c == 'd';
3557                                 }
3558                                 if (flags & F_ZERO)
3559                                         fill = '0';
3560                                 break;
3561                         case 'e':
3562                         case 'E':
3563                         case 'f':
3564                         case 'g':
3565                         case 'G':
3566                                 pbuf = formatbuf;
3567                                 len = formatfloat(pbuf, sizeof(formatbuf),
3568                                           flags, prec, c, v);
3569                                 if (len < 0)
3570                                         goto error;
3571                                 sign = 1;
3572                                 if (flags & F_ZERO)
3573                                         fill = '0';
3574                                 break;
3575                         case 'c':
3576                                 pbuf = formatbuf;
3577                                 len = formatchar(pbuf, sizeof(formatbuf), v);
3578                                 if (len < 0)
3579                                         goto error;
3580                                 break;
3581                         default:
3582                                 PyErr_Format(PyExc_ValueError,
3583                                   "unsupported format character '%c' (0x%x) "
3584                                   "at index %i",
3585                                   c, c,
3586                                   (int)(fmt - 1 - PyString_AsString(format)));
3587                                 goto error;
3588                         }
3589                         if (sign) {
3590                                 if (*pbuf == '-' || *pbuf == '+') {
3591                                         sign = *pbuf++;
3592                                         len--;
3593                                 }
3594                                 else if (flags & F_SIGN)
3595                                         sign = '+';
3596                                 else if (flags & F_BLANK)
3597                                         sign = ' ';
3598                                 else
3599                                         sign = 0;
3600                         }
3601                         if (width < len)
3602                                 width = len;
3603                         if (rescnt - (sign != 0) < width) {
3604                                 reslen -= rescnt;
3605                                 rescnt = width + fmtcnt + 100;
3606                                 reslen += rescnt;
3607                                 if (reslen < 0) {
3608                                         Py_DECREF(result);
3609                                         return PyErr_NoMemory();
3610                                 }
3611                                 if (_PyString_Resize(&result, reslen) < 0)
3612                                         return NULL;
3613                                 res = PyString_AS_STRING(result)
3614                                         + reslen - rescnt;
3615                         }
3616                         if (sign) {
3617                                 if (fill != ' ')
3618                                         *res++ = sign;
3619                                 rescnt--;
3620                                 if (width > len)
3621                                         width--;
3622                         }
3623                         if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3624                                 assert(pbuf[0] == '0');
3625                                 assert(pbuf[1] == c);
3626                                 if (fill != ' ') {
3627                                         *res++ = *pbuf++;
3628                                         *res++ = *pbuf++;
3629                                 }
3630                                 rescnt -= 2;
3631                                 width -= 2;
3632                                 if (width < 0)
3633                                         width = 0;
3634                                 len -= 2;
3635                         }
3636                         if (width > len && !(flags & F_LJUST)) {
3637                                 do {
3638                                         --rescnt;
3639                                         *res++ = fill;
3640                                 } while (--width > len);
3641                         }
3642                         if (fill == ' ') {
3643                                 if (sign)
3644                                         *res++ = sign;
3645                                 if ((flags & F_ALT) &&
3646                                     (c == 'x' || c == 'X')) {
3647                                         assert(pbuf[0] == '0');
3648                                         assert(pbuf[1] == c);
3649                                         *res++ = *pbuf++;
3650                                         *res++ = *pbuf++;
3651                                 }
3652                         }
3653                         memcpy(res, pbuf, len);
3654                         res += len;
3655                         rescnt -= len;
3656                         while (--width >= len) {
3657                                 --rescnt;
3658                                 *res++ = ' ';
3659                         }
3660                         if (dict && (argidx < arglen) && c != '%') {
3661                                 PyErr_SetString(PyExc_TypeError,
3662                                            "not all arguments converted");
3663                                 goto error;
3664                         }
3665                         Py_XDECREF(temp);
3666                 } /* '%' */
3667         } /* until end */
3668         if (argidx < arglen && !dict) {
3669                 PyErr_SetString(PyExc_TypeError,
3670                                 "not all arguments converted");
3671                 goto error;
3672         }
3673         if (args_owned) {
3674                 Py_DECREF(args);
3675         }
3676         _PyString_Resize(&result, reslen - rescnt);
3677         return result;
3678
3679 #ifdef Py_USING_UNICODE
3680  unicode:
3681         if (args_owned) {
3682                 Py_DECREF(args);
3683                 args_owned = 0;
3684         }
3685         /* Fiddle args right (remove the first argidx arguments) */
3686         if (PyTuple_Check(orig_args) && argidx > 0) {
3687                 PyObject *v;
3688                 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3689                 v = PyTuple_New(n);
3690                 if (v == NULL)
3691                         goto error;
3692                 while (--n >= 0) {
3693                         PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3694                         Py_INCREF(w);
3695                         PyTuple_SET_ITEM(v, n, w);
3696                 }
3697                 args = v;
3698         } else {
3699                 Py_INCREF(orig_args);
3700                 args = orig_args;
3701         }
3702         args_owned = 1;
3703         /* Take what we have of the result and let the Unicode formatting
3704            function format the rest of the input. */
3705         rescnt = res - PyString_AS_STRING(result);
3706         if (_PyString_Resize(&result, rescnt))
3707                 goto error;
3708         fmtcnt = PyString_GET_SIZE(format) - \
3709                  (fmt - PyString_AS_STRING(format));
3710         format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3711         if (format == NULL)
3712                 goto error;
3713         v = PyUnicode_Format(format, args);
3714         Py_DECREF(format);
3715         if (v == NULL)
3716                 goto error;
3717         /* Paste what we have (result) to what the Unicode formatting
3718            function returned (v) and return the result (or error) */
3719         w = PyUnicode_Concat(result, v);
3720         Py_DECREF(result);
3721         Py_DECREF(v);
3722         Py_DECREF(args);
3723         return w;
3724 #endif /* Py_USING_UNICODE */
3725
3726  error:
3727         Py_DECREF(result);
3728         if (args_owned) {
3729                 Py_DECREF(args);
3730         }
3731         return NULL;
3732 }
3733
3734
3735 #ifdef INTERN_STRINGS
3736
3737 /* This dictionary will leak at PyString_Fini() time.  That's acceptable
3738  * because PyString_Fini() specifically frees interned strings that are
3739  * only referenced by this dictionary.  The CVS log entry for revision 2.45
3740  * says:
3741  *
3742  *    Change the Fini function to only remove otherwise unreferenced
3743  *    strings from the interned table.  There are references in
3744  *    hard-to-find static variables all over the interpreter, and it's not
3745  *    worth trying to get rid of all those; but "uninterning" isn't fair
3746  *    either and may cause subtle failures later -- so we have to keep them
3747  *    in the interned table.
3748  */
3749 static PyObject *interned;
3750
3751 void
3752 PyString_InternInPlace(PyObject **p)
3753 {
3754         register PyStringObject *s = (PyStringObject *)(*p);
3755         PyObject *t;
3756         if (s == NULL || !PyString_Check(s))
3757                 Py_FatalError("PyString_InternInPlace: strings only please!");
3758         if ((t = s->ob_sinterned) != NULL) {
3759                 if (t == (PyObject *)s)
3760                         return;
3761                 Py_INCREF(t);
3762                 *p = t;
3763                 Py_DECREF(s);
3764                 return;
3765         }
3766         if (interned == NULL) {
3767                 interned = PyDict_New();
3768                 if (interned == NULL)
3769                         return;
3770         }
3771         if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3772                 Py_INCREF(t);
3773                 *p = s->ob_sinterned = t;
3774                 Py_DECREF(s);
3775                 return;
3776         }
3777         /* Ensure that only true string objects appear in the intern dict,
3778            and as the value of ob_sinterned. */
3779         if (PyString_CheckExact(s)) {
3780                 t = (PyObject *)s;
3781                 if (PyDict_SetItem(interned, t, t) == 0) {
3782                         s->ob_sinterned = t;
3783                         return;
3784                 }
3785         }
3786         else {
3787                 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
3788                                                 PyString_GET_SIZE(s));
3789                 if (t != NULL) {
3790                         if (PyDict_SetItem(interned, t, t) == 0) {
3791                                 *p = s->ob_sinterned = t;
3792                                 Py_DECREF(s);
3793                                 return;
3794                         }
3795                         Py_DECREF(t);
3796                 }
3797         }
3798         PyErr_Clear();
3799 }
3800
3801
3802 PyObject *
3803 PyString_InternFromString(const char *cp)
3804 {
3805         PyObject *s = PyString_FromString(cp);
3806         if (s == NULL)
3807                 return NULL;
3808         PyString_InternInPlace(&s);
3809         return s;
3810 }
3811
3812 #endif
3813
3814 void
3815 PyString_Fini(void)
3816 {
3817         int i;
3818         for (i = 0; i < UCHAR_MAX + 1; i++) {
3819                 Py_XDECREF(characters[i]);
3820                 characters[i] = NULL;
3821         }
3822 #ifndef DONT_SHARE_SHORT_STRINGS
3823         Py_XDECREF(nullstring);
3824         nullstring = NULL;
3825 #endif
3826 #ifdef INTERN_STRINGS
3827         if (interned) {
3828                 int pos, changed;
3829                 PyObject *key, *value;
3830                 do {
3831                         changed = 0;
3832                         pos = 0;
3833                         while (PyDict_Next(interned, &pos, &key, &value)) {
3834                                 if (key->ob_refcnt == 2 && key == value) {
3835                                         PyDict_DelItem(interned, key);
3836                                         changed = 1;
3837                                 }
3838                         }
3839                 } while (changed);
3840         }
3841 #endif
3842 }
3843
3844 #ifdef INTERN_STRINGS
3845 void _Py_ReleaseInternedStrings(void)
3846 {
3847         if (interned) {
3848                 fprintf(stderr, "releasing interned strings\n");
3849                 PyDict_Clear(interned);
3850                 Py_DECREF(interned);
3851                 interned = NULL;
3852         }
3853 }
3854 #endif /* INTERN_STRINGS */