Objects/stringobject.c

   1 /* String object implementation */
   2
   3 #include "Python.h"
   4
   5 #include <ctype.h>
   6
   7 #ifdef COUNT_ALLOCS
   8 int null_strings, one_strings;
   9 #endif
  10
  11 #if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
  12 #define UCHAR_MAX 255
  13 #endif
  14
  15 static PyStringObject *characters[UCHAR_MAX + 1];
  16 static PyStringObject *nullstring;
  17
  18 /* This dictionary holds all interned strings.  Note that references to
  19    strings in this dictionary are *not* counted in the string's ob_refcnt.
  20    When the interned string reaches a refcnt of 0 the string deallocation
  21    function will delete the reference from this dictionary.
  22
  23    Another way to look at this is to say that the actual reference
  24    count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
  25 */
  26 static PyObject *interned;
  27
  28
  29 /*
  30    For both PyString_FromString() and PyString_FromStringAndSize(), the
  31    parameter `size' denotes number of characters to allocate, not counting any
  32    null terminating character.
  33
  34    For PyString_FromString(), the parameter `str' points to a null-terminated
  35    string containing exactly `size' bytes.
  36
  37    For PyString_FromStringAndSize(), the parameter `str' is
  38    either NULL or else points to a string containing at least `size' bytes.
  39    For PyString_FromStringAndSize(), the string in the `str' parameter does
  40    not have to be null-terminated.  (Therefore it is safe to construct a
  41    substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
  42    If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
  43    bytes (setting the last byte to the null terminating character) and you can
  44    fill in the data yourself.  If `str' is non-NULL then the resulting
  45    PyString object must be treated as immutable and you must not fill in nor
  46    alter the data yourself, since the strings may be shared.
  47
  48    The PyObject member `op->ob_size', which denotes the number of "extra
  49    items" in a variable-size object, will contain the number of bytes
  50    allocated for string data, not counting the null terminating character.  It
  51    is therefore equal to the `size' parameter (for
  52    PyString_FromStringAndSize()) or the length of the string in the `str'
  53    parameter (for PyString_FromString()).
  54 */
  55 PyObject *
  56 PyString_FromStringAndSize(const char *str, int size)
  57 {
  58         register PyStringObject *op;
  59         if (size == 0 && (op = nullstring) != NULL) {
  60 #ifdef COUNT_ALLOCS
  61                 null_strings++;
  62 #endif
  63                 Py_INCREF(op);
  64                 return (PyObject *)op;
  65         }
  66         if (size == 1 && str != NULL &&
  67             (op = characters[*str & UCHAR_MAX]) != NULL)
  68         {
  69 #ifdef COUNT_ALLOCS
  70                 one_strings++;
  71 #endif
  72                 Py_INCREF(op);
  73                 return (PyObject *)op;
  74         }
  75
  76         /* Inline PyObject_NewVar */
  77         op = (PyStringObject *)
  78                 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
  79         if (op == NULL)
  80                 return PyErr_NoMemory();
  81         PyObject_INIT_VAR(op, &PyString_Type, size);
  82         op->ob_shash = -1;
  83         op->ob_sstate = SSTATE_NOT_INTERNED;
  84         if (str != NULL)
  85                 memcpy(op->ob_sval, str, size);
  86         op->ob_sval[size] = '\0';
  87         /* share short strings */
  88         if (size == 0) {
  89                 PyObject *t = (PyObject *)op;
  90                 PyString_InternInPlace(&t);
  91                 op = (PyStringObject *)t;
  92                 nullstring = op;
  93                 Py_INCREF(op);
  94         } else if (size == 1 && str != NULL) {
  95                 PyObject *t = (PyObject *)op;
  96                 PyString_InternInPlace(&t);
  97                 op = (PyStringObject *)t;
  98                 characters[*str & UCHAR_MAX] = op;
  99                 Py_INCREF(op);
 100         }
 101         return (PyObject *) op;
 102 }
 103
 104 PyObject *
 105 PyString_FromString(const char *str)
 106 {
 107         register size_t size;
 108         register PyStringObject *op;
 109
 110         assert(str != NULL);
 111         size = strlen(str);
 112         if (size > INT_MAX) {
 113                 PyErr_SetString(PyExc_OverflowError,
 114                         "string is too long for a Python string");
 115                 return NULL;
 116         }
 117         if (size == 0 && (op = nullstring) != NULL) {
 118 #ifdef COUNT_ALLOCS
 119                 null_strings++;
 120 #endif
 121                 Py_INCREF(op);
 122                 return (PyObject *)op;
 123         }
 124         if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
 125 #ifdef COUNT_ALLOCS
 126                 one_strings++;
 127 #endif
 128                 Py_INCREF(op);
 129                 return (PyObject *)op;
 130         }
 131
 132         /* Inline PyObject_NewVar */
 133         op = (PyStringObject *)
 134                 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
 135         if (op == NULL)
 136                 return PyErr_NoMemory();
 137         PyObject_INIT_VAR(op, &PyString_Type, size);
 138         op->ob_shash = -1;
 139         op->ob_sstate = SSTATE_NOT_INTERNED;
 140         memcpy(op->ob_sval, str, size+1);
 141         /* share short strings */
 142         if (size == 0) {
 143                 PyObject *t = (PyObject *)op;
 144                 PyString_InternInPlace(&t);
 145                 op = (PyStringObject *)t;
 146                 nullstring = op;
 147                 Py_INCREF(op);
 148         } else if (size == 1) {
 149                 PyObject *t = (PyObject *)op;
 150                 PyString_InternInPlace(&t);
 151                 op = (PyStringObject *)t;
 152                 characters[*str & UCHAR_MAX] = op;
 153                 Py_INCREF(op);
 154         }
 155         return (PyObject *) op;
 156 }
 157
 158 PyObject *
 159 PyString_FromFormatV(const char *format, va_list vargs)
 160 {
 161         va_list count;
 162         int n = 0;
 163         const char* f;
 164         char *s;
 165         PyObject* string;
 166
 167 #ifdef VA_LIST_IS_ARRAY
 168         memcpy(count, vargs, sizeof(va_list));
 169 #else
 170 #ifdef  __va_copy
 171         __va_copy(count, vargs);
 172 #else
 173         count = vargs;
 174 #endif
 175 #endif
 176         /* step 1: figure out how large a buffer we need */
 177         for (f = format; *f; f++) {
 178                 if (*f == '%') {
 179                         const char* p = f;
 180                         while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
 181                                 ;
 182
 183                         /* skip the 'l' in %ld, since it doesn't change the
 184                            width.  although only %d is supported (see
 185                            "expand" section below), others can be easily
 186                            added */
 187                         if (*f == 'l' && *(f+1) == 'd')
 188                                 ++f;
 189
 190                         switch (*f) {
 191                         case 'c':
 192                                 (void)va_arg(count, int);
 193                                 /* fall through... */
 194                         case '%':
 195                                 n++;
 196                                 break;
 197                         case 'd': case 'i': case 'x':
 198                                 (void) va_arg(count, int);
 199                                 /* 20 bytes is enough to hold a 64-bit
 200                                    integer.  Decimal takes the most space.
 201                                    This isn't enough for octal. */
 202                                 n += 20;
 203                                 break;
 204                         case 's':
 205                                 s = va_arg(count, char*);
 206                                 n += strlen(s);
 207                                 break;
 208                         case 'p':
 209                                 (void) va_arg(count, int);
 210                                 /* maximum 64-bit pointer representation:
 211                                  * 0xffffffffffffffff
 212                                  * so 19 characters is enough.
 213                                  * XXX I count 18 -- what's the extra for?
 214                                  */
 215                                 n += 19;
 216                                 break;
 217                         default:
 218                                 /* if we stumble upon an unknown
 219                                    formatting code, copy the rest of
 220                                    the format string to the output
 221                                    string. (we cannot just skip the
 222                                    code, since there's no way to know
 223                                    what's in the argument list) */
 224                                 n += strlen(p);
 225                                 goto expand;
 226                         }
 227                 } else
 228                         n++;
 229         }
 230  expand:
 231         /* step 2: fill the buffer */
 232         /* Since we've analyzed how much space we need for the worst case,
 233            use sprintf directly instead of the slower PyOS_snprintf. */
 234         string = PyString_FromStringAndSize(NULL, n);
 235         if (!string)
 236                 return NULL;
 237
 238         s = PyString_AsString(string);
 239
 240         for (f = format; *f; f++) {
 241                 if (*f == '%') {
 242                         const char* p = f++;
 243                         int i, longflag = 0;
 244                         /* parse the width.precision part (we're only
 245                            interested in the precision value, if any) */
 246                         n = 0;
 247                         while (isdigit(Py_CHARMASK(*f)))
 248                                 n = (n*10) + *f++ - '0';
 249                         if (*f == '.') {
 250                                 f++;
 251                                 n = 0;
 252                                 while (isdigit(Py_CHARMASK(*f)))
 253                                         n = (n*10) + *f++ - '0';
 254                         }
 255                         while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
 256                                 f++;
 257                         /* handle the long flag, but only for %ld.  others
 258                            can be added when necessary. */
 259                         if (*f == 'l' && *(f+1) == 'd') {
 260                                 longflag = 1;
 261                                 ++f;
 262                         }
 263
 264                         switch (*f) {
 265                         case 'c':
 266                                 *s++ = va_arg(vargs, int);
 267                                 break;
 268                         case 'd':
 269                                 if (longflag)
 270                                         sprintf(s, "%ld", va_arg(vargs, long));
 271                                 else
 272                                         sprintf(s, "%d", va_arg(vargs, int));
 273                                 s += strlen(s);
 274                                 break;
 275                         case 'i':
 276                                 sprintf(s, "%i", va_arg(vargs, int));
 277                                 s += strlen(s);
 278                                 break;
 279                         case 'x':
 280                                 sprintf(s, "%x", va_arg(vargs, int));
 281                                 s += strlen(s);
 282                                 break;
 283                         case 's':
 284                                 p = va_arg(vargs, char*);
 285                                 i = strlen(p);
 286                                 if (n > 0 && i > n)
 287                                         i = n;
 288                                 memcpy(s, p, i);
 289                                 s += i;
 290                                 break;
 291                         case 'p':
 292                                 sprintf(s, "%p", va_arg(vargs, void*));
 293                                 /* %p is ill-defined:  ensure leading 0x. */
 294                                 if (s[1] == 'X')
 295                                         s[1] = 'x';
 296                                 else if (s[1] != 'x') {
 297                                         memmove(s+2, s, strlen(s)+1);
 298                                         s[0] = '0';
 299                                         s[1] = 'x';
 300                                 }
 301                                 s += strlen(s);
 302                                 break;
 303                         case '%':
 304                                 *s++ = '%';
 305                                 break;
 306                         default:
 307                                 strcpy(s, p);
 308                                 s += strlen(s);
 309                                 goto end;
 310                         }
 311                 } else
 312                         *s++ = *f;
 313         }
 314
 315  end:
 316         _PyString_Resize(&string, s - PyString_AS_STRING(string));
 317         return string;
 318 }
 319
 320 PyObject *
 321 PyString_FromFormat(const char *format, ...)
 322 {
 323         PyObject* ret;
 324         va_list vargs;
 325
 326 #ifdef HAVE_STDARG_PROTOTYPES
 327         va_start(vargs, format);
 328 #else
 329         va_start(vargs);
 330 #endif
 331         ret = PyString_FromFormatV(format, vargs);
 332         va_end(vargs);
 333         return ret;
 334 }
 335
 336
 337 PyObject *PyString_Decode(const char *s,
 338                           int size,
 339                           const char *encoding,
 340                           const char *errors)
 341 {
 342     PyObject *v, *str;
 343
 344     str = PyString_FromStringAndSize(s, size);
 345     if (str == NULL)
 346         return NULL;
 347     v = PyString_AsDecodedString(str, encoding, errors);
 348     Py_DECREF(str);
 349     return v;
 350 }
 351
 352 PyObject *PyString_AsDecodedObject(PyObject *str,
 353                                    const char *encoding,
 354                                    const char *errors)
 355 {
 356     PyObject *v;
 357
 358     if (!PyString_Check(str)) {
 359         PyErr_BadArgument();
 360         goto onError;
 361     }
 362
 363     if (encoding == NULL) {
 364 #ifdef Py_USING_UNICODE
 365         encoding = PyUnicode_GetDefaultEncoding();
 366 #else
 367         PyErr_SetString(PyExc_ValueError, "no encoding specified");
 368         goto onError;
 369 #endif
 370     }
 371
 372     /* Decode via the codec registry */
 373     v = PyCodec_Decode(str, encoding, errors);
 374     if (v == NULL)
 375         goto onError;
 376
 377     return v;
 378
 379  onError:
 380     return NULL;
 381 }
 382
 383 PyObject *PyString_AsDecodedString(PyObject *str,
 384                                    const char *encoding,
 385                                    const char *errors)
 386 {
 387     PyObject *v;
 388
 389     v = PyString_AsDecodedObject(str, encoding, errors);
 390     if (v == NULL)
 391         goto onError;
 392
 393 #ifdef Py_USING_UNICODE
 394     /* Convert Unicode to a string using the default encoding */
 395     if (PyUnicode_Check(v)) {
 396         PyObject *temp = v;
 397         v = PyUnicode_AsEncodedString(v, NULL, NULL);
 398         Py_DECREF(temp);
 399         if (v == NULL)
 400             goto onError;
 401     }
 402 #endif
 403     if (!PyString_Check(v)) {
 404         PyErr_Format(PyExc_TypeError,
 405                      "decoder did not return a string object (type=%.400s)",
 406                      v->ob_type->tp_name);
 407         Py_DECREF(v);
 408         goto onError;
 409     }
 410
 411     return v;
 412
 413  onError:
 414     return NULL;
 415 }
 416
 417 PyObject *PyString_Encode(const char *s,
 418                           int size,
 419                           const char *encoding,
 420                           const char *errors)
 421 {
 422     PyObject *v, *str;
 423
 424     str = PyString_FromStringAndSize(s, size);
 425     if (str == NULL)
 426         return NULL;
 427     v = PyString_AsEncodedString(str, encoding, errors);
 428     Py_DECREF(str);
 429     return v;
 430 }
 431
 432 PyObject *PyString_AsEncodedObject(PyObject *str,
 433                                    const char *encoding,
 434                                    const char *errors)
 435 {
 436     PyObject *v;
 437
 438     if (!PyString_Check(str)) {
 439         PyErr_BadArgument();
 440         goto onError;
 441     }
 442
 443     if (encoding == NULL) {
 444 #ifdef Py_USING_UNICODE
 445         encoding = PyUnicode_GetDefaultEncoding();
 446 #else
 447         PyErr_SetString(PyExc_ValueError, "no encoding specified");
 448         goto onError;
 449 #endif
 450     }
 451
 452     /* Encode via the codec registry */
 453     v = PyCodec_Encode(str, encoding, errors);
 454     if (v == NULL)
 455         goto onError;
 456
 457     return v;
 458
 459  onError:
 460     return NULL;
 461 }
 462
 463 PyObject *PyString_AsEncodedString(PyObject *str,
 464                                    const char *encoding,
 465                                    const char *errors)
 466 {
 467     PyObject *v;
 468
 469     v = PyString_AsEncodedObject(str, encoding, errors);
 470     if (v == NULL)
 471         goto onError;
 472
 473 #ifdef Py_USING_UNICODE
 474     /* Convert Unicode to a string using the default encoding */
 475     if (PyUnicode_Check(v)) {
 476         PyObject *temp = v;
 477         v = PyUnicode_AsEncodedString(v, NULL, NULL);
 478         Py_DECREF(temp);
 479         if (v == NULL)
 480             goto onError;
 481     }
 482 #endif
 483     if (!PyString_Check(v)) {
 484         PyErr_Format(PyExc_TypeError,
 485                      "encoder did not return a string object (type=%.400s)",
 486                      v->ob_type->tp_name);
 487         Py_DECREF(v);
 488         goto onError;
 489     }
 490
 491     return v;
 492
 493  onError:
 494     return NULL;
 495 }
 496
 497 static void
 498 string_dealloc(PyObject *op)
 499 {
 500         switch (PyString_CHECK_INTERNED(op)) {
 501                 case SSTATE_NOT_INTERNED:
 502                         break;
 503
 504                 case SSTATE_INTERNED_MORTAL:
 505                         /* revive dead object temporarily for DelItem */
 506                         op->ob_refcnt = 3;
 507                         if (PyDict_DelItem(interned, op) != 0)
 508                                 Py_FatalError(
 509                                         "deletion of interned string failed");
 510                         break;
 511
 512                 case SSTATE_INTERNED_IMMORTAL:
 513                         Py_FatalError("Immortal interned string died.");
 514
 515                 default:
 516                         Py_FatalError("Inconsistent interned string state.");
 517         }
 518         op->ob_type->tp_free(op);
 519 }
 520
 521 /* Unescape a backslash-escaped string. If unicode is non-zero,
 522    the string is a u-literal. If recode_encoding is non-zero,
 523    the string is UTF-8 encoded and should be re-encoded in the
 524    specified encoding.  */
 525
 526 PyObject *PyString_DecodeEscape(const char *s,
 527                                 int len,
 528                                 const char *errors,
 529                                 int unicode,
 530                                 const char *recode_encoding)
 531 {
 532         int c;
 533         char *p, *buf;
 534         const char *end;
 535         PyObject *v;
 536         int newlen = recode_encoding ? 4*len:len;
 537         v = PyString_FromStringAndSize((char *)NULL, newlen);
 538         if (v == NULL)
 539                 return NULL;
 540         p = buf = PyString_AsString(v);
 541         end = s + len;
 542         while (s < end) {
 543                 if (*s != '\\') {
 544                   non_esc:
 545 #ifdef Py_USING_UNICODE
 546                         if (recode_encoding && (*s & 0x80)) {
 547                                 PyObject *u, *w;
 548                                 char *r;
 549                                 const char* t;
 550                                 int rn;
 551                                 t = s;
 552                                 /* Decode non-ASCII bytes as UTF-8. */
 553                                 while (t < end && (*t & 0x80)) t++;
 554                                 u = PyUnicode_DecodeUTF8(s, t - s, errors);
 555                                 if(!u) goto failed;
 556
 557                                 /* Recode them in target encoding. */
 558                                 w = PyUnicode_AsEncodedString(
 559                                         u, recode_encoding, errors);
 560                                 Py_DECREF(u);
 561                                 if (!w) goto failed;
 562
 563                                 /* Append bytes to output buffer. */
 564                                 r = PyString_AsString(w);
 565                                 rn = PyString_Size(w);
 566                                 memcpy(p, r, rn);
 567                                 p += rn;
 568                                 Py_DECREF(w);
 569                                 s = t;
 570                         } else {
 571                                 *p++ = *s++;
 572                         }
 573 #else
 574                         *p++ = *s++;
 575 #endif
 576                         continue;
 577                 }
 578                 s++;
 579                 if (s==end) {
 580                         PyErr_SetString(PyExc_ValueError,
 581                                         "Trailing \\ in string");
 582                         goto failed;
 583                 }
 584                 switch (*s++) {
 585                 /* XXX This assumes ASCII! */
 586                 case '\n': break;
 587                 case '\\': *p++ = '\\'; break;
 588                 case '\'': *p++ = '\''; break;
 589                 case '\"': *p++ = '\"'; break;
 590                 case 'b': *p++ = '\b'; break;
 591                 case 'f': *p++ = '\014'; break; /* FF */
 592                 case 't': *p++ = '\t'; break;
 593                 case 'n': *p++ = '\n'; break;
 594                 case 'r': *p++ = '\r'; break;
 595                 case 'v': *p++ = '\013'; break; /* VT */
 596                 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
 597                 case '0': case '1': case '2': case '3':
 598                 case '4': case '5': case '6': case '7':
 599                         c = s[-1] - '0';
 600                         if ('0' <= *s && *s <= '7') {
 601                                 c = (c<<3) + *s++ - '0';
 602                                 if ('0' <= *s && *s <= '7')
 603                                         c = (c<<3) + *s++ - '0';
 604                         }
 605                         *p++ = c;
 606                         break;
 607                 case 'x':
 608                         if (isxdigit(Py_CHARMASK(s[0]))
 609                             && isxdigit(Py_CHARMASK(s[1]))) {
 610                                 unsigned int x = 0;
 611                                 c = Py_CHARMASK(*s);
 612                                 s++;
 613                                 if (isdigit(c))
 614                                         x = c - '0';
 615                                 else if (islower(c))
 616                                         x = 10 + c - 'a';
 617                                 else
 618                                         x = 10 + c - 'A';
 619                                 x = x << 4;
 620                                 c = Py_CHARMASK(*s);
 621                                 s++;
 622                                 if (isdigit(c))
 623                                         x += c - '0';
 624                                 else if (islower(c))
 625                                         x += 10 + c - 'a';
 626                                 else
 627                                         x += 10 + c - 'A';
 628                                 *p++ = x;
 629                                 break;
 630                         }
 631                         if (!errors || strcmp(errors, "strict") == 0) {
 632                                 PyErr_SetString(PyExc_ValueError,
 633                                                 "invalid \\x escape");
 634                                 goto failed;
 635                         }
 636                         if (strcmp(errors, "replace") == 0) {
 637                                 *p++ = '?';
 638                         } else if (strcmp(errors, "ignore") == 0)
 639                                 /* do nothing */;
 640                         else {
 641                                 PyErr_Format(PyExc_ValueError,
 642                                              "decoding error; "
 643                                              "unknown error handling code: %.400s",
 644                                              errors);
 645                                 goto failed;
 646                         }
 647 #ifndef Py_USING_UNICODE
 648                 case 'u':
 649                 case 'U':
 650                 case 'N':
 651                         if (unicode) {
 652                                 PyErr_SetString(PyExc_ValueError,
 653                                           "Unicode escapes not legal "
 654                                           "when Unicode disabled");
 655                                 goto failed;
 656                         }
 657 #endif
 658                 default:
 659                         *p++ = '\\';
 660                         s--;
 661                         goto non_esc; /* an arbitry number of unescaped
 662                                          UTF-8 bytes may follow. */
 663                 }
 664         }
 665         if (p-buf < newlen)
 666                 _PyString_Resize(&v, (int)(p - buf));
 667         return v;
 668   failed:
 669         Py_DECREF(v);
 670         return NULL;
 671 }
 672
 673 static int
 674 string_getsize(register PyObject *op)
 675 {
 676         char *s;
 677         int len;
 678         if (PyString_AsStringAndSize(op, &s, &len))
 679                 return -1;
 680         return len;
 681 }
 682
 683 static /*const*/ char *
 684 string_getbuffer(register PyObject *op)
 685 {
 686         char *s;
 687         int len;
 688         if (PyString_AsStringAndSize(op, &s, &len))
 689                 return NULL;
 690         return s;
 691 }
 692
 693 int
 694 PyString_Size(register PyObject *op)
 695 {
 696         if (!PyString_Check(op))
 697                 return string_getsize(op);
 698         return ((PyStringObject *)op) -> ob_size;
 699 }
 700
 701 /*const*/ char *
 702 PyString_AsString(register PyObject *op)
 703 {
 704         if (!PyString_Check(op))
 705                 return string_getbuffer(op);
 706         return ((PyStringObject *)op) -> ob_sval;
 707 }
 708
 709 int
 710 PyString_AsStringAndSize(register PyObject *obj,
 711                          register char **s,
 712                          register int *len)
 713 {
 714         if (s == NULL) {
 715                 PyErr_BadInternalCall();
 716                 return -1;
 717         }
 718
 719         if (!PyString_Check(obj)) {
 720 #ifdef Py_USING_UNICODE
 721                 if (PyUnicode_Check(obj)) {
 722                         obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
 723                         if (obj == NULL)
 724                                 return -1;
 725                 }
 726                 else
 727 #endif
 728                 {
 729                         PyErr_Format(PyExc_TypeError,
 730                                      "expected string or Unicode object, "
 731                                      "%.200s found", obj->ob_type->tp_name);
 732                         return -1;
 733                 }
 734         }
 735
 736         *s = PyString_AS_STRING(obj);
 737         if (len != NULL)
 738                 *len = PyString_GET_SIZE(obj);
 739         else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
 740                 PyErr_SetString(PyExc_TypeError,
 741                                 "expected string without null bytes");
 742                 return -1;
 743         }
 744         return 0;
 745 }
 746
 747 /* Methods */
 748
 749 static int
 750 string_print(PyStringObject *op, FILE *fp, int flags)
 751 {
 752         int i;
 753         char c;
 754         int quote;
 755
 756         /* XXX Ought to check for interrupts when writing long strings */
 757         if (! PyString_CheckExact(op)) {
 758                 int ret;
 759                 /* A str subclass may have its own __str__ method. */
 760                 op = (PyStringObject *) PyObject_Str((PyObject *)op);
 761                 if (op == NULL)
 762                         return -1;
 763                 ret = string_print(op, fp, flags);
 764                 Py_DECREF(op);
 765                 return ret;
 766         }
 767         if (flags & Py_PRINT_RAW) {
 768 #ifdef __VMS
 769                 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
 770 #else
 771                 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
 772 #endif
 773                 return 0;
 774         }
 775
 776         /* figure out which quote to use; single is preferred */
 777         quote = '\'';
 778         if (memchr(op->ob_sval, '\'', op->ob_size) &&
 779             !memchr(op->ob_sval, '"', op->ob_size))
 780                 quote = '"';
 781
 782         fputc(quote, fp);
 783         for (i = 0; i < op->ob_size; i++) {
 784                 c = op->ob_sval[i];
 785                 if (c == quote || c == '\\')
 786                         fprintf(fp, "\\%c", c);
 787                 else if (c == '\t')
 788                         fprintf(fp, "\\t");
 789                 else if (c == '\n')
 790                         fprintf(fp, "\\n");
 791                 else if (c == '\r')
 792                         fprintf(fp, "\\r");
 793                 else if (c < ' ' || c >= 0x7f)
 794                         fprintf(fp, "\\x%02x", c & 0xff);
 795                 else
 796                         fputc(c, fp);
 797         }
 798         fputc(quote, fp);
 799         return 0;
 800 }
 801
 802 PyObject *
 803 PyString_Repr(PyObject *obj, int smartquotes)
 804 {
 805         register PyStringObject* op = (PyStringObject*) obj;
 806         size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
 807         PyObject *v;
 808         if (newsize > INT_MAX) {
 809                 PyErr_SetString(PyExc_OverflowError,
 810                         "string is too large to make repr");
 811         }
 812         v = PyString_FromStringAndSize((char *)NULL, newsize);
 813         if (v == NULL) {
 814                 return NULL;
 815         }
 816         else {
 817                 register int i;
 818                 register char c;
 819                 register char *p;
 820                 int quote;
 821
 822                 /* figure out which quote to use; single is preferred */
 823                 quote = '\'';
 824                 if (smartquotes &&
 825                     memchr(op->ob_sval, '\'', op->ob_size) &&
 826                     !memchr(op->ob_sval, '"', op->ob_size))
 827                         quote = '"';
 828
 829                 p = PyString_AS_STRING(v);
 830                 *p++ = quote;
 831                 for (i = 0; i < op->ob_size; i++) {
 832                         /* There's at least enough room for a hex escape
 833                            and a closing quote. */
 834                         assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
 835                         c = op->ob_sval[i];
 836                         if (c == quote || c == '\\')
 837                                 *p++ = '\\', *p++ = c;
 838                         else if (c == '\t')
 839                                 *p++ = '\\', *p++ = 't';
 840                         else if (c == '\n')
 841                                 *p++ = '\\', *p++ = 'n';
 842                         else if (c == '\r')
 843                                 *p++ = '\\', *p++ = 'r';
 844                         else if (c < ' ' || c >= 0x7f) {
 845                                 /* For performance, we don't want to call
 846                                    PyOS_snprintf here (extra layers of
 847                                    function call). */
 848                                 sprintf(p, "\\x%02x", c & 0xff);
 849                                 p += 4;
 850                         }
 851                         else
 852                                 *p++ = c;
 853                 }
 854                 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
 855                 *p++ = quote;
 856                 *p = '\0';
 857                 _PyString_Resize(
 858                         &v, (int) (p - PyString_AS_STRING(v)));
 859                 return v;
 860         }
 861 }
 862
 863 static PyObject *
 864 string_repr(PyObject *op)
 865 {
 866         return PyString_Repr(op, 1);
 867 }
 868
 869 static PyObject *
 870 string_str(PyObject *s)
 871 {
 872         assert(PyString_Check(s));
 873         if (PyString_CheckExact(s)) {
 874                 Py_INCREF(s);
 875                 return s;
 876         }
 877         else {
 878                 /* Subtype -- return genuine string with the same value. */
 879                 PyStringObject *t = (PyStringObject *) s;
 880                 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
 881         }
 882 }
 883
 884 static int
 885 string_length(PyStringObject *a)
 886 {
 887         return a->ob_size;
 888 }
 889
 890 static PyObject *
 891 string_concat(register PyStringObject *a, register PyObject *bb)
 892 {
 893         register unsigned int size;
 894         register PyStringObject *op;
 895         if (!PyString_Check(bb)) {
 896 #ifdef Py_USING_UNICODE
 897                 if (PyUnicode_Check(bb))
 898                     return PyUnicode_Concat((PyObject *)a, bb);
 899 #endif
 900                 PyErr_Format(PyExc_TypeError,
 901                              "cannot concatenate 'str' and '%.200s' objects",
 902                              bb->ob_type->tp_name);
 903                 return NULL;
 904         }
 905 #define b ((PyStringObject *)bb)
 906         /* Optimize cases with empty left or right operand */
 907         if ((a->ob_size == 0 || b->ob_size == 0) &&
 908             PyString_CheckExact(a) && PyString_CheckExact(b)) {
 909                 if (a->ob_size == 0) {
 910                         Py_INCREF(bb);
 911                         return bb;
 912                 }
 913                 Py_INCREF(a);
 914                 return (PyObject *)a;
 915         }
 916         size = a->ob_size + b->ob_size;
 917         /* Inline PyObject_NewVar */
 918         op = (PyStringObject *)
 919                 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
 920         if (op == NULL)
 921                 return PyErr_NoMemory();
 922         PyObject_INIT_VAR(op, &PyString_Type, size);
 923         op->ob_shash = -1;
 924         op->ob_sstate = SSTATE_NOT_INTERNED;
 925         memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
 926         memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
 927         op->ob_sval[size] = '\0';
 928         return (PyObject *) op;
 929 #undef b
 930 }
 931
 932 static PyObject *
 933 string_repeat(register PyStringObject *a, register int n)
 934 {
 935         register int i;
 936         register int size;
 937         register PyStringObject *op;
 938         size_t nbytes;
 939         if (n < 0)
 940                 n = 0;
 941         /* watch out for overflows:  the size can overflow int,
 942          * and the # of bytes needed can overflow size_t
 943          */
 944         size = a->ob_size * n;
 945         if (n && size / n != a->ob_size) {
 946                 PyErr_SetString(PyExc_OverflowError,
 947                         "repeated string is too long");
 948                 return NULL;
 949         }
 950         if (size == a->ob_size && PyString_CheckExact(a)) {
 951                 Py_INCREF(a);
 952                 return (PyObject *)a;
 953         }
 954         nbytes = size * sizeof(char);
 955         if (nbytes / sizeof(char) != (size_t)size ||
 956             nbytes + sizeof(PyStringObject) <= nbytes) {
 957                 PyErr_SetString(PyExc_OverflowError,
 958                         "repeated string is too long");
 959                 return NULL;
 960         }
 961         op = (PyStringObject *)
 962                 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
 963         if (op == NULL)
 964                 return PyErr_NoMemory();
 965         PyObject_INIT_VAR(op, &PyString_Type, size);
 966         op->ob_shash = -1;
 967         op->ob_sstate = SSTATE_NOT_INTERNED;
 968         for (i = 0; i < size; i += a->ob_size)
 969                 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
 970         op->ob_sval[size] = '\0';
 971         return (PyObject *) op;
 972 }
 973
 974 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
 975
 976 static PyObject *
 977 string_slice(register PyStringObject *a, register int i, register int j)
 978      /* j -- may be negative! */
 979 {
 980         if (i < 0)
 981                 i = 0;
 982         if (j < 0)
 983                 j = 0; /* Avoid signed/unsigned bug in next line */
 984         if (j > a->ob_size)
 985                 j = a->ob_size;
 986         if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
 987                 /* It's the same as a */
 988                 Py_INCREF(a);
 989                 return (PyObject *)a;
 990         }
 991         if (j < i)
 992                 j = i;
 993         return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
 994 }
 995
 996 static int
 997 string_contains(PyObject *a, PyObject *el)
 998 {
 999         const char *lhs, *rhs, *end;
1000         int size;
1001
1002         if (!PyString_CheckExact(el)) {
1003 #ifdef Py_USING_UNICODE
1004                 if (PyUnicode_Check(el))
1005                         return PyUnicode_Contains(a, el);
1006 #endif
1007                 if (!PyString_Check(el)) {
1008                         PyErr_SetString(PyExc_TypeError,
1009                             "'in <string>' requires string as left operand");
1010                         return -1;
1011                 }
1012         }
1013         size = PyString_GET_SIZE(el);
1014         rhs = PyString_AS_STRING(el);
1015         lhs = PyString_AS_STRING(a);
1016
1017         /* optimize for a single character */
1018         if (size == 1)
1019                 return memchr(lhs, *rhs, PyString_GET_SIZE(a)) != NULL;
1020
1021         end = lhs + (PyString_GET_SIZE(a) - size);
1022         while (lhs <= end) {
1023                 if (memcmp(lhs++, rhs, size) == 0)
1024                         return 1;
1025         }
1026
1027         return 0;
1028 }
1029
1030 static PyObject *
1031 string_item(PyStringObject *a, register int i)
1032 {
1033         PyObject *v;
1034         char *pchar;
1035         if (i < 0 || i >= a->ob_size) {
1036                 PyErr_SetString(PyExc_IndexError, "string index out of range");
1037                 return NULL;
1038         }
1039         pchar = a->ob_sval + i;
1040         v = (PyObject *)characters[*pchar & UCHAR_MAX];
1041         if (v == NULL)
1042                 v = PyString_FromStringAndSize(pchar, 1);
1043         else {
1044 #ifdef COUNT_ALLOCS
1045                 one_strings++;
1046 #endif
1047                 Py_INCREF(v);
1048         }
1049         return v;
1050 }
1051
1052 static PyObject*
1053 string_richcompare(PyStringObject *a, PyStringObject *b, int op)
1054 {
1055         int c;
1056         int len_a, len_b;
1057         int min_len;
1058         PyObject *result;
1059
1060         /* Make sure both arguments are strings. */
1061         if (!(PyString_Check(a) && PyString_Check(b))) {
1062                 result = Py_NotImplemented;
1063                 goto out;
1064         }
1065         if (a == b) {
1066                 switch (op) {
1067                 case Py_EQ:case Py_LE:case Py_GE:
1068                         result = Py_True;
1069                         goto out;
1070                 case Py_NE:case Py_LT:case Py_GT:
1071                         result = Py_False;
1072                         goto out;
1073                 }
1074         }
1075         if (op == Py_EQ) {
1076                 /* Supporting Py_NE here as well does not save
1077                    much time, since Py_NE is rarely used.  */
1078                 if (a->ob_size == b->ob_size
1079                     && (a->ob_sval[0] == b->ob_sval[0]
1080                         && memcmp(a->ob_sval, b->ob_sval,
1081                                   a->ob_size) == 0)) {
1082                         result = Py_True;
1083                 } else {
1084                         result = Py_False;
1085                 }
1086                 goto out;
1087         }
1088         len_a = a->ob_size; len_b = b->ob_size;
1089         min_len = (len_a < len_b) ? len_a : len_b;
1090         if (min_len > 0) {
1091                 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1092                 if (c==0)
1093                         c = memcmp(a->ob_sval, b->ob_sval, min_len);
1094         }else
1095                 c = 0;
1096         if (c == 0)
1097                 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1098         switch (op) {
1099         case Py_LT: c = c <  0; break;
1100         case Py_LE: c = c <= 0; break;
1101         case Py_EQ: assert(0);  break; /* unreachable */
1102         case Py_NE: c = c != 0; break;
1103         case Py_GT: c = c >  0; break;
1104         case Py_GE: c = c >= 0; break;
1105         default:
1106                 result = Py_NotImplemented;
1107                 goto out;
1108         }
1109         result = c ? Py_True : Py_False;
1110   out:
1111         Py_INCREF(result);
1112         return result;
1113 }
1114
1115 int
1116 _PyString_Eq(PyObject *o1, PyObject *o2)
1117 {
1118         PyStringObject *a, *b;
1119         a = (PyStringObject*)o1;
1120         b = (PyStringObject*)o2;
1121         return a->ob_size == b->ob_size
1122           && *a->ob_sval == *b->ob_sval
1123           && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
1124 }
1125
1126 static long
1127 string_hash(PyStringObject *a)
1128 {
1129         register int len;
1130         register unsigned char *p;
1131         register long x;
1132
1133         if (a->ob_shash != -1)
1134                 return a->ob_shash;
1135         len = a->ob_size;
1136         p = (unsigned char *) a->ob_sval;
1137         x = *p << 7;
1138         while (--len >= 0)
1139                 x = (1000003*x) ^ *p++;
1140         x ^= a->ob_size;
1141         if (x == -1)
1142                 x = -2;
1143         a->ob_shash = x;
1144         return x;
1145 }
1146
1147 static PyObject*
1148 string_subscript(PyStringObject* self, PyObject* item)
1149 {
1150         if (PyInt_Check(item)) {
1151                 long i = PyInt_AS_LONG(item);
1152                 if (i < 0)
1153                         i += PyString_GET_SIZE(self);
1154                 return string_item(self,i);
1155         }
1156         else if (PyLong_Check(item)) {
1157                 long i = PyLong_AsLong(item);
1158                 if (i == -1 && PyErr_Occurred())
1159                         return NULL;
1160                 if (i < 0)
1161                         i += PyString_GET_SIZE(self);
1162                 return string_item(self,i);
1163         }
1164         else if (PySlice_Check(item)) {
1165                 int start, stop, step, slicelength, cur, i;
1166                 char* source_buf;
1167                 char* result_buf;
1168                 PyObject* result;
1169
1170                 if (PySlice_GetIndicesEx((PySliceObject*)item,
1171                                  PyString_GET_SIZE(self),
1172                                  &start, &stop, &step, &slicelength) < 0) {
1173                         return NULL;
1174                 }
1175
1176                 if (slicelength <= 0) {
1177                         return PyString_FromStringAndSize("", 0);
1178                 }
1179                 else {
1180                         source_buf = PyString_AsString((PyObject*)self);
1181                         result_buf = PyMem_Malloc(slicelength);
1182
1183                         for (cur = start, i = 0; i < slicelength;
1184                              cur += step, i++) {
1185                                 result_buf[i] = source_buf[cur];
1186                         }
1187
1188                         result = PyString_FromStringAndSize(result_buf,
1189                                                             slicelength);
1190                         PyMem_Free(result_buf);
1191                         return result;
1192                 }
1193         }
1194         else {
1195                 PyErr_SetString(PyExc_TypeError,
1196                                 "string indices must be integers");
1197                 return NULL;
1198         }
1199 }
1200
1201 static int
1202 string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
1203 {
1204         if ( index != 0 ) {
1205                 PyErr_SetString(PyExc_SystemError,
1206                                 "accessing non-existent string segment");
1207                 return -1;
1208         }
1209         *ptr = (void *)self->ob_sval;
1210         return self->ob_size;
1211 }
1212
1213 static int
1214 string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
1215 {
1216         PyErr_SetString(PyExc_TypeError,
1217                         "Cannot use string as modifiable buffer");
1218         return -1;
1219 }
1220
1221 static int
1222 string_buffer_getsegcount(PyStringObject *self, int *lenp)
1223 {
1224         if ( lenp )
1225                 *lenp = self->ob_size;
1226         return 1;
1227 }
1228
1229 static int
1230 string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
1231 {
1232         if ( index != 0 ) {
1233                 PyErr_SetString(PyExc_SystemError,
1234                                 "accessing non-existent string segment");
1235                 return -1;
1236         }
1237         *ptr = self->ob_sval;
1238         return self->ob_size;
1239 }
1240
1241 static PySequenceMethods string_as_sequence = {
1242         (inquiry)string_length, /*sq_length*/
1243         (binaryfunc)string_concat, /*sq_concat*/
1244         (intargfunc)string_repeat, /*sq_repeat*/
1245         (intargfunc)string_item, /*sq_item*/
1246         (intintargfunc)string_slice, /*sq_slice*/
1247         0,              /*sq_ass_item*/
1248         0,              /*sq_ass_slice*/
1249         (objobjproc)string_contains /*sq_contains*/
1250 };
1251
1252 static PyMappingMethods string_as_mapping = {
1253         (inquiry)string_length,
1254         (binaryfunc)string_subscript,
1255         0,
1256 };
1257
1258 static PyBufferProcs string_as_buffer = {
1259         (getreadbufferproc)string_buffer_getreadbuf,
1260         (getwritebufferproc)string_buffer_getwritebuf,
1261         (getsegcountproc)string_buffer_getsegcount,
1262         (getcharbufferproc)string_buffer_getcharbuf,
1263 };
1264
1265
1266 \f
/* Strip modes; they double as indices into stripformat[]. */
#define LEFTSTRIP  0
#define RIGHTSTRIP 1
#define BOTHSTRIP  2

/* Argument format strings, indexed by strip mode. */
static const char *stripformat[] = {
        "|O:lstrip",
        "|O:rstrip",
        "|O:strip"
};

/* Method name for strip mode i: the format string minus its leading
   three characters ("|O:"). */
#define STRIPNAME(i) (stripformat[i]+3)
1275
1276
1277 static PyObject *
1278 split_whitespace(const char *s, int len, int maxsplit)
1279 {
1280         int i, j, err;
1281         PyObject* item;
1282         PyObject *list = PyList_New(0);
1283
1284         if (list == NULL)
1285                 return NULL;
1286
1287         for (i = j = 0; i < len; ) {
1288                 while (i < len && isspace(Py_CHARMASK(s[i])))
1289                         i++;
1290                 j = i;
1291                 while (i < len && !isspace(Py_CHARMASK(s[i])))
1292                         i++;
1293                 if (j < i) {
1294                         if (maxsplit-- <= 0)
1295                                 break;
1296                         item = PyString_FromStringAndSize(s+j, (int)(i-j));
1297                         if (item == NULL)
1298                                 goto finally;
1299                         err = PyList_Append(list, item);
1300                         Py_DECREF(item);
1301                         if (err < 0)
1302                                 goto finally;
1303                         while (i < len && isspace(Py_CHARMASK(s[i])))
1304                                 i++;
1305                         j = i;
1306                 }
1307         }
1308         if (j < len) {
1309                 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1310                 if (item == NULL)
1311                         goto finally;
1312                 err = PyList_Append(list, item);
1313                 Py_DECREF(item);
1314                 if (err < 0)
1315                         goto finally;
1316         }
1317         return list;
1318   finally:
1319         Py_DECREF(list);
1320         return NULL;
1321 }
1322
1323
1324 PyDoc_STRVAR(split__doc__,
1325 "S.split([sep [,maxsplit]]) -> list of strings\n\
1326 \n\
1327 Return a list of the words in the string S, using sep as the\n\
1328 delimiter string.  If maxsplit is given, at most maxsplit\n\
1329 splits are done. If sep is not specified or is None, any\n\
1330 whitespace string is a separator.");
1331
1332 static PyObject *
1333 string_split(PyStringObject *self, PyObject *args)
1334 {
1335         int len = PyString_GET_SIZE(self), n, i, j, err;
1336         int maxsplit = -1;
1337         const char *s = PyString_AS_STRING(self), *sub;
1338         PyObject *list, *item, *subobj = Py_None;
1339
1340         if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
1341                 return NULL;
1342         if (maxsplit < 0)
1343                 maxsplit = INT_MAX;
1344         if (subobj == Py_None)
1345                 return split_whitespace(s, len, maxsplit);
1346         if (PyString_Check(subobj)) {
1347                 sub = PyString_AS_STRING(subobj);
1348                 n = PyString_GET_SIZE(subobj);
1349         }
1350 #ifdef Py_USING_UNICODE
1351         else if (PyUnicode_Check(subobj))
1352                 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1353 #endif
1354         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1355                 return NULL;
1356         if (n == 0) {
1357                 PyErr_SetString(PyExc_ValueError, "empty separator");
1358                 return NULL;
1359         }
1360
1361         list = PyList_New(0);
1362         if (list == NULL)
1363                 return NULL;
1364
1365         i = j = 0;
1366         while (i+n <= len) {
1367                 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1368                         if (maxsplit-- <= 0)
1369                                 break;
1370                         item = PyString_FromStringAndSize(s+j, (int)(i-j));
1371                         if (item == NULL)
1372                                 goto fail;
1373                         err = PyList_Append(list, item);
1374                         Py_DECREF(item);
1375                         if (err < 0)
1376                                 goto fail;
1377                         i = j = i + n;
1378                 }
1379                 else
1380                         i++;
1381         }
1382         item = PyString_FromStringAndSize(s+j, (int)(len-j));
1383         if (item == NULL)
1384                 goto fail;
1385         err = PyList_Append(list, item);
1386         Py_DECREF(item);
1387         if (err < 0)
1388                 goto fail;
1389
1390         return list;
1391
1392  fail:
1393         Py_DECREF(list);
1394         return NULL;
1395 }
1396
1397
1398 PyDoc_STRVAR(join__doc__,
1399 "S.join(sequence) -> string\n\
1400 \n\
1401 Return a string which is the concatenation of the strings in the\n\
1402 sequence.  The separator between elements is S.");
1403
1404 static PyObject *
1405 string_join(PyStringObject *self, PyObject *orig)
1406 {
1407         char *sep = PyString_AS_STRING(self);
1408         const int seplen = PyString_GET_SIZE(self);
1409         PyObject *res = NULL;
1410         char *p;
1411         int seqlen = 0;
1412         size_t sz = 0;
1413         int i;
1414         PyObject *seq, *item;
1415
1416         seq = PySequence_Fast(orig, "");
1417         if (seq == NULL) {
1418                 if (PyErr_ExceptionMatches(PyExc_TypeError))
1419                         PyErr_Format(PyExc_TypeError,
1420                                      "sequence expected, %.80s found",
1421                                      orig->ob_type->tp_name);
1422                 return NULL;
1423         }
1424
1425         seqlen = PySequence_Size(seq);
1426         if (seqlen == 0) {
1427                 Py_DECREF(seq);
1428                 return PyString_FromString("");
1429         }
1430         if (seqlen == 1) {
1431                 item = PySequence_Fast_GET_ITEM(seq, 0);
1432                 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1433                         PyErr_Format(PyExc_TypeError,
1434                                      "sequence item 0: expected string,"
1435                                      " %.80s found",
1436                                      item->ob_type->tp_name);
1437                         Py_DECREF(seq);
1438                         return NULL;
1439                 }
1440                 Py_INCREF(item);
1441                 Py_DECREF(seq);
1442                 return item;
1443         }
1444
1445         /* There are at least two things to join.  Do a pre-pass to figure out
1446          * the total amount of space we'll need (sz), see whether any argument
1447          * is absurd, and defer to the Unicode join if appropriate.
1448          */
1449         for (i = 0; i < seqlen; i++) {
1450                 const size_t old_sz = sz;
1451                 item = PySequence_Fast_GET_ITEM(seq, i);
1452                 if (!PyString_Check(item)){
1453 #ifdef Py_USING_UNICODE
1454                         if (PyUnicode_Check(item)) {
1455                                 /* Defer to Unicode join.
1456                                  * CAUTION:  There's no gurantee that the
1457                                  * original sequence can be iterated over
1458                                  * again, so we must pass seq here.
1459                                  */
1460                                 PyObject *result;
1461                                 result = PyUnicode_Join((PyObject *)self, seq);
1462                                 Py_DECREF(seq);
1463                                 return result;
1464                         }
1465 #endif
1466                         PyErr_Format(PyExc_TypeError,
1467                                      "sequence item %i: expected string,"
1468                                      " %.80s found",
1469                                      i, item->ob_type->tp_name);
1470                         Py_DECREF(seq);
1471                         return NULL;
1472                 }
1473                 sz += PyString_GET_SIZE(item);
1474                 if (i != 0)
1475                         sz += seplen;
1476                 if (sz < old_sz || sz > INT_MAX) {
1477                         PyErr_SetString(PyExc_OverflowError,
1478                                 "join() is too long for a Python string");
1479                         Py_DECREF(seq);
1480                         return NULL;
1481                 }
1482         }
1483
1484         /* Allocate result space. */
1485         res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1486         if (res == NULL) {
1487                 Py_DECREF(seq);
1488                 return NULL;
1489         }
1490
1491         /* Catenate everything. */
1492         p = PyString_AS_STRING(res);
1493         for (i = 0; i < seqlen; ++i) {
1494                 size_t n;
1495                 item = PySequence_Fast_GET_ITEM(seq, i);
1496                 n = PyString_GET_SIZE(item);
1497                 memcpy(p, PyString_AS_STRING(item), n);
1498                 p += n;
1499                 if (i < seqlen - 1) {
1500                         memcpy(p, sep, seplen);
1501                         p += seplen;
1502                 }
1503         }
1504
1505         Py_DECREF(seq);
1506         return res;
1507 }
1508
1509 PyObject *
1510 _PyString_Join(PyObject *sep, PyObject *x)
1511 {
1512         assert(sep != NULL && PyString_Check(sep));
1513         assert(x != NULL);
1514         return string_join((PyStringObject *)sep, x);
1515 }
1516
/* Normalise slice-style bounds in place for a string of length len.
   *end is capped at len; a negative *end or *start has len added to it
   and is then floored at 0.  *start is not capped at len. */
static void
string_adjust_indices(int *start, int *end, int len)
{
        int hi = *end;
        int lo;

        if (hi > len)
                hi = len;
        else if (hi < 0)
                hi += len;
        if (hi < 0)
                hi = 0;
        *end = hi;

        lo = *start;
        if (lo < 0) {
                lo += len;
                if (lo < 0)
                        lo = 0;
        }
        *start = lo;
}
1531
1532 static long
1533 string_find_internal(PyStringObject *self, PyObject *args, int dir)
1534 {
1535         const char *s = PyString_AS_STRING(self), *sub;
1536         int len = PyString_GET_SIZE(self);
1537         int n, i = 0, last = INT_MAX;
1538         PyObject *subobj;
1539
1540         if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
1541                 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
1542                 return -2;
1543         if (PyString_Check(subobj)) {
1544                 sub = PyString_AS_STRING(subobj);
1545                 n = PyString_GET_SIZE(subobj);
1546         }
1547 #ifdef Py_USING_UNICODE
1548         else if (PyUnicode_Check(subobj))
1549                 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
1550 #endif
1551         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1552                 return -2;
1553
1554         string_adjust_indices(&i, &last, len);
1555
1556         if (dir > 0) {
1557                 if (n == 0 && i <= last)
1558                         return (long)i;
1559                 last -= n;
1560                 for (; i <= last; ++i)
1561                         if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
1562                                 return (long)i;
1563         }
1564         else {
1565                 int j;
1566
1567                 if (n == 0 && i <= last)
1568                         return (long)last;
1569                 for (j = last-n; j >= i; --j)
1570                         if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
1571                                 return (long)j;
1572         }
1573
1574         return -1;
1575 }
1576
1577
1578 PyDoc_STRVAR(find__doc__,
1579 "S.find(sub [,start [,end]]) -> int\n\
1580 \n\
1581 Return the lowest index in S where substring sub is found,\n\
1582 such that sub is contained within s[start,end].  Optional\n\
1583 arguments start and end are interpreted as in slice notation.\n\
1584 \n\
1585 Return -1 on failure.");
1586
1587 static PyObject *
1588 string_find(PyStringObject *self, PyObject *args)
1589 {
1590         long result = string_find_internal(self, args, +1);
1591         if (result == -2)
1592                 return NULL;
1593         return PyInt_FromLong(result);
1594 }
1595
1596
1597 PyDoc_STRVAR(index__doc__,
1598 "S.index(sub [,start [,end]]) -> int\n\
1599 \n\
1600 Like S.find() but raise ValueError when the substring is not found.");
1601
1602 static PyObject *
1603 string_index(PyStringObject *self, PyObject *args)
1604 {
1605         long result = string_find_internal(self, args, +1);
1606         if (result == -2)
1607                 return NULL;
1608         if (result == -1) {
1609                 PyErr_SetString(PyExc_ValueError,
1610                                 "substring not found in string.index");
1611                 return NULL;
1612         }
1613         return PyInt_FromLong(result);
1614 }
1615
1616
1617 PyDoc_STRVAR(rfind__doc__,
1618 "S.rfind(sub [,start [,end]]) -> int\n\
1619 \n\
1620 Return the highest index in S where substring sub is found,\n\
1621 such that sub is contained within s[start,end].  Optional\n\
1622 arguments start and end are interpreted as in slice notation.\n\
1623 \n\
1624 Return -1 on failure.");
1625
1626 static PyObject *
1627 string_rfind(PyStringObject *self, PyObject *args)
1628 {
1629         long result = string_find_internal(self, args, -1);
1630         if (result == -2)
1631                 return NULL;
1632         return PyInt_FromLong(result);
1633 }
1634
1635
1636 PyDoc_STRVAR(rindex__doc__,
1637 "S.rindex(sub [,start [,end]]) -> int\n\
1638 \n\
1639 Like S.rfind() but raise ValueError when the substring is not found.");
1640
1641 static PyObject *
1642 string_rindex(PyStringObject *self, PyObject *args)
1643 {
1644         long result = string_find_internal(self, args, -1);
1645         if (result == -2)
1646                 return NULL;
1647         if (result == -1) {
1648                 PyErr_SetString(PyExc_ValueError,
1649                                 "substring not found in string.rindex");
1650                 return NULL;
1651         }
1652         return PyInt_FromLong(result);
1653 }
1654
1655
1656 static PyObject *
1657 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1658 {
1659         char *s = PyString_AS_STRING(self);
1660         int len = PyString_GET_SIZE(self);
1661         char *sep = PyString_AS_STRING(sepobj);
1662         int seplen = PyString_GET_SIZE(sepobj);
1663         int i, j;
1664
1665         i = 0;
1666         if (striptype != RIGHTSTRIP) {
1667                 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1668                         i++;
1669                 }
1670         }
1671
1672         j = len;
1673         if (striptype != LEFTSTRIP) {
1674                 do {
1675                         j--;
1676                 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1677                 j++;
1678         }
1679
1680         if (i == 0 && j == len && PyString_CheckExact(self)) {
1681                 Py_INCREF(self);
1682                 return (PyObject*)self;
1683         }
1684         else
1685                 return PyString_FromStringAndSize(s+i, j-i);
1686 }
1687
1688
1689 static PyObject *
1690 do_strip(PyStringObject *self, int striptype)
1691 {
1692         char *s = PyString_AS_STRING(self);
1693         int len = PyString_GET_SIZE(self), i, j;
1694
1695         i = 0;
1696         if (striptype != RIGHTSTRIP) {
1697                 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1698                         i++;
1699                 }
1700         }
1701
1702         j = len;
1703         if (striptype != LEFTSTRIP) {
1704                 do {
1705                         j--;
1706                 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1707                 j++;
1708         }
1709
1710         if (i == 0 && j == len && PyString_CheckExact(self)) {
1711                 Py_INCREF(self);
1712                 return (PyObject*)self;
1713         }
1714         else
1715                 return PyString_FromStringAndSize(s+i, j-i);
1716 }
1717
1718
1719 static PyObject *
1720 do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1721 {
1722         PyObject *sep = NULL;
1723
1724         if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1725                 return NULL;
1726
1727         if (sep != NULL && sep != Py_None) {
1728                 if (PyString_Check(sep))
1729                         return do_xstrip(self, striptype, sep);
1730 #ifdef Py_USING_UNICODE
1731                 else if (PyUnicode_Check(sep)) {
1732                         PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1733                         PyObject *res;
1734                         if (uniself==NULL)
1735                                 return NULL;
1736                         res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1737                                 striptype, sep);
1738                         Py_DECREF(uniself);
1739                         return res;
1740                 }
1741 #endif
1742                 else {
1743                         PyErr_Format(PyExc_TypeError,
1744 #ifdef Py_USING_UNICODE
1745                                      "%s arg must be None, str or unicode",
1746 #else
1747                                      "%s arg must be None or str",
1748 #endif
1749                                      STRIPNAME(striptype));
1750                         return NULL;
1751                 }
1752                 return do_xstrip(self, striptype, sep);
1753         }
1754
1755         return do_strip(self, striptype);
1756 }
1757
1758
1759 PyDoc_STRVAR(strip__doc__,
1760 "S.strip([sep]) -> string or unicode\n\
1761 \n\
1762 Return a copy of the string S with leading and trailing\n\
1763 whitespace removed.\n\
1764 If sep is given and not None, remove characters in sep instead.\n\
1765 If sep is unicode, S will be converted to unicode before stripping");
1766
1767 static PyObject *
1768 string_strip(PyStringObject *self, PyObject *args)
1769 {
1770         if (PyTuple_GET_SIZE(args) == 0)
1771                 return do_strip(self, BOTHSTRIP); /* Common case */
1772         else
1773                 return do_argstrip(self, BOTHSTRIP, args);
1774 }
1775
1776
1777 PyDoc_STRVAR(lstrip__doc__,
1778 "S.lstrip([sep]) -> string or unicode\n\
1779 \n\
1780 Return a copy of the string S with leading whitespace removed.\n\
1781 If sep is given and not None, remove characters in sep instead.\n\
1782 If sep is unicode, S will be converted to unicode before stripping");
1783
1784 static PyObject *
1785 string_lstrip(PyStringObject *self, PyObject *args)
1786 {
1787         if (PyTuple_GET_SIZE(args) == 0)
1788                 return do_strip(self, LEFTSTRIP); /* Common case */
1789         else
1790                 return do_argstrip(self, LEFTSTRIP, args);
1791 }
1792
1793
1794 PyDoc_STRVAR(rstrip__doc__,
1795 "S.rstrip([sep]) -> string or unicode\n\
1796 \n\
1797 Return a copy of the string S with trailing whitespace removed.\n\
1798 If sep is given and not None, remove characters in sep instead.\n\
1799 If sep is unicode, S will be converted to unicode before stripping");
1800
1801 static PyObject *
1802 string_rstrip(PyStringObject *self, PyObject *args)
1803 {
1804         if (PyTuple_GET_SIZE(args) == 0)
1805                 return do_strip(self, RIGHTSTRIP); /* Common case */
1806         else
1807                 return do_argstrip(self, RIGHTSTRIP, args);
1808 }
1809
1810
1811 PyDoc_STRVAR(lower__doc__,
1812 "S.lower() -> string\n\
1813 \n\
1814 Return a copy of the string S converted to lowercase.");
1815
1816 static PyObject *
1817 string_lower(PyStringObject *self)
1818 {
1819         char *s = PyString_AS_STRING(self), *s_new;
1820         int i, n = PyString_GET_SIZE(self);
1821         PyObject *new;
1822
1823         new = PyString_FromStringAndSize(NULL, n);
1824         if (new == NULL)
1825                 return NULL;
1826         s_new = PyString_AsString(new);
1827         for (i = 0; i < n; i++) {
1828                 int c = Py_CHARMASK(*s++);
1829                 if (isupper(c)) {
1830                         *s_new = tolower(c);
1831                 } else
1832                         *s_new = c;
1833                 s_new++;
1834         }
1835         return new;
1836 }
1837
1838
1839 PyDoc_STRVAR(upper__doc__,
1840 "S.upper() -> string\n\
1841 \n\
1842 Return a copy of the string S converted to uppercase.");
1843
1844 static PyObject *
1845 string_upper(PyStringObject *self)
1846 {
1847         char *s = PyString_AS_STRING(self), *s_new;
1848         int i, n = PyString_GET_SIZE(self);
1849         PyObject *new;
1850
1851         new = PyString_FromStringAndSize(NULL, n);
1852         if (new == NULL)
1853                 return NULL;
1854         s_new = PyString_AsString(new);
1855         for (i = 0; i < n; i++) {
1856                 int c = Py_CHARMASK(*s++);
1857                 if (islower(c)) {
1858                         *s_new = toupper(c);
1859                 } else
1860                         *s_new = c;
1861                 s_new++;
1862         }
1863         return new;
1864 }
1865
1866
1867 PyDoc_STRVAR(title__doc__,
1868 "S.title() -> string\n\
1869 \n\
1870 Return a titlecased version of S, i.e. words start with uppercase\n\
1871 characters, all remaining cased characters have lowercase.");
1872
1873 static PyObject*
1874 string_title(PyStringObject *self)
1875 {
1876         char *s = PyString_AS_STRING(self), *s_new;
1877         int i, n = PyString_GET_SIZE(self);
1878         int previous_is_cased = 0;
1879         PyObject *new;
1880
1881         new = PyString_FromStringAndSize(NULL, n);
1882         if (new == NULL)
1883                 return NULL;
1884         s_new = PyString_AsString(new);
1885         for (i = 0; i < n; i++) {
1886                 int c = Py_CHARMASK(*s++);
1887                 if (islower(c)) {
1888                         if (!previous_is_cased)
1889                             c = toupper(c);
1890                         previous_is_cased = 1;
1891                 } else if (isupper(c)) {
1892                         if (previous_is_cased)
1893                             c = tolower(c);
1894                         previous_is_cased = 1;
1895                 } else
1896                         previous_is_cased = 0;
1897                 *s_new++ = c;
1898         }
1899         return new;
1900 }
1901
1902 PyDoc_STRVAR(capitalize__doc__,
1903 "S.capitalize() -> string\n\
1904 \n\
1905 Return a copy of the string S with only its first character\n\
1906 capitalized.");
1907
1908 static PyObject *
1909 string_capitalize(PyStringObject *self)
1910 {
1911         char *s = PyString_AS_STRING(self), *s_new;
1912         int i, n = PyString_GET_SIZE(self);
1913         PyObject *new;
1914
1915         new = PyString_FromStringAndSize(NULL, n);
1916         if (new == NULL)
1917                 return NULL;
1918         s_new = PyString_AsString(new);
1919         if (0 < n) {
1920                 int c = Py_CHARMASK(*s++);
1921                 if (islower(c))
1922                         *s_new = toupper(c);
1923                 else
1924                         *s_new = c;
1925                 s_new++;
1926         }
1927         for (i = 1; i < n; i++) {
1928                 int c = Py_CHARMASK(*s++);
1929                 if (isupper(c))
1930                         *s_new = tolower(c);
1931                 else
1932                         *s_new = c;
1933                 s_new++;
1934         }
1935         return new;
1936 }
1937
1938
1939 PyDoc_STRVAR(count__doc__,
1940 "S.count(sub[, start[, end]]) -> int\n\
1941 \n\
1942 Return the number of occurrences of substring sub in string\n\
1943 S[start:end].  Optional arguments start and end are\n\
1944 interpreted as in slice notation.");
1945
1946 static PyObject *
1947 string_count(PyStringObject *self, PyObject *args)
1948 {
1949         const char *s = PyString_AS_STRING(self), *sub;
1950         int len = PyString_GET_SIZE(self), n;
1951         int i = 0, last = INT_MAX;
1952         int m, r;
1953         PyObject *subobj;
1954
1955         if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1956                 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
1957                 return NULL;
1958
1959         if (PyString_Check(subobj)) {
1960                 sub = PyString_AS_STRING(subobj);
1961                 n = PyString_GET_SIZE(subobj);
1962         }
1963 #ifdef Py_USING_UNICODE
1964         else if (PyUnicode_Check(subobj)) {
1965                 int count;
1966                 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1967                 if (count == -1)
1968                         return NULL;
1969                 else
1970                         return PyInt_FromLong((long) count);
1971         }
1972 #endif
1973         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1974                 return NULL;
1975
1976         string_adjust_indices(&i, &last, len);
1977
1978         m = last + 1 - n;
1979         if (n == 0)
1980                 return PyInt_FromLong((long) (m-i));
1981
1982         r = 0;
1983         while (i < m) {
1984                 if (!memcmp(s+i, sub, n)) {
1985                         r++;
1986                         i += n;
1987                 } else {
1988                         i++;
1989                 }
1990         }
1991         return PyInt_FromLong((long) r);
1992 }
1993
1994
1995 PyDoc_STRVAR(swapcase__doc__,
1996 "S.swapcase() -> string\n\
1997 \n\
1998 Return a copy of the string S with uppercase characters\n\
1999 converted to lowercase and vice versa.");
2000
2001 static PyObject *
2002 string_swapcase(PyStringObject *self)
2003 {
2004         char *s = PyString_AS_STRING(self), *s_new;
2005         int i, n = PyString_GET_SIZE(self);
2006         PyObject *new;
2007
2008         new = PyString_FromStringAndSize(NULL, n);
2009         if (new == NULL)
2010                 return NULL;
2011         s_new = PyString_AsString(new);
2012         for (i = 0; i < n; i++) {
2013                 int c = Py_CHARMASK(*s++);
2014                 if (islower(c)) {
2015                         *s_new = toupper(c);
2016                 }
2017                 else if (isupper(c)) {
2018                         *s_new = tolower(c);
2019                 }
2020                 else
2021                         *s_new = c;
2022                 s_new++;
2023         }
2024         return new;
2025 }
2026
2027
2028 PyDoc_STRVAR(translate__doc__,
2029 "S.translate(table [,deletechars]) -> string\n\
2030 \n\
2031 Return a copy of the string S, where all characters occurring\n\
2032 in the optional argument deletechars are removed, and the\n\
2033 remaining characters have been mapped through the given\n\
2034 translation table, which must be a string of length 256.");
2035
2036 static PyObject *
2037 string_translate(PyStringObject *self, PyObject *args)
2038 {
2039         register char *input, *output;
2040         register const char *table;
2041         register int i, c, changed = 0;
2042         PyObject *input_obj = (PyObject*)self;
2043         const char *table1, *output_start, *del_table=NULL;
2044         int inlen, tablen, dellen = 0;
2045         PyObject *result;
2046         int trans_table[256];
2047         PyObject *tableobj, *delobj = NULL;
2048
2049         if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2050                               &tableobj, &delobj))
2051                 return NULL;
2052
2053         if (PyString_Check(tableobj)) {
2054                 table1 = PyString_AS_STRING(tableobj);
2055                 tablen = PyString_GET_SIZE(tableobj);
2056         }
2057 #ifdef Py_USING_UNICODE
2058         else if (PyUnicode_Check(tableobj)) {
2059                 /* Unicode .translate() does not support the deletechars
2060                    parameter; instead a mapping to None will cause characters
2061                    to be deleted. */
2062                 if (delobj != NULL) {
2063                         PyErr_SetString(PyExc_TypeError,
2064                         "deletions are implemented differently for unicode");
2065                         return NULL;
2066                 }
2067                 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2068         }
2069 #endif
2070         else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
2071                 return NULL;
2072
2073         if (tablen != 256) {
2074                 PyErr_SetString(PyExc_ValueError,
2075                   "translation table must be 256 characters long");
2076                 return NULL;
2077         }
2078
2079         if (delobj != NULL) {
2080                 if (PyString_Check(delobj)) {
2081                         del_table = PyString_AS_STRING(delobj);
2082                         dellen = PyString_GET_SIZE(delobj);
2083                 }
2084 #ifdef Py_USING_UNICODE
2085                 else if (PyUnicode_Check(delobj)) {
2086                         PyErr_SetString(PyExc_TypeError,
2087                         "deletions are implemented differently for unicode");
2088                         return NULL;
2089                 }
2090 #endif
2091                 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2092                         return NULL;
2093         }
2094         else {
2095                 del_table = NULL;
2096                 dellen = 0;
2097         }
2098
2099         table = table1;
2100         inlen = PyString_Size(input_obj);
2101         result = PyString_FromStringAndSize((char *)NULL, inlen);
2102         if (result == NULL)
2103                 return NULL;
2104         output_start = output = PyString_AsString(result);
2105         input = PyString_AsString(input_obj);
2106
2107         if (dellen == 0) {
2108                 /* If no deletions are required, use faster code */
2109                 for (i = inlen; --i >= 0; ) {
2110                         c = Py_CHARMASK(*input++);
2111                         if (Py_CHARMASK((*output++ = table[c])) != c)
2112                                 changed = 1;
2113                 }
2114                 if (changed || !PyString_CheckExact(input_obj))
2115                         return result;
2116                 Py_DECREF(result);
2117                 Py_INCREF(input_obj);
2118                 return input_obj;
2119         }
2120
2121         for (i = 0; i < 256; i++)
2122                 trans_table[i] = Py_CHARMASK(table[i]);
2123
2124         for (i = 0; i < dellen; i++)
2125                 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2126
2127         for (i = inlen; --i >= 0; ) {
2128                 c = Py_CHARMASK(*input++);
2129                 if (trans_table[c] != -1)
2130                         if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2131                                 continue;
2132                 changed = 1;
2133         }
2134         if (!changed && PyString_CheckExact(input_obj)) {
2135                 Py_DECREF(result);
2136                 Py_INCREF(input_obj);
2137                 return input_obj;
2138         }
2139         /* Fix the size of the resulting string */
2140         if (inlen > 0)
2141                 _PyString_Resize(&result, output - output_start);
2142         return result;
2143 }
2144
2145
2146 /* What follows is used for implementing replace().  Perry Stoll. */
2147
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Searches the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the index into MEM of that occurrence, or -1 if
  there is none.  If PAT_LEN is greater than LEN no position is examined
  and the function returns -1.

  Note that PAT[0] is read at every examined position regardless of
  PAT_LEN.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
        /* a match cannot start in the last pat_len-1 bytes */
        const int last_start = len - pat_len;
        int pos = 0;

        while (pos <= last_start) {
                /* cheap first-byte test before the full comparison */
                if (mem[pos] == pat[0] &&
                    memcmp(mem + pos, pat, pat_len) == 0)
                        return pos;
                pos++;
        }
        return -1;
}
2173
/*
  mymemcnt

   Return the number of non-overlapping occurrences of PAT in MEM, found
   by scanning left to right and resuming right after each match:
           mem=1111  and pat==11 returns 2.
           mem=11111 and pat==11 also returns 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
        int found = 0;
        int at;

        for (;;) {
                if (len < 0)
                        break;
                at = mymemfind(mem, len, pat, pat_len);
                if (at == -1)
                        break;
                /* continue the search just past this match */
                mem += at + pat_len;
                len -= at + pat_len;
                found++;
        }
        return found;
}
2197
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are replaced
   with SUB.  At most COUNT occurrences are replaced, scanning left to
   right; a negative COUNT means "all of them".

   If STR is empty, if PAT and SUB are both empty, if length of PAT is
   greater than length of STR, or if there are no occurrences of PAT in STR
   to replace, then the original string is returned.  Otherwise, a new
   string is allocated here with PyMem_MALLOC and returned; the caller must
   release it.  The new string is not guaranteed to be null-terminated.

   An empty PAT matches before every byte of STR and at its end.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs.

   return value is:
       the new string allocated locally, or
       NULL if an error occurred (no memory, or the length of the result
       would not fit in an int).
*/
static char *
mymemreplace(const char *str, int len,          /* input string */
             const char *pat, int pat_len,      /* pattern string to find */
             const char *sub, int sub_len,      /* substitution string */
             int count,                         /* number of replacements */
             int *out_len)
{
        char *out_s;
        char *new_s;
        int nfound, offset, new_len, delta;

        if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
                goto return_same;

        /* find length of output string */
        nfound = (pat_len > 0) ? mymemcnt(str, len, pat, pat_len) : len + 1;
        if (count >= 0 && nfound > count)
                nfound = count;
        if (nfound == 0)
                goto return_same;

        /* Each replacement changes the length by delta.  Only growth can
           overflow: refuse it rather than allocate a buffer that is too
           small for the copies below. */
        delta = sub_len - pat_len;
        if (delta > 0 && nfound > (INT_MAX - len) / delta)
                return NULL;

        new_len = len + nfound*delta;
        if (new_len == 0) {
                /* Have to allocate something for the caller to free(). */
                out_s = (char *)PyMem_MALLOC(1);
                if (out_s == NULL)
                        return NULL;
                out_s[0] = '\0';
        }
        else {
                assert(new_len > 0);
                new_s = (char *)PyMem_MALLOC(new_len);
                if (new_s == NULL)
                        return NULL;
                out_s = new_s;

                if (pat_len > 0) {
                        for (; nfound > 0; --nfound) {
                                /* find index of next instance of pattern */
                                offset = mymemfind(str, len, pat, pat_len);
                                if (offset == -1)
                                        break;

                                /* copy non matching part of input string */
                                memcpy(new_s, str, offset);
                                str += offset + pat_len;
                                len -= offset + pat_len;

                                /* copy substitute into the output string */
                                new_s += offset;
                                memcpy(new_s, sub, sub_len);
                                new_s += sub_len;
                        }
                        /* copy any remaining values into output string */
                        if (len > 0)
                                memcpy(new_s, str, len);
                }
                else {
                        /* empty pattern: emit SUB before each input byte
                           until the replacement budget is used up, then
                           copy the rest of the input verbatim */
                        for (;;++str, --len) {
                                memcpy(new_s, sub, sub_len);
                                new_s += sub_len;
                                if (--nfound <= 0) {
                                        memcpy(new_s, str, len);
                                        break;
                                }
                                *new_s++ = *str;
                        }
                }
        }
        *out_len = new_len;
        return out_s;

  return_same:
        *out_len = -1;
        return (char *)str; /* cast away const */
}
2295
2296
2297 PyDoc_STRVAR(replace__doc__,
2298 "S.replace (old, new[, maxsplit]) -> string\n\
2299 \n\
2300 Return a copy of string S with all occurrences of substring\n\
2301 old replaced by new.  If the optional argument maxsplit is\n\
2302 given, only the first maxsplit occurrences are replaced.");
2303
2304 static PyObject *
2305 string_replace(PyStringObject *self, PyObject *args)
2306 {
2307         const char *str = PyString_AS_STRING(self), *sub, *repl;
2308         char *new_s;
2309         const int len = PyString_GET_SIZE(self);
2310         int sub_len, repl_len, out_len;
2311         int count = -1;
2312         PyObject *new;
2313         PyObject *subobj, *replobj;
2314
2315         if (!PyArg_ParseTuple(args, "OO|i:replace",
2316                               &subobj, &replobj, &count))
2317                 return NULL;
2318
2319         if (PyString_Check(subobj)) {
2320                 sub = PyString_AS_STRING(subobj);
2321                 sub_len = PyString_GET_SIZE(subobj);
2322         }
2323 #ifdef Py_USING_UNICODE
2324         else if (PyUnicode_Check(subobj))
2325                 return PyUnicode_Replace((PyObject *)self,
2326                                          subobj, replobj, count);
2327 #endif
2328         else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2329                 return NULL;
2330
2331         if (PyString_Check(replobj)) {
2332                 repl = PyString_AS_STRING(replobj);
2333                 repl_len = PyString_GET_SIZE(replobj);
2334         }
2335 #ifdef Py_USING_UNICODE
2336         else if (PyUnicode_Check(replobj))
2337                 return PyUnicode_Replace((PyObject *)self,
2338                                          subobj, replobj, count);
2339 #endif
2340         else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2341                 return NULL;
2342
2343         new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
2344         if (new_s == NULL) {
2345                 PyErr_NoMemory();
2346                 return NULL;
2347         }
2348         if (out_len == -1) {
2349                 if (PyString_CheckExact(self)) {
2350                         /* we're returning another reference to self */
2351                         new = (PyObject*)self;
2352                         Py_INCREF(new);
2353                 }
2354                 else {
2355                         new = PyString_FromStringAndSize(str, len);
2356                         if (new == NULL)
2357                                 return NULL;
2358                 }
2359         }
2360         else {
2361                 new = PyString_FromStringAndSize(new_s, out_len);
2362                 PyMem_FREE(new_s);
2363         }
2364         return new;
2365 }
2366
2367
2368 PyDoc_STRVAR(startswith__doc__,
2369 "S.startswith(prefix[, start[, end]]) -> bool\n\
2370 \n\
2371 Return True if S starts with the specified prefix, False otherwise.  With\n\
2372 optional start, test S beginning at that position.  With optional end, stop\n\
2373 comparing S at that position.");
2374
2375 static PyObject *
2376 string_startswith(PyStringObject *self, PyObject *args)
2377 {
2378         const char* str = PyString_AS_STRING(self);
2379         int len = PyString_GET_SIZE(self);
2380         const char* prefix;
2381         int plen;
2382         int start = 0;
2383         int end = INT_MAX;
2384         PyObject *subobj;
2385
2386         if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2387                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2388                 return NULL;
2389         if (PyString_Check(subobj)) {
2390                 prefix = PyString_AS_STRING(subobj);
2391                 plen = PyString_GET_SIZE(subobj);
2392         }
2393 #ifdef Py_USING_UNICODE
2394         else if (PyUnicode_Check(subobj)) {
2395                 int rc;
2396                 rc = PyUnicode_Tailmatch((PyObject *)self,
2397                                           subobj, start, end, -1);
2398                 if (rc == -1)
2399                         return NULL;
2400                 else
2401                         return PyBool_FromLong((long) rc);
2402         }
2403 #endif
2404         else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
2405                 return NULL;
2406
2407         string_adjust_indices(&start, &end, len);
2408
2409         if (start+plen > len)
2410                 return PyBool_FromLong(0);
2411
2412         if (end-start >= plen)
2413                 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2414         else
2415                 return PyBool_FromLong(0);
2416 }
2417
2418
2419 PyDoc_STRVAR(endswith__doc__,
2420 "S.endswith(suffix[, start[, end]]) -> bool\n\
2421 \n\
2422 Return True if S ends with the specified suffix, False otherwise.  With\n\
2423 optional start, test S beginning at that position.  With optional end, stop\n\
2424 comparing S at that position.");
2425
2426 static PyObject *
2427 string_endswith(PyStringObject *self, PyObject *args)
2428 {
2429         const char* str = PyString_AS_STRING(self);
2430         int len = PyString_GET_SIZE(self);
2431         const char* suffix;
2432         int slen;
2433         int start = 0;
2434         int end = INT_MAX;
2435         PyObject *subobj;
2436
2437         if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2438                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2439                 return NULL;
2440         if (PyString_Check(subobj)) {
2441                 suffix = PyString_AS_STRING(subobj);
2442                 slen = PyString_GET_SIZE(subobj);
2443         }
2444 #ifdef Py_USING_UNICODE
2445         else if (PyUnicode_Check(subobj)) {
2446                 int rc;
2447                 rc = PyUnicode_Tailmatch((PyObject *)self,
2448                                           subobj, start, end, +1);
2449                 if (rc == -1)
2450                         return NULL;
2451                 else
2452                         return PyBool_FromLong((long) rc);
2453         }
2454 #endif
2455         else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
2456                 return NULL;
2457
2458         string_adjust_indices(&start, &end, len);
2459
2460         if (end-start < slen || start > len)
2461                 return PyBool_FromLong(0);
2462
2463         if (end-slen > start)
2464                 start = end - slen;
2465         if (end-start >= slen)
2466                 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2467         else
2468                 return PyBool_FromLong(0);
2469 }
2470
2471
2472 PyDoc_STRVAR(encode__doc__,
2473 "S.encode([encoding[,errors]]) -> object\n\
2474 \n\
2475 Encodes S using the codec registered for encoding. encoding defaults\n\
2476 to the default encoding. errors may be given to set a different error\n\
2477 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2478 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2479 'xmlcharrefreplace' as well as any other name registered with\n\
2480 codecs.register_error that is able to handle UnicodeEncodeErrors.");
2481
2482 static PyObject *
2483 string_encode(PyStringObject *self, PyObject *args)
2484 {
2485     char *encoding = NULL;
2486     char *errors = NULL;
2487     if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2488         return NULL;
2489     return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2490 }
2491
2492
2493 PyDoc_STRVAR(decode__doc__,
2494 "S.decode([encoding[,errors]]) -> object\n\
2495 \n\
2496 Decodes S using the codec registered for encoding. encoding defaults\n\
2497 to the default encoding. errors may be given to set a different error\n\
2498 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2499 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2500 as well as any other name registerd with codecs.register_error that is\n\
2501 able to handle UnicodeDecodeErrors.");
2502
2503 static PyObject *
2504 string_decode(PyStringObject *self, PyObject *args)
2505 {
2506     char *encoding = NULL;
2507     char *errors = NULL;
2508     if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2509         return NULL;
2510     return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
2511 }
2512
2513
2514 PyDoc_STRVAR(expandtabs__doc__,
2515 "S.expandtabs([tabsize]) -> string\n\
2516 \n\
2517 Return a copy of S where all tab characters are expanded using spaces.\n\
2518 If tabsize is not given, a tab size of 8 characters is assumed.");
2519
2520 static PyObject*
2521 string_expandtabs(PyStringObject *self, PyObject *args)
2522 {
2523     const char *e, *p;
2524     char *q;
2525     int i, j;
2526     PyObject *u;
2527     int tabsize = 8;
2528
2529     if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2530         return NULL;
2531
2532     /* First pass: determine size of output string */
2533     i = j = 0;
2534     e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2535     for (p = PyString_AS_STRING(self); p < e; p++)
2536         if (*p == '\t') {
2537             if (tabsize > 0)
2538                 j += tabsize - (j % tabsize);
2539         }
2540         else {
2541             j++;
2542             if (*p == '\n' || *p == '\r') {
2543                 i += j;
2544                 j = 0;
2545             }
2546         }
2547
2548     /* Second pass: create output string and fill it */
2549     u = PyString_FromStringAndSize(NULL, i + j);
2550     if (!u)
2551         return NULL;
2552
2553     j = 0;
2554     q = PyString_AS_STRING(u);
2555
2556     for (p = PyString_AS_STRING(self); p < e; p++)
2557         if (*p == '\t') {
2558             if (tabsize > 0) {
2559                 i = tabsize - (j % tabsize);
2560                 j += i;
2561                 while (i--)
2562                     *q++ = ' ';
2563             }
2564         }
2565         else {
2566             j++;
2567             *q++ = *p;
2568             if (*p == '\n' || *p == '\r')
2569                 j = 0;
2570         }
2571
2572     return u;
2573 }
2574
2575 static PyObject *
2576 pad(PyStringObject *self, int left, int right, char fill)
2577 {
2578     PyObject *u;
2579
2580     if (left < 0)
2581         left = 0;
2582     if (right < 0)
2583         right = 0;
2584
2585     if (left == 0 && right == 0 && PyString_CheckExact(self)) {
2586         Py_INCREF(self);
2587         return (PyObject *)self;
2588     }
2589
2590     u = PyString_FromStringAndSize(NULL,
2591                                    left + PyString_GET_SIZE(self) + right);
2592     if (u) {
2593         if (left)
2594             memset(PyString_AS_STRING(u), fill, left);
2595         memcpy(PyString_AS_STRING(u) + left,
2596                PyString_AS_STRING(self),
2597                PyString_GET_SIZE(self));
2598         if (right)
2599             memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2600                    fill, right);
2601     }
2602
2603     return u;
2604 }
2605
2606 PyDoc_STRVAR(ljust__doc__,
2607 "S.ljust(width) -> string\n"
2608 "\n"
2609 "Return S left justified in a string of length width. Padding is\n"
2610 "done using spaces.");
2611
2612 static PyObject *
2613 string_ljust(PyStringObject *self, PyObject *args)
2614 {
2615     int width;
2616     if (!PyArg_ParseTuple(args, "i:ljust", &width))
2617         return NULL;
2618
2619     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
2620         Py_INCREF(self);
2621         return (PyObject*) self;
2622     }
2623
2624     return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2625 }
2626
2627
2628 PyDoc_STRVAR(rjust__doc__,
2629 "S.rjust(width) -> string\n"
2630 "\n"
2631 "Return S right justified in a string of length width. Padding is\n"
2632 "done using spaces.");
2633
2634 static PyObject *
2635 string_rjust(PyStringObject *self, PyObject *args)
2636 {
2637     int width;
2638     if (!PyArg_ParseTuple(args, "i:rjust", &width))
2639         return NULL;
2640
2641     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
2642         Py_INCREF(self);
2643         return (PyObject*) self;
2644     }
2645
2646     return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2647 }
2648
2649
2650 PyDoc_STRVAR(center__doc__,
2651 "S.center(width) -> string\n"
2652 "\n"
2653 "Return S centered in a string of length width. Padding is done\n"
2654 "using spaces.");
2655
2656 static PyObject *
2657 string_center(PyStringObject *self, PyObject *args)
2658 {
2659     int marg, left;
2660     int width;
2661
2662     if (!PyArg_ParseTuple(args, "i:center", &width))
2663         return NULL;
2664
2665     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
2666         Py_INCREF(self);
2667         return (PyObject*) self;
2668     }
2669
2670     marg = width - PyString_GET_SIZE(self);
2671     left = marg / 2 + (marg & width & 1);
2672
2673     return pad(self, left, marg - left, ' ');
2674 }
2675
2676 PyDoc_STRVAR(zfill__doc__,
2677 "S.zfill(width) -> string\n"
2678 "\n"
2679 "Pad a numeric string S with zeros on the left, to fill a field\n"
2680 "of the specified width.  The string S is never truncated.");
2681
2682 static PyObject *
2683 string_zfill(PyStringObject *self, PyObject *args)
2684 {
2685     int fill;
2686     PyObject *s;
2687     char *p;
2688
2689     int width;
2690     if (!PyArg_ParseTuple(args, "i:zfill", &width))
2691         return NULL;
2692
2693     if (PyString_GET_SIZE(self) >= width) {
2694         if (PyString_CheckExact(self)) {
2695             Py_INCREF(self);
2696             return (PyObject*) self;
2697         }
2698         else
2699             return PyString_FromStringAndSize(
2700                 PyString_AS_STRING(self),
2701                 PyString_GET_SIZE(self)
2702             );
2703     }
2704
2705     fill = width - PyString_GET_SIZE(self);
2706
2707     s = pad(self, fill, 0, '0');
2708
2709     if (s == NULL)
2710         return NULL;
2711
2712     p = PyString_AS_STRING(s);
2713     if (p[fill] == '+' || p[fill] == '-') {
2714         /* move sign to beginning of string */
2715         p[0] = p[fill];
2716         p[fill] = '0';
2717     }
2718
2719     return (PyObject*) s;
2720 }
2721
2722 PyDoc_STRVAR(isspace__doc__,
2723 "S.isspace() -> bool\n"
2724 "\n"
2725 "Return True if there are only whitespace characters in S,\n"
2726 "False otherwise.");
2727
2728 static PyObject*
2729 string_isspace(PyStringObject *self)
2730 {
2731     register const unsigned char *p
2732         = (unsigned char *) PyString_AS_STRING(self);
2733     register const unsigned char *e;
2734
2735     /* Shortcut for single character strings */
2736     if (PyString_GET_SIZE(self) == 1 &&
2737         isspace(*p))
2738         return PyBool_FromLong(1);
2739
2740     /* Special case for empty strings */
2741     if (PyString_GET_SIZE(self) == 0)
2742         return PyBool_FromLong(0);
2743
2744     e = p + PyString_GET_SIZE(self);
2745     for (; p < e; p++) {
2746         if (!isspace(*p))
2747             return PyBool_FromLong(0);
2748     }
2749     return PyBool_FromLong(1);
2750 }
2751
2752
2753 PyDoc_STRVAR(isalpha__doc__,
2754 "S.isalpha() -> bool\n\
2755 \n\
2756 Return True if  all characters in S are alphabetic\n\
2757 and there is at least one character in S, False otherwise.");
2758
2759 static PyObject*
2760 string_isalpha(PyStringObject *self)
2761 {
2762     register const unsigned char *p
2763         = (unsigned char *) PyString_AS_STRING(self);
2764     register const unsigned char *e;
2765
2766     /* Shortcut for single character strings */
2767     if (PyString_GET_SIZE(self) == 1 &&
2768         isalpha(*p))
2769         return PyBool_FromLong(1);
2770
2771     /* Special case for empty strings */
2772     if (PyString_GET_SIZE(self) == 0)
2773         return PyBool_FromLong(0);
2774
2775     e = p + PyString_GET_SIZE(self);
2776     for (; p < e; p++) {
2777         if (!isalpha(*p))
2778             return PyBool_FromLong(0);
2779     }
2780     return PyBool_FromLong(1);
2781 }
2782
2783
2784 PyDoc_STRVAR(isalnum__doc__,
2785 "S.isalnum() -> bool\n\
2786 \n\
2787 Return True if  all characters in S are alphanumeric\n\
2788 and there is at least one character in S, False otherwise.");
2789
2790 static PyObject*
2791 string_isalnum(PyStringObject *self)
2792 {
2793     register const unsigned char *p
2794         = (unsigned char *) PyString_AS_STRING(self);
2795     register const unsigned char *e;
2796
2797     /* Shortcut for single character strings */
2798     if (PyString_GET_SIZE(self) == 1 &&
2799         isalnum(*p))
2800         return PyBool_FromLong(1);
2801
2802     /* Special case for empty strings */
2803     if (PyString_GET_SIZE(self) == 0)
2804         return PyBool_FromLong(0);
2805
2806     e = p + PyString_GET_SIZE(self);
2807     for (; p < e; p++) {
2808         if (!isalnum(*p))
2809             return PyBool_FromLong(0);
2810     }
2811     return PyBool_FromLong(1);
2812 }
2813
2814
2815 PyDoc_STRVAR(isdigit__doc__,
2816 "S.isdigit() -> bool\n\
2817 \n\
2818 Return True if there are only digit characters in S,\n\
2819 False otherwise.");
2820
2821 static PyObject*
2822 string_isdigit(PyStringObject *self)
2823 {
2824     register const unsigned char *p
2825         = (unsigned char *) PyString_AS_STRING(self);
2826     register const unsigned char *e;
2827
2828     /* Shortcut for single character strings */
2829     if (PyString_GET_SIZE(self) == 1 &&
2830         isdigit(*p))
2831         return PyBool_FromLong(1);
2832
2833     /* Special case for empty strings */
2834     if (PyString_GET_SIZE(self) == 0)
2835         return PyBool_FromLong(0);
2836
2837     e = p + PyString_GET_SIZE(self);
2838     for (; p < e; p++) {
2839         if (!isdigit(*p))
2840             return PyBool_FromLong(0);
2841     }
2842     return PyBool_FromLong(1);
2843 }
2844
2845
2846 PyDoc_STRVAR(islower__doc__,
2847 "S.islower() -> bool\n\
2848 \n\
2849 Return True if all cased characters in S are lowercase and there is\n\
2850 at least one cased character in S, False otherwise.");
2851
2852 static PyObject*
2853 string_islower(PyStringObject *self)
2854 {
2855     register const unsigned char *p
2856         = (unsigned char *) PyString_AS_STRING(self);
2857     register const unsigned char *e;
2858     int cased;
2859
2860     /* Shortcut for single character strings */
2861     if (PyString_GET_SIZE(self) == 1)
2862         return PyBool_FromLong(islower(*p) != 0);
2863
2864     /* Special case for empty strings */
2865     if (PyString_GET_SIZE(self) == 0)
2866         return PyBool_FromLong(0);
2867
2868     e = p + PyString_GET_SIZE(self);
2869     cased = 0;
2870     for (; p < e; p++) {
2871         if (isupper(*p))
2872             return PyBool_FromLong(0);
2873         else if (!cased && islower(*p))
2874             cased = 1;
2875     }
2876     return PyBool_FromLong(cased);
2877 }
2878
2879
2880 PyDoc_STRVAR(isupper__doc__,
2881 "S.isupper() -> bool\n\
2882 \n\
2883 Return True if  all cased characters in S are uppercase and there is\n\
2884 at least one cased character in S, False otherwise.");
2885
2886 static PyObject*
2887 string_isupper(PyStringObject *self)
2888 {
2889     register const unsigned char *p
2890         = (unsigned char *) PyString_AS_STRING(self);
2891     register const unsigned char *e;
2892     int cased;
2893
2894     /* Shortcut for single character strings */
2895     if (PyString_GET_SIZE(self) == 1)
2896         return PyBool_FromLong(isupper(*p) != 0);
2897
2898     /* Special case for empty strings */
2899     if (PyString_GET_SIZE(self) == 0)
2900         return PyBool_FromLong(0);
2901
2902     e = p + PyString_GET_SIZE(self);
2903     cased = 0;
2904     for (; p < e; p++) {
2905         if (islower(*p))
2906             return PyBool_FromLong(0);
2907         else if (!cased && isupper(*p))
2908             cased = 1;
2909     }
2910     return PyBool_FromLong(cased);
2911 }
2912
2913
2914 PyDoc_STRVAR(istitle__doc__,
2915 "S.istitle() -> bool\n\
2916 \n\
2917 Return True if S is a titlecased string, i.e. uppercase characters\n\
2918 may only follow uncased characters and lowercase characters only cased\n\
2919 ones. Return False otherwise.");
2920
2921 static PyObject*
2922 string_istitle(PyStringObject *self, PyObject *uncased)
2923 {
2924     register const unsigned char *p
2925         = (unsigned char *) PyString_AS_STRING(self);
2926     register const unsigned char *e;
2927     int cased, previous_is_cased;
2928
2929     /* Shortcut for single character strings */
2930     if (PyString_GET_SIZE(self) == 1)
2931         return PyBool_FromLong(isupper(*p) != 0);
2932
2933     /* Special case for empty strings */
2934     if (PyString_GET_SIZE(self) == 0)
2935         return PyBool_FromLong(0);
2936
2937     e = p + PyString_GET_SIZE(self);
2938     cased = 0;
2939     previous_is_cased = 0;
2940     for (; p < e; p++) {
2941         register const unsigned char ch = *p;
2942
2943         if (isupper(ch)) {
2944             if (previous_is_cased)
2945                 return PyBool_FromLong(0);
2946             previous_is_cased = 1;
2947             cased = 1;
2948         }
2949         else if (islower(ch)) {
2950             if (!previous_is_cased)
2951                 return PyBool_FromLong(0);
2952             previous_is_cased = 1;
2953             cased = 1;
2954         }
2955         else
2956             previous_is_cased = 0;
2957     }
2958     return PyBool_FromLong(cased);
2959 }
2960
2961
2962 PyDoc_STRVAR(splitlines__doc__,
2963 "S.splitlines([keepends]) -> list of strings\n\
2964 \n\
2965 Return a list of the lines in S, breaking at line boundaries.\n\
2966 Line breaks are not included in the resulting list unless keepends\n\
2967 is given and true.");
2968
2969 #define SPLIT_APPEND(data, left, right)                                 \
2970         str = PyString_FromStringAndSize(data + left, right - left);    \
2971         if (!str)                                                       \
2972             goto onError;                                               \
2973         if (PyList_Append(list, str)) {                                 \
2974             Py_DECREF(str);                                             \
2975             goto onError;                                               \
2976         }                                                               \
2977         else                                                            \
2978             Py_DECREF(str);
2979
2980 static PyObject*
2981 string_splitlines(PyStringObject *self, PyObject *args)
2982 {
2983     register int i;
2984     register int j;
2985     int len;
2986     int keepends = 0;
2987     PyObject *list;
2988     PyObject *str;
2989     char *data;
2990
2991     if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
2992         return NULL;
2993
2994     data = PyString_AS_STRING(self);
2995     len = PyString_GET_SIZE(self);
2996
2997     list = PyList_New(0);
2998     if (!list)
2999         goto onError;
3000
3001     for (i = j = 0; i < len; ) {
3002         int eol;
3003
3004         /* Find a line and append it */
3005         while (i < len && data[i] != '\n' && data[i] != '\r')
3006             i++;
3007
3008         /* Skip the line break reading CRLF as one line break */
3009         eol = i;
3010         if (i < len) {
3011             if (data[i] == '\r' && i + 1 < len &&
3012                 data[i+1] == '\n')
3013                 i += 2;
3014             else
3015                 i++;
3016             if (keepends)
3017                 eol = i;
3018         }
3019         SPLIT_APPEND(data, j, eol);
3020         j = i;
3021     }
3022     if (j < len) {
3023         SPLIT_APPEND(data, j, len);
3024     }
3025
3026     return list;
3027
3028  onError:
3029     Py_DECREF(list);
3030     return NULL;
3031 }
3032
3033 #undef SPLIT_APPEND
3034
3035 \f
/* Method table for the str type (installed as PyString_Type.tp_methods).
   Each entry gives the Python-level method name, the C function that
   implements it, the calling-convention flag and the docstring.  In this
   table the METH_NOARGS entries are functions that take only self,
   METH_VARARGS entries parse an argument tuple, and METH_O is used for
   join.  The table ends with a NULL sentinel entry. */
static PyMethodDef
string_methods[] = {
        /* Counterparts of the obsolete stropmodule functions; except
           string.maketrans(). */
        {"join", (PyCFunction)string_join, METH_O, join__doc__},
        {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
        {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
        {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
        {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
        {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
        {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
        {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
        {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
        {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
        {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
        {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
         capitalize__doc__},
        {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
        {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
         endswith__doc__},
        {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
        {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
        {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
        {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
        {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
        {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
        {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
        {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
         startswith__doc__},
        {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
        {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
         swapcase__doc__},
        {"translate", (PyCFunction)string_translate, METH_VARARGS,
         translate__doc__},
        {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
        {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
        {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
        {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
        {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
        {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
        {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
        {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
         expandtabs__doc__},
        {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
         splitlines__doc__},
        {NULL,     NULL}                     /* sentinel */
};
3083
3084 static PyObject *
3085 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3086
3087 static PyObject *
3088 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3089 {
3090         PyObject *x = NULL;
3091         static char *kwlist[] = {"object", 0};
3092
3093         if (type != &PyString_Type)
3094                 return str_subtype_new(type, args, kwds);
3095         if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3096                 return NULL;
3097         if (x == NULL)
3098                 return PyString_FromString("");
3099         return PyObject_Str(x);
3100 }
3101
3102 static PyObject *
3103 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3104 {
3105         PyObject *tmp, *pnew;
3106         int n;
3107
3108         assert(PyType_IsSubtype(type, &PyString_Type));
3109         tmp = string_new(&PyString_Type, args, kwds);
3110         if (tmp == NULL)
3111                 return NULL;
3112         assert(PyString_CheckExact(tmp));
3113         n = PyString_GET_SIZE(tmp);
3114         pnew = type->tp_alloc(type, n);
3115         if (pnew != NULL) {
3116                 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3117                 ((PyStringObject *)pnew)->ob_shash =
3118                         ((PyStringObject *)tmp)->ob_shash;
3119                 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3120         }
3121         Py_DECREF(tmp);
3122         return pnew;
3123 }
3124
3125 static PyObject *
3126 basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3127 {
3128         PyErr_SetString(PyExc_TypeError,
3129                         "The basestring type cannot be instantiated");
3130         return NULL;
3131 }
3132
3133 static PyObject *
3134 string_mod(PyObject *v, PyObject *w)
3135 {
3136         if (!PyString_Check(v)) {
3137                 Py_INCREF(Py_NotImplemented);
3138                 return Py_NotImplemented;
3139         }
3140         return PyString_Format(v, w);
3141 }
3142
/* Docstring of the basestring type; used as PyBaseString_Type.tp_doc. */
PyDoc_STRVAR(basestring_doc,
"Type basestring cannot be instantiated; it is the base for str and unicode.");
3145
/* Number-protocol slots for str (installed as PyString_Type.tp_as_number).
   The only slot filled in is nb_remainder, implemented by string_mod.
   Slots after the ones listed are not named in the initializer and are
   therefore zero. */
static PyNumberMethods string_as_number = {
        0,                      /*nb_add*/
        0,                      /*nb_subtract*/
        0,                      /*nb_multiply*/
        0,                      /*nb_divide*/
        string_mod,             /*nb_remainder*/
};
3153
3154
/* Type object for basestring.  It derives from PyBaseObject_Type and is
   the tp_base of PyString_Type.  It may be subclassed
   (Py_TPFLAGS_BASETYPE) but its tp_new, basestring_new, always raises
   TypeError.  Nearly every other slot is left empty. */
PyTypeObject PyBaseString_Type = {
        PyObject_HEAD_INIT(&PyType_Type)
        0,                                      /* ob_size */
        "basestring",                           /* tp_name */
        0,                                      /* tp_basicsize */
        0,                                      /* tp_itemsize */
        0,                                      /* tp_dealloc */
        0,                                      /* tp_print */
        0,                                      /* tp_getattr */
        0,                                      /* tp_setattr */
        0,                                      /* tp_compare */
        0,                                      /* tp_repr */
        0,                                      /* tp_as_number */
        0,                                      /* tp_as_sequence */
        0,                                      /* tp_as_mapping */
        0,                                      /* tp_hash */
        0,                                      /* tp_call */
        0,                                      /* tp_str */
        0,                                      /* tp_getattro */
        0,                                      /* tp_setattro */
        0,                                      /* tp_as_buffer */
        Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
        basestring_doc,                         /* tp_doc */
        0,                                      /* tp_traverse */
        0,                                      /* tp_clear */
        0,                                      /* tp_richcompare */
        0,                                      /* tp_weaklistoffset */
        0,                                      /* tp_iter */
        0,                                      /* tp_iternext */
        0,                                      /* tp_methods */
        0,                                      /* tp_members */
        0,                                      /* tp_getset */
        &PyBaseObject_Type,                     /* tp_base */
        0,                                      /* tp_dict */
        0,                                      /* tp_descr_get */
        0,                                      /* tp_descr_set */
        0,                                      /* tp_dictoffset */
        0,                                      /* tp_init */
        0,                                      /* tp_alloc */
        basestring_new,                         /* tp_new */
        0,                                      /* tp_free */
};
3197
/* Docstring of the str type; used as PyString_Type.tp_doc. */
PyDoc_STRVAR(string_doc,
"str(object) -> string\n\
\n\
Return a nice string representation of the object.\n\
If the argument is a string, the return value is the same object.");
3203
/* Type object for str.  Instances are variable-sized: a PyStringObject
   header followed by items of sizeof(char).  The type derives from
   PyBaseString_Type, may itself be subclassed (Py_TPFLAGS_BASETYPE), gets
   its methods from string_methods and is constructed through
   string_new. */
PyTypeObject PyString_Type = {
        PyObject_HEAD_INIT(&PyType_Type)
        0,                                      /* ob_size */
        "str",                                  /* tp_name */
        sizeof(PyStringObject),                 /* tp_basicsize */
        sizeof(char),                           /* tp_itemsize */
        (destructor)string_dealloc,             /* tp_dealloc */
        (printfunc)string_print,                /* tp_print */
        0,                                      /* tp_getattr */
        0,                                      /* tp_setattr */
        0,                                      /* tp_compare */
        (reprfunc)string_repr,                  /* tp_repr */
        &string_as_number,                      /* tp_as_number */
        &string_as_sequence,                    /* tp_as_sequence */
        &string_as_mapping,                     /* tp_as_mapping */
        (hashfunc)string_hash,                  /* tp_hash */
        0,                                      /* tp_call */
        (reprfunc)string_str,                   /* tp_str */
        PyObject_GenericGetAttr,                /* tp_getattro */
        0,                                      /* tp_setattro */
        &string_as_buffer,                      /* tp_as_buffer */
        Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
                Py_TPFLAGS_BASETYPE,            /* tp_flags */
        string_doc,                             /* tp_doc */
        0,                                      /* tp_traverse */
        0,                                      /* tp_clear */
        (richcmpfunc)string_richcompare,        /* tp_richcompare */
        0,                                      /* tp_weaklistoffset */
        0,                                      /* tp_iter */
        0,                                      /* tp_iternext */
        string_methods,                         /* tp_methods */
        0,                                      /* tp_members */
        0,                                      /* tp_getset */
        &PyBaseString_Type,                     /* tp_base */
        0,                                      /* tp_dict */
        0,                                      /* tp_descr_get */
        0,                                      /* tp_descr_set */
        0,                                      /* tp_dictoffset */
        0,                                      /* tp_init */
        0,                                      /* tp_alloc */
        string_new,                             /* tp_new */
        PyObject_Del,                           /* tp_free */
};
3247
3248 void
3249 PyString_Concat(register PyObject **pv, register PyObject *w)
3250 {
3251         register PyObject *v;
3252         if (*pv == NULL)
3253                 return;
3254         if (w == NULL || !PyString_Check(*pv)) {
3255                 Py_DECREF(*pv);
3256                 *pv = NULL;
3257                 return;
3258         }
3259         v = string_concat((PyStringObject *) *pv, w);
3260         Py_DECREF(*pv);
3261         *pv = v;
3262 }
3263
3264 void
3265 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
3266 {
3267         PyString_Concat(pv, w);
3268         Py_XDECREF(w);
3269 }
3270
3271
3272 /* The following function breaks the notion that strings are immutable:
3273    it changes the size of a string.  We get away with this only if there
3274    is only one module referencing the object.  You can also think of it
3275    as creating a new string object and destroying the old one, only
3276    more efficiently.  In any case, don't use this if the string may
3277    already be known to some other part of the code...
3278    Note that if there's not enough memory to resize the string, the original
3279    string object at *pv is deallocated, *pv is set to NULL, an "out of
3280    memory" exception is set, and -1 is returned.  Else (on success) 0 is
3281    returned, and the value in *pv may or may not be the same as on input.
3282    As always, an extra byte is allocated for a trailing \0 byte (newsize
3283    does *not* include that), and a trailing \0 byte is stored.
3284 */
3285
3286 int
3287 _PyString_Resize(PyObject **pv, int newsize)
3288 {
3289         register PyObject *v;
3290         register PyStringObject *sv;
3291         v = *pv;
3292         if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0) {
3293                 *pv = 0;
3294                 Py_DECREF(v);
3295                 PyErr_BadInternalCall();
3296                 return -1;
3297         }
3298         /* XXX UNREF/NEWREF interface should be more symmetrical */
3299         _Py_DEC_REFTOTAL;
3300         _Py_ForgetReference(v);
3301         *pv = (PyObject *)
3302                 PyObject_REALLOC((char *)v,
3303                         sizeof(PyStringObject) + newsize * sizeof(char));
3304         if (*pv == NULL) {
3305                 PyObject_Del(v);
3306                 PyErr_NoMemory();
3307                 return -1;
3308         }
3309         _Py_NewReference(*pv);
3310         sv = (PyStringObject *) *pv;
3311         sv->ob_size = newsize;
3312         sv->ob_sval[newsize] = '\0';
3313         return 0;
3314 }
3315
3316 /* Helpers for formatstring */
3317
3318 static PyObject *
3319 getnextarg(PyObject *args, int arglen, int *p_argidx)
3320 {
3321         int argidx = *p_argidx;
3322         if (argidx < arglen) {
3323                 (*p_argidx)++;
3324                 if (arglen < 0)
3325                         return args;
3326                 else
3327                         return PyTuple_GetItem(args, argidx);
3328         }
3329         PyErr_SetString(PyExc_TypeError,
3330                         "not enough arguments for format string");
3331         return NULL;
3332 }
3333
/* Format codes: flag bits for the `flags` argument of the formatting
 * helpers.  Each constant is a distinct bit, so several can be combined
 * in one int and tested with `flags & F_xxx`.  The character listed next
 * to each name is the conversion flag it stands for.
 * F_LJUST      '-'
 * F_SIGN       '+'
 * F_BLANK      ' '
 * F_ALT        '#'
 * F_ZERO       '0'
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)
3346
3347 static int
3348 formatfloat(char *buf, size_t buflen, int flags,
3349             int prec, int type, PyObject *v)
3350 {
3351         /* fmt = '%#.' + `prec` + `type`
3352            worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
3353         char fmt[20];
3354         double x;
3355         x = PyFloat_AsDouble(v);
3356         if (x == -1.0 && PyErr_Occurred()) {
3357                 PyErr_SetString(PyExc_TypeError, "float argument required");
3358                 return -1;
3359         }
3360         if (prec < 0)
3361                 prec = 6;
3362         if (type == 'f' && fabs(x)/1e25 >= 1e25)
3363                 type = 'g';
3364         /* Worst case length calc to ensure no buffer overrun:
3365
3366            'g' formats:
3367              fmt = %#.<prec>g
3368              buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
3369                 for any double rep.)
3370              len = 1 + prec + 1 + 2 + 5 = 9 + prec
3371
3372            'f' formats:
3373              buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
3374              len = 1 + 50 + 1 + prec = 52 + prec
3375
3376            If prec=0 the effective precision is 1 (the leading digit is
3377            always given), therefore increase the length by one.
3378
3379         */
3380         if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
3381             (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
3382                 PyErr_SetString(PyExc_OverflowError,
3383                         "formatted float is too long (precision too large?)");
3384                 return -1;
3385         }
3386         PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3387                       (flags&F_ALT) ? "#" : "",
3388                       prec, type);
3389         PyOS_snprintf(buf, buflen, fmt, x);
3390         return strlen(buf);
3391 }
3392
3393 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3394  * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
3395  * Python's regular ints.
3396  * Return value:  a new PyString*, or NULL if error.
3397  *  .  *pbuf is set to point into it,
3398  *     *plen set to the # of chars following that.
3399  *     Caller must decref it when done using pbuf.
3400  *     The string starting at *pbuf is of the form
3401  *         "-"? ("0x" | "0X")? digit+
3402  *     "0x"/"0X" are present only for x and X conversions, with F_ALT
3403  *         set in flags.  The case of hex digits will be correct,
3404  *     There will be at least prec digits, zero-filled on the left if
3405  *         necessary to get that many.
3406  * val          object to be converted
3407  * flags        bitmask of format flags; only F_ALT is looked at
3408  * prec         minimum number of digits; 0-fill on left if needed
3409  * type         a character in [duoxX]; u acts the same as d
3410  *
3411  * CAUTION:  o, x and X conversions on regular ints can never
3412  * produce a '-' sign, but can for Python's unbounded ints.
3413  */
3414 PyObject*
3415 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3416                      char **pbuf, int *plen)
3417 {
3418         PyObject *result = NULL;
3419         char *buf;
3420         int i;
3421         int sign;       /* 1 if '-', else 0 */
3422         int len;        /* number of characters */
3423         int numdigits;  /* len == numnondigits + numdigits */
3424         int numnondigits = 0;
3425
3426         switch (type) {
3427         case 'd':
3428         case 'u':
3429                 result = val->ob_type->tp_str(val);
3430                 break;
3431         case 'o':
3432                 result = val->ob_type->tp_as_number->nb_oct(val);
3433                 break;
3434         case 'x':
3435         case 'X':
3436                 numnondigits = 2;
3437                 result = val->ob_type->tp_as_number->nb_hex(val);
3438                 break;
3439         default:
3440                 assert(!"'type' not in [duoxX]");
3441         }
3442         if (!result)
3443                 return NULL;
3444
3445         /* To modify the string in-place, there can only be one reference. */
3446         if (result->ob_refcnt != 1) {
3447                 PyErr_BadInternalCall();
3448                 return NULL;
3449         }
3450         buf = PyString_AsString(result);
3451         len = PyString_Size(result);
3452         if (buf[len-1] == 'L') {
3453                 --len;
3454                 buf[len] = '\0';
3455         }
3456         sign = buf[0] == '-';
3457         numnondigits += sign;
3458         numdigits = len - numnondigits;
3459         assert(numdigits > 0);
3460
3461         /* Get rid of base marker unless F_ALT */
3462         if ((flags & F_ALT) == 0) {
3463                 /* Need to skip 0x, 0X or 0. */
3464                 int skipped = 0;
3465                 switch (type) {
3466                 case 'o':
3467                         assert(buf[sign] == '0');
3468                         /* If 0 is only digit, leave it alone. */
3469                         if (numdigits > 1) {
3470                                 skipped = 1;
3471                                 --numdigits;
3472                         }
3473                         break;
3474                 case 'x':
3475                 case 'X':
3476                         assert(buf[sign] == '0');
3477                         assert(buf[sign + 1] == 'x');
3478                         skipped = 2;
3479                         numnondigits -= 2;
3480                         break;
3481                 }
3482                 if (skipped) {
3483                         buf += skipped;
3484                         len -= skipped;
3485                         if (sign)
3486                                 buf[0] = '-';
3487                 }
3488                 assert(len == numnondigits + numdigits);
3489                 assert(numdigits > 0);
3490         }
3491
3492         /* Fill with leading zeroes to meet minimum width. */
3493         if (prec > numdigits) {
3494                 PyObject *r1 = PyString_FromStringAndSize(NULL,
3495                                         numnondigits + prec);
3496                 char *b1;
3497                 if (!r1) {
3498                         Py_DECREF(result);
3499                         return NULL;
3500                 }
3501                 b1 = PyString_AS_STRING(r1);
3502                 for (i = 0; i < numnondigits; ++i)
3503                         *b1++ = *buf++;
3504                 for (i = 0; i < prec - numdigits; i++)
3505                         *b1++ = '0';
3506                 for (i = 0; i < numdigits; i++)
3507                         *b1++ = *buf++;
3508                 *b1 = '\0';
3509                 Py_DECREF(result);
3510                 result = r1;
3511                 buf = PyString_AS_STRING(result);
3512                 len = numnondigits + prec;
3513         }
3514
3515         /* Fix up case for hex conversions. */
3516         switch (type) {
3517         case 'x':
3518                 /* Need to convert all upper case letters to lower case. */
3519                 for (i = 0; i < len; i++)
3520                         if (buf[i] >= 'A' && buf[i] <= 'F')
3521                                 buf[i] += 'a'-'A';
3522                 break;
3523         case 'X':
3524                 /* Need to convert 0x to 0X (and -0x to -0X). */
3525                 if (buf[sign + 1] == 'x')
3526                         buf[sign + 1] = 'X';
3527                 break;
3528         }
3529         *pbuf = buf;
3530         *plen = len;
3531         return result;
3532 }
3533
3534 static int
3535 formatint(char *buf, size_t buflen, int flags,
3536           int prec, int type, PyObject *v)
3537 {
3538         /* fmt = '%#.' + `prec` + 'l' + `type`
3539            worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3540            + 1 + 1 = 24 */
3541         char fmt[64];   /* plenty big enough! */
3542         long x;
3543
3544         x = PyInt_AsLong(v);
3545         if (x == -1 && PyErr_Occurred()) {
3546                 PyErr_SetString(PyExc_TypeError, "int argument required");
3547                 return -1;
3548         }
3549         if (x < 0 && type != 'd' && type != 'i') {
3550                 if (PyErr_Warn(PyExc_FutureWarning,
3551                                "%u/%o/%x/%X of negative int will return "
3552                                "a signed string in Python 2.4 and up") < 0)
3553                         return -1;
3554         }
3555         if (prec < 0)
3556                 prec = 1;
3557
3558         if ((flags & F_ALT) &&
3559             (type == 'x' || type == 'X')) {
3560                 /* When converting under %#x or %#X, there are a number
3561                  * of issues that cause pain:
3562                  * - when 0 is being converted, the C standard leaves off
3563                  *   the '0x' or '0X', which is inconsistent with other
3564                  *   %#x/%#X conversions and inconsistent with Python's
3565                  *   hex() function
3566                  * - there are platforms that violate the standard and
3567                  *   convert 0 with the '0x' or '0X'
3568                  *   (Metrowerks, Compaq Tru64)
3569                  * - there are platforms that give '0x' when converting
3570                  *   under %#X, but convert 0 in accordance with the
3571                  *   standard (OS/2 EMX)
3572                  *
3573                  * We can achieve the desired consistency by inserting our
3574                  * own '0x' or '0X' prefix, and substituting %x/%X in place
3575                  * of %#x/%#X.
3576                  *
3577                  * Note that this is the same approach as used in
3578                  * formatint() in unicodeobject.c
3579                  */
3580                 PyOS_snprintf(fmt, sizeof(fmt), "0%c%%.%dl%c",
3581                               type, prec, type);
3582         }
3583         else {
3584                 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%dl%c",
3585                               (flags&F_ALT) ? "#" : "",
3586                               prec, type);
3587         }
3588
3589         /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
3590          * worst case buf = '0x' + [0-9]*prec, where prec >= 11
3591          */
3592         if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
3593                 PyErr_SetString(PyExc_OverflowError,
3594                     "formatted integer is too long (precision too large?)");
3595                 return -1;
3596         }
3597         PyOS_snprintf(buf, buflen, fmt, x);
3598         return strlen(buf);
3599 }
3600
3601 static int
3602 formatchar(char *buf, size_t buflen, PyObject *v)
3603 {
3604         /* presume that the buffer is at least 2 characters long */
3605         if (PyString_Check(v)) {
3606                 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
3607                         return -1;
3608         }
3609         else {
3610                 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
3611                         return -1;
3612         }
3613         buf[1] = '\0';
3614         return 1;
3615 }
3616
3617
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted. XXX This is a magic number. Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format. For now, the current solution is sufficient.

   The replacement list is fully parenthesized so that the macro behaves
   as a single operand in every expression context.
*/
#define FORMATBUFLEN ((size_t)120)
3627
3628 PyObject *
3629 PyString_Format(PyObject *format, PyObject *args)
3630 {
3631         char *fmt, *res;
3632         int fmtcnt, rescnt, reslen, arglen, argidx;
3633         int args_owned = 0;
3634         PyObject *result, *orig_args;
3635 #ifdef Py_USING_UNICODE
3636         PyObject *v, *w;
3637 #endif
3638         PyObject *dict = NULL;
3639         if (format == NULL || !PyString_Check(format) || args == NULL) {
3640                 PyErr_BadInternalCall();
3641                 return NULL;
3642         }
3643         orig_args = args;
3644         fmt = PyString_AS_STRING(format);
3645         fmtcnt = PyString_GET_SIZE(format);
3646         reslen = rescnt = fmtcnt + 100;
3647         result = PyString_FromStringAndSize((char *)NULL, reslen);
3648         if (result == NULL)
3649                 return NULL;
3650         res = PyString_AsString(result);
3651         if (PyTuple_Check(args)) {
3652                 arglen = PyTuple_GET_SIZE(args);
3653                 argidx = 0;
3654         }
3655         else {
3656                 arglen = -1;
3657                 argidx = -2;
3658         }
3659         if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
3660             !PyObject_TypeCheck(args, &PyBaseString_Type))
3661                 dict = args;
3662         while (--fmtcnt >= 0) {
3663                 if (*fmt != '%') {
3664                         if (--rescnt < 0) {
3665                                 rescnt = fmtcnt + 100;
3666                                 reslen += rescnt;
3667                                 if (_PyString_Resize(&result, reslen) < 0)
3668                                         return NULL;
3669                                 res = PyString_AS_STRING(result)
3670                                         + reslen - rescnt;
3671                                 --rescnt;
3672                         }
3673                         *res++ = *fmt++;
3674                 }
3675                 else {
3676                         /* Got a format specifier */
3677                         int flags = 0;
3678                         int width = -1;
3679                         int prec = -1;
3680                         int c = '\0';
3681                         int fill;
3682                         PyObject *v = NULL;
3683                         PyObject *temp = NULL;
3684                         char *pbuf;
3685                         int sign;
3686                         int len;
3687                         char formatbuf[FORMATBUFLEN];
3688                              /* For format{float,int,char}() */
3689 #ifdef Py_USING_UNICODE
3690                         char *fmt_start = fmt;
3691                         int argidx_start = argidx;
3692 #endif
3693
3694                         fmt++;
3695                         if (*fmt == '(') {
3696                                 char *keystart;
3697                                 int keylen;
3698                                 PyObject *key;
3699                                 int pcount = 1;
3700
3701                                 if (dict == NULL) {
3702                                         PyErr_SetString(PyExc_TypeError,
3703                                                  "format requires a mapping");
3704                                         goto error;
3705                                 }
3706                                 ++fmt;
3707                                 --fmtcnt;
3708                                 keystart = fmt;
3709                                 /* Skip over balanced parentheses */
3710                                 while (pcount > 0 && --fmtcnt >= 0) {
3711                                         if (*fmt == ')')
3712                                                 --pcount;
3713                                         else if (*fmt == '(')
3714                                                 ++pcount;
3715                                         fmt++;
3716                                 }
3717                                 keylen = fmt - keystart - 1;
3718                                 if (fmtcnt < 0 || pcount > 0) {
3719                                         PyErr_SetString(PyExc_ValueError,
3720                                                    "incomplete format key");
3721                                         goto error;
3722                                 }
3723                                 key = PyString_FromStringAndSize(keystart,
3724                                                                  keylen);
3725                                 if (key == NULL)
3726                                         goto error;
3727                                 if (args_owned) {
3728                                         Py_DECREF(args);
3729                                         args_owned = 0;
3730                                 }
3731                                 args = PyObject_GetItem(dict, key);
3732                                 Py_DECREF(key);
3733                                 if (args == NULL) {
3734                                         goto error;
3735                                 }
3736                                 args_owned = 1;
3737                                 arglen = -1;
3738                                 argidx = -2;
3739                         }
3740                         while (--fmtcnt >= 0) {
3741                                 switch (c = *fmt++) {
3742                                 case '-': flags |= F_LJUST; continue;
3743                                 case '+': flags |= F_SIGN; continue;
3744                                 case ' ': flags |= F_BLANK; continue;
3745                                 case '#': flags |= F_ALT; continue;
3746                                 case '0': flags |= F_ZERO; continue;
3747                                 }
3748                                 break;
3749                         }
3750                         if (c == '*') {
3751                                 v = getnextarg(args, arglen, &argidx);
3752                                 if (v == NULL)
3753                                         goto error;
3754                                 if (!PyInt_Check(v)) {
3755                                         PyErr_SetString(PyExc_TypeError,
3756                                                         "* wants int");
3757                                         goto error;
3758                                 }
3759                                 width = PyInt_AsLong(v);
3760                                 if (width < 0) {
3761                                         flags |= F_LJUST;
3762                                         width = -width;
3763                                 }
3764                                 if (--fmtcnt >= 0)
3765                                         c = *fmt++;
3766                         }
3767                         else if (c >= 0 && isdigit(c)) {
3768                                 width = c - '0';
3769                                 while (--fmtcnt >= 0) {
3770                                         c = Py_CHARMASK(*fmt++);
3771                                         if (!isdigit(c))
3772                                                 break;
3773                                         if ((width*10) / 10 != width) {
3774                                                 PyErr_SetString(
3775                                                         PyExc_ValueError,
3776                                                         "width too big");
3777                                                 goto error;
3778                                         }
3779                                         width = width*10 + (c - '0');
3780                                 }
3781                         }
3782                         if (c == '.') {
3783                                 prec = 0;
3784                                 if (--fmtcnt >= 0)
3785                                         c = *fmt++;
3786                                 if (c == '*') {
3787                                         v = getnextarg(args, arglen, &argidx);
3788                                         if (v == NULL)
3789                                                 goto error;
3790                                         if (!PyInt_Check(v)) {
3791                                                 PyErr_SetString(
3792                                                         PyExc_TypeError,
3793                                                         "* wants int");
3794                                                 goto error;
3795                                         }
3796                                         prec = PyInt_AsLong(v);
3797                                         if (prec < 0)
3798                                                 prec = 0;
3799                                         if (--fmtcnt >= 0)
3800                                                 c = *fmt++;
3801                                 }
3802                                 else if (c >= 0 && isdigit(c)) {
3803                                         prec = c - '0';
3804                                         while (--fmtcnt >= 0) {
3805                                                 c = Py_CHARMASK(*fmt++);
3806                                                 if (!isdigit(c))
3807                                                         break;
3808                                                 if ((prec*10) / 10 != prec) {
3809                                                         PyErr_SetString(
3810                                                             PyExc_ValueError,
3811                                                             "prec too big");
3812                                                         goto error;
3813                                                 }
3814                                                 prec = prec*10 + (c - '0');
3815                                         }
3816                                 }
3817                         } /* prec */
3818                         if (fmtcnt >= 0) {
3819                                 if (c == 'h' || c == 'l' || c == 'L') {
3820                                         if (--fmtcnt >= 0)
3821                                                 c = *fmt++;
3822                                 }
3823                         }
3824                         if (fmtcnt < 0) {
3825                                 PyErr_SetString(PyExc_ValueError,
3826                                                 "incomplete format");
3827                                 goto error;
3828                         }
3829                         if (c != '%') {
3830                                 v = getnextarg(args, arglen, &argidx);
3831                                 if (v == NULL)
3832                                         goto error;
3833                         }
3834                         sign = 0;
3835                         fill = ' ';
3836                         switch (c) {
3837                         case '%':
3838                                 pbuf = "%";
3839                                 len = 1;
3840                                 break;
3841                         case 's':
3842 #ifdef Py_USING_UNICODE
3843                                 if (PyUnicode_Check(v)) {
3844                                         fmt = fmt_start;
3845                                         argidx = argidx_start;
3846                                         goto unicode;
3847                                 }
3848 #endif
3849                                 /* Fall through */
3850                         case 'r':
3851                                 if (c == 's')
3852                                         temp = PyObject_Str(v);
3853                                 else
3854                                         temp = PyObject_Repr(v);
3855                                 if (temp == NULL)
3856                                         goto error;
3857                                 if (!PyString_Check(temp)) {
3858                                         /* XXX Note: this should never happen,
3859                                            since PyObject_Repr() and
3860                                            PyObject_Str() assure this */
3861                                         PyErr_SetString(PyExc_TypeError,
3862                                           "%s argument has non-string str()");
3863                                         Py_DECREF(temp);
3864                                         goto error;
3865                                 }
3866                                 pbuf = PyString_AS_STRING(temp);
3867                                 len = PyString_GET_SIZE(temp);
3868                                 if (prec >= 0 && len > prec)
3869                                         len = prec;
3870                                 break;
3871                         case 'i':
3872                         case 'd':
3873                         case 'u':
3874                         case 'o':
3875                         case 'x':
3876                         case 'X':
3877                                 if (c == 'i')
3878                                         c = 'd';
3879                                 if (PyLong_Check(v)) {
3880                                         temp = _PyString_FormatLong(v, flags,
3881                                                 prec, c, &pbuf, &len);
3882                                         if (!temp)
3883                                                 goto error;
3884                                         /* unbounded ints can always produce
3885                                            a sign character! */
3886                                         sign = 1;
3887                                 }
3888                                 else {
3889                                         pbuf = formatbuf;
3890                                         len = formatint(pbuf,
3891                                                         sizeof(formatbuf),
3892                                                         flags, prec, c, v);
3893                                         if (len < 0)
3894                                                 goto error;
3895                                         /* only d conversion is signed */
3896                                         sign = c == 'd';
3897                                 }
3898                                 if (flags & F_ZERO)
3899                                         fill = '0';
3900                                 break;
3901                         case 'e':
3902                         case 'E':
3903                         case 'f':
3904                         case 'g':
3905                         case 'G':
3906                                 pbuf = formatbuf;
3907                                 len = formatfloat(pbuf, sizeof(formatbuf),
3908                                                   flags, prec, c, v);
3909                                 if (len < 0)
3910                                         goto error;
3911                                 sign = 1;
3912                                 if (flags & F_ZERO)
3913                                         fill = '0';
3914                                 break;
3915                         case 'c':
3916                                 pbuf = formatbuf;
3917                                 len = formatchar(pbuf, sizeof(formatbuf), v);
3918                                 if (len < 0)
3919                                         goto error;
3920                                 break;
3921                         default:
3922                                 PyErr_Format(PyExc_ValueError,
3923                                   "unsupported format character '%c' (0x%x) "
3924                                   "at index %i",
3925                                   c, c,
3926                                   (int)(fmt - 1 - PyString_AsString(format)));
3927                                 goto error;
3928                         }
3929                         if (sign) {
3930                                 if (*pbuf == '-' || *pbuf == '+') {
3931                                         sign = *pbuf++;
3932                                         len--;
3933                                 }
3934                                 else if (flags & F_SIGN)
3935                                         sign = '+';
3936                                 else if (flags & F_BLANK)
3937                                         sign = ' ';
3938                                 else
3939                                         sign = 0;
3940                         }
3941                         if (width < len)
3942                                 width = len;
3943                         if (rescnt - (sign != 0) < width) {
3944                                 reslen -= rescnt;
3945                                 rescnt = width + fmtcnt + 100;
3946                                 reslen += rescnt;
3947                                 if (reslen < 0) {
3948                                         Py_DECREF(result);
3949                                         return PyErr_NoMemory();
3950                                 }
3951                                 if (_PyString_Resize(&result, reslen) < 0)
3952                                         return NULL;
3953                                 res = PyString_AS_STRING(result)
3954                                         + reslen - rescnt;
3955                         }
3956                         if (sign) {
3957                                 if (fill != ' ')
3958                                         *res++ = sign;
3959                                 rescnt--;
3960                                 if (width > len)
3961                                         width--;
3962                         }
3963                         if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3964                                 assert(pbuf[0] == '0');
3965                                 assert(pbuf[1] == c);
3966                                 if (fill != ' ') {
3967                                         *res++ = *pbuf++;
3968                                         *res++ = *pbuf++;
3969                                 }
3970                                 rescnt -= 2;
3971                                 width -= 2;
3972                                 if (width < 0)
3973                                         width = 0;
3974                                 len -= 2;
3975                         }
3976                         if (width > len && !(flags & F_LJUST)) {
3977                                 do {
3978                                         --rescnt;
3979                                         *res++ = fill;
3980                                 } while (--width > len);
3981                         }
3982                         if (fill == ' ') {
3983                                 if (sign)
3984                                         *res++ = sign;
3985                                 if ((flags & F_ALT) &&
3986                                     (c == 'x' || c == 'X')) {
3987                                         assert(pbuf[0] == '0');
3988                                         assert(pbuf[1] == c);
3989                                         *res++ = *pbuf++;
3990                                         *res++ = *pbuf++;
3991                                 }
3992                         }
3993                         memcpy(res, pbuf, len);
3994                         res += len;
3995                         rescnt -= len;
3996                         while (--width >= len) {
3997                                 --rescnt;
3998                                 *res++ = ' ';
3999                         }
4000                         if (dict && (argidx < arglen) && c != '%') {
4001                                 PyErr_SetString(PyExc_TypeError,
4002                                            "not all arguments converted during string formatting");
4003                                 goto error;
4004                         }
4005                         Py_XDECREF(temp);
4006                 } /* '%' */
4007         } /* until end */
4008         if (argidx < arglen && !dict) {
4009                 PyErr_SetString(PyExc_TypeError,
4010                                 "not all arguments converted during string formatting");
4011                 goto error;
4012         }
4013         if (args_owned) {
4014                 Py_DECREF(args);
4015         }
4016         _PyString_Resize(&result, reslen - rescnt);
4017         return result;
4018
4019 #ifdef Py_USING_UNICODE
4020  unicode:
4021         if (args_owned) {
4022                 Py_DECREF(args);
4023                 args_owned = 0;
4024         }
4025         /* Fiddle args right (remove the first argidx arguments) */
4026         if (PyTuple_Check(orig_args) && argidx > 0) {
4027                 PyObject *v;
4028                 int n = PyTuple_GET_SIZE(orig_args) - argidx;
4029                 v = PyTuple_New(n);
4030                 if (v == NULL)
4031                         goto error;
4032                 while (--n >= 0) {
4033                         PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4034                         Py_INCREF(w);
4035                         PyTuple_SET_ITEM(v, n, w);
4036                 }
4037                 args = v;
4038         } else {
4039                 Py_INCREF(orig_args);
4040                 args = orig_args;
4041         }
4042         args_owned = 1;
4043         /* Take what we have of the result and let the Unicode formatting
4044            function format the rest of the input. */
4045         rescnt = res - PyString_AS_STRING(result);
4046         if (_PyString_Resize(&result, rescnt))
4047                 goto error;
4048         fmtcnt = PyString_GET_SIZE(format) - \
4049                  (fmt - PyString_AS_STRING(format));
4050         format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4051         if (format == NULL)
4052                 goto error;
4053         v = PyUnicode_Format(format, args);
4054         Py_DECREF(format);
4055         if (v == NULL)
4056                 goto error;
4057         /* Paste what we have (result) to what the Unicode formatting
4058            function returned (v) and return the result (or error) */
4059         w = PyUnicode_Concat(result, v);
4060         Py_DECREF(result);
4061         Py_DECREF(v);
4062         Py_DECREF(args);
4063         return w;
4064 #endif /* Py_USING_UNICODE */
4065
4066  error:
4067         Py_DECREF(result);
4068         if (args_owned) {
4069                 Py_DECREF(args);
4070         }
4071         return NULL;
4072 }
4073
4074 void
4075 PyString_InternInPlace(PyObject **p)
4076 {
4077         register PyStringObject *s = (PyStringObject *)(*p);
4078         PyObject *t;
4079         if (s == NULL || !PyString_Check(s))
4080                 Py_FatalError("PyString_InternInPlace: strings only please!");
4081         if (PyString_CHECK_INTERNED(s))
4082                 return;
4083         if (interned == NULL) {
4084                 interned = PyDict_New();
4085                 if (interned == NULL) {
4086                         PyErr_Clear(); /* Don't leave an exception */
4087                         return;
4088                 }
4089         }
4090         if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
4091                 Py_INCREF(t);
4092                 Py_DECREF(*p);
4093                 *p = t;
4094                 return;
4095         }
4096         /* Ensure that only true string objects appear in the intern dict */
4097         if (!PyString_CheckExact(s)) {
4098                 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
4099                                                 PyString_GET_SIZE(s));
4100                 if (t == NULL) {
4101                         PyErr_Clear();
4102                         return;
4103                 }
4104         } else {
4105                 t = (PyObject*) s;
4106                 Py_INCREF(t);
4107         }
4108
4109         if (PyDict_SetItem(interned, t, t) == 0) {
4110                 /* The two references in interned are not counted by
4111                 refcnt.  The string deallocator will take care of this */
4112                 ((PyObject *)t)->ob_refcnt-=2;
4113                 PyString_CHECK_INTERNED(t) = SSTATE_INTERNED_MORTAL;
4114                 Py_DECREF(*p);
4115                 *p = t;
4116                 return;
4117         }
4118         Py_DECREF(t);
4119         PyErr_Clear();
4120 }
4121
4122 void
4123 PyString_InternImmortal(PyObject **p)
4124 {
4125         PyString_InternInPlace(p);
4126         if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4127                 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4128                 Py_INCREF(*p);
4129         }
4130 }
4131
4132
4133 PyObject *
4134 PyString_InternFromString(const char *cp)
4135 {
4136         PyObject *s = PyString_FromString(cp);
4137         if (s == NULL)
4138                 return NULL;
4139         PyString_InternInPlace(&s);
4140         return s;
4141 }
4142
4143 void
4144 PyString_Fini(void)
4145 {
4146         int i;
4147         for (i = 0; i < UCHAR_MAX + 1; i++) {
4148                 Py_XDECREF(characters[i]);
4149                 characters[i] = NULL;
4150         }
4151         Py_XDECREF(nullstring);
4152         nullstring = NULL;
4153 }
4154
4155 void _Py_ReleaseInternedStrings(void)
4156 {
4157         PyObject *keys;
4158         PyStringObject *s;
4159         int i, n;
4160
4161         if (interned == NULL || !PyDict_Check(interned))
4162                 return;
4163         keys = PyDict_Keys(interned);
4164         if (keys == NULL || !PyList_Check(keys)) {
4165                 PyErr_Clear();
4166                 return;
4167         }
4168
4169         /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4170            detector, interned strings are not forcibly deallocated; rather, we
4171            give them their stolen references back, and then clear and DECREF
4172            the interned dict. */
4173
4174         fprintf(stderr, "releasing interned strings\n");
4175         n = PyList_GET_SIZE(keys);
4176         for (i = 0; i < n; i++) {
4177                 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4178                 switch (s->ob_sstate) {
4179                 case SSTATE_NOT_INTERNED:
4180                         /* XXX Shouldn't happen */
4181                         break;
4182                 case SSTATE_INTERNED_IMMORTAL:
4183                         s->ob_refcnt += 1;
4184                         break;
4185                 case SSTATE_INTERNED_MORTAL:
4186                         s->ob_refcnt += 2;
4187                         break;
4188                 default:
4189                         Py_FatalError("Inconsistent interned string state.");
4190                 }
4191                 s->ob_sstate = SSTATE_NOT_INTERNED;
4192         }
4193         Py_DECREF(keys);
4194         PyDict_Clear(interned);
4195         Py_DECREF(interned);
4196         interned = NULL;
4197 }