Objects/stringobject.c

   1
   2 /* String object implementation */
   3
   4 #include "Python.h"
   5
   6 #include <ctype.h>
   7
   8 #ifdef COUNT_ALLOCS
   9 int null_strings, one_strings;
  10 #endif
  11
  12 #if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
  13 #define UCHAR_MAX 255
  14 #endif
  15
  16 static PyStringObject *characters[UCHAR_MAX + 1];
  17 #ifndef DONT_SHARE_SHORT_STRINGS
  18 static PyStringObject *nullstring;
  19 #endif
  20
  21 /*
  22    Newsizedstringobject() and newstringobject() try in certain cases
  23    to share string objects.  When the size of the string is zero,
  24    these routines always return a pointer to the same string object;
  25    when the size is one, they return a pointer to an already existing
  26    object if the contents of the string is known.  For
  27    newstringobject() this is always the case, for
  28    newsizedstringobject() this is the case when the first argument in
  29    not NULL.
  30    A common practice to allocate a string and then fill it in or
  31    change it must be done carefully.  It is only allowed to change the
  32    contents of the string if the obect was gotten from
  33    newsizedstringobject() with a NULL first argument, because in the
  34    future these routines may try to do even more sharing of objects.
  35 */
  36 PyObject *
  37 PyString_FromStringAndSize(const char *str, int size)
  38 {
  39         register PyStringObject *op;
  40 #ifndef DONT_SHARE_SHORT_STRINGS
  41         if (size == 0 && (op = nullstring) != NULL) {
  42 #ifdef COUNT_ALLOCS
  43                 null_strings++;
  44 #endif
  45                 Py_INCREF(op);
  46                 return (PyObject *)op;
  47         }
  48         if (size == 1 && str != NULL &&
  49             (op = characters[*str & UCHAR_MAX]) != NULL)
  50         {
  51 #ifdef COUNT_ALLOCS
  52                 one_strings++;
  53 #endif
  54                 Py_INCREF(op);
  55                 return (PyObject *)op;
  56         }
  57 #endif /* DONT_SHARE_SHORT_STRINGS */
  58
  59         /* PyObject_NewVar is inlined */
  60         op = (PyStringObject *)
  61                 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
  62         if (op == NULL)
  63                 return PyErr_NoMemory();
  64         PyObject_INIT_VAR(op, &PyString_Type, size);
  65 #ifdef CACHE_HASH
  66         op->ob_shash = -1;
  67 #endif
  68 #ifdef INTERN_STRINGS
  69         op->ob_sinterned = NULL;
  70 #endif
  71         if (str != NULL)
  72                 memcpy(op->ob_sval, str, size);
  73         op->ob_sval[size] = '\0';
  74 #ifndef DONT_SHARE_SHORT_STRINGS
  75         if (size == 0) {
  76                 PyObject *t = (PyObject *)op;
  77                 PyString_InternInPlace(&t);
  78                 op = (PyStringObject *)t;
  79                 nullstring = op;
  80                 Py_INCREF(op);
  81         } else if (size == 1 && str != NULL) {
  82                 PyObject *t = (PyObject *)op;
  83                 PyString_InternInPlace(&t);
  84                 op = (PyStringObject *)t;
  85                 characters[*str & UCHAR_MAX] = op;
  86                 Py_INCREF(op);
  87         }
  88 #endif
  89         return (PyObject *) op;
  90 }
  91
  92 PyObject *
  93 PyString_FromString(const char *str)
  94 {
  95         register size_t size = strlen(str);
  96         register PyStringObject *op;
  97         if (size > INT_MAX) {
  98                 PyErr_SetString(PyExc_OverflowError,
  99                         "string is too long for a Python string");
 100                 return NULL;
 101         }
 102 #ifndef DONT_SHARE_SHORT_STRINGS
 103         if (size == 0 && (op = nullstring) != NULL) {
 104 #ifdef COUNT_ALLOCS
 105                 null_strings++;
 106 #endif
 107                 Py_INCREF(op);
 108                 return (PyObject *)op;
 109         }
 110         if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
 111 #ifdef COUNT_ALLOCS
 112                 one_strings++;
 113 #endif
 114                 Py_INCREF(op);
 115                 return (PyObject *)op;
 116         }
 117 #endif /* DONT_SHARE_SHORT_STRINGS */
 118
 119         /* PyObject_NewVar is inlined */
 120         op = (PyStringObject *)
 121                 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
 122         if (op == NULL)
 123                 return PyErr_NoMemory();
 124         PyObject_INIT_VAR(op, &PyString_Type, size);
 125 #ifdef CACHE_HASH
 126         op->ob_shash = -1;
 127 #endif
 128 #ifdef INTERN_STRINGS
 129         op->ob_sinterned = NULL;
 130 #endif
 131         strcpy(op->ob_sval, str);
 132 #ifndef DONT_SHARE_SHORT_STRINGS
 133         if (size == 0) {
 134                 PyObject *t = (PyObject *)op;
 135                 PyString_InternInPlace(&t);
 136                 op = (PyStringObject *)t;
 137                 nullstring = op;
 138                 Py_INCREF(op);
 139         } else if (size == 1) {
 140                 PyObject *t = (PyObject *)op;
 141                 PyString_InternInPlace(&t);
 142                 op = (PyStringObject *)t;
 143                 characters[*str & UCHAR_MAX] = op;
 144                 Py_INCREF(op);
 145         }
 146 #endif
 147         return (PyObject *) op;
 148 }
 149
 150 PyObject *
 151 PyString_FromFormatV(const char *format, va_list vargs)
 152 {
 153         va_list count = vargs;
 154         int n = 0;
 155         const char* f;
 156         char *s;
 157         PyObject* string;
 158
 159         /* step 1: figure out how large a buffer we need */
 160         for (f = format; *f; f++) {
 161                 if (*f == '%') {
 162                         const char* p = f;
 163                         while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
 164                                 ;
 165
 166                         /* skip the 'l' in %ld, since it doesn't change the
 167                            width.  although only %d is supported (see
 168                            "expand" section below), others can be easily
 169                            add */
 170                         if (*f == 'l' && *(f+1) == 'd')
 171                                 ++f;
 172
 173                         switch (*f) {
 174                         case 'c':
 175                                 (void)va_arg(count, int);
 176                                 /* fall through... */
 177                         case '%':
 178                                 n++;
 179                                 break;
 180                         case 'd': case 'i': case 'x':
 181                                 (void) va_arg(count, int);
 182                                 /* 20 bytes should be enough to hold a 64-bit
 183                                    integer */
 184                                 n += 20;
 185                                 break;
 186                         case 's':
 187                                 s = va_arg(count, char*);
 188                                 n += strlen(s);
 189                                 break;
 190                         case 'p':
 191                                 (void) va_arg(count, int);
 192                                 /* maximum 64-bit pointer representation:
 193                                  * 0xffffffffffffffff
 194                                  * so 19 characters is enough.
 195                                  */
 196                                 n += 19;
 197                                 break;
 198                         default:
 199                                 /* if we stumble upon an unknown
 200                                    formatting code, copy the rest of
 201                                    the format string to the output
 202                                    string. (we cannot just skip the
 203                                    code, since there's no way to know
 204                                    what's in the argument list) */
 205                                 n += strlen(p);
 206                                 goto expand;
 207                         }
 208                 } else
 209                         n++;
 210         }
 211  expand:
 212         /* step 2: fill the buffer */
 213         string = PyString_FromStringAndSize(NULL, n);
 214         if (!string)
 215                 return NULL;
 216
 217         s = PyString_AsString(string);
 218
 219         for (f = format; *f; f++) {
 220                 if (*f == '%') {
 221                         const char* p = f++;
 222                         int i, longflag = 0;
 223                         /* parse the width.precision part (we're only
 224                            interested in the precision value, if any) */
 225                         n = 0;
 226                         while (isdigit(Py_CHARMASK(*f)))
 227                                 n = (n*10) + *f++ - '0';
 228                         if (*f == '.') {
 229                                 f++;
 230                                 n = 0;
 231                                 while (isdigit(Py_CHARMASK(*f)))
 232                                         n = (n*10) + *f++ - '0';
 233                         }
 234                         while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
 235                                 f++;
 236                         /* handle the long flag, but only for %ld.  others
 237                            can be added when necessary. */
 238                         if (*f == 'l' && *(f+1) == 'd') {
 239                                 longflag = 1;
 240                                 ++f;
 241                         }
 242
 243                         switch (*f) {
 244                         case 'c':
 245                                 *s++ = va_arg(vargs, int);
 246                                 break;
 247                         case 'd':
 248                                 if (longflag)
 249                                         sprintf(s, "%ld", va_arg(vargs, long));
 250                                 else
 251                                         sprintf(s, "%d", va_arg(vargs, int));
 252                                 s += strlen(s);
 253                                 break;
 254                         case 'i':
 255                                 sprintf(s, "%i", va_arg(vargs, int));
 256                                 s += strlen(s);
 257                                 break;
 258                         case 'x':
 259                                 sprintf(s, "%x", va_arg(vargs, int));
 260                                 s += strlen(s);
 261                                 break;
 262                         case 's':
 263                                 p = va_arg(vargs, char*);
 264                                 i = strlen(p);
 265                                 if (n > 0 && i > n)
 266                                         i = n;
 267                                 memcpy(s, p, i);
 268                                 s += i;
 269                                 break;
 270                         case 'p':
 271                                 sprintf(s, "%p", va_arg(vargs, void*));
 272                                 /* %p is ill-defined:  ensure leading 0x. */
 273                                 if (s[1] == 'X')
 274                                         s[1] = 'x';
 275                                 else if (s[1] != 'x') {
 276                                         memmove(s+2, s, strlen(s)+1);
 277                                         s[0] = '0';
 278                                         s[1] = 'x';
 279                                 }
 280                                 s += strlen(s);
 281                                 break;
 282                         case '%':
 283                                 *s++ = '%';
 284                                 break;
 285                         default:
 286                                 strcpy(s, p);
 287                                 s += strlen(s);
 288                                 goto end;
 289                         }
 290                 } else
 291                         *s++ = *f;
 292         }
 293
 294  end:
 295         _PyString_Resize(&string, s - PyString_AS_STRING(string));
 296         return string;
 297 }
 298
 299 PyObject *
 300 PyString_FromFormat(const char *format, ...)
 301 {
 302         PyObject* ret;
 303         va_list vargs;
 304
 305 #ifdef HAVE_STDARG_PROTOTYPES
 306         va_start(vargs, format);
 307 #else
 308         va_start(vargs);
 309 #endif
 310         ret = PyString_FromFormatV(format, vargs);
 311         va_end(vargs);
 312         return ret;
 313 }
 314
 315
 316 PyObject *PyString_Decode(const char *s,
 317                           int size,
 318                           const char *encoding,
 319                           const char *errors)
 320 {
 321     PyObject *v, *str;
 322
 323     str = PyString_FromStringAndSize(s, size);
 324     if (str == NULL)
 325         return NULL;
 326     v = PyString_AsDecodedString(str, encoding, errors);
 327     Py_DECREF(str);
 328     return v;
 329 }
 330
 331 PyObject *PyString_AsDecodedObject(PyObject *str,
 332                                    const char *encoding,
 333                                    const char *errors)
 334 {
 335     PyObject *v;
 336
 337     if (!PyString_Check(str)) {
 338         PyErr_BadArgument();
 339         goto onError;
 340     }
 341
 342     if (encoding == NULL) {
 343 #ifdef Py_USING_UNICODE
 344         encoding = PyUnicode_GetDefaultEncoding();
 345 #else
 346         PyErr_SetString(PyExc_ValueError, "no encoding specified");
 347         goto onError;
 348 #endif
 349     }
 350
 351     /* Decode via the codec registry */
 352     v = PyCodec_Decode(str, encoding, errors);
 353     if (v == NULL)
 354         goto onError;
 355
 356     return v;
 357
 358  onError:
 359     return NULL;
 360 }
 361
 362 PyObject *PyString_AsDecodedString(PyObject *str,
 363                                    const char *encoding,
 364                                    const char *errors)
 365 {
 366     PyObject *v;
 367
 368     v = PyString_AsDecodedObject(str, encoding, errors);
 369     if (v == NULL)
 370         goto onError;
 371
 372 #ifdef Py_USING_UNICODE
 373     /* Convert Unicode to a string using the default encoding */
 374     if (PyUnicode_Check(v)) {
 375         PyObject *temp = v;
 376         v = PyUnicode_AsEncodedString(v, NULL, NULL);
 377         Py_DECREF(temp);
 378         if (v == NULL)
 379             goto onError;
 380     }
 381 #endif
 382     if (!PyString_Check(v)) {
 383         PyErr_Format(PyExc_TypeError,
 384                      "decoder did not return a string object (type=%.400s)",
 385                      v->ob_type->tp_name);
 386         Py_DECREF(v);
 387         goto onError;
 388     }
 389
 390     return v;
 391
 392  onError:
 393     return NULL;
 394 }
 395
 396 PyObject *PyString_Encode(const char *s,
 397                           int size,
 398                           const char *encoding,
 399                           const char *errors)
 400 {
 401     PyObject *v, *str;
 402
 403     str = PyString_FromStringAndSize(s, size);
 404     if (str == NULL)
 405         return NULL;
 406     v = PyString_AsEncodedString(str, encoding, errors);
 407     Py_DECREF(str);
 408     return v;
 409 }
 410
 411 PyObject *PyString_AsEncodedObject(PyObject *str,
 412                                    const char *encoding,
 413                                    const char *errors)
 414 {
 415     PyObject *v;
 416
 417     if (!PyString_Check(str)) {
 418         PyErr_BadArgument();
 419         goto onError;
 420     }
 421
 422     if (encoding == NULL) {
 423 #ifdef Py_USING_UNICODE
 424         encoding = PyUnicode_GetDefaultEncoding();
 425 #else
 426         PyErr_SetString(PyExc_ValueError, "no encoding specified");
 427         goto onError;
 428 #endif
 429     }
 430
 431     /* Encode via the codec registry */
 432     v = PyCodec_Encode(str, encoding, errors);
 433     if (v == NULL)
 434         goto onError;
 435
 436     return v;
 437
 438  onError:
 439     return NULL;
 440 }
 441
 442 PyObject *PyString_AsEncodedString(PyObject *str,
 443                                    const char *encoding,
 444                                    const char *errors)
 445 {
 446     PyObject *v;
 447
 448     v = PyString_AsEncodedObject(str, encoding, errors);
 449     if (v == NULL)
 450         goto onError;
 451
 452 #ifdef Py_USING_UNICODE
 453     /* Convert Unicode to a string using the default encoding */
 454     if (PyUnicode_Check(v)) {
 455         PyObject *temp = v;
 456         v = PyUnicode_AsEncodedString(v, NULL, NULL);
 457         Py_DECREF(temp);
 458         if (v == NULL)
 459             goto onError;
 460     }
 461 #endif
 462     if (!PyString_Check(v)) {
 463         PyErr_Format(PyExc_TypeError,
 464                      "encoder did not return a string object (type=%.400s)",
 465                      v->ob_type->tp_name);
 466         Py_DECREF(v);
 467         goto onError;
 468     }
 469
 470     return v;
 471
 472  onError:
 473     return NULL;
 474 }
 475
 476 static void
 477 string_dealloc(PyObject *op)
 478 {
 479         PyObject_DEL(op);
 480 }
 481
 482 static int
 483 string_getsize(register PyObject *op)
 484 {
 485         char *s;
 486         int len;
 487         if (PyString_AsStringAndSize(op, &s, &len))
 488                 return -1;
 489         return len;
 490 }
 491
 492 static /*const*/ char *
 493 string_getbuffer(register PyObject *op)
 494 {
 495         char *s;
 496         int len;
 497         if (PyString_AsStringAndSize(op, &s, &len))
 498                 return NULL;
 499         return s;
 500 }
 501
 502 int
 503 PyString_Size(register PyObject *op)
 504 {
 505         if (!PyString_Check(op))
 506                 return string_getsize(op);
 507         return ((PyStringObject *)op) -> ob_size;
 508 }
 509
 510 /*const*/ char *
 511 PyString_AsString(register PyObject *op)
 512 {
 513         if (!PyString_Check(op))
 514                 return string_getbuffer(op);
 515         return ((PyStringObject *)op) -> ob_sval;
 516 }
 517
 518 int
 519 PyString_AsStringAndSize(register PyObject *obj,
 520                          register char **s,
 521                          register int *len)
 522 {
 523         if (s == NULL) {
 524                 PyErr_BadInternalCall();
 525                 return -1;
 526         }
 527
 528         if (!PyString_Check(obj)) {
 529 #ifdef Py_USING_UNICODE
 530                 if (PyUnicode_Check(obj)) {
 531                         obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
 532                         if (obj == NULL)
 533                                 return -1;
 534                 }
 535                 else
 536 #endif
 537                 {
 538                         PyErr_Format(PyExc_TypeError,
 539                                      "expected string or Unicode object, "
 540                                      "%.200s found", obj->ob_type->tp_name);
 541                         return -1;
 542                 }
 543         }
 544
 545         *s = PyString_AS_STRING(obj);
 546         if (len != NULL)
 547                 *len = PyString_GET_SIZE(obj);
 548         else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
 549                 PyErr_SetString(PyExc_TypeError,
 550                                 "expected string without null bytes");
 551                 return -1;
 552         }
 553         return 0;
 554 }
 555
 556 /* Methods */
 557
 558 static int
 559 string_print(PyStringObject *op, FILE *fp, int flags)
 560 {
 561         int i;
 562         char c;
 563         int quote;
 564         /* XXX Ought to check for interrupts when writing long strings */
 565         if (flags & Py_PRINT_RAW) {
 566                 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
 567                 return 0;
 568         }
 569
 570         /* figure out which quote to use; single is preferred */
 571         quote = '\'';
 572         if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
 573                 quote = '"';
 574
 575         fputc(quote, fp);
 576         for (i = 0; i < op->ob_size; i++) {
 577                 c = op->ob_sval[i];
 578                 if (c == quote || c == '\\')
 579                         fprintf(fp, "\\%c", c);
 580                 else if (c == '\t')
 581                         fprintf(fp, "\\t");
 582                 else if (c == '\n')
 583                         fprintf(fp, "\\n");
 584                 else if (c == '\r')
 585                         fprintf(fp, "\\r");
 586                 else if (c < ' ' || c >= 0x7f)
 587                         fprintf(fp, "\\x%02x", c & 0xff);
 588                 else
 589                         fputc(c, fp);
 590         }
 591         fputc(quote, fp);
 592         return 0;
 593 }
 594
 595 static PyObject *
 596 string_repr(register PyStringObject *op)
 597 {
 598         size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
 599         PyObject *v;
 600         if (newsize > INT_MAX) {
 601                 PyErr_SetString(PyExc_OverflowError,
 602                         "string is too large to make repr");
 603         }
 604         v = PyString_FromStringAndSize((char *)NULL, newsize);
 605         if (v == NULL) {
 606                 return NULL;
 607         }
 608         else {
 609                 register int i;
 610                 register char c;
 611                 register char *p;
 612                 int quote;
 613
 614                 /* figure out which quote to use; single is preferred */
 615                 quote = '\'';
 616                 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
 617                         quote = '"';
 618
 619                 p = ((PyStringObject *)v)->ob_sval;
 620                 *p++ = quote;
 621                 for (i = 0; i < op->ob_size; i++) {
 622                         c = op->ob_sval[i];
 623                         if (c == quote || c == '\\')
 624                                 *p++ = '\\', *p++ = c;
 625                         else if (c == '\t')
 626                                 *p++ = '\\', *p++ = 't';
 627                         else if (c == '\n')
 628                                 *p++ = '\\', *p++ = 'n';
 629                         else if (c == '\r')
 630                                 *p++ = '\\', *p++ = 'r';
 631                         else if (c < ' ' || c >= 0x7f) {
 632                                 sprintf(p, "\\x%02x", c & 0xff);
 633                                 p += 4;
 634                         }
 635                         else
 636                                 *p++ = c;
 637                 }
 638                 *p++ = quote;
 639                 *p = '\0';
 640                 _PyString_Resize(
 641                         &v, (int) (p - ((PyStringObject *)v)->ob_sval));
 642                 return v;
 643         }
 644 }
 645
 646 static PyObject *
 647 string_str(PyObject *s)
 648 {
 649         Py_INCREF(s);
 650         return s;
 651 }
 652
 653 static int
 654 string_length(PyStringObject *a)
 655 {
 656         return a->ob_size;
 657 }
 658
 659 static PyObject *
 660 string_concat(register PyStringObject *a, register PyObject *bb)
 661 {
 662         register unsigned int size;
 663         register PyStringObject *op;
 664         if (!PyString_Check(bb)) {
 665 #ifdef Py_USING_UNICODE
 666                 if (PyUnicode_Check(bb))
 667                     return PyUnicode_Concat((PyObject *)a, bb);
 668 #endif
 669                 PyErr_Format(PyExc_TypeError,
 670                              "cannot add type \"%.200s\" to string",
 671                              bb->ob_type->tp_name);
 672                 return NULL;
 673         }
 674 #define b ((PyStringObject *)bb)
 675         /* Optimize cases with empty left or right operand */
 676         if ((a->ob_size == 0 || b->ob_size == 0) &&
 677             PyString_CheckExact(a) && PyString_CheckExact(b)) {
 678                 if (a->ob_size == 0) {
 679                         Py_INCREF(bb);
 680                         return bb;
 681                 }
 682                 Py_INCREF(a);
 683                 return (PyObject *)a;
 684         }
 685         size = a->ob_size + b->ob_size;
 686         /* PyObject_NewVar is inlined */
 687         op = (PyStringObject *)
 688                 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
 689         if (op == NULL)
 690                 return PyErr_NoMemory();
 691         PyObject_INIT_VAR(op, &PyString_Type, size);
 692 #ifdef CACHE_HASH
 693         op->ob_shash = -1;
 694 #endif
 695 #ifdef INTERN_STRINGS
 696         op->ob_sinterned = NULL;
 697 #endif
 698         memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
 699         memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
 700         op->ob_sval[size] = '\0';
 701         return (PyObject *) op;
 702 #undef b
 703 }
 704
 705 static PyObject *
 706 string_repeat(register PyStringObject *a, register int n)
 707 {
 708         register int i;
 709         register int size;
 710         register PyStringObject *op;
 711         size_t nbytes;
 712         if (n < 0)
 713                 n = 0;
 714         /* watch out for overflows:  the size can overflow int,
 715          * and the # of bytes needed can overflow size_t
 716          */
 717         size = a->ob_size * n;
 718         if (n && size / n != a->ob_size) {
 719                 PyErr_SetString(PyExc_OverflowError,
 720                         "repeated string is too long");
 721                 return NULL;
 722         }
 723         if (size == a->ob_size && PyString_CheckExact(a)) {
 724                 Py_INCREF(a);
 725                 return (PyObject *)a;
 726         }
 727         nbytes = size * sizeof(char);
 728         if (nbytes / sizeof(char) != (size_t)size ||
 729             nbytes + sizeof(PyStringObject) <= nbytes) {
 730                 PyErr_SetString(PyExc_OverflowError,
 731                         "repeated string is too long");
 732                 return NULL;
 733         }
 734         op = (PyStringObject *)
 735                 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
 736         if (op == NULL)
 737                 return PyErr_NoMemory();
 738         PyObject_INIT_VAR(op, &PyString_Type, size);
 739 #ifdef CACHE_HASH
 740         op->ob_shash = -1;
 741 #endif
 742 #ifdef INTERN_STRINGS
 743         op->ob_sinterned = NULL;
 744 #endif
 745         for (i = 0; i < size; i += a->ob_size)
 746                 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
 747         op->ob_sval[size] = '\0';
 748         return (PyObject *) op;
 749 }
 750
 751 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
 752
 753 static PyObject *
 754 string_slice(register PyStringObject *a, register int i, register int j)
 755      /* j -- may be negative! */
 756 {
 757         if (i < 0)
 758                 i = 0;
 759         if (j < 0)
 760                 j = 0; /* Avoid signed/unsigned bug in next line */
 761         if (j > a->ob_size)
 762                 j = a->ob_size;
 763         if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
 764                 /* It's the same as a */
 765                 Py_INCREF(a);
 766                 return (PyObject *)a;
 767         }
 768         if (j < i)
 769                 j = i;
 770         return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
 771 }
 772
 773 static int
 774 string_contains(PyObject *a, PyObject *el)
 775 {
 776         register char *s, *end;
 777         register char c;
 778 #ifdef Py_USING_UNICODE
 779         if (PyUnicode_Check(el))
 780                 return PyUnicode_Contains(a, el);
 781 #endif
 782         if (!PyString_Check(el) || PyString_Size(el) != 1) {
 783                 PyErr_SetString(PyExc_TypeError,
 784                     "'in <string>' requires character as left operand");
 785                 return -1;
 786         }
 787         c = PyString_AsString(el)[0];
 788         s = PyString_AsString(a);
 789         end = s + PyString_Size(a);
 790         while (s < end) {
 791                 if (c == *s++)
 792                         return 1;
 793         }
 794         return 0;
 795 }
 796
 797 static PyObject *
 798 string_item(PyStringObject *a, register int i)
 799 {
 800         PyObject *v;
 801         char *pchar;
 802         if (i < 0 || i >= a->ob_size) {
 803                 PyErr_SetString(PyExc_IndexError, "string index out of range");
 804                 return NULL;
 805         }
 806         pchar = a->ob_sval + i;
 807         v = (PyObject *)characters[*pchar & UCHAR_MAX];
 808         if (v == NULL)
 809                 v = PyString_FromStringAndSize(pchar, 1);
 810         else {
 811 #ifdef COUNT_ALLOCS
 812                 one_strings++;
 813 #endif
 814                 Py_INCREF(v);
 815         }
 816         return v;
 817 }
 818
 819 static PyObject*
 820 string_richcompare(PyStringObject *a, PyStringObject *b, int op)
 821 {
 822         int c;
 823         int len_a, len_b;
 824         int min_len;
 825         PyObject *result;
 826
 827         /* May sure both arguments use string comparison.
 828            This implies PyString_Check(a) && PyString_Check(b). */
 829         if (a->ob_type->tp_richcompare != (richcmpfunc)string_richcompare ||
 830             b->ob_type->tp_richcompare != (richcmpfunc)string_richcompare) {
 831                 result = Py_NotImplemented;
 832                 goto out;
 833         }
 834         if (a == b) {
 835                 switch (op) {
 836                 case Py_EQ:case Py_LE:case Py_GE:
 837                         result = Py_True;
 838                         goto out;
 839                 case Py_NE:case Py_LT:case Py_GT:
 840                         result = Py_False;
 841                         goto out;
 842                 }
 843         }
 844         if (op == Py_EQ) {
 845                 /* Supporting Py_NE here as well does not save
 846                    much time, since Py_NE is rarely used.  */
 847                 if (a->ob_size == b->ob_size
 848                     && (a->ob_sval[0] == b->ob_sval[0]
 849                         && memcmp(a->ob_sval, b->ob_sval,
 850                                   a->ob_size) == 0)) {
 851                         result = Py_True;
 852                 } else {
 853                         result = Py_False;
 854                 }
 855                 goto out;
 856         }
 857         len_a = a->ob_size; len_b = b->ob_size;
 858         min_len = (len_a < len_b) ? len_a : len_b;
 859         if (min_len > 0) {
 860                 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
 861                 if (c==0)
 862                         c = memcmp(a->ob_sval, b->ob_sval, min_len);
 863         }else
 864                 c = 0;
 865         if (c == 0)
 866                 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
 867         switch (op) {
 868         case Py_LT: c = c <  0; break;
 869         case Py_LE: c = c <= 0; break;
 870         case Py_EQ: assert(0);  break; /* unreachable */
 871         case Py_NE: c = c != 0; break;
 872         case Py_GT: c = c >  0; break;
 873         case Py_GE: c = c >= 0; break;
 874         default:
 875                 result = Py_NotImplemented;
 876                 goto out;
 877         }
 878         result = c ? Py_True : Py_False;
 879   out:
 880         Py_INCREF(result);
 881         return result;
 882 }
 883
 884 int
 885 _PyString_Eq(PyObject *o1, PyObject *o2)
 886 {
 887         PyStringObject *a, *b;
 888         a = (PyStringObject*)o1;
 889         b = (PyStringObject*)o2;
 890         return a->ob_size == b->ob_size
 891           && *a->ob_sval == *b->ob_sval
 892           && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
 893 }
 894
 895 static long
 896 string_hash(PyStringObject *a)
 897 {
 898         register int len;
 899         register unsigned char *p;
 900         register long x;
 901
 902 #ifdef CACHE_HASH
 903         if (a->ob_shash != -1)
 904                 return a->ob_shash;
 905 #ifdef INTERN_STRINGS
 906         if (a->ob_sinterned != NULL)
 907                 return (a->ob_shash =
 908                         ((PyStringObject *)(a->ob_sinterned))->ob_shash);
 909 #endif
 910 #endif
 911         len = a->ob_size;
 912         p = (unsigned char *) a->ob_sval;
 913         x = *p << 7;
 914         while (--len >= 0)
 915                 x = (1000003*x) ^ *p++;
 916         x ^= a->ob_size;
 917         if (x == -1)
 918                 x = -2;
 919 #ifdef CACHE_HASH
 920         a->ob_shash = x;
 921 #endif
 922         return x;
 923 }
 924
 925 static int
 926 string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
 927 {
 928         if ( index != 0 ) {
 929                 PyErr_SetString(PyExc_SystemError,
 930                                 "accessing non-existent string segment");
 931                 return -1;
 932         }
 933         *ptr = (void *)self->ob_sval;
 934         return self->ob_size;
 935 }
 936
 937 static int
 938 string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
 939 {
 940         PyErr_SetString(PyExc_TypeError,
 941                         "Cannot use string as modifiable buffer");
 942         return -1;
 943 }
 944
 945 static int
 946 string_buffer_getsegcount(PyStringObject *self, int *lenp)
 947 {
 948         if ( lenp )
 949                 *lenp = self->ob_size;
 950         return 1;
 951 }
 952
 953 static int
 954 string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
 955 {
 956         if ( index != 0 ) {
 957                 PyErr_SetString(PyExc_SystemError,
 958                                 "accessing non-existent string segment");
 959                 return -1;
 960         }
 961         *ptr = self->ob_sval;
 962         return self->ob_size;
 963 }
 964
/* Sequence-protocol slot table for strings.  Entries are positional,
   so their order must follow the PySequenceMethods layout.  Both
   assignment slots are 0: no item or slice assignment is provided. */
static PySequenceMethods string_as_sequence = {
        (inquiry)string_length, /*sq_length*/
        (binaryfunc)string_concat, /*sq_concat*/
        (intargfunc)string_repeat, /*sq_repeat*/
        (intargfunc)string_item, /*sq_item*/
        (intintargfunc)string_slice, /*sq_slice*/
        0,              /*sq_ass_item*/
        0,              /*sq_ass_slice*/
        (objobjproc)string_contains /*sq_contains*/
};
 975
/* Buffer-protocol slot table for strings, in positional order: read
   buffer, write buffer (always fails with TypeError), segment count,
   character buffer. */
static PyBufferProcs string_as_buffer = {
        (getreadbufferproc)string_buffer_getreadbuf,
        (getwritebufferproc)string_buffer_getwritebuf,
        (getsegcountproc)string_buffer_getsegcount,
        (getcharbufferproc)string_buffer_getcharbuf,
};
 982
 983
 984 \f
/* Values for the striptype argument of do_strip(): which end(s) of the
   string have their whitespace removed. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
 988
 989
 990 static PyObject *
 991 split_whitespace(const char *s, int len, int maxsplit)
 992 {
 993         int i, j, err;
 994         PyObject* item;
 995         PyObject *list = PyList_New(0);
 996
 997         if (list == NULL)
 998                 return NULL;
 999
1000         for (i = j = 0; i < len; ) {
1001                 while (i < len && isspace(Py_CHARMASK(s[i])))
1002                         i++;
1003                 j = i;
1004                 while (i < len && !isspace(Py_CHARMASK(s[i])))
1005                         i++;
1006                 if (j < i) {
1007                         if (maxsplit-- <= 0)
1008                                 break;
1009                         item = PyString_FromStringAndSize(s+j, (int)(i-j));
1010                         if (item == NULL)
1011                                 goto finally;
1012                         err = PyList_Append(list, item);
1013                         Py_DECREF(item);
1014                         if (err < 0)
1015                                 goto finally;
1016                         while (i < len && isspace(Py_CHARMASK(s[i])))
1017                                 i++;
1018                         j = i;
1019                 }
1020         }
1021         if (j < len) {
1022                 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1023                 if (item == NULL)
1024                         goto finally;
1025                 err = PyList_Append(list, item);
1026                 Py_DECREF(item);
1027                 if (err < 0)
1028                         goto finally;
1029         }
1030         return list;
1031   finally:
1032         Py_DECREF(list);
1033         return NULL;
1034 }
1035
1036
1037 static char split__doc__[] =
1038 "S.split([sep [,maxsplit]]) -> list of strings\n\
1039 \n\
1040 Return a list of the words in the string S, using sep as the\n\
1041 delimiter string.  If maxsplit is given, at most maxsplit\n\
1042 splits are done. If sep is not specified, any whitespace string\n\
1043 is a separator.";
1044
1045 static PyObject *
1046 string_split(PyStringObject *self, PyObject *args)
1047 {
1048         int len = PyString_GET_SIZE(self), n, i, j, err;
1049         int maxsplit = -1;
1050         const char *s = PyString_AS_STRING(self), *sub;
1051         PyObject *list, *item, *subobj = Py_None;
1052
1053         if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
1054                 return NULL;
1055         if (maxsplit < 0)
1056                 maxsplit = INT_MAX;
1057         if (subobj == Py_None)
1058                 return split_whitespace(s, len, maxsplit);
1059         if (PyString_Check(subobj)) {
1060                 sub = PyString_AS_STRING(subobj);
1061                 n = PyString_GET_SIZE(subobj);
1062         }
1063 #ifdef Py_USING_UNICODE
1064         else if (PyUnicode_Check(subobj))
1065                 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1066 #endif
1067         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1068                 return NULL;
1069         if (n == 0) {
1070                 PyErr_SetString(PyExc_ValueError, "empty separator");
1071                 return NULL;
1072         }
1073
1074         list = PyList_New(0);
1075         if (list == NULL)
1076                 return NULL;
1077
1078         i = j = 0;
1079         while (i+n <= len) {
1080                 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1081                         if (maxsplit-- <= 0)
1082                                 break;
1083                         item = PyString_FromStringAndSize(s+j, (int)(i-j));
1084                         if (item == NULL)
1085                                 goto fail;
1086                         err = PyList_Append(list, item);
1087                         Py_DECREF(item);
1088                         if (err < 0)
1089                                 goto fail;
1090                         i = j = i + n;
1091                 }
1092                 else
1093                         i++;
1094         }
1095         item = PyString_FromStringAndSize(s+j, (int)(len-j));
1096         if (item == NULL)
1097                 goto fail;
1098         err = PyList_Append(list, item);
1099         Py_DECREF(item);
1100         if (err < 0)
1101                 goto fail;
1102
1103         return list;
1104
1105  fail:
1106         Py_DECREF(list);
1107         return NULL;
1108 }
1109
1110
1111 static char join__doc__[] =
1112 "S.join(sequence) -> string\n\
1113 \n\
1114 Return a string which is the concatenation of the strings in the\n\
1115 sequence.  The separator between elements is S.";
1116
1117 static PyObject *
1118 string_join(PyStringObject *self, PyObject *orig)
1119 {
1120         char *sep = PyString_AS_STRING(self);
1121         const int seplen = PyString_GET_SIZE(self);
1122         PyObject *res = NULL;
1123         char *p;
1124         int seqlen = 0;
1125         size_t sz = 0;
1126         int i;
1127         PyObject *seq, *item;
1128
1129         seq = PySequence_Fast(orig, "");
1130         if (seq == NULL) {
1131                 if (PyErr_ExceptionMatches(PyExc_TypeError))
1132                         PyErr_Format(PyExc_TypeError,
1133                                      "sequence expected, %.80s found",
1134                                      orig->ob_type->tp_name);
1135                 return NULL;
1136         }
1137
1138         seqlen = PySequence_Size(seq);
1139         if (seqlen == 0) {
1140                 Py_DECREF(seq);
1141                 return PyString_FromString("");
1142         }
1143         if (seqlen == 1) {
1144                 item = PySequence_Fast_GET_ITEM(seq, 0);
1145                 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1146                         PyErr_Format(PyExc_TypeError,
1147                                      "sequence item 0: expected string,"
1148                                      " %.80s found",
1149                                      item->ob_type->tp_name);
1150                         Py_DECREF(seq);
1151                         return NULL;
1152                 }
1153                 Py_INCREF(item);
1154                 Py_DECREF(seq);
1155                 return item;
1156         }
1157
1158         /* There are at least two things to join.  Do a pre-pass to figure out
1159          * the total amount of space we'll need (sz), see whether any argument
1160          * is absurd, and defer to the Unicode join if appropriate.
1161          */
1162         for (i = 0; i < seqlen; i++) {
1163                 const size_t old_sz = sz;
1164                 item = PySequence_Fast_GET_ITEM(seq, i);
1165                 if (!PyString_Check(item)){
1166 #ifdef Py_USING_UNICODE
1167                         if (PyUnicode_Check(item)) {
1168                                 /* Defer to Unicode join.
1169                                  * CAUTION:  There's no gurantee that the
1170                                  * original sequence can be iterated over
1171                                  * again, so we must pass seq here.
1172                                  */
1173                                 PyObject *result;
1174                                 result = PyUnicode_Join((PyObject *)self, seq);
1175                                 Py_DECREF(seq);
1176                                 return result;
1177                         }
1178 #endif
1179                         PyErr_Format(PyExc_TypeError,
1180                                      "sequence item %i: expected string,"
1181                                      " %.80s found",
1182                                      i, item->ob_type->tp_name);
1183                         Py_DECREF(seq);
1184                         return NULL;
1185                 }
1186                 sz += PyString_GET_SIZE(item);
1187                 if (i != 0)
1188                         sz += seplen;
1189                 if (sz < old_sz || sz > INT_MAX) {
1190                         PyErr_SetString(PyExc_OverflowError,
1191                                 "join() is too long for a Python string");
1192                         Py_DECREF(seq);
1193                         return NULL;
1194                 }
1195         }
1196
1197         /* Allocate result space. */
1198         res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1199         if (res == NULL) {
1200                 Py_DECREF(seq);
1201                 return NULL;
1202         }
1203
1204         /* Catenate everything. */
1205         p = PyString_AS_STRING(res);
1206         for (i = 0; i < seqlen; ++i) {
1207                 size_t n;
1208                 item = PySequence_Fast_GET_ITEM(seq, i);
1209                 n = PyString_GET_SIZE(item);
1210                 memcpy(p, PyString_AS_STRING(item), n);
1211                 p += n;
1212                 if (i < seqlen - 1) {
1213                         memcpy(p, sep, seplen);
1214                         p += seplen;
1215                 }
1216         }
1217
1218         Py_DECREF(seq);
1219         return res;
1220 }
1221
1222 PyObject *
1223 _PyString_Join(PyObject *sep, PyObject *x)
1224 {
1225         assert(sep != NULL && PyString_Check(sep));
1226         assert(x != NULL);
1227         return string_join((PyStringObject *)sep, x);
1228 }
1229
1230 static long
1231 string_find_internal(PyStringObject *self, PyObject *args, int dir)
1232 {
1233         const char *s = PyString_AS_STRING(self), *sub;
1234         int len = PyString_GET_SIZE(self);
1235         int n, i = 0, last = INT_MAX;
1236         PyObject *subobj;
1237
1238         if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
1239                 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
1240                 return -2;
1241         if (PyString_Check(subobj)) {
1242                 sub = PyString_AS_STRING(subobj);
1243                 n = PyString_GET_SIZE(subobj);
1244         }
1245 #ifdef Py_USING_UNICODE
1246         else if (PyUnicode_Check(subobj))
1247                 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
1248 #endif
1249         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1250                 return -2;
1251
1252         if (last > len)
1253                 last = len;
1254         if (last < 0)
1255                 last += len;
1256         if (last < 0)
1257                 last = 0;
1258         if (i < 0)
1259                 i += len;
1260         if (i < 0)
1261                 i = 0;
1262
1263         if (dir > 0) {
1264                 if (n == 0 && i <= last)
1265                         return (long)i;
1266                 last -= n;
1267                 for (; i <= last; ++i)
1268                         if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
1269                                 return (long)i;
1270         }
1271         else {
1272                 int j;
1273
1274                 if (n == 0 && i <= last)
1275                         return (long)last;
1276                 for (j = last-n; j >= i; --j)
1277                         if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
1278                                 return (long)j;
1279         }
1280
1281         return -1;
1282 }
1283
1284
1285 static char find__doc__[] =
1286 "S.find(sub [,start [,end]]) -> int\n\
1287 \n\
1288 Return the lowest index in S where substring sub is found,\n\
1289 such that sub is contained within s[start,end].  Optional\n\
1290 arguments start and end are interpreted as in slice notation.\n\
1291 \n\
1292 Return -1 on failure.";
1293
1294 static PyObject *
1295 string_find(PyStringObject *self, PyObject *args)
1296 {
1297         long result = string_find_internal(self, args, +1);
1298         if (result == -2)
1299                 return NULL;
1300         return PyInt_FromLong(result);
1301 }
1302
1303
1304 static char index__doc__[] =
1305 "S.index(sub [,start [,end]]) -> int\n\
1306 \n\
1307 Like S.find() but raise ValueError when the substring is not found.";
1308
1309 static PyObject *
1310 string_index(PyStringObject *self, PyObject *args)
1311 {
1312         long result = string_find_internal(self, args, +1);
1313         if (result == -2)
1314                 return NULL;
1315         if (result == -1) {
1316                 PyErr_SetString(PyExc_ValueError,
1317                                 "substring not found in string.index");
1318                 return NULL;
1319         }
1320         return PyInt_FromLong(result);
1321 }
1322
1323
1324 static char rfind__doc__[] =
1325 "S.rfind(sub [,start [,end]]) -> int\n\
1326 \n\
1327 Return the highest index in S where substring sub is found,\n\
1328 such that sub is contained within s[start,end].  Optional\n\
1329 arguments start and end are interpreted as in slice notation.\n\
1330 \n\
1331 Return -1 on failure.";
1332
1333 static PyObject *
1334 string_rfind(PyStringObject *self, PyObject *args)
1335 {
1336         long result = string_find_internal(self, args, -1);
1337         if (result == -2)
1338                 return NULL;
1339         return PyInt_FromLong(result);
1340 }
1341
1342
1343 static char rindex__doc__[] =
1344 "S.rindex(sub [,start [,end]]) -> int\n\
1345 \n\
1346 Like S.rfind() but raise ValueError when the substring is not found.";
1347
1348 static PyObject *
1349 string_rindex(PyStringObject *self, PyObject *args)
1350 {
1351         long result = string_find_internal(self, args, -1);
1352         if (result == -2)
1353                 return NULL;
1354         if (result == -1) {
1355                 PyErr_SetString(PyExc_ValueError,
1356                                 "substring not found in string.rindex");
1357                 return NULL;
1358         }
1359         return PyInt_FromLong(result);
1360 }
1361
1362
1363 static PyObject *
1364 do_strip(PyStringObject *self, int striptype)
1365 {
1366         char *s = PyString_AS_STRING(self);
1367         int len = PyString_GET_SIZE(self), i, j;
1368
1369         i = 0;
1370         if (striptype != RIGHTSTRIP) {
1371                 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1372                         i++;
1373                 }
1374         }
1375
1376         j = len;
1377         if (striptype != LEFTSTRIP) {
1378                 do {
1379                         j--;
1380                 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1381                 j++;
1382         }
1383
1384         if (i == 0 && j == len && PyString_CheckExact(self)) {
1385                 Py_INCREF(self);
1386                 return (PyObject*)self;
1387         }
1388         else
1389                 return PyString_FromStringAndSize(s+i, j-i);
1390 }
1391
1392
1393 static char strip__doc__[] =
1394 "S.strip() -> string\n\
1395 \n\
1396 Return a copy of the string S with leading and trailing\n\
1397 whitespace removed.";
1398
1399 static PyObject *
1400 string_strip(PyStringObject *self)
1401 {
1402         return do_strip(self, BOTHSTRIP);
1403 }
1404
1405
1406 static char lstrip__doc__[] =
1407 "S.lstrip() -> string\n\
1408 \n\
1409 Return a copy of the string S with leading whitespace removed.";
1410
1411 static PyObject *
1412 string_lstrip(PyStringObject *self)
1413 {
1414         return do_strip(self, LEFTSTRIP);
1415 }
1416
1417
1418 static char rstrip__doc__[] =
1419 "S.rstrip() -> string\n\
1420 \n\
1421 Return a copy of the string S with trailing whitespace removed.";
1422
1423 static PyObject *
1424 string_rstrip(PyStringObject *self)
1425 {
1426         return do_strip(self, RIGHTSTRIP);
1427 }
1428
1429
1430 static char lower__doc__[] =
1431 "S.lower() -> string\n\
1432 \n\
1433 Return a copy of the string S converted to lowercase.";
1434
1435 static PyObject *
1436 string_lower(PyStringObject *self)
1437 {
1438         char *s = PyString_AS_STRING(self), *s_new;
1439         int i, n = PyString_GET_SIZE(self);
1440         PyObject *new;
1441
1442         new = PyString_FromStringAndSize(NULL, n);
1443         if (new == NULL)
1444                 return NULL;
1445         s_new = PyString_AsString(new);
1446         for (i = 0; i < n; i++) {
1447                 int c = Py_CHARMASK(*s++);
1448                 if (isupper(c)) {
1449                         *s_new = tolower(c);
1450                 } else
1451                         *s_new = c;
1452                 s_new++;
1453         }
1454         return new;
1455 }
1456
1457
1458 static char upper__doc__[] =
1459 "S.upper() -> string\n\
1460 \n\
1461 Return a copy of the string S converted to uppercase.";
1462
1463 static PyObject *
1464 string_upper(PyStringObject *self)
1465 {
1466         char *s = PyString_AS_STRING(self), *s_new;
1467         int i, n = PyString_GET_SIZE(self);
1468         PyObject *new;
1469
1470         new = PyString_FromStringAndSize(NULL, n);
1471         if (new == NULL)
1472                 return NULL;
1473         s_new = PyString_AsString(new);
1474         for (i = 0; i < n; i++) {
1475                 int c = Py_CHARMASK(*s++);
1476                 if (islower(c)) {
1477                         *s_new = toupper(c);
1478                 } else
1479                         *s_new = c;
1480                 s_new++;
1481         }
1482         return new;
1483 }
1484
1485
1486 static char title__doc__[] =
1487 "S.title() -> string\n\
1488 \n\
1489 Return a titlecased version of S, i.e. words start with uppercase\n\
1490 characters, all remaining cased characters have lowercase.";
1491
1492 static PyObject*
1493 string_title(PyStringObject *self)
1494 {
1495         char *s = PyString_AS_STRING(self), *s_new;
1496         int i, n = PyString_GET_SIZE(self);
1497         int previous_is_cased = 0;
1498         PyObject *new;
1499
1500         new = PyString_FromStringAndSize(NULL, n);
1501         if (new == NULL)
1502                 return NULL;
1503         s_new = PyString_AsString(new);
1504         for (i = 0; i < n; i++) {
1505                 int c = Py_CHARMASK(*s++);
1506                 if (islower(c)) {
1507                         if (!previous_is_cased)
1508                             c = toupper(c);
1509                         previous_is_cased = 1;
1510                 } else if (isupper(c)) {
1511                         if (previous_is_cased)
1512                             c = tolower(c);
1513                         previous_is_cased = 1;
1514                 } else
1515                         previous_is_cased = 0;
1516                 *s_new++ = c;
1517         }
1518         return new;
1519 }
1520
1521 static char capitalize__doc__[] =
1522 "S.capitalize() -> string\n\
1523 \n\
1524 Return a copy of the string S with only its first character\n\
1525 capitalized.";
1526
1527 static PyObject *
1528 string_capitalize(PyStringObject *self)
1529 {
1530         char *s = PyString_AS_STRING(self), *s_new;
1531         int i, n = PyString_GET_SIZE(self);
1532         PyObject *new;
1533
1534         new = PyString_FromStringAndSize(NULL, n);
1535         if (new == NULL)
1536                 return NULL;
1537         s_new = PyString_AsString(new);
1538         if (0 < n) {
1539                 int c = Py_CHARMASK(*s++);
1540                 if (islower(c))
1541                         *s_new = toupper(c);
1542                 else
1543                         *s_new = c;
1544                 s_new++;
1545         }
1546         for (i = 1; i < n; i++) {
1547                 int c = Py_CHARMASK(*s++);
1548                 if (isupper(c))
1549                         *s_new = tolower(c);
1550                 else
1551                         *s_new = c;
1552                 s_new++;
1553         }
1554         return new;
1555 }
1556
1557
1558 static char count__doc__[] =
1559 "S.count(sub[, start[, end]]) -> int\n\
1560 \n\
1561 Return the number of occurrences of substring sub in string\n\
1562 S[start:end].  Optional arguments start and end are\n\
1563 interpreted as in slice notation.";
1564
1565 static PyObject *
1566 string_count(PyStringObject *self, PyObject *args)
1567 {
1568         const char *s = PyString_AS_STRING(self), *sub;
1569         int len = PyString_GET_SIZE(self), n;
1570         int i = 0, last = INT_MAX;
1571         int m, r;
1572         PyObject *subobj;
1573
1574         if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1575                 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
1576                 return NULL;
1577
1578         if (PyString_Check(subobj)) {
1579                 sub = PyString_AS_STRING(subobj);
1580                 n = PyString_GET_SIZE(subobj);
1581         }
1582 #ifdef Py_USING_UNICODE
1583         else if (PyUnicode_Check(subobj)) {
1584                 int count;
1585                 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1586                 if (count == -1)
1587                         return NULL;
1588                 else
1589                         return PyInt_FromLong((long) count);
1590         }
1591 #endif
1592         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1593                 return NULL;
1594
1595         if (last > len)
1596                 last = len;
1597         if (last < 0)
1598                 last += len;
1599         if (last < 0)
1600                 last = 0;
1601         if (i < 0)
1602                 i += len;
1603         if (i < 0)
1604                 i = 0;
1605         m = last + 1 - n;
1606         if (n == 0)
1607                 return PyInt_FromLong((long) (m-i));
1608
1609         r = 0;
1610         while (i < m) {
1611                 if (!memcmp(s+i, sub, n)) {
1612                         r++;
1613                         i += n;
1614                 } else {
1615                         i++;
1616                 }
1617         }
1618         return PyInt_FromLong((long) r);
1619 }
1620
1621
1622 static char swapcase__doc__[] =
1623 "S.swapcase() -> string\n\
1624 \n\
1625 Return a copy of the string S with uppercase characters\n\
1626 converted to lowercase and vice versa.";
1627
1628 static PyObject *
1629 string_swapcase(PyStringObject *self)
1630 {
1631         char *s = PyString_AS_STRING(self), *s_new;
1632         int i, n = PyString_GET_SIZE(self);
1633         PyObject *new;
1634
1635         new = PyString_FromStringAndSize(NULL, n);
1636         if (new == NULL)
1637                 return NULL;
1638         s_new = PyString_AsString(new);
1639         for (i = 0; i < n; i++) {
1640                 int c = Py_CHARMASK(*s++);
1641                 if (islower(c)) {
1642                         *s_new = toupper(c);
1643                 }
1644                 else if (isupper(c)) {
1645                         *s_new = tolower(c);
1646                 }
1647                 else
1648                         *s_new = c;
1649                 s_new++;
1650         }
1651         return new;
1652 }
1653
1654
1655 static char translate__doc__[] =
1656 "S.translate(table [,deletechars]) -> string\n\
1657 \n\
1658 Return a copy of the string S, where all characters occurring\n\
1659 in the optional argument deletechars are removed, and the\n\
1660 remaining characters have been mapped through the given\n\
1661 translation table, which must be a string of length 256.";
1662
1663 static PyObject *
1664 string_translate(PyStringObject *self, PyObject *args)
1665 {
1666         register char *input, *output;
1667         register const char *table;
1668         register int i, c, changed = 0;
1669         PyObject *input_obj = (PyObject*)self;
1670         const char *table1, *output_start, *del_table=NULL;
1671         int inlen, tablen, dellen = 0;
1672         PyObject *result;
1673         int trans_table[256];
1674         PyObject *tableobj, *delobj = NULL;
1675
1676         if (!PyArg_ParseTuple(args, "O|O:translate",
1677                               &tableobj, &delobj))
1678                 return NULL;
1679
1680         if (PyString_Check(tableobj)) {
1681                 table1 = PyString_AS_STRING(tableobj);
1682                 tablen = PyString_GET_SIZE(tableobj);
1683         }
1684 #ifdef Py_USING_UNICODE
1685         else if (PyUnicode_Check(tableobj)) {
1686                 /* Unicode .translate() does not support the deletechars
1687                    parameter; instead a mapping to None will cause characters
1688                    to be deleted. */
1689                 if (delobj != NULL) {
1690                         PyErr_SetString(PyExc_TypeError,
1691                         "deletions are implemented differently for unicode");
1692                         return NULL;
1693                 }
1694                 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1695         }
1696 #endif
1697         else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
1698                 return NULL;
1699
1700         if (delobj != NULL) {
1701                 if (PyString_Check(delobj)) {
1702                         del_table = PyString_AS_STRING(delobj);
1703                         dellen = PyString_GET_SIZE(delobj);
1704                 }
1705 #ifdef Py_USING_UNICODE
1706                 else if (PyUnicode_Check(delobj)) {
1707                         PyErr_SetString(PyExc_TypeError,
1708                         "deletions are implemented differently for unicode");
1709                         return NULL;
1710                 }
1711 #endif
1712                 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1713                         return NULL;
1714
1715                 if (tablen != 256) {
1716                         PyErr_SetString(PyExc_ValueError,
1717                           "translation table must be 256 characters long");
1718                         return NULL;
1719                 }
1720         }
1721         else {
1722                 del_table = NULL;
1723                 dellen = 0;
1724         }
1725
1726         table = table1;
1727         inlen = PyString_Size(input_obj);
1728         result = PyString_FromStringAndSize((char *)NULL, inlen);
1729         if (result == NULL)
1730                 return NULL;
1731         output_start = output = PyString_AsString(result);
1732         input = PyString_AsString(input_obj);
1733
1734         if (dellen == 0) {
1735                 /* If no deletions are required, use faster code */
1736                 for (i = inlen; --i >= 0; ) {
1737                         c = Py_CHARMASK(*input++);
1738                         if (Py_CHARMASK((*output++ = table[c])) != c)
1739                                 changed = 1;
1740                 }
1741                 if (changed || !PyString_CheckExact(input_obj))
1742                         return result;
1743                 Py_DECREF(result);
1744                 Py_INCREF(input_obj);
1745                 return input_obj;
1746         }
1747
1748         for (i = 0; i < 256; i++)
1749                 trans_table[i] = Py_CHARMASK(table[i]);
1750
1751         for (i = 0; i < dellen; i++)
1752                 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1753
1754         for (i = inlen; --i >= 0; ) {
1755                 c = Py_CHARMASK(*input++);
1756                 if (trans_table[c] != -1)
1757                         if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1758                                 continue;
1759                 changed = 1;
1760         }
1761         if (!changed && PyString_CheckExact(input_obj)) {
1762                 Py_DECREF(result);
1763                 Py_INCREF(input_obj);
1764                 return input_obj;
1765         }
1766         /* Fix the size of the resulting string */
1767         if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1768                 return NULL;
1769         return result;
1770 }
1771
1772
1773 /* What follows is used for implementing replace().  Perry Stoll. */
1774
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT and returns its offset into MEM, or -1 if there is none.
  A PAT longer than MEM can never match, so -1 is returned for it too.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
        /* Highest offset at which a full copy of PAT still fits. */
        const int last_start = len - pat_len;
        register int pos = 0;

        while (pos <= last_start) {
                /* First-byte test avoids most memcmp() calls. */
                if (mem[pos] == pat[0] &&
                    memcmp(mem + pos, pat, pat_len) == 0)
                        return pos;
                pos++;
        }
        return -1;
}
1800
/*
  mymemcnt

   Count the non-overlapping occurrences of PAT in MEM, scanning from
   left to right and resuming each search just past the previous match:
           mem=1111  and pat=11 gives 2,
           mem=11111 and pat=11 gives 2 as well.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
        int total = 0;

        for (; len >= 0; total++) {
                const int hit = mymemfind(mem, len, pat, pat_len);

                if (hit == -1)
                        break;
                /* skip the unmatched prefix and the match itself */
                mem += hit + pat_len;
                len -= hit + pat_len;
        }
        return total;
}
1824
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  Matches are taken left to right and do not
   overlap.  At most COUNT replacements are made; a negative COUNT
   means "replace every occurrence".

   If STR is empty, if length of PAT is greater than length of STR, or
   if no replacement is to be made (no occurrences of PAT in STR, or
   COUNT is 0), then the original string is returned.  Otherwise, a new
   string is allocated here and returned.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs.

   return value is:
       the new string allocated locally with PyMem_MALLOC (the caller
       releases it), or
       STR itself when *out_len is -1, or
       NULL if an error occurred (no memory, or the result length
       would not fit in an int).
*/
static char *
mymemreplace(const char *str, int len,          /* input string */
             const char *pat, int pat_len,      /* pattern string to find */
             const char *sub, int sub_len,      /* substitution string */
             int count,                         /* number of replacements */
             int *out_len)
{
        char *out_s;
        char *new_s;
        int nfound, offset, new_len;

        if (len == 0 || pat_len > len)
                goto return_same;

        /* find length of output string */
        nfound = mymemcnt(str, len, pat, pat_len);
        if (count < 0)
                count = INT_MAX;
        else if (nfound > count)
                nfound = count;
        if (nfound == 0)
                goto return_same;

        /* When the substitute is longer than the pattern the result
           grows by (sub_len - pat_len) per replacement.  Reject sizes
           that would overflow an int: a wrapped new_len would lead to
           an undersized buffer being overrun by the copies below.
           (Shrinking cannot overflow: nfound*pat_len <= len.) */
        if (sub_len > pat_len &&
            nfound > (INT_MAX - len) / (sub_len - pat_len))
                return NULL;

        new_len = len + nfound*(sub_len - pat_len);
        if (new_len == 0) {
                /* Have to allocate something for the caller to free(). */
                out_s = (char *)PyMem_MALLOC(1);
                if (out_s == NULL)
                        return NULL;
                out_s[0] = '\0';
        }
        else {
                assert(new_len > 0);
                new_s = (char *)PyMem_MALLOC(new_len);
                if (new_s == NULL)
                        return NULL;
                out_s = new_s;

                for (; count > 0 && len > 0; --count) {
                        /* find index of next instance of pattern */
                        offset = mymemfind(str, len, pat, pat_len);
                        if (offset == -1)
                                break;

                        /* copy non matching part of input string */
                        memcpy(new_s, str, offset);
                        str += offset + pat_len;
                        len -= offset + pat_len;

                        /* copy substitute into the output string */
                        new_s += offset;
                        memcpy(new_s, sub, sub_len);
                        new_s += sub_len;
                }
                /* copy any remaining values into output string */
                if (len > 0)
                        memcpy(new_s, str, len);
        }
        *out_len = new_len;
        return out_s;

  return_same:
        *out_len = -1;
        return (char *)str; /* cast away const */
}
1909
1910
1911 static char replace__doc__[] =
1912 "S.replace (old, new[, maxsplit]) -> string\n\
1913 \n\
1914 Return a copy of string S with all occurrences of substring\n\
1915 old replaced by new.  If the optional argument maxsplit is\n\
1916 given, only the first maxsplit occurrences are replaced.";
1917
1918 static PyObject *
1919 string_replace(PyStringObject *self, PyObject *args)
1920 {
1921         const char *str = PyString_AS_STRING(self), *sub, *repl;
1922         char *new_s;
1923         const int len = PyString_GET_SIZE(self);
1924         int sub_len, repl_len, out_len;
1925         int count = -1;
1926         PyObject *new;
1927         PyObject *subobj, *replobj;
1928
1929         if (!PyArg_ParseTuple(args, "OO|i:replace",
1930                               &subobj, &replobj, &count))
1931                 return NULL;
1932
1933         if (PyString_Check(subobj)) {
1934                 sub = PyString_AS_STRING(subobj);
1935                 sub_len = PyString_GET_SIZE(subobj);
1936         }
1937 #ifdef Py_USING_UNICODE
1938         else if (PyUnicode_Check(subobj))
1939                 return PyUnicode_Replace((PyObject *)self,
1940                                          subobj, replobj, count);
1941 #endif
1942         else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1943                 return NULL;
1944
1945         if (PyString_Check(replobj)) {
1946                 repl = PyString_AS_STRING(replobj);
1947                 repl_len = PyString_GET_SIZE(replobj);
1948         }
1949 #ifdef Py_USING_UNICODE
1950         else if (PyUnicode_Check(replobj))
1951                 return PyUnicode_Replace((PyObject *)self,
1952                                          subobj, replobj, count);
1953 #endif
1954         else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1955                 return NULL;
1956
1957         if (sub_len <= 0) {
1958                 PyErr_SetString(PyExc_ValueError, "empty pattern string");
1959                 return NULL;
1960         }
1961         new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
1962         if (new_s == NULL) {
1963                 PyErr_NoMemory();
1964                 return NULL;
1965         }
1966         if (out_len == -1) {
1967                 if (PyString_CheckExact(self)) {
1968                         /* we're returning another reference to self */
1969                         new = (PyObject*)self;
1970                         Py_INCREF(new);
1971                 }
1972                 else {
1973                         new = PyString_FromStringAndSize(str, len);
1974                         if (new == NULL)
1975                                 return NULL;
1976                 }
1977         }
1978         else {
1979                 new = PyString_FromStringAndSize(new_s, out_len);
1980                 PyMem_FREE(new_s);
1981         }
1982         return new;
1983 }
1984
1985
1986 static char startswith__doc__[] =
1987 "S.startswith(prefix[, start[, end]]) -> int\n\
1988 \n\
1989 Return 1 if S starts with the specified prefix, otherwise return 0.  With\n\
1990 optional start, test S beginning at that position.  With optional end, stop\n\
1991 comparing S at that position.";
1992
1993 static PyObject *
1994 string_startswith(PyStringObject *self, PyObject *args)
1995 {
1996         const char* str = PyString_AS_STRING(self);
1997         int len = PyString_GET_SIZE(self);
1998         const char* prefix;
1999         int plen;
2000         int start = 0;
2001         int end = -1;
2002         PyObject *subobj;
2003
2004         if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2005                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2006                 return NULL;
2007         if (PyString_Check(subobj)) {
2008                 prefix = PyString_AS_STRING(subobj);
2009                 plen = PyString_GET_SIZE(subobj);
2010         }
2011 #ifdef Py_USING_UNICODE
2012         else if (PyUnicode_Check(subobj)) {
2013                 int rc;
2014                 rc = PyUnicode_Tailmatch((PyObject *)self,
2015                                           subobj, start, end, -1);
2016                 if (rc == -1)
2017                         return NULL;
2018                 else
2019                         return PyInt_FromLong((long) rc);
2020         }
2021 #endif
2022         else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
2023                 return NULL;
2024
2025         /* adopt Java semantics for index out of range.  it is legal for
2026          * offset to be == plen, but this only returns true if prefix is
2027          * the empty string.
2028          */
2029         if (start < 0 || start+plen > len)
2030                 return PyInt_FromLong(0);
2031
2032         if (!memcmp(str+start, prefix, plen)) {
2033                 /* did the match end after the specified end? */
2034                 if (end < 0)
2035                         return PyInt_FromLong(1);
2036                 else if (end - start < plen)
2037                         return PyInt_FromLong(0);
2038                 else
2039                         return PyInt_FromLong(1);
2040         }
2041         else return PyInt_FromLong(0);
2042 }
2043
2044
2045 static char endswith__doc__[] =
2046 "S.endswith(suffix[, start[, end]]) -> int\n\
2047 \n\
2048 Return 1 if S ends with the specified suffix, otherwise return 0.  With\n\
2049 optional start, test S beginning at that position.  With optional end, stop\n\
2050 comparing S at that position.";
2051
2052 static PyObject *
2053 string_endswith(PyStringObject *self, PyObject *args)
2054 {
2055         const char* str = PyString_AS_STRING(self);
2056         int len = PyString_GET_SIZE(self);
2057         const char* suffix;
2058         int slen;
2059         int start = 0;
2060         int end = -1;
2061         int lower, upper;
2062         PyObject *subobj;
2063
2064         if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2065                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2066                 return NULL;
2067         if (PyString_Check(subobj)) {
2068                 suffix = PyString_AS_STRING(subobj);
2069                 slen = PyString_GET_SIZE(subobj);
2070         }
2071 #ifdef Py_USING_UNICODE
2072         else if (PyUnicode_Check(subobj)) {
2073                 int rc;
2074                 rc = PyUnicode_Tailmatch((PyObject *)self,
2075                                           subobj, start, end, +1);
2076                 if (rc == -1)
2077                         return NULL;
2078                 else
2079                         return PyInt_FromLong((long) rc);
2080         }
2081 #endif
2082         else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
2083                 return NULL;
2084
2085         if (start < 0 || start > len || slen > len)
2086                 return PyInt_FromLong(0);
2087
2088         upper = (end >= 0 && end <= len) ? end : len;
2089         lower = (upper - slen) > start ? (upper - slen) : start;
2090
2091         if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
2092                 return PyInt_FromLong(1);
2093         else return PyInt_FromLong(0);
2094 }
2095
2096
2097 static char encode__doc__[] =
2098 "S.encode([encoding[,errors]]) -> object\n\
2099 \n\
2100 Encodes S using the codec registered for encoding. encoding defaults\n\
2101 to the default encoding. errors may be given to set a different error\n\
2102 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2103 a ValueError. Other possible values are 'ignore' and 'replace'.";
2104
2105 static PyObject *
2106 string_encode(PyStringObject *self, PyObject *args)
2107 {
2108     char *encoding = NULL;
2109     char *errors = NULL;
2110     if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2111         return NULL;
2112     return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2113 }
2114
2115
2116 static char decode__doc__[] =
2117 "S.decode([encoding[,errors]]) -> object\n\
2118 \n\
2119 Decodes S using the codec registered for encoding. encoding defaults\n\
2120 to the default encoding. errors may be given to set a different error\n\
2121 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2122 a ValueError. Other possible values are 'ignore' and 'replace'.";
2123
2124 static PyObject *
2125 string_decode(PyStringObject *self, PyObject *args)
2126 {
2127     char *encoding = NULL;
2128     char *errors = NULL;
2129     if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2130         return NULL;
2131     return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
2132 }
2133
2134
2135 static char expandtabs__doc__[] =
2136 "S.expandtabs([tabsize]) -> string\n\
2137 \n\
2138 Return a copy of S where all tab characters are expanded using spaces.\n\
2139 If tabsize is not given, a tab size of 8 characters is assumed.";
2140
2141 static PyObject*
2142 string_expandtabs(PyStringObject *self, PyObject *args)
2143 {
2144     const char *e, *p;
2145     char *q;
2146     int i, j;
2147     PyObject *u;
2148     int tabsize = 8;
2149
2150     if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2151         return NULL;
2152
2153     /* First pass: determine size of output string */
2154     i = j = 0;
2155     e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2156     for (p = PyString_AS_STRING(self); p < e; p++)
2157         if (*p == '\t') {
2158             if (tabsize > 0)
2159                 j += tabsize - (j % tabsize);
2160         }
2161         else {
2162             j++;
2163             if (*p == '\n' || *p == '\r') {
2164                 i += j;
2165                 j = 0;
2166             }
2167         }
2168
2169     /* Second pass: create output string and fill it */
2170     u = PyString_FromStringAndSize(NULL, i + j);
2171     if (!u)
2172         return NULL;
2173
2174     j = 0;
2175     q = PyString_AS_STRING(u);
2176
2177     for (p = PyString_AS_STRING(self); p < e; p++)
2178         if (*p == '\t') {
2179             if (tabsize > 0) {
2180                 i = tabsize - (j % tabsize);
2181                 j += i;
2182                 while (i--)
2183                     *q++ = ' ';
2184             }
2185         }
2186         else {
2187             j++;
2188             *q++ = *p;
2189             if (*p == '\n' || *p == '\r')
2190                 j = 0;
2191         }
2192
2193     return u;
2194 }
2195
2196 static PyObject *
2197 pad(PyStringObject *self, int left, int right, char fill)
2198 {
2199     PyObject *u;
2200
2201     if (left < 0)
2202         left = 0;
2203     if (right < 0)
2204         right = 0;
2205
2206     if (left == 0 && right == 0 && PyString_CheckExact(self)) {
2207         Py_INCREF(self);
2208         return (PyObject *)self;
2209     }
2210
2211     u = PyString_FromStringAndSize(NULL,
2212                                    left + PyString_GET_SIZE(self) + right);
2213     if (u) {
2214         if (left)
2215             memset(PyString_AS_STRING(u), fill, left);
2216         memcpy(PyString_AS_STRING(u) + left,
2217                PyString_AS_STRING(self),
2218                PyString_GET_SIZE(self));
2219         if (right)
2220             memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2221                    fill, right);
2222     }
2223
2224     return u;
2225 }
2226
2227 static char ljust__doc__[] =
2228 "S.ljust(width) -> string\n"
2229 "\n"
2230 "Return S left justified in a string of length width. Padding is\n"
2231 "done using spaces.";
2232
2233 static PyObject *
2234 string_ljust(PyStringObject *self, PyObject *args)
2235 {
2236     int width;
2237     if (!PyArg_ParseTuple(args, "i:ljust", &width))
2238         return NULL;
2239
2240     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
2241         Py_INCREF(self);
2242         return (PyObject*) self;
2243     }
2244
2245     return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2246 }
2247
2248
2249 static char rjust__doc__[] =
2250 "S.rjust(width) -> string\n"
2251 "\n"
2252 "Return S right justified in a string of length width. Padding is\n"
2253 "done using spaces.";
2254
2255 static PyObject *
2256 string_rjust(PyStringObject *self, PyObject *args)
2257 {
2258     int width;
2259     if (!PyArg_ParseTuple(args, "i:rjust", &width))
2260         return NULL;
2261
2262     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
2263         Py_INCREF(self);
2264         return (PyObject*) self;
2265     }
2266
2267     return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2268 }
2269
2270
2271 static char center__doc__[] =
2272 "S.center(width) -> string\n"
2273 "\n"
2274 "Return S centered in a string of length width. Padding is done\n"
2275 "using spaces.";
2276
2277 static PyObject *
2278 string_center(PyStringObject *self, PyObject *args)
2279 {
2280     int marg, left;
2281     int width;
2282
2283     if (!PyArg_ParseTuple(args, "i:center", &width))
2284         return NULL;
2285
2286     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
2287         Py_INCREF(self);
2288         return (PyObject*) self;
2289     }
2290
2291     marg = width - PyString_GET_SIZE(self);
2292     left = marg / 2 + (marg & width & 1);
2293
2294     return pad(self, left, marg - left, ' ');
2295 }
2296
2297 static char isspace__doc__[] =
2298 "S.isspace() -> int\n"
2299 "\n"
2300 "Return 1 if there are only whitespace characters in S,\n"
2301 "0 otherwise.";
2302
2303 static PyObject*
2304 string_isspace(PyStringObject *self)
2305 {
2306     register const unsigned char *p
2307         = (unsigned char *) PyString_AS_STRING(self);
2308     register const unsigned char *e;
2309
2310     /* Shortcut for single character strings */
2311     if (PyString_GET_SIZE(self) == 1 &&
2312         isspace(*p))
2313         return PyInt_FromLong(1);
2314
2315     /* Special case for empty strings */
2316     if (PyString_GET_SIZE(self) == 0)
2317         return PyInt_FromLong(0);
2318
2319     e = p + PyString_GET_SIZE(self);
2320     for (; p < e; p++) {
2321         if (!isspace(*p))
2322             return PyInt_FromLong(0);
2323     }
2324     return PyInt_FromLong(1);
2325 }
2326
2327
2328 static char isalpha__doc__[] =
2329 "S.isalpha() -> int\n\
2330 \n\
2331 Return 1 if  all characters in S are alphabetic\n\
2332 and there is at least one character in S, 0 otherwise.";
2333
2334 static PyObject*
2335 string_isalpha(PyStringObject *self)
2336 {
2337     register const unsigned char *p
2338         = (unsigned char *) PyString_AS_STRING(self);
2339     register const unsigned char *e;
2340
2341     /* Shortcut for single character strings */
2342     if (PyString_GET_SIZE(self) == 1 &&
2343         isalpha(*p))
2344         return PyInt_FromLong(1);
2345
2346     /* Special case for empty strings */
2347     if (PyString_GET_SIZE(self) == 0)
2348         return PyInt_FromLong(0);
2349
2350     e = p + PyString_GET_SIZE(self);
2351     for (; p < e; p++) {
2352         if (!isalpha(*p))
2353             return PyInt_FromLong(0);
2354     }
2355     return PyInt_FromLong(1);
2356 }
2357
2358
2359 static char isalnum__doc__[] =
2360 "S.isalnum() -> int\n\
2361 \n\
2362 Return 1 if  all characters in S are alphanumeric\n\
2363 and there is at least one character in S, 0 otherwise.";
2364
2365 static PyObject*
2366 string_isalnum(PyStringObject *self)
2367 {
2368     register const unsigned char *p
2369         = (unsigned char *) PyString_AS_STRING(self);
2370     register const unsigned char *e;
2371
2372     /* Shortcut for single character strings */
2373     if (PyString_GET_SIZE(self) == 1 &&
2374         isalnum(*p))
2375         return PyInt_FromLong(1);
2376
2377     /* Special case for empty strings */
2378     if (PyString_GET_SIZE(self) == 0)
2379         return PyInt_FromLong(0);
2380
2381     e = p + PyString_GET_SIZE(self);
2382     for (; p < e; p++) {
2383         if (!isalnum(*p))
2384             return PyInt_FromLong(0);
2385     }
2386     return PyInt_FromLong(1);
2387 }
2388
2389
2390 static char isdigit__doc__[] =
2391 "S.isdigit() -> int\n\
2392 \n\
2393 Return 1 if there are only digit characters in S,\n\
2394 0 otherwise.";
2395
2396 static PyObject*
2397 string_isdigit(PyStringObject *self)
2398 {
2399     register const unsigned char *p
2400         = (unsigned char *) PyString_AS_STRING(self);
2401     register const unsigned char *e;
2402
2403     /* Shortcut for single character strings */
2404     if (PyString_GET_SIZE(self) == 1 &&
2405         isdigit(*p))
2406         return PyInt_FromLong(1);
2407
2408     /* Special case for empty strings */
2409     if (PyString_GET_SIZE(self) == 0)
2410         return PyInt_FromLong(0);
2411
2412     e = p + PyString_GET_SIZE(self);
2413     for (; p < e; p++) {
2414         if (!isdigit(*p))
2415             return PyInt_FromLong(0);
2416     }
2417     return PyInt_FromLong(1);
2418 }
2419
2420
2421 static char islower__doc__[] =
2422 "S.islower() -> int\n\
2423 \n\
2424 Return 1 if  all cased characters in S are lowercase and there is\n\
2425 at least one cased character in S, 0 otherwise.";
2426
2427 static PyObject*
2428 string_islower(PyStringObject *self)
2429 {
2430     register const unsigned char *p
2431         = (unsigned char *) PyString_AS_STRING(self);
2432     register const unsigned char *e;
2433     int cased;
2434
2435     /* Shortcut for single character strings */
2436     if (PyString_GET_SIZE(self) == 1)
2437         return PyInt_FromLong(islower(*p) != 0);
2438
2439     /* Special case for empty strings */
2440     if (PyString_GET_SIZE(self) == 0)
2441         return PyInt_FromLong(0);
2442
2443     e = p + PyString_GET_SIZE(self);
2444     cased = 0;
2445     for (; p < e; p++) {
2446         if (isupper(*p))
2447             return PyInt_FromLong(0);
2448         else if (!cased && islower(*p))
2449             cased = 1;
2450     }
2451     return PyInt_FromLong(cased);
2452 }
2453
2454
2455 static char isupper__doc__[] =
2456 "S.isupper() -> int\n\
2457 \n\
2458 Return 1 if  all cased characters in S are uppercase and there is\n\
2459 at least one cased character in S, 0 otherwise.";
2460
2461 static PyObject*
2462 string_isupper(PyStringObject *self)
2463 {
2464     register const unsigned char *p
2465         = (unsigned char *) PyString_AS_STRING(self);
2466     register const unsigned char *e;
2467     int cased;
2468
2469     /* Shortcut for single character strings */
2470     if (PyString_GET_SIZE(self) == 1)
2471         return PyInt_FromLong(isupper(*p) != 0);
2472
2473     /* Special case for empty strings */
2474     if (PyString_GET_SIZE(self) == 0)
2475         return PyInt_FromLong(0);
2476
2477     e = p + PyString_GET_SIZE(self);
2478     cased = 0;
2479     for (; p < e; p++) {
2480         if (islower(*p))
2481             return PyInt_FromLong(0);
2482         else if (!cased && isupper(*p))
2483             cased = 1;
2484     }
2485     return PyInt_FromLong(cased);
2486 }
2487
2488
2489 static char istitle__doc__[] =
2490 "S.istitle() -> int\n\
2491 \n\
2492 Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2493 may only follow uncased characters and lowercase characters only cased\n\
2494 ones. Return 0 otherwise.";
2495
2496 static PyObject*
2497 string_istitle(PyStringObject *self, PyObject *uncased)
2498 {
2499     register const unsigned char *p
2500         = (unsigned char *) PyString_AS_STRING(self);
2501     register const unsigned char *e;
2502     int cased, previous_is_cased;
2503
2504     /* Shortcut for single character strings */
2505     if (PyString_GET_SIZE(self) == 1)
2506         return PyInt_FromLong(isupper(*p) != 0);
2507
2508     /* Special case for empty strings */
2509     if (PyString_GET_SIZE(self) == 0)
2510         return PyInt_FromLong(0);
2511
2512     e = p + PyString_GET_SIZE(self);
2513     cased = 0;
2514     previous_is_cased = 0;
2515     for (; p < e; p++) {
2516         register const unsigned char ch = *p;
2517
2518         if (isupper(ch)) {
2519             if (previous_is_cased)
2520                 return PyInt_FromLong(0);
2521             previous_is_cased = 1;
2522             cased = 1;
2523         }
2524         else if (islower(ch)) {
2525             if (!previous_is_cased)
2526                 return PyInt_FromLong(0);
2527             previous_is_cased = 1;
2528             cased = 1;
2529         }
2530         else
2531             previous_is_cased = 0;
2532     }
2533     return PyInt_FromLong(cased);
2534 }
2535
2536
2537 static char splitlines__doc__[] =
2538 "S.splitlines([keepends]]) -> list of strings\n\
2539 \n\
2540 Return a list of the lines in S, breaking at line boundaries.\n\
2541 Line breaks are not included in the resulting list unless keepends\n\
2542 is given and true.";
2543
2544 #define SPLIT_APPEND(data, left, right)                                 \
2545         str = PyString_FromStringAndSize(data + left, right - left);    \
2546         if (!str)                                                       \
2547             goto onError;                                               \
2548         if (PyList_Append(list, str)) {                                 \
2549             Py_DECREF(str);                                             \
2550             goto onError;                                               \
2551         }                                                               \
2552         else                                                            \
2553             Py_DECREF(str);
2554
2555 static PyObject*
2556 string_splitlines(PyStringObject *self, PyObject *args)
2557 {
2558     register int i;
2559     register int j;
2560     int len;
2561     int keepends = 0;
2562     PyObject *list;
2563     PyObject *str;
2564     char *data;
2565
2566     if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
2567         return NULL;
2568
2569     data = PyString_AS_STRING(self);
2570     len = PyString_GET_SIZE(self);
2571
2572     list = PyList_New(0);
2573     if (!list)
2574         goto onError;
2575
2576     for (i = j = 0; i < len; ) {
2577         int eol;
2578
2579         /* Find a line and append it */
2580         while (i < len && data[i] != '\n' && data[i] != '\r')
2581             i++;
2582
2583         /* Skip the line break reading CRLF as one line break */
2584         eol = i;
2585         if (i < len) {
2586             if (data[i] == '\r' && i + 1 < len &&
2587                 data[i+1] == '\n')
2588                 i += 2;
2589             else
2590                 i++;
2591             if (keepends)
2592                 eol = i;
2593         }
2594         SPLIT_APPEND(data, j, eol);
2595         j = i;
2596     }
2597     if (j < len) {
2598         SPLIT_APPEND(data, j, len);
2599     }
2600
2601     return list;
2602
2603  onError:
2604     Py_DECREF(list);
2605     return NULL;
2606 }
2607
2608 #undef SPLIT_APPEND
2609
2610 \f
2611 static PyMethodDef
2612 string_methods[] = {
2613         /* Counterparts of the obsolete stropmodule functions; except
2614            string.maketrans(). */
2615         {"join",       (PyCFunction)string_join,   METH_O, join__doc__},
2616         {"split",       (PyCFunction)string_split, METH_VARARGS, split__doc__},
2617         {"lower",      (PyCFunction)string_lower,  METH_NOARGS, lower__doc__},
2618         {"upper",       (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
2619         {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
2620         {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
2621         {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
2622         {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
2623         {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
2624         {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
2625         {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
2626         {"capitalize", (PyCFunction)string_capitalize,  METH_NOARGS, capitalize__doc__},
2627         {"count",      (PyCFunction)string_count,       METH_VARARGS, count__doc__},
2628         {"endswith",   (PyCFunction)string_endswith,    METH_VARARGS, endswith__doc__},
2629         {"find",       (PyCFunction)string_find,        METH_VARARGS, find__doc__},
2630         {"index",      (PyCFunction)string_index,       METH_VARARGS, index__doc__},
2631         {"lstrip",     (PyCFunction)string_lstrip,      METH_NOARGS, lstrip__doc__},
2632         {"replace",     (PyCFunction)string_replace,    METH_VARARGS, replace__doc__},
2633         {"rfind",       (PyCFunction)string_rfind,      METH_VARARGS, rfind__doc__},
2634         {"rindex",      (PyCFunction)string_rindex,     METH_VARARGS, rindex__doc__},
2635         {"rstrip",      (PyCFunction)string_rstrip,     METH_NOARGS, rstrip__doc__},
2636         {"startswith",  (PyCFunction)string_startswith, METH_VARARGS, startswith__doc__},
2637         {"strip",       (PyCFunction)string_strip,      METH_NOARGS, strip__doc__},
2638         {"swapcase",    (PyCFunction)string_swapcase,   METH_NOARGS, swapcase__doc__},
2639         {"translate",   (PyCFunction)string_translate,  METH_VARARGS, translate__doc__},
2640         {"title",       (PyCFunction)string_title,      METH_NOARGS, title__doc__},
2641         {"ljust",       (PyCFunction)string_ljust,      METH_VARARGS, ljust__doc__},
2642         {"rjust",       (PyCFunction)string_rjust,      METH_VARARGS, rjust__doc__},
2643         {"center",      (PyCFunction)string_center,     METH_VARARGS, center__doc__},
2644         {"encode",      (PyCFunction)string_encode,     METH_VARARGS, encode__doc__},
2645         {"decode",      (PyCFunction)string_decode,     METH_VARARGS, decode__doc__},
2646         {"expandtabs",  (PyCFunction)string_expandtabs, METH_VARARGS, expandtabs__doc__},
2647         {"splitlines",  (PyCFunction)string_splitlines, METH_VARARGS, splitlines__doc__},
2648 #if 0
2649         {"zfill",       (PyCFunction)string_zfill,      METH_VARARGS, zfill__doc__},
2650 #endif
2651         {NULL,     NULL}                     /* sentinel */
2652 };
2653
2654 staticforward PyObject *
2655 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2656
2657 static PyObject *
2658 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2659 {
2660         PyObject *x = NULL;
2661         static char *kwlist[] = {"object", 0};
2662
2663         if (type != &PyString_Type)
2664                 return str_subtype_new(type, args, kwds);
2665         if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
2666                 return NULL;
2667         if (x == NULL)
2668                 return PyString_FromString("");
2669         return PyObject_Str(x);
2670 }
2671
2672 static PyObject *
2673 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2674 {
2675         PyObject *tmp, *pnew;
2676         int n;
2677
2678         assert(PyType_IsSubtype(type, &PyString_Type));
2679         tmp = string_new(&PyString_Type, args, kwds);
2680         if (tmp == NULL)
2681                 return NULL;
2682         assert(PyString_CheckExact(tmp));
2683         n = PyString_GET_SIZE(tmp);
2684         pnew = type->tp_alloc(type, n);
2685         if (pnew != NULL) {
2686                 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
2687 #ifdef CACHE_HASH
2688                 ((PyStringObject *)pnew)->ob_shash =
2689                         ((PyStringObject *)tmp)->ob_shash;
2690 #endif
2691 #ifdef INTERN_STRINGS
2692                 ((PyStringObject *)pnew)->ob_sinterned =
2693                         ((PyStringObject *)tmp)->ob_sinterned;
2694 #endif
2695         }
2696         Py_DECREF(tmp);
2697         return pnew;
2698 }
2699
/* Docstring installed as tp_doc of the str type. */
static char string_doc[] =
"str(object) -> string\n"
"\n"
"Return a nice string representation of the object.\n"
"If the argument is a string, the return value is the same object.";
2705
/* Type object for the built-in str type.
   The initializer is positional, so the order of entries must follow
   the PyTypeObject layout; a 0 entry means the slot is not supplied
   here.  tp_flags includes Py_TPFLAGS_BASETYPE, and tp_new is
   string_new(), which forwards any type other than PyString_Type itself
   to str_subtype_new(). */
PyTypeObject PyString_Type = {
        PyObject_HEAD_INIT(&PyType_Type)
        0,                                      /* ob_size */
        "str",                                  /* tp_name */
        sizeof(PyStringObject),                 /* tp_basicsize */
        sizeof(char),                           /* tp_itemsize */
        (destructor)string_dealloc,             /* tp_dealloc */
        (printfunc)string_print,                /* tp_print */
        0,                                      /* tp_getattr */
        0,                                      /* tp_setattr */
        0,                                      /* tp_compare */
        (reprfunc)string_repr,                  /* tp_repr */
        0,                                      /* tp_as_number */
        &string_as_sequence,                    /* tp_as_sequence */
        0,                                      /* tp_as_mapping */
        (hashfunc)string_hash,                  /* tp_hash */
        0,                                      /* tp_call */
        (reprfunc)string_str,                   /* tp_str */
        PyObject_GenericGetAttr,                /* tp_getattro */
        0,                                      /* tp_setattro */
        &string_as_buffer,                      /* tp_as_buffer */
        Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
        string_doc,                             /* tp_doc */
        0,                                      /* tp_traverse */
        0,                                      /* tp_clear */
        (richcmpfunc)string_richcompare,        /* tp_richcompare */
        0,                                      /* tp_weaklistoffset */
        0,                                      /* tp_iter */
        0,                                      /* tp_iternext */
        string_methods,                         /* tp_methods */
        0,                                      /* tp_members */
        0,                                      /* tp_getset */
        0,                                      /* tp_base */
        0,                                      /* tp_dict */
        0,                                      /* tp_descr_get */
        0,                                      /* tp_descr_set */
        0,                                      /* tp_dictoffset */
        0,                                      /* tp_init */
        0,                                      /* tp_alloc */
        string_new,                             /* tp_new */
};
2747
2748 void
2749 PyString_Concat(register PyObject **pv, register PyObject *w)
2750 {
2751         register PyObject *v;
2752         if (*pv == NULL)
2753                 return;
2754         if (w == NULL || !PyString_Check(*pv)) {
2755                 Py_DECREF(*pv);
2756                 *pv = NULL;
2757                 return;
2758         }
2759         v = string_concat((PyStringObject *) *pv, w);
2760         Py_DECREF(*pv);
2761         *pv = v;
2762 }
2763
2764 void
2765 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
2766 {
2767         PyString_Concat(pv, w);
2768         Py_XDECREF(w);
2769 }
2770
2771
2772 /* The following function breaks the notion that strings are immutable:
2773    it changes the size of a string.  We get away with this only if there
2774    is only one module referencing the object.  You can also think of it
2775    as creating a new string object and destroying the old one, only
2776    more efficiently.  In any case, don't use this if the string may
2777    already be known to some other part of the code... */
2778
2779 int
2780 _PyString_Resize(PyObject **pv, int newsize)
2781 {
2782         register PyObject *v;
2783         register PyStringObject *sv;
2784         v = *pv;
2785         if (!PyString_Check(v) || v->ob_refcnt != 1) {
2786                 *pv = 0;
2787                 Py_DECREF(v);
2788                 PyErr_BadInternalCall();
2789                 return -1;
2790         }
2791         /* XXX UNREF/NEWREF interface should be more symmetrical */
2792 #ifdef Py_REF_DEBUG
2793         --_Py_RefTotal;
2794 #endif
2795         _Py_ForgetReference(v);
2796         *pv = (PyObject *)
2797                 PyObject_REALLOC((char *)v,
2798                         sizeof(PyStringObject) + newsize * sizeof(char));
2799         if (*pv == NULL) {
2800                 PyObject_DEL(v);
2801                 PyErr_NoMemory();
2802                 return -1;
2803         }
2804         _Py_NewReference(*pv);
2805         sv = (PyStringObject *) *pv;
2806         sv->ob_size = newsize;
2807         sv->ob_sval[newsize] = '\0';
2808         return 0;
2809 }
2810
2811 /* Helpers for formatstring */
2812
2813 static PyObject *
2814 getnextarg(PyObject *args, int arglen, int *p_argidx)
2815 {
2816         int argidx = *p_argidx;
2817         if (argidx < arglen) {
2818                 (*p_argidx)++;
2819                 if (arglen < 0)
2820                         return args;
2821                 else
2822                         return PyTuple_GetItem(args, argidx);
2823         }
2824         PyErr_SetString(PyExc_TypeError,
2825                         "not enough arguments for format string");
2826         return NULL;
2827 }
2828
/* Flag bits collected while parsing a format specifier.  Each bit
   stands for the flag character shown next to it. */
#define F_LJUST (1 << 0)        /* '-' */
#define F_SIGN  (1 << 1)        /* '+' */
#define F_BLANK (1 << 2)        /* ' ' */
#define F_ALT   (1 << 3)        /* '#' */
#define F_ZERO  (1 << 4)        /* '0' */
2841
2842 static int
2843 formatfloat(char *buf, size_t buflen, int flags,
2844             int prec, int type, PyObject *v)
2845 {
2846         /* fmt = '%#.' + `prec` + `type`
2847            worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
2848         char fmt[20];
2849         double x;
2850         if (!PyArg_Parse(v, "d;float argument required", &x))
2851                 return -1;
2852         if (prec < 0)
2853                 prec = 6;
2854         if (type == 'f' && fabs(x)/1e25 >= 1e25)
2855                 type = 'g';
2856         sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
2857         /* worst case length calc to ensure no buffer overrun:
2858              fmt = %#.<prec>g
2859              buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
2860                 for any double rep.)
2861              len = 1 + prec + 1 + 2 + 5 = 9 + prec
2862            If prec=0 the effective precision is 1 (the leading digit is
2863            always given), therefore increase by one to 10+prec. */
2864         if (buflen <= (size_t)10 + (size_t)prec) {
2865                 PyErr_SetString(PyExc_OverflowError,
2866                         "formatted float is too long (precision too large?)");
2867                 return -1;
2868         }
2869         sprintf(buf, fmt, x);
2870         return strlen(buf);
2871 }
2872
2873 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2874  * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
2875  * Python's regular ints.
2876  * Return value:  a new PyString*, or NULL if error.
2877  *  .  *pbuf is set to point into it,
2878  *     *plen set to the # of chars following that.
2879  *     Caller must decref it when done using pbuf.
2880  *     The string starting at *pbuf is of the form
2881  *         "-"? ("0x" | "0X")? digit+
2882  *     "0x"/"0X" are present only for x and X conversions, with F_ALT
2883  *         set in flags.  The case of hex digits will be correct,
2884  *     There will be at least prec digits, zero-filled on the left if
2885  *         necessary to get that many.
2886  * val          object to be converted
2887  * flags        bitmask of format flags; only F_ALT is looked at
2888  * prec         minimum number of digits; 0-fill on left if needed
2889  * type         a character in [duoxX]; u acts the same as d
2890  *
2891  * CAUTION:  o, x and X conversions on regular ints can never
2892  * produce a '-' sign, but can for Python's unbounded ints.
2893  */
2894 PyObject*
2895 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2896                      char **pbuf, int *plen)
2897 {
2898         PyObject *result = NULL;
2899         char *buf;
2900         int i;
2901         int sign;       /* 1 if '-', else 0 */
2902         int len;        /* number of characters */
2903         int numdigits;  /* len == numnondigits + numdigits */
2904         int numnondigits = 0;
2905
2906         switch (type) {
2907         case 'd':
2908         case 'u':
2909                 result = val->ob_type->tp_str(val);
2910                 break;
2911         case 'o':
2912                 result = val->ob_type->tp_as_number->nb_oct(val);
2913                 break;
2914         case 'x':
2915         case 'X':
2916                 numnondigits = 2;
2917                 result = val->ob_type->tp_as_number->nb_hex(val);
2918                 break;
2919         default:
2920                 assert(!"'type' not in [duoxX]");
2921         }
2922         if (!result)
2923                 return NULL;
2924
2925         /* To modify the string in-place, there can only be one reference. */
2926         if (result->ob_refcnt != 1) {
2927                 PyErr_BadInternalCall();
2928                 return NULL;
2929         }
2930         buf = PyString_AsString(result);
2931         len = PyString_Size(result);
2932         if (buf[len-1] == 'L') {
2933                 --len;
2934                 buf[len] = '\0';
2935         }
2936         sign = buf[0] == '-';
2937         numnondigits += sign;
2938         numdigits = len - numnondigits;
2939         assert(numdigits > 0);
2940
2941         /* Get rid of base marker unless F_ALT */
2942         if ((flags & F_ALT) == 0) {
2943                 /* Need to skip 0x, 0X or 0. */
2944                 int skipped = 0;
2945                 switch (type) {
2946                 case 'o':
2947                         assert(buf[sign] == '0');
2948                         /* If 0 is only digit, leave it alone. */
2949                         if (numdigits > 1) {
2950                                 skipped = 1;
2951                                 --numdigits;
2952                         }
2953                         break;
2954                 case 'x':
2955                 case 'X':
2956                         assert(buf[sign] == '0');
2957                         assert(buf[sign + 1] == 'x');
2958                         skipped = 2;
2959                         numnondigits -= 2;
2960                         break;
2961                 }
2962                 if (skipped) {
2963                         buf += skipped;
2964                         len -= skipped;
2965                         if (sign)
2966                                 buf[0] = '-';
2967                 }
2968                 assert(len == numnondigits + numdigits);
2969                 assert(numdigits > 0);
2970         }
2971
2972         /* Fill with leading zeroes to meet minimum width. */
2973         if (prec > numdigits) {
2974                 PyObject *r1 = PyString_FromStringAndSize(NULL,
2975                                         numnondigits + prec);
2976                 char *b1;
2977                 if (!r1) {
2978                         Py_DECREF(result);
2979                         return NULL;
2980                 }
2981                 b1 = PyString_AS_STRING(r1);
2982                 for (i = 0; i < numnondigits; ++i)
2983                         *b1++ = *buf++;
2984                 for (i = 0; i < prec - numdigits; i++)
2985                         *b1++ = '0';
2986                 for (i = 0; i < numdigits; i++)
2987                         *b1++ = *buf++;
2988                 *b1 = '\0';
2989                 Py_DECREF(result);
2990                 result = r1;
2991                 buf = PyString_AS_STRING(result);
2992                 len = numnondigits + prec;
2993         }
2994
2995         /* Fix up case for hex conversions. */
2996         switch (type) {
2997         case 'x':
2998                 /* Need to convert all upper case letters to lower case. */
2999                 for (i = 0; i < len; i++)
3000                         if (buf[i] >= 'A' && buf[i] <= 'F')
3001                                 buf[i] += 'a'-'A';
3002                 break;
3003         case 'X':
3004                 /* Need to convert 0x to 0X (and -0x to -0X). */
3005                 if (buf[sign + 1] == 'x')
3006                         buf[sign + 1] = 'X';
3007                 break;
3008         }
3009         *pbuf = buf;
3010         *plen = len;
3011         return result;
3012 }
3013
3014 static int
3015 formatint(char *buf, size_t buflen, int flags,
3016           int prec, int type, PyObject *v)
3017 {
3018         /* fmt = '%#.' + `prec` + 'l' + `type`
3019            worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3020            + 1 + 1 = 24 */
3021         char fmt[64];   /* plenty big enough! */
3022         long x;
3023         if (!PyArg_Parse(v, "l;int argument required", &x))
3024                 return -1;
3025         if (prec < 0)
3026                 prec = 1;
3027         sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
3028         /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
3029            worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
3030         if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
3031                 PyErr_SetString(PyExc_OverflowError,
3032                         "formatted integer is too long (precision too large?)");
3033                 return -1;
3034         }
3035         sprintf(buf, fmt, x);
3036         /* When converting 0 under %#x or %#X, C leaves off the base marker,
3037          * but we want it (for consistency with other %#x conversions, and
3038          * for consistency with Python's hex() function).
3039          * BUG 28-Apr-2001 tim:  At least two platform Cs (Metrowerks &
3040          * Compaq Tru64) violate the std by converting 0 w/ leading 0x anyway.
3041          * So add it only if the platform didn't already.
3042          */
3043         if (x == 0 &&
3044            (flags & F_ALT) &&
3045            (type == 'x' || type == 'X') &&
3046             buf[1] != (char)type)  /* this last always true under std C */
3047                 {
3048                 memmove(buf+2, buf, strlen(buf) + 1);
3049                 buf[0] = '0';
3050                 buf[1] = (char)type;
3051         }
3052         return strlen(buf);
3053 }
3054
3055 static int
3056 formatchar(char *buf, size_t buflen, PyObject *v)
3057 {
3058         /* presume that the buffer is at least 2 characters long */
3059         if (PyString_Check(v)) {
3060                 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
3061                         return -1;
3062         }
3063         else {
3064                 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
3065                         return -1;
3066         }
3067         buf[1] = '\0';
3068         return 1;
3069 }
3070
3071
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted. XXX This is a magic number. Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format. For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the macro acts as a
   single operand wherever it is used (for example after sizeof).
*/
#define FORMATBUFLEN ((size_t)120)
3081
3082 PyObject *
3083 PyString_Format(PyObject *format, PyObject *args)
3084 {
3085         char *fmt, *res;
3086         int fmtcnt, rescnt, reslen, arglen, argidx;
3087         int args_owned = 0;
3088         PyObject *result, *orig_args;
3089 #ifdef Py_USING_UNICODE
3090         PyObject *v, *w;
3091 #endif
3092         PyObject *dict = NULL;
3093         if (format == NULL || !PyString_Check(format) || args == NULL) {
3094                 PyErr_BadInternalCall();
3095                 return NULL;
3096         }
3097         orig_args = args;
3098         fmt = PyString_AsString(format);
3099         fmtcnt = PyString_Size(format);
3100         reslen = rescnt = fmtcnt + 100;
3101         result = PyString_FromStringAndSize((char *)NULL, reslen);
3102         if (result == NULL)
3103                 return NULL;
3104         res = PyString_AsString(result);
3105         if (PyTuple_Check(args)) {
3106                 arglen = PyTuple_Size(args);
3107                 argidx = 0;
3108         }
3109         else {
3110                 arglen = -1;
3111                 argidx = -2;
3112         }
3113         if (args->ob_type->tp_as_mapping)
3114                 dict = args;
3115         while (--fmtcnt >= 0) {
3116                 if (*fmt != '%') {
3117                         if (--rescnt < 0) {
3118                                 rescnt = fmtcnt + 100;
3119                                 reslen += rescnt;
3120                                 if (_PyString_Resize(&result, reslen) < 0)
3121                                         return NULL;
3122                                 res = PyString_AsString(result)
3123                                         + reslen - rescnt;
3124                                 --rescnt;
3125                         }
3126                         *res++ = *fmt++;
3127                 }
3128                 else {
3129                         /* Got a format specifier */
3130                         int flags = 0;
3131                         int width = -1;
3132                         int prec = -1;
3133                         int c = '\0';
3134                         int fill;
3135                         PyObject *v = NULL;
3136                         PyObject *temp = NULL;
3137                         char *pbuf;
3138                         int sign;
3139                         int len;
3140                         char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
3141 #ifdef Py_USING_UNICODE
3142                         char *fmt_start = fmt;
3143                         int argidx_start = argidx;
3144 #endif
3145
3146                         fmt++;
3147                         if (*fmt == '(') {
3148                                 char *keystart;
3149                                 int keylen;
3150                                 PyObject *key;
3151                                 int pcount = 1;
3152
3153                                 if (dict == NULL) {
3154                                         PyErr_SetString(PyExc_TypeError,
3155                                                  "format requires a mapping");
3156                                         goto error;
3157                                 }
3158                                 ++fmt;
3159                                 --fmtcnt;
3160                                 keystart = fmt;
3161                                 /* Skip over balanced parentheses */
3162                                 while (pcount > 0 && --fmtcnt >= 0) {
3163                                         if (*fmt == ')')
3164                                                 --pcount;
3165                                         else if (*fmt == '(')
3166                                                 ++pcount;
3167                                         fmt++;
3168                                 }
3169                                 keylen = fmt - keystart - 1;
3170                                 if (fmtcnt < 0 || pcount > 0) {
3171                                         PyErr_SetString(PyExc_ValueError,
3172                                                    "incomplete format key");
3173                                         goto error;
3174                                 }
3175                                 key = PyString_FromStringAndSize(keystart,
3176                                                                  keylen);
3177                                 if (key == NULL)
3178                                         goto error;
3179                                 if (args_owned) {
3180                                         Py_DECREF(args);
3181                                         args_owned = 0;
3182                                 }
3183                                 args = PyObject_GetItem(dict, key);
3184                                 Py_DECREF(key);
3185                                 if (args == NULL) {
3186                                         goto error;
3187                                 }
3188                                 args_owned = 1;
3189                                 arglen = -1;
3190                                 argidx = -2;
3191                         }
3192                         while (--fmtcnt >= 0) {
3193                                 switch (c = *fmt++) {
3194                                 case '-': flags |= F_LJUST; continue;
3195                                 case '+': flags |= F_SIGN; continue;
3196                                 case ' ': flags |= F_BLANK; continue;
3197                                 case '#': flags |= F_ALT; continue;
3198                                 case '0': flags |= F_ZERO; continue;
3199                                 }
3200                                 break;
3201                         }
3202                         if (c == '*') {
3203                                 v = getnextarg(args, arglen, &argidx);
3204                                 if (v == NULL)
3205                                         goto error;
3206                                 if (!PyInt_Check(v)) {
3207                                         PyErr_SetString(PyExc_TypeError,
3208                                                         "* wants int");
3209                                         goto error;
3210                                 }
3211                                 width = PyInt_AsLong(v);
3212                                 if (width < 0) {
3213                                         flags |= F_LJUST;
3214                                         width = -width;
3215                                 }
3216                                 if (--fmtcnt >= 0)
3217                                         c = *fmt++;
3218                         }
3219                         else if (c >= 0 && isdigit(c)) {
3220                                 width = c - '0';
3221                                 while (--fmtcnt >= 0) {
3222                                         c = Py_CHARMASK(*fmt++);
3223                                         if (!isdigit(c))
3224                                                 break;
3225                                         if ((width*10) / 10 != width) {
3226                                                 PyErr_SetString(
3227                                                         PyExc_ValueError,
3228                                                         "width too big");
3229                                                 goto error;
3230                                         }
3231                                         width = width*10 + (c - '0');
3232                                 }
3233                         }
3234                         if (c == '.') {
3235                                 prec = 0;
3236                                 if (--fmtcnt >= 0)
3237                                         c = *fmt++;
3238                                 if (c == '*') {
3239                                         v = getnextarg(args, arglen, &argidx);
3240                                         if (v == NULL)
3241                                                 goto error;
3242                                         if (!PyInt_Check(v)) {
3243                                                 PyErr_SetString(
3244                                                         PyExc_TypeError,
3245                                                         "* wants int");
3246                                                 goto error;
3247                                         }
3248                                         prec = PyInt_AsLong(v);
3249                                         if (prec < 0)
3250                                                 prec = 0;
3251                                         if (--fmtcnt >= 0)
3252                                                 c = *fmt++;
3253                                 }
3254                                 else if (c >= 0 && isdigit(c)) {
3255                                         prec = c - '0';
3256                                         while (--fmtcnt >= 0) {
3257                                                 c = Py_CHARMASK(*fmt++);
3258                                                 if (!isdigit(c))
3259                                                         break;
3260                                                 if ((prec*10) / 10 != prec) {
3261                                                         PyErr_SetString(
3262                                                             PyExc_ValueError,
3263                                                             "prec too big");
3264                                                         goto error;
3265                                                 }
3266                                                 prec = prec*10 + (c - '0');
3267                                         }
3268                                 }
3269                         } /* prec */
3270                         if (fmtcnt >= 0) {
3271                                 if (c == 'h' || c == 'l' || c == 'L') {
3272                                         if (--fmtcnt >= 0)
3273                                                 c = *fmt++;
3274                                 }
3275                         }
3276                         if (fmtcnt < 0) {
3277                                 PyErr_SetString(PyExc_ValueError,
3278                                                 "incomplete format");
3279                                 goto error;
3280                         }
3281                         if (c != '%') {
3282                                 v = getnextarg(args, arglen, &argidx);
3283                                 if (v == NULL)
3284                                         goto error;
3285                         }
3286                         sign = 0;
3287                         fill = ' ';
3288                         switch (c) {
3289                         case '%':
3290                                 pbuf = "%";
3291                                 len = 1;
3292                                 break;
3293                         case 's':
3294                         case 'r':
3295 #ifdef Py_USING_UNICODE
3296                                 if (PyUnicode_Check(v)) {
3297                                         fmt = fmt_start;
3298                                         argidx = argidx_start;
3299                                         goto unicode;
3300                                 }
3301 #endif
3302                                 if (c == 's')
3303                                 temp = PyObject_Str(v);
3304                                 else
3305                                         temp = PyObject_Repr(v);
3306                                 if (temp == NULL)
3307                                         goto error;
3308                                 if (!PyString_Check(temp)) {
3309                                         PyErr_SetString(PyExc_TypeError,
3310                                           "%s argument has non-string str()");
3311                                         goto error;
3312                                 }
3313                                 pbuf = PyString_AsString(temp);
3314                                 len = PyString_Size(temp);
3315                                 if (prec >= 0 && len > prec)
3316                                         len = prec;
3317                                 break;
3318                         case 'i':
3319                         case 'd':
3320                         case 'u':
3321                         case 'o':
3322                         case 'x':
3323                         case 'X':
3324                                 if (c == 'i')
3325                                         c = 'd';
3326                                 if (PyLong_Check(v)) {
3327                                         temp = _PyString_FormatLong(v, flags,
3328                                                 prec, c, &pbuf, &len);
3329                                         if (!temp)
3330                                                 goto error;
3331                                         /* unbounded ints can always produce
3332                                            a sign character! */
3333                                         sign = 1;
3334                                 }
3335                                 else {
3336                                         pbuf = formatbuf;
3337                                         len = formatint(pbuf, sizeof(formatbuf),
3338                                                         flags, prec, c, v);
3339                                         if (len < 0)
3340                                                 goto error;
3341                                         /* only d conversion is signed */
3342                                         sign = c == 'd';
3343                                 }
3344                                 if (flags & F_ZERO)
3345                                         fill = '0';
3346                                 break;
3347                         case 'e':
3348                         case 'E':
3349                         case 'f':
3350                         case 'g':
3351                         case 'G':
3352                                 pbuf = formatbuf;
3353                                 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
3354                                 if (len < 0)
3355                                         goto error;
3356                                 sign = 1;
3357                                 if (flags & F_ZERO)
3358                                         fill = '0';
3359                                 break;
3360                         case 'c':
3361                                 pbuf = formatbuf;
3362                                 len = formatchar(pbuf, sizeof(formatbuf), v);
3363                                 if (len < 0)
3364                                         goto error;
3365                                 break;
3366                         default:
3367                                 PyErr_Format(PyExc_ValueError,
3368                                   "unsupported format character '%c' (0x%x) "
3369                                   "at index %i",
3370                                   c, c, fmt - 1 - PyString_AsString(format));
3371                                 goto error;
3372                         }
3373                         if (sign) {
3374                                 if (*pbuf == '-' || *pbuf == '+') {
3375                                         sign = *pbuf++;
3376                                         len--;
3377                                 }
3378                                 else if (flags & F_SIGN)
3379                                         sign = '+';
3380                                 else if (flags & F_BLANK)
3381                                         sign = ' ';
3382                                 else
3383                                         sign = 0;
3384                         }
3385                         if (width < len)
3386                                 width = len;
3387                         if (rescnt < width + (sign != 0)) {
3388                                 reslen -= rescnt;
3389                                 rescnt = width + fmtcnt + 100;
3390                                 reslen += rescnt;
3391                                 if (_PyString_Resize(&result, reslen) < 0)
3392                                         return NULL;
3393                                 res = PyString_AsString(result)
3394                                         + reslen - rescnt;
3395                         }
3396                         if (sign) {
3397                                 if (fill != ' ')
3398                                         *res++ = sign;
3399                                 rescnt--;
3400                                 if (width > len)
3401                                         width--;
3402                         }
3403                         if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3404                                 assert(pbuf[0] == '0');
3405                                 assert(pbuf[1] == c);
3406                                 if (fill != ' ') {
3407                                         *res++ = *pbuf++;
3408                                         *res++ = *pbuf++;
3409                                 }
3410                                 rescnt -= 2;
3411                                 width -= 2;
3412                                 if (width < 0)
3413                                         width = 0;
3414                                 len -= 2;
3415                         }
3416                         if (width > len && !(flags & F_LJUST)) {
3417                                 do {
3418                                         --rescnt;
3419                                         *res++ = fill;
3420                                 } while (--width > len);
3421                         }
3422                         if (fill == ' ') {
3423                                 if (sign)
3424                                         *res++ = sign;
3425                                 if ((flags & F_ALT) &&
3426                                     (c == 'x' || c == 'X')) {
3427                                         assert(pbuf[0] == '0');
3428                                         assert(pbuf[1] == c);
3429                                         *res++ = *pbuf++;
3430                                         *res++ = *pbuf++;
3431                                 }
3432                         }
3433                         memcpy(res, pbuf, len);
3434                         res += len;
3435                         rescnt -= len;
3436                         while (--width >= len) {
3437                                 --rescnt;
3438                                 *res++ = ' ';
3439                         }
3440                         if (dict && (argidx < arglen) && c != '%') {
3441                                 PyErr_SetString(PyExc_TypeError,
3442                                            "not all arguments converted");
3443                                 goto error;
3444                         }
3445                         Py_XDECREF(temp);
3446                 } /* '%' */
3447         } /* until end */
3448         if (argidx < arglen && !dict) {
3449                 PyErr_SetString(PyExc_TypeError,
3450                                 "not all arguments converted");
3451                 goto error;
3452         }
3453         if (args_owned) {
3454                 Py_DECREF(args);
3455         }
3456         _PyString_Resize(&result, reslen - rescnt);
3457         return result;
3458
3459 #ifdef Py_USING_UNICODE
3460  unicode:
3461         if (args_owned) {
3462                 Py_DECREF(args);
3463                 args_owned = 0;
3464         }
3465         /* Fiddle args right (remove the first argidx arguments) */
3466         if (PyTuple_Check(orig_args) && argidx > 0) {
3467                 PyObject *v;
3468                 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3469                 v = PyTuple_New(n);
3470                 if (v == NULL)
3471                         goto error;
3472                 while (--n >= 0) {
3473                         PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3474                         Py_INCREF(w);
3475                         PyTuple_SET_ITEM(v, n, w);
3476                 }
3477                 args = v;
3478         } else {
3479                 Py_INCREF(orig_args);
3480                 args = orig_args;
3481         }
3482         args_owned = 1;
3483         /* Take what we have of the result and let the Unicode formatting
3484            function format the rest of the input. */
3485         rescnt = res - PyString_AS_STRING(result);
3486         if (_PyString_Resize(&result, rescnt))
3487                 goto error;
3488         fmtcnt = PyString_GET_SIZE(format) - \
3489                  (fmt - PyString_AS_STRING(format));
3490         format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3491         if (format == NULL)
3492                 goto error;
3493         v = PyUnicode_Format(format, args);
3494         Py_DECREF(format);
3495         if (v == NULL)
3496                 goto error;
3497         /* Paste what we have (result) to what the Unicode formatting
3498            function returned (v) and return the result (or error) */
3499         w = PyUnicode_Concat(result, v);
3500         Py_DECREF(result);
3501         Py_DECREF(v);
3502         Py_DECREF(args);
3503         return w;
3504 #endif /* Py_USING_UNICODE */
3505
3506  error:
3507         Py_DECREF(result);
3508         if (args_owned) {
3509                 Py_DECREF(args);
3510         }
3511         return NULL;
3512 }
3513
3514
3515 #ifdef INTERN_STRINGS
3516
3517 /* This dictionary will leak at PyString_Fini() time.  That's acceptable
3518  * because PyString_Fini() specifically frees interned strings that are
3519  * only referenced by this dictionary.  The CVS log entry for revision 2.45
3520  * says:
3521  *
3522  *    Change the Fini function to only remove otherwise unreferenced
3523  *    strings from the interned table.  There are references in
3524  *    hard-to-find static variables all over the interpreter, and it's not
3525  *    worth trying to get rid of all those; but "uninterning" isn't fair
3526  *    either and may cause subtle failures later -- so we have to keep them
3527  *    in the interned table.
3528  */
3529 static PyObject *interned;
3530
3531 void
3532 PyString_InternInPlace(PyObject **p)
3533 {
3534         register PyStringObject *s = (PyStringObject *)(*p);
3535         PyObject *t;
3536         if (s == NULL || !PyString_Check(s))
3537                 Py_FatalError("PyString_InternInPlace: strings only please!");
3538         if ((t = s->ob_sinterned) != NULL) {
3539                 if (t == (PyObject *)s)
3540                         return;
3541                 Py_INCREF(t);
3542                 *p = t;
3543                 Py_DECREF(s);
3544                 return;
3545         }
3546         if (interned == NULL) {
3547                 interned = PyDict_New();
3548                 if (interned == NULL)
3549                         return;
3550         }
3551         if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3552                 Py_INCREF(t);
3553                 *p = s->ob_sinterned = t;
3554                 Py_DECREF(s);
3555                 return;
3556         }
3557         /* Ensure that only true string objects appear in the intern dict,
3558            and as the value of ob_sinterned. */
3559         if (PyString_CheckExact(s)) {
3560                 t = (PyObject *)s;
3561                 if (PyDict_SetItem(interned, t, t) == 0) {
3562                         s->ob_sinterned = t;
3563                         return;
3564                 }
3565         }
3566         else {
3567                 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
3568                                                 PyString_GET_SIZE(s));
3569                 if (t != NULL) {
3570                         if (PyDict_SetItem(interned, t, t) == 0) {
3571                                 *p = s->ob_sinterned = t;
3572                                 Py_DECREF(s);
3573                                 return;
3574                         }
3575                         Py_DECREF(t);
3576                 }
3577         }
3578         PyErr_Clear();
3579 }
3580
3581
3582 PyObject *
3583 PyString_InternFromString(const char *cp)
3584 {
3585         PyObject *s = PyString_FromString(cp);
3586         if (s == NULL)
3587                 return NULL;
3588         PyString_InternInPlace(&s);
3589         return s;
3590 }
3591
3592 #endif
3593
3594 void
3595 PyString_Fini(void)
3596 {
3597         int i;
3598         for (i = 0; i < UCHAR_MAX + 1; i++) {
3599                 Py_XDECREF(characters[i]);
3600                 characters[i] = NULL;
3601         }
3602 #ifndef DONT_SHARE_SHORT_STRINGS
3603         Py_XDECREF(nullstring);
3604         nullstring = NULL;
3605 #endif
3606 #ifdef INTERN_STRINGS
3607         if (interned) {
3608                 int pos, changed;
3609                 PyObject *key, *value;
3610                 do {
3611                         changed = 0;
3612                         pos = 0;
3613                         while (PyDict_Next(interned, &pos, &key, &value)) {
3614                                 if (key->ob_refcnt == 2 && key == value) {
3615                                         PyDict_DelItem(interned, key);
3616                                         changed = 1;
3617                                 }
3618                         }
3619                 } while (changed);
3620         }
3621 #endif
3622 }
3623
3624 #ifdef INTERN_STRINGS
3625 void _Py_ReleaseInternedStrings(void)
3626 {
3627         if (interned) {
3628                 fprintf(stderr, "releasing interned strings\n");
3629                 PyDict_Clear(interned);
3630                 Py_DECREF(interned);
3631                 interned = NULL;
3632         }
3633 }
3634 #endif /* INTERN_STRINGS */