Objects/stringobject.c

   1 /***********************************************************
   2 Copyright (c) 2000, BeOpen.com.
   3 Copyright (c) 1995-2000, Corporation for National Research Initiatives.
   4 Copyright (c) 1990-1995, Stichting Mathematisch Centrum.
   5 All rights reserved.
   6
   7 See the file "Misc/COPYRIGHT" for information on usage and
   8 redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
   9 ******************************************************************/
  10
  11 /* String object implementation */
  12
  13 #include "Python.h"
  14
  15 #include <ctype.h>
  16
#ifdef COUNT_ALLOCS
/* Allocation statistics: how many times the shared empty string
   (null_strings) and a shared one-character string (one_strings) were
   handed out instead of creating a new object. */
int null_strings, one_strings;
#endif
  20
  21 #ifdef HAVE_LIMITS_H
  22 #include <limits.h>
  23 #else
  24 #ifndef UCHAR_MAX
  25 #define UCHAR_MAX 255
  26 #endif
  27 #endif
  28
/* Cache of shared one-character strings, indexed by the character value
   masked with UCHAR_MAX.  A slot stays NULL until a string for that
   character has been created. */
static PyStringObject *characters[UCHAR_MAX + 1];
#ifndef DONT_SHARE_SHORT_STRINGS
/* The shared zero-length string; NULL until it is first created. */
static PyStringObject *nullstring;
#endif
  33
  34 /*
  35    Newsizedstringobject() and newstringobject() try in certain cases
  36    to share string objects.  When the size of the string is zero,
  37    these routines always return a pointer to the same string object;
  38    when the size is one, they return a pointer to an already existing
  39    object if the contents of the string is known.  For
  40    newstringobject() this is always the case, for
  41    newsizedstringobject() this is the case when the first argument in
  42    not NULL.
  43    A common practice to allocate a string and then fill it in or
  44    change it must be done carefully.  It is only allowed to change the
  45    contents of the string if the obect was gotten from
  46    newsizedstringobject() with a NULL first argument, because in the
  47    future these routines may try to do even more sharing of objects.
  48 */
  49 PyObject *
  50 PyString_FromStringAndSize(const char *str, int size)
  51 {
  52         register PyStringObject *op;
  53 #ifndef DONT_SHARE_SHORT_STRINGS
  54         if (size == 0 && (op = nullstring) != NULL) {
  55 #ifdef COUNT_ALLOCS
  56                 null_strings++;
  57 #endif
  58                 Py_INCREF(op);
  59                 return (PyObject *)op;
  60         }
  61         if (size == 1 && str != NULL &&
  62             (op = characters[*str & UCHAR_MAX]) != NULL)
  63         {
  64 #ifdef COUNT_ALLOCS
  65                 one_strings++;
  66 #endif
  67                 Py_INCREF(op);
  68                 return (PyObject *)op;
  69         }
  70 #endif /* DONT_SHARE_SHORT_STRINGS */
  71
  72         /* PyObject_NewVar is inlined */
  73         op = (PyStringObject *)
  74                 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
  75         if (op == NULL)
  76                 return PyErr_NoMemory();
  77         PyObject_INIT_VAR(op, &PyString_Type, size);
  78 #ifdef CACHE_HASH
  79         op->ob_shash = -1;
  80 #endif
  81 #ifdef INTERN_STRINGS
  82         op->ob_sinterned = NULL;
  83 #endif
  84         if (str != NULL)
  85                 memcpy(op->ob_sval, str, size);
  86         op->ob_sval[size] = '\0';
  87 #ifndef DONT_SHARE_SHORT_STRINGS
  88         if (size == 0) {
  89                 nullstring = op;
  90                 Py_INCREF(op);
  91         } else if (size == 1 && str != NULL) {
  92                 characters[*str & UCHAR_MAX] = op;
  93                 Py_INCREF(op);
  94         }
  95 #endif
  96         return (PyObject *) op;
  97 }
  98
  99 PyObject *
 100 PyString_FromString(const char *str)
 101 {
 102         register size_t size = strlen(str);
 103         register PyStringObject *op;
 104         if (size > INT_MAX) {
 105                 PyErr_SetString(PyExc_OverflowError,
 106                         "string is too long for a Python string");
 107                 return NULL;
 108         }
 109 #ifndef DONT_SHARE_SHORT_STRINGS
 110         if (size == 0 && (op = nullstring) != NULL) {
 111 #ifdef COUNT_ALLOCS
 112                 null_strings++;
 113 #endif
 114                 Py_INCREF(op);
 115                 return (PyObject *)op;
 116         }
 117         if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
 118 #ifdef COUNT_ALLOCS
 119                 one_strings++;
 120 #endif
 121                 Py_INCREF(op);
 122                 return (PyObject *)op;
 123         }
 124 #endif /* DONT_SHARE_SHORT_STRINGS */
 125
 126         /* PyObject_NewVar is inlined */
 127         op = (PyStringObject *)
 128                 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
 129         if (op == NULL)
 130                 return PyErr_NoMemory();
 131         PyObject_INIT_VAR(op, &PyString_Type, size);
 132 #ifdef CACHE_HASH
 133         op->ob_shash = -1;
 134 #endif
 135 #ifdef INTERN_STRINGS
 136         op->ob_sinterned = NULL;
 137 #endif
 138         strcpy(op->ob_sval, str);
 139 #ifndef DONT_SHARE_SHORT_STRINGS
 140         if (size == 0) {
 141                 nullstring = op;
 142                 Py_INCREF(op);
 143         } else if (size == 1) {
 144                 characters[*str & UCHAR_MAX] = op;
 145                 Py_INCREF(op);
 146         }
 147 #endif
 148         return (PyObject *) op;
 149 }
 150
 151 PyObject *PyString_Decode(const char *s,
 152                           int size,
 153                           const char *encoding,
 154                           const char *errors)
 155 {
 156     PyObject *buffer = NULL, *str;
 157
 158     if (encoding == NULL)
 159         encoding = PyUnicode_GetDefaultEncoding();
 160
 161     /* Decode via the codec registry */
 162     buffer = PyBuffer_FromMemory((void *)s, size);
 163     if (buffer == NULL)
 164         goto onError;
 165     str = PyCodec_Decode(buffer, encoding, errors);
 166     if (str == NULL)
 167         goto onError;
 168     /* Convert Unicode to a string using the default encoding */
 169     if (PyUnicode_Check(str)) {
 170         PyObject *temp = str;
 171         str = PyUnicode_AsEncodedString(str, NULL, NULL);
 172         Py_DECREF(temp);
 173         if (str == NULL)
 174             goto onError;
 175     }
 176     if (!PyString_Check(str)) {
 177         PyErr_Format(PyExc_TypeError,
 178                      "decoder did not return a string object (type=%.400s)",
 179                      str->ob_type->tp_name);
 180         Py_DECREF(str);
 181         goto onError;
 182     }
 183     Py_DECREF(buffer);
 184     return str;
 185
 186  onError:
 187     Py_XDECREF(buffer);
 188     return NULL;
 189 }
 190
 191 PyObject *PyString_Encode(const char *s,
 192                           int size,
 193                           const char *encoding,
 194                           const char *errors)
 195 {
 196     PyObject *v, *str;
 197
 198     str = PyString_FromStringAndSize(s, size);
 199     if (str == NULL)
 200         return NULL;
 201     v = PyString_AsEncodedString(str, encoding, errors);
 202     Py_DECREF(str);
 203     return v;
 204 }
 205
 206 PyObject *PyString_AsEncodedString(PyObject *str,
 207                                    const char *encoding,
 208                                    const char *errors)
 209 {
 210     PyObject *v;
 211
 212     if (!PyString_Check(str)) {
 213         PyErr_BadArgument();
 214         goto onError;
 215     }
 216
 217     if (encoding == NULL)
 218         encoding = PyUnicode_GetDefaultEncoding();
 219
 220     /* Encode via the codec registry */
 221     v = PyCodec_Encode(str, encoding, errors);
 222     if (v == NULL)
 223         goto onError;
 224     /* Convert Unicode to a string using the default encoding */
 225     if (PyUnicode_Check(v)) {
 226         PyObject *temp = v;
 227         v = PyUnicode_AsEncodedString(v, NULL, NULL);
 228         Py_DECREF(temp);
 229         if (v == NULL)
 230             goto onError;
 231     }
 232     if (!PyString_Check(v)) {
 233         PyErr_Format(PyExc_TypeError,
 234                      "encoder did not return a string object (type=%.400s)",
 235                      v->ob_type->tp_name);
 236         Py_DECREF(v);
 237         goto onError;
 238     }
 239     return v;
 240
 241  onError:
 242     return NULL;
 243 }
 244
/* Release the memory of the string object op via PyObject_DEL(). */
static void
string_dealloc(PyObject *op)
{
        PyObject_DEL(op);
}
 250
 251 int
 252 PyString_Size(register PyObject *op)
 253 {
 254         if (!PyString_Check(op)) {
 255                 PyErr_BadInternalCall();
 256                 return -1;
 257         }
 258         return ((PyStringObject *)op) -> ob_size;
 259 }
 260
 261 /*const*/ char *
 262 PyString_AsString(register PyObject *op)
 263 {
 264         if (!PyString_Check(op)) {
 265                 PyErr_BadInternalCall();
 266                 return NULL;
 267         }
 268         return ((PyStringObject *)op) -> ob_sval;
 269 }
 270
 271 /* Methods */
 272
 273 static int
 274 string_print(PyStringObject *op, FILE *fp, int flags)
 275 {
 276         int i;
 277         char c;
 278         int quote;
 279         /* XXX Ought to check for interrupts when writing long strings */
 280         if (flags & Py_PRINT_RAW) {
 281                 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
 282                 return 0;
 283         }
 284
 285         /* figure out which quote to use; single is preferred */
 286         quote = '\'';
 287         if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
 288                 quote = '"';
 289
 290         fputc(quote, fp);
 291         for (i = 0; i < op->ob_size; i++) {
 292                 c = op->ob_sval[i];
 293                 if (c == quote || c == '\\')
 294                         fprintf(fp, "\\%c", c);
 295                 else if (c < ' ' || c >= 0177)
 296                         fprintf(fp, "\\%03o", c & 0377);
 297                 else
 298                         fputc(c, fp);
 299         }
 300         fputc(quote, fp);
 301         return 0;
 302 }
 303
 304 static PyObject *
 305 string_repr(register PyStringObject *op)
 306 {
 307         size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
 308         PyObject *v;
 309         if (newsize > INT_MAX) {
 310                 PyErr_SetString(PyExc_OverflowError,
 311                         "string is too large to make repr");
 312         }
 313         v = PyString_FromStringAndSize((char *)NULL, newsize);
 314         if (v == NULL) {
 315                 return NULL;
 316         }
 317         else {
 318                 register int i;
 319                 register char c;
 320                 register char *p;
 321                 int quote;
 322
 323                 /* figure out which quote to use; single is preferred */
 324                 quote = '\'';
 325                 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
 326                         quote = '"';
 327
 328                 p = ((PyStringObject *)v)->ob_sval;
 329                 *p++ = quote;
 330                 for (i = 0; i < op->ob_size; i++) {
 331                         c = op->ob_sval[i];
 332                         if (c == quote || c == '\\')
 333                                 *p++ = '\\', *p++ = c;
 334                         else if (c < ' ' || c >= 0177) {
 335                                 sprintf(p, "\\%03o", c & 0377);
 336                                 while (*p != '\0')
 337                                         p++;
 338                         }
 339                         else
 340                                 *p++ = c;
 341                 }
 342                 *p++ = quote;
 343                 *p = '\0';
 344                 _PyString_Resize(
 345                         &v, (int) (p - ((PyStringObject *)v)->ob_sval));
 346                 return v;
 347         }
 348 }
 349
/* Return the length (ob_size) of the string a; installed as the
   sq_length slot of string_as_sequence. */
static int
string_length(PyStringObject *a)
{
        return a->ob_size;
}
 355
 356 static PyObject *
 357 string_concat(register PyStringObject *a, register PyObject *bb)
 358 {
 359         register unsigned int size;
 360         register PyStringObject *op;
 361         if (!PyString_Check(bb)) {
 362                 if (PyUnicode_Check(bb))
 363                     return PyUnicode_Concat((PyObject *)a, bb);
 364                 PyErr_Format(PyExc_TypeError,
 365                              "cannot add type \"%.200s\" to string",
 366                              bb->ob_type->tp_name);
 367                 return NULL;
 368         }
 369 #define b ((PyStringObject *)bb)
 370         /* Optimize cases with empty left or right operand */
 371         if (a->ob_size == 0) {
 372                 Py_INCREF(bb);
 373                 return bb;
 374         }
 375         if (b->ob_size == 0) {
 376                 Py_INCREF(a);
 377                 return (PyObject *)a;
 378         }
 379         size = a->ob_size + b->ob_size;
 380         /* PyObject_NewVar is inlined */
 381         op = (PyStringObject *)
 382                 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
 383         if (op == NULL)
 384                 return PyErr_NoMemory();
 385         PyObject_INIT_VAR(op, &PyString_Type, size);
 386 #ifdef CACHE_HASH
 387         op->ob_shash = -1;
 388 #endif
 389 #ifdef INTERN_STRINGS
 390         op->ob_sinterned = NULL;
 391 #endif
 392         memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
 393         memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
 394         op->ob_sval[size] = '\0';
 395         return (PyObject *) op;
 396 #undef b
 397 }
 398
 399 static PyObject *
 400 string_repeat(register PyStringObject *a, register int n)
 401 {
 402         register int i;
 403         register int size;
 404         register PyStringObject *op;
 405         if (n < 0)
 406                 n = 0;
 407         size = a->ob_size * n;
 408         if (size == a->ob_size) {
 409                 Py_INCREF(a);
 410                 return (PyObject *)a;
 411         }
 412         /* PyObject_NewVar is inlined */
 413         op = (PyStringObject *)
 414                 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
 415         if (op == NULL)
 416                 return PyErr_NoMemory();
 417         PyObject_INIT_VAR(op, &PyString_Type, size);
 418 #ifdef CACHE_HASH
 419         op->ob_shash = -1;
 420 #endif
 421 #ifdef INTERN_STRINGS
 422         op->ob_sinterned = NULL;
 423 #endif
 424         for (i = 0; i < size; i += a->ob_size)
 425                 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
 426         op->ob_sval[size] = '\0';
 427         return (PyObject *) op;
 428 }
 429
 430 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
 431
 432 static PyObject *
 433 string_slice(register PyStringObject *a, register int i, register int j)
 434      /* j -- may be negative! */
 435 {
 436         if (i < 0)
 437                 i = 0;
 438         if (j < 0)
 439                 j = 0; /* Avoid signed/unsigned bug in next line */
 440         if (j > a->ob_size)
 441                 j = a->ob_size;
 442         if (i == 0 && j == a->ob_size) { /* It's the same as a */
 443                 Py_INCREF(a);
 444                 return (PyObject *)a;
 445         }
 446         if (j < i)
 447                 j = i;
 448         return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
 449 }
 450
 451 static int
 452 string_contains(PyObject *a, PyObject *el)
 453 {
 454         register char *s, *end;
 455         register char c;
 456         if (PyUnicode_Check(el))
 457                 return PyUnicode_Contains(a, el);
 458         if (!PyString_Check(el) || PyString_Size(el) != 1) {
 459                 PyErr_SetString(PyExc_TypeError,
 460                     "'in <string>' requires character as left operand");
 461                 return -1;
 462         }
 463         c = PyString_AsString(el)[0];
 464         s = PyString_AsString(a);
 465         end = s + PyString_Size(a);
 466         while (s < end) {
 467                 if (c == *s++)
 468                         return 1;
 469         }
 470         return 0;
 471 }
 472
 473 static PyObject *
 474 string_item(PyStringObject *a, register int i)
 475 {
 476         int c;
 477         PyObject *v;
 478         if (i < 0 || i >= a->ob_size) {
 479                 PyErr_SetString(PyExc_IndexError, "string index out of range");
 480                 return NULL;
 481         }
 482         c = a->ob_sval[i] & UCHAR_MAX;
 483         v = (PyObject *) characters[c];
 484 #ifdef COUNT_ALLOCS
 485         if (v != NULL)
 486                 one_strings++;
 487 #endif
 488         if (v == NULL) {
 489                 v = PyString_FromStringAndSize((char *)NULL, 1);
 490                 if (v == NULL)
 491                         return NULL;
 492                 characters[c] = (PyStringObject *) v;
 493                 ((PyStringObject *)v)->ob_sval[0] = c;
 494         }
 495         Py_INCREF(v);
 496         return v;
 497 }
 498
 499 static int
 500 string_compare(PyStringObject *a, PyStringObject *b)
 501 {
 502         int len_a = a->ob_size, len_b = b->ob_size;
 503         int min_len = (len_a < len_b) ? len_a : len_b;
 504         int cmp;
 505         if (min_len > 0) {
 506                 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
 507                 if (cmp == 0)
 508                         cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
 509                 if (cmp != 0)
 510                         return cmp;
 511         }
 512         return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
 513 }
 514
 515 static long
 516 string_hash(PyStringObject *a)
 517 {
 518         register int len;
 519         register unsigned char *p;
 520         register long x;
 521
 522 #ifdef CACHE_HASH
 523         if (a->ob_shash != -1)
 524                 return a->ob_shash;
 525 #ifdef INTERN_STRINGS
 526         if (a->ob_sinterned != NULL)
 527                 return (a->ob_shash =
 528                         ((PyStringObject *)(a->ob_sinterned))->ob_shash);
 529 #endif
 530 #endif
 531         len = a->ob_size;
 532         p = (unsigned char *) a->ob_sval;
 533         x = *p << 7;
 534         while (--len >= 0)
 535                 x = (1000003*x) ^ *p++;
 536         x ^= a->ob_size;
 537         if (x == -1)
 538                 x = -2;
 539 #ifdef CACHE_HASH
 540         a->ob_shash = x;
 541 #endif
 542         return x;
 543 }
 544
 545 static int
 546 string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
 547 {
 548         if ( index != 0 ) {
 549                 PyErr_SetString(PyExc_SystemError,
 550                                 "accessing non-existent string segment");
 551                 return -1;
 552         }
 553         *ptr = (void *)self->ob_sval;
 554         return self->ob_size;
 555 }
 556
/* Write-buffer accessor: strings are not offered as writable buffers,
   so this always raises TypeError and returns -1. */
static int
string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
{
        PyErr_SetString(PyExc_TypeError,
                        "Cannot use string as modifiable buffer");
        return -1;
}
 564
 565 static int
 566 string_buffer_getsegcount(PyStringObject *self, int *lenp)
 567 {
 568         if ( lenp )
 569                 *lenp = self->ob_size;
 570         return 1;
 571 }
 572
 573 static int
 574 string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
 575 {
 576         if ( index != 0 ) {
 577                 PyErr_SetString(PyExc_SystemError,
 578                                 "accessing non-existent string segment");
 579                 return -1;
 580         }
 581         *ptr = self->ob_sval;
 582         return self->ob_size;
 583 }
 584
/* Sequence-protocol slot table for strings.  The item- and
   slice-assignment slots are 0: no handler is provided for them. */
static PySequenceMethods string_as_sequence = {
        (inquiry)string_length, /*sq_length*/
        (binaryfunc)string_concat, /*sq_concat*/
        (intargfunc)string_repeat, /*sq_repeat*/
        (intargfunc)string_item, /*sq_item*/
        (intintargfunc)string_slice, /*sq_slice*/
        0,              /*sq_ass_item*/
        0,              /*sq_ass_slice*/
        (objobjproc)string_contains /*sq_contains*/
};
 595
/* Buffer-protocol slot table for strings: read buffer, write buffer
   (always fails), segment count and character buffer accessors. */
static PyBufferProcs string_as_buffer = {
        (getreadbufferproc)string_buffer_getreadbuf,
        (getwritebufferproc)string_buffer_getwritebuf,
        (getsegcountproc)string_buffer_getsegcount,
        (getcharbufferproc)string_buffer_getcharbuf,
};
 602
 603
 604 \f
/* Values for the striptype argument of do_strip(): which end(s) of the
   string have whitespace removed. */
#define LEFTSTRIP 0     /* leading whitespace only */
#define RIGHTSTRIP 1    /* trailing whitespace only */
#define BOTHSTRIP 2     /* both ends */
 608
 609
 610 static PyObject *
 611 split_whitespace(const char *s, int len, int maxsplit)
 612 {
 613         int i, j, err;
 614         PyObject* item;
 615         PyObject *list = PyList_New(0);
 616
 617         if (list == NULL)
 618                 return NULL;
 619
 620         for (i = j = 0; i < len; ) {
 621                 while (i < len && isspace(Py_CHARMASK(s[i])))
 622                         i++;
 623                 j = i;
 624                 while (i < len && !isspace(Py_CHARMASK(s[i])))
 625                         i++;
 626                 if (j < i) {
 627                         if (maxsplit-- <= 0)
 628                                 break;
 629                         item = PyString_FromStringAndSize(s+j, (int)(i-j));
 630                         if (item == NULL)
 631                                 goto finally;
 632                         err = PyList_Append(list, item);
 633                         Py_DECREF(item);
 634                         if (err < 0)
 635                                 goto finally;
 636                         while (i < len && isspace(Py_CHARMASK(s[i])))
 637                                 i++;
 638                         j = i;
 639                 }
 640         }
 641         if (j < len) {
 642                 item = PyString_FromStringAndSize(s+j, (int)(len - j));
 643                 if (item == NULL)
 644                         goto finally;
 645                 err = PyList_Append(list, item);
 646                 Py_DECREF(item);
 647                 if (err < 0)
 648                         goto finally;
 649         }
 650         return list;
 651   finally:
 652         Py_DECREF(list);
 653         return NULL;
 654 }
 655
 656
 657 static char split__doc__[] =
 658 "S.split([sep [,maxsplit]]) -> list of strings\n\
 659 \n\
 660 Return a list of the words in the string S, using sep as the\n\
 661 delimiter string.  If maxsplit is given, at most maxsplit\n\
 662 splits are done. If sep is not specified, any whitespace string\n\
 663 is a separator.";
 664
 665 static PyObject *
 666 string_split(PyStringObject *self, PyObject *args)
 667 {
 668         int len = PyString_GET_SIZE(self), n, i, j, err;
 669         int maxsplit = -1;
 670         const char *s = PyString_AS_STRING(self), *sub;
 671         PyObject *list, *item, *subobj = Py_None;
 672
 673         if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
 674                 return NULL;
 675         if (maxsplit < 0)
 676                 maxsplit = INT_MAX;
 677         if (subobj == Py_None)
 678                 return split_whitespace(s, len, maxsplit);
 679         if (PyString_Check(subobj)) {
 680                 sub = PyString_AS_STRING(subobj);
 681                 n = PyString_GET_SIZE(subobj);
 682         }
 683         else if (PyUnicode_Check(subobj))
 684                 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
 685         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
 686                 return NULL;
 687         if (n == 0) {
 688                 PyErr_SetString(PyExc_ValueError, "empty separator");
 689                 return NULL;
 690         }
 691
 692         list = PyList_New(0);
 693         if (list == NULL)
 694                 return NULL;
 695
 696         i = j = 0;
 697         while (i+n <= len) {
 698                 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
 699                         if (maxsplit-- <= 0)
 700                                 break;
 701                         item = PyString_FromStringAndSize(s+j, (int)(i-j));
 702                         if (item == NULL)
 703                                 goto fail;
 704                         err = PyList_Append(list, item);
 705                         Py_DECREF(item);
 706                         if (err < 0)
 707                                 goto fail;
 708                         i = j = i + n;
 709                 }
 710                 else
 711                         i++;
 712         }
 713         item = PyString_FromStringAndSize(s+j, (int)(len-j));
 714         if (item == NULL)
 715                 goto fail;
 716         err = PyList_Append(list, item);
 717         Py_DECREF(item);
 718         if (err < 0)
 719                 goto fail;
 720
 721         return list;
 722
 723  fail:
 724         Py_DECREF(list);
 725         return NULL;
 726 }
 727
 728
 729 static char join__doc__[] =
 730 "S.join(sequence) -> string\n\
 731 \n\
 732 Return a string which is the concatenation of the strings in the\n\
 733 sequence.  The separator between elements is S.";
 734
 735 static PyObject *
 736 string_join(PyStringObject *self, PyObject *args)
 737 {
 738         char *sep = PyString_AS_STRING(self);
 739         int seplen = PyString_GET_SIZE(self);
 740         PyObject *res = NULL;
 741         int reslen = 0;
 742         char *p;
 743         int seqlen = 0;
 744         int sz = 100;
 745         int i, slen, sz_incr;
 746         PyObject *orig, *seq, *item;
 747
 748         if (!PyArg_ParseTuple(args, "O:join", &orig))
 749                 return NULL;
 750
 751         if (!(seq = PySequence_Fast(orig, ""))) {
 752                 if (PyErr_ExceptionMatches(PyExc_TypeError))
 753                         PyErr_Format(PyExc_TypeError,
 754                                      "sequence expected, %.80s found",
 755                                      orig->ob_type->tp_name);
 756                 return NULL;
 757         }
 758         /* From here on out, errors go through finally: for proper
 759          * reference count manipulations.
 760          */
 761         seqlen = PySequence_Size(seq);
 762         if (seqlen == 1) {
 763                 item = PySequence_Fast_GET_ITEM(seq, 0);
 764                 Py_INCREF(item);
 765                 Py_DECREF(seq);
 766                 return item;
 767         }
 768
 769         if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
 770                 goto finally;
 771
 772         p = PyString_AS_STRING(res);
 773
 774         for (i = 0; i < seqlen; i++) {
 775                 item = PySequence_Fast_GET_ITEM(seq, i);
 776                 if (!PyString_Check(item)){
 777                         if (PyUnicode_Check(item)) {
 778                                 Py_DECREF(res);
 779                                 Py_DECREF(seq);
 780                                 return PyUnicode_Join((PyObject *)self, seq);
 781                         }
 782                         PyErr_Format(PyExc_TypeError,
 783                                      "sequence item %i: expected string,"
 784                                      " %.80s found",
 785                                      i, item->ob_type->tp_name);
 786                         goto finally;
 787                 }
 788                 slen = PyString_GET_SIZE(item);
 789                 while (reslen + slen + seplen >= sz) {
 790                         /* at least double the size of the string */
 791                         sz_incr = slen + seplen > sz ? slen + seplen : sz;
 792                         if (_PyString_Resize(&res, sz + sz_incr)) {
 793                                 goto finally;
 794                         }
 795                         sz += sz_incr;
 796                         p = PyString_AS_STRING(res) + reslen;
 797                 }
 798                 if (i > 0) {
 799                         memcpy(p, sep, seplen);
 800                         p += seplen;
 801                         reslen += seplen;
 802                 }
 803                 memcpy(p, PyString_AS_STRING(item), slen);
 804                 p += slen;
 805                 reslen += slen;
 806         }
 807         if (_PyString_Resize(&res, reslen))
 808                 goto finally;
 809         Py_DECREF(seq);
 810         return res;
 811
 812   finally:
 813         Py_DECREF(seq);
 814         Py_XDECREF(res);
 815         return NULL;
 816 }
 817
 818
 819
 820 static long
 821 string_find_internal(PyStringObject *self, PyObject *args, int dir)
 822 {
 823         const char *s = PyString_AS_STRING(self), *sub;
 824         int len = PyString_GET_SIZE(self);
 825         int n, i = 0, last = INT_MAX;
 826         PyObject *subobj;
 827
 828         if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
 829                 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
 830                 return -2;
 831         if (PyString_Check(subobj)) {
 832                 sub = PyString_AS_STRING(subobj);
 833                 n = PyString_GET_SIZE(subobj);
 834         }
 835         else if (PyUnicode_Check(subobj))
 836                 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
 837         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
 838                 return -2;
 839
 840         if (last > len)
 841                 last = len;
 842         if (last < 0)
 843                 last += len;
 844         if (last < 0)
 845                 last = 0;
 846         if (i < 0)
 847                 i += len;
 848         if (i < 0)
 849                 i = 0;
 850
 851         if (dir > 0) {
 852                 if (n == 0 && i <= last)
 853                         return (long)i;
 854                 last -= n;
 855                 for (; i <= last; ++i)
 856                         if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
 857                                 return (long)i;
 858         }
 859         else {
 860                 int j;
 861
 862                 if (n == 0 && i <= last)
 863                         return (long)last;
 864                 for (j = last-n; j >= i; --j)
 865                         if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
 866                                 return (long)j;
 867         }
 868
 869         return -1;
 870 }
 871
 872
 873 static char find__doc__[] =
 874 "S.find(sub [,start [,end]]) -> int\n\
 875 \n\
 876 Return the lowest index in S where substring sub is found,\n\
 877 such that sub is contained within s[start,end].  Optional\n\
 878 arguments start and end are interpreted as in slice notation.\n\
 879 \n\
 880 Return -1 on failure.";
 881
 882 static PyObject *
 883 string_find(PyStringObject *self, PyObject *args)
 884 {
 885         long result = string_find_internal(self, args, +1);
 886         if (result == -2)
 887                 return NULL;
 888         return PyInt_FromLong(result);
 889 }
 890
 891
 892 static char index__doc__[] =
 893 "S.index(sub [,start [,end]]) -> int\n\
 894 \n\
 895 Like S.find() but raise ValueError when the substring is not found.";
 896
 897 static PyObject *
 898 string_index(PyStringObject *self, PyObject *args)
 899 {
 900         long result = string_find_internal(self, args, +1);
 901         if (result == -2)
 902                 return NULL;
 903         if (result == -1) {
 904                 PyErr_SetString(PyExc_ValueError,
 905                                 "substring not found in string.index");
 906                 return NULL;
 907         }
 908         return PyInt_FromLong(result);
 909 }
 910
 911
 912 static char rfind__doc__[] =
 913 "S.rfind(sub [,start [,end]]) -> int\n\
 914 \n\
 915 Return the highest index in S where substring sub is found,\n\
 916 such that sub is contained within s[start,end].  Optional\n\
 917 arguments start and end are interpreted as in slice notation.\n\
 918 \n\
 919 Return -1 on failure.";
 920
 921 static PyObject *
 922 string_rfind(PyStringObject *self, PyObject *args)
 923 {
 924         long result = string_find_internal(self, args, -1);
 925         if (result == -2)
 926                 return NULL;
 927         return PyInt_FromLong(result);
 928 }
 929
 930
 931 static char rindex__doc__[] =
 932 "S.rindex(sub [,start [,end]]) -> int\n\
 933 \n\
 934 Like S.rfind() but raise ValueError when the substring is not found.";
 935
 936 static PyObject *
 937 string_rindex(PyStringObject *self, PyObject *args)
 938 {
 939         long result = string_find_internal(self, args, -1);
 940         if (result == -2)
 941                 return NULL;
 942         if (result == -1) {
 943                 PyErr_SetString(PyExc_ValueError,
 944                                 "substring not found in string.rindex");
 945                 return NULL;
 946         }
 947         return PyInt_FromLong(result);
 948 }
 949
 950
 951 static PyObject *
 952 do_strip(PyStringObject *self, PyObject *args, int striptype)
 953 {
 954         char *s = PyString_AS_STRING(self);
 955         int len = PyString_GET_SIZE(self), i, j;
 956
 957         if (!PyArg_ParseTuple(args, ":strip"))
 958                 return NULL;
 959
 960         i = 0;
 961         if (striptype != RIGHTSTRIP) {
 962                 while (i < len && isspace(Py_CHARMASK(s[i]))) {
 963                         i++;
 964                 }
 965         }
 966
 967         j = len;
 968         if (striptype != LEFTSTRIP) {
 969                 do {
 970                         j--;
 971                 } while (j >= i && isspace(Py_CHARMASK(s[j])));
 972                 j++;
 973         }
 974
 975         if (i == 0 && j == len) {
 976                 Py_INCREF(self);
 977                 return (PyObject*)self;
 978         }
 979         else
 980                 return PyString_FromStringAndSize(s+i, j-i);
 981 }
 982
 983
 984 static char strip__doc__[] =
 985 "S.strip() -> string\n\
 986 \n\
 987 Return a copy of the string S with leading and trailing\n\
 988 whitespace removed.";
 989
 990 static PyObject *
 991 string_strip(PyStringObject *self, PyObject *args)
 992 {
 993         return do_strip(self, args, BOTHSTRIP);
 994 }
 995
 996
 997 static char lstrip__doc__[] =
 998 "S.lstrip() -> string\n\
 999 \n\
1000 Return a copy of the string S with leading whitespace removed.";
1001
1002 static PyObject *
1003 string_lstrip(PyStringObject *self, PyObject *args)
1004 {
1005         return do_strip(self, args, LEFTSTRIP);
1006 }
1007
1008
1009 static char rstrip__doc__[] =
1010 "S.rstrip() -> string\n\
1011 \n\
1012 Return a copy of the string S with trailing whitespace removed.";
1013
1014 static PyObject *
1015 string_rstrip(PyStringObject *self, PyObject *args)
1016 {
1017         return do_strip(self, args, RIGHTSTRIP);
1018 }
1019
1020
1021 static char lower__doc__[] =
1022 "S.lower() -> string\n\
1023 \n\
1024 Return a copy of the string S converted to lowercase.";
1025
1026 static PyObject *
1027 string_lower(PyStringObject *self, PyObject *args)
1028 {
1029         char *s = PyString_AS_STRING(self), *s_new;
1030         int i, n = PyString_GET_SIZE(self);
1031         PyObject *new;
1032
1033         if (!PyArg_ParseTuple(args, ":lower"))
1034                 return NULL;
1035         new = PyString_FromStringAndSize(NULL, n);
1036         if (new == NULL)
1037                 return NULL;
1038         s_new = PyString_AsString(new);
1039         for (i = 0; i < n; i++) {
1040                 int c = Py_CHARMASK(*s++);
1041                 if (isupper(c)) {
1042                         *s_new = tolower(c);
1043                 } else
1044                         *s_new = c;
1045                 s_new++;
1046         }
1047         return new;
1048 }
1049
1050
1051 static char upper__doc__[] =
1052 "S.upper() -> string\n\
1053 \n\
1054 Return a copy of the string S converted to uppercase.";
1055
1056 static PyObject *
1057 string_upper(PyStringObject *self, PyObject *args)
1058 {
1059         char *s = PyString_AS_STRING(self), *s_new;
1060         int i, n = PyString_GET_SIZE(self);
1061         PyObject *new;
1062
1063         if (!PyArg_ParseTuple(args, ":upper"))
1064                 return NULL;
1065         new = PyString_FromStringAndSize(NULL, n);
1066         if (new == NULL)
1067                 return NULL;
1068         s_new = PyString_AsString(new);
1069         for (i = 0; i < n; i++) {
1070                 int c = Py_CHARMASK(*s++);
1071                 if (islower(c)) {
1072                         *s_new = toupper(c);
1073                 } else
1074                         *s_new = c;
1075                 s_new++;
1076         }
1077         return new;
1078 }
1079
1080
1081 static char title__doc__[] =
1082 "S.title() -> string\n\
1083 \n\
1084 Return a titlecased version of S, i.e. words start with uppercase\n\
1085 characters, all remaining cased characters have lowercase.";
1086
1087 static PyObject*
1088 string_title(PyUnicodeObject *self, PyObject *args)
1089 {
1090         char *s = PyString_AS_STRING(self), *s_new;
1091         int i, n = PyString_GET_SIZE(self);
1092         int previous_is_cased = 0;
1093         PyObject *new;
1094
1095         if (!PyArg_ParseTuple(args, ":title"))
1096                 return NULL;
1097         new = PyString_FromStringAndSize(NULL, n);
1098         if (new == NULL)
1099                 return NULL;
1100         s_new = PyString_AsString(new);
1101         for (i = 0; i < n; i++) {
1102                 int c = Py_CHARMASK(*s++);
1103                 if (islower(c)) {
1104                         if (!previous_is_cased)
1105                             c = toupper(c);
1106                         previous_is_cased = 1;
1107                 } else if (isupper(c)) {
1108                         if (previous_is_cased)
1109                             c = tolower(c);
1110                         previous_is_cased = 1;
1111                 } else
1112                         previous_is_cased = 0;
1113                 *s_new++ = c;
1114         }
1115         return new;
1116 }
1117
1118 static char capitalize__doc__[] =
1119 "S.capitalize() -> string\n\
1120 \n\
1121 Return a copy of the string S with only its first character\n\
1122 capitalized.";
1123
1124 static PyObject *
1125 string_capitalize(PyStringObject *self, PyObject *args)
1126 {
1127         char *s = PyString_AS_STRING(self), *s_new;
1128         int i, n = PyString_GET_SIZE(self);
1129         PyObject *new;
1130
1131         if (!PyArg_ParseTuple(args, ":capitalize"))
1132                 return NULL;
1133         new = PyString_FromStringAndSize(NULL, n);
1134         if (new == NULL)
1135                 return NULL;
1136         s_new = PyString_AsString(new);
1137         if (0 < n) {
1138                 int c = Py_CHARMASK(*s++);
1139                 if (islower(c))
1140                         *s_new = toupper(c);
1141                 else
1142                         *s_new = c;
1143                 s_new++;
1144         }
1145         for (i = 1; i < n; i++) {
1146                 int c = Py_CHARMASK(*s++);
1147                 if (isupper(c))
1148                         *s_new = tolower(c);
1149                 else
1150                         *s_new = c;
1151                 s_new++;
1152         }
1153         return new;
1154 }
1155
1156
1157 static char count__doc__[] =
1158 "S.count(sub[, start[, end]]) -> int\n\
1159 \n\
1160 Return the number of occurrences of substring sub in string\n\
1161 S[start:end].  Optional arguments start and end are\n\
1162 interpreted as in slice notation.";
1163
1164 static PyObject *
1165 string_count(PyStringObject *self, PyObject *args)
1166 {
1167         const char *s = PyString_AS_STRING(self), *sub;
1168         int len = PyString_GET_SIZE(self), n;
1169         int i = 0, last = INT_MAX;
1170         int m, r;
1171         PyObject *subobj;
1172
1173         if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1174                 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
1175                 return NULL;
1176
1177         if (PyString_Check(subobj)) {
1178                 sub = PyString_AS_STRING(subobj);
1179                 n = PyString_GET_SIZE(subobj);
1180         }
1181         else if (PyUnicode_Check(subobj))
1182                 return PyInt_FromLong(
1183                         PyUnicode_Count((PyObject *)self, subobj, i, last));
1184         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1185                 return NULL;
1186
1187         if (last > len)
1188                 last = len;
1189         if (last < 0)
1190                 last += len;
1191         if (last < 0)
1192                 last = 0;
1193         if (i < 0)
1194                 i += len;
1195         if (i < 0)
1196                 i = 0;
1197         m = last + 1 - n;
1198         if (n == 0)
1199                 return PyInt_FromLong((long) (m-i));
1200
1201         r = 0;
1202         while (i < m) {
1203                 if (!memcmp(s+i, sub, n)) {
1204                         r++;
1205                         i += n;
1206                 } else {
1207                         i++;
1208                 }
1209         }
1210         return PyInt_FromLong((long) r);
1211 }
1212
1213
1214 static char swapcase__doc__[] =
1215 "S.swapcase() -> string\n\
1216 \n\
1217 Return a copy of the string S with uppercase characters\n\
1218 converted to lowercase and vice versa.";
1219
1220 static PyObject *
1221 string_swapcase(PyStringObject *self, PyObject *args)
1222 {
1223         char *s = PyString_AS_STRING(self), *s_new;
1224         int i, n = PyString_GET_SIZE(self);
1225         PyObject *new;
1226
1227         if (!PyArg_ParseTuple(args, ":swapcase"))
1228                 return NULL;
1229         new = PyString_FromStringAndSize(NULL, n);
1230         if (new == NULL)
1231                 return NULL;
1232         s_new = PyString_AsString(new);
1233         for (i = 0; i < n; i++) {
1234                 int c = Py_CHARMASK(*s++);
1235                 if (islower(c)) {
1236                         *s_new = toupper(c);
1237                 }
1238                 else if (isupper(c)) {
1239                         *s_new = tolower(c);
1240                 }
1241                 else
1242                         *s_new = c;
1243                 s_new++;
1244         }
1245         return new;
1246 }
1247
1248
1249 static char translate__doc__[] =
1250 "S.translate(table [,deletechars]) -> string\n\
1251 \n\
1252 Return a copy of the string S, where all characters occurring\n\
1253 in the optional argument deletechars are removed, and the\n\
1254 remaining characters have been mapped through the given\n\
1255 translation table, which must be a string of length 256.";
1256
1257 static PyObject *
1258 string_translate(PyStringObject *self, PyObject *args)
1259 {
1260         register char *input, *output;
1261         register const char *table;
1262         register int i, c, changed = 0;
1263         PyObject *input_obj = (PyObject*)self;
1264         const char *table1, *output_start, *del_table=NULL;
1265         int inlen, tablen, dellen = 0;
1266         PyObject *result;
1267         int trans_table[256];
1268         PyObject *tableobj, *delobj = NULL;
1269
1270         if (!PyArg_ParseTuple(args, "O|O:translate",
1271                               &tableobj, &delobj))
1272                 return NULL;
1273
1274         if (PyString_Check(tableobj)) {
1275                 table1 = PyString_AS_STRING(tableobj);
1276                 tablen = PyString_GET_SIZE(tableobj);
1277         }
1278         else if (PyUnicode_Check(tableobj)) {
1279                 /* Unicode .translate() does not support the deletechars
1280                    parameter; instead a mapping to None will cause characters
1281                    to be deleted. */
1282                 if (delobj != NULL) {
1283                         PyErr_SetString(PyExc_TypeError,
1284                         "deletions are implemented differently for unicode");
1285                         return NULL;
1286                 }
1287                 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1288         }
1289         else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
1290                 return NULL;
1291
1292         if (delobj != NULL) {
1293                 if (PyString_Check(delobj)) {
1294                         del_table = PyString_AS_STRING(delobj);
1295                         dellen = PyString_GET_SIZE(delobj);
1296                 }
1297                 else if (PyUnicode_Check(delobj)) {
1298                         PyErr_SetString(PyExc_TypeError,
1299                         "deletions are implemented differently for unicode");
1300                         return NULL;
1301                 }
1302                 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1303                         return NULL;
1304
1305                 if (tablen != 256) {
1306                         PyErr_SetString(PyExc_ValueError,
1307                           "translation table must be 256 characters long");
1308                         return NULL;
1309                 }
1310         }
1311         else {
1312                 del_table = NULL;
1313                 dellen = 0;
1314         }
1315
1316         table = table1;
1317         inlen = PyString_Size(input_obj);
1318         result = PyString_FromStringAndSize((char *)NULL, inlen);
1319         if (result == NULL)
1320                 return NULL;
1321         output_start = output = PyString_AsString(result);
1322         input = PyString_AsString(input_obj);
1323
1324         if (dellen == 0) {
1325                 /* If no deletions are required, use faster code */
1326                 for (i = inlen; --i >= 0; ) {
1327                         c = Py_CHARMASK(*input++);
1328                         if (Py_CHARMASK((*output++ = table[c])) != c)
1329                                 changed = 1;
1330                 }
1331                 if (changed)
1332                         return result;
1333                 Py_DECREF(result);
1334                 Py_INCREF(input_obj);
1335                 return input_obj;
1336         }
1337
1338         for (i = 0; i < 256; i++)
1339                 trans_table[i] = Py_CHARMASK(table[i]);
1340
1341         for (i = 0; i < dellen; i++)
1342                 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1343
1344         for (i = inlen; --i >= 0; ) {
1345                 c = Py_CHARMASK(*input++);
1346                 if (trans_table[c] != -1)
1347                         if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1348                                 continue;
1349                 changed = 1;
1350         }
1351         if (!changed) {
1352                 Py_DECREF(result);
1353                 Py_INCREF(input_obj);
1354                 return input_obj;
1355         }
1356         /* Fix the size of the resulting string */
1357         if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1358                 return NULL;
1359         return result;
1360 }
1361
1362
1363 /* What follows is used for implementing replace().  Perry Stoll. */
1364
/*
  mymemfind

  strstr-like search over arbitrary memory blocks (embedded NUL bytes
  are fine, since explicit lengths are used).

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT and returns the index of that occurrence, or -1 when
  there is none.  A pattern longer than the block can never match, so
  -1 is returned in that case as well.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
        /* Last index at which a full pattern still fits; negative when
           the pattern is longer than the block, so the loop is skipped. */
        const int last_start = len - pat_len;
        int pos;

        for (pos = 0; pos <= last_start; pos++) {
                const char *candidate = mem + pos;

                /* cheap first-byte test before the full comparison */
                if (*candidate != pat[0])
                        continue;
                if (memcmp(candidate, pat, pat_len) == 0)
                        return pos;
        }
        return -1;
}
1390
/*
  mymemcnt

   Count the non-overlapping occurrences of PAT in MEM, scanning from
   left to right and resuming right after each match:
           mem=1111  and pat=11 gives 2,
           mem=11111 and pat=11 gives 2 as well.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
        int hits = 0;

        for (;;) {
                int consumed;

                if (len < 0)
                        break;
                consumed = mymemfind(mem, len, pat, pat_len);
                if (consumed == -1)
                        break;
                /* skip the unmatched prefix plus the match itself */
                consumed += pat_len;
                mem += consumed;
                len -= consumed;
                hits++;
        }
        return hits;
}
1414
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT occurrences are replaced, taken
   from the left; a negative COUNT means "replace all".

   If STR is empty, PAT is empty, the length of PAT is greater than the
   length of STR, or no occurrence is going to be replaced, then the
   original string is returned.  Otherwise, a new string is allocated
   here with PyMem_MALLOC and returned; the caller must release it with
   PyMem_FREE.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred (allocation failure, or a result too
       long to be described by an int).
*/
static char *
mymemreplace(const char *str, int len,          /* input string */
             const char *pat, int pat_len,      /* pattern string to find */
             const char *sub, int sub_len,      /* substitution string */
             int count,                         /* number of replacements */
             int *out_len)
{
        char *out_s;
        char *new_s;
        int nfound, offset, new_len;

        /* An empty pattern matches everywhere without consuming input,
           which would make the counting loop spin forever; treat it as
           "nothing to replace". */
        if (len == 0 || pat_len <= 0 || pat_len > len)
                goto return_same;

        /* find length of output string */
        nfound = mymemcnt(str, len, pat, pat_len);
        if (count < 0)
                count = INT_MAX;
        else if (nfound > count)
                nfound = count;
        if (nfound == 0)
                goto return_same;

        /* Growing replacements must not overflow the int result length.
           The caller reports a NULL return as a memory error. */
        if (sub_len > pat_len &&
            nfound > (INT_MAX - len) / (sub_len - pat_len))
                return NULL;
        new_len = len + nfound*(sub_len - pat_len);

        /* The result is empty when every byte belonged to a match and
           SUB is empty.  A zero-size allocation may legitimately yield
           NULL, which would be mistaken for out-of-memory, and the
           caller needs a real block to free, so ask for one byte. */
        new_s = (char *)PyMem_MALLOC(new_len > 0 ? new_len : 1);
        if (new_s == NULL) return NULL;

        *out_len = new_len;
        out_s = new_s;

        while (len > 0) {
                /* find index of next instance of pattern */
                offset = mymemfind(str, len, pat, pat_len);
                /* if not found,  break out of loop */
                if (offset == -1) break;

                /* copy non matching part of input string */
                memcpy(new_s, str, offset); /* copy part of str before pat */
                str += offset + pat_len; /* move str past pattern */
                len -= offset + pat_len; /* reduce length of str remaining */

                /* copy substitute into the output string */
                new_s += offset; /* move new_s to dest for sub string */
                memcpy(new_s, sub, sub_len); /* copy substring into new_s */
                new_s += sub_len; /* offset new_s past sub string */

                /* break when we've done count replacements */
                if (--count == 0) break;
        }
        /* copy any remaining values into output string */
        if (len > 0)
                memcpy(new_s, str, len);
        return out_s;

  return_same:
        *out_len = -1;
        return (char*)str;      /* have to cast away constness here */
}
1492
1493
1494 static char replace__doc__[] =
1495 "S.replace (old, new[, maxsplit]) -> string\n\
1496 \n\
1497 Return a copy of string S with all occurrences of substring\n\
1498 old replaced by new.  If the optional argument maxsplit is\n\
1499 given, only the first maxsplit occurrences are replaced.";
1500
1501 static PyObject *
1502 string_replace(PyStringObject *self, PyObject *args)
1503 {
1504         const char *str = PyString_AS_STRING(self), *sub, *repl;
1505         char *new_s;
1506         int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1507         int count = -1;
1508         PyObject *new;
1509         PyObject *subobj, *replobj;
1510
1511         if (!PyArg_ParseTuple(args, "OO|i:replace",
1512                               &subobj, &replobj, &count))
1513                 return NULL;
1514
1515         if (PyString_Check(subobj)) {
1516                 sub = PyString_AS_STRING(subobj);
1517                 sub_len = PyString_GET_SIZE(subobj);
1518         }
1519         else if (PyUnicode_Check(subobj))
1520                 return PyUnicode_Replace((PyObject *)self,
1521                                          subobj, replobj, count);
1522         else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1523                 return NULL;
1524
1525         if (PyString_Check(replobj)) {
1526                 repl = PyString_AS_STRING(replobj);
1527                 repl_len = PyString_GET_SIZE(replobj);
1528         }
1529         else if (PyUnicode_Check(replobj))
1530                 return PyUnicode_Replace((PyObject *)self,
1531                                          subobj, replobj, count);
1532         else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1533                 return NULL;
1534
1535         if (sub_len <= 0) {
1536                 PyErr_SetString(PyExc_ValueError, "empty pattern string");
1537                 return NULL;
1538         }
1539         new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
1540         if (new_s == NULL) {
1541                 PyErr_NoMemory();
1542                 return NULL;
1543         }
1544         if (out_len == -1) {
1545                 /* we're returning another reference to self */
1546                 new = (PyObject*)self;
1547                 Py_INCREF(new);
1548         }
1549         else {
1550                 new = PyString_FromStringAndSize(new_s, out_len);
1551                 PyMem_FREE(new_s);
1552         }
1553         return new;
1554 }
1555
1556
1557 static char startswith__doc__[] =
1558 "S.startswith(prefix[, start[, end]]) -> int\n\
1559 \n\
1560 Return 1 if S starts with the specified prefix, otherwise return 0.  With\n\
1561 optional start, test S beginning at that position.  With optional end, stop\n\
1562 comparing S at that position.";
1563
1564 static PyObject *
1565 string_startswith(PyStringObject *self, PyObject *args)
1566 {
1567         const char* str = PyString_AS_STRING(self);
1568         int len = PyString_GET_SIZE(self);
1569         const char* prefix;
1570         int plen;
1571         int start = 0;
1572         int end = -1;
1573         PyObject *subobj;
1574
1575         if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1576                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1577                 return NULL;
1578         if (PyString_Check(subobj)) {
1579                 prefix = PyString_AS_STRING(subobj);
1580                 plen = PyString_GET_SIZE(subobj);
1581         }
1582         else if (PyUnicode_Check(subobj))
1583                 return PyInt_FromLong(
1584                         PyUnicode_Tailmatch((PyObject *)self,
1585                                             subobj, start, end, -1));
1586         else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
1587                 return NULL;
1588
1589         /* adopt Java semantics for index out of range.  it is legal for
1590          * offset to be == plen, but this only returns true if prefix is
1591          * the empty string.
1592          */
1593         if (start < 0 || start+plen > len)
1594                 return PyInt_FromLong(0);
1595
1596         if (!memcmp(str+start, prefix, plen)) {
1597                 /* did the match end after the specified end? */
1598                 if (end < 0)
1599                         return PyInt_FromLong(1);
1600                 else if (end - start < plen)
1601                         return PyInt_FromLong(0);
1602                 else
1603                         return PyInt_FromLong(1);
1604         }
1605         else return PyInt_FromLong(0);
1606 }
1607
1608
1609 static char endswith__doc__[] =
1610 "S.endswith(suffix[, start[, end]]) -> int\n\
1611 \n\
1612 Return 1 if S ends with the specified suffix, otherwise return 0.  With\n\
1613 optional start, test S beginning at that position.  With optional end, stop\n\
1614 comparing S at that position.";
1615
1616 static PyObject *
1617 string_endswith(PyStringObject *self, PyObject *args)
1618 {
1619         const char* str = PyString_AS_STRING(self);
1620         int len = PyString_GET_SIZE(self);
1621         const char* suffix;
1622         int slen;
1623         int start = 0;
1624         int end = -1;
1625         int lower, upper;
1626         PyObject *subobj;
1627
1628         if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1629                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1630                 return NULL;
1631         if (PyString_Check(subobj)) {
1632                 suffix = PyString_AS_STRING(subobj);
1633                 slen = PyString_GET_SIZE(subobj);
1634         }
1635         else if (PyUnicode_Check(subobj))
1636                 return PyInt_FromLong(
1637                         PyUnicode_Tailmatch((PyObject *)self,
1638                                             subobj, start, end, +1));
1639         else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
1640                 return NULL;
1641
1642         if (start < 0 || start > len || slen > len)
1643                 return PyInt_FromLong(0);
1644
1645         upper = (end >= 0 && end <= len) ? end : len;
1646         lower = (upper - slen) > start ? (upper - slen) : start;
1647
1648         if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
1649                 return PyInt_FromLong(1);
1650         else return PyInt_FromLong(0);
1651 }
1652
1653
1654 static char encode__doc__[] =
1655 "S.encode([encoding[,errors]]) -> string\n\
1656 \n\
1657 Return an encoded string version of S. Default encoding is the current\n\
1658 default string encoding. errors may be given to set a different error\n\
1659 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1660 a ValueError. Other possible values are 'ignore' and 'replace'.";
1661
1662 static PyObject *
1663 string_encode(PyStringObject *self, PyObject *args)
1664 {
1665     char *encoding = NULL;
1666     char *errors = NULL;
1667     if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
1668         return NULL;
1669     return PyString_AsEncodedString((PyObject *)self, encoding, errors);
1670 }
1671
1672
1673 static char expandtabs__doc__[] =
1674 "S.expandtabs([tabsize]) -> string\n\
1675 \n\
1676 Return a copy of S where all tab characters are expanded using spaces.\n\
1677 If tabsize is not given, a tab size of 8 characters is assumed.";
1678
1679 static PyObject*
1680 string_expandtabs(PyStringObject *self, PyObject *args)
1681 {
1682     const char *e, *p;
1683     char *q;
1684     int i, j;
1685     PyObject *u;
1686     int tabsize = 8;
1687
1688     if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1689         return NULL;
1690
1691     /* First pass: determine size of output string */
1692     i = j = 0;
1693     e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1694     for (p = PyString_AS_STRING(self); p < e; p++)
1695         if (*p == '\t') {
1696             if (tabsize > 0)
1697                 j += tabsize - (j % tabsize);
1698         }
1699         else {
1700             j++;
1701             if (*p == '\n' || *p == '\r') {
1702                 i += j;
1703                 j = 0;
1704             }
1705         }
1706
1707     /* Second pass: create output string and fill it */
1708     u = PyString_FromStringAndSize(NULL, i + j);
1709     if (!u)
1710         return NULL;
1711
1712     j = 0;
1713     q = PyString_AS_STRING(u);
1714
1715     for (p = PyString_AS_STRING(self); p < e; p++)
1716         if (*p == '\t') {
1717             if (tabsize > 0) {
1718                 i = tabsize - (j % tabsize);
1719                 j += i;
1720                 while (i--)
1721                     *q++ = ' ';
1722             }
1723         }
1724         else {
1725             j++;
1726             *q++ = *p;
1727             if (*p == '\n' || *p == '\r')
1728                 j = 0;
1729         }
1730
1731     return u;
1732 }
1733
1734 static
1735 PyObject *pad(PyStringObject *self,
1736               int left,
1737               int right,
1738               char fill)
1739 {
1740     PyObject *u;
1741
1742     if (left < 0)
1743         left = 0;
1744     if (right < 0)
1745         right = 0;
1746
1747     if (left == 0 && right == 0) {
1748         Py_INCREF(self);
1749         return (PyObject *)self;
1750     }
1751
1752     u = PyString_FromStringAndSize(NULL,
1753                                    left + PyString_GET_SIZE(self) + right);
1754     if (u) {
1755         if (left)
1756             memset(PyString_AS_STRING(u), fill, left);
1757         memcpy(PyString_AS_STRING(u) + left,
1758                PyString_AS_STRING(self),
1759                PyString_GET_SIZE(self));
1760         if (right)
1761             memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1762                    fill, right);
1763     }
1764
1765     return u;
1766 }
1767
1768 static char ljust__doc__[] =
1769 "S.ljust(width) -> string\n\
1770 \n\
1771 Return S left justified in a string of length width. Padding is\n\
1772 done using spaces.";
1773
1774 static PyObject *
1775 string_ljust(PyStringObject *self, PyObject *args)
1776 {
1777     int width;
1778     if (!PyArg_ParseTuple(args, "i:ljust", &width))
1779         return NULL;
1780
1781     if (PyString_GET_SIZE(self) >= width) {
1782         Py_INCREF(self);
1783         return (PyObject*) self;
1784     }
1785
1786     return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1787 }
1788
1789
1790 static char rjust__doc__[] =
1791 "S.rjust(width) -> string\n\
1792 \n\
1793 Return S right justified in a string of length width. Padding is\n\
1794 done using spaces.";
1795
1796 static PyObject *
1797 string_rjust(PyStringObject *self, PyObject *args)
1798 {
1799     int width;
1800     if (!PyArg_ParseTuple(args, "i:rjust", &width))
1801         return NULL;
1802
1803     if (PyString_GET_SIZE(self) >= width) {
1804         Py_INCREF(self);
1805         return (PyObject*) self;
1806     }
1807
1808     return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1809 }
1810
1811
1812 static char center__doc__[] =
1813 "S.center(width) -> string\n\
1814 \n\
1815 Return S centered in a string of length width. Padding is done\n\
1816 using spaces.";
1817
1818 static PyObject *
1819 string_center(PyStringObject *self, PyObject *args)
1820 {
1821     int marg, left;
1822     int width;
1823
1824     if (!PyArg_ParseTuple(args, "i:center", &width))
1825         return NULL;
1826
1827     if (PyString_GET_SIZE(self) >= width) {
1828         Py_INCREF(self);
1829         return (PyObject*) self;
1830     }
1831
1832     marg = width - PyString_GET_SIZE(self);
1833     left = marg / 2 + (marg & width & 1);
1834
1835     return pad(self, left, marg - left, ' ');
1836 }
1837
#if 0
static char zfill__doc__[] =
"S.zfill(width) -> string\n\
\n\
Pad a numeric string x with zeros on the left, to fill a field\n\
of the specified width. The string x is never truncated.";

/* S.zfill(width) -- currently compiled out by the surrounding #if 0.
   Left-pads with '0' up to width; if the original string started with
   a '+' or '-' sign, the sign is moved in front of the zeros. */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
    PyObject *padded;
    char *digits;
    int width, zeros;

    if (!PyArg_ParseTuple(args, "i:zfill", &width))
        return NULL;

    if (width <= PyString_GET_SIZE(self)) {
        Py_INCREF(self);
        return (PyObject*) self;
    }

    zeros = width - PyString_GET_SIZE(self);

    padded = pad(self, zeros, 0, '0');
    if (padded == NULL)
        return NULL;

    /* digits[zeros] is the first character of the original string */
    digits = PyString_AS_STRING(padded);
    switch (digits[zeros]) {
    case '+':
    case '-':
        /* move sign to beginning of string */
        digits[0] = digits[zeros];
        digits[zeros] = '0';
        break;
    default:
        break;
    }

    return padded;
}
#endif
1877
1878 static char isspace__doc__[] =
1879 "S.isspace() -> int\n\
1880 \n\
1881 Return 1 if there are only whitespace characters in S,\n\
1882 0 otherwise.";
1883
1884 static PyObject*
1885 string_isspace(PyStringObject *self, PyObject *args)
1886 {
1887     register const unsigned char *p
1888         = (unsigned char *) PyString_AS_STRING(self);
1889     register const unsigned char *e;
1890
1891     if (!PyArg_NoArgs(args))
1892         return NULL;
1893
1894     /* Shortcut for single character strings */
1895     if (PyString_GET_SIZE(self) == 1 &&
1896         isspace(*p))
1897         return PyInt_FromLong(1);
1898
1899     /* Special case for empty strings */
1900     if (PyString_GET_SIZE(self) == 0)
1901         return PyInt_FromLong(0);
1902
1903     e = p + PyString_GET_SIZE(self);
1904     for (; p < e; p++) {
1905         if (!isspace(*p))
1906             return PyInt_FromLong(0);
1907     }
1908     return PyInt_FromLong(1);
1909 }
1910
1911
1912 static char isalpha__doc__[] =
1913 "S.isalpha() -> int\n\
1914 \n\
1915 Return 1 if  all characters in S are alphabetic\n\
1916 and there is at least one character in S, 0 otherwise.";
1917
1918 static PyObject*
1919 string_isalpha(PyUnicodeObject *self, PyObject *args)
1920 {
1921     register const unsigned char *p
1922         = (unsigned char *) PyString_AS_STRING(self);
1923     register const unsigned char *e;
1924
1925     if (!PyArg_NoArgs(args))
1926         return NULL;
1927
1928     /* Shortcut for single character strings */
1929     if (PyString_GET_SIZE(self) == 1 &&
1930         isalpha(*p))
1931         return PyInt_FromLong(1);
1932
1933     /* Special case for empty strings */
1934     if (PyString_GET_SIZE(self) == 0)
1935         return PyInt_FromLong(0);
1936
1937     e = p + PyString_GET_SIZE(self);
1938     for (; p < e; p++) {
1939         if (!isalpha(*p))
1940             return PyInt_FromLong(0);
1941     }
1942     return PyInt_FromLong(1);
1943 }
1944
1945
1946 static char isalnum__doc__[] =
1947 "S.isalnum() -> int\n\
1948 \n\
1949 Return 1 if  all characters in S are alphanumeric\n\
1950 and there is at least one character in S, 0 otherwise.";
1951
1952 static PyObject*
1953 string_isalnum(PyUnicodeObject *self, PyObject *args)
1954 {
1955     register const unsigned char *p
1956         = (unsigned char *) PyString_AS_STRING(self);
1957     register const unsigned char *e;
1958
1959     if (!PyArg_NoArgs(args))
1960         return NULL;
1961
1962     /* Shortcut for single character strings */
1963     if (PyString_GET_SIZE(self) == 1 &&
1964         isalnum(*p))
1965         return PyInt_FromLong(1);
1966
1967     /* Special case for empty strings */
1968     if (PyString_GET_SIZE(self) == 0)
1969         return PyInt_FromLong(0);
1970
1971     e = p + PyString_GET_SIZE(self);
1972     for (; p < e; p++) {
1973         if (!isalnum(*p))
1974             return PyInt_FromLong(0);
1975     }
1976     return PyInt_FromLong(1);
1977 }
1978
1979
1980 static char isdigit__doc__[] =
1981 "S.isdigit() -> int\n\
1982 \n\
1983 Return 1 if there are only digit characters in S,\n\
1984 0 otherwise.";
1985
1986 static PyObject*
1987 string_isdigit(PyStringObject *self, PyObject *args)
1988 {
1989     register const unsigned char *p
1990         = (unsigned char *) PyString_AS_STRING(self);
1991     register const unsigned char *e;
1992
1993     if (!PyArg_NoArgs(args))
1994         return NULL;
1995
1996     /* Shortcut for single character strings */
1997     if (PyString_GET_SIZE(self) == 1 &&
1998         isdigit(*p))
1999         return PyInt_FromLong(1);
2000
2001     /* Special case for empty strings */
2002     if (PyString_GET_SIZE(self) == 0)
2003         return PyInt_FromLong(0);
2004
2005     e = p + PyString_GET_SIZE(self);
2006     for (; p < e; p++) {
2007         if (!isdigit(*p))
2008             return PyInt_FromLong(0);
2009     }
2010     return PyInt_FromLong(1);
2011 }
2012
2013
2014 static char islower__doc__[] =
2015 "S.islower() -> int\n\
2016 \n\
2017 Return 1 if  all cased characters in S are lowercase and there is\n\
2018 at least one cased character in S, 0 otherwise.";
2019
2020 static PyObject*
2021 string_islower(PyStringObject *self, PyObject *args)
2022 {
2023     register const unsigned char *p
2024         = (unsigned char *) PyString_AS_STRING(self);
2025     register const unsigned char *e;
2026     int cased;
2027
2028     if (!PyArg_NoArgs(args))
2029         return NULL;
2030
2031     /* Shortcut for single character strings */
2032     if (PyString_GET_SIZE(self) == 1)
2033         return PyInt_FromLong(islower(*p) != 0);
2034
2035     /* Special case for empty strings */
2036     if (PyString_GET_SIZE(self) == 0)
2037         return PyInt_FromLong(0);
2038
2039     e = p + PyString_GET_SIZE(self);
2040     cased = 0;
2041     for (; p < e; p++) {
2042         if (isupper(*p))
2043             return PyInt_FromLong(0);
2044         else if (!cased && islower(*p))
2045             cased = 1;
2046     }
2047     return PyInt_FromLong(cased);
2048 }
2049
2050
2051 static char isupper__doc__[] =
2052 "S.isupper() -> int\n\
2053 \n\
2054 Return 1 if  all cased characters in S are uppercase and there is\n\
2055 at least one cased character in S, 0 otherwise.";
2056
2057 static PyObject*
2058 string_isupper(PyStringObject *self, PyObject *args)
2059 {
2060     register const unsigned char *p
2061         = (unsigned char *) PyString_AS_STRING(self);
2062     register const unsigned char *e;
2063     int cased;
2064
2065     if (!PyArg_NoArgs(args))
2066         return NULL;
2067
2068     /* Shortcut for single character strings */
2069     if (PyString_GET_SIZE(self) == 1)
2070         return PyInt_FromLong(isupper(*p) != 0);
2071
2072     /* Special case for empty strings */
2073     if (PyString_GET_SIZE(self) == 0)
2074         return PyInt_FromLong(0);
2075
2076     e = p + PyString_GET_SIZE(self);
2077     cased = 0;
2078     for (; p < e; p++) {
2079         if (islower(*p))
2080             return PyInt_FromLong(0);
2081         else if (!cased && isupper(*p))
2082             cased = 1;
2083     }
2084     return PyInt_FromLong(cased);
2085 }
2086
2087
2088 static char istitle__doc__[] =
2089 "S.istitle() -> int\n\
2090 \n\
2091 Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2092 may only follow uncased characters and lowercase characters only cased\n\
2093 ones. Return 0 otherwise.";
2094
2095 static PyObject*
2096 string_istitle(PyStringObject *self, PyObject *args)
2097 {
2098     register const unsigned char *p
2099         = (unsigned char *) PyString_AS_STRING(self);
2100     register const unsigned char *e;
2101     int cased, previous_is_cased;
2102
2103     if (!PyArg_NoArgs(args))
2104         return NULL;
2105
2106     /* Shortcut for single character strings */
2107     if (PyString_GET_SIZE(self) == 1)
2108         return PyInt_FromLong(isupper(*p) != 0);
2109
2110     /* Special case for empty strings */
2111     if (PyString_GET_SIZE(self) == 0)
2112         return PyInt_FromLong(0);
2113
2114     e = p + PyString_GET_SIZE(self);
2115     cased = 0;
2116     previous_is_cased = 0;
2117     for (; p < e; p++) {
2118         register const unsigned char ch = *p;
2119
2120         if (isupper(ch)) {
2121             if (previous_is_cased)
2122                 return PyInt_FromLong(0);
2123             previous_is_cased = 1;
2124             cased = 1;
2125         }
2126         else if (islower(ch)) {
2127             if (!previous_is_cased)
2128                 return PyInt_FromLong(0);
2129             previous_is_cased = 1;
2130             cased = 1;
2131         }
2132         else
2133             previous_is_cased = 0;
2134     }
2135     return PyInt_FromLong(cased);
2136 }
2137
2138
/* Docstring for S.splitlines(); the optional-argument bracket in the
   signature line is now balanced ("[keepends]" rather than
   "[keepends]]"). */
static char splitlines__doc__[] =
"S.splitlines([keepends]) -> list of strings\n\
\n\
Return a list of the lines in S, breaking at line boundaries.\n\
Line breaks are not included in the resulting list unless keepends\n\
is given and true.";
2145
2146 #define SPLIT_APPEND(data, left, right)                                 \
2147         str = PyString_FromStringAndSize(data + left, right - left);    \
2148         if (!str)                                                       \
2149             goto onError;                                               \
2150         if (PyList_Append(list, str)) {                                 \
2151             Py_DECREF(str);                                             \
2152             goto onError;                                               \
2153         }                                                               \
2154         else                                                            \
2155             Py_DECREF(str);
2156
2157 static PyObject*
2158 string_splitlines(PyStringObject *self, PyObject *args)
2159 {
2160     register int i;
2161     register int j;
2162     int len;
2163     int keepends = 0;
2164     PyObject *list;
2165     PyObject *str;
2166     char *data;
2167
2168     if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
2169         return NULL;
2170
2171     data = PyString_AS_STRING(self);
2172     len = PyString_GET_SIZE(self);
2173
2174     list = PyList_New(0);
2175     if (!list)
2176         goto onError;
2177
2178     for (i = j = 0; i < len; ) {
2179         int eol;
2180
2181         /* Find a line and append it */
2182         while (i < len && data[i] != '\n' && data[i] != '\r')
2183             i++;
2184
2185         /* Skip the line break reading CRLF as one line break */
2186         eol = i;
2187         if (i < len) {
2188             if (data[i] == '\r' && i + 1 < len &&
2189                 data[i+1] == '\n')
2190                 i += 2;
2191             else
2192                 i++;
2193             if (keepends)
2194                 eol = i;
2195         }
2196         SPLIT_APPEND(data, j, eol);
2197         j = i;
2198     }
2199     if (j < len) {
2200         SPLIT_APPEND(data, j, len);
2201     }
2202
2203     return list;
2204
2205  onError:
2206     Py_DECREF(list);
2207     return NULL;
2208 }
2209
2210 #undef SPLIT_APPEND
2211
2212 \f
2213 static PyMethodDef
2214 string_methods[] = {
2215         /* Counterparts of the obsolete stropmodule functions; except
2216            string.maketrans(). */
2217         {"join",       (PyCFunction)string_join,       1, join__doc__},
2218         {"split",       (PyCFunction)string_split,       1, split__doc__},
2219         {"lower",      (PyCFunction)string_lower,      1, lower__doc__},
2220         {"upper",       (PyCFunction)string_upper,       1, upper__doc__},
2221         {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2222         {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2223         {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2224         {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2225         {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
2226         {"isalpha", (PyCFunction)string_isalpha, 0, isalpha__doc__},
2227         {"isalnum", (PyCFunction)string_isalnum, 0, isalnum__doc__},
2228         {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2229         {"count",      (PyCFunction)string_count,      1, count__doc__},
2230         {"endswith",   (PyCFunction)string_endswith,   1, endswith__doc__},
2231         {"find",       (PyCFunction)string_find,       1, find__doc__},
2232         {"index",      (PyCFunction)string_index,      1, index__doc__},
2233         {"lstrip",     (PyCFunction)string_lstrip,     1, lstrip__doc__},
2234         {"replace",     (PyCFunction)string_replace,     1, replace__doc__},
2235         {"rfind",       (PyCFunction)string_rfind,       1, rfind__doc__},
2236         {"rindex",      (PyCFunction)string_rindex,      1, rindex__doc__},
2237         {"rstrip",      (PyCFunction)string_rstrip,      1, rstrip__doc__},
2238         {"startswith",  (PyCFunction)string_startswith,  1, startswith__doc__},
2239         {"strip",       (PyCFunction)string_strip,       1, strip__doc__},
2240         {"swapcase",    (PyCFunction)string_swapcase,    1, swapcase__doc__},
2241         {"translate",   (PyCFunction)string_translate,   1, translate__doc__},
2242         {"title",       (PyCFunction)string_title,       1, title__doc__},
2243         {"ljust",       (PyCFunction)string_ljust,       1, ljust__doc__},
2244         {"rjust",       (PyCFunction)string_rjust,       1, rjust__doc__},
2245         {"center",      (PyCFunction)string_center,      1, center__doc__},
2246         {"encode",      (PyCFunction)string_encode,      1, encode__doc__},
2247         {"expandtabs",  (PyCFunction)string_expandtabs,  1, expandtabs__doc__},
2248         {"splitlines",  (PyCFunction)string_splitlines,  1, splitlines__doc__},
2249 #if 0
2250         {"zfill",       (PyCFunction)string_zfill,       1, zfill__doc__},
2251 #endif
2252         {NULL,     NULL}                     /* sentinel */
2253 };
2254
2255 static PyObject *
2256 string_getattr(PyStringObject *s, char *name)
2257 {
2258         return Py_FindMethod(string_methods, (PyObject*)s, name);
2259 }
2260
2261
2262 PyTypeObject PyString_Type = {
2263         PyObject_HEAD_INIT(&PyType_Type)
2264         0,
2265         "string",
2266         sizeof(PyStringObject),
2267         sizeof(char),
2268         (destructor)string_dealloc, /*tp_dealloc*/
2269         (printfunc)string_print, /*tp_print*/
2270         (getattrfunc)string_getattr,            /*tp_getattr*/
2271         0,              /*tp_setattr*/
2272         (cmpfunc)string_compare, /*tp_compare*/
2273         (reprfunc)string_repr, /*tp_repr*/
2274         0,              /*tp_as_number*/
2275         &string_as_sequence,    /*tp_as_sequence*/
2276         0,              /*tp_as_mapping*/
2277         (hashfunc)string_hash, /*tp_hash*/
2278         0,              /*tp_call*/
2279         0,              /*tp_str*/
2280         0,              /*tp_getattro*/
2281         0,              /*tp_setattro*/
2282         &string_as_buffer,      /*tp_as_buffer*/
2283         Py_TPFLAGS_DEFAULT,     /*tp_flags*/
2284         0,              /*tp_doc*/
2285 };
2286
2287 void
2288 PyString_Concat(register PyObject **pv, register PyObject *w)
2289 {
2290         register PyObject *v;
2291         if (*pv == NULL)
2292                 return;
2293         if (w == NULL || !PyString_Check(*pv)) {
2294                 Py_DECREF(*pv);
2295                 *pv = NULL;
2296                 return;
2297         }
2298         v = string_concat((PyStringObject *) *pv, w);
2299         Py_DECREF(*pv);
2300         *pv = v;
2301 }
2302
2303 void
2304 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
2305 {
2306         PyString_Concat(pv, w);
2307         Py_XDECREF(w);
2308 }
2309
2310
2311 /* The following function breaks the notion that strings are immutable:
2312    it changes the size of a string.  We get away with this only if there
2313    is only one module referencing the object.  You can also think of it
2314    as creating a new string object and destroying the old one, only
2315    more efficiently.  In any case, don't use this if the string may
2316    already be known to some other part of the code... */
2317
2318 int
2319 _PyString_Resize(PyObject **pv, int newsize)
2320 {
2321         register PyObject *v;
2322         register PyStringObject *sv;
2323         v = *pv;
2324         if (!PyString_Check(v) || v->ob_refcnt != 1) {
2325                 *pv = 0;
2326                 Py_DECREF(v);
2327                 PyErr_BadInternalCall();
2328                 return -1;
2329         }
2330         /* XXX UNREF/NEWREF interface should be more symmetrical */
2331 #ifdef Py_REF_DEBUG
2332         --_Py_RefTotal;
2333 #endif
2334         _Py_ForgetReference(v);
2335         *pv = (PyObject *)
2336                 PyObject_REALLOC((char *)v,
2337                         sizeof(PyStringObject) + newsize * sizeof(char));
2338         if (*pv == NULL) {
2339                 PyObject_DEL(v);
2340                 PyErr_NoMemory();
2341                 return -1;
2342         }
2343         _Py_NewReference(*pv);
2344         sv = (PyStringObject *) *pv;
2345         sv->ob_size = newsize;
2346         sv->ob_sval[newsize] = '\0';
2347         return 0;
2348 }
2349
2350 /* Helpers for formatstring */
2351
2352 static PyObject *
2353 getnextarg(PyObject *args, int arglen, int *p_argidx)
2354 {
2355         int argidx = *p_argidx;
2356         if (argidx < arglen) {
2357                 (*p_argidx)++;
2358                 if (arglen < 0)
2359                         return args;
2360                 else
2361                         return PyTuple_GetItem(args, argidx);
2362         }
2363         PyErr_SetString(PyExc_TypeError,
2364                         "not enough arguments for format string");
2365         return NULL;
2366 }
2367
/* Bit flags recording which flag characters a format specifier carries;
   each one occupies its own bit so they can be OR-ed together. */
#define F_LJUST 0x01    /* bit 0 */
#define F_SIGN  0x02    /* bit 1 */
#define F_BLANK 0x04    /* bit 2 */
#define F_ALT   0x08    /* bit 3 */
#define F_ZERO  0x10    /* bit 4 */
2373
2374 static int
2375 formatfloat(char *buf, size_t buflen, int flags,
2376             int prec, int type, PyObject *v)
2377 {
2378         /* fmt = '%#.' + `prec` + `type`
2379            worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
2380         char fmt[20];
2381         double x;
2382         if (!PyArg_Parse(v, "d;float argument required", &x))
2383                 return -1;
2384         if (prec < 0)
2385                 prec = 6;
2386         if (type == 'f' && fabs(x)/1e25 >= 1e25)
2387                 type = 'g';
2388         sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
2389         /* worst case length calc to ensure no buffer overrun:
2390              fmt = %#.<prec>g
2391              buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
2392                 for any double rep.)
2393              len = 1 + prec + 1 + 2 + 5 = 9 + prec
2394            If prec=0 the effective precision is 1 (the leading digit is
2395            always given), therefore increase by one to 10+prec. */
2396         if (buflen <= (size_t)10 + (size_t)prec) {
2397                 PyErr_SetString(PyExc_OverflowError,
2398                         "formatted float is too long (precision too long?)");
2399                 return -1;
2400         }
2401         sprintf(buf, fmt, x);
2402         return strlen(buf);
2403 }
2404
2405 static int
2406 formatint(char *buf, size_t buflen, int flags,
2407           int prec, int type, PyObject *v)
2408 {
2409         /* fmt = '%#.' + `prec` + 'l' + `type`
2410            worst case length = 3 + 10 (len of INT_MAX) + 1 + 1 = 15 (use 20)*/
2411         char fmt[20];
2412         long x;
2413         if (!PyArg_Parse(v, "l;int argument required", &x))
2414                 return -1;
2415         if (prec < 0)
2416                 prec = 1;
2417         sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
2418         /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec,len(x in octal))
2419            worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
2420         if (buflen <= 13 || buflen <= (size_t)2+(size_t)prec) {
2421                 PyErr_SetString(PyExc_OverflowError,
2422                         "formatted integer is too long (precision too long?)");
2423                 return -1;
2424         }
2425         sprintf(buf, fmt, x);
2426         return strlen(buf);
2427 }
2428
2429 static int
2430 formatchar(char *buf, size_t buflen, PyObject *v)
2431 {
2432         /* presume that the buffer is at least 2 characters long */
2433         if (PyString_Check(v)) {
2434                 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
2435                         return -1;
2436         }
2437         else {
2438                 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
2439                         return -1;
2440         }
2441         buf[1] = '\0';
2442         return 1;
2443 }
2444
2445
2446 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
2447
2448    FORMATBUFLEN is the length of the buffer in which the floats, ints, &
2449    chars are formatted. XXX This is a magic number. Each formatting
2450    routine does bounds checking to ensure no overflow, but a better
2451    solution may be to malloc a buffer of appropriate size for each
2452    format. For now, the current solution is sufficient.
2453 */
/* Parenthesized so the cast expression expands as a single operand
   wherever the macro is used. */
#define FORMATBUFLEN ((size_t)120)
2455
2456 PyObject *
2457 PyString_Format(PyObject *format, PyObject *args)
2458 {
2459         char *fmt, *res;
2460         int fmtcnt, rescnt, reslen, arglen, argidx;
2461         int args_owned = 0;
2462         PyObject *result, *orig_args;
2463         PyObject *dict = NULL;
2464         if (format == NULL || !PyString_Check(format) || args == NULL) {
2465                 PyErr_BadInternalCall();
2466                 return NULL;
2467         }
2468         orig_args = args;
2469         fmt = PyString_AsString(format);
2470         fmtcnt = PyString_Size(format);
2471         reslen = rescnt = fmtcnt + 100;
2472         result = PyString_FromStringAndSize((char *)NULL, reslen);
2473         if (result == NULL)
2474                 return NULL;
2475         res = PyString_AsString(result);
2476         if (PyTuple_Check(args)) {
2477                 arglen = PyTuple_Size(args);
2478                 argidx = 0;
2479         }
2480         else {
2481                 arglen = -1;
2482                 argidx = -2;
2483         }
2484         if (args->ob_type->tp_as_mapping)
2485                 dict = args;
2486         while (--fmtcnt >= 0) {
2487                 if (*fmt != '%') {
2488                         if (--rescnt < 0) {
2489                                 rescnt = fmtcnt + 100;
2490                                 reslen += rescnt;
2491                                 if (_PyString_Resize(&result, reslen) < 0)
2492                                         return NULL;
2493                                 res = PyString_AsString(result)
2494                                         + reslen - rescnt;
2495                                 --rescnt;
2496                         }
2497                         *res++ = *fmt++;
2498                 }
2499                 else {
2500                         /* Got a format specifier */
2501                         int flags = 0;
2502                         int width = -1;
2503                         int prec = -1;
2504                         int size = 0;
2505                         int c = '\0';
2506                         int fill;
2507                         PyObject *v = NULL;
2508                         PyObject *temp = NULL;
2509                         char *pbuf;
2510                         int sign;
2511                         int len;
2512                         char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
2513                         char *fmt_start = fmt;
2514
2515                         fmt++;
2516                         if (*fmt == '(') {
2517                                 char *keystart;
2518                                 int keylen;
2519                                 PyObject *key;
2520                                 int pcount = 1;
2521
2522                                 if (dict == NULL) {
2523                                         PyErr_SetString(PyExc_TypeError,
2524                                                  "format requires a mapping");
2525                                         goto error;
2526                                 }
2527                                 ++fmt;
2528                                 --fmtcnt;
2529                                 keystart = fmt;
2530                                 /* Skip over balanced parentheses */
2531                                 while (pcount > 0 && --fmtcnt >= 0) {
2532                                         if (*fmt == ')')
2533                                                 --pcount;
2534                                         else if (*fmt == '(')
2535                                                 ++pcount;
2536                                         fmt++;
2537                                 }
2538                                 keylen = fmt - keystart - 1;
2539                                 if (fmtcnt < 0 || pcount > 0) {
2540                                         PyErr_SetString(PyExc_ValueError,
2541                                                    "incomplete format key");
2542                                         goto error;
2543                                 }
2544                                 key = PyString_FromStringAndSize(keystart,
2545                                                                  keylen);
2546                                 if (key == NULL)
2547                                         goto error;
2548                                 if (args_owned) {
2549                                         Py_DECREF(args);
2550                                         args_owned = 0;
2551                                 }
2552                                 args = PyObject_GetItem(dict, key);
2553                                 Py_DECREF(key);
2554                                 if (args == NULL) {
2555                                         goto error;
2556                                 }
2557                                 args_owned = 1;
2558                                 arglen = -1;
2559                                 argidx = -2;
2560                         }
2561                         while (--fmtcnt >= 0) {
2562                                 switch (c = *fmt++) {
2563                                 case '-': flags |= F_LJUST; continue;
2564                                 case '+': flags |= F_SIGN; continue;
2565                                 case ' ': flags |= F_BLANK; continue;
2566                                 case '#': flags |= F_ALT; continue;
2567                                 case '0': flags |= F_ZERO; continue;
2568                                 }
2569                                 break;
2570                         }
2571                         if (c == '*') {
2572                                 v = getnextarg(args, arglen, &argidx);
2573                                 if (v == NULL)
2574                                         goto error;
2575                                 if (!PyInt_Check(v)) {
2576                                         PyErr_SetString(PyExc_TypeError,
2577                                                         "* wants int");
2578                                         goto error;
2579                                 }
2580                                 width = PyInt_AsLong(v);
2581                                 if (width < 0) {
2582                                         flags |= F_LJUST;
2583                                         width = -width;
2584                                 }
2585                                 if (--fmtcnt >= 0)
2586                                         c = *fmt++;
2587                         }
2588                         else if (c >= 0 && isdigit(c)) {
2589                                 width = c - '0';
2590                                 while (--fmtcnt >= 0) {
2591                                         c = Py_CHARMASK(*fmt++);
2592                                         if (!isdigit(c))
2593                                                 break;
2594                                         if ((width*10) / 10 != width) {
2595                                                 PyErr_SetString(
2596                                                         PyExc_ValueError,
2597                                                         "width too big");
2598                                                 goto error;
2599                                         }
2600                                         width = width*10 + (c - '0');
2601                                 }
2602                         }
2603                         if (c == '.') {
2604                                 prec = 0;
2605                                 if (--fmtcnt >= 0)
2606                                         c = *fmt++;
2607                                 if (c == '*') {
2608                                         v = getnextarg(args, arglen, &argidx);
2609                                         if (v == NULL)
2610                                                 goto error;
2611                                         if (!PyInt_Check(v)) {
2612                                                 PyErr_SetString(
2613                                                         PyExc_TypeError,
2614                                                         "* wants int");
2615                                                 goto error;
2616                                         }
2617                                         prec = PyInt_AsLong(v);
2618                                         if (prec < 0)
2619                                                 prec = 0;
2620                                         if (--fmtcnt >= 0)
2621                                                 c = *fmt++;
2622                                 }
2623                                 else if (c >= 0 && isdigit(c)) {
2624                                         prec = c - '0';
2625                                         while (--fmtcnt >= 0) {
2626                                                 c = Py_CHARMASK(*fmt++);
2627                                                 if (!isdigit(c))
2628                                                         break;
2629                                                 if ((prec*10) / 10 != prec) {
2630                                                         PyErr_SetString(
2631                                                             PyExc_ValueError,
2632                                                             "prec too big");
2633                                                         goto error;
2634                                                 }
2635                                                 prec = prec*10 + (c - '0');
2636                                         }
2637                                 }
2638                         } /* prec */
2639                         if (fmtcnt >= 0) {
2640                                 if (c == 'h' || c == 'l' || c == 'L') {
2641                                         size = c;
2642                                         if (--fmtcnt >= 0)
2643                                                 c = *fmt++;
2644                                 }
2645                         }
2646                         if (fmtcnt < 0) {
2647                                 PyErr_SetString(PyExc_ValueError,
2648                                                 "incomplete format");
2649                                 goto error;
2650                         }
2651                         if (c != '%') {
2652                                 v = getnextarg(args, arglen, &argidx);
2653                                 if (v == NULL)
2654                                         goto error;
2655                         }
2656                         sign = 0;
2657                         fill = ' ';
2658                         switch (c) {
2659                         case '%':
2660                                 pbuf = "%";
2661                                 len = 1;
2662                                 break;
2663                         case 's':
2664                         case 'r':
2665                                 if (PyUnicode_Check(v)) {
2666                                         fmt = fmt_start;
2667                                         goto unicode;
2668                                 }
2669                                 if (c == 's')
2670                                 temp = PyObject_Str(v);
2671                                 else
2672                                         temp = PyObject_Repr(v);
2673                                 if (temp == NULL)
2674                                         goto error;
2675                                 if (!PyString_Check(temp)) {
2676                                         PyErr_SetString(PyExc_TypeError,
2677                                           "%s argument has non-string str()");
2678                                         goto error;
2679                                 }
2680                                 pbuf = PyString_AsString(temp);
2681                                 len = PyString_Size(temp);
2682                                 if (prec >= 0 && len > prec)
2683                                         len = prec;
2684                                 break;
2685                         case 'i':
2686                         case 'd':
2687                         case 'u':
2688                         case 'o':
2689                         case 'x':
2690                         case 'X':
2691                                 if (c == 'i')
2692                                         c = 'd';
2693                                 pbuf = formatbuf;
2694                                 len = formatint(pbuf, sizeof(formatbuf), flags, prec, c, v);
2695                                 if (len < 0)
2696                                         goto error;
2697                                 sign = (c == 'd');
2698                                 if (flags&F_ZERO) {
2699                                         fill = '0';
2700                                         if ((flags&F_ALT) &&
2701                                             (c == 'x' || c == 'X') &&
2702                                             pbuf[0] == '0' && pbuf[1] == c) {
2703                                                 *res++ = *pbuf++;
2704                                                 *res++ = *pbuf++;
2705                                                 rescnt -= 2;
2706                                                 len -= 2;
2707                                                 width -= 2;
2708                                                 if (width < 0)
2709                                                         width = 0;
2710                                         }
2711                                 }
2712                                 break;
2713                         case 'e':
2714                         case 'E':
2715                         case 'f':
2716                         case 'g':
2717                         case 'G':
2718                                 pbuf = formatbuf;
2719                                 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
2720                                 if (len < 0)
2721                                         goto error;
2722                                 sign = 1;
2723                                 if (flags&F_ZERO)
2724                                         fill = '0';
2725                                 break;
2726                         case 'c':
2727                                 pbuf = formatbuf;
2728                                 len = formatchar(pbuf, sizeof(formatbuf), v);
2729                                 if (len < 0)
2730                                         goto error;
2731                                 break;
2732                         default:
2733                                 PyErr_Format(PyExc_ValueError,
2734                                 "unsupported format character '%c' (0x%x)",
2735                                         c, c);
2736                                 goto error;
2737                         }
2738                         if (sign) {
2739                                 if (*pbuf == '-' || *pbuf == '+') {
2740                                         sign = *pbuf++;
2741                                         len--;
2742                                 }
2743                                 else if (flags & F_SIGN)
2744                                         sign = '+';
2745                                 else if (flags & F_BLANK)
2746                                         sign = ' ';
2747                                 else
2748                                         sign = '\0';
2749                         }
2750                         if (width < len)
2751                                 width = len;
2752                         if (rescnt < width + (sign != '\0')) {
2753                                 reslen -= rescnt;
2754                                 rescnt = width + fmtcnt + 100;
2755                                 reslen += rescnt;
2756                                 if (_PyString_Resize(&result, reslen) < 0)
2757                                         return NULL;
2758                                 res = PyString_AsString(result)
2759                                         + reslen - rescnt;
2760                         }
2761                         if (sign) {
2762                                 if (fill != ' ')
2763                                         *res++ = sign;
2764                                 rescnt--;
2765                                 if (width > len)
2766                                         width--;
2767                         }
2768                         if (width > len && !(flags&F_LJUST)) {
2769                                 do {
2770                                         --rescnt;
2771                                         *res++ = fill;
2772                                 } while (--width > len);
2773                         }
2774                         if (sign && fill == ' ')
2775                                 *res++ = sign;
2776                         memcpy(res, pbuf, len);
2777                         res += len;
2778                         rescnt -= len;
2779                         while (--width >= len) {
2780                                 --rescnt;
2781                                 *res++ = ' ';
2782                         }
2783                         if (dict && (argidx < arglen) && c != '%') {
2784                                 PyErr_SetString(PyExc_TypeError,
2785                                            "not all arguments converted");
2786                                 goto error;
2787                         }
2788                         Py_XDECREF(temp);
2789                 } /* '%' */
2790         } /* until end */
2791         if (argidx < arglen && !dict) {
2792                 PyErr_SetString(PyExc_TypeError,
2793                                 "not all arguments converted");
2794                 goto error;
2795         }
2796         if (args_owned) {
2797                 Py_DECREF(args);
2798         }
2799         _PyString_Resize(&result, reslen - rescnt);
2800         return result;
2801
2802  unicode:
2803         if (args_owned) {
2804                 Py_DECREF(args);
2805                 args_owned = 0;
2806         }
2807         /* Fiddle args right (remove the first argidx-1 arguments) */
2808         --argidx;
2809         if (PyTuple_Check(orig_args) && argidx > 0) {
2810                 PyObject *v;
2811                 int n = PyTuple_GET_SIZE(orig_args) - argidx;
2812                 v = PyTuple_New(n);
2813                 if (v == NULL)
2814                         goto error;
2815                 while (--n >= 0) {
2816                         PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
2817                         Py_INCREF(w);
2818                         PyTuple_SET_ITEM(v, n, w);
2819                 }
2820                 args = v;
2821         } else {
2822                 Py_INCREF(orig_args);
2823                 args = orig_args;
2824         }
2825         /* Paste rest of format string to what we have of the result
2826            string; we reuse result for this */
2827         rescnt = res - PyString_AS_STRING(result);
2828         fmtcnt = PyString_GET_SIZE(format) - \
2829                  (fmt - PyString_AS_STRING(format));
2830         if (_PyString_Resize(&result, rescnt + fmtcnt)) {
2831                 Py_DECREF(args);
2832                 goto error;
2833         }
2834         memcpy(PyString_AS_STRING(result) + rescnt, fmt, fmtcnt);
2835         format = result;
2836         /* Let Unicode do its magic */
2837         result = PyUnicode_Format(format, args);
2838         Py_DECREF(format);
2839         Py_DECREF(args);
2840         return result;
2841
2842  error:
2843         Py_DECREF(result);
2844         if (args_owned) {
2845                 Py_DECREF(args);
2846         }
2847         return NULL;
2848 }
2849
2850
2851 #ifdef INTERN_STRINGS
2852
2853 static PyObject *interned;
2854
2855 void
2856 PyString_InternInPlace(PyObject **p)
2857 {
2858         register PyStringObject *s = (PyStringObject *)(*p);
2859         PyObject *t;
2860         if (s == NULL || !PyString_Check(s))
2861                 Py_FatalError("PyString_InternInPlace: strings only please!");
2862         if ((t = s->ob_sinterned) != NULL) {
2863                 if (t == (PyObject *)s)
2864                         return;
2865                 Py_INCREF(t);
2866                 *p = t;
2867                 Py_DECREF(s);
2868                 return;
2869         }
2870         if (interned == NULL) {
2871                 interned = PyDict_New();
2872                 if (interned == NULL)
2873                         return;
2874         }
2875         if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
2876                 Py_INCREF(t);
2877                 *p = s->ob_sinterned = t;
2878                 Py_DECREF(s);
2879                 return;
2880         }
2881         t = (PyObject *)s;
2882         if (PyDict_SetItem(interned, t, t) == 0) {
2883                 s->ob_sinterned = t;
2884                 return;
2885         }
2886         PyErr_Clear();
2887 }
2888
2889
2890 PyObject *
2891 PyString_InternFromString(const char *cp)
2892 {
2893         PyObject *s = PyString_FromString(cp);
2894         if (s == NULL)
2895                 return NULL;
2896         PyString_InternInPlace(&s);
2897         return s;
2898 }
2899
2900 #endif
2901
2902 void
2903 PyString_Fini(void)
2904 {
2905         int i;
2906         for (i = 0; i < UCHAR_MAX + 1; i++) {
2907                 Py_XDECREF(characters[i]);
2908                 characters[i] = NULL;
2909         }
2910 #ifndef DONT_SHARE_SHORT_STRINGS
2911         Py_XDECREF(nullstring);
2912         nullstring = NULL;
2913 #endif
2914 #ifdef INTERN_STRINGS
2915         if (interned) {
2916                 int pos, changed;
2917                 PyObject *key, *value;
2918                 do {
2919                         changed = 0;
2920                         pos = 0;
2921                         while (PyDict_Next(interned, &pos, &key, &value)) {
2922                                 if (key->ob_refcnt == 2 && key == value) {
2923                                         PyDict_DelItem(interned, key);
2924                                         changed = 1;
2925                                 }
2926                         }
2927                 } while (changed);
2928         }
2929 #endif
2930 }