Objects/stringobject.c

   1
   2 /* String object implementation */
   3
   4 #include "Python.h"
   5
   6 #include <ctype.h>
   7
   8 #ifdef COUNT_ALLOCS
     /* Statistics: bumped each time the shared empty string
        (null_strings) or a shared one-character string (one_strings)
        is handed out instead of allocating a new object. */
   9 int null_strings, one_strings;
  10 #endif
  11
     /* Fallback for platforms where neither HAVE_LIMITS_H nor
        UCHAR_MAX is defined. */
  12 #if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
  13 #define UCHAR_MAX 255
  14 #endif
  15
     /* Cache of one-character string objects, indexed by the byte
        value masked with UCHAR_MAX; an entry is NULL until a string
        for that byte has been created. */
  16 static PyStringObject *characters[UCHAR_MAX + 1];
  17 #ifndef DONT_SHARE_SHORT_STRINGS
     /* The shared empty string; NULL until the first empty string has
        been created. */
  18 static PyStringObject *nullstring;
  19 #endif
  20
  21 /*
  22    Newsizedstringobject() and newstringobject() try in certain cases
  23    to share string objects.  When the size of the string is zero,
  24    these routines always return a pointer to the same string object;
  25    when the size is one, they return a pointer to an already existing
  26    object if the contents of the string is known.  For
  27    newstringobject() this is always the case, for
  28    newsizedstringobject() this is the case when the first argument is
  29    not NULL.
  30    A common practice to allocate a string and then fill it in or
  31    change it must be done carefully.  It is only allowed to change the
  32    contents of the string if the object was gotten from
  33    newsizedstringobject() with a NULL first argument, because in the
  34    future these routines may try to do even more sharing of objects.
  35 */
  36 PyObject *
  37 PyString_FromStringAndSize(const char *str, int size)
  38 {
  39         register PyStringObject *op;
  40 #ifndef DONT_SHARE_SHORT_STRINGS
  41         if (size == 0 && (op = nullstring) != NULL) {
  42 #ifdef COUNT_ALLOCS
  43                 null_strings++;
  44 #endif
  45                 Py_INCREF(op);
  46                 return (PyObject *)op;
  47         }
  48         if (size == 1 && str != NULL &&
  49             (op = characters[*str & UCHAR_MAX]) != NULL)
  50         {
  51 #ifdef COUNT_ALLOCS
  52                 one_strings++;
  53 #endif
  54                 Py_INCREF(op);
  55                 return (PyObject *)op;
  56         }
  57 #endif /* DONT_SHARE_SHORT_STRINGS */
  58
  59         /* PyObject_NewVar is inlined */
  60         op = (PyStringObject *)
  61                 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
  62         if (op == NULL)
  63                 return PyErr_NoMemory();
  64         PyObject_INIT_VAR(op, &PyString_Type, size);
  65 #ifdef CACHE_HASH
  66         op->ob_shash = -1;
  67 #endif
  68 #ifdef INTERN_STRINGS
  69         op->ob_sinterned = NULL;
  70 #endif
  71         if (str != NULL)
  72                 memcpy(op->ob_sval, str, size);
  73         op->ob_sval[size] = '\0';
  74 #ifndef DONT_SHARE_SHORT_STRINGS
  75         if (size == 0) {
  76                 nullstring = op;
  77                 Py_INCREF(op);
  78         } else if (size == 1 && str != NULL) {
  79                 characters[*str & UCHAR_MAX] = op;
  80                 Py_INCREF(op);
  81         }
  82 #endif
  83         return (PyObject *) op;
  84 }
  85
  86 PyObject *
  87 PyString_FromString(const char *str)
  88 {
  89         register size_t size = strlen(str);
  90         register PyStringObject *op;
  91         if (size > INT_MAX) {
  92                 PyErr_SetString(PyExc_OverflowError,
  93                         "string is too long for a Python string");
  94                 return NULL;
  95         }
  96 #ifndef DONT_SHARE_SHORT_STRINGS
  97         if (size == 0 && (op = nullstring) != NULL) {
  98 #ifdef COUNT_ALLOCS
  99                 null_strings++;
 100 #endif
 101                 Py_INCREF(op);
 102                 return (PyObject *)op;
 103         }
 104         if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
 105 #ifdef COUNT_ALLOCS
 106                 one_strings++;
 107 #endif
 108                 Py_INCREF(op);
 109                 return (PyObject *)op;
 110         }
 111 #endif /* DONT_SHARE_SHORT_STRINGS */
 112
 113         /* PyObject_NewVar is inlined */
 114         op = (PyStringObject *)
 115                 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
 116         if (op == NULL)
 117                 return PyErr_NoMemory();
 118         PyObject_INIT_VAR(op, &PyString_Type, size);
 119 #ifdef CACHE_HASH
 120         op->ob_shash = -1;
 121 #endif
 122 #ifdef INTERN_STRINGS
 123         op->ob_sinterned = NULL;
 124 #endif
 125         strcpy(op->ob_sval, str);
 126 #ifndef DONT_SHARE_SHORT_STRINGS
 127         if (size == 0) {
 128                 nullstring = op;
 129                 Py_INCREF(op);
 130         } else if (size == 1) {
 131                 characters[*str & UCHAR_MAX] = op;
 132                 Py_INCREF(op);
 133         }
 134 #endif
 135         return (PyObject *) op;
 136 }
 137
 138 PyObject *PyString_Decode(const char *s,
 139                           int size,
 140                           const char *encoding,
 141                           const char *errors)
 142 {
 143     PyObject *buffer = NULL, *str;
 144
 145     if (encoding == NULL)
 146         encoding = PyUnicode_GetDefaultEncoding();
 147
 148     /* Decode via the codec registry */
 149     buffer = PyBuffer_FromMemory((void *)s, size);
 150     if (buffer == NULL)
 151         goto onError;
 152     str = PyCodec_Decode(buffer, encoding, errors);
 153     if (str == NULL)
 154         goto onError;
 155     /* Convert Unicode to a string using the default encoding */
 156     if (PyUnicode_Check(str)) {
 157         PyObject *temp = str;
 158         str = PyUnicode_AsEncodedString(str, NULL, NULL);
 159         Py_DECREF(temp);
 160         if (str == NULL)
 161             goto onError;
 162     }
 163     if (!PyString_Check(str)) {
 164         PyErr_Format(PyExc_TypeError,
 165                      "decoder did not return a string object (type=%.400s)",
 166                      str->ob_type->tp_name);
 167         Py_DECREF(str);
 168         goto onError;
 169     }
 170     Py_DECREF(buffer);
 171     return str;
 172
 173  onError:
 174     Py_XDECREF(buffer);
 175     return NULL;
 176 }
 177
 178 PyObject *PyString_Encode(const char *s,
 179                           int size,
 180                           const char *encoding,
 181                           const char *errors)
 182 {
 183     PyObject *v, *str;
 184
 185     str = PyString_FromStringAndSize(s, size);
 186     if (str == NULL)
 187         return NULL;
 188     v = PyString_AsEncodedString(str, encoding, errors);
 189     Py_DECREF(str);
 190     return v;
 191 }
 192
 193 PyObject *PyString_AsEncodedString(PyObject *str,
 194                                    const char *encoding,
 195                                    const char *errors)
 196 {
 197     PyObject *v;
 198
 199     if (!PyString_Check(str)) {
 200         PyErr_BadArgument();
 201         goto onError;
 202     }
 203
 204     if (encoding == NULL)
 205         encoding = PyUnicode_GetDefaultEncoding();
 206
 207     /* Encode via the codec registry */
 208     v = PyCodec_Encode(str, encoding, errors);
 209     if (v == NULL)
 210         goto onError;
 211     /* Convert Unicode to a string using the default encoding */
 212     if (PyUnicode_Check(v)) {
 213         PyObject *temp = v;
 214         v = PyUnicode_AsEncodedString(v, NULL, NULL);
 215         Py_DECREF(temp);
 216         if (v == NULL)
 217             goto onError;
 218     }
 219     if (!PyString_Check(v)) {
 220         PyErr_Format(PyExc_TypeError,
 221                      "encoder did not return a string object (type=%.400s)",
 222                      v->ob_type->tp_name);
 223         Py_DECREF(v);
 224         goto onError;
 225     }
 226     return v;
 227
 228  onError:
 229     return NULL;
 230 }
 231
 232 static void
 233 string_dealloc(PyObject *op)
 234 {
 235         PyObject_DEL(op);
 236 }
 237
 238 static int
 239 string_getsize(register PyObject *op)
 240 {
 241         char *s;
 242         int len;
 243         if (PyString_AsStringAndSize(op, &s, &len))
 244                 return -1;
 245         return len;
 246 }
 247
 248 static /*const*/ char *
 249 string_getbuffer(register PyObject *op)
 250 {
 251         char *s;
 252         int len;
 253         if (PyString_AsStringAndSize(op, &s, &len))
 254                 return NULL;
 255         return s;
 256 }
 257
 258 int
 259 PyString_Size(register PyObject *op)
 260 {
 261         if (!PyString_Check(op))
 262                 return string_getsize(op);
 263         return ((PyStringObject *)op) -> ob_size;
 264 }
 265
 266 /*const*/ char *
 267 PyString_AsString(register PyObject *op)
 268 {
 269         if (!PyString_Check(op))
 270                 return string_getbuffer(op);
 271         return ((PyStringObject *)op) -> ob_sval;
 272 }
 273
 274 /* Internal API needed by PyString_AsStringAndSize(): */
 275 extern
 276 PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
 277                                             const char *errors);
 278
 279 int
 280 PyString_AsStringAndSize(register PyObject *obj,
 281                          register char **s,
 282                          register int *len)
 283 {
 284         if (s == NULL) {
 285                 PyErr_BadInternalCall();
 286                 return -1;
 287         }
 288
 289         if (!PyString_Check(obj)) {
 290                 if (PyUnicode_Check(obj)) {
 291                         obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
 292                         if (obj == NULL)
 293                                 return -1;
 294                 }
 295                 else {
 296                         PyErr_Format(PyExc_TypeError,
 297                                      "expected string or Unicode object, "
 298                                      "%.200s found", obj->ob_type->tp_name);
 299                         return -1;
 300                 }
 301         }
 302
 303         *s = PyString_AS_STRING(obj);
 304         if (len != NULL)
 305                 *len = PyString_GET_SIZE(obj);
 306         else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
 307                 PyErr_SetString(PyExc_TypeError,
 308                                 "expected string without null bytes");
 309                 return -1;
 310         }
 311         return 0;
 312 }
 313
 314 /* Methods */
 315
 316 static int
 317 string_print(PyStringObject *op, FILE *fp, int flags)
 318 {
 319         int i;
 320         char c;
 321         int quote;
 322         /* XXX Ought to check for interrupts when writing long strings */
 323         if (flags & Py_PRINT_RAW) {
 324                 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
 325                 return 0;
 326         }
 327
 328         /* figure out which quote to use; single is preferred */
 329         quote = '\'';
 330         if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
 331                 quote = '"';
 332
 333         fputc(quote, fp);
 334         for (i = 0; i < op->ob_size; i++) {
 335                 c = op->ob_sval[i];
 336                 if (c == quote || c == '\\')
 337                         fprintf(fp, "\\%c", c);
 338                 else if (c < ' ' || c >= 0177)
 339                         fprintf(fp, "\\%03o", c & 0377);
 340                 else
 341                         fputc(c, fp);
 342         }
 343         fputc(quote, fp);
 344         return 0;
 345 }
 346
 347 static PyObject *
 348 string_repr(register PyStringObject *op)
 349 {
 350         size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
 351         PyObject *v;
 352         if (newsize > INT_MAX) {
 353                 PyErr_SetString(PyExc_OverflowError,
 354                         "string is too large to make repr");
 355         }
 356         v = PyString_FromStringAndSize((char *)NULL, newsize);
 357         if (v == NULL) {
 358                 return NULL;
 359         }
 360         else {
 361                 register int i;
 362                 register char c;
 363                 register char *p;
 364                 int quote;
 365
 366                 /* figure out which quote to use; single is preferred */
 367                 quote = '\'';
 368                 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
 369                         quote = '"';
 370
 371                 p = ((PyStringObject *)v)->ob_sval;
 372                 *p++ = quote;
 373                 for (i = 0; i < op->ob_size; i++) {
 374                         c = op->ob_sval[i];
 375                         if (c == quote || c == '\\')
 376                                 *p++ = '\\', *p++ = c;
 377                         else if (c < ' ' || c >= 0177) {
 378                                 sprintf(p, "\\%03o", c & 0377);
 379                                 while (*p != '\0')
 380                                         p++;
 381                         }
 382                         else
 383                                 *p++ = c;
 384                 }
 385                 *p++ = quote;
 386                 *p = '\0';
 387                 _PyString_Resize(
 388                         &v, (int) (p - ((PyStringObject *)v)->ob_sval));
 389                 return v;
 390         }
 391 }
 392
 393 static int
 394 string_length(PyStringObject *a)
 395 {
 396         return a->ob_size;
 397 }
 398
 399 static PyObject *
 400 string_concat(register PyStringObject *a, register PyObject *bb)
 401 {
 402         register unsigned int size;
 403         register PyStringObject *op;
 404         if (!PyString_Check(bb)) {
 405                 if (PyUnicode_Check(bb))
 406                     return PyUnicode_Concat((PyObject *)a, bb);
 407                 PyErr_Format(PyExc_TypeError,
 408                              "cannot add type \"%.200s\" to string",
 409                              bb->ob_type->tp_name);
 410                 return NULL;
 411         }
 412 #define b ((PyStringObject *)bb)
 413         /* Optimize cases with empty left or right operand */
 414         if (a->ob_size == 0) {
 415                 Py_INCREF(bb);
 416                 return bb;
 417         }
 418         if (b->ob_size == 0) {
 419                 Py_INCREF(a);
 420                 return (PyObject *)a;
 421         }
 422         size = a->ob_size + b->ob_size;
 423         /* PyObject_NewVar is inlined */
 424         op = (PyStringObject *)
 425                 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
 426         if (op == NULL)
 427                 return PyErr_NoMemory();
 428         PyObject_INIT_VAR(op, &PyString_Type, size);
 429 #ifdef CACHE_HASH
 430         op->ob_shash = -1;
 431 #endif
 432 #ifdef INTERN_STRINGS
 433         op->ob_sinterned = NULL;
 434 #endif
 435         memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
 436         memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
 437         op->ob_sval[size] = '\0';
 438         return (PyObject *) op;
 439 #undef b
 440 }
 441
 442 static PyObject *
 443 string_repeat(register PyStringObject *a, register int n)
 444 {
 445         register int i;
 446         register int size;
 447         register PyStringObject *op;
 448         size_t nbytes;
 449         if (n < 0)
 450                 n = 0;
 451         /* watch out for overflows:  the size can overflow int,
 452          * and the # of bytes needed can overflow size_t
 453          */
 454         size = a->ob_size * n;
 455         if (n && size / n != a->ob_size) {
 456                 PyErr_SetString(PyExc_OverflowError,
 457                         "repeated string is too long");
 458                 return NULL;
 459         }
 460         if (size == a->ob_size) {
 461                 Py_INCREF(a);
 462                 return (PyObject *)a;
 463         }
 464         nbytes = size * sizeof(char);
 465         if (nbytes / sizeof(char) != (size_t)size ||
 466             nbytes + sizeof(PyStringObject) <= nbytes) {
 467                 PyErr_SetString(PyExc_OverflowError,
 468                         "repeated string is too long");
 469                 return NULL;
 470         }
 471         op = (PyStringObject *)
 472                 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
 473         if (op == NULL)
 474                 return PyErr_NoMemory();
 475         PyObject_INIT_VAR(op, &PyString_Type, size);
 476 #ifdef CACHE_HASH
 477         op->ob_shash = -1;
 478 #endif
 479 #ifdef INTERN_STRINGS
 480         op->ob_sinterned = NULL;
 481 #endif
 482         for (i = 0; i < size; i += a->ob_size)
 483                 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
 484         op->ob_sval[size] = '\0';
 485         return (PyObject *) op;
 486 }
 487
 488 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
 489
 490 static PyObject *
 491 string_slice(register PyStringObject *a, register int i, register int j)
 492      /* j -- may be negative! */
 493 {
 494         if (i < 0)
 495                 i = 0;
 496         if (j < 0)
 497                 j = 0; /* Avoid signed/unsigned bug in next line */
 498         if (j > a->ob_size)
 499                 j = a->ob_size;
 500         if (i == 0 && j == a->ob_size) { /* It's the same as a */
 501                 Py_INCREF(a);
 502                 return (PyObject *)a;
 503         }
 504         if (j < i)
 505                 j = i;
 506         return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
 507 }
 508
 509 static int
 510 string_contains(PyObject *a, PyObject *el)
 511 {
 512         register char *s, *end;
 513         register char c;
 514         if (PyUnicode_Check(el))
 515                 return PyUnicode_Contains(a, el);
 516         if (!PyString_Check(el) || PyString_Size(el) != 1) {
 517                 PyErr_SetString(PyExc_TypeError,
 518                     "'in <string>' requires character as left operand");
 519                 return -1;
 520         }
 521         c = PyString_AsString(el)[0];
 522         s = PyString_AsString(a);
 523         end = s + PyString_Size(a);
 524         while (s < end) {
 525                 if (c == *s++)
 526                         return 1;
 527         }
 528         return 0;
 529 }
 530
 531 static PyObject *
 532 string_item(PyStringObject *a, register int i)
 533 {
 534         int c;
 535         PyObject *v;
 536         if (i < 0 || i >= a->ob_size) {
 537                 PyErr_SetString(PyExc_IndexError, "string index out of range");
 538                 return NULL;
 539         }
 540         c = a->ob_sval[i] & UCHAR_MAX;
 541         v = (PyObject *) characters[c];
 542 #ifdef COUNT_ALLOCS
 543         if (v != NULL)
 544                 one_strings++;
 545 #endif
 546         if (v == NULL) {
 547                 v = PyString_FromStringAndSize((char *)NULL, 1);
 548                 if (v == NULL)
 549                         return NULL;
 550                 characters[c] = (PyStringObject *) v;
 551                 ((PyStringObject *)v)->ob_sval[0] = c;
 552         }
 553         Py_INCREF(v);
 554         return v;
 555 }
 556
 557 static int
 558 string_compare(PyStringObject *a, PyStringObject *b)
 559 {
 560         int len_a = a->ob_size, len_b = b->ob_size;
 561         int min_len = (len_a < len_b) ? len_a : len_b;
 562         int cmp;
 563         if (min_len > 0) {
 564                 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
 565                 if (cmp == 0)
 566                         cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
 567                 if (cmp != 0)
 568                         return cmp;
 569         }
 570         return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
 571 }
 572
 573 static long
 574 string_hash(PyStringObject *a)
 575 {
 576         register int len;
 577         register unsigned char *p;
 578         register long x;
 579
 580 #ifdef CACHE_HASH
 581         if (a->ob_shash != -1)
 582                 return a->ob_shash;
 583 #ifdef INTERN_STRINGS
 584         if (a->ob_sinterned != NULL)
 585                 return (a->ob_shash =
 586                         ((PyStringObject *)(a->ob_sinterned))->ob_shash);
 587 #endif
 588 #endif
 589         len = a->ob_size;
 590         p = (unsigned char *) a->ob_sval;
 591         x = *p << 7;
 592         while (--len >= 0)
 593                 x = (1000003*x) ^ *p++;
 594         x ^= a->ob_size;
 595         if (x == -1)
 596                 x = -2;
 597 #ifdef CACHE_HASH
 598         a->ob_shash = x;
 599 #endif
 600         return x;
 601 }
 602
 603 static int
 604 string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
 605 {
 606         if ( index != 0 ) {
 607                 PyErr_SetString(PyExc_SystemError,
 608                                 "accessing non-existent string segment");
 609                 return -1;
 610         }
 611         *ptr = (void *)self->ob_sval;
 612         return self->ob_size;
 613 }
 614
 615 static int
 616 string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
 617 {
 618         PyErr_SetString(PyExc_TypeError,
 619                         "Cannot use string as modifiable buffer");
 620         return -1;
 621 }
 622
 623 static int
 624 string_buffer_getsegcount(PyStringObject *self, int *lenp)
 625 {
 626         if ( lenp )
 627                 *lenp = self->ob_size;
 628         return 1;
 629 }
 630
 631 static int
 632 string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
 633 {
 634         if ( index != 0 ) {
 635                 PyErr_SetString(PyExc_SystemError,
 636                                 "accessing non-existent string segment");
 637                 return -1;
 638         }
 639         *ptr = self->ob_sval;
 640         return self->ob_size;
 641 }
 642
 /* Sequence slot table for strings: each slot names the function in
    this file that implements it.  The item- and slice-assignment
    slots are 0, i.e. no handler is supplied for them. */
 643 static PySequenceMethods string_as_sequence = {
 644         (inquiry)string_length, /*sq_length*/
 645         (binaryfunc)string_concat, /*sq_concat*/
 646         (intargfunc)string_repeat, /*sq_repeat*/
 647         (intargfunc)string_item, /*sq_item*/
 648         (intintargfunc)string_slice, /*sq_slice*/
 649         0,              /*sq_ass_item*/
 650         0,              /*sq_ass_slice*/
 651         (objobjproc)string_contains /*sq_contains*/
 652 };
 653
 /* Buffer slot table for strings, in the order: read buffer, write
    buffer (always fails, see string_buffer_getwritebuf), segment
    count, character buffer. */
 654 static PyBufferProcs string_as_buffer = {
 655         (getreadbufferproc)string_buffer_getreadbuf,
 656         (getwritebufferproc)string_buffer_getwritebuf,
 657         (getsegcountproc)string_buffer_getsegcount,
 658         (getcharbufferproc)string_buffer_getcharbuf,
 659 };
 660
 661
 662 \f
 /* Selector constants 0, 1 and 2 for left, right and two-sided
    stripping.  NOTE(review): the code using them is not visible in
    this part of the file -- presumably the strip methods; confirm. */
 663 #define LEFTSTRIP 0
 664 #define RIGHTSTRIP 1
 665 #define BOTHSTRIP 2
 666
 667
 668 static PyObject *
 669 split_whitespace(const char *s, int len, int maxsplit)
 670 {
 671         int i, j, err;
 672         PyObject* item;
 673         PyObject *list = PyList_New(0);
 674
 675         if (list == NULL)
 676                 return NULL;
 677
 678         for (i = j = 0; i < len; ) {
 679                 while (i < len && isspace(Py_CHARMASK(s[i])))
 680                         i++;
 681                 j = i;
 682                 while (i < len && !isspace(Py_CHARMASK(s[i])))
 683                         i++;
 684                 if (j < i) {
 685                         if (maxsplit-- <= 0)
 686                                 break;
 687                         item = PyString_FromStringAndSize(s+j, (int)(i-j));
 688                         if (item == NULL)
 689                                 goto finally;
 690                         err = PyList_Append(list, item);
 691                         Py_DECREF(item);
 692                         if (err < 0)
 693                                 goto finally;
 694                         while (i < len && isspace(Py_CHARMASK(s[i])))
 695                                 i++;
 696                         j = i;
 697                 }
 698         }
 699         if (j < len) {
 700                 item = PyString_FromStringAndSize(s+j, (int)(len - j));
 701                 if (item == NULL)
 702                         goto finally;
 703                 err = PyList_Append(list, item);
 704                 Py_DECREF(item);
 705                 if (err < 0)
 706                         goto finally;
 707         }
 708         return list;
 709   finally:
 710         Py_DECREF(list);
 711         return NULL;
 712 }
 713
 714
 /* Help text for S.split() (see string_split). */
 715 static char split__doc__[] =
 716 "S.split([sep [,maxsplit]]) -> list of strings\n\
 717 \n\
 718 Return a list of the words in the string S, using sep as the\n\
 719 delimiter string.  If maxsplit is given, at most maxsplit\n\
 720 splits are done. If sep is not specified, any whitespace string\n\
 721 is a separator.";
 722
 723 static PyObject *
 724 string_split(PyStringObject *self, PyObject *args)
 725 {
 726         int len = PyString_GET_SIZE(self), n, i, j, err;
 727         int maxsplit = -1;
 728         const char *s = PyString_AS_STRING(self), *sub;
 729         PyObject *list, *item, *subobj = Py_None;
 730
 731         if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
 732                 return NULL;
 733         if (maxsplit < 0)
 734                 maxsplit = INT_MAX;
 735         if (subobj == Py_None)
 736                 return split_whitespace(s, len, maxsplit);
 737         if (PyString_Check(subobj)) {
 738                 sub = PyString_AS_STRING(subobj);
 739                 n = PyString_GET_SIZE(subobj);
 740         }
 741         else if (PyUnicode_Check(subobj))
 742                 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
 743         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
 744                 return NULL;
 745         if (n == 0) {
 746                 PyErr_SetString(PyExc_ValueError, "empty separator");
 747                 return NULL;
 748         }
 749
 750         list = PyList_New(0);
 751         if (list == NULL)
 752                 return NULL;
 753
 754         i = j = 0;
 755         while (i+n <= len) {
 756                 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
 757                         if (maxsplit-- <= 0)
 758                                 break;
 759                         item = PyString_FromStringAndSize(s+j, (int)(i-j));
 760                         if (item == NULL)
 761                                 goto fail;
 762                         err = PyList_Append(list, item);
 763                         Py_DECREF(item);
 764                         if (err < 0)
 765                                 goto fail;
 766                         i = j = i + n;
 767                 }
 768                 else
 769                         i++;
 770         }
 771         item = PyString_FromStringAndSize(s+j, (int)(len-j));
 772         if (item == NULL)
 773                 goto fail;
 774         err = PyList_Append(list, item);
 775         Py_DECREF(item);
 776         if (err < 0)
 777                 goto fail;
 778
 779         return list;
 780
 781  fail:
 782         Py_DECREF(list);
 783         return NULL;
 784 }
 785
 786
 /* Help text for S.join() (see string_join). */
 787 static char join__doc__[] =
 788 "S.join(sequence) -> string\n\
 789 \n\
 790 Return a string which is the concatenation of the strings in the\n\
 791 sequence.  The separator between elements is S.";
 792
 793 static PyObject *
 794 string_join(PyStringObject *self, PyObject *args)
 795 {
 796         char *sep = PyString_AS_STRING(self);
 797         int seplen = PyString_GET_SIZE(self);
 798         PyObject *res = NULL;
 799         int reslen = 0;
 800         char *p;
 801         int seqlen = 0;
 802         int sz = 100;
 803         int i, slen, sz_incr;
 804         PyObject *orig, *seq, *item;
 805
 806         if (!PyArg_ParseTuple(args, "O:join", &orig))
 807                 return NULL;
 808
 809         if (!(seq = PySequence_Fast(orig, ""))) {
 810                 if (PyErr_ExceptionMatches(PyExc_TypeError))
 811                         PyErr_Format(PyExc_TypeError,
 812                                      "sequence expected, %.80s found",
 813                                      orig->ob_type->tp_name);
 814                 return NULL;
 815         }
 816         /* From here on out, errors go through finally: for proper
 817          * reference count manipulations.
 818          */
 819         seqlen = PySequence_Size(seq);
 820         if (seqlen == 1) {
 821                 item = PySequence_Fast_GET_ITEM(seq, 0);
 822                 Py_INCREF(item);
 823                 Py_DECREF(seq);
 824                 return item;
 825         }
 826
 827         if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
 828                 goto finally;
 829
 830         p = PyString_AS_STRING(res);
 831
 832         for (i = 0; i < seqlen; i++) {
 833                 item = PySequence_Fast_GET_ITEM(seq, i);
 834                 if (!PyString_Check(item)){
 835                         if (PyUnicode_Check(item)) {
 836                                 Py_DECREF(res);
 837                                 Py_DECREF(seq);
 838                                 return PyUnicode_Join((PyObject *)self, orig);
 839                         }
 840                         PyErr_Format(PyExc_TypeError,
 841                                      "sequence item %i: expected string,"
 842                                      " %.80s found",
 843                                      i, item->ob_type->tp_name);
 844                         goto finally;
 845                 }
 846                 slen = PyString_GET_SIZE(item);
 847                 while (reslen + slen + seplen >= sz) {
 848                         /* at least double the size of the string */
 849                         sz_incr = slen + seplen > sz ? slen + seplen : sz;
 850                         if (_PyString_Resize(&res, sz + sz_incr)) {
 851                                 goto finally;
 852                         }
 853                         sz += sz_incr;
 854                         p = PyString_AS_STRING(res) + reslen;
 855                 }
 856                 if (i > 0) {
 857                         memcpy(p, sep, seplen);
 858                         p += seplen;
 859                         reslen += seplen;
 860                 }
 861                 memcpy(p, PyString_AS_STRING(item), slen);
 862                 p += slen;
 863                 reslen += slen;
 864         }
 865         if (_PyString_Resize(&res, reslen))
 866                 goto finally;
 867         Py_DECREF(seq);
 868         return res;
 869
 870   finally:
 871         Py_DECREF(seq);
 872         Py_XDECREF(res);
 873         return NULL;
 874 }
 875
 876
 877
 878 static long
 879 string_find_internal(PyStringObject *self, PyObject *args, int dir)
 880 {
 881         const char *s = PyString_AS_STRING(self), *sub;
 882         int len = PyString_GET_SIZE(self);
 883         int n, i = 0, last = INT_MAX;
 884         PyObject *subobj;
 885
 886         if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
 887                 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
 888                 return -2;
 889         if (PyString_Check(subobj)) {
 890                 sub = PyString_AS_STRING(subobj);
 891                 n = PyString_GET_SIZE(subobj);
 892         }
 893         else if (PyUnicode_Check(subobj))
 894                 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
 895         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
 896                 return -2;
 897
 898         if (last > len)
 899                 last = len;
 900         if (last < 0)
 901                 last += len;
 902         if (last < 0)
 903                 last = 0;
 904         if (i < 0)
 905                 i += len;
 906         if (i < 0)
 907                 i = 0;
 908
 909         if (dir > 0) {
 910                 if (n == 0 && i <= last)
 911                         return (long)i;
 912                 last -= n;
 913                 for (; i <= last; ++i)
 914                         if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
 915                                 return (long)i;
 916         }
 917         else {
 918                 int j;
 919
 920                 if (n == 0 && i <= last)
 921                         return (long)last;
 922                 for (j = last-n; j >= i; --j)
 923                         if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
 924                                 return (long)j;
 925         }
 926
 927         return -1;
 928 }
 929
 930
 931 static char find__doc__[] =
 932 "S.find(sub [,start [,end]]) -> int\n\
 933 \n\
 934 Return the lowest index in S where substring sub is found,\n\
 935 such that sub is contained within s[start,end].  Optional\n\
 936 arguments start and end are interpreted as in slice notation.\n\
 937 \n\
 938 Return -1 on failure.";
 939
 940 static PyObject *
 941 string_find(PyStringObject *self, PyObject *args)
 942 {
 943         long result = string_find_internal(self, args, +1);
 944         if (result == -2)
 945                 return NULL;
 946         return PyInt_FromLong(result);
 947 }
 948
 949
 950 static char index__doc__[] =
 951 "S.index(sub [,start [,end]]) -> int\n\
 952 \n\
 953 Like S.find() but raise ValueError when the substring is not found.";
 954
 955 static PyObject *
 956 string_index(PyStringObject *self, PyObject *args)
 957 {
 958         long result = string_find_internal(self, args, +1);
 959         if (result == -2)
 960                 return NULL;
 961         if (result == -1) {
 962                 PyErr_SetString(PyExc_ValueError,
 963                                 "substring not found in string.index");
 964                 return NULL;
 965         }
 966         return PyInt_FromLong(result);
 967 }
 968
 969
 970 static char rfind__doc__[] =
 971 "S.rfind(sub [,start [,end]]) -> int\n\
 972 \n\
 973 Return the highest index in S where substring sub is found,\n\
 974 such that sub is contained within s[start,end].  Optional\n\
 975 arguments start and end are interpreted as in slice notation.\n\
 976 \n\
 977 Return -1 on failure.";
 978
 979 static PyObject *
 980 string_rfind(PyStringObject *self, PyObject *args)
 981 {
 982         long result = string_find_internal(self, args, -1);
 983         if (result == -2)
 984                 return NULL;
 985         return PyInt_FromLong(result);
 986 }
 987
 988
 989 static char rindex__doc__[] =
 990 "S.rindex(sub [,start [,end]]) -> int\n\
 991 \n\
 992 Like S.rfind() but raise ValueError when the substring is not found.";
 993
 994 static PyObject *
 995 string_rindex(PyStringObject *self, PyObject *args)
 996 {
 997         long result = string_find_internal(self, args, -1);
 998         if (result == -2)
 999                 return NULL;
1000         if (result == -1) {
1001                 PyErr_SetString(PyExc_ValueError,
1002                                 "substring not found in string.rindex");
1003                 return NULL;
1004         }
1005         return PyInt_FromLong(result);
1006 }
1007
1008
1009 static PyObject *
1010 do_strip(PyStringObject *self, PyObject *args, int striptype)
1011 {
1012         char *s = PyString_AS_STRING(self);
1013         int len = PyString_GET_SIZE(self), i, j;
1014
1015         if (!PyArg_ParseTuple(args, ":strip"))
1016                 return NULL;
1017
1018         i = 0;
1019         if (striptype != RIGHTSTRIP) {
1020                 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1021                         i++;
1022                 }
1023         }
1024
1025         j = len;
1026         if (striptype != LEFTSTRIP) {
1027                 do {
1028                         j--;
1029                 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1030                 j++;
1031         }
1032
1033         if (i == 0 && j == len) {
1034                 Py_INCREF(self);
1035                 return (PyObject*)self;
1036         }
1037         else
1038                 return PyString_FromStringAndSize(s+i, j-i);
1039 }
1040
1041
1042 static char strip__doc__[] =
1043 "S.strip() -> string\n\
1044 \n\
1045 Return a copy of the string S with leading and trailing\n\
1046 whitespace removed.";
1047
1048 static PyObject *
1049 string_strip(PyStringObject *self, PyObject *args)
1050 {
1051         return do_strip(self, args, BOTHSTRIP);
1052 }
1053
1054
1055 static char lstrip__doc__[] =
1056 "S.lstrip() -> string\n\
1057 \n\
1058 Return a copy of the string S with leading whitespace removed.";
1059
1060 static PyObject *
1061 string_lstrip(PyStringObject *self, PyObject *args)
1062 {
1063         return do_strip(self, args, LEFTSTRIP);
1064 }
1065
1066
1067 static char rstrip__doc__[] =
1068 "S.rstrip() -> string\n\
1069 \n\
1070 Return a copy of the string S with trailing whitespace removed.";
1071
1072 static PyObject *
1073 string_rstrip(PyStringObject *self, PyObject *args)
1074 {
1075         return do_strip(self, args, RIGHTSTRIP);
1076 }
1077
1078
1079 static char lower__doc__[] =
1080 "S.lower() -> string\n\
1081 \n\
1082 Return a copy of the string S converted to lowercase.";
1083
1084 static PyObject *
1085 string_lower(PyStringObject *self, PyObject *args)
1086 {
1087         char *s = PyString_AS_STRING(self), *s_new;
1088         int i, n = PyString_GET_SIZE(self);
1089         PyObject *new;
1090
1091         if (!PyArg_ParseTuple(args, ":lower"))
1092                 return NULL;
1093         new = PyString_FromStringAndSize(NULL, n);
1094         if (new == NULL)
1095                 return NULL;
1096         s_new = PyString_AsString(new);
1097         for (i = 0; i < n; i++) {
1098                 int c = Py_CHARMASK(*s++);
1099                 if (isupper(c)) {
1100                         *s_new = tolower(c);
1101                 } else
1102                         *s_new = c;
1103                 s_new++;
1104         }
1105         return new;
1106 }
1107
1108
1109 static char upper__doc__[] =
1110 "S.upper() -> string\n\
1111 \n\
1112 Return a copy of the string S converted to uppercase.";
1113
1114 static PyObject *
1115 string_upper(PyStringObject *self, PyObject *args)
1116 {
1117         char *s = PyString_AS_STRING(self), *s_new;
1118         int i, n = PyString_GET_SIZE(self);
1119         PyObject *new;
1120
1121         if (!PyArg_ParseTuple(args, ":upper"))
1122                 return NULL;
1123         new = PyString_FromStringAndSize(NULL, n);
1124         if (new == NULL)
1125                 return NULL;
1126         s_new = PyString_AsString(new);
1127         for (i = 0; i < n; i++) {
1128                 int c = Py_CHARMASK(*s++);
1129                 if (islower(c)) {
1130                         *s_new = toupper(c);
1131                 } else
1132                         *s_new = c;
1133                 s_new++;
1134         }
1135         return new;
1136 }
1137
1138
1139 static char title__doc__[] =
1140 "S.title() -> string\n\
1141 \n\
1142 Return a titlecased version of S, i.e. words start with uppercase\n\
1143 characters, all remaining cased characters have lowercase.";
1144
1145 static PyObject*
1146 string_title(PyUnicodeObject *self, PyObject *args)
1147 {
1148         char *s = PyString_AS_STRING(self), *s_new;
1149         int i, n = PyString_GET_SIZE(self);
1150         int previous_is_cased = 0;
1151         PyObject *new;
1152
1153         if (!PyArg_ParseTuple(args, ":title"))
1154                 return NULL;
1155         new = PyString_FromStringAndSize(NULL, n);
1156         if (new == NULL)
1157                 return NULL;
1158         s_new = PyString_AsString(new);
1159         for (i = 0; i < n; i++) {
1160                 int c = Py_CHARMASK(*s++);
1161                 if (islower(c)) {
1162                         if (!previous_is_cased)
1163                             c = toupper(c);
1164                         previous_is_cased = 1;
1165                 } else if (isupper(c)) {
1166                         if (previous_is_cased)
1167                             c = tolower(c);
1168                         previous_is_cased = 1;
1169                 } else
1170                         previous_is_cased = 0;
1171                 *s_new++ = c;
1172         }
1173         return new;
1174 }
1175
1176 static char capitalize__doc__[] =
1177 "S.capitalize() -> string\n\
1178 \n\
1179 Return a copy of the string S with only its first character\n\
1180 capitalized.";
1181
1182 static PyObject *
1183 string_capitalize(PyStringObject *self, PyObject *args)
1184 {
1185         char *s = PyString_AS_STRING(self), *s_new;
1186         int i, n = PyString_GET_SIZE(self);
1187         PyObject *new;
1188
1189         if (!PyArg_ParseTuple(args, ":capitalize"))
1190                 return NULL;
1191         new = PyString_FromStringAndSize(NULL, n);
1192         if (new == NULL)
1193                 return NULL;
1194         s_new = PyString_AsString(new);
1195         if (0 < n) {
1196                 int c = Py_CHARMASK(*s++);
1197                 if (islower(c))
1198                         *s_new = toupper(c);
1199                 else
1200                         *s_new = c;
1201                 s_new++;
1202         }
1203         for (i = 1; i < n; i++) {
1204                 int c = Py_CHARMASK(*s++);
1205                 if (isupper(c))
1206                         *s_new = tolower(c);
1207                 else
1208                         *s_new = c;
1209                 s_new++;
1210         }
1211         return new;
1212 }
1213
1214
1215 static char count__doc__[] =
1216 "S.count(sub[, start[, end]]) -> int\n\
1217 \n\
1218 Return the number of occurrences of substring sub in string\n\
1219 S[start:end].  Optional arguments start and end are\n\
1220 interpreted as in slice notation.";
1221
1222 static PyObject *
1223 string_count(PyStringObject *self, PyObject *args)
1224 {
1225         const char *s = PyString_AS_STRING(self), *sub;
1226         int len = PyString_GET_SIZE(self), n;
1227         int i = 0, last = INT_MAX;
1228         int m, r;
1229         PyObject *subobj;
1230
1231         if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1232                 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
1233                 return NULL;
1234
1235         if (PyString_Check(subobj)) {
1236                 sub = PyString_AS_STRING(subobj);
1237                 n = PyString_GET_SIZE(subobj);
1238         }
1239         else if (PyUnicode_Check(subobj)) {
1240                 int count;
1241                 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1242                 if (count == -1)
1243                         return NULL;
1244                 else
1245                         return PyInt_FromLong((long) count);
1246         }
1247         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1248                 return NULL;
1249
1250         if (last > len)
1251                 last = len;
1252         if (last < 0)
1253                 last += len;
1254         if (last < 0)
1255                 last = 0;
1256         if (i < 0)
1257                 i += len;
1258         if (i < 0)
1259                 i = 0;
1260         m = last + 1 - n;
1261         if (n == 0)
1262                 return PyInt_FromLong((long) (m-i));
1263
1264         r = 0;
1265         while (i < m) {
1266                 if (!memcmp(s+i, sub, n)) {
1267                         r++;
1268                         i += n;
1269                 } else {
1270                         i++;
1271                 }
1272         }
1273         return PyInt_FromLong((long) r);
1274 }
1275
1276
1277 static char swapcase__doc__[] =
1278 "S.swapcase() -> string\n\
1279 \n\
1280 Return a copy of the string S with uppercase characters\n\
1281 converted to lowercase and vice versa.";
1282
1283 static PyObject *
1284 string_swapcase(PyStringObject *self, PyObject *args)
1285 {
1286         char *s = PyString_AS_STRING(self), *s_new;
1287         int i, n = PyString_GET_SIZE(self);
1288         PyObject *new;
1289
1290         if (!PyArg_ParseTuple(args, ":swapcase"))
1291                 return NULL;
1292         new = PyString_FromStringAndSize(NULL, n);
1293         if (new == NULL)
1294                 return NULL;
1295         s_new = PyString_AsString(new);
1296         for (i = 0; i < n; i++) {
1297                 int c = Py_CHARMASK(*s++);
1298                 if (islower(c)) {
1299                         *s_new = toupper(c);
1300                 }
1301                 else if (isupper(c)) {
1302                         *s_new = tolower(c);
1303                 }
1304                 else
1305                         *s_new = c;
1306                 s_new++;
1307         }
1308         return new;
1309 }
1310
1311
1312 static char translate__doc__[] =
1313 "S.translate(table [,deletechars]) -> string\n\
1314 \n\
1315 Return a copy of the string S, where all characters occurring\n\
1316 in the optional argument deletechars are removed, and the\n\
1317 remaining characters have been mapped through the given\n\
1318 translation table, which must be a string of length 256.";
1319
1320 static PyObject *
1321 string_translate(PyStringObject *self, PyObject *args)
1322 {
1323         register char *input, *output;
1324         register const char *table;
1325         register int i, c, changed = 0;
1326         PyObject *input_obj = (PyObject*)self;
1327         const char *table1, *output_start, *del_table=NULL;
1328         int inlen, tablen, dellen = 0;
1329         PyObject *result;
1330         int trans_table[256];
1331         PyObject *tableobj, *delobj = NULL;
1332
1333         if (!PyArg_ParseTuple(args, "O|O:translate",
1334                               &tableobj, &delobj))
1335                 return NULL;
1336
1337         if (PyString_Check(tableobj)) {
1338                 table1 = PyString_AS_STRING(tableobj);
1339                 tablen = PyString_GET_SIZE(tableobj);
1340         }
1341         else if (PyUnicode_Check(tableobj)) {
1342                 /* Unicode .translate() does not support the deletechars
1343                    parameter; instead a mapping to None will cause characters
1344                    to be deleted. */
1345                 if (delobj != NULL) {
1346                         PyErr_SetString(PyExc_TypeError,
1347                         "deletions are implemented differently for unicode");
1348                         return NULL;
1349                 }
1350                 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1351         }
1352         else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
1353                 return NULL;
1354
1355         if (delobj != NULL) {
1356                 if (PyString_Check(delobj)) {
1357                         del_table = PyString_AS_STRING(delobj);
1358                         dellen = PyString_GET_SIZE(delobj);
1359                 }
1360                 else if (PyUnicode_Check(delobj)) {
1361                         PyErr_SetString(PyExc_TypeError,
1362                         "deletions are implemented differently for unicode");
1363                         return NULL;
1364                 }
1365                 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1366                         return NULL;
1367
1368                 if (tablen != 256) {
1369                         PyErr_SetString(PyExc_ValueError,
1370                           "translation table must be 256 characters long");
1371                         return NULL;
1372                 }
1373         }
1374         else {
1375                 del_table = NULL;
1376                 dellen = 0;
1377         }
1378
1379         table = table1;
1380         inlen = PyString_Size(input_obj);
1381         result = PyString_FromStringAndSize((char *)NULL, inlen);
1382         if (result == NULL)
1383                 return NULL;
1384         output_start = output = PyString_AsString(result);
1385         input = PyString_AsString(input_obj);
1386
1387         if (dellen == 0) {
1388                 /* If no deletions are required, use faster code */
1389                 for (i = inlen; --i >= 0; ) {
1390                         c = Py_CHARMASK(*input++);
1391                         if (Py_CHARMASK((*output++ = table[c])) != c)
1392                                 changed = 1;
1393                 }
1394                 if (changed)
1395                         return result;
1396                 Py_DECREF(result);
1397                 Py_INCREF(input_obj);
1398                 return input_obj;
1399         }
1400
1401         for (i = 0; i < 256; i++)
1402                 trans_table[i] = Py_CHARMASK(table[i]);
1403
1404         for (i = 0; i < dellen; i++)
1405                 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1406
1407         for (i = inlen; --i >= 0; ) {
1408                 c = Py_CHARMASK(*input++);
1409                 if (trans_table[c] != -1)
1410                         if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1411                                 continue;
1412                 changed = 1;
1413         }
1414         if (!changed) {
1415                 Py_DECREF(result);
1416                 Py_INCREF(input_obj);
1417                 return input_obj;
1418         }
1419         /* Fix the size of the resulting string */
1420         if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1421                 return NULL;
1422         return result;
1423 }
1424
1425
1426 /* What follows is used for implementing replace().  Perry Stoll. */
1427
/*
  mymemfind

  A strstr() look-alike for arbitrary memory blocks.

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT and returns the index of that occurrence, or -1 when
  there is none.  A pattern longer than the memory block can never
  match, so -1 is returned in that case as well.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
        /* last index at which a complete copy of PAT still fits */
        const int last_start = len - pat_len;
        int pos = 0;

        while (pos <= last_start) {
                /* cheap first-byte test before the full comparison */
                if (mem[pos] == pat[0] &&
                    memcmp(mem + pos, pat, pat_len) == 0)
                        return pos;
                pos++;
        }
        return -1;
}
1453
/*
  mymemcnt

   Count the non-overlapping occurrences of PAT in MEM, scanning from
   left to right and resuming after the end of each match:
           mem=1111  and pat=11 gives 2,
           mem=11111 and pat=11 gives 2 as well.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
        int hits;

        for (hits = 0; len >= 0; hits++) {
                const int at = mymemfind(mem, len, pat, pat_len);
                int consumed;

                if (at == -1)
                        break;
                /* step over the skipped prefix and the match itself */
                consumed = at + pat_len;
                mem += consumed;
                len -= consumed;
        }
        return hits;
}
1477
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT occurrences are replaced; a
   negative COUNT means all of them.

   If STR is empty, PAT is longer than STR, or nothing is to be
   replaced, the original string is returned.  Otherwise a new buffer
   is allocated here with PyMem_MALLOC and returned; the caller must
   release it with PyMem_FREE.  The buffer is not NUL-terminated.

   PAT must not be empty (string_replace() rejects empty patterns
   before calling).

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,          /* input string */
             const char *pat, int pat_len,      /* pattern string to find */
             const char *sub, int sub_len,      /* substitution string */
             int count,                         /* number of replacements */
             int *out_len)
{
        char *out_s;
        char *new_s;
        int nfound, offset, new_len;

        if (len == 0 || pat_len > len)
                goto return_same;

        /* find length of output string */
        nfound = mymemcnt(str, len, pat, pat_len);
        if (count < 0)
                count = INT_MAX;
        else if (nfound > count)
                nfound = count;
        if (nfound == 0)
                goto return_same;

        /* A growing result must still fit in an int; treat a length
           that would overflow like an allocation failure. */
        if (sub_len > pat_len &&
            nfound > (INT_MAX - len) / (sub_len - pat_len))
                return NULL;
        new_len = len + nfound*(sub_len - pat_len);

        /* The result is empty when every byte of STR is deleted.  A
           zero-byte allocation may return NULL, which the caller would
           take for out-of-memory, so always request at least one byte. */
        new_s = (char *)PyMem_MALLOC(new_len ? new_len : 1);
        if (new_s == NULL) return NULL;

        *out_len = new_len;
        out_s = new_s;

        while (len > 0) {
                /* find index of next instance of pattern */
                offset = mymemfind(str, len, pat, pat_len);
                /* if not found,  break out of loop */
                if (offset == -1) break;

                /* copy non matching part of input string */
                memcpy(new_s, str, offset); /* copy part of str before pat */
                str += offset + pat_len; /* move str past pattern */
                len -= offset + pat_len; /* reduce length of str remaining */

                /* copy substitute into the output string */
                new_s += offset; /* move new_s to dest for sub string */
                memcpy(new_s, sub, sub_len); /* copy substring into new_s */
                new_s += sub_len; /* offset new_s past sub string */

                /* break when we've done count replacements */
                if (--count == 0) break;
        }
        /* copy any remaining values into output string */
        if (len > 0)
                memcpy(new_s, str, len);
        return out_s;

  return_same:
        *out_len = -1;
        return (char*)str;      /* have to cast away constness here */
}
1555
1556
1557 static char replace__doc__[] =
1558 "S.replace (old, new[, maxsplit]) -> string\n\
1559 \n\
1560 Return a copy of string S with all occurrences of substring\n\
1561 old replaced by new.  If the optional argument maxsplit is\n\
1562 given, only the first maxsplit occurrences are replaced.";
1563
1564 static PyObject *
1565 string_replace(PyStringObject *self, PyObject *args)
1566 {
1567         const char *str = PyString_AS_STRING(self), *sub, *repl;
1568         char *new_s;
1569         int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1570         int count = -1;
1571         PyObject *new;
1572         PyObject *subobj, *replobj;
1573
1574         if (!PyArg_ParseTuple(args, "OO|i:replace",
1575                               &subobj, &replobj, &count))
1576                 return NULL;
1577
1578         if (PyString_Check(subobj)) {
1579                 sub = PyString_AS_STRING(subobj);
1580                 sub_len = PyString_GET_SIZE(subobj);
1581         }
1582         else if (PyUnicode_Check(subobj))
1583                 return PyUnicode_Replace((PyObject *)self,
1584                                          subobj, replobj, count);
1585         else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1586                 return NULL;
1587
1588         if (PyString_Check(replobj)) {
1589                 repl = PyString_AS_STRING(replobj);
1590                 repl_len = PyString_GET_SIZE(replobj);
1591         }
1592         else if (PyUnicode_Check(replobj))
1593                 return PyUnicode_Replace((PyObject *)self,
1594                                          subobj, replobj, count);
1595         else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1596                 return NULL;
1597
1598         if (sub_len <= 0) {
1599                 PyErr_SetString(PyExc_ValueError, "empty pattern string");
1600                 return NULL;
1601         }
1602         new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
1603         if (new_s == NULL) {
1604                 PyErr_NoMemory();
1605                 return NULL;
1606         }
1607         if (out_len == -1) {
1608                 /* we're returning another reference to self */
1609                 new = (PyObject*)self;
1610                 Py_INCREF(new);
1611         }
1612         else {
1613                 new = PyString_FromStringAndSize(new_s, out_len);
1614                 PyMem_FREE(new_s);
1615         }
1616         return new;
1617 }
1618
1619
1620 static char startswith__doc__[] =
1621 "S.startswith(prefix[, start[, end]]) -> int\n\
1622 \n\
1623 Return 1 if S starts with the specified prefix, otherwise return 0.  With\n\
1624 optional start, test S beginning at that position.  With optional end, stop\n\
1625 comparing S at that position.";
1626
1627 static PyObject *
1628 string_startswith(PyStringObject *self, PyObject *args)
1629 {
1630         const char* str = PyString_AS_STRING(self);
1631         int len = PyString_GET_SIZE(self);
1632         const char* prefix;
1633         int plen;
1634         int start = 0;
1635         int end = -1;
1636         PyObject *subobj;
1637
1638         if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1639                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1640                 return NULL;
1641         if (PyString_Check(subobj)) {
1642                 prefix = PyString_AS_STRING(subobj);
1643                 plen = PyString_GET_SIZE(subobj);
1644         }
1645         else if (PyUnicode_Check(subobj)) {
1646                 int rc;
1647                 rc = PyUnicode_Tailmatch((PyObject *)self,
1648                                           subobj, start, end, -1);
1649                 if (rc == -1)
1650                         return NULL;
1651                 else
1652                         return PyInt_FromLong((long) rc);
1653         }
1654         else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
1655                 return NULL;
1656
1657         /* adopt Java semantics for index out of range.  it is legal for
1658          * offset to be == plen, but this only returns true if prefix is
1659          * the empty string.
1660          */
1661         if (start < 0 || start+plen > len)
1662                 return PyInt_FromLong(0);
1663
1664         if (!memcmp(str+start, prefix, plen)) {
1665                 /* did the match end after the specified end? */
1666                 if (end < 0)
1667                         return PyInt_FromLong(1);
1668                 else if (end - start < plen)
1669                         return PyInt_FromLong(0);
1670                 else
1671                         return PyInt_FromLong(1);
1672         }
1673         else return PyInt_FromLong(0);
1674 }
1675
1676
1677 static char endswith__doc__[] =
1678 "S.endswith(suffix[, start[, end]]) -> int\n\
1679 \n\
1680 Return 1 if S ends with the specified suffix, otherwise return 0.  With\n\
1681 optional start, test S beginning at that position.  With optional end, stop\n\
1682 comparing S at that position.";
1683
1684 static PyObject *
1685 string_endswith(PyStringObject *self, PyObject *args)
1686 {
1687         const char* str = PyString_AS_STRING(self);
1688         int len = PyString_GET_SIZE(self);
1689         const char* suffix;
1690         int slen;
1691         int start = 0;
1692         int end = -1;
1693         int lower, upper;
1694         PyObject *subobj;
1695
1696         if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1697                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1698                 return NULL;
1699         if (PyString_Check(subobj)) {
1700                 suffix = PyString_AS_STRING(subobj);
1701                 slen = PyString_GET_SIZE(subobj);
1702         }
1703         else if (PyUnicode_Check(subobj)) {
1704                 int rc;
1705                 rc = PyUnicode_Tailmatch((PyObject *)self,
1706                                           subobj, start, end, +1);
1707                 if (rc == -1)
1708                         return NULL;
1709                 else
1710                         return PyInt_FromLong((long) rc);
1711         }
1712         else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
1713                 return NULL;
1714
1715         if (start < 0 || start > len || slen > len)
1716                 return PyInt_FromLong(0);
1717
1718         upper = (end >= 0 && end <= len) ? end : len;
1719         lower = (upper - slen) > start ? (upper - slen) : start;
1720
1721         if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
1722                 return PyInt_FromLong(1);
1723         else return PyInt_FromLong(0);
1724 }
1725
1726
1727 static char encode__doc__[] =
1728 "S.encode([encoding[,errors]]) -> string\n\
1729 \n\
1730 Return an encoded string version of S. Default encoding is the current\n\
1731 default string encoding. errors may be given to set a different error\n\
1732 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1733 a ValueError. Other possible values are 'ignore' and 'replace'.";
1734
1735 static PyObject *
1736 string_encode(PyStringObject *self, PyObject *args)
1737 {
1738     char *encoding = NULL;
1739     char *errors = NULL;
1740     if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
1741         return NULL;
1742     return PyString_AsEncodedString((PyObject *)self, encoding, errors);
1743 }
1744
1745
1746 static char expandtabs__doc__[] =
1747 "S.expandtabs([tabsize]) -> string\n\
1748 \n\
1749 Return a copy of S where all tab characters are expanded using spaces.\n\
1750 If tabsize is not given, a tab size of 8 characters is assumed.";
1751
1752 static PyObject*
1753 string_expandtabs(PyStringObject *self, PyObject *args)
1754 {
1755     const char *e, *p;
1756     char *q;
1757     int i, j;
1758     PyObject *u;
1759     int tabsize = 8;
1760
1761     if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1762         return NULL;
1763
1764     /* First pass: determine size of output string */
1765     i = j = 0;
1766     e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1767     for (p = PyString_AS_STRING(self); p < e; p++)
1768         if (*p == '\t') {
1769             if (tabsize > 0)
1770                 j += tabsize - (j % tabsize);
1771         }
1772         else {
1773             j++;
1774             if (*p == '\n' || *p == '\r') {
1775                 i += j;
1776                 j = 0;
1777             }
1778         }
1779
1780     /* Second pass: create output string and fill it */
1781     u = PyString_FromStringAndSize(NULL, i + j);
1782     if (!u)
1783         return NULL;
1784
1785     j = 0;
1786     q = PyString_AS_STRING(u);
1787
1788     for (p = PyString_AS_STRING(self); p < e; p++)
1789         if (*p == '\t') {
1790             if (tabsize > 0) {
1791                 i = tabsize - (j % tabsize);
1792                 j += i;
1793                 while (i--)
1794                     *q++ = ' ';
1795             }
1796         }
1797         else {
1798             j++;
1799             *q++ = *p;
1800             if (*p == '\n' || *p == '\r')
1801                 j = 0;
1802         }
1803
1804     return u;
1805 }
1806
1807 static
1808 PyObject *pad(PyStringObject *self,
1809               int left,
1810               int right,
1811               char fill)
1812 {
1813     PyObject *u;
1814
1815     if (left < 0)
1816         left = 0;
1817     if (right < 0)
1818         right = 0;
1819
1820     if (left == 0 && right == 0) {
1821         Py_INCREF(self);
1822         return (PyObject *)self;
1823     }
1824
1825     u = PyString_FromStringAndSize(NULL,
1826                                    left + PyString_GET_SIZE(self) + right);
1827     if (u) {
1828         if (left)
1829             memset(PyString_AS_STRING(u), fill, left);
1830         memcpy(PyString_AS_STRING(u) + left,
1831                PyString_AS_STRING(self),
1832                PyString_GET_SIZE(self));
1833         if (right)
1834             memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1835                    fill, right);
1836     }
1837
1838     return u;
1839 }
1840
1841 static char ljust__doc__[] =
1842 "S.ljust(width) -> string\n\
1843 \n\
1844 Return S left justified in a string of length width. Padding is\n\
1845 done using spaces.";
1846
1847 static PyObject *
1848 string_ljust(PyStringObject *self, PyObject *args)
1849 {
1850     int width;
1851     if (!PyArg_ParseTuple(args, "i:ljust", &width))
1852         return NULL;
1853
1854     if (PyString_GET_SIZE(self) >= width) {
1855         Py_INCREF(self);
1856         return (PyObject*) self;
1857     }
1858
1859     return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1860 }
1861
1862
1863 static char rjust__doc__[] =
1864 "S.rjust(width) -> string\n\
1865 \n\
1866 Return S right justified in a string of length width. Padding is\n\
1867 done using spaces.";
1868
1869 static PyObject *
1870 string_rjust(PyStringObject *self, PyObject *args)
1871 {
1872     int width;
1873     if (!PyArg_ParseTuple(args, "i:rjust", &width))
1874         return NULL;
1875
1876     if (PyString_GET_SIZE(self) >= width) {
1877         Py_INCREF(self);
1878         return (PyObject*) self;
1879     }
1880
1881     return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1882 }
1883
1884
1885 static char center__doc__[] =
1886 "S.center(width) -> string\n\
1887 \n\
1888 Return S centered in a string of length width. Padding is done\n\
1889 using spaces.";
1890
1891 static PyObject *
1892 string_center(PyStringObject *self, PyObject *args)
1893 {
1894     int marg, left;
1895     int width;
1896
1897     if (!PyArg_ParseTuple(args, "i:center", &width))
1898         return NULL;
1899
1900     if (PyString_GET_SIZE(self) >= width) {
1901         Py_INCREF(self);
1902         return (PyObject*) self;
1903     }
1904
1905     marg = width - PyString_GET_SIZE(self);
1906     left = marg / 2 + (marg & width & 1);
1907
1908     return pad(self, left, marg - left, ' ');
1909 }
1910
#if 0
static char zfill__doc__[] =
"S.zfill(width) -> string\n\
\n\
Pad a numeric string x with zeros on the left, to fill a field\n\
of the specified width. The string x is never truncated.";

/* S.zfill(width) -- currently compiled out.
   Left-pads with '0' up to `width`; a leading '+' or '-' of the
   original text is moved in front of the inserted zeros. */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
    PyObject *padded;
    char *text;
    int width;
    int size;
    int nzeros;

    if (!PyArg_ParseTuple(args, "i:zfill", &width))
        return NULL;

    size = PyString_GET_SIZE(self);
    if (width <= size) {
        Py_INCREF(self);
        return (PyObject*) self;
    }

    nzeros = width - size;
    padded = pad(self, nzeros, 0, '0');
    if (padded == NULL)
        return NULL;

    text = PyString_AS_STRING(padded);
    switch (text[nzeros]) {
    case '+':
    case '-':
        /* move sign to beginning of string */
        text[0] = text[nzeros];
        text[nzeros] = '0';
        break;
    default:
        break;
    }

    return padded;
}
#endif
1950
1951 static char isspace__doc__[] =
1952 "S.isspace() -> int\n\
1953 \n\
1954 Return 1 if there are only whitespace characters in S,\n\
1955 0 otherwise.";
1956
1957 static PyObject*
1958 string_isspace(PyStringObject *self, PyObject *args)
1959 {
1960     register const unsigned char *p
1961         = (unsigned char *) PyString_AS_STRING(self);
1962     register const unsigned char *e;
1963
1964     if (!PyArg_NoArgs(args))
1965         return NULL;
1966
1967     /* Shortcut for single character strings */
1968     if (PyString_GET_SIZE(self) == 1 &&
1969         isspace(*p))
1970         return PyInt_FromLong(1);
1971
1972     /* Special case for empty strings */
1973     if (PyString_GET_SIZE(self) == 0)
1974         return PyInt_FromLong(0);
1975
1976     e = p + PyString_GET_SIZE(self);
1977     for (; p < e; p++) {
1978         if (!isspace(*p))
1979             return PyInt_FromLong(0);
1980     }
1981     return PyInt_FromLong(1);
1982 }
1983
1984
1985 static char isalpha__doc__[] =
1986 "S.isalpha() -> int\n\
1987 \n\
1988 Return 1 if  all characters in S are alphabetic\n\
1989 and there is at least one character in S, 0 otherwise.";
1990
1991 static PyObject*
1992 string_isalpha(PyUnicodeObject *self, PyObject *args)
1993 {
1994     register const unsigned char *p
1995         = (unsigned char *) PyString_AS_STRING(self);
1996     register const unsigned char *e;
1997
1998     if (!PyArg_NoArgs(args))
1999         return NULL;
2000
2001     /* Shortcut for single character strings */
2002     if (PyString_GET_SIZE(self) == 1 &&
2003         isalpha(*p))
2004         return PyInt_FromLong(1);
2005
2006     /* Special case for empty strings */
2007     if (PyString_GET_SIZE(self) == 0)
2008         return PyInt_FromLong(0);
2009
2010     e = p + PyString_GET_SIZE(self);
2011     for (; p < e; p++) {
2012         if (!isalpha(*p))
2013             return PyInt_FromLong(0);
2014     }
2015     return PyInt_FromLong(1);
2016 }
2017
2018
2019 static char isalnum__doc__[] =
2020 "S.isalnum() -> int\n\
2021 \n\
2022 Return 1 if  all characters in S are alphanumeric\n\
2023 and there is at least one character in S, 0 otherwise.";
2024
2025 static PyObject*
2026 string_isalnum(PyUnicodeObject *self, PyObject *args)
2027 {
2028     register const unsigned char *p
2029         = (unsigned char *) PyString_AS_STRING(self);
2030     register const unsigned char *e;
2031
2032     if (!PyArg_NoArgs(args))
2033         return NULL;
2034
2035     /* Shortcut for single character strings */
2036     if (PyString_GET_SIZE(self) == 1 &&
2037         isalnum(*p))
2038         return PyInt_FromLong(1);
2039
2040     /* Special case for empty strings */
2041     if (PyString_GET_SIZE(self) == 0)
2042         return PyInt_FromLong(0);
2043
2044     e = p + PyString_GET_SIZE(self);
2045     for (; p < e; p++) {
2046         if (!isalnum(*p))
2047             return PyInt_FromLong(0);
2048     }
2049     return PyInt_FromLong(1);
2050 }
2051
2052
2053 static char isdigit__doc__[] =
2054 "S.isdigit() -> int\n\
2055 \n\
2056 Return 1 if there are only digit characters in S,\n\
2057 0 otherwise.";
2058
2059 static PyObject*
2060 string_isdigit(PyStringObject *self, PyObject *args)
2061 {
2062     register const unsigned char *p
2063         = (unsigned char *) PyString_AS_STRING(self);
2064     register const unsigned char *e;
2065
2066     if (!PyArg_NoArgs(args))
2067         return NULL;
2068
2069     /* Shortcut for single character strings */
2070     if (PyString_GET_SIZE(self) == 1 &&
2071         isdigit(*p))
2072         return PyInt_FromLong(1);
2073
2074     /* Special case for empty strings */
2075     if (PyString_GET_SIZE(self) == 0)
2076         return PyInt_FromLong(0);
2077
2078     e = p + PyString_GET_SIZE(self);
2079     for (; p < e; p++) {
2080         if (!isdigit(*p))
2081             return PyInt_FromLong(0);
2082     }
2083     return PyInt_FromLong(1);
2084 }
2085
2086
2087 static char islower__doc__[] =
2088 "S.islower() -> int\n\
2089 \n\
2090 Return 1 if  all cased characters in S are lowercase and there is\n\
2091 at least one cased character in S, 0 otherwise.";
2092
2093 static PyObject*
2094 string_islower(PyStringObject *self, PyObject *args)
2095 {
2096     register const unsigned char *p
2097         = (unsigned char *) PyString_AS_STRING(self);
2098     register const unsigned char *e;
2099     int cased;
2100
2101     if (!PyArg_NoArgs(args))
2102         return NULL;
2103
2104     /* Shortcut for single character strings */
2105     if (PyString_GET_SIZE(self) == 1)
2106         return PyInt_FromLong(islower(*p) != 0);
2107
2108     /* Special case for empty strings */
2109     if (PyString_GET_SIZE(self) == 0)
2110         return PyInt_FromLong(0);
2111
2112     e = p + PyString_GET_SIZE(self);
2113     cased = 0;
2114     for (; p < e; p++) {
2115         if (isupper(*p))
2116             return PyInt_FromLong(0);
2117         else if (!cased && islower(*p))
2118             cased = 1;
2119     }
2120     return PyInt_FromLong(cased);
2121 }
2122
2123
2124 static char isupper__doc__[] =
2125 "S.isupper() -> int\n\
2126 \n\
2127 Return 1 if  all cased characters in S are uppercase and there is\n\
2128 at least one cased character in S, 0 otherwise.";
2129
2130 static PyObject*
2131 string_isupper(PyStringObject *self, PyObject *args)
2132 {
2133     register const unsigned char *p
2134         = (unsigned char *) PyString_AS_STRING(self);
2135     register const unsigned char *e;
2136     int cased;
2137
2138     if (!PyArg_NoArgs(args))
2139         return NULL;
2140
2141     /* Shortcut for single character strings */
2142     if (PyString_GET_SIZE(self) == 1)
2143         return PyInt_FromLong(isupper(*p) != 0);
2144
2145     /* Special case for empty strings */
2146     if (PyString_GET_SIZE(self) == 0)
2147         return PyInt_FromLong(0);
2148
2149     e = p + PyString_GET_SIZE(self);
2150     cased = 0;
2151     for (; p < e; p++) {
2152         if (islower(*p))
2153             return PyInt_FromLong(0);
2154         else if (!cased && isupper(*p))
2155             cased = 1;
2156     }
2157     return PyInt_FromLong(cased);
2158 }
2159
2160
2161 static char istitle__doc__[] =
2162 "S.istitle() -> int\n\
2163 \n\
2164 Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2165 may only follow uncased characters and lowercase characters only cased\n\
2166 ones. Return 0 otherwise.";
2167
2168 static PyObject*
2169 string_istitle(PyStringObject *self, PyObject *args)
2170 {
2171     register const unsigned char *p
2172         = (unsigned char *) PyString_AS_STRING(self);
2173     register const unsigned char *e;
2174     int cased, previous_is_cased;
2175
2176     if (!PyArg_NoArgs(args))
2177         return NULL;
2178
2179     /* Shortcut for single character strings */
2180     if (PyString_GET_SIZE(self) == 1)
2181         return PyInt_FromLong(isupper(*p) != 0);
2182
2183     /* Special case for empty strings */
2184     if (PyString_GET_SIZE(self) == 0)
2185         return PyInt_FromLong(0);
2186
2187     e = p + PyString_GET_SIZE(self);
2188     cased = 0;
2189     previous_is_cased = 0;
2190     for (; p < e; p++) {
2191         register const unsigned char ch = *p;
2192
2193         if (isupper(ch)) {
2194             if (previous_is_cased)
2195                 return PyInt_FromLong(0);
2196             previous_is_cased = 1;
2197             cased = 1;
2198         }
2199         else if (islower(ch)) {
2200             if (!previous_is_cased)
2201                 return PyInt_FromLong(0);
2202             previous_is_cased = 1;
2203             cased = 1;
2204         }
2205         else
2206             previous_is_cased = 0;
2207     }
2208     return PyInt_FromLong(cased);
2209 }
2210
2211
2212 static char splitlines__doc__[] =
2213 "S.splitlines([keepends]]) -> list of strings\n\
2214 \n\
2215 Return a list of the lines in S, breaking at line boundaries.\n\
2216 Line breaks are not included in the resulting list unless keepends\n\
2217 is given and true.";
2218
2219 #define SPLIT_APPEND(data, left, right)                                 \
2220         str = PyString_FromStringAndSize(data + left, right - left);    \
2221         if (!str)                                                       \
2222             goto onError;                                               \
2223         if (PyList_Append(list, str)) {                                 \
2224             Py_DECREF(str);                                             \
2225             goto onError;                                               \
2226         }                                                               \
2227         else                                                            \
2228             Py_DECREF(str);
2229
2230 static PyObject*
2231 string_splitlines(PyStringObject *self, PyObject *args)
2232 {
2233     register int i;
2234     register int j;
2235     int len;
2236     int keepends = 0;
2237     PyObject *list;
2238     PyObject *str;
2239     char *data;
2240
2241     if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
2242         return NULL;
2243
2244     data = PyString_AS_STRING(self);
2245     len = PyString_GET_SIZE(self);
2246
2247     list = PyList_New(0);
2248     if (!list)
2249         goto onError;
2250
2251     for (i = j = 0; i < len; ) {
2252         int eol;
2253
2254         /* Find a line and append it */
2255         while (i < len && data[i] != '\n' && data[i] != '\r')
2256             i++;
2257
2258         /* Skip the line break reading CRLF as one line break */
2259         eol = i;
2260         if (i < len) {
2261             if (data[i] == '\r' && i + 1 < len &&
2262                 data[i+1] == '\n')
2263                 i += 2;
2264             else
2265                 i++;
2266             if (keepends)
2267                 eol = i;
2268         }
2269         SPLIT_APPEND(data, j, eol);
2270         j = i;
2271     }
2272     if (j < len) {
2273         SPLIT_APPEND(data, j, len);
2274     }
2275
2276     return list;
2277
2278  onError:
2279     Py_DECREF(list);
2280     return NULL;
2281 }
2282
2283 #undef SPLIT_APPEND
2284
2285 \f
2286 static PyMethodDef
2287 string_methods[] = {
2288         /* Counterparts of the obsolete stropmodule functions; except
2289            string.maketrans(). */
2290         {"join",       (PyCFunction)string_join,       1, join__doc__},
2291         {"split",       (PyCFunction)string_split,       1, split__doc__},
2292         {"lower",      (PyCFunction)string_lower,      1, lower__doc__},
2293         {"upper",       (PyCFunction)string_upper,       1, upper__doc__},
2294         {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2295         {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2296         {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2297         {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2298         {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
2299         {"isalpha", (PyCFunction)string_isalpha, 0, isalpha__doc__},
2300         {"isalnum", (PyCFunction)string_isalnum, 0, isalnum__doc__},
2301         {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2302         {"count",      (PyCFunction)string_count,      1, count__doc__},
2303         {"endswith",   (PyCFunction)string_endswith,   1, endswith__doc__},
2304         {"find",       (PyCFunction)string_find,       1, find__doc__},
2305         {"index",      (PyCFunction)string_index,      1, index__doc__},
2306         {"lstrip",     (PyCFunction)string_lstrip,     1, lstrip__doc__},
2307         {"replace",     (PyCFunction)string_replace,     1, replace__doc__},
2308         {"rfind",       (PyCFunction)string_rfind,       1, rfind__doc__},
2309         {"rindex",      (PyCFunction)string_rindex,      1, rindex__doc__},
2310         {"rstrip",      (PyCFunction)string_rstrip,      1, rstrip__doc__},
2311         {"startswith",  (PyCFunction)string_startswith,  1, startswith__doc__},
2312         {"strip",       (PyCFunction)string_strip,       1, strip__doc__},
2313         {"swapcase",    (PyCFunction)string_swapcase,    1, swapcase__doc__},
2314         {"translate",   (PyCFunction)string_translate,   1, translate__doc__},
2315         {"title",       (PyCFunction)string_title,       1, title__doc__},
2316         {"ljust",       (PyCFunction)string_ljust,       1, ljust__doc__},
2317         {"rjust",       (PyCFunction)string_rjust,       1, rjust__doc__},
2318         {"center",      (PyCFunction)string_center,      1, center__doc__},
2319         {"encode",      (PyCFunction)string_encode,      1, encode__doc__},
2320         {"expandtabs",  (PyCFunction)string_expandtabs,  1, expandtabs__doc__},
2321         {"splitlines",  (PyCFunction)string_splitlines,  1, splitlines__doc__},
2322 #if 0
2323         {"zfill",       (PyCFunction)string_zfill,       1, zfill__doc__},
2324 #endif
2325         {NULL,     NULL}                     /* sentinel */
2326 };
2327
2328 static PyObject *
2329 string_getattr(PyStringObject *s, char *name)
2330 {
2331         return Py_FindMethod(string_methods, (PyObject*)s, name);
2332 }
2333
2334
2335 PyTypeObject PyString_Type = {
2336         PyObject_HEAD_INIT(&PyType_Type)
2337         0,
2338         "string",
2339         sizeof(PyStringObject),
2340         sizeof(char),
2341         (destructor)string_dealloc, /*tp_dealloc*/
2342         (printfunc)string_print, /*tp_print*/
2343         (getattrfunc)string_getattr,            /*tp_getattr*/
2344         0,              /*tp_setattr*/
2345         (cmpfunc)string_compare, /*tp_compare*/
2346         (reprfunc)string_repr, /*tp_repr*/
2347         0,              /*tp_as_number*/
2348         &string_as_sequence,    /*tp_as_sequence*/
2349         0,              /*tp_as_mapping*/
2350         (hashfunc)string_hash, /*tp_hash*/
2351         0,              /*tp_call*/
2352         0,              /*tp_str*/
2353         0,              /*tp_getattro*/
2354         0,              /*tp_setattro*/
2355         &string_as_buffer,      /*tp_as_buffer*/
2356         Py_TPFLAGS_DEFAULT,     /*tp_flags*/
2357         0,              /*tp_doc*/
2358 };
2359
2360 void
2361 PyString_Concat(register PyObject **pv, register PyObject *w)
2362 {
2363         register PyObject *v;
2364         if (*pv == NULL)
2365                 return;
2366         if (w == NULL || !PyString_Check(*pv)) {
2367                 Py_DECREF(*pv);
2368                 *pv = NULL;
2369                 return;
2370         }
2371         v = string_concat((PyStringObject *) *pv, w);
2372         Py_DECREF(*pv);
2373         *pv = v;
2374 }
2375
2376 void
2377 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
2378 {
2379         PyString_Concat(pv, w);
2380         Py_XDECREF(w);
2381 }
2382
2383
2384 /* The following function breaks the notion that strings are immutable:
2385    it changes the size of a string.  We get away with this only if there
2386    is only one module referencing the object.  You can also think of it
2387    as creating a new string object and destroying the old one, only
2388    more efficiently.  In any case, don't use this if the string may
2389    already be known to some other part of the code... */
2390
2391 int
2392 _PyString_Resize(PyObject **pv, int newsize)
2393 {
2394         register PyObject *v;
2395         register PyStringObject *sv;
2396         v = *pv;
2397         if (!PyString_Check(v) || v->ob_refcnt != 1) {
2398                 *pv = 0;
2399                 Py_DECREF(v);
2400                 PyErr_BadInternalCall();
2401                 return -1;
2402         }
2403         /* XXX UNREF/NEWREF interface should be more symmetrical */
2404 #ifdef Py_REF_DEBUG
2405         --_Py_RefTotal;
2406 #endif
2407         _Py_ForgetReference(v);
2408         *pv = (PyObject *)
2409                 PyObject_REALLOC((char *)v,
2410                         sizeof(PyStringObject) + newsize * sizeof(char));
2411         if (*pv == NULL) {
2412                 PyObject_DEL(v);
2413                 PyErr_NoMemory();
2414                 return -1;
2415         }
2416         _Py_NewReference(*pv);
2417         sv = (PyStringObject *) *pv;
2418         sv->ob_size = newsize;
2419         sv->ob_sval[newsize] = '\0';
2420         return 0;
2421 }
2422
2423 /* Helpers for formatstring */
2424
2425 static PyObject *
2426 getnextarg(PyObject *args, int arglen, int *p_argidx)
2427 {
2428         int argidx = *p_argidx;
2429         if (argidx < arglen) {
2430                 (*p_argidx)++;
2431                 if (arglen < 0)
2432                         return args;
2433                 else
2434                         return PyTuple_GetItem(args, argidx);
2435         }
2436         PyErr_SetString(PyExc_TypeError,
2437                         "not enough arguments for format string");
2438         return NULL;
2439 }
2440
/* Bit flags recording which format-specifier flag characters were seen:
 *   '-'  F_LJUST
 *   '+'  F_SIGN
 *   ' '  F_BLANK
 *   '#'  F_ALT
 *   '0'  F_ZERO
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
2453
2454 static int
2455 formatfloat(char *buf, size_t buflen, int flags,
2456             int prec, int type, PyObject *v)
2457 {
2458         /* fmt = '%#.' + `prec` + `type`
2459            worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
2460         char fmt[20];
2461         double x;
2462         if (!PyArg_Parse(v, "d;float argument required", &x))
2463                 return -1;
2464         if (prec < 0)
2465                 prec = 6;
2466         if (type == 'f' && fabs(x)/1e25 >= 1e25)
2467                 type = 'g';
2468         sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
2469         /* worst case length calc to ensure no buffer overrun:
2470              fmt = %#.<prec>g
2471              buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
2472                 for any double rep.)
2473              len = 1 + prec + 1 + 2 + 5 = 9 + prec
2474            If prec=0 the effective precision is 1 (the leading digit is
2475            always given), therefore increase by one to 10+prec. */
2476         if (buflen <= (size_t)10 + (size_t)prec) {
2477                 PyErr_SetString(PyExc_OverflowError,
2478                         "formatted float is too long (precision too long?)");
2479                 return -1;
2480         }
2481         sprintf(buf, fmt, x);
2482         return strlen(buf);
2483 }
2484
2485 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2486  * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
2487  * Python's regular ints.
2488  * Return value:  a new PyString*, or NULL if error.
2489  *  .  *pbuf is set to point into it,
2490  *     *plen set to the # of chars following that.
2491  *     Caller must decref it when done using pbuf.
2492  *     The string starting at *pbuf is of the form
2493  *         "-"? ("0x" | "0X")? digit+
2494  *     "0x"/"0X" are present only for x and X conversions, with F_ALT
2495  *         set in flags.  The case of hex digits will be correct,
2496  *     There will be at least prec digits, zero-filled on the left if
2497  *         necessary to get that many.
2498  * val          object to be converted
2499  * flags        bitmask of format flags; only F_ALT is looked at
2500  * prec         minimum number of digits; 0-fill on left if needed
2501  * type         a character in [duoxX]; u acts the same as d
2502  *
2503  * CAUTION:  o, x and X conversions on regular ints can never
2504  * produce a '-' sign, but can for Python's unbounded ints.
2505  */
2506 PyObject*
2507 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2508                      char **pbuf, int *plen)
2509 {
2510         PyObject *result = NULL;
2511         char *buf;
2512         int i;
2513         int sign;       /* 1 if '-', else 0 */
2514         int len;        /* number of characters */
2515         int numdigits;  /* len == numnondigits + numdigits */
2516         int numnondigits = 0;
2517
2518         switch (type) {
2519         case 'd':
2520         case 'u':
2521                 result = val->ob_type->tp_str(val);
2522                 break;
2523         case 'o':
2524                 result = val->ob_type->tp_as_number->nb_oct(val);
2525                 break;
2526         case 'x':
2527         case 'X':
2528                 numnondigits = 2;
2529                 result = val->ob_type->tp_as_number->nb_hex(val);
2530                 break;
2531         default:
2532                 assert(!"'type' not in [duoxX]");
2533         }
2534         if (!result)
2535                 return NULL;
2536
2537         /* To modify the string in-place, there can only be one reference. */
2538         if (result->ob_refcnt != 1) {
2539                 PyErr_BadInternalCall();
2540                 return NULL;
2541         }
2542         buf = PyString_AsString(result);
2543         len = PyString_Size(result);
2544         if (buf[len-1] == 'L') {
2545                 --len;
2546                 buf[len] = '\0';
2547         }
2548         sign = buf[0] == '-';
2549         numnondigits += sign;
2550         numdigits = len - numnondigits;
2551         assert(numdigits > 0);
2552
2553         /* Get rid of base marker unless F_ALT */
2554         if ((flags & F_ALT) == 0) {
2555                 /* Need to skip 0x, 0X or 0. */
2556                 int skipped = 0;
2557                 switch (type) {
2558                 case 'o':
2559                         assert(buf[sign] == '0');
2560                         /* If 0 is only digit, leave it alone. */
2561                         if (numdigits > 1) {
2562                                 skipped = 1;
2563                                 --numdigits;
2564                         }
2565                         break;
2566                 case 'x':
2567                 case 'X':
2568                         assert(buf[sign] == '0');
2569                         assert(buf[sign + 1] == 'x');
2570                         skipped = 2;
2571                         numnondigits -= 2;
2572                         break;
2573                 }
2574                 if (skipped) {
2575                         buf += skipped;
2576                         len -= skipped;
2577                         if (sign)
2578                                 buf[0] = '-';
2579                 }
2580                 assert(len == numnondigits + numdigits);
2581                 assert(numdigits > 0);
2582         }
2583
2584         /* Fill with leading zeroes to meet minimum width. */
2585         if (prec > numdigits) {
2586                 PyObject *r1 = PyString_FromStringAndSize(NULL,
2587                                         numnondigits + prec);
2588                 char *b1;
2589                 if (!r1) {
2590                         Py_DECREF(result);
2591                         return NULL;
2592                 }
2593                 b1 = PyString_AS_STRING(r1);
2594                 for (i = 0; i < numnondigits; ++i)
2595                         *b1++ = *buf++;
2596                 for (i = 0; i < prec - numdigits; i++)
2597                         *b1++ = '0';
2598                 for (i = 0; i < numdigits; i++)
2599                         *b1++ = *buf++;
2600                 *b1 = '\0';
2601                 Py_DECREF(result);
2602                 result = r1;
2603                 buf = PyString_AS_STRING(result);
2604                 len = numnondigits + prec;
2605         }
2606
2607         /* Fix up case for hex conversions. */
2608         switch (type) {
2609         case 'x':
2610                 /* Need to convert all upper case letters to lower case. */
2611                 for (i = 0; i < len; i++)
2612                         if (buf[i] >= 'A' && buf[i] <= 'F')
2613                                 buf[i] += 'a'-'A';
2614                 break;
2615         case 'X':
2616                 /* Need to convert 0x to 0X (and -0x to -0X). */
2617                 if (buf[sign + 1] == 'x')
2618                         buf[sign + 1] = 'X';
2619                 break;
2620         }
2621         *pbuf = buf;
2622         *plen = len;
2623         return result;
2624 }
2625
2626 static int
2627 formatint(char *buf, size_t buflen, int flags,
2628           int prec, int type, PyObject *v)
2629 {
2630         /* fmt = '%#.' + `prec` + 'l' + `type`
2631            worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
2632            + 1 + 1 = 24 */
2633         char fmt[64];   /* plenty big enough! */
2634         long x;
2635         if (!PyArg_Parse(v, "l;int argument required", &x))
2636                 return -1;
2637         if (prec < 0)
2638                 prec = 1;
2639         sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
2640         /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
2641            worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
2642         if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
2643                 PyErr_SetString(PyExc_OverflowError,
2644                         "formatted integer is too long (precision too long?)");
2645                 return -1;
2646         }
2647         sprintf(buf, fmt, x);
2648         return strlen(buf);
2649 }
2650
2651 static int
2652 formatchar(char *buf, size_t buflen, PyObject *v)
2653 {
2654         /* presume that the buffer is at least 2 characters long */
2655         if (PyString_Check(v)) {
2656                 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
2657                         return -1;
2658         }
2659         else {
2660                 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
2661                         return -1;
2662         }
2663         buf[1] = '\0';
2664         return 1;
2665 }
2666
2667
2668 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
2669
2670    FORMATBUFLEN is the length of the buffer in which the floats, ints, &
2671    chars are formatted. XXX This is a magic number. Each formatting
2672    routine does bounds checking to ensure no overflow, but a better
2673    solution may be to malloc a buffer of appropriate size for each
2674    format. For now, the current solution is sufficient.
2675 */
/* Fully parenthesized so the cast cannot re-associate with operators
   around the macro's use (e.g. `sizeof FORMATBUFLEN`). */
#define FORMATBUFLEN ((size_t)120)
2677
2678 PyObject *
2679 PyString_Format(PyObject *format, PyObject *args)
2680 {
2681         char *fmt, *res;
2682         int fmtcnt, rescnt, reslen, arglen, argidx;
2683         int args_owned = 0;
2684         PyObject *result, *orig_args, *v, *w;
2685         PyObject *dict = NULL;
2686         if (format == NULL || !PyString_Check(format) || args == NULL) {
2687                 PyErr_BadInternalCall();
2688                 return NULL;
2689         }
2690         orig_args = args;
2691         fmt = PyString_AsString(format);
2692         fmtcnt = PyString_Size(format);
2693         reslen = rescnt = fmtcnt + 100;
2694         result = PyString_FromStringAndSize((char *)NULL, reslen);
2695         if (result == NULL)
2696                 return NULL;
2697         res = PyString_AsString(result);
2698         if (PyTuple_Check(args)) {
2699                 arglen = PyTuple_Size(args);
2700                 argidx = 0;
2701         }
2702         else {
2703                 arglen = -1;
2704                 argidx = -2;
2705         }
2706         if (args->ob_type->tp_as_mapping)
2707                 dict = args;
2708         while (--fmtcnt >= 0) {
2709                 if (*fmt != '%') {
2710                         if (--rescnt < 0) {
2711                                 rescnt = fmtcnt + 100;
2712                                 reslen += rescnt;
2713                                 if (_PyString_Resize(&result, reslen) < 0)
2714                                         return NULL;
2715                                 res = PyString_AsString(result)
2716                                         + reslen - rescnt;
2717                                 --rescnt;
2718                         }
2719                         *res++ = *fmt++;
2720                 }
2721                 else {
2722                         /* Got a format specifier */
2723                         int flags = 0;
2724                         int width = -1;
2725                         int prec = -1;
2726                         int size = 0;
2727                         int c = '\0';
2728                         int fill;
2729                         PyObject *v = NULL;
2730                         PyObject *temp = NULL;
2731                         char *pbuf;
2732                         int sign;
2733                         int len;
2734                         char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
2735                         char *fmt_start = fmt;
2736
2737                         fmt++;
2738                         if (*fmt == '(') {
2739                                 char *keystart;
2740                                 int keylen;
2741                                 PyObject *key;
2742                                 int pcount = 1;
2743
2744                                 if (dict == NULL) {
2745                                         PyErr_SetString(PyExc_TypeError,
2746                                                  "format requires a mapping");
2747                                         goto error;
2748                                 }
2749                                 ++fmt;
2750                                 --fmtcnt;
2751                                 keystart = fmt;
2752                                 /* Skip over balanced parentheses */
2753                                 while (pcount > 0 && --fmtcnt >= 0) {
2754                                         if (*fmt == ')')
2755                                                 --pcount;
2756                                         else if (*fmt == '(')
2757                                                 ++pcount;
2758                                         fmt++;
2759                                 }
2760                                 keylen = fmt - keystart - 1;
2761                                 if (fmtcnt < 0 || pcount > 0) {
2762                                         PyErr_SetString(PyExc_ValueError,
2763                                                    "incomplete format key");
2764                                         goto error;
2765                                 }
2766                                 key = PyString_FromStringAndSize(keystart,
2767                                                                  keylen);
2768                                 if (key == NULL)
2769                                         goto error;
2770                                 if (args_owned) {
2771                                         Py_DECREF(args);
2772                                         args_owned = 0;
2773                                 }
2774                                 args = PyObject_GetItem(dict, key);
2775                                 Py_DECREF(key);
2776                                 if (args == NULL) {
2777                                         goto error;
2778                                 }
2779                                 args_owned = 1;
2780                                 arglen = -1;
2781                                 argidx = -2;
2782                         }
2783                         while (--fmtcnt >= 0) {
2784                                 switch (c = *fmt++) {
2785                                 case '-': flags |= F_LJUST; continue;
2786                                 case '+': flags |= F_SIGN; continue;
2787                                 case ' ': flags |= F_BLANK; continue;
2788                                 case '#': flags |= F_ALT; continue;
2789                                 case '0': flags |= F_ZERO; continue;
2790                                 }
2791                                 break;
2792                         }
2793                         if (c == '*') {
2794                                 v = getnextarg(args, arglen, &argidx);
2795                                 if (v == NULL)
2796                                         goto error;
2797                                 if (!PyInt_Check(v)) {
2798                                         PyErr_SetString(PyExc_TypeError,
2799                                                         "* wants int");
2800                                         goto error;
2801                                 }
2802                                 width = PyInt_AsLong(v);
2803                                 if (width < 0) {
2804                                         flags |= F_LJUST;
2805                                         width = -width;
2806                                 }
2807                                 if (--fmtcnt >= 0)
2808                                         c = *fmt++;
2809                         }
2810                         else if (c >= 0 && isdigit(c)) {
2811                                 width = c - '0';
2812                                 while (--fmtcnt >= 0) {
2813                                         c = Py_CHARMASK(*fmt++);
2814                                         if (!isdigit(c))
2815                                                 break;
2816                                         if ((width*10) / 10 != width) {
2817                                                 PyErr_SetString(
2818                                                         PyExc_ValueError,
2819                                                         "width too big");
2820                                                 goto error;
2821                                         }
2822                                         width = width*10 + (c - '0');
2823                                 }
2824                         }
2825                         if (c == '.') {
2826                                 prec = 0;
2827                                 if (--fmtcnt >= 0)
2828                                         c = *fmt++;
2829                                 if (c == '*') {
2830                                         v = getnextarg(args, arglen, &argidx);
2831                                         if (v == NULL)
2832                                                 goto error;
2833                                         if (!PyInt_Check(v)) {
2834                                                 PyErr_SetString(
2835                                                         PyExc_TypeError,
2836                                                         "* wants int");
2837                                                 goto error;
2838                                         }
2839                                         prec = PyInt_AsLong(v);
2840                                         if (prec < 0)
2841                                                 prec = 0;
2842                                         if (--fmtcnt >= 0)
2843                                                 c = *fmt++;
2844                                 }
2845                                 else if (c >= 0 && isdigit(c)) {
2846                                         prec = c - '0';
2847                                         while (--fmtcnt >= 0) {
2848                                                 c = Py_CHARMASK(*fmt++);
2849                                                 if (!isdigit(c))
2850                                                         break;
2851                                                 if ((prec*10) / 10 != prec) {
2852                                                         PyErr_SetString(
2853                                                             PyExc_ValueError,
2854                                                             "prec too big");
2855                                                         goto error;
2856                                                 }
2857                                                 prec = prec*10 + (c - '0');
2858                                         }
2859                                 }
2860                         } /* prec */
2861                         if (fmtcnt >= 0) {
2862                                 if (c == 'h' || c == 'l' || c == 'L') {
2863                                         size = c;
2864                                         if (--fmtcnt >= 0)
2865                                                 c = *fmt++;
2866                                 }
2867                         }
2868                         if (fmtcnt < 0) {
2869                                 PyErr_SetString(PyExc_ValueError,
2870                                                 "incomplete format");
2871                                 goto error;
2872                         }
2873                         if (c != '%') {
2874                                 v = getnextarg(args, arglen, &argidx);
2875                                 if (v == NULL)
2876                                         goto error;
2877                         }
2878                         sign = 0;
2879                         fill = ' ';
2880                         switch (c) {
2881                         case '%':
2882                                 pbuf = "%";
2883                                 len = 1;
2884                                 break;
2885                         case 's':
2886                         case 'r':
2887                                 if (PyUnicode_Check(v)) {
2888                                         fmt = fmt_start;
2889                                         goto unicode;
2890                                 }
2891                                 if (c == 's')
2892                                 temp = PyObject_Str(v);
2893                                 else
2894                                         temp = PyObject_Repr(v);
2895                                 if (temp == NULL)
2896                                         goto error;
2897                                 if (!PyString_Check(temp)) {
2898                                         PyErr_SetString(PyExc_TypeError,
2899                                           "%s argument has non-string str()");
2900                                         goto error;
2901                                 }
2902                                 pbuf = PyString_AsString(temp);
2903                                 len = PyString_Size(temp);
2904                                 if (prec >= 0 && len > prec)
2905                                         len = prec;
2906                                 break;
2907                         case 'i':
2908                         case 'd':
2909                         case 'u':
2910                         case 'o':
2911                         case 'x':
2912                         case 'X':
2913                                 if (c == 'i')
2914                                         c = 'd';
2915                                 if (PyLong_Check(v) && PyLong_AsLong(v) == -1
2916                                     && PyErr_Occurred()) {
2917                                         /* Too big for a C long. */
2918                                         PyErr_Clear();
2919                                         temp = _PyString_FormatLong(v, flags,
2920                                                 prec, c, &pbuf, &len);
2921                                         if (!temp)
2922                                                 goto error;
2923                                         /* unbounded ints can always produce
2924                                            a sign character! */
2925                                         sign = 1;
2926                                 }
2927                                 else {
2928                                         pbuf = formatbuf;
2929                                         len = formatint(pbuf, sizeof(formatbuf),
2930                                                         flags, prec, c, v);
2931                                         if (len < 0)
2932                                                 goto error;
2933                                         /* only d conversion is signed */
2934                                         sign = c == 'd';
2935                                 }
2936                                 if (flags & F_ZERO)
2937                                         fill = '0';
2938                                 break;
2939                         case 'e':
2940                         case 'E':
2941                         case 'f':
2942                         case 'g':
2943                         case 'G':
2944                                 pbuf = formatbuf;
2945                                 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
2946                                 if (len < 0)
2947                                         goto error;
2948                                 sign = 1;
2949                                 if (flags & F_ZERO)
2950                                         fill = '0';
2951                                 break;
2952                         case 'c':
2953                                 pbuf = formatbuf;
2954                                 len = formatchar(pbuf, sizeof(formatbuf), v);
2955                                 if (len < 0)
2956                                         goto error;
2957                                 break;
2958                         default:
2959                                 PyErr_Format(PyExc_ValueError,
2960                                 "unsupported format character '%c' (0x%x)",
2961                                         c, c);
2962                                 goto error;
2963                         }
2964                         if (sign) {
2965                                 if (*pbuf == '-' || *pbuf == '+') {
2966                                         sign = *pbuf++;
2967                                         len--;
2968                                 }
2969                                 else if (flags & F_SIGN)
2970                                         sign = '+';
2971                                 else if (flags & F_BLANK)
2972                                         sign = ' ';
2973                                 else
2974                                         sign = 0;
2975                         }
2976                         if (width < len)
2977                                 width = len;
2978                         if (rescnt < width + (sign != 0)) {
2979                                 reslen -= rescnt;
2980                                 rescnt = width + fmtcnt + 100;
2981                                 reslen += rescnt;
2982                                 if (_PyString_Resize(&result, reslen) < 0)
2983                                         return NULL;
2984                                 res = PyString_AsString(result)
2985                                         + reslen - rescnt;
2986                         }
2987                         if (sign) {
2988                                 if (fill != ' ')
2989                                         *res++ = sign;
2990                                 rescnt--;
2991                                 if (width > len)
2992                                         width--;
2993                         }
2994                         if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
2995                                 assert(pbuf[0] == '0');
2996                                 assert(pbuf[1] == c);
2997                                 if (fill != ' ') {
2998                                         *res++ = *pbuf++;
2999                                         *res++ = *pbuf++;
3000                                 }
3001                                 rescnt -= 2;
3002                                 width -= 2;
3003                                 if (width < 0)
3004                                         width = 0;
3005                                 len -= 2;
3006                         }
3007                         if (width > len && !(flags & F_LJUST)) {
3008                                 do {
3009                                         --rescnt;
3010                                         *res++ = fill;
3011                                 } while (--width > len);
3012                         }
3013                         if (fill == ' ') {
3014                                 if (sign)
3015                                         *res++ = sign;
3016                                 if ((flags & F_ALT) &&
3017                                     (c == 'x' || c == 'X')) {
3018                                         assert(pbuf[0] == '0');
3019                                         assert(pbuf[1] == c);
3020                                         *res++ = *pbuf++;
3021                                         *res++ = *pbuf++;
3022                                 }
3023                         }
3024                         memcpy(res, pbuf, len);
3025                         res += len;
3026                         rescnt -= len;
3027                         while (--width >= len) {
3028                                 --rescnt;
3029                                 *res++ = ' ';
3030                         }
3031                         if (dict && (argidx < arglen) && c != '%') {
3032                                 PyErr_SetString(PyExc_TypeError,
3033                                            "not all arguments converted");
3034                                 goto error;
3035                         }
3036                         Py_XDECREF(temp);
3037                 } /* '%' */
3038         } /* until end */
3039         if (argidx < arglen && !dict) {
3040                 PyErr_SetString(PyExc_TypeError,
3041                                 "not all arguments converted");
3042                 goto error;
3043         }
3044         if (args_owned) {
3045                 Py_DECREF(args);
3046         }
3047         _PyString_Resize(&result, reslen - rescnt);
3048         return result;
3049
3050  unicode:
3051         if (args_owned) {
3052                 Py_DECREF(args);
3053                 args_owned = 0;
3054         }
3055         /* Fiddle args right (remove the first argidx-1 arguments) */
3056         --argidx;
3057         if (PyTuple_Check(orig_args) && argidx > 0) {
3058                 PyObject *v;
3059                 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3060                 v = PyTuple_New(n);
3061                 if (v == NULL)
3062                         goto error;
3063                 while (--n >= 0) {
3064                         PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3065                         Py_INCREF(w);
3066                         PyTuple_SET_ITEM(v, n, w);
3067                 }
3068                 args = v;
3069         } else {
3070                 Py_INCREF(orig_args);
3071                 args = orig_args;
3072         }
3073         args_owned = 1;
3074         /* Take what we have of the result and let the Unicode formatting
3075            function format the rest of the input. */
3076         rescnt = res - PyString_AS_STRING(result);
3077         if (_PyString_Resize(&result, rescnt))
3078                 goto error;
3079         fmtcnt = PyString_GET_SIZE(format) - \
3080                  (fmt - PyString_AS_STRING(format));
3081         format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3082         if (format == NULL)
3083                 goto error;
3084         v = PyUnicode_Format(format, args);
3085         Py_DECREF(format);
3086         if (v == NULL)
3087                 goto error;
3088         /* Paste what we have (result) to what the Unicode formatting
3089            function returned (v) and return the result (or error) */
3090         w = PyUnicode_Concat(result, v);
3091         Py_DECREF(result);
3092         Py_DECREF(v);
3093         Py_DECREF(args);
3094         return w;
3095
3096  error:
3097         Py_DECREF(result);
3098         if (args_owned) {
3099                 Py_DECREF(args);
3100         }
3101         return NULL;
3102 }
3103
3104
3105 #ifdef INTERN_STRINGS
3106
/* Dictionary of interned strings.  PyString_InternInPlace() stores each
 * interned string under itself, so key and value of an entry are the
 * same object.
 *
 * This dictionary will leak at PyString_Fini() time.  That's acceptable
 * because PyString_Fini() specifically frees interned strings that are
 * only referenced by this dictionary.  The CVS log entry for revision 2.45
 * says:
 *
 *    Change the Fini function to only remove otherwise unreferenced
 *    strings from the interned table.  There are references in
 *    hard-to-find static variables all over the interpreter, and it's not
 *    worth trying to get rid of all those; but "uninterning" isn't fair
 *    either and may cause subtle failures later -- so we have to keep them
 *    in the interned table.
 */
static PyObject *interned;
3120
3121 void
3122 PyString_InternInPlace(PyObject **p)
3123 {
3124         register PyStringObject *s = (PyStringObject *)(*p);
3125         PyObject *t;
3126         if (s == NULL || !PyString_Check(s))
3127                 Py_FatalError("PyString_InternInPlace: strings only please!");
3128         if ((t = s->ob_sinterned) != NULL) {
3129                 if (t == (PyObject *)s)
3130                         return;
3131                 Py_INCREF(t);
3132                 *p = t;
3133                 Py_DECREF(s);
3134                 return;
3135         }
3136         if (interned == NULL) {
3137                 interned = PyDict_New();
3138                 if (interned == NULL)
3139                         return;
3140         }
3141         if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3142                 Py_INCREF(t);
3143                 *p = s->ob_sinterned = t;
3144                 Py_DECREF(s);
3145                 return;
3146         }
3147         t = (PyObject *)s;
3148         if (PyDict_SetItem(interned, t, t) == 0) {
3149                 s->ob_sinterned = t;
3150                 return;
3151         }
3152         PyErr_Clear();
3153 }
3154
3155
3156 PyObject *
3157 PyString_InternFromString(const char *cp)
3158 {
3159         PyObject *s = PyString_FromString(cp);
3160         if (s == NULL)
3161                 return NULL;
3162         PyString_InternInPlace(&s);
3163         return s;
3164 }
3165
3166 #endif
3167
3168 void
3169 PyString_Fini(void)
3170 {
3171         int i;
3172         for (i = 0; i < UCHAR_MAX + 1; i++) {
3173                 Py_XDECREF(characters[i]);
3174                 characters[i] = NULL;
3175         }
3176 #ifndef DONT_SHARE_SHORT_STRINGS
3177         Py_XDECREF(nullstring);
3178         nullstring = NULL;
3179 #endif
3180 #ifdef INTERN_STRINGS
3181         if (interned) {
3182                 int pos, changed;
3183                 PyObject *key, *value;
3184                 do {
3185                         changed = 0;
3186                         pos = 0;
3187                         while (PyDict_Next(interned, &pos, &key, &value)) {
3188                                 if (key->ob_refcnt == 2 && key == value) {
3189                                         PyDict_DelItem(interned, key);
3190                                         changed = 1;
3191                                 }
3192                         }
3193                 } while (changed);
3194         }
3195 #endif
3196 }