Objects/stringobject.c

   1
   2 /* String object implementation */
   3
   4 #include "Python.h"
   5
   6 #include <ctype.h>
   7
   8 #ifdef COUNT_ALLOCS
   9 int null_strings, one_strings;
  10 #endif
  11
  12 #if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
  13 #define UCHAR_MAX 255
  14 #endif
  15
  16 static PyStringObject *characters[UCHAR_MAX + 1];
  17 #ifndef DONT_SHARE_SHORT_STRINGS
  18 static PyStringObject *nullstring;
  19 #endif
  20
  21 /*
  22    Newsizedstringobject() and newstringobject() try in certain cases
  23    to share string objects.  When the size of the string is zero,
  24    these routines always return a pointer to the same string object;
  25    when the size is one, they return a pointer to an already existing
  26    object if the contents of the string is known.  For
  27    newstringobject() this is always the case, for
  28    newsizedstringobject() this is the case when the first argument is
  29    not NULL.
  30    A common practice to allocate a string and then fill it in or
  31    change it must be done carefully.  It is only allowed to change the
  32    contents of the string if the object was gotten from
  33    newsizedstringobject() with a NULL first argument, because in the
  34    future these routines may try to do even more sharing of objects.
  35 */
  36 PyObject *
  37 PyString_FromStringAndSize(const char *str, int size)
  38 {
  39         register PyStringObject *op;
  40 #ifndef DONT_SHARE_SHORT_STRINGS
  41         if (size == 0 && (op = nullstring) != NULL) {
  42 #ifdef COUNT_ALLOCS
  43                 null_strings++;
  44 #endif
  45                 Py_INCREF(op);
  46                 return (PyObject *)op;
  47         }
  48         if (size == 1 && str != NULL &&
  49             (op = characters[*str & UCHAR_MAX]) != NULL)
  50         {
  51 #ifdef COUNT_ALLOCS
  52                 one_strings++;
  53 #endif
  54                 Py_INCREF(op);
  55                 return (PyObject *)op;
  56         }
  57 #endif /* DONT_SHARE_SHORT_STRINGS */
  58
  59         /* PyObject_NewVar is inlined */
  60         op = (PyStringObject *)
  61                 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
  62         if (op == NULL)
  63                 return PyErr_NoMemory();
  64         PyObject_INIT_VAR(op, &PyString_Type, size);
  65 #ifdef CACHE_HASH
  66         op->ob_shash = -1;
  67 #endif
  68 #ifdef INTERN_STRINGS
  69         op->ob_sinterned = NULL;
  70 #endif
  71         if (str != NULL)
  72                 memcpy(op->ob_sval, str, size);
  73         op->ob_sval[size] = '\0';
  74 #ifndef DONT_SHARE_SHORT_STRINGS
  75         if (size == 0) {
  76                 nullstring = op;
  77                 Py_INCREF(op);
  78         } else if (size == 1 && str != NULL) {
  79                 characters[*str & UCHAR_MAX] = op;
  80                 Py_INCREF(op);
  81         }
  82 #endif
  83         return (PyObject *) op;
  84 }
  85
  86 PyObject *
  87 PyString_FromString(const char *str)
  88 {
  89         register size_t size = strlen(str);
  90         register PyStringObject *op;
  91         if (size > INT_MAX) {
  92                 PyErr_SetString(PyExc_OverflowError,
  93                         "string is too long for a Python string");
  94                 return NULL;
  95         }
  96 #ifndef DONT_SHARE_SHORT_STRINGS
  97         if (size == 0 && (op = nullstring) != NULL) {
  98 #ifdef COUNT_ALLOCS
  99                 null_strings++;
 100 #endif
 101                 Py_INCREF(op);
 102                 return (PyObject *)op;
 103         }
 104         if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
 105 #ifdef COUNT_ALLOCS
 106                 one_strings++;
 107 #endif
 108                 Py_INCREF(op);
 109                 return (PyObject *)op;
 110         }
 111 #endif /* DONT_SHARE_SHORT_STRINGS */
 112
 113         /* PyObject_NewVar is inlined */
 114         op = (PyStringObject *)
 115                 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
 116         if (op == NULL)
 117                 return PyErr_NoMemory();
 118         PyObject_INIT_VAR(op, &PyString_Type, size);
 119 #ifdef CACHE_HASH
 120         op->ob_shash = -1;
 121 #endif
 122 #ifdef INTERN_STRINGS
 123         op->ob_sinterned = NULL;
 124 #endif
 125         strcpy(op->ob_sval, str);
 126 #ifndef DONT_SHARE_SHORT_STRINGS
 127         if (size == 0) {
 128                 nullstring = op;
 129                 Py_INCREF(op);
 130         } else if (size == 1) {
 131                 characters[*str & UCHAR_MAX] = op;
 132                 Py_INCREF(op);
 133         }
 134 #endif
 135         return (PyObject *) op;
 136 }
 137
 138 PyObject *PyString_Decode(const char *s,
 139                           int size,
 140                           const char *encoding,
 141                           const char *errors)
 142 {
 143     PyObject *buffer = NULL, *str;
 144
 145     if (encoding == NULL)
 146         encoding = PyUnicode_GetDefaultEncoding();
 147
 148     /* Decode via the codec registry */
 149     buffer = PyBuffer_FromMemory((void *)s, size);
 150     if (buffer == NULL)
 151         goto onError;
 152     str = PyCodec_Decode(buffer, encoding, errors);
 153     if (str == NULL)
 154         goto onError;
 155     /* Convert Unicode to a string using the default encoding */
 156     if (PyUnicode_Check(str)) {
 157         PyObject *temp = str;
 158         str = PyUnicode_AsEncodedString(str, NULL, NULL);
 159         Py_DECREF(temp);
 160         if (str == NULL)
 161             goto onError;
 162     }
 163     if (!PyString_Check(str)) {
 164         PyErr_Format(PyExc_TypeError,
 165                      "decoder did not return a string object (type=%.400s)",
 166                      str->ob_type->tp_name);
 167         Py_DECREF(str);
 168         goto onError;
 169     }
 170     Py_DECREF(buffer);
 171     return str;
 172
 173  onError:
 174     Py_XDECREF(buffer);
 175     return NULL;
 176 }
 177
 178 PyObject *PyString_Encode(const char *s,
 179                           int size,
 180                           const char *encoding,
 181                           const char *errors)
 182 {
 183     PyObject *v, *str;
 184
 185     str = PyString_FromStringAndSize(s, size);
 186     if (str == NULL)
 187         return NULL;
 188     v = PyString_AsEncodedString(str, encoding, errors);
 189     Py_DECREF(str);
 190     return v;
 191 }
 192
 193 PyObject *PyString_AsEncodedString(PyObject *str,
 194                                    const char *encoding,
 195                                    const char *errors)
 196 {
 197     PyObject *v;
 198
 199     if (!PyString_Check(str)) {
 200         PyErr_BadArgument();
 201         goto onError;
 202     }
 203
 204     if (encoding == NULL)
 205         encoding = PyUnicode_GetDefaultEncoding();
 206
 207     /* Encode via the codec registry */
 208     v = PyCodec_Encode(str, encoding, errors);
 209     if (v == NULL)
 210         goto onError;
 211     /* Convert Unicode to a string using the default encoding */
 212     if (PyUnicode_Check(v)) {
 213         PyObject *temp = v;
 214         v = PyUnicode_AsEncodedString(v, NULL, NULL);
 215         Py_DECREF(temp);
 216         if (v == NULL)
 217             goto onError;
 218     }
 219     if (!PyString_Check(v)) {
 220         PyErr_Format(PyExc_TypeError,
 221                      "encoder did not return a string object (type=%.400s)",
 222                      v->ob_type->tp_name);
 223         Py_DECREF(v);
 224         goto onError;
 225     }
 226     return v;
 227
 228  onError:
 229     return NULL;
 230 }
 231
 232 static void
 233 string_dealloc(PyObject *op)
 234 {
 235         PyObject_DEL(op);
 236 }
 237
 238 static int
 239 string_getsize(register PyObject *op)
 240 {
 241         char *s;
 242         int len;
 243         if (PyString_AsStringAndSize(op, &s, &len))
 244                 return -1;
 245         return len;
 246 }
 247
 248 static /*const*/ char *
 249 string_getbuffer(register PyObject *op)
 250 {
 251         char *s;
 252         int len;
 253         if (PyString_AsStringAndSize(op, &s, &len))
 254                 return NULL;
 255         return s;
 256 }
 257
 258 int
 259 PyString_Size(register PyObject *op)
 260 {
 261         if (!PyString_Check(op))
 262                 return string_getsize(op);
 263         return ((PyStringObject *)op) -> ob_size;
 264 }
 265
 266 /*const*/ char *
 267 PyString_AsString(register PyObject *op)
 268 {
 269         if (!PyString_Check(op))
 270                 return string_getbuffer(op);
 271         return ((PyStringObject *)op) -> ob_sval;
 272 }
 273
 274 /* Internal API needed by PyString_AsStringAndSize(): */
 275 extern
 276 PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
 277                                             const char *errors);
 278
 279 int
 280 PyString_AsStringAndSize(register PyObject *obj,
 281                          register char **s,
 282                          register int *len)
 283 {
 284         if (s == NULL) {
 285                 PyErr_BadInternalCall();
 286                 return -1;
 287         }
 288
 289         if (!PyString_Check(obj)) {
 290                 if (PyUnicode_Check(obj)) {
 291                         obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
 292                         if (obj == NULL)
 293                                 return -1;
 294                 }
 295                 else {
 296                         PyErr_Format(PyExc_TypeError,
 297                                      "expected string or Unicode object, "
 298                                      "%.200s found", obj->ob_type->tp_name);
 299                         return -1;
 300                 }
 301         }
 302
 303         *s = PyString_AS_STRING(obj);
 304         if (len != NULL)
 305                 *len = PyString_GET_SIZE(obj);
 306         else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
 307                 PyErr_SetString(PyExc_TypeError,
 308                                 "expected string without null bytes");
 309                 return -1;
 310         }
 311         return 0;
 312 }
 313
 314 /* Methods */
 315
 316 static int
 317 string_print(PyStringObject *op, FILE *fp, int flags)
 318 {
 319         int i;
 320         char c;
 321         int quote;
 322         /* XXX Ought to check for interrupts when writing long strings */
 323         if (flags & Py_PRINT_RAW) {
 324                 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
 325                 return 0;
 326         }
 327
 328         /* figure out which quote to use; single is preferred */
 329         quote = '\'';
 330         if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
 331                 quote = '"';
 332
 333         fputc(quote, fp);
 334         for (i = 0; i < op->ob_size; i++) {
 335                 c = op->ob_sval[i];
 336                 if (c == quote || c == '\\')
 337                         fprintf(fp, "\\%c", c);
 338                 else if (c == '\t')
 339                         fprintf(fp, "\\t");
 340                 else if (c == '\n')
 341                         fprintf(fp, "\\n");
 342                 else if (c == '\r')
 343                         fprintf(fp, "\\r");
 344                 else if (c < ' ' || c >= 0x7f)
 345                         fprintf(fp, "\\x%02x", c & 0xff);
 346                 else
 347                         fputc(c, fp);
 348         }
 349         fputc(quote, fp);
 350         return 0;
 351 }
 352
 353 static PyObject *
 354 string_repr(register PyStringObject *op)
 355 {
 356         size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
 357         PyObject *v;
 358         if (newsize > INT_MAX) {
 359                 PyErr_SetString(PyExc_OverflowError,
 360                         "string is too large to make repr");
 361         }
 362         v = PyString_FromStringAndSize((char *)NULL, newsize);
 363         if (v == NULL) {
 364                 return NULL;
 365         }
 366         else {
 367                 register int i;
 368                 register char c;
 369                 register char *p;
 370                 int quote;
 371
 372                 /* figure out which quote to use; single is preferred */
 373                 quote = '\'';
 374                 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
 375                         quote = '"';
 376
 377                 p = ((PyStringObject *)v)->ob_sval;
 378                 *p++ = quote;
 379                 for (i = 0; i < op->ob_size; i++) {
 380                         c = op->ob_sval[i];
 381                         if (c == quote || c == '\\')
 382                                 *p++ = '\\', *p++ = c;
 383                         else if (c == '\t')
 384                                 *p++ = '\\', *p++ = 't';
 385                         else if (c == '\n')
 386                                 *p++ = '\\', *p++ = 'n';
 387                         else if (c == '\r')
 388                                 *p++ = '\\', *p++ = 'r';
 389                         else if (c < ' ' || c >= 0x7f) {
 390                                 sprintf(p, "\\x%02x", c & 0xff);
 391                                 p += 4;
 392                         }
 393                         else
 394                                 *p++ = c;
 395                 }
 396                 *p++ = quote;
 397                 *p = '\0';
 398                 _PyString_Resize(
 399                         &v, (int) (p - ((PyStringObject *)v)->ob_sval));
 400                 return v;
 401         }
 402 }
 403
 404 static PyObject *
 405 string_str(PyObject *s)
 406 {
 407         Py_INCREF(s);
 408         return s;
 409 }
 410
 411 static int
 412 string_length(PyStringObject *a)
 413 {
 414         return a->ob_size;
 415 }
 416
 417 static PyObject *
 418 string_concat(register PyStringObject *a, register PyObject *bb)
 419 {
 420         register unsigned int size;
 421         register PyStringObject *op;
 422         if (!PyString_Check(bb)) {
 423                 if (PyUnicode_Check(bb))
 424                     return PyUnicode_Concat((PyObject *)a, bb);
 425                 PyErr_Format(PyExc_TypeError,
 426                              "cannot add type \"%.200s\" to string",
 427                              bb->ob_type->tp_name);
 428                 return NULL;
 429         }
 430 #define b ((PyStringObject *)bb)
 431         /* Optimize cases with empty left or right operand */
 432         if (a->ob_size == 0) {
 433                 Py_INCREF(bb);
 434                 return bb;
 435         }
 436         if (b->ob_size == 0) {
 437                 Py_INCREF(a);
 438                 return (PyObject *)a;
 439         }
 440         size = a->ob_size + b->ob_size;
 441         /* PyObject_NewVar is inlined */
 442         op = (PyStringObject *)
 443                 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
 444         if (op == NULL)
 445                 return PyErr_NoMemory();
 446         PyObject_INIT_VAR(op, &PyString_Type, size);
 447 #ifdef CACHE_HASH
 448         op->ob_shash = -1;
 449 #endif
 450 #ifdef INTERN_STRINGS
 451         op->ob_sinterned = NULL;
 452 #endif
 453         memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
 454         memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
 455         op->ob_sval[size] = '\0';
 456         return (PyObject *) op;
 457 #undef b
 458 }
 459
 460 static PyObject *
 461 string_repeat(register PyStringObject *a, register int n)
 462 {
 463         register int i;
 464         register int size;
 465         register PyStringObject *op;
 466         size_t nbytes;
 467         if (n < 0)
 468                 n = 0;
 469         /* watch out for overflows:  the size can overflow int,
 470          * and the # of bytes needed can overflow size_t
 471          */
 472         size = a->ob_size * n;
 473         if (n && size / n != a->ob_size) {
 474                 PyErr_SetString(PyExc_OverflowError,
 475                         "repeated string is too long");
 476                 return NULL;
 477         }
 478         if (size == a->ob_size) {
 479                 Py_INCREF(a);
 480                 return (PyObject *)a;
 481         }
 482         nbytes = size * sizeof(char);
 483         if (nbytes / sizeof(char) != (size_t)size ||
 484             nbytes + sizeof(PyStringObject) <= nbytes) {
 485                 PyErr_SetString(PyExc_OverflowError,
 486                         "repeated string is too long");
 487                 return NULL;
 488         }
 489         op = (PyStringObject *)
 490                 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
 491         if (op == NULL)
 492                 return PyErr_NoMemory();
 493         PyObject_INIT_VAR(op, &PyString_Type, size);
 494 #ifdef CACHE_HASH
 495         op->ob_shash = -1;
 496 #endif
 497 #ifdef INTERN_STRINGS
 498         op->ob_sinterned = NULL;
 499 #endif
 500         for (i = 0; i < size; i += a->ob_size)
 501                 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
 502         op->ob_sval[size] = '\0';
 503         return (PyObject *) op;
 504 }
 505
 506 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
 507
 508 static PyObject *
 509 string_slice(register PyStringObject *a, register int i, register int j)
 510      /* j -- may be negative! */
 511 {
 512         if (i < 0)
 513                 i = 0;
 514         if (j < 0)
 515                 j = 0; /* Avoid signed/unsigned bug in next line */
 516         if (j > a->ob_size)
 517                 j = a->ob_size;
 518         if (i == 0 && j == a->ob_size) { /* It's the same as a */
 519                 Py_INCREF(a);
 520                 return (PyObject *)a;
 521         }
 522         if (j < i)
 523                 j = i;
 524         return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
 525 }
 526
 527 static int
 528 string_contains(PyObject *a, PyObject *el)
 529 {
 530         register char *s, *end;
 531         register char c;
 532         if (PyUnicode_Check(el))
 533                 return PyUnicode_Contains(a, el);
 534         if (!PyString_Check(el) || PyString_Size(el) != 1) {
 535                 PyErr_SetString(PyExc_TypeError,
 536                     "'in <string>' requires character as left operand");
 537                 return -1;
 538         }
 539         c = PyString_AsString(el)[0];
 540         s = PyString_AsString(a);
 541         end = s + PyString_Size(a);
 542         while (s < end) {
 543                 if (c == *s++)
 544                         return 1;
 545         }
 546         return 0;
 547 }
 548
 549 static PyObject *
 550 string_item(PyStringObject *a, register int i)
 551 {
 552         int c;
 553         PyObject *v;
 554         if (i < 0 || i >= a->ob_size) {
 555                 PyErr_SetString(PyExc_IndexError, "string index out of range");
 556                 return NULL;
 557         }
 558         c = a->ob_sval[i] & UCHAR_MAX;
 559         v = (PyObject *) characters[c];
 560 #ifdef COUNT_ALLOCS
 561         if (v != NULL)
 562                 one_strings++;
 563 #endif
 564         if (v == NULL) {
 565                 v = PyString_FromStringAndSize((char *)NULL, 1);
 566                 if (v == NULL)
 567                         return NULL;
 568                 characters[c] = (PyStringObject *) v;
 569                 ((PyStringObject *)v)->ob_sval[0] = c;
 570         }
 571         Py_INCREF(v);
 572         return v;
 573 }
 574
 575 static int
 576 string_compare(PyStringObject *a, PyStringObject *b)
 577 {
 578         int len_a = a->ob_size, len_b = b->ob_size;
 579         int min_len = (len_a < len_b) ? len_a : len_b;
 580         int cmp;
 581         if (min_len > 0) {
 582                 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
 583                 if (cmp == 0)
 584                         cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
 585                 if (cmp != 0)
 586                         return cmp;
 587         }
 588         return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
 589 }
 590
 591 static long
 592 string_hash(PyStringObject *a)
 593 {
 594         register int len;
 595         register unsigned char *p;
 596         register long x;
 597
 598 #ifdef CACHE_HASH
 599         if (a->ob_shash != -1)
 600                 return a->ob_shash;
 601 #ifdef INTERN_STRINGS
 602         if (a->ob_sinterned != NULL)
 603                 return (a->ob_shash =
 604                         ((PyStringObject *)(a->ob_sinterned))->ob_shash);
 605 #endif
 606 #endif
 607         len = a->ob_size;
 608         p = (unsigned char *) a->ob_sval;
 609         x = *p << 7;
 610         while (--len >= 0)
 611                 x = (1000003*x) ^ *p++;
 612         x ^= a->ob_size;
 613         if (x == -1)
 614                 x = -2;
 615 #ifdef CACHE_HASH
 616         a->ob_shash = x;
 617 #endif
 618         return x;
 619 }
 620
 621 static int
 622 string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
 623 {
 624         if ( index != 0 ) {
 625                 PyErr_SetString(PyExc_SystemError,
 626                                 "accessing non-existent string segment");
 627                 return -1;
 628         }
 629         *ptr = (void *)self->ob_sval;
 630         return self->ob_size;
 631 }
 632
 633 static int
 634 string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
 635 {
 636         PyErr_SetString(PyExc_TypeError,
 637                         "Cannot use string as modifiable buffer");
 638         return -1;
 639 }
 640
 641 static int
 642 string_buffer_getsegcount(PyStringObject *self, int *lenp)
 643 {
 644         if ( lenp )
 645                 *lenp = self->ob_size;
 646         return 1;
 647 }
 648
 649 static int
 650 string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
 651 {
 652         if ( index != 0 ) {
 653                 PyErr_SetString(PyExc_SystemError,
 654                                 "accessing non-existent string segment");
 655                 return -1;
 656         }
 657         *ptr = self->ob_sval;
 658         return self->ob_size;
 659 }
 660
 661 static PySequenceMethods string_as_sequence = {
 662         (inquiry)string_length, /*sq_length*/
 663         (binaryfunc)string_concat, /*sq_concat*/
 664         (intargfunc)string_repeat, /*sq_repeat*/
 665         (intargfunc)string_item, /*sq_item*/
 666         (intintargfunc)string_slice, /*sq_slice*/
 667         0,              /*sq_ass_item*/
 668         0,              /*sq_ass_slice*/
 669         (objobjproc)string_contains /*sq_contains*/
 670 };
 671
 672 static PyBufferProcs string_as_buffer = {
 673         (getreadbufferproc)string_buffer_getreadbuf,
 674         (getwritebufferproc)string_buffer_getwritebuf,
 675         (getsegcountproc)string_buffer_getsegcount,
 676         (getcharbufferproc)string_buffer_getcharbuf,
 677 };
 678
 679
 680 \f
 681 #define LEFTSTRIP 0
 682 #define RIGHTSTRIP 1
 683 #define BOTHSTRIP 2
 684
 685
 686 static PyObject *
 687 split_whitespace(const char *s, int len, int maxsplit)
 688 {
 689         int i, j, err;
 690         PyObject* item;
 691         PyObject *list = PyList_New(0);
 692
 693         if (list == NULL)
 694                 return NULL;
 695
 696         for (i = j = 0; i < len; ) {
 697                 while (i < len && isspace(Py_CHARMASK(s[i])))
 698                         i++;
 699                 j = i;
 700                 while (i < len && !isspace(Py_CHARMASK(s[i])))
 701                         i++;
 702                 if (j < i) {
 703                         if (maxsplit-- <= 0)
 704                                 break;
 705                         item = PyString_FromStringAndSize(s+j, (int)(i-j));
 706                         if (item == NULL)
 707                                 goto finally;
 708                         err = PyList_Append(list, item);
 709                         Py_DECREF(item);
 710                         if (err < 0)
 711                                 goto finally;
 712                         while (i < len && isspace(Py_CHARMASK(s[i])))
 713                                 i++;
 714                         j = i;
 715                 }
 716         }
 717         if (j < len) {
 718                 item = PyString_FromStringAndSize(s+j, (int)(len - j));
 719                 if (item == NULL)
 720                         goto finally;
 721                 err = PyList_Append(list, item);
 722                 Py_DECREF(item);
 723                 if (err < 0)
 724                         goto finally;
 725         }
 726         return list;
 727   finally:
 728         Py_DECREF(list);
 729         return NULL;
 730 }
 731
 732
 733 static char split__doc__[] =
 734 "S.split([sep [,maxsplit]]) -> list of strings\n\
 735 \n\
 736 Return a list of the words in the string S, using sep as the\n\
 737 delimiter string.  If maxsplit is given, at most maxsplit\n\
 738 splits are done. If sep is not specified, any whitespace string\n\
 739 is a separator.";
 740
 741 static PyObject *
 742 string_split(PyStringObject *self, PyObject *args)
 743 {
 744         int len = PyString_GET_SIZE(self), n, i, j, err;
 745         int maxsplit = -1;
 746         const char *s = PyString_AS_STRING(self), *sub;
 747         PyObject *list, *item, *subobj = Py_None;
 748
 749         if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
 750                 return NULL;
 751         if (maxsplit < 0)
 752                 maxsplit = INT_MAX;
 753         if (subobj == Py_None)
 754                 return split_whitespace(s, len, maxsplit);
 755         if (PyString_Check(subobj)) {
 756                 sub = PyString_AS_STRING(subobj);
 757                 n = PyString_GET_SIZE(subobj);
 758         }
 759         else if (PyUnicode_Check(subobj))
 760                 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
 761         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
 762                 return NULL;
 763         if (n == 0) {
 764                 PyErr_SetString(PyExc_ValueError, "empty separator");
 765                 return NULL;
 766         }
 767
 768         list = PyList_New(0);
 769         if (list == NULL)
 770                 return NULL;
 771
 772         i = j = 0;
 773         while (i+n <= len) {
 774                 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
 775                         if (maxsplit-- <= 0)
 776                                 break;
 777                         item = PyString_FromStringAndSize(s+j, (int)(i-j));
 778                         if (item == NULL)
 779                                 goto fail;
 780                         err = PyList_Append(list, item);
 781                         Py_DECREF(item);
 782                         if (err < 0)
 783                                 goto fail;
 784                         i = j = i + n;
 785                 }
 786                 else
 787                         i++;
 788         }
 789         item = PyString_FromStringAndSize(s+j, (int)(len-j));
 790         if (item == NULL)
 791                 goto fail;
 792         err = PyList_Append(list, item);
 793         Py_DECREF(item);
 794         if (err < 0)
 795                 goto fail;
 796
 797         return list;
 798
 799  fail:
 800         Py_DECREF(list);
 801         return NULL;
 802 }
 803
 804
 805 static char join__doc__[] =
 806 "S.join(sequence) -> string\n\
 807 \n\
 808 Return a string which is the concatenation of the strings in the\n\
 809 sequence.  The separator between elements is S.";
 810
 811 static PyObject *
 812 string_join(PyStringObject *self, PyObject *args)
 813 {
 814         char *sep = PyString_AS_STRING(self);
 815         const int seplen = PyString_GET_SIZE(self);
 816         PyObject *res = NULL;
 817         char *p;
 818         int seqlen = 0;
 819         size_t sz = 0;
 820         int i;
 821         PyObject *orig, *seq, *item;
 822
 823         if (!PyArg_ParseTuple(args, "O:join", &orig))
 824                 return NULL;
 825
 826         seq = PySequence_Fast(orig, "");
 827         if (seq == NULL) {
 828                 if (PyErr_ExceptionMatches(PyExc_TypeError))
 829                         PyErr_Format(PyExc_TypeError,
 830                                      "sequence expected, %.80s found",
 831                                      orig->ob_type->tp_name);
 832                 return NULL;
 833         }
 834
 835         seqlen = PySequence_Size(seq);
 836         if (seqlen == 0) {
 837                 Py_DECREF(seq);
 838                 return PyString_FromString("");
 839         }
 840         if (seqlen == 1) {
 841                 item = PySequence_Fast_GET_ITEM(seq, 0);
 842                 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
 843                         PyErr_Format(PyExc_TypeError,
 844                                      "sequence item 0: expected string,"
 845                                      " %.80s found",
 846                                      item->ob_type->tp_name);
 847                         Py_DECREF(seq);
 848                         return NULL;
 849                 }
 850                 Py_INCREF(item);
 851                 Py_DECREF(seq);
 852                 return item;
 853         }
 854
 855         /* There are at least two things to join.  Do a pre-pass to figure out
 856          * the total amount of space we'll need (sz), see whether any argument
 857          * is absurd, and defer to the Unicode join if appropriate.
 858          */
 859         for (i = 0; i < seqlen; i++) {
 860                 const size_t old_sz = sz;
 861                 item = PySequence_Fast_GET_ITEM(seq, i);
 862                 if (!PyString_Check(item)){
 863                         if (PyUnicode_Check(item)) {
 864                                 Py_DECREF(seq);
 865                                 return PyUnicode_Join((PyObject *)self, orig);
 866                         }
 867                         PyErr_Format(PyExc_TypeError,
 868                                      "sequence item %i: expected string,"
 869                                      " %.80s found",
 870                                      i, item->ob_type->tp_name);
 871                         Py_DECREF(seq);
 872                         return NULL;
 873                 }
 874                 sz += PyString_GET_SIZE(item);
 875                 if (i != 0)
 876                         sz += seplen;
 877                 if (sz < old_sz || sz > INT_MAX) {
 878                         PyErr_SetString(PyExc_OverflowError,
 879                                 "join() is too long for a Python string");
 880                         Py_DECREF(seq);
 881                         return NULL;
 882                 }
 883         }
 884
 885         /* Allocate result space. */
 886         res = PyString_FromStringAndSize((char*)NULL, (int)sz);
 887         if (res == NULL) {
 888                 Py_DECREF(seq);
 889                 return NULL;
 890         }
 891
 892         /* Catenate everything. */
 893         p = PyString_AS_STRING(res);
 894         for (i = 0; i < seqlen; ++i) {
 895                 size_t n;
 896                 item = PySequence_Fast_GET_ITEM(seq, i);
 897                 n = PyString_GET_SIZE(item);
 898                 memcpy(p, PyString_AS_STRING(item), n);
 899                 p += n;
 900                 if (i < seqlen - 1) {
 901                         memcpy(p, sep, seplen);
 902                         p += seplen;
 903                 }
 904         }
 905
 906         Py_DECREF(seq);
 907         return res;
 908 }
 909
 910 static long
 911 string_find_internal(PyStringObject *self, PyObject *args, int dir)
 912 {
 913         const char *s = PyString_AS_STRING(self), *sub;
 914         int len = PyString_GET_SIZE(self);
 915         int n, i = 0, last = INT_MAX;
 916         PyObject *subobj;
 917
 918         if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
 919                 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
 920                 return -2;
 921         if (PyString_Check(subobj)) {
 922                 sub = PyString_AS_STRING(subobj);
 923                 n = PyString_GET_SIZE(subobj);
 924         }
 925         else if (PyUnicode_Check(subobj))
 926                 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
 927         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
 928                 return -2;
 929
 930         if (last > len)
 931                 last = len;
 932         if (last < 0)
 933                 last += len;
 934         if (last < 0)
 935                 last = 0;
 936         if (i < 0)
 937                 i += len;
 938         if (i < 0)
 939                 i = 0;
 940
 941         if (dir > 0) {
 942                 if (n == 0 && i <= last)
 943                         return (long)i;
 944                 last -= n;
 945                 for (; i <= last; ++i)
 946                         if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
 947                                 return (long)i;
 948         }
 949         else {
 950                 int j;
 951
 952                 if (n == 0 && i <= last)
 953                         return (long)last;
 954                 for (j = last-n; j >= i; --j)
 955                         if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
 956                                 return (long)j;
 957         }
 958
 959         return -1;
 960 }
 961
 962
 963 static char find__doc__[] =
 964 "S.find(sub [,start [,end]]) -> int\n\
 965 \n\
 966 Return the lowest index in S where substring sub is found,\n\
 967 such that sub is contained within s[start,end].  Optional\n\
 968 arguments start and end are interpreted as in slice notation.\n\
 969 \n\
 970 Return -1 on failure.";
 971
 972 static PyObject *
 973 string_find(PyStringObject *self, PyObject *args)
 974 {
 975         long result = string_find_internal(self, args, +1);
 976         if (result == -2)
 977                 return NULL;
 978         return PyInt_FromLong(result);
 979 }
 980
 981
 982 static char index__doc__[] =
 983 "S.index(sub [,start [,end]]) -> int\n\
 984 \n\
 985 Like S.find() but raise ValueError when the substring is not found.";
 986
 987 static PyObject *
 988 string_index(PyStringObject *self, PyObject *args)
 989 {
 990         long result = string_find_internal(self, args, +1);
 991         if (result == -2)
 992                 return NULL;
 993         if (result == -1) {
 994                 PyErr_SetString(PyExc_ValueError,
 995                                 "substring not found in string.index");
 996                 return NULL;
 997         }
 998         return PyInt_FromLong(result);
 999 }
1000
1001
1002 static char rfind__doc__[] =
1003 "S.rfind(sub [,start [,end]]) -> int\n\
1004 \n\
1005 Return the highest index in S where substring sub is found,\n\
1006 such that sub is contained within s[start,end].  Optional\n\
1007 arguments start and end are interpreted as in slice notation.\n\
1008 \n\
1009 Return -1 on failure.";
1010
1011 static PyObject *
1012 string_rfind(PyStringObject *self, PyObject *args)
1013 {
1014         long result = string_find_internal(self, args, -1);
1015         if (result == -2)
1016                 return NULL;
1017         return PyInt_FromLong(result);
1018 }
1019
1020
1021 static char rindex__doc__[] =
1022 "S.rindex(sub [,start [,end]]) -> int\n\
1023 \n\
1024 Like S.rfind() but raise ValueError when the substring is not found.";
1025
1026 static PyObject *
1027 string_rindex(PyStringObject *self, PyObject *args)
1028 {
1029         long result = string_find_internal(self, args, -1);
1030         if (result == -2)
1031                 return NULL;
1032         if (result == -1) {
1033                 PyErr_SetString(PyExc_ValueError,
1034                                 "substring not found in string.rindex");
1035                 return NULL;
1036         }
1037         return PyInt_FromLong(result);
1038 }
1039
1040
1041 static PyObject *
1042 do_strip(PyStringObject *self, PyObject *args, int striptype)
1043 {
1044         char *s = PyString_AS_STRING(self);
1045         int len = PyString_GET_SIZE(self), i, j;
1046
1047         if (!PyArg_ParseTuple(args, ":strip"))
1048                 return NULL;
1049
1050         i = 0;
1051         if (striptype != RIGHTSTRIP) {
1052                 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1053                         i++;
1054                 }
1055         }
1056
1057         j = len;
1058         if (striptype != LEFTSTRIP) {
1059                 do {
1060                         j--;
1061                 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1062                 j++;
1063         }
1064
1065         if (i == 0 && j == len) {
1066                 Py_INCREF(self);
1067                 return (PyObject*)self;
1068         }
1069         else
1070                 return PyString_FromStringAndSize(s+i, j-i);
1071 }
1072
1073
1074 static char strip__doc__[] =
1075 "S.strip() -> string\n\
1076 \n\
1077 Return a copy of the string S with leading and trailing\n\
1078 whitespace removed.";
1079
1080 static PyObject *
1081 string_strip(PyStringObject *self, PyObject *args)
1082 {
1083         return do_strip(self, args, BOTHSTRIP);
1084 }
1085
1086
1087 static char lstrip__doc__[] =
1088 "S.lstrip() -> string\n\
1089 \n\
1090 Return a copy of the string S with leading whitespace removed.";
1091
1092 static PyObject *
1093 string_lstrip(PyStringObject *self, PyObject *args)
1094 {
1095         return do_strip(self, args, LEFTSTRIP);
1096 }
1097
1098
1099 static char rstrip__doc__[] =
1100 "S.rstrip() -> string\n\
1101 \n\
1102 Return a copy of the string S with trailing whitespace removed.";
1103
1104 static PyObject *
1105 string_rstrip(PyStringObject *self, PyObject *args)
1106 {
1107         return do_strip(self, args, RIGHTSTRIP);
1108 }
1109
1110
1111 static char lower__doc__[] =
1112 "S.lower() -> string\n\
1113 \n\
1114 Return a copy of the string S converted to lowercase.";
1115
1116 static PyObject *
1117 string_lower(PyStringObject *self, PyObject *args)
1118 {
1119         char *s = PyString_AS_STRING(self), *s_new;
1120         int i, n = PyString_GET_SIZE(self);
1121         PyObject *new;
1122
1123         if (!PyArg_ParseTuple(args, ":lower"))
1124                 return NULL;
1125         new = PyString_FromStringAndSize(NULL, n);
1126         if (new == NULL)
1127                 return NULL;
1128         s_new = PyString_AsString(new);
1129         for (i = 0; i < n; i++) {
1130                 int c = Py_CHARMASK(*s++);
1131                 if (isupper(c)) {
1132                         *s_new = tolower(c);
1133                 } else
1134                         *s_new = c;
1135                 s_new++;
1136         }
1137         return new;
1138 }
1139
1140
1141 static char upper__doc__[] =
1142 "S.upper() -> string\n\
1143 \n\
1144 Return a copy of the string S converted to uppercase.";
1145
1146 static PyObject *
1147 string_upper(PyStringObject *self, PyObject *args)
1148 {
1149         char *s = PyString_AS_STRING(self), *s_new;
1150         int i, n = PyString_GET_SIZE(self);
1151         PyObject *new;
1152
1153         if (!PyArg_ParseTuple(args, ":upper"))
1154                 return NULL;
1155         new = PyString_FromStringAndSize(NULL, n);
1156         if (new == NULL)
1157                 return NULL;
1158         s_new = PyString_AsString(new);
1159         for (i = 0; i < n; i++) {
1160                 int c = Py_CHARMASK(*s++);
1161                 if (islower(c)) {
1162                         *s_new = toupper(c);
1163                 } else
1164                         *s_new = c;
1165                 s_new++;
1166         }
1167         return new;
1168 }
1169
1170
1171 static char title__doc__[] =
1172 "S.title() -> string\n\
1173 \n\
1174 Return a titlecased version of S, i.e. words start with uppercase\n\
1175 characters, all remaining cased characters have lowercase.";
1176
1177 static PyObject*
1178 string_title(PyStringObject *self, PyObject *args)
1179 {
1180         char *s = PyString_AS_STRING(self), *s_new;
1181         int i, n = PyString_GET_SIZE(self);
1182         int previous_is_cased = 0;
1183         PyObject *new;
1184
1185         if (!PyArg_ParseTuple(args, ":title"))
1186                 return NULL;
1187         new = PyString_FromStringAndSize(NULL, n);
1188         if (new == NULL)
1189                 return NULL;
1190         s_new = PyString_AsString(new);
1191         for (i = 0; i < n; i++) {
1192                 int c = Py_CHARMASK(*s++);
1193                 if (islower(c)) {
1194                         if (!previous_is_cased)
1195                             c = toupper(c);
1196                         previous_is_cased = 1;
1197                 } else if (isupper(c)) {
1198                         if (previous_is_cased)
1199                             c = tolower(c);
1200                         previous_is_cased = 1;
1201                 } else
1202                         previous_is_cased = 0;
1203                 *s_new++ = c;
1204         }
1205         return new;
1206 }
1207
1208 static char capitalize__doc__[] =
1209 "S.capitalize() -> string\n\
1210 \n\
1211 Return a copy of the string S with only its first character\n\
1212 capitalized.";
1213
1214 static PyObject *
1215 string_capitalize(PyStringObject *self, PyObject *args)
1216 {
1217         char *s = PyString_AS_STRING(self), *s_new;
1218         int i, n = PyString_GET_SIZE(self);
1219         PyObject *new;
1220
1221         if (!PyArg_ParseTuple(args, ":capitalize"))
1222                 return NULL;
1223         new = PyString_FromStringAndSize(NULL, n);
1224         if (new == NULL)
1225                 return NULL;
1226         s_new = PyString_AsString(new);
1227         if (0 < n) {
1228                 int c = Py_CHARMASK(*s++);
1229                 if (islower(c))
1230                         *s_new = toupper(c);
1231                 else
1232                         *s_new = c;
1233                 s_new++;
1234         }
1235         for (i = 1; i < n; i++) {
1236                 int c = Py_CHARMASK(*s++);
1237                 if (isupper(c))
1238                         *s_new = tolower(c);
1239                 else
1240                         *s_new = c;
1241                 s_new++;
1242         }
1243         return new;
1244 }
1245
1246
1247 static char count__doc__[] =
1248 "S.count(sub[, start[, end]]) -> int\n\
1249 \n\
1250 Return the number of occurrences of substring sub in string\n\
1251 S[start:end].  Optional arguments start and end are\n\
1252 interpreted as in slice notation.";
1253
1254 static PyObject *
1255 string_count(PyStringObject *self, PyObject *args)
1256 {
1257         const char *s = PyString_AS_STRING(self), *sub;
1258         int len = PyString_GET_SIZE(self), n;
1259         int i = 0, last = INT_MAX;
1260         int m, r;
1261         PyObject *subobj;
1262
1263         if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1264                 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
1265                 return NULL;
1266
1267         if (PyString_Check(subobj)) {
1268                 sub = PyString_AS_STRING(subobj);
1269                 n = PyString_GET_SIZE(subobj);
1270         }
1271         else if (PyUnicode_Check(subobj)) {
1272                 int count;
1273                 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1274                 if (count == -1)
1275                         return NULL;
1276                 else
1277                         return PyInt_FromLong((long) count);
1278         }
1279         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1280                 return NULL;
1281
1282         if (last > len)
1283                 last = len;
1284         if (last < 0)
1285                 last += len;
1286         if (last < 0)
1287                 last = 0;
1288         if (i < 0)
1289                 i += len;
1290         if (i < 0)
1291                 i = 0;
1292         m = last + 1 - n;
1293         if (n == 0)
1294                 return PyInt_FromLong((long) (m-i));
1295
1296         r = 0;
1297         while (i < m) {
1298                 if (!memcmp(s+i, sub, n)) {
1299                         r++;
1300                         i += n;
1301                 } else {
1302                         i++;
1303                 }
1304         }
1305         return PyInt_FromLong((long) r);
1306 }
1307
1308
1309 static char swapcase__doc__[] =
1310 "S.swapcase() -> string\n\
1311 \n\
1312 Return a copy of the string S with uppercase characters\n\
1313 converted to lowercase and vice versa.";
1314
1315 static PyObject *
1316 string_swapcase(PyStringObject *self, PyObject *args)
1317 {
1318         char *s = PyString_AS_STRING(self), *s_new;
1319         int i, n = PyString_GET_SIZE(self);
1320         PyObject *new;
1321
1322         if (!PyArg_ParseTuple(args, ":swapcase"))
1323                 return NULL;
1324         new = PyString_FromStringAndSize(NULL, n);
1325         if (new == NULL)
1326                 return NULL;
1327         s_new = PyString_AsString(new);
1328         for (i = 0; i < n; i++) {
1329                 int c = Py_CHARMASK(*s++);
1330                 if (islower(c)) {
1331                         *s_new = toupper(c);
1332                 }
1333                 else if (isupper(c)) {
1334                         *s_new = tolower(c);
1335                 }
1336                 else
1337                         *s_new = c;
1338                 s_new++;
1339         }
1340         return new;
1341 }
1342
1343
1344 static char translate__doc__[] =
1345 "S.translate(table [,deletechars]) -> string\n\
1346 \n\
1347 Return a copy of the string S, where all characters occurring\n\
1348 in the optional argument deletechars are removed, and the\n\
1349 remaining characters have been mapped through the given\n\
1350 translation table, which must be a string of length 256.";
1351
1352 static PyObject *
1353 string_translate(PyStringObject *self, PyObject *args)
1354 {
1355         register char *input, *output;
1356         register const char *table;
1357         register int i, c, changed = 0;
1358         PyObject *input_obj = (PyObject*)self;
1359         const char *table1, *output_start, *del_table=NULL;
1360         int inlen, tablen, dellen = 0;
1361         PyObject *result;
1362         int trans_table[256];
1363         PyObject *tableobj, *delobj = NULL;
1364
1365         if (!PyArg_ParseTuple(args, "O|O:translate",
1366                               &tableobj, &delobj))
1367                 return NULL;
1368
1369         if (PyString_Check(tableobj)) {
1370                 table1 = PyString_AS_STRING(tableobj);
1371                 tablen = PyString_GET_SIZE(tableobj);
1372         }
1373         else if (PyUnicode_Check(tableobj)) {
1374                 /* Unicode .translate() does not support the deletechars
1375                    parameter; instead a mapping to None will cause characters
1376                    to be deleted. */
1377                 if (delobj != NULL) {
1378                         PyErr_SetString(PyExc_TypeError,
1379                         "deletions are implemented differently for unicode");
1380                         return NULL;
1381                 }
1382                 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1383         }
1384         else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
1385                 return NULL;
1386
1387         if (delobj != NULL) {
1388                 if (PyString_Check(delobj)) {
1389                         del_table = PyString_AS_STRING(delobj);
1390                         dellen = PyString_GET_SIZE(delobj);
1391                 }
1392                 else if (PyUnicode_Check(delobj)) {
1393                         PyErr_SetString(PyExc_TypeError,
1394                         "deletions are implemented differently for unicode");
1395                         return NULL;
1396                 }
1397                 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1398                         return NULL;
1399
1400                 if (tablen != 256) {
1401                         PyErr_SetString(PyExc_ValueError,
1402                           "translation table must be 256 characters long");
1403                         return NULL;
1404                 }
1405         }
1406         else {
1407                 del_table = NULL;
1408                 dellen = 0;
1409         }
1410
1411         table = table1;
1412         inlen = PyString_Size(input_obj);
1413         result = PyString_FromStringAndSize((char *)NULL, inlen);
1414         if (result == NULL)
1415                 return NULL;
1416         output_start = output = PyString_AsString(result);
1417         input = PyString_AsString(input_obj);
1418
1419         if (dellen == 0) {
1420                 /* If no deletions are required, use faster code */
1421                 for (i = inlen; --i >= 0; ) {
1422                         c = Py_CHARMASK(*input++);
1423                         if (Py_CHARMASK((*output++ = table[c])) != c)
1424                                 changed = 1;
1425                 }
1426                 if (changed)
1427                         return result;
1428                 Py_DECREF(result);
1429                 Py_INCREF(input_obj);
1430                 return input_obj;
1431         }
1432
1433         for (i = 0; i < 256; i++)
1434                 trans_table[i] = Py_CHARMASK(table[i]);
1435
1436         for (i = 0; i < dellen; i++)
1437                 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1438
1439         for (i = inlen; --i >= 0; ) {
1440                 c = Py_CHARMASK(*input++);
1441                 if (trans_table[c] != -1)
1442                         if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1443                                 continue;
1444                 changed = 1;
1445         }
1446         if (!changed) {
1447                 Py_DECREF(result);
1448                 Py_INCREF(input_obj);
1449                 return input_obj;
1450         }
1451         /* Fix the size of the resulting string */
1452         if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1453                 return NULL;
1454         return result;
1455 }
1456
1457
1458 /* What follows is used for implementing replace().  Perry Stoll. */
1459
/*
  mymemfind

  strstr-like search over arbitrary blocks of memory (embedded NUL bytes
  are ordinary data).

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN bytes
  at PAT.  Returns the index of that occurrence within MEM, or -1 if there
  is none.  A pattern longer than MEM can never match, so -1 is returned
  in that case as well.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
        /* Highest index at which a complete match could still begin. */
        const int last_start = len - pat_len;
        int pos = 0;

        while (pos <= last_start) {
                /* Cheap first-byte test before the full comparison. */
                if (mem[pos] == pat[0] &&
                    memcmp(mem + pos, pat, pat_len) == 0)
                        return pos;
                pos++;
        }
        return -1;
}
1485
/*
  mymemcnt

   Count the non-overlapping occurrences of PAT in MEM, scanning from the
   left and resuming right after each match:
           mem=1111  and pat=11 gives 2,
           mem=11111 and pat=11 also gives 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
        int hits;

        for (hits = 0; len >= 0; hits++) {
                const int at = mymemfind(mem, len, pat, pat_len);
                int consumed;

                if (at == -1)
                        break;
                /* Drop everything up to and including this match. */
                consumed = at + pat_len;
                mem += consumed;
                len -= consumed;
        }
        return hits;
}
1509
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are replaced
   with SUB.  At most COUNT occurrences are replaced; a negative COUNT
   means "replace all".

   If STR is empty, if PAT is longer than STR, or if no replacement is to
   be made, then the original string is returned.  Otherwise, a new string
   is allocated here with PyMem_MALLOC and returned; the caller must
   release it with PyMem_FREE.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs.

   return value is:
       the new string allocated locally, or
       NULL if an error occurred (allocation failure, or a result whose
       length would not fit in an int).
*/
static char *
mymemreplace(const char *str, int len,          /* input string */
             const char *pat, int pat_len,      /* pattern string to find */
             const char *sub, int sub_len,      /* substitution string */
             int count,                         /* number of replacements */
             int *out_len)
{
        char *out_s;
        char *new_s;
        int nfound, offset, new_len, growth;

        if (len == 0 || pat_len > len)
                goto return_same;

        /* find length of output string */
        nfound = mymemcnt(str, len, pat, pat_len);
        if (count < 0)
                count = INT_MAX;
        else if (nfound > count)
                nfound = count;
        if (nfound == 0)
                goto return_same;

        /* Each replacement changes the length by sub_len - pat_len.  When
           that is positive, make sure the total still fits in an int;
           otherwise the buffer below would be too small for the copies. */
        growth = sub_len - pat_len;
        if (growth > 0 && nfound > (INT_MAX - len) / growth)
                return NULL;

        new_len = len + nfound*growth;
        if (new_len == 0) {
                /* Have to allocate something for the caller to free(). */
                out_s = (char *)PyMem_MALLOC(1);
                if (out_s == NULL)
                        return NULL;
                out_s[0] = '\0';
        }
        else {
                assert(new_len > 0);
                new_s = (char *)PyMem_MALLOC(new_len);
                if (new_s == NULL)
                        return NULL;
                out_s = new_s;

                for (; count > 0 && len > 0; --count) {
                        /* find index of next instance of pattern */
                        offset = mymemfind(str, len, pat, pat_len);
                        if (offset == -1)
                                break;

                        /* copy non matching part of input string */
                        memcpy(new_s, str, offset);
                        str += offset + pat_len;
                        len -= offset + pat_len;

                        /* copy substitute into the output string */
                        new_s += offset;
                        memcpy(new_s, sub, sub_len);
                        new_s += sub_len;
                }
                /* copy any remaining values into output string */
                if (len > 0)
                        memcpy(new_s, str, len);
        }
        *out_len = new_len;
        return out_s;

  return_same:
        *out_len = -1;
        return (char *)str; /* cast away const */
}
1594
1595
1596 static char replace__doc__[] =
1597 "S.replace (old, new[, maxsplit]) -> string\n\
1598 \n\
1599 Return a copy of string S with all occurrences of substring\n\
1600 old replaced by new.  If the optional argument maxsplit is\n\
1601 given, only the first maxsplit occurrences are replaced.";
1602
1603 static PyObject *
1604 string_replace(PyStringObject *self, PyObject *args)
1605 {
1606         const char *str = PyString_AS_STRING(self), *sub, *repl;
1607         char *new_s;
1608         int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1609         int count = -1;
1610         PyObject *new;
1611         PyObject *subobj, *replobj;
1612
1613         if (!PyArg_ParseTuple(args, "OO|i:replace",
1614                               &subobj, &replobj, &count))
1615                 return NULL;
1616
1617         if (PyString_Check(subobj)) {
1618                 sub = PyString_AS_STRING(subobj);
1619                 sub_len = PyString_GET_SIZE(subobj);
1620         }
1621         else if (PyUnicode_Check(subobj))
1622                 return PyUnicode_Replace((PyObject *)self,
1623                                          subobj, replobj, count);
1624         else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1625                 return NULL;
1626
1627         if (PyString_Check(replobj)) {
1628                 repl = PyString_AS_STRING(replobj);
1629                 repl_len = PyString_GET_SIZE(replobj);
1630         }
1631         else if (PyUnicode_Check(replobj))
1632                 return PyUnicode_Replace((PyObject *)self,
1633                                          subobj, replobj, count);
1634         else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1635                 return NULL;
1636
1637         if (sub_len <= 0) {
1638                 PyErr_SetString(PyExc_ValueError, "empty pattern string");
1639                 return NULL;
1640         }
1641         new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
1642         if (new_s == NULL) {
1643                 PyErr_NoMemory();
1644                 return NULL;
1645         }
1646         if (out_len == -1) {
1647                 /* we're returning another reference to self */
1648                 new = (PyObject*)self;
1649                 Py_INCREF(new);
1650         }
1651         else {
1652                 new = PyString_FromStringAndSize(new_s, out_len);
1653                 PyMem_FREE(new_s);
1654         }
1655         return new;
1656 }
1657
1658
1659 static char startswith__doc__[] =
1660 "S.startswith(prefix[, start[, end]]) -> int\n\
1661 \n\
1662 Return 1 if S starts with the specified prefix, otherwise return 0.  With\n\
1663 optional start, test S beginning at that position.  With optional end, stop\n\
1664 comparing S at that position.";
1665
1666 static PyObject *
1667 string_startswith(PyStringObject *self, PyObject *args)
1668 {
1669         const char* str = PyString_AS_STRING(self);
1670         int len = PyString_GET_SIZE(self);
1671         const char* prefix;
1672         int plen;
1673         int start = 0;
1674         int end = -1;
1675         PyObject *subobj;
1676
1677         if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1678                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1679                 return NULL;
1680         if (PyString_Check(subobj)) {
1681                 prefix = PyString_AS_STRING(subobj);
1682                 plen = PyString_GET_SIZE(subobj);
1683         }
1684         else if (PyUnicode_Check(subobj)) {
1685                 int rc;
1686                 rc = PyUnicode_Tailmatch((PyObject *)self,
1687                                           subobj, start, end, -1);
1688                 if (rc == -1)
1689                         return NULL;
1690                 else
1691                         return PyInt_FromLong((long) rc);
1692         }
1693         else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
1694                 return NULL;
1695
1696         /* adopt Java semantics for index out of range.  it is legal for
1697          * offset to be == plen, but this only returns true if prefix is
1698          * the empty string.
1699          */
1700         if (start < 0 || start+plen > len)
1701                 return PyInt_FromLong(0);
1702
1703         if (!memcmp(str+start, prefix, plen)) {
1704                 /* did the match end after the specified end? */
1705                 if (end < 0)
1706                         return PyInt_FromLong(1);
1707                 else if (end - start < plen)
1708                         return PyInt_FromLong(0);
1709                 else
1710                         return PyInt_FromLong(1);
1711         }
1712         else return PyInt_FromLong(0);
1713 }
1714
1715
1716 static char endswith__doc__[] =
1717 "S.endswith(suffix[, start[, end]]) -> int\n\
1718 \n\
1719 Return 1 if S ends with the specified suffix, otherwise return 0.  With\n\
1720 optional start, test S beginning at that position.  With optional end, stop\n\
1721 comparing S at that position.";
1722
1723 static PyObject *
1724 string_endswith(PyStringObject *self, PyObject *args)
1725 {
1726         const char* str = PyString_AS_STRING(self);
1727         int len = PyString_GET_SIZE(self);
1728         const char* suffix;
1729         int slen;
1730         int start = 0;
1731         int end = -1;
1732         int lower, upper;
1733         PyObject *subobj;
1734
1735         if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1736                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1737                 return NULL;
1738         if (PyString_Check(subobj)) {
1739                 suffix = PyString_AS_STRING(subobj);
1740                 slen = PyString_GET_SIZE(subobj);
1741         }
1742         else if (PyUnicode_Check(subobj)) {
1743                 int rc;
1744                 rc = PyUnicode_Tailmatch((PyObject *)self,
1745                                           subobj, start, end, +1);
1746                 if (rc == -1)
1747                         return NULL;
1748                 else
1749                         return PyInt_FromLong((long) rc);
1750         }
1751         else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
1752                 return NULL;
1753
1754         if (start < 0 || start > len || slen > len)
1755                 return PyInt_FromLong(0);
1756
1757         upper = (end >= 0 && end <= len) ? end : len;
1758         lower = (upper - slen) > start ? (upper - slen) : start;
1759
1760         if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
1761                 return PyInt_FromLong(1);
1762         else return PyInt_FromLong(0);
1763 }
1764
1765
1766 static char encode__doc__[] =
1767 "S.encode([encoding[,errors]]) -> string\n\
1768 \n\
1769 Return an encoded string version of S. Default encoding is the current\n\
1770 default string encoding. errors may be given to set a different error\n\
1771 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1772 a ValueError. Other possible values are 'ignore' and 'replace'.";
1773
1774 static PyObject *
1775 string_encode(PyStringObject *self, PyObject *args)
1776 {
1777     char *encoding = NULL;
1778     char *errors = NULL;
1779     if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
1780         return NULL;
1781     return PyString_AsEncodedString((PyObject *)self, encoding, errors);
1782 }
1783
1784
1785 static char expandtabs__doc__[] =
1786 "S.expandtabs([tabsize]) -> string\n\
1787 \n\
1788 Return a copy of S where all tab characters are expanded using spaces.\n\
1789 If tabsize is not given, a tab size of 8 characters is assumed.";
1790
1791 static PyObject*
1792 string_expandtabs(PyStringObject *self, PyObject *args)
1793 {
1794     const char *e, *p;
1795     char *q;
1796     int i, j;
1797     PyObject *u;
1798     int tabsize = 8;
1799
1800     if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1801         return NULL;
1802
1803     /* First pass: determine size of output string */
1804     i = j = 0;
1805     e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1806     for (p = PyString_AS_STRING(self); p < e; p++)
1807         if (*p == '\t') {
1808             if (tabsize > 0)
1809                 j += tabsize - (j % tabsize);
1810         }
1811         else {
1812             j++;
1813             if (*p == '\n' || *p == '\r') {
1814                 i += j;
1815                 j = 0;
1816             }
1817         }
1818
1819     /* Second pass: create output string and fill it */
1820     u = PyString_FromStringAndSize(NULL, i + j);
1821     if (!u)
1822         return NULL;
1823
1824     j = 0;
1825     q = PyString_AS_STRING(u);
1826
1827     for (p = PyString_AS_STRING(self); p < e; p++)
1828         if (*p == '\t') {
1829             if (tabsize > 0) {
1830                 i = tabsize - (j % tabsize);
1831                 j += i;
1832                 while (i--)
1833                     *q++ = ' ';
1834             }
1835         }
1836         else {
1837             j++;
1838             *q++ = *p;
1839             if (*p == '\n' || *p == '\r')
1840                 j = 0;
1841         }
1842
1843     return u;
1844 }
1845
1846 static
1847 PyObject *pad(PyStringObject *self,
1848               int left,
1849               int right,
1850               char fill)
1851 {
1852     PyObject *u;
1853
1854     if (left < 0)
1855         left = 0;
1856     if (right < 0)
1857         right = 0;
1858
1859     if (left == 0 && right == 0) {
1860         Py_INCREF(self);
1861         return (PyObject *)self;
1862     }
1863
1864     u = PyString_FromStringAndSize(NULL,
1865                                    left + PyString_GET_SIZE(self) + right);
1866     if (u) {
1867         if (left)
1868             memset(PyString_AS_STRING(u), fill, left);
1869         memcpy(PyString_AS_STRING(u) + left,
1870                PyString_AS_STRING(self),
1871                PyString_GET_SIZE(self));
1872         if (right)
1873             memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1874                    fill, right);
1875     }
1876
1877     return u;
1878 }
1879
1880 static char ljust__doc__[] =
1881 "S.ljust(width) -> string\n\
1882 \n\
1883 Return S left justified in a string of length width. Padding is\n\
1884 done using spaces.";
1885
1886 static PyObject *
1887 string_ljust(PyStringObject *self, PyObject *args)
1888 {
1889     int width;
1890     if (!PyArg_ParseTuple(args, "i:ljust", &width))
1891         return NULL;
1892
1893     if (PyString_GET_SIZE(self) >= width) {
1894         Py_INCREF(self);
1895         return (PyObject*) self;
1896     }
1897
1898     return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1899 }
1900
1901
1902 static char rjust__doc__[] =
1903 "S.rjust(width) -> string\n\
1904 \n\
1905 Return S right justified in a string of length width. Padding is\n\
1906 done using spaces.";
1907
1908 static PyObject *
1909 string_rjust(PyStringObject *self, PyObject *args)
1910 {
1911     int width;
1912     if (!PyArg_ParseTuple(args, "i:rjust", &width))
1913         return NULL;
1914
1915     if (PyString_GET_SIZE(self) >= width) {
1916         Py_INCREF(self);
1917         return (PyObject*) self;
1918     }
1919
1920     return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1921 }
1922
1923
1924 static char center__doc__[] =
1925 "S.center(width) -> string\n\
1926 \n\
1927 Return S centered in a string of length width. Padding is done\n\
1928 using spaces.";
1929
1930 static PyObject *
1931 string_center(PyStringObject *self, PyObject *args)
1932 {
1933     int marg, left;
1934     int width;
1935
1936     if (!PyArg_ParseTuple(args, "i:center", &width))
1937         return NULL;
1938
1939     if (PyString_GET_SIZE(self) >= width) {
1940         Py_INCREF(self);
1941         return (PyObject*) self;
1942     }
1943
1944     marg = width - PyString_GET_SIZE(self);
1945     left = marg / 2 + (marg & width & 1);
1946
1947     return pad(self, left, marg - left, ' ');
1948 }
1949
#if 0
static char zfill__doc__[] =
"S.zfill(width) -> string\n"
"\n"
"Pad a numeric string x with zeros on the left, to fill a field\n"
"of the specified width. The string x is never truncated.";

/* Implements S.zfill(width) (currently compiled out).

   Left-pads self with '0' characters up to `width`.  If the original
   string started with a '+' or '-' sign, the sign is moved in front of
   the inserted zeros.  A string that is already at least `width` long is
   returned as is (new reference). */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
    PyObject *padded;
    char *chars;
    int nzeros;
    int width;

    if (!PyArg_ParseTuple(args, "i:zfill", &width))
        return NULL;

    if (width <= PyString_GET_SIZE(self)) {
        Py_INCREF(self);
        return (PyObject*) self;
    }

    nzeros = width - PyString_GET_SIZE(self);

    padded = pad(self, nzeros, 0, '0');
    if (padded == NULL)
        return NULL;

    /* chars[nzeros] is the first character of the original string. */
    chars = PyString_AS_STRING(padded);
    switch (chars[nzeros]) {
    case '+':
    case '-':
        /* move sign to beginning of string */
        chars[0] = chars[nzeros];
        chars[nzeros] = '0';
        break;
    default:
        break;
    }

    return padded;
}
#endif
1989
1990 static char isspace__doc__[] =
1991 "S.isspace() -> int\n\
1992 \n\
1993 Return 1 if there are only whitespace characters in S,\n\
1994 0 otherwise.";
1995
1996 static PyObject*
1997 string_isspace(PyStringObject *self, PyObject *args)
1998 {
1999     register const unsigned char *p
2000         = (unsigned char *) PyString_AS_STRING(self);
2001     register const unsigned char *e;
2002
2003     if (!PyArg_NoArgs(args))
2004         return NULL;
2005
2006     /* Shortcut for single character strings */
2007     if (PyString_GET_SIZE(self) == 1 &&
2008         isspace(*p))
2009         return PyInt_FromLong(1);
2010
2011     /* Special case for empty strings */
2012     if (PyString_GET_SIZE(self) == 0)
2013         return PyInt_FromLong(0);
2014
2015     e = p + PyString_GET_SIZE(self);
2016     for (; p < e; p++) {
2017         if (!isspace(*p))
2018             return PyInt_FromLong(0);
2019     }
2020     return PyInt_FromLong(1);
2021 }
2022
2023
2024 static char isalpha__doc__[] =
2025 "S.isalpha() -> int\n\
2026 \n\
2027 Return 1 if  all characters in S are alphabetic\n\
2028 and there is at least one character in S, 0 otherwise.";
2029
2030 static PyObject*
2031 string_isalpha(PyStringObject *self, PyObject *args)
2032 {
2033     register const unsigned char *p
2034         = (unsigned char *) PyString_AS_STRING(self);
2035     register const unsigned char *e;
2036
2037     if (!PyArg_NoArgs(args))
2038         return NULL;
2039
2040     /* Shortcut for single character strings */
2041     if (PyString_GET_SIZE(self) == 1 &&
2042         isalpha(*p))
2043         return PyInt_FromLong(1);
2044
2045     /* Special case for empty strings */
2046     if (PyString_GET_SIZE(self) == 0)
2047         return PyInt_FromLong(0);
2048
2049     e = p + PyString_GET_SIZE(self);
2050     for (; p < e; p++) {
2051         if (!isalpha(*p))
2052             return PyInt_FromLong(0);
2053     }
2054     return PyInt_FromLong(1);
2055 }
2056
2057
2058 static char isalnum__doc__[] =
2059 "S.isalnum() -> int\n\
2060 \n\
2061 Return 1 if  all characters in S are alphanumeric\n\
2062 and there is at least one character in S, 0 otherwise.";
2063
2064 static PyObject*
2065 string_isalnum(PyStringObject *self, PyObject *args)
2066 {
2067     register const unsigned char *p
2068         = (unsigned char *) PyString_AS_STRING(self);
2069     register const unsigned char *e;
2070
2071     if (!PyArg_NoArgs(args))
2072         return NULL;
2073
2074     /* Shortcut for single character strings */
2075     if (PyString_GET_SIZE(self) == 1 &&
2076         isalnum(*p))
2077         return PyInt_FromLong(1);
2078
2079     /* Special case for empty strings */
2080     if (PyString_GET_SIZE(self) == 0)
2081         return PyInt_FromLong(0);
2082
2083     e = p + PyString_GET_SIZE(self);
2084     for (; p < e; p++) {
2085         if (!isalnum(*p))
2086             return PyInt_FromLong(0);
2087     }
2088     return PyInt_FromLong(1);
2089 }
2090
2091
2092 static char isdigit__doc__[] =
2093 "S.isdigit() -> int\n\
2094 \n\
2095 Return 1 if there are only digit characters in S,\n\
2096 0 otherwise.";
2097
2098 static PyObject*
2099 string_isdigit(PyStringObject *self, PyObject *args)
2100 {
2101     register const unsigned char *p
2102         = (unsigned char *) PyString_AS_STRING(self);
2103     register const unsigned char *e;
2104
2105     if (!PyArg_NoArgs(args))
2106         return NULL;
2107
2108     /* Shortcut for single character strings */
2109     if (PyString_GET_SIZE(self) == 1 &&
2110         isdigit(*p))
2111         return PyInt_FromLong(1);
2112
2113     /* Special case for empty strings */
2114     if (PyString_GET_SIZE(self) == 0)
2115         return PyInt_FromLong(0);
2116
2117     e = p + PyString_GET_SIZE(self);
2118     for (; p < e; p++) {
2119         if (!isdigit(*p))
2120             return PyInt_FromLong(0);
2121     }
2122     return PyInt_FromLong(1);
2123 }
2124
2125
2126 static char islower__doc__[] =
2127 "S.islower() -> int\n\
2128 \n\
2129 Return 1 if  all cased characters in S are lowercase and there is\n\
2130 at least one cased character in S, 0 otherwise.";
2131
2132 static PyObject*
2133 string_islower(PyStringObject *self, PyObject *args)
2134 {
2135     register const unsigned char *p
2136         = (unsigned char *) PyString_AS_STRING(self);
2137     register const unsigned char *e;
2138     int cased;
2139
2140     if (!PyArg_NoArgs(args))
2141         return NULL;
2142
2143     /* Shortcut for single character strings */
2144     if (PyString_GET_SIZE(self) == 1)
2145         return PyInt_FromLong(islower(*p) != 0);
2146
2147     /* Special case for empty strings */
2148     if (PyString_GET_SIZE(self) == 0)
2149         return PyInt_FromLong(0);
2150
2151     e = p + PyString_GET_SIZE(self);
2152     cased = 0;
2153     for (; p < e; p++) {
2154         if (isupper(*p))
2155             return PyInt_FromLong(0);
2156         else if (!cased && islower(*p))
2157             cased = 1;
2158     }
2159     return PyInt_FromLong(cased);
2160 }
2161
2162
2163 static char isupper__doc__[] =
2164 "S.isupper() -> int\n\
2165 \n\
2166 Return 1 if  all cased characters in S are uppercase and there is\n\
2167 at least one cased character in S, 0 otherwise.";
2168
2169 static PyObject*
2170 string_isupper(PyStringObject *self, PyObject *args)
2171 {
2172     register const unsigned char *p
2173         = (unsigned char *) PyString_AS_STRING(self);
2174     register const unsigned char *e;
2175     int cased;
2176
2177     if (!PyArg_NoArgs(args))
2178         return NULL;
2179
2180     /* Shortcut for single character strings */
2181     if (PyString_GET_SIZE(self) == 1)
2182         return PyInt_FromLong(isupper(*p) != 0);
2183
2184     /* Special case for empty strings */
2185     if (PyString_GET_SIZE(self) == 0)
2186         return PyInt_FromLong(0);
2187
2188     e = p + PyString_GET_SIZE(self);
2189     cased = 0;
2190     for (; p < e; p++) {
2191         if (islower(*p))
2192             return PyInt_FromLong(0);
2193         else if (!cased && isupper(*p))
2194             cased = 1;
2195     }
2196     return PyInt_FromLong(cased);
2197 }
2198
2199
2200 static char istitle__doc__[] =
2201 "S.istitle() -> int\n\
2202 \n\
2203 Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2204 may only follow uncased characters and lowercase characters only cased\n\
2205 ones. Return 0 otherwise.";
2206
2207 static PyObject*
2208 string_istitle(PyStringObject *self, PyObject *args)
2209 {
2210     register const unsigned char *p
2211         = (unsigned char *) PyString_AS_STRING(self);
2212     register const unsigned char *e;
2213     int cased, previous_is_cased;
2214
2215     if (!PyArg_NoArgs(args))
2216         return NULL;
2217
2218     /* Shortcut for single character strings */
2219     if (PyString_GET_SIZE(self) == 1)
2220         return PyInt_FromLong(isupper(*p) != 0);
2221
2222     /* Special case for empty strings */
2223     if (PyString_GET_SIZE(self) == 0)
2224         return PyInt_FromLong(0);
2225
2226     e = p + PyString_GET_SIZE(self);
2227     cased = 0;
2228     previous_is_cased = 0;
2229     for (; p < e; p++) {
2230         register const unsigned char ch = *p;
2231
2232         if (isupper(ch)) {
2233             if (previous_is_cased)
2234                 return PyInt_FromLong(0);
2235             previous_is_cased = 1;
2236             cased = 1;
2237         }
2238         else if (islower(ch)) {
2239             if (!previous_is_cased)
2240                 return PyInt_FromLong(0);
2241             previous_is_cased = 1;
2242             cased = 1;
2243         }
2244         else
2245             previous_is_cased = 0;
2246     }
2247     return PyInt_FromLong(cased);
2248 }
2249
2250
2251 static char splitlines__doc__[] =
2252 "S.splitlines([keepends]]) -> list of strings\n\
2253 \n\
2254 Return a list of the lines in S, breaking at line boundaries.\n\
2255 Line breaks are not included in the resulting list unless keepends\n\
2256 is given and true.";
2257
2258 #define SPLIT_APPEND(data, left, right)                                 \
2259         str = PyString_FromStringAndSize(data + left, right - left);    \
2260         if (!str)                                                       \
2261             goto onError;                                               \
2262         if (PyList_Append(list, str)) {                                 \
2263             Py_DECREF(str);                                             \
2264             goto onError;                                               \
2265         }                                                               \
2266         else                                                            \
2267             Py_DECREF(str);
2268
2269 static PyObject*
2270 string_splitlines(PyStringObject *self, PyObject *args)
2271 {
2272     register int i;
2273     register int j;
2274     int len;
2275     int keepends = 0;
2276     PyObject *list;
2277     PyObject *str;
2278     char *data;
2279
2280     if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
2281         return NULL;
2282
2283     data = PyString_AS_STRING(self);
2284     len = PyString_GET_SIZE(self);
2285
2286     list = PyList_New(0);
2287     if (!list)
2288         goto onError;
2289
2290     for (i = j = 0; i < len; ) {
2291         int eol;
2292
2293         /* Find a line and append it */
2294         while (i < len && data[i] != '\n' && data[i] != '\r')
2295             i++;
2296
2297         /* Skip the line break reading CRLF as one line break */
2298         eol = i;
2299         if (i < len) {
2300             if (data[i] == '\r' && i + 1 < len &&
2301                 data[i+1] == '\n')
2302                 i += 2;
2303             else
2304                 i++;
2305             if (keepends)
2306                 eol = i;
2307         }
2308         SPLIT_APPEND(data, j, eol);
2309         j = i;
2310     }
2311     if (j < len) {
2312         SPLIT_APPEND(data, j, len);
2313     }
2314
2315     return list;
2316
2317  onError:
2318     Py_DECREF(list);
2319     return NULL;
2320 }
2321
2322 #undef SPLIT_APPEND
2323
2324 \f
2325 static PyMethodDef
2326 string_methods[] = {
2327         /* Counterparts of the obsolete stropmodule functions; except
2328            string.maketrans(). */
2329         {"join",       (PyCFunction)string_join,       1, join__doc__},
2330         {"split",       (PyCFunction)string_split,       1, split__doc__},
2331         {"lower",      (PyCFunction)string_lower,      1, lower__doc__},
2332         {"upper",       (PyCFunction)string_upper,       1, upper__doc__},
2333         {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2334         {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2335         {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2336         {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2337         {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
2338         {"isalpha", (PyCFunction)string_isalpha, 0, isalpha__doc__},
2339         {"isalnum", (PyCFunction)string_isalnum, 0, isalnum__doc__},
2340         {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2341         {"count",      (PyCFunction)string_count,      1, count__doc__},
2342         {"endswith",   (PyCFunction)string_endswith,   1, endswith__doc__},
2343         {"find",       (PyCFunction)string_find,       1, find__doc__},
2344         {"index",      (PyCFunction)string_index,      1, index__doc__},
2345         {"lstrip",     (PyCFunction)string_lstrip,     1, lstrip__doc__},
2346         {"replace",     (PyCFunction)string_replace,     1, replace__doc__},
2347         {"rfind",       (PyCFunction)string_rfind,       1, rfind__doc__},
2348         {"rindex",      (PyCFunction)string_rindex,      1, rindex__doc__},
2349         {"rstrip",      (PyCFunction)string_rstrip,      1, rstrip__doc__},
2350         {"startswith",  (PyCFunction)string_startswith,  1, startswith__doc__},
2351         {"strip",       (PyCFunction)string_strip,       1, strip__doc__},
2352         {"swapcase",    (PyCFunction)string_swapcase,    1, swapcase__doc__},
2353         {"translate",   (PyCFunction)string_translate,   1, translate__doc__},
2354         {"title",       (PyCFunction)string_title,       1, title__doc__},
2355         {"ljust",       (PyCFunction)string_ljust,       1, ljust__doc__},
2356         {"rjust",       (PyCFunction)string_rjust,       1, rjust__doc__},
2357         {"center",      (PyCFunction)string_center,      1, center__doc__},
2358         {"encode",      (PyCFunction)string_encode,      1, encode__doc__},
2359         {"expandtabs",  (PyCFunction)string_expandtabs,  1, expandtabs__doc__},
2360         {"splitlines",  (PyCFunction)string_splitlines,  1, splitlines__doc__},
2361 #if 0
2362         {"zfill",       (PyCFunction)string_zfill,       1, zfill__doc__},
2363 #endif
2364         {NULL,     NULL}                     /* sentinel */
2365 };
2366
2367 static PyObject *
2368 string_getattr(PyStringObject *s, char *name)
2369 {
2370         return Py_FindMethod(string_methods, (PyObject*)s, name);
2371 }
2372
2373
2374 PyTypeObject PyString_Type = {
2375         PyObject_HEAD_INIT(&PyType_Type)
2376         0,
2377         "string",
2378         sizeof(PyStringObject),
2379         sizeof(char),
2380         (destructor)string_dealloc, /*tp_dealloc*/
2381         (printfunc)string_print, /*tp_print*/
2382         (getattrfunc)string_getattr,            /*tp_getattr*/
2383         0,              /*tp_setattr*/
2384         (cmpfunc)string_compare, /*tp_compare*/
2385         (reprfunc)string_repr, /*tp_repr*/
2386         0,              /*tp_as_number*/
2387         &string_as_sequence,    /*tp_as_sequence*/
2388         0,              /*tp_as_mapping*/
2389         (hashfunc)string_hash, /*tp_hash*/
2390         0,              /*tp_call*/
2391         (reprfunc)string_str,   /*tp_str*/
2392         0,              /*tp_getattro*/
2393         0,              /*tp_setattro*/
2394         &string_as_buffer,      /*tp_as_buffer*/
2395         Py_TPFLAGS_DEFAULT,     /*tp_flags*/
2396         0,              /*tp_doc*/
2397 };
2398
2399 void
2400 PyString_Concat(register PyObject **pv, register PyObject *w)
2401 {
2402         register PyObject *v;
2403         if (*pv == NULL)
2404                 return;
2405         if (w == NULL || !PyString_Check(*pv)) {
2406                 Py_DECREF(*pv);
2407                 *pv = NULL;
2408                 return;
2409         }
2410         v = string_concat((PyStringObject *) *pv, w);
2411         Py_DECREF(*pv);
2412         *pv = v;
2413 }
2414
2415 void
2416 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
2417 {
2418         PyString_Concat(pv, w);
2419         Py_XDECREF(w);
2420 }
2421
2422
2423 /* The following function breaks the notion that strings are immutable:
2424    it changes the size of a string.  We get away with this only if there
2425    is only one module referencing the object.  You can also think of it
2426    as creating a new string object and destroying the old one, only
2427    more efficiently.  In any case, don't use this if the string may
2428    already be known to some other part of the code... */
2429
2430 int
2431 _PyString_Resize(PyObject **pv, int newsize)
2432 {
2433         register PyObject *v;
2434         register PyStringObject *sv;
2435         v = *pv;
2436         if (!PyString_Check(v) || v->ob_refcnt != 1) {
2437                 *pv = 0;
2438                 Py_DECREF(v);
2439                 PyErr_BadInternalCall();
2440                 return -1;
2441         }
2442         /* XXX UNREF/NEWREF interface should be more symmetrical */
2443 #ifdef Py_REF_DEBUG
2444         --_Py_RefTotal;
2445 #endif
2446         _Py_ForgetReference(v);
2447         *pv = (PyObject *)
2448                 PyObject_REALLOC((char *)v,
2449                         sizeof(PyStringObject) + newsize * sizeof(char));
2450         if (*pv == NULL) {
2451                 PyObject_DEL(v);
2452                 PyErr_NoMemory();
2453                 return -1;
2454         }
2455         _Py_NewReference(*pv);
2456         sv = (PyStringObject *) *pv;
2457         sv->ob_size = newsize;
2458         sv->ob_sval[newsize] = '\0';
2459         return 0;
2460 }
2461
2462 /* Helpers for formatstring */
2463
2464 static PyObject *
2465 getnextarg(PyObject *args, int arglen, int *p_argidx)
2466 {
2467         int argidx = *p_argidx;
2468         if (argidx < arglen) {
2469                 (*p_argidx)++;
2470                 if (arglen < 0)
2471                         return args;
2472                 else
2473                         return PyTuple_GetItem(args, argidx);
2474         }
2475         PyErr_SetString(PyExc_TypeError,
2476                         "not enough arguments for format string");
2477         return NULL;
2478 }
2479
/* Format flags, one bit each, named after the flag character of a
 * conversion specification they stand for.
 */
#define F_LJUST 0x01    /* '-' */
#define F_SIGN  0x02    /* '+' */
#define F_BLANK 0x04    /* ' ' */
#define F_ALT   0x08    /* '#' */
#define F_ZERO  0x10    /* '0' */
2492
2493 static int
2494 formatfloat(char *buf, size_t buflen, int flags,
2495             int prec, int type, PyObject *v)
2496 {
2497         /* fmt = '%#.' + `prec` + `type`
2498            worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
2499         char fmt[20];
2500         double x;
2501         if (!PyArg_Parse(v, "d;float argument required", &x))
2502                 return -1;
2503         if (prec < 0)
2504                 prec = 6;
2505         if (type == 'f' && fabs(x)/1e25 >= 1e25)
2506                 type = 'g';
2507         sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
2508         /* worst case length calc to ensure no buffer overrun:
2509              fmt = %#.<prec>g
2510              buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
2511                 for any double rep.)
2512              len = 1 + prec + 1 + 2 + 5 = 9 + prec
2513            If prec=0 the effective precision is 1 (the leading digit is
2514            always given), therefore increase by one to 10+prec. */
2515         if (buflen <= (size_t)10 + (size_t)prec) {
2516                 PyErr_SetString(PyExc_OverflowError,
2517                         "formatted float is too long (precision too large?)");
2518                 return -1;
2519         }
2520         sprintf(buf, fmt, x);
2521         return strlen(buf);
2522 }
2523
2524 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2525  * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
2526  * Python's regular ints.
2527  * Return value:  a new PyString*, or NULL if error.
2528  *  .  *pbuf is set to point into it,
2529  *     *plen set to the # of chars following that.
2530  *     Caller must decref it when done using pbuf.
2531  *     The string starting at *pbuf is of the form
2532  *         "-"? ("0x" | "0X")? digit+
2533  *     "0x"/"0X" are present only for x and X conversions, with F_ALT
2534  *         set in flags.  The case of hex digits will be correct,
2535  *     There will be at least prec digits, zero-filled on the left if
2536  *         necessary to get that many.
2537  * val          object to be converted
2538  * flags        bitmask of format flags; only F_ALT is looked at
2539  * prec         minimum number of digits; 0-fill on left if needed
2540  * type         a character in [duoxX]; u acts the same as d
2541  *
2542  * CAUTION:  o, x and X conversions on regular ints can never
2543  * produce a '-' sign, but can for Python's unbounded ints.
2544  */
2545 PyObject*
2546 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2547                      char **pbuf, int *plen)
2548 {
2549         PyObject *result = NULL;
2550         char *buf;
2551         int i;
2552         int sign;       /* 1 if '-', else 0 */
2553         int len;        /* number of characters */
2554         int numdigits;  /* len == numnondigits + numdigits */
2555         int numnondigits = 0;
2556
2557         switch (type) {
2558         case 'd':
2559         case 'u':
2560                 result = val->ob_type->tp_str(val);
2561                 break;
2562         case 'o':
2563                 result = val->ob_type->tp_as_number->nb_oct(val);
2564                 break;
2565         case 'x':
2566         case 'X':
2567                 numnondigits = 2;
2568                 result = val->ob_type->tp_as_number->nb_hex(val);
2569                 break;
2570         default:
2571                 assert(!"'type' not in [duoxX]");
2572         }
2573         if (!result)
2574                 return NULL;
2575
2576         /* To modify the string in-place, there can only be one reference. */
2577         if (result->ob_refcnt != 1) {
2578                 PyErr_BadInternalCall();
2579                 return NULL;
2580         }
2581         buf = PyString_AsString(result);
2582         len = PyString_Size(result);
2583         if (buf[len-1] == 'L') {
2584                 --len;
2585                 buf[len] = '\0';
2586         }
2587         sign = buf[0] == '-';
2588         numnondigits += sign;
2589         numdigits = len - numnondigits;
2590         assert(numdigits > 0);
2591
2592         /* Get rid of base marker unless F_ALT */
2593         if ((flags & F_ALT) == 0) {
2594                 /* Need to skip 0x, 0X or 0. */
2595                 int skipped = 0;
2596                 switch (type) {
2597                 case 'o':
2598                         assert(buf[sign] == '0');
2599                         /* If 0 is only digit, leave it alone. */
2600                         if (numdigits > 1) {
2601                                 skipped = 1;
2602                                 --numdigits;
2603                         }
2604                         break;
2605                 case 'x':
2606                 case 'X':
2607                         assert(buf[sign] == '0');
2608                         assert(buf[sign + 1] == 'x');
2609                         skipped = 2;
2610                         numnondigits -= 2;
2611                         break;
2612                 }
2613                 if (skipped) {
2614                         buf += skipped;
2615                         len -= skipped;
2616                         if (sign)
2617                                 buf[0] = '-';
2618                 }
2619                 assert(len == numnondigits + numdigits);
2620                 assert(numdigits > 0);
2621         }
2622
2623         /* Fill with leading zeroes to meet minimum width. */
2624         if (prec > numdigits) {
2625                 PyObject *r1 = PyString_FromStringAndSize(NULL,
2626                                         numnondigits + prec);
2627                 char *b1;
2628                 if (!r1) {
2629                         Py_DECREF(result);
2630                         return NULL;
2631                 }
2632                 b1 = PyString_AS_STRING(r1);
2633                 for (i = 0; i < numnondigits; ++i)
2634                         *b1++ = *buf++;
2635                 for (i = 0; i < prec - numdigits; i++)
2636                         *b1++ = '0';
2637                 for (i = 0; i < numdigits; i++)
2638                         *b1++ = *buf++;
2639                 *b1 = '\0';
2640                 Py_DECREF(result);
2641                 result = r1;
2642                 buf = PyString_AS_STRING(result);
2643                 len = numnondigits + prec;
2644         }
2645
2646         /* Fix up case for hex conversions. */
2647         switch (type) {
2648         case 'x':
2649                 /* Need to convert all upper case letters to lower case. */
2650                 for (i = 0; i < len; i++)
2651                         if (buf[i] >= 'A' && buf[i] <= 'F')
2652                                 buf[i] += 'a'-'A';
2653                 break;
2654         case 'X':
2655                 /* Need to convert 0x to 0X (and -0x to -0X). */
2656                 if (buf[sign + 1] == 'x')
2657                         buf[sign + 1] = 'X';
2658                 break;
2659         }
2660         *pbuf = buf;
2661         *plen = len;
2662         return result;
2663 }
2664
2665 static int
2666 formatint(char *buf, size_t buflen, int flags,
2667           int prec, int type, PyObject *v)
2668 {
2669         /* fmt = '%#.' + `prec` + 'l' + `type`
2670            worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
2671            + 1 + 1 = 24 */
2672         char fmt[64];   /* plenty big enough! */
2673         long x;
2674         if (!PyArg_Parse(v, "l;int argument required", &x))
2675                 return -1;
2676         if (prec < 0)
2677                 prec = 1;
2678         sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
2679         /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
2680            worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
2681         if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
2682                 PyErr_SetString(PyExc_OverflowError,
2683                         "formatted integer is too long (precision too large?)");
2684                 return -1;
2685         }
2686         sprintf(buf, fmt, x);
2687         /* When converting 0 under %#x or %#X, C leaves off the base marker,
2688          * but we want it (for consistency with other %#x conversions, and
2689          * for consistency with Python's hex() function).
2690          * BUG 28-Apr-2001 tim:  At least two platform Cs (Metrowerks &
2691          * Compaq Tru64) violate the std by converting 0 w/ leading 0x anyway.
2692          * So add it only if the platform didn't already.
2693          */
2694         if (x == 0 && (flags & F_ALT) && (type == 'x' || type == 'X') &&
2695             buf[1] != (char)type) /* this last always true under std C */
2696                 {
2697                 memmove(buf+2, buf, strlen(buf) + 1);
2698                 buf[0] = '0';
2699                 buf[1] = (char)type;
2700         }
2701         return strlen(buf);
2702 }
2703
2704 static int
2705 formatchar(char *buf, size_t buflen, PyObject *v)
2706 {
2707         /* presume that the buffer is at least 2 characters long */
2708         if (PyString_Check(v)) {
2709                 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
2710                         return -1;
2711         }
2712         else {
2713                 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
2714                         return -1;
2715         }
2716         buf[1] = '\0';
2717         return 1;
2718 }
2719
2720
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the size of the scratch buffer in which floats, ints
   and chars are formatted.  XXX This is a magic number.  Every formatting
   routine bounds-checks against it so that no overflow can occur, but a
   better solution may be to malloc a buffer of the appropriate size for
   each format.  For now, the current solution is sufficient.
*/
#define FORMATBUFLEN ((size_t)120)
2730
2731 PyObject *
2732 PyString_Format(PyObject *format, PyObject *args)
2733 {
2734         char *fmt, *res;
2735         int fmtcnt, rescnt, reslen, arglen, argidx;
2736         int args_owned = 0;
2737         PyObject *result, *orig_args, *v, *w;
2738         PyObject *dict = NULL;
2739         if (format == NULL || !PyString_Check(format) || args == NULL) {
2740                 PyErr_BadInternalCall();
2741                 return NULL;
2742         }
2743         orig_args = args;
2744         fmt = PyString_AsString(format);
2745         fmtcnt = PyString_Size(format);
2746         reslen = rescnt = fmtcnt + 100;
2747         result = PyString_FromStringAndSize((char *)NULL, reslen);
2748         if (result == NULL)
2749                 return NULL;
2750         res = PyString_AsString(result);
2751         if (PyTuple_Check(args)) {
2752                 arglen = PyTuple_Size(args);
2753                 argidx = 0;
2754         }
2755         else {
2756                 arglen = -1;
2757                 argidx = -2;
2758         }
2759         if (args->ob_type->tp_as_mapping)
2760                 dict = args;
2761         while (--fmtcnt >= 0) {
2762                 if (*fmt != '%') {
2763                         if (--rescnt < 0) {
2764                                 rescnt = fmtcnt + 100;
2765                                 reslen += rescnt;
2766                                 if (_PyString_Resize(&result, reslen) < 0)
2767                                         return NULL;
2768                                 res = PyString_AsString(result)
2769                                         + reslen - rescnt;
2770                                 --rescnt;
2771                         }
2772                         *res++ = *fmt++;
2773                 }
2774                 else {
2775                         /* Got a format specifier */
2776                         int flags = 0;
2777                         int width = -1;
2778                         int prec = -1;
2779                         int size = 0;
2780                         int c = '\0';
2781                         int fill;
2782                         PyObject *v = NULL;
2783                         PyObject *temp = NULL;
2784                         char *pbuf;
2785                         int sign;
2786                         int len;
2787                         char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
2788                         char *fmt_start = fmt;
2789                         int argidx_start = argidx;
2790
2791                         fmt++;
2792                         if (*fmt == '(') {
2793                                 char *keystart;
2794                                 int keylen;
2795                                 PyObject *key;
2796                                 int pcount = 1;
2797
2798                                 if (dict == NULL) {
2799                                         PyErr_SetString(PyExc_TypeError,
2800                                                  "format requires a mapping");
2801                                         goto error;
2802                                 }
2803                                 ++fmt;
2804                                 --fmtcnt;
2805                                 keystart = fmt;
2806                                 /* Skip over balanced parentheses */
2807                                 while (pcount > 0 && --fmtcnt >= 0) {
2808                                         if (*fmt == ')')
2809                                                 --pcount;
2810                                         else if (*fmt == '(')
2811                                                 ++pcount;
2812                                         fmt++;
2813                                 }
2814                                 keylen = fmt - keystart - 1;
2815                                 if (fmtcnt < 0 || pcount > 0) {
2816                                         PyErr_SetString(PyExc_ValueError,
2817                                                    "incomplete format key");
2818                                         goto error;
2819                                 }
2820                                 key = PyString_FromStringAndSize(keystart,
2821                                                                  keylen);
2822                                 if (key == NULL)
2823                                         goto error;
2824                                 if (args_owned) {
2825                                         Py_DECREF(args);
2826                                         args_owned = 0;
2827                                 }
2828                                 args = PyObject_GetItem(dict, key);
2829                                 Py_DECREF(key);
2830                                 if (args == NULL) {
2831                                         goto error;
2832                                 }
2833                                 args_owned = 1;
2834                                 arglen = -1;
2835                                 argidx = -2;
2836                         }
2837                         while (--fmtcnt >= 0) {
2838                                 switch (c = *fmt++) {
2839                                 case '-': flags |= F_LJUST; continue;
2840                                 case '+': flags |= F_SIGN; continue;
2841                                 case ' ': flags |= F_BLANK; continue;
2842                                 case '#': flags |= F_ALT; continue;
2843                                 case '0': flags |= F_ZERO; continue;
2844                                 }
2845                                 break;
2846                         }
2847                         if (c == '*') {
2848                                 v = getnextarg(args, arglen, &argidx);
2849                                 if (v == NULL)
2850                                         goto error;
2851                                 if (!PyInt_Check(v)) {
2852                                         PyErr_SetString(PyExc_TypeError,
2853                                                         "* wants int");
2854                                         goto error;
2855                                 }
2856                                 width = PyInt_AsLong(v);
2857                                 if (width < 0) {
2858                                         flags |= F_LJUST;
2859                                         width = -width;
2860                                 }
2861                                 if (--fmtcnt >= 0)
2862                                         c = *fmt++;
2863                         }
2864                         else if (c >= 0 && isdigit(c)) {
2865                                 width = c - '0';
2866                                 while (--fmtcnt >= 0) {
2867                                         c = Py_CHARMASK(*fmt++);
2868                                         if (!isdigit(c))
2869                                                 break;
2870                                         if ((width*10) / 10 != width) {
2871                                                 PyErr_SetString(
2872                                                         PyExc_ValueError,
2873                                                         "width too big");
2874                                                 goto error;
2875                                         }
2876                                         width = width*10 + (c - '0');
2877                                 }
2878                         }
2879                         if (c == '.') {
2880                                 prec = 0;
2881                                 if (--fmtcnt >= 0)
2882                                         c = *fmt++;
2883                                 if (c == '*') {
2884                                         v = getnextarg(args, arglen, &argidx);
2885                                         if (v == NULL)
2886                                                 goto error;
2887                                         if (!PyInt_Check(v)) {
2888                                                 PyErr_SetString(
2889                                                         PyExc_TypeError,
2890                                                         "* wants int");
2891                                                 goto error;
2892                                         }
2893                                         prec = PyInt_AsLong(v);
2894                                         if (prec < 0)
2895                                                 prec = 0;
2896                                         if (--fmtcnt >= 0)
2897                                                 c = *fmt++;
2898                                 }
2899                                 else if (c >= 0 && isdigit(c)) {
2900                                         prec = c - '0';
2901                                         while (--fmtcnt >= 0) {
2902                                                 c = Py_CHARMASK(*fmt++);
2903                                                 if (!isdigit(c))
2904                                                         break;
2905                                                 if ((prec*10) / 10 != prec) {
2906                                                         PyErr_SetString(
2907                                                             PyExc_ValueError,
2908                                                             "prec too big");
2909                                                         goto error;
2910                                                 }
2911                                                 prec = prec*10 + (c - '0');
2912                                         }
2913                                 }
2914                         } /* prec */
2915                         if (fmtcnt >= 0) {
2916                                 if (c == 'h' || c == 'l' || c == 'L') {
2917                                         size = c;
2918                                         if (--fmtcnt >= 0)
2919                                                 c = *fmt++;
2920                                 }
2921                         }
2922                         if (fmtcnt < 0) {
2923                                 PyErr_SetString(PyExc_ValueError,
2924                                                 "incomplete format");
2925                                 goto error;
2926                         }
2927                         if (c != '%') {
2928                                 v = getnextarg(args, arglen, &argidx);
2929                                 if (v == NULL)
2930                                         goto error;
2931                         }
2932                         sign = 0;
2933                         fill = ' ';
2934                         switch (c) {
2935                         case '%':
2936                                 pbuf = "%";
2937                                 len = 1;
2938                                 break;
2939                         case 's':
2940                         case 'r':
2941                                 if (PyUnicode_Check(v)) {
2942                                         fmt = fmt_start;
2943                                         argidx = argidx_start;
2944                                         goto unicode;
2945                                 }
2946                                 if (c == 's')
2947                                 temp = PyObject_Str(v);
2948                                 else
2949                                         temp = PyObject_Repr(v);
2950                                 if (temp == NULL)
2951                                         goto error;
2952                                 if (!PyString_Check(temp)) {
2953                                         PyErr_SetString(PyExc_TypeError,
2954                                           "%s argument has non-string str()");
2955                                         goto error;
2956                                 }
2957                                 pbuf = PyString_AsString(temp);
2958                                 len = PyString_Size(temp);
2959                                 if (prec >= 0 && len > prec)
2960                                         len = prec;
2961                                 break;
2962                         case 'i':
2963                         case 'd':
2964                         case 'u':
2965                         case 'o':
2966                         case 'x':
2967                         case 'X':
2968                                 if (c == 'i')
2969                                         c = 'd';
2970                                 if (PyLong_Check(v)) {
2971                                         temp = _PyString_FormatLong(v, flags,
2972                                                 prec, c, &pbuf, &len);
2973                                         if (!temp)
2974                                                 goto error;
2975                                         /* unbounded ints can always produce
2976                                            a sign character! */
2977                                         sign = 1;
2978                                 }
2979                                 else {
2980                                         pbuf = formatbuf;
2981                                         len = formatint(pbuf, sizeof(formatbuf),
2982                                                         flags, prec, c, v);
2983                                         if (len < 0)
2984                                                 goto error;
2985                                         /* only d conversion is signed */
2986                                         sign = c == 'd';
2987                                 }
2988                                 if (flags & F_ZERO)
2989                                         fill = '0';
2990                                 break;
2991                         case 'e':
2992                         case 'E':
2993                         case 'f':
2994                         case 'g':
2995                         case 'G':
2996                                 pbuf = formatbuf;
2997                                 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
2998                                 if (len < 0)
2999                                         goto error;
3000                                 sign = 1;
3001                                 if (flags & F_ZERO)
3002                                         fill = '0';
3003                                 break;
3004                         case 'c':
3005                                 pbuf = formatbuf;
3006                                 len = formatchar(pbuf, sizeof(formatbuf), v);
3007                                 if (len < 0)
3008                                         goto error;
3009                                 break;
3010                         default:
3011                                 PyErr_Format(PyExc_ValueError,
3012                                   "unsupported format character '%c' (0x%x) "
3013                                   "at index %i",
3014                                   c, c, fmt - 1 - PyString_AsString(format));
3015                                 goto error;
3016                         }
3017                         if (sign) {
3018                                 if (*pbuf == '-' || *pbuf == '+') {
3019                                         sign = *pbuf++;
3020                                         len--;
3021                                 }
3022                                 else if (flags & F_SIGN)
3023                                         sign = '+';
3024                                 else if (flags & F_BLANK)
3025                                         sign = ' ';
3026                                 else
3027                                         sign = 0;
3028                         }
3029                         if (width < len)
3030                                 width = len;
3031                         if (rescnt < width + (sign != 0)) {
3032                                 reslen -= rescnt;
3033                                 rescnt = width + fmtcnt + 100;
3034                                 reslen += rescnt;
3035                                 if (_PyString_Resize(&result, reslen) < 0)
3036                                         return NULL;
3037                                 res = PyString_AsString(result)
3038                                         + reslen - rescnt;
3039                         }
3040                         if (sign) {
3041                                 if (fill != ' ')
3042                                         *res++ = sign;
3043                                 rescnt--;
3044                                 if (width > len)
3045                                         width--;
3046                         }
3047                         if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3048                                 assert(pbuf[0] == '0');
3049                                 assert(pbuf[1] == c);
3050                                 if (fill != ' ') {
3051                                         *res++ = *pbuf++;
3052                                         *res++ = *pbuf++;
3053                                 }
3054                                 rescnt -= 2;
3055                                 width -= 2;
3056                                 if (width < 0)
3057                                         width = 0;
3058                                 len -= 2;
3059                         }
3060                         if (width > len && !(flags & F_LJUST)) {
3061                                 do {
3062                                         --rescnt;
3063                                         *res++ = fill;
3064                                 } while (--width > len);
3065                         }
3066                         if (fill == ' ') {
3067                                 if (sign)
3068                                         *res++ = sign;
3069                                 if ((flags & F_ALT) &&
3070                                     (c == 'x' || c == 'X')) {
3071                                         assert(pbuf[0] == '0');
3072                                         assert(pbuf[1] == c);
3073                                         *res++ = *pbuf++;
3074                                         *res++ = *pbuf++;
3075                                 }
3076                         }
3077                         memcpy(res, pbuf, len);
3078                         res += len;
3079                         rescnt -= len;
3080                         while (--width >= len) {
3081                                 --rescnt;
3082                                 *res++ = ' ';
3083                         }
3084                         if (dict && (argidx < arglen) && c != '%') {
3085                                 PyErr_SetString(PyExc_TypeError,
3086                                            "not all arguments converted");
3087                                 goto error;
3088                         }
3089                         Py_XDECREF(temp);
3090                 } /* '%' */
3091         } /* until end */
3092         if (argidx < arglen && !dict) {
3093                 PyErr_SetString(PyExc_TypeError,
3094                                 "not all arguments converted");
3095                 goto error;
3096         }
3097         if (args_owned) {
3098                 Py_DECREF(args);
3099         }
3100         _PyString_Resize(&result, reslen - rescnt);
3101         return result;
3102
3103  unicode:
3104         if (args_owned) {
3105                 Py_DECREF(args);
3106                 args_owned = 0;
3107         }
3108         /* Fiddle args right (remove the first argidx arguments) */
3109         if (PyTuple_Check(orig_args) && argidx > 0) {
3110                 PyObject *v;
3111                 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3112                 v = PyTuple_New(n);
3113                 if (v == NULL)
3114                         goto error;
3115                 while (--n >= 0) {
3116                         PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3117                         Py_INCREF(w);
3118                         PyTuple_SET_ITEM(v, n, w);
3119                 }
3120                 args = v;
3121         } else {
3122                 Py_INCREF(orig_args);
3123                 args = orig_args;
3124         }
3125         args_owned = 1;
3126         /* Take what we have of the result and let the Unicode formatting
3127            function format the rest of the input. */
3128         rescnt = res - PyString_AS_STRING(result);
3129         if (_PyString_Resize(&result, rescnt))
3130                 goto error;
3131         fmtcnt = PyString_GET_SIZE(format) - \
3132                  (fmt - PyString_AS_STRING(format));
3133         format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3134         if (format == NULL)
3135                 goto error;
3136         v = PyUnicode_Format(format, args);
3137         Py_DECREF(format);
3138         if (v == NULL)
3139                 goto error;
3140         /* Paste what we have (result) to what the Unicode formatting
3141            function returned (v) and return the result (or error) */
3142         w = PyUnicode_Concat(result, v);
3143         Py_DECREF(result);
3144         Py_DECREF(v);
3145         Py_DECREF(args);
3146         return w;
3147
3148  error:
3149         Py_DECREF(result);
3150         if (args_owned) {
3151                 Py_DECREF(args);
3152         }
3153         return NULL;
3154 }
3155
3156
3157 #ifdef INTERN_STRINGS
3158
3159 /* This dictionary will leak at PyString_Fini() time.  That's acceptable
3160  * because PyString_Fini() specifically frees interned strings that are
3161  * only referenced by this dictionary.  The CVS log entry for revision 2.45
3162  * says:
3163  *
3164  *    Change the Fini function to only remove otherwise unreferenced
3165  *    strings from the interned table.  There are references in
3166  *    hard-to-find static variables all over the interpreter, and it's not
3167  *    worth trying to get rid of all those; but "uninterning" isn't fair
3168  *    either and may cause subtle failures later -- so we have to keep them
3169  *    in the interned table.
3170  */
3171 static PyObject *interned;
3172
3173 void
3174 PyString_InternInPlace(PyObject **p)
3175 {
3176         register PyStringObject *s = (PyStringObject *)(*p);
3177         PyObject *t;
3178         if (s == NULL || !PyString_Check(s))
3179                 Py_FatalError("PyString_InternInPlace: strings only please!");
3180         if ((t = s->ob_sinterned) != NULL) {
3181                 if (t == (PyObject *)s)
3182                         return;
3183                 Py_INCREF(t);
3184                 *p = t;
3185                 Py_DECREF(s);
3186                 return;
3187         }
3188         if (interned == NULL) {
3189                 interned = PyDict_New();
3190                 if (interned == NULL)
3191                         return;
3192         }
3193         if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3194                 Py_INCREF(t);
3195                 *p = s->ob_sinterned = t;
3196                 Py_DECREF(s);
3197                 return;
3198         }
3199         t = (PyObject *)s;
3200         if (PyDict_SetItem(interned, t, t) == 0) {
3201                 s->ob_sinterned = t;
3202                 return;
3203         }
3204         PyErr_Clear();
3205 }
3206
3207
3208 PyObject *
3209 PyString_InternFromString(const char *cp)
3210 {
3211         PyObject *s = PyString_FromString(cp);
3212         if (s == NULL)
3213                 return NULL;
3214         PyString_InternInPlace(&s);
3215         return s;
3216 }
3217
3218 #endif
3219
3220 void
3221 PyString_Fini(void)
3222 {
3223         int i;
3224         for (i = 0; i < UCHAR_MAX + 1; i++) {
3225                 Py_XDECREF(characters[i]);
3226                 characters[i] = NULL;
3227         }
3228 #ifndef DONT_SHARE_SHORT_STRINGS
3229         Py_XDECREF(nullstring);
3230         nullstring = NULL;
3231 #endif
3232 #ifdef INTERN_STRINGS
3233         if (interned) {
3234                 int pos, changed;
3235                 PyObject *key, *value;
3236                 do {
3237                         changed = 0;
3238                         pos = 0;
3239                         while (PyDict_Next(interned, &pos, &key, &value)) {
3240                                 if (key->ob_refcnt == 2 && key == value) {
3241                                         PyDict_DelItem(interned, key);
3242                                         changed = 1;
3243                                 }
3244                         }
3245                 } while (changed);
3246         }
3247 #endif
3248 }
3249
3250 #ifdef INTERN_STRINGS
3251 void _Py_ReleaseInternedStrings(void)
3252 {
3253         if (interned) {
3254                 Py_DECREF(interned);
3255                 interned = NULL;
3256         }
3257 }
3258 #endif /* INTERN_STRINGS */