Objects/stringobject.c

   1
   2 /* String object implementation */
   3
   4 #include "Python.h"
   5
   6 #include <ctype.h>
   7
   8 #ifdef COUNT_ALLOCS
   9 int null_strings, one_strings;
  10 #endif
  11
  12 #ifdef HAVE_LIMITS_H
  13 #include <limits.h>
  14 #else
  15 #ifndef UCHAR_MAX
  16 #define UCHAR_MAX 255
  17 #endif
  18 #endif
  19
  20 static PyStringObject *characters[UCHAR_MAX + 1];
  21 #ifndef DONT_SHARE_SHORT_STRINGS
  22 static PyStringObject *nullstring;
  23 #endif
  24
  25 /*
  26    Newsizedstringobject() and newstringobject() try in certain cases
  27    to share string objects.  When the size of the string is zero,
  28    these routines always return a pointer to the same string object;
  29    when the size is one, they return a pointer to an already existing
  30    object if the contents of the string is known.  For
  31    newstringobject() this is always the case, for
  32    newsizedstringobject() this is the case when the first argument in
  33    not NULL.
  34    A common practice to allocate a string and then fill it in or
  35    change it must be done carefully.  It is only allowed to change the
  36    contents of the string if the obect was gotten from
  37    newsizedstringobject() with a NULL first argument, because in the
  38    future these routines may try to do even more sharing of objects.
  39 */
  40 PyObject *
  41 PyString_FromStringAndSize(const char *str, int size)
  42 {
  43         register PyStringObject *op;
  44 #ifndef DONT_SHARE_SHORT_STRINGS
  45         if (size == 0 && (op = nullstring) != NULL) {
  46 #ifdef COUNT_ALLOCS
  47                 null_strings++;
  48 #endif
  49                 Py_INCREF(op);
  50                 return (PyObject *)op;
  51         }
  52         if (size == 1 && str != NULL &&
  53             (op = characters[*str & UCHAR_MAX]) != NULL)
  54         {
  55 #ifdef COUNT_ALLOCS
  56                 one_strings++;
  57 #endif
  58                 Py_INCREF(op);
  59                 return (PyObject *)op;
  60         }
  61 #endif /* DONT_SHARE_SHORT_STRINGS */
  62
  63         /* PyObject_NewVar is inlined */
  64         op = (PyStringObject *)
  65                 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
  66         if (op == NULL)
  67                 return PyErr_NoMemory();
  68         PyObject_INIT_VAR(op, &PyString_Type, size);
  69 #ifdef CACHE_HASH
  70         op->ob_shash = -1;
  71 #endif
  72 #ifdef INTERN_STRINGS
  73         op->ob_sinterned = NULL;
  74 #endif
  75         if (str != NULL)
  76                 memcpy(op->ob_sval, str, size);
  77         op->ob_sval[size] = '\0';
  78 #ifndef DONT_SHARE_SHORT_STRINGS
  79         if (size == 0) {
  80                 nullstring = op;
  81                 Py_INCREF(op);
  82         } else if (size == 1 && str != NULL) {
  83                 characters[*str & UCHAR_MAX] = op;
  84                 Py_INCREF(op);
  85         }
  86 #endif
  87         return (PyObject *) op;
  88 }
  89
  90 PyObject *
  91 PyString_FromString(const char *str)
  92 {
  93         register size_t size = strlen(str);
  94         register PyStringObject *op;
  95         if (size > INT_MAX) {
  96                 PyErr_SetString(PyExc_OverflowError,
  97                         "string is too long for a Python string");
  98                 return NULL;
  99         }
 100 #ifndef DONT_SHARE_SHORT_STRINGS
 101         if (size == 0 && (op = nullstring) != NULL) {
 102 #ifdef COUNT_ALLOCS
 103                 null_strings++;
 104 #endif
 105                 Py_INCREF(op);
 106                 return (PyObject *)op;
 107         }
 108         if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
 109 #ifdef COUNT_ALLOCS
 110                 one_strings++;
 111 #endif
 112                 Py_INCREF(op);
 113                 return (PyObject *)op;
 114         }
 115 #endif /* DONT_SHARE_SHORT_STRINGS */
 116
 117         /* PyObject_NewVar is inlined */
 118         op = (PyStringObject *)
 119                 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
 120         if (op == NULL)
 121                 return PyErr_NoMemory();
 122         PyObject_INIT_VAR(op, &PyString_Type, size);
 123 #ifdef CACHE_HASH
 124         op->ob_shash = -1;
 125 #endif
 126 #ifdef INTERN_STRINGS
 127         op->ob_sinterned = NULL;
 128 #endif
 129         strcpy(op->ob_sval, str);
 130 #ifndef DONT_SHARE_SHORT_STRINGS
 131         if (size == 0) {
 132                 nullstring = op;
 133                 Py_INCREF(op);
 134         } else if (size == 1) {
 135                 characters[*str & UCHAR_MAX] = op;
 136                 Py_INCREF(op);
 137         }
 138 #endif
 139         return (PyObject *) op;
 140 }
 141
 142 PyObject *PyString_Decode(const char *s,
 143                           int size,
 144                           const char *encoding,
 145                           const char *errors)
 146 {
 147     PyObject *buffer = NULL, *str;
 148
 149     if (encoding == NULL)
 150         encoding = PyUnicode_GetDefaultEncoding();
 151
 152     /* Decode via the codec registry */
 153     buffer = PyBuffer_FromMemory((void *)s, size);
 154     if (buffer == NULL)
 155         goto onError;
 156     str = PyCodec_Decode(buffer, encoding, errors);
 157     if (str == NULL)
 158         goto onError;
 159     /* Convert Unicode to a string using the default encoding */
 160     if (PyUnicode_Check(str)) {
 161         PyObject *temp = str;
 162         str = PyUnicode_AsEncodedString(str, NULL, NULL);
 163         Py_DECREF(temp);
 164         if (str == NULL)
 165             goto onError;
 166     }
 167     if (!PyString_Check(str)) {
 168         PyErr_Format(PyExc_TypeError,
 169                      "decoder did not return a string object (type=%.400s)",
 170                      str->ob_type->tp_name);
 171         Py_DECREF(str);
 172         goto onError;
 173     }
 174     Py_DECREF(buffer);
 175     return str;
 176
 177  onError:
 178     Py_XDECREF(buffer);
 179     return NULL;
 180 }
 181
 182 PyObject *PyString_Encode(const char *s,
 183                           int size,
 184                           const char *encoding,
 185                           const char *errors)
 186 {
 187     PyObject *v, *str;
 188
 189     str = PyString_FromStringAndSize(s, size);
 190     if (str == NULL)
 191         return NULL;
 192     v = PyString_AsEncodedString(str, encoding, errors);
 193     Py_DECREF(str);
 194     return v;
 195 }
 196
 197 PyObject *PyString_AsEncodedString(PyObject *str,
 198                                    const char *encoding,
 199                                    const char *errors)
 200 {
 201     PyObject *v;
 202
 203     if (!PyString_Check(str)) {
 204         PyErr_BadArgument();
 205         goto onError;
 206     }
 207
 208     if (encoding == NULL)
 209         encoding = PyUnicode_GetDefaultEncoding();
 210
 211     /* Encode via the codec registry */
 212     v = PyCodec_Encode(str, encoding, errors);
 213     if (v == NULL)
 214         goto onError;
 215     /* Convert Unicode to a string using the default encoding */
 216     if (PyUnicode_Check(v)) {
 217         PyObject *temp = v;
 218         v = PyUnicode_AsEncodedString(v, NULL, NULL);
 219         Py_DECREF(temp);
 220         if (v == NULL)
 221             goto onError;
 222     }
 223     if (!PyString_Check(v)) {
 224         PyErr_Format(PyExc_TypeError,
 225                      "encoder did not return a string object (type=%.400s)",
 226                      v->ob_type->tp_name);
 227         Py_DECREF(v);
 228         goto onError;
 229     }
 230     return v;
 231
 232  onError:
 233     return NULL;
 234 }
 235
 236 static void
 237 string_dealloc(PyObject *op)
 238 {
 239         PyObject_DEL(op);
 240 }
 241
 242 static int
 243 string_getsize(register PyObject *op)
 244 {
 245         char *s;
 246         int len;
 247         if (PyString_AsStringAndSize(op, &s, &len))
 248                 return -1;
 249         return len;
 250 }
 251
 252 static /*const*/ char *
 253 string_getbuffer(register PyObject *op)
 254 {
 255         char *s;
 256         int len;
 257         if (PyString_AsStringAndSize(op, &s, &len))
 258                 return NULL;
 259         return s;
 260 }
 261
 262 int
 263 PyString_Size(register PyObject *op)
 264 {
 265         if (!PyString_Check(op))
 266                 return string_getsize(op);
 267         return ((PyStringObject *)op) -> ob_size;
 268 }
 269
 270 /*const*/ char *
 271 PyString_AsString(register PyObject *op)
 272 {
 273         if (!PyString_Check(op))
 274                 return string_getbuffer(op);
 275         return ((PyStringObject *)op) -> ob_sval;
 276 }
 277
 278 /* Internal API needed by PyString_AsStringAndSize(): */
 279 extern
 280 PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
 281                                             const char *errors);
 282
 283 int
 284 PyString_AsStringAndSize(register PyObject *obj,
 285                          register char **s,
 286                          register int *len)
 287 {
 288         if (s == NULL) {
 289                 PyErr_BadInternalCall();
 290                 return -1;
 291         }
 292
 293         if (!PyString_Check(obj)) {
 294                 if (PyUnicode_Check(obj)) {
 295                         obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
 296                         if (obj == NULL)
 297                                 return -1;
 298                 }
 299                 else {
 300                         PyErr_Format(PyExc_TypeError,
 301                                      "expected string or Unicode object, "
 302                                      "%.200s found", obj->ob_type->tp_name);
 303                         return -1;
 304                 }
 305         }
 306
 307         *s = PyString_AS_STRING(obj);
 308         if (len != NULL)
 309                 *len = PyString_GET_SIZE(obj);
 310         else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
 311                 PyErr_SetString(PyExc_TypeError,
 312                                 "expected string without null bytes");
 313                 return -1;
 314         }
 315         return 0;
 316 }
 317
 318 /* Methods */
 319
 320 static int
 321 string_print(PyStringObject *op, FILE *fp, int flags)
 322 {
 323         int i;
 324         char c;
 325         int quote;
 326         /* XXX Ought to check for interrupts when writing long strings */
 327         if (flags & Py_PRINT_RAW) {
 328                 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
 329                 return 0;
 330         }
 331
 332         /* figure out which quote to use; single is preferred */
 333         quote = '\'';
 334         if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
 335                 quote = '"';
 336
 337         fputc(quote, fp);
 338         for (i = 0; i < op->ob_size; i++) {
 339                 c = op->ob_sval[i];
 340                 if (c == quote || c == '\\')
 341                         fprintf(fp, "\\%c", c);
 342                 else if (c < ' ' || c >= 0177)
 343                         fprintf(fp, "\\%03o", c & 0377);
 344                 else
 345                         fputc(c, fp);
 346         }
 347         fputc(quote, fp);
 348         return 0;
 349 }
 350
 351 static PyObject *
 352 string_repr(register PyStringObject *op)
 353 {
 354         size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
 355         PyObject *v;
 356         if (newsize > INT_MAX) {
 357                 PyErr_SetString(PyExc_OverflowError,
 358                         "string is too large to make repr");
 359         }
 360         v = PyString_FromStringAndSize((char *)NULL, newsize);
 361         if (v == NULL) {
 362                 return NULL;
 363         }
 364         else {
 365                 register int i;
 366                 register char c;
 367                 register char *p;
 368                 int quote;
 369
 370                 /* figure out which quote to use; single is preferred */
 371                 quote = '\'';
 372                 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
 373                         quote = '"';
 374
 375                 p = ((PyStringObject *)v)->ob_sval;
 376                 *p++ = quote;
 377                 for (i = 0; i < op->ob_size; i++) {
 378                         c = op->ob_sval[i];
 379                         if (c == quote || c == '\\')
 380                                 *p++ = '\\', *p++ = c;
 381                         else if (c < ' ' || c >= 0177) {
 382                                 sprintf(p, "\\%03o", c & 0377);
 383                                 while (*p != '\0')
 384                                         p++;
 385                         }
 386                         else
 387                                 *p++ = c;
 388                 }
 389                 *p++ = quote;
 390                 *p = '\0';
 391                 _PyString_Resize(
 392                         &v, (int) (p - ((PyStringObject *)v)->ob_sval));
 393                 return v;
 394         }
 395 }
 396
 397 static int
 398 string_length(PyStringObject *a)
 399 {
 400         return a->ob_size;
 401 }
 402
 403 static PyObject *
 404 string_concat(register PyStringObject *a, register PyObject *bb)
 405 {
 406         register unsigned int size;
 407         register PyStringObject *op;
 408         if (!PyString_Check(bb)) {
 409                 if (PyUnicode_Check(bb))
 410                     return PyUnicode_Concat((PyObject *)a, bb);
 411                 PyErr_Format(PyExc_TypeError,
 412                              "cannot add type \"%.200s\" to string",
 413                              bb->ob_type->tp_name);
 414                 return NULL;
 415         }
 416 #define b ((PyStringObject *)bb)
 417         /* Optimize cases with empty left or right operand */
 418         if (a->ob_size == 0) {
 419                 Py_INCREF(bb);
 420                 return bb;
 421         }
 422         if (b->ob_size == 0) {
 423                 Py_INCREF(a);
 424                 return (PyObject *)a;
 425         }
 426         size = a->ob_size + b->ob_size;
 427         /* PyObject_NewVar is inlined */
 428         op = (PyStringObject *)
 429                 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
 430         if (op == NULL)
 431                 return PyErr_NoMemory();
 432         PyObject_INIT_VAR(op, &PyString_Type, size);
 433 #ifdef CACHE_HASH
 434         op->ob_shash = -1;
 435 #endif
 436 #ifdef INTERN_STRINGS
 437         op->ob_sinterned = NULL;
 438 #endif
 439         memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
 440         memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
 441         op->ob_sval[size] = '\0';
 442         return (PyObject *) op;
 443 #undef b
 444 }
 445
 446 static PyObject *
 447 string_repeat(register PyStringObject *a, register int n)
 448 {
 449         register int i;
 450         register int size;
 451         register PyStringObject *op;
 452         size_t nbytes;
 453         if (n < 0)
 454                 n = 0;
 455         /* watch out for overflows:  the size can overflow int,
 456          * and the # of bytes needed can overflow size_t
 457          */
 458         size = a->ob_size * n;
 459         if (n && size / n != a->ob_size) {
 460                 PyErr_SetString(PyExc_OverflowError,
 461                         "repeated string is too long");
 462                 return NULL;
 463         }
 464         if (size == a->ob_size) {
 465                 Py_INCREF(a);
 466                 return (PyObject *)a;
 467         }
 468         nbytes = size * sizeof(char);
 469         if (nbytes / sizeof(char) != (size_t)size ||
 470             nbytes + sizeof(PyStringObject) <= nbytes) {
 471                 PyErr_SetString(PyExc_OverflowError,
 472                         "repeated string is too long");
 473                 return NULL;
 474         }
 475         op = (PyStringObject *)
 476                 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
 477         if (op == NULL)
 478                 return PyErr_NoMemory();
 479         PyObject_INIT_VAR(op, &PyString_Type, size);
 480 #ifdef CACHE_HASH
 481         op->ob_shash = -1;
 482 #endif
 483 #ifdef INTERN_STRINGS
 484         op->ob_sinterned = NULL;
 485 #endif
 486         for (i = 0; i < size; i += a->ob_size)
 487                 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
 488         op->ob_sval[size] = '\0';
 489         return (PyObject *) op;
 490 }
 491
 492 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
 493
 494 static PyObject *
 495 string_slice(register PyStringObject *a, register int i, register int j)
 496      /* j -- may be negative! */
 497 {
 498         if (i < 0)
 499                 i = 0;
 500         if (j < 0)
 501                 j = 0; /* Avoid signed/unsigned bug in next line */
 502         if (j > a->ob_size)
 503                 j = a->ob_size;
 504         if (i == 0 && j == a->ob_size) { /* It's the same as a */
 505                 Py_INCREF(a);
 506                 return (PyObject *)a;
 507         }
 508         if (j < i)
 509                 j = i;
 510         return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
 511 }
 512
 513 static int
 514 string_contains(PyObject *a, PyObject *el)
 515 {
 516         register char *s, *end;
 517         register char c;
 518         if (PyUnicode_Check(el))
 519                 return PyUnicode_Contains(a, el);
 520         if (!PyString_Check(el) || PyString_Size(el) != 1) {
 521                 PyErr_SetString(PyExc_TypeError,
 522                     "'in <string>' requires character as left operand");
 523                 return -1;
 524         }
 525         c = PyString_AsString(el)[0];
 526         s = PyString_AsString(a);
 527         end = s + PyString_Size(a);
 528         while (s < end) {
 529                 if (c == *s++)
 530                         return 1;
 531         }
 532         return 0;
 533 }
 534
 535 static PyObject *
 536 string_item(PyStringObject *a, register int i)
 537 {
 538         int c;
 539         PyObject *v;
 540         if (i < 0 || i >= a->ob_size) {
 541                 PyErr_SetString(PyExc_IndexError, "string index out of range");
 542                 return NULL;
 543         }
 544         c = a->ob_sval[i] & UCHAR_MAX;
 545         v = (PyObject *) characters[c];
 546 #ifdef COUNT_ALLOCS
 547         if (v != NULL)
 548                 one_strings++;
 549 #endif
 550         if (v == NULL) {
 551                 v = PyString_FromStringAndSize((char *)NULL, 1);
 552                 if (v == NULL)
 553                         return NULL;
 554                 characters[c] = (PyStringObject *) v;
 555                 ((PyStringObject *)v)->ob_sval[0] = c;
 556         }
 557         Py_INCREF(v);
 558         return v;
 559 }
 560
 561 static int
 562 string_compare(PyStringObject *a, PyStringObject *b)
 563 {
 564         int len_a = a->ob_size, len_b = b->ob_size;
 565         int min_len = (len_a < len_b) ? len_a : len_b;
 566         int cmp;
 567         if (min_len > 0) {
 568                 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
 569                 if (cmp == 0)
 570                         cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
 571                 if (cmp != 0)
 572                         return cmp;
 573         }
 574         return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
 575 }
 576
 577 static long
 578 string_hash(PyStringObject *a)
 579 {
 580         register int len;
 581         register unsigned char *p;
 582         register long x;
 583
 584 #ifdef CACHE_HASH
 585         if (a->ob_shash != -1)
 586                 return a->ob_shash;
 587 #ifdef INTERN_STRINGS
 588         if (a->ob_sinterned != NULL)
 589                 return (a->ob_shash =
 590                         ((PyStringObject *)(a->ob_sinterned))->ob_shash);
 591 #endif
 592 #endif
 593         len = a->ob_size;
 594         p = (unsigned char *) a->ob_sval;
 595         x = *p << 7;
 596         while (--len >= 0)
 597                 x = (1000003*x) ^ *p++;
 598         x ^= a->ob_size;
 599         if (x == -1)
 600                 x = -2;
 601 #ifdef CACHE_HASH
 602         a->ob_shash = x;
 603 #endif
 604         return x;
 605 }
 606
 607 static int
 608 string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
 609 {
 610         if ( index != 0 ) {
 611                 PyErr_SetString(PyExc_SystemError,
 612                                 "accessing non-existent string segment");
 613                 return -1;
 614         }
 615         *ptr = (void *)self->ob_sval;
 616         return self->ob_size;
 617 }
 618
 619 static int
 620 string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
 621 {
 622         PyErr_SetString(PyExc_TypeError,
 623                         "Cannot use string as modifiable buffer");
 624         return -1;
 625 }
 626
 627 static int
 628 string_buffer_getsegcount(PyStringObject *self, int *lenp)
 629 {
 630         if ( lenp )
 631                 *lenp = self->ob_size;
 632         return 1;
 633 }
 634
 635 static int
 636 string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
 637 {
 638         if ( index != 0 ) {
 639                 PyErr_SetString(PyExc_SystemError,
 640                                 "accessing non-existent string segment");
 641                 return -1;
 642         }
 643         *ptr = self->ob_sval;
 644         return self->ob_size;
 645 }
 646
 647 static PySequenceMethods string_as_sequence = {
 648         (inquiry)string_length, /*sq_length*/
 649         (binaryfunc)string_concat, /*sq_concat*/
 650         (intargfunc)string_repeat, /*sq_repeat*/
 651         (intargfunc)string_item, /*sq_item*/
 652         (intintargfunc)string_slice, /*sq_slice*/
 653         0,              /*sq_ass_item*/
 654         0,              /*sq_ass_slice*/
 655         (objobjproc)string_contains /*sq_contains*/
 656 };
 657
 658 static PyBufferProcs string_as_buffer = {
 659         (getreadbufferproc)string_buffer_getreadbuf,
 660         (getwritebufferproc)string_buffer_getwritebuf,
 661         (getsegcountproc)string_buffer_getsegcount,
 662         (getcharbufferproc)string_buffer_getcharbuf,
 663 };
 664
 665
 666 \f
 667 #define LEFTSTRIP 0
 668 #define RIGHTSTRIP 1
 669 #define BOTHSTRIP 2
 670
 671
 672 static PyObject *
 673 split_whitespace(const char *s, int len, int maxsplit)
 674 {
 675         int i, j, err;
 676         PyObject* item;
 677         PyObject *list = PyList_New(0);
 678
 679         if (list == NULL)
 680                 return NULL;
 681
 682         for (i = j = 0; i < len; ) {
 683                 while (i < len && isspace(Py_CHARMASK(s[i])))
 684                         i++;
 685                 j = i;
 686                 while (i < len && !isspace(Py_CHARMASK(s[i])))
 687                         i++;
 688                 if (j < i) {
 689                         if (maxsplit-- <= 0)
 690                                 break;
 691                         item = PyString_FromStringAndSize(s+j, (int)(i-j));
 692                         if (item == NULL)
 693                                 goto finally;
 694                         err = PyList_Append(list, item);
 695                         Py_DECREF(item);
 696                         if (err < 0)
 697                                 goto finally;
 698                         while (i < len && isspace(Py_CHARMASK(s[i])))
 699                                 i++;
 700                         j = i;
 701                 }
 702         }
 703         if (j < len) {
 704                 item = PyString_FromStringAndSize(s+j, (int)(len - j));
 705                 if (item == NULL)
 706                         goto finally;
 707                 err = PyList_Append(list, item);
 708                 Py_DECREF(item);
 709                 if (err < 0)
 710                         goto finally;
 711         }
 712         return list;
 713   finally:
 714         Py_DECREF(list);
 715         return NULL;
 716 }
 717
 718
 719 static char split__doc__[] =
 720 "S.split([sep [,maxsplit]]) -> list of strings\n\
 721 \n\
 722 Return a list of the words in the string S, using sep as the\n\
 723 delimiter string.  If maxsplit is given, at most maxsplit\n\
 724 splits are done. If sep is not specified, any whitespace string\n\
 725 is a separator.";
 726
 727 static PyObject *
 728 string_split(PyStringObject *self, PyObject *args)
 729 {
 730         int len = PyString_GET_SIZE(self), n, i, j, err;
 731         int maxsplit = -1;
 732         const char *s = PyString_AS_STRING(self), *sub;
 733         PyObject *list, *item, *subobj = Py_None;
 734
 735         if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
 736                 return NULL;
 737         if (maxsplit < 0)
 738                 maxsplit = INT_MAX;
 739         if (subobj == Py_None)
 740                 return split_whitespace(s, len, maxsplit);
 741         if (PyString_Check(subobj)) {
 742                 sub = PyString_AS_STRING(subobj);
 743                 n = PyString_GET_SIZE(subobj);
 744         }
 745         else if (PyUnicode_Check(subobj))
 746                 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
 747         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
 748                 return NULL;
 749         if (n == 0) {
 750                 PyErr_SetString(PyExc_ValueError, "empty separator");
 751                 return NULL;
 752         }
 753
 754         list = PyList_New(0);
 755         if (list == NULL)
 756                 return NULL;
 757
 758         i = j = 0;
 759         while (i+n <= len) {
 760                 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
 761                         if (maxsplit-- <= 0)
 762                                 break;
 763                         item = PyString_FromStringAndSize(s+j, (int)(i-j));
 764                         if (item == NULL)
 765                                 goto fail;
 766                         err = PyList_Append(list, item);
 767                         Py_DECREF(item);
 768                         if (err < 0)
 769                                 goto fail;
 770                         i = j = i + n;
 771                 }
 772                 else
 773                         i++;
 774         }
 775         item = PyString_FromStringAndSize(s+j, (int)(len-j));
 776         if (item == NULL)
 777                 goto fail;
 778         err = PyList_Append(list, item);
 779         Py_DECREF(item);
 780         if (err < 0)
 781                 goto fail;
 782
 783         return list;
 784
 785  fail:
 786         Py_DECREF(list);
 787         return NULL;
 788 }
 789
 790
 791 static char join__doc__[] =
 792 "S.join(sequence) -> string\n\
 793 \n\
 794 Return a string which is the concatenation of the strings in the\n\
 795 sequence.  The separator between elements is S.";
 796
 797 static PyObject *
 798 string_join(PyStringObject *self, PyObject *args)
 799 {
 800         char *sep = PyString_AS_STRING(self);
 801         int seplen = PyString_GET_SIZE(self);
 802         PyObject *res = NULL;
 803         int reslen = 0;
 804         char *p;
 805         int seqlen = 0;
 806         int sz = 100;
 807         int i, slen, sz_incr;
 808         PyObject *orig, *seq, *item;
 809
 810         if (!PyArg_ParseTuple(args, "O:join", &orig))
 811                 return NULL;
 812
 813         if (!(seq = PySequence_Fast(orig, ""))) {
 814                 if (PyErr_ExceptionMatches(PyExc_TypeError))
 815                         PyErr_Format(PyExc_TypeError,
 816                                      "sequence expected, %.80s found",
 817                                      orig->ob_type->tp_name);
 818                 return NULL;
 819         }
 820         /* From here on out, errors go through finally: for proper
 821          * reference count manipulations.
 822          */
 823         seqlen = PySequence_Size(seq);
 824         if (seqlen == 1) {
 825                 item = PySequence_Fast_GET_ITEM(seq, 0);
 826                 Py_INCREF(item);
 827                 Py_DECREF(seq);
 828                 return item;
 829         }
 830
 831         if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
 832                 goto finally;
 833
 834         p = PyString_AS_STRING(res);
 835
 836         for (i = 0; i < seqlen; i++) {
 837                 item = PySequence_Fast_GET_ITEM(seq, i);
 838                 if (!PyString_Check(item)){
 839                         if (PyUnicode_Check(item)) {
 840                                 Py_DECREF(res);
 841                                 Py_DECREF(seq);
 842                                 return PyUnicode_Join((PyObject *)self, seq);
 843                         }
 844                         PyErr_Format(PyExc_TypeError,
 845                                      "sequence item %i: expected string,"
 846                                      " %.80s found",
 847                                      i, item->ob_type->tp_name);
 848                         goto finally;
 849                 }
 850                 slen = PyString_GET_SIZE(item);
 851                 while (reslen + slen + seplen >= sz) {
 852                         /* at least double the size of the string */
 853                         sz_incr = slen + seplen > sz ? slen + seplen : sz;
 854                         if (_PyString_Resize(&res, sz + sz_incr)) {
 855                                 goto finally;
 856                         }
 857                         sz += sz_incr;
 858                         p = PyString_AS_STRING(res) + reslen;
 859                 }
 860                 if (i > 0) {
 861                         memcpy(p, sep, seplen);
 862                         p += seplen;
 863                         reslen += seplen;
 864                 }
 865                 memcpy(p, PyString_AS_STRING(item), slen);
 866                 p += slen;
 867                 reslen += slen;
 868         }
 869         if (_PyString_Resize(&res, reslen))
 870                 goto finally;
 871         Py_DECREF(seq);
 872         return res;
 873
 874   finally:
 875         Py_DECREF(seq);
 876         Py_XDECREF(res);
 877         return NULL;
 878 }
 879
 880
 881
 882 static long
 883 string_find_internal(PyStringObject *self, PyObject *args, int dir)
 884 {
 885         const char *s = PyString_AS_STRING(self), *sub;
 886         int len = PyString_GET_SIZE(self);
 887         int n, i = 0, last = INT_MAX;
 888         PyObject *subobj;
 889
 890         if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
 891                 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
 892                 return -2;
 893         if (PyString_Check(subobj)) {
 894                 sub = PyString_AS_STRING(subobj);
 895                 n = PyString_GET_SIZE(subobj);
 896         }
 897         else if (PyUnicode_Check(subobj))
 898                 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
 899         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
 900                 return -2;
 901
 902         if (last > len)
 903                 last = len;
 904         if (last < 0)
 905                 last += len;
 906         if (last < 0)
 907                 last = 0;
 908         if (i < 0)
 909                 i += len;
 910         if (i < 0)
 911                 i = 0;
 912
 913         if (dir > 0) {
 914                 if (n == 0 && i <= last)
 915                         return (long)i;
 916                 last -= n;
 917                 for (; i <= last; ++i)
 918                         if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
 919                                 return (long)i;
 920         }
 921         else {
 922                 int j;
 923
 924                 if (n == 0 && i <= last)
 925                         return (long)last;
 926                 for (j = last-n; j >= i; --j)
 927                         if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
 928                                 return (long)j;
 929         }
 930
 931         return -1;
 932 }
 933
 934
 935 static char find__doc__[] =
 936 "S.find(sub [,start [,end]]) -> int\n\
 937 \n\
 938 Return the lowest index in S where substring sub is found,\n\
 939 such that sub is contained within s[start,end].  Optional\n\
 940 arguments start and end are interpreted as in slice notation.\n\
 941 \n\
 942 Return -1 on failure.";
 943
 944 static PyObject *
 945 string_find(PyStringObject *self, PyObject *args)
 946 {
 947         long result = string_find_internal(self, args, +1);
 948         if (result == -2)
 949                 return NULL;
 950         return PyInt_FromLong(result);
 951 }
 952
 953
 954 static char index__doc__[] =
 955 "S.index(sub [,start [,end]]) -> int\n\
 956 \n\
 957 Like S.find() but raise ValueError when the substring is not found.";
 958
 959 static PyObject *
 960 string_index(PyStringObject *self, PyObject *args)
 961 {
 962         long result = string_find_internal(self, args, +1);
 963         if (result == -2)
 964                 return NULL;
 965         if (result == -1) {
 966                 PyErr_SetString(PyExc_ValueError,
 967                                 "substring not found in string.index");
 968                 return NULL;
 969         }
 970         return PyInt_FromLong(result);
 971 }
 972
 973
 974 static char rfind__doc__[] =
 975 "S.rfind(sub [,start [,end]]) -> int\n\
 976 \n\
 977 Return the highest index in S where substring sub is found,\n\
 978 such that sub is contained within s[start,end].  Optional\n\
 979 arguments start and end are interpreted as in slice notation.\n\
 980 \n\
 981 Return -1 on failure.";
 982
 983 static PyObject *
 984 string_rfind(PyStringObject *self, PyObject *args)
 985 {
 986         long result = string_find_internal(self, args, -1);
 987         if (result == -2)
 988                 return NULL;
 989         return PyInt_FromLong(result);
 990 }
 991
 992
 993 static char rindex__doc__[] =
 994 "S.rindex(sub [,start [,end]]) -> int\n\
 995 \n\
 996 Like S.rfind() but raise ValueError when the substring is not found.";
 997
 998 static PyObject *
 999 string_rindex(PyStringObject *self, PyObject *args)
1000 {
1001         long result = string_find_internal(self, args, -1);
1002         if (result == -2)
1003                 return NULL;
1004         if (result == -1) {
1005                 PyErr_SetString(PyExc_ValueError,
1006                                 "substring not found in string.rindex");
1007                 return NULL;
1008         }
1009         return PyInt_FromLong(result);
1010 }
1011
1012
1013 static PyObject *
1014 do_strip(PyStringObject *self, PyObject *args, int striptype)
1015 {
1016         char *s = PyString_AS_STRING(self);
1017         int len = PyString_GET_SIZE(self), i, j;
1018
1019         if (!PyArg_ParseTuple(args, ":strip"))
1020                 return NULL;
1021
1022         i = 0;
1023         if (striptype != RIGHTSTRIP) {
1024                 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1025                         i++;
1026                 }
1027         }
1028
1029         j = len;
1030         if (striptype != LEFTSTRIP) {
1031                 do {
1032                         j--;
1033                 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1034                 j++;
1035         }
1036
1037         if (i == 0 && j == len) {
1038                 Py_INCREF(self);
1039                 return (PyObject*)self;
1040         }
1041         else
1042                 return PyString_FromStringAndSize(s+i, j-i);
1043 }
1044
1045
1046 static char strip__doc__[] =
1047 "S.strip() -> string\n\
1048 \n\
1049 Return a copy of the string S with leading and trailing\n\
1050 whitespace removed.";
1051
1052 static PyObject *
1053 string_strip(PyStringObject *self, PyObject *args)
1054 {
1055         return do_strip(self, args, BOTHSTRIP);
1056 }
1057
1058
1059 static char lstrip__doc__[] =
1060 "S.lstrip() -> string\n\
1061 \n\
1062 Return a copy of the string S with leading whitespace removed.";
1063
1064 static PyObject *
1065 string_lstrip(PyStringObject *self, PyObject *args)
1066 {
1067         return do_strip(self, args, LEFTSTRIP);
1068 }
1069
1070
1071 static char rstrip__doc__[] =
1072 "S.rstrip() -> string\n\
1073 \n\
1074 Return a copy of the string S with trailing whitespace removed.";
1075
1076 static PyObject *
1077 string_rstrip(PyStringObject *self, PyObject *args)
1078 {
1079         return do_strip(self, args, RIGHTSTRIP);
1080 }
1081
1082
1083 static char lower__doc__[] =
1084 "S.lower() -> string\n\
1085 \n\
1086 Return a copy of the string S converted to lowercase.";
1087
1088 static PyObject *
1089 string_lower(PyStringObject *self, PyObject *args)
1090 {
1091         char *s = PyString_AS_STRING(self), *s_new;
1092         int i, n = PyString_GET_SIZE(self);
1093         PyObject *new;
1094
1095         if (!PyArg_ParseTuple(args, ":lower"))
1096                 return NULL;
1097         new = PyString_FromStringAndSize(NULL, n);
1098         if (new == NULL)
1099                 return NULL;
1100         s_new = PyString_AsString(new);
1101         for (i = 0; i < n; i++) {
1102                 int c = Py_CHARMASK(*s++);
1103                 if (isupper(c)) {
1104                         *s_new = tolower(c);
1105                 } else
1106                         *s_new = c;
1107                 s_new++;
1108         }
1109         return new;
1110 }
1111
1112
1113 static char upper__doc__[] =
1114 "S.upper() -> string\n\
1115 \n\
1116 Return a copy of the string S converted to uppercase.";
1117
1118 static PyObject *
1119 string_upper(PyStringObject *self, PyObject *args)
1120 {
1121         char *s = PyString_AS_STRING(self), *s_new;
1122         int i, n = PyString_GET_SIZE(self);
1123         PyObject *new;
1124
1125         if (!PyArg_ParseTuple(args, ":upper"))
1126                 return NULL;
1127         new = PyString_FromStringAndSize(NULL, n);
1128         if (new == NULL)
1129                 return NULL;
1130         s_new = PyString_AsString(new);
1131         for (i = 0; i < n; i++) {
1132                 int c = Py_CHARMASK(*s++);
1133                 if (islower(c)) {
1134                         *s_new = toupper(c);
1135                 } else
1136                         *s_new = c;
1137                 s_new++;
1138         }
1139         return new;
1140 }
1141
1142
1143 static char title__doc__[] =
1144 "S.title() -> string\n\
1145 \n\
1146 Return a titlecased version of S, i.e. words start with uppercase\n\
1147 characters, all remaining cased characters have lowercase.";
1148
1149 static PyObject*
1150 string_title(PyUnicodeObject *self, PyObject *args)
1151 {
1152         char *s = PyString_AS_STRING(self), *s_new;
1153         int i, n = PyString_GET_SIZE(self);
1154         int previous_is_cased = 0;
1155         PyObject *new;
1156
1157         if (!PyArg_ParseTuple(args, ":title"))
1158                 return NULL;
1159         new = PyString_FromStringAndSize(NULL, n);
1160         if (new == NULL)
1161                 return NULL;
1162         s_new = PyString_AsString(new);
1163         for (i = 0; i < n; i++) {
1164                 int c = Py_CHARMASK(*s++);
1165                 if (islower(c)) {
1166                         if (!previous_is_cased)
1167                             c = toupper(c);
1168                         previous_is_cased = 1;
1169                 } else if (isupper(c)) {
1170                         if (previous_is_cased)
1171                             c = tolower(c);
1172                         previous_is_cased = 1;
1173                 } else
1174                         previous_is_cased = 0;
1175                 *s_new++ = c;
1176         }
1177         return new;
1178 }
1179
1180 static char capitalize__doc__[] =
1181 "S.capitalize() -> string\n\
1182 \n\
1183 Return a copy of the string S with only its first character\n\
1184 capitalized.";
1185
1186 static PyObject *
1187 string_capitalize(PyStringObject *self, PyObject *args)
1188 {
1189         char *s = PyString_AS_STRING(self), *s_new;
1190         int i, n = PyString_GET_SIZE(self);
1191         PyObject *new;
1192
1193         if (!PyArg_ParseTuple(args, ":capitalize"))
1194                 return NULL;
1195         new = PyString_FromStringAndSize(NULL, n);
1196         if (new == NULL)
1197                 return NULL;
1198         s_new = PyString_AsString(new);
1199         if (0 < n) {
1200                 int c = Py_CHARMASK(*s++);
1201                 if (islower(c))
1202                         *s_new = toupper(c);
1203                 else
1204                         *s_new = c;
1205                 s_new++;
1206         }
1207         for (i = 1; i < n; i++) {
1208                 int c = Py_CHARMASK(*s++);
1209                 if (isupper(c))
1210                         *s_new = tolower(c);
1211                 else
1212                         *s_new = c;
1213                 s_new++;
1214         }
1215         return new;
1216 }
1217
1218
1219 static char count__doc__[] =
1220 "S.count(sub[, start[, end]]) -> int\n\
1221 \n\
1222 Return the number of occurrences of substring sub in string\n\
1223 S[start:end].  Optional arguments start and end are\n\
1224 interpreted as in slice notation.";
1225
1226 static PyObject *
1227 string_count(PyStringObject *self, PyObject *args)
1228 {
1229         const char *s = PyString_AS_STRING(self), *sub;
1230         int len = PyString_GET_SIZE(self), n;
1231         int i = 0, last = INT_MAX;
1232         int m, r;
1233         PyObject *subobj;
1234
1235         if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1236                 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
1237                 return NULL;
1238
1239         if (PyString_Check(subobj)) {
1240                 sub = PyString_AS_STRING(subobj);
1241                 n = PyString_GET_SIZE(subobj);
1242         }
1243         else if (PyUnicode_Check(subobj))
1244                 return PyInt_FromLong(
1245                         PyUnicode_Count((PyObject *)self, subobj, i, last));
1246         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1247                 return NULL;
1248
1249         if (last > len)
1250                 last = len;
1251         if (last < 0)
1252                 last += len;
1253         if (last < 0)
1254                 last = 0;
1255         if (i < 0)
1256                 i += len;
1257         if (i < 0)
1258                 i = 0;
1259         m = last + 1 - n;
1260         if (n == 0)
1261                 return PyInt_FromLong((long) (m-i));
1262
1263         r = 0;
1264         while (i < m) {
1265                 if (!memcmp(s+i, sub, n)) {
1266                         r++;
1267                         i += n;
1268                 } else {
1269                         i++;
1270                 }
1271         }
1272         return PyInt_FromLong((long) r);
1273 }
1274
1275
1276 static char swapcase__doc__[] =
1277 "S.swapcase() -> string\n\
1278 \n\
1279 Return a copy of the string S with uppercase characters\n\
1280 converted to lowercase and vice versa.";
1281
1282 static PyObject *
1283 string_swapcase(PyStringObject *self, PyObject *args)
1284 {
1285         char *s = PyString_AS_STRING(self), *s_new;
1286         int i, n = PyString_GET_SIZE(self);
1287         PyObject *new;
1288
1289         if (!PyArg_ParseTuple(args, ":swapcase"))
1290                 return NULL;
1291         new = PyString_FromStringAndSize(NULL, n);
1292         if (new == NULL)
1293                 return NULL;
1294         s_new = PyString_AsString(new);
1295         for (i = 0; i < n; i++) {
1296                 int c = Py_CHARMASK(*s++);
1297                 if (islower(c)) {
1298                         *s_new = toupper(c);
1299                 }
1300                 else if (isupper(c)) {
1301                         *s_new = tolower(c);
1302                 }
1303                 else
1304                         *s_new = c;
1305                 s_new++;
1306         }
1307         return new;
1308 }
1309
1310
1311 static char translate__doc__[] =
1312 "S.translate(table [,deletechars]) -> string\n\
1313 \n\
1314 Return a copy of the string S, where all characters occurring\n\
1315 in the optional argument deletechars are removed, and the\n\
1316 remaining characters have been mapped through the given\n\
1317 translation table, which must be a string of length 256.";
1318
1319 static PyObject *
1320 string_translate(PyStringObject *self, PyObject *args)
1321 {
1322         register char *input, *output;
1323         register const char *table;
1324         register int i, c, changed = 0;
1325         PyObject *input_obj = (PyObject*)self;
1326         const char *table1, *output_start, *del_table=NULL;
1327         int inlen, tablen, dellen = 0;
1328         PyObject *result;
1329         int trans_table[256];
1330         PyObject *tableobj, *delobj = NULL;
1331
1332         if (!PyArg_ParseTuple(args, "O|O:translate",
1333                               &tableobj, &delobj))
1334                 return NULL;
1335
1336         if (PyString_Check(tableobj)) {
1337                 table1 = PyString_AS_STRING(tableobj);
1338                 tablen = PyString_GET_SIZE(tableobj);
1339         }
1340         else if (PyUnicode_Check(tableobj)) {
1341                 /* Unicode .translate() does not support the deletechars
1342                    parameter; instead a mapping to None will cause characters
1343                    to be deleted. */
1344                 if (delobj != NULL) {
1345                         PyErr_SetString(PyExc_TypeError,
1346                         "deletions are implemented differently for unicode");
1347                         return NULL;
1348                 }
1349                 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1350         }
1351         else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
1352                 return NULL;
1353
1354         if (delobj != NULL) {
1355                 if (PyString_Check(delobj)) {
1356                         del_table = PyString_AS_STRING(delobj);
1357                         dellen = PyString_GET_SIZE(delobj);
1358                 }
1359                 else if (PyUnicode_Check(delobj)) {
1360                         PyErr_SetString(PyExc_TypeError,
1361                         "deletions are implemented differently for unicode");
1362                         return NULL;
1363                 }
1364                 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1365                         return NULL;
1366
1367                 if (tablen != 256) {
1368                         PyErr_SetString(PyExc_ValueError,
1369                           "translation table must be 256 characters long");
1370                         return NULL;
1371                 }
1372         }
1373         else {
1374                 del_table = NULL;
1375                 dellen = 0;
1376         }
1377
1378         table = table1;
1379         inlen = PyString_Size(input_obj);
1380         result = PyString_FromStringAndSize((char *)NULL, inlen);
1381         if (result == NULL)
1382                 return NULL;
1383         output_start = output = PyString_AsString(result);
1384         input = PyString_AsString(input_obj);
1385
1386         if (dellen == 0) {
1387                 /* If no deletions are required, use faster code */
1388                 for (i = inlen; --i >= 0; ) {
1389                         c = Py_CHARMASK(*input++);
1390                         if (Py_CHARMASK((*output++ = table[c])) != c)
1391                                 changed = 1;
1392                 }
1393                 if (changed)
1394                         return result;
1395                 Py_DECREF(result);
1396                 Py_INCREF(input_obj);
1397                 return input_obj;
1398         }
1399
1400         for (i = 0; i < 256; i++)
1401                 trans_table[i] = Py_CHARMASK(table[i]);
1402
1403         for (i = 0; i < dellen; i++)
1404                 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1405
1406         for (i = inlen; --i >= 0; ) {
1407                 c = Py_CHARMASK(*input++);
1408                 if (trans_table[c] != -1)
1409                         if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1410                                 continue;
1411                 changed = 1;
1412         }
1413         if (!changed) {
1414                 Py_DECREF(result);
1415                 Py_INCREF(input_obj);
1416                 return input_obj;
1417         }
1418         /* Fix the size of the resulting string */
1419         if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1420                 return NULL;
1421         return result;
1422 }
1423
1424
1425 /* What follows is used for implementing replace().  Perry Stoll. */
1426
/*
  mymemfind

  Byte-wise substring search over arbitrary memory (a strstr() that does
  not stop at NUL bytes).

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT and returns its offset into MEM, or -1 when there is none.
  A pattern longer than MEM never matches, so -1 is returned then too.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
        /* highest offset at which a complete copy of PAT still fits */
        int last_start = len - pat_len;
        int pos = 0;

        while (pos <= last_start) {
                /* cheap first-byte test before the full comparison */
                if (mem[pos] == pat[0] &&
                    memcmp(mem + pos, pat, pat_len) == 0)
                        return pos;
                pos++;
        }
        return -1;
}
1452
/*
  mymemcnt

   Count the non-overlapping occurrences of PAT (PAT_LEN bytes) in MEM
   (LEN bytes), scanning left to right with mymemfind().
   For example mem=1111 with pat=11 gives 2, and mem=11111 with pat=11
   gives 2 as well.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
        int hits = 0;
        int pos;

        for (; len >= 0; hits++) {
                pos = mymemfind(mem, len, pat, pat_len);
                if (pos == -1)
                        break;
                /* continue right behind this match so matches never overlap */
                mem += pos + pat_len;
                len -= pos + pat_len;
        }
        return hits;
}
1476
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT occurrences are replaced, counted
   from the left; a negative COUNT means "replace all".

   If STR is empty, if the length of PAT is greater than the length of
   STR, or if no occurrence is to be replaced, the original string is
   returned.  Otherwise a new buffer is allocated here with
   PyMem_MALLOC() and returned; the caller must release it with
   PyMem_FREE().  The buffer is not NUL-terminated in general.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,          /* input string */
             const char *pat, int pat_len,      /* pattern string to find */
             const char *sub, int sub_len,      /* substitution string */
             int count,                         /* number of replacements */
             int *out_len)
{
        char *out_s;
        char *new_s;
        int nfound, offset, new_len;

        if (len == 0 || pat_len > len)
                goto return_same;

        /* find length of output string */
        nfound = mymemcnt(str, len, pat, pat_len);
        if (count < 0)
                count = INT_MAX;
        else if (nfound > count)
                nfound = count;
        if (nfound == 0)
                goto return_same;
        new_len = len + nfound*(sub_len - pat_len);

        if (new_len == 0) {
                /* Everything is replaced by nothing.  A zero-byte
                   allocation may legitimately come back as NULL, which
                   the caller would report as out of memory, so allocate
                   one byte to give the caller something to free. */
                out_s = (char *)PyMem_MALLOC(1);
                if (out_s == NULL)
                        return NULL;
                out_s[0] = '\0';
                *out_len = 0;
                return out_s;
        }

        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL) return NULL;

        *out_len = new_len;
        out_s = new_s;

        while (len > 0) {
                /* find index of next instance of pattern */
                offset = mymemfind(str, len, pat, pat_len);
                /* if not found,  break out of loop */
                if (offset == -1) break;

                /* copy non matching part of input string */
                memcpy(new_s, str, offset); /* copy part of str before pat */
                str += offset + pat_len; /* move str past pattern */
                len -= offset + pat_len; /* reduce length of str remaining */

                /* copy substitute into the output string */
                new_s += offset; /* move new_s to dest for sub string */
                memcpy(new_s, sub, sub_len); /* copy substring into new_s */
                new_s += sub_len; /* offset new_s past sub string */

                /* break when we've done count replacements */
                if (--count == 0) break;
        }
        /* copy any remaining values into output string */
        if (len > 0)
                memcpy(new_s, str, len);
        return out_s;

  return_same:
        *out_len = -1;
        return (char*)str;      /* have to cast away constness here */
}
1554
1555
1556 static char replace__doc__[] =
1557 "S.replace (old, new[, maxsplit]) -> string\n\
1558 \n\
1559 Return a copy of string S with all occurrences of substring\n\
1560 old replaced by new.  If the optional argument maxsplit is\n\
1561 given, only the first maxsplit occurrences are replaced.";
1562
1563 static PyObject *
1564 string_replace(PyStringObject *self, PyObject *args)
1565 {
1566         const char *str = PyString_AS_STRING(self), *sub, *repl;
1567         char *new_s;
1568         int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1569         int count = -1;
1570         PyObject *new;
1571         PyObject *subobj, *replobj;
1572
1573         if (!PyArg_ParseTuple(args, "OO|i:replace",
1574                               &subobj, &replobj, &count))
1575                 return NULL;
1576
1577         if (PyString_Check(subobj)) {
1578                 sub = PyString_AS_STRING(subobj);
1579                 sub_len = PyString_GET_SIZE(subobj);
1580         }
1581         else if (PyUnicode_Check(subobj))
1582                 return PyUnicode_Replace((PyObject *)self,
1583                                          subobj, replobj, count);
1584         else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1585                 return NULL;
1586
1587         if (PyString_Check(replobj)) {
1588                 repl = PyString_AS_STRING(replobj);
1589                 repl_len = PyString_GET_SIZE(replobj);
1590         }
1591         else if (PyUnicode_Check(replobj))
1592                 return PyUnicode_Replace((PyObject *)self,
1593                                          subobj, replobj, count);
1594         else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1595                 return NULL;
1596
1597         if (sub_len <= 0) {
1598                 PyErr_SetString(PyExc_ValueError, "empty pattern string");
1599                 return NULL;
1600         }
1601         new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
1602         if (new_s == NULL) {
1603                 PyErr_NoMemory();
1604                 return NULL;
1605         }
1606         if (out_len == -1) {
1607                 /* we're returning another reference to self */
1608                 new = (PyObject*)self;
1609                 Py_INCREF(new);
1610         }
1611         else {
1612                 new = PyString_FromStringAndSize(new_s, out_len);
1613                 PyMem_FREE(new_s);
1614         }
1615         return new;
1616 }
1617
1618
1619 static char startswith__doc__[] =
1620 "S.startswith(prefix[, start[, end]]) -> int\n\
1621 \n\
1622 Return 1 if S starts with the specified prefix, otherwise return 0.  With\n\
1623 optional start, test S beginning at that position.  With optional end, stop\n\
1624 comparing S at that position.";
1625
1626 static PyObject *
1627 string_startswith(PyStringObject *self, PyObject *args)
1628 {
1629         const char* str = PyString_AS_STRING(self);
1630         int len = PyString_GET_SIZE(self);
1631         const char* prefix;
1632         int plen;
1633         int start = 0;
1634         int end = -1;
1635         PyObject *subobj;
1636
1637         if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1638                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1639                 return NULL;
1640         if (PyString_Check(subobj)) {
1641                 prefix = PyString_AS_STRING(subobj);
1642                 plen = PyString_GET_SIZE(subobj);
1643         }
1644         else if (PyUnicode_Check(subobj))
1645                 return PyInt_FromLong(
1646                         PyUnicode_Tailmatch((PyObject *)self,
1647                                             subobj, start, end, -1));
1648         else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
1649                 return NULL;
1650
1651         /* adopt Java semantics for index out of range.  it is legal for
1652          * offset to be == plen, but this only returns true if prefix is
1653          * the empty string.
1654          */
1655         if (start < 0 || start+plen > len)
1656                 return PyInt_FromLong(0);
1657
1658         if (!memcmp(str+start, prefix, plen)) {
1659                 /* did the match end after the specified end? */
1660                 if (end < 0)
1661                         return PyInt_FromLong(1);
1662                 else if (end - start < plen)
1663                         return PyInt_FromLong(0);
1664                 else
1665                         return PyInt_FromLong(1);
1666         }
1667         else return PyInt_FromLong(0);
1668 }
1669
1670
1671 static char endswith__doc__[] =
1672 "S.endswith(suffix[, start[, end]]) -> int\n\
1673 \n\
1674 Return 1 if S ends with the specified suffix, otherwise return 0.  With\n\
1675 optional start, test S beginning at that position.  With optional end, stop\n\
1676 comparing S at that position.";
1677
1678 static PyObject *
1679 string_endswith(PyStringObject *self, PyObject *args)
1680 {
1681         const char* str = PyString_AS_STRING(self);
1682         int len = PyString_GET_SIZE(self);
1683         const char* suffix;
1684         int slen;
1685         int start = 0;
1686         int end = -1;
1687         int lower, upper;
1688         PyObject *subobj;
1689
1690         if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1691                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1692                 return NULL;
1693         if (PyString_Check(subobj)) {
1694                 suffix = PyString_AS_STRING(subobj);
1695                 slen = PyString_GET_SIZE(subobj);
1696         }
1697         else if (PyUnicode_Check(subobj))
1698                 return PyInt_FromLong(
1699                         PyUnicode_Tailmatch((PyObject *)self,
1700                                             subobj, start, end, +1));
1701         else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
1702                 return NULL;
1703
1704         if (start < 0 || start > len || slen > len)
1705                 return PyInt_FromLong(0);
1706
1707         upper = (end >= 0 && end <= len) ? end : len;
1708         lower = (upper - slen) > start ? (upper - slen) : start;
1709
1710         if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
1711                 return PyInt_FromLong(1);
1712         else return PyInt_FromLong(0);
1713 }
1714
1715
1716 static char encode__doc__[] =
1717 "S.encode([encoding[,errors]]) -> string\n\
1718 \n\
1719 Return an encoded string version of S. Default encoding is the current\n\
1720 default string encoding. errors may be given to set a different error\n\
1721 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1722 a ValueError. Other possible values are 'ignore' and 'replace'.";
1723
1724 static PyObject *
1725 string_encode(PyStringObject *self, PyObject *args)
1726 {
1727     char *encoding = NULL;
1728     char *errors = NULL;
1729     if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
1730         return NULL;
1731     return PyString_AsEncodedString((PyObject *)self, encoding, errors);
1732 }
1733
1734
1735 static char expandtabs__doc__[] =
1736 "S.expandtabs([tabsize]) -> string\n\
1737 \n\
1738 Return a copy of S where all tab characters are expanded using spaces.\n\
1739 If tabsize is not given, a tab size of 8 characters is assumed.";
1740
1741 static PyObject*
1742 string_expandtabs(PyStringObject *self, PyObject *args)
1743 {
1744     const char *e, *p;
1745     char *q;
1746     int i, j;
1747     PyObject *u;
1748     int tabsize = 8;
1749
1750     if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1751         return NULL;
1752
1753     /* First pass: determine size of output string */
1754     i = j = 0;
1755     e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1756     for (p = PyString_AS_STRING(self); p < e; p++)
1757         if (*p == '\t') {
1758             if (tabsize > 0)
1759                 j += tabsize - (j % tabsize);
1760         }
1761         else {
1762             j++;
1763             if (*p == '\n' || *p == '\r') {
1764                 i += j;
1765                 j = 0;
1766             }
1767         }
1768
1769     /* Second pass: create output string and fill it */
1770     u = PyString_FromStringAndSize(NULL, i + j);
1771     if (!u)
1772         return NULL;
1773
1774     j = 0;
1775     q = PyString_AS_STRING(u);
1776
1777     for (p = PyString_AS_STRING(self); p < e; p++)
1778         if (*p == '\t') {
1779             if (tabsize > 0) {
1780                 i = tabsize - (j % tabsize);
1781                 j += i;
1782                 while (i--)
1783                     *q++ = ' ';
1784             }
1785         }
1786         else {
1787             j++;
1788             *q++ = *p;
1789             if (*p == '\n' || *p == '\r')
1790                 j = 0;
1791         }
1792
1793     return u;
1794 }
1795
1796 static
1797 PyObject *pad(PyStringObject *self,
1798               int left,
1799               int right,
1800               char fill)
1801 {
1802     PyObject *u;
1803
1804     if (left < 0)
1805         left = 0;
1806     if (right < 0)
1807         right = 0;
1808
1809     if (left == 0 && right == 0) {
1810         Py_INCREF(self);
1811         return (PyObject *)self;
1812     }
1813
1814     u = PyString_FromStringAndSize(NULL,
1815                                    left + PyString_GET_SIZE(self) + right);
1816     if (u) {
1817         if (left)
1818             memset(PyString_AS_STRING(u), fill, left);
1819         memcpy(PyString_AS_STRING(u) + left,
1820                PyString_AS_STRING(self),
1821                PyString_GET_SIZE(self));
1822         if (right)
1823             memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1824                    fill, right);
1825     }
1826
1827     return u;
1828 }
1829
1830 static char ljust__doc__[] =
1831 "S.ljust(width) -> string\n\
1832 \n\
1833 Return S left justified in a string of length width. Padding is\n\
1834 done using spaces.";
1835
1836 static PyObject *
1837 string_ljust(PyStringObject *self, PyObject *args)
1838 {
1839     int width;
1840     if (!PyArg_ParseTuple(args, "i:ljust", &width))
1841         return NULL;
1842
1843     if (PyString_GET_SIZE(self) >= width) {
1844         Py_INCREF(self);
1845         return (PyObject*) self;
1846     }
1847
1848     return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1849 }
1850
1851
1852 static char rjust__doc__[] =
1853 "S.rjust(width) -> string\n\
1854 \n\
1855 Return S right justified in a string of length width. Padding is\n\
1856 done using spaces.";
1857
1858 static PyObject *
1859 string_rjust(PyStringObject *self, PyObject *args)
1860 {
1861     int width;
1862     if (!PyArg_ParseTuple(args, "i:rjust", &width))
1863         return NULL;
1864
1865     if (PyString_GET_SIZE(self) >= width) {
1866         Py_INCREF(self);
1867         return (PyObject*) self;
1868     }
1869
1870     return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1871 }
1872
1873
1874 static char center__doc__[] =
1875 "S.center(width) -> string\n\
1876 \n\
1877 Return S centered in a string of length width. Padding is done\n\
1878 using spaces.";
1879
1880 static PyObject *
1881 string_center(PyStringObject *self, PyObject *args)
1882 {
1883     int marg, left;
1884     int width;
1885
1886     if (!PyArg_ParseTuple(args, "i:center", &width))
1887         return NULL;
1888
1889     if (PyString_GET_SIZE(self) >= width) {
1890         Py_INCREF(self);
1891         return (PyObject*) self;
1892     }
1893
1894     marg = width - PyString_GET_SIZE(self);
1895     left = marg / 2 + (marg & width & 1);
1896
1897     return pad(self, left, marg - left, ' ');
1898 }
1899
#if 0
static char zfill__doc__[] =
"S.zfill(width) -> string\n\
\n\
Pad a numeric string S with zeros on the left, to fill a field\n\
of the specified width. The string S is never truncated.";

/* Implements S.zfill(width) (currently compiled out): left-pads self with
   '0' characters up to `width`.  If the original text starts with '+' or
   '-', that sign is moved in front of the inserted zeros.  A width that
   does not exceed the current length yields self with a new reference. */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
    int fill;
    PyObject *u;
    char *str;

    int width;
    if (!PyArg_ParseTuple(args, "i:zfill", &width))
        return NULL;

    if (PyString_GET_SIZE(self) >= width) {
        Py_INCREF(self);
        return (PyObject*) self;
    }

    /* fill > 0 here, so pad() builds a new string rather than
       returning self */
    fill = width - PyString_GET_SIZE(self);

    u = pad(self, fill, 0, '0');
    if (u == NULL)
        return NULL;

    /* str[fill] is the first character of the original text */
    str = PyString_AS_STRING(u);
    if (str[fill] == '+' || str[fill] == '-') {
        /* move sign to beginning of string */
        str[0] = str[fill];
        str[fill] = '0';
    }

    return u;
}
#endif
1939
1940 static char isspace__doc__[] =
1941 "S.isspace() -> int\n\
1942 \n\
1943 Return 1 if there are only whitespace characters in S,\n\
1944 0 otherwise.";
1945
1946 static PyObject*
1947 string_isspace(PyStringObject *self, PyObject *args)
1948 {
1949     register const unsigned char *p
1950         = (unsigned char *) PyString_AS_STRING(self);
1951     register const unsigned char *e;
1952
1953     if (!PyArg_NoArgs(args))
1954         return NULL;
1955
1956     /* Shortcut for single character strings */
1957     if (PyString_GET_SIZE(self) == 1 &&
1958         isspace(*p))
1959         return PyInt_FromLong(1);
1960
1961     /* Special case for empty strings */
1962     if (PyString_GET_SIZE(self) == 0)
1963         return PyInt_FromLong(0);
1964
1965     e = p + PyString_GET_SIZE(self);
1966     for (; p < e; p++) {
1967         if (!isspace(*p))
1968             return PyInt_FromLong(0);
1969     }
1970     return PyInt_FromLong(1);
1971 }
1972
1973
1974 static char isalpha__doc__[] =
1975 "S.isalpha() -> int\n\
1976 \n\
1977 Return 1 if  all characters in S are alphabetic\n\
1978 and there is at least one character in S, 0 otherwise.";
1979
1980 static PyObject*
1981 string_isalpha(PyUnicodeObject *self, PyObject *args)
1982 {
1983     register const unsigned char *p
1984         = (unsigned char *) PyString_AS_STRING(self);
1985     register const unsigned char *e;
1986
1987     if (!PyArg_NoArgs(args))
1988         return NULL;
1989
1990     /* Shortcut for single character strings */
1991     if (PyString_GET_SIZE(self) == 1 &&
1992         isalpha(*p))
1993         return PyInt_FromLong(1);
1994
1995     /* Special case for empty strings */
1996     if (PyString_GET_SIZE(self) == 0)
1997         return PyInt_FromLong(0);
1998
1999     e = p + PyString_GET_SIZE(self);
2000     for (; p < e; p++) {
2001         if (!isalpha(*p))
2002             return PyInt_FromLong(0);
2003     }
2004     return PyInt_FromLong(1);
2005 }
2006
2007
2008 static char isalnum__doc__[] =
2009 "S.isalnum() -> int\n\
2010 \n\
2011 Return 1 if  all characters in S are alphanumeric\n\
2012 and there is at least one character in S, 0 otherwise.";
2013
2014 static PyObject*
2015 string_isalnum(PyUnicodeObject *self, PyObject *args)
2016 {
2017     register const unsigned char *p
2018         = (unsigned char *) PyString_AS_STRING(self);
2019     register const unsigned char *e;
2020
2021     if (!PyArg_NoArgs(args))
2022         return NULL;
2023
2024     /* Shortcut for single character strings */
2025     if (PyString_GET_SIZE(self) == 1 &&
2026         isalnum(*p))
2027         return PyInt_FromLong(1);
2028
2029     /* Special case for empty strings */
2030     if (PyString_GET_SIZE(self) == 0)
2031         return PyInt_FromLong(0);
2032
2033     e = p + PyString_GET_SIZE(self);
2034     for (; p < e; p++) {
2035         if (!isalnum(*p))
2036             return PyInt_FromLong(0);
2037     }
2038     return PyInt_FromLong(1);
2039 }
2040
2041
2042 static char isdigit__doc__[] =
2043 "S.isdigit() -> int\n\
2044 \n\
2045 Return 1 if there are only digit characters in S,\n\
2046 0 otherwise.";
2047
2048 static PyObject*
2049 string_isdigit(PyStringObject *self, PyObject *args)
2050 {
2051     register const unsigned char *p
2052         = (unsigned char *) PyString_AS_STRING(self);
2053     register const unsigned char *e;
2054
2055     if (!PyArg_NoArgs(args))
2056         return NULL;
2057
2058     /* Shortcut for single character strings */
2059     if (PyString_GET_SIZE(self) == 1 &&
2060         isdigit(*p))
2061         return PyInt_FromLong(1);
2062
2063     /* Special case for empty strings */
2064     if (PyString_GET_SIZE(self) == 0)
2065         return PyInt_FromLong(0);
2066
2067     e = p + PyString_GET_SIZE(self);
2068     for (; p < e; p++) {
2069         if (!isdigit(*p))
2070             return PyInt_FromLong(0);
2071     }
2072     return PyInt_FromLong(1);
2073 }
2074
2075
2076 static char islower__doc__[] =
2077 "S.islower() -> int\n\
2078 \n\
2079 Return 1 if  all cased characters in S are lowercase and there is\n\
2080 at least one cased character in S, 0 otherwise.";
2081
2082 static PyObject*
2083 string_islower(PyStringObject *self, PyObject *args)
2084 {
2085     register const unsigned char *p
2086         = (unsigned char *) PyString_AS_STRING(self);
2087     register const unsigned char *e;
2088     int cased;
2089
2090     if (!PyArg_NoArgs(args))
2091         return NULL;
2092
2093     /* Shortcut for single character strings */
2094     if (PyString_GET_SIZE(self) == 1)
2095         return PyInt_FromLong(islower(*p) != 0);
2096
2097     /* Special case for empty strings */
2098     if (PyString_GET_SIZE(self) == 0)
2099         return PyInt_FromLong(0);
2100
2101     e = p + PyString_GET_SIZE(self);
2102     cased = 0;
2103     for (; p < e; p++) {
2104         if (isupper(*p))
2105             return PyInt_FromLong(0);
2106         else if (!cased && islower(*p))
2107             cased = 1;
2108     }
2109     return PyInt_FromLong(cased);
2110 }
2111
2112
2113 static char isupper__doc__[] =
2114 "S.isupper() -> int\n\
2115 \n\
2116 Return 1 if  all cased characters in S are uppercase and there is\n\
2117 at least one cased character in S, 0 otherwise.";
2118
2119 static PyObject*
2120 string_isupper(PyStringObject *self, PyObject *args)
2121 {
2122     register const unsigned char *p
2123         = (unsigned char *) PyString_AS_STRING(self);
2124     register const unsigned char *e;
2125     int cased;
2126
2127     if (!PyArg_NoArgs(args))
2128         return NULL;
2129
2130     /* Shortcut for single character strings */
2131     if (PyString_GET_SIZE(self) == 1)
2132         return PyInt_FromLong(isupper(*p) != 0);
2133
2134     /* Special case for empty strings */
2135     if (PyString_GET_SIZE(self) == 0)
2136         return PyInt_FromLong(0);
2137
2138     e = p + PyString_GET_SIZE(self);
2139     cased = 0;
2140     for (; p < e; p++) {
2141         if (islower(*p))
2142             return PyInt_FromLong(0);
2143         else if (!cased && isupper(*p))
2144             cased = 1;
2145     }
2146     return PyInt_FromLong(cased);
2147 }
2148
2149
2150 static char istitle__doc__[] =
2151 "S.istitle() -> int\n\
2152 \n\
2153 Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2154 may only follow uncased characters and lowercase characters only cased\n\
2155 ones. Return 0 otherwise.";
2156
2157 static PyObject*
2158 string_istitle(PyStringObject *self, PyObject *args)
2159 {
2160     register const unsigned char *p
2161         = (unsigned char *) PyString_AS_STRING(self);
2162     register const unsigned char *e;
2163     int cased, previous_is_cased;
2164
2165     if (!PyArg_NoArgs(args))
2166         return NULL;
2167
2168     /* Shortcut for single character strings */
2169     if (PyString_GET_SIZE(self) == 1)
2170         return PyInt_FromLong(isupper(*p) != 0);
2171
2172     /* Special case for empty strings */
2173     if (PyString_GET_SIZE(self) == 0)
2174         return PyInt_FromLong(0);
2175
2176     e = p + PyString_GET_SIZE(self);
2177     cased = 0;
2178     previous_is_cased = 0;
2179     for (; p < e; p++) {
2180         register const unsigned char ch = *p;
2181
2182         if (isupper(ch)) {
2183             if (previous_is_cased)
2184                 return PyInt_FromLong(0);
2185             previous_is_cased = 1;
2186             cased = 1;
2187         }
2188         else if (islower(ch)) {
2189             if (!previous_is_cased)
2190                 return PyInt_FromLong(0);
2191             previous_is_cased = 1;
2192             cased = 1;
2193         }
2194         else
2195             previous_is_cased = 0;
2196     }
2197     return PyInt_FromLong(cased);
2198 }
2199
2200
2201 static char splitlines__doc__[] =
2202 "S.splitlines([keepends]]) -> list of strings\n\
2203 \n\
2204 Return a list of the lines in S, breaking at line boundaries.\n\
2205 Line breaks are not included in the resulting list unless keepends\n\
2206 is given and true.";
2207
2208 #define SPLIT_APPEND(data, left, right)                                 \
2209         str = PyString_FromStringAndSize(data + left, right - left);    \
2210         if (!str)                                                       \
2211             goto onError;                                               \
2212         if (PyList_Append(list, str)) {                                 \
2213             Py_DECREF(str);                                             \
2214             goto onError;                                               \
2215         }                                                               \
2216         else                                                            \
2217             Py_DECREF(str);
2218
2219 static PyObject*
2220 string_splitlines(PyStringObject *self, PyObject *args)
2221 {
2222     register int i;
2223     register int j;
2224     int len;
2225     int keepends = 0;
2226     PyObject *list;
2227     PyObject *str;
2228     char *data;
2229
2230     if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
2231         return NULL;
2232
2233     data = PyString_AS_STRING(self);
2234     len = PyString_GET_SIZE(self);
2235
2236     list = PyList_New(0);
2237     if (!list)
2238         goto onError;
2239
2240     for (i = j = 0; i < len; ) {
2241         int eol;
2242
2243         /* Find a line and append it */
2244         while (i < len && data[i] != '\n' && data[i] != '\r')
2245             i++;
2246
2247         /* Skip the line break reading CRLF as one line break */
2248         eol = i;
2249         if (i < len) {
2250             if (data[i] == '\r' && i + 1 < len &&
2251                 data[i+1] == '\n')
2252                 i += 2;
2253             else
2254                 i++;
2255             if (keepends)
2256                 eol = i;
2257         }
2258         SPLIT_APPEND(data, j, eol);
2259         j = i;
2260     }
2261     if (j < len) {
2262         SPLIT_APPEND(data, j, len);
2263     }
2264
2265     return list;
2266
2267  onError:
2268     Py_DECREF(list);
2269     return NULL;
2270 }
2271
2272 #undef SPLIT_APPEND
2273
2274 \f
2275 static PyMethodDef
2276 string_methods[] = {
2277         /* Counterparts of the obsolete stropmodule functions; except
2278            string.maketrans(). */
2279         {"join",       (PyCFunction)string_join,       1, join__doc__},
2280         {"split",       (PyCFunction)string_split,       1, split__doc__},
2281         {"lower",      (PyCFunction)string_lower,      1, lower__doc__},
2282         {"upper",       (PyCFunction)string_upper,       1, upper__doc__},
2283         {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2284         {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2285         {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2286         {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2287         {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
2288         {"isalpha", (PyCFunction)string_isalpha, 0, isalpha__doc__},
2289         {"isalnum", (PyCFunction)string_isalnum, 0, isalnum__doc__},
2290         {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2291         {"count",      (PyCFunction)string_count,      1, count__doc__},
2292         {"endswith",   (PyCFunction)string_endswith,   1, endswith__doc__},
2293         {"find",       (PyCFunction)string_find,       1, find__doc__},
2294         {"index",      (PyCFunction)string_index,      1, index__doc__},
2295         {"lstrip",     (PyCFunction)string_lstrip,     1, lstrip__doc__},
2296         {"replace",     (PyCFunction)string_replace,     1, replace__doc__},
2297         {"rfind",       (PyCFunction)string_rfind,       1, rfind__doc__},
2298         {"rindex",      (PyCFunction)string_rindex,      1, rindex__doc__},
2299         {"rstrip",      (PyCFunction)string_rstrip,      1, rstrip__doc__},
2300         {"startswith",  (PyCFunction)string_startswith,  1, startswith__doc__},
2301         {"strip",       (PyCFunction)string_strip,       1, strip__doc__},
2302         {"swapcase",    (PyCFunction)string_swapcase,    1, swapcase__doc__},
2303         {"translate",   (PyCFunction)string_translate,   1, translate__doc__},
2304         {"title",       (PyCFunction)string_title,       1, title__doc__},
2305         {"ljust",       (PyCFunction)string_ljust,       1, ljust__doc__},
2306         {"rjust",       (PyCFunction)string_rjust,       1, rjust__doc__},
2307         {"center",      (PyCFunction)string_center,      1, center__doc__},
2308         {"encode",      (PyCFunction)string_encode,      1, encode__doc__},
2309         {"expandtabs",  (PyCFunction)string_expandtabs,  1, expandtabs__doc__},
2310         {"splitlines",  (PyCFunction)string_splitlines,  1, splitlines__doc__},
2311 #if 0
2312         {"zfill",       (PyCFunction)string_zfill,       1, zfill__doc__},
2313 #endif
2314         {NULL,     NULL}                     /* sentinel */
2315 };
2316
2317 static PyObject *
2318 string_getattr(PyStringObject *s, char *name)
2319 {
2320         return Py_FindMethod(string_methods, (PyObject*)s, name);
2321 }
2322
2323
/* Type object for strings.  The initializer is positional: each entry
   fills the slot named in the comment beside it, and a 0 leaves that
   slot unset. */
PyTypeObject PyString_Type = {
        PyObject_HEAD_INIT(&PyType_Type)
        0,                      /* presumably ob_size -- TODO confirm slot order */
        "string",               /* presumably tp_name */
        sizeof(PyStringObject), /* presumably tp_basicsize */
        sizeof(char),           /* presumably tp_itemsize */
        (destructor)string_dealloc, /*tp_dealloc*/
        (printfunc)string_print, /*tp_print*/
        (getattrfunc)string_getattr,            /*tp_getattr*/
        0,              /*tp_setattr*/
        (cmpfunc)string_compare, /*tp_compare*/
        (reprfunc)string_repr, /*tp_repr*/
        0,              /*tp_as_number*/
        &string_as_sequence,    /*tp_as_sequence*/
        0,              /*tp_as_mapping*/
        (hashfunc)string_hash, /*tp_hash*/
        0,              /*tp_call*/
        0,              /*tp_str*/
        0,              /*tp_getattro*/
        0,              /*tp_setattro*/
        &string_as_buffer,      /*tp_as_buffer*/
        Py_TPFLAGS_DEFAULT,     /*tp_flags*/
        0,              /*tp_doc*/
};
2348
2349 void
2350 PyString_Concat(register PyObject **pv, register PyObject *w)
2351 {
2352         register PyObject *v;
2353         if (*pv == NULL)
2354                 return;
2355         if (w == NULL || !PyString_Check(*pv)) {
2356                 Py_DECREF(*pv);
2357                 *pv = NULL;
2358                 return;
2359         }
2360         v = string_concat((PyStringObject *) *pv, w);
2361         Py_DECREF(*pv);
2362         *pv = v;
2363 }
2364
2365 void
2366 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
2367 {
2368         PyString_Concat(pv, w);
2369         Py_XDECREF(w);
2370 }
2371
2372
2373 /* The following function breaks the notion that strings are immutable:
2374    it changes the size of a string.  We get away with this only if there
2375    is only one module referencing the object.  You can also think of it
2376    as creating a new string object and destroying the old one, only
2377    more efficiently.  In any case, don't use this if the string may
2378    already be known to some other part of the code... */
2379
2380 int
2381 _PyString_Resize(PyObject **pv, int newsize)
2382 {
2383         register PyObject *v;
2384         register PyStringObject *sv;
2385         v = *pv;
2386         if (!PyString_Check(v) || v->ob_refcnt != 1) {
2387                 *pv = 0;
2388                 Py_DECREF(v);
2389                 PyErr_BadInternalCall();
2390                 return -1;
2391         }
2392         /* XXX UNREF/NEWREF interface should be more symmetrical */
2393 #ifdef Py_REF_DEBUG
2394         --_Py_RefTotal;
2395 #endif
2396         _Py_ForgetReference(v);
2397         *pv = (PyObject *)
2398                 PyObject_REALLOC((char *)v,
2399                         sizeof(PyStringObject) + newsize * sizeof(char));
2400         if (*pv == NULL) {
2401                 PyObject_DEL(v);
2402                 PyErr_NoMemory();
2403                 return -1;
2404         }
2405         _Py_NewReference(*pv);
2406         sv = (PyStringObject *) *pv;
2407         sv->ob_size = newsize;
2408         sv->ob_sval[newsize] = '\0';
2409         return 0;
2410 }
2411
2412 /* Helpers for formatstring */
2413
2414 static PyObject *
2415 getnextarg(PyObject *args, int arglen, int *p_argidx)
2416 {
2417         int argidx = *p_argidx;
2418         if (argidx < arglen) {
2419                 (*p_argidx)++;
2420                 if (arglen < 0)
2421                         return args;
2422                 else
2423                         return PyTuple_GetItem(args, argidx);
2424         }
2425         PyErr_SetString(PyExc_TypeError,
2426                         "not enough arguments for format string");
2427         return NULL;
2428 }
2429
/* Format codes: one bit per flag so that they can be or-ed together in
 * a single int.  Each is listed with its format character.
 */
#define F_LJUST 0x01    /* '-' */
#define F_SIGN  0x02    /* '+' */
#define F_BLANK 0x04    /* ' ' */
#define F_ALT   0x08    /* '#' */
#define F_ZERO  0x10    /* '0' */
2442
2443 static int
2444 formatfloat(char *buf, size_t buflen, int flags,
2445             int prec, int type, PyObject *v)
2446 {
2447         /* fmt = '%#.' + `prec` + `type`
2448            worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
2449         char fmt[20];
2450         double x;
2451         if (!PyArg_Parse(v, "d;float argument required", &x))
2452                 return -1;
2453         if (prec < 0)
2454                 prec = 6;
2455         if (type == 'f' && fabs(x)/1e25 >= 1e25)
2456                 type = 'g';
2457         sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
2458         /* worst case length calc to ensure no buffer overrun:
2459              fmt = %#.<prec>g
2460              buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
2461                 for any double rep.)
2462              len = 1 + prec + 1 + 2 + 5 = 9 + prec
2463            If prec=0 the effective precision is 1 (the leading digit is
2464            always given), therefore increase by one to 10+prec. */
2465         if (buflen <= (size_t)10 + (size_t)prec) {
2466                 PyErr_SetString(PyExc_OverflowError,
2467                         "formatted float is too long (precision too long?)");
2468                 return -1;
2469         }
2470         sprintf(buf, fmt, x);
2471         return strlen(buf);
2472 }
2473
2474 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2475  * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
2476  * Python's regular ints.
2477  * Return value:  a new PyString*, or NULL if error.
2478  *  .  *pbuf is set to point into it,
2479  *     *plen set to the # of chars following that.
2480  *     Caller must decref it when done using pbuf.
2481  *     The string starting at *pbuf is of the form
2482  *         "-"? ("0x" | "0X")? digit+
2483  *     "0x"/"0X" are present only for x and X conversions, with F_ALT
2484  *         set in flags.  The case of hex digits will be correct,
2485  *     There will be at least prec digits, zero-filled on the left if
2486  *         necessary to get that many.
2487  * val          object to be converted
2488  * flags        bitmask of format flags; only F_ALT is looked at
2489  * prec         minimum number of digits; 0-fill on left if needed
2490  * type         a character in [duoxX]; u acts the same as d
2491  *
2492  * CAUTION:  o, x and X conversions on regular ints can never
2493  * produce a '-' sign, but can for Python's unbounded ints.
2494  */
2495 PyObject*
2496 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2497                      char **pbuf, int *plen)
2498 {
2499         PyObject *result = NULL;
2500         char *buf;
2501         int i;
2502         int sign;       /* 1 if '-', else 0 */
2503         int len;        /* number of characters */
2504         int numdigits;  /* len == numnondigits + numdigits */
2505         int numnondigits = 0;
2506
2507         switch (type) {
2508         case 'd':
2509         case 'u':
2510                 result = val->ob_type->tp_str(val);
2511                 break;
2512         case 'o':
2513                 result = val->ob_type->tp_as_number->nb_oct(val);
2514                 break;
2515         case 'x':
2516         case 'X':
2517                 numnondigits = 2;
2518                 result = val->ob_type->tp_as_number->nb_hex(val);
2519                 break;
2520         default:
2521                 assert(!"'type' not in [duoxX]");
2522         }
2523         if (!result)
2524                 return NULL;
2525
2526         /* To modify the string in-place, there can only be one reference. */
2527         if (result->ob_refcnt != 1) {
2528                 PyErr_BadInternalCall();
2529                 return NULL;
2530         }
2531         buf = PyString_AsString(result);
2532         len = PyString_Size(result);
2533         if (buf[len-1] == 'L') {
2534                 --len;
2535                 buf[len] = '\0';
2536         }
2537         sign = buf[0] == '-';
2538         numnondigits += sign;
2539         numdigits = len - numnondigits;
2540         assert(numdigits > 0);
2541
2542         /* Get rid of base marker unless F_ALT */
2543         if ((flags & F_ALT) == 0) {
2544                 /* Need to skip 0x, 0X or 0. */
2545                 int skipped = 0;
2546                 switch (type) {
2547                 case 'o':
2548                         assert(buf[sign] == '0');
2549                         /* If 0 is only digit, leave it alone. */
2550                         if (numdigits > 1) {
2551                                 skipped = 1;
2552                                 --numdigits;
2553                         }
2554                         break;
2555                 case 'x':
2556                 case 'X':
2557                         assert(buf[sign] == '0');
2558                         assert(buf[sign + 1] == 'x');
2559                         skipped = 2;
2560                         numnondigits -= 2;
2561                         break;
2562                 }
2563                 if (skipped) {
2564                         buf += skipped;
2565                         len -= skipped;
2566                         if (sign)
2567                                 buf[0] = '-';
2568                 }
2569                 assert(len == numnondigits + numdigits);
2570                 assert(numdigits > 0);
2571         }
2572
2573         /* Fill with leading zeroes to meet minimum width. */
2574         if (prec > numdigits) {
2575                 PyObject *r1 = PyString_FromStringAndSize(NULL,
2576                                         numnondigits + prec);
2577                 char *b1;
2578                 if (!r1) {
2579                         Py_DECREF(result);
2580                         return NULL;
2581                 }
2582                 b1 = PyString_AS_STRING(r1);
2583                 for (i = 0; i < numnondigits; ++i)
2584                         *b1++ = *buf++;
2585                 for (i = 0; i < prec - numdigits; i++)
2586                         *b1++ = '0';
2587                 for (i = 0; i < numdigits; i++)
2588                         *b1++ = *buf++;
2589                 *b1 = '\0';
2590                 Py_DECREF(result);
2591                 result = r1;
2592                 buf = PyString_AS_STRING(result);
2593                 len = numnondigits + prec;
2594         }
2595
2596         /* Fix up case for hex conversions. */
2597         switch (type) {
2598         case 'x':
2599                 /* Need to convert all upper case letters to lower case. */
2600                 for (i = 0; i < len; i++)
2601                         if (buf[i] >= 'A' && buf[i] <= 'F')
2602                                 buf[i] += 'a'-'A';
2603                 break;
2604         case 'X':
2605                 /* Need to convert 0x to 0X (and -0x to -0X). */
2606                 if (buf[sign + 1] == 'x')
2607                         buf[sign + 1] = 'X';
2608                 break;
2609         }
2610         *pbuf = buf;
2611         *plen = len;
2612         return result;
2613 }
2614
2615 static int
2616 formatint(char *buf, size_t buflen, int flags,
2617           int prec, int type, PyObject *v)
2618 {
2619         /* fmt = '%#.' + `prec` + 'l' + `type`
2620            worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
2621            + 1 + 1 = 24 */
2622         char fmt[64];   /* plenty big enough! */
2623         long x;
2624         if (!PyArg_Parse(v, "l;int argument required", &x))
2625                 return -1;
2626         if (prec < 0)
2627                 prec = 1;
2628         sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
2629         /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
2630            worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
2631         if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
2632                 PyErr_SetString(PyExc_OverflowError,
2633                         "formatted integer is too long (precision too long?)");
2634                 return -1;
2635         }
2636         sprintf(buf, fmt, x);
2637         return strlen(buf);
2638 }
2639
2640 static int
2641 formatchar(char *buf, size_t buflen, PyObject *v)
2642 {
2643         /* presume that the buffer is at least 2 characters long */
2644         if (PyString_Check(v)) {
2645                 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
2646                         return -1;
2647         }
2648         else {
2649                 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
2650                         return -1;
2651         }
2652         buf[1] = '\0';
2653         return 1;
2654 }
2655
2656
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted. XXX This is a magic number. Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format. For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the cast binds as one
   unit wherever the macro is used in an expression.
*/
#define FORMATBUFLEN ((size_t)120)
2666
2667 PyObject *
2668 PyString_Format(PyObject *format, PyObject *args)
2669 {
2670         char *fmt, *res;
2671         int fmtcnt, rescnt, reslen, arglen, argidx;
2672         int args_owned = 0;
2673         PyObject *result, *orig_args;
2674         PyObject *dict = NULL;
2675         if (format == NULL || !PyString_Check(format) || args == NULL) {
2676                 PyErr_BadInternalCall();
2677                 return NULL;
2678         }
2679         orig_args = args;
2680         fmt = PyString_AsString(format);
2681         fmtcnt = PyString_Size(format);
2682         reslen = rescnt = fmtcnt + 100;
2683         result = PyString_FromStringAndSize((char *)NULL, reslen);
2684         if (result == NULL)
2685                 return NULL;
2686         res = PyString_AsString(result);
2687         if (PyTuple_Check(args)) {
2688                 arglen = PyTuple_Size(args);
2689                 argidx = 0;
2690         }
2691         else {
2692                 arglen = -1;
2693                 argidx = -2;
2694         }
2695         if (args->ob_type->tp_as_mapping)
2696                 dict = args;
2697         while (--fmtcnt >= 0) {
2698                 if (*fmt != '%') {
2699                         if (--rescnt < 0) {
2700                                 rescnt = fmtcnt + 100;
2701                                 reslen += rescnt;
2702                                 if (_PyString_Resize(&result, reslen) < 0)
2703                                         return NULL;
2704                                 res = PyString_AsString(result)
2705                                         + reslen - rescnt;
2706                                 --rescnt;
2707                         }
2708                         *res++ = *fmt++;
2709                 }
2710                 else {
2711                         /* Got a format specifier */
2712                         int flags = 0;
2713                         int width = -1;
2714                         int prec = -1;
2715                         int size = 0;
2716                         int c = '\0';
2717                         int fill;
2718                         PyObject *v = NULL;
2719                         PyObject *temp = NULL;
2720                         char *pbuf;
2721                         int sign;
2722                         int len;
2723                         char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
2724                         char *fmt_start = fmt;
2725
2726                         fmt++;
2727                         if (*fmt == '(') {
2728                                 char *keystart;
2729                                 int keylen;
2730                                 PyObject *key;
2731                                 int pcount = 1;
2732
2733                                 if (dict == NULL) {
2734                                         PyErr_SetString(PyExc_TypeError,
2735                                                  "format requires a mapping");
2736                                         goto error;
2737                                 }
2738                                 ++fmt;
2739                                 --fmtcnt;
2740                                 keystart = fmt;
2741                                 /* Skip over balanced parentheses */
2742                                 while (pcount > 0 && --fmtcnt >= 0) {
2743                                         if (*fmt == ')')
2744                                                 --pcount;
2745                                         else if (*fmt == '(')
2746                                                 ++pcount;
2747                                         fmt++;
2748                                 }
2749                                 keylen = fmt - keystart - 1;
2750                                 if (fmtcnt < 0 || pcount > 0) {
2751                                         PyErr_SetString(PyExc_ValueError,
2752                                                    "incomplete format key");
2753                                         goto error;
2754                                 }
2755                                 key = PyString_FromStringAndSize(keystart,
2756                                                                  keylen);
2757                                 if (key == NULL)
2758                                         goto error;
2759                                 if (args_owned) {
2760                                         Py_DECREF(args);
2761                                         args_owned = 0;
2762                                 }
2763                                 args = PyObject_GetItem(dict, key);
2764                                 Py_DECREF(key);
2765                                 if (args == NULL) {
2766                                         goto error;
2767                                 }
2768                                 args_owned = 1;
2769                                 arglen = -1;
2770                                 argidx = -2;
2771                         }
2772                         while (--fmtcnt >= 0) {
2773                                 switch (c = *fmt++) {
2774                                 case '-': flags |= F_LJUST; continue;
2775                                 case '+': flags |= F_SIGN; continue;
2776                                 case ' ': flags |= F_BLANK; continue;
2777                                 case '#': flags |= F_ALT; continue;
2778                                 case '0': flags |= F_ZERO; continue;
2779                                 }
2780                                 break;
2781                         }
2782                         if (c == '*') {
2783                                 v = getnextarg(args, arglen, &argidx);
2784                                 if (v == NULL)
2785                                         goto error;
2786                                 if (!PyInt_Check(v)) {
2787                                         PyErr_SetString(PyExc_TypeError,
2788                                                         "* wants int");
2789                                         goto error;
2790                                 }
2791                                 width = PyInt_AsLong(v);
2792                                 if (width < 0) {
2793                                         flags |= F_LJUST;
2794                                         width = -width;
2795                                 }
2796                                 if (--fmtcnt >= 0)
2797                                         c = *fmt++;
2798                         }
2799                         else if (c >= 0 && isdigit(c)) {
2800                                 width = c - '0';
2801                                 while (--fmtcnt >= 0) {
2802                                         c = Py_CHARMASK(*fmt++);
2803                                         if (!isdigit(c))
2804                                                 break;
2805                                         if ((width*10) / 10 != width) {
2806                                                 PyErr_SetString(
2807                                                         PyExc_ValueError,
2808                                                         "width too big");
2809                                                 goto error;
2810                                         }
2811                                         width = width*10 + (c - '0');
2812                                 }
2813                         }
2814                         if (c == '.') {
2815                                 prec = 0;
2816                                 if (--fmtcnt >= 0)
2817                                         c = *fmt++;
2818                                 if (c == '*') {
2819                                         v = getnextarg(args, arglen, &argidx);
2820                                         if (v == NULL)
2821                                                 goto error;
2822                                         if (!PyInt_Check(v)) {
2823                                                 PyErr_SetString(
2824                                                         PyExc_TypeError,
2825                                                         "* wants int");
2826                                                 goto error;
2827                                         }
2828                                         prec = PyInt_AsLong(v);
2829                                         if (prec < 0)
2830                                                 prec = 0;
2831                                         if (--fmtcnt >= 0)
2832                                                 c = *fmt++;
2833                                 }
2834                                 else if (c >= 0 && isdigit(c)) {
2835                                         prec = c - '0';
2836                                         while (--fmtcnt >= 0) {
2837                                                 c = Py_CHARMASK(*fmt++);
2838                                                 if (!isdigit(c))
2839                                                         break;
2840                                                 if ((prec*10) / 10 != prec) {
2841                                                         PyErr_SetString(
2842                                                             PyExc_ValueError,
2843                                                             "prec too big");
2844                                                         goto error;
2845                                                 }
2846                                                 prec = prec*10 + (c - '0');
2847                                         }
2848                                 }
2849                         } /* prec */
2850                         if (fmtcnt >= 0) {
2851                                 if (c == 'h' || c == 'l' || c == 'L') {
2852                                         size = c;
2853                                         if (--fmtcnt >= 0)
2854                                                 c = *fmt++;
2855                                 }
2856                         }
2857                         if (fmtcnt < 0) {
2858                                 PyErr_SetString(PyExc_ValueError,
2859                                                 "incomplete format");
2860                                 goto error;
2861                         }
2862                         if (c != '%') {
2863                                 v = getnextarg(args, arglen, &argidx);
2864                                 if (v == NULL)
2865                                         goto error;
2866                         }
2867                         sign = 0;
2868                         fill = ' ';
2869                         switch (c) {
2870                         case '%':
2871                                 pbuf = "%";
2872                                 len = 1;
2873                                 break;
2874                         case 's':
2875                         case 'r':
2876                                 if (PyUnicode_Check(v)) {
2877                                         fmt = fmt_start;
2878                                         goto unicode;
2879                                 }
2880                                 if (c == 's')
2881                                 temp = PyObject_Str(v);
2882                                 else
2883                                         temp = PyObject_Repr(v);
2884                                 if (temp == NULL)
2885                                         goto error;
2886                                 if (!PyString_Check(temp)) {
2887                                         PyErr_SetString(PyExc_TypeError,
2888                                           "%s argument has non-string str()");
2889                                         goto error;
2890                                 }
2891                                 pbuf = PyString_AsString(temp);
2892                                 len = PyString_Size(temp);
2893                                 if (prec >= 0 && len > prec)
2894                                         len = prec;
2895                                 break;
2896                         case 'i':
2897                         case 'd':
2898                         case 'u':
2899                         case 'o':
2900                         case 'x':
2901                         case 'X':
2902                                 if (c == 'i')
2903                                         c = 'd';
2904                                 if (PyLong_Check(v) && PyLong_AsLong(v) == -1
2905                                     && PyErr_Occurred()) {
2906                                         /* Too big for a C long. */
2907                                         PyErr_Clear();
2908                                         temp = _PyString_FormatLong(v, flags,
2909                                                 prec, c, &pbuf, &len);
2910                                         if (!temp)
2911                                                 goto error;
2912                                         /* unbounded ints can always produce
2913                                            a sign character! */
2914                                         sign = 1;
2915                                 }
2916                                 else {
2917                                         pbuf = formatbuf;
2918                                         len = formatint(pbuf, sizeof(formatbuf),
2919                                                         flags, prec, c, v);
2920                                         if (len < 0)
2921                                                 goto error;
2922                                         /* only d conversion is signed */
2923                                         sign = c == 'd';
2924                                 }
2925                                 if (flags & F_ZERO)
2926                                         fill = '0';
2927                                 break;
2928                         case 'e':
2929                         case 'E':
2930                         case 'f':
2931                         case 'g':
2932                         case 'G':
2933                                 pbuf = formatbuf;
2934                                 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
2935                                 if (len < 0)
2936                                         goto error;
2937                                 sign = 1;
2938                                 if (flags & F_ZERO)
2939                                         fill = '0';
2940                                 break;
2941                         case 'c':
2942                                 pbuf = formatbuf;
2943                                 len = formatchar(pbuf, sizeof(formatbuf), v);
2944                                 if (len < 0)
2945                                         goto error;
2946                                 break;
2947                         default:
2948                                 PyErr_Format(PyExc_ValueError,
2949                                 "unsupported format character '%c' (0x%x)",
2950                                         c, c);
2951                                 goto error;
2952                         }
2953                         if (sign) {
2954                                 if (*pbuf == '-' || *pbuf == '+') {
2955                                         sign = *pbuf++;
2956                                         len--;
2957                                 }
2958                                 else if (flags & F_SIGN)
2959                                         sign = '+';
2960                                 else if (flags & F_BLANK)
2961                                         sign = ' ';
2962                                 else
2963                                         sign = 0;
2964                         }
2965                         if (width < len)
2966                                 width = len;
2967                         if (rescnt < width + (sign != 0)) {
2968                                 reslen -= rescnt;
2969                                 rescnt = width + fmtcnt + 100;
2970                                 reslen += rescnt;
2971                                 if (_PyString_Resize(&result, reslen) < 0)
2972                                         return NULL;
2973                                 res = PyString_AsString(result)
2974                                         + reslen - rescnt;
2975                         }
2976                         if (sign) {
2977                                 if (fill != ' ')
2978                                         *res++ = sign;
2979                                 rescnt--;
2980                                 if (width > len)
2981                                         width--;
2982                         }
2983                         if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
2984                                 assert(pbuf[0] == '0');
2985                                 assert(pbuf[1] == c);
2986                                 if (fill != ' ') {
2987                                         *res++ = *pbuf++;
2988                                         *res++ = *pbuf++;
2989                                 }
2990                                 rescnt -= 2;
2991                                 width -= 2;
2992                                 if (width < 0)
2993                                         width = 0;
2994                                 len -= 2;
2995                         }
2996                         if (width > len && !(flags & F_LJUST)) {
2997                                 do {
2998                                         --rescnt;
2999                                         *res++ = fill;
3000                                 } while (--width > len);
3001                         }
3002                         if (fill == ' ') {
3003                                 if (sign)
3004                                         *res++ = sign;
3005                                 if ((flags & F_ALT) &&
3006                                     (c == 'x' || c == 'X')) {
3007                                         assert(pbuf[0] == '0');
3008                                         assert(pbuf[1] == c);
3009                                         *res++ = *pbuf++;
3010                                         *res++ = *pbuf++;
3011                                 }
3012                         }
3013                         memcpy(res, pbuf, len);
3014                         res += len;
3015                         rescnt -= len;
3016                         while (--width >= len) {
3017                                 --rescnt;
3018                                 *res++ = ' ';
3019                         }
3020                         if (dict && (argidx < arglen) && c != '%') {
3021                                 PyErr_SetString(PyExc_TypeError,
3022                                            "not all arguments converted");
3023                                 goto error;
3024                         }
3025                         Py_XDECREF(temp);
3026                 } /* '%' */
3027         } /* until end */
3028         if (argidx < arglen && !dict) {
3029                 PyErr_SetString(PyExc_TypeError,
3030                                 "not all arguments converted");
3031                 goto error;
3032         }
3033         if (args_owned) {
3034                 Py_DECREF(args);
3035         }
3036         _PyString_Resize(&result, reslen - rescnt);
3037         return result;
3038
3039  unicode:
3040         if (args_owned) {
3041                 Py_DECREF(args);
3042                 args_owned = 0;
3043         }
3044         /* Fiddle args right (remove the first argidx-1 arguments) */
3045         --argidx;
3046         if (PyTuple_Check(orig_args) && argidx > 0) {
3047                 PyObject *v;
3048                 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3049                 v = PyTuple_New(n);
3050                 if (v == NULL)
3051                         goto error;
3052                 while (--n >= 0) {
3053                         PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3054                         Py_INCREF(w);
3055                         PyTuple_SET_ITEM(v, n, w);
3056                 }
3057                 args = v;
3058         } else {
3059                 Py_INCREF(orig_args);
3060                 args = orig_args;
3061         }
3062         /* Paste rest of format string to what we have of the result
3063            string; we reuse result for this */
3064         rescnt = res - PyString_AS_STRING(result);
3065         fmtcnt = PyString_GET_SIZE(format) - \
3066                  (fmt - PyString_AS_STRING(format));
3067         if (_PyString_Resize(&result, rescnt + fmtcnt)) {
3068                 Py_DECREF(args);
3069                 goto error;
3070         }
3071         memcpy(PyString_AS_STRING(result) + rescnt, fmt, fmtcnt);
3072         format = result;
3073         /* Let Unicode do its magic */
3074         result = PyUnicode_Format(format, args);
3075         Py_DECREF(format);
3076         Py_DECREF(args);
3077         return result;
3078
3079  error:
3080         Py_DECREF(result);
3081         if (args_owned) {
3082                 Py_DECREF(args);
3083         }
3084         return NULL;
3085 }
3086
3087
3088 #ifdef INTERN_STRINGS
3089
/* Dictionary of interned strings.  It is created lazily by
 * PyString_InternInPlace() on first use, and each entry stored there maps
 * an interned string object to itself (key and value are the same object).
 *
 * This dictionary will leak at PyString_Fini() time.  That's acceptable
 * because PyString_Fini() specifically frees interned strings that are
 * only referenced by this dictionary.  The CVS log entry for revision 2.45
 * says:
 *
 *    Change the Fini function to only remove otherwise unreferenced
 *    strings from the interned table.  There are references in
 *    hard-to-find static variables all over the interpreter, and it's not
 *    worth trying to get rid of all those; but "uninterning" isn't fair
 *    either and may cause subtle failures later -- so we have to keep them
 *    in the interned table.
 */
static PyObject *interned;
3103
3104 void
3105 PyString_InternInPlace(PyObject **p)
3106 {
3107         register PyStringObject *s = (PyStringObject *)(*p);
3108         PyObject *t;
3109         if (s == NULL || !PyString_Check(s))
3110                 Py_FatalError("PyString_InternInPlace: strings only please!");
3111         if ((t = s->ob_sinterned) != NULL) {
3112                 if (t == (PyObject *)s)
3113                         return;
3114                 Py_INCREF(t);
3115                 *p = t;
3116                 Py_DECREF(s);
3117                 return;
3118         }
3119         if (interned == NULL) {
3120                 interned = PyDict_New();
3121                 if (interned == NULL)
3122                         return;
3123         }
3124         if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3125                 Py_INCREF(t);
3126                 *p = s->ob_sinterned = t;
3127                 Py_DECREF(s);
3128                 return;
3129         }
3130         t = (PyObject *)s;
3131         if (PyDict_SetItem(interned, t, t) == 0) {
3132                 s->ob_sinterned = t;
3133                 return;
3134         }
3135         PyErr_Clear();
3136 }
3137
3138
3139 PyObject *
3140 PyString_InternFromString(const char *cp)
3141 {
3142         PyObject *s = PyString_FromString(cp);
3143         if (s == NULL)
3144                 return NULL;
3145         PyString_InternInPlace(&s);
3146         return s;
3147 }
3148
3149 #endif
3150
3151 void
3152 PyString_Fini(void)
3153 {
3154         int i;
3155         for (i = 0; i < UCHAR_MAX + 1; i++) {
3156                 Py_XDECREF(characters[i]);
3157                 characters[i] = NULL;
3158         }
3159 #ifndef DONT_SHARE_SHORT_STRINGS
3160         Py_XDECREF(nullstring);
3161         nullstring = NULL;
3162 #endif
3163 #ifdef INTERN_STRINGS
3164         if (interned) {
3165                 int pos, changed;
3166                 PyObject *key, *value;
3167                 do {
3168                         changed = 0;
3169                         pos = 0;
3170                         while (PyDict_Next(interned, &pos, &key, &value)) {
3171                                 if (key->ob_refcnt == 2 && key == value) {
3172                                         PyDict_DelItem(interned, key);
3173                                         changed = 1;
3174                                 }
3175                         }
3176                 } while (changed);
3177         }
3178 #endif
3179 }