Remove a ?? in the description of Mac OS support.
[python/dscho.git] / Objects / stringobject.c
blobacae88032549493f7d22460822799bd8203b0023
2 /* String object implementation */
4 #include "Python.h"
6 #include <ctype.h>
8 #ifdef COUNT_ALLOCS
9 int null_strings, one_strings;
10 #endif
12 #ifdef HAVE_LIMITS_H
13 #include <limits.h>
14 #else
15 #ifndef UCHAR_MAX
16 #define UCHAR_MAX 255
17 #endif
18 #endif
20 static PyStringObject *characters[UCHAR_MAX + 1];
21 #ifndef DONT_SHARE_SHORT_STRINGS
22 static PyStringObject *nullstring;
23 #endif
26 Newsizedstringobject() and newstringobject() try in certain cases
27 to share string objects. When the size of the string is zero,
28 these routines always return a pointer to the same string object;
29 when the size is one, they return a pointer to an already existing
30 object if the contents of the string is known. For
31 newstringobject() this is always the case, for
32 newsizedstringobject() this is the case when the first argument is
33 not NULL.
34 A common practice to allocate a string and then fill it in or
35 change it must be done carefully. It is only allowed to change the
36 contents of the string if the object was obtained from
37 newsizedstringobject() with a NULL first argument, because in the
38 future these routines may try to do even more sharing of objects.
40 PyObject *
41 PyString_FromStringAndSize(const char *str, int size)
43 register PyStringObject *op;
44 #ifndef DONT_SHARE_SHORT_STRINGS
45 if (size == 0 && (op = nullstring) != NULL) {
46 #ifdef COUNT_ALLOCS
47 null_strings++;
48 #endif
49 Py_INCREF(op);
50 return (PyObject *)op;
52 if (size == 1 && str != NULL &&
53 (op = characters[*str & UCHAR_MAX]) != NULL)
55 #ifdef COUNT_ALLOCS
56 one_strings++;
57 #endif
58 Py_INCREF(op);
59 return (PyObject *)op;
61 #endif /* DONT_SHARE_SHORT_STRINGS */
63 /* PyObject_NewVar is inlined */
64 op = (PyStringObject *)
65 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
66 if (op == NULL)
67 return PyErr_NoMemory();
68 PyObject_INIT_VAR(op, &PyString_Type, size);
69 #ifdef CACHE_HASH
70 op->ob_shash = -1;
71 #endif
72 #ifdef INTERN_STRINGS
73 op->ob_sinterned = NULL;
74 #endif
75 if (str != NULL)
76 memcpy(op->ob_sval, str, size);
77 op->ob_sval[size] = '\0';
78 #ifndef DONT_SHARE_SHORT_STRINGS
79 if (size == 0) {
80 nullstring = op;
81 Py_INCREF(op);
82 } else if (size == 1 && str != NULL) {
83 characters[*str & UCHAR_MAX] = op;
84 Py_INCREF(op);
86 #endif
87 return (PyObject *) op;
90 PyObject *
91 PyString_FromString(const char *str)
93 register size_t size = strlen(str);
94 register PyStringObject *op;
95 if (size > INT_MAX) {
96 PyErr_SetString(PyExc_OverflowError,
97 "string is too long for a Python string");
98 return NULL;
100 #ifndef DONT_SHARE_SHORT_STRINGS
101 if (size == 0 && (op = nullstring) != NULL) {
102 #ifdef COUNT_ALLOCS
103 null_strings++;
104 #endif
105 Py_INCREF(op);
106 return (PyObject *)op;
108 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
109 #ifdef COUNT_ALLOCS
110 one_strings++;
111 #endif
112 Py_INCREF(op);
113 return (PyObject *)op;
115 #endif /* DONT_SHARE_SHORT_STRINGS */
117 /* PyObject_NewVar is inlined */
118 op = (PyStringObject *)
119 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
120 if (op == NULL)
121 return PyErr_NoMemory();
122 PyObject_INIT_VAR(op, &PyString_Type, size);
123 #ifdef CACHE_HASH
124 op->ob_shash = -1;
125 #endif
126 #ifdef INTERN_STRINGS
127 op->ob_sinterned = NULL;
128 #endif
129 strcpy(op->ob_sval, str);
130 #ifndef DONT_SHARE_SHORT_STRINGS
131 if (size == 0) {
132 nullstring = op;
133 Py_INCREF(op);
134 } else if (size == 1) {
135 characters[*str & UCHAR_MAX] = op;
136 Py_INCREF(op);
138 #endif
139 return (PyObject *) op;
142 PyObject *PyString_Decode(const char *s,
143 int size,
144 const char *encoding,
145 const char *errors)
147 PyObject *buffer = NULL, *str;
149 if (encoding == NULL)
150 encoding = PyUnicode_GetDefaultEncoding();
152 /* Decode via the codec registry */
153 buffer = PyBuffer_FromMemory((void *)s, size);
154 if (buffer == NULL)
155 goto onError;
156 str = PyCodec_Decode(buffer, encoding, errors);
157 if (str == NULL)
158 goto onError;
159 /* Convert Unicode to a string using the default encoding */
160 if (PyUnicode_Check(str)) {
161 PyObject *temp = str;
162 str = PyUnicode_AsEncodedString(str, NULL, NULL);
163 Py_DECREF(temp);
164 if (str == NULL)
165 goto onError;
167 if (!PyString_Check(str)) {
168 PyErr_Format(PyExc_TypeError,
169 "decoder did not return a string object (type=%.400s)",
170 str->ob_type->tp_name);
171 Py_DECREF(str);
172 goto onError;
174 Py_DECREF(buffer);
175 return str;
177 onError:
178 Py_XDECREF(buffer);
179 return NULL;
182 PyObject *PyString_Encode(const char *s,
183 int size,
184 const char *encoding,
185 const char *errors)
187 PyObject *v, *str;
189 str = PyString_FromStringAndSize(s, size);
190 if (str == NULL)
191 return NULL;
192 v = PyString_AsEncodedString(str, encoding, errors);
193 Py_DECREF(str);
194 return v;
197 PyObject *PyString_AsEncodedString(PyObject *str,
198 const char *encoding,
199 const char *errors)
201 PyObject *v;
203 if (!PyString_Check(str)) {
204 PyErr_BadArgument();
205 goto onError;
208 if (encoding == NULL)
209 encoding = PyUnicode_GetDefaultEncoding();
211 /* Encode via the codec registry */
212 v = PyCodec_Encode(str, encoding, errors);
213 if (v == NULL)
214 goto onError;
215 /* Convert Unicode to a string using the default encoding */
216 if (PyUnicode_Check(v)) {
217 PyObject *temp = v;
218 v = PyUnicode_AsEncodedString(v, NULL, NULL);
219 Py_DECREF(temp);
220 if (v == NULL)
221 goto onError;
223 if (!PyString_Check(v)) {
224 PyErr_Format(PyExc_TypeError,
225 "encoder did not return a string object (type=%.400s)",
226 v->ob_type->tp_name);
227 Py_DECREF(v);
228 goto onError;
230 return v;
232 onError:
233 return NULL;
236 static void
237 string_dealloc(PyObject *op)
239 PyObject_DEL(op);
242 static int
243 string_getsize(register PyObject *op)
245 char *s;
246 int len;
247 if (PyString_AsStringAndSize(op, &s, &len))
248 return -1;
249 return len;
252 static /*const*/ char *
253 string_getbuffer(register PyObject *op)
255 char *s;
256 int len;
257 if (PyString_AsStringAndSize(op, &s, &len))
258 return NULL;
259 return s;
263 PyString_Size(register PyObject *op)
265 if (!PyString_Check(op))
266 return string_getsize(op);
267 return ((PyStringObject *)op) -> ob_size;
270 /*const*/ char *
271 PyString_AsString(register PyObject *op)
273 if (!PyString_Check(op))
274 return string_getbuffer(op);
275 return ((PyStringObject *)op) -> ob_sval;
278 /* Internal API needed by PyString_AsStringAndSize(): */
279 extern
280 PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
281 const char *errors);
284 PyString_AsStringAndSize(register PyObject *obj,
285 register char **s,
286 register int *len)
288 if (s == NULL) {
289 PyErr_BadInternalCall();
290 return -1;
293 if (!PyString_Check(obj)) {
294 if (PyUnicode_Check(obj)) {
295 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
296 if (obj == NULL)
297 return -1;
299 else {
300 PyErr_Format(PyExc_TypeError,
301 "expected string or Unicode object, "
302 "%.200s found", obj->ob_type->tp_name);
303 return -1;
307 *s = PyString_AS_STRING(obj);
308 if (len != NULL)
309 *len = PyString_GET_SIZE(obj);
310 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
311 PyErr_SetString(PyExc_TypeError,
312 "expected string without null bytes");
313 return -1;
315 return 0;
318 /* Methods */
320 static int
321 string_print(PyStringObject *op, FILE *fp, int flags)
323 int i;
324 char c;
325 int quote;
326 /* XXX Ought to check for interrupts when writing long strings */
327 if (flags & Py_PRINT_RAW) {
328 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
329 return 0;
332 /* figure out which quote to use; single is preferred */
333 quote = '\'';
334 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
335 quote = '"';
337 fputc(quote, fp);
338 for (i = 0; i < op->ob_size; i++) {
339 c = op->ob_sval[i];
340 if (c == quote || c == '\\')
341 fprintf(fp, "\\%c", c);
342 else if (c < ' ' || c >= 0177)
343 fprintf(fp, "\\%03o", c & 0377);
344 else
345 fputc(c, fp);
347 fputc(quote, fp);
348 return 0;
351 static PyObject *
352 string_repr(register PyStringObject *op)
354 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
355 PyObject *v;
356 if (newsize > INT_MAX) {
357 PyErr_SetString(PyExc_OverflowError,
358 "string is too large to make repr");
360 v = PyString_FromStringAndSize((char *)NULL, newsize);
361 if (v == NULL) {
362 return NULL;
364 else {
365 register int i;
366 register char c;
367 register char *p;
368 int quote;
370 /* figure out which quote to use; single is preferred */
371 quote = '\'';
372 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
373 quote = '"';
375 p = ((PyStringObject *)v)->ob_sval;
376 *p++ = quote;
377 for (i = 0; i < op->ob_size; i++) {
378 c = op->ob_sval[i];
379 if (c == quote || c == '\\')
380 *p++ = '\\', *p++ = c;
381 else if (c < ' ' || c >= 0177) {
382 sprintf(p, "\\%03o", c & 0377);
383 while (*p != '\0')
384 p++;
386 else
387 *p++ = c;
389 *p++ = quote;
390 *p = '\0';
391 _PyString_Resize(
392 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
393 return v;
397 static int
398 string_length(PyStringObject *a)
400 return a->ob_size;
403 static PyObject *
404 string_concat(register PyStringObject *a, register PyObject *bb)
406 register unsigned int size;
407 register PyStringObject *op;
408 if (!PyString_Check(bb)) {
409 if (PyUnicode_Check(bb))
410 return PyUnicode_Concat((PyObject *)a, bb);
411 PyErr_Format(PyExc_TypeError,
412 "cannot add type \"%.200s\" to string",
413 bb->ob_type->tp_name);
414 return NULL;
416 #define b ((PyStringObject *)bb)
417 /* Optimize cases with empty left or right operand */
418 if (a->ob_size == 0) {
419 Py_INCREF(bb);
420 return bb;
422 if (b->ob_size == 0) {
423 Py_INCREF(a);
424 return (PyObject *)a;
426 size = a->ob_size + b->ob_size;
427 /* PyObject_NewVar is inlined */
428 op = (PyStringObject *)
429 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
430 if (op == NULL)
431 return PyErr_NoMemory();
432 PyObject_INIT_VAR(op, &PyString_Type, size);
433 #ifdef CACHE_HASH
434 op->ob_shash = -1;
435 #endif
436 #ifdef INTERN_STRINGS
437 op->ob_sinterned = NULL;
438 #endif
439 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
440 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
441 op->ob_sval[size] = '\0';
442 return (PyObject *) op;
443 #undef b
446 static PyObject *
447 string_repeat(register PyStringObject *a, register int n)
449 register int i;
450 register int size;
451 register PyStringObject *op;
452 size_t nbytes;
453 if (n < 0)
454 n = 0;
455 /* watch out for overflows: the size can overflow int,
456 * and the # of bytes needed can overflow size_t
458 size = a->ob_size * n;
459 if (n && size / n != a->ob_size) {
460 PyErr_SetString(PyExc_OverflowError,
461 "repeated string is too long");
462 return NULL;
464 if (size == a->ob_size) {
465 Py_INCREF(a);
466 return (PyObject *)a;
468 nbytes = size * sizeof(char);
469 if (nbytes / sizeof(char) != (size_t)size ||
470 nbytes + sizeof(PyStringObject) <= nbytes) {
471 PyErr_SetString(PyExc_OverflowError,
472 "repeated string is too long");
473 return NULL;
475 op = (PyStringObject *)
476 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
477 if (op == NULL)
478 return PyErr_NoMemory();
479 PyObject_INIT_VAR(op, &PyString_Type, size);
480 #ifdef CACHE_HASH
481 op->ob_shash = -1;
482 #endif
483 #ifdef INTERN_STRINGS
484 op->ob_sinterned = NULL;
485 #endif
486 for (i = 0; i < size; i += a->ob_size)
487 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
488 op->ob_sval[size] = '\0';
489 return (PyObject *) op;
492 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
494 static PyObject *
495 string_slice(register PyStringObject *a, register int i, register int j)
496 /* j -- may be negative! */
498 if (i < 0)
499 i = 0;
500 if (j < 0)
501 j = 0; /* Avoid signed/unsigned bug in next line */
502 if (j > a->ob_size)
503 j = a->ob_size;
504 if (i == 0 && j == a->ob_size) { /* It's the same as a */
505 Py_INCREF(a);
506 return (PyObject *)a;
508 if (j < i)
509 j = i;
510 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
513 static int
514 string_contains(PyObject *a, PyObject *el)
516 register char *s, *end;
517 register char c;
518 if (PyUnicode_Check(el))
519 return PyUnicode_Contains(a, el);
520 if (!PyString_Check(el) || PyString_Size(el) != 1) {
521 PyErr_SetString(PyExc_TypeError,
522 "'in <string>' requires character as left operand");
523 return -1;
525 c = PyString_AsString(el)[0];
526 s = PyString_AsString(a);
527 end = s + PyString_Size(a);
528 while (s < end) {
529 if (c == *s++)
530 return 1;
532 return 0;
535 static PyObject *
536 string_item(PyStringObject *a, register int i)
538 int c;
539 PyObject *v;
540 if (i < 0 || i >= a->ob_size) {
541 PyErr_SetString(PyExc_IndexError, "string index out of range");
542 return NULL;
544 c = a->ob_sval[i] & UCHAR_MAX;
545 v = (PyObject *) characters[c];
546 #ifdef COUNT_ALLOCS
547 if (v != NULL)
548 one_strings++;
549 #endif
550 if (v == NULL) {
551 v = PyString_FromStringAndSize((char *)NULL, 1);
552 if (v == NULL)
553 return NULL;
554 characters[c] = (PyStringObject *) v;
555 ((PyStringObject *)v)->ob_sval[0] = c;
557 Py_INCREF(v);
558 return v;
561 static int
562 string_compare(PyStringObject *a, PyStringObject *b)
564 int len_a = a->ob_size, len_b = b->ob_size;
565 int min_len = (len_a < len_b) ? len_a : len_b;
566 int cmp;
567 if (min_len > 0) {
568 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
569 if (cmp == 0)
570 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
571 if (cmp != 0)
572 return cmp;
574 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
577 static long
578 string_hash(PyStringObject *a)
580 register int len;
581 register unsigned char *p;
582 register long x;
584 #ifdef CACHE_HASH
585 if (a->ob_shash != -1)
586 return a->ob_shash;
587 #ifdef INTERN_STRINGS
588 if (a->ob_sinterned != NULL)
589 return (a->ob_shash =
590 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
591 #endif
592 #endif
593 len = a->ob_size;
594 p = (unsigned char *) a->ob_sval;
595 x = *p << 7;
596 while (--len >= 0)
597 x = (1000003*x) ^ *p++;
598 x ^= a->ob_size;
599 if (x == -1)
600 x = -2;
601 #ifdef CACHE_HASH
602 a->ob_shash = x;
603 #endif
604 return x;
607 static int
608 string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
610 if ( index != 0 ) {
611 PyErr_SetString(PyExc_SystemError,
612 "accessing non-existent string segment");
613 return -1;
615 *ptr = (void *)self->ob_sval;
616 return self->ob_size;
619 static int
620 string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
622 PyErr_SetString(PyExc_TypeError,
623 "Cannot use string as modifiable buffer");
624 return -1;
627 static int
628 string_buffer_getsegcount(PyStringObject *self, int *lenp)
630 if ( lenp )
631 *lenp = self->ob_size;
632 return 1;
635 static int
636 string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
638 if ( index != 0 ) {
639 PyErr_SetString(PyExc_SystemError,
640 "accessing non-existent string segment");
641 return -1;
643 *ptr = self->ob_sval;
644 return self->ob_size;
647 static PySequenceMethods string_as_sequence = {
648 (inquiry)string_length, /*sq_length*/
649 (binaryfunc)string_concat, /*sq_concat*/
650 (intargfunc)string_repeat, /*sq_repeat*/
651 (intargfunc)string_item, /*sq_item*/
652 (intintargfunc)string_slice, /*sq_slice*/
653 0, /*sq_ass_item*/
654 0, /*sq_ass_slice*/
655 (objobjproc)string_contains /*sq_contains*/
658 static PyBufferProcs string_as_buffer = {
659 (getreadbufferproc)string_buffer_getreadbuf,
660 (getwritebufferproc)string_buffer_getwritebuf,
661 (getsegcountproc)string_buffer_getsegcount,
662 (getcharbufferproc)string_buffer_getcharbuf,
667 #define LEFTSTRIP 0
668 #define RIGHTSTRIP 1
669 #define BOTHSTRIP 2
672 static PyObject *
673 split_whitespace(const char *s, int len, int maxsplit)
675 int i, j, err;
676 PyObject* item;
677 PyObject *list = PyList_New(0);
679 if (list == NULL)
680 return NULL;
682 for (i = j = 0; i < len; ) {
683 while (i < len && isspace(Py_CHARMASK(s[i])))
684 i++;
685 j = i;
686 while (i < len && !isspace(Py_CHARMASK(s[i])))
687 i++;
688 if (j < i) {
689 if (maxsplit-- <= 0)
690 break;
691 item = PyString_FromStringAndSize(s+j, (int)(i-j));
692 if (item == NULL)
693 goto finally;
694 err = PyList_Append(list, item);
695 Py_DECREF(item);
696 if (err < 0)
697 goto finally;
698 while (i < len && isspace(Py_CHARMASK(s[i])))
699 i++;
700 j = i;
703 if (j < len) {
704 item = PyString_FromStringAndSize(s+j, (int)(len - j));
705 if (item == NULL)
706 goto finally;
707 err = PyList_Append(list, item);
708 Py_DECREF(item);
709 if (err < 0)
710 goto finally;
712 return list;
713 finally:
714 Py_DECREF(list);
715 return NULL;
719 static char split__doc__[] =
720 "S.split([sep [,maxsplit]]) -> list of strings\n\
722 Return a list of the words in the string S, using sep as the\n\
723 delimiter string. If maxsplit is given, at most maxsplit\n\
724 splits are done. If sep is not specified, any whitespace string\n\
725 is a separator.";
727 static PyObject *
728 string_split(PyStringObject *self, PyObject *args)
730 int len = PyString_GET_SIZE(self), n, i, j, err;
731 int maxsplit = -1;
732 const char *s = PyString_AS_STRING(self), *sub;
733 PyObject *list, *item, *subobj = Py_None;
735 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
736 return NULL;
737 if (maxsplit < 0)
738 maxsplit = INT_MAX;
739 if (subobj == Py_None)
740 return split_whitespace(s, len, maxsplit);
741 if (PyString_Check(subobj)) {
742 sub = PyString_AS_STRING(subobj);
743 n = PyString_GET_SIZE(subobj);
745 else if (PyUnicode_Check(subobj))
746 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
747 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
748 return NULL;
749 if (n == 0) {
750 PyErr_SetString(PyExc_ValueError, "empty separator");
751 return NULL;
754 list = PyList_New(0);
755 if (list == NULL)
756 return NULL;
758 i = j = 0;
759 while (i+n <= len) {
760 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
761 if (maxsplit-- <= 0)
762 break;
763 item = PyString_FromStringAndSize(s+j, (int)(i-j));
764 if (item == NULL)
765 goto fail;
766 err = PyList_Append(list, item);
767 Py_DECREF(item);
768 if (err < 0)
769 goto fail;
770 i = j = i + n;
772 else
773 i++;
775 item = PyString_FromStringAndSize(s+j, (int)(len-j));
776 if (item == NULL)
777 goto fail;
778 err = PyList_Append(list, item);
779 Py_DECREF(item);
780 if (err < 0)
781 goto fail;
783 return list;
785 fail:
786 Py_DECREF(list);
787 return NULL;
791 static char join__doc__[] =
792 "S.join(sequence) -> string\n\
794 Return a string which is the concatenation of the strings in the\n\
795 sequence. The separator between elements is S.";
797 static PyObject *
798 string_join(PyStringObject *self, PyObject *args)
800 char *sep = PyString_AS_STRING(self);
801 int seplen = PyString_GET_SIZE(self);
802 PyObject *res = NULL;
803 int reslen = 0;
804 char *p;
805 int seqlen = 0;
806 int sz = 100;
807 int i, slen, sz_incr;
808 PyObject *orig, *seq, *item;
810 if (!PyArg_ParseTuple(args, "O:join", &orig))
811 return NULL;
813 if (!(seq = PySequence_Fast(orig, ""))) {
814 if (PyErr_ExceptionMatches(PyExc_TypeError))
815 PyErr_Format(PyExc_TypeError,
816 "sequence expected, %.80s found",
817 orig->ob_type->tp_name);
818 return NULL;
820 /* From here on out, errors go through finally: for proper
821 * reference count manipulations.
823 seqlen = PySequence_Size(seq);
824 if (seqlen == 1) {
825 item = PySequence_Fast_GET_ITEM(seq, 0);
826 Py_INCREF(item);
827 Py_DECREF(seq);
828 return item;
831 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
832 goto finally;
834 p = PyString_AS_STRING(res);
836 for (i = 0; i < seqlen; i++) {
837 item = PySequence_Fast_GET_ITEM(seq, i);
838 if (!PyString_Check(item)){
839 if (PyUnicode_Check(item)) {
840 Py_DECREF(res);
841 Py_DECREF(seq);
842 return PyUnicode_Join((PyObject *)self, seq);
844 PyErr_Format(PyExc_TypeError,
845 "sequence item %i: expected string,"
846 " %.80s found",
847 i, item->ob_type->tp_name);
848 goto finally;
850 slen = PyString_GET_SIZE(item);
851 while (reslen + slen + seplen >= sz) {
852 /* at least double the size of the string */
853 sz_incr = slen + seplen > sz ? slen + seplen : sz;
854 if (_PyString_Resize(&res, sz + sz_incr)) {
855 goto finally;
857 sz += sz_incr;
858 p = PyString_AS_STRING(res) + reslen;
860 if (i > 0) {
861 memcpy(p, sep, seplen);
862 p += seplen;
863 reslen += seplen;
865 memcpy(p, PyString_AS_STRING(item), slen);
866 p += slen;
867 reslen += slen;
869 if (_PyString_Resize(&res, reslen))
870 goto finally;
871 Py_DECREF(seq);
872 return res;
874 finally:
875 Py_DECREF(seq);
876 Py_XDECREF(res);
877 return NULL;
882 static long
883 string_find_internal(PyStringObject *self, PyObject *args, int dir)
885 const char *s = PyString_AS_STRING(self), *sub;
886 int len = PyString_GET_SIZE(self);
887 int n, i = 0, last = INT_MAX;
888 PyObject *subobj;
890 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
891 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
892 return -2;
893 if (PyString_Check(subobj)) {
894 sub = PyString_AS_STRING(subobj);
895 n = PyString_GET_SIZE(subobj);
897 else if (PyUnicode_Check(subobj))
898 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
899 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
900 return -2;
902 if (last > len)
903 last = len;
904 if (last < 0)
905 last += len;
906 if (last < 0)
907 last = 0;
908 if (i < 0)
909 i += len;
910 if (i < 0)
911 i = 0;
913 if (dir > 0) {
914 if (n == 0 && i <= last)
915 return (long)i;
916 last -= n;
917 for (; i <= last; ++i)
918 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
919 return (long)i;
921 else {
922 int j;
924 if (n == 0 && i <= last)
925 return (long)last;
926 for (j = last-n; j >= i; --j)
927 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
928 return (long)j;
931 return -1;
935 static char find__doc__[] =
936 "S.find(sub [,start [,end]]) -> int\n\
938 Return the lowest index in S where substring sub is found,\n\
939 such that sub is contained within s[start,end]. Optional\n\
940 arguments start and end are interpreted as in slice notation.\n\
942 Return -1 on failure.";
944 static PyObject *
945 string_find(PyStringObject *self, PyObject *args)
947 long result = string_find_internal(self, args, +1);
948 if (result == -2)
949 return NULL;
950 return PyInt_FromLong(result);
954 static char index__doc__[] =
955 "S.index(sub [,start [,end]]) -> int\n\
957 Like S.find() but raise ValueError when the substring is not found.";
959 static PyObject *
960 string_index(PyStringObject *self, PyObject *args)
962 long result = string_find_internal(self, args, +1);
963 if (result == -2)
964 return NULL;
965 if (result == -1) {
966 PyErr_SetString(PyExc_ValueError,
967 "substring not found in string.index");
968 return NULL;
970 return PyInt_FromLong(result);
974 static char rfind__doc__[] =
975 "S.rfind(sub [,start [,end]]) -> int\n\
977 Return the highest index in S where substring sub is found,\n\
978 such that sub is contained within s[start,end]. Optional\n\
979 arguments start and end are interpreted as in slice notation.\n\
981 Return -1 on failure.";
983 static PyObject *
984 string_rfind(PyStringObject *self, PyObject *args)
986 long result = string_find_internal(self, args, -1);
987 if (result == -2)
988 return NULL;
989 return PyInt_FromLong(result);
993 static char rindex__doc__[] =
994 "S.rindex(sub [,start [,end]]) -> int\n\
996 Like S.rfind() but raise ValueError when the substring is not found.";
998 static PyObject *
999 string_rindex(PyStringObject *self, PyObject *args)
1001 long result = string_find_internal(self, args, -1);
1002 if (result == -2)
1003 return NULL;
1004 if (result == -1) {
1005 PyErr_SetString(PyExc_ValueError,
1006 "substring not found in string.rindex");
1007 return NULL;
1009 return PyInt_FromLong(result);
1013 static PyObject *
1014 do_strip(PyStringObject *self, PyObject *args, int striptype)
1016 char *s = PyString_AS_STRING(self);
1017 int len = PyString_GET_SIZE(self), i, j;
1019 if (!PyArg_ParseTuple(args, ":strip"))
1020 return NULL;
1022 i = 0;
1023 if (striptype != RIGHTSTRIP) {
1024 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1025 i++;
1029 j = len;
1030 if (striptype != LEFTSTRIP) {
1031 do {
1032 j--;
1033 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1034 j++;
1037 if (i == 0 && j == len) {
1038 Py_INCREF(self);
1039 return (PyObject*)self;
1041 else
1042 return PyString_FromStringAndSize(s+i, j-i);
1046 static char strip__doc__[] =
1047 "S.strip() -> string\n\
1049 Return a copy of the string S with leading and trailing\n\
1050 whitespace removed.";
1052 static PyObject *
1053 string_strip(PyStringObject *self, PyObject *args)
1055 return do_strip(self, args, BOTHSTRIP);
1059 static char lstrip__doc__[] =
1060 "S.lstrip() -> string\n\
1062 Return a copy of the string S with leading whitespace removed.";
1064 static PyObject *
1065 string_lstrip(PyStringObject *self, PyObject *args)
1067 return do_strip(self, args, LEFTSTRIP);
1071 static char rstrip__doc__[] =
1072 "S.rstrip() -> string\n\
1074 Return a copy of the string S with trailing whitespace removed.";
1076 static PyObject *
1077 string_rstrip(PyStringObject *self, PyObject *args)
1079 return do_strip(self, args, RIGHTSTRIP);
1083 static char lower__doc__[] =
1084 "S.lower() -> string\n\
1086 Return a copy of the string S converted to lowercase.";
1088 static PyObject *
1089 string_lower(PyStringObject *self, PyObject *args)
1091 char *s = PyString_AS_STRING(self), *s_new;
1092 int i, n = PyString_GET_SIZE(self);
1093 PyObject *new;
1095 if (!PyArg_ParseTuple(args, ":lower"))
1096 return NULL;
1097 new = PyString_FromStringAndSize(NULL, n);
1098 if (new == NULL)
1099 return NULL;
1100 s_new = PyString_AsString(new);
1101 for (i = 0; i < n; i++) {
1102 int c = Py_CHARMASK(*s++);
1103 if (isupper(c)) {
1104 *s_new = tolower(c);
1105 } else
1106 *s_new = c;
1107 s_new++;
1109 return new;
1113 static char upper__doc__[] =
1114 "S.upper() -> string\n\
1116 Return a copy of the string S converted to uppercase.";
1118 static PyObject *
1119 string_upper(PyStringObject *self, PyObject *args)
1121 char *s = PyString_AS_STRING(self), *s_new;
1122 int i, n = PyString_GET_SIZE(self);
1123 PyObject *new;
1125 if (!PyArg_ParseTuple(args, ":upper"))
1126 return NULL;
1127 new = PyString_FromStringAndSize(NULL, n);
1128 if (new == NULL)
1129 return NULL;
1130 s_new = PyString_AsString(new);
1131 for (i = 0; i < n; i++) {
1132 int c = Py_CHARMASK(*s++);
1133 if (islower(c)) {
1134 *s_new = toupper(c);
1135 } else
1136 *s_new = c;
1137 s_new++;
1139 return new;
1143 static char title__doc__[] =
1144 "S.title() -> string\n\
1146 Return a titlecased version of S, i.e. words start with uppercase\n\
1147 characters, all remaining cased characters have lowercase.";
1149 static PyObject*
1150 string_title(PyUnicodeObject *self, PyObject *args)
1152 char *s = PyString_AS_STRING(self), *s_new;
1153 int i, n = PyString_GET_SIZE(self);
1154 int previous_is_cased = 0;
1155 PyObject *new;
1157 if (!PyArg_ParseTuple(args, ":title"))
1158 return NULL;
1159 new = PyString_FromStringAndSize(NULL, n);
1160 if (new == NULL)
1161 return NULL;
1162 s_new = PyString_AsString(new);
1163 for (i = 0; i < n; i++) {
1164 int c = Py_CHARMASK(*s++);
1165 if (islower(c)) {
1166 if (!previous_is_cased)
1167 c = toupper(c);
1168 previous_is_cased = 1;
1169 } else if (isupper(c)) {
1170 if (previous_is_cased)
1171 c = tolower(c);
1172 previous_is_cased = 1;
1173 } else
1174 previous_is_cased = 0;
1175 *s_new++ = c;
1177 return new;
1180 static char capitalize__doc__[] =
1181 "S.capitalize() -> string\n\
1183 Return a copy of the string S with only its first character\n\
1184 capitalized.";
1186 static PyObject *
1187 string_capitalize(PyStringObject *self, PyObject *args)
1189 char *s = PyString_AS_STRING(self), *s_new;
1190 int i, n = PyString_GET_SIZE(self);
1191 PyObject *new;
1193 if (!PyArg_ParseTuple(args, ":capitalize"))
1194 return NULL;
1195 new = PyString_FromStringAndSize(NULL, n);
1196 if (new == NULL)
1197 return NULL;
1198 s_new = PyString_AsString(new);
1199 if (0 < n) {
1200 int c = Py_CHARMASK(*s++);
1201 if (islower(c))
1202 *s_new = toupper(c);
1203 else
1204 *s_new = c;
1205 s_new++;
1207 for (i = 1; i < n; i++) {
1208 int c = Py_CHARMASK(*s++);
1209 if (isupper(c))
1210 *s_new = tolower(c);
1211 else
1212 *s_new = c;
1213 s_new++;
1215 return new;
1219 static char count__doc__[] =
1220 "S.count(sub[, start[, end]]) -> int\n\
1222 Return the number of occurrences of substring sub in string\n\
1223 S[start:end]. Optional arguments start and end are\n\
1224 interpreted as in slice notation.";
1226 static PyObject *
1227 string_count(PyStringObject *self, PyObject *args)
1229 const char *s = PyString_AS_STRING(self), *sub;
1230 int len = PyString_GET_SIZE(self), n;
1231 int i = 0, last = INT_MAX;
1232 int m, r;
1233 PyObject *subobj;
1235 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1236 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
1237 return NULL;
1239 if (PyString_Check(subobj)) {
1240 sub = PyString_AS_STRING(subobj);
1241 n = PyString_GET_SIZE(subobj);
1243 else if (PyUnicode_Check(subobj))
1244 return PyInt_FromLong(
1245 PyUnicode_Count((PyObject *)self, subobj, i, last));
1246 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1247 return NULL;
1249 if (last > len)
1250 last = len;
1251 if (last < 0)
1252 last += len;
1253 if (last < 0)
1254 last = 0;
1255 if (i < 0)
1256 i += len;
1257 if (i < 0)
1258 i = 0;
1259 m = last + 1 - n;
1260 if (n == 0)
1261 return PyInt_FromLong((long) (m-i));
1263 r = 0;
1264 while (i < m) {
1265 if (!memcmp(s+i, sub, n)) {
1266 r++;
1267 i += n;
1268 } else {
1269 i++;
1272 return PyInt_FromLong((long) r);
1276 static char swapcase__doc__[] =
1277 "S.swapcase() -> string\n\
1279 Return a copy of the string S with uppercase characters\n\
1280 converted to lowercase and vice versa.";
1282 static PyObject *
1283 string_swapcase(PyStringObject *self, PyObject *args)
1285 char *s = PyString_AS_STRING(self), *s_new;
1286 int i, n = PyString_GET_SIZE(self);
1287 PyObject *new;
1289 if (!PyArg_ParseTuple(args, ":swapcase"))
1290 return NULL;
1291 new = PyString_FromStringAndSize(NULL, n);
1292 if (new == NULL)
1293 return NULL;
1294 s_new = PyString_AsString(new);
1295 for (i = 0; i < n; i++) {
1296 int c = Py_CHARMASK(*s++);
1297 if (islower(c)) {
1298 *s_new = toupper(c);
1300 else if (isupper(c)) {
1301 *s_new = tolower(c);
1303 else
1304 *s_new = c;
1305 s_new++;
1307 return new;
1311 static char translate__doc__[] =
1312 "S.translate(table [,deletechars]) -> string\n\
1314 Return a copy of the string S, where all characters occurring\n\
1315 in the optional argument deletechars are removed, and the\n\
1316 remaining characters have been mapped through the given\n\
1317 translation table, which must be a string of length 256.";
1319 static PyObject *
1320 string_translate(PyStringObject *self, PyObject *args)
1322 register char *input, *output;
1323 register const char *table;
1324 register int i, c, changed = 0;
1325 PyObject *input_obj = (PyObject*)self;
1326 const char *table1, *output_start, *del_table=NULL;
1327 int inlen, tablen, dellen = 0;
1328 PyObject *result;
1329 int trans_table[256];
1330 PyObject *tableobj, *delobj = NULL;
1332 if (!PyArg_ParseTuple(args, "O|O:translate",
1333 &tableobj, &delobj))
1334 return NULL;
1336 if (PyString_Check(tableobj)) {
1337 table1 = PyString_AS_STRING(tableobj);
1338 tablen = PyString_GET_SIZE(tableobj);
1340 else if (PyUnicode_Check(tableobj)) {
1341 /* Unicode .translate() does not support the deletechars
1342 parameter; instead a mapping to None will cause characters
1343 to be deleted. */
1344 if (delobj != NULL) {
1345 PyErr_SetString(PyExc_TypeError,
1346 "deletions are implemented differently for unicode");
1347 return NULL;
1349 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1351 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
1352 return NULL;
1354 if (delobj != NULL) {
1355 if (PyString_Check(delobj)) {
1356 del_table = PyString_AS_STRING(delobj);
1357 dellen = PyString_GET_SIZE(delobj);
1359 else if (PyUnicode_Check(delobj)) {
1360 PyErr_SetString(PyExc_TypeError,
1361 "deletions are implemented differently for unicode");
1362 return NULL;
1364 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1365 return NULL;
1367 if (tablen != 256) {
1368 PyErr_SetString(PyExc_ValueError,
1369 "translation table must be 256 characters long");
1370 return NULL;
1373 else {
1374 del_table = NULL;
1375 dellen = 0;
1378 table = table1;
1379 inlen = PyString_Size(input_obj);
1380 result = PyString_FromStringAndSize((char *)NULL, inlen);
1381 if (result == NULL)
1382 return NULL;
1383 output_start = output = PyString_AsString(result);
1384 input = PyString_AsString(input_obj);
1386 if (dellen == 0) {
1387 /* If no deletions are required, use faster code */
1388 for (i = inlen; --i >= 0; ) {
1389 c = Py_CHARMASK(*input++);
1390 if (Py_CHARMASK((*output++ = table[c])) != c)
1391 changed = 1;
1393 if (changed)
1394 return result;
1395 Py_DECREF(result);
1396 Py_INCREF(input_obj);
1397 return input_obj;
1400 for (i = 0; i < 256; i++)
1401 trans_table[i] = Py_CHARMASK(table[i]);
1403 for (i = 0; i < dellen; i++)
1404 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1406 for (i = inlen; --i >= 0; ) {
1407 c = Py_CHARMASK(*input++);
1408 if (trans_table[c] != -1)
1409 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1410 continue;
1411 changed = 1;
1413 if (!changed) {
1414 Py_DECREF(result);
1415 Py_INCREF(input_obj);
1416 return input_obj;
1418 /* Fix the size of the resulting string */
1419 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1420 return NULL;
1421 return result;
1425 /* What follows is used for implementing replace(). Perry Stoll. */
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Searches the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the index into MEM of the match, or -1 if there
  is none.  When PAT_LEN is greater than LEN the result is always -1.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
	/* Highest start index at which the pattern still fits in MEM. */
	int last_start = len - pat_len;
	int pos;

	for (pos = 0; pos <= last_start; pos++) {
		/* Cheap first-byte test before the full comparison. */
		if (mem[pos] != pat[0])
			continue;
		if (memcmp(mem + pos, pat, pat_len) == 0)
			return pos;
	}
	return -1;
}
/*
  mymemcnt

  Return the number of non-overlapping occurrences of PAT in MEM,
  scanning left to right:
    mem=1111  and pat==11 returns 2.
    mem=11111 and pat==11 also returns 2.
*/
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
	int hits = 0;

	for (;;) {
		int where;

		if (len < 0)
			break;
		where = mymemfind(mem, len, pat, pat_len);
		if (where == -1)
			break;
		/* Resume the search just past the match. */
		mem += where + pat_len;
		len -= where + pat_len;
		hits++;
	}
	return hits;
}
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB.

   If length of PAT is greater than length of STR, STR is empty, or
   there are no occurrences of PAT in STR (or COUNT is 0), then the
   original string is returned.  Otherwise, a new buffer is allocated
   here with PyMem_MALLOC and returned; the caller must release it.

   COUNT limits the number of replacements; a negative COUNT means
   "replace all".

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
             const char *pat, int pat_len,	/* pattern string to find */
             const char *sub, int sub_len,	/* substitution string */
             int count,				/* number of replacements */
	     int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len;

	if (len == 0 || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = mymemcnt(str, len, pat, pat_len);
	if (count < 0)
		count = INT_MAX;
	else if (nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;
	new_len = len + nfound*(sub_len - pat_len);

	/* new_len is 0 when every byte of the input is removed.  A
	   zero-byte allocation may legitimately yield NULL, which would be
	   mistaken for out-of-memory, so always request at least one byte;
	   the caller still needs something to free. */
	new_s = (char *)PyMem_MALLOC(new_len > 0 ? new_len : 1);
	if (new_s == NULL)
		return NULL;

	*out_len = new_len;
	out_s = new_s;

	while (len > 0) {
		/* find index of next instance of pattern */
		offset = mymemfind(str, len, pat, pat_len);
		/* if not found,  break out of loop */
		if (offset == -1)
			break;

		/* copy non matching part of input string */
		memcpy(new_s, str, offset); /* copy part of str before pat */
		str += offset + pat_len; /* move str past pattern */
		len -= offset + pat_len; /* reduce length of str remaining */

		/* copy substitute into the output string */
		new_s += offset; /* move new_s to dest for sub string */
		memcpy(new_s, sub, sub_len); /* copy substring into new_s */
		new_s += sub_len; /* offset new_s past sub string */

		/* break when we've done count replacements */
		if (--count == 0)
			break;
	}
	/* copy any remaining values into output string */
	if (len > 0)
		memcpy(new_s, str, len);
	return out_s;

  return_same:
	*out_len = -1;
	return (char*)str;	/* have to cast away constness here */
}
1556 static char replace__doc__[] =
1557 "S.replace (old, new[, maxsplit]) -> string\n\
1559 Return a copy of string S with all occurrences of substring\n\
1560 old replaced by new. If the optional argument maxsplit is\n\
1561 given, only the first maxsplit occurrences are replaced.";
1563 static PyObject *
1564 string_replace(PyStringObject *self, PyObject *args)
1566 const char *str = PyString_AS_STRING(self), *sub, *repl;
1567 char *new_s;
1568 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1569 int count = -1;
1570 PyObject *new;
1571 PyObject *subobj, *replobj;
1573 if (!PyArg_ParseTuple(args, "OO|i:replace",
1574 &subobj, &replobj, &count))
1575 return NULL;
1577 if (PyString_Check(subobj)) {
1578 sub = PyString_AS_STRING(subobj);
1579 sub_len = PyString_GET_SIZE(subobj);
1581 else if (PyUnicode_Check(subobj))
1582 return PyUnicode_Replace((PyObject *)self,
1583 subobj, replobj, count);
1584 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1585 return NULL;
1587 if (PyString_Check(replobj)) {
1588 repl = PyString_AS_STRING(replobj);
1589 repl_len = PyString_GET_SIZE(replobj);
1591 else if (PyUnicode_Check(replobj))
1592 return PyUnicode_Replace((PyObject *)self,
1593 subobj, replobj, count);
1594 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1595 return NULL;
1597 if (sub_len <= 0) {
1598 PyErr_SetString(PyExc_ValueError, "empty pattern string");
1599 return NULL;
1601 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
1602 if (new_s == NULL) {
1603 PyErr_NoMemory();
1604 return NULL;
1606 if (out_len == -1) {
1607 /* we're returning another reference to self */
1608 new = (PyObject*)self;
1609 Py_INCREF(new);
1611 else {
1612 new = PyString_FromStringAndSize(new_s, out_len);
1613 PyMem_FREE(new_s);
1615 return new;
1619 static char startswith__doc__[] =
1620 "S.startswith(prefix[, start[, end]]) -> int\n\
1622 Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1623 optional start, test S beginning at that position. With optional end, stop\n\
1624 comparing S at that position.";
1626 static PyObject *
1627 string_startswith(PyStringObject *self, PyObject *args)
1629 const char* str = PyString_AS_STRING(self);
1630 int len = PyString_GET_SIZE(self);
1631 const char* prefix;
1632 int plen;
1633 int start = 0;
1634 int end = -1;
1635 PyObject *subobj;
1637 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1638 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1639 return NULL;
1640 if (PyString_Check(subobj)) {
1641 prefix = PyString_AS_STRING(subobj);
1642 plen = PyString_GET_SIZE(subobj);
1644 else if (PyUnicode_Check(subobj))
1645 return PyInt_FromLong(
1646 PyUnicode_Tailmatch((PyObject *)self,
1647 subobj, start, end, -1));
1648 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
1649 return NULL;
1651 /* adopt Java semantics for index out of range. it is legal for
1652 * offset to be == plen, but this only returns true if prefix is
1653 * the empty string.
1655 if (start < 0 || start+plen > len)
1656 return PyInt_FromLong(0);
1658 if (!memcmp(str+start, prefix, plen)) {
1659 /* did the match end after the specified end? */
1660 if (end < 0)
1661 return PyInt_FromLong(1);
1662 else if (end - start < plen)
1663 return PyInt_FromLong(0);
1664 else
1665 return PyInt_FromLong(1);
1667 else return PyInt_FromLong(0);
1671 static char endswith__doc__[] =
1672 "S.endswith(suffix[, start[, end]]) -> int\n\
1674 Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1675 optional start, test S beginning at that position. With optional end, stop\n\
1676 comparing S at that position.";
1678 static PyObject *
1679 string_endswith(PyStringObject *self, PyObject *args)
1681 const char* str = PyString_AS_STRING(self);
1682 int len = PyString_GET_SIZE(self);
1683 const char* suffix;
1684 int slen;
1685 int start = 0;
1686 int end = -1;
1687 int lower, upper;
1688 PyObject *subobj;
1690 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1691 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1692 return NULL;
1693 if (PyString_Check(subobj)) {
1694 suffix = PyString_AS_STRING(subobj);
1695 slen = PyString_GET_SIZE(subobj);
1697 else if (PyUnicode_Check(subobj))
1698 return PyInt_FromLong(
1699 PyUnicode_Tailmatch((PyObject *)self,
1700 subobj, start, end, +1));
1701 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
1702 return NULL;
1704 if (start < 0 || start > len || slen > len)
1705 return PyInt_FromLong(0);
1707 upper = (end >= 0 && end <= len) ? end : len;
1708 lower = (upper - slen) > start ? (upper - slen) : start;
1710 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
1711 return PyInt_FromLong(1);
1712 else return PyInt_FromLong(0);
1716 static char encode__doc__[] =
1717 "S.encode([encoding[,errors]]) -> string\n\
1719 Return an encoded string version of S. Default encoding is the current\n\
1720 default string encoding. errors may be given to set a different error\n\
1721 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1722 a ValueError. Other possible values are 'ignore' and 'replace'.";
1724 static PyObject *
1725 string_encode(PyStringObject *self, PyObject *args)
1727 char *encoding = NULL;
1728 char *errors = NULL;
1729 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
1730 return NULL;
1731 return PyString_AsEncodedString((PyObject *)self, encoding, errors);
1735 static char expandtabs__doc__[] =
1736 "S.expandtabs([tabsize]) -> string\n\
1738 Return a copy of S where all tab characters are expanded using spaces.\n\
1739 If tabsize is not given, a tab size of 8 characters is assumed.";
1741 static PyObject*
1742 string_expandtabs(PyStringObject *self, PyObject *args)
1744 const char *e, *p;
1745 char *q;
1746 int i, j;
1747 PyObject *u;
1748 int tabsize = 8;
1750 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1751 return NULL;
1753 /* First pass: determine size of output string */
1754 i = j = 0;
1755 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1756 for (p = PyString_AS_STRING(self); p < e; p++)
1757 if (*p == '\t') {
1758 if (tabsize > 0)
1759 j += tabsize - (j % tabsize);
1761 else {
1762 j++;
1763 if (*p == '\n' || *p == '\r') {
1764 i += j;
1765 j = 0;
1769 /* Second pass: create output string and fill it */
1770 u = PyString_FromStringAndSize(NULL, i + j);
1771 if (!u)
1772 return NULL;
1774 j = 0;
1775 q = PyString_AS_STRING(u);
1777 for (p = PyString_AS_STRING(self); p < e; p++)
1778 if (*p == '\t') {
1779 if (tabsize > 0) {
1780 i = tabsize - (j % tabsize);
1781 j += i;
1782 while (i--)
1783 *q++ = ' ';
1786 else {
1787 j++;
1788 *q++ = *p;
1789 if (*p == '\n' || *p == '\r')
1790 j = 0;
1793 return u;
1796 static
1797 PyObject *pad(PyStringObject *self,
1798 int left,
1799 int right,
1800 char fill)
1802 PyObject *u;
1804 if (left < 0)
1805 left = 0;
1806 if (right < 0)
1807 right = 0;
1809 if (left == 0 && right == 0) {
1810 Py_INCREF(self);
1811 return (PyObject *)self;
1814 u = PyString_FromStringAndSize(NULL,
1815 left + PyString_GET_SIZE(self) + right);
1816 if (u) {
1817 if (left)
1818 memset(PyString_AS_STRING(u), fill, left);
1819 memcpy(PyString_AS_STRING(u) + left,
1820 PyString_AS_STRING(self),
1821 PyString_GET_SIZE(self));
1822 if (right)
1823 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1824 fill, right);
1827 return u;
1830 static char ljust__doc__[] =
1831 "S.ljust(width) -> string\n\
1833 Return S left justified in a string of length width. Padding is\n\
1834 done using spaces.";
1836 static PyObject *
1837 string_ljust(PyStringObject *self, PyObject *args)
1839 int width;
1840 if (!PyArg_ParseTuple(args, "i:ljust", &width))
1841 return NULL;
1843 if (PyString_GET_SIZE(self) >= width) {
1844 Py_INCREF(self);
1845 return (PyObject*) self;
1848 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1852 static char rjust__doc__[] =
1853 "S.rjust(width) -> string\n\
1855 Return S right justified in a string of length width. Padding is\n\
1856 done using spaces.";
1858 static PyObject *
1859 string_rjust(PyStringObject *self, PyObject *args)
1861 int width;
1862 if (!PyArg_ParseTuple(args, "i:rjust", &width))
1863 return NULL;
1865 if (PyString_GET_SIZE(self) >= width) {
1866 Py_INCREF(self);
1867 return (PyObject*) self;
1870 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1874 static char center__doc__[] =
1875 "S.center(width) -> string\n\
1877 Return S centered in a string of length width. Padding is done\n\
1878 using spaces.";
1880 static PyObject *
1881 string_center(PyStringObject *self, PyObject *args)
1883 int marg, left;
1884 int width;
1886 if (!PyArg_ParseTuple(args, "i:center", &width))
1887 return NULL;
1889 if (PyString_GET_SIZE(self) >= width) {
1890 Py_INCREF(self);
1891 return (PyObject*) self;
1894 marg = width - PyString_GET_SIZE(self);
1895 left = marg / 2 + (marg & width & 1);
1897 return pad(self, left, marg - left, ' ');
1900 #if 0
1901 static char zfill__doc__[] =
1902 "S.zfill(width) -> string\n\
1904 Pad a numeric string x with zeros on the left, to fill a field\n\
1905 of the specified width. The string x is never truncated.";
1907 static PyObject *
1908 string_zfill(PyStringObject *self, PyObject *args)
1910 int fill;
1911 PyObject *u;
1912 char *str;
1914 int width;
1915 if (!PyArg_ParseTuple(args, "i:zfill", &width))
1916 return NULL;
1918 if (PyString_GET_SIZE(self) >= width) {
1919 Py_INCREF(self);
1920 return (PyObject*) self;
1923 fill = width - PyString_GET_SIZE(self);
1925 u = pad(self, fill, 0, '0');
1926 if (u == NULL)
1927 return NULL;
1929 str = PyString_AS_STRING(u);
1930 if (str[fill] == '+' || str[fill] == '-') {
1931 /* move sign to beginning of string */
1932 str[0] = str[fill];
1933 str[fill] = '0';
1936 return u;
1938 #endif
1940 static char isspace__doc__[] =
1941 "S.isspace() -> int\n\
1943 Return 1 if there are only whitespace characters in S,\n\
1944 0 otherwise.";
1946 static PyObject*
1947 string_isspace(PyStringObject *self, PyObject *args)
1949 register const unsigned char *p
1950 = (unsigned char *) PyString_AS_STRING(self);
1951 register const unsigned char *e;
1953 if (!PyArg_NoArgs(args))
1954 return NULL;
1956 /* Shortcut for single character strings */
1957 if (PyString_GET_SIZE(self) == 1 &&
1958 isspace(*p))
1959 return PyInt_FromLong(1);
1961 /* Special case for empty strings */
1962 if (PyString_GET_SIZE(self) == 0)
1963 return PyInt_FromLong(0);
1965 e = p + PyString_GET_SIZE(self);
1966 for (; p < e; p++) {
1967 if (!isspace(*p))
1968 return PyInt_FromLong(0);
1970 return PyInt_FromLong(1);
1974 static char isalpha__doc__[] =
1975 "S.isalpha() -> int\n\
1977 Return 1 if all characters in S are alphabetic\n\
1978 and there is at least one character in S, 0 otherwise.";
1980 static PyObject*
1981 string_isalpha(PyUnicodeObject *self, PyObject *args)
1983 register const unsigned char *p
1984 = (unsigned char *) PyString_AS_STRING(self);
1985 register const unsigned char *e;
1987 if (!PyArg_NoArgs(args))
1988 return NULL;
1990 /* Shortcut for single character strings */
1991 if (PyString_GET_SIZE(self) == 1 &&
1992 isalpha(*p))
1993 return PyInt_FromLong(1);
1995 /* Special case for empty strings */
1996 if (PyString_GET_SIZE(self) == 0)
1997 return PyInt_FromLong(0);
1999 e = p + PyString_GET_SIZE(self);
2000 for (; p < e; p++) {
2001 if (!isalpha(*p))
2002 return PyInt_FromLong(0);
2004 return PyInt_FromLong(1);
2008 static char isalnum__doc__[] =
2009 "S.isalnum() -> int\n\
2011 Return 1 if all characters in S are alphanumeric\n\
2012 and there is at least one character in S, 0 otherwise.";
2014 static PyObject*
2015 string_isalnum(PyUnicodeObject *self, PyObject *args)
2017 register const unsigned char *p
2018 = (unsigned char *) PyString_AS_STRING(self);
2019 register const unsigned char *e;
2021 if (!PyArg_NoArgs(args))
2022 return NULL;
2024 /* Shortcut for single character strings */
2025 if (PyString_GET_SIZE(self) == 1 &&
2026 isalnum(*p))
2027 return PyInt_FromLong(1);
2029 /* Special case for empty strings */
2030 if (PyString_GET_SIZE(self) == 0)
2031 return PyInt_FromLong(0);
2033 e = p + PyString_GET_SIZE(self);
2034 for (; p < e; p++) {
2035 if (!isalnum(*p))
2036 return PyInt_FromLong(0);
2038 return PyInt_FromLong(1);
2042 static char isdigit__doc__[] =
2043 "S.isdigit() -> int\n\
2045 Return 1 if there are only digit characters in S,\n\
2046 0 otherwise.";
2048 static PyObject*
2049 string_isdigit(PyStringObject *self, PyObject *args)
2051 register const unsigned char *p
2052 = (unsigned char *) PyString_AS_STRING(self);
2053 register const unsigned char *e;
2055 if (!PyArg_NoArgs(args))
2056 return NULL;
2058 /* Shortcut for single character strings */
2059 if (PyString_GET_SIZE(self) == 1 &&
2060 isdigit(*p))
2061 return PyInt_FromLong(1);
2063 /* Special case for empty strings */
2064 if (PyString_GET_SIZE(self) == 0)
2065 return PyInt_FromLong(0);
2067 e = p + PyString_GET_SIZE(self);
2068 for (; p < e; p++) {
2069 if (!isdigit(*p))
2070 return PyInt_FromLong(0);
2072 return PyInt_FromLong(1);
2076 static char islower__doc__[] =
2077 "S.islower() -> int\n\
2079 Return 1 if all cased characters in S are lowercase and there is\n\
2080 at least one cased character in S, 0 otherwise.";
2082 static PyObject*
2083 string_islower(PyStringObject *self, PyObject *args)
2085 register const unsigned char *p
2086 = (unsigned char *) PyString_AS_STRING(self);
2087 register const unsigned char *e;
2088 int cased;
2090 if (!PyArg_NoArgs(args))
2091 return NULL;
2093 /* Shortcut for single character strings */
2094 if (PyString_GET_SIZE(self) == 1)
2095 return PyInt_FromLong(islower(*p) != 0);
2097 /* Special case for empty strings */
2098 if (PyString_GET_SIZE(self) == 0)
2099 return PyInt_FromLong(0);
2101 e = p + PyString_GET_SIZE(self);
2102 cased = 0;
2103 for (; p < e; p++) {
2104 if (isupper(*p))
2105 return PyInt_FromLong(0);
2106 else if (!cased && islower(*p))
2107 cased = 1;
2109 return PyInt_FromLong(cased);
2113 static char isupper__doc__[] =
2114 "S.isupper() -> int\n\
2116 Return 1 if all cased characters in S are uppercase and there is\n\
2117 at least one cased character in S, 0 otherwise.";
2119 static PyObject*
2120 string_isupper(PyStringObject *self, PyObject *args)
2122 register const unsigned char *p
2123 = (unsigned char *) PyString_AS_STRING(self);
2124 register const unsigned char *e;
2125 int cased;
2127 if (!PyArg_NoArgs(args))
2128 return NULL;
2130 /* Shortcut for single character strings */
2131 if (PyString_GET_SIZE(self) == 1)
2132 return PyInt_FromLong(isupper(*p) != 0);
2134 /* Special case for empty strings */
2135 if (PyString_GET_SIZE(self) == 0)
2136 return PyInt_FromLong(0);
2138 e = p + PyString_GET_SIZE(self);
2139 cased = 0;
2140 for (; p < e; p++) {
2141 if (islower(*p))
2142 return PyInt_FromLong(0);
2143 else if (!cased && isupper(*p))
2144 cased = 1;
2146 return PyInt_FromLong(cased);
2150 static char istitle__doc__[] =
2151 "S.istitle() -> int\n\
2153 Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2154 may only follow uncased characters and lowercase characters only cased\n\
2155 ones. Return 0 otherwise.";
2157 static PyObject*
2158 string_istitle(PyStringObject *self, PyObject *args)
2160 register const unsigned char *p
2161 = (unsigned char *) PyString_AS_STRING(self);
2162 register const unsigned char *e;
2163 int cased, previous_is_cased;
2165 if (!PyArg_NoArgs(args))
2166 return NULL;
2168 /* Shortcut for single character strings */
2169 if (PyString_GET_SIZE(self) == 1)
2170 return PyInt_FromLong(isupper(*p) != 0);
2172 /* Special case for empty strings */
2173 if (PyString_GET_SIZE(self) == 0)
2174 return PyInt_FromLong(0);
2176 e = p + PyString_GET_SIZE(self);
2177 cased = 0;
2178 previous_is_cased = 0;
2179 for (; p < e; p++) {
2180 register const unsigned char ch = *p;
2182 if (isupper(ch)) {
2183 if (previous_is_cased)
2184 return PyInt_FromLong(0);
2185 previous_is_cased = 1;
2186 cased = 1;
2188 else if (islower(ch)) {
2189 if (!previous_is_cased)
2190 return PyInt_FromLong(0);
2191 previous_is_cased = 1;
2192 cased = 1;
2194 else
2195 previous_is_cased = 0;
2197 return PyInt_FromLong(cased);
/* Docstring for S.splitlines(); the signature line previously carried an
   unbalanced closing bracket. */
static char splitlines__doc__[] =
"S.splitlines([keepends]) -> list of strings\n\
\n\
Return a list of the lines in S, breaking at line boundaries.\n\
Line breaks are not included in the resulting list unless keepends\n\
is given and true.";
2208 #define SPLIT_APPEND(data, left, right) \
2209 str = PyString_FromStringAndSize(data + left, right - left); \
2210 if (!str) \
2211 goto onError; \
2212 if (PyList_Append(list, str)) { \
2213 Py_DECREF(str); \
2214 goto onError; \
2216 else \
2217 Py_DECREF(str);
2219 static PyObject*
2220 string_splitlines(PyStringObject *self, PyObject *args)
2222 register int i;
2223 register int j;
2224 int len;
2225 int keepends = 0;
2226 PyObject *list;
2227 PyObject *str;
2228 char *data;
2230 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
2231 return NULL;
2233 data = PyString_AS_STRING(self);
2234 len = PyString_GET_SIZE(self);
2236 list = PyList_New(0);
2237 if (!list)
2238 goto onError;
2240 for (i = j = 0; i < len; ) {
2241 int eol;
2243 /* Find a line and append it */
2244 while (i < len && data[i] != '\n' && data[i] != '\r')
2245 i++;
2247 /* Skip the line break reading CRLF as one line break */
2248 eol = i;
2249 if (i < len) {
2250 if (data[i] == '\r' && i + 1 < len &&
2251 data[i+1] == '\n')
2252 i += 2;
2253 else
2254 i++;
2255 if (keepends)
2256 eol = i;
2258 SPLIT_APPEND(data, j, eol);
2259 j = i;
2261 if (j < len) {
2262 SPLIT_APPEND(data, j, len);
2265 return list;
2267 onError:
2268 Py_DECREF(list);
2269 return NULL;
2272 #undef SPLIT_APPEND
2275 static PyMethodDef
2276 string_methods[] = {
2277 /* Counterparts of the obsolete stropmodule functions; except
2278 string.maketrans(). */
2279 {"join", (PyCFunction)string_join, 1, join__doc__},
2280 {"split", (PyCFunction)string_split, 1, split__doc__},
2281 {"lower", (PyCFunction)string_lower, 1, lower__doc__},
2282 {"upper", (PyCFunction)string_upper, 1, upper__doc__},
2283 {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2284 {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2285 {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2286 {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2287 {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
2288 {"isalpha", (PyCFunction)string_isalpha, 0, isalpha__doc__},
2289 {"isalnum", (PyCFunction)string_isalnum, 0, isalnum__doc__},
2290 {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2291 {"count", (PyCFunction)string_count, 1, count__doc__},
2292 {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
2293 {"find", (PyCFunction)string_find, 1, find__doc__},
2294 {"index", (PyCFunction)string_index, 1, index__doc__},
2295 {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
2296 {"replace", (PyCFunction)string_replace, 1, replace__doc__},
2297 {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
2298 {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
2299 {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
2300 {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
2301 {"strip", (PyCFunction)string_strip, 1, strip__doc__},
2302 {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
2303 {"translate", (PyCFunction)string_translate, 1, translate__doc__},
2304 {"title", (PyCFunction)string_title, 1, title__doc__},
2305 {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
2306 {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
2307 {"center", (PyCFunction)string_center, 1, center__doc__},
2308 {"encode", (PyCFunction)string_encode, 1, encode__doc__},
2309 {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
2310 {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
2311 #if 0
2312 {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
2313 #endif
2314 {NULL, NULL} /* sentinel */
2317 static PyObject *
2318 string_getattr(PyStringObject *s, char *name)
2320 return Py_FindMethod(string_methods, (PyObject*)s, name);
2324 PyTypeObject PyString_Type = {
2325 PyObject_HEAD_INIT(&PyType_Type)
2327 "string",
2328 sizeof(PyStringObject),
2329 sizeof(char),
2330 (destructor)string_dealloc, /*tp_dealloc*/
2331 (printfunc)string_print, /*tp_print*/
2332 (getattrfunc)string_getattr, /*tp_getattr*/
2333 0, /*tp_setattr*/
2334 (cmpfunc)string_compare, /*tp_compare*/
2335 (reprfunc)string_repr, /*tp_repr*/
2336 0, /*tp_as_number*/
2337 &string_as_sequence, /*tp_as_sequence*/
2338 0, /*tp_as_mapping*/
2339 (hashfunc)string_hash, /*tp_hash*/
2340 0, /*tp_call*/
2341 0, /*tp_str*/
2342 0, /*tp_getattro*/
2343 0, /*tp_setattro*/
2344 &string_as_buffer, /*tp_as_buffer*/
2345 Py_TPFLAGS_DEFAULT, /*tp_flags*/
2346 0, /*tp_doc*/
2349 void
2350 PyString_Concat(register PyObject **pv, register PyObject *w)
2352 register PyObject *v;
2353 if (*pv == NULL)
2354 return;
2355 if (w == NULL || !PyString_Check(*pv)) {
2356 Py_DECREF(*pv);
2357 *pv = NULL;
2358 return;
2360 v = string_concat((PyStringObject *) *pv, w);
2361 Py_DECREF(*pv);
2362 *pv = v;
2365 void
2366 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
2368 PyString_Concat(pv, w);
2369 Py_XDECREF(w);
2373 /* The following function breaks the notion that strings are immutable:
2374 it changes the size of a string. We get away with this only if there
2375 is only one module referencing the object. You can also think of it
2376 as creating a new string object and destroying the old one, only
2377 more efficiently. In any case, don't use this if the string may
2378 already be known to some other part of the code... */
2381 _PyString_Resize(PyObject **pv, int newsize)
2383 register PyObject *v;
2384 register PyStringObject *sv;
2385 v = *pv;
2386 if (!PyString_Check(v) || v->ob_refcnt != 1) {
2387 *pv = 0;
2388 Py_DECREF(v);
2389 PyErr_BadInternalCall();
2390 return -1;
2392 /* XXX UNREF/NEWREF interface should be more symmetrical */
2393 #ifdef Py_REF_DEBUG
2394 --_Py_RefTotal;
2395 #endif
2396 _Py_ForgetReference(v);
2397 *pv = (PyObject *)
2398 PyObject_REALLOC((char *)v,
2399 sizeof(PyStringObject) + newsize * sizeof(char));
2400 if (*pv == NULL) {
2401 PyObject_DEL(v);
2402 PyErr_NoMemory();
2403 return -1;
2405 _Py_NewReference(*pv);
2406 sv = (PyStringObject *) *pv;
2407 sv->ob_size = newsize;
2408 sv->ob_sval[newsize] = '\0';
2409 return 0;
2412 /* Helpers for formatstring */
2414 static PyObject *
2415 getnextarg(PyObject *args, int arglen, int *p_argidx)
2417 int argidx = *p_argidx;
2418 if (argidx < arglen) {
2419 (*p_argidx)++;
2420 if (arglen < 0)
2421 return args;
2422 else
2423 return PyTuple_GetItem(args, argidx);
2425 PyErr_SetString(PyExc_TypeError,
2426 "not enough arguments for format string");
2427 return NULL;
/* Format flags, one bit per flag character of a conversion
 * specification:
 *	F_LJUST	'-'
 *	F_SIGN	'+'
 *	F_BLANK	' '
 *	F_ALT	'#'
 *	F_ZERO	'0'
 */
#define F_LJUST	(1 << 0)	/* '-' */
#define F_SIGN	(1 << 1)	/* '+' */
#define F_BLANK	(1 << 2)	/* ' ' */
#define F_ALT	(1 << 3)	/* '#' */
#define F_ZERO	(1 << 4)	/* '0' */
2443 static int
2444 formatfloat(char *buf, size_t buflen, int flags,
2445 int prec, int type, PyObject *v)
2447 /* fmt = '%#.' + `prec` + `type`
2448 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
2449 char fmt[20];
2450 double x;
2451 if (!PyArg_Parse(v, "d;float argument required", &x))
2452 return -1;
2453 if (prec < 0)
2454 prec = 6;
2455 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2456 type = 'g';
2457 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
2458 /* worst case length calc to ensure no buffer overrun:
2459 fmt = %#.<prec>g
2460 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
2461 for any double rep.)
2462 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2463 If prec=0 the effective precision is 1 (the leading digit is
2464 always given), therefore increase by one to 10+prec. */
2465 if (buflen <= (size_t)10 + (size_t)prec) {
2466 PyErr_SetString(PyExc_OverflowError,
2467 "formatted float is too long (precision too long?)");
2468 return -1;
2470 sprintf(buf, fmt, x);
2471 return strlen(buf);
2474 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2475 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2476 * Python's regular ints.
2477 * Return value: a new PyString*, or NULL if error.
2478 * . *pbuf is set to point into it,
2479 * *plen set to the # of chars following that.
2480 * Caller must decref it when done using pbuf.
2481 * The string starting at *pbuf is of the form
2482 * "-"? ("0x" | "0X")? digit+
2483 * "0x"/"0X" are present only for x and X conversions, with F_ALT
2484 * set in flags. The case of hex digits will be correct,
2485 * There will be at least prec digits, zero-filled on the left if
2486 * necessary to get that many.
2487 * val object to be converted
2488 * flags bitmask of format flags; only F_ALT is looked at
2489 * prec minimum number of digits; 0-fill on left if needed
2490 * type a character in [duoxX]; u acts the same as d
2492 * CAUTION: o, x and X conversions on regular ints can never
2493 * produce a '-' sign, but can for Python's unbounded ints.
2495 PyObject*
2496 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2497 char **pbuf, int *plen)
2499 PyObject *result = NULL;
2500 char *buf;
2501 int i;
2502 int sign; /* 1 if '-', else 0 */
2503 int len; /* number of characters */
2504 int numdigits; /* len == numnondigits + numdigits */
2505 int numnondigits = 0;
2507 switch (type) {
2508 case 'd':
2509 case 'u':
2510 result = val->ob_type->tp_str(val);
2511 break;
2512 case 'o':
2513 result = val->ob_type->tp_as_number->nb_oct(val);
2514 break;
2515 case 'x':
2516 case 'X':
2517 numnondigits = 2;
2518 result = val->ob_type->tp_as_number->nb_hex(val);
2519 break;
2520 default:
2521 assert(!"'type' not in [duoxX]");
2523 if (!result)
2524 return NULL;
2526 /* To modify the string in-place, there can only be one reference. */
2527 if (result->ob_refcnt != 1) {
2528 PyErr_BadInternalCall();
2529 return NULL;
2531 buf = PyString_AsString(result);
2532 len = PyString_Size(result);
2533 if (buf[len-1] == 'L') {
2534 --len;
2535 buf[len] = '\0';
2537 sign = buf[0] == '-';
2538 numnondigits += sign;
2539 numdigits = len - numnondigits;
2540 assert(numdigits > 0);
2542 /* Get rid of base marker unless F_ALT */
2543 if ((flags & F_ALT) == 0) {
2544 /* Need to skip 0x, 0X or 0. */
2545 int skipped = 0;
2546 switch (type) {
2547 case 'o':
2548 assert(buf[sign] == '0');
2549 /* If 0 is only digit, leave it alone. */
2550 if (numdigits > 1) {
2551 skipped = 1;
2552 --numdigits;
2554 break;
2555 case 'x':
2556 case 'X':
2557 assert(buf[sign] == '0');
2558 assert(buf[sign + 1] == 'x');
2559 skipped = 2;
2560 numnondigits -= 2;
2561 break;
2563 if (skipped) {
2564 buf += skipped;
2565 len -= skipped;
2566 if (sign)
2567 buf[0] = '-';
2569 assert(len == numnondigits + numdigits);
2570 assert(numdigits > 0);
2573 /* Fill with leading zeroes to meet minimum width. */
2574 if (prec > numdigits) {
2575 PyObject *r1 = PyString_FromStringAndSize(NULL,
2576 numnondigits + prec);
2577 char *b1;
2578 if (!r1) {
2579 Py_DECREF(result);
2580 return NULL;
2582 b1 = PyString_AS_STRING(r1);
2583 for (i = 0; i < numnondigits; ++i)
2584 *b1++ = *buf++;
2585 for (i = 0; i < prec - numdigits; i++)
2586 *b1++ = '0';
2587 for (i = 0; i < numdigits; i++)
2588 *b1++ = *buf++;
2589 *b1 = '\0';
2590 Py_DECREF(result);
2591 result = r1;
2592 buf = PyString_AS_STRING(result);
2593 len = numnondigits + prec;
2596 /* Fix up case for hex conversions. */
2597 switch (type) {
2598 case 'x':
2599 /* Need to convert all upper case letters to lower case. */
2600 for (i = 0; i < len; i++)
2601 if (buf[i] >= 'A' && buf[i] <= 'F')
2602 buf[i] += 'a'-'A';
2603 break;
2604 case 'X':
2605 /* Need to convert 0x to 0X (and -0x to -0X). */
2606 if (buf[sign + 1] == 'x')
2607 buf[sign + 1] = 'X';
2608 break;
2610 *pbuf = buf;
2611 *plen = len;
2612 return result;
2615 static int
2616 formatint(char *buf, size_t buflen, int flags,
2617 int prec, int type, PyObject *v)
2619 /* fmt = '%#.' + `prec` + 'l' + `type`
2620 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
2621 + 1 + 1 = 24 */
2622 char fmt[64]; /* plenty big enough! */
2623 long x;
2624 if (!PyArg_Parse(v, "l;int argument required", &x))
2625 return -1;
2626 if (prec < 0)
2627 prec = 1;
2628 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
2629 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
2630 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
2631 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
2632 PyErr_SetString(PyExc_OverflowError,
2633 "formatted integer is too long (precision too long?)");
2634 return -1;
2636 sprintf(buf, fmt, x);
2637 return strlen(buf);
2640 static int
2641 formatchar(char *buf, size_t buflen, PyObject *v)
2643 /* presume that the buffer is at least 2 characters long */
2644 if (PyString_Check(v)) {
2645 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
2646 return -1;
2648 else {
2649 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
2650 return -1;
2652 buf[1] = '\0';
2653 return 1;
2657 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
2659 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
2660 chars are formatted. XXX This is a magic number. Each formatting
2661 routine does bounds checking to ensure no overflow, but a better
2662 solution may be to malloc a buffer of appropriate size for each
2663 format. For now, the current solution is sufficient.
2665 #define FORMATBUFLEN (size_t)120
2667 PyObject *
2668 PyString_Format(PyObject *format, PyObject *args)
2670 char *fmt, *res;
2671 int fmtcnt, rescnt, reslen, arglen, argidx;
2672 int args_owned = 0;
2673 PyObject *result, *orig_args;
2674 PyObject *dict = NULL;
2675 if (format == NULL || !PyString_Check(format) || args == NULL) {
2676 PyErr_BadInternalCall();
2677 return NULL;
2679 orig_args = args;
2680 fmt = PyString_AsString(format);
2681 fmtcnt = PyString_Size(format);
2682 reslen = rescnt = fmtcnt + 100;
2683 result = PyString_FromStringAndSize((char *)NULL, reslen);
2684 if (result == NULL)
2685 return NULL;
2686 res = PyString_AsString(result);
2687 if (PyTuple_Check(args)) {
2688 arglen = PyTuple_Size(args);
2689 argidx = 0;
2691 else {
2692 arglen = -1;
2693 argidx = -2;
2695 if (args->ob_type->tp_as_mapping)
2696 dict = args;
2697 while (--fmtcnt >= 0) {
2698 if (*fmt != '%') {
2699 if (--rescnt < 0) {
2700 rescnt = fmtcnt + 100;
2701 reslen += rescnt;
2702 if (_PyString_Resize(&result, reslen) < 0)
2703 return NULL;
2704 res = PyString_AsString(result)
2705 + reslen - rescnt;
2706 --rescnt;
2708 *res++ = *fmt++;
2710 else {
2711 /* Got a format specifier */
2712 int flags = 0;
2713 int width = -1;
2714 int prec = -1;
2715 int size = 0;
2716 int c = '\0';
2717 int fill;
2718 PyObject *v = NULL;
2719 PyObject *temp = NULL;
2720 char *pbuf;
2721 int sign;
2722 int len;
2723 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
2724 char *fmt_start = fmt;
2726 fmt++;
2727 if (*fmt == '(') {
2728 char *keystart;
2729 int keylen;
2730 PyObject *key;
2731 int pcount = 1;
2733 if (dict == NULL) {
2734 PyErr_SetString(PyExc_TypeError,
2735 "format requires a mapping");
2736 goto error;
2738 ++fmt;
2739 --fmtcnt;
2740 keystart = fmt;
2741 /* Skip over balanced parentheses */
2742 while (pcount > 0 && --fmtcnt >= 0) {
2743 if (*fmt == ')')
2744 --pcount;
2745 else if (*fmt == '(')
2746 ++pcount;
2747 fmt++;
2749 keylen = fmt - keystart - 1;
2750 if (fmtcnt < 0 || pcount > 0) {
2751 PyErr_SetString(PyExc_ValueError,
2752 "incomplete format key");
2753 goto error;
2755 key = PyString_FromStringAndSize(keystart,
2756 keylen);
2757 if (key == NULL)
2758 goto error;
2759 if (args_owned) {
2760 Py_DECREF(args);
2761 args_owned = 0;
2763 args = PyObject_GetItem(dict, key);
2764 Py_DECREF(key);
2765 if (args == NULL) {
2766 goto error;
2768 args_owned = 1;
2769 arglen = -1;
2770 argidx = -2;
2772 while (--fmtcnt >= 0) {
2773 switch (c = *fmt++) {
2774 case '-': flags |= F_LJUST; continue;
2775 case '+': flags |= F_SIGN; continue;
2776 case ' ': flags |= F_BLANK; continue;
2777 case '#': flags |= F_ALT; continue;
2778 case '0': flags |= F_ZERO; continue;
2780 break;
2782 if (c == '*') {
2783 v = getnextarg(args, arglen, &argidx);
2784 if (v == NULL)
2785 goto error;
2786 if (!PyInt_Check(v)) {
2787 PyErr_SetString(PyExc_TypeError,
2788 "* wants int");
2789 goto error;
2791 width = PyInt_AsLong(v);
2792 if (width < 0) {
2793 flags |= F_LJUST;
2794 width = -width;
2796 if (--fmtcnt >= 0)
2797 c = *fmt++;
2799 else if (c >= 0 && isdigit(c)) {
2800 width = c - '0';
2801 while (--fmtcnt >= 0) {
2802 c = Py_CHARMASK(*fmt++);
2803 if (!isdigit(c))
2804 break;
2805 if ((width*10) / 10 != width) {
2806 PyErr_SetString(
2807 PyExc_ValueError,
2808 "width too big");
2809 goto error;
2811 width = width*10 + (c - '0');
2814 if (c == '.') {
2815 prec = 0;
2816 if (--fmtcnt >= 0)
2817 c = *fmt++;
2818 if (c == '*') {
2819 v = getnextarg(args, arglen, &argidx);
2820 if (v == NULL)
2821 goto error;
2822 if (!PyInt_Check(v)) {
2823 PyErr_SetString(
2824 PyExc_TypeError,
2825 "* wants int");
2826 goto error;
2828 prec = PyInt_AsLong(v);
2829 if (prec < 0)
2830 prec = 0;
2831 if (--fmtcnt >= 0)
2832 c = *fmt++;
2834 else if (c >= 0 && isdigit(c)) {
2835 prec = c - '0';
2836 while (--fmtcnt >= 0) {
2837 c = Py_CHARMASK(*fmt++);
2838 if (!isdigit(c))
2839 break;
2840 if ((prec*10) / 10 != prec) {
2841 PyErr_SetString(
2842 PyExc_ValueError,
2843 "prec too big");
2844 goto error;
2846 prec = prec*10 + (c - '0');
2849 } /* prec */
2850 if (fmtcnt >= 0) {
2851 if (c == 'h' || c == 'l' || c == 'L') {
2852 size = c;
2853 if (--fmtcnt >= 0)
2854 c = *fmt++;
2857 if (fmtcnt < 0) {
2858 PyErr_SetString(PyExc_ValueError,
2859 "incomplete format");
2860 goto error;
2862 if (c != '%') {
2863 v = getnextarg(args, arglen, &argidx);
2864 if (v == NULL)
2865 goto error;
2867 sign = 0;
2868 fill = ' ';
2869 switch (c) {
2870 case '%':
2871 pbuf = "%";
2872 len = 1;
2873 break;
2874 case 's':
2875 case 'r':
2876 if (PyUnicode_Check(v)) {
2877 fmt = fmt_start;
2878 goto unicode;
2880 if (c == 's')
2881 temp = PyObject_Str(v);
2882 else
2883 temp = PyObject_Repr(v);
2884 if (temp == NULL)
2885 goto error;
2886 if (!PyString_Check(temp)) {
2887 PyErr_SetString(PyExc_TypeError,
2888 "%s argument has non-string str()");
2889 goto error;
2891 pbuf = PyString_AsString(temp);
2892 len = PyString_Size(temp);
2893 if (prec >= 0 && len > prec)
2894 len = prec;
2895 break;
2896 case 'i':
2897 case 'd':
2898 case 'u':
2899 case 'o':
2900 case 'x':
2901 case 'X':
2902 if (c == 'i')
2903 c = 'd';
2904 if (PyLong_Check(v) && PyLong_AsLong(v) == -1
2905 && PyErr_Occurred()) {
2906 /* Too big for a C long. */
2907 PyErr_Clear();
2908 temp = _PyString_FormatLong(v, flags,
2909 prec, c, &pbuf, &len);
2910 if (!temp)
2911 goto error;
2912 /* unbounded ints can always produce
2913 a sign character! */
2914 sign = 1;
2916 else {
2917 pbuf = formatbuf;
2918 len = formatint(pbuf, sizeof(formatbuf),
2919 flags, prec, c, v);
2920 if (len < 0)
2921 goto error;
2922 /* only d conversion is signed */
2923 sign = c == 'd';
2925 if (flags & F_ZERO)
2926 fill = '0';
2927 break;
2928 case 'e':
2929 case 'E':
2930 case 'f':
2931 case 'g':
2932 case 'G':
2933 pbuf = formatbuf;
2934 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
2935 if (len < 0)
2936 goto error;
2937 sign = 1;
2938 if (flags & F_ZERO)
2939 fill = '0';
2940 break;
2941 case 'c':
2942 pbuf = formatbuf;
2943 len = formatchar(pbuf, sizeof(formatbuf), v);
2944 if (len < 0)
2945 goto error;
2946 break;
2947 default:
2948 PyErr_Format(PyExc_ValueError,
2949 "unsupported format character '%c' (0x%x)",
2950 c, c);
2951 goto error;
2953 if (sign) {
2954 if (*pbuf == '-' || *pbuf == '+') {
2955 sign = *pbuf++;
2956 len--;
2958 else if (flags & F_SIGN)
2959 sign = '+';
2960 else if (flags & F_BLANK)
2961 sign = ' ';
2962 else
2963 sign = 0;
2965 if (width < len)
2966 width = len;
2967 if (rescnt < width + (sign != 0)) {
2968 reslen -= rescnt;
2969 rescnt = width + fmtcnt + 100;
2970 reslen += rescnt;
2971 if (_PyString_Resize(&result, reslen) < 0)
2972 return NULL;
2973 res = PyString_AsString(result)
2974 + reslen - rescnt;
2976 if (sign) {
2977 if (fill != ' ')
2978 *res++ = sign;
2979 rescnt--;
2980 if (width > len)
2981 width--;
2983 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
2984 assert(pbuf[0] == '0');
2985 assert(pbuf[1] == c);
2986 if (fill != ' ') {
2987 *res++ = *pbuf++;
2988 *res++ = *pbuf++;
2990 rescnt -= 2;
2991 width -= 2;
2992 if (width < 0)
2993 width = 0;
2994 len -= 2;
2996 if (width > len && !(flags & F_LJUST)) {
2997 do {
2998 --rescnt;
2999 *res++ = fill;
3000 } while (--width > len);
3002 if (fill == ' ') {
3003 if (sign)
3004 *res++ = sign;
3005 if ((flags & F_ALT) &&
3006 (c == 'x' || c == 'X')) {
3007 assert(pbuf[0] == '0');
3008 assert(pbuf[1] == c);
3009 *res++ = *pbuf++;
3010 *res++ = *pbuf++;
3013 memcpy(res, pbuf, len);
3014 res += len;
3015 rescnt -= len;
3016 while (--width >= len) {
3017 --rescnt;
3018 *res++ = ' ';
3020 if (dict && (argidx < arglen) && c != '%') {
3021 PyErr_SetString(PyExc_TypeError,
3022 "not all arguments converted");
3023 goto error;
3025 Py_XDECREF(temp);
3026 } /* '%' */
3027 } /* until end */
3028 if (argidx < arglen && !dict) {
3029 PyErr_SetString(PyExc_TypeError,
3030 "not all arguments converted");
3031 goto error;
3033 if (args_owned) {
3034 Py_DECREF(args);
3036 _PyString_Resize(&result, reslen - rescnt);
3037 return result;
3039 unicode:
3040 if (args_owned) {
3041 Py_DECREF(args);
3042 args_owned = 0;
3044 /* Fiddle args right (remove the first argidx-1 arguments) */
3045 --argidx;
3046 if (PyTuple_Check(orig_args) && argidx > 0) {
3047 PyObject *v;
3048 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3049 v = PyTuple_New(n);
3050 if (v == NULL)
3051 goto error;
3052 while (--n >= 0) {
3053 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3054 Py_INCREF(w);
3055 PyTuple_SET_ITEM(v, n, w);
3057 args = v;
3058 } else {
3059 Py_INCREF(orig_args);
3060 args = orig_args;
3062 /* Paste rest of format string to what we have of the result
3063 string; we reuse result for this */
3064 rescnt = res - PyString_AS_STRING(result);
3065 fmtcnt = PyString_GET_SIZE(format) - \
3066 (fmt - PyString_AS_STRING(format));
3067 if (_PyString_Resize(&result, rescnt + fmtcnt)) {
3068 Py_DECREF(args);
3069 goto error;
3071 memcpy(PyString_AS_STRING(result) + rescnt, fmt, fmtcnt);
3072 format = result;
3073 /* Let Unicode do its magic */
3074 result = PyUnicode_Format(format, args);
3075 Py_DECREF(format);
3076 Py_DECREF(args);
3077 return result;
3079 error:
3080 Py_DECREF(result);
3081 if (args_owned) {
3082 Py_DECREF(args);
3084 return NULL;
3088 #ifdef INTERN_STRINGS
3090 /* This dictionary will leak at PyString_Fini() time. That's acceptable
3091 * because PyString_Fini() specifically frees interned strings that are
3092 * only referenced by this dictionary. The CVS log entry for revision 2.45
3093 * says:
3095 * Change the Fini function to only remove otherwise unreferenced
3096 * strings from the interned table. There are references in
3097 * hard-to-find static variables all over the interpreter, and it's not
3098 * worth trying to get rid of all those; but "uninterning" isn't fair
3099 * either and may cause subtle failures later -- so we have to keep them
3100 * in the interned table.
3102 static PyObject *interned;
3104 void
3105 PyString_InternInPlace(PyObject **p)
3107 register PyStringObject *s = (PyStringObject *)(*p);
3108 PyObject *t;
3109 if (s == NULL || !PyString_Check(s))
3110 Py_FatalError("PyString_InternInPlace: strings only please!");
3111 if ((t = s->ob_sinterned) != NULL) {
3112 if (t == (PyObject *)s)
3113 return;
3114 Py_INCREF(t);
3115 *p = t;
3116 Py_DECREF(s);
3117 return;
3119 if (interned == NULL) {
3120 interned = PyDict_New();
3121 if (interned == NULL)
3122 return;
3124 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3125 Py_INCREF(t);
3126 *p = s->ob_sinterned = t;
3127 Py_DECREF(s);
3128 return;
3130 t = (PyObject *)s;
3131 if (PyDict_SetItem(interned, t, t) == 0) {
3132 s->ob_sinterned = t;
3133 return;
3135 PyErr_Clear();
3139 PyObject *
3140 PyString_InternFromString(const char *cp)
3142 PyObject *s = PyString_FromString(cp);
3143 if (s == NULL)
3144 return NULL;
3145 PyString_InternInPlace(&s);
3146 return s;
3149 #endif
3151 void
3152 PyString_Fini(void)
3154 int i;
3155 for (i = 0; i < UCHAR_MAX + 1; i++) {
3156 Py_XDECREF(characters[i]);
3157 characters[i] = NULL;
3159 #ifndef DONT_SHARE_SHORT_STRINGS
3160 Py_XDECREF(nullstring);
3161 nullstring = NULL;
3162 #endif
3163 #ifdef INTERN_STRINGS
3164 if (interned) {
3165 int pos, changed;
3166 PyObject *key, *value;
3167 do {
3168 changed = 0;
3169 pos = 0;
3170 while (PyDict_Next(interned, &pos, &key, &value)) {
3171 if (key->ob_refcnt == 2 && key == value) {
3172 PyDict_DelItem(interned, key);
3173 changed = 1;
3176 } while (changed);
3178 #endif