This commit was manufactured by cvs2svn to create tag 'r201'.
[python/dscho.git] / Objects / stringobject.c
blob0c7a9999e548d78212cfa73d62a47e0f8f1ac2ba
2 /* String object implementation */
4 #include "Python.h"
6 #include <ctype.h>
/* Statistics (COUNT_ALLOCS builds only): incremented each time the shared
   empty string / a shared one-character string is handed out. */
8 #ifdef COUNT_ALLOCS
9 int null_strings, one_strings;
10 #endif
/* Fallback for platforms without <limits.h>. */
12 #if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
13 #define UCHAR_MAX 255
14 #endif
/* Cache of one-character string objects, indexed by (byte & UCHAR_MAX).
   An entry stays NULL until a string for that byte has been created. */
16 static PyStringObject *characters[UCHAR_MAX + 1];
/* The shared empty string; NULL until the first empty string is created. */
17 #ifndef DONT_SHARE_SHORT_STRINGS
18 static PyStringObject *nullstring;
19 #endif
22 Newsizedstringobject() and newstringobject() try in certain cases
23 to share string objects. When the size of the string is zero,
24 these routines always return a pointer to the same string object;
25 when the size is one, they return a pointer to an already existing
26 object if the contents of the string is known. For
27 newstringobject() this is always the case, for
28 newsizedstringobject() this is the case when the first argument is
29 not NULL.
30 A common practice to allocate a string and then fill it in or
31 change it must be done carefully. It is only allowed to change the
32 contents of the string if the object was gotten from
33 newsizedstringobject() with a NULL first argument, because in the
34 future these routines may try to do even more sharing of objects.
36 PyObject *
37 PyString_FromStringAndSize(const char *str, int size)
39 register PyStringObject *op;
40 #ifndef DONT_SHARE_SHORT_STRINGS
41 if (size == 0 && (op = nullstring) != NULL) {
42 #ifdef COUNT_ALLOCS
43 null_strings++;
44 #endif
45 Py_INCREF(op);
46 return (PyObject *)op;
48 if (size == 1 && str != NULL &&
49 (op = characters[*str & UCHAR_MAX]) != NULL)
51 #ifdef COUNT_ALLOCS
52 one_strings++;
53 #endif
54 Py_INCREF(op);
55 return (PyObject *)op;
57 #endif /* DONT_SHARE_SHORT_STRINGS */
59 /* PyObject_NewVar is inlined */
60 op = (PyStringObject *)
61 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
62 if (op == NULL)
63 return PyErr_NoMemory();
64 PyObject_INIT_VAR(op, &PyString_Type, size);
65 #ifdef CACHE_HASH
66 op->ob_shash = -1;
67 #endif
68 #ifdef INTERN_STRINGS
69 op->ob_sinterned = NULL;
70 #endif
71 if (str != NULL)
72 memcpy(op->ob_sval, str, size);
73 op->ob_sval[size] = '\0';
74 #ifndef DONT_SHARE_SHORT_STRINGS
75 if (size == 0) {
76 nullstring = op;
77 Py_INCREF(op);
78 } else if (size == 1 && str != NULL) {
79 characters[*str & UCHAR_MAX] = op;
80 Py_INCREF(op);
82 #endif
83 return (PyObject *) op;
86 PyObject *
87 PyString_FromString(const char *str)
89 register size_t size = strlen(str);
90 register PyStringObject *op;
91 if (size > INT_MAX) {
92 PyErr_SetString(PyExc_OverflowError,
93 "string is too long for a Python string");
94 return NULL;
96 #ifndef DONT_SHARE_SHORT_STRINGS
97 if (size == 0 && (op = nullstring) != NULL) {
98 #ifdef COUNT_ALLOCS
99 null_strings++;
100 #endif
101 Py_INCREF(op);
102 return (PyObject *)op;
104 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
105 #ifdef COUNT_ALLOCS
106 one_strings++;
107 #endif
108 Py_INCREF(op);
109 return (PyObject *)op;
111 #endif /* DONT_SHARE_SHORT_STRINGS */
113 /* PyObject_NewVar is inlined */
114 op = (PyStringObject *)
115 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
116 if (op == NULL)
117 return PyErr_NoMemory();
118 PyObject_INIT_VAR(op, &PyString_Type, size);
119 #ifdef CACHE_HASH
120 op->ob_shash = -1;
121 #endif
122 #ifdef INTERN_STRINGS
123 op->ob_sinterned = NULL;
124 #endif
125 strcpy(op->ob_sval, str);
126 #ifndef DONT_SHARE_SHORT_STRINGS
127 if (size == 0) {
128 nullstring = op;
129 Py_INCREF(op);
130 } else if (size == 1) {
131 characters[*str & UCHAR_MAX] = op;
132 Py_INCREF(op);
134 #endif
135 return (PyObject *) op;
138 PyObject *PyString_Decode(const char *s,
139 int size,
140 const char *encoding,
141 const char *errors)
143 PyObject *buffer = NULL, *str;
145 if (encoding == NULL)
146 encoding = PyUnicode_GetDefaultEncoding();
148 /* Decode via the codec registry */
149 buffer = PyBuffer_FromMemory((void *)s, size);
150 if (buffer == NULL)
151 goto onError;
152 str = PyCodec_Decode(buffer, encoding, errors);
153 if (str == NULL)
154 goto onError;
155 /* Convert Unicode to a string using the default encoding */
156 if (PyUnicode_Check(str)) {
157 PyObject *temp = str;
158 str = PyUnicode_AsEncodedString(str, NULL, NULL);
159 Py_DECREF(temp);
160 if (str == NULL)
161 goto onError;
163 if (!PyString_Check(str)) {
164 PyErr_Format(PyExc_TypeError,
165 "decoder did not return a string object (type=%.400s)",
166 str->ob_type->tp_name);
167 Py_DECREF(str);
168 goto onError;
170 Py_DECREF(buffer);
171 return str;
173 onError:
174 Py_XDECREF(buffer);
175 return NULL;
178 PyObject *PyString_Encode(const char *s,
179 int size,
180 const char *encoding,
181 const char *errors)
183 PyObject *v, *str;
185 str = PyString_FromStringAndSize(s, size);
186 if (str == NULL)
187 return NULL;
188 v = PyString_AsEncodedString(str, encoding, errors);
189 Py_DECREF(str);
190 return v;
193 PyObject *PyString_AsEncodedString(PyObject *str,
194 const char *encoding,
195 const char *errors)
197 PyObject *v;
199 if (!PyString_Check(str)) {
200 PyErr_BadArgument();
201 goto onError;
204 if (encoding == NULL)
205 encoding = PyUnicode_GetDefaultEncoding();
207 /* Encode via the codec registry */
208 v = PyCodec_Encode(str, encoding, errors);
209 if (v == NULL)
210 goto onError;
211 /* Convert Unicode to a string using the default encoding */
212 if (PyUnicode_Check(v)) {
213 PyObject *temp = v;
214 v = PyUnicode_AsEncodedString(v, NULL, NULL);
215 Py_DECREF(temp);
216 if (v == NULL)
217 goto onError;
219 if (!PyString_Check(v)) {
220 PyErr_Format(PyExc_TypeError,
221 "encoder did not return a string object (type=%.400s)",
222 v->ob_type->tp_name);
223 Py_DECREF(v);
224 goto onError;
226 return v;
228 onError:
229 return NULL;
232 static void
233 string_dealloc(PyObject *op)
235 PyObject_DEL(op);
238 static int
239 string_getsize(register PyObject *op)
241 char *s;
242 int len;
243 if (PyString_AsStringAndSize(op, &s, &len))
244 return -1;
245 return len;
248 static /*const*/ char *
249 string_getbuffer(register PyObject *op)
251 char *s;
252 int len;
253 if (PyString_AsStringAndSize(op, &s, &len))
254 return NULL;
255 return s;
259 PyString_Size(register PyObject *op)
261 if (!PyString_Check(op))
262 return string_getsize(op);
263 return ((PyStringObject *)op) -> ob_size;
266 /*const*/ char *
267 PyString_AsString(register PyObject *op)
269 if (!PyString_Check(op))
270 return string_getbuffer(op);
271 return ((PyStringObject *)op) -> ob_sval;
274 /* Internal API needed by PyString_AsStringAndSize(): */
275 extern
276 PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
277 const char *errors);
280 PyString_AsStringAndSize(register PyObject *obj,
281 register char **s,
282 register int *len)
284 if (s == NULL) {
285 PyErr_BadInternalCall();
286 return -1;
289 if (!PyString_Check(obj)) {
290 if (PyUnicode_Check(obj)) {
291 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
292 if (obj == NULL)
293 return -1;
295 else {
296 PyErr_Format(PyExc_TypeError,
297 "expected string or Unicode object, "
298 "%.200s found", obj->ob_type->tp_name);
299 return -1;
303 *s = PyString_AS_STRING(obj);
304 if (len != NULL)
305 *len = PyString_GET_SIZE(obj);
306 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
307 PyErr_SetString(PyExc_TypeError,
308 "expected string without null bytes");
309 return -1;
311 return 0;
314 /* Methods */
316 static int
317 string_print(PyStringObject *op, FILE *fp, int flags)
319 int i;
320 char c;
321 int quote;
322 /* XXX Ought to check for interrupts when writing long strings */
323 if (flags & Py_PRINT_RAW) {
324 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
325 return 0;
328 /* figure out which quote to use; single is preferred */
329 quote = '\'';
330 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
331 quote = '"';
333 fputc(quote, fp);
334 for (i = 0; i < op->ob_size; i++) {
335 c = op->ob_sval[i];
336 if (c == quote || c == '\\')
337 fprintf(fp, "\\%c", c);
338 else if (c < ' ' || c >= 0177)
339 fprintf(fp, "\\%03o", c & 0377);
340 else
341 fputc(c, fp);
343 fputc(quote, fp);
344 return 0;
347 static PyObject *
348 string_repr(register PyStringObject *op)
350 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
351 PyObject *v;
352 if (newsize > INT_MAX) {
353 PyErr_SetString(PyExc_OverflowError,
354 "string is too large to make repr");
356 v = PyString_FromStringAndSize((char *)NULL, newsize);
357 if (v == NULL) {
358 return NULL;
360 else {
361 register int i;
362 register char c;
363 register char *p;
364 int quote;
366 /* figure out which quote to use; single is preferred */
367 quote = '\'';
368 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
369 quote = '"';
371 p = ((PyStringObject *)v)->ob_sval;
372 *p++ = quote;
373 for (i = 0; i < op->ob_size; i++) {
374 c = op->ob_sval[i];
375 if (c == quote || c == '\\')
376 *p++ = '\\', *p++ = c;
377 else if (c < ' ' || c >= 0177) {
378 sprintf(p, "\\%03o", c & 0377);
379 while (*p != '\0')
380 p++;
382 else
383 *p++ = c;
385 *p++ = quote;
386 *p = '\0';
387 _PyString_Resize(
388 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
389 return v;
393 static int
394 string_length(PyStringObject *a)
396 return a->ob_size;
399 static PyObject *
400 string_concat(register PyStringObject *a, register PyObject *bb)
402 register unsigned int size;
403 register PyStringObject *op;
404 if (!PyString_Check(bb)) {
405 if (PyUnicode_Check(bb))
406 return PyUnicode_Concat((PyObject *)a, bb);
407 PyErr_Format(PyExc_TypeError,
408 "cannot add type \"%.200s\" to string",
409 bb->ob_type->tp_name);
410 return NULL;
412 #define b ((PyStringObject *)bb)
413 /* Optimize cases with empty left or right operand */
414 if (a->ob_size == 0) {
415 Py_INCREF(bb);
416 return bb;
418 if (b->ob_size == 0) {
419 Py_INCREF(a);
420 return (PyObject *)a;
422 size = a->ob_size + b->ob_size;
423 /* PyObject_NewVar is inlined */
424 op = (PyStringObject *)
425 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
426 if (op == NULL)
427 return PyErr_NoMemory();
428 PyObject_INIT_VAR(op, &PyString_Type, size);
429 #ifdef CACHE_HASH
430 op->ob_shash = -1;
431 #endif
432 #ifdef INTERN_STRINGS
433 op->ob_sinterned = NULL;
434 #endif
435 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
436 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
437 op->ob_sval[size] = '\0';
438 return (PyObject *) op;
439 #undef b
442 static PyObject *
443 string_repeat(register PyStringObject *a, register int n)
445 register int i;
446 register int size;
447 register PyStringObject *op;
448 size_t nbytes;
449 if (n < 0)
450 n = 0;
451 /* watch out for overflows: the size can overflow int,
452 * and the # of bytes needed can overflow size_t
454 size = a->ob_size * n;
455 if (n && size / n != a->ob_size) {
456 PyErr_SetString(PyExc_OverflowError,
457 "repeated string is too long");
458 return NULL;
460 if (size == a->ob_size) {
461 Py_INCREF(a);
462 return (PyObject *)a;
464 nbytes = size * sizeof(char);
465 if (nbytes / sizeof(char) != (size_t)size ||
466 nbytes + sizeof(PyStringObject) <= nbytes) {
467 PyErr_SetString(PyExc_OverflowError,
468 "repeated string is too long");
469 return NULL;
471 op = (PyStringObject *)
472 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
473 if (op == NULL)
474 return PyErr_NoMemory();
475 PyObject_INIT_VAR(op, &PyString_Type, size);
476 #ifdef CACHE_HASH
477 op->ob_shash = -1;
478 #endif
479 #ifdef INTERN_STRINGS
480 op->ob_sinterned = NULL;
481 #endif
482 for (i = 0; i < size; i += a->ob_size)
483 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
484 op->ob_sval[size] = '\0';
485 return (PyObject *) op;
488 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
490 static PyObject *
491 string_slice(register PyStringObject *a, register int i, register int j)
492 /* j -- may be negative! */
494 if (i < 0)
495 i = 0;
496 if (j < 0)
497 j = 0; /* Avoid signed/unsigned bug in next line */
498 if (j > a->ob_size)
499 j = a->ob_size;
500 if (i == 0 && j == a->ob_size) { /* It's the same as a */
501 Py_INCREF(a);
502 return (PyObject *)a;
504 if (j < i)
505 j = i;
506 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
509 static int
510 string_contains(PyObject *a, PyObject *el)
512 register char *s, *end;
513 register char c;
514 if (PyUnicode_Check(el))
515 return PyUnicode_Contains(a, el);
516 if (!PyString_Check(el) || PyString_Size(el) != 1) {
517 PyErr_SetString(PyExc_TypeError,
518 "'in <string>' requires character as left operand");
519 return -1;
521 c = PyString_AsString(el)[0];
522 s = PyString_AsString(a);
523 end = s + PyString_Size(a);
524 while (s < end) {
525 if (c == *s++)
526 return 1;
528 return 0;
531 static PyObject *
532 string_item(PyStringObject *a, register int i)
534 int c;
535 PyObject *v;
536 if (i < 0 || i >= a->ob_size) {
537 PyErr_SetString(PyExc_IndexError, "string index out of range");
538 return NULL;
540 c = a->ob_sval[i] & UCHAR_MAX;
541 v = (PyObject *) characters[c];
542 #ifdef COUNT_ALLOCS
543 if (v != NULL)
544 one_strings++;
545 #endif
546 if (v == NULL) {
547 v = PyString_FromStringAndSize((char *)NULL, 1);
548 if (v == NULL)
549 return NULL;
550 characters[c] = (PyStringObject *) v;
551 ((PyStringObject *)v)->ob_sval[0] = c;
553 Py_INCREF(v);
554 return v;
557 static int
558 string_compare(PyStringObject *a, PyStringObject *b)
560 int len_a = a->ob_size, len_b = b->ob_size;
561 int min_len = (len_a < len_b) ? len_a : len_b;
562 int cmp;
563 if (min_len > 0) {
564 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
565 if (cmp == 0)
566 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
567 if (cmp != 0)
568 return cmp;
570 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
573 static long
574 string_hash(PyStringObject *a)
576 register int len;
577 register unsigned char *p;
578 register long x;
580 #ifdef CACHE_HASH
581 if (a->ob_shash != -1)
582 return a->ob_shash;
583 #ifdef INTERN_STRINGS
584 if (a->ob_sinterned != NULL)
585 return (a->ob_shash =
586 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
587 #endif
588 #endif
589 len = a->ob_size;
590 p = (unsigned char *) a->ob_sval;
591 x = *p << 7;
592 while (--len >= 0)
593 x = (1000003*x) ^ *p++;
594 x ^= a->ob_size;
595 if (x == -1)
596 x = -2;
597 #ifdef CACHE_HASH
598 a->ob_shash = x;
599 #endif
600 return x;
603 static int
604 string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
606 if ( index != 0 ) {
607 PyErr_SetString(PyExc_SystemError,
608 "accessing non-existent string segment");
609 return -1;
611 *ptr = (void *)self->ob_sval;
612 return self->ob_size;
615 static int
616 string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
618 PyErr_SetString(PyExc_TypeError,
619 "Cannot use string as modifiable buffer");
620 return -1;
623 static int
624 string_buffer_getsegcount(PyStringObject *self, int *lenp)
626 if ( lenp )
627 *lenp = self->ob_size;
628 return 1;
631 static int
632 string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
634 if ( index != 0 ) {
635 PyErr_SetString(PyExc_SystemError,
636 "accessing non-existent string segment");
637 return -1;
639 *ptr = self->ob_sval;
640 return self->ob_size;
/* Sequence-protocol slot table for strings.  The item- and
   slice-assignment slots are left as 0. */
643 static PySequenceMethods string_as_sequence = {
644 (inquiry)string_length, /*sq_length*/
645 (binaryfunc)string_concat, /*sq_concat*/
646 (intargfunc)string_repeat, /*sq_repeat*/
647 (intargfunc)string_item, /*sq_item*/
648 (intintargfunc)string_slice, /*sq_slice*/
649 0, /*sq_ass_item*/
650 0, /*sq_ass_slice*/
651 (objobjproc)string_contains /*sq_contains*/
/* Buffer-protocol slot table for strings, in the order
   read buffer, write buffer, segment count, character buffer. */
654 static PyBufferProcs string_as_buffer = {
655 (getreadbufferproc)string_buffer_getreadbuf,
656 (getwritebufferproc)string_buffer_getwritebuf,
657 (getsegcountproc)string_buffer_getsegcount,
658 (getcharbufferproc)string_buffer_getcharbuf,
/* Values for the `striptype` argument of do_strip(): which end(s) of the
   string have whitespace removed. */
663 #define LEFTSTRIP 0
664 #define RIGHTSTRIP 1
665 #define BOTHSTRIP 2
668 static PyObject *
669 split_whitespace(const char *s, int len, int maxsplit)
671 int i, j, err;
672 PyObject* item;
673 PyObject *list = PyList_New(0);
675 if (list == NULL)
676 return NULL;
678 for (i = j = 0; i < len; ) {
679 while (i < len && isspace(Py_CHARMASK(s[i])))
680 i++;
681 j = i;
682 while (i < len && !isspace(Py_CHARMASK(s[i])))
683 i++;
684 if (j < i) {
685 if (maxsplit-- <= 0)
686 break;
687 item = PyString_FromStringAndSize(s+j, (int)(i-j));
688 if (item == NULL)
689 goto finally;
690 err = PyList_Append(list, item);
691 Py_DECREF(item);
692 if (err < 0)
693 goto finally;
694 while (i < len && isspace(Py_CHARMASK(s[i])))
695 i++;
696 j = i;
699 if (j < len) {
700 item = PyString_FromStringAndSize(s+j, (int)(len - j));
701 if (item == NULL)
702 goto finally;
703 err = PyList_Append(list, item);
704 Py_DECREF(item);
705 if (err < 0)
706 goto finally;
708 return list;
709 finally:
710 Py_DECREF(list);
711 return NULL;
/* Documentation string for the split() string method (string_split). */
715 static char split__doc__[] =
716 "S.split([sep [,maxsplit]]) -> list of strings\n\
718 Return a list of the words in the string S, using sep as the\n\
719 delimiter string. If maxsplit is given, at most maxsplit\n\
720 splits are done. If sep is not specified, any whitespace string\n\
721 is a separator.";
723 static PyObject *
724 string_split(PyStringObject *self, PyObject *args)
726 int len = PyString_GET_SIZE(self), n, i, j, err;
727 int maxsplit = -1;
728 const char *s = PyString_AS_STRING(self), *sub;
729 PyObject *list, *item, *subobj = Py_None;
731 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
732 return NULL;
733 if (maxsplit < 0)
734 maxsplit = INT_MAX;
735 if (subobj == Py_None)
736 return split_whitespace(s, len, maxsplit);
737 if (PyString_Check(subobj)) {
738 sub = PyString_AS_STRING(subobj);
739 n = PyString_GET_SIZE(subobj);
741 else if (PyUnicode_Check(subobj))
742 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
743 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
744 return NULL;
745 if (n == 0) {
746 PyErr_SetString(PyExc_ValueError, "empty separator");
747 return NULL;
750 list = PyList_New(0);
751 if (list == NULL)
752 return NULL;
754 i = j = 0;
755 while (i+n <= len) {
756 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
757 if (maxsplit-- <= 0)
758 break;
759 item = PyString_FromStringAndSize(s+j, (int)(i-j));
760 if (item == NULL)
761 goto fail;
762 err = PyList_Append(list, item);
763 Py_DECREF(item);
764 if (err < 0)
765 goto fail;
766 i = j = i + n;
768 else
769 i++;
771 item = PyString_FromStringAndSize(s+j, (int)(len-j));
772 if (item == NULL)
773 goto fail;
774 err = PyList_Append(list, item);
775 Py_DECREF(item);
776 if (err < 0)
777 goto fail;
779 return list;
781 fail:
782 Py_DECREF(list);
783 return NULL;
/* Documentation string for the join() string method (string_join). */
787 static char join__doc__[] =
788 "S.join(sequence) -> string\n\
790 Return a string which is the concatenation of the strings in the\n\
791 sequence. The separator between elements is S.";
793 static PyObject *
794 string_join(PyStringObject *self, PyObject *args)
796 char *sep = PyString_AS_STRING(self);
797 int seplen = PyString_GET_SIZE(self);
798 PyObject *res = NULL;
799 int reslen = 0;
800 char *p;
801 int seqlen = 0;
802 int sz = 100;
803 int i, slen, sz_incr;
804 PyObject *orig, *seq, *item;
806 if (!PyArg_ParseTuple(args, "O:join", &orig))
807 return NULL;
809 if (!(seq = PySequence_Fast(orig, ""))) {
810 if (PyErr_ExceptionMatches(PyExc_TypeError))
811 PyErr_Format(PyExc_TypeError,
812 "sequence expected, %.80s found",
813 orig->ob_type->tp_name);
814 return NULL;
816 /* From here on out, errors go through finally: for proper
817 * reference count manipulations.
819 seqlen = PySequence_Size(seq);
820 if (seqlen == 1) {
821 item = PySequence_Fast_GET_ITEM(seq, 0);
822 Py_INCREF(item);
823 Py_DECREF(seq);
824 return item;
827 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
828 goto finally;
830 p = PyString_AS_STRING(res);
832 for (i = 0; i < seqlen; i++) {
833 item = PySequence_Fast_GET_ITEM(seq, i);
834 if (!PyString_Check(item)){
835 if (PyUnicode_Check(item)) {
836 Py_DECREF(res);
837 Py_DECREF(seq);
838 return PyUnicode_Join((PyObject *)self, orig);
840 PyErr_Format(PyExc_TypeError,
841 "sequence item %i: expected string,"
842 " %.80s found",
843 i, item->ob_type->tp_name);
844 goto finally;
846 slen = PyString_GET_SIZE(item);
847 while (reslen + slen + seplen >= sz) {
848 /* at least double the size of the string */
849 sz_incr = slen + seplen > sz ? slen + seplen : sz;
850 if (_PyString_Resize(&res, sz + sz_incr)) {
851 goto finally;
853 sz += sz_incr;
854 p = PyString_AS_STRING(res) + reslen;
856 if (i > 0) {
857 memcpy(p, sep, seplen);
858 p += seplen;
859 reslen += seplen;
861 memcpy(p, PyString_AS_STRING(item), slen);
862 p += slen;
863 reslen += slen;
865 if (_PyString_Resize(&res, reslen))
866 goto finally;
867 Py_DECREF(seq);
868 return res;
870 finally:
871 Py_DECREF(seq);
872 Py_XDECREF(res);
873 return NULL;
878 static long
879 string_find_internal(PyStringObject *self, PyObject *args, int dir)
881 const char *s = PyString_AS_STRING(self), *sub;
882 int len = PyString_GET_SIZE(self);
883 int n, i = 0, last = INT_MAX;
884 PyObject *subobj;
886 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
887 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
888 return -2;
889 if (PyString_Check(subobj)) {
890 sub = PyString_AS_STRING(subobj);
891 n = PyString_GET_SIZE(subobj);
893 else if (PyUnicode_Check(subobj))
894 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
895 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
896 return -2;
898 if (last > len)
899 last = len;
900 if (last < 0)
901 last += len;
902 if (last < 0)
903 last = 0;
904 if (i < 0)
905 i += len;
906 if (i < 0)
907 i = 0;
909 if (dir > 0) {
910 if (n == 0 && i <= last)
911 return (long)i;
912 last -= n;
913 for (; i <= last; ++i)
914 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
915 return (long)i;
917 else {
918 int j;
920 if (n == 0 && i <= last)
921 return (long)last;
922 for (j = last-n; j >= i; --j)
923 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
924 return (long)j;
927 return -1;
/* Documentation string for the find() string method (string_find). */
931 static char find__doc__[] =
932 "S.find(sub [,start [,end]]) -> int\n\
934 Return the lowest index in S where substring sub is found,\n\
935 such that sub is contained within s[start,end]. Optional\n\
936 arguments start and end are interpreted as in slice notation.\n\
938 Return -1 on failure.";
940 static PyObject *
941 string_find(PyStringObject *self, PyObject *args)
943 long result = string_find_internal(self, args, +1);
944 if (result == -2)
945 return NULL;
946 return PyInt_FromLong(result);
/* Documentation string for the index() string method (string_index). */
950 static char index__doc__[] =
951 "S.index(sub [,start [,end]]) -> int\n\
953 Like S.find() but raise ValueError when the substring is not found.";
955 static PyObject *
956 string_index(PyStringObject *self, PyObject *args)
958 long result = string_find_internal(self, args, +1);
959 if (result == -2)
960 return NULL;
961 if (result == -1) {
962 PyErr_SetString(PyExc_ValueError,
963 "substring not found in string.index");
964 return NULL;
966 return PyInt_FromLong(result);
/* Documentation string for the rfind() string method (string_rfind). */
970 static char rfind__doc__[] =
971 "S.rfind(sub [,start [,end]]) -> int\n\
973 Return the highest index in S where substring sub is found,\n\
974 such that sub is contained within s[start,end]. Optional\n\
975 arguments start and end are interpreted as in slice notation.\n\
977 Return -1 on failure.";
979 static PyObject *
980 string_rfind(PyStringObject *self, PyObject *args)
982 long result = string_find_internal(self, args, -1);
983 if (result == -2)
984 return NULL;
985 return PyInt_FromLong(result);
/* Documentation string for the rindex() string method (string_rindex). */
989 static char rindex__doc__[] =
990 "S.rindex(sub [,start [,end]]) -> int\n\
992 Like S.rfind() but raise ValueError when the substring is not found.";
994 static PyObject *
995 string_rindex(PyStringObject *self, PyObject *args)
997 long result = string_find_internal(self, args, -1);
998 if (result == -2)
999 return NULL;
1000 if (result == -1) {
1001 PyErr_SetString(PyExc_ValueError,
1002 "substring not found in string.rindex");
1003 return NULL;
1005 return PyInt_FromLong(result);
1009 static PyObject *
1010 do_strip(PyStringObject *self, PyObject *args, int striptype)
1012 char *s = PyString_AS_STRING(self);
1013 int len = PyString_GET_SIZE(self), i, j;
1015 if (!PyArg_ParseTuple(args, ":strip"))
1016 return NULL;
1018 i = 0;
1019 if (striptype != RIGHTSTRIP) {
1020 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1021 i++;
1025 j = len;
1026 if (striptype != LEFTSTRIP) {
1027 do {
1028 j--;
1029 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1030 j++;
1033 if (i == 0 && j == len) {
1034 Py_INCREF(self);
1035 return (PyObject*)self;
1037 else
1038 return PyString_FromStringAndSize(s+i, j-i);
/* Documentation string for the strip() string method (string_strip). */
1042 static char strip__doc__[] =
1043 "S.strip() -> string\n\
1045 Return a copy of the string S with leading and trailing\n\
1046 whitespace removed.";
1048 static PyObject *
1049 string_strip(PyStringObject *self, PyObject *args)
1051 return do_strip(self, args, BOTHSTRIP);
/* Documentation string for the lstrip() string method (string_lstrip). */
1055 static char lstrip__doc__[] =
1056 "S.lstrip() -> string\n\
1058 Return a copy of the string S with leading whitespace removed.";
1060 static PyObject *
1061 string_lstrip(PyStringObject *self, PyObject *args)
1063 return do_strip(self, args, LEFTSTRIP);
/* Documentation string for the rstrip() string method (string_rstrip). */
1067 static char rstrip__doc__[] =
1068 "S.rstrip() -> string\n\
1070 Return a copy of the string S with trailing whitespace removed.";
1072 static PyObject *
1073 string_rstrip(PyStringObject *self, PyObject *args)
1075 return do_strip(self, args, RIGHTSTRIP);
/* Documentation string for the lower() string method (string_lower). */
1079 static char lower__doc__[] =
1080 "S.lower() -> string\n\
1082 Return a copy of the string S converted to lowercase.";
1084 static PyObject *
1085 string_lower(PyStringObject *self, PyObject *args)
1087 char *s = PyString_AS_STRING(self), *s_new;
1088 int i, n = PyString_GET_SIZE(self);
1089 PyObject *new;
1091 if (!PyArg_ParseTuple(args, ":lower"))
1092 return NULL;
1093 new = PyString_FromStringAndSize(NULL, n);
1094 if (new == NULL)
1095 return NULL;
1096 s_new = PyString_AsString(new);
1097 for (i = 0; i < n; i++) {
1098 int c = Py_CHARMASK(*s++);
1099 if (isupper(c)) {
1100 *s_new = tolower(c);
1101 } else
1102 *s_new = c;
1103 s_new++;
1105 return new;
/* Documentation string for the upper() string method (string_upper). */
1109 static char upper__doc__[] =
1110 "S.upper() -> string\n\
1112 Return a copy of the string S converted to uppercase.";
1114 static PyObject *
1115 string_upper(PyStringObject *self, PyObject *args)
1117 char *s = PyString_AS_STRING(self), *s_new;
1118 int i, n = PyString_GET_SIZE(self);
1119 PyObject *new;
1121 if (!PyArg_ParseTuple(args, ":upper"))
1122 return NULL;
1123 new = PyString_FromStringAndSize(NULL, n);
1124 if (new == NULL)
1125 return NULL;
1126 s_new = PyString_AsString(new);
1127 for (i = 0; i < n; i++) {
1128 int c = Py_CHARMASK(*s++);
1129 if (islower(c)) {
1130 *s_new = toupper(c);
1131 } else
1132 *s_new = c;
1133 s_new++;
1135 return new;
/* Documentation string for the title() string method (string_title). */
1139 static char title__doc__[] =
1140 "S.title() -> string\n\
1142 Return a titlecased version of S, i.e. words start with uppercase\n\
1143 characters, all remaining cased characters have lowercase.";
1145 static PyObject*
1146 string_title(PyUnicodeObject *self, PyObject *args)
1148 char *s = PyString_AS_STRING(self), *s_new;
1149 int i, n = PyString_GET_SIZE(self);
1150 int previous_is_cased = 0;
1151 PyObject *new;
1153 if (!PyArg_ParseTuple(args, ":title"))
1154 return NULL;
1155 new = PyString_FromStringAndSize(NULL, n);
1156 if (new == NULL)
1157 return NULL;
1158 s_new = PyString_AsString(new);
1159 for (i = 0; i < n; i++) {
1160 int c = Py_CHARMASK(*s++);
1161 if (islower(c)) {
1162 if (!previous_is_cased)
1163 c = toupper(c);
1164 previous_is_cased = 1;
1165 } else if (isupper(c)) {
1166 if (previous_is_cased)
1167 c = tolower(c);
1168 previous_is_cased = 1;
1169 } else
1170 previous_is_cased = 0;
1171 *s_new++ = c;
1173 return new;
/* Documentation string for the capitalize() string method
   (string_capitalize). */
1176 static char capitalize__doc__[] =
1177 "S.capitalize() -> string\n\
1179 Return a copy of the string S with only its first character\n\
1180 capitalized.";
1182 static PyObject *
1183 string_capitalize(PyStringObject *self, PyObject *args)
1185 char *s = PyString_AS_STRING(self), *s_new;
1186 int i, n = PyString_GET_SIZE(self);
1187 PyObject *new;
1189 if (!PyArg_ParseTuple(args, ":capitalize"))
1190 return NULL;
1191 new = PyString_FromStringAndSize(NULL, n);
1192 if (new == NULL)
1193 return NULL;
1194 s_new = PyString_AsString(new);
1195 if (0 < n) {
1196 int c = Py_CHARMASK(*s++);
1197 if (islower(c))
1198 *s_new = toupper(c);
1199 else
1200 *s_new = c;
1201 s_new++;
1203 for (i = 1; i < n; i++) {
1204 int c = Py_CHARMASK(*s++);
1205 if (isupper(c))
1206 *s_new = tolower(c);
1207 else
1208 *s_new = c;
1209 s_new++;
1211 return new;
/* Documentation string for the count() string method (string_count). */
1215 static char count__doc__[] =
1216 "S.count(sub[, start[, end]]) -> int\n\
1218 Return the number of occurrences of substring sub in string\n\
1219 S[start:end]. Optional arguments start and end are\n\
1220 interpreted as in slice notation.";
1222 static PyObject *
1223 string_count(PyStringObject *self, PyObject *args)
1225 const char *s = PyString_AS_STRING(self), *sub;
1226 int len = PyString_GET_SIZE(self), n;
1227 int i = 0, last = INT_MAX;
1228 int m, r;
1229 PyObject *subobj;
1231 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1232 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
1233 return NULL;
1235 if (PyString_Check(subobj)) {
1236 sub = PyString_AS_STRING(subobj);
1237 n = PyString_GET_SIZE(subobj);
1239 else if (PyUnicode_Check(subobj)) {
1240 int count;
1241 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1242 if (count == -1)
1243 return NULL;
1244 else
1245 return PyInt_FromLong((long) count);
1247 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1248 return NULL;
1250 if (last > len)
1251 last = len;
1252 if (last < 0)
1253 last += len;
1254 if (last < 0)
1255 last = 0;
1256 if (i < 0)
1257 i += len;
1258 if (i < 0)
1259 i = 0;
1260 m = last + 1 - n;
1261 if (n == 0)
1262 return PyInt_FromLong((long) (m-i));
1264 r = 0;
1265 while (i < m) {
1266 if (!memcmp(s+i, sub, n)) {
1267 r++;
1268 i += n;
1269 } else {
1270 i++;
1273 return PyInt_FromLong((long) r);
/* Documentation string for the swapcase() string method
   (string_swapcase). */
1277 static char swapcase__doc__[] =
1278 "S.swapcase() -> string\n\
1280 Return a copy of the string S with uppercase characters\n\
1281 converted to lowercase and vice versa.";
1283 static PyObject *
1284 string_swapcase(PyStringObject *self, PyObject *args)
1286 char *s = PyString_AS_STRING(self), *s_new;
1287 int i, n = PyString_GET_SIZE(self);
1288 PyObject *new;
1290 if (!PyArg_ParseTuple(args, ":swapcase"))
1291 return NULL;
1292 new = PyString_FromStringAndSize(NULL, n);
1293 if (new == NULL)
1294 return NULL;
1295 s_new = PyString_AsString(new);
1296 for (i = 0; i < n; i++) {
1297 int c = Py_CHARMASK(*s++);
1298 if (islower(c)) {
1299 *s_new = toupper(c);
1301 else if (isupper(c)) {
1302 *s_new = tolower(c);
1304 else
1305 *s_new = c;
1306 s_new++;
1308 return new;
/* Documentation string for the translate() string method
   (string_translate). */
1312 static char translate__doc__[] =
1313 "S.translate(table [,deletechars]) -> string\n\
1315 Return a copy of the string S, where all characters occurring\n\
1316 in the optional argument deletechars are removed, and the\n\
1317 remaining characters have been mapped through the given\n\
1318 translation table, which must be a string of length 256.";
1320 static PyObject *
1321 string_translate(PyStringObject *self, PyObject *args)
1323 register char *input, *output;
1324 register const char *table;
1325 register int i, c, changed = 0;
1326 PyObject *input_obj = (PyObject*)self;
1327 const char *table1, *output_start, *del_table=NULL;
1328 int inlen, tablen, dellen = 0;
1329 PyObject *result;
1330 int trans_table[256];
1331 PyObject *tableobj, *delobj = NULL;
1333 if (!PyArg_ParseTuple(args, "O|O:translate",
1334 &tableobj, &delobj))
1335 return NULL;
1337 if (PyString_Check(tableobj)) {
1338 table1 = PyString_AS_STRING(tableobj);
1339 tablen = PyString_GET_SIZE(tableobj);
1341 else if (PyUnicode_Check(tableobj)) {
1342 /* Unicode .translate() does not support the deletechars
1343 parameter; instead a mapping to None will cause characters
1344 to be deleted. */
1345 if (delobj != NULL) {
1346 PyErr_SetString(PyExc_TypeError,
1347 "deletions are implemented differently for unicode");
1348 return NULL;
1350 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1352 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
1353 return NULL;
1355 if (delobj != NULL) {
1356 if (PyString_Check(delobj)) {
1357 del_table = PyString_AS_STRING(delobj);
1358 dellen = PyString_GET_SIZE(delobj);
1360 else if (PyUnicode_Check(delobj)) {
1361 PyErr_SetString(PyExc_TypeError,
1362 "deletions are implemented differently for unicode");
1363 return NULL;
1365 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1366 return NULL;
1368 if (tablen != 256) {
1369 PyErr_SetString(PyExc_ValueError,
1370 "translation table must be 256 characters long");
1371 return NULL;
1374 else {
1375 del_table = NULL;
1376 dellen = 0;
1379 table = table1;
1380 inlen = PyString_Size(input_obj);
1381 result = PyString_FromStringAndSize((char *)NULL, inlen);
1382 if (result == NULL)
1383 return NULL;
1384 output_start = output = PyString_AsString(result);
1385 input = PyString_AsString(input_obj);
1387 if (dellen == 0) {
1388 /* If no deletions are required, use faster code */
1389 for (i = inlen; --i >= 0; ) {
1390 c = Py_CHARMASK(*input++);
1391 if (Py_CHARMASK((*output++ = table[c])) != c)
1392 changed = 1;
1394 if (changed)
1395 return result;
1396 Py_DECREF(result);
1397 Py_INCREF(input_obj);
1398 return input_obj;
1401 for (i = 0; i < 256; i++)
1402 trans_table[i] = Py_CHARMASK(table[i]);
1404 for (i = 0; i < dellen; i++)
1405 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1407 for (i = inlen; --i >= 0; ) {
1408 c = Py_CHARMASK(*input++);
1409 if (trans_table[c] != -1)
1410 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1411 continue;
1412 changed = 1;
1414 if (!changed) {
1415 Py_DECREF(result);
1416 Py_INCREF(input_obj);
1417 return input_obj;
1419 /* Fix the size of the resulting string */
1420 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1421 return NULL;
1422 return result;
1426 /* What follows is used for implementing replace(). Perry Stoll. */
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Searches the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the index into MEM of that occurrence, or -1 if
  there is none.  If PAT_LEN is greater than LEN the result is -1.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
	/* Highest start index at which a full copy of PAT still fits;
	   the pattern cannot begin in the last pat_len-1 bytes. */
	const int last_start = len - pat_len;
	int pos;

	for (pos = 0; pos <= last_start; pos++) {
		/* Cheap first-byte test before the full comparison. */
		if (mem[pos] != pat[0])
			continue;
		if (memcmp(mem + pos, pat, pat_len) == 0)
			return pos;
	}
	return -1;
}
/*
  mymemcnt

  Return the number of non-overlapping occurrences of PAT in MEM, found
  by scanning left to right and resuming just past each match.
  E.g. mem=1111 and pat==11 returns 2;
       mem=11111 and pat==11 also returns 2.
*/
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
	int hits = 0;

	while (len >= 0) {
		int consumed;
		int at = mymemfind(mem, len, pat, pat_len);

		if (at == -1)
			break;
		/* Skip the unmatched prefix together with the match. */
		consumed = at + pat_len;
		mem += consumed;
		len -= consumed;
		hits++;
	}
	return hits;
}
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB.

   If STR is empty, if length of PAT is greater than length of STR, or if
   there are no occurrences of PAT in STR (or COUNT is 0), then the
   original string is returned.  Otherwise, a new string is allocated here
   with PyMem_MALLOC and returned; the caller must release it with
   PyMem_FREE.

   COUNT limits the number of replacements; a negative COUNT means
   "replace all".  PAT must not be empty (callers reject that case).

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
             const char *pat, int pat_len,	/* pattern string to find */
             const char *sub, int sub_len,	/* substitution string */
             int count,				/* number of replacements */
	     int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len, alloc_len;

	if (len == 0 || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = mymemcnt(str, len, pat, pat_len);
	if (count < 0)
		count = INT_MAX;
	else if (nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;

	/* A growing replacement can push the result past INT_MAX; report
	   that as an allocation failure instead of overflowing new_len. */
	if (sub_len > pat_len &&
	    nfound > (INT_MAX - len) / (sub_len - pat_len))
		return NULL;
	new_len = len + nfound*(sub_len - pat_len);

	/* The result may be empty (e.g. the whole input replaced by "").
	   A zero-byte allocation is allowed to return NULL, which the
	   caller would misread as out-of-memory, so always ask for at
	   least one byte. */
	alloc_len = new_len > 0 ? new_len : 1;
	new_s = (char *)PyMem_MALLOC(alloc_len);
	if (new_s == NULL)
		return NULL;

	*out_len = new_len;
	out_s = new_s;

	while (len > 0) {
		/* find index of next instance of pattern */
		offset = mymemfind(str, len, pat, pat_len);
		/* if not found,  break out of loop */
		if (offset == -1)
			break;

		/* copy non matching part of input string */
		memcpy(new_s, str, offset); /* copy part of str before pat */
		str += offset + pat_len; /* move str past pattern */
		len -= offset + pat_len; /* reduce length of str remaining */

		/* copy substitute into the output string */
		new_s += offset; /* move new_s to dest for sub string */
		memcpy(new_s, sub, sub_len); /* copy substring into new_s */
		new_s += sub_len; /* offset new_s past sub string */

		/* break when we've done count replacements */
		if (--count == 0)
			break;
	}
	/* copy any remaining values into output string */
	if (len > 0)
		memcpy(new_s, str, len);
	return out_s;

  return_same:
	*out_len = -1;
	return (char*)str;	/* have to cast away constness here */
}
1557 static char replace__doc__[] =
1558 "S.replace (old, new[, maxsplit]) -> string\n\
1560 Return a copy of string S with all occurrences of substring\n\
1561 old replaced by new. If the optional argument maxsplit is\n\
1562 given, only the first maxsplit occurrences are replaced.";
1564 static PyObject *
1565 string_replace(PyStringObject *self, PyObject *args)
1567 const char *str = PyString_AS_STRING(self), *sub, *repl;
1568 char *new_s;
1569 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1570 int count = -1;
1571 PyObject *new;
1572 PyObject *subobj, *replobj;
1574 if (!PyArg_ParseTuple(args, "OO|i:replace",
1575 &subobj, &replobj, &count))
1576 return NULL;
1578 if (PyString_Check(subobj)) {
1579 sub = PyString_AS_STRING(subobj);
1580 sub_len = PyString_GET_SIZE(subobj);
1582 else if (PyUnicode_Check(subobj))
1583 return PyUnicode_Replace((PyObject *)self,
1584 subobj, replobj, count);
1585 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1586 return NULL;
1588 if (PyString_Check(replobj)) {
1589 repl = PyString_AS_STRING(replobj);
1590 repl_len = PyString_GET_SIZE(replobj);
1592 else if (PyUnicode_Check(replobj))
1593 return PyUnicode_Replace((PyObject *)self,
1594 subobj, replobj, count);
1595 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1596 return NULL;
1598 if (sub_len <= 0) {
1599 PyErr_SetString(PyExc_ValueError, "empty pattern string");
1600 return NULL;
1602 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
1603 if (new_s == NULL) {
1604 PyErr_NoMemory();
1605 return NULL;
1607 if (out_len == -1) {
1608 /* we're returning another reference to self */
1609 new = (PyObject*)self;
1610 Py_INCREF(new);
1612 else {
1613 new = PyString_FromStringAndSize(new_s, out_len);
1614 PyMem_FREE(new_s);
1616 return new;
1620 static char startswith__doc__[] =
1621 "S.startswith(prefix[, start[, end]]) -> int\n\
1623 Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1624 optional start, test S beginning at that position. With optional end, stop\n\
1625 comparing S at that position.";
1627 static PyObject *
1628 string_startswith(PyStringObject *self, PyObject *args)
1630 const char* str = PyString_AS_STRING(self);
1631 int len = PyString_GET_SIZE(self);
1632 const char* prefix;
1633 int plen;
1634 int start = 0;
1635 int end = -1;
1636 PyObject *subobj;
1638 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1639 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1640 return NULL;
1641 if (PyString_Check(subobj)) {
1642 prefix = PyString_AS_STRING(subobj);
1643 plen = PyString_GET_SIZE(subobj);
1645 else if (PyUnicode_Check(subobj)) {
1646 int rc;
1647 rc = PyUnicode_Tailmatch((PyObject *)self,
1648 subobj, start, end, -1);
1649 if (rc == -1)
1650 return NULL;
1651 else
1652 return PyInt_FromLong((long) rc);
1654 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
1655 return NULL;
1657 /* adopt Java semantics for index out of range. it is legal for
1658 * offset to be == plen, but this only returns true if prefix is
1659 * the empty string.
1661 if (start < 0 || start+plen > len)
1662 return PyInt_FromLong(0);
1664 if (!memcmp(str+start, prefix, plen)) {
1665 /* did the match end after the specified end? */
1666 if (end < 0)
1667 return PyInt_FromLong(1);
1668 else if (end - start < plen)
1669 return PyInt_FromLong(0);
1670 else
1671 return PyInt_FromLong(1);
1673 else return PyInt_FromLong(0);
1677 static char endswith__doc__[] =
1678 "S.endswith(suffix[, start[, end]]) -> int\n\
1680 Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1681 optional start, test S beginning at that position. With optional end, stop\n\
1682 comparing S at that position.";
1684 static PyObject *
1685 string_endswith(PyStringObject *self, PyObject *args)
1687 const char* str = PyString_AS_STRING(self);
1688 int len = PyString_GET_SIZE(self);
1689 const char* suffix;
1690 int slen;
1691 int start = 0;
1692 int end = -1;
1693 int lower, upper;
1694 PyObject *subobj;
1696 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1697 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1698 return NULL;
1699 if (PyString_Check(subobj)) {
1700 suffix = PyString_AS_STRING(subobj);
1701 slen = PyString_GET_SIZE(subobj);
1703 else if (PyUnicode_Check(subobj)) {
1704 int rc;
1705 rc = PyUnicode_Tailmatch((PyObject *)self,
1706 subobj, start, end, +1);
1707 if (rc == -1)
1708 return NULL;
1709 else
1710 return PyInt_FromLong((long) rc);
1712 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
1713 return NULL;
1715 if (start < 0 || start > len || slen > len)
1716 return PyInt_FromLong(0);
1718 upper = (end >= 0 && end <= len) ? end : len;
1719 lower = (upper - slen) > start ? (upper - slen) : start;
1721 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
1722 return PyInt_FromLong(1);
1723 else return PyInt_FromLong(0);
1727 static char encode__doc__[] =
1728 "S.encode([encoding[,errors]]) -> string\n\
1730 Return an encoded string version of S. Default encoding is the current\n\
1731 default string encoding. errors may be given to set a different error\n\
1732 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1733 a ValueError. Other possible values are 'ignore' and 'replace'.";
1735 static PyObject *
1736 string_encode(PyStringObject *self, PyObject *args)
1738 char *encoding = NULL;
1739 char *errors = NULL;
1740 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
1741 return NULL;
1742 return PyString_AsEncodedString((PyObject *)self, encoding, errors);
1746 static char expandtabs__doc__[] =
1747 "S.expandtabs([tabsize]) -> string\n\
1749 Return a copy of S where all tab characters are expanded using spaces.\n\
1750 If tabsize is not given, a tab size of 8 characters is assumed.";
1752 static PyObject*
1753 string_expandtabs(PyStringObject *self, PyObject *args)
1755 const char *e, *p;
1756 char *q;
1757 int i, j;
1758 PyObject *u;
1759 int tabsize = 8;
1761 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1762 return NULL;
1764 /* First pass: determine size of output string */
1765 i = j = 0;
1766 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1767 for (p = PyString_AS_STRING(self); p < e; p++)
1768 if (*p == '\t') {
1769 if (tabsize > 0)
1770 j += tabsize - (j % tabsize);
1772 else {
1773 j++;
1774 if (*p == '\n' || *p == '\r') {
1775 i += j;
1776 j = 0;
1780 /* Second pass: create output string and fill it */
1781 u = PyString_FromStringAndSize(NULL, i + j);
1782 if (!u)
1783 return NULL;
1785 j = 0;
1786 q = PyString_AS_STRING(u);
1788 for (p = PyString_AS_STRING(self); p < e; p++)
1789 if (*p == '\t') {
1790 if (tabsize > 0) {
1791 i = tabsize - (j % tabsize);
1792 j += i;
1793 while (i--)
1794 *q++ = ' ';
1797 else {
1798 j++;
1799 *q++ = *p;
1800 if (*p == '\n' || *p == '\r')
1801 j = 0;
1804 return u;
1807 static
1808 PyObject *pad(PyStringObject *self,
1809 int left,
1810 int right,
1811 char fill)
1813 PyObject *u;
1815 if (left < 0)
1816 left = 0;
1817 if (right < 0)
1818 right = 0;
1820 if (left == 0 && right == 0) {
1821 Py_INCREF(self);
1822 return (PyObject *)self;
1825 u = PyString_FromStringAndSize(NULL,
1826 left + PyString_GET_SIZE(self) + right);
1827 if (u) {
1828 if (left)
1829 memset(PyString_AS_STRING(u), fill, left);
1830 memcpy(PyString_AS_STRING(u) + left,
1831 PyString_AS_STRING(self),
1832 PyString_GET_SIZE(self));
1833 if (right)
1834 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1835 fill, right);
1838 return u;
1841 static char ljust__doc__[] =
1842 "S.ljust(width) -> string\n\
1844 Return S left justified in a string of length width. Padding is\n\
1845 done using spaces.";
1847 static PyObject *
1848 string_ljust(PyStringObject *self, PyObject *args)
1850 int width;
1851 if (!PyArg_ParseTuple(args, "i:ljust", &width))
1852 return NULL;
1854 if (PyString_GET_SIZE(self) >= width) {
1855 Py_INCREF(self);
1856 return (PyObject*) self;
1859 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1863 static char rjust__doc__[] =
1864 "S.rjust(width) -> string\n\
1866 Return S right justified in a string of length width. Padding is\n\
1867 done using spaces.";
1869 static PyObject *
1870 string_rjust(PyStringObject *self, PyObject *args)
1872 int width;
1873 if (!PyArg_ParseTuple(args, "i:rjust", &width))
1874 return NULL;
1876 if (PyString_GET_SIZE(self) >= width) {
1877 Py_INCREF(self);
1878 return (PyObject*) self;
1881 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1885 static char center__doc__[] =
1886 "S.center(width) -> string\n\
1888 Return S centered in a string of length width. Padding is done\n\
1889 using spaces.";
1891 static PyObject *
1892 string_center(PyStringObject *self, PyObject *args)
1894 int marg, left;
1895 int width;
1897 if (!PyArg_ParseTuple(args, "i:center", &width))
1898 return NULL;
1900 if (PyString_GET_SIZE(self) >= width) {
1901 Py_INCREF(self);
1902 return (PyObject*) self;
1905 marg = width - PyString_GET_SIZE(self);
1906 left = marg / 2 + (marg & width & 1);
1908 return pad(self, left, marg - left, ' ');
#if 0
static char zfill__doc__[] =
"S.zfill(width) -> string\n\
\n\
Pad a numeric string x with zeros on the left, to fill a field\n\
of the specified width. The string x is never truncated.";

/* Implements S.zfill(width) (currently compiled out): left-pad S with
   '0' up to width; a leading '+' or '-' is moved in front of the
   zeros.  If S is already at least width long, S itself is returned. */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
	int width;
	int zeros;
	PyObject *padded;
	char *buf;

	if (!PyArg_ParseTuple(args, "i:zfill", &width))
		return NULL;

	if (PyString_GET_SIZE(self) >= width) {
		Py_INCREF(self);
		return (PyObject*) self;
	}

	zeros = width - PyString_GET_SIZE(self);

	padded = pad(self, zeros, 0, '0');
	if (padded == NULL)
		return NULL;

	buf = PyString_AS_STRING(padded);
	/* buf[zeros] is the first character of the original string. */
	if (buf[zeros] == '+' || buf[zeros] == '-') {
		/* move sign to beginning of string */
		buf[0] = buf[zeros];
		buf[zeros] = '0';
	}

	return padded;
}
#endif
1951 static char isspace__doc__[] =
1952 "S.isspace() -> int\n\
1954 Return 1 if there are only whitespace characters in S,\n\
1955 0 otherwise.";
1957 static PyObject*
1958 string_isspace(PyStringObject *self, PyObject *args)
1960 register const unsigned char *p
1961 = (unsigned char *) PyString_AS_STRING(self);
1962 register const unsigned char *e;
1964 if (!PyArg_NoArgs(args))
1965 return NULL;
1967 /* Shortcut for single character strings */
1968 if (PyString_GET_SIZE(self) == 1 &&
1969 isspace(*p))
1970 return PyInt_FromLong(1);
1972 /* Special case for empty strings */
1973 if (PyString_GET_SIZE(self) == 0)
1974 return PyInt_FromLong(0);
1976 e = p + PyString_GET_SIZE(self);
1977 for (; p < e; p++) {
1978 if (!isspace(*p))
1979 return PyInt_FromLong(0);
1981 return PyInt_FromLong(1);
1985 static char isalpha__doc__[] =
1986 "S.isalpha() -> int\n\
1988 Return 1 if all characters in S are alphabetic\n\
1989 and there is at least one character in S, 0 otherwise.";
1991 static PyObject*
1992 string_isalpha(PyUnicodeObject *self, PyObject *args)
1994 register const unsigned char *p
1995 = (unsigned char *) PyString_AS_STRING(self);
1996 register const unsigned char *e;
1998 if (!PyArg_NoArgs(args))
1999 return NULL;
2001 /* Shortcut for single character strings */
2002 if (PyString_GET_SIZE(self) == 1 &&
2003 isalpha(*p))
2004 return PyInt_FromLong(1);
2006 /* Special case for empty strings */
2007 if (PyString_GET_SIZE(self) == 0)
2008 return PyInt_FromLong(0);
2010 e = p + PyString_GET_SIZE(self);
2011 for (; p < e; p++) {
2012 if (!isalpha(*p))
2013 return PyInt_FromLong(0);
2015 return PyInt_FromLong(1);
2019 static char isalnum__doc__[] =
2020 "S.isalnum() -> int\n\
2022 Return 1 if all characters in S are alphanumeric\n\
2023 and there is at least one character in S, 0 otherwise.";
2025 static PyObject*
2026 string_isalnum(PyUnicodeObject *self, PyObject *args)
2028 register const unsigned char *p
2029 = (unsigned char *) PyString_AS_STRING(self);
2030 register const unsigned char *e;
2032 if (!PyArg_NoArgs(args))
2033 return NULL;
2035 /* Shortcut for single character strings */
2036 if (PyString_GET_SIZE(self) == 1 &&
2037 isalnum(*p))
2038 return PyInt_FromLong(1);
2040 /* Special case for empty strings */
2041 if (PyString_GET_SIZE(self) == 0)
2042 return PyInt_FromLong(0);
2044 e = p + PyString_GET_SIZE(self);
2045 for (; p < e; p++) {
2046 if (!isalnum(*p))
2047 return PyInt_FromLong(0);
2049 return PyInt_FromLong(1);
2053 static char isdigit__doc__[] =
2054 "S.isdigit() -> int\n\
2056 Return 1 if there are only digit characters in S,\n\
2057 0 otherwise.";
2059 static PyObject*
2060 string_isdigit(PyStringObject *self, PyObject *args)
2062 register const unsigned char *p
2063 = (unsigned char *) PyString_AS_STRING(self);
2064 register const unsigned char *e;
2066 if (!PyArg_NoArgs(args))
2067 return NULL;
2069 /* Shortcut for single character strings */
2070 if (PyString_GET_SIZE(self) == 1 &&
2071 isdigit(*p))
2072 return PyInt_FromLong(1);
2074 /* Special case for empty strings */
2075 if (PyString_GET_SIZE(self) == 0)
2076 return PyInt_FromLong(0);
2078 e = p + PyString_GET_SIZE(self);
2079 for (; p < e; p++) {
2080 if (!isdigit(*p))
2081 return PyInt_FromLong(0);
2083 return PyInt_FromLong(1);
2087 static char islower__doc__[] =
2088 "S.islower() -> int\n\
2090 Return 1 if all cased characters in S are lowercase and there is\n\
2091 at least one cased character in S, 0 otherwise.";
2093 static PyObject*
2094 string_islower(PyStringObject *self, PyObject *args)
2096 register const unsigned char *p
2097 = (unsigned char *) PyString_AS_STRING(self);
2098 register const unsigned char *e;
2099 int cased;
2101 if (!PyArg_NoArgs(args))
2102 return NULL;
2104 /* Shortcut for single character strings */
2105 if (PyString_GET_SIZE(self) == 1)
2106 return PyInt_FromLong(islower(*p) != 0);
2108 /* Special case for empty strings */
2109 if (PyString_GET_SIZE(self) == 0)
2110 return PyInt_FromLong(0);
2112 e = p + PyString_GET_SIZE(self);
2113 cased = 0;
2114 for (; p < e; p++) {
2115 if (isupper(*p))
2116 return PyInt_FromLong(0);
2117 else if (!cased && islower(*p))
2118 cased = 1;
2120 return PyInt_FromLong(cased);
2124 static char isupper__doc__[] =
2125 "S.isupper() -> int\n\
2127 Return 1 if all cased characters in S are uppercase and there is\n\
2128 at least one cased character in S, 0 otherwise.";
2130 static PyObject*
2131 string_isupper(PyStringObject *self, PyObject *args)
2133 register const unsigned char *p
2134 = (unsigned char *) PyString_AS_STRING(self);
2135 register const unsigned char *e;
2136 int cased;
2138 if (!PyArg_NoArgs(args))
2139 return NULL;
2141 /* Shortcut for single character strings */
2142 if (PyString_GET_SIZE(self) == 1)
2143 return PyInt_FromLong(isupper(*p) != 0);
2145 /* Special case for empty strings */
2146 if (PyString_GET_SIZE(self) == 0)
2147 return PyInt_FromLong(0);
2149 e = p + PyString_GET_SIZE(self);
2150 cased = 0;
2151 for (; p < e; p++) {
2152 if (islower(*p))
2153 return PyInt_FromLong(0);
2154 else if (!cased && isupper(*p))
2155 cased = 1;
2157 return PyInt_FromLong(cased);
2161 static char istitle__doc__[] =
2162 "S.istitle() -> int\n\
2164 Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2165 may only follow uncased characters and lowercase characters only cased\n\
2166 ones. Return 0 otherwise.";
2168 static PyObject*
2169 string_istitle(PyStringObject *self, PyObject *args)
2171 register const unsigned char *p
2172 = (unsigned char *) PyString_AS_STRING(self);
2173 register const unsigned char *e;
2174 int cased, previous_is_cased;
2176 if (!PyArg_NoArgs(args))
2177 return NULL;
2179 /* Shortcut for single character strings */
2180 if (PyString_GET_SIZE(self) == 1)
2181 return PyInt_FromLong(isupper(*p) != 0);
2183 /* Special case for empty strings */
2184 if (PyString_GET_SIZE(self) == 0)
2185 return PyInt_FromLong(0);
2187 e = p + PyString_GET_SIZE(self);
2188 cased = 0;
2189 previous_is_cased = 0;
2190 for (; p < e; p++) {
2191 register const unsigned char ch = *p;
2193 if (isupper(ch)) {
2194 if (previous_is_cased)
2195 return PyInt_FromLong(0);
2196 previous_is_cased = 1;
2197 cased = 1;
2199 else if (islower(ch)) {
2200 if (!previous_is_cased)
2201 return PyInt_FromLong(0);
2202 previous_is_cased = 1;
2203 cased = 1;
2205 else
2206 previous_is_cased = 0;
2208 return PyInt_FromLong(cased);
2212 static char splitlines__doc__[] =
2213 "S.splitlines([keepends]]) -> list of strings\n\
2215 Return a list of the lines in S, breaking at line boundaries.\n\
2216 Line breaks are not included in the resulting list unless keepends\n\
2217 is given and true.";
2219 #define SPLIT_APPEND(data, left, right) \
2220 str = PyString_FromStringAndSize(data + left, right - left); \
2221 if (!str) \
2222 goto onError; \
2223 if (PyList_Append(list, str)) { \
2224 Py_DECREF(str); \
2225 goto onError; \
2227 else \
2228 Py_DECREF(str);
2230 static PyObject*
2231 string_splitlines(PyStringObject *self, PyObject *args)
2233 register int i;
2234 register int j;
2235 int len;
2236 int keepends = 0;
2237 PyObject *list;
2238 PyObject *str;
2239 char *data;
2241 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
2242 return NULL;
2244 data = PyString_AS_STRING(self);
2245 len = PyString_GET_SIZE(self);
2247 list = PyList_New(0);
2248 if (!list)
2249 goto onError;
2251 for (i = j = 0; i < len; ) {
2252 int eol;
2254 /* Find a line and append it */
2255 while (i < len && data[i] != '\n' && data[i] != '\r')
2256 i++;
2258 /* Skip the line break reading CRLF as one line break */
2259 eol = i;
2260 if (i < len) {
2261 if (data[i] == '\r' && i + 1 < len &&
2262 data[i+1] == '\n')
2263 i += 2;
2264 else
2265 i++;
2266 if (keepends)
2267 eol = i;
2269 SPLIT_APPEND(data, j, eol);
2270 j = i;
2272 if (j < len) {
2273 SPLIT_APPEND(data, j, len);
2276 return list;
2278 onError:
2279 Py_DECREF(list);
2280 return NULL;
2283 #undef SPLIT_APPEND
/* Method table for string objects, searched by string_getattr().
   Each entry is {name, C function, flags, docstring}.  Entries with
   flags 1 parse their arguments with PyArg_ParseTuple(); entries with
   flags 0 (the is*() predicates) check for no arguments with
   PyArg_NoArgs(). */
static PyMethodDef
string_methods[] = {
	/* Counterparts of the obsolete stropmodule functions; except
	   string.maketrans(). */
	{"join",       (PyCFunction)string_join,       1, join__doc__},
	{"split",       (PyCFunction)string_split,       1, split__doc__},
	{"lower",      (PyCFunction)string_lower,      1, lower__doc__},
	{"upper",       (PyCFunction)string_upper,       1, upper__doc__},
	{"islower", (PyCFunction)string_islower, 0, islower__doc__},
	{"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
	{"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
	{"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
	{"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
	{"isalpha", (PyCFunction)string_isalpha, 0, isalpha__doc__},
	{"isalnum", (PyCFunction)string_isalnum, 0, isalnum__doc__},
	{"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
	{"count",      (PyCFunction)string_count,      1, count__doc__},
	{"endswith",   (PyCFunction)string_endswith,   1, endswith__doc__},
	{"find",       (PyCFunction)string_find,       1, find__doc__},
	{"index",      (PyCFunction)string_index,      1, index__doc__},
	{"lstrip",     (PyCFunction)string_lstrip,     1, lstrip__doc__},
	{"replace",     (PyCFunction)string_replace,     1, replace__doc__},
	{"rfind",       (PyCFunction)string_rfind,       1, rfind__doc__},
	{"rindex",      (PyCFunction)string_rindex,      1, rindex__doc__},
	{"rstrip",      (PyCFunction)string_rstrip,      1, rstrip__doc__},
	{"startswith",  (PyCFunction)string_startswith,  1, startswith__doc__},
	{"strip",       (PyCFunction)string_strip,       1, strip__doc__},
	{"swapcase",    (PyCFunction)string_swapcase,    1, swapcase__doc__},
	{"translate",   (PyCFunction)string_translate,   1, translate__doc__},
	{"title",       (PyCFunction)string_title,       1, title__doc__},
	{"ljust",       (PyCFunction)string_ljust,       1, ljust__doc__},
	{"rjust",       (PyCFunction)string_rjust,       1, rjust__doc__},
	{"center",      (PyCFunction)string_center,      1, center__doc__},
	{"encode",      (PyCFunction)string_encode,      1, encode__doc__},
	{"expandtabs",  (PyCFunction)string_expandtabs,  1, expandtabs__doc__},
	{"splitlines",  (PyCFunction)string_splitlines,  1, splitlines__doc__},
#if 0
	{"zfill",       (PyCFunction)string_zfill,       1, zfill__doc__},
#endif
	{NULL,     NULL}		     /* sentinel */
};
2328 static PyObject *
2329 string_getattr(PyStringObject *s, char *name)
2331 return Py_FindMethod(string_methods, (PyObject*)s, name);
/* Type object for Python strings.  Slots that are 0 are not implemented
   by this type; the trailing comment on each line names the slot. */
PyTypeObject PyString_Type = {
	PyObject_HEAD_INIT(&PyType_Type)
	0,
	"string",
	sizeof(PyStringObject),
	sizeof(char),
	(destructor)string_dealloc, /*tp_dealloc*/
	(printfunc)string_print, /*tp_print*/
	(getattrfunc)string_getattr,		/*tp_getattr*/
	0,		/*tp_setattr*/
	(cmpfunc)string_compare, /*tp_compare*/
	(reprfunc)string_repr, /*tp_repr*/
	0,		/*tp_as_number*/
	&string_as_sequence,	/*tp_as_sequence*/
	0,		/*tp_as_mapping*/
	(hashfunc)string_hash, /*tp_hash*/
	0,		/*tp_call*/
	0,		/*tp_str*/
	0,		/*tp_getattro*/
	0,		/*tp_setattro*/
	&string_as_buffer,	/*tp_as_buffer*/
	Py_TPFLAGS_DEFAULT,	/*tp_flags*/
	0,		/*tp_doc*/
};
2360 void
2361 PyString_Concat(register PyObject **pv, register PyObject *w)
2363 register PyObject *v;
2364 if (*pv == NULL)
2365 return;
2366 if (w == NULL || !PyString_Check(*pv)) {
2367 Py_DECREF(*pv);
2368 *pv = NULL;
2369 return;
2371 v = string_concat((PyStringObject *) *pv, w);
2372 Py_DECREF(*pv);
2373 *pv = v;
2376 void
2377 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
2379 PyString_Concat(pv, w);
2380 Py_XDECREF(w);
2384 /* The following function breaks the notion that strings are immutable:
2385 it changes the size of a string. We get away with this only if there
2386 is only one module referencing the object. You can also think of it
2387 as creating a new string object and destroying the old one, only
2388 more efficiently. In any case, don't use this if the string may
2389 already be known to some other part of the code... */
2392 _PyString_Resize(PyObject **pv, int newsize)
2394 register PyObject *v;
2395 register PyStringObject *sv;
2396 v = *pv;
2397 if (!PyString_Check(v) || v->ob_refcnt != 1) {
2398 *pv = 0;
2399 Py_DECREF(v);
2400 PyErr_BadInternalCall();
2401 return -1;
2403 /* XXX UNREF/NEWREF interface should be more symmetrical */
2404 #ifdef Py_REF_DEBUG
2405 --_Py_RefTotal;
2406 #endif
2407 _Py_ForgetReference(v);
2408 *pv = (PyObject *)
2409 PyObject_REALLOC((char *)v,
2410 sizeof(PyStringObject) + newsize * sizeof(char));
2411 if (*pv == NULL) {
2412 PyObject_DEL(v);
2413 PyErr_NoMemory();
2414 return -1;
2416 _Py_NewReference(*pv);
2417 sv = (PyStringObject *) *pv;
2418 sv->ob_size = newsize;
2419 sv->ob_sval[newsize] = '\0';
2420 return 0;
2423 /* Helpers for formatstring */
2425 static PyObject *
2426 getnextarg(PyObject *args, int arglen, int *p_argidx)
2428 int argidx = *p_argidx;
2429 if (argidx < arglen) {
2430 (*p_argidx)++;
2431 if (arglen < 0)
2432 return args;
2433 else
2434 return PyTuple_GetItem(args, argidx);
2436 PyErr_SetString(PyExc_TypeError,
2437 "not enough arguments for format string");
2438 return NULL;
/* Format codes
 * F_LJUST	'-'
 * F_SIGN	'+'
 * F_BLANK	' '
 * F_ALT	'#'
 * F_ZERO	'0'
 *
 * Each flag is a distinct bit so that several can be combined in one
 * `flags` word with bitwise OR and tested with bitwise AND.
 */
#define F_LJUST (1<<0)
#define F_SIGN	(1<<1)
#define F_BLANK (1<<2)
#define F_ALT	(1<<3)
#define F_ZERO	(1<<4)
2454 static int
2455 formatfloat(char *buf, size_t buflen, int flags,
2456 int prec, int type, PyObject *v)
2458 /* fmt = '%#.' + `prec` + `type`
2459 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
2460 char fmt[20];
2461 double x;
2462 if (!PyArg_Parse(v, "d;float argument required", &x))
2463 return -1;
2464 if (prec < 0)
2465 prec = 6;
2466 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2467 type = 'g';
2468 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
2469 /* worst case length calc to ensure no buffer overrun:
2470 fmt = %#.<prec>g
2471 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
2472 for any double rep.)
2473 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2474 If prec=0 the effective precision is 1 (the leading digit is
2475 always given), therefore increase by one to 10+prec. */
2476 if (buflen <= (size_t)10 + (size_t)prec) {
2477 PyErr_SetString(PyExc_OverflowError,
2478 "formatted float is too long (precision too long?)");
2479 return -1;
2481 sprintf(buf, fmt, x);
2482 return strlen(buf);
2485 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2486 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2487 * Python's regular ints.
2488 * Return value: a new PyString*, or NULL if error.
2489 * . *pbuf is set to point into it,
2490 * *plen set to the # of chars following that.
2491 * Caller must decref it when done using pbuf.
2492 * The string starting at *pbuf is of the form
2493 * "-"? ("0x" | "0X")? digit+
2494 * "0x"/"0X" are present only for x and X conversions, with F_ALT
2495 * set in flags. The case of hex digits will be correct,
2496 * There will be at least prec digits, zero-filled on the left if
2497 * necessary to get that many.
2498 * val object to be converted
2499 * flags bitmask of format flags; only F_ALT is looked at
2500 * prec minimum number of digits; 0-fill on left if needed
2501 * type a character in [duoxX]; u acts the same as d
2503 * CAUTION: o, x and X conversions on regular ints can never
2504 * produce a '-' sign, but can for Python's unbounded ints.
2506 PyObject*
2507 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2508 char **pbuf, int *plen)
2510 PyObject *result = NULL;
2511 char *buf;
2512 int i;
2513 int sign; /* 1 if '-', else 0 */
2514 int len; /* number of characters */
2515 int numdigits; /* len == numnondigits + numdigits */
2516 int numnondigits = 0;
2518 switch (type) {
2519 case 'd':
2520 case 'u':
2521 result = val->ob_type->tp_str(val);
2522 break;
2523 case 'o':
2524 result = val->ob_type->tp_as_number->nb_oct(val);
2525 break;
2526 case 'x':
2527 case 'X':
2528 numnondigits = 2;
2529 result = val->ob_type->tp_as_number->nb_hex(val);
2530 break;
2531 default:
2532 assert(!"'type' not in [duoxX]");
2534 if (!result)
2535 return NULL;
2537 /* To modify the string in-place, there can only be one reference. */
2538 if (result->ob_refcnt != 1) {
2539 PyErr_BadInternalCall();
2540 return NULL;
2542 buf = PyString_AsString(result);
2543 len = PyString_Size(result);
2544 if (buf[len-1] == 'L') {
2545 --len;
2546 buf[len] = '\0';
2548 sign = buf[0] == '-';
2549 numnondigits += sign;
2550 numdigits = len - numnondigits;
2551 assert(numdigits > 0);
2553 /* Get rid of base marker unless F_ALT */
2554 if ((flags & F_ALT) == 0) {
2555 /* Need to skip 0x, 0X or 0. */
2556 int skipped = 0;
2557 switch (type) {
2558 case 'o':
2559 assert(buf[sign] == '0');
2560 /* If 0 is only digit, leave it alone. */
2561 if (numdigits > 1) {
2562 skipped = 1;
2563 --numdigits;
2565 break;
2566 case 'x':
2567 case 'X':
2568 assert(buf[sign] == '0');
2569 assert(buf[sign + 1] == 'x');
2570 skipped = 2;
2571 numnondigits -= 2;
2572 break;
2574 if (skipped) {
2575 buf += skipped;
2576 len -= skipped;
2577 if (sign)
2578 buf[0] = '-';
2580 assert(len == numnondigits + numdigits);
2581 assert(numdigits > 0);
2584 /* Fill with leading zeroes to meet minimum width. */
2585 if (prec > numdigits) {
2586 PyObject *r1 = PyString_FromStringAndSize(NULL,
2587 numnondigits + prec);
2588 char *b1;
2589 if (!r1) {
2590 Py_DECREF(result);
2591 return NULL;
2593 b1 = PyString_AS_STRING(r1);
2594 for (i = 0; i < numnondigits; ++i)
2595 *b1++ = *buf++;
2596 for (i = 0; i < prec - numdigits; i++)
2597 *b1++ = '0';
2598 for (i = 0; i < numdigits; i++)
2599 *b1++ = *buf++;
2600 *b1 = '\0';
2601 Py_DECREF(result);
2602 result = r1;
2603 buf = PyString_AS_STRING(result);
2604 len = numnondigits + prec;
2607 /* Fix up case for hex conversions. */
2608 switch (type) {
2609 case 'x':
2610 /* Need to convert all upper case letters to lower case. */
2611 for (i = 0; i < len; i++)
2612 if (buf[i] >= 'A' && buf[i] <= 'F')
2613 buf[i] += 'a'-'A';
2614 break;
2615 case 'X':
2616 /* Need to convert 0x to 0X (and -0x to -0X). */
2617 if (buf[sign + 1] == 'x')
2618 buf[sign + 1] = 'X';
2619 break;
2621 *pbuf = buf;
2622 *plen = len;
2623 return result;
2626 static int
2627 formatint(char *buf, size_t buflen, int flags,
2628 int prec, int type, PyObject *v)
2630 /* fmt = '%#.' + `prec` + 'l' + `type`
2631 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
2632 + 1 + 1 = 24 */
2633 char fmt[64]; /* plenty big enough! */
2634 long x;
2635 if (!PyArg_Parse(v, "l;int argument required", &x))
2636 return -1;
2637 if (prec < 0)
2638 prec = 1;
2639 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
2640 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
2641 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
2642 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
2643 PyErr_SetString(PyExc_OverflowError,
2644 "formatted integer is too long (precision too long?)");
2645 return -1;
2647 sprintf(buf, fmt, x);
2648 return strlen(buf);
2651 static int
2652 formatchar(char *buf, size_t buflen, PyObject *v)
2654 /* presume that the buffer is at least 2 characters long */
2655 if (PyString_Check(v)) {
2656 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
2657 return -1;
2659 else {
2660 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
2661 return -1;
2663 buf[1] = '\0';
2664 return 1;
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the macro behaves as a
   single operand wherever it is used (e.g. `sizeof FORMATBUFLEN`).
*/
#define FORMATBUFLEN ((size_t)120)
2678 PyObject *
2679 PyString_Format(PyObject *format, PyObject *args)
2681 char *fmt, *res;
2682 int fmtcnt, rescnt, reslen, arglen, argidx;
2683 int args_owned = 0;
2684 PyObject *result, *orig_args, *v, *w;
2685 PyObject *dict = NULL;
2686 if (format == NULL || !PyString_Check(format) || args == NULL) {
2687 PyErr_BadInternalCall();
2688 return NULL;
2690 orig_args = args;
2691 fmt = PyString_AsString(format);
2692 fmtcnt = PyString_Size(format);
2693 reslen = rescnt = fmtcnt + 100;
2694 result = PyString_FromStringAndSize((char *)NULL, reslen);
2695 if (result == NULL)
2696 return NULL;
2697 res = PyString_AsString(result);
2698 if (PyTuple_Check(args)) {
2699 arglen = PyTuple_Size(args);
2700 argidx = 0;
2702 else {
2703 arglen = -1;
2704 argidx = -2;
2706 if (args->ob_type->tp_as_mapping)
2707 dict = args;
2708 while (--fmtcnt >= 0) {
2709 if (*fmt != '%') {
2710 if (--rescnt < 0) {
2711 rescnt = fmtcnt + 100;
2712 reslen += rescnt;
2713 if (_PyString_Resize(&result, reslen) < 0)
2714 return NULL;
2715 res = PyString_AsString(result)
2716 + reslen - rescnt;
2717 --rescnt;
2719 *res++ = *fmt++;
2721 else {
2722 /* Got a format specifier */
2723 int flags = 0;
2724 int width = -1;
2725 int prec = -1;
2726 int size = 0;
2727 int c = '\0';
2728 int fill;
2729 PyObject *v = NULL;
2730 PyObject *temp = NULL;
2731 char *pbuf;
2732 int sign;
2733 int len;
2734 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
2735 char *fmt_start = fmt;
2737 fmt++;
2738 if (*fmt == '(') {
2739 char *keystart;
2740 int keylen;
2741 PyObject *key;
2742 int pcount = 1;
2744 if (dict == NULL) {
2745 PyErr_SetString(PyExc_TypeError,
2746 "format requires a mapping");
2747 goto error;
2749 ++fmt;
2750 --fmtcnt;
2751 keystart = fmt;
2752 /* Skip over balanced parentheses */
2753 while (pcount > 0 && --fmtcnt >= 0) {
2754 if (*fmt == ')')
2755 --pcount;
2756 else if (*fmt == '(')
2757 ++pcount;
2758 fmt++;
2760 keylen = fmt - keystart - 1;
2761 if (fmtcnt < 0 || pcount > 0) {
2762 PyErr_SetString(PyExc_ValueError,
2763 "incomplete format key");
2764 goto error;
2766 key = PyString_FromStringAndSize(keystart,
2767 keylen);
2768 if (key == NULL)
2769 goto error;
2770 if (args_owned) {
2771 Py_DECREF(args);
2772 args_owned = 0;
2774 args = PyObject_GetItem(dict, key);
2775 Py_DECREF(key);
2776 if (args == NULL) {
2777 goto error;
2779 args_owned = 1;
2780 arglen = -1;
2781 argidx = -2;
2783 while (--fmtcnt >= 0) {
2784 switch (c = *fmt++) {
2785 case '-': flags |= F_LJUST; continue;
2786 case '+': flags |= F_SIGN; continue;
2787 case ' ': flags |= F_BLANK; continue;
2788 case '#': flags |= F_ALT; continue;
2789 case '0': flags |= F_ZERO; continue;
2791 break;
2793 if (c == '*') {
2794 v = getnextarg(args, arglen, &argidx);
2795 if (v == NULL)
2796 goto error;
2797 if (!PyInt_Check(v)) {
2798 PyErr_SetString(PyExc_TypeError,
2799 "* wants int");
2800 goto error;
2802 width = PyInt_AsLong(v);
2803 if (width < 0) {
2804 flags |= F_LJUST;
2805 width = -width;
2807 if (--fmtcnt >= 0)
2808 c = *fmt++;
2810 else if (c >= 0 && isdigit(c)) {
2811 width = c - '0';
2812 while (--fmtcnt >= 0) {
2813 c = Py_CHARMASK(*fmt++);
2814 if (!isdigit(c))
2815 break;
2816 if ((width*10) / 10 != width) {
2817 PyErr_SetString(
2818 PyExc_ValueError,
2819 "width too big");
2820 goto error;
2822 width = width*10 + (c - '0');
2825 if (c == '.') {
2826 prec = 0;
2827 if (--fmtcnt >= 0)
2828 c = *fmt++;
2829 if (c == '*') {
2830 v = getnextarg(args, arglen, &argidx);
2831 if (v == NULL)
2832 goto error;
2833 if (!PyInt_Check(v)) {
2834 PyErr_SetString(
2835 PyExc_TypeError,
2836 "* wants int");
2837 goto error;
2839 prec = PyInt_AsLong(v);
2840 if (prec < 0)
2841 prec = 0;
2842 if (--fmtcnt >= 0)
2843 c = *fmt++;
2845 else if (c >= 0 && isdigit(c)) {
2846 prec = c - '0';
2847 while (--fmtcnt >= 0) {
2848 c = Py_CHARMASK(*fmt++);
2849 if (!isdigit(c))
2850 break;
2851 if ((prec*10) / 10 != prec) {
2852 PyErr_SetString(
2853 PyExc_ValueError,
2854 "prec too big");
2855 goto error;
2857 prec = prec*10 + (c - '0');
2860 } /* prec */
2861 if (fmtcnt >= 0) {
2862 if (c == 'h' || c == 'l' || c == 'L') {
2863 size = c;
2864 if (--fmtcnt >= 0)
2865 c = *fmt++;
2868 if (fmtcnt < 0) {
2869 PyErr_SetString(PyExc_ValueError,
2870 "incomplete format");
2871 goto error;
2873 if (c != '%') {
2874 v = getnextarg(args, arglen, &argidx);
2875 if (v == NULL)
2876 goto error;
2878 sign = 0;
2879 fill = ' ';
2880 switch (c) {
2881 case '%':
2882 pbuf = "%";
2883 len = 1;
2884 break;
2885 case 's':
2886 case 'r':
2887 if (PyUnicode_Check(v)) {
2888 fmt = fmt_start;
2889 goto unicode;
2891 if (c == 's')
2892 temp = PyObject_Str(v);
2893 else
2894 temp = PyObject_Repr(v);
2895 if (temp == NULL)
2896 goto error;
2897 if (!PyString_Check(temp)) {
2898 PyErr_SetString(PyExc_TypeError,
2899 "%s argument has non-string str()");
2900 goto error;
2902 pbuf = PyString_AsString(temp);
2903 len = PyString_Size(temp);
2904 if (prec >= 0 && len > prec)
2905 len = prec;
2906 break;
2907 case 'i':
2908 case 'd':
2909 case 'u':
2910 case 'o':
2911 case 'x':
2912 case 'X':
2913 if (c == 'i')
2914 c = 'd';
2915 if (PyLong_Check(v) && PyLong_AsLong(v) == -1
2916 && PyErr_Occurred()) {
2917 /* Too big for a C long. */
2918 PyErr_Clear();
2919 temp = _PyString_FormatLong(v, flags,
2920 prec, c, &pbuf, &len);
2921 if (!temp)
2922 goto error;
2923 /* unbounded ints can always produce
2924 a sign character! */
2925 sign = 1;
2927 else {
2928 pbuf = formatbuf;
2929 len = formatint(pbuf, sizeof(formatbuf),
2930 flags, prec, c, v);
2931 if (len < 0)
2932 goto error;
2933 /* only d conversion is signed */
2934 sign = c == 'd';
2936 if (flags & F_ZERO)
2937 fill = '0';
2938 break;
2939 case 'e':
2940 case 'E':
2941 case 'f':
2942 case 'g':
2943 case 'G':
2944 pbuf = formatbuf;
2945 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
2946 if (len < 0)
2947 goto error;
2948 sign = 1;
2949 if (flags & F_ZERO)
2950 fill = '0';
2951 break;
2952 case 'c':
2953 pbuf = formatbuf;
2954 len = formatchar(pbuf, sizeof(formatbuf), v);
2955 if (len < 0)
2956 goto error;
2957 break;
2958 default:
2959 PyErr_Format(PyExc_ValueError,
2960 "unsupported format character '%c' (0x%x)",
2961 c, c);
2962 goto error;
2964 if (sign) {
2965 if (*pbuf == '-' || *pbuf == '+') {
2966 sign = *pbuf++;
2967 len--;
2969 else if (flags & F_SIGN)
2970 sign = '+';
2971 else if (flags & F_BLANK)
2972 sign = ' ';
2973 else
2974 sign = 0;
2976 if (width < len)
2977 width = len;
2978 if (rescnt < width + (sign != 0)) {
2979 reslen -= rescnt;
2980 rescnt = width + fmtcnt + 100;
2981 reslen += rescnt;
2982 if (_PyString_Resize(&result, reslen) < 0)
2983 return NULL;
2984 res = PyString_AsString(result)
2985 + reslen - rescnt;
2987 if (sign) {
2988 if (fill != ' ')
2989 *res++ = sign;
2990 rescnt--;
2991 if (width > len)
2992 width--;
2994 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
2995 assert(pbuf[0] == '0');
2996 assert(pbuf[1] == c);
2997 if (fill != ' ') {
2998 *res++ = *pbuf++;
2999 *res++ = *pbuf++;
3001 rescnt -= 2;
3002 width -= 2;
3003 if (width < 0)
3004 width = 0;
3005 len -= 2;
3007 if (width > len && !(flags & F_LJUST)) {
3008 do {
3009 --rescnt;
3010 *res++ = fill;
3011 } while (--width > len);
3013 if (fill == ' ') {
3014 if (sign)
3015 *res++ = sign;
3016 if ((flags & F_ALT) &&
3017 (c == 'x' || c == 'X')) {
3018 assert(pbuf[0] == '0');
3019 assert(pbuf[1] == c);
3020 *res++ = *pbuf++;
3021 *res++ = *pbuf++;
3024 memcpy(res, pbuf, len);
3025 res += len;
3026 rescnt -= len;
3027 while (--width >= len) {
3028 --rescnt;
3029 *res++ = ' ';
3031 if (dict && (argidx < arglen) && c != '%') {
3032 PyErr_SetString(PyExc_TypeError,
3033 "not all arguments converted");
3034 goto error;
3036 Py_XDECREF(temp);
3037 } /* '%' */
3038 } /* until end */
3039 if (argidx < arglen && !dict) {
3040 PyErr_SetString(PyExc_TypeError,
3041 "not all arguments converted");
3042 goto error;
3044 if (args_owned) {
3045 Py_DECREF(args);
3047 _PyString_Resize(&result, reslen - rescnt);
3048 return result;
3050 unicode:
3051 if (args_owned) {
3052 Py_DECREF(args);
3053 args_owned = 0;
3055 /* Fiddle args right (remove the first argidx-1 arguments) */
3056 --argidx;
3057 if (PyTuple_Check(orig_args) && argidx > 0) {
3058 PyObject *v;
3059 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3060 v = PyTuple_New(n);
3061 if (v == NULL)
3062 goto error;
3063 while (--n >= 0) {
3064 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3065 Py_INCREF(w);
3066 PyTuple_SET_ITEM(v, n, w);
3068 args = v;
3069 } else {
3070 Py_INCREF(orig_args);
3071 args = orig_args;
3073 args_owned = 1;
3074 /* Take what we have of the result and let the Unicode formatting
3075 function format the rest of the input. */
3076 rescnt = res - PyString_AS_STRING(result);
3077 if (_PyString_Resize(&result, rescnt))
3078 goto error;
3079 fmtcnt = PyString_GET_SIZE(format) - \
3080 (fmt - PyString_AS_STRING(format));
3081 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3082 if (format == NULL)
3083 goto error;
3084 v = PyUnicode_Format(format, args);
3085 Py_DECREF(format);
3086 if (v == NULL)
3087 goto error;
3088 /* Paste what we have (result) to what the Unicode formatting
3089 function returned (v) and return the result (or error) */
3090 w = PyUnicode_Concat(result, v);
3091 Py_DECREF(result);
3092 Py_DECREF(v);
3093 Py_DECREF(args);
3094 return w;
3096 error:
3097 Py_DECREF(result);
3098 if (args_owned) {
3099 Py_DECREF(args);
3101 return NULL;
3105 #ifdef INTERN_STRINGS
3107 /* This dictionary will leak at PyString_Fini() time. That's acceptable
3108 * because PyString_Fini() specifically frees interned strings that are
3109 * only referenced by this dictionary. The CVS log entry for revision 2.45
3110 * says:
3112 * Change the Fini function to only remove otherwise unreferenced
3113 * strings from the interned table. There are references in
3114 * hard-to-find static variables all over the interpreter, and it's not
3115 * worth trying to get rid of all those; but "uninterning" isn't fair
3116 * either and may cause subtle failures later -- so we have to keep them
3117 * in the interned table.
3119 static PyObject *interned;
3121 void
3122 PyString_InternInPlace(PyObject **p)
3124 register PyStringObject *s = (PyStringObject *)(*p);
3125 PyObject *t;
3126 if (s == NULL || !PyString_Check(s))
3127 Py_FatalError("PyString_InternInPlace: strings only please!");
3128 if ((t = s->ob_sinterned) != NULL) {
3129 if (t == (PyObject *)s)
3130 return;
3131 Py_INCREF(t);
3132 *p = t;
3133 Py_DECREF(s);
3134 return;
3136 if (interned == NULL) {
3137 interned = PyDict_New();
3138 if (interned == NULL)
3139 return;
3141 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3142 Py_INCREF(t);
3143 *p = s->ob_sinterned = t;
3144 Py_DECREF(s);
3145 return;
3147 t = (PyObject *)s;
3148 if (PyDict_SetItem(interned, t, t) == 0) {
3149 s->ob_sinterned = t;
3150 return;
3152 PyErr_Clear();
3156 PyObject *
3157 PyString_InternFromString(const char *cp)
3159 PyObject *s = PyString_FromString(cp);
3160 if (s == NULL)
3161 return NULL;
3162 PyString_InternInPlace(&s);
3163 return s;
3166 #endif
3168 void
3169 PyString_Fini(void)
3171 int i;
3172 for (i = 0; i < UCHAR_MAX + 1; i++) {
3173 Py_XDECREF(characters[i]);
3174 characters[i] = NULL;
3176 #ifndef DONT_SHARE_SHORT_STRINGS
3177 Py_XDECREF(nullstring);
3178 nullstring = NULL;
3179 #endif
3180 #ifdef INTERN_STRINGS
3181 if (interned) {
3182 int pos, changed;
3183 PyObject *key, *value;
3184 do {
3185 changed = 0;
3186 pos = 0;
3187 while (PyDict_Next(interned, &pos, &key, &value)) {
3188 if (key->ob_refcnt == 2 && key == value) {
3189 PyDict_DelItem(interned, key);
3190 changed = 1;
3193 } while (changed);
3195 #endif