This commit was manufactured by cvs2svn to create tag 'r222'.
[python/dscho.git] / Objects / stringobject.c
blobbd416bf1c7e23c3146f649ee325aab33fa1adab4
1 /* String object implementation */
3 #include "Python.h"
5 #include <ctype.h>
7 #ifdef COUNT_ALLOCS
8 int null_strings, one_strings;
9 #endif
11 #if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
12 #define UCHAR_MAX 255
13 #endif
15 static PyStringObject *characters[UCHAR_MAX + 1];
16 #ifndef DONT_SHARE_SHORT_STRINGS
17 static PyStringObject *nullstring;
18 #endif
/*
   PyString_FromStringAndSize() and PyString_FromString() try in certain cases
   to share string objects.  When the size of the string is zero, these
   routines always return a pointer to the same string object; when the size
   is one, they return a pointer to an already existing object if the contents
   of the string is known.  For PyString_FromString() this is always the case,
   for PyString_FromStringAndSize() this is the case when the first argument
   is not NULL.

   A common practice of allocating a string and then filling it in or changing
   it must be done carefully.  It is only allowed to change the contents of
   the string if the object was gotten from PyString_FromStringAndSize() with
   a NULL first argument, because in the future these routines may try to do
   even more sharing of objects.

   The string in the `str' parameter does not have to be null-character
   terminated.  (Therefore it is safe to construct a substring by using
   `PyString_FromStringAndSize(origstring, substrlen)'.)

   The parameter `size' denotes number of characters to allocate, not
   counting the null terminating character.  If the `str' argument is
   not NULL, then it points to a character buffer of length `size'.  For
   PyString_FromString, this string must be null-terminated.

   The member `op->ob_size' denotes the number of bytes of data in the string,
   not counting the null terminating character, and is therefore equal to the
   `size' parameter.
*/
48 PyObject *
49 PyString_FromStringAndSize(const char *str, int size)
51 register PyStringObject *op;
52 #ifndef DONT_SHARE_SHORT_STRINGS
53 if (size == 0 && (op = nullstring) != NULL) {
54 #ifdef COUNT_ALLOCS
55 null_strings++;
56 #endif
57 Py_INCREF(op);
58 return (PyObject *)op;
60 if (size == 1 && str != NULL &&
61 (op = characters[*str & UCHAR_MAX]) != NULL)
63 #ifdef COUNT_ALLOCS
64 one_strings++;
65 #endif
66 Py_INCREF(op);
67 return (PyObject *)op;
69 #endif /* DONT_SHARE_SHORT_STRINGS */
71 /* PyObject_NewVar is inlined */
72 op = (PyStringObject *)
73 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
74 if (op == NULL)
75 return PyErr_NoMemory();
76 PyObject_INIT_VAR(op, &PyString_Type, size);
77 #ifdef CACHE_HASH
78 op->ob_shash = -1;
79 #endif
80 #ifdef INTERN_STRINGS
81 op->ob_sinterned = NULL;
82 #endif
83 if (str != NULL)
84 memcpy(op->ob_sval, str, size);
85 op->ob_sval[size] = '\0';
86 #ifndef DONT_SHARE_SHORT_STRINGS
87 if (size == 0) {
88 PyObject *t = (PyObject *)op;
89 PyString_InternInPlace(&t);
90 op = (PyStringObject *)t;
91 nullstring = op;
92 Py_INCREF(op);
93 } else if (size == 1 && str != NULL) {
94 PyObject *t = (PyObject *)op;
95 PyString_InternInPlace(&t);
96 op = (PyStringObject *)t;
97 characters[*str & UCHAR_MAX] = op;
98 Py_INCREF(op);
100 #endif
101 return (PyObject *) op;
104 PyObject *
105 PyString_FromString(const char *str)
107 register size_t size;
108 register PyStringObject *op;
110 assert(str != NULL);
111 size = strlen(str);
112 if (size > INT_MAX) {
113 PyErr_SetString(PyExc_OverflowError,
114 "string is too long for a Python string");
115 return NULL;
117 #ifndef DONT_SHARE_SHORT_STRINGS
118 if (size == 0 && (op = nullstring) != NULL) {
119 #ifdef COUNT_ALLOCS
120 null_strings++;
121 #endif
122 Py_INCREF(op);
123 return (PyObject *)op;
125 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
126 #ifdef COUNT_ALLOCS
127 one_strings++;
128 #endif
129 Py_INCREF(op);
130 return (PyObject *)op;
132 #endif /* DONT_SHARE_SHORT_STRINGS */
134 /* PyObject_NewVar is inlined */
135 op = (PyStringObject *)
136 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
137 if (op == NULL)
138 return PyErr_NoMemory();
139 PyObject_INIT_VAR(op, &PyString_Type, size);
140 #ifdef CACHE_HASH
141 op->ob_shash = -1;
142 #endif
143 #ifdef INTERN_STRINGS
144 op->ob_sinterned = NULL;
145 #endif
146 memcpy(op->ob_sval, str, size+1);
147 #ifndef DONT_SHARE_SHORT_STRINGS
148 if (size == 0) {
149 PyObject *t = (PyObject *)op;
150 PyString_InternInPlace(&t);
151 op = (PyStringObject *)t;
152 nullstring = op;
153 Py_INCREF(op);
154 } else if (size == 1) {
155 PyObject *t = (PyObject *)op;
156 PyString_InternInPlace(&t);
157 op = (PyStringObject *)t;
158 characters[*str & UCHAR_MAX] = op;
159 Py_INCREF(op);
161 #endif
162 return (PyObject *) op;
165 PyObject *
166 PyString_FromFormatV(const char *format, va_list vargs)
168 va_list count;
169 int n = 0;
170 const char* f;
171 char *s;
172 PyObject* string;
174 #ifdef VA_LIST_IS_ARRAY
175 memcpy(count, vargs, sizeof(va_list));
176 #else
177 #ifdef __va_copy
178 __va_copy(count, vargs);
179 #else
180 count = vargs;
181 #endif
182 #endif
183 /* step 1: figure out how large a buffer we need */
184 for (f = format; *f; f++) {
185 if (*f == '%') {
186 const char* p = f;
187 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
190 /* skip the 'l' in %ld, since it doesn't change the
191 width. although only %d is supported (see
192 "expand" section below), others can be easily
193 added */
194 if (*f == 'l' && *(f+1) == 'd')
195 ++f;
197 switch (*f) {
198 case 'c':
199 (void)va_arg(count, int);
200 /* fall through... */
201 case '%':
202 n++;
203 break;
204 case 'd': case 'i': case 'x':
205 (void) va_arg(count, int);
206 /* 20 bytes is enough to hold a 64-bit
207 integer. Decimal takes the most space.
208 This isn't enough for octal. */
209 n += 20;
210 break;
211 case 's':
212 s = va_arg(count, char*);
213 n += strlen(s);
214 break;
215 case 'p':
216 (void) va_arg(count, int);
217 /* maximum 64-bit pointer representation:
218 * 0xffffffffffffffff
219 * so 19 characters is enough.
220 * XXX I count 18 -- what's the extra for?
222 n += 19;
223 break;
224 default:
225 /* if we stumble upon an unknown
226 formatting code, copy the rest of
227 the format string to the output
228 string. (we cannot just skip the
229 code, since there's no way to know
230 what's in the argument list) */
231 n += strlen(p);
232 goto expand;
234 } else
235 n++;
237 expand:
238 /* step 2: fill the buffer */
239 /* Since we've analyzed how much space we need for the worst case,
240 use sprintf directly instead of the slower PyOS_snprintf. */
241 string = PyString_FromStringAndSize(NULL, n);
242 if (!string)
243 return NULL;
245 s = PyString_AsString(string);
247 for (f = format; *f; f++) {
248 if (*f == '%') {
249 const char* p = f++;
250 int i, longflag = 0;
251 /* parse the width.precision part (we're only
252 interested in the precision value, if any) */
253 n = 0;
254 while (isdigit(Py_CHARMASK(*f)))
255 n = (n*10) + *f++ - '0';
256 if (*f == '.') {
257 f++;
258 n = 0;
259 while (isdigit(Py_CHARMASK(*f)))
260 n = (n*10) + *f++ - '0';
262 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
263 f++;
264 /* handle the long flag, but only for %ld. others
265 can be added when necessary. */
266 if (*f == 'l' && *(f+1) == 'd') {
267 longflag = 1;
268 ++f;
271 switch (*f) {
272 case 'c':
273 *s++ = va_arg(vargs, int);
274 break;
275 case 'd':
276 if (longflag)
277 sprintf(s, "%ld", va_arg(vargs, long));
278 else
279 sprintf(s, "%d", va_arg(vargs, int));
280 s += strlen(s);
281 break;
282 case 'i':
283 sprintf(s, "%i", va_arg(vargs, int));
284 s += strlen(s);
285 break;
286 case 'x':
287 sprintf(s, "%x", va_arg(vargs, int));
288 s += strlen(s);
289 break;
290 case 's':
291 p = va_arg(vargs, char*);
292 i = strlen(p);
293 if (n > 0 && i > n)
294 i = n;
295 memcpy(s, p, i);
296 s += i;
297 break;
298 case 'p':
299 sprintf(s, "%p", va_arg(vargs, void*));
300 /* %p is ill-defined: ensure leading 0x. */
301 if (s[1] == 'X')
302 s[1] = 'x';
303 else if (s[1] != 'x') {
304 memmove(s+2, s, strlen(s)+1);
305 s[0] = '0';
306 s[1] = 'x';
308 s += strlen(s);
309 break;
310 case '%':
311 *s++ = '%';
312 break;
313 default:
314 strcpy(s, p);
315 s += strlen(s);
316 goto end;
318 } else
319 *s++ = *f;
322 end:
323 _PyString_Resize(&string, s - PyString_AS_STRING(string));
324 return string;
327 PyObject *
328 PyString_FromFormat(const char *format, ...)
330 PyObject* ret;
331 va_list vargs;
333 #ifdef HAVE_STDARG_PROTOTYPES
334 va_start(vargs, format);
335 #else
336 va_start(vargs);
337 #endif
338 ret = PyString_FromFormatV(format, vargs);
339 va_end(vargs);
340 return ret;
344 PyObject *PyString_Decode(const char *s,
345 int size,
346 const char *encoding,
347 const char *errors)
349 PyObject *v, *str;
351 str = PyString_FromStringAndSize(s, size);
352 if (str == NULL)
353 return NULL;
354 v = PyString_AsDecodedString(str, encoding, errors);
355 Py_DECREF(str);
356 return v;
359 PyObject *PyString_AsDecodedObject(PyObject *str,
360 const char *encoding,
361 const char *errors)
363 PyObject *v;
365 if (!PyString_Check(str)) {
366 PyErr_BadArgument();
367 goto onError;
370 if (encoding == NULL) {
371 #ifdef Py_USING_UNICODE
372 encoding = PyUnicode_GetDefaultEncoding();
373 #else
374 PyErr_SetString(PyExc_ValueError, "no encoding specified");
375 goto onError;
376 #endif
379 /* Decode via the codec registry */
380 v = PyCodec_Decode(str, encoding, errors);
381 if (v == NULL)
382 goto onError;
384 return v;
386 onError:
387 return NULL;
390 PyObject *PyString_AsDecodedString(PyObject *str,
391 const char *encoding,
392 const char *errors)
394 PyObject *v;
396 v = PyString_AsDecodedObject(str, encoding, errors);
397 if (v == NULL)
398 goto onError;
400 #ifdef Py_USING_UNICODE
401 /* Convert Unicode to a string using the default encoding */
402 if (PyUnicode_Check(v)) {
403 PyObject *temp = v;
404 v = PyUnicode_AsEncodedString(v, NULL, NULL);
405 Py_DECREF(temp);
406 if (v == NULL)
407 goto onError;
409 #endif
410 if (!PyString_Check(v)) {
411 PyErr_Format(PyExc_TypeError,
412 "decoder did not return a string object (type=%.400s)",
413 v->ob_type->tp_name);
414 Py_DECREF(v);
415 goto onError;
418 return v;
420 onError:
421 return NULL;
424 PyObject *PyString_Encode(const char *s,
425 int size,
426 const char *encoding,
427 const char *errors)
429 PyObject *v, *str;
431 str = PyString_FromStringAndSize(s, size);
432 if (str == NULL)
433 return NULL;
434 v = PyString_AsEncodedString(str, encoding, errors);
435 Py_DECREF(str);
436 return v;
439 PyObject *PyString_AsEncodedObject(PyObject *str,
440 const char *encoding,
441 const char *errors)
443 PyObject *v;
445 if (!PyString_Check(str)) {
446 PyErr_BadArgument();
447 goto onError;
450 if (encoding == NULL) {
451 #ifdef Py_USING_UNICODE
452 encoding = PyUnicode_GetDefaultEncoding();
453 #else
454 PyErr_SetString(PyExc_ValueError, "no encoding specified");
455 goto onError;
456 #endif
459 /* Encode via the codec registry */
460 v = PyCodec_Encode(str, encoding, errors);
461 if (v == NULL)
462 goto onError;
464 return v;
466 onError:
467 return NULL;
470 PyObject *PyString_AsEncodedString(PyObject *str,
471 const char *encoding,
472 const char *errors)
474 PyObject *v;
476 v = PyString_AsEncodedObject(str, encoding, errors);
477 if (v == NULL)
478 goto onError;
480 #ifdef Py_USING_UNICODE
481 /* Convert Unicode to a string using the default encoding */
482 if (PyUnicode_Check(v)) {
483 PyObject *temp = v;
484 v = PyUnicode_AsEncodedString(v, NULL, NULL);
485 Py_DECREF(temp);
486 if (v == NULL)
487 goto onError;
489 #endif
490 if (!PyString_Check(v)) {
491 PyErr_Format(PyExc_TypeError,
492 "encoder did not return a string object (type=%.400s)",
493 v->ob_type->tp_name);
494 Py_DECREF(v);
495 goto onError;
498 return v;
500 onError:
501 return NULL;
504 static void
505 string_dealloc(PyObject *op)
507 op->ob_type->tp_free(op);
510 static int
511 string_getsize(register PyObject *op)
513 char *s;
514 int len;
515 if (PyString_AsStringAndSize(op, &s, &len))
516 return -1;
517 return len;
520 static /*const*/ char *
521 string_getbuffer(register PyObject *op)
523 char *s;
524 int len;
525 if (PyString_AsStringAndSize(op, &s, &len))
526 return NULL;
527 return s;
531 PyString_Size(register PyObject *op)
533 if (!PyString_Check(op))
534 return string_getsize(op);
535 return ((PyStringObject *)op) -> ob_size;
538 /*const*/ char *
539 PyString_AsString(register PyObject *op)
541 if (!PyString_Check(op))
542 return string_getbuffer(op);
543 return ((PyStringObject *)op) -> ob_sval;
547 PyString_AsStringAndSize(register PyObject *obj,
548 register char **s,
549 register int *len)
551 if (s == NULL) {
552 PyErr_BadInternalCall();
553 return -1;
556 if (!PyString_Check(obj)) {
557 #ifdef Py_USING_UNICODE
558 if (PyUnicode_Check(obj)) {
559 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
560 if (obj == NULL)
561 return -1;
563 else
564 #endif
566 PyErr_Format(PyExc_TypeError,
567 "expected string or Unicode object, "
568 "%.200s found", obj->ob_type->tp_name);
569 return -1;
573 *s = PyString_AS_STRING(obj);
574 if (len != NULL)
575 *len = PyString_GET_SIZE(obj);
576 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
577 PyErr_SetString(PyExc_TypeError,
578 "expected string without null bytes");
579 return -1;
581 return 0;
584 /* Methods */
586 static int
587 string_print(PyStringObject *op, FILE *fp, int flags)
589 int i;
590 char c;
591 int quote;
593 /* XXX Ought to check for interrupts when writing long strings */
594 if (! PyString_CheckExact(op)) {
595 int ret;
596 /* A str subclass may have its own __str__ method. */
597 op = (PyStringObject *) PyObject_Str((PyObject *)op);
598 if (op == NULL)
599 return -1;
600 ret = string_print(op, fp, flags);
601 Py_DECREF(op);
602 return ret;
604 if (flags & Py_PRINT_RAW) {
605 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
606 return 0;
609 /* figure out which quote to use; single is preferred */
610 quote = '\'';
611 if (strchr(op->ob_sval, '\'') &&
612 !strchr(op->ob_sval, '"'))
613 quote = '"';
615 fputc(quote, fp);
616 for (i = 0; i < op->ob_size; i++) {
617 c = op->ob_sval[i];
618 if (c == quote || c == '\\')
619 fprintf(fp, "\\%c", c);
620 else if (c == '\t')
621 fprintf(fp, "\\t");
622 else if (c == '\n')
623 fprintf(fp, "\\n");
624 else if (c == '\r')
625 fprintf(fp, "\\r");
626 else if (c < ' ' || c >= 0x7f)
627 fprintf(fp, "\\x%02x", c & 0xff);
628 else
629 fputc(c, fp);
631 fputc(quote, fp);
632 return 0;
635 static PyObject *
636 string_repr(register PyStringObject *op)
638 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
639 PyObject *v;
640 if (newsize > INT_MAX) {
641 PyErr_SetString(PyExc_OverflowError,
642 "string is too large to make repr");
644 v = PyString_FromStringAndSize((char *)NULL, newsize);
645 if (v == NULL) {
646 return NULL;
648 else {
649 register int i;
650 register char c;
651 register char *p;
652 int quote;
654 /* figure out which quote to use; single is preferred */
655 quote = '\'';
656 if (strchr(op->ob_sval, '\'') &&
657 !strchr(op->ob_sval, '"'))
658 quote = '"';
660 p = PyString_AS_STRING(v);
661 *p++ = quote;
662 for (i = 0; i < op->ob_size; i++) {
663 /* There's at least enough room for a hex escape
664 and a closing quote. */
665 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
666 c = op->ob_sval[i];
667 if (c == quote || c == '\\')
668 *p++ = '\\', *p++ = c;
669 else if (c == '\t')
670 *p++ = '\\', *p++ = 't';
671 else if (c == '\n')
672 *p++ = '\\', *p++ = 'n';
673 else if (c == '\r')
674 *p++ = '\\', *p++ = 'r';
675 else if (c < ' ' || c >= 0x7f) {
676 /* For performance, we don't want to call
677 PyOS_snprintf here (extra layers of
678 function call). */
679 sprintf(p, "\\x%02x", c & 0xff);
680 p += 4;
682 else
683 *p++ = c;
685 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
686 *p++ = quote;
687 *p = '\0';
688 _PyString_Resize(
689 &v, (int) (p - PyString_AS_STRING(v)));
690 return v;
694 static PyObject *
695 string_str(PyObject *s)
697 assert(PyString_Check(s));
698 if (PyString_CheckExact(s)) {
699 Py_INCREF(s);
700 return s;
702 else {
703 /* Subtype -- return genuine string with the same value. */
704 PyStringObject *t = (PyStringObject *) s;
705 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
709 static int
710 string_length(PyStringObject *a)
712 return a->ob_size;
715 static PyObject *
716 string_concat(register PyStringObject *a, register PyObject *bb)
718 register unsigned int size;
719 register PyStringObject *op;
720 if (!PyString_Check(bb)) {
721 #ifdef Py_USING_UNICODE
722 if (PyUnicode_Check(bb))
723 return PyUnicode_Concat((PyObject *)a, bb);
724 #endif
725 PyErr_Format(PyExc_TypeError,
726 "cannot concatenate 'str' and '%.200s' objects",
727 bb->ob_type->tp_name);
728 return NULL;
730 #define b ((PyStringObject *)bb)
731 /* Optimize cases with empty left or right operand */
732 if ((a->ob_size == 0 || b->ob_size == 0) &&
733 PyString_CheckExact(a) && PyString_CheckExact(b)) {
734 if (a->ob_size == 0) {
735 Py_INCREF(bb);
736 return bb;
738 Py_INCREF(a);
739 return (PyObject *)a;
741 size = a->ob_size + b->ob_size;
742 /* PyObject_NewVar is inlined */
743 op = (PyStringObject *)
744 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
745 if (op == NULL)
746 return PyErr_NoMemory();
747 PyObject_INIT_VAR(op, &PyString_Type, size);
748 #ifdef CACHE_HASH
749 op->ob_shash = -1;
750 #endif
751 #ifdef INTERN_STRINGS
752 op->ob_sinterned = NULL;
753 #endif
754 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
755 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
756 op->ob_sval[size] = '\0';
757 return (PyObject *) op;
758 #undef b
761 static PyObject *
762 string_repeat(register PyStringObject *a, register int n)
764 register int i;
765 register int size;
766 register PyStringObject *op;
767 size_t nbytes;
768 if (n < 0)
769 n = 0;
770 /* watch out for overflows: the size can overflow int,
771 * and the # of bytes needed can overflow size_t
773 size = a->ob_size * n;
774 if (n && size / n != a->ob_size) {
775 PyErr_SetString(PyExc_OverflowError,
776 "repeated string is too long");
777 return NULL;
779 if (size == a->ob_size && PyString_CheckExact(a)) {
780 Py_INCREF(a);
781 return (PyObject *)a;
783 nbytes = size * sizeof(char);
784 if (nbytes / sizeof(char) != (size_t)size ||
785 nbytes + sizeof(PyStringObject) <= nbytes) {
786 PyErr_SetString(PyExc_OverflowError,
787 "repeated string is too long");
788 return NULL;
790 op = (PyStringObject *)
791 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
792 if (op == NULL)
793 return PyErr_NoMemory();
794 PyObject_INIT_VAR(op, &PyString_Type, size);
795 #ifdef CACHE_HASH
796 op->ob_shash = -1;
797 #endif
798 #ifdef INTERN_STRINGS
799 op->ob_sinterned = NULL;
800 #endif
801 for (i = 0; i < size; i += a->ob_size)
802 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
803 op->ob_sval[size] = '\0';
804 return (PyObject *) op;
807 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
809 static PyObject *
810 string_slice(register PyStringObject *a, register int i, register int j)
811 /* j -- may be negative! */
813 if (i < 0)
814 i = 0;
815 if (j < 0)
816 j = 0; /* Avoid signed/unsigned bug in next line */
817 if (j > a->ob_size)
818 j = a->ob_size;
819 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
820 /* It's the same as a */
821 Py_INCREF(a);
822 return (PyObject *)a;
824 if (j < i)
825 j = i;
826 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
829 static int
830 string_contains(PyObject *a, PyObject *el)
832 register char *s, *end;
833 register char c;
834 #ifdef Py_USING_UNICODE
835 if (PyUnicode_Check(el))
836 return PyUnicode_Contains(a, el);
837 #endif
838 if (!PyString_Check(el) || PyString_Size(el) != 1) {
839 PyErr_SetString(PyExc_TypeError,
840 "'in <string>' requires character as left operand");
841 return -1;
843 c = PyString_AsString(el)[0];
844 s = PyString_AsString(a);
845 end = s + PyString_Size(a);
846 while (s < end) {
847 if (c == *s++)
848 return 1;
850 return 0;
853 static PyObject *
854 string_item(PyStringObject *a, register int i)
856 PyObject *v;
857 char *pchar;
858 if (i < 0 || i >= a->ob_size) {
859 PyErr_SetString(PyExc_IndexError, "string index out of range");
860 return NULL;
862 pchar = a->ob_sval + i;
863 v = (PyObject *)characters[*pchar & UCHAR_MAX];
864 if (v == NULL)
865 v = PyString_FromStringAndSize(pchar, 1);
866 else {
867 #ifdef COUNT_ALLOCS
868 one_strings++;
869 #endif
870 Py_INCREF(v);
872 return v;
875 static PyObject*
876 string_richcompare(PyStringObject *a, PyStringObject *b, int op)
878 int c;
879 int len_a, len_b;
880 int min_len;
881 PyObject *result;
883 /* Make sure both arguments are strings. */
884 if (!(PyString_Check(a) && PyString_Check(b))) {
885 result = Py_NotImplemented;
886 goto out;
888 if (a == b) {
889 switch (op) {
890 case Py_EQ:case Py_LE:case Py_GE:
891 result = Py_True;
892 goto out;
893 case Py_NE:case Py_LT:case Py_GT:
894 result = Py_False;
895 goto out;
898 if (op == Py_EQ) {
899 /* Supporting Py_NE here as well does not save
900 much time, since Py_NE is rarely used. */
901 if (a->ob_size == b->ob_size
902 && (a->ob_sval[0] == b->ob_sval[0]
903 && memcmp(a->ob_sval, b->ob_sval,
904 a->ob_size) == 0)) {
905 result = Py_True;
906 } else {
907 result = Py_False;
909 goto out;
911 len_a = a->ob_size; len_b = b->ob_size;
912 min_len = (len_a < len_b) ? len_a : len_b;
913 if (min_len > 0) {
914 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
915 if (c==0)
916 c = memcmp(a->ob_sval, b->ob_sval, min_len);
917 }else
918 c = 0;
919 if (c == 0)
920 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
921 switch (op) {
922 case Py_LT: c = c < 0; break;
923 case Py_LE: c = c <= 0; break;
924 case Py_EQ: assert(0); break; /* unreachable */
925 case Py_NE: c = c != 0; break;
926 case Py_GT: c = c > 0; break;
927 case Py_GE: c = c >= 0; break;
928 default:
929 result = Py_NotImplemented;
930 goto out;
932 result = c ? Py_True : Py_False;
933 out:
934 Py_INCREF(result);
935 return result;
939 _PyString_Eq(PyObject *o1, PyObject *o2)
941 PyStringObject *a, *b;
942 a = (PyStringObject*)o1;
943 b = (PyStringObject*)o2;
944 return a->ob_size == b->ob_size
945 && *a->ob_sval == *b->ob_sval
946 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
949 static long
950 string_hash(PyStringObject *a)
952 register int len;
953 register unsigned char *p;
954 register long x;
956 #ifdef CACHE_HASH
957 if (a->ob_shash != -1)
958 return a->ob_shash;
959 #ifdef INTERN_STRINGS
960 if (a->ob_sinterned != NULL)
961 return (a->ob_shash =
962 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
963 #endif
964 #endif
965 len = a->ob_size;
966 p = (unsigned char *) a->ob_sval;
967 x = *p << 7;
968 while (--len >= 0)
969 x = (1000003*x) ^ *p++;
970 x ^= a->ob_size;
971 if (x == -1)
972 x = -2;
973 #ifdef CACHE_HASH
974 a->ob_shash = x;
975 #endif
976 return x;
979 static int
980 string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
982 if ( index != 0 ) {
983 PyErr_SetString(PyExc_SystemError,
984 "accessing non-existent string segment");
985 return -1;
987 *ptr = (void *)self->ob_sval;
988 return self->ob_size;
991 static int
992 string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
994 PyErr_SetString(PyExc_TypeError,
995 "Cannot use string as modifiable buffer");
996 return -1;
999 static int
1000 string_buffer_getsegcount(PyStringObject *self, int *lenp)
1002 if ( lenp )
1003 *lenp = self->ob_size;
1004 return 1;
1007 static int
1008 string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
1010 if ( index != 0 ) {
1011 PyErr_SetString(PyExc_SystemError,
1012 "accessing non-existent string segment");
1013 return -1;
1015 *ptr = self->ob_sval;
1016 return self->ob_size;
1019 static PySequenceMethods string_as_sequence = {
1020 (inquiry)string_length, /*sq_length*/
1021 (binaryfunc)string_concat, /*sq_concat*/
1022 (intargfunc)string_repeat, /*sq_repeat*/
1023 (intargfunc)string_item, /*sq_item*/
1024 (intintargfunc)string_slice, /*sq_slice*/
1025 0, /*sq_ass_item*/
1026 0, /*sq_ass_slice*/
1027 (objobjproc)string_contains /*sq_contains*/
1030 static PyBufferProcs string_as_buffer = {
1031 (getreadbufferproc)string_buffer_getreadbuf,
1032 (getwritebufferproc)string_buffer_getwritebuf,
1033 (getsegcountproc)string_buffer_getsegcount,
1034 (getcharbufferproc)string_buffer_getcharbuf,
/* Strip modes; the values double as indices into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for a strip mode: the format string minus its
   three-character "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
1049 static PyObject *
1050 split_whitespace(const char *s, int len, int maxsplit)
1052 int i, j, err;
1053 PyObject* item;
1054 PyObject *list = PyList_New(0);
1056 if (list == NULL)
1057 return NULL;
1059 for (i = j = 0; i < len; ) {
1060 while (i < len && isspace(Py_CHARMASK(s[i])))
1061 i++;
1062 j = i;
1063 while (i < len && !isspace(Py_CHARMASK(s[i])))
1064 i++;
1065 if (j < i) {
1066 if (maxsplit-- <= 0)
1067 break;
1068 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1069 if (item == NULL)
1070 goto finally;
1071 err = PyList_Append(list, item);
1072 Py_DECREF(item);
1073 if (err < 0)
1074 goto finally;
1075 while (i < len && isspace(Py_CHARMASK(s[i])))
1076 i++;
1077 j = i;
1080 if (j < len) {
1081 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1082 if (item == NULL)
1083 goto finally;
1084 err = PyList_Append(list, item);
1085 Py_DECREF(item);
1086 if (err < 0)
1087 goto finally;
1089 return list;
1090 finally:
1091 Py_DECREF(list);
1092 return NULL;
1096 static char split__doc__[] =
1097 "S.split([sep [,maxsplit]]) -> list of strings\n\
1099 Return a list of the words in the string S, using sep as the\n\
1100 delimiter string. If maxsplit is given, at most maxsplit\n\
1101 splits are done. If sep is not specified or is None, any\n\
1102 whitespace string is a separator.";
1104 static PyObject *
1105 string_split(PyStringObject *self, PyObject *args)
1107 int len = PyString_GET_SIZE(self), n, i, j, err;
1108 int maxsplit = -1;
1109 const char *s = PyString_AS_STRING(self), *sub;
1110 PyObject *list, *item, *subobj = Py_None;
1112 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
1113 return NULL;
1114 if (maxsplit < 0)
1115 maxsplit = INT_MAX;
1116 if (subobj == Py_None)
1117 return split_whitespace(s, len, maxsplit);
1118 if (PyString_Check(subobj)) {
1119 sub = PyString_AS_STRING(subobj);
1120 n = PyString_GET_SIZE(subobj);
1122 #ifdef Py_USING_UNICODE
1123 else if (PyUnicode_Check(subobj))
1124 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1125 #endif
1126 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1127 return NULL;
1128 if (n == 0) {
1129 PyErr_SetString(PyExc_ValueError, "empty separator");
1130 return NULL;
1133 list = PyList_New(0);
1134 if (list == NULL)
1135 return NULL;
1137 i = j = 0;
1138 while (i+n <= len) {
1139 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1140 if (maxsplit-- <= 0)
1141 break;
1142 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1143 if (item == NULL)
1144 goto fail;
1145 err = PyList_Append(list, item);
1146 Py_DECREF(item);
1147 if (err < 0)
1148 goto fail;
1149 i = j = i + n;
1151 else
1152 i++;
1154 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1155 if (item == NULL)
1156 goto fail;
1157 err = PyList_Append(list, item);
1158 Py_DECREF(item);
1159 if (err < 0)
1160 goto fail;
1162 return list;
1164 fail:
1165 Py_DECREF(list);
1166 return NULL;
1170 static char join__doc__[] =
1171 "S.join(sequence) -> string\n\
1173 Return a string which is the concatenation of the strings in the\n\
1174 sequence. The separator between elements is S.";
1176 static PyObject *
1177 string_join(PyStringObject *self, PyObject *orig)
1179 char *sep = PyString_AS_STRING(self);
1180 const int seplen = PyString_GET_SIZE(self);
1181 PyObject *res = NULL;
1182 char *p;
1183 int seqlen = 0;
1184 size_t sz = 0;
1185 int i;
1186 PyObject *seq, *item;
1188 seq = PySequence_Fast(orig, "");
1189 if (seq == NULL) {
1190 if (PyErr_ExceptionMatches(PyExc_TypeError))
1191 PyErr_Format(PyExc_TypeError,
1192 "sequence expected, %.80s found",
1193 orig->ob_type->tp_name);
1194 return NULL;
1197 seqlen = PySequence_Size(seq);
1198 if (seqlen == 0) {
1199 Py_DECREF(seq);
1200 return PyString_FromString("");
1202 if (seqlen == 1) {
1203 item = PySequence_Fast_GET_ITEM(seq, 0);
1204 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1205 PyErr_Format(PyExc_TypeError,
1206 "sequence item 0: expected string,"
1207 " %.80s found",
1208 item->ob_type->tp_name);
1209 Py_DECREF(seq);
1210 return NULL;
1212 Py_INCREF(item);
1213 Py_DECREF(seq);
1214 return item;
1217 /* There are at least two things to join. Do a pre-pass to figure out
1218 * the total amount of space we'll need (sz), see whether any argument
1219 * is absurd, and defer to the Unicode join if appropriate.
1221 for (i = 0; i < seqlen; i++) {
1222 const size_t old_sz = sz;
1223 item = PySequence_Fast_GET_ITEM(seq, i);
1224 if (!PyString_Check(item)){
1225 #ifdef Py_USING_UNICODE
1226 if (PyUnicode_Check(item)) {
1227 /* Defer to Unicode join.
1228 * CAUTION: There's no gurantee that the
1229 * original sequence can be iterated over
1230 * again, so we must pass seq here.
1232 PyObject *result;
1233 result = PyUnicode_Join((PyObject *)self, seq);
1234 Py_DECREF(seq);
1235 return result;
1237 #endif
1238 PyErr_Format(PyExc_TypeError,
1239 "sequence item %i: expected string,"
1240 " %.80s found",
1241 i, item->ob_type->tp_name);
1242 Py_DECREF(seq);
1243 return NULL;
1245 sz += PyString_GET_SIZE(item);
1246 if (i != 0)
1247 sz += seplen;
1248 if (sz < old_sz || sz > INT_MAX) {
1249 PyErr_SetString(PyExc_OverflowError,
1250 "join() is too long for a Python string");
1251 Py_DECREF(seq);
1252 return NULL;
1256 /* Allocate result space. */
1257 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1258 if (res == NULL) {
1259 Py_DECREF(seq);
1260 return NULL;
1263 /* Catenate everything. */
1264 p = PyString_AS_STRING(res);
1265 for (i = 0; i < seqlen; ++i) {
1266 size_t n;
1267 item = PySequence_Fast_GET_ITEM(seq, i);
1268 n = PyString_GET_SIZE(item);
1269 memcpy(p, PyString_AS_STRING(item), n);
1270 p += n;
1271 if (i < seqlen - 1) {
1272 memcpy(p, sep, seplen);
1273 p += seplen;
1277 Py_DECREF(seq);
1278 return res;
1281 PyObject *
1282 _PyString_Join(PyObject *sep, PyObject *x)
1284 assert(sep != NULL && PyString_Check(sep));
1285 assert(x != NULL);
1286 return string_join((PyStringObject *)sep, x);
1289 static long
1290 string_find_internal(PyStringObject *self, PyObject *args, int dir)
1292 const char *s = PyString_AS_STRING(self), *sub;
1293 int len = PyString_GET_SIZE(self);
1294 int n, i = 0, last = INT_MAX;
1295 PyObject *subobj;
1297 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
1298 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
1299 return -2;
1300 if (PyString_Check(subobj)) {
1301 sub = PyString_AS_STRING(subobj);
1302 n = PyString_GET_SIZE(subobj);
1304 #ifdef Py_USING_UNICODE
1305 else if (PyUnicode_Check(subobj))
1306 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
1307 #endif
1308 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1309 return -2;
1311 if (last > len)
1312 last = len;
1313 if (last < 0)
1314 last += len;
1315 if (last < 0)
1316 last = 0;
1317 if (i < 0)
1318 i += len;
1319 if (i < 0)
1320 i = 0;
1322 if (dir > 0) {
1323 if (n == 0 && i <= last)
1324 return (long)i;
1325 last -= n;
1326 for (; i <= last; ++i)
1327 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
1328 return (long)i;
1330 else {
1331 int j;
1333 if (n == 0 && i <= last)
1334 return (long)last;
1335 for (j = last-n; j >= i; --j)
1336 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
1337 return (long)j;
1340 return -1;
1344 static char find__doc__[] =
1345 "S.find(sub [,start [,end]]) -> int\n\
1347 Return the lowest index in S where substring sub is found,\n\
1348 such that sub is contained within s[start,end]. Optional\n\
1349 arguments start and end are interpreted as in slice notation.\n\
1351 Return -1 on failure.";
1353 static PyObject *
1354 string_find(PyStringObject *self, PyObject *args)
1356 long result = string_find_internal(self, args, +1);
1357 if (result == -2)
1358 return NULL;
1359 return PyInt_FromLong(result);
1363 static char index__doc__[] =
1364 "S.index(sub [,start [,end]]) -> int\n\
1366 Like S.find() but raise ValueError when the substring is not found.";
1368 static PyObject *
1369 string_index(PyStringObject *self, PyObject *args)
1371 long result = string_find_internal(self, args, +1);
1372 if (result == -2)
1373 return NULL;
1374 if (result == -1) {
1375 PyErr_SetString(PyExc_ValueError,
1376 "substring not found in string.index");
1377 return NULL;
1379 return PyInt_FromLong(result);
1383 static char rfind__doc__[] =
1384 "S.rfind(sub [,start [,end]]) -> int\n\
1386 Return the highest index in S where substring sub is found,\n\
1387 such that sub is contained within s[start,end]. Optional\n\
1388 arguments start and end are interpreted as in slice notation.\n\
1390 Return -1 on failure.";
1392 static PyObject *
1393 string_rfind(PyStringObject *self, PyObject *args)
1395 long result = string_find_internal(self, args, -1);
1396 if (result == -2)
1397 return NULL;
1398 return PyInt_FromLong(result);
1402 static char rindex__doc__[] =
1403 "S.rindex(sub [,start [,end]]) -> int\n\
1405 Like S.rfind() but raise ValueError when the substring is not found.";
1407 static PyObject *
1408 string_rindex(PyStringObject *self, PyObject *args)
1410 long result = string_find_internal(self, args, -1);
1411 if (result == -2)
1412 return NULL;
1413 if (result == -1) {
1414 PyErr_SetString(PyExc_ValueError,
1415 "substring not found in string.rindex");
1416 return NULL;
1418 return PyInt_FromLong(result);
1422 static PyObject *
1423 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1425 char *s = PyString_AS_STRING(self);
1426 int len = PyString_GET_SIZE(self);
1427 char *sep = PyString_AS_STRING(sepobj);
1428 int seplen = PyString_GET_SIZE(sepobj);
1429 int i, j;
1431 i = 0;
1432 if (striptype != RIGHTSTRIP) {
1433 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1434 i++;
1438 j = len;
1439 if (striptype != LEFTSTRIP) {
1440 do {
1441 j--;
1442 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1443 j++;
1446 if (i == 0 && j == len && PyString_CheckExact(self)) {
1447 Py_INCREF(self);
1448 return (PyObject*)self;
1450 else
1451 return PyString_FromStringAndSize(s+i, j-i);
1455 static PyObject *
1456 do_strip(PyStringObject *self, int striptype)
1458 char *s = PyString_AS_STRING(self);
1459 int len = PyString_GET_SIZE(self), i, j;
1461 i = 0;
1462 if (striptype != RIGHTSTRIP) {
1463 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1464 i++;
1468 j = len;
1469 if (striptype != LEFTSTRIP) {
1470 do {
1471 j--;
1472 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1473 j++;
1476 if (i == 0 && j == len && PyString_CheckExact(self)) {
1477 Py_INCREF(self);
1478 return (PyObject*)self;
1480 else
1481 return PyString_FromStringAndSize(s+i, j-i);
1485 static PyObject *
1486 do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1488 PyObject *sep = NULL;
1490 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1491 return NULL;
1493 if (sep != NULL && sep != Py_None) {
1494 if (PyString_Check(sep))
1495 return do_xstrip(self, striptype, sep);
1496 #ifdef Py_USING_UNICODE
1497 else if (PyUnicode_Check(sep)) {
1498 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1499 PyObject *res;
1500 if (uniself==NULL)
1501 return NULL;
1502 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1503 striptype, sep);
1504 Py_DECREF(uniself);
1505 return res;
1507 #endif
1508 else {
1509 PyErr_Format(PyExc_TypeError,
1510 #ifdef Py_USING_UNICODE
1511 "%s arg must be None, str or unicode",
1512 #else
1513 "%s arg must be None or str",
1514 #endif
1515 STRIPNAME(striptype));
1516 return NULL;
1518 return do_xstrip(self, striptype, sep);
1521 return do_strip(self, striptype);
1525 static char strip__doc__[] =
1526 "S.strip([sep]) -> string or unicode\n\
1528 Return a copy of the string S with leading and trailing\n\
1529 whitespace removed.\n\
1530 If sep is given and not None, remove characters in sep instead.\n\
1531 If sep is unicode, S will be converted to unicode before stripping";
1533 static PyObject *
1534 string_strip(PyStringObject *self, PyObject *args)
1536 if (PyTuple_GET_SIZE(args) == 0)
1537 return do_strip(self, BOTHSTRIP); /* Common case */
1538 else
1539 return do_argstrip(self, BOTHSTRIP, args);
1543 static char lstrip__doc__[] =
1544 "S.lstrip([sep]) -> string or unicode\n\
1546 Return a copy of the string S with leading whitespace removed.\n\
1547 If sep is given and not None, remove characters in sep instead.\n\
1548 If sep is unicode, S will be converted to unicode before stripping";
1550 static PyObject *
1551 string_lstrip(PyStringObject *self, PyObject *args)
1553 if (PyTuple_GET_SIZE(args) == 0)
1554 return do_strip(self, LEFTSTRIP); /* Common case */
1555 else
1556 return do_argstrip(self, LEFTSTRIP, args);
1560 static char rstrip__doc__[] =
1561 "S.rstrip([sep]) -> string or unicode\n\
1563 Return a copy of the string S with trailing whitespace removed.\n\
1564 If sep is given and not None, remove characters in sep instead.\n\
1565 If sep is unicode, S will be converted to unicode before stripping";
1567 static PyObject *
1568 string_rstrip(PyStringObject *self, PyObject *args)
1570 if (PyTuple_GET_SIZE(args) == 0)
1571 return do_strip(self, RIGHTSTRIP); /* Common case */
1572 else
1573 return do_argstrip(self, RIGHTSTRIP, args);
1577 static char lower__doc__[] =
1578 "S.lower() -> string\n\
1580 Return a copy of the string S converted to lowercase.";
1582 static PyObject *
1583 string_lower(PyStringObject *self)
1585 char *s = PyString_AS_STRING(self), *s_new;
1586 int i, n = PyString_GET_SIZE(self);
1587 PyObject *new;
1589 new = PyString_FromStringAndSize(NULL, n);
1590 if (new == NULL)
1591 return NULL;
1592 s_new = PyString_AsString(new);
1593 for (i = 0; i < n; i++) {
1594 int c = Py_CHARMASK(*s++);
1595 if (isupper(c)) {
1596 *s_new = tolower(c);
1597 } else
1598 *s_new = c;
1599 s_new++;
1601 return new;
1605 static char upper__doc__[] =
1606 "S.upper() -> string\n\
1608 Return a copy of the string S converted to uppercase.";
1610 static PyObject *
1611 string_upper(PyStringObject *self)
1613 char *s = PyString_AS_STRING(self), *s_new;
1614 int i, n = PyString_GET_SIZE(self);
1615 PyObject *new;
1617 new = PyString_FromStringAndSize(NULL, n);
1618 if (new == NULL)
1619 return NULL;
1620 s_new = PyString_AsString(new);
1621 for (i = 0; i < n; i++) {
1622 int c = Py_CHARMASK(*s++);
1623 if (islower(c)) {
1624 *s_new = toupper(c);
1625 } else
1626 *s_new = c;
1627 s_new++;
1629 return new;
1633 static char title__doc__[] =
1634 "S.title() -> string\n\
1636 Return a titlecased version of S, i.e. words start with uppercase\n\
1637 characters, all remaining cased characters have lowercase.";
1639 static PyObject*
1640 string_title(PyStringObject *self)
1642 char *s = PyString_AS_STRING(self), *s_new;
1643 int i, n = PyString_GET_SIZE(self);
1644 int previous_is_cased = 0;
1645 PyObject *new;
1647 new = PyString_FromStringAndSize(NULL, n);
1648 if (new == NULL)
1649 return NULL;
1650 s_new = PyString_AsString(new);
1651 for (i = 0; i < n; i++) {
1652 int c = Py_CHARMASK(*s++);
1653 if (islower(c)) {
1654 if (!previous_is_cased)
1655 c = toupper(c);
1656 previous_is_cased = 1;
1657 } else if (isupper(c)) {
1658 if (previous_is_cased)
1659 c = tolower(c);
1660 previous_is_cased = 1;
1661 } else
1662 previous_is_cased = 0;
1663 *s_new++ = c;
1665 return new;
1668 static char capitalize__doc__[] =
1669 "S.capitalize() -> string\n\
1671 Return a copy of the string S with only its first character\n\
1672 capitalized.";
1674 static PyObject *
1675 string_capitalize(PyStringObject *self)
1677 char *s = PyString_AS_STRING(self), *s_new;
1678 int i, n = PyString_GET_SIZE(self);
1679 PyObject *new;
1681 new = PyString_FromStringAndSize(NULL, n);
1682 if (new == NULL)
1683 return NULL;
1684 s_new = PyString_AsString(new);
1685 if (0 < n) {
1686 int c = Py_CHARMASK(*s++);
1687 if (islower(c))
1688 *s_new = toupper(c);
1689 else
1690 *s_new = c;
1691 s_new++;
1693 for (i = 1; i < n; i++) {
1694 int c = Py_CHARMASK(*s++);
1695 if (isupper(c))
1696 *s_new = tolower(c);
1697 else
1698 *s_new = c;
1699 s_new++;
1701 return new;
1705 static char count__doc__[] =
1706 "S.count(sub[, start[, end]]) -> int\n\
1708 Return the number of occurrences of substring sub in string\n\
1709 S[start:end]. Optional arguments start and end are\n\
1710 interpreted as in slice notation.";
1712 static PyObject *
1713 string_count(PyStringObject *self, PyObject *args)
1715 const char *s = PyString_AS_STRING(self), *sub;
1716 int len = PyString_GET_SIZE(self), n;
1717 int i = 0, last = INT_MAX;
1718 int m, r;
1719 PyObject *subobj;
1721 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1722 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
1723 return NULL;
1725 if (PyString_Check(subobj)) {
1726 sub = PyString_AS_STRING(subobj);
1727 n = PyString_GET_SIZE(subobj);
1729 #ifdef Py_USING_UNICODE
1730 else if (PyUnicode_Check(subobj)) {
1731 int count;
1732 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1733 if (count == -1)
1734 return NULL;
1735 else
1736 return PyInt_FromLong((long) count);
1738 #endif
1739 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1740 return NULL;
1742 if (last > len)
1743 last = len;
1744 if (last < 0)
1745 last += len;
1746 if (last < 0)
1747 last = 0;
1748 if (i < 0)
1749 i += len;
1750 if (i < 0)
1751 i = 0;
1752 m = last + 1 - n;
1753 if (n == 0)
1754 return PyInt_FromLong((long) (m-i));
1756 r = 0;
1757 while (i < m) {
1758 if (!memcmp(s+i, sub, n)) {
1759 r++;
1760 i += n;
1761 } else {
1762 i++;
1765 return PyInt_FromLong((long) r);
1769 static char swapcase__doc__[] =
1770 "S.swapcase() -> string\n\
1772 Return a copy of the string S with uppercase characters\n\
1773 converted to lowercase and vice versa.";
1775 static PyObject *
1776 string_swapcase(PyStringObject *self)
1778 char *s = PyString_AS_STRING(self), *s_new;
1779 int i, n = PyString_GET_SIZE(self);
1780 PyObject *new;
1782 new = PyString_FromStringAndSize(NULL, n);
1783 if (new == NULL)
1784 return NULL;
1785 s_new = PyString_AsString(new);
1786 for (i = 0; i < n; i++) {
1787 int c = Py_CHARMASK(*s++);
1788 if (islower(c)) {
1789 *s_new = toupper(c);
1791 else if (isupper(c)) {
1792 *s_new = tolower(c);
1794 else
1795 *s_new = c;
1796 s_new++;
1798 return new;
1802 static char translate__doc__[] =
1803 "S.translate(table [,deletechars]) -> string\n\
1805 Return a copy of the string S, where all characters occurring\n\
1806 in the optional argument deletechars are removed, and the\n\
1807 remaining characters have been mapped through the given\n\
1808 translation table, which must be a string of length 256.";
1810 static PyObject *
1811 string_translate(PyStringObject *self, PyObject *args)
1813 register char *input, *output;
1814 register const char *table;
1815 register int i, c, changed = 0;
1816 PyObject *input_obj = (PyObject*)self;
1817 const char *table1, *output_start, *del_table=NULL;
1818 int inlen, tablen, dellen = 0;
1819 PyObject *result;
1820 int trans_table[256];
1821 PyObject *tableobj, *delobj = NULL;
1823 if (!PyArg_ParseTuple(args, "O|O:translate",
1824 &tableobj, &delobj))
1825 return NULL;
1827 if (PyString_Check(tableobj)) {
1828 table1 = PyString_AS_STRING(tableobj);
1829 tablen = PyString_GET_SIZE(tableobj);
1831 #ifdef Py_USING_UNICODE
1832 else if (PyUnicode_Check(tableobj)) {
1833 /* Unicode .translate() does not support the deletechars
1834 parameter; instead a mapping to None will cause characters
1835 to be deleted. */
1836 if (delobj != NULL) {
1837 PyErr_SetString(PyExc_TypeError,
1838 "deletions are implemented differently for unicode");
1839 return NULL;
1841 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1843 #endif
1844 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
1845 return NULL;
1847 if (delobj != NULL) {
1848 if (PyString_Check(delobj)) {
1849 del_table = PyString_AS_STRING(delobj);
1850 dellen = PyString_GET_SIZE(delobj);
1852 #ifdef Py_USING_UNICODE
1853 else if (PyUnicode_Check(delobj)) {
1854 PyErr_SetString(PyExc_TypeError,
1855 "deletions are implemented differently for unicode");
1856 return NULL;
1858 #endif
1859 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1860 return NULL;
1862 if (tablen != 256) {
1863 PyErr_SetString(PyExc_ValueError,
1864 "translation table must be 256 characters long");
1865 return NULL;
1868 else {
1869 del_table = NULL;
1870 dellen = 0;
1873 table = table1;
1874 inlen = PyString_Size(input_obj);
1875 result = PyString_FromStringAndSize((char *)NULL, inlen);
1876 if (result == NULL)
1877 return NULL;
1878 output_start = output = PyString_AsString(result);
1879 input = PyString_AsString(input_obj);
1881 if (dellen == 0) {
1882 /* If no deletions are required, use faster code */
1883 for (i = inlen; --i >= 0; ) {
1884 c = Py_CHARMASK(*input++);
1885 if (Py_CHARMASK((*output++ = table[c])) != c)
1886 changed = 1;
1888 if (changed || !PyString_CheckExact(input_obj))
1889 return result;
1890 Py_DECREF(result);
1891 Py_INCREF(input_obj);
1892 return input_obj;
1895 for (i = 0; i < 256; i++)
1896 trans_table[i] = Py_CHARMASK(table[i]);
1898 for (i = 0; i < dellen; i++)
1899 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1901 for (i = inlen; --i >= 0; ) {
1902 c = Py_CHARMASK(*input++);
1903 if (trans_table[c] != -1)
1904 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1905 continue;
1906 changed = 1;
1908 if (!changed && PyString_CheckExact(input_obj)) {
1909 Py_DECREF(result);
1910 Py_INCREF(input_obj);
1911 return input_obj;
1913 /* Fix the size of the resulting string */
1914 if (inlen > 0)
1915 _PyString_Resize(&result, output - output_start);
1916 return result;
1920 /* What follows is used for implementing replace(). Perry Stoll. */
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Locates the first occurrence in the LEN bytes at MEM of the PAT_LEN
  bytes at PAT.  Returns the index into MEM if found, or -1 if not
  found.  If PAT_LEN is greater than LEN, the function returns -1.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
	/* Highest offset at which a complete match can still start; the
	   pattern cannot begin in the last pat_len-1 bytes. */
	const int last_start = len - pat_len;
	int pos = 0;

	while (pos <= last_start) {
		/* Cheap first-byte test before calling memcmp(). */
		if (mem[pos] == pat[0] &&
		    memcmp(mem + pos, pat, pat_len) == 0)
			return pos;
		pos++;
	}
	return -1;
}
/*
  mymemcnt

  Return the number of distinct (non-overlapping) times PAT is found in
  MEM, meaning mem=1111 and pat==11 returns 2;
  mem=11111 and pat==11 also returns 2.
*/
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
	int hits;

	for (hits = 0; len >= 0; hits++) {
		const int where = mymemfind(mem, len, pat, pat_len);
		int consumed;

		if (where == -1)
			break;
		/* Resume just past this match so matches never overlap. */
		consumed = where + pat_len;
		mem += consumed;
		len -= consumed;
	}
	return hits;
}
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB (at most COUNT of them when COUNT >= 0).

   If STR is empty, if the length of PAT is greater than the length of
   STR, or if there are no occurrences of PAT in STR to replace, then
   the original string is returned.  Otherwise, a new string is
   allocated here with PyMem_MALLOC and returned; the caller frees it.

   PAT_LEN is expected to be > 0 (string_replace() rejects empty
   patterns before calling).

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result length
       would not fit in an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
	     const char *pat, int pat_len,	/* pattern string to find */
	     const char *sub, int sub_len,	/* substitution string */
	     int count,				/* number of replacements */
	     int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len, delta;

	if (len == 0 || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = mymemcnt(str, len, pat, pat_len);
	if (count < 0)
		count = INT_MAX;
	else if (nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;

	/* Growth per replacement.  Only a positive delta can push the
	   result past INT_MAX; refuse instead of letting the signed
	   arithmetic overflow and under-allocate the buffer. */
	delta = sub_len - pat_len;
	if (delta > 0 && nfound > (INT_MAX - len) / delta)
		return NULL;

	new_len = len + nfound*delta;
	if (new_len == 0) {
		/* Have to allocate something for the caller to free(). */
		out_s = (char *)PyMem_MALLOC(1);
		if (out_s == NULL)
			return NULL;
		out_s[0] = '\0';
	}
	else {
		assert(new_len > 0);
		new_s = (char *)PyMem_MALLOC(new_len);
		if (new_s == NULL)
			return NULL;
		out_s = new_s;

		for (; count > 0 && len > 0; --count) {
			/* find index of next instance of pattern */
			offset = mymemfind(str, len, pat, pat_len);
			if (offset == -1)
				break;

			/* copy non matching part of input string */
			memcpy(new_s, str, offset);
			str += offset + pat_len;
			len -= offset + pat_len;

			/* copy substitute into the output string */
			new_s += offset;
			memcpy(new_s, sub, sub_len);
			new_s += sub_len;
		}
		/* copy any remaining values into output string */
		if (len > 0)
			memcpy(new_s, str, len);
	}
	*out_len = new_len;
	return out_s;

  return_same:
	*out_len = -1;
	return (char *)str; /* cast away const */
}
2058 static char replace__doc__[] =
2059 "S.replace (old, new[, maxsplit]) -> string\n\
2061 Return a copy of string S with all occurrences of substring\n\
2062 old replaced by new. If the optional argument maxsplit is\n\
2063 given, only the first maxsplit occurrences are replaced.";
2065 static PyObject *
2066 string_replace(PyStringObject *self, PyObject *args)
2068 const char *str = PyString_AS_STRING(self), *sub, *repl;
2069 char *new_s;
2070 const int len = PyString_GET_SIZE(self);
2071 int sub_len, repl_len, out_len;
2072 int count = -1;
2073 PyObject *new;
2074 PyObject *subobj, *replobj;
2076 if (!PyArg_ParseTuple(args, "OO|i:replace",
2077 &subobj, &replobj, &count))
2078 return NULL;
2080 if (PyString_Check(subobj)) {
2081 sub = PyString_AS_STRING(subobj);
2082 sub_len = PyString_GET_SIZE(subobj);
2084 #ifdef Py_USING_UNICODE
2085 else if (PyUnicode_Check(subobj))
2086 return PyUnicode_Replace((PyObject *)self,
2087 subobj, replobj, count);
2088 #endif
2089 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2090 return NULL;
2092 if (PyString_Check(replobj)) {
2093 repl = PyString_AS_STRING(replobj);
2094 repl_len = PyString_GET_SIZE(replobj);
2096 #ifdef Py_USING_UNICODE
2097 else if (PyUnicode_Check(replobj))
2098 return PyUnicode_Replace((PyObject *)self,
2099 subobj, replobj, count);
2100 #endif
2101 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2102 return NULL;
2104 if (sub_len <= 0) {
2105 PyErr_SetString(PyExc_ValueError, "empty pattern string");
2106 return NULL;
2108 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
2109 if (new_s == NULL) {
2110 PyErr_NoMemory();
2111 return NULL;
2113 if (out_len == -1) {
2114 if (PyString_CheckExact(self)) {
2115 /* we're returning another reference to self */
2116 new = (PyObject*)self;
2117 Py_INCREF(new);
2119 else {
2120 new = PyString_FromStringAndSize(str, len);
2121 if (new == NULL)
2122 return NULL;
2125 else {
2126 new = PyString_FromStringAndSize(new_s, out_len);
2127 PyMem_FREE(new_s);
2129 return new;
2133 static char startswith__doc__[] =
2134 "S.startswith(prefix[, start[, end]]) -> int\n\
2136 Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
2137 optional start, test S beginning at that position. With optional end, stop\n\
2138 comparing S at that position.";
2140 static PyObject *
2141 string_startswith(PyStringObject *self, PyObject *args)
2143 const char* str = PyString_AS_STRING(self);
2144 int len = PyString_GET_SIZE(self);
2145 const char* prefix;
2146 int plen;
2147 int start = 0;
2148 int end = INT_MAX;
2149 PyObject *subobj;
2151 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2152 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2153 return NULL;
2154 if (PyString_Check(subobj)) {
2155 prefix = PyString_AS_STRING(subobj);
2156 plen = PyString_GET_SIZE(subobj);
2158 #ifdef Py_USING_UNICODE
2159 else if (PyUnicode_Check(subobj)) {
2160 int rc;
2161 rc = PyUnicode_Tailmatch((PyObject *)self,
2162 subobj, start, end, -1);
2163 if (rc == -1)
2164 return NULL;
2165 else
2166 return PyInt_FromLong((long) rc);
2168 #endif
2169 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
2170 return NULL;
2172 /* adopt Java semantics for index out of range. it is legal for
2173 * offset to be == plen, but this only returns true if prefix is
2174 * the empty string.
2176 if (start < 0 || start+plen > len)
2177 return PyInt_FromLong(0);
2179 if (!memcmp(str+start, prefix, plen)) {
2180 /* did the match end after the specified end? */
2181 if (end < 0)
2182 return PyInt_FromLong(1);
2183 else if (end - start < plen)
2184 return PyInt_FromLong(0);
2185 else
2186 return PyInt_FromLong(1);
2188 else return PyInt_FromLong(0);
2192 static char endswith__doc__[] =
2193 "S.endswith(suffix[, start[, end]]) -> int\n\
2195 Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
2196 optional start, test S beginning at that position. With optional end, stop\n\
2197 comparing S at that position.";
2199 static PyObject *
2200 string_endswith(PyStringObject *self, PyObject *args)
2202 const char* str = PyString_AS_STRING(self);
2203 int len = PyString_GET_SIZE(self);
2204 const char* suffix;
2205 int slen;
2206 int start = 0;
2207 int end = INT_MAX;
2208 int lower, upper;
2209 PyObject *subobj;
2211 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2212 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2213 return NULL;
2214 if (PyString_Check(subobj)) {
2215 suffix = PyString_AS_STRING(subobj);
2216 slen = PyString_GET_SIZE(subobj);
2218 #ifdef Py_USING_UNICODE
2219 else if (PyUnicode_Check(subobj)) {
2220 int rc;
2221 rc = PyUnicode_Tailmatch((PyObject *)self,
2222 subobj, start, end, +1);
2223 if (rc == -1)
2224 return NULL;
2225 else
2226 return PyInt_FromLong((long) rc);
2228 #endif
2229 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
2230 return NULL;
2232 if (start < 0 || start > len || slen > len)
2233 return PyInt_FromLong(0);
2235 upper = (end >= 0 && end <= len) ? end : len;
2236 lower = (upper - slen) > start ? (upper - slen) : start;
2238 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
2239 return PyInt_FromLong(1);
2240 else return PyInt_FromLong(0);
2244 static char encode__doc__[] =
2245 "S.encode([encoding[,errors]]) -> object\n\
2247 Encodes S using the codec registered for encoding. encoding defaults\n\
2248 to the default encoding. errors may be given to set a different error\n\
2249 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2250 a ValueError. Other possible values are 'ignore' and 'replace'.";
2252 static PyObject *
2253 string_encode(PyStringObject *self, PyObject *args)
2255 char *encoding = NULL;
2256 char *errors = NULL;
2257 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2258 return NULL;
2259 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2263 static char decode__doc__[] =
2264 "S.decode([encoding[,errors]]) -> object\n\
2266 Decodes S using the codec registered for encoding. encoding defaults\n\
2267 to the default encoding. errors may be given to set a different error\n\
2268 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2269 a ValueError. Other possible values are 'ignore' and 'replace'.";
2271 static PyObject *
2272 string_decode(PyStringObject *self, PyObject *args)
2274 char *encoding = NULL;
2275 char *errors = NULL;
2276 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2277 return NULL;
2278 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
2282 static char expandtabs__doc__[] =
2283 "S.expandtabs([tabsize]) -> string\n\
2285 Return a copy of S where all tab characters are expanded using spaces.\n\
2286 If tabsize is not given, a tab size of 8 characters is assumed.";
2288 static PyObject*
2289 string_expandtabs(PyStringObject *self, PyObject *args)
2291 const char *e, *p;
2292 char *q;
2293 int i, j;
2294 PyObject *u;
2295 int tabsize = 8;
2297 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2298 return NULL;
2300 /* First pass: determine size of output string */
2301 i = j = 0;
2302 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2303 for (p = PyString_AS_STRING(self); p < e; p++)
2304 if (*p == '\t') {
2305 if (tabsize > 0)
2306 j += tabsize - (j % tabsize);
2308 else {
2309 j++;
2310 if (*p == '\n' || *p == '\r') {
2311 i += j;
2312 j = 0;
2316 /* Second pass: create output string and fill it */
2317 u = PyString_FromStringAndSize(NULL, i + j);
2318 if (!u)
2319 return NULL;
2321 j = 0;
2322 q = PyString_AS_STRING(u);
2324 for (p = PyString_AS_STRING(self); p < e; p++)
2325 if (*p == '\t') {
2326 if (tabsize > 0) {
2327 i = tabsize - (j % tabsize);
2328 j += i;
2329 while (i--)
2330 *q++ = ' ';
2333 else {
2334 j++;
2335 *q++ = *p;
2336 if (*p == '\n' || *p == '\r')
2337 j = 0;
2340 return u;
2343 static PyObject *
2344 pad(PyStringObject *self, int left, int right, char fill)
2346 PyObject *u;
2348 if (left < 0)
2349 left = 0;
2350 if (right < 0)
2351 right = 0;
2353 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
2354 Py_INCREF(self);
2355 return (PyObject *)self;
2358 u = PyString_FromStringAndSize(NULL,
2359 left + PyString_GET_SIZE(self) + right);
2360 if (u) {
2361 if (left)
2362 memset(PyString_AS_STRING(u), fill, left);
2363 memcpy(PyString_AS_STRING(u) + left,
2364 PyString_AS_STRING(self),
2365 PyString_GET_SIZE(self));
2366 if (right)
2367 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2368 fill, right);
2371 return u;
2374 static char ljust__doc__[] =
2375 "S.ljust(width) -> string\n"
2376 "\n"
2377 "Return S left justified in a string of length width. Padding is\n"
2378 "done using spaces.";
2380 static PyObject *
2381 string_ljust(PyStringObject *self, PyObject *args)
2383 int width;
2384 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2385 return NULL;
2387 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
2388 Py_INCREF(self);
2389 return (PyObject*) self;
2392 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2396 static char rjust__doc__[] =
2397 "S.rjust(width) -> string\n"
2398 "\n"
2399 "Return S right justified in a string of length width. Padding is\n"
2400 "done using spaces.";
2402 static PyObject *
2403 string_rjust(PyStringObject *self, PyObject *args)
2405 int width;
2406 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2407 return NULL;
2409 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
2410 Py_INCREF(self);
2411 return (PyObject*) self;
2414 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2418 static char center__doc__[] =
2419 "S.center(width) -> string\n"
2420 "\n"
2421 "Return S centered in a string of length width. Padding is done\n"
2422 "using spaces.";
2424 static PyObject *
2425 string_center(PyStringObject *self, PyObject *args)
2427 int marg, left;
2428 int width;
2430 if (!PyArg_ParseTuple(args, "i:center", &width))
2431 return NULL;
2433 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
2434 Py_INCREF(self);
2435 return (PyObject*) self;
2438 marg = width - PyString_GET_SIZE(self);
2439 left = marg / 2 + (marg & width & 1);
2441 return pad(self, left, marg - left, ' ');
2444 static char zfill__doc__[] =
2445 "S.zfill(width) -> string\n"
2446 "\n"
2447 "Pad a numeric string S with zeros on the left, to fill a field\n"
2448 "of the specified width. The string S is never truncated.";
2450 static PyObject *
2451 string_zfill(PyStringObject *self, PyObject *args)
2453 int fill;
2454 PyObject *s;
2455 char *p;
2457 int width;
2458 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2459 return NULL;
2461 if (PyString_GET_SIZE(self) >= width) {
2462 if (PyString_CheckExact(self)) {
2463 Py_INCREF(self);
2464 return (PyObject*) self;
2466 else
2467 return PyString_FromStringAndSize(
2468 PyString_AS_STRING(self),
2469 PyString_GET_SIZE(self)
2473 fill = width - PyString_GET_SIZE(self);
2475 s = pad(self, fill, 0, '0');
2477 if (s == NULL)
2478 return NULL;
2480 p = PyString_AS_STRING(s);
2481 if (p[fill] == '+' || p[fill] == '-') {
2482 /* move sign to beginning of string */
2483 p[0] = p[fill];
2484 p[fill] = '0';
2487 return (PyObject*) s;
2490 static char isspace__doc__[] =
2491 "S.isspace() -> int\n"
2492 "\n"
2493 "Return 1 if there are only whitespace characters in S,\n"
2494 "0 otherwise.";
2496 static PyObject*
2497 string_isspace(PyStringObject *self)
2499 register const unsigned char *p
2500 = (unsigned char *) PyString_AS_STRING(self);
2501 register const unsigned char *e;
2503 /* Shortcut for single character strings */
2504 if (PyString_GET_SIZE(self) == 1 &&
2505 isspace(*p))
2506 return PyInt_FromLong(1);
2508 /* Special case for empty strings */
2509 if (PyString_GET_SIZE(self) == 0)
2510 return PyInt_FromLong(0);
2512 e = p + PyString_GET_SIZE(self);
2513 for (; p < e; p++) {
2514 if (!isspace(*p))
2515 return PyInt_FromLong(0);
2517 return PyInt_FromLong(1);
2521 static char isalpha__doc__[] =
2522 "S.isalpha() -> int\n\
2524 Return 1 if all characters in S are alphabetic\n\
2525 and there is at least one character in S, 0 otherwise.";
2527 static PyObject*
2528 string_isalpha(PyStringObject *self)
2530 register const unsigned char *p
2531 = (unsigned char *) PyString_AS_STRING(self);
2532 register const unsigned char *e;
2534 /* Shortcut for single character strings */
2535 if (PyString_GET_SIZE(self) == 1 &&
2536 isalpha(*p))
2537 return PyInt_FromLong(1);
2539 /* Special case for empty strings */
2540 if (PyString_GET_SIZE(self) == 0)
2541 return PyInt_FromLong(0);
2543 e = p + PyString_GET_SIZE(self);
2544 for (; p < e; p++) {
2545 if (!isalpha(*p))
2546 return PyInt_FromLong(0);
2548 return PyInt_FromLong(1);
2552 static char isalnum__doc__[] =
2553 "S.isalnum() -> int\n\
2555 Return 1 if all characters in S are alphanumeric\n\
2556 and there is at least one character in S, 0 otherwise.";
2558 static PyObject*
2559 string_isalnum(PyStringObject *self)
2561 register const unsigned char *p
2562 = (unsigned char *) PyString_AS_STRING(self);
2563 register const unsigned char *e;
2565 /* Shortcut for single character strings */
2566 if (PyString_GET_SIZE(self) == 1 &&
2567 isalnum(*p))
2568 return PyInt_FromLong(1);
2570 /* Special case for empty strings */
2571 if (PyString_GET_SIZE(self) == 0)
2572 return PyInt_FromLong(0);
2574 e = p + PyString_GET_SIZE(self);
2575 for (; p < e; p++) {
2576 if (!isalnum(*p))
2577 return PyInt_FromLong(0);
2579 return PyInt_FromLong(1);
2583 static char isdigit__doc__[] =
2584 "S.isdigit() -> int\n\
2586 Return 1 if there are only digit characters in S,\n\
2587 0 otherwise.";
2589 static PyObject*
2590 string_isdigit(PyStringObject *self)
2592 register const unsigned char *p
2593 = (unsigned char *) PyString_AS_STRING(self);
2594 register const unsigned char *e;
2596 /* Shortcut for single character strings */
2597 if (PyString_GET_SIZE(self) == 1 &&
2598 isdigit(*p))
2599 return PyInt_FromLong(1);
2601 /* Special case for empty strings */
2602 if (PyString_GET_SIZE(self) == 0)
2603 return PyInt_FromLong(0);
2605 e = p + PyString_GET_SIZE(self);
2606 for (; p < e; p++) {
2607 if (!isdigit(*p))
2608 return PyInt_FromLong(0);
2610 return PyInt_FromLong(1);
2614 static char islower__doc__[] =
2615 "S.islower() -> int\n\
2617 Return 1 if all cased characters in S are lowercase and there is\n\
2618 at least one cased character in S, 0 otherwise.";
2620 static PyObject*
2621 string_islower(PyStringObject *self)
2623 register const unsigned char *p
2624 = (unsigned char *) PyString_AS_STRING(self);
2625 register const unsigned char *e;
2626 int cased;
2628 /* Shortcut for single character strings */
2629 if (PyString_GET_SIZE(self) == 1)
2630 return PyInt_FromLong(islower(*p) != 0);
2632 /* Special case for empty strings */
2633 if (PyString_GET_SIZE(self) == 0)
2634 return PyInt_FromLong(0);
2636 e = p + PyString_GET_SIZE(self);
2637 cased = 0;
2638 for (; p < e; p++) {
2639 if (isupper(*p))
2640 return PyInt_FromLong(0);
2641 else if (!cased && islower(*p))
2642 cased = 1;
2644 return PyInt_FromLong(cased);
2648 static char isupper__doc__[] =
2649 "S.isupper() -> int\n\
2651 Return 1 if all cased characters in S are uppercase and there is\n\
2652 at least one cased character in S, 0 otherwise.";
2654 static PyObject*
2655 string_isupper(PyStringObject *self)
2657 register const unsigned char *p
2658 = (unsigned char *) PyString_AS_STRING(self);
2659 register const unsigned char *e;
2660 int cased;
2662 /* Shortcut for single character strings */
2663 if (PyString_GET_SIZE(self) == 1)
2664 return PyInt_FromLong(isupper(*p) != 0);
2666 /* Special case for empty strings */
2667 if (PyString_GET_SIZE(self) == 0)
2668 return PyInt_FromLong(0);
2670 e = p + PyString_GET_SIZE(self);
2671 cased = 0;
2672 for (; p < e; p++) {
2673 if (islower(*p))
2674 return PyInt_FromLong(0);
2675 else if (!cased && isupper(*p))
2676 cased = 1;
2678 return PyInt_FromLong(cased);
2682 static char istitle__doc__[] =
2683 "S.istitle() -> int\n\
2685 Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2686 may only follow uncased characters and lowercase characters only cased\n\
2687 ones. Return 0 otherwise.";
2689 static PyObject*
2690 string_istitle(PyStringObject *self, PyObject *uncased)
2692 register const unsigned char *p
2693 = (unsigned char *) PyString_AS_STRING(self);
2694 register const unsigned char *e;
2695 int cased, previous_is_cased;
2697 /* Shortcut for single character strings */
2698 if (PyString_GET_SIZE(self) == 1)
2699 return PyInt_FromLong(isupper(*p) != 0);
2701 /* Special case for empty strings */
2702 if (PyString_GET_SIZE(self) == 0)
2703 return PyInt_FromLong(0);
2705 e = p + PyString_GET_SIZE(self);
2706 cased = 0;
2707 previous_is_cased = 0;
2708 for (; p < e; p++) {
2709 register const unsigned char ch = *p;
2711 if (isupper(ch)) {
2712 if (previous_is_cased)
2713 return PyInt_FromLong(0);
2714 previous_is_cased = 1;
2715 cased = 1;
2717 else if (islower(ch)) {
2718 if (!previous_is_cased)
2719 return PyInt_FromLong(0);
2720 previous_is_cased = 1;
2721 cased = 1;
2723 else
2724 previous_is_cased = 0;
2726 return PyInt_FromLong(cased);
2730 static char splitlines__doc__[] =
2731 "S.splitlines([keepends]) -> list of strings\n\
2733 Return a list of the lines in S, breaking at line boundaries.\n\
2734 Line breaks are not included in the resulting list unless keepends\n\
2735 is given and true.";
2737 #define SPLIT_APPEND(data, left, right) \
2738 str = PyString_FromStringAndSize(data + left, right - left); \
2739 if (!str) \
2740 goto onError; \
2741 if (PyList_Append(list, str)) { \
2742 Py_DECREF(str); \
2743 goto onError; \
2745 else \
2746 Py_DECREF(str);
2748 static PyObject*
2749 string_splitlines(PyStringObject *self, PyObject *args)
2751 register int i;
2752 register int j;
2753 int len;
2754 int keepends = 0;
2755 PyObject *list;
2756 PyObject *str;
2757 char *data;
2759 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
2760 return NULL;
2762 data = PyString_AS_STRING(self);
2763 len = PyString_GET_SIZE(self);
2765 list = PyList_New(0);
2766 if (!list)
2767 goto onError;
2769 for (i = j = 0; i < len; ) {
2770 int eol;
2772 /* Find a line and append it */
2773 while (i < len && data[i] != '\n' && data[i] != '\r')
2774 i++;
2776 /* Skip the line break reading CRLF as one line break */
2777 eol = i;
2778 if (i < len) {
2779 if (data[i] == '\r' && i + 1 < len &&
2780 data[i+1] == '\n')
2781 i += 2;
2782 else
2783 i++;
2784 if (keepends)
2785 eol = i;
2787 SPLIT_APPEND(data, j, eol);
2788 j = i;
2790 if (j < len) {
2791 SPLIT_APPEND(data, j, len);
2794 return list;
2796 onError:
2797 Py_DECREF(list);
2798 return NULL;
2801 #undef SPLIT_APPEND
2804 static PyMethodDef
2805 string_methods[] = {
2806 /* Counterparts of the obsolete stropmodule functions; except
2807 string.maketrans(). */
2808 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2809 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2810 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
2811 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
2812 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
2813 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
2814 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
2815 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
2816 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
2817 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
2818 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
2819 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
2820 capitalize__doc__},
2821 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
2822 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
2823 endswith__doc__},
2824 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
2825 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
2826 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
2827 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2828 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2829 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
2830 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
2831 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
2832 startswith__doc__},
2833 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
2834 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
2835 swapcase__doc__},
2836 {"translate", (PyCFunction)string_translate, METH_VARARGS,
2837 translate__doc__},
2838 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
2839 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
2840 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
2841 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
2842 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
2843 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
2844 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
2845 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
2846 expandtabs__doc__},
2847 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
2848 splitlines__doc__},
2849 {NULL, NULL} /* sentinel */
2852 staticforward PyObject *
2853 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2855 static PyObject *
2856 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2858 PyObject *x = NULL;
2859 static char *kwlist[] = {"object", 0};
2861 if (type != &PyString_Type)
2862 return str_subtype_new(type, args, kwds);
2863 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
2864 return NULL;
2865 if (x == NULL)
2866 return PyString_FromString("");
2867 return PyObject_Str(x);
2870 static PyObject *
2871 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2873 PyObject *tmp, *pnew;
2874 int n;
2876 assert(PyType_IsSubtype(type, &PyString_Type));
2877 tmp = string_new(&PyString_Type, args, kwds);
2878 if (tmp == NULL)
2879 return NULL;
2880 assert(PyString_CheckExact(tmp));
2881 n = PyString_GET_SIZE(tmp);
2882 pnew = type->tp_alloc(type, n);
2883 if (pnew != NULL) {
2884 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
2885 #ifdef CACHE_HASH
2886 ((PyStringObject *)pnew)->ob_shash =
2887 ((PyStringObject *)tmp)->ob_shash;
2888 #endif
2889 #ifdef INTERN_STRINGS
2890 ((PyStringObject *)pnew)->ob_sinterned =
2891 ((PyStringObject *)tmp)->ob_sinterned;
2892 #endif
2894 Py_DECREF(tmp);
2895 return pnew;
2898 static char string_doc[] =
2899 "str(object) -> string\n\
2901 Return a nice string representation of the object.\n\
2902 If the argument is a string, the return value is the same object.";
2904 PyTypeObject PyString_Type = {
2905 PyObject_HEAD_INIT(&PyType_Type)
2907 "str",
2908 sizeof(PyStringObject),
2909 sizeof(char),
2910 (destructor)string_dealloc, /* tp_dealloc */
2911 (printfunc)string_print, /* tp_print */
2912 0, /* tp_getattr */
2913 0, /* tp_setattr */
2914 0, /* tp_compare */
2915 (reprfunc)string_repr, /* tp_repr */
2916 0, /* tp_as_number */
2917 &string_as_sequence, /* tp_as_sequence */
2918 0, /* tp_as_mapping */
2919 (hashfunc)string_hash, /* tp_hash */
2920 0, /* tp_call */
2921 (reprfunc)string_str, /* tp_str */
2922 PyObject_GenericGetAttr, /* tp_getattro */
2923 0, /* tp_setattro */
2924 &string_as_buffer, /* tp_as_buffer */
2925 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
2926 string_doc, /* tp_doc */
2927 0, /* tp_traverse */
2928 0, /* tp_clear */
2929 (richcmpfunc)string_richcompare, /* tp_richcompare */
2930 0, /* tp_weaklistoffset */
2931 0, /* tp_iter */
2932 0, /* tp_iternext */
2933 string_methods, /* tp_methods */
2934 0, /* tp_members */
2935 0, /* tp_getset */
2936 0, /* tp_base */
2937 0, /* tp_dict */
2938 0, /* tp_descr_get */
2939 0, /* tp_descr_set */
2940 0, /* tp_dictoffset */
2941 0, /* tp_init */
2942 0, /* tp_alloc */
2943 string_new, /* tp_new */
2944 _PyObject_Del, /* tp_free */
2947 void
2948 PyString_Concat(register PyObject **pv, register PyObject *w)
2950 register PyObject *v;
2951 if (*pv == NULL)
2952 return;
2953 if (w == NULL || !PyString_Check(*pv)) {
2954 Py_DECREF(*pv);
2955 *pv = NULL;
2956 return;
2958 v = string_concat((PyStringObject *) *pv, w);
2959 Py_DECREF(*pv);
2960 *pv = v;
2963 void
2964 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
2966 PyString_Concat(pv, w);
2967 Py_XDECREF(w);
2971 /* The following function breaks the notion that strings are immutable:
2972 it changes the size of a string. We get away with this only if there
2973 is only one module referencing the object. You can also think of it
2974 as creating a new string object and destroying the old one, only
2975 more efficiently. In any case, don't use this if the string may
2976 already be known to some other part of the code...
2977 Note that if there's not enough memory to resize the string, the original
2978 string object at *pv is deallocated, *pv is set to NULL, an "out of
2979 memory" exception is set, and -1 is returned. Else (on success) 0 is
2980 returned, and the value in *pv may or may not be the same as on input.
2981 As always, an extra byte is allocated for a trailing \0 byte (newsize
2982 does *not* include that), and a trailing \0 byte is stored.
2986 _PyString_Resize(PyObject **pv, int newsize)
2988 register PyObject *v;
2989 register PyStringObject *sv;
2990 v = *pv;
2991 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0) {
2992 *pv = 0;
2993 Py_DECREF(v);
2994 PyErr_BadInternalCall();
2995 return -1;
2997 /* XXX UNREF/NEWREF interface should be more symmetrical */
2998 #ifdef Py_REF_DEBUG
2999 --_Py_RefTotal;
3000 #endif
3001 _Py_ForgetReference(v);
3002 *pv = (PyObject *)
3003 PyObject_REALLOC((char *)v,
3004 sizeof(PyStringObject) + newsize * sizeof(char));
3005 if (*pv == NULL) {
3006 PyObject_DEL(v);
3007 PyErr_NoMemory();
3008 return -1;
3010 _Py_NewReference(*pv);
3011 sv = (PyStringObject *) *pv;
3012 sv->ob_size = newsize;
3013 sv->ob_sval[newsize] = '\0';
3014 return 0;
3017 /* Helpers for formatstring */
3019 static PyObject *
3020 getnextarg(PyObject *args, int arglen, int *p_argidx)
3022 int argidx = *p_argidx;
3023 if (argidx < arglen) {
3024 (*p_argidx)++;
3025 if (arglen < 0)
3026 return args;
3027 else
3028 return PyTuple_GetItem(args, argidx);
3030 PyErr_SetString(PyExc_TypeError,
3031 "not enough arguments for format string");
3032 return NULL;
/* Flag bits recording which flag characters appeared in a '%' conversion
   specification.  Each flag occupies its own bit so they can be OR-ed
   together and tested individually. */
#define F_LJUST	0x01	/* '-' */
#define F_SIGN	0x02	/* '+' */
#define F_BLANK	0x04	/* ' ' */
#define F_ALT	0x08	/* '#' */
#define F_ZERO	0x10	/* '0' */
3048 static int
3049 formatfloat(char *buf, size_t buflen, int flags,
3050 int prec, int type, PyObject *v)
3052 /* fmt = '%#.' + `prec` + `type`
3053 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
3054 char fmt[20];
3055 double x;
3056 if (!PyArg_Parse(v, "d;float argument required", &x))
3057 return -1;
3058 if (prec < 0)
3059 prec = 6;
3060 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3061 type = 'g';
3062 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3063 (flags&F_ALT) ? "#" : "",
3064 prec, type);
3065 /* worst case length calc to ensure no buffer overrun:
3066 fmt = %#.<prec>g
3067 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
3068 for any double rep.)
3069 len = 1 + prec + 1 + 2 + 5 = 9 + prec
3070 If prec=0 the effective precision is 1 (the leading digit is
3071 always given), therefore increase by one to 10+prec. */
3072 if (buflen <= (size_t)10 + (size_t)prec) {
3073 PyErr_SetString(PyExc_OverflowError,
3074 "formatted float is too long (precision too large?)");
3075 return -1;
3077 PyOS_snprintf(buf, buflen, fmt, x);
3078 return strlen(buf);
3081 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3082 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3083 * Python's regular ints.
3084 * Return value: a new PyString*, or NULL if error.
3085 * . *pbuf is set to point into it,
3086 * *plen set to the # of chars following that.
3087 * Caller must decref it when done using pbuf.
3088 * The string starting at *pbuf is of the form
3089 * "-"? ("0x" | "0X")? digit+
3090 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3091 * set in flags. The case of hex digits will be correct,
3092 * There will be at least prec digits, zero-filled on the left if
3093 * necessary to get that many.
3094 * val object to be converted
3095 * flags bitmask of format flags; only F_ALT is looked at
3096 * prec minimum number of digits; 0-fill on left if needed
3097 * type a character in [duoxX]; u acts the same as d
3099 * CAUTION: o, x and X conversions on regular ints can never
3100 * produce a '-' sign, but can for Python's unbounded ints.
3102 PyObject*
3103 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3104 char **pbuf, int *plen)
3106 PyObject *result = NULL;
3107 char *buf;
3108 int i;
3109 int sign; /* 1 if '-', else 0 */
3110 int len; /* number of characters */
3111 int numdigits; /* len == numnondigits + numdigits */
3112 int numnondigits = 0;
3114 switch (type) {
3115 case 'd':
3116 case 'u':
3117 result = val->ob_type->tp_str(val);
3118 break;
3119 case 'o':
3120 result = val->ob_type->tp_as_number->nb_oct(val);
3121 break;
3122 case 'x':
3123 case 'X':
3124 numnondigits = 2;
3125 result = val->ob_type->tp_as_number->nb_hex(val);
3126 break;
3127 default:
3128 assert(!"'type' not in [duoxX]");
3130 if (!result)
3131 return NULL;
3133 /* To modify the string in-place, there can only be one reference. */
3134 if (result->ob_refcnt != 1) {
3135 PyErr_BadInternalCall();
3136 return NULL;
3138 buf = PyString_AsString(result);
3139 len = PyString_Size(result);
3140 if (buf[len-1] == 'L') {
3141 --len;
3142 buf[len] = '\0';
3144 sign = buf[0] == '-';
3145 numnondigits += sign;
3146 numdigits = len - numnondigits;
3147 assert(numdigits > 0);
3149 /* Get rid of base marker unless F_ALT */
3150 if ((flags & F_ALT) == 0) {
3151 /* Need to skip 0x, 0X or 0. */
3152 int skipped = 0;
3153 switch (type) {
3154 case 'o':
3155 assert(buf[sign] == '0');
3156 /* If 0 is only digit, leave it alone. */
3157 if (numdigits > 1) {
3158 skipped = 1;
3159 --numdigits;
3161 break;
3162 case 'x':
3163 case 'X':
3164 assert(buf[sign] == '0');
3165 assert(buf[sign + 1] == 'x');
3166 skipped = 2;
3167 numnondigits -= 2;
3168 break;
3170 if (skipped) {
3171 buf += skipped;
3172 len -= skipped;
3173 if (sign)
3174 buf[0] = '-';
3176 assert(len == numnondigits + numdigits);
3177 assert(numdigits > 0);
3180 /* Fill with leading zeroes to meet minimum width. */
3181 if (prec > numdigits) {
3182 PyObject *r1 = PyString_FromStringAndSize(NULL,
3183 numnondigits + prec);
3184 char *b1;
3185 if (!r1) {
3186 Py_DECREF(result);
3187 return NULL;
3189 b1 = PyString_AS_STRING(r1);
3190 for (i = 0; i < numnondigits; ++i)
3191 *b1++ = *buf++;
3192 for (i = 0; i < prec - numdigits; i++)
3193 *b1++ = '0';
3194 for (i = 0; i < numdigits; i++)
3195 *b1++ = *buf++;
3196 *b1 = '\0';
3197 Py_DECREF(result);
3198 result = r1;
3199 buf = PyString_AS_STRING(result);
3200 len = numnondigits + prec;
3203 /* Fix up case for hex conversions. */
3204 switch (type) {
3205 case 'x':
3206 /* Need to convert all upper case letters to lower case. */
3207 for (i = 0; i < len; i++)
3208 if (buf[i] >= 'A' && buf[i] <= 'F')
3209 buf[i] += 'a'-'A';
3210 break;
3211 case 'X':
3212 /* Need to convert 0x to 0X (and -0x to -0X). */
3213 if (buf[sign + 1] == 'x')
3214 buf[sign + 1] = 'X';
3215 break;
3217 *pbuf = buf;
3218 *plen = len;
3219 return result;
3222 static int
3223 formatint(char *buf, size_t buflen, int flags,
3224 int prec, int type, PyObject *v)
3226 /* fmt = '%#.' + `prec` + 'l' + `type`
3227 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3228 + 1 + 1 = 24 */
3229 char fmt[64]; /* plenty big enough! */
3230 long x;
3231 if (!PyArg_Parse(v, "l;int argument required", &x))
3232 return -1;
3233 if (prec < 0)
3234 prec = 1;
3235 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%dl%c",
3236 (flags&F_ALT) ? "#" : "",
3237 prec, type);
3238 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
3239 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
3240 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
3241 PyErr_SetString(PyExc_OverflowError,
3242 "formatted integer is too long (precision too large?)");
3243 return -1;
3245 PyOS_snprintf(buf, buflen, fmt, x);
3246 /* When converting 0 under %#x or %#X, C leaves off the base marker,
3247 * but we want it (for consistency with other %#x conversions, and
3248 * for consistency with Python's hex() function).
3249 * BUG 28-Apr-2001 tim: At least two platform Cs (Metrowerks &
3250 * Compaq Tru64) violate the std by converting 0 w/ leading 0x anyway.
3251 * So add it only if the platform didn't already.
3253 if (x == 0 &&
3254 (flags & F_ALT) &&
3255 (type == 'x' || type == 'X') &&
3256 buf[1] != (char)type) /* this last always true under std C */
3258 memmove(buf+2, buf, strlen(buf) + 1);
3259 buf[0] = '0';
3260 buf[1] = (char)type;
3262 return strlen(buf);
3265 static int
3266 formatchar(char *buf, size_t buflen, PyObject *v)
3268 /* presume that the buffer is at least 2 characters long */
3269 if (PyString_Check(v)) {
3270 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
3271 return -1;
3273 else {
3274 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
3275 return -1;
3277 buf[1] = '\0';
3278 return 1;
3282 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
3284 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
3285 chars are formatted. XXX This is a magic number. Each formatting
3286 routine does bounds checking to ensure no overflow, but a better
3287 solution may be to malloc a buffer of appropriate size for each
3288 format. For now, the current solution is sufficient.
3290 #define FORMATBUFLEN (size_t)120
3292 PyObject *
3293 PyString_Format(PyObject *format, PyObject *args)
3295 char *fmt, *res;
3296 int fmtcnt, rescnt, reslen, arglen, argidx;
3297 int args_owned = 0;
3298 PyObject *result, *orig_args;
3299 #ifdef Py_USING_UNICODE
3300 PyObject *v, *w;
3301 #endif
3302 PyObject *dict = NULL;
3303 if (format == NULL || !PyString_Check(format) || args == NULL) {
3304 PyErr_BadInternalCall();
3305 return NULL;
3307 orig_args = args;
3308 fmt = PyString_AS_STRING(format);
3309 fmtcnt = PyString_GET_SIZE(format);
3310 reslen = rescnt = fmtcnt + 100;
3311 result = PyString_FromStringAndSize((char *)NULL, reslen);
3312 if (result == NULL)
3313 return NULL;
3314 res = PyString_AsString(result);
3315 if (PyTuple_Check(args)) {
3316 arglen = PyTuple_GET_SIZE(args);
3317 argidx = 0;
3319 else {
3320 arglen = -1;
3321 argidx = -2;
3323 if (args->ob_type->tp_as_mapping)
3324 dict = args;
3325 while (--fmtcnt >= 0) {
3326 if (*fmt != '%') {
3327 if (--rescnt < 0) {
3328 rescnt = fmtcnt + 100;
3329 reslen += rescnt;
3330 if (_PyString_Resize(&result, reslen) < 0)
3331 return NULL;
3332 res = PyString_AS_STRING(result)
3333 + reslen - rescnt;
3334 --rescnt;
3336 *res++ = *fmt++;
3338 else {
3339 /* Got a format specifier */
3340 int flags = 0;
3341 int width = -1;
3342 int prec = -1;
3343 int c = '\0';
3344 int fill;
3345 PyObject *v = NULL;
3346 PyObject *temp = NULL;
3347 char *pbuf;
3348 int sign;
3349 int len;
3350 char formatbuf[FORMATBUFLEN];
3351 /* For format{float,int,char}() */
3352 #ifdef Py_USING_UNICODE
3353 char *fmt_start = fmt;
3354 int argidx_start = argidx;
3355 #endif
3357 fmt++;
3358 if (*fmt == '(') {
3359 char *keystart;
3360 int keylen;
3361 PyObject *key;
3362 int pcount = 1;
3364 if (dict == NULL) {
3365 PyErr_SetString(PyExc_TypeError,
3366 "format requires a mapping");
3367 goto error;
3369 ++fmt;
3370 --fmtcnt;
3371 keystart = fmt;
3372 /* Skip over balanced parentheses */
3373 while (pcount > 0 && --fmtcnt >= 0) {
3374 if (*fmt == ')')
3375 --pcount;
3376 else if (*fmt == '(')
3377 ++pcount;
3378 fmt++;
3380 keylen = fmt - keystart - 1;
3381 if (fmtcnt < 0 || pcount > 0) {
3382 PyErr_SetString(PyExc_ValueError,
3383 "incomplete format key");
3384 goto error;
3386 key = PyString_FromStringAndSize(keystart,
3387 keylen);
3388 if (key == NULL)
3389 goto error;
3390 if (args_owned) {
3391 Py_DECREF(args);
3392 args_owned = 0;
3394 args = PyObject_GetItem(dict, key);
3395 Py_DECREF(key);
3396 if (args == NULL) {
3397 goto error;
3399 args_owned = 1;
3400 arglen = -1;
3401 argidx = -2;
3403 while (--fmtcnt >= 0) {
3404 switch (c = *fmt++) {
3405 case '-': flags |= F_LJUST; continue;
3406 case '+': flags |= F_SIGN; continue;
3407 case ' ': flags |= F_BLANK; continue;
3408 case '#': flags |= F_ALT; continue;
3409 case '0': flags |= F_ZERO; continue;
3411 break;
3413 if (c == '*') {
3414 v = getnextarg(args, arglen, &argidx);
3415 if (v == NULL)
3416 goto error;
3417 if (!PyInt_Check(v)) {
3418 PyErr_SetString(PyExc_TypeError,
3419 "* wants int");
3420 goto error;
3422 width = PyInt_AsLong(v);
3423 if (width < 0) {
3424 flags |= F_LJUST;
3425 width = -width;
3427 if (--fmtcnt >= 0)
3428 c = *fmt++;
3430 else if (c >= 0 && isdigit(c)) {
3431 width = c - '0';
3432 while (--fmtcnt >= 0) {
3433 c = Py_CHARMASK(*fmt++);
3434 if (!isdigit(c))
3435 break;
3436 if ((width*10) / 10 != width) {
3437 PyErr_SetString(
3438 PyExc_ValueError,
3439 "width too big");
3440 goto error;
3442 width = width*10 + (c - '0');
3445 if (c == '.') {
3446 prec = 0;
3447 if (--fmtcnt >= 0)
3448 c = *fmt++;
3449 if (c == '*') {
3450 v = getnextarg(args, arglen, &argidx);
3451 if (v == NULL)
3452 goto error;
3453 if (!PyInt_Check(v)) {
3454 PyErr_SetString(
3455 PyExc_TypeError,
3456 "* wants int");
3457 goto error;
3459 prec = PyInt_AsLong(v);
3460 if (prec < 0)
3461 prec = 0;
3462 if (--fmtcnt >= 0)
3463 c = *fmt++;
3465 else if (c >= 0 && isdigit(c)) {
3466 prec = c - '0';
3467 while (--fmtcnt >= 0) {
3468 c = Py_CHARMASK(*fmt++);
3469 if (!isdigit(c))
3470 break;
3471 if ((prec*10) / 10 != prec) {
3472 PyErr_SetString(
3473 PyExc_ValueError,
3474 "prec too big");
3475 goto error;
3477 prec = prec*10 + (c - '0');
3480 } /* prec */
3481 if (fmtcnt >= 0) {
3482 if (c == 'h' || c == 'l' || c == 'L') {
3483 if (--fmtcnt >= 0)
3484 c = *fmt++;
3487 if (fmtcnt < 0) {
3488 PyErr_SetString(PyExc_ValueError,
3489 "incomplete format");
3490 goto error;
3492 if (c != '%') {
3493 v = getnextarg(args, arglen, &argidx);
3494 if (v == NULL)
3495 goto error;
3497 sign = 0;
3498 fill = ' ';
3499 switch (c) {
3500 case '%':
3501 pbuf = "%";
3502 len = 1;
3503 break;
3504 case 's':
3505 #ifdef Py_USING_UNICODE
3506 if (PyUnicode_Check(v)) {
3507 fmt = fmt_start;
3508 argidx = argidx_start;
3509 goto unicode;
3511 #endif
3512 /* Fall through */
3513 case 'r':
3514 if (c == 's')
3515 temp = PyObject_Str(v);
3516 else
3517 temp = PyObject_Repr(v);
3518 if (temp == NULL)
3519 goto error;
3520 if (!PyString_Check(temp)) {
3521 PyErr_SetString(PyExc_TypeError,
3522 "%s argument has non-string str()");
3523 Py_DECREF(temp);
3524 goto error;
3526 pbuf = PyString_AS_STRING(temp);
3527 len = PyString_GET_SIZE(temp);
3528 if (prec >= 0 && len > prec)
3529 len = prec;
3530 break;
3531 case 'i':
3532 case 'd':
3533 case 'u':
3534 case 'o':
3535 case 'x':
3536 case 'X':
3537 if (c == 'i')
3538 c = 'd';
3539 if (PyLong_Check(v)) {
3540 temp = _PyString_FormatLong(v, flags,
3541 prec, c, &pbuf, &len);
3542 if (!temp)
3543 goto error;
3544 /* unbounded ints can always produce
3545 a sign character! */
3546 sign = 1;
3548 else {
3549 pbuf = formatbuf;
3550 len = formatint(pbuf,
3551 sizeof(formatbuf),
3552 flags, prec, c, v);
3553 if (len < 0)
3554 goto error;
3555 /* only d conversion is signed */
3556 sign = c == 'd';
3558 if (flags & F_ZERO)
3559 fill = '0';
3560 break;
3561 case 'e':
3562 case 'E':
3563 case 'f':
3564 case 'g':
3565 case 'G':
3566 pbuf = formatbuf;
3567 len = formatfloat(pbuf, sizeof(formatbuf),
3568 flags, prec, c, v);
3569 if (len < 0)
3570 goto error;
3571 sign = 1;
3572 if (flags & F_ZERO)
3573 fill = '0';
3574 break;
3575 case 'c':
3576 pbuf = formatbuf;
3577 len = formatchar(pbuf, sizeof(formatbuf), v);
3578 if (len < 0)
3579 goto error;
3580 break;
3581 default:
3582 PyErr_Format(PyExc_ValueError,
3583 "unsupported format character '%c' (0x%x) "
3584 "at index %i",
3585 c, c,
3586 (int)(fmt - 1 - PyString_AsString(format)));
3587 goto error;
3589 if (sign) {
3590 if (*pbuf == '-' || *pbuf == '+') {
3591 sign = *pbuf++;
3592 len--;
3594 else if (flags & F_SIGN)
3595 sign = '+';
3596 else if (flags & F_BLANK)
3597 sign = ' ';
3598 else
3599 sign = 0;
3601 if (width < len)
3602 width = len;
3603 if (rescnt - (sign != 0) < width) {
3604 reslen -= rescnt;
3605 rescnt = width + fmtcnt + 100;
3606 reslen += rescnt;
3607 if (reslen < 0) {
3608 Py_DECREF(result);
3609 return PyErr_NoMemory();
3611 if (_PyString_Resize(&result, reslen) < 0)
3612 return NULL;
3613 res = PyString_AS_STRING(result)
3614 + reslen - rescnt;
3616 if (sign) {
3617 if (fill != ' ')
3618 *res++ = sign;
3619 rescnt--;
3620 if (width > len)
3621 width--;
3623 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3624 assert(pbuf[0] == '0');
3625 assert(pbuf[1] == c);
3626 if (fill != ' ') {
3627 *res++ = *pbuf++;
3628 *res++ = *pbuf++;
3630 rescnt -= 2;
3631 width -= 2;
3632 if (width < 0)
3633 width = 0;
3634 len -= 2;
3636 if (width > len && !(flags & F_LJUST)) {
3637 do {
3638 --rescnt;
3639 *res++ = fill;
3640 } while (--width > len);
3642 if (fill == ' ') {
3643 if (sign)
3644 *res++ = sign;
3645 if ((flags & F_ALT) &&
3646 (c == 'x' || c == 'X')) {
3647 assert(pbuf[0] == '0');
3648 assert(pbuf[1] == c);
3649 *res++ = *pbuf++;
3650 *res++ = *pbuf++;
3653 memcpy(res, pbuf, len);
3654 res += len;
3655 rescnt -= len;
3656 while (--width >= len) {
3657 --rescnt;
3658 *res++ = ' ';
3660 if (dict && (argidx < arglen) && c != '%') {
3661 PyErr_SetString(PyExc_TypeError,
3662 "not all arguments converted");
3663 goto error;
3665 Py_XDECREF(temp);
3666 } /* '%' */
3667 } /* until end */
3668 if (argidx < arglen && !dict) {
3669 PyErr_SetString(PyExc_TypeError,
3670 "not all arguments converted");
3671 goto error;
3673 if (args_owned) {
3674 Py_DECREF(args);
3676 _PyString_Resize(&result, reslen - rescnt);
3677 return result;
3679 #ifdef Py_USING_UNICODE
3680 unicode:
3681 if (args_owned) {
3682 Py_DECREF(args);
3683 args_owned = 0;
3685 /* Fiddle args right (remove the first argidx arguments) */
3686 if (PyTuple_Check(orig_args) && argidx > 0) {
3687 PyObject *v;
3688 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3689 v = PyTuple_New(n);
3690 if (v == NULL)
3691 goto error;
3692 while (--n >= 0) {
3693 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3694 Py_INCREF(w);
3695 PyTuple_SET_ITEM(v, n, w);
3697 args = v;
3698 } else {
3699 Py_INCREF(orig_args);
3700 args = orig_args;
3702 args_owned = 1;
3703 /* Take what we have of the result and let the Unicode formatting
3704 function format the rest of the input. */
3705 rescnt = res - PyString_AS_STRING(result);
3706 if (_PyString_Resize(&result, rescnt))
3707 goto error;
3708 fmtcnt = PyString_GET_SIZE(format) - \
3709 (fmt - PyString_AS_STRING(format));
3710 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3711 if (format == NULL)
3712 goto error;
3713 v = PyUnicode_Format(format, args);
3714 Py_DECREF(format);
3715 if (v == NULL)
3716 goto error;
3717 /* Paste what we have (result) to what the Unicode formatting
3718 function returned (v) and return the result (or error) */
3719 w = PyUnicode_Concat(result, v);
3720 Py_DECREF(result);
3721 Py_DECREF(v);
3722 Py_DECREF(args);
3723 return w;
3724 #endif /* Py_USING_UNICODE */
3726 error:
3727 Py_DECREF(result);
3728 if (args_owned) {
3729 Py_DECREF(args);
3731 return NULL;
3735 #ifdef INTERN_STRINGS
3737 /* This dictionary will leak at PyString_Fini() time. That's acceptable
3738 * because PyString_Fini() specifically frees interned strings that are
3739 * only referenced by this dictionary. The CVS log entry for revision 2.45
3740 * says:
3742 * Change the Fini function to only remove otherwise unreferenced
3743 * strings from the interned table. There are references in
3744 * hard-to-find static variables all over the interpreter, and it's not
3745 * worth trying to get rid of all those; but "uninterning" isn't fair
3746 * either and may cause subtle failures later -- so we have to keep them
3747 * in the interned table.
3749 static PyObject *interned;
3751 void
3752 PyString_InternInPlace(PyObject **p)
3754 register PyStringObject *s = (PyStringObject *)(*p);
3755 PyObject *t;
3756 if (s == NULL || !PyString_Check(s))
3757 Py_FatalError("PyString_InternInPlace: strings only please!");
3758 if ((t = s->ob_sinterned) != NULL) {
3759 if (t == (PyObject *)s)
3760 return;
3761 Py_INCREF(t);
3762 *p = t;
3763 Py_DECREF(s);
3764 return;
3766 if (interned == NULL) {
3767 interned = PyDict_New();
3768 if (interned == NULL)
3769 return;
3771 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3772 Py_INCREF(t);
3773 *p = s->ob_sinterned = t;
3774 Py_DECREF(s);
3775 return;
3777 /* Ensure that only true string objects appear in the intern dict,
3778 and as the value of ob_sinterned. */
3779 if (PyString_CheckExact(s)) {
3780 t = (PyObject *)s;
3781 if (PyDict_SetItem(interned, t, t) == 0) {
3782 s->ob_sinterned = t;
3783 return;
3786 else {
3787 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
3788 PyString_GET_SIZE(s));
3789 if (t != NULL) {
3790 if (PyDict_SetItem(interned, t, t) == 0) {
3791 *p = s->ob_sinterned = t;
3792 Py_DECREF(s);
3793 return;
3795 Py_DECREF(t);
3798 PyErr_Clear();
3802 PyObject *
3803 PyString_InternFromString(const char *cp)
3805 PyObject *s = PyString_FromString(cp);
3806 if (s == NULL)
3807 return NULL;
3808 PyString_InternInPlace(&s);
3809 return s;
3812 #endif
3814 void
3815 PyString_Fini(void)
3817 int i;
3818 for (i = 0; i < UCHAR_MAX + 1; i++) {
3819 Py_XDECREF(characters[i]);
3820 characters[i] = NULL;
3822 #ifndef DONT_SHARE_SHORT_STRINGS
3823 Py_XDECREF(nullstring);
3824 nullstring = NULL;
3825 #endif
3826 #ifdef INTERN_STRINGS
3827 if (interned) {
3828 int pos, changed;
3829 PyObject *key, *value;
3830 do {
3831 changed = 0;
3832 pos = 0;
3833 while (PyDict_Next(interned, &pos, &key, &value)) {
3834 if (key->ob_refcnt == 2 && key == value) {
3835 PyDict_DelItem(interned, key);
3836 changed = 1;
3839 } while (changed);
3841 #endif
#ifdef INTERN_STRINGS
/* Empty and release the intern table, if one exists, after printing a
   notice to stderr.  The table pointer is reset to NULL. */
void _Py_ReleaseInternedStrings(void)
{
	if (interned == NULL)
		return;

	fprintf(stderr, "releasing interned strings\n");
	PyDict_Clear(interned);
	Py_DECREF(interned);
	interned = NULL;
}
#endif /* INTERN_STRINGS */