This commit was manufactured by cvs2svn to create tag 'r22a4-fork'.
[python/dscho.git] / Objects / stringobject.c
blobe29be5a07007bfc696b5783f0b57180d845def25
2 /* String object implementation */
4 #include "Python.h"
6 #include <ctype.h>
8 #ifdef COUNT_ALLOCS
9 int null_strings, one_strings;
10 #endif
12 #if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
13 #define UCHAR_MAX 255
14 #endif
16 static PyStringObject *characters[UCHAR_MAX + 1];
17 #ifndef DONT_SHARE_SHORT_STRINGS
18 static PyStringObject *nullstring;
19 #endif
/*
   Newsizedstringobject() and newstringobject() try in certain cases
   to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string is known.  For
   newstringobject() this is always the case, for
   newsizedstringobject() this is the case when the first argument is
   not NULL.
   A common practice to allocate a string and then fill it in or
   change it must be done carefully.  It is only allowed to change the
   contents of the string if the object was gotten from
   newsizedstringobject() with a NULL first argument, because in the
   future these routines may try to do even more sharing of objects.
*/
36 PyObject *
37 PyString_FromStringAndSize(const char *str, int size)
39 register PyStringObject *op;
40 #ifndef DONT_SHARE_SHORT_STRINGS
41 if (size == 0 && (op = nullstring) != NULL) {
42 #ifdef COUNT_ALLOCS
43 null_strings++;
44 #endif
45 Py_INCREF(op);
46 return (PyObject *)op;
48 if (size == 1 && str != NULL &&
49 (op = characters[*str & UCHAR_MAX]) != NULL)
51 #ifdef COUNT_ALLOCS
52 one_strings++;
53 #endif
54 Py_INCREF(op);
55 return (PyObject *)op;
57 #endif /* DONT_SHARE_SHORT_STRINGS */
59 /* PyObject_NewVar is inlined */
60 op = (PyStringObject *)
61 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
62 if (op == NULL)
63 return PyErr_NoMemory();
64 PyObject_INIT_VAR(op, &PyString_Type, size);
65 #ifdef CACHE_HASH
66 op->ob_shash = -1;
67 #endif
68 #ifdef INTERN_STRINGS
69 op->ob_sinterned = NULL;
70 #endif
71 if (str != NULL)
72 memcpy(op->ob_sval, str, size);
73 op->ob_sval[size] = '\0';
74 #ifndef DONT_SHARE_SHORT_STRINGS
75 if (size == 0) {
76 PyObject *t = (PyObject *)op;
77 PyString_InternInPlace(&t);
78 op = (PyStringObject *)t;
79 nullstring = op;
80 Py_INCREF(op);
81 } else if (size == 1 && str != NULL) {
82 PyObject *t = (PyObject *)op;
83 PyString_InternInPlace(&t);
84 op = (PyStringObject *)t;
85 characters[*str & UCHAR_MAX] = op;
86 Py_INCREF(op);
88 #endif
89 return (PyObject *) op;
92 PyObject *
93 PyString_FromString(const char *str)
95 register size_t size = strlen(str);
96 register PyStringObject *op;
97 if (size > INT_MAX) {
98 PyErr_SetString(PyExc_OverflowError,
99 "string is too long for a Python string");
100 return NULL;
102 #ifndef DONT_SHARE_SHORT_STRINGS
103 if (size == 0 && (op = nullstring) != NULL) {
104 #ifdef COUNT_ALLOCS
105 null_strings++;
106 #endif
107 Py_INCREF(op);
108 return (PyObject *)op;
110 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
111 #ifdef COUNT_ALLOCS
112 one_strings++;
113 #endif
114 Py_INCREF(op);
115 return (PyObject *)op;
117 #endif /* DONT_SHARE_SHORT_STRINGS */
119 /* PyObject_NewVar is inlined */
120 op = (PyStringObject *)
121 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
122 if (op == NULL)
123 return PyErr_NoMemory();
124 PyObject_INIT_VAR(op, &PyString_Type, size);
125 #ifdef CACHE_HASH
126 op->ob_shash = -1;
127 #endif
128 #ifdef INTERN_STRINGS
129 op->ob_sinterned = NULL;
130 #endif
131 strcpy(op->ob_sval, str);
132 #ifndef DONT_SHARE_SHORT_STRINGS
133 if (size == 0) {
134 PyObject *t = (PyObject *)op;
135 PyString_InternInPlace(&t);
136 op = (PyStringObject *)t;
137 nullstring = op;
138 Py_INCREF(op);
139 } else if (size == 1) {
140 PyObject *t = (PyObject *)op;
141 PyString_InternInPlace(&t);
142 op = (PyStringObject *)t;
143 characters[*str & UCHAR_MAX] = op;
144 Py_INCREF(op);
146 #endif
147 return (PyObject *) op;
150 PyObject *
151 PyString_FromFormatV(const char *format, va_list vargs)
153 va_list count = vargs;
154 int n = 0;
155 const char* f;
156 char *s;
157 PyObject* string;
159 /* step 1: figure out how large a buffer we need */
160 for (f = format; *f; f++) {
161 if (*f == '%') {
162 const char* p = f;
163 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
166 /* skip the 'l' in %ld, since it doesn't change the
167 width. although only %d is supported (see
168 "expand" section below), others can be easily
169 add */
170 if (*f == 'l' && *(f+1) == 'd')
171 ++f;
173 switch (*f) {
174 case 'c':
175 (void)va_arg(count, int);
176 /* fall through... */
177 case '%':
178 n++;
179 break;
180 case 'd': case 'i': case 'x':
181 (void) va_arg(count, int);
182 /* 20 bytes should be enough to hold a 64-bit
183 integer */
184 n += 20;
185 break;
186 case 's':
187 s = va_arg(count, char*);
188 n += strlen(s);
189 break;
190 case 'p':
191 (void) va_arg(count, int);
192 /* maximum 64-bit pointer representation:
193 * 0xffffffffffffffff
194 * so 19 characters is enough.
196 n += 19;
197 break;
198 default:
199 /* if we stumble upon an unknown
200 formatting code, copy the rest of
201 the format string to the output
202 string. (we cannot just skip the
203 code, since there's no way to know
204 what's in the argument list) */
205 n += strlen(p);
206 goto expand;
208 } else
209 n++;
211 expand:
212 /* step 2: fill the buffer */
213 string = PyString_FromStringAndSize(NULL, n);
214 if (!string)
215 return NULL;
217 s = PyString_AsString(string);
219 for (f = format; *f; f++) {
220 if (*f == '%') {
221 const char* p = f++;
222 int i, longflag = 0;
223 /* parse the width.precision part (we're only
224 interested in the precision value, if any) */
225 n = 0;
226 while (isdigit(Py_CHARMASK(*f)))
227 n = (n*10) + *f++ - '0';
228 if (*f == '.') {
229 f++;
230 n = 0;
231 while (isdigit(Py_CHARMASK(*f)))
232 n = (n*10) + *f++ - '0';
234 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
235 f++;
236 /* handle the long flag, but only for %ld. others
237 can be added when necessary. */
238 if (*f == 'l' && *(f+1) == 'd') {
239 longflag = 1;
240 ++f;
243 switch (*f) {
244 case 'c':
245 *s++ = va_arg(vargs, int);
246 break;
247 case 'd':
248 if (longflag)
249 sprintf(s, "%ld", va_arg(vargs, long));
250 else
251 sprintf(s, "%d", va_arg(vargs, int));
252 s += strlen(s);
253 break;
254 case 'i':
255 sprintf(s, "%i", va_arg(vargs, int));
256 s += strlen(s);
257 break;
258 case 'x':
259 sprintf(s, "%x", va_arg(vargs, int));
260 s += strlen(s);
261 break;
262 case 's':
263 p = va_arg(vargs, char*);
264 i = strlen(p);
265 if (n > 0 && i > n)
266 i = n;
267 memcpy(s, p, i);
268 s += i;
269 break;
270 case 'p':
271 sprintf(s, "%p", va_arg(vargs, void*));
272 /* %p is ill-defined: ensure leading 0x. */
273 if (s[1] == 'X')
274 s[1] = 'x';
275 else if (s[1] != 'x') {
276 memmove(s+2, s, strlen(s)+1);
277 s[0] = '0';
278 s[1] = 'x';
280 s += strlen(s);
281 break;
282 case '%':
283 *s++ = '%';
284 break;
285 default:
286 strcpy(s, p);
287 s += strlen(s);
288 goto end;
290 } else
291 *s++ = *f;
294 end:
295 _PyString_Resize(&string, s - PyString_AS_STRING(string));
296 return string;
299 PyObject *
300 PyString_FromFormat(const char *format, ...)
302 PyObject* ret;
303 va_list vargs;
305 #ifdef HAVE_STDARG_PROTOTYPES
306 va_start(vargs, format);
307 #else
308 va_start(vargs);
309 #endif
310 ret = PyString_FromFormatV(format, vargs);
311 va_end(vargs);
312 return ret;
316 PyObject *PyString_Decode(const char *s,
317 int size,
318 const char *encoding,
319 const char *errors)
321 PyObject *v, *str;
323 str = PyString_FromStringAndSize(s, size);
324 if (str == NULL)
325 return NULL;
326 v = PyString_AsDecodedString(str, encoding, errors);
327 Py_DECREF(str);
328 return v;
331 PyObject *PyString_AsDecodedObject(PyObject *str,
332 const char *encoding,
333 const char *errors)
335 PyObject *v;
337 if (!PyString_Check(str)) {
338 PyErr_BadArgument();
339 goto onError;
342 if (encoding == NULL) {
343 #ifdef Py_USING_UNICODE
344 encoding = PyUnicode_GetDefaultEncoding();
345 #else
346 PyErr_SetString(PyExc_ValueError, "no encoding specified");
347 goto onError;
348 #endif
351 /* Decode via the codec registry */
352 v = PyCodec_Decode(str, encoding, errors);
353 if (v == NULL)
354 goto onError;
356 return v;
358 onError:
359 return NULL;
362 PyObject *PyString_AsDecodedString(PyObject *str,
363 const char *encoding,
364 const char *errors)
366 PyObject *v;
368 v = PyString_AsDecodedObject(str, encoding, errors);
369 if (v == NULL)
370 goto onError;
372 #ifdef Py_USING_UNICODE
373 /* Convert Unicode to a string using the default encoding */
374 if (PyUnicode_Check(v)) {
375 PyObject *temp = v;
376 v = PyUnicode_AsEncodedString(v, NULL, NULL);
377 Py_DECREF(temp);
378 if (v == NULL)
379 goto onError;
381 #endif
382 if (!PyString_Check(v)) {
383 PyErr_Format(PyExc_TypeError,
384 "decoder did not return a string object (type=%.400s)",
385 v->ob_type->tp_name);
386 Py_DECREF(v);
387 goto onError;
390 return v;
392 onError:
393 return NULL;
396 PyObject *PyString_Encode(const char *s,
397 int size,
398 const char *encoding,
399 const char *errors)
401 PyObject *v, *str;
403 str = PyString_FromStringAndSize(s, size);
404 if (str == NULL)
405 return NULL;
406 v = PyString_AsEncodedString(str, encoding, errors);
407 Py_DECREF(str);
408 return v;
411 PyObject *PyString_AsEncodedObject(PyObject *str,
412 const char *encoding,
413 const char *errors)
415 PyObject *v;
417 if (!PyString_Check(str)) {
418 PyErr_BadArgument();
419 goto onError;
422 if (encoding == NULL) {
423 #ifdef Py_USING_UNICODE
424 encoding = PyUnicode_GetDefaultEncoding();
425 #else
426 PyErr_SetString(PyExc_ValueError, "no encoding specified");
427 goto onError;
428 #endif
431 /* Encode via the codec registry */
432 v = PyCodec_Encode(str, encoding, errors);
433 if (v == NULL)
434 goto onError;
436 return v;
438 onError:
439 return NULL;
442 PyObject *PyString_AsEncodedString(PyObject *str,
443 const char *encoding,
444 const char *errors)
446 PyObject *v;
448 v = PyString_AsEncodedObject(str, encoding, errors);
449 if (v == NULL)
450 goto onError;
452 #ifdef Py_USING_UNICODE
453 /* Convert Unicode to a string using the default encoding */
454 if (PyUnicode_Check(v)) {
455 PyObject *temp = v;
456 v = PyUnicode_AsEncodedString(v, NULL, NULL);
457 Py_DECREF(temp);
458 if (v == NULL)
459 goto onError;
461 #endif
462 if (!PyString_Check(v)) {
463 PyErr_Format(PyExc_TypeError,
464 "encoder did not return a string object (type=%.400s)",
465 v->ob_type->tp_name);
466 Py_DECREF(v);
467 goto onError;
470 return v;
472 onError:
473 return NULL;
/* Deallocator: hands the object's memory back via PyObject_DEL(). */
static void
string_dealloc(PyObject *op)
{
    PyObject_DEL(op);
}
482 static int
483 string_getsize(register PyObject *op)
485 char *s;
486 int len;
487 if (PyString_AsStringAndSize(op, &s, &len))
488 return -1;
489 return len;
492 static /*const*/ char *
493 string_getbuffer(register PyObject *op)
495 char *s;
496 int len;
497 if (PyString_AsStringAndSize(op, &s, &len))
498 return NULL;
499 return s;
503 PyString_Size(register PyObject *op)
505 if (!PyString_Check(op))
506 return string_getsize(op);
507 return ((PyStringObject *)op) -> ob_size;
510 /*const*/ char *
511 PyString_AsString(register PyObject *op)
513 if (!PyString_Check(op))
514 return string_getbuffer(op);
515 return ((PyStringObject *)op) -> ob_sval;
519 PyString_AsStringAndSize(register PyObject *obj,
520 register char **s,
521 register int *len)
523 if (s == NULL) {
524 PyErr_BadInternalCall();
525 return -1;
528 if (!PyString_Check(obj)) {
529 #ifdef Py_USING_UNICODE
530 if (PyUnicode_Check(obj)) {
531 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
532 if (obj == NULL)
533 return -1;
535 else
536 #endif
538 PyErr_Format(PyExc_TypeError,
539 "expected string or Unicode object, "
540 "%.200s found", obj->ob_type->tp_name);
541 return -1;
545 *s = PyString_AS_STRING(obj);
546 if (len != NULL)
547 *len = PyString_GET_SIZE(obj);
548 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
549 PyErr_SetString(PyExc_TypeError,
550 "expected string without null bytes");
551 return -1;
553 return 0;
556 /* Methods */
558 static int
559 string_print(PyStringObject *op, FILE *fp, int flags)
561 int i;
562 char c;
563 int quote;
564 /* XXX Ought to check for interrupts when writing long strings */
565 if (flags & Py_PRINT_RAW) {
566 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
567 return 0;
570 /* figure out which quote to use; single is preferred */
571 quote = '\'';
572 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
573 quote = '"';
575 fputc(quote, fp);
576 for (i = 0; i < op->ob_size; i++) {
577 c = op->ob_sval[i];
578 if (c == quote || c == '\\')
579 fprintf(fp, "\\%c", c);
580 else if (c == '\t')
581 fprintf(fp, "\\t");
582 else if (c == '\n')
583 fprintf(fp, "\\n");
584 else if (c == '\r')
585 fprintf(fp, "\\r");
586 else if (c < ' ' || c >= 0x7f)
587 fprintf(fp, "\\x%02x", c & 0xff);
588 else
589 fputc(c, fp);
591 fputc(quote, fp);
592 return 0;
595 static PyObject *
596 string_repr(register PyStringObject *op)
598 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
599 PyObject *v;
600 if (newsize > INT_MAX) {
601 PyErr_SetString(PyExc_OverflowError,
602 "string is too large to make repr");
604 v = PyString_FromStringAndSize((char *)NULL, newsize);
605 if (v == NULL) {
606 return NULL;
608 else {
609 register int i;
610 register char c;
611 register char *p;
612 int quote;
614 /* figure out which quote to use; single is preferred */
615 quote = '\'';
616 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
617 quote = '"';
619 p = ((PyStringObject *)v)->ob_sval;
620 *p++ = quote;
621 for (i = 0; i < op->ob_size; i++) {
622 c = op->ob_sval[i];
623 if (c == quote || c == '\\')
624 *p++ = '\\', *p++ = c;
625 else if (c == '\t')
626 *p++ = '\\', *p++ = 't';
627 else if (c == '\n')
628 *p++ = '\\', *p++ = 'n';
629 else if (c == '\r')
630 *p++ = '\\', *p++ = 'r';
631 else if (c < ' ' || c >= 0x7f) {
632 sprintf(p, "\\x%02x", c & 0xff);
633 p += 4;
635 else
636 *p++ = c;
638 *p++ = quote;
639 *p = '\0';
640 _PyString_Resize(
641 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
642 return v;
/* Returns `s` itself with its reference count incremented. */
static PyObject *
string_str(PyObject *s)
{
    Py_INCREF(s);
    return s;
}
/* Returns the ob_size field of `a`. */
static int
string_length(PyStringObject *a)
{
    return a->ob_size;
}
659 static PyObject *
660 string_concat(register PyStringObject *a, register PyObject *bb)
662 register unsigned int size;
663 register PyStringObject *op;
664 if (!PyString_Check(bb)) {
665 #ifdef Py_USING_UNICODE
666 if (PyUnicode_Check(bb))
667 return PyUnicode_Concat((PyObject *)a, bb);
668 #endif
669 PyErr_Format(PyExc_TypeError,
670 "cannot add type \"%.200s\" to string",
671 bb->ob_type->tp_name);
672 return NULL;
674 #define b ((PyStringObject *)bb)
675 /* Optimize cases with empty left or right operand */
676 if ((a->ob_size == 0 || b->ob_size == 0) &&
677 PyString_CheckExact(a) && PyString_CheckExact(b)) {
678 if (a->ob_size == 0) {
679 Py_INCREF(bb);
680 return bb;
682 Py_INCREF(a);
683 return (PyObject *)a;
685 size = a->ob_size + b->ob_size;
686 /* PyObject_NewVar is inlined */
687 op = (PyStringObject *)
688 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
689 if (op == NULL)
690 return PyErr_NoMemory();
691 PyObject_INIT_VAR(op, &PyString_Type, size);
692 #ifdef CACHE_HASH
693 op->ob_shash = -1;
694 #endif
695 #ifdef INTERN_STRINGS
696 op->ob_sinterned = NULL;
697 #endif
698 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
699 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
700 op->ob_sval[size] = '\0';
701 return (PyObject *) op;
702 #undef b
705 static PyObject *
706 string_repeat(register PyStringObject *a, register int n)
708 register int i;
709 register int size;
710 register PyStringObject *op;
711 size_t nbytes;
712 if (n < 0)
713 n = 0;
714 /* watch out for overflows: the size can overflow int,
715 * and the # of bytes needed can overflow size_t
717 size = a->ob_size * n;
718 if (n && size / n != a->ob_size) {
719 PyErr_SetString(PyExc_OverflowError,
720 "repeated string is too long");
721 return NULL;
723 if (size == a->ob_size && PyString_CheckExact(a)) {
724 Py_INCREF(a);
725 return (PyObject *)a;
727 nbytes = size * sizeof(char);
728 if (nbytes / sizeof(char) != (size_t)size ||
729 nbytes + sizeof(PyStringObject) <= nbytes) {
730 PyErr_SetString(PyExc_OverflowError,
731 "repeated string is too long");
732 return NULL;
734 op = (PyStringObject *)
735 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
736 if (op == NULL)
737 return PyErr_NoMemory();
738 PyObject_INIT_VAR(op, &PyString_Type, size);
739 #ifdef CACHE_HASH
740 op->ob_shash = -1;
741 #endif
742 #ifdef INTERN_STRINGS
743 op->ob_sinterned = NULL;
744 #endif
745 for (i = 0; i < size; i += a->ob_size)
746 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
747 op->ob_sval[size] = '\0';
748 return (PyObject *) op;
751 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
753 static PyObject *
754 string_slice(register PyStringObject *a, register int i, register int j)
755 /* j -- may be negative! */
757 if (i < 0)
758 i = 0;
759 if (j < 0)
760 j = 0; /* Avoid signed/unsigned bug in next line */
761 if (j > a->ob_size)
762 j = a->ob_size;
763 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
764 /* It's the same as a */
765 Py_INCREF(a);
766 return (PyObject *)a;
768 if (j < i)
769 j = i;
770 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
773 static int
774 string_contains(PyObject *a, PyObject *el)
776 register char *s, *end;
777 register char c;
778 #ifdef Py_USING_UNICODE
779 if (PyUnicode_Check(el))
780 return PyUnicode_Contains(a, el);
781 #endif
782 if (!PyString_Check(el) || PyString_Size(el) != 1) {
783 PyErr_SetString(PyExc_TypeError,
784 "'in <string>' requires character as left operand");
785 return -1;
787 c = PyString_AsString(el)[0];
788 s = PyString_AsString(a);
789 end = s + PyString_Size(a);
790 while (s < end) {
791 if (c == *s++)
792 return 1;
794 return 0;
797 static PyObject *
798 string_item(PyStringObject *a, register int i)
800 PyObject *v;
801 char *pchar;
802 if (i < 0 || i >= a->ob_size) {
803 PyErr_SetString(PyExc_IndexError, "string index out of range");
804 return NULL;
806 pchar = a->ob_sval + i;
807 v = (PyObject *)characters[*pchar & UCHAR_MAX];
808 if (v == NULL)
809 v = PyString_FromStringAndSize(pchar, 1);
810 else {
811 #ifdef COUNT_ALLOCS
812 one_strings++;
813 #endif
814 Py_INCREF(v);
816 return v;
819 static PyObject*
820 string_richcompare(PyStringObject *a, PyStringObject *b, int op)
822 int c;
823 int len_a, len_b;
824 int min_len;
825 PyObject *result;
827 /* May sure both arguments use string comparison.
828 This implies PyString_Check(a) && PyString_Check(b). */
829 if (a->ob_type->tp_richcompare != (richcmpfunc)string_richcompare ||
830 b->ob_type->tp_richcompare != (richcmpfunc)string_richcompare) {
831 result = Py_NotImplemented;
832 goto out;
834 if (a == b) {
835 switch (op) {
836 case Py_EQ:case Py_LE:case Py_GE:
837 result = Py_True;
838 goto out;
839 case Py_NE:case Py_LT:case Py_GT:
840 result = Py_False;
841 goto out;
844 if (op == Py_EQ) {
845 /* Supporting Py_NE here as well does not save
846 much time, since Py_NE is rarely used. */
847 if (a->ob_size == b->ob_size
848 && (a->ob_sval[0] == b->ob_sval[0]
849 && memcmp(a->ob_sval, b->ob_sval,
850 a->ob_size) == 0)) {
851 result = Py_True;
852 } else {
853 result = Py_False;
855 goto out;
857 len_a = a->ob_size; len_b = b->ob_size;
858 min_len = (len_a < len_b) ? len_a : len_b;
859 if (min_len > 0) {
860 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
861 if (c==0)
862 c = memcmp(a->ob_sval, b->ob_sval, min_len);
863 }else
864 c = 0;
865 if (c == 0)
866 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
867 switch (op) {
868 case Py_LT: c = c < 0; break;
869 case Py_LE: c = c <= 0; break;
870 case Py_EQ: assert(0); break; /* unreachable */
871 case Py_NE: c = c != 0; break;
872 case Py_GT: c = c > 0; break;
873 case Py_GE: c = c >= 0; break;
874 default:
875 result = Py_NotImplemented;
876 goto out;
878 result = c ? Py_True : Py_False;
879 out:
880 Py_INCREF(result);
881 return result;
885 _PyString_Eq(PyObject *o1, PyObject *o2)
887 PyStringObject *a, *b;
888 a = (PyStringObject*)o1;
889 b = (PyStringObject*)o2;
890 return a->ob_size == b->ob_size
891 && *a->ob_sval == *b->ob_sval
892 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
895 static long
896 string_hash(PyStringObject *a)
898 register int len;
899 register unsigned char *p;
900 register long x;
902 #ifdef CACHE_HASH
903 if (a->ob_shash != -1)
904 return a->ob_shash;
905 #ifdef INTERN_STRINGS
906 if (a->ob_sinterned != NULL)
907 return (a->ob_shash =
908 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
909 #endif
910 #endif
911 len = a->ob_size;
912 p = (unsigned char *) a->ob_sval;
913 x = *p << 7;
914 while (--len >= 0)
915 x = (1000003*x) ^ *p++;
916 x ^= a->ob_size;
917 if (x == -1)
918 x = -2;
919 #ifdef CACHE_HASH
920 a->ob_shash = x;
921 #endif
922 return x;
925 static int
926 string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
928 if ( index != 0 ) {
929 PyErr_SetString(PyExc_SystemError,
930 "accessing non-existent string segment");
931 return -1;
933 *ptr = (void *)self->ob_sval;
934 return self->ob_size;
/* Write-buffer accessor: always fails with TypeError and returns -1;
   `index` and `ptr` are not used. */
static int
string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
{
    PyErr_SetString(PyExc_TypeError,
                    "Cannot use string as modifiable buffer");
    return -1;
}
945 static int
946 string_buffer_getsegcount(PyStringObject *self, int *lenp)
948 if ( lenp )
949 *lenp = self->ob_size;
950 return 1;
953 static int
954 string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
956 if ( index != 0 ) {
957 PyErr_SetString(PyExc_SystemError,
958 "accessing non-existent string segment");
959 return -1;
961 *ptr = self->ob_sval;
962 return self->ob_size;
/* Sequence slot table for strings.  The two assignment slots are 0,
   so no item or slice assignment handler is installed. */
static PySequenceMethods string_as_sequence = {
    (inquiry)string_length, /*sq_length*/
    (binaryfunc)string_concat, /*sq_concat*/
    (intargfunc)string_repeat, /*sq_repeat*/
    (intargfunc)string_item, /*sq_item*/
    (intintargfunc)string_slice, /*sq_slice*/
    0, /*sq_ass_item*/
    0, /*sq_ass_slice*/
    (objobjproc)string_contains /*sq_contains*/
};
/* Buffer slot table for strings: read, write, segment-count and
   character-buffer accessors, in that order. */
static PyBufferProcs string_as_buffer = {
    (getreadbufferproc)string_buffer_getreadbuf,
    (getwritebufferproc)string_buffer_getwritebuf,
    (getsegcountproc)string_buffer_getsegcount,
    (getcharbufferproc)string_buffer_getcharbuf,
};
985 #define LEFTSTRIP 0
986 #define RIGHTSTRIP 1
987 #define BOTHSTRIP 2
990 static PyObject *
991 split_whitespace(const char *s, int len, int maxsplit)
993 int i, j, err;
994 PyObject* item;
995 PyObject *list = PyList_New(0);
997 if (list == NULL)
998 return NULL;
1000 for (i = j = 0; i < len; ) {
1001 while (i < len && isspace(Py_CHARMASK(s[i])))
1002 i++;
1003 j = i;
1004 while (i < len && !isspace(Py_CHARMASK(s[i])))
1005 i++;
1006 if (j < i) {
1007 if (maxsplit-- <= 0)
1008 break;
1009 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1010 if (item == NULL)
1011 goto finally;
1012 err = PyList_Append(list, item);
1013 Py_DECREF(item);
1014 if (err < 0)
1015 goto finally;
1016 while (i < len && isspace(Py_CHARMASK(s[i])))
1017 i++;
1018 j = i;
1021 if (j < len) {
1022 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1023 if (item == NULL)
1024 goto finally;
1025 err = PyList_Append(list, item);
1026 Py_DECREF(item);
1027 if (err < 0)
1028 goto finally;
1030 return list;
1031 finally:
1032 Py_DECREF(list);
1033 return NULL;
1037 static char split__doc__[] =
1038 "S.split([sep [,maxsplit]]) -> list of strings\n\
1040 Return a list of the words in the string S, using sep as the\n\
1041 delimiter string. If maxsplit is given, at most maxsplit\n\
1042 splits are done. If sep is not specified, any whitespace string\n\
1043 is a separator.";
1045 static PyObject *
1046 string_split(PyStringObject *self, PyObject *args)
1048 int len = PyString_GET_SIZE(self), n, i, j, err;
1049 int maxsplit = -1;
1050 const char *s = PyString_AS_STRING(self), *sub;
1051 PyObject *list, *item, *subobj = Py_None;
1053 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
1054 return NULL;
1055 if (maxsplit < 0)
1056 maxsplit = INT_MAX;
1057 if (subobj == Py_None)
1058 return split_whitespace(s, len, maxsplit);
1059 if (PyString_Check(subobj)) {
1060 sub = PyString_AS_STRING(subobj);
1061 n = PyString_GET_SIZE(subobj);
1063 #ifdef Py_USING_UNICODE
1064 else if (PyUnicode_Check(subobj))
1065 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1066 #endif
1067 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1068 return NULL;
1069 if (n == 0) {
1070 PyErr_SetString(PyExc_ValueError, "empty separator");
1071 return NULL;
1074 list = PyList_New(0);
1075 if (list == NULL)
1076 return NULL;
1078 i = j = 0;
1079 while (i+n <= len) {
1080 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1081 if (maxsplit-- <= 0)
1082 break;
1083 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1084 if (item == NULL)
1085 goto fail;
1086 err = PyList_Append(list, item);
1087 Py_DECREF(item);
1088 if (err < 0)
1089 goto fail;
1090 i = j = i + n;
1092 else
1093 i++;
1095 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1096 if (item == NULL)
1097 goto fail;
1098 err = PyList_Append(list, item);
1099 Py_DECREF(item);
1100 if (err < 0)
1101 goto fail;
1103 return list;
1105 fail:
1106 Py_DECREF(list);
1107 return NULL;
1111 static char join__doc__[] =
1112 "S.join(sequence) -> string\n\
1114 Return a string which is the concatenation of the strings in the\n\
1115 sequence. The separator between elements is S.";
1117 static PyObject *
1118 string_join(PyStringObject *self, PyObject *orig)
1120 char *sep = PyString_AS_STRING(self);
1121 const int seplen = PyString_GET_SIZE(self);
1122 PyObject *res = NULL;
1123 char *p;
1124 int seqlen = 0;
1125 size_t sz = 0;
1126 int i;
1127 PyObject *seq, *item;
1129 seq = PySequence_Fast(orig, "");
1130 if (seq == NULL) {
1131 if (PyErr_ExceptionMatches(PyExc_TypeError))
1132 PyErr_Format(PyExc_TypeError,
1133 "sequence expected, %.80s found",
1134 orig->ob_type->tp_name);
1135 return NULL;
1138 seqlen = PySequence_Size(seq);
1139 if (seqlen == 0) {
1140 Py_DECREF(seq);
1141 return PyString_FromString("");
1143 if (seqlen == 1) {
1144 item = PySequence_Fast_GET_ITEM(seq, 0);
1145 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1146 PyErr_Format(PyExc_TypeError,
1147 "sequence item 0: expected string,"
1148 " %.80s found",
1149 item->ob_type->tp_name);
1150 Py_DECREF(seq);
1151 return NULL;
1153 Py_INCREF(item);
1154 Py_DECREF(seq);
1155 return item;
1158 /* There are at least two things to join. Do a pre-pass to figure out
1159 * the total amount of space we'll need (sz), see whether any argument
1160 * is absurd, and defer to the Unicode join if appropriate.
1162 for (i = 0; i < seqlen; i++) {
1163 const size_t old_sz = sz;
1164 item = PySequence_Fast_GET_ITEM(seq, i);
1165 if (!PyString_Check(item)){
1166 #ifdef Py_USING_UNICODE
1167 if (PyUnicode_Check(item)) {
1168 /* Defer to Unicode join.
1169 * CAUTION: There's no gurantee that the
1170 * original sequence can be iterated over
1171 * again, so we must pass seq here.
1173 PyObject *result;
1174 result = PyUnicode_Join((PyObject *)self, seq);
1175 Py_DECREF(seq);
1176 return result;
1178 #endif
1179 PyErr_Format(PyExc_TypeError,
1180 "sequence item %i: expected string,"
1181 " %.80s found",
1182 i, item->ob_type->tp_name);
1183 Py_DECREF(seq);
1184 return NULL;
1186 sz += PyString_GET_SIZE(item);
1187 if (i != 0)
1188 sz += seplen;
1189 if (sz < old_sz || sz > INT_MAX) {
1190 PyErr_SetString(PyExc_OverflowError,
1191 "join() is too long for a Python string");
1192 Py_DECREF(seq);
1193 return NULL;
1197 /* Allocate result space. */
1198 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1199 if (res == NULL) {
1200 Py_DECREF(seq);
1201 return NULL;
1204 /* Catenate everything. */
1205 p = PyString_AS_STRING(res);
1206 for (i = 0; i < seqlen; ++i) {
1207 size_t n;
1208 item = PySequence_Fast_GET_ITEM(seq, i);
1209 n = PyString_GET_SIZE(item);
1210 memcpy(p, PyString_AS_STRING(item), n);
1211 p += n;
1212 if (i < seqlen - 1) {
1213 memcpy(p, sep, seplen);
1214 p += seplen;
1218 Py_DECREF(seq);
1219 return res;
/* Non-static entry point for string_join(): `sep` must be a non-NULL
   string object and `x` non-NULL (both checked only by assert). */
PyObject *
_PyString_Join(PyObject *sep, PyObject *x)
{
    assert(sep != NULL && PyString_Check(sep));
    assert(x != NULL);
    return string_join((PyStringObject *)sep, x);
}
1230 static long
1231 string_find_internal(PyStringObject *self, PyObject *args, int dir)
1233 const char *s = PyString_AS_STRING(self), *sub;
1234 int len = PyString_GET_SIZE(self);
1235 int n, i = 0, last = INT_MAX;
1236 PyObject *subobj;
1238 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
1239 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
1240 return -2;
1241 if (PyString_Check(subobj)) {
1242 sub = PyString_AS_STRING(subobj);
1243 n = PyString_GET_SIZE(subobj);
1245 #ifdef Py_USING_UNICODE
1246 else if (PyUnicode_Check(subobj))
1247 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
1248 #endif
1249 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1250 return -2;
1252 if (last > len)
1253 last = len;
1254 if (last < 0)
1255 last += len;
1256 if (last < 0)
1257 last = 0;
1258 if (i < 0)
1259 i += len;
1260 if (i < 0)
1261 i = 0;
1263 if (dir > 0) {
1264 if (n == 0 && i <= last)
1265 return (long)i;
1266 last -= n;
1267 for (; i <= last; ++i)
1268 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
1269 return (long)i;
1271 else {
1272 int j;
1274 if (n == 0 && i <= last)
1275 return (long)last;
1276 for (j = last-n; j >= i; --j)
1277 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
1278 return (long)j;
1281 return -1;
1285 static char find__doc__[] =
1286 "S.find(sub [,start [,end]]) -> int\n\
1288 Return the lowest index in S where substring sub is found,\n\
1289 such that sub is contained within s[start,end]. Optional\n\
1290 arguments start and end are interpreted as in slice notation.\n\
1292 Return -1 on failure.";
1294 static PyObject *
1295 string_find(PyStringObject *self, PyObject *args)
1297 long result = string_find_internal(self, args, +1);
1298 if (result == -2)
1299 return NULL;
1300 return PyInt_FromLong(result);
1304 static char index__doc__[] =
1305 "S.index(sub [,start [,end]]) -> int\n\
1307 Like S.find() but raise ValueError when the substring is not found.";
1309 static PyObject *
1310 string_index(PyStringObject *self, PyObject *args)
1312 long result = string_find_internal(self, args, +1);
1313 if (result == -2)
1314 return NULL;
1315 if (result == -1) {
1316 PyErr_SetString(PyExc_ValueError,
1317 "substring not found in string.index");
1318 return NULL;
1320 return PyInt_FromLong(result);
1324 static char rfind__doc__[] =
1325 "S.rfind(sub [,start [,end]]) -> int\n\
1327 Return the highest index in S where substring sub is found,\n\
1328 such that sub is contained within s[start,end]. Optional\n\
1329 arguments start and end are interpreted as in slice notation.\n\
1331 Return -1 on failure.";
1333 static PyObject *
1334 string_rfind(PyStringObject *self, PyObject *args)
1336 long result = string_find_internal(self, args, -1);
1337 if (result == -2)
1338 return NULL;
1339 return PyInt_FromLong(result);
1343 static char rindex__doc__[] =
1344 "S.rindex(sub [,start [,end]]) -> int\n\
1346 Like S.rfind() but raise ValueError when the substring is not found.";
1348 static PyObject *
1349 string_rindex(PyStringObject *self, PyObject *args)
1351 long result = string_find_internal(self, args, -1);
1352 if (result == -2)
1353 return NULL;
1354 if (result == -1) {
1355 PyErr_SetString(PyExc_ValueError,
1356 "substring not found in string.rindex");
1357 return NULL;
1359 return PyInt_FromLong(result);
1363 static PyObject *
1364 do_strip(PyStringObject *self, int striptype)
1366 char *s = PyString_AS_STRING(self);
1367 int len = PyString_GET_SIZE(self), i, j;
1369 i = 0;
1370 if (striptype != RIGHTSTRIP) {
1371 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1372 i++;
1376 j = len;
1377 if (striptype != LEFTSTRIP) {
1378 do {
1379 j--;
1380 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1381 j++;
1384 if (i == 0 && j == len && PyString_CheckExact(self)) {
1385 Py_INCREF(self);
1386 return (PyObject*)self;
1388 else
1389 return PyString_FromStringAndSize(s+i, j-i);
1393 static char strip__doc__[] =
1394 "S.strip() -> string\n\
1396 Return a copy of the string S with leading and trailing\n\
1397 whitespace removed.";
1399 static PyObject *
1400 string_strip(PyStringObject *self)
1402 return do_strip(self, BOTHSTRIP);
1406 static char lstrip__doc__[] =
1407 "S.lstrip() -> string\n\
1409 Return a copy of the string S with leading whitespace removed.";
1411 static PyObject *
1412 string_lstrip(PyStringObject *self)
1414 return do_strip(self, LEFTSTRIP);
1418 static char rstrip__doc__[] =
1419 "S.rstrip() -> string\n\
1421 Return a copy of the string S with trailing whitespace removed.";
1423 static PyObject *
1424 string_rstrip(PyStringObject *self)
1426 return do_strip(self, RIGHTSTRIP);
1430 static char lower__doc__[] =
1431 "S.lower() -> string\n\
1433 Return a copy of the string S converted to lowercase.";
1435 static PyObject *
1436 string_lower(PyStringObject *self)
1438 char *s = PyString_AS_STRING(self), *s_new;
1439 int i, n = PyString_GET_SIZE(self);
1440 PyObject *new;
1442 new = PyString_FromStringAndSize(NULL, n);
1443 if (new == NULL)
1444 return NULL;
1445 s_new = PyString_AsString(new);
1446 for (i = 0; i < n; i++) {
1447 int c = Py_CHARMASK(*s++);
1448 if (isupper(c)) {
1449 *s_new = tolower(c);
1450 } else
1451 *s_new = c;
1452 s_new++;
1454 return new;
1458 static char upper__doc__[] =
1459 "S.upper() -> string\n\
1461 Return a copy of the string S converted to uppercase.";
1463 static PyObject *
1464 string_upper(PyStringObject *self)
1466 char *s = PyString_AS_STRING(self), *s_new;
1467 int i, n = PyString_GET_SIZE(self);
1468 PyObject *new;
1470 new = PyString_FromStringAndSize(NULL, n);
1471 if (new == NULL)
1472 return NULL;
1473 s_new = PyString_AsString(new);
1474 for (i = 0; i < n; i++) {
1475 int c = Py_CHARMASK(*s++);
1476 if (islower(c)) {
1477 *s_new = toupper(c);
1478 } else
1479 *s_new = c;
1480 s_new++;
1482 return new;
1486 static char title__doc__[] =
1487 "S.title() -> string\n\
1489 Return a titlecased version of S, i.e. words start with uppercase\n\
1490 characters, all remaining cased characters have lowercase.";
1492 static PyObject*
1493 string_title(PyStringObject *self)
1495 char *s = PyString_AS_STRING(self), *s_new;
1496 int i, n = PyString_GET_SIZE(self);
1497 int previous_is_cased = 0;
1498 PyObject *new;
1500 new = PyString_FromStringAndSize(NULL, n);
1501 if (new == NULL)
1502 return NULL;
1503 s_new = PyString_AsString(new);
1504 for (i = 0; i < n; i++) {
1505 int c = Py_CHARMASK(*s++);
1506 if (islower(c)) {
1507 if (!previous_is_cased)
1508 c = toupper(c);
1509 previous_is_cased = 1;
1510 } else if (isupper(c)) {
1511 if (previous_is_cased)
1512 c = tolower(c);
1513 previous_is_cased = 1;
1514 } else
1515 previous_is_cased = 0;
1516 *s_new++ = c;
1518 return new;
1521 static char capitalize__doc__[] =
1522 "S.capitalize() -> string\n\
1524 Return a copy of the string S with only its first character\n\
1525 capitalized.";
1527 static PyObject *
1528 string_capitalize(PyStringObject *self)
1530 char *s = PyString_AS_STRING(self), *s_new;
1531 int i, n = PyString_GET_SIZE(self);
1532 PyObject *new;
1534 new = PyString_FromStringAndSize(NULL, n);
1535 if (new == NULL)
1536 return NULL;
1537 s_new = PyString_AsString(new);
1538 if (0 < n) {
1539 int c = Py_CHARMASK(*s++);
1540 if (islower(c))
1541 *s_new = toupper(c);
1542 else
1543 *s_new = c;
1544 s_new++;
1546 for (i = 1; i < n; i++) {
1547 int c = Py_CHARMASK(*s++);
1548 if (isupper(c))
1549 *s_new = tolower(c);
1550 else
1551 *s_new = c;
1552 s_new++;
1554 return new;
1558 static char count__doc__[] =
1559 "S.count(sub[, start[, end]]) -> int\n\
1561 Return the number of occurrences of substring sub in string\n\
1562 S[start:end]. Optional arguments start and end are\n\
1563 interpreted as in slice notation.";
1565 static PyObject *
1566 string_count(PyStringObject *self, PyObject *args)
1568 const char *s = PyString_AS_STRING(self), *sub;
1569 int len = PyString_GET_SIZE(self), n;
1570 int i = 0, last = INT_MAX;
1571 int m, r;
1572 PyObject *subobj;
1574 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1575 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
1576 return NULL;
1578 if (PyString_Check(subobj)) {
1579 sub = PyString_AS_STRING(subobj);
1580 n = PyString_GET_SIZE(subobj);
1582 #ifdef Py_USING_UNICODE
1583 else if (PyUnicode_Check(subobj)) {
1584 int count;
1585 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1586 if (count == -1)
1587 return NULL;
1588 else
1589 return PyInt_FromLong((long) count);
1591 #endif
1592 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1593 return NULL;
1595 if (last > len)
1596 last = len;
1597 if (last < 0)
1598 last += len;
1599 if (last < 0)
1600 last = 0;
1601 if (i < 0)
1602 i += len;
1603 if (i < 0)
1604 i = 0;
1605 m = last + 1 - n;
1606 if (n == 0)
1607 return PyInt_FromLong((long) (m-i));
1609 r = 0;
1610 while (i < m) {
1611 if (!memcmp(s+i, sub, n)) {
1612 r++;
1613 i += n;
1614 } else {
1615 i++;
1618 return PyInt_FromLong((long) r);
1622 static char swapcase__doc__[] =
1623 "S.swapcase() -> string\n\
1625 Return a copy of the string S with uppercase characters\n\
1626 converted to lowercase and vice versa.";
1628 static PyObject *
1629 string_swapcase(PyStringObject *self)
1631 char *s = PyString_AS_STRING(self), *s_new;
1632 int i, n = PyString_GET_SIZE(self);
1633 PyObject *new;
1635 new = PyString_FromStringAndSize(NULL, n);
1636 if (new == NULL)
1637 return NULL;
1638 s_new = PyString_AsString(new);
1639 for (i = 0; i < n; i++) {
1640 int c = Py_CHARMASK(*s++);
1641 if (islower(c)) {
1642 *s_new = toupper(c);
1644 else if (isupper(c)) {
1645 *s_new = tolower(c);
1647 else
1648 *s_new = c;
1649 s_new++;
1651 return new;
1655 static char translate__doc__[] =
1656 "S.translate(table [,deletechars]) -> string\n\
1658 Return a copy of the string S, where all characters occurring\n\
1659 in the optional argument deletechars are removed, and the\n\
1660 remaining characters have been mapped through the given\n\
1661 translation table, which must be a string of length 256.";
1663 static PyObject *
1664 string_translate(PyStringObject *self, PyObject *args)
1666 register char *input, *output;
1667 register const char *table;
1668 register int i, c, changed = 0;
1669 PyObject *input_obj = (PyObject*)self;
1670 const char *table1, *output_start, *del_table=NULL;
1671 int inlen, tablen, dellen = 0;
1672 PyObject *result;
1673 int trans_table[256];
1674 PyObject *tableobj, *delobj = NULL;
1676 if (!PyArg_ParseTuple(args, "O|O:translate",
1677 &tableobj, &delobj))
1678 return NULL;
1680 if (PyString_Check(tableobj)) {
1681 table1 = PyString_AS_STRING(tableobj);
1682 tablen = PyString_GET_SIZE(tableobj);
1684 #ifdef Py_USING_UNICODE
1685 else if (PyUnicode_Check(tableobj)) {
1686 /* Unicode .translate() does not support the deletechars
1687 parameter; instead a mapping to None will cause characters
1688 to be deleted. */
1689 if (delobj != NULL) {
1690 PyErr_SetString(PyExc_TypeError,
1691 "deletions are implemented differently for unicode");
1692 return NULL;
1694 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1696 #endif
1697 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
1698 return NULL;
1700 if (delobj != NULL) {
1701 if (PyString_Check(delobj)) {
1702 del_table = PyString_AS_STRING(delobj);
1703 dellen = PyString_GET_SIZE(delobj);
1705 #ifdef Py_USING_UNICODE
1706 else if (PyUnicode_Check(delobj)) {
1707 PyErr_SetString(PyExc_TypeError,
1708 "deletions are implemented differently for unicode");
1709 return NULL;
1711 #endif
1712 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1713 return NULL;
1715 if (tablen != 256) {
1716 PyErr_SetString(PyExc_ValueError,
1717 "translation table must be 256 characters long");
1718 return NULL;
1721 else {
1722 del_table = NULL;
1723 dellen = 0;
1726 table = table1;
1727 inlen = PyString_Size(input_obj);
1728 result = PyString_FromStringAndSize((char *)NULL, inlen);
1729 if (result == NULL)
1730 return NULL;
1731 output_start = output = PyString_AsString(result);
1732 input = PyString_AsString(input_obj);
1734 if (dellen == 0) {
1735 /* If no deletions are required, use faster code */
1736 for (i = inlen; --i >= 0; ) {
1737 c = Py_CHARMASK(*input++);
1738 if (Py_CHARMASK((*output++ = table[c])) != c)
1739 changed = 1;
1741 if (changed || !PyString_CheckExact(input_obj))
1742 return result;
1743 Py_DECREF(result);
1744 Py_INCREF(input_obj);
1745 return input_obj;
1748 for (i = 0; i < 256; i++)
1749 trans_table[i] = Py_CHARMASK(table[i]);
1751 for (i = 0; i < dellen; i++)
1752 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1754 for (i = inlen; --i >= 0; ) {
1755 c = Py_CHARMASK(*input++);
1756 if (trans_table[c] != -1)
1757 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1758 continue;
1759 changed = 1;
1761 if (!changed && PyString_CheckExact(input_obj)) {
1762 Py_DECREF(result);
1763 Py_INCREF(input_obj);
1764 return input_obj;
1766 /* Fix the size of the resulting string */
1767 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1768 return NULL;
1769 return result;
1773 /* What follows is used for implementing replace(). Perry Stoll. */
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the index into MEM of that occurrence, or -1
  if there is none.  A PAT longer than MEM always yields -1.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
	/* A match cannot start within the final pat_len-1 bytes. */
	const int last_start = len - pat_len;
	int pos;

	for (pos = 0; pos <= last_start; pos++) {
		/* Cheap first-byte test before the full comparison. */
		if (mem[pos] != pat[0])
			continue;
		if (memcmp(mem + pos, pat, pat_len) == 0)
			return pos;
	}
	return -1;
}
/*
  mymemcnt

  Return the number of non-overlapping occurrences of PAT in MEM,
  counted left to right: mem=1111 with pat=11 gives 2, and mem=11111
  with pat=11 gives 2 as well.
*/
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
	int hits = 0;
	int at;

	while (len >= 0 &&
	       (at = mymemfind(mem, len, pat, pat_len)) != -1) {
		/* Resume the scan just past the occurrence found. */
		const int consumed = at + pat_len;

		mem += consumed;
		len -= consumed;
		hits++;
	}
	return hits;
}
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB.

   If STR is empty, PAT is empty or longer than STR, or there are no
   occurrences of PAT in STR (or COUNT is 0), then the original string
   is returned.  Otherwise, a new string is allocated here with
   PyMem_MALLOC and returned; the caller must release it.

   COUNT limits the number of replacements; a negative COUNT means
   "replace all".

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result length
       would not fit in an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
             const char *pat, int pat_len,	/* pattern string to find */
             const char *sub, int sub_len,	/* substitution string */
             int count,				/* number of replacements */
	     int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len, delta;

	/* An empty pattern would make mymemcnt() loop forever, so treat
	   it like "nothing to replace". */
	if (len == 0 || pat_len <= 0 || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = mymemcnt(str, len, pat, pat_len);
	if (count < 0)
		count = INT_MAX;
	else if (nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;

	/* Growing replacements can push the result past INT_MAX; signed
	   overflow here would lead to an undersized buffer.  Report it
	   through the error path instead.  (Shrinking replacements cannot
	   overflow because nfound*pat_len <= len.) */
	delta = sub_len - pat_len;
	if (delta > 0 && nfound > (INT_MAX - len) / delta)
		return NULL;

	new_len = len + nfound*delta;
	if (new_len == 0) {
		/* Have to allocate something for the caller to free(). */
		out_s = (char *)PyMem_MALLOC(1);
		if (out_s == NULL)
			return NULL;
		out_s[0] = '\0';
	}
	else {
		assert(new_len > 0);
		new_s = (char *)PyMem_MALLOC(new_len);
		if (new_s == NULL)
			return NULL;
		out_s = new_s;

		for (; count > 0 && len > 0; --count) {
			/* find index of next instance of pattern */
			offset = mymemfind(str, len, pat, pat_len);
			if (offset == -1)
				break;

			/* copy non matching part of input string */
			memcpy(new_s, str, offset);
			str += offset + pat_len;
			len -= offset + pat_len;

			/* copy substitute into the output string */
			new_s += offset;
			memcpy(new_s, sub, sub_len);
			new_s += sub_len;
		}
		/* copy any remaining values into output string */
		if (len > 0)
			memcpy(new_s, str, len);
	}
	*out_len = new_len;
	return out_s;

  return_same:
	*out_len = -1;
	return (char *)str; /* cast away const */
}
1911 static char replace__doc__[] =
1912 "S.replace (old, new[, maxsplit]) -> string\n\
1914 Return a copy of string S with all occurrences of substring\n\
1915 old replaced by new. If the optional argument maxsplit is\n\
1916 given, only the first maxsplit occurrences are replaced.";
1918 static PyObject *
1919 string_replace(PyStringObject *self, PyObject *args)
1921 const char *str = PyString_AS_STRING(self), *sub, *repl;
1922 char *new_s;
1923 const int len = PyString_GET_SIZE(self);
1924 int sub_len, repl_len, out_len;
1925 int count = -1;
1926 PyObject *new;
1927 PyObject *subobj, *replobj;
1929 if (!PyArg_ParseTuple(args, "OO|i:replace",
1930 &subobj, &replobj, &count))
1931 return NULL;
1933 if (PyString_Check(subobj)) {
1934 sub = PyString_AS_STRING(subobj);
1935 sub_len = PyString_GET_SIZE(subobj);
1937 #ifdef Py_USING_UNICODE
1938 else if (PyUnicode_Check(subobj))
1939 return PyUnicode_Replace((PyObject *)self,
1940 subobj, replobj, count);
1941 #endif
1942 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1943 return NULL;
1945 if (PyString_Check(replobj)) {
1946 repl = PyString_AS_STRING(replobj);
1947 repl_len = PyString_GET_SIZE(replobj);
1949 #ifdef Py_USING_UNICODE
1950 else if (PyUnicode_Check(replobj))
1951 return PyUnicode_Replace((PyObject *)self,
1952 subobj, replobj, count);
1953 #endif
1954 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1955 return NULL;
1957 if (sub_len <= 0) {
1958 PyErr_SetString(PyExc_ValueError, "empty pattern string");
1959 return NULL;
1961 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
1962 if (new_s == NULL) {
1963 PyErr_NoMemory();
1964 return NULL;
1966 if (out_len == -1) {
1967 if (PyString_CheckExact(self)) {
1968 /* we're returning another reference to self */
1969 new = (PyObject*)self;
1970 Py_INCREF(new);
1972 else {
1973 new = PyString_FromStringAndSize(str, len);
1974 if (new == NULL)
1975 return NULL;
1978 else {
1979 new = PyString_FromStringAndSize(new_s, out_len);
1980 PyMem_FREE(new_s);
1982 return new;
1986 static char startswith__doc__[] =
1987 "S.startswith(prefix[, start[, end]]) -> int\n\
1989 Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1990 optional start, test S beginning at that position. With optional end, stop\n\
1991 comparing S at that position.";
1993 static PyObject *
1994 string_startswith(PyStringObject *self, PyObject *args)
1996 const char* str = PyString_AS_STRING(self);
1997 int len = PyString_GET_SIZE(self);
1998 const char* prefix;
1999 int plen;
2000 int start = 0;
2001 int end = -1;
2002 PyObject *subobj;
2004 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2005 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2006 return NULL;
2007 if (PyString_Check(subobj)) {
2008 prefix = PyString_AS_STRING(subobj);
2009 plen = PyString_GET_SIZE(subobj);
2011 #ifdef Py_USING_UNICODE
2012 else if (PyUnicode_Check(subobj)) {
2013 int rc;
2014 rc = PyUnicode_Tailmatch((PyObject *)self,
2015 subobj, start, end, -1);
2016 if (rc == -1)
2017 return NULL;
2018 else
2019 return PyInt_FromLong((long) rc);
2021 #endif
2022 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
2023 return NULL;
2025 /* adopt Java semantics for index out of range. it is legal for
2026 * offset to be == plen, but this only returns true if prefix is
2027 * the empty string.
2029 if (start < 0 || start+plen > len)
2030 return PyInt_FromLong(0);
2032 if (!memcmp(str+start, prefix, plen)) {
2033 /* did the match end after the specified end? */
2034 if (end < 0)
2035 return PyInt_FromLong(1);
2036 else if (end - start < plen)
2037 return PyInt_FromLong(0);
2038 else
2039 return PyInt_FromLong(1);
2041 else return PyInt_FromLong(0);
2045 static char endswith__doc__[] =
2046 "S.endswith(suffix[, start[, end]]) -> int\n\
2048 Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
2049 optional start, test S beginning at that position. With optional end, stop\n\
2050 comparing S at that position.";
2052 static PyObject *
2053 string_endswith(PyStringObject *self, PyObject *args)
2055 const char* str = PyString_AS_STRING(self);
2056 int len = PyString_GET_SIZE(self);
2057 const char* suffix;
2058 int slen;
2059 int start = 0;
2060 int end = -1;
2061 int lower, upper;
2062 PyObject *subobj;
2064 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2065 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2066 return NULL;
2067 if (PyString_Check(subobj)) {
2068 suffix = PyString_AS_STRING(subobj);
2069 slen = PyString_GET_SIZE(subobj);
2071 #ifdef Py_USING_UNICODE
2072 else if (PyUnicode_Check(subobj)) {
2073 int rc;
2074 rc = PyUnicode_Tailmatch((PyObject *)self,
2075 subobj, start, end, +1);
2076 if (rc == -1)
2077 return NULL;
2078 else
2079 return PyInt_FromLong((long) rc);
2081 #endif
2082 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
2083 return NULL;
2085 if (start < 0 || start > len || slen > len)
2086 return PyInt_FromLong(0);
2088 upper = (end >= 0 && end <= len) ? end : len;
2089 lower = (upper - slen) > start ? (upper - slen) : start;
2091 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
2092 return PyInt_FromLong(1);
2093 else return PyInt_FromLong(0);
2097 static char encode__doc__[] =
2098 "S.encode([encoding[,errors]]) -> object\n\
2100 Encodes S using the codec registered for encoding. encoding defaults\n\
2101 to the default encoding. errors may be given to set a different error\n\
2102 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2103 a ValueError. Other possible values are 'ignore' and 'replace'.";
2105 static PyObject *
2106 string_encode(PyStringObject *self, PyObject *args)
2108 char *encoding = NULL;
2109 char *errors = NULL;
2110 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2111 return NULL;
2112 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2116 static char decode__doc__[] =
2117 "S.decode([encoding[,errors]]) -> object\n\
2119 Decodes S using the codec registered for encoding. encoding defaults\n\
2120 to the default encoding. errors may be given to set a different error\n\
2121 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2122 a ValueError. Other possible values are 'ignore' and 'replace'.";
2124 static PyObject *
2125 string_decode(PyStringObject *self, PyObject *args)
2127 char *encoding = NULL;
2128 char *errors = NULL;
2129 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2130 return NULL;
2131 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
2135 static char expandtabs__doc__[] =
2136 "S.expandtabs([tabsize]) -> string\n\
2138 Return a copy of S where all tab characters are expanded using spaces.\n\
2139 If tabsize is not given, a tab size of 8 characters is assumed.";
2141 static PyObject*
2142 string_expandtabs(PyStringObject *self, PyObject *args)
2144 const char *e, *p;
2145 char *q;
2146 int i, j;
2147 PyObject *u;
2148 int tabsize = 8;
2150 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2151 return NULL;
2153 /* First pass: determine size of output string */
2154 i = j = 0;
2155 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2156 for (p = PyString_AS_STRING(self); p < e; p++)
2157 if (*p == '\t') {
2158 if (tabsize > 0)
2159 j += tabsize - (j % tabsize);
2161 else {
2162 j++;
2163 if (*p == '\n' || *p == '\r') {
2164 i += j;
2165 j = 0;
2169 /* Second pass: create output string and fill it */
2170 u = PyString_FromStringAndSize(NULL, i + j);
2171 if (!u)
2172 return NULL;
2174 j = 0;
2175 q = PyString_AS_STRING(u);
2177 for (p = PyString_AS_STRING(self); p < e; p++)
2178 if (*p == '\t') {
2179 if (tabsize > 0) {
2180 i = tabsize - (j % tabsize);
2181 j += i;
2182 while (i--)
2183 *q++ = ' ';
2186 else {
2187 j++;
2188 *q++ = *p;
2189 if (*p == '\n' || *p == '\r')
2190 j = 0;
2193 return u;
2196 static PyObject *
2197 pad(PyStringObject *self, int left, int right, char fill)
2199 PyObject *u;
2201 if (left < 0)
2202 left = 0;
2203 if (right < 0)
2204 right = 0;
2206 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
2207 Py_INCREF(self);
2208 return (PyObject *)self;
2211 u = PyString_FromStringAndSize(NULL,
2212 left + PyString_GET_SIZE(self) + right);
2213 if (u) {
2214 if (left)
2215 memset(PyString_AS_STRING(u), fill, left);
2216 memcpy(PyString_AS_STRING(u) + left,
2217 PyString_AS_STRING(self),
2218 PyString_GET_SIZE(self));
2219 if (right)
2220 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2221 fill, right);
2224 return u;
2227 static char ljust__doc__[] =
2228 "S.ljust(width) -> string\n"
2229 "\n"
2230 "Return S left justified in a string of length width. Padding is\n"
2231 "done using spaces.";
2233 static PyObject *
2234 string_ljust(PyStringObject *self, PyObject *args)
2236 int width;
2237 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2238 return NULL;
2240 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
2241 Py_INCREF(self);
2242 return (PyObject*) self;
2245 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2249 static char rjust__doc__[] =
2250 "S.rjust(width) -> string\n"
2251 "\n"
2252 "Return S right justified in a string of length width. Padding is\n"
2253 "done using spaces.";
2255 static PyObject *
2256 string_rjust(PyStringObject *self, PyObject *args)
2258 int width;
2259 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2260 return NULL;
2262 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
2263 Py_INCREF(self);
2264 return (PyObject*) self;
2267 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2271 static char center__doc__[] =
2272 "S.center(width) -> string\n"
2273 "\n"
2274 "Return S centered in a string of length width. Padding is done\n"
2275 "using spaces.";
2277 static PyObject *
2278 string_center(PyStringObject *self, PyObject *args)
2280 int marg, left;
2281 int width;
2283 if (!PyArg_ParseTuple(args, "i:center", &width))
2284 return NULL;
2286 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
2287 Py_INCREF(self);
2288 return (PyObject*) self;
2291 marg = width - PyString_GET_SIZE(self);
2292 left = marg / 2 + (marg & width & 1);
2294 return pad(self, left, marg - left, ' ');
2297 static char isspace__doc__[] =
2298 "S.isspace() -> int\n"
2299 "\n"
2300 "Return 1 if there are only whitespace characters in S,\n"
2301 "0 otherwise.";
2303 static PyObject*
2304 string_isspace(PyStringObject *self)
2306 register const unsigned char *p
2307 = (unsigned char *) PyString_AS_STRING(self);
2308 register const unsigned char *e;
2310 /* Shortcut for single character strings */
2311 if (PyString_GET_SIZE(self) == 1 &&
2312 isspace(*p))
2313 return PyInt_FromLong(1);
2315 /* Special case for empty strings */
2316 if (PyString_GET_SIZE(self) == 0)
2317 return PyInt_FromLong(0);
2319 e = p + PyString_GET_SIZE(self);
2320 for (; p < e; p++) {
2321 if (!isspace(*p))
2322 return PyInt_FromLong(0);
2324 return PyInt_FromLong(1);
2328 static char isalpha__doc__[] =
2329 "S.isalpha() -> int\n\
2331 Return 1 if all characters in S are alphabetic\n\
2332 and there is at least one character in S, 0 otherwise.";
2334 static PyObject*
2335 string_isalpha(PyStringObject *self)
2337 register const unsigned char *p
2338 = (unsigned char *) PyString_AS_STRING(self);
2339 register const unsigned char *e;
2341 /* Shortcut for single character strings */
2342 if (PyString_GET_SIZE(self) == 1 &&
2343 isalpha(*p))
2344 return PyInt_FromLong(1);
2346 /* Special case for empty strings */
2347 if (PyString_GET_SIZE(self) == 0)
2348 return PyInt_FromLong(0);
2350 e = p + PyString_GET_SIZE(self);
2351 for (; p < e; p++) {
2352 if (!isalpha(*p))
2353 return PyInt_FromLong(0);
2355 return PyInt_FromLong(1);
2359 static char isalnum__doc__[] =
2360 "S.isalnum() -> int\n\
2362 Return 1 if all characters in S are alphanumeric\n\
2363 and there is at least one character in S, 0 otherwise.";
2365 static PyObject*
2366 string_isalnum(PyStringObject *self)
2368 register const unsigned char *p
2369 = (unsigned char *) PyString_AS_STRING(self);
2370 register const unsigned char *e;
2372 /* Shortcut for single character strings */
2373 if (PyString_GET_SIZE(self) == 1 &&
2374 isalnum(*p))
2375 return PyInt_FromLong(1);
2377 /* Special case for empty strings */
2378 if (PyString_GET_SIZE(self) == 0)
2379 return PyInt_FromLong(0);
2381 e = p + PyString_GET_SIZE(self);
2382 for (; p < e; p++) {
2383 if (!isalnum(*p))
2384 return PyInt_FromLong(0);
2386 return PyInt_FromLong(1);
2390 static char isdigit__doc__[] =
2391 "S.isdigit() -> int\n\
2393 Return 1 if there are only digit characters in S,\n\
2394 0 otherwise.";
2396 static PyObject*
2397 string_isdigit(PyStringObject *self)
2399 register const unsigned char *p
2400 = (unsigned char *) PyString_AS_STRING(self);
2401 register const unsigned char *e;
2403 /* Shortcut for single character strings */
2404 if (PyString_GET_SIZE(self) == 1 &&
2405 isdigit(*p))
2406 return PyInt_FromLong(1);
2408 /* Special case for empty strings */
2409 if (PyString_GET_SIZE(self) == 0)
2410 return PyInt_FromLong(0);
2412 e = p + PyString_GET_SIZE(self);
2413 for (; p < e; p++) {
2414 if (!isdigit(*p))
2415 return PyInt_FromLong(0);
2417 return PyInt_FromLong(1);
2421 static char islower__doc__[] =
2422 "S.islower() -> int\n\
2424 Return 1 if all cased characters in S are lowercase and there is\n\
2425 at least one cased character in S, 0 otherwise.";
2427 static PyObject*
2428 string_islower(PyStringObject *self)
2430 register const unsigned char *p
2431 = (unsigned char *) PyString_AS_STRING(self);
2432 register const unsigned char *e;
2433 int cased;
2435 /* Shortcut for single character strings */
2436 if (PyString_GET_SIZE(self) == 1)
2437 return PyInt_FromLong(islower(*p) != 0);
2439 /* Special case for empty strings */
2440 if (PyString_GET_SIZE(self) == 0)
2441 return PyInt_FromLong(0);
2443 e = p + PyString_GET_SIZE(self);
2444 cased = 0;
2445 for (; p < e; p++) {
2446 if (isupper(*p))
2447 return PyInt_FromLong(0);
2448 else if (!cased && islower(*p))
2449 cased = 1;
2451 return PyInt_FromLong(cased);
2455 static char isupper__doc__[] =
2456 "S.isupper() -> int\n\
2458 Return 1 if all cased characters in S are uppercase and there is\n\
2459 at least one cased character in S, 0 otherwise.";
2461 static PyObject*
2462 string_isupper(PyStringObject *self)
2464 register const unsigned char *p
2465 = (unsigned char *) PyString_AS_STRING(self);
2466 register const unsigned char *e;
2467 int cased;
2469 /* Shortcut for single character strings */
2470 if (PyString_GET_SIZE(self) == 1)
2471 return PyInt_FromLong(isupper(*p) != 0);
2473 /* Special case for empty strings */
2474 if (PyString_GET_SIZE(self) == 0)
2475 return PyInt_FromLong(0);
2477 e = p + PyString_GET_SIZE(self);
2478 cased = 0;
2479 for (; p < e; p++) {
2480 if (islower(*p))
2481 return PyInt_FromLong(0);
2482 else if (!cased && isupper(*p))
2483 cased = 1;
2485 return PyInt_FromLong(cased);
2489 static char istitle__doc__[] =
2490 "S.istitle() -> int\n\
2492 Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2493 may only follow uncased characters and lowercase characters only cased\n\
2494 ones. Return 0 otherwise.";
2496 static PyObject*
2497 string_istitle(PyStringObject *self, PyObject *uncased)
2499 register const unsigned char *p
2500 = (unsigned char *) PyString_AS_STRING(self);
2501 register const unsigned char *e;
2502 int cased, previous_is_cased;
2504 /* Shortcut for single character strings */
2505 if (PyString_GET_SIZE(self) == 1)
2506 return PyInt_FromLong(isupper(*p) != 0);
2508 /* Special case for empty strings */
2509 if (PyString_GET_SIZE(self) == 0)
2510 return PyInt_FromLong(0);
2512 e = p + PyString_GET_SIZE(self);
2513 cased = 0;
2514 previous_is_cased = 0;
2515 for (; p < e; p++) {
2516 register const unsigned char ch = *p;
2518 if (isupper(ch)) {
2519 if (previous_is_cased)
2520 return PyInt_FromLong(0);
2521 previous_is_cased = 1;
2522 cased = 1;
2524 else if (islower(ch)) {
2525 if (!previous_is_cased)
2526 return PyInt_FromLong(0);
2527 previous_is_cased = 1;
2528 cased = 1;
2530 else
2531 previous_is_cased = 0;
2533 return PyInt_FromLong(cased);
2537 static char splitlines__doc__[] =
2538 "S.splitlines([keepends]]) -> list of strings\n\
2540 Return a list of the lines in S, breaking at line boundaries.\n\
2541 Line breaks are not included in the resulting list unless keepends\n\
2542 is given and true.";
2544 #define SPLIT_APPEND(data, left, right) \
2545 str = PyString_FromStringAndSize(data + left, right - left); \
2546 if (!str) \
2547 goto onError; \
2548 if (PyList_Append(list, str)) { \
2549 Py_DECREF(str); \
2550 goto onError; \
2552 else \
2553 Py_DECREF(str);
2555 static PyObject*
2556 string_splitlines(PyStringObject *self, PyObject *args)
2558 register int i;
2559 register int j;
2560 int len;
2561 int keepends = 0;
2562 PyObject *list;
2563 PyObject *str;
2564 char *data;
2566 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
2567 return NULL;
2569 data = PyString_AS_STRING(self);
2570 len = PyString_GET_SIZE(self);
2572 list = PyList_New(0);
2573 if (!list)
2574 goto onError;
2576 for (i = j = 0; i < len; ) {
2577 int eol;
2579 /* Find a line and append it */
2580 while (i < len && data[i] != '\n' && data[i] != '\r')
2581 i++;
2583 /* Skip the line break reading CRLF as one line break */
2584 eol = i;
2585 if (i < len) {
2586 if (data[i] == '\r' && i + 1 < len &&
2587 data[i+1] == '\n')
2588 i += 2;
2589 else
2590 i++;
2591 if (keepends)
2592 eol = i;
2594 SPLIT_APPEND(data, j, eol);
2595 j = i;
2597 if (j < len) {
2598 SPLIT_APPEND(data, j, len);
2601 return list;
2603 onError:
2604 Py_DECREF(list);
2605 return NULL;
2608 #undef SPLIT_APPEND
2611 static PyMethodDef
2612 string_methods[] = {
2613 /* Counterparts of the obsolete stropmodule functions; except
2614 string.maketrans(). */
2615 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2616 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2617 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
2618 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
2619 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
2620 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
2621 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
2622 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
2623 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
2624 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
2625 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
2626 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS, capitalize__doc__},
2627 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
2628 {"endswith", (PyCFunction)string_endswith, METH_VARARGS, endswith__doc__},
2629 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
2630 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
2631 {"lstrip", (PyCFunction)string_lstrip, METH_NOARGS, lstrip__doc__},
2632 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2633 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2634 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
2635 {"rstrip", (PyCFunction)string_rstrip, METH_NOARGS, rstrip__doc__},
2636 {"startswith", (PyCFunction)string_startswith, METH_VARARGS, startswith__doc__},
2637 {"strip", (PyCFunction)string_strip, METH_NOARGS, strip__doc__},
2638 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS, swapcase__doc__},
2639 {"translate", (PyCFunction)string_translate, METH_VARARGS, translate__doc__},
2640 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
2641 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
2642 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
2643 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
2644 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
2645 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
2646 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS, expandtabs__doc__},
2647 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS, splitlines__doc__},
2648 #if 0
2649 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
2650 #endif
2651 {NULL, NULL} /* sentinel */
2654 staticforward PyObject *
2655 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2657 static PyObject *
2658 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2660 PyObject *x = NULL;
2661 static char *kwlist[] = {"object", 0};
2663 if (type != &PyString_Type)
2664 return str_subtype_new(type, args, kwds);
2665 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
2666 return NULL;
2667 if (x == NULL)
2668 return PyString_FromString("");
2669 return PyObject_Str(x);
2672 static PyObject *
2673 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2675 PyObject *tmp, *pnew;
2676 int n;
2678 assert(PyType_IsSubtype(type, &PyString_Type));
2679 tmp = string_new(&PyString_Type, args, kwds);
2680 if (tmp == NULL)
2681 return NULL;
2682 assert(PyString_CheckExact(tmp));
2683 n = PyString_GET_SIZE(tmp);
2684 pnew = type->tp_alloc(type, n);
2685 if (pnew != NULL) {
2686 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
2687 #ifdef CACHE_HASH
2688 ((PyStringObject *)pnew)->ob_shash =
2689 ((PyStringObject *)tmp)->ob_shash;
2690 #endif
2691 #ifdef INTERN_STRINGS
2692 ((PyStringObject *)pnew)->ob_sinterned =
2693 ((PyStringObject *)tmp)->ob_sinterned;
2694 #endif
2696 Py_DECREF(tmp);
2697 return pnew;
/* Docstring of the str type (used as tp_doc). */
static char string_doc[] =
	"str(object) -> string\n"
	"\n"
	"Return a nice string representation of the object.\n"
	"If the argument is a string, the return value is the same object.";
2706 PyTypeObject PyString_Type = {
2707 PyObject_HEAD_INIT(&PyType_Type)
2709 "str",
2710 sizeof(PyStringObject),
2711 sizeof(char),
2712 (destructor)string_dealloc, /* tp_dealloc */
2713 (printfunc)string_print, /* tp_print */
2714 0, /* tp_getattr */
2715 0, /* tp_setattr */
2716 0, /* tp_compare */
2717 (reprfunc)string_repr, /* tp_repr */
2718 0, /* tp_as_number */
2719 &string_as_sequence, /* tp_as_sequence */
2720 0, /* tp_as_mapping */
2721 (hashfunc)string_hash, /* tp_hash */
2722 0, /* tp_call */
2723 (reprfunc)string_str, /* tp_str */
2724 PyObject_GenericGetAttr, /* tp_getattro */
2725 0, /* tp_setattro */
2726 &string_as_buffer, /* tp_as_buffer */
2727 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
2728 string_doc, /* tp_doc */
2729 0, /* tp_traverse */
2730 0, /* tp_clear */
2731 (richcmpfunc)string_richcompare, /* tp_richcompare */
2732 0, /* tp_weaklistoffset */
2733 0, /* tp_iter */
2734 0, /* tp_iternext */
2735 string_methods, /* tp_methods */
2736 0, /* tp_members */
2737 0, /* tp_getset */
2738 0, /* tp_base */
2739 0, /* tp_dict */
2740 0, /* tp_descr_get */
2741 0, /* tp_descr_set */
2742 0, /* tp_dictoffset */
2743 0, /* tp_init */
2744 0, /* tp_alloc */
2745 string_new, /* tp_new */
2748 void
2749 PyString_Concat(register PyObject **pv, register PyObject *w)
2751 register PyObject *v;
2752 if (*pv == NULL)
2753 return;
2754 if (w == NULL || !PyString_Check(*pv)) {
2755 Py_DECREF(*pv);
2756 *pv = NULL;
2757 return;
2759 v = string_concat((PyStringObject *) *pv, w);
2760 Py_DECREF(*pv);
2761 *pv = v;
2764 void
2765 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
2767 PyString_Concat(pv, w);
2768 Py_XDECREF(w);
2772 /* The following function breaks the notion that strings are immutable:
2773 it changes the size of a string. We get away with this only if there
2774 is only one module referencing the object. You can also think of it
2775 as creating a new string object and destroying the old one, only
2776 more efficiently. In any case, don't use this if the string may
2777 already be known to some other part of the code... */
2780 _PyString_Resize(PyObject **pv, int newsize)
2782 register PyObject *v;
2783 register PyStringObject *sv;
2784 v = *pv;
2785 if (!PyString_Check(v) || v->ob_refcnt != 1) {
2786 *pv = 0;
2787 Py_DECREF(v);
2788 PyErr_BadInternalCall();
2789 return -1;
2791 /* XXX UNREF/NEWREF interface should be more symmetrical */
2792 #ifdef Py_REF_DEBUG
2793 --_Py_RefTotal;
2794 #endif
2795 _Py_ForgetReference(v);
2796 *pv = (PyObject *)
2797 PyObject_REALLOC((char *)v,
2798 sizeof(PyStringObject) + newsize * sizeof(char));
2799 if (*pv == NULL) {
2800 PyObject_DEL(v);
2801 PyErr_NoMemory();
2802 return -1;
2804 _Py_NewReference(*pv);
2805 sv = (PyStringObject *) *pv;
2806 sv->ob_size = newsize;
2807 sv->ob_sval[newsize] = '\0';
2808 return 0;
2811 /* Helpers for formatstring */
2813 static PyObject *
2814 getnextarg(PyObject *args, int arglen, int *p_argidx)
2816 int argidx = *p_argidx;
2817 if (argidx < arglen) {
2818 (*p_argidx)++;
2819 if (arglen < 0)
2820 return args;
2821 else
2822 return PyTuple_GetItem(args, argidx);
2824 PyErr_SetString(PyExc_TypeError,
2825 "not enough arguments for format string");
2826 return NULL;
/* Bit flags recording which format-specifier flag characters were seen. */
#define F_LJUST	0x01	/* '-' */
#define F_SIGN	0x02	/* '+' */
#define F_BLANK	0x04	/* ' ' */
#define F_ALT	0x08	/* '#' */
#define F_ZERO	0x10	/* '0' */
2842 static int
2843 formatfloat(char *buf, size_t buflen, int flags,
2844 int prec, int type, PyObject *v)
2846 /* fmt = '%#.' + `prec` + `type`
2847 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
2848 char fmt[20];
2849 double x;
2850 if (!PyArg_Parse(v, "d;float argument required", &x))
2851 return -1;
2852 if (prec < 0)
2853 prec = 6;
2854 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2855 type = 'g';
2856 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
2857 /* worst case length calc to ensure no buffer overrun:
2858 fmt = %#.<prec>g
2859 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
2860 for any double rep.)
2861 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2862 If prec=0 the effective precision is 1 (the leading digit is
2863 always given), therefore increase by one to 10+prec. */
2864 if (buflen <= (size_t)10 + (size_t)prec) {
2865 PyErr_SetString(PyExc_OverflowError,
2866 "formatted float is too long (precision too large?)");
2867 return -1;
2869 sprintf(buf, fmt, x);
2870 return strlen(buf);
2873 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2874 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2875 * Python's regular ints.
2876 * Return value: a new PyString*, or NULL if error.
2877 * . *pbuf is set to point into it,
2878 * *plen set to the # of chars following that.
2879 * Caller must decref it when done using pbuf.
2880 * The string starting at *pbuf is of the form
2881 * "-"? ("0x" | "0X")? digit+
2882 * "0x"/"0X" are present only for x and X conversions, with F_ALT
2883 * set in flags. The case of hex digits will be correct,
2884 * There will be at least prec digits, zero-filled on the left if
2885 * necessary to get that many.
2886 * val object to be converted
2887 * flags bitmask of format flags; only F_ALT is looked at
2888 * prec minimum number of digits; 0-fill on left if needed
2889 * type a character in [duoxX]; u acts the same as d
2891 * CAUTION: o, x and X conversions on regular ints can never
2892 * produce a '-' sign, but can for Python's unbounded ints.
2894 PyObject*
2895 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2896 char **pbuf, int *plen)
2898 PyObject *result = NULL;
2899 char *buf;
2900 int i;
2901 int sign; /* 1 if '-', else 0 */
2902 int len; /* number of characters */
2903 int numdigits; /* len == numnondigits + numdigits */
2904 int numnondigits = 0;
2906 switch (type) {
2907 case 'd':
2908 case 'u':
2909 result = val->ob_type->tp_str(val);
2910 break;
2911 case 'o':
2912 result = val->ob_type->tp_as_number->nb_oct(val);
2913 break;
2914 case 'x':
2915 case 'X':
2916 numnondigits = 2;
2917 result = val->ob_type->tp_as_number->nb_hex(val);
2918 break;
2919 default:
2920 assert(!"'type' not in [duoxX]");
2922 if (!result)
2923 return NULL;
2925 /* To modify the string in-place, there can only be one reference. */
2926 if (result->ob_refcnt != 1) {
2927 PyErr_BadInternalCall();
2928 return NULL;
2930 buf = PyString_AsString(result);
2931 len = PyString_Size(result);
2932 if (buf[len-1] == 'L') {
2933 --len;
2934 buf[len] = '\0';
2936 sign = buf[0] == '-';
2937 numnondigits += sign;
2938 numdigits = len - numnondigits;
2939 assert(numdigits > 0);
2941 /* Get rid of base marker unless F_ALT */
2942 if ((flags & F_ALT) == 0) {
2943 /* Need to skip 0x, 0X or 0. */
2944 int skipped = 0;
2945 switch (type) {
2946 case 'o':
2947 assert(buf[sign] == '0');
2948 /* If 0 is only digit, leave it alone. */
2949 if (numdigits > 1) {
2950 skipped = 1;
2951 --numdigits;
2953 break;
2954 case 'x':
2955 case 'X':
2956 assert(buf[sign] == '0');
2957 assert(buf[sign + 1] == 'x');
2958 skipped = 2;
2959 numnondigits -= 2;
2960 break;
2962 if (skipped) {
2963 buf += skipped;
2964 len -= skipped;
2965 if (sign)
2966 buf[0] = '-';
2968 assert(len == numnondigits + numdigits);
2969 assert(numdigits > 0);
2972 /* Fill with leading zeroes to meet minimum width. */
2973 if (prec > numdigits) {
2974 PyObject *r1 = PyString_FromStringAndSize(NULL,
2975 numnondigits + prec);
2976 char *b1;
2977 if (!r1) {
2978 Py_DECREF(result);
2979 return NULL;
2981 b1 = PyString_AS_STRING(r1);
2982 for (i = 0; i < numnondigits; ++i)
2983 *b1++ = *buf++;
2984 for (i = 0; i < prec - numdigits; i++)
2985 *b1++ = '0';
2986 for (i = 0; i < numdigits; i++)
2987 *b1++ = *buf++;
2988 *b1 = '\0';
2989 Py_DECREF(result);
2990 result = r1;
2991 buf = PyString_AS_STRING(result);
2992 len = numnondigits + prec;
2995 /* Fix up case for hex conversions. */
2996 switch (type) {
2997 case 'x':
2998 /* Need to convert all upper case letters to lower case. */
2999 for (i = 0; i < len; i++)
3000 if (buf[i] >= 'A' && buf[i] <= 'F')
3001 buf[i] += 'a'-'A';
3002 break;
3003 case 'X':
3004 /* Need to convert 0x to 0X (and -0x to -0X). */
3005 if (buf[sign + 1] == 'x')
3006 buf[sign + 1] = 'X';
3007 break;
3009 *pbuf = buf;
3010 *plen = len;
3011 return result;
3014 static int
3015 formatint(char *buf, size_t buflen, int flags,
3016 int prec, int type, PyObject *v)
3018 /* fmt = '%#.' + `prec` + 'l' + `type`
3019 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3020 + 1 + 1 = 24 */
3021 char fmt[64]; /* plenty big enough! */
3022 long x;
3023 if (!PyArg_Parse(v, "l;int argument required", &x))
3024 return -1;
3025 if (prec < 0)
3026 prec = 1;
3027 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
3028 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
3029 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
3030 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
3031 PyErr_SetString(PyExc_OverflowError,
3032 "formatted integer is too long (precision too large?)");
3033 return -1;
3035 sprintf(buf, fmt, x);
3036 /* When converting 0 under %#x or %#X, C leaves off the base marker,
3037 * but we want it (for consistency with other %#x conversions, and
3038 * for consistency with Python's hex() function).
3039 * BUG 28-Apr-2001 tim: At least two platform Cs (Metrowerks &
3040 * Compaq Tru64) violate the std by converting 0 w/ leading 0x anyway.
3041 * So add it only if the platform didn't already.
3043 if (x == 0 &&
3044 (flags & F_ALT) &&
3045 (type == 'x' || type == 'X') &&
3046 buf[1] != (char)type) /* this last always true under std C */
3048 memmove(buf+2, buf, strlen(buf) + 1);
3049 buf[0] = '0';
3050 buf[1] = (char)type;
3052 return strlen(buf);
3055 static int
3056 formatchar(char *buf, size_t buflen, PyObject *v)
3058 /* presume that the buffer is at least 2 characters long */
3059 if (PyString_Check(v)) {
3060 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
3061 return -1;
3063 else {
3064 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
3065 return -1;
3067 buf[1] = '\0';
3068 return 1;
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the size of the scratch buffer in which floats, ints
   and chars are formatted.  XXX It is a magic number.  Each formatting
   routine bounds-checks against it to avoid overflow; allocating a
   buffer of the right size per format might be a better solution, but
   the current one is sufficient for now.
*/
#define FORMATBUFLEN ((size_t)120)
3082 PyObject *
3083 PyString_Format(PyObject *format, PyObject *args)
3085 char *fmt, *res;
3086 int fmtcnt, rescnt, reslen, arglen, argidx;
3087 int args_owned = 0;
3088 PyObject *result, *orig_args;
3089 #ifdef Py_USING_UNICODE
3090 PyObject *v, *w;
3091 #endif
3092 PyObject *dict = NULL;
3093 if (format == NULL || !PyString_Check(format) || args == NULL) {
3094 PyErr_BadInternalCall();
3095 return NULL;
3097 orig_args = args;
3098 fmt = PyString_AsString(format);
3099 fmtcnt = PyString_Size(format);
3100 reslen = rescnt = fmtcnt + 100;
3101 result = PyString_FromStringAndSize((char *)NULL, reslen);
3102 if (result == NULL)
3103 return NULL;
3104 res = PyString_AsString(result);
3105 if (PyTuple_Check(args)) {
3106 arglen = PyTuple_Size(args);
3107 argidx = 0;
3109 else {
3110 arglen = -1;
3111 argidx = -2;
3113 if (args->ob_type->tp_as_mapping)
3114 dict = args;
3115 while (--fmtcnt >= 0) {
3116 if (*fmt != '%') {
3117 if (--rescnt < 0) {
3118 rescnt = fmtcnt + 100;
3119 reslen += rescnt;
3120 if (_PyString_Resize(&result, reslen) < 0)
3121 return NULL;
3122 res = PyString_AsString(result)
3123 + reslen - rescnt;
3124 --rescnt;
3126 *res++ = *fmt++;
3128 else {
3129 /* Got a format specifier */
3130 int flags = 0;
3131 int width = -1;
3132 int prec = -1;
3133 int c = '\0';
3134 int fill;
3135 PyObject *v = NULL;
3136 PyObject *temp = NULL;
3137 char *pbuf;
3138 int sign;
3139 int len;
3140 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
3141 #ifdef Py_USING_UNICODE
3142 char *fmt_start = fmt;
3143 int argidx_start = argidx;
3144 #endif
3146 fmt++;
3147 if (*fmt == '(') {
3148 char *keystart;
3149 int keylen;
3150 PyObject *key;
3151 int pcount = 1;
3153 if (dict == NULL) {
3154 PyErr_SetString(PyExc_TypeError,
3155 "format requires a mapping");
3156 goto error;
3158 ++fmt;
3159 --fmtcnt;
3160 keystart = fmt;
3161 /* Skip over balanced parentheses */
3162 while (pcount > 0 && --fmtcnt >= 0) {
3163 if (*fmt == ')')
3164 --pcount;
3165 else if (*fmt == '(')
3166 ++pcount;
3167 fmt++;
3169 keylen = fmt - keystart - 1;
3170 if (fmtcnt < 0 || pcount > 0) {
3171 PyErr_SetString(PyExc_ValueError,
3172 "incomplete format key");
3173 goto error;
3175 key = PyString_FromStringAndSize(keystart,
3176 keylen);
3177 if (key == NULL)
3178 goto error;
3179 if (args_owned) {
3180 Py_DECREF(args);
3181 args_owned = 0;
3183 args = PyObject_GetItem(dict, key);
3184 Py_DECREF(key);
3185 if (args == NULL) {
3186 goto error;
3188 args_owned = 1;
3189 arglen = -1;
3190 argidx = -2;
3192 while (--fmtcnt >= 0) {
3193 switch (c = *fmt++) {
3194 case '-': flags |= F_LJUST; continue;
3195 case '+': flags |= F_SIGN; continue;
3196 case ' ': flags |= F_BLANK; continue;
3197 case '#': flags |= F_ALT; continue;
3198 case '0': flags |= F_ZERO; continue;
3200 break;
3202 if (c == '*') {
3203 v = getnextarg(args, arglen, &argidx);
3204 if (v == NULL)
3205 goto error;
3206 if (!PyInt_Check(v)) {
3207 PyErr_SetString(PyExc_TypeError,
3208 "* wants int");
3209 goto error;
3211 width = PyInt_AsLong(v);
3212 if (width < 0) {
3213 flags |= F_LJUST;
3214 width = -width;
3216 if (--fmtcnt >= 0)
3217 c = *fmt++;
3219 else if (c >= 0 && isdigit(c)) {
3220 width = c - '0';
3221 while (--fmtcnt >= 0) {
3222 c = Py_CHARMASK(*fmt++);
3223 if (!isdigit(c))
3224 break;
3225 if ((width*10) / 10 != width) {
3226 PyErr_SetString(
3227 PyExc_ValueError,
3228 "width too big");
3229 goto error;
3231 width = width*10 + (c - '0');
3234 if (c == '.') {
3235 prec = 0;
3236 if (--fmtcnt >= 0)
3237 c = *fmt++;
3238 if (c == '*') {
3239 v = getnextarg(args, arglen, &argidx);
3240 if (v == NULL)
3241 goto error;
3242 if (!PyInt_Check(v)) {
3243 PyErr_SetString(
3244 PyExc_TypeError,
3245 "* wants int");
3246 goto error;
3248 prec = PyInt_AsLong(v);
3249 if (prec < 0)
3250 prec = 0;
3251 if (--fmtcnt >= 0)
3252 c = *fmt++;
3254 else if (c >= 0 && isdigit(c)) {
3255 prec = c - '0';
3256 while (--fmtcnt >= 0) {
3257 c = Py_CHARMASK(*fmt++);
3258 if (!isdigit(c))
3259 break;
3260 if ((prec*10) / 10 != prec) {
3261 PyErr_SetString(
3262 PyExc_ValueError,
3263 "prec too big");
3264 goto error;
3266 prec = prec*10 + (c - '0');
3269 } /* prec */
3270 if (fmtcnt >= 0) {
3271 if (c == 'h' || c == 'l' || c == 'L') {
3272 if (--fmtcnt >= 0)
3273 c = *fmt++;
3276 if (fmtcnt < 0) {
3277 PyErr_SetString(PyExc_ValueError,
3278 "incomplete format");
3279 goto error;
3281 if (c != '%') {
3282 v = getnextarg(args, arglen, &argidx);
3283 if (v == NULL)
3284 goto error;
3286 sign = 0;
3287 fill = ' ';
3288 switch (c) {
3289 case '%':
3290 pbuf = "%";
3291 len = 1;
3292 break;
3293 case 's':
3294 case 'r':
3295 #ifdef Py_USING_UNICODE
3296 if (PyUnicode_Check(v)) {
3297 fmt = fmt_start;
3298 argidx = argidx_start;
3299 goto unicode;
3301 #endif
3302 if (c == 's')
3303 temp = PyObject_Str(v);
3304 else
3305 temp = PyObject_Repr(v);
3306 if (temp == NULL)
3307 goto error;
3308 if (!PyString_Check(temp)) {
3309 PyErr_SetString(PyExc_TypeError,
3310 "%s argument has non-string str()");
3311 goto error;
3313 pbuf = PyString_AsString(temp);
3314 len = PyString_Size(temp);
3315 if (prec >= 0 && len > prec)
3316 len = prec;
3317 break;
3318 case 'i':
3319 case 'd':
3320 case 'u':
3321 case 'o':
3322 case 'x':
3323 case 'X':
3324 if (c == 'i')
3325 c = 'd';
3326 if (PyLong_Check(v)) {
3327 temp = _PyString_FormatLong(v, flags,
3328 prec, c, &pbuf, &len);
3329 if (!temp)
3330 goto error;
3331 /* unbounded ints can always produce
3332 a sign character! */
3333 sign = 1;
3335 else {
3336 pbuf = formatbuf;
3337 len = formatint(pbuf, sizeof(formatbuf),
3338 flags, prec, c, v);
3339 if (len < 0)
3340 goto error;
3341 /* only d conversion is signed */
3342 sign = c == 'd';
3344 if (flags & F_ZERO)
3345 fill = '0';
3346 break;
3347 case 'e':
3348 case 'E':
3349 case 'f':
3350 case 'g':
3351 case 'G':
3352 pbuf = formatbuf;
3353 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
3354 if (len < 0)
3355 goto error;
3356 sign = 1;
3357 if (flags & F_ZERO)
3358 fill = '0';
3359 break;
3360 case 'c':
3361 pbuf = formatbuf;
3362 len = formatchar(pbuf, sizeof(formatbuf), v);
3363 if (len < 0)
3364 goto error;
3365 break;
3366 default:
3367 PyErr_Format(PyExc_ValueError,
3368 "unsupported format character '%c' (0x%x) "
3369 "at index %i",
3370 c, c, fmt - 1 - PyString_AsString(format));
3371 goto error;
3373 if (sign) {
3374 if (*pbuf == '-' || *pbuf == '+') {
3375 sign = *pbuf++;
3376 len--;
3378 else if (flags & F_SIGN)
3379 sign = '+';
3380 else if (flags & F_BLANK)
3381 sign = ' ';
3382 else
3383 sign = 0;
3385 if (width < len)
3386 width = len;
3387 if (rescnt < width + (sign != 0)) {
3388 reslen -= rescnt;
3389 rescnt = width + fmtcnt + 100;
3390 reslen += rescnt;
3391 if (_PyString_Resize(&result, reslen) < 0)
3392 return NULL;
3393 res = PyString_AsString(result)
3394 + reslen - rescnt;
3396 if (sign) {
3397 if (fill != ' ')
3398 *res++ = sign;
3399 rescnt--;
3400 if (width > len)
3401 width--;
3403 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3404 assert(pbuf[0] == '0');
3405 assert(pbuf[1] == c);
3406 if (fill != ' ') {
3407 *res++ = *pbuf++;
3408 *res++ = *pbuf++;
3410 rescnt -= 2;
3411 width -= 2;
3412 if (width < 0)
3413 width = 0;
3414 len -= 2;
3416 if (width > len && !(flags & F_LJUST)) {
3417 do {
3418 --rescnt;
3419 *res++ = fill;
3420 } while (--width > len);
3422 if (fill == ' ') {
3423 if (sign)
3424 *res++ = sign;
3425 if ((flags & F_ALT) &&
3426 (c == 'x' || c == 'X')) {
3427 assert(pbuf[0] == '0');
3428 assert(pbuf[1] == c);
3429 *res++ = *pbuf++;
3430 *res++ = *pbuf++;
3433 memcpy(res, pbuf, len);
3434 res += len;
3435 rescnt -= len;
3436 while (--width >= len) {
3437 --rescnt;
3438 *res++ = ' ';
3440 if (dict && (argidx < arglen) && c != '%') {
3441 PyErr_SetString(PyExc_TypeError,
3442 "not all arguments converted");
3443 goto error;
3445 Py_XDECREF(temp);
3446 } /* '%' */
3447 } /* until end */
3448 if (argidx < arglen && !dict) {
3449 PyErr_SetString(PyExc_TypeError,
3450 "not all arguments converted");
3451 goto error;
3453 if (args_owned) {
3454 Py_DECREF(args);
3456 _PyString_Resize(&result, reslen - rescnt);
3457 return result;
3459 #ifdef Py_USING_UNICODE
3460 unicode:
3461 if (args_owned) {
3462 Py_DECREF(args);
3463 args_owned = 0;
3465 /* Fiddle args right (remove the first argidx arguments) */
3466 if (PyTuple_Check(orig_args) && argidx > 0) {
3467 PyObject *v;
3468 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3469 v = PyTuple_New(n);
3470 if (v == NULL)
3471 goto error;
3472 while (--n >= 0) {
3473 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3474 Py_INCREF(w);
3475 PyTuple_SET_ITEM(v, n, w);
3477 args = v;
3478 } else {
3479 Py_INCREF(orig_args);
3480 args = orig_args;
3482 args_owned = 1;
3483 /* Take what we have of the result and let the Unicode formatting
3484 function format the rest of the input. */
3485 rescnt = res - PyString_AS_STRING(result);
3486 if (_PyString_Resize(&result, rescnt))
3487 goto error;
3488 fmtcnt = PyString_GET_SIZE(format) - \
3489 (fmt - PyString_AS_STRING(format));
3490 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3491 if (format == NULL)
3492 goto error;
3493 v = PyUnicode_Format(format, args);
3494 Py_DECREF(format);
3495 if (v == NULL)
3496 goto error;
3497 /* Paste what we have (result) to what the Unicode formatting
3498 function returned (v) and return the result (or error) */
3499 w = PyUnicode_Concat(result, v);
3500 Py_DECREF(result);
3501 Py_DECREF(v);
3502 Py_DECREF(args);
3503 return w;
3504 #endif /* Py_USING_UNICODE */
3506 error:
3507 Py_DECREF(result);
3508 if (args_owned) {
3509 Py_DECREF(args);
3511 return NULL;
3515 #ifdef INTERN_STRINGS
3517 /* This dictionary will leak at PyString_Fini() time. That's acceptable
3518 * because PyString_Fini() specifically frees interned strings that are
3519 * only referenced by this dictionary. The CVS log entry for revision 2.45
3520 * says:
3522 * Change the Fini function to only remove otherwise unreferenced
3523 * strings from the interned table. There are references in
3524 * hard-to-find static variables all over the interpreter, and it's not
3525 * worth trying to get rid of all those; but "uninterning" isn't fair
3526 * either and may cause subtle failures later -- so we have to keep them
3527 * in the interned table.
3529 static PyObject *interned;
3531 void
3532 PyString_InternInPlace(PyObject **p)
3534 register PyStringObject *s = (PyStringObject *)(*p);
3535 PyObject *t;
3536 if (s == NULL || !PyString_Check(s))
3537 Py_FatalError("PyString_InternInPlace: strings only please!");
3538 if ((t = s->ob_sinterned) != NULL) {
3539 if (t == (PyObject *)s)
3540 return;
3541 Py_INCREF(t);
3542 *p = t;
3543 Py_DECREF(s);
3544 return;
3546 if (interned == NULL) {
3547 interned = PyDict_New();
3548 if (interned == NULL)
3549 return;
3551 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3552 Py_INCREF(t);
3553 *p = s->ob_sinterned = t;
3554 Py_DECREF(s);
3555 return;
3557 /* Ensure that only true string objects appear in the intern dict,
3558 and as the value of ob_sinterned. */
3559 if (PyString_CheckExact(s)) {
3560 t = (PyObject *)s;
3561 if (PyDict_SetItem(interned, t, t) == 0) {
3562 s->ob_sinterned = t;
3563 return;
3566 else {
3567 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
3568 PyString_GET_SIZE(s));
3569 if (t != NULL) {
3570 if (PyDict_SetItem(interned, t, t) == 0) {
3571 *p = s->ob_sinterned = t;
3572 Py_DECREF(s);
3573 return;
3575 Py_DECREF(t);
3578 PyErr_Clear();
3582 PyObject *
3583 PyString_InternFromString(const char *cp)
3585 PyObject *s = PyString_FromString(cp);
3586 if (s == NULL)
3587 return NULL;
3588 PyString_InternInPlace(&s);
3589 return s;
3592 #endif
3594 void
3595 PyString_Fini(void)
3597 int i;
3598 for (i = 0; i < UCHAR_MAX + 1; i++) {
3599 Py_XDECREF(characters[i]);
3600 characters[i] = NULL;
3602 #ifndef DONT_SHARE_SHORT_STRINGS
3603 Py_XDECREF(nullstring);
3604 nullstring = NULL;
3605 #endif
3606 #ifdef INTERN_STRINGS
3607 if (interned) {
3608 int pos, changed;
3609 PyObject *key, *value;
3610 do {
3611 changed = 0;
3612 pos = 0;
3613 while (PyDict_Next(interned, &pos, &key, &value)) {
3614 if (key->ob_refcnt == 2 && key == value) {
3615 PyDict_DelItem(interned, key);
3616 changed = 1;
3619 } while (changed);
3621 #endif
#ifdef INTERN_STRINGS
/* Empty and release the interned-string table, if one exists, after
   announcing it on stderr. */
void _Py_ReleaseInternedStrings(void)
{
	if (interned == NULL)
		return;

	fprintf(stderr, "releasing interned strings\n");
	PyDict_Clear(interned);
	Py_DECREF(interned);
	interned = NULL;
}
#endif /* INTERN_STRINGS */