move sections
[python/dscho.git] / Objects / stringobject.c
blob8d3403a0c3e66724cf1863c908ea8f849ba37eaa
1 /* String (str/bytes) object implementation */
3 #define PY_SSIZE_T_CLEAN
5 #include "Python.h"
6 #include <ctype.h>
7 #include <stddef.h>
9 #ifdef COUNT_ALLOCS
10 Py_ssize_t null_strings, one_strings;
11 #endif
13 static PyStringObject *characters[UCHAR_MAX + 1];
14 static PyStringObject *nullstring;
16 /* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
21 Another way to look at this is to say that the actual reference
22 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
24 static PyObject *interned;
26 /* PyStringObject_SIZE gives the basic size of a string; any memory allocation
27 for a string of length n should request PyStringObject_SIZE + n bytes.
29 Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
30 3 bytes per string allocation on a typical system.
32 #define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
35 For both PyString_FromString() and PyString_FromStringAndSize(), the
36 parameter `size' denotes number of characters to allocate, not counting any
37 null terminating character.
39 For PyString_FromString(), the parameter `str' points to a null-terminated
40 string containing exactly `size' bytes.
42 For PyString_FromStringAndSize(), the parameter `str' is
43 either NULL or else points to a string containing at least `size' bytes.
44 For PyString_FromStringAndSize(), the string in the `str' parameter does
45 not have to be null-terminated. (Therefore it is safe to construct a
46 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
47 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
48 bytes (setting the last byte to the null terminating character) and you can
49 fill in the data yourself. If `str' is non-NULL then the resulting
50 PyString object must be treated as immutable and you must not fill in nor
51 alter the data yourself, since the strings may be shared.
53 The PyObject member `op->ob_size', which denotes the number of "extra
54 items" in a variable-size object, will contain the number of bytes
55 allocated for string data, not counting the null terminating character. It
56 is therefore equal to the `size' parameter (for
57 PyString_FromStringAndSize()) or the length of the string in the `str'
58 parameter (for PyString_FromString()).
60 PyObject *
61 PyString_FromStringAndSize(const char *str, Py_ssize_t size)
63 register PyStringObject *op;
64 if (size < 0) {
65 PyErr_SetString(PyExc_SystemError,
66 "Negative size passed to PyString_FromStringAndSize");
67 return NULL;
69 if (size == 0 && (op = nullstring) != NULL) {
70 #ifdef COUNT_ALLOCS
71 null_strings++;
72 #endif
73 Py_INCREF(op);
74 return (PyObject *)op;
76 if (size == 1 && str != NULL &&
77 (op = characters[*str & UCHAR_MAX]) != NULL)
79 #ifdef COUNT_ALLOCS
80 one_strings++;
81 #endif
82 Py_INCREF(op);
83 return (PyObject *)op;
86 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
87 PyErr_SetString(PyExc_OverflowError, "string is too large");
88 return NULL;
91 /* Inline PyObject_NewVar */
92 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
93 if (op == NULL)
94 return PyErr_NoMemory();
95 PyObject_INIT_VAR(op, &PyString_Type, size);
96 op->ob_shash = -1;
97 op->ob_sstate = SSTATE_NOT_INTERNED;
98 if (str != NULL)
99 Py_MEMCPY(op->ob_sval, str, size);
100 op->ob_sval[size] = '\0';
101 /* share short strings */
102 if (size == 0) {
103 PyObject *t = (PyObject *)op;
104 PyString_InternInPlace(&t);
105 op = (PyStringObject *)t;
106 nullstring = op;
107 Py_INCREF(op);
108 } else if (size == 1 && str != NULL) {
109 PyObject *t = (PyObject *)op;
110 PyString_InternInPlace(&t);
111 op = (PyStringObject *)t;
112 characters[*str & UCHAR_MAX] = op;
113 Py_INCREF(op);
115 return (PyObject *) op;
118 PyObject *
119 PyString_FromString(const char *str)
121 register size_t size;
122 register PyStringObject *op;
124 assert(str != NULL);
125 size = strlen(str);
126 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
127 PyErr_SetString(PyExc_OverflowError,
128 "string is too long for a Python string");
129 return NULL;
131 if (size == 0 && (op = nullstring) != NULL) {
132 #ifdef COUNT_ALLOCS
133 null_strings++;
134 #endif
135 Py_INCREF(op);
136 return (PyObject *)op;
138 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
139 #ifdef COUNT_ALLOCS
140 one_strings++;
141 #endif
142 Py_INCREF(op);
143 return (PyObject *)op;
146 /* Inline PyObject_NewVar */
147 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
148 if (op == NULL)
149 return PyErr_NoMemory();
150 PyObject_INIT_VAR(op, &PyString_Type, size);
151 op->ob_shash = -1;
152 op->ob_sstate = SSTATE_NOT_INTERNED;
153 Py_MEMCPY(op->ob_sval, str, size+1);
154 /* share short strings */
155 if (size == 0) {
156 PyObject *t = (PyObject *)op;
157 PyString_InternInPlace(&t);
158 op = (PyStringObject *)t;
159 nullstring = op;
160 Py_INCREF(op);
161 } else if (size == 1) {
162 PyObject *t = (PyObject *)op;
163 PyString_InternInPlace(&t);
164 op = (PyStringObject *)t;
165 characters[*str & UCHAR_MAX] = op;
166 Py_INCREF(op);
168 return (PyObject *) op;
171 PyObject *
172 PyString_FromFormatV(const char *format, va_list vargs)
174 va_list count;
175 Py_ssize_t n = 0;
176 const char* f;
177 char *s;
178 PyObject* string;
180 #ifdef VA_LIST_IS_ARRAY
181 Py_MEMCPY(count, vargs, sizeof(va_list));
182 #else
183 #ifdef __va_copy
184 __va_copy(count, vargs);
185 #else
186 count = vargs;
187 #endif
188 #endif
189 /* step 1: figure out how large a buffer we need */
190 for (f = format; *f; f++) {
191 if (*f == '%') {
192 #ifdef HAVE_LONG_LONG
193 int longlongflag = 0;
194 #endif
195 const char* p = f;
196 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
199 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
200 * they don't affect the amount of space we reserve.
202 if (*f == 'l') {
203 if (f[1] == 'd' || f[1] == 'u') {
204 ++f;
206 #ifdef HAVE_LONG_LONG
207 else if (f[1] == 'l' &&
208 (f[2] == 'd' || f[2] == 'u')) {
209 longlongflag = 1;
210 f += 2;
212 #endif
214 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
215 ++f;
218 switch (*f) {
219 case 'c':
220 (void)va_arg(count, int);
221 /* fall through... */
222 case '%':
223 n++;
224 break;
225 case 'd': case 'u': case 'i': case 'x':
226 (void) va_arg(count, int);
227 #ifdef HAVE_LONG_LONG
228 /* Need at most
229 ceil(log10(256)*SIZEOF_LONG_LONG) digits,
230 plus 1 for the sign. 53/22 is an upper
231 bound for log10(256). */
232 if (longlongflag)
233 n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
234 else
235 #endif
236 /* 20 bytes is enough to hold a 64-bit
237 integer. Decimal takes the most
238 space. This isn't enough for
239 octal. */
240 n += 20;
242 break;
243 case 's':
244 s = va_arg(count, char*);
245 n += strlen(s);
246 break;
247 case 'p':
248 (void) va_arg(count, int);
249 /* maximum 64-bit pointer representation:
250 * 0xffffffffffffffff
251 * so 19 characters is enough.
252 * XXX I count 18 -- what's the extra for?
254 n += 19;
255 break;
256 default:
257 /* if we stumble upon an unknown
258 formatting code, copy the rest of
259 the format string to the output
260 string. (we cannot just skip the
261 code, since there's no way to know
262 what's in the argument list) */
263 n += strlen(p);
264 goto expand;
266 } else
267 n++;
269 expand:
270 /* step 2: fill the buffer */
271 /* Since we've analyzed how much space we need for the worst case,
272 use sprintf directly instead of the slower PyOS_snprintf. */
273 string = PyString_FromStringAndSize(NULL, n);
274 if (!string)
275 return NULL;
277 s = PyString_AsString(string);
279 for (f = format; *f; f++) {
280 if (*f == '%') {
281 const char* p = f++;
282 Py_ssize_t i;
283 int longflag = 0;
284 #ifdef HAVE_LONG_LONG
285 int longlongflag = 0;
286 #endif
287 int size_tflag = 0;
288 /* parse the width.precision part (we're only
289 interested in the precision value, if any) */
290 n = 0;
291 while (isdigit(Py_CHARMASK(*f)))
292 n = (n*10) + *f++ - '0';
293 if (*f == '.') {
294 f++;
295 n = 0;
296 while (isdigit(Py_CHARMASK(*f)))
297 n = (n*10) + *f++ - '0';
299 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
300 f++;
301 /* Handle %ld, %lu, %lld and %llu. */
302 if (*f == 'l') {
303 if (f[1] == 'd' || f[1] == 'u') {
304 longflag = 1;
305 ++f;
307 #ifdef HAVE_LONG_LONG
308 else if (f[1] == 'l' &&
309 (f[2] == 'd' || f[2] == 'u')) {
310 longlongflag = 1;
311 f += 2;
313 #endif
315 /* handle the size_t flag. */
316 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
317 size_tflag = 1;
318 ++f;
321 switch (*f) {
322 case 'c':
323 *s++ = va_arg(vargs, int);
324 break;
325 case 'd':
326 if (longflag)
327 sprintf(s, "%ld", va_arg(vargs, long));
328 #ifdef HAVE_LONG_LONG
329 else if (longlongflag)
330 sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
331 va_arg(vargs, PY_LONG_LONG));
332 #endif
333 else if (size_tflag)
334 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
335 va_arg(vargs, Py_ssize_t));
336 else
337 sprintf(s, "%d", va_arg(vargs, int));
338 s += strlen(s);
339 break;
340 case 'u':
341 if (longflag)
342 sprintf(s, "%lu",
343 va_arg(vargs, unsigned long));
344 #ifdef HAVE_LONG_LONG
345 else if (longlongflag)
346 sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
347 va_arg(vargs, PY_LONG_LONG));
348 #endif
349 else if (size_tflag)
350 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
351 va_arg(vargs, size_t));
352 else
353 sprintf(s, "%u",
354 va_arg(vargs, unsigned int));
355 s += strlen(s);
356 break;
357 case 'i':
358 sprintf(s, "%i", va_arg(vargs, int));
359 s += strlen(s);
360 break;
361 case 'x':
362 sprintf(s, "%x", va_arg(vargs, int));
363 s += strlen(s);
364 break;
365 case 's':
366 p = va_arg(vargs, char*);
367 i = strlen(p);
368 if (n > 0 && i > n)
369 i = n;
370 Py_MEMCPY(s, p, i);
371 s += i;
372 break;
373 case 'p':
374 sprintf(s, "%p", va_arg(vargs, void*));
375 /* %p is ill-defined: ensure leading 0x. */
376 if (s[1] == 'X')
377 s[1] = 'x';
378 else if (s[1] != 'x') {
379 memmove(s+2, s, strlen(s)+1);
380 s[0] = '0';
381 s[1] = 'x';
383 s += strlen(s);
384 break;
385 case '%':
386 *s++ = '%';
387 break;
388 default:
389 strcpy(s, p);
390 s += strlen(s);
391 goto end;
393 } else
394 *s++ = *f;
397 end:
398 if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))
399 return NULL;
400 return string;
403 PyObject *
404 PyString_FromFormat(const char *format, ...)
406 PyObject* ret;
407 va_list vargs;
409 #ifdef HAVE_STDARG_PROTOTYPES
410 va_start(vargs, format);
411 #else
412 va_start(vargs);
413 #endif
414 ret = PyString_FromFormatV(format, vargs);
415 va_end(vargs);
416 return ret;
420 PyObject *PyString_Decode(const char *s,
421 Py_ssize_t size,
422 const char *encoding,
423 const char *errors)
425 PyObject *v, *str;
427 str = PyString_FromStringAndSize(s, size);
428 if (str == NULL)
429 return NULL;
430 v = PyString_AsDecodedString(str, encoding, errors);
431 Py_DECREF(str);
432 return v;
435 PyObject *PyString_AsDecodedObject(PyObject *str,
436 const char *encoding,
437 const char *errors)
439 PyObject *v;
441 if (!PyString_Check(str)) {
442 PyErr_BadArgument();
443 goto onError;
446 if (encoding == NULL) {
447 #ifdef Py_USING_UNICODE
448 encoding = PyUnicode_GetDefaultEncoding();
449 #else
450 PyErr_SetString(PyExc_ValueError, "no encoding specified");
451 goto onError;
452 #endif
455 /* Decode via the codec registry */
456 v = PyCodec_Decode(str, encoding, errors);
457 if (v == NULL)
458 goto onError;
460 return v;
462 onError:
463 return NULL;
466 PyObject *PyString_AsDecodedString(PyObject *str,
467 const char *encoding,
468 const char *errors)
470 PyObject *v;
472 v = PyString_AsDecodedObject(str, encoding, errors);
473 if (v == NULL)
474 goto onError;
476 #ifdef Py_USING_UNICODE
477 /* Convert Unicode to a string using the default encoding */
478 if (PyUnicode_Check(v)) {
479 PyObject *temp = v;
480 v = PyUnicode_AsEncodedString(v, NULL, NULL);
481 Py_DECREF(temp);
482 if (v == NULL)
483 goto onError;
485 #endif
486 if (!PyString_Check(v)) {
487 PyErr_Format(PyExc_TypeError,
488 "decoder did not return a string object (type=%.400s)",
489 Py_TYPE(v)->tp_name);
490 Py_DECREF(v);
491 goto onError;
494 return v;
496 onError:
497 return NULL;
500 PyObject *PyString_Encode(const char *s,
501 Py_ssize_t size,
502 const char *encoding,
503 const char *errors)
505 PyObject *v, *str;
507 str = PyString_FromStringAndSize(s, size);
508 if (str == NULL)
509 return NULL;
510 v = PyString_AsEncodedString(str, encoding, errors);
511 Py_DECREF(str);
512 return v;
515 PyObject *PyString_AsEncodedObject(PyObject *str,
516 const char *encoding,
517 const char *errors)
519 PyObject *v;
521 if (!PyString_Check(str)) {
522 PyErr_BadArgument();
523 goto onError;
526 if (encoding == NULL) {
527 #ifdef Py_USING_UNICODE
528 encoding = PyUnicode_GetDefaultEncoding();
529 #else
530 PyErr_SetString(PyExc_ValueError, "no encoding specified");
531 goto onError;
532 #endif
535 /* Encode via the codec registry */
536 v = PyCodec_Encode(str, encoding, errors);
537 if (v == NULL)
538 goto onError;
540 return v;
542 onError:
543 return NULL;
546 PyObject *PyString_AsEncodedString(PyObject *str,
547 const char *encoding,
548 const char *errors)
550 PyObject *v;
552 v = PyString_AsEncodedObject(str, encoding, errors);
553 if (v == NULL)
554 goto onError;
556 #ifdef Py_USING_UNICODE
557 /* Convert Unicode to a string using the default encoding */
558 if (PyUnicode_Check(v)) {
559 PyObject *temp = v;
560 v = PyUnicode_AsEncodedString(v, NULL, NULL);
561 Py_DECREF(temp);
562 if (v == NULL)
563 goto onError;
565 #endif
566 if (!PyString_Check(v)) {
567 PyErr_Format(PyExc_TypeError,
568 "encoder did not return a string object (type=%.400s)",
569 Py_TYPE(v)->tp_name);
570 Py_DECREF(v);
571 goto onError;
574 return v;
576 onError:
577 return NULL;
580 static void
581 string_dealloc(PyObject *op)
583 switch (PyString_CHECK_INTERNED(op)) {
584 case SSTATE_NOT_INTERNED:
585 break;
587 case SSTATE_INTERNED_MORTAL:
588 /* revive dead object temporarily for DelItem */
589 Py_REFCNT(op) = 3;
590 if (PyDict_DelItem(interned, op) != 0)
591 Py_FatalError(
592 "deletion of interned string failed");
593 break;
595 case SSTATE_INTERNED_IMMORTAL:
596 Py_FatalError("Immortal interned string died.");
598 default:
599 Py_FatalError("Inconsistent interned string state.");
601 Py_TYPE(op)->tp_free(op);
604 /* Unescape a backslash-escaped string. If unicode is non-zero,
605 the string is a u-literal. If recode_encoding is non-zero,
606 the string is UTF-8 encoded and should be re-encoded in the
607 specified encoding. */
609 PyObject *PyString_DecodeEscape(const char *s,
610 Py_ssize_t len,
611 const char *errors,
612 Py_ssize_t unicode,
613 const char *recode_encoding)
615 int c;
616 char *p, *buf;
617 const char *end;
618 PyObject *v;
619 Py_ssize_t newlen = recode_encoding ? 4*len:len;
620 v = PyString_FromStringAndSize((char *)NULL, newlen);
621 if (v == NULL)
622 return NULL;
623 p = buf = PyString_AsString(v);
624 end = s + len;
625 while (s < end) {
626 if (*s != '\\') {
627 non_esc:
628 #ifdef Py_USING_UNICODE
629 if (recode_encoding && (*s & 0x80)) {
630 PyObject *u, *w;
631 char *r;
632 const char* t;
633 Py_ssize_t rn;
634 t = s;
635 /* Decode non-ASCII bytes as UTF-8. */
636 while (t < end && (*t & 0x80)) t++;
637 u = PyUnicode_DecodeUTF8(s, t - s, errors);
638 if(!u) goto failed;
640 /* Recode them in target encoding. */
641 w = PyUnicode_AsEncodedString(
642 u, recode_encoding, errors);
643 Py_DECREF(u);
644 if (!w) goto failed;
646 /* Append bytes to output buffer. */
647 assert(PyString_Check(w));
648 r = PyString_AS_STRING(w);
649 rn = PyString_GET_SIZE(w);
650 Py_MEMCPY(p, r, rn);
651 p += rn;
652 Py_DECREF(w);
653 s = t;
654 } else {
655 *p++ = *s++;
657 #else
658 *p++ = *s++;
659 #endif
660 continue;
662 s++;
663 if (s==end) {
664 PyErr_SetString(PyExc_ValueError,
665 "Trailing \\ in string");
666 goto failed;
668 switch (*s++) {
669 /* XXX This assumes ASCII! */
670 case '\n': break;
671 case '\\': *p++ = '\\'; break;
672 case '\'': *p++ = '\''; break;
673 case '\"': *p++ = '\"'; break;
674 case 'b': *p++ = '\b'; break;
675 case 'f': *p++ = '\014'; break; /* FF */
676 case 't': *p++ = '\t'; break;
677 case 'n': *p++ = '\n'; break;
678 case 'r': *p++ = '\r'; break;
679 case 'v': *p++ = '\013'; break; /* VT */
680 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
681 case '0': case '1': case '2': case '3':
682 case '4': case '5': case '6': case '7':
683 c = s[-1] - '0';
684 if (s < end && '0' <= *s && *s <= '7') {
685 c = (c<<3) + *s++ - '0';
686 if (s < end && '0' <= *s && *s <= '7')
687 c = (c<<3) + *s++ - '0';
689 *p++ = c;
690 break;
691 case 'x':
692 if (s+1 < end &&
693 isxdigit(Py_CHARMASK(s[0])) &&
694 isxdigit(Py_CHARMASK(s[1])))
696 unsigned int x = 0;
697 c = Py_CHARMASK(*s);
698 s++;
699 if (isdigit(c))
700 x = c - '0';
701 else if (islower(c))
702 x = 10 + c - 'a';
703 else
704 x = 10 + c - 'A';
705 x = x << 4;
706 c = Py_CHARMASK(*s);
707 s++;
708 if (isdigit(c))
709 x += c - '0';
710 else if (islower(c))
711 x += 10 + c - 'a';
712 else
713 x += 10 + c - 'A';
714 *p++ = x;
715 break;
717 if (!errors || strcmp(errors, "strict") == 0) {
718 PyErr_SetString(PyExc_ValueError,
719 "invalid \\x escape");
720 goto failed;
722 if (strcmp(errors, "replace") == 0) {
723 *p++ = '?';
724 } else if (strcmp(errors, "ignore") == 0)
725 /* do nothing */;
726 else {
727 PyErr_Format(PyExc_ValueError,
728 "decoding error; "
729 "unknown error handling code: %.400s",
730 errors);
731 goto failed;
733 #ifndef Py_USING_UNICODE
734 case 'u':
735 case 'U':
736 case 'N':
737 if (unicode) {
738 PyErr_SetString(PyExc_ValueError,
739 "Unicode escapes not legal "
740 "when Unicode disabled");
741 goto failed;
743 #endif
744 default:
745 *p++ = '\\';
746 s--;
747 goto non_esc; /* an arbitry number of unescaped
748 UTF-8 bytes may follow. */
751 if (p-buf < newlen && _PyString_Resize(&v, p - buf))
752 goto failed;
753 return v;
754 failed:
755 Py_DECREF(v);
756 return NULL;
759 /* -------------------------------------------------------------------- */
760 /* object api */
762 static Py_ssize_t
763 string_getsize(register PyObject *op)
765 char *s;
766 Py_ssize_t len;
767 if (PyString_AsStringAndSize(op, &s, &len))
768 return -1;
769 return len;
772 static /*const*/ char *
773 string_getbuffer(register PyObject *op)
775 char *s;
776 Py_ssize_t len;
777 if (PyString_AsStringAndSize(op, &s, &len))
778 return NULL;
779 return s;
782 Py_ssize_t
783 PyString_Size(register PyObject *op)
785 if (!PyString_Check(op))
786 return string_getsize(op);
787 return Py_SIZE(op);
790 /*const*/ char *
791 PyString_AsString(register PyObject *op)
793 if (!PyString_Check(op))
794 return string_getbuffer(op);
795 return ((PyStringObject *)op) -> ob_sval;
799 PyString_AsStringAndSize(register PyObject *obj,
800 register char **s,
801 register Py_ssize_t *len)
803 if (s == NULL) {
804 PyErr_BadInternalCall();
805 return -1;
808 if (!PyString_Check(obj)) {
809 #ifdef Py_USING_UNICODE
810 if (PyUnicode_Check(obj)) {
811 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
812 if (obj == NULL)
813 return -1;
815 else
816 #endif
818 PyErr_Format(PyExc_TypeError,
819 "expected string or Unicode object, "
820 "%.200s found", Py_TYPE(obj)->tp_name);
821 return -1;
825 *s = PyString_AS_STRING(obj);
826 if (len != NULL)
827 *len = PyString_GET_SIZE(obj);
828 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
829 PyErr_SetString(PyExc_TypeError,
830 "expected string without null bytes");
831 return -1;
833 return 0;
836 /* -------------------------------------------------------------------- */
837 /* Methods */
839 #include "stringlib/stringdefs.h"
840 #include "stringlib/fastsearch.h"
842 #include "stringlib/count.h"
843 #include "stringlib/find.h"
844 #include "stringlib/partition.h"
845 #include "stringlib/split.h"
847 #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
848 #include "stringlib/localeutil.h"
852 static int
853 string_print(PyStringObject *op, FILE *fp, int flags)
855 Py_ssize_t i, str_len;
856 char c;
857 int quote;
859 /* XXX Ought to check for interrupts when writing long strings */
860 if (! PyString_CheckExact(op)) {
861 int ret;
862 /* A str subclass may have its own __str__ method. */
863 op = (PyStringObject *) PyObject_Str((PyObject *)op);
864 if (op == NULL)
865 return -1;
866 ret = string_print(op, fp, flags);
867 Py_DECREF(op);
868 return ret;
870 if (flags & Py_PRINT_RAW) {
871 char *data = op->ob_sval;
872 Py_ssize_t size = Py_SIZE(op);
873 Py_BEGIN_ALLOW_THREADS
874 while (size > INT_MAX) {
875 /* Very long strings cannot be written atomically.
876 * But don't write exactly INT_MAX bytes at a time
877 * to avoid memory aligment issues.
879 const int chunk_size = INT_MAX & ~0x3FFF;
880 fwrite(data, 1, chunk_size, fp);
881 data += chunk_size;
882 size -= chunk_size;
884 #ifdef __VMS
885 if (size) fwrite(data, (int)size, 1, fp);
886 #else
887 fwrite(data, 1, (int)size, fp);
888 #endif
889 Py_END_ALLOW_THREADS
890 return 0;
893 /* figure out which quote to use; single is preferred */
894 quote = '\'';
895 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
896 !memchr(op->ob_sval, '"', Py_SIZE(op)))
897 quote = '"';
899 str_len = Py_SIZE(op);
900 Py_BEGIN_ALLOW_THREADS
901 fputc(quote, fp);
902 for (i = 0; i < str_len; i++) {
903 /* Since strings are immutable and the caller should have a
904 reference, accessing the interal buffer should not be an issue
905 with the GIL released. */
906 c = op->ob_sval[i];
907 if (c == quote || c == '\\')
908 fprintf(fp, "\\%c", c);
909 else if (c == '\t')
910 fprintf(fp, "\\t");
911 else if (c == '\n')
912 fprintf(fp, "\\n");
913 else if (c == '\r')
914 fprintf(fp, "\\r");
915 else if (c < ' ' || c >= 0x7f)
916 fprintf(fp, "\\x%02x", c & 0xff);
917 else
918 fputc(c, fp);
920 fputc(quote, fp);
921 Py_END_ALLOW_THREADS
922 return 0;
925 PyObject *
926 PyString_Repr(PyObject *obj, int smartquotes)
928 register PyStringObject* op = (PyStringObject*) obj;
929 size_t newsize = 2 + 4 * Py_SIZE(op);
930 PyObject *v;
931 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
932 PyErr_SetString(PyExc_OverflowError,
933 "string is too large to make repr");
934 return NULL;
936 v = PyString_FromStringAndSize((char *)NULL, newsize);
937 if (v == NULL) {
938 return NULL;
940 else {
941 register Py_ssize_t i;
942 register char c;
943 register char *p;
944 int quote;
946 /* figure out which quote to use; single is preferred */
947 quote = '\'';
948 if (smartquotes &&
949 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
950 !memchr(op->ob_sval, '"', Py_SIZE(op)))
951 quote = '"';
953 p = PyString_AS_STRING(v);
954 *p++ = quote;
955 for (i = 0; i < Py_SIZE(op); i++) {
956 /* There's at least enough room for a hex escape
957 and a closing quote. */
958 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
959 c = op->ob_sval[i];
960 if (c == quote || c == '\\')
961 *p++ = '\\', *p++ = c;
962 else if (c == '\t')
963 *p++ = '\\', *p++ = 't';
964 else if (c == '\n')
965 *p++ = '\\', *p++ = 'n';
966 else if (c == '\r')
967 *p++ = '\\', *p++ = 'r';
968 else if (c < ' ' || c >= 0x7f) {
969 /* For performance, we don't want to call
970 PyOS_snprintf here (extra layers of
971 function call). */
972 sprintf(p, "\\x%02x", c & 0xff);
973 p += 4;
975 else
976 *p++ = c;
978 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
979 *p++ = quote;
980 *p = '\0';
981 if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))
982 return NULL;
983 return v;
987 static PyObject *
988 string_repr(PyObject *op)
990 return PyString_Repr(op, 1);
993 static PyObject *
994 string_str(PyObject *s)
996 assert(PyString_Check(s));
997 if (PyString_CheckExact(s)) {
998 Py_INCREF(s);
999 return s;
1001 else {
1002 /* Subtype -- return genuine string with the same value. */
1003 PyStringObject *t = (PyStringObject *) s;
1004 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
1008 static Py_ssize_t
1009 string_length(PyStringObject *a)
1011 return Py_SIZE(a);
1014 static PyObject *
1015 string_concat(register PyStringObject *a, register PyObject *bb)
1017 register Py_ssize_t size;
1018 register PyStringObject *op;
1019 if (!PyString_Check(bb)) {
1020 #ifdef Py_USING_UNICODE
1021 if (PyUnicode_Check(bb))
1022 return PyUnicode_Concat((PyObject *)a, bb);
1023 #endif
1024 if (PyByteArray_Check(bb))
1025 return PyByteArray_Concat((PyObject *)a, bb);
1026 PyErr_Format(PyExc_TypeError,
1027 "cannot concatenate 'str' and '%.200s' objects",
1028 Py_TYPE(bb)->tp_name);
1029 return NULL;
1031 #define b ((PyStringObject *)bb)
1032 /* Optimize cases with empty left or right operand */
1033 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
1034 PyString_CheckExact(a) && PyString_CheckExact(b)) {
1035 if (Py_SIZE(a) == 0) {
1036 Py_INCREF(bb);
1037 return bb;
1039 Py_INCREF(a);
1040 return (PyObject *)a;
1042 size = Py_SIZE(a) + Py_SIZE(b);
1043 /* Check that string sizes are not negative, to prevent an
1044 overflow in cases where we are passed incorrectly-created
1045 strings with negative lengths (due to a bug in other code).
1047 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
1048 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
1049 PyErr_SetString(PyExc_OverflowError,
1050 "strings are too large to concat");
1051 return NULL;
1054 /* Inline PyObject_NewVar */
1055 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
1056 PyErr_SetString(PyExc_OverflowError,
1057 "strings are too large to concat");
1058 return NULL;
1060 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
1061 if (op == NULL)
1062 return PyErr_NoMemory();
1063 PyObject_INIT_VAR(op, &PyString_Type, size);
1064 op->ob_shash = -1;
1065 op->ob_sstate = SSTATE_NOT_INTERNED;
1066 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1067 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1068 op->ob_sval[size] = '\0';
1069 return (PyObject *) op;
1070 #undef b
1073 static PyObject *
1074 string_repeat(register PyStringObject *a, register Py_ssize_t n)
1076 register Py_ssize_t i;
1077 register Py_ssize_t j;
1078 register Py_ssize_t size;
1079 register PyStringObject *op;
1080 size_t nbytes;
1081 if (n < 0)
1082 n = 0;
1083 /* watch out for overflows: the size can overflow int,
1084 * and the # of bytes needed can overflow size_t
1086 size = Py_SIZE(a) * n;
1087 if (n && size / n != Py_SIZE(a)) {
1088 PyErr_SetString(PyExc_OverflowError,
1089 "repeated string is too long");
1090 return NULL;
1092 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1093 Py_INCREF(a);
1094 return (PyObject *)a;
1096 nbytes = (size_t)size;
1097 if (nbytes + PyStringObject_SIZE <= nbytes) {
1098 PyErr_SetString(PyExc_OverflowError,
1099 "repeated string is too long");
1100 return NULL;
1102 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
1103 if (op == NULL)
1104 return PyErr_NoMemory();
1105 PyObject_INIT_VAR(op, &PyString_Type, size);
1106 op->ob_shash = -1;
1107 op->ob_sstate = SSTATE_NOT_INTERNED;
1108 op->ob_sval[size] = '\0';
1109 if (Py_SIZE(a) == 1 && n > 0) {
1110 memset(op->ob_sval, a->ob_sval[0] , n);
1111 return (PyObject *) op;
1113 i = 0;
1114 if (i < size) {
1115 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1116 i = Py_SIZE(a);
1118 while (i < size) {
1119 j = (i <= size-i) ? i : size-i;
1120 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1121 i += j;
1123 return (PyObject *) op;
1126 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1128 static PyObject *
1129 string_slice(register PyStringObject *a, register Py_ssize_t i,
1130 register Py_ssize_t j)
1131 /* j -- may be negative! */
1133 if (i < 0)
1134 i = 0;
1135 if (j < 0)
1136 j = 0; /* Avoid signed/unsigned bug in next line */
1137 if (j > Py_SIZE(a))
1138 j = Py_SIZE(a);
1139 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1140 /* It's the same as a */
1141 Py_INCREF(a);
1142 return (PyObject *)a;
1144 if (j < i)
1145 j = i;
1146 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
1149 static int
1150 string_contains(PyObject *str_obj, PyObject *sub_obj)
1152 if (!PyString_CheckExact(sub_obj)) {
1153 #ifdef Py_USING_UNICODE
1154 if (PyUnicode_Check(sub_obj))
1155 return PyUnicode_Contains(str_obj, sub_obj);
1156 #endif
1157 if (!PyString_Check(sub_obj)) {
1158 PyErr_Format(PyExc_TypeError,
1159 "'in <string>' requires string as left operand, "
1160 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1161 return -1;
1165 return stringlib_contains_obj(str_obj, sub_obj);
1168 static PyObject *
1169 string_item(PyStringObject *a, register Py_ssize_t i)
1171 char pchar;
1172 PyObject *v;
1173 if (i < 0 || i >= Py_SIZE(a)) {
1174 PyErr_SetString(PyExc_IndexError, "string index out of range");
1175 return NULL;
1177 pchar = a->ob_sval[i];
1178 v = (PyObject *)characters[pchar & UCHAR_MAX];
1179 if (v == NULL)
1180 v = PyString_FromStringAndSize(&pchar, 1);
1181 else {
1182 #ifdef COUNT_ALLOCS
1183 one_strings++;
1184 #endif
1185 Py_INCREF(v);
1187 return v;
1190 static PyObject*
1191 string_richcompare(PyStringObject *a, PyStringObject *b, int op)
1193 int c;
1194 Py_ssize_t len_a, len_b;
1195 Py_ssize_t min_len;
1196 PyObject *result;
1198 /* Make sure both arguments are strings. */
1199 if (!(PyString_Check(a) && PyString_Check(b))) {
1200 result = Py_NotImplemented;
1201 goto out;
1203 if (a == b) {
1204 switch (op) {
1205 case Py_EQ:case Py_LE:case Py_GE:
1206 result = Py_True;
1207 goto out;
1208 case Py_NE:case Py_LT:case Py_GT:
1209 result = Py_False;
1210 goto out;
1213 if (op == Py_EQ) {
1214 /* Supporting Py_NE here as well does not save
1215 much time, since Py_NE is rarely used. */
1216 if (Py_SIZE(a) == Py_SIZE(b)
1217 && (a->ob_sval[0] == b->ob_sval[0]
1218 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1219 result = Py_True;
1220 } else {
1221 result = Py_False;
1223 goto out;
1225 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1226 min_len = (len_a < len_b) ? len_a : len_b;
1227 if (min_len > 0) {
1228 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1229 if (c==0)
1230 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1231 } else
1232 c = 0;
1233 if (c == 0)
1234 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1235 switch (op) {
1236 case Py_LT: c = c < 0; break;
1237 case Py_LE: c = c <= 0; break;
1238 case Py_EQ: assert(0); break; /* unreachable */
1239 case Py_NE: c = c != 0; break;
1240 case Py_GT: c = c > 0; break;
1241 case Py_GE: c = c >= 0; break;
1242 default:
1243 result = Py_NotImplemented;
1244 goto out;
1246 result = c ? Py_True : Py_False;
1247 out:
1248 Py_INCREF(result);
1249 return result;
1253 _PyString_Eq(PyObject *o1, PyObject *o2)
1255 PyStringObject *a = (PyStringObject*) o1;
1256 PyStringObject *b = (PyStringObject*) o2;
1257 return Py_SIZE(a) == Py_SIZE(b)
1258 && *a->ob_sval == *b->ob_sval
1259 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1262 static long
1263 string_hash(PyStringObject *a)
1265 register Py_ssize_t len;
1266 register unsigned char *p;
1267 register long x;
1269 if (a->ob_shash != -1)
1270 return a->ob_shash;
1271 len = Py_SIZE(a);
1272 p = (unsigned char *) a->ob_sval;
1273 x = *p << 7;
1274 while (--len >= 0)
1275 x = (1000003*x) ^ *p++;
1276 x ^= Py_SIZE(a);
1277 if (x == -1)
1278 x = -2;
1279 a->ob_shash = x;
1280 return x;
1283 static PyObject*
1284 string_subscript(PyStringObject* self, PyObject* item)
1286 if (PyIndex_Check(item)) {
1287 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1288 if (i == -1 && PyErr_Occurred())
1289 return NULL;
1290 if (i < 0)
1291 i += PyString_GET_SIZE(self);
1292 return string_item(self, i);
1294 else if (PySlice_Check(item)) {
1295 Py_ssize_t start, stop, step, slicelength, cur, i;
1296 char* source_buf;
1297 char* result_buf;
1298 PyObject* result;
1300 if (PySlice_GetIndicesEx((PySliceObject*)item,
1301 PyString_GET_SIZE(self),
1302 &start, &stop, &step, &slicelength) < 0) {
1303 return NULL;
1306 if (slicelength <= 0) {
1307 return PyString_FromStringAndSize("", 0);
1309 else if (start == 0 && step == 1 &&
1310 slicelength == PyString_GET_SIZE(self) &&
1311 PyString_CheckExact(self)) {
1312 Py_INCREF(self);
1313 return (PyObject *)self;
1315 else if (step == 1) {
1316 return PyString_FromStringAndSize(
1317 PyString_AS_STRING(self) + start,
1318 slicelength);
1320 else {
1321 source_buf = PyString_AsString((PyObject*)self);
1322 result_buf = (char *)PyMem_Malloc(slicelength);
1323 if (result_buf == NULL)
1324 return PyErr_NoMemory();
1326 for (cur = start, i = 0; i < slicelength;
1327 cur += step, i++) {
1328 result_buf[i] = source_buf[cur];
1331 result = PyString_FromStringAndSize(result_buf,
1332 slicelength);
1333 PyMem_Free(result_buf);
1334 return result;
1337 else {
1338 PyErr_Format(PyExc_TypeError,
1339 "string indices must be integers, not %.200s",
1340 Py_TYPE(item)->tp_name);
1341 return NULL;
1345 static Py_ssize_t
1346 string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1348 if ( index != 0 ) {
1349 PyErr_SetString(PyExc_SystemError,
1350 "accessing non-existent string segment");
1351 return -1;
1353 *ptr = (void *)self->ob_sval;
1354 return Py_SIZE(self);
1357 static Py_ssize_t
1358 string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1360 PyErr_SetString(PyExc_TypeError,
1361 "Cannot use string as modifiable buffer");
1362 return -1;
1365 static Py_ssize_t
1366 string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
1368 if ( lenp )
1369 *lenp = Py_SIZE(self);
1370 return 1;
1373 static Py_ssize_t
1374 string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
1376 if ( index != 0 ) {
1377 PyErr_SetString(PyExc_SystemError,
1378 "accessing non-existent string segment");
1379 return -1;
1381 *ptr = self->ob_sval;
1382 return Py_SIZE(self);
1385 static int
1386 string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
1388 return PyBuffer_FillInfo(view, (PyObject*)self,
1389 (void *)self->ob_sval, Py_SIZE(self),
1390 1, flags);
1393 static PySequenceMethods string_as_sequence = {
1394 (lenfunc)string_length, /*sq_length*/
1395 (binaryfunc)string_concat, /*sq_concat*/
1396 (ssizeargfunc)string_repeat, /*sq_repeat*/
1397 (ssizeargfunc)string_item, /*sq_item*/
1398 (ssizessizeargfunc)string_slice, /*sq_slice*/
1399 0, /*sq_ass_item*/
1400 0, /*sq_ass_slice*/
1401 (objobjproc)string_contains /*sq_contains*/
1404 static PyMappingMethods string_as_mapping = {
1405 (lenfunc)string_length,
1406 (binaryfunc)string_subscript,
1410 static PyBufferProcs string_as_buffer = {
1411 (readbufferproc)string_buffer_getreadbuf,
1412 (writebufferproc)string_buffer_getwritebuf,
1413 (segcountproc)string_buffer_getsegcount,
1414 (charbufferproc)string_buffer_getcharbuf,
1415 (getbufferproc)string_buffer_getbuffer,
1416 0, /* XXX */
/* Strip modes; the values double as indices into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-parsing format strings, indexed by the strip mode above. */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for a strip mode: the format string minus its leading
   three characters ("|O:"). */
#define STRIPNAME(i) (stripformat[i]+3)
1430 PyDoc_STRVAR(split__doc__,
1431 "S.split([sep [,maxsplit]]) -> list of strings\n\
1433 Return a list of the words in the string S, using sep as the\n\
1434 delimiter string. If maxsplit is given, at most maxsplit\n\
1435 splits are done. If sep is not specified or is None, any\n\
1436 whitespace string is a separator and empty strings are removed\n\
1437 from the result.");
1439 static PyObject *
1440 string_split(PyStringObject *self, PyObject *args)
1442 Py_ssize_t len = PyString_GET_SIZE(self), n;
1443 Py_ssize_t maxsplit = -1;
1444 const char *s = PyString_AS_STRING(self), *sub;
1445 PyObject *subobj = Py_None;
1447 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1448 return NULL;
1449 if (maxsplit < 0)
1450 maxsplit = PY_SSIZE_T_MAX;
1451 if (subobj == Py_None)
1452 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1453 if (PyString_Check(subobj)) {
1454 sub = PyString_AS_STRING(subobj);
1455 n = PyString_GET_SIZE(subobj);
1457 #ifdef Py_USING_UNICODE
1458 else if (PyUnicode_Check(subobj))
1459 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1460 #endif
1461 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1462 return NULL;
1464 return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1467 PyDoc_STRVAR(partition__doc__,
1468 "S.partition(sep) -> (head, sep, tail)\n\
1470 Search for the separator sep in S, and return the part before it,\n\
1471 the separator itself, and the part after it. If the separator is not\n\
1472 found, return S and two empty strings.");
1474 static PyObject *
1475 string_partition(PyStringObject *self, PyObject *sep_obj)
1477 const char *sep;
1478 Py_ssize_t sep_len;
1480 if (PyString_Check(sep_obj)) {
1481 sep = PyString_AS_STRING(sep_obj);
1482 sep_len = PyString_GET_SIZE(sep_obj);
1484 #ifdef Py_USING_UNICODE
1485 else if (PyUnicode_Check(sep_obj))
1486 return PyUnicode_Partition((PyObject *) self, sep_obj);
1487 #endif
1488 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1489 return NULL;
1491 return stringlib_partition(
1492 (PyObject*) self,
1493 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1494 sep_obj, sep, sep_len
1498 PyDoc_STRVAR(rpartition__doc__,
1499 "S.rpartition(sep) -> (head, sep, tail)\n\
1501 Search for the separator sep in S, starting at the end of S, and return\n\
1502 the part before it, the separator itself, and the part after it. If the\n\
1503 separator is not found, return two empty strings and S.");
1505 static PyObject *
1506 string_rpartition(PyStringObject *self, PyObject *sep_obj)
1508 const char *sep;
1509 Py_ssize_t sep_len;
1511 if (PyString_Check(sep_obj)) {
1512 sep = PyString_AS_STRING(sep_obj);
1513 sep_len = PyString_GET_SIZE(sep_obj);
1515 #ifdef Py_USING_UNICODE
1516 else if (PyUnicode_Check(sep_obj))
1517 return PyUnicode_RPartition((PyObject *) self, sep_obj);
1518 #endif
1519 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1520 return NULL;
1522 return stringlib_rpartition(
1523 (PyObject*) self,
1524 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1525 sep_obj, sep, sep_len
1529 PyDoc_STRVAR(rsplit__doc__,
1530 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1532 Return a list of the words in the string S, using sep as the\n\
1533 delimiter string, starting at the end of the string and working\n\
1534 to the front. If maxsplit is given, at most maxsplit splits are\n\
1535 done. If sep is not specified or is None, any whitespace string\n\
1536 is a separator.");
1538 static PyObject *
1539 string_rsplit(PyStringObject *self, PyObject *args)
1541 Py_ssize_t len = PyString_GET_SIZE(self), n;
1542 Py_ssize_t maxsplit = -1;
1543 const char *s = PyString_AS_STRING(self), *sub;
1544 PyObject *subobj = Py_None;
1546 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1547 return NULL;
1548 if (maxsplit < 0)
1549 maxsplit = PY_SSIZE_T_MAX;
1550 if (subobj == Py_None)
1551 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1552 if (PyString_Check(subobj)) {
1553 sub = PyString_AS_STRING(subobj);
1554 n = PyString_GET_SIZE(subobj);
1556 #ifdef Py_USING_UNICODE
1557 else if (PyUnicode_Check(subobj))
1558 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1559 #endif
1560 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1561 return NULL;
1563 return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1567 PyDoc_STRVAR(join__doc__,
1568 "S.join(iterable) -> string\n\
1570 Return a string which is the concatenation of the strings in the\n\
1571 iterable. The separator between elements is S.");
1573 static PyObject *
1574 string_join(PyStringObject *self, PyObject *orig)
1576 char *sep = PyString_AS_STRING(self);
1577 const Py_ssize_t seplen = PyString_GET_SIZE(self);
1578 PyObject *res = NULL;
1579 char *p;
1580 Py_ssize_t seqlen = 0;
1581 size_t sz = 0;
1582 Py_ssize_t i;
1583 PyObject *seq, *item;
1585 seq = PySequence_Fast(orig, "");
1586 if (seq == NULL) {
1587 return NULL;
1590 seqlen = PySequence_Size(seq);
1591 if (seqlen == 0) {
1592 Py_DECREF(seq);
1593 return PyString_FromString("");
1595 if (seqlen == 1) {
1596 item = PySequence_Fast_GET_ITEM(seq, 0);
1597 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1598 Py_INCREF(item);
1599 Py_DECREF(seq);
1600 return item;
1604 /* There are at least two things to join, or else we have a subclass
1605 * of the builtin types in the sequence.
1606 * Do a pre-pass to figure out the total amount of space we'll
1607 * need (sz), see whether any argument is absurd, and defer to
1608 * the Unicode join if appropriate.
1610 for (i = 0; i < seqlen; i++) {
1611 const size_t old_sz = sz;
1612 item = PySequence_Fast_GET_ITEM(seq, i);
1613 if (!PyString_Check(item)){
1614 #ifdef Py_USING_UNICODE
1615 if (PyUnicode_Check(item)) {
1616 /* Defer to Unicode join.
1617 * CAUTION: There's no gurantee that the
1618 * original sequence can be iterated over
1619 * again, so we must pass seq here.
1621 PyObject *result;
1622 result = PyUnicode_Join((PyObject *)self, seq);
1623 Py_DECREF(seq);
1624 return result;
1626 #endif
1627 PyErr_Format(PyExc_TypeError,
1628 "sequence item %zd: expected string,"
1629 " %.80s found",
1630 i, Py_TYPE(item)->tp_name);
1631 Py_DECREF(seq);
1632 return NULL;
1634 sz += PyString_GET_SIZE(item);
1635 if (i != 0)
1636 sz += seplen;
1637 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1638 PyErr_SetString(PyExc_OverflowError,
1639 "join() result is too long for a Python string");
1640 Py_DECREF(seq);
1641 return NULL;
1645 /* Allocate result space. */
1646 res = PyString_FromStringAndSize((char*)NULL, sz);
1647 if (res == NULL) {
1648 Py_DECREF(seq);
1649 return NULL;
1652 /* Catenate everything. */
1653 p = PyString_AS_STRING(res);
1654 for (i = 0; i < seqlen; ++i) {
1655 size_t n;
1656 item = PySequence_Fast_GET_ITEM(seq, i);
1657 n = PyString_GET_SIZE(item);
1658 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1659 p += n;
1660 if (i < seqlen - 1) {
1661 Py_MEMCPY(p, sep, seplen);
1662 p += seplen;
1666 Py_DECREF(seq);
1667 return res;
1670 PyObject *
1671 _PyString_Join(PyObject *sep, PyObject *x)
1673 assert(sep != NULL && PyString_Check(sep));
1674 assert(x != NULL);
1675 return string_join((PyStringObject *)sep, x);
/* Helper macro to fix up start/end slice values against a length:
   `end` is clamped to len, negative values are offset by len once and
   then floored at 0.  `start` is not clamped from above.  The macro
   expands to two statements and assigns to `start` and `end`, so both
   must be lvalues and it must be used as a statement of its own. */
#define ADJUST_INDICES(start, end, len)         \
    if ((end) > (len))                          \
        (end) = (len);                          \
    else if ((end) < 0) {                       \
        (end) += (len);                         \
        if ((end) < 0)                          \
            (end) = 0;                          \
    }                                           \
    if ((start) < 0) {                          \
        (start) += (len);                       \
        if ((start) < 0)                        \
            (start) = 0;                        \
    }
1693 Py_LOCAL_INLINE(Py_ssize_t)
1694 string_find_internal(PyStringObject *self, PyObject *args, int dir)
1696 PyObject *subobj;
1697 const char *sub;
1698 Py_ssize_t sub_len;
1699 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1700 PyObject *obj_start=Py_None, *obj_end=Py_None;
1702 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1703 &obj_start, &obj_end))
1704 return -2;
1705 /* To support None in "start" and "end" arguments, meaning
1706 the same as if they were not passed.
1708 if (obj_start != Py_None)
1709 if (!_PyEval_SliceIndex(obj_start, &start))
1710 return -2;
1711 if (obj_end != Py_None)
1712 if (!_PyEval_SliceIndex(obj_end, &end))
1713 return -2;
1715 if (PyString_Check(subobj)) {
1716 sub = PyString_AS_STRING(subobj);
1717 sub_len = PyString_GET_SIZE(subobj);
1719 #ifdef Py_USING_UNICODE
1720 else if (PyUnicode_Check(subobj))
1721 return PyUnicode_Find(
1722 (PyObject *)self, subobj, start, end, dir);
1723 #endif
1724 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1725 /* XXX - the "expected a character buffer object" is pretty
1726 confusing for a non-expert. remap to something else ? */
1727 return -2;
1729 if (dir > 0)
1730 return stringlib_find_slice(
1731 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1732 sub, sub_len, start, end);
1733 else
1734 return stringlib_rfind_slice(
1735 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1736 sub, sub_len, start, end);
1740 PyDoc_STRVAR(find__doc__,
1741 "S.find(sub [,start [,end]]) -> int\n\
1743 Return the lowest index in S where substring sub is found,\n\
1744 such that sub is contained within s[start:end]. Optional\n\
1745 arguments start and end are interpreted as in slice notation.\n\
1747 Return -1 on failure.");
1749 static PyObject *
1750 string_find(PyStringObject *self, PyObject *args)
1752 Py_ssize_t result = string_find_internal(self, args, +1);
1753 if (result == -2)
1754 return NULL;
1755 return PyInt_FromSsize_t(result);
1759 PyDoc_STRVAR(index__doc__,
1760 "S.index(sub [,start [,end]]) -> int\n\
1762 Like S.find() but raise ValueError when the substring is not found.");
1764 static PyObject *
1765 string_index(PyStringObject *self, PyObject *args)
1767 Py_ssize_t result = string_find_internal(self, args, +1);
1768 if (result == -2)
1769 return NULL;
1770 if (result == -1) {
1771 PyErr_SetString(PyExc_ValueError,
1772 "substring not found");
1773 return NULL;
1775 return PyInt_FromSsize_t(result);
1779 PyDoc_STRVAR(rfind__doc__,
1780 "S.rfind(sub [,start [,end]]) -> int\n\
1782 Return the highest index in S where substring sub is found,\n\
1783 such that sub is contained within s[start:end]. Optional\n\
1784 arguments start and end are interpreted as in slice notation.\n\
1786 Return -1 on failure.");
1788 static PyObject *
1789 string_rfind(PyStringObject *self, PyObject *args)
1791 Py_ssize_t result = string_find_internal(self, args, -1);
1792 if (result == -2)
1793 return NULL;
1794 return PyInt_FromSsize_t(result);
1798 PyDoc_STRVAR(rindex__doc__,
1799 "S.rindex(sub [,start [,end]]) -> int\n\
1801 Like S.rfind() but raise ValueError when the substring is not found.");
1803 static PyObject *
1804 string_rindex(PyStringObject *self, PyObject *args)
1806 Py_ssize_t result = string_find_internal(self, args, -1);
1807 if (result == -2)
1808 return NULL;
1809 if (result == -1) {
1810 PyErr_SetString(PyExc_ValueError,
1811 "substring not found");
1812 return NULL;
1814 return PyInt_FromSsize_t(result);
1818 Py_LOCAL_INLINE(PyObject *)
1819 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1821 char *s = PyString_AS_STRING(self);
1822 Py_ssize_t len = PyString_GET_SIZE(self);
1823 char *sep = PyString_AS_STRING(sepobj);
1824 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1825 Py_ssize_t i, j;
1827 i = 0;
1828 if (striptype != RIGHTSTRIP) {
1829 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1830 i++;
1834 j = len;
1835 if (striptype != LEFTSTRIP) {
1836 do {
1837 j--;
1838 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1839 j++;
1842 if (i == 0 && j == len && PyString_CheckExact(self)) {
1843 Py_INCREF(self);
1844 return (PyObject*)self;
1846 else
1847 return PyString_FromStringAndSize(s+i, j-i);
1851 Py_LOCAL_INLINE(PyObject *)
1852 do_strip(PyStringObject *self, int striptype)
1854 char *s = PyString_AS_STRING(self);
1855 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
1857 i = 0;
1858 if (striptype != RIGHTSTRIP) {
1859 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1860 i++;
1864 j = len;
1865 if (striptype != LEFTSTRIP) {
1866 do {
1867 j--;
1868 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1869 j++;
1872 if (i == 0 && j == len && PyString_CheckExact(self)) {
1873 Py_INCREF(self);
1874 return (PyObject*)self;
1876 else
1877 return PyString_FromStringAndSize(s+i, j-i);
1881 Py_LOCAL_INLINE(PyObject *)
1882 do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1884 PyObject *sep = NULL;
1886 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1887 return NULL;
1889 if (sep != NULL && sep != Py_None) {
1890 if (PyString_Check(sep))
1891 return do_xstrip(self, striptype, sep);
1892 #ifdef Py_USING_UNICODE
1893 else if (PyUnicode_Check(sep)) {
1894 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1895 PyObject *res;
1896 if (uniself==NULL)
1897 return NULL;
1898 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1899 striptype, sep);
1900 Py_DECREF(uniself);
1901 return res;
1903 #endif
1904 PyErr_Format(PyExc_TypeError,
1905 #ifdef Py_USING_UNICODE
1906 "%s arg must be None, str or unicode",
1907 #else
1908 "%s arg must be None or str",
1909 #endif
1910 STRIPNAME(striptype));
1911 return NULL;
1914 return do_strip(self, striptype);
1918 PyDoc_STRVAR(strip__doc__,
1919 "S.strip([chars]) -> string or unicode\n\
1921 Return a copy of the string S with leading and trailing\n\
1922 whitespace removed.\n\
1923 If chars is given and not None, remove characters in chars instead.\n\
1924 If chars is unicode, S will be converted to unicode before stripping");
1926 static PyObject *
1927 string_strip(PyStringObject *self, PyObject *args)
1929 if (PyTuple_GET_SIZE(args) == 0)
1930 return do_strip(self, BOTHSTRIP); /* Common case */
1931 else
1932 return do_argstrip(self, BOTHSTRIP, args);
1936 PyDoc_STRVAR(lstrip__doc__,
1937 "S.lstrip([chars]) -> string or unicode\n\
1939 Return a copy of the string S with leading whitespace removed.\n\
1940 If chars is given and not None, remove characters in chars instead.\n\
1941 If chars is unicode, S will be converted to unicode before stripping");
1943 static PyObject *
1944 string_lstrip(PyStringObject *self, PyObject *args)
1946 if (PyTuple_GET_SIZE(args) == 0)
1947 return do_strip(self, LEFTSTRIP); /* Common case */
1948 else
1949 return do_argstrip(self, LEFTSTRIP, args);
1953 PyDoc_STRVAR(rstrip__doc__,
1954 "S.rstrip([chars]) -> string or unicode\n\
1956 Return a copy of the string S with trailing whitespace removed.\n\
1957 If chars is given and not None, remove characters in chars instead.\n\
1958 If chars is unicode, S will be converted to unicode before stripping");
1960 static PyObject *
1961 string_rstrip(PyStringObject *self, PyObject *args)
1963 if (PyTuple_GET_SIZE(args) == 0)
1964 return do_strip(self, RIGHTSTRIP); /* Common case */
1965 else
1966 return do_argstrip(self, RIGHTSTRIP, args);
1970 PyDoc_STRVAR(lower__doc__,
1971 "S.lower() -> string\n\
1973 Return a copy of the string S converted to lowercase.");
1975 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1976 #ifndef _tolower
1977 #define _tolower tolower
1978 #endif
1980 static PyObject *
1981 string_lower(PyStringObject *self)
1983 char *s;
1984 Py_ssize_t i, n = PyString_GET_SIZE(self);
1985 PyObject *newobj;
1987 newobj = PyString_FromStringAndSize(NULL, n);
1988 if (!newobj)
1989 return NULL;
1991 s = PyString_AS_STRING(newobj);
1993 Py_MEMCPY(s, PyString_AS_STRING(self), n);
1995 for (i = 0; i < n; i++) {
1996 int c = Py_CHARMASK(s[i]);
1997 if (isupper(c))
1998 s[i] = _tolower(c);
2001 return newobj;
2004 PyDoc_STRVAR(upper__doc__,
2005 "S.upper() -> string\n\
2007 Return a copy of the string S converted to uppercase.");
2009 #ifndef _toupper
2010 #define _toupper toupper
2011 #endif
2013 static PyObject *
2014 string_upper(PyStringObject *self)
2016 char *s;
2017 Py_ssize_t i, n = PyString_GET_SIZE(self);
2018 PyObject *newobj;
2020 newobj = PyString_FromStringAndSize(NULL, n);
2021 if (!newobj)
2022 return NULL;
2024 s = PyString_AS_STRING(newobj);
2026 Py_MEMCPY(s, PyString_AS_STRING(self), n);
2028 for (i = 0; i < n; i++) {
2029 int c = Py_CHARMASK(s[i]);
2030 if (islower(c))
2031 s[i] = _toupper(c);
2034 return newobj;
2037 PyDoc_STRVAR(title__doc__,
2038 "S.title() -> string\n\
2040 Return a titlecased version of S, i.e. words start with uppercase\n\
2041 characters, all remaining cased characters have lowercase.");
2043 static PyObject*
2044 string_title(PyStringObject *self)
2046 char *s = PyString_AS_STRING(self), *s_new;
2047 Py_ssize_t i, n = PyString_GET_SIZE(self);
2048 int previous_is_cased = 0;
2049 PyObject *newobj;
2051 newobj = PyString_FromStringAndSize(NULL, n);
2052 if (newobj == NULL)
2053 return NULL;
2054 s_new = PyString_AsString(newobj);
2055 for (i = 0; i < n; i++) {
2056 int c = Py_CHARMASK(*s++);
2057 if (islower(c)) {
2058 if (!previous_is_cased)
2059 c = toupper(c);
2060 previous_is_cased = 1;
2061 } else if (isupper(c)) {
2062 if (previous_is_cased)
2063 c = tolower(c);
2064 previous_is_cased = 1;
2065 } else
2066 previous_is_cased = 0;
2067 *s_new++ = c;
2069 return newobj;
2072 PyDoc_STRVAR(capitalize__doc__,
2073 "S.capitalize() -> string\n\
2075 Return a copy of the string S with only its first character\n\
2076 capitalized.");
2078 static PyObject *
2079 string_capitalize(PyStringObject *self)
2081 char *s = PyString_AS_STRING(self), *s_new;
2082 Py_ssize_t i, n = PyString_GET_SIZE(self);
2083 PyObject *newobj;
2085 newobj = PyString_FromStringAndSize(NULL, n);
2086 if (newobj == NULL)
2087 return NULL;
2088 s_new = PyString_AsString(newobj);
2089 if (0 < n) {
2090 int c = Py_CHARMASK(*s++);
2091 if (islower(c))
2092 *s_new = toupper(c);
2093 else
2094 *s_new = c;
2095 s_new++;
2097 for (i = 1; i < n; i++) {
2098 int c = Py_CHARMASK(*s++);
2099 if (isupper(c))
2100 *s_new = tolower(c);
2101 else
2102 *s_new = c;
2103 s_new++;
2105 return newobj;
2109 PyDoc_STRVAR(count__doc__,
2110 "S.count(sub[, start[, end]]) -> int\n\
2112 Return the number of non-overlapping occurrences of substring sub in\n\
2113 string S[start:end]. Optional arguments start and end are interpreted\n\
2114 as in slice notation.");
2116 static PyObject *
2117 string_count(PyStringObject *self, PyObject *args)
2119 PyObject *sub_obj;
2120 const char *str = PyString_AS_STRING(self), *sub;
2121 Py_ssize_t sub_len;
2122 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2124 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2125 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2126 return NULL;
2128 if (PyString_Check(sub_obj)) {
2129 sub = PyString_AS_STRING(sub_obj);
2130 sub_len = PyString_GET_SIZE(sub_obj);
2132 #ifdef Py_USING_UNICODE
2133 else if (PyUnicode_Check(sub_obj)) {
2134 Py_ssize_t count;
2135 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2136 if (count == -1)
2137 return NULL;
2138 else
2139 return PyInt_FromSsize_t(count);
2141 #endif
2142 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2143 return NULL;
2145 ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
2147 return PyInt_FromSsize_t(
2148 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2152 PyDoc_STRVAR(swapcase__doc__,
2153 "S.swapcase() -> string\n\
2155 Return a copy of the string S with uppercase characters\n\
2156 converted to lowercase and vice versa.");
2158 static PyObject *
2159 string_swapcase(PyStringObject *self)
2161 char *s = PyString_AS_STRING(self), *s_new;
2162 Py_ssize_t i, n = PyString_GET_SIZE(self);
2163 PyObject *newobj;
2165 newobj = PyString_FromStringAndSize(NULL, n);
2166 if (newobj == NULL)
2167 return NULL;
2168 s_new = PyString_AsString(newobj);
2169 for (i = 0; i < n; i++) {
2170 int c = Py_CHARMASK(*s++);
2171 if (islower(c)) {
2172 *s_new = toupper(c);
2174 else if (isupper(c)) {
2175 *s_new = tolower(c);
2177 else
2178 *s_new = c;
2179 s_new++;
2181 return newobj;
2185 PyDoc_STRVAR(translate__doc__,
2186 "S.translate(table [,deletechars]) -> string\n\
2188 Return a copy of the string S, where all characters occurring\n\
2189 in the optional argument deletechars are removed, and the\n\
2190 remaining characters have been mapped through the given\n\
2191 translation table, which must be a string of length 256.");
2193 static PyObject *
2194 string_translate(PyStringObject *self, PyObject *args)
2196 register char *input, *output;
2197 const char *table;
2198 register Py_ssize_t i, c, changed = 0;
2199 PyObject *input_obj = (PyObject*)self;
2200 const char *output_start, *del_table=NULL;
2201 Py_ssize_t inlen, tablen, dellen = 0;
2202 PyObject *result;
2203 int trans_table[256];
2204 PyObject *tableobj, *delobj = NULL;
2206 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2207 &tableobj, &delobj))
2208 return NULL;
2210 if (PyString_Check(tableobj)) {
2211 table = PyString_AS_STRING(tableobj);
2212 tablen = PyString_GET_SIZE(tableobj);
2214 else if (tableobj == Py_None) {
2215 table = NULL;
2216 tablen = 256;
2218 #ifdef Py_USING_UNICODE
2219 else if (PyUnicode_Check(tableobj)) {
2220 /* Unicode .translate() does not support the deletechars
2221 parameter; instead a mapping to None will cause characters
2222 to be deleted. */
2223 if (delobj != NULL) {
2224 PyErr_SetString(PyExc_TypeError,
2225 "deletions are implemented differently for unicode");
2226 return NULL;
2228 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2230 #endif
2231 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2232 return NULL;
2234 if (tablen != 256) {
2235 PyErr_SetString(PyExc_ValueError,
2236 "translation table must be 256 characters long");
2237 return NULL;
2240 if (delobj != NULL) {
2241 if (PyString_Check(delobj)) {
2242 del_table = PyString_AS_STRING(delobj);
2243 dellen = PyString_GET_SIZE(delobj);
2245 #ifdef Py_USING_UNICODE
2246 else if (PyUnicode_Check(delobj)) {
2247 PyErr_SetString(PyExc_TypeError,
2248 "deletions are implemented differently for unicode");
2249 return NULL;
2251 #endif
2252 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2253 return NULL;
2255 else {
2256 del_table = NULL;
2257 dellen = 0;
2260 inlen = PyString_GET_SIZE(input_obj);
2261 result = PyString_FromStringAndSize((char *)NULL, inlen);
2262 if (result == NULL)
2263 return NULL;
2264 output_start = output = PyString_AsString(result);
2265 input = PyString_AS_STRING(input_obj);
2267 if (dellen == 0 && table != NULL) {
2268 /* If no deletions are required, use faster code */
2269 for (i = inlen; --i >= 0; ) {
2270 c = Py_CHARMASK(*input++);
2271 if (Py_CHARMASK((*output++ = table[c])) != c)
2272 changed = 1;
2274 if (changed || !PyString_CheckExact(input_obj))
2275 return result;
2276 Py_DECREF(result);
2277 Py_INCREF(input_obj);
2278 return input_obj;
2281 if (table == NULL) {
2282 for (i = 0; i < 256; i++)
2283 trans_table[i] = Py_CHARMASK(i);
2284 } else {
2285 for (i = 0; i < 256; i++)
2286 trans_table[i] = Py_CHARMASK(table[i]);
2289 for (i = 0; i < dellen; i++)
2290 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2292 for (i = inlen; --i >= 0; ) {
2293 c = Py_CHARMASK(*input++);
2294 if (trans_table[c] != -1)
2295 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2296 continue;
2297 changed = 1;
2299 if (!changed && PyString_CheckExact(input_obj)) {
2300 Py_DECREF(result);
2301 Py_INCREF(input_obj);
2302 return input_obj;
2304 /* Fix the size of the resulting string */
2305 if (inlen > 0 && _PyString_Resize(&result, output - output_start))
2306 return NULL;
2307 return result;
/* find and count characters and substrings */

/* memchr() wrapper returning a char pointer to the first occurrence of
   byte `c` in the first `target_len` bytes of `target`, or NULL. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2316 /* String ops must return a string. */
2317 /* If the object is subclass of string, create a copy */
2318 Py_LOCAL(PyStringObject *)
2319 return_self(PyStringObject *self)
2321 if (PyString_CheckExact(self)) {
2322 Py_INCREF(self);
2323 return self;
2325 return (PyStringObject *)PyString_FromStringAndSize(
2326 PyString_AS_STRING(self),
2327 PyString_GET_SIZE(self));
2330 Py_LOCAL_INLINE(Py_ssize_t)
2331 countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2333 Py_ssize_t count=0;
2334 const char *start=target;
2335 const char *end=target+target_len;
2337 while ( (start=findchar(start, end-start, c)) != NULL ) {
2338 count++;
2339 if (count >= maxcount)
2340 break;
2341 start += 1;
2343 return count;
2347 /* Algorithms for different cases of string replacement */
2349 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2350 Py_LOCAL(PyStringObject *)
2351 replace_interleave(PyStringObject *self,
2352 const char *to_s, Py_ssize_t to_len,
2353 Py_ssize_t maxcount)
2355 char *self_s, *result_s;
2356 Py_ssize_t self_len, result_len;
2357 Py_ssize_t count, i, product;
2358 PyStringObject *result;
2360 self_len = PyString_GET_SIZE(self);
2362 /* 1 at the end plus 1 after every character */
2363 count = self_len+1;
2364 if (maxcount < count)
2365 count = maxcount;
2367 /* Check for overflow */
2368 /* result_len = count * to_len + self_len; */
2369 product = count * to_len;
2370 if (product / to_len != count) {
2371 PyErr_SetString(PyExc_OverflowError,
2372 "replace string is too long");
2373 return NULL;
2375 result_len = product + self_len;
2376 if (result_len < 0) {
2377 PyErr_SetString(PyExc_OverflowError,
2378 "replace string is too long");
2379 return NULL;
2382 if (! (result = (PyStringObject *)
2383 PyString_FromStringAndSize(NULL, result_len)) )
2384 return NULL;
2386 self_s = PyString_AS_STRING(self);
2387 result_s = PyString_AS_STRING(result);
2389 /* TODO: special case single character, which doesn't need memcpy */
2391 /* Lay the first one down (guaranteed this will occur) */
2392 Py_MEMCPY(result_s, to_s, to_len);
2393 result_s += to_len;
2394 count -= 1;
2396 for (i=0; i<count; i++) {
2397 *result_s++ = *self_s++;
2398 Py_MEMCPY(result_s, to_s, to_len);
2399 result_s += to_len;
2402 /* Copy the rest of the original string */
2403 Py_MEMCPY(result_s, self_s, self_len-i);
2405 return result;
2408 /* Special case for deleting a single character */
2409 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2410 Py_LOCAL(PyStringObject *)
2411 replace_delete_single_character(PyStringObject *self,
2412 char from_c, Py_ssize_t maxcount)
2414 char *self_s, *result_s;
2415 char *start, *next, *end;
2416 Py_ssize_t self_len, result_len;
2417 Py_ssize_t count;
2418 PyStringObject *result;
2420 self_len = PyString_GET_SIZE(self);
2421 self_s = PyString_AS_STRING(self);
2423 count = countchar(self_s, self_len, from_c, maxcount);
2424 if (count == 0) {
2425 return return_self(self);
2428 result_len = self_len - count; /* from_len == 1 */
2429 assert(result_len>=0);
2431 if ( (result = (PyStringObject *)
2432 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2433 return NULL;
2434 result_s = PyString_AS_STRING(result);
2436 start = self_s;
2437 end = self_s + self_len;
2438 while (count-- > 0) {
2439 next = findchar(start, end-start, from_c);
2440 if (next == NULL)
2441 break;
2442 Py_MEMCPY(result_s, start, next-start);
2443 result_s += (next-start);
2444 start = next+1;
2446 Py_MEMCPY(result_s, start, end-start);
2448 return result;
2451 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2453 Py_LOCAL(PyStringObject *)
2454 replace_delete_substring(PyStringObject *self,
2455 const char *from_s, Py_ssize_t from_len,
2456 Py_ssize_t maxcount) {
2457 char *self_s, *result_s;
2458 char *start, *next, *end;
2459 Py_ssize_t self_len, result_len;
2460 Py_ssize_t count, offset;
2461 PyStringObject *result;
2463 self_len = PyString_GET_SIZE(self);
2464 self_s = PyString_AS_STRING(self);
2466 count = stringlib_count(self_s, self_len,
2467 from_s, from_len,
2468 maxcount);
2470 if (count == 0) {
2471 /* no matches */
2472 return return_self(self);
2475 result_len = self_len - (count * from_len);
2476 assert (result_len>=0);
2478 if ( (result = (PyStringObject *)
2479 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2480 return NULL;
2482 result_s = PyString_AS_STRING(result);
2484 start = self_s;
2485 end = self_s + self_len;
2486 while (count-- > 0) {
2487 offset = stringlib_find(start, end-start,
2488 from_s, from_len,
2490 if (offset == -1)
2491 break;
2492 next = start + offset;
2494 Py_MEMCPY(result_s, start, next-start);
2496 result_s += (next-start);
2497 start = next+from_len;
2499 Py_MEMCPY(result_s, start, end-start);
2500 return result;
2503 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2504 Py_LOCAL(PyStringObject *)
2505 replace_single_character_in_place(PyStringObject *self,
2506 char from_c, char to_c,
2507 Py_ssize_t maxcount)
2509 char *self_s, *result_s, *start, *end, *next;
2510 Py_ssize_t self_len;
2511 PyStringObject *result;
2513 /* The result string will be the same size */
2514 self_s = PyString_AS_STRING(self);
2515 self_len = PyString_GET_SIZE(self);
2517 next = findchar(self_s, self_len, from_c);
2519 if (next == NULL) {
2520 /* No matches; return the original string */
2521 return return_self(self);
2524 /* Need to make a new string */
2525 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2526 if (result == NULL)
2527 return NULL;
2528 result_s = PyString_AS_STRING(result);
2529 Py_MEMCPY(result_s, self_s, self_len);
2531 /* change everything in-place, starting with this one */
2532 start = result_s + (next-self_s);
2533 *start = to_c;
2534 start++;
2535 end = result_s + self_len;
2537 while (--maxcount > 0) {
2538 next = findchar(start, end-start, from_c);
2539 if (next == NULL)
2540 break;
2541 *next = to_c;
2542 start = next+1;
2545 return result;
2548 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2549 Py_LOCAL(PyStringObject *)
2550 replace_substring_in_place(PyStringObject *self,
2551 const char *from_s, Py_ssize_t from_len,
2552 const char *to_s, Py_ssize_t to_len,
2553 Py_ssize_t maxcount)
2555 char *result_s, *start, *end;
2556 char *self_s;
2557 Py_ssize_t self_len, offset;
2558 PyStringObject *result;
2560 /* The result string will be the same size */
2562 self_s = PyString_AS_STRING(self);
2563 self_len = PyString_GET_SIZE(self);
2565 offset = stringlib_find(self_s, self_len,
2566 from_s, from_len,
2568 if (offset == -1) {
2569 /* No matches; return the original string */
2570 return return_self(self);
2573 /* Need to make a new string */
2574 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2575 if (result == NULL)
2576 return NULL;
2577 result_s = PyString_AS_STRING(result);
2578 Py_MEMCPY(result_s, self_s, self_len);
2580 /* change everything in-place, starting with this one */
2581 start = result_s + offset;
2582 Py_MEMCPY(start, to_s, from_len);
2583 start += from_len;
2584 end = result_s + self_len;
2586 while ( --maxcount > 0) {
2587 offset = stringlib_find(start, end-start,
2588 from_s, from_len,
2590 if (offset==-1)
2591 break;
2592 Py_MEMCPY(start+offset, to_s, from_len);
2593 start += offset+from_len;
2596 return result;
2599 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2600 Py_LOCAL(PyStringObject *)
2601 replace_single_character(PyStringObject *self,
2602 char from_c,
2603 const char *to_s, Py_ssize_t to_len,
2604 Py_ssize_t maxcount)
2606 char *self_s, *result_s;
2607 char *start, *next, *end;
2608 Py_ssize_t self_len, result_len;
2609 Py_ssize_t count, product;
2610 PyStringObject *result;
2612 self_s = PyString_AS_STRING(self);
2613 self_len = PyString_GET_SIZE(self);
2615 count = countchar(self_s, self_len, from_c, maxcount);
2616 if (count == 0) {
2617 /* no matches, return unchanged */
2618 return return_self(self);
2621 /* use the difference between current and new, hence the "-1" */
2622 /* result_len = self_len + count * (to_len-1) */
2623 product = count * (to_len-1);
2624 if (product / (to_len-1) != count) {
2625 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2626 return NULL;
2628 result_len = self_len + product;
2629 if (result_len < 0) {
2630 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2631 return NULL;
2634 if ( (result = (PyStringObject *)
2635 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2636 return NULL;
2637 result_s = PyString_AS_STRING(result);
2639 start = self_s;
2640 end = self_s + self_len;
2641 while (count-- > 0) {
2642 next = findchar(start, end-start, from_c);
2643 if (next == NULL)
2644 break;
2646 if (next == start) {
2647 /* replace with the 'to' */
2648 Py_MEMCPY(result_s, to_s, to_len);
2649 result_s += to_len;
2650 start += 1;
2651 } else {
2652 /* copy the unchanged old then the 'to' */
2653 Py_MEMCPY(result_s, start, next-start);
2654 result_s += (next-start);
2655 Py_MEMCPY(result_s, to_s, to_len);
2656 result_s += to_len;
2657 start = next+1;
2660 /* Copy the remainder of the remaining string */
2661 Py_MEMCPY(result_s, start, end-start);
2663 return result;
2666 /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2667 Py_LOCAL(PyStringObject *)
2668 replace_substring(PyStringObject *self,
2669 const char *from_s, Py_ssize_t from_len,
2670 const char *to_s, Py_ssize_t to_len,
2671 Py_ssize_t maxcount) {
2672 char *self_s, *result_s;
2673 char *start, *next, *end;
2674 Py_ssize_t self_len, result_len;
2675 Py_ssize_t count, offset, product;
2676 PyStringObject *result;
2678 self_s = PyString_AS_STRING(self);
2679 self_len = PyString_GET_SIZE(self);
2681 count = stringlib_count(self_s, self_len,
2682 from_s, from_len,
2683 maxcount);
2685 if (count == 0) {
2686 /* no matches, return unchanged */
2687 return return_self(self);
2690 /* Check for overflow */
2691 /* result_len = self_len + count * (to_len-from_len) */
2692 product = count * (to_len-from_len);
2693 if (product / (to_len-from_len) != count) {
2694 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2695 return NULL;
2697 result_len = self_len + product;
2698 if (result_len < 0) {
2699 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2700 return NULL;
2703 if ( (result = (PyStringObject *)
2704 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2705 return NULL;
2706 result_s = PyString_AS_STRING(result);
2708 start = self_s;
2709 end = self_s + self_len;
2710 while (count-- > 0) {
2711 offset = stringlib_find(start, end-start,
2712 from_s, from_len,
2714 if (offset == -1)
2715 break;
2716 next = start+offset;
2717 if (next == start) {
2718 /* replace with the 'to' */
2719 Py_MEMCPY(result_s, to_s, to_len);
2720 result_s += to_len;
2721 start += from_len;
2722 } else {
2723 /* copy the unchanged old then the 'to' */
2724 Py_MEMCPY(result_s, start, next-start);
2725 result_s += (next-start);
2726 Py_MEMCPY(result_s, to_s, to_len);
2727 result_s += to_len;
2728 start = next+from_len;
2731 /* Copy the remainder of the remaining string */
2732 Py_MEMCPY(result_s, start, end-start);
2734 return result;
2738 Py_LOCAL(PyStringObject *)
2739 replace(PyStringObject *self,
2740 const char *from_s, Py_ssize_t from_len,
2741 const char *to_s, Py_ssize_t to_len,
2742 Py_ssize_t maxcount)
2744 if (maxcount < 0) {
2745 maxcount = PY_SSIZE_T_MAX;
2746 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2747 /* nothing to do; return the original string */
2748 return return_self(self);
2751 if (maxcount == 0 ||
2752 (from_len == 0 && to_len == 0)) {
2753 /* nothing to do; return the original string */
2754 return return_self(self);
2757 /* Handle zero-length special cases */
2759 if (from_len == 0) {
2760 /* insert the 'to' string everywhere. */
2761 /* >>> "Python".replace("", ".") */
2762 /* '.P.y.t.h.o.n.' */
2763 return replace_interleave(self, to_s, to_len, maxcount);
2766 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2767 /* point for an empty self string to generate a non-empty string */
2768 /* Special case so the remaining code always gets a non-empty string */
2769 if (PyString_GET_SIZE(self) == 0) {
2770 return return_self(self);
2773 if (to_len == 0) {
2774 /* delete all occurances of 'from' string */
2775 if (from_len == 1) {
2776 return replace_delete_single_character(
2777 self, from_s[0], maxcount);
2778 } else {
2779 return replace_delete_substring(self, from_s, from_len, maxcount);
2783 /* Handle special case where both strings have the same length */
2785 if (from_len == to_len) {
2786 if (from_len == 1) {
2787 return replace_single_character_in_place(
2788 self,
2789 from_s[0],
2790 to_s[0],
2791 maxcount);
2792 } else {
2793 return replace_substring_in_place(
2794 self, from_s, from_len, to_s, to_len, maxcount);
2798 /* Otherwise use the more generic algorithms */
2799 if (from_len == 1) {
2800 return replace_single_character(self, from_s[0],
2801 to_s, to_len, maxcount);
2802 } else {
2803 /* len('from')>=2, len('to')>=1 */
2804 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2808 PyDoc_STRVAR(replace__doc__,
2809 "S.replace(old, new[, count]) -> string\n\
2811 Return a copy of string S with all occurrences of substring\n\
2812 old replaced by new. If the optional argument count is\n\
2813 given, only the first count occurrences are replaced.");
2815 static PyObject *
2816 string_replace(PyStringObject *self, PyObject *args)
2818 Py_ssize_t count = -1;
2819 PyObject *from, *to;
2820 const char *from_s, *to_s;
2821 Py_ssize_t from_len, to_len;
2823 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2824 return NULL;
2826 if (PyString_Check(from)) {
2827 from_s = PyString_AS_STRING(from);
2828 from_len = PyString_GET_SIZE(from);
2830 #ifdef Py_USING_UNICODE
2831 if (PyUnicode_Check(from))
2832 return PyUnicode_Replace((PyObject *)self,
2833 from, to, count);
2834 #endif
2835 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2836 return NULL;
2838 if (PyString_Check(to)) {
2839 to_s = PyString_AS_STRING(to);
2840 to_len = PyString_GET_SIZE(to);
2842 #ifdef Py_USING_UNICODE
2843 else if (PyUnicode_Check(to))
2844 return PyUnicode_Replace((PyObject *)self,
2845 from, to, count);
2846 #endif
2847 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2848 return NULL;
2850 return (PyObject *)replace((PyStringObject *) self,
2851 from_s, from_len,
2852 to_s, to_len, count);
2855 /** End DALKE **/
2857 /* Matches the end (direction >= 0) or start (direction < 0) of self
2858 * against substr, using the start and end arguments. Returns
2859 * -1 on error, 0 if not found and 1 if found.
2861 Py_LOCAL(int)
2862 _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
2863 Py_ssize_t end, int direction)
2865 Py_ssize_t len = PyString_GET_SIZE(self);
2866 Py_ssize_t slen;
2867 const char* sub;
2868 const char* str;
2870 if (PyString_Check(substr)) {
2871 sub = PyString_AS_STRING(substr);
2872 slen = PyString_GET_SIZE(substr);
2874 #ifdef Py_USING_UNICODE
2875 else if (PyUnicode_Check(substr))
2876 return PyUnicode_Tailmatch((PyObject *)self,
2877 substr, start, end, direction);
2878 #endif
2879 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2880 return -1;
2881 str = PyString_AS_STRING(self);
2883 ADJUST_INDICES(start, end, len);
2885 if (direction < 0) {
2886 /* startswith */
2887 if (start+slen > len)
2888 return 0;
2889 } else {
2890 /* endswith */
2891 if (end-start < slen || start > len)
2892 return 0;
2894 if (end-slen > start)
2895 start = end - slen;
2897 if (end-start >= slen)
2898 return ! memcmp(str+start, sub, slen);
2899 return 0;
2903 PyDoc_STRVAR(startswith__doc__,
2904 "S.startswith(prefix[, start[, end]]) -> bool\n\
2906 Return True if S starts with the specified prefix, False otherwise.\n\
2907 With optional start, test S beginning at that position.\n\
2908 With optional end, stop comparing S at that position.\n\
2909 prefix can also be a tuple of strings to try.");
2911 static PyObject *
2912 string_startswith(PyStringObject *self, PyObject *args)
2914 Py_ssize_t start = 0;
2915 Py_ssize_t end = PY_SSIZE_T_MAX;
2916 PyObject *subobj;
2917 int result;
2919 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2920 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2921 return NULL;
2922 if (PyTuple_Check(subobj)) {
2923 Py_ssize_t i;
2924 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2925 result = _string_tailmatch(self,
2926 PyTuple_GET_ITEM(subobj, i),
2927 start, end, -1);
2928 if (result == -1)
2929 return NULL;
2930 else if (result) {
2931 Py_RETURN_TRUE;
2934 Py_RETURN_FALSE;
2936 result = _string_tailmatch(self, subobj, start, end, -1);
2937 if (result == -1)
2938 return NULL;
2939 else
2940 return PyBool_FromLong(result);
2944 PyDoc_STRVAR(endswith__doc__,
2945 "S.endswith(suffix[, start[, end]]) -> bool\n\
2947 Return True if S ends with the specified suffix, False otherwise.\n\
2948 With optional start, test S beginning at that position.\n\
2949 With optional end, stop comparing S at that position.\n\
2950 suffix can also be a tuple of strings to try.");
2952 static PyObject *
2953 string_endswith(PyStringObject *self, PyObject *args)
2955 Py_ssize_t start = 0;
2956 Py_ssize_t end = PY_SSIZE_T_MAX;
2957 PyObject *subobj;
2958 int result;
2960 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2961 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2962 return NULL;
2963 if (PyTuple_Check(subobj)) {
2964 Py_ssize_t i;
2965 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2966 result = _string_tailmatch(self,
2967 PyTuple_GET_ITEM(subobj, i),
2968 start, end, +1);
2969 if (result == -1)
2970 return NULL;
2971 else if (result) {
2972 Py_RETURN_TRUE;
2975 Py_RETURN_FALSE;
2977 result = _string_tailmatch(self, subobj, start, end, +1);
2978 if (result == -1)
2979 return NULL;
2980 else
2981 return PyBool_FromLong(result);
2985 PyDoc_STRVAR(encode__doc__,
2986 "S.encode([encoding[,errors]]) -> object\n\
2988 Encodes S using the codec registered for encoding. encoding defaults\n\
2989 to the default encoding. errors may be given to set a different error\n\
2990 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2991 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2992 'xmlcharrefreplace' as well as any other name registered with\n\
2993 codecs.register_error that is able to handle UnicodeEncodeErrors.");
2995 static PyObject *
2996 string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
2998 static char *kwlist[] = {"encoding", "errors", 0};
2999 char *encoding = NULL;
3000 char *errors = NULL;
3001 PyObject *v;
3003 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
3004 kwlist, &encoding, &errors))
3005 return NULL;
3006 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
3007 if (v == NULL)
3008 goto onError;
3009 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3010 PyErr_Format(PyExc_TypeError,
3011 "encoder did not return a string/unicode object "
3012 "(type=%.400s)",
3013 Py_TYPE(v)->tp_name);
3014 Py_DECREF(v);
3015 return NULL;
3017 return v;
3019 onError:
3020 return NULL;
3024 PyDoc_STRVAR(decode__doc__,
3025 "S.decode([encoding[,errors]]) -> object\n\
3027 Decodes S using the codec registered for encoding. encoding defaults\n\
3028 to the default encoding. errors may be given to set a different error\n\
3029 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3030 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3031 as well as any other name registered with codecs.register_error that is\n\
3032 able to handle UnicodeDecodeErrors.");
3034 static PyObject *
3035 string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
3037 static char *kwlist[] = {"encoding", "errors", 0};
3038 char *encoding = NULL;
3039 char *errors = NULL;
3040 PyObject *v;
3042 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
3043 kwlist, &encoding, &errors))
3044 return NULL;
3045 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
3046 if (v == NULL)
3047 goto onError;
3048 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3049 PyErr_Format(PyExc_TypeError,
3050 "decoder did not return a string/unicode object "
3051 "(type=%.400s)",
3052 Py_TYPE(v)->tp_name);
3053 Py_DECREF(v);
3054 return NULL;
3056 return v;
3058 onError:
3059 return NULL;
3063 PyDoc_STRVAR(expandtabs__doc__,
3064 "S.expandtabs([tabsize]) -> string\n\
3066 Return a copy of S where all tab characters are expanded using spaces.\n\
3067 If tabsize is not given, a tab size of 8 characters is assumed.");
3069 static PyObject*
3070 string_expandtabs(PyStringObject *self, PyObject *args)
3072 const char *e, *p, *qe;
3073 char *q;
3074 Py_ssize_t i, j, incr;
3075 PyObject *u;
3076 int tabsize = 8;
3078 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3079 return NULL;
3081 /* First pass: determine size of output string */
3082 i = 0; /* chars up to and including most recent \n or \r */
3083 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3084 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3085 for (p = PyString_AS_STRING(self); p < e; p++)
3086 if (*p == '\t') {
3087 if (tabsize > 0) {
3088 incr = tabsize - (j % tabsize);
3089 if (j > PY_SSIZE_T_MAX - incr)
3090 goto overflow1;
3091 j += incr;
3094 else {
3095 if (j > PY_SSIZE_T_MAX - 1)
3096 goto overflow1;
3097 j++;
3098 if (*p == '\n' || *p == '\r') {
3099 if (i > PY_SSIZE_T_MAX - j)
3100 goto overflow1;
3101 i += j;
3102 j = 0;
3106 if (i > PY_SSIZE_T_MAX - j)
3107 goto overflow1;
3109 /* Second pass: create output string and fill it */
3110 u = PyString_FromStringAndSize(NULL, i + j);
3111 if (!u)
3112 return NULL;
3114 j = 0; /* same as in first pass */
3115 q = PyString_AS_STRING(u); /* next output char */
3116 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
3118 for (p = PyString_AS_STRING(self); p < e; p++)
3119 if (*p == '\t') {
3120 if (tabsize > 0) {
3121 i = tabsize - (j % tabsize);
3122 j += i;
3123 while (i--) {
3124 if (q >= qe)
3125 goto overflow2;
3126 *q++ = ' ';
3130 else {
3131 if (q >= qe)
3132 goto overflow2;
3133 *q++ = *p;
3134 j++;
3135 if (*p == '\n' || *p == '\r')
3136 j = 0;
3139 return u;
3141 overflow2:
3142 Py_DECREF(u);
3143 overflow1:
3144 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3145 return NULL;
3148 Py_LOCAL_INLINE(PyObject *)
3149 pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
3151 PyObject *u;
3153 if (left < 0)
3154 left = 0;
3155 if (right < 0)
3156 right = 0;
3158 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
3159 Py_INCREF(self);
3160 return (PyObject *)self;
3163 u = PyString_FromStringAndSize(NULL,
3164 left + PyString_GET_SIZE(self) + right);
3165 if (u) {
3166 if (left)
3167 memset(PyString_AS_STRING(u), fill, left);
3168 Py_MEMCPY(PyString_AS_STRING(u) + left,
3169 PyString_AS_STRING(self),
3170 PyString_GET_SIZE(self));
3171 if (right)
3172 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3173 fill, right);
3176 return u;
3179 PyDoc_STRVAR(ljust__doc__,
3180 "S.ljust(width[, fillchar]) -> string\n"
3181 "\n"
3182 "Return S left-justified in a string of length width. Padding is\n"
3183 "done using the specified fill character (default is a space).");
3185 static PyObject *
3186 string_ljust(PyStringObject *self, PyObject *args)
3188 Py_ssize_t width;
3189 char fillchar = ' ';
3191 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3192 return NULL;
3194 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3195 Py_INCREF(self);
3196 return (PyObject*) self;
3199 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
3203 PyDoc_STRVAR(rjust__doc__,
3204 "S.rjust(width[, fillchar]) -> string\n"
3205 "\n"
3206 "Return S right-justified in a string of length width. Padding is\n"
3207 "done using the specified fill character (default is a space)");
3209 static PyObject *
3210 string_rjust(PyStringObject *self, PyObject *args)
3212 Py_ssize_t width;
3213 char fillchar = ' ';
3215 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3216 return NULL;
3218 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3219 Py_INCREF(self);
3220 return (PyObject*) self;
3223 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
3227 PyDoc_STRVAR(center__doc__,
3228 "S.center(width[, fillchar]) -> string\n"
3229 "\n"
3230 "Return S centered in a string of length width. Padding is\n"
3231 "done using the specified fill character (default is a space)");
3233 static PyObject *
3234 string_center(PyStringObject *self, PyObject *args)
3236 Py_ssize_t marg, left;
3237 Py_ssize_t width;
3238 char fillchar = ' ';
3240 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3241 return NULL;
3243 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3244 Py_INCREF(self);
3245 return (PyObject*) self;
3248 marg = width - PyString_GET_SIZE(self);
3249 left = marg / 2 + (marg & width & 1);
3251 return pad(self, left, marg - left, fillchar);
3254 PyDoc_STRVAR(zfill__doc__,
3255 "S.zfill(width) -> string\n"
3256 "\n"
3257 "Pad a numeric string S with zeros on the left, to fill a field\n"
3258 "of the specified width. The string S is never truncated.");
3260 static PyObject *
3261 string_zfill(PyStringObject *self, PyObject *args)
3263 Py_ssize_t fill;
3264 PyObject *s;
3265 char *p;
3266 Py_ssize_t width;
3268 if (!PyArg_ParseTuple(args, "n:zfill", &width))
3269 return NULL;
3271 if (PyString_GET_SIZE(self) >= width) {
3272 if (PyString_CheckExact(self)) {
3273 Py_INCREF(self);
3274 return (PyObject*) self;
3276 else
3277 return PyString_FromStringAndSize(
3278 PyString_AS_STRING(self),
3279 PyString_GET_SIZE(self)
3283 fill = width - PyString_GET_SIZE(self);
3285 s = pad(self, fill, 0, '0');
3287 if (s == NULL)
3288 return NULL;
3290 p = PyString_AS_STRING(s);
3291 if (p[fill] == '+' || p[fill] == '-') {
3292 /* move sign to beginning of string */
3293 p[0] = p[fill];
3294 p[fill] = '0';
3297 return (PyObject*) s;
3300 PyDoc_STRVAR(isspace__doc__,
3301 "S.isspace() -> bool\n\
3303 Return True if all characters in S are whitespace\n\
3304 and there is at least one character in S, False otherwise.");
3306 static PyObject*
3307 string_isspace(PyStringObject *self)
3309 register const unsigned char *p
3310 = (unsigned char *) PyString_AS_STRING(self);
3311 register const unsigned char *e;
3313 /* Shortcut for single character strings */
3314 if (PyString_GET_SIZE(self) == 1 &&
3315 isspace(*p))
3316 return PyBool_FromLong(1);
3318 /* Special case for empty strings */
3319 if (PyString_GET_SIZE(self) == 0)
3320 return PyBool_FromLong(0);
3322 e = p + PyString_GET_SIZE(self);
3323 for (; p < e; p++) {
3324 if (!isspace(*p))
3325 return PyBool_FromLong(0);
3327 return PyBool_FromLong(1);
3331 PyDoc_STRVAR(isalpha__doc__,
3332 "S.isalpha() -> bool\n\
3334 Return True if all characters in S are alphabetic\n\
3335 and there is at least one character in S, False otherwise.");
3337 static PyObject*
3338 string_isalpha(PyStringObject *self)
3340 register const unsigned char *p
3341 = (unsigned char *) PyString_AS_STRING(self);
3342 register const unsigned char *e;
3344 /* Shortcut for single character strings */
3345 if (PyString_GET_SIZE(self) == 1 &&
3346 isalpha(*p))
3347 return PyBool_FromLong(1);
3349 /* Special case for empty strings */
3350 if (PyString_GET_SIZE(self) == 0)
3351 return PyBool_FromLong(0);
3353 e = p + PyString_GET_SIZE(self);
3354 for (; p < e; p++) {
3355 if (!isalpha(*p))
3356 return PyBool_FromLong(0);
3358 return PyBool_FromLong(1);
3362 PyDoc_STRVAR(isalnum__doc__,
3363 "S.isalnum() -> bool\n\
3365 Return True if all characters in S are alphanumeric\n\
3366 and there is at least one character in S, False otherwise.");
3368 static PyObject*
3369 string_isalnum(PyStringObject *self)
3371 register const unsigned char *p
3372 = (unsigned char *) PyString_AS_STRING(self);
3373 register const unsigned char *e;
3375 /* Shortcut for single character strings */
3376 if (PyString_GET_SIZE(self) == 1 &&
3377 isalnum(*p))
3378 return PyBool_FromLong(1);
3380 /* Special case for empty strings */
3381 if (PyString_GET_SIZE(self) == 0)
3382 return PyBool_FromLong(0);
3384 e = p + PyString_GET_SIZE(self);
3385 for (; p < e; p++) {
3386 if (!isalnum(*p))
3387 return PyBool_FromLong(0);
3389 return PyBool_FromLong(1);
3393 PyDoc_STRVAR(isdigit__doc__,
3394 "S.isdigit() -> bool\n\
3396 Return True if all characters in S are digits\n\
3397 and there is at least one character in S, False otherwise.");
3399 static PyObject*
3400 string_isdigit(PyStringObject *self)
3402 register const unsigned char *p
3403 = (unsigned char *) PyString_AS_STRING(self);
3404 register const unsigned char *e;
3406 /* Shortcut for single character strings */
3407 if (PyString_GET_SIZE(self) == 1 &&
3408 isdigit(*p))
3409 return PyBool_FromLong(1);
3411 /* Special case for empty strings */
3412 if (PyString_GET_SIZE(self) == 0)
3413 return PyBool_FromLong(0);
3415 e = p + PyString_GET_SIZE(self);
3416 for (; p < e; p++) {
3417 if (!isdigit(*p))
3418 return PyBool_FromLong(0);
3420 return PyBool_FromLong(1);
3424 PyDoc_STRVAR(islower__doc__,
3425 "S.islower() -> bool\n\
3427 Return True if all cased characters in S are lowercase and there is\n\
3428 at least one cased character in S, False otherwise.");
3430 static PyObject*
3431 string_islower(PyStringObject *self)
3433 register const unsigned char *p
3434 = (unsigned char *) PyString_AS_STRING(self);
3435 register const unsigned char *e;
3436 int cased;
3438 /* Shortcut for single character strings */
3439 if (PyString_GET_SIZE(self) == 1)
3440 return PyBool_FromLong(islower(*p) != 0);
3442 /* Special case for empty strings */
3443 if (PyString_GET_SIZE(self) == 0)
3444 return PyBool_FromLong(0);
3446 e = p + PyString_GET_SIZE(self);
3447 cased = 0;
3448 for (; p < e; p++) {
3449 if (isupper(*p))
3450 return PyBool_FromLong(0);
3451 else if (!cased && islower(*p))
3452 cased = 1;
3454 return PyBool_FromLong(cased);
3458 PyDoc_STRVAR(isupper__doc__,
3459 "S.isupper() -> bool\n\
3461 Return True if all cased characters in S are uppercase and there is\n\
3462 at least one cased character in S, False otherwise.");
3464 static PyObject*
3465 string_isupper(PyStringObject *self)
3467 register const unsigned char *p
3468 = (unsigned char *) PyString_AS_STRING(self);
3469 register const unsigned char *e;
3470 int cased;
3472 /* Shortcut for single character strings */
3473 if (PyString_GET_SIZE(self) == 1)
3474 return PyBool_FromLong(isupper(*p) != 0);
3476 /* Special case for empty strings */
3477 if (PyString_GET_SIZE(self) == 0)
3478 return PyBool_FromLong(0);
3480 e = p + PyString_GET_SIZE(self);
3481 cased = 0;
3482 for (; p < e; p++) {
3483 if (islower(*p))
3484 return PyBool_FromLong(0);
3485 else if (!cased && isupper(*p))
3486 cased = 1;
3488 return PyBool_FromLong(cased);
3492 PyDoc_STRVAR(istitle__doc__,
3493 "S.istitle() -> bool\n\
3495 Return True if S is a titlecased string and there is at least one\n\
3496 character in S, i.e. uppercase characters may only follow uncased\n\
3497 characters and lowercase characters only cased ones. Return False\n\
3498 otherwise.");
3500 static PyObject*
3501 string_istitle(PyStringObject *self, PyObject *uncased)
3503 register const unsigned char *p
3504 = (unsigned char *) PyString_AS_STRING(self);
3505 register const unsigned char *e;
3506 int cased, previous_is_cased;
3508 /* Shortcut for single character strings */
3509 if (PyString_GET_SIZE(self) == 1)
3510 return PyBool_FromLong(isupper(*p) != 0);
3512 /* Special case for empty strings */
3513 if (PyString_GET_SIZE(self) == 0)
3514 return PyBool_FromLong(0);
3516 e = p + PyString_GET_SIZE(self);
3517 cased = 0;
3518 previous_is_cased = 0;
3519 for (; p < e; p++) {
3520 register const unsigned char ch = *p;
3522 if (isupper(ch)) {
3523 if (previous_is_cased)
3524 return PyBool_FromLong(0);
3525 previous_is_cased = 1;
3526 cased = 1;
3528 else if (islower(ch)) {
3529 if (!previous_is_cased)
3530 return PyBool_FromLong(0);
3531 previous_is_cased = 1;
3532 cased = 1;
3534 else
3535 previous_is_cased = 0;
3537 return PyBool_FromLong(cased);
3541 PyDoc_STRVAR(splitlines__doc__,
3542 "S.splitlines([keepends]) -> list of strings\n\
3544 Return a list of the lines in S, breaking at line boundaries.\n\
3545 Line breaks are not included in the resulting list unless keepends\n\
3546 is given and true.");
3548 static PyObject*
3549 string_splitlines(PyStringObject *self, PyObject *args)
3551 int keepends = 0;
3553 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3554 return NULL;
3556 return stringlib_splitlines(
3557 (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
3558 keepends
3562 PyDoc_STRVAR(sizeof__doc__,
3563 "S.__sizeof__() -> size of S in memory, in bytes");
3565 static PyObject *
3566 string_sizeof(PyStringObject *v)
3568 Py_ssize_t res;
3569 res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
3570 return PyInt_FromSsize_t(res);
3573 static PyObject *
3574 string_getnewargs(PyStringObject *v)
3576 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
3580 #include "stringlib/string_format.h"
3582 PyDoc_STRVAR(format__doc__,
3583 "S.format(*args, **kwargs) -> unicode\n\
3587 static PyObject *
3588 string__format__(PyObject* self, PyObject* args)
3590 PyObject *format_spec;
3591 PyObject *result = NULL;
3592 PyObject *tmp = NULL;
3594 /* If 2.x, convert format_spec to the same type as value */
3595 /* This is to allow things like u''.format('') */
3596 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
3597 goto done;
3598 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
3599 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3600 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3601 goto done;
3603 tmp = PyObject_Str(format_spec);
3604 if (tmp == NULL)
3605 goto done;
3606 format_spec = tmp;
3608 result = _PyBytes_FormatAdvanced(self,
3609 PyString_AS_STRING(format_spec),
3610 PyString_GET_SIZE(format_spec));
3611 done:
3612 Py_XDECREF(tmp);
3613 return result;
3616 PyDoc_STRVAR(p_format__doc__,
3617 "S.__format__(format_spec) -> unicode\n\
3622 static PyMethodDef
3623 string_methods[] = {
3624 /* Counterparts of the obsolete stropmodule functions; except
3625 string.maketrans(). */
3626 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3627 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3628 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3629 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3630 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3631 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3632 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3633 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3634 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3635 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3636 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3637 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3638 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3639 capitalize__doc__},
3640 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3641 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3642 endswith__doc__},
3643 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3644 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3645 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3646 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3647 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3648 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3649 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3650 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3651 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3652 rpartition__doc__},
3653 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3654 startswith__doc__},
3655 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3656 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3657 swapcase__doc__},
3658 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3659 translate__doc__},
3660 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3661 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3662 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3663 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3664 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3665 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
3666 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
3667 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
3668 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
3669 {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
3670 {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
3671 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3672 expandtabs__doc__},
3673 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3674 splitlines__doc__},
3675 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
3676 sizeof__doc__},
3677 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
3678 {NULL, NULL} /* sentinel */
3681 static PyObject *
3682 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3684 static PyObject *
3685 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3687 PyObject *x = NULL;
3688 static char *kwlist[] = {"object", 0};
3690 if (type != &PyString_Type)
3691 return str_subtype_new(type, args, kwds);
3692 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3693 return NULL;
3694 if (x == NULL)
3695 return PyString_FromString("");
3696 return PyObject_Str(x);
3699 static PyObject *
3700 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3702 PyObject *tmp, *pnew;
3703 Py_ssize_t n;
3705 assert(PyType_IsSubtype(type, &PyString_Type));
3706 tmp = string_new(&PyString_Type, args, kwds);
3707 if (tmp == NULL)
3708 return NULL;
3709 assert(PyString_CheckExact(tmp));
3710 n = PyString_GET_SIZE(tmp);
3711 pnew = type->tp_alloc(type, n);
3712 if (pnew != NULL) {
3713 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3714 ((PyStringObject *)pnew)->ob_shash =
3715 ((PyStringObject *)tmp)->ob_shash;
3716 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3718 Py_DECREF(tmp);
3719 return pnew;
3722 static PyObject *
3723 basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3725 PyErr_SetString(PyExc_TypeError,
3726 "The basestring type cannot be instantiated");
3727 return NULL;
3730 static PyObject *
3731 string_mod(PyObject *v, PyObject *w)
3733 if (!PyString_Check(v)) {
3734 Py_INCREF(Py_NotImplemented);
3735 return Py_NotImplemented;
3737 return PyString_Format(v, w);
/* Docstring for the basestring type; installed as tp_doc of
   PyBaseString_Type. */
3740 PyDoc_STRVAR(basestring_doc,
3741 "Type basestring cannot be instantiated; it is the base for str and unicode.");
/* Number-protocol table for str.  Only nb_remainder is filled in (with
   string_mod, i.e. the `%` formatting operator); the slots listed before it
   are left empty.
   NOTE(review): the closing "};" of this initializer is not visible in this
   copy of the file -- confirm against the upstream source. */
3743 static PyNumberMethods string_as_number = {
3744 0, /*nb_add*/
3745 0, /*nb_subtract*/
3746 0, /*nb_multiply*/
3747 0, /*nb_divide*/
3748 string_mod, /*nb_remainder*/
/* Type object for "basestring".  It derives from PyBaseObject_Type, may be
   subclassed (Py_TPFLAGS_BASETYPE), and its tp_new (basestring_new) always
   raises, so the type itself cannot be instantiated.  Every other behaviour
   slot is left empty.
   NOTE(review): PyString_Type lists two size entries between the type name
   and tp_dealloc; no such entries are visible here, nor is the closing "};"
   -- confirm against the upstream source. */
3752 PyTypeObject PyBaseString_Type = {
3753 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3754 "basestring",
3757 0, /* tp_dealloc */
3758 0, /* tp_print */
3759 0, /* tp_getattr */
3760 0, /* tp_setattr */
3761 0, /* tp_compare */
3762 0, /* tp_repr */
3763 0, /* tp_as_number */
3764 0, /* tp_as_sequence */
3765 0, /* tp_as_mapping */
3766 0, /* tp_hash */
3767 0, /* tp_call */
3768 0, /* tp_str */
3769 0, /* tp_getattro */
3770 0, /* tp_setattro */
3771 0, /* tp_as_buffer */
3772 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3773 basestring_doc, /* tp_doc */
3774 0, /* tp_traverse */
3775 0, /* tp_clear */
3776 0, /* tp_richcompare */
3777 0, /* tp_weaklistoffset */
3778 0, /* tp_iter */
3779 0, /* tp_iternext */
3780 0, /* tp_methods */
3781 0, /* tp_members */
3782 0, /* tp_getset */
3783 &PyBaseObject_Type, /* tp_base */
3784 0, /* tp_dict */
3785 0, /* tp_descr_get */
3786 0, /* tp_descr_set */
3787 0, /* tp_dictoffset */
3788 0, /* tp_init */
3789 0, /* tp_alloc */
3790 basestring_new, /* tp_new */
3791 0, /* tp_free */
/* Docstring for the str type; installed as tp_doc of PyString_Type. */
3794 PyDoc_STRVAR(string_doc,
3795 "str(object) -> string\n\
3797 Return a nice string representation of the object.\n\
3798 If the argument is a string, the return value is the same object.");
/* Type object for "str".
   - Instances are variable-sized: basic size PyStringObject_SIZE plus
     sizeof(char) per item.
   - Derives from PyBaseString_Type and may itself be subclassed.
   - Wires in the number (`%`), sequence, mapping and buffer protocol tables,
     hashing, repr/str, rich comparison and the string_methods table.
   - Objects are created by string_new and released through PyObject_Del.
   NOTE(review): the closing "};" of this initializer is not visible in this
   copy of the file -- confirm against the upstream source. */
3800 PyTypeObject PyString_Type = {
3801 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3802 "str",
3803 PyStringObject_SIZE,
3804 sizeof(char),
3805 string_dealloc, /* tp_dealloc */
3806 (printfunc)string_print, /* tp_print */
3807 0, /* tp_getattr */
3808 0, /* tp_setattr */
3809 0, /* tp_compare */
3810 string_repr, /* tp_repr */
3811 &string_as_number, /* tp_as_number */
3812 &string_as_sequence, /* tp_as_sequence */
3813 &string_as_mapping, /* tp_as_mapping */
3814 (hashfunc)string_hash, /* tp_hash */
3815 0, /* tp_call */
3816 string_str, /* tp_str */
3817 PyObject_GenericGetAttr, /* tp_getattro */
3818 0, /* tp_setattro */
3819 &string_as_buffer, /* tp_as_buffer */
3820 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3821 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
3822 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
3823 string_doc, /* tp_doc */
3824 0, /* tp_traverse */
3825 0, /* tp_clear */
3826 (richcmpfunc)string_richcompare, /* tp_richcompare */
3827 0, /* tp_weaklistoffset */
3828 0, /* tp_iter */
3829 0, /* tp_iternext */
3830 string_methods, /* tp_methods */
3831 0, /* tp_members */
3832 0, /* tp_getset */
3833 &PyBaseString_Type, /* tp_base */
3834 0, /* tp_dict */
3835 0, /* tp_descr_get */
3836 0, /* tp_descr_set */
3837 0, /* tp_dictoffset */
3838 0, /* tp_init */
3839 0, /* tp_alloc */
3840 string_new, /* tp_new */
3841 PyObject_Del, /* tp_free */
3844 void
3845 PyString_Concat(register PyObject **pv, register PyObject *w)
3847 register PyObject *v;
3848 if (*pv == NULL)
3849 return;
3850 if (w == NULL || !PyString_Check(*pv)) {
3851 Py_DECREF(*pv);
3852 *pv = NULL;
3853 return;
3855 v = string_concat((PyStringObject *) *pv, w);
3856 Py_DECREF(*pv);
3857 *pv = v;
3860 void
3861 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
3863 PyString_Concat(pv, w);
3864 Py_XDECREF(w);
3868 /* The following function breaks the notion that strings are immutable:
3869 it changes the size of a string. We get away with this only if there
3870 is only one module referencing the object. You can also think of it
3871 as creating a new string object and destroying the old one, only
3872 more efficiently. In any case, don't use this if the string may
3873 already be known to some other part of the code...
3874 Note that if there's not enough memory to resize the string, the original
3875 string object at *pv is deallocated, *pv is set to NULL, an "out of
3876 memory" exception is set, and -1 is returned. Else (on success) 0 is
3877 returned, and the value in *pv may or may not be the same as on input.
3878 As always, an extra byte is allocated for a trailing \0 byte (newsize
3879 does *not* include that), and a trailing \0 byte is stored.
3883 _PyString_Resize(PyObject **pv, Py_ssize_t newsize)
3885 register PyObject *v;
3886 register PyStringObject *sv;
3887 v = *pv;
3888 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
3889 PyString_CHECK_INTERNED(v)) {
3890 *pv = 0;
3891 Py_DECREF(v);
3892 PyErr_BadInternalCall();
3893 return -1;
3895 /* XXX UNREF/NEWREF interface should be more symmetrical */
3896 _Py_DEC_REFTOTAL;
3897 _Py_ForgetReference(v);
3898 *pv = (PyObject *)
3899 PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
3900 if (*pv == NULL) {
3901 PyObject_Del(v);
3902 PyErr_NoMemory();
3903 return -1;
3905 _Py_NewReference(*pv);
3906 sv = (PyStringObject *) *pv;
3907 Py_SIZE(sv) = newsize;
3908 sv->ob_sval[newsize] = '\0';
3909 sv->ob_shash = -1; /* invalidate cached hash value */
3910 return 0;
3913 /* Helpers for formatstring */
3915 Py_LOCAL_INLINE(PyObject *)
3916 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
3918 Py_ssize_t argidx = *p_argidx;
3919 if (argidx < arglen) {
3920 (*p_argidx)++;
3921 if (arglen < 0)
3922 return args;
3923 else
3924 return PyTuple_GetItem(args, argidx);
3926 PyErr_SetString(PyExc_TypeError,
3927 "not enough arguments for format string");
3928 return NULL;
/* Format codes: one bit per printf-style flag character.
 *      F_LJUST '-'     left-justify within the field width
 *      F_SIGN  '+'     always emit a sign
 *      F_BLANK ' '     emit a blank where a '+' sign would go
 *      F_ALT   '#'     alternate form (e.g. 0x prefix)
 *      F_ZERO  '0'     pad with zeros instead of blanks
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)
3944 /* Returns a new reference to a PyString object, or NULL on failure. */
3946 static PyObject *
3947 formatfloat(PyObject *v, int flags, int prec, int type)
3949 char *p;
3950 PyObject *result;
3951 double x;
3953 x = PyFloat_AsDouble(v);
3954 if (x == -1.0 && PyErr_Occurred()) {
3955 PyErr_Format(PyExc_TypeError, "float argument required, "
3956 "not %.200s", Py_TYPE(v)->tp_name);
3957 return NULL;
3960 if (prec < 0)
3961 prec = 6;
3963 p = PyOS_double_to_string(x, type, prec,
3964 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
3966 if (p == NULL)
3967 return NULL;
3968 result = PyString_FromStringAndSize(p, strlen(p));
3969 PyMem_Free(p);
3970 return result;
3973 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3974 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3975 * Python's regular ints.
3976 * Return value: a new PyString*, or NULL if error.
3977 * . *pbuf is set to point into it,
3978 * *plen set to the # of chars following that.
3979 * Caller must decref it when done using pbuf.
3980 * The string starting at *pbuf is of the form
3981 * "-"? ("0x" | "0X")? digit+
3982 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3983 * set in flags. The case of hex digits will be correct,
3984 * There will be at least prec digits, zero-filled on the left if
3985 * necessary to get that many.
3986 * val object to be converted
3987 * flags bitmask of format flags; only F_ALT is looked at
3988 * prec minimum number of digits; 0-fill on left if needed
3989 * type a character in [duoxX]; u acts the same as d
3991 * CAUTION: o, x and X conversions on regular ints can never
3992 * produce a '-' sign, but can for Python's unbounded ints.
3994 PyObject*
3995 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3996 char **pbuf, int *plen)
3998 PyObject *result = NULL;
3999 char *buf;
4000 Py_ssize_t i;
4001 int sign; /* 1 if '-', else 0 */
4002 int len; /* number of characters */
4003 Py_ssize_t llen;
4004 int numdigits; /* len == numnondigits + numdigits */
4005 int numnondigits = 0;
4007 switch (type) {
4008 case 'd':
4009 case 'u':
4010 result = Py_TYPE(val)->tp_str(val);
4011 break;
4012 case 'o':
4013 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4014 break;
4015 case 'x':
4016 case 'X':
4017 numnondigits = 2;
4018 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4019 break;
4020 default:
4021 assert(!"'type' not in [duoxX]");
4023 if (!result)
4024 return NULL;
4026 buf = PyString_AsString(result);
4027 if (!buf) {
4028 Py_DECREF(result);
4029 return NULL;
4032 /* To modify the string in-place, there can only be one reference. */
4033 if (Py_REFCNT(result) != 1) {
4034 PyErr_BadInternalCall();
4035 return NULL;
4037 llen = PyString_Size(result);
4038 if (llen > INT_MAX) {
4039 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4040 return NULL;
4042 len = (int)llen;
4043 if (buf[len-1] == 'L') {
4044 --len;
4045 buf[len] = '\0';
4047 sign = buf[0] == '-';
4048 numnondigits += sign;
4049 numdigits = len - numnondigits;
4050 assert(numdigits > 0);
4052 /* Get rid of base marker unless F_ALT */
4053 if ((flags & F_ALT) == 0) {
4054 /* Need to skip 0x, 0X or 0. */
4055 int skipped = 0;
4056 switch (type) {
4057 case 'o':
4058 assert(buf[sign] == '0');
4059 /* If 0 is only digit, leave it alone. */
4060 if (numdigits > 1) {
4061 skipped = 1;
4062 --numdigits;
4064 break;
4065 case 'x':
4066 case 'X':
4067 assert(buf[sign] == '0');
4068 assert(buf[sign + 1] == 'x');
4069 skipped = 2;
4070 numnondigits -= 2;
4071 break;
4073 if (skipped) {
4074 buf += skipped;
4075 len -= skipped;
4076 if (sign)
4077 buf[0] = '-';
4079 assert(len == numnondigits + numdigits);
4080 assert(numdigits > 0);
4083 /* Fill with leading zeroes to meet minimum width. */
4084 if (prec > numdigits) {
4085 PyObject *r1 = PyString_FromStringAndSize(NULL,
4086 numnondigits + prec);
4087 char *b1;
4088 if (!r1) {
4089 Py_DECREF(result);
4090 return NULL;
4092 b1 = PyString_AS_STRING(r1);
4093 for (i = 0; i < numnondigits; ++i)
4094 *b1++ = *buf++;
4095 for (i = 0; i < prec - numdigits; i++)
4096 *b1++ = '0';
4097 for (i = 0; i < numdigits; i++)
4098 *b1++ = *buf++;
4099 *b1 = '\0';
4100 Py_DECREF(result);
4101 result = r1;
4102 buf = PyString_AS_STRING(result);
4103 len = numnondigits + prec;
4106 /* Fix up case for hex conversions. */
4107 if (type == 'X') {
4108 /* Need to convert all lower case letters to upper case.
4109 and need to convert 0x to 0X (and -0x to -0X). */
4110 for (i = 0; i < len; i++)
4111 if (buf[i] >= 'a' && buf[i] <= 'x')
4112 buf[i] -= 'a'-'A';
4114 *pbuf = buf;
4115 *plen = len;
4116 return result;
4119 Py_LOCAL_INLINE(int)
4120 formatint(char *buf, size_t buflen, int flags,
4121 int prec, int type, PyObject *v)
4123 /* fmt = '%#.' + `prec` + 'l' + `type`
4124 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4125 + 1 + 1 = 24 */
4126 char fmt[64]; /* plenty big enough! */
4127 char *sign;
4128 long x;
4130 x = PyInt_AsLong(v);
4131 if (x == -1 && PyErr_Occurred()) {
4132 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4133 Py_TYPE(v)->tp_name);
4134 return -1;
4136 if (x < 0 && type == 'u') {
4137 type = 'd';
4139 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4140 sign = "-";
4141 else
4142 sign = "";
4143 if (prec < 0)
4144 prec = 1;
4146 if ((flags & F_ALT) &&
4147 (type == 'x' || type == 'X')) {
4148 /* When converting under %#x or %#X, there are a number
4149 * of issues that cause pain:
4150 * - when 0 is being converted, the C standard leaves off
4151 * the '0x' or '0X', which is inconsistent with other
4152 * %#x/%#X conversions and inconsistent with Python's
4153 * hex() function
4154 * - there are platforms that violate the standard and
4155 * convert 0 with the '0x' or '0X'
4156 * (Metrowerks, Compaq Tru64)
4157 * - there are platforms that give '0x' when converting
4158 * under %#X, but convert 0 in accordance with the
4159 * standard (OS/2 EMX)
4161 * We can achieve the desired consistency by inserting our
4162 * own '0x' or '0X' prefix, and substituting %x/%X in place
4163 * of %#x/%#X.
4165 * Note that this is the same approach as used in
4166 * formatint() in unicodeobject.c
4168 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4169 sign, type, prec, type);
4171 else {
4172 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4173 sign, (flags&F_ALT) ? "#" : "",
4174 prec, type);
4177 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4178 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4180 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4181 PyErr_SetString(PyExc_OverflowError,
4182 "formatted integer is too long (precision too large?)");
4183 return -1;
4185 if (sign[0])
4186 PyOS_snprintf(buf, buflen, fmt, -x);
4187 else
4188 PyOS_snprintf(buf, buflen, fmt, x);
4189 return (int)strlen(buf);
4192 Py_LOCAL_INLINE(int)
4193 formatchar(char *buf, size_t buflen, PyObject *v)
4195 /* presume that the buffer is at least 2 characters long */
4196 if (PyString_Check(v)) {
4197 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4198 return -1;
4200 else {
4201 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4202 return -1;
4204 buf[1] = '\0';
4205 return 1;
4208 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4210 FORMATBUFLEN is the length of the buffer in which the ints &
4211 chars are formatted. XXX This is a magic number. Each formatting
4212 routine does bounds checking to ensure no overflow, but a better
4213 solution may be to malloc a buffer of appropriate size for each
4214 format. For now, the current solution is sufficient.
4216 #define FORMATBUFLEN (size_t)120
4218 PyObject *
4219 PyString_Format(PyObject *format, PyObject *args)
4221 char *fmt, *res;
4222 Py_ssize_t arglen, argidx;
4223 Py_ssize_t reslen, rescnt, fmtcnt;
4224 int args_owned = 0;
4225 PyObject *result, *orig_args;
4226 #ifdef Py_USING_UNICODE
4227 PyObject *v, *w;
4228 #endif
4229 PyObject *dict = NULL;
4230 if (format == NULL || !PyString_Check(format) || args == NULL) {
4231 PyErr_BadInternalCall();
4232 return NULL;
4234 orig_args = args;
4235 fmt = PyString_AS_STRING(format);
4236 fmtcnt = PyString_GET_SIZE(format);
4237 reslen = rescnt = fmtcnt + 100;
4238 result = PyString_FromStringAndSize((char *)NULL, reslen);
4239 if (result == NULL)
4240 return NULL;
4241 res = PyString_AsString(result);
4242 if (PyTuple_Check(args)) {
4243 arglen = PyTuple_GET_SIZE(args);
4244 argidx = 0;
4246 else {
4247 arglen = -1;
4248 argidx = -2;
4250 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4251 !PyObject_TypeCheck(args, &PyBaseString_Type))
4252 dict = args;
4253 while (--fmtcnt >= 0) {
4254 if (*fmt != '%') {
4255 if (--rescnt < 0) {
4256 rescnt = fmtcnt + 100;
4257 reslen += rescnt;
4258 if (_PyString_Resize(&result, reslen))
4259 return NULL;
4260 res = PyString_AS_STRING(result)
4261 + reslen - rescnt;
4262 --rescnt;
4264 *res++ = *fmt++;
4266 else {
4267 /* Got a format specifier */
4268 int flags = 0;
4269 Py_ssize_t width = -1;
4270 int prec = -1;
4271 int c = '\0';
4272 int fill;
4273 int isnumok;
4274 PyObject *v = NULL;
4275 PyObject *temp = NULL;
4276 char *pbuf;
4277 int sign;
4278 Py_ssize_t len;
4279 char formatbuf[FORMATBUFLEN];
4280 /* For format{int,char}() */
4281 #ifdef Py_USING_UNICODE
4282 char *fmt_start = fmt;
4283 Py_ssize_t argidx_start = argidx;
4284 #endif
4286 fmt++;
4287 if (*fmt == '(') {
4288 char *keystart;
4289 Py_ssize_t keylen;
4290 PyObject *key;
4291 int pcount = 1;
4293 if (dict == NULL) {
4294 PyErr_SetString(PyExc_TypeError,
4295 "format requires a mapping");
4296 goto error;
4298 ++fmt;
4299 --fmtcnt;
4300 keystart = fmt;
4301 /* Skip over balanced parentheses */
4302 while (pcount > 0 && --fmtcnt >= 0) {
4303 if (*fmt == ')')
4304 --pcount;
4305 else if (*fmt == '(')
4306 ++pcount;
4307 fmt++;
4309 keylen = fmt - keystart - 1;
4310 if (fmtcnt < 0 || pcount > 0) {
4311 PyErr_SetString(PyExc_ValueError,
4312 "incomplete format key");
4313 goto error;
4315 key = PyString_FromStringAndSize(keystart,
4316 keylen);
4317 if (key == NULL)
4318 goto error;
4319 if (args_owned) {
4320 Py_DECREF(args);
4321 args_owned = 0;
4323 args = PyObject_GetItem(dict, key);
4324 Py_DECREF(key);
4325 if (args == NULL) {
4326 goto error;
4328 args_owned = 1;
4329 arglen = -1;
4330 argidx = -2;
4332 while (--fmtcnt >= 0) {
4333 switch (c = *fmt++) {
4334 case '-': flags |= F_LJUST; continue;
4335 case '+': flags |= F_SIGN; continue;
4336 case ' ': flags |= F_BLANK; continue;
4337 case '#': flags |= F_ALT; continue;
4338 case '0': flags |= F_ZERO; continue;
4340 break;
4342 if (c == '*') {
4343 v = getnextarg(args, arglen, &argidx);
4344 if (v == NULL)
4345 goto error;
4346 if (!PyInt_Check(v)) {
4347 PyErr_SetString(PyExc_TypeError,
4348 "* wants int");
4349 goto error;
4351 width = PyInt_AsLong(v);
4352 if (width < 0) {
4353 flags |= F_LJUST;
4354 width = -width;
4356 if (--fmtcnt >= 0)
4357 c = *fmt++;
4359 else if (c >= 0 && isdigit(c)) {
4360 width = c - '0';
4361 while (--fmtcnt >= 0) {
4362 c = Py_CHARMASK(*fmt++);
4363 if (!isdigit(c))
4364 break;
4365 if ((width*10) / 10 != width) {
4366 PyErr_SetString(
4367 PyExc_ValueError,
4368 "width too big");
4369 goto error;
4371 width = width*10 + (c - '0');
4374 if (c == '.') {
4375 prec = 0;
4376 if (--fmtcnt >= 0)
4377 c = *fmt++;
4378 if (c == '*') {
4379 v = getnextarg(args, arglen, &argidx);
4380 if (v == NULL)
4381 goto error;
4382 if (!PyInt_Check(v)) {
4383 PyErr_SetString(
4384 PyExc_TypeError,
4385 "* wants int");
4386 goto error;
4388 prec = PyInt_AsLong(v);
4389 if (prec < 0)
4390 prec = 0;
4391 if (--fmtcnt >= 0)
4392 c = *fmt++;
4394 else if (c >= 0 && isdigit(c)) {
4395 prec = c - '0';
4396 while (--fmtcnt >= 0) {
4397 c = Py_CHARMASK(*fmt++);
4398 if (!isdigit(c))
4399 break;
4400 if ((prec*10) / 10 != prec) {
4401 PyErr_SetString(
4402 PyExc_ValueError,
4403 "prec too big");
4404 goto error;
4406 prec = prec*10 + (c - '0');
4409 } /* prec */
4410 if (fmtcnt >= 0) {
4411 if (c == 'h' || c == 'l' || c == 'L') {
4412 if (--fmtcnt >= 0)
4413 c = *fmt++;
4416 if (fmtcnt < 0) {
4417 PyErr_SetString(PyExc_ValueError,
4418 "incomplete format");
4419 goto error;
4421 if (c != '%') {
4422 v = getnextarg(args, arglen, &argidx);
4423 if (v == NULL)
4424 goto error;
4426 sign = 0;
4427 fill = ' ';
4428 switch (c) {
4429 case '%':
4430 pbuf = "%";
4431 len = 1;
4432 break;
4433 case 's':
4434 #ifdef Py_USING_UNICODE
4435 if (PyUnicode_Check(v)) {
4436 fmt = fmt_start;
4437 argidx = argidx_start;
4438 goto unicode;
4440 #endif
4441 temp = _PyObject_Str(v);
4442 #ifdef Py_USING_UNICODE
4443 if (temp != NULL && PyUnicode_Check(temp)) {
4444 Py_DECREF(temp);
4445 fmt = fmt_start;
4446 argidx = argidx_start;
4447 goto unicode;
4449 #endif
4450 /* Fall through */
4451 case 'r':
4452 if (c == 'r')
4453 temp = PyObject_Repr(v);
4454 if (temp == NULL)
4455 goto error;
4456 if (!PyString_Check(temp)) {
4457 PyErr_SetString(PyExc_TypeError,
4458 "%s argument has non-string str()");
4459 Py_DECREF(temp);
4460 goto error;
4462 pbuf = PyString_AS_STRING(temp);
4463 len = PyString_GET_SIZE(temp);
4464 if (prec >= 0 && len > prec)
4465 len = prec;
4466 break;
4467 case 'i':
4468 case 'd':
4469 case 'u':
4470 case 'o':
4471 case 'x':
4472 case 'X':
4473 if (c == 'i')
4474 c = 'd';
4475 isnumok = 0;
4476 if (PyNumber_Check(v)) {
4477 PyObject *iobj=NULL;
4479 if (PyInt_Check(v) || (PyLong_Check(v))) {
4480 iobj = v;
4481 Py_INCREF(iobj);
4483 else {
4484 iobj = PyNumber_Int(v);
4485 if (iobj==NULL) iobj = PyNumber_Long(v);
4487 if (iobj!=NULL) {
4488 if (PyInt_Check(iobj)) {
4489 isnumok = 1;
4490 pbuf = formatbuf;
4491 len = formatint(pbuf,
4492 sizeof(formatbuf),
4493 flags, prec, c, iobj);
4494 Py_DECREF(iobj);
4495 if (len < 0)
4496 goto error;
4497 sign = 1;
4499 else if (PyLong_Check(iobj)) {
4500 int ilen;
4502 isnumok = 1;
4503 temp = _PyString_FormatLong(iobj, flags,
4504 prec, c, &pbuf, &ilen);
4505 Py_DECREF(iobj);
4506 len = ilen;
4507 if (!temp)
4508 goto error;
4509 sign = 1;
4511 else {
4512 Py_DECREF(iobj);
4516 if (!isnumok) {
4517 PyErr_Format(PyExc_TypeError,
4518 "%%%c format: a number is required, "
4519 "not %.200s", c, Py_TYPE(v)->tp_name);
4520 goto error;
4522 if (flags & F_ZERO)
4523 fill = '0';
4524 break;
4525 case 'e':
4526 case 'E':
4527 case 'f':
4528 case 'F':
4529 case 'g':
4530 case 'G':
4531 temp = formatfloat(v, flags, prec, c);
4532 if (temp == NULL)
4533 goto error;
4534 pbuf = PyString_AS_STRING(temp);
4535 len = PyString_GET_SIZE(temp);
4536 sign = 1;
4537 if (flags & F_ZERO)
4538 fill = '0';
4539 break;
4540 case 'c':
4541 #ifdef Py_USING_UNICODE
4542 if (PyUnicode_Check(v)) {
4543 fmt = fmt_start;
4544 argidx = argidx_start;
4545 goto unicode;
4547 #endif
4548 pbuf = formatbuf;
4549 len = formatchar(pbuf, sizeof(formatbuf), v);
4550 if (len < 0)
4551 goto error;
4552 break;
4553 default:
4554 PyErr_Format(PyExc_ValueError,
4555 "unsupported format character '%c' (0x%x) "
4556 "at index %zd",
4557 c, c,
4558 (Py_ssize_t)(fmt - 1 -
4559 PyString_AsString(format)));
4560 goto error;
4562 if (sign) {
4563 if (*pbuf == '-' || *pbuf == '+') {
4564 sign = *pbuf++;
4565 len--;
4567 else if (flags & F_SIGN)
4568 sign = '+';
4569 else if (flags & F_BLANK)
4570 sign = ' ';
4571 else
4572 sign = 0;
4574 if (width < len)
4575 width = len;
4576 if (rescnt - (sign != 0) < width) {
4577 reslen -= rescnt;
4578 rescnt = width + fmtcnt + 100;
4579 reslen += rescnt;
4580 if (reslen < 0) {
4581 Py_DECREF(result);
4582 Py_XDECREF(temp);
4583 return PyErr_NoMemory();
4585 if (_PyString_Resize(&result, reslen)) {
4586 Py_XDECREF(temp);
4587 return NULL;
4589 res = PyString_AS_STRING(result)
4590 + reslen - rescnt;
4592 if (sign) {
4593 if (fill != ' ')
4594 *res++ = sign;
4595 rescnt--;
4596 if (width > len)
4597 width--;
4599 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4600 assert(pbuf[0] == '0');
4601 assert(pbuf[1] == c);
4602 if (fill != ' ') {
4603 *res++ = *pbuf++;
4604 *res++ = *pbuf++;
4606 rescnt -= 2;
4607 width -= 2;
4608 if (width < 0)
4609 width = 0;
4610 len -= 2;
4612 if (width > len && !(flags & F_LJUST)) {
4613 do {
4614 --rescnt;
4615 *res++ = fill;
4616 } while (--width > len);
4618 if (fill == ' ') {
4619 if (sign)
4620 *res++ = sign;
4621 if ((flags & F_ALT) &&
4622 (c == 'x' || c == 'X')) {
4623 assert(pbuf[0] == '0');
4624 assert(pbuf[1] == c);
4625 *res++ = *pbuf++;
4626 *res++ = *pbuf++;
4629 Py_MEMCPY(res, pbuf, len);
4630 res += len;
4631 rescnt -= len;
4632 while (--width >= len) {
4633 --rescnt;
4634 *res++ = ' ';
4636 if (dict && (argidx < arglen) && c != '%') {
4637 PyErr_SetString(PyExc_TypeError,
4638 "not all arguments converted during string formatting");
4639 Py_XDECREF(temp);
4640 goto error;
4642 Py_XDECREF(temp);
4643 } /* '%' */
4644 } /* until end */
4645 if (argidx < arglen && !dict) {
4646 PyErr_SetString(PyExc_TypeError,
4647 "not all arguments converted during string formatting");
4648 goto error;
4650 if (args_owned) {
4651 Py_DECREF(args);
4653 if (_PyString_Resize(&result, reslen - rescnt))
4654 return NULL;
4655 return result;
4657 #ifdef Py_USING_UNICODE
4658 unicode:
4659 if (args_owned) {
4660 Py_DECREF(args);
4661 args_owned = 0;
4663 /* Fiddle args right (remove the first argidx arguments) */
4664 if (PyTuple_Check(orig_args) && argidx > 0) {
4665 PyObject *v;
4666 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4667 v = PyTuple_New(n);
4668 if (v == NULL)
4669 goto error;
4670 while (--n >= 0) {
4671 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4672 Py_INCREF(w);
4673 PyTuple_SET_ITEM(v, n, w);
4675 args = v;
4676 } else {
4677 Py_INCREF(orig_args);
4678 args = orig_args;
4680 args_owned = 1;
4681 /* Take what we have of the result and let the Unicode formatting
4682 function format the rest of the input. */
4683 rescnt = res - PyString_AS_STRING(result);
4684 if (_PyString_Resize(&result, rescnt))
4685 goto error;
4686 fmtcnt = PyString_GET_SIZE(format) - \
4687 (fmt - PyString_AS_STRING(format));
4688 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4689 if (format == NULL)
4690 goto error;
4691 v = PyUnicode_Format(format, args);
4692 Py_DECREF(format);
4693 if (v == NULL)
4694 goto error;
4695 /* Paste what we have (result) to what the Unicode formatting
4696 function returned (v) and return the result (or error) */
4697 w = PyUnicode_Concat(result, v);
4698 Py_DECREF(result);
4699 Py_DECREF(v);
4700 Py_DECREF(args);
4701 return w;
4702 #endif /* Py_USING_UNICODE */
4704 error:
4705 Py_DECREF(result);
4706 if (args_owned) {
4707 Py_DECREF(args);
4709 return NULL;
4712 void
4713 PyString_InternInPlace(PyObject **p)
4715 register PyStringObject *s = (PyStringObject *)(*p);
4716 PyObject *t;
4717 if (s == NULL || !PyString_Check(s))
4718 Py_FatalError("PyString_InternInPlace: strings only please!");
4719 /* If it's a string subclass, we don't really know what putting
4720 it in the interned dict might do. */
4721 if (!PyString_CheckExact(s))
4722 return;
4723 if (PyString_CHECK_INTERNED(s))
4724 return;
4725 if (interned == NULL) {
4726 interned = PyDict_New();
4727 if (interned == NULL) {
4728 PyErr_Clear(); /* Don't leave an exception */
4729 return;
4732 t = PyDict_GetItem(interned, (PyObject *)s);
4733 if (t) {
4734 Py_INCREF(t);
4735 Py_DECREF(*p);
4736 *p = t;
4737 return;
4740 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4741 PyErr_Clear();
4742 return;
4744 /* The two references in interned are not counted by refcnt.
4745 The string deallocator will take care of this */
4746 Py_REFCNT(s) -= 2;
4747 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
4750 void
4751 PyString_InternImmortal(PyObject **p)
4753 PyString_InternInPlace(p);
4754 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4755 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4756 Py_INCREF(*p);
4761 PyObject *
4762 PyString_InternFromString(const char *cp)
4764 PyObject *s = PyString_FromString(cp);
4765 if (s == NULL)
4766 return NULL;
4767 PyString_InternInPlace(&s);
4768 return s;
4771 void
4772 PyString_Fini(void)
4774 int i;
4775 for (i = 0; i < UCHAR_MAX + 1; i++) {
4776 Py_XDECREF(characters[i]);
4777 characters[i] = NULL;
4779 Py_XDECREF(nullstring);
4780 nullstring = NULL;
4783 void _Py_ReleaseInternedStrings(void)
4785 PyObject *keys;
4786 PyStringObject *s;
4787 Py_ssize_t i, n;
4788 Py_ssize_t immortal_size = 0, mortal_size = 0;
4790 if (interned == NULL || !PyDict_Check(interned))
4791 return;
4792 keys = PyDict_Keys(interned);
4793 if (keys == NULL || !PyList_Check(keys)) {
4794 PyErr_Clear();
4795 return;
4798 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4799 detector, interned strings are not forcibly deallocated; rather, we
4800 give them their stolen references back, and then clear and DECREF
4801 the interned dict. */
4803 n = PyList_GET_SIZE(keys);
4804 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4806 for (i = 0; i < n; i++) {
4807 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4808 switch (s->ob_sstate) {
4809 case SSTATE_NOT_INTERNED:
4810 /* XXX Shouldn't happen */
4811 break;
4812 case SSTATE_INTERNED_IMMORTAL:
4813 Py_REFCNT(s) += 1;
4814 immortal_size += Py_SIZE(s);
4815 break;
4816 case SSTATE_INTERNED_MORTAL:
4817 Py_REFCNT(s) += 2;
4818 mortal_size += Py_SIZE(s);
4819 break;
4820 default:
4821 Py_FatalError("Inconsistent interned string state.");
4823 s->ob_sstate = SSTATE_NOT_INTERNED;
4825 fprintf(stderr, "total size of all interned strings: "
4826 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4827 "mortal/immortal\n", mortal_size, immortal_size);
4828 Py_DECREF(keys);
4829 PyDict_Clear(interned);
4830 Py_DECREF(interned);
4831 interned = NULL;