More installation info. Bump alpha version.
[python/dscho.git] / Objects / stringobject.c
blob748592ea50761818a9c277ed6f77af004dfec0d7
1 /* String object implementation */
3 #include "Python.h"
5 #include <ctype.h>
7 #ifdef COUNT_ALLOCS
8 int null_strings, one_strings;
9 #endif
11 #if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
12 #define UCHAR_MAX 255
13 #endif
15 static PyStringObject *characters[UCHAR_MAX + 1];
16 static PyStringObject *nullstring;
18 /* This dictionary holds all interned strings. Note that references to
19 strings in this dictionary are *not* counted in the string's ob_refcnt.
20 When the interned string reaches a refcnt of 0 the string deallocation
21 function will delete the reference from this dictionary.
23 Another way to look at this is to say that the actual reference
24 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
26 static PyObject *interned;
30 For both PyString_FromString() and PyString_FromStringAndSize(), the
31 parameter `size' denotes number of characters to allocate, not counting any
32 null terminating character.
34 For PyString_FromString(), the parameter `str' points to a null-terminated
35 string containing exactly `size' bytes.
37 For PyString_FromStringAndSize(), the parameter `str' is
38 either NULL or else points to a string containing at least `size' bytes.
39 For PyString_FromStringAndSize(), the string in the `str' parameter does
40 not have to be null-terminated. (Therefore it is safe to construct a
41 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
42 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
43 bytes (setting the last byte to the null terminating character) and you can
44 fill in the data yourself. If `str' is non-NULL then the resulting
45 PyString object must be treated as immutable and you must not fill in nor
46 alter the data yourself, since the strings may be shared.
48 The PyObject member `op->ob_size', which denotes the number of "extra
49 items" in a variable-size object, will contain the number of bytes
50 allocated for string data, not counting the null terminating character. It
51 is therefore equal to the `size' parameter (for
52 PyString_FromStringAndSize()) or the length of the string in the `str'
53 parameter (for PyString_FromString()).
55 PyObject *
56 PyString_FromStringAndSize(const char *str, int size)
58 register PyStringObject *op;
59 if (size == 0 && (op = nullstring) != NULL) {
60 #ifdef COUNT_ALLOCS
61 null_strings++;
62 #endif
63 Py_INCREF(op);
64 return (PyObject *)op;
66 if (size == 1 && str != NULL &&
67 (op = characters[*str & UCHAR_MAX]) != NULL)
69 #ifdef COUNT_ALLOCS
70 one_strings++;
71 #endif
72 Py_INCREF(op);
73 return (PyObject *)op;
76 /* Inline PyObject_NewVar */
77 op = (PyStringObject *)
78 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
79 if (op == NULL)
80 return PyErr_NoMemory();
81 PyObject_INIT_VAR(op, &PyString_Type, size);
82 op->ob_shash = -1;
83 op->ob_sstate = SSTATE_NOT_INTERNED;
84 if (str != NULL)
85 memcpy(op->ob_sval, str, size);
86 op->ob_sval[size] = '\0';
87 /* share short strings */
88 if (size == 0) {
89 PyObject *t = (PyObject *)op;
90 PyString_InternInPlace(&t);
91 op = (PyStringObject *)t;
92 nullstring = op;
93 Py_INCREF(op);
94 } else if (size == 1 && str != NULL) {
95 PyObject *t = (PyObject *)op;
96 PyString_InternInPlace(&t);
97 op = (PyStringObject *)t;
98 characters[*str & UCHAR_MAX] = op;
99 Py_INCREF(op);
101 return (PyObject *) op;
104 PyObject *
105 PyString_FromString(const char *str)
107 register size_t size;
108 register PyStringObject *op;
110 assert(str != NULL);
111 size = strlen(str);
112 if (size > INT_MAX) {
113 PyErr_SetString(PyExc_OverflowError,
114 "string is too long for a Python string");
115 return NULL;
117 if (size == 0 && (op = nullstring) != NULL) {
118 #ifdef COUNT_ALLOCS
119 null_strings++;
120 #endif
121 Py_INCREF(op);
122 return (PyObject *)op;
124 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
125 #ifdef COUNT_ALLOCS
126 one_strings++;
127 #endif
128 Py_INCREF(op);
129 return (PyObject *)op;
132 /* Inline PyObject_NewVar */
133 op = (PyStringObject *)
134 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
135 if (op == NULL)
136 return PyErr_NoMemory();
137 PyObject_INIT_VAR(op, &PyString_Type, size);
138 op->ob_shash = -1;
139 op->ob_sstate = SSTATE_NOT_INTERNED;
140 memcpy(op->ob_sval, str, size+1);
141 /* share short strings */
142 if (size == 0) {
143 PyObject *t = (PyObject *)op;
144 PyString_InternInPlace(&t);
145 op = (PyStringObject *)t;
146 nullstring = op;
147 Py_INCREF(op);
148 } else if (size == 1) {
149 PyObject *t = (PyObject *)op;
150 PyString_InternInPlace(&t);
151 op = (PyStringObject *)t;
152 characters[*str & UCHAR_MAX] = op;
153 Py_INCREF(op);
155 return (PyObject *) op;
158 PyObject *
159 PyString_FromFormatV(const char *format, va_list vargs)
161 va_list count;
162 int n = 0;
163 const char* f;
164 char *s;
165 PyObject* string;
167 #ifdef VA_LIST_IS_ARRAY
168 memcpy(count, vargs, sizeof(va_list));
169 #else
170 #ifdef __va_copy
171 __va_copy(count, vargs);
172 #else
173 count = vargs;
174 #endif
175 #endif
176 /* step 1: figure out how large a buffer we need */
177 for (f = format; *f; f++) {
178 if (*f == '%') {
179 const char* p = f;
180 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
183 /* skip the 'l' in %ld, since it doesn't change the
184 width. although only %d is supported (see
185 "expand" section below), others can be easily
186 added */
187 if (*f == 'l' && *(f+1) == 'd')
188 ++f;
190 switch (*f) {
191 case 'c':
192 (void)va_arg(count, int);
193 /* fall through... */
194 case '%':
195 n++;
196 break;
197 case 'd': case 'i': case 'x':
198 (void) va_arg(count, int);
199 /* 20 bytes is enough to hold a 64-bit
200 integer. Decimal takes the most space.
201 This isn't enough for octal. */
202 n += 20;
203 break;
204 case 's':
205 s = va_arg(count, char*);
206 n += strlen(s);
207 break;
208 case 'p':
209 (void) va_arg(count, int);
210 /* maximum 64-bit pointer representation:
211 * 0xffffffffffffffff
212 * so 19 characters is enough.
213 * XXX I count 18 -- what's the extra for?
215 n += 19;
216 break;
217 default:
218 /* if we stumble upon an unknown
219 formatting code, copy the rest of
220 the format string to the output
221 string. (we cannot just skip the
222 code, since there's no way to know
223 what's in the argument list) */
224 n += strlen(p);
225 goto expand;
227 } else
228 n++;
230 expand:
231 /* step 2: fill the buffer */
232 /* Since we've analyzed how much space we need for the worst case,
233 use sprintf directly instead of the slower PyOS_snprintf. */
234 string = PyString_FromStringAndSize(NULL, n);
235 if (!string)
236 return NULL;
238 s = PyString_AsString(string);
240 for (f = format; *f; f++) {
241 if (*f == '%') {
242 const char* p = f++;
243 int i, longflag = 0;
244 /* parse the width.precision part (we're only
245 interested in the precision value, if any) */
246 n = 0;
247 while (isdigit(Py_CHARMASK(*f)))
248 n = (n*10) + *f++ - '0';
249 if (*f == '.') {
250 f++;
251 n = 0;
252 while (isdigit(Py_CHARMASK(*f)))
253 n = (n*10) + *f++ - '0';
255 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
256 f++;
257 /* handle the long flag, but only for %ld. others
258 can be added when necessary. */
259 if (*f == 'l' && *(f+1) == 'd') {
260 longflag = 1;
261 ++f;
264 switch (*f) {
265 case 'c':
266 *s++ = va_arg(vargs, int);
267 break;
268 case 'd':
269 if (longflag)
270 sprintf(s, "%ld", va_arg(vargs, long));
271 else
272 sprintf(s, "%d", va_arg(vargs, int));
273 s += strlen(s);
274 break;
275 case 'i':
276 sprintf(s, "%i", va_arg(vargs, int));
277 s += strlen(s);
278 break;
279 case 'x':
280 sprintf(s, "%x", va_arg(vargs, int));
281 s += strlen(s);
282 break;
283 case 's':
284 p = va_arg(vargs, char*);
285 i = strlen(p);
286 if (n > 0 && i > n)
287 i = n;
288 memcpy(s, p, i);
289 s += i;
290 break;
291 case 'p':
292 sprintf(s, "%p", va_arg(vargs, void*));
293 /* %p is ill-defined: ensure leading 0x. */
294 if (s[1] == 'X')
295 s[1] = 'x';
296 else if (s[1] != 'x') {
297 memmove(s+2, s, strlen(s)+1);
298 s[0] = '0';
299 s[1] = 'x';
301 s += strlen(s);
302 break;
303 case '%':
304 *s++ = '%';
305 break;
306 default:
307 strcpy(s, p);
308 s += strlen(s);
309 goto end;
311 } else
312 *s++ = *f;
315 end:
316 _PyString_Resize(&string, s - PyString_AS_STRING(string));
317 return string;
320 PyObject *
321 PyString_FromFormat(const char *format, ...)
323 PyObject* ret;
324 va_list vargs;
326 #ifdef HAVE_STDARG_PROTOTYPES
327 va_start(vargs, format);
328 #else
329 va_start(vargs);
330 #endif
331 ret = PyString_FromFormatV(format, vargs);
332 va_end(vargs);
333 return ret;
337 PyObject *PyString_Decode(const char *s,
338 int size,
339 const char *encoding,
340 const char *errors)
342 PyObject *v, *str;
344 str = PyString_FromStringAndSize(s, size);
345 if (str == NULL)
346 return NULL;
347 v = PyString_AsDecodedString(str, encoding, errors);
348 Py_DECREF(str);
349 return v;
352 PyObject *PyString_AsDecodedObject(PyObject *str,
353 const char *encoding,
354 const char *errors)
356 PyObject *v;
358 if (!PyString_Check(str)) {
359 PyErr_BadArgument();
360 goto onError;
363 if (encoding == NULL) {
364 #ifdef Py_USING_UNICODE
365 encoding = PyUnicode_GetDefaultEncoding();
366 #else
367 PyErr_SetString(PyExc_ValueError, "no encoding specified");
368 goto onError;
369 #endif
372 /* Decode via the codec registry */
373 v = PyCodec_Decode(str, encoding, errors);
374 if (v == NULL)
375 goto onError;
377 return v;
379 onError:
380 return NULL;
383 PyObject *PyString_AsDecodedString(PyObject *str,
384 const char *encoding,
385 const char *errors)
387 PyObject *v;
389 v = PyString_AsDecodedObject(str, encoding, errors);
390 if (v == NULL)
391 goto onError;
393 #ifdef Py_USING_UNICODE
394 /* Convert Unicode to a string using the default encoding */
395 if (PyUnicode_Check(v)) {
396 PyObject *temp = v;
397 v = PyUnicode_AsEncodedString(v, NULL, NULL);
398 Py_DECREF(temp);
399 if (v == NULL)
400 goto onError;
402 #endif
403 if (!PyString_Check(v)) {
404 PyErr_Format(PyExc_TypeError,
405 "decoder did not return a string object (type=%.400s)",
406 v->ob_type->tp_name);
407 Py_DECREF(v);
408 goto onError;
411 return v;
413 onError:
414 return NULL;
417 PyObject *PyString_Encode(const char *s,
418 int size,
419 const char *encoding,
420 const char *errors)
422 PyObject *v, *str;
424 str = PyString_FromStringAndSize(s, size);
425 if (str == NULL)
426 return NULL;
427 v = PyString_AsEncodedString(str, encoding, errors);
428 Py_DECREF(str);
429 return v;
432 PyObject *PyString_AsEncodedObject(PyObject *str,
433 const char *encoding,
434 const char *errors)
436 PyObject *v;
438 if (!PyString_Check(str)) {
439 PyErr_BadArgument();
440 goto onError;
443 if (encoding == NULL) {
444 #ifdef Py_USING_UNICODE
445 encoding = PyUnicode_GetDefaultEncoding();
446 #else
447 PyErr_SetString(PyExc_ValueError, "no encoding specified");
448 goto onError;
449 #endif
452 /* Encode via the codec registry */
453 v = PyCodec_Encode(str, encoding, errors);
454 if (v == NULL)
455 goto onError;
457 return v;
459 onError:
460 return NULL;
463 PyObject *PyString_AsEncodedString(PyObject *str,
464 const char *encoding,
465 const char *errors)
467 PyObject *v;
469 v = PyString_AsEncodedObject(str, encoding, errors);
470 if (v == NULL)
471 goto onError;
473 #ifdef Py_USING_UNICODE
474 /* Convert Unicode to a string using the default encoding */
475 if (PyUnicode_Check(v)) {
476 PyObject *temp = v;
477 v = PyUnicode_AsEncodedString(v, NULL, NULL);
478 Py_DECREF(temp);
479 if (v == NULL)
480 goto onError;
482 #endif
483 if (!PyString_Check(v)) {
484 PyErr_Format(PyExc_TypeError,
485 "encoder did not return a string object (type=%.400s)",
486 v->ob_type->tp_name);
487 Py_DECREF(v);
488 goto onError;
491 return v;
493 onError:
494 return NULL;
497 static void
498 string_dealloc(PyObject *op)
500 switch (PyString_CHECK_INTERNED(op)) {
501 case SSTATE_NOT_INTERNED:
502 break;
504 case SSTATE_INTERNED_MORTAL:
505 /* revive dead object temporarily for DelItem */
506 op->ob_refcnt = 3;
507 if (PyDict_DelItem(interned, op) != 0)
508 Py_FatalError(
509 "deletion of interned string failed");
510 break;
512 case SSTATE_INTERNED_IMMORTAL:
513 Py_FatalError("Immortal interned string died.");
515 default:
516 Py_FatalError("Inconsistent interned string state.");
518 op->ob_type->tp_free(op);
521 /* Unescape a backslash-escaped string. If unicode is non-zero,
522 the string is a u-literal. If recode_encoding is non-zero,
523 the string is UTF-8 encoded and should be re-encoded in the
524 specified encoding. */
526 PyObject *PyString_DecodeEscape(const char *s,
527 int len,
528 const char *errors,
529 int unicode,
530 const char *recode_encoding)
532 int c;
533 char *p, *buf;
534 const char *end;
535 PyObject *v;
536 int newlen = recode_encoding ? 4*len:len;
537 v = PyString_FromStringAndSize((char *)NULL, newlen);
538 if (v == NULL)
539 return NULL;
540 p = buf = PyString_AsString(v);
541 end = s + len;
542 while (s < end) {
543 if (*s != '\\') {
544 non_esc:
545 #ifdef Py_USING_UNICODE
546 if (recode_encoding && (*s & 0x80)) {
547 PyObject *u, *w;
548 char *r;
549 const char* t;
550 int rn;
551 t = s;
552 /* Decode non-ASCII bytes as UTF-8. */
553 while (t < end && (*t & 0x80)) t++;
554 u = PyUnicode_DecodeUTF8(s, t - s, errors);
555 if(!u) goto failed;
557 /* Recode them in target encoding. */
558 w = PyUnicode_AsEncodedString(
559 u, recode_encoding, errors);
560 Py_DECREF(u);
561 if (!w) goto failed;
563 /* Append bytes to output buffer. */
564 r = PyString_AsString(w);
565 rn = PyString_Size(w);
566 memcpy(p, r, rn);
567 p += rn;
568 Py_DECREF(w);
569 s = t;
570 } else {
571 *p++ = *s++;
573 #else
574 *p++ = *s++;
575 #endif
576 continue;
578 s++;
579 if (s==end) {
580 PyErr_SetString(PyExc_ValueError,
581 "Trailing \\ in string");
582 goto failed;
584 switch (*s++) {
585 /* XXX This assumes ASCII! */
586 case '\n': break;
587 case '\\': *p++ = '\\'; break;
588 case '\'': *p++ = '\''; break;
589 case '\"': *p++ = '\"'; break;
590 case 'b': *p++ = '\b'; break;
591 case 'f': *p++ = '\014'; break; /* FF */
592 case 't': *p++ = '\t'; break;
593 case 'n': *p++ = '\n'; break;
594 case 'r': *p++ = '\r'; break;
595 case 'v': *p++ = '\013'; break; /* VT */
596 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
597 case '0': case '1': case '2': case '3':
598 case '4': case '5': case '6': case '7':
599 c = s[-1] - '0';
600 if ('0' <= *s && *s <= '7') {
601 c = (c<<3) + *s++ - '0';
602 if ('0' <= *s && *s <= '7')
603 c = (c<<3) + *s++ - '0';
605 *p++ = c;
606 break;
607 case 'x':
608 if (isxdigit(Py_CHARMASK(s[0]))
609 && isxdigit(Py_CHARMASK(s[1]))) {
610 unsigned int x = 0;
611 c = Py_CHARMASK(*s);
612 s++;
613 if (isdigit(c))
614 x = c - '0';
615 else if (islower(c))
616 x = 10 + c - 'a';
617 else
618 x = 10 + c - 'A';
619 x = x << 4;
620 c = Py_CHARMASK(*s);
621 s++;
622 if (isdigit(c))
623 x += c - '0';
624 else if (islower(c))
625 x += 10 + c - 'a';
626 else
627 x += 10 + c - 'A';
628 *p++ = x;
629 break;
631 if (!errors || strcmp(errors, "strict") == 0) {
632 PyErr_SetString(PyExc_ValueError,
633 "invalid \\x escape");
634 goto failed;
636 if (strcmp(errors, "replace") == 0) {
637 *p++ = '?';
638 } else if (strcmp(errors, "ignore") == 0)
639 /* do nothing */;
640 else {
641 PyErr_Format(PyExc_ValueError,
642 "decoding error; "
643 "unknown error handling code: %.400s",
644 errors);
645 goto failed;
647 #ifndef Py_USING_UNICODE
648 case 'u':
649 case 'U':
650 case 'N':
651 if (unicode) {
652 PyErr_SetString(PyExc_ValueError,
653 "Unicode escapes not legal "
654 "when Unicode disabled");
655 goto failed;
657 #endif
658 default:
659 *p++ = '\\';
660 s--;
661 goto non_esc; /* an arbitry number of unescaped
662 UTF-8 bytes may follow. */
665 if (p-buf < newlen)
666 _PyString_Resize(&v, (int)(p - buf));
667 return v;
668 failed:
669 Py_DECREF(v);
670 return NULL;
673 static int
674 string_getsize(register PyObject *op)
676 char *s;
677 int len;
678 if (PyString_AsStringAndSize(op, &s, &len))
679 return -1;
680 return len;
683 static /*const*/ char *
684 string_getbuffer(register PyObject *op)
686 char *s;
687 int len;
688 if (PyString_AsStringAndSize(op, &s, &len))
689 return NULL;
690 return s;
694 PyString_Size(register PyObject *op)
696 if (!PyString_Check(op))
697 return string_getsize(op);
698 return ((PyStringObject *)op) -> ob_size;
701 /*const*/ char *
702 PyString_AsString(register PyObject *op)
704 if (!PyString_Check(op))
705 return string_getbuffer(op);
706 return ((PyStringObject *)op) -> ob_sval;
710 PyString_AsStringAndSize(register PyObject *obj,
711 register char **s,
712 register int *len)
714 if (s == NULL) {
715 PyErr_BadInternalCall();
716 return -1;
719 if (!PyString_Check(obj)) {
720 #ifdef Py_USING_UNICODE
721 if (PyUnicode_Check(obj)) {
722 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
723 if (obj == NULL)
724 return -1;
726 else
727 #endif
729 PyErr_Format(PyExc_TypeError,
730 "expected string or Unicode object, "
731 "%.200s found", obj->ob_type->tp_name);
732 return -1;
736 *s = PyString_AS_STRING(obj);
737 if (len != NULL)
738 *len = PyString_GET_SIZE(obj);
739 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
740 PyErr_SetString(PyExc_TypeError,
741 "expected string without null bytes");
742 return -1;
744 return 0;
747 /* Methods */
749 static int
750 string_print(PyStringObject *op, FILE *fp, int flags)
752 int i;
753 char c;
754 int quote;
756 /* XXX Ought to check for interrupts when writing long strings */
757 if (! PyString_CheckExact(op)) {
758 int ret;
759 /* A str subclass may have its own __str__ method. */
760 op = (PyStringObject *) PyObject_Str((PyObject *)op);
761 if (op == NULL)
762 return -1;
763 ret = string_print(op, fp, flags);
764 Py_DECREF(op);
765 return ret;
767 if (flags & Py_PRINT_RAW) {
768 #ifdef __VMS
769 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
770 #else
771 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
772 #endif
773 return 0;
776 /* figure out which quote to use; single is preferred */
777 quote = '\'';
778 if (memchr(op->ob_sval, '\'', op->ob_size) &&
779 !memchr(op->ob_sval, '"', op->ob_size))
780 quote = '"';
782 fputc(quote, fp);
783 for (i = 0; i < op->ob_size; i++) {
784 c = op->ob_sval[i];
785 if (c == quote || c == '\\')
786 fprintf(fp, "\\%c", c);
787 else if (c == '\t')
788 fprintf(fp, "\\t");
789 else if (c == '\n')
790 fprintf(fp, "\\n");
791 else if (c == '\r')
792 fprintf(fp, "\\r");
793 else if (c < ' ' || c >= 0x7f)
794 fprintf(fp, "\\x%02x", c & 0xff);
795 else
796 fputc(c, fp);
798 fputc(quote, fp);
799 return 0;
802 PyObject *
803 PyString_Repr(PyObject *obj, int smartquotes)
805 register PyStringObject* op = (PyStringObject*) obj;
806 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
807 PyObject *v;
808 if (newsize > INT_MAX) {
809 PyErr_SetString(PyExc_OverflowError,
810 "string is too large to make repr");
812 v = PyString_FromStringAndSize((char *)NULL, newsize);
813 if (v == NULL) {
814 return NULL;
816 else {
817 register int i;
818 register char c;
819 register char *p;
820 int quote;
822 /* figure out which quote to use; single is preferred */
823 quote = '\'';
824 if (smartquotes &&
825 memchr(op->ob_sval, '\'', op->ob_size) &&
826 !memchr(op->ob_sval, '"', op->ob_size))
827 quote = '"';
829 p = PyString_AS_STRING(v);
830 *p++ = quote;
831 for (i = 0; i < op->ob_size; i++) {
832 /* There's at least enough room for a hex escape
833 and a closing quote. */
834 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
835 c = op->ob_sval[i];
836 if (c == quote || c == '\\')
837 *p++ = '\\', *p++ = c;
838 else if (c == '\t')
839 *p++ = '\\', *p++ = 't';
840 else if (c == '\n')
841 *p++ = '\\', *p++ = 'n';
842 else if (c == '\r')
843 *p++ = '\\', *p++ = 'r';
844 else if (c < ' ' || c >= 0x7f) {
845 /* For performance, we don't want to call
846 PyOS_snprintf here (extra layers of
847 function call). */
848 sprintf(p, "\\x%02x", c & 0xff);
849 p += 4;
851 else
852 *p++ = c;
854 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
855 *p++ = quote;
856 *p = '\0';
857 _PyString_Resize(
858 &v, (int) (p - PyString_AS_STRING(v)));
859 return v;
863 static PyObject *
864 string_repr(PyObject *op)
866 return PyString_Repr(op, 1);
869 static PyObject *
870 string_str(PyObject *s)
872 assert(PyString_Check(s));
873 if (PyString_CheckExact(s)) {
874 Py_INCREF(s);
875 return s;
877 else {
878 /* Subtype -- return genuine string with the same value. */
879 PyStringObject *t = (PyStringObject *) s;
880 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
884 static int
885 string_length(PyStringObject *a)
887 return a->ob_size;
890 static PyObject *
891 string_concat(register PyStringObject *a, register PyObject *bb)
893 register unsigned int size;
894 register PyStringObject *op;
895 if (!PyString_Check(bb)) {
896 #ifdef Py_USING_UNICODE
897 if (PyUnicode_Check(bb))
898 return PyUnicode_Concat((PyObject *)a, bb);
899 #endif
900 PyErr_Format(PyExc_TypeError,
901 "cannot concatenate 'str' and '%.200s' objects",
902 bb->ob_type->tp_name);
903 return NULL;
905 #define b ((PyStringObject *)bb)
906 /* Optimize cases with empty left or right operand */
907 if ((a->ob_size == 0 || b->ob_size == 0) &&
908 PyString_CheckExact(a) && PyString_CheckExact(b)) {
909 if (a->ob_size == 0) {
910 Py_INCREF(bb);
911 return bb;
913 Py_INCREF(a);
914 return (PyObject *)a;
916 size = a->ob_size + b->ob_size;
917 /* Inline PyObject_NewVar */
918 op = (PyStringObject *)
919 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
920 if (op == NULL)
921 return PyErr_NoMemory();
922 PyObject_INIT_VAR(op, &PyString_Type, size);
923 op->ob_shash = -1;
924 op->ob_sstate = SSTATE_NOT_INTERNED;
925 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
926 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
927 op->ob_sval[size] = '\0';
928 return (PyObject *) op;
929 #undef b
932 static PyObject *
933 string_repeat(register PyStringObject *a, register int n)
935 register int i;
936 register int size;
937 register PyStringObject *op;
938 size_t nbytes;
939 if (n < 0)
940 n = 0;
941 /* watch out for overflows: the size can overflow int,
942 * and the # of bytes needed can overflow size_t
944 size = a->ob_size * n;
945 if (n && size / n != a->ob_size) {
946 PyErr_SetString(PyExc_OverflowError,
947 "repeated string is too long");
948 return NULL;
950 if (size == a->ob_size && PyString_CheckExact(a)) {
951 Py_INCREF(a);
952 return (PyObject *)a;
954 nbytes = size * sizeof(char);
955 if (nbytes / sizeof(char) != (size_t)size ||
956 nbytes + sizeof(PyStringObject) <= nbytes) {
957 PyErr_SetString(PyExc_OverflowError,
958 "repeated string is too long");
959 return NULL;
961 op = (PyStringObject *)
962 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
963 if (op == NULL)
964 return PyErr_NoMemory();
965 PyObject_INIT_VAR(op, &PyString_Type, size);
966 op->ob_shash = -1;
967 op->ob_sstate = SSTATE_NOT_INTERNED;
968 for (i = 0; i < size; i += a->ob_size)
969 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
970 op->ob_sval[size] = '\0';
971 return (PyObject *) op;
974 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
976 static PyObject *
977 string_slice(register PyStringObject *a, register int i, register int j)
978 /* j -- may be negative! */
980 if (i < 0)
981 i = 0;
982 if (j < 0)
983 j = 0; /* Avoid signed/unsigned bug in next line */
984 if (j > a->ob_size)
985 j = a->ob_size;
986 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
987 /* It's the same as a */
988 Py_INCREF(a);
989 return (PyObject *)a;
991 if (j < i)
992 j = i;
993 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
996 static int
997 string_contains(PyObject *a, PyObject *el)
999 const char *lhs, *rhs, *end;
1000 int size;
1002 if (!PyString_CheckExact(el)) {
1003 #ifdef Py_USING_UNICODE
1004 if (PyUnicode_Check(el))
1005 return PyUnicode_Contains(a, el);
1006 #endif
1007 if (!PyString_Check(el)) {
1008 PyErr_SetString(PyExc_TypeError,
1009 "'in <string>' requires string as left operand");
1010 return -1;
1013 size = PyString_GET_SIZE(el);
1014 rhs = PyString_AS_STRING(el);
1015 lhs = PyString_AS_STRING(a);
1017 /* optimize for a single character */
1018 if (size == 1)
1019 return memchr(lhs, *rhs, PyString_GET_SIZE(a)) != NULL;
1021 end = lhs + (PyString_GET_SIZE(a) - size);
1022 while (lhs <= end) {
1023 if (memcmp(lhs++, rhs, size) == 0)
1024 return 1;
1027 return 0;
1030 static PyObject *
1031 string_item(PyStringObject *a, register int i)
1033 PyObject *v;
1034 char *pchar;
1035 if (i < 0 || i >= a->ob_size) {
1036 PyErr_SetString(PyExc_IndexError, "string index out of range");
1037 return NULL;
1039 pchar = a->ob_sval + i;
1040 v = (PyObject *)characters[*pchar & UCHAR_MAX];
1041 if (v == NULL)
1042 v = PyString_FromStringAndSize(pchar, 1);
1043 else {
1044 #ifdef COUNT_ALLOCS
1045 one_strings++;
1046 #endif
1047 Py_INCREF(v);
1049 return v;
1052 static PyObject*
1053 string_richcompare(PyStringObject *a, PyStringObject *b, int op)
1055 int c;
1056 int len_a, len_b;
1057 int min_len;
1058 PyObject *result;
1060 /* Make sure both arguments are strings. */
1061 if (!(PyString_Check(a) && PyString_Check(b))) {
1062 result = Py_NotImplemented;
1063 goto out;
1065 if (a == b) {
1066 switch (op) {
1067 case Py_EQ:case Py_LE:case Py_GE:
1068 result = Py_True;
1069 goto out;
1070 case Py_NE:case Py_LT:case Py_GT:
1071 result = Py_False;
1072 goto out;
1075 if (op == Py_EQ) {
1076 /* Supporting Py_NE here as well does not save
1077 much time, since Py_NE is rarely used. */
1078 if (a->ob_size == b->ob_size
1079 && (a->ob_sval[0] == b->ob_sval[0]
1080 && memcmp(a->ob_sval, b->ob_sval,
1081 a->ob_size) == 0)) {
1082 result = Py_True;
1083 } else {
1084 result = Py_False;
1086 goto out;
1088 len_a = a->ob_size; len_b = b->ob_size;
1089 min_len = (len_a < len_b) ? len_a : len_b;
1090 if (min_len > 0) {
1091 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1092 if (c==0)
1093 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1094 }else
1095 c = 0;
1096 if (c == 0)
1097 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1098 switch (op) {
1099 case Py_LT: c = c < 0; break;
1100 case Py_LE: c = c <= 0; break;
1101 case Py_EQ: assert(0); break; /* unreachable */
1102 case Py_NE: c = c != 0; break;
1103 case Py_GT: c = c > 0; break;
1104 case Py_GE: c = c >= 0; break;
1105 default:
1106 result = Py_NotImplemented;
1107 goto out;
1109 result = c ? Py_True : Py_False;
1110 out:
1111 Py_INCREF(result);
1112 return result;
1116 _PyString_Eq(PyObject *o1, PyObject *o2)
1118 PyStringObject *a, *b;
1119 a = (PyStringObject*)o1;
1120 b = (PyStringObject*)o2;
1121 return a->ob_size == b->ob_size
1122 && *a->ob_sval == *b->ob_sval
1123 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
1126 static long
1127 string_hash(PyStringObject *a)
1129 register int len;
1130 register unsigned char *p;
1131 register long x;
1133 if (a->ob_shash != -1)
1134 return a->ob_shash;
1135 len = a->ob_size;
1136 p = (unsigned char *) a->ob_sval;
1137 x = *p << 7;
1138 while (--len >= 0)
1139 x = (1000003*x) ^ *p++;
1140 x ^= a->ob_size;
1141 if (x == -1)
1142 x = -2;
1143 a->ob_shash = x;
1144 return x;
1147 static PyObject*
1148 string_subscript(PyStringObject* self, PyObject* item)
1150 if (PyInt_Check(item)) {
1151 long i = PyInt_AS_LONG(item);
1152 if (i < 0)
1153 i += PyString_GET_SIZE(self);
1154 return string_item(self,i);
1156 else if (PyLong_Check(item)) {
1157 long i = PyLong_AsLong(item);
1158 if (i == -1 && PyErr_Occurred())
1159 return NULL;
1160 if (i < 0)
1161 i += PyString_GET_SIZE(self);
1162 return string_item(self,i);
1164 else if (PySlice_Check(item)) {
1165 int start, stop, step, slicelength, cur, i;
1166 char* source_buf;
1167 char* result_buf;
1168 PyObject* result;
1170 if (PySlice_GetIndicesEx((PySliceObject*)item,
1171 PyString_GET_SIZE(self),
1172 &start, &stop, &step, &slicelength) < 0) {
1173 return NULL;
1176 if (slicelength <= 0) {
1177 return PyString_FromStringAndSize("", 0);
1179 else {
1180 source_buf = PyString_AsString((PyObject*)self);
1181 result_buf = PyMem_Malloc(slicelength);
1183 for (cur = start, i = 0; i < slicelength;
1184 cur += step, i++) {
1185 result_buf[i] = source_buf[cur];
1188 result = PyString_FromStringAndSize(result_buf,
1189 slicelength);
1190 PyMem_Free(result_buf);
1191 return result;
1194 else {
1195 PyErr_SetString(PyExc_TypeError,
1196 "string indices must be integers");
1197 return NULL;
1201 static int
1202 string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
1204 if ( index != 0 ) {
1205 PyErr_SetString(PyExc_SystemError,
1206 "accessing non-existent string segment");
1207 return -1;
1209 *ptr = (void *)self->ob_sval;
1210 return self->ob_size;
1213 static int
1214 string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
1216 PyErr_SetString(PyExc_TypeError,
1217 "Cannot use string as modifiable buffer");
1218 return -1;
1221 static int
1222 string_buffer_getsegcount(PyStringObject *self, int *lenp)
1224 if ( lenp )
1225 *lenp = self->ob_size;
1226 return 1;
1229 static int
1230 string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
1232 if ( index != 0 ) {
1233 PyErr_SetString(PyExc_SystemError,
1234 "accessing non-existent string segment");
1235 return -1;
1237 *ptr = self->ob_sval;
1238 return self->ob_size;
1241 static PySequenceMethods string_as_sequence = {
1242 (inquiry)string_length, /*sq_length*/
1243 (binaryfunc)string_concat, /*sq_concat*/
1244 (intargfunc)string_repeat, /*sq_repeat*/
1245 (intargfunc)string_item, /*sq_item*/
1246 (intintargfunc)string_slice, /*sq_slice*/
1247 0, /*sq_ass_item*/
1248 0, /*sq_ass_slice*/
1249 (objobjproc)string_contains /*sq_contains*/
1252 static PyMappingMethods string_as_mapping = {
1253 (inquiry)string_length,
1254 (binaryfunc)string_subscript,
1258 static PyBufferProcs string_as_buffer = {
1259 (getreadbufferproc)string_buffer_getreadbuf,
1260 (getwritebufferproc)string_buffer_getwritebuf,
1261 (getsegcountproc)string_buffer_getsegcount,
1262 (getcharbufferproc)string_buffer_getcharbuf,
/* Selectors for the three strip variants; they double as indices into
   stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* The format string with its first three characters ("|O:") skipped,
   i.e. the bare method name. */
#define STRIPNAME(i) (stripformat[i]+3)
1277 static PyObject *
1278 split_whitespace(const char *s, int len, int maxsplit)
1280 int i, j, err;
1281 PyObject* item;
1282 PyObject *list = PyList_New(0);
1284 if (list == NULL)
1285 return NULL;
1287 for (i = j = 0; i < len; ) {
1288 while (i < len && isspace(Py_CHARMASK(s[i])))
1289 i++;
1290 j = i;
1291 while (i < len && !isspace(Py_CHARMASK(s[i])))
1292 i++;
1293 if (j < i) {
1294 if (maxsplit-- <= 0)
1295 break;
1296 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1297 if (item == NULL)
1298 goto finally;
1299 err = PyList_Append(list, item);
1300 Py_DECREF(item);
1301 if (err < 0)
1302 goto finally;
1303 while (i < len && isspace(Py_CHARMASK(s[i])))
1304 i++;
1305 j = i;
1308 if (j < len) {
1309 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1310 if (item == NULL)
1311 goto finally;
1312 err = PyList_Append(list, item);
1313 Py_DECREF(item);
1314 if (err < 0)
1315 goto finally;
1317 return list;
1318 finally:
1319 Py_DECREF(list);
1320 return NULL;
1324 PyDoc_STRVAR(split__doc__,
1325 "S.split([sep [,maxsplit]]) -> list of strings\n\
1327 Return a list of the words in the string S, using sep as the\n\
1328 delimiter string. If maxsplit is given, at most maxsplit\n\
1329 splits are done. If sep is not specified or is None, any\n\
1330 whitespace string is a separator.");
1332 static PyObject *
1333 string_split(PyStringObject *self, PyObject *args)
1335 int len = PyString_GET_SIZE(self), n, i, j, err;
1336 int maxsplit = -1;
1337 const char *s = PyString_AS_STRING(self), *sub;
1338 PyObject *list, *item, *subobj = Py_None;
1340 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
1341 return NULL;
1342 if (maxsplit < 0)
1343 maxsplit = INT_MAX;
1344 if (subobj == Py_None)
1345 return split_whitespace(s, len, maxsplit);
1346 if (PyString_Check(subobj)) {
1347 sub = PyString_AS_STRING(subobj);
1348 n = PyString_GET_SIZE(subobj);
1350 #ifdef Py_USING_UNICODE
1351 else if (PyUnicode_Check(subobj))
1352 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1353 #endif
1354 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1355 return NULL;
1356 if (n == 0) {
1357 PyErr_SetString(PyExc_ValueError, "empty separator");
1358 return NULL;
1361 list = PyList_New(0);
1362 if (list == NULL)
1363 return NULL;
1365 i = j = 0;
1366 while (i+n <= len) {
1367 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1368 if (maxsplit-- <= 0)
1369 break;
1370 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1371 if (item == NULL)
1372 goto fail;
1373 err = PyList_Append(list, item);
1374 Py_DECREF(item);
1375 if (err < 0)
1376 goto fail;
1377 i = j = i + n;
1379 else
1380 i++;
1382 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1383 if (item == NULL)
1384 goto fail;
1385 err = PyList_Append(list, item);
1386 Py_DECREF(item);
1387 if (err < 0)
1388 goto fail;
1390 return list;
1392 fail:
1393 Py_DECREF(list);
1394 return NULL;
1398 PyDoc_STRVAR(join__doc__,
1399 "S.join(sequence) -> string\n\
1401 Return a string which is the concatenation of the strings in the\n\
1402 sequence. The separator between elements is S.");
1404 static PyObject *
1405 string_join(PyStringObject *self, PyObject *orig)
1407 char *sep = PyString_AS_STRING(self);
1408 const int seplen = PyString_GET_SIZE(self);
1409 PyObject *res = NULL;
1410 char *p;
1411 int seqlen = 0;
1412 size_t sz = 0;
1413 int i;
1414 PyObject *seq, *item;
1416 seq = PySequence_Fast(orig, "");
1417 if (seq == NULL) {
1418 if (PyErr_ExceptionMatches(PyExc_TypeError))
1419 PyErr_Format(PyExc_TypeError,
1420 "sequence expected, %.80s found",
1421 orig->ob_type->tp_name);
1422 return NULL;
1425 seqlen = PySequence_Size(seq);
1426 if (seqlen == 0) {
1427 Py_DECREF(seq);
1428 return PyString_FromString("");
1430 if (seqlen == 1) {
1431 item = PySequence_Fast_GET_ITEM(seq, 0);
1432 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1433 PyErr_Format(PyExc_TypeError,
1434 "sequence item 0: expected string,"
1435 " %.80s found",
1436 item->ob_type->tp_name);
1437 Py_DECREF(seq);
1438 return NULL;
1440 Py_INCREF(item);
1441 Py_DECREF(seq);
1442 return item;
1445 /* There are at least two things to join. Do a pre-pass to figure out
1446 * the total amount of space we'll need (sz), see whether any argument
1447 * is absurd, and defer to the Unicode join if appropriate.
1449 for (i = 0; i < seqlen; i++) {
1450 const size_t old_sz = sz;
1451 item = PySequence_Fast_GET_ITEM(seq, i);
1452 if (!PyString_Check(item)){
1453 #ifdef Py_USING_UNICODE
1454 if (PyUnicode_Check(item)) {
1455 /* Defer to Unicode join.
1456 * CAUTION: There's no gurantee that the
1457 * original sequence can be iterated over
1458 * again, so we must pass seq here.
1460 PyObject *result;
1461 result = PyUnicode_Join((PyObject *)self, seq);
1462 Py_DECREF(seq);
1463 return result;
1465 #endif
1466 PyErr_Format(PyExc_TypeError,
1467 "sequence item %i: expected string,"
1468 " %.80s found",
1469 i, item->ob_type->tp_name);
1470 Py_DECREF(seq);
1471 return NULL;
1473 sz += PyString_GET_SIZE(item);
1474 if (i != 0)
1475 sz += seplen;
1476 if (sz < old_sz || sz > INT_MAX) {
1477 PyErr_SetString(PyExc_OverflowError,
1478 "join() is too long for a Python string");
1479 Py_DECREF(seq);
1480 return NULL;
1484 /* Allocate result space. */
1485 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1486 if (res == NULL) {
1487 Py_DECREF(seq);
1488 return NULL;
1491 /* Catenate everything. */
1492 p = PyString_AS_STRING(res);
1493 for (i = 0; i < seqlen; ++i) {
1494 size_t n;
1495 item = PySequence_Fast_GET_ITEM(seq, i);
1496 n = PyString_GET_SIZE(item);
1497 memcpy(p, PyString_AS_STRING(item), n);
1498 p += n;
1499 if (i < seqlen - 1) {
1500 memcpy(p, sep, seplen);
1501 p += seplen;
1505 Py_DECREF(seq);
1506 return res;
1509 PyObject *
1510 _PyString_Join(PyObject *sep, PyObject *x)
1512 assert(sep != NULL && PyString_Check(sep));
1513 assert(x != NULL);
1514 return string_join((PyStringObject *)sep, x);
/* Normalize slice-style bounds in place for a string of length len.

   *end is capped at len; a negative *end or *start is taken relative to
   the end of the string and then floored at 0.  Note that *start is not
   capped at len, so it may remain larger than len or *end. */
static void
string_adjust_indices(int *start, int *end, int len)
{
    int v;

    v = *end;
    if (v > len)
        v = len;
    else if (v < 0)
        v += len;
    if (v < 0)
        v = 0;
    *end = v;

    v = *start;
    if (v < 0)
        v += len;
    if (v < 0)
        v = 0;
    *start = v;
}
1532 static long
1533 string_find_internal(PyStringObject *self, PyObject *args, int dir)
1535 const char *s = PyString_AS_STRING(self), *sub;
1536 int len = PyString_GET_SIZE(self);
1537 int n, i = 0, last = INT_MAX;
1538 PyObject *subobj;
1540 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
1541 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
1542 return -2;
1543 if (PyString_Check(subobj)) {
1544 sub = PyString_AS_STRING(subobj);
1545 n = PyString_GET_SIZE(subobj);
1547 #ifdef Py_USING_UNICODE
1548 else if (PyUnicode_Check(subobj))
1549 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
1550 #endif
1551 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1552 return -2;
1554 string_adjust_indices(&i, &last, len);
1556 if (dir > 0) {
1557 if (n == 0 && i <= last)
1558 return (long)i;
1559 last -= n;
1560 for (; i <= last; ++i)
1561 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
1562 return (long)i;
1564 else {
1565 int j;
1567 if (n == 0 && i <= last)
1568 return (long)last;
1569 for (j = last-n; j >= i; --j)
1570 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
1571 return (long)j;
1574 return -1;
1578 PyDoc_STRVAR(find__doc__,
1579 "S.find(sub [,start [,end]]) -> int\n\
1581 Return the lowest index in S where substring sub is found,\n\
1582 such that sub is contained within s[start,end]. Optional\n\
1583 arguments start and end are interpreted as in slice notation.\n\
1585 Return -1 on failure.");
1587 static PyObject *
1588 string_find(PyStringObject *self, PyObject *args)
1590 long result = string_find_internal(self, args, +1);
1591 if (result == -2)
1592 return NULL;
1593 return PyInt_FromLong(result);
1597 PyDoc_STRVAR(index__doc__,
1598 "S.index(sub [,start [,end]]) -> int\n\
1600 Like S.find() but raise ValueError when the substring is not found.");
1602 static PyObject *
1603 string_index(PyStringObject *self, PyObject *args)
1605 long result = string_find_internal(self, args, +1);
1606 if (result == -2)
1607 return NULL;
1608 if (result == -1) {
1609 PyErr_SetString(PyExc_ValueError,
1610 "substring not found in string.index");
1611 return NULL;
1613 return PyInt_FromLong(result);
1617 PyDoc_STRVAR(rfind__doc__,
1618 "S.rfind(sub [,start [,end]]) -> int\n\
1620 Return the highest index in S where substring sub is found,\n\
1621 such that sub is contained within s[start,end]. Optional\n\
1622 arguments start and end are interpreted as in slice notation.\n\
1624 Return -1 on failure.");
1626 static PyObject *
1627 string_rfind(PyStringObject *self, PyObject *args)
1629 long result = string_find_internal(self, args, -1);
1630 if (result == -2)
1631 return NULL;
1632 return PyInt_FromLong(result);
1636 PyDoc_STRVAR(rindex__doc__,
1637 "S.rindex(sub [,start [,end]]) -> int\n\
1639 Like S.rfind() but raise ValueError when the substring is not found.");
1641 static PyObject *
1642 string_rindex(PyStringObject *self, PyObject *args)
1644 long result = string_find_internal(self, args, -1);
1645 if (result == -2)
1646 return NULL;
1647 if (result == -1) {
1648 PyErr_SetString(PyExc_ValueError,
1649 "substring not found in string.rindex");
1650 return NULL;
1652 return PyInt_FromLong(result);
1656 static PyObject *
1657 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1659 char *s = PyString_AS_STRING(self);
1660 int len = PyString_GET_SIZE(self);
1661 char *sep = PyString_AS_STRING(sepobj);
1662 int seplen = PyString_GET_SIZE(sepobj);
1663 int i, j;
1665 i = 0;
1666 if (striptype != RIGHTSTRIP) {
1667 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1668 i++;
1672 j = len;
1673 if (striptype != LEFTSTRIP) {
1674 do {
1675 j--;
1676 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1677 j++;
1680 if (i == 0 && j == len && PyString_CheckExact(self)) {
1681 Py_INCREF(self);
1682 return (PyObject*)self;
1684 else
1685 return PyString_FromStringAndSize(s+i, j-i);
1689 static PyObject *
1690 do_strip(PyStringObject *self, int striptype)
1692 char *s = PyString_AS_STRING(self);
1693 int len = PyString_GET_SIZE(self), i, j;
1695 i = 0;
1696 if (striptype != RIGHTSTRIP) {
1697 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1698 i++;
1702 j = len;
1703 if (striptype != LEFTSTRIP) {
1704 do {
1705 j--;
1706 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1707 j++;
1710 if (i == 0 && j == len && PyString_CheckExact(self)) {
1711 Py_INCREF(self);
1712 return (PyObject*)self;
1714 else
1715 return PyString_FromStringAndSize(s+i, j-i);
1719 static PyObject *
1720 do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1722 PyObject *sep = NULL;
1724 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1725 return NULL;
1727 if (sep != NULL && sep != Py_None) {
1728 if (PyString_Check(sep))
1729 return do_xstrip(self, striptype, sep);
1730 #ifdef Py_USING_UNICODE
1731 else if (PyUnicode_Check(sep)) {
1732 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1733 PyObject *res;
1734 if (uniself==NULL)
1735 return NULL;
1736 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1737 striptype, sep);
1738 Py_DECREF(uniself);
1739 return res;
1741 #endif
1742 else {
1743 PyErr_Format(PyExc_TypeError,
1744 #ifdef Py_USING_UNICODE
1745 "%s arg must be None, str or unicode",
1746 #else
1747 "%s arg must be None or str",
1748 #endif
1749 STRIPNAME(striptype));
1750 return NULL;
1752 return do_xstrip(self, striptype, sep);
1755 return do_strip(self, striptype);
1759 PyDoc_STRVAR(strip__doc__,
1760 "S.strip([sep]) -> string or unicode\n\
1762 Return a copy of the string S with leading and trailing\n\
1763 whitespace removed.\n\
1764 If sep is given and not None, remove characters in sep instead.\n\
1765 If sep is unicode, S will be converted to unicode before stripping");
1767 static PyObject *
1768 string_strip(PyStringObject *self, PyObject *args)
1770 if (PyTuple_GET_SIZE(args) == 0)
1771 return do_strip(self, BOTHSTRIP); /* Common case */
1772 else
1773 return do_argstrip(self, BOTHSTRIP, args);
1777 PyDoc_STRVAR(lstrip__doc__,
1778 "S.lstrip([sep]) -> string or unicode\n\
1780 Return a copy of the string S with leading whitespace removed.\n\
1781 If sep is given and not None, remove characters in sep instead.\n\
1782 If sep is unicode, S will be converted to unicode before stripping");
1784 static PyObject *
1785 string_lstrip(PyStringObject *self, PyObject *args)
1787 if (PyTuple_GET_SIZE(args) == 0)
1788 return do_strip(self, LEFTSTRIP); /* Common case */
1789 else
1790 return do_argstrip(self, LEFTSTRIP, args);
1794 PyDoc_STRVAR(rstrip__doc__,
1795 "S.rstrip([sep]) -> string or unicode\n\
1797 Return a copy of the string S with trailing whitespace removed.\n\
1798 If sep is given and not None, remove characters in sep instead.\n\
1799 If sep is unicode, S will be converted to unicode before stripping");
1801 static PyObject *
1802 string_rstrip(PyStringObject *self, PyObject *args)
1804 if (PyTuple_GET_SIZE(args) == 0)
1805 return do_strip(self, RIGHTSTRIP); /* Common case */
1806 else
1807 return do_argstrip(self, RIGHTSTRIP, args);
1811 PyDoc_STRVAR(lower__doc__,
1812 "S.lower() -> string\n\
1814 Return a copy of the string S converted to lowercase.");
1816 static PyObject *
1817 string_lower(PyStringObject *self)
1819 char *s = PyString_AS_STRING(self), *s_new;
1820 int i, n = PyString_GET_SIZE(self);
1821 PyObject *new;
1823 new = PyString_FromStringAndSize(NULL, n);
1824 if (new == NULL)
1825 return NULL;
1826 s_new = PyString_AsString(new);
1827 for (i = 0; i < n; i++) {
1828 int c = Py_CHARMASK(*s++);
1829 if (isupper(c)) {
1830 *s_new = tolower(c);
1831 } else
1832 *s_new = c;
1833 s_new++;
1835 return new;
1839 PyDoc_STRVAR(upper__doc__,
1840 "S.upper() -> string\n\
1842 Return a copy of the string S converted to uppercase.");
1844 static PyObject *
1845 string_upper(PyStringObject *self)
1847 char *s = PyString_AS_STRING(self), *s_new;
1848 int i, n = PyString_GET_SIZE(self);
1849 PyObject *new;
1851 new = PyString_FromStringAndSize(NULL, n);
1852 if (new == NULL)
1853 return NULL;
1854 s_new = PyString_AsString(new);
1855 for (i = 0; i < n; i++) {
1856 int c = Py_CHARMASK(*s++);
1857 if (islower(c)) {
1858 *s_new = toupper(c);
1859 } else
1860 *s_new = c;
1861 s_new++;
1863 return new;
1867 PyDoc_STRVAR(title__doc__,
1868 "S.title() -> string\n\
1870 Return a titlecased version of S, i.e. words start with uppercase\n\
1871 characters, all remaining cased characters have lowercase.");
1873 static PyObject*
1874 string_title(PyStringObject *self)
1876 char *s = PyString_AS_STRING(self), *s_new;
1877 int i, n = PyString_GET_SIZE(self);
1878 int previous_is_cased = 0;
1879 PyObject *new;
1881 new = PyString_FromStringAndSize(NULL, n);
1882 if (new == NULL)
1883 return NULL;
1884 s_new = PyString_AsString(new);
1885 for (i = 0; i < n; i++) {
1886 int c = Py_CHARMASK(*s++);
1887 if (islower(c)) {
1888 if (!previous_is_cased)
1889 c = toupper(c);
1890 previous_is_cased = 1;
1891 } else if (isupper(c)) {
1892 if (previous_is_cased)
1893 c = tolower(c);
1894 previous_is_cased = 1;
1895 } else
1896 previous_is_cased = 0;
1897 *s_new++ = c;
1899 return new;
1902 PyDoc_STRVAR(capitalize__doc__,
1903 "S.capitalize() -> string\n\
1905 Return a copy of the string S with only its first character\n\
1906 capitalized.");
1908 static PyObject *
1909 string_capitalize(PyStringObject *self)
1911 char *s = PyString_AS_STRING(self), *s_new;
1912 int i, n = PyString_GET_SIZE(self);
1913 PyObject *new;
1915 new = PyString_FromStringAndSize(NULL, n);
1916 if (new == NULL)
1917 return NULL;
1918 s_new = PyString_AsString(new);
1919 if (0 < n) {
1920 int c = Py_CHARMASK(*s++);
1921 if (islower(c))
1922 *s_new = toupper(c);
1923 else
1924 *s_new = c;
1925 s_new++;
1927 for (i = 1; i < n; i++) {
1928 int c = Py_CHARMASK(*s++);
1929 if (isupper(c))
1930 *s_new = tolower(c);
1931 else
1932 *s_new = c;
1933 s_new++;
1935 return new;
1939 PyDoc_STRVAR(count__doc__,
1940 "S.count(sub[, start[, end]]) -> int\n\
1942 Return the number of occurrences of substring sub in string\n\
1943 S[start:end]. Optional arguments start and end are\n\
1944 interpreted as in slice notation.");
1946 static PyObject *
1947 string_count(PyStringObject *self, PyObject *args)
1949 const char *s = PyString_AS_STRING(self), *sub;
1950 int len = PyString_GET_SIZE(self), n;
1951 int i = 0, last = INT_MAX;
1952 int m, r;
1953 PyObject *subobj;
1955 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1956 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
1957 return NULL;
1959 if (PyString_Check(subobj)) {
1960 sub = PyString_AS_STRING(subobj);
1961 n = PyString_GET_SIZE(subobj);
1963 #ifdef Py_USING_UNICODE
1964 else if (PyUnicode_Check(subobj)) {
1965 int count;
1966 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1967 if (count == -1)
1968 return NULL;
1969 else
1970 return PyInt_FromLong((long) count);
1972 #endif
1973 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1974 return NULL;
1976 string_adjust_indices(&i, &last, len);
1978 m = last + 1 - n;
1979 if (n == 0)
1980 return PyInt_FromLong((long) (m-i));
1982 r = 0;
1983 while (i < m) {
1984 if (!memcmp(s+i, sub, n)) {
1985 r++;
1986 i += n;
1987 } else {
1988 i++;
1991 return PyInt_FromLong((long) r);
1995 PyDoc_STRVAR(swapcase__doc__,
1996 "S.swapcase() -> string\n\
1998 Return a copy of the string S with uppercase characters\n\
1999 converted to lowercase and vice versa.");
2001 static PyObject *
2002 string_swapcase(PyStringObject *self)
2004 char *s = PyString_AS_STRING(self), *s_new;
2005 int i, n = PyString_GET_SIZE(self);
2006 PyObject *new;
2008 new = PyString_FromStringAndSize(NULL, n);
2009 if (new == NULL)
2010 return NULL;
2011 s_new = PyString_AsString(new);
2012 for (i = 0; i < n; i++) {
2013 int c = Py_CHARMASK(*s++);
2014 if (islower(c)) {
2015 *s_new = toupper(c);
2017 else if (isupper(c)) {
2018 *s_new = tolower(c);
2020 else
2021 *s_new = c;
2022 s_new++;
2024 return new;
2028 PyDoc_STRVAR(translate__doc__,
2029 "S.translate(table [,deletechars]) -> string\n\
2031 Return a copy of the string S, where all characters occurring\n\
2032 in the optional argument deletechars are removed, and the\n\
2033 remaining characters have been mapped through the given\n\
2034 translation table, which must be a string of length 256.");
2036 static PyObject *
2037 string_translate(PyStringObject *self, PyObject *args)
2039 register char *input, *output;
2040 register const char *table;
2041 register int i, c, changed = 0;
2042 PyObject *input_obj = (PyObject*)self;
2043 const char *table1, *output_start, *del_table=NULL;
2044 int inlen, tablen, dellen = 0;
2045 PyObject *result;
2046 int trans_table[256];
2047 PyObject *tableobj, *delobj = NULL;
2049 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2050 &tableobj, &delobj))
2051 return NULL;
2053 if (PyString_Check(tableobj)) {
2054 table1 = PyString_AS_STRING(tableobj);
2055 tablen = PyString_GET_SIZE(tableobj);
2057 #ifdef Py_USING_UNICODE
2058 else if (PyUnicode_Check(tableobj)) {
2059 /* Unicode .translate() does not support the deletechars
2060 parameter; instead a mapping to None will cause characters
2061 to be deleted. */
2062 if (delobj != NULL) {
2063 PyErr_SetString(PyExc_TypeError,
2064 "deletions are implemented differently for unicode");
2065 return NULL;
2067 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2069 #endif
2070 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
2071 return NULL;
2073 if (tablen != 256) {
2074 PyErr_SetString(PyExc_ValueError,
2075 "translation table must be 256 characters long");
2076 return NULL;
2079 if (delobj != NULL) {
2080 if (PyString_Check(delobj)) {
2081 del_table = PyString_AS_STRING(delobj);
2082 dellen = PyString_GET_SIZE(delobj);
2084 #ifdef Py_USING_UNICODE
2085 else if (PyUnicode_Check(delobj)) {
2086 PyErr_SetString(PyExc_TypeError,
2087 "deletions are implemented differently for unicode");
2088 return NULL;
2090 #endif
2091 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2092 return NULL;
2094 else {
2095 del_table = NULL;
2096 dellen = 0;
2099 table = table1;
2100 inlen = PyString_Size(input_obj);
2101 result = PyString_FromStringAndSize((char *)NULL, inlen);
2102 if (result == NULL)
2103 return NULL;
2104 output_start = output = PyString_AsString(result);
2105 input = PyString_AsString(input_obj);
2107 if (dellen == 0) {
2108 /* If no deletions are required, use faster code */
2109 for (i = inlen; --i >= 0; ) {
2110 c = Py_CHARMASK(*input++);
2111 if (Py_CHARMASK((*output++ = table[c])) != c)
2112 changed = 1;
2114 if (changed || !PyString_CheckExact(input_obj))
2115 return result;
2116 Py_DECREF(result);
2117 Py_INCREF(input_obj);
2118 return input_obj;
2121 for (i = 0; i < 256; i++)
2122 trans_table[i] = Py_CHARMASK(table[i]);
2124 for (i = 0; i < dellen; i++)
2125 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2127 for (i = inlen; --i >= 0; ) {
2128 c = Py_CHARMASK(*input++);
2129 if (trans_table[c] != -1)
2130 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2131 continue;
2132 changed = 1;
2134 if (!changed && PyString_CheckExact(input_obj)) {
2135 Py_DECREF(result);
2136 Py_INCREF(input_obj);
2137 return input_obj;
2139 /* Fix the size of the resulting string */
2140 if (inlen > 0)
2141 _PyString_Resize(&result, output - output_start);
2142 return result;
2146 /* What follows is used for implementing replace(). Perry Stoll. */
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Locates the first occurrence in the len bytes at MEM of the pat_len
  bytes at PAT.  Returns the index into MEM if found, or -1 if not found.
  If PAT is longer than MEM, the function returns -1.  An empty PAT
  matches at index 0; it is handled up front so that neither pat[0] nor
  mem[0] is read when there may be no such byte.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    int ii;

    if (pat_len == 0)
        return (len >= 0) ? 0 : -1;

    /* pattern can not occur in the last pat_len-1 chars */
    len -= pat_len;

    for (ii = 0; ii <= len; ii++) {
        /* The first-byte test is a cheap filter in front of memcmp. */
        if (mem[ii] == pat[0] && memcmp(&mem[ii], pat, pat_len) == 0) {
            return ii;
        }
    }
    return -1;
}
/*
  mymemcnt

  Return the number of non-overlapping occurrences of PAT in MEM,
  scanning left to right:
     mem=1111  and pat=11 gives 2,
     mem=11111 and pat=11 also gives 2.

  The search resumes offset + pat_len bytes further on after each hit, so
  with pat_len == 0 a hit at offset 0 would never advance; callers are
  expected to pass a non-empty pattern.
*/
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int hits = 0;
    int at;
    int step;

    while (len >= 0) {
        at = mymemfind(mem, len, pat, pat_len);
        if (at == -1)
            break;
        /* Skip everything up to and including this match. */
        step = at + pat_len;
        mem += step;
        len -= step;
        hits++;
    }
    return hits;
}
/*
   mymemreplace

   Return a string in which the first COUNT occurrences (all of them if
   COUNT is negative) of PAT in memory STR are replaced with SUB.  An
   empty PAT is treated as occurring before every byte and at the end.

   If STR is empty, if PAT and SUB are both empty, if PAT is longer than
   STR, or if there is nothing to replace, the original string is
   returned.  Otherwise a new buffer is allocated here with PyMem_MALLOC
   and returned; the caller must release it with PyMem_FREE.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred (allocation failed, or the result would
       not fit in an int length).
*/
static char *
mymemreplace(const char *str, int len,          /* input string */
             const char *pat, int pat_len,      /* pattern string to find */
             const char *sub, int sub_len,      /* substitution string */
             int count,                         /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len, delta;

    if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
        goto return_same;

    /* find length of output string */
    if (pat_len > 0)
        nfound = mymemcnt(str, len, pat, pat_len);
    else
        /* len + 1 insertion points; saturate rather than overflow. */
        nfound = (len < INT_MAX) ? len + 1 : INT_MAX;
    if (count >= 0 && nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* Only a growing result can overflow: when delta <= 0 the matches are
       non-overlapping, so nfound * pat_len never exceeds len. */
    delta = sub_len - pat_len;
    if (delta > 0 && nfound > (INT_MAX - len) / delta)
        return NULL;
    new_len = len + nfound * delta;

    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        if (pat_len > 0) {
            for (; nfound > 0; --nfound) {
                /* find index of next instance of pattern */
                offset = mymemfind(str, len, pat, pat_len);
                if (offset == -1)
                    break;

                /* copy non matching part of input string */
                memcpy(new_s, str, offset);
                str += offset + pat_len;
                len -= offset + pat_len;

                /* copy substitute into the output string */
                new_s += offset;
                memcpy(new_s, sub, sub_len);
                new_s += sub_len;
            }
            /* copy any remaining values into output string */
            if (len > 0)
                memcpy(new_s, str, len);
        }
        else {
            /* Empty pattern: emit SUB in front of each input byte until
               the replacement budget runs out, then copy the rest. */
            for (;;++str, --len) {
                memcpy(new_s, sub, sub_len);
                new_s += sub_len;
                if (--nfound <= 0) {
                    memcpy(new_s, str, len);
                    break;
                }
                *new_s++ = *str;
            }
        }
    }
    *out_len = new_len;
    return out_s;

return_same:
    *out_len = -1;
    return (char *)str; /* cast away const */
}
2297 PyDoc_STRVAR(replace__doc__,
2298 "S.replace (old, new[, maxsplit]) -> string\n\
2300 Return a copy of string S with all occurrences of substring\n\
2301 old replaced by new. If the optional argument maxsplit is\n\
2302 given, only the first maxsplit occurrences are replaced.");
2304 static PyObject *
2305 string_replace(PyStringObject *self, PyObject *args)
2307 const char *str = PyString_AS_STRING(self), *sub, *repl;
2308 char *new_s;
2309 const int len = PyString_GET_SIZE(self);
2310 int sub_len, repl_len, out_len;
2311 int count = -1;
2312 PyObject *new;
2313 PyObject *subobj, *replobj;
2315 if (!PyArg_ParseTuple(args, "OO|i:replace",
2316 &subobj, &replobj, &count))
2317 return NULL;
2319 if (PyString_Check(subobj)) {
2320 sub = PyString_AS_STRING(subobj);
2321 sub_len = PyString_GET_SIZE(subobj);
2323 #ifdef Py_USING_UNICODE
2324 else if (PyUnicode_Check(subobj))
2325 return PyUnicode_Replace((PyObject *)self,
2326 subobj, replobj, count);
2327 #endif
2328 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2329 return NULL;
2331 if (PyString_Check(replobj)) {
2332 repl = PyString_AS_STRING(replobj);
2333 repl_len = PyString_GET_SIZE(replobj);
2335 #ifdef Py_USING_UNICODE
2336 else if (PyUnicode_Check(replobj))
2337 return PyUnicode_Replace((PyObject *)self,
2338 subobj, replobj, count);
2339 #endif
2340 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2341 return NULL;
2343 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
2344 if (new_s == NULL) {
2345 PyErr_NoMemory();
2346 return NULL;
2348 if (out_len == -1) {
2349 if (PyString_CheckExact(self)) {
2350 /* we're returning another reference to self */
2351 new = (PyObject*)self;
2352 Py_INCREF(new);
2354 else {
2355 new = PyString_FromStringAndSize(str, len);
2356 if (new == NULL)
2357 return NULL;
2360 else {
2361 new = PyString_FromStringAndSize(new_s, out_len);
2362 PyMem_FREE(new_s);
2364 return new;
2368 PyDoc_STRVAR(startswith__doc__,
2369 "S.startswith(prefix[, start[, end]]) -> bool\n\
2371 Return True if S starts with the specified prefix, False otherwise. With\n\
2372 optional start, test S beginning at that position. With optional end, stop\n\
2373 comparing S at that position.");
2375 static PyObject *
2376 string_startswith(PyStringObject *self, PyObject *args)
2378 const char* str = PyString_AS_STRING(self);
2379 int len = PyString_GET_SIZE(self);
2380 const char* prefix;
2381 int plen;
2382 int start = 0;
2383 int end = INT_MAX;
2384 PyObject *subobj;
2386 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2387 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2388 return NULL;
2389 if (PyString_Check(subobj)) {
2390 prefix = PyString_AS_STRING(subobj);
2391 plen = PyString_GET_SIZE(subobj);
2393 #ifdef Py_USING_UNICODE
2394 else if (PyUnicode_Check(subobj)) {
2395 int rc;
2396 rc = PyUnicode_Tailmatch((PyObject *)self,
2397 subobj, start, end, -1);
2398 if (rc == -1)
2399 return NULL;
2400 else
2401 return PyBool_FromLong((long) rc);
2403 #endif
2404 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
2405 return NULL;
2407 string_adjust_indices(&start, &end, len);
2409 if (start+plen > len)
2410 return PyBool_FromLong(0);
2412 if (end-start >= plen)
2413 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2414 else
2415 return PyBool_FromLong(0);
2419 PyDoc_STRVAR(endswith__doc__,
2420 "S.endswith(suffix[, start[, end]]) -> bool\n\
2422 Return True if S ends with the specified suffix, False otherwise. With\n\
2423 optional start, test S beginning at that position. With optional end, stop\n\
2424 comparing S at that position.");
2426 static PyObject *
2427 string_endswith(PyStringObject *self, PyObject *args)
2429 const char* str = PyString_AS_STRING(self);
2430 int len = PyString_GET_SIZE(self);
2431 const char* suffix;
2432 int slen;
2433 int start = 0;
2434 int end = INT_MAX;
2435 PyObject *subobj;
2437 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2438 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2439 return NULL;
2440 if (PyString_Check(subobj)) {
2441 suffix = PyString_AS_STRING(subobj);
2442 slen = PyString_GET_SIZE(subobj);
2444 #ifdef Py_USING_UNICODE
2445 else if (PyUnicode_Check(subobj)) {
2446 int rc;
2447 rc = PyUnicode_Tailmatch((PyObject *)self,
2448 subobj, start, end, +1);
2449 if (rc == -1)
2450 return NULL;
2451 else
2452 return PyBool_FromLong((long) rc);
2454 #endif
2455 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
2456 return NULL;
2458 string_adjust_indices(&start, &end, len);
2460 if (end-start < slen || start > len)
2461 return PyBool_FromLong(0);
2463 if (end-slen > start)
2464 start = end - slen;
2465 if (end-start >= slen)
2466 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2467 else
2468 return PyBool_FromLong(0);
2472 PyDoc_STRVAR(encode__doc__,
2473 "S.encode([encoding[,errors]]) -> object\n\
2475 Encodes S using the codec registered for encoding. encoding defaults\n\
2476 to the default encoding. errors may be given to set a different error\n\
2477 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2478 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2479 'xmlcharrefreplace' as well as any other name registered with\n\
2480 codecs.register_error that is able to handle UnicodeEncodeErrors.");
2482 static PyObject *
2483 string_encode(PyStringObject *self, PyObject *args)
2485 char *encoding = NULL;
2486 char *errors = NULL;
2487 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2488 return NULL;
2489 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2493 PyDoc_STRVAR(decode__doc__,
2494 "S.decode([encoding[,errors]]) -> object\n\
2496 Decodes S using the codec registered for encoding. encoding defaults\n\
2497 to the default encoding. errors may be given to set a different error\n\
2498 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2499 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2500 as well as any other name registerd with codecs.register_error that is\n\
2501 able to handle UnicodeDecodeErrors.");
2503 static PyObject *
2504 string_decode(PyStringObject *self, PyObject *args)
2506 char *encoding = NULL;
2507 char *errors = NULL;
2508 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2509 return NULL;
2510 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
2514 PyDoc_STRVAR(expandtabs__doc__,
2515 "S.expandtabs([tabsize]) -> string\n\
2517 Return a copy of S where all tab characters are expanded using spaces.\n\
2518 If tabsize is not given, a tab size of 8 characters is assumed.");
2520 static PyObject*
2521 string_expandtabs(PyStringObject *self, PyObject *args)
2523 const char *e, *p;
2524 char *q;
2525 int i, j;
2526 PyObject *u;
2527 int tabsize = 8;
2529 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2530 return NULL;
2532 /* First pass: determine size of output string */
2533 i = j = 0;
2534 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2535 for (p = PyString_AS_STRING(self); p < e; p++)
2536 if (*p == '\t') {
2537 if (tabsize > 0)
2538 j += tabsize - (j % tabsize);
2540 else {
2541 j++;
2542 if (*p == '\n' || *p == '\r') {
2543 i += j;
2544 j = 0;
2548 /* Second pass: create output string and fill it */
2549 u = PyString_FromStringAndSize(NULL, i + j);
2550 if (!u)
2551 return NULL;
2553 j = 0;
2554 q = PyString_AS_STRING(u);
2556 for (p = PyString_AS_STRING(self); p < e; p++)
2557 if (*p == '\t') {
2558 if (tabsize > 0) {
2559 i = tabsize - (j % tabsize);
2560 j += i;
2561 while (i--)
2562 *q++ = ' ';
2565 else {
2566 j++;
2567 *q++ = *p;
2568 if (*p == '\n' || *p == '\r')
2569 j = 0;
2572 return u;
2575 static PyObject *
2576 pad(PyStringObject *self, int left, int right, char fill)
2578 PyObject *u;
2580 if (left < 0)
2581 left = 0;
2582 if (right < 0)
2583 right = 0;
2585 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
2586 Py_INCREF(self);
2587 return (PyObject *)self;
2590 u = PyString_FromStringAndSize(NULL,
2591 left + PyString_GET_SIZE(self) + right);
2592 if (u) {
2593 if (left)
2594 memset(PyString_AS_STRING(u), fill, left);
2595 memcpy(PyString_AS_STRING(u) + left,
2596 PyString_AS_STRING(self),
2597 PyString_GET_SIZE(self));
2598 if (right)
2599 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2600 fill, right);
2603 return u;
2606 PyDoc_STRVAR(ljust__doc__,
2607 "S.ljust(width) -> string\n"
2608 "\n"
2609 "Return S left justified in a string of length width. Padding is\n"
2610 "done using spaces.");
2612 static PyObject *
2613 string_ljust(PyStringObject *self, PyObject *args)
2615 int width;
2616 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2617 return NULL;
2619 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
2620 Py_INCREF(self);
2621 return (PyObject*) self;
2624 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2628 PyDoc_STRVAR(rjust__doc__,
2629 "S.rjust(width) -> string\n"
2630 "\n"
2631 "Return S right justified in a string of length width. Padding is\n"
2632 "done using spaces.");
2634 static PyObject *
2635 string_rjust(PyStringObject *self, PyObject *args)
2637 int width;
2638 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2639 return NULL;
2641 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
2642 Py_INCREF(self);
2643 return (PyObject*) self;
2646 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2650 PyDoc_STRVAR(center__doc__,
2651 "S.center(width) -> string\n"
2652 "\n"
2653 "Return S centered in a string of length width. Padding is done\n"
2654 "using spaces.");
2656 static PyObject *
2657 string_center(PyStringObject *self, PyObject *args)
2659 int marg, left;
2660 int width;
2662 if (!PyArg_ParseTuple(args, "i:center", &width))
2663 return NULL;
2665 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
2666 Py_INCREF(self);
2667 return (PyObject*) self;
2670 marg = width - PyString_GET_SIZE(self);
2671 left = marg / 2 + (marg & width & 1);
2673 return pad(self, left, marg - left, ' ');
2676 PyDoc_STRVAR(zfill__doc__,
2677 "S.zfill(width) -> string\n"
2678 "\n"
2679 "Pad a numeric string S with zeros on the left, to fill a field\n"
2680 "of the specified width. The string S is never truncated.");
2682 static PyObject *
2683 string_zfill(PyStringObject *self, PyObject *args)
2685 int fill;
2686 PyObject *s;
2687 char *p;
2689 int width;
2690 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2691 return NULL;
2693 if (PyString_GET_SIZE(self) >= width) {
2694 if (PyString_CheckExact(self)) {
2695 Py_INCREF(self);
2696 return (PyObject*) self;
2698 else
2699 return PyString_FromStringAndSize(
2700 PyString_AS_STRING(self),
2701 PyString_GET_SIZE(self)
2705 fill = width - PyString_GET_SIZE(self);
2707 s = pad(self, fill, 0, '0');
2709 if (s == NULL)
2710 return NULL;
2712 p = PyString_AS_STRING(s);
2713 if (p[fill] == '+' || p[fill] == '-') {
2714 /* move sign to beginning of string */
2715 p[0] = p[fill];
2716 p[fill] = '0';
2719 return (PyObject*) s;
2722 PyDoc_STRVAR(isspace__doc__,
2723 "S.isspace() -> bool\n"
2724 "\n"
2725 "Return True if there are only whitespace characters in S,\n"
2726 "False otherwise.");
2728 static PyObject*
2729 string_isspace(PyStringObject *self)
2731 register const unsigned char *p
2732 = (unsigned char *) PyString_AS_STRING(self);
2733 register const unsigned char *e;
2735 /* Shortcut for single character strings */
2736 if (PyString_GET_SIZE(self) == 1 &&
2737 isspace(*p))
2738 return PyBool_FromLong(1);
2740 /* Special case for empty strings */
2741 if (PyString_GET_SIZE(self) == 0)
2742 return PyBool_FromLong(0);
2744 e = p + PyString_GET_SIZE(self);
2745 for (; p < e; p++) {
2746 if (!isspace(*p))
2747 return PyBool_FromLong(0);
2749 return PyBool_FromLong(1);
2753 PyDoc_STRVAR(isalpha__doc__,
2754 "S.isalpha() -> bool\n\
2756 Return True if all characters in S are alphabetic\n\
2757 and there is at least one character in S, False otherwise.");
2759 static PyObject*
2760 string_isalpha(PyStringObject *self)
2762 register const unsigned char *p
2763 = (unsigned char *) PyString_AS_STRING(self);
2764 register const unsigned char *e;
2766 /* Shortcut for single character strings */
2767 if (PyString_GET_SIZE(self) == 1 &&
2768 isalpha(*p))
2769 return PyBool_FromLong(1);
2771 /* Special case for empty strings */
2772 if (PyString_GET_SIZE(self) == 0)
2773 return PyBool_FromLong(0);
2775 e = p + PyString_GET_SIZE(self);
2776 for (; p < e; p++) {
2777 if (!isalpha(*p))
2778 return PyBool_FromLong(0);
2780 return PyBool_FromLong(1);
2784 PyDoc_STRVAR(isalnum__doc__,
2785 "S.isalnum() -> bool\n\
2787 Return True if all characters in S are alphanumeric\n\
2788 and there is at least one character in S, False otherwise.");
2790 static PyObject*
2791 string_isalnum(PyStringObject *self)
2793 register const unsigned char *p
2794 = (unsigned char *) PyString_AS_STRING(self);
2795 register const unsigned char *e;
2797 /* Shortcut for single character strings */
2798 if (PyString_GET_SIZE(self) == 1 &&
2799 isalnum(*p))
2800 return PyBool_FromLong(1);
2802 /* Special case for empty strings */
2803 if (PyString_GET_SIZE(self) == 0)
2804 return PyBool_FromLong(0);
2806 e = p + PyString_GET_SIZE(self);
2807 for (; p < e; p++) {
2808 if (!isalnum(*p))
2809 return PyBool_FromLong(0);
2811 return PyBool_FromLong(1);
2815 PyDoc_STRVAR(isdigit__doc__,
2816 "S.isdigit() -> bool\n\
2818 Return True if there are only digit characters in S,\n\
2819 False otherwise.");
2821 static PyObject*
2822 string_isdigit(PyStringObject *self)
2824 register const unsigned char *p
2825 = (unsigned char *) PyString_AS_STRING(self);
2826 register const unsigned char *e;
2828 /* Shortcut for single character strings */
2829 if (PyString_GET_SIZE(self) == 1 &&
2830 isdigit(*p))
2831 return PyBool_FromLong(1);
2833 /* Special case for empty strings */
2834 if (PyString_GET_SIZE(self) == 0)
2835 return PyBool_FromLong(0);
2837 e = p + PyString_GET_SIZE(self);
2838 for (; p < e; p++) {
2839 if (!isdigit(*p))
2840 return PyBool_FromLong(0);
2842 return PyBool_FromLong(1);
2846 PyDoc_STRVAR(islower__doc__,
2847 "S.islower() -> bool\n\
2849 Return True if all cased characters in S are lowercase and there is\n\
2850 at least one cased character in S, False otherwise.");
2852 static PyObject*
2853 string_islower(PyStringObject *self)
2855 register const unsigned char *p
2856 = (unsigned char *) PyString_AS_STRING(self);
2857 register const unsigned char *e;
2858 int cased;
2860 /* Shortcut for single character strings */
2861 if (PyString_GET_SIZE(self) == 1)
2862 return PyBool_FromLong(islower(*p) != 0);
2864 /* Special case for empty strings */
2865 if (PyString_GET_SIZE(self) == 0)
2866 return PyBool_FromLong(0);
2868 e = p + PyString_GET_SIZE(self);
2869 cased = 0;
2870 for (; p < e; p++) {
2871 if (isupper(*p))
2872 return PyBool_FromLong(0);
2873 else if (!cased && islower(*p))
2874 cased = 1;
2876 return PyBool_FromLong(cased);
2880 PyDoc_STRVAR(isupper__doc__,
2881 "S.isupper() -> bool\n\
2883 Return True if all cased characters in S are uppercase and there is\n\
2884 at least one cased character in S, False otherwise.");
2886 static PyObject*
2887 string_isupper(PyStringObject *self)
2889 register const unsigned char *p
2890 = (unsigned char *) PyString_AS_STRING(self);
2891 register const unsigned char *e;
2892 int cased;
2894 /* Shortcut for single character strings */
2895 if (PyString_GET_SIZE(self) == 1)
2896 return PyBool_FromLong(isupper(*p) != 0);
2898 /* Special case for empty strings */
2899 if (PyString_GET_SIZE(self) == 0)
2900 return PyBool_FromLong(0);
2902 e = p + PyString_GET_SIZE(self);
2903 cased = 0;
2904 for (; p < e; p++) {
2905 if (islower(*p))
2906 return PyBool_FromLong(0);
2907 else if (!cased && isupper(*p))
2908 cased = 1;
2910 return PyBool_FromLong(cased);
2914 PyDoc_STRVAR(istitle__doc__,
2915 "S.istitle() -> bool\n\
2917 Return True if S is a titlecased string, i.e. uppercase characters\n\
2918 may only follow uncased characters and lowercase characters only cased\n\
2919 ones. Return False otherwise.");
2921 static PyObject*
2922 string_istitle(PyStringObject *self, PyObject *uncased)
2924 register const unsigned char *p
2925 = (unsigned char *) PyString_AS_STRING(self);
2926 register const unsigned char *e;
2927 int cased, previous_is_cased;
2929 /* Shortcut for single character strings */
2930 if (PyString_GET_SIZE(self) == 1)
2931 return PyBool_FromLong(isupper(*p) != 0);
2933 /* Special case for empty strings */
2934 if (PyString_GET_SIZE(self) == 0)
2935 return PyBool_FromLong(0);
2937 e = p + PyString_GET_SIZE(self);
2938 cased = 0;
2939 previous_is_cased = 0;
2940 for (; p < e; p++) {
2941 register const unsigned char ch = *p;
2943 if (isupper(ch)) {
2944 if (previous_is_cased)
2945 return PyBool_FromLong(0);
2946 previous_is_cased = 1;
2947 cased = 1;
2949 else if (islower(ch)) {
2950 if (!previous_is_cased)
2951 return PyBool_FromLong(0);
2952 previous_is_cased = 1;
2953 cased = 1;
2955 else
2956 previous_is_cased = 0;
2958 return PyBool_FromLong(cased);
2962 PyDoc_STRVAR(splitlines__doc__,
2963 "S.splitlines([keepends]) -> list of strings\n\
2965 Return a list of the lines in S, breaking at line boundaries.\n\
2966 Line breaks are not included in the resulting list unless keepends\n\
2967 is given and true.");
2969 #define SPLIT_APPEND(data, left, right) \
2970 str = PyString_FromStringAndSize(data + left, right - left); \
2971 if (!str) \
2972 goto onError; \
2973 if (PyList_Append(list, str)) { \
2974 Py_DECREF(str); \
2975 goto onError; \
2977 else \
2978 Py_DECREF(str);
2980 static PyObject*
2981 string_splitlines(PyStringObject *self, PyObject *args)
2983 register int i;
2984 register int j;
2985 int len;
2986 int keepends = 0;
2987 PyObject *list;
2988 PyObject *str;
2989 char *data;
2991 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
2992 return NULL;
2994 data = PyString_AS_STRING(self);
2995 len = PyString_GET_SIZE(self);
2997 list = PyList_New(0);
2998 if (!list)
2999 goto onError;
3001 for (i = j = 0; i < len; ) {
3002 int eol;
3004 /* Find a line and append it */
3005 while (i < len && data[i] != '\n' && data[i] != '\r')
3006 i++;
3008 /* Skip the line break reading CRLF as one line break */
3009 eol = i;
3010 if (i < len) {
3011 if (data[i] == '\r' && i + 1 < len &&
3012 data[i+1] == '\n')
3013 i += 2;
3014 else
3015 i++;
3016 if (keepends)
3017 eol = i;
3019 SPLIT_APPEND(data, j, eol);
3020 j = i;
3022 if (j < len) {
3023 SPLIT_APPEND(data, j, len);
3026 return list;
3028 onError:
3029 Py_DECREF(list);
3030 return NULL;
3033 #undef SPLIT_APPEND
3036 static PyMethodDef
3037 string_methods[] = {
3038 /* Counterparts of the obsolete stropmodule functions; except
3039 string.maketrans(). */
3040 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3041 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3042 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3043 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3044 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3045 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3046 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3047 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3048 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3049 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3050 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3051 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3052 capitalize__doc__},
3053 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3054 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3055 endswith__doc__},
3056 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3057 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3058 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3059 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3060 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3061 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3062 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3063 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3064 startswith__doc__},
3065 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3066 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3067 swapcase__doc__},
3068 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3069 translate__doc__},
3070 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3071 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3072 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3073 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3074 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3075 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3076 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3077 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3078 expandtabs__doc__},
3079 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3080 splitlines__doc__},
3081 {NULL, NULL} /* sentinel */
3084 static PyObject *
3085 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3087 static PyObject *
3088 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3090 PyObject *x = NULL;
3091 static char *kwlist[] = {"object", 0};
3093 if (type != &PyString_Type)
3094 return str_subtype_new(type, args, kwds);
3095 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3096 return NULL;
3097 if (x == NULL)
3098 return PyString_FromString("");
3099 return PyObject_Str(x);
3102 static PyObject *
3103 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3105 PyObject *tmp, *pnew;
3106 int n;
3108 assert(PyType_IsSubtype(type, &PyString_Type));
3109 tmp = string_new(&PyString_Type, args, kwds);
3110 if (tmp == NULL)
3111 return NULL;
3112 assert(PyString_CheckExact(tmp));
3113 n = PyString_GET_SIZE(tmp);
3114 pnew = type->tp_alloc(type, n);
3115 if (pnew != NULL) {
3116 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3117 ((PyStringObject *)pnew)->ob_shash =
3118 ((PyStringObject *)tmp)->ob_shash;
3119 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3121 Py_DECREF(tmp);
3122 return pnew;
3125 static PyObject *
3126 basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3128 PyErr_SetString(PyExc_TypeError,
3129 "The basestring type cannot be instantiated");
3130 return NULL;
3133 static PyObject *
3134 string_mod(PyObject *v, PyObject *w)
3136 if (!PyString_Check(v)) {
3137 Py_INCREF(Py_NotImplemented);
3138 return Py_NotImplemented;
3140 return PyString_Format(v, w);
3143 PyDoc_STRVAR(basestring_doc,
3144 "Type basestring cannot be instantiated; it is the base for str and unicode.");
3146 static PyNumberMethods string_as_number = {
3147 0, /*nb_add*/
3148 0, /*nb_subtract*/
3149 0, /*nb_multiply*/
3150 0, /*nb_divide*/
3151 string_mod, /*nb_remainder*/
3155 PyTypeObject PyBaseString_Type = {
3156 PyObject_HEAD_INIT(&PyType_Type)
3158 "basestring",
3161 0, /* tp_dealloc */
3162 0, /* tp_print */
3163 0, /* tp_getattr */
3164 0, /* tp_setattr */
3165 0, /* tp_compare */
3166 0, /* tp_repr */
3167 0, /* tp_as_number */
3168 0, /* tp_as_sequence */
3169 0, /* tp_as_mapping */
3170 0, /* tp_hash */
3171 0, /* tp_call */
3172 0, /* tp_str */
3173 0, /* tp_getattro */
3174 0, /* tp_setattro */
3175 0, /* tp_as_buffer */
3176 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3177 basestring_doc, /* tp_doc */
3178 0, /* tp_traverse */
3179 0, /* tp_clear */
3180 0, /* tp_richcompare */
3181 0, /* tp_weaklistoffset */
3182 0, /* tp_iter */
3183 0, /* tp_iternext */
3184 0, /* tp_methods */
3185 0, /* tp_members */
3186 0, /* tp_getset */
3187 &PyBaseObject_Type, /* tp_base */
3188 0, /* tp_dict */
3189 0, /* tp_descr_get */
3190 0, /* tp_descr_set */
3191 0, /* tp_dictoffset */
3192 0, /* tp_init */
3193 0, /* tp_alloc */
3194 basestring_new, /* tp_new */
3195 0, /* tp_free */
3198 PyDoc_STRVAR(string_doc,
3199 "str(object) -> string\n\
3201 Return a nice string representation of the object.\n\
3202 If the argument is a string, the return value is the same object.");
3204 PyTypeObject PyString_Type = {
3205 PyObject_HEAD_INIT(&PyType_Type)
3207 "str",
3208 sizeof(PyStringObject),
3209 sizeof(char),
3210 (destructor)string_dealloc, /* tp_dealloc */
3211 (printfunc)string_print, /* tp_print */
3212 0, /* tp_getattr */
3213 0, /* tp_setattr */
3214 0, /* tp_compare */
3215 (reprfunc)string_repr, /* tp_repr */
3216 &string_as_number, /* tp_as_number */
3217 &string_as_sequence, /* tp_as_sequence */
3218 &string_as_mapping, /* tp_as_mapping */
3219 (hashfunc)string_hash, /* tp_hash */
3220 0, /* tp_call */
3221 (reprfunc)string_str, /* tp_str */
3222 PyObject_GenericGetAttr, /* tp_getattro */
3223 0, /* tp_setattro */
3224 &string_as_buffer, /* tp_as_buffer */
3225 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3226 Py_TPFLAGS_BASETYPE, /* tp_flags */
3227 string_doc, /* tp_doc */
3228 0, /* tp_traverse */
3229 0, /* tp_clear */
3230 (richcmpfunc)string_richcompare, /* tp_richcompare */
3231 0, /* tp_weaklistoffset */
3232 0, /* tp_iter */
3233 0, /* tp_iternext */
3234 string_methods, /* tp_methods */
3235 0, /* tp_members */
3236 0, /* tp_getset */
3237 &PyBaseString_Type, /* tp_base */
3238 0, /* tp_dict */
3239 0, /* tp_descr_get */
3240 0, /* tp_descr_set */
3241 0, /* tp_dictoffset */
3242 0, /* tp_init */
3243 0, /* tp_alloc */
3244 string_new, /* tp_new */
3245 PyObject_Del, /* tp_free */
3248 void
3249 PyString_Concat(register PyObject **pv, register PyObject *w)
3251 register PyObject *v;
3252 if (*pv == NULL)
3253 return;
3254 if (w == NULL || !PyString_Check(*pv)) {
3255 Py_DECREF(*pv);
3256 *pv = NULL;
3257 return;
3259 v = string_concat((PyStringObject *) *pv, w);
3260 Py_DECREF(*pv);
3261 *pv = v;
3264 void
3265 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
3267 PyString_Concat(pv, w);
3268 Py_XDECREF(w);
3272 /* The following function breaks the notion that strings are immutable:
3273 it changes the size of a string. We get away with this only if there
3274 is only one module referencing the object. You can also think of it
3275 as creating a new string object and destroying the old one, only
3276 more efficiently. In any case, don't use this if the string may
3277 already be known to some other part of the code...
3278 Note that if there's not enough memory to resize the string, the original
3279 string object at *pv is deallocated, *pv is set to NULL, an "out of
3280 memory" exception is set, and -1 is returned. Else (on success) 0 is
3281 returned, and the value in *pv may or may not be the same as on input.
3282 As always, an extra byte is allocated for a trailing \0 byte (newsize
3283 does *not* include that), and a trailing \0 byte is stored.
3287 _PyString_Resize(PyObject **pv, int newsize)
3289 register PyObject *v;
3290 register PyStringObject *sv;
3291 v = *pv;
3292 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0) {
3293 *pv = 0;
3294 Py_DECREF(v);
3295 PyErr_BadInternalCall();
3296 return -1;
3298 /* XXX UNREF/NEWREF interface should be more symmetrical */
3299 _Py_DEC_REFTOTAL;
3300 _Py_ForgetReference(v);
3301 *pv = (PyObject *)
3302 PyObject_REALLOC((char *)v,
3303 sizeof(PyStringObject) + newsize * sizeof(char));
3304 if (*pv == NULL) {
3305 PyObject_Del(v);
3306 PyErr_NoMemory();
3307 return -1;
3309 _Py_NewReference(*pv);
3310 sv = (PyStringObject *) *pv;
3311 sv->ob_size = newsize;
3312 sv->ob_sval[newsize] = '\0';
3313 return 0;
3316 /* Helpers for formatstring */
3318 static PyObject *
3319 getnextarg(PyObject *args, int arglen, int *p_argidx)
3321 int argidx = *p_argidx;
3322 if (argidx < arglen) {
3323 (*p_argidx)++;
3324 if (arglen < 0)
3325 return args;
3326 else
3327 return PyTuple_GetItem(args, argidx);
3329 PyErr_SetString(PyExc_TypeError,
3330 "not enough arguments for format string");
3331 return NULL;
/* Format flags: one bit for each flag character that may follow the
   '%' of a conversion specifier. */
#define F_LJUST (1<<0)		/* '-' */
#define F_SIGN	(1<<1)		/* '+' */
#define F_BLANK (1<<2)		/* ' ' */
#define F_ALT	(1<<3)		/* '#' */
#define F_ZERO	(1<<4)		/* '0' */
3347 static int
3348 formatfloat(char *buf, size_t buflen, int flags,
3349 int prec, int type, PyObject *v)
3351 /* fmt = '%#.' + `prec` + `type`
3352 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
3353 char fmt[20];
3354 double x;
3355 x = PyFloat_AsDouble(v);
3356 if (x == -1.0 && PyErr_Occurred()) {
3357 PyErr_SetString(PyExc_TypeError, "float argument required");
3358 return -1;
3360 if (prec < 0)
3361 prec = 6;
3362 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3363 type = 'g';
3364 /* Worst case length calc to ensure no buffer overrun:
3366 'g' formats:
3367 fmt = %#.<prec>g
3368 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
3369 for any double rep.)
3370 len = 1 + prec + 1 + 2 + 5 = 9 + prec
3372 'f' formats:
3373 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
3374 len = 1 + 50 + 1 + prec = 52 + prec
3376 If prec=0 the effective precision is 1 (the leading digit is
3377 always given), therefore increase the length by one.
3380 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
3381 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
3382 PyErr_SetString(PyExc_OverflowError,
3383 "formatted float is too long (precision too large?)");
3384 return -1;
3386 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3387 (flags&F_ALT) ? "#" : "",
3388 prec, type);
3389 PyOS_snprintf(buf, buflen, fmt, x);
3390 return strlen(buf);
3393 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3394 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3395 * Python's regular ints.
3396 * Return value: a new PyString*, or NULL if error.
3397 * . *pbuf is set to point into it,
3398 * *plen set to the # of chars following that.
3399 * Caller must decref it when done using pbuf.
3400 * The string starting at *pbuf is of the form
3401 * "-"? ("0x" | "0X")? digit+
3402 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3403 * set in flags. The case of hex digits will be correct,
3404 * There will be at least prec digits, zero-filled on the left if
3405 * necessary to get that many.
3406 * val object to be converted
3407 * flags bitmask of format flags; only F_ALT is looked at
3408 * prec minimum number of digits; 0-fill on left if needed
3409 * type a character in [duoxX]; u acts the same as d
3411 * CAUTION: o, x and X conversions on regular ints can never
3412 * produce a '-' sign, but can for Python's unbounded ints.
3414 PyObject*
3415 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3416 char **pbuf, int *plen)
3418 PyObject *result = NULL;
3419 char *buf;
3420 int i;
3421 int sign; /* 1 if '-', else 0 */
3422 int len; /* number of characters */
3423 int numdigits; /* len == numnondigits + numdigits */
3424 int numnondigits = 0;
3426 switch (type) {
3427 case 'd':
3428 case 'u':
3429 result = val->ob_type->tp_str(val);
3430 break;
3431 case 'o':
3432 result = val->ob_type->tp_as_number->nb_oct(val);
3433 break;
3434 case 'x':
3435 case 'X':
3436 numnondigits = 2;
3437 result = val->ob_type->tp_as_number->nb_hex(val);
3438 break;
3439 default:
3440 assert(!"'type' not in [duoxX]");
3442 if (!result)
3443 return NULL;
3445 /* To modify the string in-place, there can only be one reference. */
3446 if (result->ob_refcnt != 1) {
3447 PyErr_BadInternalCall();
3448 return NULL;
3450 buf = PyString_AsString(result);
3451 len = PyString_Size(result);
3452 if (buf[len-1] == 'L') {
3453 --len;
3454 buf[len] = '\0';
3456 sign = buf[0] == '-';
3457 numnondigits += sign;
3458 numdigits = len - numnondigits;
3459 assert(numdigits > 0);
3461 /* Get rid of base marker unless F_ALT */
3462 if ((flags & F_ALT) == 0) {
3463 /* Need to skip 0x, 0X or 0. */
3464 int skipped = 0;
3465 switch (type) {
3466 case 'o':
3467 assert(buf[sign] == '0');
3468 /* If 0 is only digit, leave it alone. */
3469 if (numdigits > 1) {
3470 skipped = 1;
3471 --numdigits;
3473 break;
3474 case 'x':
3475 case 'X':
3476 assert(buf[sign] == '0');
3477 assert(buf[sign + 1] == 'x');
3478 skipped = 2;
3479 numnondigits -= 2;
3480 break;
3482 if (skipped) {
3483 buf += skipped;
3484 len -= skipped;
3485 if (sign)
3486 buf[0] = '-';
3488 assert(len == numnondigits + numdigits);
3489 assert(numdigits > 0);
3492 /* Fill with leading zeroes to meet minimum width. */
3493 if (prec > numdigits) {
3494 PyObject *r1 = PyString_FromStringAndSize(NULL,
3495 numnondigits + prec);
3496 char *b1;
3497 if (!r1) {
3498 Py_DECREF(result);
3499 return NULL;
3501 b1 = PyString_AS_STRING(r1);
3502 for (i = 0; i < numnondigits; ++i)
3503 *b1++ = *buf++;
3504 for (i = 0; i < prec - numdigits; i++)
3505 *b1++ = '0';
3506 for (i = 0; i < numdigits; i++)
3507 *b1++ = *buf++;
3508 *b1 = '\0';
3509 Py_DECREF(result);
3510 result = r1;
3511 buf = PyString_AS_STRING(result);
3512 len = numnondigits + prec;
3515 /* Fix up case for hex conversions. */
3516 switch (type) {
3517 case 'x':
3518 /* Need to convert all upper case letters to lower case. */
3519 for (i = 0; i < len; i++)
3520 if (buf[i] >= 'A' && buf[i] <= 'F')
3521 buf[i] += 'a'-'A';
3522 break;
3523 case 'X':
3524 /* Need to convert 0x to 0X (and -0x to -0X). */
3525 if (buf[sign + 1] == 'x')
3526 buf[sign + 1] = 'X';
3527 break;
3529 *pbuf = buf;
3530 *plen = len;
3531 return result;
3534 static int
3535 formatint(char *buf, size_t buflen, int flags,
3536 int prec, int type, PyObject *v)
3538 /* fmt = '%#.' + `prec` + 'l' + `type`
3539 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3540 + 1 + 1 = 24 */
3541 char fmt[64]; /* plenty big enough! */
3542 long x;
3544 x = PyInt_AsLong(v);
3545 if (x == -1 && PyErr_Occurred()) {
3546 PyErr_SetString(PyExc_TypeError, "int argument required");
3547 return -1;
3549 if (x < 0 && type != 'd' && type != 'i') {
3550 if (PyErr_Warn(PyExc_FutureWarning,
3551 "%u/%o/%x/%X of negative int will return "
3552 "a signed string in Python 2.4 and up") < 0)
3553 return -1;
3555 if (prec < 0)
3556 prec = 1;
3558 if ((flags & F_ALT) &&
3559 (type == 'x' || type == 'X')) {
3560 /* When converting under %#x or %#X, there are a number
3561 * of issues that cause pain:
3562 * - when 0 is being converted, the C standard leaves off
3563 * the '0x' or '0X', which is inconsistent with other
3564 * %#x/%#X conversions and inconsistent with Python's
3565 * hex() function
3566 * - there are platforms that violate the standard and
3567 * convert 0 with the '0x' or '0X'
3568 * (Metrowerks, Compaq Tru64)
3569 * - there are platforms that give '0x' when converting
3570 * under %#X, but convert 0 in accordance with the
3571 * standard (OS/2 EMX)
3573 * We can achieve the desired consistency by inserting our
3574 * own '0x' or '0X' prefix, and substituting %x/%X in place
3575 * of %#x/%#X.
3577 * Note that this is the same approach as used in
3578 * formatint() in unicodeobject.c
3580 PyOS_snprintf(fmt, sizeof(fmt), "0%c%%.%dl%c",
3581 type, prec, type);
3583 else {
3584 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%dl%c",
3585 (flags&F_ALT) ? "#" : "",
3586 prec, type);
3589 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
3590 * worst case buf = '0x' + [0-9]*prec, where prec >= 11
3592 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
3593 PyErr_SetString(PyExc_OverflowError,
3594 "formatted integer is too long (precision too large?)");
3595 return -1;
3597 PyOS_snprintf(buf, buflen, fmt, x);
3598 return strlen(buf);
3601 static int
3602 formatchar(char *buf, size_t buflen, PyObject *v)
3604 /* presume that the buffer is at least 2 characters long */
3605 if (PyString_Check(v)) {
3606 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
3607 return -1;
3609 else {
3610 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
3611 return -1;
3613 buf[1] = '\0';
3614 return 1;
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.
*/
#define FORMATBUFLEN (size_t)120
3628 PyObject *
3629 PyString_Format(PyObject *format, PyObject *args)
3631 char *fmt, *res;
3632 int fmtcnt, rescnt, reslen, arglen, argidx;
3633 int args_owned = 0;
3634 PyObject *result, *orig_args;
3635 #ifdef Py_USING_UNICODE
3636 PyObject *v, *w;
3637 #endif
3638 PyObject *dict = NULL;
3639 if (format == NULL || !PyString_Check(format) || args == NULL) {
3640 PyErr_BadInternalCall();
3641 return NULL;
3643 orig_args = args;
3644 fmt = PyString_AS_STRING(format);
3645 fmtcnt = PyString_GET_SIZE(format);
3646 reslen = rescnt = fmtcnt + 100;
3647 result = PyString_FromStringAndSize((char *)NULL, reslen);
3648 if (result == NULL)
3649 return NULL;
3650 res = PyString_AsString(result);
3651 if (PyTuple_Check(args)) {
3652 arglen = PyTuple_GET_SIZE(args);
3653 argidx = 0;
3655 else {
3656 arglen = -1;
3657 argidx = -2;
3659 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
3660 !PyObject_TypeCheck(args, &PyBaseString_Type))
3661 dict = args;
3662 while (--fmtcnt >= 0) {
3663 if (*fmt != '%') {
3664 if (--rescnt < 0) {
3665 rescnt = fmtcnt + 100;
3666 reslen += rescnt;
3667 if (_PyString_Resize(&result, reslen) < 0)
3668 return NULL;
3669 res = PyString_AS_STRING(result)
3670 + reslen - rescnt;
3671 --rescnt;
3673 *res++ = *fmt++;
3675 else {
3676 /* Got a format specifier */
3677 int flags = 0;
3678 int width = -1;
3679 int prec = -1;
3680 int c = '\0';
3681 int fill;
3682 PyObject *v = NULL;
3683 PyObject *temp = NULL;
3684 char *pbuf;
3685 int sign;
3686 int len;
3687 char formatbuf[FORMATBUFLEN];
3688 /* For format{float,int,char}() */
3689 #ifdef Py_USING_UNICODE
3690 char *fmt_start = fmt;
3691 int argidx_start = argidx;
3692 #endif
3694 fmt++;
3695 if (*fmt == '(') {
3696 char *keystart;
3697 int keylen;
3698 PyObject *key;
3699 int pcount = 1;
3701 if (dict == NULL) {
3702 PyErr_SetString(PyExc_TypeError,
3703 "format requires a mapping");
3704 goto error;
3706 ++fmt;
3707 --fmtcnt;
3708 keystart = fmt;
3709 /* Skip over balanced parentheses */
3710 while (pcount > 0 && --fmtcnt >= 0) {
3711 if (*fmt == ')')
3712 --pcount;
3713 else if (*fmt == '(')
3714 ++pcount;
3715 fmt++;
3717 keylen = fmt - keystart - 1;
3718 if (fmtcnt < 0 || pcount > 0) {
3719 PyErr_SetString(PyExc_ValueError,
3720 "incomplete format key");
3721 goto error;
3723 key = PyString_FromStringAndSize(keystart,
3724 keylen);
3725 if (key == NULL)
3726 goto error;
3727 if (args_owned) {
3728 Py_DECREF(args);
3729 args_owned = 0;
3731 args = PyObject_GetItem(dict, key);
3732 Py_DECREF(key);
3733 if (args == NULL) {
3734 goto error;
3736 args_owned = 1;
3737 arglen = -1;
3738 argidx = -2;
3740 while (--fmtcnt >= 0) {
3741 switch (c = *fmt++) {
3742 case '-': flags |= F_LJUST; continue;
3743 case '+': flags |= F_SIGN; continue;
3744 case ' ': flags |= F_BLANK; continue;
3745 case '#': flags |= F_ALT; continue;
3746 case '0': flags |= F_ZERO; continue;
3748 break;
3750 if (c == '*') {
3751 v = getnextarg(args, arglen, &argidx);
3752 if (v == NULL)
3753 goto error;
3754 if (!PyInt_Check(v)) {
3755 PyErr_SetString(PyExc_TypeError,
3756 "* wants int");
3757 goto error;
3759 width = PyInt_AsLong(v);
3760 if (width < 0) {
3761 flags |= F_LJUST;
3762 width = -width;
3764 if (--fmtcnt >= 0)
3765 c = *fmt++;
3767 else if (c >= 0 && isdigit(c)) {
3768 width = c - '0';
3769 while (--fmtcnt >= 0) {
3770 c = Py_CHARMASK(*fmt++);
3771 if (!isdigit(c))
3772 break;
3773 if ((width*10) / 10 != width) {
3774 PyErr_SetString(
3775 PyExc_ValueError,
3776 "width too big");
3777 goto error;
3779 width = width*10 + (c - '0');
3782 if (c == '.') {
3783 prec = 0;
3784 if (--fmtcnt >= 0)
3785 c = *fmt++;
3786 if (c == '*') {
3787 v = getnextarg(args, arglen, &argidx);
3788 if (v == NULL)
3789 goto error;
3790 if (!PyInt_Check(v)) {
3791 PyErr_SetString(
3792 PyExc_TypeError,
3793 "* wants int");
3794 goto error;
3796 prec = PyInt_AsLong(v);
3797 if (prec < 0)
3798 prec = 0;
3799 if (--fmtcnt >= 0)
3800 c = *fmt++;
3802 else if (c >= 0 && isdigit(c)) {
3803 prec = c - '0';
3804 while (--fmtcnt >= 0) {
3805 c = Py_CHARMASK(*fmt++);
3806 if (!isdigit(c))
3807 break;
3808 if ((prec*10) / 10 != prec) {
3809 PyErr_SetString(
3810 PyExc_ValueError,
3811 "prec too big");
3812 goto error;
3814 prec = prec*10 + (c - '0');
3817 } /* prec */
3818 if (fmtcnt >= 0) {
3819 if (c == 'h' || c == 'l' || c == 'L') {
3820 if (--fmtcnt >= 0)
3821 c = *fmt++;
3824 if (fmtcnt < 0) {
3825 PyErr_SetString(PyExc_ValueError,
3826 "incomplete format");
3827 goto error;
3829 if (c != '%') {
3830 v = getnextarg(args, arglen, &argidx);
3831 if (v == NULL)
3832 goto error;
3834 sign = 0;
3835 fill = ' ';
3836 switch (c) {
3837 case '%':
3838 pbuf = "%";
3839 len = 1;
3840 break;
3841 case 's':
3842 #ifdef Py_USING_UNICODE
3843 if (PyUnicode_Check(v)) {
3844 fmt = fmt_start;
3845 argidx = argidx_start;
3846 goto unicode;
3848 #endif
3849 /* Fall through */
3850 case 'r':
3851 if (c == 's')
3852 temp = PyObject_Str(v);
3853 else
3854 temp = PyObject_Repr(v);
3855 if (temp == NULL)
3856 goto error;
3857 if (!PyString_Check(temp)) {
3858 /* XXX Note: this should never happen,
3859 since PyObject_Repr() and
3860 PyObject_Str() assure this */
3861 PyErr_SetString(PyExc_TypeError,
3862 "%s argument has non-string str()");
3863 Py_DECREF(temp);
3864 goto error;
3866 pbuf = PyString_AS_STRING(temp);
3867 len = PyString_GET_SIZE(temp);
3868 if (prec >= 0 && len > prec)
3869 len = prec;
3870 break;
3871 case 'i':
3872 case 'd':
3873 case 'u':
3874 case 'o':
3875 case 'x':
3876 case 'X':
3877 if (c == 'i')
3878 c = 'd';
3879 if (PyLong_Check(v)) {
3880 temp = _PyString_FormatLong(v, flags,
3881 prec, c, &pbuf, &len);
3882 if (!temp)
3883 goto error;
3884 /* unbounded ints can always produce
3885 a sign character! */
3886 sign = 1;
3888 else {
3889 pbuf = formatbuf;
3890 len = formatint(pbuf,
3891 sizeof(formatbuf),
3892 flags, prec, c, v);
3893 if (len < 0)
3894 goto error;
3895 /* only d conversion is signed */
3896 sign = c == 'd';
3898 if (flags & F_ZERO)
3899 fill = '0';
3900 break;
3901 case 'e':
3902 case 'E':
3903 case 'f':
3904 case 'g':
3905 case 'G':
3906 pbuf = formatbuf;
3907 len = formatfloat(pbuf, sizeof(formatbuf),
3908 flags, prec, c, v);
3909 if (len < 0)
3910 goto error;
3911 sign = 1;
3912 if (flags & F_ZERO)
3913 fill = '0';
3914 break;
3915 case 'c':
3916 pbuf = formatbuf;
3917 len = formatchar(pbuf, sizeof(formatbuf), v);
3918 if (len < 0)
3919 goto error;
3920 break;
3921 default:
3922 PyErr_Format(PyExc_ValueError,
3923 "unsupported format character '%c' (0x%x) "
3924 "at index %i",
3925 c, c,
3926 (int)(fmt - 1 - PyString_AsString(format)));
3927 goto error;
3929 if (sign) {
3930 if (*pbuf == '-' || *pbuf == '+') {
3931 sign = *pbuf++;
3932 len--;
3934 else if (flags & F_SIGN)
3935 sign = '+';
3936 else if (flags & F_BLANK)
3937 sign = ' ';
3938 else
3939 sign = 0;
3941 if (width < len)
3942 width = len;
3943 if (rescnt - (sign != 0) < width) {
3944 reslen -= rescnt;
3945 rescnt = width + fmtcnt + 100;
3946 reslen += rescnt;
3947 if (reslen < 0) {
3948 Py_DECREF(result);
3949 return PyErr_NoMemory();
3951 if (_PyString_Resize(&result, reslen) < 0)
3952 return NULL;
3953 res = PyString_AS_STRING(result)
3954 + reslen - rescnt;
3956 if (sign) {
3957 if (fill != ' ')
3958 *res++ = sign;
3959 rescnt--;
3960 if (width > len)
3961 width--;
3963 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3964 assert(pbuf[0] == '0');
3965 assert(pbuf[1] == c);
3966 if (fill != ' ') {
3967 *res++ = *pbuf++;
3968 *res++ = *pbuf++;
3970 rescnt -= 2;
3971 width -= 2;
3972 if (width < 0)
3973 width = 0;
3974 len -= 2;
3976 if (width > len && !(flags & F_LJUST)) {
3977 do {
3978 --rescnt;
3979 *res++ = fill;
3980 } while (--width > len);
3982 if (fill == ' ') {
3983 if (sign)
3984 *res++ = sign;
3985 if ((flags & F_ALT) &&
3986 (c == 'x' || c == 'X')) {
3987 assert(pbuf[0] == '0');
3988 assert(pbuf[1] == c);
3989 *res++ = *pbuf++;
3990 *res++ = *pbuf++;
3993 memcpy(res, pbuf, len);
3994 res += len;
3995 rescnt -= len;
3996 while (--width >= len) {
3997 --rescnt;
3998 *res++ = ' ';
4000 if (dict && (argidx < arglen) && c != '%') {
4001 PyErr_SetString(PyExc_TypeError,
4002 "not all arguments converted during string formatting");
4003 goto error;
4005 Py_XDECREF(temp);
4006 } /* '%' */
4007 } /* until end */
4008 if (argidx < arglen && !dict) {
4009 PyErr_SetString(PyExc_TypeError,
4010 "not all arguments converted during string formatting");
4011 goto error;
4013 if (args_owned) {
4014 Py_DECREF(args);
4016 _PyString_Resize(&result, reslen - rescnt);
4017 return result;
4019 #ifdef Py_USING_UNICODE
4020 unicode:
4021 if (args_owned) {
4022 Py_DECREF(args);
4023 args_owned = 0;
4025 /* Fiddle args right (remove the first argidx arguments) */
4026 if (PyTuple_Check(orig_args) && argidx > 0) {
4027 PyObject *v;
4028 int n = PyTuple_GET_SIZE(orig_args) - argidx;
4029 v = PyTuple_New(n);
4030 if (v == NULL)
4031 goto error;
4032 while (--n >= 0) {
4033 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4034 Py_INCREF(w);
4035 PyTuple_SET_ITEM(v, n, w);
4037 args = v;
4038 } else {
4039 Py_INCREF(orig_args);
4040 args = orig_args;
4042 args_owned = 1;
4043 /* Take what we have of the result and let the Unicode formatting
4044 function format the rest of the input. */
4045 rescnt = res - PyString_AS_STRING(result);
4046 if (_PyString_Resize(&result, rescnt))
4047 goto error;
4048 fmtcnt = PyString_GET_SIZE(format) - \
4049 (fmt - PyString_AS_STRING(format));
4050 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4051 if (format == NULL)
4052 goto error;
4053 v = PyUnicode_Format(format, args);
4054 Py_DECREF(format);
4055 if (v == NULL)
4056 goto error;
4057 /* Paste what we have (result) to what the Unicode formatting
4058 function returned (v) and return the result (or error) */
4059 w = PyUnicode_Concat(result, v);
4060 Py_DECREF(result);
4061 Py_DECREF(v);
4062 Py_DECREF(args);
4063 return w;
4064 #endif /* Py_USING_UNICODE */
4066 error:
4067 Py_DECREF(result);
4068 if (args_owned) {
4069 Py_DECREF(args);
4071 return NULL;
4074 void
4075 PyString_InternInPlace(PyObject **p)
4077 register PyStringObject *s = (PyStringObject *)(*p);
4078 PyObject *t;
4079 if (s == NULL || !PyString_Check(s))
4080 Py_FatalError("PyString_InternInPlace: strings only please!");
4081 if (PyString_CHECK_INTERNED(s))
4082 return;
4083 if (interned == NULL) {
4084 interned = PyDict_New();
4085 if (interned == NULL) {
4086 PyErr_Clear(); /* Don't leave an exception */
4087 return;
4090 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
4091 Py_INCREF(t);
4092 Py_DECREF(*p);
4093 *p = t;
4094 return;
4096 /* Ensure that only true string objects appear in the intern dict */
4097 if (!PyString_CheckExact(s)) {
4098 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
4099 PyString_GET_SIZE(s));
4100 if (t == NULL) {
4101 PyErr_Clear();
4102 return;
4104 } else {
4105 t = (PyObject*) s;
4106 Py_INCREF(t);
4109 if (PyDict_SetItem(interned, t, t) == 0) {
4110 /* The two references in interned are not counted by
4111 refcnt. The string deallocator will take care of this */
4112 ((PyObject *)t)->ob_refcnt-=2;
4113 PyString_CHECK_INTERNED(t) = SSTATE_INTERNED_MORTAL;
4114 Py_DECREF(*p);
4115 *p = t;
4116 return;
4118 Py_DECREF(t);
4119 PyErr_Clear();
4122 void
4123 PyString_InternImmortal(PyObject **p)
4125 PyString_InternInPlace(p);
4126 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4127 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4128 Py_INCREF(*p);
4133 PyObject *
4134 PyString_InternFromString(const char *cp)
4136 PyObject *s = PyString_FromString(cp);
4137 if (s == NULL)
4138 return NULL;
4139 PyString_InternInPlace(&s);
4140 return s;
4143 void
4144 PyString_Fini(void)
4146 int i;
4147 for (i = 0; i < UCHAR_MAX + 1; i++) {
4148 Py_XDECREF(characters[i]);
4149 characters[i] = NULL;
4151 Py_XDECREF(nullstring);
4152 nullstring = NULL;
4155 void _Py_ReleaseInternedStrings(void)
4157 PyObject *keys;
4158 PyStringObject *s;
4159 int i, n;
4161 if (interned == NULL || !PyDict_Check(interned))
4162 return;
4163 keys = PyDict_Keys(interned);
4164 if (keys == NULL || !PyList_Check(keys)) {
4165 PyErr_Clear();
4166 return;
4169 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4170 detector, interned strings are not forcibly deallocated; rather, we
4171 give them their stolen references back, and then clear and DECREF
4172 the interned dict. */
4174 fprintf(stderr, "releasing interned strings\n");
4175 n = PyList_GET_SIZE(keys);
4176 for (i = 0; i < n; i++) {
4177 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4178 switch (s->ob_sstate) {
4179 case SSTATE_NOT_INTERNED:
4180 /* XXX Shouldn't happen */
4181 break;
4182 case SSTATE_INTERNED_IMMORTAL:
4183 s->ob_refcnt += 1;
4184 break;
4185 case SSTATE_INTERNED_MORTAL:
4186 s->ob_refcnt += 2;
4187 break;
4188 default:
4189 Py_FatalError("Inconsistent interned string state.");
4191 s->ob_sstate = SSTATE_NOT_INTERNED;
4193 Py_DECREF(keys);
4194 PyDict_Clear(interned);
4195 Py_DECREF(interned);
4196 interned = NULL;