1 /* String (str/bytes) object implementation */
3 #define PY_SSIZE_T_CLEAN
/* NOTE(review): no use of these two variables is visible in this excerpt;
   presumably they tally empty and one-character string allocations --
   confirm against the full file. */
10 Py_ssize_t null_strings
, one_strings
;
/* Table of string objects indexed by byte value (UCHAR_MAX + 1 slots).
   PyString_FromStringAndSize() and PyString_FromString() return the
   entry for a length-1 request when the slot is non-NULL; otherwise they
   intern the newly built object and store it into the slot. */
13 static PyStringObject
*characters
[UCHAR_MAX
+ 1];
/* Shared string object returned for size == 0 requests by
   PyString_FromStringAndSize() and PyString_FromString() whenever it is
   non-NULL. */
14 static PyStringObject
*nullstring
;
16 /* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
21 Another way to look at this is to say that the actual reference
22 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
24 static PyObject
*interned
;
26 /* PyStringObject_SIZE gives the basic size of a string; any memory allocation
27 for a string of length n should request PyStringObject_SIZE + n bytes.
29 Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
30 3 bytes per string allocation on a typical system.
32 #define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
35 For both PyString_FromString() and PyString_FromStringAndSize(), the
36 parameter `size' denotes number of characters to allocate, not counting any
37 null terminating character.
39 For PyString_FromString(), the parameter `str' points to a null-terminated
40 string containing exactly `size' bytes.
42 For PyString_FromStringAndSize(), the parameter `str' is
43 either NULL or else points to a string containing at least `size' bytes.
44 For PyString_FromStringAndSize(), the string in the `str' parameter does
45 not have to be null-terminated. (Therefore it is safe to construct a
46 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
47 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
48 bytes (setting the last byte to the null terminating character) and you can
49 fill in the data yourself. If `str' is non-NULL then the resulting
50 PyString object must be treated as immutable and you must not fill in nor
51 alter the data yourself, since the strings may be shared.
53 The PyObject member `op->ob_size', which denotes the number of "extra
54 items" in a variable-size object, will contain the number of bytes
55 allocated for string data, not counting the null terminating character. It
56 is therefore equal to the `size' parameter (for
57 PyString_FromStringAndSize()) or the length of the string in the `str'
58 parameter (for PyString_FromString()).
61 PyString_FromStringAndSize(const char *str
, Py_ssize_t size
)
63 register PyStringObject
*op
;
65 PyErr_SetString(PyExc_SystemError
,
66 "Negative size passed to PyString_FromStringAndSize");
69 if (size
== 0 && (op
= nullstring
) != NULL
) {
74 return (PyObject
*)op
;
76 if (size
== 1 && str
!= NULL
&&
77 (op
= characters
[*str
& UCHAR_MAX
]) != NULL
)
83 return (PyObject
*)op
;
86 if (size
> PY_SSIZE_T_MAX
- PyStringObject_SIZE
) {
87 PyErr_SetString(PyExc_OverflowError
, "string is too large");
91 /* Inline PyObject_NewVar */
92 op
= (PyStringObject
*)PyObject_MALLOC(PyStringObject_SIZE
+ size
);
94 return PyErr_NoMemory();
95 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
97 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
99 Py_MEMCPY(op
->ob_sval
, str
, size
);
100 op
->ob_sval
[size
] = '\0';
101 /* share short strings */
103 PyObject
*t
= (PyObject
*)op
;
104 PyString_InternInPlace(&t
);
105 op
= (PyStringObject
*)t
;
108 } else if (size
== 1 && str
!= NULL
) {
109 PyObject
*t
= (PyObject
*)op
;
110 PyString_InternInPlace(&t
);
111 op
= (PyStringObject
*)t
;
112 characters
[*str
& UCHAR_MAX
] = op
;
115 return (PyObject
*) op
;
119 PyString_FromString(const char *str
)
121 register size_t size
;
122 register PyStringObject
*op
;
126 if (size
> PY_SSIZE_T_MAX
- PyStringObject_SIZE
) {
127 PyErr_SetString(PyExc_OverflowError
,
128 "string is too long for a Python string");
131 if (size
== 0 && (op
= nullstring
) != NULL
) {
136 return (PyObject
*)op
;
138 if (size
== 1 && (op
= characters
[*str
& UCHAR_MAX
]) != NULL
) {
143 return (PyObject
*)op
;
146 /* Inline PyObject_NewVar */
147 op
= (PyStringObject
*)PyObject_MALLOC(PyStringObject_SIZE
+ size
);
149 return PyErr_NoMemory();
150 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
152 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
153 Py_MEMCPY(op
->ob_sval
, str
, size
+1);
154 /* share short strings */
156 PyObject
*t
= (PyObject
*)op
;
157 PyString_InternInPlace(&t
);
158 op
= (PyStringObject
*)t
;
161 } else if (size
== 1) {
162 PyObject
*t
= (PyObject
*)op
;
163 PyString_InternInPlace(&t
);
164 op
= (PyStringObject
*)t
;
165 characters
[*str
& UCHAR_MAX
] = op
;
168 return (PyObject
*) op
;
172 PyString_FromFormatV(const char *format
, va_list vargs
)
180 #ifdef VA_LIST_IS_ARRAY
181 Py_MEMCPY(count
, vargs
, sizeof(va_list));
184 __va_copy(count
, vargs
);
189 /* step 1: figure out how large a buffer we need */
190 for (f
= format
; *f
; f
++) {
192 #ifdef HAVE_LONG_LONG
193 int longlongflag
= 0;
196 while (*++f
&& *f
!= '%' && !isalpha(Py_CHARMASK(*f
)))
199 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
200 * they don't affect the amount of space we reserve.
203 if (f
[1] == 'd' || f
[1] == 'u') {
206 #ifdef HAVE_LONG_LONG
207 else if (f
[1] == 'l' &&
208 (f
[2] == 'd' || f
[2] == 'u')) {
214 else if (*f
== 'z' && (f
[1] == 'd' || f
[1] == 'u')) {
220 (void)va_arg(count
, int);
221 /* fall through... */
225 case 'd': case 'u': case 'i': case 'x':
226 (void) va_arg(count
, int);
227 #ifdef HAVE_LONG_LONG
229 ceil(log10(256)*SIZEOF_LONG_LONG) digits,
230 plus 1 for the sign. 53/22 is an upper
231 bound for log10(256). */
233 n
+= 2 + (SIZEOF_LONG_LONG
*53-1) / 22;
236 /* 20 bytes is enough to hold a 64-bit
237 integer. Decimal takes the most
238 space. This isn't enough for
244 s
= va_arg(count
, char*);
248 (void) va_arg(count
, int);
249 /* maximum 64-bit pointer representation:
251 * so 19 characters is enough.
252 * XXX I count 18 -- what's the extra for?
257 /* if we stumble upon an unknown
258 formatting code, copy the rest of
259 the format string to the output
260 string. (we cannot just skip the
261 code, since there's no way to know
262 what's in the argument list) */
270 /* step 2: fill the buffer */
271 /* Since we've analyzed how much space we need for the worst case,
272 use sprintf directly instead of the slower PyOS_snprintf. */
273 string
= PyString_FromStringAndSize(NULL
, n
);
277 s
= PyString_AsString(string
);
279 for (f
= format
; *f
; f
++) {
284 #ifdef HAVE_LONG_LONG
285 int longlongflag
= 0;
288 /* parse the width.precision part (we're only
289 interested in the precision value, if any) */
291 while (isdigit(Py_CHARMASK(*f
)))
292 n
= (n
*10) + *f
++ - '0';
296 while (isdigit(Py_CHARMASK(*f
)))
297 n
= (n
*10) + *f
++ - '0';
299 while (*f
&& *f
!= '%' && !isalpha(Py_CHARMASK(*f
)))
301 /* Handle %ld, %lu, %lld and %llu. */
303 if (f
[1] == 'd' || f
[1] == 'u') {
307 #ifdef HAVE_LONG_LONG
308 else if (f
[1] == 'l' &&
309 (f
[2] == 'd' || f
[2] == 'u')) {
315 /* handle the size_t flag. */
316 else if (*f
== 'z' && (f
[1] == 'd' || f
[1] == 'u')) {
323 *s
++ = va_arg(vargs
, int);
327 sprintf(s
, "%ld", va_arg(vargs
, long));
328 #ifdef HAVE_LONG_LONG
329 else if (longlongflag
)
330 sprintf(s
, "%" PY_FORMAT_LONG_LONG
"d",
331 va_arg(vargs
, PY_LONG_LONG
));
334 sprintf(s
, "%" PY_FORMAT_SIZE_T
"d",
335 va_arg(vargs
, Py_ssize_t
));
337 sprintf(s
, "%d", va_arg(vargs
, int));
343 va_arg(vargs
, unsigned long));
344 #ifdef HAVE_LONG_LONG
345 else if (longlongflag
)
346 sprintf(s
, "%" PY_FORMAT_LONG_LONG
"u",
347 va_arg(vargs
, PY_LONG_LONG
));
350 sprintf(s
, "%" PY_FORMAT_SIZE_T
"u",
351 va_arg(vargs
, size_t));
354 va_arg(vargs
, unsigned int));
358 sprintf(s
, "%i", va_arg(vargs
, int));
362 sprintf(s
, "%x", va_arg(vargs
, int));
366 p
= va_arg(vargs
, char*);
374 sprintf(s
, "%p", va_arg(vargs
, void*));
375 /* %p is ill-defined: ensure leading 0x. */
378 else if (s
[1] != 'x') {
379 memmove(s
+2, s
, strlen(s
)+1);
398 if (_PyString_Resize(&string
, s
- PyString_AS_STRING(string
)))
404 PyString_FromFormat(const char *format
, ...)
409 #ifdef HAVE_STDARG_PROTOTYPES
410 va_start(vargs
, format
);
414 ret
= PyString_FromFormatV(format
, vargs
);
420 PyObject
*PyString_Decode(const char *s
,
422 const char *encoding
,
427 str
= PyString_FromStringAndSize(s
, size
);
430 v
= PyString_AsDecodedString(str
, encoding
, errors
);
435 PyObject
*PyString_AsDecodedObject(PyObject
*str
,
436 const char *encoding
,
441 if (!PyString_Check(str
)) {
446 if (encoding
== NULL
) {
447 #ifdef Py_USING_UNICODE
448 encoding
= PyUnicode_GetDefaultEncoding();
450 PyErr_SetString(PyExc_ValueError
, "no encoding specified");
455 /* Decode via the codec registry */
456 v
= PyCodec_Decode(str
, encoding
, errors
);
466 PyObject
*PyString_AsDecodedString(PyObject
*str
,
467 const char *encoding
,
472 v
= PyString_AsDecodedObject(str
, encoding
, errors
);
476 #ifdef Py_USING_UNICODE
477 /* Convert Unicode to a string using the default encoding */
478 if (PyUnicode_Check(v
)) {
480 v
= PyUnicode_AsEncodedString(v
, NULL
, NULL
);
486 if (!PyString_Check(v
)) {
487 PyErr_Format(PyExc_TypeError
,
488 "decoder did not return a string object (type=%.400s)",
489 Py_TYPE(v
)->tp_name
);
500 PyObject
*PyString_Encode(const char *s
,
502 const char *encoding
,
507 str
= PyString_FromStringAndSize(s
, size
);
510 v
= PyString_AsEncodedString(str
, encoding
, errors
);
515 PyObject
*PyString_AsEncodedObject(PyObject
*str
,
516 const char *encoding
,
521 if (!PyString_Check(str
)) {
526 if (encoding
== NULL
) {
527 #ifdef Py_USING_UNICODE
528 encoding
= PyUnicode_GetDefaultEncoding();
530 PyErr_SetString(PyExc_ValueError
, "no encoding specified");
535 /* Encode via the codec registry */
536 v
= PyCodec_Encode(str
, encoding
, errors
);
546 PyObject
*PyString_AsEncodedString(PyObject
*str
,
547 const char *encoding
,
552 v
= PyString_AsEncodedObject(str
, encoding
, errors
);
556 #ifdef Py_USING_UNICODE
557 /* Convert Unicode to a string using the default encoding */
558 if (PyUnicode_Check(v
)) {
560 v
= PyUnicode_AsEncodedString(v
, NULL
, NULL
);
566 if (!PyString_Check(v
)) {
567 PyErr_Format(PyExc_TypeError
,
568 "encoder did not return a string object (type=%.400s)",
569 Py_TYPE(v
)->tp_name
);
581 string_dealloc(PyObject
*op
)
583 switch (PyString_CHECK_INTERNED(op
)) {
584 case SSTATE_NOT_INTERNED
:
587 case SSTATE_INTERNED_MORTAL
:
588 /* revive dead object temporarily for DelItem */
590 if (PyDict_DelItem(interned
, op
) != 0)
592 "deletion of interned string failed");
595 case SSTATE_INTERNED_IMMORTAL
:
596 Py_FatalError("Immortal interned string died.");
599 Py_FatalError("Inconsistent interned string state.");
601 Py_TYPE(op
)->tp_free(op
);
604 /* Unescape a backslash-escaped string. If unicode is non-zero,
605 the string is a u-literal. If recode_encoding is non-zero,
606 the string is UTF-8 encoded and should be re-encoded in the
607 specified encoding. */
609 PyObject
*PyString_DecodeEscape(const char *s
,
613 const char *recode_encoding
)
619 Py_ssize_t newlen
= recode_encoding
? 4*len
:len
;
620 v
= PyString_FromStringAndSize((char *)NULL
, newlen
);
623 p
= buf
= PyString_AsString(v
);
628 #ifdef Py_USING_UNICODE
629 if (recode_encoding
&& (*s
& 0x80)) {
635 /* Decode non-ASCII bytes as UTF-8. */
636 while (t
< end
&& (*t
& 0x80)) t
++;
637 u
= PyUnicode_DecodeUTF8(s
, t
- s
, errors
);
640 /* Recode them in target encoding. */
641 w
= PyUnicode_AsEncodedString(
642 u
, recode_encoding
, errors
);
646 /* Append bytes to output buffer. */
647 assert(PyString_Check(w
));
648 r
= PyString_AS_STRING(w
);
649 rn
= PyString_GET_SIZE(w
);
664 PyErr_SetString(PyExc_ValueError
,
665 "Trailing \\ in string");
669 /* XXX This assumes ASCII! */
671 case '\\': *p
++ = '\\'; break;
672 case '\'': *p
++ = '\''; break;
673 case '\"': *p
++ = '\"'; break;
674 case 'b': *p
++ = '\b'; break;
675 case 'f': *p
++ = '\014'; break; /* FF */
676 case 't': *p
++ = '\t'; break;
677 case 'n': *p
++ = '\n'; break;
678 case 'r': *p
++ = '\r'; break;
679 case 'v': *p
++ = '\013'; break; /* VT */
680 case 'a': *p
++ = '\007'; break; /* BEL, not classic C */
681 case '0': case '1': case '2': case '3':
682 case '4': case '5': case '6': case '7':
684 if (s
< end
&& '0' <= *s
&& *s
<= '7') {
685 c
= (c
<<3) + *s
++ - '0';
686 if (s
< end
&& '0' <= *s
&& *s
<= '7')
687 c
= (c
<<3) + *s
++ - '0';
693 isxdigit(Py_CHARMASK(s
[0])) &&
694 isxdigit(Py_CHARMASK(s
[1])))
717 if (!errors
|| strcmp(errors
, "strict") == 0) {
718 PyErr_SetString(PyExc_ValueError
,
719 "invalid \\x escape");
722 if (strcmp(errors
, "replace") == 0) {
724 } else if (strcmp(errors
, "ignore") == 0)
727 PyErr_Format(PyExc_ValueError
,
729 "unknown error handling code: %.400s",
733 #ifndef Py_USING_UNICODE
738 PyErr_SetString(PyExc_ValueError
,
739 "Unicode escapes not legal "
740 "when Unicode disabled");
747 goto non_esc
; /* an arbitrary number of unescaped
748 UTF-8 bytes may follow. */
751 if (p
-buf
< newlen
&& _PyString_Resize(&v
, p
- buf
))
759 /* -------------------------------------------------------------------- */
763 string_getsize(register PyObject
*op
)
767 if (PyString_AsStringAndSize(op
, &s
, &len
))
772 static /*const*/ char *
773 string_getbuffer(register PyObject
*op
)
777 if (PyString_AsStringAndSize(op
, &s
, &len
))
783 PyString_Size(register PyObject
*op
)
785 if (!PyString_Check(op
))
786 return string_getsize(op
);
791 PyString_AsString(register PyObject
*op
)
793 if (!PyString_Check(op
))
794 return string_getbuffer(op
);
795 return ((PyStringObject
*)op
) -> ob_sval
;
799 PyString_AsStringAndSize(register PyObject
*obj
,
801 register Py_ssize_t
*len
)
804 PyErr_BadInternalCall();
808 if (!PyString_Check(obj
)) {
809 #ifdef Py_USING_UNICODE
810 if (PyUnicode_Check(obj
)) {
811 obj
= _PyUnicode_AsDefaultEncodedString(obj
, NULL
);
818 PyErr_Format(PyExc_TypeError
,
819 "expected string or Unicode object, "
820 "%.200s found", Py_TYPE(obj
)->tp_name
);
825 *s
= PyString_AS_STRING(obj
);
827 *len
= PyString_GET_SIZE(obj
);
828 else if (strlen(*s
) != (size_t)PyString_GET_SIZE(obj
)) {
829 PyErr_SetString(PyExc_TypeError
,
830 "expected string without null bytes");
836 /* -------------------------------------------------------------------- */
839 #include "stringlib/stringdefs.h"
840 #include "stringlib/fastsearch.h"
842 #include "stringlib/count.h"
843 #include "stringlib/find.h"
844 #include "stringlib/partition.h"
845 #include "stringlib/split.h"
847 #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
848 #include "stringlib/localeutil.h"
853 string_print(PyStringObject
*op
, FILE *fp
, int flags
)
855 Py_ssize_t i
, str_len
;
859 /* XXX Ought to check for interrupts when writing long strings */
860 if (! PyString_CheckExact(op
)) {
862 /* A str subclass may have its own __str__ method. */
863 op
= (PyStringObject
*) PyObject_Str((PyObject
*)op
);
866 ret
= string_print(op
, fp
, flags
);
870 if (flags
& Py_PRINT_RAW
) {
871 char *data
= op
->ob_sval
;
872 Py_ssize_t size
= Py_SIZE(op
);
873 Py_BEGIN_ALLOW_THREADS
874 while (size
> INT_MAX
) {
875 /* Very long strings cannot be written atomically.
876 * But don't write exactly INT_MAX bytes at a time
877 * to avoid memory alignment issues.
879 const int chunk_size
= INT_MAX
& ~0x3FFF;
880 fwrite(data
, 1, chunk_size
, fp
);
885 if (size
) fwrite(data
, (int)size
, 1, fp
);
887 fwrite(data
, 1, (int)size
, fp
);
893 /* figure out which quote to use; single is preferred */
895 if (memchr(op
->ob_sval
, '\'', Py_SIZE(op
)) &&
896 !memchr(op
->ob_sval
, '"', Py_SIZE(op
)))
899 str_len
= Py_SIZE(op
);
900 Py_BEGIN_ALLOW_THREADS
902 for (i
= 0; i
< str_len
; i
++) {
903 /* Since strings are immutable and the caller should have a
904 reference, accessing the internal buffer should not be an issue
905 with the GIL released. */
907 if (c
== quote
|| c
== '\\')
908 fprintf(fp
, "\\%c", c
);
915 else if (c
< ' ' || c
>= 0x7f)
916 fprintf(fp
, "\\x%02x", c
& 0xff);
926 PyString_Repr(PyObject
*obj
, int smartquotes
)
928 register PyStringObject
* op
= (PyStringObject
*) obj
;
929 size_t newsize
= 2 + 4 * Py_SIZE(op
);
931 if (newsize
> PY_SSIZE_T_MAX
|| newsize
/ 4 != Py_SIZE(op
)) {
932 PyErr_SetString(PyExc_OverflowError
,
933 "string is too large to make repr");
936 v
= PyString_FromStringAndSize((char *)NULL
, newsize
);
941 register Py_ssize_t i
;
946 /* figure out which quote to use; single is preferred */
949 memchr(op
->ob_sval
, '\'', Py_SIZE(op
)) &&
950 !memchr(op
->ob_sval
, '"', Py_SIZE(op
)))
953 p
= PyString_AS_STRING(v
);
955 for (i
= 0; i
< Py_SIZE(op
); i
++) {
956 /* There's at least enough room for a hex escape
957 and a closing quote. */
958 assert(newsize
- (p
- PyString_AS_STRING(v
)) >= 5);
960 if (c
== quote
|| c
== '\\')
961 *p
++ = '\\', *p
++ = c
;
963 *p
++ = '\\', *p
++ = 't';
965 *p
++ = '\\', *p
++ = 'n';
967 *p
++ = '\\', *p
++ = 'r';
968 else if (c
< ' ' || c
>= 0x7f) {
969 /* For performance, we don't want to call
970 PyOS_snprintf here (extra layers of
972 sprintf(p
, "\\x%02x", c
& 0xff);
978 assert(newsize
- (p
- PyString_AS_STRING(v
)) >= 1);
981 if (_PyString_Resize(&v
, (p
- PyString_AS_STRING(v
))))
988 string_repr(PyObject
*op
)
990 return PyString_Repr(op
, 1);
994 string_str(PyObject
*s
)
996 assert(PyString_Check(s
));
997 if (PyString_CheckExact(s
)) {
1002 /* Subtype -- return genuine string with the same value. */
1003 PyStringObject
*t
= (PyStringObject
*) s
;
1004 return PyString_FromStringAndSize(t
->ob_sval
, Py_SIZE(t
));
1009 string_length(PyStringObject
*a
)
1015 string_concat(register PyStringObject
*a
, register PyObject
*bb
)
1017 register Py_ssize_t size
;
1018 register PyStringObject
*op
;
1019 if (!PyString_Check(bb
)) {
1020 #ifdef Py_USING_UNICODE
1021 if (PyUnicode_Check(bb
))
1022 return PyUnicode_Concat((PyObject
*)a
, bb
);
1024 if (PyByteArray_Check(bb
))
1025 return PyByteArray_Concat((PyObject
*)a
, bb
);
1026 PyErr_Format(PyExc_TypeError
,
1027 "cannot concatenate 'str' and '%.200s' objects",
1028 Py_TYPE(bb
)->tp_name
);
1031 #define b ((PyStringObject *)bb)
1032 /* Optimize cases with empty left or right operand */
1033 if ((Py_SIZE(a
) == 0 || Py_SIZE(b
) == 0) &&
1034 PyString_CheckExact(a
) && PyString_CheckExact(b
)) {
1035 if (Py_SIZE(a
) == 0) {
1040 return (PyObject
*)a
;
1042 size
= Py_SIZE(a
) + Py_SIZE(b
);
1043 /* Check that string sizes are not negative, to prevent an
1044 overflow in cases where we are passed incorrectly-created
1045 strings with negative lengths (due to a bug in other code).
1047 if (Py_SIZE(a
) < 0 || Py_SIZE(b
) < 0 ||
1048 Py_SIZE(a
) > PY_SSIZE_T_MAX
- Py_SIZE(b
)) {
1049 PyErr_SetString(PyExc_OverflowError
,
1050 "strings are too large to concat");
1054 /* Inline PyObject_NewVar */
1055 if (size
> PY_SSIZE_T_MAX
- PyStringObject_SIZE
) {
1056 PyErr_SetString(PyExc_OverflowError
,
1057 "strings are too large to concat");
1060 op
= (PyStringObject
*)PyObject_MALLOC(PyStringObject_SIZE
+ size
);
1062 return PyErr_NoMemory();
1063 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
1065 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
1066 Py_MEMCPY(op
->ob_sval
, a
->ob_sval
, Py_SIZE(a
));
1067 Py_MEMCPY(op
->ob_sval
+ Py_SIZE(a
), b
->ob_sval
, Py_SIZE(b
));
1068 op
->ob_sval
[size
] = '\0';
1069 return (PyObject
*) op
;
1074 string_repeat(register PyStringObject
*a
, register Py_ssize_t n
)
1076 register Py_ssize_t i
;
1077 register Py_ssize_t j
;
1078 register Py_ssize_t size
;
1079 register PyStringObject
*op
;
1083 /* watch out for overflows: the size can overflow int,
1084 * and the # of bytes needed can overflow size_t
1086 size
= Py_SIZE(a
) * n
;
1087 if (n
&& size
/ n
!= Py_SIZE(a
)) {
1088 PyErr_SetString(PyExc_OverflowError
,
1089 "repeated string is too long");
1092 if (size
== Py_SIZE(a
) && PyString_CheckExact(a
)) {
1094 return (PyObject
*)a
;
1096 nbytes
= (size_t)size
;
1097 if (nbytes
+ PyStringObject_SIZE
<= nbytes
) {
1098 PyErr_SetString(PyExc_OverflowError
,
1099 "repeated string is too long");
1102 op
= (PyStringObject
*)PyObject_MALLOC(PyStringObject_SIZE
+ nbytes
);
1104 return PyErr_NoMemory();
1105 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
1107 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
1108 op
->ob_sval
[size
] = '\0';
1109 if (Py_SIZE(a
) == 1 && n
> 0) {
1110 memset(op
->ob_sval
, a
->ob_sval
[0] , n
);
1111 return (PyObject
*) op
;
1115 Py_MEMCPY(op
->ob_sval
, a
->ob_sval
, Py_SIZE(a
));
1119 j
= (i
<= size
-i
) ? i
: size
-i
;
1120 Py_MEMCPY(op
->ob_sval
+i
, op
->ob_sval
, j
);
1123 return (PyObject
*) op
;
1126 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1129 string_slice(register PyStringObject
*a
, register Py_ssize_t i
,
1130 register Py_ssize_t j
)
1131 /* j -- may be negative! */
1136 j
= 0; /* Avoid signed/unsigned bug in next line */
1139 if (i
== 0 && j
== Py_SIZE(a
) && PyString_CheckExact(a
)) {
1140 /* It's the same as a */
1142 return (PyObject
*)a
;
1146 return PyString_FromStringAndSize(a
->ob_sval
+ i
, j
-i
);
1150 string_contains(PyObject
*str_obj
, PyObject
*sub_obj
)
1152 if (!PyString_CheckExact(sub_obj
)) {
1153 #ifdef Py_USING_UNICODE
1154 if (PyUnicode_Check(sub_obj
))
1155 return PyUnicode_Contains(str_obj
, sub_obj
);
1157 if (!PyString_Check(sub_obj
)) {
1158 PyErr_Format(PyExc_TypeError
,
1159 "'in <string>' requires string as left operand, "
1160 "not %.200s", Py_TYPE(sub_obj
)->tp_name
);
1165 return stringlib_contains_obj(str_obj
, sub_obj
);
1169 string_item(PyStringObject
*a
, register Py_ssize_t i
)
1173 if (i
< 0 || i
>= Py_SIZE(a
)) {
1174 PyErr_SetString(PyExc_IndexError
, "string index out of range");
1177 pchar
= a
->ob_sval
[i
];
1178 v
= (PyObject
*)characters
[pchar
& UCHAR_MAX
];
1180 v
= PyString_FromStringAndSize(&pchar
, 1);
1191 string_richcompare(PyStringObject
*a
, PyStringObject
*b
, int op
)
1194 Py_ssize_t len_a
, len_b
;
1198 /* Make sure both arguments are strings. */
1199 if (!(PyString_Check(a
) && PyString_Check(b
))) {
1200 result
= Py_NotImplemented
;
1205 case Py_EQ
:case Py_LE
:case Py_GE
:
1208 case Py_NE
:case Py_LT
:case Py_GT
:
1214 /* Supporting Py_NE here as well does not save
1215 much time, since Py_NE is rarely used. */
1216 if (Py_SIZE(a
) == Py_SIZE(b
)
1217 && (a
->ob_sval
[0] == b
->ob_sval
[0]
1218 && memcmp(a
->ob_sval
, b
->ob_sval
, Py_SIZE(a
)) == 0)) {
1225 len_a
= Py_SIZE(a
); len_b
= Py_SIZE(b
);
1226 min_len
= (len_a
< len_b
) ? len_a
: len_b
;
1228 c
= Py_CHARMASK(*a
->ob_sval
) - Py_CHARMASK(*b
->ob_sval
);
1230 c
= memcmp(a
->ob_sval
, b
->ob_sval
, min_len
);
1234 c
= (len_a
< len_b
) ? -1 : (len_a
> len_b
) ? 1 : 0;
1236 case Py_LT
: c
= c
< 0; break;
1237 case Py_LE
: c
= c
<= 0; break;
1238 case Py_EQ
: assert(0); break; /* unreachable */
1239 case Py_NE
: c
= c
!= 0; break;
1240 case Py_GT
: c
= c
> 0; break;
1241 case Py_GE
: c
= c
>= 0; break;
1243 result
= Py_NotImplemented
;
1246 result
= c
? Py_True
: Py_False
;
1253 _PyString_Eq(PyObject
*o1
, PyObject
*o2
)
1255 PyStringObject
*a
= (PyStringObject
*) o1
;
1256 PyStringObject
*b
= (PyStringObject
*) o2
;
1257 return Py_SIZE(a
) == Py_SIZE(b
)
1258 && *a
->ob_sval
== *b
->ob_sval
1259 && memcmp(a
->ob_sval
, b
->ob_sval
, Py_SIZE(a
)) == 0;
1263 string_hash(PyStringObject
*a
)
1265 register Py_ssize_t len
;
1266 register unsigned char *p
;
1269 if (a
->ob_shash
!= -1)
1272 p
= (unsigned char *) a
->ob_sval
;
1275 x
= (1000003*x
) ^ *p
++;
1284 string_subscript(PyStringObject
* self
, PyObject
* item
)
1286 if (PyIndex_Check(item
)) {
1287 Py_ssize_t i
= PyNumber_AsSsize_t(item
, PyExc_IndexError
);
1288 if (i
== -1 && PyErr_Occurred())
1291 i
+= PyString_GET_SIZE(self
);
1292 return string_item(self
, i
);
1294 else if (PySlice_Check(item
)) {
1295 Py_ssize_t start
, stop
, step
, slicelength
, cur
, i
;
1300 if (PySlice_GetIndicesEx((PySliceObject
*)item
,
1301 PyString_GET_SIZE(self
),
1302 &start
, &stop
, &step
, &slicelength
) < 0) {
1306 if (slicelength
<= 0) {
1307 return PyString_FromStringAndSize("", 0);
1309 else if (start
== 0 && step
== 1 &&
1310 slicelength
== PyString_GET_SIZE(self
) &&
1311 PyString_CheckExact(self
)) {
1313 return (PyObject
*)self
;
1315 else if (step
== 1) {
1316 return PyString_FromStringAndSize(
1317 PyString_AS_STRING(self
) + start
,
1321 source_buf
= PyString_AsString((PyObject
*)self
);
1322 result_buf
= (char *)PyMem_Malloc(slicelength
);
1323 if (result_buf
== NULL
)
1324 return PyErr_NoMemory();
1326 for (cur
= start
, i
= 0; i
< slicelength
;
1328 result_buf
[i
] = source_buf
[cur
];
1331 result
= PyString_FromStringAndSize(result_buf
,
1333 PyMem_Free(result_buf
);
1338 PyErr_Format(PyExc_TypeError
,
1339 "string indices must be integers, not %.200s",
1340 Py_TYPE(item
)->tp_name
);
1346 string_buffer_getreadbuf(PyStringObject
*self
, Py_ssize_t index
, const void **ptr
)
1349 PyErr_SetString(PyExc_SystemError
,
1350 "accessing non-existent string segment");
1353 *ptr
= (void *)self
->ob_sval
;
1354 return Py_SIZE(self
);
1358 string_buffer_getwritebuf(PyStringObject
*self
, Py_ssize_t index
, const void **ptr
)
1360 PyErr_SetString(PyExc_TypeError
,
1361 "Cannot use string as modifiable buffer");
1366 string_buffer_getsegcount(PyStringObject
*self
, Py_ssize_t
*lenp
)
1369 *lenp
= Py_SIZE(self
);
1374 string_buffer_getcharbuf(PyStringObject
*self
, Py_ssize_t index
, const char **ptr
)
1377 PyErr_SetString(PyExc_SystemError
,
1378 "accessing non-existent string segment");
1381 *ptr
= self
->ob_sval
;
1382 return Py_SIZE(self
);
1386 string_buffer_getbuffer(PyStringObject
*self
, Py_buffer
*view
, int flags
)
1388 return PyBuffer_FillInfo(view
, (PyObject
*)self
,
1389 (void *)self
->ob_sval
, Py_SIZE(self
),
1393 static PySequenceMethods string_as_sequence
= {
1394 (lenfunc
)string_length
, /*sq_length*/
1395 (binaryfunc
)string_concat
, /*sq_concat*/
1396 (ssizeargfunc
)string_repeat
, /*sq_repeat*/
1397 (ssizeargfunc
)string_item
, /*sq_item*/
1398 (ssizessizeargfunc
)string_slice
, /*sq_slice*/
1401 (objobjproc
)string_contains
/*sq_contains*/
1404 static PyMappingMethods string_as_mapping
= {
1405 (lenfunc
)string_length
,
1406 (binaryfunc
)string_subscript
,
1410 static PyBufferProcs string_as_buffer
= {
1411 (readbufferproc
)string_buffer_getreadbuf
,
1412 (writebufferproc
)string_buffer_getwritebuf
,
1413 (segcountproc
)string_buffer_getsegcount
,
1414 (charbufferproc
)string_buffer_getcharbuf
,
1415 (getbufferproc
)string_buffer_getbuffer
,
1422 #define RIGHTSTRIP 1
1425 /* Arrays indexed by above */
1426 static const char *stripformat
[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
1428 #define STRIPNAME(i) (stripformat[i]+3)
1430 PyDoc_STRVAR(split__doc__
,
1431 "S.split([sep [,maxsplit]]) -> list of strings\n\
1433 Return a list of the words in the string S, using sep as the\n\
1434 delimiter string. If maxsplit is given, at most maxsplit\n\
1435 splits are done. If sep is not specified or is None, any\n\
1436 whitespace string is a separator and empty strings are removed\n\
1440 string_split(PyStringObject
*self
, PyObject
*args
)
1442 Py_ssize_t len
= PyString_GET_SIZE(self
), n
;
1443 Py_ssize_t maxsplit
= -1;
1444 const char *s
= PyString_AS_STRING(self
), *sub
;
1445 PyObject
*subobj
= Py_None
;
1447 if (!PyArg_ParseTuple(args
, "|On:split", &subobj
, &maxsplit
))
1450 maxsplit
= PY_SSIZE_T_MAX
;
1451 if (subobj
== Py_None
)
1452 return stringlib_split_whitespace((PyObject
*) self
, s
, len
, maxsplit
);
1453 if (PyString_Check(subobj
)) {
1454 sub
= PyString_AS_STRING(subobj
);
1455 n
= PyString_GET_SIZE(subobj
);
1457 #ifdef Py_USING_UNICODE
1458 else if (PyUnicode_Check(subobj
))
1459 return PyUnicode_Split((PyObject
*)self
, subobj
, maxsplit
);
1461 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
1464 return stringlib_split((PyObject
*) self
, s
, len
, sub
, n
, maxsplit
);
1467 PyDoc_STRVAR(partition__doc__
,
1468 "S.partition(sep) -> (head, sep, tail)\n\
1470 Search for the separator sep in S, and return the part before it,\n\
1471 the separator itself, and the part after it. If the separator is not\n\
1472 found, return S and two empty strings.");
1475 string_partition(PyStringObject
*self
, PyObject
*sep_obj
)
1480 if (PyString_Check(sep_obj
)) {
1481 sep
= PyString_AS_STRING(sep_obj
);
1482 sep_len
= PyString_GET_SIZE(sep_obj
);
1484 #ifdef Py_USING_UNICODE
1485 else if (PyUnicode_Check(sep_obj
))
1486 return PyUnicode_Partition((PyObject
*) self
, sep_obj
);
1488 else if (PyObject_AsCharBuffer(sep_obj
, &sep
, &sep_len
))
1491 return stringlib_partition(
1493 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1494 sep_obj
, sep
, sep_len
1498 PyDoc_STRVAR(rpartition__doc__
,
1499 "S.rpartition(sep) -> (head, sep, tail)\n\
1501 Search for the separator sep in S, starting at the end of S, and return\n\
1502 the part before it, the separator itself, and the part after it. If the\n\
1503 separator is not found, return two empty strings and S.");
1506 string_rpartition(PyStringObject
*self
, PyObject
*sep_obj
)
1511 if (PyString_Check(sep_obj
)) {
1512 sep
= PyString_AS_STRING(sep_obj
);
1513 sep_len
= PyString_GET_SIZE(sep_obj
);
1515 #ifdef Py_USING_UNICODE
1516 else if (PyUnicode_Check(sep_obj
))
1517 return PyUnicode_RPartition((PyObject
*) self
, sep_obj
);
1519 else if (PyObject_AsCharBuffer(sep_obj
, &sep
, &sep_len
))
1522 return stringlib_rpartition(
1524 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1525 sep_obj
, sep
, sep_len
1529 PyDoc_STRVAR(rsplit__doc__
,
1530 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1532 Return a list of the words in the string S, using sep as the\n\
1533 delimiter string, starting at the end of the string and working\n\
1534 to the front. If maxsplit is given, at most maxsplit splits are\n\
1535 done. If sep is not specified or is None, any whitespace string\n\
1539 string_rsplit(PyStringObject
*self
, PyObject
*args
)
1541 Py_ssize_t len
= PyString_GET_SIZE(self
), n
;
1542 Py_ssize_t maxsplit
= -1;
1543 const char *s
= PyString_AS_STRING(self
), *sub
;
1544 PyObject
*subobj
= Py_None
;
1546 if (!PyArg_ParseTuple(args
, "|On:rsplit", &subobj
, &maxsplit
))
1549 maxsplit
= PY_SSIZE_T_MAX
;
1550 if (subobj
== Py_None
)
1551 return stringlib_rsplit_whitespace((PyObject
*) self
, s
, len
, maxsplit
);
1552 if (PyString_Check(subobj
)) {
1553 sub
= PyString_AS_STRING(subobj
);
1554 n
= PyString_GET_SIZE(subobj
);
1556 #ifdef Py_USING_UNICODE
1557 else if (PyUnicode_Check(subobj
))
1558 return PyUnicode_RSplit((PyObject
*)self
, subobj
, maxsplit
);
1560 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
1563 return stringlib_rsplit((PyObject
*) self
, s
, len
, sub
, n
, maxsplit
);
1567 PyDoc_STRVAR(join__doc__
,
1568 "S.join(iterable) -> string\n\
1570 Return a string which is the concatenation of the strings in the\n\
1571 iterable. The separator between elements is S.");
1574 string_join(PyStringObject
*self
, PyObject
*orig
)
1576 char *sep
= PyString_AS_STRING(self
);
1577 const Py_ssize_t seplen
= PyString_GET_SIZE(self
);
1578 PyObject
*res
= NULL
;
1580 Py_ssize_t seqlen
= 0;
1583 PyObject
*seq
, *item
;
1585 seq
= PySequence_Fast(orig
, "");
1590 seqlen
= PySequence_Size(seq
);
1593 return PyString_FromString("");
1596 item
= PySequence_Fast_GET_ITEM(seq
, 0);
1597 if (PyString_CheckExact(item
) || PyUnicode_CheckExact(item
)) {
1604 /* There are at least two things to join, or else we have a subclass
1605 * of the builtin types in the sequence.
1606 * Do a pre-pass to figure out the total amount of space we'll
1607 * need (sz), see whether any argument is absurd, and defer to
1608 * the Unicode join if appropriate.
1610 for (i
= 0; i
< seqlen
; i
++) {
1611 const size_t old_sz
= sz
;
1612 item
= PySequence_Fast_GET_ITEM(seq
, i
);
1613 if (!PyString_Check(item
)){
1614 #ifdef Py_USING_UNICODE
1615 if (PyUnicode_Check(item
)) {
1616 /* Defer to Unicode join.
1617 * CAUTION: There's no guarantee that the
1618 * original sequence can be iterated over
1619 * again, so we must pass seq here.
1622 result
= PyUnicode_Join((PyObject
*)self
, seq
);
1627 PyErr_Format(PyExc_TypeError
,
1628 "sequence item %zd: expected string,"
1630 i
, Py_TYPE(item
)->tp_name
);
1634 sz
+= PyString_GET_SIZE(item
);
1637 if (sz
< old_sz
|| sz
> PY_SSIZE_T_MAX
) {
1638 PyErr_SetString(PyExc_OverflowError
,
1639 "join() result is too long for a Python string");
1645 /* Allocate result space. */
1646 res
= PyString_FromStringAndSize((char*)NULL
, sz
);
1652 /* Catenate everything. */
1653 p
= PyString_AS_STRING(res
);
1654 for (i
= 0; i
< seqlen
; ++i
) {
1656 item
= PySequence_Fast_GET_ITEM(seq
, i
);
1657 n
= PyString_GET_SIZE(item
);
1658 Py_MEMCPY(p
, PyString_AS_STRING(item
), n
);
1660 if (i
< seqlen
- 1) {
1661 Py_MEMCPY(p
, sep
, seplen
);
1671 _PyString_Join(PyObject
*sep
, PyObject
*x
)
1673 assert(sep
!= NULL
&& PyString_Check(sep
));
1675 return string_join((PyStringObject
*)sep
, x
);
1678 /* helper macro to fixup start/end slice values */
1679 #define ADJUST_INDICES(start, end, len) \
1682 else if (end < 0) { \
1693 Py_LOCAL_INLINE(Py_ssize_t
)
1694 string_find_internal(PyStringObject
*self
, PyObject
*args
, int dir
)
1699 Py_ssize_t start
=0, end
=PY_SSIZE_T_MAX
;
1700 PyObject
*obj_start
=Py_None
, *obj_end
=Py_None
;
1702 if (!PyArg_ParseTuple(args
, "O|OO:find/rfind/index/rindex", &subobj
,
1703 &obj_start
, &obj_end
))
1705 /* To support None in "start" and "end" arguments, meaning
1706 the same as if they were not passed.
1708 if (obj_start
!= Py_None
)
1709 if (!_PyEval_SliceIndex(obj_start
, &start
))
1711 if (obj_end
!= Py_None
)
1712 if (!_PyEval_SliceIndex(obj_end
, &end
))
1715 if (PyString_Check(subobj
)) {
1716 sub
= PyString_AS_STRING(subobj
);
1717 sub_len
= PyString_GET_SIZE(subobj
);
1719 #ifdef Py_USING_UNICODE
1720 else if (PyUnicode_Check(subobj
))
1721 return PyUnicode_Find(
1722 (PyObject
*)self
, subobj
, start
, end
, dir
);
1724 else if (PyObject_AsCharBuffer(subobj
, &sub
, &sub_len
))
1725 /* XXX - the "expected a character buffer object" is pretty
1726 confusing for a non-expert. remap to something else ? */
1730 return stringlib_find_slice(
1731 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1732 sub
, sub_len
, start
, end
);
1734 return stringlib_rfind_slice(
1735 PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
1736 sub
, sub_len
, start
, end
);
1740 PyDoc_STRVAR(find__doc__
,
1741 "S.find(sub [,start [,end]]) -> int\n\
1743 Return the lowest index in S where substring sub is found,\n\
1744 such that sub is contained within s[start:end]. Optional\n\
1745 arguments start and end are interpreted as in slice notation.\n\
1747 Return -1 on failure.");
1750 string_find(PyStringObject
*self
, PyObject
*args
)
1752 Py_ssize_t result
= string_find_internal(self
, args
, +1);
1755 return PyInt_FromSsize_t(result
);
1759 PyDoc_STRVAR(index__doc__
,
1760 "S.index(sub [,start [,end]]) -> int\n\
1762 Like S.find() but raise ValueError when the substring is not found.");
1765 string_index(PyStringObject
*self
, PyObject
*args
)
1767 Py_ssize_t result
= string_find_internal(self
, args
, +1);
1771 PyErr_SetString(PyExc_ValueError
,
1772 "substring not found");
1775 return PyInt_FromSsize_t(result
);
1779 PyDoc_STRVAR(rfind__doc__
,
1780 "S.rfind(sub [,start [,end]]) -> int\n\
1782 Return the highest index in S where substring sub is found,\n\
1783 such that sub is contained within s[start:end]. Optional\n\
1784 arguments start and end are interpreted as in slice notation.\n\
1786 Return -1 on failure.");
1789 string_rfind(PyStringObject
*self
, PyObject
*args
)
1791 Py_ssize_t result
= string_find_internal(self
, args
, -1);
1794 return PyInt_FromSsize_t(result
);
1798 PyDoc_STRVAR(rindex__doc__
,
1799 "S.rindex(sub [,start [,end]]) -> int\n\
1801 Like S.rfind() but raise ValueError when the substring is not found.");
1804 string_rindex(PyStringObject
*self
, PyObject
*args
)
1806 Py_ssize_t result
= string_find_internal(self
, args
, -1);
1810 PyErr_SetString(PyExc_ValueError
,
1811 "substring not found");
1814 return PyInt_FromSsize_t(result
);
1818 Py_LOCAL_INLINE(PyObject
*)
1819 do_xstrip(PyStringObject
*self
, int striptype
, PyObject
*sepobj
)
1821 char *s
= PyString_AS_STRING(self
);
1822 Py_ssize_t len
= PyString_GET_SIZE(self
);
1823 char *sep
= PyString_AS_STRING(sepobj
);
1824 Py_ssize_t seplen
= PyString_GET_SIZE(sepobj
);
1828 if (striptype
!= RIGHTSTRIP
) {
1829 while (i
< len
&& memchr(sep
, Py_CHARMASK(s
[i
]), seplen
)) {
1835 if (striptype
!= LEFTSTRIP
) {
1838 } while (j
>= i
&& memchr(sep
, Py_CHARMASK(s
[j
]), seplen
));
1842 if (i
== 0 && j
== len
&& PyString_CheckExact(self
)) {
1844 return (PyObject
*)self
;
1847 return PyString_FromStringAndSize(s
+i
, j
-i
);
1851 Py_LOCAL_INLINE(PyObject
*)
1852 do_strip(PyStringObject
*self
, int striptype
)
1854 char *s
= PyString_AS_STRING(self
);
1855 Py_ssize_t len
= PyString_GET_SIZE(self
), i
, j
;
1858 if (striptype
!= RIGHTSTRIP
) {
1859 while (i
< len
&& isspace(Py_CHARMASK(s
[i
]))) {
1865 if (striptype
!= LEFTSTRIP
) {
1868 } while (j
>= i
&& isspace(Py_CHARMASK(s
[j
])));
1872 if (i
== 0 && j
== len
&& PyString_CheckExact(self
)) {
1874 return (PyObject
*)self
;
1877 return PyString_FromStringAndSize(s
+i
, j
-i
);
1881 Py_LOCAL_INLINE(PyObject
*)
1882 do_argstrip(PyStringObject
*self
, int striptype
, PyObject
*args
)
1884 PyObject
*sep
= NULL
;
1886 if (!PyArg_ParseTuple(args
, (char *)stripformat
[striptype
], &sep
))
1889 if (sep
!= NULL
&& sep
!= Py_None
) {
1890 if (PyString_Check(sep
))
1891 return do_xstrip(self
, striptype
, sep
);
1892 #ifdef Py_USING_UNICODE
1893 else if (PyUnicode_Check(sep
)) {
1894 PyObject
*uniself
= PyUnicode_FromObject((PyObject
*)self
);
1898 res
= _PyUnicode_XStrip((PyUnicodeObject
*)uniself
,
1904 PyErr_Format(PyExc_TypeError
,
1905 #ifdef Py_USING_UNICODE
1906 "%s arg must be None, str or unicode",
1908 "%s arg must be None or str",
1910 STRIPNAME(striptype
));
1914 return do_strip(self
, striptype
);
1918 PyDoc_STRVAR(strip__doc__
,
1919 "S.strip([chars]) -> string or unicode\n\
1921 Return a copy of the string S with leading and trailing\n\
1922 whitespace removed.\n\
1923 If chars is given and not None, remove characters in chars instead.\n\
1924 If chars is unicode, S will be converted to unicode before stripping");
1927 string_strip(PyStringObject
*self
, PyObject
*args
)
1929 if (PyTuple_GET_SIZE(args
) == 0)
1930 return do_strip(self
, BOTHSTRIP
); /* Common case */
1932 return do_argstrip(self
, BOTHSTRIP
, args
);
1936 PyDoc_STRVAR(lstrip__doc__
,
1937 "S.lstrip([chars]) -> string or unicode\n\
1939 Return a copy of the string S with leading whitespace removed.\n\
1940 If chars is given and not None, remove characters in chars instead.\n\
1941 If chars is unicode, S will be converted to unicode before stripping");
1944 string_lstrip(PyStringObject
*self
, PyObject
*args
)
1946 if (PyTuple_GET_SIZE(args
) == 0)
1947 return do_strip(self
, LEFTSTRIP
); /* Common case */
1949 return do_argstrip(self
, LEFTSTRIP
, args
);
1953 PyDoc_STRVAR(rstrip__doc__
,
1954 "S.rstrip([chars]) -> string or unicode\n\
1956 Return a copy of the string S with trailing whitespace removed.\n\
1957 If chars is given and not None, remove characters in chars instead.\n\
1958 If chars is unicode, S will be converted to unicode before stripping");
1961 string_rstrip(PyStringObject
*self
, PyObject
*args
)
1963 if (PyTuple_GET_SIZE(args
) == 0)
1964 return do_strip(self
, RIGHTSTRIP
); /* Common case */
1966 return do_argstrip(self
, RIGHTSTRIP
, args
);
1970 PyDoc_STRVAR(lower__doc__
,
1971 "S.lower() -> string\n\
1973 Return a copy of the string S converted to lowercase.");
1975 /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1977 #define _tolower tolower
1981 string_lower(PyStringObject
*self
)
1984 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
1987 newobj
= PyString_FromStringAndSize(NULL
, n
);
1991 s
= PyString_AS_STRING(newobj
);
1993 Py_MEMCPY(s
, PyString_AS_STRING(self
), n
);
1995 for (i
= 0; i
< n
; i
++) {
1996 int c
= Py_CHARMASK(s
[i
]);
2004 PyDoc_STRVAR(upper__doc__
,
2005 "S.upper() -> string\n\
2007 Return a copy of the string S converted to uppercase.");
2010 #define _toupper toupper
2014 string_upper(PyStringObject
*self
)
2017 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2020 newobj
= PyString_FromStringAndSize(NULL
, n
);
2024 s
= PyString_AS_STRING(newobj
);
2026 Py_MEMCPY(s
, PyString_AS_STRING(self
), n
);
2028 for (i
= 0; i
< n
; i
++) {
2029 int c
= Py_CHARMASK(s
[i
]);
2037 PyDoc_STRVAR(title__doc__
,
2038 "S.title() -> string\n\
2040 Return a titlecased version of S, i.e. words start with uppercase\n\
2041 characters, all remaining cased characters have lowercase.");
2044 string_title(PyStringObject
*self
)
2046 char *s
= PyString_AS_STRING(self
), *s_new
;
2047 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2048 int previous_is_cased
= 0;
2051 newobj
= PyString_FromStringAndSize(NULL
, n
);
2054 s_new
= PyString_AsString(newobj
);
2055 for (i
= 0; i
< n
; i
++) {
2056 int c
= Py_CHARMASK(*s
++);
2058 if (!previous_is_cased
)
2060 previous_is_cased
= 1;
2061 } else if (isupper(c
)) {
2062 if (previous_is_cased
)
2064 previous_is_cased
= 1;
2066 previous_is_cased
= 0;
2072 PyDoc_STRVAR(capitalize__doc__
,
2073 "S.capitalize() -> string\n\
2075 Return a copy of the string S with only its first character\n\
2079 string_capitalize(PyStringObject
*self
)
2081 char *s
= PyString_AS_STRING(self
), *s_new
;
2082 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2085 newobj
= PyString_FromStringAndSize(NULL
, n
);
2088 s_new
= PyString_AsString(newobj
);
2090 int c
= Py_CHARMASK(*s
++);
2092 *s_new
= toupper(c
);
2097 for (i
= 1; i
< n
; i
++) {
2098 int c
= Py_CHARMASK(*s
++);
2100 *s_new
= tolower(c
);
2109 PyDoc_STRVAR(count__doc__
,
2110 "S.count(sub[, start[, end]]) -> int\n\
2112 Return the number of non-overlapping occurrences of substring sub in\n\
2113 string S[start:end]. Optional arguments start and end are interpreted\n\
2114 as in slice notation.");
2117 string_count(PyStringObject
*self
, PyObject
*args
)
2120 const char *str
= PyString_AS_STRING(self
), *sub
;
2122 Py_ssize_t start
= 0, end
= PY_SSIZE_T_MAX
;
2124 if (!PyArg_ParseTuple(args
, "O|O&O&:count", &sub_obj
,
2125 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
2128 if (PyString_Check(sub_obj
)) {
2129 sub
= PyString_AS_STRING(sub_obj
);
2130 sub_len
= PyString_GET_SIZE(sub_obj
);
2132 #ifdef Py_USING_UNICODE
2133 else if (PyUnicode_Check(sub_obj
)) {
2135 count
= PyUnicode_Count((PyObject
*)self
, sub_obj
, start
, end
);
2139 return PyInt_FromSsize_t(count
);
2142 else if (PyObject_AsCharBuffer(sub_obj
, &sub
, &sub_len
))
2145 ADJUST_INDICES(start
, end
, PyString_GET_SIZE(self
));
2147 return PyInt_FromSsize_t(
2148 stringlib_count(str
+ start
, end
- start
, sub
, sub_len
, PY_SSIZE_T_MAX
)
2152 PyDoc_STRVAR(swapcase__doc__
,
2153 "S.swapcase() -> string\n\
2155 Return a copy of the string S with uppercase characters\n\
2156 converted to lowercase and vice versa.");
2159 string_swapcase(PyStringObject
*self
)
2161 char *s
= PyString_AS_STRING(self
), *s_new
;
2162 Py_ssize_t i
, n
= PyString_GET_SIZE(self
);
2165 newobj
= PyString_FromStringAndSize(NULL
, n
);
2168 s_new
= PyString_AsString(newobj
);
2169 for (i
= 0; i
< n
; i
++) {
2170 int c
= Py_CHARMASK(*s
++);
2172 *s_new
= toupper(c
);
2174 else if (isupper(c
)) {
2175 *s_new
= tolower(c
);
2185 PyDoc_STRVAR(translate__doc__
,
2186 "S.translate(table [,deletechars]) -> string\n\
2188 Return a copy of the string S, where all characters occurring\n\
2189 in the optional argument deletechars are removed, and the\n\
2190 remaining characters have been mapped through the given\n\
2191 translation table, which must be a string of length 256.");
2194 string_translate(PyStringObject
*self
, PyObject
*args
)
2196 register char *input
, *output
;
2198 register Py_ssize_t i
, c
, changed
= 0;
2199 PyObject
*input_obj
= (PyObject
*)self
;
2200 const char *output_start
, *del_table
=NULL
;
2201 Py_ssize_t inlen
, tablen
, dellen
= 0;
2203 int trans_table
[256];
2204 PyObject
*tableobj
, *delobj
= NULL
;
2206 if (!PyArg_UnpackTuple(args
, "translate", 1, 2,
2207 &tableobj
, &delobj
))
2210 if (PyString_Check(tableobj
)) {
2211 table
= PyString_AS_STRING(tableobj
);
2212 tablen
= PyString_GET_SIZE(tableobj
);
2214 else if (tableobj
== Py_None
) {
2218 #ifdef Py_USING_UNICODE
2219 else if (PyUnicode_Check(tableobj
)) {
2220 /* Unicode .translate() does not support the deletechars
2221 parameter; instead a mapping to None will cause characters
2223 if (delobj
!= NULL
) {
2224 PyErr_SetString(PyExc_TypeError
,
2225 "deletions are implemented differently for unicode");
2228 return PyUnicode_Translate((PyObject
*)self
, tableobj
, NULL
);
2231 else if (PyObject_AsCharBuffer(tableobj
, &table
, &tablen
))
2234 if (tablen
!= 256) {
2235 PyErr_SetString(PyExc_ValueError
,
2236 "translation table must be 256 characters long");
2240 if (delobj
!= NULL
) {
2241 if (PyString_Check(delobj
)) {
2242 del_table
= PyString_AS_STRING(delobj
);
2243 dellen
= PyString_GET_SIZE(delobj
);
2245 #ifdef Py_USING_UNICODE
2246 else if (PyUnicode_Check(delobj
)) {
2247 PyErr_SetString(PyExc_TypeError
,
2248 "deletions are implemented differently for unicode");
2252 else if (PyObject_AsCharBuffer(delobj
, &del_table
, &dellen
))
2260 inlen
= PyString_GET_SIZE(input_obj
);
2261 result
= PyString_FromStringAndSize((char *)NULL
, inlen
);
2264 output_start
= output
= PyString_AsString(result
);
2265 input
= PyString_AS_STRING(input_obj
);
2267 if (dellen
== 0 && table
!= NULL
) {
2268 /* If no deletions are required, use faster code */
2269 for (i
= inlen
; --i
>= 0; ) {
2270 c
= Py_CHARMASK(*input
++);
2271 if (Py_CHARMASK((*output
++ = table
[c
])) != c
)
2274 if (changed
|| !PyString_CheckExact(input_obj
))
2277 Py_INCREF(input_obj
);
2281 if (table
== NULL
) {
2282 for (i
= 0; i
< 256; i
++)
2283 trans_table
[i
] = Py_CHARMASK(i
);
2285 for (i
= 0; i
< 256; i
++)
2286 trans_table
[i
] = Py_CHARMASK(table
[i
]);
2289 for (i
= 0; i
< dellen
; i
++)
2290 trans_table
[(int) Py_CHARMASK(del_table
[i
])] = -1;
2292 for (i
= inlen
; --i
>= 0; ) {
2293 c
= Py_CHARMASK(*input
++);
2294 if (trans_table
[c
] != -1)
2295 if (Py_CHARMASK(*output
++ = (char)trans_table
[c
]) == c
)
2299 if (!changed
&& PyString_CheckExact(input_obj
)) {
2301 Py_INCREF(input_obj
);
2304 /* Fix the size of the resulting string */
2305 if (inlen
> 0 && _PyString_Resize(&result
, output
- output_start
))
2311 /* find and count characters and substrings */
2313 #define findchar(target, target_len, c) \
2314 ((char *)memchr((const void *)(target), c, target_len))
2316 /* String ops must return a string. */
2317 /* If the object is subclass of string, create a copy */
2318 Py_LOCAL(PyStringObject
*)
2319 return_self(PyStringObject
*self
)
2321 if (PyString_CheckExact(self
)) {
2325 return (PyStringObject
*)PyString_FromStringAndSize(
2326 PyString_AS_STRING(self
),
2327 PyString_GET_SIZE(self
));
2330 Py_LOCAL_INLINE(Py_ssize_t
)
2331 countchar(const char *target
, int target_len
, char c
, Py_ssize_t maxcount
)
2334 const char *start
=target
;
2335 const char *end
=target
+target_len
;
2337 while ( (start
=findchar(start
, end
-start
, c
)) != NULL
) {
2339 if (count
>= maxcount
)
2347 /* Algorithms for different cases of string replacement */
2349 /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2350 Py_LOCAL(PyStringObject
*)
2351 replace_interleave(PyStringObject
*self
,
2352 const char *to_s
, Py_ssize_t to_len
,
2353 Py_ssize_t maxcount
)
2355 char *self_s
, *result_s
;
2356 Py_ssize_t self_len
, result_len
;
2357 Py_ssize_t count
, i
, product
;
2358 PyStringObject
*result
;
2360 self_len
= PyString_GET_SIZE(self
);
2362 /* 1 at the end plus 1 after every character */
2364 if (maxcount
< count
)
2367 /* Check for overflow */
2368 /* result_len = count * to_len + self_len; */
2369 product
= count
* to_len
;
2370 if (product
/ to_len
!= count
) {
2371 PyErr_SetString(PyExc_OverflowError
,
2372 "replace string is too long");
2375 result_len
= product
+ self_len
;
2376 if (result_len
< 0) {
2377 PyErr_SetString(PyExc_OverflowError
,
2378 "replace string is too long");
2382 if (! (result
= (PyStringObject
*)
2383 PyString_FromStringAndSize(NULL
, result_len
)) )
2386 self_s
= PyString_AS_STRING(self
);
2387 result_s
= PyString_AS_STRING(result
);
2389 /* TODO: special case single character, which doesn't need memcpy */
2391 /* Lay the first one down (guaranteed this will occur) */
2392 Py_MEMCPY(result_s
, to_s
, to_len
);
2396 for (i
=0; i
<count
; i
++) {
2397 *result_s
++ = *self_s
++;
2398 Py_MEMCPY(result_s
, to_s
, to_len
);
2402 /* Copy the rest of the original string */
2403 Py_MEMCPY(result_s
, self_s
, self_len
-i
);
2408 /* Special case for deleting a single character */
2409 /* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2410 Py_LOCAL(PyStringObject
*)
2411 replace_delete_single_character(PyStringObject
*self
,
2412 char from_c
, Py_ssize_t maxcount
)
2414 char *self_s
, *result_s
;
2415 char *start
, *next
, *end
;
2416 Py_ssize_t self_len
, result_len
;
2418 PyStringObject
*result
;
2420 self_len
= PyString_GET_SIZE(self
);
2421 self_s
= PyString_AS_STRING(self
);
2423 count
= countchar(self_s
, self_len
, from_c
, maxcount
);
2425 return return_self(self
);
2428 result_len
= self_len
- count
; /* from_len == 1 */
2429 assert(result_len
>=0);
2431 if ( (result
= (PyStringObject
*)
2432 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
2434 result_s
= PyString_AS_STRING(result
);
2437 end
= self_s
+ self_len
;
2438 while (count
-- > 0) {
2439 next
= findchar(start
, end
-start
, from_c
);
2442 Py_MEMCPY(result_s
, start
, next
-start
);
2443 result_s
+= (next
-start
);
2446 Py_MEMCPY(result_s
, start
, end
-start
);
2451 /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2453 Py_LOCAL(PyStringObject
*)
2454 replace_delete_substring(PyStringObject
*self
,
2455 const char *from_s
, Py_ssize_t from_len
,
2456 Py_ssize_t maxcount
) {
2457 char *self_s
, *result_s
;
2458 char *start
, *next
, *end
;
2459 Py_ssize_t self_len
, result_len
;
2460 Py_ssize_t count
, offset
;
2461 PyStringObject
*result
;
2463 self_len
= PyString_GET_SIZE(self
);
2464 self_s
= PyString_AS_STRING(self
);
2466 count
= stringlib_count(self_s
, self_len
,
2472 return return_self(self
);
2475 result_len
= self_len
- (count
* from_len
);
2476 assert (result_len
>=0);
2478 if ( (result
= (PyStringObject
*)
2479 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
2482 result_s
= PyString_AS_STRING(result
);
2485 end
= self_s
+ self_len
;
2486 while (count
-- > 0) {
2487 offset
= stringlib_find(start
, end
-start
,
2492 next
= start
+ offset
;
2494 Py_MEMCPY(result_s
, start
, next
-start
);
2496 result_s
+= (next
-start
);
2497 start
= next
+from_len
;
2499 Py_MEMCPY(result_s
, start
, end
-start
);
2503 /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2504 Py_LOCAL(PyStringObject
*)
2505 replace_single_character_in_place(PyStringObject
*self
,
2506 char from_c
, char to_c
,
2507 Py_ssize_t maxcount
)
2509 char *self_s
, *result_s
, *start
, *end
, *next
;
2510 Py_ssize_t self_len
;
2511 PyStringObject
*result
;
2513 /* The result string will be the same size */
2514 self_s
= PyString_AS_STRING(self
);
2515 self_len
= PyString_GET_SIZE(self
);
2517 next
= findchar(self_s
, self_len
, from_c
);
2520 /* No matches; return the original string */
2521 return return_self(self
);
2524 /* Need to make a new string */
2525 result
= (PyStringObject
*) PyString_FromStringAndSize(NULL
, self_len
);
2528 result_s
= PyString_AS_STRING(result
);
2529 Py_MEMCPY(result_s
, self_s
, self_len
);
2531 /* change everything in-place, starting with this one */
2532 start
= result_s
+ (next
-self_s
);
2535 end
= result_s
+ self_len
;
2537 while (--maxcount
> 0) {
2538 next
= findchar(start
, end
-start
, from_c
);
2548 /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2549 Py_LOCAL(PyStringObject
*)
2550 replace_substring_in_place(PyStringObject
*self
,
2551 const char *from_s
, Py_ssize_t from_len
,
2552 const char *to_s
, Py_ssize_t to_len
,
2553 Py_ssize_t maxcount
)
2555 char *result_s
, *start
, *end
;
2557 Py_ssize_t self_len
, offset
;
2558 PyStringObject
*result
;
2560 /* The result string will be the same size */
2562 self_s
= PyString_AS_STRING(self
);
2563 self_len
= PyString_GET_SIZE(self
);
2565 offset
= stringlib_find(self_s
, self_len
,
2569 /* No matches; return the original string */
2570 return return_self(self
);
2573 /* Need to make a new string */
2574 result
= (PyStringObject
*) PyString_FromStringAndSize(NULL
, self_len
);
2577 result_s
= PyString_AS_STRING(result
);
2578 Py_MEMCPY(result_s
, self_s
, self_len
);
2580 /* change everything in-place, starting with this one */
2581 start
= result_s
+ offset
;
2582 Py_MEMCPY(start
, to_s
, from_len
);
2584 end
= result_s
+ self_len
;
2586 while ( --maxcount
> 0) {
2587 offset
= stringlib_find(start
, end
-start
,
2592 Py_MEMCPY(start
+offset
, to_s
, from_len
);
2593 start
+= offset
+from_len
;
2599 /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2600 Py_LOCAL(PyStringObject
*)
2601 replace_single_character(PyStringObject
*self
,
2603 const char *to_s
, Py_ssize_t to_len
,
2604 Py_ssize_t maxcount
)
2606 char *self_s
, *result_s
;
2607 char *start
, *next
, *end
;
2608 Py_ssize_t self_len
, result_len
;
2609 Py_ssize_t count
, product
;
2610 PyStringObject
*result
;
2612 self_s
= PyString_AS_STRING(self
);
2613 self_len
= PyString_GET_SIZE(self
);
2615 count
= countchar(self_s
, self_len
, from_c
, maxcount
);
2617 /* no matches, return unchanged */
2618 return return_self(self
);
2621 /* use the difference between current and new, hence the "-1" */
2622 /* result_len = self_len + count * (to_len-1) */
2623 product
= count
* (to_len
-1);
2624 if (product
/ (to_len
-1) != count
) {
2625 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
2628 result_len
= self_len
+ product
;
2629 if (result_len
< 0) {
2630 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
2634 if ( (result
= (PyStringObject
*)
2635 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
2637 result_s
= PyString_AS_STRING(result
);
2640 end
= self_s
+ self_len
;
2641 while (count
-- > 0) {
2642 next
= findchar(start
, end
-start
, from_c
);
2646 if (next
== start
) {
2647 /* replace with the 'to' */
2648 Py_MEMCPY(result_s
, to_s
, to_len
);
2652 /* copy the unchanged old then the 'to' */
2653 Py_MEMCPY(result_s
, start
, next
-start
);
2654 result_s
+= (next
-start
);
2655 Py_MEMCPY(result_s
, to_s
, to_len
);
2660 /* Copy the remainder of the remaining string */
2661 Py_MEMCPY(result_s
, start
, end
-start
);
2666 /* len(self)>=1, len(from)>=2, len(to)>=1, maxcount>=1 */
2667 Py_LOCAL(PyStringObject
*)
2668 replace_substring(PyStringObject
*self
,
2669 const char *from_s
, Py_ssize_t from_len
,
2670 const char *to_s
, Py_ssize_t to_len
,
2671 Py_ssize_t maxcount
) {
2672 char *self_s
, *result_s
;
2673 char *start
, *next
, *end
;
2674 Py_ssize_t self_len
, result_len
;
2675 Py_ssize_t count
, offset
, product
;
2676 PyStringObject
*result
;
2678 self_s
= PyString_AS_STRING(self
);
2679 self_len
= PyString_GET_SIZE(self
);
2681 count
= stringlib_count(self_s
, self_len
,
2686 /* no matches, return unchanged */
2687 return return_self(self
);
2690 /* Check for overflow */
2691 /* result_len = self_len + count * (to_len-from_len) */
2692 product
= count
* (to_len
-from_len
);
2693 if (product
/ (to_len
-from_len
) != count
) {
2694 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
2697 result_len
= self_len
+ product
;
2698 if (result_len
< 0) {
2699 PyErr_SetString(PyExc_OverflowError
, "replace string is too long");
2703 if ( (result
= (PyStringObject
*)
2704 PyString_FromStringAndSize(NULL
, result_len
)) == NULL
)
2706 result_s
= PyString_AS_STRING(result
);
2709 end
= self_s
+ self_len
;
2710 while (count
-- > 0) {
2711 offset
= stringlib_find(start
, end
-start
,
2716 next
= start
+offset
;
2717 if (next
== start
) {
2718 /* replace with the 'to' */
2719 Py_MEMCPY(result_s
, to_s
, to_len
);
2723 /* copy the unchanged old then the 'to' */
2724 Py_MEMCPY(result_s
, start
, next
-start
);
2725 result_s
+= (next
-start
);
2726 Py_MEMCPY(result_s
, to_s
, to_len
);
2728 start
= next
+from_len
;
2731 /* Copy the remainder of the remaining string */
2732 Py_MEMCPY(result_s
, start
, end
-start
);
2738 Py_LOCAL(PyStringObject
*)
2739 replace(PyStringObject
*self
,
2740 const char *from_s
, Py_ssize_t from_len
,
2741 const char *to_s
, Py_ssize_t to_len
,
2742 Py_ssize_t maxcount
)
2745 maxcount
= PY_SSIZE_T_MAX
;
2746 } else if (maxcount
== 0 || PyString_GET_SIZE(self
) == 0) {
2747 /* nothing to do; return the original string */
2748 return return_self(self
);
2751 if (maxcount
== 0 ||
2752 (from_len
== 0 && to_len
== 0)) {
2753 /* nothing to do; return the original string */
2754 return return_self(self
);
2757 /* Handle zero-length special cases */
2759 if (from_len
== 0) {
2760 /* insert the 'to' string everywhere. */
2761 /* >>> "Python".replace("", ".") */
2762 /* '.P.y.t.h.o.n.' */
2763 return replace_interleave(self
, to_s
, to_len
, maxcount
);
2766 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2767 /* point for an empty self string to generate a non-empty string */
2768 /* Special case so the remaining code always gets a non-empty string */
2769 if (PyString_GET_SIZE(self
) == 0) {
2770 return return_self(self
);
2774 /* delete all occurrences of 'from' string */
2775 if (from_len
== 1) {
2776 return replace_delete_single_character(
2777 self
, from_s
[0], maxcount
);
2779 return replace_delete_substring(self
, from_s
, from_len
, maxcount
);
2783 /* Handle special case where both strings have the same length */
2785 if (from_len
== to_len
) {
2786 if (from_len
== 1) {
2787 return replace_single_character_in_place(
2793 return replace_substring_in_place(
2794 self
, from_s
, from_len
, to_s
, to_len
, maxcount
);
2798 /* Otherwise use the more generic algorithms */
2799 if (from_len
== 1) {
2800 return replace_single_character(self
, from_s
[0],
2801 to_s
, to_len
, maxcount
);
2803 /* len('from')>=2, len('to')>=1 */
2804 return replace_substring(self
, from_s
, from_len
, to_s
, to_len
, maxcount
);
2808 PyDoc_STRVAR(replace__doc__
,
2809 "S.replace(old, new[, count]) -> string\n\
2811 Return a copy of string S with all occurrences of substring\n\
2812 old replaced by new. If the optional argument count is\n\
2813 given, only the first count occurrences are replaced.");
2816 string_replace(PyStringObject
*self
, PyObject
*args
)
2818 Py_ssize_t count
= -1;
2819 PyObject
*from
, *to
;
2820 const char *from_s
, *to_s
;
2821 Py_ssize_t from_len
, to_len
;
2823 if (!PyArg_ParseTuple(args
, "OO|n:replace", &from
, &to
, &count
))
2826 if (PyString_Check(from
)) {
2827 from_s
= PyString_AS_STRING(from
);
2828 from_len
= PyString_GET_SIZE(from
);
2830 #ifdef Py_USING_UNICODE
2831 if (PyUnicode_Check(from
))
2832 return PyUnicode_Replace((PyObject
*)self
,
2835 else if (PyObject_AsCharBuffer(from
, &from_s
, &from_len
))
2838 if (PyString_Check(to
)) {
2839 to_s
= PyString_AS_STRING(to
);
2840 to_len
= PyString_GET_SIZE(to
);
2842 #ifdef Py_USING_UNICODE
2843 else if (PyUnicode_Check(to
))
2844 return PyUnicode_Replace((PyObject
*)self
,
2847 else if (PyObject_AsCharBuffer(to
, &to_s
, &to_len
))
2850 return (PyObject
*)replace((PyStringObject
*) self
,
2852 to_s
, to_len
, count
);
2857 /* Matches the end (direction >= 0) or start (direction < 0) of self
2858 * against substr, using the start and end arguments. Returns
2859 * -1 on error, 0 if not found and 1 if found.
2862 _string_tailmatch(PyStringObject
*self
, PyObject
*substr
, Py_ssize_t start
,
2863 Py_ssize_t end
, int direction
)
2865 Py_ssize_t len
= PyString_GET_SIZE(self
);
2870 if (PyString_Check(substr
)) {
2871 sub
= PyString_AS_STRING(substr
);
2872 slen
= PyString_GET_SIZE(substr
);
2874 #ifdef Py_USING_UNICODE
2875 else if (PyUnicode_Check(substr
))
2876 return PyUnicode_Tailmatch((PyObject
*)self
,
2877 substr
, start
, end
, direction
);
2879 else if (PyObject_AsCharBuffer(substr
, &sub
, &slen
))
2881 str
= PyString_AS_STRING(self
);
2883 ADJUST_INDICES(start
, end
, len
);
2885 if (direction
< 0) {
2887 if (start
+slen
> len
)
2891 if (end
-start
< slen
|| start
> len
)
2894 if (end
-slen
> start
)
2897 if (end
-start
>= slen
)
2898 return ! memcmp(str
+start
, sub
, slen
);
2903 PyDoc_STRVAR(startswith__doc__
,
2904 "S.startswith(prefix[, start[, end]]) -> bool\n\
2906 Return True if S starts with the specified prefix, False otherwise.\n\
2907 With optional start, test S beginning at that position.\n\
2908 With optional end, stop comparing S at that position.\n\
2909 prefix can also be a tuple of strings to try.");
2912 string_startswith(PyStringObject
*self
, PyObject
*args
)
2914 Py_ssize_t start
= 0;
2915 Py_ssize_t end
= PY_SSIZE_T_MAX
;
2919 if (!PyArg_ParseTuple(args
, "O|O&O&:startswith", &subobj
,
2920 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
2922 if (PyTuple_Check(subobj
)) {
2924 for (i
= 0; i
< PyTuple_GET_SIZE(subobj
); i
++) {
2925 result
= _string_tailmatch(self
,
2926 PyTuple_GET_ITEM(subobj
, i
),
2936 result
= _string_tailmatch(self
, subobj
, start
, end
, -1);
2940 return PyBool_FromLong(result
);
2944 PyDoc_STRVAR(endswith__doc__
,
2945 "S.endswith(suffix[, start[, end]]) -> bool\n\
2947 Return True if S ends with the specified suffix, False otherwise.\n\
2948 With optional start, test S beginning at that position.\n\
2949 With optional end, stop comparing S at that position.\n\
2950 suffix can also be a tuple of strings to try.");
2953 string_endswith(PyStringObject
*self
, PyObject
*args
)
2955 Py_ssize_t start
= 0;
2956 Py_ssize_t end
= PY_SSIZE_T_MAX
;
2960 if (!PyArg_ParseTuple(args
, "O|O&O&:endswith", &subobj
,
2961 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
2963 if (PyTuple_Check(subobj
)) {
2965 for (i
= 0; i
< PyTuple_GET_SIZE(subobj
); i
++) {
2966 result
= _string_tailmatch(self
,
2967 PyTuple_GET_ITEM(subobj
, i
),
2977 result
= _string_tailmatch(self
, subobj
, start
, end
, +1);
2981 return PyBool_FromLong(result
);
2985 PyDoc_STRVAR(encode__doc__
,
2986 "S.encode([encoding[,errors]]) -> object\n\
2988 Encodes S using the codec registered for encoding. encoding defaults\n\
2989 to the default encoding. errors may be given to set a different error\n\
2990 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2991 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2992 'xmlcharrefreplace' as well as any other name registered with\n\
2993 codecs.register_error that is able to handle UnicodeEncodeErrors.");
2996 string_encode(PyStringObject
*self
, PyObject
*args
, PyObject
*kwargs
)
2998 static char *kwlist
[] = {"encoding", "errors", 0};
2999 char *encoding
= NULL
;
3000 char *errors
= NULL
;
3003 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "|ss:encode",
3004 kwlist
, &encoding
, &errors
))
3006 v
= PyString_AsEncodedObject((PyObject
*)self
, encoding
, errors
);
3009 if (!PyString_Check(v
) && !PyUnicode_Check(v
)) {
3010 PyErr_Format(PyExc_TypeError
,
3011 "encoder did not return a string/unicode object "
3013 Py_TYPE(v
)->tp_name
);
3024 PyDoc_STRVAR(decode__doc__
,
3025 "S.decode([encoding[,errors]]) -> object\n\
3027 Decodes S using the codec registered for encoding. encoding defaults\n\
3028 to the default encoding. errors may be given to set a different error\n\
3029 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3030 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3031 as well as any other name registered with codecs.register_error that is\n\
3032 able to handle UnicodeDecodeErrors.");
3035 string_decode(PyStringObject
*self
, PyObject
*args
, PyObject
*kwargs
)
3037 static char *kwlist
[] = {"encoding", "errors", 0};
3038 char *encoding
= NULL
;
3039 char *errors
= NULL
;
3042 if (!PyArg_ParseTupleAndKeywords(args
, kwargs
, "|ss:decode",
3043 kwlist
, &encoding
, &errors
))
3045 v
= PyString_AsDecodedObject((PyObject
*)self
, encoding
, errors
);
3048 if (!PyString_Check(v
) && !PyUnicode_Check(v
)) {
3049 PyErr_Format(PyExc_TypeError
,
3050 "decoder did not return a string/unicode object "
3052 Py_TYPE(v
)->tp_name
);
3063 PyDoc_STRVAR(expandtabs__doc__
,
3064 "S.expandtabs([tabsize]) -> string\n\
3066 Return a copy of S where all tab characters are expanded using spaces.\n\
3067 If tabsize is not given, a tab size of 8 characters is assumed.");
3070 string_expandtabs(PyStringObject
*self
, PyObject
*args
)
3072 const char *e
, *p
, *qe
;
3074 Py_ssize_t i
, j
, incr
;
3078 if (!PyArg_ParseTuple(args
, "|i:expandtabs", &tabsize
))
3081 /* First pass: determine size of output string */
3082 i
= 0; /* chars up to and including most recent \n or \r */
3083 j
= 0; /* chars since most recent \n or \r (use in tab calculations) */
3084 e
= PyString_AS_STRING(self
) + PyString_GET_SIZE(self
); /* end of input */
3085 for (p
= PyString_AS_STRING(self
); p
< e
; p
++)
3088 incr
= tabsize
- (j
% tabsize
);
3089 if (j
> PY_SSIZE_T_MAX
- incr
)
3095 if (j
> PY_SSIZE_T_MAX
- 1)
3098 if (*p
== '\n' || *p
== '\r') {
3099 if (i
> PY_SSIZE_T_MAX
- j
)
3106 if (i
> PY_SSIZE_T_MAX
- j
)
3109 /* Second pass: create output string and fill it */
3110 u
= PyString_FromStringAndSize(NULL
, i
+ j
);
3114 j
= 0; /* same as in first pass */
3115 q
= PyString_AS_STRING(u
); /* next output char */
3116 qe
= PyString_AS_STRING(u
) + PyString_GET_SIZE(u
); /* end of output */
3118 for (p
= PyString_AS_STRING(self
); p
< e
; p
++)
3121 i
= tabsize
- (j
% tabsize
);
3135 if (*p
== '\n' || *p
== '\r')
3144 PyErr_SetString(PyExc_OverflowError
, "new string is too long");
3148 Py_LOCAL_INLINE(PyObject
*)
3149 pad(PyStringObject
*self
, Py_ssize_t left
, Py_ssize_t right
, char fill
)
3158 if (left
== 0 && right
== 0 && PyString_CheckExact(self
)) {
3160 return (PyObject
*)self
;
3163 u
= PyString_FromStringAndSize(NULL
,
3164 left
+ PyString_GET_SIZE(self
) + right
);
3167 memset(PyString_AS_STRING(u
), fill
, left
);
3168 Py_MEMCPY(PyString_AS_STRING(u
) + left
,
3169 PyString_AS_STRING(self
),
3170 PyString_GET_SIZE(self
));
3172 memset(PyString_AS_STRING(u
) + left
+ PyString_GET_SIZE(self
),
3179 PyDoc_STRVAR(ljust__doc__
,
3180 "S.ljust(width[, fillchar]) -> string\n"
3182 "Return S left-justified in a string of length width. Padding is\n"
3183 "done using the specified fill character (default is a space).");
3186 string_ljust(PyStringObject
*self
, PyObject
*args
)
3189 char fillchar
= ' ';
3191 if (!PyArg_ParseTuple(args
, "n|c:ljust", &width
, &fillchar
))
3194 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
3196 return (PyObject
*) self
;
3199 return pad(self
, 0, width
- PyString_GET_SIZE(self
), fillchar
);
3203 PyDoc_STRVAR(rjust__doc__
,
3204 "S.rjust(width[, fillchar]) -> string\n"
3206 "Return S right-justified in a string of length width. Padding is\n"
3207 "done using the specified fill character (default is a space)");
3210 string_rjust(PyStringObject
*self
, PyObject
*args
)
3213 char fillchar
= ' ';
3215 if (!PyArg_ParseTuple(args
, "n|c:rjust", &width
, &fillchar
))
3218 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
3220 return (PyObject
*) self
;
3223 return pad(self
, width
- PyString_GET_SIZE(self
), 0, fillchar
);
3227 PyDoc_STRVAR(center__doc__
,
3228 "S.center(width[, fillchar]) -> string\n"
3230 "Return S centered in a string of length width. Padding is\n"
3231 "done using the specified fill character (default is a space)");
3234 string_center(PyStringObject
*self
, PyObject
*args
)
3236 Py_ssize_t marg
, left
;
3238 char fillchar
= ' ';
3240 if (!PyArg_ParseTuple(args
, "n|c:center", &width
, &fillchar
))
3243 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
3245 return (PyObject
*) self
;
3248 marg
= width
- PyString_GET_SIZE(self
);
3249 left
= marg
/ 2 + (marg
& width
& 1);
3251 return pad(self
, left
, marg
- left
, fillchar
);
3254 PyDoc_STRVAR(zfill__doc__
,
3255 "S.zfill(width) -> string\n"
3257 "Pad a numeric string S with zeros on the left, to fill a field\n"
3258 "of the specified width. The string S is never truncated.");
3261 string_zfill(PyStringObject
*self
, PyObject
*args
)
3268 if (!PyArg_ParseTuple(args
, "n:zfill", &width
))
3271 if (PyString_GET_SIZE(self
) >= width
) {
3272 if (PyString_CheckExact(self
)) {
3274 return (PyObject
*) self
;
3277 return PyString_FromStringAndSize(
3278 PyString_AS_STRING(self
),
3279 PyString_GET_SIZE(self
)
3283 fill
= width
- PyString_GET_SIZE(self
);
3285 s
= pad(self
, fill
, 0, '0');
3290 p
= PyString_AS_STRING(s
);
3291 if (p
[fill
] == '+' || p
[fill
] == '-') {
3292 /* move sign to beginning of string */
3297 return (PyObject
*) s
;
3300 PyDoc_STRVAR(isspace__doc__
,
3301 "S.isspace() -> bool\n\
3303 Return True if all characters in S are whitespace\n\
3304 and there is at least one character in S, False otherwise.");
3307 string_isspace(PyStringObject
*self
)
3309 register const unsigned char *p
3310 = (unsigned char *) PyString_AS_STRING(self
);
3311 register const unsigned char *e
;
3313 /* Shortcut for single character strings */
3314 if (PyString_GET_SIZE(self
) == 1 &&
3316 return PyBool_FromLong(1);
3318 /* Special case for empty strings */
3319 if (PyString_GET_SIZE(self
) == 0)
3320 return PyBool_FromLong(0);
3322 e
= p
+ PyString_GET_SIZE(self
);
3323 for (; p
< e
; p
++) {
3325 return PyBool_FromLong(0);
3327 return PyBool_FromLong(1);
3331 PyDoc_STRVAR(isalpha__doc__
,
3332 "S.isalpha() -> bool\n\
3334 Return True if all characters in S are alphabetic\n\
3335 and there is at least one character in S, False otherwise.");
3338 string_isalpha(PyStringObject
*self
)
3340 register const unsigned char *p
3341 = (unsigned char *) PyString_AS_STRING(self
);
3342 register const unsigned char *e
;
3344 /* Shortcut for single character strings */
3345 if (PyString_GET_SIZE(self
) == 1 &&
3347 return PyBool_FromLong(1);
3349 /* Special case for empty strings */
3350 if (PyString_GET_SIZE(self
) == 0)
3351 return PyBool_FromLong(0);
3353 e
= p
+ PyString_GET_SIZE(self
);
3354 for (; p
< e
; p
++) {
3356 return PyBool_FromLong(0);
3358 return PyBool_FromLong(1);
3362 PyDoc_STRVAR(isalnum__doc__
,
3363 "S.isalnum() -> bool\n\
3365 Return True if all characters in S are alphanumeric\n\
3366 and there is at least one character in S, False otherwise.");
3369 string_isalnum(PyStringObject
*self
)
3371 register const unsigned char *p
3372 = (unsigned char *) PyString_AS_STRING(self
);
3373 register const unsigned char *e
;
3375 /* Shortcut for single character strings */
3376 if (PyString_GET_SIZE(self
) == 1 &&
3378 return PyBool_FromLong(1);
3380 /* Special case for empty strings */
3381 if (PyString_GET_SIZE(self
) == 0)
3382 return PyBool_FromLong(0);
3384 e
= p
+ PyString_GET_SIZE(self
);
3385 for (; p
< e
; p
++) {
3387 return PyBool_FromLong(0);
3389 return PyBool_FromLong(1);
3393 PyDoc_STRVAR(isdigit__doc__
,
3394 "S.isdigit() -> bool\n\
3396 Return True if all characters in S are digits\n\
3397 and there is at least one character in S, False otherwise.");
3400 string_isdigit(PyStringObject
*self
)
3402 register const unsigned char *p
3403 = (unsigned char *) PyString_AS_STRING(self
);
3404 register const unsigned char *e
;
3406 /* Shortcut for single character strings */
3407 if (PyString_GET_SIZE(self
) == 1 &&
3409 return PyBool_FromLong(1);
3411 /* Special case for empty strings */
3412 if (PyString_GET_SIZE(self
) == 0)
3413 return PyBool_FromLong(0);
3415 e
= p
+ PyString_GET_SIZE(self
);
3416 for (; p
< e
; p
++) {
3418 return PyBool_FromLong(0);
3420 return PyBool_FromLong(1);
3424 PyDoc_STRVAR(islower__doc__
,
3425 "S.islower() -> bool\n\
3427 Return True if all cased characters in S are lowercase and there is\n\
3428 at least one cased character in S, False otherwise.");
3431 string_islower(PyStringObject
*self
)
3433 register const unsigned char *p
3434 = (unsigned char *) PyString_AS_STRING(self
);
3435 register const unsigned char *e
;
3438 /* Shortcut for single character strings */
3439 if (PyString_GET_SIZE(self
) == 1)
3440 return PyBool_FromLong(islower(*p
) != 0);
3442 /* Special case for empty strings */
3443 if (PyString_GET_SIZE(self
) == 0)
3444 return PyBool_FromLong(0);
3446 e
= p
+ PyString_GET_SIZE(self
);
3448 for (; p
< e
; p
++) {
3450 return PyBool_FromLong(0);
3451 else if (!cased
&& islower(*p
))
3454 return PyBool_FromLong(cased
);
3458 PyDoc_STRVAR(isupper__doc__
,
3459 "S.isupper() -> bool\n\
3461 Return True if all cased characters in S are uppercase and there is\n\
3462 at least one cased character in S, False otherwise.");
3465 string_isupper(PyStringObject
*self
)
3467 register const unsigned char *p
3468 = (unsigned char *) PyString_AS_STRING(self
);
3469 register const unsigned char *e
;
3472 /* Shortcut for single character strings */
3473 if (PyString_GET_SIZE(self
) == 1)
3474 return PyBool_FromLong(isupper(*p
) != 0);
3476 /* Special case for empty strings */
3477 if (PyString_GET_SIZE(self
) == 0)
3478 return PyBool_FromLong(0);
3480 e
= p
+ PyString_GET_SIZE(self
);
3482 for (; p
< e
; p
++) {
3484 return PyBool_FromLong(0);
3485 else if (!cased
&& isupper(*p
))
3488 return PyBool_FromLong(cased
);
3492 PyDoc_STRVAR(istitle__doc__
,
3493 "S.istitle() -> bool\n\
3495 Return True if S is a titlecased string and there is at least one\n\
3496 character in S, i.e. uppercase characters may only follow uncased\n\
3497 characters and lowercase characters only cased ones. Return False\n\
3501 string_istitle(PyStringObject
*self
, PyObject
*uncased
)
3503 register const unsigned char *p
3504 = (unsigned char *) PyString_AS_STRING(self
);
3505 register const unsigned char *e
;
3506 int cased
, previous_is_cased
;
3508 /* Shortcut for single character strings */
3509 if (PyString_GET_SIZE(self
) == 1)
3510 return PyBool_FromLong(isupper(*p
) != 0);
3512 /* Special case for empty strings */
3513 if (PyString_GET_SIZE(self
) == 0)
3514 return PyBool_FromLong(0);
3516 e
= p
+ PyString_GET_SIZE(self
);
3518 previous_is_cased
= 0;
3519 for (; p
< e
; p
++) {
3520 register const unsigned char ch
= *p
;
3523 if (previous_is_cased
)
3524 return PyBool_FromLong(0);
3525 previous_is_cased
= 1;
3528 else if (islower(ch
)) {
3529 if (!previous_is_cased
)
3530 return PyBool_FromLong(0);
3531 previous_is_cased
= 1;
3535 previous_is_cased
= 0;
3537 return PyBool_FromLong(cased
);
3541 PyDoc_STRVAR(splitlines__doc__
,
3542 "S.splitlines([keepends]) -> list of strings\n\
3544 Return a list of the lines in S, breaking at line boundaries.\n\
3545 Line breaks are not included in the resulting list unless keepends\n\
3546 is given and true.");
3549 string_splitlines(PyStringObject
*self
, PyObject
*args
)
3553 if (!PyArg_ParseTuple(args
, "|i:splitlines", &keepends
))
3556 return stringlib_splitlines(
3557 (PyObject
*) self
, PyString_AS_STRING(self
), PyString_GET_SIZE(self
),
3562 PyDoc_STRVAR(sizeof__doc__
,
3563 "S.__sizeof__() -> size of S in memory, in bytes");
3566 string_sizeof(PyStringObject
*v
)
3569 res
= PyStringObject_SIZE
+ PyString_GET_SIZE(v
) * Py_TYPE(v
)->tp_itemsize
;
3570 return PyInt_FromSsize_t(res
);
3574 string_getnewargs(PyStringObject
*v
)
3576 return Py_BuildValue("(s#)", v
->ob_sval
, Py_SIZE(v
));
3580 #include "stringlib/string_format.h"
3582 PyDoc_STRVAR(format__doc__
,
3583 "S.format(*args, **kwargs) -> unicode\n\
3588 string__format__(PyObject
* self
, PyObject
* args
)
3590 PyObject
*format_spec
;
3591 PyObject
*result
= NULL
;
3592 PyObject
*tmp
= NULL
;
3594 /* If 2.x, convert format_spec to the same type as value */
3595 /* This is to allow things like u''.format('') */
3596 if (!PyArg_ParseTuple(args
, "O:__format__", &format_spec
))
3598 if (!(PyString_Check(format_spec
) || PyUnicode_Check(format_spec
))) {
3599 PyErr_Format(PyExc_TypeError
, "__format__ arg must be str "
3600 "or unicode, not %s", Py_TYPE(format_spec
)->tp_name
);
3603 tmp
= PyObject_Str(format_spec
);
3608 result
= _PyBytes_FormatAdvanced(self
,
3609 PyString_AS_STRING(format_spec
),
3610 PyString_GET_SIZE(format_spec
));
3616 PyDoc_STRVAR(p_format__doc__
,
3617 "S.__format__(format_spec) -> unicode\n\
3623 string_methods
[] = {
3624 /* Counterparts of the obsolete stropmodule functions; except
3625 string.maketrans(). */
3626 {"join", (PyCFunction
)string_join
, METH_O
, join__doc__
},
3627 {"split", (PyCFunction
)string_split
, METH_VARARGS
, split__doc__
},
3628 {"rsplit", (PyCFunction
)string_rsplit
, METH_VARARGS
, rsplit__doc__
},
3629 {"lower", (PyCFunction
)string_lower
, METH_NOARGS
, lower__doc__
},
3630 {"upper", (PyCFunction
)string_upper
, METH_NOARGS
, upper__doc__
},
3631 {"islower", (PyCFunction
)string_islower
, METH_NOARGS
, islower__doc__
},
3632 {"isupper", (PyCFunction
)string_isupper
, METH_NOARGS
, isupper__doc__
},
3633 {"isspace", (PyCFunction
)string_isspace
, METH_NOARGS
, isspace__doc__
},
3634 {"isdigit", (PyCFunction
)string_isdigit
, METH_NOARGS
, isdigit__doc__
},
3635 {"istitle", (PyCFunction
)string_istitle
, METH_NOARGS
, istitle__doc__
},
3636 {"isalpha", (PyCFunction
)string_isalpha
, METH_NOARGS
, isalpha__doc__
},
3637 {"isalnum", (PyCFunction
)string_isalnum
, METH_NOARGS
, isalnum__doc__
},
3638 {"capitalize", (PyCFunction
)string_capitalize
, METH_NOARGS
,
3640 {"count", (PyCFunction
)string_count
, METH_VARARGS
, count__doc__
},
3641 {"endswith", (PyCFunction
)string_endswith
, METH_VARARGS
,
3643 {"partition", (PyCFunction
)string_partition
, METH_O
, partition__doc__
},
3644 {"find", (PyCFunction
)string_find
, METH_VARARGS
, find__doc__
},
3645 {"index", (PyCFunction
)string_index
, METH_VARARGS
, index__doc__
},
3646 {"lstrip", (PyCFunction
)string_lstrip
, METH_VARARGS
, lstrip__doc__
},
3647 {"replace", (PyCFunction
)string_replace
, METH_VARARGS
, replace__doc__
},
3648 {"rfind", (PyCFunction
)string_rfind
, METH_VARARGS
, rfind__doc__
},
3649 {"rindex", (PyCFunction
)string_rindex
, METH_VARARGS
, rindex__doc__
},
3650 {"rstrip", (PyCFunction
)string_rstrip
, METH_VARARGS
, rstrip__doc__
},
3651 {"rpartition", (PyCFunction
)string_rpartition
, METH_O
,
3653 {"startswith", (PyCFunction
)string_startswith
, METH_VARARGS
,
3655 {"strip", (PyCFunction
)string_strip
, METH_VARARGS
, strip__doc__
},
3656 {"swapcase", (PyCFunction
)string_swapcase
, METH_NOARGS
,
3658 {"translate", (PyCFunction
)string_translate
, METH_VARARGS
,
3660 {"title", (PyCFunction
)string_title
, METH_NOARGS
, title__doc__
},
3661 {"ljust", (PyCFunction
)string_ljust
, METH_VARARGS
, ljust__doc__
},
3662 {"rjust", (PyCFunction
)string_rjust
, METH_VARARGS
, rjust__doc__
},
3663 {"center", (PyCFunction
)string_center
, METH_VARARGS
, center__doc__
},
3664 {"zfill", (PyCFunction
)string_zfill
, METH_VARARGS
, zfill__doc__
},
3665 {"format", (PyCFunction
) do_string_format
, METH_VARARGS
| METH_KEYWORDS
, format__doc__
},
3666 {"__format__", (PyCFunction
) string__format__
, METH_VARARGS
, p_format__doc__
},
3667 {"_formatter_field_name_split", (PyCFunction
) formatter_field_name_split
, METH_NOARGS
},
3668 {"_formatter_parser", (PyCFunction
) formatter_parser
, METH_NOARGS
},
3669 {"encode", (PyCFunction
)string_encode
, METH_VARARGS
| METH_KEYWORDS
, encode__doc__
},
3670 {"decode", (PyCFunction
)string_decode
, METH_VARARGS
| METH_KEYWORDS
, decode__doc__
},
3671 {"expandtabs", (PyCFunction
)string_expandtabs
, METH_VARARGS
,
3673 {"splitlines", (PyCFunction
)string_splitlines
, METH_VARARGS
,
3675 {"__sizeof__", (PyCFunction
)string_sizeof
, METH_NOARGS
,
3677 {"__getnewargs__", (PyCFunction
)string_getnewargs
, METH_NOARGS
},
3678 {NULL
, NULL
} /* sentinel */
3682 str_subtype_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
);
3685 string_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
3688 static char *kwlist
[] = {"object", 0};
3690 if (type
!= &PyString_Type
)
3691 return str_subtype_new(type
, args
, kwds
);
3692 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "|O:str", kwlist
, &x
))
3695 return PyString_FromString("");
3696 return PyObject_Str(x
);
3700 str_subtype_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
3702 PyObject
*tmp
, *pnew
;
3705 assert(PyType_IsSubtype(type
, &PyString_Type
));
3706 tmp
= string_new(&PyString_Type
, args
, kwds
);
3709 assert(PyString_CheckExact(tmp
));
3710 n
= PyString_GET_SIZE(tmp
);
3711 pnew
= type
->tp_alloc(type
, n
);
3713 Py_MEMCPY(PyString_AS_STRING(pnew
), PyString_AS_STRING(tmp
), n
+1);
3714 ((PyStringObject
*)pnew
)->ob_shash
=
3715 ((PyStringObject
*)tmp
)->ob_shash
;
3716 ((PyStringObject
*)pnew
)->ob_sstate
= SSTATE_NOT_INTERNED
;
3723 basestring_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
3725 PyErr_SetString(PyExc_TypeError
,
3726 "The basestring type cannot be instantiated");
3731 string_mod(PyObject
*v
, PyObject
*w
)
3733 if (!PyString_Check(v
)) {
3734 Py_INCREF(Py_NotImplemented
);
3735 return Py_NotImplemented
;
3737 return PyString_Format(v
, w
);
3740 PyDoc_STRVAR(basestring_doc
,
3741 "Type basestring cannot be instantiated; it is the base for str and unicode.");
3743 static PyNumberMethods string_as_number
= {
3748 string_mod
, /*nb_remainder*/
3752 PyTypeObject PyBaseString_Type
= {
3753 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
3763 0, /* tp_as_number */
3764 0, /* tp_as_sequence */
3765 0, /* tp_as_mapping */
3769 0, /* tp_getattro */
3770 0, /* tp_setattro */
3771 0, /* tp_as_buffer */
3772 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
, /* tp_flags */
3773 basestring_doc
, /* tp_doc */
3774 0, /* tp_traverse */
3776 0, /* tp_richcompare */
3777 0, /* tp_weaklistoffset */
3779 0, /* tp_iternext */
3783 &PyBaseObject_Type
, /* tp_base */
3785 0, /* tp_descr_get */
3786 0, /* tp_descr_set */
3787 0, /* tp_dictoffset */
3790 basestring_new
, /* tp_new */
3794 PyDoc_STRVAR(string_doc
,
3795 "str(object) -> string\n\
3797 Return a nice string representation of the object.\n\
3798 If the argument is a string, the return value is the same object.");
3800 PyTypeObject PyString_Type
= {
3801 PyVarObject_HEAD_INIT(&PyType_Type
, 0)
3803 PyStringObject_SIZE
,
3805 string_dealloc
, /* tp_dealloc */
3806 (printfunc
)string_print
, /* tp_print */
3810 string_repr
, /* tp_repr */
3811 &string_as_number
, /* tp_as_number */
3812 &string_as_sequence
, /* tp_as_sequence */
3813 &string_as_mapping
, /* tp_as_mapping */
3814 (hashfunc
)string_hash
, /* tp_hash */
3816 string_str
, /* tp_str */
3817 PyObject_GenericGetAttr
, /* tp_getattro */
3818 0, /* tp_setattro */
3819 &string_as_buffer
, /* tp_as_buffer */
3820 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_CHECKTYPES
|
3821 Py_TPFLAGS_BASETYPE
| Py_TPFLAGS_STRING_SUBCLASS
|
3822 Py_TPFLAGS_HAVE_NEWBUFFER
, /* tp_flags */
3823 string_doc
, /* tp_doc */
3824 0, /* tp_traverse */
3826 (richcmpfunc
)string_richcompare
, /* tp_richcompare */
3827 0, /* tp_weaklistoffset */
3829 0, /* tp_iternext */
3830 string_methods
, /* tp_methods */
3833 &PyBaseString_Type
, /* tp_base */
3835 0, /* tp_descr_get */
3836 0, /* tp_descr_set */
3837 0, /* tp_dictoffset */
3840 string_new
, /* tp_new */
3841 PyObject_Del
, /* tp_free */
3845 PyString_Concat(register PyObject
**pv
, register PyObject
*w
)
3847 register PyObject
*v
;
3850 if (w
== NULL
|| !PyString_Check(*pv
)) {
3855 v
= string_concat((PyStringObject
*) *pv
, w
);
3861 PyString_ConcatAndDel(register PyObject
**pv
, register PyObject
*w
)
3863 PyString_Concat(pv
, w
);
3868 /* The following function breaks the notion that strings are immutable:
3869 it changes the size of a string. We get away with this only if there
3870 is only one module referencing the object. You can also think of it
3871 as creating a new string object and destroying the old one, only
3872 more efficiently. In any case, don't use this if the string may
3873 already be known to some other part of the code...
3874 Note that if there's not enough memory to resize the string, the original
3875 string object at *pv is deallocated, *pv is set to NULL, an "out of
3876 memory" exception is set, and -1 is returned. Else (on success) 0 is
3877 returned, and the value in *pv may or may not be the same as on input.
3878 As always, an extra byte is allocated for a trailing \0 byte (newsize
3879 does *not* include that), and a trailing \0 byte is stored.
3883 _PyString_Resize(PyObject
**pv
, Py_ssize_t newsize
)
3885 register PyObject
*v
;
3886 register PyStringObject
*sv
;
3888 if (!PyString_Check(v
) || Py_REFCNT(v
) != 1 || newsize
< 0 ||
3889 PyString_CHECK_INTERNED(v
)) {
3892 PyErr_BadInternalCall();
3895 /* XXX UNREF/NEWREF interface should be more symmetrical */
3897 _Py_ForgetReference(v
);
3899 PyObject_REALLOC((char *)v
, PyStringObject_SIZE
+ newsize
);
3905 _Py_NewReference(*pv
);
3906 sv
= (PyStringObject
*) *pv
;
3907 Py_SIZE(sv
) = newsize
;
3908 sv
->ob_sval
[newsize
] = '\0';
3909 sv
->ob_shash
= -1; /* invalidate cached hash value */
3913 /* Helpers for formatstring */
3915 Py_LOCAL_INLINE(PyObject
*)
3916 getnextarg(PyObject
*args
, Py_ssize_t arglen
, Py_ssize_t
*p_argidx
)
3918 Py_ssize_t argidx
= *p_argidx
;
3919 if (argidx
< arglen
) {
3924 return PyTuple_GetItem(args
, argidx
);
3926 PyErr_SetString(PyExc_TypeError
,
3927 "not enough arguments for format string");
/* Bit flags collected while parsing the flag characters of a % conversion
   in PyString_Format(); each bit corresponds to one flag character. */
#define F_LJUST (1<<0)  /* '-' */
#define F_SIGN  (1<<1)  /* '+' */
#define F_BLANK (1<<2)  /* ' ' */
#define F_ALT   (1<<3)  /* '#' */
#define F_ZERO  (1<<4)  /* '0' */
3944 /* Returns a new reference to a PyString object, or NULL on failure. */
3947 formatfloat(PyObject
*v
, int flags
, int prec
, int type
)
3953 x
= PyFloat_AsDouble(v
);
3954 if (x
== -1.0 && PyErr_Occurred()) {
3955 PyErr_Format(PyExc_TypeError
, "float argument required, "
3956 "not %.200s", Py_TYPE(v
)->tp_name
);
3963 p
= PyOS_double_to_string(x
, type
, prec
,
3964 (flags
& F_ALT
) ? Py_DTSF_ALT
: 0, NULL
);
3968 result
= PyString_FromStringAndSize(p
, strlen(p
));
3973 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3974 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3975 * Python's regular ints.
3976 * Return value: a new PyString*, or NULL if error.
3977 * . *pbuf is set to point into it,
3978 * *plen set to the # of chars following that.
3979 * Caller must decref it when done using pbuf.
3980 * The string starting at *pbuf is of the form
3981 * "-"? ("0x" | "0X")? digit+
3982 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3983 * set in flags. The case of hex digits will be correct,
3984 * There will be at least prec digits, zero-filled on the left if
3985 * necessary to get that many.
3986 * val object to be converted
3987 * flags bitmask of format flags; only F_ALT is looked at
3988 * prec minimum number of digits; 0-fill on left if needed
3989 * type a character in [duoxX]; u acts the same as d
3991 * CAUTION: o, x and X conversions on regular ints can never
3992 * produce a '-' sign, but can for Python's unbounded ints.
3995 _PyString_FormatLong(PyObject
*val
, int flags
, int prec
, int type
,
3996 char **pbuf
, int *plen
)
3998 PyObject
*result
= NULL
;
4001 int sign
; /* 1 if '-', else 0 */
4002 int len
; /* number of characters */
4004 int numdigits
; /* len == numnondigits + numdigits */
4005 int numnondigits
= 0;
4010 result
= Py_TYPE(val
)->tp_str(val
);
4013 result
= Py_TYPE(val
)->tp_as_number
->nb_oct(val
);
4018 result
= Py_TYPE(val
)->tp_as_number
->nb_hex(val
);
4021 assert(!"'type' not in [duoxX]");
4026 buf
= PyString_AsString(result
);
4032 /* To modify the string in-place, there can only be one reference. */
4033 if (Py_REFCNT(result
) != 1) {
4034 PyErr_BadInternalCall();
4037 llen
= PyString_Size(result
);
4038 if (llen
> INT_MAX
) {
4039 PyErr_SetString(PyExc_ValueError
, "string too large in _PyString_FormatLong");
4043 if (buf
[len
-1] == 'L') {
4047 sign
= buf
[0] == '-';
4048 numnondigits
+= sign
;
4049 numdigits
= len
- numnondigits
;
4050 assert(numdigits
> 0);
4052 /* Get rid of base marker unless F_ALT */
4053 if ((flags
& F_ALT
) == 0) {
4054 /* Need to skip 0x, 0X or 0. */
4058 assert(buf
[sign
] == '0');
4059 /* If 0 is only digit, leave it alone. */
4060 if (numdigits
> 1) {
4067 assert(buf
[sign
] == '0');
4068 assert(buf
[sign
+ 1] == 'x');
4079 assert(len
== numnondigits
+ numdigits
);
4080 assert(numdigits
> 0);
4083 /* Fill with leading zeroes to meet minimum width. */
4084 if (prec
> numdigits
) {
4085 PyObject
*r1
= PyString_FromStringAndSize(NULL
,
4086 numnondigits
+ prec
);
4092 b1
= PyString_AS_STRING(r1
);
4093 for (i
= 0; i
< numnondigits
; ++i
)
4095 for (i
= 0; i
< prec
- numdigits
; i
++)
4097 for (i
= 0; i
< numdigits
; i
++)
4102 buf
= PyString_AS_STRING(result
);
4103 len
= numnondigits
+ prec
;
4106 /* Fix up case for hex conversions. */
4108 /* Need to convert all lower case letters to upper case.
4109 and need to convert 0x to 0X (and -0x to -0X). */
4110 for (i
= 0; i
< len
; i
++)
4111 if (buf
[i
] >= 'a' && buf
[i
] <= 'x')
4119 Py_LOCAL_INLINE(int)
4120 formatint(char *buf
, size_t buflen
, int flags
,
4121 int prec
, int type
, PyObject
*v
)
4123 /* fmt = '%#.' + `prec` + 'l' + `type`
4124 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4126 char fmt
[64]; /* plenty big enough! */
4130 x
= PyInt_AsLong(v
);
4131 if (x
== -1 && PyErr_Occurred()) {
4132 PyErr_Format(PyExc_TypeError
, "int argument required, not %.200s",
4133 Py_TYPE(v
)->tp_name
);
4136 if (x
< 0 && type
== 'u') {
4139 if (x
< 0 && (type
== 'x' || type
== 'X' || type
== 'o'))
4146 if ((flags
& F_ALT
) &&
4147 (type
== 'x' || type
== 'X')) {
4148 /* When converting under %#x or %#X, there are a number
4149 * of issues that cause pain:
4150 * - when 0 is being converted, the C standard leaves off
4151 * the '0x' or '0X', which is inconsistent with other
4152 * %#x/%#X conversions and inconsistent with Python's
4154 * - there are platforms that violate the standard and
4155 * convert 0 with the '0x' or '0X'
4156 * (Metrowerks, Compaq Tru64)
4157 * - there are platforms that give '0x' when converting
4158 * under %#X, but convert 0 in accordance with the
4159 * standard (OS/2 EMX)
4161 * We can achieve the desired consistency by inserting our
4162 * own '0x' or '0X' prefix, and substituting %x/%X in place
4165 * Note that this is the same approach as used in
4166 * formatint() in unicodeobject.c
4168 PyOS_snprintf(fmt
, sizeof(fmt
), "%s0%c%%.%dl%c",
4169 sign
, type
, prec
, type
);
4172 PyOS_snprintf(fmt
, sizeof(fmt
), "%s%%%s.%dl%c",
4173 sign
, (flags
&F_ALT
) ? "#" : "",
4177 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4178 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4180 if (buflen
<= 14 || buflen
<= (size_t)3 + (size_t)prec
) {
4181 PyErr_SetString(PyExc_OverflowError
,
4182 "formatted integer is too long (precision too large?)");
4186 PyOS_snprintf(buf
, buflen
, fmt
, -x
);
4188 PyOS_snprintf(buf
, buflen
, fmt
, x
);
4189 return (int)strlen(buf
);
4192 Py_LOCAL_INLINE(int)
4193 formatchar(char *buf
, size_t buflen
, PyObject
*v
)
4195 /* presume that the buffer is at least 2 characters long */
4196 if (PyString_Check(v
)) {
4197 if (!PyArg_Parse(v
, "c;%c requires int or char", &buf
[0]))
4201 if (!PyArg_Parse(v
, "b;%c requires int or char", &buf
[0]))
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the size of the scratch buffer in which ints and chars
   are formatted.  XXX This is a magic number.  Every formatting routine
   bounds-checks against it so nothing can overflow, but a better solution
   may be to malloc a buffer of appropriate size for each format.  For now,
   the current solution is sufficient.
*/
#define FORMATBUFLEN (size_t)120
/* Implements `format % args` for str objects.

   `format` must be a str and `args` must be non-NULL, otherwise
   PyErr_BadInternalCall() is raised.  `args` may be a tuple, a mapping
   (for %(key)s conversions) or a single value.

   The result is built in a str that starts out as len(format) + 100 bytes
   and is enlarged with _PyString_Resize() when a field does not fit; at the
   end it is resized to reslen - rescnt bytes.

   When Py_USING_UNICODE is defined and a unicode argument is met, the str
   produced so far is kept, the rest of the format is decoded and handed to
   PyUnicode_Format(), and the two pieces are joined with
   PyUnicode_Concat().

   NOTE(review): many statements of this function are not visible in this
   listing; the comments below describe only what the visible lines show. */
4219 PyString_Format(PyObject
*format
, PyObject
*args
)
4222 Py_ssize_t arglen
, argidx
;
4223 Py_ssize_t reslen
, rescnt
, fmtcnt
;
4225 PyObject
*result
, *orig_args
;
4226 #ifdef Py_USING_UNICODE
4229 PyObject
*dict
= NULL
;
4230 if (format
== NULL
|| !PyString_Check(format
) || args
== NULL
) {
4231 PyErr_BadInternalCall();
4235 fmt
= PyString_AS_STRING(format
);
4236 fmtcnt
= PyString_GET_SIZE(format
);
/* Initial result size: the format length plus 100 spare bytes. */
4237 reslen
= rescnt
= fmtcnt
+ 100;
4238 result
= PyString_FromStringAndSize((char *)NULL
, reslen
);
4241 res
= PyString_AsString(result
);
4242 if (PyTuple_Check(args
)) {
4243 arglen
= PyTuple_GET_SIZE(args
);
4250 if (Py_TYPE(args
)->tp_as_mapping
&& !PyTuple_Check(args
) &&
4251 !PyObject_TypeCheck(args
, &PyBaseString_Type
))
4253 while (--fmtcnt
>= 0) {
4256 rescnt
= fmtcnt
+ 100;
4258 if (_PyString_Resize(&result
, reslen
))
4260 res
= PyString_AS_STRING(result
)
4267 /* Got a format specifier */
4269 Py_ssize_t width
= -1;
4275 PyObject
*temp
= NULL
;
4279 char formatbuf
[FORMATBUFLEN
];
4280 /* For format{int,char}() */
4281 #ifdef Py_USING_UNICODE
4282 char *fmt_start
= fmt
;
4283 Py_ssize_t argidx_start
= argidx
;
4294 PyErr_SetString(PyExc_TypeError
,
4295 "format requires a mapping");
4301 /* Skip over balanced parentheses */
4302 while (pcount
> 0 && --fmtcnt
>= 0) {
4305 else if (*fmt
== '(')
4309 keylen
= fmt
- keystart
- 1;
4310 if (fmtcnt
< 0 || pcount
> 0) {
4311 PyErr_SetString(PyExc_ValueError
,
4312 "incomplete format key");
4315 key
= PyString_FromStringAndSize(keystart
,
4323 args
= PyObject_GetItem(dict
, key
);
/* Collect the conversion flags ('-', '+', ' ', '#', '0'). */
4332 while (--fmtcnt
>= 0) {
4333 switch (c
= *fmt
++) {
4334 case '-': flags
|= F_LJUST
; continue;
4335 case '+': flags
|= F_SIGN
; continue;
4336 case ' ': flags
|= F_BLANK
; continue;
4337 case '#': flags
|= F_ALT
; continue;
4338 case '0': flags
|= F_ZERO
; continue;
4343 v
= getnextarg(args
, arglen
, &argidx
);
4346 if (!PyInt_Check(v
)) {
4347 PyErr_SetString(PyExc_TypeError
,
4351 width
= PyInt_AsLong(v
);
4359 else if (c
>= 0 && isdigit(c
)) {
4361 while (--fmtcnt
>= 0) {
4362 c
= Py_CHARMASK(*fmt
++);
4365 if ((width
*10) / 10 != width
) {
4371 width
= width
*10 + (c
- '0');
4379 v
= getnextarg(args
, arglen
, &argidx
);
4382 if (!PyInt_Check(v
)) {
4388 prec
= PyInt_AsLong(v
);
4394 else if (c
>= 0 && isdigit(c
)) {
4396 while (--fmtcnt
>= 0) {
4397 c
= Py_CHARMASK(*fmt
++);
4400 if ((prec
*10) / 10 != prec
) {
4406 prec
= prec
*10 + (c
- '0');
4411 if (c
== 'h' || c
== 'l' || c
== 'L') {
4417 PyErr_SetString(PyExc_ValueError
,
4418 "incomplete format");
4422 v
= getnextarg(args
, arglen
, &argidx
);
4434 #ifdef Py_USING_UNICODE
4435 if (PyUnicode_Check(v
)) {
4437 argidx
= argidx_start
;
4441 temp
= _PyObject_Str(v
);
4442 #ifdef Py_USING_UNICODE
4443 if (temp
!= NULL
&& PyUnicode_Check(temp
)) {
4446 argidx
= argidx_start
;
4453 temp
= PyObject_Repr(v
);
4456 if (!PyString_Check(temp
)) {
4457 PyErr_SetString(PyExc_TypeError
,
4458 "%s argument has non-string str()");
4462 pbuf
= PyString_AS_STRING(temp
);
4463 len
= PyString_GET_SIZE(temp
);
4464 if (prec
>= 0 && len
> prec
)
4476 if (PyNumber_Check(v
)) {
4477 PyObject
*iobj
=NULL
;
4479 if (PyInt_Check(v
) || (PyLong_Check(v
))) {
4484 iobj
= PyNumber_Int(v
);
4485 if (iobj
==NULL
) iobj
= PyNumber_Long(v
);
4488 if (PyInt_Check(iobj
)) {
4491 len
= formatint(pbuf
,
4493 flags
, prec
, c
, iobj
);
4499 else if (PyLong_Check(iobj
)) {
4503 temp
= _PyString_FormatLong(iobj
, flags
,
4504 prec
, c
, &pbuf
, &ilen
);
4517 PyErr_Format(PyExc_TypeError
,
4518 "%%%c format: a number is required, "
4519 "not %.200s", c
, Py_TYPE(v
)->tp_name
);
4531 temp
= formatfloat(v
, flags
, prec
, c
);
4534 pbuf
= PyString_AS_STRING(temp
);
4535 len
= PyString_GET_SIZE(temp
);
4541 #ifdef Py_USING_UNICODE
4542 if (PyUnicode_Check(v
)) {
4544 argidx
= argidx_start
;
4549 len
= formatchar(pbuf
, sizeof(formatbuf
), v
);
4554 PyErr_Format(PyExc_ValueError
,
4555 "unsupported format character '%c' (0x%x) "
4558 (Py_ssize_t
)(fmt
- 1 -
4559 PyString_AsString(format
)));
4563 if (*pbuf
== '-' || *pbuf
== '+') {
4567 else if (flags
& F_SIGN
)
4569 else if (flags
& F_BLANK
)
4576 if (rescnt
- (sign
!= 0) < width
) {
4578 rescnt
= width
+ fmtcnt
+ 100;
4583 return PyErr_NoMemory();
4585 if (_PyString_Resize(&result
, reslen
)) {
4589 res
= PyString_AS_STRING(result
)
4599 if ((flags
& F_ALT
) && (c
== 'x' || c
== 'X')) {
4600 assert(pbuf
[0] == '0');
4601 assert(pbuf
[1] == c
);
4612 if (width
> len
&& !(flags
& F_LJUST
)) {
4616 } while (--width
> len
);
4621 if ((flags
& F_ALT
) &&
4622 (c
== 'x' || c
== 'X')) {
4623 assert(pbuf
[0] == '0');
4624 assert(pbuf
[1] == c
);
4629 Py_MEMCPY(res
, pbuf
, len
);
4632 while (--width
>= len
) {
4636 if (dict
&& (argidx
< arglen
) && c
!= '%') {
4637 PyErr_SetString(PyExc_TypeError
,
4638 "not all arguments converted during string formatting");
/* Unused positional arguments are an error; a mapping argument is exempt. */
4645 if (argidx
< arglen
&& !dict
) {
4646 PyErr_SetString(PyExc_TypeError
,
4647 "not all arguments converted during string formatting");
/* Resize the result to reslen - rescnt bytes. */
4653 if (_PyString_Resize(&result
, reslen
- rescnt
))
4657 #ifdef Py_USING_UNICODE
4663 /* Fiddle args right (remove the first argidx arguments) */
4664 if (PyTuple_Check(orig_args
) && argidx
> 0) {
4666 Py_ssize_t n
= PyTuple_GET_SIZE(orig_args
) - argidx
;
4671 PyObject
*w
= PyTuple_GET_ITEM(orig_args
, n
+ argidx
);
4673 PyTuple_SET_ITEM(v
, n
, w
);
4677 Py_INCREF(orig_args
);
4681 /* Take what we have of the result and let the Unicode formatting
4682 function format the rest of the input. */
4683 rescnt
= res
- PyString_AS_STRING(result
);
4684 if (_PyString_Resize(&result
, rescnt
))
4686 fmtcnt
= PyString_GET_SIZE(format
) - \
4687 (fmt
- PyString_AS_STRING(format
));
4688 format
= PyUnicode_Decode(fmt
, fmtcnt
, NULL
, NULL
);
4691 v
= PyUnicode_Format(format
, args
);
4695 /* Paste what we have (result) to what the Unicode formatting
4696 function returned (v) and return the result (or error) */
4697 w
= PyUnicode_Concat(result
, v
);
4702 #endif /* Py_USING_UNICODE */
4713 PyString_InternInPlace(PyObject
**p
)
4715 register PyStringObject
*s
= (PyStringObject
*)(*p
);
4717 if (s
== NULL
|| !PyString_Check(s
))
4718 Py_FatalError("PyString_InternInPlace: strings only please!");
4719 /* If it's a string subclass, we don't really know what putting
4720 it in the interned dict might do. */
4721 if (!PyString_CheckExact(s
))
4723 if (PyString_CHECK_INTERNED(s
))
4725 if (interned
== NULL
) {
4726 interned
= PyDict_New();
4727 if (interned
== NULL
) {
4728 PyErr_Clear(); /* Don't leave an exception */
4732 t
= PyDict_GetItem(interned
, (PyObject
*)s
);
4740 if (PyDict_SetItem(interned
, (PyObject
*)s
, (PyObject
*)s
) < 0) {
4744 /* The two references in interned are not counted by refcnt.
4745 The string deallocator will take care of this */
4747 PyString_CHECK_INTERNED(s
) = SSTATE_INTERNED_MORTAL
;
4751 PyString_InternImmortal(PyObject
**p
)
4753 PyString_InternInPlace(p
);
4754 if (PyString_CHECK_INTERNED(*p
) != SSTATE_INTERNED_IMMORTAL
) {
4755 PyString_CHECK_INTERNED(*p
) = SSTATE_INTERNED_IMMORTAL
;
4762 PyString_InternFromString(const char *cp
)
4764 PyObject
*s
= PyString_FromString(cp
);
4767 PyString_InternInPlace(&s
);
4775 for (i
= 0; i
< UCHAR_MAX
+ 1; i
++) {
4776 Py_XDECREF(characters
[i
]);
4777 characters
[i
] = NULL
;
4779 Py_XDECREF(nullstring
);
4783 void _Py_ReleaseInternedStrings(void)
4788 Py_ssize_t immortal_size
= 0, mortal_size
= 0;
4790 if (interned
== NULL
|| !PyDict_Check(interned
))
4792 keys
= PyDict_Keys(interned
);
4793 if (keys
== NULL
|| !PyList_Check(keys
)) {
4798 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4799 detector, interned strings are not forcibly deallocated; rather, we
4800 give them their stolen references back, and then clear and DECREF
4801 the interned dict. */
4803 n
= PyList_GET_SIZE(keys
);
4804 fprintf(stderr
, "releasing %" PY_FORMAT_SIZE_T
"d interned strings\n",
4806 for (i
= 0; i
< n
; i
++) {
4807 s
= (PyStringObject
*) PyList_GET_ITEM(keys
, i
);
4808 switch (s
->ob_sstate
) {
4809 case SSTATE_NOT_INTERNED
:
4810 /* XXX Shouldn't happen */
4812 case SSTATE_INTERNED_IMMORTAL
:
4814 immortal_size
+= Py_SIZE(s
);
4816 case SSTATE_INTERNED_MORTAL
:
4818 mortal_size
+= Py_SIZE(s
);
4821 Py_FatalError("Inconsistent interned string state.");
4823 s
->ob_sstate
= SSTATE_NOT_INTERNED
;
4825 fprintf(stderr
, "total size of all interned strings: "
4826 "%" PY_FORMAT_SIZE_T
"d/%" PY_FORMAT_SIZE_T
"d "
4827 "mortal/immortal\n", mortal_size
, immortal_size
);
4829 PyDict_Clear(interned
);
4830 Py_DECREF(interned
);