1 /* String object implementation */
8 int null_strings
, one_strings
;
11 #if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
15 static PyStringObject
*characters
[UCHAR_MAX
+ 1];
16 #ifndef DONT_SHARE_SHORT_STRINGS
17 static PyStringObject
*nullstring
;
21 PyString_FromStringAndSize() and PyString_FromString() try in certain cases
22 to share string objects. When the size of the string is zero, these
23 routines always return a pointer to the same string object; when the size
24 is one, they return a pointer to an already existing object if the contents
25 of the string is known. For PyString_FromString() this is always the case,
26 for PyString_FromStringAndSize() this is the case when the first argument
29 A common practice of allocating a string and then filling it in or changing
30 it must be done carefully. It is only allowed to change the contents of
31 the string if the object was gotten from PyString_FromStringAndSize() with
32 a NULL first argument, because in the future these routines may try to do
33 even more sharing of objects.
35 The string in the `str' parameter does not have to be null-character
36 terminated. (Therefore it is safe to construct a substring by using
37 `PyString_FromStringAndSize(origstring, substrlen)'.)
39 The parameter `size' denotes number of characters to allocate, not
40 counting the null terminating character. If the `str' argument is
41 not NULL, then it points to a string of length `size'. For
42 PyString_FromString, this string must be null-terminated.
44 The member `op->ob_size' denotes the number of bytes of data in the string,
45 not counting the null terminating character, and is therefore equal to the
49 PyString_FromStringAndSize(const char *str
, int size
)
51 register PyStringObject
*op
;
52 #ifndef DONT_SHARE_SHORT_STRINGS
53 if (size
== 0 && (op
= nullstring
) != NULL
) {
58 return (PyObject
*)op
;
60 if (size
== 1 && str
!= NULL
&&
61 (op
= characters
[*str
& UCHAR_MAX
]) != NULL
)
67 return (PyObject
*)op
;
69 #endif /* DONT_SHARE_SHORT_STRINGS */
71 /* PyObject_NewVar is inlined */
72 op
= (PyStringObject
*)
73 PyObject_MALLOC(sizeof(PyStringObject
) + size
* sizeof(char));
75 return PyErr_NoMemory();
76 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
81 op
->ob_sinterned
= NULL
;
84 memcpy(op
->ob_sval
, str
, size
);
85 op
->ob_sval
[size
] = '\0';
86 #ifndef DONT_SHARE_SHORT_STRINGS
88 PyObject
*t
= (PyObject
*)op
;
89 PyString_InternInPlace(&t
);
90 op
= (PyStringObject
*)t
;
93 } else if (size
== 1 && str
!= NULL
) {
94 PyObject
*t
= (PyObject
*)op
;
95 PyString_InternInPlace(&t
);
96 op
= (PyStringObject
*)t
;
97 characters
[*str
& UCHAR_MAX
] = op
;
101 return (PyObject
*) op
;
105 PyString_FromString(const char *str
)
107 register size_t size
;
108 register PyStringObject
*op
;
112 if (size
> INT_MAX
) {
113 PyErr_SetString(PyExc_OverflowError
,
114 "string is too long for a Python string");
117 #ifndef DONT_SHARE_SHORT_STRINGS
118 if (size
== 0 && (op
= nullstring
) != NULL
) {
123 return (PyObject
*)op
;
125 if (size
== 1 && (op
= characters
[*str
& UCHAR_MAX
]) != NULL
) {
130 return (PyObject
*)op
;
132 #endif /* DONT_SHARE_SHORT_STRINGS */
134 /* PyObject_NewVar is inlined */
135 op
= (PyStringObject
*)
136 PyObject_MALLOC(sizeof(PyStringObject
) + size
* sizeof(char));
138 return PyErr_NoMemory();
139 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
143 #ifdef INTERN_STRINGS
144 op
->ob_sinterned
= NULL
;
146 memcpy(op
->ob_sval
, str
, size
+1);
147 #ifndef DONT_SHARE_SHORT_STRINGS
149 PyObject
*t
= (PyObject
*)op
;
150 PyString_InternInPlace(&t
);
151 op
= (PyStringObject
*)t
;
154 } else if (size
== 1) {
155 PyObject
*t
= (PyObject
*)op
;
156 PyString_InternInPlace(&t
);
157 op
= (PyStringObject
*)t
;
158 characters
[*str
& UCHAR_MAX
] = op
;
162 return (PyObject
*) op
;
166 PyString_FromFormatV(const char *format
, va_list vargs
)
174 #ifdef VA_LIST_IS_ARRAY
175 memcpy(count
, vargs
, sizeof(va_list));
178 __va_copy(count
, vargs
);
183 /* step 1: figure out how large a buffer we need */
184 for (f
= format
; *f
; f
++) {
187 while (*++f
&& *f
!= '%' && !isalpha(Py_CHARMASK(*f
)))
190 /* skip the 'l' in %ld, since it doesn't change the
191 width. Although only %d is supported (see
192 "expand" section below), others can be easily
194 if (*f
== 'l' && *(f
+1) == 'd')
199 (void)va_arg(count
, int);
200 /* fall through... */
204 case 'd': case 'i': case 'x':
205 (void) va_arg(count
, int);
206 /* 20 bytes is enough to hold a 64-bit
207 integer. Decimal takes the most space.
208 This isn't enough for octal. */
212 s
= va_arg(count
, char*);
216 (void) va_arg(count
, int);
217 /* maximum 64-bit pointer representation:
219 * so 19 characters is enough.
220 * XXX I count 18 -- what's the extra for?
225 /* if we stumble upon an unknown
226 formatting code, copy the rest of
227 the format string to the output
228 string. (we cannot just skip the
229 code, since there's no way to know
230 what's in the argument list) */
238 /* step 2: fill the buffer */
239 /* Since we've analyzed how much space we need for the worst case,
240 use sprintf directly instead of the slower PyOS_snprintf. */
241 string
= PyString_FromStringAndSize(NULL
, n
);
245 s
= PyString_AsString(string
);
247 for (f
= format
; *f
; f
++) {
251 /* parse the width.precision part (we're only
252 interested in the precision value, if any) */
254 while (isdigit(Py_CHARMASK(*f
)))
255 n
= (n
*10) + *f
++ - '0';
259 while (isdigit(Py_CHARMASK(*f
)))
260 n
= (n
*10) + *f
++ - '0';
262 while (*f
&& *f
!= '%' && !isalpha(Py_CHARMASK(*f
)))
264 /* handle the long flag, but only for %ld. others
265 can be added when necessary. */
266 if (*f
== 'l' && *(f
+1) == 'd') {
273 *s
++ = va_arg(vargs
, int);
277 sprintf(s
, "%ld", va_arg(vargs
, long));
279 sprintf(s
, "%d", va_arg(vargs
, int));
283 sprintf(s
, "%i", va_arg(vargs
, int));
287 sprintf(s
, "%x", va_arg(vargs
, int));
291 p
= va_arg(vargs
, char*);
299 sprintf(s
, "%p", va_arg(vargs
, void*));
300 /* %p is ill-defined: ensure leading 0x. */
303 else if (s
[1] != 'x') {
304 memmove(s
+2, s
, strlen(s
)+1);
323 _PyString_Resize(&string
, s
- PyString_AS_STRING(string
));
328 PyString_FromFormat(const char *format
, ...)
333 #ifdef HAVE_STDARG_PROTOTYPES
334 va_start(vargs
, format
);
338 ret
= PyString_FromFormatV(format
, vargs
);
344 PyObject
*PyString_Decode(const char *s
,
346 const char *encoding
,
351 str
= PyString_FromStringAndSize(s
, size
);
354 v
= PyString_AsDecodedString(str
, encoding
, errors
);
359 PyObject
*PyString_AsDecodedObject(PyObject
*str
,
360 const char *encoding
,
365 if (!PyString_Check(str
)) {
370 if (encoding
== NULL
) {
371 #ifdef Py_USING_UNICODE
372 encoding
= PyUnicode_GetDefaultEncoding();
374 PyErr_SetString(PyExc_ValueError
, "no encoding specified");
379 /* Decode via the codec registry */
380 v
= PyCodec_Decode(str
, encoding
, errors
);
390 PyObject
*PyString_AsDecodedString(PyObject
*str
,
391 const char *encoding
,
396 v
= PyString_AsDecodedObject(str
, encoding
, errors
);
400 #ifdef Py_USING_UNICODE
401 /* Convert Unicode to a string using the default encoding */
402 if (PyUnicode_Check(v
)) {
404 v
= PyUnicode_AsEncodedString(v
, NULL
, NULL
);
410 if (!PyString_Check(v
)) {
411 PyErr_Format(PyExc_TypeError
,
412 "decoder did not return a string object (type=%.400s)",
413 v
->ob_type
->tp_name
);
424 PyObject
*PyString_Encode(const char *s
,
426 const char *encoding
,
431 str
= PyString_FromStringAndSize(s
, size
);
434 v
= PyString_AsEncodedString(str
, encoding
, errors
);
439 PyObject
*PyString_AsEncodedObject(PyObject
*str
,
440 const char *encoding
,
445 if (!PyString_Check(str
)) {
450 if (encoding
== NULL
) {
451 #ifdef Py_USING_UNICODE
452 encoding
= PyUnicode_GetDefaultEncoding();
454 PyErr_SetString(PyExc_ValueError
, "no encoding specified");
459 /* Encode via the codec registry */
460 v
= PyCodec_Encode(str
, encoding
, errors
);
470 PyObject
*PyString_AsEncodedString(PyObject
*str
,
471 const char *encoding
,
476 v
= PyString_AsEncodedObject(str
, encoding
, errors
);
480 #ifdef Py_USING_UNICODE
481 /* Convert Unicode to a string using the default encoding */
482 if (PyUnicode_Check(v
)) {
484 v
= PyUnicode_AsEncodedString(v
, NULL
, NULL
);
490 if (!PyString_Check(v
)) {
491 PyErr_Format(PyExc_TypeError
,
492 "encoder did not return a string object (type=%.400s)",
493 v
->ob_type
->tp_name
);
505 string_dealloc(PyObject
*op
)
507 op
->ob_type
->tp_free(op
);
511 string_getsize(register PyObject
*op
)
515 if (PyString_AsStringAndSize(op
, &s
, &len
))
520 static /*const*/ char *
521 string_getbuffer(register PyObject
*op
)
525 if (PyString_AsStringAndSize(op
, &s
, &len
))
531 PyString_Size(register PyObject
*op
)
533 if (!PyString_Check(op
))
534 return string_getsize(op
);
535 return ((PyStringObject
*)op
) -> ob_size
;
539 PyString_AsString(register PyObject
*op
)
541 if (!PyString_Check(op
))
542 return string_getbuffer(op
);
543 return ((PyStringObject
*)op
) -> ob_sval
;
547 PyString_AsStringAndSize(register PyObject
*obj
,
552 PyErr_BadInternalCall();
556 if (!PyString_Check(obj
)) {
557 #ifdef Py_USING_UNICODE
558 if (PyUnicode_Check(obj
)) {
559 obj
= _PyUnicode_AsDefaultEncodedString(obj
, NULL
);
566 PyErr_Format(PyExc_TypeError
,
567 "expected string or Unicode object, "
568 "%.200s found", obj
->ob_type
->tp_name
);
573 *s
= PyString_AS_STRING(obj
);
575 *len
= PyString_GET_SIZE(obj
);
576 else if ((int)strlen(*s
) != PyString_GET_SIZE(obj
)) {
577 PyErr_SetString(PyExc_TypeError
,
578 "expected string without null bytes");
587 string_print(PyStringObject
*op
, FILE *fp
, int flags
)
593 /* XXX Ought to check for interrupts when writing long strings */
594 if (! PyString_CheckExact(op
)) {
596 /* A str subclass may have its own __str__ method. */
597 op
= (PyStringObject
*) PyObject_Str((PyObject
*)op
);
600 ret
= string_print(op
, fp
, flags
);
604 if (flags
& Py_PRINT_RAW
) {
605 fwrite(op
->ob_sval
, 1, (int) op
->ob_size
, fp
);
609 /* figure out which quote to use; single is preferred */
611 if (strchr(op
->ob_sval
, '\'') &&
612 !strchr(op
->ob_sval
, '"'))
616 for (i
= 0; i
< op
->ob_size
; i
++) {
618 if (c
== quote
|| c
== '\\')
619 fprintf(fp
, "\\%c", c
);
626 else if (c
< ' ' || c
>= 0x7f)
627 fprintf(fp
, "\\x%02x", c
& 0xff);
636 string_repr(register PyStringObject
*op
)
638 size_t newsize
= 2 + 4 * op
->ob_size
* sizeof(char);
640 if (newsize
> INT_MAX
) {
641 PyErr_SetString(PyExc_OverflowError
,
642 "string is too large to make repr");
644 v
= PyString_FromStringAndSize((char *)NULL
, newsize
);
654 /* figure out which quote to use; single is preferred */
656 if (strchr(op
->ob_sval
, '\'') &&
657 !strchr(op
->ob_sval
, '"'))
660 p
= PyString_AS_STRING(v
);
662 for (i
= 0; i
< op
->ob_size
; i
++) {
663 /* There's at least enough room for a hex escape
664 and a closing quote. */
665 assert(newsize
- (p
- PyString_AS_STRING(v
)) >= 5);
667 if (c
== quote
|| c
== '\\')
668 *p
++ = '\\', *p
++ = c
;
670 *p
++ = '\\', *p
++ = 't';
672 *p
++ = '\\', *p
++ = 'n';
674 *p
++ = '\\', *p
++ = 'r';
675 else if (c
< ' ' || c
>= 0x7f) {
676 /* For performance, we don't want to call
677 PyOS_snprintf here (extra layers of
679 sprintf(p
, "\\x%02x", c
& 0xff);
685 assert(newsize
- (p
- PyString_AS_STRING(v
)) >= 1);
689 &v
, (int) (p
- PyString_AS_STRING(v
)));
695 string_str(PyObject
*s
)
697 assert(PyString_Check(s
));
698 if (PyString_CheckExact(s
)) {
703 /* Subtype -- return genuine string with the same value. */
704 PyStringObject
*t
= (PyStringObject
*) s
;
705 return PyString_FromStringAndSize(t
->ob_sval
, t
->ob_size
);
710 string_length(PyStringObject
*a
)
716 string_concat(register PyStringObject
*a
, register PyObject
*bb
)
718 register unsigned int size
;
719 register PyStringObject
*op
;
720 if (!PyString_Check(bb
)) {
721 #ifdef Py_USING_UNICODE
722 if (PyUnicode_Check(bb
))
723 return PyUnicode_Concat((PyObject
*)a
, bb
);
725 PyErr_Format(PyExc_TypeError
,
726 "cannot concatenate 'str' and '%.200s' objects",
727 bb
->ob_type
->tp_name
);
730 #define b ((PyStringObject *)bb)
731 /* Optimize cases with empty left or right operand */
732 if ((a
->ob_size
== 0 || b
->ob_size
== 0) &&
733 PyString_CheckExact(a
) && PyString_CheckExact(b
)) {
734 if (a
->ob_size
== 0) {
739 return (PyObject
*)a
;
741 size
= a
->ob_size
+ b
->ob_size
;
742 /* PyObject_NewVar is inlined */
743 op
= (PyStringObject
*)
744 PyObject_MALLOC(sizeof(PyStringObject
) + size
* sizeof(char));
746 return PyErr_NoMemory();
747 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
751 #ifdef INTERN_STRINGS
752 op
->ob_sinterned
= NULL
;
754 memcpy(op
->ob_sval
, a
->ob_sval
, (int) a
->ob_size
);
755 memcpy(op
->ob_sval
+ a
->ob_size
, b
->ob_sval
, (int) b
->ob_size
);
756 op
->ob_sval
[size
] = '\0';
757 return (PyObject
*) op
;
762 string_repeat(register PyStringObject
*a
, register int n
)
766 register PyStringObject
*op
;
770 /* watch out for overflows: the size can overflow int,
771 * and the # of bytes needed can overflow size_t
773 size
= a
->ob_size
* n
;
774 if (n
&& size
/ n
!= a
->ob_size
) {
775 PyErr_SetString(PyExc_OverflowError
,
776 "repeated string is too long");
779 if (size
== a
->ob_size
&& PyString_CheckExact(a
)) {
781 return (PyObject
*)a
;
783 nbytes
= size
* sizeof(char);
784 if (nbytes
/ sizeof(char) != (size_t)size
||
785 nbytes
+ sizeof(PyStringObject
) <= nbytes
) {
786 PyErr_SetString(PyExc_OverflowError
,
787 "repeated string is too long");
790 op
= (PyStringObject
*)
791 PyObject_MALLOC(sizeof(PyStringObject
) + nbytes
);
793 return PyErr_NoMemory();
794 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
798 #ifdef INTERN_STRINGS
799 op
->ob_sinterned
= NULL
;
801 for (i
= 0; i
< size
; i
+= a
->ob_size
)
802 memcpy(op
->ob_sval
+i
, a
->ob_sval
, (int) a
->ob_size
);
803 op
->ob_sval
[size
] = '\0';
804 return (PyObject
*) op
;
807 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
810 string_slice(register PyStringObject
*a
, register int i
, register int j
)
811 /* j -- may be negative! */
816 j
= 0; /* Avoid signed/unsigned bug in next line */
819 if (i
== 0 && j
== a
->ob_size
&& PyString_CheckExact(a
)) {
820 /* It's the same as a */
822 return (PyObject
*)a
;
826 return PyString_FromStringAndSize(a
->ob_sval
+ i
, (int) (j
-i
));
830 string_contains(PyObject
*a
, PyObject
*el
)
832 register char *s
, *end
;
834 #ifdef Py_USING_UNICODE
835 if (PyUnicode_Check(el
))
836 return PyUnicode_Contains(a
, el
);
838 if (!PyString_Check(el
) || PyString_Size(el
) != 1) {
839 PyErr_SetString(PyExc_TypeError
,
840 "'in <string>' requires character as left operand");
843 c
= PyString_AsString(el
)[0];
844 s
= PyString_AsString(a
);
845 end
= s
+ PyString_Size(a
);
854 string_item(PyStringObject
*a
, register int i
)
858 if (i
< 0 || i
>= a
->ob_size
) {
859 PyErr_SetString(PyExc_IndexError
, "string index out of range");
862 pchar
= a
->ob_sval
+ i
;
863 v
= (PyObject
*)characters
[*pchar
& UCHAR_MAX
];
865 v
= PyString_FromStringAndSize(pchar
, 1);
876 string_richcompare(PyStringObject
*a
, PyStringObject
*b
, int op
)
883 /* Make sure both arguments are strings. */
884 if (!(PyString_Check(a
) && PyString_Check(b
))) {
885 result
= Py_NotImplemented
;
890 case Py_EQ
:case Py_LE
:case Py_GE
:
893 case Py_NE
:case Py_LT
:case Py_GT
:
899 /* Supporting Py_NE here as well does not save
900 much time, since Py_NE is rarely used. */
901 if (a
->ob_size
== b
->ob_size
902 && (a
->ob_sval
[0] == b
->ob_sval
[0]
903 && memcmp(a
->ob_sval
, b
->ob_sval
,
911 len_a
= a
->ob_size
; len_b
= b
->ob_size
;
912 min_len
= (len_a
< len_b
) ? len_a
: len_b
;
914 c
= Py_CHARMASK(*a
->ob_sval
) - Py_CHARMASK(*b
->ob_sval
);
916 c
= memcmp(a
->ob_sval
, b
->ob_sval
, min_len
);
920 c
= (len_a
< len_b
) ? -1 : (len_a
> len_b
) ? 1 : 0;
922 case Py_LT
: c
= c
< 0; break;
923 case Py_LE
: c
= c
<= 0; break;
924 case Py_EQ
: assert(0); break; /* unreachable */
925 case Py_NE
: c
= c
!= 0; break;
926 case Py_GT
: c
= c
> 0; break;
927 case Py_GE
: c
= c
>= 0; break;
929 result
= Py_NotImplemented
;
932 result
= c
? Py_True
: Py_False
;
939 _PyString_Eq(PyObject
*o1
, PyObject
*o2
)
941 PyStringObject
*a
, *b
;
942 a
= (PyStringObject
*)o1
;
943 b
= (PyStringObject
*)o2
;
944 return a
->ob_size
== b
->ob_size
945 && *a
->ob_sval
== *b
->ob_sval
946 && memcmp(a
->ob_sval
, b
->ob_sval
, a
->ob_size
) == 0;
950 string_hash(PyStringObject
*a
)
953 register unsigned char *p
;
957 if (a
->ob_shash
!= -1)
959 #ifdef INTERN_STRINGS
960 if (a
->ob_sinterned
!= NULL
)
961 return (a
->ob_shash
=
962 ((PyStringObject
*)(a
->ob_sinterned
))->ob_shash
);
966 p
= (unsigned char *) a
->ob_sval
;
969 x
= (1000003*x
) ^ *p
++;
980 string_buffer_getreadbuf(PyStringObject
*self
, int index
, const void **ptr
)
983 PyErr_SetString(PyExc_SystemError
,
984 "accessing non-existent string segment");
987 *ptr
= (void *)self
->ob_sval
;
988 return self
->ob_size
;
992 string_buffer_getwritebuf(PyStringObject
*self
, int index
, const void **ptr
)
994 PyErr_SetString(PyExc_TypeError
,
995 "Cannot use string as modifiable buffer");
1000 string_buffer_getsegcount(PyStringObject
*self
, int *lenp
)
1003 *lenp
= self
->ob_size
;
1008 string_buffer_getcharbuf(PyStringObject
*self
, int index
, const char **ptr
)
1011 PyErr_SetString(PyExc_SystemError
,
1012 "accessing non-existent string segment");
1015 *ptr
= self
->ob_sval
;
1016 return self
->ob_size
;
1019 static PySequenceMethods string_as_sequence
= {
1020 (inquiry
)string_length
, /*sq_length*/
1021 (binaryfunc
)string_concat
, /*sq_concat*/
1022 (intargfunc
)string_repeat
, /*sq_repeat*/
1023 (intargfunc
)string_item
, /*sq_item*/
1024 (intintargfunc
)string_slice
, /*sq_slice*/
1027 (objobjproc
)string_contains
/*sq_contains*/
1030 static PyBufferProcs string_as_buffer
= {
1031 (getreadbufferproc
)string_buffer_getreadbuf
,
1032 (getwritebufferproc
)string_buffer_getwritebuf
,
1033 (getsegcountproc
)string_buffer_getsegcount
,
1034 (getcharbufferproc
)string_buffer_getcharbuf
,
1040 #define RIGHTSTRIP 1
1043 /* Arrays indexed by above */
1044 static const char *stripformat
[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
1046 #define STRIPNAME(i) (stripformat[i]+3)
1050 split_whitespace(const char *s
, int len
, int maxsplit
)
1054 PyObject
*list
= PyList_New(0);
1059 for (i
= j
= 0; i
< len
; ) {
1060 while (i
< len
&& isspace(Py_CHARMASK(s
[i
])))
1063 while (i
< len
&& !isspace(Py_CHARMASK(s
[i
])))
1066 if (maxsplit
-- <= 0)
1068 item
= PyString_FromStringAndSize(s
+j
, (int)(i
-j
));
1071 err
= PyList_Append(list
, item
);
1075 while (i
< len
&& isspace(Py_CHARMASK(s
[i
])))
1081 item
= PyString_FromStringAndSize(s
+j
, (int)(len
- j
));
1084 err
= PyList_Append(list
, item
);
1096 static char split__doc__
[] =
1097 "S.split([sep [,maxsplit]]) -> list of strings\n\
1099 Return a list of the words in the string S, using sep as the\n\
1100 delimiter string. If maxsplit is given, at most maxsplit\n\
1101 splits are done. If sep is not specified or is None, any\n\
1102 whitespace string is a separator.";
1105 string_split(PyStringObject
*self
, PyObject
*args
)
1107 int len
= PyString_GET_SIZE(self
), n
, i
, j
, err
;
1109 const char *s
= PyString_AS_STRING(self
), *sub
;
1110 PyObject
*list
, *item
, *subobj
= Py_None
;
1112 if (!PyArg_ParseTuple(args
, "|Oi:split", &subobj
, &maxsplit
))
1116 if (subobj
== Py_None
)
1117 return split_whitespace(s
, len
, maxsplit
);
1118 if (PyString_Check(subobj
)) {
1119 sub
= PyString_AS_STRING(subobj
);
1120 n
= PyString_GET_SIZE(subobj
);
1122 #ifdef Py_USING_UNICODE
1123 else if (PyUnicode_Check(subobj
))
1124 return PyUnicode_Split((PyObject
*)self
, subobj
, maxsplit
);
1126 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
1129 PyErr_SetString(PyExc_ValueError
, "empty separator");
1133 list
= PyList_New(0);
1138 while (i
+n
<= len
) {
1139 if (s
[i
] == sub
[0] && memcmp(s
+i
, sub
, n
) == 0) {
1140 if (maxsplit
-- <= 0)
1142 item
= PyString_FromStringAndSize(s
+j
, (int)(i
-j
));
1145 err
= PyList_Append(list
, item
);
1154 item
= PyString_FromStringAndSize(s
+j
, (int)(len
-j
));
1157 err
= PyList_Append(list
, item
);
1170 static char join__doc__
[] =
1171 "S.join(sequence) -> string\n\
1173 Return a string which is the concatenation of the strings in the\n\
1174 sequence. The separator between elements is S.";
1177 string_join(PyStringObject
*self
, PyObject
*orig
)
1179 char *sep
= PyString_AS_STRING(self
);
1180 const int seplen
= PyString_GET_SIZE(self
);
1181 PyObject
*res
= NULL
;
1186 PyObject
*seq
, *item
;
1188 seq
= PySequence_Fast(orig
, "");
1190 if (PyErr_ExceptionMatches(PyExc_TypeError
))
1191 PyErr_Format(PyExc_TypeError
,
1192 "sequence expected, %.80s found",
1193 orig
->ob_type
->tp_name
);
1197 seqlen
= PySequence_Size(seq
);
1200 return PyString_FromString("");
1203 item
= PySequence_Fast_GET_ITEM(seq
, 0);
1204 if (!PyString_Check(item
) && !PyUnicode_Check(item
)) {
1205 PyErr_Format(PyExc_TypeError
,
1206 "sequence item 0: expected string,"
1208 item
->ob_type
->tp_name
);
1217 /* There are at least two things to join. Do a pre-pass to figure out
1218 * the total amount of space we'll need (sz), see whether any argument
1219 * is absurd, and defer to the Unicode join if appropriate.
1221 for (i
= 0; i
< seqlen
; i
++) {
1222 const size_t old_sz
= sz
;
1223 item
= PySequence_Fast_GET_ITEM(seq
, i
);
1224 if (!PyString_Check(item
)){
1225 #ifdef Py_USING_UNICODE
1226 if (PyUnicode_Check(item
)) {
1227 /* Defer to Unicode join.
1228 * CAUTION: There's no guarantee that the
1229 * original sequence can be iterated over
1230 * again, so we must pass seq here.
1233 result
= PyUnicode_Join((PyObject
*)self
, seq
);
1238 PyErr_Format(PyExc_TypeError
,
1239 "sequence item %i: expected string,"
1241 i
, item
->ob_type
->tp_name
);
1245 sz
+= PyString_GET_SIZE(item
);
1248 if (sz
< old_sz
|| sz
> INT_MAX
) {
1249 PyErr_SetString(PyExc_OverflowError
,
1250 "join() is too long for a Python string");
1256 /* Allocate result space. */
1257 res
= PyString_FromStringAndSize((char*)NULL
, (int)sz
);
1263 /* Catenate everything. */
1264 p
= PyString_AS_STRING(res
);
1265 for (i
= 0; i
< seqlen
; ++i
) {
1267 item
= PySequence_Fast_GET_ITEM(seq
, i
);
1268 n
= PyString_GET_SIZE(item
);
1269 memcpy(p
, PyString_AS_STRING(item
), n
);
1271 if (i
< seqlen
- 1) {
1272 memcpy(p
, sep
, seplen
);
1282 _PyString_Join(PyObject
*sep
, PyObject
*x
)
1284 assert(sep
!= NULL
&& PyString_Check(sep
));
1286 return string_join((PyStringObject
*)sep
, x
);
1290 string_find_internal(PyStringObject
*self
, PyObject
*args
, int dir
)
1292 const char *s
= PyString_AS_STRING(self
), *sub
;
1293 int len
= PyString_GET_SIZE(self
);
1294 int n
, i
= 0, last
= INT_MAX
;
1297 if (!PyArg_ParseTuple(args
, "O|O&O&:find/rfind/index/rindex",
1298 &subobj
, _PyEval_SliceIndex
, &i
, _PyEval_SliceIndex
, &last
))
1300 if (PyString_Check(subobj
)) {
1301 sub
= PyString_AS_STRING(subobj
);
1302 n
= PyString_GET_SIZE(subobj
);
1304 #ifdef Py_USING_UNICODE
1305 else if (PyUnicode_Check(subobj
))
1306 return PyUnicode_Find((PyObject
*)self
, subobj
, i
, last
, dir
);
1308 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
1323 if (n
== 0 && i
<= last
)
1326 for (; i
<= last
; ++i
)
1327 if (s
[i
] == sub
[0] && memcmp(&s
[i
], sub
, n
) == 0)
1333 if (n
== 0 && i
<= last
)
1335 for (j
= last
-n
; j
>= i
; --j
)
1336 if (s
[j
] == sub
[0] && memcmp(&s
[j
], sub
, n
) == 0)
1344 static char find__doc__
[] =
1345 "S.find(sub [,start [,end]]) -> int\n\
1347 Return the lowest index in S where substring sub is found,\n\
1348 such that sub is contained within s[start,end]. Optional\n\
1349 arguments start and end are interpreted as in slice notation.\n\
1351 Return -1 on failure.";
1354 string_find(PyStringObject
*self
, PyObject
*args
)
1356 long result
= string_find_internal(self
, args
, +1);
1359 return PyInt_FromLong(result
);
1363 static char index__doc__
[] =
1364 "S.index(sub [,start [,end]]) -> int\n\
1366 Like S.find() but raise ValueError when the substring is not found.";
1369 string_index(PyStringObject
*self
, PyObject
*args
)
1371 long result
= string_find_internal(self
, args
, +1);
1375 PyErr_SetString(PyExc_ValueError
,
1376 "substring not found in string.index");
1379 return PyInt_FromLong(result
);
1383 static char rfind__doc__
[] =
1384 "S.rfind(sub [,start [,end]]) -> int\n\
1386 Return the highest index in S where substring sub is found,\n\
1387 such that sub is contained within s[start,end]. Optional\n\
1388 arguments start and end are interpreted as in slice notation.\n\
1390 Return -1 on failure.";
1393 string_rfind(PyStringObject
*self
, PyObject
*args
)
1395 long result
= string_find_internal(self
, args
, -1);
1398 return PyInt_FromLong(result
);
1402 static char rindex__doc__
[] =
1403 "S.rindex(sub [,start [,end]]) -> int\n\
1405 Like S.rfind() but raise ValueError when the substring is not found.";
1408 string_rindex(PyStringObject
*self
, PyObject
*args
)
1410 long result
= string_find_internal(self
, args
, -1);
1414 PyErr_SetString(PyExc_ValueError
,
1415 "substring not found in string.rindex");
1418 return PyInt_FromLong(result
);
1423 do_xstrip(PyStringObject
*self
, int striptype
, PyObject
*sepobj
)
1425 char *s
= PyString_AS_STRING(self
);
1426 int len
= PyString_GET_SIZE(self
);
1427 char *sep
= PyString_AS_STRING(sepobj
);
1428 int seplen
= PyString_GET_SIZE(sepobj
);
1432 if (striptype
!= RIGHTSTRIP
) {
1433 while (i
< len
&& memchr(sep
, Py_CHARMASK(s
[i
]), seplen
)) {
1439 if (striptype
!= LEFTSTRIP
) {
1442 } while (j
>= i
&& memchr(sep
, Py_CHARMASK(s
[j
]), seplen
));
1446 if (i
== 0 && j
== len
&& PyString_CheckExact(self
)) {
1448 return (PyObject
*)self
;
1451 return PyString_FromStringAndSize(s
+i
, j
-i
);
1456 do_strip(PyStringObject
*self
, int striptype
)
1458 char *s
= PyString_AS_STRING(self
);
1459 int len
= PyString_GET_SIZE(self
), i
, j
;
1462 if (striptype
!= RIGHTSTRIP
) {
1463 while (i
< len
&& isspace(Py_CHARMASK(s
[i
]))) {
1469 if (striptype
!= LEFTSTRIP
) {
1472 } while (j
>= i
&& isspace(Py_CHARMASK(s
[j
])));
1476 if (i
== 0 && j
== len
&& PyString_CheckExact(self
)) {
1478 return (PyObject
*)self
;
1481 return PyString_FromStringAndSize(s
+i
, j
-i
);
1486 do_argstrip(PyStringObject
*self
, int striptype
, PyObject
*args
)
1488 PyObject
*sep
= NULL
;
1490 if (!PyArg_ParseTuple(args
, (char *)stripformat
[striptype
], &sep
))
1493 if (sep
!= NULL
&& sep
!= Py_None
) {
1494 if (PyString_Check(sep
))
1495 return do_xstrip(self
, striptype
, sep
);
1496 #ifdef Py_USING_UNICODE
1497 else if (PyUnicode_Check(sep
)) {
1498 PyObject
*uniself
= PyUnicode_FromObject((PyObject
*)self
);
1502 res
= _PyUnicode_XStrip((PyUnicodeObject
*)uniself
,
1509 PyErr_Format(PyExc_TypeError
,
1510 #ifdef Py_USING_UNICODE
1511 "%s arg must be None, str or unicode",
1513 "%s arg must be None or str",
1515 STRIPNAME(striptype
));
1518 return do_xstrip(self
, striptype
, sep
);
1521 return do_strip(self
, striptype
);
1525 static char strip__doc__
[] =
1526 "S.strip([sep]) -> string or unicode\n\
1528 Return a copy of the string S with leading and trailing\n\
1529 whitespace removed.\n\
1530 If sep is given and not None, remove characters in sep instead.\n\
1531 If sep is unicode, S will be converted to unicode before stripping";
1534 string_strip(PyStringObject
*self
, PyObject
*args
)
1536 if (PyTuple_GET_SIZE(args
) == 0)
1537 return do_strip(self
, BOTHSTRIP
); /* Common case */
1539 return do_argstrip(self
, BOTHSTRIP
, args
);
1543 static char lstrip__doc__
[] =
1544 "S.lstrip([sep]) -> string or unicode\n\
1546 Return a copy of the string S with leading whitespace removed.\n\
1547 If sep is given and not None, remove characters in sep instead.\n\
1548 If sep is unicode, S will be converted to unicode before stripping";
1551 string_lstrip(PyStringObject
*self
, PyObject
*args
)
1553 if (PyTuple_GET_SIZE(args
) == 0)
1554 return do_strip(self
, LEFTSTRIP
); /* Common case */
1556 return do_argstrip(self
, LEFTSTRIP
, args
);
1560 static char rstrip__doc__
[] =
1561 "S.rstrip([sep]) -> string or unicode\n\
1563 Return a copy of the string S with trailing whitespace removed.\n\
1564 If sep is given and not None, remove characters in sep instead.\n\
1565 If sep is unicode, S will be converted to unicode before stripping";
1568 string_rstrip(PyStringObject
*self
, PyObject
*args
)
1570 if (PyTuple_GET_SIZE(args
) == 0)
1571 return do_strip(self
, RIGHTSTRIP
); /* Common case */
1573 return do_argstrip(self
, RIGHTSTRIP
, args
);
1577 static char lower__doc__
[] =
1578 "S.lower() -> string\n\
1580 Return a copy of the string S converted to lowercase.";
1583 string_lower(PyStringObject
*self
)
1585 char *s
= PyString_AS_STRING(self
), *s_new
;
1586 int i
, n
= PyString_GET_SIZE(self
);
1589 new = PyString_FromStringAndSize(NULL
, n
);
1592 s_new
= PyString_AsString(new);
1593 for (i
= 0; i
< n
; i
++) {
1594 int c
= Py_CHARMASK(*s
++);
1596 *s_new
= tolower(c
);
1605 static char upper__doc__
[] =
1606 "S.upper() -> string\n\
1608 Return a copy of the string S converted to uppercase.";
1611 string_upper(PyStringObject
*self
)
1613 char *s
= PyString_AS_STRING(self
), *s_new
;
1614 int i
, n
= PyString_GET_SIZE(self
);
1617 new = PyString_FromStringAndSize(NULL
, n
);
1620 s_new
= PyString_AsString(new);
1621 for (i
= 0; i
< n
; i
++) {
1622 int c
= Py_CHARMASK(*s
++);
1624 *s_new
= toupper(c
);
1633 static char title__doc__
[] =
1634 "S.title() -> string\n\
1636 Return a titlecased version of S, i.e. words start with uppercase\n\
1637 characters, all remaining cased characters have lowercase.";
1640 string_title(PyStringObject
*self
)
1642 char *s
= PyString_AS_STRING(self
), *s_new
;
1643 int i
, n
= PyString_GET_SIZE(self
);
1644 int previous_is_cased
= 0;
1647 new = PyString_FromStringAndSize(NULL
, n
);
1650 s_new
= PyString_AsString(new);
1651 for (i
= 0; i
< n
; i
++) {
1652 int c
= Py_CHARMASK(*s
++);
1654 if (!previous_is_cased
)
1656 previous_is_cased
= 1;
1657 } else if (isupper(c
)) {
1658 if (previous_is_cased
)
1660 previous_is_cased
= 1;
1662 previous_is_cased
= 0;
1668 static char capitalize__doc__
[] =
1669 "S.capitalize() -> string\n\
1671 Return a copy of the string S with only its first character\n\
1675 string_capitalize(PyStringObject
*self
)
1677 char *s
= PyString_AS_STRING(self
), *s_new
;
1678 int i
, n
= PyString_GET_SIZE(self
);
1681 new = PyString_FromStringAndSize(NULL
, n
);
1684 s_new
= PyString_AsString(new);
1686 int c
= Py_CHARMASK(*s
++);
1688 *s_new
= toupper(c
);
1693 for (i
= 1; i
< n
; i
++) {
1694 int c
= Py_CHARMASK(*s
++);
1696 *s_new
= tolower(c
);
1705 static char count__doc__
[] =
1706 "S.count(sub[, start[, end]]) -> int\n\
1708 Return the number of occurrences of substring sub in string\n\
1709 S[start:end]. Optional arguments start and end are\n\
1710 interpreted as in slice notation.";
1713 string_count(PyStringObject
*self
, PyObject
*args
)
1715 const char *s
= PyString_AS_STRING(self
), *sub
;
1716 int len
= PyString_GET_SIZE(self
), n
;
1717 int i
= 0, last
= INT_MAX
;
1721 if (!PyArg_ParseTuple(args
, "O|O&O&:count", &subobj
,
1722 _PyEval_SliceIndex
, &i
, _PyEval_SliceIndex
, &last
))
1725 if (PyString_Check(subobj
)) {
1726 sub
= PyString_AS_STRING(subobj
);
1727 n
= PyString_GET_SIZE(subobj
);
1729 #ifdef Py_USING_UNICODE
1730 else if (PyUnicode_Check(subobj
)) {
1732 count
= PyUnicode_Count((PyObject
*)self
, subobj
, i
, last
);
1736 return PyInt_FromLong((long) count
);
1739 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
1754 return PyInt_FromLong((long) (m
-i
));
1758 if (!memcmp(s
+i
, sub
, n
)) {
1765 return PyInt_FromLong((long) r
);
1769 static char swapcase__doc__
[] =
1770 "S.swapcase() -> string\n\
1772 Return a copy of the string S with uppercase characters\n\
1773 converted to lowercase and vice versa.";
1776 string_swapcase(PyStringObject
*self
)
1778 char *s
= PyString_AS_STRING(self
), *s_new
;
1779 int i
, n
= PyString_GET_SIZE(self
);
1782 new = PyString_FromStringAndSize(NULL
, n
);
1785 s_new
= PyString_AsString(new);
1786 for (i
= 0; i
< n
; i
++) {
1787 int c
= Py_CHARMASK(*s
++);
1789 *s_new
= toupper(c
);
1791 else if (isupper(c
)) {
1792 *s_new
= tolower(c
);
1802 static char translate__doc__
[] =
1803 "S.translate(table [,deletechars]) -> string\n\
1805 Return a copy of the string S, where all characters occurring\n\
1806 in the optional argument deletechars are removed, and the\n\
1807 remaining characters have been mapped through the given\n\
1808 translation table, which must be a string of length 256.";
1811 string_translate(PyStringObject
*self
, PyObject
*args
)
1813 register char *input
, *output
;
1814 register const char *table
;
1815 register int i
, c
, changed
= 0;
1816 PyObject
*input_obj
= (PyObject
*)self
;
1817 const char *table1
, *output_start
, *del_table
=NULL
;
1818 int inlen
, tablen
, dellen
= 0;
1820 int trans_table
[256];
1821 PyObject
*tableobj
, *delobj
= NULL
;
1823 if (!PyArg_ParseTuple(args
, "O|O:translate",
1824 &tableobj
, &delobj
))
1827 if (PyString_Check(tableobj
)) {
1828 table1
= PyString_AS_STRING(tableobj
);
1829 tablen
= PyString_GET_SIZE(tableobj
);
1831 #ifdef Py_USING_UNICODE
1832 else if (PyUnicode_Check(tableobj
)) {
1833 /* Unicode .translate() does not support the deletechars
1834 parameter; instead a mapping to None will cause characters
1836 if (delobj
!= NULL
) {
1837 PyErr_SetString(PyExc_TypeError
,
1838 "deletions are implemented differently for unicode");
1841 return PyUnicode_Translate((PyObject
*)self
, tableobj
, NULL
);
1844 else if (PyObject_AsCharBuffer(tableobj
, &table1
, &tablen
))
1847 if (delobj
!= NULL
) {
1848 if (PyString_Check(delobj
)) {
1849 del_table
= PyString_AS_STRING(delobj
);
1850 dellen
= PyString_GET_SIZE(delobj
);
1852 #ifdef Py_USING_UNICODE
1853 else if (PyUnicode_Check(delobj
)) {
1854 PyErr_SetString(PyExc_TypeError
,
1855 "deletions are implemented differently for unicode");
1859 else if (PyObject_AsCharBuffer(delobj
, &del_table
, &dellen
))
1862 if (tablen
!= 256) {
1863 PyErr_SetString(PyExc_ValueError
,
1864 "translation table must be 256 characters long");
1874 inlen
= PyString_Size(input_obj
);
1875 result
= PyString_FromStringAndSize((char *)NULL
, inlen
);
1878 output_start
= output
= PyString_AsString(result
);
1879 input
= PyString_AsString(input_obj
);
1882 /* If no deletions are required, use faster code */
1883 for (i
= inlen
; --i
>= 0; ) {
1884 c
= Py_CHARMASK(*input
++);
1885 if (Py_CHARMASK((*output
++ = table
[c
])) != c
)
1888 if (changed
|| !PyString_CheckExact(input_obj
))
1891 Py_INCREF(input_obj
);
1895 for (i
= 0; i
< 256; i
++)
1896 trans_table
[i
] = Py_CHARMASK(table
[i
]);
1898 for (i
= 0; i
< dellen
; i
++)
1899 trans_table
[(int) Py_CHARMASK(del_table
[i
])] = -1;
1901 for (i
= inlen
; --i
>= 0; ) {
1902 c
= Py_CHARMASK(*input
++);
1903 if (trans_table
[c
] != -1)
1904 if (Py_CHARMASK(*output
++ = (char)trans_table
[c
]) == c
)
1908 if (!changed
&& PyString_CheckExact(input_obj
)) {
1910 Py_INCREF(input_obj
);
1913 /* Fix the size of the resulting string */
1915 _PyString_Resize(&result
, output
- output_start
);
1920 /* What follows is used for implementing replace(). Perry Stoll. */
1925 strstr replacement for arbitrary blocks of memory.
1927 Locates the first occurrence in the memory pointed to by MEM of the
1928 contents of memory pointed to by PAT. Returns the index into MEM if
1929 found, or -1 if not found. If len of PAT is greater than length of
1930 MEM, the function returns -1.
1933 mymemfind(const char *mem
, int len
, const char *pat
, int pat_len
)
1937 /* pattern can not occur in the last pat_len-1 chars */
1940 for (ii
= 0; ii
<= len
; ii
++) {
1941 if (mem
[ii
] == pat
[0] && memcmp(&mem
[ii
], pat
, pat_len
) == 0) {
1951 Return the number of distinct (non-overlapping) times PAT is found in MEM,
1952 meaning mem=1111 and pat==11 returns 2;
1953 mem=11111 and pat==11 also returns 2.
1956 mymemcnt(const char *mem
, int len
, const char *pat
, int pat_len
)
1958 register int offset
= 0;
1962 offset
= mymemfind(mem
, len
, pat
, pat_len
);
1965 mem
+= offset
+ pat_len
;
1966 len
-= offset
+ pat_len
;
1975 Return a string in which all occurrences of PAT in memory STR are
1978 If length of PAT is greater than length of STR or there are no occurrences
1979 of PAT in STR, then the original string is returned. Otherwise, a new
1980 string is allocated here and returned.
1982 on return, out_len is:
1983 the length of output string, or
1984 -1 if the input string is returned, or
1985 unchanged if an error occurs (no memory).
1988 the new string allocated locally, or
1989 NULL if an error occurred.
1992 mymemreplace(const char *str
, int len
, /* input string */
1993 const char *pat
, int pat_len
, /* pattern string to find */
1994 const char *sub
, int sub_len
, /* substitution string */
1995 int count
, /* number of replacements */
2000 int nfound
, offset
, new_len
;
2002 if (len
== 0 || pat_len
> len
)
2005 /* find length of output string */
2006 nfound
= mymemcnt(str
, len
, pat
, pat_len
);
2009 else if (nfound
> count
)
2014 new_len
= len
+ nfound
*(sub_len
- pat_len
);
2016 /* Have to allocate something for the caller to free(). */
2017 out_s
= (char *)PyMem_MALLOC(1);
2023 assert(new_len
> 0);
2024 new_s
= (char *)PyMem_MALLOC(new_len
);
2029 for (; count
> 0 && len
> 0; --count
) {
2030 /* find index of next instance of pattern */
2031 offset
= mymemfind(str
, len
, pat
, pat_len
);
2035 /* copy non matching part of input string */
2036 memcpy(new_s
, str
, offset
);
2037 str
+= offset
+ pat_len
;
2038 len
-= offset
+ pat_len
;
2040 /* copy substitute into the output string */
2042 memcpy(new_s
, sub
, sub_len
);
2045 /* copy any remaining values into output string */
2047 memcpy(new_s
, str
, len
);
2054 return (char *)str
; /* cast away const */
2058 static char replace__doc__
[] =
2059 "S.replace (old, new[, maxsplit]) -> string\n\
2061 Return a copy of string S with all occurrences of substring\n\
2062 old replaced by new. If the optional argument maxsplit is\n\
2063 given, only the first maxsplit occurrences are replaced.";
2066 string_replace(PyStringObject
*self
, PyObject
*args
)
2068 const char *str
= PyString_AS_STRING(self
), *sub
, *repl
;
2070 const int len
= PyString_GET_SIZE(self
);
2071 int sub_len
, repl_len
, out_len
;
2074 PyObject
*subobj
, *replobj
;
2076 if (!PyArg_ParseTuple(args
, "OO|i:replace",
2077 &subobj
, &replobj
, &count
))
2080 if (PyString_Check(subobj
)) {
2081 sub
= PyString_AS_STRING(subobj
);
2082 sub_len
= PyString_GET_SIZE(subobj
);
2084 #ifdef Py_USING_UNICODE
2085 else if (PyUnicode_Check(subobj
))
2086 return PyUnicode_Replace((PyObject
*)self
,
2087 subobj
, replobj
, count
);
2089 else if (PyObject_AsCharBuffer(subobj
, &sub
, &sub_len
))
2092 if (PyString_Check(replobj
)) {
2093 repl
= PyString_AS_STRING(replobj
);
2094 repl_len
= PyString_GET_SIZE(replobj
);
2096 #ifdef Py_USING_UNICODE
2097 else if (PyUnicode_Check(replobj
))
2098 return PyUnicode_Replace((PyObject
*)self
,
2099 subobj
, replobj
, count
);
2101 else if (PyObject_AsCharBuffer(replobj
, &repl
, &repl_len
))
2105 PyErr_SetString(PyExc_ValueError
, "empty pattern string");
2108 new_s
= mymemreplace(str
,len
,sub
,sub_len
,repl
,repl_len
,count
,&out_len
);
2109 if (new_s
== NULL
) {
2113 if (out_len
== -1) {
2114 if (PyString_CheckExact(self
)) {
2115 /* we're returning another reference to self */
2116 new = (PyObject
*)self
;
2120 new = PyString_FromStringAndSize(str
, len
);
2126 new = PyString_FromStringAndSize(new_s
, out_len
);
2133 static char startswith__doc__
[] =
2134 "S.startswith(prefix[, start[, end]]) -> int\n\
2136 Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
2137 optional start, test S beginning at that position. With optional end, stop\n\
2138 comparing S at that position.";
2141 string_startswith(PyStringObject
*self
, PyObject
*args
)
2143 const char* str
= PyString_AS_STRING(self
);
2144 int len
= PyString_GET_SIZE(self
);
2151 if (!PyArg_ParseTuple(args
, "O|O&O&:startswith", &subobj
,
2152 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
2154 if (PyString_Check(subobj
)) {
2155 prefix
= PyString_AS_STRING(subobj
);
2156 plen
= PyString_GET_SIZE(subobj
);
2158 #ifdef Py_USING_UNICODE
2159 else if (PyUnicode_Check(subobj
)) {
2161 rc
= PyUnicode_Tailmatch((PyObject
*)self
,
2162 subobj
, start
, end
, -1);
2166 return PyInt_FromLong((long) rc
);
2169 else if (PyObject_AsCharBuffer(subobj
, &prefix
, &plen
))
2172 /* Adopt Java semantics for index out of range. It is legal for
2173 * offset to be == len, but this only returns true if prefix is
2176 if (start
< 0 || start
+plen
> len
)
2177 return PyInt_FromLong(0);
2179 if (!memcmp(str
+start
, prefix
, plen
)) {
2180 /* did the match end after the specified end? */
2182 return PyInt_FromLong(1);
2183 else if (end
- start
< plen
)
2184 return PyInt_FromLong(0);
2186 return PyInt_FromLong(1);
2188 else return PyInt_FromLong(0);
2192 static char endswith__doc__
[] =
2193 "S.endswith(suffix[, start[, end]]) -> int\n\
2195 Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
2196 optional start, test S beginning at that position. With optional end, stop\n\
2197 comparing S at that position.";
2200 string_endswith(PyStringObject
*self
, PyObject
*args
)
2202 const char* str
= PyString_AS_STRING(self
);
2203 int len
= PyString_GET_SIZE(self
);
2211 if (!PyArg_ParseTuple(args
, "O|O&O&:endswith", &subobj
,
2212 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
2214 if (PyString_Check(subobj
)) {
2215 suffix
= PyString_AS_STRING(subobj
);
2216 slen
= PyString_GET_SIZE(subobj
);
2218 #ifdef Py_USING_UNICODE
2219 else if (PyUnicode_Check(subobj
)) {
2221 rc
= PyUnicode_Tailmatch((PyObject
*)self
,
2222 subobj
, start
, end
, +1);
2226 return PyInt_FromLong((long) rc
);
2229 else if (PyObject_AsCharBuffer(subobj
, &suffix
, &slen
))
2232 if (start
< 0 || start
> len
|| slen
> len
)
2233 return PyInt_FromLong(0);
2235 upper
= (end
>= 0 && end
<= len
) ? end
: len
;
2236 lower
= (upper
- slen
) > start
? (upper
- slen
) : start
;
2238 if (upper
-lower
>= slen
&& !memcmp(str
+lower
, suffix
, slen
))
2239 return PyInt_FromLong(1);
2240 else return PyInt_FromLong(0);
2244 static char encode__doc__
[] =
2245 "S.encode([encoding[,errors]]) -> object\n\
2247 Encodes S using the codec registered for encoding. encoding defaults\n\
2248 to the default encoding. errors may be given to set a different error\n\
2249 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2250 a ValueError. Other possible values are 'ignore' and 'replace'.";
2253 string_encode(PyStringObject
*self
, PyObject
*args
)
2255 char *encoding
= NULL
;
2256 char *errors
= NULL
;
2257 if (!PyArg_ParseTuple(args
, "|ss:encode", &encoding
, &errors
))
2259 return PyString_AsEncodedObject((PyObject
*)self
, encoding
, errors
);
2263 static char decode__doc__
[] =
2264 "S.decode([encoding[,errors]]) -> object\n\
2266 Decodes S using the codec registered for encoding. encoding defaults\n\
2267 to the default encoding. errors may be given to set a different error\n\
2268 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2269 a ValueError. Other possible values are 'ignore' and 'replace'.";
2272 string_decode(PyStringObject
*self
, PyObject
*args
)
2274 char *encoding
= NULL
;
2275 char *errors
= NULL
;
2276 if (!PyArg_ParseTuple(args
, "|ss:decode", &encoding
, &errors
))
2278 return PyString_AsDecodedObject((PyObject
*)self
, encoding
, errors
);
2282 static char expandtabs__doc__
[] =
2283 "S.expandtabs([tabsize]) -> string\n\
2285 Return a copy of S where all tab characters are expanded using spaces.\n\
2286 If tabsize is not given, a tab size of 8 characters is assumed.";
2289 string_expandtabs(PyStringObject
*self
, PyObject
*args
)
2297 if (!PyArg_ParseTuple(args
, "|i:expandtabs", &tabsize
))
2300 /* First pass: determine size of output string */
2302 e
= PyString_AS_STRING(self
) + PyString_GET_SIZE(self
);
2303 for (p
= PyString_AS_STRING(self
); p
< e
; p
++)
2306 j
+= tabsize
- (j
% tabsize
);
2310 if (*p
== '\n' || *p
== '\r') {
2316 /* Second pass: create output string and fill it */
2317 u
= PyString_FromStringAndSize(NULL
, i
+ j
);
2322 q
= PyString_AS_STRING(u
);
2324 for (p
= PyString_AS_STRING(self
); p
< e
; p
++)
2327 i
= tabsize
- (j
% tabsize
);
2336 if (*p
== '\n' || *p
== '\r')
2344 pad(PyStringObject
*self
, int left
, int right
, char fill
)
2353 if (left
== 0 && right
== 0 && PyString_CheckExact(self
)) {
2355 return (PyObject
*)self
;
2358 u
= PyString_FromStringAndSize(NULL
,
2359 left
+ PyString_GET_SIZE(self
) + right
);
2362 memset(PyString_AS_STRING(u
), fill
, left
);
2363 memcpy(PyString_AS_STRING(u
) + left
,
2364 PyString_AS_STRING(self
),
2365 PyString_GET_SIZE(self
));
2367 memset(PyString_AS_STRING(u
) + left
+ PyString_GET_SIZE(self
),
2374 static char ljust__doc__
[] =
2375 "S.ljust(width) -> string\n"
2377 "Return S left justified in a string of length width. Padding is\n"
2378 "done using spaces.";
2381 string_ljust(PyStringObject
*self
, PyObject
*args
)
2384 if (!PyArg_ParseTuple(args
, "i:ljust", &width
))
2387 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
2389 return (PyObject
*) self
;
2392 return pad(self
, 0, width
- PyString_GET_SIZE(self
), ' ');
2396 static char rjust__doc__
[] =
2397 "S.rjust(width) -> string\n"
2399 "Return S right justified in a string of length width. Padding is\n"
2400 "done using spaces.";
2403 string_rjust(PyStringObject
*self
, PyObject
*args
)
2406 if (!PyArg_ParseTuple(args
, "i:rjust", &width
))
2409 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
2411 return (PyObject
*) self
;
2414 return pad(self
, width
- PyString_GET_SIZE(self
), 0, ' ');
2418 static char center__doc__
[] =
2419 "S.center(width) -> string\n"
2421 "Return S centered in a string of length width. Padding is done\n"
2425 string_center(PyStringObject
*self
, PyObject
*args
)
2430 if (!PyArg_ParseTuple(args
, "i:center", &width
))
2433 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
2435 return (PyObject
*) self
;
2438 marg
= width
- PyString_GET_SIZE(self
);
2439 left
= marg
/ 2 + (marg
& width
& 1);
2441 return pad(self
, left
, marg
- left
, ' ');
2444 static char zfill__doc__
[] =
2445 "S.zfill(width) -> string\n"
2447 "Pad a numeric string S with zeros on the left, to fill a field\n"
2448 "of the specified width. The string S is never truncated.";
2451 string_zfill(PyStringObject
*self
, PyObject
*args
)
2458 if (!PyArg_ParseTuple(args
, "i:zfill", &width
))
2461 if (PyString_GET_SIZE(self
) >= width
) {
2462 if (PyString_CheckExact(self
)) {
2464 return (PyObject
*) self
;
2467 return PyString_FromStringAndSize(
2468 PyString_AS_STRING(self
),
2469 PyString_GET_SIZE(self
)
2473 fill
= width
- PyString_GET_SIZE(self
);
2475 s
= pad(self
, fill
, 0, '0');
2480 p
= PyString_AS_STRING(s
);
2481 if (p
[fill
] == '+' || p
[fill
] == '-') {
2482 /* move sign to beginning of string */
2487 return (PyObject
*) s
;
2490 static char isspace__doc__
[] =
2491 "S.isspace() -> int\n"
2493 "Return 1 if there are only whitespace characters in S,\n"
2497 string_isspace(PyStringObject
*self
)
2499 register const unsigned char *p
2500 = (unsigned char *) PyString_AS_STRING(self
);
2501 register const unsigned char *e
;
2503 /* Shortcut for single character strings */
2504 if (PyString_GET_SIZE(self
) == 1 &&
2506 return PyInt_FromLong(1);
2508 /* Special case for empty strings */
2509 if (PyString_GET_SIZE(self
) == 0)
2510 return PyInt_FromLong(0);
2512 e
= p
+ PyString_GET_SIZE(self
);
2513 for (; p
< e
; p
++) {
2515 return PyInt_FromLong(0);
2517 return PyInt_FromLong(1);
2521 static char isalpha__doc__
[] =
2522 "S.isalpha() -> int\n\
2524 Return 1 if all characters in S are alphabetic\n\
2525 and there is at least one character in S, 0 otherwise.";
2528 string_isalpha(PyStringObject
*self
)
2530 register const unsigned char *p
2531 = (unsigned char *) PyString_AS_STRING(self
);
2532 register const unsigned char *e
;
2534 /* Shortcut for single character strings */
2535 if (PyString_GET_SIZE(self
) == 1 &&
2537 return PyInt_FromLong(1);
2539 /* Special case for empty strings */
2540 if (PyString_GET_SIZE(self
) == 0)
2541 return PyInt_FromLong(0);
2543 e
= p
+ PyString_GET_SIZE(self
);
2544 for (; p
< e
; p
++) {
2546 return PyInt_FromLong(0);
2548 return PyInt_FromLong(1);
2552 static char isalnum__doc__
[] =
2553 "S.isalnum() -> int\n\
2555 Return 1 if all characters in S are alphanumeric\n\
2556 and there is at least one character in S, 0 otherwise.";
2559 string_isalnum(PyStringObject
*self
)
2561 register const unsigned char *p
2562 = (unsigned char *) PyString_AS_STRING(self
);
2563 register const unsigned char *e
;
2565 /* Shortcut for single character strings */
2566 if (PyString_GET_SIZE(self
) == 1 &&
2568 return PyInt_FromLong(1);
2570 /* Special case for empty strings */
2571 if (PyString_GET_SIZE(self
) == 0)
2572 return PyInt_FromLong(0);
2574 e
= p
+ PyString_GET_SIZE(self
);
2575 for (; p
< e
; p
++) {
2577 return PyInt_FromLong(0);
2579 return PyInt_FromLong(1);
2583 static char isdigit__doc__
[] =
2584 "S.isdigit() -> int\n\
2586 Return 1 if there are only digit characters in S,\n\
2590 string_isdigit(PyStringObject
*self
)
2592 register const unsigned char *p
2593 = (unsigned char *) PyString_AS_STRING(self
);
2594 register const unsigned char *e
;
2596 /* Shortcut for single character strings */
2597 if (PyString_GET_SIZE(self
) == 1 &&
2599 return PyInt_FromLong(1);
2601 /* Special case for empty strings */
2602 if (PyString_GET_SIZE(self
) == 0)
2603 return PyInt_FromLong(0);
2605 e
= p
+ PyString_GET_SIZE(self
);
2606 for (; p
< e
; p
++) {
2608 return PyInt_FromLong(0);
2610 return PyInt_FromLong(1);
2614 static char islower__doc__
[] =
2615 "S.islower() -> int\n\
2617 Return 1 if all cased characters in S are lowercase and there is\n\
2618 at least one cased character in S, 0 otherwise.";
2621 string_islower(PyStringObject
*self
)
2623 register const unsigned char *p
2624 = (unsigned char *) PyString_AS_STRING(self
);
2625 register const unsigned char *e
;
2628 /* Shortcut for single character strings */
2629 if (PyString_GET_SIZE(self
) == 1)
2630 return PyInt_FromLong(islower(*p
) != 0);
2632 /* Special case for empty strings */
2633 if (PyString_GET_SIZE(self
) == 0)
2634 return PyInt_FromLong(0);
2636 e
= p
+ PyString_GET_SIZE(self
);
2638 for (; p
< e
; p
++) {
2640 return PyInt_FromLong(0);
2641 else if (!cased
&& islower(*p
))
2644 return PyInt_FromLong(cased
);
2648 static char isupper__doc__
[] =
2649 "S.isupper() -> int\n\
2651 Return 1 if all cased characters in S are uppercase and there is\n\
2652 at least one cased character in S, 0 otherwise.";
2655 string_isupper(PyStringObject
*self
)
2657 register const unsigned char *p
2658 = (unsigned char *) PyString_AS_STRING(self
);
2659 register const unsigned char *e
;
2662 /* Shortcut for single character strings */
2663 if (PyString_GET_SIZE(self
) == 1)
2664 return PyInt_FromLong(isupper(*p
) != 0);
2666 /* Special case for empty strings */
2667 if (PyString_GET_SIZE(self
) == 0)
2668 return PyInt_FromLong(0);
2670 e
= p
+ PyString_GET_SIZE(self
);
2672 for (; p
< e
; p
++) {
2674 return PyInt_FromLong(0);
2675 else if (!cased
&& isupper(*p
))
2678 return PyInt_FromLong(cased
);
2682 static char istitle__doc__
[] =
2683 "S.istitle() -> int\n\
2685 Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2686 may only follow uncased characters and lowercase characters only cased\n\
2687 ones. Return 0 otherwise.";
2690 string_istitle(PyStringObject
*self
, PyObject
*uncased
)
2692 register const unsigned char *p
2693 = (unsigned char *) PyString_AS_STRING(self
);
2694 register const unsigned char *e
;
2695 int cased
, previous_is_cased
;
2697 /* Shortcut for single character strings */
2698 if (PyString_GET_SIZE(self
) == 1)
2699 return PyInt_FromLong(isupper(*p
) != 0);
2701 /* Special case for empty strings */
2702 if (PyString_GET_SIZE(self
) == 0)
2703 return PyInt_FromLong(0);
2705 e
= p
+ PyString_GET_SIZE(self
);
2707 previous_is_cased
= 0;
2708 for (; p
< e
; p
++) {
2709 register const unsigned char ch
= *p
;
2712 if (previous_is_cased
)
2713 return PyInt_FromLong(0);
2714 previous_is_cased
= 1;
2717 else if (islower(ch
)) {
2718 if (!previous_is_cased
)
2719 return PyInt_FromLong(0);
2720 previous_is_cased
= 1;
2724 previous_is_cased
= 0;
2726 return PyInt_FromLong(cased
);
2730 static char splitlines__doc__
[] =
2731 "S.splitlines([keepends]) -> list of strings\n\
2733 Return a list of the lines in S, breaking at line boundaries.\n\
2734 Line breaks are not included in the resulting list unless keepends\n\
2735 is given and true.";
2737 #define SPLIT_APPEND(data, left, right) \
2738 str = PyString_FromStringAndSize(data + left, right - left); \
2741 if (PyList_Append(list, str)) { \
2749 string_splitlines(PyStringObject
*self
, PyObject
*args
)
2759 if (!PyArg_ParseTuple(args
, "|i:splitlines", &keepends
))
2762 data
= PyString_AS_STRING(self
);
2763 len
= PyString_GET_SIZE(self
);
2765 list
= PyList_New(0);
2769 for (i
= j
= 0; i
< len
; ) {
2772 /* Find a line and append it */
2773 while (i
< len
&& data
[i
] != '\n' && data
[i
] != '\r')
2776 /* Skip the line break reading CRLF as one line break */
2779 if (data
[i
] == '\r' && i
+ 1 < len
&&
2787 SPLIT_APPEND(data
, j
, eol
);
2791 SPLIT_APPEND(data
, j
, len
);
2805 string_methods
[] = {
2806 /* Counterparts of the obsolete stropmodule functions; except
2807 string.maketrans(). */
2808 {"join", (PyCFunction
)string_join
, METH_O
, join__doc__
},
2809 {"split", (PyCFunction
)string_split
, METH_VARARGS
, split__doc__
},
2810 {"lower", (PyCFunction
)string_lower
, METH_NOARGS
, lower__doc__
},
2811 {"upper", (PyCFunction
)string_upper
, METH_NOARGS
, upper__doc__
},
2812 {"islower", (PyCFunction
)string_islower
, METH_NOARGS
, islower__doc__
},
2813 {"isupper", (PyCFunction
)string_isupper
, METH_NOARGS
, isupper__doc__
},
2814 {"isspace", (PyCFunction
)string_isspace
, METH_NOARGS
, isspace__doc__
},
2815 {"isdigit", (PyCFunction
)string_isdigit
, METH_NOARGS
, isdigit__doc__
},
2816 {"istitle", (PyCFunction
)string_istitle
, METH_NOARGS
, istitle__doc__
},
2817 {"isalpha", (PyCFunction
)string_isalpha
, METH_NOARGS
, isalpha__doc__
},
2818 {"isalnum", (PyCFunction
)string_isalnum
, METH_NOARGS
, isalnum__doc__
},
2819 {"capitalize", (PyCFunction
)string_capitalize
, METH_NOARGS
,
2821 {"count", (PyCFunction
)string_count
, METH_VARARGS
, count__doc__
},
2822 {"endswith", (PyCFunction
)string_endswith
, METH_VARARGS
,
2824 {"find", (PyCFunction
)string_find
, METH_VARARGS
, find__doc__
},
2825 {"index", (PyCFunction
)string_index
, METH_VARARGS
, index__doc__
},
2826 {"lstrip", (PyCFunction
)string_lstrip
, METH_VARARGS
, lstrip__doc__
},
2827 {"replace", (PyCFunction
)string_replace
, METH_VARARGS
, replace__doc__
},
2828 {"rfind", (PyCFunction
)string_rfind
, METH_VARARGS
, rfind__doc__
},
2829 {"rindex", (PyCFunction
)string_rindex
, METH_VARARGS
, rindex__doc__
},
2830 {"rstrip", (PyCFunction
)string_rstrip
, METH_VARARGS
, rstrip__doc__
},
2831 {"startswith", (PyCFunction
)string_startswith
, METH_VARARGS
,
2833 {"strip", (PyCFunction
)string_strip
, METH_VARARGS
, strip__doc__
},
2834 {"swapcase", (PyCFunction
)string_swapcase
, METH_NOARGS
,
2836 {"translate", (PyCFunction
)string_translate
, METH_VARARGS
,
2838 {"title", (PyCFunction
)string_title
, METH_NOARGS
, title__doc__
},
2839 {"ljust", (PyCFunction
)string_ljust
, METH_VARARGS
, ljust__doc__
},
2840 {"rjust", (PyCFunction
)string_rjust
, METH_VARARGS
, rjust__doc__
},
2841 {"center", (PyCFunction
)string_center
, METH_VARARGS
, center__doc__
},
2842 {"zfill", (PyCFunction
)string_zfill
, METH_VARARGS
, zfill__doc__
},
2843 {"encode", (PyCFunction
)string_encode
, METH_VARARGS
, encode__doc__
},
2844 {"decode", (PyCFunction
)string_decode
, METH_VARARGS
, decode__doc__
},
2845 {"expandtabs", (PyCFunction
)string_expandtabs
, METH_VARARGS
,
2847 {"splitlines", (PyCFunction
)string_splitlines
, METH_VARARGS
,
2849 {NULL
, NULL
} /* sentinel */
2852 staticforward PyObject
*
2853 str_subtype_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
);
2856 string_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
2859 static char *kwlist
[] = {"object", 0};
2861 if (type
!= &PyString_Type
)
2862 return str_subtype_new(type
, args
, kwds
);
2863 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "|O:str", kwlist
, &x
))
2866 return PyString_FromString("");
2867 return PyObject_Str(x
);
2871 str_subtype_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
2873 PyObject
*tmp
, *pnew
;
2876 assert(PyType_IsSubtype(type
, &PyString_Type
));
2877 tmp
= string_new(&PyString_Type
, args
, kwds
);
2880 assert(PyString_CheckExact(tmp
));
2881 n
= PyString_GET_SIZE(tmp
);
2882 pnew
= type
->tp_alloc(type
, n
);
2884 memcpy(PyString_AS_STRING(pnew
), PyString_AS_STRING(tmp
), n
+1);
2886 ((PyStringObject
*)pnew
)->ob_shash
=
2887 ((PyStringObject
*)tmp
)->ob_shash
;
2889 #ifdef INTERN_STRINGS
2890 ((PyStringObject
*)pnew
)->ob_sinterned
=
2891 ((PyStringObject
*)tmp
)->ob_sinterned
;
2898 static char string_doc
[] =
2899 "str(object) -> string\n\
2901 Return a nice string representation of the object.\n\
2902 If the argument is a string, the return value is the same object.";
2904 PyTypeObject PyString_Type
= {
2905 PyObject_HEAD_INIT(&PyType_Type
)
2908 sizeof(PyStringObject
),
2910 (destructor
)string_dealloc
, /* tp_dealloc */
2911 (printfunc
)string_print
, /* tp_print */
2915 (reprfunc
)string_repr
, /* tp_repr */
2916 0, /* tp_as_number */
2917 &string_as_sequence
, /* tp_as_sequence */
2918 0, /* tp_as_mapping */
2919 (hashfunc
)string_hash
, /* tp_hash */
2921 (reprfunc
)string_str
, /* tp_str */
2922 PyObject_GenericGetAttr
, /* tp_getattro */
2923 0, /* tp_setattro */
2924 &string_as_buffer
, /* tp_as_buffer */
2925 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
, /* tp_flags */
2926 string_doc
, /* tp_doc */
2927 0, /* tp_traverse */
2929 (richcmpfunc
)string_richcompare
, /* tp_richcompare */
2930 0, /* tp_weaklistoffset */
2932 0, /* tp_iternext */
2933 string_methods
, /* tp_methods */
2938 0, /* tp_descr_get */
2939 0, /* tp_descr_set */
2940 0, /* tp_dictoffset */
2943 string_new
, /* tp_new */
2944 _PyObject_Del
, /* tp_free */
2948 PyString_Concat(register PyObject
**pv
, register PyObject
*w
)
2950 register PyObject
*v
;
2953 if (w
== NULL
|| !PyString_Check(*pv
)) {
2958 v
= string_concat((PyStringObject
*) *pv
, w
);
2964 PyString_ConcatAndDel(register PyObject
**pv
, register PyObject
*w
)
2966 PyString_Concat(pv
, w
);
2971 /* The following function breaks the notion that strings are immutable:
2972 it changes the size of a string. We get away with this only if there
2973 is only one module referencing the object. You can also think of it
2974 as creating a new string object and destroying the old one, only
2975 more efficiently. In any case, don't use this if the string may
2976 already be known to some other part of the code...
2977 Note that if there's not enough memory to resize the string, the original
2978 string object at *pv is deallocated, *pv is set to NULL, an "out of
2979 memory" exception is set, and -1 is returned. Else (on success) 0 is
2980 returned, and the value in *pv may or may not be the same as on input.
2981 As always, an extra byte is allocated for a trailing \0 byte (newsize
2982 does *not* include that), and a trailing \0 byte is stored.
2986 _PyString_Resize(PyObject
**pv
, int newsize
)
2988 register PyObject
*v
;
2989 register PyStringObject
*sv
;
2991 if (!PyString_Check(v
) || v
->ob_refcnt
!= 1 || newsize
< 0) {
2994 PyErr_BadInternalCall();
2997 /* XXX UNREF/NEWREF interface should be more symmetrical */
3001 _Py_ForgetReference(v
);
3003 PyObject_REALLOC((char *)v
,
3004 sizeof(PyStringObject
) + newsize
* sizeof(char));
3010 _Py_NewReference(*pv
);
3011 sv
= (PyStringObject
*) *pv
;
3012 sv
->ob_size
= newsize
;
3013 sv
->ob_sval
[newsize
] = '\0';
3017 /* Helpers for formatstring */
3020 getnextarg(PyObject
*args
, int arglen
, int *p_argidx
)
3022 int argidx
= *p_argidx
;
3023 if (argidx
< arglen
) {
3028 return PyTuple_GetItem(args
, argidx
);
3030 PyErr_SetString(PyExc_TypeError
,
3031 "not enough arguments for format string");
3042 #define F_LJUST (1<<0)
3043 #define F_SIGN (1<<1)
3044 #define F_BLANK (1<<2)
3045 #define F_ALT (1<<3)
3046 #define F_ZERO (1<<4)
3049 formatfloat(char *buf
, size_t buflen
, int flags
,
3050 int prec
, int type
, PyObject
*v
)
3052 /* fmt = '%#.' + `prec` + `type`
3053 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
3056 if (!PyArg_Parse(v
, "d;float argument required", &x
))
3060 if (type
== 'f' && fabs(x
)/1e25
>= 1e25
)
3062 PyOS_snprintf(fmt
, sizeof(fmt
), "%%%s.%d%c",
3063 (flags
&F_ALT
) ? "#" : "",
3065 /* worst case length calc to ensure no buffer overrun:
3067 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
3068 for any double rep.)
3069 len = 1 + prec + 1 + 2 + 5 = 9 + prec
3070 If prec=0 the effective precision is 1 (the leading digit is
3071 always given), therefore increase by one to 10+prec. */
3072 if (buflen
<= (size_t)10 + (size_t)prec
) {
3073 PyErr_SetString(PyExc_OverflowError
,
3074 "formatted float is too long (precision too large?)");
3077 PyOS_snprintf(buf
, buflen
, fmt
, x
);
3081 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3082 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3083 * Python's regular ints.
3084 * Return value: a new PyString*, or NULL if error.
3085 * . *pbuf is set to point into it,
3086 * *plen set to the # of chars following that.
3087 * Caller must decref it when done using pbuf.
3088 * The string starting at *pbuf is of the form
3089 * "-"? ("0x" | "0X")? digit+
3090 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3091 * set in flags. The case of hex digits will be correct.
3092 * There will be at least prec digits, zero-filled on the left if
3093 * necessary to get that many.
3094 * val object to be converted
3095 * flags bitmask of format flags; only F_ALT is looked at
3096 * prec minimum number of digits; 0-fill on left if needed
3097 * type a character in [duoxX]; u acts the same as d
3099 * CAUTION: o, x and X conversions on regular ints can never
3100 * produce a '-' sign, but can for Python's unbounded ints.
3103 _PyString_FormatLong(PyObject
*val
, int flags
, int prec
, int type
,
3104 char **pbuf
, int *plen
)
3106 PyObject
*result
= NULL
;
3109 int sign
; /* 1 if '-', else 0 */
3110 int len
; /* number of characters */
3111 int numdigits
; /* len == numnondigits + numdigits */
3112 int numnondigits
= 0;
3117 result
= val
->ob_type
->tp_str(val
);
3120 result
= val
->ob_type
->tp_as_number
->nb_oct(val
);
3125 result
= val
->ob_type
->tp_as_number
->nb_hex(val
);
3128 assert(!"'type' not in [duoxX]");
3133 /* To modify the string in-place, there can only be one reference. */
3134 if (result
->ob_refcnt
!= 1) {
3135 PyErr_BadInternalCall();
3138 buf
= PyString_AsString(result
);
3139 len
= PyString_Size(result
);
3140 if (buf
[len
-1] == 'L') {
3144 sign
= buf
[0] == '-';
3145 numnondigits
+= sign
;
3146 numdigits
= len
- numnondigits
;
3147 assert(numdigits
> 0);
3149 /* Get rid of base marker unless F_ALT */
3150 if ((flags
& F_ALT
) == 0) {
3151 /* Need to skip 0x, 0X or 0. */
3155 assert(buf
[sign
] == '0');
3156 /* If 0 is only digit, leave it alone. */
3157 if (numdigits
> 1) {
3164 assert(buf
[sign
] == '0');
3165 assert(buf
[sign
+ 1] == 'x');
3176 assert(len
== numnondigits
+ numdigits
);
3177 assert(numdigits
> 0);
3180 /* Fill with leading zeroes to meet minimum width. */
3181 if (prec
> numdigits
) {
3182 PyObject
*r1
= PyString_FromStringAndSize(NULL
,
3183 numnondigits
+ prec
);
3189 b1
= PyString_AS_STRING(r1
);
3190 for (i
= 0; i
< numnondigits
; ++i
)
3192 for (i
= 0; i
< prec
- numdigits
; i
++)
3194 for (i
= 0; i
< numdigits
; i
++)
3199 buf
= PyString_AS_STRING(result
);
3200 len
= numnondigits
+ prec
;
3203 /* Fix up case for hex conversions. */
3206 /* Need to convert all upper case letters to lower case. */
3207 for (i
= 0; i
< len
; i
++)
3208 if (buf
[i
] >= 'A' && buf
[i
] <= 'F')
3212 /* Need to convert 0x to 0X (and -0x to -0X). */
3213 if (buf
[sign
+ 1] == 'x')
3214 buf
[sign
+ 1] = 'X';
3223 formatint(char *buf
, size_t buflen
, int flags
,
3224 int prec
, int type
, PyObject
*v
)
3226 /* fmt = '%#.' + `prec` + 'l' + `type`
3227 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3229 char fmt
[64]; /* plenty big enough! */
3231 if (!PyArg_Parse(v
, "l;int argument required", &x
))
3235 PyOS_snprintf(fmt
, sizeof(fmt
), "%%%s.%dl%c",
3236 (flags
&F_ALT
) ? "#" : "",
3238 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
3239 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
3240 if (buflen
<= 13 || buflen
<= (size_t)2 + (size_t)prec
) {
3241 PyErr_SetString(PyExc_OverflowError
,
3242 "formatted integer is too long (precision too large?)");
3245 PyOS_snprintf(buf
, buflen
, fmt
, x
);
3246 /* When converting 0 under %#x or %#X, C leaves off the base marker,
3247 * but we want it (for consistency with other %#x conversions, and
3248 * for consistency with Python's hex() function).
3249 * BUG 28-Apr-2001 tim: At least two platform Cs (Metrowerks &
3250 * Compaq Tru64) violate the std by converting 0 w/ leading 0x anyway.
3251 * So add it only if the platform didn't already.
3255 (type
== 'x' || type
== 'X') &&
3256 buf
[1] != (char)type
) /* this last always true under std C */
3258 memmove(buf
+2, buf
, strlen(buf
) + 1);
3260 buf
[1] = (char)type
;
3266 formatchar(char *buf
, size_t buflen
, PyObject
*v
)
3268 /* presume that the buffer is at least 2 characters long */
3269 if (PyString_Check(v
)) {
3270 if (!PyArg_Parse(v
, "c;%c requires int or char", &buf
[0]))
3274 if (!PyArg_Parse(v
, "b;%c requires int or char", &buf
[0]))
3282 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
3284 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
3285 chars are formatted. XXX This is a magic number. Each formatting
3286 routine does bounds checking to ensure no overflow, but a better
3287 solution may be to malloc a buffer of appropriate size for each
3288 format. For now, the current solution is sufficient.
3290 #define FORMATBUFLEN (size_t)120
3293 PyString_Format(PyObject
*format
, PyObject
*args
)
3296 int fmtcnt
, rescnt
, reslen
, arglen
, argidx
;
3298 PyObject
*result
, *orig_args
;
3299 #ifdef Py_USING_UNICODE
3302 PyObject
*dict
= NULL
;
3303 if (format
== NULL
|| !PyString_Check(format
) || args
== NULL
) {
3304 PyErr_BadInternalCall();
3308 fmt
= PyString_AS_STRING(format
);
3309 fmtcnt
= PyString_GET_SIZE(format
);
3310 reslen
= rescnt
= fmtcnt
+ 100;
3311 result
= PyString_FromStringAndSize((char *)NULL
, reslen
);
3314 res
= PyString_AsString(result
);
3315 if (PyTuple_Check(args
)) {
3316 arglen
= PyTuple_GET_SIZE(args
);
3323 if (args
->ob_type
->tp_as_mapping
)
3325 while (--fmtcnt
>= 0) {
3328 rescnt
= fmtcnt
+ 100;
3330 if (_PyString_Resize(&result
, reslen
) < 0)
3332 res
= PyString_AS_STRING(result
)
3339 /* Got a format specifier */
3346 PyObject
*temp
= NULL
;
3350 char formatbuf
[FORMATBUFLEN
];
3351 /* For format{float,int,char}() */
3352 #ifdef Py_USING_UNICODE
3353 char *fmt_start
= fmt
;
3354 int argidx_start
= argidx
;
3365 PyErr_SetString(PyExc_TypeError
,
3366 "format requires a mapping");
3372 /* Skip over balanced parentheses */
3373 while (pcount
> 0 && --fmtcnt
>= 0) {
3376 else if (*fmt
== '(')
3380 keylen
= fmt
- keystart
- 1;
3381 if (fmtcnt
< 0 || pcount
> 0) {
3382 PyErr_SetString(PyExc_ValueError
,
3383 "incomplete format key");
3386 key
= PyString_FromStringAndSize(keystart
,
3394 args
= PyObject_GetItem(dict
, key
);
3403 while (--fmtcnt
>= 0) {
3404 switch (c
= *fmt
++) {
3405 case '-': flags
|= F_LJUST
; continue;
3406 case '+': flags
|= F_SIGN
; continue;
3407 case ' ': flags
|= F_BLANK
; continue;
3408 case '#': flags
|= F_ALT
; continue;
3409 case '0': flags
|= F_ZERO
; continue;
3414 v
= getnextarg(args
, arglen
, &argidx
);
3417 if (!PyInt_Check(v
)) {
3418 PyErr_SetString(PyExc_TypeError
,
3422 width
= PyInt_AsLong(v
);
3430 else if (c
>= 0 && isdigit(c
)) {
3432 while (--fmtcnt
>= 0) {
3433 c
= Py_CHARMASK(*fmt
++);
3436 if ((width
*10) / 10 != width
) {
3442 width
= width
*10 + (c
- '0');
3450 v
= getnextarg(args
, arglen
, &argidx
);
3453 if (!PyInt_Check(v
)) {
3459 prec
= PyInt_AsLong(v
);
3465 else if (c
>= 0 && isdigit(c
)) {
3467 while (--fmtcnt
>= 0) {
3468 c
= Py_CHARMASK(*fmt
++);
3471 if ((prec
*10) / 10 != prec
) {
3477 prec
= prec
*10 + (c
- '0');
3482 if (c
== 'h' || c
== 'l' || c
== 'L') {
3488 PyErr_SetString(PyExc_ValueError
,
3489 "incomplete format");
3493 v
= getnextarg(args
, arglen
, &argidx
);
3505 #ifdef Py_USING_UNICODE
3506 if (PyUnicode_Check(v
)) {
3508 argidx
= argidx_start
;
3515 temp
= PyObject_Str(v
);
3517 temp
= PyObject_Repr(v
);
3520 if (!PyString_Check(temp
)) {
3521 PyErr_SetString(PyExc_TypeError
,
3522 "%s argument has non-string str()");
3526 pbuf
= PyString_AS_STRING(temp
);
3527 len
= PyString_GET_SIZE(temp
);
3528 if (prec
>= 0 && len
> prec
)
3539 if (PyLong_Check(v
)) {
3540 temp
= _PyString_FormatLong(v
, flags
,
3541 prec
, c
, &pbuf
, &len
);
3544 /* unbounded ints can always produce
3545 a sign character! */
3550 len
= formatint(pbuf
,
3555 /* only d conversion is signed */
3567 len
= formatfloat(pbuf
, sizeof(formatbuf
),
3577 len
= formatchar(pbuf
, sizeof(formatbuf
), v
);
3582 PyErr_Format(PyExc_ValueError
,
3583 "unsupported format character '%c' (0x%x) "
3586 (int)(fmt
- 1 - PyString_AsString(format
)));
3590 if (*pbuf
== '-' || *pbuf
== '+') {
3594 else if (flags
& F_SIGN
)
3596 else if (flags
& F_BLANK
)
3603 if (rescnt
- (sign
!= 0) < width
) {
3605 rescnt
= width
+ fmtcnt
+ 100;
3609 return PyErr_NoMemory();
3611 if (_PyString_Resize(&result
, reslen
) < 0)
3613 res
= PyString_AS_STRING(result
)
3623 if ((flags
& F_ALT
) && (c
== 'x' || c
== 'X')) {
3624 assert(pbuf
[0] == '0');
3625 assert(pbuf
[1] == c
);
3636 if (width
> len
&& !(flags
& F_LJUST
)) {
3640 } while (--width
> len
);
3645 if ((flags
& F_ALT
) &&
3646 (c
== 'x' || c
== 'X')) {
3647 assert(pbuf
[0] == '0');
3648 assert(pbuf
[1] == c
);
3653 memcpy(res
, pbuf
, len
);
3656 while (--width
>= len
) {
3660 if (dict
&& (argidx
< arglen
) && c
!= '%') {
3661 PyErr_SetString(PyExc_TypeError
,
3662 "not all arguments converted");
3668 if (argidx
< arglen
&& !dict
) {
3669 PyErr_SetString(PyExc_TypeError
,
3670 "not all arguments converted");
3676 _PyString_Resize(&result
, reslen
- rescnt
);
3679 #ifdef Py_USING_UNICODE
3685 /* Fiddle args right (remove the first argidx arguments) */
3686 if (PyTuple_Check(orig_args
) && argidx
> 0) {
3688 int n
= PyTuple_GET_SIZE(orig_args
) - argidx
;
3693 PyObject
*w
= PyTuple_GET_ITEM(orig_args
, n
+ argidx
);
3695 PyTuple_SET_ITEM(v
, n
, w
);
3699 Py_INCREF(orig_args
);
3703 /* Take what we have of the result and let the Unicode formatting
3704 function format the rest of the input. */
3705 rescnt
= res
- PyString_AS_STRING(result
);
3706 if (_PyString_Resize(&result
, rescnt
))
3708 fmtcnt
= PyString_GET_SIZE(format
) - \
3709 (fmt
- PyString_AS_STRING(format
));
3710 format
= PyUnicode_Decode(fmt
, fmtcnt
, NULL
, NULL
);
3713 v
= PyUnicode_Format(format
, args
);
3717 /* Paste what we have (result) to what the Unicode formatting
3718 function returned (v) and return the result (or error) */
3719 w
= PyUnicode_Concat(result
, v
);
3724 #endif /* Py_USING_UNICODE */
3735 #ifdef INTERN_STRINGS
3737 /* This dictionary will leak at PyString_Fini() time. That's acceptable
3738 * because PyString_Fini() specifically frees interned strings that are
3739 * only referenced by this dictionary. The CVS log entry for revision 2.45
3742 * Change the Fini function to only remove otherwise unreferenced
3743 * strings from the interned table. There are references in
3744 * hard-to-find static variables all over the interpreter, and it's not
3745 * worth trying to get rid of all those; but "uninterning" isn't fair
3746 * either and may cause subtle failures later -- so we have to keep them
3747 * in the interned table.
3749 static PyObject
*interned
;
3752 PyString_InternInPlace(PyObject
**p
)
3754 register PyStringObject
*s
= (PyStringObject
*)(*p
);
3756 if (s
== NULL
|| !PyString_Check(s
))
3757 Py_FatalError("PyString_InternInPlace: strings only please!");
3758 if ((t
= s
->ob_sinterned
) != NULL
) {
3759 if (t
== (PyObject
*)s
)
3766 if (interned
== NULL
) {
3767 interned
= PyDict_New();
3768 if (interned
== NULL
)
3771 if ((t
= PyDict_GetItem(interned
, (PyObject
*)s
)) != NULL
) {
3773 *p
= s
->ob_sinterned
= t
;
3777 /* Ensure that only true string objects appear in the intern dict,
3778 and as the value of ob_sinterned. */
3779 if (PyString_CheckExact(s
)) {
3781 if (PyDict_SetItem(interned
, t
, t
) == 0) {
3782 s
->ob_sinterned
= t
;
3787 t
= PyString_FromStringAndSize(PyString_AS_STRING(s
),
3788 PyString_GET_SIZE(s
));
3790 if (PyDict_SetItem(interned
, t
, t
) == 0) {
3791 *p
= s
->ob_sinterned
= t
;
3803 PyString_InternFromString(const char *cp
)
3805 PyObject
*s
= PyString_FromString(cp
);
3808 PyString_InternInPlace(&s
);
3818 for (i
= 0; i
< UCHAR_MAX
+ 1; i
++) {
3819 Py_XDECREF(characters
[i
]);
3820 characters
[i
] = NULL
;
3822 #ifndef DONT_SHARE_SHORT_STRINGS
3823 Py_XDECREF(nullstring
);
3826 #ifdef INTERN_STRINGS
3829 PyObject
*key
, *value
;
3833 while (PyDict_Next(interned
, &pos
, &key
, &value
)) {
3834 if (key
->ob_refcnt
== 2 && key
== value
) {
3835 PyDict_DelItem(interned
, key
);
3844 #ifdef INTERN_STRINGS
3845 void _Py_ReleaseInternedStrings(void)
3848 fprintf(stderr
, "releasing interned strings\n");
3849 PyDict_Clear(interned
);
3850 Py_DECREF(interned
);
3854 #endif /* INTERN_STRINGS */