2 /* String object implementation */
9 int null_strings
, one_strings
;
12 #if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
16 static PyStringObject
*characters
[UCHAR_MAX
+ 1];
17 #ifndef DONT_SHARE_SHORT_STRINGS
18 static PyStringObject
*nullstring
;
22 Newsizedstringobject() and newstringobject() try in certain cases
23 to share string objects. When the size of the string is zero,
24 these routines always return a pointer to the same string object;
25 when the size is one, they return a pointer to an already existing
26 object if the contents of the string is known. For
27 newstringobject() this is always the case, for
28 newsizedstringobject() this is the case when the first argument in
30 A common practice to allocate a string and then fill it in or
31 change it must be done carefully. It is only allowed to change the
32 contents of the string if the object was gotten from
33 newsizedstringobject() with a NULL first argument, because in the
34 future these routines may try to do even more sharing of objects.
37 PyString_FromStringAndSize(const char *str
, int size
)
39 register PyStringObject
*op
;
40 #ifndef DONT_SHARE_SHORT_STRINGS
41 if (size
== 0 && (op
= nullstring
) != NULL
) {
46 return (PyObject
*)op
;
48 if (size
== 1 && str
!= NULL
&&
49 (op
= characters
[*str
& UCHAR_MAX
]) != NULL
)
55 return (PyObject
*)op
;
57 #endif /* DONT_SHARE_SHORT_STRINGS */
59 /* PyObject_NewVar is inlined */
60 op
= (PyStringObject
*)
61 PyObject_MALLOC(sizeof(PyStringObject
) + size
* sizeof(char));
63 return PyErr_NoMemory();
64 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
69 op
->ob_sinterned
= NULL
;
72 memcpy(op
->ob_sval
, str
, size
);
73 op
->ob_sval
[size
] = '\0';
74 #ifndef DONT_SHARE_SHORT_STRINGS
78 } else if (size
== 1 && str
!= NULL
) {
79 characters
[*str
& UCHAR_MAX
] = op
;
83 return (PyObject
*) op
;
87 PyString_FromString(const char *str
)
89 register size_t size
= strlen(str
);
90 register PyStringObject
*op
;
92 PyErr_SetString(PyExc_OverflowError
,
93 "string is too long for a Python string");
96 #ifndef DONT_SHARE_SHORT_STRINGS
97 if (size
== 0 && (op
= nullstring
) != NULL
) {
102 return (PyObject
*)op
;
104 if (size
== 1 && (op
= characters
[*str
& UCHAR_MAX
]) != NULL
) {
109 return (PyObject
*)op
;
111 #endif /* DONT_SHARE_SHORT_STRINGS */
113 /* PyObject_NewVar is inlined */
114 op
= (PyStringObject
*)
115 PyObject_MALLOC(sizeof(PyStringObject
) + size
* sizeof(char));
117 return PyErr_NoMemory();
118 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
122 #ifdef INTERN_STRINGS
123 op
->ob_sinterned
= NULL
;
125 strcpy(op
->ob_sval
, str
);
126 #ifndef DONT_SHARE_SHORT_STRINGS
130 } else if (size
== 1) {
131 characters
[*str
& UCHAR_MAX
] = op
;
135 return (PyObject
*) op
;
138 PyObject
*PyString_Decode(const char *s
,
140 const char *encoding
,
143 PyObject
*buffer
= NULL
, *str
;
145 if (encoding
== NULL
)
146 encoding
= PyUnicode_GetDefaultEncoding();
148 /* Decode via the codec registry */
149 buffer
= PyBuffer_FromMemory((void *)s
, size
);
152 str
= PyCodec_Decode(buffer
, encoding
, errors
);
155 /* Convert Unicode to a string using the default encoding */
156 if (PyUnicode_Check(str
)) {
157 PyObject
*temp
= str
;
158 str
= PyUnicode_AsEncodedString(str
, NULL
, NULL
);
163 if (!PyString_Check(str
)) {
164 PyErr_Format(PyExc_TypeError
,
165 "decoder did not return a string object (type=%.400s)",
166 str
->ob_type
->tp_name
);
178 PyObject
*PyString_Encode(const char *s
,
180 const char *encoding
,
185 str
= PyString_FromStringAndSize(s
, size
);
188 v
= PyString_AsEncodedString(str
, encoding
, errors
);
193 PyObject
*PyString_AsEncodedString(PyObject
*str
,
194 const char *encoding
,
199 if (!PyString_Check(str
)) {
204 if (encoding
== NULL
)
205 encoding
= PyUnicode_GetDefaultEncoding();
207 /* Encode via the codec registry */
208 v
= PyCodec_Encode(str
, encoding
, errors
);
211 /* Convert Unicode to a string using the default encoding */
212 if (PyUnicode_Check(v
)) {
214 v
= PyUnicode_AsEncodedString(v
, NULL
, NULL
);
219 if (!PyString_Check(v
)) {
220 PyErr_Format(PyExc_TypeError
,
221 "encoder did not return a string object (type=%.400s)",
222 v
->ob_type
->tp_name
);
233 string_dealloc(PyObject
*op
)
239 string_getsize(register PyObject
*op
)
243 if (PyString_AsStringAndSize(op
, &s
, &len
))
248 static /*const*/ char *
249 string_getbuffer(register PyObject
*op
)
253 if (PyString_AsStringAndSize(op
, &s
, &len
))
259 PyString_Size(register PyObject
*op
)
261 if (!PyString_Check(op
))
262 return string_getsize(op
);
263 return ((PyStringObject
*)op
) -> ob_size
;
267 PyString_AsString(register PyObject
*op
)
269 if (!PyString_Check(op
))
270 return string_getbuffer(op
);
271 return ((PyStringObject
*)op
) -> ob_sval
;
274 /* Internal API needed by PyString_AsStringAndSize(): */
276 PyObject
*_PyUnicode_AsDefaultEncodedString(PyObject
*unicode
,
280 PyString_AsStringAndSize(register PyObject
*obj
,
285 PyErr_BadInternalCall();
289 if (!PyString_Check(obj
)) {
290 if (PyUnicode_Check(obj
)) {
291 obj
= _PyUnicode_AsDefaultEncodedString(obj
, NULL
);
296 PyErr_Format(PyExc_TypeError
,
297 "expected string or Unicode object, "
298 "%.200s found", obj
->ob_type
->tp_name
);
303 *s
= PyString_AS_STRING(obj
);
305 *len
= PyString_GET_SIZE(obj
);
306 else if ((int)strlen(*s
) != PyString_GET_SIZE(obj
)) {
307 PyErr_SetString(PyExc_TypeError
,
308 "expected string without null bytes");
317 string_print(PyStringObject
*op
, FILE *fp
, int flags
)
322 /* XXX Ought to check for interrupts when writing long strings */
323 if (flags
& Py_PRINT_RAW
) {
324 fwrite(op
->ob_sval
, 1, (int) op
->ob_size
, fp
);
328 /* figure out which quote to use; single is preferred */
330 if (strchr(op
->ob_sval
, '\'') && !strchr(op
->ob_sval
, '"'))
334 for (i
= 0; i
< op
->ob_size
; i
++) {
336 if (c
== quote
|| c
== '\\')
337 fprintf(fp
, "\\%c", c
);
344 else if (c
< ' ' || c
>= 0x7f)
345 fprintf(fp
, "\\x%02x", c
& 0xff);
354 string_repr(register PyStringObject
*op
)
356 size_t newsize
= 2 + 4 * op
->ob_size
* sizeof(char);
358 if (newsize
> INT_MAX
) {
359 PyErr_SetString(PyExc_OverflowError
,
360 "string is too large to make repr");
362 v
= PyString_FromStringAndSize((char *)NULL
, newsize
);
372 /* figure out which quote to use; single is preferred */
374 if (strchr(op
->ob_sval
, '\'') && !strchr(op
->ob_sval
, '"'))
377 p
= ((PyStringObject
*)v
)->ob_sval
;
379 for (i
= 0; i
< op
->ob_size
; i
++) {
381 if (c
== quote
|| c
== '\\')
382 *p
++ = '\\', *p
++ = c
;
384 *p
++ = '\\', *p
++ = 't';
386 *p
++ = '\\', *p
++ = 'n';
388 *p
++ = '\\', *p
++ = 'r';
389 else if (c
< ' ' || c
>= 0x7f) {
390 sprintf(p
, "\\x%02x", c
& 0xff);
399 &v
, (int) (p
- ((PyStringObject
*)v
)->ob_sval
));
405 string_str(PyObject
*s
)
412 string_length(PyStringObject
*a
)
418 string_concat(register PyStringObject
*a
, register PyObject
*bb
)
420 register unsigned int size
;
421 register PyStringObject
*op
;
422 if (!PyString_Check(bb
)) {
423 if (PyUnicode_Check(bb
))
424 return PyUnicode_Concat((PyObject
*)a
, bb
);
425 PyErr_Format(PyExc_TypeError
,
426 "cannot add type \"%.200s\" to string",
427 bb
->ob_type
->tp_name
);
430 #define b ((PyStringObject *)bb)
431 /* Optimize cases with empty left or right operand */
432 if (a
->ob_size
== 0) {
436 if (b
->ob_size
== 0) {
438 return (PyObject
*)a
;
440 size
= a
->ob_size
+ b
->ob_size
;
441 /* PyObject_NewVar is inlined */
442 op
= (PyStringObject
*)
443 PyObject_MALLOC(sizeof(PyStringObject
) + size
* sizeof(char));
445 return PyErr_NoMemory();
446 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
450 #ifdef INTERN_STRINGS
451 op
->ob_sinterned
= NULL
;
453 memcpy(op
->ob_sval
, a
->ob_sval
, (int) a
->ob_size
);
454 memcpy(op
->ob_sval
+ a
->ob_size
, b
->ob_sval
, (int) b
->ob_size
);
455 op
->ob_sval
[size
] = '\0';
456 return (PyObject
*) op
;
461 string_repeat(register PyStringObject
*a
, register int n
)
465 register PyStringObject
*op
;
469 /* watch out for overflows: the size can overflow int,
470 * and the # of bytes needed can overflow size_t
472 size
= a
->ob_size
* n
;
473 if (n
&& size
/ n
!= a
->ob_size
) {
474 PyErr_SetString(PyExc_OverflowError
,
475 "repeated string is too long");
478 if (size
== a
->ob_size
) {
480 return (PyObject
*)a
;
482 nbytes
= size
* sizeof(char);
483 if (nbytes
/ sizeof(char) != (size_t)size
||
484 nbytes
+ sizeof(PyStringObject
) <= nbytes
) {
485 PyErr_SetString(PyExc_OverflowError
,
486 "repeated string is too long");
489 op
= (PyStringObject
*)
490 PyObject_MALLOC(sizeof(PyStringObject
) + nbytes
);
492 return PyErr_NoMemory();
493 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
497 #ifdef INTERN_STRINGS
498 op
->ob_sinterned
= NULL
;
500 for (i
= 0; i
< size
; i
+= a
->ob_size
)
501 memcpy(op
->ob_sval
+i
, a
->ob_sval
, (int) a
->ob_size
);
502 op
->ob_sval
[size
] = '\0';
503 return (PyObject
*) op
;
506 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
509 string_slice(register PyStringObject
*a
, register int i
, register int j
)
510 /* j -- may be negative! */
515 j
= 0; /* Avoid signed/unsigned bug in next line */
518 if (i
== 0 && j
== a
->ob_size
) { /* It's the same as a */
520 return (PyObject
*)a
;
524 return PyString_FromStringAndSize(a
->ob_sval
+ i
, (int) (j
-i
));
528 string_contains(PyObject
*a
, PyObject
*el
)
530 register char *s
, *end
;
532 if (PyUnicode_Check(el
))
533 return PyUnicode_Contains(a
, el
);
534 if (!PyString_Check(el
) || PyString_Size(el
) != 1) {
535 PyErr_SetString(PyExc_TypeError
,
536 "'in <string>' requires character as left operand");
539 c
= PyString_AsString(el
)[0];
540 s
= PyString_AsString(a
);
541 end
= s
+ PyString_Size(a
);
550 string_item(PyStringObject
*a
, register int i
)
554 if (i
< 0 || i
>= a
->ob_size
) {
555 PyErr_SetString(PyExc_IndexError
, "string index out of range");
558 c
= a
->ob_sval
[i
] & UCHAR_MAX
;
559 v
= (PyObject
*) characters
[c
];
565 v
= PyString_FromStringAndSize((char *)NULL
, 1);
568 characters
[c
] = (PyStringObject
*) v
;
569 ((PyStringObject
*)v
)->ob_sval
[0] = c
;
576 string_compare(PyStringObject
*a
, PyStringObject
*b
)
578 int len_a
= a
->ob_size
, len_b
= b
->ob_size
;
579 int min_len
= (len_a
< len_b
) ? len_a
: len_b
;
582 cmp
= Py_CHARMASK(*a
->ob_sval
) - Py_CHARMASK(*b
->ob_sval
);
584 cmp
= memcmp(a
->ob_sval
, b
->ob_sval
, min_len
);
588 return (len_a
< len_b
) ? -1 : (len_a
> len_b
) ? 1 : 0;
592 string_hash(PyStringObject
*a
)
595 register unsigned char *p
;
599 if (a
->ob_shash
!= -1)
601 #ifdef INTERN_STRINGS
602 if (a
->ob_sinterned
!= NULL
)
603 return (a
->ob_shash
=
604 ((PyStringObject
*)(a
->ob_sinterned
))->ob_shash
);
608 p
= (unsigned char *) a
->ob_sval
;
611 x
= (1000003*x
) ^ *p
++;
622 string_buffer_getreadbuf(PyStringObject
*self
, int index
, const void **ptr
)
625 PyErr_SetString(PyExc_SystemError
,
626 "accessing non-existent string segment");
629 *ptr
= (void *)self
->ob_sval
;
630 return self
->ob_size
;
634 string_buffer_getwritebuf(PyStringObject
*self
, int index
, const void **ptr
)
636 PyErr_SetString(PyExc_TypeError
,
637 "Cannot use string as modifiable buffer");
642 string_buffer_getsegcount(PyStringObject
*self
, int *lenp
)
645 *lenp
= self
->ob_size
;
650 string_buffer_getcharbuf(PyStringObject
*self
, int index
, const char **ptr
)
653 PyErr_SetString(PyExc_SystemError
,
654 "accessing non-existent string segment");
657 *ptr
= self
->ob_sval
;
658 return self
->ob_size
;
661 static PySequenceMethods string_as_sequence
= {
662 (inquiry
)string_length
, /*sq_length*/
663 (binaryfunc
)string_concat
, /*sq_concat*/
664 (intargfunc
)string_repeat
, /*sq_repeat*/
665 (intargfunc
)string_item
, /*sq_item*/
666 (intintargfunc
)string_slice
, /*sq_slice*/
669 (objobjproc
)string_contains
/*sq_contains*/
672 static PyBufferProcs string_as_buffer
= {
673 (getreadbufferproc
)string_buffer_getreadbuf
,
674 (getwritebufferproc
)string_buffer_getwritebuf
,
675 (getsegcountproc
)string_buffer_getsegcount
,
676 (getcharbufferproc
)string_buffer_getcharbuf
,
687 split_whitespace(const char *s
, int len
, int maxsplit
)
691 PyObject
*list
= PyList_New(0);
696 for (i
= j
= 0; i
< len
; ) {
697 while (i
< len
&& isspace(Py_CHARMASK(s
[i
])))
700 while (i
< len
&& !isspace(Py_CHARMASK(s
[i
])))
705 item
= PyString_FromStringAndSize(s
+j
, (int)(i
-j
));
708 err
= PyList_Append(list
, item
);
712 while (i
< len
&& isspace(Py_CHARMASK(s
[i
])))
718 item
= PyString_FromStringAndSize(s
+j
, (int)(len
- j
));
721 err
= PyList_Append(list
, item
);
733 static char split__doc__
[] =
734 "S.split([sep [,maxsplit]]) -> list of strings\n\
736 Return a list of the words in the string S, using sep as the\n\
737 delimiter string. If maxsplit is given, at most maxsplit\n\
738 splits are done. If sep is not specified, any whitespace string\n\
742 string_split(PyStringObject
*self
, PyObject
*args
)
744 int len
= PyString_GET_SIZE(self
), n
, i
, j
, err
;
746 const char *s
= PyString_AS_STRING(self
), *sub
;
747 PyObject
*list
, *item
, *subobj
= Py_None
;
749 if (!PyArg_ParseTuple(args
, "|Oi:split", &subobj
, &maxsplit
))
753 if (subobj
== Py_None
)
754 return split_whitespace(s
, len
, maxsplit
);
755 if (PyString_Check(subobj
)) {
756 sub
= PyString_AS_STRING(subobj
);
757 n
= PyString_GET_SIZE(subobj
);
759 else if (PyUnicode_Check(subobj
))
760 return PyUnicode_Split((PyObject
*)self
, subobj
, maxsplit
);
761 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
764 PyErr_SetString(PyExc_ValueError
, "empty separator");
768 list
= PyList_New(0);
774 if (s
[i
] == sub
[0] && memcmp(s
+i
, sub
, n
) == 0) {
777 item
= PyString_FromStringAndSize(s
+j
, (int)(i
-j
));
780 err
= PyList_Append(list
, item
);
789 item
= PyString_FromStringAndSize(s
+j
, (int)(len
-j
));
792 err
= PyList_Append(list
, item
);
805 static char join__doc__
[] =
806 "S.join(sequence) -> string\n\
808 Return a string which is the concatenation of the strings in the\n\
809 sequence. The separator between elements is S.";
812 string_join(PyStringObject
*self
, PyObject
*args
)
814 char *sep
= PyString_AS_STRING(self
);
815 const int seplen
= PyString_GET_SIZE(self
);
816 PyObject
*res
= NULL
;
821 PyObject
*orig
, *seq
, *item
;
823 if (!PyArg_ParseTuple(args
, "O:join", &orig
))
826 seq
= PySequence_Fast(orig
, "");
828 if (PyErr_ExceptionMatches(PyExc_TypeError
))
829 PyErr_Format(PyExc_TypeError
,
830 "sequence expected, %.80s found",
831 orig
->ob_type
->tp_name
);
835 seqlen
= PySequence_Size(seq
);
838 return PyString_FromString("");
841 item
= PySequence_Fast_GET_ITEM(seq
, 0);
842 if (!PyString_Check(item
) && !PyUnicode_Check(item
)) {
843 PyErr_Format(PyExc_TypeError
,
844 "sequence item 0: expected string,"
846 item
->ob_type
->tp_name
);
855 /* There are at least two things to join. Do a pre-pass to figure out
856 * the total amount of space we'll need (sz), see whether any argument
857 * is absurd, and defer to the Unicode join if appropriate.
859 for (i
= 0; i
< seqlen
; i
++) {
860 const size_t old_sz
= sz
;
861 item
= PySequence_Fast_GET_ITEM(seq
, i
);
862 if (!PyString_Check(item
)){
863 if (PyUnicode_Check(item
)) {
865 return PyUnicode_Join((PyObject
*)self
, orig
);
867 PyErr_Format(PyExc_TypeError
,
868 "sequence item %i: expected string,"
870 i
, item
->ob_type
->tp_name
);
874 sz
+= PyString_GET_SIZE(item
);
877 if (sz
< old_sz
|| sz
> INT_MAX
) {
878 PyErr_SetString(PyExc_OverflowError
,
879 "join() is too long for a Python string");
885 /* Allocate result space. */
886 res
= PyString_FromStringAndSize((char*)NULL
, (int)sz
);
892 /* Catenate everything. */
893 p
= PyString_AS_STRING(res
);
894 for (i
= 0; i
< seqlen
; ++i
) {
896 item
= PySequence_Fast_GET_ITEM(seq
, i
);
897 n
= PyString_GET_SIZE(item
);
898 memcpy(p
, PyString_AS_STRING(item
), n
);
900 if (i
< seqlen
- 1) {
901 memcpy(p
, sep
, seplen
);
911 string_find_internal(PyStringObject
*self
, PyObject
*args
, int dir
)
913 const char *s
= PyString_AS_STRING(self
), *sub
;
914 int len
= PyString_GET_SIZE(self
);
915 int n
, i
= 0, last
= INT_MAX
;
918 if (!PyArg_ParseTuple(args
, "O|O&O&:find/rfind/index/rindex",
919 &subobj
, _PyEval_SliceIndex
, &i
, _PyEval_SliceIndex
, &last
))
921 if (PyString_Check(subobj
)) {
922 sub
= PyString_AS_STRING(subobj
);
923 n
= PyString_GET_SIZE(subobj
);
925 else if (PyUnicode_Check(subobj
))
926 return PyUnicode_Find((PyObject
*)self
, subobj
, i
, last
, 1);
927 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
942 if (n
== 0 && i
<= last
)
945 for (; i
<= last
; ++i
)
946 if (s
[i
] == sub
[0] && memcmp(&s
[i
], sub
, n
) == 0)
952 if (n
== 0 && i
<= last
)
954 for (j
= last
-n
; j
>= i
; --j
)
955 if (s
[j
] == sub
[0] && memcmp(&s
[j
], sub
, n
) == 0)
963 static char find__doc__
[] =
964 "S.find(sub [,start [,end]]) -> int\n\
966 Return the lowest index in S where substring sub is found,\n\
967 such that sub is contained within s[start,end]. Optional\n\
968 arguments start and end are interpreted as in slice notation.\n\
970 Return -1 on failure.";
973 string_find(PyStringObject
*self
, PyObject
*args
)
975 long result
= string_find_internal(self
, args
, +1);
978 return PyInt_FromLong(result
);
982 static char index__doc__
[] =
983 "S.index(sub [,start [,end]]) -> int\n\
985 Like S.find() but raise ValueError when the substring is not found.";
988 string_index(PyStringObject
*self
, PyObject
*args
)
990 long result
= string_find_internal(self
, args
, +1);
994 PyErr_SetString(PyExc_ValueError
,
995 "substring not found in string.index");
998 return PyInt_FromLong(result
);
1002 static char rfind__doc__
[] =
1003 "S.rfind(sub [,start [,end]]) -> int\n\
1005 Return the highest index in S where substring sub is found,\n\
1006 such that sub is contained within s[start,end]. Optional\n\
1007 arguments start and end are interpreted as in slice notation.\n\
1009 Return -1 on failure.";
1012 string_rfind(PyStringObject
*self
, PyObject
*args
)
1014 long result
= string_find_internal(self
, args
, -1);
1017 return PyInt_FromLong(result
);
1021 static char rindex__doc__
[] =
1022 "S.rindex(sub [,start [,end]]) -> int\n\
1024 Like S.rfind() but raise ValueError when the substring is not found.";
1027 string_rindex(PyStringObject
*self
, PyObject
*args
)
1029 long result
= string_find_internal(self
, args
, -1);
1033 PyErr_SetString(PyExc_ValueError
,
1034 "substring not found in string.rindex");
1037 return PyInt_FromLong(result
);
1042 do_strip(PyStringObject
*self
, PyObject
*args
, int striptype
)
1044 char *s
= PyString_AS_STRING(self
);
1045 int len
= PyString_GET_SIZE(self
), i
, j
;
1047 if (!PyArg_ParseTuple(args
, ":strip"))
1051 if (striptype
!= RIGHTSTRIP
) {
1052 while (i
< len
&& isspace(Py_CHARMASK(s
[i
]))) {
1058 if (striptype
!= LEFTSTRIP
) {
1061 } while (j
>= i
&& isspace(Py_CHARMASK(s
[j
])));
1065 if (i
== 0 && j
== len
) {
1067 return (PyObject
*)self
;
1070 return PyString_FromStringAndSize(s
+i
, j
-i
);
1074 static char strip__doc__
[] =
1075 "S.strip() -> string\n\
1077 Return a copy of the string S with leading and trailing\n\
1078 whitespace removed.";
1081 string_strip(PyStringObject
*self
, PyObject
*args
)
1083 return do_strip(self
, args
, BOTHSTRIP
);
1087 static char lstrip__doc__
[] =
1088 "S.lstrip() -> string\n\
1090 Return a copy of the string S with leading whitespace removed.";
1093 string_lstrip(PyStringObject
*self
, PyObject
*args
)
1095 return do_strip(self
, args
, LEFTSTRIP
);
1099 static char rstrip__doc__
[] =
1100 "S.rstrip() -> string\n\
1102 Return a copy of the string S with trailing whitespace removed.";
1105 string_rstrip(PyStringObject
*self
, PyObject
*args
)
1107 return do_strip(self
, args
, RIGHTSTRIP
);
1111 static char lower__doc__
[] =
1112 "S.lower() -> string\n\
1114 Return a copy of the string S converted to lowercase.";
1117 string_lower(PyStringObject
*self
, PyObject
*args
)
1119 char *s
= PyString_AS_STRING(self
), *s_new
;
1120 int i
, n
= PyString_GET_SIZE(self
);
1123 if (!PyArg_ParseTuple(args
, ":lower"))
1125 new = PyString_FromStringAndSize(NULL
, n
);
1128 s_new
= PyString_AsString(new);
1129 for (i
= 0; i
< n
; i
++) {
1130 int c
= Py_CHARMASK(*s
++);
1132 *s_new
= tolower(c
);
1141 static char upper__doc__
[] =
1142 "S.upper() -> string\n\
1144 Return a copy of the string S converted to uppercase.";
1147 string_upper(PyStringObject
*self
, PyObject
*args
)
1149 char *s
= PyString_AS_STRING(self
), *s_new
;
1150 int i
, n
= PyString_GET_SIZE(self
);
1153 if (!PyArg_ParseTuple(args
, ":upper"))
1155 new = PyString_FromStringAndSize(NULL
, n
);
1158 s_new
= PyString_AsString(new);
1159 for (i
= 0; i
< n
; i
++) {
1160 int c
= Py_CHARMASK(*s
++);
1162 *s_new
= toupper(c
);
1171 static char title__doc__
[] =
1172 "S.title() -> string\n\
1174 Return a titlecased version of S, i.e. words start with uppercase\n\
1175 characters, all remaining cased characters have lowercase.";
1178 string_title(PyStringObject
*self
, PyObject
*args
)
1180 char *s
= PyString_AS_STRING(self
), *s_new
;
1181 int i
, n
= PyString_GET_SIZE(self
);
1182 int previous_is_cased
= 0;
1185 if (!PyArg_ParseTuple(args
, ":title"))
1187 new = PyString_FromStringAndSize(NULL
, n
);
1190 s_new
= PyString_AsString(new);
1191 for (i
= 0; i
< n
; i
++) {
1192 int c
= Py_CHARMASK(*s
++);
1194 if (!previous_is_cased
)
1196 previous_is_cased
= 1;
1197 } else if (isupper(c
)) {
1198 if (previous_is_cased
)
1200 previous_is_cased
= 1;
1202 previous_is_cased
= 0;
1208 static char capitalize__doc__
[] =
1209 "S.capitalize() -> string\n\
1211 Return a copy of the string S with only its first character\n\
1215 string_capitalize(PyStringObject
*self
, PyObject
*args
)
1217 char *s
= PyString_AS_STRING(self
), *s_new
;
1218 int i
, n
= PyString_GET_SIZE(self
);
1221 if (!PyArg_ParseTuple(args
, ":capitalize"))
1223 new = PyString_FromStringAndSize(NULL
, n
);
1226 s_new
= PyString_AsString(new);
1228 int c
= Py_CHARMASK(*s
++);
1230 *s_new
= toupper(c
);
1235 for (i
= 1; i
< n
; i
++) {
1236 int c
= Py_CHARMASK(*s
++);
1238 *s_new
= tolower(c
);
1247 static char count__doc__
[] =
1248 "S.count(sub[, start[, end]]) -> int\n\
1250 Return the number of occurrences of substring sub in string\n\
1251 S[start:end]. Optional arguments start and end are\n\
1252 interpreted as in slice notation.";
1255 string_count(PyStringObject
*self
, PyObject
*args
)
1257 const char *s
= PyString_AS_STRING(self
), *sub
;
1258 int len
= PyString_GET_SIZE(self
), n
;
1259 int i
= 0, last
= INT_MAX
;
1263 if (!PyArg_ParseTuple(args
, "O|O&O&:count", &subobj
,
1264 _PyEval_SliceIndex
, &i
, _PyEval_SliceIndex
, &last
))
1267 if (PyString_Check(subobj
)) {
1268 sub
= PyString_AS_STRING(subobj
);
1269 n
= PyString_GET_SIZE(subobj
);
1271 else if (PyUnicode_Check(subobj
)) {
1273 count
= PyUnicode_Count((PyObject
*)self
, subobj
, i
, last
);
1277 return PyInt_FromLong((long) count
);
1279 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
1294 return PyInt_FromLong((long) (m
-i
));
1298 if (!memcmp(s
+i
, sub
, n
)) {
1305 return PyInt_FromLong((long) r
);
1309 static char swapcase__doc__
[] =
1310 "S.swapcase() -> string\n\
1312 Return a copy of the string S with uppercase characters\n\
1313 converted to lowercase and vice versa.";
1316 string_swapcase(PyStringObject
*self
, PyObject
*args
)
1318 char *s
= PyString_AS_STRING(self
), *s_new
;
1319 int i
, n
= PyString_GET_SIZE(self
);
1322 if (!PyArg_ParseTuple(args
, ":swapcase"))
1324 new = PyString_FromStringAndSize(NULL
, n
);
1327 s_new
= PyString_AsString(new);
1328 for (i
= 0; i
< n
; i
++) {
1329 int c
= Py_CHARMASK(*s
++);
1331 *s_new
= toupper(c
);
1333 else if (isupper(c
)) {
1334 *s_new
= tolower(c
);
1344 static char translate__doc__
[] =
1345 "S.translate(table [,deletechars]) -> string\n\
1347 Return a copy of the string S, where all characters occurring\n\
1348 in the optional argument deletechars are removed, and the\n\
1349 remaining characters have been mapped through the given\n\
1350 translation table, which must be a string of length 256.";
1353 string_translate(PyStringObject
*self
, PyObject
*args
)
1355 register char *input
, *output
;
1356 register const char *table
;
1357 register int i
, c
, changed
= 0;
1358 PyObject
*input_obj
= (PyObject
*)self
;
1359 const char *table1
, *output_start
, *del_table
=NULL
;
1360 int inlen
, tablen
, dellen
= 0;
1362 int trans_table
[256];
1363 PyObject
*tableobj
, *delobj
= NULL
;
1365 if (!PyArg_ParseTuple(args
, "O|O:translate",
1366 &tableobj
, &delobj
))
1369 if (PyString_Check(tableobj
)) {
1370 table1
= PyString_AS_STRING(tableobj
);
1371 tablen
= PyString_GET_SIZE(tableobj
);
1373 else if (PyUnicode_Check(tableobj
)) {
1374 /* Unicode .translate() does not support the deletechars
1375 parameter; instead a mapping to None will cause characters
1377 if (delobj
!= NULL
) {
1378 PyErr_SetString(PyExc_TypeError
,
1379 "deletions are implemented differently for unicode");
1382 return PyUnicode_Translate((PyObject
*)self
, tableobj
, NULL
);
1384 else if (PyObject_AsCharBuffer(tableobj
, &table1
, &tablen
))
1387 if (delobj
!= NULL
) {
1388 if (PyString_Check(delobj
)) {
1389 del_table
= PyString_AS_STRING(delobj
);
1390 dellen
= PyString_GET_SIZE(delobj
);
1392 else if (PyUnicode_Check(delobj
)) {
1393 PyErr_SetString(PyExc_TypeError
,
1394 "deletions are implemented differently for unicode");
1397 else if (PyObject_AsCharBuffer(delobj
, &del_table
, &dellen
))
1400 if (tablen
!= 256) {
1401 PyErr_SetString(PyExc_ValueError
,
1402 "translation table must be 256 characters long");
1412 inlen
= PyString_Size(input_obj
);
1413 result
= PyString_FromStringAndSize((char *)NULL
, inlen
);
1416 output_start
= output
= PyString_AsString(result
);
1417 input
= PyString_AsString(input_obj
);
1420 /* If no deletions are required, use faster code */
1421 for (i
= inlen
; --i
>= 0; ) {
1422 c
= Py_CHARMASK(*input
++);
1423 if (Py_CHARMASK((*output
++ = table
[c
])) != c
)
1429 Py_INCREF(input_obj
);
1433 for (i
= 0; i
< 256; i
++)
1434 trans_table
[i
] = Py_CHARMASK(table
[i
]);
1436 for (i
= 0; i
< dellen
; i
++)
1437 trans_table
[(int) Py_CHARMASK(del_table
[i
])] = -1;
1439 for (i
= inlen
; --i
>= 0; ) {
1440 c
= Py_CHARMASK(*input
++);
1441 if (trans_table
[c
] != -1)
1442 if (Py_CHARMASK(*output
++ = (char)trans_table
[c
]) == c
)
1448 Py_INCREF(input_obj
);
1451 /* Fix the size of the resulting string */
1452 if (inlen
> 0 &&_PyString_Resize(&result
, output
-output_start
))
1458 /* What follows is used for implementing replace(). Perry Stoll. */
1463 strstr replacement for arbitrary blocks of memory.
1465 Locates the first occurrence in the memory pointed to by MEM of the
1466 contents of memory pointed to by PAT. Returns the index into MEM if
1467 found, or -1 if not found. If len of PAT is greater than length of
1468 MEM, the function returns -1.
1471 mymemfind(const char *mem
, int len
, const char *pat
, int pat_len
)
1475 /* pattern can not occur in the last pat_len-1 chars */
1478 for (ii
= 0; ii
<= len
; ii
++) {
1479 if (mem
[ii
] == pat
[0] && memcmp(&mem
[ii
], pat
, pat_len
) == 0) {
1489 Return the number of distinct times PAT is found in MEM.
1490 meaning mem=1111 and pat==11 returns 2.
1491 mem=11111 and pat==11 also return 2.
1494 mymemcnt(const char *mem
, int len
, const char *pat
, int pat_len
)
1496 register int offset
= 0;
1500 offset
= mymemfind(mem
, len
, pat
, pat_len
);
1503 mem
+= offset
+ pat_len
;
1504 len
-= offset
+ pat_len
;
1513 Return a string in which all occurrences of PAT in memory STR are
1516 If length of PAT is greater than length of STR or there are no occurrences
1517 of PAT in STR, then the original string is returned. Otherwise, a new
1518 string is allocated here and returned.
1520 on return, out_len is:
1521 the length of output string, or
1522 -1 if the input string is returned, or
1523 unchanged if an error occurs (no memory).
1526 the new string allocated locally, or
1527 NULL if an error occurred.
1530 mymemreplace(const char *str
, int len
, /* input string */
1531 const char *pat
, int pat_len
, /* pattern string to find */
1532 const char *sub
, int sub_len
, /* substitution string */
1533 int count
, /* number of replacements */
1538 int nfound
, offset
, new_len
;
1540 if (len
== 0 || pat_len
> len
)
1543 /* find length of output string */
1544 nfound
= mymemcnt(str
, len
, pat
, pat_len
);
1547 else if (nfound
> count
)
1552 new_len
= len
+ nfound
*(sub_len
- pat_len
);
1554 /* Have to allocate something for the caller to free(). */
1555 out_s
= (char *)PyMem_MALLOC(1);
1561 assert(new_len
> 0);
1562 new_s
= (char *)PyMem_MALLOC(new_len
);
1567 for (; count
> 0 && len
> 0; --count
) {
1568 /* find index of next instance of pattern */
1569 offset
= mymemfind(str
, len
, pat
, pat_len
);
1573 /* copy non matching part of input string */
1574 memcpy(new_s
, str
, offset
);
1575 str
+= offset
+ pat_len
;
1576 len
-= offset
+ pat_len
;
1578 /* copy substitute into the output string */
1580 memcpy(new_s
, sub
, sub_len
);
1583 /* copy any remaining values into output string */
1585 memcpy(new_s
, str
, len
);
1592 return (char *)str
; /* cast away const */
1596 static char replace__doc__
[] =
1597 "S.replace (old, new[, maxsplit]) -> string\n\
1599 Return a copy of string S with all occurrences of substring\n\
1600 old replaced by new. If the optional argument maxsplit is\n\
1601 given, only the first maxsplit occurrences are replaced.";
1604 string_replace(PyStringObject
*self
, PyObject
*args
)
1606 const char *str
= PyString_AS_STRING(self
), *sub
, *repl
;
1608 int len
= PyString_GET_SIZE(self
), sub_len
, repl_len
, out_len
;
1611 PyObject
*subobj
, *replobj
;
1613 if (!PyArg_ParseTuple(args
, "OO|i:replace",
1614 &subobj
, &replobj
, &count
))
1617 if (PyString_Check(subobj
)) {
1618 sub
= PyString_AS_STRING(subobj
);
1619 sub_len
= PyString_GET_SIZE(subobj
);
1621 else if (PyUnicode_Check(subobj
))
1622 return PyUnicode_Replace((PyObject
*)self
,
1623 subobj
, replobj
, count
);
1624 else if (PyObject_AsCharBuffer(subobj
, &sub
, &sub_len
))
1627 if (PyString_Check(replobj
)) {
1628 repl
= PyString_AS_STRING(replobj
);
1629 repl_len
= PyString_GET_SIZE(replobj
);
1631 else if (PyUnicode_Check(replobj
))
1632 return PyUnicode_Replace((PyObject
*)self
,
1633 subobj
, replobj
, count
);
1634 else if (PyObject_AsCharBuffer(replobj
, &repl
, &repl_len
))
1638 PyErr_SetString(PyExc_ValueError
, "empty pattern string");
1641 new_s
= mymemreplace(str
,len
,sub
,sub_len
,repl
,repl_len
,count
,&out_len
);
1642 if (new_s
== NULL
) {
1646 if (out_len
== -1) {
1647 /* we're returning another reference to self */
1648 new = (PyObject
*)self
;
1652 new = PyString_FromStringAndSize(new_s
, out_len
);
1659 static char startswith__doc__
[] =
1660 "S.startswith(prefix[, start[, end]]) -> int\n\
1662 Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1663 optional start, test S beginning at that position. With optional end, stop\n\
1664 comparing S at that position.";
1667 string_startswith(PyStringObject
*self
, PyObject
*args
)
1669 const char* str
= PyString_AS_STRING(self
);
1670 int len
= PyString_GET_SIZE(self
);
1677 if (!PyArg_ParseTuple(args
, "O|O&O&:startswith", &subobj
,
1678 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1680 if (PyString_Check(subobj
)) {
1681 prefix
= PyString_AS_STRING(subobj
);
1682 plen
= PyString_GET_SIZE(subobj
);
1684 else if (PyUnicode_Check(subobj
)) {
1686 rc
= PyUnicode_Tailmatch((PyObject
*)self
,
1687 subobj
, start
, end
, -1);
1691 return PyInt_FromLong((long) rc
);
1693 else if (PyObject_AsCharBuffer(subobj
, &prefix
, &plen
))
1696 /* adopt Java semantics for index out of range. it is legal for
1697 * offset to be == plen, but this only returns true if prefix is
1700 if (start
< 0 || start
+plen
> len
)
1701 return PyInt_FromLong(0);
1703 if (!memcmp(str
+start
, prefix
, plen
)) {
1704 /* did the match end after the specified end? */
1706 return PyInt_FromLong(1);
1707 else if (end
- start
< plen
)
1708 return PyInt_FromLong(0);
1710 return PyInt_FromLong(1);
1712 else return PyInt_FromLong(0);
/* endswith: docstring followed by the method implementation.

   The suffix may be a string object, a unicode object (handled by
   PyUnicode_Tailmatch with direction +1) or any object accepted by
   PyObject_AsCharBuffer.  For the byte-string case the visible logic is:
     - return 0 when start is negative, start is past the end of S, or
       the suffix is longer than S;
     - upper = end when 0 <= end <= len, otherwise len;
     - lower = the larger of (upper - slen) and start;
     - return 1 only when the window [lower, upper) holds at least slen
       bytes and those bytes equal the suffix. */
1716 static char endswith__doc__
[] =
1717 "S.endswith(suffix[, start[, end]]) -> int\n\
1719 Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1720 optional start, test S beginning at that position. With optional end, stop\n\
1721 comparing S at that position.";
1724 string_endswith(PyStringObject
*self
, PyObject
*args
)
1726 const char* str
= PyString_AS_STRING(self
);
1727 int len
= PyString_GET_SIZE(self
);
1735 if (!PyArg_ParseTuple(args
, "O|O&O&:endswith", &subobj
,
1736 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1738 if (PyString_Check(subobj
)) {
1739 suffix
= PyString_AS_STRING(subobj
);
1740 slen
= PyString_GET_SIZE(subobj
);
1742 else if (PyUnicode_Check(subobj
)) {
1744 rc
= PyUnicode_Tailmatch((PyObject
*)self
,
1745 subobj
, start
, end
, +1);
1749 return PyInt_FromLong((long) rc
);
1751 else if (PyObject_AsCharBuffer(subobj
, &suffix
, &slen
))
1754 if (start
< 0 || start
> len
|| slen
> len
)
1755 return PyInt_FromLong(0);
1757 upper
= (end
>= 0 && end
<= len
) ? end
: len
;
1758 lower
= (upper
- slen
) > start
? (upper
- slen
) : start
;
1760 if (upper
-lower
>= slen
&& !memcmp(str
+lower
, suffix
, slen
))
1761 return PyInt_FromLong(1);
1762 else return PyInt_FromLong(0);
/* encode: docstring followed by the method implementation.

   Parses up to two optional C strings (encoding, errors), both
   initialised to NULL, and hands them together with self to
   PyString_AsEncodedString(), whose result is returned unchanged. */
1766 static char encode__doc__
[] =
1767 "S.encode([encoding[,errors]]) -> string\n\
1769 Return an encoded string version of S. Default encoding is the current\n\
1770 default string encoding. errors may be given to set a different error\n\
1771 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1772 a ValueError. Other possible values are 'ignore' and 'replace'.";
1775 string_encode(PyStringObject
*self
, PyObject
*args
)
1777 char *encoding
= NULL
;
1778 char *errors
= NULL
;
1779 if (!PyArg_ParseTuple(args
, "|ss:encode", &encoding
, &errors
))
1781 return PyString_AsEncodedString((PyObject
*)self
, encoding
, errors
);
/* expandtabs: docstring followed by the method implementation.

   tabsize is an optional int argument.  The work is done in two passes
   over the bytes of self (p walks from the start of the buffer to e,
   one past its end):
     1. measure: a tab advances the column counter j to the next
        multiple of tabsize; '\n' and '\r' get special handling
        (presumably resetting the column -- the statements are not
        visible in this listing);
     2. fill: a new string of i + j bytes is allocated with
        PyString_FromStringAndSize(NULL, ...) and written through q. */
1785 static char expandtabs__doc__
[] =
1786 "S.expandtabs([tabsize]) -> string\n\
1788 Return a copy of S where all tab characters are expanded using spaces.\n\
1789 If tabsize is not given, a tab size of 8 characters is assumed.";
1792 string_expandtabs(PyStringObject
*self
, PyObject
*args
)
1800 if (!PyArg_ParseTuple(args
, "|i:expandtabs", &tabsize
))
1803 /* First pass: determine size of output string */
1805 e
= PyString_AS_STRING(self
) + PyString_GET_SIZE(self
);
1806 for (p
= PyString_AS_STRING(self
); p
< e
; p
++)
1809 j
+= tabsize
- (j
% tabsize
);
1813 if (*p
== '\n' || *p
== '\r') {
1819 /* Second pass: create output string and fill it */
1820 u
= PyString_FromStringAndSize(NULL
, i
+ j
);
1825 q
= PyString_AS_STRING(u
);
1827 for (p
= PyString_AS_STRING(self
); p
< e
; p
++)
1830 i
= tabsize
- (j
% tabsize
);
1839 if (*p
== '\n' || *p
== '\r')
/* pad: shared helper for ljust/rjust/center/zfill.

   When both pad widths are zero, self itself is returned.  Otherwise a
   new string of left + len(self) + right bytes is allocated; the first
   `left` bytes are set to `fill`, self's bytes are copied after them,
   and a final memset targets the region after the copy (presumably
   `right` more fill bytes -- its remaining arguments are not visible). */
1847 PyObject
*pad(PyStringObject
*self
,
1859 if (left
== 0 && right
== 0) {
1861 return (PyObject
*)self
;
1864 u
= PyString_FromStringAndSize(NULL
,
1865 left
+ PyString_GET_SIZE(self
) + right
);
1868 memset(PyString_AS_STRING(u
), fill
, left
);
1869 memcpy(PyString_AS_STRING(u
) + left
,
1870 PyString_AS_STRING(self
),
1871 PyString_GET_SIZE(self
));
1873 memset(PyString_AS_STRING(u
) + left
+ PyString_GET_SIZE(self
),
/* ljust: docstring followed by the method implementation.

   Takes one int argument, width.  A string already at least `width`
   long is returned as is; otherwise pad() appends
   width - len(self) spaces on the right (left pad is 0). */
1880 static char ljust__doc__
[] =
1881 "S.ljust(width) -> string\n\
1883 Return S left justified in a string of length width. Padding is\n\
1884 done using spaces.";
1887 string_ljust(PyStringObject
*self
, PyObject
*args
)
1890 if (!PyArg_ParseTuple(args
, "i:ljust", &width
))
1893 if (PyString_GET_SIZE(self
) >= width
) {
1895 return (PyObject
*) self
;
1898 return pad(self
, 0, width
- PyString_GET_SIZE(self
), ' ');
/* rjust: docstring followed by the method implementation.

   Mirror image of ljust: a string already at least `width` long is
   returned as is; otherwise pad() prepends width - len(self) spaces
   on the left (right pad is 0). */
1902 static char rjust__doc__
[] =
1903 "S.rjust(width) -> string\n\
1905 Return S right justified in a string of length width. Padding is\n\
1906 done using spaces.";
1909 string_rjust(PyStringObject
*self
, PyObject
*args
)
1912 if (!PyArg_ParseTuple(args
, "i:rjust", &width
))
1915 if (PyString_GET_SIZE(self
) >= width
) {
1917 return (PyObject
*) self
;
1920 return pad(self
, width
- PyString_GET_SIZE(self
), 0, ' ');
/* center: docstring followed by the method implementation.

   marg is the total number of pad characters needed.  The left share
   is marg / 2, plus one extra when both marg and width are odd
   (marg & width & 1); the remainder, marg - left, goes on the right.
   A string already at least `width` long is returned as is. */
1924 static char center__doc__
[] =
1925 "S.center(width) -> string\n\
1927 Return S centered in a string of length width. Padding is done\n\
1931 string_center(PyStringObject
*self
, PyObject
*args
)
1936 if (!PyArg_ParseTuple(args
, "i:center", &width
))
1939 if (PyString_GET_SIZE(self
) >= width
) {
1941 return (PyObject
*) self
;
1944 marg
= width
- PyString_GET_SIZE(self
);
1945 left
= marg
/ 2 + (marg
& width
& 1);
1947 return pad(self
, left
, marg
- left
, ' ');
/* zfill: docstring followed by the method implementation.

   A string already at least `width` long is returned as is.  Otherwise
   pad() prepends fill = width - len(self) '0' characters, so the first
   byte of the original text ends up at index `fill` of the new string;
   when that byte is '+' or '-' the sign is moved to the front (the
   statements performing the move are not visible in this listing). */
1951 static char zfill__doc__
[] =
1952 "S.zfill(width) -> string\n\
1954 Pad a numeric string x with zeros on the left, to fill a field\n\
1955 of the specified width. The string x is never truncated.";
1958 string_zfill(PyStringObject
*self
, PyObject
*args
)
1965 if (!PyArg_ParseTuple(args
, "i:zfill", &width
))
1968 if (PyString_GET_SIZE(self
) >= width
) {
1970 return (PyObject
*) self
;
1973 fill
= width
- PyString_GET_SIZE(self
);
1975 u
= pad(self
, fill
, 0, '0');
1979 str
= PyString_AS_STRING(u
);
1980 if (str
[fill
] == '+' || str
[fill
] == '-') {
1981 /* move sign to beginning of string */
/* isspace: docstring followed by the method implementation.

   Takes no arguments (PyArg_NoArgs).  The bytes are read as unsigned
   char.  Shape of the routine: a one-character shortcut that can
   return 1, an empty string returns 0, then a scan from p to e that
   returns 0 on the first failing byte and 1 if the scan completes.
   NOTE(review): the per-byte test itself is not visible in this
   listing; per the docstring it is a whitespace check -- confirm. */
1990 static char isspace__doc__
[] =
1991 "S.isspace() -> int\n\
1993 Return 1 if there are only whitespace characters in S,\n\
1997 string_isspace(PyStringObject
*self
, PyObject
*args
)
1999 register const unsigned char *p
2000 = (unsigned char *) PyString_AS_STRING(self
);
2001 register const unsigned char *e
;
2003 if (!PyArg_NoArgs(args
))
2006 /* Shortcut for single character strings */
2007 if (PyString_GET_SIZE(self
) == 1 &&
2009 return PyInt_FromLong(1);
2011 /* Special case for empty strings */
2012 if (PyString_GET_SIZE(self
) == 0)
2013 return PyInt_FromLong(0);
2015 e
= p
+ PyString_GET_SIZE(self
);
2016 for (; p
< e
; p
++) {
2018 return PyInt_FromLong(0);
2020 return PyInt_FromLong(1);
/* isalpha: docstring followed by the method implementation.

   Same shape as the other is*() predicates: no arguments, bytes read
   as unsigned char, one-character shortcut, empty string returns 0,
   then a scan that returns 0 on the first failing byte and 1 otherwise.
   NOTE(review): the per-byte test is not visible in this listing; per
   the docstring it is an alphabetic check -- confirm. */
2024 static char isalpha__doc__
[] =
2025 "S.isalpha() -> int\n\
2027 Return 1 if all characters in S are alphabetic\n\
2028 and there is at least one character in S, 0 otherwise.";
2031 string_isalpha(PyStringObject
*self
, PyObject
*args
)
2033 register const unsigned char *p
2034 = (unsigned char *) PyString_AS_STRING(self
);
2035 register const unsigned char *e
;
2037 if (!PyArg_NoArgs(args
))
2040 /* Shortcut for single character strings */
2041 if (PyString_GET_SIZE(self
) == 1 &&
2043 return PyInt_FromLong(1);
2045 /* Special case for empty strings */
2046 if (PyString_GET_SIZE(self
) == 0)
2047 return PyInt_FromLong(0);
2049 e
= p
+ PyString_GET_SIZE(self
);
2050 for (; p
< e
; p
++) {
2052 return PyInt_FromLong(0);
2054 return PyInt_FromLong(1);
/* isalnum: docstring followed by the method implementation.

   Same shape as the other is*() predicates: no arguments, bytes read
   as unsigned char, one-character shortcut, empty string returns 0,
   then a scan that returns 0 on the first failing byte and 1 otherwise.
   NOTE(review): the per-byte test is not visible in this listing; per
   the docstring it is an alphanumeric check -- confirm. */
2058 static char isalnum__doc__
[] =
2059 "S.isalnum() -> int\n\
2061 Return 1 if all characters in S are alphanumeric\n\
2062 and there is at least one character in S, 0 otherwise.";
2065 string_isalnum(PyStringObject
*self
, PyObject
*args
)
2067 register const unsigned char *p
2068 = (unsigned char *) PyString_AS_STRING(self
);
2069 register const unsigned char *e
;
2071 if (!PyArg_NoArgs(args
))
2074 /* Shortcut for single character strings */
2075 if (PyString_GET_SIZE(self
) == 1 &&
2077 return PyInt_FromLong(1);
2079 /* Special case for empty strings */
2080 if (PyString_GET_SIZE(self
) == 0)
2081 return PyInt_FromLong(0);
2083 e
= p
+ PyString_GET_SIZE(self
);
2084 for (; p
< e
; p
++) {
2086 return PyInt_FromLong(0);
2088 return PyInt_FromLong(1);
/* isdigit: docstring followed by the method implementation.

   Same shape as the other is*() predicates: no arguments, bytes read
   as unsigned char, one-character shortcut, empty string returns 0,
   then a scan that returns 0 on the first failing byte and 1 otherwise.
   NOTE(review): the per-byte test is not visible in this listing; per
   the docstring it is a digit check -- confirm. */
2092 static char isdigit__doc__
[] =
2093 "S.isdigit() -> int\n\
2095 Return 1 if there are only digit characters in S,\n\
2099 string_isdigit(PyStringObject
*self
, PyObject
*args
)
2101 register const unsigned char *p
2102 = (unsigned char *) PyString_AS_STRING(self
);
2103 register const unsigned char *e
;
2105 if (!PyArg_NoArgs(args
))
2108 /* Shortcut for single character strings */
2109 if (PyString_GET_SIZE(self
) == 1 &&
2111 return PyInt_FromLong(1);
2113 /* Special case for empty strings */
2114 if (PyString_GET_SIZE(self
) == 0)
2115 return PyInt_FromLong(0);
2117 e
= p
+ PyString_GET_SIZE(self
);
2118 for (; p
< e
; p
++) {
2120 return PyInt_FromLong(0);
2122 return PyInt_FromLong(1);
/* islower: docstring followed by the method implementation.

   A one-character string answers islower(*p) != 0 directly and an
   empty string answers 0.  For longer strings the scan keeps a `cased`
   flag that is raised when a lowercase byte is seen; the loop can
   return 0 early (its condition is not visible in this listing --
   presumably an uppercase byte) and otherwise the final answer is the
   value of `cased`, so a string with no cased bytes yields 0. */
2126 static char islower__doc__
[] =
2127 "S.islower() -> int\n\
2129 Return 1 if all cased characters in S are lowercase and there is\n\
2130 at least one cased character in S, 0 otherwise.";
2133 string_islower(PyStringObject
*self
, PyObject
*args
)
2135 register const unsigned char *p
2136 = (unsigned char *) PyString_AS_STRING(self
);
2137 register const unsigned char *e
;
2140 if (!PyArg_NoArgs(args
))
2143 /* Shortcut for single character strings */
2144 if (PyString_GET_SIZE(self
) == 1)
2145 return PyInt_FromLong(islower(*p
) != 0);
2147 /* Special case for empty strings */
2148 if (PyString_GET_SIZE(self
) == 0)
2149 return PyInt_FromLong(0);
2151 e
= p
+ PyString_GET_SIZE(self
);
2153 for (; p
< e
; p
++) {
2155 return PyInt_FromLong(0);
2156 else if (!cased
&& islower(*p
))
2159 return PyInt_FromLong(cased
);
/* isupper: docstring followed by the method implementation.

   A one-character string answers isupper(*p) != 0 directly and an
   empty string answers 0.  For longer strings the scan keeps a `cased`
   flag that is raised when an uppercase byte is seen; the loop can
   return 0 early (its condition is not visible in this listing --
   presumably a lowercase byte) and otherwise the final answer is the
   value of `cased`, so a string with no cased bytes yields 0. */
2163 static char isupper__doc__
[] =
2164 "S.isupper() -> int\n\
2166 Return 1 if all cased characters in S are uppercase and there is\n\
2167 at least one cased character in S, 0 otherwise.";
2170 string_isupper(PyStringObject
*self
, PyObject
*args
)
2172 register const unsigned char *p
2173 = (unsigned char *) PyString_AS_STRING(self
);
2174 register const unsigned char *e
;
2177 if (!PyArg_NoArgs(args
))
2180 /* Shortcut for single character strings */
2181 if (PyString_GET_SIZE(self
) == 1)
2182 return PyInt_FromLong(isupper(*p
) != 0);
2184 /* Special case for empty strings */
2185 if (PyString_GET_SIZE(self
) == 0)
2186 return PyInt_FromLong(0);
2188 e
= p
+ PyString_GET_SIZE(self
);
2190 for (; p
< e
; p
++) {
2192 return PyInt_FromLong(0);
2193 else if (!cased
&& isupper(*p
))
2196 return PyInt_FromLong(cased
);
/* istitle: docstring followed by the method implementation.

   A one-character string answers isupper(*p) != 0 and an empty string
   answers 0.  The scan tracks previous_is_cased, i.e. whether the byte
   just before the current one was a cased letter:
     - in the first branch (its test is not visible here; presumably
       isupper(ch)) a cased predecessor makes the answer 0;
     - a lowercase byte without a cased predecessor makes the answer 0;
     - any other byte resets previous_is_cased to 0.
   The final answer is `cased`, which the hidden lines presumably set
   once any cased byte has been accepted. */
2200 static char istitle__doc__
[] =
2201 "S.istitle() -> int\n\
2203 Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2204 may only follow uncased characters and lowercase characters only cased\n\
2205 ones. Return 0 otherwise.";
2208 string_istitle(PyStringObject
*self
, PyObject
*args
)
2210 register const unsigned char *p
2211 = (unsigned char *) PyString_AS_STRING(self
);
2212 register const unsigned char *e
;
2213 int cased
, previous_is_cased
;
2215 if (!PyArg_NoArgs(args
))
2218 /* Shortcut for single character strings */
2219 if (PyString_GET_SIZE(self
) == 1)
2220 return PyInt_FromLong(isupper(*p
) != 0);
2222 /* Special case for empty strings */
2223 if (PyString_GET_SIZE(self
) == 0)
2224 return PyInt_FromLong(0);
2226 e
= p
+ PyString_GET_SIZE(self
);
2228 previous_is_cased
= 0;
2229 for (; p
< e
; p
++) {
2230 register const unsigned char ch
= *p
;
2233 if (previous_is_cased
)
2234 return PyInt_FromLong(0);
2235 previous_is_cased
= 1;
2238 else if (islower(ch
)) {
2239 if (!previous_is_cased
)
2240 return PyInt_FromLong(0);
2241 previous_is_cased
= 1;
2245 previous_is_cased
= 0;
2247 return PyInt_FromLong(cased
);
/* Docstring for S.splitlines().  The signature line previously read
   "[keepends]])" with one closing bracket too many; the optional
   argument list is now balanced.  The blank separator line between the
   signature and the description follows the layout of the other method
   docstrings in this file. */
static char splitlines__doc__[] =
"S.splitlines([keepends]) -> list of strings\n\
\n\
Return a list of the lines in S, breaking at line boundaries.\n\
Line breaks are not included in the resulting list unless keepends\n\
is given and true.";
/* SPLIT_APPEND(data, left, right) builds a string from the bytes
   data[left..right) and appends it to the local `list`; it relies on
   locals named `str` and `list` in the expanding function and has an
   error branch when PyList_Append() reports failure.

   string_splitlines takes an optional int `keepends`.  It walks the
   buffer with i (scan position) and j (start of the current line):
   i advances to the next '\n' or '\r', a "\r\n" pair is treated as a
   single line break, and the line is appended up to `eol`.  A final
   SPLIT_APPEND(data, j, len) handles text after the last line break. */
2258 #define SPLIT_APPEND(data, left, right) \
2259 str = PyString_FromStringAndSize(data + left, right - left); \
2262 if (PyList_Append(list, str)) { \
2270 string_splitlines(PyStringObject
*self
, PyObject
*args
)
2280 if (!PyArg_ParseTuple(args
, "|i:splitlines", &keepends
))
2283 data
= PyString_AS_STRING(self
);
2284 len
= PyString_GET_SIZE(self
);
2286 list
= PyList_New(0);
2290 for (i
= j
= 0; i
< len
; ) {
2293 /* Find a line and append it */
2294 while (i
< len
&& data
[i
] != '\n' && data
[i
] != '\r')
2297 /* Skip the line break reading CRLF as one line break */
2300 if (data
[i
] == '\r' && i
+ 1 < len
&&
2308 SPLIT_APPEND(data
, j
, eol
);
2312 SPLIT_APPEND(data
, j
, len
);
/* Method table for string objects; string_getattr() passes it to
   Py_FindMethod().  Each entry is {name, C function, flag, docstring}.
   The flag is 0 for the is*() predicates, which parse their arguments
   with PyArg_NoArgs(), and 1 for the methods that use
   PyArg_ParseTuple().  The table ends with a {NULL, NULL} sentinel. */
2326 string_methods
[] = {
2327 /* Counterparts of the obsolete stropmodule functions; except
2328 string.maketrans(). */
2329 {"join", (PyCFunction
)string_join
, 1, join__doc__
},
2330 {"split", (PyCFunction
)string_split
, 1, split__doc__
},
2331 {"lower", (PyCFunction
)string_lower
, 1, lower__doc__
},
2332 {"upper", (PyCFunction
)string_upper
, 1, upper__doc__
},
2333 {"islower", (PyCFunction
)string_islower
, 0, islower__doc__
},
2334 {"isupper", (PyCFunction
)string_isupper
, 0, isupper__doc__
},
2335 {"isspace", (PyCFunction
)string_isspace
, 0, isspace__doc__
},
2336 {"isdigit", (PyCFunction
)string_isdigit
, 0, isdigit__doc__
},
2337 {"istitle", (PyCFunction
)string_istitle
, 0, istitle__doc__
},
2338 {"isalpha", (PyCFunction
)string_isalpha
, 0, isalpha__doc__
},
2339 {"isalnum", (PyCFunction
)string_isalnum
, 0, isalnum__doc__
},
2340 {"capitalize", (PyCFunction
)string_capitalize
, 1, capitalize__doc__
},
2341 {"count", (PyCFunction
)string_count
, 1, count__doc__
},
2342 {"endswith", (PyCFunction
)string_endswith
, 1, endswith__doc__
},
2343 {"find", (PyCFunction
)string_find
, 1, find__doc__
},
2344 {"index", (PyCFunction
)string_index
, 1, index__doc__
},
2345 {"lstrip", (PyCFunction
)string_lstrip
, 1, lstrip__doc__
},
2346 {"replace", (PyCFunction
)string_replace
, 1, replace__doc__
},
2347 {"rfind", (PyCFunction
)string_rfind
, 1, rfind__doc__
},
2348 {"rindex", (PyCFunction
)string_rindex
, 1, rindex__doc__
},
2349 {"rstrip", (PyCFunction
)string_rstrip
, 1, rstrip__doc__
},
2350 {"startswith", (PyCFunction
)string_startswith
, 1, startswith__doc__
},
2351 {"strip", (PyCFunction
)string_strip
, 1, strip__doc__
},
2352 {"swapcase", (PyCFunction
)string_swapcase
, 1, swapcase__doc__
},
2353 {"translate", (PyCFunction
)string_translate
, 1, translate__doc__
},
2354 {"title", (PyCFunction
)string_title
, 1, title__doc__
},
2355 {"ljust", (PyCFunction
)string_ljust
, 1, ljust__doc__
},
2356 {"rjust", (PyCFunction
)string_rjust
, 1, rjust__doc__
},
2357 {"center", (PyCFunction
)string_center
, 1, center__doc__
},
2358 {"encode", (PyCFunction
)string_encode
, 1, encode__doc__
},
2359 {"expandtabs", (PyCFunction
)string_expandtabs
, 1, expandtabs__doc__
},
2360 {"splitlines", (PyCFunction
)string_splitlines
, 1, splitlines__doc__
},
2362 {"zfill", (PyCFunction
)string_zfill
, 1, zfill__doc__
},
2364 {NULL
, NULL
} /* sentinel */
/* Attribute lookup hook for strings: resolves `name` against the
   string_methods table via Py_FindMethod() and returns its result. */
2368 string_getattr(PyStringObject
*s
, char *name
)
2370 return Py_FindMethod(string_methods
, (PyObject
*)s
, name
);
/* Type object for 8-bit strings.  The trailing comment on each visible
   initializer names the slot it fills: deallocation, printing,
   attribute lookup (string_getattr), comparison, repr, the sequence
   protocol table, hashing, str(), the buffer protocol table and the
   default type flags.  The mapping slot is explicitly 0. */
2374 PyTypeObject PyString_Type
= {
2375 PyObject_HEAD_INIT(&PyType_Type
)
2378 sizeof(PyStringObject
),
2380 (destructor
)string_dealloc
, /*tp_dealloc*/
2381 (printfunc
)string_print
, /*tp_print*/
2382 (getattrfunc
)string_getattr
, /*tp_getattr*/
2384 (cmpfunc
)string_compare
, /*tp_compare*/
2385 (reprfunc
)string_repr
, /*tp_repr*/
2387 &string_as_sequence
, /*tp_as_sequence*/
2388 0, /*tp_as_mapping*/
2389 (hashfunc
)string_hash
, /*tp_hash*/
2391 (reprfunc
)string_str
, /*tp_str*/
2394 &string_as_buffer
, /*tp_as_buffer*/
2395 Py_TPFLAGS_DEFAULT
, /*tp_flags*/
/* Concatenate w onto the string referenced by *pv.  A special path is
   taken when w is NULL or *pv is not a string (its body is not visible
   in this listing); otherwise the new value is computed with
   string_concat(*pv, w).  NOTE(review): how *pv is updated and which
   references are released happens in lines not shown -- confirm. */
2400 PyString_Concat(register PyObject
**pv
, register PyObject
*w
)
2402 register PyObject
*v
;
2405 if (w
== NULL
|| !PyString_Check(*pv
)) {
2410 v
= string_concat((PyStringObject
*) *pv
, w
);
/* Convenience wrapper: performs PyString_Concat(pv, w).  Going by the
   name, the reference to w is presumably released afterwards; that
   statement is not visible in this listing. */
2416 PyString_ConcatAndDel(register PyObject
**pv
, register PyObject
*w
)
2418 PyString_Concat(pv
, w
);
/* _PyString_Resize(pv, newsize): resize the string object *pv in place.

   Visible steps: the call is rejected with PyErr_BadInternalCall() when
   the object is not a string or its reference count is not exactly 1;
   the object is removed from reference tracking, reallocated to
   sizeof(PyStringObject) + newsize bytes, re-registered with
   _Py_NewReference(), and finally ob_size and the terminating '\0' are
   updated.  Because of the realloc, *pv may point to a different
   address afterwards, so callers must re-read any cached buffer
   pointer (PyString_Format below does exactly that). */
2423 /* The following function breaks the notion that strings are immutable:
2424 it changes the size of a string. We get away with this only if there
2425 is only one module referencing the object. You can also think of it
2426 as creating a new string object and destroying the old one, only
2427 more efficiently. In any case, don't use this if the string may
2428 already be known to some other part of the code... */
2431 _PyString_Resize(PyObject
**pv
, int newsize
)
2433 register PyObject
*v
;
2434 register PyStringObject
*sv
;
2436 if (!PyString_Check(v
) || v
->ob_refcnt
!= 1) {
2439 PyErr_BadInternalCall();
2442 /* XXX UNREF/NEWREF interface should be more symmetrical */
2446 _Py_ForgetReference(v
);
2448 PyObject_REALLOC((char *)v
,
2449 sizeof(PyStringObject
) + newsize
* sizeof(char));
2455 _Py_NewReference(*pv
);
2456 sv
= (PyStringObject
*) *pv
;
2457 sv
->ob_size
= newsize
;
2458 sv
->ob_sval
[newsize
] = '\0';
/* getnextarg: fetch the next argument for a format conversion.
   *p_argidx is the index of the next unused argument and arglen the
   number available.  While arguments remain, the item is taken from
   the tuple with PyTuple_GetItem(); once they are exhausted a
   TypeError ("not enough arguments for format string") is set.
   NOTE(review): advancing *p_argidx and the non-tuple case happen in
   lines not visible in this listing. */
2462 /* Helpers for formatstring */
2465 getnextarg(PyObject
*args
, int arglen
, int *p_argidx
)
2467 int argidx
= *p_argidx
;
2468 if (argidx
< arglen
) {
2473 return PyTuple_GetItem(args
, argidx
);
2475 PyErr_SetString(PyExc_TypeError
,
2476 "not enough arguments for format string");
/* Flag bits collected by PyString_Format while parsing a conversion
   specifier: '-' sets F_LJUST, '+' F_SIGN, ' ' F_BLANK, '#' F_ALT and
   '0' F_ZERO. */
2487 #define F_LJUST (1<<0)
2488 #define F_SIGN (1<<1)
2489 #define F_BLANK (1<<2)
2490 #define F_ALT (1<<3)
2491 #define F_ZERO (1<<4)
2494 formatfloat(char *buf
, size_t buflen
, int flags
,
2495 int prec
, int type
, PyObject
*v
)
2497 /* fmt = '%#.' + `prec` + `type`
2498 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
2501 if (!PyArg_Parse(v
, "d;float argument required", &x
))
2505 if (type
== 'f' && fabs(x
)/1e25
>= 1e25
)
2507 sprintf(fmt
, "%%%s.%d%c", (flags
&F_ALT
) ? "#" : "", prec
, type
);
2508 /* worst case length calc to ensure no buffer overrun:
2510 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
2511 for any double rep.)
2512 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2513 If prec=0 the effective precision is 1 (the leading digit is
2514 always given), therefore increase by one to 10+prec. */
2515 if (buflen
<= (size_t)10 + (size_t)prec
) {
2516 PyErr_SetString(PyExc_OverflowError
,
2517 "formatted float is too long (precision too large?)");
2520 sprintf(buf
, fmt
, x
);
/* Reading guide for _PyString_FormatLong (the contract is in the
   original comment that follows).  Steps visible in the body:
     1. obtain the textual form through the value's own type slots:
        tp_str, nb_oct or nb_hex depending on `type`;
     2. insist that the resulting string has a reference count of 1,
        because it is edited in place;
     3. inspect a trailing 'L' (the action taken is not visible here)
        and note a leading '-' in `sign`;
     4. unless F_ALT is set, drop the base marker ("0" / "0x");
     5. when prec exceeds the digit count, build a new string of
        numnondigits + prec bytes and zero-fill on the left;
     6. fix the case of hex digits and of the "0x" marker. */
2524 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2525 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2526 * Python's regular ints.
2527 * Return value: a new PyString*, or NULL if error.
2528 * . *pbuf is set to point into it,
2529 * *plen set to the # of chars following that.
2530 * Caller must decref it when done using pbuf.
2531 * The string starting at *pbuf is of the form
2532 * "-"? ("0x" | "0X")? digit+
2533 * "0x"/"0X" are present only for x and X conversions, with F_ALT
2534 * set in flags. The case of hex digits will be correct,
2535 * There will be at least prec digits, zero-filled on the left if
2536 * necessary to get that many.
2537 * val object to be converted
2538 * flags bitmask of format flags; only F_ALT is looked at
2539 * prec minimum number of digits; 0-fill on left if needed
2540 * type a character in [duoxX]; u acts the same as d
2542 * CAUTION: o, x and X conversions on regular ints can never
2543 * produce a '-' sign, but can for Python's unbounded ints.
2546 _PyString_FormatLong(PyObject
*val
, int flags
, int prec
, int type
,
2547 char **pbuf
, int *plen
)
2549 PyObject
*result
= NULL
;
2552 int sign
; /* 1 if '-', else 0 */
2553 int len
; /* number of characters */
2554 int numdigits
; /* len == numnondigits + numdigits */
2555 int numnondigits
= 0;
2560 result
= val
->ob_type
->tp_str(val
);
2563 result
= val
->ob_type
->tp_as_number
->nb_oct(val
);
2568 result
= val
->ob_type
->tp_as_number
->nb_hex(val
);
2571 assert(!"'type' not in [duoxX]");
2576 /* To modify the string in-place, there can only be one reference. */
2577 if (result
->ob_refcnt
!= 1) {
2578 PyErr_BadInternalCall();
2581 buf
= PyString_AsString(result
);
2582 len
= PyString_Size(result
);
2583 if (buf
[len
-1] == 'L') {
2587 sign
= buf
[0] == '-';
2588 numnondigits
+= sign
;
2589 numdigits
= len
- numnondigits
;
2590 assert(numdigits
> 0);
2592 /* Get rid of base marker unless F_ALT */
2593 if ((flags
& F_ALT
) == 0) {
2594 /* Need to skip 0x, 0X or 0. */
2598 assert(buf
[sign
] == '0');
2599 /* If 0 is only digit, leave it alone. */
2600 if (numdigits
> 1) {
2607 assert(buf
[sign
] == '0');
2608 assert(buf
[sign
+ 1] == 'x');
2619 assert(len
== numnondigits
+ numdigits
);
2620 assert(numdigits
> 0);
2623 /* Fill with leading zeroes to meet minimum width. */
2624 if (prec
> numdigits
) {
2625 PyObject
*r1
= PyString_FromStringAndSize(NULL
,
2626 numnondigits
+ prec
);
2632 b1
= PyString_AS_STRING(r1
);
2633 for (i
= 0; i
< numnondigits
; ++i
)
2635 for (i
= 0; i
< prec
- numdigits
; i
++)
2637 for (i
= 0; i
< numdigits
; i
++)
2642 buf
= PyString_AS_STRING(result
);
2643 len
= numnondigits
+ prec
;
2646 /* Fix up case for hex conversions. */
2649 /* Need to convert all upper case letters to lower case. */
2650 for (i
= 0; i
< len
; i
++)
2651 if (buf
[i
] >= 'A' && buf
[i
] <= 'F')
2655 /* Need to convert 0x to 0X (and -0x to -0X). */
2656 if (buf
[sign
+ 1] == 'x')
2657 buf
[sign
+ 1] = 'X';
/* formatint: render a regular Python int into buf for an integer
   conversion.  The value is parsed as a C long, a printf format of the
   shape "%[#].<prec>l<type>" is assembled in fmt, and the call is
   refused with OverflowError when buflen is 13 or less or does not
   exceed 2 + prec.  After formatting, a zero value under "%#x"/"%#X"
   gets the base marker inserted by shifting the text two places to
   the right, unless the C library already produced it. */
2666 formatint(char *buf
, size_t buflen
, int flags
,
2667 int prec
, int type
, PyObject
*v
)
2669 /* fmt = '%#.' + `prec` + 'l' + `type`
2670 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
2672 char fmt
[64]; /* plenty big enough! */
2674 if (!PyArg_Parse(v
, "l;int argument required", &x
))
2678 sprintf(fmt
, "%%%s.%dl%c", (flags
&F_ALT
) ? "#" : "", prec
, type
);
2679 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
2680 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
2681 if (buflen
<= 13 || buflen
<= (size_t)2 + (size_t)prec
) {
2682 PyErr_SetString(PyExc_OverflowError
,
2683 "formatted integer is too long (precision too large?)");
2686 sprintf(buf
, fmt
, x
);
2687 /* When converting 0 under %#x or %#X, C leaves off the base marker,
2688 * but we want it (for consistency with other %#x conversions, and
2689 * for consistency with Python's hex() function).
2690 * BUG 28-Apr-2001 tim: At least two platform Cs (Metrowerks &
2691 * Compaq Tru64) violate the std by converting 0 w/ leading 0x anyway.
2692 * So add it only if the platform didn't already.
2694 if (x
== 0 && (flags
& F_ALT
) && (type
== 'x' || type
== 'X') &&
2695 buf
[1] != (char)type
) /* this last always true under std C */
2697 memmove(buf
+2, buf
, strlen(buf
) + 1);
2699 buf
[1] = (char)type
;
/* formatchar: store the single character for a %c conversion in
   buf[0].  A string argument is parsed with the "c" converter, any
   other argument with the "b" converter; both share the error text
   "%c requires int or char".  buflen is not checked by the visible
   code: the caller must supply at least two bytes. */
2705 formatchar(char *buf
, size_t buflen
, PyObject
*v
)
2707 /* presume that the buffer is at least 2 characters long */
2708 if (PyString_Check(v
)) {
2709 if (!PyArg_Parse(v
, "c;%c requires int or char", &buf
[0]))
2713 if (!PyArg_Parse(v
, "b;%c requires int or char", &buf
[0]))
/* Reading guide for PyString_Format(format, args), the implementation
   of `format % args` for 8-bit strings.  What the visible lines show:
     - format must be a string and args non-NULL, otherwise
       PyErr_BadInternalCall();
     - the result buffer starts at len(format) + 100 bytes and is grown
       with _PyString_Resize(); `res` is re-fetched after every resize
       because the object may move;
     - args may be a tuple (arglen = its size), and an object whose type
       has tp_as_mapping is considered for "%(key)s" lookups through
       `dict`;
     - each specifier is parsed in order: optional "(key)", the flag
       characters - + space # 0, a width (digits or '*'), a precision
       (digits or '*'), an optional h/l/L size letter, then the
       conversion character;
     - numeric width and precision are checked for overflow while the
       digits are accumulated;
     - conversions dispatch to PyObject_Str/PyObject_Repr,
       _PyString_FormatLong, formatint, formatfloat or formatchar, the
       last three writing into the local formatbuf[FORMATBUFLEN];
     - a unicode argument abandons this routine: the text produced so
       far is kept, the remaining format is decoded and handed to
       PyUnicode_Format(), and the two parts are joined with
       PyUnicode_Concat();
     - leftover arguments raise TypeError "not all arguments converted";
     - on success the result is trimmed to reslen - rescnt bytes. */
2721 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
2723 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
2724 chars are formatted. XXX This is a magic number. Each formatting
2725 routine does bounds checking to ensure no overflow, but a better
2726 solution may be to malloc a buffer of appropriate size for each
2727 format. For now, the current solution is sufficient.
2729 #define FORMATBUFLEN (size_t)120
2732 PyString_Format(PyObject
*format
, PyObject
*args
)
2735 int fmtcnt
, rescnt
, reslen
, arglen
, argidx
;
2737 PyObject
*result
, *orig_args
, *v
, *w
;
2738 PyObject
*dict
= NULL
;
2739 if (format
== NULL
|| !PyString_Check(format
) || args
== NULL
) {
2740 PyErr_BadInternalCall();
2744 fmt
= PyString_AsString(format
);
2745 fmtcnt
= PyString_Size(format
);
2746 reslen
= rescnt
= fmtcnt
+ 100;
2747 result
= PyString_FromStringAndSize((char *)NULL
, reslen
);
2750 res
= PyString_AsString(result
);
2751 if (PyTuple_Check(args
)) {
2752 arglen
= PyTuple_Size(args
);
2759 if (args
->ob_type
->tp_as_mapping
)
2761 while (--fmtcnt
>= 0) {
2764 rescnt
= fmtcnt
+ 100;
2766 if (_PyString_Resize(&result
, reslen
) < 0)
2768 res
= PyString_AsString(result
)
2775 /* Got a format specifier */
2783 PyObject
*temp
= NULL
;
2787 char formatbuf
[FORMATBUFLEN
]; /* For format{float,int,char}() */
2788 char *fmt_start
= fmt
;
2789 int argidx_start
= argidx
;
2799 PyErr_SetString(PyExc_TypeError
,
2800 "format requires a mapping");
2806 /* Skip over balanced parentheses */
2807 while (pcount
> 0 && --fmtcnt
>= 0) {
2810 else if (*fmt
== '(')
2814 keylen
= fmt
- keystart
- 1;
2815 if (fmtcnt
< 0 || pcount
> 0) {
2816 PyErr_SetString(PyExc_ValueError
,
2817 "incomplete format key");
2820 key
= PyString_FromStringAndSize(keystart
,
2828 args
= PyObject_GetItem(dict
, key
);
2837 while (--fmtcnt
>= 0) {
2838 switch (c
= *fmt
++) {
2839 case '-': flags
|= F_LJUST
; continue;
2840 case '+': flags
|= F_SIGN
; continue;
2841 case ' ': flags
|= F_BLANK
; continue;
2842 case '#': flags
|= F_ALT
; continue;
2843 case '0': flags
|= F_ZERO
; continue;
2848 v
= getnextarg(args
, arglen
, &argidx
);
2851 if (!PyInt_Check(v
)) {
2852 PyErr_SetString(PyExc_TypeError
,
2856 width
= PyInt_AsLong(v
);
2864 else if (c
>= 0 && isdigit(c
)) {
2866 while (--fmtcnt
>= 0) {
2867 c
= Py_CHARMASK(*fmt
++);
2870 if ((width
*10) / 10 != width
) {
2876 width
= width
*10 + (c
- '0');
2884 v
= getnextarg(args
, arglen
, &argidx
);
2887 if (!PyInt_Check(v
)) {
2893 prec
= PyInt_AsLong(v
);
2899 else if (c
>= 0 && isdigit(c
)) {
2901 while (--fmtcnt
>= 0) {
2902 c
= Py_CHARMASK(*fmt
++);
2905 if ((prec
*10) / 10 != prec
) {
2911 prec
= prec
*10 + (c
- '0');
2916 if (c
== 'h' || c
== 'l' || c
== 'L') {
2923 PyErr_SetString(PyExc_ValueError
,
2924 "incomplete format");
2928 v
= getnextarg(args
, arglen
, &argidx
);
2941 if (PyUnicode_Check(v
)) {
2943 argidx
= argidx_start
;
2947 temp
= PyObject_Str(v
);
2949 temp
= PyObject_Repr(v
);
2952 if (!PyString_Check(temp
)) {
2953 PyErr_SetString(PyExc_TypeError
,
2954 "%s argument has non-string str()");
2957 pbuf
= PyString_AsString(temp
);
2958 len
= PyString_Size(temp
);
2959 if (prec
>= 0 && len
> prec
)
2970 if (PyLong_Check(v
)) {
2971 temp
= _PyString_FormatLong(v
, flags
,
2972 prec
, c
, &pbuf
, &len
);
2975 /* unbounded ints can always produce
2976 a sign character! */
2981 len
= formatint(pbuf
, sizeof(formatbuf
),
2985 /* only d conversion is signed */
2997 len
= formatfloat(pbuf
, sizeof(formatbuf
), flags
, prec
, c
, v
);
3006 len
= formatchar(pbuf
, sizeof(formatbuf
), v
);
3011 PyErr_Format(PyExc_ValueError
,
3012 "unsupported format character '%c' (0x%x) "
3014 c
, c
, fmt
- 1 - PyString_AsString(format
));
3018 if (*pbuf
== '-' || *pbuf
== '+') {
3022 else if (flags
& F_SIGN
)
3024 else if (flags
& F_BLANK
)
3031 if (rescnt
< width
+ (sign
!= 0)) {
3033 rescnt
= width
+ fmtcnt
+ 100;
3035 if (_PyString_Resize(&result
, reslen
) < 0)
3037 res
= PyString_AsString(result
)
3047 if ((flags
& F_ALT
) && (c
== 'x' || c
== 'X')) {
3048 assert(pbuf
[0] == '0');
3049 assert(pbuf
[1] == c
);
3060 if (width
> len
&& !(flags
& F_LJUST
)) {
3064 } while (--width
> len
);
3069 if ((flags
& F_ALT
) &&
3070 (c
== 'x' || c
== 'X')) {
3071 assert(pbuf
[0] == '0');
3072 assert(pbuf
[1] == c
);
3077 memcpy(res
, pbuf
, len
);
3080 while (--width
>= len
) {
3084 if (dict
&& (argidx
< arglen
) && c
!= '%') {
3085 PyErr_SetString(PyExc_TypeError
,
3086 "not all arguments converted");
3092 if (argidx
< arglen
&& !dict
) {
3093 PyErr_SetString(PyExc_TypeError
,
3094 "not all arguments converted");
3100 _PyString_Resize(&result
, reslen
- rescnt
);
3108 /* Fiddle args right (remove the first argidx arguments) */
3109 if (PyTuple_Check(orig_args
) && argidx
> 0) {
3111 int n
= PyTuple_GET_SIZE(orig_args
) - argidx
;
3116 PyObject
*w
= PyTuple_GET_ITEM(orig_args
, n
+ argidx
);
3118 PyTuple_SET_ITEM(v
, n
, w
);
3122 Py_INCREF(orig_args
);
3126 /* Take what we have of the result and let the Unicode formatting
3127 function format the rest of the input. */
3128 rescnt
= res
- PyString_AS_STRING(result
);
3129 if (_PyString_Resize(&result
, rescnt
))
3131 fmtcnt
= PyString_GET_SIZE(format
) - \
3132 (fmt
- PyString_AS_STRING(format
));
3133 format
= PyUnicode_Decode(fmt
, fmtcnt
, NULL
, NULL
);
3136 v
= PyUnicode_Format(format
, args
);
3140 /* Paste what we have (result) to what the Unicode formatting
3141 function returned (v) and return the result (or error) */
3142 w
= PyUnicode_Concat(result
, v
);
/* String interning (compiled only when INTERN_STRINGS is defined).

   `interned` is a dictionary, created on first use, that maps each
   interned string to itself.  PyString_InternInPlace(p):
     - aborts with Py_FatalError() when *p is NULL or not a string;
     - consults the string's cached ob_sinterned pointer first;
     - otherwise looks the string up in `interned`; on a hit, both *p
       and ob_sinterned are pointed at the existing entry;
     - on a miss, stores the string as both key and value and records
       it in ob_sinterned.
   NOTE(review): reference-count adjustments and the assignment of `t`
   before PyDict_SetItem() happen in lines not visible here. */
3157 #ifdef INTERN_STRINGS
3159 /* This dictionary will leak at PyString_Fini() time. That's acceptable
3160 * because PyString_Fini() specifically frees interned strings that are
3161 * only referenced by this dictionary. The CVS log entry for revision 2.45
3164 * Change the Fini function to only remove otherwise unreferenced
3165 * strings from the interned table. There are references in
3166 * hard-to-find static variables all over the interpreter, and it's not
3167 * worth trying to get rid of all those; but "uninterning" isn't fair
3168 * either and may cause subtle failures later -- so we have to keep them
3169 * in the interned table.
3171 static PyObject
*interned
;
3174 PyString_InternInPlace(PyObject
**p
)
3176 register PyStringObject
*s
= (PyStringObject
*)(*p
);
3178 if (s
== NULL
|| !PyString_Check(s
))
3179 Py_FatalError("PyString_InternInPlace: strings only please!");
3180 if ((t
= s
->ob_sinterned
) != NULL
) {
3181 if (t
== (PyObject
*)s
)
3188 if (interned
== NULL
) {
3189 interned
= PyDict_New();
3190 if (interned
== NULL
)
3193 if ((t
= PyDict_GetItem(interned
, (PyObject
*)s
)) != NULL
) {
3195 *p
= s
->ob_sinterned
= t
;
3200 if (PyDict_SetItem(interned
, t
, t
) == 0) {
3201 s
->ob_sinterned
= t
;
/* Create a string object from the C string cp with
   PyString_FromString() and intern it via PyString_InternInPlace(),
   which may replace s with an already interned equivalent. */
3209 PyString_InternFromString(const char *cp
)
3211 PyObject
*s
= PyString_FromString(cp
);
3214 PyString_InternInPlace(&s
);
3224 for (i
= 0; i
< UCHAR_MAX
+ 1; i
++) {
3225 Py_XDECREF(characters
[i
]);
3226 characters
[i
] = NULL
;
3228 #ifndef DONT_SHARE_SHORT_STRINGS
3229 Py_XDECREF(nullstring
);
3232 #ifdef INTERN_STRINGS
3235 PyObject
*key
, *value
;
3239 while (PyDict_Next(interned
, &pos
, &key
, &value
)) {
3240 if (key
->ob_refcnt
== 2 && key
== value
) {
3241 PyDict_DelItem(interned
, key
);
3250 #ifdef INTERN_STRINGS
3251 void _Py_ReleaseInternedStrings(void)
3254 Py_DECREF(interned
);
3258 #endif /* INTERN_STRINGS */