2 /* String object implementation */
9 int null_strings
, one_strings
;
12 #if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
16 static PyStringObject
*characters
[UCHAR_MAX
+ 1];
17 #ifndef DONT_SHARE_SHORT_STRINGS
18 static PyStringObject
*nullstring
;
22 Newsizedstringobject() and newstringobject() try in certain cases
23 to share string objects. When the size of the string is zero,
24 these routines always return a pointer to the same string object;
25 when the size is one, they return a pointer to an already existing
26 object if the contents of the string is known. For
27 newstringobject() this is always the case, for
28 newsizedstringobject() this is the case when the first argument in
30 A common practice to allocate a string and then fill it in or
31 change it must be done carefully. It is only allowed to change the
32 contents of the string if the obect was gotten from
33 newsizedstringobject() with a NULL first argument, because in the
34 future these routines may try to do even more sharing of objects.
37 PyString_FromStringAndSize(const char *str
, int size
)
39 register PyStringObject
*op
;
40 #ifndef DONT_SHARE_SHORT_STRINGS
41 if (size
== 0 && (op
= nullstring
) != NULL
) {
46 return (PyObject
*)op
;
48 if (size
== 1 && str
!= NULL
&&
49 (op
= characters
[*str
& UCHAR_MAX
]) != NULL
)
55 return (PyObject
*)op
;
57 #endif /* DONT_SHARE_SHORT_STRINGS */
59 /* PyObject_NewVar is inlined */
60 op
= (PyStringObject
*)
61 PyObject_MALLOC(sizeof(PyStringObject
) + size
* sizeof(char));
63 return PyErr_NoMemory();
64 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
69 op
->ob_sinterned
= NULL
;
72 memcpy(op
->ob_sval
, str
, size
);
73 op
->ob_sval
[size
] = '\0';
74 #ifndef DONT_SHARE_SHORT_STRINGS
78 } else if (size
== 1 && str
!= NULL
) {
79 characters
[*str
& UCHAR_MAX
] = op
;
83 return (PyObject
*) op
;
87 PyString_FromString(const char *str
)
89 register size_t size
= strlen(str
);
90 register PyStringObject
*op
;
92 PyErr_SetString(PyExc_OverflowError
,
93 "string is too long for a Python string");
96 #ifndef DONT_SHARE_SHORT_STRINGS
97 if (size
== 0 && (op
= nullstring
) != NULL
) {
102 return (PyObject
*)op
;
104 if (size
== 1 && (op
= characters
[*str
& UCHAR_MAX
]) != NULL
) {
109 return (PyObject
*)op
;
111 #endif /* DONT_SHARE_SHORT_STRINGS */
113 /* PyObject_NewVar is inlined */
114 op
= (PyStringObject
*)
115 PyObject_MALLOC(sizeof(PyStringObject
) + size
* sizeof(char));
117 return PyErr_NoMemory();
118 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
122 #ifdef INTERN_STRINGS
123 op
->ob_sinterned
= NULL
;
125 strcpy(op
->ob_sval
, str
);
126 #ifndef DONT_SHARE_SHORT_STRINGS
130 } else if (size
== 1) {
131 characters
[*str
& UCHAR_MAX
] = op
;
135 return (PyObject
*) op
;
138 PyObject
*PyString_Decode(const char *s
,
140 const char *encoding
,
143 PyObject
*buffer
= NULL
, *str
;
145 if (encoding
== NULL
)
146 encoding
= PyUnicode_GetDefaultEncoding();
148 /* Decode via the codec registry */
149 buffer
= PyBuffer_FromMemory((void *)s
, size
);
152 str
= PyCodec_Decode(buffer
, encoding
, errors
);
155 /* Convert Unicode to a string using the default encoding */
156 if (PyUnicode_Check(str
)) {
157 PyObject
*temp
= str
;
158 str
= PyUnicode_AsEncodedString(str
, NULL
, NULL
);
163 if (!PyString_Check(str
)) {
164 PyErr_Format(PyExc_TypeError
,
165 "decoder did not return a string object (type=%.400s)",
166 str
->ob_type
->tp_name
);
178 PyObject
*PyString_Encode(const char *s
,
180 const char *encoding
,
185 str
= PyString_FromStringAndSize(s
, size
);
188 v
= PyString_AsEncodedString(str
, encoding
, errors
);
193 PyObject
*PyString_AsEncodedString(PyObject
*str
,
194 const char *encoding
,
199 if (!PyString_Check(str
)) {
204 if (encoding
== NULL
)
205 encoding
= PyUnicode_GetDefaultEncoding();
207 /* Encode via the codec registry */
208 v
= PyCodec_Encode(str
, encoding
, errors
);
211 /* Convert Unicode to a string using the default encoding */
212 if (PyUnicode_Check(v
)) {
214 v
= PyUnicode_AsEncodedString(v
, NULL
, NULL
);
219 if (!PyString_Check(v
)) {
220 PyErr_Format(PyExc_TypeError
,
221 "encoder did not return a string object (type=%.400s)",
222 v
->ob_type
->tp_name
);
233 string_dealloc(PyObject
*op
)
239 string_getsize(register PyObject
*op
)
243 if (PyString_AsStringAndSize(op
, &s
, &len
))
248 static /*const*/ char *
249 string_getbuffer(register PyObject
*op
)
253 if (PyString_AsStringAndSize(op
, &s
, &len
))
259 PyString_Size(register PyObject
*op
)
261 if (!PyString_Check(op
))
262 return string_getsize(op
);
263 return ((PyStringObject
*)op
) -> ob_size
;
267 PyString_AsString(register PyObject
*op
)
269 if (!PyString_Check(op
))
270 return string_getbuffer(op
);
271 return ((PyStringObject
*)op
) -> ob_sval
;
274 /* Internal API needed by PyString_AsStringAndSize(): */
276 PyObject
*_PyUnicode_AsDefaultEncodedString(PyObject
*unicode
,
280 PyString_AsStringAndSize(register PyObject
*obj
,
285 PyErr_BadInternalCall();
289 if (!PyString_Check(obj
)) {
290 if (PyUnicode_Check(obj
)) {
291 obj
= _PyUnicode_AsDefaultEncodedString(obj
, NULL
);
296 PyErr_Format(PyExc_TypeError
,
297 "expected string or Unicode object, "
298 "%.200s found", obj
->ob_type
->tp_name
);
303 *s
= PyString_AS_STRING(obj
);
305 *len
= PyString_GET_SIZE(obj
);
306 else if ((int)strlen(*s
) != PyString_GET_SIZE(obj
)) {
307 PyErr_SetString(PyExc_TypeError
,
308 "expected string without null bytes");
317 string_print(PyStringObject
*op
, FILE *fp
, int flags
)
322 /* XXX Ought to check for interrupts when writing long strings */
323 if (flags
& Py_PRINT_RAW
) {
324 fwrite(op
->ob_sval
, 1, (int) op
->ob_size
, fp
);
328 /* figure out which quote to use; single is preferred */
330 if (strchr(op
->ob_sval
, '\'') && !strchr(op
->ob_sval
, '"'))
334 for (i
= 0; i
< op
->ob_size
; i
++) {
336 if (c
== quote
|| c
== '\\')
337 fprintf(fp
, "\\%c", c
);
338 else if (c
< ' ' || c
>= 0177)
339 fprintf(fp
, "\\%03o", c
& 0377);
348 string_repr(register PyStringObject
*op
)
350 size_t newsize
= 2 + 4 * op
->ob_size
* sizeof(char);
352 if (newsize
> INT_MAX
) {
353 PyErr_SetString(PyExc_OverflowError
,
354 "string is too large to make repr");
356 v
= PyString_FromStringAndSize((char *)NULL
, newsize
);
366 /* figure out which quote to use; single is preferred */
368 if (strchr(op
->ob_sval
, '\'') && !strchr(op
->ob_sval
, '"'))
371 p
= ((PyStringObject
*)v
)->ob_sval
;
373 for (i
= 0; i
< op
->ob_size
; i
++) {
375 if (c
== quote
|| c
== '\\')
376 *p
++ = '\\', *p
++ = c
;
377 else if (c
< ' ' || c
>= 0177) {
378 sprintf(p
, "\\%03o", c
& 0377);
388 &v
, (int) (p
- ((PyStringObject
*)v
)->ob_sval
));
394 string_length(PyStringObject
*a
)
400 string_concat(register PyStringObject
*a
, register PyObject
*bb
)
402 register unsigned int size
;
403 register PyStringObject
*op
;
404 if (!PyString_Check(bb
)) {
405 if (PyUnicode_Check(bb
))
406 return PyUnicode_Concat((PyObject
*)a
, bb
);
407 PyErr_Format(PyExc_TypeError
,
408 "cannot add type \"%.200s\" to string",
409 bb
->ob_type
->tp_name
);
412 #define b ((PyStringObject *)bb)
413 /* Optimize cases with empty left or right operand */
414 if (a
->ob_size
== 0) {
418 if (b
->ob_size
== 0) {
420 return (PyObject
*)a
;
422 size
= a
->ob_size
+ b
->ob_size
;
423 /* PyObject_NewVar is inlined */
424 op
= (PyStringObject
*)
425 PyObject_MALLOC(sizeof(PyStringObject
) + size
* sizeof(char));
427 return PyErr_NoMemory();
428 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
432 #ifdef INTERN_STRINGS
433 op
->ob_sinterned
= NULL
;
435 memcpy(op
->ob_sval
, a
->ob_sval
, (int) a
->ob_size
);
436 memcpy(op
->ob_sval
+ a
->ob_size
, b
->ob_sval
, (int) b
->ob_size
);
437 op
->ob_sval
[size
] = '\0';
438 return (PyObject
*) op
;
443 string_repeat(register PyStringObject
*a
, register int n
)
447 register PyStringObject
*op
;
451 /* watch out for overflows: the size can overflow int,
452 * and the # of bytes needed can overflow size_t
454 size
= a
->ob_size
* n
;
455 if (n
&& size
/ n
!= a
->ob_size
) {
456 PyErr_SetString(PyExc_OverflowError
,
457 "repeated string is too long");
460 if (size
== a
->ob_size
) {
462 return (PyObject
*)a
;
464 nbytes
= size
* sizeof(char);
465 if (nbytes
/ sizeof(char) != (size_t)size
||
466 nbytes
+ sizeof(PyStringObject
) <= nbytes
) {
467 PyErr_SetString(PyExc_OverflowError
,
468 "repeated string is too long");
471 op
= (PyStringObject
*)
472 PyObject_MALLOC(sizeof(PyStringObject
) + nbytes
);
474 return PyErr_NoMemory();
475 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
479 #ifdef INTERN_STRINGS
480 op
->ob_sinterned
= NULL
;
482 for (i
= 0; i
< size
; i
+= a
->ob_size
)
483 memcpy(op
->ob_sval
+i
, a
->ob_sval
, (int) a
->ob_size
);
484 op
->ob_sval
[size
] = '\0';
485 return (PyObject
*) op
;
488 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
491 string_slice(register PyStringObject
*a
, register int i
, register int j
)
492 /* j -- may be negative! */
497 j
= 0; /* Avoid signed/unsigned bug in next line */
500 if (i
== 0 && j
== a
->ob_size
) { /* It's the same as a */
502 return (PyObject
*)a
;
506 return PyString_FromStringAndSize(a
->ob_sval
+ i
, (int) (j
-i
));
510 string_contains(PyObject
*a
, PyObject
*el
)
512 register char *s
, *end
;
514 if (PyUnicode_Check(el
))
515 return PyUnicode_Contains(a
, el
);
516 if (!PyString_Check(el
) || PyString_Size(el
) != 1) {
517 PyErr_SetString(PyExc_TypeError
,
518 "'in <string>' requires character as left operand");
521 c
= PyString_AsString(el
)[0];
522 s
= PyString_AsString(a
);
523 end
= s
+ PyString_Size(a
);
532 string_item(PyStringObject
*a
, register int i
)
536 if (i
< 0 || i
>= a
->ob_size
) {
537 PyErr_SetString(PyExc_IndexError
, "string index out of range");
540 c
= a
->ob_sval
[i
] & UCHAR_MAX
;
541 v
= (PyObject
*) characters
[c
];
547 v
= PyString_FromStringAndSize((char *)NULL
, 1);
550 characters
[c
] = (PyStringObject
*) v
;
551 ((PyStringObject
*)v
)->ob_sval
[0] = c
;
558 string_compare(PyStringObject
*a
, PyStringObject
*b
)
560 int len_a
= a
->ob_size
, len_b
= b
->ob_size
;
561 int min_len
= (len_a
< len_b
) ? len_a
: len_b
;
564 cmp
= Py_CHARMASK(*a
->ob_sval
) - Py_CHARMASK(*b
->ob_sval
);
566 cmp
= memcmp(a
->ob_sval
, b
->ob_sval
, min_len
);
570 return (len_a
< len_b
) ? -1 : (len_a
> len_b
) ? 1 : 0;
574 string_hash(PyStringObject
*a
)
577 register unsigned char *p
;
581 if (a
->ob_shash
!= -1)
583 #ifdef INTERN_STRINGS
584 if (a
->ob_sinterned
!= NULL
)
585 return (a
->ob_shash
=
586 ((PyStringObject
*)(a
->ob_sinterned
))->ob_shash
);
590 p
= (unsigned char *) a
->ob_sval
;
593 x
= (1000003*x
) ^ *p
++;
604 string_buffer_getreadbuf(PyStringObject
*self
, int index
, const void **ptr
)
607 PyErr_SetString(PyExc_SystemError
,
608 "accessing non-existent string segment");
611 *ptr
= (void *)self
->ob_sval
;
612 return self
->ob_size
;
616 string_buffer_getwritebuf(PyStringObject
*self
, int index
, const void **ptr
)
618 PyErr_SetString(PyExc_TypeError
,
619 "Cannot use string as modifiable buffer");
624 string_buffer_getsegcount(PyStringObject
*self
, int *lenp
)
627 *lenp
= self
->ob_size
;
632 string_buffer_getcharbuf(PyStringObject
*self
, int index
, const char **ptr
)
635 PyErr_SetString(PyExc_SystemError
,
636 "accessing non-existent string segment");
639 *ptr
= self
->ob_sval
;
640 return self
->ob_size
;
643 static PySequenceMethods string_as_sequence
= {
644 (inquiry
)string_length
, /*sq_length*/
645 (binaryfunc
)string_concat
, /*sq_concat*/
646 (intargfunc
)string_repeat
, /*sq_repeat*/
647 (intargfunc
)string_item
, /*sq_item*/
648 (intintargfunc
)string_slice
, /*sq_slice*/
651 (objobjproc
)string_contains
/*sq_contains*/
654 static PyBufferProcs string_as_buffer
= {
655 (getreadbufferproc
)string_buffer_getreadbuf
,
656 (getwritebufferproc
)string_buffer_getwritebuf
,
657 (getsegcountproc
)string_buffer_getsegcount
,
658 (getcharbufferproc
)string_buffer_getcharbuf
,
669 split_whitespace(const char *s
, int len
, int maxsplit
)
673 PyObject
*list
= PyList_New(0);
678 for (i
= j
= 0; i
< len
; ) {
679 while (i
< len
&& isspace(Py_CHARMASK(s
[i
])))
682 while (i
< len
&& !isspace(Py_CHARMASK(s
[i
])))
687 item
= PyString_FromStringAndSize(s
+j
, (int)(i
-j
));
690 err
= PyList_Append(list
, item
);
694 while (i
< len
&& isspace(Py_CHARMASK(s
[i
])))
700 item
= PyString_FromStringAndSize(s
+j
, (int)(len
- j
));
703 err
= PyList_Append(list
, item
);
715 static char split__doc__
[] =
716 "S.split([sep [,maxsplit]]) -> list of strings\n\
718 Return a list of the words in the string S, using sep as the\n\
719 delimiter string. If maxsplit is given, at most maxsplit\n\
720 splits are done. If sep is not specified, any whitespace string\n\
724 string_split(PyStringObject
*self
, PyObject
*args
)
726 int len
= PyString_GET_SIZE(self
), n
, i
, j
, err
;
728 const char *s
= PyString_AS_STRING(self
), *sub
;
729 PyObject
*list
, *item
, *subobj
= Py_None
;
731 if (!PyArg_ParseTuple(args
, "|Oi:split", &subobj
, &maxsplit
))
735 if (subobj
== Py_None
)
736 return split_whitespace(s
, len
, maxsplit
);
737 if (PyString_Check(subobj
)) {
738 sub
= PyString_AS_STRING(subobj
);
739 n
= PyString_GET_SIZE(subobj
);
741 else if (PyUnicode_Check(subobj
))
742 return PyUnicode_Split((PyObject
*)self
, subobj
, maxsplit
);
743 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
746 PyErr_SetString(PyExc_ValueError
, "empty separator");
750 list
= PyList_New(0);
756 if (s
[i
] == sub
[0] && memcmp(s
+i
, sub
, n
) == 0) {
759 item
= PyString_FromStringAndSize(s
+j
, (int)(i
-j
));
762 err
= PyList_Append(list
, item
);
771 item
= PyString_FromStringAndSize(s
+j
, (int)(len
-j
));
774 err
= PyList_Append(list
, item
);
787 static char join__doc__
[] =
788 "S.join(sequence) -> string\n\
790 Return a string which is the concatenation of the strings in the\n\
791 sequence. The separator between elements is S.";
794 string_join(PyStringObject
*self
, PyObject
*args
)
796 char *sep
= PyString_AS_STRING(self
);
797 int seplen
= PyString_GET_SIZE(self
);
798 PyObject
*res
= NULL
;
803 int i
, slen
, sz_incr
;
804 PyObject
*orig
, *seq
, *item
;
806 if (!PyArg_ParseTuple(args
, "O:join", &orig
))
809 if (!(seq
= PySequence_Fast(orig
, ""))) {
810 if (PyErr_ExceptionMatches(PyExc_TypeError
))
811 PyErr_Format(PyExc_TypeError
,
812 "sequence expected, %.80s found",
813 orig
->ob_type
->tp_name
);
816 /* From here on out, errors go through finally: for proper
817 * reference count manipulations.
819 seqlen
= PySequence_Size(seq
);
821 item
= PySequence_Fast_GET_ITEM(seq
, 0);
827 if (!(res
= PyString_FromStringAndSize((char*)NULL
, sz
)))
830 p
= PyString_AS_STRING(res
);
832 for (i
= 0; i
< seqlen
; i
++) {
833 item
= PySequence_Fast_GET_ITEM(seq
, i
);
834 if (!PyString_Check(item
)){
835 if (PyUnicode_Check(item
)) {
838 return PyUnicode_Join((PyObject
*)self
, orig
);
840 PyErr_Format(PyExc_TypeError
,
841 "sequence item %i: expected string,"
843 i
, item
->ob_type
->tp_name
);
846 slen
= PyString_GET_SIZE(item
);
847 while (reslen
+ slen
+ seplen
>= sz
) {
848 /* at least double the size of the string */
849 sz_incr
= slen
+ seplen
> sz
? slen
+ seplen
: sz
;
850 if (_PyString_Resize(&res
, sz
+ sz_incr
)) {
854 p
= PyString_AS_STRING(res
) + reslen
;
857 memcpy(p
, sep
, seplen
);
861 memcpy(p
, PyString_AS_STRING(item
), slen
);
865 if (_PyString_Resize(&res
, reslen
))
879 string_find_internal(PyStringObject
*self
, PyObject
*args
, int dir
)
881 const char *s
= PyString_AS_STRING(self
), *sub
;
882 int len
= PyString_GET_SIZE(self
);
883 int n
, i
= 0, last
= INT_MAX
;
886 if (!PyArg_ParseTuple(args
, "O|O&O&:find/rfind/index/rindex",
887 &subobj
, _PyEval_SliceIndex
, &i
, _PyEval_SliceIndex
, &last
))
889 if (PyString_Check(subobj
)) {
890 sub
= PyString_AS_STRING(subobj
);
891 n
= PyString_GET_SIZE(subobj
);
893 else if (PyUnicode_Check(subobj
))
894 return PyUnicode_Find((PyObject
*)self
, subobj
, i
, last
, 1);
895 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
910 if (n
== 0 && i
<= last
)
913 for (; i
<= last
; ++i
)
914 if (s
[i
] == sub
[0] && memcmp(&s
[i
], sub
, n
) == 0)
920 if (n
== 0 && i
<= last
)
922 for (j
= last
-n
; j
>= i
; --j
)
923 if (s
[j
] == sub
[0] && memcmp(&s
[j
], sub
, n
) == 0)
931 static char find__doc__
[] =
932 "S.find(sub [,start [,end]]) -> int\n\
934 Return the lowest index in S where substring sub is found,\n\
935 such that sub is contained within s[start,end]. Optional\n\
936 arguments start and end are interpreted as in slice notation.\n\
938 Return -1 on failure.";
941 string_find(PyStringObject
*self
, PyObject
*args
)
943 long result
= string_find_internal(self
, args
, +1);
946 return PyInt_FromLong(result
);
950 static char index__doc__
[] =
951 "S.index(sub [,start [,end]]) -> int\n\
953 Like S.find() but raise ValueError when the substring is not found.";
956 string_index(PyStringObject
*self
, PyObject
*args
)
958 long result
= string_find_internal(self
, args
, +1);
962 PyErr_SetString(PyExc_ValueError
,
963 "substring not found in string.index");
966 return PyInt_FromLong(result
);
970 static char rfind__doc__
[] =
971 "S.rfind(sub [,start [,end]]) -> int\n\
973 Return the highest index in S where substring sub is found,\n\
974 such that sub is contained within s[start,end]. Optional\n\
975 arguments start and end are interpreted as in slice notation.\n\
977 Return -1 on failure.";
980 string_rfind(PyStringObject
*self
, PyObject
*args
)
982 long result
= string_find_internal(self
, args
, -1);
985 return PyInt_FromLong(result
);
989 static char rindex__doc__
[] =
990 "S.rindex(sub [,start [,end]]) -> int\n\
992 Like S.rfind() but raise ValueError when the substring is not found.";
995 string_rindex(PyStringObject
*self
, PyObject
*args
)
997 long result
= string_find_internal(self
, args
, -1);
1001 PyErr_SetString(PyExc_ValueError
,
1002 "substring not found in string.rindex");
1005 return PyInt_FromLong(result
);
1010 do_strip(PyStringObject
*self
, PyObject
*args
, int striptype
)
1012 char *s
= PyString_AS_STRING(self
);
1013 int len
= PyString_GET_SIZE(self
), i
, j
;
1015 if (!PyArg_ParseTuple(args
, ":strip"))
1019 if (striptype
!= RIGHTSTRIP
) {
1020 while (i
< len
&& isspace(Py_CHARMASK(s
[i
]))) {
1026 if (striptype
!= LEFTSTRIP
) {
1029 } while (j
>= i
&& isspace(Py_CHARMASK(s
[j
])));
1033 if (i
== 0 && j
== len
) {
1035 return (PyObject
*)self
;
1038 return PyString_FromStringAndSize(s
+i
, j
-i
);
1042 static char strip__doc__
[] =
1043 "S.strip() -> string\n\
1045 Return a copy of the string S with leading and trailing\n\
1046 whitespace removed.";
1049 string_strip(PyStringObject
*self
, PyObject
*args
)
1051 return do_strip(self
, args
, BOTHSTRIP
);
1055 static char lstrip__doc__
[] =
1056 "S.lstrip() -> string\n\
1058 Return a copy of the string S with leading whitespace removed.";
1061 string_lstrip(PyStringObject
*self
, PyObject
*args
)
1063 return do_strip(self
, args
, LEFTSTRIP
);
1067 static char rstrip__doc__
[] =
1068 "S.rstrip() -> string\n\
1070 Return a copy of the string S with trailing whitespace removed.";
1073 string_rstrip(PyStringObject
*self
, PyObject
*args
)
1075 return do_strip(self
, args
, RIGHTSTRIP
);
1079 static char lower__doc__
[] =
1080 "S.lower() -> string\n\
1082 Return a copy of the string S converted to lowercase.";
1085 string_lower(PyStringObject
*self
, PyObject
*args
)
1087 char *s
= PyString_AS_STRING(self
), *s_new
;
1088 int i
, n
= PyString_GET_SIZE(self
);
1091 if (!PyArg_ParseTuple(args
, ":lower"))
1093 new = PyString_FromStringAndSize(NULL
, n
);
1096 s_new
= PyString_AsString(new);
1097 for (i
= 0; i
< n
; i
++) {
1098 int c
= Py_CHARMASK(*s
++);
1100 *s_new
= tolower(c
);
1109 static char upper__doc__
[] =
1110 "S.upper() -> string\n\
1112 Return a copy of the string S converted to uppercase.";
1115 string_upper(PyStringObject
*self
, PyObject
*args
)
1117 char *s
= PyString_AS_STRING(self
), *s_new
;
1118 int i
, n
= PyString_GET_SIZE(self
);
1121 if (!PyArg_ParseTuple(args
, ":upper"))
1123 new = PyString_FromStringAndSize(NULL
, n
);
1126 s_new
= PyString_AsString(new);
1127 for (i
= 0; i
< n
; i
++) {
1128 int c
= Py_CHARMASK(*s
++);
1130 *s_new
= toupper(c
);
1139 static char title__doc__
[] =
1140 "S.title() -> string\n\
1142 Return a titlecased version of S, i.e. words start with uppercase\n\
1143 characters, all remaining cased characters have lowercase.";
1146 string_title(PyUnicodeObject
*self
, PyObject
*args
)
1148 char *s
= PyString_AS_STRING(self
), *s_new
;
1149 int i
, n
= PyString_GET_SIZE(self
);
1150 int previous_is_cased
= 0;
1153 if (!PyArg_ParseTuple(args
, ":title"))
1155 new = PyString_FromStringAndSize(NULL
, n
);
1158 s_new
= PyString_AsString(new);
1159 for (i
= 0; i
< n
; i
++) {
1160 int c
= Py_CHARMASK(*s
++);
1162 if (!previous_is_cased
)
1164 previous_is_cased
= 1;
1165 } else if (isupper(c
)) {
1166 if (previous_is_cased
)
1168 previous_is_cased
= 1;
1170 previous_is_cased
= 0;
1176 static char capitalize__doc__
[] =
1177 "S.capitalize() -> string\n\
1179 Return a copy of the string S with only its first character\n\
1183 string_capitalize(PyStringObject
*self
, PyObject
*args
)
1185 char *s
= PyString_AS_STRING(self
), *s_new
;
1186 int i
, n
= PyString_GET_SIZE(self
);
1189 if (!PyArg_ParseTuple(args
, ":capitalize"))
1191 new = PyString_FromStringAndSize(NULL
, n
);
1194 s_new
= PyString_AsString(new);
1196 int c
= Py_CHARMASK(*s
++);
1198 *s_new
= toupper(c
);
1203 for (i
= 1; i
< n
; i
++) {
1204 int c
= Py_CHARMASK(*s
++);
1206 *s_new
= tolower(c
);
1215 static char count__doc__
[] =
1216 "S.count(sub[, start[, end]]) -> int\n\
1218 Return the number of occurrences of substring sub in string\n\
1219 S[start:end]. Optional arguments start and end are\n\
1220 interpreted as in slice notation.";
1223 string_count(PyStringObject
*self
, PyObject
*args
)
1225 const char *s
= PyString_AS_STRING(self
), *sub
;
1226 int len
= PyString_GET_SIZE(self
), n
;
1227 int i
= 0, last
= INT_MAX
;
1231 if (!PyArg_ParseTuple(args
, "O|O&O&:count", &subobj
,
1232 _PyEval_SliceIndex
, &i
, _PyEval_SliceIndex
, &last
))
1235 if (PyString_Check(subobj
)) {
1236 sub
= PyString_AS_STRING(subobj
);
1237 n
= PyString_GET_SIZE(subobj
);
1239 else if (PyUnicode_Check(subobj
)) {
1241 count
= PyUnicode_Count((PyObject
*)self
, subobj
, i
, last
);
1245 return PyInt_FromLong((long) count
);
1247 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
1262 return PyInt_FromLong((long) (m
-i
));
1266 if (!memcmp(s
+i
, sub
, n
)) {
1273 return PyInt_FromLong((long) r
);
1277 static char swapcase__doc__
[] =
1278 "S.swapcase() -> string\n\
1280 Return a copy of the string S with uppercase characters\n\
1281 converted to lowercase and vice versa.";
1284 string_swapcase(PyStringObject
*self
, PyObject
*args
)
1286 char *s
= PyString_AS_STRING(self
), *s_new
;
1287 int i
, n
= PyString_GET_SIZE(self
);
1290 if (!PyArg_ParseTuple(args
, ":swapcase"))
1292 new = PyString_FromStringAndSize(NULL
, n
);
1295 s_new
= PyString_AsString(new);
1296 for (i
= 0; i
< n
; i
++) {
1297 int c
= Py_CHARMASK(*s
++);
1299 *s_new
= toupper(c
);
1301 else if (isupper(c
)) {
1302 *s_new
= tolower(c
);
1312 static char translate__doc__
[] =
1313 "S.translate(table [,deletechars]) -> string\n\
1315 Return a copy of the string S, where all characters occurring\n\
1316 in the optional argument deletechars are removed, and the\n\
1317 remaining characters have been mapped through the given\n\
1318 translation table, which must be a string of length 256.";
1321 string_translate(PyStringObject
*self
, PyObject
*args
)
1323 register char *input
, *output
;
1324 register const char *table
;
1325 register int i
, c
, changed
= 0;
1326 PyObject
*input_obj
= (PyObject
*)self
;
1327 const char *table1
, *output_start
, *del_table
=NULL
;
1328 int inlen
, tablen
, dellen
= 0;
1330 int trans_table
[256];
1331 PyObject
*tableobj
, *delobj
= NULL
;
1333 if (!PyArg_ParseTuple(args
, "O|O:translate",
1334 &tableobj
, &delobj
))
1337 if (PyString_Check(tableobj
)) {
1338 table1
= PyString_AS_STRING(tableobj
);
1339 tablen
= PyString_GET_SIZE(tableobj
);
1341 else if (PyUnicode_Check(tableobj
)) {
1342 /* Unicode .translate() does not support the deletechars
1343 parameter; instead a mapping to None will cause characters
1345 if (delobj
!= NULL
) {
1346 PyErr_SetString(PyExc_TypeError
,
1347 "deletions are implemented differently for unicode");
1350 return PyUnicode_Translate((PyObject
*)self
, tableobj
, NULL
);
1352 else if (PyObject_AsCharBuffer(tableobj
, &table1
, &tablen
))
1355 if (delobj
!= NULL
) {
1356 if (PyString_Check(delobj
)) {
1357 del_table
= PyString_AS_STRING(delobj
);
1358 dellen
= PyString_GET_SIZE(delobj
);
1360 else if (PyUnicode_Check(delobj
)) {
1361 PyErr_SetString(PyExc_TypeError
,
1362 "deletions are implemented differently for unicode");
1365 else if (PyObject_AsCharBuffer(delobj
, &del_table
, &dellen
))
1368 if (tablen
!= 256) {
1369 PyErr_SetString(PyExc_ValueError
,
1370 "translation table must be 256 characters long");
1380 inlen
= PyString_Size(input_obj
);
1381 result
= PyString_FromStringAndSize((char *)NULL
, inlen
);
1384 output_start
= output
= PyString_AsString(result
);
1385 input
= PyString_AsString(input_obj
);
1388 /* If no deletions are required, use faster code */
1389 for (i
= inlen
; --i
>= 0; ) {
1390 c
= Py_CHARMASK(*input
++);
1391 if (Py_CHARMASK((*output
++ = table
[c
])) != c
)
1397 Py_INCREF(input_obj
);
1401 for (i
= 0; i
< 256; i
++)
1402 trans_table
[i
] = Py_CHARMASK(table
[i
]);
1404 for (i
= 0; i
< dellen
; i
++)
1405 trans_table
[(int) Py_CHARMASK(del_table
[i
])] = -1;
1407 for (i
= inlen
; --i
>= 0; ) {
1408 c
= Py_CHARMASK(*input
++);
1409 if (trans_table
[c
] != -1)
1410 if (Py_CHARMASK(*output
++ = (char)trans_table
[c
]) == c
)
1416 Py_INCREF(input_obj
);
1419 /* Fix the size of the resulting string */
1420 if (inlen
> 0 &&_PyString_Resize(&result
, output
-output_start
))
1426 /* What follows is used for implementing replace(). Perry Stoll. */
1431 strstr replacement for arbitrary blocks of memory.
1433 Locates the first occurrence in the memory pointed to by MEM of the
1434 contents of memory pointed to by PAT. Returns the index into MEM if
1435 found, or -1 if not found. If len of PAT is greater than length of
1436 MEM, the function returns -1.
1439 mymemfind(const char *mem
, int len
, const char *pat
, int pat_len
)
1443 /* pattern can not occur in the last pat_len-1 chars */
1446 for (ii
= 0; ii
<= len
; ii
++) {
1447 if (mem
[ii
] == pat
[0] && memcmp(&mem
[ii
], pat
, pat_len
) == 0) {
1457 Return the number of distinct times PAT is found in MEM.
1458 meaning mem=1111 and pat==11 returns 2.
1459 mem=11111 and pat==11 also return 2.
1462 mymemcnt(const char *mem
, int len
, const char *pat
, int pat_len
)
1464 register int offset
= 0;
1468 offset
= mymemfind(mem
, len
, pat
, pat_len
);
1471 mem
+= offset
+ pat_len
;
1472 len
-= offset
+ pat_len
;
1481 Return a string in which all occurrences of PAT in memory STR are
1484 If length of PAT is less than length of STR or there are no occurrences
1485 of PAT in STR, then the original string is returned. Otherwise, a new
1486 string is allocated here and returned.
1488 on return, out_len is:
1489 the length of output string, or
1490 -1 if the input string is returned, or
1491 unchanged if an error occurs (no memory).
1494 the new string allocated locally, or
1495 NULL if an error occurred.
1498 mymemreplace(const char *str
, int len
, /* input string */
1499 const char *pat
, int pat_len
, /* pattern string to find */
1500 const char *sub
, int sub_len
, /* substitution string */
1501 int count
, /* number of replacements */
1506 int nfound
, offset
, new_len
;
1508 if (len
== 0 || pat_len
> len
)
1511 /* find length of output string */
1512 nfound
= mymemcnt(str
, len
, pat
, pat_len
);
1515 else if (nfound
> count
)
1519 new_len
= len
+ nfound
*(sub_len
- pat_len
);
1521 new_s
= (char *)PyMem_MALLOC(new_len
);
1522 if (new_s
== NULL
) return NULL
;
1528 /* find index of next instance of pattern */
1529 offset
= mymemfind(str
, len
, pat
, pat_len
);
1530 /* if not found, break out of loop */
1531 if (offset
== -1) break;
1533 /* copy non matching part of input string */
1534 memcpy(new_s
, str
, offset
); /* copy part of str before pat */
1535 str
+= offset
+ pat_len
; /* move str past pattern */
1536 len
-= offset
+ pat_len
; /* reduce length of str remaining */
1538 /* copy substitute into the output string */
1539 new_s
+= offset
; /* move new_s to dest for sub string */
1540 memcpy(new_s
, sub
, sub_len
); /* copy substring into new_s */
1541 new_s
+= sub_len
; /* offset new_s past sub string */
1543 /* break when we've done count replacements */
1544 if (--count
== 0) break;
1546 /* copy any remaining values into output string */
1548 memcpy(new_s
, str
, len
);
1553 return (char*)str
; /* have to cast away constness here */
1557 static char replace__doc__
[] =
1558 "S.replace (old, new[, maxsplit]) -> string\n\
1560 Return a copy of string S with all occurrences of substring\n\
1561 old replaced by new. If the optional argument maxsplit is\n\
1562 given, only the first maxsplit occurrences are replaced.";
1565 string_replace(PyStringObject
*self
, PyObject
*args
)
1567 const char *str
= PyString_AS_STRING(self
), *sub
, *repl
;
1569 int len
= PyString_GET_SIZE(self
), sub_len
, repl_len
, out_len
;
1572 PyObject
*subobj
, *replobj
;
1574 if (!PyArg_ParseTuple(args
, "OO|i:replace",
1575 &subobj
, &replobj
, &count
))
1578 if (PyString_Check(subobj
)) {
1579 sub
= PyString_AS_STRING(subobj
);
1580 sub_len
= PyString_GET_SIZE(subobj
);
1582 else if (PyUnicode_Check(subobj
))
1583 return PyUnicode_Replace((PyObject
*)self
,
1584 subobj
, replobj
, count
);
1585 else if (PyObject_AsCharBuffer(subobj
, &sub
, &sub_len
))
1588 if (PyString_Check(replobj
)) {
1589 repl
= PyString_AS_STRING(replobj
);
1590 repl_len
= PyString_GET_SIZE(replobj
);
1592 else if (PyUnicode_Check(replobj
))
1593 return PyUnicode_Replace((PyObject
*)self
,
1594 subobj
, replobj
, count
);
1595 else if (PyObject_AsCharBuffer(replobj
, &repl
, &repl_len
))
1599 PyErr_SetString(PyExc_ValueError
, "empty pattern string");
1602 new_s
= mymemreplace(str
,len
,sub
,sub_len
,repl
,repl_len
,count
,&out_len
);
1603 if (new_s
== NULL
) {
1607 if (out_len
== -1) {
1608 /* we're returning another reference to self */
1609 new = (PyObject
*)self
;
1613 new = PyString_FromStringAndSize(new_s
, out_len
);
1620 static char startswith__doc__
[] =
1621 "S.startswith(prefix[, start[, end]]) -> int\n\
1623 Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1624 optional start, test S beginning at that position. With optional end, stop\n\
1625 comparing S at that position.";
1628 string_startswith(PyStringObject
*self
, PyObject
*args
)
1630 const char* str
= PyString_AS_STRING(self
);
1631 int len
= PyString_GET_SIZE(self
);
1638 if (!PyArg_ParseTuple(args
, "O|O&O&:startswith", &subobj
,
1639 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1641 if (PyString_Check(subobj
)) {
1642 prefix
= PyString_AS_STRING(subobj
);
1643 plen
= PyString_GET_SIZE(subobj
);
1645 else if (PyUnicode_Check(subobj
)) {
1647 rc
= PyUnicode_Tailmatch((PyObject
*)self
,
1648 subobj
, start
, end
, -1);
1652 return PyInt_FromLong((long) rc
);
1654 else if (PyObject_AsCharBuffer(subobj
, &prefix
, &plen
))
1657 /* adopt Java semantics for index out of range. it is legal for
1658 * offset to be == plen, but this only returns true if prefix is
1661 if (start
< 0 || start
+plen
> len
)
1662 return PyInt_FromLong(0);
1664 if (!memcmp(str
+start
, prefix
, plen
)) {
1665 /* did the match end after the specified end? */
1667 return PyInt_FromLong(1);
1668 else if (end
- start
< plen
)
1669 return PyInt_FromLong(0);
1671 return PyInt_FromLong(1);
1673 else return PyInt_FromLong(0);
1677 static char endswith__doc__
[] =
1678 "S.endswith(suffix[, start[, end]]) -> int\n\
1680 Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1681 optional start, test S beginning at that position. With optional end, stop\n\
1682 comparing S at that position.";
1685 string_endswith(PyStringObject
*self
, PyObject
*args
)
1687 const char* str
= PyString_AS_STRING(self
);
1688 int len
= PyString_GET_SIZE(self
);
1696 if (!PyArg_ParseTuple(args
, "O|O&O&:endswith", &subobj
,
1697 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1699 if (PyString_Check(subobj
)) {
1700 suffix
= PyString_AS_STRING(subobj
);
1701 slen
= PyString_GET_SIZE(subobj
);
1703 else if (PyUnicode_Check(subobj
)) {
1705 rc
= PyUnicode_Tailmatch((PyObject
*)self
,
1706 subobj
, start
, end
, +1);
1710 return PyInt_FromLong((long) rc
);
1712 else if (PyObject_AsCharBuffer(subobj
, &suffix
, &slen
))
1715 if (start
< 0 || start
> len
|| slen
> len
)
1716 return PyInt_FromLong(0);
1718 upper
= (end
>= 0 && end
<= len
) ? end
: len
;
1719 lower
= (upper
- slen
) > start
? (upper
- slen
) : start
;
1721 if (upper
-lower
>= slen
&& !memcmp(str
+lower
, suffix
, slen
))
1722 return PyInt_FromLong(1);
1723 else return PyInt_FromLong(0);
1727 static char encode__doc__
[] =
1728 "S.encode([encoding[,errors]]) -> string\n\
1730 Return an encoded string version of S. Default encoding is the current\n\
1731 default string encoding. errors may be given to set a different error\n\
1732 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1733 a ValueError. Other possible values are 'ignore' and 'replace'.";
1736 string_encode(PyStringObject
*self
, PyObject
*args
)
1738 char *encoding
= NULL
;
1739 char *errors
= NULL
;
1740 if (!PyArg_ParseTuple(args
, "|ss:encode", &encoding
, &errors
))
1742 return PyString_AsEncodedString((PyObject
*)self
, encoding
, errors
);
1746 static char expandtabs__doc__
[] =
1747 "S.expandtabs([tabsize]) -> string\n\
1749 Return a copy of S where all tab characters are expanded using spaces.\n\
1750 If tabsize is not given, a tab size of 8 characters is assumed.";
1753 string_expandtabs(PyStringObject
*self
, PyObject
*args
)
1761 if (!PyArg_ParseTuple(args
, "|i:expandtabs", &tabsize
))
1764 /* First pass: determine size of output string */
1766 e
= PyString_AS_STRING(self
) + PyString_GET_SIZE(self
);
1767 for (p
= PyString_AS_STRING(self
); p
< e
; p
++)
1770 j
+= tabsize
- (j
% tabsize
);
1774 if (*p
== '\n' || *p
== '\r') {
1780 /* Second pass: create output string and fill it */
1781 u
= PyString_FromStringAndSize(NULL
, i
+ j
);
1786 q
= PyString_AS_STRING(u
);
1788 for (p
= PyString_AS_STRING(self
); p
< e
; p
++)
1791 i
= tabsize
- (j
% tabsize
);
1800 if (*p
== '\n' || *p
== '\r')
1808 PyObject
*pad(PyStringObject
*self
,
1820 if (left
== 0 && right
== 0) {
1822 return (PyObject
*)self
;
1825 u
= PyString_FromStringAndSize(NULL
,
1826 left
+ PyString_GET_SIZE(self
) + right
);
1829 memset(PyString_AS_STRING(u
), fill
, left
);
1830 memcpy(PyString_AS_STRING(u
) + left
,
1831 PyString_AS_STRING(self
),
1832 PyString_GET_SIZE(self
));
1834 memset(PyString_AS_STRING(u
) + left
+ PyString_GET_SIZE(self
),
1841 static char ljust__doc__
[] =
1842 "S.ljust(width) -> string\n\
1844 Return S left justified in a string of length width. Padding is\n\
1845 done using spaces.";
1848 string_ljust(PyStringObject
*self
, PyObject
*args
)
1851 if (!PyArg_ParseTuple(args
, "i:ljust", &width
))
1854 if (PyString_GET_SIZE(self
) >= width
) {
1856 return (PyObject
*) self
;
1859 return pad(self
, 0, width
- PyString_GET_SIZE(self
), ' ');
1863 static char rjust__doc__
[] =
1864 "S.rjust(width) -> string\n\
1866 Return S right justified in a string of length width. Padding is\n\
1867 done using spaces.";
1870 string_rjust(PyStringObject
*self
, PyObject
*args
)
1873 if (!PyArg_ParseTuple(args
, "i:rjust", &width
))
1876 if (PyString_GET_SIZE(self
) >= width
) {
1878 return (PyObject
*) self
;
1881 return pad(self
, width
- PyString_GET_SIZE(self
), 0, ' ');
1885 static char center__doc__
[] =
1886 "S.center(width) -> string\n\
1888 Return S centered in a string of length width. Padding is done\n\
1892 string_center(PyStringObject
*self
, PyObject
*args
)
1897 if (!PyArg_ParseTuple(args
, "i:center", &width
))
1900 if (PyString_GET_SIZE(self
) >= width
) {
1902 return (PyObject
*) self
;
1905 marg
= width
- PyString_GET_SIZE(self
);
1906 left
= marg
/ 2 + (marg
& width
& 1);
1908 return pad(self
, left
, marg
- left
, ' ');
/* Docstring for S.zfill(); the string is called S throughout, matching the
 * signature line and the other method docstrings in this file.
 */
static char zfill__doc__[] =
"S.zfill(width) -> string\n\
\n\
Pad a numeric string S with zeros on the left, to fill a field\n\
of the specified width. The string S is never truncated.";
1919 string_zfill(PyStringObject
*self
, PyObject
*args
)
1926 if (!PyArg_ParseTuple(args
, "i:zfill", &width
))
1929 if (PyString_GET_SIZE(self
) >= width
) {
1931 return (PyObject
*) self
;
1934 fill
= width
- PyString_GET_SIZE(self
);
1936 u
= pad(self
, fill
, 0, '0');
1940 str
= PyString_AS_STRING(u
);
1941 if (str
[fill
] == '+' || str
[fill
] == '-') {
1942 /* move sign to beginning of string */
1951 static char isspace__doc__
[] =
1952 "S.isspace() -> int\n\
1954 Return 1 if there are only whitespace characters in S,\n\
1958 string_isspace(PyStringObject
*self
, PyObject
*args
)
1960 register const unsigned char *p
1961 = (unsigned char *) PyString_AS_STRING(self
);
1962 register const unsigned char *e
;
1964 if (!PyArg_NoArgs(args
))
1967 /* Shortcut for single character strings */
1968 if (PyString_GET_SIZE(self
) == 1 &&
1970 return PyInt_FromLong(1);
1972 /* Special case for empty strings */
1973 if (PyString_GET_SIZE(self
) == 0)
1974 return PyInt_FromLong(0);
1976 e
= p
+ PyString_GET_SIZE(self
);
1977 for (; p
< e
; p
++) {
1979 return PyInt_FromLong(0);
1981 return PyInt_FromLong(1);
1985 static char isalpha__doc__
[] =
1986 "S.isalpha() -> int\n\
1988 Return 1 if all characters in S are alphabetic\n\
1989 and there is at least one character in S, 0 otherwise.";
1992 string_isalpha(PyUnicodeObject
*self
, PyObject
*args
)
1994 register const unsigned char *p
1995 = (unsigned char *) PyString_AS_STRING(self
);
1996 register const unsigned char *e
;
1998 if (!PyArg_NoArgs(args
))
2001 /* Shortcut for single character strings */
2002 if (PyString_GET_SIZE(self
) == 1 &&
2004 return PyInt_FromLong(1);
2006 /* Special case for empty strings */
2007 if (PyString_GET_SIZE(self
) == 0)
2008 return PyInt_FromLong(0);
2010 e
= p
+ PyString_GET_SIZE(self
);
2011 for (; p
< e
; p
++) {
2013 return PyInt_FromLong(0);
2015 return PyInt_FromLong(1);
2019 static char isalnum__doc__
[] =
2020 "S.isalnum() -> int\n\
2022 Return 1 if all characters in S are alphanumeric\n\
2023 and there is at least one character in S, 0 otherwise.";
2026 string_isalnum(PyUnicodeObject
*self
, PyObject
*args
)
2028 register const unsigned char *p
2029 = (unsigned char *) PyString_AS_STRING(self
);
2030 register const unsigned char *e
;
2032 if (!PyArg_NoArgs(args
))
2035 /* Shortcut for single character strings */
2036 if (PyString_GET_SIZE(self
) == 1 &&
2038 return PyInt_FromLong(1);
2040 /* Special case for empty strings */
2041 if (PyString_GET_SIZE(self
) == 0)
2042 return PyInt_FromLong(0);
2044 e
= p
+ PyString_GET_SIZE(self
);
2045 for (; p
< e
; p
++) {
2047 return PyInt_FromLong(0);
2049 return PyInt_FromLong(1);
2053 static char isdigit__doc__
[] =
2054 "S.isdigit() -> int\n\
2056 Return 1 if there are only digit characters in S,\n\
2060 string_isdigit(PyStringObject
*self
, PyObject
*args
)
2062 register const unsigned char *p
2063 = (unsigned char *) PyString_AS_STRING(self
);
2064 register const unsigned char *e
;
2066 if (!PyArg_NoArgs(args
))
2069 /* Shortcut for single character strings */
2070 if (PyString_GET_SIZE(self
) == 1 &&
2072 return PyInt_FromLong(1);
2074 /* Special case for empty strings */
2075 if (PyString_GET_SIZE(self
) == 0)
2076 return PyInt_FromLong(0);
2078 e
= p
+ PyString_GET_SIZE(self
);
2079 for (; p
< e
; p
++) {
2081 return PyInt_FromLong(0);
2083 return PyInt_FromLong(1);
2087 static char islower__doc__
[] =
2088 "S.islower() -> int\n\
2090 Return 1 if all cased characters in S are lowercase and there is\n\
2091 at least one cased character in S, 0 otherwise.";
2094 string_islower(PyStringObject
*self
, PyObject
*args
)
2096 register const unsigned char *p
2097 = (unsigned char *) PyString_AS_STRING(self
);
2098 register const unsigned char *e
;
2101 if (!PyArg_NoArgs(args
))
2104 /* Shortcut for single character strings */
2105 if (PyString_GET_SIZE(self
) == 1)
2106 return PyInt_FromLong(islower(*p
) != 0);
2108 /* Special case for empty strings */
2109 if (PyString_GET_SIZE(self
) == 0)
2110 return PyInt_FromLong(0);
2112 e
= p
+ PyString_GET_SIZE(self
);
2114 for (; p
< e
; p
++) {
2116 return PyInt_FromLong(0);
2117 else if (!cased
&& islower(*p
))
2120 return PyInt_FromLong(cased
);
2124 static char isupper__doc__
[] =
2125 "S.isupper() -> int\n\
2127 Return 1 if all cased characters in S are uppercase and there is\n\
2128 at least one cased character in S, 0 otherwise.";
2131 string_isupper(PyStringObject
*self
, PyObject
*args
)
2133 register const unsigned char *p
2134 = (unsigned char *) PyString_AS_STRING(self
);
2135 register const unsigned char *e
;
2138 if (!PyArg_NoArgs(args
))
2141 /* Shortcut for single character strings */
2142 if (PyString_GET_SIZE(self
) == 1)
2143 return PyInt_FromLong(isupper(*p
) != 0);
2145 /* Special case for empty strings */
2146 if (PyString_GET_SIZE(self
) == 0)
2147 return PyInt_FromLong(0);
2149 e
= p
+ PyString_GET_SIZE(self
);
2151 for (; p
< e
; p
++) {
2153 return PyInt_FromLong(0);
2154 else if (!cased
&& isupper(*p
))
2157 return PyInt_FromLong(cased
);
2161 static char istitle__doc__
[] =
2162 "S.istitle() -> int\n\
2164 Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2165 may only follow uncased characters and lowercase characters only cased\n\
2166 ones. Return 0 otherwise.";
2169 string_istitle(PyStringObject
*self
, PyObject
*args
)
2171 register const unsigned char *p
2172 = (unsigned char *) PyString_AS_STRING(self
);
2173 register const unsigned char *e
;
2174 int cased
, previous_is_cased
;
2176 if (!PyArg_NoArgs(args
))
2179 /* Shortcut for single character strings */
2180 if (PyString_GET_SIZE(self
) == 1)
2181 return PyInt_FromLong(isupper(*p
) != 0);
2183 /* Special case for empty strings */
2184 if (PyString_GET_SIZE(self
) == 0)
2185 return PyInt_FromLong(0);
2187 e
= p
+ PyString_GET_SIZE(self
);
2189 previous_is_cased
= 0;
2190 for (; p
< e
; p
++) {
2191 register const unsigned char ch
= *p
;
2194 if (previous_is_cased
)
2195 return PyInt_FromLong(0);
2196 previous_is_cased
= 1;
2199 else if (islower(ch
)) {
2200 if (!previous_is_cased
)
2201 return PyInt_FromLong(0);
2202 previous_is_cased
= 1;
2206 previous_is_cased
= 0;
2208 return PyInt_FromLong(cased
);
/* Docstring for S.splitlines(); keepends is the single optional argument,
 * so the signature carries exactly one bracket pair around it.
 */
static char splitlines__doc__[] =
"S.splitlines([keepends]) -> list of strings\n\
\n\
Return a list of the lines in S, breaking at line boundaries.\n\
Line breaks are not included in the resulting list unless keepends\n\
is given and true.";
2219 #define SPLIT_APPEND(data, left, right) \
2220 str = PyString_FromStringAndSize(data + left, right - left); \
2223 if (PyList_Append(list, str)) { \
2231 string_splitlines(PyStringObject
*self
, PyObject
*args
)
2241 if (!PyArg_ParseTuple(args
, "|i:splitlines", &keepends
))
2244 data
= PyString_AS_STRING(self
);
2245 len
= PyString_GET_SIZE(self
);
2247 list
= PyList_New(0);
2251 for (i
= j
= 0; i
< len
; ) {
2254 /* Find a line and append it */
2255 while (i
< len
&& data
[i
] != '\n' && data
[i
] != '\r')
2258 /* Skip the line break reading CRLF as one line break */
2261 if (data
[i
] == '\r' && i
+ 1 < len
&&
2269 SPLIT_APPEND(data
, j
, eol
);
2273 SPLIT_APPEND(data
, j
, len
);
2287 string_methods
[] = {
2288 /* Counterparts of the obsolete stropmodule functions; except
2289 string.maketrans(). */
2290 {"join", (PyCFunction
)string_join
, 1, join__doc__
},
2291 {"split", (PyCFunction
)string_split
, 1, split__doc__
},
2292 {"lower", (PyCFunction
)string_lower
, 1, lower__doc__
},
2293 {"upper", (PyCFunction
)string_upper
, 1, upper__doc__
},
2294 {"islower", (PyCFunction
)string_islower
, 0, islower__doc__
},
2295 {"isupper", (PyCFunction
)string_isupper
, 0, isupper__doc__
},
2296 {"isspace", (PyCFunction
)string_isspace
, 0, isspace__doc__
},
2297 {"isdigit", (PyCFunction
)string_isdigit
, 0, isdigit__doc__
},
2298 {"istitle", (PyCFunction
)string_istitle
, 0, istitle__doc__
},
2299 {"isalpha", (PyCFunction
)string_isalpha
, 0, isalpha__doc__
},
2300 {"isalnum", (PyCFunction
)string_isalnum
, 0, isalnum__doc__
},
2301 {"capitalize", (PyCFunction
)string_capitalize
, 1, capitalize__doc__
},
2302 {"count", (PyCFunction
)string_count
, 1, count__doc__
},
2303 {"endswith", (PyCFunction
)string_endswith
, 1, endswith__doc__
},
2304 {"find", (PyCFunction
)string_find
, 1, find__doc__
},
2305 {"index", (PyCFunction
)string_index
, 1, index__doc__
},
2306 {"lstrip", (PyCFunction
)string_lstrip
, 1, lstrip__doc__
},
2307 {"replace", (PyCFunction
)string_replace
, 1, replace__doc__
},
2308 {"rfind", (PyCFunction
)string_rfind
, 1, rfind__doc__
},
2309 {"rindex", (PyCFunction
)string_rindex
, 1, rindex__doc__
},
2310 {"rstrip", (PyCFunction
)string_rstrip
, 1, rstrip__doc__
},
2311 {"startswith", (PyCFunction
)string_startswith
, 1, startswith__doc__
},
2312 {"strip", (PyCFunction
)string_strip
, 1, strip__doc__
},
2313 {"swapcase", (PyCFunction
)string_swapcase
, 1, swapcase__doc__
},
2314 {"translate", (PyCFunction
)string_translate
, 1, translate__doc__
},
2315 {"title", (PyCFunction
)string_title
, 1, title__doc__
},
2316 {"ljust", (PyCFunction
)string_ljust
, 1, ljust__doc__
},
2317 {"rjust", (PyCFunction
)string_rjust
, 1, rjust__doc__
},
2318 {"center", (PyCFunction
)string_center
, 1, center__doc__
},
2319 {"encode", (PyCFunction
)string_encode
, 1, encode__doc__
},
2320 {"expandtabs", (PyCFunction
)string_expandtabs
, 1, expandtabs__doc__
},
2321 {"splitlines", (PyCFunction
)string_splitlines
, 1, splitlines__doc__
},
2323 {"zfill", (PyCFunction
)string_zfill
, 1, zfill__doc__
},
2325 {NULL
, NULL
} /* sentinel */
2329 string_getattr(PyStringObject
*s
, char *name
)
2331 return Py_FindMethod(string_methods
, (PyObject
*)s
, name
);
2335 PyTypeObject PyString_Type
= {
2336 PyObject_HEAD_INIT(&PyType_Type
)
2339 sizeof(PyStringObject
),
2341 (destructor
)string_dealloc
, /*tp_dealloc*/
2342 (printfunc
)string_print
, /*tp_print*/
2343 (getattrfunc
)string_getattr
, /*tp_getattr*/
2345 (cmpfunc
)string_compare
, /*tp_compare*/
2346 (reprfunc
)string_repr
, /*tp_repr*/
2348 &string_as_sequence
, /*tp_as_sequence*/
2349 0, /*tp_as_mapping*/
2350 (hashfunc
)string_hash
, /*tp_hash*/
2355 &string_as_buffer
, /*tp_as_buffer*/
2356 Py_TPFLAGS_DEFAULT
, /*tp_flags*/
2361 PyString_Concat(register PyObject
**pv
, register PyObject
*w
)
2363 register PyObject
*v
;
2366 if (w
== NULL
|| !PyString_Check(*pv
)) {
2371 v
= string_concat((PyStringObject
*) *pv
, w
);
2377 PyString_ConcatAndDel(register PyObject
**pv
, register PyObject
*w
)
2379 PyString_Concat(pv
, w
);
2384 /* The following function breaks the notion that strings are immutable:
2385 it changes the size of a string. We get away with this only if there
2386 is only one module referencing the object. You can also think of it
2387 as creating a new string object and destroying the old one, only
2388 more efficiently. In any case, don't use this if the string may
2389 already be known to some other part of the code... */
2392 _PyString_Resize(PyObject
**pv
, int newsize
)
2394 register PyObject
*v
;
2395 register PyStringObject
*sv
;
2397 if (!PyString_Check(v
) || v
->ob_refcnt
!= 1) {
2400 PyErr_BadInternalCall();
2403 /* XXX UNREF/NEWREF interface should be more symmetrical */
2407 _Py_ForgetReference(v
);
2409 PyObject_REALLOC((char *)v
,
2410 sizeof(PyStringObject
) + newsize
* sizeof(char));
2416 _Py_NewReference(*pv
);
2417 sv
= (PyStringObject
*) *pv
;
2418 sv
->ob_size
= newsize
;
2419 sv
->ob_sval
[newsize
] = '\0';
2423 /* Helpers for formatstring */
2426 getnextarg(PyObject
*args
, int arglen
, int *p_argidx
)
2428 int argidx
= *p_argidx
;
2429 if (argidx
< arglen
) {
2434 return PyTuple_GetItem(args
, argidx
);
2436 PyErr_SetString(PyExc_TypeError
,
2437 "not enough arguments for format string");
2448 #define F_LJUST (1<<0)
2449 #define F_SIGN (1<<1)
2450 #define F_BLANK (1<<2)
2451 #define F_ALT (1<<3)
2452 #define F_ZERO (1<<4)
2455 formatfloat(char *buf
, size_t buflen
, int flags
,
2456 int prec
, int type
, PyObject
*v
)
2458 /* fmt = '%#.' + `prec` + `type`
2459 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
2462 if (!PyArg_Parse(v
, "d;float argument required", &x
))
2466 if (type
== 'f' && fabs(x
)/1e25
>= 1e25
)
2468 sprintf(fmt
, "%%%s.%d%c", (flags
&F_ALT
) ? "#" : "", prec
, type
);
2469 /* worst case length calc to ensure no buffer overrun:
2471 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
2472 for any double rep.)
2473 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2474 If prec=0 the effective precision is 1 (the leading digit is
2475 always given), therefore increase by one to 10+prec. */
2476 if (buflen
<= (size_t)10 + (size_t)prec
) {
2477 PyErr_SetString(PyExc_OverflowError
,
2478 "formatted float is too long (precision too long?)");
2481 sprintf(buf
, fmt
, x
);
2485 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2486 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2487 * Python's regular ints.
2488 * Return value: a new PyString*, or NULL if error.
2489 * . *pbuf is set to point into it,
2490 * *plen set to the # of chars following that.
2491 * Caller must decref it when done using pbuf.
2492 * The string starting at *pbuf is of the form
2493 * "-"? ("0x" | "0X")? digit+
2494 * "0x"/"0X" are present only for x and X conversions, with F_ALT
2495 * set in flags. The case of hex digits will be correct,
2496 * There will be at least prec digits, zero-filled on the left if
2497 * necessary to get that many.
2498 * val object to be converted
2499 * flags bitmask of format flags; only F_ALT is looked at
2500 * prec minimum number of digits; 0-fill on left if needed
2501 * type a character in [duoxX]; u acts the same as d
2503 * CAUTION: o, x and X conversions on regular ints can never
2504 * produce a '-' sign, but can for Python's unbounded ints.
2507 _PyString_FormatLong(PyObject
*val
, int flags
, int prec
, int type
,
2508 char **pbuf
, int *plen
)
2510 PyObject
*result
= NULL
;
2513 int sign
; /* 1 if '-', else 0 */
2514 int len
; /* number of characters */
2515 int numdigits
; /* len == numnondigits + numdigits */
2516 int numnondigits
= 0;
2521 result
= val
->ob_type
->tp_str(val
);
2524 result
= val
->ob_type
->tp_as_number
->nb_oct(val
);
2529 result
= val
->ob_type
->tp_as_number
->nb_hex(val
);
2532 assert(!"'type' not in [duoxX]");
2537 /* To modify the string in-place, there can only be one reference. */
2538 if (result
->ob_refcnt
!= 1) {
2539 PyErr_BadInternalCall();
2542 buf
= PyString_AsString(result
);
2543 len
= PyString_Size(result
);
2544 if (buf
[len
-1] == 'L') {
2548 sign
= buf
[0] == '-';
2549 numnondigits
+= sign
;
2550 numdigits
= len
- numnondigits
;
2551 assert(numdigits
> 0);
2553 /* Get rid of base marker unless F_ALT */
2554 if ((flags
& F_ALT
) == 0) {
2555 /* Need to skip 0x, 0X or 0. */
2559 assert(buf
[sign
] == '0');
2560 /* If 0 is only digit, leave it alone. */
2561 if (numdigits
> 1) {
2568 assert(buf
[sign
] == '0');
2569 assert(buf
[sign
+ 1] == 'x');
2580 assert(len
== numnondigits
+ numdigits
);
2581 assert(numdigits
> 0);
2584 /* Fill with leading zeroes to meet minimum width. */
2585 if (prec
> numdigits
) {
2586 PyObject
*r1
= PyString_FromStringAndSize(NULL
,
2587 numnondigits
+ prec
);
2593 b1
= PyString_AS_STRING(r1
);
2594 for (i
= 0; i
< numnondigits
; ++i
)
2596 for (i
= 0; i
< prec
- numdigits
; i
++)
2598 for (i
= 0; i
< numdigits
; i
++)
2603 buf
= PyString_AS_STRING(result
);
2604 len
= numnondigits
+ prec
;
2607 /* Fix up case for hex conversions. */
2610 /* Need to convert all upper case letters to lower case. */
2611 for (i
= 0; i
< len
; i
++)
2612 if (buf
[i
] >= 'A' && buf
[i
] <= 'F')
2616 /* Need to convert 0x to 0X (and -0x to -0X). */
2617 if (buf
[sign
+ 1] == 'x')
2618 buf
[sign
+ 1] = 'X';
2627 formatint(char *buf
, size_t buflen
, int flags
,
2628 int prec
, int type
, PyObject
*v
)
2630 /* fmt = '%#.' + `prec` + 'l' + `type`
2631 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
2633 char fmt
[64]; /* plenty big enough! */
2635 if (!PyArg_Parse(v
, "l;int argument required", &x
))
2639 sprintf(fmt
, "%%%s.%dl%c", (flags
&F_ALT
) ? "#" : "", prec
, type
);
2640 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
2641 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
2642 if (buflen
<= 13 || buflen
<= (size_t)2 + (size_t)prec
) {
2643 PyErr_SetString(PyExc_OverflowError
,
2644 "formatted integer is too long (precision too long?)");
2647 sprintf(buf
, fmt
, x
);
2652 formatchar(char *buf
, size_t buflen
, PyObject
*v
)
2654 /* presume that the buffer is at least 2 characters long */
2655 if (PyString_Check(v
)) {
2656 if (!PyArg_Parse(v
, "c;%c requires int or char", &buf
[0]))
2660 if (!PyArg_Parse(v
, "b;%c requires int or char", &buf
[0]))
2668 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
2670 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
2671 chars are formatted. XXX This is a magic number. Each formatting
2672 routine does bounds checking to ensure no overflow, but a better
2673 solution may be to malloc a buffer of appropriate size for each
2674 format. For now, the current solution is sufficient.
2676 #define FORMATBUFLEN (size_t)120
2679 PyString_Format(PyObject
*format
, PyObject
*args
)
2682 int fmtcnt
, rescnt
, reslen
, arglen
, argidx
;
2684 PyObject
*result
, *orig_args
, *v
, *w
;
2685 PyObject
*dict
= NULL
;
2686 if (format
== NULL
|| !PyString_Check(format
) || args
== NULL
) {
2687 PyErr_BadInternalCall();
2691 fmt
= PyString_AsString(format
);
2692 fmtcnt
= PyString_Size(format
);
2693 reslen
= rescnt
= fmtcnt
+ 100;
2694 result
= PyString_FromStringAndSize((char *)NULL
, reslen
);
2697 res
= PyString_AsString(result
);
2698 if (PyTuple_Check(args
)) {
2699 arglen
= PyTuple_Size(args
);
2706 if (args
->ob_type
->tp_as_mapping
)
2708 while (--fmtcnt
>= 0) {
2711 rescnt
= fmtcnt
+ 100;
2713 if (_PyString_Resize(&result
, reslen
) < 0)
2715 res
= PyString_AsString(result
)
2722 /* Got a format specifier */
2730 PyObject
*temp
= NULL
;
2734 char formatbuf
[FORMATBUFLEN
]; /* For format{float,int,char}() */
2735 char *fmt_start
= fmt
;
2745 PyErr_SetString(PyExc_TypeError
,
2746 "format requires a mapping");
2752 /* Skip over balanced parentheses */
2753 while (pcount
> 0 && --fmtcnt
>= 0) {
2756 else if (*fmt
== '(')
2760 keylen
= fmt
- keystart
- 1;
2761 if (fmtcnt
< 0 || pcount
> 0) {
2762 PyErr_SetString(PyExc_ValueError
,
2763 "incomplete format key");
2766 key
= PyString_FromStringAndSize(keystart
,
2774 args
= PyObject_GetItem(dict
, key
);
2783 while (--fmtcnt
>= 0) {
2784 switch (c
= *fmt
++) {
2785 case '-': flags
|= F_LJUST
; continue;
2786 case '+': flags
|= F_SIGN
; continue;
2787 case ' ': flags
|= F_BLANK
; continue;
2788 case '#': flags
|= F_ALT
; continue;
2789 case '0': flags
|= F_ZERO
; continue;
2794 v
= getnextarg(args
, arglen
, &argidx
);
2797 if (!PyInt_Check(v
)) {
2798 PyErr_SetString(PyExc_TypeError
,
2802 width
= PyInt_AsLong(v
);
2810 else if (c
>= 0 && isdigit(c
)) {
2812 while (--fmtcnt
>= 0) {
2813 c
= Py_CHARMASK(*fmt
++);
2816 if ((width
*10) / 10 != width
) {
2822 width
= width
*10 + (c
- '0');
2830 v
= getnextarg(args
, arglen
, &argidx
);
2833 if (!PyInt_Check(v
)) {
2839 prec
= PyInt_AsLong(v
);
2845 else if (c
>= 0 && isdigit(c
)) {
2847 while (--fmtcnt
>= 0) {
2848 c
= Py_CHARMASK(*fmt
++);
2851 if ((prec
*10) / 10 != prec
) {
2857 prec
= prec
*10 + (c
- '0');
2862 if (c
== 'h' || c
== 'l' || c
== 'L') {
2869 PyErr_SetString(PyExc_ValueError
,
2870 "incomplete format");
2874 v
= getnextarg(args
, arglen
, &argidx
);
2887 if (PyUnicode_Check(v
)) {
2892 temp
= PyObject_Str(v
);
2894 temp
= PyObject_Repr(v
);
2897 if (!PyString_Check(temp
)) {
2898 PyErr_SetString(PyExc_TypeError
,
2899 "%s argument has non-string str()");
2902 pbuf
= PyString_AsString(temp
);
2903 len
= PyString_Size(temp
);
2904 if (prec
>= 0 && len
> prec
)
2915 if (PyLong_Check(v
) && PyLong_AsLong(v
) == -1
2916 && PyErr_Occurred()) {
2917 /* Too big for a C long. */
2919 temp
= _PyString_FormatLong(v
, flags
,
2920 prec
, c
, &pbuf
, &len
);
2923 /* unbounded ints can always produce
2924 a sign character! */
2929 len
= formatint(pbuf
, sizeof(formatbuf
),
2933 /* only d conversion is signed */
2945 len
= formatfloat(pbuf
, sizeof(formatbuf
), flags
, prec
, c
, v
);
2954 len
= formatchar(pbuf
, sizeof(formatbuf
), v
);
2959 PyErr_Format(PyExc_ValueError
,
2960 "unsupported format character '%c' (0x%x)",
2965 if (*pbuf
== '-' || *pbuf
== '+') {
2969 else if (flags
& F_SIGN
)
2971 else if (flags
& F_BLANK
)
2978 if (rescnt
< width
+ (sign
!= 0)) {
2980 rescnt
= width
+ fmtcnt
+ 100;
2982 if (_PyString_Resize(&result
, reslen
) < 0)
2984 res
= PyString_AsString(result
)
2994 if ((flags
& F_ALT
) && (c
== 'x' || c
== 'X')) {
2995 assert(pbuf
[0] == '0');
2996 assert(pbuf
[1] == c
);
3007 if (width
> len
&& !(flags
& F_LJUST
)) {
3011 } while (--width
> len
);
3016 if ((flags
& F_ALT
) &&
3017 (c
== 'x' || c
== 'X')) {
3018 assert(pbuf
[0] == '0');
3019 assert(pbuf
[1] == c
);
3024 memcpy(res
, pbuf
, len
);
3027 while (--width
>= len
) {
3031 if (dict
&& (argidx
< arglen
) && c
!= '%') {
3032 PyErr_SetString(PyExc_TypeError
,
3033 "not all arguments converted");
3039 if (argidx
< arglen
&& !dict
) {
3040 PyErr_SetString(PyExc_TypeError
,
3041 "not all arguments converted");
3047 _PyString_Resize(&result
, reslen
- rescnt
);
3055 /* Fiddle args right (remove the first argidx-1 arguments) */
3057 if (PyTuple_Check(orig_args
) && argidx
> 0) {
3059 int n
= PyTuple_GET_SIZE(orig_args
) - argidx
;
3064 PyObject
*w
= PyTuple_GET_ITEM(orig_args
, n
+ argidx
);
3066 PyTuple_SET_ITEM(v
, n
, w
);
3070 Py_INCREF(orig_args
);
3074 /* Take what we have of the result and let the Unicode formatting
3075 function format the rest of the input. */
3076 rescnt
= res
- PyString_AS_STRING(result
);
3077 if (_PyString_Resize(&result
, rescnt
))
3079 fmtcnt
= PyString_GET_SIZE(format
) - \
3080 (fmt
- PyString_AS_STRING(format
));
3081 format
= PyUnicode_Decode(fmt
, fmtcnt
, NULL
, NULL
);
3084 v
= PyUnicode_Format(format
, args
);
3088 /* Paste what we have (result) to what the Unicode formatting
3089 function returned (v) and return the result (or error) */
3090 w
= PyUnicode_Concat(result
, v
);
3105 #ifdef INTERN_STRINGS
3107 /* This dictionary will leak at PyString_Fini() time. That's acceptable
3108 * because PyString_Fini() specifically frees interned strings that are
3109 * only referenced by this dictionary. The CVS log entry for revision 2.45
3112 * Change the Fini function to only remove otherwise unreferenced
3113 * strings from the interned table. There are references in
3114 * hard-to-find static variables all over the interpreter, and it's not
3115 * worth trying to get rid of all those; but "uninterning" isn't fair
3116 * either and may cause subtle failures later -- so we have to keep them
3117 * in the interned table.
3119 static PyObject
*interned
;
3122 PyString_InternInPlace(PyObject
**p
)
3124 register PyStringObject
*s
= (PyStringObject
*)(*p
);
3126 if (s
== NULL
|| !PyString_Check(s
))
3127 Py_FatalError("PyString_InternInPlace: strings only please!");
3128 if ((t
= s
->ob_sinterned
) != NULL
) {
3129 if (t
== (PyObject
*)s
)
3136 if (interned
== NULL
) {
3137 interned
= PyDict_New();
3138 if (interned
== NULL
)
3141 if ((t
= PyDict_GetItem(interned
, (PyObject
*)s
)) != NULL
) {
3143 *p
= s
->ob_sinterned
= t
;
3148 if (PyDict_SetItem(interned
, t
, t
) == 0) {
3149 s
->ob_sinterned
= t
;
3157 PyString_InternFromString(const char *cp
)
3159 PyObject
*s
= PyString_FromString(cp
);
3162 PyString_InternInPlace(&s
);
3172 for (i
= 0; i
< UCHAR_MAX
+ 1; i
++) {
3173 Py_XDECREF(characters
[i
]);
3174 characters
[i
] = NULL
;
3176 #ifndef DONT_SHARE_SHORT_STRINGS
3177 Py_XDECREF(nullstring
);
3180 #ifdef INTERN_STRINGS
3183 PyObject
*key
, *value
;
3187 while (PyDict_Next(interned
, &pos
, &key
, &value
)) {
3188 if (key
->ob_refcnt
== 2 && key
== value
) {
3189 PyDict_DelItem(interned
, key
);