1 /***********************************************************
2 Copyright (c) 2000, BeOpen.com.
3 Copyright (c) 1995-2000, Corporation for National Research Initiatives.
4 Copyright (c) 1990-1995, Stichting Mathematisch Centrum.
7 See the file "Misc/COPYRIGHT" for information on usage and
8 redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
9 ******************************************************************/
11 /* String object implementation */
18 int null_strings
, one_strings
;
29 static PyStringObject
*characters
[UCHAR_MAX
+ 1];
30 #ifndef DONT_SHARE_SHORT_STRINGS
31 static PyStringObject
*nullstring
;
35 Newsizedstringobject() and newstringobject() try in certain cases
36 to share string objects. When the size of the string is zero,
37 these routines always return a pointer to the same string object;
38 when the size is one, they return a pointer to an already existing
39 object if the contents of the string is known. For
40 newstringobject() this is always the case, for
41 newsizedstringobject() this is the case when the first argument in
43 A common practice to allocate a string and then fill it in or
44 change it must be done carefully. It is only allowed to change the
45 contents of the string if the object was gotten from
46 newsizedstringobject() with a NULL first argument, because in the
47 future these routines may try to do even more sharing of objects.
50 PyString_FromStringAndSize(const char *str
, int size
)
52 register PyStringObject
*op
;
53 #ifndef DONT_SHARE_SHORT_STRINGS
54 if (size
== 0 && (op
= nullstring
) != NULL
) {
59 return (PyObject
*)op
;
61 if (size
== 1 && str
!= NULL
&&
62 (op
= characters
[*str
& UCHAR_MAX
]) != NULL
)
68 return (PyObject
*)op
;
70 #endif /* DONT_SHARE_SHORT_STRINGS */
72 /* PyObject_NewVar is inlined */
73 op
= (PyStringObject
*)
74 PyObject_MALLOC(sizeof(PyStringObject
) + size
* sizeof(char));
76 return PyErr_NoMemory();
77 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
82 op
->ob_sinterned
= NULL
;
85 memcpy(op
->ob_sval
, str
, size
);
86 op
->ob_sval
[size
] = '\0';
87 #ifndef DONT_SHARE_SHORT_STRINGS
91 } else if (size
== 1 && str
!= NULL
) {
92 characters
[*str
& UCHAR_MAX
] = op
;
96 return (PyObject
*) op
;
100 PyString_FromString(const char *str
)
102 register size_t size
= strlen(str
);
103 register PyStringObject
*op
;
104 if (size
> INT_MAX
) {
105 PyErr_SetString(PyExc_OverflowError
,
106 "string is too long for a Python string");
109 #ifndef DONT_SHARE_SHORT_STRINGS
110 if (size
== 0 && (op
= nullstring
) != NULL
) {
115 return (PyObject
*)op
;
117 if (size
== 1 && (op
= characters
[*str
& UCHAR_MAX
]) != NULL
) {
122 return (PyObject
*)op
;
124 #endif /* DONT_SHARE_SHORT_STRINGS */
126 /* PyObject_NewVar is inlined */
127 op
= (PyStringObject
*)
128 PyObject_MALLOC(sizeof(PyStringObject
) + size
* sizeof(char));
130 return PyErr_NoMemory();
131 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
135 #ifdef INTERN_STRINGS
136 op
->ob_sinterned
= NULL
;
138 strcpy(op
->ob_sval
, str
);
139 #ifndef DONT_SHARE_SHORT_STRINGS
143 } else if (size
== 1) {
144 characters
[*str
& UCHAR_MAX
] = op
;
148 return (PyObject
*) op
;
151 PyObject
*PyString_Decode(const char *s
,
153 const char *encoding
,
156 PyObject
*buffer
= NULL
, *str
;
158 if (encoding
== NULL
)
159 encoding
= PyUnicode_GetDefaultEncoding();
161 /* Decode via the codec registry */
162 buffer
= PyBuffer_FromMemory((void *)s
, size
);
165 str
= PyCodec_Decode(buffer
, encoding
, errors
);
168 /* Convert Unicode to a string using the default encoding */
169 if (PyUnicode_Check(str
)) {
170 PyObject
*temp
= str
;
171 str
= PyUnicode_AsEncodedString(str
, NULL
, NULL
);
176 if (!PyString_Check(str
)) {
177 PyErr_Format(PyExc_TypeError
,
178 "decoder did not return a string object (type=%.400s)",
179 str
->ob_type
->tp_name
);
191 PyObject
*PyString_Encode(const char *s
,
193 const char *encoding
,
198 str
= PyString_FromStringAndSize(s
, size
);
201 v
= PyString_AsEncodedString(str
, encoding
, errors
);
206 PyObject
*PyString_AsEncodedString(PyObject
*str
,
207 const char *encoding
,
212 if (!PyString_Check(str
)) {
217 if (encoding
== NULL
)
218 encoding
= PyUnicode_GetDefaultEncoding();
220 /* Encode via the codec registry */
221 v
= PyCodec_Encode(str
, encoding
, errors
);
224 /* Convert Unicode to a string using the default encoding */
225 if (PyUnicode_Check(v
)) {
227 v
= PyUnicode_AsEncodedString(v
, NULL
, NULL
);
232 if (!PyString_Check(v
)) {
233 PyErr_Format(PyExc_TypeError
,
234 "encoder did not return a string object (type=%.400s)",
235 v
->ob_type
->tp_name
);
246 string_dealloc(PyObject
*op
)
/* Return the ob_size field of a string object.
   op is first tested with PyString_Check(); when that test fails,
   PyErr_BadInternalCall() is invoked.  NOTE(review): the value returned
   on the failure path is not visible in this listing -- confirm against
   the full source. */
252 PyString_Size(register PyObject
*op
)
254 if (!PyString_Check(op
)) {
255 PyErr_BadInternalCall();
258 return ((PyStringObject
*)op
) -> ob_size
;
/* Return a pointer to the ob_sval character data stored inside a string
   object (the object's own storage, not a copy).
   op is first tested with PyString_Check(); when that test fails,
   PyErr_BadInternalCall() is invoked.  NOTE(review): the value returned
   on the failure path is not visible in this listing -- confirm against
   the full source. */
262 PyString_AsString(register PyObject
*op
)
264 if (!PyString_Check(op
)) {
265 PyErr_BadInternalCall();
268 return ((PyStringObject
*)op
) -> ob_sval
;
274 string_print(PyStringObject
*op
, FILE *fp
, int flags
)
279 /* XXX Ought to check for interrupts when writing long strings */
280 if (flags
& Py_PRINT_RAW
) {
281 fwrite(op
->ob_sval
, 1, (int) op
->ob_size
, fp
);
285 /* figure out which quote to use; single is preferred */
287 if (strchr(op
->ob_sval
, '\'') && !strchr(op
->ob_sval
, '"'))
291 for (i
= 0; i
< op
->ob_size
; i
++) {
293 if (c
== quote
|| c
== '\\')
294 fprintf(fp
, "\\%c", c
);
295 else if (c
< ' ' || c
>= 0177)
296 fprintf(fp
, "\\%03o", c
& 0377);
305 string_repr(register PyStringObject
*op
)
307 size_t newsize
= 2 + 4 * op
->ob_size
* sizeof(char);
309 if (newsize
> INT_MAX
) {
310 PyErr_SetString(PyExc_OverflowError
,
311 "string is too large to make repr");
313 v
= PyString_FromStringAndSize((char *)NULL
, newsize
);
323 /* figure out which quote to use; single is preferred */
325 if (strchr(op
->ob_sval
, '\'') && !strchr(op
->ob_sval
, '"'))
328 p
= ((PyStringObject
*)v
)->ob_sval
;
330 for (i
= 0; i
< op
->ob_size
; i
++) {
332 if (c
== quote
|| c
== '\\')
333 *p
++ = '\\', *p
++ = c
;
334 else if (c
< ' ' || c
>= 0177) {
335 sprintf(p
, "\\%03o", c
& 0377);
345 &v
, (int) (p
- ((PyStringObject
*)v
)->ob_sval
));
351 string_length(PyStringObject
*a
)
357 string_concat(register PyStringObject
*a
, register PyObject
*bb
)
359 register unsigned int size
;
360 register PyStringObject
*op
;
361 if (!PyString_Check(bb
)) {
362 if (PyUnicode_Check(bb
))
363 return PyUnicode_Concat((PyObject
*)a
, bb
);
364 PyErr_Format(PyExc_TypeError
,
365 "cannot add type \"%.200s\" to string",
366 bb
->ob_type
->tp_name
);
369 #define b ((PyStringObject *)bb)
370 /* Optimize cases with empty left or right operand */
371 if (a
->ob_size
== 0) {
375 if (b
->ob_size
== 0) {
377 return (PyObject
*)a
;
379 size
= a
->ob_size
+ b
->ob_size
;
380 /* PyObject_NewVar is inlined */
381 op
= (PyStringObject
*)
382 PyObject_MALLOC(sizeof(PyStringObject
) + size
* sizeof(char));
384 return PyErr_NoMemory();
385 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
389 #ifdef INTERN_STRINGS
390 op
->ob_sinterned
= NULL
;
392 memcpy(op
->ob_sval
, a
->ob_sval
, (int) a
->ob_size
);
393 memcpy(op
->ob_sval
+ a
->ob_size
, b
->ob_sval
, (int) b
->ob_size
);
394 op
->ob_sval
[size
] = '\0';
395 return (PyObject
*) op
;
400 string_repeat(register PyStringObject
*a
, register int n
)
404 register PyStringObject
*op
;
407 size
= a
->ob_size
* n
;
408 if (size
== a
->ob_size
) {
410 return (PyObject
*)a
;
412 /* PyObject_NewVar is inlined */
413 op
= (PyStringObject
*)
414 PyObject_MALLOC(sizeof(PyStringObject
) + size
* sizeof(char));
416 return PyErr_NoMemory();
417 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
421 #ifdef INTERN_STRINGS
422 op
->ob_sinterned
= NULL
;
424 for (i
= 0; i
< size
; i
+= a
->ob_size
)
425 memcpy(op
->ob_sval
+i
, a
->ob_sval
, (int) a
->ob_size
);
426 op
->ob_sval
[size
] = '\0';
427 return (PyObject
*) op
;
430 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
433 string_slice(register PyStringObject
*a
, register int i
, register int j
)
434 /* j -- may be negative! */
439 j
= 0; /* Avoid signed/unsigned bug in next line */
442 if (i
== 0 && j
== a
->ob_size
) { /* It's the same as a */
444 return (PyObject
*)a
;
448 return PyString_FromStringAndSize(a
->ob_sval
+ i
, (int) (j
-i
));
452 string_contains(PyObject
*a
, PyObject
*el
)
454 register char *s
, *end
;
456 if (PyUnicode_Check(el
))
457 return PyUnicode_Contains(a
, el
);
458 if (!PyString_Check(el
) || PyString_Size(el
) != 1) {
459 PyErr_SetString(PyExc_TypeError
,
460 "'in <string>' requires character as left operand");
463 c
= PyString_AsString(el
)[0];
464 s
= PyString_AsString(a
);
465 end
= s
+ PyString_Size(a
);
474 string_item(PyStringObject
*a
, register int i
)
478 if (i
< 0 || i
>= a
->ob_size
) {
479 PyErr_SetString(PyExc_IndexError
, "string index out of range");
482 c
= a
->ob_sval
[i
] & UCHAR_MAX
;
483 v
= (PyObject
*) characters
[c
];
489 v
= PyString_FromStringAndSize((char *)NULL
, 1);
492 characters
[c
] = (PyStringObject
*) v
;
493 ((PyStringObject
*)v
)->ob_sval
[0] = c
;
500 string_compare(PyStringObject
*a
, PyStringObject
*b
)
502 int len_a
= a
->ob_size
, len_b
= b
->ob_size
;
503 int min_len
= (len_a
< len_b
) ? len_a
: len_b
;
506 cmp
= Py_CHARMASK(*a
->ob_sval
) - Py_CHARMASK(*b
->ob_sval
);
508 cmp
= memcmp(a
->ob_sval
, b
->ob_sval
, min_len
);
512 return (len_a
< len_b
) ? -1 : (len_a
> len_b
) ? 1 : 0;
516 string_hash(PyStringObject
*a
)
519 register unsigned char *p
;
523 if (a
->ob_shash
!= -1)
525 #ifdef INTERN_STRINGS
526 if (a
->ob_sinterned
!= NULL
)
527 return (a
->ob_shash
=
528 ((PyStringObject
*)(a
->ob_sinterned
))->ob_shash
);
532 p
= (unsigned char *) a
->ob_sval
;
535 x
= (1000003*x
) ^ *p
++;
546 string_buffer_getreadbuf(PyStringObject
*self
, int index
, const void **ptr
)
549 PyErr_SetString(PyExc_SystemError
,
550 "accessing non-existent string segment");
553 *ptr
= (void *)self
->ob_sval
;
554 return self
->ob_size
;
558 string_buffer_getwritebuf(PyStringObject
*self
, int index
, const void **ptr
)
560 PyErr_SetString(PyExc_TypeError
,
561 "Cannot use string as modifiable buffer");
566 string_buffer_getsegcount(PyStringObject
*self
, int *lenp
)
569 *lenp
= self
->ob_size
;
574 string_buffer_getcharbuf(PyStringObject
*self
, int index
, const char **ptr
)
577 PyErr_SetString(PyExc_SystemError
,
578 "accessing non-existent string segment");
581 *ptr
= self
->ob_sval
;
582 return self
->ob_size
;
585 static PySequenceMethods string_as_sequence
= {
586 (inquiry
)string_length
, /*sq_length*/
587 (binaryfunc
)string_concat
, /*sq_concat*/
588 (intargfunc
)string_repeat
, /*sq_repeat*/
589 (intargfunc
)string_item
, /*sq_item*/
590 (intintargfunc
)string_slice
, /*sq_slice*/
593 (objobjproc
)string_contains
/*sq_contains*/
596 static PyBufferProcs string_as_buffer
= {
597 (getreadbufferproc
)string_buffer_getreadbuf
,
598 (getwritebufferproc
)string_buffer_getwritebuf
,
599 (getsegcountproc
)string_buffer_getsegcount
,
600 (getcharbufferproc
)string_buffer_getcharbuf
,
611 split_whitespace(const char *s
, int len
, int maxsplit
)
615 PyObject
*list
= PyList_New(0);
620 for (i
= j
= 0; i
< len
; ) {
621 while (i
< len
&& isspace(Py_CHARMASK(s
[i
])))
624 while (i
< len
&& !isspace(Py_CHARMASK(s
[i
])))
629 item
= PyString_FromStringAndSize(s
+j
, (int)(i
-j
));
632 err
= PyList_Append(list
, item
);
636 while (i
< len
&& isspace(Py_CHARMASK(s
[i
])))
642 item
= PyString_FromStringAndSize(s
+j
, (int)(len
- j
));
645 err
= PyList_Append(list
, item
);
657 static char split__doc__
[] =
658 "S.split([sep [,maxsplit]]) -> list of strings\n\
660 Return a list of the words in the string S, using sep as the\n\
661 delimiter string. If maxsplit is given, at most maxsplit\n\
662 splits are done. If sep is not specified, any whitespace string\n\
666 string_split(PyStringObject
*self
, PyObject
*args
)
668 int len
= PyString_GET_SIZE(self
), n
, i
, j
, err
;
670 const char *s
= PyString_AS_STRING(self
), *sub
;
671 PyObject
*list
, *item
, *subobj
= Py_None
;
673 if (!PyArg_ParseTuple(args
, "|Oi:split", &subobj
, &maxsplit
))
677 if (subobj
== Py_None
)
678 return split_whitespace(s
, len
, maxsplit
);
679 if (PyString_Check(subobj
)) {
680 sub
= PyString_AS_STRING(subobj
);
681 n
= PyString_GET_SIZE(subobj
);
683 else if (PyUnicode_Check(subobj
))
684 return PyUnicode_Split((PyObject
*)self
, subobj
, maxsplit
);
685 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
688 PyErr_SetString(PyExc_ValueError
, "empty separator");
692 list
= PyList_New(0);
698 if (s
[i
] == sub
[0] && memcmp(s
+i
, sub
, n
) == 0) {
701 item
= PyString_FromStringAndSize(s
+j
, (int)(i
-j
));
704 err
= PyList_Append(list
, item
);
713 item
= PyString_FromStringAndSize(s
+j
, (int)(len
-j
));
716 err
= PyList_Append(list
, item
);
729 static char join__doc__
[] =
730 "S.join(sequence) -> string\n\
732 Return a string which is the concatenation of the strings in the\n\
733 sequence. The separator between elements is S.";
736 string_join(PyStringObject
*self
, PyObject
*args
)
738 char *sep
= PyString_AS_STRING(self
);
739 int seplen
= PyString_GET_SIZE(self
);
740 PyObject
*res
= NULL
;
745 int i
, slen
, sz_incr
;
746 PyObject
*orig
, *seq
, *item
;
748 if (!PyArg_ParseTuple(args
, "O:join", &orig
))
751 if (!(seq
= PySequence_Fast(orig
, ""))) {
752 if (PyErr_ExceptionMatches(PyExc_TypeError
))
753 PyErr_Format(PyExc_TypeError
,
754 "sequence expected, %.80s found",
755 orig
->ob_type
->tp_name
);
758 /* From here on out, errors go through finally: for proper
759 * reference count manipulations.
761 seqlen
= PySequence_Size(seq
);
763 item
= PySequence_Fast_GET_ITEM(seq
, 0);
769 if (!(res
= PyString_FromStringAndSize((char*)NULL
, sz
)))
772 p
= PyString_AS_STRING(res
);
774 for (i
= 0; i
< seqlen
; i
++) {
775 item
= PySequence_Fast_GET_ITEM(seq
, i
);
776 if (!PyString_Check(item
)){
777 if (PyUnicode_Check(item
)) {
780 return PyUnicode_Join((PyObject
*)self
, seq
);
782 PyErr_Format(PyExc_TypeError
,
783 "sequence item %i: expected string,"
785 i
, item
->ob_type
->tp_name
);
788 slen
= PyString_GET_SIZE(item
);
789 while (reslen
+ slen
+ seplen
>= sz
) {
790 /* at least double the size of the string */
791 sz_incr
= slen
+ seplen
> sz
? slen
+ seplen
: sz
;
792 if (_PyString_Resize(&res
, sz
+ sz_incr
)) {
796 p
= PyString_AS_STRING(res
) + reslen
;
799 memcpy(p
, sep
, seplen
);
803 memcpy(p
, PyString_AS_STRING(item
), slen
);
807 if (_PyString_Resize(&res
, reslen
))
821 string_find_internal(PyStringObject
*self
, PyObject
*args
, int dir
)
823 const char *s
= PyString_AS_STRING(self
), *sub
;
824 int len
= PyString_GET_SIZE(self
);
825 int n
, i
= 0, last
= INT_MAX
;
828 if (!PyArg_ParseTuple(args
, "O|O&O&:find/rfind/index/rindex",
829 &subobj
, _PyEval_SliceIndex
, &i
, _PyEval_SliceIndex
, &last
))
831 if (PyString_Check(subobj
)) {
832 sub
= PyString_AS_STRING(subobj
);
833 n
= PyString_GET_SIZE(subobj
);
835 else if (PyUnicode_Check(subobj
))
836 return PyUnicode_Find((PyObject
*)self
, subobj
, i
, last
, 1);
837 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
852 if (n
== 0 && i
<= last
)
855 for (; i
<= last
; ++i
)
856 if (s
[i
] == sub
[0] && memcmp(&s
[i
], sub
, n
) == 0)
862 if (n
== 0 && i
<= last
)
864 for (j
= last
-n
; j
>= i
; --j
)
865 if (s
[j
] == sub
[0] && memcmp(&s
[j
], sub
, n
) == 0)
873 static char find__doc__
[] =
874 "S.find(sub [,start [,end]]) -> int\n\
876 Return the lowest index in S where substring sub is found,\n\
877 such that sub is contained within s[start,end]. Optional\n\
878 arguments start and end are interpreted as in slice notation.\n\
880 Return -1 on failure.";
/* S.find() method: hand self and the argument tuple to
   string_find_internal() with +1 as its dir argument and wrap the
   resulting long in a Python int via PyInt_FromLong().
   NOTE(review): any error check on result between the two visible
   statements is not shown in this listing. */
883 string_find(PyStringObject
*self
, PyObject
*args
)
885 long result
= string_find_internal(self
, args
, +1);
888 return PyInt_FromLong(result
);
892 static char index__doc__
[] =
893 "S.index(sub [,start [,end]]) -> int\n\
895 Like S.find() but raise ValueError when the substring is not found.";
/* S.index() method: same search as S.find() (string_find_internal()
   with +1 as its dir argument), but a ValueError with the message
   "substring not found in string.index" is set instead of reporting a
   miss through the return value; a successful result is wrapped with
   PyInt_FromLong().
   NOTE(review): the condition guarding the PyErr_SetString() call is
   not visible in this listing. */
898 string_index(PyStringObject
*self
, PyObject
*args
)
900 long result
= string_find_internal(self
, args
, +1);
904 PyErr_SetString(PyExc_ValueError
,
905 "substring not found in string.index");
908 return PyInt_FromLong(result
);
912 static char rfind__doc__
[] =
913 "S.rfind(sub [,start [,end]]) -> int\n\
915 Return the highest index in S where substring sub is found,\n\
916 such that sub is contained within s[start,end]. Optional\n\
917 arguments start and end are interpreted as in slice notation.\n\
919 Return -1 on failure.";
/* S.rfind() method: hand self and the argument tuple to
   string_find_internal() with -1 as its dir argument and wrap the
   resulting long in a Python int via PyInt_FromLong().
   NOTE(review): any error check on result between the two visible
   statements is not shown in this listing. */
922 string_rfind(PyStringObject
*self
, PyObject
*args
)
924 long result
= string_find_internal(self
, args
, -1);
927 return PyInt_FromLong(result
);
931 static char rindex__doc__
[] =
932 "S.rindex(sub [,start [,end]]) -> int\n\
934 Like S.rfind() but raise ValueError when the substring is not found.";
/* S.rindex() method: same search as S.rfind() (string_find_internal()
   with -1 as its dir argument), but a ValueError with the message
   "substring not found in string.rindex" is set instead of reporting a
   miss through the return value; a successful result is wrapped with
   PyInt_FromLong().
   NOTE(review): the condition guarding the PyErr_SetString() call is
   not visible in this listing. */
937 string_rindex(PyStringObject
*self
, PyObject
*args
)
939 long result
= string_find_internal(self
, args
, -1);
943 PyErr_SetString(PyExc_ValueError
,
944 "substring not found in string.rindex");
947 return PyInt_FromLong(result
);
952 do_strip(PyStringObject
*self
, PyObject
*args
, int striptype
)
954 char *s
= PyString_AS_STRING(self
);
955 int len
= PyString_GET_SIZE(self
), i
, j
;
957 if (!PyArg_ParseTuple(args
, ":strip"))
961 if (striptype
!= RIGHTSTRIP
) {
962 while (i
< len
&& isspace(Py_CHARMASK(s
[i
]))) {
968 if (striptype
!= LEFTSTRIP
) {
971 } while (j
>= i
&& isspace(Py_CHARMASK(s
[j
])));
975 if (i
== 0 && j
== len
) {
977 return (PyObject
*)self
;
980 return PyString_FromStringAndSize(s
+i
, j
-i
);
984 static char strip__doc__
[] =
985 "S.strip() -> string\n\
987 Return a copy of the string S with leading and trailing\n\
988 whitespace removed.";
/* S.strip() method: thin wrapper that forwards self and args to
   do_strip() with striptype BOTHSTRIP and returns its result. */
991 string_strip(PyStringObject
*self
, PyObject
*args
)
993 return do_strip(self
, args
, BOTHSTRIP
);
997 static char lstrip__doc__
[] =
998 "S.lstrip() -> string\n\
1000 Return a copy of the string S with leading whitespace removed.";
/* S.lstrip() method: thin wrapper that forwards self and args to
   do_strip() with striptype LEFTSTRIP and returns its result. */
1003 string_lstrip(PyStringObject
*self
, PyObject
*args
)
1005 return do_strip(self
, args
, LEFTSTRIP
);
1009 static char rstrip__doc__
[] =
1010 "S.rstrip() -> string\n\
1012 Return a copy of the string S with trailing whitespace removed.";
/* S.rstrip() method: thin wrapper that forwards self and args to
   do_strip() with striptype RIGHTSTRIP and returns its result. */
1015 string_rstrip(PyStringObject
*self
, PyObject
*args
)
1017 return do_strip(self
, args
, RIGHTSTRIP
);
1021 static char lower__doc__
[] =
1022 "S.lower() -> string\n\
1024 Return a copy of the string S converted to lowercase.";
1027 string_lower(PyStringObject
*self
, PyObject
*args
)
1029 char *s
= PyString_AS_STRING(self
), *s_new
;
1030 int i
, n
= PyString_GET_SIZE(self
);
1033 if (!PyArg_ParseTuple(args
, ":lower"))
1035 new = PyString_FromStringAndSize(NULL
, n
);
1038 s_new
= PyString_AsString(new);
1039 for (i
= 0; i
< n
; i
++) {
1040 int c
= Py_CHARMASK(*s
++);
1042 *s_new
= tolower(c
);
1051 static char upper__doc__
[] =
1052 "S.upper() -> string\n\
1054 Return a copy of the string S converted to uppercase.";
1057 string_upper(PyStringObject
*self
, PyObject
*args
)
1059 char *s
= PyString_AS_STRING(self
), *s_new
;
1060 int i
, n
= PyString_GET_SIZE(self
);
1063 if (!PyArg_ParseTuple(args
, ":upper"))
1065 new = PyString_FromStringAndSize(NULL
, n
);
1068 s_new
= PyString_AsString(new);
1069 for (i
= 0; i
< n
; i
++) {
1070 int c
= Py_CHARMASK(*s
++);
1072 *s_new
= toupper(c
);
1081 static char title__doc__
[] =
1082 "S.title() -> string\n\
1084 Return a titlecased version of S, i.e. words start with uppercase\n\
1085 characters, all remaining cased characters have lowercase.";
/* S.title() method.
   Takes no arguments (format ":title").  Allocates a new string of the
   same length n as self and walks the input one character at a time,
   using previous_is_cased to remember whether the preceding character
   was a cased letter; that flag is set to 1 after a cased character and
   reset to 0 otherwise.
   self is declared as PyStringObject * (it was PyUnicodeObject *): this
   is a string-object method and the body only uses the PyString_*
   accessors on it.
   NOTE(review): the case-conversion statements, the store into s_new
   and the return are not visible in this listing. */
1088 string_title(PyStringObject
*self
, PyObject
*args
)
1090 char *s
= PyString_AS_STRING(self
), *s_new
;
1091 int i
, n
= PyString_GET_SIZE(self
);
1092 int previous_is_cased
= 0;
1095 if (!PyArg_ParseTuple(args
, ":title"))
1097 new = PyString_FromStringAndSize(NULL
, n
);
1100 s_new
= PyString_AsString(new);
1101 for (i
= 0; i
< n
; i
++) {
1102 int c
= Py_CHARMASK(*s
++);
1104 if (!previous_is_cased
)
1106 previous_is_cased
= 1;
1107 } else if (isupper(c
)) {
1108 if (previous_is_cased
)
1110 previous_is_cased
= 1;
1112 previous_is_cased
= 0;
1118 static char capitalize__doc__
[] =
1119 "S.capitalize() -> string\n\
1121 Return a copy of the string S with only its first character\n\
1125 string_capitalize(PyStringObject
*self
, PyObject
*args
)
1127 char *s
= PyString_AS_STRING(self
), *s_new
;
1128 int i
, n
= PyString_GET_SIZE(self
);
1131 if (!PyArg_ParseTuple(args
, ":capitalize"))
1133 new = PyString_FromStringAndSize(NULL
, n
);
1136 s_new
= PyString_AsString(new);
1138 int c
= Py_CHARMASK(*s
++);
1140 *s_new
= toupper(c
);
1145 for (i
= 1; i
< n
; i
++) {
1146 int c
= Py_CHARMASK(*s
++);
1148 *s_new
= tolower(c
);
1157 static char count__doc__
[] =
1158 "S.count(sub[, start[, end]]) -> int\n\
1160 Return the number of occurrences of substring sub in string\n\
1161 S[start:end]. Optional arguments start and end are\n\
1162 interpreted as in slice notation.";
1165 string_count(PyStringObject
*self
, PyObject
*args
)
1167 const char *s
= PyString_AS_STRING(self
), *sub
;
1168 int len
= PyString_GET_SIZE(self
), n
;
1169 int i
= 0, last
= INT_MAX
;
1173 if (!PyArg_ParseTuple(args
, "O|O&O&:count", &subobj
,
1174 _PyEval_SliceIndex
, &i
, _PyEval_SliceIndex
, &last
))
1177 if (PyString_Check(subobj
)) {
1178 sub
= PyString_AS_STRING(subobj
);
1179 n
= PyString_GET_SIZE(subobj
);
1181 else if (PyUnicode_Check(subobj
))
1182 return PyInt_FromLong(
1183 PyUnicode_Count((PyObject
*)self
, subobj
, i
, last
));
1184 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
1199 return PyInt_FromLong((long) (m
-i
));
1203 if (!memcmp(s
+i
, sub
, n
)) {
1210 return PyInt_FromLong((long) r
);
1214 static char swapcase__doc__
[] =
1215 "S.swapcase() -> string\n\
1217 Return a copy of the string S with uppercase characters\n\
1218 converted to lowercase and vice versa.";
1221 string_swapcase(PyStringObject
*self
, PyObject
*args
)
1223 char *s
= PyString_AS_STRING(self
), *s_new
;
1224 int i
, n
= PyString_GET_SIZE(self
);
1227 if (!PyArg_ParseTuple(args
, ":swapcase"))
1229 new = PyString_FromStringAndSize(NULL
, n
);
1232 s_new
= PyString_AsString(new);
1233 for (i
= 0; i
< n
; i
++) {
1234 int c
= Py_CHARMASK(*s
++);
1236 *s_new
= toupper(c
);
1238 else if (isupper(c
)) {
1239 *s_new
= tolower(c
);
1249 static char translate__doc__
[] =
1250 "S.translate(table [,deletechars]) -> string\n\
1252 Return a copy of the string S, where all characters occurring\n\
1253 in the optional argument deletechars are removed, and the\n\
1254 remaining characters have been mapped through the given\n\
1255 translation table, which must be a string of length 256.";
1258 string_translate(PyStringObject
*self
, PyObject
*args
)
1260 register char *input
, *output
;
1261 register const char *table
;
1262 register int i
, c
, changed
= 0;
1263 PyObject
*input_obj
= (PyObject
*)self
;
1264 const char *table1
, *output_start
, *del_table
=NULL
;
1265 int inlen
, tablen
, dellen
= 0;
1267 int trans_table
[256];
1268 PyObject
*tableobj
, *delobj
= NULL
;
1270 if (!PyArg_ParseTuple(args
, "O|O:translate",
1271 &tableobj
, &delobj
))
1274 if (PyString_Check(tableobj
)) {
1275 table1
= PyString_AS_STRING(tableobj
);
1276 tablen
= PyString_GET_SIZE(tableobj
);
1278 else if (PyUnicode_Check(tableobj
)) {
1279 /* Unicode .translate() does not support the deletechars
1280 parameter; instead a mapping to None will cause characters
1282 if (delobj
!= NULL
) {
1283 PyErr_SetString(PyExc_TypeError
,
1284 "deletions are implemented differently for unicode");
1287 return PyUnicode_Translate((PyObject
*)self
, tableobj
, NULL
);
1289 else if (PyObject_AsCharBuffer(tableobj
, &table1
, &tablen
))
1292 if (delobj
!= NULL
) {
1293 if (PyString_Check(delobj
)) {
1294 del_table
= PyString_AS_STRING(delobj
);
1295 dellen
= PyString_GET_SIZE(delobj
);
1297 else if (PyUnicode_Check(delobj
)) {
1298 PyErr_SetString(PyExc_TypeError
,
1299 "deletions are implemented differently for unicode");
1302 else if (PyObject_AsCharBuffer(delobj
, &del_table
, &dellen
))
1305 if (tablen
!= 256) {
1306 PyErr_SetString(PyExc_ValueError
,
1307 "translation table must be 256 characters long");
1317 inlen
= PyString_Size(input_obj
);
1318 result
= PyString_FromStringAndSize((char *)NULL
, inlen
);
1321 output_start
= output
= PyString_AsString(result
);
1322 input
= PyString_AsString(input_obj
);
1325 /* If no deletions are required, use faster code */
1326 for (i
= inlen
; --i
>= 0; ) {
1327 c
= Py_CHARMASK(*input
++);
1328 if (Py_CHARMASK((*output
++ = table
[c
])) != c
)
1334 Py_INCREF(input_obj
);
1338 for (i
= 0; i
< 256; i
++)
1339 trans_table
[i
] = Py_CHARMASK(table
[i
]);
1341 for (i
= 0; i
< dellen
; i
++)
1342 trans_table
[(int) Py_CHARMASK(del_table
[i
])] = -1;
1344 for (i
= inlen
; --i
>= 0; ) {
1345 c
= Py_CHARMASK(*input
++);
1346 if (trans_table
[c
] != -1)
1347 if (Py_CHARMASK(*output
++ = (char)trans_table
[c
]) == c
)
1353 Py_INCREF(input_obj
);
1356 /* Fix the size of the resulting string */
1357 if (inlen
> 0 &&_PyString_Resize(&result
, output
-output_start
))
1363 /* What follows is used for implementing replace(). Perry Stoll. */
1368 strstr replacement for arbitrary blocks of memory.
1370 Locates the first occurrence in the memory pointed to by MEM of the
1371 contents of memory pointed to by PAT. Returns the index into MEM if
1372 found, or -1 if not found. If len of PAT is greater than length of
1373 MEM, the function returns -1.
1376 mymemfind(const char *mem
, int len
, const char *pat
, int pat_len
)
1380 /* pattern can not occur in the last pat_len-1 chars */
1383 for (ii
= 0; ii
<= len
; ii
++) {
1384 if (mem
[ii
] == pat
[0] && memcmp(&mem
[ii
], pat
, pat_len
) == 0) {
1394 Return the number of distinct times PAT is found in MEM.
1395 meaning mem=1111 and pat==11 returns 2.
1396 mem=11111 and pat==11 also return 2.
1399 mymemcnt(const char *mem
, int len
, const char *pat
, int pat_len
)
1401 register int offset
= 0;
1405 offset
= mymemfind(mem
, len
, pat
, pat_len
);
1408 mem
+= offset
+ pat_len
;
1409 len
-= offset
+ pat_len
;
1418 Return a string in which all occurrences of PAT in memory STR are
1421 If length of PAT is greater than length of STR or there are no occurrences
1422 of PAT in STR, then the original string is returned. Otherwise, a new
1423 string is allocated here and returned.
1425 on return, out_len is:
1426 the length of output string, or
1427 -1 if the input string is returned, or
1428 unchanged if an error occurs (no memory).
1431 the new string allocated locally, or
1432 NULL if an error occurred.
1435 mymemreplace(const char *str
, int len
, /* input string */
1436 const char *pat
, int pat_len
, /* pattern string to find */
1437 const char *sub
, int sub_len
, /* substitution string */
1438 int count
, /* number of replacements */
1443 int nfound
, offset
, new_len
;
1445 if (len
== 0 || pat_len
> len
)
1448 /* find length of output string */
1449 nfound
= mymemcnt(str
, len
, pat
, pat_len
);
1452 else if (nfound
> count
)
1456 new_len
= len
+ nfound
*(sub_len
- pat_len
);
1458 new_s
= (char *)PyMem_MALLOC(new_len
);
1459 if (new_s
== NULL
) return NULL
;
1465 /* find index of next instance of pattern */
1466 offset
= mymemfind(str
, len
, pat
, pat_len
);
1467 /* if not found, break out of loop */
1468 if (offset
== -1) break;
1470 /* copy non matching part of input string */
1471 memcpy(new_s
, str
, offset
); /* copy part of str before pat */
1472 str
+= offset
+ pat_len
; /* move str past pattern */
1473 len
-= offset
+ pat_len
; /* reduce length of str remaining */
1475 /* copy substitute into the output string */
1476 new_s
+= offset
; /* move new_s to dest for sub string */
1477 memcpy(new_s
, sub
, sub_len
); /* copy substring into new_s */
1478 new_s
+= sub_len
; /* offset new_s past sub string */
1480 /* break when we've done count replacements */
1481 if (--count
== 0) break;
1483 /* copy any remaining values into output string */
1485 memcpy(new_s
, str
, len
);
1490 return (char*)str
; /* have to cast away constness here */
1494 static char replace__doc__
[] =
1495 "S.replace (old, new[, maxsplit]) -> string\n\
1497 Return a copy of string S with all occurrences of substring\n\
1498 old replaced by new. If the optional argument maxsplit is\n\
1499 given, only the first maxsplit occurrences are replaced.";
1502 string_replace(PyStringObject
*self
, PyObject
*args
)
1504 const char *str
= PyString_AS_STRING(self
), *sub
, *repl
;
1506 int len
= PyString_GET_SIZE(self
), sub_len
, repl_len
, out_len
;
1509 PyObject
*subobj
, *replobj
;
1511 if (!PyArg_ParseTuple(args
, "OO|i:replace",
1512 &subobj
, &replobj
, &count
))
1515 if (PyString_Check(subobj
)) {
1516 sub
= PyString_AS_STRING(subobj
);
1517 sub_len
= PyString_GET_SIZE(subobj
);
1519 else if (PyUnicode_Check(subobj
))
1520 return PyUnicode_Replace((PyObject
*)self
,
1521 subobj
, replobj
, count
);
1522 else if (PyObject_AsCharBuffer(subobj
, &sub
, &sub_len
))
1525 if (PyString_Check(replobj
)) {
1526 repl
= PyString_AS_STRING(replobj
);
1527 repl_len
= PyString_GET_SIZE(replobj
);
1529 else if (PyUnicode_Check(replobj
))
1530 return PyUnicode_Replace((PyObject
*)self
,
1531 subobj
, replobj
, count
);
1532 else if (PyObject_AsCharBuffer(replobj
, &repl
, &repl_len
))
1536 PyErr_SetString(PyExc_ValueError
, "empty pattern string");
1539 new_s
= mymemreplace(str
,len
,sub
,sub_len
,repl
,repl_len
,count
,&out_len
);
1540 if (new_s
== NULL
) {
1544 if (out_len
== -1) {
1545 /* we're returning another reference to self */
1546 new = (PyObject
*)self
;
1550 new = PyString_FromStringAndSize(new_s
, out_len
);
1557 static char startswith__doc__
[] =
1558 "S.startswith(prefix[, start[, end]]) -> int\n\
1560 Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1561 optional start, test S beginning at that position. With optional end, stop\n\
1562 comparing S at that position.";
1565 string_startswith(PyStringObject
*self
, PyObject
*args
)
1567 const char* str
= PyString_AS_STRING(self
);
1568 int len
= PyString_GET_SIZE(self
);
1575 if (!PyArg_ParseTuple(args
, "O|O&O&:startswith", &subobj
,
1576 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1578 if (PyString_Check(subobj
)) {
1579 prefix
= PyString_AS_STRING(subobj
);
1580 plen
= PyString_GET_SIZE(subobj
);
1582 else if (PyUnicode_Check(subobj
))
1583 return PyInt_FromLong(
1584 PyUnicode_Tailmatch((PyObject
*)self
,
1585 subobj
, start
, end
, -1));
1586 else if (PyObject_AsCharBuffer(subobj
, &prefix
, &plen
))
1589 /* adopt Java semantics for index out of range. it is legal for
1590 * offset to be == plen, but this only returns true if prefix is
1593 if (start
< 0 || start
+plen
> len
)
1594 return PyInt_FromLong(0);
1596 if (!memcmp(str
+start
, prefix
, plen
)) {
1597 /* did the match end after the specified end? */
1599 return PyInt_FromLong(1);
1600 else if (end
- start
< plen
)
1601 return PyInt_FromLong(0);
1603 return PyInt_FromLong(1);
1605 else return PyInt_FromLong(0);
1609 static char endswith__doc__
[] =
1610 "S.endswith(suffix[, start[, end]]) -> int\n\
1612 Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1613 optional start, test S beginning at that position. With optional end, stop\n\
1614 comparing S at that position.";
1617 string_endswith(PyStringObject
*self
, PyObject
*args
)
1619 const char* str
= PyString_AS_STRING(self
);
1620 int len
= PyString_GET_SIZE(self
);
1628 if (!PyArg_ParseTuple(args
, "O|O&O&:endswith", &subobj
,
1629 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
1631 if (PyString_Check(subobj
)) {
1632 suffix
= PyString_AS_STRING(subobj
);
1633 slen
= PyString_GET_SIZE(subobj
);
1635 else if (PyUnicode_Check(subobj
))
1636 return PyInt_FromLong(
1637 PyUnicode_Tailmatch((PyObject
*)self
,
1638 subobj
, start
, end
, +1));
1639 else if (PyObject_AsCharBuffer(subobj
, &suffix
, &slen
))
1642 if (start
< 0 || start
> len
|| slen
> len
)
1643 return PyInt_FromLong(0);
1645 upper
= (end
>= 0 && end
<= len
) ? end
: len
;
1646 lower
= (upper
- slen
) > start
? (upper
- slen
) : start
;
1648 if (upper
-lower
>= slen
&& !memcmp(str
+lower
, suffix
, slen
))
1649 return PyInt_FromLong(1);
1650 else return PyInt_FromLong(0);
1654 static char encode__doc__
[] =
1655 "S.encode([encoding[,errors]]) -> string\n\
1657 Return an encoded string version of S. Default encoding is the current\n\
1658 default string encoding. errors may be given to set a different error\n\
1659 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1660 a ValueError. Other possible values are 'ignore' and 'replace'.";
1663 string_encode(PyStringObject
*self
, PyObject
*args
)
1665 char *encoding
= NULL
;
1666 char *errors
= NULL
;
1667 if (!PyArg_ParseTuple(args
, "|ss:encode", &encoding
, &errors
))
1669 return PyString_AsEncodedString((PyObject
*)self
, encoding
, errors
);
1673 static char expandtabs__doc__
[] =
1674 "S.expandtabs([tabsize]) -> string\n\
1676 Return a copy of S where all tab characters are expanded using spaces.\n\
1677 If tabsize is not given, a tab size of 8 characters is assumed.";
1680 string_expandtabs(PyStringObject
*self
, PyObject
*args
)
1688 if (!PyArg_ParseTuple(args
, "|i:expandtabs", &tabsize
))
1691 /* First pass: determine size of output string */
1693 e
= PyString_AS_STRING(self
) + PyString_GET_SIZE(self
);
1694 for (p
= PyString_AS_STRING(self
); p
< e
; p
++)
1697 j
+= tabsize
- (j
% tabsize
);
1701 if (*p
== '\n' || *p
== '\r') {
1707 /* Second pass: create output string and fill it */
1708 u
= PyString_FromStringAndSize(NULL
, i
+ j
);
1713 q
= PyString_AS_STRING(u
);
1715 for (p
= PyString_AS_STRING(self
); p
< e
; p
++)
1718 i
= tabsize
- (j
% tabsize
);
1727 if (*p
== '\n' || *p
== '\r')
1735 PyObject
*pad(PyStringObject
*self
,
1747 if (left
== 0 && right
== 0) {
1749 return (PyObject
*)self
;
1752 u
= PyString_FromStringAndSize(NULL
,
1753 left
+ PyString_GET_SIZE(self
) + right
);
1756 memset(PyString_AS_STRING(u
), fill
, left
);
1757 memcpy(PyString_AS_STRING(u
) + left
,
1758 PyString_AS_STRING(self
),
1759 PyString_GET_SIZE(self
));
1761 memset(PyString_AS_STRING(u
) + left
+ PyString_GET_SIZE(self
),
1768 static char ljust__doc__
[] =
1769 "S.ljust(width) -> string\n\
1771 Return S left justified in a string of length width. Padding is\n\
1772 done using spaces.";
1775 string_ljust(PyStringObject
*self
, PyObject
*args
)
1778 if (!PyArg_ParseTuple(args
, "i:ljust", &width
))
1781 if (PyString_GET_SIZE(self
) >= width
) {
1783 return (PyObject
*) self
;
1786 return pad(self
, 0, width
- PyString_GET_SIZE(self
), ' ');
1790 static char rjust__doc__
[] =
1791 "S.rjust(width) -> string\n\
1793 Return S right justified in a string of length width. Padding is\n\
1794 done using spaces.";
1797 string_rjust(PyStringObject
*self
, PyObject
*args
)
1800 if (!PyArg_ParseTuple(args
, "i:rjust", &width
))
1803 if (PyString_GET_SIZE(self
) >= width
) {
1805 return (PyObject
*) self
;
1808 return pad(self
, width
- PyString_GET_SIZE(self
), 0, ' ');
1812 static char center__doc__
[] =
1813 "S.center(width) -> string\n\
1815 Return S centered in a string of length width. Padding is done\n\
1819 string_center(PyStringObject
*self
, PyObject
*args
)
1824 if (!PyArg_ParseTuple(args
, "i:center", &width
))
1827 if (PyString_GET_SIZE(self
) >= width
) {
1829 return (PyObject
*) self
;
1832 marg
= width
- PyString_GET_SIZE(self
);
1833 left
= marg
/ 2 + (marg
& width
& 1);
1835 return pad(self
, left
, marg
- left
, ' ');
1839 static char zfill__doc__
[] =
1840 "S.zfill(width) -> string\n\
1842 Pad a numeric string x with zeros on the left, to fill a field\n\
1843 of the specified width. The string x is never truncated.";
1846 string_zfill(PyStringObject
*self
, PyObject
*args
)
1853 if (!PyArg_ParseTuple(args
, "i:zfill", &width
))
1856 if (PyString_GET_SIZE(self
) >= width
) {
1858 return (PyObject
*) self
;
1861 fill
= width
- PyString_GET_SIZE(self
);
1863 u
= pad(self
, fill
, 0, '0');
1867 str
= PyString_AS_STRING(u
);
1868 if (str
[fill
] == '+' || str
[fill
] == '-') {
1869 /* move sign to beginning of string */
1878 static char isspace__doc__
[] =
1879 "S.isspace() -> int\n\
1881 Return 1 if there are only whitespace characters in S,\n\
1885 string_isspace(PyStringObject
*self
, PyObject
*args
)
1887 register const unsigned char *p
1888 = (unsigned char *) PyString_AS_STRING(self
);
1889 register const unsigned char *e
;
1891 if (!PyArg_NoArgs(args
))
1894 /* Shortcut for single character strings */
1895 if (PyString_GET_SIZE(self
) == 1 &&
1897 return PyInt_FromLong(1);
1899 /* Special case for empty strings */
1900 if (PyString_GET_SIZE(self
) == 0)
1901 return PyInt_FromLong(0);
1903 e
= p
+ PyString_GET_SIZE(self
);
1904 for (; p
< e
; p
++) {
1906 return PyInt_FromLong(0);
1908 return PyInt_FromLong(1);
1912 static char isalpha__doc__
[] =
1913 "S.isalpha() -> int\n\
1915 Return 1 if all characters in S are alphabetic\n\
1916 and there is at least one character in S, 0 otherwise.";
1919 string_isalpha(PyUnicodeObject
*self
, PyObject
*args
)
1921 register const unsigned char *p
1922 = (unsigned char *) PyString_AS_STRING(self
);
1923 register const unsigned char *e
;
1925 if (!PyArg_NoArgs(args
))
1928 /* Shortcut for single character strings */
1929 if (PyString_GET_SIZE(self
) == 1 &&
1931 return PyInt_FromLong(1);
1933 /* Special case for empty strings */
1934 if (PyString_GET_SIZE(self
) == 0)
1935 return PyInt_FromLong(0);
1937 e
= p
+ PyString_GET_SIZE(self
);
1938 for (; p
< e
; p
++) {
1940 return PyInt_FromLong(0);
1942 return PyInt_FromLong(1);
1946 static char isalnum__doc__
[] =
1947 "S.isalnum() -> int\n\
1949 Return 1 if all characters in S are alphanumeric\n\
1950 and there is at least one character in S, 0 otherwise.";
1953 string_isalnum(PyUnicodeObject
*self
, PyObject
*args
)
1955 register const unsigned char *p
1956 = (unsigned char *) PyString_AS_STRING(self
);
1957 register const unsigned char *e
;
1959 if (!PyArg_NoArgs(args
))
1962 /* Shortcut for single character strings */
1963 if (PyString_GET_SIZE(self
) == 1 &&
1965 return PyInt_FromLong(1);
1967 /* Special case for empty strings */
1968 if (PyString_GET_SIZE(self
) == 0)
1969 return PyInt_FromLong(0);
1971 e
= p
+ PyString_GET_SIZE(self
);
1972 for (; p
< e
; p
++) {
1974 return PyInt_FromLong(0);
1976 return PyInt_FromLong(1);
1980 static char isdigit__doc__
[] =
1981 "S.isdigit() -> int\n\
1983 Return 1 if there are only digit characters in S,\n\
1987 string_isdigit(PyStringObject
*self
, PyObject
*args
)
1989 register const unsigned char *p
1990 = (unsigned char *) PyString_AS_STRING(self
);
1991 register const unsigned char *e
;
1993 if (!PyArg_NoArgs(args
))
1996 /* Shortcut for single character strings */
1997 if (PyString_GET_SIZE(self
) == 1 &&
1999 return PyInt_FromLong(1);
2001 /* Special case for empty strings */
2002 if (PyString_GET_SIZE(self
) == 0)
2003 return PyInt_FromLong(0);
2005 e
= p
+ PyString_GET_SIZE(self
);
2006 for (; p
< e
; p
++) {
2008 return PyInt_FromLong(0);
2010 return PyInt_FromLong(1);
2014 static char islower__doc__
[] =
2015 "S.islower() -> int\n\
2017 Return 1 if all cased characters in S are lowercase and there is\n\
2018 at least one cased character in S, 0 otherwise.";
2021 string_islower(PyStringObject
*self
, PyObject
*args
)
2023 register const unsigned char *p
2024 = (unsigned char *) PyString_AS_STRING(self
);
2025 register const unsigned char *e
;
2028 if (!PyArg_NoArgs(args
))
2031 /* Shortcut for single character strings */
2032 if (PyString_GET_SIZE(self
) == 1)
2033 return PyInt_FromLong(islower(*p
) != 0);
2035 /* Special case for empty strings */
2036 if (PyString_GET_SIZE(self
) == 0)
2037 return PyInt_FromLong(0);
2039 e
= p
+ PyString_GET_SIZE(self
);
2041 for (; p
< e
; p
++) {
2043 return PyInt_FromLong(0);
2044 else if (!cased
&& islower(*p
))
2047 return PyInt_FromLong(cased
);
2051 static char isupper__doc__
[] =
2052 "S.isupper() -> int\n\
2054 Return 1 if all cased characters in S are uppercase and there is\n\
2055 at least one cased character in S, 0 otherwise.";
2058 string_isupper(PyStringObject
*self
, PyObject
*args
)
2060 register const unsigned char *p
2061 = (unsigned char *) PyString_AS_STRING(self
);
2062 register const unsigned char *e
;
2065 if (!PyArg_NoArgs(args
))
2068 /* Shortcut for single character strings */
2069 if (PyString_GET_SIZE(self
) == 1)
2070 return PyInt_FromLong(isupper(*p
) != 0);
2072 /* Special case for empty strings */
2073 if (PyString_GET_SIZE(self
) == 0)
2074 return PyInt_FromLong(0);
2076 e
= p
+ PyString_GET_SIZE(self
);
2078 for (; p
< e
; p
++) {
2080 return PyInt_FromLong(0);
2081 else if (!cased
&& isupper(*p
))
2084 return PyInt_FromLong(cased
);
2088 static char istitle__doc__
[] =
2089 "S.istitle() -> int\n\
2091 Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2092 may only follow uncased characters and lowercase characters only cased\n\
2093 ones. Return 0 otherwise.";
2096 string_istitle(PyStringObject
*self
, PyObject
*args
)
2098 register const unsigned char *p
2099 = (unsigned char *) PyString_AS_STRING(self
);
2100 register const unsigned char *e
;
2101 int cased
, previous_is_cased
;
2103 if (!PyArg_NoArgs(args
))
2106 /* Shortcut for single character strings */
2107 if (PyString_GET_SIZE(self
) == 1)
2108 return PyInt_FromLong(isupper(*p
) != 0);
2110 /* Special case for empty strings */
2111 if (PyString_GET_SIZE(self
) == 0)
2112 return PyInt_FromLong(0);
2114 e
= p
+ PyString_GET_SIZE(self
);
2116 previous_is_cased
= 0;
2117 for (; p
< e
; p
++) {
2118 register const unsigned char ch
= *p
;
2121 if (previous_is_cased
)
2122 return PyInt_FromLong(0);
2123 previous_is_cased
= 1;
2126 else if (islower(ch
)) {
2127 if (!previous_is_cased
)
2128 return PyInt_FromLong(0);
2129 previous_is_cased
= 1;
2133 previous_is_cased
= 0;
2135 return PyInt_FromLong(cased
);
/* Docstring for S.splitlines(); the signature brackets are balanced. */
static char splitlines__doc__[] =
"S.splitlines([keepends]) -> list of strings\n\
\n\
Return a list of the lines in S, breaking at line boundaries.\n\
Line breaks are not included in the resulting list unless keepends\n\
is given and true.";
2146 #define SPLIT_APPEND(data, left, right) \
2147 str = PyString_FromStringAndSize(data + left, right - left); \
2150 if (PyList_Append(list, str)) { \
2158 string_splitlines(PyStringObject
*self
, PyObject
*args
)
2168 if (!PyArg_ParseTuple(args
, "|i:splitlines", &keepends
))
2171 data
= PyString_AS_STRING(self
);
2172 len
= PyString_GET_SIZE(self
);
2174 list
= PyList_New(0);
2178 for (i
= j
= 0; i
< len
; ) {
2181 /* Find a line and append it */
2182 while (i
< len
&& data
[i
] != '\n' && data
[i
] != '\r')
2185 /* Skip the line break reading CRLF as one line break */
2188 if (data
[i
] == '\r' && i
+ 1 < len
&&
2196 SPLIT_APPEND(data
, j
, eol
);
2200 SPLIT_APPEND(data
, j
, len
);
2214 string_methods
[] = {
2215 /* Counterparts of the obsolete stropmodule functions; except
2216 string.maketrans(). */
2217 {"join", (PyCFunction
)string_join
, 1, join__doc__
},
2218 {"split", (PyCFunction
)string_split
, 1, split__doc__
},
2219 {"lower", (PyCFunction
)string_lower
, 1, lower__doc__
},
2220 {"upper", (PyCFunction
)string_upper
, 1, upper__doc__
},
2221 {"islower", (PyCFunction
)string_islower
, 0, islower__doc__
},
2222 {"isupper", (PyCFunction
)string_isupper
, 0, isupper__doc__
},
2223 {"isspace", (PyCFunction
)string_isspace
, 0, isspace__doc__
},
2224 {"isdigit", (PyCFunction
)string_isdigit
, 0, isdigit__doc__
},
2225 {"istitle", (PyCFunction
)string_istitle
, 0, istitle__doc__
},
2226 {"isalpha", (PyCFunction
)string_isalpha
, 0, isalpha__doc__
},
2227 {"isalnum", (PyCFunction
)string_isalnum
, 0, isalnum__doc__
},
2228 {"capitalize", (PyCFunction
)string_capitalize
, 1, capitalize__doc__
},
2229 {"count", (PyCFunction
)string_count
, 1, count__doc__
},
2230 {"endswith", (PyCFunction
)string_endswith
, 1, endswith__doc__
},
2231 {"find", (PyCFunction
)string_find
, 1, find__doc__
},
2232 {"index", (PyCFunction
)string_index
, 1, index__doc__
},
2233 {"lstrip", (PyCFunction
)string_lstrip
, 1, lstrip__doc__
},
2234 {"replace", (PyCFunction
)string_replace
, 1, replace__doc__
},
2235 {"rfind", (PyCFunction
)string_rfind
, 1, rfind__doc__
},
2236 {"rindex", (PyCFunction
)string_rindex
, 1, rindex__doc__
},
2237 {"rstrip", (PyCFunction
)string_rstrip
, 1, rstrip__doc__
},
2238 {"startswith", (PyCFunction
)string_startswith
, 1, startswith__doc__
},
2239 {"strip", (PyCFunction
)string_strip
, 1, strip__doc__
},
2240 {"swapcase", (PyCFunction
)string_swapcase
, 1, swapcase__doc__
},
2241 {"translate", (PyCFunction
)string_translate
, 1, translate__doc__
},
2242 {"title", (PyCFunction
)string_title
, 1, title__doc__
},
2243 {"ljust", (PyCFunction
)string_ljust
, 1, ljust__doc__
},
2244 {"rjust", (PyCFunction
)string_rjust
, 1, rjust__doc__
},
2245 {"center", (PyCFunction
)string_center
, 1, center__doc__
},
2246 {"encode", (PyCFunction
)string_encode
, 1, encode__doc__
},
2247 {"expandtabs", (PyCFunction
)string_expandtabs
, 1, expandtabs__doc__
},
2248 {"splitlines", (PyCFunction
)string_splitlines
, 1, splitlines__doc__
},
2250 {"zfill", (PyCFunction
)string_zfill
, 1, zfill__doc__
},
2252 {NULL
, NULL
} /* sentinel */
2256 string_getattr(PyStringObject
*s
, char *name
)
2258 return Py_FindMethod(string_methods
, (PyObject
*)s
, name
);
2262 PyTypeObject PyString_Type
= {
2263 PyObject_HEAD_INIT(&PyType_Type
)
2266 sizeof(PyStringObject
),
2268 (destructor
)string_dealloc
, /*tp_dealloc*/
2269 (printfunc
)string_print
, /*tp_print*/
2270 (getattrfunc
)string_getattr
, /*tp_getattr*/
2272 (cmpfunc
)string_compare
, /*tp_compare*/
2273 (reprfunc
)string_repr
, /*tp_repr*/
2275 &string_as_sequence
, /*tp_as_sequence*/
2276 0, /*tp_as_mapping*/
2277 (hashfunc
)string_hash
, /*tp_hash*/
2282 &string_as_buffer
, /*tp_as_buffer*/
2283 Py_TPFLAGS_DEFAULT
, /*tp_flags*/
2288 PyString_Concat(register PyObject
**pv
, register PyObject
*w
)
2290 register PyObject
*v
;
2293 if (w
== NULL
|| !PyString_Check(*pv
)) {
2298 v
= string_concat((PyStringObject
*) *pv
, w
);
2304 PyString_ConcatAndDel(register PyObject
**pv
, register PyObject
*w
)
2306 PyString_Concat(pv
, w
);
2311 /* The following function breaks the notion that strings are immutable:
2312 it changes the size of a string. We get away with this only if there
2313 is only one module referencing the object. You can also think of it
2314 as creating a new string object and destroying the old one, only
2315 more efficiently. In any case, don't use this if the string may
2316 already be known to some other part of the code... */
2319 _PyString_Resize(PyObject
**pv
, int newsize
)
2321 register PyObject
*v
;
2322 register PyStringObject
*sv
;
2324 if (!PyString_Check(v
) || v
->ob_refcnt
!= 1) {
2327 PyErr_BadInternalCall();
2330 /* XXX UNREF/NEWREF interface should be more symmetrical */
2334 _Py_ForgetReference(v
);
2336 PyObject_REALLOC((char *)v
,
2337 sizeof(PyStringObject
) + newsize
* sizeof(char));
2343 _Py_NewReference(*pv
);
2344 sv
= (PyStringObject
*) *pv
;
2345 sv
->ob_size
= newsize
;
2346 sv
->ob_sval
[newsize
] = '\0';
2350 /* Helpers for formatstring */
2353 getnextarg(PyObject
*args
, int arglen
, int *p_argidx
)
2355 int argidx
= *p_argidx
;
2356 if (argidx
< arglen
) {
2361 return PyTuple_GetItem(args
, argidx
);
2363 PyErr_SetString(PyExc_TypeError
,
2364 "not enough arguments for format string");
/* Bit flags collected while parsing a format specifier; each one is set
   by the flag character named in its comment. */
#define F_LJUST	(1 << 0)	/* '-' */
#define F_SIGN	(1 << 1)	/* '+' */
#define F_BLANK	(1 << 2)	/* ' ' */
#define F_ALT	(1 << 3)	/* '#' */
#define F_ZERO	(1 << 4)	/* '0' */
2375 formatfloat(char *buf
, size_t buflen
, int flags
,
2376 int prec
, int type
, PyObject
*v
)
2378 /* fmt = '%#.' + `prec` + `type`
2379 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
2382 if (!PyArg_Parse(v
, "d;float argument required", &x
))
2386 if (type
== 'f' && fabs(x
)/1e25
>= 1e25
)
2388 sprintf(fmt
, "%%%s.%d%c", (flags
&F_ALT
) ? "#" : "", prec
, type
);
2389 /* worst case length calc to ensure no buffer overrun:
2391 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
2392 for any double rep.)
2393 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2394 If prec=0 the effective precision is 1 (the leading digit is
2395 always given), therefore increase by one to 10+prec. */
2396 if (buflen
<= (size_t)10 + (size_t)prec
) {
2397 PyErr_SetString(PyExc_OverflowError
,
2398 "formatted float is too long (precision too long?)");
2401 sprintf(buf
, fmt
, x
);
2406 formatint(char *buf
, size_t buflen
, int flags
,
2407 int prec
, int type
, PyObject
*v
)
2409 /* fmt = '%#.' + `prec` + 'l' + `type`
2410 worst case length = 3 + 10 (len of INT_MAX) + 1 + 1 = 15 (use 20)*/
2413 if (!PyArg_Parse(v
, "l;int argument required", &x
))
2417 sprintf(fmt
, "%%%s.%dl%c", (flags
&F_ALT
) ? "#" : "", prec
, type
);
2418 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec,len(x in octal))
2419 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
2420 if (buflen
<= 13 || buflen
<= (size_t)2+(size_t)prec
) {
2421 PyErr_SetString(PyExc_OverflowError
,
2422 "formatted integer is too long (precision too long?)");
2425 sprintf(buf
, fmt
, x
);
2430 formatchar(char *buf
, size_t buflen
, PyObject
*v
)
2432 /* presume that the buffer is at least 2 characters long */
2433 if (PyString_Check(v
)) {
2434 if (!PyArg_Parse(v
, "c;%c requires int or char", &buf
[0]))
2438 if (!PyArg_Parse(v
, "b;%c requires int or char", &buf
[0]))
2446 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
2448 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
2449 chars are formatted. XXX This is a magic number. Each formatting
2450 routine does bounds checking to ensure no overflow, but a better
2451 solution may be to malloc a buffer of appropriate size for each
2452 format. For now, the current solution is sufficient.
2454 #define FORMATBUFLEN (size_t)120
2457 PyString_Format(PyObject
*format
, PyObject
*args
)
2460 int fmtcnt
, rescnt
, reslen
, arglen
, argidx
;
2462 PyObject
*result
, *orig_args
;
2463 PyObject
*dict
= NULL
;
2464 if (format
== NULL
|| !PyString_Check(format
) || args
== NULL
) {
2465 PyErr_BadInternalCall();
2469 fmt
= PyString_AsString(format
);
2470 fmtcnt
= PyString_Size(format
);
2471 reslen
= rescnt
= fmtcnt
+ 100;
2472 result
= PyString_FromStringAndSize((char *)NULL
, reslen
);
2475 res
= PyString_AsString(result
);
2476 if (PyTuple_Check(args
)) {
2477 arglen
= PyTuple_Size(args
);
2484 if (args
->ob_type
->tp_as_mapping
)
2486 while (--fmtcnt
>= 0) {
2489 rescnt
= fmtcnt
+ 100;
2491 if (_PyString_Resize(&result
, reslen
) < 0)
2493 res
= PyString_AsString(result
)
2500 /* Got a format specifier */
2508 PyObject
*temp
= NULL
;
2512 char formatbuf
[FORMATBUFLEN
]; /* For format{float,int,char}() */
2513 char *fmt_start
= fmt
;
2523 PyErr_SetString(PyExc_TypeError
,
2524 "format requires a mapping");
2530 /* Skip over balanced parentheses */
2531 while (pcount
> 0 && --fmtcnt
>= 0) {
2534 else if (*fmt
== '(')
2538 keylen
= fmt
- keystart
- 1;
2539 if (fmtcnt
< 0 || pcount
> 0) {
2540 PyErr_SetString(PyExc_ValueError
,
2541 "incomplete format key");
2544 key
= PyString_FromStringAndSize(keystart
,
2552 args
= PyObject_GetItem(dict
, key
);
2561 while (--fmtcnt
>= 0) {
2562 switch (c
= *fmt
++) {
2563 case '-': flags
|= F_LJUST
; continue;
2564 case '+': flags
|= F_SIGN
; continue;
2565 case ' ': flags
|= F_BLANK
; continue;
2566 case '#': flags
|= F_ALT
; continue;
2567 case '0': flags
|= F_ZERO
; continue;
2572 v
= getnextarg(args
, arglen
, &argidx
);
2575 if (!PyInt_Check(v
)) {
2576 PyErr_SetString(PyExc_TypeError
,
2580 width
= PyInt_AsLong(v
);
2588 else if (c
>= 0 && isdigit(c
)) {
2590 while (--fmtcnt
>= 0) {
2591 c
= Py_CHARMASK(*fmt
++);
2594 if ((width
*10) / 10 != width
) {
2600 width
= width
*10 + (c
- '0');
2608 v
= getnextarg(args
, arglen
, &argidx
);
2611 if (!PyInt_Check(v
)) {
2617 prec
= PyInt_AsLong(v
);
2623 else if (c
>= 0 && isdigit(c
)) {
2625 while (--fmtcnt
>= 0) {
2626 c
= Py_CHARMASK(*fmt
++);
2629 if ((prec
*10) / 10 != prec
) {
2635 prec
= prec
*10 + (c
- '0');
2640 if (c
== 'h' || c
== 'l' || c
== 'L') {
2647 PyErr_SetString(PyExc_ValueError
,
2648 "incomplete format");
2652 v
= getnextarg(args
, arglen
, &argidx
);
2665 if (PyUnicode_Check(v
)) {
2670 temp
= PyObject_Str(v
);
2672 temp
= PyObject_Repr(v
);
2675 if (!PyString_Check(temp
)) {
2676 PyErr_SetString(PyExc_TypeError
,
2677 "%s argument has non-string str()");
2680 pbuf
= PyString_AsString(temp
);
2681 len
= PyString_Size(temp
);
2682 if (prec
>= 0 && len
> prec
)
2694 len
= formatint(pbuf
, sizeof(formatbuf
), flags
, prec
, c
, v
);
2700 if ((flags
&F_ALT
) &&
2701 (c
== 'x' || c
== 'X') &&
2702 pbuf
[0] == '0' && pbuf
[1] == c
) {
2719 len
= formatfloat(pbuf
, sizeof(formatbuf
), flags
, prec
, c
, v
);
2728 len
= formatchar(pbuf
, sizeof(formatbuf
), v
);
2733 PyErr_Format(PyExc_ValueError
,
2734 "unsupported format character '%c' (0x%x)",
2739 if (*pbuf
== '-' || *pbuf
== '+') {
2743 else if (flags
& F_SIGN
)
2745 else if (flags
& F_BLANK
)
2752 if (rescnt
< width
+ (sign
!= '\0')) {
2754 rescnt
= width
+ fmtcnt
+ 100;
2756 if (_PyString_Resize(&result
, reslen
) < 0)
2758 res
= PyString_AsString(result
)
2768 if (width
> len
&& !(flags
&F_LJUST
)) {
2772 } while (--width
> len
);
2774 if (sign
&& fill
== ' ')
2776 memcpy(res
, pbuf
, len
);
2779 while (--width
>= len
) {
2783 if (dict
&& (argidx
< arglen
) && c
!= '%') {
2784 PyErr_SetString(PyExc_TypeError
,
2785 "not all arguments converted");
2791 if (argidx
< arglen
&& !dict
) {
2792 PyErr_SetString(PyExc_TypeError
,
2793 "not all arguments converted");
2799 _PyString_Resize(&result
, reslen
- rescnt
);
2807 /* Fiddle args right (remove the first argidx-1 arguments) */
2809 if (PyTuple_Check(orig_args
) && argidx
> 0) {
2811 int n
= PyTuple_GET_SIZE(orig_args
) - argidx
;
2816 PyObject
*w
= PyTuple_GET_ITEM(orig_args
, n
+ argidx
);
2818 PyTuple_SET_ITEM(v
, n
, w
);
2822 Py_INCREF(orig_args
);
2825 /* Paste rest of format string to what we have of the result
2826 string; we reuse result for this */
2827 rescnt
= res
- PyString_AS_STRING(result
);
2828 fmtcnt
= PyString_GET_SIZE(format
) - \
2829 (fmt
- PyString_AS_STRING(format
));
2830 if (_PyString_Resize(&result
, rescnt
+ fmtcnt
)) {
2834 memcpy(PyString_AS_STRING(result
) + rescnt
, fmt
, fmtcnt
);
2836 /* Let Unicode do its magic */
2837 result
= PyUnicode_Format(format
, args
);
2851 #ifdef INTERN_STRINGS
2853 static PyObject
*interned
;
2856 PyString_InternInPlace(PyObject
**p
)
2858 register PyStringObject
*s
= (PyStringObject
*)(*p
);
2860 if (s
== NULL
|| !PyString_Check(s
))
2861 Py_FatalError("PyString_InternInPlace: strings only please!");
2862 if ((t
= s
->ob_sinterned
) != NULL
) {
2863 if (t
== (PyObject
*)s
)
2870 if (interned
== NULL
) {
2871 interned
= PyDict_New();
2872 if (interned
== NULL
)
2875 if ((t
= PyDict_GetItem(interned
, (PyObject
*)s
)) != NULL
) {
2877 *p
= s
->ob_sinterned
= t
;
2882 if (PyDict_SetItem(interned
, t
, t
) == 0) {
2883 s
->ob_sinterned
= t
;
2891 PyString_InternFromString(const char *cp
)
2893 PyObject
*s
= PyString_FromString(cp
);
2896 PyString_InternInPlace(&s
);
2906 for (i
= 0; i
< UCHAR_MAX
+ 1; i
++) {
2907 Py_XDECREF(characters
[i
]);
2908 characters
[i
] = NULL
;
2910 #ifndef DONT_SHARE_SHORT_STRINGS
2911 Py_XDECREF(nullstring
);
2914 #ifdef INTERN_STRINGS
2917 PyObject
*key
, *value
;
2921 while (PyDict_Next(interned
, &pos
, &key
, &value
)) {
2922 if (key
->ob_refcnt
== 2 && key
== value
) {
2923 PyDict_DelItem(interned
, key
);