2 /* String object implementation */
9 int null_strings
, one_strings
;
12 #if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
16 static PyStringObject
*characters
[UCHAR_MAX
+ 1];
17 #ifndef DONT_SHARE_SHORT_STRINGS
18 static PyStringObject
*nullstring
;
22 Newsizedstringobject() and newstringobject() try in certain cases
23 to share string objects. When the size of the string is zero,
24 these routines always return a pointer to the same string object;
25 when the size is one, they return a pointer to an already existing
26 object if the contents of the string is known. For
27 newstringobject() this is always the case, for
28 newsizedstringobject() this is the case when the first argument in
30 A common practice to allocate a string and then fill it in or
31 change it must be done carefully. It is only allowed to change the
32 contents of the string if the object was obtained from
33 newsizedstringobject() with a NULL first argument, because in the
34 future these routines may try to do even more sharing of objects.
37 PyString_FromStringAndSize(const char *str
, int size
)
39 register PyStringObject
*op
;
40 #ifndef DONT_SHARE_SHORT_STRINGS
41 if (size
== 0 && (op
= nullstring
) != NULL
) {
46 return (PyObject
*)op
;
48 if (size
== 1 && str
!= NULL
&&
49 (op
= characters
[*str
& UCHAR_MAX
]) != NULL
)
55 return (PyObject
*)op
;
57 #endif /* DONT_SHARE_SHORT_STRINGS */
59 /* PyObject_NewVar is inlined */
60 op
= (PyStringObject
*)
61 PyObject_MALLOC(sizeof(PyStringObject
) + size
* sizeof(char));
63 return PyErr_NoMemory();
64 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
69 op
->ob_sinterned
= NULL
;
72 memcpy(op
->ob_sval
, str
, size
);
73 op
->ob_sval
[size
] = '\0';
74 #ifndef DONT_SHARE_SHORT_STRINGS
76 PyObject
*t
= (PyObject
*)op
;
77 PyString_InternInPlace(&t
);
78 op
= (PyStringObject
*)t
;
81 } else if (size
== 1 && str
!= NULL
) {
82 PyObject
*t
= (PyObject
*)op
;
83 PyString_InternInPlace(&t
);
84 op
= (PyStringObject
*)t
;
85 characters
[*str
& UCHAR_MAX
] = op
;
89 return (PyObject
*) op
;
93 PyString_FromString(const char *str
)
95 register size_t size
= strlen(str
);
96 register PyStringObject
*op
;
98 PyErr_SetString(PyExc_OverflowError
,
99 "string is too long for a Python string");
102 #ifndef DONT_SHARE_SHORT_STRINGS
103 if (size
== 0 && (op
= nullstring
) != NULL
) {
108 return (PyObject
*)op
;
110 if (size
== 1 && (op
= characters
[*str
& UCHAR_MAX
]) != NULL
) {
115 return (PyObject
*)op
;
117 #endif /* DONT_SHARE_SHORT_STRINGS */
119 /* PyObject_NewVar is inlined */
120 op
= (PyStringObject
*)
121 PyObject_MALLOC(sizeof(PyStringObject
) + size
* sizeof(char));
123 return PyErr_NoMemory();
124 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
128 #ifdef INTERN_STRINGS
129 op
->ob_sinterned
= NULL
;
131 strcpy(op
->ob_sval
, str
);
132 #ifndef DONT_SHARE_SHORT_STRINGS
134 PyObject
*t
= (PyObject
*)op
;
135 PyString_InternInPlace(&t
);
136 op
= (PyStringObject
*)t
;
139 } else if (size
== 1) {
140 PyObject
*t
= (PyObject
*)op
;
141 PyString_InternInPlace(&t
);
142 op
= (PyStringObject
*)t
;
143 characters
[*str
& UCHAR_MAX
] = op
;
147 return (PyObject
*) op
;
151 PyString_FromFormatV(const char *format
, va_list vargs
)
153 va_list count
= vargs
;
159 /* step 1: figure out how large a buffer we need */
160 for (f
= format
; *f
; f
++) {
163 while (*++f
&& *f
!= '%' && !isalpha(Py_CHARMASK(*f
)))
166 /* skip the 'l' in %ld, since it doesn't change the
167 width. although only %d is supported (see
168 "expand" section below), others can be easily
170 if (*f
== 'l' && *(f
+1) == 'd')
175 (void)va_arg(count
, int);
176 /* fall through... */
180 case 'd': case 'i': case 'x':
181 (void) va_arg(count
, int);
182 /* 20 bytes should be enough to hold a 64-bit
187 s
= va_arg(count
, char*);
191 (void) va_arg(count
, int);
192 /* maximum 64-bit pointer representation:
194 * so 19 characters is enough.
199 /* if we stumble upon an unknown
200 formatting code, copy the rest of
201 the format string to the output
202 string. (we cannot just skip the
203 code, since there's no way to know
204 what's in the argument list) */
212 /* step 2: fill the buffer */
213 string
= PyString_FromStringAndSize(NULL
, n
);
217 s
= PyString_AsString(string
);
219 for (f
= format
; *f
; f
++) {
223 /* parse the width.precision part (we're only
224 interested in the precision value, if any) */
226 while (isdigit(Py_CHARMASK(*f
)))
227 n
= (n
*10) + *f
++ - '0';
231 while (isdigit(Py_CHARMASK(*f
)))
232 n
= (n
*10) + *f
++ - '0';
234 while (*f
&& *f
!= '%' && !isalpha(Py_CHARMASK(*f
)))
236 /* handle the long flag, but only for %ld. others
237 can be added when necessary. */
238 if (*f
== 'l' && *(f
+1) == 'd') {
245 *s
++ = va_arg(vargs
, int);
249 sprintf(s
, "%ld", va_arg(vargs
, long));
251 sprintf(s
, "%d", va_arg(vargs
, int));
255 sprintf(s
, "%i", va_arg(vargs
, int));
259 sprintf(s
, "%x", va_arg(vargs
, int));
263 p
= va_arg(vargs
, char*);
271 sprintf(s
, "%p", va_arg(vargs
, void*));
272 /* %p is ill-defined: ensure leading 0x. */
275 else if (s
[1] != 'x') {
276 memmove(s
+2, s
, strlen(s
)+1);
295 _PyString_Resize(&string
, s
- PyString_AS_STRING(string
));
300 PyString_FromFormat(const char *format
, ...)
305 #ifdef HAVE_STDARG_PROTOTYPES
306 va_start(vargs
, format
);
310 ret
= PyString_FromFormatV(format
, vargs
);
316 PyObject
*PyString_Decode(const char *s
,
318 const char *encoding
,
323 str
= PyString_FromStringAndSize(s
, size
);
326 v
= PyString_AsDecodedString(str
, encoding
, errors
);
331 PyObject
*PyString_AsDecodedObject(PyObject
*str
,
332 const char *encoding
,
337 if (!PyString_Check(str
)) {
342 if (encoding
== NULL
) {
343 #ifdef Py_USING_UNICODE
344 encoding
= PyUnicode_GetDefaultEncoding();
346 PyErr_SetString(PyExc_ValueError
, "no encoding specified");
351 /* Decode via the codec registry */
352 v
= PyCodec_Decode(str
, encoding
, errors
);
362 PyObject
*PyString_AsDecodedString(PyObject
*str
,
363 const char *encoding
,
368 v
= PyString_AsDecodedObject(str
, encoding
, errors
);
372 #ifdef Py_USING_UNICODE
373 /* Convert Unicode to a string using the default encoding */
374 if (PyUnicode_Check(v
)) {
376 v
= PyUnicode_AsEncodedString(v
, NULL
, NULL
);
382 if (!PyString_Check(v
)) {
383 PyErr_Format(PyExc_TypeError
,
384 "decoder did not return a string object (type=%.400s)",
385 v
->ob_type
->tp_name
);
396 PyObject
*PyString_Encode(const char *s
,
398 const char *encoding
,
403 str
= PyString_FromStringAndSize(s
, size
);
406 v
= PyString_AsEncodedString(str
, encoding
, errors
);
411 PyObject
*PyString_AsEncodedObject(PyObject
*str
,
412 const char *encoding
,
417 if (!PyString_Check(str
)) {
422 if (encoding
== NULL
) {
423 #ifdef Py_USING_UNICODE
424 encoding
= PyUnicode_GetDefaultEncoding();
426 PyErr_SetString(PyExc_ValueError
, "no encoding specified");
431 /* Encode via the codec registry */
432 v
= PyCodec_Encode(str
, encoding
, errors
);
442 PyObject
*PyString_AsEncodedString(PyObject
*str
,
443 const char *encoding
,
448 v
= PyString_AsEncodedObject(str
, encoding
, errors
);
452 #ifdef Py_USING_UNICODE
453 /* Convert Unicode to a string using the default encoding */
454 if (PyUnicode_Check(v
)) {
456 v
= PyUnicode_AsEncodedString(v
, NULL
, NULL
);
462 if (!PyString_Check(v
)) {
463 PyErr_Format(PyExc_TypeError
,
464 "encoder did not return a string object (type=%.400s)",
465 v
->ob_type
->tp_name
);
477 string_dealloc(PyObject
*op
)
483 string_getsize(register PyObject
*op
)
487 if (PyString_AsStringAndSize(op
, &s
, &len
))
492 static /*const*/ char *
493 string_getbuffer(register PyObject
*op
)
497 if (PyString_AsStringAndSize(op
, &s
, &len
))
/* Return the size of op.  An object that passes PyString_Check() has its
   ob_size field read directly; any other object is handed to
   string_getsize(), which obtains the length through
   PyString_AsStringAndSize(). */
503 PyString_Size(register PyObject
*op
)
505 if (!PyString_Check(op
))
506 return string_getsize(op
);
507 return ((PyStringObject
*)op
) -> ob_size
;
/* Return a pointer to the character data of op.  An object that passes
   PyString_Check() yields its ob_sval member directly; any other object is
   handed to string_getbuffer(), which obtains the buffer through
   PyString_AsStringAndSize(). */
511 PyString_AsString(register PyObject
*op
)
513 if (!PyString_Check(op
))
514 return string_getbuffer(op
);
515 return ((PyStringObject
*)op
) -> ob_sval
;
519 PyString_AsStringAndSize(register PyObject
*obj
,
524 PyErr_BadInternalCall();
528 if (!PyString_Check(obj
)) {
529 #ifdef Py_USING_UNICODE
530 if (PyUnicode_Check(obj
)) {
531 obj
= _PyUnicode_AsDefaultEncodedString(obj
, NULL
);
538 PyErr_Format(PyExc_TypeError
,
539 "expected string or Unicode object, "
540 "%.200s found", obj
->ob_type
->tp_name
);
545 *s
= PyString_AS_STRING(obj
);
547 *len
= PyString_GET_SIZE(obj
);
548 else if ((int)strlen(*s
) != PyString_GET_SIZE(obj
)) {
549 PyErr_SetString(PyExc_TypeError
,
550 "expected string without null bytes");
559 string_print(PyStringObject
*op
, FILE *fp
, int flags
)
564 /* XXX Ought to check for interrupts when writing long strings */
565 if (flags
& Py_PRINT_RAW
) {
566 fwrite(op
->ob_sval
, 1, (int) op
->ob_size
, fp
);
570 /* figure out which quote to use; single is preferred */
572 if (strchr(op
->ob_sval
, '\'') && !strchr(op
->ob_sval
, '"'))
576 for (i
= 0; i
< op
->ob_size
; i
++) {
578 if (c
== quote
|| c
== '\\')
579 fprintf(fp
, "\\%c", c
);
586 else if (c
< ' ' || c
>= 0x7f)
587 fprintf(fp
, "\\x%02x", c
& 0xff);
596 string_repr(register PyStringObject
*op
)
598 size_t newsize
= 2 + 4 * op
->ob_size
* sizeof(char);
600 if (newsize
> INT_MAX
) {
601 PyErr_SetString(PyExc_OverflowError
,
602 "string is too large to make repr");
604 v
= PyString_FromStringAndSize((char *)NULL
, newsize
);
614 /* figure out which quote to use; single is preferred */
616 if (strchr(op
->ob_sval
, '\'') && !strchr(op
->ob_sval
, '"'))
619 p
= ((PyStringObject
*)v
)->ob_sval
;
621 for (i
= 0; i
< op
->ob_size
; i
++) {
623 if (c
== quote
|| c
== '\\')
624 *p
++ = '\\', *p
++ = c
;
626 *p
++ = '\\', *p
++ = 't';
628 *p
++ = '\\', *p
++ = 'n';
630 *p
++ = '\\', *p
++ = 'r';
631 else if (c
< ' ' || c
>= 0x7f) {
632 sprintf(p
, "\\x%02x", c
& 0xff);
641 &v
, (int) (p
- ((PyStringObject
*)v
)->ob_sval
));
647 string_str(PyObject
*s
)
654 string_length(PyStringObject
*a
)
660 string_concat(register PyStringObject
*a
, register PyObject
*bb
)
662 register unsigned int size
;
663 register PyStringObject
*op
;
664 if (!PyString_Check(bb
)) {
665 #ifdef Py_USING_UNICODE
666 if (PyUnicode_Check(bb
))
667 return PyUnicode_Concat((PyObject
*)a
, bb
);
669 PyErr_Format(PyExc_TypeError
,
670 "cannot add type \"%.200s\" to string",
671 bb
->ob_type
->tp_name
);
674 #define b ((PyStringObject *)bb)
675 /* Optimize cases with empty left or right operand */
676 if ((a
->ob_size
== 0 || b
->ob_size
== 0) &&
677 PyString_CheckExact(a
) && PyString_CheckExact(b
)) {
678 if (a
->ob_size
== 0) {
683 return (PyObject
*)a
;
685 size
= a
->ob_size
+ b
->ob_size
;
686 /* PyObject_NewVar is inlined */
687 op
= (PyStringObject
*)
688 PyObject_MALLOC(sizeof(PyStringObject
) + size
* sizeof(char));
690 return PyErr_NoMemory();
691 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
695 #ifdef INTERN_STRINGS
696 op
->ob_sinterned
= NULL
;
698 memcpy(op
->ob_sval
, a
->ob_sval
, (int) a
->ob_size
);
699 memcpy(op
->ob_sval
+ a
->ob_size
, b
->ob_sval
, (int) b
->ob_size
);
700 op
->ob_sval
[size
] = '\0';
701 return (PyObject
*) op
;
706 string_repeat(register PyStringObject
*a
, register int n
)
710 register PyStringObject
*op
;
714 /* watch out for overflows: the size can overflow int,
715 * and the # of bytes needed can overflow size_t
717 size
= a
->ob_size
* n
;
718 if (n
&& size
/ n
!= a
->ob_size
) {
719 PyErr_SetString(PyExc_OverflowError
,
720 "repeated string is too long");
723 if (size
== a
->ob_size
&& PyString_CheckExact(a
)) {
725 return (PyObject
*)a
;
727 nbytes
= size
* sizeof(char);
728 if (nbytes
/ sizeof(char) != (size_t)size
||
729 nbytes
+ sizeof(PyStringObject
) <= nbytes
) {
730 PyErr_SetString(PyExc_OverflowError
,
731 "repeated string is too long");
734 op
= (PyStringObject
*)
735 PyObject_MALLOC(sizeof(PyStringObject
) + nbytes
);
737 return PyErr_NoMemory();
738 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
742 #ifdef INTERN_STRINGS
743 op
->ob_sinterned
= NULL
;
745 for (i
= 0; i
< size
; i
+= a
->ob_size
)
746 memcpy(op
->ob_sval
+i
, a
->ob_sval
, (int) a
->ob_size
);
747 op
->ob_sval
[size
] = '\0';
748 return (PyObject
*) op
;
751 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
754 string_slice(register PyStringObject
*a
, register int i
, register int j
)
755 /* j -- may be negative! */
760 j
= 0; /* Avoid signed/unsigned bug in next line */
763 if (i
== 0 && j
== a
->ob_size
&& PyString_CheckExact(a
)) {
764 /* It's the same as a */
766 return (PyObject
*)a
;
770 return PyString_FromStringAndSize(a
->ob_sval
+ i
, (int) (j
-i
));
774 string_contains(PyObject
*a
, PyObject
*el
)
776 register char *s
, *end
;
778 #ifdef Py_USING_UNICODE
779 if (PyUnicode_Check(el
))
780 return PyUnicode_Contains(a
, el
);
782 if (!PyString_Check(el
) || PyString_Size(el
) != 1) {
783 PyErr_SetString(PyExc_TypeError
,
784 "'in <string>' requires character as left operand");
787 c
= PyString_AsString(el
)[0];
788 s
= PyString_AsString(a
);
789 end
= s
+ PyString_Size(a
);
798 string_item(PyStringObject
*a
, register int i
)
802 if (i
< 0 || i
>= a
->ob_size
) {
803 PyErr_SetString(PyExc_IndexError
, "string index out of range");
806 pchar
= a
->ob_sval
+ i
;
807 v
= (PyObject
*)characters
[*pchar
& UCHAR_MAX
];
809 v
= PyString_FromStringAndSize(pchar
, 1);
820 string_richcompare(PyStringObject
*a
, PyStringObject
*b
, int op
)
827 /* Make sure both arguments use string comparison.
828 This implies PyString_Check(a) && PyString_Check(b). */
829 if (a
->ob_type
->tp_richcompare
!= (richcmpfunc
)string_richcompare
||
830 b
->ob_type
->tp_richcompare
!= (richcmpfunc
)string_richcompare
) {
831 result
= Py_NotImplemented
;
836 case Py_EQ
:case Py_LE
:case Py_GE
:
839 case Py_NE
:case Py_LT
:case Py_GT
:
845 /* Supporting Py_NE here as well does not save
846 much time, since Py_NE is rarely used. */
847 if (a
->ob_size
== b
->ob_size
848 && (a
->ob_sval
[0] == b
->ob_sval
[0]
849 && memcmp(a
->ob_sval
, b
->ob_sval
,
857 len_a
= a
->ob_size
; len_b
= b
->ob_size
;
858 min_len
= (len_a
< len_b
) ? len_a
: len_b
;
860 c
= Py_CHARMASK(*a
->ob_sval
) - Py_CHARMASK(*b
->ob_sval
);
862 c
= memcmp(a
->ob_sval
, b
->ob_sval
, min_len
);
866 c
= (len_a
< len_b
) ? -1 : (len_a
> len_b
) ? 1 : 0;
868 case Py_LT
: c
= c
< 0; break;
869 case Py_LE
: c
= c
<= 0; break;
870 case Py_EQ
: assert(0); break; /* unreachable */
871 case Py_NE
: c
= c
!= 0; break;
872 case Py_GT
: c
= c
> 0; break;
873 case Py_GE
: c
= c
>= 0; break;
875 result
= Py_NotImplemented
;
878 result
= c
? Py_True
: Py_False
;
/* Equality test for two string objects: non-zero when o1 and o2 have the
   same size and the same bytes, zero otherwise.  No type check is made
   here; both arguments are cast straight to PyStringObject *. */
885 _PyString_Eq(PyObject
*o1
, PyObject
*o2
)
887 PyStringObject
*a
, *b
;
888 a
= (PyStringObject
*)o1
;
889 b
= (PyStringObject
*)o2
;
890 return a
->ob_size
== b
->ob_size
/* Compare the first byte before calling memcmp() over the whole
   buffer, so strings differing in their first byte skip the call. */
891 && *a
->ob_sval
== *b
->ob_sval
892 && memcmp(a
->ob_sval
, b
->ob_sval
, a
->ob_size
) == 0;
896 string_hash(PyStringObject
*a
)
899 register unsigned char *p
;
903 if (a
->ob_shash
!= -1)
905 #ifdef INTERN_STRINGS
906 if (a
->ob_sinterned
!= NULL
)
907 return (a
->ob_shash
=
908 ((PyStringObject
*)(a
->ob_sinterned
))->ob_shash
);
912 p
= (unsigned char *) a
->ob_sval
;
915 x
= (1000003*x
) ^ *p
++;
926 string_buffer_getreadbuf(PyStringObject
*self
, int index
, const void **ptr
)
929 PyErr_SetString(PyExc_SystemError
,
930 "accessing non-existent string segment");
933 *ptr
= (void *)self
->ob_sval
;
934 return self
->ob_size
;
938 string_buffer_getwritebuf(PyStringObject
*self
, int index
, const void **ptr
)
940 PyErr_SetString(PyExc_TypeError
,
941 "Cannot use string as modifiable buffer");
946 string_buffer_getsegcount(PyStringObject
*self
, int *lenp
)
949 *lenp
= self
->ob_size
;
954 string_buffer_getcharbuf(PyStringObject
*self
, int index
, const char **ptr
)
957 PyErr_SetString(PyExc_SystemError
,
958 "accessing non-existent string segment");
961 *ptr
= self
->ob_sval
;
962 return self
->ob_size
;
965 static PySequenceMethods string_as_sequence
= {
966 (inquiry
)string_length
, /*sq_length*/
967 (binaryfunc
)string_concat
, /*sq_concat*/
968 (intargfunc
)string_repeat
, /*sq_repeat*/
969 (intargfunc
)string_item
, /*sq_item*/
970 (intintargfunc
)string_slice
, /*sq_slice*/
973 (objobjproc
)string_contains
/*sq_contains*/
976 static PyBufferProcs string_as_buffer
= {
977 (getreadbufferproc
)string_buffer_getreadbuf
,
978 (getwritebufferproc
)string_buffer_getwritebuf
,
979 (getsegcountproc
)string_buffer_getsegcount
,
980 (getcharbufferproc
)string_buffer_getcharbuf
,
991 split_whitespace(const char *s
, int len
, int maxsplit
)
995 PyObject
*list
= PyList_New(0);
1000 for (i
= j
= 0; i
< len
; ) {
1001 while (i
< len
&& isspace(Py_CHARMASK(s
[i
])))
1004 while (i
< len
&& !isspace(Py_CHARMASK(s
[i
])))
1007 if (maxsplit
-- <= 0)
1009 item
= PyString_FromStringAndSize(s
+j
, (int)(i
-j
));
1012 err
= PyList_Append(list
, item
);
1016 while (i
< len
&& isspace(Py_CHARMASK(s
[i
])))
1022 item
= PyString_FromStringAndSize(s
+j
, (int)(len
- j
));
1025 err
= PyList_Append(list
, item
);
1037 static char split__doc__
[] =
1038 "S.split([sep [,maxsplit]]) -> list of strings\n\
1040 Return a list of the words in the string S, using sep as the\n\
1041 delimiter string. If maxsplit is given, at most maxsplit\n\
1042 splits are done. If sep is not specified, any whitespace string\n\
1046 string_split(PyStringObject
*self
, PyObject
*args
)
1048 int len
= PyString_GET_SIZE(self
), n
, i
, j
, err
;
1050 const char *s
= PyString_AS_STRING(self
), *sub
;
1051 PyObject
*list
, *item
, *subobj
= Py_None
;
1053 if (!PyArg_ParseTuple(args
, "|Oi:split", &subobj
, &maxsplit
))
1057 if (subobj
== Py_None
)
1058 return split_whitespace(s
, len
, maxsplit
);
1059 if (PyString_Check(subobj
)) {
1060 sub
= PyString_AS_STRING(subobj
);
1061 n
= PyString_GET_SIZE(subobj
);
1063 #ifdef Py_USING_UNICODE
1064 else if (PyUnicode_Check(subobj
))
1065 return PyUnicode_Split((PyObject
*)self
, subobj
, maxsplit
);
1067 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
1070 PyErr_SetString(PyExc_ValueError
, "empty separator");
1074 list
= PyList_New(0);
1079 while (i
+n
<= len
) {
1080 if (s
[i
] == sub
[0] && memcmp(s
+i
, sub
, n
) == 0) {
1081 if (maxsplit
-- <= 0)
1083 item
= PyString_FromStringAndSize(s
+j
, (int)(i
-j
));
1086 err
= PyList_Append(list
, item
);
1095 item
= PyString_FromStringAndSize(s
+j
, (int)(len
-j
));
1098 err
= PyList_Append(list
, item
);
1111 static char join__doc__
[] =
1112 "S.join(sequence) -> string\n\
1114 Return a string which is the concatenation of the strings in the\n\
1115 sequence. The separator between elements is S.";
1118 string_join(PyStringObject
*self
, PyObject
*orig
)
1120 char *sep
= PyString_AS_STRING(self
);
1121 const int seplen
= PyString_GET_SIZE(self
);
1122 PyObject
*res
= NULL
;
1127 PyObject
*seq
, *item
;
1129 seq
= PySequence_Fast(orig
, "");
1131 if (PyErr_ExceptionMatches(PyExc_TypeError
))
1132 PyErr_Format(PyExc_TypeError
,
1133 "sequence expected, %.80s found",
1134 orig
->ob_type
->tp_name
);
1138 seqlen
= PySequence_Size(seq
);
1141 return PyString_FromString("");
1144 item
= PySequence_Fast_GET_ITEM(seq
, 0);
1145 if (!PyString_Check(item
) && !PyUnicode_Check(item
)) {
1146 PyErr_Format(PyExc_TypeError
,
1147 "sequence item 0: expected string,"
1149 item
->ob_type
->tp_name
);
1158 /* There are at least two things to join. Do a pre-pass to figure out
1159 * the total amount of space we'll need (sz), see whether any argument
1160 * is absurd, and defer to the Unicode join if appropriate.
1162 for (i
= 0; i
< seqlen
; i
++) {
1163 const size_t old_sz
= sz
;
1164 item
= PySequence_Fast_GET_ITEM(seq
, i
);
1165 if (!PyString_Check(item
)){
1166 #ifdef Py_USING_UNICODE
1167 if (PyUnicode_Check(item
)) {
1168 /* Defer to Unicode join.
1169 * CAUTION: There's no guarantee that the
1170 * original sequence can be iterated over
1171 * again, so we must pass seq here.
1174 result
= PyUnicode_Join((PyObject
*)self
, seq
);
1179 PyErr_Format(PyExc_TypeError
,
1180 "sequence item %i: expected string,"
1182 i
, item
->ob_type
->tp_name
);
1186 sz
+= PyString_GET_SIZE(item
);
1189 if (sz
< old_sz
|| sz
> INT_MAX
) {
1190 PyErr_SetString(PyExc_OverflowError
,
1191 "join() is too long for a Python string");
1197 /* Allocate result space. */
1198 res
= PyString_FromStringAndSize((char*)NULL
, (int)sz
);
1204 /* Catenate everything. */
1205 p
= PyString_AS_STRING(res
);
1206 for (i
= 0; i
< seqlen
; ++i
) {
1208 item
= PySequence_Fast_GET_ITEM(seq
, i
);
1209 n
= PyString_GET_SIZE(item
);
1210 memcpy(p
, PyString_AS_STRING(item
), n
);
1212 if (i
< seqlen
- 1) {
1213 memcpy(p
, sep
, seplen
);
1223 _PyString_Join(PyObject
*sep
, PyObject
*x
)
1225 assert(sep
!= NULL
&& PyString_Check(sep
));
1227 return string_join((PyStringObject
*)sep
, x
);
1231 string_find_internal(PyStringObject
*self
, PyObject
*args
, int dir
)
1233 const char *s
= PyString_AS_STRING(self
), *sub
;
1234 int len
= PyString_GET_SIZE(self
);
1235 int n
, i
= 0, last
= INT_MAX
;
1238 if (!PyArg_ParseTuple(args
, "O|O&O&:find/rfind/index/rindex",
1239 &subobj
, _PyEval_SliceIndex
, &i
, _PyEval_SliceIndex
, &last
))
1241 if (PyString_Check(subobj
)) {
1242 sub
= PyString_AS_STRING(subobj
);
1243 n
= PyString_GET_SIZE(subobj
);
1245 #ifdef Py_USING_UNICODE
1246 else if (PyUnicode_Check(subobj
))
1247 return PyUnicode_Find((PyObject
*)self
, subobj
, i
, last
, 1);
1249 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
1264 if (n
== 0 && i
<= last
)
1267 for (; i
<= last
; ++i
)
1268 if (s
[i
] == sub
[0] && memcmp(&s
[i
], sub
, n
) == 0)
1274 if (n
== 0 && i
<= last
)
1276 for (j
= last
-n
; j
>= i
; --j
)
1277 if (s
[j
] == sub
[0] && memcmp(&s
[j
], sub
, n
) == 0)
1285 static char find__doc__
[] =
1286 "S.find(sub [,start [,end]]) -> int\n\
1288 Return the lowest index in S where substring sub is found,\n\
1289 such that sub is contained within s[start,end]. Optional\n\
1290 arguments start and end are interpreted as in slice notation.\n\
1292 Return -1 on failure.";
1295 string_find(PyStringObject
*self
, PyObject
*args
)
1297 long result
= string_find_internal(self
, args
, +1);
1300 return PyInt_FromLong(result
);
1304 static char index__doc__
[] =
1305 "S.index(sub [,start [,end]]) -> int\n\
1307 Like S.find() but raise ValueError when the substring is not found.";
1310 string_index(PyStringObject
*self
, PyObject
*args
)
1312 long result
= string_find_internal(self
, args
, +1);
1316 PyErr_SetString(PyExc_ValueError
,
1317 "substring not found in string.index");
1320 return PyInt_FromLong(result
);
1324 static char rfind__doc__
[] =
1325 "S.rfind(sub [,start [,end]]) -> int\n\
1327 Return the highest index in S where substring sub is found,\n\
1328 such that sub is contained within s[start,end]. Optional\n\
1329 arguments start and end are interpreted as in slice notation.\n\
1331 Return -1 on failure.";
1334 string_rfind(PyStringObject
*self
, PyObject
*args
)
1336 long result
= string_find_internal(self
, args
, -1);
1339 return PyInt_FromLong(result
);
1343 static char rindex__doc__
[] =
1344 "S.rindex(sub [,start [,end]]) -> int\n\
1346 Like S.rfind() but raise ValueError when the substring is not found.";
1349 string_rindex(PyStringObject
*self
, PyObject
*args
)
1351 long result
= string_find_internal(self
, args
, -1);
1355 PyErr_SetString(PyExc_ValueError
,
1356 "substring not found in string.rindex");
1359 return PyInt_FromLong(result
);
1364 do_strip(PyStringObject
*self
, int striptype
)
1366 char *s
= PyString_AS_STRING(self
);
1367 int len
= PyString_GET_SIZE(self
), i
, j
;
1370 if (striptype
!= RIGHTSTRIP
) {
1371 while (i
< len
&& isspace(Py_CHARMASK(s
[i
]))) {
1377 if (striptype
!= LEFTSTRIP
) {
1380 } while (j
>= i
&& isspace(Py_CHARMASK(s
[j
])));
1384 if (i
== 0 && j
== len
&& PyString_CheckExact(self
)) {
1386 return (PyObject
*)self
;
1389 return PyString_FromStringAndSize(s
+i
, j
-i
);
1393 static char strip__doc__
[] =
1394 "S.strip() -> string\n\
1396 Return a copy of the string S with leading and trailing\n\
1397 whitespace removed.";
1400 string_strip(PyStringObject
*self
)
1402 return do_strip(self
, BOTHSTRIP
);
1406 static char lstrip__doc__
[] =
1407 "S.lstrip() -> string\n\
1409 Return a copy of the string S with leading whitespace removed.";
1412 string_lstrip(PyStringObject
*self
)
1414 return do_strip(self
, LEFTSTRIP
);
1418 static char rstrip__doc__
[] =
1419 "S.rstrip() -> string\n\
1421 Return a copy of the string S with trailing whitespace removed.";
1424 string_rstrip(PyStringObject
*self
)
1426 return do_strip(self
, RIGHTSTRIP
);
1430 static char lower__doc__
[] =
1431 "S.lower() -> string\n\
1433 Return a copy of the string S converted to lowercase.";
1436 string_lower(PyStringObject
*self
)
1438 char *s
= PyString_AS_STRING(self
), *s_new
;
1439 int i
, n
= PyString_GET_SIZE(self
);
1442 new = PyString_FromStringAndSize(NULL
, n
);
1445 s_new
= PyString_AsString(new);
1446 for (i
= 0; i
< n
; i
++) {
1447 int c
= Py_CHARMASK(*s
++);
1449 *s_new
= tolower(c
);
1458 static char upper__doc__
[] =
1459 "S.upper() -> string\n\
1461 Return a copy of the string S converted to uppercase.";
1464 string_upper(PyStringObject
*self
)
1466 char *s
= PyString_AS_STRING(self
), *s_new
;
1467 int i
, n
= PyString_GET_SIZE(self
);
1470 new = PyString_FromStringAndSize(NULL
, n
);
1473 s_new
= PyString_AsString(new);
1474 for (i
= 0; i
< n
; i
++) {
1475 int c
= Py_CHARMASK(*s
++);
1477 *s_new
= toupper(c
);
1486 static char title__doc__
[] =
1487 "S.title() -> string\n\
1489 Return a titlecased version of S, i.e. words start with uppercase\n\
1490 characters, all remaining cased characters have lowercase.";
1493 string_title(PyStringObject
*self
)
1495 char *s
= PyString_AS_STRING(self
), *s_new
;
1496 int i
, n
= PyString_GET_SIZE(self
);
1497 int previous_is_cased
= 0;
1500 new = PyString_FromStringAndSize(NULL
, n
);
1503 s_new
= PyString_AsString(new);
1504 for (i
= 0; i
< n
; i
++) {
1505 int c
= Py_CHARMASK(*s
++);
1507 if (!previous_is_cased
)
1509 previous_is_cased
= 1;
1510 } else if (isupper(c
)) {
1511 if (previous_is_cased
)
1513 previous_is_cased
= 1;
1515 previous_is_cased
= 0;
1521 static char capitalize__doc__
[] =
1522 "S.capitalize() -> string\n\
1524 Return a copy of the string S with only its first character\n\
1528 string_capitalize(PyStringObject
*self
)
1530 char *s
= PyString_AS_STRING(self
), *s_new
;
1531 int i
, n
= PyString_GET_SIZE(self
);
1534 new = PyString_FromStringAndSize(NULL
, n
);
1537 s_new
= PyString_AsString(new);
1539 int c
= Py_CHARMASK(*s
++);
1541 *s_new
= toupper(c
);
1546 for (i
= 1; i
< n
; i
++) {
1547 int c
= Py_CHARMASK(*s
++);
1549 *s_new
= tolower(c
);
1558 static char count__doc__
[] =
1559 "S.count(sub[, start[, end]]) -> int\n\
1561 Return the number of occurrences of substring sub in string\n\
1562 S[start:end]. Optional arguments start and end are\n\
1563 interpreted as in slice notation.";
1566 string_count(PyStringObject
*self
, PyObject
*args
)
1568 const char *s
= PyString_AS_STRING(self
), *sub
;
1569 int len
= PyString_GET_SIZE(self
), n
;
1570 int i
= 0, last
= INT_MAX
;
1574 if (!PyArg_ParseTuple(args
, "O|O&O&:count", &subobj
,
1575 _PyEval_SliceIndex
, &i
, _PyEval_SliceIndex
, &last
))
1578 if (PyString_Check(subobj
)) {
1579 sub
= PyString_AS_STRING(subobj
);
1580 n
= PyString_GET_SIZE(subobj
);
1582 #ifdef Py_USING_UNICODE
1583 else if (PyUnicode_Check(subobj
)) {
1585 count
= PyUnicode_Count((PyObject
*)self
, subobj
, i
, last
);
1589 return PyInt_FromLong((long) count
);
1592 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
1607 return PyInt_FromLong((long) (m
-i
));
1611 if (!memcmp(s
+i
, sub
, n
)) {
1618 return PyInt_FromLong((long) r
);
1622 static char swapcase__doc__
[] =
1623 "S.swapcase() -> string\n\
1625 Return a copy of the string S with uppercase characters\n\
1626 converted to lowercase and vice versa.";
1629 string_swapcase(PyStringObject
*self
)
1631 char *s
= PyString_AS_STRING(self
), *s_new
;
1632 int i
, n
= PyString_GET_SIZE(self
);
1635 new = PyString_FromStringAndSize(NULL
, n
);
1638 s_new
= PyString_AsString(new);
1639 for (i
= 0; i
< n
; i
++) {
1640 int c
= Py_CHARMASK(*s
++);
1642 *s_new
= toupper(c
);
1644 else if (isupper(c
)) {
1645 *s_new
= tolower(c
);
1655 static char translate__doc__
[] =
1656 "S.translate(table [,deletechars]) -> string\n\
1658 Return a copy of the string S, where all characters occurring\n\
1659 in the optional argument deletechars are removed, and the\n\
1660 remaining characters have been mapped through the given\n\
1661 translation table, which must be a string of length 256.";
1664 string_translate(PyStringObject
*self
, PyObject
*args
)
1666 register char *input
, *output
;
1667 register const char *table
;
1668 register int i
, c
, changed
= 0;
1669 PyObject
*input_obj
= (PyObject
*)self
;
1670 const char *table1
, *output_start
, *del_table
=NULL
;
1671 int inlen
, tablen
, dellen
= 0;
1673 int trans_table
[256];
1674 PyObject
*tableobj
, *delobj
= NULL
;
1676 if (!PyArg_ParseTuple(args
, "O|O:translate",
1677 &tableobj
, &delobj
))
1680 if (PyString_Check(tableobj
)) {
1681 table1
= PyString_AS_STRING(tableobj
);
1682 tablen
= PyString_GET_SIZE(tableobj
);
1684 #ifdef Py_USING_UNICODE
1685 else if (PyUnicode_Check(tableobj
)) {
1686 /* Unicode .translate() does not support the deletechars
1687 parameter; instead a mapping to None will cause characters
1689 if (delobj
!= NULL
) {
1690 PyErr_SetString(PyExc_TypeError
,
1691 "deletions are implemented differently for unicode");
1694 return PyUnicode_Translate((PyObject
*)self
, tableobj
, NULL
);
1697 else if (PyObject_AsCharBuffer(tableobj
, &table1
, &tablen
))
1700 if (delobj
!= NULL
) {
1701 if (PyString_Check(delobj
)) {
1702 del_table
= PyString_AS_STRING(delobj
);
1703 dellen
= PyString_GET_SIZE(delobj
);
1705 #ifdef Py_USING_UNICODE
1706 else if (PyUnicode_Check(delobj
)) {
1707 PyErr_SetString(PyExc_TypeError
,
1708 "deletions are implemented differently for unicode");
1712 else if (PyObject_AsCharBuffer(delobj
, &del_table
, &dellen
))
1715 if (tablen
!= 256) {
1716 PyErr_SetString(PyExc_ValueError
,
1717 "translation table must be 256 characters long");
1727 inlen
= PyString_Size(input_obj
);
1728 result
= PyString_FromStringAndSize((char *)NULL
, inlen
);
1731 output_start
= output
= PyString_AsString(result
);
1732 input
= PyString_AsString(input_obj
);
1735 /* If no deletions are required, use faster code */
1736 for (i
= inlen
; --i
>= 0; ) {
1737 c
= Py_CHARMASK(*input
++);
1738 if (Py_CHARMASK((*output
++ = table
[c
])) != c
)
1741 if (changed
|| !PyString_CheckExact(input_obj
))
1744 Py_INCREF(input_obj
);
1748 for (i
= 0; i
< 256; i
++)
1749 trans_table
[i
] = Py_CHARMASK(table
[i
]);
1751 for (i
= 0; i
< dellen
; i
++)
1752 trans_table
[(int) Py_CHARMASK(del_table
[i
])] = -1;
1754 for (i
= inlen
; --i
>= 0; ) {
1755 c
= Py_CHARMASK(*input
++);
1756 if (trans_table
[c
] != -1)
1757 if (Py_CHARMASK(*output
++ = (char)trans_table
[c
]) == c
)
1761 if (!changed
&& PyString_CheckExact(input_obj
)) {
1763 Py_INCREF(input_obj
);
1766 /* Fix the size of the resulting string */
1767 if (inlen
> 0 &&_PyString_Resize(&result
, output
-output_start
))
1773 /* What follows is used for implementing replace(). Perry Stoll. */
1778 strstr replacement for arbitrary blocks of memory.
1780 Locates the first occurrence in the memory pointed to by MEM of the
1781 contents of memory pointed to by PAT. Returns the index into MEM if
1782 found, or -1 if not found. If len of PAT is greater than length of
1783 MEM, the function returns -1.
1786 mymemfind(const char *mem
, int len
, const char *pat
, int pat_len
)
1790 /* pattern can not occur in the last pat_len-1 chars */
1793 for (ii
= 0; ii
<= len
; ii
++) {
1794 if (mem
[ii
] == pat
[0] && memcmp(&mem
[ii
], pat
, pat_len
) == 0) {
1804 Return the number of non-overlapping times PAT is found in MEM,
1805 meaning mem=1111 and pat==11 returns 2.
1806 mem=11111 and pat==11 also return 2.
1809 mymemcnt(const char *mem
, int len
, const char *pat
, int pat_len
)
1811 register int offset
= 0;
1815 offset
= mymemfind(mem
, len
, pat
, pat_len
);
1818 mem
+= offset
+ pat_len
;
1819 len
-= offset
+ pat_len
;
1828 Return a string in which all occurrences of PAT in memory STR are
1831 If length of PAT is greater than length of STR or there are no occurrences
1832 of PAT in STR, then the original string is returned. Otherwise, a new
1833 string is allocated here and returned.
1835 on return, out_len is:
1836 the length of output string, or
1837 -1 if the input string is returned, or
1838 unchanged if an error occurs (no memory).
1841 the new string allocated locally, or
1842 NULL if an error occurred.
1845 mymemreplace(const char *str
, int len
, /* input string */
1846 const char *pat
, int pat_len
, /* pattern string to find */
1847 const char *sub
, int sub_len
, /* substitution string */
1848 int count
, /* number of replacements */
1853 int nfound
, offset
, new_len
;
1855 if (len
== 0 || pat_len
> len
)
1858 /* find length of output string */
1859 nfound
= mymemcnt(str
, len
, pat
, pat_len
);
1862 else if (nfound
> count
)
1867 new_len
= len
+ nfound
*(sub_len
- pat_len
);
1869 /* Have to allocate something for the caller to free(). */
1870 out_s
= (char *)PyMem_MALLOC(1);
1876 assert(new_len
> 0);
1877 new_s
= (char *)PyMem_MALLOC(new_len
);
1882 for (; count
> 0 && len
> 0; --count
) {
1883 /* find index of next instance of pattern */
1884 offset
= mymemfind(str
, len
, pat
, pat_len
);
1888 /* copy non matching part of input string */
1889 memcpy(new_s
, str
, offset
);
1890 str
+= offset
+ pat_len
;
1891 len
-= offset
+ pat_len
;
1893 /* copy substitute into the output string */
1895 memcpy(new_s
, sub
, sub_len
);
1898 /* copy any remaining values into output string */
1900 memcpy(new_s
, str
, len
);
1907 return (char *)str
; /* cast away const */
/* Docstring for S.replace().  The optional third argument is the maximum
   number of replacements; string_replace() parses it into its local
   `count`, so it is documented under that name. */
static char replace__doc__[] =
"S.replace(old, new[, count]) -> string\n\
\n\
Return a copy of string S with all occurrences of substring\n\
old replaced by new. If the optional argument count is\n\
given, only the first count occurrences are replaced.";
1919 string_replace(PyStringObject
*self
, PyObject
*args
)
1921 const char *str
= PyString_AS_STRING(self
), *sub
, *repl
;
1923 const int len
= PyString_GET_SIZE(self
);
1924 int sub_len
, repl_len
, out_len
;
1927 PyObject
*subobj
, *replobj
;
1929 if (!PyArg_ParseTuple(args
, "OO|i:replace",
1930 &subobj
, &replobj
, &count
))
1933 if (PyString_Check(subobj
)) {
1934 sub
= PyString_AS_STRING(subobj
);
1935 sub_len
= PyString_GET_SIZE(subobj
);
1937 #ifdef Py_USING_UNICODE
1938 else if (PyUnicode_Check(subobj
))
1939 return PyUnicode_Replace((PyObject
*)self
,
1940 subobj
, replobj
, count
);
1942 else if (PyObject_AsCharBuffer(subobj
, &sub
, &sub_len
))
1945 if (PyString_Check(replobj
)) {
1946 repl
= PyString_AS_STRING(replobj
);
1947 repl_len
= PyString_GET_SIZE(replobj
);
1949 #ifdef Py_USING_UNICODE
1950 else if (PyUnicode_Check(replobj
))
1951 return PyUnicode_Replace((PyObject
*)self
,
1952 subobj
, replobj
, count
);
1954 else if (PyObject_AsCharBuffer(replobj
, &repl
, &repl_len
))
1958 PyErr_SetString(PyExc_ValueError
, "empty pattern string");
1961 new_s
= mymemreplace(str
,len
,sub
,sub_len
,repl
,repl_len
,count
,&out_len
);
1962 if (new_s
== NULL
) {
1966 if (out_len
== -1) {
1967 if (PyString_CheckExact(self
)) {
1968 /* we're returning another reference to self */
1969 new = (PyObject
*)self
;
1973 new = PyString_FromStringAndSize(str
, len
);
1979 new = PyString_FromStringAndSize(new_s
, out_len
);
1986 static char startswith__doc__
[] =
1987 "S.startswith(prefix[, start[, end]]) -> int\n\
1989 Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1990 optional start, test S beginning at that position. With optional end, stop\n\
1991 comparing S at that position.";
1994 string_startswith(PyStringObject
*self
, PyObject
*args
)
1996 const char* str
= PyString_AS_STRING(self
);
1997 int len
= PyString_GET_SIZE(self
);
2004 if (!PyArg_ParseTuple(args
, "O|O&O&:startswith", &subobj
,
2005 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
2007 if (PyString_Check(subobj
)) {
2008 prefix
= PyString_AS_STRING(subobj
);
2009 plen
= PyString_GET_SIZE(subobj
);
2011 #ifdef Py_USING_UNICODE
2012 else if (PyUnicode_Check(subobj
)) {
2014 rc
= PyUnicode_Tailmatch((PyObject
*)self
,
2015 subobj
, start
, end
, -1);
2019 return PyInt_FromLong((long) rc
);
2022 else if (PyObject_AsCharBuffer(subobj
, &prefix
, &plen
))
2025 /* Adopt Java semantics for index out of range. It is legal for
2026 * offset to be == len, but this only returns true if prefix is
2029 if (start
< 0 || start
+plen
> len
)
2030 return PyInt_FromLong(0);
2032 if (!memcmp(str
+start
, prefix
, plen
)) {
2033 /* did the match end after the specified end? */
2035 return PyInt_FromLong(1);
2036 else if (end
- start
< plen
)
2037 return PyInt_FromLong(0);
2039 return PyInt_FromLong(1);
2041 else return PyInt_FromLong(0);
2045 static char endswith__doc__
[] =
2046 "S.endswith(suffix[, start[, end]]) -> int\n\
2048 Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
2049 optional start, test S beginning at that position. With optional end, stop\n\
2050 comparing S at that position.";
2053 string_endswith(PyStringObject
*self
, PyObject
*args
)
2055 const char* str
= PyString_AS_STRING(self
);
2056 int len
= PyString_GET_SIZE(self
);
2064 if (!PyArg_ParseTuple(args
, "O|O&O&:endswith", &subobj
,
2065 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
2067 if (PyString_Check(subobj
)) {
2068 suffix
= PyString_AS_STRING(subobj
);
2069 slen
= PyString_GET_SIZE(subobj
);
2071 #ifdef Py_USING_UNICODE
2072 else if (PyUnicode_Check(subobj
)) {
2074 rc
= PyUnicode_Tailmatch((PyObject
*)self
,
2075 subobj
, start
, end
, +1);
2079 return PyInt_FromLong((long) rc
);
2082 else if (PyObject_AsCharBuffer(subobj
, &suffix
, &slen
))
2085 if (start
< 0 || start
> len
|| slen
> len
)
2086 return PyInt_FromLong(0);
2088 upper
= (end
>= 0 && end
<= len
) ? end
: len
;
2089 lower
= (upper
- slen
) > start
? (upper
- slen
) : start
;
2091 if (upper
-lower
>= slen
&& !memcmp(str
+lower
, suffix
, slen
))
2092 return PyInt_FromLong(1);
2093 else return PyInt_FromLong(0);
2097 static char encode__doc__
[] =
2098 "S.encode([encoding[,errors]]) -> object\n\
2100 Encodes S using the codec registered for encoding. encoding defaults\n\
2101 to the default encoding. errors may be given to set a different error\n\
2102 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2103 a ValueError. Other possible values are 'ignore' and 'replace'.";
2106 string_encode(PyStringObject
*self
, PyObject
*args
)
2108 char *encoding
= NULL
;
2109 char *errors
= NULL
;
2110 if (!PyArg_ParseTuple(args
, "|ss:encode", &encoding
, &errors
))
2112 return PyString_AsEncodedObject((PyObject
*)self
, encoding
, errors
);
2116 static char decode__doc__
[] =
2117 "S.decode([encoding[,errors]]) -> object\n\
2119 Decodes S using the codec registered for encoding. encoding defaults\n\
2120 to the default encoding. errors may be given to set a different error\n\
2121 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2122 a ValueError. Other possible values are 'ignore' and 'replace'.";
2125 string_decode(PyStringObject
*self
, PyObject
*args
)
2127 char *encoding
= NULL
;
2128 char *errors
= NULL
;
2129 if (!PyArg_ParseTuple(args
, "|ss:decode", &encoding
, &errors
))
2131 return PyString_AsDecodedObject((PyObject
*)self
, encoding
, errors
);
2135 static char expandtabs__doc__
[] =
2136 "S.expandtabs([tabsize]) -> string\n\
2138 Return a copy of S where all tab characters are expanded using spaces.\n\
2139 If tabsize is not given, a tab size of 8 characters is assumed.";
2142 string_expandtabs(PyStringObject
*self
, PyObject
*args
)
2150 if (!PyArg_ParseTuple(args
, "|i:expandtabs", &tabsize
))
2153 /* First pass: determine size of output string */
2155 e
= PyString_AS_STRING(self
) + PyString_GET_SIZE(self
);
2156 for (p
= PyString_AS_STRING(self
); p
< e
; p
++)
2159 j
+= tabsize
- (j
% tabsize
);
2163 if (*p
== '\n' || *p
== '\r') {
2169 /* Second pass: create output string and fill it */
2170 u
= PyString_FromStringAndSize(NULL
, i
+ j
);
2175 q
= PyString_AS_STRING(u
);
2177 for (p
= PyString_AS_STRING(self
); p
< e
; p
++)
2180 i
= tabsize
- (j
% tabsize
);
2189 if (*p
== '\n' || *p
== '\r')
2197 pad(PyStringObject
*self
, int left
, int right
, char fill
)
2206 if (left
== 0 && right
== 0 && PyString_CheckExact(self
)) {
2208 return (PyObject
*)self
;
2211 u
= PyString_FromStringAndSize(NULL
,
2212 left
+ PyString_GET_SIZE(self
) + right
);
2215 memset(PyString_AS_STRING(u
), fill
, left
);
2216 memcpy(PyString_AS_STRING(u
) + left
,
2217 PyString_AS_STRING(self
),
2218 PyString_GET_SIZE(self
));
2220 memset(PyString_AS_STRING(u
) + left
+ PyString_GET_SIZE(self
),
2227 static char ljust__doc__
[] =
2228 "S.ljust(width) -> string\n"
2230 "Return S left justified in a string of length width. Padding is\n"
2231 "done using spaces.";
2234 string_ljust(PyStringObject
*self
, PyObject
*args
)
2237 if (!PyArg_ParseTuple(args
, "i:ljust", &width
))
2240 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
2242 return (PyObject
*) self
;
2245 return pad(self
, 0, width
- PyString_GET_SIZE(self
), ' ');
2249 static char rjust__doc__
[] =
2250 "S.rjust(width) -> string\n"
2252 "Return S right justified in a string of length width. Padding is\n"
2253 "done using spaces.";
2256 string_rjust(PyStringObject
*self
, PyObject
*args
)
2259 if (!PyArg_ParseTuple(args
, "i:rjust", &width
))
2262 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
2264 return (PyObject
*) self
;
2267 return pad(self
, width
- PyString_GET_SIZE(self
), 0, ' ');
2271 static char center__doc__
[] =
2272 "S.center(width) -> string\n"
2274 "Return S centered in a string of length width. Padding is done\n"
2278 string_center(PyStringObject
*self
, PyObject
*args
)
2283 if (!PyArg_ParseTuple(args
, "i:center", &width
))
2286 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
2288 return (PyObject
*) self
;
2291 marg
= width
- PyString_GET_SIZE(self
);
2292 left
= marg
/ 2 + (marg
& width
& 1);
2294 return pad(self
, left
, marg
- left
, ' ');
2297 static char isspace__doc__
[] =
2298 "S.isspace() -> int\n"
2300 "Return 1 if there are only whitespace characters in S,\n"
2304 string_isspace(PyStringObject
*self
)
2306 register const unsigned char *p
2307 = (unsigned char *) PyString_AS_STRING(self
);
2308 register const unsigned char *e
;
2310 /* Shortcut for single character strings */
2311 if (PyString_GET_SIZE(self
) == 1 &&
2313 return PyInt_FromLong(1);
2315 /* Special case for empty strings */
2316 if (PyString_GET_SIZE(self
) == 0)
2317 return PyInt_FromLong(0);
2319 e
= p
+ PyString_GET_SIZE(self
);
2320 for (; p
< e
; p
++) {
2322 return PyInt_FromLong(0);
2324 return PyInt_FromLong(1);
2328 static char isalpha__doc__
[] =
2329 "S.isalpha() -> int\n\
2331 Return 1 if all characters in S are alphabetic\n\
2332 and there is at least one character in S, 0 otherwise.";
2335 string_isalpha(PyStringObject
*self
)
2337 register const unsigned char *p
2338 = (unsigned char *) PyString_AS_STRING(self
);
2339 register const unsigned char *e
;
2341 /* Shortcut for single character strings */
2342 if (PyString_GET_SIZE(self
) == 1 &&
2344 return PyInt_FromLong(1);
2346 /* Special case for empty strings */
2347 if (PyString_GET_SIZE(self
) == 0)
2348 return PyInt_FromLong(0);
2350 e
= p
+ PyString_GET_SIZE(self
);
2351 for (; p
< e
; p
++) {
2353 return PyInt_FromLong(0);
2355 return PyInt_FromLong(1);
2359 static char isalnum__doc__
[] =
2360 "S.isalnum() -> int\n\
2362 Return 1 if all characters in S are alphanumeric\n\
2363 and there is at least one character in S, 0 otherwise.";
2366 string_isalnum(PyStringObject
*self
)
2368 register const unsigned char *p
2369 = (unsigned char *) PyString_AS_STRING(self
);
2370 register const unsigned char *e
;
2372 /* Shortcut for single character strings */
2373 if (PyString_GET_SIZE(self
) == 1 &&
2375 return PyInt_FromLong(1);
2377 /* Special case for empty strings */
2378 if (PyString_GET_SIZE(self
) == 0)
2379 return PyInt_FromLong(0);
2381 e
= p
+ PyString_GET_SIZE(self
);
2382 for (; p
< e
; p
++) {
2384 return PyInt_FromLong(0);
2386 return PyInt_FromLong(1);
2390 static char isdigit__doc__
[] =
2391 "S.isdigit() -> int\n\
2393 Return 1 if there are only digit characters in S,\n\
2397 string_isdigit(PyStringObject
*self
)
2399 register const unsigned char *p
2400 = (unsigned char *) PyString_AS_STRING(self
);
2401 register const unsigned char *e
;
2403 /* Shortcut for single character strings */
2404 if (PyString_GET_SIZE(self
) == 1 &&
2406 return PyInt_FromLong(1);
2408 /* Special case for empty strings */
2409 if (PyString_GET_SIZE(self
) == 0)
2410 return PyInt_FromLong(0);
2412 e
= p
+ PyString_GET_SIZE(self
);
2413 for (; p
< e
; p
++) {
2415 return PyInt_FromLong(0);
2417 return PyInt_FromLong(1);
2421 static char islower__doc__
[] =
2422 "S.islower() -> int\n\
2424 Return 1 if all cased characters in S are lowercase and there is\n\
2425 at least one cased character in S, 0 otherwise.";
2428 string_islower(PyStringObject
*self
)
2430 register const unsigned char *p
2431 = (unsigned char *) PyString_AS_STRING(self
);
2432 register const unsigned char *e
;
2435 /* Shortcut for single character strings */
2436 if (PyString_GET_SIZE(self
) == 1)
2437 return PyInt_FromLong(islower(*p
) != 0);
2439 /* Special case for empty strings */
2440 if (PyString_GET_SIZE(self
) == 0)
2441 return PyInt_FromLong(0);
2443 e
= p
+ PyString_GET_SIZE(self
);
2445 for (; p
< e
; p
++) {
2447 return PyInt_FromLong(0);
2448 else if (!cased
&& islower(*p
))
2451 return PyInt_FromLong(cased
);
2455 static char isupper__doc__
[] =
2456 "S.isupper() -> int\n\
2458 Return 1 if all cased characters in S are uppercase and there is\n\
2459 at least one cased character in S, 0 otherwise.";
2462 string_isupper(PyStringObject
*self
)
2464 register const unsigned char *p
2465 = (unsigned char *) PyString_AS_STRING(self
);
2466 register const unsigned char *e
;
2469 /* Shortcut for single character strings */
2470 if (PyString_GET_SIZE(self
) == 1)
2471 return PyInt_FromLong(isupper(*p
) != 0);
2473 /* Special case for empty strings */
2474 if (PyString_GET_SIZE(self
) == 0)
2475 return PyInt_FromLong(0);
2477 e
= p
+ PyString_GET_SIZE(self
);
2479 for (; p
< e
; p
++) {
2481 return PyInt_FromLong(0);
2482 else if (!cased
&& isupper(*p
))
2485 return PyInt_FromLong(cased
);
2489 static char istitle__doc__
[] =
2490 "S.istitle() -> int\n\
2492 Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2493 may only follow uncased characters and lowercase characters only cased\n\
2494 ones. Return 0 otherwise.";
2497 string_istitle(PyStringObject
*self
, PyObject
*uncased
)
2499 register const unsigned char *p
2500 = (unsigned char *) PyString_AS_STRING(self
);
2501 register const unsigned char *e
;
2502 int cased
, previous_is_cased
;
2504 /* Shortcut for single character strings */
2505 if (PyString_GET_SIZE(self
) == 1)
2506 return PyInt_FromLong(isupper(*p
) != 0);
2508 /* Special case for empty strings */
2509 if (PyString_GET_SIZE(self
) == 0)
2510 return PyInt_FromLong(0);
2512 e
= p
+ PyString_GET_SIZE(self
);
2514 previous_is_cased
= 0;
2515 for (; p
< e
; p
++) {
2516 register const unsigned char ch
= *p
;
2519 if (previous_is_cased
)
2520 return PyInt_FromLong(0);
2521 previous_is_cased
= 1;
2524 else if (islower(ch
)) {
2525 if (!previous_is_cased
)
2526 return PyInt_FromLong(0);
2527 previous_is_cased
= 1;
2531 previous_is_cased
= 0;
2533 return PyInt_FromLong(cased
);
/* Docstring for S.splitlines().  keepends is the method's single
   optional argument. */
static char splitlines__doc__[] =
"S.splitlines([keepends]) -> list of strings\n\
\n\
Return a list of the lines in S, breaking at line boundaries.\n\
Line breaks are not included in the resulting list unless keepends\n\
is given and true.";
2544 #define SPLIT_APPEND(data, left, right) \
2545 str = PyString_FromStringAndSize(data + left, right - left); \
2548 if (PyList_Append(list, str)) { \
2556 string_splitlines(PyStringObject
*self
, PyObject
*args
)
2566 if (!PyArg_ParseTuple(args
, "|i:splitlines", &keepends
))
2569 data
= PyString_AS_STRING(self
);
2570 len
= PyString_GET_SIZE(self
);
2572 list
= PyList_New(0);
2576 for (i
= j
= 0; i
< len
; ) {
2579 /* Find a line and append it */
2580 while (i
< len
&& data
[i
] != '\n' && data
[i
] != '\r')
2583 /* Skip the line break reading CRLF as one line break */
2586 if (data
[i
] == '\r' && i
+ 1 < len
&&
2594 SPLIT_APPEND(data
, j
, eol
);
2598 SPLIT_APPEND(data
, j
, len
);
2612 string_methods
[] = {
2613 /* Counterparts of the obsolete stropmodule functions; except
2614 string.maketrans(). */
2615 {"join", (PyCFunction
)string_join
, METH_O
, join__doc__
},
2616 {"split", (PyCFunction
)string_split
, METH_VARARGS
, split__doc__
},
2617 {"lower", (PyCFunction
)string_lower
, METH_NOARGS
, lower__doc__
},
2618 {"upper", (PyCFunction
)string_upper
, METH_NOARGS
, upper__doc__
},
2619 {"islower", (PyCFunction
)string_islower
, METH_NOARGS
, islower__doc__
},
2620 {"isupper", (PyCFunction
)string_isupper
, METH_NOARGS
, isupper__doc__
},
2621 {"isspace", (PyCFunction
)string_isspace
, METH_NOARGS
, isspace__doc__
},
2622 {"isdigit", (PyCFunction
)string_isdigit
, METH_NOARGS
, isdigit__doc__
},
2623 {"istitle", (PyCFunction
)string_istitle
, METH_NOARGS
, istitle__doc__
},
2624 {"isalpha", (PyCFunction
)string_isalpha
, METH_NOARGS
, isalpha__doc__
},
2625 {"isalnum", (PyCFunction
)string_isalnum
, METH_NOARGS
, isalnum__doc__
},
2626 {"capitalize", (PyCFunction
)string_capitalize
, METH_NOARGS
, capitalize__doc__
},
2627 {"count", (PyCFunction
)string_count
, METH_VARARGS
, count__doc__
},
2628 {"endswith", (PyCFunction
)string_endswith
, METH_VARARGS
, endswith__doc__
},
2629 {"find", (PyCFunction
)string_find
, METH_VARARGS
, find__doc__
},
2630 {"index", (PyCFunction
)string_index
, METH_VARARGS
, index__doc__
},
2631 {"lstrip", (PyCFunction
)string_lstrip
, METH_NOARGS
, lstrip__doc__
},
2632 {"replace", (PyCFunction
)string_replace
, METH_VARARGS
, replace__doc__
},
2633 {"rfind", (PyCFunction
)string_rfind
, METH_VARARGS
, rfind__doc__
},
2634 {"rindex", (PyCFunction
)string_rindex
, METH_VARARGS
, rindex__doc__
},
2635 {"rstrip", (PyCFunction
)string_rstrip
, METH_NOARGS
, rstrip__doc__
},
2636 {"startswith", (PyCFunction
)string_startswith
, METH_VARARGS
, startswith__doc__
},
2637 {"strip", (PyCFunction
)string_strip
, METH_NOARGS
, strip__doc__
},
2638 {"swapcase", (PyCFunction
)string_swapcase
, METH_NOARGS
, swapcase__doc__
},
2639 {"translate", (PyCFunction
)string_translate
, METH_VARARGS
, translate__doc__
},
2640 {"title", (PyCFunction
)string_title
, METH_NOARGS
, title__doc__
},
2641 {"ljust", (PyCFunction
)string_ljust
, METH_VARARGS
, ljust__doc__
},
2642 {"rjust", (PyCFunction
)string_rjust
, METH_VARARGS
, rjust__doc__
},
2643 {"center", (PyCFunction
)string_center
, METH_VARARGS
, center__doc__
},
2644 {"encode", (PyCFunction
)string_encode
, METH_VARARGS
, encode__doc__
},
2645 {"decode", (PyCFunction
)string_decode
, METH_VARARGS
, decode__doc__
},
2646 {"expandtabs", (PyCFunction
)string_expandtabs
, METH_VARARGS
, expandtabs__doc__
},
2647 {"splitlines", (PyCFunction
)string_splitlines
, METH_VARARGS
, splitlines__doc__
},
2649 {"zfill", (PyCFunction
)string_zfill
, METH_VARARGS
, zfill__doc__
},
2651 {NULL
, NULL
} /* sentinel */
2654 staticforward PyObject
*
2655 str_subtype_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
);
2658 string_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
2661 static char *kwlist
[] = {"object", 0};
2663 if (type
!= &PyString_Type
)
2664 return str_subtype_new(type
, args
, kwds
);
2665 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "|O:str", kwlist
, &x
))
2668 return PyString_FromString("");
2669 return PyObject_Str(x
);
2673 str_subtype_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
2675 PyObject
*tmp
, *pnew
;
2678 assert(PyType_IsSubtype(type
, &PyString_Type
));
2679 tmp
= string_new(&PyString_Type
, args
, kwds
);
2682 assert(PyString_CheckExact(tmp
));
2683 n
= PyString_GET_SIZE(tmp
);
2684 pnew
= type
->tp_alloc(type
, n
);
2686 memcpy(PyString_AS_STRING(pnew
), PyString_AS_STRING(tmp
), n
+1);
2688 ((PyStringObject
*)pnew
)->ob_shash
=
2689 ((PyStringObject
*)tmp
)->ob_shash
;
2691 #ifdef INTERN_STRINGS
2692 ((PyStringObject
*)pnew
)->ob_sinterned
=
2693 ((PyStringObject
*)tmp
)->ob_sinterned
;
2700 static char string_doc
[] =
2701 "str(object) -> string\n\
2703 Return a nice string representation of the object.\n\
2704 If the argument is a string, the return value is the same object.";
2706 PyTypeObject PyString_Type
= {
2707 PyObject_HEAD_INIT(&PyType_Type
)
2710 sizeof(PyStringObject
),
2712 (destructor
)string_dealloc
, /* tp_dealloc */
2713 (printfunc
)string_print
, /* tp_print */
2717 (reprfunc
)string_repr
, /* tp_repr */
2718 0, /* tp_as_number */
2719 &string_as_sequence
, /* tp_as_sequence */
2720 0, /* tp_as_mapping */
2721 (hashfunc
)string_hash
, /* tp_hash */
2723 (reprfunc
)string_str
, /* tp_str */
2724 PyObject_GenericGetAttr
, /* tp_getattro */
2725 0, /* tp_setattro */
2726 &string_as_buffer
, /* tp_as_buffer */
2727 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
, /* tp_flags */
2728 string_doc
, /* tp_doc */
2729 0, /* tp_traverse */
2731 (richcmpfunc
)string_richcompare
, /* tp_richcompare */
2732 0, /* tp_weaklistoffset */
2734 0, /* tp_iternext */
2735 string_methods
, /* tp_methods */
2740 0, /* tp_descr_get */
2741 0, /* tp_descr_set */
2742 0, /* tp_dictoffset */
2745 string_new
, /* tp_new */
2749 PyString_Concat(register PyObject
**pv
, register PyObject
*w
)
2751 register PyObject
*v
;
2754 if (w
== NULL
|| !PyString_Check(*pv
)) {
2759 v
= string_concat((PyStringObject
*) *pv
, w
);
2765 PyString_ConcatAndDel(register PyObject
**pv
, register PyObject
*w
)
2767 PyString_Concat(pv
, w
);
2772 /* The following function breaks the notion that strings are immutable:
2773 it changes the size of a string. We get away with this only if there
2774 is only one module referencing the object. You can also think of it
2775 as creating a new string object and destroying the old one, only
2776 more efficiently. In any case, don't use this if the string may
2777 already be known to some other part of the code... */
2780 _PyString_Resize(PyObject
**pv
, int newsize
)
2782 register PyObject
*v
;
2783 register PyStringObject
*sv
;
2785 if (!PyString_Check(v
) || v
->ob_refcnt
!= 1) {
2788 PyErr_BadInternalCall();
2791 /* XXX UNREF/NEWREF interface should be more symmetrical */
2795 _Py_ForgetReference(v
);
2797 PyObject_REALLOC((char *)v
,
2798 sizeof(PyStringObject
) + newsize
* sizeof(char));
2804 _Py_NewReference(*pv
);
2805 sv
= (PyStringObject
*) *pv
;
2806 sv
->ob_size
= newsize
;
2807 sv
->ob_sval
[newsize
] = '\0';
2811 /* Helpers for formatstring */
2814 getnextarg(PyObject
*args
, int arglen
, int *p_argidx
)
2816 int argidx
= *p_argidx
;
2817 if (argidx
< arglen
) {
2822 return PyTuple_GetItem(args
, argidx
);
2824 PyErr_SetString(PyExc_TypeError
,
2825 "not enough arguments for format string");
/* Conversion-flag bits for a single '%' format specifier.  PyString_Format
   ORs them into its `flags` variable while scanning the flag characters
   that follow the '%'. */
2836 #define F_LJUST (1<<0) /* set by the '-' flag character */
2837 #define F_SIGN (1<<1) /* set by the '+' flag character */
2838 #define F_BLANK (1<<2) /* set by the ' ' flag character */
2839 #define F_ALT (1<<3) /* set by the '#' flag character */
2840 #define F_ZERO (1<<4) /* set by the '0' flag character */
2843 formatfloat(char *buf
, size_t buflen
, int flags
,
2844 int prec
, int type
, PyObject
*v
)
2846 /* fmt = '%#.' + `prec` + `type`
2847 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
2850 if (!PyArg_Parse(v
, "d;float argument required", &x
))
2854 if (type
== 'f' && fabs(x
)/1e25
>= 1e25
)
2856 sprintf(fmt
, "%%%s.%d%c", (flags
&F_ALT
) ? "#" : "", prec
, type
);
2857 /* worst case length calc to ensure no buffer overrun:
2859 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
2860 for any double rep.)
2861 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2862 If prec=0 the effective precision is 1 (the leading digit is
2863 always given), therefore increase by one to 10+prec. */
2864 if (buflen
<= (size_t)10 + (size_t)prec
) {
2865 PyErr_SetString(PyExc_OverflowError
,
2866 "formatted float is too long (precision too large?)");
2869 sprintf(buf
, fmt
, x
);
2873 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2874 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2875 * Python's regular ints.
2876 * Return value: a new PyString*, or NULL if error.
2877 * . *pbuf is set to point into it,
2878 * *plen set to the # of chars following that.
2879 * Caller must decref it when done using pbuf.
2880 * The string starting at *pbuf is of the form
2881 * "-"? ("0x" | "0X")? digit+
2882 * "0x"/"0X" are present only for x and X conversions, with F_ALT
2883 * set in flags. The case of hex digits will be correct.
2884 * There will be at least prec digits, zero-filled on the left if
2885 * necessary to get that many.
2886 * val object to be converted
2887 * flags bitmask of format flags; only F_ALT is looked at
2888 * prec minimum number of digits; 0-fill on left if needed
2889 * type a character in [duoxX]; u acts the same as d
2891 * CAUTION: o, x and X conversions on regular ints can never
2892 * produce a '-' sign, but can for Python's unbounded ints.
2895 _PyString_FormatLong(PyObject
*val
, int flags
, int prec
, int type
,
2896 char **pbuf
, int *plen
)
2898 PyObject
*result
= NULL
;
2901 int sign
; /* 1 if '-', else 0 */
2902 int len
; /* number of characters */
2903 int numdigits
; /* len == numnondigits + numdigits */
2904 int numnondigits
= 0;
2909 result
= val
->ob_type
->tp_str(val
);
2912 result
= val
->ob_type
->tp_as_number
->nb_oct(val
);
2917 result
= val
->ob_type
->tp_as_number
->nb_hex(val
);
2920 assert(!"'type' not in [duoxX]");
2925 /* To modify the string in-place, there can only be one reference. */
2926 if (result
->ob_refcnt
!= 1) {
2927 PyErr_BadInternalCall();
2930 buf
= PyString_AsString(result
);
2931 len
= PyString_Size(result
);
2932 if (buf
[len
-1] == 'L') {
2936 sign
= buf
[0] == '-';
2937 numnondigits
+= sign
;
2938 numdigits
= len
- numnondigits
;
2939 assert(numdigits
> 0);
2941 /* Get rid of base marker unless F_ALT */
2942 if ((flags
& F_ALT
) == 0) {
2943 /* Need to skip 0x, 0X or 0. */
2947 assert(buf
[sign
] == '0');
2948 /* If 0 is only digit, leave it alone. */
2949 if (numdigits
> 1) {
2956 assert(buf
[sign
] == '0');
2957 assert(buf
[sign
+ 1] == 'x');
2968 assert(len
== numnondigits
+ numdigits
);
2969 assert(numdigits
> 0);
2972 /* Fill with leading zeroes to meet minimum width. */
2973 if (prec
> numdigits
) {
2974 PyObject
*r1
= PyString_FromStringAndSize(NULL
,
2975 numnondigits
+ prec
);
2981 b1
= PyString_AS_STRING(r1
);
2982 for (i
= 0; i
< numnondigits
; ++i
)
2984 for (i
= 0; i
< prec
- numdigits
; i
++)
2986 for (i
= 0; i
< numdigits
; i
++)
2991 buf
= PyString_AS_STRING(result
);
2992 len
= numnondigits
+ prec
;
2995 /* Fix up case for hex conversions. */
2998 /* Need to convert all upper case letters to lower case. */
2999 for (i
= 0; i
< len
; i
++)
3000 if (buf
[i
] >= 'A' && buf
[i
] <= 'F')
3004 /* Need to convert 0x to 0X (and -0x to -0X). */
3005 if (buf
[sign
+ 1] == 'x')
3006 buf
[sign
+ 1] = 'X';
3015 formatint(char *buf
, size_t buflen
, int flags
,
3016 int prec
, int type
, PyObject
*v
)
3018 /* fmt = '%#.' + `prec` + 'l' + `type`
3019 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3021 char fmt
[64]; /* plenty big enough! */
3023 if (!PyArg_Parse(v
, "l;int argument required", &x
))
3027 sprintf(fmt
, "%%%s.%dl%c", (flags
&F_ALT
) ? "#" : "", prec
, type
);
3028 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
3029 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
3030 if (buflen
<= 13 || buflen
<= (size_t)2 + (size_t)prec
) {
3031 PyErr_SetString(PyExc_OverflowError
,
3032 "formatted integer is too long (precision too large?)");
3035 sprintf(buf
, fmt
, x
);
3036 /* When converting 0 under %#x or %#X, C leaves off the base marker,
3037 * but we want it (for consistency with other %#x conversions, and
3038 * for consistency with Python's hex() function).
3039 * BUG 28-Apr-2001 tim: At least two platform Cs (Metrowerks &
3040 * Compaq Tru64) violate the std by converting 0 w/ leading 0x anyway.
3041 * So add it only if the platform didn't already.
3045 (type
== 'x' || type
== 'X') &&
3046 buf
[1] != (char)type
) /* this last always true under std C */
3048 memmove(buf
+2, buf
, strlen(buf
) + 1);
3050 buf
[1] = (char)type
;
3056 formatchar(char *buf
, size_t buflen
, PyObject
*v
)
3058 /* presume that the buffer is at least 2 characters long */
3059 if (PyString_Check(v
)) {
3060 if (!PyArg_Parse(v
, "c;%c requires int or char", &buf
[0]))
3064 if (!PyArg_Parse(v
, "b;%c requires int or char", &buf
[0]))
3072 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
3074 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
3075 chars are formatted. XXX This is a magic number. Each formatting
3076 routine does bounds checking to ensure no overflow, but a better
3077 solution may be to malloc a buffer of appropriate size for each
3078 format. For now, the current solution is sufficient.
3080 #define FORMATBUFLEN (size_t)120
3083 PyString_Format(PyObject
*format
, PyObject
*args
)
3086 int fmtcnt
, rescnt
, reslen
, arglen
, argidx
;
3088 PyObject
*result
, *orig_args
;
3089 #ifdef Py_USING_UNICODE
3092 PyObject
*dict
= NULL
;
3093 if (format
== NULL
|| !PyString_Check(format
) || args
== NULL
) {
3094 PyErr_BadInternalCall();
3098 fmt
= PyString_AsString(format
);
3099 fmtcnt
= PyString_Size(format
);
3100 reslen
= rescnt
= fmtcnt
+ 100;
3101 result
= PyString_FromStringAndSize((char *)NULL
, reslen
);
3104 res
= PyString_AsString(result
);
3105 if (PyTuple_Check(args
)) {
3106 arglen
= PyTuple_Size(args
);
3113 if (args
->ob_type
->tp_as_mapping
)
3115 while (--fmtcnt
>= 0) {
3118 rescnt
= fmtcnt
+ 100;
3120 if (_PyString_Resize(&result
, reslen
) < 0)
3122 res
= PyString_AsString(result
)
3129 /* Got a format specifier */
3136 PyObject
*temp
= NULL
;
3140 char formatbuf
[FORMATBUFLEN
]; /* For format{float,int,char}() */
3141 #ifdef Py_USING_UNICODE
3142 char *fmt_start
= fmt
;
3143 int argidx_start
= argidx
;
3154 PyErr_SetString(PyExc_TypeError
,
3155 "format requires a mapping");
3161 /* Skip over balanced parentheses */
3162 while (pcount
> 0 && --fmtcnt
>= 0) {
3165 else if (*fmt
== '(')
3169 keylen
= fmt
- keystart
- 1;
3170 if (fmtcnt
< 0 || pcount
> 0) {
3171 PyErr_SetString(PyExc_ValueError
,
3172 "incomplete format key");
3175 key
= PyString_FromStringAndSize(keystart
,
3183 args
= PyObject_GetItem(dict
, key
);
3192 while (--fmtcnt
>= 0) {
3193 switch (c
= *fmt
++) {
3194 case '-': flags
|= F_LJUST
; continue;
3195 case '+': flags
|= F_SIGN
; continue;
3196 case ' ': flags
|= F_BLANK
; continue;
3197 case '#': flags
|= F_ALT
; continue;
3198 case '0': flags
|= F_ZERO
; continue;
3203 v
= getnextarg(args
, arglen
, &argidx
);
3206 if (!PyInt_Check(v
)) {
3207 PyErr_SetString(PyExc_TypeError
,
3211 width
= PyInt_AsLong(v
);
3219 else if (c
>= 0 && isdigit(c
)) {
3221 while (--fmtcnt
>= 0) {
3222 c
= Py_CHARMASK(*fmt
++);
3225 if ((width
*10) / 10 != width
) {
3231 width
= width
*10 + (c
- '0');
3239 v
= getnextarg(args
, arglen
, &argidx
);
3242 if (!PyInt_Check(v
)) {
3248 prec
= PyInt_AsLong(v
);
3254 else if (c
>= 0 && isdigit(c
)) {
3256 while (--fmtcnt
>= 0) {
3257 c
= Py_CHARMASK(*fmt
++);
3260 if ((prec
*10) / 10 != prec
) {
3266 prec
= prec
*10 + (c
- '0');
3271 if (c
== 'h' || c
== 'l' || c
== 'L') {
3277 PyErr_SetString(PyExc_ValueError
,
3278 "incomplete format");
3282 v
= getnextarg(args
, arglen
, &argidx
);
3295 #ifdef Py_USING_UNICODE
3296 if (PyUnicode_Check(v
)) {
3298 argidx
= argidx_start
;
3303 temp
= PyObject_Str(v
);
3305 temp
= PyObject_Repr(v
);
3308 if (!PyString_Check(temp
)) {
3309 PyErr_SetString(PyExc_TypeError
,
3310 "%s argument has non-string str()");
3313 pbuf
= PyString_AsString(temp
);
3314 len
= PyString_Size(temp
);
3315 if (prec
>= 0 && len
> prec
)
3326 if (PyLong_Check(v
)) {
3327 temp
= _PyString_FormatLong(v
, flags
,
3328 prec
, c
, &pbuf
, &len
);
3331 /* unbounded ints can always produce
3332 a sign character! */
3337 len
= formatint(pbuf
, sizeof(formatbuf
),
3341 /* only d conversion is signed */
3353 len
= formatfloat(pbuf
, sizeof(formatbuf
), flags
, prec
, c
, v
);
3362 len
= formatchar(pbuf
, sizeof(formatbuf
), v
);
3367 PyErr_Format(PyExc_ValueError
,
3368 "unsupported format character '%c' (0x%x) "
3370 c
, c
, fmt
- 1 - PyString_AsString(format
));
3374 if (*pbuf
== '-' || *pbuf
== '+') {
3378 else if (flags
& F_SIGN
)
3380 else if (flags
& F_BLANK
)
3387 if (rescnt
< width
+ (sign
!= 0)) {
3389 rescnt
= width
+ fmtcnt
+ 100;
3391 if (_PyString_Resize(&result
, reslen
) < 0)
3393 res
= PyString_AsString(result
)
3403 if ((flags
& F_ALT
) && (c
== 'x' || c
== 'X')) {
3404 assert(pbuf
[0] == '0');
3405 assert(pbuf
[1] == c
);
3416 if (width
> len
&& !(flags
& F_LJUST
)) {
3420 } while (--width
> len
);
3425 if ((flags
& F_ALT
) &&
3426 (c
== 'x' || c
== 'X')) {
3427 assert(pbuf
[0] == '0');
3428 assert(pbuf
[1] == c
);
3433 memcpy(res
, pbuf
, len
);
3436 while (--width
>= len
) {
3440 if (dict
&& (argidx
< arglen
) && c
!= '%') {
3441 PyErr_SetString(PyExc_TypeError
,
3442 "not all arguments converted");
3448 if (argidx
< arglen
&& !dict
) {
3449 PyErr_SetString(PyExc_TypeError
,
3450 "not all arguments converted");
3456 _PyString_Resize(&result
, reslen
- rescnt
);
3459 #ifdef Py_USING_UNICODE
3465 /* Fiddle args right (remove the first argidx arguments) */
3466 if (PyTuple_Check(orig_args
) && argidx
> 0) {
3468 int n
= PyTuple_GET_SIZE(orig_args
) - argidx
;
3473 PyObject
*w
= PyTuple_GET_ITEM(orig_args
, n
+ argidx
);
3475 PyTuple_SET_ITEM(v
, n
, w
);
3479 Py_INCREF(orig_args
);
3483 /* Take what we have of the result and let the Unicode formatting
3484 function format the rest of the input. */
3485 rescnt
= res
- PyString_AS_STRING(result
);
3486 if (_PyString_Resize(&result
, rescnt
))
3488 fmtcnt
= PyString_GET_SIZE(format
) - \
3489 (fmt
- PyString_AS_STRING(format
));
3490 format
= PyUnicode_Decode(fmt
, fmtcnt
, NULL
, NULL
);
3493 v
= PyUnicode_Format(format
, args
);
3497 /* Paste what we have (result) to what the Unicode formatting
3498 function returned (v) and return the result (or error) */
3499 w
= PyUnicode_Concat(result
, v
);
3504 #endif /* Py_USING_UNICODE */
3515 #ifdef INTERN_STRINGS
3517 /* This dictionary will leak at PyString_Fini() time. That's acceptable
3518 * because PyString_Fini() specifically frees interned strings that are
3519 * only referenced by this dictionary. The CVS log entry for revision 2.45
3522 * Change the Fini function to only remove otherwise unreferenced
3523 * strings from the interned table. There are references in
3524 * hard-to-find static variables all over the interpreter, and it's not
3525 * worth trying to get rid of all those; but "uninterning" isn't fair
3526 * either and may cause subtle failures later -- so we have to keep them
3527 * in the interned table.
3529 static PyObject
*interned
;
3532 PyString_InternInPlace(PyObject
**p
)
3534 register PyStringObject
*s
= (PyStringObject
*)(*p
);
3536 if (s
== NULL
|| !PyString_Check(s
))
3537 Py_FatalError("PyString_InternInPlace: strings only please!");
3538 if ((t
= s
->ob_sinterned
) != NULL
) {
3539 if (t
== (PyObject
*)s
)
3546 if (interned
== NULL
) {
3547 interned
= PyDict_New();
3548 if (interned
== NULL
)
3551 if ((t
= PyDict_GetItem(interned
, (PyObject
*)s
)) != NULL
) {
3553 *p
= s
->ob_sinterned
= t
;
3557 /* Ensure that only true string objects appear in the intern dict,
3558 and as the value of ob_sinterned. */
3559 if (PyString_CheckExact(s
)) {
3561 if (PyDict_SetItem(interned
, t
, t
) == 0) {
3562 s
->ob_sinterned
= t
;
3567 t
= PyString_FromStringAndSize(PyString_AS_STRING(s
),
3568 PyString_GET_SIZE(s
));
3570 if (PyDict_SetItem(interned
, t
, t
) == 0) {
3571 *p
= s
->ob_sinterned
= t
;
3583 PyString_InternFromString(const char *cp
)
3585 PyObject
*s
= PyString_FromString(cp
);
3588 PyString_InternInPlace(&s
);
3598 for (i
= 0; i
< UCHAR_MAX
+ 1; i
++) {
3599 Py_XDECREF(characters
[i
]);
3600 characters
[i
] = NULL
;
3602 #ifndef DONT_SHARE_SHORT_STRINGS
3603 Py_XDECREF(nullstring
);
3606 #ifdef INTERN_STRINGS
3609 PyObject
*key
, *value
;
3613 while (PyDict_Next(interned
, &pos
, &key
, &value
)) {
3614 if (key
->ob_refcnt
== 2 && key
== value
) {
3615 PyDict_DelItem(interned
, key
);
3624 #ifdef INTERN_STRINGS
3625 void _Py_ReleaseInternedStrings(void)
3628 fprintf(stderr
, "releasing interned strings\n");
3629 PyDict_Clear(interned
);
3630 Py_DECREF(interned
);
3634 #endif /* INTERN_STRINGS */