1 /* String object implementation */
8 int null_strings
, one_strings
;
11 #if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
15 static PyStringObject
*characters
[UCHAR_MAX
+ 1];
16 static PyStringObject
*nullstring
;
18 /* This dictionary holds all interned strings. Note that references to
19 strings in this dictionary are *not* counted in the string's ob_refcnt.
20 When the interned string reaches a refcnt of 0 the string deallocation
21 function will delete the reference from this dictionary.
23 Another way to look at this is to say that the actual reference
24 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
26 static PyObject
*interned
;
30 For both PyString_FromString() and PyString_FromStringAndSize(), the
31 parameter `size' denotes number of characters to allocate, not counting any
32 null terminating character.
34 For PyString_FromString(), the parameter `str' points to a null-terminated
35 string containing exactly `size' bytes.
37 For PyString_FromStringAndSize(), the parameter `str' is
38 either NULL or else points to a string containing at least `size' bytes.
39 For PyString_FromStringAndSize(), the string in the `str' parameter does
40 not have to be null-terminated. (Therefore it is safe to construct a
41 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
42 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
43 bytes (setting the last byte to the null terminating character) and you can
44 fill in the data yourself. If `str' is non-NULL then the resulting
45 PyString object must be treated as immutable and you must not fill in nor
46 alter the data yourself, since the strings may be shared.
48 The PyObject member `op->ob_size', which denotes the number of "extra
49 items" in a variable-size object, will contain the number of bytes
50 allocated for string data, not counting the null terminating character. It
51 is therefore equal to the `size' parameter (for
52 PyString_FromStringAndSize()) or the length of the string in the `str'
53 parameter (for PyString_FromString()).
56 PyString_FromStringAndSize(const char *str
, int size
)
58 register PyStringObject
*op
;
59 if (size
== 0 && (op
= nullstring
) != NULL
) {
64 return (PyObject
*)op
;
66 if (size
== 1 && str
!= NULL
&&
67 (op
= characters
[*str
& UCHAR_MAX
]) != NULL
)
73 return (PyObject
*)op
;
76 /* Inline PyObject_NewVar */
77 op
= (PyStringObject
*)
78 PyObject_MALLOC(sizeof(PyStringObject
) + size
* sizeof(char));
80 return PyErr_NoMemory();
81 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
83 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
85 memcpy(op
->ob_sval
, str
, size
);
86 op
->ob_sval
[size
] = '\0';
87 /* share short strings */
89 PyObject
*t
= (PyObject
*)op
;
90 PyString_InternInPlace(&t
);
91 op
= (PyStringObject
*)t
;
94 } else if (size
== 1 && str
!= NULL
) {
95 PyObject
*t
= (PyObject
*)op
;
96 PyString_InternInPlace(&t
);
97 op
= (PyStringObject
*)t
;
98 characters
[*str
& UCHAR_MAX
] = op
;
101 return (PyObject
*) op
;
105 PyString_FromString(const char *str
)
107 register size_t size
;
108 register PyStringObject
*op
;
112 if (size
> INT_MAX
) {
113 PyErr_SetString(PyExc_OverflowError
,
114 "string is too long for a Python string");
117 if (size
== 0 && (op
= nullstring
) != NULL
) {
122 return (PyObject
*)op
;
124 if (size
== 1 && (op
= characters
[*str
& UCHAR_MAX
]) != NULL
) {
129 return (PyObject
*)op
;
132 /* Inline PyObject_NewVar */
133 op
= (PyStringObject
*)
134 PyObject_MALLOC(sizeof(PyStringObject
) + size
* sizeof(char));
136 return PyErr_NoMemory();
137 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
139 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
140 memcpy(op
->ob_sval
, str
, size
+1);
141 /* share short strings */
143 PyObject
*t
= (PyObject
*)op
;
144 PyString_InternInPlace(&t
);
145 op
= (PyStringObject
*)t
;
148 } else if (size
== 1) {
149 PyObject
*t
= (PyObject
*)op
;
150 PyString_InternInPlace(&t
);
151 op
= (PyStringObject
*)t
;
152 characters
[*str
& UCHAR_MAX
] = op
;
155 return (PyObject
*) op
;
159 PyString_FromFormatV(const char *format
, va_list vargs
)
167 #ifdef VA_LIST_IS_ARRAY
168 memcpy(count
, vargs
, sizeof(va_list));
171 __va_copy(count
, vargs
);
176 /* step 1: figure out how large a buffer we need */
177 for (f
= format
; *f
; f
++) {
180 while (*++f
&& *f
!= '%' && !isalpha(Py_CHARMASK(*f
)))
183 /* skip the 'l' in %ld, since it doesn't change the
184 width. although only %d is supported (see
185 "expand" section below), others can be easily
187 if (*f
== 'l' && *(f
+1) == 'd')
192 (void)va_arg(count
, int);
193 /* fall through... */
197 case 'd': case 'i': case 'x':
198 (void) va_arg(count
, int);
199 /* 20 bytes is enough to hold a 64-bit
200 integer. Decimal takes the most space.
201 This isn't enough for octal. */
205 s
= va_arg(count
, char*);
209 (void) va_arg(count
, int);
210 /* maximum 64-bit pointer representation:
212 * so 19 characters is enough.
213 * XXX I count 18 -- what's the extra for?
218 /* if we stumble upon an unknown
219 formatting code, copy the rest of
220 the format string to the output
221 string. (we cannot just skip the
222 code, since there's no way to know
223 what's in the argument list) */
231 /* step 2: fill the buffer */
232 /* Since we've analyzed how much space we need for the worst case,
233 use sprintf directly instead of the slower PyOS_snprintf. */
234 string
= PyString_FromStringAndSize(NULL
, n
);
238 s
= PyString_AsString(string
);
240 for (f
= format
; *f
; f
++) {
244 /* parse the width.precision part (we're only
245 interested in the precision value, if any) */
247 while (isdigit(Py_CHARMASK(*f
)))
248 n
= (n
*10) + *f
++ - '0';
252 while (isdigit(Py_CHARMASK(*f
)))
253 n
= (n
*10) + *f
++ - '0';
255 while (*f
&& *f
!= '%' && !isalpha(Py_CHARMASK(*f
)))
257 /* handle the long flag, but only for %ld. others
258 can be added when necessary. */
259 if (*f
== 'l' && *(f
+1) == 'd') {
266 *s
++ = va_arg(vargs
, int);
270 sprintf(s
, "%ld", va_arg(vargs
, long));
272 sprintf(s
, "%d", va_arg(vargs
, int));
276 sprintf(s
, "%i", va_arg(vargs
, int));
280 sprintf(s
, "%x", va_arg(vargs
, int));
284 p
= va_arg(vargs
, char*);
292 sprintf(s
, "%p", va_arg(vargs
, void*));
293 /* %p is ill-defined: ensure leading 0x. */
296 else if (s
[1] != 'x') {
297 memmove(s
+2, s
, strlen(s
)+1);
316 _PyString_Resize(&string
, s
- PyString_AS_STRING(string
));
321 PyString_FromFormat(const char *format
, ...)
326 #ifdef HAVE_STDARG_PROTOTYPES
327 va_start(vargs
, format
);
331 ret
= PyString_FromFormatV(format
, vargs
);
337 PyObject
*PyString_Decode(const char *s
,
339 const char *encoding
,
344 str
= PyString_FromStringAndSize(s
, size
);
347 v
= PyString_AsDecodedString(str
, encoding
, errors
);
352 PyObject
*PyString_AsDecodedObject(PyObject
*str
,
353 const char *encoding
,
358 if (!PyString_Check(str
)) {
363 if (encoding
== NULL
) {
364 #ifdef Py_USING_UNICODE
365 encoding
= PyUnicode_GetDefaultEncoding();
367 PyErr_SetString(PyExc_ValueError
, "no encoding specified");
372 /* Decode via the codec registry */
373 v
= PyCodec_Decode(str
, encoding
, errors
);
383 PyObject
*PyString_AsDecodedString(PyObject
*str
,
384 const char *encoding
,
389 v
= PyString_AsDecodedObject(str
, encoding
, errors
);
393 #ifdef Py_USING_UNICODE
394 /* Convert Unicode to a string using the default encoding */
395 if (PyUnicode_Check(v
)) {
397 v
= PyUnicode_AsEncodedString(v
, NULL
, NULL
);
403 if (!PyString_Check(v
)) {
404 PyErr_Format(PyExc_TypeError
,
405 "decoder did not return a string object (type=%.400s)",
406 v
->ob_type
->tp_name
);
417 PyObject
*PyString_Encode(const char *s
,
419 const char *encoding
,
424 str
= PyString_FromStringAndSize(s
, size
);
427 v
= PyString_AsEncodedString(str
, encoding
, errors
);
432 PyObject
*PyString_AsEncodedObject(PyObject
*str
,
433 const char *encoding
,
438 if (!PyString_Check(str
)) {
443 if (encoding
== NULL
) {
444 #ifdef Py_USING_UNICODE
445 encoding
= PyUnicode_GetDefaultEncoding();
447 PyErr_SetString(PyExc_ValueError
, "no encoding specified");
452 /* Encode via the codec registry */
453 v
= PyCodec_Encode(str
, encoding
, errors
);
463 PyObject
*PyString_AsEncodedString(PyObject
*str
,
464 const char *encoding
,
469 v
= PyString_AsEncodedObject(str
, encoding
, errors
);
473 #ifdef Py_USING_UNICODE
474 /* Convert Unicode to a string using the default encoding */
475 if (PyUnicode_Check(v
)) {
477 v
= PyUnicode_AsEncodedString(v
, NULL
, NULL
);
483 if (!PyString_Check(v
)) {
484 PyErr_Format(PyExc_TypeError
,
485 "encoder did not return a string object (type=%.400s)",
486 v
->ob_type
->tp_name
);
498 string_dealloc(PyObject
*op
)
500 switch (PyString_CHECK_INTERNED(op
)) {
501 case SSTATE_NOT_INTERNED
:
504 case SSTATE_INTERNED_MORTAL
:
505 /* revive dead object temporarily for DelItem */
507 if (PyDict_DelItem(interned
, op
) != 0)
509 "deletion of interned string failed");
512 case SSTATE_INTERNED_IMMORTAL
:
513 Py_FatalError("Immortal interned string died.");
516 Py_FatalError("Inconsistent interned string state.");
518 op
->ob_type
->tp_free(op
);
521 /* Unescape a backslash-escaped string. If unicode is non-zero,
522 the string is a u-literal. If recode_encoding is non-zero,
523 the string is UTF-8 encoded and should be re-encoded in the
524 specified encoding. */
526 PyObject
*PyString_DecodeEscape(const char *s
,
530 const char *recode_encoding
)
536 int newlen
= recode_encoding
? 4*len
:len
;
537 v
= PyString_FromStringAndSize((char *)NULL
, newlen
);
540 p
= buf
= PyString_AsString(v
);
545 #ifdef Py_USING_UNICODE
546 if (recode_encoding
&& (*s
& 0x80)) {
552 /* Decode non-ASCII bytes as UTF-8. */
553 while (t
< end
&& (*t
& 0x80)) t
++;
554 u
= PyUnicode_DecodeUTF8(s
, t
- s
, errors
);
557 /* Recode them in target encoding. */
558 w
= PyUnicode_AsEncodedString(
559 u
, recode_encoding
, errors
);
563 /* Append bytes to output buffer. */
564 r
= PyString_AsString(w
);
565 rn
= PyString_Size(w
);
580 PyErr_SetString(PyExc_ValueError
,
581 "Trailing \\ in string");
585 /* XXX This assumes ASCII! */
587 case '\\': *p
++ = '\\'; break;
588 case '\'': *p
++ = '\''; break;
589 case '\"': *p
++ = '\"'; break;
590 case 'b': *p
++ = '\b'; break;
591 case 'f': *p
++ = '\014'; break; /* FF */
592 case 't': *p
++ = '\t'; break;
593 case 'n': *p
++ = '\n'; break;
594 case 'r': *p
++ = '\r'; break;
595 case 'v': *p
++ = '\013'; break; /* VT */
596 case 'a': *p
++ = '\007'; break; /* BEL, not classic C */
597 case '0': case '1': case '2': case '3':
598 case '4': case '5': case '6': case '7':
600 if ('0' <= *s
&& *s
<= '7') {
601 c
= (c
<<3) + *s
++ - '0';
602 if ('0' <= *s
&& *s
<= '7')
603 c
= (c
<<3) + *s
++ - '0';
608 if (isxdigit(Py_CHARMASK(s
[0]))
609 && isxdigit(Py_CHARMASK(s
[1]))) {
631 if (!errors
|| strcmp(errors
, "strict") == 0) {
632 PyErr_SetString(PyExc_ValueError
,
633 "invalid \\x escape");
636 if (strcmp(errors
, "replace") == 0) {
638 } else if (strcmp(errors
, "ignore") == 0)
641 PyErr_Format(PyExc_ValueError
,
643 "unknown error handling code: %.400s",
647 #ifndef Py_USING_UNICODE
652 PyErr_SetString(PyExc_ValueError
,
653 "Unicode escapes not legal "
654 "when Unicode disabled");
661 goto non_esc
; /* an arbitrary number of unescaped
662 UTF-8 bytes may follow. */
666 _PyString_Resize(&v
, (int)(p
- buf
));
674 string_getsize(register PyObject
*op
)
678 if (PyString_AsStringAndSize(op
, &s
, &len
))
683 static /*const*/ char *
684 string_getbuffer(register PyObject
*op
)
688 if (PyString_AsStringAndSize(op
, &s
, &len
))
694 PyString_Size(register PyObject
*op
)
696 if (!PyString_Check(op
))
697 return string_getsize(op
);
698 return ((PyStringObject
*)op
) -> ob_size
;
702 PyString_AsString(register PyObject
*op
)
704 if (!PyString_Check(op
))
705 return string_getbuffer(op
);
706 return ((PyStringObject
*)op
) -> ob_sval
;
710 PyString_AsStringAndSize(register PyObject
*obj
,
715 PyErr_BadInternalCall();
719 if (!PyString_Check(obj
)) {
720 #ifdef Py_USING_UNICODE
721 if (PyUnicode_Check(obj
)) {
722 obj
= _PyUnicode_AsDefaultEncodedString(obj
, NULL
);
729 PyErr_Format(PyExc_TypeError
,
730 "expected string or Unicode object, "
731 "%.200s found", obj
->ob_type
->tp_name
);
736 *s
= PyString_AS_STRING(obj
);
738 *len
= PyString_GET_SIZE(obj
);
739 else if ((int)strlen(*s
) != PyString_GET_SIZE(obj
)) {
740 PyErr_SetString(PyExc_TypeError
,
741 "expected string without null bytes");
750 string_print(PyStringObject
*op
, FILE *fp
, int flags
)
756 /* XXX Ought to check for interrupts when writing long strings */
757 if (! PyString_CheckExact(op
)) {
759 /* A str subclass may have its own __str__ method. */
760 op
= (PyStringObject
*) PyObject_Str((PyObject
*)op
);
763 ret
= string_print(op
, fp
, flags
);
767 if (flags
& Py_PRINT_RAW
) {
769 if (op
->ob_size
) fwrite(op
->ob_sval
, (int) op
->ob_size
, 1, fp
);
771 fwrite(op
->ob_sval
, 1, (int) op
->ob_size
, fp
);
776 /* figure out which quote to use; single is preferred */
778 if (memchr(op
->ob_sval
, '\'', op
->ob_size
) &&
779 !memchr(op
->ob_sval
, '"', op
->ob_size
))
783 for (i
= 0; i
< op
->ob_size
; i
++) {
785 if (c
== quote
|| c
== '\\')
786 fprintf(fp
, "\\%c", c
);
793 else if (c
< ' ' || c
>= 0x7f)
794 fprintf(fp
, "\\x%02x", c
& 0xff);
803 PyString_Repr(PyObject
*obj
, int smartquotes
)
805 register PyStringObject
* op
= (PyStringObject
*) obj
;
806 size_t newsize
= 2 + 4 * op
->ob_size
* sizeof(char);
808 if (newsize
> INT_MAX
) {
809 PyErr_SetString(PyExc_OverflowError
,
810 "string is too large to make repr");
812 v
= PyString_FromStringAndSize((char *)NULL
, newsize
);
822 /* figure out which quote to use; single is preferred */
825 memchr(op
->ob_sval
, '\'', op
->ob_size
) &&
826 !memchr(op
->ob_sval
, '"', op
->ob_size
))
829 p
= PyString_AS_STRING(v
);
831 for (i
= 0; i
< op
->ob_size
; i
++) {
832 /* There's at least enough room for a hex escape
833 and a closing quote. */
834 assert(newsize
- (p
- PyString_AS_STRING(v
)) >= 5);
836 if (c
== quote
|| c
== '\\')
837 *p
++ = '\\', *p
++ = c
;
839 *p
++ = '\\', *p
++ = 't';
841 *p
++ = '\\', *p
++ = 'n';
843 *p
++ = '\\', *p
++ = 'r';
844 else if (c
< ' ' || c
>= 0x7f) {
845 /* For performance, we don't want to call
846 PyOS_snprintf here (extra layers of
848 sprintf(p
, "\\x%02x", c
& 0xff);
854 assert(newsize
- (p
- PyString_AS_STRING(v
)) >= 1);
858 &v
, (int) (p
- PyString_AS_STRING(v
)));
864 string_repr(PyObject
*op
)
866 return PyString_Repr(op
, 1);
870 string_str(PyObject
*s
)
872 assert(PyString_Check(s
));
873 if (PyString_CheckExact(s
)) {
878 /* Subtype -- return genuine string with the same value. */
879 PyStringObject
*t
= (PyStringObject
*) s
;
880 return PyString_FromStringAndSize(t
->ob_sval
, t
->ob_size
);
885 string_length(PyStringObject
*a
)
891 string_concat(register PyStringObject
*a
, register PyObject
*bb
)
893 register unsigned int size
;
894 register PyStringObject
*op
;
895 if (!PyString_Check(bb
)) {
896 #ifdef Py_USING_UNICODE
897 if (PyUnicode_Check(bb
))
898 return PyUnicode_Concat((PyObject
*)a
, bb
);
900 PyErr_Format(PyExc_TypeError
,
901 "cannot concatenate 'str' and '%.200s' objects",
902 bb
->ob_type
->tp_name
);
905 #define b ((PyStringObject *)bb)
906 /* Optimize cases with empty left or right operand */
907 if ((a
->ob_size
== 0 || b
->ob_size
== 0) &&
908 PyString_CheckExact(a
) && PyString_CheckExact(b
)) {
909 if (a
->ob_size
== 0) {
914 return (PyObject
*)a
;
916 size
= a
->ob_size
+ b
->ob_size
;
917 /* Inline PyObject_NewVar */
918 op
= (PyStringObject
*)
919 PyObject_MALLOC(sizeof(PyStringObject
) + size
* sizeof(char));
921 return PyErr_NoMemory();
922 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
924 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
925 memcpy(op
->ob_sval
, a
->ob_sval
, (int) a
->ob_size
);
926 memcpy(op
->ob_sval
+ a
->ob_size
, b
->ob_sval
, (int) b
->ob_size
);
927 op
->ob_sval
[size
] = '\0';
928 return (PyObject
*) op
;
933 string_repeat(register PyStringObject
*a
, register int n
)
937 register PyStringObject
*op
;
941 /* watch out for overflows: the size can overflow int,
942 * and the # of bytes needed can overflow size_t
944 size
= a
->ob_size
* n
;
945 if (n
&& size
/ n
!= a
->ob_size
) {
946 PyErr_SetString(PyExc_OverflowError
,
947 "repeated string is too long");
950 if (size
== a
->ob_size
&& PyString_CheckExact(a
)) {
952 return (PyObject
*)a
;
954 nbytes
= size
* sizeof(char);
955 if (nbytes
/ sizeof(char) != (size_t)size
||
956 nbytes
+ sizeof(PyStringObject
) <= nbytes
) {
957 PyErr_SetString(PyExc_OverflowError
,
958 "repeated string is too long");
961 op
= (PyStringObject
*)
962 PyObject_MALLOC(sizeof(PyStringObject
) + nbytes
);
964 return PyErr_NoMemory();
965 PyObject_INIT_VAR(op
, &PyString_Type
, size
);
967 op
->ob_sstate
= SSTATE_NOT_INTERNED
;
968 for (i
= 0; i
< size
; i
+= a
->ob_size
)
969 memcpy(op
->ob_sval
+i
, a
->ob_sval
, (int) a
->ob_size
);
970 op
->ob_sval
[size
] = '\0';
971 return (PyObject
*) op
;
974 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
977 string_slice(register PyStringObject
*a
, register int i
, register int j
)
978 /* j -- may be negative! */
983 j
= 0; /* Avoid signed/unsigned bug in next line */
986 if (i
== 0 && j
== a
->ob_size
&& PyString_CheckExact(a
)) {
987 /* It's the same as a */
989 return (PyObject
*)a
;
993 return PyString_FromStringAndSize(a
->ob_sval
+ i
, (int) (j
-i
));
997 string_contains(PyObject
*a
, PyObject
*el
)
999 const char *lhs
, *rhs
, *end
;
1002 if (!PyString_CheckExact(el
)) {
1003 #ifdef Py_USING_UNICODE
1004 if (PyUnicode_Check(el
))
1005 return PyUnicode_Contains(a
, el
);
1007 if (!PyString_Check(el
)) {
1008 PyErr_SetString(PyExc_TypeError
,
1009 "'in <string>' requires string as left operand");
1013 size
= PyString_GET_SIZE(el
);
1014 rhs
= PyString_AS_STRING(el
);
1015 lhs
= PyString_AS_STRING(a
);
1017 /* optimize for a single character */
1019 return memchr(lhs
, *rhs
, PyString_GET_SIZE(a
)) != NULL
;
1021 end
= lhs
+ (PyString_GET_SIZE(a
) - size
);
1022 while (lhs
<= end
) {
1023 if (memcmp(lhs
++, rhs
, size
) == 0)
1031 string_item(PyStringObject
*a
, register int i
)
1035 if (i
< 0 || i
>= a
->ob_size
) {
1036 PyErr_SetString(PyExc_IndexError
, "string index out of range");
1039 pchar
= a
->ob_sval
+ i
;
1040 v
= (PyObject
*)characters
[*pchar
& UCHAR_MAX
];
1042 v
= PyString_FromStringAndSize(pchar
, 1);
1053 string_richcompare(PyStringObject
*a
, PyStringObject
*b
, int op
)
1060 /* Make sure both arguments are strings. */
1061 if (!(PyString_Check(a
) && PyString_Check(b
))) {
1062 result
= Py_NotImplemented
;
1067 case Py_EQ
:case Py_LE
:case Py_GE
:
1070 case Py_NE
:case Py_LT
:case Py_GT
:
1076 /* Supporting Py_NE here as well does not save
1077 much time, since Py_NE is rarely used. */
1078 if (a
->ob_size
== b
->ob_size
1079 && (a
->ob_sval
[0] == b
->ob_sval
[0]
1080 && memcmp(a
->ob_sval
, b
->ob_sval
,
1081 a
->ob_size
) == 0)) {
1088 len_a
= a
->ob_size
; len_b
= b
->ob_size
;
1089 min_len
= (len_a
< len_b
) ? len_a
: len_b
;
1091 c
= Py_CHARMASK(*a
->ob_sval
) - Py_CHARMASK(*b
->ob_sval
);
1093 c
= memcmp(a
->ob_sval
, b
->ob_sval
, min_len
);
1097 c
= (len_a
< len_b
) ? -1 : (len_a
> len_b
) ? 1 : 0;
1099 case Py_LT
: c
= c
< 0; break;
1100 case Py_LE
: c
= c
<= 0; break;
1101 case Py_EQ
: assert(0); break; /* unreachable */
1102 case Py_NE
: c
= c
!= 0; break;
1103 case Py_GT
: c
= c
> 0; break;
1104 case Py_GE
: c
= c
>= 0; break;
1106 result
= Py_NotImplemented
;
1109 result
= c
? Py_True
: Py_False
;
1116 _PyString_Eq(PyObject
*o1
, PyObject
*o2
)
1118 PyStringObject
*a
, *b
;
1119 a
= (PyStringObject
*)o1
;
1120 b
= (PyStringObject
*)o2
;
1121 return a
->ob_size
== b
->ob_size
1122 && *a
->ob_sval
== *b
->ob_sval
1123 && memcmp(a
->ob_sval
, b
->ob_sval
, a
->ob_size
) == 0;
1127 string_hash(PyStringObject
*a
)
1130 register unsigned char *p
;
1133 if (a
->ob_shash
!= -1)
1136 p
= (unsigned char *) a
->ob_sval
;
1139 x
= (1000003*x
) ^ *p
++;
1148 string_subscript(PyStringObject
* self
, PyObject
* item
)
1150 if (PyInt_Check(item
)) {
1151 long i
= PyInt_AS_LONG(item
);
1153 i
+= PyString_GET_SIZE(self
);
1154 return string_item(self
,i
);
1156 else if (PyLong_Check(item
)) {
1157 long i
= PyLong_AsLong(item
);
1158 if (i
== -1 && PyErr_Occurred())
1161 i
+= PyString_GET_SIZE(self
);
1162 return string_item(self
,i
);
1164 else if (PySlice_Check(item
)) {
1165 int start
, stop
, step
, slicelength
, cur
, i
;
1170 if (PySlice_GetIndicesEx((PySliceObject
*)item
,
1171 PyString_GET_SIZE(self
),
1172 &start
, &stop
, &step
, &slicelength
) < 0) {
1176 if (slicelength
<= 0) {
1177 return PyString_FromStringAndSize("", 0);
1180 source_buf
= PyString_AsString((PyObject
*)self
);
1181 result_buf
= PyMem_Malloc(slicelength
);
1183 for (cur
= start
, i
= 0; i
< slicelength
;
1185 result_buf
[i
] = source_buf
[cur
];
1188 result
= PyString_FromStringAndSize(result_buf
,
1190 PyMem_Free(result_buf
);
1195 PyErr_SetString(PyExc_TypeError
,
1196 "string indices must be integers");
1202 string_buffer_getreadbuf(PyStringObject
*self
, int index
, const void **ptr
)
1205 PyErr_SetString(PyExc_SystemError
,
1206 "accessing non-existent string segment");
1209 *ptr
= (void *)self
->ob_sval
;
1210 return self
->ob_size
;
1214 string_buffer_getwritebuf(PyStringObject
*self
, int index
, const void **ptr
)
1216 PyErr_SetString(PyExc_TypeError
,
1217 "Cannot use string as modifiable buffer");
1222 string_buffer_getsegcount(PyStringObject
*self
, int *lenp
)
1225 *lenp
= self
->ob_size
;
1230 string_buffer_getcharbuf(PyStringObject
*self
, int index
, const char **ptr
)
1233 PyErr_SetString(PyExc_SystemError
,
1234 "accessing non-existent string segment");
1237 *ptr
= self
->ob_sval
;
1238 return self
->ob_size
;
1241 static PySequenceMethods string_as_sequence
= {
1242 (inquiry
)string_length
, /*sq_length*/
1243 (binaryfunc
)string_concat
, /*sq_concat*/
1244 (intargfunc
)string_repeat
, /*sq_repeat*/
1245 (intargfunc
)string_item
, /*sq_item*/
1246 (intintargfunc
)string_slice
, /*sq_slice*/
1249 (objobjproc
)string_contains
/*sq_contains*/
1252 static PyMappingMethods string_as_mapping
= {
1253 (inquiry
)string_length
,
1254 (binaryfunc
)string_subscript
,
1258 static PyBufferProcs string_as_buffer
= {
1259 (getreadbufferproc
)string_buffer_getreadbuf
,
1260 (getwritebufferproc
)string_buffer_getwritebuf
,
1261 (getsegcountproc
)string_buffer_getsegcount
,
1262 (getcharbufferproc
)string_buffer_getcharbuf
,
1268 #define RIGHTSTRIP 1
1271 /* Arrays indexed by above */
1272 static const char *stripformat
[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
1274 #define STRIPNAME(i) (stripformat[i]+3)
1278 split_whitespace(const char *s
, int len
, int maxsplit
)
1282 PyObject
*list
= PyList_New(0);
1287 for (i
= j
= 0; i
< len
; ) {
1288 while (i
< len
&& isspace(Py_CHARMASK(s
[i
])))
1291 while (i
< len
&& !isspace(Py_CHARMASK(s
[i
])))
1294 if (maxsplit
-- <= 0)
1296 item
= PyString_FromStringAndSize(s
+j
, (int)(i
-j
));
1299 err
= PyList_Append(list
, item
);
1303 while (i
< len
&& isspace(Py_CHARMASK(s
[i
])))
1309 item
= PyString_FromStringAndSize(s
+j
, (int)(len
- j
));
1312 err
= PyList_Append(list
, item
);
1324 PyDoc_STRVAR(split__doc__
,
1325 "S.split([sep [,maxsplit]]) -> list of strings\n\
1327 Return a list of the words in the string S, using sep as the\n\
1328 delimiter string. If maxsplit is given, at most maxsplit\n\
1329 splits are done. If sep is not specified or is None, any\n\
1330 whitespace string is a separator.");
1333 string_split(PyStringObject
*self
, PyObject
*args
)
1335 int len
= PyString_GET_SIZE(self
), n
, i
, j
, err
;
1337 const char *s
= PyString_AS_STRING(self
), *sub
;
1338 PyObject
*list
, *item
, *subobj
= Py_None
;
1340 if (!PyArg_ParseTuple(args
, "|Oi:split", &subobj
, &maxsplit
))
1344 if (subobj
== Py_None
)
1345 return split_whitespace(s
, len
, maxsplit
);
1346 if (PyString_Check(subobj
)) {
1347 sub
= PyString_AS_STRING(subobj
);
1348 n
= PyString_GET_SIZE(subobj
);
1350 #ifdef Py_USING_UNICODE
1351 else if (PyUnicode_Check(subobj
))
1352 return PyUnicode_Split((PyObject
*)self
, subobj
, maxsplit
);
1354 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
1357 PyErr_SetString(PyExc_ValueError
, "empty separator");
1361 list
= PyList_New(0);
1366 while (i
+n
<= len
) {
1367 if (s
[i
] == sub
[0] && memcmp(s
+i
, sub
, n
) == 0) {
1368 if (maxsplit
-- <= 0)
1370 item
= PyString_FromStringAndSize(s
+j
, (int)(i
-j
));
1373 err
= PyList_Append(list
, item
);
1382 item
= PyString_FromStringAndSize(s
+j
, (int)(len
-j
));
1385 err
= PyList_Append(list
, item
);
1398 PyDoc_STRVAR(join__doc__
,
1399 "S.join(sequence) -> string\n\
1401 Return a string which is the concatenation of the strings in the\n\
1402 sequence. The separator between elements is S.");
1405 string_join(PyStringObject
*self
, PyObject
*orig
)
1407 char *sep
= PyString_AS_STRING(self
);
1408 const int seplen
= PyString_GET_SIZE(self
);
1409 PyObject
*res
= NULL
;
1414 PyObject
*seq
, *item
;
1416 seq
= PySequence_Fast(orig
, "");
1418 if (PyErr_ExceptionMatches(PyExc_TypeError
))
1419 PyErr_Format(PyExc_TypeError
,
1420 "sequence expected, %.80s found",
1421 orig
->ob_type
->tp_name
);
1425 seqlen
= PySequence_Size(seq
);
1428 return PyString_FromString("");
1431 item
= PySequence_Fast_GET_ITEM(seq
, 0);
1432 if (!PyString_Check(item
) && !PyUnicode_Check(item
)) {
1433 PyErr_Format(PyExc_TypeError
,
1434 "sequence item 0: expected string,"
1436 item
->ob_type
->tp_name
);
1445 /* There are at least two things to join. Do a pre-pass to figure out
1446 * the total amount of space we'll need (sz), see whether any argument
1447 * is absurd, and defer to the Unicode join if appropriate.
1449 for (i
= 0; i
< seqlen
; i
++) {
1450 const size_t old_sz
= sz
;
1451 item
= PySequence_Fast_GET_ITEM(seq
, i
);
1452 if (!PyString_Check(item
)){
1453 #ifdef Py_USING_UNICODE
1454 if (PyUnicode_Check(item
)) {
1455 /* Defer to Unicode join.
1456 * CAUTION: There's no guarantee that the
1457 * original sequence can be iterated over
1458 * again, so we must pass seq here.
1461 result
= PyUnicode_Join((PyObject
*)self
, seq
);
1466 PyErr_Format(PyExc_TypeError
,
1467 "sequence item %i: expected string,"
1469 i
, item
->ob_type
->tp_name
);
1473 sz
+= PyString_GET_SIZE(item
);
1476 if (sz
< old_sz
|| sz
> INT_MAX
) {
1477 PyErr_SetString(PyExc_OverflowError
,
1478 "join() is too long for a Python string");
1484 /* Allocate result space. */
1485 res
= PyString_FromStringAndSize((char*)NULL
, (int)sz
);
1491 /* Catenate everything. */
1492 p
= PyString_AS_STRING(res
);
1493 for (i
= 0; i
< seqlen
; ++i
) {
1495 item
= PySequence_Fast_GET_ITEM(seq
, i
);
1496 n
= PyString_GET_SIZE(item
);
1497 memcpy(p
, PyString_AS_STRING(item
), n
);
1499 if (i
< seqlen
- 1) {
1500 memcpy(p
, sep
, seplen
);
1510 _PyString_Join(PyObject
*sep
, PyObject
*x
)
1512 assert(sep
!= NULL
&& PyString_Check(sep
));
1514 return string_join((PyStringObject
*)sep
, x
);
1518 string_adjust_indices(int *start
, int *end
, int len
)
1533 string_find_internal(PyStringObject
*self
, PyObject
*args
, int dir
)
1535 const char *s
= PyString_AS_STRING(self
), *sub
;
1536 int len
= PyString_GET_SIZE(self
);
1537 int n
, i
= 0, last
= INT_MAX
;
1540 if (!PyArg_ParseTuple(args
, "O|O&O&:find/rfind/index/rindex",
1541 &subobj
, _PyEval_SliceIndex
, &i
, _PyEval_SliceIndex
, &last
))
1543 if (PyString_Check(subobj
)) {
1544 sub
= PyString_AS_STRING(subobj
);
1545 n
= PyString_GET_SIZE(subobj
);
1547 #ifdef Py_USING_UNICODE
1548 else if (PyUnicode_Check(subobj
))
1549 return PyUnicode_Find((PyObject
*)self
, subobj
, i
, last
, dir
);
1551 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
1554 string_adjust_indices(&i
, &last
, len
);
1557 if (n
== 0 && i
<= last
)
1560 for (; i
<= last
; ++i
)
1561 if (s
[i
] == sub
[0] && memcmp(&s
[i
], sub
, n
) == 0)
1567 if (n
== 0 && i
<= last
)
1569 for (j
= last
-n
; j
>= i
; --j
)
1570 if (s
[j
] == sub
[0] && memcmp(&s
[j
], sub
, n
) == 0)
1578 PyDoc_STRVAR(find__doc__
,
1579 "S.find(sub [,start [,end]]) -> int\n\
1581 Return the lowest index in S where substring sub is found,\n\
1582 such that sub is contained within s[start,end]. Optional\n\
1583 arguments start and end are interpreted as in slice notation.\n\
1585 Return -1 on failure.");
1588 string_find(PyStringObject
*self
, PyObject
*args
)
1590 long result
= string_find_internal(self
, args
, +1);
1593 return PyInt_FromLong(result
);
1597 PyDoc_STRVAR(index__doc__
,
1598 "S.index(sub [,start [,end]]) -> int\n\
1600 Like S.find() but raise ValueError when the substring is not found.");
1603 string_index(PyStringObject
*self
, PyObject
*args
)
1605 long result
= string_find_internal(self
, args
, +1);
1609 PyErr_SetString(PyExc_ValueError
,
1610 "substring not found in string.index");
1613 return PyInt_FromLong(result
);
1617 PyDoc_STRVAR(rfind__doc__
,
1618 "S.rfind(sub [,start [,end]]) -> int\n\
1620 Return the highest index in S where substring sub is found,\n\
1621 such that sub is contained within s[start,end]. Optional\n\
1622 arguments start and end are interpreted as in slice notation.\n\
1624 Return -1 on failure.");
1627 string_rfind(PyStringObject
*self
, PyObject
*args
)
1629 long result
= string_find_internal(self
, args
, -1);
1632 return PyInt_FromLong(result
);
1636 PyDoc_STRVAR(rindex__doc__
,
1637 "S.rindex(sub [,start [,end]]) -> int\n\
1639 Like S.rfind() but raise ValueError when the substring is not found.");
1642 string_rindex(PyStringObject
*self
, PyObject
*args
)
1644 long result
= string_find_internal(self
, args
, -1);
1648 PyErr_SetString(PyExc_ValueError
,
1649 "substring not found in string.rindex");
1652 return PyInt_FromLong(result
);
1657 do_xstrip(PyStringObject
*self
, int striptype
, PyObject
*sepobj
)
1659 char *s
= PyString_AS_STRING(self
);
1660 int len
= PyString_GET_SIZE(self
);
1661 char *sep
= PyString_AS_STRING(sepobj
);
1662 int seplen
= PyString_GET_SIZE(sepobj
);
1666 if (striptype
!= RIGHTSTRIP
) {
1667 while (i
< len
&& memchr(sep
, Py_CHARMASK(s
[i
]), seplen
)) {
1673 if (striptype
!= LEFTSTRIP
) {
1676 } while (j
>= i
&& memchr(sep
, Py_CHARMASK(s
[j
]), seplen
));
1680 if (i
== 0 && j
== len
&& PyString_CheckExact(self
)) {
1682 return (PyObject
*)self
;
1685 return PyString_FromStringAndSize(s
+i
, j
-i
);
1690 do_strip(PyStringObject
*self
, int striptype
)
1692 char *s
= PyString_AS_STRING(self
);
1693 int len
= PyString_GET_SIZE(self
), i
, j
;
1696 if (striptype
!= RIGHTSTRIP
) {
1697 while (i
< len
&& isspace(Py_CHARMASK(s
[i
]))) {
1703 if (striptype
!= LEFTSTRIP
) {
1706 } while (j
>= i
&& isspace(Py_CHARMASK(s
[j
])));
1710 if (i
== 0 && j
== len
&& PyString_CheckExact(self
)) {
1712 return (PyObject
*)self
;
1715 return PyString_FromStringAndSize(s
+i
, j
-i
);
1720 do_argstrip(PyStringObject
*self
, int striptype
, PyObject
*args
)
1722 PyObject
*sep
= NULL
;
1724 if (!PyArg_ParseTuple(args
, (char *)stripformat
[striptype
], &sep
))
1727 if (sep
!= NULL
&& sep
!= Py_None
) {
1728 if (PyString_Check(sep
))
1729 return do_xstrip(self
, striptype
, sep
);
1730 #ifdef Py_USING_UNICODE
1731 else if (PyUnicode_Check(sep
)) {
1732 PyObject
*uniself
= PyUnicode_FromObject((PyObject
*)self
);
1736 res
= _PyUnicode_XStrip((PyUnicodeObject
*)uniself
,
1743 PyErr_Format(PyExc_TypeError
,
1744 #ifdef Py_USING_UNICODE
1745 "%s arg must be None, str or unicode",
1747 "%s arg must be None or str",
1749 STRIPNAME(striptype
));
1752 return do_xstrip(self
, striptype
, sep
);
1755 return do_strip(self
, striptype
);
1759 PyDoc_STRVAR(strip__doc__
,
1760 "S.strip([sep]) -> string or unicode\n\
1762 Return a copy of the string S with leading and trailing\n\
1763 whitespace removed.\n\
1764 If sep is given and not None, remove characters in sep instead.\n\
1765 If sep is unicode, S will be converted to unicode before stripping");
1768 string_strip(PyStringObject
*self
, PyObject
*args
)
1770 if (PyTuple_GET_SIZE(args
) == 0)
1771 return do_strip(self
, BOTHSTRIP
); /* Common case */
1773 return do_argstrip(self
, BOTHSTRIP
, args
);
1777 PyDoc_STRVAR(lstrip__doc__
,
1778 "S.lstrip([sep]) -> string or unicode\n\
1780 Return a copy of the string S with leading whitespace removed.\n\
1781 If sep is given and not None, remove characters in sep instead.\n\
1782 If sep is unicode, S will be converted to unicode before stripping");
1785 string_lstrip(PyStringObject
*self
, PyObject
*args
)
1787 if (PyTuple_GET_SIZE(args
) == 0)
1788 return do_strip(self
, LEFTSTRIP
); /* Common case */
1790 return do_argstrip(self
, LEFTSTRIP
, args
);
1794 PyDoc_STRVAR(rstrip__doc__
,
1795 "S.rstrip([sep]) -> string or unicode\n\
1797 Return a copy of the string S with trailing whitespace removed.\n\
1798 If sep is given and not None, remove characters in sep instead.\n\
1799 If sep is unicode, S will be converted to unicode before stripping");
1802 string_rstrip(PyStringObject
*self
, PyObject
*args
)
1804 if (PyTuple_GET_SIZE(args
) == 0)
1805 return do_strip(self
, RIGHTSTRIP
); /* Common case */
1807 return do_argstrip(self
, RIGHTSTRIP
, args
);
1811 PyDoc_STRVAR(lower__doc__
,
1812 "S.lower() -> string\n\
1814 Return a copy of the string S converted to lowercase.");
1817 string_lower(PyStringObject
*self
)
1819 char *s
= PyString_AS_STRING(self
), *s_new
;
1820 int i
, n
= PyString_GET_SIZE(self
);
1823 new = PyString_FromStringAndSize(NULL
, n
);
1826 s_new
= PyString_AsString(new);
1827 for (i
= 0; i
< n
; i
++) {
1828 int c
= Py_CHARMASK(*s
++);
1830 *s_new
= tolower(c
);
1839 PyDoc_STRVAR(upper__doc__
,
1840 "S.upper() -> string\n\
1842 Return a copy of the string S converted to uppercase.");
1845 string_upper(PyStringObject
*self
)
1847 char *s
= PyString_AS_STRING(self
), *s_new
;
1848 int i
, n
= PyString_GET_SIZE(self
);
1851 new = PyString_FromStringAndSize(NULL
, n
);
1854 s_new
= PyString_AsString(new);
1855 for (i
= 0; i
< n
; i
++) {
1856 int c
= Py_CHARMASK(*s
++);
1858 *s_new
= toupper(c
);
1867 PyDoc_STRVAR(title__doc__
,
1868 "S.title() -> string\n\
1870 Return a titlecased version of S, i.e. words start with uppercase\n\
1871 characters, all remaining cased characters have lowercase.");
1874 string_title(PyStringObject
*self
)
1876 char *s
= PyString_AS_STRING(self
), *s_new
;
1877 int i
, n
= PyString_GET_SIZE(self
);
1878 int previous_is_cased
= 0;
1881 new = PyString_FromStringAndSize(NULL
, n
);
1884 s_new
= PyString_AsString(new);
1885 for (i
= 0; i
< n
; i
++) {
1886 int c
= Py_CHARMASK(*s
++);
1888 if (!previous_is_cased
)
1890 previous_is_cased
= 1;
1891 } else if (isupper(c
)) {
1892 if (previous_is_cased
)
1894 previous_is_cased
= 1;
1896 previous_is_cased
= 0;
1902 PyDoc_STRVAR(capitalize__doc__
,
1903 "S.capitalize() -> string\n\
1905 Return a copy of the string S with only its first character\n\
1909 string_capitalize(PyStringObject
*self
)
1911 char *s
= PyString_AS_STRING(self
), *s_new
;
1912 int i
, n
= PyString_GET_SIZE(self
);
1915 new = PyString_FromStringAndSize(NULL
, n
);
1918 s_new
= PyString_AsString(new);
1920 int c
= Py_CHARMASK(*s
++);
1922 *s_new
= toupper(c
);
1927 for (i
= 1; i
< n
; i
++) {
1928 int c
= Py_CHARMASK(*s
++);
1930 *s_new
= tolower(c
);
1939 PyDoc_STRVAR(count__doc__
,
1940 "S.count(sub[, start[, end]]) -> int\n\
1942 Return the number of occurrences of substring sub in string\n\
1943 S[start:end]. Optional arguments start and end are\n\
1944 interpreted as in slice notation.");
1947 string_count(PyStringObject
*self
, PyObject
*args
)
1949 const char *s
= PyString_AS_STRING(self
), *sub
;
1950 int len
= PyString_GET_SIZE(self
), n
;
1951 int i
= 0, last
= INT_MAX
;
1955 if (!PyArg_ParseTuple(args
, "O|O&O&:count", &subobj
,
1956 _PyEval_SliceIndex
, &i
, _PyEval_SliceIndex
, &last
))
1959 if (PyString_Check(subobj
)) {
1960 sub
= PyString_AS_STRING(subobj
);
1961 n
= PyString_GET_SIZE(subobj
);
1963 #ifdef Py_USING_UNICODE
1964 else if (PyUnicode_Check(subobj
)) {
1966 count
= PyUnicode_Count((PyObject
*)self
, subobj
, i
, last
);
1970 return PyInt_FromLong((long) count
);
1973 else if (PyObject_AsCharBuffer(subobj
, &sub
, &n
))
1976 string_adjust_indices(&i
, &last
, len
);
1980 return PyInt_FromLong((long) (m
-i
));
1984 if (!memcmp(s
+i
, sub
, n
)) {
1991 return PyInt_FromLong((long) r
);
1995 PyDoc_STRVAR(swapcase__doc__
,
1996 "S.swapcase() -> string\n\
1998 Return a copy of the string S with uppercase characters\n\
1999 converted to lowercase and vice versa.");
2002 string_swapcase(PyStringObject
*self
)
2004 char *s
= PyString_AS_STRING(self
), *s_new
;
2005 int i
, n
= PyString_GET_SIZE(self
);
2008 new = PyString_FromStringAndSize(NULL
, n
);
2011 s_new
= PyString_AsString(new);
2012 for (i
= 0; i
< n
; i
++) {
2013 int c
= Py_CHARMASK(*s
++);
2015 *s_new
= toupper(c
);
2017 else if (isupper(c
)) {
2018 *s_new
= tolower(c
);
2028 PyDoc_STRVAR(translate__doc__
,
2029 "S.translate(table [,deletechars]) -> string\n\
2031 Return a copy of the string S, where all characters occurring\n\
2032 in the optional argument deletechars are removed, and the\n\
2033 remaining characters have been mapped through the given\n\
2034 translation table, which must be a string of length 256.");
2037 string_translate(PyStringObject
*self
, PyObject
*args
)
2039 register char *input
, *output
;
2040 register const char *table
;
2041 register int i
, c
, changed
= 0;
2042 PyObject
*input_obj
= (PyObject
*)self
;
2043 const char *table1
, *output_start
, *del_table
=NULL
;
2044 int inlen
, tablen
, dellen
= 0;
2046 int trans_table
[256];
2047 PyObject
*tableobj
, *delobj
= NULL
;
2049 if (!PyArg_UnpackTuple(args
, "translate", 1, 2,
2050 &tableobj
, &delobj
))
2053 if (PyString_Check(tableobj
)) {
2054 table1
= PyString_AS_STRING(tableobj
);
2055 tablen
= PyString_GET_SIZE(tableobj
);
2057 #ifdef Py_USING_UNICODE
2058 else if (PyUnicode_Check(tableobj
)) {
2059 /* Unicode .translate() does not support the deletechars
2060 parameter; instead a mapping to None will cause characters
2062 if (delobj
!= NULL
) {
2063 PyErr_SetString(PyExc_TypeError
,
2064 "deletions are implemented differently for unicode");
2067 return PyUnicode_Translate((PyObject
*)self
, tableobj
, NULL
);
2070 else if (PyObject_AsCharBuffer(tableobj
, &table1
, &tablen
))
2073 if (tablen
!= 256) {
2074 PyErr_SetString(PyExc_ValueError
,
2075 "translation table must be 256 characters long");
2079 if (delobj
!= NULL
) {
2080 if (PyString_Check(delobj
)) {
2081 del_table
= PyString_AS_STRING(delobj
);
2082 dellen
= PyString_GET_SIZE(delobj
);
2084 #ifdef Py_USING_UNICODE
2085 else if (PyUnicode_Check(delobj
)) {
2086 PyErr_SetString(PyExc_TypeError
,
2087 "deletions are implemented differently for unicode");
2091 else if (PyObject_AsCharBuffer(delobj
, &del_table
, &dellen
))
2100 inlen
= PyString_Size(input_obj
);
2101 result
= PyString_FromStringAndSize((char *)NULL
, inlen
);
2104 output_start
= output
= PyString_AsString(result
);
2105 input
= PyString_AsString(input_obj
);
2108 /* If no deletions are required, use faster code */
2109 for (i
= inlen
; --i
>= 0; ) {
2110 c
= Py_CHARMASK(*input
++);
2111 if (Py_CHARMASK((*output
++ = table
[c
])) != c
)
2114 if (changed
|| !PyString_CheckExact(input_obj
))
2117 Py_INCREF(input_obj
);
2121 for (i
= 0; i
< 256; i
++)
2122 trans_table
[i
] = Py_CHARMASK(table
[i
]);
2124 for (i
= 0; i
< dellen
; i
++)
2125 trans_table
[(int) Py_CHARMASK(del_table
[i
])] = -1;
2127 for (i
= inlen
; --i
>= 0; ) {
2128 c
= Py_CHARMASK(*input
++);
2129 if (trans_table
[c
] != -1)
2130 if (Py_CHARMASK(*output
++ = (char)trans_table
[c
]) == c
)
2134 if (!changed
&& PyString_CheckExact(input_obj
)) {
2136 Py_INCREF(input_obj
);
2139 /* Fix the size of the resulting string */
2141 _PyString_Resize(&result
, output
- output_start
);
2146 /* What follows is used for implementing replace(). Perry Stoll. */
2151 strstr replacement for arbitrary blocks of memory.
2153 Locates the first occurrence in the memory pointed to by MEM of the
2154 contents of memory pointed to by PAT. Returns the index into MEM if
2155 found, or -1 if not found. If len of PAT is greater than length of
2156 MEM, the function returns -1.
2159 mymemfind(const char *mem
, int len
, const char *pat
, int pat_len
)
2163 /* pattern can not occur in the last pat_len-1 chars */
2166 for (ii
= 0; ii
<= len
; ii
++) {
2167 if (mem
[ii
] == pat
[0] && memcmp(&mem
[ii
], pat
, pat_len
) == 0) {
2177 Return the number of non-overlapping occurrences of PAT in MEM,
2178 meaning mem=1111 and pat==11 returns 2.
2179 mem=11111 and pat==11 also returns 2.
2182 mymemcnt(const char *mem
, int len
, const char *pat
, int pat_len
)
2184 register int offset
= 0;
2188 offset
= mymemfind(mem
, len
, pat
, pat_len
);
2191 mem
+= offset
+ pat_len
;
2192 len
-= offset
+ pat_len
;
2201 Return a string in which all occurrences of PAT in memory STR are
2204 If length of PAT is greater than length of STR or there are no occurrences
2205 of PAT in STR, then the original string is returned. Otherwise, a new
2206 string is allocated here and returned.
2208 on return, out_len is:
2209 the length of output string, or
2210 -1 if the input string is returned, or
2211 unchanged if an error occurs (no memory).
2214 the new string allocated locally, or
2215 NULL if an error occurred.
2218 mymemreplace(const char *str
, int len
, /* input string */
2219 const char *pat
, int pat_len
, /* pattern string to find */
2220 const char *sub
, int sub_len
, /* substitution string */
2221 int count
, /* number of replacements */
2226 int nfound
, offset
, new_len
;
2228 if (len
== 0 || (pat_len
== 0 && sub_len
== 0) || pat_len
> len
)
2231 /* find length of output string */
2232 nfound
= (pat_len
> 0) ? mymemcnt(str
, len
, pat
, pat_len
) : len
+ 1;
2235 else if (nfound
> count
)
2240 new_len
= len
+ nfound
*(sub_len
- pat_len
);
2242 /* Have to allocate something for the caller to free(). */
2243 out_s
= (char *)PyMem_MALLOC(1);
2249 assert(new_len
> 0);
2250 new_s
= (char *)PyMem_MALLOC(new_len
);
2256 for (; nfound
> 0; --nfound
) {
2257 /* find index of next instance of pattern */
2258 offset
= mymemfind(str
, len
, pat
, pat_len
);
2262 /* copy non matching part of input string */
2263 memcpy(new_s
, str
, offset
);
2264 str
+= offset
+ pat_len
;
2265 len
-= offset
+ pat_len
;
2267 /* copy substitute into the output string */
2269 memcpy(new_s
, sub
, sub_len
);
2272 /* copy any remaining values into output string */
2274 memcpy(new_s
, str
, len
);
2277 for (;;++str
, --len
) {
2278 memcpy(new_s
, sub
, sub_len
);
2280 if (--nfound
<= 0) {
2281 memcpy(new_s
, str
, len
);
2293 return (char *)str
; /* cast away const */
2297 PyDoc_STRVAR(replace__doc__
,
2298 "S.replace (old, new[, maxsplit]) -> string\n\
2300 Return a copy of string S with all occurrences of substring\n\
2301 old replaced by new. If the optional argument maxsplit is\n\
2302 given, only the first maxsplit occurrences are replaced.");
2305 string_replace(PyStringObject
*self
, PyObject
*args
)
2307 const char *str
= PyString_AS_STRING(self
), *sub
, *repl
;
2309 const int len
= PyString_GET_SIZE(self
);
2310 int sub_len
, repl_len
, out_len
;
2313 PyObject
*subobj
, *replobj
;
2315 if (!PyArg_ParseTuple(args
, "OO|i:replace",
2316 &subobj
, &replobj
, &count
))
2319 if (PyString_Check(subobj
)) {
2320 sub
= PyString_AS_STRING(subobj
);
2321 sub_len
= PyString_GET_SIZE(subobj
);
2323 #ifdef Py_USING_UNICODE
2324 else if (PyUnicode_Check(subobj
))
2325 return PyUnicode_Replace((PyObject
*)self
,
2326 subobj
, replobj
, count
);
2328 else if (PyObject_AsCharBuffer(subobj
, &sub
, &sub_len
))
2331 if (PyString_Check(replobj
)) {
2332 repl
= PyString_AS_STRING(replobj
);
2333 repl_len
= PyString_GET_SIZE(replobj
);
2335 #ifdef Py_USING_UNICODE
2336 else if (PyUnicode_Check(replobj
))
2337 return PyUnicode_Replace((PyObject
*)self
,
2338 subobj
, replobj
, count
);
2340 else if (PyObject_AsCharBuffer(replobj
, &repl
, &repl_len
))
2343 new_s
= mymemreplace(str
,len
,sub
,sub_len
,repl
,repl_len
,count
,&out_len
);
2344 if (new_s
== NULL
) {
2348 if (out_len
== -1) {
2349 if (PyString_CheckExact(self
)) {
2350 /* we're returning another reference to self */
2351 new = (PyObject
*)self
;
2355 new = PyString_FromStringAndSize(str
, len
);
2361 new = PyString_FromStringAndSize(new_s
, out_len
);
2368 PyDoc_STRVAR(startswith__doc__
,
2369 "S.startswith(prefix[, start[, end]]) -> bool\n\
2371 Return True if S starts with the specified prefix, False otherwise. With\n\
2372 optional start, test S beginning at that position. With optional end, stop\n\
2373 comparing S at that position.");
2376 string_startswith(PyStringObject
*self
, PyObject
*args
)
2378 const char* str
= PyString_AS_STRING(self
);
2379 int len
= PyString_GET_SIZE(self
);
2386 if (!PyArg_ParseTuple(args
, "O|O&O&:startswith", &subobj
,
2387 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
2389 if (PyString_Check(subobj
)) {
2390 prefix
= PyString_AS_STRING(subobj
);
2391 plen
= PyString_GET_SIZE(subobj
);
2393 #ifdef Py_USING_UNICODE
2394 else if (PyUnicode_Check(subobj
)) {
2396 rc
= PyUnicode_Tailmatch((PyObject
*)self
,
2397 subobj
, start
, end
, -1);
2401 return PyBool_FromLong((long) rc
);
2404 else if (PyObject_AsCharBuffer(subobj
, &prefix
, &plen
))
2407 string_adjust_indices(&start
, &end
, len
);
2409 if (start
+plen
> len
)
2410 return PyBool_FromLong(0);
2412 if (end
-start
>= plen
)
2413 return PyBool_FromLong(!memcmp(str
+start
, prefix
, plen
));
2415 return PyBool_FromLong(0);
2419 PyDoc_STRVAR(endswith__doc__
,
2420 "S.endswith(suffix[, start[, end]]) -> bool\n\
2422 Return True if S ends with the specified suffix, False otherwise. With\n\
2423 optional start, test S beginning at that position. With optional end, stop\n\
2424 comparing S at that position.");
2427 string_endswith(PyStringObject
*self
, PyObject
*args
)
2429 const char* str
= PyString_AS_STRING(self
);
2430 int len
= PyString_GET_SIZE(self
);
2437 if (!PyArg_ParseTuple(args
, "O|O&O&:endswith", &subobj
,
2438 _PyEval_SliceIndex
, &start
, _PyEval_SliceIndex
, &end
))
2440 if (PyString_Check(subobj
)) {
2441 suffix
= PyString_AS_STRING(subobj
);
2442 slen
= PyString_GET_SIZE(subobj
);
2444 #ifdef Py_USING_UNICODE
2445 else if (PyUnicode_Check(subobj
)) {
2447 rc
= PyUnicode_Tailmatch((PyObject
*)self
,
2448 subobj
, start
, end
, +1);
2452 return PyBool_FromLong((long) rc
);
2455 else if (PyObject_AsCharBuffer(subobj
, &suffix
, &slen
))
2458 string_adjust_indices(&start
, &end
, len
);
2460 if (end
-start
< slen
|| start
> len
)
2461 return PyBool_FromLong(0);
2463 if (end
-slen
> start
)
2465 if (end
-start
>= slen
)
2466 return PyBool_FromLong(!memcmp(str
+start
, suffix
, slen
));
2468 return PyBool_FromLong(0);
2472 PyDoc_STRVAR(encode__doc__
,
2473 "S.encode([encoding[,errors]]) -> object\n\
2475 Encodes S using the codec registered for encoding. encoding defaults\n\
2476 to the default encoding. errors may be given to set a different error\n\
2477 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2478 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2479 'xmlcharrefreplace' as well as any other name registered with\n\
2480 codecs.register_error that is able to handle UnicodeEncodeErrors.");
2483 string_encode(PyStringObject
*self
, PyObject
*args
)
2485 char *encoding
= NULL
;
2486 char *errors
= NULL
;
2487 if (!PyArg_ParseTuple(args
, "|ss:encode", &encoding
, &errors
))
2489 return PyString_AsEncodedObject((PyObject
*)self
, encoding
, errors
);
2493 PyDoc_STRVAR(decode__doc__
,
2494 "S.decode([encoding[,errors]]) -> object\n\
2496 Decodes S using the codec registered for encoding. encoding defaults\n\
2497 to the default encoding. errors may be given to set a different error\n\
2498 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2499 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2500 as well as any other name registerd with codecs.register_error that is\n\
2501 able to handle UnicodeDecodeErrors.");
2504 string_decode(PyStringObject
*self
, PyObject
*args
)
2506 char *encoding
= NULL
;
2507 char *errors
= NULL
;
2508 if (!PyArg_ParseTuple(args
, "|ss:decode", &encoding
, &errors
))
2510 return PyString_AsDecodedObject((PyObject
*)self
, encoding
, errors
);
2514 PyDoc_STRVAR(expandtabs__doc__
,
2515 "S.expandtabs([tabsize]) -> string\n\
2517 Return a copy of S where all tab characters are expanded using spaces.\n\
2518 If tabsize is not given, a tab size of 8 characters is assumed.");
2521 string_expandtabs(PyStringObject
*self
, PyObject
*args
)
2529 if (!PyArg_ParseTuple(args
, "|i:expandtabs", &tabsize
))
2532 /* First pass: determine size of output string */
2534 e
= PyString_AS_STRING(self
) + PyString_GET_SIZE(self
);
2535 for (p
= PyString_AS_STRING(self
); p
< e
; p
++)
2538 j
+= tabsize
- (j
% tabsize
);
2542 if (*p
== '\n' || *p
== '\r') {
2548 /* Second pass: create output string and fill it */
2549 u
= PyString_FromStringAndSize(NULL
, i
+ j
);
2554 q
= PyString_AS_STRING(u
);
2556 for (p
= PyString_AS_STRING(self
); p
< e
; p
++)
2559 i
= tabsize
- (j
% tabsize
);
2568 if (*p
== '\n' || *p
== '\r')
2576 pad(PyStringObject
*self
, int left
, int right
, char fill
)
2585 if (left
== 0 && right
== 0 && PyString_CheckExact(self
)) {
2587 return (PyObject
*)self
;
2590 u
= PyString_FromStringAndSize(NULL
,
2591 left
+ PyString_GET_SIZE(self
) + right
);
2594 memset(PyString_AS_STRING(u
), fill
, left
);
2595 memcpy(PyString_AS_STRING(u
) + left
,
2596 PyString_AS_STRING(self
),
2597 PyString_GET_SIZE(self
));
2599 memset(PyString_AS_STRING(u
) + left
+ PyString_GET_SIZE(self
),
2606 PyDoc_STRVAR(ljust__doc__
,
2607 "S.ljust(width) -> string\n"
2609 "Return S left justified in a string of length width. Padding is\n"
2610 "done using spaces.");
2613 string_ljust(PyStringObject
*self
, PyObject
*args
)
2616 if (!PyArg_ParseTuple(args
, "i:ljust", &width
))
2619 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
2621 return (PyObject
*) self
;
2624 return pad(self
, 0, width
- PyString_GET_SIZE(self
), ' ');
2628 PyDoc_STRVAR(rjust__doc__
,
2629 "S.rjust(width) -> string\n"
2631 "Return S right justified in a string of length width. Padding is\n"
2632 "done using spaces.");
2635 string_rjust(PyStringObject
*self
, PyObject
*args
)
2638 if (!PyArg_ParseTuple(args
, "i:rjust", &width
))
2641 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
2643 return (PyObject
*) self
;
2646 return pad(self
, width
- PyString_GET_SIZE(self
), 0, ' ');
2650 PyDoc_STRVAR(center__doc__
,
2651 "S.center(width) -> string\n"
2653 "Return S centered in a string of length width. Padding is done\n"
2657 string_center(PyStringObject
*self
, PyObject
*args
)
2662 if (!PyArg_ParseTuple(args
, "i:center", &width
))
2665 if (PyString_GET_SIZE(self
) >= width
&& PyString_CheckExact(self
)) {
2667 return (PyObject
*) self
;
2670 marg
= width
- PyString_GET_SIZE(self
);
2671 left
= marg
/ 2 + (marg
& width
& 1);
2673 return pad(self
, left
, marg
- left
, ' ');
2676 PyDoc_STRVAR(zfill__doc__
,
2677 "S.zfill(width) -> string\n"
2679 "Pad a numeric string S with zeros on the left, to fill a field\n"
2680 "of the specified width. The string S is never truncated.");
2683 string_zfill(PyStringObject
*self
, PyObject
*args
)
2690 if (!PyArg_ParseTuple(args
, "i:zfill", &width
))
2693 if (PyString_GET_SIZE(self
) >= width
) {
2694 if (PyString_CheckExact(self
)) {
2696 return (PyObject
*) self
;
2699 return PyString_FromStringAndSize(
2700 PyString_AS_STRING(self
),
2701 PyString_GET_SIZE(self
)
2705 fill
= width
- PyString_GET_SIZE(self
);
2707 s
= pad(self
, fill
, 0, '0');
2712 p
= PyString_AS_STRING(s
);
2713 if (p
[fill
] == '+' || p
[fill
] == '-') {
2714 /* move sign to beginning of string */
2719 return (PyObject
*) s
;
2722 PyDoc_STRVAR(isspace__doc__
,
2723 "S.isspace() -> bool\n"
2725 "Return True if there are only whitespace characters in S,\n"
2726 "False otherwise.");
2729 string_isspace(PyStringObject
*self
)
2731 register const unsigned char *p
2732 = (unsigned char *) PyString_AS_STRING(self
);
2733 register const unsigned char *e
;
2735 /* Shortcut for single character strings */
2736 if (PyString_GET_SIZE(self
) == 1 &&
2738 return PyBool_FromLong(1);
2740 /* Special case for empty strings */
2741 if (PyString_GET_SIZE(self
) == 0)
2742 return PyBool_FromLong(0);
2744 e
= p
+ PyString_GET_SIZE(self
);
2745 for (; p
< e
; p
++) {
2747 return PyBool_FromLong(0);
2749 return PyBool_FromLong(1);
2753 PyDoc_STRVAR(isalpha__doc__
,
2754 "S.isalpha() -> bool\n\
2756 Return True if all characters in S are alphabetic\n\
2757 and there is at least one character in S, False otherwise.");
2760 string_isalpha(PyStringObject
*self
)
2762 register const unsigned char *p
2763 = (unsigned char *) PyString_AS_STRING(self
);
2764 register const unsigned char *e
;
2766 /* Shortcut for single character strings */
2767 if (PyString_GET_SIZE(self
) == 1 &&
2769 return PyBool_FromLong(1);
2771 /* Special case for empty strings */
2772 if (PyString_GET_SIZE(self
) == 0)
2773 return PyBool_FromLong(0);
2775 e
= p
+ PyString_GET_SIZE(self
);
2776 for (; p
< e
; p
++) {
2778 return PyBool_FromLong(0);
2780 return PyBool_FromLong(1);
2784 PyDoc_STRVAR(isalnum__doc__
,
2785 "S.isalnum() -> bool\n\
2787 Return True if all characters in S are alphanumeric\n\
2788 and there is at least one character in S, False otherwise.");
2791 string_isalnum(PyStringObject
*self
)
2793 register const unsigned char *p
2794 = (unsigned char *) PyString_AS_STRING(self
);
2795 register const unsigned char *e
;
2797 /* Shortcut for single character strings */
2798 if (PyString_GET_SIZE(self
) == 1 &&
2800 return PyBool_FromLong(1);
2802 /* Special case for empty strings */
2803 if (PyString_GET_SIZE(self
) == 0)
2804 return PyBool_FromLong(0);
2806 e
= p
+ PyString_GET_SIZE(self
);
2807 for (; p
< e
; p
++) {
2809 return PyBool_FromLong(0);
2811 return PyBool_FromLong(1);
2815 PyDoc_STRVAR(isdigit__doc__
,
2816 "S.isdigit() -> bool\n\
2818 Return True if there are only digit characters in S,\n\
2822 string_isdigit(PyStringObject
*self
)
2824 register const unsigned char *p
2825 = (unsigned char *) PyString_AS_STRING(self
);
2826 register const unsigned char *e
;
2828 /* Shortcut for single character strings */
2829 if (PyString_GET_SIZE(self
) == 1 &&
2831 return PyBool_FromLong(1);
2833 /* Special case for empty strings */
2834 if (PyString_GET_SIZE(self
) == 0)
2835 return PyBool_FromLong(0);
2837 e
= p
+ PyString_GET_SIZE(self
);
2838 for (; p
< e
; p
++) {
2840 return PyBool_FromLong(0);
2842 return PyBool_FromLong(1);
2846 PyDoc_STRVAR(islower__doc__
,
2847 "S.islower() -> bool\n\
2849 Return True if all cased characters in S are lowercase and there is\n\
2850 at least one cased character in S, False otherwise.");
2853 string_islower(PyStringObject
*self
)
2855 register const unsigned char *p
2856 = (unsigned char *) PyString_AS_STRING(self
);
2857 register const unsigned char *e
;
2860 /* Shortcut for single character strings */
2861 if (PyString_GET_SIZE(self
) == 1)
2862 return PyBool_FromLong(islower(*p
) != 0);
2864 /* Special case for empty strings */
2865 if (PyString_GET_SIZE(self
) == 0)
2866 return PyBool_FromLong(0);
2868 e
= p
+ PyString_GET_SIZE(self
);
2870 for (; p
< e
; p
++) {
2872 return PyBool_FromLong(0);
2873 else if (!cased
&& islower(*p
))
2876 return PyBool_FromLong(cased
);
2880 PyDoc_STRVAR(isupper__doc__
,
2881 "S.isupper() -> bool\n\
2883 Return True if all cased characters in S are uppercase and there is\n\
2884 at least one cased character in S, False otherwise.");
2887 string_isupper(PyStringObject
*self
)
2889 register const unsigned char *p
2890 = (unsigned char *) PyString_AS_STRING(self
);
2891 register const unsigned char *e
;
2894 /* Shortcut for single character strings */
2895 if (PyString_GET_SIZE(self
) == 1)
2896 return PyBool_FromLong(isupper(*p
) != 0);
2898 /* Special case for empty strings */
2899 if (PyString_GET_SIZE(self
) == 0)
2900 return PyBool_FromLong(0);
2902 e
= p
+ PyString_GET_SIZE(self
);
2904 for (; p
< e
; p
++) {
2906 return PyBool_FromLong(0);
2907 else if (!cased
&& isupper(*p
))
2910 return PyBool_FromLong(cased
);
2914 PyDoc_STRVAR(istitle__doc__
,
2915 "S.istitle() -> bool\n\
2917 Return True if S is a titlecased string, i.e. uppercase characters\n\
2918 may only follow uncased characters and lowercase characters only cased\n\
2919 ones. Return False otherwise.");
2922 string_istitle(PyStringObject
*self
, PyObject
*uncased
)
2924 register const unsigned char *p
2925 = (unsigned char *) PyString_AS_STRING(self
);
2926 register const unsigned char *e
;
2927 int cased
, previous_is_cased
;
2929 /* Shortcut for single character strings */
2930 if (PyString_GET_SIZE(self
) == 1)
2931 return PyBool_FromLong(isupper(*p
) != 0);
2933 /* Special case for empty strings */
2934 if (PyString_GET_SIZE(self
) == 0)
2935 return PyBool_FromLong(0);
2937 e
= p
+ PyString_GET_SIZE(self
);
2939 previous_is_cased
= 0;
2940 for (; p
< e
; p
++) {
2941 register const unsigned char ch
= *p
;
2944 if (previous_is_cased
)
2945 return PyBool_FromLong(0);
2946 previous_is_cased
= 1;
2949 else if (islower(ch
)) {
2950 if (!previous_is_cased
)
2951 return PyBool_FromLong(0);
2952 previous_is_cased
= 1;
2956 previous_is_cased
= 0;
2958 return PyBool_FromLong(cased
);
2962 PyDoc_STRVAR(splitlines__doc__
,
2963 "S.splitlines([keepends]) -> list of strings\n\
2965 Return a list of the lines in S, breaking at line boundaries.\n\
2966 Line breaks are not included in the resulting list unless keepends\n\
2967 is given and true.");
2969 #define SPLIT_APPEND(data, left, right) \
2970 str = PyString_FromStringAndSize(data + left, right - left); \
2973 if (PyList_Append(list, str)) { \
2981 string_splitlines(PyStringObject
*self
, PyObject
*args
)
2991 if (!PyArg_ParseTuple(args
, "|i:splitlines", &keepends
))
2994 data
= PyString_AS_STRING(self
);
2995 len
= PyString_GET_SIZE(self
);
2997 list
= PyList_New(0);
3001 for (i
= j
= 0; i
< len
; ) {
3004 /* Find a line and append it */
3005 while (i
< len
&& data
[i
] != '\n' && data
[i
] != '\r')
3008 /* Skip the line break reading CRLF as one line break */
3011 if (data
[i
] == '\r' && i
+ 1 < len
&&
3019 SPLIT_APPEND(data
, j
, eol
);
3023 SPLIT_APPEND(data
, j
, len
);
3037 string_methods
[] = {
3038 /* Counterparts of the obsolete stropmodule functions; except
3039 string.maketrans(). */
3040 {"join", (PyCFunction
)string_join
, METH_O
, join__doc__
},
3041 {"split", (PyCFunction
)string_split
, METH_VARARGS
, split__doc__
},
3042 {"lower", (PyCFunction
)string_lower
, METH_NOARGS
, lower__doc__
},
3043 {"upper", (PyCFunction
)string_upper
, METH_NOARGS
, upper__doc__
},
3044 {"islower", (PyCFunction
)string_islower
, METH_NOARGS
, islower__doc__
},
3045 {"isupper", (PyCFunction
)string_isupper
, METH_NOARGS
, isupper__doc__
},
3046 {"isspace", (PyCFunction
)string_isspace
, METH_NOARGS
, isspace__doc__
},
3047 {"isdigit", (PyCFunction
)string_isdigit
, METH_NOARGS
, isdigit__doc__
},
3048 {"istitle", (PyCFunction
)string_istitle
, METH_NOARGS
, istitle__doc__
},
3049 {"isalpha", (PyCFunction
)string_isalpha
, METH_NOARGS
, isalpha__doc__
},
3050 {"isalnum", (PyCFunction
)string_isalnum
, METH_NOARGS
, isalnum__doc__
},
3051 {"capitalize", (PyCFunction
)string_capitalize
, METH_NOARGS
,
3053 {"count", (PyCFunction
)string_count
, METH_VARARGS
, count__doc__
},
3054 {"endswith", (PyCFunction
)string_endswith
, METH_VARARGS
,
3056 {"find", (PyCFunction
)string_find
, METH_VARARGS
, find__doc__
},
3057 {"index", (PyCFunction
)string_index
, METH_VARARGS
, index__doc__
},
3058 {"lstrip", (PyCFunction
)string_lstrip
, METH_VARARGS
, lstrip__doc__
},
3059 {"replace", (PyCFunction
)string_replace
, METH_VARARGS
, replace__doc__
},
3060 {"rfind", (PyCFunction
)string_rfind
, METH_VARARGS
, rfind__doc__
},
3061 {"rindex", (PyCFunction
)string_rindex
, METH_VARARGS
, rindex__doc__
},
3062 {"rstrip", (PyCFunction
)string_rstrip
, METH_VARARGS
, rstrip__doc__
},
3063 {"startswith", (PyCFunction
)string_startswith
, METH_VARARGS
,
3065 {"strip", (PyCFunction
)string_strip
, METH_VARARGS
, strip__doc__
},
3066 {"swapcase", (PyCFunction
)string_swapcase
, METH_NOARGS
,
3068 {"translate", (PyCFunction
)string_translate
, METH_VARARGS
,
3070 {"title", (PyCFunction
)string_title
, METH_NOARGS
, title__doc__
},
3071 {"ljust", (PyCFunction
)string_ljust
, METH_VARARGS
, ljust__doc__
},
3072 {"rjust", (PyCFunction
)string_rjust
, METH_VARARGS
, rjust__doc__
},
3073 {"center", (PyCFunction
)string_center
, METH_VARARGS
, center__doc__
},
3074 {"zfill", (PyCFunction
)string_zfill
, METH_VARARGS
, zfill__doc__
},
3075 {"encode", (PyCFunction
)string_encode
, METH_VARARGS
, encode__doc__
},
3076 {"decode", (PyCFunction
)string_decode
, METH_VARARGS
, decode__doc__
},
3077 {"expandtabs", (PyCFunction
)string_expandtabs
, METH_VARARGS
,
3079 {"splitlines", (PyCFunction
)string_splitlines
, METH_VARARGS
,
3081 {NULL
, NULL
} /* sentinel */
3085 str_subtype_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
);
3088 string_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
3091 static char *kwlist
[] = {"object", 0};
3093 if (type
!= &PyString_Type
)
3094 return str_subtype_new(type
, args
, kwds
);
3095 if (!PyArg_ParseTupleAndKeywords(args
, kwds
, "|O:str", kwlist
, &x
))
3098 return PyString_FromString("");
3099 return PyObject_Str(x
);
3103 str_subtype_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
3105 PyObject
*tmp
, *pnew
;
3108 assert(PyType_IsSubtype(type
, &PyString_Type
));
3109 tmp
= string_new(&PyString_Type
, args
, kwds
);
3112 assert(PyString_CheckExact(tmp
));
3113 n
= PyString_GET_SIZE(tmp
);
3114 pnew
= type
->tp_alloc(type
, n
);
3116 memcpy(PyString_AS_STRING(pnew
), PyString_AS_STRING(tmp
), n
+1);
3117 ((PyStringObject
*)pnew
)->ob_shash
=
3118 ((PyStringObject
*)tmp
)->ob_shash
;
3119 ((PyStringObject
*)pnew
)->ob_sstate
= SSTATE_NOT_INTERNED
;
3126 basestring_new(PyTypeObject
*type
, PyObject
*args
, PyObject
*kwds
)
3128 PyErr_SetString(PyExc_TypeError
,
3129 "The basestring type cannot be instantiated");
3134 string_mod(PyObject
*v
, PyObject
*w
)
3136 if (!PyString_Check(v
)) {
3137 Py_INCREF(Py_NotImplemented
);
3138 return Py_NotImplemented
;
3140 return PyString_Format(v
, w
);
3143 PyDoc_STRVAR(basestring_doc
,
3144 "Type basestring cannot be instantiated; it is the base for str and unicode.");
3146 static PyNumberMethods string_as_number
= {
3151 string_mod
, /*nb_remainder*/
3155 PyTypeObject PyBaseString_Type
= {
3156 PyObject_HEAD_INIT(&PyType_Type
)
3167 0, /* tp_as_number */
3168 0, /* tp_as_sequence */
3169 0, /* tp_as_mapping */
3173 0, /* tp_getattro */
3174 0, /* tp_setattro */
3175 0, /* tp_as_buffer */
3176 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_BASETYPE
, /* tp_flags */
3177 basestring_doc
, /* tp_doc */
3178 0, /* tp_traverse */
3180 0, /* tp_richcompare */
3181 0, /* tp_weaklistoffset */
3183 0, /* tp_iternext */
3187 &PyBaseObject_Type
, /* tp_base */
3189 0, /* tp_descr_get */
3190 0, /* tp_descr_set */
3191 0, /* tp_dictoffset */
3194 basestring_new
, /* tp_new */
3198 PyDoc_STRVAR(string_doc
,
3199 "str(object) -> string\n\
3201 Return a nice string representation of the object.\n\
3202 If the argument is a string, the return value is the same object.");
/* Type object for str.
   Slots visible in this listing:
     tp_dealloc / tp_print / tp_repr / tp_str   string_dealloc, string_print,
                                                string_repr, string_str
     tp_as_number / _sequence / _mapping        string_as_number,
                                                string_as_sequence,
                                                string_as_mapping
     tp_hash, tp_richcompare                    string_hash, string_richcompare
     tp_getattro                                PyObject_GenericGetAttr
     tp_as_buffer                               string_as_buffer
     tp_flags                                   Py_TPFLAGS_DEFAULT |
                                                Py_TPFLAGS_CHECKTYPES |
                                                Py_TPFLAGS_BASETYPE
     tp_doc, tp_methods                         string_doc, string_methods
     tp_base                                    PyBaseString_Type
     tp_new, tp_free                            string_new, PyObject_Del
   The basic size given is sizeof(PyStringObject).
   NOTE(review): slots whose lines are absent from this listing are not
   described. */
3204 PyTypeObject PyString_Type
= {
3205 PyObject_HEAD_INIT(&PyType_Type
)
3208 sizeof(PyStringObject
),
3210 (destructor
)string_dealloc
, /* tp_dealloc */
3211 (printfunc
)string_print
, /* tp_print */
3215 (reprfunc
)string_repr
, /* tp_repr */
3216 &string_as_number
, /* tp_as_number */
3217 &string_as_sequence
, /* tp_as_sequence */
3218 &string_as_mapping
, /* tp_as_mapping */
3219 (hashfunc
)string_hash
, /* tp_hash */
3221 (reprfunc
)string_str
, /* tp_str */
3222 PyObject_GenericGetAttr
, /* tp_getattro */
3223 0, /* tp_setattro */
3224 &string_as_buffer
, /* tp_as_buffer */
3225 Py_TPFLAGS_DEFAULT
| Py_TPFLAGS_CHECKTYPES
|
3226 Py_TPFLAGS_BASETYPE
, /* tp_flags */
3227 string_doc
, /* tp_doc */
3228 0, /* tp_traverse */
3230 (richcmpfunc
)string_richcompare
, /* tp_richcompare */
3231 0, /* tp_weaklistoffset */
3233 0, /* tp_iternext */
3234 string_methods
, /* tp_methods */
3237 &PyBaseString_Type
, /* tp_base */
3239 0, /* tp_descr_get */
3240 0, /* tp_descr_set */
3241 0, /* tp_dictoffset */
3244 string_new
, /* tp_new */
3245 PyObject_Del
, /* tp_free */
/* PyString_Concat(pv, w): concatenate w onto the string referenced by *pv.
   Visible logic: a guard that fires when w is NULL or *pv is not a string,
   followed by v = string_concat(*pv, w).
   NOTE(review): the guard's body and the statements that store v back into
   *pv are not part of this listing -- presumably the old *pv is released and
   replaced by v (NULL on failure); confirm against the complete source. */
3249 PyString_Concat(register PyObject
**pv
, register PyObject
*w
)
3251 register PyObject
*v
;
3254 if (w
== NULL
|| !PyString_Check(*pv
)) {
3259 v
= string_concat((PyStringObject
*) *pv
, w
);
/* PyString_ConcatAndDel(pv, w): forwards to PyString_Concat(pv, w).
   NOTE(review): the name suggests that the caller's reference to w is
   released afterwards; that statement is not visible in this listing --
   confirm against the complete source. */
3265 PyString_ConcatAndDel(register PyObject
**pv
, register PyObject
*w
)
3267 PyString_Concat(pv
, w
);
/* _PyString_Resize(pv, newsize) -- reviewer summary of the visible steps
   (the original contract comment follows unchanged):
     1. PyErr_BadInternalCall() when v is not a string, its ob_refcnt is not
        exactly 1, or newsize is negative.
     2. _Py_ForgetReference(v) before the reallocation and
        _Py_NewReference(on the new *pv) after it, because the object may move.
     3. PyObject_REALLOC to sizeof(PyStringObject) + newsize * sizeof(char).
     4. ob_size is set to newsize and a terminating NUL is stored at
        ob_sval[newsize].
   NOTE(review): the lines that load v from *pv, test the REALLOC result and
   return are absent from this listing, as is the terminator line of the
   original comment below. */
3272 /* The following function breaks the notion that strings are immutable:
3273 it changes the size of a string. We get away with this only if there
3274 is only one module referencing the object. You can also think of it
3275 as creating a new string object and destroying the old one, only
3276 more efficiently. In any case, don't use this if the string may
3277 already be known to some other part of the code...
3278 Note that if there's not enough memory to resize the string, the original
3279 string object at *pv is deallocated, *pv is set to NULL, an "out of
3280 memory" exception is set, and -1 is returned. Else (on success) 0 is
3281 returned, and the value in *pv may or may not be the same as on input.
3282 As always, an extra byte is allocated for a trailing \0 byte (newsize
3283 does *not* include that), and a trailing \0 byte is stored.
3287 _PyString_Resize(PyObject
**pv
, int newsize
)
3289 register PyObject
*v
;
3290 register PyStringObject
*sv
;
3292 if (!PyString_Check(v
) || v
->ob_refcnt
!= 1 || newsize
< 0) {
3295 PyErr_BadInternalCall();
3298 /* XXX UNREF/NEWREF interface should be more symmetrical */
3300 _Py_ForgetReference(v
);
3302 PyObject_REALLOC((char *)v
,
3303 sizeof(PyStringObject
) + newsize
* sizeof(char));
3309 _Py_NewReference(*pv
);
3310 sv
= (PyStringObject
*) *pv
;
3311 sv
->ob_size
= newsize
;
3312 sv
->ob_sval
[newsize
] = '\0';
3316 /* Helpers for formatstring */
/* getnextarg(args, arglen, p_argidx): fetch the next argument consumed by a
   format specifier.
   The current index is read from *p_argidx.  While it is below arglen the
   visible path returns PyTuple_GetItem(args, argidx); once the arguments are
   exhausted TypeError "not enough arguments for format string" is set.
   NOTE(review): the lines between the bounds test and the tuple fetch
   (original 3323-3326) are absent from this listing -- presumably they
   advance *p_argidx and handle a non-tuple args; confirm. */
3319 getnextarg(PyObject
*args
, int arglen
, int *p_argidx
)
3321 int argidx
= *p_argidx
;
3322 if (argidx
< arglen
) {
3327 return PyTuple_GetItem(args
, argidx
);
3329 PyErr_SetString(PyExc_TypeError
,
3330 "not enough arguments for format string");
/* Bit flags recording which flag characters were seen in a format
   specifier.  PyString_Format sets them for '-', '+', ' ', '#' and '0'
   respectively; the format helpers test them (e.g. F_ALT selects the '#'
   alternate form). */
3341 #define F_LJUST (1<<0)
3342 #define F_SIGN (1<<1)
3343 #define F_BLANK (1<<2)
3344 #define F_ALT (1<<3)
3345 #define F_ZERO (1<<4)
/* formatfloat(buf, buflen, flags, prec, type, v): format the Python number v
   as a C double into buf (capacity buflen).
   Visible steps:
     - v is converted with PyFloat_AsDouble(); a failure is reported as
       TypeError "float argument required".
     - there is a test for type 'f' combined with a very large magnitude
       (fabs(x)/1e25 >= 1e25); the statement it guards is not shown here.
     - OverflowError is raised when the buffer cannot hold the worst case:
       buflen <= 10 + prec for 'g', buflen <= 53 + prec for 'f'.
     - the C format is built in fmt as "%%%s.%d%c" (with "#" when F_ALT is
       set) and applied with PyOS_snprintf(buf, buflen, fmt, x).
   NOTE(review): declarations, return statements and the terminator of the
   worst-case comment are absent from this listing. */
3348 formatfloat(char *buf
, size_t buflen
, int flags
,
3349 int prec
, int type
, PyObject
*v
)
3351 /* fmt = '%#.' + `prec` + `type`
3352 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
3355 x
= PyFloat_AsDouble(v
);
3356 if (x
== -1.0 && PyErr_Occurred()) {
3357 PyErr_SetString(PyExc_TypeError
, "float argument required");
3362 if (type
== 'f' && fabs(x
)/1e25
>= 1e25
)
3364 /* Worst case length calc to ensure no buffer overrun:
3368 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
3369 for any double rep.)
3370 len = 1 + prec + 1 + 2 + 5 = 9 + prec
3373 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
3374 len = 1 + 50 + 1 + prec = 52 + prec
3376 If prec=0 the effective precision is 1 (the leading digit is
3377 always given), therefore increase the length by one.
3380 if ((type
== 'g' && buflen
<= (size_t)10 + (size_t)prec
) ||
3381 (type
== 'f' && buflen
<= (size_t)53 + (size_t)prec
)) {
3382 PyErr_SetString(PyExc_OverflowError
,
3383 "formatted float is too long (precision too large?)");
3386 PyOS_snprintf(fmt
, sizeof(fmt
), "%%%s.%d%c",
3387 (flags
&F_ALT
) ? "#" : "",
3389 PyOS_snprintf(buf
, buflen
, fmt
, x
);
3393 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3394 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3395 * Python's regular ints.
3396 * Return value: a new PyString*, or NULL if error.
3397 * . *pbuf is set to point into it,
3398 * *plen set to the # of chars following that.
3399 * Caller must decref it when done using pbuf.
3400 * The string starting at *pbuf is of the form
3401 * "-"? ("0x" | "0X")? digit+
3402 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3403 * set in flags. The case of hex digits will be correct,
3404 * There will be at least prec digits, zero-filled on the left if
3405 * necessary to get that many.
3406 * val object to be converted
3407 * flags bitmask of format flags; only F_ALT is looked at
3408 * prec minimum number of digits; 0-fill on left if needed
3409 * type a character in [duoxX]; u acts the same as d
3411 * CAUTION: o, x and X conversions on regular ints can never
3412 * produce a '-' sign, but can for Python's unbounded ints.
 *
 * NOTE(review) -- summary of the statements visible in this listing:
 *   - the digit string comes from the object's tp_str, nb_oct or nb_hex
 *     slot; any other type character trips an assert;
 *   - the result must have ob_refcnt == 1 because it is edited in place,
 *     otherwise PyErr_BadInternalCall() is raised;
 *   - a trailing 'L' is tested for, then sign, numnondigits and numdigits
 *     are derived from the text;
 *   - without F_ALT the base marker (0x, 0X or 0) is skipped, except that a
 *     lone 0 digit is left alone;
 *   - when prec > numdigits a new string r1 of numnondigits + prec
 *     characters is allocated and filled by three copy loops;
 *   - hex output has its case fixed up: upper-case A-F are scanned for, and
 *     a leading 0x is rewritten to 0X for the X conversion.
 *   Lines missing from this listing (including this comment's terminator
 *   and most branch bodies) are not described.
3415 _PyString_FormatLong(PyObject
*val
, int flags
, int prec
, int type
,
3416 char **pbuf
, int *plen
)
3418 PyObject
*result
= NULL
;
3421 int sign
; /* 1 if '-', else 0 */
3422 int len
; /* number of characters */
3423 int numdigits
; /* len == numnondigits + numdigits */
3424 int numnondigits
= 0;
3429 result
= val
->ob_type
->tp_str(val
);
3432 result
= val
->ob_type
->tp_as_number
->nb_oct(val
);
3437 result
= val
->ob_type
->tp_as_number
->nb_hex(val
);
3440 assert(!"'type' not in [duoxX]");
3445 /* To modify the string in-place, there can only be one reference. */
3446 if (result
->ob_refcnt
!= 1) {
3447 PyErr_BadInternalCall();
3450 buf
= PyString_AsString(result
);
3451 len
= PyString_Size(result
);
3452 if (buf
[len
-1] == 'L') {
3456 sign
= buf
[0] == '-';
3457 numnondigits
+= sign
;
3458 numdigits
= len
- numnondigits
;
3459 assert(numdigits
> 0);
3461 /* Get rid of base marker unless F_ALT */
3462 if ((flags
& F_ALT
) == 0) {
3463 /* Need to skip 0x, 0X or 0. */
3467 assert(buf
[sign
] == '0');
3468 /* If 0 is only digit, leave it alone. */
3469 if (numdigits
> 1) {
3476 assert(buf
[sign
] == '0');
3477 assert(buf
[sign
+ 1] == 'x');
3488 assert(len
== numnondigits
+ numdigits
);
3489 assert(numdigits
> 0);
3492 /* Fill with leading zeroes to meet minimum width. */
3493 if (prec
> numdigits
) {
3494 PyObject
*r1
= PyString_FromStringAndSize(NULL
,
3495 numnondigits
+ prec
);
3501 b1
= PyString_AS_STRING(r1
);
3502 for (i
= 0; i
< numnondigits
; ++i
)
3504 for (i
= 0; i
< prec
- numdigits
; i
++)
3506 for (i
= 0; i
< numdigits
; i
++)
3511 buf
= PyString_AS_STRING(result
);
3512 len
= numnondigits
+ prec
;
3515 /* Fix up case for hex conversions. */
3518 /* Need to convert all upper case letters to lower case. */
3519 for (i
= 0; i
< len
; i
++)
3520 if (buf
[i
] >= 'A' && buf
[i
] <= 'F')
3524 /* Need to convert 0x to 0X (and -0x to -0X). */
3525 if (buf
[sign
+ 1] == 'x')
3526 buf
[sign
+ 1] = 'X';
/* formatint(buf, buflen, flags, prec, type, v): format the Python int v into
   buf (capacity buflen) for the integer conversion character in `type`.
   Visible steps:
     - v is converted with PyInt_AsLong(); a failure is reported as
       TypeError "int argument required".
     - a negative value combined with a type other than 'd' or 'i' issues a
       FutureWarning through PyErr_Warn().
     - the C format is assembled in fmt (64 bytes): for the alternate hex
       forms a literal 0x or 0X prefix is written by hand (see the
       portability notes below); otherwise the optional "#" flag is passed
       through to the C library.
     - OverflowError is raised when buflen <= 13 or buflen <= 2 + prec.
     - PyOS_snprintf(buf, buflen, fmt, x) produces the text.
   NOTE(review): return statements, some snprintf arguments and several
   comment terminators are absent from this listing. */
3535 formatint(char *buf
, size_t buflen
, int flags
,
3536 int prec
, int type
, PyObject
*v
)
3538 /* fmt = '%#.' + `prec` + 'l' + `type`
3539 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3541 char fmt
[64]; /* plenty big enough! */
3544 x
= PyInt_AsLong(v
);
3545 if (x
== -1 && PyErr_Occurred()) {
3546 PyErr_SetString(PyExc_TypeError
, "int argument required");
3549 if (x
< 0 && type
!= 'd' && type
!= 'i') {
3550 if (PyErr_Warn(PyExc_FutureWarning
,
3551 "%u/%o/%x/%X of negative int will return "
3552 "a signed string in Python 2.4 and up") < 0)
3558 if ((flags
& F_ALT
) &&
3559 (type
== 'x' || type
== 'X')) {
3560 /* When converting under %#x or %#X, there are a number
3561 * of issues that cause pain:
3562 * - when 0 is being converted, the C standard leaves off
3563 * the '0x' or '0X', which is inconsistent with other
3564 * %#x/%#X conversions and inconsistent with Python's
3566 * - there are platforms that violate the standard and
3567 * convert 0 with the '0x' or '0X'
3568 * (Metrowerks, Compaq Tru64)
3569 * - there are platforms that give '0x' when converting
3570 * under %#X, but convert 0 in accordance with the
3571 * standard (OS/2 EMX)
3573 * We can achieve the desired consistency by inserting our
3574 * own '0x' or '0X' prefix, and substituting %x/%X in place
3577 * Note that this is the same approach as used in
3578 * formatint() in unicodeobject.c
3580 PyOS_snprintf(fmt
, sizeof(fmt
), "0%c%%.%dl%c",
3584 PyOS_snprintf(fmt
, sizeof(fmt
), "%%%s.%dl%c",
3585 (flags
&F_ALT
) ? "#" : "",
3589 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
3590 * worst case buf = '0x' + [0-9]*prec, where prec >= 11
3592 if (buflen
<= 13 || buflen
<= (size_t)2 + (size_t)prec
) {
3593 PyErr_SetString(PyExc_OverflowError
,
3594 "formatted integer is too long (precision too large?)");
3597 PyOS_snprintf(buf
, buflen
, fmt
, x
);
3602 formatchar(char *buf
, size_t buflen
, PyObject
*v
)
3604 /* presume that the buffer is at least 2 characters long.
   formatchar stores the single character for a %c conversion in buf[0].
   A string argument is parsed with PyArg_Parse format "c", any other
   argument with format "b"; both use the error message
   "%c requires int or char".
   NOTE(review): the failure returns, the write of the terminating byte and
   the final return value are not visible in this listing. */
3605 if (PyString_Check(v
)) {
3606 if (!PyArg_Parse(v
, "c;%c requires int or char", &buf
[0]))
3610 if (!PyArg_Parse(v
, "b;%c requires int or char", &buf
[0]))
/* PyString_Format(format, args) -- reviewer summary of the stages visible in
   this listing (the original comment follows unchanged):
     - a NULL format, a non-string format or NULL args is rejected with
       PyErr_BadInternalCall();
     - the result string starts at len(format) + 100 bytes and is grown with
       _PyString_Resize() whenever the remaining room (rescnt) runs out;
     - when args supports the mapping protocol and is neither a tuple nor a
       basestring instance a branch is taken whose body is not shown here
       (presumably it records args as `dict` for parenthesised-key lookups;
       confirm);
     - per specifier: an optional parenthesised key (balanced parentheses,
       "incomplete format key" on a premature end) resolved through
       PyObject_GetItem(dict, key); the flag characters - + space # 0; a width
       and a precision, each taken either from the next argument (which must
       be an int) or from decimal digits with a multiply-overflow check; an
       optional h, l or L size character; then the conversion itself through
       PyObject_Str / PyObject_Repr, _PyString_FormatLong, formatint,
       formatfloat or formatchar, using the FORMATBUFLEN-byte scratch buffer;
     - sign handling and padding to `width`, then memcpy of the converted
       text into the result;
     - left-over arguments raise TypeError
       "not all arguments converted during string formatting";
     - in Py_USING_UNICODE builds a unicode argument rewinds argidx, trims
       the result to what was produced so far, decodes the rest of the
       format and delegates to PyUnicode_Format, joining both halves with
       PyUnicode_Concat.
   NOTE(review): many original lines (labels, error exits, case labels,
   comment terminators) are absent from this listing; only visible
   statements are summarised. */
3618 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
3620 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
3621 chars are formatted. XXX This is a magic number. Each formatting
3622 routine does bounds checking to ensure no overflow, but a better
3623 solution may be to malloc a buffer of appropriate size for each
3624 format. For now, the current solution is sufficient.
3626 #define FORMATBUFLEN (size_t)120
3629 PyString_Format(PyObject
*format
, PyObject
*args
)
3632 int fmtcnt
, rescnt
, reslen
, arglen
, argidx
;
3634 PyObject
*result
, *orig_args
;
3635 #ifdef Py_USING_UNICODE
3638 PyObject
*dict
= NULL
;
3639 if (format
== NULL
|| !PyString_Check(format
) || args
== NULL
) {
3640 PyErr_BadInternalCall();
3644 fmt
= PyString_AS_STRING(format
);
3645 fmtcnt
= PyString_GET_SIZE(format
);
3646 reslen
= rescnt
= fmtcnt
+ 100;
3647 result
= PyString_FromStringAndSize((char *)NULL
, reslen
);
3650 res
= PyString_AsString(result
);
3651 if (PyTuple_Check(args
)) {
3652 arglen
= PyTuple_GET_SIZE(args
);
3659 if (args
->ob_type
->tp_as_mapping
&& !PyTuple_Check(args
) &&
3660 !PyObject_TypeCheck(args
, &PyBaseString_Type
))
3662 while (--fmtcnt
>= 0) {
3665 rescnt
= fmtcnt
+ 100;
3667 if (_PyString_Resize(&result
, reslen
) < 0)
3669 res
= PyString_AS_STRING(result
)
3676 /* Got a format specifier */
3683 PyObject
*temp
= NULL
;
3687 char formatbuf
[FORMATBUFLEN
];
3688 /* For format{float,int,char}() */
3689 #ifdef Py_USING_UNICODE
3690 char *fmt_start
= fmt
;
3691 int argidx_start
= argidx
;
3702 PyErr_SetString(PyExc_TypeError
,
3703 "format requires a mapping");
3709 /* Skip over balanced parentheses */
3710 while (pcount
> 0 && --fmtcnt
>= 0) {
3713 else if (*fmt
== '(')
3717 keylen
= fmt
- keystart
- 1;
3718 if (fmtcnt
< 0 || pcount
> 0) {
3719 PyErr_SetString(PyExc_ValueError
,
3720 "incomplete format key");
3723 key
= PyString_FromStringAndSize(keystart
,
3731 args
= PyObject_GetItem(dict
, key
);
3740 while (--fmtcnt
>= 0) {
3741 switch (c
= *fmt
++) {
3742 case '-': flags
|= F_LJUST
; continue;
3743 case '+': flags
|= F_SIGN
; continue;
3744 case ' ': flags
|= F_BLANK
; continue;
3745 case '#': flags
|= F_ALT
; continue;
3746 case '0': flags
|= F_ZERO
; continue;
3751 v
= getnextarg(args
, arglen
, &argidx
);
3754 if (!PyInt_Check(v
)) {
3755 PyErr_SetString(PyExc_TypeError
,
3759 width
= PyInt_AsLong(v
);
3767 else if (c
>= 0 && isdigit(c
)) {
3769 while (--fmtcnt
>= 0) {
3770 c
= Py_CHARMASK(*fmt
++);
3773 if ((width
*10) / 10 != width
) {
3779 width
= width
*10 + (c
- '0');
3787 v
= getnextarg(args
, arglen
, &argidx
);
3790 if (!PyInt_Check(v
)) {
3796 prec
= PyInt_AsLong(v
);
3802 else if (c
>= 0 && isdigit(c
)) {
3804 while (--fmtcnt
>= 0) {
3805 c
= Py_CHARMASK(*fmt
++);
3808 if ((prec
*10) / 10 != prec
) {
3814 prec
= prec
*10 + (c
- '0');
3819 if (c
== 'h' || c
== 'l' || c
== 'L') {
3825 PyErr_SetString(PyExc_ValueError
,
3826 "incomplete format");
3830 v
= getnextarg(args
, arglen
, &argidx
);
3842 #ifdef Py_USING_UNICODE
3843 if (PyUnicode_Check(v
)) {
3845 argidx
= argidx_start
;
3852 temp
= PyObject_Str(v
);
3854 temp
= PyObject_Repr(v
);
3857 if (!PyString_Check(temp
)) {
3858 /* XXX Note: this should never happen,
3859 since PyObject_Repr() and
3860 PyObject_Str() assure this */
3861 PyErr_SetString(PyExc_TypeError
,
3862 "%s argument has non-string str()");
3866 pbuf
= PyString_AS_STRING(temp
);
3867 len
= PyString_GET_SIZE(temp
);
3868 if (prec
>= 0 && len
> prec
)
3879 if (PyLong_Check(v
)) {
3880 temp
= _PyString_FormatLong(v
, flags
,
3881 prec
, c
, &pbuf
, &len
);
3884 /* unbounded ints can always produce
3885 a sign character! */
3890 len
= formatint(pbuf
,
3895 /* only d conversion is signed */
3907 len
= formatfloat(pbuf
, sizeof(formatbuf
),
3917 len
= formatchar(pbuf
, sizeof(formatbuf
), v
);
3922 PyErr_Format(PyExc_ValueError
,
3923 "unsupported format character '%c' (0x%x) "
3926 (int)(fmt
- 1 - PyString_AsString(format
)));
3930 if (*pbuf
== '-' || *pbuf
== '+') {
3934 else if (flags
& F_SIGN
)
3936 else if (flags
& F_BLANK
)
3943 if (rescnt
- (sign
!= 0) < width
) {
3945 rescnt
= width
+ fmtcnt
+ 100;
3949 return PyErr_NoMemory();
3951 if (_PyString_Resize(&result
, reslen
) < 0)
3953 res
= PyString_AS_STRING(result
)
3963 if ((flags
& F_ALT
) && (c
== 'x' || c
== 'X')) {
3964 assert(pbuf
[0] == '0');
3965 assert(pbuf
[1] == c
);
3976 if (width
> len
&& !(flags
& F_LJUST
)) {
3980 } while (--width
> len
);
3985 if ((flags
& F_ALT
) &&
3986 (c
== 'x' || c
== 'X')) {
3987 assert(pbuf
[0] == '0');
3988 assert(pbuf
[1] == c
);
3993 memcpy(res
, pbuf
, len
);
3996 while (--width
>= len
) {
4000 if (dict
&& (argidx
< arglen
) && c
!= '%') {
4001 PyErr_SetString(PyExc_TypeError
,
4002 "not all arguments converted during string formatting");
4008 if (argidx
< arglen
&& !dict
) {
4009 PyErr_SetString(PyExc_TypeError
,
4010 "not all arguments converted during string formatting");
4016 _PyString_Resize(&result
, reslen
- rescnt
);
4019 #ifdef Py_USING_UNICODE
4025 /* Fiddle args right (remove the first argidx arguments) */
4026 if (PyTuple_Check(orig_args
) && argidx
> 0) {
4028 int n
= PyTuple_GET_SIZE(orig_args
) - argidx
;
4033 PyObject
*w
= PyTuple_GET_ITEM(orig_args
, n
+ argidx
);
4035 PyTuple_SET_ITEM(v
, n
, w
);
4039 Py_INCREF(orig_args
);
4043 /* Take what we have of the result and let the Unicode formatting
4044 function format the rest of the input. */
4045 rescnt
= res
- PyString_AS_STRING(result
);
4046 if (_PyString_Resize(&result
, rescnt
))
4048 fmtcnt
= PyString_GET_SIZE(format
) - \
4049 (fmt
- PyString_AS_STRING(format
));
4050 format
= PyUnicode_Decode(fmt
, fmtcnt
, NULL
, NULL
);
4053 v
= PyUnicode_Format(format
, args
);
4057 /* Paste what we have (result) to what the Unicode formatting
4058 function returned (v) and return the result (or error) */
4059 w
= PyUnicode_Concat(result
, v
);
4064 #endif /* Py_USING_UNICODE */
/* PyString_InternInPlace(p): intern the string referenced by *p.
   Visible steps:
     - a NULL or non-string *p is a fatal error (Py_FatalError);
     - an already interned string is detected with PyString_CHECK_INTERNED;
     - the `interned` dict is created on first use; if PyDict_New() fails the
       exception is cleared instead of being propagated;
     - an existing entry is looked up with PyDict_GetItem(interned, s);
     - for an object that is not exactly a str, a plain copy is made with
       PyString_FromStringAndSize so that only true strings enter the dict;
     - after a successful PyDict_SetItem(interned, t, t) the refcount is
       lowered by 2, because the dict's key and value references are
       deliberately not counted, and the state is set to
       SSTATE_INTERNED_MORTAL.
   NOTE(review): the statements that update *p and the early returns are
   absent from this listing. */
4075 PyString_InternInPlace(PyObject
**p
)
4077 register PyStringObject
*s
= (PyStringObject
*)(*p
);
4079 if (s
== NULL
|| !PyString_Check(s
))
4080 Py_FatalError("PyString_InternInPlace: strings only please!");
4081 if (PyString_CHECK_INTERNED(s
))
4083 if (interned
== NULL
) {
4084 interned
= PyDict_New();
4085 if (interned
== NULL
) {
4086 PyErr_Clear(); /* Don't leave an exception */
4090 if ((t
= PyDict_GetItem(interned
, (PyObject
*)s
)) != NULL
) {
4096 /* Ensure that only true string objects appear in the intern dict */
4097 if (!PyString_CheckExact(s
)) {
4098 t
= PyString_FromStringAndSize(PyString_AS_STRING(s
),
4099 PyString_GET_SIZE(s
));
4109 if (PyDict_SetItem(interned
, t
, t
) == 0) {
4110 /* The two references in interned are not counted by
4111 refcnt. The string deallocator will take care of this */
4112 ((PyObject
*)t
)->ob_refcnt
-=2;
4113 PyString_CHECK_INTERNED(t
) = SSTATE_INTERNED_MORTAL
;
/* PyString_InternImmortal(p): intern *p through PyString_InternInPlace(),
   then, unless its state already is SSTATE_INTERNED_IMMORTAL, set the state
   to SSTATE_INTERNED_IMMORTAL.
   NOTE(review): any further statement inside the `if` (e.g. a refcount
   adjustment) is not visible in this listing -- confirm against the
   complete source. */
4123 PyString_InternImmortal(PyObject
**p
)
4125 PyString_InternInPlace(p
);
4126 if (PyString_CHECK_INTERNED(*p
) != SSTATE_INTERNED_IMMORTAL
) {
4127 PyString_CHECK_INTERNED(*p
) = SSTATE_INTERNED_IMMORTAL
;
/* PyString_InternFromString(cp): build a string object from the C string cp
   with PyString_FromString() and intern it via PyString_InternInPlace(&s).
   NOTE(review): the NULL check on s and the return statement are absent
   from this listing. */
4134 PyString_InternFromString(const char *cp
)
4136 PyObject
*s
= PyString_FromString(cp
);
4139 PyString_InternInPlace(&s
);
/* Cleanup fragment: releases the cached strings held by this file.  Every
   slot of characters[] (UCHAR_MAX + 1 entries) is XDECREF'ed and reset to
   NULL, then nullstring is XDECREF'ed.
   NOTE(review): the enclosing function's header is not part of this
   listing -- presumably PyString_Fini; confirm against the complete
   source. */
4147 for (i
= 0; i
< UCHAR_MAX
+ 1; i
++) {
4148 Py_XDECREF(characters
[i
]);
4149 characters
[i
] = NULL
;
4151 Py_XDECREF(nullstring
);
4155 void _Py_ReleaseInternedStrings(void)
4161 if (interned
== NULL
|| !PyDict_Check(interned
))
4163 keys
= PyDict_Keys(interned
);
4164 if (keys
== NULL
|| !PyList_Check(keys
)) {
4169 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4170 detector, interned strings are not forcibly deallocated; rather, we
4171 give them their stolen references back, and then clear and DECREF
4172 the interned dict. */
4174 fprintf(stderr
, "releasing interned strings\n");
4175 n
= PyList_GET_SIZE(keys
);
4176 for (i
= 0; i
< n
; i
++) {
4177 s
= (PyStringObject
*) PyList_GET_ITEM(keys
, i
);
4178 switch (s
->ob_sstate
) {
4179 case SSTATE_NOT_INTERNED
:
4180 /* XXX Shouldn't happen */
4182 case SSTATE_INTERNED_IMMORTAL
:
4185 case SSTATE_INTERNED_MORTAL
:
4189 Py_FatalError("Inconsistent interned string state.");
4191 s
->ob_sstate
= SSTATE_NOT_INTERNED
;
4194 PyDict_Clear(interned
);
4195 Py_DECREF(interned
);