1 /***********************************************************
2 Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
7 Permission to use, copy, modify, and distribute this software and its
8 documentation for any purpose and without fee is hereby granted,
9 provided that the above copyright notice appear in all copies and that
10 both that copyright notice and this permission notice appear in
11 supporting documentation, and that the names of Stichting Mathematisch
12 Centrum or CWI or Corporation for National Research Initiatives or
13 CNRI not be used in advertising or publicity pertaining to
14 distribution of the software without specific, written prior
17 While CWI is the initial source for this software, a modified version
18 is made available by the Corporation for National Research Initiatives
19 (CNRI) at the Internet address ftp://ftp.python.org.
21 STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
22 REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
23 MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
24 CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
25 DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
26 PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
27 TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
28 PERFORMANCE OF THIS SOFTWARE.
30 ******************************************************************/
32 /* String object implementation */
40 int null_strings
, one_strings
;
51 static PyStringObject
*characters
[UCHAR_MAX
+ 1];
52 #ifndef DONT_SHARE_SHORT_STRINGS
53 static PyStringObject
*nullstring
;
57 Newsizedstringobject() and newstringobject() try in certain cases
58 to share string objects. When the size of the string is zero,
59 these routines always return a pointer to the same string object;
60 when the size is one, they return a pointer to an already existing
61 object if the contents of the string is known. For
62 newstringobject() this is always the case, for
63 newsizedstringobject() this is the case when the first argument in
65 A common practice to allocate a string and then fill it in or
66 change it must be done carefully. It is only allowed to change the
67 contents of the string if the object was gotten from
68 newsizedstringobject() with a NULL first argument, because in the
69 future these routines may try to do even more sharing of objects.
72 PyString_FromStringAndSize(str
, size
)
76 register PyStringObject
*op
;
77 #ifndef DONT_SHARE_SHORT_STRINGS
78 if (size
== 0 && (op
= nullstring
) != NULL
) {
83 return (PyObject
*)op
;
85 if (size
== 1 && str
!= NULL
&&
86 (op
= characters
[*str
& UCHAR_MAX
]) != NULL
)
92 return (PyObject
*)op
;
94 #endif /* DONT_SHARE_SHORT_STRINGS */
95 op
= (PyStringObject
*)
96 malloc(sizeof(PyStringObject
) + size
* sizeof(char));
98 return PyErr_NoMemory();
99 op
->ob_type
= &PyString_Type
;
104 #ifdef INTERN_STRINGS
105 op
->ob_sinterned
= NULL
;
107 _Py_NewReference(op
);
109 memcpy(op
->ob_sval
, str
, size
);
110 op
->ob_sval
[size
] = '\0';
111 #ifndef DONT_SHARE_SHORT_STRINGS
115 } else if (size
== 1 && str
!= NULL
) {
116 characters
[*str
& UCHAR_MAX
] = op
;
120 return (PyObject
*) op
;
124 PyString_FromString(str
)
127 register unsigned int size
= strlen(str
);
128 register PyStringObject
*op
;
129 #ifndef DONT_SHARE_SHORT_STRINGS
130 if (size
== 0 && (op
= nullstring
) != NULL
) {
135 return (PyObject
*)op
;
137 if (size
== 1 && (op
= characters
[*str
& UCHAR_MAX
]) != NULL
) {
142 return (PyObject
*)op
;
144 #endif /* DONT_SHARE_SHORT_STRINGS */
145 op
= (PyStringObject
*)
146 malloc(sizeof(PyStringObject
) + size
* sizeof(char));
148 return PyErr_NoMemory();
149 op
->ob_type
= &PyString_Type
;
154 #ifdef INTERN_STRINGS
155 op
->ob_sinterned
= NULL
;
157 _Py_NewReference(op
);
158 strcpy(op
->ob_sval
, str
);
159 #ifndef DONT_SHARE_SHORT_STRINGS
163 } else if (size
== 1) {
164 characters
[*str
& UCHAR_MAX
] = op
;
168 return (PyObject
*) op
;
180 register PyObject
*op
;
182 if (!PyString_Check(op
)) {
183 PyErr_BadInternalCall();
186 return ((PyStringObject
*)op
) -> ob_size
;
190 PyString_AsString(op
)
191 register PyObject
*op
;
193 if (!PyString_Check(op
)) {
194 PyErr_BadInternalCall();
197 return ((PyStringObject
*)op
) -> ob_sval
;
203 string_print(op
, fp
, flags
)
211 /* XXX Ought to check for interrupts when writing long strings */
212 if (flags
& Py_PRINT_RAW
) {
213 fwrite(op
->ob_sval
, 1, (int) op
->ob_size
, fp
);
217 /* figure out which quote to use; single is preferred */
219 if (strchr(op
->ob_sval
, '\'') && !strchr(op
->ob_sval
, '"'))
223 for (i
= 0; i
< op
->ob_size
; i
++) {
225 if (c
== quote
|| c
== '\\')
226 fprintf(fp
, "\\%c", c
);
227 else if (c
< ' ' || c
>= 0177)
228 fprintf(fp
, "\\%03o", c
& 0377);
238 register PyStringObject
*op
;
241 int newsize
= 2 + 4 * op
->ob_size
* sizeof(char);
242 PyObject
*v
= PyString_FromStringAndSize((char *)NULL
, newsize
);
252 /* figure out which quote to use; single is preferred */
254 if (strchr(op
->ob_sval
, '\'') && !strchr(op
->ob_sval
, '"'))
257 p
= ((PyStringObject
*)v
)->ob_sval
;
259 for (i
= 0; i
< op
->ob_size
; i
++) {
261 if (c
== quote
|| c
== '\\')
262 *p
++ = '\\', *p
++ = c
;
263 else if (c
< ' ' || c
>= 0177) {
264 sprintf(p
, "\\%03o", c
& 0377);
274 &v
, (int) (p
- ((PyStringObject
*)v
)->ob_sval
));
288 register PyStringObject
*a
;
289 register PyObject
*bb
;
291 register unsigned int size
;
292 register PyStringObject
*op
;
293 if (!PyString_Check(bb
)) {
297 #define b ((PyStringObject *)bb)
298 /* Optimize cases with empty left or right operand */
299 if (a
->ob_size
== 0) {
303 if (b
->ob_size
== 0) {
305 return (PyObject
*)a
;
307 size
= a
->ob_size
+ b
->ob_size
;
308 op
= (PyStringObject
*)
309 malloc(sizeof(PyStringObject
) + size
* sizeof(char));
311 return PyErr_NoMemory();
312 op
->ob_type
= &PyString_Type
;
317 #ifdef INTERN_STRINGS
318 op
->ob_sinterned
= NULL
;
320 _Py_NewReference(op
);
321 memcpy(op
->ob_sval
, a
->ob_sval
, (int) a
->ob_size
);
322 memcpy(op
->ob_sval
+ a
->ob_size
, b
->ob_sval
, (int) b
->ob_size
);
323 op
->ob_sval
[size
] = '\0';
324 return (PyObject
*) op
;
330 register PyStringObject
*a
;
335 register PyStringObject
*op
;
338 size
= a
->ob_size
* n
;
339 if (size
== a
->ob_size
) {
341 return (PyObject
*)a
;
343 op
= (PyStringObject
*)
344 malloc(sizeof(PyStringObject
) + size
* sizeof(char));
346 return PyErr_NoMemory();
347 op
->ob_type
= &PyString_Type
;
352 #ifdef INTERN_STRINGS
353 op
->ob_sinterned
= NULL
;
355 _Py_NewReference(op
);
356 for (i
= 0; i
< size
; i
+= a
->ob_size
)
357 memcpy(op
->ob_sval
+i
, a
->ob_sval
, (int) a
->ob_size
);
358 op
->ob_sval
[size
] = '\0';
359 return (PyObject
*) op
;
362 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
365 string_slice(a
, i
, j
)
366 register PyStringObject
*a
;
367 register int i
, j
; /* May be negative! */
372 j
= 0; /* Avoid signed/unsigned bug in next line */
375 if (i
== 0 && j
== a
->ob_size
) { /* It's the same as a */
377 return (PyObject
*)a
;
381 return PyString_FromStringAndSize(a
->ob_sval
+ i
, (int) (j
-i
));
391 if (i
< 0 || i
>= a
->ob_size
) {
392 PyErr_SetString(PyExc_IndexError
, "string index out of range");
395 c
= a
->ob_sval
[i
] & UCHAR_MAX
;
396 v
= (PyObject
*) characters
[c
];
402 v
= PyString_FromStringAndSize((char *)NULL
, 1);
405 characters
[c
] = (PyStringObject
*) v
;
406 ((PyStringObject
*)v
)->ob_sval
[0] = c
;
414 PyStringObject
*a
, *b
;
416 int len_a
= a
->ob_size
, len_b
= b
->ob_size
;
417 int min_len
= (len_a
< len_b
) ? len_a
: len_b
;
420 cmp
= Py_CHARMASK(*a
->ob_sval
) - Py_CHARMASK(*b
->ob_sval
);
422 cmp
= memcmp(a
->ob_sval
, b
->ob_sval
, min_len
);
426 return (len_a
< len_b
) ? -1 : (len_a
> len_b
) ? 1 : 0;
434 register unsigned char *p
;
438 if (a
->ob_shash
!= -1)
440 #ifdef INTERN_STRINGS
441 if (a
->ob_sinterned
!= NULL
)
442 return (a
->ob_shash
=
443 ((PyStringObject
*)(a
->ob_sinterned
))->ob_shash
);
447 p
= (unsigned char *) a
->ob_sval
;
450 x
= (1000003*x
) ^ *p
++;
461 string_buffer_getreadbuf(self
, index
, ptr
)
462 PyStringObject
*self
;
467 PyErr_SetString(PyExc_SystemError
,
468 "accessing non-existent string segment");
471 *ptr
= (void *)self
->ob_sval
;
472 return self
->ob_size
;
476 string_buffer_getwritebuf(self
, index
, ptr
)
477 PyStringObject
*self
;
481 PyErr_SetString(PyExc_TypeError
,
482 "Cannot use string as modifiable buffer");
487 string_buffer_getsegcount(self
, lenp
)
488 PyStringObject
*self
;
492 *lenp
= self
->ob_size
;
497 string_buffer_getcharbuf(self
, index
, ptr
)
498 PyStringObject
*self
;
503 PyErr_SetString(PyExc_SystemError
,
504 "accessing non-existent string segment");
507 *ptr
= self
->ob_sval
;
508 return self
->ob_size
;
511 static PySequenceMethods string_as_sequence
= {
512 (inquiry
)string_length
, /*sq_length*/
513 (binaryfunc
)string_concat
, /*sq_concat*/
514 (intargfunc
)string_repeat
, /*sq_repeat*/
515 (intargfunc
)string_item
, /*sq_item*/
516 (intintargfunc
)string_slice
, /*sq_slice*/
521 static PyBufferProcs string_as_buffer
= {
522 (getreadbufferproc
)string_buffer_getreadbuf
,
523 (getwritebufferproc
)string_buffer_getwritebuf
,
524 (getsegcountproc
)string_buffer_getsegcount
,
525 (getcharbufferproc
)string_buffer_getcharbuf
,
528 PyTypeObject PyString_Type
= {
529 PyObject_HEAD_INIT(&PyType_Type
)
532 sizeof(PyStringObject
),
534 (destructor
)string_dealloc
, /*tp_dealloc*/
535 (printfunc
)string_print
, /*tp_print*/
538 (cmpfunc
)string_compare
, /*tp_compare*/
539 (reprfunc
)string_repr
, /*tp_repr*/
541 &string_as_sequence
, /*tp_as_sequence*/
543 (hashfunc
)string_hash
, /*tp_hash*/
548 &string_as_buffer
, /*tp_as_buffer*/
549 Py_TPFLAGS_DEFAULT
, /*tp_flags*/
554 PyString_Concat(pv
, w
)
555 register PyObject
**pv
;
556 register PyObject
*w
;
558 register PyObject
*v
;
561 if (w
== NULL
|| !PyString_Check(*pv
)) {
566 v
= string_concat((PyStringObject
*) *pv
, w
);
572 PyString_ConcatAndDel(pv
, w
)
573 register PyObject
**pv
;
574 register PyObject
*w
;
576 PyString_Concat(pv
, w
);
581 /* The following function breaks the notion that strings are immutable:
582 it changes the size of a string. We get away with this only if there
583 is only one module referencing the object. You can also think of it
584 as creating a new string object and destroying the old one, only
585 more efficiently. In any case, don't use this if the string may
586 already be known to some other part of the code... */
589 _PyString_Resize(pv
, newsize
)
593 register PyObject
*v
;
594 register PyStringObject
*sv
;
596 if (!PyString_Check(v
) || v
->ob_refcnt
!= 1) {
599 PyErr_BadInternalCall();
602 /* XXX UNREF/NEWREF interface should be more symmetrical */
606 _Py_ForgetReference(v
);
609 sizeof(PyStringObject
) + newsize
* sizeof(char));
615 _Py_NewReference(*pv
);
616 sv
= (PyStringObject
*) *pv
;
617 sv
->ob_size
= newsize
;
618 sv
->ob_sval
[newsize
] = '\0';
622 /* Helpers for formatstring */
625 getnextarg(args
, arglen
, p_argidx
)
630 int argidx
= *p_argidx
;
631 if (argidx
< arglen
) {
636 return PyTuple_GetItem(args
, argidx
);
638 PyErr_SetString(PyExc_TypeError
,
639 "not enough arguments for format string");
643 #define F_LJUST (1<<0)
644 #define F_SIGN (1<<1)
645 #define F_BLANK (1<<2)
647 #define F_ZERO (1<<4)
650 formatfloat(buf
, flags
, prec
, type
, v
)
659 if (!PyArg_Parse(v
, "d;float argument required", &x
))
664 prec
= 50; /* Arbitrary limitation */
665 if (type
== 'f' && fabs(x
)/1e25
>= 1e25
)
667 sprintf(fmt
, "%%%s.%d%c", (flags
&F_ALT
) ? "#" : "", prec
, type
);
668 sprintf(buf
, fmt
, x
);
673 formatint(buf
, flags
, prec
, type
, v
)
682 if (!PyArg_Parse(v
, "l;int argument required", &x
))
686 sprintf(fmt
, "%%%s.%dl%c", (flags
&F_ALT
) ? "#" : "", prec
, type
);
687 sprintf(buf
, fmt
, x
);
696 if (PyString_Check(v
)) {
697 if (!PyArg_Parse(v
, "c;%c requires int or char", &buf
[0]))
701 if (!PyArg_Parse(v
, "b;%c requires int or char", &buf
[0]))
709 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
712 PyString_Format(format
, args
)
717 int fmtcnt
, rescnt
, reslen
, arglen
, argidx
;
720 PyObject
*dict
= NULL
;
721 if (format
== NULL
|| !PyString_Check(format
) || args
== NULL
) {
722 PyErr_BadInternalCall();
725 fmt
= PyString_AsString(format
);
726 fmtcnt
= PyString_Size(format
);
727 reslen
= rescnt
= fmtcnt
+ 100;
728 result
= PyString_FromStringAndSize((char *)NULL
, reslen
);
731 res
= PyString_AsString(result
);
732 if (PyTuple_Check(args
)) {
733 arglen
= PyTuple_Size(args
);
740 if (args
->ob_type
->tp_as_mapping
)
742 while (--fmtcnt
>= 0) {
745 rescnt
= fmtcnt
+ 100;
747 if (_PyString_Resize(&result
, reslen
) < 0)
749 res
= PyString_AsString(result
)
756 /* Got a format specifier */
764 PyObject
*temp
= NULL
;
768 char tmpbuf
[120]; /* For format{float,int,char}() */
777 PyErr_SetString(PyExc_TypeError
,
778 "format requires a mapping");
784 /* Skip over balanced parentheses */
785 while (pcount
> 0 && --fmtcnt
>= 0) {
788 else if (*fmt
== '(')
792 keylen
= fmt
- keystart
- 1;
793 if (fmtcnt
< 0 || pcount
> 0) {
794 PyErr_SetString(PyExc_ValueError
,
795 "incomplete format key");
798 key
= PyString_FromStringAndSize(keystart
,
806 args
= PyObject_GetItem(dict
, key
);
815 while (--fmtcnt
>= 0) {
816 switch (c
= *fmt
++) {
817 case '-': flags
|= F_LJUST
; continue;
818 case '+': flags
|= F_SIGN
; continue;
819 case ' ': flags
|= F_BLANK
; continue;
820 case '#': flags
|= F_ALT
; continue;
821 case '0': flags
|= F_ZERO
; continue;
826 v
= getnextarg(args
, arglen
, &argidx
);
829 if (!PyInt_Check(v
)) {
830 PyErr_SetString(PyExc_TypeError
,
834 width
= PyInt_AsLong(v
);
840 else if (c
>= 0 && isdigit(c
)) {
842 while (--fmtcnt
>= 0) {
843 c
= Py_CHARMASK(*fmt
++);
846 if ((width
*10) / 10 != width
) {
852 width
= width
*10 + (c
- '0');
860 v
= getnextarg(args
, arglen
, &argidx
);
863 if (!PyInt_Check(v
)) {
869 prec
= PyInt_AsLong(v
);
875 else if (c
>= 0 && isdigit(c
)) {
877 while (--fmtcnt
>= 0) {
878 c
= Py_CHARMASK(*fmt
++);
881 if ((prec
*10) / 10 != prec
) {
887 prec
= prec
*10 + (c
- '0');
892 if (c
== 'h' || c
== 'l' || c
== 'L') {
899 PyErr_SetString(PyExc_ValueError
,
900 "incomplete format");
904 v
= getnextarg(args
, arglen
, &argidx
);
916 temp
= PyObject_Str(v
);
919 if (!PyString_Check(temp
)) {
920 PyErr_SetString(PyExc_TypeError
,
921 "%s argument has non-string str()");
924 buf
= PyString_AsString(temp
);
925 len
= PyString_Size(temp
);
926 if (prec
>= 0 && len
> prec
)
938 len
= formatint(buf
, flags
, prec
, c
, v
);
945 (c
== 'x' || c
== 'X') &&
946 buf
[0] == '0' && buf
[1] == c
) {
963 len
= formatfloat(buf
, flags
, prec
, c
, v
);
972 len
= formatchar(buf
, v
);
977 PyErr_Format(PyExc_ValueError
,
978 "unsupported format character '%c' (0x%x)",
983 if (*buf
== '-' || *buf
== '+') {
987 else if (flags
& F_SIGN
)
989 else if (flags
& F_BLANK
)
996 if (rescnt
< width
+ (sign
!= '\0')) {
998 rescnt
= width
+ fmtcnt
+ 100;
1000 if (_PyString_Resize(&result
, reslen
) < 0)
1002 res
= PyString_AsString(result
)
1012 if (width
> len
&& !(flags
&F_LJUST
)) {
1016 } while (--width
> len
);
1018 if (sign
&& fill
== ' ')
1020 memcpy(res
, buf
, len
);
1023 while (--width
>= len
) {
1027 if (dict
&& (argidx
< arglen
) && c
!= '%') {
1028 PyErr_SetString(PyExc_TypeError
,
1029 "not all arguments converted");
1035 if (argidx
< arglen
&& !dict
) {
1036 PyErr_SetString(PyExc_TypeError
,
1037 "not all arguments converted");
1043 _PyString_Resize(&result
, reslen
- rescnt
);
1054 #ifdef INTERN_STRINGS
1056 static PyObject
*interned
;
1059 PyString_InternInPlace(p
)
1062 register PyStringObject
*s
= (PyStringObject
*)(*p
);
1064 if (s
== NULL
|| !PyString_Check(s
))
1065 Py_FatalError("PyString_InternInPlace: strings only please!");
1066 if ((t
= s
->ob_sinterned
) != NULL
) {
1067 if (t
== (PyObject
*)s
)
1074 if (interned
== NULL
) {
1075 interned
= PyDict_New();
1076 if (interned
== NULL
)
1079 if ((t
= PyDict_GetItem(interned
, (PyObject
*)s
)) != NULL
) {
1081 *p
= s
->ob_sinterned
= t
;
1086 if (PyDict_SetItem(interned
, t
, t
) == 0) {
1087 s
->ob_sinterned
= t
;
1095 PyString_InternFromString(cp
)
1098 PyObject
*s
= PyString_FromString(cp
);
1101 PyString_InternInPlace(&s
);
1111 for (i
= 0; i
< UCHAR_MAX
+ 1; i
++) {
1112 Py_XDECREF(characters
[i
]);
1113 characters
[i
] = NULL
;
1115 #ifndef DONT_SHARE_SHORT_STRINGS
1116 Py_XDECREF(nullstring
);
1119 #ifdef INTERN_STRINGS
1122 PyObject
*key
, *value
;
1126 while (PyDict_Next(interned
, &pos
, &key
, &value
)) {
1127 if (key
->ob_refcnt
== 2 && key
== value
) {
1128 PyDict_DelItem(interned
, key
);