Bump version to 0.9.1.
[python/dscho.git] / Objects / stringobject.c
blob6d25ddb4e792eb6503ace6e01b1879da58081421
1 /***********************************************************
2 Copyright (c) 2000, BeOpen.com.
3 Copyright (c) 1995-2000, Corporation for National Research Initiatives.
4 Copyright (c) 1990-1995, Stichting Mathematisch Centrum.
5 All rights reserved.
7 See the file "Misc/COPYRIGHT" for information on usage and
8 redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
9 ******************************************************************/
11 /* String object implementation */
13 #include "Python.h"
15 #include <ctype.h>
17 #ifdef COUNT_ALLOCS
18 int null_strings, one_strings;
19 #endif
21 #ifdef HAVE_LIMITS_H
22 #include <limits.h>
23 #else
24 #ifndef UCHAR_MAX
25 #define UCHAR_MAX 255
26 #endif
27 #endif
/* Cache of shared one-character string objects, indexed by the character
   value (*str & UCHAR_MAX).  Slots start out NULL and are filled in by the
   string constructors and by string_item(). */
29 static PyStringObject *characters[UCHAR_MAX + 1];
30 #ifndef DONT_SHARE_SHORT_STRINGS
/* The shared empty string; NULL until the first empty string is created. */
31 static PyStringObject *nullstring;
32 #endif
35 Newsizedstringobject() and newstringobject() try in certain cases
36 to share string objects. When the size of the string is zero,
37 these routines always return a pointer to the same string object;
38 when the size is one, they return a pointer to an already existing
39 object if the contents of the string is known. For
40 newstringobject() this is always the case, for
41 newsizedstringobject() this is the case when the first argument is
42 not NULL.
43 A common practice to allocate a string and then fill it in or
44 change it must be done carefully. It is only allowed to change the
45 contents of the string if the object was gotten from
46 newsizedstringobject() with a NULL first argument, because in the
47 future these routines may try to do even more sharing of objects.
49 PyObject *
50 PyString_FromStringAndSize(const char *str, int size)
52 register PyStringObject *op;
53 #ifndef DONT_SHARE_SHORT_STRINGS
54 if (size == 0 && (op = nullstring) != NULL) {
55 #ifdef COUNT_ALLOCS
56 null_strings++;
57 #endif
58 Py_INCREF(op);
59 return (PyObject *)op;
61 if (size == 1 && str != NULL &&
62 (op = characters[*str & UCHAR_MAX]) != NULL)
64 #ifdef COUNT_ALLOCS
65 one_strings++;
66 #endif
67 Py_INCREF(op);
68 return (PyObject *)op;
70 #endif /* DONT_SHARE_SHORT_STRINGS */
72 /* PyObject_NewVar is inlined */
73 op = (PyStringObject *)
74 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
75 if (op == NULL)
76 return PyErr_NoMemory();
77 PyObject_INIT_VAR(op, &PyString_Type, size);
78 #ifdef CACHE_HASH
79 op->ob_shash = -1;
80 #endif
81 #ifdef INTERN_STRINGS
82 op->ob_sinterned = NULL;
83 #endif
84 if (str != NULL)
85 memcpy(op->ob_sval, str, size);
86 op->ob_sval[size] = '\0';
87 #ifndef DONT_SHARE_SHORT_STRINGS
88 if (size == 0) {
89 nullstring = op;
90 Py_INCREF(op);
91 } else if (size == 1 && str != NULL) {
92 characters[*str & UCHAR_MAX] = op;
93 Py_INCREF(op);
95 #endif
96 return (PyObject *) op;
99 PyObject *
100 PyString_FromString(const char *str)
102 register size_t size = strlen(str);
103 register PyStringObject *op;
104 if (size > INT_MAX) {
105 PyErr_SetString(PyExc_OverflowError,
106 "string is too long for a Python string");
107 return NULL;
109 #ifndef DONT_SHARE_SHORT_STRINGS
110 if (size == 0 && (op = nullstring) != NULL) {
111 #ifdef COUNT_ALLOCS
112 null_strings++;
113 #endif
114 Py_INCREF(op);
115 return (PyObject *)op;
117 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
118 #ifdef COUNT_ALLOCS
119 one_strings++;
120 #endif
121 Py_INCREF(op);
122 return (PyObject *)op;
124 #endif /* DONT_SHARE_SHORT_STRINGS */
126 /* PyObject_NewVar is inlined */
127 op = (PyStringObject *)
128 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
129 if (op == NULL)
130 return PyErr_NoMemory();
131 PyObject_INIT_VAR(op, &PyString_Type, size);
132 #ifdef CACHE_HASH
133 op->ob_shash = -1;
134 #endif
135 #ifdef INTERN_STRINGS
136 op->ob_sinterned = NULL;
137 #endif
138 strcpy(op->ob_sval, str);
139 #ifndef DONT_SHARE_SHORT_STRINGS
140 if (size == 0) {
141 nullstring = op;
142 Py_INCREF(op);
143 } else if (size == 1) {
144 characters[*str & UCHAR_MAX] = op;
145 Py_INCREF(op);
147 #endif
148 return (PyObject *) op;
151 PyObject *PyString_Decode(const char *s,
152 int size,
153 const char *encoding,
154 const char *errors)
156 PyObject *buffer = NULL, *str;
158 if (encoding == NULL)
159 encoding = PyUnicode_GetDefaultEncoding();
161 /* Decode via the codec registry */
162 buffer = PyBuffer_FromMemory((void *)s, size);
163 if (buffer == NULL)
164 goto onError;
165 str = PyCodec_Decode(buffer, encoding, errors);
166 if (str == NULL)
167 goto onError;
168 /* Convert Unicode to a string using the default encoding */
169 if (PyUnicode_Check(str)) {
170 PyObject *temp = str;
171 str = PyUnicode_AsEncodedString(str, NULL, NULL);
172 Py_DECREF(temp);
173 if (str == NULL)
174 goto onError;
176 if (!PyString_Check(str)) {
177 PyErr_Format(PyExc_TypeError,
178 "decoder did not return a string object (type=%.400s)",
179 str->ob_type->tp_name);
180 Py_DECREF(str);
181 goto onError;
183 Py_DECREF(buffer);
184 return str;
186 onError:
187 Py_XDECREF(buffer);
188 return NULL;
191 PyObject *PyString_Encode(const char *s,
192 int size,
193 const char *encoding,
194 const char *errors)
196 PyObject *v, *str;
198 str = PyString_FromStringAndSize(s, size);
199 if (str == NULL)
200 return NULL;
201 v = PyString_AsEncodedString(str, encoding, errors);
202 Py_DECREF(str);
203 return v;
206 PyObject *PyString_AsEncodedString(PyObject *str,
207 const char *encoding,
208 const char *errors)
210 PyObject *v;
212 if (!PyString_Check(str)) {
213 PyErr_BadArgument();
214 goto onError;
217 if (encoding == NULL)
218 encoding = PyUnicode_GetDefaultEncoding();
220 /* Encode via the codec registry */
221 v = PyCodec_Encode(str, encoding, errors);
222 if (v == NULL)
223 goto onError;
224 /* Convert Unicode to a string using the default encoding */
225 if (PyUnicode_Check(v)) {
226 PyObject *temp = v;
227 v = PyUnicode_AsEncodedString(v, NULL, NULL);
228 Py_DECREF(temp);
229 if (v == NULL)
230 goto onError;
232 if (!PyString_Check(v)) {
233 PyErr_Format(PyExc_TypeError,
234 "encoder did not return a string object (type=%.400s)",
235 v->ob_type->tp_name);
236 Py_DECREF(v);
237 goto onError;
239 return v;
241 onError:
242 return NULL;
245 static void
246 string_dealloc(PyObject *op)
248 PyObject_DEL(op);
252 PyString_Size(register PyObject *op)
254 if (!PyString_Check(op)) {
255 PyErr_BadInternalCall();
256 return -1;
258 return ((PyStringObject *)op) -> ob_size;
261 /*const*/ char *
262 PyString_AsString(register PyObject *op)
264 if (!PyString_Check(op)) {
265 PyErr_BadInternalCall();
266 return NULL;
268 return ((PyStringObject *)op) -> ob_sval;
271 /* Methods */
273 static int
274 string_print(PyStringObject *op, FILE *fp, int flags)
276 int i;
277 char c;
278 int quote;
279 /* XXX Ought to check for interrupts when writing long strings */
280 if (flags & Py_PRINT_RAW) {
281 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
282 return 0;
285 /* figure out which quote to use; single is preferred */
286 quote = '\'';
287 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
288 quote = '"';
290 fputc(quote, fp);
291 for (i = 0; i < op->ob_size; i++) {
292 c = op->ob_sval[i];
293 if (c == quote || c == '\\')
294 fprintf(fp, "\\%c", c);
295 else if (c < ' ' || c >= 0177)
296 fprintf(fp, "\\%03o", c & 0377);
297 else
298 fputc(c, fp);
300 fputc(quote, fp);
301 return 0;
304 static PyObject *
305 string_repr(register PyStringObject *op)
307 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
308 PyObject *v;
309 if (newsize > INT_MAX) {
310 PyErr_SetString(PyExc_OverflowError,
311 "string is too large to make repr");
313 v = PyString_FromStringAndSize((char *)NULL, newsize);
314 if (v == NULL) {
315 return NULL;
317 else {
318 register int i;
319 register char c;
320 register char *p;
321 int quote;
323 /* figure out which quote to use; single is preferred */
324 quote = '\'';
325 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
326 quote = '"';
328 p = ((PyStringObject *)v)->ob_sval;
329 *p++ = quote;
330 for (i = 0; i < op->ob_size; i++) {
331 c = op->ob_sval[i];
332 if (c == quote || c == '\\')
333 *p++ = '\\', *p++ = c;
334 else if (c < ' ' || c >= 0177) {
335 sprintf(p, "\\%03o", c & 0377);
336 while (*p != '\0')
337 p++;
339 else
340 *p++ = c;
342 *p++ = quote;
343 *p = '\0';
344 _PyString_Resize(
345 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
346 return v;
350 static int
351 string_length(PyStringObject *a)
353 return a->ob_size;
356 static PyObject *
357 string_concat(register PyStringObject *a, register PyObject *bb)
359 register unsigned int size;
360 register PyStringObject *op;
361 if (!PyString_Check(bb)) {
362 if (PyUnicode_Check(bb))
363 return PyUnicode_Concat((PyObject *)a, bb);
364 PyErr_Format(PyExc_TypeError,
365 "cannot add type \"%.200s\" to string",
366 bb->ob_type->tp_name);
367 return NULL;
369 #define b ((PyStringObject *)bb)
370 /* Optimize cases with empty left or right operand */
371 if (a->ob_size == 0) {
372 Py_INCREF(bb);
373 return bb;
375 if (b->ob_size == 0) {
376 Py_INCREF(a);
377 return (PyObject *)a;
379 size = a->ob_size + b->ob_size;
380 /* PyObject_NewVar is inlined */
381 op = (PyStringObject *)
382 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
383 if (op == NULL)
384 return PyErr_NoMemory();
385 PyObject_INIT_VAR(op, &PyString_Type, size);
386 #ifdef CACHE_HASH
387 op->ob_shash = -1;
388 #endif
389 #ifdef INTERN_STRINGS
390 op->ob_sinterned = NULL;
391 #endif
392 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
393 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
394 op->ob_sval[size] = '\0';
395 return (PyObject *) op;
396 #undef b
399 static PyObject *
400 string_repeat(register PyStringObject *a, register int n)
402 register int i;
403 register int size;
404 register PyStringObject *op;
405 if (n < 0)
406 n = 0;
407 size = a->ob_size * n;
408 if (size == a->ob_size) {
409 Py_INCREF(a);
410 return (PyObject *)a;
412 /* PyObject_NewVar is inlined */
413 op = (PyStringObject *)
414 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
415 if (op == NULL)
416 return PyErr_NoMemory();
417 PyObject_INIT_VAR(op, &PyString_Type, size);
418 #ifdef CACHE_HASH
419 op->ob_shash = -1;
420 #endif
421 #ifdef INTERN_STRINGS
422 op->ob_sinterned = NULL;
423 #endif
424 for (i = 0; i < size; i += a->ob_size)
425 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
426 op->ob_sval[size] = '\0';
427 return (PyObject *) op;
430 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
432 static PyObject *
433 string_slice(register PyStringObject *a, register int i, register int j)
434 /* j -- may be negative! */
436 if (i < 0)
437 i = 0;
438 if (j < 0)
439 j = 0; /* Avoid signed/unsigned bug in next line */
440 if (j > a->ob_size)
441 j = a->ob_size;
442 if (i == 0 && j == a->ob_size) { /* It's the same as a */
443 Py_INCREF(a);
444 return (PyObject *)a;
446 if (j < i)
447 j = i;
448 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
451 static int
452 string_contains(PyObject *a, PyObject *el)
454 register char *s, *end;
455 register char c;
456 if (PyUnicode_Check(el))
457 return PyUnicode_Contains(a, el);
458 if (!PyString_Check(el) || PyString_Size(el) != 1) {
459 PyErr_SetString(PyExc_TypeError,
460 "'in <string>' requires character as left operand");
461 return -1;
463 c = PyString_AsString(el)[0];
464 s = PyString_AsString(a);
465 end = s + PyString_Size(a);
466 while (s < end) {
467 if (c == *s++)
468 return 1;
470 return 0;
473 static PyObject *
474 string_item(PyStringObject *a, register int i)
476 int c;
477 PyObject *v;
478 if (i < 0 || i >= a->ob_size) {
479 PyErr_SetString(PyExc_IndexError, "string index out of range");
480 return NULL;
482 c = a->ob_sval[i] & UCHAR_MAX;
483 v = (PyObject *) characters[c];
484 #ifdef COUNT_ALLOCS
485 if (v != NULL)
486 one_strings++;
487 #endif
488 if (v == NULL) {
489 v = PyString_FromStringAndSize((char *)NULL, 1);
490 if (v == NULL)
491 return NULL;
492 characters[c] = (PyStringObject *) v;
493 ((PyStringObject *)v)->ob_sval[0] = c;
495 Py_INCREF(v);
496 return v;
499 static int
500 string_compare(PyStringObject *a, PyStringObject *b)
502 int len_a = a->ob_size, len_b = b->ob_size;
503 int min_len = (len_a < len_b) ? len_a : len_b;
504 int cmp;
505 if (min_len > 0) {
506 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
507 if (cmp == 0)
508 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
509 if (cmp != 0)
510 return cmp;
512 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
515 static long
516 string_hash(PyStringObject *a)
518 register int len;
519 register unsigned char *p;
520 register long x;
522 #ifdef CACHE_HASH
523 if (a->ob_shash != -1)
524 return a->ob_shash;
525 #ifdef INTERN_STRINGS
526 if (a->ob_sinterned != NULL)
527 return (a->ob_shash =
528 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
529 #endif
530 #endif
531 len = a->ob_size;
532 p = (unsigned char *) a->ob_sval;
533 x = *p << 7;
534 while (--len >= 0)
535 x = (1000003*x) ^ *p++;
536 x ^= a->ob_size;
537 if (x == -1)
538 x = -2;
539 #ifdef CACHE_HASH
540 a->ob_shash = x;
541 #endif
542 return x;
545 static int
546 string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
548 if ( index != 0 ) {
549 PyErr_SetString(PyExc_SystemError,
550 "accessing non-existent string segment");
551 return -1;
553 *ptr = (void *)self->ob_sval;
554 return self->ob_size;
557 static int
558 string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
560 PyErr_SetString(PyExc_TypeError,
561 "Cannot use string as modifiable buffer");
562 return -1;
565 static int
566 string_buffer_getsegcount(PyStringObject *self, int *lenp)
568 if ( lenp )
569 *lenp = self->ob_size;
570 return 1;
573 static int
574 string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
576 if ( index != 0 ) {
577 PyErr_SetString(PyExc_SystemError,
578 "accessing non-existent string segment");
579 return -1;
581 *ptr = self->ob_sval;
582 return self->ob_size;
585 static PySequenceMethods string_as_sequence = {
586 (inquiry)string_length, /*sq_length*/
587 (binaryfunc)string_concat, /*sq_concat*/
588 (intargfunc)string_repeat, /*sq_repeat*/
589 (intargfunc)string_item, /*sq_item*/
590 (intintargfunc)string_slice, /*sq_slice*/
591 0, /*sq_ass_item*/
592 0, /*sq_ass_slice*/
593 (objobjproc)string_contains /*sq_contains*/
596 static PyBufferProcs string_as_buffer = {
597 (getreadbufferproc)string_buffer_getreadbuf,
598 (getwritebufferproc)string_buffer_getwritebuf,
599 (getsegcountproc)string_buffer_getsegcount,
600 (getcharbufferproc)string_buffer_getcharbuf,
/* Strip modes passed to do_strip() as its striptype argument: strip leading
   whitespace only, trailing whitespace only, or both ends. */
605 #define LEFTSTRIP 0
606 #define RIGHTSTRIP 1
607 #define BOTHSTRIP 2
610 static PyObject *
611 split_whitespace(const char *s, int len, int maxsplit)
613 int i, j, err;
614 PyObject* item;
615 PyObject *list = PyList_New(0);
617 if (list == NULL)
618 return NULL;
620 for (i = j = 0; i < len; ) {
621 while (i < len && isspace(Py_CHARMASK(s[i])))
622 i++;
623 j = i;
624 while (i < len && !isspace(Py_CHARMASK(s[i])))
625 i++;
626 if (j < i) {
627 if (maxsplit-- <= 0)
628 break;
629 item = PyString_FromStringAndSize(s+j, (int)(i-j));
630 if (item == NULL)
631 goto finally;
632 err = PyList_Append(list, item);
633 Py_DECREF(item);
634 if (err < 0)
635 goto finally;
636 while (i < len && isspace(Py_CHARMASK(s[i])))
637 i++;
638 j = i;
641 if (j < len) {
642 item = PyString_FromStringAndSize(s+j, (int)(len - j));
643 if (item == NULL)
644 goto finally;
645 err = PyList_Append(list, item);
646 Py_DECREF(item);
647 if (err < 0)
648 goto finally;
650 return list;
651 finally:
652 Py_DECREF(list);
653 return NULL;
657 static char split__doc__[] =
658 "S.split([sep [,maxsplit]]) -> list of strings\n\
660 Return a list of the words in the string S, using sep as the\n\
661 delimiter string. If maxsplit is given, at most maxsplit\n\
662 splits are done. If sep is not specified, any whitespace string\n\
663 is a separator.";
665 static PyObject *
666 string_split(PyStringObject *self, PyObject *args)
668 int len = PyString_GET_SIZE(self), n, i, j, err;
669 int maxsplit = -1;
670 const char *s = PyString_AS_STRING(self), *sub;
671 PyObject *list, *item, *subobj = Py_None;
673 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
674 return NULL;
675 if (maxsplit < 0)
676 maxsplit = INT_MAX;
677 if (subobj == Py_None)
678 return split_whitespace(s, len, maxsplit);
679 if (PyString_Check(subobj)) {
680 sub = PyString_AS_STRING(subobj);
681 n = PyString_GET_SIZE(subobj);
683 else if (PyUnicode_Check(subobj))
684 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
685 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
686 return NULL;
687 if (n == 0) {
688 PyErr_SetString(PyExc_ValueError, "empty separator");
689 return NULL;
692 list = PyList_New(0);
693 if (list == NULL)
694 return NULL;
696 i = j = 0;
697 while (i+n <= len) {
698 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
699 if (maxsplit-- <= 0)
700 break;
701 item = PyString_FromStringAndSize(s+j, (int)(i-j));
702 if (item == NULL)
703 goto fail;
704 err = PyList_Append(list, item);
705 Py_DECREF(item);
706 if (err < 0)
707 goto fail;
708 i = j = i + n;
710 else
711 i++;
713 item = PyString_FromStringAndSize(s+j, (int)(len-j));
714 if (item == NULL)
715 goto fail;
716 err = PyList_Append(list, item);
717 Py_DECREF(item);
718 if (err < 0)
719 goto fail;
721 return list;
723 fail:
724 Py_DECREF(list);
725 return NULL;
729 static char join__doc__[] =
730 "S.join(sequence) -> string\n\
732 Return a string which is the concatenation of the strings in the\n\
733 sequence. The separator between elements is S.";
735 static PyObject *
736 string_join(PyStringObject *self, PyObject *args)
738 char *sep = PyString_AS_STRING(self);
739 int seplen = PyString_GET_SIZE(self);
740 PyObject *res = NULL;
741 int reslen = 0;
742 char *p;
743 int seqlen = 0;
744 int sz = 100;
745 int i, slen, sz_incr;
746 PyObject *orig, *seq, *item;
748 if (!PyArg_ParseTuple(args, "O:join", &orig))
749 return NULL;
751 if (!(seq = PySequence_Fast(orig, ""))) {
752 if (PyErr_ExceptionMatches(PyExc_TypeError))
753 PyErr_Format(PyExc_TypeError,
754 "sequence expected, %.80s found",
755 orig->ob_type->tp_name);
756 return NULL;
758 /* From here on out, errors go through finally: for proper
759 * reference count manipulations.
761 seqlen = PySequence_Size(seq);
762 if (seqlen == 1) {
763 item = PySequence_Fast_GET_ITEM(seq, 0);
764 Py_INCREF(item);
765 Py_DECREF(seq);
766 return item;
769 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
770 goto finally;
772 p = PyString_AS_STRING(res);
774 for (i = 0; i < seqlen; i++) {
775 item = PySequence_Fast_GET_ITEM(seq, i);
776 if (!PyString_Check(item)){
777 if (PyUnicode_Check(item)) {
778 Py_DECREF(res);
779 Py_DECREF(seq);
780 return PyUnicode_Join((PyObject *)self, seq);
782 PyErr_Format(PyExc_TypeError,
783 "sequence item %i: expected string,"
784 " %.80s found",
785 i, item->ob_type->tp_name);
786 goto finally;
788 slen = PyString_GET_SIZE(item);
789 while (reslen + slen + seplen >= sz) {
790 /* at least double the size of the string */
791 sz_incr = slen + seplen > sz ? slen + seplen : sz;
792 if (_PyString_Resize(&res, sz + sz_incr)) {
793 goto finally;
795 sz += sz_incr;
796 p = PyString_AS_STRING(res) + reslen;
798 if (i > 0) {
799 memcpy(p, sep, seplen);
800 p += seplen;
801 reslen += seplen;
803 memcpy(p, PyString_AS_STRING(item), slen);
804 p += slen;
805 reslen += slen;
807 if (_PyString_Resize(&res, reslen))
808 goto finally;
809 Py_DECREF(seq);
810 return res;
812 finally:
813 Py_DECREF(seq);
814 Py_XDECREF(res);
815 return NULL;
820 static long
821 string_find_internal(PyStringObject *self, PyObject *args, int dir)
823 const char *s = PyString_AS_STRING(self), *sub;
824 int len = PyString_GET_SIZE(self);
825 int n, i = 0, last = INT_MAX;
826 PyObject *subobj;
828 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
829 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
830 return -2;
831 if (PyString_Check(subobj)) {
832 sub = PyString_AS_STRING(subobj);
833 n = PyString_GET_SIZE(subobj);
835 else if (PyUnicode_Check(subobj))
836 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
837 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
838 return -2;
840 if (last > len)
841 last = len;
842 if (last < 0)
843 last += len;
844 if (last < 0)
845 last = 0;
846 if (i < 0)
847 i += len;
848 if (i < 0)
849 i = 0;
851 if (dir > 0) {
852 if (n == 0 && i <= last)
853 return (long)i;
854 last -= n;
855 for (; i <= last; ++i)
856 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
857 return (long)i;
859 else {
860 int j;
862 if (n == 0 && i <= last)
863 return (long)last;
864 for (j = last-n; j >= i; --j)
865 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
866 return (long)j;
869 return -1;
873 static char find__doc__[] =
874 "S.find(sub [,start [,end]]) -> int\n\
876 Return the lowest index in S where substring sub is found,\n\
877 such that sub is contained within s[start,end]. Optional\n\
878 arguments start and end are interpreted as in slice notation.\n\
880 Return -1 on failure.";
882 static PyObject *
883 string_find(PyStringObject *self, PyObject *args)
885 long result = string_find_internal(self, args, +1);
886 if (result == -2)
887 return NULL;
888 return PyInt_FromLong(result);
892 static char index__doc__[] =
893 "S.index(sub [,start [,end]]) -> int\n\
895 Like S.find() but raise ValueError when the substring is not found.";
897 static PyObject *
898 string_index(PyStringObject *self, PyObject *args)
900 long result = string_find_internal(self, args, +1);
901 if (result == -2)
902 return NULL;
903 if (result == -1) {
904 PyErr_SetString(PyExc_ValueError,
905 "substring not found in string.index");
906 return NULL;
908 return PyInt_FromLong(result);
912 static char rfind__doc__[] =
913 "S.rfind(sub [,start [,end]]) -> int\n\
915 Return the highest index in S where substring sub is found,\n\
916 such that sub is contained within s[start,end]. Optional\n\
917 arguments start and end are interpreted as in slice notation.\n\
919 Return -1 on failure.";
921 static PyObject *
922 string_rfind(PyStringObject *self, PyObject *args)
924 long result = string_find_internal(self, args, -1);
925 if (result == -2)
926 return NULL;
927 return PyInt_FromLong(result);
931 static char rindex__doc__[] =
932 "S.rindex(sub [,start [,end]]) -> int\n\
934 Like S.rfind() but raise ValueError when the substring is not found.";
936 static PyObject *
937 string_rindex(PyStringObject *self, PyObject *args)
939 long result = string_find_internal(self, args, -1);
940 if (result == -2)
941 return NULL;
942 if (result == -1) {
943 PyErr_SetString(PyExc_ValueError,
944 "substring not found in string.rindex");
945 return NULL;
947 return PyInt_FromLong(result);
951 static PyObject *
952 do_strip(PyStringObject *self, PyObject *args, int striptype)
954 char *s = PyString_AS_STRING(self);
955 int len = PyString_GET_SIZE(self), i, j;
957 if (!PyArg_ParseTuple(args, ":strip"))
958 return NULL;
960 i = 0;
961 if (striptype != RIGHTSTRIP) {
962 while (i < len && isspace(Py_CHARMASK(s[i]))) {
963 i++;
967 j = len;
968 if (striptype != LEFTSTRIP) {
969 do {
970 j--;
971 } while (j >= i && isspace(Py_CHARMASK(s[j])));
972 j++;
975 if (i == 0 && j == len) {
976 Py_INCREF(self);
977 return (PyObject*)self;
979 else
980 return PyString_FromStringAndSize(s+i, j-i);
984 static char strip__doc__[] =
985 "S.strip() -> string\n\
987 Return a copy of the string S with leading and trailing\n\
988 whitespace removed.";
990 static PyObject *
991 string_strip(PyStringObject *self, PyObject *args)
993 return do_strip(self, args, BOTHSTRIP);
997 static char lstrip__doc__[] =
998 "S.lstrip() -> string\n\
1000 Return a copy of the string S with leading whitespace removed.";
1002 static PyObject *
1003 string_lstrip(PyStringObject *self, PyObject *args)
1005 return do_strip(self, args, LEFTSTRIP);
1009 static char rstrip__doc__[] =
1010 "S.rstrip() -> string\n\
1012 Return a copy of the string S with trailing whitespace removed.";
1014 static PyObject *
1015 string_rstrip(PyStringObject *self, PyObject *args)
1017 return do_strip(self, args, RIGHTSTRIP);
1021 static char lower__doc__[] =
1022 "S.lower() -> string\n\
1024 Return a copy of the string S converted to lowercase.";
1026 static PyObject *
1027 string_lower(PyStringObject *self, PyObject *args)
1029 char *s = PyString_AS_STRING(self), *s_new;
1030 int i, n = PyString_GET_SIZE(self);
1031 PyObject *new;
1033 if (!PyArg_ParseTuple(args, ":lower"))
1034 return NULL;
1035 new = PyString_FromStringAndSize(NULL, n);
1036 if (new == NULL)
1037 return NULL;
1038 s_new = PyString_AsString(new);
1039 for (i = 0; i < n; i++) {
1040 int c = Py_CHARMASK(*s++);
1041 if (isupper(c)) {
1042 *s_new = tolower(c);
1043 } else
1044 *s_new = c;
1045 s_new++;
1047 return new;
1051 static char upper__doc__[] =
1052 "S.upper() -> string\n\
1054 Return a copy of the string S converted to uppercase.";
1056 static PyObject *
1057 string_upper(PyStringObject *self, PyObject *args)
1059 char *s = PyString_AS_STRING(self), *s_new;
1060 int i, n = PyString_GET_SIZE(self);
1061 PyObject *new;
1063 if (!PyArg_ParseTuple(args, ":upper"))
1064 return NULL;
1065 new = PyString_FromStringAndSize(NULL, n);
1066 if (new == NULL)
1067 return NULL;
1068 s_new = PyString_AsString(new);
1069 for (i = 0; i < n; i++) {
1070 int c = Py_CHARMASK(*s++);
1071 if (islower(c)) {
1072 *s_new = toupper(c);
1073 } else
1074 *s_new = c;
1075 s_new++;
1077 return new;
1081 static char title__doc__[] =
1082 "S.title() -> string\n\
1084 Return a titlecased version of S, i.e. words start with uppercase\n\
1085 characters, all remaining cased characters have lowercase.";
1087 static PyObject*
1088 string_title(PyUnicodeObject *self, PyObject *args)
1090 char *s = PyString_AS_STRING(self), *s_new;
1091 int i, n = PyString_GET_SIZE(self);
1092 int previous_is_cased = 0;
1093 PyObject *new;
1095 if (!PyArg_ParseTuple(args, ":title"))
1096 return NULL;
1097 new = PyString_FromStringAndSize(NULL, n);
1098 if (new == NULL)
1099 return NULL;
1100 s_new = PyString_AsString(new);
1101 for (i = 0; i < n; i++) {
1102 int c = Py_CHARMASK(*s++);
1103 if (islower(c)) {
1104 if (!previous_is_cased)
1105 c = toupper(c);
1106 previous_is_cased = 1;
1107 } else if (isupper(c)) {
1108 if (previous_is_cased)
1109 c = tolower(c);
1110 previous_is_cased = 1;
1111 } else
1112 previous_is_cased = 0;
1113 *s_new++ = c;
1115 return new;
1118 static char capitalize__doc__[] =
1119 "S.capitalize() -> string\n\
1121 Return a copy of the string S with only its first character\n\
1122 capitalized.";
1124 static PyObject *
1125 string_capitalize(PyStringObject *self, PyObject *args)
1127 char *s = PyString_AS_STRING(self), *s_new;
1128 int i, n = PyString_GET_SIZE(self);
1129 PyObject *new;
1131 if (!PyArg_ParseTuple(args, ":capitalize"))
1132 return NULL;
1133 new = PyString_FromStringAndSize(NULL, n);
1134 if (new == NULL)
1135 return NULL;
1136 s_new = PyString_AsString(new);
1137 if (0 < n) {
1138 int c = Py_CHARMASK(*s++);
1139 if (islower(c))
1140 *s_new = toupper(c);
1141 else
1142 *s_new = c;
1143 s_new++;
1145 for (i = 1; i < n; i++) {
1146 int c = Py_CHARMASK(*s++);
1147 if (isupper(c))
1148 *s_new = tolower(c);
1149 else
1150 *s_new = c;
1151 s_new++;
1153 return new;
1157 static char count__doc__[] =
1158 "S.count(sub[, start[, end]]) -> int\n\
1160 Return the number of occurrences of substring sub in string\n\
1161 S[start:end]. Optional arguments start and end are\n\
1162 interpreted as in slice notation.";
1164 static PyObject *
1165 string_count(PyStringObject *self, PyObject *args)
1167 const char *s = PyString_AS_STRING(self), *sub;
1168 int len = PyString_GET_SIZE(self), n;
1169 int i = 0, last = INT_MAX;
1170 int m, r;
1171 PyObject *subobj;
1173 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1174 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
1175 return NULL;
1177 if (PyString_Check(subobj)) {
1178 sub = PyString_AS_STRING(subobj);
1179 n = PyString_GET_SIZE(subobj);
1181 else if (PyUnicode_Check(subobj))
1182 return PyInt_FromLong(
1183 PyUnicode_Count((PyObject *)self, subobj, i, last));
1184 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1185 return NULL;
1187 if (last > len)
1188 last = len;
1189 if (last < 0)
1190 last += len;
1191 if (last < 0)
1192 last = 0;
1193 if (i < 0)
1194 i += len;
1195 if (i < 0)
1196 i = 0;
1197 m = last + 1 - n;
1198 if (n == 0)
1199 return PyInt_FromLong((long) (m-i));
1201 r = 0;
1202 while (i < m) {
1203 if (!memcmp(s+i, sub, n)) {
1204 r++;
1205 i += n;
1206 } else {
1207 i++;
1210 return PyInt_FromLong((long) r);
1214 static char swapcase__doc__[] =
1215 "S.swapcase() -> string\n\
1217 Return a copy of the string S with uppercase characters\n\
1218 converted to lowercase and vice versa.";
1220 static PyObject *
1221 string_swapcase(PyStringObject *self, PyObject *args)
1223 char *s = PyString_AS_STRING(self), *s_new;
1224 int i, n = PyString_GET_SIZE(self);
1225 PyObject *new;
1227 if (!PyArg_ParseTuple(args, ":swapcase"))
1228 return NULL;
1229 new = PyString_FromStringAndSize(NULL, n);
1230 if (new == NULL)
1231 return NULL;
1232 s_new = PyString_AsString(new);
1233 for (i = 0; i < n; i++) {
1234 int c = Py_CHARMASK(*s++);
1235 if (islower(c)) {
1236 *s_new = toupper(c);
1238 else if (isupper(c)) {
1239 *s_new = tolower(c);
1241 else
1242 *s_new = c;
1243 s_new++;
1245 return new;
1249 static char translate__doc__[] =
1250 "S.translate(table [,deletechars]) -> string\n\
1252 Return a copy of the string S, where all characters occurring\n\
1253 in the optional argument deletechars are removed, and the\n\
1254 remaining characters have been mapped through the given\n\
1255 translation table, which must be a string of length 256.";
1257 static PyObject *
1258 string_translate(PyStringObject *self, PyObject *args)
1260 register char *input, *output;
1261 register const char *table;
1262 register int i, c, changed = 0;
1263 PyObject *input_obj = (PyObject*)self;
1264 const char *table1, *output_start, *del_table=NULL;
1265 int inlen, tablen, dellen = 0;
1266 PyObject *result;
1267 int trans_table[256];
1268 PyObject *tableobj, *delobj = NULL;
1270 if (!PyArg_ParseTuple(args, "O|O:translate",
1271 &tableobj, &delobj))
1272 return NULL;
1274 if (PyString_Check(tableobj)) {
1275 table1 = PyString_AS_STRING(tableobj);
1276 tablen = PyString_GET_SIZE(tableobj);
1278 else if (PyUnicode_Check(tableobj)) {
1279 /* Unicode .translate() does not support the deletechars
1280 parameter; instead a mapping to None will cause characters
1281 to be deleted. */
1282 if (delobj != NULL) {
1283 PyErr_SetString(PyExc_TypeError,
1284 "deletions are implemented differently for unicode");
1285 return NULL;
1287 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1289 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
1290 return NULL;
1292 if (delobj != NULL) {
1293 if (PyString_Check(delobj)) {
1294 del_table = PyString_AS_STRING(delobj);
1295 dellen = PyString_GET_SIZE(delobj);
1297 else if (PyUnicode_Check(delobj)) {
1298 PyErr_SetString(PyExc_TypeError,
1299 "deletions are implemented differently for unicode");
1300 return NULL;
1302 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1303 return NULL;
1305 if (tablen != 256) {
1306 PyErr_SetString(PyExc_ValueError,
1307 "translation table must be 256 characters long");
1308 return NULL;
1311 else {
1312 del_table = NULL;
1313 dellen = 0;
1316 table = table1;
1317 inlen = PyString_Size(input_obj);
1318 result = PyString_FromStringAndSize((char *)NULL, inlen);
1319 if (result == NULL)
1320 return NULL;
1321 output_start = output = PyString_AsString(result);
1322 input = PyString_AsString(input_obj);
1324 if (dellen == 0) {
1325 /* If no deletions are required, use faster code */
1326 for (i = inlen; --i >= 0; ) {
1327 c = Py_CHARMASK(*input++);
1328 if (Py_CHARMASK((*output++ = table[c])) != c)
1329 changed = 1;
1331 if (changed)
1332 return result;
1333 Py_DECREF(result);
1334 Py_INCREF(input_obj);
1335 return input_obj;
1338 for (i = 0; i < 256; i++)
1339 trans_table[i] = Py_CHARMASK(table[i]);
1341 for (i = 0; i < dellen; i++)
1342 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1344 for (i = inlen; --i >= 0; ) {
1345 c = Py_CHARMASK(*input++);
1346 if (trans_table[c] != -1)
1347 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1348 continue;
1349 changed = 1;
1351 if (!changed) {
1352 Py_DECREF(result);
1353 Py_INCREF(input_obj);
1354 return input_obj;
1356 /* Fix the size of the resulting string */
1357 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1358 return NULL;
1359 return result;
/* What follows is used for implementing replace().  Perry Stoll. */

/*
  mymemfind

  A strstr() work-alike for arbitrary (non NUL-terminated) memory blocks.

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the index of the match within MEM, or -1 when
  there is no match (which is always the case when PAT_LEN > LEN).
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
	/* a match cannot start in the last pat_len-1 bytes of mem */
	const int last_start = len - pat_len;
	int pos = 0;

	while (pos <= last_start) {
		/* cheap first-byte test before the full comparison */
		if (mem[pos] == pat[0] &&
		    memcmp(mem + pos, pat, pat_len) == 0)
			return pos;
		pos++;
	}
	return -1;
}
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning left
  to right:
     mem=1111  and pat=11 gives 2,
     mem=11111 and pat=11 also gives 2.
*/
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
	int hits;
	int pos;

	for (hits = 0; len >= 0; hits++) {
		pos = mymemfind(mem, len, pat, pat_len);
		if (pos == -1)
			break;
		/* resume the search just past this match */
		mem += pos + pat_len;
		len -= pos + pat_len;
	}
	return hits;
}
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB.

   If length of PAT is greater than length of STR, STR is empty, or
   there are no occurrences of PAT in STR (or COUNT is 0), then the
   original string is returned.  Otherwise, a new string is allocated
   here and returned; the caller must release it with PyMem_FREE().

   COUNT limits the number of replacements; a negative COUNT means
   "replace all".  PAT_LEN must be > 0 (the caller checks this).

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result would
       not fit in an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
             const char *pat, int pat_len,	/* pattern string to find */
             const char *sub, int sub_len,	/* substitution string */
             int count,				/* number of replacements */
	     int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len;

	if (len == 0 || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = mymemcnt(str, len, pat, pat_len);
	if (count < 0)
		count = INT_MAX;
	else if (nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;

	/* Refuse results whose length does not fit in an int; only a
	   growing replacement can overflow. */
	if (sub_len > pat_len &&
	    nfound > (INT_MAX - len) / (sub_len - pat_len))
		return NULL;
	new_len = len + nfound*(sub_len - pat_len);

	/* new_len is 0 when the whole input is replaced by an empty SUB.
	   A zero-byte allocation may legitimately return NULL, which the
	   caller would mistake for an out-of-memory error, so always ask
	   for at least one byte. */
	new_s = (char *)PyMem_MALLOC(new_len > 0 ? new_len : 1);
	if (new_s == NULL)
		return NULL;

	*out_len = new_len;
	out_s = new_s;

	while (len > 0) {
		/* find index of next instance of pattern */
		offset = mymemfind(str, len, pat, pat_len);
		/* if not found, break out of loop */
		if (offset == -1)
			break;

		/* copy non matching part of input string */
		memcpy(new_s, str, offset); /* copy part of str before pat */
		str += offset + pat_len; /* move str past pattern */
		len -= offset + pat_len; /* reduce length of str remaining */

		/* copy substitute into the output string */
		new_s += offset; /* move new_s to dest for sub string */
		memcpy(new_s, sub, sub_len); /* copy substring into new_s */
		new_s += sub_len; /* offset new_s past sub string */

		/* break when we've done count replacements */
		if (--count == 0)
			break;
	}
	/* copy any remaining values into output string */
	if (len > 0)
		memcpy(new_s, str, len);
	return out_s;

  return_same:
	*out_len = -1;
	return (char*)str;	/* have to cast away constness here */
}
1494 static char replace__doc__[] =
1495 "S.replace (old, new[, maxsplit]) -> string\n\
1497 Return a copy of string S with all occurrences of substring\n\
1498 old replaced by new. If the optional argument maxsplit is\n\
1499 given, only the first maxsplit occurrences are replaced.";
1501 static PyObject *
1502 string_replace(PyStringObject *self, PyObject *args)
1504 const char *str = PyString_AS_STRING(self), *sub, *repl;
1505 char *new_s;
1506 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1507 int count = -1;
1508 PyObject *new;
1509 PyObject *subobj, *replobj;
1511 if (!PyArg_ParseTuple(args, "OO|i:replace",
1512 &subobj, &replobj, &count))
1513 return NULL;
1515 if (PyString_Check(subobj)) {
1516 sub = PyString_AS_STRING(subobj);
1517 sub_len = PyString_GET_SIZE(subobj);
1519 else if (PyUnicode_Check(subobj))
1520 return PyUnicode_Replace((PyObject *)self,
1521 subobj, replobj, count);
1522 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1523 return NULL;
1525 if (PyString_Check(replobj)) {
1526 repl = PyString_AS_STRING(replobj);
1527 repl_len = PyString_GET_SIZE(replobj);
1529 else if (PyUnicode_Check(replobj))
1530 return PyUnicode_Replace((PyObject *)self,
1531 subobj, replobj, count);
1532 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1533 return NULL;
1535 if (sub_len <= 0) {
1536 PyErr_SetString(PyExc_ValueError, "empty pattern string");
1537 return NULL;
1539 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
1540 if (new_s == NULL) {
1541 PyErr_NoMemory();
1542 return NULL;
1544 if (out_len == -1) {
1545 /* we're returning another reference to self */
1546 new = (PyObject*)self;
1547 Py_INCREF(new);
1549 else {
1550 new = PyString_FromStringAndSize(new_s, out_len);
1551 PyMem_FREE(new_s);
1553 return new;
1557 static char startswith__doc__[] =
1558 "S.startswith(prefix[, start[, end]]) -> int\n\
1560 Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1561 optional start, test S beginning at that position. With optional end, stop\n\
1562 comparing S at that position.";
1564 static PyObject *
1565 string_startswith(PyStringObject *self, PyObject *args)
1567 const char* str = PyString_AS_STRING(self);
1568 int len = PyString_GET_SIZE(self);
1569 const char* prefix;
1570 int plen;
1571 int start = 0;
1572 int end = -1;
1573 PyObject *subobj;
1575 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1576 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1577 return NULL;
1578 if (PyString_Check(subobj)) {
1579 prefix = PyString_AS_STRING(subobj);
1580 plen = PyString_GET_SIZE(subobj);
1582 else if (PyUnicode_Check(subobj))
1583 return PyInt_FromLong(
1584 PyUnicode_Tailmatch((PyObject *)self,
1585 subobj, start, end, -1));
1586 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
1587 return NULL;
1589 /* adopt Java semantics for index out of range. it is legal for
1590 * offset to be == plen, but this only returns true if prefix is
1591 * the empty string.
1593 if (start < 0 || start+plen > len)
1594 return PyInt_FromLong(0);
1596 if (!memcmp(str+start, prefix, plen)) {
1597 /* did the match end after the specified end? */
1598 if (end < 0)
1599 return PyInt_FromLong(1);
1600 else if (end - start < plen)
1601 return PyInt_FromLong(0);
1602 else
1603 return PyInt_FromLong(1);
1605 else return PyInt_FromLong(0);
1609 static char endswith__doc__[] =
1610 "S.endswith(suffix[, start[, end]]) -> int\n\
1612 Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1613 optional start, test S beginning at that position. With optional end, stop\n\
1614 comparing S at that position.";
1616 static PyObject *
1617 string_endswith(PyStringObject *self, PyObject *args)
1619 const char* str = PyString_AS_STRING(self);
1620 int len = PyString_GET_SIZE(self);
1621 const char* suffix;
1622 int slen;
1623 int start = 0;
1624 int end = -1;
1625 int lower, upper;
1626 PyObject *subobj;
1628 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1629 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1630 return NULL;
1631 if (PyString_Check(subobj)) {
1632 suffix = PyString_AS_STRING(subobj);
1633 slen = PyString_GET_SIZE(subobj);
1635 else if (PyUnicode_Check(subobj))
1636 return PyInt_FromLong(
1637 PyUnicode_Tailmatch((PyObject *)self,
1638 subobj, start, end, +1));
1639 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
1640 return NULL;
1642 if (start < 0 || start > len || slen > len)
1643 return PyInt_FromLong(0);
1645 upper = (end >= 0 && end <= len) ? end : len;
1646 lower = (upper - slen) > start ? (upper - slen) : start;
1648 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
1649 return PyInt_FromLong(1);
1650 else return PyInt_FromLong(0);
1654 static char encode__doc__[] =
1655 "S.encode([encoding[,errors]]) -> string\n\
1657 Return an encoded string version of S. Default encoding is the current\n\
1658 default string encoding. errors may be given to set a different error\n\
1659 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1660 a ValueError. Other possible values are 'ignore' and 'replace'.";
1662 static PyObject *
1663 string_encode(PyStringObject *self, PyObject *args)
1665 char *encoding = NULL;
1666 char *errors = NULL;
1667 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
1668 return NULL;
1669 return PyString_AsEncodedString((PyObject *)self, encoding, errors);
1673 static char expandtabs__doc__[] =
1674 "S.expandtabs([tabsize]) -> string\n\
1676 Return a copy of S where all tab characters are expanded using spaces.\n\
1677 If tabsize is not given, a tab size of 8 characters is assumed.";
1679 static PyObject*
1680 string_expandtabs(PyStringObject *self, PyObject *args)
1682 const char *e, *p;
1683 char *q;
1684 int i, j;
1685 PyObject *u;
1686 int tabsize = 8;
1688 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1689 return NULL;
1691 /* First pass: determine size of output string */
1692 i = j = 0;
1693 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1694 for (p = PyString_AS_STRING(self); p < e; p++)
1695 if (*p == '\t') {
1696 if (tabsize > 0)
1697 j += tabsize - (j % tabsize);
1699 else {
1700 j++;
1701 if (*p == '\n' || *p == '\r') {
1702 i += j;
1703 j = 0;
1707 /* Second pass: create output string and fill it */
1708 u = PyString_FromStringAndSize(NULL, i + j);
1709 if (!u)
1710 return NULL;
1712 j = 0;
1713 q = PyString_AS_STRING(u);
1715 for (p = PyString_AS_STRING(self); p < e; p++)
1716 if (*p == '\t') {
1717 if (tabsize > 0) {
1718 i = tabsize - (j % tabsize);
1719 j += i;
1720 while (i--)
1721 *q++ = ' ';
1724 else {
1725 j++;
1726 *q++ = *p;
1727 if (*p == '\n' || *p == '\r')
1728 j = 0;
1731 return u;
1734 static
1735 PyObject *pad(PyStringObject *self,
1736 int left,
1737 int right,
1738 char fill)
1740 PyObject *u;
1742 if (left < 0)
1743 left = 0;
1744 if (right < 0)
1745 right = 0;
1747 if (left == 0 && right == 0) {
1748 Py_INCREF(self);
1749 return (PyObject *)self;
1752 u = PyString_FromStringAndSize(NULL,
1753 left + PyString_GET_SIZE(self) + right);
1754 if (u) {
1755 if (left)
1756 memset(PyString_AS_STRING(u), fill, left);
1757 memcpy(PyString_AS_STRING(u) + left,
1758 PyString_AS_STRING(self),
1759 PyString_GET_SIZE(self));
1760 if (right)
1761 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1762 fill, right);
1765 return u;
1768 static char ljust__doc__[] =
1769 "S.ljust(width) -> string\n\
1771 Return S left justified in a string of length width. Padding is\n\
1772 done using spaces.";
1774 static PyObject *
1775 string_ljust(PyStringObject *self, PyObject *args)
1777 int width;
1778 if (!PyArg_ParseTuple(args, "i:ljust", &width))
1779 return NULL;
1781 if (PyString_GET_SIZE(self) >= width) {
1782 Py_INCREF(self);
1783 return (PyObject*) self;
1786 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1790 static char rjust__doc__[] =
1791 "S.rjust(width) -> string\n\
1793 Return S right justified in a string of length width. Padding is\n\
1794 done using spaces.";
1796 static PyObject *
1797 string_rjust(PyStringObject *self, PyObject *args)
1799 int width;
1800 if (!PyArg_ParseTuple(args, "i:rjust", &width))
1801 return NULL;
1803 if (PyString_GET_SIZE(self) >= width) {
1804 Py_INCREF(self);
1805 return (PyObject*) self;
1808 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1812 static char center__doc__[] =
1813 "S.center(width) -> string\n\
1815 Return S centered in a string of length width. Padding is done\n\
1816 using spaces.";
1818 static PyObject *
1819 string_center(PyStringObject *self, PyObject *args)
1821 int marg, left;
1822 int width;
1824 if (!PyArg_ParseTuple(args, "i:center", &width))
1825 return NULL;
1827 if (PyString_GET_SIZE(self) >= width) {
1828 Py_INCREF(self);
1829 return (PyObject*) self;
1832 marg = width - PyString_GET_SIZE(self);
1833 left = marg / 2 + (marg & width & 1);
1835 return pad(self, left, marg - left, ' ');
#if 0
static char zfill__doc__[] =
"S.zfill(width) -> string\n\
\n\
Pad a numeric string x with zeros on the left, to fill a field\n\
of the specified width. The string x is never truncated.";

/* S.zfill(width): left-pad with '0' up to width; a leading sign is
   kept in front of the zeros.  (Currently compiled out.) */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
	int width;
	int zeros;
	PyObject *padded;
	char *buf;

	if (!PyArg_ParseTuple(args, "i:zfill", &width))
		return NULL;

	if (width <= PyString_GET_SIZE(self)) {
		Py_INCREF(self);
		return (PyObject*) self;
	}

	zeros = width - PyString_GET_SIZE(self);

	padded = pad(self, zeros, 0, '0');
	if (padded == NULL)
		return NULL;

	/* buf[zeros] is the first character of the original string */
	buf = PyString_AS_STRING(padded);
	switch (buf[zeros]) {
	case '+':
	case '-':
		/* move sign to beginning of string */
		buf[0] = buf[zeros];
		buf[zeros] = '0';
		break;
	default:
		break;
	}

	return padded;
}
#endif
1878 static char isspace__doc__[] =
1879 "S.isspace() -> int\n\
1881 Return 1 if there are only whitespace characters in S,\n\
1882 0 otherwise.";
1884 static PyObject*
1885 string_isspace(PyStringObject *self, PyObject *args)
1887 register const unsigned char *p
1888 = (unsigned char *) PyString_AS_STRING(self);
1889 register const unsigned char *e;
1891 if (!PyArg_NoArgs(args))
1892 return NULL;
1894 /* Shortcut for single character strings */
1895 if (PyString_GET_SIZE(self) == 1 &&
1896 isspace(*p))
1897 return PyInt_FromLong(1);
1899 /* Special case for empty strings */
1900 if (PyString_GET_SIZE(self) == 0)
1901 return PyInt_FromLong(0);
1903 e = p + PyString_GET_SIZE(self);
1904 for (; p < e; p++) {
1905 if (!isspace(*p))
1906 return PyInt_FromLong(0);
1908 return PyInt_FromLong(1);
1912 static char isalpha__doc__[] =
1913 "S.isalpha() -> int\n\
1915 Return 1 if all characters in S are alphabetic\n\
1916 and there is at least one character in S, 0 otherwise.";
1918 static PyObject*
1919 string_isalpha(PyUnicodeObject *self, PyObject *args)
1921 register const unsigned char *p
1922 = (unsigned char *) PyString_AS_STRING(self);
1923 register const unsigned char *e;
1925 if (!PyArg_NoArgs(args))
1926 return NULL;
1928 /* Shortcut for single character strings */
1929 if (PyString_GET_SIZE(self) == 1 &&
1930 isalpha(*p))
1931 return PyInt_FromLong(1);
1933 /* Special case for empty strings */
1934 if (PyString_GET_SIZE(self) == 0)
1935 return PyInt_FromLong(0);
1937 e = p + PyString_GET_SIZE(self);
1938 for (; p < e; p++) {
1939 if (!isalpha(*p))
1940 return PyInt_FromLong(0);
1942 return PyInt_FromLong(1);
1946 static char isalnum__doc__[] =
1947 "S.isalnum() -> int\n\
1949 Return 1 if all characters in S are alphanumeric\n\
1950 and there is at least one character in S, 0 otherwise.";
1952 static PyObject*
1953 string_isalnum(PyUnicodeObject *self, PyObject *args)
1955 register const unsigned char *p
1956 = (unsigned char *) PyString_AS_STRING(self);
1957 register const unsigned char *e;
1959 if (!PyArg_NoArgs(args))
1960 return NULL;
1962 /* Shortcut for single character strings */
1963 if (PyString_GET_SIZE(self) == 1 &&
1964 isalnum(*p))
1965 return PyInt_FromLong(1);
1967 /* Special case for empty strings */
1968 if (PyString_GET_SIZE(self) == 0)
1969 return PyInt_FromLong(0);
1971 e = p + PyString_GET_SIZE(self);
1972 for (; p < e; p++) {
1973 if (!isalnum(*p))
1974 return PyInt_FromLong(0);
1976 return PyInt_FromLong(1);
1980 static char isdigit__doc__[] =
1981 "S.isdigit() -> int\n\
1983 Return 1 if there are only digit characters in S,\n\
1984 0 otherwise.";
1986 static PyObject*
1987 string_isdigit(PyStringObject *self, PyObject *args)
1989 register const unsigned char *p
1990 = (unsigned char *) PyString_AS_STRING(self);
1991 register const unsigned char *e;
1993 if (!PyArg_NoArgs(args))
1994 return NULL;
1996 /* Shortcut for single character strings */
1997 if (PyString_GET_SIZE(self) == 1 &&
1998 isdigit(*p))
1999 return PyInt_FromLong(1);
2001 /* Special case for empty strings */
2002 if (PyString_GET_SIZE(self) == 0)
2003 return PyInt_FromLong(0);
2005 e = p + PyString_GET_SIZE(self);
2006 for (; p < e; p++) {
2007 if (!isdigit(*p))
2008 return PyInt_FromLong(0);
2010 return PyInt_FromLong(1);
2014 static char islower__doc__[] =
2015 "S.islower() -> int\n\
2017 Return 1 if all cased characters in S are lowercase and there is\n\
2018 at least one cased character in S, 0 otherwise.";
2020 static PyObject*
2021 string_islower(PyStringObject *self, PyObject *args)
2023 register const unsigned char *p
2024 = (unsigned char *) PyString_AS_STRING(self);
2025 register const unsigned char *e;
2026 int cased;
2028 if (!PyArg_NoArgs(args))
2029 return NULL;
2031 /* Shortcut for single character strings */
2032 if (PyString_GET_SIZE(self) == 1)
2033 return PyInt_FromLong(islower(*p) != 0);
2035 /* Special case for empty strings */
2036 if (PyString_GET_SIZE(self) == 0)
2037 return PyInt_FromLong(0);
2039 e = p + PyString_GET_SIZE(self);
2040 cased = 0;
2041 for (; p < e; p++) {
2042 if (isupper(*p))
2043 return PyInt_FromLong(0);
2044 else if (!cased && islower(*p))
2045 cased = 1;
2047 return PyInt_FromLong(cased);
2051 static char isupper__doc__[] =
2052 "S.isupper() -> int\n\
2054 Return 1 if all cased characters in S are uppercase and there is\n\
2055 at least one cased character in S, 0 otherwise.";
2057 static PyObject*
2058 string_isupper(PyStringObject *self, PyObject *args)
2060 register const unsigned char *p
2061 = (unsigned char *) PyString_AS_STRING(self);
2062 register const unsigned char *e;
2063 int cased;
2065 if (!PyArg_NoArgs(args))
2066 return NULL;
2068 /* Shortcut for single character strings */
2069 if (PyString_GET_SIZE(self) == 1)
2070 return PyInt_FromLong(isupper(*p) != 0);
2072 /* Special case for empty strings */
2073 if (PyString_GET_SIZE(self) == 0)
2074 return PyInt_FromLong(0);
2076 e = p + PyString_GET_SIZE(self);
2077 cased = 0;
2078 for (; p < e; p++) {
2079 if (islower(*p))
2080 return PyInt_FromLong(0);
2081 else if (!cased && isupper(*p))
2082 cased = 1;
2084 return PyInt_FromLong(cased);
2088 static char istitle__doc__[] =
2089 "S.istitle() -> int\n\
2091 Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2092 may only follow uncased characters and lowercase characters only cased\n\
2093 ones. Return 0 otherwise.";
2095 static PyObject*
2096 string_istitle(PyStringObject *self, PyObject *args)
2098 register const unsigned char *p
2099 = (unsigned char *) PyString_AS_STRING(self);
2100 register const unsigned char *e;
2101 int cased, previous_is_cased;
2103 if (!PyArg_NoArgs(args))
2104 return NULL;
2106 /* Shortcut for single character strings */
2107 if (PyString_GET_SIZE(self) == 1)
2108 return PyInt_FromLong(isupper(*p) != 0);
2110 /* Special case for empty strings */
2111 if (PyString_GET_SIZE(self) == 0)
2112 return PyInt_FromLong(0);
2114 e = p + PyString_GET_SIZE(self);
2115 cased = 0;
2116 previous_is_cased = 0;
2117 for (; p < e; p++) {
2118 register const unsigned char ch = *p;
2120 if (isupper(ch)) {
2121 if (previous_is_cased)
2122 return PyInt_FromLong(0);
2123 previous_is_cased = 1;
2124 cased = 1;
2126 else if (islower(ch)) {
2127 if (!previous_is_cased)
2128 return PyInt_FromLong(0);
2129 previous_is_cased = 1;
2130 cased = 1;
2132 else
2133 previous_is_cased = 0;
2135 return PyInt_FromLong(cased);
2139 static char splitlines__doc__[] =
2140 "S.splitlines([keepends]]) -> list of strings\n\
2142 Return a list of the lines in S, breaking at line boundaries.\n\
2143 Line breaks are not included in the resulting list unless keepends\n\
2144 is given and true.";
2146 #define SPLIT_APPEND(data, left, right) \
2147 str = PyString_FromStringAndSize(data + left, right - left); \
2148 if (!str) \
2149 goto onError; \
2150 if (PyList_Append(list, str)) { \
2151 Py_DECREF(str); \
2152 goto onError; \
2154 else \
2155 Py_DECREF(str);
2157 static PyObject*
2158 string_splitlines(PyStringObject *self, PyObject *args)
2160 register int i;
2161 register int j;
2162 int len;
2163 int keepends = 0;
2164 PyObject *list;
2165 PyObject *str;
2166 char *data;
2168 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
2169 return NULL;
2171 data = PyString_AS_STRING(self);
2172 len = PyString_GET_SIZE(self);
2174 list = PyList_New(0);
2175 if (!list)
2176 goto onError;
2178 for (i = j = 0; i < len; ) {
2179 int eol;
2181 /* Find a line and append it */
2182 while (i < len && data[i] != '\n' && data[i] != '\r')
2183 i++;
2185 /* Skip the line break reading CRLF as one line break */
2186 eol = i;
2187 if (i < len) {
2188 if (data[i] == '\r' && i + 1 < len &&
2189 data[i+1] == '\n')
2190 i += 2;
2191 else
2192 i++;
2193 if (keepends)
2194 eol = i;
2196 SPLIT_APPEND(data, j, eol);
2197 j = i;
2199 if (j < len) {
2200 SPLIT_APPEND(data, j, len);
2203 return list;
2205 onError:
2206 Py_DECREF(list);
2207 return NULL;
2210 #undef SPLIT_APPEND
/* Method table of the string type; string_getattr() hands it to
   Py_FindMethod() to resolve attribute lookups on string objects.

   Each entry is {name, C function, calling-convention flag, docstring}.
   The entries flagged 0 are the is*() predicates, which check their
   arguments with PyArg_NoArgs(); the entries flagged 1 that are defined
   nearby parse an argument tuple with PyArg_ParseTuple().
   NOTE(review): the flag values presumably correspond to the METH_*
   constants of methodobject.h -- confirm before replacing the bare
   numbers with names. */
static PyMethodDef
string_methods[] = {
	/* Counterparts of the obsolete stropmodule functions; except
	   string.maketrans(). */
	{"join",       (PyCFunction)string_join,       1, join__doc__},
	{"split",       (PyCFunction)string_split,       1, split__doc__},
	{"lower",      (PyCFunction)string_lower,      1, lower__doc__},
	{"upper",       (PyCFunction)string_upper,       1, upper__doc__},
	{"islower", (PyCFunction)string_islower, 0, islower__doc__},
	{"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
	{"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
	{"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
	{"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
	{"isalpha", (PyCFunction)string_isalpha, 0, isalpha__doc__},
	{"isalnum", (PyCFunction)string_isalnum, 0, isalnum__doc__},
	{"capitalize",       (PyCFunction)string_capitalize,  1, capitalize__doc__},
	{"count",      (PyCFunction)string_count,      1, count__doc__},
	{"endswith",   (PyCFunction)string_endswith,   1, endswith__doc__},
	{"find",       (PyCFunction)string_find,       1, find__doc__},
	{"index",      (PyCFunction)string_index,      1, index__doc__},
	{"lstrip",     (PyCFunction)string_lstrip,     1, lstrip__doc__},
	{"replace",     (PyCFunction)string_replace,     1, replace__doc__},
	{"rfind",       (PyCFunction)string_rfind,       1, rfind__doc__},
	{"rindex",      (PyCFunction)string_rindex,      1, rindex__doc__},
	{"rstrip",      (PyCFunction)string_rstrip,      1, rstrip__doc__},
	{"startswith",  (PyCFunction)string_startswith,  1, startswith__doc__},
	{"strip",       (PyCFunction)string_strip,       1, strip__doc__},
	{"swapcase",    (PyCFunction)string_swapcase,    1, swapcase__doc__},
	{"translate",   (PyCFunction)string_translate,   1, translate__doc__},
	{"title",       (PyCFunction)string_title,       1, title__doc__},
	{"ljust",       (PyCFunction)string_ljust,       1, ljust__doc__},
	{"rjust",       (PyCFunction)string_rjust,       1, rjust__doc__},
	{"center",      (PyCFunction)string_center,      1, center__doc__},
	{"encode",      (PyCFunction)string_encode,      1, encode__doc__},
	{"expandtabs",  (PyCFunction)string_expandtabs,  1, expandtabs__doc__},
	{"splitlines",  (PyCFunction)string_splitlines,  1, splitlines__doc__},
#if 0
	/* zfill is compiled out together with its implementation */
	{"zfill",       (PyCFunction)string_zfill,       1, zfill__doc__},
#endif
	{NULL,     NULL}		     /* sentinel */
};
2255 static PyObject *
2256 string_getattr(PyStringObject *s, char *name)
2258 return Py_FindMethod(string_methods, (PyObject*)s, name);
/* The type object for strings.  It plugs the static handlers of this
   file into the type slots named in the trailing comments; attribute
   (method) lookup goes through string_getattr, and the sequence and
   buffer behaviour come from string_as_sequence / string_as_buffer.
   Slots left at 0 are not provided by this type. */
PyTypeObject PyString_Type = {
	PyObject_HEAD_INIT(&PyType_Type)
	0,
	"string",
	sizeof(PyStringObject),
	sizeof(char),
	(destructor)string_dealloc, /*tp_dealloc*/
	(printfunc)string_print, /*tp_print*/
	(getattrfunc)string_getattr,		/*tp_getattr*/
	0,		/*tp_setattr*/
	(cmpfunc)string_compare, /*tp_compare*/
	(reprfunc)string_repr, /*tp_repr*/
	0,		/*tp_as_number*/
	&string_as_sequence,	/*tp_as_sequence*/
	0,		/*tp_as_mapping*/
	(hashfunc)string_hash, /*tp_hash*/
	0,		/*tp_call*/
	0,		/*tp_str*/
	0,		/*tp_getattro*/
	0,		/*tp_setattro*/
	&string_as_buffer,	/*tp_as_buffer*/
	Py_TPFLAGS_DEFAULT,	/*tp_flags*/
	0,		/*tp_doc*/
};
2287 void
2288 PyString_Concat(register PyObject **pv, register PyObject *w)
2290 register PyObject *v;
2291 if (*pv == NULL)
2292 return;
2293 if (w == NULL || !PyString_Check(*pv)) {
2294 Py_DECREF(*pv);
2295 *pv = NULL;
2296 return;
2298 v = string_concat((PyStringObject *) *pv, w);
2299 Py_DECREF(*pv);
2300 *pv = v;
2303 void
2304 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
2306 PyString_Concat(pv, w);
2307 Py_XDECREF(w);
2311 /* The following function breaks the notion that strings are immutable:
2312 it changes the size of a string. We get away with this only if there
2313 is only one module referencing the object. You can also think of it
2314 as creating a new string object and destroying the old one, only
2315 more efficiently. In any case, don't use this if the string may
2316 already be known to some other part of the code... */
2319 _PyString_Resize(PyObject **pv, int newsize)
2321 register PyObject *v;
2322 register PyStringObject *sv;
2323 v = *pv;
2324 if (!PyString_Check(v) || v->ob_refcnt != 1) {
2325 *pv = 0;
2326 Py_DECREF(v);
2327 PyErr_BadInternalCall();
2328 return -1;
2330 /* XXX UNREF/NEWREF interface should be more symmetrical */
2331 #ifdef Py_REF_DEBUG
2332 --_Py_RefTotal;
2333 #endif
2334 _Py_ForgetReference(v);
2335 *pv = (PyObject *)
2336 PyObject_REALLOC((char *)v,
2337 sizeof(PyStringObject) + newsize * sizeof(char));
2338 if (*pv == NULL) {
2339 PyObject_DEL(v);
2340 PyErr_NoMemory();
2341 return -1;
2343 _Py_NewReference(*pv);
2344 sv = (PyStringObject *) *pv;
2345 sv->ob_size = newsize;
2346 sv->ob_sval[newsize] = '\0';
2347 return 0;
2350 /* Helpers for formatstring */
2352 static PyObject *
2353 getnextarg(PyObject *args, int arglen, int *p_argidx)
2355 int argidx = *p_argidx;
2356 if (argidx < arglen) {
2357 (*p_argidx)++;
2358 if (arglen < 0)
2359 return args;
2360 else
2361 return PyTuple_GetItem(args, argidx);
2363 PyErr_SetString(PyExc_TypeError,
2364 "not enough arguments for format string");
2365 return NULL;
/* Conversion flags collected by PyString_Format() while it parses a
   format specifier; each is one bit of the `flags` word. */
#define F_LJUST (1<<0)	/* '-' flag, or a negative '*' width */
#define F_SIGN	(1<<1)	/* '+' flag */
#define F_BLANK (1<<2)	/* ' ' flag */
#define F_ALT	(1<<3)	/* '#' flag; passed on to sprintf() by formatfloat/formatint */
#define F_ZERO	(1<<4)	/* '0' flag */
2374 static int
2375 formatfloat(char *buf, size_t buflen, int flags,
2376 int prec, int type, PyObject *v)
2378 /* fmt = '%#.' + `prec` + `type`
2379 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
2380 char fmt[20];
2381 double x;
2382 if (!PyArg_Parse(v, "d;float argument required", &x))
2383 return -1;
2384 if (prec < 0)
2385 prec = 6;
2386 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2387 type = 'g';
2388 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
2389 /* worst case length calc to ensure no buffer overrun:
2390 fmt = %#.<prec>g
2391 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
2392 for any double rep.)
2393 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2394 If prec=0 the effective precision is 1 (the leading digit is
2395 always given), therefore increase by one to 10+prec. */
2396 if (buflen <= (size_t)10 + (size_t)prec) {
2397 PyErr_SetString(PyExc_OverflowError,
2398 "formatted float is too long (precision too long?)");
2399 return -1;
2401 sprintf(buf, fmt, x);
2402 return strlen(buf);
2405 static int
2406 formatint(char *buf, size_t buflen, int flags,
2407 int prec, int type, PyObject *v)
2409 /* fmt = '%#.' + `prec` + 'l' + `type`
2410 worst case length = 3 + 10 (len of INT_MAX) + 1 + 1 = 15 (use 20)*/
2411 char fmt[20];
2412 long x;
2413 if (!PyArg_Parse(v, "l;int argument required", &x))
2414 return -1;
2415 if (prec < 0)
2416 prec = 1;
2417 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
2418 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec,len(x in octal))
2419 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
2420 if (buflen <= 13 || buflen <= (size_t)2+(size_t)prec) {
2421 PyErr_SetString(PyExc_OverflowError,
2422 "formatted integer is too long (precision too long?)");
2423 return -1;
2425 sprintf(buf, fmt, x);
2426 return strlen(buf);
2429 static int
2430 formatchar(char *buf, size_t buflen, PyObject *v)
2432 /* presume that the buffer is at least 2 characters long */
2433 if (PyString_Check(v)) {
2434 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
2435 return -1;
2437 else {
2438 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
2439 return -1;
2441 buf[1] = '\0';
2442 return 1;
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   It is the size of the `formatbuf` array that PyString_Format()
   declares for each format specifier.
*/
#define FORMATBUFLEN (size_t)120
2456 PyObject *
2457 PyString_Format(PyObject *format, PyObject *args)
2459 char *fmt, *res;
2460 int fmtcnt, rescnt, reslen, arglen, argidx;
2461 int args_owned = 0;
2462 PyObject *result, *orig_args;
2463 PyObject *dict = NULL;
2464 if (format == NULL || !PyString_Check(format) || args == NULL) {
2465 PyErr_BadInternalCall();
2466 return NULL;
2468 orig_args = args;
2469 fmt = PyString_AsString(format);
2470 fmtcnt = PyString_Size(format);
2471 reslen = rescnt = fmtcnt + 100;
2472 result = PyString_FromStringAndSize((char *)NULL, reslen);
2473 if (result == NULL)
2474 return NULL;
2475 res = PyString_AsString(result);
2476 if (PyTuple_Check(args)) {
2477 arglen = PyTuple_Size(args);
2478 argidx = 0;
2480 else {
2481 arglen = -1;
2482 argidx = -2;
2484 if (args->ob_type->tp_as_mapping)
2485 dict = args;
2486 while (--fmtcnt >= 0) {
2487 if (*fmt != '%') {
2488 if (--rescnt < 0) {
2489 rescnt = fmtcnt + 100;
2490 reslen += rescnt;
2491 if (_PyString_Resize(&result, reslen) < 0)
2492 return NULL;
2493 res = PyString_AsString(result)
2494 + reslen - rescnt;
2495 --rescnt;
2497 *res++ = *fmt++;
2499 else {
2500 /* Got a format specifier */
2501 int flags = 0;
2502 int width = -1;
2503 int prec = -1;
2504 int size = 0;
2505 int c = '\0';
2506 int fill;
2507 PyObject *v = NULL;
2508 PyObject *temp = NULL;
2509 char *pbuf;
2510 int sign;
2511 int len;
2512 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
2513 char *fmt_start = fmt;
2515 fmt++;
2516 if (*fmt == '(') {
2517 char *keystart;
2518 int keylen;
2519 PyObject *key;
2520 int pcount = 1;
2522 if (dict == NULL) {
2523 PyErr_SetString(PyExc_TypeError,
2524 "format requires a mapping");
2525 goto error;
2527 ++fmt;
2528 --fmtcnt;
2529 keystart = fmt;
2530 /* Skip over balanced parentheses */
2531 while (pcount > 0 && --fmtcnt >= 0) {
2532 if (*fmt == ')')
2533 --pcount;
2534 else if (*fmt == '(')
2535 ++pcount;
2536 fmt++;
2538 keylen = fmt - keystart - 1;
2539 if (fmtcnt < 0 || pcount > 0) {
2540 PyErr_SetString(PyExc_ValueError,
2541 "incomplete format key");
2542 goto error;
2544 key = PyString_FromStringAndSize(keystart,
2545 keylen);
2546 if (key == NULL)
2547 goto error;
2548 if (args_owned) {
2549 Py_DECREF(args);
2550 args_owned = 0;
2552 args = PyObject_GetItem(dict, key);
2553 Py_DECREF(key);
2554 if (args == NULL) {
2555 goto error;
2557 args_owned = 1;
2558 arglen = -1;
2559 argidx = -2;
2561 while (--fmtcnt >= 0) {
2562 switch (c = *fmt++) {
2563 case '-': flags |= F_LJUST; continue;
2564 case '+': flags |= F_SIGN; continue;
2565 case ' ': flags |= F_BLANK; continue;
2566 case '#': flags |= F_ALT; continue;
2567 case '0': flags |= F_ZERO; continue;
2569 break;
2571 if (c == '*') {
2572 v = getnextarg(args, arglen, &argidx);
2573 if (v == NULL)
2574 goto error;
2575 if (!PyInt_Check(v)) {
2576 PyErr_SetString(PyExc_TypeError,
2577 "* wants int");
2578 goto error;
2580 width = PyInt_AsLong(v);
2581 if (width < 0) {
2582 flags |= F_LJUST;
2583 width = -width;
2585 if (--fmtcnt >= 0)
2586 c = *fmt++;
2588 else if (c >= 0 && isdigit(c)) {
2589 width = c - '0';
2590 while (--fmtcnt >= 0) {
2591 c = Py_CHARMASK(*fmt++);
2592 if (!isdigit(c))
2593 break;
2594 if ((width*10) / 10 != width) {
2595 PyErr_SetString(
2596 PyExc_ValueError,
2597 "width too big");
2598 goto error;
2600 width = width*10 + (c - '0');
2603 if (c == '.') {
2604 prec = 0;
2605 if (--fmtcnt >= 0)
2606 c = *fmt++;
2607 if (c == '*') {
2608 v = getnextarg(args, arglen, &argidx);
2609 if (v == NULL)
2610 goto error;
2611 if (!PyInt_Check(v)) {
2612 PyErr_SetString(
2613 PyExc_TypeError,
2614 "* wants int");
2615 goto error;
2617 prec = PyInt_AsLong(v);
2618 if (prec < 0)
2619 prec = 0;
2620 if (--fmtcnt >= 0)
2621 c = *fmt++;
2623 else if (c >= 0 && isdigit(c)) {
2624 prec = c - '0';
2625 while (--fmtcnt >= 0) {
2626 c = Py_CHARMASK(*fmt++);
2627 if (!isdigit(c))
2628 break;
2629 if ((prec*10) / 10 != prec) {
2630 PyErr_SetString(
2631 PyExc_ValueError,
2632 "prec too big");
2633 goto error;
2635 prec = prec*10 + (c - '0');
2638 } /* prec */
2639 if (fmtcnt >= 0) {
2640 if (c == 'h' || c == 'l' || c == 'L') {
2641 size = c;
2642 if (--fmtcnt >= 0)
2643 c = *fmt++;
2646 if (fmtcnt < 0) {
2647 PyErr_SetString(PyExc_ValueError,
2648 "incomplete format");
2649 goto error;
2651 if (c != '%') {
2652 v = getnextarg(args, arglen, &argidx);
2653 if (v == NULL)
2654 goto error;
2656 sign = 0;
2657 fill = ' ';
2658 switch (c) {
2659 case '%':
2660 pbuf = "%";
2661 len = 1;
2662 break;
2663 case 's':
2664 case 'r':
2665 if (PyUnicode_Check(v)) {
2666 fmt = fmt_start;
2667 goto unicode;
2669 if (c == 's')
2670 temp = PyObject_Str(v);
2671 else
2672 temp = PyObject_Repr(v);
2673 if (temp == NULL)
2674 goto error;
2675 if (!PyString_Check(temp)) {
2676 PyErr_SetString(PyExc_TypeError,
2677 "%s argument has non-string str()");
2678 goto error;
2680 pbuf = PyString_AsString(temp);
2681 len = PyString_Size(temp);
2682 if (prec >= 0 && len > prec)
2683 len = prec;
2684 break;
2685 case 'i':
2686 case 'd':
2687 case 'u':
2688 case 'o':
2689 case 'x':
2690 case 'X':
2691 if (c == 'i')
2692 c = 'd';
2693 pbuf = formatbuf;
2694 len = formatint(pbuf, sizeof(formatbuf), flags, prec, c, v);
2695 if (len < 0)
2696 goto error;
2697 sign = (c == 'd');
2698 if (flags&F_ZERO) {
2699 fill = '0';
2700 if ((flags&F_ALT) &&
2701 (c == 'x' || c == 'X') &&
2702 pbuf[0] == '0' && pbuf[1] == c) {
2703 *res++ = *pbuf++;
2704 *res++ = *pbuf++;
2705 rescnt -= 2;
2706 len -= 2;
2707 width -= 2;
2708 if (width < 0)
2709 width = 0;
2712 break;
2713 case 'e':
2714 case 'E':
2715 case 'f':
2716 case 'g':
2717 case 'G':
2718 pbuf = formatbuf;
2719 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
2720 if (len < 0)
2721 goto error;
2722 sign = 1;
2723 if (flags&F_ZERO)
2724 fill = '0';
2725 break;
2726 case 'c':
2727 pbuf = formatbuf;
2728 len = formatchar(pbuf, sizeof(formatbuf), v);
2729 if (len < 0)
2730 goto error;
2731 break;
2732 default:
2733 PyErr_Format(PyExc_ValueError,
2734 "unsupported format character '%c' (0x%x)",
2735 c, c);
2736 goto error;
2738 if (sign) {
2739 if (*pbuf == '-' || *pbuf == '+') {
2740 sign = *pbuf++;
2741 len--;
2743 else if (flags & F_SIGN)
2744 sign = '+';
2745 else if (flags & F_BLANK)
2746 sign = ' ';
2747 else
2748 sign = '\0';
2750 if (width < len)
2751 width = len;
2752 if (rescnt < width + (sign != '\0')) {
2753 reslen -= rescnt;
2754 rescnt = width + fmtcnt + 100;
2755 reslen += rescnt;
2756 if (_PyString_Resize(&result, reslen) < 0)
2757 return NULL;
2758 res = PyString_AsString(result)
2759 + reslen - rescnt;
2761 if (sign) {
2762 if (fill != ' ')
2763 *res++ = sign;
2764 rescnt--;
2765 if (width > len)
2766 width--;
2768 if (width > len && !(flags&F_LJUST)) {
2769 do {
2770 --rescnt;
2771 *res++ = fill;
2772 } while (--width > len);
2774 if (sign && fill == ' ')
2775 *res++ = sign;
2776 memcpy(res, pbuf, len);
2777 res += len;
2778 rescnt -= len;
2779 while (--width >= len) {
2780 --rescnt;
2781 *res++ = ' ';
2783 if (dict && (argidx < arglen) && c != '%') {
2784 PyErr_SetString(PyExc_TypeError,
2785 "not all arguments converted");
2786 goto error;
2788 Py_XDECREF(temp);
2789 } /* '%' */
2790 } /* until end */
2791 if (argidx < arglen && !dict) {
2792 PyErr_SetString(PyExc_TypeError,
2793 "not all arguments converted");
2794 goto error;
2796 if (args_owned) {
2797 Py_DECREF(args);
2799 _PyString_Resize(&result, reslen - rescnt);
2800 return result;
2802 unicode:
2803 if (args_owned) {
2804 Py_DECREF(args);
2805 args_owned = 0;
2807 /* Fiddle args right (remove the first argidx-1 arguments) */
2808 --argidx;
2809 if (PyTuple_Check(orig_args) && argidx > 0) {
2810 PyObject *v;
2811 int n = PyTuple_GET_SIZE(orig_args) - argidx;
2812 v = PyTuple_New(n);
2813 if (v == NULL)
2814 goto error;
2815 while (--n >= 0) {
2816 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
2817 Py_INCREF(w);
2818 PyTuple_SET_ITEM(v, n, w);
2820 args = v;
2821 } else {
2822 Py_INCREF(orig_args);
2823 args = orig_args;
2825 /* Paste rest of format string to what we have of the result
2826 string; we reuse result for this */
2827 rescnt = res - PyString_AS_STRING(result);
2828 fmtcnt = PyString_GET_SIZE(format) - \
2829 (fmt - PyString_AS_STRING(format));
2830 if (_PyString_Resize(&result, rescnt + fmtcnt)) {
2831 Py_DECREF(args);
2832 goto error;
2834 memcpy(PyString_AS_STRING(result) + rescnt, fmt, fmtcnt);
2835 format = result;
2836 /* Let Unicode do its magic */
2837 result = PyUnicode_Format(format, args);
2838 Py_DECREF(format);
2839 Py_DECREF(args);
2840 return result;
2842 error:
2843 Py_DECREF(result);
2844 if (args_owned) {
2845 Py_DECREF(args);
2847 return NULL;
2851 #ifdef INTERN_STRINGS
2853 static PyObject *interned;
2855 void
2856 PyString_InternInPlace(PyObject **p)
2858 register PyStringObject *s = (PyStringObject *)(*p);
2859 PyObject *t;
2860 if (s == NULL || !PyString_Check(s))
2861 Py_FatalError("PyString_InternInPlace: strings only please!");
2862 if ((t = s->ob_sinterned) != NULL) {
2863 if (t == (PyObject *)s)
2864 return;
2865 Py_INCREF(t);
2866 *p = t;
2867 Py_DECREF(s);
2868 return;
2870 if (interned == NULL) {
2871 interned = PyDict_New();
2872 if (interned == NULL)
2873 return;
2875 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
2876 Py_INCREF(t);
2877 *p = s->ob_sinterned = t;
2878 Py_DECREF(s);
2879 return;
2881 t = (PyObject *)s;
2882 if (PyDict_SetItem(interned, t, t) == 0) {
2883 s->ob_sinterned = t;
2884 return;
2886 PyErr_Clear();
2890 PyObject *
2891 PyString_InternFromString(const char *cp)
2893 PyObject *s = PyString_FromString(cp);
2894 if (s == NULL)
2895 return NULL;
2896 PyString_InternInPlace(&s);
2897 return s;
2900 #endif
2902 void
2903 PyString_Fini(void)
2905 int i;
2906 for (i = 0; i < UCHAR_MAX + 1; i++) {
2907 Py_XDECREF(characters[i]);
2908 characters[i] = NULL;
2910 #ifndef DONT_SHARE_SHORT_STRINGS
2911 Py_XDECREF(nullstring);
2912 nullstring = NULL;
2913 #endif
2914 #ifdef INTERN_STRINGS
2915 if (interned) {
2916 int pos, changed;
2917 PyObject *key, *value;
2918 do {
2919 changed = 0;
2920 pos = 0;
2921 while (PyDict_Next(interned, &pos, &key, &value)) {
2922 if (key->ob_refcnt == 2 && key == value) {
2923 PyDict_DelItem(interned, key);
2924 changed = 1;
2927 } while (changed);
2929 #endif