This commit was manufactured by cvs2svn to create tag 'r22a4-fork'.
[python/dscho.git] / Modules / stropmodule.c
blobbd56ee03f73eb202e55a45c79d2f4e2a8f1bcce4
1 /* strop module */
/* Module docstring; initstrop() passes it to Py_InitModule4(). */
static char strop_module__doc__[] =
    "Common string manipulations, optimized for speed.\n\n"
    "Always use \"import string\" rather than referencing\n"
    "this module directly.";
9 #include "Python.h"
11 #include <ctype.h>
12 /* XXX This file assumes that the <ctype.h> is*() functions
13 XXX are defined for all 8-bit characters! */
/* Issue the module-wide DeprecationWarning.  When PyErr_Warn() returns
   non-zero, the enclosing function returns NULL.

   The body is wrapped in do { } while (0) so that `WARN;` expands to exactly
   one statement; the bare `if` form would silently capture an `else` that
   follows the macro invocation. */
#define WARN \
    do { \
        if (PyErr_Warn(PyExc_DeprecationWarning, \
                       "strop functions are obsolete; use string methods")) \
            return NULL; \
    } while (0)
/* lstrip(), rstrip() and strip() all funnel into do_strip(); the second
   argument of do_strip() is one of these selectors and tells it which
   end(s) of the string to trim. */
enum {
    LEFTSTRIP = 0,
    RIGHTSTRIP = 1,
    BOTHSTRIP = 2
};
28 static PyObject *
29 split_whitespace(char *s, int len, int maxsplit)
31 int i = 0, j, err;
32 int countsplit = 0;
33 PyObject* item;
34 PyObject *list = PyList_New(0);
36 if (list == NULL)
37 return NULL;
39 while (i < len) {
40 while (i < len && isspace(Py_CHARMASK(s[i]))) {
41 i = i+1;
43 j = i;
44 while (i < len && !isspace(Py_CHARMASK(s[i]))) {
45 i = i+1;
47 if (j < i) {
48 item = PyString_FromStringAndSize(s+j, (int)(i-j));
49 if (item == NULL)
50 goto finally;
52 err = PyList_Append(list, item);
53 Py_DECREF(item);
54 if (err < 0)
55 goto finally;
57 countsplit++;
58 while (i < len && isspace(Py_CHARMASK(s[i]))) {
59 i = i+1;
61 if (maxsplit && (countsplit >= maxsplit) && i < len) {
62 item = PyString_FromStringAndSize(
63 s+i, (int)(len - i));
64 if (item == NULL)
65 goto finally;
67 err = PyList_Append(list, item);
68 Py_DECREF(item);
69 if (err < 0)
70 goto finally;
72 i = len;
76 return list;
77 finally:
78 Py_DECREF(list);
79 return NULL;
83 static char splitfields__doc__[] =
84 "split(s [,sep [,maxsplit]]) -> list of strings\n"
85 "splitfields(s [,sep [,maxsplit]]) -> list of strings\n"
86 "\n"
87 "Return a list of the words in the string s, using sep as the\n"
88 "delimiter string. If maxsplit is nonzero, splits into at most\n"
89 "maxsplit words. If sep is not specified, any whitespace string\n"
90 "is a separator. Maxsplit defaults to 0.\n"
91 "\n"
92 "(split and splitfields are synonymous)";
94 static PyObject *
95 strop_splitfields(PyObject *self, PyObject *args)
97 int len, n, i, j, err;
98 int splitcount, maxsplit;
99 char *s, *sub;
100 PyObject *list, *item;
102 WARN;
103 sub = NULL;
104 n = 0;
105 splitcount = 0;
106 maxsplit = 0;
107 if (!PyArg_ParseTuple(args, "t#|z#i:split", &s, &len, &sub, &n, &maxsplit))
108 return NULL;
109 if (sub == NULL)
110 return split_whitespace(s, len, maxsplit);
111 if (n == 0) {
112 PyErr_SetString(PyExc_ValueError, "empty separator");
113 return NULL;
116 list = PyList_New(0);
117 if (list == NULL)
118 return NULL;
120 i = j = 0;
121 while (i+n <= len) {
122 if (s[i] == sub[0] && (n == 1 || memcmp(s+i, sub, n) == 0)) {
123 item = PyString_FromStringAndSize(s+j, (int)(i-j));
124 if (item == NULL)
125 goto fail;
126 err = PyList_Append(list, item);
127 Py_DECREF(item);
128 if (err < 0)
129 goto fail;
130 i = j = i + n;
131 splitcount++;
132 if (maxsplit && (splitcount >= maxsplit))
133 break;
135 else
136 i++;
138 item = PyString_FromStringAndSize(s+j, (int)(len-j));
139 if (item == NULL)
140 goto fail;
141 err = PyList_Append(list, item);
142 Py_DECREF(item);
143 if (err < 0)
144 goto fail;
146 return list;
148 fail:
149 Py_DECREF(list);
150 return NULL;
154 static char joinfields__doc__[] =
155 "join(list [,sep]) -> string\n"
156 "joinfields(list [,sep]) -> string\n"
157 "\n"
158 "Return a string composed of the words in list, with\n"
159 "intervening occurrences of sep. Sep defaults to a single\n"
160 "space.\n"
161 "\n"
162 "(join and joinfields are synonymous)";
164 static PyObject *
165 strop_joinfields(PyObject *self, PyObject *args)
167 PyObject *seq;
168 char *sep = NULL;
169 int seqlen, seplen = 0;
170 int i, reslen = 0, slen = 0, sz = 100;
171 PyObject *res = NULL;
172 char* p = NULL;
173 intargfunc getitemfunc;
175 WARN;
176 if (!PyArg_ParseTuple(args, "O|t#:join", &seq, &sep, &seplen))
177 return NULL;
178 if (sep == NULL) {
179 sep = " ";
180 seplen = 1;
183 seqlen = PySequence_Size(seq);
184 if (seqlen < 0 && PyErr_Occurred())
185 return NULL;
187 if (seqlen == 1) {
188 /* Optimization if there's only one item */
189 PyObject *item = PySequence_GetItem(seq, 0);
190 if (item && !PyString_Check(item)) {
191 PyErr_SetString(PyExc_TypeError,
192 "first argument must be sequence of strings");
193 Py_DECREF(item);
194 return NULL;
196 return item;
199 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
200 return NULL;
201 p = PyString_AsString(res);
203 /* optimize for lists, since it's the most common case. all others
204 * (tuples and arbitrary sequences) just use the sequence abstract
205 * interface.
207 if (PyList_Check(seq)) {
208 for (i = 0; i < seqlen; i++) {
209 PyObject *item = PyList_GET_ITEM(seq, i);
210 if (!PyString_Check(item)) {
211 PyErr_SetString(PyExc_TypeError,
212 "first argument must be sequence of strings");
213 Py_DECREF(res);
214 return NULL;
216 slen = PyString_GET_SIZE(item);
217 while (reslen + slen + seplen >= sz) {
218 if (_PyString_Resize(&res, sz * 2)) {
219 Py_DECREF(res);
220 return NULL;
222 sz *= 2;
223 p = PyString_AsString(res) + reslen;
225 if (i > 0) {
226 memcpy(p, sep, seplen);
227 p += seplen;
228 reslen += seplen;
230 memcpy(p, PyString_AS_STRING(item), slen);
231 p += slen;
232 reslen += slen;
234 if (_PyString_Resize(&res, reslen)) {
235 Py_DECREF(res);
236 res = NULL;
238 return res;
241 if (seq->ob_type->tp_as_sequence == NULL ||
242 (getitemfunc = seq->ob_type->tp_as_sequence->sq_item) == NULL)
244 PyErr_SetString(PyExc_TypeError,
245 "first argument must be a sequence");
246 return NULL;
248 /* This is now type safe */
249 for (i = 0; i < seqlen; i++) {
250 PyObject *item = getitemfunc(seq, i);
251 if (!item || !PyString_Check(item)) {
252 PyErr_SetString(PyExc_TypeError,
253 "first argument must be sequence of strings");
254 Py_DECREF(res);
255 Py_XDECREF(item);
256 return NULL;
258 slen = PyString_GET_SIZE(item);
259 while (reslen + slen + seplen >= sz) {
260 if (_PyString_Resize(&res, sz * 2)) {
261 Py_DECREF(res);
262 Py_DECREF(item);
263 return NULL;
265 sz *= 2;
266 p = PyString_AsString(res) + reslen;
268 if (i > 0) {
269 memcpy(p, sep, seplen);
270 p += seplen;
271 reslen += seplen;
273 memcpy(p, PyString_AS_STRING(item), slen);
274 p += slen;
275 reslen += slen;
276 Py_DECREF(item);
278 if (_PyString_Resize(&res, reslen)) {
279 Py_DECREF(res);
280 res = NULL;
282 return res;
286 static char find__doc__[] =
287 "find(s, sub [,start [,end]]) -> in\n"
288 "\n"
289 "Return the lowest index in s where substring sub is found,\n"
290 "such that sub is contained within s[start,end]. Optional\n"
291 "arguments start and end are interpreted as in slice notation.\n"
292 "\n"
293 "Return -1 on failure.";
295 static PyObject *
296 strop_find(PyObject *self, PyObject *args)
298 char *s, *sub;
299 int len, n, i = 0, last = INT_MAX;
301 WARN;
302 if (!PyArg_ParseTuple(args, "t#t#|ii:find", &s, &len, &sub, &n, &i, &last))
303 return NULL;
305 if (last > len)
306 last = len;
307 if (last < 0)
308 last += len;
309 if (last < 0)
310 last = 0;
311 if (i < 0)
312 i += len;
313 if (i < 0)
314 i = 0;
316 if (n == 0 && i <= last)
317 return PyInt_FromLong((long)i);
319 last -= n;
320 for (; i <= last; ++i)
321 if (s[i] == sub[0] &&
322 (n == 1 || memcmp(&s[i+1], &sub[1], n-1) == 0))
323 return PyInt_FromLong((long)i);
325 return PyInt_FromLong(-1L);
329 static char rfind__doc__[] =
330 "rfind(s, sub [,start [,end]]) -> int\n"
331 "\n"
332 "Return the highest index in s where substring sub is found,\n"
333 "such that sub is contained within s[start,end]. Optional\n"
334 "arguments start and end are interpreted as in slice notation.\n"
335 "\n"
336 "Return -1 on failure.";
338 static PyObject *
339 strop_rfind(PyObject *self, PyObject *args)
341 char *s, *sub;
342 int len, n, j;
343 int i = 0, last = INT_MAX;
345 WARN;
346 if (!PyArg_ParseTuple(args, "t#t#|ii:rfind", &s, &len, &sub, &n, &i, &last))
347 return NULL;
349 if (last > len)
350 last = len;
351 if (last < 0)
352 last += len;
353 if (last < 0)
354 last = 0;
355 if (i < 0)
356 i += len;
357 if (i < 0)
358 i = 0;
360 if (n == 0 && i <= last)
361 return PyInt_FromLong((long)last);
363 for (j = last-n; j >= i; --j)
364 if (s[j] == sub[0] &&
365 (n == 1 || memcmp(&s[j+1], &sub[1], n-1) == 0))
366 return PyInt_FromLong((long)j);
368 return PyInt_FromLong(-1L);
372 static PyObject *
373 do_strip(PyObject *args, int striptype)
375 char *s;
376 int len, i, j;
379 if (!PyArg_Parse(args, "t#", &s, &len))
380 return NULL;
382 i = 0;
383 if (striptype != RIGHTSTRIP) {
384 while (i < len && isspace(Py_CHARMASK(s[i]))) {
385 i++;
389 j = len;
390 if (striptype != LEFTSTRIP) {
391 do {
392 j--;
393 } while (j >= i && isspace(Py_CHARMASK(s[j])));
394 j++;
397 if (i == 0 && j == len) {
398 Py_INCREF(args);
399 return args;
401 else
402 return PyString_FromStringAndSize(s+i, j-i);
406 static char strip__doc__[] =
407 "strip(s) -> string\n"
408 "\n"
409 "Return a copy of the string s with leading and trailing\n"
410 "whitespace removed.";
412 static PyObject *
413 strop_strip(PyObject *self, PyObject *args)
415 WARN;
416 return do_strip(args, BOTHSTRIP);
420 static char lstrip__doc__[] =
421 "lstrip(s) -> string\n"
422 "\n"
423 "Return a copy of the string s with leading whitespace removed.";
425 static PyObject *
426 strop_lstrip(PyObject *self, PyObject *args)
428 WARN;
429 return do_strip(args, LEFTSTRIP);
433 static char rstrip__doc__[] =
434 "rstrip(s) -> string\n"
435 "\n"
436 "Return a copy of the string s with trailing whitespace removed.";
438 static PyObject *
439 strop_rstrip(PyObject *self, PyObject *args)
441 WARN;
442 return do_strip(args, RIGHTSTRIP);
446 static char lower__doc__[] =
447 "lower(s) -> string\n"
448 "\n"
449 "Return a copy of the string s converted to lowercase.";
451 static PyObject *
452 strop_lower(PyObject *self, PyObject *args)
454 char *s, *s_new;
455 int i, n;
456 PyObject *new;
457 int changed;
459 WARN;
460 if (!PyArg_Parse(args, "t#", &s, &n))
461 return NULL;
462 new = PyString_FromStringAndSize(NULL, n);
463 if (new == NULL)
464 return NULL;
465 s_new = PyString_AsString(new);
466 changed = 0;
467 for (i = 0; i < n; i++) {
468 int c = Py_CHARMASK(*s++);
469 if (isupper(c)) {
470 changed = 1;
471 *s_new = tolower(c);
472 } else
473 *s_new = c;
474 s_new++;
476 if (!changed) {
477 Py_DECREF(new);
478 Py_INCREF(args);
479 return args;
481 return new;
485 static char upper__doc__[] =
486 "upper(s) -> string\n"
487 "\n"
488 "Return a copy of the string s converted to uppercase.";
490 static PyObject *
491 strop_upper(PyObject *self, PyObject *args)
493 char *s, *s_new;
494 int i, n;
495 PyObject *new;
496 int changed;
498 WARN;
499 if (!PyArg_Parse(args, "t#", &s, &n))
500 return NULL;
501 new = PyString_FromStringAndSize(NULL, n);
502 if (new == NULL)
503 return NULL;
504 s_new = PyString_AsString(new);
505 changed = 0;
506 for (i = 0; i < n; i++) {
507 int c = Py_CHARMASK(*s++);
508 if (islower(c)) {
509 changed = 1;
510 *s_new = toupper(c);
511 } else
512 *s_new = c;
513 s_new++;
515 if (!changed) {
516 Py_DECREF(new);
517 Py_INCREF(args);
518 return args;
520 return new;
524 static char capitalize__doc__[] =
525 "capitalize(s) -> string\n"
526 "\n"
527 "Return a copy of the string s with only its first character\n"
528 "capitalized.";
530 static PyObject *
531 strop_capitalize(PyObject *self, PyObject *args)
533 char *s, *s_new;
534 int i, n;
535 PyObject *new;
536 int changed;
538 WARN;
539 if (!PyArg_Parse(args, "t#", &s, &n))
540 return NULL;
541 new = PyString_FromStringAndSize(NULL, n);
542 if (new == NULL)
543 return NULL;
544 s_new = PyString_AsString(new);
545 changed = 0;
546 if (0 < n) {
547 int c = Py_CHARMASK(*s++);
548 if (islower(c)) {
549 changed = 1;
550 *s_new = toupper(c);
551 } else
552 *s_new = c;
553 s_new++;
555 for (i = 1; i < n; i++) {
556 int c = Py_CHARMASK(*s++);
557 if (isupper(c)) {
558 changed = 1;
559 *s_new = tolower(c);
560 } else
561 *s_new = c;
562 s_new++;
564 if (!changed) {
565 Py_DECREF(new);
566 Py_INCREF(args);
567 return args;
569 return new;
573 static char expandtabs__doc__[] =
574 "expandtabs(string, [tabsize]) -> string\n"
575 "\n"
576 "Expand tabs in a string, i.e. replace them by one or more spaces,\n"
577 "depending on the current column and the given tab size (default 8).\n"
578 "The column number is reset to zero after each newline occurring in the\n"
579 "string. This doesn't understand other non-printing characters.";
581 static PyObject *
582 strop_expandtabs(PyObject *self, PyObject *args)
584 /* Original by Fredrik Lundh */
585 char* e;
586 char* p;
587 char* q;
588 int i, j;
589 PyObject* out;
590 char* string;
591 int stringlen;
592 int tabsize = 8;
594 WARN;
595 /* Get arguments */
596 if (!PyArg_ParseTuple(args, "s#|i:expandtabs", &string, &stringlen, &tabsize))
597 return NULL;
598 if (tabsize < 1) {
599 PyErr_SetString(PyExc_ValueError,
600 "tabsize must be at least 1");
601 return NULL;
604 /* First pass: determine size of output string */
605 i = j = 0; /* j: current column; i: total of previous lines */
606 e = string + stringlen;
607 for (p = string; p < e; p++) {
608 if (*p == '\t')
609 j += tabsize - (j%tabsize);
610 else {
611 j++;
612 if (*p == '\n') {
613 i += j;
614 j = 0;
619 /* Second pass: create output string and fill it */
620 out = PyString_FromStringAndSize(NULL, i+j);
621 if (out == NULL)
622 return NULL;
624 i = 0;
625 q = PyString_AS_STRING(out);
627 for (p = string; p < e; p++) {
628 if (*p == '\t') {
629 j = tabsize - (i%tabsize);
630 i += j;
631 while (j-- > 0)
632 *q++ = ' ';
633 } else {
634 *q++ = *p;
635 i++;
636 if (*p == '\n')
637 i = 0;
641 return out;
645 static char count__doc__[] =
646 "count(s, sub[, start[, end]]) -> int\n"
647 "\n"
648 "Return the number of occurrences of substring sub in string\n"
649 "s[start:end]. Optional arguments start and end are\n"
650 "interpreted as in slice notation.";
652 static PyObject *
653 strop_count(PyObject *self, PyObject *args)
655 char *s, *sub;
656 int len, n;
657 int i = 0, last = INT_MAX;
658 int m, r;
660 WARN;
661 if (!PyArg_ParseTuple(args, "t#t#|ii:count", &s, &len, &sub, &n, &i, &last))
662 return NULL;
663 if (last > len)
664 last = len;
665 if (last < 0)
666 last += len;
667 if (last < 0)
668 last = 0;
669 if (i < 0)
670 i += len;
671 if (i < 0)
672 i = 0;
673 m = last + 1 - n;
674 if (n == 0)
675 return PyInt_FromLong((long) (m-i));
677 r = 0;
678 while (i < m) {
679 if (!memcmp(s+i, sub, n)) {
680 r++;
681 i += n;
682 } else {
683 i++;
686 return PyInt_FromLong((long) r);
690 static char swapcase__doc__[] =
691 "swapcase(s) -> string\n"
692 "\n"
693 "Return a copy of the string s with upper case characters\n"
694 "converted to lowercase and vice versa.";
696 static PyObject *
697 strop_swapcase(PyObject *self, PyObject *args)
699 char *s, *s_new;
700 int i, n;
701 PyObject *new;
702 int changed;
704 WARN;
705 if (!PyArg_Parse(args, "t#", &s, &n))
706 return NULL;
707 new = PyString_FromStringAndSize(NULL, n);
708 if (new == NULL)
709 return NULL;
710 s_new = PyString_AsString(new);
711 changed = 0;
712 for (i = 0; i < n; i++) {
713 int c = Py_CHARMASK(*s++);
714 if (islower(c)) {
715 changed = 1;
716 *s_new = toupper(c);
718 else if (isupper(c)) {
719 changed = 1;
720 *s_new = tolower(c);
722 else
723 *s_new = c;
724 s_new++;
726 if (!changed) {
727 Py_DECREF(new);
728 Py_INCREF(args);
729 return args;
731 return new;
735 static char atoi__doc__[] =
736 "atoi(s [,base]) -> int\n"
737 "\n"
738 "Return the integer represented by the string s in the given\n"
739 "base, which defaults to 10. The string s must consist of one\n"
740 "or more digits, possibly preceded by a sign. If base is 0, it\n"
741 "is chosen from the leading characters of s, 0 for octal, 0x or\n"
742 "0X for hexadecimal. If base is 16, a preceding 0x or 0X is\n"
743 "accepted.";
745 static PyObject *
746 strop_atoi(PyObject *self, PyObject *args)
748 char *s, *end;
749 int base = 10;
750 long x;
751 char buffer[256]; /* For errors */
753 WARN;
754 if (!PyArg_ParseTuple(args, "s|i:atoi", &s, &base))
755 return NULL;
757 if ((base != 0 && base < 2) || base > 36) {
758 PyErr_SetString(PyExc_ValueError, "invalid base for atoi()");
759 return NULL;
762 while (*s && isspace(Py_CHARMASK(*s)))
763 s++;
764 errno = 0;
765 if (base == 0 && s[0] == '0')
766 x = (long) PyOS_strtoul(s, &end, base);
767 else
768 x = PyOS_strtol(s, &end, base);
769 if (end == s || !isalnum(end[-1]))
770 goto bad;
771 while (*end && isspace(Py_CHARMASK(*end)))
772 end++;
773 if (*end != '\0') {
774 bad:
775 sprintf(buffer, "invalid literal for atoi(): %.200s", s);
776 PyErr_SetString(PyExc_ValueError, buffer);
777 return NULL;
779 else if (errno != 0) {
780 sprintf(buffer, "atoi() literal too large: %.200s", s);
781 PyErr_SetString(PyExc_ValueError, buffer);
782 return NULL;
784 return PyInt_FromLong(x);
788 static char atol__doc__[] =
789 "atol(s [,base]) -> long\n"
790 "\n"
791 "Return the long integer represented by the string s in the\n"
792 "given base, which defaults to 10. The string s must consist\n"
793 "of one or more digits, possibly preceded by a sign. If base\n"
794 "is 0, it is chosen from the leading characters of s, 0 for\n"
795 "octal, 0x or 0X for hexadecimal. If base is 16, a preceding\n"
796 "0x or 0X is accepted. A trailing L or l is not accepted,\n"
797 "unless base is 0.";
799 static PyObject *
800 strop_atol(PyObject *self, PyObject *args)
802 char *s, *end;
803 int base = 10;
804 PyObject *x;
805 char buffer[256]; /* For errors */
807 WARN;
808 if (!PyArg_ParseTuple(args, "s|i:atol", &s, &base))
809 return NULL;
811 if ((base != 0 && base < 2) || base > 36) {
812 PyErr_SetString(PyExc_ValueError, "invalid base for atol()");
813 return NULL;
816 while (*s && isspace(Py_CHARMASK(*s)))
817 s++;
818 if (s[0] == '\0') {
819 PyErr_SetString(PyExc_ValueError, "empty string for atol()");
820 return NULL;
822 x = PyLong_FromString(s, &end, base);
823 if (x == NULL)
824 return NULL;
825 if (base == 0 && (*end == 'l' || *end == 'L'))
826 end++;
827 while (*end && isspace(Py_CHARMASK(*end)))
828 end++;
829 if (*end != '\0') {
830 sprintf(buffer, "invalid literal for atol(): %.200s", s);
831 PyErr_SetString(PyExc_ValueError, buffer);
832 Py_DECREF(x);
833 return NULL;
835 return x;
839 static char atof__doc__[] =
840 "atof(s) -> float\n"
841 "\n"
842 "Return the floating point number represented by the string s.";
844 static PyObject *
845 strop_atof(PyObject *self, PyObject *args)
847 extern double strtod(const char *, char **);
848 char *s, *end;
849 double x;
850 char buffer[256]; /* For errors */
852 WARN;
853 if (!PyArg_ParseTuple(args, "s:atof", &s))
854 return NULL;
855 while (*s && isspace(Py_CHARMASK(*s)))
856 s++;
857 if (s[0] == '\0') {
858 PyErr_SetString(PyExc_ValueError, "empty string for atof()");
859 return NULL;
861 errno = 0;
862 PyFPE_START_PROTECT("strop_atof", return 0)
863 x = strtod(s, &end);
864 PyFPE_END_PROTECT(x)
865 while (*end && isspace(Py_CHARMASK(*end)))
866 end++;
867 if (*end != '\0') {
868 sprintf(buffer, "invalid literal for atof(): %.200s", s);
869 PyErr_SetString(PyExc_ValueError, buffer);
870 return NULL;
872 else if (errno != 0) {
873 sprintf(buffer, "atof() literal too large: %.200s", s);
874 PyErr_SetString(PyExc_ValueError, buffer);
875 return NULL;
877 return PyFloat_FromDouble(x);
881 static char maketrans__doc__[] =
882 "maketrans(frm, to) -> string\n"
883 "\n"
884 "Return a translation table (a string of 256 bytes long)\n"
885 "suitable for use in string.translate. The strings frm and to\n"
886 "must be of the same length.";
888 static PyObject *
889 strop_maketrans(PyObject *self, PyObject *args)
891 unsigned char *c, *from=NULL, *to=NULL;
892 int i, fromlen=0, tolen=0;
893 PyObject *result;
895 if (!PyArg_ParseTuple(args, "t#t#:maketrans", &from, &fromlen, &to, &tolen))
896 return NULL;
898 if (fromlen != tolen) {
899 PyErr_SetString(PyExc_ValueError,
900 "maketrans arguments must have same length");
901 return NULL;
904 result = PyString_FromStringAndSize((char *)NULL, 256);
905 if (result == NULL)
906 return NULL;
907 c = (unsigned char *) PyString_AS_STRING((PyStringObject *)result);
908 for (i = 0; i < 256; i++)
909 c[i]=(unsigned char)i;
910 for (i = 0; i < fromlen; i++)
911 c[from[i]]=to[i];
913 return result;
917 static char translate__doc__[] =
918 "translate(s,table [,deletechars]) -> string\n"
919 "\n"
920 "Return a copy of the string s, where all characters occurring\n"
921 "in the optional argument deletechars are removed, and the\n"
922 "remaining characters have been mapped through the given\n"
923 "translation table, which must be a string of length 256.";
925 static PyObject *
926 strop_translate(PyObject *self, PyObject *args)
928 register char *input, *table, *output;
929 register int i, c, changed = 0;
930 PyObject *input_obj;
931 char *table1, *output_start, *del_table=NULL;
932 int inlen, tablen, dellen = 0;
933 PyObject *result;
934 int trans_table[256];
936 WARN;
937 if (!PyArg_ParseTuple(args, "St#|t#:translate", &input_obj,
938 &table1, &tablen, &del_table, &dellen))
939 return NULL;
940 if (tablen != 256) {
941 PyErr_SetString(PyExc_ValueError,
942 "translation table must be 256 characters long");
943 return NULL;
946 table = table1;
947 inlen = PyString_Size(input_obj);
948 result = PyString_FromStringAndSize((char *)NULL, inlen);
949 if (result == NULL)
950 return NULL;
951 output_start = output = PyString_AsString(result);
952 input = PyString_AsString(input_obj);
954 if (dellen == 0) {
955 /* If no deletions are required, use faster code */
956 for (i = inlen; --i >= 0; ) {
957 c = Py_CHARMASK(*input++);
958 if (Py_CHARMASK((*output++ = table[c])) != c)
959 changed = 1;
961 if (changed)
962 return result;
963 Py_DECREF(result);
964 Py_INCREF(input_obj);
965 return input_obj;
968 for (i = 0; i < 256; i++)
969 trans_table[i] = Py_CHARMASK(table[i]);
971 for (i = 0; i < dellen; i++)
972 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
974 for (i = inlen; --i >= 0; ) {
975 c = Py_CHARMASK(*input++);
976 if (trans_table[c] != -1)
977 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
978 continue;
979 changed = 1;
981 if (!changed) {
982 Py_DECREF(result);
983 Py_INCREF(input_obj);
984 return input_obj;
986 /* Fix the size of the resulting string */
987 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
988 return NULL;
989 return result;
/* What follows is used for implementing replace().  Perry Stoll. */

/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Searches the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT and returns its index into MEM, or -1 when there is none.
  A pattern longer than MEM is never found, so -1 is returned.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* A match cannot start in the last pat_len-1 bytes of mem. */
    int last_start = len - pat_len;
    int pos;

    for (pos = 0; pos <= last_start; pos++) {
        if (mem[pos] != pat[0])
            continue;
        if (pat_len == 1 ||
            memcmp(mem + pos + 1, pat + 1, pat_len - 1) == 0)
            return pos;
    }
    return -1;
}
/*
  mymemcnt

  Return the number of distinct (non-overlapping) times PAT is found in MEM.
  meaning mem=1111 and pat==11 returns 2.
          mem=11111 and pat==11 also return 2.
*/
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int hits = 0;

    for (; len >= 0; hits++) {
        int consumed;
        int where = mymemfind(mem, len, pat, pat_len);

        if (where == -1)
            break;
        /* Resume right behind this match so matches never overlap. */
        consumed = where + pat_len;
        mem += consumed;
        len -= consumed;
    }
    return hits;
}
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB (at most COUNT of them when COUNT >= 0).

   If STR is empty, if length of PAT is greater than length of STR, or if
   there are no occurrences of PAT in STR, then the original string is
   returned.  Otherwise, a new string is allocated here with PyMem_MALLOC
   and returned; the caller must release it.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result length would
       not fit in an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,          /* input string */
             const char *pat, int pat_len,      /* pattern string to find */
             const char *sub, int sub_len,      /* substitution string */
             int count,                         /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len, delta;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* Growth per replacement.  Only a positive delta can overflow; a
       negative one shrinks the result by at most len bytes in total. */
    delta = sub_len - pat_len;
    if (delta > 0 && nfound > (INT_MAX - len) / delta)
        return NULL;    /* result too large; reported like out-of-memory */
    new_len = len + nfound * delta;

    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        for (; count > 0 && len > 0; --count) {
            /* find index of next instance of pattern */
            offset = mymemfind(str, len, pat, pat_len);
            if (offset == -1)
                break;

            /* copy non matching part of input string */
            memcpy(new_s, str, offset);
            str += offset + pat_len;
            len -= offset + pat_len;

            /* copy substitute into the output string */
            new_s += offset;
            memcpy(new_s, sub, sub_len);
            new_s += sub_len;
        }
        /* copy any remaining values into output string */
        if (len > 0)
            memcpy(new_s, str, len);
    }
    *out_len = new_len;
    return out_s;

  return_same:
    *out_len = -1;
    return (char *)str; /* cast away const */
}
1133 static char replace__doc__[] =
1134 "replace (str, old, new[, maxsplit]) -> string\n"
1135 "\n"
1136 "Return a copy of string str with all occurrences of substring\n"
1137 "old replaced by new. If the optional argument maxsplit is\n"
1138 "given, only the first maxsplit occurrences are replaced.";
1140 static PyObject *
1141 strop_replace(PyObject *self, PyObject *args)
1143 char *str, *pat,*sub,*new_s;
1144 int len,pat_len,sub_len,out_len;
1145 int count = -1;
1146 PyObject *new;
1148 WARN;
1149 if (!PyArg_ParseTuple(args, "t#t#t#|i:replace",
1150 &str, &len, &pat, &pat_len, &sub, &sub_len,
1151 &count))
1152 return NULL;
1153 if (pat_len <= 0) {
1154 PyErr_SetString(PyExc_ValueError, "empty pattern string");
1155 return NULL;
1157 /* CAUTION: strop treats a replace count of 0 as infinity, unlke
1158 * current (2.1) string.py and string methods. Preserve this for
1159 * ... well, hard to say for what <wink>.
1161 if (count == 0)
1162 count = -1;
1163 new_s = mymemreplace(str,len,pat,pat_len,sub,sub_len,count,&out_len);
1164 if (new_s == NULL) {
1165 PyErr_NoMemory();
1166 return NULL;
1168 if (out_len == -1) {
1169 /* we're returning another reference to the input string */
1170 new = PyTuple_GetItem(args, 0);
1171 Py_XINCREF(new);
1173 else {
1174 new = PyString_FromStringAndSize(new_s, out_len);
1175 PyMem_FREE(new_s);
1177 return new;
1181 /* List of functions defined in the module */
1183 static PyMethodDef
1184 strop_methods[] = {
1185 {"atof", strop_atof, METH_VARARGS, atof__doc__},
1186 {"atoi", strop_atoi, METH_VARARGS, atoi__doc__},
1187 {"atol", strop_atol, METH_VARARGS, atol__doc__},
1188 {"capitalize", strop_capitalize, METH_OLDARGS, capitalize__doc__},
1189 {"count", strop_count, METH_VARARGS, count__doc__},
1190 {"expandtabs", strop_expandtabs, METH_VARARGS, expandtabs__doc__},
1191 {"find", strop_find, METH_VARARGS, find__doc__},
1192 {"join", strop_joinfields, METH_VARARGS, joinfields__doc__},
1193 {"joinfields", strop_joinfields, METH_VARARGS, joinfields__doc__},
1194 {"lstrip", strop_lstrip, METH_OLDARGS, lstrip__doc__},
1195 {"lower", strop_lower, METH_OLDARGS, lower__doc__},
1196 {"maketrans", strop_maketrans, METH_VARARGS, maketrans__doc__},
1197 {"replace", strop_replace, METH_VARARGS, replace__doc__},
1198 {"rfind", strop_rfind, METH_VARARGS, rfind__doc__},
1199 {"rstrip", strop_rstrip, METH_OLDARGS, rstrip__doc__},
1200 {"split", strop_splitfields, METH_VARARGS, splitfields__doc__},
1201 {"splitfields", strop_splitfields, METH_VARARGS, splitfields__doc__},
1202 {"strip", strop_strip, METH_OLDARGS, strip__doc__},
1203 {"swapcase", strop_swapcase, METH_OLDARGS, swapcase__doc__},
1204 {"translate", strop_translate, METH_VARARGS, translate__doc__},
1205 {"upper", strop_upper, METH_OLDARGS, upper__doc__},
1206 {NULL, NULL} /* sentinel */
1210 DL_EXPORT(void)
1211 initstrop(void)
1213 PyObject *m, *d, *s;
1214 char buf[256];
1215 int c, n;
1216 m = Py_InitModule4("strop", strop_methods, strop_module__doc__,
1217 (PyObject*)NULL, PYTHON_API_VERSION);
1218 d = PyModule_GetDict(m);
1220 /* Create 'whitespace' object */
1221 n = 0;
1222 for (c = 0; c < 256; c++) {
1223 if (isspace(c))
1224 buf[n++] = c;
1226 s = PyString_FromStringAndSize(buf, n);
1227 if (s) {
1228 PyDict_SetItemString(d, "whitespace", s);
1229 Py_DECREF(s);
1231 /* Create 'lowercase' object */
1232 n = 0;
1233 for (c = 0; c < 256; c++) {
1234 if (islower(c))
1235 buf[n++] = c;
1237 s = PyString_FromStringAndSize(buf, n);
1238 if (s) {
1239 PyDict_SetItemString(d, "lowercase", s);
1240 Py_DECREF(s);
1243 /* Create 'uppercase' object */
1244 n = 0;
1245 for (c = 0; c < 256; c++) {
1246 if (isupper(c))
1247 buf[n++] = c;
1249 s = PyString_FromStringAndSize(buf, n);
1250 if (s) {
1251 PyDict_SetItemString(d, "uppercase", s);
1252 Py_DECREF(s);