Updated for 2.1a3
[python/dscho.git] / Modules / stropmodule.c
blob203feb942d556f79f8d593582fc1fd7f66f86042
2 /* strop module */
/* Module docstring; passed to Py_InitModule4() when the module is created. */
static char strop_module__doc__[] =
    "Common string manipulations, optimized for speed.\n"
    "\n"
    "Always use \"import string\" rather than referencing\n"
    "this module directly.";
10 #include "Python.h"
12 #include <ctype.h>
13 /* XXX This file assumes that the <ctype.h> is*() functions
14 XXX are defined for all 8-bit characters! */
/* lstrip(), rstrip() and strip() all delegate to do_strip().  These
   selectors are passed as its second argument to say which end(s) of
   the string should be trimmed. */

#define LEFTSTRIP 0     /* trim leading whitespace only */
#define RIGHTSTRIP 1    /* trim trailing whitespace only */
#define BOTHSTRIP 2     /* trim both ends */
25 static PyObject *
26 split_whitespace(char *s, int len, int maxsplit)
28 int i = 0, j, err;
29 int countsplit = 0;
30 PyObject* item;
31 PyObject *list = PyList_New(0);
33 if (list == NULL)
34 return NULL;
36 while (i < len) {
37 while (i < len && isspace(Py_CHARMASK(s[i]))) {
38 i = i+1;
40 j = i;
41 while (i < len && !isspace(Py_CHARMASK(s[i]))) {
42 i = i+1;
44 if (j < i) {
45 item = PyString_FromStringAndSize(s+j, (int)(i-j));
46 if (item == NULL)
47 goto finally;
49 err = PyList_Append(list, item);
50 Py_DECREF(item);
51 if (err < 0)
52 goto finally;
54 countsplit++;
55 while (i < len && isspace(Py_CHARMASK(s[i]))) {
56 i = i+1;
58 if (maxsplit && (countsplit >= maxsplit) && i < len) {
59 item = PyString_FromStringAndSize(
60 s+i, (int)(len - i));
61 if (item == NULL)
62 goto finally;
64 err = PyList_Append(list, item);
65 Py_DECREF(item);
66 if (err < 0)
67 goto finally;
69 i = len;
73 return list;
74 finally:
75 Py_DECREF(list);
76 return NULL;
80 static char splitfields__doc__[] =
81 "split(s [,sep [,maxsplit]]) -> list of strings\n\
82 splitfields(s [,sep [,maxsplit]]) -> list of strings\n\
83 \n\
84 Return a list of the words in the string s, using sep as the\n\
85 delimiter string. If maxsplit is nonzero, splits into at most\n\
86 maxsplit words. If sep is not specified, any whitespace string\n\
87 is a separator. Maxsplit defaults to 0.\n\
88 \n\
89 (split and splitfields are synonymous)";
91 static PyObject *
92 strop_splitfields(PyObject *self, PyObject *args)
94 int len, n, i, j, err;
95 int splitcount, maxsplit;
96 char *s, *sub;
97 PyObject *list, *item;
99 sub = NULL;
100 n = 0;
101 splitcount = 0;
102 maxsplit = 0;
103 if (!PyArg_ParseTuple(args, "t#|z#i:split", &s, &len, &sub, &n, &maxsplit))
104 return NULL;
105 if (sub == NULL)
106 return split_whitespace(s, len, maxsplit);
107 if (n == 0) {
108 PyErr_SetString(PyExc_ValueError, "empty separator");
109 return NULL;
112 list = PyList_New(0);
113 if (list == NULL)
114 return NULL;
116 i = j = 0;
117 while (i+n <= len) {
118 if (s[i] == sub[0] && (n == 1 || memcmp(s+i, sub, n) == 0)) {
119 item = PyString_FromStringAndSize(s+j, (int)(i-j));
120 if (item == NULL)
121 goto fail;
122 err = PyList_Append(list, item);
123 Py_DECREF(item);
124 if (err < 0)
125 goto fail;
126 i = j = i + n;
127 splitcount++;
128 if (maxsplit && (splitcount >= maxsplit))
129 break;
131 else
132 i++;
134 item = PyString_FromStringAndSize(s+j, (int)(len-j));
135 if (item == NULL)
136 goto fail;
137 err = PyList_Append(list, item);
138 Py_DECREF(item);
139 if (err < 0)
140 goto fail;
142 return list;
144 fail:
145 Py_DECREF(list);
146 return NULL;
150 static char joinfields__doc__[] =
151 "join(list [,sep]) -> string\n\
152 joinfields(list [,sep]) -> string\n\
154 Return a string composed of the words in list, with\n\
155 intervening occurrences of sep. Sep defaults to a single\n\
156 space.\n\
158 (join and joinfields are synonymous)";
160 static PyObject *
161 strop_joinfields(PyObject *self, PyObject *args)
163 PyObject *seq;
164 char *sep = NULL;
165 int seqlen, seplen = 0;
166 int i, reslen = 0, slen = 0, sz = 100;
167 PyObject *res = NULL;
168 char* p = NULL;
169 intargfunc getitemfunc;
171 if (!PyArg_ParseTuple(args, "O|t#:join", &seq, &sep, &seplen))
172 return NULL;
173 if (sep == NULL) {
174 sep = " ";
175 seplen = 1;
178 seqlen = PySequence_Size(seq);
179 if (seqlen < 0 && PyErr_Occurred())
180 return NULL;
182 if (seqlen == 1) {
183 /* Optimization if there's only one item */
184 PyObject *item = PySequence_GetItem(seq, 0);
185 if (item && !PyString_Check(item)) {
186 PyErr_SetString(PyExc_TypeError,
187 "first argument must be sequence of strings");
188 Py_DECREF(item);
189 return NULL;
191 return item;
194 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
195 return NULL;
196 p = PyString_AsString(res);
198 /* optimize for lists, since it's the most common case. all others
199 * (tuples and arbitrary sequences) just use the sequence abstract
200 * interface.
202 if (PyList_Check(seq)) {
203 for (i = 0; i < seqlen; i++) {
204 PyObject *item = PyList_GET_ITEM(seq, i);
205 if (!PyString_Check(item)) {
206 PyErr_SetString(PyExc_TypeError,
207 "first argument must be sequence of strings");
208 Py_DECREF(res);
209 return NULL;
211 slen = PyString_GET_SIZE(item);
212 while (reslen + slen + seplen >= sz) {
213 if (_PyString_Resize(&res, sz * 2)) {
214 Py_DECREF(res);
215 return NULL;
217 sz *= 2;
218 p = PyString_AsString(res) + reslen;
220 if (i > 0) {
221 memcpy(p, sep, seplen);
222 p += seplen;
223 reslen += seplen;
225 memcpy(p, PyString_AS_STRING(item), slen);
226 p += slen;
227 reslen += slen;
229 if (_PyString_Resize(&res, reslen)) {
230 Py_DECREF(res);
231 res = NULL;
233 return res;
236 if (seq->ob_type->tp_as_sequence == NULL ||
237 (getitemfunc = seq->ob_type->tp_as_sequence->sq_item) == NULL)
239 PyErr_SetString(PyExc_TypeError,
240 "first argument must be a sequence");
241 return NULL;
243 /* This is now type safe */
244 for (i = 0; i < seqlen; i++) {
245 PyObject *item = getitemfunc(seq, i);
246 if (!item || !PyString_Check(item)) {
247 PyErr_SetString(PyExc_TypeError,
248 "first argument must be sequence of strings");
249 Py_DECREF(res);
250 Py_XDECREF(item);
251 return NULL;
253 slen = PyString_GET_SIZE(item);
254 while (reslen + slen + seplen >= sz) {
255 if (_PyString_Resize(&res, sz * 2)) {
256 Py_DECREF(res);
257 Py_DECREF(item);
258 return NULL;
260 sz *= 2;
261 p = PyString_AsString(res) + reslen;
263 if (i > 0) {
264 memcpy(p, sep, seplen);
265 p += seplen;
266 reslen += seplen;
268 memcpy(p, PyString_AS_STRING(item), slen);
269 p += slen;
270 reslen += slen;
271 Py_DECREF(item);
273 if (_PyString_Resize(&res, reslen)) {
274 Py_DECREF(res);
275 res = NULL;
277 return res;
281 static char find__doc__[] =
282 "find(s, sub [,start [,end]]) -> in\n\
284 Return the lowest index in s where substring sub is found,\n\
285 such that sub is contained within s[start,end]. Optional\n\
286 arguments start and end are interpreted as in slice notation.\n\
288 Return -1 on failure.";
290 static PyObject *
291 strop_find(PyObject *self, PyObject *args)
293 char *s, *sub;
294 int len, n, i = 0, last = INT_MAX;
296 if (!PyArg_ParseTuple(args, "t#t#|ii:find", &s, &len, &sub, &n, &i, &last))
297 return NULL;
299 if (last > len)
300 last = len;
301 if (last < 0)
302 last += len;
303 if (last < 0)
304 last = 0;
305 if (i < 0)
306 i += len;
307 if (i < 0)
308 i = 0;
310 if (n == 0 && i <= last)
311 return PyInt_FromLong((long)i);
313 last -= n;
314 for (; i <= last; ++i)
315 if (s[i] == sub[0] &&
316 (n == 1 || memcmp(&s[i+1], &sub[1], n-1) == 0))
317 return PyInt_FromLong((long)i);
319 return PyInt_FromLong(-1L);
323 static char rfind__doc__[] =
324 "rfind(s, sub [,start [,end]]) -> int\n\
326 Return the highest index in s where substring sub is found,\n\
327 such that sub is contained within s[start,end]. Optional\n\
328 arguments start and end are interpreted as in slice notation.\n\
330 Return -1 on failure.";
332 static PyObject *
333 strop_rfind(PyObject *self, PyObject *args)
335 char *s, *sub;
336 int len, n, j;
337 int i = 0, last = INT_MAX;
339 if (!PyArg_ParseTuple(args, "t#t#|ii:rfind", &s, &len, &sub, &n, &i, &last))
340 return NULL;
342 if (last > len)
343 last = len;
344 if (last < 0)
345 last += len;
346 if (last < 0)
347 last = 0;
348 if (i < 0)
349 i += len;
350 if (i < 0)
351 i = 0;
353 if (n == 0 && i <= last)
354 return PyInt_FromLong((long)last);
356 for (j = last-n; j >= i; --j)
357 if (s[j] == sub[0] &&
358 (n == 1 || memcmp(&s[j+1], &sub[1], n-1) == 0))
359 return PyInt_FromLong((long)j);
361 return PyInt_FromLong(-1L);
365 static PyObject *
366 do_strip(PyObject *args, int striptype)
368 char *s;
369 int len, i, j;
372 if (!PyArg_Parse(args, "t#", &s, &len))
373 return NULL;
375 i = 0;
376 if (striptype != RIGHTSTRIP) {
377 while (i < len && isspace(Py_CHARMASK(s[i]))) {
378 i++;
382 j = len;
383 if (striptype != LEFTSTRIP) {
384 do {
385 j--;
386 } while (j >= i && isspace(Py_CHARMASK(s[j])));
387 j++;
390 if (i == 0 && j == len) {
391 Py_INCREF(args);
392 return args;
394 else
395 return PyString_FromStringAndSize(s+i, j-i);
399 static char strip__doc__[] =
400 "strip(s) -> string\n\
402 Return a copy of the string s with leading and trailing\n\
403 whitespace removed.";
405 static PyObject *
406 strop_strip(PyObject *self, PyObject *args)
408 return do_strip(args, BOTHSTRIP);
412 static char lstrip__doc__[] =
413 "lstrip(s) -> string\n\
415 Return a copy of the string s with leading whitespace removed.";
417 static PyObject *
418 strop_lstrip(PyObject *self, PyObject *args)
420 return do_strip(args, LEFTSTRIP);
424 static char rstrip__doc__[] =
425 "rstrip(s) -> string\n\
427 Return a copy of the string s with trailing whitespace removed.";
429 static PyObject *
430 strop_rstrip(PyObject *self, PyObject *args)
432 return do_strip(args, RIGHTSTRIP);
436 static char lower__doc__[] =
437 "lower(s) -> string\n\
439 Return a copy of the string s converted to lowercase.";
441 static PyObject *
442 strop_lower(PyObject *self, PyObject *args)
444 char *s, *s_new;
445 int i, n;
446 PyObject *new;
447 int changed;
449 if (!PyArg_Parse(args, "t#", &s, &n))
450 return NULL;
451 new = PyString_FromStringAndSize(NULL, n);
452 if (new == NULL)
453 return NULL;
454 s_new = PyString_AsString(new);
455 changed = 0;
456 for (i = 0; i < n; i++) {
457 int c = Py_CHARMASK(*s++);
458 if (isupper(c)) {
459 changed = 1;
460 *s_new = tolower(c);
461 } else
462 *s_new = c;
463 s_new++;
465 if (!changed) {
466 Py_DECREF(new);
467 Py_INCREF(args);
468 return args;
470 return new;
474 static char upper__doc__[] =
475 "upper(s) -> string\n\
477 Return a copy of the string s converted to uppercase.";
479 static PyObject *
480 strop_upper(PyObject *self, PyObject *args)
482 char *s, *s_new;
483 int i, n;
484 PyObject *new;
485 int changed;
487 if (!PyArg_Parse(args, "t#", &s, &n))
488 return NULL;
489 new = PyString_FromStringAndSize(NULL, n);
490 if (new == NULL)
491 return NULL;
492 s_new = PyString_AsString(new);
493 changed = 0;
494 for (i = 0; i < n; i++) {
495 int c = Py_CHARMASK(*s++);
496 if (islower(c)) {
497 changed = 1;
498 *s_new = toupper(c);
499 } else
500 *s_new = c;
501 s_new++;
503 if (!changed) {
504 Py_DECREF(new);
505 Py_INCREF(args);
506 return args;
508 return new;
512 static char capitalize__doc__[] =
513 "capitalize(s) -> string\n\
515 Return a copy of the string s with only its first character\n\
516 capitalized.";
518 static PyObject *
519 strop_capitalize(PyObject *self, PyObject *args)
521 char *s, *s_new;
522 int i, n;
523 PyObject *new;
524 int changed;
526 if (!PyArg_Parse(args, "t#", &s, &n))
527 return NULL;
528 new = PyString_FromStringAndSize(NULL, n);
529 if (new == NULL)
530 return NULL;
531 s_new = PyString_AsString(new);
532 changed = 0;
533 if (0 < n) {
534 int c = Py_CHARMASK(*s++);
535 if (islower(c)) {
536 changed = 1;
537 *s_new = toupper(c);
538 } else
539 *s_new = c;
540 s_new++;
542 for (i = 1; i < n; i++) {
543 int c = Py_CHARMASK(*s++);
544 if (isupper(c)) {
545 changed = 1;
546 *s_new = tolower(c);
547 } else
548 *s_new = c;
549 s_new++;
551 if (!changed) {
552 Py_DECREF(new);
553 Py_INCREF(args);
554 return args;
556 return new;
560 static char expandtabs__doc__[] =
561 "expandtabs(string, [tabsize]) -> string\n\
563 Expand tabs in a string, i.e. replace them by one or more spaces,\n\
564 depending on the current column and the given tab size (default 8).\n\
565 The column number is reset to zero after each newline occurring in the\n\
566 string. This doesn't understand other non-printing characters.";
568 static PyObject *
569 strop_expandtabs(PyObject *self, PyObject *args)
571 /* Original by Fredrik Lundh */
572 char* e;
573 char* p;
574 char* q;
575 int i, j;
576 PyObject* out;
577 char* string;
578 int stringlen;
579 int tabsize = 8;
581 /* Get arguments */
582 if (!PyArg_ParseTuple(args, "s#|i:expandtabs", &string, &stringlen, &tabsize))
583 return NULL;
584 if (tabsize < 1) {
585 PyErr_SetString(PyExc_ValueError,
586 "tabsize must be at least 1");
587 return NULL;
590 /* First pass: determine size of output string */
591 i = j = 0; /* j: current column; i: total of previous lines */
592 e = string + stringlen;
593 for (p = string; p < e; p++) {
594 if (*p == '\t')
595 j += tabsize - (j%tabsize);
596 else {
597 j++;
598 if (*p == '\n') {
599 i += j;
600 j = 0;
605 /* Second pass: create output string and fill it */
606 out = PyString_FromStringAndSize(NULL, i+j);
607 if (out == NULL)
608 return NULL;
610 i = 0;
611 q = PyString_AS_STRING(out);
613 for (p = string; p < e; p++) {
614 if (*p == '\t') {
615 j = tabsize - (i%tabsize);
616 i += j;
617 while (j-- > 0)
618 *q++ = ' ';
619 } else {
620 *q++ = *p;
621 i++;
622 if (*p == '\n')
623 i = 0;
627 return out;
631 static char count__doc__[] =
632 "count(s, sub[, start[, end]]) -> int\n\
634 Return the number of occurrences of substring sub in string\n\
635 s[start:end]. Optional arguments start and end are\n\
636 interpreted as in slice notation.";
638 static PyObject *
639 strop_count(PyObject *self, PyObject *args)
641 char *s, *sub;
642 int len, n;
643 int i = 0, last = INT_MAX;
644 int m, r;
646 if (!PyArg_ParseTuple(args, "t#t#|ii:count", &s, &len, &sub, &n, &i, &last))
647 return NULL;
648 if (last > len)
649 last = len;
650 if (last < 0)
651 last += len;
652 if (last < 0)
653 last = 0;
654 if (i < 0)
655 i += len;
656 if (i < 0)
657 i = 0;
658 m = last + 1 - n;
659 if (n == 0)
660 return PyInt_FromLong((long) (m-i));
662 r = 0;
663 while (i < m) {
664 if (!memcmp(s+i, sub, n)) {
665 r++;
666 i += n;
667 } else {
668 i++;
671 return PyInt_FromLong((long) r);
675 static char swapcase__doc__[] =
676 "swapcase(s) -> string\n\
678 Return a copy of the string s with upper case characters\n\
679 converted to lowercase and vice versa.";
681 static PyObject *
682 strop_swapcase(PyObject *self, PyObject *args)
684 char *s, *s_new;
685 int i, n;
686 PyObject *new;
687 int changed;
689 if (!PyArg_Parse(args, "t#", &s, &n))
690 return NULL;
691 new = PyString_FromStringAndSize(NULL, n);
692 if (new == NULL)
693 return NULL;
694 s_new = PyString_AsString(new);
695 changed = 0;
696 for (i = 0; i < n; i++) {
697 int c = Py_CHARMASK(*s++);
698 if (islower(c)) {
699 changed = 1;
700 *s_new = toupper(c);
702 else if (isupper(c)) {
703 changed = 1;
704 *s_new = tolower(c);
706 else
707 *s_new = c;
708 s_new++;
710 if (!changed) {
711 Py_DECREF(new);
712 Py_INCREF(args);
713 return args;
715 return new;
719 static char atoi__doc__[] =
720 "atoi(s [,base]) -> int\n\
722 Return the integer represented by the string s in the given\n\
723 base, which defaults to 10. The string s must consist of one\n\
724 or more digits, possibly preceded by a sign. If base is 0, it\n\
725 is chosen from the leading characters of s, 0 for octal, 0x or\n\
726 0X for hexadecimal. If base is 16, a preceding 0x or 0X is\n\
727 accepted.";
729 static PyObject *
730 strop_atoi(PyObject *self, PyObject *args)
732 char *s, *end;
733 int base = 10;
734 long x;
735 char buffer[256]; /* For errors */
737 if (!PyArg_ParseTuple(args, "s|i:atoi", &s, &base))
738 return NULL;
740 if ((base != 0 && base < 2) || base > 36) {
741 PyErr_SetString(PyExc_ValueError, "invalid base for atoi()");
742 return NULL;
745 while (*s && isspace(Py_CHARMASK(*s)))
746 s++;
747 errno = 0;
748 if (base == 0 && s[0] == '0')
749 x = (long) PyOS_strtoul(s, &end, base);
750 else
751 x = PyOS_strtol(s, &end, base);
752 if (end == s || !isalnum(end[-1]))
753 goto bad;
754 while (*end && isspace(Py_CHARMASK(*end)))
755 end++;
756 if (*end != '\0') {
757 bad:
758 sprintf(buffer, "invalid literal for atoi(): %.200s", s);
759 PyErr_SetString(PyExc_ValueError, buffer);
760 return NULL;
762 else if (errno != 0) {
763 sprintf(buffer, "atoi() literal too large: %.200s", s);
764 PyErr_SetString(PyExc_ValueError, buffer);
765 return NULL;
767 return PyInt_FromLong(x);
771 static char atol__doc__[] =
772 "atol(s [,base]) -> long\n\
774 Return the long integer represented by the string s in the\n\
775 given base, which defaults to 10. The string s must consist\n\
776 of one or more digits, possibly preceded by a sign. If base\n\
777 is 0, it is chosen from the leading characters of s, 0 for\n\
778 octal, 0x or 0X for hexadecimal. If base is 16, a preceding\n\
779 0x or 0X is accepted. A trailing L or l is not accepted,\n\
780 unless base is 0.";
782 static PyObject *
783 strop_atol(PyObject *self, PyObject *args)
785 char *s, *end;
786 int base = 10;
787 PyObject *x;
788 char buffer[256]; /* For errors */
790 if (!PyArg_ParseTuple(args, "s|i:atol", &s, &base))
791 return NULL;
793 if ((base != 0 && base < 2) || base > 36) {
794 PyErr_SetString(PyExc_ValueError, "invalid base for atol()");
795 return NULL;
798 while (*s && isspace(Py_CHARMASK(*s)))
799 s++;
800 if (s[0] == '\0') {
801 PyErr_SetString(PyExc_ValueError, "empty string for atol()");
802 return NULL;
804 x = PyLong_FromString(s, &end, base);
805 if (x == NULL)
806 return NULL;
807 if (base == 0 && (*end == 'l' || *end == 'L'))
808 end++;
809 while (*end && isspace(Py_CHARMASK(*end)))
810 end++;
811 if (*end != '\0') {
812 sprintf(buffer, "invalid literal for atol(): %.200s", s);
813 PyErr_SetString(PyExc_ValueError, buffer);
814 Py_DECREF(x);
815 return NULL;
817 return x;
821 static char atof__doc__[] =
822 "atof(s) -> float\n\
824 Return the floating point number represented by the string s.";
826 static PyObject *
827 strop_atof(PyObject *self, PyObject *args)
829 extern double strtod(const char *, char **);
830 char *s, *end;
831 double x;
832 char buffer[256]; /* For errors */
834 if (!PyArg_ParseTuple(args, "s:atof", &s))
835 return NULL;
836 while (*s && isspace(Py_CHARMASK(*s)))
837 s++;
838 if (s[0] == '\0') {
839 PyErr_SetString(PyExc_ValueError, "empty string for atof()");
840 return NULL;
842 errno = 0;
843 PyFPE_START_PROTECT("strop_atof", return 0)
844 x = strtod(s, &end);
845 PyFPE_END_PROTECT(x)
846 while (*end && isspace(Py_CHARMASK(*end)))
847 end++;
848 if (*end != '\0') {
849 sprintf(buffer, "invalid literal for atof(): %.200s", s);
850 PyErr_SetString(PyExc_ValueError, buffer);
851 return NULL;
853 else if (errno != 0) {
854 sprintf(buffer, "atof() literal too large: %.200s", s);
855 PyErr_SetString(PyExc_ValueError, buffer);
856 return NULL;
858 return PyFloat_FromDouble(x);
862 static char maketrans__doc__[] =
863 "maketrans(frm, to) -> string\n\
865 Return a translation table (a string of 256 bytes long)\n\
866 suitable for use in string.translate. The strings frm and to\n\
867 must be of the same length.";
869 static PyObject *
870 strop_maketrans(PyObject *self, PyObject *args)
872 unsigned char *c, *from=NULL, *to=NULL;
873 int i, fromlen=0, tolen=0;
874 PyObject *result;
876 if (!PyArg_ParseTuple(args, "t#t#:maketrans", &from, &fromlen, &to, &tolen))
877 return NULL;
879 if (fromlen != tolen) {
880 PyErr_SetString(PyExc_ValueError,
881 "maketrans arguments must have same length");
882 return NULL;
885 result = PyString_FromStringAndSize((char *)NULL, 256);
886 if (result == NULL)
887 return NULL;
888 c = (unsigned char *) PyString_AS_STRING((PyStringObject *)result);
889 for (i = 0; i < 256; i++)
890 c[i]=(unsigned char)i;
891 for (i = 0; i < fromlen; i++)
892 c[from[i]]=to[i];
894 return result;
898 static char translate__doc__[] =
899 "translate(s,table [,deletechars]) -> string\n\
901 Return a copy of the string s, where all characters occurring\n\
902 in the optional argument deletechars are removed, and the\n\
903 remaining characters have been mapped through the given\n\
904 translation table, which must be a string of length 256.";
906 static PyObject *
907 strop_translate(PyObject *self, PyObject *args)
909 register char *input, *table, *output;
910 register int i, c, changed = 0;
911 PyObject *input_obj;
912 char *table1, *output_start, *del_table=NULL;
913 int inlen, tablen, dellen = 0;
914 PyObject *result;
915 int trans_table[256];
917 if (!PyArg_ParseTuple(args, "St#|t#:translate", &input_obj,
918 &table1, &tablen, &del_table, &dellen))
919 return NULL;
920 if (tablen != 256) {
921 PyErr_SetString(PyExc_ValueError,
922 "translation table must be 256 characters long");
923 return NULL;
926 table = table1;
927 inlen = PyString_Size(input_obj);
928 result = PyString_FromStringAndSize((char *)NULL, inlen);
929 if (result == NULL)
930 return NULL;
931 output_start = output = PyString_AsString(result);
932 input = PyString_AsString(input_obj);
934 if (dellen == 0) {
935 /* If no deletions are required, use faster code */
936 for (i = inlen; --i >= 0; ) {
937 c = Py_CHARMASK(*input++);
938 if (Py_CHARMASK((*output++ = table[c])) != c)
939 changed = 1;
941 if (changed)
942 return result;
943 Py_DECREF(result);
944 Py_INCREF(input_obj);
945 return input_obj;
948 for (i = 0; i < 256; i++)
949 trans_table[i] = Py_CHARMASK(table[i]);
951 for (i = 0; i < dellen; i++)
952 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
954 for (i = inlen; --i >= 0; ) {
955 c = Py_CHARMASK(*input++);
956 if (trans_table[c] != -1)
957 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
958 continue;
959 changed = 1;
961 if (!changed) {
962 Py_DECREF(result);
963 Py_INCREF(input_obj);
964 return input_obj;
966 /* Fix the size of the resulting string */
967 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
968 return NULL;
969 return result;
973 /* What follows is used for implementing replace(). Perry Stoll. */
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Searches the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the index into MEM of the match, or -1 if there
  is none.  A pattern longer than MEM can never match, so that case
  returns -1 as well.  PAT[0] is read unconditionally whenever the loop
  runs, so callers pass a non-empty pattern.
*/
static int mymemfind(char *mem, int len, char *pat, int pat_len)
{
    /* a match cannot start within the last pat_len-1 bytes */
    int last_start = len - pat_len;
    int pos;

    for (pos = 0; pos <= last_start; pos++) {
        if (mem[pos] != pat[0])
            continue;
        if (pat_len == 1 ||
            memcmp(mem + pos + 1, pat + 1, pat_len - 1) == 0)
            return pos;
    }
    return -1;
}
/*
  mymemcnt

  Return the number of non-overlapping occurrences of PAT in MEM,
  scanning left to right:
      mem=1111  and pat=11 gives 2,
      mem=11111 and pat=11 also gives 2.
*/
static int mymemcnt(char *mem, int len, char *pat, int pat_len)
{
    int hits;
    int where;
    int consumed;

    for (hits = 0; len >= 0; hits++) {
        where = mymemfind(mem, len, pat, pat_len);
        if (where == -1)
            break;
        /* Resume just past this match so matches never overlap. */
        consumed = where + pat_len;
        mem += consumed;
        len -= consumed;
    }
    return hits;
}
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  If COUNT is positive, at most COUNT occurrences
   (counted from the left) are replaced; otherwise all of them are.

   If STR is empty, if length of PAT is greater than length of STR, or
   if there are no occurrences of PAT in STR, then the original string
   is returned.  Otherwise, a new string is allocated here and returned;
   the caller releases it with PyMem_FREE().

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred (allocation failure, or the result
       would be longer than INT_MAX bytes).
*/
static char *mymemreplace(char *str, int len, char *pat, int pat_len, char *sub, int sub_len, int count, int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count > 0)
        nfound = nfound > count ? count : nfound;
    if (nfound == 0)
        goto return_same;

    /* Growing replacement: refuse sizes that do not fit in an int.
       (A shrinking one cannot underflow: nfound*pat_len <= len.) */
    if (sub_len > pat_len &&
        nfound > (INT_MAX - len) / (sub_len - pat_len))
        return NULL;
    new_len = len + nfound*(sub_len - pat_len);

    /* new_len is 0 when every byte is replaced by an empty SUB.
       malloc(0) may legitimately return NULL, which would be mistaken
       for an out-of-memory error, so always ask for at least 1 byte. */
    new_s = (char *)PyMem_MALLOC(new_len > 0 ? new_len : 1);
    if (new_s == NULL)
        return NULL;

    *out_len = new_len;
    out_s = new_s;

    while (len > 0) {
        /* find index of next instance of pattern */
        offset = mymemfind(str, len, pat, pat_len);
        /* if not found, break out of loop */
        if (offset == -1)
            break;

        /* copy non matching part of input string */
        memcpy(new_s, str, offset); /* copy part of str before pat */
        str += offset + pat_len;    /* move str past pattern */
        len -= offset + pat_len;    /* reduce length of str remaining */

        /* copy substitute into the output string */
        new_s += offset;            /* move new_s to dest for sub string */
        memcpy(new_s, sub, sub_len);/* copy substring into new_s */
        new_s += sub_len;           /* offset new_s past sub string */

        /* break when we've done count replacements; a count <= 0
           never reaches 0 here, so it means "no limit" */
        if (--count == 0)
            break;
    }
    /* copy any remaining values into output string */
    if (len > 0)
        memcpy(new_s, str, len);
    return out_s;

return_same:
    *out_len = -1;
    return str;
}
1097 static char replace__doc__[] =
1098 "replace (str, old, new[, maxsplit]) -> string\n\
1100 Return a copy of string str with all occurrences of substring\n\
1101 old replaced by new. If the optional argument maxsplit is\n\
1102 given, only the first maxsplit occurrences are replaced.";
1104 static PyObject *
1105 strop_replace(PyObject *self, PyObject *args)
1107 char *str, *pat,*sub,*new_s;
1108 int len,pat_len,sub_len,out_len;
1109 int count = 0;
1110 PyObject *new;
1112 if (!PyArg_ParseTuple(args, "t#t#t#|i:replace",
1113 &str, &len, &pat, &pat_len, &sub, &sub_len,
1114 &count))
1115 return NULL;
1116 if (pat_len <= 0) {
1117 PyErr_SetString(PyExc_ValueError, "empty pattern string");
1118 return NULL;
1120 new_s = mymemreplace(str,len,pat,pat_len,sub,sub_len,count,&out_len);
1121 if (new_s == NULL) {
1122 PyErr_NoMemory();
1123 return NULL;
1125 if (out_len == -1) {
1126 /* we're returning another reference to the input string */
1127 new = PyTuple_GetItem(args, 0);
1128 Py_XINCREF(new);
1130 else {
1131 new = PyString_FromStringAndSize(new_s, out_len);
1132 PyMem_FREE(new_s);
1134 return new;
1138 /* List of functions defined in the module */
1140 static PyMethodDef
1141 strop_methods[] = {
1142 {"atof", strop_atof,
1143 METH_VARARGS, atof__doc__},
1144 {"atoi", strop_atoi,
1145 METH_VARARGS, atoi__doc__},
1146 {"atol", strop_atol,
1147 METH_VARARGS, atol__doc__},
1148 {"capitalize", strop_capitalize,
1149 METH_OLDARGS, capitalize__doc__},
1150 {"count", strop_count,
1151 METH_VARARGS, count__doc__},
1152 {"expandtabs", strop_expandtabs,
1153 METH_VARARGS, expandtabs__doc__},
1154 {"find", strop_find,
1155 METH_VARARGS, find__doc__},
1156 {"join", strop_joinfields,
1157 METH_VARARGS, joinfields__doc__},
1158 {"joinfields", strop_joinfields,
1159 METH_VARARGS, joinfields__doc__},
1160 {"lstrip", strop_lstrip,
1161 METH_OLDARGS, lstrip__doc__},
1162 {"lower", strop_lower,
1163 METH_OLDARGS, lower__doc__},
1164 {"maketrans", strop_maketrans,
1165 METH_VARARGS, maketrans__doc__},
1166 {"replace", strop_replace,
1167 METH_VARARGS, replace__doc__},
1168 {"rfind", strop_rfind,
1169 METH_VARARGS, rfind__doc__},
1170 {"rstrip", strop_rstrip,
1171 METH_OLDARGS, rstrip__doc__},
1172 {"split", strop_splitfields,
1173 METH_VARARGS, splitfields__doc__},
1174 {"splitfields", strop_splitfields,
1175 METH_VARARGS, splitfields__doc__},
1176 {"strip", strop_strip,
1177 METH_OLDARGS, strip__doc__},
1178 {"swapcase", strop_swapcase,
1179 METH_OLDARGS, swapcase__doc__},
1180 {"translate", strop_translate,
1181 METH_VARARGS, translate__doc__},
1182 {"upper", strop_upper,
1183 METH_OLDARGS, upper__doc__},
1184 {NULL, NULL} /* sentinel */
1188 DL_EXPORT(void)
1189 initstrop(void)
1191 PyObject *m, *d, *s;
1192 char buf[256];
1193 int c, n;
1194 m = Py_InitModule4("strop", strop_methods, strop_module__doc__,
1195 (PyObject*)NULL, PYTHON_API_VERSION);
1196 d = PyModule_GetDict(m);
1198 /* Create 'whitespace' object */
1199 n = 0;
1200 for (c = 0; c < 256; c++) {
1201 if (isspace(c))
1202 buf[n++] = c;
1204 s = PyString_FromStringAndSize(buf, n);
1205 if (s) {
1206 PyDict_SetItemString(d, "whitespace", s);
1207 Py_DECREF(s);
1209 /* Create 'lowercase' object */
1210 n = 0;
1211 for (c = 0; c < 256; c++) {
1212 if (islower(c))
1213 buf[n++] = c;
1215 s = PyString_FromStringAndSize(buf, n);
1216 if (s) {
1217 PyDict_SetItemString(d, "lowercase", s);
1218 Py_DECREF(s);
1221 /* Create 'uppercase' object */
1222 n = 0;
1223 for (c = 0; c < 256; c++) {
1224 if (isupper(c))
1225 buf[n++] = c;
1227 s = PyString_FromStringAndSize(buf, n);
1228 if (s) {
1229 PyDict_SetItemString(d, "uppercase", s);
1230 Py_DECREF(s);