Remove a ?? in the description of Mac OS support.
[python/dscho.git] / Modules / stropmodule.c
blobb8f7519f2dfee07933a60e3ce3aa79af9a7e2314
2 /* strop module */
4 static char strop_module__doc__[] =
5 "Common string manipulations, optimized for speed.\n\
6 \n\
7 Always use \"import string\" rather than referencing\n\
8 this module directly.";
10 #include "Python.h"
12 #ifdef HAVE_LIMITS_H
13 #include <limits.h>
14 #else
15 #define INT_MAX 2147483647
16 #endif
18 #include <ctype.h>
19 /* XXX This file assumes that the <ctype.h> is*() functions
20 XXX are defined for all 8-bit characters! */
22 /* The lstrip(), rstrip() and strip() functions are implemented
23 in do_strip(), which uses an additional parameter to indicate what
24 type of strip should occur. */
26 #define LEFTSTRIP 0
27 #define RIGHTSTRIP 1
28 #define BOTHSTRIP 2
31 static PyObject *
32 split_whitespace(char *s, int len, int maxsplit)
34 int i = 0, j, err;
35 int countsplit = 0;
36 PyObject* item;
37 PyObject *list = PyList_New(0);
39 if (list == NULL)
40 return NULL;
42 while (i < len) {
43 while (i < len && isspace(Py_CHARMASK(s[i]))) {
44 i = i+1;
46 j = i;
47 while (i < len && !isspace(Py_CHARMASK(s[i]))) {
48 i = i+1;
50 if (j < i) {
51 item = PyString_FromStringAndSize(s+j, (int)(i-j));
52 if (item == NULL)
53 goto finally;
55 err = PyList_Append(list, item);
56 Py_DECREF(item);
57 if (err < 0)
58 goto finally;
60 countsplit++;
61 while (i < len && isspace(Py_CHARMASK(s[i]))) {
62 i = i+1;
64 if (maxsplit && (countsplit >= maxsplit) && i < len) {
65 item = PyString_FromStringAndSize(
66 s+i, (int)(len - i));
67 if (item == NULL)
68 goto finally;
70 err = PyList_Append(list, item);
71 Py_DECREF(item);
72 if (err < 0)
73 goto finally;
75 i = len;
79 return list;
80 finally:
81 Py_DECREF(list);
82 return NULL;
86 static char splitfields__doc__[] =
87 "split(s [,sep [,maxsplit]]) -> list of strings\n\
88 splitfields(s [,sep [,maxsplit]]) -> list of strings\n\
89 \n\
90 Return a list of the words in the string s, using sep as the\n\
91 delimiter string. If maxsplit is nonzero, splits into at most\n\
92 maxsplit words. If sep is not specified, any whitespace string\n\
93 is a separator. Maxsplit defaults to 0.\n\
94 \n\
95 (split and splitfields are synonymous)";
97 static PyObject *
98 strop_splitfields(PyObject *self, PyObject *args)
100 int len, n, i, j, err;
101 int splitcount, maxsplit;
102 char *s, *sub;
103 PyObject *list, *item;
105 sub = NULL;
106 n = 0;
107 splitcount = 0;
108 maxsplit = 0;
109 if (!PyArg_ParseTuple(args, "t#|z#i:split", &s, &len, &sub, &n, &maxsplit))
110 return NULL;
111 if (sub == NULL)
112 return split_whitespace(s, len, maxsplit);
113 if (n == 0) {
114 PyErr_SetString(PyExc_ValueError, "empty separator");
115 return NULL;
118 list = PyList_New(0);
119 if (list == NULL)
120 return NULL;
122 i = j = 0;
123 while (i+n <= len) {
124 if (s[i] == sub[0] && (n == 1 || memcmp(s+i, sub, n) == 0)) {
125 item = PyString_FromStringAndSize(s+j, (int)(i-j));
126 if (item == NULL)
127 goto fail;
128 err = PyList_Append(list, item);
129 Py_DECREF(item);
130 if (err < 0)
131 goto fail;
132 i = j = i + n;
133 splitcount++;
134 if (maxsplit && (splitcount >= maxsplit))
135 break;
137 else
138 i++;
140 item = PyString_FromStringAndSize(s+j, (int)(len-j));
141 if (item == NULL)
142 goto fail;
143 err = PyList_Append(list, item);
144 Py_DECREF(item);
145 if (err < 0)
146 goto fail;
148 return list;
150 fail:
151 Py_DECREF(list);
152 return NULL;
156 static char joinfields__doc__[] =
157 "join(list [,sep]) -> string\n\
158 joinfields(list [,sep]) -> string\n\
160 Return a string composed of the words in list, with\n\
161 intervening occurrences of sep. Sep defaults to a single\n\
162 space.\n\
164 (join and joinfields are synonymous)";
166 static PyObject *
167 strop_joinfields(PyObject *self, PyObject *args)
169 PyObject *seq;
170 char *sep = NULL;
171 int seqlen, seplen = 0;
172 int i, reslen = 0, slen = 0, sz = 100;
173 PyObject *res = NULL;
174 char* p = NULL;
175 intargfunc getitemfunc;
177 if (!PyArg_ParseTuple(args, "O|t#:join", &seq, &sep, &seplen))
178 return NULL;
179 if (sep == NULL) {
180 sep = " ";
181 seplen = 1;
184 seqlen = PySequence_Size(seq);
185 if (seqlen < 0 && PyErr_Occurred())
186 return NULL;
188 if (seqlen == 1) {
189 /* Optimization if there's only one item */
190 PyObject *item = PySequence_GetItem(seq, 0);
191 if (item && !PyString_Check(item)) {
192 PyErr_SetString(PyExc_TypeError,
193 "first argument must be sequence of strings");
194 Py_DECREF(item);
195 return NULL;
197 return item;
200 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
201 return NULL;
202 p = PyString_AsString(res);
204 /* optimize for lists, since it's the most common case. all others
205 * (tuples and arbitrary sequences) just use the sequence abstract
206 * interface.
208 if (PyList_Check(seq)) {
209 for (i = 0; i < seqlen; i++) {
210 PyObject *item = PyList_GET_ITEM(seq, i);
211 if (!PyString_Check(item)) {
212 PyErr_SetString(PyExc_TypeError,
213 "first argument must be sequence of strings");
214 Py_DECREF(res);
215 return NULL;
217 slen = PyString_GET_SIZE(item);
218 while (reslen + slen + seplen >= sz) {
219 if (_PyString_Resize(&res, sz * 2)) {
220 Py_DECREF(res);
221 return NULL;
223 sz *= 2;
224 p = PyString_AsString(res) + reslen;
226 if (i > 0) {
227 memcpy(p, sep, seplen);
228 p += seplen;
229 reslen += seplen;
231 memcpy(p, PyString_AS_STRING(item), slen);
232 p += slen;
233 reslen += slen;
235 if (_PyString_Resize(&res, reslen)) {
236 Py_DECREF(res);
237 res = NULL;
239 return res;
242 if (seq->ob_type->tp_as_sequence == NULL ||
243 (getitemfunc = seq->ob_type->tp_as_sequence->sq_item) == NULL)
245 PyErr_SetString(PyExc_TypeError,
246 "first argument must be a sequence");
247 return NULL;
249 /* This is now type safe */
250 for (i = 0; i < seqlen; i++) {
251 PyObject *item = getitemfunc(seq, i);
252 if (!item || !PyString_Check(item)) {
253 PyErr_SetString(PyExc_TypeError,
254 "first argument must be sequence of strings");
255 Py_DECREF(res);
256 Py_XDECREF(item);
257 return NULL;
259 slen = PyString_GET_SIZE(item);
260 while (reslen + slen + seplen >= sz) {
261 if (_PyString_Resize(&res, sz * 2)) {
262 Py_DECREF(res);
263 Py_DECREF(item);
264 return NULL;
266 sz *= 2;
267 p = PyString_AsString(res) + reslen;
269 if (i > 0) {
270 memcpy(p, sep, seplen);
271 p += seplen;
272 reslen += seplen;
274 memcpy(p, PyString_AS_STRING(item), slen);
275 p += slen;
276 reslen += slen;
277 Py_DECREF(item);
279 if (_PyString_Resize(&res, reslen)) {
280 Py_DECREF(res);
281 res = NULL;
283 return res;
/* Docstring for strop.find(). */
static char find__doc__[] =
"find(s, sub [,start [,end]]) -> int\n\
\n\
Return the lowest index in s where substring sub is found,\n\
such that sub is contained within s[start,end]. Optional\n\
arguments start and end are interpreted as in slice notation.\n\
\n\
Return -1 on failure.";
296 static PyObject *
297 strop_find(PyObject *self, PyObject *args)
299 char *s, *sub;
300 int len, n, i = 0, last = INT_MAX;
302 if (!PyArg_ParseTuple(args, "t#t#|ii:find", &s, &len, &sub, &n, &i, &last))
303 return NULL;
305 if (last > len)
306 last = len;
307 if (last < 0)
308 last += len;
309 if (last < 0)
310 last = 0;
311 if (i < 0)
312 i += len;
313 if (i < 0)
314 i = 0;
316 if (n == 0 && i <= last)
317 return PyInt_FromLong((long)i);
319 last -= n;
320 for (; i <= last; ++i)
321 if (s[i] == sub[0] &&
322 (n == 1 || memcmp(&s[i+1], &sub[1], n-1) == 0))
323 return PyInt_FromLong((long)i);
325 return PyInt_FromLong(-1L);
329 static char rfind__doc__[] =
330 "rfind(s, sub [,start [,end]]) -> int\n\
332 Return the highest index in s where substring sub is found,\n\
333 such that sub is contained within s[start,end]. Optional\n\
334 arguments start and end are interpreted as in slice notation.\n\
336 Return -1 on failure.";
338 static PyObject *
339 strop_rfind(PyObject *self, PyObject *args)
341 char *s, *sub;
342 int len, n, j;
343 int i = 0, last = INT_MAX;
345 if (!PyArg_ParseTuple(args, "t#t#|ii:rfind", &s, &len, &sub, &n, &i, &last))
346 return NULL;
348 if (last > len)
349 last = len;
350 if (last < 0)
351 last += len;
352 if (last < 0)
353 last = 0;
354 if (i < 0)
355 i += len;
356 if (i < 0)
357 i = 0;
359 if (n == 0 && i <= last)
360 return PyInt_FromLong((long)last);
362 for (j = last-n; j >= i; --j)
363 if (s[j] == sub[0] &&
364 (n == 1 || memcmp(&s[j+1], &sub[1], n-1) == 0))
365 return PyInt_FromLong((long)j);
367 return PyInt_FromLong(-1L);
371 static PyObject *
372 do_strip(PyObject *args, int striptype)
374 char *s;
375 int len, i, j;
378 if (!PyArg_Parse(args, "t#", &s, &len))
379 return NULL;
381 i = 0;
382 if (striptype != RIGHTSTRIP) {
383 while (i < len && isspace(Py_CHARMASK(s[i]))) {
384 i++;
388 j = len;
389 if (striptype != LEFTSTRIP) {
390 do {
391 j--;
392 } while (j >= i && isspace(Py_CHARMASK(s[j])));
393 j++;
396 if (i == 0 && j == len) {
397 Py_INCREF(args);
398 return args;
400 else
401 return PyString_FromStringAndSize(s+i, j-i);
405 static char strip__doc__[] =
406 "strip(s) -> string\n\
408 Return a copy of the string s with leading and trailing\n\
409 whitespace removed.";
411 static PyObject *
412 strop_strip(PyObject *self, PyObject *args)
414 return do_strip(args, BOTHSTRIP);
418 static char lstrip__doc__[] =
419 "lstrip(s) -> string\n\
421 Return a copy of the string s with leading whitespace removed.";
423 static PyObject *
424 strop_lstrip(PyObject *self, PyObject *args)
426 return do_strip(args, LEFTSTRIP);
430 static char rstrip__doc__[] =
431 "rstrip(s) -> string\n\
433 Return a copy of the string s with trailing whitespace removed.";
435 static PyObject *
436 strop_rstrip(PyObject *self, PyObject *args)
438 return do_strip(args, RIGHTSTRIP);
442 static char lower__doc__[] =
443 "lower(s) -> string\n\
445 Return a copy of the string s converted to lowercase.";
447 static PyObject *
448 strop_lower(PyObject *self, PyObject *args)
450 char *s, *s_new;
451 int i, n;
452 PyObject *new;
453 int changed;
455 if (!PyArg_Parse(args, "t#", &s, &n))
456 return NULL;
457 new = PyString_FromStringAndSize(NULL, n);
458 if (new == NULL)
459 return NULL;
460 s_new = PyString_AsString(new);
461 changed = 0;
462 for (i = 0; i < n; i++) {
463 int c = Py_CHARMASK(*s++);
464 if (isupper(c)) {
465 changed = 1;
466 *s_new = tolower(c);
467 } else
468 *s_new = c;
469 s_new++;
471 if (!changed) {
472 Py_DECREF(new);
473 Py_INCREF(args);
474 return args;
476 return new;
480 static char upper__doc__[] =
481 "upper(s) -> string\n\
483 Return a copy of the string s converted to uppercase.";
485 static PyObject *
486 strop_upper(PyObject *self, PyObject *args)
488 char *s, *s_new;
489 int i, n;
490 PyObject *new;
491 int changed;
493 if (!PyArg_Parse(args, "t#", &s, &n))
494 return NULL;
495 new = PyString_FromStringAndSize(NULL, n);
496 if (new == NULL)
497 return NULL;
498 s_new = PyString_AsString(new);
499 changed = 0;
500 for (i = 0; i < n; i++) {
501 int c = Py_CHARMASK(*s++);
502 if (islower(c)) {
503 changed = 1;
504 *s_new = toupper(c);
505 } else
506 *s_new = c;
507 s_new++;
509 if (!changed) {
510 Py_DECREF(new);
511 Py_INCREF(args);
512 return args;
514 return new;
518 static char capitalize__doc__[] =
519 "capitalize(s) -> string\n\
521 Return a copy of the string s with only its first character\n\
522 capitalized.";
524 static PyObject *
525 strop_capitalize(PyObject *self, PyObject *args)
527 char *s, *s_new;
528 int i, n;
529 PyObject *new;
530 int changed;
532 if (!PyArg_Parse(args, "t#", &s, &n))
533 return NULL;
534 new = PyString_FromStringAndSize(NULL, n);
535 if (new == NULL)
536 return NULL;
537 s_new = PyString_AsString(new);
538 changed = 0;
539 if (0 < n) {
540 int c = Py_CHARMASK(*s++);
541 if (islower(c)) {
542 changed = 1;
543 *s_new = toupper(c);
544 } else
545 *s_new = c;
546 s_new++;
548 for (i = 1; i < n; i++) {
549 int c = Py_CHARMASK(*s++);
550 if (isupper(c)) {
551 changed = 1;
552 *s_new = tolower(c);
553 } else
554 *s_new = c;
555 s_new++;
557 if (!changed) {
558 Py_DECREF(new);
559 Py_INCREF(args);
560 return args;
562 return new;
566 static char expandtabs__doc__[] =
567 "expandtabs(string, [tabsize]) -> string\n\
569 Expand tabs in a string, i.e. replace them by one or more spaces,\n\
570 depending on the current column and the given tab size (default 8).\n\
571 The column number is reset to zero after each newline occurring in the\n\
572 string. This doesn't understand other non-printing characters.";
574 static PyObject *
575 strop_expandtabs(PyObject *self, PyObject *args)
577 /* Original by Fredrik Lundh */
578 char* e;
579 char* p;
580 char* q;
581 int i, j;
582 PyObject* out;
583 char* string;
584 int stringlen;
585 int tabsize = 8;
587 /* Get arguments */
588 if (!PyArg_ParseTuple(args, "s#|i:expandtabs", &string, &stringlen, &tabsize))
589 return NULL;
590 if (tabsize < 1) {
591 PyErr_SetString(PyExc_ValueError,
592 "tabsize must be at least 1");
593 return NULL;
596 /* First pass: determine size of output string */
597 i = j = 0; /* j: current column; i: total of previous lines */
598 e = string + stringlen;
599 for (p = string; p < e; p++) {
600 if (*p == '\t')
601 j += tabsize - (j%tabsize);
602 else {
603 j++;
604 if (*p == '\n') {
605 i += j;
606 j = 0;
611 /* Second pass: create output string and fill it */
612 out = PyString_FromStringAndSize(NULL, i+j);
613 if (out == NULL)
614 return NULL;
616 i = 0;
617 q = PyString_AS_STRING(out);
619 for (p = string; p < e; p++) {
620 if (*p == '\t') {
621 j = tabsize - (i%tabsize);
622 i += j;
623 while (j-- > 0)
624 *q++ = ' ';
625 } else {
626 *q++ = *p;
627 i++;
628 if (*p == '\n')
629 i = 0;
633 return out;
637 static char count__doc__[] =
638 "count(s, sub[, start[, end]]) -> int\n\
640 Return the number of occurrences of substring sub in string\n\
641 s[start:end]. Optional arguments start and end are\n\
642 interpreted as in slice notation.";
644 static PyObject *
645 strop_count(PyObject *self, PyObject *args)
647 char *s, *sub;
648 int len, n;
649 int i = 0, last = INT_MAX;
650 int m, r;
652 if (!PyArg_ParseTuple(args, "t#t#|ii:count", &s, &len, &sub, &n, &i, &last))
653 return NULL;
654 if (last > len)
655 last = len;
656 if (last < 0)
657 last += len;
658 if (last < 0)
659 last = 0;
660 if (i < 0)
661 i += len;
662 if (i < 0)
663 i = 0;
664 m = last + 1 - n;
665 if (n == 0)
666 return PyInt_FromLong((long) (m-i));
668 r = 0;
669 while (i < m) {
670 if (!memcmp(s+i, sub, n)) {
671 r++;
672 i += n;
673 } else {
674 i++;
677 return PyInt_FromLong((long) r);
681 static char swapcase__doc__[] =
682 "swapcase(s) -> string\n\
684 Return a copy of the string s with upper case characters\n\
685 converted to lowercase and vice versa.";
687 static PyObject *
688 strop_swapcase(PyObject *self, PyObject *args)
690 char *s, *s_new;
691 int i, n;
692 PyObject *new;
693 int changed;
695 if (!PyArg_Parse(args, "t#", &s, &n))
696 return NULL;
697 new = PyString_FromStringAndSize(NULL, n);
698 if (new == NULL)
699 return NULL;
700 s_new = PyString_AsString(new);
701 changed = 0;
702 for (i = 0; i < n; i++) {
703 int c = Py_CHARMASK(*s++);
704 if (islower(c)) {
705 changed = 1;
706 *s_new = toupper(c);
708 else if (isupper(c)) {
709 changed = 1;
710 *s_new = tolower(c);
712 else
713 *s_new = c;
714 s_new++;
716 if (!changed) {
717 Py_DECREF(new);
718 Py_INCREF(args);
719 return args;
721 return new;
725 static char atoi__doc__[] =
726 "atoi(s [,base]) -> int\n\
728 Return the integer represented by the string s in the given\n\
729 base, which defaults to 10. The string s must consist of one\n\
730 or more digits, possibly preceded by a sign. If base is 0, it\n\
731 is chosen from the leading characters of s, 0 for octal, 0x or\n\
732 0X for hexadecimal. If base is 16, a preceding 0x or 0X is\n\
733 accepted.";
735 static PyObject *
736 strop_atoi(PyObject *self, PyObject *args)
738 char *s, *end;
739 int base = 10;
740 long x;
741 char buffer[256]; /* For errors */
743 if (!PyArg_ParseTuple(args, "s|i:atoi", &s, &base))
744 return NULL;
746 if ((base != 0 && base < 2) || base > 36) {
747 PyErr_SetString(PyExc_ValueError, "invalid base for atoi()");
748 return NULL;
751 while (*s && isspace(Py_CHARMASK(*s)))
752 s++;
753 errno = 0;
754 if (base == 0 && s[0] == '0')
755 x = (long) PyOS_strtoul(s, &end, base);
756 else
757 x = PyOS_strtol(s, &end, base);
758 if (end == s || !isalnum(end[-1]))
759 goto bad;
760 while (*end && isspace(Py_CHARMASK(*end)))
761 end++;
762 if (*end != '\0') {
763 bad:
764 sprintf(buffer, "invalid literal for atoi(): %.200s", s);
765 PyErr_SetString(PyExc_ValueError, buffer);
766 return NULL;
768 else if (errno != 0) {
769 sprintf(buffer, "atoi() literal too large: %.200s", s);
770 PyErr_SetString(PyExc_ValueError, buffer);
771 return NULL;
773 return PyInt_FromLong(x);
777 static char atol__doc__[] =
778 "atol(s [,base]) -> long\n\
780 Return the long integer represented by the string s in the\n\
781 given base, which defaults to 10. The string s must consist\n\
782 of one or more digits, possibly preceded by a sign. If base\n\
783 is 0, it is chosen from the leading characters of s, 0 for\n\
784 octal, 0x or 0X for hexadecimal. If base is 16, a preceding\n\
785 0x or 0X is accepted. A trailing L or l is not accepted,\n\
786 unless base is 0.";
788 static PyObject *
789 strop_atol(PyObject *self, PyObject *args)
791 char *s, *end;
792 int base = 10;
793 PyObject *x;
794 char buffer[256]; /* For errors */
796 if (!PyArg_ParseTuple(args, "s|i:atol", &s, &base))
797 return NULL;
799 if ((base != 0 && base < 2) || base > 36) {
800 PyErr_SetString(PyExc_ValueError, "invalid base for atol()");
801 return NULL;
804 while (*s && isspace(Py_CHARMASK(*s)))
805 s++;
806 if (s[0] == '\0') {
807 PyErr_SetString(PyExc_ValueError, "empty string for atol()");
808 return NULL;
810 x = PyLong_FromString(s, &end, base);
811 if (x == NULL)
812 return NULL;
813 if (base == 0 && (*end == 'l' || *end == 'L'))
814 end++;
815 while (*end && isspace(Py_CHARMASK(*end)))
816 end++;
817 if (*end != '\0') {
818 sprintf(buffer, "invalid literal for atol(): %.200s", s);
819 PyErr_SetString(PyExc_ValueError, buffer);
820 Py_DECREF(x);
821 return NULL;
823 return x;
827 static char atof__doc__[] =
828 "atof(s) -> float\n\
830 Return the floating point number represented by the string s.";
832 static PyObject *
833 strop_atof(PyObject *self, PyObject *args)
835 extern double strtod(const char *, char **);
836 char *s, *end;
837 double x;
838 char buffer[256]; /* For errors */
840 if (!PyArg_ParseTuple(args, "s:atof", &s))
841 return NULL;
842 while (*s && isspace(Py_CHARMASK(*s)))
843 s++;
844 if (s[0] == '\0') {
845 PyErr_SetString(PyExc_ValueError, "empty string for atof()");
846 return NULL;
848 errno = 0;
849 PyFPE_START_PROTECT("strop_atof", return 0)
850 x = strtod(s, &end);
851 PyFPE_END_PROTECT(x)
852 while (*end && isspace(Py_CHARMASK(*end)))
853 end++;
854 if (*end != '\0') {
855 sprintf(buffer, "invalid literal for atof(): %.200s", s);
856 PyErr_SetString(PyExc_ValueError, buffer);
857 return NULL;
859 else if (errno != 0) {
860 sprintf(buffer, "atof() literal too large: %.200s", s);
861 PyErr_SetString(PyExc_ValueError, buffer);
862 return NULL;
864 return PyFloat_FromDouble(x);
868 static char maketrans__doc__[] =
869 "maketrans(frm, to) -> string\n\
871 Return a translation table (a string of 256 bytes long)\n\
872 suitable for use in string.translate. The strings frm and to\n\
873 must be of the same length.";
875 static PyObject *
876 strop_maketrans(PyObject *self, PyObject *args)
878 unsigned char *c, *from=NULL, *to=NULL;
879 int i, fromlen=0, tolen=0;
880 PyObject *result;
882 if (!PyArg_ParseTuple(args, "t#t#:maketrans", &from, &fromlen, &to, &tolen))
883 return NULL;
885 if (fromlen != tolen) {
886 PyErr_SetString(PyExc_ValueError,
887 "maketrans arguments must have same length");
888 return NULL;
891 result = PyString_FromStringAndSize((char *)NULL, 256);
892 if (result == NULL)
893 return NULL;
894 c = (unsigned char *) PyString_AS_STRING((PyStringObject *)result);
895 for (i = 0; i < 256; i++)
896 c[i]=(unsigned char)i;
897 for (i = 0; i < fromlen; i++)
898 c[from[i]]=to[i];
900 return result;
904 static char translate__doc__[] =
905 "translate(s,table [,deletechars]) -> string\n\
907 Return a copy of the string s, where all characters occurring\n\
908 in the optional argument deletechars are removed, and the\n\
909 remaining characters have been mapped through the given\n\
910 translation table, which must be a string of length 256.";
912 static PyObject *
913 strop_translate(PyObject *self, PyObject *args)
915 register char *input, *table, *output;
916 register int i, c, changed = 0;
917 PyObject *input_obj;
918 char *table1, *output_start, *del_table=NULL;
919 int inlen, tablen, dellen = 0;
920 PyObject *result;
921 int trans_table[256];
923 if (!PyArg_ParseTuple(args, "St#|t#:translate", &input_obj,
924 &table1, &tablen, &del_table, &dellen))
925 return NULL;
926 if (tablen != 256) {
927 PyErr_SetString(PyExc_ValueError,
928 "translation table must be 256 characters long");
929 return NULL;
932 table = table1;
933 inlen = PyString_Size(input_obj);
934 result = PyString_FromStringAndSize((char *)NULL, inlen);
935 if (result == NULL)
936 return NULL;
937 output_start = output = PyString_AsString(result);
938 input = PyString_AsString(input_obj);
940 if (dellen == 0) {
941 /* If no deletions are required, use faster code */
942 for (i = inlen; --i >= 0; ) {
943 c = Py_CHARMASK(*input++);
944 if (Py_CHARMASK((*output++ = table[c])) != c)
945 changed = 1;
947 if (changed)
948 return result;
949 Py_DECREF(result);
950 Py_INCREF(input_obj);
951 return input_obj;
954 for (i = 0; i < 256; i++)
955 trans_table[i] = Py_CHARMASK(table[i]);
957 for (i = 0; i < dellen; i++)
958 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
960 for (i = inlen; --i >= 0; ) {
961 c = Py_CHARMASK(*input++);
962 if (trans_table[c] != -1)
963 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
964 continue;
965 changed = 1;
967 if (!changed) {
968 Py_DECREF(result);
969 Py_INCREF(input_obj);
970 return input_obj;
972 /* Fix the size of the resulting string */
973 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
974 return NULL;
975 return result;
979 /* What follows is used for implementing replace(). Perry Stoll. */
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Searches the len bytes at MEM for the first occurrence of the pat_len
  bytes at PAT and returns its index into MEM, or -1 if there is none.
  A pattern longer than MEM is never found.  PAT must hold at least one
  byte, since its first byte is always read.
*/
static int mymemfind(char *mem, int len, char *pat, int pat_len)
{
	/* A match cannot start in the last pat_len-1 bytes. */
	int last_start = len - pat_len;
	int pos;

	for (pos = 0; pos <= last_start; pos++) {
		/* Cheap first-byte test before comparing the rest. */
		if (mem[pos] != pat[0])
			continue;
		if (pat_len == 1 ||
		    memcmp(mem + pos + 1, pat + 1, pat_len - 1) == 0)
			return pos;
	}
	return -1;
}
/*
  mymemcnt

  Return the number of non-overlapping occurrences of PAT in MEM,
  scanning left to right:
     mem=1111 and pat==11 returns 2.
     mem=11111 and pat==11 also returns 2.
*/
static int mymemcnt(char *mem, int len, char *pat, int pat_len)
{
	int hits = 0;
	int where, consumed;

	while (len >= 0) {
		where = mymemfind(mem, len, pat, pat_len);
		if (where == -1)
			break;
		/* Resume the search just past this occurrence. */
		consumed = where + pat_len;
		mem += consumed;
		len -= consumed;
		hits++;
	}
	return hits;
}
/*
   mymemreplace

   Return a buffer in which occurrences of PAT in memory STR are replaced
   with SUB.  If count is greater than zero, at most count occurrences
   (the leftmost ones) are replaced; otherwise all are.

   If STR is empty, PAT is longer than STR, or PAT does not occur in STR,
   the original pointer STR is returned.  Otherwise a new buffer is
   allocated here with PyMem_MALLOC and returned; the caller must release
   it with PyMem_FREE.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred (allocation failure, or a result too
       long to be described by an int).
*/
static char *mymemreplace(char *str, int len, char *pat, int pat_len, char *sub, int sub_len, int count, int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len;

	if (len == 0 || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = mymemcnt(str, len, pat, pat_len);
	if (count > 0 && nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;

	/* Refuse results whose length would overflow an int.  Only a
	   growing replacement can overflow. */
	if (sub_len > pat_len &&
	    nfound > (INT_MAX - len) / (sub_len - pat_len))
		return NULL;
	new_len = len + nfound*(sub_len - pat_len);

	/* Always request at least one byte: a zero-size request may return
	   NULL, which would be misreported as running out of memory when
	   the result is simply empty. */
	new_s = (char *)PyMem_MALLOC(new_len > 0 ? new_len : 1);
	if (new_s == NULL) return NULL;

	*out_len = new_len;
	out_s = new_s;

	/* Perform exactly the nfound replacements counted above. */
	for (; nfound > 0 && len > 0; --nfound) {
		/* find index of next instance of pattern */
		offset = mymemfind(str, len, pat, pat_len);
		/* if not found, break out of loop */
		if (offset == -1) break;

		/* copy non matching part of input string */
		memcpy(new_s, str, offset); /* copy part of str before pat */
		str += offset + pat_len; /* move str past pattern */
		len -= offset + pat_len; /* reduce length of str remaining */

		/* copy substitute into the output string */
		new_s += offset; /* move new_s to dest for sub string */
		memcpy(new_s, sub, sub_len); /* copy substring into new_s */
		new_s += sub_len; /* offset new_s past sub string */
	}
	/* copy any remaining values into output string */
	if (len > 0)
		memcpy(new_s, str, len);
	return out_s;

  return_same:
	*out_len = -1;
	return str;
}
1103 static char replace__doc__[] =
1104 "replace (str, old, new[, maxsplit]) -> string\n\
1106 Return a copy of string str with all occurrences of substring\n\
1107 old replaced by new. If the optional argument maxsplit is\n\
1108 given, only the first maxsplit occurrences are replaced.";
1110 static PyObject *
1111 strop_replace(PyObject *self, PyObject *args)
1113 char *str, *pat,*sub,*new_s;
1114 int len,pat_len,sub_len,out_len;
1115 int count = 0;
1116 PyObject *new;
1118 if (!PyArg_ParseTuple(args, "t#t#t#|i:replace",
1119 &str, &len, &pat, &pat_len, &sub, &sub_len,
1120 &count))
1121 return NULL;
1122 if (pat_len <= 0) {
1123 PyErr_SetString(PyExc_ValueError, "empty pattern string");
1124 return NULL;
1126 new_s = mymemreplace(str,len,pat,pat_len,sub,sub_len,count,&out_len);
1127 if (new_s == NULL) {
1128 PyErr_NoMemory();
1129 return NULL;
1131 if (out_len == -1) {
1132 /* we're returning another reference to the input string */
1133 new = PyTuple_GetItem(args, 0);
1134 Py_XINCREF(new);
1136 else {
1137 new = PyString_FromStringAndSize(new_s, out_len);
1138 PyMem_FREE(new_s);
1140 return new;
1144 /* List of functions defined in the module */
1146 static PyMethodDef
1147 strop_methods[] = {
1148 {"atof", strop_atof,
1149 METH_VARARGS, atof__doc__},
1150 {"atoi", strop_atoi,
1151 METH_VARARGS, atoi__doc__},
1152 {"atol", strop_atol,
1153 METH_VARARGS, atol__doc__},
1154 {"capitalize", strop_capitalize,
1155 METH_OLDARGS, capitalize__doc__},
1156 {"count", strop_count,
1157 METH_VARARGS, count__doc__},
1158 {"expandtabs", strop_expandtabs,
1159 METH_VARARGS, expandtabs__doc__},
1160 {"find", strop_find,
1161 METH_VARARGS, find__doc__},
1162 {"join", strop_joinfields,
1163 METH_VARARGS, joinfields__doc__},
1164 {"joinfields", strop_joinfields,
1165 METH_VARARGS, joinfields__doc__},
1166 {"lstrip", strop_lstrip,
1167 METH_OLDARGS, lstrip__doc__},
1168 {"lower", strop_lower,
1169 METH_OLDARGS, lower__doc__},
1170 {"maketrans", strop_maketrans,
1171 METH_VARARGS, maketrans__doc__},
1172 {"replace", strop_replace,
1173 METH_VARARGS, replace__doc__},
1174 {"rfind", strop_rfind,
1175 METH_VARARGS, rfind__doc__},
1176 {"rstrip", strop_rstrip,
1177 METH_OLDARGS, rstrip__doc__},
1178 {"split", strop_splitfields,
1179 METH_VARARGS, splitfields__doc__},
1180 {"splitfields", strop_splitfields,
1181 METH_VARARGS, splitfields__doc__},
1182 {"strip", strop_strip,
1183 METH_OLDARGS, strip__doc__},
1184 {"swapcase", strop_swapcase,
1185 METH_OLDARGS, swapcase__doc__},
1186 {"translate", strop_translate,
1187 METH_VARARGS, translate__doc__},
1188 {"upper", strop_upper,
1189 METH_OLDARGS, upper__doc__},
1190 {NULL, NULL} /* sentinel */
1194 DL_EXPORT(void)
1195 initstrop(void)
1197 PyObject *m, *d, *s;
1198 char buf[256];
1199 int c, n;
1200 m = Py_InitModule4("strop", strop_methods, strop_module__doc__,
1201 (PyObject*)NULL, PYTHON_API_VERSION);
1202 d = PyModule_GetDict(m);
1204 /* Create 'whitespace' object */
1205 n = 0;
1206 for (c = 0; c < 256; c++) {
1207 if (isspace(c))
1208 buf[n++] = c;
1210 s = PyString_FromStringAndSize(buf, n);
1211 if (s) {
1212 PyDict_SetItemString(d, "whitespace", s);
1213 Py_DECREF(s);
1215 /* Create 'lowercase' object */
1216 n = 0;
1217 for (c = 0; c < 256; c++) {
1218 if (islower(c))
1219 buf[n++] = c;
1221 s = PyString_FromStringAndSize(buf, n);
1222 if (s) {
1223 PyDict_SetItemString(d, "lowercase", s);
1224 Py_DECREF(s);
1227 /* Create 'uppercase' object */
1228 n = 0;
1229 for (c = 0; c < 256; c++) {
1230 if (isupper(c))
1231 buf[n++] = c;
1233 s = PyString_FromStringAndSize(buf, n);
1234 if (s) {
1235 PyDict_SetItemString(d, "uppercase", s);
1236 Py_DECREF(s);