The 0.5 release happened on 2/15, not on 2/14. :-)
[python/dscho.git] / Modules / stropmodule.c
blob7a56e3751feae0659bbd879bfbeb1361e37a6b25
1 /***********************************************************
2 Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
3 The Netherlands.
5 All Rights Reserved
7 Permission to use, copy, modify, and distribute this software and its
8 documentation for any purpose and without fee is hereby granted,
9 provided that the above copyright notice appear in all copies and that
10 both that copyright notice and this permission notice appear in
11 supporting documentation, and that the names of Stichting Mathematisch
12 Centrum or CWI or Corporation for National Research Initiatives or
13 CNRI not be used in advertising or publicity pertaining to
14 distribution of the software without specific, written prior
15 permission.
17 While CWI is the initial source for this software, a modified version
18 is made available by the Corporation for National Research Initiatives
19 (CNRI) at the Internet address ftp://ftp.python.org.
21 STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
22 REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
23 MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
24 CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
25 DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
26 PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
27 TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
28 PERFORMANCE OF THIS SOFTWARE.
30 ******************************************************************/
32 /* strop module */
/* Module-level documentation string, handed to Py_InitModule4() in
   initstrop(). */
static char strop_module__doc__[] =
	"Common string manipulations, optimized for speed.\n"
	"\n"
	"Always use \"import string\" rather than referencing\n"
	"this module directly.";
40 #include "Python.h"
42 #ifdef HAVE_LIMITS_H
43 #include <limits.h>
44 #else
45 #define INT_MAX 2147483647
46 #endif
48 #include <ctype.h>
49 /* XXX This file assumes that the <ctype.h> is*() functions
50 XXX are defined for all 8-bit characters! */
52 /* The lstrip(), rstrip() and strip() functions are implemented
53 in do_strip(), which uses an additional parameter to indicate what
54 type of strip should occur. */
56 #define LEFTSTRIP 0
57 #define RIGHTSTRIP 1
58 #define BOTHSTRIP 2
61 static PyObject *
62 split_whitespace(s, len, maxsplit)
63 char *s;
64 int len;
65 int maxsplit;
67 int i = 0, j, err;
68 int countsplit = 0;
69 PyObject* item;
70 PyObject *list = PyList_New(0);
72 if (list == NULL)
73 return NULL;
75 while (i < len) {
76 while (i < len && isspace(Py_CHARMASK(s[i]))) {
77 i = i+1;
79 j = i;
80 while (i < len && !isspace(Py_CHARMASK(s[i]))) {
81 i = i+1;
83 if (j < i) {
84 item = PyString_FromStringAndSize(s+j, (int)(i-j));
85 if (item == NULL)
86 goto finally;
88 err = PyList_Append(list, item);
89 Py_DECREF(item);
90 if (err < 0)
91 goto finally;
93 countsplit++;
94 while (i < len && isspace(Py_CHARMASK(s[i]))) {
95 i = i+1;
97 if (maxsplit && (countsplit >= maxsplit) && i < len) {
98 item = PyString_FromStringAndSize(
99 s+i, (int)(len - i));
100 if (item == NULL)
101 goto finally;
103 err = PyList_Append(list, item);
104 Py_DECREF(item);
105 if (err < 0)
106 goto finally;
108 i = len;
112 return list;
113 finally:
114 Py_DECREF(list);
115 return NULL;
119 static char splitfields__doc__[] =
120 "split(s [,sep [,maxsplit]]) -> list of strings\n\
121 splitfields(s [,sep [,maxsplit]]) -> list of strings\n\
123 Return a list of the words in the string s, using sep as the\n\
124 delimiter string. If maxsplit is nonzero, splits into at most\n\
125 maxsplit words. If sep is not specified, any whitespace string\n\
126 is a separator. Maxsplit defaults to 0.\n\
128 (split and splitfields are synonymous)";
130 static PyObject *
131 strop_splitfields(self, args)
132 PyObject *self; /* Not used */
133 PyObject *args;
135 int len, n, i, j, err;
136 int splitcount, maxsplit;
137 char *s, *sub;
138 PyObject *list, *item;
140 sub = NULL;
141 n = 0;
142 splitcount = 0;
143 maxsplit = 0;
144 if (!PyArg_ParseTuple(args, "t#|z#i", &s, &len, &sub, &n, &maxsplit))
145 return NULL;
146 if (sub == NULL)
147 return split_whitespace(s, len, maxsplit);
148 if (n == 0) {
149 PyErr_SetString(PyExc_ValueError, "empty separator");
150 return NULL;
153 list = PyList_New(0);
154 if (list == NULL)
155 return NULL;
157 i = j = 0;
158 while (i+n <= len) {
159 if (s[i] == sub[0] && (n == 1 || memcmp(s+i, sub, n) == 0)) {
160 item = PyString_FromStringAndSize(s+j, (int)(i-j));
161 if (item == NULL)
162 goto fail;
163 err = PyList_Append(list, item);
164 Py_DECREF(item);
165 if (err < 0)
166 goto fail;
167 i = j = i + n;
168 splitcount++;
169 if (maxsplit && (splitcount >= maxsplit))
170 break;
172 else
173 i++;
175 item = PyString_FromStringAndSize(s+j, (int)(len-j));
176 if (item == NULL)
177 goto fail;
178 err = PyList_Append(list, item);
179 Py_DECREF(item);
180 if (err < 0)
181 goto fail;
183 return list;
185 fail:
186 Py_DECREF(list);
187 return NULL;
191 static char joinfields__doc__[] =
192 "join(list [,sep]) -> string\n\
193 joinfields(list [,sep]) -> string\n\
195 Return a string composed of the words in list, with\n\
196 intervening occurences of sep. Sep defaults to a single\n\
197 space.\n\
199 (join and joinfields are synonymous)";
201 static PyObject *
202 strop_joinfields(self, args)
203 PyObject *self; /* Not used */
204 PyObject *args;
206 PyObject *seq;
207 char *sep = NULL;
208 int seqlen, seplen = 0;
209 int i, reslen = 0, slen = 0, sz = 100;
210 PyObject *res = NULL;
211 char* p = NULL;
212 intargfunc getitemfunc;
214 if (!PyArg_ParseTuple(args, "O|t#", &seq, &sep, &seplen))
215 return NULL;
216 if (sep == NULL) {
217 sep = " ";
218 seplen = 1;
221 seqlen = PySequence_Length(seq);
222 if (seqlen < 0 && PyErr_Occurred())
223 return NULL;
225 if (seqlen == 1) {
226 /* Optimization if there's only one item */
227 PyObject *item = PySequence_GetItem(seq, 0);
228 if (item && !PyString_Check(item)) {
229 PyErr_SetString(PyExc_TypeError,
230 "first argument must be sequence of strings");
231 Py_DECREF(item);
232 return NULL;
234 return item;
237 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
238 return NULL;
239 p = PyString_AsString(res);
241 /* optimize for lists, since it's the most common case. all others
242 * (tuples and arbitrary sequences) just use the sequence abstract
243 * interface.
245 if (PyList_Check(seq)) {
246 for (i = 0; i < seqlen; i++) {
247 PyObject *item = PyList_GET_ITEM(seq, i);
248 if (!PyString_Check(item)) {
249 PyErr_SetString(PyExc_TypeError,
250 "first argument must be sequence of strings");
251 Py_DECREF(res);
252 return NULL;
254 slen = PyString_GET_SIZE(item);
255 while (reslen + slen + seplen >= sz) {
256 if (_PyString_Resize(&res, sz * 2)) {
257 Py_DECREF(res);
258 return NULL;
260 sz *= 2;
261 p = PyString_AsString(res) + reslen;
263 if (i > 0) {
264 memcpy(p, sep, seplen);
265 p += seplen;
266 reslen += seplen;
268 memcpy(p, PyString_AS_STRING(item), slen);
269 p += slen;
270 reslen += slen;
272 if (_PyString_Resize(&res, reslen)) {
273 Py_DECREF(res);
274 res = NULL;
276 return res;
279 if (seq->ob_type->tp_as_sequence == NULL ||
280 (getitemfunc = seq->ob_type->tp_as_sequence->sq_item) == NULL)
282 PyErr_SetString(PyExc_TypeError,
283 "first argument must be a sequence");
284 return NULL;
286 /* This is now type safe */
287 for (i = 0; i < seqlen; i++) {
288 PyObject *item = getitemfunc(seq, i);
289 if (!item || !PyString_Check(item)) {
290 PyErr_SetString(PyExc_TypeError,
291 "first argument must be sequence of strings");
292 Py_DECREF(res);
293 Py_XDECREF(item);
294 return NULL;
296 slen = PyString_GET_SIZE(item);
297 while (reslen + slen + seplen >= sz) {
298 if (_PyString_Resize(&res, sz * 2)) {
299 Py_DECREF(res);
300 Py_DECREF(item);
301 return NULL;
303 sz *= 2;
304 p = PyString_AsString(res) + reslen;
306 if (i > 0) {
307 memcpy(p, sep, seplen);
308 p += seplen;
309 reslen += seplen;
311 memcpy(p, PyString_AS_STRING(item), slen);
312 p += slen;
313 reslen += slen;
314 Py_DECREF(item);
316 if (_PyString_Resize(&res, reslen)) {
317 Py_DECREF(res);
318 res = NULL;
320 return res;
/* Docstring for strop.find().  The signature line names the return
   type "int", as the rfind() and count() docstrings do. */
static char find__doc__[] =
"find(s, sub [,start [,end]]) -> int\n\
Return the lowest index in s where substring sub is found,\n\
such that sub is contained within s[start,end]. Optional\n\
arguments start and end are interpreted as in slice notation.\n\
Return -1 on failure.";
333 static PyObject *
334 strop_find(self, args)
335 PyObject *self; /* Not used */
336 PyObject *args;
338 char *s, *sub;
339 int len, n, i = 0, last = INT_MAX;
341 if (!PyArg_ParseTuple(args, "t#t#|ii", &s, &len, &sub, &n, &i, &last))
342 return NULL;
344 if (last > len)
345 last = len;
346 if (last < 0)
347 last += len;
348 if (last < 0)
349 last = 0;
350 if (i < 0)
351 i += len;
352 if (i < 0)
353 i = 0;
355 if (n == 0 && i <= last)
356 return PyInt_FromLong((long)i);
358 last -= n;
359 for (; i <= last; ++i)
360 if (s[i] == sub[0] &&
361 (n == 1 || memcmp(&s[i+1], &sub[1], n-1) == 0))
362 return PyInt_FromLong((long)i);
364 return PyInt_FromLong(-1L);
368 static char rfind__doc__[] =
369 "rfind(s, sub [,start [,end]]) -> int\n\
371 Return the highest index in s where substring sub is found,\n\
372 such that sub is contained within s[start,end]. Optional\n\
373 arguments start and end are interpreted as in slice notation.\n\
375 Return -1 on failure.";
377 static PyObject *
378 strop_rfind(self, args)
379 PyObject *self; /* Not used */
380 PyObject *args;
382 char *s, *sub;
383 int len, n, j;
384 int i = 0, last = INT_MAX;
386 if (!PyArg_ParseTuple(args, "t#t#|ii", &s, &len, &sub, &n, &i, &last))
387 return NULL;
389 if (last > len)
390 last = len;
391 if (last < 0)
392 last += len;
393 if (last < 0)
394 last = 0;
395 if (i < 0)
396 i += len;
397 if (i < 0)
398 i = 0;
400 if (n == 0 && i <= last)
401 return PyInt_FromLong((long)last);
403 for (j = last-n; j >= i; --j)
404 if (s[j] == sub[0] &&
405 (n == 1 || memcmp(&s[j+1], &sub[1], n-1) == 0))
406 return PyInt_FromLong((long)j);
408 return PyInt_FromLong(-1L);
412 static PyObject *
413 do_strip(args, striptype)
414 PyObject *args;
415 int striptype;
417 char *s;
418 int len, i, j;
421 if (!PyArg_Parse(args, "t#", &s, &len))
422 return NULL;
424 i = 0;
425 if (striptype != RIGHTSTRIP) {
426 while (i < len && isspace(Py_CHARMASK(s[i]))) {
427 i++;
431 j = len;
432 if (striptype != LEFTSTRIP) {
433 do {
434 j--;
435 } while (j >= i && isspace(Py_CHARMASK(s[j])));
436 j++;
439 if (i == 0 && j == len) {
440 Py_INCREF(args);
441 return args;
443 else
444 return PyString_FromStringAndSize(s+i, j-i);
448 static char strip__doc__[] =
449 "strip(s) -> string\n\
451 Return a copy of the string s with leading and trailing\n\
452 whitespace removed.";
454 static PyObject *
455 strop_strip(self, args)
456 PyObject *self; /* Not used */
457 PyObject *args;
459 return do_strip(args, BOTHSTRIP);
463 static char lstrip__doc__[] =
464 "lstrip(s) -> string\n\
466 Return a copy of the string s with leading whitespace removed.";
468 static PyObject *
469 strop_lstrip(self, args)
470 PyObject *self; /* Not used */
471 PyObject *args;
473 return do_strip(args, LEFTSTRIP);
477 static char rstrip__doc__[] =
478 "rstrip(s) -> string\n\
480 Return a copy of the string s with trailing whitespace removed.";
482 static PyObject *
483 strop_rstrip(self, args)
484 PyObject *self; /* Not used */
485 PyObject *args;
487 return do_strip(args, RIGHTSTRIP);
491 static char lower__doc__[] =
492 "lower(s) -> string\n\
494 Return a copy of the string s converted to lowercase.";
496 static PyObject *
497 strop_lower(self, args)
498 PyObject *self; /* Not used */
499 PyObject *args;
501 char *s, *s_new;
502 int i, n;
503 PyObject *new;
504 int changed;
506 if (!PyArg_Parse(args, "t#", &s, &n))
507 return NULL;
508 new = PyString_FromStringAndSize(NULL, n);
509 if (new == NULL)
510 return NULL;
511 s_new = PyString_AsString(new);
512 changed = 0;
513 for (i = 0; i < n; i++) {
514 int c = Py_CHARMASK(*s++);
515 if (isupper(c)) {
516 changed = 1;
517 *s_new = tolower(c);
518 } else
519 *s_new = c;
520 s_new++;
522 if (!changed) {
523 Py_DECREF(new);
524 Py_INCREF(args);
525 return args;
527 return new;
531 static char upper__doc__[] =
532 "upper(s) -> string\n\
534 Return a copy of the string s converted to uppercase.";
536 static PyObject *
537 strop_upper(self, args)
538 PyObject *self; /* Not used */
539 PyObject *args;
541 char *s, *s_new;
542 int i, n;
543 PyObject *new;
544 int changed;
546 if (!PyArg_Parse(args, "t#", &s, &n))
547 return NULL;
548 new = PyString_FromStringAndSize(NULL, n);
549 if (new == NULL)
550 return NULL;
551 s_new = PyString_AsString(new);
552 changed = 0;
553 for (i = 0; i < n; i++) {
554 int c = Py_CHARMASK(*s++);
555 if (islower(c)) {
556 changed = 1;
557 *s_new = toupper(c);
558 } else
559 *s_new = c;
560 s_new++;
562 if (!changed) {
563 Py_DECREF(new);
564 Py_INCREF(args);
565 return args;
567 return new;
571 static char capitalize__doc__[] =
572 "capitalize(s) -> string\n\
574 Return a copy of the string s with only its first character\n\
575 capitalized.";
577 static PyObject *
578 strop_capitalize(self, args)
579 PyObject *self; /* Not used */
580 PyObject *args;
582 char *s, *s_new;
583 int i, n;
584 PyObject *new;
585 int changed;
587 if (!PyArg_Parse(args, "t#", &s, &n))
588 return NULL;
589 new = PyString_FromStringAndSize(NULL, n);
590 if (new == NULL)
591 return NULL;
592 s_new = PyString_AsString(new);
593 changed = 0;
594 if (0 < n) {
595 int c = Py_CHARMASK(*s++);
596 if (islower(c)) {
597 changed = 1;
598 *s_new = toupper(c);
599 } else
600 *s_new = c;
601 s_new++;
603 for (i = 1; i < n; i++) {
604 int c = Py_CHARMASK(*s++);
605 if (isupper(c)) {
606 changed = 1;
607 *s_new = tolower(c);
608 } else
609 *s_new = c;
610 s_new++;
612 if (!changed) {
613 Py_DECREF(new);
614 Py_INCREF(args);
615 return args;
617 return new;
621 static char expandtabs__doc__[] =
622 "expandtabs(string, [tabsize]) -> string\n\
624 Expand tabs in a string, i.e. replace them by one or more spaces,\n\
625 depending on the current column and the given tab size (default 8).\n\
626 The column number is reset to zero after each newline occurring in the\n\
627 string. This doesn't understand other non-printing characters.";
629 static PyObject *
630 strop_expandtabs(self, args)
631 PyObject *self;
632 PyObject *args;
634 /* Original by Fredrik Lundh */
635 char* e;
636 char* p;
637 char* q;
638 int i, j;
639 PyObject* out;
640 char* string;
641 int stringlen;
642 int tabsize = 8;
644 /* Get arguments */
645 if (!PyArg_ParseTuple(args, "s#|i", &string, &stringlen, &tabsize))
646 return NULL;
647 if (tabsize < 1) {
648 PyErr_SetString(PyExc_ValueError,
649 "tabsize must be at least 1");
650 return NULL;
653 /* First pass: determine size of output string */
654 i = j = 0; /* j: current column; i: total of previous lines */
655 e = string + stringlen;
656 for (p = string; p < e; p++) {
657 if (*p == '\t')
658 j += tabsize - (j%tabsize);
659 else {
660 j++;
661 if (*p == '\n') {
662 i += j;
663 j = 0;
668 /* Second pass: create output string and fill it */
669 out = PyString_FromStringAndSize(NULL, i+j);
670 if (out == NULL)
671 return NULL;
673 i = 0;
674 q = PyString_AS_STRING(out);
676 for (p = string; p < e; p++) {
677 if (*p == '\t') {
678 j = tabsize - (i%tabsize);
679 i += j;
680 while (j-- > 0)
681 *q++ = ' ';
682 } else {
683 *q++ = *p;
684 i++;
685 if (*p == '\n')
686 i = 0;
690 return out;
694 static char count__doc__[] =
695 "count(s, sub[, start[, end]]) -> int\n\
697 Return the number of occurrences of substring sub in string\n\
698 s[start:end]. Optional arguments start and end are\n\
699 interpreted as in slice notation.";
701 static PyObject *
702 strop_count(self, args)
703 PyObject *self; /* Not used */
704 PyObject *args;
706 char *s, *sub;
707 int len, n;
708 int i = 0, last = INT_MAX;
709 int m, r;
711 if (!PyArg_ParseTuple(args, "t#t#|ii", &s, &len, &sub, &n, &i, &last))
712 return NULL;
713 if (last > len)
714 last = len;
715 if (last < 0)
716 last += len;
717 if (last < 0)
718 last = 0;
719 if (i < 0)
720 i += len;
721 if (i < 0)
722 i = 0;
723 m = last + 1 - n;
724 if (n == 0)
725 return PyInt_FromLong((long) (m-i));
727 r = 0;
728 while (i < m) {
729 if (!memcmp(s+i, sub, n)) {
730 r++;
731 i += n;
732 } else {
733 i++;
736 return PyInt_FromLong((long) r);
740 static char swapcase__doc__[] =
741 "swapcase(s) -> string\n\
743 Return a copy of the string s with upper case characters\n\
744 converted to lowercase and vice versa.";
746 static PyObject *
747 strop_swapcase(self, args)
748 PyObject *self; /* Not used */
749 PyObject *args;
751 char *s, *s_new;
752 int i, n;
753 PyObject *new;
754 int changed;
756 if (!PyArg_Parse(args, "t#", &s, &n))
757 return NULL;
758 new = PyString_FromStringAndSize(NULL, n);
759 if (new == NULL)
760 return NULL;
761 s_new = PyString_AsString(new);
762 changed = 0;
763 for (i = 0; i < n; i++) {
764 int c = Py_CHARMASK(*s++);
765 if (islower(c)) {
766 changed = 1;
767 *s_new = toupper(c);
769 else if (isupper(c)) {
770 changed = 1;
771 *s_new = tolower(c);
773 else
774 *s_new = c;
775 s_new++;
777 if (!changed) {
778 Py_DECREF(new);
779 Py_INCREF(args);
780 return args;
782 return new;
786 static char atoi__doc__[] =
787 "atoi(s [,base]) -> int\n\
789 Return the integer represented by the string s in the given\n\
790 base, which defaults to 10. The string s must consist of one\n\
791 or more digits, possibly preceded by a sign. If base is 0, it\n\
792 is chosen from the leading characters of s, 0 for octal, 0x or\n\
793 0X for hexadecimal. If base is 16, a preceding 0x or 0X is\n\
794 accepted.";
796 static PyObject *
797 strop_atoi(self, args)
798 PyObject *self; /* Not used */
799 PyObject *args;
801 char *s, *end;
802 int base = 10;
803 long x;
804 char buffer[256]; /* For errors */
806 if (!PyArg_ParseTuple(args, "s|i", &s, &base))
807 return NULL;
809 if ((base != 0 && base < 2) || base > 36) {
810 PyErr_SetString(PyExc_ValueError, "invalid base for atoi()");
811 return NULL;
814 while (*s && isspace(Py_CHARMASK(*s)))
815 s++;
816 errno = 0;
817 if (base == 0 && s[0] == '0')
818 x = (long) PyOS_strtoul(s, &end, base);
819 else
820 x = PyOS_strtol(s, &end, base);
821 if (end == s || !isalnum(end[-1]))
822 goto bad;
823 while (*end && isspace(Py_CHARMASK(*end)))
824 end++;
825 if (*end != '\0') {
826 bad:
827 sprintf(buffer, "invalid literal for atoi(): %.200s", s);
828 PyErr_SetString(PyExc_ValueError, buffer);
829 return NULL;
831 else if (errno != 0) {
832 sprintf(buffer, "atoi() literal too large: %.200s", s);
833 PyErr_SetString(PyExc_ValueError, buffer);
834 return NULL;
836 return PyInt_FromLong(x);
840 static char atol__doc__[] =
841 "atol(s [,base]) -> long\n\
843 Return the long integer represented by the string s in the\n\
844 given base, which defaults to 10. The string s must consist\n\
845 of one or more digits, possibly preceded by a sign. If base\n\
846 is 0, it is chosen from the leading characters of s, 0 for\n\
847 octal, 0x or 0X for hexadecimal. If base is 16, a preceding\n\
848 0x or 0X is accepted. A trailing L or l is not accepted,\n\
849 unless base is 0.";
851 static PyObject *
852 strop_atol(self, args)
853 PyObject *self; /* Not used */
854 PyObject *args;
856 char *s, *end;
857 int base = 10;
858 PyObject *x;
859 char buffer[256]; /* For errors */
861 if (!PyArg_ParseTuple(args, "s|i", &s, &base))
862 return NULL;
864 if ((base != 0 && base < 2) || base > 36) {
865 PyErr_SetString(PyExc_ValueError, "invalid base for atol()");
866 return NULL;
869 while (*s && isspace(Py_CHARMASK(*s)))
870 s++;
871 if (s[0] == '\0') {
872 PyErr_SetString(PyExc_ValueError, "empty string for atol()");
873 return NULL;
875 x = PyLong_FromString(s, &end, base);
876 if (x == NULL)
877 return NULL;
878 if (base == 0 && (*end == 'l' || *end == 'L'))
879 end++;
880 while (*end && isspace(Py_CHARMASK(*end)))
881 end++;
882 if (*end != '\0') {
883 sprintf(buffer, "invalid literal for atol(): %.200s", s);
884 PyErr_SetString(PyExc_ValueError, buffer);
885 Py_DECREF(x);
886 return NULL;
888 return x;
892 static char atof__doc__[] =
893 "atof(s) -> float\n\
895 Return the floating point number represented by the string s.";
897 static PyObject *
898 strop_atof(self, args)
899 PyObject *self; /* Not used */
900 PyObject *args;
902 extern double strtod Py_PROTO((const char *, char **));
903 char *s, *end;
904 double x;
905 char buffer[256]; /* For errors */
907 if (!PyArg_ParseTuple(args, "s", &s))
908 return NULL;
909 while (*s && isspace(Py_CHARMASK(*s)))
910 s++;
911 if (s[0] == '\0') {
912 PyErr_SetString(PyExc_ValueError, "empty string for atof()");
913 return NULL;
915 errno = 0;
916 PyFPE_START_PROTECT("strop_atof", return 0)
917 x = strtod(s, &end);
918 PyFPE_END_PROTECT(x)
919 while (*end && isspace(Py_CHARMASK(*end)))
920 end++;
921 if (*end != '\0') {
922 sprintf(buffer, "invalid literal for atof(): %.200s", s);
923 PyErr_SetString(PyExc_ValueError, buffer);
924 return NULL;
926 else if (errno != 0) {
927 sprintf(buffer, "atof() literal too large: %.200s", s);
928 PyErr_SetString(PyExc_ValueError, buffer);
929 return NULL;
931 return PyFloat_FromDouble(x);
935 static char maketrans__doc__[] =
936 "maketrans(frm, to) -> string\n\
938 Return a translation table (a string of 256 bytes long)\n\
939 suitable for use in string.translate. The strings frm and to\n\
940 must be of the same length.";
942 static PyObject *
943 strop_maketrans(self, args)
944 PyObject *self; /* Not used */
945 PyObject *args;
947 unsigned char *c, *from=NULL, *to=NULL;
948 int i, fromlen=0, tolen=0;
949 PyObject *result;
951 if (!PyArg_ParseTuple(args, "t#t#", &from, &fromlen, &to, &tolen))
952 return NULL;
954 if (fromlen != tolen) {
955 PyErr_SetString(PyExc_ValueError,
956 "maketrans arguments must have same length");
957 return NULL;
960 result = PyString_FromStringAndSize((char *)NULL, 256);
961 if (result == NULL)
962 return NULL;
963 c = (unsigned char *) PyString_AS_STRING((PyStringObject *)result);
964 for (i = 0; i < 256; i++)
965 c[i]=(unsigned char)i;
966 for (i = 0; i < fromlen; i++)
967 c[from[i]]=to[i];
969 return result;
973 static char translate__doc__[] =
974 "translate(s,table [,deletechars]) -> string\n\
976 Return a copy of the string s, where all characters occurring\n\
977 in the optional argument deletechars are removed, and the\n\
978 remaining characters have been mapped through the given\n\
979 translation table, which must be a string of length 256.";
981 static PyObject *
982 strop_translate(self, args)
983 PyObject *self;
984 PyObject *args;
986 register char *input, *table, *output;
987 register int i, c, changed = 0;
988 PyObject *input_obj;
989 char *table1, *output_start, *del_table=NULL;
990 int inlen, tablen, dellen = 0;
991 PyObject *result;
992 int trans_table[256];
994 if (!PyArg_ParseTuple(args, "St#|t#", &input_obj,
995 &table1, &tablen, &del_table, &dellen))
996 return NULL;
997 if (tablen != 256) {
998 PyErr_SetString(PyExc_ValueError,
999 "translation table must be 256 characters long");
1000 return NULL;
1003 table = table1;
1004 inlen = PyString_Size(input_obj);
1005 result = PyString_FromStringAndSize((char *)NULL, inlen);
1006 if (result == NULL)
1007 return NULL;
1008 output_start = output = PyString_AsString(result);
1009 input = PyString_AsString(input_obj);
1011 if (dellen == 0) {
1012 /* If no deletions are required, use faster code */
1013 for (i = inlen; --i >= 0; ) {
1014 c = Py_CHARMASK(*input++);
1015 if (Py_CHARMASK((*output++ = table[c])) != c)
1016 changed = 1;
1018 if (changed)
1019 return result;
1020 Py_DECREF(result);
1021 Py_INCREF(input_obj);
1022 return input_obj;
1025 for (i = 0; i < 256; i++)
1026 trans_table[i] = Py_CHARMASK(table[i]);
1028 for (i = 0; i < dellen; i++)
1029 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1031 for (i = inlen; --i >= 0; ) {
1032 c = Py_CHARMASK(*input++);
1033 if (trans_table[c] != -1)
1034 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1035 continue;
1036 changed = 1;
1038 if (!changed) {
1039 Py_DECREF(result);
1040 Py_INCREF(input_obj);
1041 return input_obj;
1043 /* Fix the size of the resulting string */
1044 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1045 return NULL;
1046 return result;
1050 /* What follows is used for implementing replace(). Perry Stoll. */
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Locates the first occurrence in the memory pointed to by MEM (LEN
  bytes) of the contents of memory pointed to by PAT (PAT_LEN bytes).
  Returns the index into MEM if found, or -1 if not found.  If the
  length of PAT is greater than the length of MEM, the function
  returns -1.

  An empty (or negative-length) pattern is reported as not found:
  there is no first byte to compare, and reading pat[0] or handing
  pat_len-1 to memcmp() would be out of bounds.
*/
static int mymemfind(mem, len, pat, pat_len)
	char *mem;
	int len;
	char *pat;
	int pat_len;
{
	register int ii;

	if (pat_len <= 0)
		return -1;

	/* pattern can not occur in the last pat_len-1 chars */
	len -= pat_len;

	for (ii = 0; ii <= len; ii++) {
		/* the first-byte test is a cheap filter before memcmp() */
		if (mem[ii] == pat[0] &&
		    (pat_len == 1 ||
		     memcmp(&mem[ii+1], &pat[1], pat_len-1) == 0)) {
			return ii;
		}
	}
	return -1;
}
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT (PAT_LEN bytes) in MEM
  (LEN bytes), scanning from the left.  After every hit the search
  resumes directly behind the matched bytes, so
     mem=1111  and pat==11 gives 2, and
     mem=11111 and pat==11 gives 2 as well.
*/
static int mymemcnt(mem, len, pat, pat_len)
	char *mem;
	int len;
	char *pat;
	int pat_len;
{
	int hits = 0;
	int at;

	while (len >= 0 && (at = mymemfind(mem, len, pat, pat_len)) != -1) {
		/* continue behind the match that was just found */
		mem += at + pat_len;
		len -= at + pat_len;
		hits++;
	}
	return hits;
}
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB.

   If length of PAT is greater than length of STR, if STR is empty, or
   if there are no occurrences of PAT in STR, then the original string
   is returned.  Otherwise, a new string is allocated here with
   malloc() and returned; the caller must free() it.

   COUNT limits the number of replacements; 0 means replace all.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *mymemreplace(str, len, pat, pat_len, sub, sub_len, count, out_len)
	char *str;
	int len;     /* input string */
	char *pat;
	int pat_len; /* pattern string to find */
	char *sub;
	int sub_len; /* substitution string */
	int count;   /* number of replacements, 0 == all */
	int *out_len;
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len;

	if (len == 0 || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = mymemcnt(str, len, pat, pat_len);
	if (count > 0)
		nfound = nfound > count ? count : nfound;
	if (nfound == 0)
		goto return_same;
	new_len = len + nfound*(sub_len - pat_len);

	/* new_len is 0 when every byte of STR is removed (empty SUB).
	   malloc(0) may return NULL, which the caller would take for an
	   out-of-memory error, so always request at least one byte. */
	new_s = (char *)malloc(new_len > 0 ? new_len : 1);
	if (new_s == NULL) return NULL;

	*out_len = new_len;
	out_s = new_s;

	while (len > 0) {
		/* find index of next instance of pattern */
		offset = mymemfind(str, len, pat, pat_len);
		/* if not found,  break out of loop */
		if (offset == -1) break;

		/* copy non matching part of input string */
		memcpy(new_s, str, offset); /* copy part of str before pat */
		str += offset + pat_len; /* move str past pattern */
		len -= offset + pat_len; /* reduce length of str remaining */

		/* copy substitute into the output string */
		new_s += offset; /* move new_s to dest for sub string */
		memcpy(new_s, sub, sub_len); /* copy substring into new_s */
		new_s += sub_len; /* offset new_s past sub string */

		/* break when we've done count replacements */
		if (--count == 0) break;
	}
	/* copy any remaining values into output string */
	if (len > 0)
		memcpy(new_s, str, len);
	return out_s;

  return_same:
	*out_len = -1;
	return str;
}
1191 static char replace__doc__[] =
1192 "replace (str, old, new[, maxsplit]) -> string\n\
1194 Return a copy of string str with all occurrences of substring\n\
1195 old replaced by new. If the optional argument maxsplit is\n\
1196 given, only the first maxsplit occurrences are replaced.";
1198 static PyObject *
1199 strop_replace(self, args)
1200 PyObject *self; /* Not used */
1201 PyObject *args;
1203 char *str, *pat,*sub,*new_s;
1204 int len,pat_len,sub_len,out_len;
1205 int count = 0;
1206 PyObject *new;
1208 if (!PyArg_ParseTuple(args, "t#t#t#|i",
1209 &str, &len, &pat, &pat_len, &sub, &sub_len,
1210 &count))
1211 return NULL;
1212 if (pat_len <= 0) {
1213 PyErr_SetString(PyExc_ValueError, "empty pattern string");
1214 return NULL;
1216 new_s = mymemreplace(str,len,pat,pat_len,sub,sub_len,count,&out_len);
1217 if (new_s == NULL) {
1218 PyErr_NoMemory();
1219 return NULL;
1221 if (out_len == -1) {
1222 /* we're returning another reference to the input string */
1223 new = PyTuple_GetItem(args, 0);
1224 Py_XINCREF(new);
1226 else {
1227 new = PyString_FromStringAndSize(new_s, out_len);
1228 free(new_s);
1230 return new;
1234 /* List of functions defined in the module */
1236 static PyMethodDef
1237 strop_methods[] = {
1238 {"atof", strop_atof, 1, atof__doc__},
1239 {"atoi", strop_atoi, 1, atoi__doc__},
1240 {"atol", strop_atol, 1, atol__doc__},
1241 {"capitalize", strop_capitalize, 0, capitalize__doc__},
1242 {"count", strop_count, 1, count__doc__},
1243 {"expandtabs", strop_expandtabs, 1, expandtabs__doc__},
1244 {"find", strop_find, 1, find__doc__},
1245 {"join", strop_joinfields, 1, joinfields__doc__},
1246 {"joinfields", strop_joinfields, 1, joinfields__doc__},
1247 {"lstrip", strop_lstrip, 0, lstrip__doc__},
1248 {"lower", strop_lower, 0, lower__doc__},
1249 {"maketrans", strop_maketrans, 1, maketrans__doc__},
1250 {"replace", strop_replace, 1, replace__doc__},
1251 {"rfind", strop_rfind, 1, rfind__doc__},
1252 {"rstrip", strop_rstrip, 0, rstrip__doc__},
1253 {"split", strop_splitfields, 1, splitfields__doc__},
1254 {"splitfields", strop_splitfields, 1, splitfields__doc__},
1255 {"strip", strop_strip, 0, strip__doc__},
1256 {"swapcase", strop_swapcase, 0, swapcase__doc__},
1257 {"translate", strop_translate, 1, translate__doc__},
1258 {"upper", strop_upper, 0, upper__doc__},
1259 {NULL, NULL} /* sentinel */
1263 DL_EXPORT(void)
1264 initstrop()
1266 PyObject *m, *d, *s;
1267 char buf[256];
1268 int c, n;
1269 m = Py_InitModule4("strop", strop_methods, strop_module__doc__,
1270 (PyObject*)NULL, PYTHON_API_VERSION);
1271 d = PyModule_GetDict(m);
1273 /* Create 'whitespace' object */
1274 n = 0;
1275 for (c = 0; c < 256; c++) {
1276 if (isspace(c))
1277 buf[n++] = c;
1279 s = PyString_FromStringAndSize(buf, n);
1280 if (s) {
1281 PyDict_SetItemString(d, "whitespace", s);
1282 Py_DECREF(s);
1284 /* Create 'lowercase' object */
1285 n = 0;
1286 for (c = 0; c < 256; c++) {
1287 if (islower(c))
1288 buf[n++] = c;
1290 s = PyString_FromStringAndSize(buf, n);
1291 if (s) {
1292 PyDict_SetItemString(d, "lowercase", s);
1293 Py_DECREF(s);
1296 /* Create 'uppercase' object */
1297 n = 0;
1298 for (c = 0; c < 256; c++) {
1299 if (isupper(c))
1300 buf[n++] = c;
1302 s = PyString_FromStringAndSize(buf, n);
1303 if (s) {
1304 PyDict_SetItemString(d, "uppercase", s);
1305 Py_DECREF(s);
1308 if (PyErr_Occurred())
1309 Py_FatalError("can't initialize module strop");