Improved some error messages for command line processing.
[python/dscho.git] / Modules / stropmodule.c
blob34ac71ad658d1002377ffe871df1ef5f514a02c6
1 /***********************************************************
2 Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
3 The Netherlands.
5 All Rights Reserved
7 Permission to use, copy, modify, and distribute this software and its
8 documentation for any purpose and without fee is hereby granted,
9 provided that the above copyright notice appear in all copies and that
10 both that copyright notice and this permission notice appear in
11 supporting documentation, and that the names of Stichting Mathematisch
12 Centrum or CWI or Corporation for National Research Initiatives or
13 CNRI not be used in advertising or publicity pertaining to
14 distribution of the software without specific, written prior
15 permission.
17 While CWI is the initial source for this software, a modified version
18 is made available by the Corporation for National Research Initiatives
19 (CNRI) at the Internet address ftp://ftp.python.org.
21 STICHTING MATHEMATISCH CENTRUM AND CNRI DISCLAIM ALL WARRANTIES WITH
22 REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
23 MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH
24 CENTRUM OR CNRI BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
25 DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
26 PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
27 TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
28 PERFORMANCE OF THIS SOFTWARE.
30 ******************************************************************/
32 /* strop module */
/* Docstring of the module itself; initstrop() hands it to
   Py_InitModule4(). */
static char strop_module__doc__[] =
"Common string manipulations, optimized for speed.\n\
\n\
Always use \"import string\" rather than referencing\n\
this module directly.";
40 #include "Python.h"
42 #ifdef HAVE_LIMITS_H
43 #include <limits.h>
44 #else
45 #define INT_MAX 2147483647
46 #endif
48 #include <ctype.h>
49 /* XXX This file assumes that the <ctype.h> is*() functions
50 XXX are defined for all 8-bit characters! */
52 /* The lstrip(), rstrip() and strip() functions are implemented
53 in do_strip(), which uses an additional parameter to indicate what
54 type of strip should occur. */
56 #define LEFTSTRIP 0
57 #define RIGHTSTRIP 1
58 #define BOTHSTRIP 2
61 static PyObject *
62 split_whitespace(s, len, maxsplit)
63 char *s;
64 int len;
65 int maxsplit;
67 int i = 0, j, err;
68 int countsplit = 0;
69 PyObject* item;
70 PyObject *list = PyList_New(0);
72 if (list == NULL)
73 return NULL;
75 while (i < len) {
76 while (i < len && isspace(Py_CHARMASK(s[i]))) {
77 i = i+1;
79 j = i;
80 while (i < len && !isspace(Py_CHARMASK(s[i]))) {
81 i = i+1;
83 if (j < i) {
84 item = PyString_FromStringAndSize(s+j, (int)(i-j));
85 if (item == NULL)
86 goto finally;
88 err = PyList_Append(list, item);
89 Py_DECREF(item);
90 if (err < 0)
91 goto finally;
93 countsplit++;
94 while (i < len && isspace(Py_CHARMASK(s[i]))) {
95 i = i+1;
97 if (maxsplit && (countsplit >= maxsplit) && i < len) {
98 item = PyString_FromStringAndSize(
99 s+i, (int)(len - i));
100 if (item == NULL)
101 goto finally;
103 err = PyList_Append(list, item);
104 Py_DECREF(item);
105 if (err < 0)
106 goto finally;
108 i = len;
112 return list;
113 finally:
114 Py_DECREF(list);
115 return NULL;
119 static char splitfields__doc__[] =
120 "split(str [,sep [,maxsplit]]) -> list of strings\n\
121 splitfields(str [,sep [,maxsplit]]) -> list of strings\n\
123 Return a list of the words in the string s, using sep as the\n\
124 delimiter string. If maxsplit is nonzero, splits into at most\n\
125 maxsplit words If sep is not specified, any whitespace string\n\
126 is a separator. Maxsplit defaults to 0.\n\
128 (split and splitfields are synonymous)";
130 static PyObject *
131 strop_splitfields(self, args)
132 PyObject *self; /* Not used */
133 PyObject *args;
135 int len, n, i, j, err;
136 int splitcount, maxsplit;
137 char *s, *sub;
138 PyObject *list, *item;
140 sub = NULL;
141 n = 0;
142 splitcount = 0;
143 maxsplit = 0;
144 if (!PyArg_ParseTuple(args, "s#|z#i", &s, &len, &sub, &n, &maxsplit))
145 return NULL;
146 if (sub == NULL)
147 return split_whitespace(s, len, maxsplit);
148 if (n == 0) {
149 PyErr_SetString(PyExc_ValueError, "empty separator");
150 return NULL;
153 list = PyList_New(0);
154 if (list == NULL)
155 return NULL;
157 i = j = 0;
158 while (i+n <= len) {
159 if (s[i] == sub[0] && (n == 1 || memcmp(s+i, sub, n) == 0)) {
160 item = PyString_FromStringAndSize(s+j, (int)(i-j));
161 if (item == NULL)
162 goto fail;
163 err = PyList_Append(list, item);
164 Py_DECREF(item);
165 if (err < 0)
166 goto fail;
167 i = j = i + n;
168 splitcount++;
169 if (maxsplit && (splitcount >= maxsplit))
170 break;
172 else
173 i++;
175 item = PyString_FromStringAndSize(s+j, (int)(len-j));
176 if (item == NULL)
177 goto fail;
178 err = PyList_Append(list, item);
179 Py_DECREF(item);
180 if (err < 0)
181 goto fail;
183 return list;
185 fail:
186 Py_DECREF(list);
187 return NULL;
191 static char joinfields__doc__[] =
192 "join(list [,sep]) -> string\n\
193 joinfields(list [,sep]) -> string\n\
195 Return a string composed of the words in list, with\n\
196 intervening occurences of sep. Sep defaults to a single\n\
197 space.\n\
199 (join and joinfields are synonymous)";
201 static PyObject *
202 strop_joinfields(self, args)
203 PyObject *self; /* Not used */
204 PyObject *args;
206 PyObject *seq;
207 char *sep = NULL;
208 int seqlen, seplen = 0;
209 int i, reslen = 0, slen = 0, sz = 100;
210 PyObject *res = NULL;
211 char* p = NULL;
212 intargfunc getitemfunc;
214 if (!PyArg_ParseTuple(args, "O|s#", &seq, &sep, &seplen))
215 return NULL;
216 if (sep == NULL) {
217 sep = " ";
218 seplen = 1;
221 seqlen = PySequence_Length(seq);
222 if (seqlen < 0 && PyErr_Occurred())
223 return NULL;
225 if (seqlen == 1) {
226 /* Optimization if there's only one item */
227 PyObject *item = PySequence_GetItem(seq, 0);
228 if (item && !PyString_Check(item)) {
229 PyErr_SetString(PyExc_TypeError,
230 "first argument must be sequence of strings");
231 return NULL;
233 return item;
236 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
237 return NULL;
238 p = PyString_AsString(res);
240 /* optimize for lists, since it's the most common case. all others
241 * (tuples and arbitrary sequences) just use the sequence abstract
242 * interface.
244 if (PyList_Check(seq)) {
245 for (i = 0; i < seqlen; i++) {
246 PyObject *item = PyList_GET_ITEM(seq, i);
247 if (!PyString_Check(item)) {
248 PyErr_SetString(PyExc_TypeError,
249 "first argument must be sequence of strings");
250 Py_DECREF(res);
251 return NULL;
253 slen = PyString_GET_SIZE(item);
254 while (reslen + slen + seplen >= sz) {
255 if (_PyString_Resize(&res, sz * 2)) {
256 Py_DECREF(res);
257 return NULL;
259 sz *= 2;
260 p = PyString_AsString(res) + reslen;
262 if (i > 0) {
263 memcpy(p, sep, seplen);
264 p += seplen;
265 reslen += seplen;
267 memcpy(p, PyString_AS_STRING(item), slen);
268 p += slen;
269 reslen += slen;
271 if (_PyString_Resize(&res, reslen)) {
272 Py_DECREF(res);
273 res = NULL;
275 return res;
278 if (seq->ob_type->tp_as_sequence == NULL ||
279 (getitemfunc = seq->ob_type->tp_as_sequence->sq_item) == NULL)
281 PyErr_SetString(PyExc_TypeError,
282 "first argument must be a sequence");
283 return NULL;
285 /* This is now type safe */
286 for (i = 0; i < seqlen; i++) {
287 PyObject *item = getitemfunc(seq, i);
288 if (!item || !PyString_Check(item)) {
289 PyErr_SetString(PyExc_TypeError,
290 "first argument must be sequence of strings");
291 Py_DECREF(res);
292 Py_XDECREF(item);
293 return NULL;
295 slen = PyString_GET_SIZE(item);
296 while (reslen + slen + seplen >= sz) {
297 if (_PyString_Resize(&res, sz * 2)) {
298 Py_DECREF(res);
299 Py_DECREF(item);
300 return NULL;
302 sz *= 2;
303 p = PyString_AsString(res) + reslen;
305 if (i > 0) {
306 memcpy(p, sep, seplen);
307 p += seplen;
308 reslen += seplen;
310 memcpy(p, PyString_AS_STRING(item), slen);
311 p += slen;
312 reslen += slen;
313 Py_DECREF(item);
315 if (_PyString_Resize(&res, reslen)) {
316 Py_DECREF(res);
317 res = NULL;
319 return res;
323 static char find__doc__[] =
324 "find(s, sub [,start [,end]]) -> in\n\
326 Return the lowest index in s where substring sub is found,\n\
327 such that sub is contained within s[start,end]. Optional\n\
328 arguments start and end are interpreted as in slice notation.\n\
330 Return -1 on failure.";
332 static PyObject *
333 strop_find(self, args)
334 PyObject *self; /* Not used */
335 PyObject *args;
337 char *s, *sub;
338 int len, n, i = 0, last = INT_MAX;
340 if (!PyArg_ParseTuple(args, "s#s#|ii", &s, &len, &sub, &n, &i, &last))
341 return NULL;
343 if (last > len)
344 last = len;
345 if (last < 0)
346 last += len;
347 if (last < 0)
348 last = 0;
349 if (i < 0)
350 i += len;
351 if (i < 0)
352 i = 0;
354 if (n == 0 && i <= last)
355 return PyInt_FromLong((long)i);
357 last -= n;
358 for (; i <= last; ++i)
359 if (s[i] == sub[0] &&
360 (n == 1 || memcmp(&s[i+1], &sub[1], n-1) == 0))
361 return PyInt_FromLong((long)i);
363 return PyInt_FromLong(-1L);
367 static char rfind__doc__[] =
368 "rfind(s, sub [,start [,end]]) -> int\n\
370 Return the highest index in s where substring sub is found,\n\
371 such that sub is contained within s[start,end]. Optional\n\
372 arguments start and end are interpreted as in slice notation.\n\
374 Return -1 on failure.";
376 static PyObject *
377 strop_rfind(self, args)
378 PyObject *self; /* Not used */
379 PyObject *args;
381 char *s, *sub;
382 int len, n, j;
383 int i = 0, last = INT_MAX;
385 if (!PyArg_ParseTuple(args, "s#s#|ii", &s, &len, &sub, &n, &i, &last))
386 return NULL;
388 if (last > len)
389 last = len;
390 if (last < 0)
391 last += len;
392 if (last < 0)
393 last = 0;
394 if (i < 0)
395 i += len;
396 if (i < 0)
397 i = 0;
399 if (n == 0 && i <= last)
400 return PyInt_FromLong((long)last);
402 for (j = last-n; j >= i; --j)
403 if (s[j] == sub[0] &&
404 (n == 1 || memcmp(&s[j+1], &sub[1], n-1) == 0))
405 return PyInt_FromLong((long)j);
407 return PyInt_FromLong(-1L);
411 static PyObject *
412 do_strip(args, striptype)
413 PyObject *args;
414 int striptype;
416 char *s;
417 int len, i, j;
420 if (!PyArg_Parse(args, "s#", &s, &len))
421 return NULL;
423 i = 0;
424 if (striptype != RIGHTSTRIP) {
425 while (i < len && isspace(Py_CHARMASK(s[i]))) {
426 i++;
430 j = len;
431 if (striptype != LEFTSTRIP) {
432 do {
433 j--;
434 } while (j >= i && isspace(Py_CHARMASK(s[j])));
435 j++;
438 if (i == 0 && j == len) {
439 Py_INCREF(args);
440 return args;
442 else
443 return PyString_FromStringAndSize(s+i, j-i);
447 static char strip__doc__[] =
448 "strip(s) -> string\n\
450 Return a copy of the string s with leading and trailing\n\
451 whitespace removed.";
453 static PyObject *
454 strop_strip(self, args)
455 PyObject *self; /* Not used */
456 PyObject *args;
458 return do_strip(args, BOTHSTRIP);
462 static char lstrip__doc__[] =
463 "lstrip(s) -> string\n\
465 Return a copy of the string s with leading whitespace removed.";
467 static PyObject *
468 strop_lstrip(self, args)
469 PyObject *self; /* Not used */
470 PyObject *args;
472 return do_strip(args, LEFTSTRIP);
476 static char rstrip__doc__[] =
477 "rstrip(s) -> string\n\
479 Return a copy of the string s with trailing whitespace removed.";
481 static PyObject *
482 strop_rstrip(self, args)
483 PyObject *self; /* Not used */
484 PyObject *args;
486 return do_strip(args, RIGHTSTRIP);
490 static char lower__doc__[] =
491 "lower(s) -> string\n\
493 Return a copy of the string s converted to lowercase.";
495 static PyObject *
496 strop_lower(self, args)
497 PyObject *self; /* Not used */
498 PyObject *args;
500 char *s, *s_new;
501 int i, n;
502 PyObject *new;
503 int changed;
505 if (!PyArg_Parse(args, "s#", &s, &n))
506 return NULL;
507 new = PyString_FromStringAndSize(NULL, n);
508 if (new == NULL)
509 return NULL;
510 s_new = PyString_AsString(new);
511 changed = 0;
512 for (i = 0; i < n; i++) {
513 int c = Py_CHARMASK(*s++);
514 if (isupper(c)) {
515 changed = 1;
516 *s_new = tolower(c);
517 } else
518 *s_new = c;
519 s_new++;
521 if (!changed) {
522 Py_DECREF(new);
523 Py_INCREF(args);
524 return args;
526 return new;
530 static char upper__doc__[] =
531 "upper(s) -> string\n\
533 Return a copy of the string s converted to uppercase.";
535 static PyObject *
536 strop_upper(self, args)
537 PyObject *self; /* Not used */
538 PyObject *args;
540 char *s, *s_new;
541 int i, n;
542 PyObject *new;
543 int changed;
545 if (!PyArg_Parse(args, "s#", &s, &n))
546 return NULL;
547 new = PyString_FromStringAndSize(NULL, n);
548 if (new == NULL)
549 return NULL;
550 s_new = PyString_AsString(new);
551 changed = 0;
552 for (i = 0; i < n; i++) {
553 int c = Py_CHARMASK(*s++);
554 if (islower(c)) {
555 changed = 1;
556 *s_new = toupper(c);
557 } else
558 *s_new = c;
559 s_new++;
561 if (!changed) {
562 Py_DECREF(new);
563 Py_INCREF(args);
564 return args;
566 return new;
570 static char capitalize__doc__[] =
571 "capitalize(s) -> string\n\
573 Return a copy of the string s with only its first character\n\
574 capitalized.";
576 static PyObject *
577 strop_capitalize(self, args)
578 PyObject *self; /* Not used */
579 PyObject *args;
581 char *s, *s_new;
582 int i, n;
583 PyObject *new;
584 int changed;
586 if (!PyArg_Parse(args, "s#", &s, &n))
587 return NULL;
588 new = PyString_FromStringAndSize(NULL, n);
589 if (new == NULL)
590 return NULL;
591 s_new = PyString_AsString(new);
592 changed = 0;
593 if (0 < n) {
594 int c = Py_CHARMASK(*s++);
595 if (islower(c)) {
596 changed = 1;
597 *s_new = toupper(c);
598 } else
599 *s_new = c;
600 s_new++;
602 for (i = 1; i < n; i++) {
603 int c = Py_CHARMASK(*s++);
604 if (isupper(c)) {
605 changed = 1;
606 *s_new = tolower(c);
607 } else
608 *s_new = c;
609 s_new++;
611 if (!changed) {
612 Py_DECREF(new);
613 Py_INCREF(args);
614 return args;
616 return new;
620 static char swapcase__doc__[] =
621 "swapcase(s) -> string\n\
623 Return a copy of the string s with upper case characters\n\
624 converted to lowercase and vice versa.";
626 static PyObject *
627 strop_swapcase(self, args)
628 PyObject *self; /* Not used */
629 PyObject *args;
631 char *s, *s_new;
632 int i, n;
633 PyObject *new;
634 int changed;
636 if (!PyArg_Parse(args, "s#", &s, &n))
637 return NULL;
638 new = PyString_FromStringAndSize(NULL, n);
639 if (new == NULL)
640 return NULL;
641 s_new = PyString_AsString(new);
642 changed = 0;
643 for (i = 0; i < n; i++) {
644 int c = Py_CHARMASK(*s++);
645 if (islower(c)) {
646 changed = 1;
647 *s_new = toupper(c);
649 else if (isupper(c)) {
650 changed = 1;
651 *s_new = tolower(c);
653 else
654 *s_new = c;
655 s_new++;
657 if (!changed) {
658 Py_DECREF(new);
659 Py_INCREF(args);
660 return args;
662 return new;
666 static char atoi__doc__[] =
667 "atoi(s [,base]) -> int\n\
669 Return the integer represented by the string s in the given\n\
670 base, which defaults to 10. The string s must consist of one\n\
671 or more digits, possibly preceded by a sign. If base is 0, it\n\
672 is chosen from the leading characters of s, 0 for octal, 0x or\n\
673 0X for hexadecimal. If base is 16, a preceding 0x or 0X is\n\
674 accepted.";
676 static PyObject *
677 strop_atoi(self, args)
678 PyObject *self; /* Not used */
679 PyObject *args;
681 extern long PyOS_strtol Py_PROTO((const char *, char **, int));
682 extern unsigned long
683 PyOS_strtoul Py_PROTO((const char *, char **, int));
684 char *s, *end;
685 int base = 10;
686 long x;
687 char buffer[256]; /* For errors */
689 if (!PyArg_ParseTuple(args, "s|i", &s, &base))
690 return NULL;
692 if ((base != 0 && base < 2) || base > 36) {
693 PyErr_SetString(PyExc_ValueError, "invalid base for atoi()");
694 return NULL;
697 while (*s && isspace(Py_CHARMASK(*s)))
698 s++;
699 errno = 0;
700 if (base == 0 && s[0] == '0')
701 x = (long) PyOS_strtoul(s, &end, base);
702 else
703 x = PyOS_strtol(s, &end, base);
704 if (end == s || !isxdigit(end[-1]))
705 goto bad;
706 while (*end && isspace(Py_CHARMASK(*end)))
707 end++;
708 if (*end != '\0') {
709 bad:
710 sprintf(buffer, "invalid literal for atoi(): %.200s", s);
711 PyErr_SetString(PyExc_ValueError, buffer);
712 return NULL;
714 else if (errno != 0) {
715 sprintf(buffer, "atoi() literal too large: %.200s", s);
716 PyErr_SetString(PyExc_ValueError, buffer);
717 return NULL;
719 return PyInt_FromLong(x);
723 static char atol__doc__[] =
724 "atol(s [,base]) -> long\n\
726 Return the long integer represented by the string s in the\n\
727 given base, which defaults to 10. The string s must consist\n\
728 of one or more digits, possibly preceded by a sign. If base\n\
729 is 0, it is chosen from the leading characters of s, 0 for\n\
730 octal, 0x or 0X for hexadecimal. If base is 16, a preceding\n\
731 0x or 0X is accepted. A trailing L or l is not accepted,\n\
732 unless base is 0.";
734 static PyObject *
735 strop_atol(self, args)
736 PyObject *self; /* Not used */
737 PyObject *args;
739 char *s, *end;
740 int base = 10;
741 PyObject *x;
742 char buffer[256]; /* For errors */
744 if (!PyArg_ParseTuple(args, "s|i", &s, &base))
745 return NULL;
747 if ((base != 0 && base < 2) || base > 36) {
748 PyErr_SetString(PyExc_ValueError, "invalid base for atol()");
749 return NULL;
752 while (*s && isspace(Py_CHARMASK(*s)))
753 s++;
754 if (s[0] == '\0') {
755 PyErr_SetString(PyExc_ValueError, "empty string for atol()");
756 return NULL;
758 x = PyLong_FromString(s, &end, base);
759 if (x == NULL)
760 return NULL;
761 if (base == 0 && (*end == 'l' || *end == 'L'))
762 end++;
763 while (*end && isspace(Py_CHARMASK(*end)))
764 end++;
765 if (*end != '\0') {
766 sprintf(buffer, "invalid literal for atol(): %.200s", s);
767 PyErr_SetString(PyExc_ValueError, buffer);
768 Py_DECREF(x);
769 return NULL;
771 return x;
775 static char atof__doc__[] =
776 "atof(s) -> float\n\
778 Return the floating point number represented by the string s.";
780 static PyObject *
781 strop_atof(self, args)
782 PyObject *self; /* Not used */
783 PyObject *args;
785 extern double strtod Py_PROTO((const char *, char **));
786 char *s, *end;
787 double x;
788 char buffer[256]; /* For errors */
790 if (!PyArg_ParseTuple(args, "s", &s))
791 return NULL;
792 while (*s && isspace(Py_CHARMASK(*s)))
793 s++;
794 if (s[0] == '\0') {
795 PyErr_SetString(PyExc_ValueError, "empty string for atof()");
796 return NULL;
798 errno = 0;
799 PyFPE_START_PROTECT("strop_atof", return 0)
800 x = strtod(s, &end);
801 PyFPE_END_PROTECT(x)
802 while (*end && isspace(Py_CHARMASK(*end)))
803 end++;
804 if (*end != '\0') {
805 sprintf(buffer, "invalid literal for atof(): %.200s", s);
806 PyErr_SetString(PyExc_ValueError, buffer);
807 return NULL;
809 else if (errno != 0) {
810 sprintf(buffer, "atof() literal too large: %.200s", s);
811 PyErr_SetString(PyExc_ValueError, buffer);
812 return NULL;
814 return PyFloat_FromDouble(x);
818 static char maketrans__doc__[] =
819 "maketrans(frm, to) -> string\n\
821 Return a translation table (a string of 256 bytes long)\n\
822 suitable for use in string.translate. The strings frm and to\n\
823 must be of the same length.";
825 static PyObject *
826 strop_maketrans(self, args)
827 PyObject *self; /* Not used */
828 PyObject *args;
830 unsigned char *c, *from=NULL, *to=NULL;
831 int i, fromlen=0, tolen=0;
832 PyObject *result;
834 if (!PyArg_ParseTuple(args, "s#s#", &from, &fromlen, &to, &tolen))
835 return NULL;
837 if (fromlen != tolen) {
838 PyErr_SetString(PyExc_ValueError,
839 "maketrans arguments must have same length");
840 return NULL;
843 result = PyString_FromStringAndSize((char *)NULL, 256);
844 if (result == NULL)
845 return NULL;
846 c = (unsigned char *) PyString_AS_STRING((PyStringObject *)result);
847 for (i = 0; i < 256; i++)
848 c[i]=(unsigned char)i;
849 for (i = 0; i < fromlen; i++)
850 c[from[i]]=to[i];
852 return result;
856 static char translate__doc__[] =
857 "translate(s,table [,deletechars]) -> string\n\
859 Return a copy of the string s, where all characters occurring\n\
860 in the optional argument deletechars are removed, and the\n\
861 remaining characters have been mapped through the given\n\
862 translation table, which must be a string of length 256.";
864 static PyObject *
865 strop_translate(self, args)
866 PyObject *self;
867 PyObject *args;
869 register char *input, *table, *output;
870 register int i, c, changed = 0;
871 PyObject *input_obj;
872 char *table1, *output_start, *del_table=NULL;
873 int inlen, tablen, dellen = 0;
874 PyObject *result;
875 int trans_table[256];
877 if (!PyArg_ParseTuple(args, "Ss#|s#", &input_obj,
878 &table1, &tablen, &del_table, &dellen))
879 return NULL;
880 if (tablen != 256) {
881 PyErr_SetString(PyExc_ValueError,
882 "translation table must be 256 characters long");
883 return NULL;
886 table = table1;
887 inlen = PyString_Size(input_obj);
888 result = PyString_FromStringAndSize((char *)NULL, inlen);
889 if (result == NULL)
890 return NULL;
891 output_start = output = PyString_AsString(result);
892 input = PyString_AsString(input_obj);
894 if (dellen == 0) {
895 /* If no deletions are required, use faster code */
896 for (i = inlen; --i >= 0; ) {
897 c = Py_CHARMASK(*input++);
898 if (Py_CHARMASK((*output++ = table[c])) != c)
899 changed = 1;
901 if (changed)
902 return result;
903 Py_DECREF(result);
904 Py_INCREF(input_obj);
905 return input_obj;
908 for (i = 0; i < 256; i++)
909 trans_table[i] = Py_CHARMASK(table[i]);
911 for (i = 0; i < dellen; i++)
912 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
914 for (i = inlen; --i >= 0; ) {
915 c = Py_CHARMASK(*input++);
916 if (trans_table[c] != -1)
917 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
918 continue;
919 changed = 1;
921 if (!changed) {
922 Py_DECREF(result);
923 Py_INCREF(input_obj);
924 return input_obj;
926 /* Fix the size of the resulting string */
927 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
928 return NULL;
929 return result;
/* What follows is used for implementing replace().  Perry Stoll. */

/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Locates the first occurrence in the memory pointed to by MEM of the
  contents of memory pointed to by PAT.  Returns the index into MEM if
  found, or -1 if not found.  If len of PAT is greater than length of
  MEM, the function returns -1.

  PAT_LEN is expected to be at least 1: the first pattern byte is
  compared before the length is looked at.
*/
static int mymemfind(mem, len, pat, pat_len)
	char *mem;
	int len;
	char *pat;
	int pat_len;
{
	register int ii;

	/* pattern can not occur in the last pat_len-1 chars */
	len -= pat_len;

	for (ii = 0; ii <= len; ii++) {
		if (mem[ii] == pat[0] &&
		    (pat_len == 1 ||
		     memcmp(&mem[ii+1], &pat[1], pat_len-1) == 0)) {
			return ii;
		}
	}
	return -1;
}

/*
  mymemcnt

  Return the number of non-overlapping times PAT is found in MEM,
  scanning left to right and resuming after each match.
  meaning mem=1111 and pat==11 returns 2.
          mem=11111 and pat==11 also return 2.
*/
static int mymemcnt(mem, len, pat, pat_len)
	char *mem;
	int len;
	char *pat;
	int pat_len;
{
	register int offset = 0;
	int nfound = 0;

	while (len >= 0) {
		offset = mymemfind(mem, len, pat, pat_len);
		if (offset == -1)
			break;
		/* continue behind the match just counted */
		mem += offset + pat_len;
		len -= offset + pat_len;
		nfound++;
	}
	return nfound;
}

/*
  mymemreplace

  Return a string in which all occurrences of PAT in memory STR are
  replaced with SUB.  COUNT limits the number of replacements when it
  is positive; 0 (or a negative value) means replace all.

  If length of PAT is greater than length of STR, STR is empty, or
  there are no occurrences of PAT in STR, then the original string is
  returned.  Otherwise, a new string is allocated here with malloc()
  and returned; the caller must free() it.  The result is NOT
  NUL-terminated; use out_len.

  on return, out_len is:
      the length of output string, or
      -1 if the input string is returned, or
      unchanged if an error occurs (no memory).

  return value is:
      the new string allocated locally, or
      NULL if an error occurred (out of memory, or the result length
      would not fit in an int).
*/
static char *mymemreplace(str, len, pat, pat_len, sub, sub_len, count, out_len)
	char *str;
	int len;     /* input string  */
	char *pat;
	int pat_len; /* pattern string to find */
	char *sub;
	int sub_len; /* substitution string */
	int count;   /* number of replacements, 0 == all */
	int *out_len;
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len;

	if (len == 0 || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = mymemcnt(str, len, pat, pat_len);
	if (count > 0)
		nfound = nfound > count ? count : nfound;
	if (nfound == 0)
		goto return_same;

	/* Refuse results whose length would overflow an int instead of
	   allocating a wrongly sized buffer. */
	if (sub_len > pat_len &&
	    nfound > (INT_MAX - len) / (sub_len - pat_len))
		return NULL;
	new_len = len + nfound*(sub_len - pat_len);

	/* malloc(0) is allowed to return NULL, which the caller would
	   report as running out of memory; always ask for at least one
	   byte so that an empty result (e.g. every byte replaced by the
	   empty string) is a valid, free()able buffer. */
	new_s = (char *)malloc(new_len > 0 ? new_len : 1);
	if (new_s == NULL) return NULL;

	*out_len = new_len;
	out_s = new_s;

	while (len > 0) {
		/* find index of next instance of pattern */
		offset = mymemfind(str, len, pat, pat_len);
		/* if not found,  break out of loop */
		if (offset == -1) break;

		/* copy non matching part of input string */
		memcpy(new_s, str, offset); /* copy part of str before pat */
		str += offset + pat_len; /* move str past pattern */
		len -= offset + pat_len; /* reduce length of str remaining */

		/* copy substitute into the output string */
		new_s += offset; /* move new_s to dest for sub string */
		memcpy(new_s, sub, sub_len); /* copy substring into new_s */
		new_s += sub_len; /* offset new_s past sub string */

		/* break when we've done count replacements */
		if (--count == 0) break;
	}
	/* copy any remaining values into output string */
	if (len > 0)
		memcpy(new_s, str, len);
	return out_s;

  return_same:
	*out_len = -1;
	return str;
}
1074 static char replace__doc__[] =
1075 "replace (str, old, new[, maxsplit]) -> string\n\
1077 Return a copy of string str with all occurrences of substring\n\
1078 old replaced by new. If the optional argument maxsplit is\n\
1079 given, only the first maxsplit occurrences are replaced.";
1081 static PyObject *
1082 strop_replace(self, args)
1083 PyObject *self; /* Not used */
1084 PyObject *args;
1086 char *str, *pat,*sub,*new_s;
1087 int len,pat_len,sub_len,out_len;
1088 int count = 0;
1089 PyObject *new;
1091 if (!PyArg_ParseTuple(args, "s#s#s#|i",
1092 &str, &len, &pat, &pat_len, &sub, &sub_len,
1093 &count))
1094 return NULL;
1095 if (pat_len <= 0) {
1096 PyErr_SetString(PyExc_ValueError, "empty pattern string");
1097 return NULL;
1099 new_s = mymemreplace(str,len,pat,pat_len,sub,sub_len,count,&out_len);
1100 if (new_s == NULL) {
1101 PyErr_NoMemory();
1102 return NULL;
1104 if (out_len == -1) {
1105 /* we're returning another reference to the input string */
1106 new = PyTuple_GetItem(args, 0);
1107 Py_XINCREF(new);
1109 else {
1110 new = PyString_FromStringAndSize(new_s, out_len);
1111 free(new_s);
1113 return new;
1117 /* List of functions defined in the module */
1119 static PyMethodDef
1120 strop_methods[] = {
1121 {"atof", strop_atof, 1, atof__doc__},
1122 {"atoi", strop_atoi, 1, atoi__doc__},
1123 {"atol", strop_atol, 1, atol__doc__},
1124 {"capitalize", strop_capitalize, 0, capitalize__doc__},
1125 {"find", strop_find, 1, find__doc__},
1126 {"join", strop_joinfields, 1, joinfields__doc__},
1127 {"joinfields", strop_joinfields, 1, joinfields__doc__},
1128 {"lstrip", strop_lstrip, 0, lstrip__doc__},
1129 {"lower", strop_lower, 0, lower__doc__},
1130 {"maketrans", strop_maketrans, 1, maketrans__doc__},
1131 {"replace", strop_replace, 1, replace__doc__},
1132 {"rfind", strop_rfind, 1, rfind__doc__},
1133 {"rstrip", strop_rstrip, 0, rstrip__doc__},
1134 {"split", strop_splitfields, 1, splitfields__doc__},
1135 {"splitfields", strop_splitfields, 1, splitfields__doc__},
1136 {"strip", strop_strip, 0, strip__doc__},
1137 {"swapcase", strop_swapcase, 0, swapcase__doc__},
1138 {"translate", strop_translate, 1, translate__doc__},
1139 {"upper", strop_upper, 0, upper__doc__},
1140 {NULL, NULL} /* sentinel */
1144 void
1145 initstrop()
1147 PyObject *m, *d, *s;
1148 char buf[256];
1149 int c, n;
1150 m = Py_InitModule4("strop", strop_methods, strop_module__doc__,
1151 (PyObject*)NULL, PYTHON_API_VERSION);
1152 d = PyModule_GetDict(m);
1154 /* Create 'whitespace' object */
1155 n = 0;
1156 for (c = 0; c < 256; c++) {
1157 if (isspace(c))
1158 buf[n++] = c;
1160 s = PyString_FromStringAndSize(buf, n);
1161 if (s) {
1162 PyDict_SetItemString(d, "whitespace", s);
1163 Py_DECREF(s);
1165 /* Create 'lowercase' object */
1166 n = 0;
1167 for (c = 0; c < 256; c++) {
1168 if (islower(c))
1169 buf[n++] = c;
1171 s = PyString_FromStringAndSize(buf, n);
1172 if (s) {
1173 PyDict_SetItemString(d, "lowercase", s);
1174 Py_DECREF(s);
1177 /* Create 'uppercase' object */
1178 n = 0;
1179 for (c = 0; c < 256; c++) {
1180 if (isupper(c))
1181 buf[n++] = c;
1183 s = PyString_FromStringAndSize(buf, n);
1184 if (s) {
1185 PyDict_SetItemString(d, "uppercase", s);
1186 Py_DECREF(s);
1189 if (PyErr_Occurred())
1190 Py_FatalError("can't initialize module strop");