/* Extracted from a gitweb listing:
   [python/dscho.git] / Modules / stropmodule.c
   blob 54d444f188bde760744a50b457ab4dc17eaccbef
   Commit subject shown on the page: "Updated for hfsplus module, new gusi libs." */
/* strop module */
/* Module docstring; passed to Py_InitModule4() by initstrop(). */
static char strop_module__doc__[] =
"Common string manipulations, optimized for speed.\n"
"\n"
"Always use \"import string\" rather than referencing\n"
"this module directly.";
#include "Python.h"

#include <ctype.h>
#include <limits.h>
/* XXX This file assumes that the <ctype.h> is*() functions
   XXX are defined for all 8-bit characters! */
/* Issue the module-wide deprecation warning.  Expands to an `if` that
   returns NULL from the *calling* function when PyErr_Warn() reports
   failure, so it may only be used as a statement (`WARN;`) inside a
   function whose return type is a pointer. */
#define WARN if (PyErr_Warn(PyExc_DeprecationWarning, \
                 "strop functions are obsolete; use string methods")) \
             return NULL
/* The lstrip(), rstrip() and strip() functions are implemented
   in do_strip(), which uses an additional parameter to indicate what
   type of strip should occur. */
/* Values for the `striptype` argument of do_strip(). */
#define LEFTSTRIP 0     /* strip leading whitespace only */
#define RIGHTSTRIP 1    /* strip trailing whitespace only */
#define BOTHSTRIP 2     /* strip both ends */
28 static PyObject *
29 split_whitespace(char *s, int len, int maxsplit)
31 int i = 0, j, err;
32 int countsplit = 0;
33 PyObject* item;
34 PyObject *list = PyList_New(0);
36 if (list == NULL)
37 return NULL;
39 while (i < len) {
40 while (i < len && isspace(Py_CHARMASK(s[i]))) {
41 i = i+1;
43 j = i;
44 while (i < len && !isspace(Py_CHARMASK(s[i]))) {
45 i = i+1;
47 if (j < i) {
48 item = PyString_FromStringAndSize(s+j, (int)(i-j));
49 if (item == NULL)
50 goto finally;
52 err = PyList_Append(list, item);
53 Py_DECREF(item);
54 if (err < 0)
55 goto finally;
57 countsplit++;
58 while (i < len && isspace(Py_CHARMASK(s[i]))) {
59 i = i+1;
61 if (maxsplit && (countsplit >= maxsplit) && i < len) {
62 item = PyString_FromStringAndSize(
63 s+i, (int)(len - i));
64 if (item == NULL)
65 goto finally;
67 err = PyList_Append(list, item);
68 Py_DECREF(item);
69 if (err < 0)
70 goto finally;
72 i = len;
76 return list;
77 finally:
78 Py_DECREF(list);
79 return NULL;
83 static char splitfields__doc__[] =
84 "split(s [,sep [,maxsplit]]) -> list of strings\n"
85 "splitfields(s [,sep [,maxsplit]]) -> list of strings\n"
86 "\n"
87 "Return a list of the words in the string s, using sep as the\n"
88 "delimiter string. If maxsplit is nonzero, splits into at most\n"
89 "maxsplit words. If sep is not specified, any whitespace string\n"
90 "is a separator. Maxsplit defaults to 0.\n"
91 "\n"
92 "(split and splitfields are synonymous)";
94 static PyObject *
95 strop_splitfields(PyObject *self, PyObject *args)
97 int len, n, i, j, err;
98 int splitcount, maxsplit;
99 char *s, *sub;
100 PyObject *list, *item;
102 WARN;
103 sub = NULL;
104 n = 0;
105 splitcount = 0;
106 maxsplit = 0;
107 if (!PyArg_ParseTuple(args, "t#|z#i:split", &s, &len, &sub, &n, &maxsplit))
108 return NULL;
109 if (sub == NULL)
110 return split_whitespace(s, len, maxsplit);
111 if (n == 0) {
112 PyErr_SetString(PyExc_ValueError, "empty separator");
113 return NULL;
116 list = PyList_New(0);
117 if (list == NULL)
118 return NULL;
120 i = j = 0;
121 while (i+n <= len) {
122 if (s[i] == sub[0] && (n == 1 || memcmp(s+i, sub, n) == 0)) {
123 item = PyString_FromStringAndSize(s+j, (int)(i-j));
124 if (item == NULL)
125 goto fail;
126 err = PyList_Append(list, item);
127 Py_DECREF(item);
128 if (err < 0)
129 goto fail;
130 i = j = i + n;
131 splitcount++;
132 if (maxsplit && (splitcount >= maxsplit))
133 break;
135 else
136 i++;
138 item = PyString_FromStringAndSize(s+j, (int)(len-j));
139 if (item == NULL)
140 goto fail;
141 err = PyList_Append(list, item);
142 Py_DECREF(item);
143 if (err < 0)
144 goto fail;
146 return list;
148 fail:
149 Py_DECREF(list);
150 return NULL;
154 static char joinfields__doc__[] =
155 "join(list [,sep]) -> string\n"
156 "joinfields(list [,sep]) -> string\n"
157 "\n"
158 "Return a string composed of the words in list, with\n"
159 "intervening occurrences of sep. Sep defaults to a single\n"
160 "space.\n"
161 "\n"
162 "(join and joinfields are synonymous)";
164 static PyObject *
165 strop_joinfields(PyObject *self, PyObject *args)
167 PyObject *seq;
168 char *sep = NULL;
169 int seqlen, seplen = 0;
170 int i, reslen = 0, slen = 0, sz = 100;
171 PyObject *res = NULL;
172 char* p = NULL;
173 intargfunc getitemfunc;
175 WARN;
176 if (!PyArg_ParseTuple(args, "O|t#:join", &seq, &sep, &seplen))
177 return NULL;
178 if (sep == NULL) {
179 sep = " ";
180 seplen = 1;
183 seqlen = PySequence_Size(seq);
184 if (seqlen < 0 && PyErr_Occurred())
185 return NULL;
187 if (seqlen == 1) {
188 /* Optimization if there's only one item */
189 PyObject *item = PySequence_GetItem(seq, 0);
190 if (item && !PyString_Check(item)) {
191 PyErr_SetString(PyExc_TypeError,
192 "first argument must be sequence of strings");
193 Py_DECREF(item);
194 return NULL;
196 return item;
199 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
200 return NULL;
201 p = PyString_AsString(res);
203 /* optimize for lists, since it's the most common case. all others
204 * (tuples and arbitrary sequences) just use the sequence abstract
205 * interface.
207 if (PyList_Check(seq)) {
208 for (i = 0; i < seqlen; i++) {
209 PyObject *item = PyList_GET_ITEM(seq, i);
210 if (!PyString_Check(item)) {
211 PyErr_SetString(PyExc_TypeError,
212 "first argument must be sequence of strings");
213 Py_DECREF(res);
214 return NULL;
216 slen = PyString_GET_SIZE(item);
217 while (reslen + slen + seplen >= sz) {
218 if (_PyString_Resize(&res, sz * 2)) {
219 Py_DECREF(res);
220 return NULL;
222 sz *= 2;
223 p = PyString_AsString(res) + reslen;
225 if (i > 0) {
226 memcpy(p, sep, seplen);
227 p += seplen;
228 reslen += seplen;
230 memcpy(p, PyString_AS_STRING(item), slen);
231 p += slen;
232 reslen += slen;
234 if (_PyString_Resize(&res, reslen)) {
235 Py_DECREF(res);
236 res = NULL;
238 return res;
241 if (seq->ob_type->tp_as_sequence == NULL ||
242 (getitemfunc = seq->ob_type->tp_as_sequence->sq_item) == NULL)
244 PyErr_SetString(PyExc_TypeError,
245 "first argument must be a sequence");
246 return NULL;
248 /* This is now type safe */
249 for (i = 0; i < seqlen; i++) {
250 PyObject *item = getitemfunc(seq, i);
251 if (!item || !PyString_Check(item)) {
252 PyErr_SetString(PyExc_TypeError,
253 "first argument must be sequence of strings");
254 Py_DECREF(res);
255 Py_XDECREF(item);
256 return NULL;
258 slen = PyString_GET_SIZE(item);
259 while (reslen + slen + seplen >= sz) {
260 if (_PyString_Resize(&res, sz * 2)) {
261 Py_DECREF(res);
262 Py_DECREF(item);
263 return NULL;
265 sz *= 2;
266 p = PyString_AsString(res) + reslen;
268 if (i > 0) {
269 memcpy(p, sep, seplen);
270 p += seplen;
271 reslen += seplen;
273 memcpy(p, PyString_AS_STRING(item), slen);
274 p += slen;
275 reslen += slen;
276 Py_DECREF(item);
278 if (_PyString_Resize(&res, reslen)) {
279 Py_DECREF(res);
280 res = NULL;
282 return res;
/* Docstring for strop.find(); the signature line reads "-> int"
   (it was truncated to "-> in"), matching rfind__doc__. */
static char find__doc__[] =
"find(s, sub [,start [,end]]) -> int\n"
"\n"
"Return the lowest index in s where substring sub is found,\n"
"such that sub is contained within s[start,end]. Optional\n"
"arguments start and end are interpreted as in slice notation.\n"
"\n"
"Return -1 on failure.";
295 static PyObject *
296 strop_find(PyObject *self, PyObject *args)
298 char *s, *sub;
299 int len, n, i = 0, last = INT_MAX;
301 WARN;
302 if (!PyArg_ParseTuple(args, "t#t#|ii:find", &s, &len, &sub, &n, &i, &last))
303 return NULL;
305 if (last > len)
306 last = len;
307 if (last < 0)
308 last += len;
309 if (last < 0)
310 last = 0;
311 if (i < 0)
312 i += len;
313 if (i < 0)
314 i = 0;
316 if (n == 0 && i <= last)
317 return PyInt_FromLong((long)i);
319 last -= n;
320 for (; i <= last; ++i)
321 if (s[i] == sub[0] &&
322 (n == 1 || memcmp(&s[i+1], &sub[1], n-1) == 0))
323 return PyInt_FromLong((long)i);
325 return PyInt_FromLong(-1L);
329 static char rfind__doc__[] =
330 "rfind(s, sub [,start [,end]]) -> int\n"
331 "\n"
332 "Return the highest index in s where substring sub is found,\n"
333 "such that sub is contained within s[start,end]. Optional\n"
334 "arguments start and end are interpreted as in slice notation.\n"
335 "\n"
336 "Return -1 on failure.";
338 static PyObject *
339 strop_rfind(PyObject *self, PyObject *args)
341 char *s, *sub;
342 int len, n, j;
343 int i = 0, last = INT_MAX;
345 WARN;
346 if (!PyArg_ParseTuple(args, "t#t#|ii:rfind", &s, &len, &sub, &n, &i, &last))
347 return NULL;
349 if (last > len)
350 last = len;
351 if (last < 0)
352 last += len;
353 if (last < 0)
354 last = 0;
355 if (i < 0)
356 i += len;
357 if (i < 0)
358 i = 0;
360 if (n == 0 && i <= last)
361 return PyInt_FromLong((long)last);
363 for (j = last-n; j >= i; --j)
364 if (s[j] == sub[0] &&
365 (n == 1 || memcmp(&s[j+1], &sub[1], n-1) == 0))
366 return PyInt_FromLong((long)j);
368 return PyInt_FromLong(-1L);
372 static PyObject *
373 do_strip(PyObject *args, int striptype)
375 char *s;
376 int len, i, j;
379 if (!PyArg_Parse(args, "t#", &s, &len))
380 return NULL;
382 i = 0;
383 if (striptype != RIGHTSTRIP) {
384 while (i < len && isspace(Py_CHARMASK(s[i]))) {
385 i++;
389 j = len;
390 if (striptype != LEFTSTRIP) {
391 do {
392 j--;
393 } while (j >= i && isspace(Py_CHARMASK(s[j])));
394 j++;
397 if (i == 0 && j == len) {
398 Py_INCREF(args);
399 return args;
401 else
402 return PyString_FromStringAndSize(s+i, j-i);
406 static char strip__doc__[] =
407 "strip(s) -> string\n"
408 "\n"
409 "Return a copy of the string s with leading and trailing\n"
410 "whitespace removed.";
412 static PyObject *
413 strop_strip(PyObject *self, PyObject *args)
415 WARN;
416 return do_strip(args, BOTHSTRIP);
420 static char lstrip__doc__[] =
421 "lstrip(s) -> string\n"
422 "\n"
423 "Return a copy of the string s with leading whitespace removed.";
425 static PyObject *
426 strop_lstrip(PyObject *self, PyObject *args)
428 WARN;
429 return do_strip(args, LEFTSTRIP);
433 static char rstrip__doc__[] =
434 "rstrip(s) -> string\n"
435 "\n"
436 "Return a copy of the string s with trailing whitespace removed.";
438 static PyObject *
439 strop_rstrip(PyObject *self, PyObject *args)
441 WARN;
442 return do_strip(args, RIGHTSTRIP);
446 static char lower__doc__[] =
447 "lower(s) -> string\n"
448 "\n"
449 "Return a copy of the string s converted to lowercase.";
451 static PyObject *
452 strop_lower(PyObject *self, PyObject *args)
454 char *s, *s_new;
455 int i, n;
456 PyObject *new;
457 int changed;
459 WARN;
460 if (!PyArg_Parse(args, "t#", &s, &n))
461 return NULL;
462 new = PyString_FromStringAndSize(NULL, n);
463 if (new == NULL)
464 return NULL;
465 s_new = PyString_AsString(new);
466 changed = 0;
467 for (i = 0; i < n; i++) {
468 int c = Py_CHARMASK(*s++);
469 if (isupper(c)) {
470 changed = 1;
471 *s_new = tolower(c);
472 } else
473 *s_new = c;
474 s_new++;
476 if (!changed) {
477 Py_DECREF(new);
478 Py_INCREF(args);
479 return args;
481 return new;
485 static char upper__doc__[] =
486 "upper(s) -> string\n"
487 "\n"
488 "Return a copy of the string s converted to uppercase.";
490 static PyObject *
491 strop_upper(PyObject *self, PyObject *args)
493 char *s, *s_new;
494 int i, n;
495 PyObject *new;
496 int changed;
498 WARN;
499 if (!PyArg_Parse(args, "t#", &s, &n))
500 return NULL;
501 new = PyString_FromStringAndSize(NULL, n);
502 if (new == NULL)
503 return NULL;
504 s_new = PyString_AsString(new);
505 changed = 0;
506 for (i = 0; i < n; i++) {
507 int c = Py_CHARMASK(*s++);
508 if (islower(c)) {
509 changed = 1;
510 *s_new = toupper(c);
511 } else
512 *s_new = c;
513 s_new++;
515 if (!changed) {
516 Py_DECREF(new);
517 Py_INCREF(args);
518 return args;
520 return new;
524 static char capitalize__doc__[] =
525 "capitalize(s) -> string\n"
526 "\n"
527 "Return a copy of the string s with only its first character\n"
528 "capitalized.";
530 static PyObject *
531 strop_capitalize(PyObject *self, PyObject *args)
533 char *s, *s_new;
534 int i, n;
535 PyObject *new;
536 int changed;
538 WARN;
539 if (!PyArg_Parse(args, "t#", &s, &n))
540 return NULL;
541 new = PyString_FromStringAndSize(NULL, n);
542 if (new == NULL)
543 return NULL;
544 s_new = PyString_AsString(new);
545 changed = 0;
546 if (0 < n) {
547 int c = Py_CHARMASK(*s++);
548 if (islower(c)) {
549 changed = 1;
550 *s_new = toupper(c);
551 } else
552 *s_new = c;
553 s_new++;
555 for (i = 1; i < n; i++) {
556 int c = Py_CHARMASK(*s++);
557 if (isupper(c)) {
558 changed = 1;
559 *s_new = tolower(c);
560 } else
561 *s_new = c;
562 s_new++;
564 if (!changed) {
565 Py_DECREF(new);
566 Py_INCREF(args);
567 return args;
569 return new;
573 static char expandtabs__doc__[] =
574 "expandtabs(string, [tabsize]) -> string\n"
575 "\n"
576 "Expand tabs in a string, i.e. replace them by one or more spaces,\n"
577 "depending on the current column and the given tab size (default 8).\n"
578 "The column number is reset to zero after each newline occurring in the\n"
579 "string. This doesn't understand other non-printing characters.";
581 static PyObject *
582 strop_expandtabs(PyObject *self, PyObject *args)
584 /* Original by Fredrik Lundh */
585 char* e;
586 char* p;
587 char* q;
588 int i, j;
589 PyObject* out;
590 char* string;
591 int stringlen;
592 int tabsize = 8;
594 WARN;
595 /* Get arguments */
596 if (!PyArg_ParseTuple(args, "s#|i:expandtabs", &string, &stringlen, &tabsize))
597 return NULL;
598 if (tabsize < 1) {
599 PyErr_SetString(PyExc_ValueError,
600 "tabsize must be at least 1");
601 return NULL;
604 /* First pass: determine size of output string */
605 i = j = 0; /* j: current column; i: total of previous lines */
606 e = string + stringlen;
607 for (p = string; p < e; p++) {
608 if (*p == '\t')
609 j += tabsize - (j%tabsize);
610 else {
611 j++;
612 if (*p == '\n') {
613 i += j;
614 j = 0;
619 /* Second pass: create output string and fill it */
620 out = PyString_FromStringAndSize(NULL, i+j);
621 if (out == NULL)
622 return NULL;
624 i = 0;
625 q = PyString_AS_STRING(out);
627 for (p = string; p < e; p++) {
628 if (*p == '\t') {
629 j = tabsize - (i%tabsize);
630 i += j;
631 while (j-- > 0)
632 *q++ = ' ';
633 } else {
634 *q++ = *p;
635 i++;
636 if (*p == '\n')
637 i = 0;
641 return out;
645 static char count__doc__[] =
646 "count(s, sub[, start[, end]]) -> int\n"
647 "\n"
648 "Return the number of occurrences of substring sub in string\n"
649 "s[start:end]. Optional arguments start and end are\n"
650 "interpreted as in slice notation.";
652 static PyObject *
653 strop_count(PyObject *self, PyObject *args)
655 char *s, *sub;
656 int len, n;
657 int i = 0, last = INT_MAX;
658 int m, r;
660 WARN;
661 if (!PyArg_ParseTuple(args, "t#t#|ii:count", &s, &len, &sub, &n, &i, &last))
662 return NULL;
663 if (last > len)
664 last = len;
665 if (last < 0)
666 last += len;
667 if (last < 0)
668 last = 0;
669 if (i < 0)
670 i += len;
671 if (i < 0)
672 i = 0;
673 m = last + 1 - n;
674 if (n == 0)
675 return PyInt_FromLong((long) (m-i));
677 r = 0;
678 while (i < m) {
679 if (!memcmp(s+i, sub, n)) {
680 r++;
681 i += n;
682 } else {
683 i++;
686 return PyInt_FromLong((long) r);
690 static char swapcase__doc__[] =
691 "swapcase(s) -> string\n"
692 "\n"
693 "Return a copy of the string s with upper case characters\n"
694 "converted to lowercase and vice versa.";
696 static PyObject *
697 strop_swapcase(PyObject *self, PyObject *args)
699 char *s, *s_new;
700 int i, n;
701 PyObject *new;
702 int changed;
704 WARN;
705 if (!PyArg_Parse(args, "t#", &s, &n))
706 return NULL;
707 new = PyString_FromStringAndSize(NULL, n);
708 if (new == NULL)
709 return NULL;
710 s_new = PyString_AsString(new);
711 changed = 0;
712 for (i = 0; i < n; i++) {
713 int c = Py_CHARMASK(*s++);
714 if (islower(c)) {
715 changed = 1;
716 *s_new = toupper(c);
718 else if (isupper(c)) {
719 changed = 1;
720 *s_new = tolower(c);
722 else
723 *s_new = c;
724 s_new++;
726 if (!changed) {
727 Py_DECREF(new);
728 Py_INCREF(args);
729 return args;
731 return new;
735 static char atoi__doc__[] =
736 "atoi(s [,base]) -> int\n"
737 "\n"
738 "Return the integer represented by the string s in the given\n"
739 "base, which defaults to 10. The string s must consist of one\n"
740 "or more digits, possibly preceded by a sign. If base is 0, it\n"
741 "is chosen from the leading characters of s, 0 for octal, 0x or\n"
742 "0X for hexadecimal. If base is 16, a preceding 0x or 0X is\n"
743 "accepted.";
745 static PyObject *
746 strop_atoi(PyObject *self, PyObject *args)
748 char *s, *end;
749 int base = 10;
750 long x;
751 char buffer[256]; /* For errors */
753 WARN;
754 if (!PyArg_ParseTuple(args, "s|i:atoi", &s, &base))
755 return NULL;
757 if ((base != 0 && base < 2) || base > 36) {
758 PyErr_SetString(PyExc_ValueError, "invalid base for atoi()");
759 return NULL;
762 while (*s && isspace(Py_CHARMASK(*s)))
763 s++;
764 errno = 0;
765 if (base == 0 && s[0] == '0')
766 x = (long) PyOS_strtoul(s, &end, base);
767 else
768 x = PyOS_strtol(s, &end, base);
769 if (end == s || !isalnum(end[-1]))
770 goto bad;
771 while (*end && isspace(Py_CHARMASK(*end)))
772 end++;
773 if (*end != '\0') {
774 bad:
775 PyOS_snprintf(buffer, sizeof(buffer),
776 "invalid literal for atoi(): %.200s", s);
777 PyErr_SetString(PyExc_ValueError, buffer);
778 return NULL;
780 else if (errno != 0) {
781 PyOS_snprintf(buffer, sizeof(buffer),
782 "atoi() literal too large: %.200s", s);
783 PyErr_SetString(PyExc_ValueError, buffer);
784 return NULL;
786 return PyInt_FromLong(x);
790 static char atol__doc__[] =
791 "atol(s [,base]) -> long\n"
792 "\n"
793 "Return the long integer represented by the string s in the\n"
794 "given base, which defaults to 10. The string s must consist\n"
795 "of one or more digits, possibly preceded by a sign. If base\n"
796 "is 0, it is chosen from the leading characters of s, 0 for\n"
797 "octal, 0x or 0X for hexadecimal. If base is 16, a preceding\n"
798 "0x or 0X is accepted. A trailing L or l is not accepted,\n"
799 "unless base is 0.";
801 static PyObject *
802 strop_atol(PyObject *self, PyObject *args)
804 char *s, *end;
805 int base = 10;
806 PyObject *x;
807 char buffer[256]; /* For errors */
809 WARN;
810 if (!PyArg_ParseTuple(args, "s|i:atol", &s, &base))
811 return NULL;
813 if ((base != 0 && base < 2) || base > 36) {
814 PyErr_SetString(PyExc_ValueError, "invalid base for atol()");
815 return NULL;
818 while (*s && isspace(Py_CHARMASK(*s)))
819 s++;
820 if (s[0] == '\0') {
821 PyErr_SetString(PyExc_ValueError, "empty string for atol()");
822 return NULL;
824 x = PyLong_FromString(s, &end, base);
825 if (x == NULL)
826 return NULL;
827 if (base == 0 && (*end == 'l' || *end == 'L'))
828 end++;
829 while (*end && isspace(Py_CHARMASK(*end)))
830 end++;
831 if (*end != '\0') {
832 PyOS_snprintf(buffer, sizeof(buffer),
833 "invalid literal for atol(): %.200s", s);
834 PyErr_SetString(PyExc_ValueError, buffer);
835 Py_DECREF(x);
836 return NULL;
838 return x;
842 static char atof__doc__[] =
843 "atof(s) -> float\n"
844 "\n"
845 "Return the floating point number represented by the string s.";
847 static PyObject *
848 strop_atof(PyObject *self, PyObject *args)
850 extern double strtod(const char *, char **);
851 char *s, *end;
852 double x;
853 char buffer[256]; /* For errors */
855 WARN;
856 if (!PyArg_ParseTuple(args, "s:atof", &s))
857 return NULL;
858 while (*s && isspace(Py_CHARMASK(*s)))
859 s++;
860 if (s[0] == '\0') {
861 PyErr_SetString(PyExc_ValueError, "empty string for atof()");
862 return NULL;
864 errno = 0;
865 PyFPE_START_PROTECT("strop_atof", return 0)
866 x = strtod(s, &end);
867 PyFPE_END_PROTECT(x)
868 while (*end && isspace(Py_CHARMASK(*end)))
869 end++;
870 if (*end != '\0') {
871 PyOS_snprintf(buffer, sizeof(buffer),
872 "invalid literal for atof(): %.200s", s);
873 PyErr_SetString(PyExc_ValueError, buffer);
874 return NULL;
876 else if (errno != 0) {
877 PyOS_snprintf(buffer, sizeof(buffer),
878 "atof() literal too large: %.200s", s);
879 PyErr_SetString(PyExc_ValueError, buffer);
880 return NULL;
882 return PyFloat_FromDouble(x);
886 static char maketrans__doc__[] =
887 "maketrans(frm, to) -> string\n"
888 "\n"
889 "Return a translation table (a string of 256 bytes long)\n"
890 "suitable for use in string.translate. The strings frm and to\n"
891 "must be of the same length.";
893 static PyObject *
894 strop_maketrans(PyObject *self, PyObject *args)
896 unsigned char *c, *from=NULL, *to=NULL;
897 int i, fromlen=0, tolen=0;
898 PyObject *result;
900 if (!PyArg_ParseTuple(args, "t#t#:maketrans", &from, &fromlen, &to, &tolen))
901 return NULL;
903 if (fromlen != tolen) {
904 PyErr_SetString(PyExc_ValueError,
905 "maketrans arguments must have same length");
906 return NULL;
909 result = PyString_FromStringAndSize((char *)NULL, 256);
910 if (result == NULL)
911 return NULL;
912 c = (unsigned char *) PyString_AS_STRING((PyStringObject *)result);
913 for (i = 0; i < 256; i++)
914 c[i]=(unsigned char)i;
915 for (i = 0; i < fromlen; i++)
916 c[from[i]]=to[i];
918 return result;
922 static char translate__doc__[] =
923 "translate(s,table [,deletechars]) -> string\n"
924 "\n"
925 "Return a copy of the string s, where all characters occurring\n"
926 "in the optional argument deletechars are removed, and the\n"
927 "remaining characters have been mapped through the given\n"
928 "translation table, which must be a string of length 256.";
930 static PyObject *
931 strop_translate(PyObject *self, PyObject *args)
933 register char *input, *table, *output;
934 register int i, c, changed = 0;
935 PyObject *input_obj;
936 char *table1, *output_start, *del_table=NULL;
937 int inlen, tablen, dellen = 0;
938 PyObject *result;
939 int trans_table[256];
941 WARN;
942 if (!PyArg_ParseTuple(args, "St#|t#:translate", &input_obj,
943 &table1, &tablen, &del_table, &dellen))
944 return NULL;
945 if (tablen != 256) {
946 PyErr_SetString(PyExc_ValueError,
947 "translation table must be 256 characters long");
948 return NULL;
951 table = table1;
952 inlen = PyString_Size(input_obj);
953 result = PyString_FromStringAndSize((char *)NULL, inlen);
954 if (result == NULL)
955 return NULL;
956 output_start = output = PyString_AsString(result);
957 input = PyString_AsString(input_obj);
959 if (dellen == 0) {
960 /* If no deletions are required, use faster code */
961 for (i = inlen; --i >= 0; ) {
962 c = Py_CHARMASK(*input++);
963 if (Py_CHARMASK((*output++ = table[c])) != c)
964 changed = 1;
966 if (changed)
967 return result;
968 Py_DECREF(result);
969 Py_INCREF(input_obj);
970 return input_obj;
973 for (i = 0; i < 256; i++)
974 trans_table[i] = Py_CHARMASK(table[i]);
976 for (i = 0; i < dellen; i++)
977 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
979 for (i = inlen; --i >= 0; ) {
980 c = Py_CHARMASK(*input++);
981 if (trans_table[c] != -1)
982 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
983 continue;
984 changed = 1;
986 if (!changed) {
987 Py_DECREF(result);
988 Py_INCREF(input_obj);
989 return input_obj;
991 /* Fix the size of the resulting string */
992 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
993 return NULL;
994 return result;
/* What follows is used for implementing replace().  Perry Stoll. */
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Locates the first occurrence in the memory pointed to by MEM of the
  contents of memory pointed to by PAT.  Returns the index into MEM if
  found, or -1 if not found.  If len of PAT is greater than length of
  MEM, the function returns -1.

  PAT_LEN must be at least 1: pat[0] is read unconditionally whenever
  the scan loop runs.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    register int ii;

    /* pattern can not occur in the last pat_len-1 chars */
    len -= pat_len;

    for (ii = 0; ii <= len; ii++) {
        if (mem[ii] == pat[0] &&
            (pat_len == 1 ||
             memcmp(&mem[ii+1], &pat[1], pat_len-1) == 0)) {
            return ii;
        }
    }
    return -1;
}
/*
  mymemcnt

  Return the number of distinct times PAT is found in MEM.
  meaning mem=1111 and pat==11 returns 2.
          mem=11111 and pat==11 also return 2.

  The search resumes directly after each match, so occurrences never
  overlap.
*/
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    register int offset = 0;
    int nfound = 0;

    while (len >= 0) {
        offset = mymemfind(mem, len, pat, pat_len);
        if (offset == -1)
            break;
        /* Continue behind the match just found. */
        mem += offset + pat_len;
        len -= offset + pat_len;
        nfound++;
    }
    return nfound;
}
/*
  mymemreplace

  Return a string in which all occurrences of PAT in memory STR are
  replaced with SUB.

  If length of PAT is greater than length of STR, STR is empty, or there
  are no occurrences of PAT in STR, then the original string is
  returned.  Otherwise, a new string is allocated here and returned.

  COUNT limits the number of replacements; a negative COUNT means
  "replace all".

  on return, out_len is:
      the length of output string, or
      -1 if the input string is returned, or
      unchanged if an error occurs (no memory, or the result would be
      too long to describe with an int).

  return value is:
      the new string allocated locally (with PyMem_MALLOC; the caller
      frees it), or
      NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,      /* input string */
             const char *pat, int pat_len,  /* pattern string to find */
             const char *sub, int sub_len,  /* substitution string */
             int count,                     /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len, delta;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* Only a growing replacement can overflow: when delta <= 0 the
       result is no longer than the input. */
    delta = sub_len - pat_len;
    if (delta > 0 && nfound > (INT_MAX - len) / delta)
        return NULL;
    new_len = len + nfound*delta;
    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        for (; count > 0 && len > 0; --count) {
            /* find index of next instance of pattern */
            offset = mymemfind(str, len, pat, pat_len);
            if (offset == -1)
                break;

            /* copy non matching part of input string */
            memcpy(new_s, str, offset);
            str += offset + pat_len;
            len -= offset + pat_len;

            /* copy substitute into the output string */
            new_s += offset;
            memcpy(new_s, sub, sub_len);
            new_s += sub_len;
        }
        /* copy any remaining values into output string */
        if (len > 0)
            memcpy(new_s, str, len);
    }
    *out_len = new_len;
    return out_s;

  return_same:
    *out_len = -1;
    return (char *)str; /* cast away const */
}
1138 static char replace__doc__[] =
1139 "replace (str, old, new[, maxsplit]) -> string\n"
1140 "\n"
1141 "Return a copy of string str with all occurrences of substring\n"
1142 "old replaced by new. If the optional argument maxsplit is\n"
1143 "given, only the first maxsplit occurrences are replaced.";
1145 static PyObject *
1146 strop_replace(PyObject *self, PyObject *args)
1148 char *str, *pat,*sub,*new_s;
1149 int len,pat_len,sub_len,out_len;
1150 int count = -1;
1151 PyObject *new;
1153 WARN;
1154 if (!PyArg_ParseTuple(args, "t#t#t#|i:replace",
1155 &str, &len, &pat, &pat_len, &sub, &sub_len,
1156 &count))
1157 return NULL;
1158 if (pat_len <= 0) {
1159 PyErr_SetString(PyExc_ValueError, "empty pattern string");
1160 return NULL;
1162 /* CAUTION: strop treats a replace count of 0 as infinity, unlke
1163 * current (2.1) string.py and string methods. Preserve this for
1164 * ... well, hard to say for what <wink>.
1166 if (count == 0)
1167 count = -1;
1168 new_s = mymemreplace(str,len,pat,pat_len,sub,sub_len,count,&out_len);
1169 if (new_s == NULL) {
1170 PyErr_NoMemory();
1171 return NULL;
1173 if (out_len == -1) {
1174 /* we're returning another reference to the input string */
1175 new = PyTuple_GetItem(args, 0);
1176 Py_XINCREF(new);
1178 else {
1179 new = PyString_FromStringAndSize(new_s, out_len);
1180 PyMem_FREE(new_s);
1182 return new;
/* List of functions defined in the module */
1188 static PyMethodDef
1189 strop_methods[] = {
1190 {"atof", strop_atof, METH_VARARGS, atof__doc__},
1191 {"atoi", strop_atoi, METH_VARARGS, atoi__doc__},
1192 {"atol", strop_atol, METH_VARARGS, atol__doc__},
1193 {"capitalize", strop_capitalize, METH_OLDARGS, capitalize__doc__},
1194 {"count", strop_count, METH_VARARGS, count__doc__},
1195 {"expandtabs", strop_expandtabs, METH_VARARGS, expandtabs__doc__},
1196 {"find", strop_find, METH_VARARGS, find__doc__},
1197 {"join", strop_joinfields, METH_VARARGS, joinfields__doc__},
1198 {"joinfields", strop_joinfields, METH_VARARGS, joinfields__doc__},
1199 {"lstrip", strop_lstrip, METH_OLDARGS, lstrip__doc__},
1200 {"lower", strop_lower, METH_OLDARGS, lower__doc__},
1201 {"maketrans", strop_maketrans, METH_VARARGS, maketrans__doc__},
1202 {"replace", strop_replace, METH_VARARGS, replace__doc__},
1203 {"rfind", strop_rfind, METH_VARARGS, rfind__doc__},
1204 {"rstrip", strop_rstrip, METH_OLDARGS, rstrip__doc__},
1205 {"split", strop_splitfields, METH_VARARGS, splitfields__doc__},
1206 {"splitfields", strop_splitfields, METH_VARARGS, splitfields__doc__},
1207 {"strip", strop_strip, METH_OLDARGS, strip__doc__},
1208 {"swapcase", strop_swapcase, METH_OLDARGS, swapcase__doc__},
1209 {"translate", strop_translate, METH_VARARGS, translate__doc__},
1210 {"upper", strop_upper, METH_OLDARGS, upper__doc__},
1211 {NULL, NULL} /* sentinel */
1215 DL_EXPORT(void)
1216 initstrop(void)
1218 PyObject *m, *d, *s;
1219 char buf[256];
1220 int c, n;
1221 m = Py_InitModule4("strop", strop_methods, strop_module__doc__,
1222 (PyObject*)NULL, PYTHON_API_VERSION);
1223 d = PyModule_GetDict(m);
1225 /* Create 'whitespace' object */
1226 n = 0;
1227 for (c = 0; c < 256; c++) {
1228 if (isspace(c))
1229 buf[n++] = c;
1231 s = PyString_FromStringAndSize(buf, n);
1232 if (s) {
1233 PyDict_SetItemString(d, "whitespace", s);
1234 Py_DECREF(s);
1236 /* Create 'lowercase' object */
1237 n = 0;
1238 for (c = 0; c < 256; c++) {
1239 if (islower(c))
1240 buf[n++] = c;
1242 s = PyString_FromStringAndSize(buf, n);
1243 if (s) {
1244 PyDict_SetItemString(d, "lowercase", s);
1245 Py_DECREF(s);
1248 /* Create 'uppercase' object */
1249 n = 0;
1250 for (c = 0; c < 256; c++) {
1251 if (isupper(c))
1252 buf[n++] = c;
1254 s = PyString_FromStringAndSize(buf, n);
1255 if (s) {
1256 PyDict_SetItemString(d, "uppercase", s);
1257 Py_DECREF(s);