Modules/stropmodule.c

   1 /* strop module */
   2
   3 static char strop_module__doc__[] =
   4 "Common string manipulations, optimized for speed.\n"
   5 "\n"
   6 "Always use \"import string\" rather than referencing\n"
   7 "this module directly.";
   8
   9 #include "Python.h"
  10
  11 #include <ctype.h>
  12 /* XXX This file assumes that the <ctype.h> is*() functions
  13    XXX are defined for all 8-bit characters! */
  14
  15 #define WARN if (PyErr_Warn(PyExc_DeprecationWarning, \
  16                        "strop functions are obsolete; use string methods")) \
  17              return NULL
  18
  19 /* The lstrip(), rstrip() and strip() functions are implemented
  20    in do_strip(), which uses an additional parameter to indicate what
  21    type of strip should occur. */
  22
  23 #define LEFTSTRIP 0
  24 #define RIGHTSTRIP 1
  25 #define BOTHSTRIP 2
  26
  27
  28 static PyObject *
  29 split_whitespace(char *s, int len, int maxsplit)
  30 {
  31         int i = 0, j, err;
  32         int countsplit = 0;
  33         PyObject* item;
  34         PyObject *list = PyList_New(0);
  35
  36         if (list == NULL)
  37                 return NULL;
  38
  39         while (i < len) {
  40                 while (i < len && isspace(Py_CHARMASK(s[i]))) {
  41                         i = i+1;
  42                 }
  43                 j = i;
  44                 while (i < len && !isspace(Py_CHARMASK(s[i]))) {
  45                         i = i+1;
  46                 }
  47                 if (j < i) {
  48                         item = PyString_FromStringAndSize(s+j, (int)(i-j));
  49                         if (item == NULL)
  50                                 goto finally;
  51
  52                         err = PyList_Append(list, item);
  53                         Py_DECREF(item);
  54                         if (err < 0)
  55                                 goto finally;
  56
  57                         countsplit++;
  58                         while (i < len && isspace(Py_CHARMASK(s[i]))) {
  59                                 i = i+1;
  60                         }
  61                         if (maxsplit && (countsplit >= maxsplit) && i < len) {
  62                                 item = PyString_FromStringAndSize(
  63                                         s+i, (int)(len - i));
  64                                 if (item == NULL)
  65                                         goto finally;
  66
  67                                 err = PyList_Append(list, item);
  68                                 Py_DECREF(item);
  69                                 if (err < 0)
  70                                         goto finally;
  71
  72                                 i = len;
  73                         }
  74                 }
  75         }
  76         return list;
  77   finally:
  78         Py_DECREF(list);
  79         return NULL;
  80 }
  81
  82
  83 static char splitfields__doc__[] =
  84 "split(s [,sep [,maxsplit]]) -> list of strings\n"
  85 "splitfields(s [,sep [,maxsplit]]) -> list of strings\n"
  86 "\n"
  87 "Return a list of the words in the string s, using sep as the\n"
  88 "delimiter string.  If maxsplit is nonzero, splits into at most\n"
  89 "maxsplit words.  If sep is not specified, any whitespace string\n"
  90 "is a separator.  Maxsplit defaults to 0.\n"
  91 "\n"
  92 "(split and splitfields are synonymous)";
  93
  94 static PyObject *
  95 strop_splitfields(PyObject *self, PyObject *args)
  96 {
  97         int len, n, i, j, err;
  98         int splitcount, maxsplit;
  99         char *s, *sub;
 100         PyObject *list, *item;
 101
 102         WARN;
 103         sub = NULL;
 104         n = 0;
 105         splitcount = 0;
 106         maxsplit = 0;
 107         if (!PyArg_ParseTuple(args, "t#|z#i:split", &s, &len, &sub, &n, &maxsplit))
 108                 return NULL;
 109         if (sub == NULL)
 110                 return split_whitespace(s, len, maxsplit);
 111         if (n == 0) {
 112                 PyErr_SetString(PyExc_ValueError, "empty separator");
 113                 return NULL;
 114         }
 115
 116         list = PyList_New(0);
 117         if (list == NULL)
 118                 return NULL;
 119
 120         i = j = 0;
 121         while (i+n <= len) {
 122                 if (s[i] == sub[0] && (n == 1 || memcmp(s+i, sub, n) == 0)) {
 123                         item = PyString_FromStringAndSize(s+j, (int)(i-j));
 124                         if (item == NULL)
 125                                 goto fail;
 126                         err = PyList_Append(list, item);
 127                         Py_DECREF(item);
 128                         if (err < 0)
 129                                 goto fail;
 130                         i = j = i + n;
 131                         splitcount++;
 132                         if (maxsplit && (splitcount >= maxsplit))
 133                                 break;
 134                 }
 135                 else
 136                         i++;
 137         }
 138         item = PyString_FromStringAndSize(s+j, (int)(len-j));
 139         if (item == NULL)
 140                 goto fail;
 141         err = PyList_Append(list, item);
 142         Py_DECREF(item);
 143         if (err < 0)
 144                 goto fail;
 145
 146         return list;
 147
 148  fail:
 149         Py_DECREF(list);
 150         return NULL;
 151 }
 152
 153
 154 static char joinfields__doc__[] =
 155 "join(list [,sep]) -> string\n"
 156 "joinfields(list [,sep]) -> string\n"
 157 "\n"
 158 "Return a string composed of the words in list, with\n"
 159 "intervening occurrences of sep.  Sep defaults to a single\n"
 160 "space.\n"
 161 "\n"
 162 "(join and joinfields are synonymous)";
 163
 164 static PyObject *
 165 strop_joinfields(PyObject *self, PyObject *args)
 166 {
 167         PyObject *seq;
 168         char *sep = NULL;
 169         int seqlen, seplen = 0;
 170         int i, reslen = 0, slen = 0, sz = 100;
 171         PyObject *res = NULL;
 172         char* p = NULL;
 173         intargfunc getitemfunc;
 174
 175         WARN;
 176         if (!PyArg_ParseTuple(args, "O|t#:join", &seq, &sep, &seplen))
 177                 return NULL;
 178         if (sep == NULL) {
 179                 sep = " ";
 180                 seplen = 1;
 181         }
 182
 183         seqlen = PySequence_Size(seq);
 184         if (seqlen < 0 && PyErr_Occurred())
 185                 return NULL;
 186
 187         if (seqlen == 1) {
 188                 /* Optimization if there's only one item */
 189                 PyObject *item = PySequence_GetItem(seq, 0);
 190                 if (item && !PyString_Check(item)) {
 191                         PyErr_SetString(PyExc_TypeError,
 192                                  "first argument must be sequence of strings");
 193                         Py_DECREF(item);
 194                         return NULL;
 195                 }
 196                 return item;
 197         }
 198
 199         if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
 200                 return NULL;
 201         p = PyString_AsString(res);
 202
 203         /* optimize for lists, since it's the most common case.  all others
 204          * (tuples and arbitrary sequences) just use the sequence abstract
 205          * interface.
 206          */
 207         if (PyList_Check(seq)) {
 208                 for (i = 0; i < seqlen; i++) {
 209                         PyObject *item = PyList_GET_ITEM(seq, i);
 210                         if (!PyString_Check(item)) {
 211                                 PyErr_SetString(PyExc_TypeError,
 212                                 "first argument must be sequence of strings");
 213                                 Py_DECREF(res);
 214                                 return NULL;
 215                         }
 216                         slen = PyString_GET_SIZE(item);
 217                         while (reslen + slen + seplen >= sz) {
 218                                 if (_PyString_Resize(&res, sz * 2)) {
 219                                         Py_DECREF(res);
 220                                         return NULL;
 221                                 }
 222                                 sz *= 2;
 223                                 p = PyString_AsString(res) + reslen;
 224                         }
 225                         if (i > 0) {
 226                                 memcpy(p, sep, seplen);
 227                                 p += seplen;
 228                                 reslen += seplen;
 229                         }
 230                         memcpy(p, PyString_AS_STRING(item), slen);
 231                         p += slen;
 232                         reslen += slen;
 233                 }
 234                 if (_PyString_Resize(&res, reslen)) {
 235                         Py_DECREF(res);
 236                         res = NULL;
 237                 }
 238                 return res;
 239         }
 240
 241         if (seq->ob_type->tp_as_sequence == NULL ||
 242                  (getitemfunc = seq->ob_type->tp_as_sequence->sq_item) == NULL)
 243         {
 244                 PyErr_SetString(PyExc_TypeError,
 245                                 "first argument must be a sequence");
 246                 return NULL;
 247         }
 248         /* This is now type safe */
 249         for (i = 0; i < seqlen; i++) {
 250                 PyObject *item = getitemfunc(seq, i);
 251                 if (!item || !PyString_Check(item)) {
 252                         PyErr_SetString(PyExc_TypeError,
 253                                  "first argument must be sequence of strings");
 254                         Py_DECREF(res);
 255                         Py_XDECREF(item);
 256                         return NULL;
 257                 }
 258                 slen = PyString_GET_SIZE(item);
 259                 while (reslen + slen + seplen >= sz) {
 260                         if (_PyString_Resize(&res, sz * 2)) {
 261                                 Py_DECREF(res);
 262                                 Py_DECREF(item);
 263                                 return NULL;
 264                         }
 265                         sz *= 2;
 266                         p = PyString_AsString(res) + reslen;
 267                 }
 268                 if (i > 0) {
 269                         memcpy(p, sep, seplen);
 270                         p += seplen;
 271                         reslen += seplen;
 272                 }
 273                 memcpy(p, PyString_AS_STRING(item), slen);
 274                 p += slen;
 275                 reslen += slen;
 276                 Py_DECREF(item);
 277         }
 278         if (_PyString_Resize(&res, reslen)) {
 279                 Py_DECREF(res);
 280                 res = NULL;
 281         }
 282         return res;
 283 }
 284
 285
 286 static char find__doc__[] =
 287 "find(s, sub [,start [,end]]) -> in\n"
 288 "\n"
 289 "Return the lowest index in s where substring sub is found,\n"
 290 "such that sub is contained within s[start,end].  Optional\n"
 291 "arguments start and end are interpreted as in slice notation.\n"
 292 "\n"
 293 "Return -1 on failure.";
 294
 295 static PyObject *
 296 strop_find(PyObject *self, PyObject *args)
 297 {
 298         char *s, *sub;
 299         int len, n, i = 0, last = INT_MAX;
 300
 301         WARN;
 302         if (!PyArg_ParseTuple(args, "t#t#|ii:find", &s, &len, &sub, &n, &i, &last))
 303                 return NULL;
 304
 305         if (last > len)
 306                 last = len;
 307         if (last < 0)
 308                 last += len;
 309         if (last < 0)
 310                 last = 0;
 311         if (i < 0)
 312                 i += len;
 313         if (i < 0)
 314                 i = 0;
 315
 316         if (n == 0 && i <= last)
 317                 return PyInt_FromLong((long)i);
 318
 319         last -= n;
 320         for (; i <= last; ++i)
 321                 if (s[i] == sub[0] &&
 322                     (n == 1 || memcmp(&s[i+1], &sub[1], n-1) == 0))
 323                         return PyInt_FromLong((long)i);
 324
 325         return PyInt_FromLong(-1L);
 326 }
 327
 328
 329 static char rfind__doc__[] =
 330 "rfind(s, sub [,start [,end]]) -> int\n"
 331 "\n"
 332 "Return the highest index in s where substring sub is found,\n"
 333 "such that sub is contained within s[start,end].  Optional\n"
 334 "arguments start and end are interpreted as in slice notation.\n"
 335 "\n"
 336 "Return -1 on failure.";
 337
 338 static PyObject *
 339 strop_rfind(PyObject *self, PyObject *args)
 340 {
 341         char *s, *sub;
 342         int len, n, j;
 343         int i = 0, last = INT_MAX;
 344
 345         WARN;
 346         if (!PyArg_ParseTuple(args, "t#t#|ii:rfind", &s, &len, &sub, &n, &i, &last))
 347                 return NULL;
 348
 349         if (last > len)
 350                 last = len;
 351         if (last < 0)
 352                 last += len;
 353         if (last < 0)
 354                 last = 0;
 355         if (i < 0)
 356                 i += len;
 357         if (i < 0)
 358                 i = 0;
 359
 360         if (n == 0 && i <= last)
 361                 return PyInt_FromLong((long)last);
 362
 363         for (j = last-n; j >= i; --j)
 364                 if (s[j] == sub[0] &&
 365                     (n == 1 || memcmp(&s[j+1], &sub[1], n-1) == 0))
 366                         return PyInt_FromLong((long)j);
 367
 368         return PyInt_FromLong(-1L);
 369 }
 370
 371
 372 static PyObject *
 373 do_strip(PyObject *args, int striptype)
 374 {
 375         char *s;
 376         int len, i, j;
 377
 378
 379         if (!PyArg_Parse(args, "t#", &s, &len))
 380                 return NULL;
 381
 382         i = 0;
 383         if (striptype != RIGHTSTRIP) {
 384                 while (i < len && isspace(Py_CHARMASK(s[i]))) {
 385                         i++;
 386                 }
 387         }
 388
 389         j = len;
 390         if (striptype != LEFTSTRIP) {
 391                 do {
 392                         j--;
 393                 } while (j >= i && isspace(Py_CHARMASK(s[j])));
 394                 j++;
 395         }
 396
 397         if (i == 0 && j == len) {
 398                 Py_INCREF(args);
 399                 return args;
 400         }
 401         else
 402                 return PyString_FromStringAndSize(s+i, j-i);
 403 }
 404
 405
 406 static char strip__doc__[] =
 407 "strip(s) -> string\n"
 408 "\n"
 409 "Return a copy of the string s with leading and trailing\n"
 410 "whitespace removed.";
 411
 412 static PyObject *
 413 strop_strip(PyObject *self, PyObject *args)
 414 {
 415         WARN;
 416         return do_strip(args, BOTHSTRIP);
 417 }
 418
 419
 420 static char lstrip__doc__[] =
 421 "lstrip(s) -> string\n"
 422 "\n"
 423 "Return a copy of the string s with leading whitespace removed.";
 424
 425 static PyObject *
 426 strop_lstrip(PyObject *self, PyObject *args)
 427 {
 428         WARN;
 429         return do_strip(args, LEFTSTRIP);
 430 }
 431
 432
 433 static char rstrip__doc__[] =
 434 "rstrip(s) -> string\n"
 435 "\n"
 436 "Return a copy of the string s with trailing whitespace removed.";
 437
 438 static PyObject *
 439 strop_rstrip(PyObject *self, PyObject *args)
 440 {
 441         WARN;
 442         return do_strip(args, RIGHTSTRIP);
 443 }
 444
 445
 446 static char lower__doc__[] =
 447 "lower(s) -> string\n"
 448 "\n"
 449 "Return a copy of the string s converted to lowercase.";
 450
 451 static PyObject *
 452 strop_lower(PyObject *self, PyObject *args)
 453 {
 454         char *s, *s_new;
 455         int i, n;
 456         PyObject *new;
 457         int changed;
 458
 459         WARN;
 460         if (!PyArg_Parse(args, "t#", &s, &n))
 461                 return NULL;
 462         new = PyString_FromStringAndSize(NULL, n);
 463         if (new == NULL)
 464                 return NULL;
 465         s_new = PyString_AsString(new);
 466         changed = 0;
 467         for (i = 0; i < n; i++) {
 468                 int c = Py_CHARMASK(*s++);
 469                 if (isupper(c)) {
 470                         changed = 1;
 471                         *s_new = tolower(c);
 472                 } else
 473                         *s_new = c;
 474                 s_new++;
 475         }
 476         if (!changed) {
 477                 Py_DECREF(new);
 478                 Py_INCREF(args);
 479                 return args;
 480         }
 481         return new;
 482 }
 483
 484
 485 static char upper__doc__[] =
 486 "upper(s) -> string\n"
 487 "\n"
 488 "Return a copy of the string s converted to uppercase.";
 489
 490 static PyObject *
 491 strop_upper(PyObject *self, PyObject *args)
 492 {
 493         char *s, *s_new;
 494         int i, n;
 495         PyObject *new;
 496         int changed;
 497
 498         WARN;
 499         if (!PyArg_Parse(args, "t#", &s, &n))
 500                 return NULL;
 501         new = PyString_FromStringAndSize(NULL, n);
 502         if (new == NULL)
 503                 return NULL;
 504         s_new = PyString_AsString(new);
 505         changed = 0;
 506         for (i = 0; i < n; i++) {
 507                 int c = Py_CHARMASK(*s++);
 508                 if (islower(c)) {
 509                         changed = 1;
 510                         *s_new = toupper(c);
 511                 } else
 512                         *s_new = c;
 513                 s_new++;
 514         }
 515         if (!changed) {
 516                 Py_DECREF(new);
 517                 Py_INCREF(args);
 518                 return args;
 519         }
 520         return new;
 521 }
 522
 523
 524 static char capitalize__doc__[] =
 525 "capitalize(s) -> string\n"
 526 "\n"
 527 "Return a copy of the string s with only its first character\n"
 528 "capitalized.";
 529
 530 static PyObject *
 531 strop_capitalize(PyObject *self, PyObject *args)
 532 {
 533         char *s, *s_new;
 534         int i, n;
 535         PyObject *new;
 536         int changed;
 537
 538         WARN;
 539         if (!PyArg_Parse(args, "t#", &s, &n))
 540                 return NULL;
 541         new = PyString_FromStringAndSize(NULL, n);
 542         if (new == NULL)
 543                 return NULL;
 544         s_new = PyString_AsString(new);
 545         changed = 0;
 546         if (0 < n) {
 547                 int c = Py_CHARMASK(*s++);
 548                 if (islower(c)) {
 549                         changed = 1;
 550                         *s_new = toupper(c);
 551                 } else
 552                         *s_new = c;
 553                 s_new++;
 554         }
 555         for (i = 1; i < n; i++) {
 556                 int c = Py_CHARMASK(*s++);
 557                 if (isupper(c)) {
 558                         changed = 1;
 559                         *s_new = tolower(c);
 560                 } else
 561                         *s_new = c;
 562                 s_new++;
 563         }
 564         if (!changed) {
 565                 Py_DECREF(new);
 566                 Py_INCREF(args);
 567                 return args;
 568         }
 569         return new;
 570 }
 571
 572
 573 static char expandtabs__doc__[] =
 574 "expandtabs(string, [tabsize]) -> string\n"
 575 "\n"
 576 "Expand tabs in a string, i.e. replace them by one or more spaces,\n"
 577 "depending on the current column and the given tab size (default 8).\n"
 578 "The column number is reset to zero after each newline occurring in the\n"
 579 "string.  This doesn't understand other non-printing characters.";
 580
 581 static PyObject *
 582 strop_expandtabs(PyObject *self, PyObject *args)
 583 {
 584         /* Original by Fredrik Lundh */
 585         char* e;
 586         char* p;
 587         char* q;
 588         int i, j;
 589         PyObject* out;
 590         char* string;
 591         int stringlen;
 592         int tabsize = 8;
 593
 594         WARN;
 595         /* Get arguments */
 596         if (!PyArg_ParseTuple(args, "s#|i:expandtabs", &string, &stringlen, &tabsize))
 597                 return NULL;
 598         if (tabsize < 1) {
 599                 PyErr_SetString(PyExc_ValueError,
 600                                 "tabsize must be at least 1");
 601                 return NULL;
 602         }
 603
 604         /* First pass: determine size of output string */
 605         i = j = 0; /* j: current column; i: total of previous lines */
 606         e = string + stringlen;
 607         for (p = string; p < e; p++) {
 608                 if (*p == '\t')
 609                         j += tabsize - (j%tabsize);
 610                 else {
 611                         j++;
 612                         if (*p == '\n') {
 613                                 i += j;
 614                                 j = 0;
 615                         }
 616                 }
 617         }
 618
 619         /* Second pass: create output string and fill it */
 620         out = PyString_FromStringAndSize(NULL, i+j);
 621         if (out == NULL)
 622                 return NULL;
 623
 624         i = 0;
 625         q = PyString_AS_STRING(out);
 626
 627         for (p = string; p < e; p++) {
 628                 if (*p == '\t') {
 629                         j = tabsize - (i%tabsize);
 630                         i += j;
 631                         while (j-- > 0)
 632                                 *q++ = ' ';
 633                 } else {
 634                         *q++ = *p;
 635                         i++;
 636                         if (*p == '\n')
 637                                 i = 0;
 638                 }
 639         }
 640
 641         return out;
 642 }
 643
 644
 645 static char count__doc__[] =
 646 "count(s, sub[, start[, end]]) -> int\n"
 647 "\n"
 648 "Return the number of occurrences of substring sub in string\n"
 649 "s[start:end].  Optional arguments start and end are\n"
 650 "interpreted as in slice notation.";
 651
 652 static PyObject *
 653 strop_count(PyObject *self, PyObject *args)
 654 {
 655         char *s, *sub;
 656         int len, n;
 657         int i = 0, last = INT_MAX;
 658         int m, r;
 659
 660         WARN;
 661         if (!PyArg_ParseTuple(args, "t#t#|ii:count", &s, &len, &sub, &n, &i, &last))
 662                 return NULL;
 663         if (last > len)
 664                 last = len;
 665         if (last < 0)
 666                 last += len;
 667         if (last < 0)
 668                 last = 0;
 669         if (i < 0)
 670                 i += len;
 671         if (i < 0)
 672                 i = 0;
 673         m = last + 1 - n;
 674         if (n == 0)
 675                 return PyInt_FromLong((long) (m-i));
 676
 677         r = 0;
 678         while (i < m) {
 679                 if (!memcmp(s+i, sub, n)) {
 680                         r++;
 681                         i += n;
 682                 } else {
 683                         i++;
 684                 }
 685         }
 686         return PyInt_FromLong((long) r);
 687 }
 688
 689
 690 static char swapcase__doc__[] =
 691 "swapcase(s) -> string\n"
 692 "\n"
 693 "Return a copy of the string s with upper case characters\n"
 694 "converted to lowercase and vice versa.";
 695
 696 static PyObject *
 697 strop_swapcase(PyObject *self, PyObject *args)
 698 {
 699         char *s, *s_new;
 700         int i, n;
 701         PyObject *new;
 702         int changed;
 703
 704         WARN;
 705         if (!PyArg_Parse(args, "t#", &s, &n))
 706                 return NULL;
 707         new = PyString_FromStringAndSize(NULL, n);
 708         if (new == NULL)
 709                 return NULL;
 710         s_new = PyString_AsString(new);
 711         changed = 0;
 712         for (i = 0; i < n; i++) {
 713                 int c = Py_CHARMASK(*s++);
 714                 if (islower(c)) {
 715                         changed = 1;
 716                         *s_new = toupper(c);
 717                 }
 718                 else if (isupper(c)) {
 719                         changed = 1;
 720                         *s_new = tolower(c);
 721                 }
 722                 else
 723                         *s_new = c;
 724                 s_new++;
 725         }
 726         if (!changed) {
 727                 Py_DECREF(new);
 728                 Py_INCREF(args);
 729                 return args;
 730         }
 731         return new;
 732 }
 733
 734
 735 static char atoi__doc__[] =
 736 "atoi(s [,base]) -> int\n"
 737 "\n"
 738 "Return the integer represented by the string s in the given\n"
 739 "base, which defaults to 10.  The string s must consist of one\n"
 740 "or more digits, possibly preceded by a sign.  If base is 0, it\n"
 741 "is chosen from the leading characters of s, 0 for octal, 0x or\n"
 742 "0X for hexadecimal.  If base is 16, a preceding 0x or 0X is\n"
 743 "accepted.";
 744
 745 static PyObject *
 746 strop_atoi(PyObject *self, PyObject *args)
 747 {
 748         char *s, *end;
 749         int base = 10;
 750         long x;
 751         char buffer[256]; /* For errors */
 752
 753         WARN;
 754         if (!PyArg_ParseTuple(args, "s|i:atoi", &s, &base))
 755                 return NULL;
 756
 757         if ((base != 0 && base < 2) || base > 36) {
 758                 PyErr_SetString(PyExc_ValueError, "invalid base for atoi()");
 759                 return NULL;
 760         }
 761
 762         while (*s && isspace(Py_CHARMASK(*s)))
 763                 s++;
 764         errno = 0;
 765         if (base == 0 && s[0] == '0')
 766                 x = (long) PyOS_strtoul(s, &end, base);
 767         else
 768                 x = PyOS_strtol(s, &end, base);
 769         if (end == s || !isalnum(end[-1]))
 770                 goto bad;
 771         while (*end && isspace(Py_CHARMASK(*end)))
 772                 end++;
 773         if (*end != '\0') {
 774   bad:
 775                 sprintf(buffer, "invalid literal for atoi(): %.200s", s);
 776                 PyErr_SetString(PyExc_ValueError, buffer);
 777                 return NULL;
 778         }
 779         else if (errno != 0) {
 780                 sprintf(buffer, "atoi() literal too large: %.200s", s);
 781                 PyErr_SetString(PyExc_ValueError, buffer);
 782                 return NULL;
 783         }
 784         return PyInt_FromLong(x);
 785 }
 786
 787
 788 static char atol__doc__[] =
 789 "atol(s [,base]) -> long\n"
 790 "\n"
 791 "Return the long integer represented by the string s in the\n"
 792 "given base, which defaults to 10.  The string s must consist\n"
 793 "of one or more digits, possibly preceded by a sign.  If base\n"
 794 "is 0, it is chosen from the leading characters of s, 0 for\n"
 795 "octal, 0x or 0X for hexadecimal.  If base is 16, a preceding\n"
 796 "0x or 0X is accepted.  A trailing L or l is not accepted,\n"
 797 "unless base is 0.";
 798
 799 static PyObject *
 800 strop_atol(PyObject *self, PyObject *args)
 801 {
 802         char *s, *end;
 803         int base = 10;
 804         PyObject *x;
 805         char buffer[256]; /* For errors */
 806
 807         WARN;
 808         if (!PyArg_ParseTuple(args, "s|i:atol", &s, &base))
 809                 return NULL;
 810
 811         if ((base != 0 && base < 2) || base > 36) {
 812                 PyErr_SetString(PyExc_ValueError, "invalid base for atol()");
 813                 return NULL;
 814         }
 815
 816         while (*s && isspace(Py_CHARMASK(*s)))
 817                 s++;
 818         if (s[0] == '\0') {
 819                 PyErr_SetString(PyExc_ValueError, "empty string for atol()");
 820                 return NULL;
 821         }
 822         x = PyLong_FromString(s, &end, base);
 823         if (x == NULL)
 824                 return NULL;
 825         if (base == 0 && (*end == 'l' || *end == 'L'))
 826                 end++;
 827         while (*end && isspace(Py_CHARMASK(*end)))
 828                 end++;
 829         if (*end != '\0') {
 830                 sprintf(buffer, "invalid literal for atol(): %.200s", s);
 831                 PyErr_SetString(PyExc_ValueError, buffer);
 832                 Py_DECREF(x);
 833                 return NULL;
 834         }
 835         return x;
 836 }
 837
 838
 839 static char atof__doc__[] =
 840 "atof(s) -> float\n"
 841 "\n"
 842 "Return the floating point number represented by the string s.";
 843
 844 static PyObject *
 845 strop_atof(PyObject *self, PyObject *args)
 846 {
 847         extern double strtod(const char *, char **);
 848         char *s, *end;
 849         double x;
 850         char buffer[256]; /* For errors */
 851
 852         WARN;
 853         if (!PyArg_ParseTuple(args, "s:atof", &s))
 854                 return NULL;
 855         while (*s && isspace(Py_CHARMASK(*s)))
 856                 s++;
 857         if (s[0] == '\0') {
 858                 PyErr_SetString(PyExc_ValueError, "empty string for atof()");
 859                 return NULL;
 860         }
 861         errno = 0;
 862         PyFPE_START_PROTECT("strop_atof", return 0)
 863         x = strtod(s, &end);
 864         PyFPE_END_PROTECT(x)
 865         while (*end && isspace(Py_CHARMASK(*end)))
 866                 end++;
 867         if (*end != '\0') {
 868                 sprintf(buffer, "invalid literal for atof(): %.200s", s);
 869                 PyErr_SetString(PyExc_ValueError, buffer);
 870                 return NULL;
 871         }
 872         else if (errno != 0) {
 873                 sprintf(buffer, "atof() literal too large: %.200s", s);
 874                 PyErr_SetString(PyExc_ValueError, buffer);
 875                 return NULL;
 876         }
 877         return PyFloat_FromDouble(x);
 878 }
 879
 880
 881 static char maketrans__doc__[] =
 882 "maketrans(frm, to) -> string\n"
 883 "\n"
 884 "Return a translation table (a string of 256 bytes long)\n"
 885 "suitable for use in string.translate.  The strings frm and to\n"
 886 "must be of the same length.";
 887
 888 static PyObject *
 889 strop_maketrans(PyObject *self, PyObject *args)
 890 {
 891         unsigned char *c, *from=NULL, *to=NULL;
 892         int i, fromlen=0, tolen=0;
 893         PyObject *result;
 894
 895         if (!PyArg_ParseTuple(args, "t#t#:maketrans", &from, &fromlen, &to, &tolen))
 896                 return NULL;
 897
 898         if (fromlen != tolen) {
 899                 PyErr_SetString(PyExc_ValueError,
 900                                 "maketrans arguments must have same length");
 901                 return NULL;
 902         }
 903
 904         result = PyString_FromStringAndSize((char *)NULL, 256);
 905         if (result == NULL)
 906                 return NULL;
 907         c = (unsigned char *) PyString_AS_STRING((PyStringObject *)result);
 908         for (i = 0; i < 256; i++)
 909                 c[i]=(unsigned char)i;
 910         for (i = 0; i < fromlen; i++)
 911                 c[from[i]]=to[i];
 912
 913         return result;
 914 }
 915
 916
 917 static char translate__doc__[] =
 918 "translate(s,table [,deletechars]) -> string\n"
 919 "\n"
 920 "Return a copy of the string s, where all characters occurring\n"
 921 "in the optional argument deletechars are removed, and the\n"
 922 "remaining characters have been mapped through the given\n"
 923 "translation table, which must be a string of length 256.";
 924
 925 static PyObject *
 926 strop_translate(PyObject *self, PyObject *args)
 927 {
 928         register char *input, *table, *output;
 929         register int i, c, changed = 0;
 930         PyObject *input_obj;
 931         char *table1, *output_start, *del_table=NULL;
 932         int inlen, tablen, dellen = 0;
 933         PyObject *result;
 934         int trans_table[256];
 935
 936         WARN;
 937         if (!PyArg_ParseTuple(args, "St#|t#:translate", &input_obj,
 938                               &table1, &tablen, &del_table, &dellen))
 939                 return NULL;
 940         if (tablen != 256) {
 941                 PyErr_SetString(PyExc_ValueError,
 942                               "translation table must be 256 characters long");
 943                 return NULL;
 944         }
 945
 946         table = table1;
 947         inlen = PyString_Size(input_obj);
 948         result = PyString_FromStringAndSize((char *)NULL, inlen);
 949         if (result == NULL)
 950                 return NULL;
 951         output_start = output = PyString_AsString(result);
 952         input = PyString_AsString(input_obj);
 953
 954         if (dellen == 0) {
 955                 /* If no deletions are required, use faster code */
 956                 for (i = inlen; --i >= 0; ) {
 957                         c = Py_CHARMASK(*input++);
 958                         if (Py_CHARMASK((*output++ = table[c])) != c)
 959                                 changed = 1;
 960                 }
 961                 if (changed)
 962                         return result;
 963                 Py_DECREF(result);
 964                 Py_INCREF(input_obj);
 965                 return input_obj;
 966         }
 967
 968         for (i = 0; i < 256; i++)
 969                 trans_table[i] = Py_CHARMASK(table[i]);
 970
 971         for (i = 0; i < dellen; i++)
 972                 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
 973
 974         for (i = inlen; --i >= 0; ) {
 975                 c = Py_CHARMASK(*input++);
 976                 if (trans_table[c] != -1)
 977                         if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
 978                                 continue;
 979                 changed = 1;
 980         }
 981         if (!changed) {
 982                 Py_DECREF(result);
 983                 Py_INCREF(input_obj);
 984                 return input_obj;
 985         }
 986         /* Fix the size of the resulting string */
 987         if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
 988                 return NULL;
 989         return result;
 990 }
 991
 992
 993 /* What follows is used for implementing replace().  Perry Stoll. */
 994
 995 /*
 996   mymemfind
 997
 998   strstr replacement for arbitrary blocks of memory.
 999
1000   Locates the first occurrence in the memory pointed to by MEM of the
1001   contents of memory pointed to by PAT.  Returns the index into MEM if
1002   found, or -1 if not found.  If len of PAT is greater than length of
1003   MEM, the function returns -1.
1004 */
1005 static int
1006 mymemfind(const char *mem, int len, const char *pat, int pat_len)
1007 {
1008         register int ii;
1009
1010         /* pattern can not occur in the last pat_len-1 chars */
1011         len -= pat_len;
1012
1013         for (ii = 0; ii <= len; ii++) {
1014                 if (mem[ii] == pat[0] &&
1015                     (pat_len == 1 ||
1016                      memcmp(&mem[ii+1], &pat[1], pat_len-1) == 0)) {
1017                         return ii;
1018                 }
1019         }
1020         return -1;
1021 }
1022
/*
  mymemcnt

   Count the non-overlapping occurrences of PAT (PAT_LEN bytes) in MEM
   (LEN bytes), scanning left to right and resuming the search right
   after each match.  For example:
           mem=1111  and pat==11 gives 2,
           mem=11111 and pat==11 gives 2 as well.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
        int hits = 0;
        int skip;

        for (; len >= 0; hits++) {
                skip = mymemfind(mem, len, pat, pat_len);
                if (skip == -1)
                        break;
                /* step over the unmatched prefix and the match itself */
                skip += pat_len;
                mem += skip;
                len -= skip;
        }
        return hits;
}
1046
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT occurrences are replaced; a
   negative COUNT means "replace all of them".

   If STR is empty, if the length of PAT is greater than the length of
   STR, or if there are no occurrences of PAT in STR, then the original
   string is returned.  Otherwise, a new string is allocated here with
   PyMem_MALLOC and returned; the caller owns that buffer and must
   release it.  The buffer is NOT NUL-terminated (except in the
   zero-length case) -- use out_len.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result length
       would not fit in an int).

   return value is:
       the new string allocated locally, or
       the input pointer STR (when out_len is -1), or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,          /* input string */
             const char *pat, int pat_len,      /* pattern string to find */
             const char *sub, int sub_len,      /* substitution string */
             int count,                         /* number of replacements */
             int *out_len)
{
        char *out_s;
        char *new_s;
        int nfound, offset, new_len, delta;

        if (len == 0 || pat_len > len)
                goto return_same;

        /* find length of output string */
        nfound = mymemcnt(str, len, pat, pat_len);
        if (count < 0)
                count = INT_MAX;
        else if (nfound > count)
                nfound = count;
        if (nfound == 0)
                goto return_same;

        /* Growth per replacement.  Only a positive delta can overflow:
           when SUB is shorter than PAT the result shrinks, and since the
           nfound matches do not overlap it never drops below zero. */
        delta = sub_len - pat_len;
        if (delta > 0 && nfound > (INT_MAX - len) / delta) {
                /* len + nfound*delta would exceed INT_MAX; refuse rather
                   than allocate a wrapped-around (too small) buffer. */
                return NULL;
        }
        new_len = len + nfound*delta;

        if (new_len == 0) {
                /* Have to allocate something for the caller to free(). */
                out_s = (char *)PyMem_MALLOC(1);
                if (out_s == NULL)
                        return NULL;
                out_s[0] = '\0';
        }
        else {
                assert(new_len > 0);
                new_s = (char *)PyMem_MALLOC(new_len);
                if (new_s == NULL)
                        return NULL;
                out_s = new_s;

                for (; count > 0 && len > 0; --count) {
                        /* find index of next instance of pattern */
                        offset = mymemfind(str, len, pat, pat_len);
                        if (offset == -1)
                                break;

                        /* copy non matching part of input string */
                        memcpy(new_s, str, offset);
                        str += offset + pat_len;
                        len -= offset + pat_len;

                        /* copy substitute into the output string */
                        new_s += offset;
                        memcpy(new_s, sub, sub_len);
                        new_s += sub_len;
                }
                /* copy any remaining values into output string */
                if (len > 0)
                        memcpy(new_s, str, len);
        }
        *out_len = new_len;
        return out_s;

  return_same:
        *out_len = -1;
        return (char *)str; /* cast away const */
}
1131
1132
1133 static char replace__doc__[] =
1134 "replace (str, old, new[, maxsplit]) -> string\n"
1135 "\n"
1136 "Return a copy of string str with all occurrences of substring\n"
1137 "old replaced by new. If the optional argument maxsplit is\n"
1138 "given, only the first maxsplit occurrences are replaced.";
1139
1140 static PyObject *
1141 strop_replace(PyObject *self, PyObject *args)
1142 {
1143         char *str, *pat,*sub,*new_s;
1144         int len,pat_len,sub_len,out_len;
1145         int count = -1;
1146         PyObject *new;
1147
1148         WARN;
1149         if (!PyArg_ParseTuple(args, "t#t#t#|i:replace",
1150                               &str, &len, &pat, &pat_len, &sub, &sub_len,
1151                               &count))
1152                 return NULL;
1153         if (pat_len <= 0) {
1154                 PyErr_SetString(PyExc_ValueError, "empty pattern string");
1155                 return NULL;
1156         }
1157         /* CAUTION:  strop treats a replace count of 0 as infinity, unlke
1158          * current (2.1) string.py and string methods.  Preserve this for
1159          * ... well, hard to say for what <wink>.
1160          */
1161         if (count == 0)
1162                 count = -1;
1163         new_s = mymemreplace(str,len,pat,pat_len,sub,sub_len,count,&out_len);
1164         if (new_s == NULL) {
1165                 PyErr_NoMemory();
1166                 return NULL;
1167         }
1168         if (out_len == -1) {
1169                 /* we're returning another reference to the input string */
1170                 new = PyTuple_GetItem(args, 0);
1171                 Py_XINCREF(new);
1172         }
1173         else {
1174                 new = PyString_FromStringAndSize(new_s, out_len);
1175                 PyMem_FREE(new_s);
1176         }
1177         return new;
1178 }
1179
1180
/* List of functions defined in the module.

   This table is handed to Py_InitModule4() in initstrop().  Each entry
   gives the Python-visible name, the C function implementing it, a
   calling-convention flag (METH_VARARGS or METH_OLDARGS) and the
   docstring array for that function.  Entries are kept in alphabetical
   order by name, apart from "lstrip", which is listed before "lower". */

static PyMethodDef
strop_methods[] = {
        {"atof",        strop_atof,        METH_VARARGS, atof__doc__},
        {"atoi",        strop_atoi,        METH_VARARGS, atoi__doc__},
        {"atol",        strop_atol,        METH_VARARGS, atol__doc__},
        {"capitalize",  strop_capitalize,  METH_OLDARGS, capitalize__doc__},
        {"count",       strop_count,       METH_VARARGS, count__doc__},
        {"expandtabs",  strop_expandtabs,  METH_VARARGS, expandtabs__doc__},
        {"find",        strop_find,        METH_VARARGS, find__doc__},
        /* "join" and "joinfields" are two names for the same function */
        {"join",        strop_joinfields,  METH_VARARGS, joinfields__doc__},
        {"joinfields",  strop_joinfields,  METH_VARARGS, joinfields__doc__},
        {"lstrip",      strop_lstrip,      METH_OLDARGS, lstrip__doc__},
        {"lower",       strop_lower,       METH_OLDARGS, lower__doc__},
        {"maketrans",   strop_maketrans,   METH_VARARGS, maketrans__doc__},
        {"replace",     strop_replace,     METH_VARARGS, replace__doc__},
        {"rfind",       strop_rfind,       METH_VARARGS, rfind__doc__},
        {"rstrip",      strop_rstrip,      METH_OLDARGS, rstrip__doc__},
        /* "split" and "splitfields" are two names for the same function */
        {"split",       strop_splitfields, METH_VARARGS, splitfields__doc__},
        {"splitfields", strop_splitfields, METH_VARARGS, splitfields__doc__},
        {"strip",       strop_strip,       METH_OLDARGS, strip__doc__},
        {"swapcase",    strop_swapcase,    METH_OLDARGS, swapcase__doc__},
        {"translate",   strop_translate,   METH_VARARGS, translate__doc__},
        {"upper",       strop_upper,       METH_OLDARGS, upper__doc__},
        {NULL,          NULL}   /* sentinel */
};
1208
1209
1210 DL_EXPORT(void)
1211 initstrop(void)
1212 {
1213         PyObject *m, *d, *s;
1214         char buf[256];
1215         int c, n;
1216         m = Py_InitModule4("strop", strop_methods, strop_module__doc__,
1217                            (PyObject*)NULL, PYTHON_API_VERSION);
1218         d = PyModule_GetDict(m);
1219
1220         /* Create 'whitespace' object */
1221         n = 0;
1222         for (c = 0; c < 256; c++) {
1223                 if (isspace(c))
1224                         buf[n++] = c;
1225         }
1226         s = PyString_FromStringAndSize(buf, n);
1227         if (s) {
1228                 PyDict_SetItemString(d, "whitespace", s);
1229                 Py_DECREF(s);
1230         }
1231         /* Create 'lowercase' object */
1232         n = 0;
1233         for (c = 0; c < 256; c++) {
1234                 if (islower(c))
1235                         buf[n++] = c;
1236         }
1237         s = PyString_FromStringAndSize(buf, n);
1238         if (s) {
1239                 PyDict_SetItemString(d, "lowercase", s);
1240                 Py_DECREF(s);
1241         }
1242
1243         /* Create 'uppercase' object */
1244         n = 0;
1245         for (c = 0; c < 256; c++) {
1246                 if (isupper(c))
1247                         buf[n++] = c;
1248         }
1249         s = PyString_FromStringAndSize(buf, n);
1250         if (s) {
1251                 PyDict_SetItemString(d, "uppercase", s);
1252                 Py_DECREF(s);
1253         }
1254 }