Modules/stropmodule.c

   1
   2 /* strop module */
   3
   /* Documentation text describing the strop module as a whole. */
   static char strop_module__doc__[] =
           "Common string manipulations, optimized for speed.\n"
           "\n"
           "Always use \"import string\" rather than referencing\n"
           "this module directly.";
   9
  10 #include "Python.h"
  11
  12 #include <ctype.h>
  13 /* XXX This file assumes that the <ctype.h> is*() functions
  14    XXX are defined for all 8-bit characters! */
  15
  16 /* The lstrip(), rstrip() and strip() functions are implemented
  17    in do_strip(), which uses an additional parameter to indicate what
  18    type of strip should occur. */
  19
  20 #define LEFTSTRIP 0
  21 #define RIGHTSTRIP 1
  22 #define BOTHSTRIP 2
  23
  24
  25 static PyObject *
  26 split_whitespace(char *s, int len, int maxsplit)
  27 {
  28         int i = 0, j, err;
  29         int countsplit = 0;
  30         PyObject* item;
  31         PyObject *list = PyList_New(0);
  32
  33         if (list == NULL)
  34                 return NULL;
  35
  36         while (i < len) {
  37                 while (i < len && isspace(Py_CHARMASK(s[i]))) {
  38                         i = i+1;
  39                 }
  40                 j = i;
  41                 while (i < len && !isspace(Py_CHARMASK(s[i]))) {
  42                         i = i+1;
  43                 }
  44                 if (j < i) {
  45                         item = PyString_FromStringAndSize(s+j, (int)(i-j));
  46                         if (item == NULL)
  47                                 goto finally;
  48
  49                         err = PyList_Append(list, item);
  50                         Py_DECREF(item);
  51                         if (err < 0)
  52                                 goto finally;
  53
  54                         countsplit++;
  55                         while (i < len && isspace(Py_CHARMASK(s[i]))) {
  56                                 i = i+1;
  57                         }
  58                         if (maxsplit && (countsplit >= maxsplit) && i < len) {
  59                                 item = PyString_FromStringAndSize(
  60                                         s+i, (int)(len - i));
  61                                 if (item == NULL)
  62                                         goto finally;
  63
  64                                 err = PyList_Append(list, item);
  65                                 Py_DECREF(item);
  66                                 if (err < 0)
  67                                         goto finally;
  68
  69                                 i = len;
  70                         }
  71                 }
  72         }
  73         return list;
  74   finally:
  75         Py_DECREF(list);
  76         return NULL;
  77 }
  78
  79
  80 static char splitfields__doc__[] =
  81 "split(s [,sep [,maxsplit]]) -> list of strings\n\
  82 splitfields(s [,sep [,maxsplit]]) -> list of strings\n\
  83 \n\
  84 Return a list of the words in the string s, using sep as the\n\
  85 delimiter string.  If maxsplit is nonzero, splits into at most\n\
  86 maxsplit words.  If sep is not specified, any whitespace string\n\
  87 is a separator.  Maxsplit defaults to 0.\n\
  88 \n\
  89 (split and splitfields are synonymous)";
  90
  91 static PyObject *
  92 strop_splitfields(PyObject *self, PyObject *args)
  93 {
  94         int len, n, i, j, err;
  95         int splitcount, maxsplit;
  96         char *s, *sub;
  97         PyObject *list, *item;
  98
  99         sub = NULL;
 100         n = 0;
 101         splitcount = 0;
 102         maxsplit = 0;
 103         if (!PyArg_ParseTuple(args, "t#|z#i:split", &s, &len, &sub, &n, &maxsplit))
 104                 return NULL;
 105         if (sub == NULL)
 106                 return split_whitespace(s, len, maxsplit);
 107         if (n == 0) {
 108                 PyErr_SetString(PyExc_ValueError, "empty separator");
 109                 return NULL;
 110         }
 111
 112         list = PyList_New(0);
 113         if (list == NULL)
 114                 return NULL;
 115
 116         i = j = 0;
 117         while (i+n <= len) {
 118                 if (s[i] == sub[0] && (n == 1 || memcmp(s+i, sub, n) == 0)) {
 119                         item = PyString_FromStringAndSize(s+j, (int)(i-j));
 120                         if (item == NULL)
 121                                 goto fail;
 122                         err = PyList_Append(list, item);
 123                         Py_DECREF(item);
 124                         if (err < 0)
 125                                 goto fail;
 126                         i = j = i + n;
 127                         splitcount++;
 128                         if (maxsplit && (splitcount >= maxsplit))
 129                                 break;
 130                 }
 131                 else
 132                         i++;
 133         }
 134         item = PyString_FromStringAndSize(s+j, (int)(len-j));
 135         if (item == NULL)
 136                 goto fail;
 137         err = PyList_Append(list, item);
 138         Py_DECREF(item);
 139         if (err < 0)
 140                 goto fail;
 141
 142         return list;
 143
 144  fail:
 145         Py_DECREF(list);
 146         return NULL;
 147 }
 148
 149
 150 static char joinfields__doc__[] =
 151 "join(list [,sep]) -> string\n\
 152 joinfields(list [,sep]) -> string\n\
 153 \n\
 154 Return a string composed of the words in list, with\n\
 155 intervening occurrences of sep.  Sep defaults to a single\n\
 156 space.\n\
 157 \n\
 158 (join and joinfields are synonymous)";
 159
 160 static PyObject *
 161 strop_joinfields(PyObject *self, PyObject *args)
 162 {
 163         PyObject *seq;
 164         char *sep = NULL;
 165         int seqlen, seplen = 0;
 166         int i, reslen = 0, slen = 0, sz = 100;
 167         PyObject *res = NULL;
 168         char* p = NULL;
 169         intargfunc getitemfunc;
 170
 171         if (!PyArg_ParseTuple(args, "O|t#:join", &seq, &sep, &seplen))
 172                 return NULL;
 173         if (sep == NULL) {
 174                 sep = " ";
 175                 seplen = 1;
 176         }
 177
 178         seqlen = PySequence_Size(seq);
 179         if (seqlen < 0 && PyErr_Occurred())
 180                 return NULL;
 181
 182         if (seqlen == 1) {
 183                 /* Optimization if there's only one item */
 184                 PyObject *item = PySequence_GetItem(seq, 0);
 185                 if (item && !PyString_Check(item)) {
 186                         PyErr_SetString(PyExc_TypeError,
 187                                  "first argument must be sequence of strings");
 188                         Py_DECREF(item);
 189                         return NULL;
 190                 }
 191                 return item;
 192         }
 193
 194         if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
 195                 return NULL;
 196         p = PyString_AsString(res);
 197
 198         /* optimize for lists, since it's the most common case.  all others
 199          * (tuples and arbitrary sequences) just use the sequence abstract
 200          * interface.
 201          */
 202         if (PyList_Check(seq)) {
 203                 for (i = 0; i < seqlen; i++) {
 204                         PyObject *item = PyList_GET_ITEM(seq, i);
 205                         if (!PyString_Check(item)) {
 206                                 PyErr_SetString(PyExc_TypeError,
 207                                 "first argument must be sequence of strings");
 208                                 Py_DECREF(res);
 209                                 return NULL;
 210                         }
 211                         slen = PyString_GET_SIZE(item);
 212                         while (reslen + slen + seplen >= sz) {
 213                                 if (_PyString_Resize(&res, sz * 2)) {
 214                                         Py_DECREF(res);
 215                                         return NULL;
 216                                 }
 217                                 sz *= 2;
 218                                 p = PyString_AsString(res) + reslen;
 219                         }
 220                         if (i > 0) {
 221                                 memcpy(p, sep, seplen);
 222                                 p += seplen;
 223                                 reslen += seplen;
 224                         }
 225                         memcpy(p, PyString_AS_STRING(item), slen);
 226                         p += slen;
 227                         reslen += slen;
 228                 }
 229                 if (_PyString_Resize(&res, reslen)) {
 230                         Py_DECREF(res);
 231                         res = NULL;
 232                 }
 233                 return res;
 234         }
 235
 236         if (seq->ob_type->tp_as_sequence == NULL ||
 237                  (getitemfunc = seq->ob_type->tp_as_sequence->sq_item) == NULL)
 238         {
 239                 PyErr_SetString(PyExc_TypeError,
 240                                 "first argument must be a sequence");
 241                 return NULL;
 242         }
 243         /* This is now type safe */
 244         for (i = 0; i < seqlen; i++) {
 245                 PyObject *item = getitemfunc(seq, i);
 246                 if (!item || !PyString_Check(item)) {
 247                         PyErr_SetString(PyExc_TypeError,
 248                                  "first argument must be sequence of strings");
 249                         Py_DECREF(res);
 250                         Py_XDECREF(item);
 251                         return NULL;
 252                 }
 253                 slen = PyString_GET_SIZE(item);
 254                 while (reslen + slen + seplen >= sz) {
 255                         if (_PyString_Resize(&res, sz * 2)) {
 256                                 Py_DECREF(res);
 257                                 Py_DECREF(item);
 258                                 return NULL;
 259                         }
 260                         sz *= 2;
 261                         p = PyString_AsString(res) + reslen;
 262                 }
 263                 if (i > 0) {
 264                         memcpy(p, sep, seplen);
 265                         p += seplen;
 266                         reslen += seplen;
 267                 }
 268                 memcpy(p, PyString_AS_STRING(item), slen);
 269                 p += slen;
 270                 reslen += slen;
 271                 Py_DECREF(item);
 272         }
 273         if (_PyString_Resize(&res, reslen)) {
 274                 Py_DECREF(res);
 275                 res = NULL;
 276         }
 277         return res;
 278 }
 279
 280
 281 static char find__doc__[] =
 282 "find(s, sub [,start [,end]]) -> in\n\
 283 \n\
 284 Return the lowest index in s where substring sub is found,\n\
 285 such that sub is contained within s[start,end].  Optional\n\
 286 arguments start and end are interpreted as in slice notation.\n\
 287 \n\
 288 Return -1 on failure.";
 289
 290 static PyObject *
 291 strop_find(PyObject *self, PyObject *args)
 292 {
 293         char *s, *sub;
 294         int len, n, i = 0, last = INT_MAX;
 295
 296         if (!PyArg_ParseTuple(args, "t#t#|ii:find", &s, &len, &sub, &n, &i, &last))
 297                 return NULL;
 298
 299         if (last > len)
 300                 last = len;
 301         if (last < 0)
 302                 last += len;
 303         if (last < 0)
 304                 last = 0;
 305         if (i < 0)
 306                 i += len;
 307         if (i < 0)
 308                 i = 0;
 309
 310         if (n == 0 && i <= last)
 311                 return PyInt_FromLong((long)i);
 312
 313         last -= n;
 314         for (; i <= last; ++i)
 315                 if (s[i] == sub[0] &&
 316                     (n == 1 || memcmp(&s[i+1], &sub[1], n-1) == 0))
 317                         return PyInt_FromLong((long)i);
 318
 319         return PyInt_FromLong(-1L);
 320 }
 321
 322
 323 static char rfind__doc__[] =
 324 "rfind(s, sub [,start [,end]]) -> int\n\
 325 \n\
 326 Return the highest index in s where substring sub is found,\n\
 327 such that sub is contained within s[start,end].  Optional\n\
 328 arguments start and end are interpreted as in slice notation.\n\
 329 \n\
 330 Return -1 on failure.";
 331
 332 static PyObject *
 333 strop_rfind(PyObject *self, PyObject *args)
 334 {
 335         char *s, *sub;
 336         int len, n, j;
 337         int i = 0, last = INT_MAX;
 338
 339         if (!PyArg_ParseTuple(args, "t#t#|ii:rfind", &s, &len, &sub, &n, &i, &last))
 340                 return NULL;
 341
 342         if (last > len)
 343                 last = len;
 344         if (last < 0)
 345                 last += len;
 346         if (last < 0)
 347                 last = 0;
 348         if (i < 0)
 349                 i += len;
 350         if (i < 0)
 351                 i = 0;
 352
 353         if (n == 0 && i <= last)
 354                 return PyInt_FromLong((long)last);
 355
 356         for (j = last-n; j >= i; --j)
 357                 if (s[j] == sub[0] &&
 358                     (n == 1 || memcmp(&s[j+1], &sub[1], n-1) == 0))
 359                         return PyInt_FromLong((long)j);
 360
 361         return PyInt_FromLong(-1L);
 362 }
 363
 364
 365 static PyObject *
 366 do_strip(PyObject *args, int striptype)
 367 {
 368         char *s;
 369         int len, i, j;
 370
 371
 372         if (!PyArg_Parse(args, "t#", &s, &len))
 373                 return NULL;
 374
 375         i = 0;
 376         if (striptype != RIGHTSTRIP) {
 377                 while (i < len && isspace(Py_CHARMASK(s[i]))) {
 378                         i++;
 379                 }
 380         }
 381
 382         j = len;
 383         if (striptype != LEFTSTRIP) {
 384                 do {
 385                         j--;
 386                 } while (j >= i && isspace(Py_CHARMASK(s[j])));
 387                 j++;
 388         }
 389
 390         if (i == 0 && j == len) {
 391                 Py_INCREF(args);
 392                 return args;
 393         }
 394         else
 395                 return PyString_FromStringAndSize(s+i, j-i);
 396 }
 397
 398
 399 static char strip__doc__[] =
 400 "strip(s) -> string\n\
 401 \n\
 402 Return a copy of the string s with leading and trailing\n\
 403 whitespace removed.";
 404
 405 static PyObject *
 406 strop_strip(PyObject *self, PyObject *args)
 407 {
 408         return do_strip(args, BOTHSTRIP);
 409 }
 410
 411
 412 static char lstrip__doc__[] =
 413 "lstrip(s) -> string\n\
 414 \n\
 415 Return a copy of the string s with leading whitespace removed.";
 416
 417 static PyObject *
 418 strop_lstrip(PyObject *self, PyObject *args)
 419 {
 420         return do_strip(args, LEFTSTRIP);
 421 }
 422
 423
 424 static char rstrip__doc__[] =
 425 "rstrip(s) -> string\n\
 426 \n\
 427 Return a copy of the string s with trailing whitespace removed.";
 428
 429 static PyObject *
 430 strop_rstrip(PyObject *self, PyObject *args)
 431 {
 432         return do_strip(args, RIGHTSTRIP);
 433 }
 434
 435
 436 static char lower__doc__[] =
 437 "lower(s) -> string\n\
 438 \n\
 439 Return a copy of the string s converted to lowercase.";
 440
 441 static PyObject *
 442 strop_lower(PyObject *self, PyObject *args)
 443 {
 444         char *s, *s_new;
 445         int i, n;
 446         PyObject *new;
 447         int changed;
 448
 449         if (!PyArg_Parse(args, "t#", &s, &n))
 450                 return NULL;
 451         new = PyString_FromStringAndSize(NULL, n);
 452         if (new == NULL)
 453                 return NULL;
 454         s_new = PyString_AsString(new);
 455         changed = 0;
 456         for (i = 0; i < n; i++) {
 457                 int c = Py_CHARMASK(*s++);
 458                 if (isupper(c)) {
 459                         changed = 1;
 460                         *s_new = tolower(c);
 461                 } else
 462                         *s_new = c;
 463                 s_new++;
 464         }
 465         if (!changed) {
 466                 Py_DECREF(new);
 467                 Py_INCREF(args);
 468                 return args;
 469         }
 470         return new;
 471 }
 472
 473
 474 static char upper__doc__[] =
 475 "upper(s) -> string\n\
 476 \n\
 477 Return a copy of the string s converted to uppercase.";
 478
 479 static PyObject *
 480 strop_upper(PyObject *self, PyObject *args)
 481 {
 482         char *s, *s_new;
 483         int i, n;
 484         PyObject *new;
 485         int changed;
 486
 487         if (!PyArg_Parse(args, "t#", &s, &n))
 488                 return NULL;
 489         new = PyString_FromStringAndSize(NULL, n);
 490         if (new == NULL)
 491                 return NULL;
 492         s_new = PyString_AsString(new);
 493         changed = 0;
 494         for (i = 0; i < n; i++) {
 495                 int c = Py_CHARMASK(*s++);
 496                 if (islower(c)) {
 497                         changed = 1;
 498                         *s_new = toupper(c);
 499                 } else
 500                         *s_new = c;
 501                 s_new++;
 502         }
 503         if (!changed) {
 504                 Py_DECREF(new);
 505                 Py_INCREF(args);
 506                 return args;
 507         }
 508         return new;
 509 }
 510
 511
 512 static char capitalize__doc__[] =
 513 "capitalize(s) -> string\n\
 514 \n\
 515 Return a copy of the string s with only its first character\n\
 516 capitalized.";
 517
 518 static PyObject *
 519 strop_capitalize(PyObject *self, PyObject *args)
 520 {
 521         char *s, *s_new;
 522         int i, n;
 523         PyObject *new;
 524         int changed;
 525
 526         if (!PyArg_Parse(args, "t#", &s, &n))
 527                 return NULL;
 528         new = PyString_FromStringAndSize(NULL, n);
 529         if (new == NULL)
 530                 return NULL;
 531         s_new = PyString_AsString(new);
 532         changed = 0;
 533         if (0 < n) {
 534                 int c = Py_CHARMASK(*s++);
 535                 if (islower(c)) {
 536                         changed = 1;
 537                         *s_new = toupper(c);
 538                 } else
 539                         *s_new = c;
 540                 s_new++;
 541         }
 542         for (i = 1; i < n; i++) {
 543                 int c = Py_CHARMASK(*s++);
 544                 if (isupper(c)) {
 545                         changed = 1;
 546                         *s_new = tolower(c);
 547                 } else
 548                         *s_new = c;
 549                 s_new++;
 550         }
 551         if (!changed) {
 552                 Py_DECREF(new);
 553                 Py_INCREF(args);
 554                 return args;
 555         }
 556         return new;
 557 }
 558
 559
 560 static char expandtabs__doc__[] =
 561 "expandtabs(string, [tabsize]) -> string\n\
 562 \n\
 563 Expand tabs in a string, i.e. replace them by one or more spaces,\n\
 564 depending on the current column and the given tab size (default 8).\n\
 565 The column number is reset to zero after each newline occurring in the\n\
 566 string.  This doesn't understand other non-printing characters.";
 567
 568 static PyObject *
 569 strop_expandtabs(PyObject *self, PyObject *args)
 570 {
 571         /* Original by Fredrik Lundh */
 572         char* e;
 573         char* p;
 574         char* q;
 575         int i, j;
 576         PyObject* out;
 577         char* string;
 578         int stringlen;
 579         int tabsize = 8;
 580
 581         /* Get arguments */
 582         if (!PyArg_ParseTuple(args, "s#|i:expandtabs", &string, &stringlen, &tabsize))
 583                 return NULL;
 584         if (tabsize < 1) {
 585                 PyErr_SetString(PyExc_ValueError,
 586                                 "tabsize must be at least 1");
 587                 return NULL;
 588         }
 589
 590         /* First pass: determine size of output string */
 591         i = j = 0; /* j: current column; i: total of previous lines */
 592         e = string + stringlen;
 593         for (p = string; p < e; p++) {
 594                 if (*p == '\t')
 595                         j += tabsize - (j%tabsize);
 596                 else {
 597                         j++;
 598                         if (*p == '\n') {
 599                                 i += j;
 600                                 j = 0;
 601                         }
 602                 }
 603         }
 604
 605         /* Second pass: create output string and fill it */
 606         out = PyString_FromStringAndSize(NULL, i+j);
 607         if (out == NULL)
 608                 return NULL;
 609
 610         i = 0;
 611         q = PyString_AS_STRING(out);
 612
 613         for (p = string; p < e; p++) {
 614                 if (*p == '\t') {
 615                         j = tabsize - (i%tabsize);
 616                         i += j;
 617                         while (j-- > 0)
 618                                 *q++ = ' ';
 619                 } else {
 620                         *q++ = *p;
 621                         i++;
 622                         if (*p == '\n')
 623                                 i = 0;
 624                 }
 625         }
 626
 627         return out;
 628 }
 629
 630
 631 static char count__doc__[] =
 632 "count(s, sub[, start[, end]]) -> int\n\
 633 \n\
 634 Return the number of occurrences of substring sub in string\n\
 635 s[start:end].  Optional arguments start and end are\n\
 636 interpreted as in slice notation.";
 637
 638 static PyObject *
 639 strop_count(PyObject *self, PyObject *args)
 640 {
 641         char *s, *sub;
 642         int len, n;
 643         int i = 0, last = INT_MAX;
 644         int m, r;
 645
 646         if (!PyArg_ParseTuple(args, "t#t#|ii:count", &s, &len, &sub, &n, &i, &last))
 647                 return NULL;
 648         if (last > len)
 649                 last = len;
 650         if (last < 0)
 651                 last += len;
 652         if (last < 0)
 653                 last = 0;
 654         if (i < 0)
 655                 i += len;
 656         if (i < 0)
 657                 i = 0;
 658         m = last + 1 - n;
 659         if (n == 0)
 660                 return PyInt_FromLong((long) (m-i));
 661
 662         r = 0;
 663         while (i < m) {
 664                 if (!memcmp(s+i, sub, n)) {
 665                         r++;
 666                         i += n;
 667                 } else {
 668                         i++;
 669                 }
 670         }
 671         return PyInt_FromLong((long) r);
 672 }
 673
 674
 675 static char swapcase__doc__[] =
 676 "swapcase(s) -> string\n\
 677 \n\
 678 Return a copy of the string s with upper case characters\n\
 679 converted to lowercase and vice versa.";
 680
 681 static PyObject *
 682 strop_swapcase(PyObject *self, PyObject *args)
 683 {
 684         char *s, *s_new;
 685         int i, n;
 686         PyObject *new;
 687         int changed;
 688
 689         if (!PyArg_Parse(args, "t#", &s, &n))
 690                 return NULL;
 691         new = PyString_FromStringAndSize(NULL, n);
 692         if (new == NULL)
 693                 return NULL;
 694         s_new = PyString_AsString(new);
 695         changed = 0;
 696         for (i = 0; i < n; i++) {
 697                 int c = Py_CHARMASK(*s++);
 698                 if (islower(c)) {
 699                         changed = 1;
 700                         *s_new = toupper(c);
 701                 }
 702                 else if (isupper(c)) {
 703                         changed = 1;
 704                         *s_new = tolower(c);
 705                 }
 706                 else
 707                         *s_new = c;
 708                 s_new++;
 709         }
 710         if (!changed) {
 711                 Py_DECREF(new);
 712                 Py_INCREF(args);
 713                 return args;
 714         }
 715         return new;
 716 }
 717
 718
 719 static char atoi__doc__[] =
 720 "atoi(s [,base]) -> int\n\
 721 \n\
 722 Return the integer represented by the string s in the given\n\
 723 base, which defaults to 10.  The string s must consist of one\n\
 724 or more digits, possibly preceded by a sign.  If base is 0, it\n\
 725 is chosen from the leading characters of s, 0 for octal, 0x or\n\
 726 0X for hexadecimal.  If base is 16, a preceding 0x or 0X is\n\
 727 accepted.";
 728
 729 static PyObject *
 730 strop_atoi(PyObject *self, PyObject *args)
 731 {
 732         char *s, *end;
 733         int base = 10;
 734         long x;
 735         char buffer[256]; /* For errors */
 736
 737         if (!PyArg_ParseTuple(args, "s|i:atoi", &s, &base))
 738                 return NULL;
 739
 740         if ((base != 0 && base < 2) || base > 36) {
 741                 PyErr_SetString(PyExc_ValueError, "invalid base for atoi()");
 742                 return NULL;
 743         }
 744
 745         while (*s && isspace(Py_CHARMASK(*s)))
 746                 s++;
 747         errno = 0;
 748         if (base == 0 && s[0] == '0')
 749                 x = (long) PyOS_strtoul(s, &end, base);
 750         else
 751                 x = PyOS_strtol(s, &end, base);
 752         if (end == s || !isalnum(end[-1]))
 753                 goto bad;
 754         while (*end && isspace(Py_CHARMASK(*end)))
 755                 end++;
 756         if (*end != '\0') {
 757   bad:
 758                 sprintf(buffer, "invalid literal for atoi(): %.200s", s);
 759                 PyErr_SetString(PyExc_ValueError, buffer);
 760                 return NULL;
 761         }
 762         else if (errno != 0) {
 763                 sprintf(buffer, "atoi() literal too large: %.200s", s);
 764                 PyErr_SetString(PyExc_ValueError, buffer);
 765                 return NULL;
 766         }
 767         return PyInt_FromLong(x);
 768 }
 769
 770
 771 static char atol__doc__[] =
 772 "atol(s [,base]) -> long\n\
 773 \n\
 774 Return the long integer represented by the string s in the\n\
 775 given base, which defaults to 10.  The string s must consist\n\
 776 of one or more digits, possibly preceded by a sign.  If base\n\
 777 is 0, it is chosen from the leading characters of s, 0 for\n\
 778 octal, 0x or 0X for hexadecimal.  If base is 16, a preceding\n\
 779 0x or 0X is accepted.  A trailing L or l is not accepted,\n\
 780 unless base is 0.";
 781
 782 static PyObject *
 783 strop_atol(PyObject *self, PyObject *args)
 784 {
 785         char *s, *end;
 786         int base = 10;
 787         PyObject *x;
 788         char buffer[256]; /* For errors */
 789
 790         if (!PyArg_ParseTuple(args, "s|i:atol", &s, &base))
 791                 return NULL;
 792
 793         if ((base != 0 && base < 2) || base > 36) {
 794                 PyErr_SetString(PyExc_ValueError, "invalid base for atol()");
 795                 return NULL;
 796         }
 797
 798         while (*s && isspace(Py_CHARMASK(*s)))
 799                 s++;
 800         if (s[0] == '\0') {
 801                 PyErr_SetString(PyExc_ValueError, "empty string for atol()");
 802                 return NULL;
 803         }
 804         x = PyLong_FromString(s, &end, base);
 805         if (x == NULL)
 806                 return NULL;
 807         if (base == 0 && (*end == 'l' || *end == 'L'))
 808                 end++;
 809         while (*end && isspace(Py_CHARMASK(*end)))
 810                 end++;
 811         if (*end != '\0') {
 812                 sprintf(buffer, "invalid literal for atol(): %.200s", s);
 813                 PyErr_SetString(PyExc_ValueError, buffer);
 814                 Py_DECREF(x);
 815                 return NULL;
 816         }
 817         return x;
 818 }
 819
 820
 821 static char atof__doc__[] =
 822 "atof(s) -> float\n\
 823 \n\
 824 Return the floating point number represented by the string s.";
 825
 826 static PyObject *
 827 strop_atof(PyObject *self, PyObject *args)
 828 {
 829         extern double strtod(const char *, char **);
 830         char *s, *end;
 831         double x;
 832         char buffer[256]; /* For errors */
 833
 834         if (!PyArg_ParseTuple(args, "s:atof", &s))
 835                 return NULL;
 836         while (*s && isspace(Py_CHARMASK(*s)))
 837                 s++;
 838         if (s[0] == '\0') {
 839                 PyErr_SetString(PyExc_ValueError, "empty string for atof()");
 840                 return NULL;
 841         }
 842         errno = 0;
 843         PyFPE_START_PROTECT("strop_atof", return 0)
 844         x = strtod(s, &end);
 845         PyFPE_END_PROTECT(x)
 846         while (*end && isspace(Py_CHARMASK(*end)))
 847                 end++;
 848         if (*end != '\0') {
 849                 sprintf(buffer, "invalid literal for atof(): %.200s", s);
 850                 PyErr_SetString(PyExc_ValueError, buffer);
 851                 return NULL;
 852         }
 853         else if (errno != 0) {
 854                 sprintf(buffer, "atof() literal too large: %.200s", s);
 855                 PyErr_SetString(PyExc_ValueError, buffer);
 856                 return NULL;
 857         }
 858         return PyFloat_FromDouble(x);
 859 }
 860
 861
 862 static char maketrans__doc__[] =
 863 "maketrans(frm, to) -> string\n\
 864 \n\
 865 Return a translation table (a string of 256 bytes long)\n\
 866 suitable for use in string.translate.  The strings frm and to\n\
 867 must be of the same length.";
 868
 869 static PyObject *
 870 strop_maketrans(PyObject *self, PyObject *args)
 871 {
 872         unsigned char *c, *from=NULL, *to=NULL;
 873         int i, fromlen=0, tolen=0;
 874         PyObject *result;
 875
 876         if (!PyArg_ParseTuple(args, "t#t#:maketrans", &from, &fromlen, &to, &tolen))
 877                 return NULL;
 878
 879         if (fromlen != tolen) {
 880                 PyErr_SetString(PyExc_ValueError,
 881                                 "maketrans arguments must have same length");
 882                 return NULL;
 883         }
 884
 885         result = PyString_FromStringAndSize((char *)NULL, 256);
 886         if (result == NULL)
 887                 return NULL;
 888         c = (unsigned char *) PyString_AS_STRING((PyStringObject *)result);
 889         for (i = 0; i < 256; i++)
 890                 c[i]=(unsigned char)i;
 891         for (i = 0; i < fromlen; i++)
 892                 c[from[i]]=to[i];
 893
 894         return result;
 895 }
 896
 897
 898 static char translate__doc__[] =
 899 "translate(s,table [,deletechars]) -> string\n\
 900 \n\
 901 Return a copy of the string s, where all characters occurring\n\
 902 in the optional argument deletechars are removed, and the\n\
 903 remaining characters have been mapped through the given\n\
 904 translation table, which must be a string of length 256.";
 905
 906 static PyObject *
 907 strop_translate(PyObject *self, PyObject *args)
 908 {
 909         register char *input, *table, *output;
 910         register int i, c, changed = 0;
 911         PyObject *input_obj;
 912         char *table1, *output_start, *del_table=NULL;
 913         int inlen, tablen, dellen = 0;
 914         PyObject *result;
 915         int trans_table[256];
 916
 917         if (!PyArg_ParseTuple(args, "St#|t#:translate", &input_obj,
 918                               &table1, &tablen, &del_table, &dellen))
 919                 return NULL;
 920         if (tablen != 256) {
 921                 PyErr_SetString(PyExc_ValueError,
 922                               "translation table must be 256 characters long");
 923                 return NULL;
 924         }
 925
 926         table = table1;
 927         inlen = PyString_Size(input_obj);
 928         result = PyString_FromStringAndSize((char *)NULL, inlen);
 929         if (result == NULL)
 930                 return NULL;
 931         output_start = output = PyString_AsString(result);
 932         input = PyString_AsString(input_obj);
 933
 934         if (dellen == 0) {
 935                 /* If no deletions are required, use faster code */
 936                 for (i = inlen; --i >= 0; ) {
 937                         c = Py_CHARMASK(*input++);
 938                         if (Py_CHARMASK((*output++ = table[c])) != c)
 939                                 changed = 1;
 940                 }
 941                 if (changed)
 942                         return result;
 943                 Py_DECREF(result);
 944                 Py_INCREF(input_obj);
 945                 return input_obj;
 946         }
 947
 948         for (i = 0; i < 256; i++)
 949                 trans_table[i] = Py_CHARMASK(table[i]);
 950
 951         for (i = 0; i < dellen; i++)
 952                 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
 953
 954         for (i = inlen; --i >= 0; ) {
 955                 c = Py_CHARMASK(*input++);
 956                 if (trans_table[c] != -1)
 957                         if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
 958                                 continue;
 959                 changed = 1;
 960         }
 961         if (!changed) {
 962                 Py_DECREF(result);
 963                 Py_INCREF(input_obj);
 964                 return input_obj;
 965         }
 966         /* Fix the size of the resulting string */
 967         if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
 968                 return NULL;
 969         return result;
 970 }
 971
 972
 973 /* What follows is used for implementing replace().  Perry Stoll. */
 974
 975 /*
 976   mymemfind
 977
 978   strstr replacement for arbitrary blocks of memory.
 979
 980   Locates the first occurrence in the memory pointed to by MEM of the
 981   contents of memory pointed to by PAT.  Returns the index into MEM if
 982   found, or -1 if not found.  If len of PAT is greater than length of
 983   MEM, the function returns -1.
 984 */
 985 static int mymemfind(char *mem, int len, char *pat, int pat_len)
 986 {
 987         register int ii;
 988
 989         /* pattern can not occur in the last pat_len-1 chars */
 990         len -= pat_len;
 991
 992         for (ii = 0; ii <= len; ii++) {
 993                 if (mem[ii] == pat[0] &&
 994                     (pat_len == 1 ||
 995                      memcmp(&mem[ii+1], &pat[1], pat_len-1) == 0)) {
 996                         return ii;
 997                 }
 998         }
 999         return -1;
1000 }
1001
/*
  mymemcnt

   Count the non-overlapping occurrences of PAT in MEM, scanning from
   left to right and resuming immediately after each match.
   For example mem=1111 with pat=11 gives 2,
   and         mem=11111 with pat=11 also gives 2.
 */
static int mymemcnt(char *mem, int len, char *pat, int pat_len)
{
        int hits = 0;
        int at;

        while (len >= 0 &&
               (at = mymemfind(mem, len, pat, pat_len)) != -1) {
                /* Skip everything up to and including this match. */
                int step = at + pat_len;

                mem += step;
                len -= step;
                hits++;
        }
        return hits;
}
1024
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  If COUNT is greater than zero, at most COUNT
   occurrences (taken left to right, non-overlapping) are replaced;
   otherwise all of them are.

   If STR is empty, if the length of PAT is greater than the length of
   STR, or if there are no occurrences of PAT in STR, then the original
   string is returned.  Otherwise, a new string is allocated here with
   PyMem_MALLOC and returned; the caller must release it with
   PyMem_FREE.  The new string is NOT NUL-terminated.

   PAT_LEN must be at least 1 (the caller checks this).

   on return, out_len is:
       the length of output string (possibly 0), or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory).

   return value is:
       the new string allocated locally, or
       the input string STR when nothing was replaced, or
       NULL if an error occurred.
*/
static char *mymemreplace(char *str, int len, char *pat, int pat_len, char *sub, int sub_len, int count, int *out_len)
{
        char *out_s;
        char *new_s;
        int nfound, offset, new_len, alloc_len;

        if (len == 0 || pat_len > len)
                goto return_same;

        /* find length of output string */
        nfound = mymemcnt(str, len, pat, pat_len);
        if (count > 0 && nfound > count)
                nfound = count;
        if (nfound == 0)
                goto return_same;
        new_len = len + nfound*(sub_len - pat_len);

        /* new_len is 0 when the whole input is consumed by matches and
           SUB is empty.  A zero-byte allocation may yield NULL, which
           the caller would misreport as running out of memory, so
           always request at least one byte. */
        alloc_len = new_len > 0 ? new_len : 1;
        new_s = (char *)PyMem_MALLOC(alloc_len);
        if (new_s == NULL)
                return NULL;
        out_s = new_s;

        /* nfound already honours COUNT, so it bounds the loop directly;
           this avoids decrementing a caller-supplied negative count. */
        for (; nfound > 0; --nfound) {
                /* find index of next instance of pattern */
                offset = mymemfind(str, len, pat, pat_len);
                /* cannot happen given the count above; stay defensive */
                if (offset == -1)
                        break;

                /* copy the part of str that precedes the pattern */
                memcpy(new_s, str, offset);
                new_s += offset;
                str += offset + pat_len; /* move str past pattern */
                len -= offset + pat_len; /* reduce length of str remaining */

                /* copy substitute into the output string */
                memcpy(new_s, sub, sub_len);
                new_s += sub_len;
        }
        /* copy any remaining values into output string */
        if (len > 0)
                memcpy(new_s, str, len);

        *out_len = new_len;
        return out_s;

  return_same:
        *out_len = -1;
        return str;
}
1095
1096
/* Docstring exposed to Python as strop.replace.__doc__. */
static char replace__doc__[] =
        "replace (str, old, new[, maxsplit]) -> string\n"
        "\n"
        "Return a copy of string str with all occurrences of substring\n"
        "old replaced by new. If the optional argument maxsplit is\n"
        "given, only the first maxsplit occurrences are replaced.";
1103
1104 static PyObject *
1105 strop_replace(PyObject *self, PyObject *args)
1106 {
1107         char *str, *pat,*sub,*new_s;
1108         int len,pat_len,sub_len,out_len;
1109         int count = 0;
1110         PyObject *new;
1111
1112         if (!PyArg_ParseTuple(args, "t#t#t#|i:replace",
1113                               &str, &len, &pat, &pat_len, &sub, &sub_len,
1114                               &count))
1115                 return NULL;
1116         if (pat_len <= 0) {
1117                 PyErr_SetString(PyExc_ValueError, "empty pattern string");
1118                 return NULL;
1119         }
1120         new_s = mymemreplace(str,len,pat,pat_len,sub,sub_len,count,&out_len);
1121         if (new_s == NULL) {
1122                 PyErr_NoMemory();
1123                 return NULL;
1124         }
1125         if (out_len == -1) {
1126                 /* we're returning another reference to the input string */
1127                 new = PyTuple_GetItem(args, 0);
1128                 Py_XINCREF(new);
1129         }
1130         else {
1131                 new = PyString_FromStringAndSize(new_s, out_len);
1132                 PyMem_FREE(new_s);
1133         }
1134         return new;
1135 }
1136
1137
1138 /* List of functions defined in the module */
1139
1140 static PyMethodDef
1141 strop_methods[] = {
1142         {"atof",        strop_atof,
1143          METH_VARARGS, atof__doc__},
1144         {"atoi",        strop_atoi,
1145          METH_VARARGS, atoi__doc__},
1146         {"atol",        strop_atol,
1147          METH_VARARGS, atol__doc__},
1148         {"capitalize",  strop_capitalize,
1149          METH_OLDARGS, capitalize__doc__},
1150         {"count",       strop_count,
1151          METH_VARARGS, count__doc__},
1152         {"expandtabs",  strop_expandtabs,
1153          METH_VARARGS, expandtabs__doc__},
1154         {"find",        strop_find,
1155          METH_VARARGS, find__doc__},
1156         {"join",        strop_joinfields,
1157          METH_VARARGS, joinfields__doc__},
1158         {"joinfields",  strop_joinfields,
1159          METH_VARARGS, joinfields__doc__},
1160         {"lstrip",      strop_lstrip,
1161          METH_OLDARGS, lstrip__doc__},
1162         {"lower",       strop_lower,
1163          METH_OLDARGS, lower__doc__},
1164         {"maketrans",   strop_maketrans,
1165          METH_VARARGS, maketrans__doc__},
1166         {"replace",     strop_replace,
1167          METH_VARARGS, replace__doc__},
1168         {"rfind",       strop_rfind,
1169          METH_VARARGS, rfind__doc__},
1170         {"rstrip",      strop_rstrip,
1171          METH_OLDARGS, rstrip__doc__},
1172         {"split",       strop_splitfields,
1173          METH_VARARGS, splitfields__doc__},
1174         {"splitfields", strop_splitfields,
1175          METH_VARARGS, splitfields__doc__},
1176         {"strip",       strop_strip,
1177          METH_OLDARGS, strip__doc__},
1178         {"swapcase",    strop_swapcase,
1179          METH_OLDARGS, swapcase__doc__},
1180         {"translate",   strop_translate,
1181          METH_VARARGS, translate__doc__},
1182         {"upper",       strop_upper,
1183          METH_OLDARGS, upper__doc__},
1184         {NULL,          NULL}   /* sentinel */
1185 };
1186
1187
1188 DL_EXPORT(void)
1189 initstrop(void)
1190 {
1191         PyObject *m, *d, *s;
1192         char buf[256];
1193         int c, n;
1194         m = Py_InitModule4("strop", strop_methods, strop_module__doc__,
1195                            (PyObject*)NULL, PYTHON_API_VERSION);
1196         d = PyModule_GetDict(m);
1197
1198         /* Create 'whitespace' object */
1199         n = 0;
1200         for (c = 0; c < 256; c++) {
1201                 if (isspace(c))
1202                         buf[n++] = c;
1203         }
1204         s = PyString_FromStringAndSize(buf, n);
1205         if (s) {
1206                 PyDict_SetItemString(d, "whitespace", s);
1207                 Py_DECREF(s);
1208         }
1209         /* Create 'lowercase' object */
1210         n = 0;
1211         for (c = 0; c < 256; c++) {
1212                 if (islower(c))
1213                         buf[n++] = c;
1214         }
1215         s = PyString_FromStringAndSize(buf, n);
1216         if (s) {
1217                 PyDict_SetItemString(d, "lowercase", s);
1218                 Py_DECREF(s);
1219         }
1220
1221         /* Create 'uppercase' object */
1222         n = 0;
1223         for (c = 0; c < 256; c++) {
1224                 if (isupper(c))
1225                         buf[n++] = c;
1226         }
1227         s = PyString_FromStringAndSize(buf, n);
1228         if (s) {
1229                 PyDict_SetItemString(d, "uppercase", s);
1230                 Py_DECREF(s);
1231         }
1232 }