Modules/stropmodule.c

   1
   2 /* strop module */
   3
   4 static char strop_module__doc__[] =
   5 "Common string manipulations, optimized for speed.\n\
   6 \n\
   7 Always use \"import string\" rather than referencing\n\
   8 this module directly.";
   9
  10 #include "Python.h"
  11
  12 #ifdef HAVE_LIMITS_H
  13 #include <limits.h>
  14 #else
  15 #define INT_MAX 2147483647
  16 #endif
  17
  18 #include <ctype.h>
  19 /* XXX This file assumes that the <ctype.h> is*() functions
  20    XXX are defined for all 8-bit characters! */
  21
  22 /* The lstrip(), rstrip() and strip() functions are implemented
  23    in do_strip(), which uses an additional parameter to indicate what
  24    type of strip should occur. */
  25
  26 #define LEFTSTRIP 0
  27 #define RIGHTSTRIP 1
  28 #define BOTHSTRIP 2
  29
  30
  31 static PyObject *
  32 split_whitespace(char *s, int len, int maxsplit)
  33 {
  34         int i = 0, j, err;
  35         int countsplit = 0;
  36         PyObject* item;
  37         PyObject *list = PyList_New(0);
  38
  39         if (list == NULL)
  40                 return NULL;
  41
  42         while (i < len) {
  43                 while (i < len && isspace(Py_CHARMASK(s[i]))) {
  44                         i = i+1;
  45                 }
  46                 j = i;
  47                 while (i < len && !isspace(Py_CHARMASK(s[i]))) {
  48                         i = i+1;
  49                 }
  50                 if (j < i) {
  51                         item = PyString_FromStringAndSize(s+j, (int)(i-j));
  52                         if (item == NULL)
  53                                 goto finally;
  54
  55                         err = PyList_Append(list, item);
  56                         Py_DECREF(item);
  57                         if (err < 0)
  58                                 goto finally;
  59
  60                         countsplit++;
  61                         while (i < len && isspace(Py_CHARMASK(s[i]))) {
  62                                 i = i+1;
  63                         }
  64                         if (maxsplit && (countsplit >= maxsplit) && i < len) {
  65                                 item = PyString_FromStringAndSize(
  66                                         s+i, (int)(len - i));
  67                                 if (item == NULL)
  68                                         goto finally;
  69
  70                                 err = PyList_Append(list, item);
  71                                 Py_DECREF(item);
  72                                 if (err < 0)
  73                                         goto finally;
  74
  75                                 i = len;
  76                         }
  77                 }
  78         }
  79         return list;
  80   finally:
  81         Py_DECREF(list);
  82         return NULL;
  83 }
  84
  85
  86 static char splitfields__doc__[] =
  87 "split(s [,sep [,maxsplit]]) -> list of strings\n\
  88 splitfields(s [,sep [,maxsplit]]) -> list of strings\n\
  89 \n\
  90 Return a list of the words in the string s, using sep as the\n\
  91 delimiter string.  If maxsplit is nonzero, splits into at most\n\
  92 maxsplit words.  If sep is not specified, any whitespace string\n\
  93 is a separator.  Maxsplit defaults to 0.\n\
  94 \n\
  95 (split and splitfields are synonymous)";
  96
  97 static PyObject *
  98 strop_splitfields(PyObject *self, PyObject *args)
  99 {
 100         int len, n, i, j, err;
 101         int splitcount, maxsplit;
 102         char *s, *sub;
 103         PyObject *list, *item;
 104
 105         sub = NULL;
 106         n = 0;
 107         splitcount = 0;
 108         maxsplit = 0;
 109         if (!PyArg_ParseTuple(args, "t#|z#i:split", &s, &len, &sub, &n, &maxsplit))
 110                 return NULL;
 111         if (sub == NULL)
 112                 return split_whitespace(s, len, maxsplit);
 113         if (n == 0) {
 114                 PyErr_SetString(PyExc_ValueError, "empty separator");
 115                 return NULL;
 116         }
 117
 118         list = PyList_New(0);
 119         if (list == NULL)
 120                 return NULL;
 121
 122         i = j = 0;
 123         while (i+n <= len) {
 124                 if (s[i] == sub[0] && (n == 1 || memcmp(s+i, sub, n) == 0)) {
 125                         item = PyString_FromStringAndSize(s+j, (int)(i-j));
 126                         if (item == NULL)
 127                                 goto fail;
 128                         err = PyList_Append(list, item);
 129                         Py_DECREF(item);
 130                         if (err < 0)
 131                                 goto fail;
 132                         i = j = i + n;
 133                         splitcount++;
 134                         if (maxsplit && (splitcount >= maxsplit))
 135                                 break;
 136                 }
 137                 else
 138                         i++;
 139         }
 140         item = PyString_FromStringAndSize(s+j, (int)(len-j));
 141         if (item == NULL)
 142                 goto fail;
 143         err = PyList_Append(list, item);
 144         Py_DECREF(item);
 145         if (err < 0)
 146                 goto fail;
 147
 148         return list;
 149
 150  fail:
 151         Py_DECREF(list);
 152         return NULL;
 153 }
 154
 155
 156 static char joinfields__doc__[] =
 157 "join(list [,sep]) -> string\n\
 158 joinfields(list [,sep]) -> string\n\
 159 \n\
 160 Return a string composed of the words in list, with\n\
 161 intervening occurrences of sep.  Sep defaults to a single\n\
 162 space.\n\
 163 \n\
 164 (join and joinfields are synonymous)";
 165
 166 static PyObject *
 167 strop_joinfields(PyObject *self, PyObject *args)
 168 {
 169         PyObject *seq;
 170         char *sep = NULL;
 171         int seqlen, seplen = 0;
 172         int i, reslen = 0, slen = 0, sz = 100;
 173         PyObject *res = NULL;
 174         char* p = NULL;
 175         intargfunc getitemfunc;
 176
 177         if (!PyArg_ParseTuple(args, "O|t#:join", &seq, &sep, &seplen))
 178                 return NULL;
 179         if (sep == NULL) {
 180                 sep = " ";
 181                 seplen = 1;
 182         }
 183
 184         seqlen = PySequence_Size(seq);
 185         if (seqlen < 0 && PyErr_Occurred())
 186                 return NULL;
 187
 188         if (seqlen == 1) {
 189                 /* Optimization if there's only one item */
 190                 PyObject *item = PySequence_GetItem(seq, 0);
 191                 if (item && !PyString_Check(item)) {
 192                         PyErr_SetString(PyExc_TypeError,
 193                                  "first argument must be sequence of strings");
 194                         Py_DECREF(item);
 195                         return NULL;
 196                 }
 197                 return item;
 198         }
 199
 200         if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
 201                 return NULL;
 202         p = PyString_AsString(res);
 203
 204         /* optimize for lists, since it's the most common case.  all others
 205          * (tuples and arbitrary sequences) just use the sequence abstract
 206          * interface.
 207          */
 208         if (PyList_Check(seq)) {
 209                 for (i = 0; i < seqlen; i++) {
 210                         PyObject *item = PyList_GET_ITEM(seq, i);
 211                         if (!PyString_Check(item)) {
 212                                 PyErr_SetString(PyExc_TypeError,
 213                                 "first argument must be sequence of strings");
 214                                 Py_DECREF(res);
 215                                 return NULL;
 216                         }
 217                         slen = PyString_GET_SIZE(item);
 218                         while (reslen + slen + seplen >= sz) {
 219                                 if (_PyString_Resize(&res, sz * 2)) {
 220                                         Py_DECREF(res);
 221                                         return NULL;
 222                                 }
 223                                 sz *= 2;
 224                                 p = PyString_AsString(res) + reslen;
 225                         }
 226                         if (i > 0) {
 227                                 memcpy(p, sep, seplen);
 228                                 p += seplen;
 229                                 reslen += seplen;
 230                         }
 231                         memcpy(p, PyString_AS_STRING(item), slen);
 232                         p += slen;
 233                         reslen += slen;
 234                 }
 235                 if (_PyString_Resize(&res, reslen)) {
 236                         Py_DECREF(res);
 237                         res = NULL;
 238                 }
 239                 return res;
 240         }
 241
 242         if (seq->ob_type->tp_as_sequence == NULL ||
 243                  (getitemfunc = seq->ob_type->tp_as_sequence->sq_item) == NULL)
 244         {
 245                 PyErr_SetString(PyExc_TypeError,
 246                                 "first argument must be a sequence");
 247                 return NULL;
 248         }
 249         /* This is now type safe */
 250         for (i = 0; i < seqlen; i++) {
 251                 PyObject *item = getitemfunc(seq, i);
 252                 if (!item || !PyString_Check(item)) {
 253                         PyErr_SetString(PyExc_TypeError,
 254                                  "first argument must be sequence of strings");
 255                         Py_DECREF(res);
 256                         Py_XDECREF(item);
 257                         return NULL;
 258                 }
 259                 slen = PyString_GET_SIZE(item);
 260                 while (reslen + slen + seplen >= sz) {
 261                         if (_PyString_Resize(&res, sz * 2)) {
 262                                 Py_DECREF(res);
 263                                 Py_DECREF(item);
 264                                 return NULL;
 265                         }
 266                         sz *= 2;
 267                         p = PyString_AsString(res) + reslen;
 268                 }
 269                 if (i > 0) {
 270                         memcpy(p, sep, seplen);
 271                         p += seplen;
 272                         reslen += seplen;
 273                 }
 274                 memcpy(p, PyString_AS_STRING(item), slen);
 275                 p += slen;
 276                 reslen += slen;
 277                 Py_DECREF(item);
 278         }
 279         if (_PyString_Resize(&res, reslen)) {
 280                 Py_DECREF(res);
 281                 res = NULL;
 282         }
 283         return res;
 284 }
 285
 286
 287 static char find__doc__[] =
 288 "find(s, sub [,start [,end]]) -> in\n\
 289 \n\
 290 Return the lowest index in s where substring sub is found,\n\
 291 such that sub is contained within s[start,end].  Optional\n\
 292 arguments start and end are interpreted as in slice notation.\n\
 293 \n\
 294 Return -1 on failure.";
 295
 296 static PyObject *
 297 strop_find(PyObject *self, PyObject *args)
 298 {
 299         char *s, *sub;
 300         int len, n, i = 0, last = INT_MAX;
 301
 302         if (!PyArg_ParseTuple(args, "t#t#|ii:find", &s, &len, &sub, &n, &i, &last))
 303                 return NULL;
 304
 305         if (last > len)
 306                 last = len;
 307         if (last < 0)
 308                 last += len;
 309         if (last < 0)
 310                 last = 0;
 311         if (i < 0)
 312                 i += len;
 313         if (i < 0)
 314                 i = 0;
 315
 316         if (n == 0 && i <= last)
 317                 return PyInt_FromLong((long)i);
 318
 319         last -= n;
 320         for (; i <= last; ++i)
 321                 if (s[i] == sub[0] &&
 322                     (n == 1 || memcmp(&s[i+1], &sub[1], n-1) == 0))
 323                         return PyInt_FromLong((long)i);
 324
 325         return PyInt_FromLong(-1L);
 326 }
 327
 328
 329 static char rfind__doc__[] =
 330 "rfind(s, sub [,start [,end]]) -> int\n\
 331 \n\
 332 Return the highest index in s where substring sub is found,\n\
 333 such that sub is contained within s[start,end].  Optional\n\
 334 arguments start and end are interpreted as in slice notation.\n\
 335 \n\
 336 Return -1 on failure.";
 337
 338 static PyObject *
 339 strop_rfind(PyObject *self, PyObject *args)
 340 {
 341         char *s, *sub;
 342         int len, n, j;
 343         int i = 0, last = INT_MAX;
 344
 345         if (!PyArg_ParseTuple(args, "t#t#|ii:rfind", &s, &len, &sub, &n, &i, &last))
 346                 return NULL;
 347
 348         if (last > len)
 349                 last = len;
 350         if (last < 0)
 351                 last += len;
 352         if (last < 0)
 353                 last = 0;
 354         if (i < 0)
 355                 i += len;
 356         if (i < 0)
 357                 i = 0;
 358
 359         if (n == 0 && i <= last)
 360                 return PyInt_FromLong((long)last);
 361
 362         for (j = last-n; j >= i; --j)
 363                 if (s[j] == sub[0] &&
 364                     (n == 1 || memcmp(&s[j+1], &sub[1], n-1) == 0))
 365                         return PyInt_FromLong((long)j);
 366
 367         return PyInt_FromLong(-1L);
 368 }
 369
 370
 371 static PyObject *
 372 do_strip(PyObject *args, int striptype)
 373 {
 374         char *s;
 375         int len, i, j;
 376
 377
 378         if (!PyArg_Parse(args, "t#", &s, &len))
 379                 return NULL;
 380
 381         i = 0;
 382         if (striptype != RIGHTSTRIP) {
 383                 while (i < len && isspace(Py_CHARMASK(s[i]))) {
 384                         i++;
 385                 }
 386         }
 387
 388         j = len;
 389         if (striptype != LEFTSTRIP) {
 390                 do {
 391                         j--;
 392                 } while (j >= i && isspace(Py_CHARMASK(s[j])));
 393                 j++;
 394         }
 395
 396         if (i == 0 && j == len) {
 397                 Py_INCREF(args);
 398                 return args;
 399         }
 400         else
 401                 return PyString_FromStringAndSize(s+i, j-i);
 402 }
 403
 404
 405 static char strip__doc__[] =
 406 "strip(s) -> string\n\
 407 \n\
 408 Return a copy of the string s with leading and trailing\n\
 409 whitespace removed.";
 410
 411 static PyObject *
 412 strop_strip(PyObject *self, PyObject *args)
 413 {
 414         return do_strip(args, BOTHSTRIP);
 415 }
 416
 417
 418 static char lstrip__doc__[] =
 419 "lstrip(s) -> string\n\
 420 \n\
 421 Return a copy of the string s with leading whitespace removed.";
 422
 423 static PyObject *
 424 strop_lstrip(PyObject *self, PyObject *args)
 425 {
 426         return do_strip(args, LEFTSTRIP);
 427 }
 428
 429
 430 static char rstrip__doc__[] =
 431 "rstrip(s) -> string\n\
 432 \n\
 433 Return a copy of the string s with trailing whitespace removed.";
 434
 435 static PyObject *
 436 strop_rstrip(PyObject *self, PyObject *args)
 437 {
 438         return do_strip(args, RIGHTSTRIP);
 439 }
 440
 441
 442 static char lower__doc__[] =
 443 "lower(s) -> string\n\
 444 \n\
 445 Return a copy of the string s converted to lowercase.";
 446
 447 static PyObject *
 448 strop_lower(PyObject *self, PyObject *args)
 449 {
 450         char *s, *s_new;
 451         int i, n;
 452         PyObject *new;
 453         int changed;
 454
 455         if (!PyArg_Parse(args, "t#", &s, &n))
 456                 return NULL;
 457         new = PyString_FromStringAndSize(NULL, n);
 458         if (new == NULL)
 459                 return NULL;
 460         s_new = PyString_AsString(new);
 461         changed = 0;
 462         for (i = 0; i < n; i++) {
 463                 int c = Py_CHARMASK(*s++);
 464                 if (isupper(c)) {
 465                         changed = 1;
 466                         *s_new = tolower(c);
 467                 } else
 468                         *s_new = c;
 469                 s_new++;
 470         }
 471         if (!changed) {
 472                 Py_DECREF(new);
 473                 Py_INCREF(args);
 474                 return args;
 475         }
 476         return new;
 477 }
 478
 479
 480 static char upper__doc__[] =
 481 "upper(s) -> string\n\
 482 \n\
 483 Return a copy of the string s converted to uppercase.";
 484
 485 static PyObject *
 486 strop_upper(PyObject *self, PyObject *args)
 487 {
 488         char *s, *s_new;
 489         int i, n;
 490         PyObject *new;
 491         int changed;
 492
 493         if (!PyArg_Parse(args, "t#", &s, &n))
 494                 return NULL;
 495         new = PyString_FromStringAndSize(NULL, n);
 496         if (new == NULL)
 497                 return NULL;
 498         s_new = PyString_AsString(new);
 499         changed = 0;
 500         for (i = 0; i < n; i++) {
 501                 int c = Py_CHARMASK(*s++);
 502                 if (islower(c)) {
 503                         changed = 1;
 504                         *s_new = toupper(c);
 505                 } else
 506                         *s_new = c;
 507                 s_new++;
 508         }
 509         if (!changed) {
 510                 Py_DECREF(new);
 511                 Py_INCREF(args);
 512                 return args;
 513         }
 514         return new;
 515 }
 516
 517
 518 static char capitalize__doc__[] =
 519 "capitalize(s) -> string\n\
 520 \n\
 521 Return a copy of the string s with only its first character\n\
 522 capitalized.";
 523
 524 static PyObject *
 525 strop_capitalize(PyObject *self, PyObject *args)
 526 {
 527         char *s, *s_new;
 528         int i, n;
 529         PyObject *new;
 530         int changed;
 531
 532         if (!PyArg_Parse(args, "t#", &s, &n))
 533                 return NULL;
 534         new = PyString_FromStringAndSize(NULL, n);
 535         if (new == NULL)
 536                 return NULL;
 537         s_new = PyString_AsString(new);
 538         changed = 0;
 539         if (0 < n) {
 540                 int c = Py_CHARMASK(*s++);
 541                 if (islower(c)) {
 542                         changed = 1;
 543                         *s_new = toupper(c);
 544                 } else
 545                         *s_new = c;
 546                 s_new++;
 547         }
 548         for (i = 1; i < n; i++) {
 549                 int c = Py_CHARMASK(*s++);
 550                 if (isupper(c)) {
 551                         changed = 1;
 552                         *s_new = tolower(c);
 553                 } else
 554                         *s_new = c;
 555                 s_new++;
 556         }
 557         if (!changed) {
 558                 Py_DECREF(new);
 559                 Py_INCREF(args);
 560                 return args;
 561         }
 562         return new;
 563 }
 564
 565
 566 static char expandtabs__doc__[] =
 567 "expandtabs(string, [tabsize]) -> string\n\
 568 \n\
 569 Expand tabs in a string, i.e. replace them by one or more spaces,\n\
 570 depending on the current column and the given tab size (default 8).\n\
 571 The column number is reset to zero after each newline occurring in the\n\
 572 string.  This doesn't understand other non-printing characters.";
 573
 574 static PyObject *
 575 strop_expandtabs(PyObject *self, PyObject *args)
 576 {
 577         /* Original by Fredrik Lundh */
 578         char* e;
 579         char* p;
 580         char* q;
 581         int i, j;
 582         PyObject* out;
 583         char* string;
 584         int stringlen;
 585         int tabsize = 8;
 586
 587         /* Get arguments */
 588         if (!PyArg_ParseTuple(args, "s#|i:expandtabs", &string, &stringlen, &tabsize))
 589                 return NULL;
 590         if (tabsize < 1) {
 591                 PyErr_SetString(PyExc_ValueError,
 592                                 "tabsize must be at least 1");
 593                 return NULL;
 594         }
 595
 596         /* First pass: determine size of output string */
 597         i = j = 0; /* j: current column; i: total of previous lines */
 598         e = string + stringlen;
 599         for (p = string; p < e; p++) {
 600                 if (*p == '\t')
 601                         j += tabsize - (j%tabsize);
 602                 else {
 603                         j++;
 604                         if (*p == '\n') {
 605                                 i += j;
 606                                 j = 0;
 607                         }
 608                 }
 609         }
 610
 611         /* Second pass: create output string and fill it */
 612         out = PyString_FromStringAndSize(NULL, i+j);
 613         if (out == NULL)
 614                 return NULL;
 615
 616         i = 0;
 617         q = PyString_AS_STRING(out);
 618
 619         for (p = string; p < e; p++) {
 620                 if (*p == '\t') {
 621                         j = tabsize - (i%tabsize);
 622                         i += j;
 623                         while (j-- > 0)
 624                                 *q++ = ' ';
 625                 } else {
 626                         *q++ = *p;
 627                         i++;
 628                         if (*p == '\n')
 629                                 i = 0;
 630                 }
 631         }
 632
 633         return out;
 634 }
 635
 636
 637 static char count__doc__[] =
 638 "count(s, sub[, start[, end]]) -> int\n\
 639 \n\
 640 Return the number of occurrences of substring sub in string\n\
 641 s[start:end].  Optional arguments start and end are\n\
 642 interpreted as in slice notation.";
 643
 644 static PyObject *
 645 strop_count(PyObject *self, PyObject *args)
 646 {
 647         char *s, *sub;
 648         int len, n;
 649         int i = 0, last = INT_MAX;
 650         int m, r;
 651
 652         if (!PyArg_ParseTuple(args, "t#t#|ii:count", &s, &len, &sub, &n, &i, &last))
 653                 return NULL;
 654         if (last > len)
 655                 last = len;
 656         if (last < 0)
 657                 last += len;
 658         if (last < 0)
 659                 last = 0;
 660         if (i < 0)
 661                 i += len;
 662         if (i < 0)
 663                 i = 0;
 664         m = last + 1 - n;
 665         if (n == 0)
 666                 return PyInt_FromLong((long) (m-i));
 667
 668         r = 0;
 669         while (i < m) {
 670                 if (!memcmp(s+i, sub, n)) {
 671                         r++;
 672                         i += n;
 673                 } else {
 674                         i++;
 675                 }
 676         }
 677         return PyInt_FromLong((long) r);
 678 }
 679
 680
 681 static char swapcase__doc__[] =
 682 "swapcase(s) -> string\n\
 683 \n\
 684 Return a copy of the string s with upper case characters\n\
 685 converted to lowercase and vice versa.";
 686
 687 static PyObject *
 688 strop_swapcase(PyObject *self, PyObject *args)
 689 {
 690         char *s, *s_new;
 691         int i, n;
 692         PyObject *new;
 693         int changed;
 694
 695         if (!PyArg_Parse(args, "t#", &s, &n))
 696                 return NULL;
 697         new = PyString_FromStringAndSize(NULL, n);
 698         if (new == NULL)
 699                 return NULL;
 700         s_new = PyString_AsString(new);
 701         changed = 0;
 702         for (i = 0; i < n; i++) {
 703                 int c = Py_CHARMASK(*s++);
 704                 if (islower(c)) {
 705                         changed = 1;
 706                         *s_new = toupper(c);
 707                 }
 708                 else if (isupper(c)) {
 709                         changed = 1;
 710                         *s_new = tolower(c);
 711                 }
 712                 else
 713                         *s_new = c;
 714                 s_new++;
 715         }
 716         if (!changed) {
 717                 Py_DECREF(new);
 718                 Py_INCREF(args);
 719                 return args;
 720         }
 721         return new;
 722 }
 723
 724
 725 static char atoi__doc__[] =
 726 "atoi(s [,base]) -> int\n\
 727 \n\
 728 Return the integer represented by the string s in the given\n\
 729 base, which defaults to 10.  The string s must consist of one\n\
 730 or more digits, possibly preceded by a sign.  If base is 0, it\n\
 731 is chosen from the leading characters of s, 0 for octal, 0x or\n\
 732 0X for hexadecimal.  If base is 16, a preceding 0x or 0X is\n\
 733 accepted.";
 734
 735 static PyObject *
 736 strop_atoi(PyObject *self, PyObject *args)
 737 {
 738         char *s, *end;
 739         int base = 10;
 740         long x;
 741         char buffer[256]; /* For errors */
 742
 743         if (!PyArg_ParseTuple(args, "s|i:atoi", &s, &base))
 744                 return NULL;
 745
 746         if ((base != 0 && base < 2) || base > 36) {
 747                 PyErr_SetString(PyExc_ValueError, "invalid base for atoi()");
 748                 return NULL;
 749         }
 750
 751         while (*s && isspace(Py_CHARMASK(*s)))
 752                 s++;
 753         errno = 0;
 754         if (base == 0 && s[0] == '0')
 755                 x = (long) PyOS_strtoul(s, &end, base);
 756         else
 757                 x = PyOS_strtol(s, &end, base);
 758         if (end == s || !isalnum(end[-1]))
 759                 goto bad;
 760         while (*end && isspace(Py_CHARMASK(*end)))
 761                 end++;
 762         if (*end != '\0') {
 763   bad:
 764                 sprintf(buffer, "invalid literal for atoi(): %.200s", s);
 765                 PyErr_SetString(PyExc_ValueError, buffer);
 766                 return NULL;
 767         }
 768         else if (errno != 0) {
 769                 sprintf(buffer, "atoi() literal too large: %.200s", s);
 770                 PyErr_SetString(PyExc_ValueError, buffer);
 771                 return NULL;
 772         }
 773         return PyInt_FromLong(x);
 774 }
 775
 776
 777 static char atol__doc__[] =
 778 "atol(s [,base]) -> long\n\
 779 \n\
 780 Return the long integer represented by the string s in the\n\
 781 given base, which defaults to 10.  The string s must consist\n\
 782 of one or more digits, possibly preceded by a sign.  If base\n\
 783 is 0, it is chosen from the leading characters of s, 0 for\n\
 784 octal, 0x or 0X for hexadecimal.  If base is 16, a preceding\n\
 785 0x or 0X is accepted.  A trailing L or l is not accepted,\n\
 786 unless base is 0.";
 787
 788 static PyObject *
 789 strop_atol(PyObject *self, PyObject *args)
 790 {
 791         char *s, *end;
 792         int base = 10;
 793         PyObject *x;
 794         char buffer[256]; /* For errors */
 795
 796         if (!PyArg_ParseTuple(args, "s|i:atol", &s, &base))
 797                 return NULL;
 798
 799         if ((base != 0 && base < 2) || base > 36) {
 800                 PyErr_SetString(PyExc_ValueError, "invalid base for atol()");
 801                 return NULL;
 802         }
 803
 804         while (*s && isspace(Py_CHARMASK(*s)))
 805                 s++;
 806         if (s[0] == '\0') {
 807                 PyErr_SetString(PyExc_ValueError, "empty string for atol()");
 808                 return NULL;
 809         }
 810         x = PyLong_FromString(s, &end, base);
 811         if (x == NULL)
 812                 return NULL;
 813         if (base == 0 && (*end == 'l' || *end == 'L'))
 814                 end++;
 815         while (*end && isspace(Py_CHARMASK(*end)))
 816                 end++;
 817         if (*end != '\0') {
 818                 sprintf(buffer, "invalid literal for atol(): %.200s", s);
 819                 PyErr_SetString(PyExc_ValueError, buffer);
 820                 Py_DECREF(x);
 821                 return NULL;
 822         }
 823         return x;
 824 }
 825
 826
 827 static char atof__doc__[] =
 828 "atof(s) -> float\n\
 829 \n\
 830 Return the floating point number represented by the string s.";
 831
 832 static PyObject *
 833 strop_atof(PyObject *self, PyObject *args)
 834 {
 835         extern double strtod(const char *, char **);
 836         char *s, *end;
 837         double x;
 838         char buffer[256]; /* For errors */
 839
 840         if (!PyArg_ParseTuple(args, "s:atof", &s))
 841                 return NULL;
 842         while (*s && isspace(Py_CHARMASK(*s)))
 843                 s++;
 844         if (s[0] == '\0') {
 845                 PyErr_SetString(PyExc_ValueError, "empty string for atof()");
 846                 return NULL;
 847         }
 848         errno = 0;
 849         PyFPE_START_PROTECT("strop_atof", return 0)
 850         x = strtod(s, &end);
 851         PyFPE_END_PROTECT(x)
 852         while (*end && isspace(Py_CHARMASK(*end)))
 853                 end++;
 854         if (*end != '\0') {
 855                 sprintf(buffer, "invalid literal for atof(): %.200s", s);
 856                 PyErr_SetString(PyExc_ValueError, buffer);
 857                 return NULL;
 858         }
 859         else if (errno != 0) {
 860                 sprintf(buffer, "atof() literal too large: %.200s", s);
 861                 PyErr_SetString(PyExc_ValueError, buffer);
 862                 return NULL;
 863         }
 864         return PyFloat_FromDouble(x);
 865 }
 866
 867
 868 static char maketrans__doc__[] =
 869 "maketrans(frm, to) -> string\n\
 870 \n\
 871 Return a translation table (a string of 256 bytes long)\n\
 872 suitable for use in string.translate.  The strings frm and to\n\
 873 must be of the same length.";
 874
 875 static PyObject *
 876 strop_maketrans(PyObject *self, PyObject *args)
 877 {
 878         unsigned char *c, *from=NULL, *to=NULL;
 879         int i, fromlen=0, tolen=0;
 880         PyObject *result;
 881
 882         if (!PyArg_ParseTuple(args, "t#t#:maketrans", &from, &fromlen, &to, &tolen))
 883                 return NULL;
 884
 885         if (fromlen != tolen) {
 886                 PyErr_SetString(PyExc_ValueError,
 887                                 "maketrans arguments must have same length");
 888                 return NULL;
 889         }
 890
 891         result = PyString_FromStringAndSize((char *)NULL, 256);
 892         if (result == NULL)
 893                 return NULL;
 894         c = (unsigned char *) PyString_AS_STRING((PyStringObject *)result);
 895         for (i = 0; i < 256; i++)
 896                 c[i]=(unsigned char)i;
 897         for (i = 0; i < fromlen; i++)
 898                 c[from[i]]=to[i];
 899
 900         return result;
 901 }
 902
 903
 904 static char translate__doc__[] =
 905 "translate(s,table [,deletechars]) -> string\n\
 906 \n\
 907 Return a copy of the string s, where all characters occurring\n\
 908 in the optional argument deletechars are removed, and the\n\
 909 remaining characters have been mapped through the given\n\
 910 translation table, which must be a string of length 256.";
 911
 912 static PyObject *
 913 strop_translate(PyObject *self, PyObject *args)
 914 {
 915         register char *input, *table, *output;
 916         register int i, c, changed = 0;
 917         PyObject *input_obj;
 918         char *table1, *output_start, *del_table=NULL;
 919         int inlen, tablen, dellen = 0;
 920         PyObject *result;
 921         int trans_table[256];
 922
 923         if (!PyArg_ParseTuple(args, "St#|t#:translate", &input_obj,
 924                               &table1, &tablen, &del_table, &dellen))
 925                 return NULL;
 926         if (tablen != 256) {
 927                 PyErr_SetString(PyExc_ValueError,
 928                               "translation table must be 256 characters long");
 929                 return NULL;
 930         }
 931
 932         table = table1;
 933         inlen = PyString_Size(input_obj);
 934         result = PyString_FromStringAndSize((char *)NULL, inlen);
 935         if (result == NULL)
 936                 return NULL;
 937         output_start = output = PyString_AsString(result);
 938         input = PyString_AsString(input_obj);
 939
 940         if (dellen == 0) {
 941                 /* If no deletions are required, use faster code */
 942                 for (i = inlen; --i >= 0; ) {
 943                         c = Py_CHARMASK(*input++);
 944                         if (Py_CHARMASK((*output++ = table[c])) != c)
 945                                 changed = 1;
 946                 }
 947                 if (changed)
 948                         return result;
 949                 Py_DECREF(result);
 950                 Py_INCREF(input_obj);
 951                 return input_obj;
 952         }
 953
 954         for (i = 0; i < 256; i++)
 955                 trans_table[i] = Py_CHARMASK(table[i]);
 956
 957         for (i = 0; i < dellen; i++)
 958                 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
 959
 960         for (i = inlen; --i >= 0; ) {
 961                 c = Py_CHARMASK(*input++);
 962                 if (trans_table[c] != -1)
 963                         if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
 964                                 continue;
 965                 changed = 1;
 966         }
 967         if (!changed) {
 968                 Py_DECREF(result);
 969                 Py_INCREF(input_obj);
 970                 return input_obj;
 971         }
 972         /* Fix the size of the resulting string */
 973         if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
 974                 return NULL;
 975         return result;
 976 }
 977
 978
 979 /* What follows is used for implementing replace().  Perry Stoll. */
 980
 981 /*
 982   mymemfind
 983
 984   strstr replacement for arbitrary blocks of memory.
 985
 986   Locates the first occurrence in the memory pointed to by MEM of the
 987   contents of memory pointed to by PAT.  Returns the index into MEM if
 988   found, or -1 if not found.  If len of PAT is greater than length of
 989   MEM, the function returns -1.
 990 */
 991 static int mymemfind(char *mem, int len, char *pat, int pat_len)
 992 {
 993         register int ii;
 994
 995         /* pattern can not occur in the last pat_len-1 chars */
 996         len -= pat_len;
 997
 998         for (ii = 0; ii <= len; ii++) {
 999                 if (mem[ii] == pat[0] &&
1000                     (pat_len == 1 ||
1001                      memcmp(&mem[ii+1], &pat[1], pat_len-1) == 0)) {
1002                         return ii;
1003                 }
1004         }
1005         return -1;
1006 }
1007
/*
  mymemcnt

   Count the non-overlapping occurrences of PAT in MEM, scanning from
   left to right and resuming each search just past the previous match.
   For example, mem=1111 with pat=11 gives 2, and mem=11111 with pat=11
   gives 2 as well.
 */
static int mymemcnt(char *mem, int len, char *pat, int pat_len)
{
        int hits = 0;
        int where, step;

        while (len >= 0 &&
               (where = mymemfind(mem, len, pat, pat_len)) != -1) {
                /* skip everything up to and including this match */
                step = where + pat_len;
                mem += step;
                len -= step;
                hits++;
        }
        return hits;
}
1030
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  If COUNT is positive, at most COUNT occurrences
   (leftmost first) are replaced; otherwise all of them are.

   If STR is empty, PAT is empty, the length of PAT is greater than the
   length of STR, or there are no occurrences of PAT in STR, then the
   original string is returned.  Otherwise, a new string is allocated
   here with PyMem_MALLOC and returned; the caller releases it with
   PyMem_FREE.  The new string is not NUL-terminated.

   on return, out_len is:
       the length of output string (possibly 0), or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the output length
       would not fit in an int).

   return value is:
       the new string allocated locally, or
       STR itself when nothing was replaced, or
       NULL if an error occurred.
*/
static char *mymemreplace(char *str, int len, char *pat, int pat_len, char *sub, int sub_len, int count, int *out_len)
{
        char *out_s;
        char *new_s;
        int nfound, offset, new_len;

        /* an empty pattern can never be searched for safely */
        if (len == 0 || pat_len <= 0 || pat_len > len)
                goto return_same;

        /* find length of output string */
        nfound = mymemcnt(str, len, pat, pat_len);
        if (count > 0 && nfound > count)
                nfound = count;
        if (nfound == 0)
                goto return_same;

        /* Only a growing replacement can overflow: when SUB is not longer
           than PAT the result is no longer than the input. */
        if (sub_len > pat_len &&
            nfound > (INT_MAX - len) / (sub_len - pat_len))
                return NULL;
        new_len = len + nfound*(sub_len - pat_len);

        /* new_len is 0 when every byte of STR is replaced by an empty SUB.
           Ask for at least one byte so that a zero-size request cannot
           come back as NULL and be mistaken for running out of memory;
           the caller still needs a pointer to free. */
        new_s = (char *)PyMem_MALLOC(new_len > 0 ? new_len : 1);
        if (new_s == NULL) return NULL;

        *out_len = new_len;
        out_s = new_s;

        while (len > 0) {
                /* find index of next instance of pattern */
                offset = mymemfind(str, len, pat, pat_len);
                /* if not found,  break out of loop */
                if (offset == -1) break;

                /* copy non matching part of input string */
                memcpy(new_s, str, offset); /* copy part of str before pat */
                str += offset + pat_len; /* move str past pattern */
                len -= offset + pat_len; /* reduce length of str remaining */

                /* copy substitute into the output string */
                new_s += offset; /* move new_s to dest for sub string */
                memcpy(new_s, sub, sub_len); /* copy substring into new_s */
                new_s += sub_len; /* offset new_s past sub string */

                /* break when we've done count replacements; a COUNT that
                   started at zero or below never reaches zero here, which
                   is what makes it mean "replace everything" */
                if (--count == 0) break;
        }
        /* copy any remaining values into output string */
        if (len > 0)
                memcpy(new_s, str, len);
        return out_s;

  return_same:
        *out_len = -1;
        return str;
}
1101
1102
1103 static char replace__doc__[] =
1104 "replace (str, old, new[, maxsplit]) -> string\n\
1105 \n\
1106 Return a copy of string str with all occurrences of substring\n\
1107 old replaced by new. If the optional argument maxsplit is\n\
1108 given, only the first maxsplit occurrences are replaced.";
1109
1110 static PyObject *
1111 strop_replace(PyObject *self, PyObject *args)
1112 {
1113         char *str, *pat,*sub,*new_s;
1114         int len,pat_len,sub_len,out_len;
1115         int count = 0;
1116         PyObject *new;
1117
1118         if (!PyArg_ParseTuple(args, "t#t#t#|i:replace",
1119                               &str, &len, &pat, &pat_len, &sub, &sub_len,
1120                               &count))
1121                 return NULL;
1122         if (pat_len <= 0) {
1123                 PyErr_SetString(PyExc_ValueError, "empty pattern string");
1124                 return NULL;
1125         }
1126         new_s = mymemreplace(str,len,pat,pat_len,sub,sub_len,count,&out_len);
1127         if (new_s == NULL) {
1128                 PyErr_NoMemory();
1129                 return NULL;
1130         }
1131         if (out_len == -1) {
1132                 /* we're returning another reference to the input string */
1133                 new = PyTuple_GetItem(args, 0);
1134                 Py_XINCREF(new);
1135         }
1136         else {
1137                 new = PyString_FromStringAndSize(new_s, out_len);
1138                 PyMem_FREE(new_s);
1139         }
1140         return new;
1141 }
1142
1143
1144 /* List of functions defined in the module */
1145
1146 static PyMethodDef
1147 strop_methods[] = {
1148         {"atof",        strop_atof,
1149          METH_VARARGS, atof__doc__},
1150         {"atoi",        strop_atoi,
1151          METH_VARARGS, atoi__doc__},
1152         {"atol",        strop_atol,
1153          METH_VARARGS, atol__doc__},
1154         {"capitalize",  strop_capitalize,
1155          METH_OLDARGS, capitalize__doc__},
1156         {"count",       strop_count,
1157          METH_VARARGS, count__doc__},
1158         {"expandtabs",  strop_expandtabs,
1159          METH_VARARGS, expandtabs__doc__},
1160         {"find",        strop_find,
1161          METH_VARARGS, find__doc__},
1162         {"join",        strop_joinfields,
1163          METH_VARARGS, joinfields__doc__},
1164         {"joinfields",  strop_joinfields,
1165          METH_VARARGS, joinfields__doc__},
1166         {"lstrip",      strop_lstrip,
1167          METH_OLDARGS, lstrip__doc__},
1168         {"lower",       strop_lower,
1169          METH_OLDARGS, lower__doc__},
1170         {"maketrans",   strop_maketrans,
1171          METH_VARARGS, maketrans__doc__},
1172         {"replace",     strop_replace,
1173          METH_VARARGS, replace__doc__},
1174         {"rfind",       strop_rfind,
1175          METH_VARARGS, rfind__doc__},
1176         {"rstrip",      strop_rstrip,
1177          METH_OLDARGS, rstrip__doc__},
1178         {"split",       strop_splitfields,
1179          METH_VARARGS, splitfields__doc__},
1180         {"splitfields", strop_splitfields,
1181          METH_VARARGS, splitfields__doc__},
1182         {"strip",       strop_strip,
1183          METH_OLDARGS, strip__doc__},
1184         {"swapcase",    strop_swapcase,
1185          METH_OLDARGS, swapcase__doc__},
1186         {"translate",   strop_translate,
1187          METH_VARARGS, translate__doc__},
1188         {"upper",       strop_upper,
1189          METH_OLDARGS, upper__doc__},
1190         {NULL,          NULL}   /* sentinel */
1191 };
1192
1193
1194 DL_EXPORT(void)
1195 initstrop(void)
1196 {
1197         PyObject *m, *d, *s;
1198         char buf[256];
1199         int c, n;
1200         m = Py_InitModule4("strop", strop_methods, strop_module__doc__,
1201                            (PyObject*)NULL, PYTHON_API_VERSION);
1202         d = PyModule_GetDict(m);
1203
1204         /* Create 'whitespace' object */
1205         n = 0;
1206         for (c = 0; c < 256; c++) {
1207                 if (isspace(c))
1208                         buf[n++] = c;
1209         }
1210         s = PyString_FromStringAndSize(buf, n);
1211         if (s) {
1212                 PyDict_SetItemString(d, "whitespace", s);
1213                 Py_DECREF(s);
1214         }
1215         /* Create 'lowercase' object */
1216         n = 0;
1217         for (c = 0; c < 256; c++) {
1218                 if (islower(c))
1219                         buf[n++] = c;
1220         }
1221         s = PyString_FromStringAndSize(buf, n);
1222         if (s) {
1223                 PyDict_SetItemString(d, "lowercase", s);
1224                 Py_DECREF(s);
1225         }
1226
1227         /* Create 'uppercase' object */
1228         n = 0;
1229         for (c = 0; c < 256; c++) {
1230                 if (isupper(c))
1231                         buf[n++] = c;
1232         }
1233         s = PyString_FromStringAndSize(buf, n);
1234         if (s) {
1235                 PyDict_SetItemString(d, "uppercase", s);
1236                 Py_DECREF(s);
1237         }
1238 }