commands/elvis/regexp.c

   1 /* regexp.c */
   2
   3 /* This file contains the code that compiles regular expressions and executes
   4  * them.  It supports the same syntax and features as vi's regular expression
   5  * code.  Specifically, the meta characters are:
   6  *      ^       matches the beginning of a line
   7  *      $       matches the end of a line
   8  *      \<      matches the beginning of a word
   9  *      \>      matches the end of a word
  10  *      .       matches any single character
  11  *      []      matches any character in a character class
  12  *      \(      delimits the start of a subexpression
  13  *      \)      delimits the end of a subexpression
  14  *      *       repeats the preceding 0 or more times
  15  * NOTE: You cannot follow a \) with a *.
  16  *
  17  * The physical structure of a compiled RE is as follows:
  18  *      - First, there is a one-byte value that says how many character classes
  19  *        are used in this regular expression
  20  *      - Next, each character class is stored as a bitmap that is 256 bits
  21  *        (32 bytes) long.
  22  *      - A mixture of literal characters and compiled meta characters follows.
  23  *        This begins with M_BEGIN(0) and ends with M_END(0).  All meta chars
  24  *        are stored as a \n followed by a one-byte code, so they take up two
  25  *        bytes apiece.  Literal characters take up one byte apiece.  \n can't
  26  *        be used as a literal character.
  27  *
  28  * If NO_MAGIC is defined, then a different set of functions is used instead.
  29  * That right, this file contains TWO versions of the code.
  30  */
  31
  32 #include <setjmp.h>
  33 #include "config.h"
  34 #include "ctype.h"
  35 #include "vi.h"
  36 #include "regexp.h"
  37
  38
  39
  40 static char     *previous;      /* the previous regexp, used when null regexp is given */
  41
  42
  43 #ifndef NO_MAGIC
  44 /* THE REAL REGEXP PACKAGE IS USED UNLESS "NO_MAGIC" IS DEFINED */
  45
  46 /* These are used to classify or recognize meta-characters */
  47 #define META            '\0'
  48 #define BASE_META(m)    ((m) - 256)
  49 #define INT_META(c)     ((c) + 256)
  50 #define IS_META(m)      ((m) >= 256)
  51 #define IS_CLASS(m)     ((m) >= M_CLASS(0) && (m) <= M_CLASS(9))
  52 #define IS_START(m)     ((m) >= M_START(0) && (m) <= M_START(9))
  53 #define IS_END(m)       ((m) >= M_END(0) && (m) <= M_END(9))
  54 #define IS_CLOSURE(m)   ((m) >= M_SPLAT && (m) <= M_RANGE)
  55 #define ADD_META(s,m)   (*(s)++ = META, *(s)++ = BASE_META(m))
  56 #define GET_META(s)     (*(s) == META ? INT_META(*++(s)) : *s)
  57
  58 /* These are the internal codes used for each type of meta-character */
  59 #define M_BEGLINE       256             /* internal code for ^ */
  60 #define M_ENDLINE       257             /* internal code for $ */
  61 #define M_BEGWORD       258             /* internal code for \< */
  62 #define M_ENDWORD       259             /* internal code for \> */
  63 #define M_ANY           260             /* internal code for . */
  64 #define M_SPLAT         261             /* internal code for * */
  65 #define M_PLUS          262             /* internal code for \+ */
  66 #define M_QMARK         263             /* internal code for \? */
  67 #define M_RANGE         264             /* internal code for \{ */
  68 #define M_CLASS(n)      (265+(n))       /* internal code for [] */
  69 #define M_START(n)      (275+(n))       /* internal code for \( */
  70 #define M_END(n)        (285+(n))       /* internal code for \) */
  71
  72 /* These are used during compilation */
  73 static int      class_cnt;      /* used to assign class IDs */
  74 static int      start_cnt;      /* used to assign start IDs */
  75 static int      end_stk[NSUBEXP];/* used to assign end IDs */
  76 static int      end_sp;
  77 static char     *retext;        /* points to the text being compiled */
  78
  79 /* error-handling stuff */
  80 jmp_buf errorhandler;
  81 #define FAIL(why)       regerror(why); longjmp(errorhandler, 1)
  82
  83
  84
  85
  86
  87 /* This function builds a bitmap for a particular class */
  88 static char *makeclass(text, bmap)
  89         REG char        *text;  /* start of the class */
  90         REG char        *bmap;  /* the bitmap */
  91 {
  92         REG int         i;
  93         int             complement = 0;
  94
  95
  96         /* zero the bitmap */
  97         for (i = 0; bmap && i < 32; i++)
  98         {
  99                 bmap[i] = 0;
 100         }
 101
 102         /* see if we're going to complement this class */
 103         if (*text == '^')
 104         {
 105                 text++;
 106                 complement = 1;
 107         }
 108
 109         /* add in the characters */
 110         while (*text && *text != ']')
 111         {
 112                 /* is this a span of characters? */
 113                 if (text[1] == '-' && text[2])
 114                 {
 115                         /* spans can't be backwards */
 116                         if (text[0] > text[2])
 117                         {
 118                                 FAIL("Backwards span in []");
 119                         }
 120
 121                         /* add each character in the span to the bitmap */
 122                         for (i = text[0]; bmap && i <= text[2]; i++)
 123                         {
 124                                 bmap[i >> 3] |= (1 << (i & 7));
 125                         }
 126
 127                         /* move past this span */
 128                         text += 3;
 129                 }
 130                 else
 131                 {
 132                         /* add this single character to the span */
 133                         i = *text++;
 134                         if (bmap)
 135                         {
 136                                 bmap[i >> 3] |= (1 << (i & 7));
 137                         }
 138                 }
 139         }
 140
 141         /* make sure the closing ] is missing */
 142         if (*text++ != ']')
 143         {
 144                 FAIL("] missing");
 145         }
 146
 147         /* if we're supposed to complement this class, then do so */
 148         if (complement && bmap)
 149         {
 150                 for (i = 0; i < 32; i++)
 151                 {
 152                         bmap[i] = ~bmap[i];
 153                 }
 154         }
 155
 156         return text;
 157 }
 158
 159
 160
 161
 162 /* This function gets the next character or meta character from a string.
 163  * The pointer is incremented by 1, or by 2 for \-quoted characters.  For [],
 164  * a bitmap is generated via makeclass() (if re is given), and the
 165  * character-class text is skipped.
 166  */
 167 static int gettoken(sptr, re)
 168         char    **sptr;
 169         regexp  *re;
 170 {
 171         int     c;
 172
 173         c = **sptr;
 174         ++*sptr;
 175         if (c == '\\')
 176         {
 177                 c = **sptr;
 178                 ++*sptr;
 179                 switch (c)
 180                 {
 181                   case '<':
 182                         return M_BEGWORD;
 183
 184                   case '>':
 185                         return M_ENDWORD;
 186
 187                   case '(':
 188                         if (start_cnt >= NSUBEXP)
 189                         {
 190                                 FAIL("Too many \\(s");
 191                         }
 192                         end_stk[end_sp++] = start_cnt;
 193                         return M_START(start_cnt++);
 194
 195                   case ')':
 196                         if (end_sp <= 0)
 197                         {
 198                                 FAIL("Mismatched \\)");
 199                         }
 200                         return M_END(end_stk[--end_sp]);
 201
 202                   case '*':
 203                         return (*o_magic ? c : M_SPLAT);
 204
 205                   case '.':
 206                         return (*o_magic ? c : M_ANY);
 207
 208                   case '+':
 209                         return M_PLUS;
 210
 211                   case '?':
 212                         return M_QMARK;
 213 #ifndef CRUNCH
 214                   case '{':
 215                         return M_RANGE;
 216 #endif
 217                   default:
 218                         return c;
 219                 }
 220         }
 221         else if (*o_magic)
 222         {
 223                 switch (c)
 224                 {
 225                   case '^':
 226                         if (*sptr == retext + 1)
 227                         {
 228                                 return M_BEGLINE;
 229                         }
 230                         return c;
 231
 232                   case '$':
 233                         if (!**sptr)
 234                         {
 235                                 return M_ENDLINE;
 236                         }
 237                         return c;
 238
 239                   case '.':
 240                         return M_ANY;
 241
 242                   case '*':
 243                         return M_SPLAT;
 244
 245                   case '[':
 246                         /* make sure we don't have too many classes */
 247                         if (class_cnt >= 10)
 248                         {
 249                                 FAIL("Too many []s");
 250                         }
 251
 252                         /* process the character list for this class */
 253                         if (re)
 254                         {
 255                                 /* generate the bitmap for this class */
 256                                 *sptr = makeclass(*sptr, re->program + 1 + 32 * class_cnt);
 257                         }
 258                         else
 259                         {
 260                                 /* skip to end of the class */
 261                                 *sptr = makeclass(*sptr, (char *)0);
 262                         }
 263                         return M_CLASS(class_cnt++);
 264
 265                   default:
 266                         return c;
 267                 }
 268         }
 269         else    /* unquoted nomagic */
 270         {
 271                 switch (c)
 272                 {
 273                   case '^':
 274                         if (*sptr == retext + 1)
 275                         {
 276                                 return M_BEGLINE;
 277                         }
 278                         return c;
 279
 280                   case '$':
 281                         if (!**sptr)
 282                         {
 283                                 return M_ENDLINE;
 284                         }
 285                         return c;
 286
 287                   default:
 288                         return c;
 289                 }
 290         }
 291         /*NOTREACHED*/
 292 }
 293
 294
 295
 296
 297 /* This function calculates the number of bytes that will be needed for a
 298  * compiled RE.  Its argument is the uncompiled version.  It is not clever
 299  * about catching syntax errors; that is done in a later pass.
 300  */
 301 static unsigned calcsize(text)
 302         char            *text;
 303 {
 304         unsigned        size;
 305         int             token;
 306
 307         retext = text;
 308         class_cnt = 0;
 309         start_cnt = 1;
 310         end_sp = 0;
 311         size = 5;
 312         while ((token = gettoken(&text, (regexp *)0)) != 0)
 313         {
 314                 if (IS_CLASS(token))
 315                 {
 316                         size += 34;
 317                 }
 318 #ifndef CRUNCH
 319                 else if (token == M_RANGE)
 320                 {
 321                         size += 4;
 322                         while ((token = gettoken(&text, (regexp *)0)) != 0
 323                             && token != '}')
 324                         {
 325                         }
 326                         if (!token)
 327                         {
 328                                 return size;
 329                         }
 330                 }
 331 #endif
 332                 else if (IS_META(token))
 333                 {
 334                         size += 2;
 335                 }
 336                 else
 337                 {
 338                         size++;
 339                 }
 340         }
 341
 342         return size;
 343 }
 344
 345
 346
 347 /* This function compiles a regexp. */
 348 regexp *regcomp(exp)
 349         char            *exp;
 350 {
 351         int             needfirst;
 352         unsigned        size;
 353         int             token;
 354         int             peek;
 355         char            *build;
 356         regexp          *re;
 357 #ifndef CRUNCH
 358         int             from;
 359         int             to;
 360         int             digit;
 361 #endif
 362
 363
 364         /* prepare for error handling */
 365         re = (regexp *)0;
 366         if (setjmp(errorhandler))
 367         {
 368                 if (re)
 369                 {
 370                         free(re);
 371                 }
 372                 return (regexp *)0;
 373         }
 374
 375         /* if an empty regexp string was given, use the previous one */
 376         if (*exp == 0)
 377         {
 378                 if (!previous)
 379                 {
 380                         FAIL("No previous RE");
 381                 }
 382                 exp = previous;
 383         }
 384         else /* non-empty regexp given, so remember it */
 385         {
 386                 if (previous)
 387                         free(previous);
 388                 previous = (char *)malloc((unsigned)(strlen(exp) + 1));
 389                 if (previous)
 390                         strcpy(previous, exp);
 391         }
 392
 393         /* allocate memory */
 394         class_cnt = 0;
 395         start_cnt = 1;
 396         end_sp = 0;
 397         retext = exp;
 398         size = calcsize(exp) + sizeof(regexp) + 10; /* !!! 10 bytes for slop */
 399 #ifdef lint
 400         re = ((regexp *)0) + size;
 401 #else
 402         re = (regexp *)malloc((unsigned)size);
 403 #endif
 404         if (!re)
 405         {
 406                 FAIL("Not enough memory for this RE");
 407         }
 408
 409         /* compile it */
 410         build = &re->program[1 + 32 * class_cnt];
 411         re->program[0] = class_cnt;
 412         for (token = 0; token < NSUBEXP; token++)
 413         {
 414                 re->startp[token] = re->endp[token] = (char *)0;
 415         }
 416         re->first = 0;
 417         re->bol = 0;
 418         re->minlen = 0;
 419         needfirst = 1;
 420         class_cnt = 0;
 421         start_cnt = 1;
 422         end_sp = 0;
 423         retext = exp;
 424         for (token = M_START(0), peek = gettoken(&exp, re);
 425              token;
 426              token = peek, peek = gettoken(&exp, re))
 427         {
 428                 /* special processing for the closure operator */
 429                 if (IS_CLOSURE(peek))
 430                 {
 431                         /* detect misuse of closure operator */
 432                         if (IS_START(token))
 433                         {
 434                                 FAIL("Closure operator follows nothing");
 435                         }
 436                         else if (IS_META(token) && token != M_ANY && !IS_CLASS(token))
 437                         {
 438                                 FAIL("Closure operators can only follow a normal character or . or []");
 439                         }
 440
 441 #ifndef CRUNCH
 442                         /* if \{ \} then read the range */
 443                         if (peek == M_RANGE)
 444                         {
 445                                 from = 0;
 446                                 for (digit = gettoken(&exp, re);
 447                                      !IS_META(digit) && isdigit(digit);
 448                                      digit = gettoken(&exp, re))
 449                                 {
 450                                         from = from * 10 + digit - '0';
 451                                 }
 452                                 if (digit == '}')
 453                                 {
 454                                         to = from;
 455                                 }
 456                                 else if (digit == ',')
 457                                 {
 458                                         to = 0;
 459                                         for (digit = gettoken(&exp, re);
 460                                              !IS_META(digit) && isdigit(digit);
 461                                              digit = gettoken(&exp, re))
 462                                         {
 463                                                 to = to * 10 + digit - '0';
 464                                         }
 465                                         if (to == 0)
 466                                         {
 467                                                 to = 255;
 468                                         }
 469                                 }
 470                                 if (digit != '}')
 471                                 {
 472                                         FAIL("Bad characters after \\{");
 473                                 }
 474                                 else if (to < from || to == 0 || from >= 255)
 475                                 {
 476                                         FAIL("Invalid range for \\{ \\}");
 477                                 }
 478                                 re->minlen += from;
 479                         }
 480                         else
 481 #endif
 482                         if (peek != M_SPLAT)
 483                         {
 484                                 re->minlen++;
 485                         }
 486
 487                         /* it is okay -- make it prefix instead of postfix */
 488                         ADD_META(build, peek);
 489 #ifndef CRUNCH
 490                         if (peek == M_RANGE)
 491                         {
 492                                 *build++ = from;
 493                                 *build++ = (to < 255 ? to : 255);
 494                         }
 495 #endif
 496
 497
 498                         /* take care of "needfirst" - is this the first char? */
 499                         if (needfirst && peek == M_PLUS && !IS_META(token))
 500                         {
 501                                 re->first = token;
 502                         }
 503                         needfirst = 0;
 504
 505                         /* we used "peek" -- need to refill it */
 506                         peek = gettoken(&exp, re);
 507                         if (IS_CLOSURE(peek))
 508                         {
 509                                 FAIL("* or \\+ or \\? doubled up");
 510                         }
 511                 }
 512                 else if (!IS_META(token))
 513                 {
 514                         /* normal char is NOT argument of closure */
 515                         if (needfirst)
 516                         {
 517                                 re->first = token;
 518                                 needfirst = 0;
 519                         }
 520                         re->minlen++;
 521                 }
 522                 else if (token == M_ANY || IS_CLASS(token))
 523                 {
 524                         /* . or [] is NOT argument of closure */
 525                         needfirst = 0;
 526                         re->minlen++;
 527                 }
 528
 529                 /* the "token" character is not closure -- process it normally */
 530                 if (token == M_BEGLINE)
 531                 {
 532                         /* set the BOL flag instead of storing M_BEGLINE */
 533                         re->bol = 1;
 534                 }
 535                 else if (IS_META(token))
 536                 {
 537                         ADD_META(build, token);
 538                 }
 539                 else
 540                 {
 541                         *build++ = token;
 542                 }
 543         }
 544
 545         /* end it with a \) which MUST MATCH the opening \( */
 546         ADD_META(build, M_END(0));
 547         if (end_sp > 0)
 548         {
 549                 FAIL("Not enough \\)s");
 550         }
 551
 552         return re;
 553 }
 554
 555
 556
 557 /*---------------------------------------------------------------------------*/
 558
 559
 560 /* This function checks for a match between a character and a token which is
 561  * known to represent a single character.  It returns 0 if they match, or
 562  * 1 if they don't.
 563  */
 564 int match1(re, ch, token)
 565         regexp          *re;
 566         REG char        ch;
 567         REG int         token;
 568 {
 569         if (!ch)
 570         {
 571                 /* the end of a line can't match any RE of width 1 */
 572                 return 1;
 573         }
 574         if (token == M_ANY)
 575         {
 576                 return 0;
 577         }
 578         else if (IS_CLASS(token))
 579         {
 580                 if (re->program[1 + 32 * (token - M_CLASS(0)) + (ch >> 3)] & (1 << (ch & 7)))
 581                         return 0;
 582         }
 583         else if (ch == token || *o_ignorecase && tolower(ch) == tolower(token))
 584         {
 585                 return 0;
 586         }
 587         return 1;
 588 }
 589
 590
 591
 592 /* This function checks characters up to and including the next closure, at
 593  * which point it does a recursive call to check the rest of it.  This function
 594  * returns 0 if everything matches, or 1 if something doesn't match.
 595  */
 596 int match(re, str, prog, here)
 597         regexp          *re;    /* the regular expression */
 598         char            *str;   /* the string */
 599         REG char        *prog;  /* a portion of re->program, an compiled RE */
 600         REG char        *here;  /* a portion of str, the string to compare it to */
 601 {
 602         REG int         token;  /* the roken pointed to by prog */
 603         REG int         nmatched;/* counter, used during closure matching */
 604         REG int         closure;/* the token denoting the type of closure */
 605         int             from;   /* minimum number of matches in closure */
 606         int             to;     /* maximum number of matches in closure */
 607
 608         for (token = GET_META(prog); !IS_CLOSURE(token); prog++, token = GET_META(prog))
 609         {
 610                 switch (token)
 611                 {
 612                 /*case M_BEGLINE: can't happen; re->bol is used instead */
 613                   case M_ENDLINE:
 614                         if (*here)
 615                                 return 1;
 616                         break;
 617
 618                   case M_BEGWORD:
 619                         if (here != str &&
 620                            (here[-1] == '_' || isalnum(here[-1])))
 621                                 return 1;
 622                         break;
 623
 624                   case M_ENDWORD:
 625                         if (here[0] == '_' || isalnum(here[0]))
 626                                 return 1;
 627                         break;
 628
 629                   case M_START(0):
 630                   case M_START(1):
 631                   case M_START(2):
 632                   case M_START(3):
 633                   case M_START(4):
 634                   case M_START(5):
 635                   case M_START(6):
 636                   case M_START(7):
 637                   case M_START(8):
 638                   case M_START(9):
 639                         re->startp[token - M_START(0)] = (char *)here;
 640                         break;
 641
 642                   case M_END(0):
 643                   case M_END(1):
 644                   case M_END(2):
 645                   case M_END(3):
 646                   case M_END(4):
 647                   case M_END(5):
 648                   case M_END(6):
 649                   case M_END(7):
 650                   case M_END(8):
 651                   case M_END(9):
 652                         re->endp[token - M_END(0)] = (char *)here;
 653                         if (token == M_END(0))
 654                         {
 655                                 return 0;
 656                         }
 657                         break;
 658
 659                   default: /* literal, M_CLASS(n), or M_ANY */
 660                         if (match1(re, *here, token) != 0)
 661                                 return 1;
 662                         here++;
 663                 }
 664         }
 665
 666         /* C L O S U R E */
 667
 668         /* step 1: see what we have to match against, and move "prog" to point
 669          * to the remainder of the compiled RE.
 670          */
 671         closure = token;
 672         prog++;
 673         switch (closure)
 674         {
 675           case M_SPLAT:
 676                 from = 0;
 677                 to = strlen(str);       /* infinity */
 678                 break;
 679
 680           case M_PLUS:
 681                 from = 1;
 682                 to = strlen(str);       /* infinity */
 683                 break;
 684
 685           case M_QMARK:
 686                 from = 0;
 687                 to = 1;
 688                 break;
 689
 690 #ifndef CRUNCH
 691           case M_RANGE:
 692                 from = UCHAR(*prog++);
 693                 to = UCHAR(*prog++);
 694                 if (to == 255)
 695                 {
 696                         to = strlen(str); /* infinity */
 697                 }
 698                 break;
 699 #endif
 700         }
 701         token = GET_META(prog);
 702         prog++;
 703
 704         /* step 2: see how many times we can match that token against the string */
 705         for (nmatched = 0;
 706              nmatched < to && *here && match1(re, *here, token) == 0;
 707              nmatched++, here++)
 708         {
 709         }
 710
 711         /* step 3: try to match the remainder, and back off if it doesn't */
 712         while (nmatched >= from && match(re, str, prog, here) != 0)
 713         {
 714                 nmatched--;
 715                 here--;
 716         }
 717
 718         /* so how did it work out? */
 719         if (nmatched >= from)
 720                 return 0;
 721         return 1;
 722 }
 723
 724
 725
 726 /* This function searches through a string for text that matches an RE. */
 727 int regexec(re, str, bol)
 728         regexp  *re;    /* the compiled regexp to search for */
 729         char    *str;   /* the string to search through */
 730         int     bol;    /* boolean: does str start at the beginning of a line? */
 731 {
 732         char    *prog;  /* the entry point of re->program */
 733         int     len;    /* length of the string */
 734         REG char        *here;
 735
 736         /* if must start at the beginning of a line, and this isn't, then fail */
 737         if (re->bol && !bol)
 738         {
 739                 return 0;
 740         }
 741
 742         len = strlen(str);
 743         prog = re->program + 1 + 32 * re->program[0];
 744
 745         /* search for the RE in the string */
 746         if (re->bol)
 747         {
 748                 /* must occur at BOL */
 749                 if ((re->first
 750                         && match1(re, *(char *)str, re->first))/* wrong first letter? */
 751                  || len < re->minlen                    /* not long enough? */
 752                  || match(re, (char *)str, prog, str))  /* doesn't match? */
 753                         return 0;                       /* THEN FAIL! */
 754         }
 755 #ifndef CRUNCH
 756         else if (!*o_ignorecase)
 757         {
 758                 /* can occur anywhere in the line, noignorecase */
 759                 for (here = (char *)str;
 760                      (re->first && re->first != *here)
 761                         || match(re, (char *)str, prog, here);
 762                      here++, len--)
 763                 {
 764                         if (len < re->minlen)
 765                                 return 0;
 766                 }
 767         }
 768 #endif
 769         else
 770         {
 771                 /* can occur anywhere in the line, ignorecase */
 772                 for (here = (char *)str;
 773                      (re->first && match1(re, *here, (int)re->first))
 774                         || match(re, (char *)str, prog, here);
 775                      here++, len--)
 776                 {
 777                         if (len < re->minlen)
 778                                 return 0;
 779                 }
 780         }
 781
 782         /* if we didn't fail, then we must have succeeded */
 783         return 1;
 784 }
 785
 786 /*============================================================================*/
 787 #else /* NO_MAGIC */
 788
 789 regexp *regcomp(exp)
 790         char    *exp;
 791 {
 792         char    *src;
 793         char    *dest;
 794         regexp  *re;
 795         int     i;
 796
 797         /* allocate a big enough regexp structure */
 798 #ifdef lint
 799         re = (regexp *)0;
 800 #else
 801         re = (regexp *)malloc((unsigned)(strlen(exp) + 1 + sizeof(struct regexp)));
 802 #endif
 803         if (!re)
 804         {
 805                 regerror("Could not malloc a regexp structure");
 806                 return (regexp *)0;
 807         }
 808
 809         /* initialize all fields of the structure */
 810         for (i = 0; i < NSUBEXP; i++)
 811         {
 812                 re->startp[i] = re->endp[i] = (char *)0;
 813         }
 814         re->minlen = 0;
 815         re->first = 0;
 816         re->bol = 0;
 817
 818         /* copy the string into it, translating ^ and $ as needed */
 819         for (src = exp, dest = re->program + 1; *src; src++)
 820         {
 821                 switch (*src)
 822                 {
 823                   case '^':
 824                         if (src == exp)
 825                         {
 826                                 re->bol += 1;
 827                         }
 828                         else
 829                         {
 830                                 *dest++ = '^';
 831                                 re->minlen++;
 832                         }
 833                         break;
 834
 835                   case '$':
 836                         if (!src[1])
 837                         {
 838                                 re->bol += 2;
 839                         }
 840                         else
 841                         {
 842                                 *dest++ = '$';
 843                                 re->minlen++;
 844                         }
 845                         break;
 846
 847                   case '\\':
 848                         if (src[1])
 849                         {
 850                                 *dest++ = *++src;
 851                                 re->minlen++;
 852                         }
 853                         else
 854                         {
 855                                 regerror("extra \\ at end of regular expression");
 856                         }
 857                         break;
 858
 859                   default:
 860                         *dest++ = *src;
 861                         re->minlen++;
 862                 }
 863         }
 864         *dest = '\0';
 865
 866         return re;
 867 }
 868
 869
 870 /* This "helper" function checks for a match at a given location.  It returns
 871  * 1 if it matches, 0 if it doesn't match here but might match later on in the
 872  * string, or -1 if it could not possibly match
 873  */
 874 static int reghelp(prog, string, bolflag)
 875         struct regexp   *prog;
 876         char            *string;
 877         int             bolflag;
 878 {
 879         char            *scan;
 880         char            *str;
 881
 882         /* if ^, then require bolflag */
 883         if ((prog->bol & 1) && !bolflag)
 884         {
 885                 return -1;
 886         }
 887
 888         /* if it matches, then it will start here */
 889         prog->startp[0] = string;
 890
 891         /* compare, possibly ignoring case */
 892         if (*o_ignorecase)
 893         {
 894                 for (scan = &prog->program[1]; *scan; scan++, string++)
 895                         if (tolower(*scan) != tolower(*string))
 896                                 return *string ? 0 : -1;
 897         }
 898         else
 899         {
 900                 for (scan = &prog->program[1]; *scan; scan++, string++)
 901                         if (*scan != *string)
 902                                 return *string ? 0 : -1;
 903         }
 904
 905         /* if $, then require string to end here, too */
 906         if ((prog->bol & 2) && *string)
 907         {
 908                 return 0;
 909         }
 910
 911         /* if we get to here, it matches */
 912         prog->endp[0] = string;
 913         return 1;
 914 }
 915
 916
 917
 918 int regexec(prog, string, bolflag)
 919         struct regexp   *prog;
 920         char            *string;
 921         int             bolflag;
 922 {
 923         int             rc;
 924
 925         /* keep trying to match it */
 926         for (rc = reghelp(prog, string, bolflag); rc == 0; rc = reghelp(prog, string, 0))
 927         {
 928                 string++;
 929         }
 930
 931         /* did we match? */
 932         return rc == 1;
 933 }
 934 #endif