external/bsd/nvi/dist/ex/ex_subst.c

   1 /*      $NetBSD: ex_subst.c,v 1.4 2014/01/26 21:43:45 christos Exp $ */
   2 /*-
   3  * Copyright (c) 1992, 1993, 1994
   4  *      The Regents of the University of California.  All rights reserved.
   5  * Copyright (c) 1992, 1993, 1994, 1995, 1996
   6  *      Keith Bostic.  All rights reserved.
   7  *
   8  * See the LICENSE file for redistribution information.
   9  */
  10
  11 #include "config.h"
  12
  13 #include <sys/cdefs.h>
  14 #if 0
  15 #ifndef lint
  16 static const char sccsid[] = "Id: ex_subst.c,v 10.50 2002/02/09 21:18:23 skimo Exp  (Berkeley) Date: 2002/02/09 21:18:23 ";
  17 #endif /* not lint */
  18 #else
  19 __RCSID("$NetBSD: ex_subst.c,v 1.4 2014/01/26 21:43:45 christos Exp $");
  20 #endif
  21
  22 #include <sys/types.h>
  23 #include <sys/queue.h>
  24 #include <sys/time.h>
  25
  26 #include <bitstring.h>
  27 #include <ctype.h>
  28 #include <errno.h>
  29 #include <limits.h>
  30 #include <stdio.h>
  31 #include <stdlib.h>
  32 #include <string.h>
  33 #include <unistd.h>
  34
  35 #include "../common/common.h"
  36 #include "../vi/vi.h"
  37
  38 #define SUB_FIRST       0x01            /* The 'r' flag isn't reasonable. */
  39 #define SUB_MUSTSETR    0x02            /* The 'r' flag is required. */
  40
  41 static int re_conv __P((SCR *, CHAR_T **, size_t *, int *));
  42 static int re_cscope_conv __P((SCR *, CHAR_T **, size_t *, int *));
  43 static int re_sub __P((SCR *,
  44                 CHAR_T *, CHAR_T **, size_t *, size_t *, regmatch_t [10]));
  45 static int re_tag_conv __P((SCR *, CHAR_T **, size_t *, int *));
  46 static int s __P((SCR *, EXCMD *, CHAR_T *, regex_t *, u_int));
  47
  48 /*
  49  * ex_s --
  50  *      [line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]]
  51  *
  52  *      Substitute on lines matching a pattern.
  53  *
  54  * PUBLIC: int ex_s __P((SCR *, EXCMD *));
  55  */
  56 int
  57 ex_s(SCR *sp, EXCMD *cmdp)
  58 {
  59         regex_t *re;
  60         size_t blen, len;
  61         u_int flags;
  62         ARG_CHAR_T delim;
  63         CHAR_T *bp, *p, *ptrn, *rep, *t;
  64
  65         /*
  66          * Skip leading white space.
  67          *
  68          * !!!
  69          * Historic vi allowed any non-alphanumeric to serve as the
  70          * substitution command delimiter.
  71          *
  72          * !!!
  73          * If the arguments are empty, it's the same as &, i.e. we
  74          * repeat the last substitution.
  75          */
  76         if (cmdp->argc == 0)
  77                 goto subagain;
  78         for (p = cmdp->argv[0]->bp,
  79             len = cmdp->argv[0]->len; len > 0; --len, ++p) {
  80                 if (!ISBLANK((UCHAR_T)*p))
  81                         break;
  82         }
  83         if (len == 0)
  84 subagain:       return (ex_subagain(sp, cmdp));
  85
  86         delim = (UCHAR_T)*p++;
  87         if (ISALNUM(delim) || delim == '\\')
  88                 return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR));
  89
  90         /*
  91          * !!!
  92          * The full-blown substitute command reset the remembered
  93          * state of the 'c' and 'g' suffices.
  94          */
  95         sp->c_suffix = sp->g_suffix = 0;
  96
  97         /*
  98          * Get the pattern string, toss escaping characters.
  99          *
 100          * !!!
 101          * Historic vi accepted any of the following forms:
 102          *
 103          *      :s/abc/def/             change "abc" to "def"
 104          *      :s/abc/def              change "abc" to "def"
 105          *      :s/abc/                 delete "abc"
 106          *      :s/abc                  delete "abc"
 107          *
 108          * QUOTING NOTE:
 109          *
 110          * Only toss an escaping character if it escapes a delimiter.
 111          * This means that "s/A/\\\\f" replaces "A" with "\\f".  It
 112          * would be nice to be more regular, i.e. for each layer of
 113          * escaping a single escaping character is removed, but that's
 114          * not how the historic vi worked.
 115          */
 116         for (ptrn = t = p;;) {
 117                 if (p[0] == '\0' || p[0] == delim) {
 118                         if (p[0] == delim)
 119                                 ++p;
 120                         /*
 121                          * !!!
 122                          * Nul terminate the pattern string -- it's passed
 123                          * to regcomp which doesn't understand anything else.
 124                          */
 125                         *t = '\0';
 126                         break;
 127                 }
 128                 if (p[0] == '\\') {
 129                         if (p[1] == delim)
 130                                 ++p;
 131                         else if (p[1] == '\\')
 132                                 *t++ = *p++;
 133                 }
 134                 *t++ = *p++;
 135         }
 136
 137         /*
 138          * If the pattern string is empty, use the last RE (not just the
 139          * last substitution RE).
 140          */
 141         if (*ptrn == '\0') {
 142                 if (sp->re == NULL) {
 143                         ex_emsg(sp, NULL, EXM_NOPREVRE);
 144                         return (1);
 145                 }
 146
 147                 /* Re-compile the RE if necessary. */
 148                 if (!F_ISSET(sp, SC_RE_SEARCH) &&
 149                     re_compile(sp, sp->re, sp->re_len,
 150                     NULL, NULL, &sp->re_c, SEARCH_CSEARCH | SEARCH_MSG))
 151                         return (1);
 152                 flags = 0;
 153         } else {
 154                 /*
 155                  * !!!
 156                  * Compile the RE.  Historic practice is that substitutes set
 157                  * the search direction as well as both substitute and search
 158                  * RE's.  We compile the RE twice, as we don't want to bother
 159                  * ref counting the pattern string and (opaque) structure.
 160                  */
 161                 if (re_compile(sp, ptrn, t - ptrn, &sp->re,
 162                     &sp->re_len, &sp->re_c, SEARCH_CSEARCH | SEARCH_MSG))
 163                         return (1);
 164                 if (re_compile(sp, ptrn, t - ptrn, &sp->subre,
 165                     &sp->subre_len, &sp->subre_c, SEARCH_CSUBST | SEARCH_MSG))
 166                         return (1);
 167
 168                 flags = SUB_FIRST;
 169                 sp->searchdir = FORWARD;
 170         }
 171         re = &sp->re_c;
 172
 173         /*
 174          * Get the replacement string.
 175          *
 176          * The special character & (\& if O_MAGIC not set) matches the
 177          * entire RE.  No handling of & is required here, it's done by
 178          * re_sub().
 179          *
 180          * The special character ~ (\~ if O_MAGIC not set) inserts the
 181          * previous replacement string into this replacement string.
 182          * Count ~'s to figure out how much space we need.  We could
 183          * special case nonexistent last patterns or whether or not
 184          * O_MAGIC is set, but it's probably not worth the effort.
 185          *
 186          * QUOTING NOTE:
 187          *
 188          * Only toss an escaping character if it escapes a delimiter or
 189          * if O_MAGIC is set and it escapes a tilde.
 190          *
 191          * !!!
 192          * If the entire replacement pattern is "%", then use the last
 193          * replacement pattern.  This semantic was added to vi in System
 194          * V and then percolated elsewhere, presumably around the time
 195          * that it was added to their version of ed(1).
 196          */
 197         if (p[0] == L('\0') || p[0] == delim) {
 198                 if (p[0] == delim)
 199                         ++p;
 200                 if (sp->repl != NULL)
 201                         free(sp->repl);
 202                 sp->repl = NULL;
 203                 sp->repl_len = 0;
 204         } else if (p[0] == L('%') && (p[1] == L('\0') || p[1] == delim))
 205                 p += p[1] == delim ? 2 : 1;
 206         else {
 207                 for (rep = p, len = 0;
 208                     p[0] != L('\0') && p[0] != delim; ++p, ++len)
 209                         if (p[0] == L('~'))
 210                                 len += sp->repl_len;
 211                 GET_SPACE_RETW(sp, bp, blen, len);
 212                 for (t = bp, len = 0, p = rep;;) {
 213                         if (p[0] == L('\0') || p[0] == delim) {
 214                                 if (p[0] == delim)
 215                                         ++p;
 216                                 break;
 217                         }
 218                         if (p[0] == L('\\')) {
 219                                 if (p[1] == delim)
 220                                         ++p;
 221                                 else if (p[1] == L('\\')) {
 222                                         *t++ = *p++;
 223                                         ++len;
 224                                 } else if (p[1] == L('~')) {
 225                                         ++p;
 226                                         if (!O_ISSET(sp, O_MAGIC))
 227                                                 goto tilde;
 228                                 }
 229                         } else if (p[0] == L('~') && O_ISSET(sp, O_MAGIC)) {
 230 tilde:                          ++p;
 231                                 MEMCPYW(t, sp->repl, sp->repl_len);
 232                                 t += sp->repl_len;
 233                                 len += sp->repl_len;
 234                                 continue;
 235                         }
 236                         *t++ = *p++;
 237                         ++len;
 238                 }
 239                 if ((sp->repl_len = len) != 0) {
 240                         if (sp->repl != NULL)
 241                                 free(sp->repl);
 242                         if ((sp->repl = malloc(len * sizeof(CHAR_T))) == NULL) {
 243                                 msgq(sp, M_SYSERR, NULL);
 244                                 FREE_SPACEW(sp, bp, blen);
 245                                 return (1);
 246                         }
 247                         MEMCPYW(sp->repl, bp, len);
 248                 }
 249                 FREE_SPACEW(sp, bp, blen);
 250         }
 251         return (s(sp, cmdp, p, re, flags));
 252 }
 253
 254 /*
 255  * ex_subagain --
 256  *      [line [,line]] & [cgr] [count] [#lp]]
 257  *
 258  *      Substitute using the last substitute RE and replacement pattern.
 259  *
 260  * PUBLIC: int ex_subagain __P((SCR *, EXCMD *));
 261  */
 262 int
 263 ex_subagain(SCR *sp, EXCMD *cmdp)
 264 {
 265         if (sp->subre == NULL) {
 266                 ex_emsg(sp, NULL, EXM_NOPREVRE);
 267                 return (1);
 268         }
 269         if (!F_ISSET(sp, SC_RE_SUBST) &&
 270             re_compile(sp, sp->subre, sp->subre_len,
 271             NULL, NULL, &sp->subre_c, SEARCH_CSUBST | SEARCH_MSG))
 272                 return (1);
 273         return (s(sp,
 274             cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->subre_c, 0));
 275 }
 276
 277 /*
 278  * ex_subtilde --
 279  *      [line [,line]] ~ [cgr] [count] [#lp]]
 280  *
 281  *      Substitute using the last RE and last substitute replacement pattern.
 282  *
 283  * PUBLIC: int ex_subtilde __P((SCR *, EXCMD *));
 284  */
 285 int
 286 ex_subtilde(SCR *sp, EXCMD *cmdp)
 287 {
 288         if (sp->re == NULL) {
 289                 ex_emsg(sp, NULL, EXM_NOPREVRE);
 290                 return (1);
 291         }
 292         if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp, sp->re,
 293             sp->re_len, NULL, NULL, &sp->re_c, SEARCH_CSEARCH | SEARCH_MSG))
 294                 return (1);
 295         return (s(sp,
 296             cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->re_c, 0));
 297 }
 298
 299 /*
 300  * s --
 301  * Do the substitution.  This stuff is *really* tricky.  There are lots of
 302  * special cases, and general nastiness.  Don't mess with it unless you're
 303  * pretty confident.
 304  *
 305  * The nasty part of the substitution is what happens when the replacement
 306  * string contains newlines.  It's a bit tricky -- consider the information
 307  * that has to be retained for "s/f\(o\)o/^M\1^M\1/".  The solution here is
 308  * to build a set of newline offsets which we use to break the line up later,
 309  * when the replacement is done.  Don't change it unless you're *damned*
 310  * confident.
 311  */
 312 #define NEEDNEWLINE(sp) {                                               \
 313         if (sp->newl_len == sp->newl_cnt) {                             \
 314                 sp->newl_len += 25;                                     \
 315                 REALLOC(sp, sp->newl, size_t *,                         \
 316                     sp->newl_len * sizeof(size_t));                     \
 317                 if (sp->newl == NULL) {                                 \
 318                         sp->newl_len = 0;                               \
 319                         return (1);                                     \
 320                 }                                                       \
 321         }                                                               \
 322 }
 323
 324 #define BUILD(sp, l, len) {                                             \
 325         if (lbclen + (len) > lblen) {                                   \
 326                 lblen += MAX(lbclen + (len), 256);                      \
 327                 REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T));      \
 328                 if (lb == NULL) {                                       \
 329                         lbclen = 0;                                     \
 330                         return (1);                                     \
 331                 }                                                       \
 332         }                                                               \
 333         MEMCPYW(lb + lbclen, l, len);                                   \
 334         lbclen += len;                                                  \
 335 }
 336
 337 #define NEEDSP(sp, len, pnt) {                                          \
 338         if (lbclen + (len) > lblen) {                                   \
 339                 lblen += MAX(lbclen + (len), 256);                      \
 340                 REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T));      \
 341                 if (lb == NULL) {                                       \
 342                         lbclen = 0;                                     \
 343                         return (1);                                     \
 344                 }                                                       \
 345                 pnt = lb + lbclen;                                      \
 346         }                                                               \
 347 }
 348
 349 static int
 350 s(SCR *sp, EXCMD *cmdp, CHAR_T *st, regex_t *re, u_int flags)
 351 {
 352         EVENT ev;
 353         MARK from, to;
 354         TEXTH tiq;
 355         db_recno_t elno, lno, slno;
 356         u_long ul;
 357         regmatch_t match[10];
 358         size_t blen, cnt, last, lbclen, lblen, len, llen;
 359         size_t offset, saved_offset, scno;
 360         int lflag, nflag, pflag, rflag;
 361         int didsub, do_eol_match, eflags, empty_ok, eval;
 362         int linechanged, matched, quit, rval;
 363         CHAR_T *lb, *bp;
 364         enum nresult nret;
 365
 366         NEEDFILE(sp, cmdp);
 367
 368         slno = sp->lno;
 369         scno = sp->cno;
 370
 371         /*
 372          * !!!
 373          * Historically, the 'g' and 'c' suffices were always toggled as flags,
 374          * so ":s/A/B/" was the same as ":s/A/B/ccgg".  If O_EDCOMPATIBLE was
 375          * not set, they were initialized to 0 for all substitute commands.  If
 376          * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user
 377          * specified substitute/replacement patterns (see ex_s()).
 378          */
 379         if (!O_ISSET(sp, O_EDCOMPATIBLE))
 380                 sp->c_suffix = sp->g_suffix = 0;
 381
 382         /*
 383          * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but
 384          * it only displayed the last change.  I'd disallow them, but they are
 385          * useful in combination with the [v]global commands.  In the current
 386          * model the problem is combining them with the 'c' flag -- the screen
 387          * would have to flip back and forth between the confirm screen and the
 388          * ex print screen, which would be pretty awful.  We do display all
 389          * changes, though, for what that's worth.
 390          *
 391          * !!!
 392          * Historic vi was fairly strict about the order of "options", the
 393          * count, and "flags".  I'm somewhat fuzzy on the difference between
 394          * options and flags, anyway, so this is a simpler approach, and we
  395          * just take them in whatever order the user gives them.  (The ex
 396          * usage statement doesn't reflect this.)
 397          */
 398         lflag = nflag = pflag = rflag = 0;
 399         if (st == NULL)
 400                 goto noargs;
 401         for (lno = OOBLNO; *st != '\0'; ++st)
 402                 switch (*st) {
 403                 case ' ':
 404                 case '\t':
 405                         continue;
 406                 case '+':
 407                         ++cmdp->flagoff;
 408                         break;
 409                 case '-':
 410                         --cmdp->flagoff;
 411                         break;
 412                 case '0': case '1': case '2': case '3': case '4':
 413                 case '5': case '6': case '7': case '8': case '9':
 414                         if (lno != OOBLNO)
 415                                 goto usage;
 416                         errno = 0;
 417                         nret = nget_uslong(sp, &ul, st, &st, 10);
 418                         lno = ul;
 419                         if (*st == '\0')                /* Loop increment correction. */
 420                                 --st;
 421                         if (nret != NUM_OK) {
 422                                 if (nret == NUM_OVER)
 423                                         msgq(sp, M_ERR, "153|Count overflow");
 424                                 else if (nret == NUM_UNDER)
 425                                         msgq(sp, M_ERR, "154|Count underflow");
 426                                 else
 427                                         msgq(sp, M_SYSERR, NULL);
 428                                 return (1);
 429                         }
 430                         /*
 431                          * In historic vi, the count was inclusive from the
 432                          * second address.
 433                          */
 434                         cmdp->addr1.lno = cmdp->addr2.lno;
 435                         cmdp->addr2.lno += lno - 1;
 436                         if (!db_exist(sp, cmdp->addr2.lno) &&
 437                             db_last(sp, &cmdp->addr2.lno))
 438                                 return (1);
 439                         break;
 440                 case '#':
 441                         nflag = 1;
 442                         break;
 443                 case 'c':
 444                         sp->c_suffix = !sp->c_suffix;
 445
 446                         /* Ex text structure initialization. */
 447                         if (F_ISSET(sp, SC_EX)) {
 448                                 memset(&tiq, 0, sizeof(TEXTH));
 449                                 TAILQ_INIT(&tiq);
 450                         }
 451                         break;
 452                 case 'g':
 453                         sp->g_suffix = !sp->g_suffix;
 454                         break;
 455                 case 'l':
 456                         lflag = 1;
 457                         break;
 458                 case 'p':
 459                         pflag = 1;
 460                         break;
 461                 case 'r':
 462                         if (LF_ISSET(SUB_FIRST)) {
 463                                 msgq(sp, M_ERR,
 464                     "155|Regular expression specified; r flag meaningless");
 465                                 return (1);
 466                         }
 467                         if (!F_ISSET(sp, SC_RE_SEARCH)) {
 468                                 ex_emsg(sp, NULL, EXM_NOPREVRE);
 469                                 return (1);
 470                         }
 471                         rflag = 1;
 472                         re = &sp->re_c;
 473                         break;
 474                 default:
 475                         goto usage;
 476                 }
 477
 478         if (*st != '\0' || (!rflag && LF_ISSET(SUB_MUSTSETR))) {
 479 usage:          ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE);
 480                 return (1);
 481         }
 482
 483 noargs: if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) {
 484                 msgq(sp, M_ERR,
 485 "156|The #, l and p flags may not be combined with the c flag in vi mode");
 486                 return (1);
 487         }
 488
 489         /*
 490          * bp:          if interactive, line cache
 491          * blen:        if interactive, line cache length
 492          * lb:          build buffer pointer.
 493          * lbclen:      current length of built buffer.
 494          * lblen;       length of build buffer.
 495          */
 496         bp = lb = NULL;
 497         blen = lbclen = lblen = 0;
 498
 499         /* For each line... */
 500         lno = cmdp->addr1.lno == 0 ? 1 : cmdp->addr1.lno;
 501         for (matched = quit = 0,
 502             elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) {
 503
 504                 /* Someone's unhappy, time to stop. */
 505                 if (INTERRUPTED(sp))
 506                         break;
 507
 508                 /* Get the line. */
 509                 if (db_get(sp, lno, DBG_FATAL, &st, &llen))
 510                         goto err;
 511
 512                 /*
 513                  * Make a local copy if doing confirmation -- when calling
 514                  * the confirm routine we're likely to lose the cached copy.
 515                  */
 516                 if (sp->c_suffix) {
 517                         if (bp == NULL) {
 518                                 GET_SPACE_RETW(sp, bp, blen, llen);
 519                         } else
 520                                 ADD_SPACE_RETW(sp, bp, blen, llen);
 521                         MEMCPYW(bp, st, llen);
 522                         st = bp;
 523                 }
 524
 525                 /* Start searching from the beginning. */
 526                 offset = 0;
 527                 len = llen;
 528
 529                 /* Reset the build buffer offset. */
 530                 lbclen = 0;
 531
 532                 /* Reset empty match flag. */
 533                 empty_ok = 1;
 534
 535                 /*
 536                  * We don't want to have to do a setline if the line didn't
 537                  * change -- keep track of whether or not this line changed.
 538                  * If doing confirmations, don't want to keep setting the
 539                  * line if change is refused -- keep track of substitutions.
 540                  */
 541                 didsub = linechanged = 0;
 542
 543                 /* New line, do an EOL match. */
 544                 do_eol_match = 1;
 545
 546                 /* It's not nul terminated, but we pretend it is. */
 547                 eflags = REG_STARTEND;
 548
 549                 /*
 550                  * The search area is from st + offset to the EOL.
 551                  *
 552                  * Generally, match[0].rm_so is the offset of the start
 553                  * of the match from the start of the search, and offset
 554                  * is the offset of the start of the last search.
 555                  */
 556 nextmatch:      match[0].rm_so = 0;
 557                 match[0].rm_eo = len;
 558
 559                 /* Get the next match. */
 560                 eval = regexec(re, st + offset, 10, match, eflags);
 561
 562                 /*
 563                  * There wasn't a match or if there was an error, deal with
 564                  * it.  If there was a previous match in this line, resolve
 565                  * the changes into the database.  Otherwise, just move on.
 566                  */
 567                 if (eval == REG_NOMATCH)
 568                         goto endmatch;
 569                 if (eval != 0) {
 570                         re_error(sp, eval, re);
 571                         goto err;
 572                 }
 573                 matched = 1;
 574
 575                 /* Only the first search can match an anchored expression. */
 576                 eflags |= REG_NOTBOL;
 577
 578                 /*
 579                  * !!!
 580                  * It's possible to match 0-length strings -- for example, the
 581                  * command s;a*;X;, when matched against the string "aabb" will
 582                  * result in "XbXbX", i.e. the matches are "aa", the space
 583                  * between the b's and the space between the b's and the end of
 584                  * the string.  There is a similar space between the beginning
 585                  * of the string and the a's.  The rule that we use (because vi
 586                  * historically used it) is that any 0-length match, occurring
 587                  * immediately after a match, is ignored.  Otherwise, the above
 588                  * example would have resulted in "XXbXbX".  Another example is
 589                  * incorrectly using " *" to replace groups of spaces with one
 590                  * space.
 591                  *
 592                  * The way we do this is that if we just had a successful match,
 593                  * the starting offset does not skip characters, and the match
 594                  * is empty, ignore the match and move forward.  If there's no
 595                  * more characters in the string, we were attempting to match
 596                  * after the last character, so quit.
 597                  */
 598                 if (!empty_ok && match[0].rm_so == 0 && match[0].rm_eo == 0) {
 599                         empty_ok = 1;
 600                         if (len == 0)
 601                                 goto endmatch;
 602                         BUILD(sp, st + offset, 1)
 603                         ++offset;
 604                         --len;
 605                         goto nextmatch;
 606                 }
 607
 608                 /* Confirm change. */
 609                 if (sp->c_suffix) {
 610                         /*
 611                          * Set the cursor position for confirmation.  Note,
 612                          * if we matched on a '$', the cursor may be past
 613                          * the end of line.
 614                          */
 615                         from.lno = to.lno = lno;
 616                         from.cno = match[0].rm_so + offset;
 617                         to.cno = match[0].rm_eo + offset;
 618                         /*
 619                          * Both ex and vi have to correct for a change before
 620                          * the first character in the line.
 621                          */
 622                         if (llen == 0)
 623                                 from.cno = to.cno = 0;
 624                         if (F_ISSET(sp, SC_VI)) {
 625                                 /*
 626                                  * Only vi has to correct for a change after
 627                                  * the last character in the line.
 628                                  *
 629                                  * XXX
 630                                  * It would be nice to change the vi code so
 631                                  * that we could display a cursor past EOL.
 632                                  */
 633                                 if (to.cno >= llen)
 634                                         to.cno = llen - 1;
 635                                 if (from.cno >= llen)
 636                                         from.cno = llen - 1;
 637
 638                                 sp->lno = from.lno;
 639                                 sp->cno = from.cno;
 640                                 if (vs_refresh(sp, 1))
 641                                         goto err;
 642
 643                                 vs_update(sp, msg_cat(sp,
 644                                     "169|Confirm change? [n]", NULL), NULL);
 645
 646                                 if (v_event_get(sp, &ev, 0, 0))
 647                                         goto err;
 648                                 switch (ev.e_event) {
 649                                 case E_CHARACTER:
 650                                         break;
 651                                 case E_EOF:
 652                                 case E_ERR:
 653                                 case E_INTERRUPT:
 654                                         goto lquit;
 655                                 default:
 656                                         v_event_err(sp, &ev);
 657                                         goto lquit;
 658                                 }
 659                         } else {
 660                                 if (ex_print(sp, cmdp, &from, &to, 0) ||
 661                                     ex_scprint(sp, &from, &to))
 662                                         goto lquit;
 663                                 if (ex_txt(sp, &tiq, 0, TXT_CR))
 664                                         goto err;
 665                                 ev.e_c = TAILQ_FIRST(&tiq)->lb[0];
 666                         }
 667
 668                         switch (ev.e_c) {
 669                         case CH_YES:
 670                                 break;
 671                         default:
 672                         case CH_NO:
 673                                 didsub = 0;
 674                                 BUILD(sp, st + offset, match[0].rm_eo);
 675                                 goto skip;
 676                         case CH_QUIT:
 677                                 /* Set the quit/interrupted flags. */
 678 lquit:                          quit = 1;
 679                                 F_SET(sp->gp, G_INTERRUPTED);
 680
 681                                 /*
 682                                  * Resolve any changes, then return to (and
 683                                  * exit from) the main loop.
 684                                  */
 685                                 goto endmatch;
 686                         }
 687                 }
 688
 689                 /*
 690                  * Set the cursor to the last position changed, converting
 691                  * from 1-based to 0-based.
 692                  */
 693                 sp->lno = lno;
 694                 sp->cno = match[0].rm_so;
 695
 696                 /* Copy the bytes before the match into the build buffer. */
 697                 BUILD(sp, st + offset, match[0].rm_so);
 698
 699                 /* Substitute the matching bytes. */
 700                 didsub = 1;
 701                 if (re_sub(sp, st + offset, &lb, &lbclen, &lblen, match))
 702                         goto err;
 703
 704                 /* Set the change flag so we know this line was modified. */
 705                 linechanged = 1;
 706
 707                 /* Move past the matched bytes. */
 708 skip:           offset += match[0].rm_eo;
 709                 len -= match[0].rm_eo;
 710
 711                 /* A match cannot be followed by an empty pattern. */
 712                 empty_ok = 0;
 713
 714                 /*
 715                  * If doing a global change with confirmation, we have to
 716                  * update the screen.  The basic idea is to store the line
 717                  * so the screen update routines can find it, and restart.
 718                  */
 719                 if (didsub && sp->c_suffix && sp->g_suffix) {
 720                         /*
 721                          * The new search offset will be the end of the
 722                          * modified line.
 723                          */
 724                         saved_offset = lbclen;
 725
 726                         /* Copy the rest of the line. */
 727                         if (len)
 728                                 BUILD(sp, st + offset, len)
 729
 730                         /* Set the new offset. */
 731                         offset = saved_offset;
 732
 733                         /* Store inserted lines, adjusting the build buffer. */
 734                         last = 0;
 735                         if (sp->newl_cnt) {
 736                                 for (cnt = 0;
 737                                     cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
 738                                         if (db_insert(sp, lno,
 739                                             lb + last, sp->newl[cnt] - last))
 740                                                 goto err;
 741                                         last = sp->newl[cnt] + 1;
 742                                         ++sp->rptlines[L_ADDED];
 743                                 }
 744                                 lbclen -= last;
 745                                 offset -= last;
 746                                 sp->newl_cnt = 0;
 747                         }
 748
 749                         /* Store and retrieve the line. */
 750                         if (db_set(sp, lno, lb + last, lbclen))
 751                                 goto err;
 752                         if (db_get(sp, lno, DBG_FATAL, &st, &llen))
 753                                 goto err;
 754                         ADD_SPACE_RETW(sp, bp, blen, llen)
 755                         MEMCPYW(bp, st, llen);
 756                         st = bp;
 757                         len = llen - offset;
 758
 759                         /* Restart the build. */
 760                         lbclen = 0;
 761                         BUILD(sp, st, offset);
 762
 763                         /*
 764                          * If we haven't already done the after-the-string
 765                          * match, do one.  Set REG_NOTEOL so the '$' pattern
 766                          * only matches once.
 767                          */
 768                         if (!do_eol_match)
 769                                 goto endmatch;
 770                         if (offset == len) {
 771                                 do_eol_match = 0;
 772                                 eflags |= REG_NOTEOL;
 773                         }
 774                         goto nextmatch;
 775                 }
 776
 777                 /*
 778                  * If it's a global:
 779                  *
 780                  * If at the end of the string, do a test for the after
 781                  * the string match.  Set REG_NOTEOL so the '$' pattern
 782                  * only matches once.
 783                  */
 784                 if (sp->g_suffix && do_eol_match) {
 785                         if (len == 0) {
 786                                 do_eol_match = 0;
 787                                 eflags |= REG_NOTEOL;
 788                         }
 789                         goto nextmatch;
 790                 }
 791
 792 endmatch:       if (!linechanged)
 793                         continue;
 794
 795                 /* Copy any remaining bytes into the build buffer. */
 796                 if (len)
 797                         BUILD(sp, st + offset, len)
 798
 799                 /* Store inserted lines, adjusting the build buffer. */
 800                 last = 0;
 801                 if (sp->newl_cnt) {
 802                         for (cnt = 0;
 803                             cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
 804                                 if (db_insert(sp,
 805                                     lno, lb + last, sp->newl[cnt] - last))
 806                                         goto err;
 807                                 last = sp->newl[cnt] + 1;
 808                                 ++sp->rptlines[L_ADDED];
 809                         }
 810                         lbclen -= last;
 811                         sp->newl_cnt = 0;
 812                 }
 813
 814                 /* Store the changed line. */
 815                 if (db_set(sp, lno, lb + last, lbclen))
 816                         goto err;
 817
 818                 /* Update changed line counter. */
 819                 if (sp->rptlchange != lno) {
 820                         sp->rptlchange = lno;
 821                         ++sp->rptlines[L_CHANGED];
 822                 }
 823
 824                 /*
 825                  * !!!
 826                  * Display as necessary.  Historic practice is to only
 827                  * display the last line of a line split into multiple
 828                  * lines.
 829                  */
 830                 if (lflag || nflag || pflag) {
 831                         from.lno = to.lno = lno;
 832                         from.cno = to.cno = 0;
 833                         if (lflag)
 834                                 (void)ex_print(sp, cmdp, &from, &to, E_C_LIST);
 835                         if (nflag)
 836                                 (void)ex_print(sp, cmdp, &from, &to, E_C_HASH);
 837                         if (pflag)
 838                                 (void)ex_print(sp, cmdp, &from, &to, E_C_PRINT);
 839                 }
 840         }
 841
 842         /*
 843          * !!!
 844          * Historically, vi attempted to leave the cursor at the same place if
 845          * the substitution was done at the current cursor position.  Otherwise
 846          * it moved it to the first non-blank of the last line changed.  There
 847          * were some problems: for example, :s/$/foo/ with the cursor on the
 848          * last character of the line left the cursor on the last character, or
 849          * the & command with multiple occurrences of the matching string in the
 850          * line usually left the cursor in a fairly random position.
 851          *
 852          * We try to do the same thing, with the exception that if the user is
 853          * doing substitution with confirmation, we move to the last line about
 854          * which the user was consulted, as opposed to the last line that they
 855          * actually changed.  This prevents a screen flash if the user doesn't
 856          * change many of the possible lines.
 857          */
 858         if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) {
 859                 sp->cno = 0;
 860                 (void)nonblank(sp, sp->lno, &sp->cno);
 861         }
 862
 863         /*
 864          * If not in a global command, and nothing matched, say so.
 865          * Else, if none of the lines displayed, put something up.
 866          */
 867         rval = 0;
 868         if (!matched) {
 869                 if (!F_ISSET(sp, SC_EX_GLOBAL)) {
 870                         msgq(sp, M_ERR, "157|No match found");
 871                         goto err;
 872                 }
 873         } else if (!lflag && !nflag && !pflag)
 874                 F_SET(cmdp, E_AUTOPRINT);
 875
 876         if (0) {
 877 err:            rval = 1;
 878         }
 879
 880         if (bp != NULL)
 881                 FREE_SPACEW(sp, bp, blen);
 882         if (lb != NULL)
 883                 free(lb);
 884         return (rval);
 885 }
 886
 887 /*
 888  * re_compile --
 889  *      Compile the RE.
 890  *
 891  * PUBLIC: int re_compile __P((SCR *,
 892  * PUBLIC:     CHAR_T *, size_t, CHAR_T **, size_t *, regex_t *, u_int));
 893  */
 894 int
 895 re_compile(SCR *sp, CHAR_T *ptrn, size_t plen, CHAR_T **ptrnp, size_t *lenp, regex_t *rep, u_int flags)
 896 {
 897         size_t len;
 898         int reflags, replaced, rval;
 899         CHAR_T *p;
 900
 901         /* Set RE flags. */
 902         reflags = 0;
 903         if (LF_ISSET(SEARCH_EXTEND))
 904                 reflags |= REG_EXTENDED;
 905         if (LF_ISSET(SEARCH_IC))
 906                 reflags |= REG_ICASE;
 907         if (LF_ISSET(SEARCH_LITERAL))
 908                 reflags |= REG_NOSPEC;
 909         if (!LF_ISSET(SEARCH_NOOPT | SEARCH_CSCOPE | SEARCH_TAG)) {
 910                 if (O_ISSET(sp, O_EXTENDED))
 911                         reflags |= REG_EXTENDED;
 912                 if (O_ISSET(sp, O_IGNORECASE))
 913                         reflags |= REG_ICASE;
 914                 if (O_ISSET(sp, O_ICLOWER))
 915                         goto iclower;
 916         }
 917         if (LF_ISSET(SEARCH_ICL)) {
 918 iclower:        for (p = ptrn, len = plen; len > 0; ++p, --len)
 919                         if (ISUPPER((UCHAR_T)*p))
 920                                 break;
 921                 if (len == 0)
 922                         reflags |= REG_ICASE;
 923         }
 924
 925         /* If we're replacing a saved value, clear the old one. */
 926         if (LF_ISSET(SEARCH_CSEARCH) && F_ISSET(sp, SC_RE_SEARCH)) {
 927                 regfree(&sp->re_c);
 928                 F_CLR(sp, SC_RE_SEARCH);
 929         }
 930         if (LF_ISSET(SEARCH_CSUBST) && F_ISSET(sp, SC_RE_SUBST)) {
 931                 regfree(&sp->subre_c);
 932                 F_CLR(sp, SC_RE_SUBST);
 933         }
 934
 935         /*
 936          * If we're saving the string, it's a pattern we haven't seen before,
 937          * so convert the vi-style RE's to POSIX 1003.2 RE's.  Save a copy for
 938          * later recompilation.   Free any previously saved value.
 939          */
 940         if (ptrnp != NULL) {
 941                 replaced = 0;
 942                 if (LF_ISSET(SEARCH_CSCOPE)) {
 943                         if (re_cscope_conv(sp, &ptrn, &plen, &replaced))
 944                                 return (1);
 945                         /*
 946                          * XXX
 947                          * Currently, the match-any-<blank> expression used in
 948                          * re_cscope_conv() requires extended RE's.  This may
 949                          * not be right or safe.
 950                          */
 951                         reflags |= REG_EXTENDED;
 952                 } else if (LF_ISSET(SEARCH_TAG)) {
 953                         if (re_tag_conv(sp, &ptrn, &plen, &replaced))
 954                                 return (1);
 955                 } else if (!LF_ISSET(SEARCH_LITERAL))
 956                         if (re_conv(sp, &ptrn, &plen, &replaced))
 957                                 return (1);
 958
 959                 /* Discard previous pattern. */
 960                 if (*ptrnp != NULL) {
 961                         free(*ptrnp);
 962                         *ptrnp = NULL;
 963                 }
 964                 if (lenp != NULL)
 965                         *lenp = plen;
 966
 967                 /*
 968                  * Copy the string into allocated memory.
 969                  *
 970                  * XXX
 971                  * Regcomp isn't 8-bit clean, so the pattern is nul-terminated
 972                  * for now.  There's just no other solution.
 973                  */
 974                 MALLOC(sp, *ptrnp, CHAR_T *, (plen + 1) * sizeof(CHAR_T));
 975                 if (*ptrnp != NULL) {
 976                         MEMCPYW(*ptrnp, ptrn, plen);
 977                         (*ptrnp)[plen] = '\0';
 978                 }
 979
 980                 /* Free up conversion-routine-allocated memory. */
 981                 if (replaced)
 982                         FREE_SPACEW(sp, ptrn, 0);
 983
 984                 if (*ptrnp == NULL)
 985                         return (1);
 986
 987                 ptrn = *ptrnp;
 988         }
 989
 990         /*
 991          * XXX
 992          * Regcomp isn't 8-bit clean, so we just lost if the pattern
 993          * contained a nul.  Bummer!
 994          */
 995         if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) {
 996                 if (LF_ISSET(SEARCH_MSG))
 997                         re_error(sp, rval, rep);
 998                 return (1);
 999         }
1000
1001         if (LF_ISSET(SEARCH_CSEARCH))
1002                 F_SET(sp, SC_RE_SEARCH);
1003         if (LF_ISSET(SEARCH_CSUBST))
1004                 F_SET(sp, SC_RE_SUBST);
1005
1006         return (0);
1007 }
1008
1009 /*
1010  * re_conv --
1011  *      Convert vi's regular expressions into something that the
1012  *      the POSIX 1003.2 RE functions can handle.
1013  *
1014  * There are three conversions we make to make vi's RE's (specifically
1015  * the global, search, and substitute patterns) work with POSIX RE's.
1016  *
1017  * 1: If O_MAGIC is not set, strip backslashes from the magic character
1018  *    set (.[*~) that have them, and add them to the ones that don't.
1019  * 2: If O_MAGIC is not set, the string "\~" is replaced with the text
1020  *    from the last substitute command's replacement string.  If O_MAGIC
1021  *    is set, it's the string "~".
1022  * 3: The pattern \<ptrn\> does "word" searches, convert it to use the
1023  *    new RE escapes.
1024  *
1025  * !!!/XXX
1026  * This doesn't exactly match the historic behavior of vi because we do
1027  * the ~ substitution before calling the RE engine, so magic characters
1028  * in the replacement string will be expanded by the RE engine, and they
1029  * weren't historically.  It's a bug.
1030  */
1031 static int
1032 re_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1033 {
1034         size_t blen, len, needlen;
1035         int magic;
1036         CHAR_T *bp, *p, *t;
1037
1038         /*
1039          * First pass through, we figure out how much space we'll need.
1040          * We do it in two passes, on the grounds that most of the time
1041          * the user is doing a search and won't have magic characters.
1042          * That way we can skip most of the memory allocation and copies.
1043          */
1044         magic = 0;
1045         for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len)
1046                 switch (*p) {
1047                 case '\\':
1048                         if (len > 1) {
1049                                 --len;
1050                                 switch (*++p) {
1051                                 case '<':
1052                                         magic = 1;
1053                                         needlen += RE_WSTART_LEN + 1;
1054                                         break;
1055                                 case '>':
1056                                         magic = 1;
1057                                         needlen += RE_WSTOP_LEN + 1;
1058                                         break;
1059                                 case '~':
1060                                         if (!O_ISSET(sp, O_MAGIC)) {
1061                                                 magic = 1;
1062                                                 needlen += sp->repl_len;
1063                                         }
1064                                         break;
1065                                 case '.':
1066                                 case '[':
1067                                 case '*':
1068                                         if (!O_ISSET(sp, O_MAGIC)) {
1069                                                 magic = 1;
1070                                                 needlen += 1;
1071                                         }
1072                                         break;
1073                                 default:
1074                                         needlen += 2;
1075                                 }
1076                         } else
1077                                 needlen += 1;
1078                         break;
1079                 case '~':
1080                         if (O_ISSET(sp, O_MAGIC)) {
1081                                 magic = 1;
1082                                 needlen += sp->repl_len;
1083                         }
1084                         break;
1085                 case '.':
1086                 case '[':
1087                 case '*':
1088                         if (!O_ISSET(sp, O_MAGIC)) {
1089                                 magic = 1;
1090                                 needlen += 2;
1091                         }
1092                         break;
1093                 default:
1094                         needlen += 1;
1095                         break;
1096                 }
1097
1098         if (!magic) {
1099                 *replacedp = 0;
1100                 return (0);
1101         }
1102
1103         /* Get enough memory to hold the final pattern. */
1104         *replacedp = 1;
1105         GET_SPACE_RETW(sp, bp, blen, needlen);
1106
1107         for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len)
1108                 switch (*p) {
1109                 case '\\':
1110                         if (len > 1) {
1111                                 --len;
1112                                 switch (*++p) {
1113                                 case '<':
1114                                         MEMCPY(t,
1115                                             RE_WSTART, RE_WSTART_LEN);
1116                                         t += RE_WSTART_LEN;
1117                                         break;
1118                                 case '>':
1119                                         MEMCPY(t,
1120                                             RE_WSTOP, RE_WSTOP_LEN);
1121                                         t += RE_WSTOP_LEN;
1122                                         break;
1123                                 case '~':
1124                                         if (O_ISSET(sp, O_MAGIC))
1125                                                 *t++ = '~';
1126                                         else {
1127                                                 MEMCPYW(t,
1128                                                     sp->repl, sp->repl_len);
1129                                                 t += sp->repl_len;
1130                                         }
1131                                         break;
1132                                 case '.':
1133                                 case '[':
1134                                 case '*':
1135                                         if (O_ISSET(sp, O_MAGIC))
1136                                                 *t++ = '\\';
1137                                         *t++ = *p;
1138                                         break;
1139                                 default:
1140                                         *t++ = '\\';
1141                                         *t++ = *p;
1142                                 }
1143                         } else
1144                                 *t++ = '\\';
1145                         break;
1146                 case '~':
1147                         if (O_ISSET(sp, O_MAGIC)) {
1148                                 MEMCPYW(t, sp->repl, sp->repl_len);
1149                                 t += sp->repl_len;
1150                         } else
1151                                 *t++ = '~';
1152                         break;
1153                 case '.':
1154                 case '[':
1155                 case '*':
1156                         if (!O_ISSET(sp, O_MAGIC))
1157                                 *t++ = '\\';
1158                         *t++ = *p;
1159                         break;
1160                 default:
1161                         *t++ = *p;
1162                         break;
1163                 }
1164
1165         *ptrnp = bp;
1166         *plenp = t - bp;
1167         return (0);
1168 }
1169
1170 /*
1171  * re_tag_conv --
1172  *      Convert a tags search path into something that the POSIX
1173  *      1003.2 RE functions can handle.
1174  */
1175 static int
1176 re_tag_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1177 {
1178         size_t blen, len;
1179         int lastdollar;
1180         CHAR_T *bp, *p, *t;
1181
1182         len = *plenp;
1183
1184         /* Max memory usage is 2 times the length of the string. */
1185         *replacedp = 1;
1186         GET_SPACE_RETW(sp, bp, blen, len * 2);
1187
1188         p = *ptrnp;
1189         t = bp;
1190
1191         /* If the last character is a '/' or '?', we just strip it. */
1192         if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?'))
1193                 --len;
1194
1195         /* If the next-to-last or last character is a '$', it's magic. */
1196         if (len > 0 && p[len - 1] == '$') {
1197                 --len;
1198                 lastdollar = 1;
1199         } else
1200                 lastdollar = 0;
1201
1202         /* If the first character is a '/' or '?', we just strip it. */
1203         if (len > 0 && (p[0] == '/' || p[0] == '?')) {
1204                 ++p;
1205                 --len;
1206         }
1207
1208         /* If the first or second character is a '^', it's magic. */
1209         if (p[0] == '^') {
1210                 *t++ = *p++;
1211                 --len;
1212         }
1213
1214         /*
1215          * Escape every other magic character we can find, meanwhile stripping
1216          * the backslashes ctags inserts when escaping the search delimiter
1217          * characters.
1218          */
1219         for (; len > 0; --len) {
1220                 if (p[0] == '\\' && (p[1] == '/' || p[1] == '?')) {
1221                         ++p;
1222                         --len;
1223                 } else if (strchr("^.[]$*", p[0]))
1224                         *t++ = '\\';
1225                 *t++ = *p++;
1226         }
1227         if (lastdollar)
1228                 *t++ = '$';
1229
1230         *ptrnp = bp;
1231         *plenp = t - bp;
1232         return (0);
1233 }
1234
1235 /*
1236  * re_cscope_conv --
1237  *       Convert a cscope search path into something that the POSIX
1238  *      1003.2 RE functions can handle.
1239  */
1240 static int
1241 re_cscope_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1242 {
1243         size_t blen, len, nspaces;
1244         CHAR_T *bp, *t;
1245         CHAR_T *p;
1246         const CHAR_T *wp;
1247         size_t wlen;
1248
1249         /*
1250          * Each space in the source line printed by cscope represents an
1251          * arbitrary sequence of spaces, tabs, and comments.
1252          */
1253 #define CSCOPE_RE_SPACE         "([ \t]|/\\*([^*]|\\*/)*\\*/)*"
1254 #define CSCOPE_LEN      sizeof(CSCOPE_RE_SPACE) - 1
1255         CHAR2INT(sp, CSCOPE_RE_SPACE, CSCOPE_LEN, wp, wlen);
1256         for (nspaces = 0, p = *ptrnp, len = *plenp; len > 0; ++p, --len)
1257                 if (*p == ' ')
1258                         ++nspaces;
1259
1260         /*
1261          * Allocate plenty of space:
1262          *      the string, plus potential escaping characters;
1263          *      nspaces + 2 copies of CSCOPE_RE_SPACE;
1264          *      ^, $, nul terminator characters.
1265          */
1266         *replacedp = 1;
1267         len = (p - *ptrnp) * 2 + (nspaces + 2) * sizeof(CSCOPE_RE_SPACE) + 3;
1268         GET_SPACE_RETW(sp, bp, blen, len);
1269
1270         p = *ptrnp;
1271         t = bp;
1272
1273         *t++ = '^';
1274         MEMCPYW(t, wp, wlen);
1275         t += wlen;
1276
1277         for (len = *plenp; len > 0; ++p, --len)
1278                 if (*p == ' ') {
1279                         MEMCPYW(t, wp, wlen);
1280                         t += wlen;
1281                 } else {
1282                         if (strchr("\\^.[]$*+?()|{}", *p))
1283                                 *t++ = '\\';
1284                         *t++ = *p;
1285                 }
1286
1287         MEMCPYW(t, wp, wlen);
1288         t += wlen;
1289         *t++ = '$';
1290
1291         *ptrnp = bp;
1292         *plenp = t - bp;
1293         return (0);
1294 }
1295
1296 /*
1297  * re_error --
1298  *      Report a regular expression error.
1299  *
1300  * PUBLIC: void re_error __P((SCR *, int, regex_t *));
1301  */
1302 void
1303 re_error(SCR *sp, int errcode, regex_t *preg)
1304 {
1305         size_t sz;
1306         char *oe;
1307
1308         sz = regerror(errcode, preg, NULL, 0);
1309         if ((oe = malloc(sz)) == NULL)
1310                 msgq(sp, M_SYSERR, NULL);
1311         else {
1312                 (void)regerror(errcode, preg, oe, sz);
1313                 msgq(sp, M_ERR, "RE error: %s", oe);
1314                 free(oe);
1315         }
1316 }
1317
1318 /*
1319  * re_sub --
1320  *      Do the substitution for a regular expression.
1321  */
1322 static int
1323 re_sub(SCR *sp, CHAR_T *ip, CHAR_T **lbp, size_t *lbclenp, size_t *lblenp, regmatch_t *match)
1324
1325                                         /* Input line. */
1326
1327
1328
1329 {
1330         enum { C_NOT_SET, C_LOWER, C_ONE_LOWER, C_ONE_UPPER, C_UPPER } conv;
1331         size_t lbclen, lblen;           /* Local copies. */
1332         size_t mlen;                    /* Match length. */
1333         size_t rpl;                     /* Remaining replacement length. */
1334         CHAR_T *rp;                     /* Replacement pointer. */
1335         int ch;
1336         int no;                         /* Match replacement offset. */
1337         CHAR_T *p, *t;                  /* Buffer pointers. */
1338         CHAR_T *lb;                     /* Local copies. */
1339
1340         lb = *lbp;                      /* Get local copies. */
1341         lbclen = *lbclenp;
1342         lblen = *lblenp;
1343
1344         /*
1345          * QUOTING NOTE:
1346          *
1347          * There are some special sequences that vi provides in the
1348          * replacement patterns.
1349          *       & string the RE matched (\& if nomagic set)
1350          *      \# n-th regular subexpression
1351          *      \E end \U, \L conversion
1352          *      \e end \U, \L conversion
1353          *      \l convert the next character to lower-case
1354          *      \L convert to lower-case, until \E, \e, or end of replacement
1355          *      \u convert the next character to upper-case
1356          *      \U convert to upper-case, until \E, \e, or end of replacement
1357          *
1358          * Otherwise, since this is the lowest level of replacement, discard
1359          * all escaping characters.  This (hopefully) matches historic practice.
1360          */
1361 #define OUTCH(ch, nltrans) {                                            \
1362         ARG_CHAR_T __ch = (ch);                                         \
1363         e_key_t __value = KEY_VAL(sp, __ch);                            \
1364         if (nltrans && (__value == K_CR || __value == K_NL)) {          \
1365                 NEEDNEWLINE(sp);                                        \
1366                 sp->newl[sp->newl_cnt++] = lbclen;                      \
1367         } else if (conv != C_NOT_SET) {                                 \
1368                 switch (conv) {                                         \
1369                 case C_ONE_LOWER:                                       \
1370                         conv = C_NOT_SET;                               \
1371                         /* FALLTHROUGH */                               \
1372                 case C_LOWER:                                           \
1373                         if (ISUPPER(__ch))                              \
1374                                 __ch = TOLOWER(__ch);                   \
1375                         break;                                          \
1376                 case C_ONE_UPPER:                                       \
1377                         conv = C_NOT_SET;                               \
1378                         /* FALLTHROUGH */                               \
1379                 case C_UPPER:                                           \
1380                         if (ISLOWER(__ch))                              \
1381                                 __ch = TOUPPER(__ch);                   \
1382                         break;                                          \
1383                 default:                                                \
1384                         abort();                                        \
1385                 }                                                       \
1386         }                                                               \
1387         NEEDSP(sp, 1, p);                                               \
1388         *p++ = __ch;                                                    \
1389         ++lbclen;                                                       \
1390 }
1391         conv = C_NOT_SET;
1392         for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) {
1393                 switch (ch = *rp++) {
1394                 case '&':
1395                         if (O_ISSET(sp, O_MAGIC)) {
1396                                 no = 0;
1397                                 goto subzero;
1398                         }
1399                         break;
1400                 case '\\':
1401                         if (rpl == 0)
1402                                 break;
1403                         --rpl;
1404                         switch (ch = *rp) {
1405                         case '&':
1406                                 ++rp;
1407                                 if (!O_ISSET(sp, O_MAGIC)) {
1408                                         no = 0;
1409                                         goto subzero;
1410                                 }
1411                                 break;
1412                         case '0': case '1': case '2': case '3': case '4':
1413                         case '5': case '6': case '7': case '8': case '9':
1414                                 no = *rp++ - '0';
1415 subzero:                        if (match[no].rm_so == -1 ||
1416                                     match[no].rm_eo == -1)
1417                                         break;
1418                                 mlen = match[no].rm_eo - match[no].rm_so;
1419                                 for (t = ip + match[no].rm_so; mlen--; ++t)
1420                                         OUTCH((UCHAR_T)*t, 0);
1421                                 continue;
1422                         case 'e':
1423                         case 'E':
1424                                 ++rp;
1425                                 conv = C_NOT_SET;
1426                                 continue;
1427                         case 'l':
1428                                 ++rp;
1429                                 conv = C_ONE_LOWER;
1430                                 continue;
1431                         case 'L':
1432                                 ++rp;
1433                                 conv = C_LOWER;
1434                                 continue;
1435                         case 'u':
1436                                 ++rp;
1437                                 conv = C_ONE_UPPER;
1438                                 continue;
1439                         case 'U':
1440                                 ++rp;
1441                                 conv = C_UPPER;
1442                                 continue;
1443                         default:
1444                                 ++rp;
1445                                 break;
1446                         }
1447                 }
1448                 OUTCH(ch, 1);
1449         }
1450
1451         *lbp = lb;                      /* Update caller's information. */
1452         *lbclenp = lbclen;
1453         *lblenp = lblen;
1454         return (0);
1455 }