tools/llvm: Do not build with symbols
[minix3.git] / external / bsd / nvi / dist / ex / ex_subst.c
blobd02326b25ae4e857c69887fe505cb3fb3b7862ae
1 /* $NetBSD: ex_subst.c,v 1.3 2013/11/25 22:43:46 christos Exp $ */
2 /*-
3 * Copyright (c) 1992, 1993, 1994
4 * The Regents of the University of California. All rights reserved.
5 * Copyright (c) 1992, 1993, 1994, 1995, 1996
6 * Keith Bostic. All rights reserved.
8 * See the LICENSE file for redistribution information.
9 */
11 #include "config.h"
13 #ifndef lint
14 static const char sccsid[] = "Id: ex_subst.c,v 10.50 2002/02/09 21:18:23 skimo Exp (Berkeley) Date: 2002/02/09 21:18:23 ";
15 #endif /* not lint */
17 #include <sys/types.h>
18 #include <sys/queue.h>
19 #include <sys/time.h>
21 #include <bitstring.h>
22 #include <ctype.h>
23 #include <errno.h>
24 #include <limits.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <unistd.h>
30 #include "../common/common.h"
31 #include "../vi/vi.h"
/*
 * Flags handed to s() by its callers in this file.
 *
 * SUB_FIRST is passed by ex_s() when the command supplied its own
 * pattern; s() then rejects the 'r' flag as meaningless.
 * SUB_MUSTSETR is passed by ex_s() when the character following the
 * command is alphanumeric or a backslash; s() then reports a usage
 * error unless the 'r' flag was given.
 */
#define	SUB_FIRST	0x01		/* The 'r' flag isn't reasonable. */
#define	SUB_MUSTSETR	0x02		/* The 'r' flag is required. */

/* Pattern converters, the replacement builder and the substitution engine. */
static int re_conv __P((SCR *, CHAR_T **, size_t *, int *));
static int re_cscope_conv __P((SCR *, CHAR_T **, size_t *, int *));
static int re_sub __P((SCR *,
		CHAR_T *, CHAR_T **, size_t *, size_t *, regmatch_t [10]));
static int re_tag_conv __P((SCR *, CHAR_T **, size_t *, int *));
static int s __P((SCR *, EXCMD *, CHAR_T *, regex_t *, u_int));

44 * ex_s --
45 * [line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]]
47 * Substitute on lines matching a pattern.
49 * PUBLIC: int ex_s __P((SCR *, EXCMD *));
51 int
52 ex_s(SCR *sp, EXCMD *cmdp)
54 regex_t *re;
55 size_t blen, len;
56 u_int flags;
57 ARG_CHAR_T delim;
58 CHAR_T *bp, *p, *ptrn, *rep, *t;
61 * Skip leading white space.
63 * !!!
64 * Historic vi allowed any non-alphanumeric to serve as the
65 * substitution command delimiter.
67 * !!!
68 * If the arguments are empty, it's the same as &, i.e. we
69 * repeat the last substitution.
71 if (cmdp->argc == 0)
72 goto subagain;
73 for (p = cmdp->argv[0]->bp,
74 len = cmdp->argv[0]->len; len > 0; --len, ++p) {
75 if (!ISBLANK((UCHAR_T)*p))
76 break;
78 if (len == 0)
79 subagain: return (ex_subagain(sp, cmdp));
81 delim = (UCHAR_T)*p++;
82 if (ISALNUM(delim) || delim == '\\')
83 return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR));
86 * !!!
87 * The full-blown substitute command reset the remembered
88 * state of the 'c' and 'g' suffices.
90 sp->c_suffix = sp->g_suffix = 0;
93 * Get the pattern string, toss escaping characters.
95 * !!!
96 * Historic vi accepted any of the following forms:
98 * :s/abc/def/ change "abc" to "def"
99 * :s/abc/def change "abc" to "def"
100 * :s/abc/ delete "abc"
101 * :s/abc delete "abc"
103 * QUOTING NOTE:
105 * Only toss an escaping character if it escapes a delimiter.
106 * This means that "s/A/\\\\f" replaces "A" with "\\f". It
107 * would be nice to be more regular, i.e. for each layer of
108 * escaping a single escaping character is removed, but that's
109 * not how the historic vi worked.
111 for (ptrn = t = p;;) {
112 if (p[0] == '\0' || p[0] == delim) {
113 if (p[0] == delim)
114 ++p;
116 * !!!
117 * Nul terminate the pattern string -- it's passed
118 * to regcomp which doesn't understand anything else.
120 *t = '\0';
121 break;
123 if (p[0] == '\\') {
124 if (p[1] == delim)
125 ++p;
126 else if (p[1] == '\\')
127 *t++ = *p++;
129 *t++ = *p++;
133 * If the pattern string is empty, use the last RE (not just the
134 * last substitution RE).
136 if (*ptrn == '\0') {
137 if (sp->re == NULL) {
138 ex_emsg(sp, NULL, EXM_NOPREVRE);
139 return (1);
142 /* Re-compile the RE if necessary. */
143 if (!F_ISSET(sp, SC_RE_SEARCH) &&
144 re_compile(sp, sp->re, sp->re_len,
145 NULL, NULL, &sp->re_c, SEARCH_CSEARCH | SEARCH_MSG))
146 return (1);
147 flags = 0;
148 } else {
150 * !!!
151 * Compile the RE. Historic practice is that substitutes set
152 * the search direction as well as both substitute and search
153 * RE's. We compile the RE twice, as we don't want to bother
154 * ref counting the pattern string and (opaque) structure.
156 if (re_compile(sp, ptrn, t - ptrn, &sp->re,
157 &sp->re_len, &sp->re_c, SEARCH_CSEARCH | SEARCH_MSG))
158 return (1);
159 if (re_compile(sp, ptrn, t - ptrn, &sp->subre,
160 &sp->subre_len, &sp->subre_c, SEARCH_CSUBST | SEARCH_MSG))
161 return (1);
163 flags = SUB_FIRST;
164 sp->searchdir = FORWARD;
166 re = &sp->re_c;
169 * Get the replacement string.
171 * The special character & (\& if O_MAGIC not set) matches the
172 * entire RE. No handling of & is required here, it's done by
173 * re_sub().
175 * The special character ~ (\~ if O_MAGIC not set) inserts the
176 * previous replacement string into this replacement string.
177 * Count ~'s to figure out how much space we need. We could
178 * special case nonexistent last patterns or whether or not
179 * O_MAGIC is set, but it's probably not worth the effort.
181 * QUOTING NOTE:
183 * Only toss an escaping character if it escapes a delimiter or
184 * if O_MAGIC is set and it escapes a tilde.
186 * !!!
187 * If the entire replacement pattern is "%", then use the last
188 * replacement pattern. This semantic was added to vi in System
189 * V and then percolated elsewhere, presumably around the time
190 * that it was added to their version of ed(1).
192 if (p[0] == L('\0') || p[0] == delim) {
193 if (p[0] == delim)
194 ++p;
195 if (sp->repl != NULL)
196 free(sp->repl);
197 sp->repl = NULL;
198 sp->repl_len = 0;
199 } else if (p[0] == L('%') && (p[1] == L('\0') || p[1] == delim))
200 p += p[1] == delim ? 2 : 1;
201 else {
202 for (rep = p, len = 0;
203 p[0] != L('\0') && p[0] != delim; ++p, ++len)
204 if (p[0] == L('~'))
205 len += sp->repl_len;
206 GET_SPACE_RETW(sp, bp, blen, len);
207 for (t = bp, len = 0, p = rep;;) {
208 if (p[0] == L('\0') || p[0] == delim) {
209 if (p[0] == delim)
210 ++p;
211 break;
213 if (p[0] == L('\\')) {
214 if (p[1] == delim)
215 ++p;
216 else if (p[1] == L('\\')) {
217 *t++ = *p++;
218 ++len;
219 } else if (p[1] == L('~')) {
220 ++p;
221 if (!O_ISSET(sp, O_MAGIC))
222 goto tilde;
224 } else if (p[0] == L('~') && O_ISSET(sp, O_MAGIC)) {
225 tilde: ++p;
226 MEMCPYW(t, sp->repl, sp->repl_len);
227 t += sp->repl_len;
228 len += sp->repl_len;
229 continue;
231 *t++ = *p++;
232 ++len;
234 if ((sp->repl_len = len) != 0) {
235 if (sp->repl != NULL)
236 free(sp->repl);
237 if ((sp->repl = malloc(len * sizeof(CHAR_T))) == NULL) {
238 msgq(sp, M_SYSERR, NULL);
239 FREE_SPACEW(sp, bp, blen);
240 return (1);
242 MEMCPYW(sp->repl, bp, len);
244 FREE_SPACEW(sp, bp, blen);
246 return (s(sp, cmdp, p, re, flags));
250 * ex_subagain --
251 * [line [,line]] & [cgr] [count] [#lp]]
253 * Substitute using the last substitute RE and replacement pattern.
255 * PUBLIC: int ex_subagain __P((SCR *, EXCMD *));
258 ex_subagain(SCR *sp, EXCMD *cmdp)
260 if (sp->subre == NULL) {
261 ex_emsg(sp, NULL, EXM_NOPREVRE);
262 return (1);
264 if (!F_ISSET(sp, SC_RE_SUBST) &&
265 re_compile(sp, sp->subre, sp->subre_len,
266 NULL, NULL, &sp->subre_c, SEARCH_CSUBST | SEARCH_MSG))
267 return (1);
268 return (s(sp,
269 cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->subre_c, 0));
273 * ex_subtilde --
274 * [line [,line]] ~ [cgr] [count] [#lp]]
276 * Substitute using the last RE and last substitute replacement pattern.
278 * PUBLIC: int ex_subtilde __P((SCR *, EXCMD *));
281 ex_subtilde(SCR *sp, EXCMD *cmdp)
283 if (sp->re == NULL) {
284 ex_emsg(sp, NULL, EXM_NOPREVRE);
285 return (1);
287 if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp, sp->re,
288 sp->re_len, NULL, NULL, &sp->re_c, SEARCH_CSEARCH | SEARCH_MSG))
289 return (1);
290 return (s(sp,
291 cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->re_c, 0));
295 * s --
296 * Do the substitution. This stuff is *really* tricky. There are lots of
297 * special cases, and general nastiness. Don't mess with it unless you're
298 * pretty confident.
300 * The nasty part of the substitution is what happens when the replacement
301 * string contains newlines. It's a bit tricky -- consider the information
302 * that has to be retained for "s/f\(o\)o/^M\1^M\1/". The solution here is
303 * to build a set of newline offsets which we use to break the line up later,
304 * when the replacement is done. Don't change it unless you're *damned*
305 * confident.
307 #define NEEDNEWLINE(sp) { \
308 if (sp->newl_len == sp->newl_cnt) { \
309 sp->newl_len += 25; \
310 REALLOC(sp, sp->newl, size_t *, \
311 sp->newl_len * sizeof(size_t)); \
312 if (sp->newl == NULL) { \
313 sp->newl_len = 0; \
314 return (1); \
319 #define BUILD(sp, l, len) { \
320 if (lbclen + (len) > lblen) { \
321 lblen += MAX(lbclen + (len), 256); \
322 REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T)); \
323 if (lb == NULL) { \
324 lbclen = 0; \
325 return (1); \
328 MEMCPYW(lb + lbclen, l, len); \
329 lbclen += len; \
332 #define NEEDSP(sp, len, pnt) { \
333 if (lbclen + (len) > lblen) { \
334 lblen += MAX(lbclen + (len), 256); \
335 REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T)); \
336 if (lb == NULL) { \
337 lbclen = 0; \
338 return (1); \
340 pnt = lb + lbclen; \
344 static int
345 s(SCR *sp, EXCMD *cmdp, CHAR_T *st, regex_t *re, u_int flags)
347 EVENT ev;
348 MARK from, to;
349 TEXTH tiq;
350 db_recno_t elno, lno, slno;
351 u_long ul;
352 regmatch_t match[10];
353 size_t blen, cnt, last, lbclen, lblen, len, llen;
354 size_t offset, saved_offset, scno;
355 int lflag, nflag, pflag, rflag;
356 int didsub, do_eol_match, eflags, empty_ok, eval;
357 int linechanged, matched, quit, rval;
358 CHAR_T *lb, *bp;
359 enum nresult nret;
361 NEEDFILE(sp, cmdp);
363 slno = sp->lno;
364 scno = sp->cno;
367 * !!!
368 * Historically, the 'g' and 'c' suffices were always toggled as flags,
369 * so ":s/A/B/" was the same as ":s/A/B/ccgg". If O_EDCOMPATIBLE was
370 * not set, they were initialized to 0 for all substitute commands. If
371 * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user
372 * specified substitute/replacement patterns (see ex_s()).
374 if (!O_ISSET(sp, O_EDCOMPATIBLE))
375 sp->c_suffix = sp->g_suffix = 0;
378 * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but
379 * it only displayed the last change. I'd disallow them, but they are
380 * useful in combination with the [v]global commands. In the current
381 * model the problem is combining them with the 'c' flag -- the screen
382 * would have to flip back and forth between the confirm screen and the
383 * ex print screen, which would be pretty awful. We do display all
384 * changes, though, for what that's worth.
386 * !!!
387 * Historic vi was fairly strict about the order of "options", the
388 * count, and "flags". I'm somewhat fuzzy on the difference between
389 * options and flags, anyway, so this is a simpler approach, and we
390 * just take it them in whatever order the user gives them. (The ex
391 * usage statement doesn't reflect this.)
393 lflag = nflag = pflag = rflag = 0;
394 if (st == NULL)
395 goto noargs;
396 for (lno = OOBLNO; *st != '\0'; ++st)
397 switch (*st) {
398 case ' ':
399 case '\t':
400 continue;
401 case '+':
402 ++cmdp->flagoff;
403 break;
404 case '-':
405 --cmdp->flagoff;
406 break;
407 case '0': case '1': case '2': case '3': case '4':
408 case '5': case '6': case '7': case '8': case '9':
409 if (lno != OOBLNO)
410 goto usage;
411 errno = 0;
412 nret = nget_uslong(sp, &ul, st, &st, 10);
413 lno = ul;
414 if (*st == '\0') /* Loop increment correction. */
415 --st;
416 if (nret != NUM_OK) {
417 if (nret == NUM_OVER)
418 msgq(sp, M_ERR, "153|Count overflow");
419 else if (nret == NUM_UNDER)
420 msgq(sp, M_ERR, "154|Count underflow");
421 else
422 msgq(sp, M_SYSERR, NULL);
423 return (1);
426 * In historic vi, the count was inclusive from the
427 * second address.
429 cmdp->addr1.lno = cmdp->addr2.lno;
430 cmdp->addr2.lno += lno - 1;
431 if (!db_exist(sp, cmdp->addr2.lno) &&
432 db_last(sp, &cmdp->addr2.lno))
433 return (1);
434 break;
435 case '#':
436 nflag = 1;
437 break;
438 case 'c':
439 sp->c_suffix = !sp->c_suffix;
441 /* Ex text structure initialization. */
442 if (F_ISSET(sp, SC_EX)) {
443 memset(&tiq, 0, sizeof(TEXTH));
444 TAILQ_INIT(&tiq);
446 break;
447 case 'g':
448 sp->g_suffix = !sp->g_suffix;
449 break;
450 case 'l':
451 lflag = 1;
452 break;
453 case 'p':
454 pflag = 1;
455 break;
456 case 'r':
457 if (LF_ISSET(SUB_FIRST)) {
458 msgq(sp, M_ERR,
459 "155|Regular expression specified; r flag meaningless");
460 return (1);
462 if (!F_ISSET(sp, SC_RE_SEARCH)) {
463 ex_emsg(sp, NULL, EXM_NOPREVRE);
464 return (1);
466 rflag = 1;
467 re = &sp->re_c;
468 break;
469 default:
470 goto usage;
473 if (*st != '\0' || (!rflag && LF_ISSET(SUB_MUSTSETR))) {
474 usage: ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE);
475 return (1);
478 noargs: if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) {
479 msgq(sp, M_ERR,
480 "156|The #, l and p flags may not be combined with the c flag in vi mode");
481 return (1);
485 * bp: if interactive, line cache
486 * blen: if interactive, line cache length
487 * lb: build buffer pointer.
488 * lbclen: current length of built buffer.
489 * lblen; length of build buffer.
491 bp = lb = NULL;
492 blen = lbclen = lblen = 0;
494 /* For each line... */
495 lno = cmdp->addr1.lno == 0 ? 1 : cmdp->addr1.lno;
496 for (matched = quit = 0,
497 elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) {
499 /* Someone's unhappy, time to stop. */
500 if (INTERRUPTED(sp))
501 break;
503 /* Get the line. */
504 if (db_get(sp, lno, DBG_FATAL, &st, &llen))
505 goto err;
508 * Make a local copy if doing confirmation -- when calling
509 * the confirm routine we're likely to lose the cached copy.
511 if (sp->c_suffix) {
512 if (bp == NULL) {
513 GET_SPACE_RETW(sp, bp, blen, llen);
514 } else
515 ADD_SPACE_RETW(sp, bp, blen, llen);
516 MEMCPYW(bp, st, llen);
517 st = bp;
520 /* Start searching from the beginning. */
521 offset = 0;
522 len = llen;
524 /* Reset the build buffer offset. */
525 lbclen = 0;
527 /* Reset empty match flag. */
528 empty_ok = 1;
531 * We don't want to have to do a setline if the line didn't
532 * change -- keep track of whether or not this line changed.
533 * If doing confirmations, don't want to keep setting the
534 * line if change is refused -- keep track of substitutions.
536 didsub = linechanged = 0;
538 /* New line, do an EOL match. */
539 do_eol_match = 1;
541 /* It's not nul terminated, but we pretend it is. */
542 eflags = REG_STARTEND;
545 * The search area is from st + offset to the EOL.
547 * Generally, match[0].rm_so is the offset of the start
548 * of the match from the start of the search, and offset
549 * is the offset of the start of the last search.
551 nextmatch: match[0].rm_so = 0;
552 match[0].rm_eo = len;
554 /* Get the next match. */
555 eval = regexec(re, st + offset, 10, match, eflags);
558 * There wasn't a match or if there was an error, deal with
559 * it. If there was a previous match in this line, resolve
560 * the changes into the database. Otherwise, just move on.
562 if (eval == REG_NOMATCH)
563 goto endmatch;
564 if (eval != 0) {
565 re_error(sp, eval, re);
566 goto err;
568 matched = 1;
570 /* Only the first search can match an anchored expression. */
571 eflags |= REG_NOTBOL;
574 * !!!
575 * It's possible to match 0-length strings -- for example, the
576 * command s;a*;X;, when matched against the string "aabb" will
577 * result in "XbXbX", i.e. the matches are "aa", the space
578 * between the b's and the space between the b's and the end of
579 * the string. There is a similar space between the beginning
580 * of the string and the a's. The rule that we use (because vi
581 * historically used it) is that any 0-length match, occurring
582 * immediately after a match, is ignored. Otherwise, the above
583 * example would have resulted in "XXbXbX". Another example is
584 * incorrectly using " *" to replace groups of spaces with one
585 * space.
587 * The way we do this is that if we just had a successful match,
588 * the starting offset does not skip characters, and the match
589 * is empty, ignore the match and move forward. If there's no
590 * more characters in the string, we were attempting to match
591 * after the last character, so quit.
593 if (!empty_ok && match[0].rm_so == 0 && match[0].rm_eo == 0) {
594 empty_ok = 1;
595 if (len == 0)
596 goto endmatch;
597 BUILD(sp, st + offset, 1)
598 ++offset;
599 --len;
600 goto nextmatch;
603 /* Confirm change. */
604 if (sp->c_suffix) {
606 * Set the cursor position for confirmation. Note,
607 * if we matched on a '$', the cursor may be past
608 * the end of line.
610 from.lno = to.lno = lno;
611 from.cno = match[0].rm_so + offset;
612 to.cno = match[0].rm_eo + offset;
614 * Both ex and vi have to correct for a change before
615 * the first character in the line.
617 if (llen == 0)
618 from.cno = to.cno = 0;
619 if (F_ISSET(sp, SC_VI)) {
621 * Only vi has to correct for a change after
622 * the last character in the line.
624 * XXX
625 * It would be nice to change the vi code so
626 * that we could display a cursor past EOL.
628 if (to.cno >= llen)
629 to.cno = llen - 1;
630 if (from.cno >= llen)
631 from.cno = llen - 1;
633 sp->lno = from.lno;
634 sp->cno = from.cno;
635 if (vs_refresh(sp, 1))
636 goto err;
638 vs_update(sp, msg_cat(sp,
639 "169|Confirm change? [n]", NULL), NULL);
641 if (v_event_get(sp, &ev, 0, 0))
642 goto err;
643 switch (ev.e_event) {
644 case E_CHARACTER:
645 break;
646 case E_EOF:
647 case E_ERR:
648 case E_INTERRUPT:
649 goto lquit;
650 default:
651 v_event_err(sp, &ev);
652 goto lquit;
654 } else {
655 if (ex_print(sp, cmdp, &from, &to, 0) ||
656 ex_scprint(sp, &from, &to))
657 goto lquit;
658 if (ex_txt(sp, &tiq, 0, TXT_CR))
659 goto err;
660 ev.e_c = TAILQ_FIRST(&tiq)->lb[0];
663 switch (ev.e_c) {
664 case CH_YES:
665 break;
666 default:
667 case CH_NO:
668 didsub = 0;
669 BUILD(sp, st + offset, match[0].rm_eo);
670 goto skip;
671 case CH_QUIT:
672 /* Set the quit/interrupted flags. */
673 lquit: quit = 1;
674 F_SET(sp->gp, G_INTERRUPTED);
677 * Resolve any changes, then return to (and
678 * exit from) the main loop.
680 goto endmatch;
685 * Set the cursor to the last position changed, converting
686 * from 1-based to 0-based.
688 sp->lno = lno;
689 sp->cno = match[0].rm_so;
691 /* Copy the bytes before the match into the build buffer. */
692 BUILD(sp, st + offset, match[0].rm_so);
694 /* Substitute the matching bytes. */
695 didsub = 1;
696 if (re_sub(sp, st + offset, &lb, &lbclen, &lblen, match))
697 goto err;
699 /* Set the change flag so we know this line was modified. */
700 linechanged = 1;
702 /* Move past the matched bytes. */
703 skip: offset += match[0].rm_eo;
704 len -= match[0].rm_eo;
706 /* A match cannot be followed by an empty pattern. */
707 empty_ok = 0;
710 * If doing a global change with confirmation, we have to
711 * update the screen. The basic idea is to store the line
712 * so the screen update routines can find it, and restart.
714 if (didsub && sp->c_suffix && sp->g_suffix) {
716 * The new search offset will be the end of the
717 * modified line.
719 saved_offset = lbclen;
721 /* Copy the rest of the line. */
722 if (len)
723 BUILD(sp, st + offset, len)
725 /* Set the new offset. */
726 offset = saved_offset;
728 /* Store inserted lines, adjusting the build buffer. */
729 last = 0;
730 if (sp->newl_cnt) {
731 for (cnt = 0;
732 cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
733 if (db_insert(sp, lno,
734 lb + last, sp->newl[cnt] - last))
735 goto err;
736 last = sp->newl[cnt] + 1;
737 ++sp->rptlines[L_ADDED];
739 lbclen -= last;
740 offset -= last;
741 sp->newl_cnt = 0;
744 /* Store and retrieve the line. */
745 if (db_set(sp, lno, lb + last, lbclen))
746 goto err;
747 if (db_get(sp, lno, DBG_FATAL, &st, &llen))
748 goto err;
749 ADD_SPACE_RETW(sp, bp, blen, llen)
750 MEMCPYW(bp, st, llen);
751 st = bp;
752 len = llen - offset;
754 /* Restart the build. */
755 lbclen = 0;
756 BUILD(sp, st, offset);
759 * If we haven't already done the after-the-string
760 * match, do one. Set REG_NOTEOL so the '$' pattern
761 * only matches once.
763 if (!do_eol_match)
764 goto endmatch;
765 if (offset == len) {
766 do_eol_match = 0;
767 eflags |= REG_NOTEOL;
769 goto nextmatch;
773 * If it's a global:
775 * If at the end of the string, do a test for the after
776 * the string match. Set REG_NOTEOL so the '$' pattern
777 * only matches once.
779 if (sp->g_suffix && do_eol_match) {
780 if (len == 0) {
781 do_eol_match = 0;
782 eflags |= REG_NOTEOL;
784 goto nextmatch;
787 endmatch: if (!linechanged)
788 continue;
790 /* Copy any remaining bytes into the build buffer. */
791 if (len)
792 BUILD(sp, st + offset, len)
794 /* Store inserted lines, adjusting the build buffer. */
795 last = 0;
796 if (sp->newl_cnt) {
797 for (cnt = 0;
798 cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
799 if (db_insert(sp,
800 lno, lb + last, sp->newl[cnt] - last))
801 goto err;
802 last = sp->newl[cnt] + 1;
803 ++sp->rptlines[L_ADDED];
805 lbclen -= last;
806 sp->newl_cnt = 0;
809 /* Store the changed line. */
810 if (db_set(sp, lno, lb + last, lbclen))
811 goto err;
813 /* Update changed line counter. */
814 if (sp->rptlchange != lno) {
815 sp->rptlchange = lno;
816 ++sp->rptlines[L_CHANGED];
820 * !!!
821 * Display as necessary. Historic practice is to only
822 * display the last line of a line split into multiple
823 * lines.
825 if (lflag || nflag || pflag) {
826 from.lno = to.lno = lno;
827 from.cno = to.cno = 0;
828 if (lflag)
829 (void)ex_print(sp, cmdp, &from, &to, E_C_LIST);
830 if (nflag)
831 (void)ex_print(sp, cmdp, &from, &to, E_C_HASH);
832 if (pflag)
833 (void)ex_print(sp, cmdp, &from, &to, E_C_PRINT);
838 * !!!
839 * Historically, vi attempted to leave the cursor at the same place if
840 * the substitution was done at the current cursor position. Otherwise
841 * it moved it to the first non-blank of the last line changed. There
842 * were some problems: for example, :s/$/foo/ with the cursor on the
843 * last character of the line left the cursor on the last character, or
844 * the & command with multiple occurrences of the matching string in the
845 * line usually left the cursor in a fairly random position.
847 * We try to do the same thing, with the exception that if the user is
848 * doing substitution with confirmation, we move to the last line about
849 * which the user was consulted, as opposed to the last line that they
850 * actually changed. This prevents a screen flash if the user doesn't
851 * change many of the possible lines.
853 if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) {
854 sp->cno = 0;
855 (void)nonblank(sp, sp->lno, &sp->cno);
859 * If not in a global command, and nothing matched, say so.
860 * Else, if none of the lines displayed, put something up.
862 rval = 0;
863 if (!matched) {
864 if (!F_ISSET(sp, SC_EX_GLOBAL)) {
865 msgq(sp, M_ERR, "157|No match found");
866 goto err;
868 } else if (!lflag && !nflag && !pflag)
869 F_SET(cmdp, E_AUTOPRINT);
871 if (0) {
872 err: rval = 1;
875 if (bp != NULL)
876 FREE_SPACEW(sp, bp, blen);
877 if (lb != NULL)
878 free(lb);
879 return (rval);
883 * re_compile --
884 * Compile the RE.
886 * PUBLIC: int re_compile __P((SCR *,
887 * PUBLIC: CHAR_T *, size_t, CHAR_T **, size_t *, regex_t *, u_int));
890 re_compile(SCR *sp, CHAR_T *ptrn, size_t plen, CHAR_T **ptrnp, size_t *lenp, regex_t *rep, u_int flags)
892 size_t len;
893 int reflags, replaced, rval;
894 CHAR_T *p;
896 /* Set RE flags. */
897 reflags = 0;
898 if (LF_ISSET(SEARCH_EXTEND))
899 reflags |= REG_EXTENDED;
900 if (LF_ISSET(SEARCH_IC))
901 reflags |= REG_ICASE;
902 if (LF_ISSET(SEARCH_LITERAL))
903 reflags |= REG_NOSPEC;
904 if (!LF_ISSET(SEARCH_NOOPT | SEARCH_CSCOPE | SEARCH_TAG)) {
905 if (O_ISSET(sp, O_EXTENDED))
906 reflags |= REG_EXTENDED;
907 if (O_ISSET(sp, O_IGNORECASE))
908 reflags |= REG_ICASE;
909 if (O_ISSET(sp, O_ICLOWER))
910 goto iclower;
912 if (LF_ISSET(SEARCH_ICL)) {
913 iclower: for (p = ptrn, len = plen; len > 0; ++p, --len)
914 if (ISUPPER((UCHAR_T)*p))
915 break;
916 if (len == 0)
917 reflags |= REG_ICASE;
920 /* If we're replacing a saved value, clear the old one. */
921 if (LF_ISSET(SEARCH_CSEARCH) && F_ISSET(sp, SC_RE_SEARCH)) {
922 regfree(&sp->re_c);
923 F_CLR(sp, SC_RE_SEARCH);
925 if (LF_ISSET(SEARCH_CSUBST) && F_ISSET(sp, SC_RE_SUBST)) {
926 regfree(&sp->subre_c);
927 F_CLR(sp, SC_RE_SUBST);
931 * If we're saving the string, it's a pattern we haven't seen before,
932 * so convert the vi-style RE's to POSIX 1003.2 RE's. Save a copy for
933 * later recompilation. Free any previously saved value.
935 if (ptrnp != NULL) {
936 replaced = 0;
937 if (LF_ISSET(SEARCH_CSCOPE)) {
938 if (re_cscope_conv(sp, &ptrn, &plen, &replaced))
939 return (1);
941 * XXX
942 * Currently, the match-any-<blank> expression used in
943 * re_cscope_conv() requires extended RE's. This may
944 * not be right or safe.
946 reflags |= REG_EXTENDED;
947 } else if (LF_ISSET(SEARCH_TAG)) {
948 if (re_tag_conv(sp, &ptrn, &plen, &replaced))
949 return (1);
950 } else if (!LF_ISSET(SEARCH_LITERAL))
951 if (re_conv(sp, &ptrn, &plen, &replaced))
952 return (1);
954 /* Discard previous pattern. */
955 if (*ptrnp != NULL) {
956 free(*ptrnp);
957 *ptrnp = NULL;
959 if (lenp != NULL)
960 *lenp = plen;
963 * Copy the string into allocated memory.
965 * XXX
966 * Regcomp isn't 8-bit clean, so the pattern is nul-terminated
967 * for now. There's just no other solution.
969 MALLOC(sp, *ptrnp, CHAR_T *, (plen + 1) * sizeof(CHAR_T));
970 if (*ptrnp != NULL) {
971 MEMCPYW(*ptrnp, ptrn, plen);
972 (*ptrnp)[plen] = '\0';
975 /* Free up conversion-routine-allocated memory. */
976 if (replaced)
977 FREE_SPACEW(sp, ptrn, 0);
979 if (*ptrnp == NULL)
980 return (1);
982 ptrn = *ptrnp;
986 * XXX
987 * Regcomp isn't 8-bit clean, so we just lost if the pattern
988 * contained a nul. Bummer!
990 if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) {
991 if (LF_ISSET(SEARCH_MSG))
992 re_error(sp, rval, rep);
993 return (1);
996 if (LF_ISSET(SEARCH_CSEARCH))
997 F_SET(sp, SC_RE_SEARCH);
998 if (LF_ISSET(SEARCH_CSUBST))
999 F_SET(sp, SC_RE_SUBST);
1001 return (0);
1005 * re_conv --
1006 * Convert vi's regular expressions into something that the
1007 * the POSIX 1003.2 RE functions can handle.
1009 * There are three conversions we make to make vi's RE's (specifically
1010 * the global, search, and substitute patterns) work with POSIX RE's.
1012 * 1: If O_MAGIC is not set, strip backslashes from the magic character
1013 * set (.[*~) that have them, and add them to the ones that don't.
1014 * 2: If O_MAGIC is not set, the string "\~" is replaced with the text
1015 * from the last substitute command's replacement string. If O_MAGIC
1016 * is set, it's the string "~".
1017 * 3: The pattern \<ptrn\> does "word" searches, convert it to use the
1018 * new RE escapes.
1020 * !!!/XXX
1021 * This doesn't exactly match the historic behavior of vi because we do
1022 * the ~ substitution before calling the RE engine, so magic characters
1023 * in the replacement string will be expanded by the RE engine, and they
1024 * weren't historically. It's a bug.
1026 static int
1027 re_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1029 size_t blen, len, needlen;
1030 int magic;
1031 CHAR_T *bp, *p, *t;
1034 * First pass through, we figure out how much space we'll need.
1035 * We do it in two passes, on the grounds that most of the time
1036 * the user is doing a search and won't have magic characters.
1037 * That way we can skip most of the memory allocation and copies.
1039 magic = 0;
1040 for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len)
1041 switch (*p) {
1042 case '\\':
1043 if (len > 1) {
1044 --len;
1045 switch (*++p) {
1046 case '<':
1047 magic = 1;
1048 needlen += RE_WSTART_LEN + 1;
1049 break;
1050 case '>':
1051 magic = 1;
1052 needlen += RE_WSTOP_LEN + 1;
1053 break;
1054 case '~':
1055 if (!O_ISSET(sp, O_MAGIC)) {
1056 magic = 1;
1057 needlen += sp->repl_len;
1059 break;
1060 case '.':
1061 case '[':
1062 case '*':
1063 if (!O_ISSET(sp, O_MAGIC)) {
1064 magic = 1;
1065 needlen += 1;
1067 break;
1068 default:
1069 needlen += 2;
1071 } else
1072 needlen += 1;
1073 break;
1074 case '~':
1075 if (O_ISSET(sp, O_MAGIC)) {
1076 magic = 1;
1077 needlen += sp->repl_len;
1079 break;
1080 case '.':
1081 case '[':
1082 case '*':
1083 if (!O_ISSET(sp, O_MAGIC)) {
1084 magic = 1;
1085 needlen += 2;
1087 break;
1088 default:
1089 needlen += 1;
1090 break;
1093 if (!magic) {
1094 *replacedp = 0;
1095 return (0);
1098 /* Get enough memory to hold the final pattern. */
1099 *replacedp = 1;
1100 GET_SPACE_RETW(sp, bp, blen, needlen);
1102 for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len)
1103 switch (*p) {
1104 case '\\':
1105 if (len > 1) {
1106 --len;
1107 switch (*++p) {
1108 case '<':
1109 MEMCPY(t,
1110 RE_WSTART, RE_WSTART_LEN);
1111 t += RE_WSTART_LEN;
1112 break;
1113 case '>':
1114 MEMCPY(t,
1115 RE_WSTOP, RE_WSTOP_LEN);
1116 t += RE_WSTOP_LEN;
1117 break;
1118 case '~':
1119 if (O_ISSET(sp, O_MAGIC))
1120 *t++ = '~';
1121 else {
1122 MEMCPYW(t,
1123 sp->repl, sp->repl_len);
1124 t += sp->repl_len;
1126 break;
1127 case '.':
1128 case '[':
1129 case '*':
1130 if (O_ISSET(sp, O_MAGIC))
1131 *t++ = '\\';
1132 *t++ = *p;
1133 break;
1134 default:
1135 *t++ = '\\';
1136 *t++ = *p;
1138 } else
1139 *t++ = '\\';
1140 break;
1141 case '~':
1142 if (O_ISSET(sp, O_MAGIC)) {
1143 MEMCPYW(t, sp->repl, sp->repl_len);
1144 t += sp->repl_len;
1145 } else
1146 *t++ = '~';
1147 break;
1148 case '.':
1149 case '[':
1150 case '*':
1151 if (!O_ISSET(sp, O_MAGIC))
1152 *t++ = '\\';
1153 *t++ = *p;
1154 break;
1155 default:
1156 *t++ = *p;
1157 break;
1160 *ptrnp = bp;
1161 *plenp = t - bp;
1162 return (0);
1166 * re_tag_conv --
1167 * Convert a tags search path into something that the POSIX
1168 * 1003.2 RE functions can handle.
1170 static int
1171 re_tag_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1173 size_t blen, len;
1174 int lastdollar;
1175 CHAR_T *bp, *p, *t;
1177 len = *plenp;
1179 /* Max memory usage is 2 times the length of the string. */
1180 *replacedp = 1;
1181 GET_SPACE_RETW(sp, bp, blen, len * 2);
1183 p = *ptrnp;
1184 t = bp;
1186 /* If the last character is a '/' or '?', we just strip it. */
1187 if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?'))
1188 --len;
1190 /* If the next-to-last or last character is a '$', it's magic. */
1191 if (len > 0 && p[len - 1] == '$') {
1192 --len;
1193 lastdollar = 1;
1194 } else
1195 lastdollar = 0;
1197 /* If the first character is a '/' or '?', we just strip it. */
1198 if (len > 0 && (p[0] == '/' || p[0] == '?')) {
1199 ++p;
1200 --len;
1203 /* If the first or second character is a '^', it's magic. */
1204 if (p[0] == '^') {
1205 *t++ = *p++;
1206 --len;
1210 * Escape every other magic character we can find, meanwhile stripping
1211 * the backslashes ctags inserts when escaping the search delimiter
1212 * characters.
1214 for (; len > 0; --len) {
1215 if (p[0] == '\\' && (p[1] == '/' || p[1] == '?')) {
1216 ++p;
1217 --len;
1218 } else if (strchr("^.[]$*", p[0]))
1219 *t++ = '\\';
1220 *t++ = *p++;
1222 if (lastdollar)
1223 *t++ = '$';
1225 *ptrnp = bp;
1226 *plenp = t - bp;
1227 return (0);
1231 * re_cscope_conv --
1232 * Convert a cscope search path into something that the POSIX
1233 * 1003.2 RE functions can handle.
1235 static int
1236 re_cscope_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1238 size_t blen, len, nspaces;
1239 CHAR_T *bp, *t;
1240 CHAR_T *p;
1241 const CHAR_T *wp;
1242 size_t wlen;
1245 * Each space in the source line printed by cscope represents an
1246 * arbitrary sequence of spaces, tabs, and comments.
1248 #define CSCOPE_RE_SPACE "([ \t]|/\\*([^*]|\\*/)*\\*/)*"
1249 #define CSCOPE_LEN sizeof(CSCOPE_RE_SPACE) - 1
1250 CHAR2INT(sp, CSCOPE_RE_SPACE, CSCOPE_LEN, wp, wlen);
1251 for (nspaces = 0, p = *ptrnp, len = *plenp; len > 0; ++p, --len)
1252 if (*p == ' ')
1253 ++nspaces;
1256 * Allocate plenty of space:
1257 * the string, plus potential escaping characters;
1258 * nspaces + 2 copies of CSCOPE_RE_SPACE;
1259 * ^, $, nul terminator characters.
1261 *replacedp = 1;
1262 len = (p - *ptrnp) * 2 + (nspaces + 2) * sizeof(CSCOPE_RE_SPACE) + 3;
1263 GET_SPACE_RETW(sp, bp, blen, len);
1265 p = *ptrnp;
1266 t = bp;
1268 *t++ = '^';
1269 MEMCPYW(t, wp, wlen);
1270 t += wlen;
1272 for (len = *plenp; len > 0; ++p, --len)
1273 if (*p == ' ') {
1274 MEMCPYW(t, wp, wlen);
1275 t += wlen;
1276 } else {
1277 if (strchr("\\^.[]$*+?()|{}", *p))
1278 *t++ = '\\';
1279 *t++ = *p;
1282 MEMCPYW(t, wp, wlen);
1283 t += wlen;
1284 *t++ = '$';
1286 *ptrnp = bp;
1287 *plenp = t - bp;
1288 return (0);
1292 * re_error --
1293 * Report a regular expression error.
1295 * PUBLIC: void re_error __P((SCR *, int, regex_t *));
1297 void
1298 re_error(SCR *sp, int errcode, regex_t *preg)
1300 size_t sz;
1301 char *oe;
1303 sz = regerror(errcode, preg, NULL, 0);
1304 if ((oe = malloc(sz)) == NULL)
1305 msgq(sp, M_SYSERR, NULL);
1306 else {
1307 (void)regerror(errcode, preg, oe, sz);
1308 msgq(sp, M_ERR, "RE error: %s", oe);
1309 free(oe);
1314 * re_sub --
1315 * Do the substitution for a regular expression.
1317 static int
1318 re_sub(SCR *sp, CHAR_T *ip, CHAR_T **lbp, size_t *lbclenp, size_t *lblenp, regmatch_t *match)
1320 /* Input line. */
1325 enum { C_NOT_SET, C_LOWER, C_ONE_LOWER, C_ONE_UPPER, C_UPPER } conv;
1326 size_t lbclen, lblen; /* Local copies. */
1327 size_t mlen; /* Match length. */
1328 size_t rpl; /* Remaining replacement length. */
1329 CHAR_T *rp; /* Replacement pointer. */
1330 int ch;
1331 int no; /* Match replacement offset. */
1332 CHAR_T *p, *t; /* Buffer pointers. */
1333 CHAR_T *lb; /* Local copies. */
1335 lb = *lbp; /* Get local copies. */
1336 lbclen = *lbclenp;
1337 lblen = *lblenp;
1340 * QUOTING NOTE:
1342 * There are some special sequences that vi provides in the
1343 * replacement patterns.
1344 * & string the RE matched (\& if nomagic set)
1345 * \# n-th regular subexpression
1346 * \E end \U, \L conversion
1347 * \e end \U, \L conversion
1348 * \l convert the next character to lower-case
1349 * \L convert to lower-case, until \E, \e, or end of replacement
1350 * \u convert the next character to upper-case
1351 * \U convert to upper-case, until \E, \e, or end of replacement
1353 * Otherwise, since this is the lowest level of replacement, discard
1354 * all escaping characters. This (hopefully) matches historic practice.
1356 #define OUTCH(ch, nltrans) { \
1357 ARG_CHAR_T __ch = (ch); \
1358 e_key_t __value = KEY_VAL(sp, __ch); \
1359 if (nltrans && (__value == K_CR || __value == K_NL)) { \
1360 NEEDNEWLINE(sp); \
1361 sp->newl[sp->newl_cnt++] = lbclen; \
1362 } else if (conv != C_NOT_SET) { \
1363 switch (conv) { \
1364 case C_ONE_LOWER: \
1365 conv = C_NOT_SET; \
1366 /* FALLTHROUGH */ \
1367 case C_LOWER: \
1368 if (ISUPPER(__ch)) \
1369 __ch = TOLOWER(__ch); \
1370 break; \
1371 case C_ONE_UPPER: \
1372 conv = C_NOT_SET; \
1373 /* FALLTHROUGH */ \
1374 case C_UPPER: \
1375 if (ISLOWER(__ch)) \
1376 __ch = TOUPPER(__ch); \
1377 break; \
1378 default: \
1379 abort(); \
1382 NEEDSP(sp, 1, p); \
1383 *p++ = __ch; \
1384 ++lbclen; \
1386 conv = C_NOT_SET;
1387 for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) {
1388 switch (ch = *rp++) {
1389 case '&':
1390 if (O_ISSET(sp, O_MAGIC)) {
1391 no = 0;
1392 goto subzero;
1394 break;
1395 case '\\':
1396 if (rpl == 0)
1397 break;
1398 --rpl;
1399 switch (ch = *rp) {
1400 case '&':
1401 ++rp;
1402 if (!O_ISSET(sp, O_MAGIC)) {
1403 no = 0;
1404 goto subzero;
1406 break;
1407 case '0': case '1': case '2': case '3': case '4':
1408 case '5': case '6': case '7': case '8': case '9':
1409 no = *rp++ - '0';
1410 subzero: if (match[no].rm_so == -1 ||
1411 match[no].rm_eo == -1)
1412 break;
1413 mlen = match[no].rm_eo - match[no].rm_so;
1414 for (t = ip + match[no].rm_so; mlen--; ++t)
1415 OUTCH((UCHAR_T)*t, 0);
1416 continue;
1417 case 'e':
1418 case 'E':
1419 ++rp;
1420 conv = C_NOT_SET;
1421 continue;
1422 case 'l':
1423 ++rp;
1424 conv = C_ONE_LOWER;
1425 continue;
1426 case 'L':
1427 ++rp;
1428 conv = C_LOWER;
1429 continue;
1430 case 'u':
1431 ++rp;
1432 conv = C_ONE_UPPER;
1433 continue;
1434 case 'U':
1435 ++rp;
1436 conv = C_UPPER;
1437 continue;
1438 default:
1439 ++rp;
1440 break;
1443 OUTCH(ch, 1);
1446 *lbp = lb; /* Update caller's information. */
1447 *lbclenp = lbclen;
1448 *lblenp = lblen;
1449 return (0);