etc/protocols - sync with NetBSD-8
[minix.git] / external / bsd / nvi / dist / ex / ex_subst.c
blob6e4de0403ce09e98b585878975c9d0a18f9d4808
1 /* $NetBSD: ex_subst.c,v 1.4 2014/01/26 21:43:45 christos Exp $ */
2 /*-
3 * Copyright (c) 1992, 1993, 1994
4 * The Regents of the University of California. All rights reserved.
5 * Copyright (c) 1992, 1993, 1994, 1995, 1996
6 * Keith Bostic. All rights reserved.
8 * See the LICENSE file for redistribution information.
9 */
11 #include "config.h"
13 #include <sys/cdefs.h>
14 #if 0
15 #ifndef lint
16 static const char sccsid[] = "Id: ex_subst.c,v 10.50 2002/02/09 21:18:23 skimo Exp (Berkeley) Date: 2002/02/09 21:18:23 ";
17 #endif /* not lint */
18 #else
19 __RCSID("$NetBSD: ex_subst.c,v 1.4 2014/01/26 21:43:45 christos Exp $");
20 #endif
22 #include <sys/types.h>
23 #include <sys/queue.h>
24 #include <sys/time.h>
26 #include <bitstring.h>
27 #include <ctype.h>
28 #include <errno.h>
29 #include <limits.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <unistd.h>
35 #include "../common/common.h"
36 #include "../vi/vi.h"
/*
 * Flag bits for the last argument of s().  ex_s() passes SUB_FIRST when it
 * has just compiled a new pattern, and SUB_MUSTSETR when the character that
 * follows the command is alphanumeric or a backslash (so it cannot be a
 * delimiter).  s() tests both while parsing the trailing flags.
 */
38 #define SUB_FIRST 0x01 /* The 'r' flag isn't reasonable. */
39 #define SUB_MUSTSETR 0x02 /* The 'r' flag is required. */
/* File-local helpers; each is defined further down in this file. */
41 static int re_conv __P((SCR *, CHAR_T **, size_t *, int *));
42 static int re_cscope_conv __P((SCR *, CHAR_T **, size_t *, int *));
43 static int re_sub __P((SCR *,
44 CHAR_T *, CHAR_T **, size_t *, size_t *, regmatch_t [10]));
45 static int re_tag_conv __P((SCR *, CHAR_T **, size_t *, int *));
46 static int s __P((SCR *, EXCMD *, CHAR_T *, regex_t *, u_int));
49 * ex_s --
50 * [line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]]
52 * Substitute on lines matching a pattern.
54 * PUBLIC: int ex_s __P((SCR *, EXCMD *));
/*
 * Overview of ex_s():
 *  - No argument, or an argument of only blanks: hand off to ex_subagain().
 *  - First non-blank is alphanumeric or a backslash: it is not a delimiter,
 *    so call s() with the saved substitute RE and SUB_MUSTSETR.
 *  - Otherwise: split the argument into pattern and replacement (both are
 *    parsed in place in the argument buffer), compile the pattern or reuse
 *    the last RE when the pattern is empty, store the expanded replacement
 *    in sp->repl / sp->repl_len, and call s() with whatever text follows.
 * Returns 1 on any error detected here, else the result of s() or
 * ex_subagain().
 */
56 int
57 ex_s(SCR *sp, EXCMD *cmdp)
59 regex_t *re;
60 size_t blen, len;
61 u_int flags;
62 ARG_CHAR_T delim;
63 CHAR_T *bp, *p, *ptrn, *rep, *t;
66 * Skip leading white space.
68 * !!!
69 * Historic vi allowed any non-alphanumeric to serve as the
70 * substitution command delimiter.
72 * !!!
73 * If the arguments are empty, it's the same as &, i.e. we
74 * repeat the last substitution.
76 if (cmdp->argc == 0)
77 goto subagain;
78 for (p = cmdp->argv[0]->bp,
79 len = cmdp->argv[0]->len; len > 0; --len, ++p) {
80 if (!ISBLANK((UCHAR_T)*p))
81 break;
83 if (len == 0)
84 subagain: return (ex_subagain(sp, cmdp));
/* The delimiter is consumed here; p now points just past it. */
86 delim = (UCHAR_T)*p++;
87 if (ISALNUM(delim) || delim == '\\')
88 return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR));
91 * !!!
92 * The full-blown substitute command reset the remembered
93 * state of the 'c' and 'g' suffices.
95 sp->c_suffix = sp->g_suffix = 0;
98 * Get the pattern string, toss escaping characters.
100 * !!!
101 * Historic vi accepted any of the following forms:
103 * :s/abc/def/ change "abc" to "def"
104 * :s/abc/def change "abc" to "def"
105 * :s/abc/ delete "abc"
106 * :s/abc delete "abc"
108 * QUOTING NOTE:
110 * Only toss an escaping character if it escapes a delimiter.
111 * This means that "s/A/\\\\f" replaces "A" with "\\f". It
112 * would be nice to be more regular, i.e. for each layer of
113 * escaping a single escaping character is removed, but that's
114 * not how the historic vi worked.
/*
 * ptrn marks the start of the pattern; t is the write cursor and p the read
 * cursor over the same buffer, so the pattern is compacted in place as
 * escaping backslashes are dropped.
 */
116 for (ptrn = t = p;;) {
117 if (p[0] == '\0' || p[0] == delim) {
118 if (p[0] == delim)
119 ++p;
121 * !!!
122 * Nul terminate the pattern string -- it's passed
123 * to regcomp which doesn't understand anything else.
125 *t = '\0';
126 break;
128 if (p[0] == '\\') {
129 if (p[1] == delim)
130 ++p;
131 else if (p[1] == '\\')
132 *t++ = *p++;
134 *t++ = *p++;
138 * If the pattern string is empty, use the last RE (not just the
139 * last substitution RE).
141 if (*ptrn == '\0') {
142 if (sp->re == NULL) {
143 ex_emsg(sp, NULL, EXM_NOPREVRE);
144 return (1);
147 /* Re-compile the RE if necessary. */
148 if (!F_ISSET(sp, SC_RE_SEARCH) &&
149 re_compile(sp, sp->re, sp->re_len,
150 NULL, NULL, &sp->re_c, SEARCH_CSEARCH | SEARCH_MSG))
151 return (1);
152 flags = 0;
153 } else {
155 * !!!
156 * Compile the RE. Historic practice is that substitutes set
157 * the search direction as well as both substitute and search
158 * RE's. We compile the RE twice, as we don't want to bother
159 * ref counting the pattern string and (opaque) structure.
161 if (re_compile(sp, ptrn, t - ptrn, &sp->re,
162 &sp->re_len, &sp->re_c, SEARCH_CSEARCH | SEARCH_MSG))
163 return (1);
164 if (re_compile(sp, ptrn, t - ptrn, &sp->subre,
165 &sp->subre_len, &sp->subre_c, SEARCH_CSUBST | SEARCH_MSG))
166 return (1);
168 flags = SUB_FIRST;
169 sp->searchdir = FORWARD;
/* Either way the substitution below runs against the search RE. */
171 re = &sp->re_c;
174 * Get the replacement string.
176 * The special character & (\& if O_MAGIC not set) matches the
177 * entire RE. No handling of & is required here, it's done by
178 * re_sub().
180 * The special character ~ (\~ if O_MAGIC not set) inserts the
181 * previous replacement string into this replacement string.
182 * Count ~'s to figure out how much space we need. We could
183 * special case nonexistent last patterns or whether or not
184 * O_MAGIC is set, but it's probably not worth the effort.
186 * QUOTING NOTE:
188 * Only toss an escaping character if it escapes a delimiter or
189 * if O_MAGIC is set and it escapes a tilde.
191 * !!!
192 * If the entire replacement pattern is "%", then use the last
193 * replacement pattern. This semantic was added to vi in System
194 * V and then percolated elsewhere, presumably around the time
195 * that it was added to their version of ed(1).
/* Empty replacement: discard any saved replacement string. */
197 if (p[0] == L('\0') || p[0] == delim) {
198 if (p[0] == delim)
199 ++p;
200 if (sp->repl != NULL)
201 free(sp->repl);
202 sp->repl = NULL;
203 sp->repl_len = 0;
/* A lone '%': leave sp->repl untouched and just step past it. */
204 } else if (p[0] == L('%') && (p[1] == L('\0') || p[1] == delim))
205 p += p[1] == delim ? 2 : 1;
206 else {
/*
 * First pass: len counts one slot per source character plus repl_len for
 * every '~' seen, which bounds what the second pass can write.
 */
207 for (rep = p, len = 0;
208 p[0] != L('\0') && p[0] != delim; ++p, ++len)
209 if (p[0] == L('~'))
210 len += sp->repl_len;
211 GET_SPACE_RETW(sp, bp, blen, len);
/* Second pass: build the expanded replacement in bp, recounting len. */
212 for (t = bp, len = 0, p = rep;;) {
213 if (p[0] == L('\0') || p[0] == delim) {
214 if (p[0] == delim)
215 ++p;
216 break;
218 if (p[0] == L('\\')) {
219 if (p[1] == delim)
220 ++p;
221 else if (p[1] == L('\\')) {
222 *t++ = *p++;
223 ++len;
224 } else if (p[1] == L('~')) {
225 ++p;
226 if (!O_ISSET(sp, O_MAGIC))
227 goto tilde;
229 } else if (p[0] == L('~') && O_ISSET(sp, O_MAGIC)) {
230 tilde: ++p;
231 MEMCPYW(t, sp->repl, sp->repl_len);
232 t += sp->repl_len;
233 len += sp->repl_len;
234 continue;
236 *t++ = *p++;
237 ++len;
/*
 * The old sp->repl is only freed once the new text has been built, because
 * the '~' expansion above copies from it.  When the result is empty the old
 * buffer is left in place and only repl_len is set to 0.
 */
239 if ((sp->repl_len = len) != 0) {
240 if (sp->repl != NULL)
241 free(sp->repl);
242 if ((sp->repl = malloc(len * sizeof(CHAR_T))) == NULL) {
243 msgq(sp, M_SYSERR, NULL);
244 FREE_SPACEW(sp, bp, blen);
245 return (1);
247 MEMCPYW(sp->repl, bp, len);
249 FREE_SPACEW(sp, bp, blen);
/* p now points at whatever follows the replacement (flags, count). */
251 return (s(sp, cmdp, p, re, flags));
255 * ex_subagain --
256 * [line [,line]] & [cgr] [count] [#lp]]
258 * Substitute using the last substitute RE and replacement pattern.
260 * PUBLIC: int ex_subagain __P((SCR *, EXCMD *));
263 ex_subagain(SCR *sp, EXCMD *cmdp)
265 if (sp->subre == NULL) {
266 ex_emsg(sp, NULL, EXM_NOPREVRE);
267 return (1);
269 if (!F_ISSET(sp, SC_RE_SUBST) &&
270 re_compile(sp, sp->subre, sp->subre_len,
271 NULL, NULL, &sp->subre_c, SEARCH_CSUBST | SEARCH_MSG))
272 return (1);
273 return (s(sp,
274 cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->subre_c, 0));
278 * ex_subtilde --
279 * [line [,line]] ~ [cgr] [count] [#lp]]
281 * Substitute using the last RE and last substitute replacement pattern.
283 * PUBLIC: int ex_subtilde __P((SCR *, EXCMD *));
286 ex_subtilde(SCR *sp, EXCMD *cmdp)
288 if (sp->re == NULL) {
289 ex_emsg(sp, NULL, EXM_NOPREVRE);
290 return (1);
292 if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp, sp->re,
293 sp->re_len, NULL, NULL, &sp->re_c, SEARCH_CSEARCH | SEARCH_MSG))
294 return (1);
295 return (s(sp,
296 cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->re_c, 0));
300 * s --
301 * Do the substitution. This stuff is *really* tricky. There are lots of
302 * special cases, and general nastiness. Don't mess with it unless you're
303 * pretty confident.
305 * The nasty part of the substitution is what happens when the replacement
306 * string contains newlines. It's a bit tricky -- consider the information
307 * that has to be retained for "s/f\(o\)o/^M\1^M\1/". The solution here is
308 * to build a set of newline offsets which we use to break the line up later,
309 * when the replacement is done. Don't change it unless you're *damned*
310 * confident.
/*
 * NEEDNEWLINE: when the newline-offset array sp->newl is full
 * (newl_len == newl_cnt), grow it by 25 entries.  If the reallocation
 * leaves sp->newl NULL, newl_len is reset to 0 and the macro executes
 * `return (1)` in the function that expanded it.
 */
312 #define NEEDNEWLINE(sp) { \
313 if (sp->newl_len == sp->newl_cnt) { \
314 sp->newl_len += 25; \
315 REALLOC(sp, sp->newl, size_t *, \
316 sp->newl_len * sizeof(size_t)); \
317 if (sp->newl == NULL) { \
318 sp->newl_len = 0; \
319 return (1); \
/*
 * BUILD: append len CHAR_T's starting at l to the build buffer.  It relies
 * on the expanding function having locals named lb (buffer), lbclen (used
 * length) and lblen (capacity).  The buffer grows by at least 256 elements
 * when needed; on failure lbclen is zeroed and the macro executes
 * `return (1)` in the expanding function.
 */
324 #define BUILD(sp, l, len) { \
325 if (lbclen + (len) > lblen) { \
326 lblen += MAX(lbclen + (len), 256); \
327 REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T)); \
328 if (lb == NULL) { \
329 lbclen = 0; \
330 return (1); \
333 MEMCPYW(lb + lbclen, l, len); \
334 lbclen += len; \
/*
 * NEEDSP: same growth logic as BUILD, but nothing is copied and lbclen is
 * not advanced; pnt is set to the current end of the build buffer so the
 * caller can write there itself.
 */
337 #define NEEDSP(sp, len, pnt) { \
338 if (lbclen + (len) > lblen) { \
339 lblen += MAX(lbclen + (len), 256); \
340 REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T)); \
341 if (lb == NULL) { \
342 lbclen = 0; \
343 return (1); \
345 pnt = lb + lbclen; \
/*
 * s(): shared back end for ex_s(), ex_subagain() and ex_subtilde().
 *
 *	sp, cmdp  screen and parsed ex command (addr1/addr2 give the range)
 *	st        text following the command (options, count, flags), or NULL
 *	re        compiled RE to match with; the 'r' flag switches it to
 *	          sp->re_c
 *	flags     SUB_FIRST and/or SUB_MUSTSETR
 *
 * The function first parses st, then walks the lines from addr1 to addr2,
 * assembling each changed line in the build buffer lb and writing it back
 * with db_set()/db_insert(), and finally positions the cursor and reports
 * "No match found" when nothing matched outside a global command.
 * Returns 0 on success and 1 on error.
 *
 * NOTE(review): BUILD() executes `return (1)` on allocation failure, which
 * appears to bypass the cleanup of bp at the end of this function -- confirm.
 */
349 static int
350 s(SCR *sp, EXCMD *cmdp, CHAR_T *st, regex_t *re, u_int flags)
352 EVENT ev;
353 MARK from, to;
354 TEXTH tiq;
355 db_recno_t elno, lno, slno;
356 u_long ul;
357 regmatch_t match[10];
358 size_t blen, cnt, last, lbclen, lblen, len, llen;
359 size_t offset, saved_offset, scno;
360 int lflag, nflag, pflag, rflag;
361 int didsub, do_eol_match, eflags, empty_ok, eval;
362 int linechanged, matched, quit, rval;
363 CHAR_T *lb, *bp;
364 enum nresult nret;
366 NEEDFILE(sp, cmdp);
/* Remember the starting cursor so the end of the function can tell if it moved. */
368 slno = sp->lno;
369 scno = sp->cno;
372 * !!!
373 * Historically, the 'g' and 'c' suffices were always toggled as flags,
374 * so ":s/A/B/" was the same as ":s/A/B/ccgg". If O_EDCOMPATIBLE was
375 * not set, they were initialized to 0 for all substitute commands. If
376 * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user
377 * specified substitute/replacement patterns (see ex_s()).
379 if (!O_ISSET(sp, O_EDCOMPATIBLE))
380 sp->c_suffix = sp->g_suffix = 0;
383 * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but
384 * it only displayed the last change. I'd disallow them, but they are
385 * useful in combination with the [v]global commands. In the current
386 * model the problem is combining them with the 'c' flag -- the screen
387 * would have to flip back and forth between the confirm screen and the
388 * ex print screen, which would be pretty awful. We do display all
389 * changes, though, for what that's worth.
391 * !!!
392 * Historic vi was fairly strict about the order of "options", the
393 * count, and "flags". I'm somewhat fuzzy on the difference between
394 * options and flags, anyway, so this is a simpler approach, and we
395 * just take it them in whatever order the user gives them. (The ex
396 * usage statement doesn't reflect this.)
398 lflag = nflag = pflag = rflag = 0;
399 if (st == NULL)
400 goto noargs;
/* lno doubles as "count already seen": it stays OOBLNO until a number is parsed. */
401 for (lno = OOBLNO; *st != '\0'; ++st)
402 switch (*st) {
403 case ' ':
404 case '\t':
405 continue;
406 case '+':
407 ++cmdp->flagoff;
408 break;
409 case '-':
410 --cmdp->flagoff;
411 break;
412 case '0': case '1': case '2': case '3': case '4':
413 case '5': case '6': case '7': case '8': case '9':
414 if (lno != OOBLNO)
415 goto usage;
416 errno = 0;
417 nret = nget_uslong(sp, &ul, st, &st, 10);
418 lno = ul;
419 if (*st == '\0') /* Loop increment correction. */
420 --st;
421 if (nret != NUM_OK) {
422 if (nret == NUM_OVER)
423 msgq(sp, M_ERR, "153|Count overflow");
424 else if (nret == NUM_UNDER)
425 msgq(sp, M_ERR, "154|Count underflow");
426 else
427 msgq(sp, M_SYSERR, NULL);
428 return (1);
431 * In historic vi, the count was inclusive from the
432 * second address.
434 cmdp->addr1.lno = cmdp->addr2.lno;
435 cmdp->addr2.lno += lno - 1;
436 if (!db_exist(sp, cmdp->addr2.lno) &&
437 db_last(sp, &cmdp->addr2.lno))
438 return (1);
439 break;
440 case '#':
441 nflag = 1;
442 break;
443 case 'c':
444 sp->c_suffix = !sp->c_suffix;
/*
 * NOTE(review): tiq is only initialized here, yet the ex-mode confirmation
 * code below uses it whenever sp->c_suffix is set.  With O_EDCOMPATIBLE the
 * suffix is not reset on entry, so it looks possible to reach that code
 * without having passed through this case -- confirm.
 */
446 /* Ex text structure initialization. */
447 if (F_ISSET(sp, SC_EX)) {
448 memset(&tiq, 0, sizeof(TEXTH));
449 TAILQ_INIT(&tiq);
451 break;
452 case 'g':
453 sp->g_suffix = !sp->g_suffix;
454 break;
455 case 'l':
456 lflag = 1;
457 break;
458 case 'p':
459 pflag = 1;
460 break;
461 case 'r':
462 if (LF_ISSET(SUB_FIRST)) {
463 msgq(sp, M_ERR,
464 "155|Regular expression specified; r flag meaningless");
465 return (1);
467 if (!F_ISSET(sp, SC_RE_SEARCH)) {
468 ex_emsg(sp, NULL, EXM_NOPREVRE);
469 return (1);
471 rflag = 1;
472 re = &sp->re_c;
473 break;
474 default:
475 goto usage;
478 if (*st != '\0' || (!rflag && LF_ISSET(SUB_MUSTSETR))) {
479 usage: ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE);
480 return (1);
483 noargs: if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) {
484 msgq(sp, M_ERR,
485 "156|The #, l and p flags may not be combined with the c flag in vi mode");
486 return (1);
490 * bp: if interactive, line cache
491 * blen: if interactive, line cache length
492 * lb: build buffer pointer.
493 * lbclen: current length of built buffer.
494 * lblen; length of build buffer.
496 bp = lb = NULL;
497 blen = lbclen = lblen = 0;
499 /* For each line... */
500 lno = cmdp->addr1.lno == 0 ? 1 : cmdp->addr1.lno;
501 for (matched = quit = 0,
502 elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) {
504 /* Someone's unhappy, time to stop. */
505 if (INTERRUPTED(sp))
506 break;
508 /* Get the line. */
509 if (db_get(sp, lno, DBG_FATAL, &st, &llen))
510 goto err;
513 * Make a local copy if doing confirmation -- when calling
514 * the confirm routine we're likely to lose the cached copy.
516 if (sp->c_suffix) {
517 if (bp == NULL) {
518 GET_SPACE_RETW(sp, bp, blen, llen);
519 } else
520 ADD_SPACE_RETW(sp, bp, blen, llen);
521 MEMCPYW(bp, st, llen);
522 st = bp;
525 /* Start searching from the beginning. */
526 offset = 0;
527 len = llen;
529 /* Reset the build buffer offset. */
530 lbclen = 0;
532 /* Reset empty match flag. */
533 empty_ok = 1;
536 * We don't want to have to do a setline if the line didn't
537 * change -- keep track of whether or not this line changed.
538 * If doing confirmations, don't want to keep setting the
539 * line if change is refused -- keep track of substitutions.
541 didsub = linechanged = 0;
543 /* New line, do an EOL match. */
544 do_eol_match = 1;
546 /* It's not nul terminated, but we pretend it is. */
547 eflags = REG_STARTEND;
550 * The search area is from st + offset to the EOL.
552 * Generally, match[0].rm_so is the offset of the start
553 * of the match from the start of the search, and offset
554 * is the offset of the start of the last search.
/* With REG_STARTEND, match[0] carries the search bounds into regexec(). */
556 nextmatch: match[0].rm_so = 0;
557 match[0].rm_eo = len;
559 /* Get the next match. */
560 eval = regexec(re, st + offset, 10, match, eflags);
563 * There wasn't a match or if there was an error, deal with
564 * it. If there was a previous match in this line, resolve
565 * the changes into the database. Otherwise, just move on.
567 if (eval == REG_NOMATCH)
568 goto endmatch;
569 if (eval != 0) {
570 re_error(sp, eval, re);
571 goto err;
573 matched = 1;
575 /* Only the first search can match an anchored expression. */
576 eflags |= REG_NOTBOL;
579 * !!!
580 * It's possible to match 0-length strings -- for example, the
581 * command s;a*;X;, when matched against the string "aabb" will
582 * result in "XbXbX", i.e. the matches are "aa", the space
583 * between the b's and the space between the b's and the end of
584 * the string. There is a similar space between the beginning
585 * of the string and the a's. The rule that we use (because vi
586 * historically used it) is that any 0-length match, occurring
587 * immediately after a match, is ignored. Otherwise, the above
588 * example would have resulted in "XXbXbX". Another example is
589 * incorrectly using " *" to replace groups of spaces with one
590 * space.
592 * The way we do this is that if we just had a successful match,
593 * the starting offset does not skip characters, and the match
594 * is empty, ignore the match and move forward. If there's no
595 * more characters in the string, we were attempting to match
596 * after the last character, so quit.
598 if (!empty_ok && match[0].rm_so == 0 && match[0].rm_eo == 0) {
599 empty_ok = 1;
600 if (len == 0)
601 goto endmatch;
602 BUILD(sp, st + offset, 1)
603 ++offset;
604 --len;
605 goto nextmatch;
608 /* Confirm change. */
609 if (sp->c_suffix) {
611 * Set the cursor position for confirmation. Note,
612 * if we matched on a '$', the cursor may be past
613 * the end of line.
615 from.lno = to.lno = lno;
616 from.cno = match[0].rm_so + offset;
617 to.cno = match[0].rm_eo + offset;
619 * Both ex and vi have to correct for a change before
620 * the first character in the line.
622 if (llen == 0)
623 from.cno = to.cno = 0;
624 if (F_ISSET(sp, SC_VI)) {
626 * Only vi has to correct for a change after
627 * the last character in the line.
629 * XXX
630 * It would be nice to change the vi code so
631 * that we could display a cursor past EOL.
/*
 * NOTE(review): llen is a size_t, so for an empty line (llen == 0) both
 * tests below are true and `llen - 1` wraps around -- confirm that the
 * callers tolerate that column value.
 */
633 if (to.cno >= llen)
634 to.cno = llen - 1;
635 if (from.cno >= llen)
636 from.cno = llen - 1;
638 sp->lno = from.lno;
639 sp->cno = from.cno;
640 if (vs_refresh(sp, 1))
641 goto err;
643 vs_update(sp, msg_cat(sp,
644 "169|Confirm change? [n]", NULL), NULL);
646 if (v_event_get(sp, &ev, 0, 0))
647 goto err;
648 switch (ev.e_event) {
649 case E_CHARACTER:
650 break;
651 case E_EOF:
652 case E_ERR:
653 case E_INTERRUPT:
654 goto lquit;
655 default:
656 v_event_err(sp, &ev);
657 goto lquit;
659 } else {
660 if (ex_print(sp, cmdp, &from, &to, 0) ||
661 ex_scprint(sp, &from, &to))
662 goto lquit;
663 if (ex_txt(sp, &tiq, 0, TXT_CR))
664 goto err;
/* In ex mode the answer is the first character of the line just read. */
665 ev.e_c = TAILQ_FIRST(&tiq)->lb[0];
668 switch (ev.e_c) {
669 case CH_YES:
670 break;
/* Anything other than yes or quit is treated as "no". */
671 default:
672 case CH_NO:
673 didsub = 0;
674 BUILD(sp, st + offset, match[0].rm_eo);
675 goto skip;
676 case CH_QUIT:
677 /* Set the quit/interrupted flags. */
678 lquit: quit = 1;
679 F_SET(sp->gp, G_INTERRUPTED);
682 * Resolve any changes, then return to (and
683 * exit from) the main loop.
685 goto endmatch;
690 * Set the cursor to the last position changed, converting
691 * from 1-based to 0-based.
693 sp->lno = lno;
694 sp->cno = match[0].rm_so;
696 /* Copy the bytes before the match into the build buffer. */
697 BUILD(sp, st + offset, match[0].rm_so);
699 /* Substitute the matching bytes. */
700 didsub = 1;
701 if (re_sub(sp, st + offset, &lb, &lbclen, &lblen, match))
702 goto err;
704 /* Set the change flag so we know this line was modified. */
705 linechanged = 1;
707 /* Move past the matched bytes. */
708 skip: offset += match[0].rm_eo;
709 len -= match[0].rm_eo;
711 /* A match cannot be followed by an empty pattern. */
712 empty_ok = 0;
715 * If doing a global change with confirmation, we have to
716 * update the screen. The basic idea is to store the line
717 * so the screen update routines can find it, and restart.
719 if (didsub && sp->c_suffix && sp->g_suffix) {
721 * The new search offset will be the end of the
722 * modified line.
724 saved_offset = lbclen;
726 /* Copy the rest of the line. */
727 if (len)
728 BUILD(sp, st + offset, len)
730 /* Set the new offset. */
731 offset = saved_offset;
733 /* Store inserted lines, adjusting the build buffer. */
734 last = 0;
735 if (sp->newl_cnt) {
736 for (cnt = 0;
737 cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
738 if (db_insert(sp, lno,
739 lb + last, sp->newl[cnt] - last))
740 goto err;
741 last = sp->newl[cnt] + 1;
742 ++sp->rptlines[L_ADDED];
744 lbclen -= last;
745 offset -= last;
746 sp->newl_cnt = 0;
749 /* Store and retrieve the line. */
750 if (db_set(sp, lno, lb + last, lbclen))
751 goto err;
752 if (db_get(sp, lno, DBG_FATAL, &st, &llen))
753 goto err;
754 ADD_SPACE_RETW(sp, bp, blen, llen)
755 MEMCPYW(bp, st, llen);
756 st = bp;
757 len = llen - offset;
759 /* Restart the build. */
760 lbclen = 0;
761 BUILD(sp, st, offset);
764 * If we haven't already done the after-the-string
765 * match, do one. Set REG_NOTEOL so the '$' pattern
766 * only matches once.
768 if (!do_eol_match)
769 goto endmatch;
770 if (offset == len) {
771 do_eol_match = 0;
772 eflags |= REG_NOTEOL;
774 goto nextmatch;
778 * If it's a global:
780 * If at the end of the string, do a test for the after
781 * the string match. Set REG_NOTEOL so the '$' pattern
782 * only matches once.
784 if (sp->g_suffix && do_eol_match) {
785 if (len == 0) {
786 do_eol_match = 0;
787 eflags |= REG_NOTEOL;
789 goto nextmatch;
/* Lines on which no substitution was applied are not written back. */
792 endmatch: if (!linechanged)
793 continue;
795 /* Copy any remaining bytes into the build buffer. */
796 if (len)
797 BUILD(sp, st + offset, len)
799 /* Store inserted lines, adjusting the build buffer. */
800 last = 0;
801 if (sp->newl_cnt) {
802 for (cnt = 0;
803 cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
804 if (db_insert(sp,
805 lno, lb + last, sp->newl[cnt] - last))
806 goto err;
807 last = sp->newl[cnt] + 1;
808 ++sp->rptlines[L_ADDED];
810 lbclen -= last;
811 sp->newl_cnt = 0;
814 /* Store the changed line. */
815 if (db_set(sp, lno, lb + last, lbclen))
816 goto err;
818 /* Update changed line counter. */
819 if (sp->rptlchange != lno) {
820 sp->rptlchange = lno;
821 ++sp->rptlines[L_CHANGED];
825 * !!!
826 * Display as necessary. Historic practice is to only
827 * display the last line of a line split into multiple
828 * lines.
830 if (lflag || nflag || pflag) {
831 from.lno = to.lno = lno;
832 from.cno = to.cno = 0;
833 if (lflag)
834 (void)ex_print(sp, cmdp, &from, &to, E_C_LIST);
835 if (nflag)
836 (void)ex_print(sp, cmdp, &from, &to, E_C_HASH);
837 if (pflag)
838 (void)ex_print(sp, cmdp, &from, &to, E_C_PRINT);
843 * !!!
844 * Historically, vi attempted to leave the cursor at the same place if
845 * the substitution was done at the current cursor position. Otherwise
846 * it moved it to the first non-blank of the last line changed. There
847 * were some problems: for example, :s/$/foo/ with the cursor on the
848 * last character of the line left the cursor on the last character, or
849 * the & command with multiple occurrences of the matching string in the
850 * line usually left the cursor in a fairly random position.
852 * We try to do the same thing, with the exception that if the user is
853 * doing substitution with confirmation, we move to the last line about
854 * which the user was consulted, as opposed to the last line that they
855 * actually changed. This prevents a screen flash if the user doesn't
856 * change many of the possible lines.
858 if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) {
859 sp->cno = 0;
860 (void)nonblank(sp, sp->lno, &sp->cno);
864 * If not in a global command, and nothing matched, say so.
865 * Else, if none of the lines displayed, put something up.
867 rval = 0;
868 if (!matched) {
869 if (!F_ISSET(sp, SC_EX_GLOBAL)) {
870 msgq(sp, M_ERR, "157|No match found");
871 goto err;
873 } else if (!lflag && !nflag && !pflag)
874 F_SET(cmdp, E_AUTOPRINT);
/*
 * The `if (0)` body is only entered through the err: label, so the success
 * and failure paths share the buffer cleanup that follows.
 */
876 if (0) {
877 err: rval = 1;
880 if (bp != NULL)
881 FREE_SPACEW(sp, bp, blen);
882 if (lb != NULL)
883 free(lb);
884 return (rval);
888 * re_compile --
889 * Compile the RE.
891 * PUBLIC: int re_compile __P((SCR *,
892 * PUBLIC: CHAR_T *, size_t, CHAR_T **, size_t *, regex_t *, u_int));
/*
 * Parameters:
 *	ptrn, plen   pattern text and its length
 *	ptrnp, lenp  when ptrnp is non-NULL the pattern is first run through
 *	             the matching conversion routine and a nul-terminated
 *	             copy is stored in *ptrnp (any previous *ptrnp is freed);
 *	             the converted length is stored in *lenp when lenp is
 *	             non-NULL.  When ptrnp is NULL, ptrn is compiled as given.
 *	rep          regex_t that receives the compiled expression
 *	flags        SEARCH_* bits selecting RE options, which saved RE is
 *	             being replaced, and whether errors are reported
 * Returns 0 on success and 1 on failure.
 */
895 re_compile(SCR *sp, CHAR_T *ptrn, size_t plen, CHAR_T **ptrnp, size_t *lenp, regex_t *rep, u_int flags)
897 size_t len;
898 int reflags, replaced, rval;
899 CHAR_T *p;
901 /* Set RE flags. */
902 reflags = 0;
903 if (LF_ISSET(SEARCH_EXTEND))
904 reflags |= REG_EXTENDED;
905 if (LF_ISSET(SEARCH_IC))
906 reflags |= REG_ICASE;
907 if (LF_ISSET(SEARCH_LITERAL))
908 reflags |= REG_NOSPEC;
/* The user's edit options only apply to ordinary searches. */
909 if (!LF_ISSET(SEARCH_NOOPT | SEARCH_CSCOPE | SEARCH_TAG)) {
910 if (O_ISSET(sp, O_EXTENDED))
911 reflags |= REG_EXTENDED;
912 if (O_ISSET(sp, O_IGNORECASE))
913 reflags |= REG_ICASE;
914 if (O_ISSET(sp, O_ICLOWER))
915 goto iclower;
/* Ignore case only when the pattern contains no upper-case character. */
917 if (LF_ISSET(SEARCH_ICL)) {
918 iclower: for (p = ptrn, len = plen; len > 0; ++p, --len)
919 if (ISUPPER((UCHAR_T)*p))
920 break;
921 if (len == 0)
922 reflags |= REG_ICASE;
925 /* If we're replacing a saved value, clear the old one. */
926 if (LF_ISSET(SEARCH_CSEARCH) && F_ISSET(sp, SC_RE_SEARCH)) {
927 regfree(&sp->re_c);
928 F_CLR(sp, SC_RE_SEARCH);
930 if (LF_ISSET(SEARCH_CSUBST) && F_ISSET(sp, SC_RE_SUBST)) {
931 regfree(&sp->subre_c);
932 F_CLR(sp, SC_RE_SUBST);
936 * If we're saving the string, it's a pattern we haven't seen before,
937 * so convert the vi-style RE's to POSIX 1003.2 RE's. Save a copy for
938 * later recompilation. Free any previously saved value.
940 if (ptrnp != NULL) {
/* replaced is set by the conversion routine when ptrn now points at a temporary buffer. */
941 replaced = 0;
942 if (LF_ISSET(SEARCH_CSCOPE)) {
943 if (re_cscope_conv(sp, &ptrn, &plen, &replaced))
944 return (1);
946 * XXX
947 * Currently, the match-any-<blank> expression used in
948 * re_cscope_conv() requires extended RE's. This may
949 * not be right or safe.
951 reflags |= REG_EXTENDED;
952 } else if (LF_ISSET(SEARCH_TAG)) {
953 if (re_tag_conv(sp, &ptrn, &plen, &replaced))
954 return (1);
955 } else if (!LF_ISSET(SEARCH_LITERAL))
956 if (re_conv(sp, &ptrn, &plen, &replaced))
957 return (1);
959 /* Discard previous pattern. */
960 if (*ptrnp != NULL) {
961 free(*ptrnp);
962 *ptrnp = NULL;
964 if (lenp != NULL)
965 *lenp = plen;
968 * Copy the string into allocated memory.
970 * XXX
971 * Regcomp isn't 8-bit clean, so the pattern is nul-terminated
972 * for now. There's just no other solution.
974 MALLOC(sp, *ptrnp, CHAR_T *, (plen + 1) * sizeof(CHAR_T));
975 if (*ptrnp != NULL) {
976 MEMCPYW(*ptrnp, ptrn, plen);
977 (*ptrnp)[plen] = '\0';
980 /* Free up conversion-routine-allocated memory. */
981 if (replaced)
982 FREE_SPACEW(sp, ptrn, 0);
/* The temporary buffer is released before the allocation result is checked, so it is freed on both paths. */
984 if (*ptrnp == NULL)
985 return (1);
987 ptrn = *ptrnp;
991 * XXX
992 * Regcomp isn't 8-bit clean, so we just lost if the pattern
993 * contained a nul. Bummer!
995 if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) {
996 if (LF_ISSET(SEARCH_MSG))
997 re_error(sp, rval, rep);
998 return (1);
/* Record which saved RE now has a valid compiled form. */
1001 if (LF_ISSET(SEARCH_CSEARCH))
1002 F_SET(sp, SC_RE_SEARCH);
1003 if (LF_ISSET(SEARCH_CSUBST))
1004 F_SET(sp, SC_RE_SUBST);
1006 return (0);
1010 * re_conv --
1011 * Convert vi's regular expressions into something that the
1012 * the POSIX 1003.2 RE functions can handle.
1014 * There are three conversions we make to make vi's RE's (specifically
1015 * the global, search, and substitute patterns) work with POSIX RE's.
1017 * 1: If O_MAGIC is not set, strip backslashes from the magic character
1018 * set (.[*~) that have them, and add them to the ones that don't.
1019 * 2: If O_MAGIC is not set, the string "\~" is replaced with the text
1020 * from the last substitute command's replacement string. If O_MAGIC
1021 * is set, it's the string "~".
1022 * 3: The pattern \<ptrn\> does "word" searches, convert it to use the
1023 * new RE escapes.
1025 * !!!/XXX
1026 * This doesn't exactly match the historic behavior of vi because we do
1027 * the ~ substitution before calling the RE engine, so magic characters
1028 * in the replacement string will be expanded by the RE engine, and they
1029 * weren't historically. It's a bug.
1031 static int
1032 re_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1034 size_t blen, len, needlen;
1035 int magic;
1036 CHAR_T *bp, *p, *t;
1039 * First pass through, we figure out how much space we'll need.
1040 * We do it in two passes, on the grounds that most of the time
1041 * the user is doing a search and won't have magic characters.
1042 * That way we can skip most of the memory allocation and copies.
1044 magic = 0;
1045 for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len)
1046 switch (*p) {
1047 case '\\':
1048 if (len > 1) {
1049 --len;
1050 switch (*++p) {
1051 case '<':
1052 magic = 1;
1053 needlen += RE_WSTART_LEN + 1;
1054 break;
1055 case '>':
1056 magic = 1;
1057 needlen += RE_WSTOP_LEN + 1;
1058 break;
1059 case '~':
1060 if (!O_ISSET(sp, O_MAGIC)) {
1061 magic = 1;
1062 needlen += sp->repl_len;
1064 break;
1065 case '.':
1066 case '[':
1067 case '*':
1068 if (!O_ISSET(sp, O_MAGIC)) {
1069 magic = 1;
1070 needlen += 1;
1072 break;
1073 default:
1074 needlen += 2;
1076 } else
1077 needlen += 1;
1078 break;
1079 case '~':
1080 if (O_ISSET(sp, O_MAGIC)) {
1081 magic = 1;
1082 needlen += sp->repl_len;
1084 break;
1085 case '.':
1086 case '[':
1087 case '*':
1088 if (!O_ISSET(sp, O_MAGIC)) {
1089 magic = 1;
1090 needlen += 2;
1092 break;
1093 default:
1094 needlen += 1;
1095 break;
1098 if (!magic) {
1099 *replacedp = 0;
1100 return (0);
1103 /* Get enough memory to hold the final pattern. */
1104 *replacedp = 1;
1105 GET_SPACE_RETW(sp, bp, blen, needlen);
1107 for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len)
1108 switch (*p) {
1109 case '\\':
1110 if (len > 1) {
1111 --len;
1112 switch (*++p) {
1113 case '<':
1114 MEMCPY(t,
1115 RE_WSTART, RE_WSTART_LEN);
1116 t += RE_WSTART_LEN;
1117 break;
1118 case '>':
1119 MEMCPY(t,
1120 RE_WSTOP, RE_WSTOP_LEN);
1121 t += RE_WSTOP_LEN;
1122 break;
1123 case '~':
1124 if (O_ISSET(sp, O_MAGIC))
1125 *t++ = '~';
1126 else {
1127 MEMCPYW(t,
1128 sp->repl, sp->repl_len);
1129 t += sp->repl_len;
1131 break;
1132 case '.':
1133 case '[':
1134 case '*':
1135 if (O_ISSET(sp, O_MAGIC))
1136 *t++ = '\\';
1137 *t++ = *p;
1138 break;
1139 default:
1140 *t++ = '\\';
1141 *t++ = *p;
1143 } else
1144 *t++ = '\\';
1145 break;
1146 case '~':
1147 if (O_ISSET(sp, O_MAGIC)) {
1148 MEMCPYW(t, sp->repl, sp->repl_len);
1149 t += sp->repl_len;
1150 } else
1151 *t++ = '~';
1152 break;
1153 case '.':
1154 case '[':
1155 case '*':
1156 if (!O_ISSET(sp, O_MAGIC))
1157 *t++ = '\\';
1158 *t++ = *p;
1159 break;
1160 default:
1161 *t++ = *p;
1162 break;
1165 *ptrnp = bp;
1166 *plenp = t - bp;
1167 return (0);
1171 * re_tag_conv --
1172 * Convert a tags search path into something that the POSIX
1173 * 1003.2 RE functions can handle.
1175 static int
1176 re_tag_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1178 size_t blen, len;
1179 int lastdollar;
1180 CHAR_T *bp, *p, *t;
1182 len = *plenp;
1184 /* Max memory usage is 2 times the length of the string. */
1185 *replacedp = 1;
1186 GET_SPACE_RETW(sp, bp, blen, len * 2);
1188 p = *ptrnp;
1189 t = bp;
1191 /* If the last character is a '/' or '?', we just strip it. */
1192 if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?'))
1193 --len;
1195 /* If the next-to-last or last character is a '$', it's magic. */
1196 if (len > 0 && p[len - 1] == '$') {
1197 --len;
1198 lastdollar = 1;
1199 } else
1200 lastdollar = 0;
1202 /* If the first character is a '/' or '?', we just strip it. */
1203 if (len > 0 && (p[0] == '/' || p[0] == '?')) {
1204 ++p;
1205 --len;
1208 /* If the first or second character is a '^', it's magic. */
1209 if (p[0] == '^') {
1210 *t++ = *p++;
1211 --len;
1215 * Escape every other magic character we can find, meanwhile stripping
1216 * the backslashes ctags inserts when escaping the search delimiter
1217 * characters.
1219 for (; len > 0; --len) {
1220 if (p[0] == '\\' && (p[1] == '/' || p[1] == '?')) {
1221 ++p;
1222 --len;
1223 } else if (strchr("^.[]$*", p[0]))
1224 *t++ = '\\';
1225 *t++ = *p++;
1227 if (lastdollar)
1228 *t++ = '$';
1230 *ptrnp = bp;
1231 *plenp = t - bp;
1232 return (0);
1236 * re_cscope_conv --
1237 * Convert a cscope search path into something that the POSIX
1238 * 1003.2 RE functions can handle.
1240 static int
1241 re_cscope_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
1243 size_t blen, len, nspaces;
1244 CHAR_T *bp, *t;
1245 CHAR_T *p;
1246 const CHAR_T *wp;
1247 size_t wlen;
1250 * Each space in the source line printed by cscope represents an
1251 * arbitrary sequence of spaces, tabs, and comments.
1253 #define CSCOPE_RE_SPACE "([ \t]|/\\*([^*]|\\*/)*\\*/)*"
1254 #define CSCOPE_LEN sizeof(CSCOPE_RE_SPACE) - 1
1255 CHAR2INT(sp, CSCOPE_RE_SPACE, CSCOPE_LEN, wp, wlen);
1256 for (nspaces = 0, p = *ptrnp, len = *plenp; len > 0; ++p, --len)
1257 if (*p == ' ')
1258 ++nspaces;
1261 * Allocate plenty of space:
1262 * the string, plus potential escaping characters;
1263 * nspaces + 2 copies of CSCOPE_RE_SPACE;
1264 * ^, $, nul terminator characters.
1266 *replacedp = 1;
1267 len = (p - *ptrnp) * 2 + (nspaces + 2) * sizeof(CSCOPE_RE_SPACE) + 3;
1268 GET_SPACE_RETW(sp, bp, blen, len);
1270 p = *ptrnp;
1271 t = bp;
1273 *t++ = '^';
1274 MEMCPYW(t, wp, wlen);
1275 t += wlen;
1277 for (len = *plenp; len > 0; ++p, --len)
1278 if (*p == ' ') {
1279 MEMCPYW(t, wp, wlen);
1280 t += wlen;
1281 } else {
1282 if (strchr("\\^.[]$*+?()|{}", *p))
1283 *t++ = '\\';
1284 *t++ = *p;
1287 MEMCPYW(t, wp, wlen);
1288 t += wlen;
1289 *t++ = '$';
1291 *ptrnp = bp;
1292 *plenp = t - bp;
1293 return (0);
1297 * re_error --
1298 * Report a regular expression error.
1300 * PUBLIC: void re_error __P((SCR *, int, regex_t *));
1302 void
1303 re_error(SCR *sp, int errcode, regex_t *preg)
1305 size_t sz;
1306 char *oe;
1308 sz = regerror(errcode, preg, NULL, 0);
1309 if ((oe = malloc(sz)) == NULL)
1310 msgq(sp, M_SYSERR, NULL);
1311 else {
1312 (void)regerror(errcode, preg, oe, sz);
1313 msgq(sp, M_ERR, "RE error: %s", oe);
1314 free(oe);
1319 * re_sub --
1320 * Do the substitution for a regular expression.
1322 static int
1323 re_sub(SCR *sp, CHAR_T *ip, CHAR_T **lbp, size_t *lbclenp, size_t *lblenp, regmatch_t *match)
1325 /* Input line. */
1330 enum { C_NOT_SET, C_LOWER, C_ONE_LOWER, C_ONE_UPPER, C_UPPER } conv;
1331 size_t lbclen, lblen; /* Local copies. */
1332 size_t mlen; /* Match length. */
1333 size_t rpl; /* Remaining replacement length. */
1334 CHAR_T *rp; /* Replacement pointer. */
1335 int ch;
1336 int no; /* Match replacement offset. */
1337 CHAR_T *p, *t; /* Buffer pointers. */
1338 CHAR_T *lb; /* Local copies. */
1340 lb = *lbp; /* Get local copies. */
1341 lbclen = *lbclenp;
1342 lblen = *lblenp;
1345 * QUOTING NOTE:
1347 * There are some special sequences that vi provides in the
1348 * replacement patterns.
1349 * & string the RE matched (\& if nomagic set)
1350 * \# n-th regular subexpression
1351 * \E end \U, \L conversion
1352 * \e end \U, \L conversion
1353 * \l convert the next character to lower-case
1354 * \L convert to lower-case, until \E, \e, or end of replacement
1355 * \u convert the next character to upper-case
1356 * \U convert to upper-case, until \E, \e, or end of replacement
1358 * Otherwise, since this is the lowest level of replacement, discard
1359 * all escaping characters. This (hopefully) matches historic practice.
1361 #define OUTCH(ch, nltrans) { \
1362 ARG_CHAR_T __ch = (ch); \
1363 e_key_t __value = KEY_VAL(sp, __ch); \
1364 if (nltrans && (__value == K_CR || __value == K_NL)) { \
1365 NEEDNEWLINE(sp); \
1366 sp->newl[sp->newl_cnt++] = lbclen; \
1367 } else if (conv != C_NOT_SET) { \
1368 switch (conv) { \
1369 case C_ONE_LOWER: \
1370 conv = C_NOT_SET; \
1371 /* FALLTHROUGH */ \
1372 case C_LOWER: \
1373 if (ISUPPER(__ch)) \
1374 __ch = TOLOWER(__ch); \
1375 break; \
1376 case C_ONE_UPPER: \
1377 conv = C_NOT_SET; \
1378 /* FALLTHROUGH */ \
1379 case C_UPPER: \
1380 if (ISLOWER(__ch)) \
1381 __ch = TOUPPER(__ch); \
1382 break; \
1383 default: \
1384 abort(); \
1387 NEEDSP(sp, 1, p); \
1388 *p++ = __ch; \
1389 ++lbclen; \
1391 conv = C_NOT_SET;
1392 for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) {
1393 switch (ch = *rp++) {
1394 case '&':
1395 if (O_ISSET(sp, O_MAGIC)) {
1396 no = 0;
1397 goto subzero;
1399 break;
1400 case '\\':
1401 if (rpl == 0)
1402 break;
1403 --rpl;
1404 switch (ch = *rp) {
1405 case '&':
1406 ++rp;
1407 if (!O_ISSET(sp, O_MAGIC)) {
1408 no = 0;
1409 goto subzero;
1411 break;
1412 case '0': case '1': case '2': case '3': case '4':
1413 case '5': case '6': case '7': case '8': case '9':
1414 no = *rp++ - '0';
1415 subzero: if (match[no].rm_so == -1 ||
1416 match[no].rm_eo == -1)
1417 break;
1418 mlen = match[no].rm_eo - match[no].rm_so;
1419 for (t = ip + match[no].rm_so; mlen--; ++t)
1420 OUTCH((UCHAR_T)*t, 0);
1421 continue;
1422 case 'e':
1423 case 'E':
1424 ++rp;
1425 conv = C_NOT_SET;
1426 continue;
1427 case 'l':
1428 ++rp;
1429 conv = C_ONE_LOWER;
1430 continue;
1431 case 'L':
1432 ++rp;
1433 conv = C_LOWER;
1434 continue;
1435 case 'u':
1436 ++rp;
1437 conv = C_ONE_UPPER;
1438 continue;
1439 case 'U':
1440 ++rp;
1441 conv = C_UPPER;
1442 continue;
1443 default:
1444 ++rp;
1445 break;
1448 OUTCH(ch, 1);
1451 *lbp = lb; /* Update caller's information. */
1452 *lbclenp = lbclen;
1453 *lblenp = lblen;
1454 return (0);