1 /* $NetBSD: ex_subst.c,v 1.4 2014/01/26 21:43:45 christos Exp $ */
3 * Copyright (c) 1992, 1993, 1994
4 * The Regents of the University of California. All rights reserved.
5 * Copyright (c) 1992, 1993, 1994, 1995, 1996
6 * Keith Bostic. All rights reserved.
8 * See the LICENSE file for redistribution information.
13 #include <sys/cdefs.h>
16 static const char sccsid
[] = "Id: ex_subst.c,v 10.50 2002/02/09 21:18:23 skimo Exp (Berkeley) Date: 2002/02/09 21:18:23 ";
19 __RCSID("$NetBSD: ex_subst.c,v 1.4 2014/01/26 21:43:45 christos Exp $");
22 #include <sys/types.h>
23 #include <sys/queue.h>
26 #include <bitstring.h>
35 #include "../common/common.h"
38 #define SUB_FIRST 0x01 /* The 'r' flag isn't reasonable. */
39 #define SUB_MUSTSETR 0x02 /* The 'r' flag is required. */
41 static int re_conv
__P((SCR
*, CHAR_T
**, size_t *, int *));
42 static int re_cscope_conv
__P((SCR
*, CHAR_T
**, size_t *, int *));
43 static int re_sub
__P((SCR
*,
44 CHAR_T
*, CHAR_T
**, size_t *, size_t *, regmatch_t
[10]));
45 static int re_tag_conv
__P((SCR
*, CHAR_T
**, size_t *, int *));
46 static int s
__P((SCR
*, EXCMD
*, CHAR_T
*, regex_t
*, u_int
));
50 * [line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]]
52 * Substitute on lines matching a pattern.
54 * PUBLIC: int ex_s __P((SCR *, EXCMD *));
57 ex_s(SCR
*sp
, EXCMD
*cmdp
)
63 CHAR_T
*bp
, *p
, *ptrn
, *rep
, *t
;
66 * Skip leading white space.
69 * Historic vi allowed any non-alphanumeric to serve as the
70 * substitution command delimiter.
73 * If the arguments are empty, it's the same as &, i.e. we
74 * repeat the last substitution.
78 for (p
= cmdp
->argv
[0]->bp
,
79 len
= cmdp
->argv
[0]->len
; len
> 0; --len
, ++p
) {
80 if (!ISBLANK((UCHAR_T
)*p
))
84 subagain
: return (ex_subagain(sp
, cmdp
));
86 delim
= (UCHAR_T
)*p
++;
87 if (ISALNUM(delim
) || delim
== '\\')
88 return (s(sp
, cmdp
, p
, &sp
->subre_c
, SUB_MUSTSETR
));
92 * The full-blown substitute command resets the remembered
93 * state of the 'c' and 'g' suffixes.
95 sp
->c_suffix
= sp
->g_suffix
= 0;
98 * Get the pattern string, toss escaping characters.
101 * Historic vi accepted any of the following forms:
103 * :s/abc/def/ change "abc" to "def"
104 * :s/abc/def change "abc" to "def"
105 * :s/abc/ delete "abc"
106 * :s/abc delete "abc"
110 * Only toss an escaping character if it escapes a delimiter.
111 * This means that "s/A/\\\\f" replaces "A" with "\\f". It
112 * would be nice to be more regular, i.e. for each layer of
113 * escaping a single escaping character is removed, but that's
114 * not how the historic vi worked.
116 for (ptrn
= t
= p
;;) {
117 if (p
[0] == '\0' || p
[0] == delim
) {
122 * Nul terminate the pattern string -- it's passed
123 * to regcomp which doesn't understand anything else.
131 else if (p
[1] == '\\')
138 * If the pattern string is empty, use the last RE (not just the
139 * last substitution RE).
142 if (sp
->re
== NULL
) {
143 ex_emsg(sp
, NULL
, EXM_NOPREVRE
);
147 /* Re-compile the RE if necessary. */
148 if (!F_ISSET(sp
, SC_RE_SEARCH
) &&
149 re_compile(sp
, sp
->re
, sp
->re_len
,
150 NULL
, NULL
, &sp
->re_c
, SEARCH_CSEARCH
| SEARCH_MSG
))
156 * Compile the RE. Historic practice is that substitutes set
157 * the search direction as well as both substitute and search
158 * RE's. We compile the RE twice, as we don't want to bother
159 * ref counting the pattern string and (opaque) structure.
161 if (re_compile(sp
, ptrn
, t
- ptrn
, &sp
->re
,
162 &sp
->re_len
, &sp
->re_c
, SEARCH_CSEARCH
| SEARCH_MSG
))
164 if (re_compile(sp
, ptrn
, t
- ptrn
, &sp
->subre
,
165 &sp
->subre_len
, &sp
->subre_c
, SEARCH_CSUBST
| SEARCH_MSG
))
169 sp
->searchdir
= FORWARD
;
174 * Get the replacement string.
176 * The special character & (\& if O_MAGIC not set) matches the
177 * entire RE. No handling of & is required here, it's done by
180 * The special character ~ (\~ if O_MAGIC not set) inserts the
181 * previous replacement string into this replacement string.
182 * Count ~'s to figure out how much space we need. We could
183 * special case nonexistent last patterns or whether or not
184 * O_MAGIC is set, but it's probably not worth the effort.
188 * Only toss an escaping character if it escapes a delimiter or
189 * if O_MAGIC is set and it escapes a tilde.
192 * If the entire replacement pattern is "%", then use the last
193 * replacement pattern. This semantic was added to vi in System
194 * V and then percolated elsewhere, presumably around the time
195 * that it was added to their version of ed(1).
197 if (p
[0] == L('\0') || p
[0] == delim
) {
200 if (sp
->repl
!= NULL
)
204 } else if (p
[0] == L('%') && (p
[1] == L('\0') || p
[1] == delim
))
205 p
+= p
[1] == delim
? 2 : 1;
207 for (rep
= p
, len
= 0;
208 p
[0] != L('\0') && p
[0] != delim
; ++p
, ++len
)
211 GET_SPACE_RETW(sp
, bp
, blen
, len
);
212 for (t
= bp
, len
= 0, p
= rep
;;) {
213 if (p
[0] == L('\0') || p
[0] == delim
) {
218 if (p
[0] == L('\\')) {
221 else if (p
[1] == L('\\')) {
224 } else if (p
[1] == L('~')) {
226 if (!O_ISSET(sp
, O_MAGIC
))
229 } else if (p
[0] == L('~') && O_ISSET(sp
, O_MAGIC
)) {
231 MEMCPYW(t
, sp
->repl
, sp
->repl_len
);
239 if ((sp
->repl_len
= len
) != 0) {
240 if (sp
->repl
!= NULL
)
242 if ((sp
->repl
= malloc(len
* sizeof(CHAR_T
))) == NULL
) {
243 msgq(sp
, M_SYSERR
, NULL
);
244 FREE_SPACEW(sp
, bp
, blen
);
247 MEMCPYW(sp
->repl
, bp
, len
);
249 FREE_SPACEW(sp
, bp
, blen
);
251 return (s(sp
, cmdp
, p
, re
, flags
));
256 * [line [,line]] & [cgr] [count] [#lp]
258 * Substitute using the last substitute RE and replacement pattern.
260 * PUBLIC: int ex_subagain __P((SCR *, EXCMD *));
263 ex_subagain(SCR
*sp
, EXCMD
*cmdp
)
265 if (sp
->subre
== NULL
) {
266 ex_emsg(sp
, NULL
, EXM_NOPREVRE
);
269 if (!F_ISSET(sp
, SC_RE_SUBST
) &&
270 re_compile(sp
, sp
->subre
, sp
->subre_len
,
271 NULL
, NULL
, &sp
->subre_c
, SEARCH_CSUBST
| SEARCH_MSG
))
274 cmdp
, cmdp
->argc
? cmdp
->argv
[0]->bp
: NULL
, &sp
->subre_c
, 0));
279 * [line [,line]] ~ [cgr] [count] [#lp]
281 * Substitute using the last RE and last substitute replacement pattern.
283 * PUBLIC: int ex_subtilde __P((SCR *, EXCMD *));
286 ex_subtilde(SCR
*sp
, EXCMD
*cmdp
)
288 if (sp
->re
== NULL
) {
289 ex_emsg(sp
, NULL
, EXM_NOPREVRE
);
292 if (!F_ISSET(sp
, SC_RE_SEARCH
) && re_compile(sp
, sp
->re
,
293 sp
->re_len
, NULL
, NULL
, &sp
->re_c
, SEARCH_CSEARCH
| SEARCH_MSG
))
296 cmdp
, cmdp
->argc
? cmdp
->argv
[0]->bp
: NULL
, &sp
->re_c
, 0));
301 * Do the substitution. This stuff is *really* tricky. There are lots of
302 * special cases, and general nastiness. Don't mess with it unless you're
305 * The nasty part of the substitution is what happens when the replacement
306 * string contains newlines. It's a bit tricky -- consider the information
307 * that has to be retained for "s/f\(o\)o/^M\1^M\1/". The solution here is
308 * to build a set of newline offsets which we use to break the line up later,
309 * when the replacement is done. Don't change it unless you're *damned*
312 #define NEEDNEWLINE(sp) { \
313 if (sp->newl_len == sp->newl_cnt) { \
314 sp->newl_len += 25; \
315 REALLOC(sp, sp->newl, size_t *, \
316 sp->newl_len * sizeof(size_t)); \
317 if (sp->newl == NULL) { \
324 #define BUILD(sp, l, len) { \
325 if (lbclen + (len) > lblen) { \
326 lblen += MAX(lbclen + (len), 256); \
327 REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T)); \
333 MEMCPYW(lb + lbclen, l, len); \
337 #define NEEDSP(sp, len, pnt) { \
338 if (lbclen + (len) > lblen) { \
339 lblen += MAX(lbclen + (len), 256); \
340 REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T)); \
350 s(SCR
*sp
, EXCMD
*cmdp
, CHAR_T
*st
, regex_t
*re
, u_int flags
)
355 db_recno_t elno
, lno
, slno
;
357 regmatch_t match
[10];
358 size_t blen
, cnt
, last
, lbclen
, lblen
, len
, llen
;
359 size_t offset
, saved_offset
, scno
;
360 int lflag
, nflag
, pflag
, rflag
;
361 int didsub
, do_eol_match
, eflags
, empty_ok
, eval
;
362 int linechanged
, matched
, quit
, rval
;
373 * Historically, the 'g' and 'c' suffixes were always toggled as flags,
374 * so ":s/A/B/" was the same as ":s/A/B/ccgg". If O_EDCOMPATIBLE was
375 * not set, they were initialized to 0 for all substitute commands. If
376 * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user
377 * specified substitute/replacement patterns (see ex_s()).
379 if (!O_ISSET(sp
, O_EDCOMPATIBLE
))
380 sp
->c_suffix
= sp
->g_suffix
= 0;
383 * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but
384 * it only displayed the last change. I'd disallow them, but they are
385 * useful in combination with the [v]global commands. In the current
386 * model the problem is combining them with the 'c' flag -- the screen
387 * would have to flip back and forth between the confirm screen and the
388 * ex print screen, which would be pretty awful. We do display all
389 * changes, though, for what that's worth.
392 * Historic vi was fairly strict about the order of "options", the
393 * count, and "flags". I'm somewhat fuzzy on the difference between
394 * options and flags, anyway, so this is a simpler approach, and we
395 * just take them in whatever order the user gives them. (The ex
396 * usage statement doesn't reflect this.)
398 lflag
= nflag
= pflag
= rflag
= 0;
401 for (lno
= OOBLNO
; *st
!= '\0'; ++st
)
412 case '0': case '1': case '2': case '3': case '4':
413 case '5': case '6': case '7': case '8': case '9':
417 nret
= nget_uslong(sp
, &ul
, st
, &st
, 10);
419 if (*st
== '\0') /* Loop increment correction. */
421 if (nret
!= NUM_OK
) {
422 if (nret
== NUM_OVER
)
423 msgq(sp
, M_ERR
, "153|Count overflow");
424 else if (nret
== NUM_UNDER
)
425 msgq(sp
, M_ERR
, "154|Count underflow");
427 msgq(sp
, M_SYSERR
, NULL
);
431 * In historic vi, the count was inclusive from the
434 cmdp
->addr1
.lno
= cmdp
->addr2
.lno
;
435 cmdp
->addr2
.lno
+= lno
- 1;
436 if (!db_exist(sp
, cmdp
->addr2
.lno
) &&
437 db_last(sp
, &cmdp
->addr2
.lno
))
444 sp
->c_suffix
= !sp
->c_suffix
;
446 /* Ex text structure initialization. */
447 if (F_ISSET(sp
, SC_EX
)) {
448 memset(&tiq
, 0, sizeof(TEXTH
));
453 sp
->g_suffix
= !sp
->g_suffix
;
462 if (LF_ISSET(SUB_FIRST
)) {
464 "155|Regular expression specified; r flag meaningless");
467 if (!F_ISSET(sp
, SC_RE_SEARCH
)) {
468 ex_emsg(sp
, NULL
, EXM_NOPREVRE
);
478 if (*st
!= '\0' || (!rflag
&& LF_ISSET(SUB_MUSTSETR
))) {
479 usage
: ex_emsg(sp
, cmdp
->cmd
->usage
, EXM_USAGE
);
483 noargs
: if (F_ISSET(sp
, SC_VI
) && sp
->c_suffix
&& (lflag
|| nflag
|| pflag
)) {
485 "156|The #, l and p flags may not be combined with the c flag in vi mode");
490 * bp: if interactive, line cache
491 * blen: if interactive, line cache length
492 * lb: build buffer pointer.
493 * lbclen: current length of built buffer.
494 * lblen: length of build buffer.
497 blen
= lbclen
= lblen
= 0;
499 /* For each line... */
500 lno
= cmdp
->addr1
.lno
== 0 ? 1 : cmdp
->addr1
.lno
;
501 for (matched
= quit
= 0,
502 elno
= cmdp
->addr2
.lno
; !quit
&& lno
<= elno
; ++lno
) {
504 /* Someone's unhappy, time to stop. */
509 if (db_get(sp
, lno
, DBG_FATAL
, &st
, &llen
))
513 * Make a local copy if doing confirmation -- when calling
514 * the confirm routine we're likely to lose the cached copy.
518 GET_SPACE_RETW(sp
, bp
, blen
, llen
);
520 ADD_SPACE_RETW(sp
, bp
, blen
, llen
);
521 MEMCPYW(bp
, st
, llen
);
525 /* Start searching from the beginning. */
529 /* Reset the build buffer offset. */
532 /* Reset empty match flag. */
536 * We don't want to have to do a setline if the line didn't
537 * change -- keep track of whether or not this line changed.
538 * If doing confirmations, don't want to keep setting the
539 * line if change is refused -- keep track of substitutions.
541 didsub
= linechanged
= 0;
543 /* New line, do an EOL match. */
546 /* It's not nul terminated, but we pretend it is. */
547 eflags
= REG_STARTEND
;
550 * The search area is from st + offset to the EOL.
552 * Generally, match[0].rm_so is the offset of the start
553 * of the match from the start of the search, and offset
554 * is the offset of the start of the last search.
556 nextmatch
: match
[0].rm_so
= 0;
557 match
[0].rm_eo
= len
;
559 /* Get the next match. */
560 eval
= regexec(re
, st
+ offset
, 10, match
, eflags
);
563 * There wasn't a match or if there was an error, deal with
564 * it. If there was a previous match in this line, resolve
565 * the changes into the database. Otherwise, just move on.
567 if (eval
== REG_NOMATCH
)
570 re_error(sp
, eval
, re
);
575 /* Only the first search can match an anchored expression. */
576 eflags
|= REG_NOTBOL
;
580 * It's possible to match 0-length strings -- for example, the
581 * command s;a*;X;, when matched against the string "aabb" will
582 * result in "XbXbX", i.e. the matches are "aa", the space
583 * between the b's and the space between the b's and the end of
584 * the string. There is a similar space between the beginning
585 * of the string and the a's. The rule that we use (because vi
586 * historically used it) is that any 0-length match, occurring
587 * immediately after a match, is ignored. Otherwise, the above
588 * example would have resulted in "XXbXbX". Another example is
589 * incorrectly using " *" to replace groups of spaces with one
592 * The way we do this is that if we just had a successful match,
593 * the starting offset does not skip characters, and the match
594 * is empty, ignore the match and move forward. If there are no
595 * more characters in the string, we were attempting to match
596 * after the last character, so quit.
598 if (!empty_ok
&& match
[0].rm_so
== 0 && match
[0].rm_eo
== 0) {
602 BUILD(sp
, st
+ offset
, 1)
608 /* Confirm change. */
611 * Set the cursor position for confirmation. Note,
612 * if we matched on a '$', the cursor may be past
615 from
.lno
= to
.lno
= lno
;
616 from
.cno
= match
[0].rm_so
+ offset
;
617 to
.cno
= match
[0].rm_eo
+ offset
;
619 * Both ex and vi have to correct for a change before
620 * the first character in the line.
623 from
.cno
= to
.cno
= 0;
624 if (F_ISSET(sp
, SC_VI
)) {
626 * Only vi has to correct for a change after
627 * the last character in the line.
630 * It would be nice to change the vi code so
631 * that we could display a cursor past EOL.
635 if (from
.cno
>= llen
)
640 if (vs_refresh(sp
, 1))
643 vs_update(sp
, msg_cat(sp
,
644 "169|Confirm change? [n]", NULL
), NULL
);
646 if (v_event_get(sp
, &ev
, 0, 0))
648 switch (ev
.e_event
) {
656 v_event_err(sp
, &ev
);
660 if (ex_print(sp
, cmdp
, &from
, &to
, 0) ||
661 ex_scprint(sp
, &from
, &to
))
663 if (ex_txt(sp
, &tiq
, 0, TXT_CR
))
665 ev
.e_c
= TAILQ_FIRST(&tiq
)->lb
[0];
674 BUILD(sp
, st
+ offset
, match
[0].rm_eo
);
677 /* Set the quit/interrupted flags. */
679 F_SET(sp
->gp
, G_INTERRUPTED
);
682 * Resolve any changes, then return to (and
683 * exit from) the main loop.
690 * Set the cursor to the last position changed, converting
691 * from 1-based to 0-based.
694 sp
->cno
= match
[0].rm_so
;
696 /* Copy the bytes before the match into the build buffer. */
697 BUILD(sp
, st
+ offset
, match
[0].rm_so
);
699 /* Substitute the matching bytes. */
701 if (re_sub(sp
, st
+ offset
, &lb
, &lbclen
, &lblen
, match
))
704 /* Set the change flag so we know this line was modified. */
707 /* Move past the matched bytes. */
708 skip
: offset
+= match
[0].rm_eo
;
709 len
-= match
[0].rm_eo
;
711 /* A match cannot be followed by an empty pattern. */
715 * If doing a global change with confirmation, we have to
716 * update the screen. The basic idea is to store the line
717 * so the screen update routines can find it, and restart.
719 if (didsub
&& sp
->c_suffix
&& sp
->g_suffix
) {
721 * The new search offset will be the end of the
724 saved_offset
= lbclen
;
726 /* Copy the rest of the line. */
728 BUILD(sp
, st
+ offset
, len
)
730 /* Set the new offset. */
731 offset
= saved_offset
;
733 /* Store inserted lines, adjusting the build buffer. */
737 cnt
< sp
->newl_cnt
; ++cnt
, ++lno
, ++elno
) {
738 if (db_insert(sp
, lno
,
739 lb
+ last
, sp
->newl
[cnt
] - last
))
741 last
= sp
->newl
[cnt
] + 1;
742 ++sp
->rptlines
[L_ADDED
];
749 /* Store and retrieve the line. */
750 if (db_set(sp
, lno
, lb
+ last
, lbclen
))
752 if (db_get(sp
, lno
, DBG_FATAL
, &st
, &llen
))
754 ADD_SPACE_RETW(sp
, bp
, blen
, llen
)
755 MEMCPYW(bp
, st
, llen
);
759 /* Restart the build. */
761 BUILD(sp
, st
, offset
);
764 * If we haven't already done the after-the-string
765 * match, do one. Set REG_NOTEOL so the '$' pattern
772 eflags
|= REG_NOTEOL
;
780 * If at the end of the string, do a test for the after
781 * the string match. Set REG_NOTEOL so the '$' pattern
784 if (sp
->g_suffix
&& do_eol_match
) {
787 eflags
|= REG_NOTEOL
;
792 endmatch
: if (!linechanged
)
795 /* Copy any remaining bytes into the build buffer. */
797 BUILD(sp
, st
+ offset
, len
)
799 /* Store inserted lines, adjusting the build buffer. */
803 cnt
< sp
->newl_cnt
; ++cnt
, ++lno
, ++elno
) {
805 lno
, lb
+ last
, sp
->newl
[cnt
] - last
))
807 last
= sp
->newl
[cnt
] + 1;
808 ++sp
->rptlines
[L_ADDED
];
814 /* Store the changed line. */
815 if (db_set(sp
, lno
, lb
+ last
, lbclen
))
818 /* Update changed line counter. */
819 if (sp
->rptlchange
!= lno
) {
820 sp
->rptlchange
= lno
;
821 ++sp
->rptlines
[L_CHANGED
];
826 * Display as necessary. Historic practice is to only
827 * display the last line of a line split into multiple
830 if (lflag
|| nflag
|| pflag
) {
831 from
.lno
= to
.lno
= lno
;
832 from
.cno
= to
.cno
= 0;
834 (void)ex_print(sp
, cmdp
, &from
, &to
, E_C_LIST
);
836 (void)ex_print(sp
, cmdp
, &from
, &to
, E_C_HASH
);
838 (void)ex_print(sp
, cmdp
, &from
, &to
, E_C_PRINT
);
844 * Historically, vi attempted to leave the cursor at the same place if
845 * the substitution was done at the current cursor position. Otherwise
846 * it moved it to the first non-blank of the last line changed. There
847 * were some problems: for example, :s/$/foo/ with the cursor on the
848 * last character of the line left the cursor on the last character, or
849 * the & command with multiple occurrences of the matching string in the
850 * line usually left the cursor in a fairly random position.
852 * We try to do the same thing, with the exception that if the user is
853 * doing substitution with confirmation, we move to the last line about
854 * which the user was consulted, as opposed to the last line that they
855 * actually changed. This prevents a screen flash if the user doesn't
856 * change many of the possible lines.
858 if (!sp
->c_suffix
&& (sp
->lno
!= slno
|| sp
->cno
!= scno
)) {
860 (void)nonblank(sp
, sp
->lno
, &sp
->cno
);
864 * If not in a global command, and nothing matched, say so.
865 * Else, if none of the lines displayed, put something up.
869 if (!F_ISSET(sp
, SC_EX_GLOBAL
)) {
870 msgq(sp
, M_ERR
, "157|No match found");
873 } else if (!lflag
&& !nflag
&& !pflag
)
874 F_SET(cmdp
, E_AUTOPRINT
);
881 FREE_SPACEW(sp
, bp
, blen
);
891 * PUBLIC: int re_compile __P((SCR *,
892 * PUBLIC: CHAR_T *, size_t, CHAR_T **, size_t *, regex_t *, u_int));
895 re_compile(SCR
*sp
, CHAR_T
*ptrn
, size_t plen
, CHAR_T
**ptrnp
, size_t *lenp
, regex_t
*rep
, u_int flags
)
898 int reflags
, replaced
, rval
;
903 if (LF_ISSET(SEARCH_EXTEND
))
904 reflags
|= REG_EXTENDED
;
905 if (LF_ISSET(SEARCH_IC
))
906 reflags
|= REG_ICASE
;
907 if (LF_ISSET(SEARCH_LITERAL
))
908 reflags
|= REG_NOSPEC
;
909 if (!LF_ISSET(SEARCH_NOOPT
| SEARCH_CSCOPE
| SEARCH_TAG
)) {
910 if (O_ISSET(sp
, O_EXTENDED
))
911 reflags
|= REG_EXTENDED
;
912 if (O_ISSET(sp
, O_IGNORECASE
))
913 reflags
|= REG_ICASE
;
914 if (O_ISSET(sp
, O_ICLOWER
))
917 if (LF_ISSET(SEARCH_ICL
)) {
918 iclower
: for (p
= ptrn
, len
= plen
; len
> 0; ++p
, --len
)
919 if (ISUPPER((UCHAR_T
)*p
))
922 reflags
|= REG_ICASE
;
925 /* If we're replacing a saved value, clear the old one. */
926 if (LF_ISSET(SEARCH_CSEARCH
) && F_ISSET(sp
, SC_RE_SEARCH
)) {
928 F_CLR(sp
, SC_RE_SEARCH
);
930 if (LF_ISSET(SEARCH_CSUBST
) && F_ISSET(sp
, SC_RE_SUBST
)) {
931 regfree(&sp
->subre_c
);
932 F_CLR(sp
, SC_RE_SUBST
);
936 * If we're saving the string, it's a pattern we haven't seen before,
937 * so convert the vi-style RE's to POSIX 1003.2 RE's. Save a copy for
938 * later recompilation. Free any previously saved value.
942 if (LF_ISSET(SEARCH_CSCOPE
)) {
943 if (re_cscope_conv(sp
, &ptrn
, &plen
, &replaced
))
947 * Currently, the match-any-<blank> expression used in
948 * re_cscope_conv() requires extended RE's. This may
949 * not be right or safe.
951 reflags
|= REG_EXTENDED
;
952 } else if (LF_ISSET(SEARCH_TAG
)) {
953 if (re_tag_conv(sp
, &ptrn
, &plen
, &replaced
))
955 } else if (!LF_ISSET(SEARCH_LITERAL
))
956 if (re_conv(sp
, &ptrn
, &plen
, &replaced
))
959 /* Discard previous pattern. */
960 if (*ptrnp
!= NULL
) {
968 * Copy the string into allocated memory.
971 * Regcomp isn't 8-bit clean, so the pattern is nul-terminated
972 * for now. There's just no other solution.
974 MALLOC(sp
, *ptrnp
, CHAR_T
*, (plen
+ 1) * sizeof(CHAR_T
));
975 if (*ptrnp
!= NULL
) {
976 MEMCPYW(*ptrnp
, ptrn
, plen
);
977 (*ptrnp
)[plen
] = '\0';
980 /* Free up conversion-routine-allocated memory. */
982 FREE_SPACEW(sp
, ptrn
, 0);
992 * Regcomp isn't 8-bit clean, so we just lost if the pattern
993 * contained a nul. Bummer!
995 if ((rval
= regcomp(rep
, ptrn
, /* plen, */ reflags
)) != 0) {
996 if (LF_ISSET(SEARCH_MSG
))
997 re_error(sp
, rval
, rep
);
1001 if (LF_ISSET(SEARCH_CSEARCH
))
1002 F_SET(sp
, SC_RE_SEARCH
);
1003 if (LF_ISSET(SEARCH_CSUBST
))
1004 F_SET(sp
, SC_RE_SUBST
);
1011 * Convert vi's regular expressions into something that the
1012 * POSIX 1003.2 RE functions can handle.
1014 * There are three conversions we make to make vi's RE's (specifically
1015 * the global, search, and substitute patterns) work with POSIX RE's.
1017 * 1: If O_MAGIC is not set, strip backslashes from the magic character
1018 * set (.[*~) that have them, and add them to the ones that don't.
1019 * 2: If O_MAGIC is not set, the string "\~" is replaced with the text
1020 * from the last substitute command's replacement string. If O_MAGIC
1021 * is set, it's the string "~".
1022 * 3: The pattern \<ptrn\> does "word" searches, convert it to use the
1026 * This doesn't exactly match the historic behavior of vi because we do
1027 * the ~ substitution before calling the RE engine, so magic characters
1028 * in the replacement string will be expanded by the RE engine, and they
1029 * weren't historically. It's a bug.
1032 re_conv(SCR
*sp
, CHAR_T
**ptrnp
, size_t *plenp
, int *replacedp
)
1034 size_t blen
, len
, needlen
;
1039 * First pass through, we figure out how much space we'll need.
1040 * We do it in two passes, on the grounds that most of the time
1041 * the user is doing a search and won't have magic characters.
1042 * That way we can skip most of the memory allocation and copies.
1045 for (p
= *ptrnp
, len
= *plenp
, needlen
= 0; len
> 0; ++p
, --len
)
1053 needlen
+= RE_WSTART_LEN
+ 1;
1057 needlen
+= RE_WSTOP_LEN
+ 1;
1060 if (!O_ISSET(sp
, O_MAGIC
)) {
1062 needlen
+= sp
->repl_len
;
1068 if (!O_ISSET(sp
, O_MAGIC
)) {
1080 if (O_ISSET(sp
, O_MAGIC
)) {
1082 needlen
+= sp
->repl_len
;
1088 if (!O_ISSET(sp
, O_MAGIC
)) {
1103 /* Get enough memory to hold the final pattern. */
1105 GET_SPACE_RETW(sp
, bp
, blen
, needlen
);
1107 for (p
= *ptrnp
, len
= *plenp
, t
= bp
; len
> 0; ++p
, --len
)
1115 RE_WSTART
, RE_WSTART_LEN
);
1120 RE_WSTOP
, RE_WSTOP_LEN
);
1124 if (O_ISSET(sp
, O_MAGIC
))
1128 sp
->repl
, sp
->repl_len
);
1135 if (O_ISSET(sp
, O_MAGIC
))
1147 if (O_ISSET(sp
, O_MAGIC
)) {
1148 MEMCPYW(t
, sp
->repl
, sp
->repl_len
);
1156 if (!O_ISSET(sp
, O_MAGIC
))
1172 * Convert a tags search path into something that the POSIX
1173 * 1003.2 RE functions can handle.
1176 re_tag_conv(SCR
*sp
, CHAR_T
**ptrnp
, size_t *plenp
, int *replacedp
)
1184 /* Max memory usage is 2 times the length of the string. */
1186 GET_SPACE_RETW(sp
, bp
, blen
, len
* 2);
1191 /* If the last character is a '/' or '?', we just strip it. */
1192 if (len
> 0 && (p
[len
- 1] == '/' || p
[len
- 1] == '?'))
1195 /* If the next-to-last or last character is a '$', it's magic. */
1196 if (len
> 0 && p
[len
- 1] == '$') {
1202 /* If the first character is a '/' or '?', we just strip it. */
1203 if (len
> 0 && (p
[0] == '/' || p
[0] == '?')) {
1208 /* If the first or second character is a '^', it's magic. */
1215 * Escape every other magic character we can find, meanwhile stripping
1216 * the backslashes ctags inserts when escaping the search delimiter
1219 for (; len
> 0; --len
) {
1220 if (p
[0] == '\\' && (p
[1] == '/' || p
[1] == '?')) {
1223 } else if (strchr("^.[]$*", p
[0]))
1237 * Convert a cscope search path into something that the POSIX
1238 * 1003.2 RE functions can handle.
1241 re_cscope_conv(SCR
*sp
, CHAR_T
**ptrnp
, size_t *plenp
, int *replacedp
)
1243 size_t blen
, len
, nspaces
;
1250 * Each space in the source line printed by cscope represents an
1251 * arbitrary sequence of spaces, tabs, and comments.
1253 #define CSCOPE_RE_SPACE "([ \t]|/\\*([^*]|\\*/)*\\*/)*"
1254 #define CSCOPE_LEN sizeof(CSCOPE_RE_SPACE) - 1
1255 CHAR2INT(sp
, CSCOPE_RE_SPACE
, CSCOPE_LEN
, wp
, wlen
);
1256 for (nspaces
= 0, p
= *ptrnp
, len
= *plenp
; len
> 0; ++p
, --len
)
1261 * Allocate plenty of space:
1262 * the string, plus potential escaping characters;
1263 * nspaces + 2 copies of CSCOPE_RE_SPACE;
1264 * ^, $, nul terminator characters.
1267 len
= (p
- *ptrnp
) * 2 + (nspaces
+ 2) * sizeof(CSCOPE_RE_SPACE
) + 3;
1268 GET_SPACE_RETW(sp
, bp
, blen
, len
);
1274 MEMCPYW(t
, wp
, wlen
);
1277 for (len
= *plenp
; len
> 0; ++p
, --len
)
1279 MEMCPYW(t
, wp
, wlen
);
1282 if (strchr("\\^.[]$*+?()|{}", *p
))
1287 MEMCPYW(t
, wp
, wlen
);
1298 * Report a regular expression error.
1300 * PUBLIC: void re_error __P((SCR *, int, regex_t *));
1303 re_error(SCR
*sp
, int errcode
, regex_t
*preg
)
1308 sz
= regerror(errcode
, preg
, NULL
, 0);
1309 if ((oe
= malloc(sz
)) == NULL
)
1310 msgq(sp
, M_SYSERR
, NULL
);
1312 (void)regerror(errcode
, preg
, oe
, sz
);
1313 msgq(sp
, M_ERR
, "RE error: %s", oe
);
1320 * Do the substitution for a regular expression.
1323 re_sub(SCR
*sp
, CHAR_T
*ip
, CHAR_T
**lbp
, size_t *lbclenp
, size_t *lblenp
, regmatch_t
*match
)
1330 enum { C_NOT_SET
, C_LOWER
, C_ONE_LOWER
, C_ONE_UPPER
, C_UPPER
} conv
;
1331 size_t lbclen
, lblen
; /* Local copies. */
1332 size_t mlen
; /* Match length. */
1333 size_t rpl
; /* Remaining replacement length. */
1334 CHAR_T
*rp
; /* Replacement pointer. */
1336 int no
; /* Match replacement offset. */
1337 CHAR_T
*p
, *t
; /* Buffer pointers. */
1338 CHAR_T
*lb
; /* Local copies. */
1340 lb
= *lbp
; /* Get local copies. */
1347 * There are some special sequences that vi provides in the
1348 * replacement patterns.
1349 * & string the RE matched (\& if nomagic set)
1350 * \# n-th regular subexpression
1351 * \E end \U, \L conversion
1352 * \e end \U, \L conversion
1353 * \l convert the next character to lower-case
1354 * \L convert to lower-case, until \E, \e, or end of replacement
1355 * \u convert the next character to upper-case
1356 * \U convert to upper-case, until \E, \e, or end of replacement
1358 * Otherwise, since this is the lowest level of replacement, discard
1359 * all escaping characters. This (hopefully) matches historic practice.
1361 #define OUTCH(ch, nltrans) { \
1362 ARG_CHAR_T __ch = (ch); \
1363 e_key_t __value = KEY_VAL(sp, __ch); \
1364 if (nltrans && (__value == K_CR || __value == K_NL)) { \
1366 sp->newl[sp->newl_cnt++] = lbclen; \
1367 } else if (conv != C_NOT_SET) { \
1373 if (ISUPPER(__ch)) \
1374 __ch = TOLOWER(__ch); \
1380 if (ISLOWER(__ch)) \
1381 __ch = TOUPPER(__ch); \
1392 for (rp
= sp
->repl
, rpl
= sp
->repl_len
, p
= lb
+ lbclen
; rpl
--;) {
1393 switch (ch
= *rp
++) {
1395 if (O_ISSET(sp
, O_MAGIC
)) {
1407 if (!O_ISSET(sp
, O_MAGIC
)) {
1412 case '0': case '1': case '2': case '3': case '4':
1413 case '5': case '6': case '7': case '8': case '9':
1415 subzero
: if (match
[no
].rm_so
== -1 ||
1416 match
[no
].rm_eo
== -1)
1418 mlen
= match
[no
].rm_eo
- match
[no
].rm_so
;
1419 for (t
= ip
+ match
[no
].rm_so
; mlen
--; ++t
)
1420 OUTCH((UCHAR_T
)*t
, 0);
1451 *lbp
= lb
; /* Update caller's information. */