4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
30 /* Copyright (c) 1981 Regents of the University of California */
36 char *_compile(const char *, char *, char *, int);
39 * The compiled-regular-expression storage areas (re, scanre, and subre)
40 * have been changed into dynamically allocated memory areas, in both the
41 * Solaris and XPG4 versions.
43 * In the Solaris version, which uses the original libgen(3g) compile()
44 * and step() calls, these areas are allocated once, and then data are
45 * copied between them subsequently, as they were in the original
46 * implementation. This is possible because the compiled information is
47 * a self-contained block of bits.
49 * In the XPG4 version, the expr:compile.o object is linked in as a
50 * simulation of these functions using the new regcomp() and regexec()
51 * functions. The problem here is that the resulting
52 * compiled-regular-expression data contain pointers to other data, which
53 * need to be freed, but only when we are quite sure that we are done
54 * with them - and certainly not before. There was an earlier attempt to
55 * handle these differences, but that effort was flawed.
60 void regex_comp_free(void *);
61 extern size_t regexc_size
; /* compile.c: size of regex_comp structure */
65 * Global, substitute and regular expressions.
66 * Very similar to ed, with some re extensions and
67 * confirmed substitute.
76 unsigned char globuf
[GBSIZE
], *Cwas
;
77 int nlines
= lineDOL();
78 int oinglobal
= inglobal
;
79 unsigned char *oglobp
= globp
;
80 char multi
[MB_LEN_MAX
+ 1];
88 * 0: ordinary - not in a global command.
89 * 1: text coming from some buffer, not tty.
90 * 2: like 1, but the source of the buffer is a global command.
91 * Hence you're only in a global command if inglobal==2. This
92 * strange sounding convention is historically derived from
93 * everybody simulating a global command.
96 error(value(vi_TERSE
) ? gettext("Global within global") :
97 gettext("Global within global not allowed"));
102 error(value(vi_TERSE
) ? gettext("Global needs re") :
103 gettext("Missing regular expression for global"));
105 (void)vi_compile(c
, 1);
108 while ((c
= peekchar()) != '\n') {
117 if ((len
= _mbftowc(multi
, &wc
, getchar
, &peekc
)) > 0) {
118 if ((gp
+ len
) >= &globuf
[GBSIZE
- 2])
119 error(gettext("Global command too long"));
120 strncpy(gp
, multi
, len
);
158 if (gp
>= &globuf
[GBSIZE
- 2])
159 error(gettext("Global command too long"));
168 for (a1
= one
; a1
<= dol
; a1
++) {
170 if (a1
>= addr1
&& a1
<= addr2
&& execute(0, a1
) == k
)
175 * This code is commented out for now. The problem is that we don't
176 * fix up the undo area the way we should. Basically, I think what has
177 * to be done is to copy the undo area down (since we shrunk everything)
178 * and move the various pointers into it down too. I will do this later
179 * when I have time. (Mark, 10-20-80)
182 * Special case: g/.../d (avoid n^2 algorithm)
184 if (globuf
[0]=='d' && globuf
[1]=='\n' && globuf
[2]=='\0') {
192 * Now for each marked line, set dot there and do the commands.
193 * Note the n^2 behavior here for lots of lines matching.
194 * This is really needed: in some cases you could delete lines,
195 * causing a marked line to be moved before a1 and missed if
196 * we didn't restart at zero each time.
198 for (a1
= one
; a1
<= dol
; a1
++) {
208 inglobal
= oinglobal
;
220 * gdelete: delete inside a global command. Handles the
221 * special case g/r.e./d. All lines to be deleted have
222 * already been marked. Squeeze the remaining lines together.
223 * Note that other cases such as g/r.e./p, g/r.e./s/r.e.2/rhs/,
224 * and g/r.e./.,/r.e.2/d are not treated specially. There is no
225 * good reason for this except the question: where do you draw the line?
233 /* find first marked line. can skip all before it */
234 for (a1
=zero
; (*a1
&01)==0; a1
++)
237 /* copy down unmarked lines, compacting as we go. */
238 for (a2
=a1
+1; a2
<=a3
;) {
240 a2
++; /* line is marked, skip it */
241 dot
= a1
; /* dot left after line deletion */
243 *a1
++ = *a2
++; /* unmarked, copy it */
252 int scount
, slines
, stotal
;
263 save12(), undkind
= UNDCHANGE
;
266 for (addr
= addr1
; addr
<= addr2
; addr
++) {
267 scount
= hopcount
= 0;
268 if (dosubcon(0, addr
) == 0)
272 * The loop can happen from s/\</&/g
273 * but we don't want to break other, reasonable cases.
277 if (++hopcount
> sizeof linebuf
)
278 error(gettext("substitution loop"));
279 if (dosubcon(1, addr
) == 0)
287 n
= append(getsub
, addr
);
292 if (stotal
== 0 && !inglobal
&& !cflag
)
293 error(value(vi_TERSE
) ? gettext("Fail") :
294 gettext("Substitute pattern match failed"));
295 snote(stotal
, slines
);
302 int seof
, c
, uselastre
;
304 static unsigned char remem
[RHSSIZE
];
305 static int remflg
= -1;
307 if (!value(vi_EDCOMPATIBLE
))
315 if (endcmd(seof
) || any(seof
, "gcr")) {
319 if (isalpha(seof
) || isdigit(seof
))
320 error(value(vi_TERSE
) ? gettext("Substitute needs re") :
321 gettext("Missing regular expression for substitute"));
322 seof
= vi_compile(seof
, 1);
333 if (re
== NULL
|| re
->Expbuf
[1] == 0)
334 error(value(vi_TERSE
) ? gettext("No previous re") :
335 gettext("No previous regular expression"));
336 if (subre
== NULL
|| subre
->Expbuf
[1] == 0)
337 error(value(vi_TERSE
) ? gettext("No previous substitute re") :
338 gettext("No previous substitute to repeat"));
367 * The % by itself on the right hand side means
368 * that the previous value of the right hand side
369 * should be used. A -1 is used to indicate no
370 * previously remembered search string.
373 if (rhsbuf
[0] == '%' && rhsbuf
[1] == 0)
375 error(gettext("No previously remembered string"));
377 strcpy(rhsbuf
, remem
);
379 strcpy(remem
, rhsbuf
);
390 unsigned char *rp
, *orp
;
392 unsigned char orhsbuf
[RHSSIZE
];
393 char multi
[MB_LEN_MAX
+ 1];
406 if (!isascii(c
) && c
!= EOF
) {
407 if ((len
= _mbftowc(multi
, &wc
, getchar
, &peekc
)) > 0) {
408 if ((rp
+ len
) >= &rhsbuf
[RHSSIZE
- 1])
410 strncpy(rp
, multi
, len
);
423 error(gettext("Replacement string ends with \\"));
428 if ((len
= _mbftowc(multi
, &wc
, getchar
, &peekc
)) > 0) {
429 if ((rp
+ len
) >= &rhsbuf
[RHSSIZE
- 1])
431 strncpy(rp
, multi
, len
);
438 if (value(vi_MAGIC
)) {
440 * When "magic", \& turns into a plain &,
441 * and all other chars work fine quoted.
444 if(rp
>= &rhsbuf
[RHSSIZE
- 1]) {
446 error(value(vi_TERSE
) ?
447 gettext("Replacement pattern too long") :
448 gettext("Replacement pattern too long - limit 256 characters"));
456 for (orp
= orhsbuf
; *orp
; *rp
++ = *orp
++)
457 if (rp
>= &rhsbuf
[RHSSIZE
- 1])
461 if(rp
>= &rhsbuf
[RHSSIZE
- 1]) {
464 error(value(vi_TERSE
) ?
465 gettext("Replacement pattern too long") :
466 gettext("Replacement pattern too long - limit 256 characters"));
473 if (!(globp
&& globp
[0])) {
484 if (rp
>= &rhsbuf
[RHSSIZE
- 1]) {
487 error(value(vi_TERSE
) ?
488 gettext("Replacement pattern too long") :
489 gettext("Replacement pattern too long - limit 256 characters"));
502 if ((p
= linebp
) == 0)
510 dosubcon(bool f
, line
*a
)
513 if (execute(f
, a
) == 0)
532 putchar('\n' | QUOTE
);
535 ugo(lcolumn(loc2
) - c
, '^');
544 putchar('\b' | QUOTE
);
546 putchar('\b' | QUOTE
), flush();
553 if (inopen
&& MB_CUR_MAX
== 1 || c
< 0200) {
558 if (c
!= '\n' && c
!= EOF
) {
567 ugo(int cnt
, int with
)
582 unsigned char *lp
, *sp
, *rp
;
589 while (lp
< (unsigned char *)loc1
)
593 * Caution: depending on the hardware, c will be either sign
594 * extended or not if C&QUOTE is set. Thus, on a VAX, c will
595 * be < 0, but on a 3B, c will be >= 128.
598 if ((len
= mblen((char *)rp
, MB_CUR_MAX
)) <= 0)
600 /* ^V <return> from vi to split lines */
606 if ((len
= mblen((char *)rp
, MB_CUR_MAX
)) <= 0)
611 sp
= place(sp
, loc1
, loc2
);
641 if(re
!= NULL
&& c
>= '1' && c
< re
->Nbra
+ '1') {
642 sp
= place(sp
, braslist
[c
- '1'] , braelist
[c
- '1']);
650 if ((sp
+ len
) >= &genbuf
[LBSIZE
])
652 strncpy(sp
, rp
, len
);
659 sp
+= len
; rp
+= len
;
660 if (sp
>= &genbuf
[LBSIZE
])
662 error(value(vi_TERSE
) ? gettext("Line overflow") :
663 gettext("Line overflow in substitute"));
665 lp
= (unsigned char *)loc2
;
666 loc2
= (char *)(linebuf
+ (sp
- genbuf
));
667 while (*sp
++ = *lp
++)
668 if (sp
>= &genbuf
[LBSIZE
])
691 unsigned char *sp
, *l1
, *l2
;
695 *sp
++ = fixcase(*l1
++);
696 if (sp
>= &genbuf
[LBSIZE
])
703 snote(int total
, int nlines
)
708 if (nlines
!= 1 && nlines
!= total
)
709 viprintf(mesg(value(vi_TERSE
) ?
712 * Reference order of arguments must not
713 * be changed using '%digit$', since vi's
714 * viprintf() does not support it.
716 gettext("%d subs on %d lines") :
719 * Reference order of arguments must not
720 * be changed using '%digit$', since vi's
721 * viprintf() does not support it.
723 gettext("%d substitutions on %d lines")),
726 viprintf(mesg(value(vi_TERSE
) ?
728 gettext("%d substitutions")),
737 extern int regcomp_flags
; /* use to specify cflags for regcomp() */
741 vi_compile(int eof
, int oknl
)
744 unsigned char *gp
, *p1
;
746 unsigned char rebuf
[LBSIZE
];
747 char multi
[MB_LEN_MAX
+ 1];
753 * reset cflags to plain BRE
754 * if \< and/or \> is specified, REG_WORDS is set.
760 if (isalpha(eof
) || isdigit(eof
))
761 error(gettext("Regular expressions cannot be delimited by letters or digits"));
762 if(eof
>= 0200 && MB_CUR_MAX
> 1)
763 error(gettext("Regular expressions cannot be delimited by multibyte characters"));
770 if (scanre
== NULL
|| scanre
->Expbuf
[1] == 0)
771 error(value(vi_TERSE
) ? gettext("No previous scan re") :
772 gettext("No previous scanning regular expression"));
777 if (subre
== NULL
|| subre
->Expbuf
[1] == 0)
778 error(value(vi_TERSE
) ? gettext("No previous substitute re") :
779 gettext("No previous substitute regular expression"));
784 error(value(vi_TERSE
) ? gettext("Badly formed re") :
785 gettext("Regular expression \\ must be followed by / or ?"));
787 if (c
== eof
|| c
== '\n' || c
== EOF
) {
788 if (re
== NULL
|| re
->Expbuf
[1] == 0)
789 error(value(vi_TERSE
) ? gettext("No previous re") :
790 gettext("No previous regular expression"));
791 if (c
== '\n' && oknl
== 0)
792 error(value(vi_TERSE
) ? gettext("Missing closing delimiter") :
793 gettext("Missing closing delimiter for regular expression"));
806 if (c
== eof
|| c
== EOF
) {
811 if (gp
>= &genbuf
[LBSIZE
- 3])
813 cerror(value(vi_TERSE
) ?
814 (unsigned char *)gettext("Re too complex") :
816 gettext("Regular expression too complicated"));
818 if (!(isascii(c
) || MB_CUR_MAX
== 1)) {
820 if ((len
= _mbftowc(multi
, &wc
, getchar
, &peekc
)) >= 1) {
821 if ((gp
+ len
) >= &genbuf
[LBSIZE
- 3])
823 strncpy(gp
, multi
, len
);
836 if ((len
= _mbftowc(multi
, &wc
, getchar
, &peekc
)) >= 1) {
837 if ((gp
+ len
) >= &genbuf
[LBSIZE
- 3])
840 strncpy(gp
, multi
, len
);
852 regcomp_flags
= REG_WORDS
;
870 if(c
>= '0' && c
<= '9') {
875 if (value(vi_MAGIC
) == 0)
886 if (!isascii(*rhsp
)) {
887 if ((len
= mbtowc((wchar_t *)0, (char *)rhsp
, MB_CUR_MAX
)) > 1) {
888 if ((gp
+ len
) >= &genbuf
[LBSIZE
-2])
890 strncpy(gp
, rhsp
, len
);
891 rhsp
+= len
; gp
+= len
;
899 cerror(value(vi_TERSE
) ? (unsigned char *)
900 gettext("Replacement pattern contains &") :
901 (unsigned char *)gettext("Replacement pattern contains & - cannot use in re"));
902 if (c
>= '1' && c
<= '9')
903 cerror(value(vi_TERSE
) ? (unsigned char *)
904 gettext("Replacement pattern contains \\d") :
906 gettext("Replacement pattern contains \\d - cannot use in re"));
907 if ((len
= mbtowc((wchar_t *)0, (char *)rhsp
, MB_CUR_MAX
)) <= 1) {
914 if ((gp
+ len
) >= &genbuf
[LBSIZE
-2])
918 *gp
++ = (value(vi_IGNORECASE
) ? tolower(c
) : c
);
920 strncpy(gp
, rhsp
, len
);
921 gp
+= len
; rhsp
+= len
;
939 if (!isascii(c
) && c
!= EOF
) {
941 if ((len
= _mbftowc(multi
, &wc
, getchar
, &peekc
)) >= 1) {
942 if ((gp
+ len
)>= &genbuf
[LBSIZE
-4])
944 strncpy(gp
, multi
, len
);
952 if (gp
>= &genbuf
[LBSIZE
-4])
954 if(c
== '\\' && peekchar() == ']') {
959 else if (c
== '\n' || c
== EOF
)
960 cerror((unsigned char *)
961 gettext("Missing ]"));
963 *gp
++ = (value(vi_IGNORECASE
) ? tolower(c
) : c
);
976 cerror(value(vi_TERSE
) ? (unsigned char *)gettext("No newlines in re's") :
977 (unsigned char *)gettext("Can't escape newlines into regular expressions"));
979 *gp
++ = (value(vi_IGNORECASE
) ? tolower(c
) : c
);
987 cerror(value(vi_TERSE
) ? (unsigned char *)gettext("Badly formed re") :
988 (unsigned char *)gettext("Missing closing delimiter for regular expression"));
1000 *gp
++ = (value(vi_IGNORECASE
) ? tolower(c
) : c
);
1008 /* see if our compiled RE's will fit in the re structure: */
1009 if (regexc_size
> EXPSIZ
) {
1011 * this should never happen. but it's critical that we
1012 * check here, otherwise .bss would get overwritten.
1014 cerror(value(vi_TERSE
) ? (unsigned char *)
1015 gettext("RE's can't fit") :
1016 (unsigned char *)gettext("Regular expressions can't fit"));
1021 * We create re each time we need it.
1024 if (re
== NULL
|| re
== scanre
|| re
== subre
) {
1025 if ((re
= calloc(1, sizeof(struct regexp
))) == NULL
) {
1026 error(gettext("out of memory"));
1030 regex_comp_free(&re
->Expbuf
);
1031 memset(re
, 0, sizeof(struct regexp
));
1034 compile((char *) genbuf
, (char *) re
->Expbuf
, (char *) re
->Expbuf
1037 (void) _compile((const char *)genbuf
, (char *)re
->Expbuf
,
1038 (char *)(re
->Expbuf
+ sizeof (re
->Expbuf
)), 1);
1045 cerror((unsigned char *)gettext("\\( \\) Imbalance"));
1047 cerror(value(vi_TERSE
) ? (unsigned char *)gettext("Awash in \\('s!") :
1049 gettext("Too many \\('d subexpressions in a regular expression"));
1053 cerror(value(vi_TERSE
) ? (unsigned char *)gettext("Illegal byte sequence") :
1054 (unsigned char *)gettext("Regular expression has illegal byte sequence"));
1061 cerror(unsigned char *s
)
1064 re
->Expbuf
[0] = re
->Expbuf
[1] = 0;
1070 execute(int gf
, line
*addr
)
1072 unsigned char *p1
, *p2
;
1079 if (re
== NULL
|| re
->Expbuf
[0])
1081 if(value(vi_IGNORECASE
)) {
1083 p2
= (unsigned char *)loc2
;
1085 if ((len
= mblen((char *)p2
, MB_CUR_MAX
)) <= 0)
1092 strncpy(p1
, p2
, len
);
1093 p1
+= len
; p2
+= len
;
1096 locs
= (char *)genbuf
;
1100 p1
= (unsigned char *)loc2
;
1108 if(value(vi_IGNORECASE
)) {
1112 if ((len
= mblen((char *)p2
, MB_CUR_MAX
)) <= 0)
1119 strncpy(p1
, p2
, len
);
1120 p1
+= len
; p2
+= len
;
1124 start
= (char *)linebuf
;
1129 ret
= step((char *)p1
, (char *)re
->Expbuf
);
1131 if(value(vi_IGNORECASE
) && ret
) {
1132 loc1
= start
+ (loc1
- (char *)genbuf
);
1133 loc2
= start
+ (loc2
- (char *)genbuf
);
1134 for(i
= 0; i
< NBRA
; i
++) {
1135 braslist
[i
] = start
+ (braslist
[i
] - (char *)genbuf
);
1136 braelist
[i
] = start
+ (braelist
[i
] - (char *)genbuf
);
1143 * Initialize the compiled regular-expression storage areas (called from
1150 re
= scanre
= subre
= NULL
;
1152 if ((re
= calloc(1, sizeof(struct regexp
))) == NULL
) {
1153 error(gettext("out of memory"));
1157 if ((scanre
= calloc(1, sizeof(struct regexp
))) == NULL
) {
1158 error(gettext("out of memory"));
1162 if ((subre
= calloc(1, sizeof(struct regexp
))) == NULL
) {
1163 error(gettext("out of memory"));
1170 * Save what is in the special place re to the named alternate
1171 * location. This means freeing up what's currently in this target
1172 * location, if necessary.
1175 void savere(struct regexp
** a
)
1178 if (a
== NULL
|| re
== NULL
) {
1188 if (scanre
!= subre
) {
1189 regex_comp_free(&((*a
)->Expbuf
));
1195 memcpy(*a
, re
, sizeof(struct regexp
));
1201 * Restore what is in the named alternate location to the special place
1202 * re. This means first freeing up what's currently in re, if necessary.
1205 void resre(struct regexp
* a
)
1218 if ((re
!= scanre
) && (re
!= subre
)) {
1219 regex_comp_free(&re
->Expbuf
);
1226 memcpy(re
, a
, sizeof(struct regexp
));