2 * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
3 * Copyright (c) 1992 Diomidis Spinellis.
4 * Copyright (c) 1992, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
7 * This code is derived from software contributed to Berkeley by
8 * Diomidis Spinellis of Imperial College, University of London.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 #include <sys/types.h>
/*
 * File-scope buffers, forward declarations and shared state for the routines
 * in this file.  Elsewhere in the file HS is cleared under a "hold space"
 * comment, SS under a "substitute space" comment, and YS is filled by the
 * translation code.  PS is presumably the pattern space -- TODO confirm
 * against the SPACE definition and the ps/psl macros, which are not visible
 * here.
 */
57 static SPACE HS
, PS
, SS
, YS
;
/* Prototypes for the helpers that are private to this file. */
64 static int applies(struct s_command
*);
65 static void do_tr(struct s_tr
*);
66 static void flush_appends(void);
67 static void lputs(char *, size_t);
68 static int regexec_e(regex_t
*, const char *, int, int, size_t);
69 static void regsub(SPACE
*, char *, char *);
70 static int substitute(struct s_command
*);
/*
 * Pending-append queue: appendx entries are in use out of appendnum slots.
 * The queueing code doubles appendnum when the array is full.
 */
72 struct s_appends
*appends
; /* Array of pointers to strings to append. */
73 static int appendx
; /* Index into appends array. */
74 int appendnum
; /* Size of appends array. */
76 static int lastaddr
; /* Set by applies if last address of a range. */
77 static int sdone
; /* If any substitutes since last line input. */
78 /* Iov structure for 'w' commands. */
/*
 * NOTE(review): the declaration that follows is a regex_t pointer, not an
 * iovec, so the comment above looks stale -- confirm and drop it.
 * defpreg is the expression handed to regexec() by regexec_e(); substitute()
 * also compares its re_nsub against the back-reference count of a command.
 */
79 static regex_t
*defpreg
;
/*
 * Main per-line processing loop (the enclosing function header and the
 * command dispatch labels are not visible in this listing, so the notes below
 * describe only the statements that are shown).
 *
 * The first two lines are the tail of an output macro: it writes psl bytes of
 * ps to outfile and then a newline.
 *
 * The loop resets linenum to 0 and reads one input record into PS per
 * iteration with mf_fgets(&PS, REPLACE), stopping when that call returns 0.
 */
84 (void) fwrite(ps, 1, psl, outfile); \
85 (void) fputc('\n', outfile); \
99 for (linenum
= 0; mf_fgets(&PS
, REPLACE
); /* NOP */) {
/*
 * Queue cp->t as an AP_STRING append entry.  When the array is full its
 * capacity (appendnum) is doubled with realloc(); the failure branch is not
 * visible here.
 */
114 if (appendx
>= appendnum
)
115 if ((appends
= realloc(appends
,
116 sizeof (struct s_appends
) *
117 (appendnum
*= 2))) == NULL
)
119 appends
[appendx
].type
= AP_STRING
;
120 appends
[appendx
].s
= cp
->t
;
121 appends
[appendx
].len
= strlen(cp
->t
);
/*
 * Print cp->t only when the command has no second address, or applies()
 * flagged the end of a range (lastaddr), or this is the last input line.
 */
130 if (cp
->a2
== NULL
|| lastaddr
|| lastline())
131 (void) fprintf(outfile
, "%s", cp
->t
);
/* Locate the first newline inside the pattern space. */
140 (p
= memchr(ps
, '\n', psl
)) == NULL
) {
/*
 * (p + 1) - ps is the byte count up to and including that newline; the text
 * after it is then slid to the front of the pattern space.
 */
145 (uintptr_t)(p
+ 1) - (uintptr_t)ps
;
146 (void) memmove(ps
, p
+ 1, psl
);
/* Pattern space := hold space. */
150 cspace(&PS
, hs
, hsl
, REPLACE
);
/* Pattern space := pattern space + newline + hold space. */
153 cspace(&PS
, "\n", 1, APPEND
);
154 cspace(&PS
, hs
, hsl
, APPEND
);
/* Hold space := pattern space. */
157 cspace(&HS
, ps
, psl
, REPLACE
);
/* Hold space := hold space + newline + pattern space. */
160 cspace(&HS
, "\n", 1, APPEND
);
161 cspace(&HS
, ps
, psl
, APPEND
);
/* Write cp->t directly to outfile. */
164 (void) fprintf(outfile
, "%s", cp
->t
);
/* Replace the pattern space with the next input record. */
173 if (!mf_fgets(&PS
, REPLACE
))
/* Append a newline and then the next input record to the pattern space. */
179 cspace(&PS
, "\n", 1, APPEND
);
180 if (!mf_fgets(&PS
, APPEND
))
/* If the pattern space holds a newline, cut psl down to the text before it. */
191 if ((p
= memchr(ps
, '\n', psl
)) != NULL
) {
193 psl
= (uintptr_t)p
- (uintptr_t)ps
;
/*
 * Queue cp->t as an AP_FILE append entry (flush_appends() later opens it as
 * a file name).  Same doubling growth as the AP_STRING case above.
 */
206 if (appendx
>= appendnum
)
207 if ((appends
= realloc(appends
,
208 sizeof (struct s_appends
) *
209 (appendnum
*= 2))) == NULL
)
211 appends
[appendx
].type
= AP_FILE
;
212 appends
[appendx
].s
= cp
->t
;
213 appends
[appendx
].len
= strlen(cp
->t
);
/* Accumulate the result of substitute() into the per-line sdone flag. */
217 sdone
|= substitute(cp
);
/*
 * Lazily open the output file named by cp->t the first time it is needed
 * (fd still -1), then write the pattern space followed by a newline.
 * NOTE(review): O_APPEND and O_TRUNC are both passed -- confirm intended.
 */
229 if (cp
->u
.fd
== -1 && (cp
->u
.fd
= open(cp
->t
,
230 O_WRONLY
|O_APPEND
|O_CREAT
|O_TRUNC
, 0666))
233 if (write(cp
->u
.fd
, ps
, psl
) != (ssize_t
)psl
||
234 write(cp
->u
.fd
, "\n", 1) != 1)
239 * If the hold space is null, make it empty
240 * but not null. Otherwise the pattern space
241 * will become null after the swap, which is
242 * an abnormal condition.
245 cspace(&HS
, "", 0, REPLACE
);
/* Print the current input line number on its own line. */
259 (void) fprintf(outfile
, "%lu\n", linenum
);
/*
 * Target label `new`: the guarded statement (not visible here) runs only
 * when both nflag and pd are clear.
 */
264 new: if (!nflag
&& !pd
)
267 } /* for all lines */
/*
 * Address test used by applies() as MATCH(addr); the #define line itself is
 * not visible in this listing.  For an AT_RE address it runs regexec_e() on
 * the pattern space (ps, psl) with the nomatch argument set to 1; for an
 * AT_LINE address it compares linenum with the stored line number; any other
 * address type falls through to lastline().
 */
271 * TRUE if the address passed matches the current program state
272 * (lastline, linenumber, ps).
275 ((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) : \
276 (a)->type == AT_LINE ? linenum == (a)->u.l : lastline())
/*
 * applies: decide whether command cp is selected for the current line.
 *
 * Visible logic, in order:
 *  - a command with neither a1 nor a2 is tested first (its result line is
 *    not visible here);
 *  - cp->startline > 0 means a two-address range is already open; the range
 *    continues while a relative second address has not been exceeded, and it
 *    is closed when linenum has already passed the second address;
 *  - otherwise MATCH(cp->a1) opens a range, unless the second address selects
 *    only this one line, and cp->startline records where it began.
 * The final result r is inverted when cp->nonsel is set.
 * lastaddr (see its declaration) is set here at the end of a range; those
 * assignments are on lines not visible in this listing.
 */
279 * Return TRUE if the command applies to the current line. Sets the start
280 * line for process ranges. Interprets the non-select (``!'') flag.
283 applies(struct s_command
*cp
)
288 if (cp
->a1
== NULL
&& cp
->a2
== NULL
)
291 if (cp
->startline
> 0) {
/* Relative second address: still inside while the offset is within u.l. */
296 } else if (linenum
- cp
->startline
<= cp
->a2
->u
.l
)
298 else if ((cp
->a2
->type
== AT_LINE
&&
299 linenum
> cp
->a2
->u
.l
) ||
300 (cp
->a2
->type
== AT_RELLINE
&&
301 linenum
- cp
->startline
> cp
->a2
->u
.l
)) {
303 * We missed the 2nd address due to a branch,
304 * so just close the range and return false.
310 } else if (MATCH(cp
->a1
)) {
312 * If the second address is a number less than or
313 * equal to the line number first selected, only
314 * one line shall be selected.
316 * Likewise if the relative second line address is zero.
318 if ((cp
->a2
->type
== AT_LINE
&&
319 linenum
>= cp
->a2
->u
.l
) ||
320 (cp
->a2
->type
== AT_RELLINE
&& cp
->a2
->u
.l
== 0))
/* Remember where the range started so later lines can be tested against it. */
323 cp
->startline
= linenum
;
/* Apply the non-select flag last. */
330 return (cp
->nonsel
? ! r
: r
);
/*
 * Reset routine (its header line is not visible in this listing).
 * It walks the compiled program starting at prog: a '{' command is followed
 * into its nested list (u.c), any other command through next.  The per-command
 * action inside the loop is not visible here.  Afterwards the hold space is
 * emptied with a zero-length REPLACE.
 */
334 * Reset the sed processor to its initial state.
339 struct s_command
*cp
;
342 * Reset all in-range markers.
344 for (cp
= prog
; cp
; cp
= cp
->code
== '{' ? cp
->u
.c
: cp
->next
)
349 * Clear out the hold space.
351 cspace(&HS
, "", 0, REPLACE
);
/*
 * substitute: carry out one substitution command on the pattern space.
 *
 * Visible flow:
 *  - a back-reference count (maxbref) larger than defpreg->re_nsub is fatal;
 *  - the replacement text is assembled in SS, which starts out empty;
 *  - one path repeats the match over the rest of the buffer, copying the
 *    unmatched text and calling regsub() for each match; a "default" path
 *    steps over matches to reach the Nth occurrence; "case 1" replaces a
 *    single match;
 *  - the 'w' flag path lazily opens wfile and writes the pattern space plus a
 *    newline to it.
 * The case labels, the swap of SS with the pattern space and the return
 * statements are on lines not visible in this listing.  The main loop ORs the
 * return value into sdone.
 */
356 * Do substitutions in the pattern space. Currently, we build a
357 * copy of the new pattern space in the substitute space structure
358 * and then swap them.
361 substitute(struct s_command
*cp
)
365 regoff_t re_off
, slen
;
/*
 * linenum is overwritten with the line number stored in the command before
 * fatal() is called -- presumably so the diagnostic names that line; confirm
 * against fatal().
 */
372 if (defpreg
!= NULL
&& cp
->u
.s
->maxbref
> defpreg
->re_nsub
) {
373 linenum
= cp
->u
.s
->linenum
;
374 fatal(_("\\%u not defined in the RE"),
/* First match attempt over the whole pattern space (psl bytes). */
378 if (!regexec_e(re
, s
, 0, 0, psl
))
381 SS
.len
= 0; /* Clean substitute space. */
/*
 * Repeated-match path: a match is substituted when it is non-empty or when
 * lastempty is set.
 */
389 if (lastempty
|| match
[0].rm_so
!= match
[0].rm_eo
) {
390 /* Locate start of replaced string. */
391 re_off
= match
[0].rm_so
;
392 /* Copy leading retained string. */
393 cspace(&SS
, s
, re_off
, APPEND
);
394 /* Add in regular expression. */
395 regsub(&SS
, s
, cp
->u
.s
->new);
398 /* Move past this match. */
399 if (match
[0].rm_so
!= match
[0].rm_eo
) {
401 slen
-= match
[0].rm_eo
;
/*
 * Empty match: copy one byte through unchanged (when one remains) and step
 * past it so the scan keeps advancing.
 */
404 if (match
[0].rm_so
< slen
)
405 cspace(&SS
, s
+ match
[0].rm_so
, 1,
407 s
+= match
[0].rm_so
+ 1;
408 slen
-= match
[0].rm_so
+ 1;
/* Later attempts pass REG_NOTBOL because s no longer points at line start. */
411 } while (slen
>= 0 && regexec_e(re
, s
, REG_NOTBOL
, 0, slen
));
412 /* Copy trailing retained string. */
414 cspace(&SS
, s
, slen
, APPEND
);
416 default: /* Nth occurrence */
/* Widen an empty match to one byte so skipping it advances the scan. */
418 if (match
[0].rm_eo
== match
[0].rm_so
)
419 match
[0].rm_eo
= match
[0].rm_so
+ 1;
421 slen
-= match
[0].rm_eo
;
424 if (!regexec_e(re
, s
, REG_NOTBOL
, 0, slen
))
428 case 1: /* 1st occurrence */
429 /* Locate start of replaced string. */
/* re_off is measured from ps, so the distance already skipped (s - ps) is added. */
430 re_off
= match
[0].rm_so
+ ((uintptr_t)s
- (uintptr_t)ps
);
431 /* Copy leading retained string. */
432 cspace(&SS
, ps
, re_off
, APPEND
);
433 /* Add in regular expression. */
434 regsub(&SS
, s
, cp
->u
.s
->new);
435 /* Copy trailing retained string. */
437 slen
-= match
[0].rm_eo
;
438 cspace(&SS
, s
, slen
, APPEND
);
443 * Swap the substitute space and the pattern space, and make sure
444 * that any leftover pointers into stdio memory get lost.
451 /* Handle the 'p' flag. */
455 /* Handle the 'w' flag. */
/*
 * The file is opened on first use (wfd still -1); open or write failures end
 * the program through err().  Nothing is written when pd is set.
 * NOTE(review): O_APPEND and O_TRUNC are both passed -- confirm intended.
 */
456 if (cp
->u
.s
->wfile
&& !pd
) {
457 if (cp
->u
.s
->wfd
== -1 && (cp
->u
.s
->wfd
= open(cp
->u
.s
->wfile
,
458 O_WRONLY
|O_APPEND
|O_CREAT
|O_TRUNC
, 0666)) == -1)
459 err(1, "%s", cp
->u
.s
->wfile
);
460 if (write(cp
->u
.s
->wfd
, ps
, psl
) != (ssize_t
)psl
||
461 write(cp
->u
.s
->wfd
, "\n", 1) != 1)
462 err(1, "%s", cp
->u
.s
->wfile
);
/*
 * do_tr: apply the translation table y to the pattern space.
 *
 * When MB_CUR_MAX is 1 every byte of ps is replaced in place through
 * y->bytetab.  Otherwise the result is built in YS: a byte with a non-zero
 * bytetab entry is emitted as that entry; failing that, the y->multis table
 * is searched for a "from" sequence matching at p and its "to" sequence is
 * emitted; one visible fallback copies a single byte of p unchanged.  clen
 * is the number of input bytes consumed per step.  The lines that assign it
 * in the non-multis paths, and the final swap of YS with the pattern space,
 * are not visible in this listing.
 */
469 * Perform translation ('y' command) in the pattern space.
472 do_tr(struct s_tr
*y
)
479 if (MB_CUR_MAX
== 1) {
481 * Single-byte encoding: perform in-place translation
482 * of the pattern space.
484 for (p
= ps
; p
< &ps
[psl
]; p
++)
485 *p
= y
->bytetab
[(uchar_t
)*p
];
488 * Multi-byte encoding: perform translation into the
489 * translation space, then swap the translation and
492 /* Clean translation space. */
494 for (p
= ps
, left
= psl
; left
> 0; p
+= clen
, left
-= clen
) {
/* A non-zero single-byte table entry wins outright. */
495 if ((c
= y
->bytetab
[(uchar_t
)*p
]) != '\0') {
496 cspace(&YS
, &c
, 1, APPEND
);
/* Linear search of the multi-byte table; i == nmultis means no entry matched. */
500 for (i
= 0; i
< y
->nmultis
; i
++)
501 if (left
>= y
->multis
[i
].fromlen
&&
502 memcmp(p
, y
->multis
[i
].from
,
503 y
->multis
[i
].fromlen
) == 0)
505 if (i
< y
->nmultis
) {
506 cspace(&YS
, y
->multis
[i
].to
,
507 y
->multis
[i
].tolen
, APPEND
);
508 clen
= y
->multis
[i
].fromlen
;
510 cspace(&YS
, p
, 1, APPEND
);
514 /* Swap the translation space and the pattern space. */
/*
 * flush_appends (prototype: static void flush_appends(void); the definition
 * header is not visible in this listing).
 *
 * Walks the first appendx entries of the appends array.  One visible branch
 * writes the entry's len bytes of s to outfile; another treats s as a file
 * name, opens it for reading and copies it to outfile in buf-sized chunks.
 * The case labels, the handling of a failed fopen(), and the resetting of
 * appendx and sdone are on lines not visible here.  The closing errx() call
 * reports outfname with strerror(errno), or EIO when errno is 0; its guarding
 * condition is not visible either.
 */
523 * Flush append requests. Always called before reading a line,
524 * therefore it also resets the substitution done (sdone) flag.
533 for (i
= 0; i
< appendx
; i
++)
534 switch (appends
[i
].type
) {
536 (void) fwrite(appends
[i
].s
, sizeof (char),
537 appends
[i
].len
, outfile
);
541 * Read files probably shouldn't be cached. Since
542 * it's not an error to read a non-existent file,
543 * it's possible that another program is interacting
544 * with the sed script through the filesystem. It
545 * would be truly bizarre, but possible. It's probably
546 * not that big a performance win, anyhow.
548 if ((f
= fopen(appends
[i
].s
, "r")) == NULL
)
551 fread(buf
, sizeof (char), sizeof (buf
), f
)))
552 (void) fwrite(buf
, sizeof (char), count
,
558 errx(1, "%s: %s", outfname
, strerror(errno
? errno
: EIO
));
/*
 * lputs: write the len bytes at s to outfile in a visually unambiguous form.
 *
 * Visible behaviour:
 *  - termwidth is a static cached across calls (-1 means "not yet known").
 *    On first use COLUMNS is consulted when set and non-empty, otherwise the
 *    TIOCGWINSZ ioctl on stdout supplies ws_col.  The branch taken when
 *    outfile is not stdout is on a line not visible here;
 *  - each character is decoded with mbrtowc(); on a decoding error the
 *    current byte is used as the character and the conversion state is
 *    cleared;
 *  - printable characters are copied through; characters found in `escapes`
 *    are written as a backslash plus the matching letter from "\\abfrtv";
 *    everything else is written as one three-digit octal escape per byte;
 *  - before output that would reach termwidth, a backslash-newline is written
 *    to fold the line;
 *  - a '$' followed by a newline is written in one loop branch (its
 *    condition is not visible) and again after the loop.
 * The closing errx() call reports outfname with strerror(errno), or EIO when
 * errno is 0; its guarding condition is not visible in this listing.
 */
563 lputs(char *s
, size_t len
)
/* Position i here pairs with position i in "\\abfrtv" further down. */
565 static const char escapes
[] = "\\\a\b\f\r\t\v";
569 static int termwidth
= -1;
574 if (outfile
!= stdout
)
576 if (termwidth
== -1) {
577 if (((p
= getenv("COLUMNS")) != NULL
) && (*p
!= '\0'))
579 else if (ioctl(STDOUT_FILENO
, TIOCGWINSZ
, &win
) == 0 &&
581 termwidth
= win
.ws_col
;
/* Start from the initial conversion state. */
588 (void) memset(&mbs
, 0, sizeof (mbs
));
591 clen
= mbrtowc(&wc
, s
, len
, &mbs
);
/* (size_t)-1 and (size_t)-2 are the error / incomplete-sequence returns. */
594 if (clen
== (size_t)-1 || clen
== (size_t)-2) {
595 wc
= (unsigned char)*s
;
597 (void) memset(&mbs
, 0, sizeof (mbs
));
600 if (col
+ 1 >= termwidth
)
601 (void) fprintf(outfile
, "\\\n");
602 (void) fputc('$', outfile
);
603 (void) fputc('\n', outfile
);
605 } else if (iswprint(wc
)) {
607 if (col
+ width
>= termwidth
) {
608 (void) fprintf(outfile
, "\\\n");
611 (void) fwrite(s
, 1, clen
, outfile
);
/* Only characters with a single-byte form (wctob != EOF) can be in escapes. */
613 } else if (wc
!= L
'\0' && (c
= wctob(wc
)) != EOF
&&
614 (p
= strchr(escapes
, c
)) != NULL
) {
615 if (col
+ 2 >= termwidth
) {
616 (void) fprintf(outfile
, "\\\n");
619 (void) fprintf(outfile
, "\\%c",
620 "\\abfrtv"[(uintptr_t)p
- (uintptr_t)escapes
]);
/* Octal fallback: each byte needs four output columns. */
623 if (col
+ 4 * clen
>= (unsigned)termwidth
) {
624 (void) fprintf(outfile
, "\\\n");
627 for (i
= 0; i
< clen
; i
++)
628 (void) fprintf(outfile
, "\\%03o",
629 (int)(unsigned char)s
[i
]);
635 if (col
+ 1 >= termwidth
)
636 (void) fprintf(outfile
, "\\\n");
637 (void) fputc('$', outfile
);
638 (void) fputc('\n', outfile
);
640 errx(1, "%s: %s", outfname
, strerror(errno
? errno
: EIO
));
/*
 * regexec_e: regexec() wrapper that turns hard errors into fatal() calls.
 *
 * Visible behaviour: match[0].rm_eo is set to slen before the call and
 * REG_STARTEND is OR-ed into eflags, so the search is bounded by match[0]
 * rather than by a terminating NUL (see the regexec documentation for
 * REG_STARTEND).  When nomatch is non-zero no sub-match slots are requested;
 * otherwise maxnsub + 1 are.  The expression actually executed is defpreg.
 * The lines that relate the preg argument to defpreg, and the one that sets
 * match[0].rm_so, are not visible in this listing.  Callers in this file
 * treat a zero return as "no match".
 */
644 regexec_e(regex_t
*preg
, const char *string
, int eflags
, int nomatch
,
651 fatal(_("first RE may not be empty"));
657 match
[0].rm_eo
= slen
;
659 eval
= regexec(defpreg
, string
,
660 nomatch
? 0 : maxnsub
+ 1, match
, eflags
| REG_STARTEND
);
/* Any result that is neither a match nor a plain no-match is reported here. */
667 fatal(_("RE error: %s"), strregerror(eval
, defpreg
));
/*
 * regsub: expand the replacement template src into the space sp, using the
 * sub-match offsets in the file-scope match[] array relative to string.
 *
 * Visible behaviour:
 *  - NEEDSP(reqlen) grows sp->back by reqlen + 1024 bytes whenever
 *    sp->len + reqlen + 1 would reach sp->blen, then re-derives sp->space and
 *    the write cursor dst, because realloc() may have moved the buffer;
 *  - the template is scanned one character at a time; a backslash followed
 *    by a digit selects a numbered sub-match (the assignment of `no` is on a
 *    line not visible here);
 *  - when no sub-match is selected (no < 0) the character is ordinary, with a
 *    backslash before a backslash or '&' acting as an escape;
 *  - otherwise, if match[no] has both offsets set, that slice of string is
 *    copied to dst.
 */
672 * regsub - perform substitutions after a regexp match
673 * Based on a routine by Henry Spencer
676 regsub(SPACE
*sp
, char *string
, char *src
)
681 #define NEEDSP(reqlen) \
682 /* XXX What is the +1 for? */ \
683 if (sp->len + (reqlen) + 1 >= sp->blen) { \
684 sp->blen += (reqlen) + 1024; \
685 if ((sp->back = realloc(sp->back, sp->blen)) == NULL) \
687 sp->space = sp->back; \
688 dst = sp->space + sp->len; \
/* Start writing at the current end of the space. */
691 dst
= sp
->space
+ sp
->len
;
692 while ((c
= *src
++) != '\0') {
695 else if (c
== '\\' && isdigit((unsigned char)*src
))
699 if (no
< 0) { /* Ordinary character. */
700 if (c
== '\\' && (*src
== '\\' || *src
== '&'))
/* A sub-match that did not participate has offsets of -1 and is skipped. */
705 } else if (match
[no
].rm_so
!= -1 && match
[no
].rm_eo
!= -1) {
706 len
= match
[no
].rm_eo
- match
[no
].rm_so
;
708 (void) memmove(dst
, string
+ match
[no
].rm_so
, len
);
/*
 * cspace: copy len bytes from p into the space sp and NUL-terminate it.
 *
 * sp     - destination space; sp->back is the reallocated buffer and
 *          sp->space is set to it whenever the buffer grows.
 * p, len - source bytes and their count.
 * spflag - REPLACE or APPEND.  The statement executed for REPLACE is on a
 *          line not visible in this listing.
 *
 * The buffer grows to tlen + 1024 bytes when sp->len + len + 1 exceeds
 * sp->blen.  The copy lands at sp->space + sp->len, after which sp->len is
 * advanced by len and a terminating NUL is stored at the new end.
 * NOTE(review): the required size is computed from the old sp->len before
 * the REPLACE test.
 */
719 * Concatenate space: append the source space to the destination space,
720 * allocating new space as necessary.
723 cspace(SPACE
*sp
, const char *p
, size_t len
, enum e_spflag spflag
)
727 /* Make sure SPACE has enough memory and ramp up quickly. */
728 tlen
= sp
->len
+ len
+ 1;
729 if (tlen
> sp
->blen
) {
730 sp
->blen
= tlen
+ 1024;
731 if ((sp
->space
= sp
->back
= realloc(sp
->back
, sp
->blen
)) ==
736 if (spflag
== REPLACE
)
739 (void) memmove(sp
->space
+ sp
->len
, p
, len
);
/* Advance len and terminate in one step; the NUL is not counted in len. */
741 sp
->space
[sp
->len
+= len
] = '\0';
/*
 * cfclose: close the output descriptors cached in the command list that
 * starts at cp and stops before end.
 *
 * Visible behaviour: for a command carrying substitution data, u.s->wfd is
 * closed when it is not -1 and a close() failure is reported through err()
 * with the file name; for a command carrying a plain descriptor, u.fd is
 * closed the same way (its error call is not visible here); for a command
 * with a nested list, the function recurses over u.c, stopping at cp->next.
 * The case labels that pick between these branches are not visible in this
 * listing.
 */
745 * Close all cached opened files and report any errors
748 cfclose(struct s_command
*cp
, struct s_command
*end
)
751 for (; cp
!= end
; cp
= cp
->next
)
754 if (cp
->u
.s
->wfd
!= -1 && close(cp
->u
.s
->wfd
))
755 err(1, "%s", cp
->u
.s
->wfile
);
759 if (cp
->u
.fd
!= -1 && close(cp
->u
.fd
))
/* Nested block: its list ends where the enclosing list continues. */
764 cfclose(cp
->u
.c
, cp
->next
);