Remove building with NOCRYPTO option
[minix.git] / usr.bin / sed / process.c
blob896df5a65dbeedd43ac20e7e3c4c1e71aa44ff2f
1 /* $NetBSD: process.c,v 1.52 2015/03/12 12:40:41 christos Exp $ */
3 /*-
4 * Copyright (c) 1992 Diomidis Spinellis.
5 * Copyright (c) 1992, 1993, 1994
6 * The Regents of the University of California. All rights reserved.
8 * This code is derived from software contributed to Berkeley by
9 * Diomidis Spinellis of Imperial College, University of London.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
36 #if HAVE_NBTOOL_CONFIG_H
37 #include "nbtool_config.h"
38 #endif
40 #include <sys/cdefs.h>
41 __RCSID("$NetBSD: process.c,v 1.52 2015/03/12 12:40:41 christos Exp $");
42 #ifdef __FBSDID
43 __FBSDID("$FreeBSD: head/usr.bin/sed/process.c 192732 2009-05-25 06:45:33Z brian $");
44 #endif
46 #if 0
47 static const char sccsid[] = "@(#)process.c 8.6 (Berkeley) 4/20/94";
48 #endif
50 #include <sys/types.h>
51 #include <sys/stat.h>
52 #include <sys/ioctl.h>
53 #include <sys/uio.h>
55 #include <ctype.h>
56 #include <err.h>
57 #include <errno.h>
58 #include <fcntl.h>
59 #include <limits.h>
60 #include <regex.h>
61 #include <stdio.h>
62 #include <stdlib.h>
63 #include <string.h>
64 #include <unistd.h>
65 #include <wchar.h>
66 #include <wctype.h>
68 #include "defs.h"
69 #include "extern.h"
71 static SPACE HS, PS, SS, YS;
72 #define pd PS.deleted
73 #define ps PS.space
74 #define psl PS.len
75 #define hs HS.space
76 #define hsl HS.len
78 static __inline int applies(struct s_command *);
79 static void do_tr(struct s_tr *);
80 static void flush_appends(void);
81 static void lputs(char *, size_t);
82 static __inline int regexec_e(regex_t *, const char *, int, int, size_t);
83 static void regsub(SPACE *, char *, char *);
84 static int substitute(struct s_command *);
86 struct s_appends *appends; /* Array of pointers to strings to append. */
87 static size_t appendx; /* Index into appends array. */
88 size_t appendnum; /* Size of appends array. */
90 static int lastaddr; /* Set by applies if last address of a range. */
91 static int sdone; /* If any substitutes since last line input. */
92 /* Iov structure for 'w' commands. */
93 static regex_t *defpreg;
94 size_t maxnsub;
95 regmatch_t *match;
97 #define OUT() do {fwrite(ps, 1, psl, outfile); fputc('\n', outfile);} while (0)
99 void
100 process(void)
102 struct s_command *cp;
103 SPACE tspace;
104 size_t oldpsl = 0;
105 char *p;
107 p = NULL;
109 for (linenum = 0; mf_fgets(&PS, REPLACE);) {
110 pd = 0;
111 top:
112 cp = prog;
113 redirect:
114 while (cp != NULL) {
115 if (!applies(cp)) {
116 cp = cp->next;
117 continue;
119 switch (cp->code) {
120 case '{':
121 cp = cp->u.c;
122 goto redirect;
123 case 'a':
124 if (appendx >= appendnum)
125 appends = xrealloc(appends,
126 sizeof(struct s_appends) *
127 (appendnum *= 2));
128 appends[appendx].type = AP_STRING;
129 appends[appendx].s = cp->t;
130 appends[appendx].len = strlen(cp->t);
131 appendx++;
132 break;
133 case 'b':
134 cp = cp->u.c;
135 goto redirect;
136 case 'c':
137 pd = 1;
138 psl = 0;
139 if (cp->a2 == NULL || lastaddr || lastline())
140 (void)fprintf(outfile, "%s", cp->t);
141 goto new;
142 case 'd':
143 pd = 1;
144 goto new;
145 case 'D':
146 if (pd)
147 goto new;
148 if (psl == 0 ||
149 (p = memchr(ps, '\n', psl - 1)) == NULL) {
150 pd = 1;
151 goto new;
152 } else {
153 psl -= (size_t)((p + 1) - ps);
154 memmove(ps, p + 1, psl);
155 goto top;
157 case 'g':
158 cspace(&PS, hs, hsl, REPLACE);
159 break;
160 case 'G':
161 cspace(&PS, "\n", 1, APPEND);
162 cspace(&PS, hs, hsl, APPEND);
163 break;
164 case 'h':
165 cspace(&HS, ps, psl, REPLACE);
166 break;
167 case 'H':
168 cspace(&HS, "\n", 1, APPEND);
169 cspace(&HS, ps, psl, APPEND);
170 break;
171 case 'i':
172 (void)fprintf(outfile, "%s", cp->t);
173 break;
174 case 'l':
175 lputs(ps, psl);
176 break;
177 case 'n':
178 if (!nflag && !pd)
179 OUT();
180 flush_appends();
181 if (!mf_fgets(&PS, REPLACE))
182 exit(0);
183 pd = 0;
184 break;
185 case 'N':
186 flush_appends();
187 cspace(&PS, "\n", 1, APPEND);
188 if (!mf_fgets(&PS, APPEND))
189 exit(0);
190 break;
191 case 'p':
192 if (pd)
193 break;
194 OUT();
195 break;
196 case 'P':
197 if (pd)
198 break;
199 if ((p = memchr(ps, '\n', psl - 1)) != NULL) {
200 oldpsl = psl;
201 psl = (size_t)(p - ps);
203 OUT();
204 if (p != NULL)
205 psl = oldpsl;
206 break;
207 case 'q':
208 if (!nflag && !pd)
209 OUT();
210 flush_appends();
211 exit(0);
212 case 'r':
213 if (appendx >= appendnum)
214 appends = xrealloc(appends,
215 sizeof(struct s_appends) *
216 (appendnum *= 2));
217 appends[appendx].type = AP_FILE;
218 appends[appendx].s = cp->t;
219 appends[appendx].len = strlen(cp->t);
220 appendx++;
221 break;
222 case 's':
223 sdone |= substitute(cp);
224 break;
225 case 't':
226 if (sdone) {
227 sdone = 0;
228 cp = cp->u.c;
229 goto redirect;
231 break;
232 case 'w':
233 if (pd)
234 break;
235 if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
236 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
237 DEFFILEMODE)) == -1)
238 err(1, "%s", cp->t);
239 if (write(cp->u.fd, ps, psl) != (ssize_t)psl ||
240 write(cp->u.fd, "\n", 1) != 1)
241 err(1, "%s", cp->t);
242 break;
243 case 'x':
245 * If the hold space is null, make it empty
246 * but not null. Otherwise the pattern space
247 * will become null after the swap, which is
248 * an abnormal condition.
250 if (hs == NULL)
251 cspace(&HS, "", 0, REPLACE);
252 tspace = PS;
253 PS = HS;
254 HS = tspace;
255 break;
256 case 'y':
257 if (pd || psl == 0)
258 break;
259 do_tr(cp->u.y);
260 break;
261 case ':':
262 case '}':
263 break;
264 case '=':
265 (void)fprintf(outfile, "%lu\n", linenum);
267 cp = cp->next;
268 } /* for all cp */
270 new: if (!nflag && !pd)
271 OUT();
272 flush_appends();
273 } /* for all lines */
277 * TRUE if the address passed matches the current program state
278 * (lastline, linenumber, ps).
280 #define MATCH(a) \
281 ((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) : \
282 (a)->type == AT_LINE ? linenum == (a)->u.l : lastline())
285 * Return TRUE if the command applies to the current line. Sets the start
286 * line for process ranges. Interprets the non-select (``!'') flag.
288 static __inline int
289 applies(struct s_command *cp)
291 int r;
293 lastaddr = 0;
294 if (cp->a1 == NULL && cp->a2 == NULL)
295 r = 1;
296 else if (cp->a2)
297 if (cp->startline > 0) {
298 switch (cp->a2->type) {
299 case AT_RELLINE:
300 if (linenum - cp->startline <= cp->a2->u.l)
301 r = 1;
302 else {
303 cp->startline = 0;
304 r = 0;
306 break;
307 default:
308 if (MATCH(cp->a2)) {
309 cp->startline = 0;
310 lastaddr = 1;
311 r = 1;
312 } else if (cp->a2->type == AT_LINE &&
313 linenum > cp->a2->u.l) {
315 * We missed the 2nd address due to a
316 * branch, so just close the range and
317 * return false.
319 cp->startline = 0;
320 r = 0;
321 } else
322 r = 1;
324 } else if (cp->a1 && MATCH(cp->a1)) {
326 * If the second address is a number less than or
327 * equal to the line number first selected, only
328 * one line shall be selected.
329 * -- POSIX 1003.2
330 * Likewise if the relative second line address is zero.
332 if ((cp->a2->type == AT_LINE &&
333 linenum >= cp->a2->u.l) ||
334 (cp->a2->type == AT_RELLINE && cp->a2->u.l == 0))
335 lastaddr = 1;
336 else {
337 cp->startline = linenum;
339 r = 1;
340 } else
341 r = 0;
342 else
343 r = MATCH(cp->a1);
344 return (cp->nonsel ? ! r : r);
348 * Reset the sed processor to its initial state.
350 void
351 resetstate(void)
353 struct s_command *cp;
356 * Reset all in-range markers.
358 for (cp = prog; cp; cp = cp->code == '{' ? cp->u.c : cp->next)
359 if (cp->a2)
360 cp->startline = 0;
363 * Clear out the hold space.
365 cspace(&HS, "", 0, REPLACE);
369 * substitute --
370 * Do substitutions in the pattern space. Currently, we build a
371 * copy of the new pattern space in the substitute space structure
372 * and then swap them.
374 static int
375 substitute(struct s_command *cp)
377 SPACE tspace;
378 regex_t *re;
379 regoff_t re_off, slen;
380 int lastempty, n;
381 char *s;
383 s = ps;
384 re = cp->u.s->re;
385 if (re == NULL) {
386 if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
387 linenum = cp->u.s->linenum;
388 errx(1, "%lu: %s: \\%u not defined in the RE",
389 linenum, fname, cp->u.s->maxbref);
392 if (!regexec_e(re, s, 0, 0, psl))
393 return (0);
395 SS.len = 0; /* Clean substitute space. */
396 slen = (regoff_t)psl;
397 n = cp->u.s->n;
398 lastempty = 1;
400 switch (n) {
401 case 0: /* Global */
402 do {
403 if (lastempty || match[0].rm_so != match[0].rm_eo) {
404 /* Locate start of replaced string. */
405 re_off = match[0].rm_so;
406 /* Copy leading retained string. */
407 cspace(&SS, s, (size_t)re_off, APPEND);
408 /* Add in regular expression. */
409 regsub(&SS, s, cp->u.s->new);
412 /* Move past this match. */
413 if (match[0].rm_so != match[0].rm_eo) {
414 s += match[0].rm_eo;
415 slen -= match[0].rm_eo;
416 lastempty = 0;
417 } else {
418 if (match[0].rm_so < slen)
419 cspace(&SS, s + match[0].rm_so, 1,
420 APPEND);
421 s += match[0].rm_so + 1;
422 slen -= match[0].rm_so + 1;
423 lastempty = 1;
425 } while (slen >= 0 && regexec_e(re, s, REG_NOTBOL, 0, (size_t)slen));
426 /* Copy trailing retained string. */
427 if (slen > 0)
428 cspace(&SS, s, (size_t)slen, APPEND);
429 break;
430 default: /* Nth occurrence */
431 while (--n) {
432 if (match[0].rm_eo == match[0].rm_so)
433 match[0].rm_eo = match[0].rm_so + 1;
434 s += match[0].rm_eo;
435 slen -= match[0].rm_eo;
436 if (slen < 0)
437 return (0);
438 if (!regexec_e(re, s, REG_NOTBOL, 0, (size_t)slen))
439 return (0);
441 /* FALLTHROUGH */
442 case 1: /* 1st occurrence */
443 /* Locate start of replaced string. */
444 re_off = match[0].rm_so + (s - ps);
445 /* Copy leading retained string. */
446 cspace(&SS, ps, (size_t)re_off, APPEND);
447 /* Add in regular expression. */
448 regsub(&SS, s, cp->u.s->new);
449 /* Copy trailing retained string. */
450 s += match[0].rm_eo;
451 slen -= match[0].rm_eo;
452 cspace(&SS, s, (size_t)slen, APPEND);
453 break;
457 * Swap the substitute space and the pattern space, and make sure
458 * that any leftover pointers into stdio memory get lost.
460 tspace = PS;
461 PS = SS;
462 SS = tspace;
463 SS.space = SS.back;
465 /* Handle the 'p' flag. */
466 if (cp->u.s->p)
467 OUT();
469 /* Handle the 'w' flag. */
470 if (cp->u.s->wfile && !pd) {
471 if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
472 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
473 err(1, "%s", cp->u.s->wfile);
474 if (write(cp->u.s->wfd, ps, psl) != (ssize_t)psl ||
475 write(cp->u.s->wfd, "\n", 1) != 1)
476 err(1, "%s", cp->u.s->wfile);
478 return (1);
482 * do_tr --
483 * Perform translation ('y' command) in the pattern space.
485 static void
486 do_tr(struct s_tr *y)
488 SPACE tmp;
489 char c, *p;
490 size_t clen, left;
491 size_t i;
493 if (MB_CUR_MAX == 1) {
495 * Single-byte encoding: perform in-place translation
496 * of the pattern space.
498 for (p = ps; p < &ps[psl]; p++)
499 *p = (char)y->bytetab[(u_char)*p];
500 } else {
502 * Multi-byte encoding: perform translation into the
503 * translation space, then swap the translation and
504 * pattern spaces.
506 /* Clean translation space. */
507 YS.len = 0;
508 for (p = ps, left = psl; left > 0; p += clen, left -= clen) {
509 if ((c = (char)y->bytetab[(u_char)*p]) != '\0') {
510 cspace(&YS, &c, 1, APPEND);
511 clen = 1;
512 continue;
514 for (i = 0; i < y->nmultis; i++)
515 if (left >= y->multis[i].fromlen &&
516 memcmp(p, y->multis[i].from,
517 y->multis[i].fromlen) == 0)
518 break;
519 if (i < y->nmultis) {
520 cspace(&YS, y->multis[i].to,
521 y->multis[i].tolen, APPEND);
522 clen = y->multis[i].fromlen;
523 } else {
524 cspace(&YS, p, 1, APPEND);
525 clen = 1;
528 /* Swap the translation space and the pattern space. */
529 tmp = PS;
530 PS = YS;
531 YS = tmp;
532 YS.space = YS.back;
537 * Flush append requests. Always called before reading a line,
538 * therefore it also resets the substitution done (sdone) flag.
540 static void
541 flush_appends(void)
543 FILE *f;
544 size_t count, i;
545 char buf[8 * 1024];
547 for (i = 0; i < appendx; i++)
548 switch (appends[i].type) {
549 case AP_STRING:
550 fwrite(appends[i].s, sizeof(char), appends[i].len,
551 outfile);
552 break;
553 case AP_FILE:
555 * Read files probably shouldn't be cached. Since
556 * it's not an error to read a non-existent file,
557 * it's possible that another program is interacting
558 * with the sed script through the filesystem. It
559 * would be truly bizarre, but possible. It's probably
560 * not that big a performance win, anyhow.
562 if ((f = fopen(appends[i].s, "r")) == NULL)
563 break;
564 while ((count = fread(buf, sizeof(char), sizeof(buf), f)))
565 (void)fwrite(buf, sizeof(char), count, outfile);
566 (void)fclose(f);
567 break;
569 if (ferror(outfile))
570 errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO));
571 appendx = 0;
572 sdone = 0;
575 static void
576 lputs(char *s, size_t len)
578 static const char escapes[] = "\\\a\b\f\r\t\v";
579 int c;
580 size_t col, width;
581 const char *p;
582 #ifdef TIOCGWINSZ
583 struct winsize win;
584 #endif
585 static size_t termwidth = (size_t)-1;
586 size_t clen, i;
587 wchar_t wc;
588 mbstate_t mbs;
590 if (outfile != stdout)
591 termwidth = 60;
592 if (termwidth == (size_t)-1) {
593 if ((p = getenv("COLUMNS")) && *p != '\0')
594 termwidth = (size_t)atoi(p);
595 #ifdef TIOCGWINSZ
596 else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
597 win.ws_col > 0)
598 termwidth = win.ws_col;
599 #endif
600 else
601 termwidth = 60;
603 if (termwidth == 0)
604 termwidth = 1;
606 memset(&mbs, 0, sizeof(mbs));
607 col = 0;
608 while (len != 0) {
609 clen = mbrtowc(&wc, s, len, &mbs);
610 if (clen == 0)
611 clen = 1;
612 if (clen == (size_t)-1 || clen == (size_t)-2) {
613 wc = (unsigned char)*s;
614 clen = 1;
615 memset(&mbs, 0, sizeof(mbs));
617 if (wc == '\n') {
618 if (col + 1 >= termwidth)
619 fprintf(outfile, "\\\n");
620 fputc('$', outfile);
621 fputc('\n', outfile);
622 col = 0;
623 } else if (iswprint(wc)) {
624 width = (size_t)wcwidth(wc);
625 if (col + width >= termwidth) {
626 fprintf(outfile, "\\\n");
627 col = 0;
629 fwrite(s, 1, clen, outfile);
630 col += width;
631 } else if (wc != L'\0' && (c = wctob(wc)) != EOF &&
632 (p = strchr(escapes, c)) != NULL) {
633 if (col + 2 >= termwidth) {
634 fprintf(outfile, "\\\n");
635 col = 0;
637 fprintf(outfile, "\\%c", "\\abfrtv"[p - escapes]);
638 col += 2;
639 } else {
640 if (col + 4 * clen >= termwidth) {
641 fprintf(outfile, "\\\n");
642 col = 0;
644 for (i = 0; i < clen; i++)
645 fprintf(outfile, "\\%03o",
646 (int)(unsigned char)s[i]);
647 col += 4 * clen;
649 s += clen;
650 len -= clen;
652 if (col + 1 >= termwidth)
653 fprintf(outfile, "\\\n");
654 (void)fputc('$', outfile);
655 (void)fputc('\n', outfile);
656 if (ferror(outfile))
657 errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO));
660 static __inline int
661 regexec_e(regex_t *preg, const char *string, int eflags, int nomatch,
662 size_t slen)
664 int eval;
665 #ifndef REG_STARTEND
666 char *buf;
667 #endif
669 if (preg == NULL) {
670 if (defpreg == NULL)
671 errx(1, "first RE may not be empty");
672 } else
673 defpreg = preg;
675 /* Set anchors */
676 #ifndef REG_STARTEND
677 buf = xmalloc(slen + 1);
678 (void)memcpy(buf, string, slen);
679 buf[slen] = '\0';
680 eval = regexec(defpreg, buf,
681 nomatch ? 0 : maxnsub + 1, match, eflags);
682 free(buf);
683 #else
684 match[0].rm_so = 0;
685 match[0].rm_eo = (regoff_t)slen;
686 eval = regexec(defpreg, string,
687 nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
688 #endif
689 switch(eval) {
690 case 0:
691 return (1);
692 case REG_NOMATCH:
693 return (0);
695 errx(1, "RE error: %s", strregerror(eval, defpreg));
696 /* NOTREACHED */
700 * regsub - perform substitutions after a regexp match
701 * Based on a routine by Henry Spencer
703 static void
704 regsub(SPACE *sp, char *string, char *src)
706 size_t len;
707 int no;
708 char c, *dst;
710 #define NEEDSP(reqlen) \
711 /* XXX What is the +1 for? */ \
712 if (sp->len + (reqlen) + 1 >= sp->blen) { \
713 sp->blen += (reqlen) + 1024; \
714 sp->space = sp->back = xrealloc(sp->back, sp->blen); \
715 dst = sp->space + sp->len; \
718 dst = sp->space + sp->len;
719 while ((c = *src++) != '\0') {
720 if (c == '&')
721 no = 0;
722 else if (c == '\\' && isdigit((unsigned char)*src))
723 no = *src++ - '0';
724 else
725 no = -1;
726 if (no < 0) { /* Ordinary character. */
727 if (c == '\\' && (*src == '\\' || *src == '&'))
728 c = *src++;
729 NEEDSP(1);
730 *dst++ = c;
731 ++sp->len;
732 } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
733 len = (size_t)(match[no].rm_eo - match[no].rm_so);
734 NEEDSP(len);
735 memmove(dst, string + match[no].rm_so, len);
736 dst += len;
737 sp->len += len;
740 NEEDSP(1);
741 *dst = '\0';
745 * cspace --
746 * Concatenate space: append the source space to the destination space,
747 * allocating new space as necessary.
749 void
750 cspace(SPACE *sp, const char *p, size_t len, enum e_spflag spflag)
752 size_t tlen;
754 /* Make sure SPACE has enough memory and ramp up quickly. */
755 tlen = sp->len + len + 1;
756 if (tlen > sp->blen) {
757 sp->blen = tlen + 1024;
758 sp->space = sp->back = xrealloc(sp->back, sp->blen);
761 if (spflag == REPLACE)
762 sp->len = 0;
764 memmove(sp->space + sp->len, p, len);
766 sp->space[sp->len += len] = '\0';
770 * Close all cached opened files and report any errors
772 void
773 cfclose(struct s_command *cp, struct s_command *end)
776 for (; cp != end; cp = cp->next)
777 switch(cp->code) {
778 case 's':
779 if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
780 err(1, "%s", cp->u.s->wfile);
781 cp->u.s->wfd = -1;
782 break;
783 case 'w':
784 if (cp->u.fd != -1 && close(cp->u.fd))
785 err(1, "%s", cp->t);
786 cp->u.fd = -1;
787 break;
788 case '{':
789 cfclose(cp->u.c, cp->next);
790 break;