tools/llvm: Do not build with symbols
[minix3.git] / usr.bin / sed / process.c
blob64ee0b59b3f315e6981eeadd060d7c34d73f4737
1 /* $NetBSD: process.c,v 1.39 2013/03/17 21:02:54 uwe Exp $ */
3 /*-
4 * Copyright (c) 1992, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
7 * This code is derived from software contributed to Berkeley by
8 * Diomidis Spinellis of Imperial College, University of London.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
35 /*-
36 * Copyright (c) 1992 Diomidis Spinellis.
38 * This code is derived from software contributed to Berkeley by
39 * Diomidis Spinellis of Imperial College, University of London.
41 * Redistribution and use in source and binary forms, with or without
42 * modification, are permitted provided that the following conditions
43 * are met:
44 * 1. Redistributions of source code must retain the above copyright
45 * notice, this list of conditions and the following disclaimer.
46 * 2. Redistributions in binary form must reproduce the above copyright
47 * notice, this list of conditions and the following disclaimer in the
48 * documentation and/or other materials provided with the distribution.
49 * 3. All advertising materials mentioning features or use of this software
50 * must display the following acknowledgement:
51 * This product includes software developed by the University of
52 * California, Berkeley and its contributors.
53 * 4. Neither the name of the University nor the names of its contributors
54 * may be used to endorse or promote products derived from this software
55 * without specific prior written permission.
57 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
58 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
59 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
60 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
61 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
62 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
63 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
64 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
65 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
66 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
67 * SUCH DAMAGE.
70 #if HAVE_NBTOOL_CONFIG_H
71 #include "nbtool_config.h"
72 #endif
74 #include <sys/cdefs.h>
75 #ifndef lint
76 #if 0
77 static char sccsid[] = "@(#)process.c 8.6 (Berkeley) 4/20/94";
78 #else
79 __RCSID("$NetBSD: process.c,v 1.39 2013/03/17 21:02:54 uwe Exp $");
80 #endif
81 #endif /* not lint */
83 #include <sys/types.h>
84 #include <sys/stat.h>
85 #include <sys/ioctl.h>
86 #include <sys/uio.h>
88 #include <ctype.h>
89 #include <errno.h>
90 #include <fcntl.h>
91 #include <limits.h>
92 #include <regex.h>
93 #include <stdio.h>
94 #include <stdlib.h>
95 #include <string.h>
96 #include <unistd.h>
98 #include "defs.h"
99 #include "extern.h"
101 static SPACE HS, PS, SS;
102 #define pd PS.deleted
103 #define ps PS.space
104 #define psl PS.len
105 #define hs HS.space
106 #define hsl HS.len
108 static inline int applies(struct s_command *);
109 static void flush_appends(void);
110 static void lputs(char *);
111 static inline int regexec_e(regex_t *, const char *, int, int, size_t);
112 static void regsub(SPACE *, char *, char *);
113 static int substitute(struct s_command *);
115 struct s_appends *appends; /* Array of pointers to strings to append. */
116 static int appendx; /* Index into appends array. */
117 int appendnum; /* Size of appends array. */
119 static int lastaddr; /* Set by applies if last address of a range. */
120 static int sdone; /* If any substitutes since last line input. */
121 /* Iov structure for 'w' commands. */
122 static regex_t *defpreg;
123 size_t maxnsub;
124 regmatch_t *match;
126 #define OUT(s) { fwrite(s, sizeof(u_char), psl, stdout); }
128 void
129 process(void)
131 struct s_command *cp;
132 SPACE tspace;
133 size_t len, oldpsl;
134 char *p;
136 oldpsl = 0;
137 for (linenum = 0; mf_fgets(&PS, REPLACE);) {
138 pd = 0;
139 top:
140 cp = prog;
141 redirect:
142 while (cp != NULL) {
143 if (!applies(cp)) {
144 cp = cp->next;
145 continue;
147 switch (cp->code) {
148 case '{':
149 cp = cp->u.c;
150 goto redirect;
151 case 'a':
152 if (appendx >= appendnum) {
153 appends = xrealloc(appends,
154 sizeof(struct s_appends) *
155 (appendnum * 2));
156 appendnum *= 2;
158 appends[appendx].type = AP_STRING;
159 appends[appendx].s = cp->t;
160 appends[appendx].len = strlen(cp->t);
161 appendx++;
162 break;
163 case 'b':
164 cp = cp->u.c;
165 goto redirect;
166 case 'c':
167 pd = 1;
168 psl = 0;
169 if (cp->a2 == NULL || lastaddr)
170 (void)printf("%s", cp->t);
171 goto new;
172 case 'd':
173 pd = 1;
174 goto new;
175 case 'D':
176 if (psl == 0)
177 pd = 1;
178 if (pd)
179 goto new;
180 if ((p = memchr(ps, '\n', psl - 1)) == NULL) {
181 pd = 1;
182 goto new;
183 } else {
184 psl -= (p + 1) - ps;
185 memmove(ps, p + 1, psl);
186 goto top;
188 case 'g':
189 cspace(&PS, hs, hsl, REPLACE);
190 break;
191 case 'G':
192 if (hs == NULL)
193 cspace(&HS, "\n", 1, REPLACE);
194 cspace(&PS, hs, hsl, 0);
195 break;
196 case 'h':
197 cspace(&HS, ps, psl, REPLACE);
198 break;
199 case 'H':
200 cspace(&HS, ps, psl, 0);
201 break;
202 case 'i':
203 (void)printf("%s", cp->t);
204 break;
205 case 'l':
206 lputs(ps);
207 break;
208 case 'n':
209 if (!nflag && !pd)
210 OUT(ps)
211 flush_appends();
212 if (!mf_fgets(&PS, REPLACE))
213 exit(0);
214 pd = 0;
215 break;
216 case 'N':
217 flush_appends();
218 if (!mf_fgets(&PS, 0)) {
219 if (!nflag && !pd)
220 OUT(ps)
221 exit(0);
223 break;
224 case 'p':
225 if (pd)
226 break;
227 OUT(ps)
228 break;
229 case 'P':
230 if (pd)
231 break;
232 if ((p = memchr(ps, '\n', psl - 1)) != NULL) {
233 oldpsl = psl;
234 psl = (p + 1) - ps;
236 OUT(ps)
237 if (p != NULL)
238 psl = oldpsl;
239 break;
240 case 'q':
241 if (!nflag && !pd)
242 OUT(ps)
243 flush_appends();
244 exit(0);
245 case 'r':
246 if (appendx >= appendnum) {
247 appends = xrealloc(appends,
248 sizeof(struct s_appends) *
249 (appendnum * 2));
250 appendnum *= 2;
252 appends[appendx].type = AP_FILE;
253 appends[appendx].s = cp->t;
254 appends[appendx].len = strlen(cp->t);
255 appendx++;
256 break;
257 case 's':
258 sdone |= substitute(cp);
259 break;
260 case 't':
261 if (sdone) {
262 sdone = 0;
263 cp = cp->u.c;
264 goto redirect;
266 break;
267 case 'w':
268 if (pd)
269 break;
270 if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
271 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
272 DEFFILEMODE)) == -1)
273 err(FATAL, "%s: %s",
274 cp->t, strerror(errno));
275 if ((size_t)write(cp->u.fd, ps, psl) != psl)
276 err(FATAL, "%s: %s",
277 cp->t, strerror(errno));
278 break;
279 case 'x':
280 if (hs == NULL)
281 cspace(&HS, "\n", 1, REPLACE);
282 tspace = PS;
283 PS = HS;
284 HS = tspace;
285 break;
286 case 'y':
287 if (pd)
288 break;
289 for (p = ps, len = psl; --len; ++p)
290 *p = cp->u.y[(int)*p];
291 break;
292 case ':':
293 case '}':
294 break;
295 case '=':
296 (void)printf("%lu\n", linenum);
298 cp = cp->next;
299 } /* for all cp */
301 new: if (!nflag && !pd)
302 OUT(ps)
303 flush_appends();
304 } /* for all lines */
308 * TRUE if the address passed matches the current program state
309 * (lastline, linenumber, ps).
311 #define MATCH(a) \
312 (a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) : \
313 (a)->type == AT_LINE ? linenum == (a)->u.l : lastline
316 * Return TRUE if the command applies to the current line. Sets the inrange
317 * flag to process ranges. Interprets the non-select (``!'') flag.
319 static inline int
320 applies(struct s_command *cp)
322 int r;
324 lastaddr = 0;
325 if (cp->a1 == NULL && cp->a2 == NULL)
326 r = 1;
327 else if (cp->a2) {
328 if (cp->inrange) {
329 if (MATCH(cp->a2)) {
330 cp->inrange = 0;
331 lastaddr = 1;
333 r = 1;
334 } else if (cp->a1 && MATCH(cp->a1)) {
336 * If the second address is a number less than or
337 * equal to the line number first selected, only
338 * one line shall be selected.
339 * -- POSIX 1003.2
341 if (cp->a2->type == AT_LINE &&
342 linenum >= cp->a2->u.l)
343 lastaddr = 1;
344 else
345 cp->inrange = 1;
346 r = 1;
347 } else
348 r = 0;
349 } else
350 r = MATCH(cp->a1);
351 return (cp->nonsel ? ! r : r);
355 * substitute --
356 * Do substitutions in the pattern space. Currently, we build a
357 * copy of the new pattern space in the substitute space structure
358 * and then swap them.
360 static int
361 substitute(struct s_command *cp)
363 SPACE tspace;
364 regex_t *re;
365 size_t re_off, slen;
366 int lastempty, n;
367 char *s;
369 s = ps;
370 re = cp->u.s->re;
371 if (re == NULL) {
372 if (defpreg != NULL && (size_t)cp->u.s->maxbref > defpreg->re_nsub) {
373 linenum = cp->u.s->linenum;
374 err(COMPILE, "\\%d not defined in the RE",
375 cp->u.s->maxbref);
378 if (!regexec_e(re, s, 0, 0, psl))
379 return (0);
381 SS.len = 0; /* Clean substitute space. */
382 slen = psl;
383 n = cp->u.s->n;
384 lastempty = 1;
386 switch (n) {
387 case 0: /* Global */
388 do {
389 if (lastempty || match[0].rm_so != match[0].rm_eo) {
390 /* Locate start of replaced string. */
391 re_off = match[0].rm_so;
392 /* Copy leading retained string. */
393 cspace(&SS, s, re_off, APPEND);
394 /* Add in regular expression. */
395 regsub(&SS, s, cp->u.s->new);
398 /* Move past this match. */
399 if (match[0].rm_so != match[0].rm_eo) {
400 s += match[0].rm_eo;
401 slen -= match[0].rm_eo;
402 lastempty = 0;
403 } else {
404 if (match[0].rm_so == 0)
405 cspace(&SS,
406 s, match[0].rm_so + 1, APPEND);
407 else
408 cspace(&SS,
409 s + match[0].rm_so, 1, APPEND);
410 s += match[0].rm_so + 1;
411 slen -= match[0].rm_so + 1;
412 lastempty = 1;
414 } while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
415 /* Copy trailing retained string. */
416 if (slen > 0)
417 cspace(&SS, s, slen, APPEND);
418 break;
419 default: /* Nth occurrence */
420 while (--n) {
421 s += match[0].rm_eo;
422 slen -= match[0].rm_eo;
423 if (!regexec_e(re, s, REG_NOTBOL, 0, slen))
424 return (0);
426 /* FALLTHROUGH */
427 case 1: /* 1st occurrence */
428 /* Locate start of replaced string. */
429 re_off = match[0].rm_so + (s - ps);
430 /* Copy leading retained string. */
431 cspace(&SS, ps, re_off, APPEND);
432 /* Add in regular expression. */
433 regsub(&SS, s, cp->u.s->new);
434 /* Copy trailing retained string. */
435 s += match[0].rm_eo;
436 slen -= match[0].rm_eo;
437 cspace(&SS, s, slen, APPEND);
438 break;
442 * Swap the substitute space and the pattern space, and make sure
443 * that any leftover pointers into stdio memory get lost.
445 tspace = PS;
446 PS = SS;
447 SS = tspace;
448 SS.space = SS.back;
450 /* Handle the 'p' flag. */
451 if (cp->u.s->p)
452 OUT(ps)
454 /* Handle the 'w' flag. */
455 if (cp->u.s->wfile && !pd) {
456 if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
457 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
458 err(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
459 if ((size_t)write(cp->u.s->wfd, ps, psl) != psl)
460 err(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
462 return (1);
466 * Flush append requests. Always called before reading a line,
467 * therefore it also resets the substitution done (sdone) flag.
469 static void
470 flush_appends(void)
472 FILE *f;
473 int count, i;
474 char buf[8 * 1024];
476 for (i = 0; i < appendx; i++)
477 switch (appends[i].type) {
478 case AP_STRING:
479 fwrite(appends[i].s, sizeof(char), appends[i].len,
480 stdout);
481 break;
482 case AP_FILE:
484 * Read files probably shouldn't be cached. Since
485 * it's not an error to read a non-existent file,
486 * it's possible that another program is interacting
487 * with the sed script through the file system. It
488 * would be truly bizarre, but possible. It's probably
489 * not that big a performance win, anyhow.
491 if ((f = fopen(appends[i].s, "r")) == NULL)
492 break;
493 while ((count =
494 fread(buf, sizeof(char), sizeof(buf), f)) > 0)
495 (void)fwrite(buf, sizeof(char), count, stdout);
496 (void)fclose(f);
497 break;
499 if (ferror(stdout))
500 err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
501 appendx = sdone = 0;
504 static void
505 lputs(char *s)
507 int count;
508 const char *escapes, *p;
509 #ifndef HAVE_NBTOOL_CONFIG_H
510 struct winsize win;
511 #endif
512 static int termwidth = -1;
514 if (termwidth == -1) {
515 if ((p = getenv("COLUMNS")) != NULL)
516 termwidth = atoi(p);
517 #ifndef HAVE_NBTOOL_CONFIG_H
518 else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
519 win.ws_col > 0)
520 termwidth = win.ws_col;
521 #endif
522 else
523 termwidth = 60;
525 for (count = 0; *s; ++s) {
526 if (count >= termwidth) {
527 (void)printf("\\\n");
528 count = 0;
530 if (isascii((unsigned char)*s) && isprint((unsigned char)*s) &&
531 *s != '\\') {
532 (void)putchar(*s);
533 count++;
534 } else {
535 escapes = "\\\a\b\f\n\r\t\v";
536 (void)putchar('\\');
537 if ((p = strchr(escapes, *s)) != NULL) {
538 (void)putchar("\\abfnrtv"[p - escapes]);
539 count += 2;
540 } else {
541 (void)printf("%03o", *(u_char *)s);
542 count += 4;
546 (void)putchar('$');
547 (void)putchar('\n');
548 if (ferror(stdout))
549 err(FATAL, "stdout: %s", strerror(errno ? errno : EIO));
552 static inline int
553 regexec_e(regex_t *preg, const char *string, int eflags, int nomatch, size_t slen)
555 int eval;
556 #ifndef REG_STARTEND
557 char *buf;
558 #endif
560 if (preg == NULL) {
561 if (defpreg == NULL)
562 err(FATAL, "first RE may not be empty");
563 } else
564 defpreg = preg;
566 /* Set anchors, discounting trailing newline (if any). */
567 if (slen > 0 && string[slen - 1] == '\n')
568 slen--;
570 #ifndef REG_STARTEND
571 if ((buf = malloc(slen + 1)) == NULL)
572 err(1, NULL);
573 (void)memcpy(buf, string, slen);
574 buf[slen] = '\0';
575 eval = regexec(defpreg, buf,
576 nomatch ? 0 : maxnsub + 1, match, eflags);
577 free(buf);
578 #else
579 match[0].rm_so = 0;
580 match[0].rm_eo = slen;
581 eval = regexec(defpreg, string,
582 nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
583 #endif
584 switch(eval) {
585 case 0:
586 return (1);
587 case REG_NOMATCH:
588 return (0);
590 err(FATAL, "RE error: %s", strregerror(eval, defpreg));
591 /* NOTREACHED */
592 return (0);
596 * regsub - perform substitutions after a regexp match
597 * Based on a routine by Henry Spencer
599 static void
600 regsub(SPACE *sp, char *string, char *src)
602 int len, no;
603 char c, *dst;
605 #define NEEDSP(reqlen) \
606 if (sp->len + (reqlen) + 1 >= sp->blen) { \
607 size_t newlen = sp->blen + (reqlen) + 1024; \
608 sp->space = sp->back = xrealloc(sp->back, newlen); \
609 sp->blen = newlen; \
610 dst = sp->space + sp->len; \
613 dst = sp->space + sp->len;
614 while ((c = *src++) != '\0') {
615 if (c == '&')
616 no = 0;
617 else if (c == '\\' && isdigit((unsigned char)*src))
618 no = *src++ - '0';
619 else
620 no = -1;
621 if (no < 0) { /* Ordinary character. */
622 if (c == '\\' && (*src == '\\' || *src == '&'))
623 c = *src++;
624 NEEDSP(1);
625 *dst++ = c;
626 ++sp->len;
627 } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
628 len = match[no].rm_eo - match[no].rm_so;
629 NEEDSP(len);
630 memmove(dst, string + match[no].rm_so, len);
631 dst += len;
632 sp->len += len;
635 NEEDSP(1);
636 *dst = '\0';
640 * aspace --
641 * Append the source space to the destination space, allocating new
642 * space as necessary.
644 void
645 cspace(SPACE *sp, const char *p, size_t len, enum e_spflag spflag)
647 size_t tlen;
649 /* Make sure SPACE has enough memory and ramp up quickly. */
650 tlen = sp->len + len + 1;
651 if (tlen > sp->blen) {
652 size_t newlen = tlen + 1024;
653 sp->space = sp->back = xrealloc(sp->back, newlen);
654 sp->blen = newlen;
657 if (spflag == REPLACE)
658 sp->len = 0;
660 memmove(sp->space + sp->len, p, len);
662 sp->space[sp->len += len] = '\0';
666 * Close all cached opened files and report any errors
668 void
669 cfclose(struct s_command *cp, struct s_command *end)
672 for (; cp != end; cp = cp->next)
673 switch(cp->code) {
674 case 's':
675 if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
676 err(FATAL,
677 "%s: %s", cp->u.s->wfile, strerror(errno));
678 cp->u.s->wfd = -1;
679 break;
680 case 'w':
681 if (cp->u.fd != -1 && close(cp->u.fd))
682 err(FATAL, "%s: %s", cp->t, strerror(errno));
683 cp->u.fd = -1;
684 break;
685 case '{':
686 cfclose(cp->u.c, cp->next);
687 break;