don't need to explicitly enable lance any more.
[minix.git] / commands / sed / sedcomp.c
blob95eb8c54969099a19a2d72d81d8f545fe6206956
1 /* sedcomp.c -- stream editor main and compilation phase
2 Copyright (C) 1995-2003 Eric S. Raymond
3 Copyright (C) 2004-2006 Rene Rebe
5 The stream editor compiles its command input (from files or -e options)
6 into an internal form using compile() then executes the compiled form using
7 execute(). Main() just initializes data structures, interprets command line
8 options, and calls compile() and execute() in appropriate sequence.
9 The data structure produced by compile() is an array of compiled-command
10 structures (type sedcmd). These contain several pointers into pool[], the
11 regular-expression and text-data pool, plus a command code and g & p flags.
12 In the special case that the command is a label the struct will hold a ptr
13 into the labels array labels[] during most of the compile, until resolve()
14 resolves references at the end.
15 The operation of execute() is described in its source module. */
18 #include <stdlib.h> /* exit */
19 #include <stdio.h> /* uses getc, fprintf, fopen, fclose */
20 #include <ctype.h> /* isdigit */
21 #include <string.h> /* strcmp */
22 #include "sed.h" /* command type struct and name defines */
24 /***** public stuff ******/
/* Capacity limits for the cmds[] and linenum[] tables defined just below. */
26 #define MAXCMDS 200 /* maximum number of compiled commands */
27 #define MAXLINES 256 /* max # numeric addresses to compile */
29 /* main data areas */
/* linebuf receives each script line from cmdline(); die() also passes it as
   the single argument to its message format. */
30 char linebuf[MAXBUF+1]; /* current-line buffer */
/* compile() fills cmds[] through the cmdp cursor and aborts once cmdp
   reaches cmds + MAXCMDS. */
31 sedcmd cmds[MAXCMDS+1]; /* hold compiled commands */
/* address() appends to linenum[]; a compiled numeric address stores the
   index of its entry rather than the number itself. */
32 long linenum[MAXLINES]; /* numeric-addresses table */
34 /* miscellaneous shared variables */
35 int nflag; /* -n option flag */
36 int eargc; /* scratch copy of argument count */
37 sedcmd *pending = NULL; /* next command to be executed */
/* set to TRUE by address() when it compiles a '$' address */
39 int last_line_used = 0; /* last line address ($) was used */
41 void die (const char* msg) {
42 fprintf(stderr, "sed: ");
43 fprintf(stderr, msg, linebuf);
44 fprintf(stderr, "\n");
45 exit(2);
48 /***** module common stuff *****/
/* Sizes of the file-static tables used by the compile phase. */
50 #define POOLSIZE 10000 /* size of string-pool space */
51 #define WFILES 10 /* max # w output files that can be compiled */
52 #define RELIMIT 256 /* max chars in compiled RE */
53 #define MAXDEPTH 20 /* maximum {}-nesting level */
54 #define MAXLABS 50 /* max # of labels that can be handled */
/* Advance pc past any run of spaces and tabs.  Expands to a bare while
   statement, so pc must be a plain pointer lvalue. */
56 #define SKIPWS(pc) while ((*pc==' ') || (*pc=='\t')) pc++
/* If *x equals v, step x past it and then evaluate the expression the caller
   writes directly after the macro; the trailing comma operator joins the
   increment and that expression into one statement. */
57 #define IFEQ(x, v) if (*x == v) x++ , /* do expression */
59 /* error messages */
/* All of these are printf formats.  The ones handed to die() get linebuf as
   their single argument; COCFI, UFLAG, CCOFI and ULABL are instead printed
   with fprintf() directly by main(), cmdcomp() and resolve(). */
60 static char AGMSG[] = "garbled address %s";
61 static char CGMSG[] = "garbled command %s";
62 static char TMTXT[] = "too much text: %s";
63 static char AD1NG[] = "no addresses allowed for %s";
64 static char AD2NG[] = "only one address allowed for %s";
65 static char TMCDS[] = "too many commands, last was %s";
66 static char COCFI[] = "cannot open command-file %s";
67 static char UFLAG[] = "unknown flag %c";
68 /*static char COOFI[] = "cannot open %s";*/
69 static char CCOFI[] = "cannot create %s";
70 static char ULABL[] = "undefined label %s";
71 static char TMLBR[] = "too many {'s";
72 static char FRENL[] = "first RE must be non-null";
73 static char NSCAX[] = "no such command as %s";
74 static char TMRBR[] = "too many }'s";
75 static char DLABL[] = "duplicate label %s";
76 static char TMLAB[] = "too many labels: %s";
77 static char TMWFI[] = "too many w files";
78 static char REITL[] = "RE too long: %s";
79 static char TMLNR[] = "too many line numbers";
80 static char TRAIL[] = "command \"%s\" has trailing garbage";
81 static char RETER[] = "RE not terminated: %s";
82 static char CCERR[] = "unknown character class: %s";
/* POSIX character-class names mapped to the characters they contain.
   The table is a flat list of (name, spec) string pairs terminated by a
   NULL pair.  recomp() looks a "[:name:]" class up by name and expands the
   spec into a 128-bit mask: "x-y" inside a spec is an inclusive range, any
   other character stands for itself.  NUL is never listed because recomp()
   always clears its bit afterwards.
   "cntrl" ends with DEL (0x7f); it previously listed '~' (0x7e), which is a
   printable character, and left DEL out. */
const char* cclasses[] = {
	"alnum", "a-zA-Z0-9",
	"lower", "a-z",
	"space", " \f\n\r\t\v",
	"alpha", "a-zA-Z",
	"digit", "0-9",
	"upper", "A-Z",
	"blank", " \t",
	"xdigit", "0-9A-Fa-f",
	"cntrl", "\x01-\x1f\x7f",
	"print", " -\x7e",
	"graph", "!-\x7e",
	"punct", "!-/:-@[-`{-\x7e",
	NULL, NULL};
/* One entry of the label table.  An entry is created either by a ':'
   declaration or by the first branch that names a label not yet seen. */
100 typedef struct /* represent a command label */
102 char *name; /* the label name */
/* head of a chain, linked through u.link, of branch commands that still
   need this label's address; resolve() walks it at the end of compilation */
103 sedcmd *last; /* it's on the label search list */
/* stays NULL while the label has only been referenced, not yet declared */
104 sedcmd *address; /* pointer to the cmd it labels */
105 } label;
107 /* label handling */
/* labels[0] is reserved as the list header (lablst); real labels start at
   labels[1], where lab initially points. */
108 static label labels[MAXLABS]; /* here's the label table */
109 static label *lab = labels + 1; /* pointer to current label */
110 static label *lablst = labels; /* header for search list */
112 /* string pool for regular expressions, append text, etc. etc. */
113 static char pool[POOLSIZE]; /* the pool */
/* fp is the allocation cursor: each compile routine writes at fp and
   returns the new end, which the caller stores back into fp. */
114 static char *fp = pool; /* current pool pointer */
115 static char *poolend = pool + POOLSIZE; /* pointer past pool end */
117 /* compilation state */
118 static FILE *cmdf = NULL; /* current command source */
/* cp walks linebuf; the compile routines read it and leave it just past
   whatever they consumed. */
119 static char *cp = linebuf; /* compile pointer */
120 static sedcmd *cmdp = cmds; /* current compiled-cmd ptr */
/* lastre is reused when an empty RE is written */
121 static char *lastre = NULL; /* old RE pointer */
122 static int bdepth = 0; /* current {}-nesting level */
123 static int bcount = 0; /* # tagged patterns in current RE */
124 static char **eargv; /* scratch copy of argument list */
126 /* compilation flags */
/* eflag is >0 when a fresh -e argument should be fetched, and is set to -1
   by cmdline() while it is still handing out lines of that argument. */
127 static int eflag; /* -e option flag */
128 static int gflag; /* -g option flag */
130 /* prototypes */
131 static char *address(char *expbuf);
132 static char *gettext(char* txp);
133 static char *recomp(char *expbuf, char redelim);
134 static char *rhscomp(char* rhsp, char delim);
135 static char *ycomp(char *ep, char delim);
136 static int cmdcomp(char cchar);
137 static int cmdline(char *cbuf);
138 static label *search(label *ptr);
139 static void compile(void);
140 static void resolve(void);
142 /* sedexec.c prototypes */
143 void execute(char* file);
145 /* main sequence of the stream editor */
146 int main(int argc, char *argv[])
148 eargc = argc; /* set local copy of argument count */
149 eargv = argv; /* set local copy of argument list */
150 cmdp->addr1 = pool; /* 1st addr expand will be at pool start */
151 if (eargc == 1)
152 exit(0); /* exit immediately if no arguments */
154 /* scan through the arguments, interpreting each one */
155 while ((--eargc > 0) && (**++eargv == '-'))
156 switch (eargv[0][1])
158 case 'e':
159 eflag++; compile(); /* compile with e flag on */
160 eflag = 0;
161 continue; /* get another argument */
162 case 'f':
163 if (eargc-- <= 0) /* barf if no -f file */
164 exit(2);
165 if ((cmdf = fopen(*++eargv, "r")) == NULL)
167 fprintf(stderr, COCFI, *eargv);
168 exit(2);
170 compile(); /* file is O.K., compile it */
171 fclose(cmdf);
172 continue; /* go back for another argument */
173 case 'g':
174 gflag++; /* set global flag on all s cmds */
175 continue;
176 case 'n':
177 nflag++; /* no print except on p flag or w */
178 continue;
179 default:
180 fprintf(stdout, UFLAG, eargv[0][1]);
181 continue;
184 if (cmdp == cmds) /* no commands have been compiled */
186 eargv--; eargc++;
187 eflag++; compile(); eflag = 0;
188 eargv++; eargc--;
191 if (bdepth) /* we have unbalanced squigglies */
192 die(TMLBR);
194 lablst->address = cmdp; /* set up header of label linked list */
195 resolve(); /* resolve label table indirections */
196 if (eargc <= 0) /* if there were no -e commands */
197 execute(NULL); /* execute commands from stdin only */
198 else while(--eargc>=0) /* else execute only -e commands */
199 execute(*eargv++);
200 exit(0); /* everything was O.K. if we got here */
203 #define H 0x80 /* 128 bit, on if there's really code for command */
204 #define LOWCMD 56 /* = '8', lowest char indexed in cmdmask */
206 /* indirect through this to get command internal code, if it exists */
/* compile() indexes this table with (command character - LOWCMD), so each
   row of eight entries covers eight consecutive character codes starting at
   '8'.  A zero entry means the character is not a command.  The low seven
   bits are the internal command code; the H bit tells compile() to call
   cmdcomp() for the command.  Entries that are H alone (for ':' and '}')
   carry no command code and are handled entirely inside cmdcomp(). */
207 static char cmdmask[] =
209 0, 0, H, 0, 0, H+EQCMD,0, 0,
210 0, 0, 0, 0, H+CDCMD,0, 0, CGCMD,
211 CHCMD, 0, 0, 0, H+CLCMD,0, CNCMD, 0,
212 CPCMD, 0, 0, 0, H+CTCMD,0, 0, H+CWCMD,
213 0, 0, 0, 0, 0, 0, 0, 0,
214 0, H+ACMD, H+BCMD, H+CCMD, DCMD, 0, 0, GCMD,
215 HCMD, H+ICMD, 0, 0, H+LCMD, 0, NCMD, 0,
216 PCMD, H+QCMD, H+RCMD, H+SCMD, H+TCMD, 0, 0, H+WCMD,
217 XCMD, H+YCMD, 0, H+BCMD, 0, H, 0, 0,
220 /* precompile sed commands out of a file */
/* Compile every command available from the current source (an -e argument
   or the file cmdf, as chosen by cmdline()) into cmds[], starting at cmdp.
   Each pass of the loop handles one command: optional first and second
   address, optional '!', then the command character.  Address and text data
   are written into pool[] at fp.  Errors are reported through die(), which
   does not return.  The loop ends when cmdline() reports no more input. */
221 static void compile(void)
223 char ccode;
225 for(;;) /* main compilation loop */
227 SKIPWS(cp);
/* a ';' left over from the previous command just separates commands */
228 if (*cp == ';') {
229 cp++;
230 SKIPWS(cp);
/* the current line is used up (or the rest is a comment): fetch the next
   one into linebuf and restart cp at its beginning */
233 if (*cp == '\0' || *cp == '#') /* get a new command line */
234 if (cmdline(cp = linebuf) < 0)
235 break;
236 SKIPWS(cp);
238 if (*cp == '\0' || *cp == '#') /* a comment */
239 continue;
241 /* compile first address */
242 if (fp > poolend)
243 die(TMTXT);
244 else if ((fp = address(cmdp->addr1 = fp)) == BAD)
245 die(AGMSG);
/* address() returning its own argument means recomp() saw an empty RE.
   NOTE(review): recomp() returns in that case without consuming the closing
   delimiter; the 's' case in cmdcomp() steps over it, this path does not
   appear to - confirm that an empty address RE is handled as intended. */
247 if (fp == cmdp->addr1) /* if empty RE was found */
249 if (lastre) /* if there was previous RE */
250 cmdp->addr1 = lastre; /* use it */
251 else
252 die(FRENL);
/* address() returns NULL when no address is present at all */
254 else if (fp == NULL) /* if fp was NULL */
256 fp = cmdp->addr1; /* use current pool location */
257 cmdp->addr1 = NULL;
259 else
261 lastre = cmdp->addr1;
262 if (*cp == ',' || *cp == ';') /* there's 2nd addr */
264 cp++;
265 if (fp > poolend) die(TMTXT);
266 fp = address(cmdp->addr2 = fp);
267 if (fp == BAD || fp == NULL) die(AGMSG);
268 if (fp == cmdp->addr2)
269 cmdp->addr2 = lastre;
270 else
271 lastre = cmdp->addr2;
273 else
274 cmdp->addr2 = NULL; /* no 2nd address */
276 if (fp > poolend) die(TMTXT);
278 SKIPWS(cp); /* discard whitespace after address */
/* '!' inverts the sense of the address match */
280 if (*cp == '!') {
281 cmdp->flags.allbut = 1;
282 cp++; SKIPWS(cp);
285 /* get cmd char, range-check it */
/* cmdmask[] covers the characters from LOWCMD upward; a zero entry means
   the character is not a command */
286 if ((*cp < LOWCMD) || (*cp > '~')
287 || ((ccode = cmdmask[*cp - LOWCMD]) == 0))
288 die(NSCAX);
290 cmdp->command = ccode & ~H; /* fill in command value */
291 if ((ccode & H) == 0) /* if no compile-time code */
292 cp++; /* discard command char */
/* a nonzero return from cmdcomp() skips both the cmdp advance and the
   trailing-garbage check below */
293 else if (cmdcomp(*cp++)) /* execute it; if ret = 1 */
294 continue; /* skip next line read */
296 if (++cmdp >= cmds + MAXCMDS) die(TMCDS);
298 SKIPWS(cp); /* look for trailing stuff */
299 if (*cp != '\0')
301 if (*cp == ';')
303 continue;
305 else if (*cp != '#' && *cp != '}')
306 die(TRAIL);
311 /* compile a single command */
/* Do the command-specific part of compiling the command whose character is
   cchar; cp already points just past that character and cmdp at the slot
   being filled.
   Returns 1 when the caller must not advance cmdp or check for trailing
   text (the '{', '}' and ':' cases), 0 otherwise.
   Errors are reported through die(), except a failed fopen() for a w file,
   which prints CCOFI and exits with status 2.
   Several cases fall through on purpose: a/i/r into c, s into l/L, and
   l/L into w/W when a 'w' follows. */
312 static int cmdcomp(char cchar)
/* the tables below are static, so they persist across calls */
314 static sedcmd **cmpstk[MAXDEPTH]; /* current cmd stack for {} */
315 static const char *fname[WFILES]; /* w file name pointers */
316 static FILE *fout[WFILES]; /* w file file ptrs */
/* slots 0 and 1 are taken by stdout and stderr, hence the initial 2 */
317 static int nwfiles = 2; /* count of open w files */
318 int i; /* indexing dummy used in w */
319 sedcmd *sp1, *sp2; /* temps for label searches */
320 label *lpt; /* ditto, and the searcher */
321 char redelim; /* current RE delimiter */
/* re-established on every call: writing to "/dev/stdout" or "/dev/stderr"
   is matched by name below and mapped onto the existing streams */
323 fout[0] = stdout;
324 fout[1] = stderr;
326 fname[0] = "/dev/stdout";
327 fname[1] = "/dev/stderr";
329 switch(cchar)
331 case '{': /* start command group */
/* the group is compiled as a branch over its body, taken when the address
   does NOT match, so the sense of the match is inverted; the branch target
   is filled in when the matching '}' is met */
332 cmdp->flags.allbut = !cmdp->flags.allbut;
/* NOTE(review): bdepth is not compared against MAXDEPTH anywhere in the
   visible code before it indexes cmpstk - confirm deep nesting is bounded */
333 cmpstk[bdepth++] = &(cmdp->u.link);
334 if (++cmdp >= cmds + MAXCMDS) die(TMCDS);
/* at end of line, plant a ';' so the compile loop fetches the next line */
335 if (*cp == '\0') *cp++ = ';', *cp = '\0'; /* get next cmd w/o lineread */
336 return(1);
338 case '}': /* end command group */
339 if (cmdp->addr1) die(AD1NG); /* no addresses allowed */
340 if (--bdepth < 0) die(TMRBR); /* too many right braces */
341 *cmpstk[bdepth] = cmdp; /* set the jump address */
342 return(1);
344 case '=': /* print current source line number */
345 case 'q': /* exit the stream editor */
346 if (cmdp->addr2) die(AD2NG);
347 break;
349 case ':': /* label declaration */
350 if (cmdp->addr1) die(AD1NG); /* no addresses allowed */
/* the name is stored in the pool and provisionally attached to the next
   free label slot so that search() can compare against it */
351 fp = gettext(lab->name = fp); /* get the label name */
352 if ((lpt = search(lab))) /* does it have a double? */
/* an existing entry without an address is a forward reference made by an
   earlier branch; it is completed here rather than rejected */
354 if (lpt->address) die(DLABL); /* yes, abort */
356 else /* check that it doesn't overflow label table */
358 lab->last = NULL;
359 lpt = lab;
360 if (++lab >= labels + MAXLABS) die(TMLAB);
362 lpt->address = cmdp;
363 return(1);
365 case 'b': /* branch command */
366 case 't': /* branch-on-succeed command */
367 case 'T': /* branch-on-fail command */
368 SKIPWS(cp);
/* a branch without a label is queued on the list header's chain; main()
   sets the header's address before calling resolve() */
369 if (*cp == '\0') /* if branch is to start of cmds... */
371 /* add current command to end of label last */
372 if ((sp1 = lablst->last))
374 while((sp2 = sp1->u.link))
375 sp1 = sp2;
376 sp1->u.link = cmdp;
378 else /* lablst->last == NULL */
379 lablst->last = cmdp;
380 break;
382 fp = gettext(lab->name = fp); /* else get label into pool */
383 if ((lpt = search(lab))) /* enter branch to it */
/* label already declared: link straight to it; otherwise append this
   command to the label's chain of pending references */
385 if (lpt->address)
386 cmdp->u.link = lpt->address;
387 else
389 sp1 = lpt->last;
390 while((sp2 = sp1->u.link))
391 sp1 = sp2;
392 sp1->u.link = cmdp;
395 else /* matching named label not found */
397 lab->last = cmdp; /* add the new label */
398 lab->address = NULL; /* it's forward of here */
399 if (++lab >= labels + MAXLABS) /* overflow if last */
400 die(TMLAB);
402 break;
404 case 'a': /* append text */
405 case 'i': /* insert text */
406 case 'r': /* read file into stream */
407 if (cmdp->addr2) die(AD2NG);
/* deliberate fall-through: a, i and r share the text handling of c */
408 case 'c': /* change text */
/* skip a backslash-newline that introduces the text */
409 if ((*cp == '\\') && (*++cp == '\n')) cp++;
410 fp = gettext(cmdp->u.lhs = fp);
411 break;
413 case 'D': /* delete current line in hold space */
414 cmdp->u.link = cmds;
415 break;
417 case 's': /* substitute regular expression */
418 if (*cp == 0) /* get delimiter from 1st ch */
419 die(RETER);
420 else
421 redelim = *cp++;
423 if ((fp = recomp(cmdp->u.lhs = fp, redelim)) == BAD)
424 die(CGMSG);
425 if (fp == cmdp->u.lhs) { /* if compiled RE zero len */
426 if (lastre) {
427 cmdp->u.lhs = lastre; /* use the previous one */
/* recomp() did not consume the closing delimiter of an empty RE */
428 cp++; /* skip delim */
430 else
431 die(FRENL);
433 else /* otherwise */
434 lastre = cmdp->u.lhs; /* save the one just found */
436 if ((cmdp->rhs = fp) > poolend) die(TMTXT);
437 if ((fp = rhscomp(cmdp->rhs, redelim)) == BAD) die(CGMSG);
438 if (gflag) cmdp->flags.global++;
/* trailing flags: g, p, P and at most one occurrence number */
439 while (*cp == 'g' || *cp == 'p' || *cp == 'P' || isdigit(*cp))
441 IFEQ(cp, 'g') cmdp->flags.global++;
442 IFEQ(cp, 'p') cmdp->flags.print = 1;
443 IFEQ(cp, 'P') cmdp->flags.print = 2;
444 if(isdigit(*cp))
446 if (cmdp->nth)
447 break; /* no multiple n args */
449 cmdp->nth = atoi(cp); /* check 0? */
450 while (isdigit(*cp)) cp++;
/* deliberate fall-through from s: a trailing 'w' flag is handled by the
   w-file code below */
454 case 'l': /* list pattern space */
455 case 'L': /* dump pattern space */
456 if (*cp == 'w')
457 cp++; /* and execute a w command! */
458 else
459 break; /* s or L or l is done */
461 case 'w': /* write-pattern-space command */
462 case 'W': /* write-first-line command */
463 if (nwfiles >= WFILES) die(TMWFI);
/* the file name is stored twice into the same slot; the second assignment
   inside the call is redundant */
464 fname[nwfiles] = fp;
465 fp = gettext((fname[nwfiles] = fp, fp)); /* filename will be in pool */
/* reuse the stream of a file already opened under the same name */
466 for(i = nwfiles-1; i >= 0; i--) /* match it in table */
467 if (strcmp(fname[nwfiles], fname[i]) == 0)
469 cmdp->fout = fout[i];
470 return(0);
472 /* if didn't find one, open new out file */
473 if ((cmdp->fout = fopen(fname[nwfiles], "w")) == NULL)
475 fprintf(stderr, CCOFI, fname[nwfiles]);
476 exit(2);
478 fout[nwfiles++] = cmdp->fout;
479 break;
481 case 'y': /* transliterate text */
482 fp = ycomp(cmdp->u.lhs = fp, *cp++); /* compile translit */
483 if (fp == BAD) die(CGMSG); /* fail on bad form */
484 if (fp > poolend) die(TMTXT); /* fail on overflow */
485 break;
487 return(0); /* succeeded in interpreting one command */
490 /* generate replacement string for substitute command right hand side
491 rhsp: place to compile expression to
492 delim: regular-expression end-mark to look for */
/* Reads the replacement text starting at the global cp and writes its
   compiled form at rhsp, terminated by a NUL.  On success cp is left just
   past the closing delimiter and the return value points one past the
   terminating NUL.  Returns BAD when the text ends before the delimiter is
   found or when a back reference names a tag above bcount.
   NOTE(review): nothing in this function compares rhsp with poolend; the
   caller checks only the starting position - confirm long replacements
   cannot run past the pool. */
493 static char *rhscomp(char* rhsp, char delim) /* uses bcount */
495 register char *p = cp;
497 for(;;)
498 /* copy for the likely case it is not s.th. special */
499 if ((*rhsp = *p++) == '\\') /* back reference or escape */
501 if (*p >= '0' && *p <= '9') /* back reference */
/* also entered by goto from the '&' branch below */
503 dobackref:
504 *rhsp = *p++;
505 /* check validity of pattern tag */
506 if (*rhsp > bcount + '0')
507 return(BAD);
/* a back reference is stored as its digit with the high bit set */
508 *rhsp++ |= 0x80; /* mark the good ones */
510 else /* escape */
/* NOTE(review): when the backslash is the last character, the NUL is
   copied by the default case and p then moves past it - confirm */
512 switch (*p) {
513 case 'n': *rhsp = '\n'; break;
514 case 'r': *rhsp = '\r'; break;
515 case 't': *rhsp = '\t'; break;
516 default: *rhsp = *p;
518 rhsp++; p++;
521 else if (*rhsp == delim) /* found RE end, hooray... */
523 *rhsp++ = '\0'; /* cap the expression string */
524 cp = p;
525 return(rhsp); /* pt at 1 past the RE */
527 else if (*rhsp == '&') /* special case, convert to backref \0 */
/* the '&' in the source line itself is overwritten with '0' so that the
   back-reference code above can re-read it */
529 *--p = '0';
530 goto dobackref;
532 else if (*rhsp++ == '\0') /* last ch not RE end, help! */
533 return(BAD);
536 /* compile a regular expression to internal form
537 expbuf: place to compile it to
538 redelim: RE end-marker to look for */
/* Reads the RE text starting at the global cp and writes its compiled form
   at expbuf.  The first compiled byte is 1 if the RE began with '^' and 0
   otherwise; the pattern items follow, ending with CEOF.
   Returns a pointer just past the compiled RE with cp left just past the
   closing delimiter.  Returns expbuf itself, with cp unchanged, when the RE
   is empty (the text starts with the delimiter).  Returns BAD on a
   malformed or over-long RE.  Some errors are reported through die()
   instead, which does not return.
   bcount is reset to 0 and left holding the number of \( groups opened. */
539 static char *recomp(char *expbuf, char redelim) /* uses cp, bcount */
541 register char *ep = expbuf; /* current-compiled-char pointer */
542 register char *sp = cp; /* source-character ptr */
543 register int c; /* current character */
544 char negclass; /* all-but flag */
545 char *lastep; /* ptr to last expr compiled */
546 char *lastep2; /* ditto, but from the last loop */
547 char *svclass; /* start of current char class */
548 char brnest[MAXTAGS]; /* bracket-nesting array */
549 char *brnestp; /* ptr to current bracket-nest */
550 char *pp; /* scratch pointer */
551 int classct; /* class element count */
552 int tags; /* # of closed tags */
/* empty RE: nothing is compiled and the delimiter is NOT consumed */
554 if (*cp == redelim) { /* if first char is RE endmarker */
555 return(ep);
558 lastep = lastep2 = NULL; /* there's no previous RE */
559 brnestp = brnest; /* initialize ptr to brnest array */
560 tags = bcount = 0; /* initialize counters */
/* the anchor flag is always written; sp advances only if '^' was present */
562 if ((*ep++ = (*sp == '^'))) /* check for start-of-line syntax */
563 sp++;
565 for (;;)
567 if (*sp == 0) /* no termination */
568 die (RETER);
569 if (ep >= expbuf + RELIMIT) /* match is too large */
570 return(cp = sp, BAD);
571 if ((c = *sp++) == redelim) /* found the end of the RE */
573 cp = sp;
574 if (brnestp != brnest) /* \(, \) unbalanced */
575 return(BAD);
576 *ep++ = CEOF; /* write end-of-pattern mark */
577 return(ep); /* return ptr to compiled RE */
/* lastep = start of the item compiled by the previous pass (what a
   following '*' or '\+' applies to); lastep2 = start of the item about to be
   compiled in this pass */
580 lastep = lastep2;
581 lastep2 = ep;
583 switch (c)
585 case '\\':
586 if ((c = *sp++) == '(') /* start tagged section */
588 if (bcount >= MAXTAGS)
589 return(cp = sp, BAD);
590 *brnestp++ = bcount; /* update tag stack */
591 *ep++ = CBRA; /* enter tag-start */
592 *ep++ = bcount++; /* bump tag count */
593 lastep2 = NULL;
594 continue;
596 else if (c == ')') /* end tagged section */
598 if (brnestp <= brnest) /* extra \) */
599 return(cp = sp, BAD);
600 *ep++ = CKET; /* enter end-of-tag */
601 *ep++ = *--brnestp; /* pop tag stack */
602 tags++; /* count closed tags */
/* scan backwards for a CBRA byte so a following repeat covers the group */
603 for (lastep2 = ep-1; *lastep2 != CBRA; )
604 --lastep2; /* FIXME: lastep becomes start */
605 continue;
607 else if (c >= '1' && c <= '9' && c != redelim) /* tag use, if !delim */
/* only groups that have already been closed may be referenced */
609 if ((c -= '1') >= tags) /* too few */
610 return(BAD);
611 *ep++ = CBACK; /* enter tag mark */
612 *ep++ = c; /* and the number */
613 continue;
615 else if (c == '\n') /* escaped newline no good */
616 return(cp = sp, BAD);
617 else if (c == 'n') /* match a newline */
618 c = '\n';
619 else if (c == 't') /* match a tab */
620 c = '\t';
621 else if (c == 'r') /* match a return */
622 c = '\r';
623 else if (c == '+') /* 1..n repeat of previous pattern */
625 if (lastep == NULL) /* if + not first on line */
626 goto defchar; /* match a literal + */
/* x\+ is compiled as x followed by a starred copy of x */
627 pp = ep; /* else save old ep */
628 *ep++ = *lastep++ | STAR; /* flag the copy */
629 while (lastep < pp) /* so we can blt the pattern */
630 *ep++ = *lastep++;
631 lastep2 = lastep; /* no new expression */
632 continue;
634 goto defchar; /* else match \c */
636 case '\0': /* ignore nuls */
637 continue;
639 case '\n': /* trailing pattern delimiter is missing */
640 return(cp = sp, BAD);
642 case '.': /* match any char except newline */
643 *ep++ = CDOT;
644 continue;
646 case '*': /* 0..n repeat of previous pattern */
647 if (lastep == NULL) /* if * isn't first on line */
648 goto defchar; /* match a literal * */
649 *lastep |= STAR; /* flag previous pattern */
650 lastep2 = lastep; /* no new expression */
651 continue;
653 case '$': /* match only end-of-line */
654 if (*sp != redelim) /* if we're not at end of RE */
655 goto defchar; /* match a literal $ */
656 *ep++ = CDOL; /* insert end-symbol mark */
657 continue;
659 case '[': /* begin character set pattern */
/* a class is CCL followed by a 16-byte bitmap, one bit per character
   code 0..127 */
660 if (ep + 17 >= expbuf + RELIMIT)
661 die(REITL);
662 *ep++ = CCL; /* insert class mark */
663 if ((negclass = ((c = *sp++) == '^')))
664 c = *sp++;
665 svclass = sp; /* save ptr to class start */
/* NOTE(review): the bitmap bytes are only OR-ed into, never cleared here;
   presumably this relies on the pool bytes still being zero - confirm */
666 do {
667 if (c == '\0') die(CGMSG);
668 /* handle predefined character classes */
669 if (c == '[' && *sp == ':')
671 /* look for the matching ":]]" */
672 char *p;
673 const char *p2;
674 for (p = sp+3; *p; p++)
675 if (*p == ']' &&
676 *(p-1) == ']' &&
677 *(p-2) == ':')
679 char cc[8];
680 const char **it;
681 p2 = sp+1;
/* copy the class name between "[:" and ":]" into cc.
   NOTE(review): the loop may fill all 8 bytes of cc, after which the
   terminator below is written at index 8, one past the array, for names of
   8 or more characters. */
682 for (p2 = sp+1;
683 p2 < p-2 && p2-sp-1 < sizeof(cc);
684 p2++)
685 cc[p2-sp-1] = *p2;
686 cc[p2-sp-1] = 0; /* termination */
/* cclasses holds (name, spec) pairs ending in NULL; after the loop the
   post-increment leaves it pointing at the spec of the matching name */
688 it = cclasses;
689 while (*it && strcmp(*it, cc))
690 it +=2;
691 if (!*it++)
692 die(CCERR);
694 /* generate mask */
/* in a spec, "x-y" is an inclusive range and anything else is literal */
695 p2 = *it;
696 while (*p2) {
697 if (p2[1] == '-' && p2[2]) {
698 for (c = *p2; c <= p2[2]; c++)
699 ep[c >> 3] |= bits(c & 7);
700 p2 += 3;
702 else {
703 c = *p2++;
704 ep[c >> 3] |= bits(c & 7);
/* resume at the final ']' of ":]]"; c = 0 stops the code below from
   entering a character for this pass */
707 sp = p; c = 0; break;
711 /* handle character ranges */
/* sets the bits from the character before the '-' up to, but excluding,
   the one after it; that end character is entered by the next pass */
712 if (c == '-' && sp > svclass && *sp != ']')
713 for (c = sp[-2]; c < *sp; c++)
714 ep[c >> 3] |= bits(c & 7);
716 /* handle escape sequences in sets */
717 if (c == '\\')
719 if ((c = *sp++) == 'n')
720 c = '\n';
721 else if (c == 't')
722 c = '\t';
723 else if (c == 'r')
724 c = '\r';
727 /* enter (possibly translated) char in set */
728 if (c)
729 ep[c >> 3] |= bits(c & 7);
730 } while
731 ((c = *sp++) != ']');
733 /* invert the bitmask if all-but was specified */
734 if (negclass)
735 for(classct = 0; classct < 16; classct++)
736 ep[classct] ^= 0xFF;
737 ep[0] &= 0xFE; /* never match ASCII 0 */
738 ep += 16; /* advance ep past set mask */
739 continue;
741 defchar: /* match literal character */
742 default: /* which is what we'd do by default */
743 *ep++ = CCHR; /* insert character mark */
744 *ep++ = c;
749 /* read next command from -e argument or command file */
750 static int cmdline(char *cbuf) /* uses eflag, eargc, cmdf */
752 register int inc; /* not char because must hold EOF */
754 cbuf--; /* so pre-increment points us at cbuf */
756 /* e command flag is on */
757 if (eflag)
759 register char *p; /* ptr to current -e argument */
760 static char *savep; /* saves previous value of p */
762 if (eflag > 0) /* there are pending -e arguments */
764 eflag = -1;
765 if (eargc-- <= 0)
766 exit(2); /* if no arguments, barf */
768 /* else transcribe next e argument into cbuf */
769 p = *++eargv;
770 while((*++cbuf = *p++))
771 if (*cbuf == '\\')
773 if ((*++cbuf = *p++) == '\0')
774 return(savep = NULL, -1);
775 else
776 continue;
778 else if (*cbuf == '\n') /* end of 1 cmd line */
780 *cbuf = '\0';
781 return(savep = p, 1);
782 /* we'll be back for the rest... */
785 /* found end-of-string; can advance to next argument */
786 return(savep = NULL, 1);
789 if ((p = savep) == NULL)
790 return(-1);
792 while((*++cbuf = *p++))
793 if (*cbuf == '\\')
795 if ((*++cbuf = *p++) == '0')
796 return(savep = NULL, -1);
797 else
798 continue;
800 else if (*cbuf == '\n')
802 *cbuf = '\0';
803 return(savep = p, 1);
806 return(savep = NULL, 1);
809 /* if no -e flag read from command file descriptor */
810 while((inc = getc(cmdf)) != EOF) /* get next char */
811 if ((*++cbuf = inc) == '\\') /* if it's escape */
812 *++cbuf = inc = getc(cmdf); /* get next char */
813 else if (*cbuf == '\n') /* end on newline */
814 return(*cbuf = '\0', 1); /* cap the string */
816 return(*++cbuf = '\0', -1); /* end-of-file, no more chars */
819 /* expand an address at *cp... into expbuf, return ptr at following char */
820 static char *address(char *expbuf) /* uses cp, linenum */
822 static int numl = 0; /* current ind in addr-number table */
823 register char *rcp; /* temp compile ptr for forwd look */
824 long lno; /* computed value of numeric address */
826 if (*cp == '$') /* end-of-source address */
828 *expbuf++ = CEND; /* write symbolic end address */
829 *expbuf++ = CEOF; /* and the end-of-address mark (!) */
830 cp++; /* go to next source character */
831 last_line_used = TRUE;
832 return(expbuf); /* we're done */
834 if (*cp == '/') /* start of regular-expression match */
835 return(recomp(expbuf, *cp++)); /* compile the RE */
837 rcp = cp; lno = 0; /* now handle a numeric address */
838 while(*rcp >= '0' && *rcp <= '9') /* collect digits */
839 lno = lno*10 + *rcp++ - '0'; /* compute their value */
841 if (rcp > cp) /* if we caught a number... */
843 *expbuf++ = CLNUM; /* put a numeric-address marker */
844 *expbuf++ = numl; /* and the address table index */
845 linenum[numl++] = lno; /* and set the table entry */
846 if (numl >= MAXLINES) /* oh-oh, address table overflow */
847 die(TMLNR); /* abort with error message */
848 *expbuf++ = CEOF; /* write the end-of-address marker */
849 cp = rcp; /* point compile past the address */
850 return(expbuf); /* we're done */
853 return(NULL); /* no legal address was found */
856 /* accept multiline input from *cp..., discarding leading whitespace
857 txp: where to put the text */
858 static char *gettext(char* txp) /* uses global cp */
860 register char *p = cp;
862 SKIPWS(p); /* discard whitespace */
863 do {
864 if ((*txp = *p++) == '\\') /* handle escapes */
865 *txp = *p++;
866 if (*txp == '\0') /* we're at end of input */
867 return(cp = --p, ++txp);
868 else if (*txp == '\n') /* also SKIPWS after newline */
869 SKIPWS(p);
870 } while (txp++); /* keep going till we find that nul */
871 return(txp);
874 /* find the label matching *ptr, return NULL if none */
875 static label *search(label *ptr) /* uses global lablst */
877 register label *rp;
878 for(rp = lablst; rp < ptr; rp++)
879 if ((rp->name != NULL) && (strcmp(rp->name, ptr->name) == 0))
880 return(rp);
881 return(NULL);
884 /* write label links into the compiled-command space */
885 static void resolve(void) /* uses global lablst */
887 register label *lptr;
888 register sedcmd *rptr, *trptr;
890 /* loop through the label table */
891 for(lptr = lablst; lptr < lab; lptr++)
892 if (lptr->address == NULL) /* barf if not defined */
894 fprintf(stderr, ULABL, lptr->name);
895 exit(2);
897 else if (lptr->last) /* if last is non-null */
899 rptr = lptr->last; /* chase it */
900 while((trptr = rptr->u.link)) /* resolve refs */
902 rptr->u.link = lptr->address;
903 rptr = trptr;
905 rptr->u.link = lptr->address;
909 /* compile a y (transliterate) command
910 ep: where to compile to
911 delim: end delimiter to look for */
912 static char *ycomp(char *ep, char delim)
914 char *tp, *sp;
915 int c;
917 /* scan the 'from' section for invalid chars */
918 for(sp = tp = cp; *tp != delim; tp++)
920 if (*tp == '\\')
921 tp++;
922 if ((*tp == '\n') || (*tp == '\0'))
923 return(BAD);
925 tp++; /* tp now points at first char of 'to' section */
927 /* now rescan the 'from' section */
928 while((c = *sp++ & 0x7F) != delim)
930 if (c == '\\' && *sp == 'n')
932 sp++;
933 c = '\n';
935 if ((ep[c] = *tp++) == '\\' && *tp == 'n')
937 ep[c] = '\n';
938 tp++;
940 if ((ep[c] == delim) || (ep[c] == '\0'))
941 return(BAD);
944 if (*tp != delim) /* 'to', 'from' parts have unequal lengths */
945 return(BAD);
947 cp = ++tp; /* point compile ptr past translit */
949 for(c = 0; c < 128; c++) /* fill in self-map entries in table */
950 if (ep[c] == 0)
951 ep[c] = c;
953 return(ep + 0x80); /* return first free location past table end */
956 /* sedcomp.c ends here */