1 /* sedcomp.c -- stream editor main and compilation phase
2 Copyright (C) 1995-2003 Eric S. Raymond
3 Copyright (C) 2004-2006 Rene Rebe
5 The stream editor compiles its command input (from files or -e options)
6 into an internal form using compile() then executes the compiled form using
7 execute(). Main() just initializes data structures, interprets command line
8 options, and calls compile() and execute() in appropriate sequence.
9 The data structure produced by compile() is an array of compiled-command
10 structures (type sedcmd). These contain several pointers into pool[], the
11 regular-expression and text-data pool, plus a command code and g & p flags.
12 In the special case that the command is a label the struct will hold a ptr
13 into the labels array labels[] during most of the compile, until resolve()
14 resolves references at the end.
15 The operation of execute() is described in its source module.
18 #include <stdlib.h> /* exit */
19 #include <stdio.h> /* uses getc, fprintf, fopen, fclose */
20 #include <ctype.h> /* isdigit */
21 #include <string.h> /* strcmp */
22 #include "sed.h" /* command type struct and name defines */
24 /***** public stuff ******/
26 #define MAXCMDS 200 /* maximum number of compiled commands */
27 #define MAXLINES 256 /* max # numeric addresses to compile */
30 char linebuf
[MAXBUF
+1]; /* current-line buffer */
31 sedcmd cmds
[MAXCMDS
+1]; /* hold compiled commands */
32 long linenum
[MAXLINES
]; /* numeric-addresses table */
34 /* miscellaneous shared variables */
35 int nflag
; /* -n option flag */
36 int eargc
; /* scratch copy of argument count */
37 sedcmd
*pending
= NULL
; /* next command to be executed */
39 int last_line_used
= 0; /* last line address ($) was used */
41 void die (const char* msg
) {
42 fprintf(stderr
, "sed: ");
43 fprintf(stderr
, msg
, linebuf
);
44 fprintf(stderr
, "\n");
48 /***** module common stuff *****/
50 #define POOLSIZE 10000 /* size of string-pool space */
51 #define WFILES 10 /* max # w output files that can be compiled */
52 #define RELIMIT 256 /* max chars in compiled RE */
53 #define MAXDEPTH 20 /* maximum {}-nesting level */
54 #define MAXLABS 50 /* max # of labels that can be handled */
56 #define SKIPWS(pc) while ((*pc==' ') || (*pc=='\t')) pc++
57 #define IFEQ(x, v) if (*x == v) x++ , /* do expression */
60 static char AGMSG
[] = "garbled address %s";
61 static char CGMSG
[] = "garbled command %s";
62 static char TMTXT
[] = "too much text: %s";
63 static char AD1NG
[] = "no addresses allowed for %s";
64 static char AD2NG
[] = "only one address allowed for %s";
65 static char TMCDS
[] = "too many commands, last was %s";
66 static char COCFI
[] = "cannot open command-file %s";
67 static char UFLAG
[] = "unknown flag %c";
68 /*static char COOFI[] = "cannot open %s";*/
69 static char CCOFI
[] = "cannot create %s";
70 static char ULABL
[] = "undefined label %s";
71 static char TMLBR
[] = "too many {'s";
72 static char FRENL
[] = "first RE must be non-null";
73 static char NSCAX
[] = "no such command as %s";
74 static char TMRBR
[] = "too many }'s";
75 static char DLABL
[] = "duplicate label %s";
76 static char TMLAB
[] = "too many labels: %s";
77 static char TMWFI
[] = "too many w files";
78 static char REITL
[] = "RE too long: %s";
79 static char TMLNR
[] = "too many line numbers";
80 static char TRAIL
[] = "command \"%s\" has trailing garbage";
81 static char RETER
[] = "RE not terminated: %s";
82 static char CCERR
[] = "unknown character class: %s";
84 /* cclass to c function mapping ,-) */
85 const char* cclasses
[] = {
88 "space", " \f\n\r\t\v",
93 "xdigit", "0-9A-Fa-f",
94 "cntrl", "\x01-\x1f\x7e",
97 "punct", "!-/:-@[-`{-\x7e",
100 typedef struct /* represent a command label */
102 char *name
; /* the label name */
103 sedcmd
*last
; /* it's on the label search list */
104 sedcmd
*address
; /* pointer to the cmd it labels */
108 static label labels
[MAXLABS
]; /* here's the label table */
109 static label
*lab
= labels
+ 1; /* pointer to current label */
110 static label
*lablst
= labels
; /* header for search list */
112 /* string pool for regular expressions, append text, etc. etc. */
113 static char pool
[POOLSIZE
]; /* the pool */
114 static char *fp
= pool
; /* current pool pointer */
115 static char *poolend
= pool
+ POOLSIZE
; /* pointer past pool end */
117 /* compilation state */
118 static FILE *cmdf
= NULL
; /* current command source */
119 static char *cp
= linebuf
; /* compile pointer */
120 static sedcmd
*cmdp
= cmds
; /* current compiled-cmd ptr */
121 static char *lastre
= NULL
; /* old RE pointer */
122 static int bdepth
= 0; /* current {}-nesting level */
123 static int bcount
= 0; /* # tagged patterns in current RE */
124 static char **eargv
; /* scratch copy of argument list */
126 /* compilation flags */
127 static int eflag
; /* -e option flag */
128 static int gflag
; /* -g option flag */
131 static char *address(char *expbuf
);
132 static char *gettext(char* txp
);
133 static char *recomp(char *expbuf
, char redelim
);
134 static char *rhscomp(char* rhsp
, char delim
);
135 static char *ycomp(char *ep
, char delim
);
136 static int cmdcomp(char cchar
);
137 static int cmdline(char *cbuf
);
138 static label
*search(label
*ptr
);
139 static void compile(void);
140 static void resolve(void);
142 /* sedexec.c prototypes */
143 void execute(char* file
);
145 /* main sequence of the stream editor */
146 int main(int argc
, char *argv
[])
148 eargc
= argc
; /* set local copy of argument count */
149 eargv
= argv
; /* set local copy of argument list */
150 cmdp
->addr1
= pool
; /* 1st addr expand will be at pool start */
152 exit(0); /* exit immediately if no arguments */
154 /* scan through the arguments, interpreting each one */
155 while ((--eargc
> 0) && (**++eargv
== '-'))
159 eflag
++; compile(); /* compile with e flag on */
161 continue; /* get another argument */
163 if (eargc
-- <= 0) /* barf if no -f file */
165 if ((cmdf
= fopen(*++eargv
, "r")) == NULL
)
167 fprintf(stderr
, COCFI
, *eargv
);
170 compile(); /* file is O.K., compile it */
172 continue; /* go back for another argument */
174 gflag
++; /* set global flag on all s cmds */
177 nflag
++; /* no print except on p flag or w */
180 fprintf(stdout
, UFLAG
, eargv
[0][1]);
184 if (cmdp
== cmds
) /* no commands have been compiled */
187 eflag
++; compile(); eflag
= 0;
191 if (bdepth
) /* we have unbalanced squigglies */
194 lablst
->address
= cmdp
; /* set up header of label linked list */
195 resolve(); /* resolve label table indirections */
196 if (eargc
<= 0) /* if there were no -e commands */
197 execute(NULL
); /* execute commands from stdin only */
198 else while(--eargc
>=0) /* else execute only -e commands */
200 exit(0); /* everything was O.K. if we got here */
203 #define H 0x80 /* 128 bit, on if there's really code for command */
204 #define LOWCMD 56 /* = '8', lowest char indexed in cmdmask */
206 /* indirect through this to get command internal code, if it exists */
207 static char cmdmask
[] =
209 0, 0, H
, 0, 0, H
+EQCMD
,0, 0,
210 0, 0, 0, 0, H
+CDCMD
,0, 0, CGCMD
,
211 CHCMD
, 0, 0, 0, H
+CLCMD
,0, CNCMD
, 0,
212 CPCMD
, 0, 0, 0, H
+CTCMD
,0, 0, H
+CWCMD
,
213 0, 0, 0, 0, 0, 0, 0, 0,
214 0, H
+ACMD
, H
+BCMD
, H
+CCMD
, DCMD
, 0, 0, GCMD
,
215 HCMD
, H
+ICMD
, 0, 0, H
+LCMD
, 0, NCMD
, 0,
216 PCMD
, H
+QCMD
, H
+RCMD
, H
+SCMD
, H
+TCMD
, 0, 0, H
+WCMD
,
217 XCMD
, H
+YCMD
, 0, H
+BCMD
, 0, H
, 0, 0,
220 /* precompile sed commands out of a file */
221 static void compile(void)
225 for(;;) /* main compilation loop */
233 if (*cp
== '\0' || *cp
== '#') /* get a new command line */
234 if (cmdline(cp
= linebuf
) < 0)
238 if (*cp
== '\0' || *cp
== '#') /* a comment */
241 /* compile first address */
244 else if ((fp
= address(cmdp
->addr1
= fp
)) == BAD
)
247 if (fp
== cmdp
->addr1
) /* if empty RE was found */
249 if (lastre
) /* if there was previous RE */
250 cmdp
->addr1
= lastre
; /* use it */
254 else if (fp
== NULL
) /* if fp was NULL */
256 fp
= cmdp
->addr1
; /* use current pool location */
261 lastre
= cmdp
->addr1
;
262 if (*cp
== ',' || *cp
== ';') /* there's 2nd addr */
265 if (fp
> poolend
) die(TMTXT
);
266 fp
= address(cmdp
->addr2
= fp
);
267 if (fp
== BAD
|| fp
== NULL
) die(AGMSG
);
268 if (fp
== cmdp
->addr2
)
269 cmdp
->addr2
= lastre
;
271 lastre
= cmdp
->addr2
;
274 cmdp
->addr2
= NULL
; /* no 2nd address */
276 if (fp
> poolend
) die(TMTXT
);
278 SKIPWS(cp
); /* discard whitespace after address */
281 cmdp
->flags
.allbut
= 1;
285 /* get cmd char, range-check it */
286 if ((*cp
< LOWCMD
) || (*cp
> '~')
287 || ((ccode
= cmdmask
[*cp
- LOWCMD
]) == 0))
290 cmdp
->command
= ccode
& ~H
; /* fill in command value */
291 if ((ccode
& H
) == 0) /* if no compile-time code */
292 cp
++; /* discard command char */
293 else if (cmdcomp(*cp
++)) /* execute it; if ret = 1 */
294 continue; /* skip next line read */
296 if (++cmdp
>= cmds
+ MAXCMDS
) die(TMCDS
);
298 SKIPWS(cp
); /* look for trailing stuff */
305 else if (*cp
!= '#' && *cp
!= '}')
311 /* compile a single command */
312 static int cmdcomp(char cchar
)
314 static sedcmd
**cmpstk
[MAXDEPTH
]; /* current cmd stack for {} */
315 static const char *fname
[WFILES
]; /* w file name pointers */
316 static FILE *fout
[WFILES
]; /* w file file ptrs */
317 static int nwfiles
= 2; /* count of open w files */
318 int i
; /* indexing dummy used in w */
319 sedcmd
*sp1
, *sp2
; /* temps for label searches */
320 label
*lpt
; /* ditto, and the searcher */
321 char redelim
; /* current RE delimiter */
326 fname
[0] = "/dev/stdout";
327 fname
[1] = "/dev/stderr";
331 case '{': /* start command group */
332 cmdp
->flags
.allbut
= !cmdp
->flags
.allbut
;
333 cmpstk
[bdepth
++] = &(cmdp
->u
.link
);
334 if (++cmdp
>= cmds
+ MAXCMDS
) die(TMCDS
);
335 if (*cp
== '\0') *cp
++ = ';', *cp
= '\0'; /* get next cmd w/o lineread */
338 case '}': /* end command group */
339 if (cmdp
->addr1
) die(AD1NG
); /* no addresses allowed */
340 if (--bdepth
< 0) die(TMRBR
); /* too many right braces */
341 *cmpstk
[bdepth
] = cmdp
; /* set the jump address */
344 case '=': /* print current source line number */
345 case 'q': /* exit the stream editor */
346 if (cmdp
->addr2
) die(AD2NG
);
349 case ':': /* label declaration */
350 if (cmdp
->addr1
) die(AD1NG
); /* no addresses allowed */
351 fp
= gettext(lab
->name
= fp
); /* get the label name */
352 if ((lpt
= search(lab
))) /* does it have a double? */
354 if (lpt
->address
) die(DLABL
); /* yes, abort */
356 else /* check that it doesn't overflow label table */
360 if (++lab
>= labels
+ MAXLABS
) die(TMLAB
);
365 case 'b': /* branch command */
366 case 't': /* branch-on-succeed command */
367 case 'T': /* branch-on-fail command */
369 if (*cp
== '\0') /* if branch is to start of cmds... */
371 /* add current command to end of label last */
372 if ((sp1
= lablst
->last
))
374 while((sp2
= sp1
->u
.link
))
378 else /* lablst->last == NULL */
382 fp
= gettext(lab
->name
= fp
); /* else get label into pool */
383 if ((lpt
= search(lab
))) /* enter branch to it */
386 cmdp
->u
.link
= lpt
->address
;
390 while((sp2
= sp1
->u
.link
))
395 else /* matching named label not found */
397 lab
->last
= cmdp
; /* add the new label */
398 lab
->address
= NULL
; /* it's forward of here */
399 if (++lab
>= labels
+ MAXLABS
) /* overflow if last */
404 case 'a': /* append text */
405 case 'i': /* insert text */
406 case 'r': /* read file into stream */
407 if (cmdp
->addr2
) die(AD2NG
);
408 case 'c': /* change text */
409 if ((*cp
== '\\') && (*++cp
== '\n')) cp
++;
410 fp
= gettext(cmdp
->u
.lhs
= fp
);
413 case 'D': /* delete current line in hold space */
417 case 's': /* substitute regular expression */
418 if (*cp
== 0) /* get delimiter from 1st ch */
423 if ((fp
= recomp(cmdp
->u
.lhs
= fp
, redelim
)) == BAD
)
425 if (fp
== cmdp
->u
.lhs
) { /* if compiled RE zero len */
427 cmdp
->u
.lhs
= lastre
; /* use the previous one */
428 cp
++; /* skip delim */
434 lastre
= cmdp
->u
.lhs
; /* save the one just found */
436 if ((cmdp
->rhs
= fp
) > poolend
) die(TMTXT
);
437 if ((fp
= rhscomp(cmdp
->rhs
, redelim
)) == BAD
) die(CGMSG
);
438 if (gflag
) cmdp
->flags
.global
++;
439 while (*cp
== 'g' || *cp
== 'p' || *cp
== 'P' || isdigit(*cp
))
441 IFEQ(cp
, 'g') cmdp
->flags
.global
++;
442 IFEQ(cp
, 'p') cmdp
->flags
.print
= 1;
443 IFEQ(cp
, 'P') cmdp
->flags
.print
= 2;
447 break; /* no multiple n args */
449 cmdp
->nth
= atoi(cp
); /* check 0? */
450 while (isdigit(*cp
)) cp
++;
454 case 'l': /* list pattern space */
455 case 'L': /* dump pattern space */
457 cp
++; /* and execute a w command! */
459 break; /* s or L or l is done */
461 case 'w': /* write-pattern-space command */
462 case 'W': /* write-first-line command */
463 if (nwfiles
>= WFILES
) die(TMWFI
);
465 fp
= gettext((fname
[nwfiles
] = fp
, fp
)); /* filename will be in pool */
466 for(i
= nwfiles
-1; i
>= 0; i
--) /* match it in table */
467 if (strcmp(fname
[nwfiles
], fname
[i
]) == 0)
469 cmdp
->fout
= fout
[i
];
472 /* if didn't find one, open new out file */
473 if ((cmdp
->fout
= fopen(fname
[nwfiles
], "w")) == NULL
)
475 fprintf(stderr
, CCOFI
, fname
[nwfiles
]);
478 fout
[nwfiles
++] = cmdp
->fout
;
481 case 'y': /* transliterate text */
482 fp
= ycomp(cmdp
->u
.lhs
= fp
, *cp
++); /* compile translit */
483 if (fp
== BAD
) die(CGMSG
); /* fail on bad form */
484 if (fp
> poolend
) die(TMTXT
); /* fail on overflow */
487 return(0); /* succeeded in interpreting one command */
490 /* generate replacement string for substitute command right hand side
491 rhsp: place to compile expression to
492 delim: regular-expression end-mark to look for */
493 static char *rhscomp(char* rhsp
, char delim
) /* uses bcount */
495 register char *p
= cp
;
498 /* copy for the likely case it is not s.th. special */
499 if ((*rhsp
= *p
++) == '\\') /* back reference or escape */
501 if (*p
>= '0' && *p
<= '9') /* back reference */
505 /* check validity of pattern tag */
506 if (*rhsp
> bcount
+ '0')
508 *rhsp
++ |= 0x80; /* mark the good ones */
513 case 'n': *rhsp
= '\n'; break;
514 case 'r': *rhsp
= '\r'; break;
515 case 't': *rhsp
= '\t'; break;
521 else if (*rhsp
== delim
) /* found RE end, hooray... */
523 *rhsp
++ = '\0'; /* cap the expression string */
525 return(rhsp
); /* pt at 1 past the RE */
527 else if (*rhsp
== '&') /* special case, convert to backref \0 */
532 else if (*rhsp
++ == '\0') /* last ch not RE end, help! */
536 /* compile a regular expression to internal form
537 expbuf: place to compile it to
538 redelim: RE end-marker to look for */
539 static char *recomp(char *expbuf
, char redelim
) /* uses cp, bcount */
541 register char *ep
= expbuf
; /* current-compiled-char pointer */
542 register char *sp
= cp
; /* source-character ptr */
543 register int c
; /* current-character pointer */
544 char negclass
; /* all-but flag */
545 char *lastep
; /* ptr to last expr compiled */
546 char *lastep2
; /* dito, but from the last loop */
547 char *svclass
; /* start of current char class */
548 char brnest
[MAXTAGS
]; /* bracket-nesting array */
549 char *brnestp
; /* ptr to current bracket-nest */
550 char *pp
; /* scratch pointer */
551 int classct
; /* class element count */
552 int tags
; /* # of closed tags */
554 if (*cp
== redelim
) { /* if first char is RE endmarker */
558 lastep
= lastep2
= NULL
; /* there's no previous RE */
559 brnestp
= brnest
; /* initialize ptr to brnest array */
560 tags
= bcount
= 0; /* initialize counters */
562 if ((*ep
++ = (*sp
== '^'))) /* check for start-of-line syntax */
567 if (*sp
== 0) /* no termination */
569 if (ep
>= expbuf
+ RELIMIT
) /* match is too large */
570 return(cp
= sp
, BAD
);
571 if ((c
= *sp
++) == redelim
) /* found the end of the RE */
574 if (brnestp
!= brnest
) /* \(, \) unbalanced */
576 *ep
++ = CEOF
; /* write end-of-pattern mark */
577 return(ep
); /* return ptr to compiled RE */
586 if ((c
= *sp
++) == '(') /* start tagged section */
588 if (bcount
>= MAXTAGS
)
589 return(cp
= sp
, BAD
);
590 *brnestp
++ = bcount
; /* update tag stack */
591 *ep
++ = CBRA
; /* enter tag-start */
592 *ep
++ = bcount
++; /* bump tag count */
596 else if (c
== ')') /* end tagged section */
598 if (brnestp
<= brnest
) /* extra \) */
599 return(cp
= sp
, BAD
);
600 *ep
++ = CKET
; /* enter end-of-tag */
601 *ep
++ = *--brnestp
; /* pop tag stack */
602 tags
++; /* count closed tags */
603 for (lastep2
= ep
-1; *lastep2
!= CBRA
; )
604 --lastep2
; /* FIXME: lastep becomes start */
607 else if (c
>= '1' && c
<= '9' && c
!= redelim
) /* tag use, if !delim */
609 if ((c
-= '1') >= tags
) /* too few */
611 *ep
++ = CBACK
; /* enter tag mark */
612 *ep
++ = c
; /* and the number */
615 else if (c
== '\n') /* escaped newline no good */
616 return(cp
= sp
, BAD
);
617 else if (c
== 'n') /* match a newline */
619 else if (c
== 't') /* match a tab */
621 else if (c
== 'r') /* match a return */
623 else if (c
== '+') /* 1..n repeat of previous pattern */
625 if (lastep
== NULL
) /* if + not first on line */
626 goto defchar
; /* match a literal + */
627 pp
= ep
; /* else save old ep */
628 *ep
++ = *lastep
++ | STAR
; /* flag the copy */
629 while (lastep
< pp
) /* so we can blt the pattern */
631 lastep2
= lastep
; /* no new expression */
634 goto defchar
; /* else match \c */
636 case '\0': /* ignore nuls */
639 case '\n': /* trailing pattern delimiter is missing */
640 return(cp
= sp
, BAD
);
642 case '.': /* match any char except newline */
646 case '*': /* 0..n repeat of previous pattern */
647 if (lastep
== NULL
) /* if * isn't first on line */
648 goto defchar
; /* match a literal * */
649 *lastep
|= STAR
; /* flag previous pattern */
650 lastep2
= lastep
; /* no new expression */
653 case '$': /* match only end-of-line */
654 if (*sp
!= redelim
) /* if we're not at end of RE */
655 goto defchar
; /* match a literal $ */
656 *ep
++ = CDOL
; /* insert end-symbol mark */
659 case '[': /* begin character set pattern */
660 if (ep
+ 17 >= expbuf
+ RELIMIT
)
662 *ep
++ = CCL
; /* insert class mark */
663 if ((negclass
= ((c
= *sp
++) == '^')))
665 svclass
= sp
; /* save ptr to class start */
667 if (c
== '\0') die(CGMSG
);
668 /* handle predefined character classes */
669 if (c
== '[' && *sp
== ':')
671 /* look for the matching ":]]" */
674 for (p
= sp
+3; *p
; p
++)
683 p2
< p
-2 && p2
-sp
-1 < sizeof(cc
);
686 cc
[p2
-sp
-1] = 0; /* termination */
689 while (*it
&& strcmp(*it
, cc
))
697 if (p2
[1] == '-' && p2
[2]) {
698 for (c
= *p2
; c
<= p2
[2]; c
++)
699 ep
[c
>> 3] |= bits(c
& 7);
704 ep
[c
>> 3] |= bits(c
& 7);
707 sp
= p
; c
= 0; break;
711 /* handle character ranges */
712 if (c
== '-' && sp
> svclass
&& *sp
!= ']')
713 for (c
= sp
[-2]; c
< *sp
; c
++)
714 ep
[c
>> 3] |= bits(c
& 7);
716 /* handle escape sequences in sets */
719 if ((c
= *sp
++) == 'n')
727 /* enter (possibly translated) char in set */
729 ep
[c
>> 3] |= bits(c
& 7);
731 ((c
= *sp
++) != ']');
733 /* invert the bitmask if all-but was specified */
735 for(classct
= 0; classct
< 16; classct
++)
737 ep
[0] &= 0xFE; /* never match ASCII 0 */
738 ep
+= 16; /* advance ep past set mask */
741 defchar
: /* match literal character */
742 default: /* which is what we'd do by default */
743 *ep
++ = CCHR
; /* insert character mark */
749 /* read next command from -e argument or command file */
750 static int cmdline(char *cbuf
) /* uses eflag, eargc, cmdf */
752 register int inc
; /* not char because must hold EOF */
754 cbuf
--; /* so pre-increment points us at cbuf */
756 /* e command flag is on */
759 register char *p
; /* ptr to current -e argument */
760 static char *savep
; /* saves previous value of p */
762 if (eflag
> 0) /* there are pending -e arguments */
766 exit(2); /* if no arguments, barf */
768 /* else transcribe next e argument into cbuf */
770 while((*++cbuf
= *p
++))
773 if ((*++cbuf
= *p
++) == '\0')
774 return(savep
= NULL
, -1);
778 else if (*cbuf
== '\n') /* end of 1 cmd line */
781 return(savep
= p
, 1);
782 /* we'll be back for the rest... */
785 /* found end-of-string; can advance to next argument */
786 return(savep
= NULL
, 1);
789 if ((p
= savep
) == NULL
)
792 while((*++cbuf
= *p
++))
795 if ((*++cbuf
= *p
++) == '0')
796 return(savep
= NULL
, -1);
800 else if (*cbuf
== '\n')
803 return(savep
= p
, 1);
806 return(savep
= NULL
, 1);
809 /* if no -e flag read from command file descriptor */
810 while((inc
= getc(cmdf
)) != EOF
) /* get next char */
811 if ((*++cbuf
= inc
) == '\\') /* if it's escape */
812 *++cbuf
= inc
= getc(cmdf
); /* get next char */
813 else if (*cbuf
== '\n') /* end on newline */
814 return(*cbuf
= '\0', 1); /* cap the string */
816 return(*++cbuf
= '\0', -1); /* end-of-file, no more chars */
819 /* expand an address at *cp... into expbuf, return ptr at following char */
820 static char *address(char *expbuf
) /* uses cp, linenum */
822 static int numl
= 0; /* current ind in addr-number table */
823 register char *rcp
; /* temp compile ptr for forwd look */
824 long lno
; /* computed value of numeric address */
826 if (*cp
== '$') /* end-of-source address */
828 *expbuf
++ = CEND
; /* write symbolic end address */
829 *expbuf
++ = CEOF
; /* and the end-of-address mark (!) */
830 cp
++; /* go to next source character */
831 last_line_used
= TRUE
;
832 return(expbuf
); /* we're done */
834 if (*cp
== '/') /* start of regular-expression match */
835 return(recomp(expbuf
, *cp
++)); /* compile the RE */
837 rcp
= cp
; lno
= 0; /* now handle a numeric address */
838 while(*rcp
>= '0' && *rcp
<= '9') /* collect digits */
839 lno
= lno
*10 + *rcp
++ - '0'; /* compute their value */
841 if (rcp
> cp
) /* if we caught a number... */
843 *expbuf
++ = CLNUM
; /* put a numeric-address marker */
844 *expbuf
++ = numl
; /* and the address table index */
845 linenum
[numl
++] = lno
; /* and set the table entry */
846 if (numl
>= MAXLINES
) /* oh-oh, address table overflow */
847 die(TMLNR
); /* abort with error message */
848 *expbuf
++ = CEOF
; /* write the end-of-address marker */
849 cp
= rcp
; /* point compile past the address */
850 return(expbuf
); /* we're done */
853 return(NULL
); /* no legal address was found */
856 /* accept multiline input from *cp..., discarding leading whitespace
857 txp: where to put the text */
858 static char *gettext(char* txp
) /* uses global cp */
860 register char *p
= cp
;
862 SKIPWS(p
); /* discard whitespace */
864 if ((*txp
= *p
++) == '\\') /* handle escapes */
866 if (*txp
== '\0') /* we're at end of input */
867 return(cp
= --p
, ++txp
);
868 else if (*txp
== '\n') /* also SKIPWS after newline */
870 } while (txp
++); /* keep going till we find that nul */
874 /* find the label matching *ptr, return NULL if none */
875 static label
*search(label
*ptr
) /* uses global lablst */
878 for(rp
= lablst
; rp
< ptr
; rp
++)
879 if ((rp
->name
!= NULL
) && (strcmp(rp
->name
, ptr
->name
) == 0))
884 /* write label links into the compiled-command space */
885 static void resolve(void) /* uses global lablst */
887 register label
*lptr
;
888 register sedcmd
*rptr
, *trptr
;
890 /* loop through the label table */
891 for(lptr
= lablst
; lptr
< lab
; lptr
++)
892 if (lptr
->address
== NULL
) /* barf if not defined */
894 fprintf(stderr
, ULABL
, lptr
->name
);
897 else if (lptr
->last
) /* if last is non-null */
899 rptr
= lptr
->last
; /* chase it */
900 while((trptr
= rptr
->u
.link
)) /* resolve refs */
902 rptr
->u
.link
= lptr
->address
;
905 rptr
->u
.link
= lptr
->address
;
909 /* compile a y (transliterate) command
910 ep: where to compile to
911 delim: end delimiter to look for */
912 static char *ycomp(char *ep
, char delim
)
917 /* scan the 'from' section for invalid chars */
918 for(sp
= tp
= cp
; *tp
!= delim
; tp
++)
922 if ((*tp
== '\n') || (*tp
== '\0'))
925 tp
++; /* tp now points at first char of 'to' section */
927 /* now rescan the 'from' section */
928 while((c
= *sp
++ & 0x7F) != delim
)
930 if (c
== '\\' && *sp
== 'n')
935 if ((ep
[c
] = *tp
++) == '\\' && *tp
== 'n')
940 if ((ep
[c
] == delim
) || (ep
[c
] == '\0'))
944 if (*tp
!= delim
) /* 'to', 'from' parts have unequal lengths */
947 cp
= ++tp
; /* point compile ptr past translit */
949 for(c
= 0; c
< 128; c
++) /* fill in self-map entries in table */
953 return(ep
+ 0x80); /* return first free location past table end */
956 /* sedcomp.c ends here */