Fix mdoc(7)/man(7) mix up.
[netbsd-mini2440.git] / usr.bin / sed / sed.c
blobe1cf70f3261f7cde112a7b3768aa7943bb8df65c
1 /* GNU SED, a batch stream editor.
2 Copyright (C) 1989-1991 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
18 #ifdef __STDC__
19 #define VOID void
20 #else
21 #define VOID char
22 #endif
24 #define _GNU_SOURCE
25 #include <ctype.h>
26 #ifndef isblank
27 #define isblank(c) ((c) == ' ' || (c) == '\t')
28 #endif
29 #include <stdio.h>
30 #include <regex.h>
31 #include <getopt.h>
32 #if defined(STDC_HEADERS)
33 #include <stdlib.h>
34 #endif
35 #if defined(USG) || defined(STDC_HEADERS)
36 #include <string.h>
37 #include <memory.h>
38 #define bcopy(s, d, n) (memcpy((d), (s), (n)))
39 #else
40 #include <strings.h>
41 VOID *memchr();
42 #endif
44 char *version_string = "GNU sed version 1.08";
/* A struct vector describes one chunk of a compiled sed program.  There is
   one vector for the main program, and one for each { } pair. */
struct vector {
	struct sed_cmd *v;		/* array of compiled commands */
	int v_length;			/* number of entries of v in use */
	int v_allocated;		/* number of entries v has room for */
	struct vector *up_one;
	struct vector *next_one;
};
/* A struct sed_label holds both jumps ('b' and 't' commands) and labels
   (':' commands).  There are two separate lists, one of jumps, called
   'jumps', and one of labels, called 'labels'.

     v        points to the descriptor of the program chunk in which the
              jump or label was encountered.
     v_index  is the index within that chunk of the command that actually
              IS the jump or label.  The first element of a chunk is zero.
     name     is the null-terminated name of the label.
     next     is the next jump or label in the list. */

struct sed_label {
	struct vector *v;
	int v_index;
	char *name;
	struct sed_label *next;
};
/* addr_type is
     ADDR_NULL  (zero)  for a null address,
     ADDR_NUM   (one)   if addr_number is valid,
     ADDR_REGEX (two)   if addr_regex is valid, or
     ADDR_LAST  (three) if the address is '$'.

   Other values are undefined. */

#define ADDR_NULL	0
#define ADDR_NUM	1
#define ADDR_REGEX	2
#define ADDR_LAST	3

struct addr {
	int addr_type;
	struct re_pattern_buffer *addr_regex;
	int addr_number;
};
94 /* Aflags: If the low order bit is set, a1 has been
95 matched; apply this command until a2 matches.
96 If the next bit is set, apply this command to all
97 lines that DON'T match the address(es).
100 #define A1_MATCHED_BIT 01
101 #define ADDR_BANG_BIT 02
104 struct sed_cmd {
105 struct addr a1,a2;
106 int aflags;
108 char cmd;
110 union {
111 /* This structure is used for a, i, and c commands */
112 struct {
113 char *text;
114 int text_len;
115 } cmd_txt;
117 /* This is used for b and t commands */
118 struct sed_cmd *label;
120 /* This for r and w commands */
121 FILE *io_file;
123 /* This for the hairy s command */
124 /* For the flags var:
125 low order bit means the 'g' option was given,
126 next bit means the 'p' option was given,
127 and the next bit means a 'w' option was given,
128 and wio_file contains the file to write to. */
130 #define S_GLOBAL_BIT 01
131 #define S_PRINT_BIT 02
132 #define S_WRITE_BIT 04
133 #define S_NUM_BIT 010
135 struct {
136 struct re_pattern_buffer *regx;
137 char *replacement;
138 int replace_length;
139 int flags;
140 int numb;
141 FILE *wio_file;
142 } cmd_regex;
144 /* This for the y command */
145 unsigned char *translate;
147 /* For { and } */
148 struct vector *sub;
149 struct sed_label *jump;
150 } x;
/* Sed operates a line at a time.  A struct line is a growable text buffer
   holding one such line. */
struct line {
	char *text;		/* Pointer to line allocated by malloc. */
	int length;		/* Length of text. */
	int alloc;		/* Allocated space for text. */
};
/* This table holds information about files opened by the 'r', 'w',
   and 's///w' commands.  In particular, each entry holds the FILE pointer
   to use, the file's name, and a flag that is non-zero if the file is
   being read instead of written.  An entry whose name is null is unused. */

#define NUM_FPS	32
struct {
	FILE *phile;
	char *name;
	int readit;
} file_ptrs[NUM_FPS];
173 #if defined(__STDC__)
174 # define P_(s) s
175 #else
176 # define P_(s) ()
177 #endif
179 void panic P_((char *str, ...));
180 char *__fp_name P_((FILE *fp));
181 FILE *ck_fopen P_((char *name, char *mode));
182 void ck_fwrite P_((char *ptr, int size, int nmemb, FILE *stream));
183 void ck_fclose P_((FILE *stream));
184 VOID *ck_malloc P_((int size));
185 VOID *ck_realloc P_((VOID *ptr, int size));
186 char *ck_strdup P_((char *str));
187 VOID *init_buffer P_((void));
188 void flush_buffer P_((VOID *bb));
189 int size_buffer P_((VOID *b));
190 void add_buffer P_((VOID *bb, char *p, int n));
191 void add1_buffer P_((VOID *bb, int ch));
192 char *get_buffer P_((VOID *bb));
194 void compile_string P_((char *str));
195 void compile_file P_((char *str));
196 struct vector *compile_program P_((struct vector *vector));
197 void bad_prog P_((char *why));
198 int inchar P_((void));
199 void savchar P_((int ch));
200 int compile_address P_((struct addr *addr));
201 void compile_regex P_((int slash));
202 struct sed_label *setup_jump P_((struct sed_label *list, struct sed_cmd *cmd, struct vector *vec));
203 FILE *compile_filename P_((int readit));
204 void read_file P_((char *name));
205 void execute_program P_((struct vector *vec));
206 int match_address P_((struct addr *addr));
207 int read_pattern_space P_((void));
208 void append_pattern_space P_((void));
209 void line_copy P_((struct line *from, struct line *to));
210 void line_append P_((struct line *from, struct line *to));
211 void str_append P_((struct line *to, char *string, int length));
212 void usage P_((void));
214 extern char *myname;
216 /* If set, don't write out the line unless explicitly told to */
217 int no_default_output = 0;
219 /* Current input line # */
220 int input_line_number = 0;
222 /* Are we on the last input file? */
223 int last_input_file = 0;
225 /* Have we hit EOF on the last input file? This is used to decide if we
226 have hit the '$' address yet. */
227 int input_EOF = 0;
229 /* non-zero if a quit command has been executed. */
230 int quit_cmd = 0;
232 /* Have we done any replacements lately? This is used by the 't' command. */
233 int replaced = 0;
235 /* How many '{'s are we executing at the moment */
236 int program_depth = 0;
238 /* The complete compiled SED program that we are going to run */
239 struct vector *the_program = 0;
241 /* information about labels and jumps-to-labels. This is used to do
242 the required backpatching after we have compiled all the scripts. */
243 struct sed_label *jumps = 0;
244 struct sed_label *labels = 0;
246 /* The 'current' input line. */
247 struct line line;
249 /* An input line that's been stored for later use by the program */
250 struct line hold;
252 /* A 'line' to append to the current line when it comes time to write it out */
253 struct line append;
256 /* When we're reading a script command from a string, 'prog_start' and
257 'prog_end' point to the beginning and end of the string. This
258 would allow us to compile script strings that contain nulls, except
259 that script strings are only read from the command line, which is
260 null-terminated */
261 char *prog_start;
262 char *prog_end;
264 /* When we're reading a script command from a string, 'prog_cur' points
265 to the current character in the string */
266 char *prog_cur;
268 /* This is the name of the current script file.
269 It is used for error messages. */
270 char *prog_name;
272 /* This is the current script file. If it is zero, we are reading
273 from a string stored in 'prog_start' instead. If both 'prog_file'
274 and 'prog_start' are zero, we're in trouble! */
275 FILE *prog_file;
277 /* this is the number of the current script line that we're compiling. It is
278 used to give out useful and informative error messages. */
279 int prog_line = 1;
281 /* This is the file pointer that we're currently reading data from. It may
282 be stdin */
283 FILE *input_file;
285 /* If this variable is non-zero at exit, one or more of the input
286 files couldn't be opened. */
288 int bad_input = 0;
290 /* 'an empty regular expression is equivalent to the last regular
291 expression read' so we have to keep track of the last regex used.
292 Here's where we store a pointer to it (it is only malloc()'d once) */
293 struct re_pattern_buffer *last_regex;
295 /* Various error messages we may want to print */
296 static char ONE_ADDR[] = "Command only uses one address";
297 static char NO_ADDR[] = "Command doesn't take any addresses";
298 static char LINE_JUNK[] = "Extra characters after command";
299 static char BAD_EOF[] = "Unexpected End-of-file";
300 static char NO_REGEX[] = "No previous regular expression";
/* Long option table handed to getopt_long() by main().  The second field
   is 1 when the option takes an argument and 0 when it does not; the last
   field is the short-option character that getopt_long() returns. */
static struct option longopts[] =
{
	{"expression", 1, NULL, 'e'},
	{"file", 1, NULL, 'f'},
	{"quiet", 0, NULL, 'n'},
	{"silent", 0, NULL, 'n'},
	{"version", 0, NULL, 'V'},
	{NULL, 0, NULL, 0}
};
312 /* Yes, the main program, which parses arguments, and does the right
313 thing with them; it also inits the temporary storage, etc. */
314 void
315 main(argc,argv)
316 int argc;
317 char **argv;
319 int opt;
320 char *e_strings = NULL;
321 int compiled = 0;
322 struct sed_label *go,*lbl;
324 myname=argv[0];
325 while((opt=getopt_long(argc,argv,"ne:f:V", longopts, (int *) 0))
326 !=EOF) {
327 switch(opt) {
328 case 'n':
329 no_default_output = 1;
330 break;
331 case 'e':
332 if(e_strings == NULL) {
333 e_strings=ck_malloc(strlen(optarg)+2);
334 strcpy(e_strings,optarg);
335 } else {
336 e_strings=ck_realloc(e_strings,strlen(e_strings)+strlen(optarg)+2);
337 strcat(e_strings,optarg);
339 strcat(e_strings,"\n");
340 compiled = 1;
341 break;
342 case 'f':
343 compile_file(optarg);
344 compiled = 1;
345 break;
346 case 'V':
347 fprintf(stderr, "%s\n", version_string);
348 break;
349 default:
350 usage();
353 if(e_strings) {
354 compile_string(e_strings);
355 free(e_strings);
357 if(!compiled) {
358 if (optind == argc)
359 usage();
360 compile_string(argv[optind++]);
363 for(go=jumps;go;go=go->next) {
364 for(lbl=labels;lbl;lbl=lbl->next)
365 if(!strcmp(lbl->name,go->name))
366 break;
367 if(*go->name && !lbl)
368 panic("Can't find label for jump to '%s'",go->name);
369 go->v->v[go->v_index].x.jump=lbl;
372 line.length=0;
373 line.alloc=50;
374 line.text=ck_malloc(50);
376 append.length=0;
377 append.alloc=50;
378 append.text=ck_malloc(50);
380 hold.length=0;
381 hold.alloc=50;
382 hold.text=ck_malloc(50);
384 if(argc<=optind) {
385 last_input_file++;
386 read_file("-");
387 } else while(optind<argc) {
388 if(optind==argc-1)
389 last_input_file++;
390 read_file(argv[optind]);
391 optind++;
392 if(quit_cmd)
393 break;
395 if(bad_input)
396 exit(2);
397 exit(0);
400 /* 'str' is a string (from the command line) that contains a sed command.
401 Compile the command, and add it to the end of 'the_program' */
402 void
403 compile_string(str)
404 char *str;
406 prog_file = 0;
407 prog_line=0;
408 prog_start=prog_cur=str;
409 prog_end=str+strlen(str);
410 the_program=compile_program(the_program);
413 /* 'str' is the name of a file containing sed commands. Read them in
414 and add them to the end of 'the_program' */
415 void
416 compile_file(str)
417 char *str;
419 int ch;
421 prog_start=prog_cur=prog_end=0;
422 prog_name=str;
423 prog_line=1;
424 if(str[0]=='-' && str[1]=='\0')
425 prog_file=stdin;
426 else
427 prog_file=ck_fopen(str,"r");
428 ch=getc(prog_file);
429 if(ch=='#') {
430 ch=getc(prog_file);
431 if(ch=='n')
432 no_default_output++;
433 while(ch!=EOF && ch!='\n')
434 ch=getc(prog_file);
435 } else if(ch!=EOF)
436 ungetc(ch,prog_file);
437 the_program=compile_program(the_program);
440 #define MORE_CMDS 40
442 /* Read a program (or a subprogram within '{' '}' pairs) in and store
443 the compiled form in *'vector' Return a pointer to the new vector. */
/* When 'vector' is null a fresh vector with room for MORE_CMDS commands is
   allocated first.  Commands are then read one at a time with inchar()
   until EOF, or until a '}' closes the current block; each one is stored
   in the next free slot of vector->v.  Syntax errors are reported through
   bad_prog(). */
444 struct vector *
445 compile_program(vector)
446 struct vector *vector;
448 struct sed_cmd *cur_cmd;
449 int ch;
450 int slash;
451 VOID *b;
452 unsigned char *string;
453 int num;
455 if(!vector) {
456 vector=(struct vector *)ck_malloc(sizeof(struct vector));
457 vector->v=(struct sed_cmd *)ck_malloc(MORE_CMDS*sizeof(struct sed_cmd));
458 vector->v_allocated=MORE_CMDS;
459 vector->v_length=0;
460 vector->up_one = 0;
461 vector->next_one = 0;
463 for(;;) {
464 skip_comment:
/* Skip blanks, newlines and ';' separators in front of the next command. */
465 do ch=inchar();
466 while(ch!=EOF && (isblank(ch) || ch=='\n' || ch==';'));
467 if(ch==EOF)
468 break;
469 savchar(ch);
/* Grow the command array by MORE_CMDS entries whenever it is full, then
   reserve the next slot for the command about to be parsed. */
471 if(vector->v_length==vector->v_allocated) {
472 vector->v=(struct sed_cmd *)ck_realloc((VOID *)vector->v,(vector->v_length+MORE_CMDS)*sizeof(struct sed_cmd));
473 vector->v_allocated+=MORE_CMDS;
475 cur_cmd=vector->v+vector->v_length;
476 vector->v_length++;
478 cur_cmd->a1.addr_type=0;
479 cur_cmd->a2.addr_type=0;
480 cur_cmd->aflags=0;
481 cur_cmd->cmd=0;
/* Optional address, optionally followed by ',' and a second address. */
483 if(compile_address(&(cur_cmd->a1))) {
484 ch=inchar();
485 if(ch==',') {
486 do ch=inchar();
487 while(ch!=EOF && isblank(ch));
488 savchar(ch);
489 if(compile_address(&(cur_cmd->a2)))
491 else
492 bad_prog("Unexpected ','");
493 } else
494 savchar(ch);
496 ch=inchar();
497 if(ch==EOF)
498 break;
/* 'ch' now holds the command letter; the '!' case jumps back here with the
   character that follows the '!'. */
499 new_cmd:
500 switch(ch) {
501 case '#':
/* A comment is not a command: drop the rest of the line and give back the
   slot reserved above. */
502 if(cur_cmd->a1.addr_type!=0)
503 bad_prog(NO_ADDR);
504 do ch=inchar();
505 while(ch!=EOF && ch!='\n');
506 vector->v_length--;
507 goto skip_comment;
508 case '!':
509 if(cur_cmd->aflags & ADDR_BANG_BIT)
510 bad_prog("Multiple '!'s");
511 cur_cmd->aflags|= ADDR_BANG_BIT;
512 do ch=inchar();
513 while(ch!=EOF && isblank(ch));
514 if(ch==EOF)
515 bad_prog(BAD_EOF);
516 #if 0
517 savchar(ch);
518 #endif
519 goto new_cmd;
520 case 'a':
521 case 'i':
522 if(cur_cmd->a2.addr_type!=0)
523 bad_prog(ONE_ADDR);
524 /* Fall Through */
525 case 'c':
/* The command letter must be followed by backslash-newline.  The text runs
   to the next newline that is not preceded by a backslash; a backslash
   makes the following character part of the text.  The terminating
   newline is kept as part of the text. */
526 cur_cmd->cmd=ch;
527 if(inchar()!='\\' || inchar()!='\n')
528 bad_prog(LINE_JUNK);
529 b=init_buffer();
530 while((ch=inchar())!=EOF && ch!='\n') {
531 if(ch=='\\')
532 ch=inchar();
533 add1_buffer(b,ch);
535 if(ch!=EOF)
536 add1_buffer(b,ch);
537 num=size_buffer(b);
538 string=(unsigned char *)ck_malloc(num);
539 bcopy(get_buffer(b),string,num);
540 flush_buffer(b);
541 cur_cmd->x.cmd_txt.text_len=num;
542 cur_cmd->x.cmd_txt.text=(char *)string;
543 break;
544 case '{':
/* Compile the nested block recursively into a vector of its own.
   NOTE(review): program_depth is incremented here, but no matching
   decrement is visible in this function -- confirm whether a stray '}'
   after a closed block is meant to be accepted. */
545 cur_cmd->cmd=ch;
546 program_depth++;
547 #if 0
548 while((ch=inchar())!=EOF && ch!='\n')
549 if(!isblank(ch))
550 bad_prog(LINE_JUNK);
551 #endif
552 cur_cmd->x.sub=compile_program((struct vector *)0);
553 /* FOO JF is this the right thing to do? */
554 break;
555 case '}':
/* End of a nested block: give back the reserved slot, accept only blanks
   up to the next newline or ';', and return the finished vector. */
556 if(!program_depth)
557 bad_prog("Unexpected '}'");
558 --(vector->v_length);
559 while((ch=inchar())!=EOF && ch!='\n' && ch!=';')
560 if(!isblank(ch))
561 bad_prog(LINE_JUNK);
562 return vector;
563 case ':':
564 cur_cmd->cmd=ch;
565 if(cur_cmd->a1.addr_type!=0)
566 bad_prog(": doesn't want any addresses");
567 labels=setup_jump(labels,cur_cmd,vector);
568 break;
569 case 'b':
570 case 't':
571 cur_cmd->cmd=ch;
572 jumps=setup_jump(jumps,cur_cmd,vector);
573 break;
574 case 'q':
575 case '=':
576 if(cur_cmd->a2.addr_type)
577 bad_prog(ONE_ADDR);
578 /* Fall Through */
579 case 'd':
580 case 'D':
581 case 'g':
582 case 'G':
583 case 'h':
584 case 'H':
585 case 'l':
586 case 'n':
587 case 'N':
588 case 'p':
589 case 'P':
590 case 'x':
/* Commands without arguments: only blanks may follow before the newline,
   ';' or EOF that ends the command. */
591 cur_cmd->cmd=ch;
592 do ch=inchar();
593 while(ch!=EOF && isblank(ch) && ch!='\n' && ch!=';');
594 if(ch!='\n' && ch!=';' && ch!=EOF)
595 bad_prog(LINE_JUNK);
596 break;
598 case 'r':
599 if(cur_cmd->a2.addr_type!=0)
600 bad_prog(ONE_ADDR);
601 /* FALL THROUGH */
602 case 'w':
603 cur_cmd->cmd=ch;
604 cur_cmd->x.io_file=compile_filename(ch=='r');
605 break;
607 case 's':
/* The character after 's' is the delimiter.  compile_regex() reads the
   pattern up to the delimiter and leaves the result in last_regex. */
608 cur_cmd->cmd=ch;
609 slash=inchar();
610 compile_regex(slash);
612 cur_cmd->x.cmd_regex.regx=last_regex;
/* Collect the replacement up to the next delimiter.  A backslash followed
   by a newline stores only the newline; a backslash followed by any other
   character stores both, so that the escape can be interpreted when the
   command is executed. */
614 b=init_buffer();
615 while((ch=inchar())!=EOF && ch!=slash) {
616 if(ch=='\\') {
617 int ci;
619 ci=inchar();
620 if(ci!=EOF) {
621 if(ci!='\n')
622 add1_buffer(b,ch);
623 add1_buffer(b,ci);
625 } else
626 add1_buffer(b,ch);
628 cur_cmd->x.cmd_regex.replace_length=size_buffer(b);
629 cur_cmd->x.cmd_regex.replacement=ck_malloc(cur_cmd->x.cmd_regex.replace_length);
630 bcopy(get_buffer(b),cur_cmd->x.cmd_regex.replacement,cur_cmd->x.cmd_regex.replace_length);
631 flush_buffer(b);
633 cur_cmd->x.cmd_regex.flags=0;
634 cur_cmd->x.cmd_regex.numb=0;
636 if(ch==EOF)
637 break;
/* Parse the option letters that follow the closing delimiter, up to a
   newline, ';' or EOF. */
638 do {
639 ch=inchar();
640 switch(ch) {
641 case 'p':
642 if(cur_cmd->x.cmd_regex.flags&S_PRINT_BIT)
643 bad_prog("multiple 'p' options to 's' command");
644 cur_cmd->x.cmd_regex.flags|=S_PRINT_BIT;
645 break;
646 case 'g':
/* 'g' overrides a number given earlier: S_NUM_BIT is cleared. */
647 if(cur_cmd->x.cmd_regex.flags&S_NUM_BIT)
648 cur_cmd->x.cmd_regex.flags&= ~S_NUM_BIT;
649 if(cur_cmd->x.cmd_regex.flags&S_GLOBAL_BIT)
650 bad_prog("multiple 'g' options to 's' command");
651 cur_cmd->x.cmd_regex.flags|=S_GLOBAL_BIT;
652 break;
653 case 'w':
/* compile_filename() reads the rest of the line, so the option loop is
   ended by pretending a newline was seen. */
654 cur_cmd->x.cmd_regex.flags|=S_WRITE_BIT;
655 cur_cmd->x.cmd_regex.wio_file=compile_filename(0);
656 ch='\n';
657 break;
658 case '0': case '1': case '2': case '3':
659 case '4': case '5': case '6': case '7':
660 case '8': case '9':
/* A number sets S_NUM_BIT only when 'g' has not been given already; the
   value itself is always stored in numb. */
661 if(cur_cmd->x.cmd_regex.flags&S_NUM_BIT)
662 bad_prog("multiple number options to 's' command");
663 if((cur_cmd->x.cmd_regex.flags&S_GLOBAL_BIT)==0)
664 cur_cmd->x.cmd_regex.flags|=S_NUM_BIT;
665 num = 0;
666 while(isdigit(ch)) {
667 num=num*10+ch-'0';
668 ch=inchar();
670 savchar(ch);
671 cur_cmd->x.cmd_regex.numb=num;
672 break;
673 case '\n':
674 case ';':
675 case EOF:
676 break;
677 default:
678 bad_prog("Unknown option to 's'");
679 break;
681 } while(ch!=EOF && ch!='\n' && ch!=';');
682 if(ch==EOF)
683 break;
684 break;
686 case 'y':
/* Start from a 256-entry identity table, read the first string into a
   buffer, then map each of its characters to the character at the same
   position in the second string. */
687 cur_cmd->cmd=ch;
688 string=(unsigned char *)ck_malloc(256);
689 for(num=0;num<256;num++)
690 string[num]=num;
691 b=init_buffer();
692 slash=inchar();
693 while((ch=inchar())!=EOF && ch!=slash)
694 add1_buffer(b,ch);
695 cur_cmd->x.translate=string;
696 string=(unsigned char *)get_buffer(b);
697 for(num=size_buffer(b);num;--num) {
698 ch=inchar();
699 if(ch==EOF)
700 bad_prog(BAD_EOF);
701 if(ch==slash)
702 bad_prog("strings for y command are different lengths");
703 cur_cmd->x.translate[*string++]=ch;
705 flush_buffer(b);
706 if(inchar()!=slash || ((ch=inchar())!=EOF && ch!='\n' && ch!=';'))
707 bad_prog(LINE_JUNK);
708 break;
710 default:
711 bad_prog("Unknown command");
714 return vector;
717 /* Complain about a programming error and exit. */
718 void
719 bad_prog(why)
720 char *why;
722 if(prog_line)
723 fprintf(stderr,"%s: file %s line %d: %s\n",myname,prog_name,prog_line,why);
724 else
725 fprintf(stderr,"%s: %s\n",myname,why);
726 exit(1);
729 /* Read the next character from the program. Return EOF if there isn't
730 anything to read. Keep prog_line up to date, so error messages can
731 be meaningful. */
733 inchar()
735 int ch;
736 if(prog_file) {
737 if(feof(prog_file))
738 return EOF;
739 else
740 ch=getc(prog_file);
741 } else {
742 if(!prog_cur)
743 return EOF;
744 else if(prog_cur==prog_end) {
745 ch=EOF;
746 prog_cur=0;
747 } else
748 ch= *prog_cur++;
750 if(ch=='\n' && prog_line)
751 prog_line++;
752 return ch;
755 /* unget 'ch' so the next call to inchar will return it. 'ch' must not be
756 EOF or anything nasty like that. */
757 void
758 savchar(ch)
759 int ch;
761 if(ch==EOF)
762 return;
763 if(ch=='\n' && prog_line>1)
764 --prog_line;
765 if(prog_file)
766 ungetc(ch,prog_file);
767 else
768 *--prog_cur=ch;
772 /* Try to read an address for a sed command. If it succeeeds,
773 return non-zero and store the resulting address in *'addr'.
774 If the input doesn't look like an address read nothing
775 and return zero. */
777 compile_address(addr)
778 struct addr *addr;
780 int ch;
781 int num;
783 ch=inchar();
785 if(isdigit(ch)) {
786 num=ch-'0';
787 while((ch=inchar())!=EOF && isdigit(ch))
788 num=num*10+ch-'0';
789 while(ch!=EOF && isblank(ch))
790 ch=inchar();
791 savchar(ch);
792 addr->addr_type=ADDR_NUM;
793 addr->addr_number = num;
794 return 1;
795 } else if(ch=='/') {
796 addr->addr_type=ADDR_REGEX;
797 compile_regex('/');
798 addr->addr_regex=last_regex;
799 do ch=inchar();
800 while(ch!=EOF && isblank(ch));
801 savchar(ch);
802 return 1;
803 } else if(ch=='$') {
804 addr->addr_type=ADDR_LAST;
805 do ch=inchar();
806 while(ch!=EOF && isblank(ch));
807 savchar(ch);
808 return 1;
809 } else
810 savchar(ch);
811 return 0;
814 void
815 compile_regex (slash)
816 int slash;
818 VOID *b;
819 int ch;
820 int in_char_class = 0;
822 b=init_buffer();
823 while((ch=inchar())!=EOF && (ch!=slash || in_char_class)) {
824 if(ch=='^') {
825 if(size_buffer(b)==0) {
826 add1_buffer(b,'\\');
827 add1_buffer(b,'`');
828 } else
829 add1_buffer(b,ch);
830 continue;
831 } else if(ch=='$') {
832 ch=inchar();
833 savchar(ch);
834 if(ch==slash) {
835 add1_buffer(b,'\\');
836 add1_buffer(b,'\'');
837 } else
838 add1_buffer(b,'$');
839 continue;
840 } else if(ch == '[') {
841 add1_buffer(b,ch);
842 in_char_class = 1;
843 continue;
844 } else if(ch == ']') {
845 add1_buffer(b,ch);
846 in_char_class = 0;
847 continue;
848 } else if(ch!='\\') {
849 add1_buffer(b,ch);
850 continue;
852 ch=inchar();
853 switch(ch) {
854 case 'n':
855 add1_buffer(b,'\n');
856 break;
857 #if 0
858 case 'b':
859 add1_buffer(b,'\b');
860 break;
861 case 'f':
862 add1_buffer(b,'\f');
863 break;
864 case 'r':
865 add1_buffer(b,'\r');
866 break;
867 case 't':
868 add1_buffer(b,'\t');
869 break;
870 #endif /* 0 */
871 case EOF:
872 break;
873 default:
874 add1_buffer(b,'\\');
875 add1_buffer(b,ch);
876 break;
879 if(ch==EOF)
880 bad_prog(BAD_EOF);
881 if(size_buffer(b)) {
882 last_regex=(struct re_pattern_buffer *)ck_malloc(sizeof(struct re_pattern_buffer));
883 last_regex->allocated=size_buffer(b)+10;
884 last_regex->buffer=ck_malloc(last_regex->allocated);
885 last_regex->fastmap=ck_malloc(256);
886 last_regex->translate=0;
887 re_compile_pattern(get_buffer(b),size_buffer(b),last_regex);
888 } else if(!last_regex)
889 bad_prog(NO_REGEX);
890 flush_buffer(b);
893 /* Store a label (or label reference) created by a ':', 'b', or 't'
894 comand so that the jump to/from the lable can be backpatched after
895 compilation is complete */
896 struct sed_label *
897 setup_jump(list,cmd,vec)
898 struct sed_label *list;
899 struct sed_cmd *cmd;
900 struct vector *vec;
902 struct sed_label *tmp;
903 VOID *b;
904 int ch;
906 b=init_buffer();
907 while((ch=inchar()) != EOF && isblank(ch))
909 while(ch!=EOF && ch!='\n') {
910 add1_buffer(b,ch);
911 ch=inchar();
913 savchar(ch);
914 add1_buffer(b,'\0');
915 tmp=(struct sed_label *)ck_malloc(sizeof(struct sed_label));
916 tmp->v=vec;
917 tmp->v_index=cmd-vec->v;
918 tmp->name=ck_strdup(get_buffer(b));
919 tmp->next=list;
920 flush_buffer(b);
921 return tmp;
924 /* read in a filename for a 'r', 'w', or 's///w' command, and
925 update the internal structure about files. The file is
926 opened if it isn't already open. */
927 FILE *
928 compile_filename(readit)
929 int readit;
931 char *file_name;
932 int n;
933 VOID *b;
934 int ch;
936 if(inchar()!=' ')
937 bad_prog("missing ' ' before filename");
938 b=init_buffer();
939 while((ch=inchar())!=EOF && ch!='\n')
940 add1_buffer(b,ch);
941 add1_buffer(b,'\0');
942 file_name=get_buffer(b);
943 for(n=0;n<NUM_FPS;n++) {
944 if(!file_ptrs[n].name)
945 break;
946 if(!strcmp(file_ptrs[n].name,file_name)) {
947 if(file_ptrs[n].readit!=readit)
948 bad_prog("Can't open file for both reading and writing");
949 flush_buffer(b);
950 return file_ptrs[n].phile;
953 if(n<NUM_FPS) {
954 file_ptrs[n].name=ck_strdup(file_name);
955 file_ptrs[n].readit=readit;
956 if (!readit)
957 file_ptrs[n].phile=ck_fopen(file_name,"a");
958 else if (access(file_name, 4) == 0)
959 file_ptrs[n].phile=ck_fopen(file_name,"r");
960 else
961 file_ptrs[n].phile=ck_fopen("/dev/null", "r");
962 flush_buffer(b);
963 return file_ptrs[n].phile;
964 } else {
965 bad_prog("Hopelessely evil compiled in limit on number of open files. re-compile sed");
966 return 0;
970 /* Parse a filename given by a 'r' 'w' or 's///w' command. */
971 void
972 read_file(name)
973 char *name;
975 if(*name=='-' && name[1]=='\0')
976 input_file=stdin;
977 else {
978 input_file=fopen(name,"r");
979 if(input_file==0) {
980 extern int errno;
981 extern char *sys_errlist[];
982 extern int sys_nerr;
984 char *ptr;
986 ptr=(errno>=0 && errno<sys_nerr) ? sys_errlist[errno] : "Unknown error code";
987 bad_input++;
988 fprintf(stderr,"%s: can't read %s: %s\n",myname,name,ptr);
990 return;
993 while(read_pattern_space()) {
994 execute_program(the_program);
995 if(!no_default_output)
996 ck_fwrite(line.text,1,line.length,stdout);
997 if(append.length) {
998 ck_fwrite(append.text,1,append.length,stdout);
999 append.length=0;
1001 if(quit_cmd)
1002 break;
1004 ck_fclose(input_file);
1007 /* Execute the program 'vec' on the current input line. */
/* The commands of 'vec' are tried in order.  A command is carried out when
   its address(es) select the current line, or, with ADDR_BANG_BIT set,
   when they do not.  '{' commands are executed by a recursive call.  The
   loop is left as soon as a command sets the static flag end_cycle. */
1008 void
1009 execute_program(vec)
1010 struct vector *vec;
1012 struct sed_cmd *cur_cmd;
1013 int n;
1014 int addr_matched;
1015 static int end_cycle;
1017 int start;
1018 int remain;
1019 int offset;
1021 static struct line tmp;
1022 struct line t;
1023 char *rep,*rep_end,*rep_next,*rep_cur;
1025 struct re_registers regs;
1026 int count = 0;
1028 end_cycle = 0;
1030 for(cur_cmd=vec->v,n=vec->v_length;n;cur_cmd++,n--) {
/* The 'b' and 't' commands come back here after pointing cur_cmd and n at
   the label they jump to. */
1032 exe_loop:
/* While A1_MATCHED_BIT is set we are inside an a1,a2 range: the command
   applies, and the bit is cleared once a2 matches.  Otherwise a match of
   a1 selects the line and, if there is a second address, opens a range. */
1033 addr_matched=0;
1034 if(cur_cmd->aflags&A1_MATCHED_BIT) {
1035 addr_matched=1;
1036 if(match_address(&(cur_cmd->a2)))
1037 cur_cmd->aflags&=~A1_MATCHED_BIT;
1038 } else if(match_address(&(cur_cmd->a1))) {
1039 addr_matched=1;
1040 if(cur_cmd->a2.addr_type!=ADDR_NULL)
1041 cur_cmd->aflags|=A1_MATCHED_BIT;
1043 if(cur_cmd->aflags&ADDR_BANG_BIT)
1044 addr_matched= !addr_matched;
1045 if(!addr_matched)
1046 continue;
1047 switch(cur_cmd->cmd) {
1048 case '{': /* Execute sub-program */
1049 execute_program(cur_cmd->x.sub);
1050 break;
1052 case ':': /* Executing labels is easy. */
1053 break;
1055 case '=':
1056 printf("%d\n",input_line_number);
1057 break;
1059 case 'a':
/* Queue the text in the 'append' buffer, doubling the buffer until the
   text fits. */
1060 while(append.alloc-append.length<cur_cmd->x.cmd_txt.text_len) {
1061 append.alloc *= 2;
1062 append.text=ck_realloc(append.text,append.alloc);
1064 bcopy(cur_cmd->x.cmd_txt.text,append.text+append.length,cur_cmd->x.cmd_txt.text_len);
1065 append.length+=cur_cmd->x.cmd_txt.text_len;
1066 break;
1068 case 'b':
/* Without a target the cycle ends; otherwise execution continues at the
   label's position in the label's own chunk. */
1069 if(!cur_cmd->x.jump)
1070 end_cycle++;
1071 else {
1072 struct sed_label *j = cur_cmd->x.jump;
1074 n= j->v->v_length - j->v_index;
1075 cur_cmd= j->v->v + j->v_index;
1076 goto exe_loop;
1078 break;
1080 case 'c':
/* The pattern space is emptied for every selected line, but the text is
   written only while A1_MATCHED_BIT is clear. */
1081 line.length=0;
1082 if(!(cur_cmd->aflags&A1_MATCHED_BIT))
1083 ck_fwrite(cur_cmd->x.cmd_txt.text,1,cur_cmd->x.cmd_txt.text_len,stdout);
1084 end_cycle++;
1085 break;
1087 case 'd':
1088 line.length=0;
1089 end_cycle++;
1090 break;
1092 case 'D':
/* Drop the text in front of the first newline of the pattern space.
   NOTE(review): the result of memchr() is used without a null check, and
   the copy starts at the newline itself, so the newline stays in the
   pattern space -- confirm that both are intended. */
1094 char *tmp;
1095 int newlength;
1097 tmp=memchr(line.text,'\n',line.length);
1098 newlength=line.length-(tmp-line.text);
1099 if(newlength)
1100 memmove(line.text,tmp,newlength);
1101 line.length=newlength;
1103 end_cycle++;
1104 break;
1106 case 'g':
1107 line_copy(&hold,&line);
1108 break;
1110 case 'G':
1111 line_append(&hold,&line);
1112 break;
1114 case 'h':
1115 line_copy(&line,&hold);
1116 break;
1118 case 'H':
1119 line_append(&line,&hold);
1120 break;
1122 case 'i':
1123 ck_fwrite(cur_cmd->x.cmd_txt.text,1,cur_cmd->x.cmd_txt.text_len,stdout);
1124 break;
1126 case 'l':
/* Print the pattern space unambiguously: a backslash is doubled, printable
   characters are copied, the listed control characters get C-style
   escapes and anything else is written as a backslash and two hex digits.
   A newline is inserted once more than 77 columns have been counted. */
1128 char *tmp;
1129 int n;
1130 int width = 0;
1132 n=line.length;
1133 tmp=line.text;
1134 /* Use --n so this'll skip the trailing newline */
1135 while(--n) {
1136 if(width>77) {
1137 width=0;
1138 putchar('\n');
1140 if(*tmp == '\\') {
1141 printf("\\\\");
1142 width+=2;
1143 } else if(isprint(*tmp)) {
1144 putchar(*tmp);
1145 width++;
1146 } else switch(*tmp) {
1147 #if 0
1148 /* Should print \00 instead of \0 because (a) POSIX requires it, and
1149 (b) this way \01 is unambiguous. */
1150 case '\0':
1151 printf("\\0");
1152 width+=2;
1153 break;
1154 #endif
1155 case 007:
1156 printf("\\a");
1157 width+=2;
1158 break;
1159 case '\b':
1160 printf("\\b");
1161 width+=2;
1162 break;
1163 case '\f':
1164 printf("\\f");
1165 width+=2;
1166 break;
1167 case '\n':
1168 printf("\\n");
1169 width+=2;
1170 break;
1171 case '\r':
1172 printf("\\r");
1173 width+=2;
1174 break;
1175 case '\t':
1176 printf("\\t");
1177 width+=2;
1178 break;
1179 case '\v':
1180 printf("\\v");
1181 width+=2;
1182 break;
1183 default:
1184 printf("\\%02x",(*tmp)&0xFF);
1185 width+=2;
1186 break;
1188 tmp++;
1190 putchar('\n');
1192 break;
1194 case 'n':
/* At end of input 'n' and 'N' behave like 'q'. */
1195 if (feof(input_file)) goto quit;
1196 ck_fwrite(line.text,1,line.length,stdout);
1197 read_pattern_space();
1198 break;
1200 case 'N':
1201 if (feof(input_file)) goto quit;
1202 append_pattern_space();
1203 break;
1205 case 'p':
1206 ck_fwrite(line.text,1,line.length,stdout);
1207 break;
1209 case 'P':
/* Write the pattern space up to and including its first newline, or all of
   it when it contains no newline. */
1211 char *tmp;
1213 tmp=memchr(line.text,'\n',line.length);
1214 ck_fwrite(line.text, 1,
1215 tmp ? tmp - line.text + 1
1216 : line.length, stdout);
1218 break;
1220 case 'q': quit:
1221 quit_cmd++;
1222 end_cycle++;
1223 break;
1225 case 'r':
/* Rewind the file and read all of it onto the end of the 'append' buffer,
   doubling the buffer each time it is full. */
1227 int n = 0;
1229 rewind(cur_cmd->x.io_file);
1230 do {
1231 append.length += n;
1232 if(append.length==append.alloc) {
1233 append.alloc *= 2;
1234 append.text = ck_realloc(append.text, append.alloc);
1236 } while((n=fread(append.text+append.length,sizeof(char),append.alloc-append.length,cur_cmd->x.io_file))>0);
1237 if(ferror(cur_cmd->x.io_file))
1238 panic("Read error on input file to 'r' command");
1240 break;
1242 case 's':
/* The new line is built in the static buffer 'tmp', which is exchanged
   with the pattern space at the end.  The search covers line.length-1
   characters, i.e. everything but the last character of the pattern space
   (presumably its trailing newline). */
1243 if(!tmp.alloc) {
1244 tmp.alloc=50;
1245 tmp.text=ck_malloc(50);
1247 count=0;
1248 start = 0;
1249 remain=line.length-1;
1250 tmp.length=0;
1251 rep = cur_cmd->x.cmd_regex.replacement;
1252 rep_end=rep+cur_cmd->x.cmd_regex.replace_length;
1254 while((offset = re_search(cur_cmd->x.cmd_regex.regx,
1255 line.text,
1256 line.length-1,
1257 start,
1258 remain,
1259 &regs))>=0) {
/* Copy the unmatched text in front of this match. */
1260 count++;
1261 if(offset-start)
1262 str_append(&tmp,line.text+start,offset-start);
/* With a numeric option, matches other than the requested one are copied
   unchanged and the search goes on behind them. */
1264 if(cur_cmd->x.cmd_regex.flags&S_NUM_BIT) {
1265 if(count!=cur_cmd->x.cmd_regex.numb) {
1266 str_append(&tmp,line.text+regs.start[0],regs.end[0]-regs.start[0]);
1267 start = (offset == regs.end[0] ? offset + 1 : regs.end[0]);
1268 remain = (line.length-1) - start;
1269 continue;
/* Expand the replacement: '&' stands for the whole match, a backslash and
   a digit for the corresponding register, and a backslash in front of any
   other character for that character itself. */
1273 for(rep_next=rep_cur=rep;rep_next<rep_end;rep_next++) {
1274 if(*rep_next=='&') {
1275 if(rep_next-rep_cur)
1276 str_append(&tmp,rep_cur,rep_next-rep_cur);
1277 str_append(&tmp,line.text+regs.start[0],regs.end[0]-regs.start[0]);
1278 rep_cur=rep_next+1;
1279 } else if(*rep_next=='\\') {
1280 if(rep_next-rep_cur)
1281 str_append(&tmp,rep_cur,rep_next-rep_cur);
1282 rep_next++;
1283 if(rep_next!=rep_end) {
1284 int n;
1286 if(*rep_next>='0' && *rep_next<='9') {
1287 n= *rep_next -'0';
1288 str_append(&tmp,line.text+regs.start[n],regs.end[n]-regs.start[n]);
1289 } else
1290 str_append(&tmp,rep_next,1);
1292 rep_cur=rep_next+1;
1295 if(rep_next-rep_cur)
1296 str_append(&tmp,rep_cur,rep_next-rep_cur);
/* After an empty match, copy one character and step over it so that the
   search makes progress. */
1297 if (offset == regs.end[0]) {
1298 str_append(&tmp, line.text + offset, 1);
1299 ++regs.end[0];
1301 start = regs.end[0];
1303 remain = (line.length-1) - start;
1304 if(remain<0)
1305 break;
1306 if(!(cur_cmd->x.cmd_regex.flags&S_GLOBAL_BIT))
1307 break;
1309 if(!count)
1310 break;
/* Something matched: copy the rest of the line, then exchange the
   pattern space with 'tmp'. */
1311 replaced=1;
1312 str_append(&tmp,line.text+start,remain+1);
1313 t.text=line.text;
1314 t.length=line.length;
1315 t.alloc=line.alloc;
1316 line.text=tmp.text;
1317 line.length=tmp.length;
1318 line.alloc=tmp.alloc;
1319 tmp.text=t.text;
1320 tmp.length=t.length;
1321 tmp.alloc=t.alloc;
1322 if(cur_cmd->x.cmd_regex.flags&S_WRITE_BIT)
1323 ck_fwrite(line.text,1,line.length,cur_cmd->x.cmd_regex.wio_file);
1324 if(cur_cmd->x.cmd_regex.flags&S_PRINT_BIT)
1325 ck_fwrite(line.text,1,line.length,stdout);
1326 break;
1328 case 't':
/* Branch only if a substitution has been made; the flag is reset when the
   branch is taken. */
1329 if(replaced) {
1330 replaced = 0;
1331 if(!cur_cmd->x.jump)
1332 end_cycle++;
1333 else {
1334 struct sed_label *j = cur_cmd->x.jump;
1336 n= j->v->v_length - j->v_index;
1337 cur_cmd= j->v->v + j->v_index;
1338 goto exe_loop;
1341 break;
1343 case 'w':
1344 ck_fwrite(line.text,1,line.length,cur_cmd->x.io_file);
1345 break;
1347 case 'x':
/* Exchange the pattern space and the hold space. */
1349 struct line tmp;
1351 tmp=line;
1352 line=hold;
1353 hold=tmp;
1355 break;
1357 case 'y':
/* Map every character of the pattern space through the translation table. */
1359 unsigned char *p,*e;
1361 for(p=(unsigned char *)(line.text),e=p+line.length;p<e;p++)
1362 *p=cur_cmd->x.translate[*p];
1364 break;
1366 default:
1367 panic("INTERNAL ERROR: Bad cmd %c",cur_cmd->cmd);
1369 if(end_cycle)
1370 break;
1375 /* Return non-zero if the current line matches the address
1376 pointed to by 'addr'. */
1378 match_address(addr)
1379 struct addr *addr;
1381 switch(addr->addr_type) {
1382 case ADDR_NULL:
1383 return 1;
1384 case ADDR_NUM:
1385 return (input_line_number==addr->addr_number);
1387 case ADDR_REGEX:
1388 return (re_search(addr->addr_regex,
1389 line.text,
1390 line.length-1,
1392 line.length-1,
1393 (struct re_registers *)0)>=0) ? 1 : 0;
1395 case ADDR_LAST:
1396 return (input_EOF) ? 1 : 0;
1398 default:
1399 panic("INTERNAL ERROR: bad address type");
1400 break;
1402 return -1;
1405 /* Read in the next line of input, and store it in the
1406 pattern space. Return non-zero if this is the last line of input */
1409 read_pattern_space()
1411 int n;
1412 char *p;
1413 int ch;
1415 p=line.text;
1416 n=line.alloc;
1418 if(feof(input_file))
1419 return 0;
1420 input_line_number++;
1421 replaced=0;
1422 for(;;) {
1423 if(n==0) {
1424 line.text=ck_realloc(line.text,line.alloc*2);
1425 p=line.text+line.alloc;
1426 n=line.alloc;
1427 line.alloc*=2;
1429 ch=getc(input_file);
1430 if(ch==EOF) {
1431 if(n==line.alloc)
1432 return 0;
1433 *p++='\n';
1434 --n;
1435 line.length=line.alloc-n;
1436 if(last_input_file)
1437 input_EOF++;
1438 return 1;
1440 *p++=ch;
1441 --n;
1442 if(ch=='\n') {
1443 line.length=line.alloc-n;
1444 break;
1447 ch=getc(input_file);
1448 if(ch!=EOF)
1449 ungetc(ch,input_file);
1450 else if(last_input_file)
1451 input_EOF++;
1452 return 1;
1455 /* Inplement the 'N' command, which appends the next line of input to
1456 the pattern space. */
1457 void
1458 append_pattern_space()
1460 char *p;
1461 int n;
1462 int ch;
1464 p=line.text+line.length;
1465 n=line.alloc-line.length;
1467 input_line_number++;
1468 replaced=0;
1469 for(;;) {
1470 ch=getc(input_file);
1471 if(ch==EOF) {
1472 if(n==line.alloc)
1473 return;
1474 *p++='\n';
1475 --n;
1476 line.length=line.alloc-n;
1477 if(last_input_file)
1478 input_EOF++;
1479 return;
1481 *p++=ch;
1482 --n;
1483 if(ch=='\n') {
1484 line.length=line.alloc-n;
1485 break;
1487 if(n==0) {
1488 line.text=ck_realloc(line.text,line.alloc*2);
1489 p=line.text+line.alloc;
1490 n=line.alloc;
1491 line.alloc*=2;
1494 ch=getc(input_file);
1495 if(ch!=EOF)
1496 ungetc(ch,input_file);
1497 else if(last_input_file)
1498 input_EOF++;
1501 /* Copy the contents of the line 'from' into the line 'to'.
1502 This destroys the old contents of 'to'. It will still work
1503 if the line 'from' contains nulls. */
1504 void
1505 line_copy(from,to)
1506 struct line *from,*to;
1508 if(from->length>to->alloc) {
1509 to->alloc=from->length;
1510 to->text=ck_realloc(to->text,to->alloc);
1512 bcopy(from->text,to->text,from->length);
1513 to->length=from->length;
1516 /* Append the contents of the line 'from' to the line 'to'.
1517 This routine will work even if the line 'from' contains nulls */
1518 void
1519 line_append(from,to)
1520 struct line *from,*to;
1522 if(from->length>(to->alloc-to->length)) {
1523 to->alloc+=from->length;
1524 to->text=ck_realloc(to->text,to->alloc);
1526 bcopy(from->text,to->text+to->length,from->length);
1527 to->length+=from->length;
1530 /* Append 'length' bytes from 'string' to the line 'to'
1531 This routine *will* append bytes with nulls in them, without
1532 failing. */
1533 void
1534 str_append(to,string,length)
1535 struct line *to;
1536 char *string;
1537 int length;
1539 if(length>to->alloc-to->length) {
1540 to->alloc+=length;
1541 to->text=ck_realloc(to->text,to->alloc);
1543 bcopy(string,to->text+to->length,length);
1544 to->length+=length;
1547 void
1548 usage()
1550 fprintf(stderr, "\
1551 Usage: %s [-nV] [+quiet] [+silent] [+version] [-e script] [-f script-file]\n\
1552 [+expression=script] [+file=script-file] [file...]\n", myname);
1553 exit(4);