No empty .Rs/.Re
[netbsd-mini2440.git] / usr.bin / rcs / src / rcslex.c
blob889e0bedc2260112c2ff051f98c83d1208aa888f
1 /*
2 * RCS file input
3 */
4 #ifndef lint
5 static char rcsid[]= "$Id: rcslex.c,v 1.1 1993/03/21 09:58:09 cgd Exp $ Purdue CS";
6 #endif
7 /*********************************************************************************
8 * Lexical Analysis.
9 * Character mapping table,
10 * hashtable, Lexinit, nextlex, getlex, getkey,
11 * getid, getnum, readstring, printstring, savestring,
12 * checkid, serror, fatserror, error, faterror, warn, diagnose
13 * fflsbuf, fputs, fprintf
14 * Test program: define LEXDB
15 *********************************************************************************
18 /* Copyright (C) 1982, 1988, 1989 Walter Tichy
19 * All rights reserved.
21 * Redistribution and use in source and binary forms are permitted
22 * provided that the above copyright notice and this paragraph are
23 * duplicated in all such forms and that any documentation,
24 * advertising materials, and other materials related to such
25 * distribution and use acknowledge that the software was developed
26 * by Walter Tichy.
27 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
28 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
29 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
31 * Report all problems and direct all questions to:
32 * rcs-bugs@cs.purdue.edu
45 /* $Log: rcslex.c,v $
46 * Revision 4.6 89/05/01 15:13:07 narten
47 * changed copyright header to reflect current distribution rules
49 * Revision 4.5 88/11/08 12:00:54 narten
50 * changes from eggert@sm.unisys.com (Paul Eggert)
52 * Revision 4.5 88/08/28 15:01:12 eggert
53 * Don't loop when writing error messages to a full filesystem.
54 * Flush stderr/stdout when mixing output.
55 * Yield exit status compatible with diff(1).
56 * Shrink stdio code size; allow cc -R; remove lint.
58 * Revision 4.4 87/12/18 11:44:47 narten
59 * fixed to use "varargs" in "fprintf"; this is required if it is to
60 * work on a SPARC machine such as a Sun-4
62 * Revision 4.3 87/10/18 10:37:18 narten
63 * Updating version numbers. Changes relative to 1.1 actually relative
64 * to version 4.1
66 * Revision 1.3 87/09/24 14:00:17 narten
67 * Sources now pass through lint (if you ignore printf/sprintf/fprintf
68 * warnings)
70 * Revision 1.2 87/03/27 14:22:33 jenkins
71 * Port to suns
73 * Revision 1.1 84/01/23 14:50:33 kcs
74 * Initial revision
76 * Revision 4.1 83/03/25 18:12:51 wft
77 * Only changed $Header to $Id.
79 * Revision 3.3 82/12/10 16:22:37 wft
80 * Improved error messages, changed exit status on error to 1.
82 * Revision 3.2 82/11/28 21:27:10 wft
83 * Renamed ctab to map and included EOFILE; ctab is now a macro in rcsbase.h.
84 * Added fflsbuf(), fputs(), and fprintf(), which abort the RCS operations
85 * properly in case there is an IO-error (e.g., file system full).
87 * Revision 3.1 82/10/11 19:43:56 wft
88 * removed unused label out:;
89 * made sure all calls to getc() return into an integer, not a char.
94 #define LEXDB
95 /* version LEXDB is for testing the lexical analyzer. The test program
96 * reads a stream of lexemes, enters the revision numbers into the
97 * hashtable, and prints the recognized tokens. Keywords are recognized
98 * as identifiers.
103 #include "rcsbase.h"
104 #if __STDC__
105 #include <stdarg.h>
106 #else
107 #include <varargs.h>
108 #endif
112 /* character mapping table */
113 enum tokens map[] = {
114 EOFILE, /* this will end up at ctab[-1] */
115 UNKN, INSERT, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
116 UNKN, SPACE, NEWLN, UNKN, SPACE, UNKN, UNKN, UNKN,
117 UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
118 UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN, UNKN,
119 SPACE, EXCLA, DQUOTE, HASH, DOLLAR, PERCNT, AMPER, SQUOTE,
120 LPARN, RPARN, TIMES, PLUS, COMMA, MINUS, PERIOD, DIVIDE,
121 DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT, DIGIT,
122 DIGIT, DIGIT, COLON, SEMI, LESS, EQUAL, GREAT, QUEST,
123 AT, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
124 LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
125 LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
126 LETTER, LETTER, LETTER, LBRACK, BACKSL, RBRACK, UPARR, UNDER,
127 ACCENT, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
128 LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
129 LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER, LETTER,
130 LETTER, LETTER, LETTER, LBRACE, BAR, RBRACE, TILDE, UNKN
136 struct hshentry * nexthsh; /*pointer to next hashtable-entry, set by lookup*/
138 enum tokens nexttok; /*next token, set by nextlex */
140 int hshenter; /*if true, next suitable lexeme will be entered */
141 /*into the symbol table. Handle with care. */
142 int nextc; /*next input character, initialized by Lexinit */
144 int eof; /*end-of-file indicator, set to >0 on end of file*/
145 int line; /*current line-number of input */
146 int nerror; /*counter for errors */
147 int nwarn; /*counter for warnings */
148 char * cmdid; /*command identification for error messages */
149 int quietflag; /*indicates quiet mode */
150 FILE * finptr; /*input file descriptor */
152 FILE * frewrite; /*file descriptor for echoing input */
154 int rewriteflag;/*indicates whether to echo to frewrite */
156 char StringTab[strtsize]; /* string table and heap */
158 char * NextString; /*pointer to next identifier in StringTab*/
159 char * Topchar; /*pointer to next free byte in StringTab*/
160 /*set by nextlex, lookup */
161 struct hshentry hshtab[hshsize]; /*hashtable */
167 lookup() {
169 /* Function: Looks up the character string pointed to by NextString in the
170 * hashtable. If the string is not present, a new entry for it is created.
171 * If the string is present, TopChar is moved back to save the space for
172 * the string, and NextString is set to point to the original string.
173 * In any case, the address of the corresponding hashtable entry is placed
174 * into nexthsh.
175 * Algorithm: Quadratic hash, covering all entries.
176 * Assumptions: NextString points at the first character of the string.
177 * Topchar points at the first empty byte after the string.
180 register int ihash; /* index into hashtable */
181 register char * sp, * np;
182 int c, delta, final, FirstScan; /*loop control*/
184 /* calculate hash code */
185 sp = NextString;
186 ihash = 0;
187 while (*sp) ihash += *sp++;
189 /* set up first search loop (c=0,step=1,until (hshsiz-1)/2 */
190 c=0;delta=1;final=(hshsize-1)/2;
191 FirstScan=true; /*first loop */
193 for (;;) {
194 ihash = (ihash+c)%hshsize; /*next index*/
196 if (hshtab[ihash].num == nil) {
197 /*empty slot found*/
198 hshtab[ihash].num = NextString;
199 nexthsh= &hshtab[ihash];/*save hashtable address*/
200 # ifdef LEXDB
201 VOID printf("\nEntered: %s at %d ",nexthsh->num, ihash);
202 # endif
203 return;
205 /* compare strings */
206 sp=NextString;np=hshtab[ihash].num;
207 while (*sp == *np++) {
208 if (*sp == 0) {
209 /* match found */
210 nexthsh= &hshtab[ihash];
211 Topchar = NextString;
212 NextString = nexthsh->num;
213 return;
214 } else sp++;
217 /* neither empty slot nor string found */
218 /* calculate next index and repeat */
219 if (c != final)
220 c += delta;
221 else {
222 if (FirstScan) {
223 /*set up second sweep*/
224 delta = -1; final = 1; FirstScan= false;
225 } else {
226 fatserror("Hashtable overflow");
237 Lexinit()
238 /* Function: Initialization of lexical analyzer:
239 * initializes the hastable,
240 * initializes nextc, nexttok if finptr != NULL
242 { register int c;
244 for (c=hshsize-1; c>=0; c--) {
245 hshtab[c].num = nil;
248 hshenter=true; eof=0; line=1; nerror=0; nwarn=0;
249 NextString=nil; Topchar = &StringTab[0];
250 if (finptr) {
251 nextc = GETC(finptr,frewrite,rewriteflag); /*initial character*/
252 nextlex(); /*initial token*/
253 } else {
254 nextc = '\0';
255 nexttok=EOFILE;
265 nextlex()
267 /* Function: Reads the next token and sets nexttok to the next token code.
268 * Only if the hshenter==true, a revision number is entered into the
269 * hashtable and a pointer to it is placed into nexthsh.
270 * This is useful for avoiding that dates are placed into the hashtable.
271 * For ID's and NUM's, NextString is set to the character string in the
272 * string table. Assumption: nextc contains the next character.
274 { register c;
275 register FILE * fin, * frew;
276 register char * sp;
277 register enum tokens d;
279 if (eof) {
280 nexttok=EOFILE;
281 return;
283 fin=finptr; frew=frewrite;
284 loop:
285 switch(nexttok=ctab[nextc]) {
287 case UNKN:
288 case IDCHAR:
289 case PERIOD:
290 serror("unknown Character: %c",nextc);
291 nextc=GETC(fin,frew,rewriteflag);
292 goto loop;
294 case NEWLN:
295 line++;
296 # ifdef LEXDB
297 VOID putchar('\n');
298 # endif
299 /* Note: falls into next case */
301 case SPACE:
302 nextc=GETC(fin,frew,rewriteflag);
303 goto loop;
305 case EOFILE:
306 eof++;
307 nexttok=EOFILE;
308 return;
310 case DIGIT:
311 NextString = sp = Topchar;
312 *sp++ = nextc;
313 while ((d=ctab[c=GETC(fin,frew,rewriteflag)])==DIGIT ||
314 d==PERIOD) {
315 *sp++ = c; /* 1.2. and 1.2 are different */
317 *sp++ = '\0';
318 if (sp >= StringTab+strtsize) {
319 /*may have written outside stringtable already*/
320 fatserror("Stringtable overflow");
322 Topchar = sp;
323 nextc = c;
324 if (hshenter == true)
325 lookup(); /* lookup updates NextString, Topchar*/
326 nexttok = NUM;
327 return;
330 case LETTER:
331 NextString = sp = Topchar;
332 *sp++ = nextc;
333 while ((d=ctab[c=GETC(fin,frew,rewriteflag)])==LETTER ||
334 d==DIGIT || d==IDCHAR) {
335 *sp++ = c;
337 *sp++ = '\0';
338 if (sp >= StringTab+strtsize) {
339 /*may have written outside stringtable already*/
340 fatserror("Stringtable overflow");
342 Topchar = sp;
343 nextc = c;
344 nexttok = ID; /* may be ID or keyword */
345 return;
347 case SBEGIN: /* long string */
348 nexttok = STRING;
349 /* note: only the initial SBEGIN has been read*/
350 /* read the string, and reset nextc afterwards*/
351 return;
353 default:
354 nextc=GETC(fin,frew,rewriteflag);
355 return;
360 int getlex(token)
361 enum tokens token;
362 /* Function: Checks if nexttok is the same as token. If so,
363 * advances the input by calling nextlex and returns true.
364 * otherwise returns false.
365 * Doesn't work for strings and keywords; loses the character string for ids.
368 if (nexttok==token) {
369 nextlex();
370 return(true);
371 } else return(false);
374 int getkey (key)
375 char * key;
376 /* Function: If the current token is a keyword identical to key,
377 * getkey advances the input by calling nextlex and returns true;
378 * otherwise returns false.
381 register char *s1,*s2;
383 if (nexttok==ID) {
384 s1=key; s2=NextString;
385 while(*s1 == *s2++)
386 if (*s1++ == '\0') {
387 /* match found */
388 Topchar = NextString; /*reset Topchar */
389 nextlex();
390 return(true);
393 return(false);
398 char * getid()
399 /* Function: Checks if nexttok is an identifier. If so,
400 * advances the input by calling nextlex and returns a pointer
401 * to the identifier; otherwise returns nil.
402 * Treats keywords as identifiers.
405 register char * name;
406 if (nexttok==ID) {
407 name = NextString;
408 nextlex();
409 return name;
410 } else return nil;
414 struct hshentry * getnum()
415 /* Function: Checks if nexttok is a number. If so,
416 * advances the input by calling nextlex and returns a pointer
417 * to the hashtable entry. Otherwise returns nil.
418 * Doesn't work if hshenter is false.
421 register struct hshentry * num;
422 if (nexttok==NUM) {
423 num=nexthsh;
424 nextlex();
425 return num;
426 } else return nil;
430 readstring()
431 /* skip over characters until terminating single SDELIM */
432 /* if rewriteflag==true, copy every character read to frewrite.*/
433 /* Does not advance nextlex at the end. */
434 { register c;
435 register FILE * fin, * frew;
436 fin=finptr; frew=frewrite;
437 if (rewriteflag) {
438 /* copy string verbatim to frewrite */
439 while ((c=getc(fin)) != EOF) {
440 VOID putc(c,frew);
441 if (c==SDELIM) {
442 if ((c=getc(fin)) == EOF || putc(c,frew) != SDELIM) {
443 /* end of string */
444 nextc=c;
445 return;
449 } else {
450 /* skip string */
451 while ((c=getc(fin)) != EOF) {
452 if (c==SDELIM) {
453 if ((c=getc(fin)) != SDELIM) {
454 /* end of string */
455 nextc=c;
456 return;
461 nextc = c;
462 error("Unterminated string");
466 printstring()
467 /* Function: copy a string to stdout, until terminated with a single SDELIM.
468 * Does not advance nextlex at the end.
471 register c;
472 register FILE * fin;
473 fin=finptr;
474 while ((c=getc(fin)) != EOF) {
475 if (c==SDELIM) {
476 if ((c=getc(fin)) != SDELIM) {
477 /* end of string */
478 nextc=c;
479 return;
482 VOID putchar(c);
484 nextc = c;
485 error("Unterminated string");
490 savestring(target,length)
491 char * target; int length;
492 /* copies a string terminated with SDELIM from file finptr to buffer target,
493 * but not more than length bytes. If the string is longer than length,
494 * the extra characters are skipped. The string may be empty, in which
495 * case a '\0' is placed into target.
496 * Double SDELIM is replaced with SDELIM.
497 * If rewriteflag==true, the string is also copied unchanged to frewrite.
498 * Returns the length of the saved string.
499 * Does not advance nextlex at the end.
502 register c;
503 register FILE * fin, * frew;
504 register char * tp, * max;
506 fin=finptr; frew=frewrite;
507 tp=target; max= target+length; /*max is one too large*/
508 while ((c=GETC(fin,frew,rewriteflag))!=EOF) {
509 *tp++ =c;
510 if (c== SDELIM) {
511 if ((c=GETC(fin,frew,rewriteflag))!=SDELIM) {
512 /* end of string */
513 *(tp-1)='\0';
514 nextc=c;
515 return;
518 if (tp >= max) {
519 /* overflow */
520 error("string buffer overflow -- truncating string");
521 target[length-1]='\0';
522 /* skip rest of string */
523 while ((c=GETC(fin,frew,rewriteflag))!=EOF) {
524 if ((c==SDELIM) && ((c=GETC(fin,frew,rewriteflag))!=SDELIM)) {
525 /* end of string */
526 nextc=c;
527 return;
530 nextc = c;
531 error("Can't find %c to terminate string before end of file",SDELIM);
532 return;
535 nextc = c;
536 error("Can't find %c to terminate string before end of file",SDELIM);
540 char *checkid(id, delim)
541 char *id, delim;
542 /* Function: check whether the string starting at id is an */
543 /* identifier and return a pointer to the last char*/
544 /* of the identifer. White space, delim and '\0' */
545 /* are legal delimeters. Aborts the program if not */
546 /* a legal identifier. Useful for checking commands*/
548 register enum tokens d;
549 register char *temp;
550 register char c,tc;
552 temp = id;
553 if ( ctab[*id] == LETTER ) {
554 while( (d=ctab[c=(*++id)]) == LETTER || d==DIGIT || d==IDCHAR) ;
555 if ( c!=' ' && c!='\t' && c!='\n' && c!='\0' && c!=delim) {
556 /* append \0 to end of id before error message */
557 tc = c;
558 while( (c=(*++id))!=' ' && c!='\t' && c!='\n' && c!='\0' && c!=delim) ;
559 *id = '\0';
560 faterror("Invalid character %c in identifier %s",tc,temp);
561 return nil ;
562 } else
563 return id;
564 } else {
565 /* append \0 to end of id before error message */
566 while( (c=(*++id))!=' ' && c!='\t' && c!='\n' && c!='\0' && c!=delim) ;
567 *id = '\0';
568 faterror("Identifier %s does not start with letter",temp);
569 return nil;
writeerror()
/* Function: reports a write error through faterror. faterror itself writes
 * with fprintf, which calls writeerror again when the stream is in error;
 * such a nested call exits at once with status 2 instead of looping.
 */
{
        static int reported;

        if (!reported) {
                reported = 1;
                faterror("write error");
        } else {
                exit(2);
        }
}
nlflush(iop)
register FILE * iop;
/* Function: writes a newline to iop and flushes it;
 * calls writeerror if either step fails.
 */
{
        register int failed;

        failed = (putc('\n',iop) == EOF);
        if (!failed)
                failed = (fflush(iop) == EOF);
        if (failed)
                writeerror();
}
590 /*VARARGS1*/
591 serror(e,e1,e2,e3,e4,e5)
592 char * e, * e1, * e2, * e3, * e4, * e5;
593 /* non-fatal syntax error */
594 { nerror++;
595 VOID fprintf(stderr,"%s error, line %d: ", cmdid, line);
596 VOID fprintf(stderr,e, e1, e2, e3, e4, e5);
597 nlflush(stderr);
600 /*VARARGS1*/
601 error(e,e1,e2,e3,e4,e5)
602 char * e, * e1, * e2, * e3, * e4, * e5;
603 /* non-fatal error */
604 { nerror++;
605 VOID fprintf(stderr,"%s error: ",cmdid);
606 VOID fprintf(stderr,e, e1, e2, e3, e4, e5);
607 nlflush(stderr);
610 /*VARARGS1*/
611 fatserror(e,e1,e2,e3,e4,e5)
612 char * e, * e1, * e2, * e3, * e4, * e5;
613 /* fatal syntax error */
614 { nerror++;
615 VOID fprintf(stderr,"%s error, line %d: ", cmdid,line);
616 VOID fprintf(stderr,e, e1, e2, e3, e4, e5);
617 VOID fprintf(stderr,"\n%s aborted\n",cmdid);
618 VOID cleanup();
619 exit(2);
622 /*VARARGS1*/
623 faterror(e,e1,e2,e3,e4,e5)
624 char * e, * e1, * e2, * e3, * e4, * e5;
625 /* fatal error, terminates program after cleanup */
626 { nerror++;
627 VOID fprintf(stderr,"%s error: ",cmdid);
628 VOID fprintf(stderr,e, e1, e2, e3, e4, e5);
629 VOID fprintf(stderr,"\n%s aborted\n",cmdid);
630 VOID cleanup();
631 exit(2);
634 /*VARARGS1*/
635 warn(e,e1,e2,e3,e4,e5)
636 char * e, * e1, * e2, * e3, * e4, * e5;
637 /* prints a warning message */
638 { nwarn++;
639 VOID fprintf(stderr,"%s warning: ",cmdid);
640 VOID fprintf(stderr,e, e1, e2, e3, e4, e5);
641 nlflush(stderr);
645 /*VARARGS1*/
646 diagnose(e,e1,e2,e3,e4,e5)
647 char * e, * e1, * e2, * e3, * e4, * e5;
648 /* prints a diagnostic message */
650 if (!quietflag) {
651 VOID fprintf(stderr,e, e1, e2, e3, e4, e5);
652 nlflush(stderr);
#ifdef _FSTDIO
wbuf(c, fp)
unsigned c;
register FILE *fp;
/* Function: passes c and fp to __sputc and returns its result;
 * calls writeerror if that result is EOF.
 */
{
        register int status;

        status = __sputc(c, fp);
        if (status == EOF)
                writeerror();
        return status;
}
#else
fflsbuf(c, iop)
unsigned c; register FILE * iop;
/* Function: Flush iop.
 * Same routine as _flsbuf in stdio, but calls writeerror (which aborts
 * the program) when _flsbuf returns EOF.
 */
{
        register int status;

        status = _flsbuf(c,iop);
        if (status == EOF)
                writeerror();
        return status;
}
#endif
683 #ifdef _FSTDIO
684 fputs(s, iop)
685 const char *s;
686 FILE *iop;
689 VOID fprintf(iop, "%s", s);
690 return 0;
692 #else
693 fputs(s, iop)
694 register char *s;
695 register FILE *iop;
696 /* Function: Put string s on file iop, abort on error.
697 * Same as puts in stdio, but with different putc macro.
700 register r;
701 register c;
703 while (c = *s++)
704 r = putc(c, iop);
705 return(r);
707 #endif
#if __STDC__
fprintf(FILE *iop, const char *fmt, ...)
#else
fprintf(iop, fmt, va_alist)
FILE *iop;
const char *fmt;
va_dcl
#endif
/* Function: formatted output. Same as fprintf in stdio,
 * but aborts program on error: if the error indicator of iop is set
 * after formatting, writeerror is called.
 * Returns 0 on success (EOF if writeerror should ever return).
 */
{
        register int value;
        va_list ap;

        /* Use the same test as the function header above, so that the
         * form of va_start always matches the header that was compiled,
         * also when __STDC__ is defined as 0. */
#if __STDC__
        va_start(ap, fmt);
#else
        va_start(ap);
#endif
#ifdef VFPRINTF
        VOID vfprintf(iop, fmt, ap);
#else
        _doprnt(fmt, ap, iop);
#endif
        if (ferror(iop)) {
                writeerror();
                value = EOF;
        } else value = 0;
        va_end(ap);
        return value;
}
746 #ifdef LEXDB
747 /* test program reading a stream of lexems and printing the tokens.
752 main(argc,argv)
753 int argc; char * argv[];
755 cmdid="lextest";
756 if (argc<2) {
757 VOID fputs("No input file\n",stderr);
758 exit(1);
760 if ((finptr=fopen(argv[1], "r")) == NULL) {
761 faterror("Can't open input file %s\n",argv[1]);
763 Lexinit();
764 rewriteflag=false;
765 while (nexttok != EOFILE) {
766 switch (nexttok) {
768 case ID:
769 VOID printf("ID: %s",NextString);
770 break;
772 case NUM:
773 if (hshenter==true)
774 VOID printf("NUM: %s, index: %d",nexthsh->num, nexthsh-hshtab);
775 else
776 VOID printf("NUM, unentered: %s",NextString);
777 hshenter = !hshenter; /*alternate between dates and numbers*/
778 break;
780 case COLON:
781 VOID printf("COLON"); break;
783 case SEMI:
784 VOID printf("SEMI"); break;
786 case STRING:
787 readstring();
788 VOID printf("STRING"); break;
790 case UNKN:
791 VOID printf("UNKN"); break;
793 default:
794 VOID printf("DEFAULT"); break;
796 VOID printf(" | ");
797 nextlex();
799 VOID printf("\nEnd of lexical analyzer test\n");
cleanup()
/* Function: dummy for the test program; does nothing.
 * It is here because fatserror and faterror call cleanup before exiting.
 */
{
}
807 #endif