5 static char rcsid
[]= "$Id: rcslex.c,v 1.1 1993/03/21 09:58:09 cgd Exp $ Purdue CS";
7 /*********************************************************************************
9 * Character mapping table,
10 * hashtable, Lexinit, nextlex, getlex, getkey,
11 * getid, getnum, readstring, printstring, savestring,
12 * checkid, serror, fatserror, error, faterror, warn, diagnose
13 * fflsbuf, puts, fprintf
14 * Testprogram: define LEXDB
15 *********************************************************************************
18 /* Copyright (C) 1982, 1988, 1989 Walter Tichy
19 * All rights reserved.
21 * Redistribution and use in source and binary forms are permitted
22 * provided that the above copyright notice and this paragraph are
23 * duplicated in all such forms and that any documentation,
24 * advertising materials, and other materials related to such
25 * distribution and use acknowledge that the software was developed
27 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
28 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
29 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
31 * Report all problems and direct all questions to:
32 * rcs-bugs@cs.purdue.edu
46 * Revision 4.6 89/05/01 15:13:07 narten
47 * changed copyright header to reflect current distribution rules
49 * Revision 4.5 88/11/08 12:00:54 narten
50 * changes from eggert@sm.unisys.com (Paul Eggert)
52 * Revision 4.5 88/08/28 15:01:12 eggert
53 * Don't loop when writing error messages to a full filesystem.
54 * Flush stderr/stdout when mixing output.
55 * Yield exit status compatible with diff(1).
56 * Shrink stdio code size; allow cc -R; remove lint.
58 * Revision 4.4 87/12/18 11:44:47 narten
59 * fixed to use "varargs" in "fprintf"; this is required if it is to
60 * work on a SPARC machine such as a Sun-4
62 * Revision 4.3 87/10/18 10:37:18 narten
63 * Updating version numbers. Changes relative to 1.1 actually relative
66 * Revision 1.3 87/09/24 14:00:17 narten
67 * Sources now pass through lint (if you ignore printf/sprintf/fprintf
70 * Revision 1.2 87/03/27 14:22:33 jenkins
73 * Revision 1.1 84/01/23 14:50:33 kcs
76 * Revision 4.1 83/03/25 18:12:51 wft
77 * Only changed $Header to $Id.
79 * Revision 3.3 82/12/10 16:22:37 wft
80 * Improved error messages, changed exit status on error to 1.
82 * Revision 3.2 82/11/28 21:27:10 wft
83 * Renamed ctab to map and included EOFILE; ctab is now a macro in rcsbase.h.
84 * Added fflsbuf(), fputs(), and fprintf(), which abort the RCS operations
85 * properly in case there is an IO-error (e.g., file system full).
87 * Revision 3.1 82/10/11 19:43:56 wft
88 * removed unused label out:;
89 * made sure all calls to getc() return into an integer, not a char.
95 /* version LEXDB is for testing the lexical analyzer. The testprogram
96 * reads a stream of lexemes, enters the revision numbers into the
97 * hashtable, and prints the recognized tokens. Keywords are recognized
112 /* character mapping table */
113 enum tokens map
[] = {
114 EOFILE
, /* this will end up at ctab[-1] */
115 UNKN
, INSERT
, UNKN
, UNKN
, UNKN
, UNKN
, UNKN
, UNKN
,
116 UNKN
, SPACE
, NEWLN
, UNKN
, SPACE
, UNKN
, UNKN
, UNKN
,
117 UNKN
, UNKN
, UNKN
, UNKN
, UNKN
, UNKN
, UNKN
, UNKN
,
118 UNKN
, UNKN
, UNKN
, UNKN
, UNKN
, UNKN
, UNKN
, UNKN
,
119 SPACE
, EXCLA
, DQUOTE
, HASH
, DOLLAR
, PERCNT
, AMPER
, SQUOTE
,
120 LPARN
, RPARN
, TIMES
, PLUS
, COMMA
, MINUS
, PERIOD
, DIVIDE
,
121 DIGIT
, DIGIT
, DIGIT
, DIGIT
, DIGIT
, DIGIT
, DIGIT
, DIGIT
,
122 DIGIT
, DIGIT
, COLON
, SEMI
, LESS
, EQUAL
, GREAT
, QUEST
,
123 AT
, LETTER
, LETTER
, LETTER
, LETTER
, LETTER
, LETTER
, LETTER
,
124 LETTER
, LETTER
, LETTER
, LETTER
, LETTER
, LETTER
, LETTER
, LETTER
,
125 LETTER
, LETTER
, LETTER
, LETTER
, LETTER
, LETTER
, LETTER
, LETTER
,
126 LETTER
, LETTER
, LETTER
, LBRACK
, BACKSL
, RBRACK
, UPARR
, UNDER
,
127 ACCENT
, LETTER
, LETTER
, LETTER
, LETTER
, LETTER
, LETTER
, LETTER
,
128 LETTER
, LETTER
, LETTER
, LETTER
, LETTER
, LETTER
, LETTER
, LETTER
,
129 LETTER
, LETTER
, LETTER
, LETTER
, LETTER
, LETTER
, LETTER
, LETTER
,
130 LETTER
, LETTER
, LETTER
, LBRACE
, BAR
, RBRACE
, TILDE
, UNKN
136 struct hshentry
* nexthsh
; /*pointer to next hashtable-entry, set by lookup*/
138 enum tokens nexttok
; /*next token, set by nextlex */
140 int hshenter
; /*if true, next suitable lexeme will be entered */
141 /*into the symbol table. Handle with care. */
142 int nextc
; /*next input character, initialized by Lexinit */
144 int eof
; /*end-of-file indicator, set to >0 on end of file*/
145 int line
; /*current line-number of input */
146 int nerror
; /*counter for errors */
147 int nwarn
; /*counter for warnings */
148 char * cmdid
; /*command identification for error messages */
149 int quietflag
; /*indicates quiet mode */
150 FILE * finptr
; /*input file descriptor */
152 FILE * frewrite
; /*file descriptor for echoing input */
154 int rewriteflag
;/*indicates whether to echo to frewrite */
156 char StringTab
[strtsize
]; /* string table and heap */
158 char * NextString
; /*pointer to next identifier in StringTab*/
159 char * Topchar
; /*pointer to next free byte in StringTab*/
160 /*set by nextlex, lookup */
161 struct hshentry hshtab
[hshsize
]; /*hashtable */
169 /* Function: Looks up the character string pointed to by NextString in the
170 * hashtable. If the string is not present, a new entry for it is created.
171 * If the string is present, Topchar is moved back to save the space for
172 * the string, and NextString is set to point to the original string.
173 * In any case, the address of the corresponding hashtable entry is placed
175 * Algorithm: Quadratic hash, covering all entries.
176 * Assumptions: NextString points at the first character of the string.
177 * Topchar points at the first empty byte after the string.
180 register int ihash
; /* index into hashtable */
181 register char * sp
, * np
;
182 int c
, delta
, final
, FirstScan
; /*loop control*/
184 /* calculate hash code */
187 while (*sp
) ihash
+= *sp
++;
189 /* set up first search loop (c=0, step=1, until (hshsize-1)/2) */
190 c
=0;delta
=1;final
=(hshsize
-1)/2;
191 FirstScan
=true; /*first loop */
194 ihash
= (ihash
+c
)%hshsize
; /*next index*/
196 if (hshtab
[ihash
].num
== nil
) {
198 hshtab
[ihash
].num
= NextString
;
199 nexthsh
= &hshtab
[ihash
];/*save hashtable address*/
201 VOID
printf("\nEntered: %s at %d ",nexthsh
->num
, ihash
);
205 /* compare strings */
206 sp
=NextString
;np
=hshtab
[ihash
].num
;
207 while (*sp
== *np
++) {
210 nexthsh
= &hshtab
[ihash
];
211 Topchar
= NextString
;
212 NextString
= nexthsh
->num
;
217 /* neither empty slot nor string found */
218 /* calculate next index and repeat */
223 /*set up second sweep*/
224 delta
= -1; final
= 1; FirstScan
= false;
226 fatserror("Hashtable overflow");
238 /* Function: Initialization of lexical analyzer:
239 * initializes the hashtable,
240 * initializes nextc, nexttok if finptr != NULL
244 for (c
=hshsize
-1; c
>=0; c
--) {
248 hshenter
=true; eof
=0; line
=1; nerror
=0; nwarn
=0;
249 NextString
=nil
; Topchar
= &StringTab
[0];
251 nextc
= GETC(finptr
,frewrite
,rewriteflag
); /*initial character*/
252 nextlex(); /*initial token*/
267 /* Function: Reads the next token and sets nexttok to the next token code.
268 * Only if hshenter==true is a revision number entered into the
269 * hashtable and a pointer to it is placed into nexthsh.
270 * This is useful for avoiding that dates are placed into the hashtable.
271 * For ID's and NUM's, NextString is set to the character string in the
272 * string table. Assumption: nextc contains the next character.
275 register FILE * fin
, * frew
;
277 register enum tokens d
;
283 fin
=finptr
; frew
=frewrite
;
285 switch(nexttok
=ctab
[nextc
]) {
290 serror("unknown Character: %c",nextc
);
291 nextc
=GETC(fin
,frew
,rewriteflag
);
299 /* Note: falls into next case */
302 nextc
=GETC(fin
,frew
,rewriteflag
);
311 NextString
= sp
= Topchar
;
313 while ((d
=ctab
[c
=GETC(fin
,frew
,rewriteflag
)])==DIGIT
||
315 *sp
++ = c
; /* 1.2. and 1.2 are different */
318 if (sp
>= StringTab
+strtsize
) {
319 /*may have written outside stringtable already*/
320 fatserror("Stringtable overflow");
324 if (hshenter
== true)
325 lookup(); /* lookup updates NextString, Topchar*/
331 NextString
= sp
= Topchar
;
333 while ((d
=ctab
[c
=GETC(fin
,frew
,rewriteflag
)])==LETTER
||
334 d
==DIGIT
|| d
==IDCHAR
) {
338 if (sp
>= StringTab
+strtsize
) {
339 /*may have written outside stringtable already*/
340 fatserror("Stringtable overflow");
344 nexttok
= ID
; /* may be ID or keyword */
347 case SBEGIN
: /* long string */
349 /* note: only the initial SBEGIN has been read*/
350 /* read the string, and reset nextc afterwards*/
354 nextc
=GETC(fin
,frew
,rewriteflag
);
362 /* Function: Checks if nexttok is the same as token. If so,
363 * advances the input by calling nextlex and returns true.
364 * otherwise returns false.
365 * Doesn't work for strings and keywords; loses the character string for ids.
368 if (nexttok
==token
) {
371 } else return(false);
376 /* Function: If the current token is a keyword identical to key,
377 * getkey advances the input by calling nextlex and returns true;
378 * otherwise returns false.
381 register char *s1
,*s2
;
384 s1
=key
; s2
=NextString
;
388 Topchar
= NextString
; /*reset Topchar */
399 /* Function: Checks if nexttok is an identifier. If so,
400 * advances the input by calling nextlex and returns a pointer
401 * to the identifier; otherwise returns nil.
402 * Treats keywords as identifiers.
405 register char * name
;
414 struct hshentry
* getnum()
415 /* Function: Checks if nexttok is a number. If so,
416 * advances the input by calling nextlex and returns a pointer
417 * to the hashtable entry. Otherwise returns nil.
418 * Doesn't work if hshenter is false.
421 register struct hshentry
* num
;
431 /* skip over characters until terminating single SDELIM */
432 /* if rewriteflag==true, copy every character read to frewrite.*/
433 /* Does not advance nextlex at the end. */
435 register FILE * fin
, * frew
;
436 fin
=finptr
; frew
=frewrite
;
438 /* copy string verbatim to frewrite */
439 while ((c
=getc(fin
)) != EOF
) {
442 if ((c
=getc(fin
)) == EOF
|| putc(c
,frew
) != SDELIM
) {
451 while ((c
=getc(fin
)) != EOF
) {
453 if ((c
=getc(fin
)) != SDELIM
) {
462 error("Unterminated string");
467 /* Function: copy a string to stdout, until terminated with a single SDELIM.
468 * Does not advance nextlex at the end.
474 while ((c
=getc(fin
)) != EOF
) {
476 if ((c
=getc(fin
)) != SDELIM
) {
485 error("Unterminated string");
490 savestring(target
,length
)
491 char * target
; int length
;
492 /* copies a string terminated with SDELIM from file finptr to buffer target,
493 * but not more than length bytes. If the string is longer than length,
494 * the extra characters are skipped. The string may be empty, in which
495 * case a '\0' is placed into target.
496 * Double SDELIM is replaced with SDELIM.
497 * If rewriteflag==true, the string is also copied unchanged to frewrite.
498 * Returns the length of the saved string.
499 * Does not advance nextlex at the end.
503 register FILE * fin
, * frew
;
504 register char * tp
, * max
;
506 fin
=finptr
; frew
=frewrite
;
507 tp
=target
; max
= target
+length
; /*max is one too large*/
508 while ((c
=GETC(fin
,frew
,rewriteflag
))!=EOF
) {
511 if ((c
=GETC(fin
,frew
,rewriteflag
))!=SDELIM
) {
520 error("string buffer overflow -- truncating string");
521 target
[length
-1]='\0';
522 /* skip rest of string */
523 while ((c
=GETC(fin
,frew
,rewriteflag
))!=EOF
) {
524 if ((c
==SDELIM
) && ((c
=GETC(fin
,frew
,rewriteflag
))!=SDELIM
)) {
531 error("Can't find %c to terminate string before end of file",SDELIM
);
536 error("Can't find %c to terminate string before end of file",SDELIM
);
540 char *checkid(id
, delim
)
542 /* Function: check whether the string starting at id is an */
543 /* identifier and return a pointer to the last char*/
544 /* of the identifier. White space, delim and '\0' */
545 /* are legal delimiters. Aborts the program if not */
546 /* a legal identifier. Useful for checking commands*/
548 register enum tokens d
;
553 if ( ctab
[*id
] == LETTER
) {
554 while( (d
=ctab
[c
=(*++id
)]) == LETTER
|| d
==DIGIT
|| d
==IDCHAR
) ;
555 if ( c
!=' ' && c
!='\t' && c
!='\n' && c
!='\0' && c
!=delim
) {
556 /* append \0 to end of id before error message */
558 while( (c
=(*++id
))!=' ' && c
!='\t' && c
!='\n' && c
!='\0' && c
!=delim
) ;
560 faterror("Invalid character %c in identifier %s",tc
,temp
);
565 /* append \0 to end of id before error message */
566 while( (c
=(*++id
))!=' ' && c
!='\t' && c
!='\n' && c
!='\0' && c
!=delim
) ;
568 faterror("Identifier %s does not start with letter",temp
);
579 faterror("write error");
585 if (putc('\n',iop
)==EOF
|| fflush(iop
)==EOF
)
591 serror(e
,e1
,e2
,e3
,e4
,e5
)
592 char * e
, * e1
, * e2
, * e3
, * e4
, * e5
;
593 /* non-fatal syntax error */
595 VOID
fprintf(stderr
,"%s error, line %d: ", cmdid
, line
);
596 VOID
fprintf(stderr
,e
, e1
, e2
, e3
, e4
, e5
);
601 error(e
,e1
,e2
,e3
,e4
,e5
)
602 char * e
, * e1
, * e2
, * e3
, * e4
, * e5
;
603 /* non-fatal error */
605 VOID
fprintf(stderr
,"%s error: ",cmdid
);
606 VOID
fprintf(stderr
,e
, e1
, e2
, e3
, e4
, e5
);
611 fatserror(e
,e1
,e2
,e3
,e4
,e5
)
612 char * e
, * e1
, * e2
, * e3
, * e4
, * e5
;
613 /* fatal syntax error */
615 VOID
fprintf(stderr
,"%s error, line %d: ", cmdid
,line
);
616 VOID
fprintf(stderr
,e
, e1
, e2
, e3
, e4
, e5
);
617 VOID
fprintf(stderr
,"\n%s aborted\n",cmdid
);
623 faterror(e
,e1
,e2
,e3
,e4
,e5
)
624 char * e
, * e1
, * e2
, * e3
, * e4
, * e5
;
625 /* fatal error, terminates program after cleanup */
627 VOID
fprintf(stderr
,"%s error: ",cmdid
);
628 VOID
fprintf(stderr
,e
, e1
, e2
, e3
, e4
, e5
);
629 VOID
fprintf(stderr
,"\n%s aborted\n",cmdid
);
635 warn(e
,e1
,e2
,e3
,e4
,e5
)
636 char * e
, * e1
, * e2
, * e3
, * e4
, * e5
;
637 /* prints a warning message */
639 VOID
fprintf(stderr
,"%s warning: ",cmdid
);
640 VOID
fprintf(stderr
,e
, e1
, e2
, e3
, e4
, e5
);
646 diagnose(e
,e1
,e2
,e3
,e4
,e5
)
647 char * e
, * e1
, * e2
, * e3
, * e4
, * e5
;
648 /* prints a diagnostic message */
651 VOID
fprintf(stderr
,e
, e1
, e2
, e3
, e4
, e5
);
665 if ((result
= __sputc(c
, fp
)) == EOF
)
671 unsigned c
; register FILE * iop
;
672 /* Function: Flush iop.
673 * Same routine as _flsbuf in stdio, but aborts program on error.
676 if ((result
=_flsbuf(c
,iop
))==EOF
)
689 VOID
fprintf(iop
, "%s", s
);
696 /* Function: Put string s on file iop, abort on error.
697 * Same as puts in stdio, but with different putc macro.
712 fprintf(FILE *iop
, const char *fmt
, ...)
714 fprintf(iop
, fmt
, va_alist
)
719 /* Function: formatted output. Same as fprintf in stdio,
720 * but aborts program on error
732 VOID
vfprintf(iop
, fmt
, ap
);
734 _doprnt(fmt
, ap
, iop
);
747 /* test program reading a stream of lexemes and printing the tokens.
753 int argc
; char * argv
[];
757 VOID
fputs("No input file\n",stderr
);
760 if ((finptr
=fopen(argv
[1], "r")) == NULL
) {
761 faterror("Can't open input file %s\n",argv
[1]);
765 while (nexttok
!= EOFILE
) {
769 VOID
printf("ID: %s",NextString
);
774 VOID
printf("NUM: %s, index: %d",nexthsh
->num
, nexthsh
-hshtab
);
776 VOID
printf("NUM, unentered: %s",NextString
);
777 hshenter
= !hshenter
; /*alternate between dates and numbers*/
781 VOID
printf("COLON"); break;
784 VOID
printf("SEMI"); break;
788 VOID
printf("STRING"); break;
791 VOID
printf("UNKN"); break;
794 VOID
printf("DEFAULT"); break;
799 VOID
printf("\nEnd of lexical analyzer test\n");