4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
25 * esclex.c -- lexer for esc
27 * this module provides lexical analysis and error handling routines
28 * expected by the yacc-generated parser (i.e. yylex() and yyerror()).
29 * it also does lots of tracking of things like filenames, line numbers,
30 * and what tokens are seen on a line up to the point where a syntax error
31 * was found. this module also arranges for the input source files to
54 /* ridiculously long token buffer -- disallow any token longer than this */
56 static char Tok
[MAXTOK
];
58 /* some misc stats we keep on the lexer & parser */
59 static struct stats
*Tokcount
;
60 static struct stats
*Lexelapse
;
61 struct stats
*Filecount
;
63 struct filestats
*next
;
65 struct stats
*idstats
;
70 /* input file state */
72 static const char *Fileopened
;
75 static const char *File
;
76 static const char *Cpp
= "/opt/gcc/4.4.4/bin/cpp";
78 static const char *Cppargs
;
79 static const char *Cppstdargs
= "-undef -P -I.";
83 static int Lexecho
; /* echo tokens as we read them */
85 /* forward declarations of our internal routines */
86 static int record(int tok
, const char *s
);
87 static void dumpline(int flags
);
88 static void doident();
89 static void dopragma(const char *tok
);
92 * table of reserved words. this table is only used by lex_init()
93 * to initialize the Rwords lookup table.
102 { "engine", ENGINE
},
108 { "config", CONFIG
},
110 * PATHFUNC indicates functions that operate only on paths
113 { "is_connected", PATHFUNC
},
114 { "is_under", PATHFUNC
},
115 { "is_on", PATHFUNC
},
116 { "is_present", PATHFUNC
},
117 { "is_type", PATHFUNC
},
118 { "has_fault", PATHFUNC
},
119 { "confprop", PATHFUNC
},
120 { "confprop_defined", PATHFUNC
},
124 * Rwordslut is a lookup table of reserved words. lhs is the word
125 * (in the string table) and the rhs is the token value returned
126 * by the yylex() for that word.
128 static struct lut
*Rwordslut
;
130 static const struct {
132 const unsigned long long nsec
;
134 { "nanosecond", 1ULL },
135 { "nanoseconds", 1ULL },
139 { "microsecond", 1000ULL },
140 { "microseconds", 1000ULL },
142 { "usecs", 1000ULL },
144 { "millisecond", 1000000ULL },
145 { "milliseconds", 1000000ULL },
146 { "msec", 1000000ULL },
147 { "msecs", 1000000ULL },
148 { "ms", 1000000ULL },
149 { "second", 1000000000ULL },
150 { "seconds", 1000000000ULL },
151 { "s", 1000000000ULL },
152 { "minute", 1000000000ULL * 60 },
153 { "minutes", 1000000000ULL * 60 },
154 { "min", 1000000000ULL * 60 },
155 { "mins", 1000000000ULL * 60 },
156 { "m", 1000000000ULL * 60 },
157 { "hour", 1000000000ULL * 60 * 60 },
158 { "hours", 1000000000ULL * 60 * 60 },
159 { "hr", 1000000000ULL * 60 * 60 },
160 { "hrs", 1000000000ULL * 60 * 60 },
161 { "h", 1000000000ULL * 60 * 60 },
162 { "day", 1000000000ULL * 60 * 60 * 24 },
163 { "days", 1000000000ULL * 60 * 60 * 24 },
164 { "d", 1000000000ULL * 60 * 60 * 24 },
165 { "week", 1000000000ULL * 60 * 60 * 24 * 7 },
166 { "weeks", 1000000000ULL * 60 * 60 * 24 * 7 },
167 { "wk", 1000000000ULL * 60 * 60 * 24 * 7 },
168 { "wks", 1000000000ULL * 60 * 60 * 24 * 7 },
169 { "month", 1000000000ULL * 60 * 60 * 24 * 30 },
170 { "months", 1000000000ULL * 60 * 60 * 24 * 30 },
171 { "year", 1000000000ULL * 60 * 60 * 24 * 365 },
172 { "years", 1000000000ULL * 60 * 60 * 24 * 365 },
173 { "yr", 1000000000ULL * 60 * 60 * 24 * 365 },
174 { "yrs", 1000000000ULL * 60 * 60 * 24 * 365 },
178 * some wrappers around the general lut functions to provide type checking...
/*
 * lex_s2i_lut_add -- string-to-integer flavor of lut_add().  The string s
 * is the lhs and the integer i is the rhs; both are cast to void * and
 * handed to lut_add() together with root and a trailing NULL argument.
 * Whatever lut_add() returns is returned unchanged, and lex_init() stores
 * that result back into the table pointer it passed in.
 */
182 lex_s2i_lut_add(struct lut
*root
, const char *s
, intptr_t i
)
184 return (lut_add(root
, (void *)s
, (void *)i
, NULL
));
/*
 * lex_s2i_lut_lookup -- string-to-integer flavor of lut_lookup().  Calls
 * lut_lookup(root, s, NULL) and converts the result to intptr_t.
 * NOTE(review): the reserved-word check in yylex() treats a zero result
 * as "not found", so presumably lut_lookup() yields NULL on a miss and no
 * reserved word may map to token value 0 -- confirm against lut_lookup().
 */
188 lex_s2i_lut_lookup(struct lut
*root
, const char *s
)
190 return ((intptr_t)lut_lookup(root
, (void *)s
, NULL
));
/*
 * lex_s2ullp_lut_add -- string-to-(unsigned long long pointer) flavor of
 * lut_add().  The pointer ullp itself, not a copy of the value it points
 * at, is cast to void * and passed as the rhs; lex_init() passes the
 * addresses of the nsec members of the Timesuffix[] entries.  The result
 * of lut_add() is returned unchanged.
 */
194 lex_s2ullp_lut_add(struct lut
*root
, const char *s
,
195 const unsigned long long *ullp
)
197 return (lut_add(root
, (void *)s
, (void *)ullp
, NULL
));
/*
 * lex_s2ullp_lut_lookup -- string-to-(unsigned long long pointer) flavor
 * of lut_lookup().  Calls lut_lookup(root, s, NULL) and returns the result
 * as a pointer to const unsigned long long.  The cast below omits the
 * const qualifier; the declared return type adds it back, so callers still
 * receive a read-only pointer.
 */
200 const unsigned long long *
201 lex_s2ullp_lut_lookup(struct lut
*root
, const char *s
)
203 return ((unsigned long long *)lut_lookup(root
, (void *)s
, NULL
));
207 * lex_init -- initialize the lexer with appropriate filenames & debug flags
212 lex_init(char **av
, const char *cppargs
, int lexecho
)
220 Tokcount
= stats_new_counter("lex.tokens", "total tokens in", 1);
221 Filecount
= stats_new_counter("lex.files", "total files read", 0);
222 Lexelapse
= stats_new_elapse("lex.time", "elapsed lex/parse time", 1);
227 /* allow user to tell us where cpp is if it is some weird place */
228 if (ptr
= getenv("_ESC_CPP"))
231 /* and in case it takes some special stdargs */
232 if (ptr
= getenv("_ESC_CPP_STDARGS"))
235 /* verify we can find cpp */
236 if (access(Cpp
, X_OK
) < 0) {
237 Cpp
= "/opt/gcc/4.4.4/bin/cpp";
238 if (access(Cpp
, X_OK
) < 0)
239 out(O_DIE
, "can't locate cpp");
/*
 * Per-file sanity checks on the name *av: die if the name is too long
 * for the Tok buffer, die via out(O_DIE|O_SYS, ...) if access(R_OK)
 * fails, then bump the Filecount statistic.
 * NOTE(review): the length budget subtracts only strlen(Cpp) and 3 from
 * MAXTOK, but yylex() later formats "%s %s %s %s" into Tok using Cpp,
 * Cppstdargs, Cppargs and the file name.  The lengths of Cppstdargs and
 * Cppargs are not accounted for here -- confirm whether long cpp
 * arguments can overflow Tok.
 */
245 /* verify we can find all the input files */
247 if (strlen(*av
) >= MAXTOK
- strlen(Cpp
) - 3)
248 out(O_DIE
, "filename too long: %.100s...", *av
);
249 if (access(*av
, R_OK
) < 0)
250 out(O_DIE
|O_SYS
, "%s", *av
);
252 stats_counter_bump(Filecount
);
255 /* put reserved words into the string table & a lookup table */
256 for (i
= 0; i
< sizeof (Rwords
) / sizeof (*Rwords
); i
++)
257 Rwordslut
= lex_s2i_lut_add(Rwordslut
,
258 stable(Rwords
[i
].word
), Rwords
[i
].val
);
260 /* initialize table of timeval suffixes */
261 for (i
= 0; i
< sizeof (Timesuffix
) / sizeof (*Timesuffix
); i
++) {
262 Timesuffixlut
= lex_s2ullp_lut_add(Timesuffixlut
,
263 stable(Timesuffix
[i
].suffix
), &Timesuffix
[i
].nsec
);
266 /* record start time */
267 stats_elapse_start(Lexelapse
);
276 out(O_DIE
, "cpp errors while reading \"%s\", "
277 "bailing out.", Fileopened
);
286 * yylex -- the lexer, called yylex() because that's what yacc wants
295 char *eptr
= &Tok
[MAXTOK
];
299 static int bol
= 1; /* true if we're at beginning of line */
306 return (record(EOF
, NULL
));
/*
 * Advance to the next input file: take the next entry from Files, build
 * the command line "<Cpp> <Cppstdargs> <Cppargs> <file>" in Tok and open
 * it as a read pipe with popen(); die with the command text if popen()
 * returns NULL.
 * NOTE(review): sprintf() places no bound on what is written into Tok
 * (declared as Tok[MAXTOK]); the only length guard visible in this file
 * is the file name check in lex_init(), which does not cover Cppstdargs
 * or Cppargs.  A bounded snprintf() with a truncation check looks safer.
 * NOTE(review): popen() runs the string through the shell, so any shell
 * metacharacters in the file name or cpp arguments are interpreted --
 * confirm that these inputs are trusted.
 */
307 Fileopened
= stable(*Files
++);
309 sprintf(Tok
, "%s %s %s %s",
310 Cpp
, Cppstdargs
, Cppargs
, Fileopened
);
311 if ((Fp
= popen(Tok
, "r")) == NULL
)
312 out(O_DIE
|O_SYS
, "%s", Tok
);
314 Fp
= eftread_fopen(Fileopened
, ibuf
, sizeof (ibuf
));
319 /* add name to stats for visibility */
323 struct filestats
*nfs
= MALLOC(sizeof (*nfs
));
325 (void) sprintf(nbuf
, "lex.file%d", fnum
);
326 nfs
->stats
= stats_new_string(nbuf
, "", 0);
327 stats_string_set(nfs
->stats
, Fileopened
);
329 if (ibuf
[0] != '\0') {
330 (void) sprintf(nbuf
, "lex.file%d-ident",
333 stats_new_string(nbuf
, "", 0);
334 stats_string_set(nfs
->idstats
, ibuf
);
345 switch (c
= getc(Fp
)) {
347 /* enforce that we're at beginning of line */
349 return (record(c
, NULL
));
351 while ((c
= getc(Fp
)) != EOF
&&
352 (c
== ' ' || c
== '\t'))
359 * #something-we-don't-understand
360 * anything we don't expect we just ignore.
363 while ((c
= getc(Fp
)) != EOF
&& isalnum(c
))
367 if (strcmp(Tok
, "pragma") == 0) {
368 /* skip white space */
369 while ((c
= getc(Fp
)) != EOF
&&
370 (c
== ' ' || c
== '\t'))
373 if (c
== EOF
|| c
== '\n')
374 outfl(O_DIE
, File
, Line
,
377 /* pull in next token */
380 while ((c
= getc(Fp
)) != EOF
&&
385 (void) ungetc(c
, Fp
);
388 } else if (strcmp(Tok
, "ident") == 0)
391 /* handle file & line info from cpp */
396 Line
= Line
* 10 + c
- '0';
397 } while ((c
= getc(Fp
)) != EOF
);
398 Line
--; /* newline will increment it */
399 while (c
!= EOF
&& isspace(c
))
402 outfl(O_DIE
, File
, Line
,
403 "bad # statement (file name)");
404 while ((c
= getc(Fp
)) != EOF
&& c
!= '"')
409 outfl(O_DIE
, File
, Line
,
410 "bad # statement (quotes)");
413 /* skip the rest of the cpp line */
414 while ((c
= getc(Fp
)) != EOF
&& c
!= '\n' && c
!= '\r')
417 return (record(c
, NULL
));
419 (void) ungetc(c
, Fp
);
440 /* comment handling */
441 if ((nextc
= getc(Fp
)) == EOF
)
442 outfl(O_DIE
, File
, Line
, "unexpected EOF");
443 else if (nextc
== '*') {
445 while ((c
= getc(Fp
)) != EOF
) {
449 (((c
= getc(Fp
)) == EOF
) ||
454 outfl(O_DIE
, File
, Line
,
455 "end of comment not seen "
456 "(started on line %d)",
460 /* wasn't a comment, return the '/' token */
461 (void) ungetc(nextc
, Fp
);
462 return (record(c
, NULL
));
/*
 * Quoted string token: input is consumed until a double quote that is not
 * preceded by a backslash; the collected text in Tok is interned with
 * stable() and returned as a QUOTE token.  Hitting the end of input first
 * is fatal and reports the line on which the string started.
 * NOTE(review): outfl() is called throughout this file as
 * outfl(flags, File, Line, fmt, ...), whereas out() is called as
 * out(flags, fmt, ...), e.g. out(O_DIE|O_SYS, "%s", Tok).  The call
 * out(O_DIE, File, Line, "string too long") below therefore puts File in
 * the format position, so the intended message would not be printed as
 * written.  outfl() was presumably intended -- confirm against the
 * prototype of out().  The number and identifier overflow paths in
 * yylex() use the same call shape.
 */
471 /* quoted string handling */
476 outfl(O_DIE
, File
, Line
,
477 "end of string not seen "
478 "(started on line %d)",
482 else if (c
== '"' && prevc
!= '\\')
489 out(O_DIE
, File
, Line
, "string too long");
491 return (record(QUOTE
, stable(Tok
)));
496 if ((nextc
= getc(Fp
)) == '&')
497 return (record(AND
, NULL
));
499 (void) ungetc(nextc
, Fp
);
500 return (record(c
, NULL
));
508 if ((nextc
= getc(Fp
)) == '|')
509 return (record(OR
, NULL
));
511 (void) ungetc(nextc
, Fp
);
512 return (record(c
, NULL
));
520 if ((nextc
= getc(Fp
)) == '=')
521 return (record(NE
, NULL
));
523 (void) ungetc(nextc
, Fp
);
524 return (record(c
, NULL
));
532 if ((nextc
= getc(Fp
)) == '=')
533 return (record(EQ
, NULL
));
535 (void) ungetc(nextc
, Fp
);
536 return (record(c
, NULL
));
544 if ((nextc
= getc(Fp
)) == '>')
545 return (record(ARROW
, stable(Tok
)));
547 (void) ungetc(nextc
, Fp
);
548 return (record(c
, NULL
));
555 if ((nextc
= getc(Fp
)) == '=')
557 return (record(LE
, NULL
));
558 else if (nextc
== '<')
560 return (record(LSHIFT
, NULL
));
562 (void) ungetc(nextc
, Fp
);
563 return (record(c
, NULL
));
570 if ((nextc
= getc(Fp
)) == '=')
572 return (record(GE
, NULL
));
573 else if (nextc
== '>')
575 return (record(RSHIFT
, NULL
));
577 (void) ungetc(nextc
, Fp
);
578 return (record(c
, NULL
));
588 /* collect rest of number */
591 if ((c
= getc(Fp
)) == EOF
) {
593 return (record(NUMBER
,
595 } else if (c
== 'x' || c
== 'X') {
599 (void) ungetc(c
, Fp
);
606 while ((c
= getc(Fp
)) != EOF
) {
608 out(O_DIE
, File
, Line
,
613 if (c
>= 'a' && c
<= 'f' ||
614 c
>= 'A' && c
<= 'F') {
620 if (c
>= '8' && c
<= '9') {
626 if (c
>= '0' && c
<= '7') {
630 /* not valid for this base */
632 (void) ungetc(c
, Fp
);
633 return (record(NUMBER
,
638 return (record(NUMBER
, stable(Tok
)));
639 } else if (isalpha(c
)) {
/*
 * Identifier token: entered when the current character satisfies
 * isalpha(); following characters are accepted while they are
 * alphanumeric or an underscore, and the first character that does not
 * belong is pushed back with ungetc().  The resulting string is looked up
 * in Rwordslut: a non-zero result is returned as that reserved word
 * token, anything else is returned as ID.
 * NOTE(review): out(O_DIE, File, Line, "identifier too long") passes File
 * where every other out() call in this file passes the format string;
 * outfl(O_DIE, File, Line, ...) was presumably intended -- confirm
 * against the prototype of out().
 * NOTE(review): the reserved-word test is an assignment used as a
 * condition; that is intentional here, but an explicit comparison with 0
 * would avoid compiler warnings.
 */
640 /* collect identifier */
644 if ((isalnum(c
) || c
== '_') &&
648 (void) ungetc(c
, Fp
);
653 out(O_DIE
, File
, Line
,
654 "identifier too long");
657 if (val
= lex_s2i_lut_lookup(Rwordslut
, cptr
)) {
658 return (record(val
, cptr
));
660 return (record(ID
, cptr
));
662 return (record(c
, NULL
));
669 * the record()/dumpline() routines are used to track & report
670 * the list of tokens seen on a given line. this is used in two ways.
671 * first, syntax errors found by the parser are reported by us (via
672 * yyerror()) and we tack on the tokens processed so far on the current
673 * line to help indicate exactly where the error is. second, if "lexecho"
674 * debugging is turned on, these routines provide it.
676 #define MAXRECORD 1000
677 static int Recordedline
;
681 } Recorded
[MAXRECORD
];
682 static int Recordnext
;
685 record(int tok
, const char *s
)
687 stats_counter_bump(Tokcount
);
688 if (Line
!= Recordedline
) {
689 /* starting new line, dump out the previous line */
690 if (Lexecho
&& Recordedline
) {
691 outfl(O_NONL
, File
, Recordedline
, "lex: ");
697 if (Recordnext
>= MAXRECORD
)
698 outfl(O_DIE
, File
, Line
, "line too long, bailing out");
699 Recorded
[Recordnext
].tok
= tok
;
700 Recorded
[Recordnext
++].s
= s
;
703 yylval
.tok
.file
= File
;
704 yylval
.tok
.line
= Line
;
714 for (i
= 0; i
< Recordnext
; i
++)
715 if (Recorded
[i
].s
&& Recorded
[i
].tok
!= ARROW
)
716 switch (Recorded
[i
].tok
) {
718 out(flags
|O_NONL
, " \"%s\"",
723 out(flags
|O_NONL
, " %s",
728 switch (Recorded
[i
].tok
) {
730 out(flags
|O_NONL
, " EOF");
733 out(flags
|O_NONL
, " ->%s",
737 out(flags
|O_NONL
, " ==");
740 out(flags
|O_NONL
, " !=");
743 out(flags
|O_NONL
, " ||");
746 out(flags
|O_NONL
, " &&");
749 out(flags
|O_NONL
, " <=");
752 out(flags
|O_NONL
, " >=");
755 out(flags
|O_NONL
, " <<");
758 out(flags
|O_NONL
, " >>");
761 if (isprint(Recorded
[i
].tok
))
762 out(flags
|O_NONL
, " %c",
765 out(flags
|O_NONL
, " '\\%03o'",
773 * yyerror -- report a parser error, called yyerror because yacc wants it
777 yyerror(const char *s
)
780 outfl(O_ERR
|O_NONL
, File
, Line
, "%s, tokens: ", s
);
785 * doident -- handle "#pragma ident" directives
792 char *eptr
= &Tok
[MAXTOK
];
794 /* skip white space and quotes */
795 while ((c
= getc(Fp
)) != EOF
&&
796 (c
== ' ' || c
== '\t' || c
== '"'))
799 if (c
== EOF
|| c
== '\n')
800 outfl(O_DIE
, File
, Line
, "bad ident");
802 /* pull in next token */
805 while ((c
= getc(Fp
)) != EOF
&& c
!= '"' && c
!= '\n')
810 /* skip to end of line (including close quote, if any) */
811 while ((c
= getc(Fp
)) != EOF
&& c
!= '\n')
814 (void) ungetc(c
, Fp
);
815 Ident
= lut_add(Ident
, (void *)stable(Tok
), NULL
, NULL
);
817 outfl(O_VERB
, File
, Line
, "pragma set: ident \"%s\"", Tok
);
821 * dodictionary -- handle "#pragma dictionary" directives
828 char *eptr
= &Tok
[MAXTOK
];
830 /* skip white space and quotes */
831 while ((c
= getc(Fp
)) != EOF
&&
832 (c
== ' ' || c
== '\t' || c
== '"'))
835 if (c
== EOF
|| c
== '\n')
836 outfl(O_DIE
, File
, Line
, "bad dictionary");
838 /* pull in next token */
841 while ((c
= getc(Fp
)) != EOF
&& c
!= '"' && c
!= '\n')
846 /* skip to end of line (including close quote, if any) */
847 while ((c
= getc(Fp
)) != EOF
&& c
!= '\n')
850 (void) ungetc(c
, Fp
);
851 Dicts
= lut_add(Dicts
, (void *)stable(Tok
), NULL
, NULL
);
853 outfl(O_VERB
, File
, Line
, "pragma set: dictionary \"%s\"", Tok
);
857 * doallow_cycles -- handle "#pragma allow_cycles" directives
864 char *eptr
= &Tok
[MAXTOK
];
865 unsigned long long newlevel
;
868 * by default the compiler does not allow cycles or loops
869 * in propagations. when cycles are encountered, the
870 * compiler prints out an error message.
872 * "#pragma allow_cycles" and
873 * "#pragma allow_cycles 0"
874 * allow cycles, but any such cycle will produce a warning
877 * "#pragma allow_cycles N"
878 * with N > 0 will allow cycles and not produce any
882 /* skip white space and quotes */
883 while ((c
= getc(Fp
)) != EOF
&&
884 (c
== ' ' || c
== '\t' || c
== '"'))
887 if (c
== EOF
|| c
== '\n')
891 /* pull in next token */
894 while ((c
= getc(Fp
)) != EOF
&& c
!= '"' && c
!= '\n')
/*
 * Tail of the allow_cycles pragma handling: the rest of the input line is
 * discarded, the token collected in Tok is converted to a number, the
 * terminating character is pushed back, the level is handed to
 * check_cycle_level(), and a verbose message reports whether warnings
 * remain enabled (level 0) or are suppressed (non-zero level).
 * NOTE(review): newlevel is declared unsigned long long but is assigned
 * from strtoll(), which returns a signed long long.  A negative argument
 * therefore wraps to a very large positive level and is reported as
 * "no warnings".  No end pointer or errno check is made, so text that
 * does not start with a number converts to 0.  strtoull() with
 * validation may be what is wanted -- confirm the intended handling of
 * malformed arguments.
 */
899 /* skip to end of line */
900 while ((c
= getc(Fp
)) != EOF
&& c
!= '\n')
903 newlevel
= strtoll(Tok
, NULL
, 0);
905 (void) ungetc(c
, Fp
);
907 (void) check_cycle_level(newlevel
);
908 outfl(O_VERB
, File
, Line
,
909 "pragma set: allow_cycles (%s)",
910 newlevel
? "no warnings" : "with warnings");
914 * dopragma -- handle #pragma directives
917 dopragma(const char *tok
)
919 if (strcmp(tok
, "ident") == 0)
921 else if (strcmp(tok
, "dictionary") == 0)
923 else if (strcmp(tok
, "new_errors_only") == 0) {
924 if (Pragma_new_errors_only
++ == 0)
925 outfl(O_VERB
, File
, Line
,
926 "pragma set: new_errors_only");
927 } else if (strcmp(tok
, "trust_ereports") == 0) {
928 if (Pragma_trust_ereports
++ == 0)
929 outfl(O_VERB
, File
, Line
,
930 "pragma set: trust_ereports");
931 } else if (strcmp(tok
, "allow_cycles") == 0)
934 outfl(O_VERB
, File
, Line
,
935 "unknown pragma ignored: \"%s\"", tok
);
939 * lex_fini -- finalize the lexer
945 stats_elapse_stop(Lexelapse
);
948 outfl(O_OK
, File
, Line
, "lex: ");
957 struct filestats
*nfstats
= Fstats
;
960 * Free up memory consumed by the lexer
962 stats_delete(Tokcount
);
963 stats_delete(Filecount
);
964 stats_delete(Lexelapse
);
965 while (nfstats
!= NULL
) {
966 Fstats
= nfstats
->next
;
967 stats_delete(nfstats
->stats
);
968 if (nfstats
->idstats
!= NULL
)
969 stats_delete(nfstats
->idstats
);
973 lut_free(Timesuffixlut
, NULL
, NULL
);
974 lut_free(Rwordslut
, NULL
, NULL
);
975 lut_free(Ident
, NULL
, NULL
);
976 lut_free(Dicts
, NULL
, NULL
);