4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
33 * C-like lexical analysis.
35 * 1. Define a "struct node"
36 * 2. Define a "struct symbol" that encapsulates a struct node.
37 * 3. Define a "struct integer" that encapsulates a struct node.
38 * 4. Set the YACC stack type in the grammar:
40 * #define YYSTYPE struct node *
42 * 5. Define %token's in the grammar for IDENTIFIER, STRING and INTEGER.
43 * Using "_KW" as a suffix for keyword tokens, i.e. "struct" is
46 * %token INTEGER STRING IDENTIFIER
48 * %token STRUCT_KW CASE_KW
50 * %token PLUS MINUS ASSIGN ARROW
51 * // overloaded tokens (++ --, < > <= >=, == !=, += -= *= ...)
52 * %token INCOP RELOP EQUOP ASSOP
53 * 6. It's easiest to use the yacc(1) generated token numbers for node
54 * labels. For node labels that are not actually part of the grammar,
55 * use a %token with an L_ prefix:
56 * // node labels (can't be generated by lex)
57 * %token L_LT L_LTE L_GT L_GTE L_EQU L_NEQ
58 * 7. Call set_lex_input() before parsing.
/*
 * Character classification helpers used by the lexer.
 *
 * These are function-like macros: the argument may be evaluated more than
 * once, so pass only side-effect-free expressions (no c++ or getch()).
 * Every use of a parameter is parenthesized so that an argument containing
 * a low-precedence operator (?:, |, &, ...) is still classified correctly.
 */

/* True if c is one of the quote characters SQ or DQ (defined elsewhere). */
#define isquote(c) ((c) == SQ || (c) == DQ)

/* True for space, tab, newline or form feed. */
#define iswhite(c) ((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) == '\f')

/* True if l <= c <= u (inclusive on both ends). */
#define is_between(c, l, u) ((l) <= (c) && (c) <= (u))

/*
 * True for space, carriage return, tab or form feed.
 * Unlike iswhite(), this matches '\r' and does NOT match '\n'.
 */
#define is_white(c) \
	((c) == ' ' || (c) == '\r' || (c) == '\t' || (c) == '\f')

/* ASCII letter and digit classes. */
#define is_lower(c) is_between((c), 'a', 'z')
#define is_upper(c) is_between((c), 'A', 'Z')
#define is_alpha(c) (is_lower(c) || is_upper(c))
#define is_digit(c) is_between((c), '0', '9')

/* First character of a symbol: a letter or underscore. */
#define is_sstart(c) (is_alpha(c) || (c) == '_')

/* Subsequent characters of a symbol: letter, underscore or digit. */
#define is_sfollow(c) (is_sstart(c) || is_digit(c))

/* Hexadecimal digit: 0-9, A-F or a-f. */
#define is_xdigit(c) \
	(is_digit(c) || is_between((c), 'A', 'F') || is_between((c), 'a', 'f'))
78 ndr_symbol_t
*symbol_list
;
79 static ndr_integer_t
*integer_list
;
80 static FILE *lex_infp
;
81 static ndr_symbol_t
*file_name
;
85 static int lex_at_bol
;
87 /* In yacc(1) generated parser */
88 extern struct node
*yylval
;
91 * The keywtable[] and optable[] could be external to this lex
92 * and it would all still work.
94 static ndr_keyword_t keywtable
[] = {
95 { "struct", STRUCT_KW
, 0 },
96 { "union", UNION_KW
, 0 },
97 { "typedef", TYPEDEF_KW
, 0 },
99 { "interface", INTERFACE_KW
, 0 },
100 { "uuid", UUID_KW
, 0 },
101 { "_no_reorder", _NO_REORDER_KW
, 0 },
102 { "extern", EXTERN_KW
, 0 },
103 { "reference", REFERENCE_KW
, 0 },
105 { "align", ALIGN_KW
, 0 },
106 { "operation", OPERATION_KW
, 0 },
108 { "out", OUT_KW
, 0 },
110 { "string", STRING_KW
, 0 },
111 { "size_is", SIZE_IS_KW
, 0 },
112 { "length_is", LENGTH_IS_KW
, 0 },
114 { "switch_is", SWITCH_IS_KW
, 0 },
115 { "case", CASE_KW
, 0 },
116 { "default", DEFAULT_KW
, 0 },
118 { "transmit_as", TRANSMIT_AS_KW
, 0 },
119 { "arg_is", ARG_IS_KW
, 0 },
121 { "char", BASIC_TYPE
, 1 },
122 { "uchar", BASIC_TYPE
, 1 },
123 { "wchar", BASIC_TYPE
, 2 },
124 { "short", BASIC_TYPE
, 2 },
125 { "ushort", BASIC_TYPE
, 2 },
126 { "long", BASIC_TYPE
, 4 },
127 { "ulong", BASIC_TYPE
, 4 },
131 static ndr_keyword_t optable
[] = {
150 static int getch(FILE *fp
);
151 static ndr_integer_t
*int_enter(long);
152 static ndr_symbol_t
*sym_enter(char *);
153 static ndr_symbol_t
*sym_find(char *);
154 static int str_to_sv(char *, char *sv
[]);
157 * Enter the symbols for keyword.
160 keyw_tab_init(ndr_keyword_t kwtable
[])
166 for (i
= 0; kwtable
[i
].name
; i
++) {
169 sym
= sym_enter(kw
->name
);
175 set_lex_input(FILE *fp
, char *name
)
177 keyw_tab_init(keywtable
);
178 keyw_tab_init(optable
);
181 file_name
= sym_enter(name
);
216 * Handle preprocessor lines. This just notes
217 * which file we're processing.
219 if (c
== '#' && lex_at_bol
) {
223 while ((c
= getch(fp
)) != EOF
&& c
!= '\n')
227 /* note: no ungetc() of newline, we don't want to count it */
229 if (*lexeme
!= ' ') {
230 /* not a line we know */
234 sc
= str_to_sv(lexeme
, sv
);
238 file_name
= sym_enter(sv
[1]);
239 line_number
= atoi(sv
[0]); /* for next input line */
253 * Symbol? Might be a keyword or just an identifier
256 /* we got a symbol */
260 } while (is_sfollow(c
));
261 (void) ungetc(c
, fp
);
264 sym
= sym_enter(lexeme
);
266 yylval
= &sym
->s_node
;
269 return (sym
->kw
->token
);
279 /* we got a number */
283 if (c
== 'x' || c
== 'X') {
284 /* handle hex specially */
288 } while (is_xdigit(c
));
290 } else if (c
== 'b' || c
== 'B' ||
291 c
== 'd' || c
== 'D' ||
292 c
== 'o' || c
== 'O') {
296 } while (is_digit(c
));
299 (void) ungetc(c
, fp
);
301 /* could be anything */
303 while (is_digit(c
)) {
310 (void) ungetc(c
, fp
);
312 intg
= int_enter(strtol(lexeme
, 0, 0));
313 yylval
= &intg
->s_node
;
318 /* Could handle strings. We don't seem to need them yet */
320 yylval
= 0; /* operator tokens have no value */
321 xc
= getch(fp
); /* get look-ahead for two-char lexemes */
328 * Look for to-end-of-line comment
330 if (c
== '/' && xc
== '/') {
331 /* eat the comment */
332 while ((c
= getch(fp
)) != EOF
&& c
!= '\n')
334 (void) ungetc(c
, fp
); /* put back newline */
339 * Look for multi-line comment
341 if (c
== '/' && xc
== '*') {
342 /* eat the comment */
344 while ((c
= getch(fp
)) != EOF
) {
345 if (xc
== '*' && c
== '/') {
357 * Use symbol table lookup for two-character and
358 * one character operator tokens.
360 sym
= sym_find(lexeme
);
362 /* there better be a keyword attached */
363 yylval
= &sym
->s_node
;
364 return (sym
->kw
->token
);
367 /* Try a one-character form */
368 (void) ungetc(xc
, fp
);
370 sym
= sym_find(lexeme
);
372 /* there better be a keyword attached */
373 yylval
= &sym
->s_node
;
374 return (sym
->kw
->token
);
377 if (is_between(c
, ' ', '~'))
378 compile_error("unrecognized character: 0x%02x (%c)", c
, c
);
380 compile_error("unrecognized character: 0x%02x", c
);
384 static ndr_symbol_t
*
390 for (pp
= &symbol_list
; (p
= *pp
) != 0; pp
= &p
->next
) {
391 if (strcmp(p
->name
, name
) == 0)
398 static ndr_symbol_t
*
399 sym_enter(char *name
)
404 for (pp
= &symbol_list
; (p
= *pp
) != 0; pp
= &p
->next
) {
405 if (strcmp(p
->name
, name
) == 0)
409 p
= ndr_alloc(1, sizeof (ndr_symbol_t
));
411 if ((p
->name
= strdup(name
)) == NULL
)
412 fatal_error("%s", strerror(ENOMEM
));
414 p
->s_node
.label
= IDENTIFIER
;
422 static ndr_integer_t
*
423 int_enter(long value
)
428 for (pp
= &integer_list
; (p
= *pp
) != 0; pp
= &p
->next
) {
429 if (p
->value
== value
)
433 p
= ndr_alloc(1, sizeof (ndr_integer_t
));
436 p
->s_node
.label
= INTEGER
;
437 p
->s_node
.n_int
= value
;
445 ndr_alloc(size_t nelem
, size_t elsize
)
449 if ((p
= calloc(nelem
, elsize
)) == NULL
) {
450 fatal_error("%s", strerror(ENOMEM
));
458 * The input context (filename, line number) is maintained by the
459 * lexical analysis, and we generally want such info reported for
460 * errors in a consistent manner.
463 compile_error(const char *fmt
, ...)
469 (void) vsnprintf(buf
, NDLBUFSZ
, fmt
, ap
);
472 (void) fprintf(stderr
, "ndrgen: compile error: %s:%d: %s\n",
473 file_name
->name
, line_number
, buf
);
479 fatal_error(const char *fmt
, ...)
485 (void) vsnprintf(buf
, NDLBUFSZ
, fmt
, ap
);
488 (void) fprintf(stderr
, "ndrgen: fatal error: %s\n", buf
);
493 * Setup nodes for the lexical analyzer.
496 n_cons(int label
, ...)
501 np
= ndr_alloc(1, sizeof (ndr_node_t
));
505 np
->n_arg
[0] = va_arg(ap
, void *);
506 np
->n_arg
[1] = va_arg(ap
, void *);
507 np
->n_arg
[2] = va_arg(ap
, void *);
510 np
->line_number
= line_number
;
511 np
->file_name
= file_name
;
518 * | list item ={ n_splice($1, $2); }
522 n_splice(struct node
*np1
, struct node
*np2
)
531 * Convert a string of words to a vector of strings.
532 * Returns the number of words.
535 str_to_sv(char *buf
, char *sv
[])
559 while (((c
= *p
++) != 0) && (c
!= qc
))
563 } else if (iswhite(c
)) {
568 /* still inside word */