Merge remote-tracking branch 'origin/master'
[unleashed/lotheac.git] / usr / src / tools / ndrgen / ndr_lex.c
blob475551f25cad6c4105a7a739416f3b50354108a7
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 #include <errno.h>
28 #include <stdarg.h>
29 #include "ndrgen.h"
30 #include "y.tab.h"
33 * C-like lexical analysis.
35 * 1. Define a "struct node"
36 * 2. Define a "struct symbol" that encapsulates a struct node.
37 * 3. Define a "struct integer" that encapsulates a struct node.
38 * 4. Set the YACC stack type in the grammar:
39 * %{
40 * #define YYSTYPE struct node *
41 * %}
42 * 5. Define %token's in the grammar for IDENTIFIER, STRING and INTEGER.
43 * Using "_KW" as a suffix for keyword tokens, i.e. "struct" is
44 * "%token STRUCT_KW":
45 * // atomic values
46 * %token INTEGER STRING IDENTIFIER
47 * // keywords
48 * %token STRUCT_KW CASE_KW
49 * // operators
50 * %token PLUS MINUS ASSIGN ARROW
51 * // overloaded tokens (++ --, < > <= >=, == !=, += -= *= ...)
52 * %token INCOP RELOP EQUOP ASSOP
53 * 6. It's easiest to use the yacc(1) generated token numbers for node
54 * labels. For node labels that are not actually part of the grammar,
55 * use a %token with an L_ prefix:
56 * // node labels (can't be generated by lex)
57 * %token L_LT L_LTE L_GT L_GTE L_EQU L_NEQ
58 * 7. Call set_lex_input() before parsing.
/*
 * Character classification helpers for the lexer.
 *
 * These are deliberately locale-independent replacements for <ctype.h>.
 * Every macro argument is fully parenthesized so that an arbitrary
 * expression may be passed; note that arguments may be evaluated more
 * than once, so they must not have side effects.
 */
#define	SQ	'\''
#define	DQ	'"'

/* Used when splitting preprocessor line markers into words. */
#define	isquote(c)	((c) == SQ || (c) == DQ)
#define	iswhite(c)	\
	((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) == '\f')

#define	is_between(c, l, u)	((l) <= (c) && (c) <= (u))
/* Intra-line white space: newline is excluded because yylex() counts it. */
#define	is_white(c)	\
	((c) == ' ' || (c) == '\r' || (c) == '\t' || (c) == '\f')
#define	is_lower(c)	is_between((c), 'a', 'z')
#define	is_upper(c)	is_between((c), 'A', 'Z')
#define	is_alpha(c)	(is_lower(c) || is_upper(c))
#define	is_digit(c)	is_between((c), '0', '9')
/* First character of an identifier, and any following character. */
#define	is_sstart(c)	(is_alpha(c) || (c) == '_')
#define	is_sfollow(c)	(is_sstart(c) || is_digit(c))
#define	is_xdigit(c)	\
	(is_digit(c) || is_between((c), 'A', 'F') || is_between((c), 'a', 'f'))
78 ndr_symbol_t *symbol_list;
79 static ndr_integer_t *integer_list;
80 static FILE *lex_infp;
81 static ndr_symbol_t *file_name;
82 int line_number;
83 int n_compile_error;
85 static int lex_at_bol;
87 /* In yacc(1) generated parser */
88 extern struct node *yylval;
91 * The keywtable[] and optable[] could be external to this lex
92 * and it would all still work.
94 static ndr_keyword_t keywtable[] = {
95 { "struct", STRUCT_KW, 0 },
96 { "union", UNION_KW, 0 },
97 { "typedef", TYPEDEF_KW, 0 },
99 { "interface", INTERFACE_KW, 0 },
100 { "uuid", UUID_KW, 0 },
101 { "_no_reorder", _NO_REORDER_KW, 0 },
102 { "extern", EXTERN_KW, 0 },
103 { "reference", REFERENCE_KW, 0 },
105 { "align", ALIGN_KW, 0 },
106 { "operation", OPERATION_KW, 0 },
107 { "in", IN_KW, 0 },
108 { "out", OUT_KW, 0 },
110 { "string", STRING_KW, 0 },
111 { "size_is", SIZE_IS_KW, 0 },
112 { "length_is", LENGTH_IS_KW, 0 },
114 { "switch_is", SWITCH_IS_KW, 0 },
115 { "case", CASE_KW, 0 },
116 { "default", DEFAULT_KW, 0 },
118 { "transmit_as", TRANSMIT_AS_KW, 0 },
119 { "arg_is", ARG_IS_KW, 0 },
121 { "char", BASIC_TYPE, 1 },
122 { "uchar", BASIC_TYPE, 1 },
123 { "wchar", BASIC_TYPE, 2 },
124 { "short", BASIC_TYPE, 2 },
125 { "ushort", BASIC_TYPE, 2 },
126 { "long", BASIC_TYPE, 4 },
127 { "ulong", BASIC_TYPE, 4 },
131 static ndr_keyword_t optable[] = {
132 { "{", LC, 0 },
133 { "}", RC, 0 },
134 { "(", LP, 0 },
135 { ")", RP, 0 },
136 { "[", LB, 0 },
137 { "]", RB, 0 },
138 { "*", STAR, 0 },
139 { "/", DIV, 0 },
140 { "%", MOD, 0 },
141 { "-", MINUS, 0 },
142 { "+", PLUS, 0 },
143 { "&", AND, 0 },
144 { "|", OR, 0 },
145 { "^", XOR, 0 },
146 { ";", SEMI, 0 },
150 static int getch(FILE *fp);
151 static ndr_integer_t *int_enter(long);
152 static ndr_symbol_t *sym_enter(char *);
153 static ndr_symbol_t *sym_find(char *);
154 static int str_to_sv(char *, char *sv[]);
157 * Enter the symbols for keyword.
159 static void
160 keyw_tab_init(ndr_keyword_t kwtable[])
162 int i;
163 ndr_keyword_t *kw;
164 ndr_symbol_t *sym;
166 for (i = 0; kwtable[i].name; i++) {
167 kw = &kwtable[i];
169 sym = sym_enter(kw->name);
170 sym->kw = kw;
174 void
175 set_lex_input(FILE *fp, char *name)
177 keyw_tab_init(keywtable);
178 keyw_tab_init(optable);
180 lex_infp = fp;
181 file_name = sym_enter(name);
182 line_number = 1;
183 lex_at_bol = 1;
/*
 * Fetch the next input character from fp, or EOF.  All lexer input goes
 * through this single function.
 */
static int
getch(FILE *fp)
{
	int	ch;

	ch = getc(fp);
	return (ch);
}
193 yylex(void)
195 char lexeme[512];
196 char *p = lexeme;
197 FILE *fp = lex_infp;
198 int c, xc;
199 ndr_symbol_t *sym;
200 ndr_integer_t *intg;
202 top:
203 p = lexeme;
205 c = getch(fp);
206 if (c == EOF)
207 return (EOF);
209 if (c == '\n') {
210 line_number++;
211 lex_at_bol = 1;
212 goto top;
216 * Handle preprocessor lines. This just notes
217 * which file we're processing.
219 if (c == '#' && lex_at_bol) {
220 char *sv[10];
221 int sc;
223 while ((c = getch(fp)) != EOF && c != '\n')
224 *p++ = c;
226 *p = 0;
227 /* note: no ungetc() of newline, we don't want to count it */
229 if (*lexeme != ' ') {
230 /* not a line we know */
231 goto top;
234 sc = str_to_sv(lexeme, sv);
235 if (sc < 2)
236 goto top;
238 file_name = sym_enter(sv[1]);
239 line_number = atoi(sv[0]); /* for next input line */
240 lex_at_bol = 1;
241 goto top;
244 lex_at_bol = 0;
247 * Skip white space
249 if (is_white(c))
250 goto top;
253 * Symbol? Might be a keyword or just an identifier
255 if (is_sstart(c)) {
256 /* we got a symbol */
257 do {
258 *p++ = c;
259 c = getch(fp);
260 } while (is_sfollow(c));
261 (void) ungetc(c, fp);
262 *p = 0;
264 sym = sym_enter(lexeme);
266 yylval = &sym->s_node;
268 if (sym->kw) {
269 return (sym->kw->token);
270 } else {
271 return (IDENTIFIER);
276 * Integer constant?
278 if (is_digit(c)) {
279 /* we got a number */
280 *p++ = c;
281 if (c == '0') {
282 c = getch(fp);
283 if (c == 'x' || c == 'X') {
284 /* handle hex specially */
285 do {
286 *p++ = c;
287 c = getch(fp);
288 } while (is_xdigit(c));
289 goto convert_icon;
290 } else if (c == 'b' || c == 'B' ||
291 c == 'd' || c == 'D' ||
292 c == 'o' || c == 'O') {
293 do {
294 *p++ = c;
295 c = getch(fp);
296 } while (is_digit(c));
297 goto convert_icon;
299 (void) ungetc(c, fp);
301 /* could be anything */
302 c = getch(fp);
303 while (is_digit(c)) {
304 *p++ = c;
305 c = getch(fp);
308 convert_icon:
309 *p = 0;
310 (void) ungetc(c, fp);
312 intg = int_enter(strtol(lexeme, 0, 0));
313 yylval = &intg->s_node;
315 return (INTEGER);
318 /* Could handle strings. We don't seem to need them yet */
320 yylval = 0; /* operator tokens have no value */
321 xc = getch(fp); /* get look-ahead for two-char lexemes */
323 lexeme[0] = c;
324 lexeme[1] = xc;
325 lexeme[2] = 0;
328 * Look for to-end-of-line comment
330 if (c == '/' && xc == '/') {
331 /* eat the comment */
332 while ((c = getch(fp)) != EOF && c != '\n')
334 (void) ungetc(c, fp); /* put back newline */
335 goto top;
339 * Look for multi-line comment
341 if (c == '/' && xc == '*') {
342 /* eat the comment */
343 xc = -1;
344 while ((c = getch(fp)) != EOF) {
345 if (xc == '*' && c == '/') {
346 /* that's it */
347 break;
349 xc = c;
350 if (c == '\n')
351 line_number++;
353 goto top;
357 * Use symbol table lookup for two-character and
358 * one character operator tokens.
360 sym = sym_find(lexeme);
361 if (sym) {
362 /* there better be a keyword attached */
363 yylval = &sym->s_node;
364 return (sym->kw->token);
367 /* Try a one-character form */
368 (void) ungetc(xc, fp);
369 lexeme[1] = 0;
370 sym = sym_find(lexeme);
371 if (sym) {
372 /* there better be a keyword attached */
373 yylval = &sym->s_node;
374 return (sym->kw->token);
377 if (is_between(c, ' ', '~'))
378 compile_error("unrecognized character: 0x%02x (%c)", c, c);
379 else
380 compile_error("unrecognized character: 0x%02x", c);
381 goto top;
384 static ndr_symbol_t *
385 sym_find(char *name)
387 ndr_symbol_t **pp;
388 ndr_symbol_t *p;
390 for (pp = &symbol_list; (p = *pp) != 0; pp = &p->next) {
391 if (strcmp(p->name, name) == 0)
392 return (p);
395 return (0);
398 static ndr_symbol_t *
399 sym_enter(char *name)
401 ndr_symbol_t **pp;
402 ndr_symbol_t *p;
404 for (pp = &symbol_list; (p = *pp) != 0; pp = &p->next) {
405 if (strcmp(p->name, name) == 0)
406 return (p);
409 p = ndr_alloc(1, sizeof (ndr_symbol_t));
411 if ((p->name = strdup(name)) == NULL)
412 fatal_error("%s", strerror(ENOMEM));
414 p->s_node.label = IDENTIFIER;
415 p->s_node.n_sym = p;
417 *pp = p;
419 return (p);
422 static ndr_integer_t *
423 int_enter(long value)
425 ndr_integer_t **pp;
426 ndr_integer_t *p;
428 for (pp = &integer_list; (p = *pp) != 0; pp = &p->next) {
429 if (p->value == value)
430 return (p);
433 p = ndr_alloc(1, sizeof (ndr_integer_t));
435 p->value = value;
436 p->s_node.label = INTEGER;
437 p->s_node.n_int = value;
439 *pp = p;
441 return (p);
/*
 * Allocate zero-filled memory for nelem elements of elsize bytes each.
 * Allocation failure is reported through fatal_error().
 */
void *
ndr_alloc(size_t nelem, size_t elsize)
{
	void	*mem = calloc(nelem, elsize);

	if (mem != NULL)
		return (mem);

	fatal_error("%s", strerror(ENOMEM));
	/* NOTREACHED */
	return (mem);
}
458 * The input context (filename, line number) is maintained by the
459 * lexical analysis, and we generally want such info reported for
460 * errors in a consistent manner.
462 void
463 compile_error(const char *fmt, ...)
465 char buf[NDLBUFSZ];
466 va_list ap;
468 va_start(ap, fmt);
469 (void) vsnprintf(buf, NDLBUFSZ, fmt, ap);
470 va_end(ap);
472 (void) fprintf(stderr, "ndrgen: compile error: %s:%d: %s\n",
473 file_name->name, line_number, buf);
475 n_compile_error++;
478 void
479 fatal_error(const char *fmt, ...)
481 char buf[NDLBUFSZ];
482 va_list ap;
484 va_start(ap, fmt);
485 (void) vsnprintf(buf, NDLBUFSZ, fmt, ap);
486 va_end(ap);
488 (void) fprintf(stderr, "ndrgen: fatal error: %s\n", buf);
489 exit(1);
493 * Setup nodes for the lexical analyzer.
495 struct node *
496 n_cons(int label, ...)
498 ndr_node_t *np;
499 va_list ap;
501 np = ndr_alloc(1, sizeof (ndr_node_t));
503 va_start(ap, label);
504 np->label = label;
505 np->n_arg[0] = va_arg(ap, void *);
506 np->n_arg[1] = va_arg(ap, void *);
507 np->n_arg[2] = va_arg(ap, void *);
508 va_end(ap);
510 np->line_number = line_number;
511 np->file_name = file_name;
513 return (np);
517 * list: item
518 * | list item ={ n_splice($1, $2); }
521 void
522 n_splice(struct node *np1, struct node *np2)
524 while (np1->n_next)
525 np1 = np1->n_next;
527 np1->n_next = np2;
/*
 * Convert a string of words to a vector of strings.
 *
 * buf is split in place: words are separated by white space, quote
 * characters are removed, and text between a matching pair of quotes is
 * copied verbatim (so it may contain white space).  Pointers to the
 * resulting NUL-terminated words are stored in sv[], followed by a NULL
 * entry.  The caller must make sv[] large enough for every word plus
 * that terminator; no bound is checked here.
 *
 * Returns the number of words.
 */
static int
str_to_sv(char *buf, char *sv[])
{
	char	*src = buf;	/* next character to examine */
	char	*dst = buf;	/* next position to write; trails src */
	int	nwords = 0;
	int	in_word = 0;
	int	ch;

	while ((ch = *src++) != 0) {
		if (!in_word) {
			if (iswhite(ch))
				continue;

			/* first character of a new word */
			sv[nwords++] = dst;
			in_word = 1;
		}

		if (iswhite(ch)) {
			/* end of word */
			*dst++ = 0;
			in_word = 0;
		} else if (!isquote(ch)) {
			/* still inside word */
			*dst++ = ch;
		} else {
			int	closer = ch;

			/* copy up to the matching quote, dropping both */
			for (ch = *src++; ch != 0 && ch != closer;
			    ch = *src++)
				*dst++ = ch;

			/* unterminated quote: the string ends here */
			if (ch == 0)
				break;
		}
	}

	if (in_word)
		*dst = 0;

	sv[nwords] = NULL;
	return (nwords);
}