6 * The source code for a simple lexical analyzer.
8 * Written By: Sachin Kamboj
9 * University of Delaware
24 #include "ntp_config.h"
26 #include "ntp_scanner.h"
27 #include "ntp_parser.h"
28 #include "ntp_debug.h"
30 /* ntp_keyword.h declares finite state machine and token text */
31 #include "ntp_keyword.h"
35 /* SCANNER GLOBAL VARIABLES
36 * ------------------------
39 #define MAX_LEXEME (1024 + 1) /* The maximum size of a lexeme */
40 char yytext
[MAX_LEXEME
]; /* Buffer for storing the input text/lexeme */
41 extern int input_from_file
;
51 /* SCANNER GLOBAL VARIABLES
52 * ------------------------
54 const char special_chars
[] = "{}(),;|=";
61 int get_next_char(void);
62 static int is_keyword(char *lexeme
, follby
*pfollowedby
);
67 * keyword() - Return the keyword associated with token T_ identifier
77 i
= token
- LOWEST_KEYWORD_ID
;
79 if (i
>= 0 && i
< COUNTOF(keyword_text
))
80 text
= keyword_text
[i
];
86 : "(keyword not found)";
92 * We define a couple of wrapper functions around the standard C fgetc
93 * and ungetc functions in order to include positional bookkeeping
102 struct FILE_INFO
*my_info
;
104 my_info
= emalloc(sizeof *my_info
);
106 my_info
->line_no
= 1;
108 my_info
->prev_line_col_no
= 0;
109 my_info
->prev_token_col_no
= 0;
110 my_info
->fname
= path
;
112 my_info
->fd
= fopen(path
, mode
);
113 if (NULL
== my_info
->fd
) {
122 struct FILE_INFO
*stream
125 int ch
= fgetc(stream
->fd
);
129 stream
->prev_line_col_no
= stream
->col_no
;
136 /* BUGS: 1. Function will fail on more than one line of pushback
137 * 2. No error checking is done to see if ungetc fails
138 * SK: I don't think it's worth fixing these bugs for our purposes ;-)
143 struct FILE_INFO
*stream
147 stream
->col_no
= stream
->prev_line_col_no
;
148 stream
->prev_line_col_no
= -1;
152 return ungetc(ch
, stream
->fd
);
157 struct FILE_INFO
*stream
160 int ret_val
= fclose(stream
->fd
);
169 * Provide a wrapper for the stream functions so that the
170 * stream can either read from a file or from a character
172 * NOTE: This is not very efficient for reading from character
173 * arrays, but needed to allow remote configuration where the
174 * configuration command is provided through ntpq.
176 * The behavior of these two functions is determined by the
177 * input_from_file flag.
188 return FGETC(ip_file
);
190 if (remote_config
.buffer
[remote_config
.pos
] == '\0')
194 ch
= remote_config
.buffer
[remote_config
.pos
++];
196 ip_file
->prev_line_col_no
= ip_file
->col_no
;
214 ip_file
->col_no
= ip_file
->prev_line_col_no
;
215 ip_file
->prev_line_col_no
= -1;
238 int curr_s
; /* current state index */
242 curr_s
= SCANNER_INIT_S
;
245 for (i
= 0; lexeme
[i
]; i
++) {
246 while (curr_s
&& (lexeme
[i
] != SS_CH(sst
[curr_s
])))
247 curr_s
= SS_OTHER_N(sst
[curr_s
]);
249 if (curr_s
&& (lexeme
[i
] == SS_CH(sst
[curr_s
]))) {
250 if ('\0' == lexeme
[i
+ 1]
251 && FOLLBY_NON_ACCEPTING
252 != SS_FB(sst
[curr_s
])) {
253 fb
= SS_FB(sst
[curr_s
]);
258 curr_s
= SS_MATCH_N(sst
[curr_s
]);
275 /* Allow a leading minus sign */
276 if (lexeme
[i
] == '-')
279 /* Check that all the remaining characters are digits */
280 for (; lexeme
[i
]; ++i
) {
281 if (!isdigit(lexeme
[i
]))
294 int num_digits
= 0; /* Number of digits read */
299 /* Check for an optional '+' or '-' */
300 if ('+' == lexeme
[i
] || '-' == lexeme
[i
])
303 /* Read the integer part */
304 for (; lexeme
[i
] && isdigit(lexeme
[i
]); i
++)
307 /* Check for the required decimal point */
308 if ('.' == lexeme
[i
])
313 /* Check for any digits after the decimal point */
314 for (; lexeme
[i
] && isdigit(lexeme
[i
]); i
++)
318 * The number of digits in both the decimal part and the
319 * fraction part must not be zero at this point
324 /* Check if we are done */
328 /* There is still more input, read the exponent */
329 if ('e' == tolower(lexeme
[i
]))
334 /* Read an optional Sign */
335 if ('+' == lexeme
[i
] || '-' == lexeme
[i
])
338 /* Now read the exponent part */
339 while (lexeme
[i
] && isdigit(lexeme
[i
]))
342 /* Check if we are done */
350 /* is_special() - Test whether a character is a token */
356 return (int)strchr(special_chars
, ch
);
365 if ((old_config_style
&& (ch
== '\n')) ||
366 (!old_config_style
&& (ch
== ';')))
373 quote_if_needed(char *str
)
380 octets
= len
+ 2 + 1;
381 ret
= emalloc(octets
);
383 && (strcspn(str
, special_chars
) < len
384 || strchr(str
, ' ') != NULL
)) {
385 snprintf(ret
, octets
, "\"%s\"", str
);
387 strncpy(ret
, str
, octets
);
401 * ignore end of line whitespace
404 while (*pch
&& isspace(*pch
))
408 yylval
.Integer
= T_EOC
;
409 return yylval
.Integer
;
412 yylval
.String
= estrdup(lexeme
);
418 * yylex() - function that does the actual scanning.
419 * Bison expects this function to be called yylex and for it to take no
420 * input and return an int.
421 * Conceptually yylex "returns" yylval as well as the actual return
422 * value representing the token or type.
430 int yylval_was_set
= 0;
431 int token
; /* The return value/the recognized token */
433 static follby followedby
= FOLLBY_TOKEN
;
436 /* Ignore whitespace at the beginning */
437 while (EOF
!= (ch
= get_next_char()) &&
440 ; /* Null Statement */
444 if (!input_from_file
|| !curr_include_level
)
447 FCLOSE(fp
[curr_include_level
]);
448 ip_file
= fp
[--curr_include_level
];
452 } else if (is_EOC(ch
)) {
454 /* end FOLLBY_STRINGS_TO_EOC effect */
455 followedby
= FOLLBY_TOKEN
;
459 } else if (is_special(ch
) && FOLLBY_TOKEN
== followedby
) {
460 /* special chars are their own token values */
463 * '=' implies a single string following as in:
464 * setvar Owner = "The Boss" default
465 * This could alternatively be handled by
466 * removing '=' from special_chars and adding
467 * it to the keyword table.
470 followedby
= FOLLBY_STRING
;
471 yytext
[0] = (char)ch
;
477 /* save the position of start of the token */
478 ip_file
->prev_token_line_no
= ip_file
->line_no
;
479 ip_file
->prev_token_col_no
= ip_file
->col_no
;
481 /* Read in the lexeme */
483 while (EOF
!= (ch
= get_next_char())) {
485 yytext
[i
] = (char)ch
;
487 /* Break on whitespace or a special character */
488 if (isspace(ch
) || is_EOC(ch
)
490 || (FOLLBY_TOKEN
== followedby
494 /* Read the rest of the line on reading a start
495 of comment character */
497 while (EOF
!= (ch
= get_next_char())
499 ; /* Null Statement */
504 if (i
>= COUNTOF(yytext
))
507 /* Pick up all of the string inside between " marks, to
508 * end of line. If we make it to EOL without a
509 * terminating " assume it for them.
511 * XXX - HMS: I'm not sure we want to assume the closing "
515 while (EOF
!= (ch
= get_next_char()) &&
516 ch
!= '"' && ch
!= '\n') {
517 yytext
[i
++] = (char)ch
;
518 if (i
>= COUNTOF(yytext
))
522 * yytext[i] will be pushed back as not part of
523 * this lexeme, but any closing quote should
524 * not be pushed back, so we read another char.
527 ch
= get_next_char();
529 /* Pushback the last character read that is not a part
531 * If the last character read was an EOF, pushback a
532 * newline character. This is to prevent a parse error
533 * when there is no newline at the end of a file.
536 push_back_char('\n');
542 /* Now return the desired token */
544 /* First make sure that the parser is *not* expecting a string
545 * as the next token (based on the previous token that was
546 * returned) and that we haven't read a string.
549 if (followedby
== FOLLBY_TOKEN
&& !instring
) {
550 token
= is_keyword(yytext
, &followedby
);
553 else if (is_integer(yytext
)) {
556 if ((yylval
.Integer
= strtol(yytext
, NULL
, 10)) == 0
557 && ((errno
== EINVAL
) || (errno
== ERANGE
))) {
559 "Integer cannot be represented: %s",
567 else if (is_double(yytext
)) {
570 if ((yylval
.Double
= atof(yytext
)) == 0 && errno
== ERANGE
) {
572 "Double too large to represent: %s",
580 /* Default: Everything is a string */
582 token
= create_string_token(yytext
);
588 * Either followedby is not FOLLBY_TOKEN or this lexeme is part
589 * of a string. Hence, we need to return T_String.
591 * _Except_ we might have a -4 or -6 flag on an association
592 * configuration line (server, peer, pool, etc.).
594 * This is a terrible hack, but the grammar is ambiguous so we
595 * don't have a choice. [SK]
597 * The ambiguity is in the keyword scanner, not ntp_parser.y.
598 * We do not require server addresses be quoted in ntp.conf,
599 * complicating the scanner's job. To avoid trying (and
600 * failing) to match an IP address or DNS name to a keyword,
601 * the association keywords use FOLLBY_STRING in the keyword
602 * table, which tells the scanner to force the next token to be
603 * a T_String, so it does not try to match a keyword but rather
604 * expects a string when -4/-6 modifiers to server, peer, etc.
606 * restrict -4 and restrict -6 parsing works correctly without
607 * this hack, as restrict uses FOLLBY_TOKEN. [DH]
609 if ('-' == yytext
[0]) {
610 if ('4' == yytext
[1]) {
613 } else if ('6' == yytext
[1]) {
620 if (FOLLBY_STRING
== followedby
)
621 followedby
= FOLLBY_TOKEN
;
624 token
= create_string_token(yytext
);
628 DPRINTF(4,("\t<end of command>\n"));
630 DPRINTF(4, ("yylex: lexeme '%s' -> %s\n", yytext
,
634 yylval
.Integer
= token
;
639 yytext
[min(sizeof(yytext
) - 1, 50)] = 0;
641 "configuration item on line %d longer than limit of %d, began with '%s'",
642 ip_file
->line_no
, sizeof(yytext
) - 1, yytext
);
645 * If we hit the length limit reading the startup configuration
649 exit(sizeof(yytext
) - 1);
652 * If it's runtime configuration via ntpq :config treat it as
653 * if the configuration text ended before the too-long lexeme,
654 * hostname, or string.