Patrick Welche <prlw1@cam.ac.uk>
[netbsd-mini2440.git] / external / bsd / ntp / dist / ntpd / ntp_scanner.c
blobfb3bda3b7dd2dcfe7ea3ef25b9810868dcfcb444
1 /* $NetBSD$ */
4 /* ntp_scanner.c
6 * The source code for a simple lexical analyzer.
8 * Written By: Sachin Kamboj
9 * University of Delaware
10 * Newark, DE 19711
11 * Copyright (c) 2006
14 #ifdef HAVE_CONFIG_H
15 # include <config.h>
16 #endif
18 #include <stdio.h>
19 #include <ctype.h>
20 #include <stdlib.h>
21 #include <errno.h>
22 #include <string.h>
24 #include "ntp_config.h"
25 #include "ntpsim.h"
26 #include "ntp_scanner.h"
27 #include "ntp_parser.h"
28 #include "ntp_debug.h"
30 /* ntp_keyword.h declares finite state machine and token text */
31 #include "ntp_keyword.h"
35 /* SCANNER GLOBAL VARIABLES
36 * ------------------------
39 #define MAX_LEXEME (1024 + 1) /* Size of the lexeme buffer: 1024 characters plus the terminating NUL */
40 char yytext[MAX_LEXEME]; /* Text of the lexeme most recently scanned by yylex() */
41 extern int input_from_file; /* Nonzero: get_next_char() reads ip_file via FGETC(); zero: it reads remote_config.buffer */
46 /* CONSTANTS
47 * ---------
51 /* SCANNER GLOBAL VARIABLES
52 * ------------------------
54 const char special_chars[] = "{}(),;|="; /* Single characters that is_special() recognizes and that yylex() returns as their own token value */
57 /* FUNCTIONS
58 * ---------
61 int get_next_char(void); /* Next input character (file or remote buffer), or EOF */
62 static int is_keyword(char *lexeme, follby *pfollowedby); /* Keyword token for lexeme, or 0 if it is not a keyword */
67 * keyword() - Return the keyword associated with token T_ identifier
69 const char *
70 keyword(
71 int token
74 int i;
75 const char *text;
77 i = token - LOWEST_KEYWORD_ID;
79 if (i >= 0 && i < COUNTOF(keyword_text))
80 text = keyword_text[i];
81 else
82 text = NULL;
84 return (text != NULL)
85 ? text
86 : "(keyword not found)";
90 /* FILE INTERFACE
91 * --------------
92 * We define a couple of wrapper functions around the standard C fgetc
93 * and ungetc functions in order to include positional bookkeeping
96 struct FILE_INFO *
97 F_OPEN(
98 const char *path,
99 const char *mode
102 struct FILE_INFO *my_info;
104 my_info = emalloc(sizeof *my_info);
106 my_info->line_no = 1;
107 my_info->col_no = 0;
108 my_info->prev_line_col_no = 0;
109 my_info->prev_token_col_no = 0;
110 my_info->fname = path;
112 my_info->fd = fopen(path, mode);
113 if (NULL == my_info->fd) {
114 free(my_info);
115 return NULL;
117 return my_info;
121 FGETC(
122 struct FILE_INFO *stream
125 int ch = fgetc(stream->fd);
127 ++stream->col_no;
128 if (ch == '\n') {
129 stream->prev_line_col_no = stream->col_no;
130 ++stream->line_no;
131 stream->col_no = 1;
133 return ch;
136 /* BUGS: 1. Function will fail on more than one line of pushback
137 * 2. No error checking is done to see if ungetc fails
138 * SK: I don't think its worth fixing these bugs for our purposes ;-)
141 UNGETC(
142 int ch,
143 struct FILE_INFO *stream
146 if (ch == '\n') {
147 stream->col_no = stream->prev_line_col_no;
148 stream->prev_line_col_no = -1;
149 --stream->line_no;
151 --stream->col_no;
152 return ungetc(ch, stream->fd);
156 FCLOSE(
157 struct FILE_INFO *stream
160 int ret_val = fclose(stream->fd);
162 if (!ret_val)
163 free(stream);
164 return ret_val;
167 /* STREAM INTERFACE
168 * ----------------
169 * Provide a wrapper for the stream functions so that the
170 * stream can either read from a file or from a character
171 * array.
172 * NOTE: This is not very efficient for reading from character
173 * arrays, but needed to allow remote configuration where the
174 * configuration command is provided through ntpq.
176 * The behavior of these two functions is determined by the
177 * input_from_file flag.
181 get_next_char(
182 void
185 char ch;
187 if (input_from_file)
188 return FGETC(ip_file);
189 else {
190 if (remote_config.buffer[remote_config.pos] == '\0')
191 return EOF;
192 else {
193 ip_file->col_no++;
194 ch = remote_config.buffer[remote_config.pos++];
195 if (ch == '\n') {
196 ip_file->prev_line_col_no = ip_file->col_no;
197 ++ip_file->line_no;
198 ip_file->col_no = 1;
200 return ch;
205 void
206 push_back_char(
207 int ch
210 if (input_from_file)
211 UNGETC(ch, ip_file);
212 else {
213 if (ch == '\n') {
214 ip_file->col_no = ip_file->prev_line_col_no;
215 ip_file->prev_line_col_no = -1;
216 --ip_file->line_no;
218 --ip_file->col_no;
220 remote_config.pos--;
226 /* STATE MACHINES
227 * --------------
230 /* Keywords */
231 static int
232 is_keyword(
233 char *lexeme,
234 follby *pfollowedby
237 follby fb;
238 int curr_s; /* current state index */
239 int token;
240 int i;
242 curr_s = SCANNER_INIT_S;
243 token = 0;
245 for (i = 0; lexeme[i]; i++) {
246 while (curr_s && (lexeme[i] != SS_CH(sst[curr_s])))
247 curr_s = SS_OTHER_N(sst[curr_s]);
249 if (curr_s && (lexeme[i] == SS_CH(sst[curr_s]))) {
250 if ('\0' == lexeme[i + 1]
251 && FOLLBY_NON_ACCEPTING
252 != SS_FB(sst[curr_s])) {
253 fb = SS_FB(sst[curr_s]);
254 *pfollowedby = fb;
255 token = curr_s;
256 break;
258 curr_s = SS_MATCH_N(sst[curr_s]);
259 } else
260 break;
263 return token;
/*
 * is_integer() - test whether a lexeme is a decimal integer: an
 *		  optional leading '-' followed by one or more digits.
 *
 * Returns 1 when it is, 0 otherwise.  A lexeme without any digit
 * ("" or a lone "-") is rejected, so the caller never hands strtol()
 * text it cannot convert.
 */
static int
is_integer(
	char *lexeme
	)
{
	int i = 0;

	/* Allow a leading minus sign */
	if ('-' == lexeme[i])
		i++;

	/* At least one digit is required */
	if ('\0' == lexeme[i])
		return 0;

	/*
	 * Check that all the remaining characters are digits.  The
	 * cast keeps the argument within the range isdigit() is
	 * defined for when plain char is signed.
	 */
	for (; lexeme[i] != '\0'; i++) {
		if (!isdigit((unsigned char)lexeme[i]))
			return 0;
	}

	return 1;
}
/*
 * is_double() - test whether a lexeme is a floating point number of
 *		 the form [+-]digits.digits[e[+-]digits].
 *
 * The decimal point is mandatory and at least one digit must appear
 * on one side of it.  The exponent digits are not counted, so "1.5e"
 * is accepted.
 *
 * Returns 1 when the whole lexeme matches, 0 otherwise.
 */
static int
is_double(
	char *lexeme
	)
{
	int num_digits = 0;	/* Number of digits read */
	int i = 0;

	/*
	 * Every <ctype.h> call below casts through unsigned char so a
	 * byte >= 0x80 is a valid argument even where plain char is
	 * signed.
	 */

	/* Check for an optional '+' or '-' */
	if ('+' == lexeme[i] || '-' == lexeme[i])
		i++;

	/* Read the integer part */
	for (; lexeme[i] && isdigit((unsigned char)lexeme[i]); i++)
		num_digits++;

	/* Check for the required decimal point */
	if ('.' != lexeme[i])
		return 0;
	i++;

	/* Check for any digits after the decimal point */
	for (; lexeme[i] && isdigit((unsigned char)lexeme[i]); i++)
		num_digits++;

	/*
	 * The number of digits in both the decimal part and the
	 * fraction part must not be zero at this point
	 */
	if (!num_digits)
		return 0;

	/* Check if we are done */
	if (!lexeme[i])
		return 1;

	/* There is still more input, read the exponent */
	if ('e' != tolower((unsigned char)lexeme[i]))
		return 0;
	i++;

	/* Read an optional Sign */
	if ('+' == lexeme[i] || '-' == lexeme[i])
		i++;

	/* Now read the exponent part */
	while (lexeme[i] && isdigit((unsigned char)lexeme[i]))
		i++;

	/* Anything left over means this is not a double */
	return ('\0' == lexeme[i]) ? 1 : 0;
}
350 /* is_special() - Test whether a character is a token */
351 static inline int
352 is_special(
353 int ch
356 return (int)strchr(special_chars, ch);
360 static int
361 is_EOC(
362 int ch
365 if ((old_config_style && (ch == '\n')) ||
366 (!old_config_style && (ch == ';')))
367 return 1;
368 return 0;
372 char *
373 quote_if_needed(char *str)
375 char *ret;
376 size_t len;
377 size_t octets;
379 len = strlen(str);
380 octets = len + 2 + 1;
381 ret = emalloc(octets);
382 if ('"' != str[0]
383 && (strcspn(str, special_chars) < len
384 || strchr(str, ' ') != NULL)) {
385 snprintf(ret, octets, "\"%s\"", str);
386 } else
387 strncpy(ret, str, octets);
389 return ret;
393 static int
394 create_string_token(
395 char *lexeme
398 char *pch;
401 * ignore end of line whitespace
403 pch = lexeme;
404 while (*pch && isspace(*pch))
405 pch++;
407 if (!*pch) {
408 yylval.Integer = T_EOC;
409 return yylval.Integer;
412 yylval.String = estrdup(lexeme);
413 return T_String;
418 * yylex() - function that does the actual scanning.
419 * Bison expects this function to be called yylex and for it to take no
420 * input and return an int.
421 * Conceptually yylex "returns" yylval as well as the actual return
422 * value representing the token or type.
425 yylex(
426 void
429 int i, instring = 0;
430 int yylval_was_set = 0;
431 int token; /* The return value/the recognized token */
432 int ch;
433 static follby followedby = FOLLBY_TOKEN;
435 do {
436 /* Ignore whitespace at the beginning */
437 while (EOF != (ch = get_next_char()) &&
438 isspace(ch) &&
439 !is_EOC(ch))
440 ; /* Null Statement */
442 if (EOF == ch) {
444 if (!input_from_file || !curr_include_level)
445 return 0;
447 FCLOSE(fp[curr_include_level]);
448 ip_file = fp[--curr_include_level];
449 token = T_EOC;
450 goto normal_return;
452 } else if (is_EOC(ch)) {
454 /* end FOLLBY_STRINGS_TO_EOC effect */
455 followedby = FOLLBY_TOKEN;
456 token = T_EOC;
457 goto normal_return;
459 } else if (is_special(ch) && FOLLBY_TOKEN == followedby) {
460 /* special chars are their own token values */
461 token = ch;
463 * '=' implies a single string following as in:
464 * setvar Owner = "The Boss" default
465 * This could alternatively be handled by
466 * removing '=' from special_chars and adding
467 * it to the keyword table.
469 if ('=' == ch)
470 followedby = FOLLBY_STRING;
471 yytext[0] = (char)ch;
472 yytext[1] = '\0';
473 goto normal_return;
474 } else
475 push_back_char(ch);
477 /* save the position of start of the token */
478 ip_file->prev_token_line_no = ip_file->line_no;
479 ip_file->prev_token_col_no = ip_file->col_no;
481 /* Read in the lexeme */
482 i = 0;
483 while (EOF != (ch = get_next_char())) {
485 yytext[i] = (char)ch;
487 /* Break on whitespace or a special character */
488 if (isspace(ch) || is_EOC(ch)
489 || '"' == ch
490 || (FOLLBY_TOKEN == followedby
491 && is_special(ch)))
492 break;
494 /* Read the rest of the line on reading a start
495 of comment character */
496 if ('#' == ch) {
497 while (EOF != (ch = get_next_char())
498 && '\n' != ch)
499 ; /* Null Statement */
500 break;
503 i++;
504 if (i >= COUNTOF(yytext))
505 goto lex_too_long;
507 /* Pick up all of the string inside between " marks, to
508 * end of line. If we make it to EOL without a
509 * terminating " assume it for them.
511 * XXX - HMS: I'm not sure we want to assume the closing "
513 if ('"' == ch) {
514 instring = 1;
515 while (EOF != (ch = get_next_char()) &&
516 ch != '"' && ch != '\n') {
517 yytext[i++] = (char)ch;
518 if (i >= COUNTOF(yytext))
519 goto lex_too_long;
522 * yytext[i] will be pushed back as not part of
523 * this lexeme, but any closing quote should
524 * not be pushed back, so we read another char.
526 if ('"' == ch)
527 ch = get_next_char();
529 /* Pushback the last character read that is not a part
530 * of this lexeme.
531 * If the last character read was an EOF, pushback a
532 * newline character. This is to prevent a parse error
533 * when there is no newline at the end of a file.
535 if (EOF == ch)
536 push_back_char('\n');
537 else
538 push_back_char(ch);
539 yytext[i] = '\0';
540 } while (i == 0);
542 /* Now return the desired token */
544 /* First make sure that the parser is *not* expecting a string
545 * as the next token (based on the previous token that was
546 * returned) and that we haven't read a string.
549 if (followedby == FOLLBY_TOKEN && !instring) {
550 token = is_keyword(yytext, &followedby);
551 if (token)
552 goto normal_return;
553 else if (is_integer(yytext)) {
554 yylval_was_set = 1;
555 errno = 0;
556 if ((yylval.Integer = strtol(yytext, NULL, 10)) == 0
557 && ((errno == EINVAL) || (errno == ERANGE))) {
558 msyslog(LOG_ERR,
559 "Integer cannot be represented: %s",
560 yytext);
561 exit(1);
562 } else {
563 token = T_Integer;
564 goto normal_return;
567 else if (is_double(yytext)) {
568 yylval_was_set = 1;
569 errno = 0;
570 if ((yylval.Double = atof(yytext)) == 0 && errno == ERANGE) {
571 msyslog(LOG_ERR,
572 "Double too large to represent: %s",
573 yytext);
574 exit(1);
575 } else {
576 token = T_Double;
577 goto normal_return;
579 } else {
580 /* Default: Everything is a string */
581 yylval_was_set = 1;
582 token = create_string_token(yytext);
583 goto normal_return;
588 * Either followedby is not FOLLBY_TOKEN or this lexeme is part
589 * of a string. Hence, we need to return T_String.
591 * _Except_ we might have a -4 or -6 flag on a an association
592 * configuration line (server, peer, pool, etc.).
594 * This is a terrible hack, but the grammar is ambiguous so we
595 * don't have a choice. [SK]
597 * The ambiguity is in the keyword scanner, not ntp_parser.y.
598 * We do not require server addresses be quoted in ntp.conf,
599 * complicating the scanner's job. To avoid trying (and
600 * failing) to match an IP address or DNS name to a keyword,
601 * the association keywords use FOLLBY_STRING in the keyword
602 * table, which tells the scanner to force the next token to be
603 * a T_String, so it does not try to match a keyword but rather
604 * expects a string when -4/-6 modifiers to server, peer, etc.
605 * are encountered.
606 * restrict -4 and restrict -6 parsing works correctly without
607 * this hack, as restrict uses FOLLBY_TOKEN. [DH]
609 if ('-' == yytext[0]) {
610 if ('4' == yytext[1]) {
611 token = T_Ipv4_flag;
612 goto normal_return;
613 } else if ('6' == yytext[1]) {
614 token = T_Ipv6_flag;
615 goto normal_return;
619 instring = 0;
620 if (FOLLBY_STRING == followedby)
621 followedby = FOLLBY_TOKEN;
623 yylval_was_set = 1;
624 token = create_string_token(yytext);
626 normal_return:
627 if (T_EOC == token)
628 DPRINTF(4,("\t<end of command>\n"));
629 else
630 DPRINTF(4, ("yylex: lexeme '%s' -> %s\n", yytext,
631 token_name(token)));
633 if (!yylval_was_set)
634 yylval.Integer = token;
636 return token;
638 lex_too_long:
639 yytext[min(sizeof(yytext) - 1, 50)] = 0;
640 msyslog(LOG_ERR,
641 "configuration item on line %d longer than limit of %d, began with '%s'",
642 ip_file->line_no, sizeof(yytext) - 1, yytext);
645 * If we hit the length limit reading the startup configuration
646 * file, abort.
648 if (input_from_file)
649 exit(sizeof(yytext) - 1);
652 * If it's runtime configuration via ntpq :config treat it as
653 * if the configuration text ended before the too-long lexeme,
654 * hostname, or string.
656 yylval.Integer = 0;
657 return 0;