external/bsd/ntp/dist/ntpd/ntp_scanner.c

   1 /*      $NetBSD$        */
   2
   3
   4 /* ntp_scanner.c
   5  *
   6  * The source code for a simple lexical analyzer.
   7  *
   8  * Written By:  Sachin Kamboj
   9  *              University of Delaware
  10  *              Newark, DE 19711
  11  * Copyright (c) 2006
  12  */
  13
  14 #ifdef HAVE_CONFIG_H
  15 # include <config.h>
  16 #endif
  17
  18 #include <stdio.h>
  19 #include <ctype.h>
  20 #include <stdlib.h>
  21 #include <errno.h>
  22 #include <string.h>
  23
  24 #include "ntp_config.h"
  25 #include "ntpsim.h"
  26 #include "ntp_scanner.h"
  27 #include "ntp_parser.h"
  28 #include "ntp_debug.h"
  29
  30 /* ntp_keyword.h declares finite state machine and token text */
  31 #include "ntp_keyword.h"
  32
  33
  34
  35 /* SCANNER GLOBAL VARIABLES
  36  * ------------------------
  37  */
  38
  39 #define MAX_LEXEME (1024 + 1)   /* Size of yytext: longest lexeme (1024) plus the terminating NUL */
  40 char yytext[MAX_LEXEME];        /* Buffer for storing the input text/lexeme; filled by yylex() */
  41 extern int input_from_file;     /* Nonzero: characters come from ip_file via FGETC(); zero: from remote_config.buffer */
  42
  43
  44
  45
  46 /* CONSTANTS
  47  * ---------
  48  */
  49
  50
  51 /* SCANNER GLOBAL VARIABLES
  52  * ------------------------
  53  */
  54 const char special_chars[] = "{}(),;|=";   /* Characters yylex() returns as single-character tokens; also force quoting in quote_if_needed() */
  55
  56
  57 /* FUNCTIONS
  58  * ---------
  59  */
  60
  61 int get_next_char(void);        /* Next input character, or EOF; defined below */
  62 static int is_keyword(char *lexeme, follby *pfollowedby);  /* Keyword lookup in the sst state table; 0 when lexeme is not a keyword */
  63
  64
  65
  66 /*
  67  * keyword() - Return the keyword associated with token T_ identifier
  68  */
  69 const char *
  70 keyword(
  71         int token
  72         )
  73 {
  74         int i;
  75         const char *text;
  76
  77         i = token - LOWEST_KEYWORD_ID;
  78
  79         if (i >= 0 && i < COUNTOF(keyword_text))
  80                 text = keyword_text[i];
  81         else
  82                 text = NULL;
  83
  84         return (text != NULL)
  85                    ? text
  86                    : "(keyword not found)";
  87 }
  88
  89
  90 /* FILE INTERFACE
  91  * --------------
  92  * We define a couple of wrapper functions around the standard C fgetc
  93  * and ungetc functions in order to include positional bookkeeping
  94  */
  95
  96 struct FILE_INFO *
  97 F_OPEN(
  98         const char *path,
  99         const char *mode
 100         )
 101 {
 102         struct FILE_INFO *my_info;
 103
 104         my_info = emalloc(sizeof *my_info);
 105
 106         my_info->line_no = 1;
 107         my_info->col_no = 0;
 108         my_info->prev_line_col_no = 0;
 109         my_info->prev_token_col_no = 0;
 110         my_info->fname = path;
 111
 112         my_info->fd = fopen(path, mode);
 113         if (NULL == my_info->fd) {
 114                 free(my_info);
 115                 return NULL;
 116         }
 117         return my_info;
 118 }
 119
 120 int
 121 FGETC(
 122         struct FILE_INFO *stream
 123         )
 124 {
 125         int ch = fgetc(stream->fd);
 126
 127         ++stream->col_no;
 128         if (ch == '\n') {
 129                 stream->prev_line_col_no = stream->col_no;
 130                 ++stream->line_no;
 131                 stream->col_no = 1;
 132         }
 133         return ch;
 134 }
 135
 136 /* BUGS: 1. Function will fail on more than one line of pushback
 137  *       2. No error checking is done to see if ungetc fails
 138  * SK: I don't think its worth fixing these bugs for our purposes ;-)
 139  */
 140 int
 141 UNGETC(
 142         int ch,
 143         struct FILE_INFO *stream
 144         )
 145 {
 146         if (ch == '\n') {
 147                 stream->col_no = stream->prev_line_col_no;
 148                 stream->prev_line_col_no = -1;
 149                 --stream->line_no;
 150         }
 151         --stream->col_no;
 152         return ungetc(ch, stream->fd);
 153 }
 154
 155 int
 156 FCLOSE(
 157         struct FILE_INFO *stream
 158         )
 159 {
 160         int ret_val = fclose(stream->fd);
 161
 162         if (!ret_val)
 163                 free(stream);
 164         return ret_val;
 165 }
 166
 167 /* STREAM INTERFACE
 168  * ----------------
 169  * Provide a wrapper for the stream functions so that the
 170  * stream can either read from a file or from a character
 171  * array.
 172  * NOTE: This is not very efficient for reading from character
 173  * arrays, but needed to allow remote configuration where the
 174  * configuration command is provided through ntpq.
 175  *
 176  * The behavior of these two functions is determined by the
 177  * input_from_file flag.
 178  */
 179
 180 int
 181 get_next_char(
 182         void
 183         )
 184 {
 185         char ch;
 186
 187         if (input_from_file)
 188                 return FGETC(ip_file);
 189         else {
 190                 if (remote_config.buffer[remote_config.pos] == '\0')
 191                         return EOF;
 192                 else {
 193                         ip_file->col_no++;
 194                         ch = remote_config.buffer[remote_config.pos++];
 195                         if (ch == '\n') {
 196                                 ip_file->prev_line_col_no = ip_file->col_no;
 197                                 ++ip_file->line_no;
 198                                 ip_file->col_no = 1;
 199                         }
 200                         return ch;
 201                 }
 202         }
 203 }
 204
 205 void
 206 push_back_char(
 207         int ch
 208         )
 209 {
 210         if (input_from_file)
 211                 UNGETC(ch, ip_file);
 212         else {
 213                 if (ch == '\n') {
 214                         ip_file->col_no = ip_file->prev_line_col_no;
 215                         ip_file->prev_line_col_no = -1;
 216                         --ip_file->line_no;
 217                 }
 218                 --ip_file->col_no;
 219
 220                 remote_config.pos--;
 221         }
 222 }
 223
 224
 225
 226 /* STATE MACHINES
 227  * --------------
 228  */
 229
 230 /* Keywords */
 231 static int
 232 is_keyword(
 233         char *lexeme,
 234         follby *pfollowedby
 235         )
 236 {
 237         follby fb;
 238         int curr_s;             /* current state index */
 239         int token;
 240         int i;
 241
 242         curr_s = SCANNER_INIT_S;
 243         token = 0;
 244
 245         for (i = 0; lexeme[i]; i++) {
 246                 while (curr_s && (lexeme[i] != SS_CH(sst[curr_s])))
 247                         curr_s = SS_OTHER_N(sst[curr_s]);
 248
 249                 if (curr_s && (lexeme[i] == SS_CH(sst[curr_s]))) {
 250                         if ('\0' == lexeme[i + 1]
 251                             && FOLLBY_NON_ACCEPTING
 252                                != SS_FB(sst[curr_s])) {
 253                                 fb = SS_FB(sst[curr_s]);
 254                                 *pfollowedby = fb;
 255                                 token = curr_s;
 256                                 break;
 257                         }
 258                         curr_s = SS_MATCH_N(sst[curr_s]);
 259                 } else
 260                         break;
 261         }
 262
 263         return token;
 264 }
 265
 266
 /*
  * is_integer() - test whether a lexeme is a decimal integer.
  *
  * Accepts an optional leading '-' followed by one or more decimal
  * digits and nothing else.  A bare "-" and the empty string are
  * rejected: they contain no digits, and strtol() would silently turn
  * them into 0.
  *
  * Returns 1 for an integer, 0 otherwise.
  */
 static int
 is_integer(
         const char *lexeme
         )
 {
         const char *p = lexeme;

         /* Allow a leading minus sign */
         if ('-' == *p)
                 p++;

         /* At least one digit is required */
         if ('\0' == *p)
                 return 0;

         /*
          * Check that all the remaining characters are digits.  The
          * cast keeps bytes above 0x7f from reaching isdigit() as
          * negative values, which would be undefined behaviour.
          */
         for (; '\0' != *p; p++) {
                 if (!isdigit((unsigned char)*p))
                         return 0;
         }

         return 1;
 }
 286
 287
 /*
  * is_double() - test whether a lexeme is a floating point number.
  *
  * Accepted form:
  *      [+-] digits* '.' digits* [ (e|E) [+-] digits+ ]
  * with at least one digit in the integer or fraction part.  The
  * decimal point is mandatory, so plain integers are rejected here and
  * left to is_integer().  An exponent marker must be followed by at
  * least one digit; "1.5e" and "1.5e+" are not numbers.
  *
  * Returns 1 for a double, 0 otherwise.
  *
  * Characters are passed to the <ctype.h> functions as unsigned char,
  * since a negative char value other than EOF is undefined behaviour
  * there.
  */
 static int
 is_double(
         const char *lexeme
         )
 {
         const char *p = lexeme;
         int mantissa_digits = 0;
         int exponent_digits = 0;

         /* Check for an optional '+' or '-' */
         if ('+' == *p || '-' == *p)
                 p++;

         /* Read the integer part */
         for (; isdigit((unsigned char)*p); p++)
                 mantissa_digits++;

         /* Check for the required decimal point */
         if ('.' != *p)
                 return 0;
         p++;

         /* Check for any digits after the decimal point */
         for (; isdigit((unsigned char)*p); p++)
                 mantissa_digits++;

         /* Integer and fraction part must not both be empty */
         if (0 == mantissa_digits)
                 return 0;

         /* Check if we are done */
         if ('\0' == *p)
                 return 1;

         /* There is still more input, it has to be an exponent */
         if ('e' != tolower((unsigned char)*p))
                 return 0;
         p++;

         /* Read an optional sign */
         if ('+' == *p || '-' == *p)
                 p++;

         /* Now read the exponent digits, of which one is required */
         for (; isdigit((unsigned char)*p); p++)
                 exponent_digits++;

         return exponent_digits > 0 && '\0' == *p;
 }
 348
 349
 350 /* is_special() - Test whether a character is a token */
 351 static inline int
 352 is_special(
 353         int ch
 354         )
 355 {
 356         return (int)strchr(special_chars, ch);
 357 }
 358
 359
 360 static int
 361 is_EOC(
 362         int ch
 363         )
 364 {
 365         if ((old_config_style && (ch == '\n')) ||
 366             (!old_config_style && (ch == ';')))
 367                 return 1;
 368         return 0;
 369 }
 370
 371
 372 char *
 373 quote_if_needed(char *str)
 374 {
 375         char *ret;
 376         size_t len;
 377         size_t octets;
 378
 379         len = strlen(str);
 380         octets = len + 2 + 1;
 381         ret = emalloc(octets);
 382         if ('"' != str[0]
 383             && (strcspn(str, special_chars) < len
 384                 || strchr(str, ' ') != NULL)) {
 385                 snprintf(ret, octets, "\"%s\"", str);
 386         } else
 387                 strncpy(ret, str, octets);
 388
 389         return ret;
 390 }
 391
 392
 393 static int
 394 create_string_token(
 395         char *lexeme
 396         )
 397 {
 398         char *pch;
 399
 400         /*
 401          * ignore end of line whitespace
 402          */
 403         pch = lexeme;
 404         while (*pch && isspace(*pch))
 405                 pch++;
 406
 407         if (!*pch) {
 408                 yylval.Integer = T_EOC;
 409                 return yylval.Integer;
 410         }
 411
 412         yylval.String = estrdup(lexeme);
 413         return T_String;
 414 }
 415
 416
 417 /*
 418  * yylex() - function that does the actual scanning.
 419  * Bison expects this function to be called yylex and for it to take no
 420  * input and return an int.
 421  * Conceptually yylex "returns" yylval as well as the actual return
 422  * value representing the token or type.
 423  */
 424 int
 425 yylex(
 426         void
 427         )
 428 {
 429         int i, instring = 0;
 430         int yylval_was_set = 0;
 431         int token;              /* The return value/the recognized token */
 432         int ch;
 433         static follby followedby = FOLLBY_TOKEN;
 434
 435         do {
 436                 /* Ignore whitespace at the beginning */
 437                 while (EOF != (ch = get_next_char()) &&
 438                        isspace(ch) &&
 439                        !is_EOC(ch))
 440                         ; /* Null Statement */
 441
 442                 if (EOF == ch) {
 443
 444                         if (!input_from_file || !curr_include_level)
 445                                 return 0;
 446
 447                         FCLOSE(fp[curr_include_level]);
 448                         ip_file = fp[--curr_include_level];
 449                         token = T_EOC;
 450                         goto normal_return;
 451
 452                 } else if (is_EOC(ch)) {
 453
 454                         /* end FOLLBY_STRINGS_TO_EOC effect */
 455                         followedby = FOLLBY_TOKEN;
 456                         token = T_EOC;
 457                         goto normal_return;
 458
 459                 } else if (is_special(ch) && FOLLBY_TOKEN == followedby) {
 460                         /* special chars are their own token values */
 461                         token = ch;
 462                         /*
 463                          * '=' implies a single string following as in:
 464                          * setvar Owner = "The Boss" default
 465                          * This could alternatively be handled by
 466                          * removing '=' from special_chars and adding
 467                          * it to the keyword table.
 468                          */
 469                         if ('=' == ch)
 470                                 followedby = FOLLBY_STRING;
 471                         yytext[0] = (char)ch;
 472                         yytext[1] = '\0';
 473                         goto normal_return;
 474                 } else
 475                         push_back_char(ch);
 476
 477                 /* save the position of start of the token */
 478                 ip_file->prev_token_line_no = ip_file->line_no;
 479                 ip_file->prev_token_col_no = ip_file->col_no;
 480
 481                 /* Read in the lexeme */
 482                 i = 0;
 483                 while (EOF != (ch = get_next_char())) {
 484
 485                         yytext[i] = (char)ch;
 486
 487                         /* Break on whitespace or a special character */
 488                         if (isspace(ch) || is_EOC(ch)
 489                             || '"' == ch
 490                             || (FOLLBY_TOKEN == followedby
 491                                 && is_special(ch)))
 492                                 break;
 493
 494                         /* Read the rest of the line on reading a start
 495                            of comment character */
 496                         if ('#' == ch) {
 497                                 while (EOF != (ch = get_next_char())
 498                                        && '\n' != ch)
 499                                         ; /* Null Statement */
 500                                 break;
 501                         }
 502
 503                         i++;
 504                         if (i >= COUNTOF(yytext))
 505                                 goto lex_too_long;
 506                 }
 507                 /* Pick up all of the string inside between " marks, to
 508                  * end of line.  If we make it to EOL without a
 509                  * terminating " assume it for them.
 510                  *
 511                  * XXX - HMS: I'm not sure we want to assume the closing "
 512                  */
 513                 if ('"' == ch) {
 514                         instring = 1;
 515                         while (EOF != (ch = get_next_char()) &&
 516                                ch != '"' && ch != '\n') {
 517                                 yytext[i++] = (char)ch;
 518                                 if (i >= COUNTOF(yytext))
 519                                         goto lex_too_long;
 520                         }
 521                         /*
 522                          * yytext[i] will be pushed back as not part of
 523                          * this lexeme, but any closing quote should
 524                          * not be pushed back, so we read another char.
 525                          */
 526                         if ('"' == ch)
 527                                 ch = get_next_char();
 528                 }
 529                 /* Pushback the last character read that is not a part
 530                  * of this lexeme.
 531                  * If the last character read was an EOF, pushback a
 532                  * newline character. This is to prevent a parse error
 533                  * when there is no newline at the end of a file.
 534                  */
 535                 if (EOF == ch)
 536                         push_back_char('\n');
 537                 else
 538                         push_back_char(ch);
 539                 yytext[i] = '\0';
 540         } while (i == 0);
 541
 542         /* Now return the desired token */
 543
 544         /* First make sure that the parser is *not* expecting a string
 545          * as the next token (based on the previous token that was
 546          * returned) and that we haven't read a string.
 547          */
 548
 549         if (followedby == FOLLBY_TOKEN && !instring) {
 550                 token = is_keyword(yytext, &followedby);
 551                 if (token)
 552                         goto normal_return;
 553                 else if (is_integer(yytext)) {
 554                         yylval_was_set = 1;
 555                         errno = 0;
 556                         if ((yylval.Integer = strtol(yytext, NULL, 10)) == 0
 557                             && ((errno == EINVAL) || (errno == ERANGE))) {
 558                                 msyslog(LOG_ERR,
 559                                         "Integer cannot be represented: %s",
 560                                         yytext);
 561                                 exit(1);
 562                         } else {
 563                                 token = T_Integer;
 564                                 goto normal_return;
 565                         }
 566                 }
 567                 else if (is_double(yytext)) {
 568                         yylval_was_set = 1;
 569                         errno = 0;
 570                         if ((yylval.Double = atof(yytext)) == 0 && errno == ERANGE) {
 571                                 msyslog(LOG_ERR,
 572                                         "Double too large to represent: %s",
 573                                         yytext);
 574                                 exit(1);
 575                         } else {
 576                                 token = T_Double;
 577                                 goto normal_return;
 578                         }
 579                 } else {
 580                         /* Default: Everything is a string */
 581                         yylval_was_set = 1;
 582                         token = create_string_token(yytext);
 583                         goto normal_return;
 584                 }
 585         }
 586
 587         /*
 588          * Either followedby is not FOLLBY_TOKEN or this lexeme is part
 589          * of a string.  Hence, we need to return T_String.
 590          *
 591          * _Except_ we might have a -4 or -6 flag on a an association
 592          * configuration line (server, peer, pool, etc.).
 593          *
 594          * This is a terrible hack, but the grammar is ambiguous so we
 595          * don't have a choice.  [SK]
 596          *
 597          * The ambiguity is in the keyword scanner, not ntp_parser.y.
 598          * We do not require server addresses be quoted in ntp.conf,
 599          * complicating the scanner's job.  To avoid trying (and
 600          * failing) to match an IP address or DNS name to a keyword,
 601          * the association keywords use FOLLBY_STRING in the keyword
 602          * table, which tells the scanner to force the next token to be
 603          * a T_String, so it does not try to match a keyword but rather
 604          * expects a string when -4/-6 modifiers to server, peer, etc.
 605          * are encountered.
 606          * restrict -4 and restrict -6 parsing works correctly without
 607          * this hack, as restrict uses FOLLBY_TOKEN.  [DH]
 608          */
 609         if ('-' == yytext[0]) {
 610                 if ('4' == yytext[1]) {
 611                         token = T_Ipv4_flag;
 612                         goto normal_return;
 613                 } else if ('6' == yytext[1]) {
 614                         token = T_Ipv6_flag;
 615                         goto normal_return;
 616                 }
 617         }
 618
 619         instring = 0;
 620         if (FOLLBY_STRING == followedby)
 621                 followedby = FOLLBY_TOKEN;
 622
 623         yylval_was_set = 1;
 624         token = create_string_token(yytext);
 625
 626 normal_return:
 627         if (T_EOC == token)
 628                 DPRINTF(4,("\t<end of command>\n"));
 629         else
 630                 DPRINTF(4, ("yylex: lexeme '%s' -> %s\n", yytext,
 631                             token_name(token)));
 632
 633         if (!yylval_was_set)
 634                 yylval.Integer = token;
 635
 636         return token;
 637
 638 lex_too_long:
 639         yytext[min(sizeof(yytext) - 1, 50)] = 0;
 640         msyslog(LOG_ERR,
 641                 "configuration item on line %d longer than limit of %d, began with '%s'",
 642                 ip_file->line_no, sizeof(yytext) - 1, yytext);
 643
 644         /*
 645          * If we hit the length limit reading the startup configuration
 646          * file, abort.
 647          */
 648         if (input_from_file)
 649                 exit(sizeof(yytext) - 1);
 650
 651         /*
 652          * If it's runtime configuration via ntpq :config treat it as
 653          * if the configuration text ended before the too-long lexeme,
 654          * hostname, or string.
 655          */
 656         yylval.Integer = 0;
 657         return 0;
 658 }