external/bsd/flex/dist/parse.y

   1 /*      $NetBSD: parse.y,v 1.3 2013/04/06 14:27:52 christos Exp $       */
   2
   3 /* parse.y - parser for flex input */
   4
   5 %token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
   6 %token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS OPT_HEADER OPT_EXTRA_TYPE
   7 %token OPT_TABLES
   8
   9 %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
  10 %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
  11
  12 %token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
  13 %token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT
  14
  15 %left CCL_OP_DIFF CCL_OP_UNION
  16
  17 /*
  18  * POSIX and AT&T lex place the
  19  * precedence of the repeat operator, {}, below that of concatenation.
  20  * Thus, ab{3} is ababab.  Most other POSIX utilities use an Extended
  21  * Regular Expression (ERE) precedence that has the repeat operator
  22  * higher than concatenation.  This causes ab{3} to yield abbb.
  23  *
  24  * In order to support the POSIX and AT&T precedence and the flex
  25  * precedence we define two token sets for the begin and end tokens of
  26  * the repeat operator, '{' and '}'.  The lexical scanner chooses
  27  * which tokens to return based on whether posix_compat or lex_compat
  28  * are specified. Specifying either posix_compat or lex_compat will
  29  * cause flex to parse scanner files as per the AT&T and
  30  * POSIX-mandated behavior.
  31  */
  32
  33 %token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX
  34
  35
  36 %{
  37 /*  Copyright (c) 1990 The Regents of the University of California. */
  38 /*  All rights reserved. */
  39
  40 /*  This code is derived from software contributed to Berkeley by */
  41 /*  Vern Paxson. */
  42
  43 /*  The United States Government has rights in this work pursuant */
  44 /*  to contract no. DE-AC03-76SF00098 between the United States */
  45 /*  Department of Energy and the University of California. */
  46
  47 /*  This file is part of flex. */
  48
  49 /*  Redistribution and use in source and binary forms, with or without */
  50 /*  modification, are permitted provided that the following conditions */
  51 /*  are met: */
  52
  53 /*  1. Redistributions of source code must retain the above copyright */
  54 /*     notice, this list of conditions and the following disclaimer. */
  55 /*  2. Redistributions in binary form must reproduce the above copyright */
  56 /*     notice, this list of conditions and the following disclaimer in the */
  57 /*     documentation and/or other materials provided with the distribution. */
  58
  59 /*  Neither the name of the University nor the names of its contributors */
  60 /*  may be used to endorse or promote products derived from this software */
  61 /*  without specific prior written permission. */
  62
  63 /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
  64 /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
  65 /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
  66 /*  PURPOSE. */
  67
  68 #include "flexdef.h"
  69 #include "tables.h"
  70
  /* Scratch state shared by the grammar actions in this file. */
  71 int pat, scnum, eps, headcnt, trailcnt, lastchar, i, rulelen;
  72 int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;
  73
     /* Stack of start-condition numbers in effect for the rule being parsed.
      * sect1end allocates it with lastsc + 1 entries; the actions visible in
      * this file push with scon_stk[++scon_stk_ptr], so entries live at
      * indices 1..scon_stk_ptr and scon_stk_ptr == 0 means "empty".
      */
  74 int *scon_stk;
  75 int scon_stk_ptr;
  76
  77 static int madeany = false;  /* whether we've made the '.' character class */
     /* Character-class numbers created by the '.' singleton action the first
      * time it runs (guarded by madeany).
      */
  78 static int ccldot, cclany;
  79 int previous_continued_action;  /* whether the previous rule's action was '|' */
  80
  /* Format a warning that takes two arguments: snprintf() the message into a
   * local MAXLINE-byte buffer and hand the result to lwarn().  Expands to a
   * single do/while(0) statement.
   */
  81 #define format_warn3(fmt, a1, a2) \
  82         do{ \
  83         char fw3_msg[MAXLINE];\
  84         snprintf( fw3_msg, MAXLINE,(fmt), (a1), (a2) );\
  85         lwarn( fw3_msg );\
  86         }while(0)
  87
  88 /* Expand a POSIX character class expression. */
     /* Adds to currccl every c in [0, csize) for which both isascii(c) and
      * func(c) are true.  Expands to a single do/while(0) statement.
      */
  89 #define CCL_EXPR(func) \
  90         do{ \
  91         int c; \
  92         for ( c = 0; c < csize; ++c ) \
  93                 if ( isascii(c) && func(c) ) \
  94                         ccladd( currccl, c ); \
  95         }while(0)
  96
  97 /* negated class */
     /* Adds to currccl every c in [0, csize) for which func(c) is false.
      * Unlike CCL_EXPR there is no isascii() guard on c here.
      */
  98 #define CCL_NEG_EXPR(func) \
  99         do{ \
 100         int c; \
 101         for ( c = 0; c < csize; ++c ) \
 102                 if ( !func(c) ) \
 103                         ccladd( currccl, c ); \
 104         }while(0)
 105
 106 /* While POSIX defines isblank(), it's not ANSI C. */
     /* True for a space or a horizontal tab.  The argument may be evaluated
      * twice, so it should not have side effects.
      */
 107 #define IS_BLANK(c) ((c) == ' ' || (c) == '\t')
 108
 109 /* On some over-ambitious machines, such as DEC Alphas, the default
 110  * token type is "long" instead of "int"; this leads to problems with
 111  * declaring yylval in flexdef.h.  But so far, all the yaccs I've seen
 112  * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE", so the
 113  * following should ensure that the default token type is "int".
 114  */
 115 #define YYSTYPE int
 116
 117 %}
 118
 119 %%
 /* goal: the start symbol.  Its action runs after sections 1 and 2 have been
  * parsed and the trailing initforrule has run; it appends the default rule.
  */
 120 goal            :  initlex sect1 sect1end sect2 initforrule
 121                         { /* add default rule */
 122                         int def_rule;
 123
                             /* A freshly created class, negated, becomes
                              * the single transition of the default rule.
                              */
 124                         pat = cclinit();
 125                         cclnegate( pat );
 126
 127                         def_rule = mkstate( -pat );
 128
 129                         /* Remember the number of the default rule so we
 130                          * don't generate "can't match" warnings for it.
 131                          */
 132                         default_rule = num_rules;
 133
 134                         finish_rule( def_rule, false, 0, 0, 0);
 135
                             /* Attach the default rule to every start
                              * condition 1..lastsc.
                              */
 136                         for ( i = 1; i <= lastsc; ++i )
 137                                 scset[i] = mkbranch( scset[i], def_rule );
 138
                             /* Action text for the default rule: a fatal
                              * error when spprdflt is set, ECHO otherwise.
                              */
 139                         if ( spprdflt )
 140                                 add_action(
 141                                 "YY_FATAL_ERROR( \"flex scanner jammed\" )" );
 142                         else
 143                                 add_action( "ECHO" );
 144
 145                         add_action( ";\n\tYY_BREAK\n" );
 146                         }
 147                 ;
 148
 /* initlex: empty production reduced first in goal; installs the "INITIAL"
  * start condition with its second argument false.
  */
 149 initlex         :
 150                         { /* initialize for processing rules */
 151
 152                         /* Create default DFA start condition. */
 153                         scinstal( "INITIAL", false );
 154                         }
 155                 ;
 156
 /* sect1: a possibly empty, left-recursive sequence of start-condition
  * declarations (startconddecl namelist1) and option lists.  A syntax error
  * here is reported through synerr().
  */
 157 sect1           :  sect1 startconddecl namelist1
 158                 |  sect1 options
 159                 |
 160                 |  error
 161                         { synerr( _("unknown error processing section 1") ); }
 162                 ;
 163
 /* sect1end: on SECTEND, call check_options() and allocate the
  * start-condition stack.  The stack gets lastsc + 1 entries because the
  * actions in this file index it from 1.
  */
 164 sect1end        :  SECTEND
 165                         {
 166                         check_options();
 167                         scon_stk = allocate_integer_array( lastsc + 1 );
 168                         scon_stk_ptr = 0;
 169                         }
 170                 ;
 171
 /* startconddecl: sets xcluflg for the names that follow -- false after
  * SCDECL, true after XSCDECL.  namelist1 passes the flag to scinstal().
  */
 172 startconddecl   :  SCDECL
 173                         { xcluflg = false; }
 174
 175                 |  XSCDECL
 176                         { xcluflg = true; }
 177                 ;
 178
 /* namelist1: one or more NAMEs in a start-condition declaration.  Each
  * name (text in nmstr) is installed with scinstal() using the current
  * xcluflg.  A syntax error is reported through synerr().
  */
 179 namelist1       :  namelist1 NAME
 180                         { scinstal( nmstr, xcluflg ); }
 181
 182                 |  NAME
 183                         { scinstal( nmstr, xcluflg ); }
 184
 185                 |  error
 186                         { synerr( _("bad start condition list") ); }
 187                 ;
 188
 /* options: OPTION_OP followed by a possibly empty list of option items.
  * Neither rule has an action; the work is done in the option rule.
  */
 189 options         :  OPTION_OP optionlist
 190                 ;
 191
 192 optionlist      :  optionlist option
 193                 |
 194                 ;
 195
 /* option: one "<option token> = NAME" item.  Each alternative stores a
  * copy_string() copy of the NAME text (nmstr) in the matching global;
  * OPT_OUTFILE also sets did_outfilename and OPT_TABLES also sets tablesext.
  */
 196 option          :  OPT_OUTFILE '=' NAME
 197                         {
 198                         outfilename = copy_string( nmstr );
 199                         did_outfilename = 1;
 200                         }
 201                 |  OPT_EXTRA_TYPE '=' NAME
 202                         { extra_type = copy_string( nmstr ); }
 203                 |  OPT_PREFIX '=' NAME
 204                         { prefix = copy_string( nmstr ); }
 205                 |  OPT_YYCLASS '=' NAME
 206                         { yyclass = copy_string( nmstr ); }
 207                 |  OPT_HEADER '=' NAME
 208                         { headerfilename = copy_string( nmstr ); }
 209             |  OPT_TABLES '=' NAME
 210             { tablesext = true; tablesfilename = copy_string( nmstr ); }
 211                 ;
 212
 /* sect2: a possibly empty sequence of rules, each either a single
  * "scon flexrule" line or an scon followed by a braced group of nested
  * rules.  $2 is the value of scon, i.e. the scon_stk_ptr in effect before
  * that scon pushed anything; assigning it back pops the entries the scon
  * added.
  */
 213 sect2           :  sect2 scon initforrule flexrule '\n'
 214                         { scon_stk_ptr = $2; }
 215                 |  sect2 scon '{' sect2 '}'
 216                         { scon_stk_ptr = $2; }
 217                 |
 218                 ;
 219
 /* initforrule: empty production reduced before each flexrule (and once
  * more at the end of goal).  Resets the per-rule bookkeeping, snapshots
  * continued_action into previous_continued_action, and calls new_rule().
  */
 220 initforrule     :
 221                         {
 222                         /* Initialize for a parse of one rule. */
 223                         trlcontxt = variable_trail_rule = varlength = false;
 224                         trailcnt = headcnt = rulelen = 0;
 225                         current_state_type = STATE_NORMAL;
 226                         previous_continued_action = continued_action;
 227                         in_rule = true;
 228
 229                         new_rule();
 230                         }
 231                 ;
 232
 /* flexrule: one rule of section 2.  Alternatives:
  *   '^' rule  - finish the rule and branch it into scbol[] for the
  *               applicable start conditions; sets bol_needed.
  *   rule      - finish the rule and branch it into scset[].
  *   EOF_OP    - build an EOF action for the start conditions on scon_stk.
  *   error     - report "unrecognized rule".
  * When scon_stk is non-empty the rule is added only to the start conditions
  * on the stack; otherwise to every start condition i with scxclu[i] false.
  */
 233 flexrule        :  '^' rule
 234                         {
 235                         pat = $2;
 236                         finish_rule( pat, variable_trail_rule,
 237                                 headcnt, trailcnt , previous_continued_action);
 238
 239                         if ( scon_stk_ptr > 0 )
 240                                 {
 241                                 for ( i = 1; i <= scon_stk_ptr; ++i )
 242                                         scbol[scon_stk[i]] =
 243                                                 mkbranch( scbol[scon_stk[i]],
 244                                                                 pat );
 245                                 }
 246
 247                         else
 248                                 {
 249                                 /* Add to all non-exclusive start conditions,
 250                                  * including the default (0) start condition.
 251                                  */
                                     /* NOTE(review): the loop below visits
                                      * 1..lastsc only, so the "(0)" above
                                      * looks stale -- confirm.
                                      */
 252
 253                                 for ( i = 1; i <= lastsc; ++i )
 254                                         if ( ! scxclu[i] )
 255                                                 scbol[i] = mkbranch( scbol[i],
 256                                                                         pat );
 257                                 }
 258
                             /* Report the performance note only the first
                              * time a '^' rule is seen.
                              */
 259                         if ( ! bol_needed )
 260                                 {
 261                                 bol_needed = true;
 262
 263                                 if ( performance_report > 1 )
 264                                         pinpoint_message(
 265                         "'^' operator results in sub-optimal performance" );
 266                                 }
 267                         }
 268
 269                 |  rule
 270                         {
 271                         pat = $1;
 272                         finish_rule( pat, variable_trail_rule,
 273                                 headcnt, trailcnt , previous_continued_action);
 274
 275                         if ( scon_stk_ptr > 0 )
 276                                 {
 277                                 for ( i = 1; i <= scon_stk_ptr; ++i )
 278                                         scset[scon_stk[i]] =
 279                                                 mkbranch( scset[scon_stk[i]],
 280                                                                 pat );
 281                                 }
 282
 283                         else
 284                                 {
 285                                 for ( i = 1; i <= lastsc; ++i )
 286                                         if ( ! scxclu[i] )
 287                                                 scset[i] =
 288                                                         mkbranch( scset[i],
 289                                                                 pat );
 290                                 }
 291                         }
 292
 293                 |  EOF_OP
 294                         {
 295                         if ( scon_stk_ptr > 0 )
 296                                 build_eof_action();
 297
 298                         else
 299                                 {
 300                                 /* This EOF applies to all start conditions
 301                                  * which don't already have EOF actions.
 302                                  */
 303                                 for ( i = 1; i <= lastsc; ++i )
 304                                         if ( ! sceof[i] )
 305                                                 scon_stk[++scon_stk_ptr] = i;
 306
 307                                 if ( scon_stk_ptr == 0 )
 308                                         lwarn(
 309                         "all start conditions already have <<EOF>> rules" );
 310
 311                                 else
 312                                         build_eof_action();
 313                                 }
 314                         }
 315
 316                 |  error
 317                         { synerr( _("unrecognized rule") ); }
 318                 ;
 319
 /* scon_stk_ptr: empty marker production.  Its value is the global
  * scon_stk_ptr at the moment it is reduced, which scon's first alternative
  * returns as $2 (i.e. the stack depth before namelist2 pushed anything).
  */
 320 scon_stk_ptr    :
 321                         { $$ = scon_stk_ptr; }
 322                 ;
 323
 /* scon: optional start-condition prefix of a rule.  In every alternative
  * the value is the scon_stk_ptr from before this prefix pushed entries, so
  * sect2 can restore it afterwards.
  *   '<' names '>'  - sconname pushes each listed condition.
  *   '<' '*' '>'    - push every start condition not already on the stack.
  *   (empty)        - nothing pushed.
  */
 324 scon            :  '<' scon_stk_ptr namelist2 '>'
 325                         { $$ = $2; }
 326
 327                 |  '<' '*' '>'
 328                         {
 329                         $$ = scon_stk_ptr;
 330
 331                         for ( i = 1; i <= lastsc; ++i )
 332                                 {
 333                                 int j;
 334
                                     /* Linear search for i on the stack. */
 335                                 for ( j = 1; j <= scon_stk_ptr; ++j )
 336                                         if ( scon_stk[j] == i )
 337                                                 break;
 338
                                     /* Loop ran off the end: i is absent. */
 339                                 if ( j > scon_stk_ptr )
 340                                         scon_stk[++scon_stk_ptr] = i;
 341                                 }
 342                         }
 343
 344                 |
 345                         { $$ = scon_stk_ptr; }
 346                 ;
 347
 /* namelist2: comma-separated list of start-condition names inside '<' '>'.
  * The per-name work is done by sconname; a syntax error is reported through
  * synerr().
  */
 348 namelist2       :  namelist2 ',' sconname
 349
 350                 |  sconname
 351
 352                 |  error
 353                         { synerr( _("bad start condition list") ); }
 354                 ;
 355
 /* sconname: one start-condition name in a '<...>' prefix.  Looks the name
  * up with sclookup(); a result of 0 is reported as undeclared.  Otherwise
  * the condition number is pushed on scon_stk unless it is already there, in
  * which case a "specified twice" warning is issued instead.
  */
 356 sconname        :  NAME
 357                         {
 358                         if ( (scnum = sclookup( nmstr )) == 0 )
 359                                 format_pinpoint_message(
 360                                         "undeclared start condition %s",
 361                                         nmstr );
 362                         else
 363                                 {
 364                                 for ( i = 1; i <= scon_stk_ptr; ++i )
 365                                         if ( scon_stk[i] == scnum )
 366                                                 {
 367                                                 format_warn(
 368                                                         "<%s> specified twice",
 369                                                         scname[scnum] );
 370                                                 break;
 371                                                 }
 372
                                     /* No duplicate found: push it. */
 373                                 if ( i > scon_stk_ptr )
 374                                         scon_stk[++scon_stk_ptr] = scnum;
 375                                 }
 376                         }
 377                 ;
 378
 /* rule: a complete pattern, with or without trailing context.
  *   re2 re      - head '/' trail; links the two machines.
  *   re2 re '$'  - error: trailing context given twice.
  *   re '$'      - trailing context consisting of a single '\n'.
  *   re          - plain pattern (trlcontxt may still be set by an inner
  *                 re2, hence the check in that action).
  * The actions maintain headcnt / trailcnt / varlength and set
  * variable_trail_rule, which flexrule passes on to finish_rule().
  */
 379 rule            :  re2 re
 380                         {
 381                         if ( transchar[lastst[$2]] != SYM_EPSILON )
 382                                 /* Provide final transition \now/ so it
 383                                  * will be marked as a trailing context
 384                                  * state.
 385                                  */
 386                                 $2 = link_machines( $2,
 387                                                 mkstate( SYM_EPSILON ) );
 388
 389                         mark_beginning_as_normal( $2 );
 390                         current_state_type = STATE_NORMAL;
 391
 392                         if ( previous_continued_action )
 393                                 {
 394                                 /* We need to treat this as variable trailing
 395                                  * context so that the backup does not happen
 396                                  * in the action but before the action switch
 397                                  * statement.  If the backup happens in the
 398                                  * action, then the rules "falling into" this
 399                                  * one's action will *also* do the backup,
 400                                  * erroneously.
 401                                  */
 402                                 if ( ! varlength || headcnt != 0 )
 403                                         lwarn(
 404                 "trailing context made variable due to preceding '|' action" );
 405
 406                                 /* Mark as variable. */
 407                                 varlength = true;
 408                                 headcnt = 0;
 409
 410                                 }
 411
 412                         if ( lex_compat || (varlength && headcnt == 0) )
 413                                 { /* variable trailing context rule */
 414                                 /* Mark the first part of the rule as the
 415                                  * accepting "head" part of a trailing
 416                                  * context rule.
 417                                  *
 418                                  * By the way, we didn't do this at the
 419                                  * beginning of this production because back
 420                                  * then current_state_type was set up for a
 421                                  * trail rule, and add_accept() can create
 422                                  * a new state ...
 423                                  */
 424                                 add_accept( $1,
 425                                         num_rules | YY_TRAILING_HEAD_MASK );
 426                                 variable_trail_rule = true;
 427                                 }
 428
 429                         else
 430                                 trailcnt = rulelen;
 431
 432                         $$ = link_machines( $1, $2 );
 433                         }
 434
 435                 |  re2 re '$'
 436                         { synerr( _("trailing context used twice") ); }
 437
 438                 |  re '$'
 439                         {
 440                         headcnt = 0;
 441                         trailcnt = 1;
 442                         rulelen = 1;
 443                         varlength = false;
 444
 445                         current_state_type = STATE_TRAILING_CONTEXT;
 446
 447                         if ( trlcontxt )
 448                                 {
 449                                 synerr( _("trailing context used twice") );
                                     /* This value is replaced by the
                                      * assignment to $$ at the end of the
                                      * action.
                                      */
 450                                 $$ = mkstate( SYM_EPSILON );
 451                                 }
 452
 453                         else if ( previous_continued_action )
 454                                 {
 455                                 /* See the comment in the rule for "re2 re"
 456                                  * above.
 457                                  */
 458                                 lwarn(
 459                 "trailing context made variable due to preceding '|' action" );
 460
 461                                 varlength = true;
 462                                 }
 463
 464                         if ( lex_compat || varlength )
 465                                 {
 466                                 /* Again, see the comment in the rule for
 467                                  * "re2 re" above.
 468                                  */
 469                                 add_accept( $1,
 470                                         num_rules | YY_TRAILING_HEAD_MASK );
 471                                 variable_trail_rule = true;
 472                                 }
 473
 474                         trlcontxt = true;
 475
                             /* Trail machine: an epsilon state followed by
                              * a '\n' transition, appended to the head.
                              */
 476                         eps = mkstate( SYM_EPSILON );
 477                         $$ = link_machines( $1,
 478                                 link_machines( eps, mkstate( '\n' ) ) );
 479                         }
 480
 481                 |  re
 482                         {
 483                         $$ = $1;
 484
 485                         if ( trlcontxt )
 486                                 {
 487                                 if ( lex_compat || (varlength && headcnt == 0) )
 488                                         /* Both head and trail are
 489                                          * variable-length.
 490                                          */
 491                                         variable_trail_rule = true;
 492                                 else
 493                                         trailcnt = rulelen;
 494                                 }
 495                         }
 496                 ;
 497
 498
 /* re: one or more series joined by '|'.  An alternation combines the two
  * machines with mkor() and marks the rule as variable-length.
  */
 499 re              :  re '|' series
 500                         {
 501                         varlength = true;
 502                         $$ = mkor( $1, $3 );
 503                         }
 504
 505                 |  series
 506                         { $$ = $1; }
 507                 ;
 508
 509
 /* re2: the head of a trailing-context rule, i.e. "re '/'".  Records the
  * head length (or gives up on it if the head is variable-length), resets
  * rulelen for the trail, and switches current_state_type to
  * STATE_TRAILING_CONTEXT for the states built next.
  */
 510 re2             :  re '/'
 511                         {
 512                         /* This rule is written separately so the
 513                          * reduction will occur before the trailing
 514                          * series is parsed.
 515                          */
 516
 517                         if ( trlcontxt )
 518                                 synerr( _("trailing context used twice") );
 519                         else
 520                                 trlcontxt = true;
 521
 522                         if ( varlength )
 523                                 /* We hope the trailing context is
 524                                  * fixed-length.
 525                                  */
 526                                 varlength = false;
 527                         else
 528                                 headcnt = rulelen;
 529
 530                         rulelen = 0;
 531
 532                         current_state_type = STATE_TRAILING_CONTEXT;
 533                         $$ = $1;
 534                         }
 535                 ;
 536
 /* series: concatenation of singletons, plus the repeat operator in its
  * BEGIN_REPEAT_POSIX / END_REPEAT_POSIX form, which applies to the whole
  * series parsed so far.  The repeat alternatives are:
  *   {m,n}  - mkrep( $1, m, n ); when m == 0 this becomes
  *            mkopt( mkrep( $1, 1, n ) ), and {0,0} is rejected.
  *   {m,}   - mkrep( $1, m, INFINITE_REPEAT ); m must be positive.
  *   {m}    - $1 followed by copysingl( $1, m - 1 ); m must be positive.
  * On a bad count the action reports the error and yields $1 unchanged.
  * Every repeat form marks the rule as variable-length.
  */
 537 series          :  series singleton
 538                         {
 539                         /* This is where concatenation of adjacent patterns
 540                          * gets done.
 541                          */
 542                         $$ = link_machines( $1, $2 );
 543                         }
 544
 545                 |  singleton
 546                         { $$ = $1; }
 547
 548                 |  series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX
 549                         {
 550                         varlength = true;
 551
 552                         if ( $3 > $5 || $3 < 0 )
 553                                 {
 554                                 synerr( _("bad iteration values") );
 555                                 $$ = $1;
 556                                 }
 557                         else
 558                                 {
 559                                 if ( $3 == 0 )
 560                                         {
 561                                         if ( $5 <= 0 )
 562                                                 {
 563                                                 synerr(
 564                                                 _("bad iteration values") );
 565                                                 $$ = $1;
 566                                                 }
 567                                         else
 568                                                 $$ = mkopt(
 569                                                         mkrep( $1, 1, $5 ) );
 570                                         }
 571                                 else
 572                                         $$ = mkrep( $1, $3, $5 );
 573                                 }
 574                         }
 575
 576                 |  series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX
 577                         {
 578                         varlength = true;
 579
 580                         if ( $3 <= 0 )
 581                                 {
 582                                 synerr( _("iteration value must be positive") );
 583                                 $$ = $1;
 584                                 }
 585
 586                         else
 587                                 $$ = mkrep( $1, $3, INFINITE_REPEAT );
 588                         }
 589
 590                 |  series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
 591                         {
 592                         /* The series could be something like "(foo)",
 593                          * in which case we have no idea what its length
 594                          * is, so we punt here.
 595                          */
 596                         varlength = true;
 597
 598                         if ( $3 <= 0 )
 599                                 {
 600                                   synerr( _("iteration value must be positive")
 601                                           );
 602                                 $$ = $1;
 603                                 }
 604
 605                         else
 606                                 $$ = link_machines( $1,
 607                                                 copysingl( $1, $3 - 1 ) );
 608                         }
 609
 610                 ;
 611
 /* singleton: the smallest repeatable unit of a pattern.  Alternatives:
  *   singleton '*' / '+' / '?'  - mkclos / mkposcl / mkopt
  *   singleton {m,n} {m,} {m}   - repeat in its BEGIN_REPEAT_FLEX form,
  *                                applied to this singleton only; same
  *                                checks as the series repeat forms
  *   '.'                        - one of two lazily built character classes
  *   fullccl, PREVCCL           - a character class
  *   '"' string '"'             - a quoted string
  *   '(' re ')'                 - a parenthesised expression
  *   CHAR                       - a single character
  * The closure and repeat forms set varlength; the single-character forms
  * ('.', classes, CHAR) each add one to rulelen.
  */
 612 singleton       :  singleton '*'
 613                         {
 614                         varlength = true;
 615
 616                         $$ = mkclos( $1 );
 617                         }
 618
 619                 |  singleton '+'
 620                         {
 621                         varlength = true;
 622                         $$ = mkposcl( $1 );
 623                         }
 624
 625                 |  singleton '?'
 626                         {
 627                         varlength = true;
 628                         $$ = mkopt( $1 );
 629                         }
 630
 631                 |  singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX
 632                         {
 633                         varlength = true;
 634
 635                         if ( $3 > $5 || $3 < 0 )
 636                                 {
 637                                 synerr( _("bad iteration values") );
 638                                 $$ = $1;
 639                                 }
 640                         else
 641                                 {
 642                                 if ( $3 == 0 )
 643                                         {
 644                                         if ( $5 <= 0 )
 645                                                 {
 646                                                 synerr(
 647                                                 _("bad iteration values") );
 648                                                 $$ = $1;
 649                                                 }
 650                                         else
 651                                                 $$ = mkopt(
 652                                                         mkrep( $1, 1, $5 ) );
 653                                         }
 654                                 else
 655                                         $$ = mkrep( $1, $3, $5 );
 656                                 }
 657                         }
 658
 659                 |  singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX
 660                         {
 661                         varlength = true;
 662
 663                         if ( $3 <= 0 )
 664                                 {
 665                                 synerr( _("iteration value must be positive") );
 666                                 $$ = $1;
 667                                 }
 668
 669                         else
 670                                 $$ = mkrep( $1, $3, INFINITE_REPEAT );
 671                         }
 672
 673                 |  singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
 674                         {
 675                         /* The singleton could be something like "(foo)",
 676                          * in which case we have no idea what its length
 677                          * is, so we punt here.
 678                          */
 679                         varlength = true;
 680
 681                         if ( $3 <= 0 )
 682                                 {
 683                                 synerr( _("iteration value must be positive") );
 684                                 $$ = $1;
 685                                 }
 686
 687                         else
 688                                 $$ = link_machines( $1,
 689                                                 copysingl( $1, $3 - 1 ) );
 690                         }
 691
 692                 |  '.'
 693                         {
                             /* Both classes are built once, on the first
                              * '.' seen, and reused afterwards.
                              */
 694                         if ( ! madeany )
 695                                 {
 696                                 /* Create the '.' character class. */
 697                     ccldot = cclinit();
 698                     ccladd( ccldot, '\n' );
 699                     cclnegate( ccldot );
 700
 701                     if ( useecs )
 702                         mkeccl( ccltbl + cclmap[ccldot],
 703                             ccllen[ccldot], nextecm,
 704                             ecgroup, csize, csize );
 705
 706                                 /* Create the (?s:'.') character class. */
 707                     cclany = cclinit();
 708                     cclnegate( cclany );
 709
 710                     if ( useecs )
 711                         mkeccl( ccltbl + cclmap[cclany],
 712                             ccllen[cclany], nextecm,
 713                             ecgroup, csize, csize );
 714
 715                                 madeany = true;
 716                                 }
 717
 718                         ++rulelen;
 719
                 /* sf_dot_all() selects the class built without the
                  * '\n' exclusion.
                  */
 720             if (sf_dot_all())
 721                 $$ = mkstate( -cclany );
 722             else
 723                 $$ = mkstate( -ccldot );
 724                         }
 725
 726                 |  fullccl
 727                         {
 728                                 /* Sort characters for fast searching.
 729                                  */
 730                                 qsort( ccltbl + cclmap[$1], ccllen[$1], sizeof (*ccltbl), cclcmp );
 731
 732                         if ( useecs )
 733                                 mkeccl( ccltbl + cclmap[$1], ccllen[$1],
 734                                         nextecm, ecgroup, csize, csize );
 735
 736                         ++rulelen;
 737
 738                         if (ccl_has_nl[$1])
 739                                 rule_has_nl[num_rules] = true;
 740
 741                         $$ = mkstate( -$1 );
 742                         }
 743
 744                 |  PREVCCL
 745                         {
 746                         ++rulelen;
 747
 748                         if (ccl_has_nl[$1])
 749                                 rule_has_nl[num_rules] = true;
 750
 751                         $$ = mkstate( -$1 );
 752                         }
 753
 754                 |  '"' string '"'
 755                         { $$ = $2; }
 756
 757                 |  '(' re ')'
 758                         { $$ = $2; }
 759
 760                 |  CHAR
 761                         {
 762                         ++rulelen;
 763
 764                         if ($1 == nlch)
 765                                 rule_has_nl[num_rules] = true;
 766
 767             if (sf_case_ins() && has_case($1))
 768                 /* create an alternation, as in (a|A) */
 769                 $$ = mkor (mkstate($1), mkstate(reverse_case($1)));
 770             else
 771                 $$ = mkstate( $1 );
 772                         }
 773                 ;
 774 fullccl:
 775         fullccl CCL_OP_DIFF  braceccl  { $$ = ccl_set_diff  ($1, $3); }
 776     |   fullccl CCL_OP_UNION braceccl  { $$ = ccl_set_union ($1, $3); }
 777     |   braceccl
 778     ;
 779
 780 braceccl:
 781
 782             '[' ccl ']' { $$ = $2; }
 783
 784                 |  '[' '^' ccl ']'
 785                         {
 786                         cclnegate( $3 );
 787                         $$ = $3;
 788                         }
 789                 ;
 790
 791 ccl             :  ccl CHAR '-' CHAR
 792                         {
 793
 794                         if (sf_case_ins())
 795                           {
 796
 797                             /* If one end of the range has case and the other
 798                              * does not, or the cases are different, then we're not
 799                              * sure what range the user is trying to express.
 800                              * Examples: [@-z] or [S-t]
 801                              */
 802                             if (has_case ($2) != has_case ($4)
 803                                      || (has_case ($2) && (b_islower ($2) != b_islower ($4)))
 804                                      || (has_case ($2) && (b_isupper ($2) != b_isupper ($4))))
 805                               format_warn3 (
 806                               _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
 807                                             $2, $4);
 808
 809                             /* If the range spans uppercase characters but not
 810                              * lowercase (or vice-versa), then should we automatically
 811                              * include lowercase characters in the range?
 812                              * Example: [@-_] spans [a-z] but not [A-Z]
 813                              */
 814                             else if (!has_case ($2) && !has_case ($4) && !range_covers_case ($2, $4))
 815                               format_warn3 (
 816                               _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
 817                                             $2, $4);
 818                           }
 819
 820                         if ( $2 > $4 )
 821                                 synerr( _("negative range in character class") );
 822
 823                         else
 824                                 {
 825                                 for ( i = $2; i <= $4; ++i )
 826                                         ccladd( $1, i );
 827
 828                                 /* Keep track if this ccl is staying in
 829                                  * alphabetical order.
 830                                  */
 831                                 cclsorted = cclsorted && ($2 > lastchar);
 832                                 lastchar = $4;
 833
 834                 /* Do it again for upper/lowercase */
 835                 if (sf_case_ins() && has_case($2) && has_case($4)){
 836                     $2 = reverse_case ($2);
 837                     $4 = reverse_case ($4);
 838
 839                     for ( i = $2; i <= $4; ++i )
 840                         ccladd( $1, i );
 841
 842                     cclsorted = cclsorted && ($2 > lastchar);
 843                     lastchar = $4;
 844                 }
 845
 846                                 }
 847
 848                         $$ = $1;
 849                         }
 850
 851                 |  ccl CHAR
 852                         {
 853                         ccladd( $1, $2 );
 854                         cclsorted = cclsorted && ($2 > lastchar);
 855                         lastchar = $2;
 856
 857             /* Do it again for upper/lowercase */
 858             if (sf_case_ins() && has_case($2)){
 859                 $2 = reverse_case ($2);
 860                 ccladd ($1, $2);
 861
 862                 cclsorted = cclsorted && ($2 > lastchar);
 863                 lastchar = $2;
 864             }
 865
 866                         $$ = $1;
 867                         }
 868
 869                 |  ccl ccl_expr
 870                         {
 871                         /* Too hard to properly maintain cclsorted. */
 872                         cclsorted = false;
 873                         $$ = $1;
 874                         }
 875
 876                 |
 877                         {
 878                         cclsorted = true;
 879                         lastchar = 0;
 880                         currccl = $$ = cclinit();
 881                         }
 882                 ;
 883
 884 ccl_expr:
 885            CCE_ALNUM    { CCL_EXPR(isalnum); }
 886                 |  CCE_ALPHA    { CCL_EXPR(isalpha); }
 887                 |  CCE_BLANK    { CCL_EXPR(IS_BLANK); }
 888                 |  CCE_CNTRL    { CCL_EXPR(iscntrl); }
 889                 |  CCE_DIGIT    { CCL_EXPR(isdigit); }
 890                 |  CCE_GRAPH    { CCL_EXPR(isgraph); }
 891                 |  CCE_LOWER    {
 892                           CCL_EXPR(islower);
 893                           if (sf_case_ins())
 894                               CCL_EXPR(isupper);
 895                         }
 896                 |  CCE_PRINT    { CCL_EXPR(isprint); }
 897                 |  CCE_PUNCT    { CCL_EXPR(ispunct); }
 898                 |  CCE_SPACE    { CCL_EXPR(isspace); }
 899                 |  CCE_XDIGIT   { CCL_EXPR(isxdigit); }
 900                 |  CCE_UPPER    {
 901                     CCL_EXPR(isupper);
 902                     if (sf_case_ins())
 903                         CCL_EXPR(islower);
 904                                 }
 905
 906         |  CCE_NEG_ALNUM        { CCL_NEG_EXPR(isalnum); }
 907                 |  CCE_NEG_ALPHA        { CCL_NEG_EXPR(isalpha); }
 908                 |  CCE_NEG_BLANK        { CCL_NEG_EXPR(IS_BLANK); }
 909                 |  CCE_NEG_CNTRL        { CCL_NEG_EXPR(iscntrl); }
 910                 |  CCE_NEG_DIGIT        { CCL_NEG_EXPR(isdigit); }
 911                 |  CCE_NEG_GRAPH        { CCL_NEG_EXPR(isgraph); }
 912                 |  CCE_NEG_PRINT        { CCL_NEG_EXPR(isprint); }
 913                 |  CCE_NEG_PUNCT        { CCL_NEG_EXPR(ispunct); }
 914                 |  CCE_NEG_SPACE        { CCL_NEG_EXPR(isspace); }
 915                 |  CCE_NEG_XDIGIT       { CCL_NEG_EXPR(isxdigit); }
 916                 |  CCE_NEG_LOWER        {
 917                                 if ( sf_case_ins() )
 918                                         lwarn(_("[:^lower:] is ambiguous in case insensitive scanner"));
 919                                 else
 920                                         CCL_NEG_EXPR(islower);
 921                                 }
 922                 |  CCE_NEG_UPPER        {
 923                                 if ( sf_case_ins() )
 924                                         lwarn(_("[:^upper:] ambiguous in case insensitive scanner"));
 925                                 else
 926                                         CCL_NEG_EXPR(isupper);
 927                                 }
 928                 ;
 929
 930 string          :  string CHAR
 931                         {
 932                         if ( $2 == nlch )
 933                                 rule_has_nl[num_rules] = true;
 934
 935                         ++rulelen;
 936
 937             if (sf_case_ins() && has_case($2))
 938                 $$ = mkor (mkstate($2), mkstate(reverse_case($2)));
 939             else
 940                 $$ = mkstate ($2);
 941
 942                         $$ = link_machines( $1, $$);
 943                         }
 944
 945                 |
 946                         { $$ = mkstate( SYM_EPSILON ); }
 947                 ;
 948
 949 %%
 950
 951
 952 /* build_eof_action - build the "<<EOF>>" action for the active start
 953  *                    conditions
 954  */
 955
 956 void build_eof_action()
 957         {
 958         register int i;
 959         char action_text[MAXLINE];
 960
 961         for ( i = 1; i <= scon_stk_ptr; ++i )
 962                 {
 963                 if ( sceof[scon_stk[i]] )
 964                         format_pinpoint_message(
 965                                 "multiple <<EOF>> rules for start condition %s",
 966                                 scname[scon_stk[i]] );
 967
 968                 else
 969                         {
 970                         sceof[scon_stk[i]] = true;
 971
 972                         if (previous_continued_action /* && previous action was regular */)
 973                                 add_action("YY_RULE_SETUP\n");
 974
 975                         snprintf( action_text, sizeof(action_text), "case YY_STATE_EOF(%s):\n",
 976                                 scname[scon_stk[i]] );
 977                         add_action( action_text );
 978                         }
 979                 }
 980
 981         line_directive_out( (FILE *) 0, 1 );
 982
 983         /* This isn't a normal rule after all - don't count it as
 984          * such, so we don't have any holes in the rule numbering
 985          * (which make generating "rule can never match" warnings
 986          * more difficult.
 987          */
 988         --num_rules;
 989         ++num_eof_rules;
 990         }
 991
 992
 993 /* format_synerr - write out formatted syntax error */
 994
 995 void format_synerr( msg, arg )
 996 const char *msg, arg[];
 997         {
 998         char errmsg[MAXLINE];
 999
1000         (void) snprintf( errmsg, sizeof(errmsg), msg, arg );
1001         synerr( errmsg );
1002         }
1003
1004
1005 /* synerr - report a syntax error */
1006
1007 void synerr( str )
1008 const char *str;
1009         {
1010         syntaxerror = true;
1011         pinpoint_message( str );
1012         }
1013
1014
1015 /* format_warn - write out formatted warning */
1016
1017 void format_warn( msg, arg )
1018 const char *msg, arg[];
1019         {
1020         char warn_msg[MAXLINE];
1021
1022         snprintf( warn_msg, sizeof(warn_msg), msg, arg );
1023         lwarn( warn_msg );
1024         }
1025
1026
1027 /* lwarn - report a warning, unless -w was given */
1028
1029 void lwarn( str )
1030 const char *str;
1031         {
1032         line_warning( str, linenum );
1033         }
1034
1035 /* format_pinpoint_message - write out a message formatted with one string,
1036  *                           pinpointing its location
1037  */
1038
1039 void format_pinpoint_message( msg, arg )
1040 const char *msg, arg[];
1041         {
1042         char errmsg[MAXLINE];
1043
1044         snprintf( errmsg, sizeof(errmsg), msg, arg );
1045         pinpoint_message( errmsg );
1046         }
1047
1048
1049 /* pinpoint_message - write out a message, pinpointing its location */
1050
1051 void pinpoint_message( str )
1052 const char *str;
1053         {
1054         line_pinpoint( str, linenum );
1055         }
1056
1057
1058 /* line_warning - report a warning at a given line, unless -w was given */
1059
1060 void line_warning( str, line )
1061 const char *str;
1062 int line;
1063         {
1064         char warning[MAXLINE];
1065
1066         if ( ! nowarn )
1067                 {
1068                 snprintf( warning, sizeof(warning), "warning, %s", str );
1069                 line_pinpoint( warning, line );
1070                 }
1071         }
1072
1073
1074 /* line_pinpoint - write out a message, pinpointing it at the given line */
1075
1076 void line_pinpoint( str, line )
1077 const char *str;
1078 int line;
1079         {
1080         fprintf( stderr, "%s:%d: %s\n", infilename, line, str );
1081         }
1082
1083
/* yyerror - eat up an error message from the parser;
 *           currently, messages are ignored
 */

void yyerror( const char *msg )
        {
        /* Deliberately discarded: nothing is reported from here. */
        (void) msg;
        }