external/bsd/flex/dist/parse.y

   1 /*      $NetBSD: parse.y,v 1.5 2014/10/30 18:44:05 christos Exp $       */
   2
   3 /* parse.y - parser for flex input */
   4 /* Tokens returned by the scanner.  This first group covers section
      * structure (SECTEND, SCDECL, XSCDECL), option directives (OPTION_OP,
      * OPT_*) and pattern pieces (CHAR, NUMBER, NAME, PREVCCL, EOF_OP).
      */
   5 %token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
   6 %token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS OPT_HEADER OPT_EXTRA_TYPE
   7 %token OPT_TABLES
   8 /* NOTE(review): presumably one token per POSIX [:class:] expression -- confirm against the scanner. */
   9 %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
  10 %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
  11 /* NOTE(review): presumably the negated forms of the tokens above -- confirm. */
  12 %token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
  13 %token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT
  14 /* Character-class set operators, left-associative; the fullccl rule reduces them with ccl_set_diff() and ccl_set_union(). */
  15 %left CCL_OP_DIFF CCL_OP_UNION
  16
  17 /*
  18  * POSIX and AT&T lex place the
  19  * precedence of the repeat operator, {}, below that of concatenation.
  20  * Thus, ab{3} is ababab.  Most other POSIX utilities use an Extended
  21  * Regular Expression (ERE) precedence that has the repeat operator
  22  * higher than concatenation.  This causes ab{3} to yield abbb.
  23  *
  24  * In order to support the POSIX and AT&T precedence and the flex
  25  * precedence we define two token sets for the begin and end tokens of
  26  * the repeat operator, '{' and '}'.  The lexical scanner chooses
  27  * which tokens to return based on whether posix_compat or lex_compat
  28  * are specified. Specifying either posix_compat or lex_compat will
  29  * cause flex to parse scanner files as per the AT&T and
  30  * POSIX-mandated behavior.
      *
      * In this grammar the *_POSIX pair is consumed by the "series" rule,
      * so the repeat applies to the whole concatenation parsed so far; the
      * *_FLEX pair is consumed by the "singleton" rule, so the repeat
      * applies to the last item only.
  31  */
  32
  33 %token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX
  34
  35
  36 %{
  37 /*  Copyright (c) 1990 The Regents of the University of California. */
  38 /*  All rights reserved. */
  39
  40 /*  This code is derived from software contributed to Berkeley by */
  41 /*  Vern Paxson. */
  42
  43 /*  The United States Government has rights in this work pursuant */
  44 /*  to contract no. DE-AC03-76SF00098 between the United States */
  45 /*  Department of Energy and the University of California. */
  46
  47 /*  This file is part of flex. */
  48
  49 /*  Redistribution and use in source and binary forms, with or without */
  50 /*  modification, are permitted provided that the following conditions */
  51 /*  are met: */
  52
  53 /*  1. Redistributions of source code must retain the above copyright */
  54 /*     notice, this list of conditions and the following disclaimer. */
  55 /*  2. Redistributions in binary form must reproduce the above copyright */
  56 /*     notice, this list of conditions and the following disclaimer in the */
  57 /*     documentation and/or other materials provided with the distribution. */
  58
  59 /*  Neither the name of the University nor the names of its contributors */
  60 /*  may be used to endorse or promote products derived from this software */
  61 /*  without specific prior written permission. */
  62
  63 /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
  64 /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
  65 /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
  66 /*  PURPOSE. */
  67 #include "flexdef.h"
  68 __RCSID("$NetBSD: parse.y,v 1.5 2014/10/30 18:44:05 christos Exp $");
  69
  70 #include "tables.h"
  71 /* Working state shared by the grammar actions.  trlcontxt, varlength,
      * variable_trail_rule, headcnt, trailcnt and rulelen are reset for
      * every rule by initforrule; i is a shared loop index used inside
      * several actions.
      */
  72 int pat, scnum, eps, headcnt, trailcnt, lastchar, i, rulelen;
  73 int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;
  74 /* Stack of start-condition numbers; live entries are scon_stk[1..scon_stk_ptr].  Allocated in sect1end. */
  75 int *scon_stk;
  76 int scon_stk_ptr;
  77 /* ccldot: the class built for '.' (everything but '\n'); cclany: the class used instead when sf_dot_all() holds. */
  78 static int madeany = false;  /* whether we've made the '.' character class */
  79 static int ccldot, cclany;
  80 int previous_continued_action;  /* whether the previous rule's action was '|' */
  81 /* Format a warning that takes two arguments into a MAXLINE-byte local buffer (bounded by snprintf) and pass it to lwarn(). */
  82 #define format_warn3(fmt, a1, a2) \
  83         do{ \
  84         char fw3_msg[MAXLINE];\
  85         snprintf( fw3_msg, MAXLINE,(fmt), (a1), (a2) );\
  86         lwarn( fw3_msg );\
  87         }while(0)
  88
  89 /* Expand a POSIX character class expression: add to currccl every
      * code c in [0, csize) for which both isascii(c) and func(c) hold.
      */
  90 #define CCL_EXPR(func) \
  91         do{ \
  92         int c; \
  93         for ( c = 0; c < csize; ++c ) \
  94                 if ( isascii(c) && func(c) ) \
  95                         ccladd( currccl, c ); \
  96         }while(0)
  97
  98 /* Negated class: add to currccl every code c in [0, csize) for which
      * func(c) is false.
      * NOTE(review): unlike CCL_EXPR there is no isascii() guard here, so
      * any non-ASCII code below csize is handed to func() directly and is
      * added whenever func() rejects it -- confirm the asymmetry is
      * intended.
      */
  99 #define CCL_NEG_EXPR(func) \
 100         do{ \
 101         int c; \
 102         for ( c = 0; c < csize; ++c ) \
 103                 if ( !func(c) ) \
 104                         ccladd( currccl, c ); \
 105         }while(0)
 106
 107 /* While POSIX defines isblank(), it's not ANSI C, so test for space and tab by hand.  The argument is evaluated twice. */
 108 #define IS_BLANK(c) ((c) == ' ' || (c) == '\t')
 109
 110 /* On some over-ambitious machines, such as DEC Alphas, the default
 111  * token type is "long" instead of "int"; this leads to problems with
 112  * declaring yylval in flexdef.h.  But so far, every yacc I've seen
 113  * wraps its definition of YYSTYPE in "#ifndef YYSTYPE", so the
 114  * following should ensure that the default token type is "int".
 115  */
 116 #define YYSTYPE int
 117
 118 %}
 119
 120 %%
 121 goal            :  initlex sect1 sect1end sect2 initforrule
                             /* Top-level rule.  Once both sections have been
                              * parsed (the trailing initforrule has already
                              * called new_rule()), build the default rule and
                              * branch it into scset[] of every start condition.
                              */
 122                         { /* add default rule */
 123                         int def_rule;
 124
 125                         pat = cclinit();
 126                         cclnegate( pat );
 127                         /* pat is a fresh class that was negated straight away; presumably it matches any single character -- confirm in cclnegate(). */
 128                         def_rule = mkstate( -pat );
 129
 130                         /* Remember the number of the default rule so we
 131                          * don't generate "can't match" warnings for it.
 132                          */
 133                         default_rule = num_rules;
 134
 135                         finish_rule( def_rule, false, 0, 0, 0);
 136                         /* Start conditions are numbered from 1 to lastsc. */
 137                         for ( i = 1; i <= lastsc; ++i )
 138                                 scset[i] = mkbranch( scset[i], def_rule );
 139                         /* Action text of the default rule: a fatal error when spprdflt is set, ECHO otherwise. */
 140                         if ( spprdflt )
 141                                 add_action(
 142                                 "YY_FATAL_ERROR( \"flex scanner jammed\" )" );
 143                         else
 144                                 add_action( "ECHO" );
 145
 146                         add_action( ";\n\tYY_BREAK\n" );
 147                         }
 148                 ;
 149 /* initlex: empty rule at the very start of goal; its action installs the "INITIAL" start condition as non-exclusive (second argument false). */
 150 initlex         :
 151                         { /* initialize for processing rules */
 152
 153                         /* Create default DFA start condition. */
 154                         scinstal( "INITIAL", false );
 155                         }
 156                 ;
 157 /* sect1: any sequence of start-condition declarations and option directives (possibly empty); a syntax error is reported through synerr(). */
 158 sect1           :  sect1 startconddecl namelist1
 159                 |  sect1 options
 160                 |
 161                 |  error
 162                         { synerr( _("unknown error processing section 1") ); }
 163                 ;
 164 /* sect1end: SECTEND closes section 1.  Runs check_options(), then allocates the start-condition stack with lastsc + 1 slots (entries are stored from index 1) and empties it. */
 165 sect1end        :  SECTEND
 166                         {
 167                         check_options();
 168                         scon_stk = allocate_integer_array( lastsc + 1 );
 169                         scon_stk_ptr = 0;
 170                         }
 171                 ;
 172 /* startconddecl: records in xcluflg whether the declaration keyword was SCDECL (false) or XSCDECL (true); namelist1 passes the flag to scinstal(). */
 173 startconddecl   :  SCDECL
 174                         { xcluflg = false; }
 175
 176                 |  XSCDECL
 177                         { xcluflg = true; }
 178                 ;
 179 /* namelist1: one or more NAMEs; each is installed as a start condition from nmstr with the current xcluflg. */
 180 namelist1       :  namelist1 NAME
 181                         { scinstal( nmstr, xcluflg ); }
 182
 183                 |  NAME
 184                         { scinstal( nmstr, xcluflg ); }
 185
 186                 |  error
 187                         { synerr( _("bad start condition list") ); }
 188                 ;
 189 /* options: OPTION_OP followed by a possibly empty list of "option" items. */
 190 options         :  OPTION_OP optionlist
 191                 ;
 192
 193 optionlist      :  optionlist option
 194                 |
 195                 ;
 196 /* option: "keyword = NAME" forms.  Each action stores a copy of the
      * name text (nmstr, duplicated with copy_string()) in the matching
      * global; OPT_OUTFILE also sets did_outfilename and OPT_TABLES also
      * sets tablesext.
      */
 197 option          :  OPT_OUTFILE '=' NAME
 198                         {
 199                         outfilename = copy_string( nmstr );
 200                         did_outfilename = 1;
 201                         }
 202                 |  OPT_EXTRA_TYPE '=' NAME
 203                         { extra_type = copy_string( nmstr ); }
 204                 |  OPT_PREFIX '=' NAME
 205                         { prefix = copy_string( nmstr ); }
 206                 |  OPT_YYCLASS '=' NAME
 207                         { yyclass = copy_string( nmstr ); }
 208                 |  OPT_HEADER '=' NAME
 209                         { headerfilename = copy_string( nmstr ); }
 210             |  OPT_TABLES '=' NAME
 211             { tablesext = true; tablesfilename = copy_string( nmstr ); }
 212                 ;
 213 /* sect2: the rules section.  $2 is the value of scon, i.e. the
      * start-condition stack depth saved before the rule's "<...>" prefix
      * pushed anything; assigning it back to scon_stk_ptr pops those
      * entries once the rule (or the braced group of nested rules in the
      * second alternative) is complete.
      */
 214 sect2           :  sect2 scon initforrule flexrule '\n'
 215                         { scon_stk_ptr = $2; }
 216                 |  sect2 scon '{' sect2 '}'
 217                         { scon_stk_ptr = $2; }
 218                 |
 219                 ;
 220 /* initforrule: empty rule reduced before each flexrule (and once more at the end of goal); resets the per-rule state and calls new_rule(). */
 221 initforrule     :
 222                         {
 223                         /* Initialize for a parse of one rule. */
 224                         trlcontxt = variable_trail_rule = varlength = false;
 225                         trailcnt = headcnt = rulelen = 0;
 226                         current_state_type = STATE_NORMAL;
 227                         previous_continued_action = continued_action;
 228                         in_rule = true;
 229
 230                         new_rule();
 231                         }
 232                 ;
 233 /* flexrule: one complete rule.  The pattern machine is finished with
      * finish_rule() and then branched into the per-start-condition
      * tables: scbol[] for '^'-anchored rules, scset[] otherwise.  When
      * the start-condition stack is non-empty only the stacked conditions
      * receive the rule; otherwise every non-exclusive condition does.
      */
 234 flexrule        :  '^' rule
 235                         {
 236                         pat = $2;
 237                         finish_rule( pat, variable_trail_rule,
 238                                 headcnt, trailcnt , previous_continued_action);
 239
 240                         if ( scon_stk_ptr > 0 )
 241                                 {
 242                                 for ( i = 1; i <= scon_stk_ptr; ++i )
 243                                         scbol[scon_stk[i]] =
 244                                                 mkbranch( scbol[scon_stk[i]],
 245                                                                 pat );
 246                                 }
 247
 248                         else
 249                                 {
 250                                 /* Add to all non-exclusive start conditions,
 251                                  * including the default (0) start condition.
                                      * NOTE(review): the loop below starts at 1;
                                      * "(0)" presumably refers to a user-visible
                                      * number for INITIAL rather than to an index
                                      * into scbol[] -- confirm.
 252                                  */
 253
 254                                 for ( i = 1; i <= lastsc; ++i )
 255                                         if ( ! scxclu[i] )
 256                                                 scbol[i] = mkbranch( scbol[i],
 257                                                                         pat );
 258                                 }
 259                         /* Report the cost of '^' only the first time it is seen. */
 260                         if ( ! bol_needed )
 261                                 {
 262                                 bol_needed = true;
 263
 264                                 if ( performance_report > 1 )
 265                                         pinpoint_message(
 266                         "'^' operator results in sub-optimal performance" );
 267                                 }
 268                         }
 269
 270                 |  rule
 271                         {
 272                         pat = $1;
 273                         finish_rule( pat, variable_trail_rule,
 274                                 headcnt, trailcnt , previous_continued_action);
 275
 276                         if ( scon_stk_ptr > 0 )
 277                                 {
 278                                 for ( i = 1; i <= scon_stk_ptr; ++i )
 279                                         scset[scon_stk[i]] =
 280                                                 mkbranch( scset[scon_stk[i]],
 281                                                                 pat );
 282                                 }
 283
 284                         else
 285                                 {
 286                                 for ( i = 1; i <= lastsc; ++i )
 287                                         if ( ! scxclu[i] )
 288                                                 scset[i] =
 289                                                         mkbranch( scset[i],
 290                                                                 pat );
 291                                 }
 292                         }
 293
 294                 |  EOF_OP
 295                         {
 296                         if ( scon_stk_ptr > 0 )
 297                                 build_eof_action();
 298
 299                         else
 300                                 {
 301                                 /* This EOF applies to all start conditions
 302                                  * which don't already have EOF actions.
                                      * They are pushed on the stack so that
                                      * build_eof_action() can be called the same
                                      * way as in the branch above.
 303                                  */
 304                                 for ( i = 1; i <= lastsc; ++i )
 305                                         if ( ! sceof[i] )
 306                                                 scon_stk[++scon_stk_ptr] = i;
 307
 308                                 if ( scon_stk_ptr == 0 )
 309                                         lwarn(
 310                         "all start conditions already have <<EOF>> rules" );
 311
 312                                 else
 313                                         build_eof_action();
 314                                 }
 315                         }
 316
 317                 |  error
 318                         { synerr( _("unrecognized rule") ); }
 319                 ;
 320 /* scon_stk_ptr (as a rule): empty marker whose value is the current stack depth, captured in scon before namelist2 pushes any names. */
 321 scon_stk_ptr    :
 322                         { $$ = scon_stk_ptr; }
 323                 ;
 324 /* scon: optional start-condition prefix of a rule.  In all three
      * alternatives the value is the stack depth from before the prefix
      * was processed, which sect2 later uses to restore scon_stk_ptr.
      */
 325 scon            :  '<' scon_stk_ptr namelist2 '>'
 326                         { $$ = $2; }
 327
 328                 |  '<' '*' '>'
 329                         {
 330                         $$ = scon_stk_ptr;
 331                         /* "<*>": push every start condition 1..lastsc that is not already on the stack. */
 332                         for ( i = 1; i <= lastsc; ++i )
 333                                 {
 334                                 int j;
 335
 336                                 for ( j = 1; j <= scon_stk_ptr; ++j )
 337                                         if ( scon_stk[j] == i )
 338                                                 break;
 339                                 /* j ran past the end: i was not found. */
 340                                 if ( j > scon_stk_ptr )
 341                                         scon_stk[++scon_stk_ptr] = i;
 342                                 }
 343                         }
 344
 345                 |
 346                         { $$ = scon_stk_ptr; }
 347                 ;
 348 /* namelist2: comma-separated start-condition names inside "<...>"; each name is handled by sconname. */
 349 namelist2       :  namelist2 ',' sconname
 350
 351                 |  sconname
 352
 353                 |  error
 354                         { synerr( _("bad start condition list") ); }
 355                 ;
 356 /* sconname: look the name (nmstr) up with sclookup(); a result of 0
      * is reported as undeclared.  Otherwise the condition number is
      * pushed on scon_stk unless it is already there, in which case a
      * "specified twice" warning is issued instead.
      */
 357 sconname        :  NAME
 358                         {
 359                         if ( (scnum = sclookup( nmstr )) == 0 )
 360                                 format_pinpoint_message(
 361                                         "undeclared start condition %s",
 362                                         nmstr );
 363                         else
 364                                 {
 365                                 for ( i = 1; i <= scon_stk_ptr; ++i )
 366                                         if ( scon_stk[i] == scnum )
 367                                                 {
 368                                                 format_warn(
 369                                                         "<%s> specified twice",
 370                                                         scname[scnum] );
 371                                                 break;
 372                                                 }
 373                                 /* The loop finished without a break: not a duplicate. */
 374                                 if ( i > scon_stk_ptr )
 375                                         scon_stk[++scon_stk_ptr] = scnum;
 376                                 }
 377                         }
 378                 ;
 379 /* rule: a pattern with optional trailing context.  Alternatives, in
      * order: head '/' trail (re2 re); the same followed by '$', which is
      * an error; re '$', where the trailing context is a single '\n';
      * and a plain re.  The actions settle headcnt, trailcnt and
      * variable_trail_rule, which flexrule passes to finish_rule().
      */
 380 rule            :  re2 re
 381                         {
 382                         if ( transchar[lastst[$2]] != SYM_EPSILON )
 383                                 /* Provide final transition \now/ so it
 384                                  * will be marked as a trailing context
 385                                  * state.
 386                                  */
 387                                 $2 = link_machines( $2,
 388                                                 mkstate( SYM_EPSILON ) );
 389
 390                         mark_beginning_as_normal( $2 );
 391                         current_state_type = STATE_NORMAL;
 392
 393                         if ( previous_continued_action )
 394                                 {
 395                                 /* We need to treat this as variable trailing
 396                                  * context so that the backup does not happen
 397                                  * in the action but before the action switch
 398                                  * statement.  If the backup happens in the
 399                                  * action, then the rules "falling into" this
 400                                  * one's action will *also* do the backup,
 401                                  * erroneously.
 402                                  */
 403                                 if ( ! varlength || headcnt != 0 )
 404                                         lwarn(
 405                 "trailing context made variable due to preceding '|' action" );
 406
 407                                 /* Mark as variable. */
 408                                 varlength = true;
 409                                 headcnt = 0;
 410
 411                                 }
 412
 413                         if ( lex_compat || (varlength && headcnt == 0) )
 414                                 { /* variable trailing context rule */
 415                                 /* Mark the first part of the rule as the
 416                                  * accepting "head" part of a trailing
 417                                  * context rule.
 418                                  *
 419                                  * By the way, we didn't do this at the
 420                                  * beginning of this production because back
 421                                  * then current_state_type was set up for a
 422                                  * trail rule, and add_accept() can create
 423                                  * a new state ...
 424                                  */
 425                                 add_accept( $1,
 426                                         num_rules | YY_TRAILING_HEAD_MASK );
 427                                 variable_trail_rule = true;
 428                                 }
 429
 430                         else
                                     /* rulelen was zeroed by re2, so here it
                                      * counts only the trailing part.
                                      */
 431                                 trailcnt = rulelen;
 432
 433                         $$ = link_machines( $1, $2 );
 434                         }
 435
 436                 |  re2 re '$'
 437                         { synerr( _("trailing context used twice") ); }
 438
 439                 |  re '$'
 440                         {
 441                         headcnt = 0;
 442                         trailcnt = 1;
 443                         rulelen = 1;
 444                         varlength = false;
 445
 446                         current_state_type = STATE_TRAILING_CONTEXT;
 447
 448                         if ( trlcontxt )
 449                                 {
 450                                 synerr( _("trailing context used twice") );
                                     /* NOTE(review): this value of $$ is
                                      * overwritten by the link_machines()
                                      * assignment at the end of the action.
                                      */
 451                                 $$ = mkstate( SYM_EPSILON );
 452                                 }
 453
 454                         else if ( previous_continued_action )
 455                                 {
 456                                 /* See the comment in the rule for "re2 re"
 457                                  * above.
 458                                  */
 459                                 lwarn(
 460                 "trailing context made variable due to preceding '|' action" );
 461
 462                                 varlength = true;
 463                                 }
 464
 465                         if ( lex_compat || varlength )
 466                                 {
 467                                 /* Again, see the comment in the rule for
 468                                  * "re2 re" above.
 469                                  */
 470                                 add_accept( $1,
 471                                         num_rules | YY_TRAILING_HEAD_MASK );
 472                                 variable_trail_rule = true;
 473                                 }
 474
 475                         trlcontxt = true;
 476                         /* '$' is built as the head, an epsilon state, then a state on '\n'. */
 477                         eps = mkstate( SYM_EPSILON );
 478                         $$ = link_machines( $1,
 479                                 link_machines( eps, mkstate( '\n' ) ) );
 480                         }
 481
 482                 |  re
 483                         {
 484                         $$ = $1;
 485
 486                         if ( trlcontxt )
 487                                 {
 488                                 if ( lex_compat || (varlength && headcnt == 0) )
 489                                         /* Both head and trail are
 490                                          * variable-length.
 491                                          */
 492                                         variable_trail_rule = true;
 493                                 else
 494                                         trailcnt = rulelen;
 495                                 }
 496                         }
 497                 ;
 498
 499 /* re: alternation of series.  Any use of '|' marks the pattern as variable-length and combines the two machines with mkor(). */
 500 re              :  re '|' series
 501                         {
 502                         varlength = true;
 503                         $$ = mkor( $1, $3 );
 504                         }
 505
 506                 |  series
 507                         { $$ = $1; }
 508                 ;
 509
 510 /* re2: the head of a trailing-context rule, i.e. an re followed by
      * '/'.  Records the head length in headcnt (only when the head is
      * fixed-length), restarts rulelen for the trailing part and switches
      * current_state_type to STATE_TRAILING_CONTEXT.  Value: the head
      * machine.
      */
 511 re2             :  re '/'
 512                         {
 513                         /* This rule is written separately so the
 514                          * reduction will occur before the trailing
 515                          * series is parsed.
 516                          */
 517
 518                         if ( trlcontxt )
 519                                 synerr( _("trailing context used twice") );
 520                         else
 521                                 trlcontxt = true;
 522
 523                         if ( varlength )
 524                                 /* We hope the trailing context is
 525                                  * fixed-length.
                                      * varlength is cleared so that it now
                                      * describes only the trail; headcnt is
                                      * left as it was.
 526                                  */
 527                                 varlength = false;
 528                         else
 529                                 headcnt = rulelen;
 530
 531                         rulelen = 0;
 532
 533                         current_state_type = STATE_TRAILING_CONTEXT;
 534                         $$ = $1;
 535                         }
 536                 ;
 537 /* series: concatenation of singletons, plus the POSIX/lex-style
      * repeat operators, which here apply to the whole series parsed so
      * far.  The three repeat forms are {m,n}, {m,} and {m}; every one of
      * them marks the pattern as variable-length.
      */
 538 series          :  series singleton
 539                         {
 540                         /* This is where concatenation of adjacent patterns
 541                          * gets done.
 542                          */
 543                         $$ = link_machines( $1, $2 );
 544                         }
 545
 546                 |  singleton
 547                         { $$ = $1; }
 548
 549                 |  series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX
 550                         {
 551                         varlength = true;
 552                         /* $3 is the lower bound, $5 the upper bound. */
 553                         if ( $3 > $5 || $3 < 0 )
 554                                 {
 555                                 synerr( _("bad iteration values") );
 556                                 $$ = $1;
 557                                 }
 558                         else
 559                                 {
 560                                 if ( $3 == 0 )
 561                                         {
 562                                         if ( $5 <= 0 )
 563                                                 {
 564                                                 synerr(
 565                                                 _("bad iteration values") );
 566                                                 $$ = $1;
 567                                                 }
 568                                         else
                                                     /* {0,n} is built as an
                                                      * optional {1,n}.
                                                      */
 569                                                 $$ = mkopt(
 570                                                         mkrep( $1, 1, $5 ) );
 571                                         }
 572                                 else
 573                                         $$ = mkrep( $1, $3, $5 );
 574                                 }
 575                         }
 576
 577                 |  series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX
 578                         {
 579                         varlength = true;
 580
 581                         if ( $3 <= 0 )
 582                                 {
 583                                 synerr( _("iteration value must be positive") );
 584                                 $$ = $1;
 585                                 }
 586
 587                         else
 588                                 $$ = mkrep( $1, $3, INFINITE_REPEAT );
 589                         }
 590
 591                 |  series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
 592                         {
 593                         /* The series could be something like "(foo)",
 594                          * in which case we have no idea what its length
 595                          * is, so we punt here.
 596                          */
 597                         varlength = true;
 598
 599                         if ( $3 <= 0 )
 600                                 {
 601                                   synerr( _("iteration value must be positive")
 602                                           );
 603                                 $$ = $1;
 604                                 }
 605
 606                         else
                                     /* {n}: the original machine followed
                                      * by what copysingl( $1, n - 1 ) returns.
                                      */
 607                                 $$ = link_machines( $1,
 608                                                 copysingl( $1, $3 - 1 ) );
 609                         }
 610
 611                 ;
 612 /* singleton: one pattern item, optionally followed by a postfix
      * operator (*, +, ?, or a flex-style {m,n} / {m,} / {m} repeat that
      * applies to this item only).  Items are '.', a character class, a
      * previously seen class (PREVCCL), a quoted string, a parenthesised
      * re, or a single CHAR.  Items that match exactly one character
      * increment rulelen.
      */
 613 singleton       :  singleton '*'
 614                         {
 615                         varlength = true;
 616
 617                         $$ = mkclos( $1 );
 618                         }
 619
 620                 |  singleton '+'
 621                         {
 622                         varlength = true;
 623                         $$ = mkposcl( $1 );
 624                         }
 625
 626                 |  singleton '?'
 627                         {
 628                         varlength = true;
 629                         $$ = mkopt( $1 );
 630                         }
 631
 632                 |  singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX
 633                         {
 634                         varlength = true;
 635                         /* $3 is the lower bound, $5 the upper bound. */
 636                         if ( $3 > $5 || $3 < 0 )
 637                                 {
 638                                 synerr( _("bad iteration values") );
 639                                 $$ = $1;
 640                                 }
 641                         else
 642                                 {
 643                                 if ( $3 == 0 )
 644                                         {
 645                                         if ( $5 <= 0 )
 646                                                 {
 647                                                 synerr(
 648                                                 _("bad iteration values") );
 649                                                 $$ = $1;
 650                                                 }
 651                                         else
                                                     /* {0,n} is built as an
                                                      * optional {1,n}.
                                                      */
 652                                                 $$ = mkopt(
 653                                                         mkrep( $1, 1, $5 ) );
 654                                         }
 655                                 else
 656                                         $$ = mkrep( $1, $3, $5 );
 657                                 }
 658                         }
 659
 660                 |  singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX
 661                         {
 662                         varlength = true;
 663
 664                         if ( $3 <= 0 )
 665                                 {
 666                                 synerr( _("iteration value must be positive") );
 667                                 $$ = $1;
 668                                 }
 669
 670                         else
 671                                 $$ = mkrep( $1, $3, INFINITE_REPEAT );
 672                         }
 673
 674                 |  singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
 675                         {
 676                         /* The singleton could be something like "(foo)",
 677                          * in which case we have no idea what its length
 678                          * is, so we punt here.
 679                          */
 680                         varlength = true;
 681
 682                         if ( $3 <= 0 )
 683                                 {
 684                                 synerr( _("iteration value must be positive") );
 685                                 $$ = $1;
 686                                 }
 687
 688                         else
 689                                 $$ = link_machines( $1,
 690                                                 copysingl( $1, $3 - 1 ) );
 691                         }
 692
 693                 |  '.'
 694                         {
                             /* Both classes are built once, on first use,
                              * and reused for every later '.'.
                              */
 695                         if ( ! madeany )
 696                                 {
 697                                 /* Create the '.' character class. */
 698                     ccldot = cclinit();
 699                     ccladd( ccldot, '\n' );
 700                     cclnegate( ccldot );
 701
 702                     if ( useecs )
 703                         mkeccl( ccltbl + cclmap[ccldot],
 704                             ccllen[ccldot], nextecm,
 705                             ecgroup, csize, csize );
 706
 707                                 /* Create the (?s:'.') character class. */
 708                     cclany = cclinit();
 709                     cclnegate( cclany );
 710
 711                     if ( useecs )
 712                         mkeccl( ccltbl + cclmap[cclany],
 713                             ccllen[cclany], nextecm,
 714                             ecgroup, csize, csize );
 715
 716                                 madeany = true;
 717                                 }
 718
 719                         ++rulelen;
 720                         /* A class is referenced by passing its negated number to mkstate(). */
 721             if (sf_dot_all())
 722                 $$ = mkstate( -cclany );
 723             else
 724                 $$ = mkstate( -ccldot );
 725                         }
 726
 727                 |  fullccl
 728                         {
 729                                 /* Sort characters for fast searching.
 730                                  */
 731                                 qsort( ccltbl + cclmap[$1], ccllen[$1], sizeof (*ccltbl), cclcmp );
 732
 733                         if ( useecs )
 734                                 mkeccl( ccltbl + cclmap[$1], ccllen[$1],
 735                                         nextecm, ecgroup, csize, csize );
 736
 737                         ++rulelen;
 738                         /* Record that the current rule can match a newline. */
 739                         if (ccl_has_nl[$1])
 740                                 rule_has_nl[num_rules] = true;
 741
 742                         $$ = mkstate( -$1 );
 743                         }
 744
 745                 |  PREVCCL
 746                         {
 747                         ++rulelen;
 748
 749                         if (ccl_has_nl[$1])
 750                                 rule_has_nl[num_rules] = true;
 751
 752                         $$ = mkstate( -$1 );
 753                         }
 754
 755                 |  '"' string '"'
 756                         { $$ = $2; }
 757
 758                 |  '(' re ')'
 759                         { $$ = $2; }
 760
 761                 |  CHAR
 762                         {
 763                         ++rulelen;
 764
 765                         if ($1 == nlch)
 766                                 rule_has_nl[num_rules] = true;
 767
 768             if (sf_case_ins() && has_case($1))
 769                 /* create an alternation, as in (a|A) */
 770                 $$ = mkor (mkstate($1), mkstate(reverse_case($1)));
 771             else
 772                 $$ = mkstate( $1 );
 773                         }
 774                 ;
 775 fullccl:
 776         fullccl CCL_OP_DIFF  braceccl  { $$ = ccl_set_diff  ($1, $3); }
 777     |   fullccl CCL_OP_UNION braceccl  { $$ = ccl_set_union ($1, $3); }
 778     |   braceccl
 779     ;
 780
 781 braceccl:
 782
 783             '[' ccl ']' { $$ = $2; }
 784
 785                 |  '[' '^' ccl ']'
 786                         {
 787                         cclnegate( $3 );
 788                         $$ = $3;
 789                         }
 790                 ;
 791
     /* ccl - the contents of a bracketed character class.
      *
      * Each non-empty alternative adds members to the class carried in $1
      * and yields that same class; the empty alternative starts a fresh
      * class with cclinit().  Along the way cclsorted and lastchar record
      * whether every addition so far began above the previously recorded
      * last character.
      */
 792 ccl             :  ccl CHAR '-' CHAR
 793                         {
 794
                             /* The checks inside this block only issue warnings
                              * through format_warn3(); nothing is added to the
                              * class here.
                              */
 795                         if (sf_case_ins())
 796                           {
 797
 798                             /* If one end of the range has case and the other
 799                              * does not, or the cases are different, then we're not
 800                              * sure what range the user is trying to express.
 801                              * Examples: [@-z] or [S-t]
 802                              */
 803                             if (has_case ($2) != has_case ($4)
 804                                      || (has_case ($2) && (b_islower ($2) != b_islower ($4)))
 805                                      || (has_case ($2) && (b_isupper ($2) != b_isupper ($4))))
 806                               format_warn3 (
 807                               _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
 808                                             $2, $4);
 809
 810                             /* If the range spans uppercase characters but not
 811                              * lowercase (or vice-versa), then should we automatically
 812                              * include lowercase characters in the range?
 813                              * Example: in ASCII, [@-_] spans [A-Z] but not [a-z]
 814                              */
 815                             else if (!has_case ($2) && !has_case ($4) && !range_covers_case ($2, $4))
 816                               format_warn3 (
 817                               _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
 818                                             $2, $4);
 819                           }
 820
                             /* A range whose first character is above its last is
                              * reported through synerr() and adds nothing.
                              */
 821                         if ( $2 > $4 )
 822                                 synerr( _("negative range in character class") );
 823
 824                         else
 825                                 {
 826                                 for ( i = $2; i <= $4; ++i )
 827                                         ccladd( $1, i );
 828
 829                                 /* Keep track if this ccl is staying in
 830                                  * alphabetical order.
 831                                  */
 832                                 cclsorted = cclsorted && ($2 > lastchar);
 833                                 lastchar = $4;
 834
 835                 /* Do it again for upper/lowercase: both endpoints are
                      * replaced by their reverse_case() values and the range
                      * is added a second time.  If the converted start ends
                      * up above the converted end, the loop adds nothing.
                      */
 836                 if (sf_case_ins() && has_case($2) && has_case($4)){
 837                     $2 = reverse_case ($2);
 838                     $4 = reverse_case ($4);
 839
 840                     for ( i = $2; i <= $4; ++i )
 841                         ccladd( $1, i );
 842
 843                     cclsorted = cclsorted && ($2 > lastchar);
 844                     lastchar = $4;
 845                 }
 846
 847                                 }
 848
 849                         $$ = $1;
 850                         }
 851
 852                 |  ccl CHAR
 853                         {
                             /* A single character: add it, then update the
                              * ordering bookkeeping.
                              */
 854                         ccladd( $1, $2 );
 855                         cclsorted = cclsorted && ($2 > lastchar);
 856                         lastchar = $2;
 857
 858             /* Do it again for upper/lowercase: the reverse_case()
                  * counterpart is added too, with the same bookkeeping.
                  */
 859             if (sf_case_ins() && has_case($2)){
 860                 $2 = reverse_case ($2);
 861                 ccladd ($1, $2);
 862
 863                 cclsorted = cclsorted && ($2 > lastchar);
 864                 lastchar = $2;
 865             }
 866
 867                         $$ = $1;
 868                         }
 869
 870                 |  ccl ccl_expr
 871                         {
 872                         /* Too hard to properly maintain cclsorted. */
 873                         cclsorted = false;
 874                         $$ = $1;
 875                         }
 876
 877                 |
 878                         {
                             /* Empty alternative: begin a new class.  The new
                              * class is also stored in currccl (presumably for
                              * the CCL_EXPR macros used by ccl_expr; they are
                              * defined outside this rule -- confirm there).
                              */
 879                         cclsorted = true;
 880                         lastchar = 0;
 881                         currccl = $$ = cclinit();
 882                         }
 883                 ;
 884
 885 ccl_expr:
 886            CCE_ALNUM    { CCL_EXPR(isalnum); }
 887                 |  CCE_ALPHA    { CCL_EXPR(isalpha); }
 888                 |  CCE_BLANK    { CCL_EXPR(IS_BLANK); }
 889                 |  CCE_CNTRL    { CCL_EXPR(iscntrl); }
 890                 |  CCE_DIGIT    { CCL_EXPR(isdigit); }
 891                 |  CCE_GRAPH    { CCL_EXPR(isgraph); }
 892                 |  CCE_LOWER    {
 893                           CCL_EXPR(islower);
 894                           if (sf_case_ins())
 895                               CCL_EXPR(isupper);
 896                         }
 897                 |  CCE_PRINT    { CCL_EXPR(isprint); }
 898                 |  CCE_PUNCT    { CCL_EXPR(ispunct); }
 899                 |  CCE_SPACE    { CCL_EXPR(isspace); }
 900                 |  CCE_XDIGIT   { CCL_EXPR(isxdigit); }
 901                 |  CCE_UPPER    {
 902                     CCL_EXPR(isupper);
 903                     if (sf_case_ins())
 904                         CCL_EXPR(islower);
 905                                 }
 906
 907         |  CCE_NEG_ALNUM        { CCL_NEG_EXPR(isalnum); }
 908                 |  CCE_NEG_ALPHA        { CCL_NEG_EXPR(isalpha); }
 909                 |  CCE_NEG_BLANK        { CCL_NEG_EXPR(IS_BLANK); }
 910                 |  CCE_NEG_CNTRL        { CCL_NEG_EXPR(iscntrl); }
 911                 |  CCE_NEG_DIGIT        { CCL_NEG_EXPR(isdigit); }
 912                 |  CCE_NEG_GRAPH        { CCL_NEG_EXPR(isgraph); }
 913                 |  CCE_NEG_PRINT        { CCL_NEG_EXPR(isprint); }
 914                 |  CCE_NEG_PUNCT        { CCL_NEG_EXPR(ispunct); }
 915                 |  CCE_NEG_SPACE        { CCL_NEG_EXPR(isspace); }
 916                 |  CCE_NEG_XDIGIT       { CCL_NEG_EXPR(isxdigit); }
 917                 |  CCE_NEG_LOWER        {
 918                                 if ( sf_case_ins() )
 919                                         lwarn(_("[:^lower:] is ambiguous in case insensitive scanner"));
 920                                 else
 921                                         CCL_NEG_EXPR(islower);
 922                                 }
 923                 |  CCE_NEG_UPPER        {
 924                                 if ( sf_case_ins() )
 925                                         lwarn(_("[:^upper:] ambiguous in case insensitive scanner"));
 926                                 else
 927                                         CCL_NEG_EXPR(isupper);
 928                                 }
 929                 ;
 930
 931 string          :  string CHAR
 932                         {
 933                         if ( $2 == nlch )
 934                                 rule_has_nl[num_rules] = true;
 935
 936                         ++rulelen;
 937
 938             if (sf_case_ins() && has_case($2))
 939                 $$ = mkor (mkstate($2), mkstate(reverse_case($2)));
 940             else
 941                 $$ = mkstate ($2);
 942
 943                         $$ = link_machines( $1, $$);
 944                         }
 945
 946                 |
 947                         { $$ = mkstate( SYM_EPSILON ); }
 948                 ;
 949
 950 %%
 951
 952
 953 /* build_eof_action - build the "<<EOF>>" action for the active start
 954  *                    conditions
 955  */
 956
 957 void build_eof_action()
 958         {
 959         register int i;
 960         char action_text[MAXLINE];
 961
 962         for ( i = 1; i <= scon_stk_ptr; ++i )
 963                 {
 964                 if ( sceof[scon_stk[i]] )
 965                         format_pinpoint_message(
 966                                 "multiple <<EOF>> rules for start condition %s",
 967                                 scname[scon_stk[i]] );
 968
 969                 else
 970                         {
 971                         sceof[scon_stk[i]] = true;
 972
 973                         if (previous_continued_action /* && previous action was regular */)
 974                                 add_action("YY_RULE_SETUP\n");
 975
 976                         snprintf( action_text, sizeof(action_text), "case YY_STATE_EOF(%s):\n",
 977                                 scname[scon_stk[i]] );
 978                         add_action( action_text );
 979                         }
 980                 }
 981
 982         line_directive_out( (FILE *) 0, 1 );
 983
 984         /* This isn't a normal rule after all - don't count it as
 985          * such, so we don't have any holes in the rule numbering
 986          * (which make generating "rule can never match" warnings
 987          * more difficult.
 988          */
 989         --num_rules;
 990         ++num_eof_rules;
 991         }
 992
 993
 994 /* format_synerr - write out formatted syntax error */
 995
 996 void format_synerr( msg, arg )
 997 const char *msg, arg[];
 998         {
 999         char errmsg[MAXLINE];
1000
1001         (void) snprintf( errmsg, sizeof(errmsg), msg, arg );
1002         synerr( errmsg );
1003         }
1004
1005
1006 /* synerr - report a syntax error */
1007
1008 void synerr( str )
1009 const char *str;
1010         {
1011         syntaxerror = true;
1012         pinpoint_message( str );
1013         }
1014
1015
1016 /* format_warn - write out formatted warning */
1017
1018 void format_warn( msg, arg )
1019 const char *msg, arg[];
1020         {
1021         char warn_msg[MAXLINE];
1022
1023         snprintf( warn_msg, sizeof(warn_msg), msg, arg );
1024         lwarn( warn_msg );
1025         }
1026
1027
1028 /* lwarn - report a warning, unless -w was given */
1029
1030 void lwarn( str )
1031 const char *str;
1032         {
1033         line_warning( str, linenum );
1034         }
1035
1036 /* format_pinpoint_message - write out a message formatted with one string,
1037  *                           pinpointing its location
1038  */
1039
1040 void format_pinpoint_message( msg, arg )
1041 const char *msg, arg[];
1042         {
1043         char errmsg[MAXLINE];
1044
1045         snprintf( errmsg, sizeof(errmsg), msg, arg );
1046         pinpoint_message( errmsg );
1047         }
1048
1049
1050 /* pinpoint_message - write out a message, pinpointing its location */
1051
1052 void pinpoint_message( str )
1053 const char *str;
1054         {
1055         line_pinpoint( str, linenum );
1056         }
1057
1058
1059 /* line_warning - report a warning at a given line, unless -w was given */
1060
1061 void line_warning( str, line )
1062 const char *str;
1063 int line;
1064         {
1065         char warning[MAXLINE];
1066
1067         if ( ! nowarn )
1068                 {
1069                 snprintf( warning, sizeof(warning), "warning, %s", str );
1070                 line_pinpoint( warning, line );
1071                 }
1072         }
1073
1074
1075 /* line_pinpoint - write out a message, pinpointing it at the given line */
1076
1077 void line_pinpoint( str, line )
1078 const char *str;
1079 int line;
1080         {
1081         fprintf( stderr, "%s:%d: %s\n", infilename, line, str );
1082         }
1083
1084
/* yyerror - swallow an error message from the parser;
 *           currently, messages are ignored
 */

void yyerror( msg )
const char *msg;
        {
        /* Intentionally empty: the message is discarded. */
        (void) msg;
        }