external/bsd/flex/dist/dfa.c

   1 /*      $NetBSD$        */
   2
   3 /* dfa - DFA construction routines */
   4
   5 /*  Copyright (c) 1990 The Regents of the University of California. */
   6 /*  All rights reserved. */
   7
   8 /*  This code is derived from software contributed to Berkeley by */
   9 /*  Vern Paxson. */
  10
  11 /*  The United States Government has rights in this work pursuant */
  12 /*  to contract no. DE-AC03-76SF00098 between the United States */
  13 /*  Department of Energy and the University of California. */
  14
  15 /*  Redistribution and use in source and binary forms, with or without */
  16 /*  modification, are permitted provided that the following conditions */
  17 /*  are met: */
  18
  19 /*  1. Redistributions of source code must retain the above copyright */
  20 /*     notice, this list of conditions and the following disclaimer. */
  21 /*  2. Redistributions in binary form must reproduce the above copyright */
  22 /*     notice, this list of conditions and the following disclaimer in the */
  23 /*     documentation and/or other materials provided with the distribution. */
  24
  25 /*  Neither the name of the University nor the names of its contributors */
  26 /*  may be used to endorse or promote products derived from this software */
  27 /*  without specific prior written permission. */
  28
  29 /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
  30 /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
  31 /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
  32 /*  PURPOSE. */
  33
  34 #include "flexdef.h"
  35 #include "tables.h"
  36
  37 /* declare functions that have forward references */
  38
  39 void dump_associated_rules PROTO ((FILE *, int));
  40 void dump_transitions PROTO ((FILE *, int[]));
  41 void sympartition PROTO ((int[], int, int[], int[]));
  42 int symfollowset PROTO ((int[], int, int, int[]));
  43
  44
  45 /* check_for_backing_up - check a DFA state for backing up
  46  *
  47  * synopsis
  48  *     void check_for_backing_up( int ds, int state[numecs] );
  49  *
  50  * ds is the number of the state to check and state[] is its out-transitions,
  51  * indexed by equivalence class.
  52  */
  53
  54 void check_for_backing_up (ds, state)
  55      int ds;
  56      int state[];
  57 {
  58         if ((reject && !dfaacc[ds].dfaacc_set) || (!reject && !dfaacc[ds].dfaacc_state)) {      /* state is non-accepting */
  59                 ++num_backing_up;
  60
  61                 if (backing_up_report) {
  62                         fprintf (backing_up_file,
  63                                  _("State #%d is non-accepting -\n"), ds);
  64
  65                         /* identify the state */
  66                         dump_associated_rules (backing_up_file, ds);
  67
  68                         /* Now identify it further using the out- and
  69                          * jam-transitions.
  70                          */
  71                         dump_transitions (backing_up_file, state);
  72
  73                         putc ('\n', backing_up_file);
  74                 }
  75         }
  76 }
  77
  78
  79 /* check_trailing_context - check to see if NFA state set constitutes
  80  *                          "dangerous" trailing context
  81  *
  82  * synopsis
  83  *    void check_trailing_context( int nfa_states[num_states+1], int num_states,
  84  *                              int accset[nacc+1], int nacc );
  85  *
  86  * NOTES
  87  *  Trailing context is "dangerous" if both the head and the trailing
  88  *  part are of variable size \and/ there's a DFA state which contains
  89  *  both an accepting state for the head part of the rule and NFA states
  90  *  which occur after the beginning of the trailing context.
  91  *
  92  *  When such a rule is matched, it's impossible to tell if having been
  93  *  in the DFA state indicates the beginning of the trailing context or
  94  *  further-along scanning of the pattern.  In these cases, a warning
  95  *  message is issued.
  96  *
  97  *    nfa_states[1 .. num_states] is the list of NFA states in the DFA.
  98  *    accset[1 .. nacc] is the list of accepting numbers for the DFA state.
  99  */
 100
 101 void check_trailing_context (nfa_states, num_states, accset, nacc)
 102      int    *nfa_states, num_states;
 103      int    *accset;
 104      int nacc;
 105 {
 106         register int i, j;
 107
 108         for (i = 1; i <= num_states; ++i) {
 109                 int     ns = nfa_states[i];
 110                 register int type = state_type[ns];
 111                 register int ar = assoc_rule[ns];
 112
 113                 if (type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE) {   /* do nothing */
 114                 }
 115
 116                 else if (type == STATE_TRAILING_CONTEXT) {
 117                         /* Potential trouble.  Scan set of accepting numbers
 118                          * for the one marking the end of the "head".  We
 119                          * assume that this looping will be fairly cheap
 120                          * since it's rare that an accepting number set
 121                          * is large.
 122                          */
 123                         for (j = 1; j <= nacc; ++j)
 124                                 if (accset[j] & YY_TRAILING_HEAD_MASK) {
 125                                         line_warning (_
 126                                                       ("dangerous trailing context"),
 127                                                       rule_linenum[ar]);
 128                                         return;
 129                                 }
 130                 }
 131         }
 132 }
 133
 134
 135 /* dump_associated_rules - list the rules associated with a DFA state
 136  *
 137  * Goes through the set of NFA states associated with the DFA and
 138  * extracts the first MAX_ASSOC_RULES unique rules, sorts them,
 139  * and writes a report to the given file.
 140  */
 141
 142 void dump_associated_rules (file, ds)
 143      FILE   *file;
 144      int ds;
 145 {
 146         register int i, j;
 147         register int num_associated_rules = 0;
 148         int     rule_set[MAX_ASSOC_RULES + 1];
 149         int    *dset = dss[ds];
 150         int     size = dfasiz[ds];
 151
 152         for (i = 1; i <= size; ++i) {
 153                 register int rule_num = rule_linenum[assoc_rule[dset[i]]];
 154
 155                 for (j = 1; j <= num_associated_rules; ++j)
 156                         if (rule_num == rule_set[j])
 157                                 break;
 158
 159                 if (j > num_associated_rules) { /* new rule */
 160                         if (num_associated_rules < MAX_ASSOC_RULES)
 161                                 rule_set[++num_associated_rules] =
 162                                         rule_num;
 163                 }
 164         }
 165
 166         bubble (rule_set, num_associated_rules);
 167
 168         fprintf (file, _(" associated rule line numbers:"));
 169
 170         for (i = 1; i <= num_associated_rules; ++i) {
 171                 if (i % 8 == 1)
 172                         putc ('\n', file);
 173
 174                 fprintf (file, "\t%d", rule_set[i]);
 175         }
 176
 177         putc ('\n', file);
 178 }
 179
 180
 181 /* dump_transitions - list the transitions associated with a DFA state
 182  *
 183  * synopsis
 184  *     dump_transitions( FILE *file, int state[numecs] );
 185  *
 186  * Goes through the set of out-transitions and lists them in human-readable
 187  * form (i.e., not as equivalence classes); also lists jam transitions
 188  * (i.e., all those which are not out-transitions, plus EOF).  The dump
 189  * is done to the given file.
 190  */
 191
 192 void dump_transitions (file, state)
 193      FILE   *file;
 194      int state[];
 195 {
 196         register int i, ec;
 197         int     out_char_set[CSIZE];
 198
 199         for (i = 0; i < csize; ++i) {
 200                 ec = ABS (ecgroup[i]);
 201                 out_char_set[i] = state[ec];
 202         }
 203
 204         fprintf (file, _(" out-transitions: "));
 205
 206         list_character_set (file, out_char_set);
 207
 208         /* now invert the members of the set to get the jam transitions */
 209         for (i = 0; i < csize; ++i)
 210                 out_char_set[i] = !out_char_set[i];
 211
 212         fprintf (file, _("\n jam-transitions: EOF "));
 213
 214         list_character_set (file, out_char_set);
 215
 216         putc ('\n', file);
 217 }
 218
 219
 220 /* epsclosure - construct the epsilon closure of a set of ndfa states
 221  *
 222  * synopsis
 223  *    int *epsclosure( int t[num_states], int *numstates_addr,
 224  *                      int accset[num_rules+1], int *nacc_addr,
 225  *                      int *hashval_addr );
 226  *
 227  * NOTES
 228  *  The epsilon closure is the set of all states reachable by an arbitrary
 229  *  number of epsilon transitions, which themselves do not have epsilon
 230  *  transitions going out, unioned with the set of states which have non-null
 231  *  accepting numbers.  t is an array of size numstates of nfa state numbers.
 232  *  Upon return, t holds the epsilon closure and *numstates_addr is updated.
 233  *  accset holds a list of the accepting numbers, and the size of accset is
 234  *  given by *nacc_addr.  t may be subjected to reallocation if it is not
 235  *  large enough to hold the epsilon closure.
 236  *
 237  *  hashval is the hash value for the dfa corresponding to the state set.
 238  */
 239
 240 int    *epsclosure (t, ns_addr, accset, nacc_addr, hv_addr)
 241      int    *t, *ns_addr, accset[], *nacc_addr, *hv_addr;
 242 {
 243         register int stkpos, ns, tsp;
 244         int     numstates = *ns_addr, nacc, hashval, transsym, nfaccnum;
 245         int     stkend, nstate;
 246         static int did_stk_init = false, *stk;
 247
 248 #define MARK_STATE(state) \
 249 do{ trans1[state] = trans1[state] - MARKER_DIFFERENCE;} while(0)
 250
 251 #define IS_MARKED(state) (trans1[state] < 0)
 252
 253 #define UNMARK_STATE(state) \
 254 do{ trans1[state] = trans1[state] + MARKER_DIFFERENCE;} while(0)
 255
 256 #define CHECK_ACCEPT(state) \
 257 do{ \
 258 nfaccnum = accptnum[state]; \
 259 if ( nfaccnum != NIL ) \
 260 accset[++nacc] = nfaccnum; \
 261 }while(0)
 262
 263 #define DO_REALLOCATION() \
 264 do { \
 265 current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \
 266 ++num_reallocs; \
 267 t = reallocate_integer_array( t, current_max_dfa_size ); \
 268 stk = reallocate_integer_array( stk, current_max_dfa_size ); \
 269 }while(0) \
 270
 271 #define PUT_ON_STACK(state) \
 272 do { \
 273 if ( ++stkend >= current_max_dfa_size ) \
 274 DO_REALLOCATION(); \
 275 stk[stkend] = state; \
 276 MARK_STATE(state); \
 277 }while(0)
 278
 279 #define ADD_STATE(state) \
 280 do { \
 281 if ( ++numstates >= current_max_dfa_size ) \
 282 DO_REALLOCATION(); \
 283 t[numstates] = state; \
 284 hashval += state; \
 285 }while(0)
 286
 287 #define STACK_STATE(state) \
 288 do { \
 289 PUT_ON_STACK(state); \
 290 CHECK_ACCEPT(state); \
 291 if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \
 292 ADD_STATE(state); \
 293 }while(0)
 294
 295
 296         if (!did_stk_init) {
 297                 stk = allocate_integer_array (current_max_dfa_size);
 298                 did_stk_init = true;
 299         }
 300
 301         nacc = stkend = hashval = 0;
 302
 303         for (nstate = 1; nstate <= numstates; ++nstate) {
 304                 ns = t[nstate];
 305
 306                 /* The state could be marked if we've already pushed it onto
 307                  * the stack.
 308                  */
 309                 if (!IS_MARKED (ns)) {
 310                         PUT_ON_STACK (ns);
 311                         CHECK_ACCEPT (ns);
 312                         hashval += ns;
 313                 }
 314         }
 315
 316         for (stkpos = 1; stkpos <= stkend; ++stkpos) {
 317                 ns = stk[stkpos];
 318                 transsym = transchar[ns];
 319
 320                 if (transsym == SYM_EPSILON) {
 321                         tsp = trans1[ns] + MARKER_DIFFERENCE;
 322
 323                         if (tsp != NO_TRANSITION) {
 324                                 if (!IS_MARKED (tsp))
 325                                         STACK_STATE (tsp);
 326
 327                                 tsp = trans2[ns];
 328
 329                                 if (tsp != NO_TRANSITION
 330                                     && !IS_MARKED (tsp))
 331                                         STACK_STATE (tsp);
 332                         }
 333                 }
 334         }
 335
 336         /* Clear out "visit" markers. */
 337
 338         for (stkpos = 1; stkpos <= stkend; ++stkpos) {
 339                 if (IS_MARKED (stk[stkpos]))
 340                         UNMARK_STATE (stk[stkpos]);
 341                 else
 342                         flexfatal (_
 343                                    ("consistency check failed in epsclosure()"));
 344         }
 345
 346         *ns_addr = numstates;
 347         *hv_addr = hashval;
 348         *nacc_addr = nacc;
 349
 350         return t;
 351 }
 352
 353
 354 /* increase_max_dfas - increase the maximum number of DFAs */
 355
 356 void increase_max_dfas ()
 357 {
 358         current_max_dfas += MAX_DFAS_INCREMENT;
 359
 360         ++num_reallocs;
 361
 362         base = reallocate_integer_array (base, current_max_dfas);
 363         def = reallocate_integer_array (def, current_max_dfas);
 364         dfasiz = reallocate_integer_array (dfasiz, current_max_dfas);
 365         accsiz = reallocate_integer_array (accsiz, current_max_dfas);
 366         dhash = reallocate_integer_array (dhash, current_max_dfas);
 367         dss = reallocate_int_ptr_array (dss, current_max_dfas);
 368         dfaacc = reallocate_dfaacc_union (dfaacc, current_max_dfas);
 369
 370         if (nultrans)
 371                 nultrans =
 372                         reallocate_integer_array (nultrans,
 373                                                   current_max_dfas);
 374 }
 375
 376
 377 /* ntod - convert an ndfa to a dfa
 378  *
 379  * Creates the dfa corresponding to the ndfa we've constructed.  The
 380  * dfa starts out in state #1.
 381  */
 382
 383 void ntod ()
 384 {
 385         int    *accset, ds, nacc, newds;
 386         int     sym, hashval, numstates, dsize;
 387         int     num_full_table_rows=0;  /* used only for -f */
 388         int    *nset, *dset;
 389         int     targptr, totaltrans, i, comstate, comfreq, targ;
 390         int     symlist[CSIZE + 1];
 391         int     num_start_states;
 392         int     todo_head, todo_next;
 393
 394         struct yytbl_data *yynxt_tbl = 0;
 395         flex_int32_t *yynxt_data = 0, yynxt_curr = 0;
 396
 397         /* Note that the following are indexed by *equivalence classes*
 398          * and not by characters.  Since equivalence classes are indexed
 399          * beginning with 1, even if the scanner accepts NUL's, this
 400          * means that (since every character is potentially in its own
 401          * equivalence class) these arrays must have room for indices
 402          * from 1 to CSIZE, so their size must be CSIZE + 1.
 403          */
 404         int     duplist[CSIZE + 1], state[CSIZE + 1];
 405         int     targfreq[CSIZE + 1], targstate[CSIZE + 1];
 406
 407         /* accset needs to be large enough to hold all of the rules present
 408          * in the input, *plus* their YY_TRAILING_HEAD_MASK variants.
 409          */
 410         accset = allocate_integer_array ((num_rules + 1) * 2);
 411         nset = allocate_integer_array (current_max_dfa_size);
 412
 413         /* The "todo" queue is represented by the head, which is the DFA
 414          * state currently being processed, and the "next", which is the
 415          * next DFA state number available (not in use).  We depend on the
 416          * fact that snstods() returns DFA's \in increasing order/, and thus
 417          * need only know the bounds of the dfas to be processed.
 418          */
 419         todo_head = todo_next = 0;
 420
 421         for (i = 0; i <= csize; ++i) {
 422                 duplist[i] = NIL;
 423                 symlist[i] = false;
 424         }
 425
 426         for (i = 0; i <= num_rules; ++i)
 427                 accset[i] = NIL;
 428
 429         if (trace) {
 430                 dumpnfa (scset[1]);
 431                 fputs (_("\n\nDFA Dump:\n\n"), stderr);
 432         }
 433
 434         inittbl ();
 435
 436         /* Check to see whether we should build a separate table for
 437          * transitions on NUL characters.  We don't do this for full-speed
 438          * (-F) scanners, since for them we don't have a simple state
 439          * number lying around with which to index the table.  We also
 440          * don't bother doing it for scanners unless (1) NUL is in its own
 441          * equivalence class (indicated by a positive value of
 442          * ecgroup[NUL]), (2) NUL's equivalence class is the last
 443          * equivalence class, and (3) the number of equivalence classes is
 444          * the same as the number of characters.  This latter case comes
 445          * about when useecs is false or when it's true but every character
 446          * still manages to land in its own class (unlikely, but it's
 447          * cheap to check for).  If all these things are true then the
 448          * character code needed to represent NUL's equivalence class for
 449          * indexing the tables is going to take one more bit than the
 450          * number of characters, and therefore we won't be assured of
 451          * being able to fit it into a YY_CHAR variable.  This rules out
 452          * storing the transitions in a compressed table, since the code
 453          * for interpreting them uses a YY_CHAR variable (perhaps it
 454          * should just use an integer, though; this is worth pondering ...
 455          * ###).
 456          *
 457          * Finally, for full tables, we want the number of entries in the
 458          * table to be a power of two so the array references go fast (it
 459          * will just take a shift to compute the major index).  If
 460          * encoding NUL's transitions in the table will spoil this, we
 461          * give it its own table (note that this will be the case if we're
 462          * not using equivalence classes).
 463          */
 464
 465         /* Note that the test for ecgroup[0] == numecs below accomplishes
 466          * both (1) and (2) above
 467          */
 468         if (!fullspd && ecgroup[0] == numecs) {
 469                 /* NUL is alone in its equivalence class, which is the
 470                  * last one.
 471                  */
 472                 int     use_NUL_table = (numecs == csize);
 473
 474                 if (fulltbl && !use_NUL_table) {
 475                         /* We still may want to use the table if numecs
 476                          * is a power of 2.
 477                          */
 478                         int     power_of_two;
 479
 480                         for (power_of_two = 1; power_of_two <= csize;
 481                              power_of_two *= 2)
 482                                 if (numecs == power_of_two) {
 483                                         use_NUL_table = true;
 484                                         break;
 485                                 }
 486                 }
 487
 488                 if (use_NUL_table)
 489                         nultrans =
 490                                 allocate_integer_array (current_max_dfas);
 491
 492                 /* From now on, nultrans != nil indicates that we're
 493                  * saving null transitions for later, separate encoding.
 494                  */
 495         }
 496
 497
 498         if (fullspd) {
 499                 for (i = 0; i <= numecs; ++i)
 500                         state[i] = 0;
 501
 502                 place_state (state, 0, 0);
 503                 dfaacc[0].dfaacc_state = 0;
 504         }
 505
 506         else if (fulltbl) {
 507                 if (nultrans)
 508                         /* We won't be including NUL's transitions in the
 509                          * table, so build it for entries from 0 .. numecs - 1.
 510                          */
 511                         num_full_table_rows = numecs;
 512
 513                 else
 514                         /* Take into account the fact that we'll be including
 515                          * the NUL entries in the transition table.  Build it
 516                          * from 0 .. numecs.
 517                          */
 518                         num_full_table_rows = numecs + 1;
 519
 520                 /* Begin generating yy_nxt[][]
 521                  * This spans the entire LONG function.
 522                  * This table is tricky because we don't know how big it will be.
 523                  * So we'll have to realloc() on the way...
 524                  * we'll wait until we can calculate yynxt_tbl->td_hilen.
 525                  */
 526                 yynxt_tbl =
 527                         (struct yytbl_data *) calloc (1,
 528                                                       sizeof (struct
 529                                                               yytbl_data));
 530                 yytbl_data_init (yynxt_tbl, YYTD_ID_NXT);
 531                 yynxt_tbl->td_hilen = 1;
 532                 yynxt_tbl->td_lolen = num_full_table_rows;
 533                 yynxt_tbl->td_data = yynxt_data =
 534                         (flex_int32_t *) calloc (yynxt_tbl->td_lolen *
 535                                             yynxt_tbl->td_hilen,
 536                                             sizeof (flex_int32_t));
 537                 yynxt_curr = 0;
 538
 539                 buf_prints (&yydmap_buf,
 540                             "\t{YYTD_ID_NXT, (void**)&yy_nxt, sizeof(%s)},\n",
 541                             long_align ? "flex_int32_t" : "flex_int16_t");
 542
 543                 /* Unless -Ca, declare it "short" because it's a real
 544                  * long-shot that that won't be large enough.
 545                  */
 546                 if (gentables)
 547                         out_str_dec
 548                                 ("static yyconst %s yy_nxt[][%d] =\n    {\n",
 549                                  long_align ? "flex_int32_t" : "flex_int16_t",
 550                                  num_full_table_rows);
 551                 else {
 552                         out_dec ("#undef YY_NXT_LOLEN\n#define YY_NXT_LOLEN (%d)\n", num_full_table_rows);
 553                         out_str ("static yyconst %s *yy_nxt =0;\n",
 554                                  long_align ? "flex_int32_t" : "flex_int16_t");
 555                 }
 556
 557
 558                 if (gentables)
 559                         outn ("    {");
 560
 561                 /* Generate 0 entries for state #0. */
 562                 for (i = 0; i < num_full_table_rows; ++i) {
 563                         mk2data (0);
 564                         yynxt_data[yynxt_curr++] = 0;
 565                 }
 566
 567                 dataflush ();
 568                 if (gentables)
 569                         outn ("    },\n");
 570         }
 571
 572         /* Create the first states. */
 573
 574         num_start_states = lastsc * 2;
 575
 576         for (i = 1; i <= num_start_states; ++i) {
 577                 numstates = 1;
 578
 579                 /* For each start condition, make one state for the case when
 580                  * we're at the beginning of the line (the '^' operator) and
 581                  * one for the case when we're not.
 582                  */
 583                 if (i % 2 == 1)
 584                         nset[numstates] = scset[(i / 2) + 1];
 585                 else
 586                         nset[numstates] =
 587                                 mkbranch (scbol[i / 2], scset[i / 2]);
 588
 589                 nset = epsclosure (nset, &numstates, accset, &nacc,
 590                                    &hashval);
 591
 592                 if (snstods (nset, numstates, accset, nacc, hashval, &ds)) {
 593                         numas += nacc;
 594                         totnst += numstates;
 595                         ++todo_next;
 596
 597                         if (variable_trailing_context_rules && nacc > 0)
 598                                 check_trailing_context (nset, numstates,
 599                                                         accset, nacc);
 600                 }
 601         }
 602
 603         if (!fullspd) {
 604                 if (!snstods (nset, 0, accset, 0, 0, &end_of_buffer_state))
 605                         flexfatal (_
 606                                    ("could not create unique end-of-buffer state"));
 607
 608                 ++numas;
 609                 ++num_start_states;
 610                 ++todo_next;
 611         }
 612
 613
 614         while (todo_head < todo_next) {
 615                 targptr = 0;
 616                 totaltrans = 0;
 617
 618                 for (i = 1; i <= numecs; ++i)
 619                         state[i] = 0;
 620
 621                 ds = ++todo_head;
 622
 623                 dset = dss[ds];
 624                 dsize = dfasiz[ds];
 625
 626                 if (trace)
 627                         fprintf (stderr, _("state # %d:\n"), ds);
 628
 629                 sympartition (dset, dsize, symlist, duplist);
 630
 631                 for (sym = 1; sym <= numecs; ++sym) {
 632                         if (symlist[sym]) {
 633                                 symlist[sym] = 0;
 634
 635                                 if (duplist[sym] == NIL) {
 636                                         /* Symbol has unique out-transitions. */
 637                                         numstates =
 638                                                 symfollowset (dset, dsize,
 639                                                               sym, nset);
 640                                         nset = epsclosure (nset,
 641                                                            &numstates,
 642                                                            accset, &nacc,
 643                                                            &hashval);
 644
 645                                         if (snstods
 646                                             (nset, numstates, accset, nacc,
 647                                              hashval, &newds)) {
 648                                                 totnst = totnst +
 649                                                         numstates;
 650                                                 ++todo_next;
 651                                                 numas += nacc;
 652
 653                                                 if (variable_trailing_context_rules && nacc > 0)
 654                                                         check_trailing_context
 655                                                                 (nset,
 656                                                                  numstates,
 657                                                                  accset,
 658                                                                  nacc);
 659                                         }
 660
 661                                         state[sym] = newds;
 662
 663                                         if (trace)
 664                                                 fprintf (stderr,
 665                                                          "\t%d\t%d\n", sym,
 666                                                          newds);
 667
 668                                         targfreq[++targptr] = 1;
 669                                         targstate[targptr] = newds;
 670                                         ++numuniq;
 671                                 }
 672
 673                                 else {
 674                                         /* sym's equivalence class has the same
 675                                          * transitions as duplist(sym)'s
 676                                          * equivalence class.
 677                                          */
 678                                         targ = state[duplist[sym]];
 679                                         state[sym] = targ;
 680
 681                                         if (trace)
 682                                                 fprintf (stderr,
 683                                                          "\t%d\t%d\n", sym,
 684                                                          targ);
 685
 686                                         /* Update frequency count for
 687                                          * destination state.
 688                                          */
 689
 690                                         i = 0;
 691                                         while (targstate[++i] != targ) ;
 692
 693                                         ++targfreq[i];
 694                                         ++numdup;
 695                                 }
 696
 697                                 ++totaltrans;
 698                                 duplist[sym] = NIL;
 699                         }
 700                 }
 701
 702
 703                 numsnpairs += totaltrans;
 704
 705                 if (ds > num_start_states)
 706                         check_for_backing_up (ds, state);
 707
 708                 if (nultrans) {
 709                         nultrans[ds] = state[NUL_ec];
 710                         state[NUL_ec] = 0;      /* remove transition */
 711                 }
 712
 713                 if (fulltbl) {
 714
 715                         /* Each time we hit here, it's another td_hilen, so we realloc. */
 716                         yynxt_tbl->td_hilen++;
 717                         yynxt_tbl->td_data = yynxt_data =
 718                                 (flex_int32_t *) realloc (yynxt_data,
 719                                                      yynxt_tbl->td_hilen *
 720                                                      yynxt_tbl->td_lolen *
 721                                                      sizeof (flex_int32_t));
 722
 723
 724                         if (gentables)
 725                                 outn ("    {");
 726
 727                         /* Supply array's 0-element. */
 728                         if (ds == end_of_buffer_state) {
 729                                 mk2data (-end_of_buffer_state);
 730                                 yynxt_data[yynxt_curr++] =
 731                                         -end_of_buffer_state;
 732                         }
 733                         else {
 734                                 mk2data (end_of_buffer_state);
 735                                 yynxt_data[yynxt_curr++] =
 736                                         end_of_buffer_state;
 737                         }
 738
 739                         for (i = 1; i < num_full_table_rows; ++i) {
 740                                 /* Jams are marked by negative of state
 741                                  * number.
 742                                  */
 743                                 mk2data (state[i] ? state[i] : -ds);
 744                                 yynxt_data[yynxt_curr++] =
 745                                         state[i] ? state[i] : -ds;
 746                         }
 747
 748                         dataflush ();
 749                         if (gentables)
 750                                 outn ("    },\n");
 751                 }
 752
 753                 else if (fullspd)
 754                         place_state (state, ds, totaltrans);
 755
 756                 else if (ds == end_of_buffer_state)
 757                         /* Special case this state to make sure it does what
 758                          * it's supposed to, i.e., jam on end-of-buffer.
 759                          */
 760                         stack1 (ds, 0, 0, JAMSTATE);
 761
 762                 else {          /* normal, compressed state */
 763
 764                         /* Determine which destination state is the most
 765                          * common, and how many transitions to it there are.
 766                          */
 767
 768                         comfreq = 0;
 769                         comstate = 0;
 770
 771                         for (i = 1; i <= targptr; ++i)
 772                                 if (targfreq[i] > comfreq) {
 773                                         comfreq = targfreq[i];
 774                                         comstate = targstate[i];
 775                                 }
 776
 777                         bldtbl (state, ds, totaltrans, comstate, comfreq);
 778                 }
 779         }
 780
 781         if (fulltbl) {
 782                 dataend ();
 783                 if (tablesext) {
 784                         yytbl_data_compress (yynxt_tbl);
 785                         if (yytbl_data_fwrite (&tableswr, yynxt_tbl) < 0)
 786                                 flexerror (_
 787                                            ("Could not write yynxt_tbl[][]"));
 788                 }
 789                 if (yynxt_tbl) {
 790                         yytbl_data_destroy (yynxt_tbl);
 791                         yynxt_tbl = 0;
 792                 }
 793         }
 794
 795         else if (!fullspd) {
 796                 cmptmps ();     /* create compressed template entries */
 797
 798                 /* Create tables for all the states with only one
 799                  * out-transition.
 800                  */
 801                 while (onesp > 0) {
 802                         mk1tbl (onestate[onesp], onesym[onesp],
 803                                 onenext[onesp], onedef[onesp]);
 804                         --onesp;
 805                 }
 806
 807                 mkdeftbl ();
 808         }
 809
 810         flex_free ((void *) accset);
 811         flex_free ((void *) nset);
 812 }
 813
 814
 815 /* snstods - converts a set of ndfa states into a dfa state
 816  *
 817  * synopsis
 818  *    is_new_state = snstods( int sns[numstates], int numstates,
 819  *                              int accset[num_rules+1], int nacc,
 820  *                              int hashval, int *newds_addr );
 821  *
 822  * On return, the dfa state number is in newds.
 823  */
 824
 825 int snstods (sns, numstates, accset, nacc, hashval, newds_addr)
 826      int sns[], numstates, accset[], nacc, hashval, *newds_addr;
 827 {
 828         int     didsort = 0;
 829         register int i, j;
 830         int     newds, *oldsns;
 831
 832         for (i = 1; i <= lastdfa; ++i)
 833                 if (hashval == dhash[i]) {
 834                         if (numstates == dfasiz[i]) {
 835                                 oldsns = dss[i];
 836
 837                                 if (!didsort) {
 838                                         /* We sort the states in sns so we
 839                                          * can compare it to oldsns quickly.
 840                                          * We use bubble because there probably
 841                                          * aren't very many states.
 842                                          */
 843                                         bubble (sns, numstates);
 844                                         didsort = 1;
 845                                 }
 846
 847                                 for (j = 1; j <= numstates; ++j)
 848                                         if (sns[j] != oldsns[j])
 849                                                 break;
 850
 851                                 if (j > numstates) {
 852                                         ++dfaeql;
 853                                         *newds_addr = i;
 854                                         return 0;
 855                                 }
 856
 857                                 ++hshcol;
 858                         }
 859
 860                         else
 861                                 ++hshsave;
 862                 }
 863
 864         /* Make a new dfa. */
 865
 866         if (++lastdfa >= current_max_dfas)
 867                 increase_max_dfas ();
 868
 869         newds = lastdfa;
 870
 871         dss[newds] = allocate_integer_array (numstates + 1);
 872
 873         /* If we haven't already sorted the states in sns, we do so now,
 874          * so that future comparisons with it can be made quickly.
 875          */
 876
 877         if (!didsort)
 878                 bubble (sns, numstates);
 879
 880         for (i = 1; i <= numstates; ++i)
 881                 dss[newds][i] = sns[i];
 882
 883         dfasiz[newds] = numstates;
 884         dhash[newds] = hashval;
 885
 886         if (nacc == 0) {
 887                 if (reject)
 888                         dfaacc[newds].dfaacc_set = (int *) 0;
 889                 else
 890                         dfaacc[newds].dfaacc_state = 0;
 891
 892                 accsiz[newds] = 0;
 893         }
 894
 895         else if (reject) {
 896                 /* We sort the accepting set in increasing order so the
 897                  * disambiguating rule that the first rule listed is considered
 898                  * match in the event of ties will work.  We use a bubble
 899                  * sort since the list is probably quite small.
 900                  */
 901
 902                 bubble (accset, nacc);
 903
 904                 dfaacc[newds].dfaacc_set =
 905                         allocate_integer_array (nacc + 1);
 906
 907                 /* Save the accepting set for later */
 908                 for (i = 1; i <= nacc; ++i) {
 909                         dfaacc[newds].dfaacc_set[i] = accset[i];
 910
 911                         if (accset[i] <= num_rules)
 912                                 /* Who knows, perhaps a REJECT can yield
 913                                  * this rule.
 914                                  */
 915                                 rule_useful[accset[i]] = true;
 916                 }
 917
 918                 accsiz[newds] = nacc;
 919         }
 920
 921         else {
 922                 /* Find lowest numbered rule so the disambiguating rule
 923                  * will work.
 924                  */
 925                 j = num_rules + 1;
 926
 927                 for (i = 1; i <= nacc; ++i)
 928                         if (accset[i] < j)
 929                                 j = accset[i];
 930
 931                 dfaacc[newds].dfaacc_state = j;
 932
 933                 if (j <= num_rules)
 934                         rule_useful[j] = true;
 935         }
 936
 937         *newds_addr = newds;
 938
 939         return 1;
 940 }
 941
 942
 943 /* symfollowset - follow the symbol transitions one step
 944  *
 945  * synopsis
 946  *    numstates = symfollowset( int ds[current_max_dfa_size], int dsize,
 947  *                              int transsym, int nset[current_max_dfa_size] );
 948  */
 949
 950 int symfollowset (ds, dsize, transsym, nset)
 951      int ds[], dsize, transsym, nset[];
 952 {
 953         int     ns, tsp, sym, i, j, lenccl, ch, numstates, ccllist;
 954
 955         numstates = 0;
 956
 957         for (i = 1; i <= dsize; ++i) {  /* for each nfa state ns in the state set of ds */
 958                 ns = ds[i];
 959                 sym = transchar[ns];
 960                 tsp = trans1[ns];
 961
 962                 if (sym < 0) {  /* it's a character class */
 963                         sym = -sym;
 964                         ccllist = cclmap[sym];
 965                         lenccl = ccllen[sym];
 966
 967                         if (cclng[sym]) {
 968                                 for (j = 0; j < lenccl; ++j) {
 969                                         /* Loop through negated character
 970                                          * class.
 971                                          */
 972                                         ch = ccltbl[ccllist + j];
 973
 974                                         if (ch == 0)
 975                                                 ch = NUL_ec;
 976
 977                                         if (ch > transsym)
 978                                                 /* Transsym isn't in negated
 979                                                  * ccl.
 980                                                  */
 981                                                 break;
 982
 983                                         else if (ch == transsym)
 984                                                 /* next 2 */
 985                                                 goto bottom;
 986                                 }
 987
 988                                 /* Didn't find transsym in ccl. */
 989                                 nset[++numstates] = tsp;
 990                         }
 991
 992                         else
 993                                 for (j = 0; j < lenccl; ++j) {
 994                                         ch = ccltbl[ccllist + j];
 995
 996                                         if (ch == 0)
 997                                                 ch = NUL_ec;
 998
 999                                         if (ch > transsym)
1000                                                 break;
1001                                         else if (ch == transsym) {
1002                                                 nset[++numstates] = tsp;
1003                                                 break;
1004                                         }
1005                                 }
1006                 }
1007
1008                 else if (sym == SYM_EPSILON) {  /* do nothing */
1009                 }
1010
1011                 else if (ABS (ecgroup[sym]) == transsym)
1012                         nset[++numstates] = tsp;
1013
1014               bottom:;
1015         }
1016
1017         return numstates;
1018 }
1019
1020
1021 /* sympartition - partition characters with same out-transitions
1022  *
1023  * synopsis
1024  *    sympartition( int ds[current_max_dfa_size], int numstates,
1025  *                      int symlist[numecs], int duplist[numecs] );
1026  */
1027
1028 void sympartition (ds, numstates, symlist, duplist)
1029      int ds[], numstates;
1030      int symlist[], duplist[];
1031 {
1032         int     tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich;
1033
1034         /* Partitioning is done by creating equivalence classes for those
1035          * characters which have out-transitions from the given state.  Thus
1036          * we are really creating equivalence classes of equivalence classes.
1037          */
1038
1039         for (i = 1; i <= numecs; ++i) { /* initialize equivalence class list */
1040                 duplist[i] = i - 1;
1041                 dupfwd[i] = i + 1;
1042         }
1043
1044         duplist[1] = NIL;
1045         dupfwd[numecs] = NIL;
1046
1047         for (i = 1; i <= numstates; ++i) {
1048                 ns = ds[i];
1049                 tch = transchar[ns];
1050
1051                 if (tch != SYM_EPSILON) {
1052                         if (tch < -lastccl || tch >= csize) {
1053                                 flexfatal (_
1054                                            ("bad transition character detected in sympartition()"));
1055                         }
1056
1057                         if (tch >= 0) { /* character transition */
1058                                 int     ec = ecgroup[tch];
1059
1060                                 mkechar (ec, dupfwd, duplist);
1061                                 symlist[ec] = 1;
1062                         }
1063
1064                         else {  /* character class */
1065                                 tch = -tch;
1066
1067                                 lenccl = ccllen[tch];
1068                                 cclp = cclmap[tch];
1069                                 mkeccl (ccltbl + cclp, lenccl, dupfwd,
1070                                         duplist, numecs, NUL_ec);
1071
1072                                 if (cclng[tch]) {
1073                                         j = 0;
1074
1075                                         for (k = 0; k < lenccl; ++k) {
1076                                                 ich = ccltbl[cclp + k];
1077
1078                                                 if (ich == 0)
1079                                                         ich = NUL_ec;
1080
1081                                                 for (++j; j < ich; ++j)
1082                                                         symlist[j] = 1;
1083                                         }
1084
1085                                         for (++j; j <= numecs; ++j)
1086                                                 symlist[j] = 1;
1087                                 }
1088
1089                                 else
1090                                         for (k = 0; k < lenccl; ++k) {
1091                                                 ich = ccltbl[cclp + k];
1092
1093                                                 if (ich == 0)
1094                                                         ich = NUL_ec;
1095
1096                                                 symlist[ich] = 1;
1097                                         }
1098                         }
1099                 }
1100         }
1101 }