tools/llvm: Do not build with symbols
[minix3.git] / external / bsd / flex / dist / nfa.c
blob7d616351d657be3434d927902f56cbe7c41cd471
1 /* $NetBSD: nfa.c,v 1.1.1.1 2009/10/26 00:26:28 christos Exp $ */
3 /* nfa - NFA construction routines */
5 /* Copyright (c) 1990 The Regents of the University of California. */
6 /* All rights reserved. */
8 /* This code is derived from software contributed to Berkeley by */
9 /* Vern Paxson. */
11 /* The United States Government has rights in this work pursuant */
12 /* to contract no. DE-AC03-76SF00098 between the United States */
13 /* Department of Energy and the University of California. */
15 /* This file is part of flex. */
17 /* Redistribution and use in source and binary forms, with or without */
18 /* modification, are permitted provided that the following conditions */
19 /* are met: */
21 /* 1. Redistributions of source code must retain the above copyright */
22 /* notice, this list of conditions and the following disclaimer. */
23 /* 2. Redistributions in binary form must reproduce the above copyright */
24 /* notice, this list of conditions and the following disclaimer in the */
25 /* documentation and/or other materials provided with the distribution. */
27 /* Neither the name of the University nor the names of its contributors */
28 /* may be used to endorse or promote products derived from this software */
29 /* without specific prior written permission. */
31 /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
32 /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
33 /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
34 /* PURPOSE. */
36 #include "flexdef.h"
39 /* declare functions that have forward references */
41 int dupmachine PROTO ((int));
42 void mkxtion PROTO ((int, int));
45 /* add_accept - add an accepting state to a machine
47 * accepting_number becomes mach's accepting number.
50 void add_accept (mach, accepting_number)
51 int mach, accepting_number;
53 /* Hang the accepting number off an epsilon state. if it is associated
54 * with a state that has a non-epsilon out-transition, then the state
55 * will accept BEFORE it makes that transition, i.e., one character
56 * too soon.
59 if (transchar[finalst[mach]] == SYM_EPSILON)
60 accptnum[finalst[mach]] = accepting_number;
62 else {
63 int astate = mkstate (SYM_EPSILON);
65 accptnum[astate] = accepting_number;
66 (void) link_machines (mach, astate);
71 /* copysingl - make a given number of copies of a singleton machine
73 * synopsis
75 * newsng = copysingl( singl, num );
77 * newsng - a new singleton composed of num copies of singl
78 * singl - a singleton machine
79 * num - the number of copies of singl to be present in newsng
82 int copysingl (singl, num)
83 int singl, num;
85 int copy, i;
87 copy = mkstate (SYM_EPSILON);
89 for (i = 1; i <= num; ++i)
90 copy = link_machines (copy, dupmachine (singl));
92 return copy;
96 /* dumpnfa - debugging routine to write out an nfa */
98 void dumpnfa (state1)
99 int state1;
102 int sym, tsp1, tsp2, anum, ns;
104 fprintf (stderr,
106 ("\n\n********** beginning dump of nfa with start state %d\n"),
107 state1);
109 /* We probably should loop starting at firstst[state1] and going to
110 * lastst[state1], but they're not maintained properly when we "or"
111 * all of the rules together. So we use our knowledge that the machine
112 * starts at state 1 and ends at lastnfa.
115 /* for ( ns = firstst[state1]; ns <= lastst[state1]; ++ns ) */
116 for (ns = 1; ns <= lastnfa; ++ns) {
117 fprintf (stderr, _("state # %4d\t"), ns);
119 sym = transchar[ns];
120 tsp1 = trans1[ns];
121 tsp2 = trans2[ns];
122 anum = accptnum[ns];
124 fprintf (stderr, "%3d: %4d, %4d", sym, tsp1, tsp2);
126 if (anum != NIL)
127 fprintf (stderr, " [%d]", anum);
129 fprintf (stderr, "\n");
132 fprintf (stderr, _("********** end of dump\n"));
136 /* dupmachine - make a duplicate of a given machine
138 * synopsis
140 * copy = dupmachine( mach );
142 * copy - holds duplicate of mach
143 * mach - machine to be duplicated
145 * note that the copy of mach is NOT an exact duplicate; rather, all the
146 * transition states values are adjusted so that the copy is self-contained,
147 * as the original should have been.
149 * also note that the original MUST be contiguous, with its low and high
150 * states accessible by the arrays firstst and lastst
153 int dupmachine (mach)
154 int mach;
156 int i, init, state_offset;
157 int state = 0;
158 int last = lastst[mach];
160 for (i = firstst[mach]; i <= last; ++i) {
161 state = mkstate (transchar[i]);
163 if (trans1[i] != NO_TRANSITION) {
164 mkxtion (finalst[state], trans1[i] + state - i);
166 if (transchar[i] == SYM_EPSILON &&
167 trans2[i] != NO_TRANSITION)
168 mkxtion (finalst[state],
169 trans2[i] + state - i);
172 accptnum[state] = accptnum[i];
175 if (state == 0)
176 flexfatal (_("empty machine in dupmachine()"));
178 state_offset = state - i + 1;
180 init = mach + state_offset;
181 firstst[init] = firstst[mach] + state_offset;
182 finalst[init] = finalst[mach] + state_offset;
183 lastst[init] = lastst[mach] + state_offset;
185 return init;
189 /* finish_rule - finish up the processing for a rule
191 * An accepting number is added to the given machine. If variable_trail_rule
192 * is true then the rule has trailing context and both the head and trail
193 * are variable size. Otherwise if headcnt or trailcnt is non-zero then
194 * the machine recognizes a pattern with trailing context and headcnt is
195 * the number of characters in the matched part of the pattern, or zero
196 * if the matched part has variable length. trailcnt is the number of
197 * trailing context characters in the pattern, or zero if the trailing
198 * context has variable length.
201 void finish_rule (mach, variable_trail_rule, headcnt, trailcnt,
202 pcont_act)
203 int mach, variable_trail_rule, headcnt, trailcnt, pcont_act;
205 char action_text[MAXLINE];
207 add_accept (mach, num_rules);
209 /* We did this in new_rule(), but it often gets the wrong
210 * number because we do it before we start parsing the current rule.
212 rule_linenum[num_rules] = linenum;
214 /* If this is a continued action, then the line-number has already
215 * been updated, giving us the wrong number.
217 if (continued_action)
218 --rule_linenum[num_rules];
221 /* If the previous rule was continued action, then we inherit the
222 * previous newline flag, possibly overriding the current one.
224 if (pcont_act && rule_has_nl[num_rules - 1])
225 rule_has_nl[num_rules] = true;
227 snprintf (action_text, sizeof(action_text), "case %d:\n", num_rules);
228 add_action (action_text);
229 if (rule_has_nl[num_rules]) {
230 snprintf (action_text, sizeof(action_text), "/* rule %d can match eol */\n",
231 num_rules);
232 add_action (action_text);
236 if (variable_trail_rule) {
237 rule_type[num_rules] = RULE_VARIABLE;
239 if (performance_report > 0)
240 fprintf (stderr,
242 ("Variable trailing context rule at line %d\n"),
243 rule_linenum[num_rules]);
245 variable_trailing_context_rules = true;
248 else {
249 rule_type[num_rules] = RULE_NORMAL;
251 if (headcnt > 0 || trailcnt > 0) {
252 /* Do trailing context magic to not match the trailing
253 * characters.
255 char *scanner_cp = "YY_G(yy_c_buf_p) = yy_cp";
256 char *scanner_bp = "yy_bp";
258 add_action
259 ("*yy_cp = YY_G(yy_hold_char); /* undo effects of setting up yytext */\n");
261 if (headcnt > 0) {
262 snprintf (action_text, sizeof(action_text), "%s = %s + %d;\n",
263 scanner_cp, scanner_bp, headcnt);
264 add_action (action_text);
267 else {
268 snprintf (action_text, sizeof(action_text), "%s -= %d;\n",
269 scanner_cp, trailcnt);
270 add_action (action_text);
273 add_action
274 ("YY_DO_BEFORE_ACTION; /* set up yytext again */\n");
278 /* Okay, in the action code at this point yytext and yyleng have
279 * their proper final values for this rule, so here's the point
280 * to do any user action. But don't do it for continued actions,
281 * as that'll result in multiple YY_RULE_SETUP's.
283 if (!continued_action)
284 add_action ("YY_RULE_SETUP\n");
286 line_directive_out ((FILE *) 0, 1);
290 /* link_machines - connect two machines together
292 * synopsis
294 * new = link_machines( first, last );
296 * new - a machine constructed by connecting first to last
297 * first - the machine whose successor is to be last
298 * last - the machine whose predecessor is to be first
300 * note: this routine concatenates the machine first with the machine
301 * last to produce a machine new which will pattern-match first first
302 * and then last, and will fail if either of the sub-patterns fails.
303 * FIRST is set to new by the operation. last is unmolested.
306 int link_machines (first, last)
307 int first, last;
309 if (first == NIL)
310 return last;
312 else if (last == NIL)
313 return first;
315 else {
316 mkxtion (finalst[first], last);
317 finalst[first] = finalst[last];
318 lastst[first] = MAX (lastst[first], lastst[last]);
319 firstst[first] = MIN (firstst[first], firstst[last]);
321 return first;
326 /* mark_beginning_as_normal - mark each "beginning" state in a machine
327 * as being a "normal" (i.e., not trailing context-
328 * associated) states
330 * The "beginning" states are the epsilon closure of the first state
333 void mark_beginning_as_normal (mach)
334 register int mach;
336 switch (state_type[mach]) {
337 case STATE_NORMAL:
338 /* Oh, we've already visited here. */
339 return;
341 case STATE_TRAILING_CONTEXT:
342 state_type[mach] = STATE_NORMAL;
344 if (transchar[mach] == SYM_EPSILON) {
345 if (trans1[mach] != NO_TRANSITION)
346 mark_beginning_as_normal (trans1[mach]);
348 if (trans2[mach] != NO_TRANSITION)
349 mark_beginning_as_normal (trans2[mach]);
351 break;
353 default:
354 flexerror (_
355 ("bad state type in mark_beginning_as_normal()"));
356 break;
361 /* mkbranch - make a machine that branches to two machines
363 * synopsis
365 * branch = mkbranch( first, second );
367 * branch - a machine which matches either first's pattern or second's
368 * first, second - machines whose patterns are to be or'ed (the | operator)
370 * Note that first and second are NEITHER destroyed by the operation. Also,
371 * the resulting machine CANNOT be used with any other "mk" operation except
372 * more mkbranch's. Compare with mkor()
375 int mkbranch (first, second)
376 int first, second;
378 int eps;
380 if (first == NO_TRANSITION)
381 return second;
383 else if (second == NO_TRANSITION)
384 return first;
386 eps = mkstate (SYM_EPSILON);
388 mkxtion (eps, first);
389 mkxtion (eps, second);
391 return eps;
395 /* mkclos - convert a machine into a closure
397 * synopsis
398 * new = mkclos( state );
400 * new - a new state which matches the closure of "state"
403 int mkclos (state)
404 int state;
406 return mkopt (mkposcl (state));
410 /* mkopt - make a machine optional
412 * synopsis
414 * new = mkopt( mach );
416 * new - a machine which optionally matches whatever mach matched
417 * mach - the machine to make optional
419 * notes:
420 * 1. mach must be the last machine created
421 * 2. mach is destroyed by the call
424 int mkopt (mach)
425 int mach;
427 int eps;
429 if (!SUPER_FREE_EPSILON (finalst[mach])) {
430 eps = mkstate (SYM_EPSILON);
431 mach = link_machines (mach, eps);
434 /* Can't skimp on the following if FREE_EPSILON(mach) is true because
435 * some state interior to "mach" might point back to the beginning
436 * for a closure.
438 eps = mkstate (SYM_EPSILON);
439 mach = link_machines (eps, mach);
441 mkxtion (mach, finalst[mach]);
443 return mach;
447 /* mkor - make a machine that matches either one of two machines
449 * synopsis
451 * new = mkor( first, second );
453 * new - a machine which matches either first's pattern or second's
454 * first, second - machines whose patterns are to be or'ed (the | operator)
456 * note that first and second are both destroyed by the operation
457 * the code is rather convoluted because an attempt is made to minimize
458 * the number of epsilon states needed
461 int mkor (first, second)
462 int first, second;
464 int eps, orend;
466 if (first == NIL)
467 return second;
469 else if (second == NIL)
470 return first;
472 else {
473 /* See comment in mkopt() about why we can't use the first
474 * state of "first" or "second" if they satisfy "FREE_EPSILON".
476 eps = mkstate (SYM_EPSILON);
478 first = link_machines (eps, first);
480 mkxtion (first, second);
482 if (SUPER_FREE_EPSILON (finalst[first]) &&
483 accptnum[finalst[first]] == NIL) {
484 orend = finalst[first];
485 mkxtion (finalst[second], orend);
488 else if (SUPER_FREE_EPSILON (finalst[second]) &&
489 accptnum[finalst[second]] == NIL) {
490 orend = finalst[second];
491 mkxtion (finalst[first], orend);
494 else {
495 eps = mkstate (SYM_EPSILON);
497 first = link_machines (first, eps);
498 orend = finalst[first];
500 mkxtion (finalst[second], orend);
504 finalst[first] = orend;
505 return first;
509 /* mkposcl - convert a machine into a positive closure
511 * synopsis
512 * new = mkposcl( state );
514 * new - a machine matching the positive closure of "state"
517 int mkposcl (state)
518 int state;
520 int eps;
522 if (SUPER_FREE_EPSILON (finalst[state])) {
523 mkxtion (finalst[state], state);
524 return state;
527 else {
528 eps = mkstate (SYM_EPSILON);
529 mkxtion (eps, state);
530 return link_machines (state, eps);
535 /* mkrep - make a replicated machine
537 * synopsis
538 * new = mkrep( mach, lb, ub );
540 * new - a machine that matches whatever "mach" matched from "lb"
541 * number of times to "ub" number of times
543 * note
544 * if "ub" is INFINITE_REPEAT then "new" matches "lb" or more occurrences of "mach"
547 int mkrep (mach, lb, ub)
548 int mach, lb, ub;
550 int base_mach, tail, copy, i;
552 base_mach = copysingl (mach, lb - 1);
554 if (ub == INFINITE_REPEAT) {
555 copy = dupmachine (mach);
556 mach = link_machines (mach,
557 link_machines (base_mach,
558 mkclos (copy)));
561 else {
562 tail = mkstate (SYM_EPSILON);
564 for (i = lb; i < ub; ++i) {
565 copy = dupmachine (mach);
566 tail = mkopt (link_machines (copy, tail));
569 mach =
570 link_machines (mach,
571 link_machines (base_mach, tail));
574 return mach;
578 /* mkstate - create a state with a transition on a given symbol
580 * synopsis
582 * state = mkstate( sym );
584 * state - a new state matching sym
585 * sym - the symbol the new state is to have an out-transition on
587 * note that this routine makes new states in ascending order through the
588 * state array (and increments LASTNFA accordingly). The routine DUPMACHINE
589 * relies on machines being made in ascending order and that they are
590 * CONTIGUOUS. Change it and you will have to rewrite DUPMACHINE (kludge
591 * that it admittedly is)
594 int mkstate (sym)
595 int sym;
597 if (++lastnfa >= current_mns) {
598 if ((current_mns += MNS_INCREMENT) >= maximum_mns)
599 lerrif (_
600 ("input rules are too complicated (>= %d NFA states)"),
601 current_mns);
603 ++num_reallocs;
605 firstst = reallocate_integer_array (firstst, current_mns);
606 lastst = reallocate_integer_array (lastst, current_mns);
607 finalst = reallocate_integer_array (finalst, current_mns);
608 transchar =
609 reallocate_integer_array (transchar, current_mns);
610 trans1 = reallocate_integer_array (trans1, current_mns);
611 trans2 = reallocate_integer_array (trans2, current_mns);
612 accptnum =
613 reallocate_integer_array (accptnum, current_mns);
614 assoc_rule =
615 reallocate_integer_array (assoc_rule, current_mns);
616 state_type =
617 reallocate_integer_array (state_type, current_mns);
620 firstst[lastnfa] = lastnfa;
621 finalst[lastnfa] = lastnfa;
622 lastst[lastnfa] = lastnfa;
623 transchar[lastnfa] = sym;
624 trans1[lastnfa] = NO_TRANSITION;
625 trans2[lastnfa] = NO_TRANSITION;
626 accptnum[lastnfa] = NIL;
627 assoc_rule[lastnfa] = num_rules;
628 state_type[lastnfa] = current_state_type;
630 /* Fix up equivalence classes base on this transition. Note that any
631 * character which has its own transition gets its own equivalence
632 * class. Thus only characters which are only in character classes
633 * have a chance at being in the same equivalence class. E.g. "a|b"
634 * puts 'a' and 'b' into two different equivalence classes. "[ab]"
635 * puts them in the same equivalence class (barring other differences
636 * elsewhere in the input).
639 if (sym < 0) {
640 /* We don't have to update the equivalence classes since
641 * that was already done when the ccl was created for the
642 * first time.
646 else if (sym == SYM_EPSILON)
647 ++numeps;
649 else {
650 check_char (sym);
652 if (useecs)
653 /* Map NUL's to csize. */
654 mkechar (sym ? sym : csize, nextecm, ecgroup);
657 return lastnfa;
661 /* mkxtion - make a transition from one state to another
663 * synopsis
665 * mkxtion( statefrom, stateto );
667 * statefrom - the state from which the transition is to be made
668 * stateto - the state to which the transition is to be made
671 void mkxtion (statefrom, stateto)
672 int statefrom, stateto;
674 if (trans1[statefrom] == NO_TRANSITION)
675 trans1[statefrom] = stateto;
677 else if ((transchar[statefrom] != SYM_EPSILON) ||
678 (trans2[statefrom] != NO_TRANSITION))
679 flexfatal (_("found too many transitions in mkxtion()"));
681 else { /* second out-transition for an epsilon state */
682 ++eps2;
683 trans2[statefrom] = stateto;
687 /* new_rule - initialize for a new rule */
689 void new_rule ()
691 if (++num_rules >= current_max_rules) {
692 ++num_reallocs;
693 current_max_rules += MAX_RULES_INCREMENT;
694 rule_type = reallocate_integer_array (rule_type,
695 current_max_rules);
696 rule_linenum = reallocate_integer_array (rule_linenum,
697 current_max_rules);
698 rule_useful = reallocate_integer_array (rule_useful,
699 current_max_rules);
700 rule_has_nl = reallocate_bool_array (rule_has_nl,
701 current_max_rules);
704 if (num_rules > MAX_RULE)
705 lerrif (_("too many rules (> %d)!"), MAX_RULE);
707 rule_linenum[num_rules] = linenum;
708 rule_useful[num_rules] = false;
709 rule_has_nl[num_rules] = false;