2 * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
33 /* Types of builtin machines. */
71 struct LongestMatchPart
;
75 /* Type of augmentation. Describes locations in the machine. */
78 /* Transition actions/priorities. */
84 /* Global error actions. */
88 at_not_start_gbl_error
,
89 at_not_final_gbl_error
,
92 /* Local error actions. */
96 at_not_start_local_error
,
97 at_not_final_local_error
,
98 at_middle_local_error
,
100 /* To State Action embedding. */
104 at_not_start_to_state
,
105 at_not_final_to_state
,
108 /* From State Action embedding. */
112 at_not_start_from_state
,
113 at_not_final_from_state
,
114 at_middle_from_state
,
116 /* EOF Action embedding. */
125 /* IMPORTANT: These must follow the same order as the state augs in AugType
126 * since we will be using this to compose AugType. */
143 struct ExplicitMachine
;
147 /* Reference to a named state. */
148 typedef Vector
<char*> NameRef
;
149 typedef Vector
<NameRef
*> NameRefList
;
150 typedef Vector
<NameInst
*> NameTargList
;
152 /* Structure for storing location of epsilon transitons. */
155 EpsilonLink( const InputLoc
&loc
, NameRef
&target
)
156 : loc(loc
), target(target
) { }
164 Label( const InputLoc
&loc
, char *data
)
165 : loc(loc
), data(data
) { }
171 /* Structrue represents an action assigned to some FactorWithAug node. The
172 * factor with aug will keep an array of these. */
175 ParserAction( const InputLoc
&loc
, AugType type
, int localErrKey
, Action
*action
)
176 : loc(loc
), type(type
), localErrKey(localErrKey
), action(action
) { }
186 ConditionTest( const InputLoc
&loc
, AugType type
, Action
*action
, bool sense
) :
187 loc(loc
), type(type
), action(action
), sense(sense
) { }
201 void append( const Token
&other
);
202 void set( const char *str
, int len
);
205 char *prepareLitString( const InputLoc
&loc
, const char *src
, long length
,
206 long &resLen
, bool &caseInsensitive
);
208 /* Store the value and type of a priority augmentation. */
211 PriorityAug( AugType type
, int priorKey
, int priorValue
) :
212 type(type
), priorKey(priorKey
), priorValue(priorValue
) { }
220 * A Variable Definition
224 VarDef( const char *name
, JoinOrLm
*joinOrLm
)
225 : name(name
), joinOrLm(joinOrLm
), isExport(false) { }
227 /* Parse tree traversal. */
228 FsmAp
*walk( ParseData
*pd
);
229 void makeNameTree( const InputLoc
&loc
, ParseData
*pd
);
230 void resolveNameRefs( ParseData
*pd
);
241 * Wherever possible the item match will execute on the character. If not
242 * possible the item match will execute on a lookahead character and either
243 * hold the current char (if one away) or backup.
245 * How to handle the problem of backing up over a buffer break?
247 * Don't want to use pending out transitions for embedding item match because
248 * the role of item match action is different: it may sometimes match on the
249 * final transition, or may match on a lookahead character.
251 * Don't want to invent a new operator just for this. So just trail action
252 * after machine, this means we can only use literal actions.
254 * The item action may
256 * What states of the machine will be final. The item actions that wrap around
257 * on the last character will go straight to the start state.
259 * Some transitions will be lookahead transitions, they will hold the current
260 * character. Crossing them with regular transitions must be restricted
261 * because it does not make sense. The transition cannot simultaneously hold
262 * and consume the current character.
264 struct LongestMatchPart
266 LongestMatchPart( Join
*join
, Action
*action
,
267 InputLoc
&semiLoc
, int longestMatchId
)
269 join(join
), action(action
), semiLoc(semiLoc
),
270 longestMatchId(longestMatchId
), inLmSelect(false) { }
281 Action
*actLagBehind
;
284 LongestMatch
*longestMatch
;
286 LongestMatchPart
*prev
, *next
;
289 /* Declare a new type so that ptreetypes.h need not include dlist.h. */
290 struct LmPartList
: DList
<LongestMatchPart
> {};
294 /* Construct with a list of joins */
295 LongestMatch( const InputLoc
&loc
, LmPartList
*longestMatchList
) :
296 loc(loc
), longestMatchList(longestMatchList
), name(0),
297 lmSwitchHandlesError(false) { }
299 /* Tree traversal. */
300 FsmAp
*walk( ParseData
*pd
);
301 void makeNameTree( ParseData
*pd
);
302 void resolveNameRefs( ParseData
*pd
);
303 void transferScannerLeavingActions( FsmAp
*graph
);
304 void runLongestMatch( ParseData
*pd
, FsmAp
*graph
);
305 Action
*newAction( ParseData
*pd
, const InputLoc
&loc
, const char *name
,
306 InlineList
*inlineList
);
307 void makeActions( ParseData
*pd
);
308 void findName( ParseData
*pd
);
309 void restart( FsmAp
*graph
, TransAp
*trans
);
312 LmPartList
*longestMatchList
;
316 bool lmSwitchHandlesError
;
318 LongestMatch
*next
, *prev
;
322 /* List of Expressions. */
323 typedef DList
<Expression
> ExprList
;
332 JoinOrLm( Join
*join
) :
333 join(join
), longestMatch(0), type(JoinType
) {}
334 JoinOrLm( LongestMatch
*longestMatch
) :
335 join(0), longestMatch(longestMatch
), type(LongestMatchType
) {}
337 FsmAp
*walk( ParseData
*pd
);
338 void makeNameTree( ParseData
*pd
);
339 void resolveNameRefs( ParseData
*pd
);
342 LongestMatch
*longestMatch
;
351 /* Construct with the first expression. */
352 Join( Expression
*expr
);
353 Join( const InputLoc
&loc
, Expression
*expr
);
355 /* Tree traversal. */
356 FsmAp
*walk( ParseData
*pd
);
357 FsmAp
*walkJoin( ParseData
*pd
);
358 void makeNameTree( ParseData
*pd
);
359 void resolveNameRefs( ParseData
*pd
);
380 /* Construct with an expression on the left and a term on the right. */
381 Expression( Expression
*expression
, Term
*term
, Type type
) :
382 expression(expression
), term(term
),
383 builtin(builtin
), type(type
), prev(this), next(this) { }
385 /* Construct with only a term. */
386 Expression( Term
*term
) :
387 expression(0), term(term
), builtin(builtin
),
388 type(TermType
) , prev(this), next(this) { }
390 /* Construct with a builtin type. */
391 Expression( BuiltinMachine builtin
) :
392 expression(0), term(0), builtin(builtin
),
393 type(BuiltinType
), prev(this), next(this) { }
397 /* Tree traversal. */
398 FsmAp
*walk( ParseData
*pd
, bool lastInSeq
= true );
399 void makeNameTree( ParseData
*pd
);
400 void resolveNameRefs( ParseData
*pd
);
403 Expression
*expression
;
405 BuiltinMachine builtin
;
408 Expression
*prev
, *next
;
424 Term( Term
*term
, FactorWithAug
*factorWithAug
) :
425 term(term
), factorWithAug(factorWithAug
), type(ConcatType
) { }
427 Term( Term
*term
, FactorWithAug
*factorWithAug
, Type type
) :
428 term(term
), factorWithAug(factorWithAug
), type(type
) { }
430 Term( FactorWithAug
*factorWithAug
) :
431 term(0), factorWithAug(factorWithAug
), type(FactorWithAugType
) { }
435 FsmAp
*walk( ParseData
*pd
, bool lastInSeq
= true );
436 void makeNameTree( ParseData
*pd
);
437 void resolveNameRefs( ParseData
*pd
);
440 FactorWithAug
*factorWithAug
;
443 /* Priority descriptor for RightFinish type. */
444 PriorDesc priorDescs
[2];
448 /* Third level of precedence. Augmenting nodes with actions and priorities. */
451 FactorWithAug( FactorWithRep
*factorWithRep
) :
452 priorDescs(0), factorWithRep(factorWithRep
) { }
455 /* Tree traversal. */
456 FsmAp
*walk( ParseData
*pd
);
457 void makeNameTree( ParseData
*pd
);
458 void resolveNameRefs( ParseData
*pd
);
460 void assignActions( ParseData
*pd
, FsmAp
*graph
, int *actionOrd
);
461 void assignPriorities( FsmAp
*graph
, int *priorOrd
);
463 void assignConditions( FsmAp
*graph
);
465 /* Actions and priorities assigned to the factor node. */
466 Vector
<ParserAction
> actions
;
467 Vector
<PriorityAug
> priorityAugs
;
468 PriorDesc
*priorDescs
;
469 Vector
<Label
> labels
;
470 Vector
<EpsilonLink
> epsilonLinks
;
471 Vector
<ConditionTest
> conditions
;
473 FactorWithRep
*factorWithRep
;
476 /* Fourth level of precedence. Trailing unary operators. Provide kleen star,
477 * optional and plus. */
492 FactorWithRep( const InputLoc
&loc
, FactorWithRep
*factorWithRep
,
493 int lowerRep
, int upperRep
, Type type
) :
494 loc(loc
), factorWithRep(factorWithRep
),
495 factorWithNeg(0), lowerRep(lowerRep
),
496 upperRep(upperRep
), type(type
) { }
498 FactorWithRep( FactorWithNeg
*factorWithNeg
)
499 : factorWithNeg(factorWithNeg
), type(FactorWithNegType
) { }
503 /* Tree traversal. */
504 FsmAp
*walk( ParseData
*pd
);
505 void makeNameTree( ParseData
*pd
);
506 void resolveNameRefs( ParseData
*pd
);
509 FactorWithRep
*factorWithRep
;
510 FactorWithNeg
*factorWithNeg
;
511 int lowerRep
, upperRep
;
514 /* Priority descriptor for StarStar type. */
515 PriorDesc priorDescs
[2];
518 /* Fifth level of precedence. Provides Negation. */
527 FactorWithNeg( const InputLoc
&loc
, FactorWithNeg
*factorWithNeg
, Type type
) :
528 loc(loc
), factorWithNeg(factorWithNeg
), factor(0), type(type
) { }
530 FactorWithNeg( Factor
*factor
) :
531 factorWithNeg(0), factor(factor
), type(FactorType
) { }
535 /* Tree traversal. */
536 FsmAp
*walk( ParseData
*pd
);
537 void makeNameTree( ParseData
*pd
);
538 void resolveNameRefs( ParseData
*pd
);
541 FactorWithNeg
*factorWithNeg
;
551 /* Language elements a factor node can be. */
562 /* Construct with a literal fsm. */
563 Factor( Literal
*literal
) :
564 literal(literal
), type(LiteralType
) { }
566 /* Construct with a range. */
567 Factor( Range
*range
) :
568 range(range
), type(RangeType
) { }
570 /* Construct with the or part of a regular expression. */
571 Factor( ReItem
*reItem
) :
572 reItem(reItem
), type(OrExprType
) { }
574 /* Construct with a regular expression. */
575 Factor( RegExpr
*regExpr
) :
576 regExpr(regExpr
), type(RegExprType
) { }
578 /* Construct with a reference to a var def. */
579 Factor( const InputLoc
&loc
, VarDef
*varDef
) :
580 loc(loc
), varDef(varDef
), type(ReferenceType
) {}
582 /* Construct with a parenthesized join. */
583 Factor( Join
*join
) :
584 join(join
), type(ParenType
) {}
586 /* Construct with a longest match operator. */
587 Factor( LongestMatch
*longestMatch
) :
588 longestMatch(longestMatch
), type(LongestMatchType
) {}
593 /* Tree traversal. */
594 FsmAp
*walk( ParseData
*pd
);
595 void makeNameTree( ParseData
*pd
);
596 void resolveNameRefs( ParseData
*pd
);
605 LongestMatch
*longestMatch
;
610 /* A range machine. Only ever composed of two literals. */
613 Range( Literal
*lowerLit
, Literal
*upperLit
)
614 : lowerLit(lowerLit
), upperLit(upperLit
) { }
617 FsmAp
*walk( ParseData
*pd
);
623 /* Some literal machine. Can be a number or literal string. */
626 enum LiteralType
{ Number
, LitString
};
628 Literal( const Token
&token
, LiteralType type
)
629 : token(token
), type(type
) { }
631 FsmAp
*walk( ParseData
*pd
);
637 /* Regular expression. */
640 enum RegExpType
{ RecurseItem
, Empty
};
644 type(Empty
), caseInsensitive(false) { }
645 RegExpr(RegExpr
*regExpr
, ReItem
*item
) :
646 regExpr(regExpr
), item(item
),
647 type(RecurseItem
), caseInsensitive(false) { }
650 FsmAp
*walk( ParseData
*pd
, RegExpr
*rootRegex
);
655 bool caseInsensitive
;
658 /* An item in a regular expression. */
661 enum ReItemType
{ Data
, Dot
, OrBlock
, NegOrBlock
};
663 ReItem( const InputLoc
&loc
, const Token
&token
)
664 : loc(loc
), token(token
), star(false), type(Data
) { }
665 ReItem( const InputLoc
&loc
, ReItemType type
)
666 : loc(loc
), star(false), type(type
) { }
667 ReItem( const InputLoc
&loc
, ReOrBlock
*orBlock
, ReItemType type
)
668 : loc(loc
), orBlock(orBlock
), star(false), type(type
) { }
671 FsmAp
*walk( ParseData
*pd
, RegExpr
*rootRegex
);
680 /* An or block item. */
683 enum ReOrBlockType
{ RecurseItem
, Empty
};
688 ReOrBlock(ReOrBlock
*orBlock
, ReOrItem
*item
)
689 : orBlock(orBlock
), item(item
), type(RecurseItem
) { }
692 FsmAp
*walk( ParseData
*pd
, RegExpr
*rootRegex
);
699 /* An item in an or block. */
702 enum ReOrItemType
{ Data
, Range
};
704 ReOrItem( const InputLoc
&loc
, const Token
&token
)
705 : loc(loc
), token(token
), type(Data
) {}
706 ReOrItem( const InputLoc
&loc
, char lower
, char upper
)
707 : loc(loc
), lower(lower
), upper(upper
), type(Range
) { }
709 FsmAp
*walk( ParseData
*pd
, RegExpr
*rootRegex
);
727 Text
, Goto
, Call
, Next
, GotoExpr
, CallExpr
, NextExpr
, Ret
, PChar
,
728 Char
, Hold
, Curs
, Targs
, Entry
, Exec
, LmSwitch
, LmSetActId
,
729 LmSetTokEnd
, LmOnLast
, LmOnNext
, LmOnLagBehind
, LmInitAct
,
730 LmInitTokStart
, LmSetTokStart
, Break
733 InlineItem( const InputLoc
&loc
, char *data
, Type type
) :
734 loc(loc
), data(data
), nameRef(0), children(0), type(type
) { }
736 InlineItem( const InputLoc
&loc
, NameRef
*nameRef
, Type type
) :
737 loc(loc
), data(0), nameRef(nameRef
), children(0), type(type
) { }
739 InlineItem( const InputLoc
&loc
, LongestMatch
*longestMatch
,
740 LongestMatchPart
*longestMatchPart
, Type type
) : loc(loc
), data(0),
741 nameRef(0), children(0), longestMatch(longestMatch
),
742 longestMatchPart(longestMatchPart
), type(type
) { }
744 InlineItem( const InputLoc
&loc
, NameInst
*nameTarg
, Type type
) :
745 loc(loc
), data(0), nameRef(0), nameTarg(nameTarg
), children(0),
748 InlineItem( const InputLoc
&loc
, Type type
) :
749 loc(loc
), data(0), nameRef(0), children(0), type(type
) { }
755 InlineList
*children
;
756 LongestMatch
*longestMatch
;
757 LongestMatchPart
*longestMatchPart
;
760 InlineItem
*prev
, *next
;
763 /* Normally this would be atypedef, but that would entail including DList from
764 * ptreetypes, which should be just typedef forwards. */
765 struct InlineList
: public DList
<InlineItem
> { };
769 #endif /* _PARSETREE_H */