2 * Contains the base functions that all recognizers require.
3 * Any function can be overridden by a lexer/parser/tree parser or by the
6 * \addtogroup pANTLR3_BASE_RECOGNIZER
9 #include <antlr3baserecognizer.h>
11 // [The "BSD licence"]
12 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
13 // http://www.temporal-wave.com
14 // http://www.linkedin.com/in/jimidle
16 // All rights reserved.
18 // Redistribution and use in source and binary forms, with or without
19 // modification, are permitted provided that the following conditions
21 // 1. Redistributions of source code must retain the above copyright
22 // notice, this list of conditions and the following disclaimer.
23 // 2. Redistributions in binary form must reproduce the above copyright
24 // notice, this list of conditions and the following disclaimer in the
25 // documentation and/or other materials provided with the distribution.
26 // 3. The name of the author may not be used to endorse or promote products
27 // derived from this software without specific prior written permission.
29 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
30 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
31 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
32 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
33 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
34 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
38 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41 #pragma warning( disable : 4100 )
44 /* Interface functions -standard implementations cover parser and treeparser
45 * almost completely but are overridden by the parser or tree parser as needed. Lexer overrides
46 * most of these functions.
48 static void beginResync (pANTLR3_BASE_RECOGNIZER recognizer);
49 static pANTLR3_BITSET computeErrorRecoverySet (pANTLR3_BASE_RECOGNIZER recognizer);
50 static void endResync (pANTLR3_BASE_RECOGNIZER recognizer);
51 static void beginBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level);
52 static void endBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful);
54 static void * match (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
55 static void matchAny (pANTLR3_BASE_RECOGNIZER recognizer);
56 static void mismatch (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
57 static ANTLR3_BOOLEAN mismatchIsUnwantedToken (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, ANTLR3_UINT32 ttype);
58 static ANTLR3_BOOLEAN mismatchIsMissingToken (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, pANTLR3_BITSET_LIST follow);
59 static void reportError (pANTLR3_BASE_RECOGNIZER recognizer);
60 static pANTLR3_BITSET computeCSRuleFollow (pANTLR3_BASE_RECOGNIZER recognizer);
61 static pANTLR3_BITSET combineFollows (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_BOOLEAN exact);
62 static void displayRecognitionError (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames);
63 static void recover (pANTLR3_BASE_RECOGNIZER recognizer);
64 static void * recoverFromMismatchedToken (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
65 static void * recoverFromMismatchedSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow);
66 static ANTLR3_BOOLEAN recoverFromMismatchedElement(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow);
67 static void consumeUntil (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 tokenType);
68 static void consumeUntilSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET set);
69 static pANTLR3_STACK getRuleInvocationStack (pANTLR3_BASE_RECOGNIZER recognizer);
70 static pANTLR3_STACK getRuleInvocationStackNamed (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 name);
71 static pANTLR3_HASH_TABLE toStrings (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_HASH_TABLE);
72 static ANTLR3_MARKER getRuleMemoization (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_INTKEY ruleIndex, ANTLR3_MARKER ruleParseStart);
73 static ANTLR3_BOOLEAN alreadyParsedRule (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex);
74 static void memoize (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex, ANTLR3_MARKER ruleParseStart);
75 static ANTLR3_BOOLEAN synpred (pANTLR3_BASE_RECOGNIZER recognizer, void * ctx, void (*predicate)(void * ctx));
76 static void reset (pANTLR3_BASE_RECOGNIZER recognizer);
77 static void freeBR (pANTLR3_BASE_RECOGNIZER recognizer);
78 static void * getCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream);
79 static void * getMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION e,
80 ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow);
81 static ANTLR3_UINT32 getNumberOfSyntaxErrors (pANTLR3_BASE_RECOGNIZER recognizer);
83 ANTLR3_API pANTLR3_BASE_RECOGNIZER
84 antlr3BaseRecognizerNew(ANTLR3_UINT32 type, ANTLR3_UINT32 sizeHint, pANTLR3_RECOGNIZER_SHARED_STATE state)
86 pANTLR3_BASE_RECOGNIZER recognizer;
88 // Allocate memory for the structure
90 recognizer = (pANTLR3_BASE_RECOGNIZER) ANTLR3_MALLOC((size_t)sizeof(ANTLR3_BASE_RECOGNIZER));
92 if (recognizer == NULL)
100 // If we have been supplied with a pre-existing recognizer state
101 // then we just install it, otherwise we must create one from scratch
105 recognizer->state = (pANTLR3_RECOGNIZER_SHARED_STATE) ANTLR3_CALLOC(1, (size_t)sizeof(ANTLR3_RECOGNIZER_SHARED_STATE));
107 if (recognizer->state == NULL)
109 ANTLR3_FREE(recognizer);
113 // Initialize any new recognizer state
115 recognizer->state->errorRecovery = ANTLR3_FALSE;
116 recognizer->state->lastErrorIndex = -1;
117 recognizer->state->failed = ANTLR3_FALSE;
118 recognizer->state->errorCount = 0;
119 recognizer->state->backtracking = 0;
120 recognizer->state->following = NULL;
121 recognizer->state->ruleMemo = NULL;
122 recognizer->state->tokenNames = NULL;
123 recognizer->state->sizeHint = sizeHint;
124 recognizer->state->tokSource = NULL;
125 recognizer->state->tokFactory = NULL;
127 // Rather than check to see if we must initialize
128 // the stack every time we are asked for an new rewrite stream
129 // we just always create an empty stack and then just
130 // free it when the base recognizer is freed.
132 recognizer->state->rStreams = antlr3VectorNew(0); // We don't know the size.
134 if (recognizer->state->rStreams == NULL)
138 ANTLR3_FREE(recognizer->state);
139 ANTLR3_FREE(recognizer);
145 // Install the one we were given, and do not reset it here
146 // as it will either already have been initialized or will
147 // be in a state that needs to be preserved.
149 recognizer->state = state;
152 // Install the BR API
154 recognizer->alreadyParsedRule = alreadyParsedRule;
155 recognizer->beginResync = beginResync;
156 recognizer->combineFollows = combineFollows;
157 recognizer->beginBacktrack = beginBacktrack;
158 recognizer->endBacktrack = endBacktrack;
159 recognizer->computeCSRuleFollow = computeCSRuleFollow;
160 recognizer->computeErrorRecoverySet = computeErrorRecoverySet;
161 recognizer->consumeUntil = consumeUntil;
162 recognizer->consumeUntilSet = consumeUntilSet;
163 recognizer->displayRecognitionError = displayRecognitionError;
164 recognizer->endResync = endResync;
165 recognizer->exConstruct = antlr3MTExceptionNew;
166 recognizer->getRuleInvocationStack = getRuleInvocationStack;
167 recognizer->getRuleInvocationStackNamed = getRuleInvocationStackNamed;
168 recognizer->getRuleMemoization = getRuleMemoization;
169 recognizer->match = match;
170 recognizer->matchAny = matchAny;
171 recognizer->memoize = memoize;
172 recognizer->mismatch = mismatch;
173 recognizer->mismatchIsUnwantedToken = mismatchIsUnwantedToken;
174 recognizer->mismatchIsMissingToken = mismatchIsMissingToken;
175 recognizer->recover = recover;
176 recognizer->recoverFromMismatchedElement= recoverFromMismatchedElement;
177 recognizer->recoverFromMismatchedSet = recoverFromMismatchedSet;
178 recognizer->recoverFromMismatchedToken = recoverFromMismatchedToken;
179 recognizer->getNumberOfSyntaxErrors = getNumberOfSyntaxErrors;
180 recognizer->reportError = reportError;
181 recognizer->reset = reset;
182 recognizer->synpred = synpred;
183 recognizer->toStrings = toStrings;
184 recognizer->getCurrentInputSymbol = getCurrentInputSymbol;
185 recognizer->getMissingSymbol = getMissingSymbol;
186 recognizer->debugger = NULL;
188 recognizer->free = freeBR;
190 /* Initialize variables
192 recognizer->type = type;
198 freeBR (pANTLR3_BASE_RECOGNIZER recognizer)
200 pANTLR3_EXCEPTION thisE;
202 // Did we have a state allocated?
204 if (recognizer->state != NULL)
206 // Free any rule memoization we set up
208 if (recognizer->state->ruleMemo != NULL)
210 recognizer->state->ruleMemo->free(recognizer->state->ruleMemo);
211 recognizer->state->ruleMemo = NULL;
214 // Free any exception space we have left around
216 thisE = recognizer->state->exception;
219 thisE->freeEx(thisE);
222 // Free any rewrite streams we have allocated
224 if (recognizer->state->rStreams != NULL)
226 recognizer->state->rStreams->free(recognizer->state->rStreams);
229 // Free up any token factory we created (error recovery for instance)
231 if (recognizer->state->tokFactory != NULL)
233 recognizer->state->tokFactory->close(recognizer->state->tokFactory);
235 // Free the shared state memory
237 ANTLR3_FREE(recognizer->state);
240 // Free the actual recognizer space
242 ANTLR3_FREE(recognizer);
246 * Creates a new Mismatched Token Exception and inserts in the recognizer
250 * Context pointer for this recognizer
254 antlr3MTExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer)
256 /* Create a basic recognition exception structure
258 antlr3RecognitionExceptionNew(recognizer);
260 /* Now update it to indicate this is a Mismatched token exception
262 recognizer->state->exception->name = ANTLR3_MISMATCHED_EX_NAME;
263 recognizer->state->exception->type = ANTLR3_MISMATCHED_TOKEN_EXCEPTION;
269 antlr3RecognitionExceptionNew(pANTLR3_BASE_RECOGNIZER recognizer)
271 pANTLR3_EXCEPTION ex;
273 pANTLR3_PARSER parser;
274 pANTLR3_TREE_PARSER tparser;
276 pANTLR3_INPUT_STREAM ins;
277 pANTLR3_INT_STREAM is;
278 pANTLR3_COMMON_TOKEN_STREAM cts;
279 pANTLR3_TREE_NODE_STREAM tns;
289 switch (recognizer->type)
291 case ANTLR3_TYPE_LEXER:
293 lexer = (pANTLR3_LEXER) (recognizer->super);
299 case ANTLR3_TYPE_PARSER:
301 parser = (pANTLR3_PARSER) (recognizer->super);
302 cts = (pANTLR3_COMMON_TOKEN_STREAM)(parser->tstream->super);
303 is = parser->tstream->istream;
307 case ANTLR3_TYPE_TREE_PARSER:
309 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
310 tns = tparser->ctnstream->tnstream;
317 ANTLR3_FPRINTF(stderr, "Base recognizer function antlr3RecognitionExceptionNew called by unknown parser type - provide override for this function\n");
323 /* Create a basic exception structure
325 ex = antlr3ExceptionNew(ANTLR3_RECOGNITION_EXCEPTION,
326 (void *)ANTLR3_RECOGNITION_EX_NAME,
330 /* Rest of information depends on the base type of the
333 switch (is->type & ANTLR3_INPUT_MASK)
335 case ANTLR3_CHARSTREAM:
337 ex->c = is->_LA (is, 1); /* Current input character */
338 ex->line = ins->getLine (ins); /* Line number comes from stream */
339 ex->charPositionInLine = ins->getCharPositionInLine (ins); /* Line offset also comes from the stream */
340 ex->index = is->index (is);
341 ex->streamName = ins->fileName;
342 ex->message = "Unexpected character";
345 case ANTLR3_TOKENSTREAM:
347 ex->token = cts->tstream->_LT (cts->tstream, 1); /* Current input token */
348 ex->line = ((pANTLR3_COMMON_TOKEN)(ex->token))->getLine (ex->token);
349 ex->charPositionInLine = ((pANTLR3_COMMON_TOKEN)(ex->token))->getCharPositionInLine (ex->token);
350 ex->index = cts->tstream->istream->index (cts->tstream->istream);
351 if (((pANTLR3_COMMON_TOKEN)(ex->token))->type == ANTLR3_TOKEN_EOF)
353 ex->streamName = NULL;
357 ex->streamName = ((pANTLR3_COMMON_TOKEN)(ex->token))->input->fileName;
359 ex->message = "Unexpected token";
362 case ANTLR3_COMMONTREENODE:
364 ex->token = tns->_LT (tns, 1); /* Current input tree node */
365 ex->line = ((pANTLR3_BASE_TREE)(ex->token))->getLine (ex->token);
366 ex->charPositionInLine = ((pANTLR3_BASE_TREE)(ex->token))->getCharPositionInLine (ex->token);
367 ex->index = tns->istream->index (tns->istream);
369 // Are you ready for this? Deep breath now...
372 pANTLR3_COMMON_TREE tnode;
374 tnode = ((pANTLR3_COMMON_TREE)(((pANTLR3_BASE_TREE)(ex->token))->super));
376 if (tnode->token == NULL)
378 ex->streamName = ((pANTLR3_BASE_TREE)(ex->token))->strFactory->newStr(((pANTLR3_BASE_TREE)(ex->token))->strFactory, (pANTLR3_UINT8)"-unknown source-");
382 if (tnode->token->input == NULL)
384 ex->streamName = NULL;
388 ex->streamName = tnode->token->input->fileName;
391 ex->message = "Unexpected node";
397 ex->nextException = recognizer->state->exception; /* So we don't leak the memory */
398 recognizer->state->exception = ex;
399 recognizer->state->error = ANTLR3_TRUE; /* Exception is outstanding */
405 /// Match current input symbol against ttype. Upon error, do one token
406 /// insertion or deletion if possible.
407 /// To turn off single token insertion or deletion error
408 /// recovery, override mismatchRecover() and have it call
409 /// plain mismatch(), which does not recover. Then any error
410 /// in a rule will cause an exception and immediate exit from
411 /// rule. Rule would recover by resynchronizing to the set of
412 /// symbols that can follow rule ref.
415 match( pANTLR3_BASE_RECOGNIZER recognizer,
416 ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
418 pANTLR3_PARSER parser;
419 pANTLR3_TREE_PARSER tparser;
420 pANTLR3_INT_STREAM is;
421 void * matchedSymbol;
423 switch (recognizer->type)
425 case ANTLR3_TYPE_PARSER:
427 parser = (pANTLR3_PARSER) (recognizer->super);
429 is = parser->tstream->istream;
433 case ANTLR3_TYPE_TREE_PARSER:
435 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
437 is = tparser->ctnstream->tnstream->istream;
443 ANTLR3_FPRINTF(stderr, "Base recognizer function 'match' called by unknown parser type - provide override for this function\n");
449 // Pick up the current input token/node for assignment to labels
451 matchedSymbol = recognizer->getCurrentInputSymbol(recognizer, is);
453 if (is->_LA(is, 1) == ttype)
455 // The token was the one we were told to expect
457 is->consume(is); // Consume that token from the stream
458 recognizer->state->errorRecovery = ANTLR3_FALSE; // Not in error recovery now (if we were)
459 recognizer->state->failed = ANTLR3_FALSE; // The match was a success
460 return matchedSymbol; // We are done
463 // We did not find the expected token type, if we are backtracking then
464 // we just set the failed flag and return.
466 if (recognizer->state->backtracking > 0)
468 // Backtracking is going on
470 recognizer->state->failed = ANTLR3_TRUE;
471 return matchedSymbol;
474 // We did not find the expected token and there is no backtracking
475 // going on, so we mismatch, which creates an exception in the recognizer exception
478 matchedSymbol = recognizer->recoverFromMismatchedToken(recognizer, ttype, follow);
479 return matchedSymbol;
482 /// Consumes the next token, whatever it is, and resets the recognizer state
483 /// so that it is not in error.
485 /// \param recognizer
486 /// Recognizer context pointer
489 matchAny(pANTLR3_BASE_RECOGNIZER recognizer)
491 pANTLR3_PARSER parser;
492 pANTLR3_TREE_PARSER tparser;
493 pANTLR3_INT_STREAM is;
495 switch (recognizer->type)
497 case ANTLR3_TYPE_PARSER:
499 parser = (pANTLR3_PARSER) (recognizer->super);
501 is = parser->tstream->istream;
505 case ANTLR3_TYPE_TREE_PARSER:
507 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
509 is = tparser->ctnstream->tnstream->istream;
515 ANTLR3_FPRINTF(stderr, "Base recognizer function 'matchAny' called by unknown parser type - provide override for this function\n");
520 recognizer->state->errorRecovery = ANTLR3_FALSE;
521 recognizer->state->failed = ANTLR3_FALSE;
528 static ANTLR3_BOOLEAN
529 mismatchIsUnwantedToken(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, ANTLR3_UINT32 ttype)
533 nextt = is->_LA(is, 2);
537 if (recognizer->state->exception != NULL)
539 recognizer->state->exception->expecting = nextt;
541 return ANTLR3_TRUE; // This token is unknown, but the next one is the one we wanted
545 return ANTLR3_FALSE; // Neither this token, nor the one following is the one we wanted
551 static ANTLR3_BOOLEAN
552 mismatchIsMissingToken(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM is, pANTLR3_BITSET_LIST follow)
554 ANTLR3_BOOLEAN retcode;
555 pANTLR3_BITSET followClone;
556 pANTLR3_BITSET viableTokensFollowingThisRule;
560 // There is no information about the tokens that can follow the last one
561 // hence we must say that the current one we found is not a member of the
562 // follow set and does not indicate a missing token. We will just consume this
563 // single token and see if the parser works it out from there.
569 viableTokensFollowingThisRule = NULL;
571 // The C bitset maps are laid down at compile time by the
572 // C code generation. Hence we cannot remove things from them
573 // and so on. So, in order to remove EOR (if we need to) then
574 // we clone the static bitset.
576 followClone = antlr3BitsetLoad(follow);
577 if (followClone == NULL)
582 // Compute what can follow this grammar reference
584 if (followClone->isMember(followClone, ANTLR3_EOR_TOKEN_TYPE))
586 // EOR can follow, but if we are not the start symbol, we
587 // need to remove it.
589 if (recognizer->state->following->vector->count >= 0)
591 followClone->remove(followClone, ANTLR3_EOR_TOKEN_TYPE);
594 // Now compute the visiable tokens that can follow this rule, according to context
595 // and make them part of the follow set.
597 viableTokensFollowingThisRule = recognizer->computeCSRuleFollow(recognizer);
598 followClone->borInPlace(followClone, viableTokensFollowingThisRule);
601 /// if current token is consistent with what could come after set
602 /// then we know we're missing a token; error recovery is free to
603 /// "insert" the missing token
605 /// BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
606 /// in follow set to indicate that the fall of the start symbol is
607 /// in the set (EOF can follow).
609 if ( followClone->isMember(followClone, is->_LA(is, 1))
610 || followClone->isMember(followClone, ANTLR3_EOR_TOKEN_TYPE)
613 retcode = ANTLR3_TRUE;
617 retcode = ANTLR3_FALSE;
620 if (viableTokensFollowingThisRule != NULL)
622 viableTokensFollowingThisRule->free(viableTokensFollowingThisRule);
624 if (followClone != NULL)
626 followClone->free(followClone);
633 /// Factor out what to do upon token mismatch so tree parsers can behave
634 /// differently. Override and call mismatchRecover(input, ttype, follow)
635 /// to get single token insertion and deletion. Use this to turn off
636 /// single token insertion and deletion. Override mismatchRecover
637 /// to call this instead.
639 /// \remark mismatch only works for parsers and must be overridden for anything else.
642 mismatch(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
644 pANTLR3_PARSER parser;
645 pANTLR3_TREE_PARSER tparser;
646 pANTLR3_INT_STREAM is;
648 // Install a mismatched token exception in the exception stack
650 antlr3MTExceptionNew(recognizer);
651 recognizer->state->exception->expecting = ttype;
653 switch (recognizer->type)
655 case ANTLR3_TYPE_PARSER:
657 parser = (pANTLR3_PARSER) (recognizer->super);
659 is = parser->tstream->istream;
665 ANTLR3_FPRINTF(stderr, "Base recognizer function 'mismatch' called by unknown parser type - provide override for this function\n");
671 if (mismatchIsUnwantedToken(recognizer, is, ttype))
673 // Create a basic recognition exception structure
675 antlr3RecognitionExceptionNew(recognizer);
677 // Now update it to indicate this is an unwanted token exception
679 recognizer->state->exception->name = ANTLR3_UNWANTED_TOKEN_EXCEPTION_NAME;
680 recognizer->state->exception->type = ANTLR3_UNWANTED_TOKEN_EXCEPTION;
685 if (mismatchIsMissingToken(recognizer, is, follow))
687 // Create a basic recognition exception structure
689 antlr3RecognitionExceptionNew(recognizer);
691 // Now update it to indicate this is an unwanted token exception
693 recognizer->state->exception->name = ANTLR3_MISSING_TOKEN_EXCEPTION_NAME;
694 recognizer->state->exception->type = ANTLR3_MISSING_TOKEN_EXCEPTION;
699 // Just a mismatched token is all we can dtermine
701 antlr3MTExceptionNew(recognizer);
705 /// Report a recognition problem.
707 /// This method sets errorRecovery to indicate the parser is recovering
708 /// not parsing. Once in recovery mode, no errors are generated.
709 /// To get out of recovery mode, the parser must successfully match
710 /// a token (after a resync). So it will go:
713 /// 2. enter recovery mode, report error
714 /// 3. consume until token found in resynch set
715 /// 4. try to resume parsing
716 /// 5. next match() will reset errorRecovery mode
718 /// If you override, make sure to update errorCount if you care about that.
721 reportError (pANTLR3_BASE_RECOGNIZER recognizer)
723 if (recognizer->state->errorRecovery == ANTLR3_TRUE)
725 // Already in error recovery so don't display another error while doing so
730 // Signal we are in error recovery now
732 recognizer->state->errorRecovery = ANTLR3_TRUE;
734 // Indicate this recognizer had an error while processing.
736 recognizer->state->errorCount++;
738 // Call the error display routine
740 recognizer->displayRecognitionError(recognizer, recognizer->state->tokenNames);
744 beginBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level)
746 if (recognizer->debugger != NULL)
748 recognizer->debugger->beginBacktrack(recognizer->debugger, level);
753 endBacktrack (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful)
755 if (recognizer->debugger != NULL)
757 recognizer->debugger->endBacktrack(recognizer->debugger, level, successful);
761 beginResync (pANTLR3_BASE_RECOGNIZER recognizer)
763 if (recognizer->debugger != NULL)
765 recognizer->debugger->beginResync(recognizer->debugger);
770 endResync (pANTLR3_BASE_RECOGNIZER recognizer)
772 if (recognizer->debugger != NULL)
774 recognizer->debugger->endResync(recognizer->debugger);
778 /// Compute the error recovery set for the current rule.
779 /// Documentation below is from the Java implementation.
781 /// During rule invocation, the parser pushes the set of tokens that can
782 /// follow that rule reference on the stack; this amounts to
783 /// computing FIRST of what follows the rule reference in the
784 /// enclosing rule. This local follow set only includes tokens
785 /// from within the rule; i.e., the FIRST computation done by
786 /// ANTLR stops at the end of a rule.
790 /// When you find a "no viable alt exception", the input is not
791 /// consistent with any of the alternatives for rule r. The best
792 /// thing to do is to consume tokens until you see something that
793 /// can legally follow a call to r *or* any rule that called r.
794 /// You don't want the exact set of viable next tokens because the
795 /// input might just be missing a token--you might consume the
796 /// rest of the input looking for one of the missing tokens.
798 /// Consider grammar:
808 /// At each rule invocation, the set of tokens that could follow
809 /// that rule is pushed on a stack. Here are the various "local"
812 /// FOLLOW(b1_in_a) = FIRST(']') = ']'
813 /// FOLLOW(b2_in_a) = FIRST(')') = ')'
814 /// FOLLOW(c_in_b) = FIRST('^') = '^'
816 /// Upon erroneous input "[]", the call chain is
820 /// and, hence, the follow context stack is:
822 /// depth local follow set after call to rule
823 /// 0 <EOF> a (from main())
827 /// Notice that ')' is not included, because b would have to have
828 /// been called from a different context in rule a for ')' to be
831 /// For error recovery, we cannot consider FOLLOW(c)
832 /// (context-sensitive or otherwise). We need the combined set of
833 /// all context-sensitive FOLLOW sets--the set of all tokens that
834 /// could follow any reference in the call chain. We need to
835 /// resync to one of those tokens. Note that FOLLOW(c)='^' and if
836 /// we resync'd to that token, we'd consume until EOF. We need to
837 /// sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
838 /// In this case, for input "[]", LA(1) is in this set so we would
839 /// not consume anything and after printing an error rule c would
840 /// return normally. It would not find the required '^' though.
841 /// At this point, it gets a mismatched token error and throws an
842 /// exception (since LA(1) is not in the viable following token
843 /// set). The rule exception handler tries to recover, but finds
844 /// the same recovery set and doesn't consume anything. Rule b
845 /// exits normally returning to rule a. Now it finds the ']' (and
846 /// with the successful match exits errorRecovery mode).
848 /// So, you can see that the parser walks up call chain looking
849 /// for the token that was a member of the recovery set.
851 /// Errors are not generated in errorRecovery mode.
853 /// ANTLR's error recovery mechanism is based upon original ideas:
855 /// "Algorithms + Data Structures = Programs" by Niklaus Wirth
859 /// "A note on error recovery in recursive descent parsers":
860 /// http://portal.acm.org/citation.cfm?id=947902.947905
862 /// Later, Josef Grosch had some good ideas:
864 /// "Efficient and Comfortable Error Recovery in Recursive Descent
866 /// ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
868 /// Like Grosch I implemented local FOLLOW sets that are combined
869 /// at run-time upon error to avoid overhead during parsing.
871 static pANTLR3_BITSET
872 computeErrorRecoverySet (pANTLR3_BASE_RECOGNIZER recognizer)
874 return recognizer->combineFollows(recognizer, ANTLR3_FALSE);
877 /// Compute the context-sensitive FOLLOW set for current rule.
878 /// Documentation below is from the Java runtime.
880 /// This is the set of token types that can follow a specific rule
881 /// reference given a specific call chain. You get the set of
882 /// viable tokens that can possibly come next (look ahead depth 1)
883 /// given the current call chain. Contrast this with the
884 /// definition of plain FOLLOW for rule r:
886 /// FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
888 /// where x in T* and alpha, beta in V*; T is set of terminals and
889 /// V is the set of terminals and non terminals. In other words,
890 /// FOLLOW(r) is the set of all tokens that can possibly follow
891 /// references to r in *any* sentential form (context). At
892 /// runtime, however, we know precisely which context applies as
893 /// we have the call chain. We may compute the exact (rather
894 /// than covering superset) set of following tokens.
896 /// For example, consider grammar:
898 /// stat : ID '=' expr ';' // FOLLOW(stat)=={EOF}
899 /// | "return" expr '.'
901 /// expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'}
902 /// atom : INT // FOLLOW(atom)=={'+',')',';','.'}
906 /// The FOLLOW sets are all inclusive whereas context-sensitive
907 /// FOLLOW sets are precisely what could follow a rule reference.
908 /// For input input "i=(3);", here is the derivation:
910 /// stat => ID '=' expr ';'
911 /// => ID '=' atom ('+' atom)* ';'
912 /// => ID '=' '(' expr ')' ('+' atom)* ';'
913 /// => ID '=' '(' atom ')' ('+' atom)* ';'
914 /// => ID '=' '(' INT ')' ('+' atom)* ';'
915 /// => ID '=' '(' INT ')' ';'
917 /// At the "3" token, you'd have a call chain of
919 /// stat -> expr -> atom -> expr -> atom
921 /// What can follow that specific nested ref to atom? Exactly ')'
922 /// as you can see by looking at the derivation of this specific
923 /// input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
925 /// You want the exact viable token set when recovering from a
926 /// token mismatch. Upon token mismatch, if LA(1) is member of
927 /// the viable next token set, then you know there is most likely
928 /// a missing token in the input stream. "Insert" one by just not
929 /// throwing an exception.
931 static pANTLR3_BITSET
932 computeCSRuleFollow (pANTLR3_BASE_RECOGNIZER recognizer)
934 return recognizer->combineFollows(recognizer, ANTLR3_FALSE);
937 /// Compute the current followset for the input stream.
939 static pANTLR3_BITSET
940 combineFollows (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_BOOLEAN exact)
942 pANTLR3_BITSET followSet;
943 pANTLR3_BITSET localFollowSet;
947 top = recognizer->state->following->size(recognizer->state->following);
949 followSet = antlr3BitsetNew(0);
950 localFollowSet = NULL;
952 for (i = top; i>0; i--)
954 localFollowSet = antlr3BitsetLoad((pANTLR3_BITSET_LIST) recognizer->state->following->get(recognizer->state->following, i-1));
956 if (localFollowSet != NULL)
958 followSet->borInPlace(followSet, localFollowSet);
960 if (exact == ANTLR3_TRUE)
962 if (localFollowSet->isMember(localFollowSet, ANTLR3_EOR_TOKEN_TYPE) == ANTLR3_FALSE)
964 // Only leave EOR in the set if at top (start rule); this lets us know
965 // if we have to include the follow(start rule); I.E., EOF
969 followSet->remove(followSet, ANTLR3_EOR_TOKEN_TYPE);
974 break; // Cannot see End Of Rule from here, just drop out
977 localFollowSet->free(localFollowSet);
978 localFollowSet = NULL;
982 if (localFollowSet != NULL)
984 localFollowSet->free(localFollowSet);
989 /// Standard/Example error display method.
990 /// No generic error message display function could possibly do everything correctly
991 /// for all possible parsers. Hence you are provided with this example routine, which
992 /// you should override in your parser/tree parser to do as you will.
994 /// Here we depart somewhat from the Java runtime as that has now split up a lot
995 /// of the error display routines into separate units. However, there is little advantage
996 /// to this in the C version as you will probably implement all such routines as a
997 /// separate translation unit, rather than install them all as pointers to functions
998 /// in the base recognizer.
1001 displayRecognitionError (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames)
1003 pANTLR3_PARSER parser;
1004 pANTLR3_TREE_PARSER tparser;
1005 pANTLR3_INT_STREAM is;
1006 pANTLR3_STRING ttext;
1007 pANTLR3_STRING ftext;
1008 pANTLR3_EXCEPTION ex;
1009 pANTLR3_COMMON_TOKEN theToken;
1010 pANTLR3_BASE_TREE theBaseTree;
1011 pANTLR3_COMMON_TREE theCommonTree;
1013 // Retrieve some info for easy reading.
1015 ex = recognizer->state->exception;
1018 // See if there is a 'filename' we can use
1020 if (ex->streamName == NULL)
1022 if (((pANTLR3_COMMON_TOKEN)(ex->token))->type == ANTLR3_TOKEN_EOF)
1024 ANTLR3_FPRINTF(stderr, "-end of input-(");
1028 ANTLR3_FPRINTF(stderr, "-unknown source-(");
1033 ftext = ex->streamName->to8(ex->streamName);
1034 ANTLR3_FPRINTF(stderr, "%s(", ftext->chars);
1037 // Next comes the line number
1040 ANTLR3_FPRINTF(stderr, "%d) ", recognizer->state->exception->line);
1041 ANTLR3_FPRINTF(stderr, " : error %d : %s",
1042 recognizer->state->exception->type,
1043 (pANTLR3_UINT8) (recognizer->state->exception->message));
1046 // How we determine the next piece is dependent on which thing raised the
1049 switch (recognizer->type)
1051 case ANTLR3_TYPE_PARSER:
1053 // Prepare the knowledge we know we have
1055 parser = (pANTLR3_PARSER) (recognizer->super);
1057 is = parser->tstream->istream;
1058 theToken = (pANTLR3_COMMON_TOKEN)(recognizer->state->exception->token);
1059 ttext = theToken->toString(theToken);
1061 ANTLR3_FPRINTF(stderr, ", at offset %d", recognizer->state->exception->charPositionInLine);
1062 if (theToken != NULL)
1064 if (theToken->type == ANTLR3_TOKEN_EOF)
1066 ANTLR3_FPRINTF(stderr, ", at <EOF>");
1070 // Guard against null text in a token
1072 ANTLR3_FPRINTF(stderr, "\n near %s\n ", ttext == NULL ? (pANTLR3_UINT8)"<no text for the token>" : ttext->chars);
1077 case ANTLR3_TYPE_TREE_PARSER:
1079 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1081 is = tparser->ctnstream->tnstream->istream;
1082 theBaseTree = (pANTLR3_BASE_TREE)(recognizer->state->exception->token);
1083 ttext = theBaseTree->toStringTree(theBaseTree);
1085 if (theBaseTree != NULL)
1087 theCommonTree = (pANTLR3_COMMON_TREE) theBaseTree->super;
1089 if (theCommonTree != NULL)
1091 theToken = (pANTLR3_COMMON_TOKEN) theBaseTree->getToken(theBaseTree);
1093 ANTLR3_FPRINTF(stderr, ", at offset %d", theBaseTree->getCharPositionInLine(theBaseTree));
1094 ANTLR3_FPRINTF(stderr, ", near %s", ttext->chars);
1100 ANTLR3_FPRINTF(stderr, "Base recognizer function displayRecognitionError called by unknown parser type - provide override for this function\n");
1105 // Although this function should generally be provided by the implementation, this one
1106 // should be as helpful as possible for grammar developers and serve as an example
1107 // of what you can do with each exception type. In general, when you make up your
1108 // 'real' handler, you should debug the routine with all possible errors you expect
1109 // which will then let you be as specific as possible about all circumstances.
1111 // Note that in the general case, errors thrown by tree parsers indicate a problem
1112 // with the output of the parser or with the tree grammar itself. The job of the parser
1113 // is to produce a perfect (in traversal terms) syntactically correct tree, so errors
1114 // at that stage should really be semantic errors that your own code determines and handles
1115 // in whatever way is appropriate.
1119 case ANTLR3_UNWANTED_TOKEN_EXCEPTION:
1121 // Indicates that the recognizer was fed a token which seesm to be
1122 // spurious input. We can detect this when the token that follows
1123 // this unwanted token would normally be part of the syntactically
1124 // correct stream. Then we can see that the token we are looking at
1125 // is just something that should not be there and throw this exception.
1127 if (tokenNames == NULL)
1129 ANTLR3_FPRINTF(stderr, " : Extraneous input...");
1133 if (ex->expecting == ANTLR3_TOKEN_EOF)
1135 ANTLR3_FPRINTF(stderr, " : Extraneous input - expected <EOF>\n");
1139 ANTLR3_FPRINTF(stderr, " : Extraneous input - expected %s ...\n", tokenNames[ex->expecting]);
1144 case ANTLR3_MISSING_TOKEN_EXCEPTION:
1146 // Indicates that the recognizer detected that the token we just
1147 // hit would be valid syntactically if preceeded by a particular
1148 // token. Perhaps a missing ';' at line end or a missing ',' in an
1149 // expression list, and such like.
1151 if (tokenNames == NULL)
1153 ANTLR3_FPRINTF(stderr, " : Missing token (%d)...\n", ex->expecting);
1157 if (ex->expecting == ANTLR3_TOKEN_EOF)
1159 ANTLR3_FPRINTF(stderr, " : Missing <EOF>\n");
1163 ANTLR3_FPRINTF(stderr, " : Missing %s \n", tokenNames[ex->expecting]);
1168 case ANTLR3_RECOGNITION_EXCEPTION:
1170 // Indicates that the recognizer received a token
1171 // in the input that was not predicted. This is the basic exception type
1172 // from which all others are derived. So we assume it was a syntax error.
1173 // You may get this if there are not more tokens and more are needed
1174 // to complete a parse for instance.
1176 ANTLR3_FPRINTF(stderr, " : syntax error...\n");
1179 case ANTLR3_MISMATCHED_TOKEN_EXCEPTION:
1181 // We were expecting to see one thing and got another. This is the
1182 // most common error if we coudl not detect a missing or unwanted token.
1183 // Here you can spend your efforts to
1184 // derive more useful error messages based on the expected
1185 // token set and the last token and so on. The error following
1186 // bitmaps do a good job of reducing the set that we were looking
1187 // for down to something small. Knowing what you are parsing may be
1188 // able to allow you to be even more specific about an error.
1190 if (tokenNames == NULL)
1192 ANTLR3_FPRINTF(stderr, " : syntax error...\n");
1196 if (ex->expecting == ANTLR3_TOKEN_EOF)
1198 ANTLR3_FPRINTF(stderr, " : expected <EOF>\n");
1202 ANTLR3_FPRINTF(stderr, " : expected %s ...\n", tokenNames[ex->expecting]);
1207 case ANTLR3_NO_VIABLE_ALT_EXCEPTION:
1209 // We could not pick any alt decision from the input given
1210 // so god knows what happened - however when you examine your grammar,
1211 // you should. It means that at the point where the current token occurred
1212 // that the DFA indicates nowhere to go from here.
1214 ANTLR3_FPRINTF(stderr, " : cannot match to any predicted input...\n");
1218 case ANTLR3_MISMATCHED_SET_EXCEPTION:
1221 ANTLR3_UINT32 count;
1224 ANTLR3_UINT32 numbits;
1225 pANTLR3_BITSET errBits;
1227 // This means we were able to deal with one of a set of
1228 // possible tokens at this point, but we did not see any
1229 // member of that set.
1231 ANTLR3_FPRINTF(stderr, " : unexpected input...\n expected one of : ");
1233 // What tokens could we have accepted at this point in the
1237 errBits = antlr3BitsetLoad (ex->expectingSet);
1238 numbits = errBits->numBits (errBits);
1239 size = errBits->size (errBits);
1243 // However many tokens we could have dealt with here, it is usually
1244 // not useful to print ALL of the set here. I arbitrarily chose 8
1245 // here, but you should do whatever makes sense for you of course.
1246 // No token number 0, so look for bit 1 and on.
1248 for (bit = 1; bit < numbits && count < 8 && count < size; bit++)
1250 // TODO: This doesn;t look right - should be asking if the bit is set!!
1252 if (tokenNames[bit])
1254 ANTLR3_FPRINTF(stderr, "%s%s", count > 0 ? ", " : "", tokenNames[bit]);
1258 ANTLR3_FPRINTF(stderr, "\n");
1262 ANTLR3_FPRINTF(stderr, "Actually dude, we didn't seem to be expecting anything here, or at least\n");
1263 ANTLR3_FPRINTF(stderr, "I could not work out what I was expecting, like so many of us these days!\n");
1268 case ANTLR3_EARLY_EXIT_EXCEPTION:
1270 // We entered a loop requiring a number of token sequences
1271 // but found a token that ended that sequence earlier than
1272 // we should have done.
1274 ANTLR3_FPRINTF(stderr, " : missing elements...\n");
1279 // We don't handle any other exceptions here, but you can
1280 // if you wish. If we get an exception that hits this point
1281 // then we are just going to report what we know about the
1284 ANTLR3_FPRINTF(stderr, " : syntax not recognized...\n");
1288 // Here you have the token that was in error which if this is
1289 // the standard implementation will tell you the line and offset
1290 // and also record the address of the start of the line in the
1291 // input stream. You could therefore print the source line and so on.
1292 // Generally though, I would expect that your lexer/parser will keep
1293 // its own map of lines and source pointers or whatever as there
1294 // are a lot of specific things you need to know about the input
1295 // to do something like that.
1296 // Here is where you do it though :-).
1300 /// Return how many syntax errors were detected by this recognizer
///
/// \param recognizer Recognizer whose state->errorCount is reported.
/// \return The current value of recognizer->state->errorCount.
1302 static ANTLR3_UINT32
1303 getNumberOfSyntaxErrors(pANTLR3_BASE_RECOGNIZER recognizer)
1305 return recognizer->state->errorCount;
1308 /// Recover from an error found on the input stream. Mostly this is
1309 /// NoViableAlt exceptions, but could be a mismatched token that
1310 /// the match() routine could not recover from.
///
/// The input stream is taken from the parser or tree parser behind recognizer->super,
/// according to recognizer->type. The recovery set is obtained from
/// computeErrorRecoverySet, tokens are discarded with consumeUntilSet between the
/// beginResync/endResync hooks, the temporary set is freed, and finally the error
/// and failed flags in recognizer->state are cleared.
///
/// \param recognizer Recognizer that hit the error.
1313 recover (pANTLR3_BASE_RECOGNIZER recognizer)
1315 // Used to compute the follow set of tokens
1317 pANTLR3_BITSET followSet;
1318 pANTLR3_PARSER parser;
1319 pANTLR3_TREE_PARSER tparser;
1320 pANTLR3_INT_STREAM is;
// Pick the integer stream that belongs to this kind of recognizer.
1322 switch (recognizer->type)
1324 case ANTLR3_TYPE_PARSER:
1326 parser = (pANTLR3_PARSER) (recognizer->super);
1328 is = parser->tstream->istream;
1332 case ANTLR3_TYPE_TREE_PARSER:
1334 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1336 is = tparser->ctnstream->tnstream->istream;
1342 ANTLR3_FPRINTF(stderr, "Base recognizer function recover called by unknown parser type - provide override for this function\n");
1348 // Are we about to repeat the same error?
1350 if (recognizer->state->lastErrorIndex == is->index(is))
1352 // The last error was at the same token index point. This must be a case
1353 // where LT(1) is in the recovery token set so nothing is
1354 // consumed. Consume a single token so at least to prevent
1355 // an infinite loop; this is a failsafe.
1360 // Record error index position
1362 recognizer->state->lastErrorIndex = is->index(is);
1364 // Work out the follows set for error recovery
1366 followSet = recognizer->computeErrorRecoverySet(recognizer);
1368 // Call resync hook (for debuggers and so on)
1370 recognizer->beginResync(recognizer);
1372 // Consume tokens until we have resynced to something in the follows set
1374 recognizer->consumeUntilSet(recognizer, followSet);
1378 recognizer->endResync(recognizer);
1380 // Destroy the temporary bitset we produced.
1382 followSet->free(followSet);
1384 // Reset the inError flag so we don't re-report the exception
1386 recognizer->state->error = ANTLR3_FALSE;
1387 recognizer->state->failed = ANTLR3_FALSE;
1391 /// Attempt to recover from a single missing or extra token.
1395 /// LA(1) is not what we are looking for. If LA(2) has the right token,
1396 /// however, then assume LA(1) is some extra spurious token. Delete it
1397 /// and LA(2) as if we were doing a normal match(), which advances the
1402 /// If current token is consistent with what could come after
1403 /// ttype then it is ok to "insert" the missing token, else throw
1404 /// exception. For example, Input "i=(3;" is clearly missing the
1405 /// ')'. When the parser returns from the nested call to expr, it
1406 /// will have call chain:
1408 /// stat -> expr -> atom
1410 /// and it will be trying to match the ')' at this point in the
1413 /// => ID '=' '(' INT ')' ('+' atom)* ';'
1415 /// match() will see that ';' doesn't match ')' and report a
1416 /// mismatched token error. To recover, it sees that LA(1)==';'
1417 /// is in the set of tokens that can follow the ')' token
1418 /// reference in rule atom. It can assume that you forgot the ')'.
1420 /// The exception that was passed in, in the java implementation is
1421 /// stored in the recognizer exception stack in the C version. To 'throw' it we set the
1422 /// error flag and rules cascade back when this is set.
///
/// \param recognizer Recognizer that failed to match.
/// \param ttype      Token type that was expected.
/// \param follow     Follow set handed on to the missing-token test and to getMissingSymbol.
/// \return The matched symbol on the unwanted-token path, or the symbol produced by
///         getMissingSymbol on the missing-token path. When neither applies,
///         state->error is set to ANTLR3_TRUE.
1425 recoverFromMismatchedToken (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
1427 pANTLR3_PARSER parser;
1428 pANTLR3_TREE_PARSER tparser;
1429 pANTLR3_INT_STREAM is;
1430 void * matchedSymbol;
1432 // Invoke the debugger event if there is a debugger listening to us
// NOTE(review): state->exception is handed to the debugger here, before the NULL
// check further down that creates it - confirm the debugger tolerates NULL.
1434 if (recognizer->debugger != NULL)
1436 recognizer->debugger->recognitionException(recognizer->debugger, recognizer->state->exception);
1439 switch (recognizer->type)
1441 case ANTLR3_TYPE_PARSER:
1443 parser = (pANTLR3_PARSER) (recognizer->super);
1445 is = parser->tstream->istream;
1449 case ANTLR3_TYPE_TREE_PARSER:
1451 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1453 is = tparser->ctnstream->tnstream->istream;
1459 ANTLR3_FPRINTF(stderr, "Base recognizer function recoverFromMismatchedToken called by unknown parser type - provide override for this function\n");
1465 // Create an exception if we need one
1467 if (recognizer->state->exception == NULL)
1469 antlr3RecognitionExceptionNew(recognizer);
1472 // If the next token after the one we are looking at in the input stream
1473 // is what we are looking for then we remove the one we have discovered
1474 // from the stream by consuming it, then consume this next one along too as
1475 // if nothing had happened.
1477 if ( recognizer->mismatchIsUnwantedToken(recognizer, is, ttype) == ANTLR3_TRUE)
1479 recognizer->state->exception->type = ANTLR3_UNWANTED_TOKEN_EXCEPTION;
1480 recognizer->state->exception->message = ANTLR3_UNWANTED_TOKEN_EXCEPTION_NAME;
1482 // Call resync hook (for debuggers and so on)
1484 if (recognizer->debugger != NULL)
1486 recognizer->debugger->beginResync(recognizer->debugger);
// NOTE(review): beginResync is invoked here and again just below, while endResync
// is invoked once - confirm the duplicate call is intentional.
1489 recognizer->beginResync(recognizer);
1491 // "delete" the extra token
1493 recognizer->beginResync(recognizer);
1495 recognizer->endResync(recognizer);
1498 if (recognizer->debugger != NULL)
1500 recognizer->debugger->endResync(recognizer->debugger);
1503 // Print out the error after we consume so that ANTLRWorks sees the
1504 // token in the exception.
1506 recognizer->reportError(recognizer);
1508 // Return the token we are actually matching
1510 matchedSymbol = recognizer->getCurrentInputSymbol(recognizer, is);
1512 // Consume the token that the rule actually expected to get as if everything
1517 recognizer->state->error = ANTLR3_FALSE; // Exception is not outstanding any more
1519 return matchedSymbol;
1522 // Single token deletion (Unwanted above) did not work
1523 // so we see if we can insert a token instead by calculating which
1524 // token would be missing
// NOTE(review): mismatchIsMissingToken is called directly here, whereas
// mismatchIsUnwantedToken above goes through the recognizer's function pointer;
// an override installed on the recognizer would be bypassed - confirm.
1526 if (mismatchIsMissingToken(recognizer, is, follow))
1528 // We can fake the missing token and proceed
1530 matchedSymbol = recognizer->getMissingSymbol(recognizer, is, recognizer->state->exception, ttype, follow);
1531 recognizer->state->exception->type = ANTLR3_MISSING_TOKEN_EXCEPTION;
1532 recognizer->state->exception->message = ANTLR3_MISSING_TOKEN_EXCEPTION_NAME;
1533 recognizer->state->exception->token = matchedSymbol;
1534 recognizer->state->exception->expecting = ttype;
1536 // Print out the error after we insert so that ANTLRWorks sees the
1537 // token in the exception.
1539 recognizer->reportError(recognizer);
1541 recognizer->state->error = ANTLR3_FALSE; // Exception is not outstanding any more
1543 return matchedSymbol;
1547 // Neither deleting nor inserting tokens allows recovery
1548 // must just report the exception.
1550 recognizer->state->error = ANTLR3_TRUE;
/// Attempt to recover when the current input does not belong to an expected set.
///
/// If the recognizer's mismatchIsMissingToken test succeeds, a substitute symbol is
/// obtained from getMissingSymbol (with ANTLR3_TOKEN_INVALID as the expected type),
/// recorded in the exception, reported, and returned with state->error cleared.
/// Otherwise both state->error and state->failed are set to ANTLR3_TRUE.
///
/// \param recognizer Recognizer that failed to match.
/// \param follow     Follow set handed on to the missing-token test and to getMissingSymbol.
1555 recoverFromMismatchedSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST follow)
1557 pANTLR3_PARSER parser;
1558 pANTLR3_TREE_PARSER tparser;
1559 pANTLR3_INT_STREAM is;
1560 pANTLR3_COMMON_TOKEN matchedSymbol;
// Pick the integer stream that belongs to this kind of recognizer.
1562 switch (recognizer->type)
1564 case ANTLR3_TYPE_PARSER:
1566 parser = (pANTLR3_PARSER) (recognizer->super);
1568 is = parser->tstream->istream;
1572 case ANTLR3_TYPE_TREE_PARSER:
1574 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1576 is = tparser->ctnstream->tnstream->istream;
1582 ANTLR3_FPRINTF(stderr, "Base recognizer function recoverFromMismatchedSet called by unknown parser type - provide override for this function\n");
1588 if (recognizer->mismatchIsMissingToken(recognizer, is, follow) == ANTLR3_TRUE)
1590 // We can fake the missing token and proceed
// NOTE(review): state->exception is dereferenced below with no creation or NULL
// test visible in this function, and its message field is left untouched -
// confirm the caller always supplies an exception and that this is intended.
1592 matchedSymbol = recognizer->getMissingSymbol(recognizer, is, recognizer->state->exception, ANTLR3_TOKEN_INVALID, follow);
1593 recognizer->state->exception->type = ANTLR3_MISSING_TOKEN_EXCEPTION;
1594 recognizer->state->exception->token = matchedSymbol;
1596 // Print out the error after we insert so that ANTLRWorks sees the
1597 // token in the exception.
1599 recognizer->reportError(recognizer);
1601 recognizer->state->error = ANTLR3_FALSE; // Exception is not outstanding any more
1603 return matchedSymbol;
1606 // TODO - Single token deletion like in recoverFromMismatchedToken()
1608 recognizer->state->error = ANTLR3_TRUE;
1609 recognizer->state->failed = ANTLR3_TRUE;
1613 /// This code is factored out from mismatched token and mismatched set
1614 /// recovery. It handles "single token insertion" error recovery for
1615 /// both. No tokens are consumed to recover from insertions. Return
1616 /// true if recovery was possible else return false.
///
/// \param recognizer Recognizer that failed to match.
/// \param followBits Follow set description, loaded into a bitset with antlr3BitsetLoad.
/// \return ANTLR3_TRUE when LA(1) is a member of the follow set (the error is reported
///         and the error/failed flags are cleared), ANTLR3_FALSE otherwise.
1618 static ANTLR3_BOOLEAN
1619 recoverFromMismatchedElement (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET_LIST followBits)
1621 pANTLR3_BITSET viableToksFollowingRule;
1622 pANTLR3_BITSET follow;
1623 pANTLR3_PARSER parser;
1624 pANTLR3_TREE_PARSER tparser;
1625 pANTLR3_INT_STREAM is;
1627 switch (recognizer->type)
1629 case ANTLR3_TYPE_PARSER:
1631 parser = (pANTLR3_PARSER) (recognizer->super);
1633 is = parser->tstream->istream;
1637 case ANTLR3_TYPE_TREE_PARSER:
1639 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1641 is = tparser->ctnstream->tnstream->istream;
// NOTE(review): the message below names 'recover' rather than this function -
// looks like a copy/paste leftover, confirm before changing the runtime text.
1647 ANTLR3_FPRINTF(stderr, "Base recognizer function recover called by unknown parser type - provide override for this function\n");
1648 return ANTLR3_FALSE;
1653 follow = antlr3BitsetLoad(followBits);
1657 /* The follow set is NULL, which means we don't know what can come
1658 * next, so we "hit and hope" by just signifying that we cannot
1659 * recover, which will just cause the next token to be consumed,
1660 * which might dig us out.
1662 return ANTLR3_FALSE;
1665 /* We have a bitmap for the follow set, hence we can compute
1666 * what can follow this grammar element reference.
1668 if (follow->isMember(follow, ANTLR3_EOR_TOKEN_TYPE) == ANTLR3_TRUE)
1670 /* First we need to know which of the available tokens are viable
1671 * to follow this reference.
// NOTE(review): viableToksFollowingRule is computed here and freed below, but no
// statement merging it into follow is visible - confirm whether a union step is missing.
1673 viableToksFollowingRule = recognizer->computeCSRuleFollow(recognizer);
1675 /* Remove the EOR token, which we do not wish to compute with
1677 follow->remove(follow, ANTLR3_EOR_TOKEN_TYPE);
1678 viableToksFollowingRule->free(viableToksFollowingRule);
1679 /* We now have the computed set of what can follow the current token
1683 /* We can now see if the current token works with the set of tokens
1684 * that could follow the current grammar reference. If it looks like it
1685 * is consistent, then we can "insert" that token by not throwing
1686 * an exception and assuming that we saw it.
1688 if ( follow->isMember(follow, is->_LA(is, 1)) == ANTLR3_TRUE)
1690 /* report the error, but don't cause any rules to abort and stuff
1692 recognizer->reportError(recognizer);
1695 follow->free(follow);
1697 recognizer->state->error = ANTLR3_FALSE;
1698 recognizer->state->failed = ANTLR3_FALSE;
1699 return ANTLR3_TRUE; /* Success in recovery */
1704 follow->free(follow);
1707 /* We could not find anything viable to do, so this is going to
1708 * cause an exception.
1710 return ANTLR3_FALSE;
1713 /// Eat tokens from the input stream until we get one of JUST the right type
///
/// The loop stops when LA(1) equals tokenType or ANTLR3_TOKEN_EOF.
///
/// \param recognizer Recognizer whose parser or tree parser stream is read.
/// \param tokenType  Token type to stop at.
1716 consumeUntil (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 tokenType)
1718 ANTLR3_UINT32 ttype;
1719 pANTLR3_PARSER parser;
1720 pANTLR3_TREE_PARSER tparser;
1721 pANTLR3_INT_STREAM is;
// Pick the integer stream that belongs to this kind of recognizer.
1723 switch (recognizer->type)
1725 case ANTLR3_TYPE_PARSER:
1727 parser = (pANTLR3_PARSER) (recognizer->super);
1729 is = parser->tstream->istream;
1733 case ANTLR3_TYPE_TREE_PARSER:
1735 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1737 is = tparser->ctnstream->tnstream->istream;
1743 ANTLR3_FPRINTF(stderr, "Base recognizer function 'consumeUntil' called by unknown parser type - provide override for this function\n");
1749 // What do we have at the moment?
1751 ttype = is->_LA(is, 1);
1753 // Start eating tokens until we get to the one we want.
1755 while (ttype != ANTLR3_TOKEN_EOF && ttype != tokenType)
1758 ttype = is->_LA(is, 1);
1762 /// Eat tokens from the input stream until we find one that
1763 /// belongs to the supplied set.
///
/// The loop stops when LA(1) is a member of set or is ANTLR3_TOKEN_EOF.
///
/// \param recognizer Recognizer whose parser or tree parser stream is read.
/// \param set        Bitset of token types to stop at; queried with set->isMember.
1766 consumeUntilSet (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET set)
1768 ANTLR3_UINT32 ttype;
1769 pANTLR3_PARSER parser;
1770 pANTLR3_TREE_PARSER tparser;
1771 pANTLR3_INT_STREAM is;
// Pick the integer stream that belongs to this kind of recognizer.
1773 switch (recognizer->type)
1775 case ANTLR3_TYPE_PARSER:
1777 parser = (pANTLR3_PARSER) (recognizer->super);
1779 is = parser->tstream->istream;
1783 case ANTLR3_TYPE_TREE_PARSER:
1785 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1787 is = tparser->ctnstream->tnstream->istream;
1793 ANTLR3_FPRINTF(stderr, "Base recognizer function 'consumeUntilSet' called by unknown parser type - provide override for this function\n");
1799 // What do we have at the moment?
1801 ttype = is->_LA(is, 1);
1803 // Start eating tokens until we get to one we want.
1805 while (ttype != ANTLR3_TOKEN_EOF && set->isMember(set, ttype) == ANTLR3_FALSE)
1808 ttype = is->_LA(is, 1);
1812 /** Return the rule invocation stack (how we got here in the parse).
1813 * In the java version Ter just asks the JVM for all the information
1814 * but in C we don't get this information, so I am going to do nothing
// NOTE(review): per the comment above this is expected to be a do-nothing stub;
// the value it hands back is not documented here - confirm against the body.
1817 static pANTLR3_STACK
1818 getRuleInvocationStack (pANTLR3_BASE_RECOGNIZER recognizer)
// Variant of getRuleInvocationStack that additionally takes a name argument.
// NOTE(review): presumably a stub like the unnamed version - confirm against the body.
1823 static pANTLR3_STACK
1824 getRuleInvocationStackNamed (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 name)
1829 /** Convenience method for template rewrites - NYI.
// Takes a hash table of tokens and is declared to return a hash table; as the
// comment above says, the conversion itself is not yet implemented.
1831 static pANTLR3_HASH_TABLE
1832 toStrings (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_HASH_TABLE tokens)
// Release an ANTLR3_INT_TRIE that is only known through a void pointer, by calling
// the trie's own free function. It is passed as the free callback when a per-rule
// trie is added to the rule memo table in getRuleMemoization.
1837 static void ANTLR3_CDECL
1838 freeIntTrie (void * trie)
1840 ((pANTLR3_INT_TRIE)trie)->free((pANTLR3_INT_TRIE)trie);
1844 /** Pointer to a function to return whether the rule has parsed input starting at the supplied
1845 * start index before. If the rule has not parsed input starting from the supplied start index,
1846 * then it will return MEMO_RULE_UNKNOWN. If it has parsed from the supplied start point
1847 * then it will return the point where it last stopped parsing after that start point.
1850 * The rule memos are an ANTLR3_LIST of ANTLR3_LISTS, however if this becomes any kind of performance
1851 * issue (it probably won't, the hash tables are pretty quick) then we could make a special int only
1852 * version of the table.
1854 static ANTLR3_MARKER
1855 getRuleMemoization (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_INTKEY ruleIndex, ANTLR3_MARKER ruleParseStart)
1857 /* The rule memos are an ANTLR3_LIST of ANTLR3_LIST.
// NOTE(review): despite the wording above, the code below keeps one
// pANTLR3_INT_TRIE per rule inside state->ruleMemo, keyed by ruleIndex.
1859 pANTLR3_INT_TRIE ruleList;
1860 ANTLR3_MARKER stopIndex;
1861 pANTLR3_TRIE_ENTRY entry;
1863 /* See if we have a list in the ruleMemos for this rule, and if not, then create one
1864 * as we will need it eventually if we are being asked for the memo here.
1866 entry = recognizer->state->ruleMemo->get(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex);
1870 /* Did not find it, so create a new one for it, with a bit depth based on the
1871 * size of the input stream. We need the bit depth to incorporate the number of
1872 * bits required to represent the largest possible stop index in the input, which is the
1873 * last character. An int stream is free to return the largest 64 bit offset if it has
1874 * no idea of the size, but you should remember that this will cause the leftmost
1875 * bit match algorithm to run to 63 bits, which will be the whole time spent in the trie ;-)
1877 ruleList = antlr3IntTrieNew(63); /* Depth is theoretically 64 bits, but probably not ;-) */
1879 if (ruleList != NULL)
1881 recognizer->state->ruleMemo->add(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex, ANTLR3_HASH_TYPE_STR, 0, ANTLR3_FUNC_PTR(ruleList), freeIntTrie);
1884 /* We cannot have a stopIndex in a trie we have just created of course
1886 return MEMO_RULE_UNKNOWN;
1889 ruleList = (pANTLR3_INT_TRIE) (entry->data.ptr);
1891 /* See if there is a stop index associated with the supplied start index.
1895 entry = ruleList->get(ruleList, ruleParseStart);
1898 stopIndex = (ANTLR3_MARKER)(entry->data.intVal);
1903 return MEMO_RULE_UNKNOWN;
1909 /** Has this rule already parsed input at the current index in the
1910 * input stream? Return ANTLR3_TRUE if we have and ANTLR3_FALSE
1913 * This method has a side-effect: if we have seen this input for
1914 * this rule and successfully parsed before, then seek ahead to
1915 * 1 past the stop token matched for this rule last time.
1917 static ANTLR3_BOOLEAN
1918 alreadyParsedRule (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex)
1920 ANTLR3_MARKER stopIndex;
1921 pANTLR3_LEXER lexer;
1922 pANTLR3_PARSER parser;
1923 pANTLR3_TREE_PARSER tparser;
1924 pANTLR3_INT_STREAM is;
// Pick the integer stream for this recognizer; unlike the recovery routines,
// lexers are handled here as well.
1926 switch (recognizer->type)
1928 case ANTLR3_TYPE_PARSER:
1930 parser = (pANTLR3_PARSER) (recognizer->super);
1933 is = parser->tstream->istream;
1937 case ANTLR3_TYPE_TREE_PARSER:
1939 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
1942 is = tparser->ctnstream->tnstream->istream;
1946 case ANTLR3_TYPE_LEXER:
1948 lexer = (pANTLR3_LEXER) (recognizer->super);
1951 is = lexer->input->istream;
1956 ANTLR3_FPRINTF(stderr, "Base recognizer function 'alreadyParsedRule' called by unknown parser type - provide override for this function\n");
1957 return ANTLR3_FALSE;
1962 /* See if we have a memo marker for this.
1964 stopIndex = recognizer->getRuleMemoization(recognizer, ruleIndex, is->index(is));
// No memo for this rule at the current index: report that it has not been parsed.
1966 if (stopIndex == MEMO_RULE_UNKNOWN)
1968 return ANTLR3_FALSE;
// A remembered failure is replayed by setting the failed flag; a remembered
// stop index is replayed by seeking one position past it.
1971 if (stopIndex == MEMO_RULE_FAILED)
1973 recognizer->state->failed = ANTLR3_TRUE;
1977 is->seek(is, stopIndex+1);
1980 /* If here then the rule was executed for this input already
1985 /** Record whether or not this rule parsed the input at this position
// The value stored for ruleParseStart is MEMO_RULE_FAILED when state->failed is
// set, otherwise the index just before the stream's current index.
1989 memoize (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_MARKER ruleIndex, ANTLR3_MARKER ruleParseStart)
1991 /* The rule memos are an ANTLR3_LIST of ANTLR3_LIST.
1993 pANTLR3_INT_TRIE ruleList;
1994 pANTLR3_TRIE_ENTRY entry;
1995 ANTLR3_MARKER stopIndex;
1996 pANTLR3_LEXER lexer;
1997 pANTLR3_PARSER parser;
1998 pANTLR3_TREE_PARSER tparser;
1999 pANTLR3_INT_STREAM is;
2001 switch (recognizer->type)
2003 case ANTLR3_TYPE_PARSER:
2005 parser = (pANTLR3_PARSER) (recognizer->super);
2007 is = parser->tstream->istream;
2011 case ANTLR3_TYPE_TREE_PARSER:
2013 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
2015 is = tparser->ctnstream->tnstream->istream;
2019 case ANTLR3_TYPE_LEXER:
2021 lexer = (pANTLR3_LEXER) (recognizer->super);
2024 is = lexer->input->istream;
// NOTE(review): the message below names consumeUntilSet rather than memoize -
// looks like a copy/paste leftover, confirm before changing the runtime text.
2029 ANTLR3_FPRINTF(stderr, "Base recognizer function consumeUntilSet called by unknown parser type - provide override for this function\n");
2035 stopIndex = recognizer->state->failed == ANTLR3_TRUE ? MEMO_RULE_FAILED : is->index(is) - 1;
2037 entry = recognizer->state->ruleMemo->get(recognizer->state->ruleMemo, (ANTLR3_INTKEY)ruleIndex);
2041 ruleList = (pANTLR3_INT_TRIE)(entry->data.ptr);
2043 /* If we don't already have this entry, append it. The memoize trie does not
2044 * accept duplicates so it won't add it if already there and we just ignore the
2045 * return code as we don't care if it is there already.
2047 ruleList->add(ruleList, ruleParseStart, ANTLR3_HASH_TYPE_INT, stopIndex, NULL, NULL);
2050 /** A syntactic predicate. Returns true/false depending on whether
2051 * the specified grammar fragment matches the current input stream.
2052 * This resets the failed instance var afterwards.
2054 static ANTLR3_BOOLEAN
2055 synpred (pANTLR3_BASE_RECOGNIZER recognizer, void * ctx, void (*predicate)(void * ctx))
2057 ANTLR3_MARKER start;
2058 pANTLR3_PARSER parser;
2059 pANTLR3_TREE_PARSER tparser;
2060 pANTLR3_INT_STREAM is;
// Only parsers and tree parsers are handled; any other type reports ANTLR3_FALSE.
2062 switch (recognizer->type)
2064 case ANTLR3_TYPE_PARSER:
2066 parser = (pANTLR3_PARSER) (recognizer->super);
2068 is = parser->tstream->istream;
2072 case ANTLR3_TYPE_TREE_PARSER:
2074 tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
2076 is = tparser->ctnstream->tnstream->istream;
2082 ANTLR3_FPRINTF(stderr, "Base recognizer function 'synPred' called by unknown parser type - provide override for this function\n");
2083 return ANTLR3_FALSE;
2088 /* Begin backtracking so we can get back to where we started after trying out
2089 * the syntactic predicate.
2091 start = is->mark(is);
2092 recognizer->state->backtracking++;
2094 /* Try the syntactical predicate
// Whatever the outcome, the stream is rewound to the mark taken above and the
// backtracking counter is restored.
2100 is->rewind(is, start);
2101 recognizer->state->backtracking--;
2103 if (recognizer->state->failed == ANTLR3_TRUE)
2107 recognizer->state->failed = ANTLR3_FALSE;
2108 return ANTLR3_FALSE;
2112 /* Predicate was successful
2114 recognizer->state->failed = ANTLR3_FALSE;
/// Reset the recognizer's state: free the current following stack, clear the error
/// recovery, failure, error count and backtracking fields, replace the rule memo
/// trie if one exists, and install a fresh following stack.
///
/// \param recognizer Recognizer whose state is reset.
2120 reset(pANTLR3_BASE_RECOGNIZER recognizer)
// NOTE(review): recognizer->state is dereferenced here and in the assignments
// below, before the NULL test on recognizer->state further down - confirm
// whether state can really be NULL at this point.
2122 if (recognizer->state->following != NULL)
2124 recognizer->state->following->free(recognizer->state->following);
2127 // Reset the state flags
2129 recognizer->state->errorRecovery = ANTLR3_FALSE;
2130 recognizer->state->lastErrorIndex = -1;
2131 recognizer->state->failed = ANTLR3_FALSE;
2132 recognizer->state->errorCount = 0;
2133 recognizer->state->backtracking = 0;
2134 recognizer->state->following = NULL;
2136 if (recognizer->state != NULL)
2138 if (recognizer->state->ruleMemo != NULL)
2140 recognizer->state->ruleMemo->free(recognizer->state->ruleMemo);
2141 recognizer->state->ruleMemo = antlr3IntTrieNew(15); /* 16 bit depth is enough for 32768 rules! */
2146 // Install a new following set
2148 recognizer->state->following = antlr3StackNew(8);
2152 // Default implementation is for parser and assumes a token stream as supplied by the runtime.
2153 // You MAY need to override this function if the standard TOKEN_STREAM is not what you are using.
//
// Treats istream->super as a pANTLR3_TOKEN_STREAM and returns that stream's LT(1),
// i.e. the result of _LT(stream, 1). The recognizer argument is not used by the
// visible statement.
2156 getCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream)
2158 return ((pANTLR3_TOKEN_STREAM)istream->super)->_LT((pANTLR3_TOKEN_STREAM)istream->super, 1);
2161 // Default implementation is for parser and assumes a token stream as supplied by the runtime.
2162 // You MAY need to override this function if the standard COMMON_TOKEN_STREAM is not what you are using.
//
// Builds a placeholder token for a token that was expected but absent. Line, column,
// user fields, custom pointer and line start are copied from the current token (or
// from the token before it when the current token is EOF); the type is set to
// expectedTokenType and the text starts with "<missing ".
2165 getMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION e,
2166 ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow)
2168 pANTLR3_TOKEN_STREAM ts;
2169 pANTLR3_COMMON_TOKEN_STREAM cts;
2170 pANTLR3_COMMON_TOKEN token;
2171 pANTLR3_COMMON_TOKEN current;
2172 pANTLR3_STRING text;
2174 // Dereference the standard pointers
2176 ts = (pANTLR3_TOKEN_STREAM)istream->super;
2177 cts = (pANTLR3_COMMON_TOKEN_STREAM)ts->super;
2179 // Work out what to use as the current symbol to make a line and offset etc
2180 // If we are at EOF, we use the token before EOF
2182 current = ts->_LT(ts, 1);
2183 if (current->getType(current) == ANTLR3_TOKEN_EOF)
2185 current = ts->_LT(ts, -1);
2188 // Create a new empty token
2190 if (recognizer->state->tokFactory == NULL)
2192 // We don't yet have a token factory for making tokens
2193 // we just need a fake one using the input stream of the current
2196 recognizer->state->tokFactory = antlr3TokenFactoryNew(current->input);
2198 token = recognizer->state->tokFactory->newToken(recognizer->state->tokFactory);
2200 // Set some of the token properties based on the current token
2202 token->setLine (token, current->getLine(current));
2203 token->setCharPositionInLine (token, current->getCharPositionInLine(current));
2204 token->setChannel (token, ANTLR3_TOKEN_DEFAULT_CHANNEL);
2205 token->setType (token, expectedTokenType);
2206 token->user1 = current->user1;
2207 token->user2 = current->user2;
2208 token->user3 = current->user3;
2209 token->custom = current->custom;
2210 token->lineStart = current->lineStart;
2212 // Create the token text that shows it has been inserted
2214 token->setText8(token, (pANTLR3_UINT8)"<missing ");
2215 text = token->getText(token);
// NOTE(review): state->tokenNames is indexed with expectedTokenType and no NULL or
// range check on the table is visible; recoverFromMismatchedSet passes
// ANTLR3_TOKEN_INVALID as the expected type - confirm this lookup is safe there.
2219 text->append8(text, (const char *)recognizer->state->tokenNames[expectedTokenType]);
2220 text->append8(text, (const char *)">");
2223 // Finally return the pointer to our new token
2229 #ifdef ANTLR3_WINDOWS
2230 #pragma warning( default : 4100 )